From adc675c8ef55b59bb2facf795a3c26411cfbf3ed Mon Sep 17 00:00:00 2001 From: "je@facebook.com" Date: Fri, 4 Jun 2010 17:36:05 -0700 Subject: [PATCH 0001/3378] Add support for libunwind backtrace caching. Use libunwind's unw_tdep_trace() if it is available. --- jemalloc/configure.ac | 13 ++- jemalloc/include/jemalloc/jemalloc_defs.h.in | 3 + jemalloc/src/prof.c | 94 ++++++++++++++++---- 3 files changed, 94 insertions(+), 16 deletions(-) diff --git a/jemalloc/configure.ac b/jemalloc/configure.ac index 412d3d1b..26e1c415 100644 --- a/jemalloc/configure.ac +++ b/jemalloc/configure.ac @@ -29,7 +29,7 @@ dnl JE_COMPILABLE(label, hcode, mcode, rvar) AC_DEFUN([JE_COMPILABLE], [ AC_MSG_CHECKING([whether $1 is compilable]) -AC_RUN_IFELSE([AC_LANG_PROGRAM( +AC_COMPILE_IFELSE([AC_LANG_PROGRAM( [$2], [$3])], AC_MSG_RESULT([yes]) [$4="yes"], @@ -454,6 +454,17 @@ if test "x$backtrace_method" = "x" -a "x$enable_prof_libunwind" = "x1" ; then if test "x${enable_prof_libunwind}" = "x1" ; then backtrace_method="libunwind" AC_DEFINE([JEMALLOC_PROF_LIBUNWIND], [ ]) + JE_COMPILABLE([libunwind frame cache], [ +#define UNW_LOCAL_ONLY +#include +], [ +unw_tdep_make_frame_cache(0, 0); +unw_tdep_free_frame_cache(0); +unw_tdep_trace(0, 0, 0, 0); +], [libunwind_cache]) + if test "x${libunwind_cache}" = "xyes" ; then + AC_DEFINE([JEMALLOC_PROF_LIBUNWIND_CACHE], [ ]) + fi fi fi diff --git a/jemalloc/include/jemalloc/jemalloc_defs.h.in b/jemalloc/include/jemalloc/jemalloc_defs.h.in index d8c81d7c..88e435d2 100644 --- a/jemalloc/include/jemalloc/jemalloc_defs.h.in +++ b/jemalloc/include/jemalloc/jemalloc_defs.h.in @@ -62,6 +62,9 @@ /* Use libunwind for profile backtracing if defined. */ #undef JEMALLOC_PROF_LIBUNWIND +/* Use libunwind's backtrace caching for profile backtracing if defined. */ +#undef JEMALLOC_PROF_LIBUNWIND_CACHE + /* Use libgcc for profile backtracing if defined. 
*/ #undef JEMALLOC_PROF_LIBGCC diff --git a/jemalloc/src/prof.c b/jemalloc/src/prof.c index 8370042b..537c5c1d 100644 --- a/jemalloc/src/prof.c +++ b/jemalloc/src/prof.c @@ -44,6 +44,12 @@ pthread_key_t prof_tdata_tsd; static ckh_t bt2ctx; static malloc_mutex_t bt2ctx_mtx; +#ifdef JEMALLOC_PROF_LIBUNWIND_CACHE +static __thread unw_tdep_cache_t *libunwind_cache_tls + JEMALLOC_ATTR(tls_model("initial-exec")); +static pthread_key_t libunwind_cache_tsd; +#endif + static malloc_mutex_t prof_dump_seq_mtx; static uint64_t prof_dump_seq; static uint64_t prof_dump_iseq; @@ -95,6 +101,9 @@ static void prof_bt_hash(const void *key, unsigned minbits, size_t *hash1, size_t *hash2); static bool prof_bt_keycomp(const void *k1, const void *k2); static void prof_tdata_cleanup(void *arg); +#ifdef JEMALLOC_PROF_LIBUNWIND_CACHE +static void libunwind_cache_thread_cleanup(void *arg); +#endif /******************************************************************************/ @@ -177,6 +186,11 @@ prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) unw_cursor_t cursor; unsigned i; int err; +#ifdef JEMALLOC_PROF_LIBUNWIND_CACHE + unw_tdep_cache_t *cache; + int len = nignore + 1 + max; + void* vec[len]; +#endif assert(bt->len == 0); assert(bt->vec != NULL); @@ -185,24 +199,53 @@ prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) unw_getcontext(&uc); unw_init_local(&cursor, &uc); - /* Throw away (nignore+1) stack frames, if that many exist. */ - for (i = 0; i < nignore + 1; i++) { - err = unw_step(&cursor); - if (err <= 0) - return; +#ifdef JEMALLOC_PROF_LIBUNWIND_CACHE + cache = libunwind_cache_tls; + if (cache == NULL) { + cache = unw_tdep_make_frame_cache(imalloc, idalloc); + if (cache != NULL) { + libunwind_cache_tls = cache; + pthread_setspecific(libunwind_cache_tsd, cache); + } } + if (cache != NULL && unw_tdep_trace(&cursor, vec, &len, cache) >= 0) { + /* + * The trace cache successfully looked up the backtrace. 
+ * Discard the first (nignore+1) elements when copying the + * result, since there was no way to tell unw_tdep_trace() to + * skip those frames. + */ + assert(len >= nignore + 1); + len -= nignore + 1; + if (len > 0) { + memcpy(bt->vec, &vec[nignore + 1], sizeof(void *) * + len); + bt->len = len; + } + } else { +#endif + /* Throw away (nignore+1) stack frames, if that many exist. */ + for (i = 0; i < nignore + 1; i++) { + err = unw_step(&cursor); + if (err <= 0) + return; + } - /* - * Iterate over stack frames until there are no more, or until no space - * remains in bt. - */ - for (i = 0; i < max; i++) { - unw_get_reg(&cursor, UNW_REG_IP, (unw_word_t *)&bt->vec[i]); - bt->len++; - err = unw_step(&cursor); - if (err <= 0) - break; + /* + * Iterate over stack frames until there are no more, or until + * no space remains in bt. + */ + for (i = 0; i < max; i++) { + unw_get_reg(&cursor, UNW_REG_IP, + (unw_word_t *)&bt->vec[i]); + bt->len++; + err = unw_step(&cursor); + if (err <= 0) + break; + } +#ifdef JEMALLOC_PROF_LIBUNWIND_CACHE } +#endif } #endif #ifdef JEMALLOC_PROF_LIBGCC @@ -1156,6 +1199,19 @@ prof_tdata_cleanup(void *arg) } } +#ifdef JEMALLOC_PROF_LIBUNWIND_CACHE +static void +libunwind_cache_thread_cleanup(void *arg) +{ + unw_tdep_cache_t *cache = libunwind_cache_tls; + + if (cache != NULL) { + unw_tdep_free_frame_cache(cache); + libunwind_cache_tls = NULL; + } +} +#endif + void prof_boot0(void) { @@ -1208,6 +1264,14 @@ prof_boot2(void) ": Error in pthread_key_create()\n"); abort(); } +#ifdef JEMALLOC_PROF_LIBUNWIND_CACHE + if (pthread_key_create(&libunwind_cache_tsd, + libunwind_cache_thread_cleanup) != 0) { + malloc_write( + ": Error in pthread_key_create()\n"); + abort(); + } +#endif prof_bt_max = (1U << opt_lg_prof_bt_max); if (malloc_mutex_init(&prof_dump_seq_mtx)) From af8ad3ec6abe9840bd503535a604a6173ce73515 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 23 Mar 2011 20:39:02 -0700 Subject: [PATCH 0002/3378] Fix an assertion in arena_purge(). 
arena_purge() may be called even when there are no dirty pages, so loosen an assertion accordingly. --- jemalloc/src/arena.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/jemalloc/src/arena.c b/jemalloc/src/arena.c index 1954da97..9aaf47ff 100644 --- a/jemalloc/src/arena.c +++ b/jemalloc/src/arena.c @@ -868,9 +868,10 @@ arena_purge(arena_t *arena, bool all) } assert(ndirty == arena->ndirty); #endif - assert(arena->ndirty > arena->npurgatory); + assert(arena->ndirty > arena->npurgatory || all); assert(arena->ndirty > chunk_npages || all); - assert((arena->nactive >> opt_lg_dirty_mult) < arena->ndirty || all); + assert((arena->nactive >> opt_lg_dirty_mult) < (arena->ndirty - + npurgatory) || all); #ifdef JEMALLOC_STATS arena->stats.npurge++; @@ -882,8 +883,10 @@ arena_purge(arena_t *arena, bool all) * multiple threads from racing to reduce ndirty below the threshold. */ npurgatory = arena->ndirty - arena->npurgatory; - if (all == false) + if (all == false) { + assert(npurgatory >= arena->nactive >> opt_lg_dirty_mult); npurgatory -= arena->nactive >> opt_lg_dirty_mult; + } arena->npurgatory += npurgatory; while (npurgatory > 0) { From 9f949f9d82e63432b99000643fe983babd6a230b Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 22 Mar 2011 20:44:40 -0700 Subject: [PATCH 0003/3378] Revert "Add support for libunwind backtrace caching." This reverts commit adc675c8ef55b59bb2facf795a3c26411cfbf3ed. The original commit added support for a non-standard libunwind API, so it was not of general utility. 
--- jemalloc/configure.ac | 13 +-- jemalloc/include/jemalloc/jemalloc_defs.h.in | 3 - jemalloc/src/prof.c | 94 ++++---------------- 3 files changed, 16 insertions(+), 94 deletions(-) diff --git a/jemalloc/configure.ac b/jemalloc/configure.ac index 26e1c415..412d3d1b 100644 --- a/jemalloc/configure.ac +++ b/jemalloc/configure.ac @@ -29,7 +29,7 @@ dnl JE_COMPILABLE(label, hcode, mcode, rvar) AC_DEFUN([JE_COMPILABLE], [ AC_MSG_CHECKING([whether $1 is compilable]) -AC_COMPILE_IFELSE([AC_LANG_PROGRAM( +AC_RUN_IFELSE([AC_LANG_PROGRAM( [$2], [$3])], AC_MSG_RESULT([yes]) [$4="yes"], @@ -454,17 +454,6 @@ if test "x$backtrace_method" = "x" -a "x$enable_prof_libunwind" = "x1" ; then if test "x${enable_prof_libunwind}" = "x1" ; then backtrace_method="libunwind" AC_DEFINE([JEMALLOC_PROF_LIBUNWIND], [ ]) - JE_COMPILABLE([libunwind frame cache], [ -#define UNW_LOCAL_ONLY -#include -], [ -unw_tdep_make_frame_cache(0, 0); -unw_tdep_free_frame_cache(0); -unw_tdep_trace(0, 0, 0, 0); -], [libunwind_cache]) - if test "x${libunwind_cache}" = "xyes" ; then - AC_DEFINE([JEMALLOC_PROF_LIBUNWIND_CACHE], [ ]) - fi fi fi diff --git a/jemalloc/include/jemalloc/jemalloc_defs.h.in b/jemalloc/include/jemalloc/jemalloc_defs.h.in index 88e435d2..d8c81d7c 100644 --- a/jemalloc/include/jemalloc/jemalloc_defs.h.in +++ b/jemalloc/include/jemalloc/jemalloc_defs.h.in @@ -62,9 +62,6 @@ /* Use libunwind for profile backtracing if defined. */ #undef JEMALLOC_PROF_LIBUNWIND -/* Use libunwind's backtrace caching for profile backtracing if defined. */ -#undef JEMALLOC_PROF_LIBUNWIND_CACHE - /* Use libgcc for profile backtracing if defined. 
*/ #undef JEMALLOC_PROF_LIBGCC diff --git a/jemalloc/src/prof.c b/jemalloc/src/prof.c index 537c5c1d..8370042b 100644 --- a/jemalloc/src/prof.c +++ b/jemalloc/src/prof.c @@ -44,12 +44,6 @@ pthread_key_t prof_tdata_tsd; static ckh_t bt2ctx; static malloc_mutex_t bt2ctx_mtx; -#ifdef JEMALLOC_PROF_LIBUNWIND_CACHE -static __thread unw_tdep_cache_t *libunwind_cache_tls - JEMALLOC_ATTR(tls_model("initial-exec")); -static pthread_key_t libunwind_cache_tsd; -#endif - static malloc_mutex_t prof_dump_seq_mtx; static uint64_t prof_dump_seq; static uint64_t prof_dump_iseq; @@ -101,9 +95,6 @@ static void prof_bt_hash(const void *key, unsigned minbits, size_t *hash1, size_t *hash2); static bool prof_bt_keycomp(const void *k1, const void *k2); static void prof_tdata_cleanup(void *arg); -#ifdef JEMALLOC_PROF_LIBUNWIND_CACHE -static void libunwind_cache_thread_cleanup(void *arg); -#endif /******************************************************************************/ @@ -186,11 +177,6 @@ prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) unw_cursor_t cursor; unsigned i; int err; -#ifdef JEMALLOC_PROF_LIBUNWIND_CACHE - unw_tdep_cache_t *cache; - int len = nignore + 1 + max; - void* vec[len]; -#endif assert(bt->len == 0); assert(bt->vec != NULL); @@ -199,53 +185,24 @@ prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) unw_getcontext(&uc); unw_init_local(&cursor, &uc); -#ifdef JEMALLOC_PROF_LIBUNWIND_CACHE - cache = libunwind_cache_tls; - if (cache == NULL) { - cache = unw_tdep_make_frame_cache(imalloc, idalloc); - if (cache != NULL) { - libunwind_cache_tls = cache; - pthread_setspecific(libunwind_cache_tsd, cache); - } + /* Throw away (nignore+1) stack frames, if that many exist. */ + for (i = 0; i < nignore + 1; i++) { + err = unw_step(&cursor); + if (err <= 0) + return; } - if (cache != NULL && unw_tdep_trace(&cursor, vec, &len, cache) >= 0) { - /* - * The trace cache successfully looked up the backtrace. 
- * Discard the first (nignore+1) elements when copying the - * result, since there was no way to tell unw_tdep_trace() to - * skip those frames. - */ - assert(len >= nignore + 1); - len -= nignore + 1; - if (len > 0) { - memcpy(bt->vec, &vec[nignore + 1], sizeof(void *) * - len); - bt->len = len; - } - } else { -#endif - /* Throw away (nignore+1) stack frames, if that many exist. */ - for (i = 0; i < nignore + 1; i++) { - err = unw_step(&cursor); - if (err <= 0) - return; - } - /* - * Iterate over stack frames until there are no more, or until - * no space remains in bt. - */ - for (i = 0; i < max; i++) { - unw_get_reg(&cursor, UNW_REG_IP, - (unw_word_t *)&bt->vec[i]); - bt->len++; - err = unw_step(&cursor); - if (err <= 0) - break; - } -#ifdef JEMALLOC_PROF_LIBUNWIND_CACHE + /* + * Iterate over stack frames until there are no more, or until no space + * remains in bt. + */ + for (i = 0; i < max; i++) { + unw_get_reg(&cursor, UNW_REG_IP, (unw_word_t *)&bt->vec[i]); + bt->len++; + err = unw_step(&cursor); + if (err <= 0) + break; } -#endif } #endif #ifdef JEMALLOC_PROF_LIBGCC @@ -1199,19 +1156,6 @@ prof_tdata_cleanup(void *arg) } } -#ifdef JEMALLOC_PROF_LIBUNWIND_CACHE -static void -libunwind_cache_thread_cleanup(void *arg) -{ - unw_tdep_cache_t *cache = libunwind_cache_tls; - - if (cache != NULL) { - unw_tdep_free_frame_cache(cache); - libunwind_cache_tls = NULL; - } -} -#endif - void prof_boot0(void) { @@ -1264,14 +1208,6 @@ prof_boot2(void) ": Error in pthread_key_create()\n"); abort(); } -#ifdef JEMALLOC_PROF_LIBUNWIND_CACHE - if (pthread_key_create(&libunwind_cache_tsd, - libunwind_cache_thread_cleanup) != 0) { - malloc_write( - ": Error in pthread_key_create()\n"); - abort(); - } -#endif prof_bt_max = (1U << opt_lg_prof_bt_max); if (malloc_mutex_init(&prof_dump_seq_mtx)) From 3e292475ee336daa8c34498e461141808ca13bee Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 24 Mar 2011 16:48:11 -0700 Subject: [PATCH 0004/3378] Implement atomic operations for 
x86/x64. Add inline assembly implementations of atomic_{add,sub}_uint{32,64}() for x86/x64, in order to support compilers that are missing the relevant gcc intrinsics. --- jemalloc/include/jemalloc/internal/atomic.h | 56 +++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/jemalloc/include/jemalloc/internal/atomic.h b/jemalloc/include/jemalloc/internal/atomic.h index 821c2efb..9a298623 100644 --- a/jemalloc/include/jemalloc/internal/atomic.h +++ b/jemalloc/include/jemalloc/internal/atomic.h @@ -40,6 +40,7 @@ uint32_t atomic_sub_uint32(uint32_t *p, uint32_t x); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ATOMIC_C_)) +/******************************************************************************/ /* 64-bit operations. */ #ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 JEMALLOC_INLINE uint64_t @@ -69,12 +70,40 @@ atomic_sub_uint64(uint64_t *p, uint64_t x) return (OSAtomicAdd64(-((int64_t)x), (int64_t *)p)); } +#elif (defined(__amd64_) || defined(__x86_64__)) +JEMALLOC_INLINE uint64_t +atomic_add_uint64(uint64_t *p, uint64_t x) +{ + + asm volatile ( + "lock; xaddq %0, %1;" + : "+r" (x), "=m" (*p) /* Outputs. */ + : "m" (*p) /* Inputs. */ + ); + + return (x); +} + +JEMALLOC_INLINE uint64_t +atomic_sub_uint64(uint64_t *p, uint64_t x) +{ + + x = (uint64_t)(-(int64_t)x); + asm volatile ( + "lock; xaddq %0, %1;" + : "+r" (x), "=m" (*p) /* Outputs. */ + : "m" (*p) /* Inputs. */ + ); + + return (x); +} #else # if (LG_SIZEOF_PTR == 3) # error "Missing implementation for 64-bit atomic operations" # endif #endif +/******************************************************************************/ /* 32-bit operations. 
*/ #ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 JEMALLOC_INLINE uint32_t @@ -104,6 +133,33 @@ atomic_sub_uint32(uint32_t *p, uint32_t x) return (OSAtomicAdd32(-((int32_t)x), (int32_t *)p)); } +#elif (defined(__i386__) || defined(__amd64_) || defined(__x86_64__)) +JEMALLOC_INLINE uint32_t +atomic_add_uint32(uint32_t *p, uint32_t x) +{ + + asm volatile ( + "lock; xaddl %0, %1;" + : "+r" (x), "=m" (*p) /* Outputs. */ + : "m" (*p) /* Inputs. */ + ); + + return (x); +} + +JEMALLOC_INLINE uint32_t +atomic_sub_uint32(uint32_t *p, uint32_t x) +{ + + x = (uint32_t)(-(int32_t)x); + asm volatile ( + "lock; xaddl %0, %1;" + : "+r" (x), "=m" (*p) /* Outputs. */ + : "m" (*p) /* Inputs. */ + ); + + return (x); +} #else # error "Missing implementation for 32-bit atomic operations" #endif From 7d9ebea57d1d82702261cb75a1b885a709b6bcc7 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 30 Mar 2011 15:01:08 -0700 Subject: [PATCH 0005/3378] Update ChangeLog for 2.2.1. --- jemalloc/ChangeLog | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/jemalloc/ChangeLog b/jemalloc/ChangeLog index 6db63db5..7b262c95 100644 --- a/jemalloc/ChangeLog +++ b/jemalloc/ChangeLog @@ -6,6 +6,13 @@ found in the git revision history: http://www.canonware.com/cgi-bin/gitweb.cgi?p=jemalloc.git git://canonware.com/jemalloc.git +* 2.2.1 (March 30, 2011) + + Bug fixes: + - Implement atomic operations for x86/x64. This fixes compilation failures + for versions of gcc that are still in wide use. + - Fix an assertion in arena_purge(). + * 2.2.0 (March 22, 2011) This version incorporates several improvements to algorithms and data From 64ba3d7cd9901ed00d690365b1b3da6aa7b9cd59 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 30 Mar 2011 16:02:25 -0700 Subject: [PATCH 0006/3378] Fix a build error for --disable-tcache. Add a missing #ifdef to conditionally exclude code that is relevant only to the tcache feature. 
--- jemalloc/src/ctl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/jemalloc/src/ctl.c b/jemalloc/src/ctl.c index 40fdbacb..e5336d36 100644 --- a/jemalloc/src/ctl.c +++ b/jemalloc/src/ctl.c @@ -1151,11 +1151,13 @@ thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, /* Set new arena association. */ ARENA_SET(arena); +#ifdef JEMALLOC_TCACHE { tcache_t *tcache = TCACHE_GET(); if (tcache != NULL) tcache->arena = arena; } +#endif } ret = 0; From 7427525c28d58c423a68930160e3b0fe577fe953 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 31 Mar 2011 20:36:17 -0700 Subject: [PATCH 0007/3378] Move repo contents in jemalloc/ to top level. --- jemalloc/COPYING => COPYING | 0 jemalloc/ChangeLog => ChangeLog | 0 jemalloc/INSTALL => INSTALL | 0 jemalloc/Makefile.in => Makefile.in | 0 jemalloc/README => README | 0 jemalloc/autogen.sh => autogen.sh | 0 {jemalloc/bin => bin}/pprof | 0 jemalloc/config.guess => config.guess | 0 jemalloc/config.stamp.in => config.stamp.in | 0 jemalloc/config.sub => config.sub | 0 jemalloc/configure.ac => configure.ac | 0 {jemalloc/doc => doc}/html.xsl.in | 0 {jemalloc/doc => doc}/jemalloc.xml.in | 0 {jemalloc/doc => doc}/manpages.xsl.in | 0 {jemalloc/doc => doc}/stylesheet.xsl | 0 {jemalloc/include => include}/jemalloc/internal/arena.h | 0 {jemalloc/include => include}/jemalloc/internal/atomic.h | 0 {jemalloc/include => include}/jemalloc/internal/base.h | 0 {jemalloc/include => include}/jemalloc/internal/bitmap.h | 0 {jemalloc/include => include}/jemalloc/internal/chunk.h | 0 {jemalloc/include => include}/jemalloc/internal/chunk_dss.h | 0 {jemalloc/include => include}/jemalloc/internal/chunk_mmap.h | 0 {jemalloc/include => include}/jemalloc/internal/chunk_swap.h | 0 {jemalloc/include => include}/jemalloc/internal/ckh.h | 0 {jemalloc/include => include}/jemalloc/internal/ctl.h | 0 {jemalloc/include => include}/jemalloc/internal/extent.h | 0 {jemalloc/include => include}/jemalloc/internal/hash.h | 0 
{jemalloc/include => include}/jemalloc/internal/huge.h | 0 .../include => include}/jemalloc/internal/jemalloc_internal.h.in | 0 {jemalloc/include => include}/jemalloc/internal/mb.h | 0 {jemalloc/include => include}/jemalloc/internal/mutex.h | 0 {jemalloc/include => include}/jemalloc/internal/prn.h | 0 {jemalloc/include => include}/jemalloc/internal/prof.h | 0 {jemalloc/include => include}/jemalloc/internal/ql.h | 0 {jemalloc/include => include}/jemalloc/internal/qr.h | 0 {jemalloc/include => include}/jemalloc/internal/rb.h | 0 {jemalloc/include => include}/jemalloc/internal/rtree.h | 0 {jemalloc/include => include}/jemalloc/internal/stats.h | 0 {jemalloc/include => include}/jemalloc/internal/tcache.h | 0 {jemalloc/include => include}/jemalloc/internal/zone.h | 0 {jemalloc/include => include}/jemalloc/jemalloc.h.in | 0 {jemalloc/include => include}/jemalloc/jemalloc_defs.h.in | 0 jemalloc/install-sh => install-sh | 0 {jemalloc/src => src}/arena.c | 0 {jemalloc/src => src}/atomic.c | 0 {jemalloc/src => src}/base.c | 0 {jemalloc/src => src}/bitmap.c | 0 {jemalloc/src => src}/chunk.c | 0 {jemalloc/src => src}/chunk_dss.c | 0 {jemalloc/src => src}/chunk_mmap.c | 0 {jemalloc/src => src}/chunk_swap.c | 0 {jemalloc/src => src}/ckh.c | 0 {jemalloc/src => src}/ctl.c | 0 {jemalloc/src => src}/extent.c | 0 {jemalloc/src => src}/hash.c | 0 {jemalloc/src => src}/huge.c | 0 {jemalloc/src => src}/jemalloc.c | 0 {jemalloc/src => src}/mb.c | 0 {jemalloc/src => src}/mutex.c | 0 {jemalloc/src => src}/prof.c | 0 {jemalloc/src => src}/rtree.c | 0 {jemalloc/src => src}/stats.c | 0 {jemalloc/src => src}/tcache.c | 0 {jemalloc/src => src}/zone.c | 0 {jemalloc/test => test}/allocated.c | 0 {jemalloc/test => test}/allocated.exp | 0 {jemalloc/test => test}/allocm.c | 0 {jemalloc/test => test}/allocm.exp | 0 {jemalloc/test => test}/bitmap.c | 0 {jemalloc/test => test}/bitmap.exp | 0 {jemalloc/test => test}/jemalloc_test.h.in | 0 {jemalloc/test => test}/mremap.c | 0 {jemalloc/test => 
test}/mremap.exp | 0 {jemalloc/test => test}/posix_memalign.c | 0 {jemalloc/test => test}/posix_memalign.exp | 0 {jemalloc/test => test}/rallocm.c | 0 {jemalloc/test => test}/rallocm.exp | 0 {jemalloc/test => test}/thread_arena.c | 0 {jemalloc/test => test}/thread_arena.exp | 0 79 files changed, 0 insertions(+), 0 deletions(-) rename jemalloc/COPYING => COPYING (100%) rename jemalloc/ChangeLog => ChangeLog (100%) rename jemalloc/INSTALL => INSTALL (100%) rename jemalloc/Makefile.in => Makefile.in (100%) rename jemalloc/README => README (100%) rename jemalloc/autogen.sh => autogen.sh (100%) rename {jemalloc/bin => bin}/pprof (100%) rename jemalloc/config.guess => config.guess (100%) rename jemalloc/config.stamp.in => config.stamp.in (100%) rename jemalloc/config.sub => config.sub (100%) rename jemalloc/configure.ac => configure.ac (100%) rename {jemalloc/doc => doc}/html.xsl.in (100%) rename {jemalloc/doc => doc}/jemalloc.xml.in (100%) rename {jemalloc/doc => doc}/manpages.xsl.in (100%) rename {jemalloc/doc => doc}/stylesheet.xsl (100%) rename {jemalloc/include => include}/jemalloc/internal/arena.h (100%) rename {jemalloc/include => include}/jemalloc/internal/atomic.h (100%) rename {jemalloc/include => include}/jemalloc/internal/base.h (100%) rename {jemalloc/include => include}/jemalloc/internal/bitmap.h (100%) rename {jemalloc/include => include}/jemalloc/internal/chunk.h (100%) rename {jemalloc/include => include}/jemalloc/internal/chunk_dss.h (100%) rename {jemalloc/include => include}/jemalloc/internal/chunk_mmap.h (100%) rename {jemalloc/include => include}/jemalloc/internal/chunk_swap.h (100%) rename {jemalloc/include => include}/jemalloc/internal/ckh.h (100%) rename {jemalloc/include => include}/jemalloc/internal/ctl.h (100%) rename {jemalloc/include => include}/jemalloc/internal/extent.h (100%) rename {jemalloc/include => include}/jemalloc/internal/hash.h (100%) rename {jemalloc/include => include}/jemalloc/internal/huge.h (100%) rename {jemalloc/include => 
include}/jemalloc/internal/jemalloc_internal.h.in (100%) rename {jemalloc/include => include}/jemalloc/internal/mb.h (100%) rename {jemalloc/include => include}/jemalloc/internal/mutex.h (100%) rename {jemalloc/include => include}/jemalloc/internal/prn.h (100%) rename {jemalloc/include => include}/jemalloc/internal/prof.h (100%) rename {jemalloc/include => include}/jemalloc/internal/ql.h (100%) rename {jemalloc/include => include}/jemalloc/internal/qr.h (100%) rename {jemalloc/include => include}/jemalloc/internal/rb.h (100%) rename {jemalloc/include => include}/jemalloc/internal/rtree.h (100%) rename {jemalloc/include => include}/jemalloc/internal/stats.h (100%) rename {jemalloc/include => include}/jemalloc/internal/tcache.h (100%) rename {jemalloc/include => include}/jemalloc/internal/zone.h (100%) rename {jemalloc/include => include}/jemalloc/jemalloc.h.in (100%) rename {jemalloc/include => include}/jemalloc/jemalloc_defs.h.in (100%) rename jemalloc/install-sh => install-sh (100%) rename {jemalloc/src => src}/arena.c (100%) rename {jemalloc/src => src}/atomic.c (100%) rename {jemalloc/src => src}/base.c (100%) rename {jemalloc/src => src}/bitmap.c (100%) rename {jemalloc/src => src}/chunk.c (100%) rename {jemalloc/src => src}/chunk_dss.c (100%) rename {jemalloc/src => src}/chunk_mmap.c (100%) rename {jemalloc/src => src}/chunk_swap.c (100%) rename {jemalloc/src => src}/ckh.c (100%) rename {jemalloc/src => src}/ctl.c (100%) rename {jemalloc/src => src}/extent.c (100%) rename {jemalloc/src => src}/hash.c (100%) rename {jemalloc/src => src}/huge.c (100%) rename {jemalloc/src => src}/jemalloc.c (100%) rename {jemalloc/src => src}/mb.c (100%) rename {jemalloc/src => src}/mutex.c (100%) rename {jemalloc/src => src}/prof.c (100%) rename {jemalloc/src => src}/rtree.c (100%) rename {jemalloc/src => src}/stats.c (100%) rename {jemalloc/src => src}/tcache.c (100%) rename {jemalloc/src => src}/zone.c (100%) rename {jemalloc/test => test}/allocated.c (100%) rename 
{jemalloc/test => test}/allocated.exp (100%) rename {jemalloc/test => test}/allocm.c (100%) rename {jemalloc/test => test}/allocm.exp (100%) rename {jemalloc/test => test}/bitmap.c (100%) rename {jemalloc/test => test}/bitmap.exp (100%) rename {jemalloc/test => test}/jemalloc_test.h.in (100%) rename {jemalloc/test => test}/mremap.c (100%) rename {jemalloc/test => test}/mremap.exp (100%) rename {jemalloc/test => test}/posix_memalign.c (100%) rename {jemalloc/test => test}/posix_memalign.exp (100%) rename {jemalloc/test => test}/rallocm.c (100%) rename {jemalloc/test => test}/rallocm.exp (100%) rename {jemalloc/test => test}/thread_arena.c (100%) rename {jemalloc/test => test}/thread_arena.exp (100%) diff --git a/jemalloc/COPYING b/COPYING similarity index 100% rename from jemalloc/COPYING rename to COPYING diff --git a/jemalloc/ChangeLog b/ChangeLog similarity index 100% rename from jemalloc/ChangeLog rename to ChangeLog diff --git a/jemalloc/INSTALL b/INSTALL similarity index 100% rename from jemalloc/INSTALL rename to INSTALL diff --git a/jemalloc/Makefile.in b/Makefile.in similarity index 100% rename from jemalloc/Makefile.in rename to Makefile.in diff --git a/jemalloc/README b/README similarity index 100% rename from jemalloc/README rename to README diff --git a/jemalloc/autogen.sh b/autogen.sh similarity index 100% rename from jemalloc/autogen.sh rename to autogen.sh diff --git a/jemalloc/bin/pprof b/bin/pprof similarity index 100% rename from jemalloc/bin/pprof rename to bin/pprof diff --git a/jemalloc/config.guess b/config.guess similarity index 100% rename from jemalloc/config.guess rename to config.guess diff --git a/jemalloc/config.stamp.in b/config.stamp.in similarity index 100% rename from jemalloc/config.stamp.in rename to config.stamp.in diff --git a/jemalloc/config.sub b/config.sub similarity index 100% rename from jemalloc/config.sub rename to config.sub diff --git a/jemalloc/configure.ac b/configure.ac similarity index 100% rename from 
jemalloc/configure.ac rename to configure.ac diff --git a/jemalloc/doc/html.xsl.in b/doc/html.xsl.in similarity index 100% rename from jemalloc/doc/html.xsl.in rename to doc/html.xsl.in diff --git a/jemalloc/doc/jemalloc.xml.in b/doc/jemalloc.xml.in similarity index 100% rename from jemalloc/doc/jemalloc.xml.in rename to doc/jemalloc.xml.in diff --git a/jemalloc/doc/manpages.xsl.in b/doc/manpages.xsl.in similarity index 100% rename from jemalloc/doc/manpages.xsl.in rename to doc/manpages.xsl.in diff --git a/jemalloc/doc/stylesheet.xsl b/doc/stylesheet.xsl similarity index 100% rename from jemalloc/doc/stylesheet.xsl rename to doc/stylesheet.xsl diff --git a/jemalloc/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h similarity index 100% rename from jemalloc/include/jemalloc/internal/arena.h rename to include/jemalloc/internal/arena.h diff --git a/jemalloc/include/jemalloc/internal/atomic.h b/include/jemalloc/internal/atomic.h similarity index 100% rename from jemalloc/include/jemalloc/internal/atomic.h rename to include/jemalloc/internal/atomic.h diff --git a/jemalloc/include/jemalloc/internal/base.h b/include/jemalloc/internal/base.h similarity index 100% rename from jemalloc/include/jemalloc/internal/base.h rename to include/jemalloc/internal/base.h diff --git a/jemalloc/include/jemalloc/internal/bitmap.h b/include/jemalloc/internal/bitmap.h similarity index 100% rename from jemalloc/include/jemalloc/internal/bitmap.h rename to include/jemalloc/internal/bitmap.h diff --git a/jemalloc/include/jemalloc/internal/chunk.h b/include/jemalloc/internal/chunk.h similarity index 100% rename from jemalloc/include/jemalloc/internal/chunk.h rename to include/jemalloc/internal/chunk.h diff --git a/jemalloc/include/jemalloc/internal/chunk_dss.h b/include/jemalloc/internal/chunk_dss.h similarity index 100% rename from jemalloc/include/jemalloc/internal/chunk_dss.h rename to include/jemalloc/internal/chunk_dss.h diff --git 
a/jemalloc/include/jemalloc/internal/chunk_mmap.h b/include/jemalloc/internal/chunk_mmap.h similarity index 100% rename from jemalloc/include/jemalloc/internal/chunk_mmap.h rename to include/jemalloc/internal/chunk_mmap.h diff --git a/jemalloc/include/jemalloc/internal/chunk_swap.h b/include/jemalloc/internal/chunk_swap.h similarity index 100% rename from jemalloc/include/jemalloc/internal/chunk_swap.h rename to include/jemalloc/internal/chunk_swap.h diff --git a/jemalloc/include/jemalloc/internal/ckh.h b/include/jemalloc/internal/ckh.h similarity index 100% rename from jemalloc/include/jemalloc/internal/ckh.h rename to include/jemalloc/internal/ckh.h diff --git a/jemalloc/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h similarity index 100% rename from jemalloc/include/jemalloc/internal/ctl.h rename to include/jemalloc/internal/ctl.h diff --git a/jemalloc/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h similarity index 100% rename from jemalloc/include/jemalloc/internal/extent.h rename to include/jemalloc/internal/extent.h diff --git a/jemalloc/include/jemalloc/internal/hash.h b/include/jemalloc/internal/hash.h similarity index 100% rename from jemalloc/include/jemalloc/internal/hash.h rename to include/jemalloc/internal/hash.h diff --git a/jemalloc/include/jemalloc/internal/huge.h b/include/jemalloc/internal/huge.h similarity index 100% rename from jemalloc/include/jemalloc/internal/huge.h rename to include/jemalloc/internal/huge.h diff --git a/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in similarity index 100% rename from jemalloc/include/jemalloc/internal/jemalloc_internal.h.in rename to include/jemalloc/internal/jemalloc_internal.h.in diff --git a/jemalloc/include/jemalloc/internal/mb.h b/include/jemalloc/internal/mb.h similarity index 100% rename from jemalloc/include/jemalloc/internal/mb.h rename to include/jemalloc/internal/mb.h diff --git 
a/jemalloc/include/jemalloc/internal/mutex.h b/include/jemalloc/internal/mutex.h similarity index 100% rename from jemalloc/include/jemalloc/internal/mutex.h rename to include/jemalloc/internal/mutex.h diff --git a/jemalloc/include/jemalloc/internal/prn.h b/include/jemalloc/internal/prn.h similarity index 100% rename from jemalloc/include/jemalloc/internal/prn.h rename to include/jemalloc/internal/prn.h diff --git a/jemalloc/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h similarity index 100% rename from jemalloc/include/jemalloc/internal/prof.h rename to include/jemalloc/internal/prof.h diff --git a/jemalloc/include/jemalloc/internal/ql.h b/include/jemalloc/internal/ql.h similarity index 100% rename from jemalloc/include/jemalloc/internal/ql.h rename to include/jemalloc/internal/ql.h diff --git a/jemalloc/include/jemalloc/internal/qr.h b/include/jemalloc/internal/qr.h similarity index 100% rename from jemalloc/include/jemalloc/internal/qr.h rename to include/jemalloc/internal/qr.h diff --git a/jemalloc/include/jemalloc/internal/rb.h b/include/jemalloc/internal/rb.h similarity index 100% rename from jemalloc/include/jemalloc/internal/rb.h rename to include/jemalloc/internal/rb.h diff --git a/jemalloc/include/jemalloc/internal/rtree.h b/include/jemalloc/internal/rtree.h similarity index 100% rename from jemalloc/include/jemalloc/internal/rtree.h rename to include/jemalloc/internal/rtree.h diff --git a/jemalloc/include/jemalloc/internal/stats.h b/include/jemalloc/internal/stats.h similarity index 100% rename from jemalloc/include/jemalloc/internal/stats.h rename to include/jemalloc/internal/stats.h diff --git a/jemalloc/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h similarity index 100% rename from jemalloc/include/jemalloc/internal/tcache.h rename to include/jemalloc/internal/tcache.h diff --git a/jemalloc/include/jemalloc/internal/zone.h b/include/jemalloc/internal/zone.h similarity index 100% rename from 
jemalloc/include/jemalloc/internal/zone.h rename to include/jemalloc/internal/zone.h diff --git a/jemalloc/include/jemalloc/jemalloc.h.in b/include/jemalloc/jemalloc.h.in similarity index 100% rename from jemalloc/include/jemalloc/jemalloc.h.in rename to include/jemalloc/jemalloc.h.in diff --git a/jemalloc/include/jemalloc/jemalloc_defs.h.in b/include/jemalloc/jemalloc_defs.h.in similarity index 100% rename from jemalloc/include/jemalloc/jemalloc_defs.h.in rename to include/jemalloc/jemalloc_defs.h.in diff --git a/jemalloc/install-sh b/install-sh similarity index 100% rename from jemalloc/install-sh rename to install-sh diff --git a/jemalloc/src/arena.c b/src/arena.c similarity index 100% rename from jemalloc/src/arena.c rename to src/arena.c diff --git a/jemalloc/src/atomic.c b/src/atomic.c similarity index 100% rename from jemalloc/src/atomic.c rename to src/atomic.c diff --git a/jemalloc/src/base.c b/src/base.c similarity index 100% rename from jemalloc/src/base.c rename to src/base.c diff --git a/jemalloc/src/bitmap.c b/src/bitmap.c similarity index 100% rename from jemalloc/src/bitmap.c rename to src/bitmap.c diff --git a/jemalloc/src/chunk.c b/src/chunk.c similarity index 100% rename from jemalloc/src/chunk.c rename to src/chunk.c diff --git a/jemalloc/src/chunk_dss.c b/src/chunk_dss.c similarity index 100% rename from jemalloc/src/chunk_dss.c rename to src/chunk_dss.c diff --git a/jemalloc/src/chunk_mmap.c b/src/chunk_mmap.c similarity index 100% rename from jemalloc/src/chunk_mmap.c rename to src/chunk_mmap.c diff --git a/jemalloc/src/chunk_swap.c b/src/chunk_swap.c similarity index 100% rename from jemalloc/src/chunk_swap.c rename to src/chunk_swap.c diff --git a/jemalloc/src/ckh.c b/src/ckh.c similarity index 100% rename from jemalloc/src/ckh.c rename to src/ckh.c diff --git a/jemalloc/src/ctl.c b/src/ctl.c similarity index 100% rename from jemalloc/src/ctl.c rename to src/ctl.c diff --git a/jemalloc/src/extent.c b/src/extent.c similarity index 100% 
rename from jemalloc/src/extent.c rename to src/extent.c diff --git a/jemalloc/src/hash.c b/src/hash.c similarity index 100% rename from jemalloc/src/hash.c rename to src/hash.c diff --git a/jemalloc/src/huge.c b/src/huge.c similarity index 100% rename from jemalloc/src/huge.c rename to src/huge.c diff --git a/jemalloc/src/jemalloc.c b/src/jemalloc.c similarity index 100% rename from jemalloc/src/jemalloc.c rename to src/jemalloc.c diff --git a/jemalloc/src/mb.c b/src/mb.c similarity index 100% rename from jemalloc/src/mb.c rename to src/mb.c diff --git a/jemalloc/src/mutex.c b/src/mutex.c similarity index 100% rename from jemalloc/src/mutex.c rename to src/mutex.c diff --git a/jemalloc/src/prof.c b/src/prof.c similarity index 100% rename from jemalloc/src/prof.c rename to src/prof.c diff --git a/jemalloc/src/rtree.c b/src/rtree.c similarity index 100% rename from jemalloc/src/rtree.c rename to src/rtree.c diff --git a/jemalloc/src/stats.c b/src/stats.c similarity index 100% rename from jemalloc/src/stats.c rename to src/stats.c diff --git a/jemalloc/src/tcache.c b/src/tcache.c similarity index 100% rename from jemalloc/src/tcache.c rename to src/tcache.c diff --git a/jemalloc/src/zone.c b/src/zone.c similarity index 100% rename from jemalloc/src/zone.c rename to src/zone.c diff --git a/jemalloc/test/allocated.c b/test/allocated.c similarity index 100% rename from jemalloc/test/allocated.c rename to test/allocated.c diff --git a/jemalloc/test/allocated.exp b/test/allocated.exp similarity index 100% rename from jemalloc/test/allocated.exp rename to test/allocated.exp diff --git a/jemalloc/test/allocm.c b/test/allocm.c similarity index 100% rename from jemalloc/test/allocm.c rename to test/allocm.c diff --git a/jemalloc/test/allocm.exp b/test/allocm.exp similarity index 100% rename from jemalloc/test/allocm.exp rename to test/allocm.exp diff --git a/jemalloc/test/bitmap.c b/test/bitmap.c similarity index 100% rename from jemalloc/test/bitmap.c rename to test/bitmap.c 
diff --git a/jemalloc/test/bitmap.exp b/test/bitmap.exp similarity index 100% rename from jemalloc/test/bitmap.exp rename to test/bitmap.exp diff --git a/jemalloc/test/jemalloc_test.h.in b/test/jemalloc_test.h.in similarity index 100% rename from jemalloc/test/jemalloc_test.h.in rename to test/jemalloc_test.h.in diff --git a/jemalloc/test/mremap.c b/test/mremap.c similarity index 100% rename from jemalloc/test/mremap.c rename to test/mremap.c diff --git a/jemalloc/test/mremap.exp b/test/mremap.exp similarity index 100% rename from jemalloc/test/mremap.exp rename to test/mremap.exp diff --git a/jemalloc/test/posix_memalign.c b/test/posix_memalign.c similarity index 100% rename from jemalloc/test/posix_memalign.c rename to test/posix_memalign.c diff --git a/jemalloc/test/posix_memalign.exp b/test/posix_memalign.exp similarity index 100% rename from jemalloc/test/posix_memalign.exp rename to test/posix_memalign.exp diff --git a/jemalloc/test/rallocm.c b/test/rallocm.c similarity index 100% rename from jemalloc/test/rallocm.c rename to test/rallocm.c diff --git a/jemalloc/test/rallocm.exp b/test/rallocm.exp similarity index 100% rename from jemalloc/test/rallocm.exp rename to test/rallocm.exp diff --git a/jemalloc/test/thread_arena.c b/test/thread_arena.c similarity index 100% rename from jemalloc/test/thread_arena.c rename to test/thread_arena.c diff --git a/jemalloc/test/thread_arena.exp b/test/thread_arena.exp similarity index 100% rename from jemalloc/test/thread_arena.exp rename to test/thread_arena.exp From 955851f3849e3fb57541bef37e241caf5aa7692c Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 31 Mar 2011 22:38:51 -0700 Subject: [PATCH 0008/3378] Adjust repo path dependencies. Update .gitignore and configure.ac to deal with the recent directory restructuring. 
--- .gitignore | 46 +++++++++++++++++++++++----------------------- configure.ac | 2 +- 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/.gitignore b/.gitignore index d6fa8fd1..32b4c424 100644 --- a/.gitignore +++ b/.gitignore @@ -1,23 +1,23 @@ -/jemalloc/autom4te.cache/ -/jemalloc/config.stamp -/jemalloc/config.log -/jemalloc/config.status -/jemalloc/configure -/jemalloc/doc/html.xsl -/jemalloc/doc/manpages.xsl -/jemalloc/doc/jemalloc.xml -/jemalloc/doc/jemalloc.html -/jemalloc/doc/jemalloc.3 -/jemalloc/lib/ -/jemalloc/Makefile -/jemalloc/include/jemalloc/internal/jemalloc_internal\.h -/jemalloc/include/jemalloc/jemalloc\.h -/jemalloc/include/jemalloc/jemalloc_defs\.h -/jemalloc/test/jemalloc_test\.h -/jemalloc/src/*.[od] -/jemalloc/test/*.[od] -/jemalloc/test/*.out -/jemalloc/test/[a-z]* -!/jemalloc/test/*.c -!/jemalloc/test/*.exp -/jemalloc/VERSION +/autom4te.cache/ +/config.stamp +/config.log +/config.status +/configure +/doc/html.xsl +/doc/manpages.xsl +/doc/jemalloc.xml +/doc/jemalloc.html +/doc/jemalloc.3 +/lib/ +/Makefile +/include/jemalloc/internal/jemalloc_internal\.h +/include/jemalloc/jemalloc\.h +/include/jemalloc/jemalloc_defs\.h +/test/jemalloc_test\.h +/src/*.[od] +/test/*.[od] +/test/*.out +/test/[a-z]* +!test/*.c +!test/*.exp +/VERSION diff --git a/configure.ac b/configure.ac index 412d3d1b..e317d0a2 100644 --- a/configure.ac +++ b/configure.ac @@ -688,7 +688,7 @@ dnl jemalloc configuration. dnl dnl Set VERSION if source directory has an embedded git repository. 
-if test -d "${srcroot}../.git" ; then +if test -d "${srcroot}.git" ; then git describe --long --abbrev=40 > ${srcroot}VERSION fi jemalloc_version=`cat ${srcroot}VERSION` From 408ade654580ce2456b990cc7760f8554805111d Mon Sep 17 00:00:00 2001 From: Nathan McSween Date: Thu, 28 Apr 2011 19:43:06 +0000 Subject: [PATCH 0009/3378] Makefile.in - test/allocated requires pthread --- Makefile.in | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile.in b/Makefile.in index 26da0e28..de7492f9 100644 --- a/Makefile.in +++ b/Makefile.in @@ -136,9 +136,9 @@ doc: $(DOCS) @objroot@lib/libjemalloc@install_suffix@.$(SO) @mkdir -p $(@D) ifneq (@RPATH@, ) - $(CC) -o $@ $< @RPATH@@objroot@lib -L@objroot@lib -ljemalloc@install_suffix@ + $(CC) -o $@ $< @RPATH@@objroot@lib -L@objroot@lib -ljemalloc@install_suffix@ -lpthread else - $(CC) -o $@ $< -L@objroot@lib -ljemalloc@install_suffix@ + $(CC) -o $@ $< -L@objroot@lib -ljemalloc@install_suffix@ -lpthread endif install_bin: From f0b22cf932b6fa7be2027a1058802fab0d6e25c6 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 22 May 2011 10:49:44 -0700 Subject: [PATCH 0010/3378] Use LLU suffix for all 64-bit constants. Add the LLU suffix for all 0x... 64-bit constants. Reported by Jakob Blomer. 
--- include/jemalloc/internal/hash.h | 2 +- src/ckh.c | 2 +- src/prof.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/jemalloc/internal/hash.h b/include/jemalloc/internal/hash.h index 93905bf8..8a46ce30 100644 --- a/include/jemalloc/internal/hash.h +++ b/include/jemalloc/internal/hash.h @@ -26,7 +26,7 @@ uint64_t hash(const void *key, size_t len, uint64_t seed); JEMALLOC_INLINE uint64_t hash(const void *key, size_t len, uint64_t seed) { - const uint64_t m = 0xc6a4a7935bd1e995; + const uint64_t m = 0xc6a4a7935bd1e995LLU; const int r = 47; uint64_t h = seed ^ (len * m); const uint64_t *data = (const uint64_t *)key; diff --git a/src/ckh.c b/src/ckh.c index 143b5b5f..43fcc252 100644 --- a/src/ckh.c +++ b/src/ckh.c @@ -556,7 +556,7 @@ ckh_string_hash(const void *key, unsigned minbits, size_t *hash1, size_t *hash2) } else { ret1 = h; ret2 = hash(key, strlen((const char *)key), - 0x8432a476666bbc13U); + 0x8432a476666bbc13LLU); } *hash1 = ret1; diff --git a/src/prof.c b/src/prof.c index 8370042b..65493759 100644 --- a/src/prof.c +++ b/src/prof.c @@ -1072,7 +1072,7 @@ prof_bt_hash(const void *key, unsigned minbits, size_t *hash1, size_t *hash2) } else { ret1 = h; ret2 = hash(bt->vec, bt->len * sizeof(void *), - 0x8432a476666bbc13U); + 0x8432a476666bbc13LLU); } *hash1 = ret1; From f9a8edbb50f8cbcaf8ed62b36e8d7191ed223d2a Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 12 Jun 2011 16:46:03 -0700 Subject: [PATCH 0011/3378] Fix assertions in arena_purge(). Fix assertions in arena_purge() to accurately reflect the constraints in arena_maybe_purge(). There were two bugs here, one of which merely weakened the assertion, and the other of which referred to an uninitialized variable (typo; used npurgatory instead of arena->npurgatory). 
--- src/arena.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/arena.c b/src/arena.c index 9aaf47ff..e00dccc3 100644 --- a/src/arena.c +++ b/src/arena.c @@ -869,9 +869,9 @@ arena_purge(arena_t *arena, bool all) assert(ndirty == arena->ndirty); #endif assert(arena->ndirty > arena->npurgatory || all); - assert(arena->ndirty > chunk_npages || all); + assert(arena->ndirty - arena->npurgatory > chunk_npages || all); assert((arena->nactive >> opt_lg_dirty_mult) < (arena->ndirty - - npurgatory) || all); + arena->npurgatory) || all); #ifdef JEMALLOC_STATS arena->stats.npurge++; From 746e77a06bf1089d50fbd64e1759b96c1cfbd9f5 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 30 Jul 2011 16:40:52 -0700 Subject: [PATCH 0012/3378] Add the --with-private-namespace option. Add the --with-private-namespace option to make it possible to work around library-private symbols being exposed in static libraries. --- INSTALL | 6 + configure.ac | 19 +- .../jemalloc/internal/jemalloc_internal.h.in | 2 + include/jemalloc/internal/private_namespace.h | 196 ++++++++++++++++++ include/jemalloc/jemalloc_defs.h.in | 9 + 5 files changed, 228 insertions(+), 4 deletions(-) create mode 100644 include/jemalloc/internal/private_namespace.h diff --git a/INSTALL b/INSTALL index 11a457ae..2a1e469c 100644 --- a/INSTALL +++ b/INSTALL @@ -42,6 +42,12 @@ any of the following arguments (not a definitive list) to 'configure': jemalloc overlays the default malloc zone, but makes no attempt to actually replace the "malloc", "calloc", etc. symbols. +--with-private-namespace= + Prefix all library-private APIs with . For shared libraries, + symbol visibility mechanisms prevent these symbols from being exported, but + for static libraries, naming collisions are a real possibility. By + default, the prefix is "" (empty string). + --with-install-suffix= Append to the base name of all installed files, such that multiple versions of jemalloc can coexist in the same installation directory. 
For diff --git a/configure.ac b/configure.ac index e317d0a2..b58aa520 100644 --- a/configure.ac +++ b/configure.ac @@ -292,13 +292,22 @@ if test "x$JEMALLOC_PREFIX" != "x" ; then JEMALLOC_CPREFIX=`echo ${JEMALLOC_PREFIX} | tr "a-z" "A-Z"` AC_DEFINE_UNQUOTED([JEMALLOC_PREFIX], ["$JEMALLOC_PREFIX"]) AC_DEFINE_UNQUOTED([JEMALLOC_CPREFIX], ["$JEMALLOC_CPREFIX"]) - jemalloc_prefix="$JEMALLOC_PREFIX" - jemalloc_cprefix="$JEMALLOC_CPREFIX" - AC_SUBST([jemalloc_prefix]) - AC_SUBST([jemalloc_cprefix]) AC_DEFINE_UNQUOTED([JEMALLOC_P(string_that_no_one_should_want_to_use_as_a_jemalloc_API_prefix)], [${JEMALLOC_PREFIX}##string_that_no_one_should_want_to_use_as_a_jemalloc_API_prefix]) fi +dnl Do not mangle library-private APIs by default. +AC_ARG_WITH([private_namespace], + [AS_HELP_STRING([--with-private-namespace=], [Prefix to prepend to all library-private APIs])], + [JEMALLOC_PRIVATE_NAMESPACE="$with_private_namespace"], + [JEMALLOC_PRIVATE_NAMESPACE=""] +) +AC_DEFINE_UNQUOTED([JEMALLOC_PRIVATE_NAMESPACE], ["$JEMALLOC_PRIVATE_NAMESPACE"]) +if test "x$JEMALLOC_PRIVATE_NAMESPACE" != "x" ; then + AC_DEFINE_UNQUOTED([JEMALLOC_N(string_that_no_one_should_want_to_use_as_a_jemalloc_private_namespace_prefix)], [${JEMALLOC_PRIVATE_NAMESPACE}##string_that_no_one_should_want_to_use_as_a_jemalloc_private_namespace_prefix]) +else + AC_DEFINE_UNQUOTED([JEMALLOC_N(string_that_no_one_should_want_to_use_as_a_jemalloc_private_namespace_prefix)], [string_that_no_one_should_want_to_use_as_a_jemalloc_private_namespace_prefix]) +fi + dnl Do not add suffix to installed files by default. 
AC_ARG_WITH([install_suffix], [AS_HELP_STRING([--with-install-suffix=], [Suffix to append to all installed files])], @@ -905,6 +914,8 @@ AC_MSG_RESULT([objroot : ${objroot}]) AC_MSG_RESULT([abs_objroot : ${abs_objroot}]) AC_MSG_RESULT([]) AC_MSG_RESULT([JEMALLOC_PREFIX : ${JEMALLOC_PREFIX}]) +AC_MSG_RESULT([JEMALLOC_PRIVATE_NAMESPACE]) +AC_MSG_RESULT([ : ${JEMALLOC_PRIVATE_NAMESPACE}]) AC_MSG_RESULT([install_suffix : ${install_suffix}]) AC_MSG_RESULT([autogen : ${enable_autogen}]) AC_MSG_RESULT([cc-silence : ${enable_cc_silence}]) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 254adb6d..e9d60da7 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -33,6 +33,8 @@ #define JEMALLOC_MANGLE #include "../jemalloc@install_suffix@.h" +#include "private_namespace.h" + #if (defined(JEMALLOC_OSATOMIC) || defined(JEMALLOC_OSSPIN)) #include #endif diff --git a/include/jemalloc/internal/private_namespace.h b/include/jemalloc/internal/private_namespace.h new file mode 100644 index 00000000..2cfb1716 --- /dev/null +++ b/include/jemalloc/internal/private_namespace.h @@ -0,0 +1,196 @@ +#define arena_bin_index JEMALLOC_N(arena_bin_index) +#define arena_boot JEMALLOC_N(arena_boot) +#define arena_dalloc JEMALLOC_N(arena_dalloc) +#define arena_dalloc_bin JEMALLOC_N(arena_dalloc_bin) +#define arena_dalloc_large JEMALLOC_N(arena_dalloc_large) +#define arena_malloc JEMALLOC_N(arena_malloc) +#define arena_malloc_large JEMALLOC_N(arena_malloc_large) +#define arena_malloc_small JEMALLOC_N(arena_malloc_small) +#define arena_new JEMALLOC_N(arena_new) +#define arena_palloc JEMALLOC_N(arena_palloc) +#define arena_prof_accum JEMALLOC_N(arena_prof_accum) +#define arena_prof_ctx_get JEMALLOC_N(arena_prof_ctx_get) +#define arena_prof_ctx_set JEMALLOC_N(arena_prof_ctx_set) +#define arena_prof_promoted JEMALLOC_N(arena_prof_promoted) +#define arena_purge_all 
JEMALLOC_N(arena_purge_all) +#define arena_ralloc JEMALLOC_N(arena_ralloc) +#define arena_ralloc_no_move JEMALLOC_N(arena_ralloc_no_move) +#define arena_run_regind JEMALLOC_N(arena_run_regind) +#define arena_salloc JEMALLOC_N(arena_salloc) +#define arena_salloc_demote JEMALLOC_N(arena_salloc_demote) +#define arena_stats_merge JEMALLOC_N(arena_stats_merge) +#define arena_tcache_fill_small JEMALLOC_N(arena_tcache_fill_small) +#define arenas_bin_i_index JEMALLOC_N(arenas_bin_i_index) +#define arenas_extend JEMALLOC_N(arenas_extend) +#define arenas_lrun_i_index JEMALLOC_N(arenas_lrun_i_index) +#define atomic_add_uint32 JEMALLOC_N(atomic_add_uint32) +#define atomic_add_uint64 JEMALLOC_N(atomic_add_uint64) +#define atomic_sub_uint32 JEMALLOC_N(atomic_sub_uint32) +#define atomic_sub_uint64 JEMALLOC_N(atomic_sub_uint64) +#define base_alloc JEMALLOC_N(base_alloc) +#define base_boot JEMALLOC_N(base_boot) +#define base_node_alloc JEMALLOC_N(base_node_alloc) +#define base_node_dealloc JEMALLOC_N(base_node_dealloc) +#define bitmap_full JEMALLOC_N(bitmap_full) +#define bitmap_get JEMALLOC_N(bitmap_get) +#define bitmap_info_init JEMALLOC_N(bitmap_info_init) +#define bitmap_info_ngroups JEMALLOC_N(bitmap_info_ngroups) +#define bitmap_init JEMALLOC_N(bitmap_init) +#define bitmap_set JEMALLOC_N(bitmap_set) +#define bitmap_sfu JEMALLOC_N(bitmap_sfu) +#define bitmap_size JEMALLOC_N(bitmap_size) +#define bitmap_unset JEMALLOC_N(bitmap_unset) +#define bt_init JEMALLOC_N(bt_init) +#define buferror JEMALLOC_N(buferror) +#define choose_arena JEMALLOC_N(choose_arena) +#define choose_arena_hard JEMALLOC_N(choose_arena_hard) +#define chunk_alloc JEMALLOC_N(chunk_alloc) +#define chunk_alloc_dss JEMALLOC_N(chunk_alloc_dss) +#define chunk_alloc_mmap JEMALLOC_N(chunk_alloc_mmap) +#define chunk_alloc_mmap_noreserve JEMALLOC_N(chunk_alloc_mmap_noreserve) +#define chunk_alloc_swap JEMALLOC_N(chunk_alloc_swap) +#define chunk_boot JEMALLOC_N(chunk_boot) +#define chunk_dealloc JEMALLOC_N(chunk_dealloc) 
+#define chunk_dealloc_dss JEMALLOC_N(chunk_dealloc_dss) +#define chunk_dealloc_mmap JEMALLOC_N(chunk_dealloc_mmap) +#define chunk_dealloc_swap JEMALLOC_N(chunk_dealloc_swap) +#define chunk_dss_boot JEMALLOC_N(chunk_dss_boot) +#define chunk_in_dss JEMALLOC_N(chunk_in_dss) +#define chunk_in_swap JEMALLOC_N(chunk_in_swap) +#define chunk_mmap_boot JEMALLOC_N(chunk_mmap_boot) +#define chunk_swap_boot JEMALLOC_N(chunk_swap_boot) +#define chunk_swap_enable JEMALLOC_N(chunk_swap_enable) +#define ckh_bucket_search JEMALLOC_N(ckh_bucket_search) +#define ckh_count JEMALLOC_N(ckh_count) +#define ckh_delete JEMALLOC_N(ckh_delete) +#define ckh_evict_reloc_insert JEMALLOC_N(ckh_evict_reloc_insert) +#define ckh_insert JEMALLOC_N(ckh_insert) +#define ckh_isearch JEMALLOC_N(ckh_isearch) +#define ckh_iter JEMALLOC_N(ckh_iter) +#define ckh_new JEMALLOC_N(ckh_new) +#define ckh_pointer_hash JEMALLOC_N(ckh_pointer_hash) +#define ckh_pointer_keycomp JEMALLOC_N(ckh_pointer_keycomp) +#define ckh_rebuild JEMALLOC_N(ckh_rebuild) +#define ckh_remove JEMALLOC_N(ckh_remove) +#define ckh_search JEMALLOC_N(ckh_search) +#define ckh_string_hash JEMALLOC_N(ckh_string_hash) +#define ckh_string_keycomp JEMALLOC_N(ckh_string_keycomp) +#define ckh_try_bucket_insert JEMALLOC_N(ckh_try_bucket_insert) +#define ckh_try_insert JEMALLOC_N(ckh_try_insert) +#define create_zone JEMALLOC_N(create_zone) +#define ctl_boot JEMALLOC_N(ctl_boot) +#define ctl_bymib JEMALLOC_N(ctl_bymib) +#define ctl_byname JEMALLOC_N(ctl_byname) +#define ctl_nametomib JEMALLOC_N(ctl_nametomib) +#define extent_tree_ad_first JEMALLOC_N(extent_tree_ad_first) +#define extent_tree_ad_insert JEMALLOC_N(extent_tree_ad_insert) +#define extent_tree_ad_iter JEMALLOC_N(extent_tree_ad_iter) +#define extent_tree_ad_iter_recurse JEMALLOC_N(extent_tree_ad_iter_recurse) +#define extent_tree_ad_iter_start JEMALLOC_N(extent_tree_ad_iter_start) +#define extent_tree_ad_last JEMALLOC_N(extent_tree_ad_last) +#define extent_tree_ad_new 
JEMALLOC_N(extent_tree_ad_new) +#define extent_tree_ad_next JEMALLOC_N(extent_tree_ad_next) +#define extent_tree_ad_nsearch JEMALLOC_N(extent_tree_ad_nsearch) +#define extent_tree_ad_prev JEMALLOC_N(extent_tree_ad_prev) +#define extent_tree_ad_psearch JEMALLOC_N(extent_tree_ad_psearch) +#define extent_tree_ad_remove JEMALLOC_N(extent_tree_ad_remove) +#define extent_tree_ad_reverse_iter JEMALLOC_N(extent_tree_ad_reverse_iter) +#define extent_tree_ad_reverse_iter_recurse JEMALLOC_N(extent_tree_ad_reverse_iter_recurse) +#define extent_tree_ad_reverse_iter_start JEMALLOC_N(extent_tree_ad_reverse_iter_start) +#define extent_tree_ad_search JEMALLOC_N(extent_tree_ad_search) +#define extent_tree_szad_first JEMALLOC_N(extent_tree_szad_first) +#define extent_tree_szad_insert JEMALLOC_N(extent_tree_szad_insert) +#define extent_tree_szad_iter JEMALLOC_N(extent_tree_szad_iter) +#define extent_tree_szad_iter_recurse JEMALLOC_N(extent_tree_szad_iter_recurse) +#define extent_tree_szad_iter_start JEMALLOC_N(extent_tree_szad_iter_start) +#define extent_tree_szad_last JEMALLOC_N(extent_tree_szad_last) +#define extent_tree_szad_new JEMALLOC_N(extent_tree_szad_new) +#define extent_tree_szad_next JEMALLOC_N(extent_tree_szad_next) +#define extent_tree_szad_nsearch JEMALLOC_N(extent_tree_szad_nsearch) +#define extent_tree_szad_prev JEMALLOC_N(extent_tree_szad_prev) +#define extent_tree_szad_psearch JEMALLOC_N(extent_tree_szad_psearch) +#define extent_tree_szad_remove JEMALLOC_N(extent_tree_szad_remove) +#define extent_tree_szad_reverse_iter JEMALLOC_N(extent_tree_szad_reverse_iter) +#define extent_tree_szad_reverse_iter_recurse JEMALLOC_N(extent_tree_szad_reverse_iter_recurse) +#define extent_tree_szad_reverse_iter_start JEMALLOC_N(extent_tree_szad_reverse_iter_start) +#define extent_tree_szad_search JEMALLOC_N(extent_tree_szad_search) +#define hash JEMALLOC_N(hash) +#define huge_boot JEMALLOC_N(huge_boot) +#define huge_dalloc JEMALLOC_N(huge_dalloc) +#define huge_malloc 
JEMALLOC_N(huge_malloc) +#define huge_palloc JEMALLOC_N(huge_palloc) +#define huge_prof_ctx_get JEMALLOC_N(huge_prof_ctx_get) +#define huge_prof_ctx_set JEMALLOC_N(huge_prof_ctx_set) +#define huge_ralloc JEMALLOC_N(huge_ralloc) +#define huge_ralloc_no_move JEMALLOC_N(huge_ralloc_no_move) +#define huge_salloc JEMALLOC_N(huge_salloc) +#define iallocm JEMALLOC_N(iallocm) +#define icalloc JEMALLOC_N(icalloc) +#define idalloc JEMALLOC_N(idalloc) +#define imalloc JEMALLOC_N(imalloc) +#define ipalloc JEMALLOC_N(ipalloc) +#define iralloc JEMALLOC_N(iralloc) +#define isalloc JEMALLOC_N(isalloc) +#define ivsalloc JEMALLOC_N(ivsalloc) +#define jemalloc_darwin_init JEMALLOC_N(jemalloc_darwin_init) +#define jemalloc_postfork JEMALLOC_N(jemalloc_postfork) +#define jemalloc_prefork JEMALLOC_N(jemalloc_prefork) +#define malloc_cprintf JEMALLOC_N(malloc_cprintf) +#define malloc_mutex_destroy JEMALLOC_N(malloc_mutex_destroy) +#define malloc_mutex_init JEMALLOC_N(malloc_mutex_init) +#define malloc_mutex_lock JEMALLOC_N(malloc_mutex_lock) +#define malloc_mutex_trylock JEMALLOC_N(malloc_mutex_trylock) +#define malloc_mutex_unlock JEMALLOC_N(malloc_mutex_unlock) +#define malloc_printf JEMALLOC_N(malloc_printf) +#define malloc_write JEMALLOC_N(malloc_write) +#define mb_write JEMALLOC_N(mb_write) +#define pow2_ceil JEMALLOC_N(pow2_ceil) +#define prof_alloc_prep JEMALLOC_N(prof_alloc_prep) +#define prof_backtrace JEMALLOC_N(prof_backtrace) +#define prof_boot0 JEMALLOC_N(prof_boot0) +#define prof_boot1 JEMALLOC_N(prof_boot1) +#define prof_boot2 JEMALLOC_N(prof_boot2) +#define prof_ctx_get JEMALLOC_N(prof_ctx_get) +#define prof_ctx_set JEMALLOC_N(prof_ctx_set) +#define prof_free JEMALLOC_N(prof_free) +#define prof_gdump JEMALLOC_N(prof_gdump) +#define prof_idump JEMALLOC_N(prof_idump) +#define prof_lookup JEMALLOC_N(prof_lookup) +#define prof_malloc JEMALLOC_N(prof_malloc) +#define prof_mdump JEMALLOC_N(prof_mdump) +#define prof_realloc JEMALLOC_N(prof_realloc) +#define 
prof_sample_accum_update JEMALLOC_N(prof_sample_accum_update) +#define prof_sample_threshold_update JEMALLOC_N(prof_sample_threshold_update) +#define prof_tdata_init JEMALLOC_N(prof_tdata_init) +#define pthread_create JEMALLOC_N(pthread_create) +#define rtree_get JEMALLOC_N(rtree_get) +#define rtree_get_locked JEMALLOC_N(rtree_get_locked) +#define rtree_new JEMALLOC_N(rtree_new) +#define rtree_set JEMALLOC_N(rtree_set) +#define s2u JEMALLOC_N(s2u) +#define sa2u JEMALLOC_N(sa2u) +#define stats_arenas_i_bins_j_index JEMALLOC_N(stats_arenas_i_bins_j_index) +#define stats_arenas_i_index JEMALLOC_N(stats_arenas_i_index) +#define stats_arenas_i_lruns_j_index JEMALLOC_N(stats_arenas_i_lruns_j_index) +#define stats_cactive_add JEMALLOC_N(stats_cactive_add) +#define stats_cactive_get JEMALLOC_N(stats_cactive_get) +#define stats_cactive_sub JEMALLOC_N(stats_cactive_sub) +#define stats_print JEMALLOC_N(stats_print) +#define szone2ozone JEMALLOC_N(szone2ozone) +#define tcache_alloc_easy JEMALLOC_N(tcache_alloc_easy) +#define tcache_alloc_large JEMALLOC_N(tcache_alloc_large) +#define tcache_alloc_small JEMALLOC_N(tcache_alloc_small) +#define tcache_alloc_small_hard JEMALLOC_N(tcache_alloc_small_hard) +#define tcache_bin_flush_large JEMALLOC_N(tcache_bin_flush_large) +#define tcache_bin_flush_small JEMALLOC_N(tcache_bin_flush_small) +#define tcache_boot JEMALLOC_N(tcache_boot) +#define tcache_create JEMALLOC_N(tcache_create) +#define tcache_dalloc_large JEMALLOC_N(tcache_dalloc_large) +#define tcache_dalloc_small JEMALLOC_N(tcache_dalloc_small) +#define tcache_destroy JEMALLOC_N(tcache_destroy) +#define tcache_event JEMALLOC_N(tcache_event) +#define tcache_get JEMALLOC_N(tcache_get) +#define tcache_stats_merge JEMALLOC_N(tcache_stats_merge) +#define thread_allocated_get JEMALLOC_N(thread_allocated_get) +#define thread_allocated_get_hard JEMALLOC_N(thread_allocated_get_hard) +#define u2s JEMALLOC_N(u2s) diff --git a/include/jemalloc/jemalloc_defs.h.in 
b/include/jemalloc/jemalloc_defs.h.in index d8c81d7c..9ac7e1c2 100644 --- a/include/jemalloc/jemalloc_defs.h.in +++ b/include/jemalloc/jemalloc_defs.h.in @@ -18,6 +18,15 @@ #undef JEMALLOC_P #endif +/* + * JEMALLOC_PRIVATE_NAMESPACE is used as a prefix for all library-private APIs. + * For shared libraries, symbol visibility mechanisms prevent these symbols + * from being exported, but for static libraries, naming collisions are a real + * possibility. + */ +#undef JEMALLOC_PRIVATE_NAMESPACE +#undef JEMALLOC_N + /* * Hyper-threaded CPUs may need a special instruction inside spin loops in * order to yield to another virtual CPU. From 4c48481e7c8f5e5dce55aa55d725e1a479b01224 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 30 Jul 2011 16:59:13 -0700 Subject: [PATCH 0013/3378] Update ChangeLog for 2.2.2. --- ChangeLog | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/ChangeLog b/ChangeLog index 7b262c95..fd9ee545 100644 --- a/ChangeLog +++ b/ChangeLog @@ -6,6 +6,14 @@ found in the git revision history: http://www.canonware.com/cgi-bin/gitweb.cgi?p=jemalloc.git git://canonware.com/jemalloc.git +* 2.2.2 (July 30, 2011) + + Bug fixes: + - Fix a build error for --disable-tcache. + - Fix assertions in arena_purge() (for real this time). + - Add the --with-private-namespace option. This is a workaround for symbol + conflicts that can inadvertently arise when using static libraries. + * 2.2.1 (March 30, 2011) Bug fixes: From 04ca1efe35349a6114523b37abbd4ca066cd17fa Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 30 Jul 2011 17:58:07 -0700 Subject: [PATCH 0014/3378] Adjust relative #include for private_namespace.h. 
--- include/jemalloc/internal/jemalloc_internal.h.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index e9d60da7..a44f0978 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -33,7 +33,7 @@ #define JEMALLOC_MANGLE #include "../jemalloc@install_suffix@.h" -#include "private_namespace.h" +#include "jemalloc/internal/private_namespace.h" #if (defined(JEMALLOC_OSATOMIC) || defined(JEMALLOC_OSSPIN)) #include From 41b954ed36f90e5a479bbe2118b5ce85189a55ee Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 8 Aug 2011 17:10:07 -0700 Subject: [PATCH 0015/3378] Use prof_tdata_cleanup() argument. Use the argument to prof_tdata_cleanup(), rather than calling PROF_TCACHE_GET(). This fixes a bug in the NO_TLS case. --- src/prof.c | 45 ++++++++++++++++++++------------------------- 1 file changed, 20 insertions(+), 25 deletions(-) diff --git a/src/prof.c b/src/prof.c index 65493759..a268e585 100644 --- a/src/prof.c +++ b/src/prof.c @@ -1109,7 +1109,6 @@ prof_tdata_init(void) prof_tdata->vec = imalloc(sizeof(void *) * prof_bt_max); if (prof_tdata->vec == NULL) { - ckh_delete(&prof_tdata->bt2cnt); idalloc(prof_tdata); return (NULL); @@ -1127,33 +1126,29 @@ prof_tdata_init(void) static void prof_tdata_cleanup(void *arg) { - prof_tdata_t *prof_tdata; + prof_thr_cnt_t *cnt; + prof_tdata_t *prof_tdata = (prof_tdata_t *)arg; - prof_tdata = PROF_TCACHE_GET(); - if (prof_tdata != NULL) { - prof_thr_cnt_t *cnt; + /* + * Delete the hash table. All of its contents can still be + * iterated over via the LRU. + */ + ckh_delete(&prof_tdata->bt2cnt); - /* - * Delete the hash table. All of its contents can still be - * iterated over via the LRU. - */ - ckh_delete(&prof_tdata->bt2cnt); - - /* - * Iteratively merge cnt's into the global stats and delete - * them. 
- */ - while ((cnt = ql_last(&prof_tdata->lru_ql, lru_link)) != NULL) { - prof_ctx_merge(cnt->ctx, cnt); - ql_remove(&prof_tdata->lru_ql, cnt, lru_link); - idalloc(cnt); - } - - idalloc(prof_tdata->vec); - - idalloc(prof_tdata); - PROF_TCACHE_SET(NULL); + /* + * Iteratively merge cnt's into the global stats and delete + * them. + */ + while ((cnt = ql_last(&prof_tdata->lru_ql, lru_link)) != NULL) { + prof_ctx_merge(cnt->ctx, cnt); + ql_remove(&prof_tdata->lru_ql, cnt, lru_link); + idalloc(cnt); } + + idalloc(prof_tdata->vec); + + idalloc(prof_tdata); + PROF_TCACHE_SET(NULL); } void From 0cdd42eb3204cdd2646561c90ec202716cd3c344 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 9 Aug 2011 19:06:06 -0700 Subject: [PATCH 0016/3378] Clean up prof-related comments. Clean up some prof-related comments to more accurately reflect how the code works. Simplify OOM handling code in a couple of prof-related error paths. --- src/prof.c | 39 ++++++++++++++++----------------------- 1 file changed, 16 insertions(+), 23 deletions(-) diff --git a/src/prof.c b/src/prof.c index a268e585..dea6d11c 100644 --- a/src/prof.c +++ b/src/prof.c @@ -503,11 +503,8 @@ prof_lookup(prof_bt_t *bt) /* Allocate and partially initialize a new cnt. 
*/ ret.v = imalloc(sizeof(prof_thr_cnt_t)); if (ret.p == NULL) { - if (new_ctx) { - malloc_mutex_lock(&ctx.p->lock); - ctx.p->cnt_merged.curobjs--; - malloc_mutex_unlock(&ctx.p->lock); - } + if (new_ctx) + prof_ctx_destroy(ctx.p); return (NULL); } ql_elm_new(ret.p, cnts_link); @@ -518,11 +515,8 @@ prof_lookup(prof_bt_t *bt) ret.p->epoch = 0; memset(&ret.p->cnts, 0, sizeof(prof_cnt_t)); if (ckh_insert(&prof_tdata->bt2cnt, btkey.v, ret.v)) { - if (new_ctx) { - malloc_mutex_lock(&ctx.p->lock); - ctx.p->cnt_merged.curobjs--; - malloc_mutex_unlock(&ctx.p->lock); - } + if (new_ctx) + prof_ctx_destroy(ctx.p); idalloc(ret.v); return (NULL); } @@ -644,11 +638,10 @@ prof_ctx_destroy(prof_ctx_t *ctx) /* * Check that ctx is still unused by any thread cache before destroying - * it. prof_lookup() interlocks bt2ctx_mtx and ctx->lock in order to - * avoid a race condition with this function, and prof_ctx_merge() - * artificially raises ctx->cnt_merged.curobjs in order to avoid a race - * between the main body of prof_ctx_merge() and entry into this - * function. + * it. prof_lookup() artificially raises ctx->cnt_merge.curobjs in + * order to avoid a race condition with this function, as does + * prof_ctx_merge() in order to avoid a race between the main body of + * prof_ctx_merge() and entry into this function. */ prof_enter(); malloc_mutex_lock(&ctx->lock); @@ -665,7 +658,10 @@ prof_ctx_destroy(prof_ctx_t *ctx) malloc_mutex_destroy(&ctx->lock); idalloc(ctx); } else { - /* Compensate for increment in prof_ctx_merge(). */ + /* + * Compensate for increment in prof_ctx_merge() or + * prof_lookup(). + */ ctx->cnt_merged.curobjs--; malloc_mutex_unlock(&ctx->lock); prof_leave(); @@ -1130,18 +1126,15 @@ prof_tdata_cleanup(void *arg) prof_tdata_t *prof_tdata = (prof_tdata_t *)arg; /* - * Delete the hash table. All of its contents can still be - * iterated over via the LRU. + * Delete the hash table. All of its contents can still be iterated + * over via the LRU. 
*/ ckh_delete(&prof_tdata->bt2cnt); - /* - * Iteratively merge cnt's into the global stats and delete - * them. - */ + /* Iteratively merge cnt's into the global stats and delete them. */ while ((cnt = ql_last(&prof_tdata->lru_ql, lru_link)) != NULL) { - prof_ctx_merge(cnt->ctx, cnt); ql_remove(&prof_tdata->lru_ql, cnt, lru_link); + prof_ctx_merge(cnt->ctx, cnt); idalloc(cnt); } From 183ba50c1940a95080f6cf890ae4ae40200301e7 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 11 Aug 2011 22:51:00 -0700 Subject: [PATCH 0017/3378] Fix two prof-related bugs in rallocm(). Properly handle boundary conditions for sampled region promotion in rallocm(). Prior to this fix, some combinations of 'size' and 'extra' values could cause erroneous behavior. Additionally, size class recording for promoted regions was incorrect. --- src/arena.c | 1 + src/jemalloc.c | 13 ++++++++++--- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/src/arena.c b/src/arena.c index e00dccc3..e749c1d5 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1657,6 +1657,7 @@ arena_prof_promoted(const void *ptr, size_t size) assert(ptr != NULL); assert(CHUNK_ADDR2BASE(ptr) != ptr); assert(isalloc(ptr) == PAGE_SIZE); + assert(size <= small_maxclass); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; diff --git a/src/jemalloc.c b/src/jemalloc.c index e2875169..afba0e1d 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1670,15 +1670,22 @@ JEMALLOC_P(rallocm)(void **ptr, size_t *rsize, size_t size, size_t extra, old_ctx = prof_ctx_get(p); if ((cnt = prof_alloc_prep(max_usize)) == NULL) goto OOM; - if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && max_usize - <= small_maxclass) { + /* + * Use minimum usize to determine whether promotion may happen. + */ + if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U + && ((alignment == 0) ? 
s2u(size) : sa2u(size, + alignment, NULL)) <= small_maxclass) { q = iralloc(p, small_maxclass+1, (small_maxclass+1 >= size+extra) ? 0 : size+extra - (small_maxclass+1), alignment, zero, no_move); if (q == NULL) goto ERR; usize = isalloc(q); - arena_prof_promoted(q, usize); + if (max_usize < PAGE_SIZE) { + usize = max_usize; + arena_prof_promoted(q, usize); + } } else { q = iralloc(p, size, extra, alignment, zero, no_move); if (q == NULL) From b493ce22a4b545c17d5942d0fc65f413dd97743a Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 12 Aug 2011 11:28:47 -0700 Subject: [PATCH 0018/3378] Conditionalize an isalloc() call in rallocm(). Conditionalize an isalloc() call in rallocm() that may be unnecessary. --- src/jemalloc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index afba0e1d..4d10e90a 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1681,11 +1681,11 @@ JEMALLOC_P(rallocm)(void **ptr, size_t *rsize, size_t size, size_t extra, alignment, zero, no_move); if (q == NULL) goto ERR; - usize = isalloc(q); if (max_usize < PAGE_SIZE) { usize = max_usize; arena_prof_promoted(q, usize); - } + } else + usize = isalloc(q); } else { q = iralloc(p, size, extra, alignment, zero, no_move); if (q == NULL) From 745e30b157f700d848cb5ed38eb6c17203760b7c Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 12 Aug 2011 11:40:55 -0700 Subject: [PATCH 0019/3378] Document swap.fds mallctl as read-write. Fix the manual page to document the swap.fds mallctl as read-write, rather than read-only. 
--- doc/jemalloc.xml.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 13f3aae0..7a32879a 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -2025,7 +2025,7 @@ malloc_conf = "xmalloc:true";]]> swap.fds (int *) - r- + rw [] When written to, the files associated with the From a507004d294ad0c78b4d01559479620ebb272a49 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 12 Aug 2011 13:48:27 -0700 Subject: [PATCH 0020/3378] Fix off-by-one backtracing issues. Rewrite prof_alloc_prep() as a cpp macro, PROF_ALLOC_PREP(), in order to remove any doubt as to whether an additional stack frame is created. Prior to this change, it was assumed that inlining would reduce the total number of frames in the backtrace, but in practice behavior wasn't completely predictable. Create imemalign() and call it from posix_memalign(), memalign(), and valloc(), so that all entry points require the same number of stack frames to be ignored during backtracing. 
--- include/jemalloc/internal/private_namespace.h | 1 - include/jemalloc/internal/prof.h | 122 ++++++++---------- src/jemalloc.c | 49 +++++-- 3 files changed, 90 insertions(+), 82 deletions(-) diff --git a/include/jemalloc/internal/private_namespace.h b/include/jemalloc/internal/private_namespace.h index 2cfb1716..d4f5f96d 100644 --- a/include/jemalloc/internal/private_namespace.h +++ b/include/jemalloc/internal/private_namespace.h @@ -145,7 +145,6 @@ #define malloc_write JEMALLOC_N(malloc_write) #define mb_write JEMALLOC_N(mb_write) #define pow2_ceil JEMALLOC_N(pow2_ceil) -#define prof_alloc_prep JEMALLOC_N(prof_alloc_prep) #define prof_backtrace JEMALLOC_N(prof_backtrace) #define prof_boot0 JEMALLOC_N(prof_boot0) #define prof_boot1 JEMALLOC_N(prof_boot1) diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index f9438735..e9064ba6 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -227,9 +227,60 @@ bool prof_boot2(void); /******************************************************************************/ #ifdef JEMALLOC_H_INLINES +#define PROF_ALLOC_PREP(nignore, size, ret) do { \ + prof_tdata_t *prof_tdata; \ + prof_bt_t bt; \ + \ + assert(size == s2u(size)); \ + \ + prof_tdata = PROF_TCACHE_GET(); \ + if (prof_tdata == NULL) { \ + prof_tdata = prof_tdata_init(); \ + if (prof_tdata == NULL) { \ + ret = NULL; \ + break; \ + } \ + } \ + \ + if (opt_prof_active == false) { \ + /* Sampling is currently inactive, so avoid sampling. */\ + ret = (prof_thr_cnt_t *)(uintptr_t)1U; \ + } else if (opt_lg_prof_sample == 0) { \ + /* Don't bother with sampling logic, since sampling */\ + /* interval is 1. */\ + bt_init(&bt, prof_tdata->vec); \ + prof_backtrace(&bt, nignore, prof_bt_max); \ + ret = prof_lookup(&bt); \ + } else { \ + if (prof_tdata->threshold == 0) { \ + /* Initialize. Seed the prng differently for */\ + /* each thread. 
*/\ + prof_tdata->prn_state = \ + (uint64_t)(uintptr_t)&size; \ + prof_sample_threshold_update(prof_tdata); \ + } \ + \ + /* Determine whether to capture a backtrace based on */\ + /* whether size is enough for prof_accum to reach */\ + /* prof_tdata->threshold. However, delay updating */\ + /* these variables until prof_{m,re}alloc(), because */\ + /* we don't know for sure that the allocation will */\ + /* succeed. */\ + /* */\ + /* Use subtraction rather than addition to avoid */\ + /* potential integer overflow. */\ + if (size >= prof_tdata->threshold - \ + prof_tdata->accum) { \ + bt_init(&bt, prof_tdata->vec); \ + prof_backtrace(&bt, nignore, prof_bt_max); \ + ret = prof_lookup(&bt); \ + } else \ + ret = (prof_thr_cnt_t *)(uintptr_t)1U; \ + } \ +} while (0) + #ifndef JEMALLOC_ENABLE_INLINE void prof_sample_threshold_update(prof_tdata_t *prof_tdata); -prof_thr_cnt_t *prof_alloc_prep(size_t size); prof_ctx_t *prof_ctx_get(const void *ptr); void prof_ctx_set(const void *ptr, prof_ctx_t *ctx); bool prof_sample_accum_update(size_t size); @@ -272,71 +323,6 @@ prof_sample_threshold_update(prof_tdata_t *prof_tdata) + (uint64_t)1U; } -JEMALLOC_INLINE prof_thr_cnt_t * -prof_alloc_prep(size_t size) -{ -#ifdef JEMALLOC_ENABLE_INLINE - /* This function does not have its own stack frame, because it is inlined. */ -# define NIGNORE 1 -#else -# define NIGNORE 2 -#endif - prof_thr_cnt_t *ret; - prof_tdata_t *prof_tdata; - prof_bt_t bt; - - assert(size == s2u(size)); - - prof_tdata = PROF_TCACHE_GET(); - if (prof_tdata == NULL) { - prof_tdata = prof_tdata_init(); - if (prof_tdata == NULL) - return (NULL); - } - - if (opt_prof_active == false) { - /* Sampling is currently inactive, so avoid sampling. */ - ret = (prof_thr_cnt_t *)(uintptr_t)1U; - } else if (opt_lg_prof_sample == 0) { - /* - * Don't bother with sampling logic, since sampling interval is - * 1. 
- */ - bt_init(&bt, prof_tdata->vec); - prof_backtrace(&bt, NIGNORE, prof_bt_max); - ret = prof_lookup(&bt); - } else { - if (prof_tdata->threshold == 0) { - /* - * Initialize. Seed the prng differently for each - * thread. - */ - prof_tdata->prn_state = (uint64_t)(uintptr_t)&size; - prof_sample_threshold_update(prof_tdata); - } - - /* - * Determine whether to capture a backtrace based on whether - * size is enough for prof_accum to reach - * prof_tdata->threshold. However, delay updating these - * variables until prof_{m,re}alloc(), because we don't know - * for sure that the allocation will succeed. - * - * Use subtraction rather than addition to avoid potential - * integer overflow. - */ - if (size >= prof_tdata->threshold - prof_tdata->accum) { - bt_init(&bt, prof_tdata->vec); - prof_backtrace(&bt, NIGNORE, prof_bt_max); - ret = prof_lookup(&bt); - } else - ret = (prof_thr_cnt_t *)(uintptr_t)1U; - } - - return (ret); -#undef NIGNORE -} - JEMALLOC_INLINE prof_ctx_t * prof_ctx_get(const void *ptr) { @@ -415,7 +401,7 @@ prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt) * always possible to tell in advance how large an * object's usable size will be, so there should never * be a difference between the size passed to - * prof_alloc_prep() and prof_malloc(). + * PROF_ALLOC_PREP() and prof_malloc(). */ assert((uintptr_t)cnt == (uintptr_t)1U); } @@ -459,7 +445,7 @@ prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt, if (prof_sample_accum_update(size)) { /* * Don't sample. 
The size passed to - * prof_alloc_prep() was larger than what + * PROF_ALLOC_PREP() was larger than what * actually got allocated, so a backtrace was * captured for this allocation, even though * its actual size was insufficient to cross diff --git a/src/jemalloc.c b/src/jemalloc.c index 4d10e90a..14a0c7c2 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -84,6 +84,7 @@ static void malloc_conf_error(const char *msg, const char *k, size_t klen, const char *v, size_t vlen); static void malloc_conf_init(void); static bool malloc_init_hard(void); +static int imemalign(void **memptr, size_t alignment, size_t size); /******************************************************************************/ /* malloc_message() setup. */ @@ -939,7 +940,8 @@ JEMALLOC_P(malloc)(size_t size) #ifdef JEMALLOC_PROF if (opt_prof) { usize = s2u(size); - if ((cnt = prof_alloc_prep(usize)) == NULL) { + PROF_ALLOC_PREP(1, usize, cnt); + if (cnt == NULL) { ret = NULL; goto OOM; } @@ -988,9 +990,15 @@ RETURN: } JEMALLOC_ATTR(nonnull(1)) -JEMALLOC_ATTR(visibility("default")) -int -JEMALLOC_P(posix_memalign)(void **memptr, size_t alignment, size_t size) +#ifdef JEMALLOC_PROF +/* + * Avoid any uncertainty as to how many backtrace frames to ignore in + * PROF_ALLOC_PREP(). 
+ */ +JEMALLOC_ATTR(noinline) +#endif +static int +imemalign(void **memptr, size_t alignment, size_t size) { int ret; size_t usize @@ -1057,7 +1065,8 @@ JEMALLOC_P(posix_memalign)(void **memptr, size_t alignment, size_t size) #ifdef JEMALLOC_PROF if (opt_prof) { - if ((cnt = prof_alloc_prep(usize)) == NULL) { + PROF_ALLOC_PREP(2, usize, cnt); + if (cnt == NULL) { result = NULL; ret = EINVAL; } else { @@ -1110,6 +1119,15 @@ RETURN: return (ret); } +JEMALLOC_ATTR(nonnull(1)) +JEMALLOC_ATTR(visibility("default")) +int +JEMALLOC_P(posix_memalign)(void **memptr, size_t alignment, size_t size) +{ + + return imemalign(memptr, alignment, size); +} + JEMALLOC_ATTR(malloc) JEMALLOC_ATTR(visibility("default")) void * @@ -1165,7 +1183,8 @@ JEMALLOC_P(calloc)(size_t num, size_t size) #ifdef JEMALLOC_PROF if (opt_prof) { usize = s2u(num_size); - if ((cnt = prof_alloc_prep(usize)) == NULL) { + PROF_ALLOC_PREP(1, usize, cnt); + if (cnt == NULL) { ret = NULL; goto RETURN; } @@ -1278,7 +1297,8 @@ JEMALLOC_P(realloc)(void *ptr, size_t size) if (opt_prof) { usize = s2u(size); old_ctx = prof_ctx_get(ptr); - if ((cnt = prof_alloc_prep(usize)) == NULL) { + PROF_ALLOC_PREP(1, usize, cnt); + if (cnt == NULL) { ret = NULL; goto OOM; } @@ -1327,7 +1347,8 @@ OOM: #ifdef JEMALLOC_PROF if (opt_prof) { usize = s2u(size); - if ((cnt = prof_alloc_prep(usize)) == NULL) + PROF_ALLOC_PREP(1, usize, cnt); + if (cnt == NULL) ret = NULL; else { if (prof_promote && (uintptr_t)cnt != @@ -1432,7 +1453,7 @@ JEMALLOC_P(memalign)(size_t alignment, size_t size) #ifdef JEMALLOC_CC_SILENCE int result = #endif - JEMALLOC_P(posix_memalign)(&ret, alignment, size); + imemalign(&ret, alignment, size); #ifdef JEMALLOC_CC_SILENCE if (result != 0) return (NULL); @@ -1451,7 +1472,7 @@ JEMALLOC_P(valloc)(size_t size) #ifdef JEMALLOC_CC_SILENCE int result = #endif - JEMALLOC_P(posix_memalign)(&ret, PAGE_SIZE, size); + imemalign(&ret, PAGE_SIZE, size); #ifdef JEMALLOC_CC_SILENCE if (result != 0) return (NULL); @@ -1573,7 
+1594,8 @@ JEMALLOC_P(allocm)(void **ptr, size_t *rsize, size_t size, int flags) #ifdef JEMALLOC_PROF if (opt_prof) { - if ((cnt = prof_alloc_prep(usize)) == NULL) + PROF_ALLOC_PREP(1, usize, cnt); + if (cnt == NULL) goto OOM; if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && usize <= small_maxclass) { @@ -1660,7 +1682,7 @@ JEMALLOC_P(rallocm)(void **ptr, size_t *rsize, size_t size, size_t extra, /* * usize isn't knowable before iralloc() returns when extra is * non-zero. Therefore, compute its maximum possible value and - * use that in prof_alloc_prep() to decide whether to capture a + * use that in PROF_ALLOC_PREP() to decide whether to capture a * backtrace. prof_realloc() will use the actual usize to * decide whether to sample. */ @@ -1668,7 +1690,8 @@ JEMALLOC_P(rallocm)(void **ptr, size_t *rsize, size_t size, size_t extra, sa2u(size+extra, alignment, NULL); old_size = isalloc(p); old_ctx = prof_ctx_get(p); - if ((cnt = prof_alloc_prep(max_usize)) == NULL) + PROF_ALLOC_PREP(1, max_usize, cnt); + if (cnt == NULL) goto OOM; /* * Use minimum usize to determine whether promotion may happen. From 749c2a0ab62089891d8e40840fbf384f12cb8401 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 12 Aug 2011 18:37:54 -0700 Subject: [PATCH 0021/3378] Add missing prof_malloc() call in allocm(). Add a missing prof_malloc() call in allocm(). Before this fix, negative object/byte counts could be observed in heap profiles for applications that use allocm(). --- src/jemalloc.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 14a0c7c2..b13b1bf9 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1587,8 +1587,7 @@ JEMALLOC_P(allocm)(void **ptr, size_t *rsize, size_t size, int flags) if (malloc_init()) goto OOM; - usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment, - NULL); + usize = (alignment == 0) ? 
s2u(size) : sa2u(size, alignment, NULL); if (usize == 0) goto OOM; @@ -1612,7 +1611,7 @@ JEMALLOC_P(allocm)(void **ptr, size_t *rsize, size_t size, int flags) if (p == NULL) goto OOM; } - + prof_malloc(p, usize, cnt); if (rsize != NULL) *rsize = usize; } else From 46405e670f9b4831da9c24c15f0f3a537ef2606b Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 30 Aug 2011 23:37:29 -0700 Subject: [PATCH 0022/3378] Fix a prof-related bug in realloc(). Fix realloc() such that it only records the object passed in as freed if no OOM error occurs. --- src/jemalloc.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index b13b1bf9..fd8bf52f 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1299,6 +1299,7 @@ JEMALLOC_P(realloc)(void *ptr, size_t size) old_ctx = prof_ctx_get(ptr); PROF_ALLOC_PREP(1, usize, cnt); if (cnt == NULL) { + old_ctx = NULL; ret = NULL; goto OOM; } @@ -1308,8 +1309,13 @@ JEMALLOC_P(realloc)(void *ptr, size_t size) false, false); if (ret != NULL) arena_prof_promoted(ret, usize); - } else + else + old_ctx = NULL; + } else { ret = iralloc(ptr, size, 0, 0, false, false); + if (ret == NULL) + old_ctx = NULL; + } } else #endif { @@ -1666,7 +1672,6 @@ JEMALLOC_P(rallocm)(void **ptr, size_t *rsize, size_t size, size_t extra, bool no_move = flags & ALLOCM_NO_MOVE; #ifdef JEMALLOC_PROF prof_thr_cnt_t *cnt; - prof_ctx_t *old_ctx; #endif assert(ptr != NULL); @@ -1687,8 +1692,8 @@ JEMALLOC_P(rallocm)(void **ptr, size_t *rsize, size_t size, size_t extra, */ size_t max_usize = (alignment == 0) ? s2u(size+extra) : sa2u(size+extra, alignment, NULL); + prof_ctx_t *old_ctx = prof_ctx_get(p); old_size = isalloc(p); - old_ctx = prof_ctx_get(p); PROF_ALLOC_PREP(1, max_usize, cnt); if (cnt == NULL) goto OOM; From a9076c9483a8efcb216b9f1303bf310f80ee3401 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 30 Aug 2011 23:40:11 -0700 Subject: [PATCH 0023/3378] Fix a prof-related race condition. 
Fix prof_lookup() to artificially raise curobjs for all paths through the code that creates a new entry in the per thread bt2cnt hash table. This fixes a race condition that could corrupt memory if prof_accum were false, and a non-default lg_prof_tcmax were used and/or threads were destroyed. --- src/prof.c | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/src/prof.c b/src/prof.c index dea6d11c..8a144b4e 100644 --- a/src/prof.c +++ b/src/prof.c @@ -474,11 +474,23 @@ prof_lookup(prof_bt_t *bt) /* * Artificially raise curobjs, in order to avoid a race * condition with prof_ctx_merge()/prof_ctx_destroy(). + * + * No locking is necessary for ctx here because no other + * threads have had the opportunity to fetch it from + * bt2ctx yet. */ ctx.p->cnt_merged.curobjs++; new_ctx = true; - } else + } else { + /* + * Artificially raise curobjs, in order to avoid a race + * condition with prof_ctx_merge()/prof_ctx_destroy(). + */ + malloc_mutex_lock(&ctx.p->lock); + ctx.p->cnt_merged.curobjs++; + malloc_mutex_unlock(&ctx.p->lock); new_ctx = false; + } prof_leave(); /* Link a prof_thd_cnt_t into ctx for this thread. */ @@ -491,8 +503,9 @@ prof_lookup(prof_bt_t *bt) */ ret.p = ql_last(&prof_tdata->lru_ql, lru_link); assert(ret.v != NULL); - ckh_remove(&prof_tdata->bt2cnt, ret.p->ctx->bt, NULL, - NULL); + if (ckh_remove(&prof_tdata->bt2cnt, ret.p->ctx->bt, + NULL, NULL)) + assert(false); ql_remove(&prof_tdata->lru_ql, ret.p, lru_link); prof_ctx_merge(ret.p->ctx, ret.p); /* ret can now be re-used. */ @@ -523,8 +536,7 @@ prof_lookup(prof_bt_t *bt) ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link); malloc_mutex_lock(&ctx.p->lock); ql_tail_insert(&ctx.p->cnts_ql, ret.p, cnts_link); - if (new_ctx) - ctx.p->cnt_merged.curobjs--; + ctx.p->cnt_merged.curobjs--; malloc_mutex_unlock(&ctx.p->lock); } else { /* Move ret to the front of the LRU. 
*/ @@ -650,7 +662,8 @@ prof_ctx_destroy(prof_ctx_t *ctx) assert(ctx->cnt_merged.accumobjs == 0); assert(ctx->cnt_merged.accumbytes == 0); /* Remove ctx from bt2ctx. */ - ckh_remove(&bt2ctx, ctx->bt, NULL, NULL); + if (ckh_remove(&bt2ctx, ctx->bt, NULL, NULL)) + assert(false); prof_leave(); /* Destroy ctx. */ malloc_mutex_unlock(&ctx->lock); From c67e4fdc712aa5b818d69b7ef8e3963441febb16 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 31 Aug 2011 15:19:13 -0700 Subject: [PATCH 0024/3378] Update ChangeLog for 2.2.3. --- ChangeLog | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/ChangeLog b/ChangeLog index fd9ee545..66032b26 100644 --- a/ChangeLog +++ b/ChangeLog @@ -6,6 +6,20 @@ found in the git revision history: http://www.canonware.com/cgi-bin/gitweb.cgi?p=jemalloc.git git://canonware.com/jemalloc.git +* 2.2.3 (August 31, 2011) + + This version fixes numerous bugs related to heap profiling. + + Bug fixes: + - Fix a prof-related race condition. This bug could cause memory corruption, + but only occurred in non-default configurations (prof_accum:false). + - Fix off-by-one backtracing issues (make sure that prof_alloc_prep() is + excluded from backtraces). + - Fix a prof-related bug in realloc() (only triggered by OOM errors). + - Fix prof-related bugs in allocm() and rallocm(). + - Fix prof_tdata_cleanup() for --disable-tls builds. + - Fix a relative include path, to fix objdir builds. + * 2.2.2 (July 30, 2011) Bug fixes: From da9dde0854b2240882867f192a59ad391f4bf92b Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 1 Nov 2011 20:48:31 -0700 Subject: [PATCH 0025/3378] Clean up rb documentation. 
--- include/jemalloc/internal/rb.h | 92 +++++++++++++++++----------------- 1 file changed, 46 insertions(+), 46 deletions(-) diff --git a/include/jemalloc/internal/rb.h b/include/jemalloc/internal/rb.h index ee9b009d..7b675f09 100644 --- a/include/jemalloc/internal/rb.h +++ b/include/jemalloc/internal/rb.h @@ -223,88 +223,88 @@ a_prefix##reverse_iter(a_rbt_type *rbtree, a_type *start, \ * The following API is generated: * * static void - * ex_new(ex_t *extree); + * ex_new(ex_t *tree); * Description: Initialize a red-black tree structure. * Args: - * extree: Pointer to an uninitialized red-black tree object. + * tree: Pointer to an uninitialized red-black tree object. * * static ex_node_t * - * ex_first(ex_t *extree); + * ex_first(ex_t *tree); * static ex_node_t * - * ex_last(ex_t *extree); - * Description: Get the first/last node in extree. + * ex_last(ex_t *tree); + * Description: Get the first/last node in tree. * Args: - * extree: Pointer to an initialized red-black tree object. - * Ret: First/last node in extree, or NULL if extree is empty. + * tree: Pointer to an initialized red-black tree object. + * Ret: First/last node in tree, or NULL if tree is empty. * * static ex_node_t * - * ex_next(ex_t *extree, ex_node_t *node); + * ex_next(ex_t *tree, ex_node_t *node); * static ex_node_t * - * ex_prev(ex_t *extree, ex_node_t *node); + * ex_prev(ex_t *tree, ex_node_t *node); * Description: Get node's successor/predecessor. * Args: - * extree: Pointer to an initialized red-black tree object. - * node : A node in extree. - * Ret: node's successor/predecessor in extree, or NULL if node is + * tree: Pointer to an initialized red-black tree object. + * node: A node in tree. + * Ret: node's successor/predecessor in tree, or NULL if node is * last/first. * * static ex_node_t * - * ex_search(ex_t *extree, ex_node_t *key); + * ex_search(ex_t *tree, ex_node_t *key); * Description: Search for node that matches key. 
* Args: - * extree: Pointer to an initialized red-black tree object. - * key : Search key. - * Ret: Node in extree that matches key, or NULL if no match. + * tree: Pointer to an initialized red-black tree object. + * key : Search key. + * Ret: Node in tree that matches key, or NULL if no match. * * static ex_node_t * - * ex_nsearch(ex_t *extree, ex_node_t *key); + * ex_nsearch(ex_t *tree, ex_node_t *key); * static ex_node_t * - * ex_psearch(ex_t *extree, ex_node_t *key); + * ex_psearch(ex_t *tree, ex_node_t *key); * Description: Search for node that matches key. If no match is found, * return what would be key's successor/predecessor, were - * key in extree. + * key in tree. * Args: - * extree: Pointer to an initialized red-black tree object. - * key : Search key. - * Ret: Node in extree that matches key, or if no match, hypothetical - * node's successor/predecessor (NULL if no successor/predecessor). + * tree: Pointer to an initialized red-black tree object. + * key : Search key. + * Ret: Node in tree that matches key, or if no match, hypothetical node's + * successor/predecessor (NULL if no successor/predecessor). * * static void - * ex_insert(ex_t *extree, ex_node_t *node); - * Description: Insert node into extree. + * ex_insert(ex_t *tree, ex_node_t *node); + * Description: Insert node into tree. * Args: - * extree: Pointer to an initialized red-black tree object. - * node : Node to be inserted into extree. + * tree: Pointer to an initialized red-black tree object. + * node: Node to be inserted into tree. * * static void - * ex_remove(ex_t *extree, ex_node_t *node); - * Description: Remove node from extree. + * ex_remove(ex_t *tree, ex_node_t *node); + * Description: Remove node from tree. * Args: - * extree: Pointer to an initialized red-black tree object. - * node : Node in extree to be removed. + * tree: Pointer to an initialized red-black tree object. + * node: Node in tree to be removed. 
* * static ex_node_t * - * ex_iter(ex_t *extree, ex_node_t *start, ex_node_t *(*cb)(ex_t *, + * ex_iter(ex_t *tree, ex_node_t *start, ex_node_t *(*cb)(ex_t *, * ex_node_t *, void *), void *arg); * static ex_node_t * - * ex_reverse_iter(ex_t *extree, ex_node_t *start, ex_node *(*cb)(ex_t *, + * ex_reverse_iter(ex_t *tree, ex_node_t *start, ex_node *(*cb)(ex_t *, * ex_node_t *, void *), void *arg); - * Description: Iterate forward/backward over extree, starting at node. - * If extree is modified, iteration must be immediately + * Description: Iterate forward/backward over tree, starting at node. If + * tree is modified, iteration must be immediately * terminated by the callback function that causes the * modification. * Args: - * extree: Pointer to an initialized red-black tree object. - * start : Node at which to start iteration, or NULL to start at - * first/last node. - * cb : Callback function, which is called for each node during - * iteration. Under normal circumstances the callback function - * should return NULL, which causes iteration to continue. If a - * callback function returns non-NULL, iteration is immediately - * terminated and the non-NULL return value is returned by the - * iterator. This is useful for re-starting iteration after - * modifying extree. - * arg : Opaque pointer passed to cb(). + * tree : Pointer to an initialized red-black tree object. + * start: Node at which to start iteration, or NULL to start at + * first/last node. + * cb : Callback function, which is called for each node during + * iteration. Under normal circumstances the callback function + * should return NULL, which causes iteration to continue. If a + * callback function returns non-NULL, iteration is immediately + * terminated and the non-NULL return value is returned by the + * iterator. This is useful for re-starting iteration after + * modifying tree. + * arg : Opaque pointer passed to cb(). 
* Ret: NULL if iteration completed, or the non-NULL callback return value * that caused termination of the iteration. */ From 2bd3cbc5c68bb9b097c382108ae1aed793e08062 Mon Sep 17 00:00:00 2001 From: Antony Dovgal Date: Thu, 13 Oct 2011 09:33:33 +0400 Subject: [PATCH 0026/3378] add autogenerated jemalloc.sh wrapper script --- .gitignore | 1 + Makefile.in | 2 +- bin/jemalloc.sh.in | 9 +++++++++ configure.ac | 20 +++++++++++++++++++- 4 files changed, 30 insertions(+), 2 deletions(-) create mode 100644 bin/jemalloc.sh.in diff --git a/.gitignore b/.gitignore index 32b4c424..1a9bb068 100644 --- a/.gitignore +++ b/.gitignore @@ -21,3 +21,4 @@ !test/*.c !test/*.exp /VERSION +/bin/jemalloc.sh diff --git a/Makefile.in b/Makefile.in index de7492f9..6f66e4d2 100644 --- a/Makefile.in +++ b/Makefile.in @@ -42,7 +42,7 @@ TEST_LIBRARY_PATH := endif # Lists of files. -BINS := @srcroot@bin/pprof +BINS := @srcroot@bin/pprof @objroot@bin/jemalloc.sh CHDRS := @objroot@include/jemalloc/jemalloc@install_suffix@.h \ @objroot@include/jemalloc/jemalloc_defs@install_suffix@.h CSRCS := @srcroot@src/jemalloc.c @srcroot@src/arena.c @srcroot@src/atomic.c \ diff --git a/bin/jemalloc.sh.in b/bin/jemalloc.sh.in new file mode 100644 index 00000000..4d13cc6c --- /dev/null +++ b/bin/jemalloc.sh.in @@ -0,0 +1,9 @@ +#!/bin/sh + +prefix=@prefix@ +exec_prefix=@exec_prefix@ +libdir=@libdir@ + +@LD_PRELOAD_VAR@=${libdir}/libjemalloc.@SHLIB_SUFFIX_NAME@.1 +export @LD_PRELOAD_VAR@ +exec "$@" diff --git a/configure.ac b/configure.ac index b58aa520..688e0c83 100644 --- a/configure.ac +++ b/configure.ac @@ -167,6 +167,9 @@ case "${host_cpu}" in esac AC_DEFINE_UNQUOTED([CPU_SPINWAIT], [$CPU_SPINWAIT]) +LD_PRELOAD_VAR="LD_PRELOAD" +SHLIB_SUFFIX_NAME="so" + dnl Platform-specific settings. abi and RPATH can probably be determined dnl programmatically, but doing so is error-prone, which makes it generally dnl not worth the trouble. 
@@ -180,6 +183,8 @@ case "${host}" in abi="macho" AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE]) RPATH="" + LD_PRELOAD_VAR="DYLD_INSERT_LIBRARIES" + SHLIB_SUFFIX_NAME="dylib" ;; *-*-freebsd*) CFLAGS="$CFLAGS" @@ -217,6 +222,17 @@ case "${host}" in CPPFLAGS="$CPPFLAGS -D_POSIX_PTHREAD_SEMANTICS" LIBS="$LIBS -lposix4 -lsocket -lnsl" ;; + *-ibm-aix*) + if "$LG_SIZEOF_PTR" = "8"; then + dnl 64bit AIX + LD_PRELOAD_VAR="LDR_PRELOAD64" + else + dnl 32bit AIX + LD_PRELOAD_VAR="LDR_PRELOAD" + fi + abi="xcoff" + RPATH="-Wl,-rpath," + ;; *) AC_MSG_RESULT([Unsupported operating system: ${host}]) abi="elf" @@ -225,6 +241,8 @@ case "${host}" in esac AC_SUBST([abi]) AC_SUBST([RPATH]) +AC_SUBST([LD_PRELOAD_VAR]) +AC_SUBST([SHLIB_SUFFIX_NAME]) JE_COMPILABLE([__attribute__ syntax], [static __attribute__((unused)) void foo(void){}], @@ -881,7 +899,7 @@ AC_CONFIG_HEADERS([$cfghdrs_tup]) dnl ============================================================================ dnl Generate outputs. -AC_CONFIG_FILES([$cfgoutputs_tup config.stamp]) +AC_CONFIG_FILES([$cfgoutputs_tup config.stamp bin/jemalloc.sh]) AC_SUBST([cfgoutputs_in]) AC_SUBST([cfgoutputs_out]) AC_OUTPUT From f576c63f1eb29ce32e930501f65c541ff344e912 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 1 Nov 2011 22:27:41 -0700 Subject: [PATCH 0027/3378] Refactor SO and REV make variables. Refactor the SO and REV such that they are set via autoconf variables, @so@ and @rev@. These variables are both needed by the jemalloc.sh script, so this unifies their definitions. 
--- Makefile.in | 5 ++--- bin/jemalloc.sh.in | 2 +- configure.ac | 13 +++++++++---- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/Makefile.in b/Makefile.in index 6f66e4d2..82983892 100644 --- a/Makefile.in +++ b/Makefile.in @@ -27,14 +27,13 @@ endif LDFLAGS := @LDFLAGS@ LIBS := @LIBS@ RPATH_EXTRA := @RPATH_EXTRA@ +SO := @so@ ifeq (macho, @abi@) -SO := dylib WL_SONAME := dylib_install_name else -SO := so WL_SONAME := soname endif -REV := 1 +REV := @rev@ ifeq (macho, @abi@) TEST_LIBRARY_PATH := DYLD_FALLBACK_LIBRARY_PATH=@objroot@lib else diff --git a/bin/jemalloc.sh.in b/bin/jemalloc.sh.in index 4d13cc6c..56cdfaf4 100644 --- a/bin/jemalloc.sh.in +++ b/bin/jemalloc.sh.in @@ -4,6 +4,6 @@ prefix=@prefix@ exec_prefix=@exec_prefix@ libdir=@libdir@ -@LD_PRELOAD_VAR@=${libdir}/libjemalloc.@SHLIB_SUFFIX_NAME@.1 +@LD_PRELOAD_VAR@=${libdir}/libjemalloc.@so@.@rev@ export @LD_PRELOAD_VAR@ exec "$@" diff --git a/configure.ac b/configure.ac index 688e0c83..699f931c 100644 --- a/configure.ac +++ b/configure.ac @@ -40,6 +40,10 @@ AC_RUN_IFELSE([AC_LANG_PROGRAM( dnl ============================================================================ +dnl Library revision. +rev=1 +AC_SUBST([rev]) + srcroot=$srcdir if test "x${srcroot}" = "x." ; then srcroot="" @@ -168,7 +172,7 @@ esac AC_DEFINE_UNQUOTED([CPU_SPINWAIT], [$CPU_SPINWAIT]) LD_PRELOAD_VAR="LD_PRELOAD" -SHLIB_SUFFIX_NAME="so" +so="so" dnl Platform-specific settings. 
abi and RPATH can probably be determined dnl programmatically, but doing so is error-prone, which makes it generally @@ -184,7 +188,7 @@ case "${host}" in AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE]) RPATH="" LD_PRELOAD_VAR="DYLD_INSERT_LIBRARIES" - SHLIB_SUFFIX_NAME="dylib" + so="dylib" ;; *-*-freebsd*) CFLAGS="$CFLAGS" @@ -242,7 +246,7 @@ esac AC_SUBST([abi]) AC_SUBST([RPATH]) AC_SUBST([LD_PRELOAD_VAR]) -AC_SUBST([SHLIB_SUFFIX_NAME]) +AC_SUBST([so]) JE_COMPILABLE([__attribute__ syntax], [static __attribute__((unused)) void foo(void){}], @@ -907,7 +911,8 @@ AC_OUTPUT dnl ============================================================================ dnl Print out the results of configuration. AC_MSG_RESULT([===============================================================================]) -AC_MSG_RESULT([jemalloc version : $jemalloc_version]) +AC_MSG_RESULT([jemalloc version : ${jemalloc_version}]) +AC_MSG_RESULT([library revision : ${rev}]) AC_MSG_RESULT([]) AC_MSG_RESULT([CC : ${CC}]) AC_MSG_RESULT([CPPFLAGS : ${CPPFLAGS}]) From 8e6f8b490dbd4b9ae715267fd401f09a056f92c4 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 3 Nov 2011 18:40:03 -0700 Subject: [PATCH 0028/3378] Initialize arenas_tsd before setting it. Reported by: Ethan Burns, Rich Prohaska, Tudor Bosman --- src/jemalloc.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index fd8bf52f..fd6b890a 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -769,6 +769,14 @@ malloc_init_hard(void) } #endif + if (malloc_mutex_init(&arenas_lock)) + return (true); + + if (pthread_key_create(&arenas_tsd, arenas_cleanup) != 0) { + malloc_mutex_unlock(&init_lock); + return (true); + } + /* * Create enough scaffolding to allow recursive allocation in * malloc_ncpus(). 
@@ -795,14 +803,6 @@ malloc_init_hard(void) ARENA_SET(arenas[0]); arenas[0]->nthreads++; - if (malloc_mutex_init(&arenas_lock)) - return (true); - - if (pthread_key_create(&arenas_tsd, arenas_cleanup) != 0) { - malloc_mutex_unlock(&init_lock); - return (true); - } - #ifdef JEMALLOC_PROF if (prof_boot2()) { malloc_mutex_unlock(&init_lock); From 30fbef8aeaf65fcd6b265fb9f551e7c2ec8cb22f Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 5 Nov 2011 21:06:55 -0700 Subject: [PATCH 0029/3378] Fix rallocm() test to support >4KiB pages. --- src/jemalloc.c | 2 +- test/rallocm.c | 18 ++++++++++++++---- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index fd6b890a..a161c2e2 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -689,7 +689,7 @@ malloc_init_hard(void) result = sysconf(_SC_PAGESIZE); assert(result != -1); - pagesize = (unsigned)result; + pagesize = (size_t)result; /* * We assume that pagesize is a power of 2 when calculating diff --git a/test/rallocm.c b/test/rallocm.c index a8cadebc..ccf326bb 100644 --- a/test/rallocm.c +++ b/test/rallocm.c @@ -1,6 +1,8 @@ #include #include +#include #include +#include #define JEMALLOC_MANGLE #include "jemalloc_test.h" @@ -8,12 +10,20 @@ int main(void) { + size_t pagesize; void *p, *q; size_t sz, tsz; int r; fprintf(stderr, "Test begin\n"); + /* Get page size. 
*/ + { + long result = sysconf(_SC_PAGESIZE); + assert(result != -1); + pagesize = (size_t)result; + } + r = JEMALLOC_P(allocm)(&p, &sz, 42, 0); if (r != ALLOCM_SUCCESS) { fprintf(stderr, "Unexpected allocm() error\n"); @@ -66,7 +76,7 @@ main(void) p = q; sz = tsz; - r = JEMALLOC_P(rallocm)(&q, &tsz, 8192, 0, 0); + r = JEMALLOC_P(rallocm)(&q, &tsz, pagesize*2, 0, 0); if (r != ALLOCM_SUCCESS) fprintf(stderr, "Unexpected rallocm() error\n"); if (q == p) @@ -78,7 +88,7 @@ main(void) p = q; sz = tsz; - r = JEMALLOC_P(rallocm)(&q, &tsz, 16384, 0, 0); + r = JEMALLOC_P(rallocm)(&q, &tsz, pagesize*4, 0, 0); if (r != ALLOCM_SUCCESS) fprintf(stderr, "Unexpected rallocm() error\n"); if (tsz == sz) { @@ -88,7 +98,7 @@ main(void) p = q; sz = tsz; - r = JEMALLOC_P(rallocm)(&q, &tsz, 8192, 0, ALLOCM_NO_MOVE); + r = JEMALLOC_P(rallocm)(&q, &tsz, pagesize*2, 0, ALLOCM_NO_MOVE); if (r != ALLOCM_SUCCESS) fprintf(stderr, "Unexpected rallocm() error\n"); if (q != p) @@ -99,7 +109,7 @@ main(void) } sz = tsz; - r = JEMALLOC_P(rallocm)(&q, &tsz, 16384, 0, ALLOCM_NO_MOVE); + r = JEMALLOC_P(rallocm)(&q, &tsz, pagesize*4, 0, ALLOCM_NO_MOVE); if (r != ALLOCM_SUCCESS) fprintf(stderr, "Unexpected rallocm() error\n"); if (q != p) From ca9ee1a409c32e052ab04ca727bbc257a43795fc Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 5 Nov 2011 21:46:23 -0700 Subject: [PATCH 0030/3378] Update ChangeLog for 2.2.4. --- ChangeLog | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/ChangeLog b/ChangeLog index 66032b26..61979683 100644 --- a/ChangeLog +++ b/ChangeLog @@ -6,6 +6,13 @@ found in the git revision history: http://www.canonware.com/cgi-bin/gitweb.cgi?p=jemalloc.git git://canonware.com/jemalloc.git +* 2.2.4 (November 5, 2011) + + Bug fixes: + - Initialize arenas_tsd before using it. This bug existed for 2.2.[0-3], as + well as for --disable-tls builds in earlier releases. + - Do not assume a 4 KiB page size in test/rallocm.c. 
+ * 2.2.3 (August 31, 2011) This version fixes numerous bugs related to heap profiling. From c87f10a325378dddf73a267a64fedeff7f3bcc95 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 3 Nov 2011 18:40:03 -0700 Subject: [PATCH 0031/3378] Initialize arenas_tsd before setting it. Reported by: Ethan Burns, Rich Prohaska, Tudor Bosman --- src/jemalloc.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index fd8bf52f..fd6b890a 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -769,6 +769,14 @@ malloc_init_hard(void) } #endif + if (malloc_mutex_init(&arenas_lock)) + return (true); + + if (pthread_key_create(&arenas_tsd, arenas_cleanup) != 0) { + malloc_mutex_unlock(&init_lock); + return (true); + } + /* * Create enough scaffolding to allow recursive allocation in * malloc_ncpus(). @@ -795,14 +803,6 @@ malloc_init_hard(void) ARENA_SET(arenas[0]); arenas[0]->nthreads++; - if (malloc_mutex_init(&arenas_lock)) - return (true); - - if (pthread_key_create(&arenas_tsd, arenas_cleanup) != 0) { - malloc_mutex_unlock(&init_lock); - return (true); - } - #ifdef JEMALLOC_PROF if (prof_boot2()) { malloc_mutex_unlock(&init_lock); From dd2cb6484b92612f4c8602cd2bbb9895fd688439 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 5 Nov 2011 21:06:55 -0700 Subject: [PATCH 0032/3378] Fix rallocm() test to support >4KiB pages. 
--- src/jemalloc.c | 2 +- test/rallocm.c | 18 ++++++++++++++---- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index fd6b890a..a161c2e2 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -689,7 +689,7 @@ malloc_init_hard(void) result = sysconf(_SC_PAGESIZE); assert(result != -1); - pagesize = (unsigned)result; + pagesize = (size_t)result; /* * We assume that pagesize is a power of 2 when calculating diff --git a/test/rallocm.c b/test/rallocm.c index a8cadebc..ccf326bb 100644 --- a/test/rallocm.c +++ b/test/rallocm.c @@ -1,6 +1,8 @@ #include #include +#include #include +#include #define JEMALLOC_MANGLE #include "jemalloc_test.h" @@ -8,12 +10,20 @@ int main(void) { + size_t pagesize; void *p, *q; size_t sz, tsz; int r; fprintf(stderr, "Test begin\n"); + /* Get page size. */ + { + long result = sysconf(_SC_PAGESIZE); + assert(result != -1); + pagesize = (size_t)result; + } + r = JEMALLOC_P(allocm)(&p, &sz, 42, 0); if (r != ALLOCM_SUCCESS) { fprintf(stderr, "Unexpected allocm() error\n"); @@ -66,7 +76,7 @@ main(void) p = q; sz = tsz; - r = JEMALLOC_P(rallocm)(&q, &tsz, 8192, 0, 0); + r = JEMALLOC_P(rallocm)(&q, &tsz, pagesize*2, 0, 0); if (r != ALLOCM_SUCCESS) fprintf(stderr, "Unexpected rallocm() error\n"); if (q == p) @@ -78,7 +88,7 @@ main(void) p = q; sz = tsz; - r = JEMALLOC_P(rallocm)(&q, &tsz, 16384, 0, 0); + r = JEMALLOC_P(rallocm)(&q, &tsz, pagesize*4, 0, 0); if (r != ALLOCM_SUCCESS) fprintf(stderr, "Unexpected rallocm() error\n"); if (tsz == sz) { @@ -88,7 +98,7 @@ main(void) p = q; sz = tsz; - r = JEMALLOC_P(rallocm)(&q, &tsz, 8192, 0, ALLOCM_NO_MOVE); + r = JEMALLOC_P(rallocm)(&q, &tsz, pagesize*2, 0, ALLOCM_NO_MOVE); if (r != ALLOCM_SUCCESS) fprintf(stderr, "Unexpected rallocm() error\n"); if (q != p) @@ -99,7 +109,7 @@ main(void) } sz = tsz; - r = JEMALLOC_P(rallocm)(&q, &tsz, 16384, 0, ALLOCM_NO_MOVE); + r = JEMALLOC_P(rallocm)(&q, &tsz, pagesize*4, 0, ALLOCM_NO_MOVE); if (r != ALLOCM_SUCCESS) 
fprintf(stderr, "Unexpected rallocm() error\n"); if (q != p) From 45e040a82c121fb74337b61b3d8597b028b2dd32 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 5 Nov 2011 21:46:23 -0700 Subject: [PATCH 0033/3378] Update ChangeLog for 2.2.4. --- ChangeLog | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/ChangeLog b/ChangeLog index 66032b26..61979683 100644 --- a/ChangeLog +++ b/ChangeLog @@ -6,6 +6,13 @@ found in the git revision history: http://www.canonware.com/cgi-bin/gitweb.cgi?p=jemalloc.git git://canonware.com/jemalloc.git +* 2.2.4 (November 5, 2011) + + Bug fixes: + - Initialize arenas_tsd before using it. This bug existed for 2.2.[0-3], as + well as for --disable-tls builds in earlier releases. + - Do not assume a 4 KiB page size in test/rallocm.c. + * 2.2.3 (August 31, 2011) This version fixes numerous bugs related to heap profiling. From fa351d9fdcbbbfe7455279311fdf3d65751a4e75 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 9 Nov 2011 11:55:19 -0800 Subject: [PATCH 0034/3378] Fix huge_ralloc() race when using mremap(2). Fix huge_ralloc() to remove the old memory region from tree of huge allocations *before* calling mremap(2), in order to make sure that no other thread acquires the old memory region via mmap() and encounters stale metadata in the tree. Reported by: Rich Prohaska --- src/huge.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/huge.c b/src/huge.c index ac3f3a0d..5ee9f549 100644 --- a/src/huge.c +++ b/src/huge.c @@ -234,6 +234,13 @@ huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, ) { size_t newsize = huge_salloc(ret); + /* + * Remove ptr from the tree of huge allocations before + * performing the remap operation, in order to avoid the + * possibility of another thread acquiring that mapping before + * this one removes it from the tree. 
+ */ + huge_dalloc(ptr, false); if (mremap(ptr, oldsize, newsize, MREMAP_MAYMOVE|MREMAP_FIXED, ret) == MAP_FAILED) { /* @@ -253,9 +260,8 @@ huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, if (opt_abort) abort(); memcpy(ret, ptr, copysize); - idalloc(ptr); - } else - huge_dalloc(ptr, false); + chunk_dealloc(ptr, oldsize); + } } else #endif { From 12a488782681cbd740a5f54e0b7e74ea84858e21 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 11 Nov 2011 14:41:59 -0800 Subject: [PATCH 0035/3378] Fix huge_ralloc to maintain chunk statistics. Fix huge_ralloc() to properly maintain chunk statistics when using mremap(2). --- include/jemalloc/internal/chunk.h | 2 +- src/arena.c | 2 +- src/chunk.c | 16 +++++++++------- src/huge.c | 11 ++++++----- 4 files changed, 17 insertions(+), 14 deletions(-) diff --git a/include/jemalloc/internal/chunk.h b/include/jemalloc/internal/chunk.h index a60f0ad7..54b6a3ec 100644 --- a/include/jemalloc/internal/chunk.h +++ b/include/jemalloc/internal/chunk.h @@ -50,7 +50,7 @@ extern size_t map_bias; /* Number of arena chunk header pages. */ extern size_t arena_maxclass; /* Max size class for arenas. 
*/ void *chunk_alloc(size_t size, bool base, bool *zero); -void chunk_dealloc(void *chunk, size_t size); +void chunk_dealloc(void *chunk, size_t size, bool unmap); bool chunk_boot(void); #endif /* JEMALLOC_H_EXTERNS */ diff --git a/src/arena.c b/src/arena.c index e749c1d5..d166ca1e 100644 --- a/src/arena.c +++ b/src/arena.c @@ -569,7 +569,7 @@ arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk) arena->ndirty -= spare->ndirty; } malloc_mutex_unlock(&arena->lock); - chunk_dealloc((void *)spare, chunksize); + chunk_dealloc((void *)spare, chunksize, true); malloc_mutex_lock(&arena->lock); #ifdef JEMALLOC_STATS arena->stats.mapped -= chunksize; diff --git a/src/chunk.c b/src/chunk.c index 301519e8..d190c6f4 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -70,7 +70,7 @@ RETURN: #ifdef JEMALLOC_IVSALLOC if (base == false && ret != NULL) { if (rtree_set(chunks_rtree, (uintptr_t)ret, ret)) { - chunk_dealloc(ret, size); + chunk_dealloc(ret, size, true); return (NULL); } } @@ -108,7 +108,7 @@ RETURN: } void -chunk_dealloc(void *chunk, size_t size) +chunk_dealloc(void *chunk, size_t size, bool unmap) { assert(chunk != NULL); @@ -125,15 +125,17 @@ chunk_dealloc(void *chunk, size_t size) malloc_mutex_unlock(&chunks_mtx); #endif + if (unmap) { #ifdef JEMALLOC_SWAP - if (swap_enabled && chunk_dealloc_swap(chunk, size) == false) - return; + if (swap_enabled && chunk_dealloc_swap(chunk, size) == false) + return; #endif #ifdef JEMALLOC_DSS - if (chunk_dealloc_dss(chunk, size) == false) - return; + if (chunk_dealloc_dss(chunk, size) == false) + return; #endif - chunk_dealloc_mmap(chunk, size); + chunk_dealloc_mmap(chunk, size); + } } bool diff --git a/src/huge.c b/src/huge.c index 5ee9f549..a4f9b054 100644 --- a/src/huge.c +++ b/src/huge.c @@ -110,12 +110,12 @@ huge_palloc(size_t size, size_t alignment, bool zero) if (offset == 0) { /* Trim trailing space. 
*/ chunk_dealloc((void *)((uintptr_t)ret + chunk_size), alloc_size - - chunk_size); + - chunk_size, true); } else { size_t trailsize; /* Trim leading space. */ - chunk_dealloc(ret, alignment - offset); + chunk_dealloc(ret, alignment - offset, true); ret = (void *)((uintptr_t)ret + (alignment - offset)); @@ -124,7 +124,7 @@ huge_palloc(size_t size, size_t alignment, bool zero) /* Trim trailing space. */ assert(trailsize < alloc_size); chunk_dealloc((void *)((uintptr_t)ret + chunk_size), - trailsize); + trailsize, true); } } @@ -260,7 +260,7 @@ huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, if (opt_abort) abort(); memcpy(ret, ptr, copysize); - chunk_dealloc(ptr, oldsize); + chunk_dealloc_mmap(ptr, oldsize); } } else #endif @@ -301,9 +301,10 @@ huge_dalloc(void *ptr, bool unmap) memset(node->addr, 0x5a, node->size); #endif #endif - chunk_dealloc(node->addr, node->size); } + chunk_dealloc(node->addr, node->size, unmap); + base_node_dealloc(node); } From 334cc021422869329f08349e088e7f491318e087 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 11 Nov 2011 14:46:04 -0800 Subject: [PATCH 0036/3378] Fix malloc_stats_print(..., "a") output. Fix the logic in stats_print() such that if the "a" flag is passed in without the "m" flag, merged statistics will be printed even if only one arena is initialized. --- src/stats.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/stats.c b/src/stats.c index cbbbb5ba..dc172e42 100644 --- a/src/stats.c +++ b/src/stats.c @@ -748,7 +748,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, ninitialized++; } - if (ninitialized > 1) { + if (ninitialized > 1 || unmerged == false) { /* Print merged arena stats. */ malloc_cprintf(write_cb, cbopaque, "\nMerged arenas stats:\n"); From b3bd885090230cc28add77c399b4ed440b760ca3 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 14 Nov 2011 17:12:45 -0800 Subject: [PATCH 0037/3378] Update ChangeLog for 2.2.5. 
--- ChangeLog | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/ChangeLog b/ChangeLog index 61979683..326ee7a9 100644 --- a/ChangeLog +++ b/ChangeLog @@ -6,6 +6,14 @@ found in the git revision history: http://www.canonware.com/cgi-bin/gitweb.cgi?p=jemalloc.git git://canonware.com/jemalloc.git +* 2.2.5 (November 14, 2011) + + Bug fixes: + - Fix huge_ralloc() race when using mremap(2). This is a serious bug that + could cause memory corruption and/or crashes. + - Fix huge_ralloc() to maintain chunk statistics. + - Fix malloc_stats_print(..., "a") output. + * 2.2.4 (November 5, 2011) Bug fixes: From 03bf7a7a26f0ee9549e27c8d2f942901aeb20cf3 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 9 Nov 2011 11:55:19 -0800 Subject: [PATCH 0038/3378] Fix huge_ralloc() race when using mremap(2). Fix huge_ralloc() to remove the old memory region from tree of huge allocations *before* calling mremap(2), in order to make sure that no other thread acquires the old memory region via mmap() and encounters stale metadata in the tree. Reported by: Rich Prohaska --- src/huge.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/huge.c b/src/huge.c index ac3f3a0d..5ee9f549 100644 --- a/src/huge.c +++ b/src/huge.c @@ -234,6 +234,13 @@ huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, ) { size_t newsize = huge_salloc(ret); + /* + * Remove ptr from the tree of huge allocations before + * performing the remap operation, in order to avoid the + * possibility of another thread acquiring that mapping before + * this one removes it from the tree. 
+ */ + huge_dalloc(ptr, false); if (mremap(ptr, oldsize, newsize, MREMAP_MAYMOVE|MREMAP_FIXED, ret) == MAP_FAILED) { /* @@ -253,9 +260,8 @@ huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, if (opt_abort) abort(); memcpy(ret, ptr, copysize); - idalloc(ptr); - } else - huge_dalloc(ptr, false); + chunk_dealloc(ptr, oldsize); + } } else #endif { From 115704dcdb76d827950ec584cec09bad3417a1a9 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 11 Nov 2011 14:41:59 -0800 Subject: [PATCH 0039/3378] Fix huge_ralloc to maintain chunk statistics. Fix huge_ralloc() to properly maintain chunk statistics when using mremap(2). --- include/jemalloc/internal/chunk.h | 2 +- src/arena.c | 2 +- src/chunk.c | 16 +++++++++------- src/huge.c | 11 ++++++----- 4 files changed, 17 insertions(+), 14 deletions(-) diff --git a/include/jemalloc/internal/chunk.h b/include/jemalloc/internal/chunk.h index a60f0ad7..54b6a3ec 100644 --- a/include/jemalloc/internal/chunk.h +++ b/include/jemalloc/internal/chunk.h @@ -50,7 +50,7 @@ extern size_t map_bias; /* Number of arena chunk header pages. */ extern size_t arena_maxclass; /* Max size class for arenas. 
*/ void *chunk_alloc(size_t size, bool base, bool *zero); -void chunk_dealloc(void *chunk, size_t size); +void chunk_dealloc(void *chunk, size_t size, bool unmap); bool chunk_boot(void); #endif /* JEMALLOC_H_EXTERNS */ diff --git a/src/arena.c b/src/arena.c index e749c1d5..d166ca1e 100644 --- a/src/arena.c +++ b/src/arena.c @@ -569,7 +569,7 @@ arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk) arena->ndirty -= spare->ndirty; } malloc_mutex_unlock(&arena->lock); - chunk_dealloc((void *)spare, chunksize); + chunk_dealloc((void *)spare, chunksize, true); malloc_mutex_lock(&arena->lock); #ifdef JEMALLOC_STATS arena->stats.mapped -= chunksize; diff --git a/src/chunk.c b/src/chunk.c index 301519e8..d190c6f4 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -70,7 +70,7 @@ RETURN: #ifdef JEMALLOC_IVSALLOC if (base == false && ret != NULL) { if (rtree_set(chunks_rtree, (uintptr_t)ret, ret)) { - chunk_dealloc(ret, size); + chunk_dealloc(ret, size, true); return (NULL); } } @@ -108,7 +108,7 @@ RETURN: } void -chunk_dealloc(void *chunk, size_t size) +chunk_dealloc(void *chunk, size_t size, bool unmap) { assert(chunk != NULL); @@ -125,15 +125,17 @@ chunk_dealloc(void *chunk, size_t size) malloc_mutex_unlock(&chunks_mtx); #endif + if (unmap) { #ifdef JEMALLOC_SWAP - if (swap_enabled && chunk_dealloc_swap(chunk, size) == false) - return; + if (swap_enabled && chunk_dealloc_swap(chunk, size) == false) + return; #endif #ifdef JEMALLOC_DSS - if (chunk_dealloc_dss(chunk, size) == false) - return; + if (chunk_dealloc_dss(chunk, size) == false) + return; #endif - chunk_dealloc_mmap(chunk, size); + chunk_dealloc_mmap(chunk, size); + } } bool diff --git a/src/huge.c b/src/huge.c index 5ee9f549..a4f9b054 100644 --- a/src/huge.c +++ b/src/huge.c @@ -110,12 +110,12 @@ huge_palloc(size_t size, size_t alignment, bool zero) if (offset == 0) { /* Trim trailing space. 
*/ chunk_dealloc((void *)((uintptr_t)ret + chunk_size), alloc_size - - chunk_size); + - chunk_size, true); } else { size_t trailsize; /* Trim leading space. */ - chunk_dealloc(ret, alignment - offset); + chunk_dealloc(ret, alignment - offset, true); ret = (void *)((uintptr_t)ret + (alignment - offset)); @@ -124,7 +124,7 @@ huge_palloc(size_t size, size_t alignment, bool zero) /* Trim trailing space. */ assert(trailsize < alloc_size); chunk_dealloc((void *)((uintptr_t)ret + chunk_size), - trailsize); + trailsize, true); } } @@ -260,7 +260,7 @@ huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, if (opt_abort) abort(); memcpy(ret, ptr, copysize); - chunk_dealloc(ptr, oldsize); + chunk_dealloc_mmap(ptr, oldsize); } } else #endif @@ -301,9 +301,10 @@ huge_dalloc(void *ptr, bool unmap) memset(node->addr, 0x5a, node->size); #endif #endif - chunk_dealloc(node->addr, node->size); } + chunk_dealloc(node->addr, node->size, unmap); + base_node_dealloc(node); } From f1cc61b93a732b4aec17beb93ac392ef961d801c Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 11 Nov 2011 14:46:04 -0800 Subject: [PATCH 0040/3378] Fix malloc_stats_print(..., "a") output. Fix the logic in stats_print() such that if the "a" flag is passed in without the "m" flag, merged statistics will be printed even if only one arena is initialized. --- src/stats.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/stats.c b/src/stats.c index cbbbb5ba..dc172e42 100644 --- a/src/stats.c +++ b/src/stats.c @@ -748,7 +748,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, ninitialized++; } - if (ninitialized > 1) { + if (ninitialized > 1 || unmerged == false) { /* Print merged arena stats. */ malloc_cprintf(write_cb, cbopaque, "\nMerged arenas stats:\n"); From 196c7b7e6d567dc5a4e9a70001e52c8b970ff318 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 14 Nov 2011 17:12:45 -0800 Subject: [PATCH 0041/3378] Update ChangeLog for 2.2.5. 
--- ChangeLog | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/ChangeLog b/ChangeLog index 61979683..326ee7a9 100644 --- a/ChangeLog +++ b/ChangeLog @@ -6,6 +6,14 @@ found in the git revision history: http://www.canonware.com/cgi-bin/gitweb.cgi?p=jemalloc.git git://canonware.com/jemalloc.git +* 2.2.5 (November 14, 2011) + + Bug fixes: + - Fix huge_ralloc() race when using mremap(2). This is a serious bug that + could cause memory corruption and/or crashes. + - Fix huge_ralloc() to maintain chunk statistics. + - Fix malloc_stats_print(..., "a") output. + * 2.2.4 (November 5, 2011) Bug fixes: From 7372b15a31c63ac5cb9ed8aeabc2a0a3c005e8bf Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 10 Feb 2012 20:22:09 -0800 Subject: [PATCH 0042/3378] Reduce cpp conditional logic complexity. Convert configuration-related cpp conditional logic to use static constant variables, e.g.: #ifdef JEMALLOC_DEBUG [...] #endif becomes: if (config_debug) { [...] } The advantage is clearer, more concise code. The main disadvantage is that data structures no longer have conditionally defined fields, so they pay the cost of all fields regardless of whether they are used. In practice, this is only a minor concern; config_stats will go away in an upcoming change, and config_prof is the only other major feature that depends on more than a few special-purpose fields. 
--- configure.ac | 4 +- include/jemalloc/internal/arena.h | 108 +-- include/jemalloc/internal/chunk.h | 6 - include/jemalloc/internal/chunk_dss.h | 2 - include/jemalloc/internal/chunk_swap.h | 4 - include/jemalloc/internal/ckh.h | 2 - include/jemalloc/internal/ctl.h | 6 - include/jemalloc/internal/extent.h | 6 - include/jemalloc/internal/huge.h | 4 - .../jemalloc/internal/jemalloc_internal.h.in | 169 +++- include/jemalloc/internal/mutex.h | 12 +- include/jemalloc/internal/prof.h | 15 +- include/jemalloc/internal/stats.h | 21 - include/jemalloc/internal/tcache.h | 114 +-- include/jemalloc/jemalloc_defs.h.in | 6 +- src/arena.c | 734 +++++++----------- src/chunk.c | 104 +-- src/chunk_dss.c | 14 +- src/chunk_swap.c | 43 +- src/ckh.c | 17 +- src/ctl.c | 620 ++++++--------- src/extent.c | 2 - src/huge.c | 80 +- src/jemalloc.c | 608 +++++---------- src/prof.c | 75 +- src/stats.c | 13 +- src/tcache.c | 130 ++-- 27 files changed, 1194 insertions(+), 1725 deletions(-) diff --git a/configure.ac b/configure.ac index 699f931c..9617a5e3 100644 --- a/configure.ac +++ b/configure.ac @@ -174,6 +174,9 @@ AC_DEFINE_UNQUOTED([CPU_SPINWAIT], [$CPU_SPINWAIT]) LD_PRELOAD_VAR="LD_PRELOAD" so="so" +dnl Heap profiling uses the log(3) function. +LIBS="$LIBS -lm" + dnl Platform-specific settings. abi and RPATH can probably be determined dnl programmatically, but doing so is error-prone, which makes it generally dnl not worth the trouble. @@ -553,7 +556,6 @@ fi AC_MSG_CHECKING([configured backtracing method]) AC_MSG_RESULT([$backtrace_method]) if test "x$enable_prof" = "x1" ; then - LIBS="$LIBS -lm" AC_DEFINE([JEMALLOC_PROF], [ ]) fi AC_SUBST([enable_prof]) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index b80c118d..b6a5c23d 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -16,11 +16,9 @@ #define SUBPAGE_CEILING(s) \ (((s) + SUBPAGE_MASK) & ~SUBPAGE_MASK) -#ifdef JEMALLOC_TINY - /* Smallest size class to support. 
*/ -# define LG_TINY_MIN LG_SIZEOF_PTR -# define TINY_MIN (1U << LG_TINY_MIN) -#endif +/* Smallest size class to support. */ +#define LG_TINY_MIN LG_SIZEOF_PTR +#define TINY_MIN (1U << LG_TINY_MIN) /* * Maximum size class that is a multiple of the quantum, but not (necessarily) @@ -85,6 +83,15 @@ typedef struct arena_s arena_t; /* Each element of the chunk map corresponds to one page within the chunk. */ struct arena_chunk_map_s { +#ifndef JEMALLOC_PROF + /* + * Overlay prof_ctx in order to allow it to be referenced by dead code. + * Such antics aren't warranted for per arena data structures, but + * chunk map overhead accounts for a percentage of memory, rather than + * being just a fixed cost. + */ + union { +#endif union { /* * Linkage for run trees. There are two disjoint uses: @@ -103,9 +110,10 @@ struct arena_chunk_map_s { ql_elm(arena_chunk_map_t) ql_link; } u; -#ifdef JEMALLOC_PROF /* Profile counters, used for large object runs. */ prof_ctx_t *prof_ctx; +#ifndef JEMALLOC_PROF + }; /* union { ... }; */ #endif /* @@ -162,10 +170,8 @@ struct arena_chunk_map_s { * ssssssss ssssssss ssss---- ----D-LA */ size_t bits; -#ifdef JEMALLOC_PROF #define CHUNK_MAP_CLASS_SHIFT 4 #define CHUNK_MAP_CLASS_MASK ((size_t)0xff0U) -#endif #define CHUNK_MAP_FLAGS_MASK ((size_t)0xfU) #define CHUNK_MAP_DIRTY ((size_t)0x8U) #define CHUNK_MAP_UNZEROED ((size_t)0x4U) @@ -205,10 +211,8 @@ struct arena_chunk_s { typedef rb_tree(arena_chunk_t) arena_chunk_tree_t; struct arena_run_s { -#ifdef JEMALLOC_DEBUG uint32_t magic; # define ARENA_RUN_MAGIC 0x384adf93 -#endif /* Bin this run is associated with. */ arena_bin_t *bin; @@ -247,13 +251,11 @@ struct arena_bin_info_s { */ bitmap_info_t bitmap_info; -#ifdef JEMALLOC_PROF /* * Offset of first (prof_ctx_t *) in a run header for this bin's size - * class, or 0 if (opt_prof == false). + * class, or 0 if (config_prof == false || opt_prof == false). */ uint32_t ctx0_offset; -#endif /* Offset of first region in a run for this bin's size class. 
*/ uint32_t reg0_offset; @@ -283,17 +285,13 @@ struct arena_bin_s { */ arena_run_tree_t runs; -#ifdef JEMALLOC_STATS /* Bin statistics. */ malloc_bin_stats_t stats; -#endif }; struct arena_s { -#ifdef JEMALLOC_DEBUG uint32_t magic; # define ARENA_MAGIC 0x947d3d24 -#endif /* This arena's index within the arenas array. */ unsigned ind; @@ -314,20 +312,14 @@ struct arena_s { */ malloc_mutex_t lock; -#ifdef JEMALLOC_STATS arena_stats_t stats; -# ifdef JEMALLOC_TCACHE /* * List of tcaches for extant threads associated with this arena. * Stats from these are merged incrementally, and at exit. */ ql_head(tcache_t) tcache_ql; -# endif -#endif -#ifdef JEMALLOC_PROF uint64_t prof_accumbytes; -#endif /* List of dirty-page-containing chunks this arena manages. */ ql_head(arena_chunk_t) chunks_dirty; @@ -455,35 +447,23 @@ extern size_t sspace_max; #define nlclasses (chunk_npages - map_bias) void arena_purge_all(arena_t *arena); -#ifdef JEMALLOC_PROF void arena_prof_accum(arena_t *arena, uint64_t accumbytes); -#endif -#ifdef JEMALLOC_TCACHE void arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, - size_t binind -# ifdef JEMALLOC_PROF - , uint64_t prof_accumbytes -# endif - ); -#endif + size_t binind, uint64_t prof_accumbytes); void *arena_malloc_small(arena_t *arena, size_t size, bool zero); void *arena_malloc_large(arena_t *arena, size_t size, bool zero); void *arena_malloc(size_t size, bool zero); void *arena_palloc(arena_t *arena, size_t size, size_t alloc_size, size_t alignment, bool zero); size_t arena_salloc(const void *ptr); -#ifdef JEMALLOC_PROF void arena_prof_promoted(const void *ptr, size_t size); size_t arena_salloc_demote(const void *ptr); -#endif void arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, arena_chunk_map_t *mapelm); void arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr); -#ifdef JEMALLOC_STATS void arena_stats_merge(arena_t *arena, size_t *nactive, size_t *ndirty, arena_stats_t *astats, malloc_bin_stats_t 
*bstats, malloc_large_stats_t *lstats); -#endif void *arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, bool zero); void *arena_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, @@ -499,10 +479,8 @@ bool arena_boot(void); size_t arena_bin_index(arena_t *arena, arena_bin_t *bin); unsigned arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr); -# ifdef JEMALLOC_PROF prof_ctx_t *arena_prof_ctx_get(const void *ptr); void arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx); -# endif void arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr); #endif @@ -521,7 +499,7 @@ arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr) unsigned shift, diff, regind; size_t size; - dassert(run->magic == ARENA_RUN_MAGIC); + assert(run->magic == ARENA_RUN_MAGIC); /* * Freeing a pointer lower than region zero can cause assertion * failure. @@ -586,7 +564,6 @@ arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr) return (regind); } -#ifdef JEMALLOC_PROF JEMALLOC_INLINE prof_ctx_t * arena_prof_ctx_get(const void *ptr) { @@ -594,6 +571,7 @@ arena_prof_ctx_get(const void *ptr) arena_chunk_t *chunk; size_t pageind, mapbits; + cassert(config_prof); assert(ptr != NULL); assert(CHUNK_ADDR2BASE(ptr) != ptr); @@ -612,7 +590,7 @@ arena_prof_ctx_get(const void *ptr) arena_bin_info_t *bin_info = &arena_bin_info[binind]; unsigned regind; - dassert(run->magic == ARENA_RUN_MAGIC); + assert(run->magic == ARENA_RUN_MAGIC); regind = arena_run_regind(run, bin_info, ptr); ret = *(prof_ctx_t **)((uintptr_t)run + bin_info->ctx0_offset + (regind * @@ -630,6 +608,7 @@ arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx) arena_chunk_t *chunk; size_t pageind, mapbits; + cassert(config_prof); assert(ptr != NULL); assert(CHUNK_ADDR2BASE(ptr) != ptr); @@ -647,7 +626,7 @@ arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx) arena_bin_info_t *bin_info; unsigned regind; - dassert(run->magic == 
ARENA_RUN_MAGIC); + assert(run->magic == ARENA_RUN_MAGIC); binind = arena_bin_index(chunk->arena, bin); bin_info = &arena_bin_info[binind]; regind = arena_run_regind(run, bin_info, ptr); @@ -659,7 +638,6 @@ arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx) } else chunk->map[pageind-map_bias].prof_ctx = ctx; } -#endif JEMALLOC_INLINE void arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr) @@ -668,7 +646,7 @@ arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr) arena_chunk_map_t *mapelm; assert(arena != NULL); - dassert(arena->magic == ARENA_MAGIC); + assert(arena->magic == ARENA_MAGIC); assert(chunk->arena == arena); assert(ptr != NULL); assert(CHUNK_ADDR2BASE(ptr) != ptr); @@ -678,63 +656,57 @@ arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr) assert((mapelm->bits & CHUNK_MAP_ALLOCATED) != 0); if ((mapelm->bits & CHUNK_MAP_LARGE) == 0) { /* Small allocation. */ -#ifdef JEMALLOC_TCACHE tcache_t *tcache; - if ((tcache = tcache_get()) != NULL) + if (config_tcache && (tcache = tcache_get()) != NULL) tcache_dalloc_small(tcache, ptr); else { -#endif arena_run_t *run; arena_bin_t *bin; run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - (mapelm->bits >> PAGE_SHIFT)) << PAGE_SHIFT)); - dassert(run->magic == ARENA_RUN_MAGIC); + assert(run->magic == ARENA_RUN_MAGIC); bin = run->bin; -#ifdef JEMALLOC_DEBUG - { + if (config_debug) { size_t binind = arena_bin_index(arena, bin); - arena_bin_info_t *bin_info = + UNUSED arena_bin_info_t *bin_info = &arena_bin_info[binind]; assert(((uintptr_t)ptr - ((uintptr_t)run + (uintptr_t)bin_info->reg0_offset)) % bin_info->reg_size == 0); } -#endif malloc_mutex_lock(&bin->lock); arena_dalloc_bin(arena, chunk, ptr, mapelm); malloc_mutex_unlock(&bin->lock); -#ifdef JEMALLOC_TCACHE } -#endif } else { -#ifdef JEMALLOC_TCACHE - size_t size = mapelm->bits & ~PAGE_MASK; + if (config_tcache) { + size_t size = mapelm->bits & ~PAGE_MASK; - assert(((uintptr_t)ptr & PAGE_MASK) == 0); - if (size <= 
tcache_maxclass) { - tcache_t *tcache; + assert(((uintptr_t)ptr & PAGE_MASK) == 0); + if (size <= tcache_maxclass) { + tcache_t *tcache; - if ((tcache = tcache_get()) != NULL) - tcache_dalloc_large(tcache, ptr, size); - else { + if ((tcache = tcache_get()) != NULL) + tcache_dalloc_large(tcache, ptr, size); + else { + malloc_mutex_lock(&arena->lock); + arena_dalloc_large(arena, chunk, ptr); + malloc_mutex_unlock(&arena->lock); + } + } else { malloc_mutex_lock(&arena->lock); arena_dalloc_large(arena, chunk, ptr); malloc_mutex_unlock(&arena->lock); } } else { + assert(((uintptr_t)ptr & PAGE_MASK) == 0); malloc_mutex_lock(&arena->lock); arena_dalloc_large(arena, chunk, ptr); malloc_mutex_unlock(&arena->lock); } -#else - assert(((uintptr_t)ptr & PAGE_MASK) == 0); - malloc_mutex_lock(&arena->lock); - arena_dalloc_large(arena, chunk, ptr); - malloc_mutex_unlock(&arena->lock); -#endif } } #endif diff --git a/include/jemalloc/internal/chunk.h b/include/jemalloc/internal/chunk.h index 54b6a3ec..4cc1e80e 100644 --- a/include/jemalloc/internal/chunk.h +++ b/include/jemalloc/internal/chunk.h @@ -28,20 +28,14 @@ #ifdef JEMALLOC_H_EXTERNS extern size_t opt_lg_chunk; -#ifdef JEMALLOC_SWAP extern bool opt_overcommit; -#endif -#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) /* Protects stats_chunks; currently not used for any other purpose. */ extern malloc_mutex_t chunks_mtx; /* Chunk statistics. */ extern chunk_stats_t stats_chunks; -#endif -#ifdef JEMALLOC_IVSALLOC extern rtree_t *chunks_rtree; -#endif extern size_t chunksize; extern size_t chunksize_mask; /* (chunksize - 1). 
*/ diff --git a/include/jemalloc/internal/chunk_dss.h b/include/jemalloc/internal/chunk_dss.h index 6f005222..35cd461a 100644 --- a/include/jemalloc/internal/chunk_dss.h +++ b/include/jemalloc/internal/chunk_dss.h @@ -1,4 +1,3 @@ -#ifdef JEMALLOC_DSS /******************************************************************************/ #ifdef JEMALLOC_H_TYPES @@ -27,4 +26,3 @@ bool chunk_dss_boot(void); #endif /* JEMALLOC_H_INLINES */ /******************************************************************************/ -#endif /* JEMALLOC_DSS */ diff --git a/include/jemalloc/internal/chunk_swap.h b/include/jemalloc/internal/chunk_swap.h index 9faa739f..99a079eb 100644 --- a/include/jemalloc/internal/chunk_swap.h +++ b/include/jemalloc/internal/chunk_swap.h @@ -1,4 +1,3 @@ -#ifdef JEMALLOC_SWAP /******************************************************************************/ #ifdef JEMALLOC_H_TYPES @@ -15,9 +14,7 @@ extern bool swap_enabled; extern bool swap_prezeroed; extern size_t swap_nfds; extern int *swap_fds; -#ifdef JEMALLOC_STATS extern size_t swap_avail; -#endif void *chunk_alloc_swap(size_t size, bool *zero); bool chunk_in_swap(void *chunk); @@ -31,4 +28,3 @@ bool chunk_swap_boot(void); #endif /* JEMALLOC_H_INLINES */ /******************************************************************************/ -#endif /* JEMALLOC_SWAP */ diff --git a/include/jemalloc/internal/ckh.h b/include/jemalloc/internal/ckh.h index 3e4ad4c8..28f171c8 100644 --- a/include/jemalloc/internal/ckh.h +++ b/include/jemalloc/internal/ckh.h @@ -30,10 +30,8 @@ struct ckhc_s { }; struct ckh_s { -#ifdef JEMALLOC_DEBUG #define CKH_MAGIC 0x3af2489d uint32_t magic; -#endif #ifdef CKH_COUNT /* Counters used to get an idea of performance. 
*/ diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index f1f5eb70..31f9d99b 100644 --- a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -32,7 +32,6 @@ struct ctl_arena_stats_s { unsigned nthreads; size_t pactive; size_t pdirty; -#ifdef JEMALLOC_STATS arena_stats_t astats; /* Aggregate stats for small size classes, based on bin stats. */ @@ -43,11 +42,9 @@ struct ctl_arena_stats_s { malloc_bin_stats_t *bstats; /* nbins elements. */ malloc_large_stats_t *lstats; /* nlclasses elements. */ -#endif }; struct ctl_stats_s { -#ifdef JEMALLOC_STATS size_t allocated; size_t active; size_t mapped; @@ -61,11 +58,8 @@ struct ctl_stats_s { uint64_t nmalloc; /* huge_nmalloc */ uint64_t ndalloc; /* huge_ndalloc */ } huge; -#endif ctl_arena_stats_t *arenas; /* (narenas + 1) elements. */ -#ifdef JEMALLOC_SWAP size_t swap_avail; -#endif }; #endif /* JEMALLOC_H_STRUCTS */ diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index 6fe9702b..36af8be8 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -9,18 +9,14 @@ typedef struct extent_node_s extent_node_t; /* Tree of extents. */ struct extent_node_s { -#if (defined(JEMALLOC_SWAP) || defined(JEMALLOC_DSS)) /* Linkage for the size/address-ordered tree. */ rb_node(extent_node_t) link_szad; -#endif /* Linkage for the address-ordered tree. */ rb_node(extent_node_t) link_ad; -#ifdef JEMALLOC_PROF /* Profile counters, used for huge objects. */ prof_ctx_t *prof_ctx; -#endif /* Pointer to the extent that this tree node is responsible for. 
*/ void *addr; @@ -34,9 +30,7 @@ typedef rb_tree(extent_node_t) extent_tree_t; /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -#if (defined(JEMALLOC_SWAP) || defined(JEMALLOC_DSS)) rb_proto(, extent_tree_szad_, extent_tree_t, extent_node_t) -#endif rb_proto(, extent_tree_ad_, extent_tree_t, extent_node_t) diff --git a/include/jemalloc/internal/huge.h b/include/jemalloc/internal/huge.h index 66544cf8..3a6b0b87 100644 --- a/include/jemalloc/internal/huge.h +++ b/include/jemalloc/internal/huge.h @@ -9,12 +9,10 @@ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -#ifdef JEMALLOC_STATS /* Huge allocation statistics. */ extern uint64_t huge_nmalloc; extern uint64_t huge_ndalloc; extern size_t huge_allocated; -#endif /* Protects chunk-related data structures. */ extern malloc_mutex_t huge_mtx; @@ -27,10 +25,8 @@ void *huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, size_t alignment, bool zero); void huge_dalloc(void *ptr, bool unmap); size_t huge_salloc(const void *ptr); -#ifdef JEMALLOC_PROF prof_ctx_t *huge_prof_ctx_get(const void *ptr); void huge_prof_ctx_set(const void *ptr, prof_ctx_t *ctx); -#endif bool huge_boot(void); #endif /* JEMALLOC_H_EXTERNS */ diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index a44f0978..8842e4bf 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -35,6 +35,125 @@ #include "jemalloc/internal/private_namespace.h" +#ifdef JEMALLOC_CC_SILENCE +#define UNUSED JEMALLOC_ATTR(unused) +#else +#define UNUSED +#endif + +static const bool config_debug = +#ifdef JEMALLOC_DEBUG + true +#else + false +#endif + ; +static const bool config_dss = +#ifdef JEMALLOC_DSS + true +#else + false +#endif + ; +static const bool config_dynamic_page_shift = +#ifdef JEMALLOC_DYNAMIC_PAGE_SHIFT + true 
+#else + false +#endif + ; +static const bool config_fill = +#ifdef JEMALLOC_FILL + true +#else + false +#endif + ; +static const bool config_lazy_lock = +#ifdef JEMALLOC_LAZY_LOCK + true +#else + false +#endif + ; +static const bool config_prof = +#ifdef JEMALLOC_PROF + true +#else + false +#endif + ; +static const bool config_prof_libgcc = +#ifdef JEMALLOC_PROF_LIBGCC + true +#else + false +#endif + ; +static const bool config_prof_libunwind = +#ifdef JEMALLOC_PROF_LIBUNWIND + true +#else + false +#endif + ; +static const bool config_stats = +#ifdef JEMALLOC_STATS + true +#else + false +#endif + ; +static const bool config_swap = +#ifdef JEMALLOC_SWAP + true +#else + false +#endif + ; +static const bool config_sysv = +#ifdef JEMALLOC_SYSV + true +#else + false +#endif + ; +static const bool config_tcache = +#ifdef JEMALLOC_TCACHE + true +#else + false +#endif + ; +static const bool config_tiny = +#ifdef JEMALLOC_TINY + true +#else + false +#endif + ; +static const bool config_tls = +#ifdef JEMALLOC_TLS + true +#else + false +#endif + ; +static const bool config_xmalloc = +#ifdef JEMALLOC_XMALLOC + true +#else + false +#endif + ; +static const bool config_ivsalloc = +#ifdef JEMALLOC_IVSALLOC + true +#else + false +#endif + ; + #if (defined(JEMALLOC_OSATOMIC) || defined(JEMALLOC_OSSPIN)) #include #endif @@ -82,11 +201,11 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s); # endif #endif -#ifdef JEMALLOC_DEBUG -# define dassert(e) assert(e) -#else -# define dassert(e) -#endif +/* Use to assert a particular configuration, e.g., cassert(config_debug). 
*/ +#define cassert(c) do { \ + if ((c) == false) \ + assert(false); \ +} while (0) /* * jemalloc can conceptually be broken into components (arena, tcache, etc.), @@ -265,30 +384,20 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s); #endif #include "jemalloc/internal/prof.h" -#ifdef JEMALLOC_STATS typedef struct { uint64_t allocated; uint64_t deallocated; } thread_allocated_t; -#endif #undef JEMALLOC_H_STRUCTS /******************************************************************************/ #define JEMALLOC_H_EXTERNS extern bool opt_abort; -#ifdef JEMALLOC_FILL extern bool opt_junk; -#endif -#ifdef JEMALLOC_SYSV extern bool opt_sysv; -#endif -#ifdef JEMALLOC_XMALLOC extern bool opt_xmalloc; -#endif -#ifdef JEMALLOC_FILL extern bool opt_zero; -#endif extern size_t opt_narenas; #ifdef DYNAMIC_PAGE_SHIFT @@ -327,8 +436,7 @@ extern __thread arena_t *arenas_tls JEMALLOC_ATTR(tls_model("initial-exec")); extern arena_t **arenas; extern unsigned narenas; -#ifdef JEMALLOC_STATS -# ifndef NO_TLS +#ifndef NO_TLS extern __thread thread_allocated_t thread_allocated_tls; # define ALLOCATED_GET() (thread_allocated_tls.allocated) # define ALLOCATEDP_GET() (&thread_allocated_tls.allocated) @@ -338,10 +446,7 @@ extern __thread thread_allocated_t thread_allocated_tls; thread_allocated_tls.allocated += a; \ thread_allocated_tls.deallocated += d; \ } while (0) -# else -extern pthread_key_t thread_allocated_tsd; -thread_allocated_t *thread_allocated_get_hard(void); - +#else # define ALLOCATED_GET() (thread_allocated_get()->allocated) # define ALLOCATEDP_GET() (&thread_allocated_get()->allocated) # define DEALLOCATED_GET() (thread_allocated_get()->deallocated) @@ -351,8 +456,9 @@ thread_allocated_t *thread_allocated_get_hard(void); thread_allocated->allocated += (a); \ thread_allocated->deallocated += (d); \ } while (0) -# endif #endif +extern pthread_key_t thread_allocated_tsd; +thread_allocated_t *thread_allocated_get_hard(void); arena_t 
*arenas_extend(unsigned ind); arena_t *choose_arena_hard(void); @@ -403,9 +509,7 @@ size_t s2u(size_t size); size_t sa2u(size_t size, size_t alignment, size_t *run_size_p); void malloc_write(const char *s); arena_t *choose_arena(void); -# if (defined(JEMALLOC_STATS) && defined(NO_TLS)) thread_allocated_t *thread_allocated_get(void); -# endif #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) @@ -565,7 +669,6 @@ choose_arena(void) return (ret); } -#if (defined(JEMALLOC_STATS) && defined(NO_TLS)) JEMALLOC_INLINE thread_allocated_t * thread_allocated_get(void) { @@ -577,7 +680,6 @@ thread_allocated_get(void) return (thread_allocated); } #endif -#endif #include "jemalloc/internal/bitmap.h" #include "jemalloc/internal/rtree.h" @@ -593,9 +695,7 @@ void *imalloc(size_t size); void *icalloc(size_t size); void *ipalloc(size_t usize, size_t alignment, bool zero); size_t isalloc(const void *ptr); -# ifdef JEMALLOC_IVSALLOC size_t ivsalloc(const void *ptr); -# endif void idalloc(void *ptr); void *iralloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero, bool no_move); @@ -674,20 +774,18 @@ isalloc(const void *ptr) chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (chunk != ptr) { /* Region. 
*/ - dassert(chunk->arena->magic == ARENA_MAGIC); + assert(chunk->arena->magic == ARENA_MAGIC); -#ifdef JEMALLOC_PROF - ret = arena_salloc_demote(ptr); -#else - ret = arena_salloc(ptr); -#endif + if (config_prof) + ret = arena_salloc_demote(ptr); + else + ret = arena_salloc(ptr); } else ret = huge_salloc(ptr); return (ret); } -#ifdef JEMALLOC_IVSALLOC JEMALLOC_INLINE size_t ivsalloc(const void *ptr) { @@ -698,7 +796,6 @@ ivsalloc(const void *ptr) return (isalloc(ptr)); } -#endif JEMALLOC_INLINE void idalloc(void *ptr) diff --git a/include/jemalloc/internal/mutex.h b/include/jemalloc/internal/mutex.h index 62947ced..6a7b4fce 100644 --- a/include/jemalloc/internal/mutex.h +++ b/include/jemalloc/internal/mutex.h @@ -3,14 +3,14 @@ #ifdef JEMALLOC_OSSPIN typedef OSSpinLock malloc_mutex_t; +#define MALLOC_MUTEX_INITIALIZER 0 #else typedef pthread_mutex_t malloc_mutex_t; -#endif - -#ifdef PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP -# define MALLOC_MUTEX_INITIALIZER PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP -#else -# define MALLOC_MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER +# ifdef PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP +# define MALLOC_MUTEX_INITIALIZER PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP +# else +# define MALLOC_MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER +# endif #endif #endif /* JEMALLOC_H_TYPES */ diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index e9064ba6..d4700808 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -1,4 +1,3 @@ -#ifdef JEMALLOC_PROF /******************************************************************************/ #ifdef JEMALLOC_H_TYPES @@ -297,6 +296,8 @@ prof_sample_threshold_update(prof_tdata_t *prof_tdata) uint64_t r; double u; + cassert(config_prof); + /* * Compute sample threshold as a geometrically distributed random * variable with mean (2^opt_lg_prof_sample). 
@@ -329,12 +330,13 @@ prof_ctx_get(const void *ptr) prof_ctx_t *ret; arena_chunk_t *chunk; + cassert(config_prof); assert(ptr != NULL); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (chunk != ptr) { /* Region. */ - dassert(chunk->arena->magic == ARENA_MAGIC); + assert(chunk->arena->magic == ARENA_MAGIC); ret = arena_prof_ctx_get(ptr); } else @@ -348,12 +350,13 @@ prof_ctx_set(const void *ptr, prof_ctx_t *ctx) { arena_chunk_t *chunk; + cassert(config_prof); assert(ptr != NULL); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (chunk != ptr) { /* Region. */ - dassert(chunk->arena->magic == ARENA_MAGIC); + assert(chunk->arena->magic == ARENA_MAGIC); arena_prof_ctx_set(ptr, ctx); } else @@ -365,6 +368,7 @@ prof_sample_accum_update(size_t size) { prof_tdata_t *prof_tdata; + cassert(config_prof); /* Sampling logic is unnecessary if the interval is 1. */ assert(opt_lg_prof_sample != 0); @@ -391,6 +395,7 @@ JEMALLOC_INLINE void prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt) { + cassert(config_prof); assert(ptr != NULL); assert(size == isalloc(ptr)); @@ -437,6 +442,7 @@ prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt, { prof_thr_cnt_t *told_cnt; + cassert(config_prof); assert(ptr != NULL || (uintptr_t)cnt <= (uintptr_t)1U); if (ptr != NULL) { @@ -510,6 +516,8 @@ prof_free(const void *ptr, size_t size) { prof_ctx_t *ctx = prof_ctx_get(ptr); + cassert(config_prof); + if ((uintptr_t)ctx > (uintptr_t)1) { assert(size == isalloc(ptr)); prof_thr_cnt_t *tcnt = prof_lookup(ctx->bt); @@ -544,4 +552,3 @@ prof_free(const void *ptr, size_t size) #endif /* JEMALLOC_H_INLINES */ /******************************************************************************/ -#endif /* JEMALLOC_PROF */ diff --git a/include/jemalloc/internal/stats.h b/include/jemalloc/internal/stats.h index 2a9b31d9..64ba4bd7 100644 --- a/include/jemalloc/internal/stats.h +++ b/include/jemalloc/internal/stats.h @@ -3,23 +3,16 @@ #define UMAX2S_BUFSIZE 65 -#ifdef JEMALLOC_STATS typedef 
struct tcache_bin_stats_s tcache_bin_stats_t; typedef struct malloc_bin_stats_s malloc_bin_stats_t; typedef struct malloc_large_stats_s malloc_large_stats_t; typedef struct arena_stats_s arena_stats_t; -#endif -#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) typedef struct chunk_stats_s chunk_stats_t; -#endif #endif /* JEMALLOC_H_TYPES */ /******************************************************************************/ #ifdef JEMALLOC_H_STRUCTS -#ifdef JEMALLOC_STATS - -#ifdef JEMALLOC_TCACHE struct tcache_bin_stats_s { /* * Number of allocation requests that corresponded to the size of this @@ -27,7 +20,6 @@ struct tcache_bin_stats_s { */ uint64_t nrequests; }; -#endif struct malloc_bin_stats_s { /* @@ -52,13 +44,11 @@ struct malloc_bin_stats_s { */ uint64_t nrequests; -#ifdef JEMALLOC_TCACHE /* Number of tcache fills from this bin. */ uint64_t nfills; /* Number of tcache flushes to this bin. */ uint64_t nflushes; -#endif /* Total number of runs created for this bin's size class. */ uint64_t nruns; @@ -127,14 +117,10 @@ struct arena_stats_s { */ malloc_large_stats_t *lstats; }; -#endif /* JEMALLOC_STATS */ -#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) struct chunk_stats_s { -# ifdef JEMALLOC_STATS /* Number of chunks that were allocated. */ uint64_t nchunks; -# endif /* High-water mark for number of chunks allocated. */ size_t highchunks; @@ -146,7 +132,6 @@ struct chunk_stats_s { */ size_t curchunks; }; -#endif /* JEMALLOC_STATS */ #endif /* JEMALLOC_H_STRUCTS */ /******************************************************************************/ @@ -154,24 +139,19 @@ struct chunk_stats_s { extern bool opt_stats_print; -#ifdef JEMALLOC_STATS extern size_t stats_cactive; -#endif char *u2s(uint64_t x, unsigned base, char *s); -#ifdef JEMALLOC_STATS void malloc_cprintf(void (*write)(void *, const char *), void *cbopaque, const char *format, ...) JEMALLOC_ATTR(format(printf, 3, 4)); void malloc_printf(const char *format, ...) 
JEMALLOC_ATTR(format(printf, 1, 2)); -#endif void stats_print(void (*write)(void *, const char *), void *cbopaque, const char *opts); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ #ifdef JEMALLOC_H_INLINES -#ifdef JEMALLOC_STATS #ifndef JEMALLOC_ENABLE_INLINE size_t stats_cactive_get(void); @@ -202,6 +182,5 @@ stats_cactive_sub(size_t size) } #endif -#endif /* JEMALLOC_STATS */ #endif /* JEMALLOC_H_INLINES */ /******************************************************************************/ diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index da3c68c5..0855d32e 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -42,9 +42,7 @@ struct tcache_bin_info_s { }; struct tcache_bin_s { -# ifdef JEMALLOC_STATS tcache_bin_stats_t tstats; -# endif int low_water; /* Min # cached since last GC. */ unsigned lg_fill_div; /* Fill (ncached_max >> lg_fill_div). */ unsigned ncached; /* # of cached objects. */ @@ -52,12 +50,8 @@ struct tcache_bin_s { }; struct tcache_s { -# ifdef JEMALLOC_STATS ql_elm(tcache_t) link; /* Used for aggregating stats. */ -# endif -# ifdef JEMALLOC_PROF uint64_t prof_accumbytes;/* Cleared after arena_prof_accum() */ -# endif arena_t *arena; /* This thread's arena. */ unsigned ev_cnt; /* Event count since incremental GC. */ unsigned next_gc_bin; /* Next bin to GC. */ @@ -109,23 +103,15 @@ extern size_t tcache_maxclass; /* Number of tcache allocation/deallocation events between incremental GCs. 
*/ extern unsigned tcache_gc_incr; -void tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem -#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) - , tcache_t *tcache -#endif - ); -void tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem -#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) - , tcache_t *tcache -#endif - ); +void tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem, + tcache_t *tcache); +void tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem, + tcache_t *tcache); tcache_t *tcache_create(arena_t *arena); void *tcache_alloc_small_hard(tcache_t *tcache, tcache_bin_t *tbin, size_t binind); void tcache_destroy(tcache_t *tcache); -#ifdef JEMALLOC_STATS void tcache_stats_merge(tcache_t *tcache, arena_t *arena); -#endif bool tcache_boot(void); #endif /* JEMALLOC_H_EXTERNS */ @@ -195,19 +181,11 @@ tcache_event(tcache_t *tcache) if (binind < nbins) { tcache_bin_flush_small(tbin, binind, tbin->ncached - tbin->low_water + - (tbin->low_water >> 2) -#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) - , tcache -#endif - ); + (tbin->low_water >> 2), tcache); } else { tcache_bin_flush_large(tbin, binind, tbin->ncached - tbin->low_water + - (tbin->low_water >> 2) -#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) - , tcache -#endif - ); + (tbin->low_water >> 2), tcache); } /* * Reduce fill count by 2X. 
Limit lg_fill_div such that @@ -268,21 +246,19 @@ tcache_alloc_small(tcache_t *tcache, size_t size, bool zero) assert(arena_salloc(ret) == arena_bin_info[binind].reg_size); if (zero == false) { -#ifdef JEMALLOC_FILL - if (opt_junk) - memset(ret, 0xa5, size); - else if (opt_zero) - memset(ret, 0, size); -#endif + if (config_fill) { + if (opt_junk) + memset(ret, 0xa5, size); + else if (opt_zero) + memset(ret, 0, size); + } } else memset(ret, 0, size); -#ifdef JEMALLOC_STATS - tbin->tstats.nrequests++; -#endif -#ifdef JEMALLOC_PROF - tcache->prof_accumbytes += arena_bin_info[binind].reg_size; -#endif + if (config_stats) + tbin->tstats.nrequests++; + if (config_prof) + tcache->prof_accumbytes += arena_bin_info[binind].reg_size; tcache_event(tcache); return (ret); } @@ -309,28 +285,28 @@ tcache_alloc_large(tcache_t *tcache, size_t size, bool zero) if (ret == NULL) return (NULL); } else { -#ifdef JEMALLOC_PROF - arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ret); - size_t pageind = (((uintptr_t)ret - (uintptr_t)chunk) >> - PAGE_SHIFT); - chunk->map[pageind-map_bias].bits &= ~CHUNK_MAP_CLASS_MASK; -#endif + if (config_prof) { + arena_chunk_t *chunk = + (arena_chunk_t *)CHUNK_ADDR2BASE(ret); + size_t pageind = (((uintptr_t)ret - (uintptr_t)chunk) >> + PAGE_SHIFT); + chunk->map[pageind-map_bias].bits &= + ~CHUNK_MAP_CLASS_MASK; + } if (zero == false) { -#ifdef JEMALLOC_FILL - if (opt_junk) - memset(ret, 0xa5, size); - else if (opt_zero) - memset(ret, 0, size); -#endif + if (config_fill) { + if (opt_junk) + memset(ret, 0xa5, size); + else if (opt_zero) + memset(ret, 0, size); + } } else memset(ret, 0, size); -#ifdef JEMALLOC_STATS - tbin->tstats.nrequests++; -#endif -#ifdef JEMALLOC_PROF - tcache->prof_accumbytes += size; -#endif + if (config_stats) + tbin->tstats.nrequests++; + if (config_prof) + tcache->prof_accumbytes += size; } tcache_event(tcache); @@ -357,26 +333,20 @@ tcache_dalloc_small(tcache_t *tcache, void *ptr) mapelm = &chunk->map[pageind-map_bias]; 
run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - (mapelm->bits >> PAGE_SHIFT)) << PAGE_SHIFT)); - dassert(run->magic == ARENA_RUN_MAGIC); + assert(run->magic == ARENA_RUN_MAGIC); bin = run->bin; binind = ((uintptr_t)bin - (uintptr_t)&arena->bins) / sizeof(arena_bin_t); assert(binind < nbins); -#ifdef JEMALLOC_FILL - if (opt_junk) + if (config_fill && opt_junk) memset(ptr, 0x5a, arena_bin_info[binind].reg_size); -#endif tbin = &tcache->tbins[binind]; tbin_info = &tcache_bin_info[binind]; if (tbin->ncached == tbin_info->ncached_max) { tcache_bin_flush_small(tbin, binind, (tbin_info->ncached_max >> - 1) -#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) - , tcache -#endif - ); + 1), tcache); } assert(tbin->ncached < tbin_info->ncached_max); tbin->avail[tbin->ncached] = ptr; @@ -403,20 +373,14 @@ tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size) pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; binind = nbins + (size >> PAGE_SHIFT) - 1; -#ifdef JEMALLOC_FILL - if (opt_junk) + if (config_fill && opt_junk) memset(ptr, 0x5a, size); -#endif tbin = &tcache->tbins[binind]; tbin_info = &tcache_bin_info[binind]; if (tbin->ncached == tbin_info->ncached_max) { tcache_bin_flush_large(tbin, binind, (tbin_info->ncached_max >> - 1) -#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) - , tcache -#endif - ); + 1), tcache); } assert(tbin->ncached < tbin_info->ncached_max); tbin->avail[tbin->ncached] = ptr; diff --git a/include/jemalloc/jemalloc_defs.h.in b/include/jemalloc/jemalloc_defs.h.in index 9ac7e1c2..d8052e2b 100644 --- a/include/jemalloc/jemalloc_defs.h.in +++ b/include/jemalloc/jemalloc_defs.h.in @@ -48,9 +48,11 @@ /* Defined if __attribute__((...)) syntax is supported. 
*/ #undef JEMALLOC_HAVE_ATTR #ifdef JEMALLOC_HAVE_ATTR -# define JEMALLOC_ATTR(s) __attribute__((s)) +# define JEMALLOC_CATTR(s, a) __attribute__((s)) +# define JEMALLOC_ATTR(s) JEMALLOC_CATTR(s,) #else -# define JEMALLOC_ATTR(s) +# define JEMALLOC_CATTR(s, a) a +# define JEMALLOC_ATTR(s) JEMALLOC_CATTR(s,) #endif /* JEMALLOC_CC_SILENCE enables code that silences unuseful compiler warnings. */ diff --git a/src/arena.c b/src/arena.c index d166ca1e..356b628d 100644 --- a/src/arena.c +++ b/src/arena.c @@ -188,9 +188,7 @@ static bool arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, static bool arena_ralloc_large(void *ptr, size_t oldsize, size_t size, size_t extra, bool zero); static bool small_size2bin_init(void); -#ifdef JEMALLOC_DEBUG static void small_size2bin_validate(void); -#endif static bool small_size2bin_init_hard(void); static size_t bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size); @@ -211,8 +209,8 @@ arena_run_comp(arena_chunk_map_t *a, arena_chunk_map_t *b) } /* Generate red-black tree functions. */ -rb_gen(static JEMALLOC_ATTR(unused), arena_run_tree_, arena_run_tree_t, - arena_chunk_map_t, u.rb_link, arena_run_comp) +rb_gen(static UNUSED, arena_run_tree_, arena_run_tree_t, arena_chunk_map_t, + u.rb_link, arena_run_comp) static inline int arena_avail_comp(arena_chunk_map_t *a, arena_chunk_map_t *b) @@ -246,8 +244,8 @@ arena_avail_comp(arena_chunk_map_t *a, arena_chunk_map_t *b) } /* Generate red-black tree functions. 
*/ -rb_gen(static JEMALLOC_ATTR(unused), arena_avail_tree_, arena_avail_tree_t, - arena_chunk_map_t, u.rb_link, arena_avail_comp) +rb_gen(static UNUSED, arena_avail_tree_, arena_avail_tree_t, arena_chunk_map_t, + u.rb_link, arena_avail_comp) static inline void * arena_run_reg_alloc(arena_run_t *run, arena_bin_info_t *bin_info) @@ -257,7 +255,7 @@ arena_run_reg_alloc(arena_run_t *run, arena_bin_info_t *bin_info) bitmap_t *bitmap = (bitmap_t *)((uintptr_t)run + (uintptr_t)bin_info->bitmap_offset); - dassert(run->magic == ARENA_RUN_MAGIC); + assert(run->magic == ARENA_RUN_MAGIC); assert(run->nfree > 0); assert(bitmap_full(bitmap, &bin_info->bitmap_info) == false); @@ -295,17 +293,16 @@ arena_run_reg_dalloc(arena_run_t *run, void *ptr) run->nfree++; } -#ifdef JEMALLOC_DEBUG static inline void arena_chunk_validate_zeroed(arena_chunk_t *chunk, size_t run_ind) { size_t i; - size_t *p = (size_t *)((uintptr_t)chunk + (run_ind << PAGE_SHIFT)); + UNUSED size_t *p = (size_t *)((uintptr_t)chunk + (run_ind << + PAGE_SHIFT)); for (i = 0; i < PAGE_SIZE / sizeof(size_t); i++) assert(p[i] == 0); } -#endif static void arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, @@ -315,9 +312,6 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, size_t old_ndirty, run_ind, total_pages, need_pages, rem_pages, i; size_t flag_dirty; arena_avail_tree_t *runs_avail; -#ifdef JEMALLOC_STATS - size_t cactive_diff; -#endif chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); old_ndirty = chunk->ndirty; @@ -336,13 +330,17 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, rem_pages = total_pages - need_pages; arena_avail_tree_remove(runs_avail, &chunk->map[run_ind-map_bias]); -#ifdef JEMALLOC_STATS - /* Update stats_cactive if nactive is crossing a chunk multiple. 
*/ - cactive_diff = CHUNK_CEILING((arena->nactive + need_pages) << - PAGE_SHIFT) - CHUNK_CEILING(arena->nactive << PAGE_SHIFT); - if (cactive_diff != 0) - stats_cactive_add(cactive_diff); -#endif + if (config_stats) { + /* + * Update stats_cactive if nactive is crossing a chunk + * multiple. + */ + size_t cactive_diff = CHUNK_CEILING((arena->nactive + + need_pages) << PAGE_SHIFT) - CHUNK_CEILING(arena->nactive << + PAGE_SHIFT); + if (cactive_diff != 0) + stats_cactive_add(cactive_diff); + } arena->nactive += need_pages; /* Keep track of trailing unused pages for later use. */ @@ -390,13 +388,10 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, chunk + ((run_ind+i) << PAGE_SHIFT)), 0, PAGE_SIZE); - } -#ifdef JEMALLOC_DEBUG - else { + } else if (config_debug) { arena_chunk_validate_zeroed( chunk, run_ind+i); } -#endif } } else { /* @@ -427,40 +422,34 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, chunk->map[run_ind-map_bias].bits = (chunk->map[run_ind-map_bias].bits & CHUNK_MAP_UNZEROED) | CHUNK_MAP_ALLOCATED | flag_dirty; -#ifdef JEMALLOC_DEBUG /* * The first page will always be dirtied during small run * initialization, so a validation failure here would not * actually cause an observable failure. 
*/ - if (flag_dirty == 0 && + if (config_debug && flag_dirty == 0 && (chunk->map[run_ind-map_bias].bits & CHUNK_MAP_UNZEROED) == 0) arena_chunk_validate_zeroed(chunk, run_ind); -#endif for (i = 1; i < need_pages - 1; i++) { chunk->map[run_ind+i-map_bias].bits = (i << PAGE_SHIFT) | (chunk->map[run_ind+i-map_bias].bits & CHUNK_MAP_UNZEROED) | CHUNK_MAP_ALLOCATED; -#ifdef JEMALLOC_DEBUG - if (flag_dirty == 0 && + if (config_debug && flag_dirty == 0 && (chunk->map[run_ind+i-map_bias].bits & CHUNK_MAP_UNZEROED) == 0) arena_chunk_validate_zeroed(chunk, run_ind+i); -#endif } chunk->map[run_ind+need_pages-1-map_bias].bits = ((need_pages - 1) << PAGE_SHIFT) | (chunk->map[run_ind+need_pages-1-map_bias].bits & CHUNK_MAP_UNZEROED) | CHUNK_MAP_ALLOCATED | flag_dirty; -#ifdef JEMALLOC_DEBUG - if (flag_dirty == 0 && + if (config_debug && flag_dirty == 0 && (chunk->map[run_ind+need_pages-1-map_bias].bits & CHUNK_MAP_UNZEROED) == 0) { arena_chunk_validate_zeroed(chunk, run_ind+need_pages-1); } -#endif } } @@ -498,9 +487,8 @@ arena_chunk_alloc(arena_t *arena) malloc_mutex_lock(&arena->lock); if (chunk == NULL) return (NULL); -#ifdef JEMALLOC_STATS - arena->stats.mapped += chunksize; -#endif + if (config_stats) + arena->stats.mapped += chunksize; chunk->arena = arena; ql_elm_new(chunk, link_dirty); @@ -526,13 +514,10 @@ arena_chunk_alloc(arena_t *arena) if (zero == false) { for (i = map_bias+1; i < chunk_npages-1; i++) chunk->map[i-map_bias].bits = unzeroed; - } -#ifdef JEMALLOC_DEBUG - else { + } else if (config_debug) { for (i = map_bias+1; i < chunk_npages-1; i++) assert(chunk->map[i-map_bias].bits == unzeroed); } -#endif chunk->map[chunk_npages-1-map_bias].bits = arena_maxclass | unzeroed; @@ -571,9 +556,8 @@ arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk) malloc_mutex_unlock(&arena->lock); chunk_dealloc((void *)spare, chunksize, true); malloc_mutex_lock(&arena->lock); -#ifdef JEMALLOC_STATS - arena->stats.mapped -= chunksize; -#endif + if (config_stats) + 
arena->stats.mapped -= chunksize; } else arena->spare = chunk; } @@ -677,12 +661,8 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) ql_head(arena_chunk_map_t) mapelms; arena_chunk_map_t *mapelm; size_t pageind, flag_unzeroed; -#ifdef JEMALLOC_DEBUG size_t ndirty; -#endif -#ifdef JEMALLOC_STATS size_t nmadvise; -#endif ql_new(&mapelms); @@ -692,10 +672,7 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) * madvise(..., MADV_DONTNEED) results in zero-filled pages for anonymous * mappings, but not for file-backed mappings. */ -# ifdef JEMALLOC_SWAP - swap_enabled ? CHUNK_MAP_UNZEROED : -# endif - 0; + (config_swap && swap_enabled) ? CHUNK_MAP_UNZEROED : 0; #else CHUNK_MAP_UNZEROED; #endif @@ -730,9 +707,6 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) assert(pageind + npages <= chunk_npages); if (mapelm->bits & CHUNK_MAP_DIRTY) { size_t i; -#ifdef JEMALLOC_STATS - size_t cactive_diff; -#endif arena_avail_tree_remove( &arena->runs_avail_dirty, mapelm); @@ -755,17 +729,19 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) CHUNK_MAP_ALLOCATED; } -#ifdef JEMALLOC_STATS - /* - * Update stats_cactive if nactive is crossing a - * chunk multiple. - */ - cactive_diff = CHUNK_CEILING((arena->nactive + - npages) << PAGE_SHIFT) - - CHUNK_CEILING(arena->nactive << PAGE_SHIFT); - if (cactive_diff != 0) - stats_cactive_add(cactive_diff); -#endif + if (config_stats) { + /* + * Update stats_cactive if nactive is + * crossing a chunk multiple. + */ + size_t cactive_diff = + CHUNK_CEILING((arena->nactive + + npages) << PAGE_SHIFT) - + CHUNK_CEILING(arena->nactive << + PAGE_SHIFT); + if (cactive_diff != 0) + stats_cactive_add(cactive_diff); + } arena->nactive += npages; /* Append to list for later processing. 
*/ ql_elm_new(mapelm, u.ql_link); @@ -782,7 +758,7 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) chunk + (uintptr_t)(pageind << PAGE_SHIFT)); assert((mapelm->bits >> PAGE_SHIFT) == 0); - dassert(run->magic == ARENA_RUN_MAGIC); + assert(run->magic == ARENA_RUN_MAGIC); size_t binind = arena_bin_index(arena, run->bin); arena_bin_info_t *bin_info = @@ -793,53 +769,45 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) } assert(pageind == chunk_npages); -#ifdef JEMALLOC_DEBUG - ndirty = chunk->ndirty; -#endif -#ifdef JEMALLOC_STATS - arena->stats.purged += chunk->ndirty; -#endif + if (config_debug) + ndirty = chunk->ndirty; + if (config_stats) + arena->stats.purged += chunk->ndirty; arena->ndirty -= chunk->ndirty; chunk->ndirty = 0; ql_remove(&arena->chunks_dirty, chunk, link_dirty); chunk->dirtied = false; malloc_mutex_unlock(&arena->lock); -#ifdef JEMALLOC_STATS - nmadvise = 0; -#endif + if (config_stats) + nmadvise = 0; ql_foreach(mapelm, &mapelms, u.ql_link) { size_t pageind = (((uintptr_t)mapelm - (uintptr_t)chunk->map) / sizeof(arena_chunk_map_t)) + map_bias; size_t npages = mapelm->bits >> PAGE_SHIFT; assert(pageind + npages <= chunk_npages); -#ifdef JEMALLOC_DEBUG assert(ndirty >= npages); - ndirty -= npages; -#endif + if (config_debug) + ndirty -= npages; #ifdef JEMALLOC_PURGE_MADVISE_DONTNEED - madvise((void *)((uintptr_t)chunk + (pageind << PAGE_SHIFT)), - (npages << PAGE_SHIFT), MADV_DONTNEED); +# define MADV_PURGE MADV_DONTNEED #elif defined(JEMALLOC_PURGE_MADVISE_FREE) - madvise((void *)((uintptr_t)chunk + (pageind << PAGE_SHIFT)), - (npages << PAGE_SHIFT), MADV_FREE); +# define MADV_PURGE MADV_FREE #else # error "No method defined for purging unused dirty pages." 
#endif - -#ifdef JEMALLOC_STATS - nmadvise++; -#endif + madvise((void *)((uintptr_t)chunk + (pageind << PAGE_SHIFT)), + (npages << PAGE_SHIFT), MADV_PURGE); +#undef MADV_PURGE + if (config_stats) + nmadvise++; } -#ifdef JEMALLOC_DEBUG assert(ndirty == 0); -#endif malloc_mutex_lock(&arena->lock); -#ifdef JEMALLOC_STATS - arena->stats.nmadvise += nmadvise; -#endif + if (config_stats) + arena->stats.nmadvise += nmadvise; /* Deallocate runs. */ for (mapelm = ql_first(&mapelms); mapelm != NULL; @@ -859,23 +827,22 @@ arena_purge(arena_t *arena, bool all) { arena_chunk_t *chunk; size_t npurgatory; -#ifdef JEMALLOC_DEBUG - size_t ndirty = 0; + if (config_debug) { + size_t ndirty = 0; - ql_foreach(chunk, &arena->chunks_dirty, link_dirty) { - assert(chunk->dirtied); - ndirty += chunk->ndirty; + ql_foreach(chunk, &arena->chunks_dirty, link_dirty) { + assert(chunk->dirtied); + ndirty += chunk->ndirty; + } + assert(ndirty == arena->ndirty); } - assert(ndirty == arena->ndirty); -#endif assert(arena->ndirty > arena->npurgatory || all); assert(arena->ndirty - arena->npurgatory > chunk_npages || all); assert((arena->nactive >> opt_lg_dirty_mult) < (arena->ndirty - arena->npurgatory) || all); -#ifdef JEMALLOC_STATS - arena->stats.npurge++; -#endif + if (config_stats) + arena->stats.npurge++; /* * Compute the minimum number of pages that this thread should try to @@ -957,9 +924,6 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty) arena_chunk_t *chunk; size_t size, run_ind, run_pages, flag_dirty; arena_avail_tree_t *runs_avail; -#ifdef JEMALLOC_STATS - size_t cactive_diff; -#endif chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk) @@ -981,13 +945,17 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty) size = bin_info->run_size; } run_pages = (size >> PAGE_SHIFT); -#ifdef JEMALLOC_STATS - /* Update stats_cactive if nactive is crossing a chunk multiple. 
*/ - cactive_diff = CHUNK_CEILING(arena->nactive << PAGE_SHIFT) - - CHUNK_CEILING((arena->nactive - run_pages) << PAGE_SHIFT); - if (cactive_diff != 0) - stats_cactive_sub(cactive_diff); -#endif + if (config_stats) { + /* + * Update stats_cactive if nactive is crossing a chunk + * multiple. + */ + size_t cactive_diff = CHUNK_CEILING(arena->nactive << + PAGE_SHIFT) - CHUNK_CEILING((arena->nactive - run_pages) << + PAGE_SHIFT); + if (cactive_diff != 0) + stats_cactive_sub(cactive_diff); + } arena->nactive -= run_pages; /* @@ -1144,9 +1112,8 @@ arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, | flag_dirty | (chunk->map[pageind-map_bias].bits & CHUNK_MAP_UNZEROED) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; -#ifdef JEMALLOC_DEBUG - { - size_t tail_npages = newsize >> PAGE_SHIFT; + if (config_debug) { + UNUSED size_t tail_npages = newsize >> PAGE_SHIFT; assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias] .bits & ~PAGE_MASK) == 0); assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias] @@ -1156,7 +1123,6 @@ arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias] .bits & CHUNK_MAP_ALLOCATED) != 0); } -#endif chunk->map[pageind+head_npages-map_bias].bits = newsize | flag_dirty | (chunk->map[pageind+head_npages-map_bias].bits & CHUNK_MAP_FLAGS_MASK) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; @@ -1231,9 +1197,8 @@ arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin) run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - (mapelm->bits >> PAGE_SHIFT)) << PAGE_SHIFT)); -#ifdef JEMALLOC_STATS - bin->stats.reruns++; -#endif + if (config_stats) + bin->stats.reruns++; return (run); } /* No existing runs have any space available. 
*/ @@ -1255,20 +1220,19 @@ arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin) run->nextind = 0; run->nfree = bin_info->nregs; bitmap_init(bitmap, &bin_info->bitmap_info); -#ifdef JEMALLOC_DEBUG - run->magic = ARENA_RUN_MAGIC; -#endif + if (config_debug) + run->magic = ARENA_RUN_MAGIC; } malloc_mutex_unlock(&arena->lock); /********************************/ malloc_mutex_lock(&bin->lock); if (run != NULL) { -#ifdef JEMALLOC_STATS - bin->stats.nruns++; - bin->stats.curruns++; - if (bin->stats.curruns > bin->stats.highruns) - bin->stats.highruns = bin->stats.curruns; -#endif + if (config_stats) { + bin->stats.nruns++; + bin->stats.curruns++; + if (bin->stats.curruns > bin->stats.highruns) + bin->stats.highruns = bin->stats.curruns; + } return (run); } @@ -1291,9 +1255,8 @@ arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin) run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - (mapelm->bits >> PAGE_SHIFT)) << PAGE_SHIFT)); -#ifdef JEMALLOC_STATS - bin->stats.reruns++; -#endif + if (config_stats) + bin->stats.reruns++; return (run); } @@ -1318,7 +1281,7 @@ arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin) * Another thread updated runcur while this one ran without the * bin lock in arena_bin_nonfull_run_get(). 
*/ - dassert(bin->runcur->magic == ARENA_RUN_MAGIC); + assert(bin->runcur->magic == ARENA_RUN_MAGIC); assert(bin->runcur->nfree > 0); ret = arena_run_reg_alloc(bin->runcur, bin_info); if (run != NULL) { @@ -1346,13 +1309,12 @@ arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin) bin->runcur = run; - dassert(bin->runcur->magic == ARENA_RUN_MAGIC); + assert(bin->runcur->magic == ARENA_RUN_MAGIC); assert(bin->runcur->nfree > 0); return (arena_run_reg_alloc(bin->runcur, bin_info)); } -#ifdef JEMALLOC_PROF void arena_prof_accum(arena_t *arena, uint64_t accumbytes) { @@ -1365,15 +1327,10 @@ arena_prof_accum(arena_t *arena, uint64_t accumbytes) } } } -#endif -#ifdef JEMALLOC_TCACHE void -arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, size_t binind -# ifdef JEMALLOC_PROF - , uint64_t prof_accumbytes -# endif - ) +arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, size_t binind, + uint64_t prof_accumbytes) { unsigned i, nfill; arena_bin_t *bin; @@ -1382,11 +1339,11 @@ arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, size_t binind assert(tbin->ncached == 0); -#ifdef JEMALLOC_PROF - malloc_mutex_lock(&arena->lock); - arena_prof_accum(arena, prof_accumbytes); - malloc_mutex_unlock(&arena->lock); -#endif + if (config_prof) { + malloc_mutex_lock(&arena->lock); + arena_prof_accum(arena, prof_accumbytes); + malloc_mutex_unlock(&arena->lock); + } bin = &arena->bins[binind]; malloc_mutex_lock(&bin->lock); for (i = 0, nfill = (tcache_bin_info[binind].ncached_max >> @@ -1400,17 +1357,16 @@ arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, size_t binind /* Insert such that low regions get used first. 
*/ tbin->avail[nfill - 1 - i] = ptr; } -#ifdef JEMALLOC_STATS - bin->stats.allocated += i * arena_bin_info[binind].reg_size; - bin->stats.nmalloc += i; - bin->stats.nrequests += tbin->tstats.nrequests; - bin->stats.nfills++; - tbin->tstats.nrequests = 0; -#endif + if (config_stats) { + bin->stats.allocated += i * arena_bin_info[binind].reg_size; + bin->stats.nmalloc += i; + bin->stats.nrequests += tbin->tstats.nrequests; + bin->stats.nfills++; + tbin->tstats.nrequests = 0; + } malloc_mutex_unlock(&bin->lock); tbin->ncached = i; } -#endif void * arena_malloc_small(arena_t *arena, size_t size, bool zero) @@ -1436,27 +1392,25 @@ arena_malloc_small(arena_t *arena, size_t size, bool zero) return (NULL); } -#ifdef JEMALLOC_STATS - bin->stats.allocated += size; - bin->stats.nmalloc++; - bin->stats.nrequests++; -#endif + if (config_stats) { + bin->stats.allocated += size; + bin->stats.nmalloc++; + bin->stats.nrequests++; + } malloc_mutex_unlock(&bin->lock); -#ifdef JEMALLOC_PROF - if (isthreaded == false) { + if (config_prof && isthreaded == false) { malloc_mutex_lock(&arena->lock); arena_prof_accum(arena, size); malloc_mutex_unlock(&arena->lock); } -#endif if (zero == false) { -#ifdef JEMALLOC_FILL - if (opt_junk) - memset(ret, 0xa5, size); - else if (opt_zero) - memset(ret, 0, size); -#endif + if (config_fill) { + if (opt_junk) + memset(ret, 0xa5, size); + else if (opt_zero) + memset(ret, 0, size); + } } else memset(ret, 0, size); @@ -1476,31 +1430,31 @@ arena_malloc_large(arena_t *arena, size_t size, bool zero) malloc_mutex_unlock(&arena->lock); return (NULL); } -#ifdef JEMALLOC_STATS - arena->stats.nmalloc_large++; - arena->stats.nrequests_large++; - arena->stats.allocated_large += size; - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nmalloc++; - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nrequests++; - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns++; - if (arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns > - arena->stats.lstats[(size >> PAGE_SHIFT) - 
1].highruns) { - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns = - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns; + if (config_stats) { + arena->stats.nmalloc_large++; + arena->stats.nrequests_large++; + arena->stats.allocated_large += size; + arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nmalloc++; + arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nrequests++; + arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns++; + if (arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns > + arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns) { + arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns = + arena->stats.lstats[(size >> PAGE_SHIFT) + - 1].curruns; + } } -#endif -#ifdef JEMALLOC_PROF - arena_prof_accum(arena, size); -#endif + if (config_prof) + arena_prof_accum(arena, size); malloc_mutex_unlock(&arena->lock); if (zero == false) { -#ifdef JEMALLOC_FILL - if (opt_junk) - memset(ret, 0xa5, size); - else if (opt_zero) - memset(ret, 0, size); -#endif + if (config_fill) { + if (opt_junk) + memset(ret, 0xa5, size); + else if (opt_zero) + memset(ret, 0, size); + } } return (ret); @@ -1514,18 +1468,14 @@ arena_malloc(size_t size, bool zero) assert(QUANTUM_CEILING(size) <= arena_maxclass); if (size <= small_maxclass) { -#ifdef JEMALLOC_TCACHE tcache_t *tcache; - if ((tcache = tcache_get()) != NULL) + if (config_tcache && (tcache = tcache_get()) != NULL) return (tcache_alloc_small(tcache, size, zero)); else - -#endif return (arena_malloc_small(choose_arena(), size, zero)); } else { -#ifdef JEMALLOC_TCACHE - if (size <= tcache_maxclass) { + if (config_tcache && size <= tcache_maxclass) { tcache_t *tcache; if ((tcache = tcache_get()) != NULL) @@ -1535,7 +1485,6 @@ arena_malloc(size_t size, bool zero) size, zero)); } } else -#endif return (arena_malloc_large(choose_arena(), size, zero)); } } @@ -1586,29 +1535,28 @@ arena_palloc(arena_t *arena, size_t size, size_t alloc_size, size_t alignment, } } -#ifdef JEMALLOC_STATS - arena->stats.nmalloc_large++; - 
arena->stats.nrequests_large++; - arena->stats.allocated_large += size; - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nmalloc++; - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nrequests++; - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns++; - if (arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns > - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns) { - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns = - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns; + if (config_stats) { + arena->stats.nmalloc_large++; + arena->stats.nrequests_large++; + arena->stats.allocated_large += size; + arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nmalloc++; + arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nrequests++; + arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns++; + if (arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns > + arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns) { + arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns = + arena->stats.lstats[(size >> PAGE_SHIFT) + - 1].curruns; + } } -#endif malloc_mutex_unlock(&arena->lock); -#ifdef JEMALLOC_FILL - if (zero == false) { + if (config_fill && zero == false) { if (opt_junk) memset(ret, 0xa5, size); else if (opt_zero) memset(ret, 0, size); } -#endif return (ret); } @@ -1631,7 +1579,7 @@ arena_salloc(const void *ptr) arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) << PAGE_SHIFT)); - dassert(run->magic == ARENA_RUN_MAGIC); + assert(run->magic == ARENA_RUN_MAGIC); size_t binind = arena_bin_index(chunk->arena, run->bin); arena_bin_info_t *bin_info = &arena_bin_info[binind]; assert(((uintptr_t)ptr - ((uintptr_t)run + @@ -1647,7 +1595,6 @@ arena_salloc(const void *ptr) return (ret); } -#ifdef JEMALLOC_PROF void arena_prof_promoted(const void *ptr, size_t size) { @@ -1685,7 +1632,7 @@ arena_salloc_demote(const void *ptr) arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) << 
PAGE_SHIFT)); - dassert(run->magic == ARENA_RUN_MAGIC); + assert(run->magic == ARENA_RUN_MAGIC); size_t binind = arena_bin_index(chunk->arena, run->bin); arena_bin_info_t *bin_info = &arena_bin_info[binind]; assert(((uintptr_t)ptr - ((uintptr_t)run + @@ -1707,7 +1654,6 @@ arena_salloc_demote(const void *ptr) return (ret); } -#endif static void arena_dissociate_bin_run(arena_chunk_t *chunk, arena_run_t *run, @@ -1781,16 +1727,14 @@ arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, ((past - run_ind) << PAGE_SHIFT), false); /* npages = past - run_ind; */ } -#ifdef JEMALLOC_DEBUG - run->magic = 0; -#endif + if (config_debug) + run->magic = 0; arena_run_dalloc(arena, run, true); malloc_mutex_unlock(&arena->lock); /****************************/ malloc_mutex_lock(&bin->lock); -#ifdef JEMALLOC_STATS - bin->stats.curruns--; -#endif + if (config_stats) + bin->stats.curruns--; } static void @@ -1836,25 +1780,20 @@ arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, size_t pageind; arena_run_t *run; arena_bin_t *bin; -#if (defined(JEMALLOC_FILL) || defined(JEMALLOC_STATS)) size_t size; -#endif pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - (mapelm->bits >> PAGE_SHIFT)) << PAGE_SHIFT)); - dassert(run->magic == ARENA_RUN_MAGIC); + assert(run->magic == ARENA_RUN_MAGIC); bin = run->bin; size_t binind = arena_bin_index(arena, bin); arena_bin_info_t *bin_info = &arena_bin_info[binind]; -#if (defined(JEMALLOC_FILL) || defined(JEMALLOC_STATS)) - size = bin_info->reg_size; -#endif + if (config_fill || config_stats) + size = bin_info->reg_size; -#ifdef JEMALLOC_FILL - if (opt_junk) + if (config_fill && opt_junk) memset(ptr, 0x5a, size); -#endif arena_run_reg_dalloc(run, ptr); if (run->nfree == bin_info->nregs) { @@ -1863,13 +1802,12 @@ arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, } else if (run->nfree == 1 && run != bin->runcur) 
arena_bin_lower_run(arena, chunk, run, bin); -#ifdef JEMALLOC_STATS - bin->stats.allocated -= size; - bin->stats.ndalloc++; -#endif + if (config_stats) { + bin->stats.allocated -= size; + bin->stats.ndalloc++; + } } -#ifdef JEMALLOC_STATS void arena_stats_merge(arena_t *arena, size_t *nactive, size_t *ndirty, arena_stats_t *astats, malloc_bin_stats_t *bstats, @@ -1907,10 +1845,10 @@ arena_stats_merge(arena_t *arena, size_t *nactive, size_t *ndirty, bstats[i].nmalloc += bin->stats.nmalloc; bstats[i].ndalloc += bin->stats.ndalloc; bstats[i].nrequests += bin->stats.nrequests; -#ifdef JEMALLOC_TCACHE - bstats[i].nfills += bin->stats.nfills; - bstats[i].nflushes += bin->stats.nflushes; -#endif + if (config_tcache) { + bstats[i].nfills += bin->stats.nfills; + bstats[i].nflushes += bin->stats.nflushes; + } bstats[i].nruns += bin->stats.nruns; bstats[i].reruns += bin->stats.reruns; bstats[i].highruns += bin->stats.highruns; @@ -1918,37 +1856,24 @@ arena_stats_merge(arena_t *arena, size_t *nactive, size_t *ndirty, malloc_mutex_unlock(&bin->lock); } } -#endif void arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr) { - /* Large allocation. 
*/ -#ifdef JEMALLOC_FILL -# ifndef JEMALLOC_STATS - if (opt_junk) -# endif -#endif - { -#if (defined(JEMALLOC_FILL) || defined(JEMALLOC_STATS)) + if (config_fill || config_stats) { size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; size_t size = chunk->map[pageind-map_bias].bits & ~PAGE_MASK; -#endif -#ifdef JEMALLOC_FILL -# ifdef JEMALLOC_STATS - if (opt_junk) -# endif + if (config_fill && config_stats && opt_junk) memset(ptr, 0x5a, size); -#endif -#ifdef JEMALLOC_STATS - arena->stats.ndalloc_large++; - arena->stats.allocated_large -= size; - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].ndalloc++; - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns--; -#endif + if (config_stats) { + arena->stats.ndalloc_large++; + arena->stats.allocated_large -= size; + arena->stats.lstats[(size >> PAGE_SHIFT) - 1].ndalloc++; + arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns--; + } } arena_run_dalloc(arena, (arena_run_t *)ptr, true); @@ -1968,24 +1893,25 @@ arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk, void *ptr, malloc_mutex_lock(&arena->lock); arena_run_trim_tail(arena, chunk, (arena_run_t *)ptr, oldsize, size, true); -#ifdef JEMALLOC_STATS - arena->stats.ndalloc_large++; - arena->stats.allocated_large -= oldsize; - arena->stats.lstats[(oldsize >> PAGE_SHIFT) - 1].ndalloc++; - arena->stats.lstats[(oldsize >> PAGE_SHIFT) - 1].curruns--; + if (config_stats) { + arena->stats.ndalloc_large++; + arena->stats.allocated_large -= oldsize; + arena->stats.lstats[(oldsize >> PAGE_SHIFT) - 1].ndalloc++; + arena->stats.lstats[(oldsize >> PAGE_SHIFT) - 1].curruns--; - arena->stats.nmalloc_large++; - arena->stats.nrequests_large++; - arena->stats.allocated_large += size; - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nmalloc++; - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nrequests++; - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns++; - if (arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns > - arena->stats.lstats[(size >> 
PAGE_SHIFT) - 1].highruns) { - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns = - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns; + arena->stats.nmalloc_large++; + arena->stats.nrequests_large++; + arena->stats.allocated_large += size; + arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nmalloc++; + arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nrequests++; + arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns++; + if (arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns > + arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns) { + arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns = + arena->stats.lstats[(size >> PAGE_SHIFT) + - 1].curruns; + } } -#endif malloc_mutex_unlock(&arena->lock); } @@ -2038,25 +1964,29 @@ arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr, chunk->map[pageind+npages-1-map_bias].bits = flag_dirty | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; -#ifdef JEMALLOC_STATS - arena->stats.ndalloc_large++; - arena->stats.allocated_large -= oldsize; - arena->stats.lstats[(oldsize >> PAGE_SHIFT) - 1].ndalloc++; - arena->stats.lstats[(oldsize >> PAGE_SHIFT) - 1].curruns--; + if (config_stats) { + arena->stats.ndalloc_large++; + arena->stats.allocated_large -= oldsize; + arena->stats.lstats[(oldsize >> PAGE_SHIFT) + - 1].ndalloc++; + arena->stats.lstats[(oldsize >> PAGE_SHIFT) + - 1].curruns--; - arena->stats.nmalloc_large++; - arena->stats.nrequests_large++; - arena->stats.allocated_large += size; - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nmalloc++; - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nrequests++; - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns++; - if (arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns > - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns) { - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns = - arena->stats.lstats[(size >> PAGE_SHIFT) - - 1].curruns; + arena->stats.nmalloc_large++; + arena->stats.nrequests_large++; + arena->stats.allocated_large += size; + 
arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nmalloc++; + arena->stats.lstats[(size >> PAGE_SHIFT) + - 1].nrequests++; + arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns++; + if (arena->stats.lstats[(size >> PAGE_SHIFT) + - 1].curruns > arena->stats.lstats[(size >> + PAGE_SHIFT) - 1].highruns) { + arena->stats.lstats[(size >> PAGE_SHIFT) + - 1].highruns = arena->stats.lstats[(size >> + PAGE_SHIFT) - 1].curruns; + } } -#endif malloc_mutex_unlock(&arena->lock); return (false); } @@ -2078,12 +2008,10 @@ arena_ralloc_large(void *ptr, size_t oldsize, size_t size, size_t extra, psize = PAGE_CEILING(size + extra); if (psize == oldsize) { /* Same size class. */ -#ifdef JEMALLOC_FILL - if (opt_junk && size < oldsize) { + if (config_fill && opt_junk && size < oldsize) { memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize - size); } -#endif return (false); } else { arena_chunk_t *chunk; @@ -2091,16 +2019,14 @@ arena_ralloc_large(void *ptr, size_t oldsize, size_t size, size_t extra, chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); arena = chunk->arena; - dassert(arena->magic == ARENA_MAGIC); + assert(arena->magic == ARENA_MAGIC); if (psize < oldsize) { -#ifdef JEMALLOC_FILL /* Fill before shrinking in order avoid a race. 
*/ - if (opt_junk) { + if (config_fill && opt_junk) { memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize - size); } -#endif arena_ralloc_large_shrink(arena, chunk, ptr, oldsize, psize); return (false); @@ -2108,12 +2034,11 @@ arena_ralloc_large(void *ptr, size_t oldsize, size_t size, size_t extra, bool ret = arena_ralloc_large_grow(arena, chunk, ptr, oldsize, PAGE_CEILING(size), psize - PAGE_CEILING(size), zero); -#ifdef JEMALLOC_FILL - if (ret == false && zero == false && opt_zero) { + if (config_fill && ret == false && zero == false && + opt_zero) { memset((void *)((uintptr_t)ptr + oldsize), 0, size - oldsize); } -#endif return (ret); } } @@ -2135,12 +2060,10 @@ arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, SMALL_SIZE2BIN(size + extra) == SMALL_SIZE2BIN(oldsize)) || (size <= oldsize && size + extra >= oldsize)) { -#ifdef JEMALLOC_FILL - if (opt_junk && size < oldsize) { + if (config_fill && opt_junk && size < oldsize) { memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize - size); } -#endif return (ptr); } } else { @@ -2222,22 +2145,21 @@ arena_new(arena_t *arena, unsigned ind) if (malloc_mutex_init(&arena->lock)) return (true); -#ifdef JEMALLOC_STATS - memset(&arena->stats, 0, sizeof(arena_stats_t)); - arena->stats.lstats = (malloc_large_stats_t *)base_alloc(nlclasses * - sizeof(malloc_large_stats_t)); - if (arena->stats.lstats == NULL) - return (true); - memset(arena->stats.lstats, 0, nlclasses * - sizeof(malloc_large_stats_t)); -# ifdef JEMALLOC_TCACHE - ql_new(&arena->tcache_ql); -# endif -#endif + if (config_stats) { + memset(&arena->stats, 0, sizeof(arena_stats_t)); + arena->stats.lstats = + (malloc_large_stats_t *)base_alloc(nlclasses * + sizeof(malloc_large_stats_t)); + if (arena->stats.lstats == NULL) + return (true); + memset(arena->stats.lstats, 0, nlclasses * + sizeof(malloc_large_stats_t)); + if (config_tcache) + ql_new(&arena->tcache_ql); + } -#ifdef JEMALLOC_PROF - arena->prof_accumbytes = 0; -#endif + if 
(config_prof) + arena->prof_accumbytes = 0; /* Initialize chunks. */ ql_new(&arena->chunks_dirty); @@ -2251,84 +2173,41 @@ arena_new(arena_t *arena, unsigned ind) arena_avail_tree_new(&arena->runs_avail_dirty); /* Initialize bins. */ - i = 0; -#ifdef JEMALLOC_TINY - /* (2^n)-spaced tiny bins. */ - for (; i < ntbins; i++) { + for (i = 0; i < nbins; i++) { bin = &arena->bins[i]; if (malloc_mutex_init(&bin->lock)) return (true); bin->runcur = NULL; arena_run_tree_new(&bin->runs); -#ifdef JEMALLOC_STATS - memset(&bin->stats, 0, sizeof(malloc_bin_stats_t)); -#endif - } -#endif - - /* Quantum-spaced bins. */ - for (; i < ntbins + nqbins; i++) { - bin = &arena->bins[i]; - if (malloc_mutex_init(&bin->lock)) - return (true); - bin->runcur = NULL; - arena_run_tree_new(&bin->runs); -#ifdef JEMALLOC_STATS - memset(&bin->stats, 0, sizeof(malloc_bin_stats_t)); -#endif + if (config_stats) + memset(&bin->stats, 0, sizeof(malloc_bin_stats_t)); } - /* Cacheline-spaced bins. */ - for (; i < ntbins + nqbins + ncbins; i++) { - bin = &arena->bins[i]; - if (malloc_mutex_init(&bin->lock)) - return (true); - bin->runcur = NULL; - arena_run_tree_new(&bin->runs); -#ifdef JEMALLOC_STATS - memset(&bin->stats, 0, sizeof(malloc_bin_stats_t)); -#endif - } - - /* Subpage-spaced bins. */ - for (; i < nbins; i++) { - bin = &arena->bins[i]; - if (malloc_mutex_init(&bin->lock)) - return (true); - bin->runcur = NULL; - arena_run_tree_new(&bin->runs); -#ifdef JEMALLOC_STATS - memset(&bin->stats, 0, sizeof(malloc_bin_stats_t)); -#endif - } - -#ifdef JEMALLOC_DEBUG - arena->magic = ARENA_MAGIC; -#endif + if (config_debug) + arena->magic = ARENA_MAGIC; return (false); } -#ifdef JEMALLOC_DEBUG static void small_size2bin_validate(void) { size_t i, size, binind; i = 1; -# ifdef JEMALLOC_TINY /* Tiny. 
*/ - for (; i < (1U << LG_TINY_MIN); i++) { - size = pow2_ceil(1U << LG_TINY_MIN); - binind = ffs((int)(size >> (LG_TINY_MIN + 1))); - assert(SMALL_SIZE2BIN(i) == binind); + if (config_tiny) { + for (; i < (1U << LG_TINY_MIN); i++) { + size = pow2_ceil(1U << LG_TINY_MIN); + binind = ffs((int)(size >> (LG_TINY_MIN + 1))); + assert(SMALL_SIZE2BIN(i) == binind); + } + for (; i < qspace_min; i++) { + size = pow2_ceil(i); + binind = ffs((int)(size >> (LG_TINY_MIN + 1))); + assert(SMALL_SIZE2BIN(i) == binind); + } } - for (; i < qspace_min; i++) { - size = pow2_ceil(i); - binind = ffs((int)(size >> (LG_TINY_MIN + 1))); - assert(SMALL_SIZE2BIN(i) == binind); - } -# endif /* Quantum-spaced. */ for (; i <= qspace_max; i++) { size = QUANTUM_CEILING(i); @@ -2350,7 +2229,6 @@ small_size2bin_validate(void) assert(SMALL_SIZE2BIN(i) == binind); } } -#endif static bool small_size2bin_init(void) @@ -2363,9 +2241,8 @@ small_size2bin_init(void) return (small_size2bin_init_hard()); small_size2bin = const_small_size2bin; -#ifdef JEMALLOC_DEBUG - small_size2bin_validate(); -#endif + if (config_debug) + small_size2bin_validate(); return (false); } @@ -2388,19 +2265,19 @@ small_size2bin_init_hard(void) return (true); i = 1; -#ifdef JEMALLOC_TINY /* Tiny. */ - for (; i < (1U << LG_TINY_MIN); i += TINY_MIN) { - size = pow2_ceil(1U << LG_TINY_MIN); - binind = ffs((int)(size >> (LG_TINY_MIN + 1))); - CUSTOM_SMALL_SIZE2BIN(i) = binind; + if (config_tiny) { + for (; i < (1U << LG_TINY_MIN); i += TINY_MIN) { + size = pow2_ceil(1U << LG_TINY_MIN); + binind = ffs((int)(size >> (LG_TINY_MIN + 1))); + CUSTOM_SMALL_SIZE2BIN(i) = binind; + } + for (; i < qspace_min; i += TINY_MIN) { + size = pow2_ceil(i); + binind = ffs((int)(size >> (LG_TINY_MIN + 1))); + CUSTOM_SMALL_SIZE2BIN(i) = binind; + } } - for (; i < qspace_min; i += TINY_MIN) { - size = pow2_ceil(i); - binind = ffs((int)(size >> (LG_TINY_MIN + 1))); - CUSTOM_SMALL_SIZE2BIN(i) = binind; - } -#endif /* Quantum-spaced. 
*/ for (; i <= qspace_max; i += TINY_MIN) { size = QUANTUM_CEILING(i); @@ -2423,9 +2300,8 @@ small_size2bin_init_hard(void) } small_size2bin = custom_small_size2bin; -#ifdef JEMALLOC_DEBUG - small_size2bin_validate(); -#endif + if (config_debug) + small_size2bin_validate(); return (false); #undef CUSTOM_SMALL_SIZE2BIN } @@ -2448,9 +2324,7 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size) uint32_t try_nregs, good_nregs; uint32_t try_hdr_size, good_hdr_size; uint32_t try_bitmap_offset, good_bitmap_offset; -#ifdef JEMALLOC_PROF uint32_t try_ctx0_offset, good_ctx0_offset; -#endif uint32_t try_reg0_offset, good_reg0_offset; assert(min_run_size >= PAGE_SIZE); @@ -2481,8 +2355,7 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size) try_bitmap_offset = try_hdr_size; /* Add space for bitmap. */ try_hdr_size += bitmap_size(try_nregs); -#ifdef JEMALLOC_PROF - if (opt_prof && prof_promote == false) { + if (config_prof && opt_prof && prof_promote == false) { /* Pad to a quantum boundary. */ try_hdr_size = QUANTUM_CEILING(try_hdr_size); try_ctx0_offset = try_hdr_size; @@ -2490,7 +2363,6 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size) try_hdr_size += try_nregs * sizeof(prof_ctx_t *); } else try_ctx0_offset = 0; -#endif try_reg0_offset = try_run_size - (try_nregs * bin_info->reg_size); } while (try_hdr_size > try_reg0_offset); @@ -2504,9 +2376,7 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size) good_nregs = try_nregs; good_hdr_size = try_hdr_size; good_bitmap_offset = try_bitmap_offset; -#ifdef JEMALLOC_PROF good_ctx0_offset = try_ctx0_offset; -#endif good_reg0_offset = try_reg0_offset; /* Try more aggressive settings. */ @@ -2526,8 +2396,7 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size) try_bitmap_offset = try_hdr_size; /* Add space for bitmap. 
*/ try_hdr_size += bitmap_size(try_nregs); -#ifdef JEMALLOC_PROF - if (opt_prof && prof_promote == false) { + if (config_prof && opt_prof && prof_promote == false) { /* Pad to a quantum boundary. */ try_hdr_size = QUANTUM_CEILING(try_hdr_size); try_ctx0_offset = try_hdr_size; @@ -2537,7 +2406,6 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size) try_hdr_size += try_nregs * sizeof(prof_ctx_t *); } -#endif try_reg0_offset = try_run_size - (try_nregs * bin_info->reg_size); } while (try_hdr_size > try_reg0_offset); @@ -2553,9 +2421,7 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size) bin_info->run_size = good_run_size; bin_info->nregs = good_nregs; bin_info->bitmap_offset = good_bitmap_offset; -#ifdef JEMALLOC_PROF bin_info->ctx0_offset = good_ctx0_offset; -#endif bin_info->reg0_offset = good_reg0_offset; return (good_run_size); @@ -2574,15 +2440,17 @@ bin_info_init(void) prev_run_size = PAGE_SIZE; i = 0; -#ifdef JEMALLOC_TINY /* (2^n)-spaced tiny bins. */ - for (; i < ntbins; i++) { - bin_info = &arena_bin_info[i]; - bin_info->reg_size = (1U << (LG_TINY_MIN + i)); - prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size); - bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs); + if (config_tiny) { + for (; i < ntbins; i++) { + bin_info = &arena_bin_info[i]; + bin_info->reg_size = (1U << (LG_TINY_MIN + i)); + prev_run_size = bin_info_run_size_calc(bin_info, + prev_run_size); + bitmap_info_init(&bin_info->bitmap_info, + bin_info->nregs); + } } -#endif /* Quantum-spaced bins. 
*/ for (; i < ntbins + nqbins; i++) { @@ -2631,9 +2499,8 @@ arena_boot(void) assert(sspace_min < PAGE_SIZE); sspace_max = PAGE_SIZE - SUBPAGE; -#ifdef JEMALLOC_TINY - assert(LG_QUANTUM >= LG_TINY_MIN); -#endif + if (config_tiny) + assert(LG_QUANTUM >= LG_TINY_MIN); assert(ntbins <= LG_QUANTUM); nqbins = qspace_max >> LG_QUANTUM; ncbins = ((cspace_max - cspace_min) >> LG_CACHELINE) + 1; @@ -2652,23 +2519,18 @@ arena_boot(void) * small size classes, plus a "not small" size class must be stored in * 8 bits of arena_chunk_map_t's bits field. */ -#ifdef JEMALLOC_PROF - if (opt_prof && prof_promote) { - if (nbins > 255) { - char line_buf[UMAX2S_BUFSIZE]; - malloc_write(": Too many small size classes ("); - malloc_write(u2s(nbins, 10, line_buf)); - malloc_write(" > max 255)\n"); - abort(); - } - } else -#endif - if (nbins > 256) { - char line_buf[UMAX2S_BUFSIZE]; - malloc_write(": Too many small size classes ("); - malloc_write(u2s(nbins, 10, line_buf)); - malloc_write(" > max 256)\n"); - abort(); + if (config_prof && opt_prof && prof_promote && nbins > 255) { + char line_buf[UMAX2S_BUFSIZE]; + malloc_write(": Too many small size classes ("); + malloc_write(u2s(nbins, 10, line_buf)); + malloc_write(" > max 255)\n"); + abort(); + } else if (nbins > 256) { + char line_buf[UMAX2S_BUFSIZE]; + malloc_write(": Too many small size classes ("); + malloc_write(u2s(nbins, 10, line_buf)); + malloc_write(" > max 256)\n"); + abort(); } /* diff --git a/src/chunk.c b/src/chunk.c index d190c6f4..57ab20d8 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -5,18 +5,12 @@ /* Data. */ size_t opt_lg_chunk = LG_CHUNK_DEFAULT; -#ifdef JEMALLOC_SWAP bool opt_overcommit = true; -#endif -#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) malloc_mutex_t chunks_mtx; chunk_stats_t stats_chunks; -#endif -#ifdef JEMALLOC_IVSALLOC rtree_t *chunks_rtree; -#endif /* Various chunk-related settings. 
*/ size_t chunksize; @@ -41,67 +35,50 @@ chunk_alloc(size_t size, bool base, bool *zero) assert(size != 0); assert((size & chunksize_mask) == 0); -#ifdef JEMALLOC_SWAP - if (swap_enabled) { + if (config_swap && swap_enabled) { ret = chunk_alloc_swap(size, zero); if (ret != NULL) goto RETURN; } if (swap_enabled == false || opt_overcommit) { -#endif -#ifdef JEMALLOC_DSS - ret = chunk_alloc_dss(size, zero); - if (ret != NULL) - goto RETURN; -#endif + if (config_dss) { + ret = chunk_alloc_dss(size, zero); + if (ret != NULL) + goto RETURN; + } ret = chunk_alloc_mmap(size); if (ret != NULL) { *zero = true; goto RETURN; } -#ifdef JEMALLOC_SWAP } -#endif /* All strategies for allocation failed. */ ret = NULL; RETURN: -#ifdef JEMALLOC_IVSALLOC - if (base == false && ret != NULL) { + if (config_ivsalloc && base == false && ret != NULL) { if (rtree_set(chunks_rtree, (uintptr_t)ret, ret)) { chunk_dealloc(ret, size, true); return (NULL); } } -#endif -#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) - if (ret != NULL) { -# ifdef JEMALLOC_PROF + if ((config_stats || config_prof) && ret != NULL) { bool gdump; -# endif malloc_mutex_lock(&chunks_mtx); -# ifdef JEMALLOC_STATS - stats_chunks.nchunks += (size / chunksize); -# endif + if (config_stats) + stats_chunks.nchunks += (size / chunksize); stats_chunks.curchunks += (size / chunksize); if (stats_chunks.curchunks > stats_chunks.highchunks) { stats_chunks.highchunks = stats_chunks.curchunks; -# ifdef JEMALLOC_PROF - gdump = true; -# endif - } -# ifdef JEMALLOC_PROF - else + if (config_prof) + gdump = true; + } else if (config_prof) gdump = false; -# endif malloc_mutex_unlock(&chunks_mtx); -# ifdef JEMALLOC_PROF - if (opt_prof && opt_prof_gdump && gdump) + if (config_prof && opt_prof && opt_prof_gdump && gdump) prof_gdump(); -# endif } -#endif assert(CHUNK_ADDR2BASE(ret) == ret); return (ret); @@ -116,24 +93,20 @@ chunk_dealloc(void *chunk, size_t size, bool unmap) assert(size != 0); assert((size & chunksize_mask) == 0); 
-#ifdef JEMALLOC_IVSALLOC - rtree_set(chunks_rtree, (uintptr_t)chunk, NULL); -#endif -#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) - malloc_mutex_lock(&chunks_mtx); - stats_chunks.curchunks -= (size / chunksize); - malloc_mutex_unlock(&chunks_mtx); -#endif + if (config_ivsalloc) + rtree_set(chunks_rtree, (uintptr_t)chunk, NULL); + if (config_stats || config_prof) { + malloc_mutex_lock(&chunks_mtx); + stats_chunks.curchunks -= (size / chunksize); + malloc_mutex_unlock(&chunks_mtx); + } if (unmap) { -#ifdef JEMALLOC_SWAP - if (swap_enabled && chunk_dealloc_swap(chunk, size) == false) + if (config_swap && swap_enabled && chunk_dealloc_swap(chunk, + size) == false) return; -#endif -#ifdef JEMALLOC_DSS - if (chunk_dealloc_dss(chunk, size) == false) + if (config_dss && chunk_dealloc_dss(chunk, size) == false) return; -#endif chunk_dealloc_mmap(chunk, size); } } @@ -148,26 +121,23 @@ chunk_boot(void) chunksize_mask = chunksize - 1; chunk_npages = (chunksize >> PAGE_SHIFT); -#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) - if (malloc_mutex_init(&chunks_mtx)) + if (config_stats || config_prof) { + if (malloc_mutex_init(&chunks_mtx)) + return (true); + memset(&stats_chunks, 0, sizeof(chunk_stats_t)); + } + if (config_swap && chunk_swap_boot()) return (true); - memset(&stats_chunks, 0, sizeof(chunk_stats_t)); -#endif -#ifdef JEMALLOC_SWAP - if (chunk_swap_boot()) - return (true); -#endif if (chunk_mmap_boot()) return (true); -#ifdef JEMALLOC_DSS - if (chunk_dss_boot()) + if (config_dss && chunk_dss_boot()) return (true); -#endif -#ifdef JEMALLOC_IVSALLOC - chunks_rtree = rtree_new((ZU(1) << (LG_SIZEOF_PTR+3)) - opt_lg_chunk); - if (chunks_rtree == NULL) - return (true); -#endif + if (config_ivsalloc) { + chunks_rtree = rtree_new((ZU(1) << (LG_SIZEOF_PTR+3)) - + opt_lg_chunk); + if (chunks_rtree == NULL) + return (true); + } return (false); } diff --git a/src/chunk_dss.c b/src/chunk_dss.c index 5c0e290e..c25baea3 100644 --- a/src/chunk_dss.c +++ 
b/src/chunk_dss.c @@ -1,6 +1,5 @@ #define JEMALLOC_CHUNK_DSS_C_ #include "jemalloc/internal/jemalloc_internal.h" -#ifdef JEMALLOC_DSS /******************************************************************************/ /* Data. */ @@ -35,6 +34,8 @@ chunk_recycle_dss(size_t size, bool *zero) { extent_node_t *node, key; + cassert(config_dss); + key.addr = NULL; key.size = size; malloc_mutex_lock(&dss_mtx); @@ -74,6 +75,8 @@ chunk_alloc_dss(size_t size, bool *zero) { void *ret; + cassert(config_dss); + ret = chunk_recycle_dss(size, zero); if (ret != NULL) return (ret); @@ -131,6 +134,8 @@ chunk_dealloc_dss_record(void *chunk, size_t size) { extent_node_t *xnode, *node, *prev, key; + cassert(config_dss); + xnode = NULL; while (true) { key.addr = (void *)((uintptr_t)chunk + size); @@ -204,6 +209,8 @@ chunk_in_dss(void *chunk) { bool ret; + cassert(config_dss); + malloc_mutex_lock(&dss_mtx); if ((uintptr_t)chunk >= (uintptr_t)dss_base && (uintptr_t)chunk < (uintptr_t)dss_max) @@ -220,6 +227,8 @@ chunk_dealloc_dss(void *chunk, size_t size) { bool ret; + cassert(config_dss); + malloc_mutex_lock(&dss_mtx); if ((uintptr_t)chunk >= (uintptr_t)dss_base && (uintptr_t)chunk < (uintptr_t)dss_max) { @@ -269,6 +278,8 @@ bool chunk_dss_boot(void) { + cassert(config_dss); + if (malloc_mutex_init(&dss_mtx)) return (true); dss_base = sbrk(0); @@ -281,4 +292,3 @@ chunk_dss_boot(void) } /******************************************************************************/ -#endif /* JEMALLOC_DSS */ diff --git a/src/chunk_swap.c b/src/chunk_swap.c index cb25ae0d..fe9ca303 100644 --- a/src/chunk_swap.c +++ b/src/chunk_swap.c @@ -1,6 +1,6 @@ #define JEMALLOC_CHUNK_SWAP_C_ #include "jemalloc/internal/jemalloc_internal.h" -#ifdef JEMALLOC_SWAP + /******************************************************************************/ /* Data. 
*/ @@ -9,9 +9,7 @@ bool swap_enabled; bool swap_prezeroed; size_t swap_nfds; int *swap_fds; -#ifdef JEMALLOC_STATS size_t swap_avail; -#endif /* Base address of the mmap()ed file(s). */ static void *swap_base; @@ -42,6 +40,8 @@ chunk_recycle_swap(size_t size, bool *zero) { extent_node_t *node, key; + cassert(config_swap); + key.addr = NULL; key.size = size; malloc_mutex_lock(&swap_mtx); @@ -65,9 +65,8 @@ chunk_recycle_swap(size_t size, bool *zero) node->size -= size; extent_tree_szad_insert(&swap_chunks_szad, node); } -#ifdef JEMALLOC_STATS - swap_avail -= size; -#endif + if (config_stats) + swap_avail -= size; malloc_mutex_unlock(&swap_mtx); if (*zero) @@ -84,6 +83,7 @@ chunk_alloc_swap(size_t size, bool *zero) { void *ret; + cassert(config_swap); assert(swap_enabled); ret = chunk_recycle_swap(size, zero); @@ -94,9 +94,8 @@ chunk_alloc_swap(size_t size, bool *zero) if ((uintptr_t)swap_end + size <= (uintptr_t)swap_max) { ret = swap_end; swap_end = (void *)((uintptr_t)swap_end + size); -#ifdef JEMALLOC_STATS - swap_avail -= size; -#endif + if (config_stats) + swap_avail -= size; malloc_mutex_unlock(&swap_mtx); if (swap_prezeroed) @@ -116,6 +115,8 @@ chunk_dealloc_swap_record(void *chunk, size_t size) { extent_node_t *xnode, *node, *prev, key; + cassert(config_swap); + xnode = NULL; while (true) { key.addr = (void *)((uintptr_t)chunk + size); @@ -189,6 +190,7 @@ chunk_in_swap(void *chunk) { bool ret; + cassert(config_swap); assert(swap_enabled); malloc_mutex_lock(&swap_mtx); @@ -207,6 +209,7 @@ chunk_dealloc_swap(void *chunk, size_t size) { bool ret; + cassert(config_swap); assert(swap_enabled); malloc_mutex_lock(&swap_mtx); @@ -237,9 +240,8 @@ chunk_dealloc_swap(void *chunk, size_t size) } else madvise(chunk, size, MADV_DONTNEED); -#ifdef JEMALLOC_STATS - swap_avail += size; -#endif + if (config_stats) + swap_avail += size; ret = false; goto RETURN; } @@ -260,6 +262,8 @@ chunk_swap_enable(const int *fds, unsigned nfds, bool prezeroed) size_t cumsize, voff; size_t 
sizes[nfds]; + cassert(config_swap); + malloc_mutex_lock(&swap_mtx); /* Get file sizes. */ @@ -362,9 +366,8 @@ chunk_swap_enable(const int *fds, unsigned nfds, bool prezeroed) memcpy(swap_fds, fds, nfds * sizeof(int)); swap_nfds = nfds; -#ifdef JEMALLOC_STATS - swap_avail = cumsize; -#endif + if (config_stats) + swap_avail = cumsize; swap_enabled = true; @@ -378,6 +381,8 @@ bool chunk_swap_boot(void) { + cassert(config_swap); + if (malloc_mutex_init(&swap_mtx)) return (true); @@ -385,9 +390,8 @@ chunk_swap_boot(void) swap_prezeroed = false; /* swap.* mallctl's depend on this. */ swap_nfds = 0; swap_fds = NULL; -#ifdef JEMALLOC_STATS - swap_avail = 0; -#endif + if (config_stats) + swap_avail = 0; swap_base = NULL; swap_end = NULL; swap_max = NULL; @@ -397,6 +401,3 @@ chunk_swap_boot(void) return (false); } - -/******************************************************************************/ -#endif /* JEMALLOC_SWAP */ diff --git a/src/ckh.c b/src/ckh.c index 43fcc252..f7eaa78b 100644 --- a/src/ckh.c +++ b/src/ckh.c @@ -73,7 +73,7 @@ ckh_isearch(ckh_t *ckh, const void *key) size_t hash1, hash2, bucket, cell; assert(ckh != NULL); - dassert(ckh->magic == CKH_MAGIC); + assert(ckh->magic == CKH_MAGIC); ckh->hash(key, ckh->lg_curbuckets, &hash1, &hash2); @@ -394,9 +394,8 @@ ckh_new(ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh_keycomp_t *keycomp) goto RETURN; } -#ifdef JEMALLOC_DEBUG - ckh->magic = CKH_MAGIC; -#endif + if (config_debug) + ckh->magic = CKH_MAGIC; ret = false; RETURN: @@ -408,7 +407,7 @@ ckh_delete(ckh_t *ckh) { assert(ckh != NULL); - dassert(ckh->magic == CKH_MAGIC); + assert(ckh->magic == CKH_MAGIC); #ifdef CKH_VERBOSE malloc_printf( @@ -433,7 +432,7 @@ ckh_count(ckh_t *ckh) { assert(ckh != NULL); - dassert(ckh->magic == CKH_MAGIC); + assert(ckh->magic == CKH_MAGIC); return (ckh->count); } @@ -464,7 +463,7 @@ ckh_insert(ckh_t *ckh, const void *key, const void *data) bool ret; assert(ckh != NULL); - dassert(ckh->magic == CKH_MAGIC); + assert(ckh->magic 
== CKH_MAGIC); assert(ckh_search(ckh, key, NULL, NULL)); #ifdef CKH_COUNT @@ -489,7 +488,7 @@ ckh_remove(ckh_t *ckh, const void *searchkey, void **key, void **data) size_t cell; assert(ckh != NULL); - dassert(ckh->magic == CKH_MAGIC); + assert(ckh->magic == CKH_MAGIC); cell = ckh_isearch(ckh, searchkey); if (cell != SIZE_T_MAX) { @@ -521,7 +520,7 @@ ckh_search(ckh_t *ckh, const void *searchkey, void **key, void **data) size_t cell; assert(ckh != NULL); - dassert(ckh->magic == CKH_MAGIC); + assert(ckh->magic == CKH_MAGIC); cell = ckh_isearch(ckh, searchkey); if (cell != SIZE_T_MAX) { diff --git a/src/ctl.c b/src/ctl.c index e5336d36..05be4317 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -27,16 +27,12 @@ static int n##_ctl(const size_t *mib, size_t miblen, void *oldp, \ const ctl_node_t *n##_index(const size_t *mib, size_t miblen, \ size_t i); -#ifdef JEMALLOC_STATS static bool ctl_arena_init(ctl_arena_stats_t *astats); -#endif static void ctl_arena_clear(ctl_arena_stats_t *astats); -#ifdef JEMALLOC_STATS static void ctl_arena_stats_amerge(ctl_arena_stats_t *cstats, arena_t *arena); static void ctl_arena_stats_smerge(ctl_arena_stats_t *sstats, ctl_arena_stats_t *astats); -#endif static void ctl_arena_refresh(arena_t *arena, unsigned i); static void ctl_refresh(void); static bool ctl_init(void); @@ -45,16 +41,12 @@ static int ctl_lookup(const char *name, ctl_node_t const **nodesp, CTL_PROTO(version) CTL_PROTO(epoch) -#ifdef JEMALLOC_TCACHE CTL_PROTO(tcache_flush) -#endif CTL_PROTO(thread_arena) -#ifdef JEMALLOC_STATS CTL_PROTO(thread_allocated) CTL_PROTO(thread_allocatedp) CTL_PROTO(thread_deallocated) CTL_PROTO(thread_deallocatedp) -#endif CTL_PROTO(config_debug) CTL_PROTO(config_dss) CTL_PROTO(config_dynamic_page_shift) @@ -77,21 +69,12 @@ CTL_PROTO(opt_lg_chunk) CTL_PROTO(opt_narenas) CTL_PROTO(opt_lg_dirty_mult) CTL_PROTO(opt_stats_print) -#ifdef JEMALLOC_FILL CTL_PROTO(opt_junk) CTL_PROTO(opt_zero) -#endif -#ifdef JEMALLOC_SYSV CTL_PROTO(opt_sysv) -#endif -#ifdef 
JEMALLOC_XMALLOC CTL_PROTO(opt_xmalloc) -#endif -#ifdef JEMALLOC_TCACHE CTL_PROTO(opt_tcache) CTL_PROTO(opt_lg_tcache_gc_sweep) -#endif -#ifdef JEMALLOC_PROF CTL_PROTO(opt_prof) CTL_PROTO(opt_prof_prefix) CTL_PROTO(opt_prof_active) @@ -102,10 +85,7 @@ CTL_PROTO(opt_prof_gdump) CTL_PROTO(opt_prof_leak) CTL_PROTO(opt_prof_accum) CTL_PROTO(opt_lg_prof_tcmax) -#endif -#ifdef JEMALLOC_SWAP CTL_PROTO(opt_overcommit) -#endif CTL_PROTO(arenas_bin_i_size) CTL_PROTO(arenas_bin_i_nregs) CTL_PROTO(arenas_bin_i_run_size) @@ -119,35 +99,26 @@ CTL_PROTO(arenas_cacheline) CTL_PROTO(arenas_subpage) CTL_PROTO(arenas_pagesize) CTL_PROTO(arenas_chunksize) -#ifdef JEMALLOC_TINY CTL_PROTO(arenas_tspace_min) CTL_PROTO(arenas_tspace_max) -#endif CTL_PROTO(arenas_qspace_min) CTL_PROTO(arenas_qspace_max) CTL_PROTO(arenas_cspace_min) CTL_PROTO(arenas_cspace_max) CTL_PROTO(arenas_sspace_min) CTL_PROTO(arenas_sspace_max) -#ifdef JEMALLOC_TCACHE CTL_PROTO(arenas_tcache_max) -#endif CTL_PROTO(arenas_ntbins) CTL_PROTO(arenas_nqbins) CTL_PROTO(arenas_ncbins) CTL_PROTO(arenas_nsbins) CTL_PROTO(arenas_nbins) -#ifdef JEMALLOC_TCACHE CTL_PROTO(arenas_nhbins) -#endif CTL_PROTO(arenas_nlruns) CTL_PROTO(arenas_purge) -#ifdef JEMALLOC_PROF CTL_PROTO(prof_active) CTL_PROTO(prof_dump) CTL_PROTO(prof_interval) -#endif -#ifdef JEMALLOC_STATS CTL_PROTO(stats_chunks_current) CTL_PROTO(stats_chunks_total) CTL_PROTO(stats_chunks_high) @@ -166,10 +137,8 @@ CTL_PROTO(stats_arenas_i_bins_j_allocated) CTL_PROTO(stats_arenas_i_bins_j_nmalloc) CTL_PROTO(stats_arenas_i_bins_j_ndalloc) CTL_PROTO(stats_arenas_i_bins_j_nrequests) -#ifdef JEMALLOC_TCACHE CTL_PROTO(stats_arenas_i_bins_j_nfills) CTL_PROTO(stats_arenas_i_bins_j_nflushes) -#endif CTL_PROTO(stats_arenas_i_bins_j_nruns) CTL_PROTO(stats_arenas_i_bins_j_nreruns) CTL_PROTO(stats_arenas_i_bins_j_highruns) @@ -181,31 +150,22 @@ CTL_PROTO(stats_arenas_i_lruns_j_nrequests) CTL_PROTO(stats_arenas_i_lruns_j_highruns) CTL_PROTO(stats_arenas_i_lruns_j_curruns) 
INDEX_PROTO(stats_arenas_i_lruns_j) -#endif CTL_PROTO(stats_arenas_i_nthreads) CTL_PROTO(stats_arenas_i_pactive) CTL_PROTO(stats_arenas_i_pdirty) -#ifdef JEMALLOC_STATS CTL_PROTO(stats_arenas_i_mapped) CTL_PROTO(stats_arenas_i_npurge) CTL_PROTO(stats_arenas_i_nmadvise) CTL_PROTO(stats_arenas_i_purged) -#endif INDEX_PROTO(stats_arenas_i) -#ifdef JEMALLOC_STATS CTL_PROTO(stats_cactive) CTL_PROTO(stats_allocated) CTL_PROTO(stats_active) CTL_PROTO(stats_mapped) -#endif -#ifdef JEMALLOC_SWAP -# ifdef JEMALLOC_STATS CTL_PROTO(swap_avail) -# endif CTL_PROTO(swap_prezeroed) CTL_PROTO(swap_nfds) CTL_PROTO(swap_fds) -#endif /******************************************************************************/ /* mallctl tree. */ @@ -223,21 +183,16 @@ CTL_PROTO(swap_fds) */ #define INDEX(i) false, {.indexed = {i##_index}}, NULL -#ifdef JEMALLOC_TCACHE static const ctl_node_t tcache_node[] = { {NAME("flush"), CTL(tcache_flush)} }; -#endif static const ctl_node_t thread_node[] = { - {NAME("arena"), CTL(thread_arena)} -#ifdef JEMALLOC_STATS - , + {NAME("arena"), CTL(thread_arena)}, {NAME("allocated"), CTL(thread_allocated)}, {NAME("allocatedp"), CTL(thread_allocatedp)}, {NAME("deallocated"), CTL(thread_deallocated)}, {NAME("deallocatedp"), CTL(thread_deallocatedp)} -#endif }; static const ctl_node_t config_node[] = { @@ -265,27 +220,13 @@ static const ctl_node_t opt_node[] = { {NAME("lg_chunk"), CTL(opt_lg_chunk)}, {NAME("narenas"), CTL(opt_narenas)}, {NAME("lg_dirty_mult"), CTL(opt_lg_dirty_mult)}, - {NAME("stats_print"), CTL(opt_stats_print)} -#ifdef JEMALLOC_FILL - , + {NAME("stats_print"), CTL(opt_stats_print)}, {NAME("junk"), CTL(opt_junk)}, - {NAME("zero"), CTL(opt_zero)} -#endif -#ifdef JEMALLOC_SYSV - , - {NAME("sysv"), CTL(opt_sysv)} -#endif -#ifdef JEMALLOC_XMALLOC - , - {NAME("xmalloc"), CTL(opt_xmalloc)} -#endif -#ifdef JEMALLOC_TCACHE - , + {NAME("zero"), CTL(opt_zero)}, + {NAME("sysv"), CTL(opt_sysv)}, + {NAME("xmalloc"), CTL(opt_xmalloc)}, {NAME("tcache"), 
CTL(opt_tcache)}, - {NAME("lg_tcache_gc_sweep"), CTL(opt_lg_tcache_gc_sweep)} -#endif -#ifdef JEMALLOC_PROF - , + {NAME("lg_tcache_gc_sweep"), CTL(opt_lg_tcache_gc_sweep)}, {NAME("prof"), CTL(opt_prof)}, {NAME("prof_prefix"), CTL(opt_prof_prefix)}, {NAME("prof_active"), CTL(opt_prof_active)}, @@ -295,12 +236,8 @@ static const ctl_node_t opt_node[] = { {NAME("prof_gdump"), CTL(opt_prof_gdump)}, {NAME("prof_leak"), CTL(opt_prof_leak)}, {NAME("prof_accum"), CTL(opt_prof_accum)}, - {NAME("lg_prof_tcmax"), CTL(opt_lg_prof_tcmax)} -#endif -#ifdef JEMALLOC_SWAP - , + {NAME("lg_prof_tcmax"), CTL(opt_lg_prof_tcmax)}, {NAME("overcommit"), CTL(opt_overcommit)} -#endif }; static const ctl_node_t arenas_bin_i_node[] = { @@ -335,42 +272,33 @@ static const ctl_node_t arenas_node[] = { {NAME("subpage"), CTL(arenas_subpage)}, {NAME("pagesize"), CTL(arenas_pagesize)}, {NAME("chunksize"), CTL(arenas_chunksize)}, -#ifdef JEMALLOC_TINY {NAME("tspace_min"), CTL(arenas_tspace_min)}, {NAME("tspace_max"), CTL(arenas_tspace_max)}, -#endif {NAME("qspace_min"), CTL(arenas_qspace_min)}, {NAME("qspace_max"), CTL(arenas_qspace_max)}, {NAME("cspace_min"), CTL(arenas_cspace_min)}, {NAME("cspace_max"), CTL(arenas_cspace_max)}, {NAME("sspace_min"), CTL(arenas_sspace_min)}, {NAME("sspace_max"), CTL(arenas_sspace_max)}, -#ifdef JEMALLOC_TCACHE {NAME("tcache_max"), CTL(arenas_tcache_max)}, -#endif {NAME("ntbins"), CTL(arenas_ntbins)}, {NAME("nqbins"), CTL(arenas_nqbins)}, {NAME("ncbins"), CTL(arenas_ncbins)}, {NAME("nsbins"), CTL(arenas_nsbins)}, {NAME("nbins"), CTL(arenas_nbins)}, -#ifdef JEMALLOC_TCACHE {NAME("nhbins"), CTL(arenas_nhbins)}, -#endif {NAME("bin"), CHILD(arenas_bin)}, {NAME("nlruns"), CTL(arenas_nlruns)}, {NAME("lrun"), CHILD(arenas_lrun)}, {NAME("purge"), CTL(arenas_purge)} }; -#ifdef JEMALLOC_PROF static const ctl_node_t prof_node[] = { {NAME("active"), CTL(prof_active)}, {NAME("dump"), CTL(prof_dump)}, {NAME("interval"), CTL(prof_interval)} }; -#endif -#ifdef JEMALLOC_STATS static 
const ctl_node_t stats_chunks_node[] = { {NAME("current"), CTL(stats_chunks_current)}, {NAME("total"), CTL(stats_chunks_total)}, @@ -402,10 +330,8 @@ static const ctl_node_t stats_arenas_i_bins_j_node[] = { {NAME("nmalloc"), CTL(stats_arenas_i_bins_j_nmalloc)}, {NAME("ndalloc"), CTL(stats_arenas_i_bins_j_ndalloc)}, {NAME("nrequests"), CTL(stats_arenas_i_bins_j_nrequests)}, -#ifdef JEMALLOC_TCACHE {NAME("nfills"), CTL(stats_arenas_i_bins_j_nfills)}, {NAME("nflushes"), CTL(stats_arenas_i_bins_j_nflushes)}, -#endif {NAME("nruns"), CTL(stats_arenas_i_bins_j_nruns)}, {NAME("nreruns"), CTL(stats_arenas_i_bins_j_nreruns)}, {NAME("highruns"), CTL(stats_arenas_i_bins_j_highruns)}, @@ -433,14 +359,11 @@ static const ctl_node_t super_stats_arenas_i_lruns_j_node[] = { static const ctl_node_t stats_arenas_i_lruns_node[] = { {INDEX(stats_arenas_i_lruns_j)} }; -#endif static const ctl_node_t stats_arenas_i_node[] = { {NAME("nthreads"), CTL(stats_arenas_i_nthreads)}, {NAME("pactive"), CTL(stats_arenas_i_pactive)}, - {NAME("pdirty"), CTL(stats_arenas_i_pdirty)} -#ifdef JEMALLOC_STATS - , + {NAME("pdirty"), CTL(stats_arenas_i_pdirty)}, {NAME("mapped"), CTL(stats_arenas_i_mapped)}, {NAME("npurge"), CTL(stats_arenas_i_npurge)}, {NAME("nmadvise"), CTL(stats_arenas_i_nmadvise)}, @@ -449,7 +372,6 @@ static const ctl_node_t stats_arenas_i_node[] = { {NAME("large"), CHILD(stats_arenas_i_large)}, {NAME("bins"), CHILD(stats_arenas_i_bins)}, {NAME("lruns"), CHILD(stats_arenas_i_lruns)} -#endif }; static const ctl_node_t super_stats_arenas_i_node[] = { {NAME(""), CHILD(stats_arenas_i)} @@ -460,46 +382,34 @@ static const ctl_node_t stats_arenas_node[] = { }; static const ctl_node_t stats_node[] = { -#ifdef JEMALLOC_STATS {NAME("cactive"), CTL(stats_cactive)}, {NAME("allocated"), CTL(stats_allocated)}, {NAME("active"), CTL(stats_active)}, {NAME("mapped"), CTL(stats_mapped)}, {NAME("chunks"), CHILD(stats_chunks)}, {NAME("huge"), CHILD(stats_huge)}, -#endif {NAME("arenas"), CHILD(stats_arenas)} }; 
-#ifdef JEMALLOC_SWAP static const ctl_node_t swap_node[] = { -# ifdef JEMALLOC_STATS {NAME("avail"), CTL(swap_avail)}, -# endif {NAME("prezeroed"), CTL(swap_prezeroed)}, {NAME("nfds"), CTL(swap_nfds)}, {NAME("fds"), CTL(swap_fds)} }; -#endif static const ctl_node_t root_node[] = { {NAME("version"), CTL(version)}, {NAME("epoch"), CTL(epoch)}, -#ifdef JEMALLOC_TCACHE {NAME("tcache"), CHILD(tcache)}, -#endif {NAME("thread"), CHILD(thread)}, {NAME("config"), CHILD(config)}, {NAME("opt"), CHILD(opt)}, {NAME("arenas"), CHILD(arenas)}, -#ifdef JEMALLOC_PROF {NAME("prof"), CHILD(prof)}, -#endif {NAME("stats"), CHILD(stats)} -#ifdef JEMALLOC_SWAP , {NAME("swap"), CHILD(swap)} -#endif }; static const ctl_node_t super_root_node[] = { {NAME(""), CHILD(root)} @@ -512,7 +422,6 @@ static const ctl_node_t super_root_node[] = { /******************************************************************************/ -#ifdef JEMALLOC_STATS static bool ctl_arena_init(ctl_arena_stats_t *astats) { @@ -532,7 +441,6 @@ ctl_arena_init(ctl_arena_stats_t *astats) return (false); } -#endif static void ctl_arena_clear(ctl_arena_stats_t *astats) @@ -540,18 +448,18 @@ ctl_arena_clear(ctl_arena_stats_t *astats) astats->pactive = 0; astats->pdirty = 0; -#ifdef JEMALLOC_STATS - memset(&astats->astats, 0, sizeof(arena_stats_t)); - astats->allocated_small = 0; - astats->nmalloc_small = 0; - astats->ndalloc_small = 0; - astats->nrequests_small = 0; - memset(astats->bstats, 0, nbins * sizeof(malloc_bin_stats_t)); - memset(astats->lstats, 0, nlclasses * sizeof(malloc_large_stats_t)); -#endif + if (config_stats) { + memset(&astats->astats, 0, sizeof(arena_stats_t)); + astats->allocated_small = 0; + astats->nmalloc_small = 0; + astats->ndalloc_small = 0; + astats->nrequests_small = 0; + memset(astats->bstats, 0, nbins * sizeof(malloc_bin_stats_t)); + memset(astats->lstats, 0, nlclasses * + sizeof(malloc_large_stats_t)); + } } -#ifdef JEMALLOC_STATS static void ctl_arena_stats_amerge(ctl_arena_stats_t *cstats, 
arena_t *arena) { @@ -604,17 +512,17 @@ ctl_arena_stats_smerge(ctl_arena_stats_t *sstats, ctl_arena_stats_t *astats) sstats->bstats[i].nmalloc += astats->bstats[i].nmalloc; sstats->bstats[i].ndalloc += astats->bstats[i].ndalloc; sstats->bstats[i].nrequests += astats->bstats[i].nrequests; -#ifdef JEMALLOC_TCACHE - sstats->bstats[i].nfills += astats->bstats[i].nfills; - sstats->bstats[i].nflushes += astats->bstats[i].nflushes; -#endif + if (config_tcache) { + sstats->bstats[i].nfills += astats->bstats[i].nfills; + sstats->bstats[i].nflushes += + astats->bstats[i].nflushes; + } sstats->bstats[i].nruns += astats->bstats[i].nruns; sstats->bstats[i].reruns += astats->bstats[i].reruns; sstats->bstats[i].highruns += astats->bstats[i].highruns; sstats->bstats[i].curruns += astats->bstats[i].curruns; } } -#endif static void ctl_arena_refresh(arena_t *arena, unsigned i) @@ -625,17 +533,17 @@ ctl_arena_refresh(arena_t *arena, unsigned i) ctl_arena_clear(astats); sstats->nthreads += astats->nthreads; -#ifdef JEMALLOC_STATS - ctl_arena_stats_amerge(astats, arena); - /* Merge into sum stats as well. */ - ctl_arena_stats_smerge(sstats, astats); -#else - astats->pactive += arena->nactive; - astats->pdirty += arena->ndirty; - /* Merge into sum stats as well. */ - sstats->pactive += arena->nactive; - sstats->pdirty += arena->ndirty; -#endif + if (config_stats) { + ctl_arena_stats_amerge(astats, arena); + /* Merge into sum stats as well. */ + ctl_arena_stats_smerge(sstats, astats); + } else { + astats->pactive += arena->nactive; + astats->pdirty += arena->ndirty; + /* Merge into sum stats as well. 
*/ + sstats->pactive += arena->nactive; + sstats->pdirty += arena->ndirty; + } } static void @@ -644,19 +552,19 @@ ctl_refresh(void) unsigned i; arena_t *tarenas[narenas]; -#ifdef JEMALLOC_STATS - malloc_mutex_lock(&chunks_mtx); - ctl_stats.chunks.current = stats_chunks.curchunks; - ctl_stats.chunks.total = stats_chunks.nchunks; - ctl_stats.chunks.high = stats_chunks.highchunks; - malloc_mutex_unlock(&chunks_mtx); + if (config_stats) { + malloc_mutex_lock(&chunks_mtx); + ctl_stats.chunks.current = stats_chunks.curchunks; + ctl_stats.chunks.total = stats_chunks.nchunks; + ctl_stats.chunks.high = stats_chunks.highchunks; + malloc_mutex_unlock(&chunks_mtx); - malloc_mutex_lock(&huge_mtx); - ctl_stats.huge.allocated = huge_allocated; - ctl_stats.huge.nmalloc = huge_nmalloc; - ctl_stats.huge.ndalloc = huge_ndalloc; - malloc_mutex_unlock(&huge_mtx); -#endif + malloc_mutex_lock(&huge_mtx); + ctl_stats.huge.allocated = huge_allocated; + ctl_stats.huge.nmalloc = huge_nmalloc; + ctl_stats.huge.ndalloc = huge_ndalloc; + malloc_mutex_unlock(&huge_mtx); + } /* * Clear sum stats, since they will be merged into by @@ -682,20 +590,20 @@ ctl_refresh(void) ctl_arena_refresh(tarenas[i], i); } -#ifdef JEMALLOC_STATS - ctl_stats.allocated = ctl_stats.arenas[narenas].allocated_small - + ctl_stats.arenas[narenas].astats.allocated_large - + ctl_stats.huge.allocated; - ctl_stats.active = (ctl_stats.arenas[narenas].pactive << PAGE_SHIFT) - + ctl_stats.huge.allocated; - ctl_stats.mapped = (ctl_stats.chunks.current << opt_lg_chunk); + if (config_stats) { + ctl_stats.allocated = ctl_stats.arenas[narenas].allocated_small + + ctl_stats.arenas[narenas].astats.allocated_large + + ctl_stats.huge.allocated; + ctl_stats.active = (ctl_stats.arenas[narenas].pactive << + PAGE_SHIFT) + ctl_stats.huge.allocated; + ctl_stats.mapped = (ctl_stats.chunks.current << opt_lg_chunk); -# ifdef JEMALLOC_SWAP - malloc_mutex_lock(&swap_mtx); - ctl_stats.swap_avail = swap_avail; - malloc_mutex_unlock(&swap_mtx); -# 
endif -#endif + if (config_swap) { + malloc_mutex_lock(&swap_mtx); + ctl_stats.swap_avail = swap_avail; + malloc_mutex_unlock(&swap_mtx); + } + } ctl_epoch++; } @@ -707,10 +615,6 @@ ctl_init(void) malloc_mutex_lock(&ctl_mtx); if (ctl_initialized == false) { -#ifdef JEMALLOC_STATS - unsigned i; -#endif - /* * Allocate space for one extra arena stats element, which * contains summed stats across all arenas. @@ -729,14 +633,15 @@ ctl_init(void) * ever get used. Lazy initialization would allow errors to * cause inconsistent state to be viewable by the application. */ -#ifdef JEMALLOC_STATS - for (i = 0; i <= narenas; i++) { - if (ctl_arena_init(&ctl_stats.arenas[i])) { - ret = true; - goto RETURN; + if (config_stats) { + unsigned i; + for (i = 0; i <= narenas; i++) { + if (ctl_arena_init(&ctl_stats.arenas[i])) { + ret = true; + goto RETURN; + } } } -#endif ctl_stats.arenas[narenas].initialized = true; ctl_epoch = 0; @@ -998,6 +903,54 @@ ctl_boot(void) } \ } while (0) +/* + * There's a lot of code duplication in the following macros due to limitations + * in how nested cpp macros are expanded. 
+ */ +#define CTL_RO_CLGEN(c, l, n, v, t) \ +static int \ +n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ + void *newp, size_t newlen) \ +{ \ + int ret; \ + t oldval; \ + \ + if ((c) == false) \ + return (ENOENT); \ + if (l) \ + malloc_mutex_lock(&ctl_mtx); \ + READONLY(); \ + oldval = v; \ + READ(oldval, t); \ + \ + ret = 0; \ +RETURN: \ + if (l) \ + malloc_mutex_unlock(&ctl_mtx); \ + return (ret); \ +} + +#define CTL_RO_CGEN(c, n, v, t) \ +static int \ +n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ + void *newp, size_t newlen) \ +{ \ + int ret; \ + t oldval; \ + \ + if ((c) == false) \ + return (ENOENT); \ + malloc_mutex_lock(&ctl_mtx); \ + READONLY(); \ + oldval = v; \ + READ(oldval, t); \ + \ + ret = 0; \ +RETURN: \ + malloc_mutex_unlock(&ctl_mtx); \ + return (ret); \ +} + #define CTL_RO_GEN(n, v, t) \ static int \ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ @@ -1021,7 +974,26 @@ RETURN: \ * ctl_mtx is not acquired, under the assumption that no pertinent data will * mutate during the call. 
*/ -#define CTL_RO_NL_GEN(n, v, t) \ +#define CTL_RO_NL_CGEN(c, n, v, t) \ +static int \ +n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ + void *newp, size_t newlen) \ +{ \ + int ret; \ + t oldval; \ + \ + if ((c) == false) \ + return (ENOENT); \ + READONLY(); \ + oldval = v; \ + READ(oldval, t); \ + \ + ret = 0; \ +RETURN: \ + return (ret); \ +} + +#define CTL_RO_NL_GEN(n, v, t) \ static int \ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ void *newp, size_t newlen) \ @@ -1038,7 +1010,7 @@ RETURN: \ return (ret); \ } -#define CTL_RO_TRUE_GEN(n) \ +#define CTL_RO_BOOL_CONFIG_GEN(n) \ static int \ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ void *newp, size_t newlen) \ @@ -1046,25 +1018,10 @@ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ int ret; \ bool oldval; \ \ + if (n == false) \ + return (ENOENT); \ READONLY(); \ - oldval = true; \ - READ(oldval, bool); \ - \ - ret = 0; \ -RETURN: \ - return (ret); \ -} - -#define CTL_RO_FALSE_GEN(n) \ -static int \ -n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ - void *newp, size_t newlen) \ -{ \ - int ret; \ - bool oldval; \ - \ - READONLY(); \ - oldval = false; \ + oldval = n; \ READ(oldval, bool); \ \ ret = 0; \ @@ -1094,7 +1051,6 @@ RETURN: return (ret); } -#ifdef JEMALLOC_TCACHE static int tcache_flush_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) @@ -1102,6 +1058,9 @@ tcache_flush_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, int ret; tcache_t *tcache; + if (config_tcache == false) + return (ENOENT); + VOID(); tcache = TCACHE_GET(); @@ -1116,7 +1075,6 @@ tcache_flush_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, RETURN: return (ret); } -#endif static int thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, @@ -1151,13 +1109,11 @@ thread_arena_ctl(const size_t *mib, size_t 
miblen, void *oldp, size_t *oldlenp, /* Set new arena association. */ ARENA_SET(arena); -#ifdef JEMALLOC_TCACHE - { + if (config_tcache) { tcache_t *tcache = TCACHE_GET(); if (tcache != NULL) tcache->arena = arena; } -#endif } ret = 0; @@ -1165,104 +1121,29 @@ RETURN: return (ret); } -#ifdef JEMALLOC_STATS -CTL_RO_NL_GEN(thread_allocated, ALLOCATED_GET(), uint64_t); -CTL_RO_NL_GEN(thread_allocatedp, ALLOCATEDP_GET(), uint64_t *); -CTL_RO_NL_GEN(thread_deallocated, DEALLOCATED_GET(), uint64_t); -CTL_RO_NL_GEN(thread_deallocatedp, DEALLOCATEDP_GET(), uint64_t *); -#endif +CTL_RO_NL_CGEN(config_stats, thread_allocated, ALLOCATED_GET(), uint64_t) +CTL_RO_NL_CGEN(config_stats, thread_allocatedp, ALLOCATEDP_GET(), uint64_t *) +CTL_RO_NL_CGEN(config_stats, thread_deallocated, DEALLOCATED_GET(), uint64_t) +CTL_RO_NL_CGEN(config_stats, thread_deallocatedp, DEALLOCATEDP_GET(), + uint64_t *) /******************************************************************************/ -#ifdef JEMALLOC_DEBUG -CTL_RO_TRUE_GEN(config_debug) -#else -CTL_RO_FALSE_GEN(config_debug) -#endif - -#ifdef JEMALLOC_DSS -CTL_RO_TRUE_GEN(config_dss) -#else -CTL_RO_FALSE_GEN(config_dss) -#endif - -#ifdef JEMALLOC_DYNAMIC_PAGE_SHIFT -CTL_RO_TRUE_GEN(config_dynamic_page_shift) -#else -CTL_RO_FALSE_GEN(config_dynamic_page_shift) -#endif - -#ifdef JEMALLOC_FILL -CTL_RO_TRUE_GEN(config_fill) -#else -CTL_RO_FALSE_GEN(config_fill) -#endif - -#ifdef JEMALLOC_LAZY_LOCK -CTL_RO_TRUE_GEN(config_lazy_lock) -#else -CTL_RO_FALSE_GEN(config_lazy_lock) -#endif - -#ifdef JEMALLOC_PROF -CTL_RO_TRUE_GEN(config_prof) -#else -CTL_RO_FALSE_GEN(config_prof) -#endif - -#ifdef JEMALLOC_PROF_LIBGCC -CTL_RO_TRUE_GEN(config_prof_libgcc) -#else -CTL_RO_FALSE_GEN(config_prof_libgcc) -#endif - -#ifdef JEMALLOC_PROF_LIBUNWIND -CTL_RO_TRUE_GEN(config_prof_libunwind) -#else -CTL_RO_FALSE_GEN(config_prof_libunwind) -#endif - -#ifdef JEMALLOC_STATS -CTL_RO_TRUE_GEN(config_stats) -#else -CTL_RO_FALSE_GEN(config_stats) -#endif - -#ifdef 
JEMALLOC_SWAP -CTL_RO_TRUE_GEN(config_swap) -#else -CTL_RO_FALSE_GEN(config_swap) -#endif - -#ifdef JEMALLOC_SYSV -CTL_RO_TRUE_GEN(config_sysv) -#else -CTL_RO_FALSE_GEN(config_sysv) -#endif - -#ifdef JEMALLOC_TCACHE -CTL_RO_TRUE_GEN(config_tcache) -#else -CTL_RO_FALSE_GEN(config_tcache) -#endif - -#ifdef JEMALLOC_TINY -CTL_RO_TRUE_GEN(config_tiny) -#else -CTL_RO_FALSE_GEN(config_tiny) -#endif - -#ifdef JEMALLOC_TLS -CTL_RO_TRUE_GEN(config_tls) -#else -CTL_RO_FALSE_GEN(config_tls) -#endif - -#ifdef JEMALLOC_XMALLOC -CTL_RO_TRUE_GEN(config_xmalloc) -#else -CTL_RO_FALSE_GEN(config_xmalloc) -#endif +CTL_RO_BOOL_CONFIG_GEN(config_debug) +CTL_RO_BOOL_CONFIG_GEN(config_dss) +CTL_RO_BOOL_CONFIG_GEN(config_dynamic_page_shift) +CTL_RO_BOOL_CONFIG_GEN(config_fill) +CTL_RO_BOOL_CONFIG_GEN(config_lazy_lock) +CTL_RO_BOOL_CONFIG_GEN(config_prof) +CTL_RO_BOOL_CONFIG_GEN(config_prof_libgcc) +CTL_RO_BOOL_CONFIG_GEN(config_prof_libunwind) +CTL_RO_BOOL_CONFIG_GEN(config_stats) +CTL_RO_BOOL_CONFIG_GEN(config_swap) +CTL_RO_BOOL_CONFIG_GEN(config_sysv) +CTL_RO_BOOL_CONFIG_GEN(config_tcache) +CTL_RO_BOOL_CONFIG_GEN(config_tiny) +CTL_RO_BOOL_CONFIG_GEN(config_tls) +CTL_RO_BOOL_CONFIG_GEN(config_xmalloc) /******************************************************************************/ @@ -1273,35 +1154,24 @@ CTL_RO_NL_GEN(opt_lg_chunk, opt_lg_chunk, size_t) CTL_RO_NL_GEN(opt_narenas, opt_narenas, size_t) CTL_RO_NL_GEN(opt_lg_dirty_mult, opt_lg_dirty_mult, ssize_t) CTL_RO_NL_GEN(opt_stats_print, opt_stats_print, bool) -#ifdef JEMALLOC_FILL -CTL_RO_NL_GEN(opt_junk, opt_junk, bool) -CTL_RO_NL_GEN(opt_zero, opt_zero, bool) -#endif -#ifdef JEMALLOC_SYSV -CTL_RO_NL_GEN(opt_sysv, opt_sysv, bool) -#endif -#ifdef JEMALLOC_XMALLOC -CTL_RO_NL_GEN(opt_xmalloc, opt_xmalloc, bool) -#endif -#ifdef JEMALLOC_TCACHE -CTL_RO_NL_GEN(opt_tcache, opt_tcache, bool) -CTL_RO_NL_GEN(opt_lg_tcache_gc_sweep, opt_lg_tcache_gc_sweep, ssize_t) -#endif -#ifdef JEMALLOC_PROF -CTL_RO_NL_GEN(opt_prof, opt_prof, bool) 
-CTL_RO_NL_GEN(opt_prof_prefix, opt_prof_prefix, const char *) -CTL_RO_GEN(opt_prof_active, opt_prof_active, bool) /* Mutable. */ -CTL_RO_NL_GEN(opt_lg_prof_bt_max, opt_lg_prof_bt_max, size_t) -CTL_RO_NL_GEN(opt_lg_prof_sample, opt_lg_prof_sample, size_t) -CTL_RO_NL_GEN(opt_lg_prof_interval, opt_lg_prof_interval, ssize_t) -CTL_RO_NL_GEN(opt_prof_gdump, opt_prof_gdump, bool) -CTL_RO_NL_GEN(opt_prof_leak, opt_prof_leak, bool) -CTL_RO_NL_GEN(opt_prof_accum, opt_prof_accum, bool) -CTL_RO_NL_GEN(opt_lg_prof_tcmax, opt_lg_prof_tcmax, ssize_t) -#endif -#ifdef JEMALLOC_SWAP -CTL_RO_NL_GEN(opt_overcommit, opt_overcommit, bool) -#endif +CTL_RO_NL_CGEN(config_fill, opt_junk, opt_junk, bool) +CTL_RO_NL_CGEN(config_fill, opt_zero, opt_zero, bool) +CTL_RO_NL_CGEN(config_sysv, opt_sysv, opt_sysv, bool) +CTL_RO_NL_CGEN(config_xmalloc, opt_xmalloc, opt_xmalloc, bool) +CTL_RO_NL_CGEN(config_tcache, opt_tcache, opt_tcache, bool) +CTL_RO_NL_CGEN(config_tcache, opt_lg_tcache_gc_sweep, opt_lg_tcache_gc_sweep, + ssize_t) +CTL_RO_NL_CGEN(config_prof, opt_prof, opt_prof, bool) +CTL_RO_NL_CGEN(config_prof, opt_prof_prefix, opt_prof_prefix, const char *) +CTL_RO_CGEN(config_prof, opt_prof_active, opt_prof_active, bool) /* Mutable. 
*/ +CTL_RO_NL_CGEN(config_prof, opt_lg_prof_bt_max, opt_lg_prof_bt_max, size_t) +CTL_RO_NL_CGEN(config_prof, opt_lg_prof_sample, opt_lg_prof_sample, size_t) +CTL_RO_NL_CGEN(config_prof, opt_lg_prof_interval, opt_lg_prof_interval, ssize_t) +CTL_RO_NL_CGEN(config_prof, opt_prof_gdump, opt_prof_gdump, bool) +CTL_RO_NL_CGEN(config_prof, opt_prof_leak, opt_prof_leak, bool) +CTL_RO_NL_CGEN(config_prof, opt_prof_accum, opt_prof_accum, bool) +CTL_RO_NL_CGEN(config_prof, opt_lg_prof_tcmax, opt_lg_prof_tcmax, ssize_t) +CTL_RO_NL_CGEN(config_swap, opt_overcommit, opt_overcommit, bool) /******************************************************************************/ @@ -1360,27 +1230,21 @@ CTL_RO_NL_GEN(arenas_cacheline, CACHELINE, size_t) CTL_RO_NL_GEN(arenas_subpage, SUBPAGE, size_t) CTL_RO_NL_GEN(arenas_pagesize, PAGE_SIZE, size_t) CTL_RO_NL_GEN(arenas_chunksize, chunksize, size_t) -#ifdef JEMALLOC_TINY -CTL_RO_NL_GEN(arenas_tspace_min, (1U << LG_TINY_MIN), size_t) -CTL_RO_NL_GEN(arenas_tspace_max, (qspace_min >> 1), size_t) -#endif +CTL_RO_NL_CGEN(config_tiny, arenas_tspace_min, (1U << LG_TINY_MIN), size_t) +CTL_RO_NL_CGEN(config_tiny, arenas_tspace_max, (qspace_min >> 1), size_t) CTL_RO_NL_GEN(arenas_qspace_min, qspace_min, size_t) CTL_RO_NL_GEN(arenas_qspace_max, qspace_max, size_t) CTL_RO_NL_GEN(arenas_cspace_min, cspace_min, size_t) CTL_RO_NL_GEN(arenas_cspace_max, cspace_max, size_t) CTL_RO_NL_GEN(arenas_sspace_min, sspace_min, size_t) CTL_RO_NL_GEN(arenas_sspace_max, sspace_max, size_t) -#ifdef JEMALLOC_TCACHE -CTL_RO_NL_GEN(arenas_tcache_max, tcache_maxclass, size_t) -#endif +CTL_RO_NL_CGEN(config_tcache, arenas_tcache_max, tcache_maxclass, size_t) CTL_RO_NL_GEN(arenas_ntbins, ntbins, unsigned) CTL_RO_NL_GEN(arenas_nqbins, nqbins, unsigned) CTL_RO_NL_GEN(arenas_ncbins, ncbins, unsigned) CTL_RO_NL_GEN(arenas_nsbins, nsbins, unsigned) CTL_RO_NL_GEN(arenas_nbins, nbins, unsigned) -#ifdef JEMALLOC_TCACHE -CTL_RO_NL_GEN(arenas_nhbins, nhbins, unsigned) -#endif 
+CTL_RO_NL_CGEN(config_tcache, arenas_nhbins, nhbins, unsigned) CTL_RO_NL_GEN(arenas_nlruns, nlclasses, size_t) static int @@ -1423,7 +1287,6 @@ RETURN: /******************************************************************************/ -#ifdef JEMALLOC_PROF static int prof_active_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) @@ -1431,6 +1294,9 @@ prof_active_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, int ret; bool oldval; + if (config_prof == false) + return (ENOENT); + malloc_mutex_lock(&ctl_mtx); /* Protect opt_prof_active. */ oldval = opt_prof_active; if (newp != NULL) { @@ -1457,6 +1323,9 @@ prof_dump_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, int ret; const char *filename = NULL; + if (config_prof == false) + return (ENOENT); + WRITEONLY(); WRITE(filename, const char *); @@ -1470,56 +1339,53 @@ RETURN: return (ret); } -CTL_RO_NL_GEN(prof_interval, prof_interval, uint64_t) -#endif +CTL_RO_NL_CGEN(config_prof, prof_interval, prof_interval, uint64_t) /******************************************************************************/ -#ifdef JEMALLOC_STATS -CTL_RO_GEN(stats_chunks_current, ctl_stats.chunks.current, size_t) -CTL_RO_GEN(stats_chunks_total, ctl_stats.chunks.total, uint64_t) -CTL_RO_GEN(stats_chunks_high, ctl_stats.chunks.high, size_t) -CTL_RO_GEN(stats_huge_allocated, huge_allocated, size_t) -CTL_RO_GEN(stats_huge_nmalloc, huge_nmalloc, uint64_t) -CTL_RO_GEN(stats_huge_ndalloc, huge_ndalloc, uint64_t) -CTL_RO_GEN(stats_arenas_i_small_allocated, +CTL_RO_CGEN(config_stats, stats_chunks_current, ctl_stats.chunks.current, + size_t) +CTL_RO_CGEN(config_stats, stats_chunks_total, ctl_stats.chunks.total, uint64_t) +CTL_RO_CGEN(config_stats, stats_chunks_high, ctl_stats.chunks.high, size_t) +CTL_RO_CGEN(config_stats, stats_huge_allocated, huge_allocated, size_t) +CTL_RO_CGEN(config_stats, stats_huge_nmalloc, huge_nmalloc, uint64_t) +CTL_RO_CGEN(config_stats, 
stats_huge_ndalloc, huge_ndalloc, uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_small_allocated, ctl_stats.arenas[mib[2]].allocated_small, size_t) -CTL_RO_GEN(stats_arenas_i_small_nmalloc, +CTL_RO_CGEN(config_stats, stats_arenas_i_small_nmalloc, ctl_stats.arenas[mib[2]].nmalloc_small, uint64_t) -CTL_RO_GEN(stats_arenas_i_small_ndalloc, +CTL_RO_CGEN(config_stats, stats_arenas_i_small_ndalloc, ctl_stats.arenas[mib[2]].ndalloc_small, uint64_t) -CTL_RO_GEN(stats_arenas_i_small_nrequests, +CTL_RO_CGEN(config_stats, stats_arenas_i_small_nrequests, ctl_stats.arenas[mib[2]].nrequests_small, uint64_t) -CTL_RO_GEN(stats_arenas_i_large_allocated, +CTL_RO_CGEN(config_stats, stats_arenas_i_large_allocated, ctl_stats.arenas[mib[2]].astats.allocated_large, size_t) -CTL_RO_GEN(stats_arenas_i_large_nmalloc, +CTL_RO_CGEN(config_stats, stats_arenas_i_large_nmalloc, ctl_stats.arenas[mib[2]].astats.nmalloc_large, uint64_t) -CTL_RO_GEN(stats_arenas_i_large_ndalloc, +CTL_RO_CGEN(config_stats, stats_arenas_i_large_ndalloc, ctl_stats.arenas[mib[2]].astats.ndalloc_large, uint64_t) -CTL_RO_GEN(stats_arenas_i_large_nrequests, +CTL_RO_CGEN(config_stats, stats_arenas_i_large_nrequests, ctl_stats.arenas[mib[2]].astats.nrequests_large, uint64_t) -CTL_RO_GEN(stats_arenas_i_bins_j_allocated, +CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_allocated, ctl_stats.arenas[mib[2]].bstats[mib[4]].allocated, size_t) -CTL_RO_GEN(stats_arenas_i_bins_j_nmalloc, +CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nmalloc, ctl_stats.arenas[mib[2]].bstats[mib[4]].nmalloc, uint64_t) -CTL_RO_GEN(stats_arenas_i_bins_j_ndalloc, +CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_ndalloc, ctl_stats.arenas[mib[2]].bstats[mib[4]].ndalloc, uint64_t) -CTL_RO_GEN(stats_arenas_i_bins_j_nrequests, +CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nrequests, ctl_stats.arenas[mib[2]].bstats[mib[4]].nrequests, uint64_t) -#ifdef JEMALLOC_TCACHE -CTL_RO_GEN(stats_arenas_i_bins_j_nfills, +CTL_RO_CGEN(config_stats && config_tcache, 
stats_arenas_i_bins_j_nfills, ctl_stats.arenas[mib[2]].bstats[mib[4]].nfills, uint64_t) -CTL_RO_GEN(stats_arenas_i_bins_j_nflushes, +CTL_RO_CGEN(config_stats && config_tcache, stats_arenas_i_bins_j_nflushes, ctl_stats.arenas[mib[2]].bstats[mib[4]].nflushes, uint64_t) -#endif -CTL_RO_GEN(stats_arenas_i_bins_j_nruns, +CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nruns, ctl_stats.arenas[mib[2]].bstats[mib[4]].nruns, uint64_t) -CTL_RO_GEN(stats_arenas_i_bins_j_nreruns, +CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nreruns, ctl_stats.arenas[mib[2]].bstats[mib[4]].reruns, uint64_t) -CTL_RO_GEN(stats_arenas_i_bins_j_highruns, +CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_highruns, ctl_stats.arenas[mib[2]].bstats[mib[4]].highruns, size_t) -CTL_RO_GEN(stats_arenas_i_bins_j_curruns, +CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_curruns, ctl_stats.arenas[mib[2]].bstats[mib[4]].curruns, size_t) const ctl_node_t * @@ -1531,15 +1397,15 @@ stats_arenas_i_bins_j_index(const size_t *mib, size_t miblen, size_t j) return (super_stats_arenas_i_bins_j_node); } -CTL_RO_GEN(stats_arenas_i_lruns_j_nmalloc, +CTL_RO_CGEN(config_stats, stats_arenas_i_lruns_j_nmalloc, ctl_stats.arenas[mib[2]].lstats[mib[4]].nmalloc, uint64_t) -CTL_RO_GEN(stats_arenas_i_lruns_j_ndalloc, +CTL_RO_CGEN(config_stats, stats_arenas_i_lruns_j_ndalloc, ctl_stats.arenas[mib[2]].lstats[mib[4]].ndalloc, uint64_t) -CTL_RO_GEN(stats_arenas_i_lruns_j_nrequests, +CTL_RO_CGEN(config_stats, stats_arenas_i_lruns_j_nrequests, ctl_stats.arenas[mib[2]].lstats[mib[4]].nrequests, uint64_t) -CTL_RO_GEN(stats_arenas_i_lruns_j_curruns, +CTL_RO_CGEN(config_stats, stats_arenas_i_lruns_j_curruns, ctl_stats.arenas[mib[2]].lstats[mib[4]].curruns, size_t) -CTL_RO_GEN(stats_arenas_i_lruns_j_highruns, +CTL_RO_CGEN(config_stats, stats_arenas_i_lruns_j_highruns, ctl_stats.arenas[mib[2]].lstats[mib[4]].highruns, size_t) const ctl_node_t * @@ -1551,20 +1417,17 @@ stats_arenas_i_lruns_j_index(const size_t *mib, size_t miblen, size_t j) 
return (super_stats_arenas_i_lruns_j_node); } -#endif CTL_RO_GEN(stats_arenas_i_nthreads, ctl_stats.arenas[mib[2]].nthreads, unsigned) CTL_RO_GEN(stats_arenas_i_pactive, ctl_stats.arenas[mib[2]].pactive, size_t) CTL_RO_GEN(stats_arenas_i_pdirty, ctl_stats.arenas[mib[2]].pdirty, size_t) -#ifdef JEMALLOC_STATS -CTL_RO_GEN(stats_arenas_i_mapped, ctl_stats.arenas[mib[2]].astats.mapped, - size_t) -CTL_RO_GEN(stats_arenas_i_npurge, ctl_stats.arenas[mib[2]].astats.npurge, - uint64_t) -CTL_RO_GEN(stats_arenas_i_nmadvise, ctl_stats.arenas[mib[2]].astats.nmadvise, - uint64_t) -CTL_RO_GEN(stats_arenas_i_purged, ctl_stats.arenas[mib[2]].astats.purged, - uint64_t) -#endif +CTL_RO_CGEN(config_stats, stats_arenas_i_mapped, + ctl_stats.arenas[mib[2]].astats.mapped, size_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_npurge, + ctl_stats.arenas[mib[2]].astats.npurge, uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_nmadvise, + ctl_stats.arenas[mib[2]].astats.nmadvise, uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_purged, + ctl_stats.arenas[mib[2]].astats.purged, uint64_t) const ctl_node_t * stats_arenas_i_index(const size_t *mib, size_t miblen, size_t i) @@ -1583,19 +1446,15 @@ RETURN: return (ret); } -#ifdef JEMALLOC_STATS -CTL_RO_GEN(stats_cactive, &stats_cactive, size_t *) -CTL_RO_GEN(stats_allocated, ctl_stats.allocated, size_t) -CTL_RO_GEN(stats_active, ctl_stats.active, size_t) -CTL_RO_GEN(stats_mapped, ctl_stats.mapped, size_t) -#endif +CTL_RO_CGEN(config_stats, stats_cactive, &stats_cactive, size_t *) +CTL_RO_CGEN(config_stats, stats_allocated, ctl_stats.allocated, size_t) +CTL_RO_CGEN(config_stats, stats_active, ctl_stats.active, size_t) +CTL_RO_CGEN(config_stats, stats_mapped, ctl_stats.mapped, size_t) /******************************************************************************/ -#ifdef JEMALLOC_SWAP -# ifdef JEMALLOC_STATS -CTL_RO_GEN(swap_avail, ctl_stats.swap_avail, size_t) -# endif +CTL_RO_CGEN(config_swap && config_stats, swap_avail, ctl_stats.swap_avail, + 
size_t) static int swap_prezeroed_ctl(const size_t *mib, size_t miblen, void *oldp, @@ -1603,6 +1462,9 @@ swap_prezeroed_ctl(const size_t *mib, size_t miblen, void *oldp, { int ret; + if (config_swap == false) + return (ENOENT); + malloc_mutex_lock(&ctl_mtx); if (swap_enabled) { READONLY(); @@ -1625,7 +1487,7 @@ RETURN: return (ret); } -CTL_RO_GEN(swap_nfds, swap_nfds, size_t) +CTL_RO_CGEN(config_swap, swap_nfds, swap_nfds, size_t) static int swap_fds_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, @@ -1633,6 +1495,9 @@ swap_fds_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, { int ret; + if (config_swap == false) + return (ENOENT); + malloc_mutex_lock(&ctl_mtx); if (swap_enabled) { READONLY(); @@ -1667,4 +1532,3 @@ RETURN: malloc_mutex_unlock(&ctl_mtx); return (ret); } -#endif diff --git a/src/extent.c b/src/extent.c index 3c04d3aa..8c09b486 100644 --- a/src/extent.c +++ b/src/extent.c @@ -3,7 +3,6 @@ /******************************************************************************/ -#if (defined(JEMALLOC_SWAP) || defined(JEMALLOC_DSS)) static inline int extent_szad_comp(extent_node_t *a, extent_node_t *b) { @@ -25,7 +24,6 @@ extent_szad_comp(extent_node_t *a, extent_node_t *b) /* Generate red-black tree functions. */ rb_gen(, extent_tree_szad_, extent_tree_t, extent_node_t, link_szad, extent_szad_comp) -#endif static inline int extent_ad_comp(extent_node_t *a, extent_node_t *b) diff --git a/src/huge.c b/src/huge.c index a4f9b054..1eee436e 100644 --- a/src/huge.c +++ b/src/huge.c @@ -4,11 +4,9 @@ /******************************************************************************/ /* Data. 
*/ -#ifdef JEMALLOC_STATS uint64_t huge_nmalloc; uint64_t huge_ndalloc; size_t huge_allocated; -#endif malloc_mutex_t huge_mtx; @@ -49,21 +47,19 @@ huge_malloc(size_t size, bool zero) malloc_mutex_lock(&huge_mtx); extent_tree_ad_insert(&huge, node); -#ifdef JEMALLOC_STATS - stats_cactive_add(csize); - huge_nmalloc++; - huge_allocated += csize; -#endif + if (config_stats) { + stats_cactive_add(csize); + huge_nmalloc++; + huge_allocated += csize; + } malloc_mutex_unlock(&huge_mtx); -#ifdef JEMALLOC_FILL - if (zero == false) { + if (config_fill && zero == false) { if (opt_junk) memset(ret, 0xa5, csize); else if (opt_zero) memset(ret, 0, csize); } -#endif return (ret); } @@ -134,21 +130,19 @@ huge_palloc(size_t size, size_t alignment, bool zero) malloc_mutex_lock(&huge_mtx); extent_tree_ad_insert(&huge, node); -#ifdef JEMALLOC_STATS - stats_cactive_add(chunk_size); - huge_nmalloc++; - huge_allocated += chunk_size; -#endif + if (config_stats) { + stats_cactive_add(chunk_size); + huge_nmalloc++; + huge_allocated += chunk_size; + } malloc_mutex_unlock(&huge_mtx); -#ifdef JEMALLOC_FILL - if (zero == false) { + if (config_fill && zero == false) { if (opt_junk) memset(ret, 0xa5, chunk_size); else if (opt_zero) memset(ret, 0, chunk_size); } -#endif return (ret); } @@ -164,12 +158,10 @@ huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra) && CHUNK_CEILING(oldsize) >= CHUNK_CEILING(size) && CHUNK_CEILING(oldsize) <= CHUNK_CEILING(size+extra)) { assert(CHUNK_CEILING(oldsize) == oldsize); -#ifdef JEMALLOC_FILL - if (opt_junk && size < oldsize) { + if (config_fill && opt_junk && size < oldsize) { memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize - size); } -#endif return (ptr); } @@ -223,15 +215,10 @@ huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, * source nor the destination are in swap or dss. 
*/ #ifdef JEMALLOC_MREMAP_FIXED - if (oldsize >= chunksize -# ifdef JEMALLOC_SWAP - && (swap_enabled == false || (chunk_in_swap(ptr) == false && - chunk_in_swap(ret) == false)) -# endif -# ifdef JEMALLOC_DSS - && chunk_in_dss(ptr) == false && chunk_in_dss(ret) == false -# endif - ) { + if (oldsize >= chunksize && (config_swap == false || swap_enabled == + false || (chunk_in_swap(ptr) == false && chunk_in_swap(ret) == + false)) && (config_dss == false || (chunk_in_dss(ptr) == false && + chunk_in_dss(ret) == false))) { size_t newsize = huge_salloc(ret); /* @@ -285,23 +272,16 @@ huge_dalloc(void *ptr, bool unmap) assert(node->addr == ptr); extent_tree_ad_remove(&huge, node); -#ifdef JEMALLOC_STATS - stats_cactive_sub(node->size); - huge_ndalloc++; - huge_allocated -= node->size; -#endif + if (config_stats) { + stats_cactive_sub(node->size); + huge_ndalloc++; + huge_allocated -= node->size; + } malloc_mutex_unlock(&huge_mtx); - if (unmap) { - /* Unmap chunk. */ -#ifdef JEMALLOC_FILL -#if (defined(JEMALLOC_SWAP) || defined(JEMALLOC_DSS)) - if (opt_junk) - memset(node->addr, 0x5a, node->size); -#endif -#endif - } + if (unmap && config_fill && (config_swap || config_dss) && opt_junk) + memset(node->addr, 0x5a, node->size); chunk_dealloc(node->addr, node->size, unmap); @@ -328,7 +308,6 @@ huge_salloc(const void *ptr) return (ret); } -#ifdef JEMALLOC_PROF prof_ctx_t * huge_prof_ctx_get(const void *ptr) { @@ -365,7 +344,6 @@ huge_prof_ctx_set(const void *ptr, prof_ctx_t *ctx) malloc_mutex_unlock(&huge_mtx); } -#endif bool huge_boot(void) @@ -376,11 +354,11 @@ huge_boot(void) return (true); extent_tree_ad_new(&huge); -#ifdef JEMALLOC_STATS - huge_nmalloc = 0; - huge_ndalloc = 0; - huge_allocated = 0; -#endif + if (config_stats) { + huge_nmalloc = 0; + huge_ndalloc = 0; + huge_allocated = 0; + } return (false); } diff --git a/src/jemalloc.c b/src/jemalloc.c index a161c2e2..9e1814de 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -13,13 +13,10 @@ pthread_key_t arenas_tsd; 
__thread arena_t *arenas_tls JEMALLOC_ATTR(tls_model("initial-exec")); #endif -#ifdef JEMALLOC_STATS -# ifndef NO_TLS +#ifndef NO_TLS __thread thread_allocated_t thread_allocated_tls; -# else -pthread_key_t thread_allocated_tsd; -# endif #endif +pthread_key_t thread_allocated_tsd; /* Set to true once the allocator has been initialized. */ static bool malloc_initialized = false; @@ -28,13 +25,7 @@ static bool malloc_initialized = false; static pthread_t malloc_initializer = (unsigned long)0; /* Used to avoid initialization races. */ -static malloc_mutex_t init_lock = -#ifdef JEMALLOC_OSSPIN - 0 -#else - MALLOC_MUTEX_INITIALIZER -#endif - ; +static malloc_mutex_t init_lock = MALLOC_MUTEX_INITIALIZER; #ifdef DYNAMIC_PAGE_SHIFT size_t pagesize; @@ -50,22 +41,16 @@ const char *JEMALLOC_P(malloc_conf) JEMALLOC_ATTR(visibility("default")); bool opt_abort = true; # ifdef JEMALLOC_FILL bool opt_junk = true; +# else +bool opt_junk = false; # endif #else bool opt_abort = false; -# ifdef JEMALLOC_FILL bool opt_junk = false; -# endif #endif -#ifdef JEMALLOC_SYSV bool opt_sysv = false; -#endif -#ifdef JEMALLOC_XMALLOC bool opt_xmalloc = false; -#endif -#ifdef JEMALLOC_FILL bool opt_zero = false; -#endif size_t opt_narenas = 0; /******************************************************************************/ @@ -75,7 +60,7 @@ static void wrtmessage(void *cbopaque, const char *s); static void stats_print_atexit(void); static unsigned malloc_ncpus(void); static void arenas_cleanup(void *arg); -#if (defined(JEMALLOC_STATS) && defined(NO_TLS)) +#ifdef NO_TLS static void thread_allocated_cleanup(void *arg); #endif static bool malloc_conf_next(char const **opts_p, char const **k_p, @@ -89,22 +74,11 @@ static int imemalign(void **memptr, size_t alignment, size_t size); /******************************************************************************/ /* malloc_message() setup. 
*/ -#ifdef JEMALLOC_HAVE_ATTR -JEMALLOC_ATTR(visibility("hidden")) -#else -static -#endif +JEMALLOC_CATTR(visibility("hidden"), static) void wrtmessage(void *cbopaque, const char *s) { -#ifdef JEMALLOC_CC_SILENCE - int result = -#endif - write(STDERR_FILENO, s, strlen(s)); -#ifdef JEMALLOC_CC_SILENCE - if (result < 0) - result = errno; -#endif + UNUSED int result = write(STDERR_FILENO, s, strlen(s)); } void (*JEMALLOC_P(malloc_message))(void *, const char *s) @@ -229,37 +203,38 @@ static void stats_print_atexit(void) { -#if (defined(JEMALLOC_TCACHE) && defined(JEMALLOC_STATS)) - unsigned i; + if (config_tcache && config_stats) { + unsigned i; - /* - * Merge stats from extant threads. This is racy, since individual - * threads do not lock when recording tcache stats events. As a - * consequence, the final stats may be slightly out of date by the time - * they are reported, if other threads continue to allocate. - */ - for (i = 0; i < narenas; i++) { - arena_t *arena = arenas[i]; - if (arena != NULL) { - tcache_t *tcache; + /* + * Merge stats from extant threads. This is racy, since + * individual threads do not lock when recording tcache stats + * events. As a consequence, the final stats may be slightly + * out of date by the time they are reported, if other threads + * continue to allocate. + */ + for (i = 0; i < narenas; i++) { + arena_t *arena = arenas[i]; + if (arena != NULL) { + tcache_t *tcache; - /* - * tcache_stats_merge() locks bins, so if any code is - * introduced that acquires both arena and bin locks in - * the opposite order, deadlocks may result. - */ - malloc_mutex_lock(&arena->lock); - ql_foreach(tcache, &arena->tcache_ql, link) { - tcache_stats_merge(tcache, arena); + /* + * tcache_stats_merge() locks bins, so if any + * code is introduced that acquires both arena + * and bin locks in the opposite order, + * deadlocks may result. 
+ */ + malloc_mutex_lock(&arena->lock); + ql_foreach(tcache, &arena->tcache_ql, link) { + tcache_stats_merge(tcache, arena); + } + malloc_mutex_unlock(&arena->lock); } - malloc_mutex_unlock(&arena->lock); } } -#endif JEMALLOC_P(malloc_stats_print)(NULL, NULL, NULL); } -#if (defined(JEMALLOC_STATS) && defined(NO_TLS)) thread_allocated_t * thread_allocated_get_hard(void) { @@ -279,7 +254,6 @@ thread_allocated_get_hard(void) thread_allocated->deallocated = 0; return (thread_allocated); } -#endif /* * End miscellaneous support functions. @@ -315,7 +289,7 @@ arenas_cleanup(void *arg) malloc_mutex_unlock(&arenas_lock); } -#if (defined(JEMALLOC_STATS) && defined(NO_TLS)) +#ifdef NO_TLS static void thread_allocated_cleanup(void *arg) { @@ -603,41 +577,42 @@ malloc_conf_init(void) CONF_HANDLE_SSIZE_T(lg_dirty_mult, -1, (sizeof(size_t) << 3) - 1) CONF_HANDLE_BOOL(stats_print) -#ifdef JEMALLOC_FILL - CONF_HANDLE_BOOL(junk) - CONF_HANDLE_BOOL(zero) -#endif -#ifdef JEMALLOC_SYSV - CONF_HANDLE_BOOL(sysv) -#endif -#ifdef JEMALLOC_XMALLOC - CONF_HANDLE_BOOL(xmalloc) -#endif -#ifdef JEMALLOC_TCACHE - CONF_HANDLE_BOOL(tcache) - CONF_HANDLE_SSIZE_T(lg_tcache_gc_sweep, -1, - (sizeof(size_t) << 3) - 1) - CONF_HANDLE_SSIZE_T(lg_tcache_max, -1, - (sizeof(size_t) << 3) - 1) -#endif -#ifdef JEMALLOC_PROF - CONF_HANDLE_BOOL(prof) - CONF_HANDLE_CHAR_P(prof_prefix, "jeprof") - CONF_HANDLE_SIZE_T(lg_prof_bt_max, 0, LG_PROF_BT_MAX) - CONF_HANDLE_BOOL(prof_active) - CONF_HANDLE_SSIZE_T(lg_prof_sample, 0, - (sizeof(uint64_t) << 3) - 1) - CONF_HANDLE_BOOL(prof_accum) - CONF_HANDLE_SSIZE_T(lg_prof_tcmax, -1, - (sizeof(size_t) << 3) - 1) - CONF_HANDLE_SSIZE_T(lg_prof_interval, -1, - (sizeof(uint64_t) << 3) - 1) - CONF_HANDLE_BOOL(prof_gdump) - CONF_HANDLE_BOOL(prof_leak) -#endif -#ifdef JEMALLOC_SWAP - CONF_HANDLE_BOOL(overcommit) -#endif + if (config_fill) { + CONF_HANDLE_BOOL(junk) + CONF_HANDLE_BOOL(zero) + } + if (config_sysv) { + CONF_HANDLE_BOOL(sysv) + } + if (config_xmalloc) { + 
CONF_HANDLE_BOOL(xmalloc) + } + if (config_tcache) { + CONF_HANDLE_BOOL(tcache) + CONF_HANDLE_SSIZE_T(lg_tcache_gc_sweep, -1, + (sizeof(size_t) << 3) - 1) + CONF_HANDLE_SSIZE_T(lg_tcache_max, -1, + (sizeof(size_t) << 3) - 1) + } + if (config_prof) { + CONF_HANDLE_BOOL(prof) + CONF_HANDLE_CHAR_P(prof_prefix, "jeprof") + CONF_HANDLE_SIZE_T(lg_prof_bt_max, 0, + LG_PROF_BT_MAX) + CONF_HANDLE_BOOL(prof_active) + CONF_HANDLE_SSIZE_T(lg_prof_sample, 0, + (sizeof(uint64_t) << 3) - 1) + CONF_HANDLE_BOOL(prof_accum) + CONF_HANDLE_SSIZE_T(lg_prof_tcmax, -1, + (sizeof(size_t) << 3) - 1) + CONF_HANDLE_SSIZE_T(lg_prof_interval, -1, + (sizeof(uint64_t) << 3) - 1) + CONF_HANDLE_BOOL(prof_gdump) + CONF_HANDLE_BOOL(prof_leak) + } + if (config_swap) { + CONF_HANDLE_BOOL(overcommit) + } malloc_conf_error("Invalid conf pair", k, klen, v, vlen); #undef CONF_HANDLE_BOOL @@ -701,9 +676,8 @@ malloc_init_hard(void) } #endif -#ifdef JEMALLOC_PROF - prof_boot0(); -#endif + if (config_prof) + prof_boot0(); malloc_conf_init(); @@ -739,31 +713,28 @@ malloc_init_hard(void) return (true); } -#ifdef JEMALLOC_PROF - prof_boot1(); -#endif + if (config_prof) + prof_boot1(); if (arena_boot()) { malloc_mutex_unlock(&init_lock); return (true); } -#ifdef JEMALLOC_TCACHE - if (tcache_boot()) { + if (config_tcache && tcache_boot()) { malloc_mutex_unlock(&init_lock); return (true); } -#endif if (huge_boot()) { malloc_mutex_unlock(&init_lock); return (true); } -#if (defined(JEMALLOC_STATS) && defined(NO_TLS)) +#ifdef NO_TLS /* Initialize allocation counters before any allocations can occur. 
*/ - if (pthread_key_create(&thread_allocated_tsd, thread_allocated_cleanup) - != 0) { + if (config_stats && pthread_key_create(&thread_allocated_tsd, + thread_allocated_cleanup) != 0) { malloc_mutex_unlock(&init_lock); return (true); } @@ -803,12 +774,10 @@ malloc_init_hard(void) ARENA_SET(arenas[0]); arenas[0]->nthreads++; -#ifdef JEMALLOC_PROF - if (prof_boot2()) { + if (config_prof && prof_boot2()) { malloc_mutex_unlock(&init_lock); return (true); } -#endif /* Get number of CPUs. */ malloc_initializer = pthread_self(); @@ -897,20 +866,8 @@ void * JEMALLOC_P(malloc)(size_t size) { void *ret; -#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS)) - size_t usize -# ifdef JEMALLOC_CC_SILENCE - = 0 -# endif - ; -#endif -#ifdef JEMALLOC_PROF - prof_thr_cnt_t *cnt -# ifdef JEMALLOC_CC_SILENCE - = NULL -# endif - ; -#endif + size_t usize; + prof_thr_cnt_t *cnt; if (malloc_init()) { ret = NULL; @@ -918,27 +875,20 @@ JEMALLOC_P(malloc)(size_t size) } if (size == 0) { -#ifdef JEMALLOC_SYSV - if (opt_sysv == false) -#endif + if (config_sysv == false || opt_sysv == false) size = 1; -#ifdef JEMALLOC_SYSV else { -# ifdef JEMALLOC_XMALLOC - if (opt_xmalloc) { + if (config_xmalloc && opt_xmalloc) { malloc_write(": Error in malloc(): " "invalid size 0\n"); abort(); } -# endif ret = NULL; goto RETURN; } -#endif } -#ifdef JEMALLOC_PROF - if (opt_prof) { + if (config_prof && opt_prof) { usize = s2u(size); PROF_ALLOC_PREP(1, usize, cnt); if (cnt == NULL) { @@ -952,47 +902,36 @@ JEMALLOC_P(malloc)(size_t size) arena_prof_promoted(ret, usize); } else ret = imalloc(size); - } else -#endif - { -#ifdef JEMALLOC_STATS - usize = s2u(size); -#endif + } else { + if (config_stats) + usize = s2u(size); ret = imalloc(size); } OOM: if (ret == NULL) { -#ifdef JEMALLOC_XMALLOC - if (opt_xmalloc) { + if (config_xmalloc && opt_xmalloc) { malloc_write(": Error in malloc(): " "out of memory\n"); abort(); } -#endif errno = ENOMEM; } -#ifdef JEMALLOC_SYSV RETURN: -#endif -#ifdef JEMALLOC_PROF - if 
(opt_prof && ret != NULL) + if (config_prof && opt_prof && ret != NULL) prof_malloc(ret, usize, cnt); -#endif -#ifdef JEMALLOC_STATS - if (ret != NULL) { + if (config_stats && ret != NULL) { assert(usize == isalloc(ret)); ALLOCATED_ADD(usize, 0); } -#endif return (ret); } JEMALLOC_ATTR(nonnull(1)) #ifdef JEMALLOC_PROF /* - * Avoid any uncertainty as to how many backtrace frames to ignore in + * Avoid any uncertainty as to how many backtrace frames to ignore in * PROF_ALLOC_PREP(). */ JEMALLOC_ATTR(noinline) @@ -1001,56 +940,38 @@ static int imemalign(void **memptr, size_t alignment, size_t size) { int ret; - size_t usize -#ifdef JEMALLOC_CC_SILENCE - = 0 -#endif - ; + size_t usize; void *result; -#ifdef JEMALLOC_PROF - prof_thr_cnt_t *cnt -# ifdef JEMALLOC_CC_SILENCE - = NULL -# endif - ; -#endif + prof_thr_cnt_t *cnt; if (malloc_init()) result = NULL; else { if (size == 0) { -#ifdef JEMALLOC_SYSV - if (opt_sysv == false) -#endif + if (config_sysv == false || opt_sysv == false) size = 1; -#ifdef JEMALLOC_SYSV else { -# ifdef JEMALLOC_XMALLOC - if (opt_xmalloc) { + if (config_xmalloc && opt_xmalloc) { malloc_write(": Error in " "posix_memalign(): invalid size " "0\n"); abort(); } -# endif result = NULL; *memptr = NULL; ret = 0; goto RETURN; } -#endif } /* Make sure that alignment is a large enough power of 2. 
*/ if (((alignment - 1) & alignment) != 0 || alignment < sizeof(void *)) { -#ifdef JEMALLOC_XMALLOC - if (opt_xmalloc) { + if (config_xmalloc && opt_xmalloc) { malloc_write(": Error in " "posix_memalign(): invalid alignment\n"); abort(); } -#endif result = NULL; ret = EINVAL; goto RETURN; @@ -1063,8 +984,7 @@ imemalign(void **memptr, size_t alignment, size_t size) goto RETURN; } -#ifdef JEMALLOC_PROF - if (opt_prof) { + if (config_prof && opt_prof) { PROF_ALLOC_PREP(2, usize, cnt); if (cnt == NULL) { result = NULL; @@ -1086,18 +1006,15 @@ imemalign(void **memptr, size_t alignment, size_t size) } } } else -#endif result = ipalloc(usize, alignment, false); } if (result == NULL) { -#ifdef JEMALLOC_XMALLOC - if (opt_xmalloc) { + if (config_xmalloc && opt_xmalloc) { malloc_write(": Error in posix_memalign(): " "out of memory\n"); abort(); } -#endif ret = ENOMEM; goto RETURN; } @@ -1106,16 +1023,12 @@ imemalign(void **memptr, size_t alignment, size_t size) ret = 0; RETURN: -#ifdef JEMALLOC_STATS - if (result != NULL) { + if (config_stats && result != NULL) { assert(usize == isalloc(result)); ALLOCATED_ADD(usize, 0); } -#endif -#ifdef JEMALLOC_PROF - if (opt_prof && result != NULL) + if (config_prof && opt_prof && result != NULL) prof_malloc(result, usize, cnt); -#endif return (ret); } @@ -1135,20 +1048,8 @@ JEMALLOC_P(calloc)(size_t num, size_t size) { void *ret; size_t num_size; -#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS)) - size_t usize -# ifdef JEMALLOC_CC_SILENCE - = 0 -# endif - ; -#endif -#ifdef JEMALLOC_PROF - prof_thr_cnt_t *cnt -# ifdef JEMALLOC_CC_SILENCE - = NULL -# endif - ; -#endif + size_t usize; + prof_thr_cnt_t *cnt; if (malloc_init()) { num_size = 0; @@ -1158,16 +1059,13 @@ JEMALLOC_P(calloc)(size_t num, size_t size) num_size = num * size; if (num_size == 0) { -#ifdef JEMALLOC_SYSV - if ((opt_sysv == false) && ((num == 0) || (size == 0))) -#endif + if ((config_sysv == false || opt_sysv == false) + && ((num == 0) || (size == 0))) num_size = 1; 
-#ifdef JEMALLOC_SYSV else { ret = NULL; goto RETURN; } -#endif /* * Try to avoid division here. We know that it isn't possible to * overflow during multiplication if neither operand uses any of the @@ -1180,8 +1078,7 @@ JEMALLOC_P(calloc)(size_t num, size_t size) goto RETURN; } -#ifdef JEMALLOC_PROF - if (opt_prof) { + if (config_prof && opt_prof) { usize = s2u(num_size); PROF_ALLOC_PREP(1, usize, cnt); if (cnt == NULL) { @@ -1195,37 +1092,28 @@ JEMALLOC_P(calloc)(size_t num, size_t size) arena_prof_promoted(ret, usize); } else ret = icalloc(num_size); - } else -#endif - { -#ifdef JEMALLOC_STATS - usize = s2u(num_size); -#endif + } else { + if (config_stats) + usize = s2u(num_size); ret = icalloc(num_size); } RETURN: if (ret == NULL) { -#ifdef JEMALLOC_XMALLOC - if (opt_xmalloc) { + if (config_xmalloc && opt_xmalloc) { malloc_write(": Error in calloc(): out of " "memory\n"); abort(); } -#endif errno = ENOMEM; } -#ifdef JEMALLOC_PROF - if (opt_prof && ret != NULL) + if (config_prof && opt_prof && ret != NULL) prof_malloc(ret, usize, cnt); -#endif -#ifdef JEMALLOC_STATS - if (ret != NULL) { + if (config_stats && ret != NULL) { assert(usize == isalloc(ret)); ALLOCATED_ADD(usize, 0); } -#endif return (ret); } @@ -1234,67 +1122,39 @@ void * JEMALLOC_P(realloc)(void *ptr, size_t size) { void *ret; -#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS)) - size_t usize -# ifdef JEMALLOC_CC_SILENCE - = 0 -# endif - ; + size_t usize; size_t old_size = 0; -#endif -#ifdef JEMALLOC_PROF - prof_thr_cnt_t *cnt -# ifdef JEMALLOC_CC_SILENCE - = NULL -# endif - ; - prof_ctx_t *old_ctx -# ifdef JEMALLOC_CC_SILENCE - = NULL -# endif - ; -#endif + prof_thr_cnt_t *cnt; + prof_ctx_t *old_ctx; if (size == 0) { -#ifdef JEMALLOC_SYSV - if (opt_sysv == false) -#endif + if (config_sysv == false || opt_sysv == false) size = 1; -#ifdef JEMALLOC_SYSV else { if (ptr != NULL) { -#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS)) - old_size = isalloc(ptr); -#endif -#ifdef JEMALLOC_PROF - if 
(opt_prof) { + if (config_prof || config_stats) + old_size = isalloc(ptr); + if (config_prof && opt_prof) { old_ctx = prof_ctx_get(ptr); cnt = NULL; } -#endif idalloc(ptr); - } -#ifdef JEMALLOC_PROF - else if (opt_prof) { + } else if (config_prof && opt_prof) { old_ctx = NULL; cnt = NULL; } -#endif ret = NULL; goto RETURN; } -#endif } if (ptr != NULL) { assert(malloc_initialized || malloc_initializer == pthread_self()); -#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS)) - old_size = isalloc(ptr); -#endif -#ifdef JEMALLOC_PROF - if (opt_prof) { + if (config_prof || config_stats) + old_size = isalloc(ptr); + if (config_prof && opt_prof) { usize = s2u(size); old_ctx = prof_ctx_get(ptr); PROF_ALLOC_PREP(1, usize, cnt); @@ -1316,42 +1176,30 @@ JEMALLOC_P(realloc)(void *ptr, size_t size) if (ret == NULL) old_ctx = NULL; } - } else -#endif - { -#ifdef JEMALLOC_STATS - usize = s2u(size); -#endif + } else { + if (config_stats) + usize = s2u(size); ret = iralloc(ptr, size, 0, 0, false, false); } -#ifdef JEMALLOC_PROF OOM: -#endif if (ret == NULL) { -#ifdef JEMALLOC_XMALLOC - if (opt_xmalloc) { + if (config_xmalloc && opt_xmalloc) { malloc_write(": Error in realloc(): " "out of memory\n"); abort(); } -#endif errno = ENOMEM; } } else { -#ifdef JEMALLOC_PROF - if (opt_prof) + if (config_prof && opt_prof) old_ctx = NULL; -#endif if (malloc_init()) { -#ifdef JEMALLOC_PROF - if (opt_prof) + if (config_prof && opt_prof) cnt = NULL; -#endif ret = NULL; } else { -#ifdef JEMALLOC_PROF - if (opt_prof) { + if (config_prof && opt_prof) { usize = s2u(size); PROF_ALLOC_PREP(1, usize, cnt); if (cnt == NULL) @@ -1368,41 +1216,30 @@ OOM: } else ret = imalloc(size); } - } else -#endif - { -#ifdef JEMALLOC_STATS - usize = s2u(size); -#endif + } else { + if (config_stats) + usize = s2u(size); ret = imalloc(size); } } if (ret == NULL) { -#ifdef JEMALLOC_XMALLOC - if (opt_xmalloc) { + if (config_xmalloc && opt_xmalloc) { malloc_write(": Error in realloc(): " "out of memory\n"); abort(); } 
-#endif errno = ENOMEM; } } -#ifdef JEMALLOC_SYSV RETURN: -#endif -#ifdef JEMALLOC_PROF - if (opt_prof) + if (config_prof && opt_prof) prof_realloc(ret, usize, cnt, old_size, old_ctx); -#endif -#ifdef JEMALLOC_STATS - if (ret != NULL) { + if (config_stats && ret != NULL) { assert(usize == isalloc(ret)); ALLOCATED_ADD(usize, old_size); } -#endif return (ret); } @@ -1412,27 +1249,19 @@ JEMALLOC_P(free)(void *ptr) { if (ptr != NULL) { -#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS)) size_t usize; -#endif assert(malloc_initialized || malloc_initializer == pthread_self()); -#ifdef JEMALLOC_STATS - usize = isalloc(ptr); -#endif -#ifdef JEMALLOC_PROF - if (opt_prof) { -# ifndef JEMALLOC_STATS + if (config_prof && opt_prof) { usize = isalloc(ptr); -# endif prof_free(ptr, usize); + } else if (config_stats) { + usize = isalloc(ptr); } -#endif -#ifdef JEMALLOC_STATS - ALLOCATED_ADD(0, usize); -#endif + if (config_stats) + ALLOCATED_ADD(0, usize); idalloc(ptr); } } @@ -1455,15 +1284,12 @@ JEMALLOC_ATTR(visibility("default")) void * JEMALLOC_P(memalign)(size_t alignment, size_t size) { - void *ret; + void *ret #ifdef JEMALLOC_CC_SILENCE - int result = -#endif - imemalign(&ret, alignment, size); -#ifdef JEMALLOC_CC_SILENCE - if (result != 0) - return (NULL); + = NULL #endif + ; + imemalign(&ret, alignment, size); return (ret); } #endif @@ -1474,15 +1300,12 @@ JEMALLOC_ATTR(visibility("default")) void * JEMALLOC_P(valloc)(size_t size) { - void *ret; + void *ret #ifdef JEMALLOC_CC_SILENCE - int result = -#endif - imemalign(&ret, PAGE_SIZE, size); -#ifdef JEMALLOC_CC_SILENCE - if (result != 0) - return (NULL); + = NULL #endif + ; + imemalign(&ret, PAGE_SIZE, size); return (ret); } #endif @@ -1504,12 +1327,12 @@ JEMALLOC_P(malloc_usable_size)(const void *ptr) assert(malloc_initialized || malloc_initializer == pthread_self()); -#ifdef JEMALLOC_IVSALLOC - ret = ivsalloc(ptr); -#else - assert(ptr != NULL); - ret = isalloc(ptr); -#endif + if (config_ivsalloc) + ret = 
ivsalloc(ptr); + else { + assert(ptr != NULL); + ret = isalloc(ptr); + } return (ret); } @@ -1583,9 +1406,7 @@ JEMALLOC_P(allocm)(void **ptr, size_t *rsize, size_t size, int flags) size_t alignment = (ZU(1) << (flags & ALLOCM_LG_ALIGN_MASK) & (SIZE_T_MAX-1)); bool zero = flags & ALLOCM_ZERO; -#ifdef JEMALLOC_PROF prof_thr_cnt_t *cnt; -#endif assert(ptr != NULL); assert(size != 0); @@ -1597,8 +1418,7 @@ JEMALLOC_P(allocm)(void **ptr, size_t *rsize, size_t size, int flags) if (usize == 0) goto OOM; -#ifdef JEMALLOC_PROF - if (opt_prof) { + if (config_prof && opt_prof) { PROF_ALLOC_PREP(1, usize, cnt); if (cnt == NULL) goto OOM; @@ -1618,39 +1438,26 @@ JEMALLOC_P(allocm)(void **ptr, size_t *rsize, size_t size, int flags) goto OOM; } prof_malloc(p, usize, cnt); - if (rsize != NULL) - *rsize = usize; - } else -#endif - { + } else { p = iallocm(usize, alignment, zero); if (p == NULL) goto OOM; -#ifndef JEMALLOC_STATS - if (rsize != NULL) -#endif - { -#ifdef JEMALLOC_STATS - if (rsize != NULL) -#endif - *rsize = usize; - } } + if (rsize != NULL) + *rsize = usize; *ptr = p; -#ifdef JEMALLOC_STATS - assert(usize == isalloc(p)); - ALLOCATED_ADD(usize, 0); -#endif + if (config_stats) { + assert(usize == isalloc(p)); + ALLOCATED_ADD(usize, 0); + } return (ALLOCM_SUCCESS); OOM: -#ifdef JEMALLOC_XMALLOC - if (opt_xmalloc) { + if (config_xmalloc && opt_xmalloc) { malloc_write(": Error in allocm(): " "out of memory\n"); abort(); } -#endif *ptr = NULL; return (ALLOCM_ERR_OOM); } @@ -1663,16 +1470,12 @@ JEMALLOC_P(rallocm)(void **ptr, size_t *rsize, size_t size, size_t extra, { void *p, *q; size_t usize; -#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS)) size_t old_size; -#endif size_t alignment = (ZU(1) << (flags & ALLOCM_LG_ALIGN_MASK) & (SIZE_T_MAX-1)); bool zero = flags & ALLOCM_ZERO; bool no_move = flags & ALLOCM_NO_MOVE; -#ifdef JEMALLOC_PROF prof_thr_cnt_t *cnt; -#endif assert(ptr != NULL); assert(*ptr != NULL); @@ -1681,8 +1484,7 @@ JEMALLOC_P(rallocm)(void **ptr, 
size_t *rsize, size_t size, size_t extra, assert(malloc_initialized || malloc_initializer == pthread_self()); p = *ptr; -#ifdef JEMALLOC_PROF - if (opt_prof) { + if (config_prof && opt_prof) { /* * usize isn't knowable before iralloc() returns when extra is * non-zero. Therefore, compute its maximum possible value and @@ -1722,45 +1524,34 @@ JEMALLOC_P(rallocm)(void **ptr, size_t *rsize, size_t size, size_t extra, prof_realloc(q, usize, cnt, old_size, old_ctx); if (rsize != NULL) *rsize = usize; - } else -#endif - { -#ifdef JEMALLOC_STATS - old_size = isalloc(p); -#endif + } else { + if (config_stats) + old_size = isalloc(p); q = iralloc(p, size, extra, alignment, zero, no_move); if (q == NULL) goto ERR; -#ifndef JEMALLOC_STATS - if (rsize != NULL) -#endif - { + if (config_stats) usize = isalloc(q); -#ifdef JEMALLOC_STATS - if (rsize != NULL) -#endif - *rsize = usize; + if (rsize != NULL) { + if (config_stats == false) + usize = isalloc(q); + *rsize = usize; } } *ptr = q; -#ifdef JEMALLOC_STATS - ALLOCATED_ADD(usize, old_size); -#endif + if (config_stats) + ALLOCATED_ADD(usize, old_size); return (ALLOCM_SUCCESS); ERR: if (no_move) return (ALLOCM_ERR_NOT_MOVED); -#ifdef JEMALLOC_PROF OOM: -#endif -#ifdef JEMALLOC_XMALLOC - if (opt_xmalloc) { + if (config_xmalloc && opt_xmalloc) { malloc_write(": Error in rallocm(): " "out of memory\n"); abort(); } -#endif return (ALLOCM_ERR_OOM); } @@ -1773,12 +1564,12 @@ JEMALLOC_P(sallocm)(const void *ptr, size_t *rsize, int flags) assert(malloc_initialized || malloc_initializer == pthread_self()); -#ifdef JEMALLOC_IVSALLOC - sz = ivsalloc(ptr); -#else - assert(ptr != NULL); - sz = isalloc(ptr); -#endif + if (config_ivsalloc) + sz = ivsalloc(ptr); + else { + assert(ptr != NULL); + sz = isalloc(ptr); + } assert(rsize != NULL); *rsize = sz; @@ -1790,27 +1581,20 @@ JEMALLOC_ATTR(visibility("default")) int JEMALLOC_P(dallocm)(void *ptr, int flags) { -#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS)) size_t usize; -#endif 
assert(ptr != NULL); assert(malloc_initialized || malloc_initializer == pthread_self()); -#ifdef JEMALLOC_STATS - usize = isalloc(ptr); -#endif -#ifdef JEMALLOC_PROF - if (opt_prof) { -# ifndef JEMALLOC_STATS + if (config_stats) usize = isalloc(ptr); -# endif + if (config_prof && opt_prof) { + if (config_stats == false) + usize = isalloc(ptr); prof_free(ptr, usize); } -#endif -#ifdef JEMALLOC_STATS - ALLOCATED_ADD(0, usize); -#endif + if (config_stats) + ALLOCATED_ADD(0, usize); idalloc(ptr); return (ALLOCM_SUCCESS); @@ -1843,13 +1627,11 @@ jemalloc_prefork(void) malloc_mutex_lock(&huge_mtx); -#ifdef JEMALLOC_DSS - malloc_mutex_lock(&dss_mtx); -#endif + if (config_dss) + malloc_mutex_lock(&dss_mtx); -#ifdef JEMALLOC_SWAP - malloc_mutex_lock(&swap_mtx); -#endif + if (config_swap) + malloc_mutex_lock(&swap_mtx); } void @@ -1859,13 +1641,11 @@ jemalloc_postfork(void) /* Release all mutexes, now that fork() has completed. */ -#ifdef JEMALLOC_SWAP - malloc_mutex_unlock(&swap_mtx); -#endif + if (config_swap) + malloc_mutex_unlock(&swap_mtx); -#ifdef JEMALLOC_DSS - malloc_mutex_unlock(&dss_mtx); -#endif + if (config_dss) + malloc_mutex_unlock(&dss_mtx); malloc_mutex_unlock(&huge_mtx); diff --git a/src/prof.c b/src/prof.c index 8a144b4e..113cf15a 100644 --- a/src/prof.c +++ b/src/prof.c @@ -1,6 +1,5 @@ #define JEMALLOC_PROF_C_ #include "jemalloc/internal/jemalloc_internal.h" -#ifdef JEMALLOC_PROF /******************************************************************************/ #ifdef JEMALLOC_PROF_LIBUNWIND @@ -102,6 +101,8 @@ void bt_init(prof_bt_t *bt, void **vec) { + cassert(config_prof); + bt->vec = vec; bt->len = 0; } @@ -110,6 +111,8 @@ static void bt_destroy(prof_bt_t *bt) { + cassert(config_prof); + idalloc(bt); } @@ -118,6 +121,8 @@ bt_dup(prof_bt_t *bt) { prof_bt_t *ret; + cassert(config_prof); + /* * Create a single allocation that has space for vec immediately * following the prof_bt_t structure. 
The backtraces that get @@ -141,6 +146,8 @@ static inline void prof_enter(void) { + cassert(config_prof); + malloc_mutex_lock(&enq_mtx); enq = true; malloc_mutex_unlock(&enq_mtx); @@ -153,6 +160,8 @@ prof_leave(void) { bool idump, gdump; + cassert(config_prof); + malloc_mutex_unlock(&bt2ctx_mtx); malloc_mutex_lock(&enq_mtx); @@ -178,6 +187,7 @@ prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) unsigned i; int err; + cassert(config_prof); assert(bt->len == 0); assert(bt->vec != NULL); assert(max <= (1U << opt_lg_prof_bt_max)); @@ -204,12 +214,13 @@ prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) break; } } -#endif -#ifdef JEMALLOC_PROF_LIBGCC +#elif (defined(JEMALLOC_PROF_LIBGCC)) static _Unwind_Reason_Code prof_unwind_init_callback(struct _Unwind_Context *context, void *arg) { + cassert(config_prof); + return (_URC_NO_REASON); } @@ -218,6 +229,8 @@ prof_unwind_callback(struct _Unwind_Context *context, void *arg) { prof_unwind_data_t *data = (prof_unwind_data_t *)arg; + cassert(config_prof); + if (data->nignore > 0) data->nignore--; else { @@ -235,10 +248,11 @@ prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) { prof_unwind_data_t data = {bt, nignore, max}; + cassert(config_prof); + _Unwind_Backtrace(prof_unwind_callback, &data); } -#endif -#ifdef JEMALLOC_PROF_GCC +#elif (defined(JEMALLOC_PROF_GCC)) void prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) { @@ -257,6 +271,7 @@ prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) } else \ return; + cassert(config_prof); assert(nignore <= 3); assert(max <= (1U << opt_lg_prof_bt_max)); @@ -407,6 +422,14 @@ prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) BT_FRAME(130) #undef BT_FRAME } +#else +void +prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) +{ + + cassert(config_prof); + assert(false); +} #endif prof_thr_cnt_t * @@ -418,6 +441,8 @@ prof_lookup(prof_bt_t *bt) } ret; prof_tdata_t *prof_tdata; + cassert(config_prof); + prof_tdata = 
PROF_TCACHE_GET(); if (prof_tdata == NULL) { prof_tdata = prof_tdata_init(); @@ -553,6 +578,8 @@ prof_flush(bool propagate_err) bool ret = false; ssize_t err; + cassert(config_prof); + err = write(prof_dump_fd, prof_dump_buf, prof_dump_buf_end); if (err == -1) { if (propagate_err == false) { @@ -573,6 +600,8 @@ prof_write(const char *s, bool propagate_err) { unsigned i, slen, n; + cassert(config_prof); + i = 0; slen = strlen(s); while (i < slen) { @@ -602,6 +631,8 @@ prof_ctx_sum(prof_ctx_t *ctx, prof_cnt_t *cnt_all, size_t *leak_nctx) prof_thr_cnt_t *thr_cnt; prof_cnt_t tcnt; + cassert(config_prof); + malloc_mutex_lock(&ctx->lock); memcpy(&ctx->cnt_summed, &ctx->cnt_merged, sizeof(prof_cnt_t)); @@ -648,6 +679,8 @@ static void prof_ctx_destroy(prof_ctx_t *ctx) { + cassert(config_prof); + /* * Check that ctx is still unused by any thread cache before destroying * it. prof_lookup() artificially raises ctx->cnt_merge.curobjs in @@ -686,6 +719,8 @@ prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt) { bool destroy; + cassert(config_prof); + /* Merge cnt stats and detach from ctx. 
*/ malloc_mutex_lock(&ctx->lock); ctx->cnt_merged.curobjs += cnt->cnts.curobjs; @@ -723,6 +758,8 @@ prof_dump_ctx(prof_ctx_t *ctx, prof_bt_t *bt, bool propagate_err) char buf[UMAX2S_BUFSIZE]; unsigned i; + cassert(config_prof); + if (opt_prof_accum == false && ctx->cnt_summed.curobjs == 0) { assert(ctx->cnt_summed.curbytes == 0); assert(ctx->cnt_summed.accumobjs == 0); @@ -767,6 +804,8 @@ prof_dump_maps(bool propagate_err) char mpath[6 + UMAX2S_BUFSIZE + 5 + 1]; + cassert(config_prof); + i = 0; s = "/proc/"; @@ -827,6 +866,8 @@ prof_dump(const char *filename, bool leakcheck, bool propagate_err) char buf[UMAX2S_BUFSIZE]; size_t leak_nctx; + cassert(config_prof); + prof_enter(); prof_dump_fd = creat(filename, 0644); if (prof_dump_fd == -1) { @@ -917,6 +958,8 @@ prof_dump_filename(char *filename, char v, int64_t vseq) char *s; unsigned i, slen; + cassert(config_prof); + /* * Construct a filename of the form: * @@ -979,6 +1022,8 @@ prof_fdump(void) { char filename[DUMP_FILENAME_BUFSIZE]; + cassert(config_prof); + if (prof_booted == false) return; @@ -995,6 +1040,8 @@ prof_idump(void) { char filename[DUMP_FILENAME_BUFSIZE]; + cassert(config_prof); + if (prof_booted == false) return; malloc_mutex_lock(&enq_mtx); @@ -1019,6 +1066,8 @@ prof_mdump(const char *filename) { char filename_buf[DUMP_FILENAME_BUFSIZE]; + cassert(config_prof); + if (opt_prof == false || prof_booted == false) return (true); @@ -1040,6 +1089,8 @@ prof_gdump(void) { char filename[DUMP_FILENAME_BUFSIZE]; + cassert(config_prof); + if (prof_booted == false) return; malloc_mutex_lock(&enq_mtx); @@ -1066,6 +1117,7 @@ prof_bt_hash(const void *key, unsigned minbits, size_t *hash1, size_t *hash2) uint64_t h; prof_bt_t *bt = (prof_bt_t *)key; + cassert(config_prof); assert(minbits <= 32 || (SIZEOF_PTR == 8 && minbits <= 64)); assert(hash1 != NULL); assert(hash2 != NULL); @@ -1094,6 +1146,8 @@ prof_bt_keycomp(const void *k1, const void *k2) const prof_bt_t *bt1 = (prof_bt_t *)k1; const prof_bt_t *bt2 = 
(prof_bt_t *)k2; + cassert(config_prof); + if (bt1->len != bt2->len) return (false); return (memcmp(bt1->vec, bt2->vec, bt1->len * sizeof(void *)) == 0); @@ -1104,6 +1158,8 @@ prof_tdata_init(void) { prof_tdata_t *prof_tdata; + cassert(config_prof); + /* Initialize an empty cache for this thread. */ prof_tdata = (prof_tdata_t *)imalloc(sizeof(prof_tdata_t)); if (prof_tdata == NULL) @@ -1138,6 +1194,8 @@ prof_tdata_cleanup(void *arg) prof_thr_cnt_t *cnt; prof_tdata_t *prof_tdata = (prof_tdata_t *)arg; + cassert(config_prof); + /* * Delete the hash table. All of its contents can still be iterated * over via the LRU. @@ -1161,6 +1219,8 @@ void prof_boot0(void) { + cassert(config_prof); + memcpy(opt_prof_prefix, PROF_PREFIX_DEFAULT, sizeof(PROF_PREFIX_DEFAULT)); } @@ -1169,6 +1229,8 @@ void prof_boot1(void) { + cassert(config_prof); + /* * opt_prof and prof_promote must be in their final state before any * arenas are initialized, so this function must be executed early. @@ -1197,6 +1259,8 @@ bool prof_boot2(void) { + cassert(config_prof); + if (opt_prof) { if (ckh_new(&bt2ctx, PROF_CKH_MINITEMS, prof_bt_hash, prof_bt_keycomp)) @@ -1241,4 +1305,3 @@ prof_boot2(void) } /******************************************************************************/ -#endif /* JEMALLOC_PROF */ diff --git a/src/stats.c b/src/stats.c index dc172e42..e6446530 100644 --- a/src/stats.c +++ b/src/stats.c @@ -39,14 +39,11 @@ bool opt_stats_print = false; -#ifdef JEMALLOC_STATS size_t stats_cactive = 0; -#endif /******************************************************************************/ /* Function prototypes for non-inline static functions. 
*/ -#ifdef JEMALLOC_STATS static void malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque, const char *format, va_list ap); static void stats_arena_bins_print(void (*write_cb)(void *, const char *), @@ -55,10 +52,10 @@ static void stats_arena_lruns_print(void (*write_cb)(void *, const char *), void *cbopaque, unsigned i); static void stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, unsigned i); -#endif /******************************************************************************/ +/* XXX Refactor by adding malloc_vsnprintf(). */ /* * We don't want to depend on vsnprintf() for production builds, since that can * cause unnecessary bloat for static binaries. u2s() provides minimal integer @@ -99,7 +96,6 @@ u2s(uint64_t x, unsigned base, char *s) return (&s[i]); } -#ifdef JEMALLOC_STATS static void malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque, const char *format, va_list ap) @@ -149,9 +145,7 @@ malloc_printf(const char *format, ...) 
malloc_vcprintf(NULL, NULL, format, ap); va_end(ap); } -#endif -#ifdef JEMALLOC_STATS static void stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, unsigned i) @@ -377,7 +371,6 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, stats_arena_bins_print(write_cb, cbopaque, i); stats_arena_lruns_print(write_cb, cbopaque, i); } -#endif void stats_print(void (*write_cb)(void *, const char *), void *cbopaque, @@ -674,8 +667,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, write_cb(cbopaque, ")\n"); } -#ifdef JEMALLOC_STATS - { + if (config_stats) { int err; size_t sszp, ssz; size_t *cactive; @@ -785,6 +777,5 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, } } } -#endif /* #ifdef JEMALLOC_STATS */ write_cb(cbopaque, "--- End jemalloc statistics ---\n"); } diff --git a/src/tcache.c b/src/tcache.c index 31c329e1..398fc0aa 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -38,31 +38,22 @@ tcache_alloc_small_hard(tcache_t *tcache, tcache_bin_t *tbin, size_t binind) { void *ret; - arena_tcache_fill_small(tcache->arena, tbin, binind -#ifdef JEMALLOC_PROF - , tcache->prof_accumbytes -#endif - ); -#ifdef JEMALLOC_PROF - tcache->prof_accumbytes = 0; -#endif + arena_tcache_fill_small(tcache->arena, tbin, binind, + config_prof ? 
tcache->prof_accumbytes : 0); + if (config_prof) + tcache->prof_accumbytes = 0; ret = tcache_alloc_easy(tbin); return (ret); } void -tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem -#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) - , tcache_t *tcache -#endif - ) +tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem, + tcache_t *tcache) { void *ptr; unsigned i, nflush, ndeferred; -#ifdef JEMALLOC_STATS bool merged_stats = false; -#endif assert(binind < nbins); assert(rem <= tbin->ncached); @@ -74,25 +65,21 @@ tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem arena_t *arena = chunk->arena; arena_bin_t *bin = &arena->bins[binind]; -#ifdef JEMALLOC_PROF - if (arena == tcache->arena) { + if (config_prof && arena == tcache->arena) { malloc_mutex_lock(&arena->lock); arena_prof_accum(arena, tcache->prof_accumbytes); malloc_mutex_unlock(&arena->lock); tcache->prof_accumbytes = 0; } -#endif malloc_mutex_lock(&bin->lock); -#ifdef JEMALLOC_STATS - if (arena == tcache->arena) { + if (config_stats && arena == tcache->arena) { assert(merged_stats == false); merged_stats = true; bin->stats.nflushes++; bin->stats.nrequests += tbin->tstats.nrequests; tbin->tstats.nrequests = 0; } -#endif ndeferred = 0; for (i = 0; i < nflush; i++) { ptr = tbin->avail[i]; @@ -117,8 +104,7 @@ tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem } malloc_mutex_unlock(&bin->lock); } -#ifdef JEMALLOC_STATS - if (merged_stats == false) { + if (config_stats && merged_stats == false) { /* * The flush loop didn't happen to flush to this thread's * arena, so the stats didn't get merged. Manually do so now. 
@@ -130,7 +116,6 @@ tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem tbin->tstats.nrequests = 0; malloc_mutex_unlock(&bin->lock); } -#endif memmove(tbin->avail, &tbin->avail[tbin->ncached - rem], rem * sizeof(void *)); @@ -140,17 +125,12 @@ tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem } void -tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem -#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) - , tcache_t *tcache -#endif - ) +tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem, + tcache_t *tcache) { void *ptr; unsigned i, nflush, ndeferred; -#ifdef JEMALLOC_STATS bool merged_stats = false; -#endif assert(binind < nhbins); assert(rem <= tbin->ncached); @@ -162,23 +142,21 @@ tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem arena_t *arena = chunk->arena; malloc_mutex_lock(&arena->lock); -#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS)) - if (arena == tcache->arena) { -#endif -#ifdef JEMALLOC_PROF - arena_prof_accum(arena, tcache->prof_accumbytes); - tcache->prof_accumbytes = 0; -#endif -#ifdef JEMALLOC_STATS - merged_stats = true; - arena->stats.nrequests_large += tbin->tstats.nrequests; - arena->stats.lstats[binind - nbins].nrequests += - tbin->tstats.nrequests; - tbin->tstats.nrequests = 0; -#endif -#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS)) + if ((config_prof || config_stats) && arena == tcache->arena) { + if (config_prof) { + arena_prof_accum(arena, + tcache->prof_accumbytes); + tcache->prof_accumbytes = 0; + } + if (config_stats) { + merged_stats = true; + arena->stats.nrequests_large += + tbin->tstats.nrequests; + arena->stats.lstats[binind - nbins].nrequests += + tbin->tstats.nrequests; + tbin->tstats.nrequests = 0; + } } -#endif ndeferred = 0; for (i = 0; i < nflush; i++) { ptr = tbin->avail[i]; @@ -199,8 +177,7 @@ tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem } malloc_mutex_unlock(&arena->lock); } 
-#ifdef JEMALLOC_STATS - if (merged_stats == false) { + if (config_stats && merged_stats == false) { /* * The flush loop didn't happen to flush to this thread's * arena, so the stats didn't get merged. Manually do so now. @@ -213,7 +190,6 @@ tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem tbin->tstats.nrequests = 0; malloc_mutex_unlock(&arena->lock); } -#endif memmove(tbin->avail, &tbin->avail[tbin->ncached - rem], rem * sizeof(void *)); @@ -254,13 +230,13 @@ tcache_create(arena_t *arena) if (tcache == NULL) return (NULL); -#ifdef JEMALLOC_STATS - /* Link into list of extant tcaches. */ - malloc_mutex_lock(&arena->lock); - ql_elm_new(tcache, link); - ql_tail_insert(&arena->tcache_ql, tcache, link); - malloc_mutex_unlock(&arena->lock); -#endif + if (config_stats) { + /* Link into list of extant tcaches. */ + malloc_mutex_lock(&arena->lock); + ql_elm_new(tcache, link); + ql_tail_insert(&arena->tcache_ql, tcache, link); + malloc_mutex_unlock(&arena->lock); + } tcache->arena = arena; assert((TCACHE_NSLOTS_SMALL_MAX & 1U) == 0); @@ -282,43 +258,32 @@ tcache_destroy(tcache_t *tcache) unsigned i; size_t tcache_size; -#ifdef JEMALLOC_STATS - /* Unlink from list of extant tcaches. */ - malloc_mutex_lock(&tcache->arena->lock); - ql_remove(&tcache->arena->tcache_ql, tcache, link); - malloc_mutex_unlock(&tcache->arena->lock); - tcache_stats_merge(tcache, tcache->arena); -#endif + if (config_stats) { + /* Unlink from list of extant tcaches. 
*/ + malloc_mutex_lock(&tcache->arena->lock); + ql_remove(&tcache->arena->tcache_ql, tcache, link); + malloc_mutex_unlock(&tcache->arena->lock); + tcache_stats_merge(tcache, tcache->arena); + } for (i = 0; i < nbins; i++) { tcache_bin_t *tbin = &tcache->tbins[i]; - tcache_bin_flush_small(tbin, i, 0 -#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) - , tcache -#endif - ); + tcache_bin_flush_small(tbin, i, 0, tcache); -#ifdef JEMALLOC_STATS - if (tbin->tstats.nrequests != 0) { + if (config_stats && tbin->tstats.nrequests != 0) { arena_t *arena = tcache->arena; arena_bin_t *bin = &arena->bins[i]; malloc_mutex_lock(&bin->lock); bin->stats.nrequests += tbin->tstats.nrequests; malloc_mutex_unlock(&bin->lock); } -#endif } for (; i < nhbins; i++) { tcache_bin_t *tbin = &tcache->tbins[i]; - tcache_bin_flush_large(tbin, i, 0 -#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) - , tcache -#endif - ); + tcache_bin_flush_large(tbin, i, 0, tcache); -#ifdef JEMALLOC_STATS - if (tbin->tstats.nrequests != 0) { + if (config_stats && tbin->tstats.nrequests != 0) { arena_t *arena = tcache->arena; malloc_mutex_lock(&arena->lock); arena->stats.nrequests_large += tbin->tstats.nrequests; @@ -326,16 +291,13 @@ tcache_destroy(tcache_t *tcache) tbin->tstats.nrequests; malloc_mutex_unlock(&arena->lock); } -#endif } -#ifdef JEMALLOC_PROF - if (tcache->prof_accumbytes > 0) { + if (config_prof && tcache->prof_accumbytes > 0) { malloc_mutex_lock(&tcache->arena->lock); arena_prof_accum(tcache->arena, tcache->prof_accumbytes); malloc_mutex_unlock(&tcache->arena->lock); } -#endif tcache_size = arena_salloc(tcache); if (tcache_size <= small_maxclass) { @@ -389,7 +351,6 @@ tcache_thread_cleanup(void *arg) } } -#ifdef JEMALLOC_STATS void tcache_stats_merge(tcache_t *tcache, arena_t *arena) { @@ -413,7 +374,6 @@ tcache_stats_merge(tcache_t *tcache, arena_t *arena) tbin->tstats.nrequests = 0; } } -#endif bool tcache_boot(void) From fd56043c53f1cd1335ae6d1c0ee86cc0fbb9f12e Mon Sep 17 00:00:00 
2001 From: Jason Evans Date: Mon, 13 Feb 2012 10:24:43 -0800 Subject: [PATCH 0043/3378] Remove magic. Remove structure magic, because 1) it is no longer conditional, and 2) it stopped being very effective at detecting memory corruption several years ago. --- include/jemalloc/internal/arena.h | 11 ----------- include/jemalloc/internal/ckh.h | 3 --- include/jemalloc/internal/jemalloc_internal.h.in | 2 -- include/jemalloc/internal/prof.h | 4 ---- include/jemalloc/internal/tcache.h | 1 - src/arena.c | 15 --------------- src/ckh.c | 9 --------- 7 files changed, 45 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index b6a5c23d..78ea2696 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -211,9 +211,6 @@ struct arena_chunk_s { typedef rb_tree(arena_chunk_t) arena_chunk_tree_t; struct arena_run_s { - uint32_t magic; -# define ARENA_RUN_MAGIC 0x384adf93 - /* Bin this run is associated with. */ arena_bin_t *bin; @@ -290,9 +287,6 @@ struct arena_bin_s { }; struct arena_s { - uint32_t magic; -# define ARENA_MAGIC 0x947d3d24 - /* This arena's index within the arenas array. */ unsigned ind; @@ -499,7 +493,6 @@ arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr) unsigned shift, diff, regind; size_t size; - assert(run->magic == ARENA_RUN_MAGIC); /* * Freeing a pointer lower than region zero can cause assertion * failure. 
@@ -590,7 +583,6 @@ arena_prof_ctx_get(const void *ptr) arena_bin_info_t *bin_info = &arena_bin_info[binind]; unsigned regind; - assert(run->magic == ARENA_RUN_MAGIC); regind = arena_run_regind(run, bin_info, ptr); ret = *(prof_ctx_t **)((uintptr_t)run + bin_info->ctx0_offset + (regind * @@ -626,7 +618,6 @@ arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx) arena_bin_info_t *bin_info; unsigned regind; - assert(run->magic == ARENA_RUN_MAGIC); binind = arena_bin_index(chunk->arena, bin); bin_info = &arena_bin_info[binind]; regind = arena_run_regind(run, bin_info, ptr); @@ -646,7 +637,6 @@ arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr) arena_chunk_map_t *mapelm; assert(arena != NULL); - assert(arena->magic == ARENA_MAGIC); assert(chunk->arena == arena); assert(ptr != NULL); assert(CHUNK_ADDR2BASE(ptr) != ptr); @@ -667,7 +657,6 @@ arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr) run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - (mapelm->bits >> PAGE_SHIFT)) << PAGE_SHIFT)); - assert(run->magic == ARENA_RUN_MAGIC); bin = run->bin; if (config_debug) { size_t binind = arena_bin_index(arena, bin); diff --git a/include/jemalloc/internal/ckh.h b/include/jemalloc/internal/ckh.h index 28f171c8..65f30f56 100644 --- a/include/jemalloc/internal/ckh.h +++ b/include/jemalloc/internal/ckh.h @@ -30,9 +30,6 @@ struct ckhc_s { }; struct ckh_s { -#define CKH_MAGIC 0x3af2489d - uint32_t magic; - #ifdef CKH_COUNT /* Counters used to get an idea of performance. */ uint64_t ngrows; diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 8842e4bf..3193d35e 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -774,8 +774,6 @@ isalloc(const void *ptr) chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (chunk != ptr) { /* Region. 
*/ - assert(chunk->arena->magic == ARENA_MAGIC); - if (config_prof) ret = arena_salloc_demote(ptr); else diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index d4700808..98f96546 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -336,8 +336,6 @@ prof_ctx_get(const void *ptr) chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (chunk != ptr) { /* Region. */ - assert(chunk->arena->magic == ARENA_MAGIC); - ret = arena_prof_ctx_get(ptr); } else ret = huge_prof_ctx_get(ptr); @@ -356,8 +354,6 @@ prof_ctx_set(const void *ptr, prof_ctx_t *ctx) chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (chunk != ptr) { /* Region. */ - assert(chunk->arena->magic == ARENA_MAGIC); - arena_prof_ctx_set(ptr, ctx); } else huge_prof_ctx_set(ptr, ctx); diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index 0855d32e..83e03d9d 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -333,7 +333,6 @@ tcache_dalloc_small(tcache_t *tcache, void *ptr) mapelm = &chunk->map[pageind-map_bias]; run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - (mapelm->bits >> PAGE_SHIFT)) << PAGE_SHIFT)); - assert(run->magic == ARENA_RUN_MAGIC); bin = run->bin; binind = ((uintptr_t)bin - (uintptr_t)&arena->bins) / sizeof(arena_bin_t); diff --git a/src/arena.c b/src/arena.c index 356b628d..4ada6a37 100644 --- a/src/arena.c +++ b/src/arena.c @@ -255,7 +255,6 @@ arena_run_reg_alloc(arena_run_t *run, arena_bin_info_t *bin_info) bitmap_t *bitmap = (bitmap_t *)((uintptr_t)run + (uintptr_t)bin_info->bitmap_offset); - assert(run->magic == ARENA_RUN_MAGIC); assert(run->nfree > 0); assert(bitmap_full(bitmap, &bin_info->bitmap_info) == false); @@ -758,7 +757,6 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) chunk + (uintptr_t)(pageind << PAGE_SHIFT)); assert((mapelm->bits >> PAGE_SHIFT) == 0); - assert(run->magic == ARENA_RUN_MAGIC); size_t binind = 
arena_bin_index(arena, run->bin); arena_bin_info_t *bin_info = @@ -1220,8 +1218,6 @@ arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin) run->nextind = 0; run->nfree = bin_info->nregs; bitmap_init(bitmap, &bin_info->bitmap_info); - if (config_debug) - run->magic = ARENA_RUN_MAGIC; } malloc_mutex_unlock(&arena->lock); /********************************/ @@ -1281,7 +1277,6 @@ arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin) * Another thread updated runcur while this one ran without the * bin lock in arena_bin_nonfull_run_get(). */ - assert(bin->runcur->magic == ARENA_RUN_MAGIC); assert(bin->runcur->nfree > 0); ret = arena_run_reg_alloc(bin->runcur, bin_info); if (run != NULL) { @@ -1309,7 +1304,6 @@ arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin) bin->runcur = run; - assert(bin->runcur->magic == ARENA_RUN_MAGIC); assert(bin->runcur->nfree > 0); return (arena_run_reg_alloc(bin->runcur, bin_info)); @@ -1579,7 +1573,6 @@ arena_salloc(const void *ptr) arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) << PAGE_SHIFT)); - assert(run->magic == ARENA_RUN_MAGIC); size_t binind = arena_bin_index(chunk->arena, run->bin); arena_bin_info_t *bin_info = &arena_bin_info[binind]; assert(((uintptr_t)ptr - ((uintptr_t)run + @@ -1632,7 +1625,6 @@ arena_salloc_demote(const void *ptr) arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) << PAGE_SHIFT)); - assert(run->magic == ARENA_RUN_MAGIC); size_t binind = arena_bin_index(chunk->arena, run->bin); arena_bin_info_t *bin_info = &arena_bin_info[binind]; assert(((uintptr_t)ptr - ((uintptr_t)run + @@ -1727,8 +1719,6 @@ arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, ((past - run_ind) << PAGE_SHIFT), false); /* npages = past - run_ind; */ } - if (config_debug) - run->magic = 0; arena_run_dalloc(arena, run, true); malloc_mutex_unlock(&arena->lock); /****************************/ @@ -1785,7 
+1775,6 @@ arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - (mapelm->bits >> PAGE_SHIFT)) << PAGE_SHIFT)); - assert(run->magic == ARENA_RUN_MAGIC); bin = run->bin; size_t binind = arena_bin_index(arena, bin); arena_bin_info_t *bin_info = &arena_bin_info[binind]; @@ -2019,7 +2008,6 @@ arena_ralloc_large(void *ptr, size_t oldsize, size_t size, size_t extra, chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); arena = chunk->arena; - assert(arena->magic == ARENA_MAGIC); if (psize < oldsize) { /* Fill before shrinking in order avoid a race. */ @@ -2183,9 +2171,6 @@ arena_new(arena_t *arena, unsigned ind) memset(&bin->stats, 0, sizeof(malloc_bin_stats_t)); } - if (config_debug) - arena->magic = ARENA_MAGIC; - return (false); } diff --git a/src/ckh.c b/src/ckh.c index f7eaa78b..4a6d1e37 100644 --- a/src/ckh.c +++ b/src/ckh.c @@ -73,7 +73,6 @@ ckh_isearch(ckh_t *ckh, const void *key) size_t hash1, hash2, bucket, cell; assert(ckh != NULL); - assert(ckh->magic == CKH_MAGIC); ckh->hash(key, ckh->lg_curbuckets, &hash1, &hash2); @@ -394,9 +393,6 @@ ckh_new(ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh_keycomp_t *keycomp) goto RETURN; } - if (config_debug) - ckh->magic = CKH_MAGIC; - ret = false; RETURN: return (ret); @@ -407,7 +403,6 @@ ckh_delete(ckh_t *ckh) { assert(ckh != NULL); - assert(ckh->magic == CKH_MAGIC); #ifdef CKH_VERBOSE malloc_printf( @@ -432,7 +427,6 @@ ckh_count(ckh_t *ckh) { assert(ckh != NULL); - assert(ckh->magic == CKH_MAGIC); return (ckh->count); } @@ -463,7 +457,6 @@ ckh_insert(ckh_t *ckh, const void *key, const void *data) bool ret; assert(ckh != NULL); - assert(ckh->magic == CKH_MAGIC); assert(ckh_search(ckh, key, NULL, NULL)); #ifdef CKH_COUNT @@ -488,7 +481,6 @@ ckh_remove(ckh_t *ckh, const void *searchkey, void **key, void **data) size_t cell; assert(ckh != NULL); - assert(ckh->magic == CKH_MAGIC); cell = 
ckh_isearch(ckh, searchkey); if (cell != SIZE_T_MAX) { @@ -520,7 +512,6 @@ ckh_search(ckh_t *ckh, const void *searchkey, void **key, void **data) size_t cell; assert(ckh != NULL); - assert(ckh->magic == CKH_MAGIC); cell = ckh_isearch(ckh, searchkey); if (cell != SIZE_T_MAX) { From 4162627757889ea999264c2ddbc3c354768774e2 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 13 Feb 2012 10:56:17 -0800 Subject: [PATCH 0044/3378] Remove the swap feature. Remove the swap feature, which enabled per application swap files. In practice this feature has not proven itself useful to users. --- INSTALL | 5 - Makefile.in | 10 +- configure.ac | 17 - doc/jemalloc.xml.in | 94 +--- include/jemalloc/internal/chunk.h | 2 - include/jemalloc/internal/chunk_swap.h | 30 -- include/jemalloc/internal/ctl.h | 1 - .../jemalloc/internal/jemalloc_internal.h.in | 7 - include/jemalloc/internal/private_namespace.h | 5 - include/jemalloc/jemalloc_defs.h.in | 3 - src/arena.c | 5 +- src/chunk.c | 26 +- src/chunk_swap.c | 403 ------------------ src/ctl.c | 111 +---- src/huge.c | 10 +- src/jemalloc.c | 9 - src/stats.c | 26 +- 17 files changed, 26 insertions(+), 738 deletions(-) delete mode 100644 include/jemalloc/internal/chunk_swap.h delete mode 100644 src/chunk_swap.c diff --git a/INSTALL b/INSTALL index 2a1e469c..fa32d057 100644 --- a/INSTALL +++ b/INSTALL @@ -102,11 +102,6 @@ any of the following arguments (not a definitive list) to 'configure': released in bulk, thus reducing the total number of mutex operations. See the "opt.tcache" option for usage details. ---enable-swap - Enable mmap()ed swap file support. When this feature is built in, it is - possible to specify one or more files that act as backing store. This - effectively allows for per application swap files. - --enable-dss Enable support for page allocation/deallocation via sbrk(2), in addition to mmap(2). 
diff --git a/Makefile.in b/Makefile.in index 82983892..ca4365e8 100644 --- a/Makefile.in +++ b/Makefile.in @@ -47,14 +47,14 @@ CHDRS := @objroot@include/jemalloc/jemalloc@install_suffix@.h \ CSRCS := @srcroot@src/jemalloc.c @srcroot@src/arena.c @srcroot@src/atomic.c \ @srcroot@src/base.c @srcroot@src/bitmap.c @srcroot@src/chunk.c \ @srcroot@src/chunk_dss.c @srcroot@src/chunk_mmap.c \ - @srcroot@src/chunk_swap.c @srcroot@src/ckh.c @srcroot@src/ctl.c \ - @srcroot@src/extent.c @srcroot@src/hash.c @srcroot@src/huge.c \ - @srcroot@src/mb.c @srcroot@src/mutex.c @srcroot@src/prof.c \ - @srcroot@src/rtree.c @srcroot@src/stats.c @srcroot@src/tcache.c + @srcroot@src/ckh.c @srcroot@src/ctl.c @srcroot@src/extent.c \ + @srcroot@src/hash.c @srcroot@src/huge.c @srcroot@src/mb.c \ + @srcroot@src/mutex.c @srcroot@src/prof.c @srcroot@src/rtree.c \ + @srcroot@src/stats.c @srcroot@src/tcache.c ifeq (macho, @abi@) CSRCS += @srcroot@src/zone.c endif -STATIC_LIBS := @objroot@lib/libjemalloc@install_suffix@.a +STATIC_LIBS := @objroot@lib/libjemalloc@install_suffix@.a DSOS := @objroot@lib/libjemalloc@install_suffix@.$(SO).$(REV) \ @objroot@lib/libjemalloc@install_suffix@.$(SO) \ @objroot@lib/libjemalloc@install_suffix@_pic.a diff --git a/configure.ac b/configure.ac index 9617a5e3..cbcefdf3 100644 --- a/configure.ac +++ b/configure.ac @@ -592,22 +592,6 @@ if test "x$enable_tcache" = "x1" ; then fi AC_SUBST([enable_tcache]) -dnl Do not enable mmap()ped swap files by default. -AC_ARG_ENABLE([swap], - [AS_HELP_STRING([--enable-swap], [Enable mmap()ped swap files])], -[if test "x$enable_swap" = "xno" ; then - enable_swap="0" -else - enable_swap="1" -fi -], -[enable_swap="0"] -) -if test "x$enable_swap" = "x1" ; then - AC_DEFINE([JEMALLOC_SWAP], [ ]) -fi -AC_SUBST([enable_swap]) - dnl Do not enable allocation from DSS by default. 
AC_ARG_ENABLE([dss], [AS_HELP_STRING([--enable-dss], [Enable allocation from DSS])], @@ -955,7 +939,6 @@ AC_MSG_RESULT([tcache : ${enable_tcache}]) AC_MSG_RESULT([fill : ${enable_fill}]) AC_MSG_RESULT([xmalloc : ${enable_xmalloc}]) AC_MSG_RESULT([sysv : ${enable_sysv}]) -AC_MSG_RESULT([swap : ${enable_swap}]) AC_MSG_RESULT([dss : ${enable_dss}]) AC_MSG_RESULT([dynamic_page_shift : ${enable_dynamic_page_shift}]) AC_MSG_RESULT([lazy_lock : ${enable_lazy_lock}]) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 7a32879a..dc11642f 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -660,16 +660,6 @@ for (i = 0; i < nbins; i++) { build configuration. - - - config.swap - (bool) - r- - - was specified during - build configuration. - - config.sysv @@ -1118,25 +1108,6 @@ malloc_conf = "xmalloc:true";]]> by default. - - - opt.overcommit - (bool) - r- - [] - - Over-commit enabled/disabled. If enabled, over-commit - memory as a side effect of using anonymous - mmap - 2 or - sbrk - 2 for virtual memory allocation. - In order for overcommit to be disabled, the swap.fds mallctl must have - been successfully written to. This option is enabled by - default. - - tcache.flush @@ -1590,8 +1561,7 @@ malloc_conf = "xmalloc:true";]]> application. This is a multiple of the chunk size, and is at least as large as stats.active. This - does not include inactive chunks backed by swap files. his does not - include inactive chunks embedded in the DSS. + does not include inactive chunks embedded in the DSS. @@ -1602,8 +1572,7 @@ malloc_conf = "xmalloc:true";]]> [] Total number of chunks actively mapped on behalf of the - application. This does not include inactive chunks backed by swap - files. This does not include inactive chunks embedded in the DSS. + application. This does not include inactive chunks embedded in the DSS. @@ -1983,65 +1952,6 @@ malloc_conf = "xmalloc:true";]]> Current number of runs for this size class. 
- - - - swap.avail - (size_t) - r- - [] - - Number of swap file bytes that are currently not - associated with any chunk (i.e. mapped, but otherwise completely - unmanaged). - - - - - swap.prezeroed - (bool) - rw - [] - - If true, the allocator assumes that the swap file(s) - contain nothing but nil bytes. If this assumption is violated, - allocator behavior is undefined. This value becomes read-only after - swap.fds is - successfully written to. - - - - - swap.nfds - (size_t) - r- - [] - - Number of file descriptors in use for swap. - - - - - - swap.fds - (int *) - rw - [] - - When written to, the files associated with the - specified file descriptors are contiguously mapped via - mmap - 2. The resulting virtual memory - region is preferred over anonymous - mmap - 2 and - sbrk - 2 memory. Note that if a file's - size is not a multiple of the page size, it is automatically truncated - to the nearest page size multiple. See the - swap.prezeroed - mallctl for specifying that the files are pre-zeroed. - diff --git a/include/jemalloc/internal/chunk.h b/include/jemalloc/internal/chunk.h index 4cc1e80e..9a62ba18 100644 --- a/include/jemalloc/internal/chunk.h +++ b/include/jemalloc/internal/chunk.h @@ -28,7 +28,6 @@ #ifdef JEMALLOC_H_EXTERNS extern size_t opt_lg_chunk; -extern bool opt_overcommit; /* Protects stats_chunks; currently not used for any other purpose. 
*/ extern malloc_mutex_t chunks_mtx; @@ -54,6 +53,5 @@ bool chunk_boot(void); #endif /* JEMALLOC_H_INLINES */ /******************************************************************************/ -#include "jemalloc/internal/chunk_swap.h" #include "jemalloc/internal/chunk_dss.h" #include "jemalloc/internal/chunk_mmap.h" diff --git a/include/jemalloc/internal/chunk_swap.h b/include/jemalloc/internal/chunk_swap.h deleted file mode 100644 index 99a079eb..00000000 --- a/include/jemalloc/internal/chunk_swap.h +++ /dev/null @@ -1,30 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -extern malloc_mutex_t swap_mtx; -extern bool swap_enabled; -extern bool swap_prezeroed; -extern size_t swap_nfds; -extern int *swap_fds; -extern size_t swap_avail; - -void *chunk_alloc_swap(size_t size, bool *zero); -bool chunk_in_swap(void *chunk); -bool chunk_dealloc_swap(void *chunk, size_t size); -bool chunk_swap_enable(const int *fds, unsigned nfds, bool prezeroed); -bool chunk_swap_boot(void); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index 31f9d99b..de4b9412 100644 --- a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -59,7 +59,6 @@ struct ctl_stats_s { uint64_t ndalloc; /* huge_ndalloc */ } huge; ctl_arena_stats_t *arenas; /* (narenas + 1) elements. 
*/ - size_t swap_avail; }; #endif /* JEMALLOC_H_STRUCTS */ diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 3193d35e..44415370 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -104,13 +104,6 @@ static const bool config_stats = false #endif ; -static const bool config_swap = -#ifdef JEMALLOC_SWAP - true -#else - false -#endif - ; static const bool config_sysv = #ifdef JEMALLOC_SYSV true diff --git a/include/jemalloc/internal/private_namespace.h b/include/jemalloc/internal/private_namespace.h index d4f5f96d..db2192e6 100644 --- a/include/jemalloc/internal/private_namespace.h +++ b/include/jemalloc/internal/private_namespace.h @@ -48,18 +48,13 @@ #define chunk_alloc_dss JEMALLOC_N(chunk_alloc_dss) #define chunk_alloc_mmap JEMALLOC_N(chunk_alloc_mmap) #define chunk_alloc_mmap_noreserve JEMALLOC_N(chunk_alloc_mmap_noreserve) -#define chunk_alloc_swap JEMALLOC_N(chunk_alloc_swap) #define chunk_boot JEMALLOC_N(chunk_boot) #define chunk_dealloc JEMALLOC_N(chunk_dealloc) #define chunk_dealloc_dss JEMALLOC_N(chunk_dealloc_dss) #define chunk_dealloc_mmap JEMALLOC_N(chunk_dealloc_mmap) -#define chunk_dealloc_swap JEMALLOC_N(chunk_dealloc_swap) #define chunk_dss_boot JEMALLOC_N(chunk_dss_boot) #define chunk_in_dss JEMALLOC_N(chunk_in_dss) -#define chunk_in_swap JEMALLOC_N(chunk_in_swap) #define chunk_mmap_boot JEMALLOC_N(chunk_mmap_boot) -#define chunk_swap_boot JEMALLOC_N(chunk_swap_boot) -#define chunk_swap_enable JEMALLOC_N(chunk_swap_enable) #define ckh_bucket_search JEMALLOC_N(ckh_bucket_search) #define ckh_count JEMALLOC_N(ckh_count) #define ckh_delete JEMALLOC_N(ckh_delete) diff --git a/include/jemalloc/jemalloc_defs.h.in b/include/jemalloc/jemalloc_defs.h.in index d8052e2b..f78028b0 100644 --- a/include/jemalloc/jemalloc_defs.h.in +++ b/include/jemalloc/jemalloc_defs.h.in @@ -98,9 +98,6 @@ */ #undef JEMALLOC_DSS -/* JEMALLOC_SWAP 
enables mmap()ed swap file support. */ -#undef JEMALLOC_SWAP - /* Support memory filling (junk/zero). */ #undef JEMALLOC_FILL diff --git a/src/arena.c b/src/arena.c index 4ada6a37..c2632d97 100644 --- a/src/arena.c +++ b/src/arena.c @@ -671,10 +671,11 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) * madvise(..., MADV_DONTNEED) results in zero-filled pages for anonymous * mappings, but not for file-backed mappings. */ - (config_swap && swap_enabled) ? CHUNK_MAP_UNZEROED : 0; + 0 #else - CHUNK_MAP_UNZEROED; + CHUNK_MAP_UNZEROED #endif + ; /* * If chunk is the spare, temporarily re-allocate it, 1) so that its diff --git a/src/chunk.c b/src/chunk.c index 57ab20d8..b9086509 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -5,7 +5,6 @@ /* Data. */ size_t opt_lg_chunk = LG_CHUNK_DEFAULT; -bool opt_overcommit = true; malloc_mutex_t chunks_mtx; chunk_stats_t stats_chunks; @@ -35,23 +34,15 @@ chunk_alloc(size_t size, bool base, bool *zero) assert(size != 0); assert((size & chunksize_mask) == 0); - if (config_swap && swap_enabled) { - ret = chunk_alloc_swap(size, zero); + if (config_dss) { + ret = chunk_alloc_dss(size, zero); if (ret != NULL) goto RETURN; } - - if (swap_enabled == false || opt_overcommit) { - if (config_dss) { - ret = chunk_alloc_dss(size, zero); - if (ret != NULL) - goto RETURN; - } - ret = chunk_alloc_mmap(size); - if (ret != NULL) { - *zero = true; - goto RETURN; - } + ret = chunk_alloc_mmap(size); + if (ret != NULL) { + *zero = true; + goto RETURN; } /* All strategies for allocation failed. 
*/ @@ -102,9 +93,6 @@ chunk_dealloc(void *chunk, size_t size, bool unmap) } if (unmap) { - if (config_swap && swap_enabled && chunk_dealloc_swap(chunk, - size) == false) - return; if (config_dss && chunk_dealloc_dss(chunk, size) == false) return; chunk_dealloc_mmap(chunk, size); @@ -126,8 +114,6 @@ chunk_boot(void) return (true); memset(&stats_chunks, 0, sizeof(chunk_stats_t)); } - if (config_swap && chunk_swap_boot()) - return (true); if (chunk_mmap_boot()) return (true); if (config_dss && chunk_dss_boot()) diff --git a/src/chunk_swap.c b/src/chunk_swap.c deleted file mode 100644 index fe9ca303..00000000 --- a/src/chunk_swap.c +++ /dev/null @@ -1,403 +0,0 @@ -#define JEMALLOC_CHUNK_SWAP_C_ -#include "jemalloc/internal/jemalloc_internal.h" - -/******************************************************************************/ -/* Data. */ - -malloc_mutex_t swap_mtx; -bool swap_enabled; -bool swap_prezeroed; -size_t swap_nfds; -int *swap_fds; -size_t swap_avail; - -/* Base address of the mmap()ed file(s). */ -static void *swap_base; -/* Current end of the space in use (<= swap_max). */ -static void *swap_end; -/* Absolute upper limit on file-backed addresses. */ -static void *swap_max; - -/* - * Trees of chunks that were previously allocated (trees differ only in node - * ordering). These are used when allocating chunks, in an attempt to re-use - * address space. Depending on function, different tree orderings are needed, - * which is why there are two trees with the same contents. - */ -static extent_tree_t swap_chunks_szad; -static extent_tree_t swap_chunks_ad; - -/******************************************************************************/ -/* Function prototypes for non-inline static functions. 
*/ - -static void *chunk_recycle_swap(size_t size, bool *zero); -static extent_node_t *chunk_dealloc_swap_record(void *chunk, size_t size); - -/******************************************************************************/ - -static void * -chunk_recycle_swap(size_t size, bool *zero) -{ - extent_node_t *node, key; - - cassert(config_swap); - - key.addr = NULL; - key.size = size; - malloc_mutex_lock(&swap_mtx); - node = extent_tree_szad_nsearch(&swap_chunks_szad, &key); - if (node != NULL) { - void *ret = node->addr; - - /* Remove node from the tree. */ - extent_tree_szad_remove(&swap_chunks_szad, node); - if (node->size == size) { - extent_tree_ad_remove(&swap_chunks_ad, node); - base_node_dealloc(node); - } else { - /* - * Insert the remainder of node's address range as a - * smaller chunk. Its position within swap_chunks_ad - * does not change. - */ - assert(node->size > size); - node->addr = (void *)((uintptr_t)node->addr + size); - node->size -= size; - extent_tree_szad_insert(&swap_chunks_szad, node); - } - if (config_stats) - swap_avail -= size; - malloc_mutex_unlock(&swap_mtx); - - if (*zero) - memset(ret, 0, size); - return (ret); - } - malloc_mutex_unlock(&swap_mtx); - - return (NULL); -} - -void * -chunk_alloc_swap(size_t size, bool *zero) -{ - void *ret; - - cassert(config_swap); - assert(swap_enabled); - - ret = chunk_recycle_swap(size, zero); - if (ret != NULL) - return (ret); - - malloc_mutex_lock(&swap_mtx); - if ((uintptr_t)swap_end + size <= (uintptr_t)swap_max) { - ret = swap_end; - swap_end = (void *)((uintptr_t)swap_end + size); - if (config_stats) - swap_avail -= size; - malloc_mutex_unlock(&swap_mtx); - - if (swap_prezeroed) - *zero = true; - else if (*zero) - memset(ret, 0, size); - } else { - malloc_mutex_unlock(&swap_mtx); - return (NULL); - } - - return (ret); -} - -static extent_node_t * -chunk_dealloc_swap_record(void *chunk, size_t size) -{ - extent_node_t *xnode, *node, *prev, key; - - cassert(config_swap); - - xnode = NULL; - while 
(true) { - key.addr = (void *)((uintptr_t)chunk + size); - node = extent_tree_ad_nsearch(&swap_chunks_ad, &key); - /* Try to coalesce forward. */ - if (node != NULL && node->addr == key.addr) { - /* - * Coalesce chunk with the following address range. - * This does not change the position within - * swap_chunks_ad, so only remove/insert from/into - * swap_chunks_szad. - */ - extent_tree_szad_remove(&swap_chunks_szad, node); - node->addr = chunk; - node->size += size; - extent_tree_szad_insert(&swap_chunks_szad, node); - break; - } else if (xnode == NULL) { - /* - * It is possible that base_node_alloc() will cause a - * new base chunk to be allocated, so take care not to - * deadlock on swap_mtx, and recover if another thread - * deallocates an adjacent chunk while this one is busy - * allocating xnode. - */ - malloc_mutex_unlock(&swap_mtx); - xnode = base_node_alloc(); - malloc_mutex_lock(&swap_mtx); - if (xnode == NULL) - return (NULL); - } else { - /* Coalescing forward failed, so insert a new node. */ - node = xnode; - xnode = NULL; - node->addr = chunk; - node->size = size; - extent_tree_ad_insert(&swap_chunks_ad, node); - extent_tree_szad_insert(&swap_chunks_szad, node); - break; - } - } - /* Discard xnode if it ended up unused do to a race. */ - if (xnode != NULL) - base_node_dealloc(xnode); - - /* Try to coalesce backward. */ - prev = extent_tree_ad_prev(&swap_chunks_ad, node); - if (prev != NULL && (void *)((uintptr_t)prev->addr + prev->size) == - chunk) { - /* - * Coalesce chunk with the previous address range. This does - * not change the position within swap_chunks_ad, so only - * remove/insert node from/into swap_chunks_szad. 
- */ - extent_tree_szad_remove(&swap_chunks_szad, prev); - extent_tree_ad_remove(&swap_chunks_ad, prev); - - extent_tree_szad_remove(&swap_chunks_szad, node); - node->addr = prev->addr; - node->size += prev->size; - extent_tree_szad_insert(&swap_chunks_szad, node); - - base_node_dealloc(prev); - } - - return (node); -} - -bool -chunk_in_swap(void *chunk) -{ - bool ret; - - cassert(config_swap); - assert(swap_enabled); - - malloc_mutex_lock(&swap_mtx); - if ((uintptr_t)chunk >= (uintptr_t)swap_base - && (uintptr_t)chunk < (uintptr_t)swap_max) - ret = true; - else - ret = false; - malloc_mutex_unlock(&swap_mtx); - - return (ret); -} - -bool -chunk_dealloc_swap(void *chunk, size_t size) -{ - bool ret; - - cassert(config_swap); - assert(swap_enabled); - - malloc_mutex_lock(&swap_mtx); - if ((uintptr_t)chunk >= (uintptr_t)swap_base - && (uintptr_t)chunk < (uintptr_t)swap_max) { - extent_node_t *node; - - /* Try to coalesce with other unused chunks. */ - node = chunk_dealloc_swap_record(chunk, size); - if (node != NULL) { - chunk = node->addr; - size = node->size; - } - - /* - * Try to shrink the in-use memory if this chunk is at the end - * of the in-use memory. - */ - if ((void *)((uintptr_t)chunk + size) == swap_end) { - swap_end = (void *)((uintptr_t)swap_end - size); - - if (node != NULL) { - extent_tree_szad_remove(&swap_chunks_szad, - node); - extent_tree_ad_remove(&swap_chunks_ad, node); - base_node_dealloc(node); - } - } else - madvise(chunk, size, MADV_DONTNEED); - - if (config_stats) - swap_avail += size; - ret = false; - goto RETURN; - } - - ret = true; -RETURN: - malloc_mutex_unlock(&swap_mtx); - return (ret); -} - -bool -chunk_swap_enable(const int *fds, unsigned nfds, bool prezeroed) -{ - bool ret; - unsigned i; - off_t off; - void *vaddr; - size_t cumsize, voff; - size_t sizes[nfds]; - - cassert(config_swap); - - malloc_mutex_lock(&swap_mtx); - - /* Get file sizes. 
*/ - for (i = 0, cumsize = 0; i < nfds; i++) { - off = lseek(fds[i], 0, SEEK_END); - if (off == ((off_t)-1)) { - ret = true; - goto RETURN; - } - if (PAGE_CEILING(off) != off) { - /* Truncate to a multiple of the page size. */ - off &= ~PAGE_MASK; - if (ftruncate(fds[i], off) != 0) { - ret = true; - goto RETURN; - } - } - sizes[i] = off; - if (cumsize + off < cumsize) { - /* - * Cumulative file size is greater than the total - * address space. Bail out while it's still obvious - * what the problem is. - */ - ret = true; - goto RETURN; - } - cumsize += off; - } - - /* Round down to a multiple of the chunk size. */ - cumsize &= ~chunksize_mask; - if (cumsize == 0) { - ret = true; - goto RETURN; - } - - /* - * Allocate a chunk-aligned region of anonymous memory, which will - * be the final location for the memory-mapped files. - */ - vaddr = chunk_alloc_mmap_noreserve(cumsize); - if (vaddr == NULL) { - ret = true; - goto RETURN; - } - - /* Overlay the files onto the anonymous mapping. */ - for (i = 0, voff = 0; i < nfds; i++) { - void *addr = mmap((void *)((uintptr_t)vaddr + voff), sizes[i], - PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, fds[i], 0); - if (addr == MAP_FAILED) { - char buf[BUFERROR_BUF]; - - - buferror(errno, buf, sizeof(buf)); - malloc_write( - ": Error in mmap(..., MAP_FIXED, ...): "); - malloc_write(buf); - malloc_write("\n"); - if (opt_abort) - abort(); - if (munmap(vaddr, voff) == -1) { - buferror(errno, buf, sizeof(buf)); - malloc_write(": Error in munmap(): "); - malloc_write(buf); - malloc_write("\n"); - } - ret = true; - goto RETURN; - } - assert(addr == (void *)((uintptr_t)vaddr + voff)); - - /* - * Tell the kernel that the mapping will be accessed randomly, - * and that it should not gratuitously sync pages to the - * filesystem. 
- */ -#ifdef MADV_RANDOM - madvise(addr, sizes[i], MADV_RANDOM); -#endif -#ifdef MADV_NOSYNC - madvise(addr, sizes[i], MADV_NOSYNC); -#endif - - voff += sizes[i]; - } - - swap_prezeroed = prezeroed; - swap_base = vaddr; - swap_end = swap_base; - swap_max = (void *)((uintptr_t)vaddr + cumsize); - - /* Copy the fds array for mallctl purposes. */ - swap_fds = (int *)base_alloc(nfds * sizeof(int)); - if (swap_fds == NULL) { - ret = true; - goto RETURN; - } - memcpy(swap_fds, fds, nfds * sizeof(int)); - swap_nfds = nfds; - - if (config_stats) - swap_avail = cumsize; - - swap_enabled = true; - - ret = false; -RETURN: - malloc_mutex_unlock(&swap_mtx); - return (ret); -} - -bool -chunk_swap_boot(void) -{ - - cassert(config_swap); - - if (malloc_mutex_init(&swap_mtx)) - return (true); - - swap_enabled = false; - swap_prezeroed = false; /* swap.* mallctl's depend on this. */ - swap_nfds = 0; - swap_fds = NULL; - if (config_stats) - swap_avail = 0; - swap_base = NULL; - swap_end = NULL; - swap_max = NULL; - - extent_tree_szad_new(&swap_chunks_szad); - extent_tree_ad_new(&swap_chunks_ad); - - return (false); -} diff --git a/src/ctl.c b/src/ctl.c index 05be4317..2ac2f66e 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -8,8 +8,6 @@ * ctl_mtx protects the following: * - ctl_stats.* * - opt_prof_active - * - swap_enabled - * - swap_prezeroed */ static malloc_mutex_t ctl_mtx; static bool ctl_initialized; @@ -56,7 +54,6 @@ CTL_PROTO(config_prof) CTL_PROTO(config_prof_libgcc) CTL_PROTO(config_prof_libunwind) CTL_PROTO(config_stats) -CTL_PROTO(config_swap) CTL_PROTO(config_sysv) CTL_PROTO(config_tcache) CTL_PROTO(config_tiny) @@ -85,7 +82,6 @@ CTL_PROTO(opt_prof_gdump) CTL_PROTO(opt_prof_leak) CTL_PROTO(opt_prof_accum) CTL_PROTO(opt_lg_prof_tcmax) -CTL_PROTO(opt_overcommit) CTL_PROTO(arenas_bin_i_size) CTL_PROTO(arenas_bin_i_nregs) CTL_PROTO(arenas_bin_i_run_size) @@ -162,10 +158,6 @@ CTL_PROTO(stats_cactive) CTL_PROTO(stats_allocated) CTL_PROTO(stats_active) CTL_PROTO(stats_mapped) 
-CTL_PROTO(swap_avail) -CTL_PROTO(swap_prezeroed) -CTL_PROTO(swap_nfds) -CTL_PROTO(swap_fds) /******************************************************************************/ /* mallctl tree. */ @@ -205,7 +197,6 @@ static const ctl_node_t config_node[] = { {NAME("prof_libgcc"), CTL(config_prof_libgcc)}, {NAME("prof_libunwind"), CTL(config_prof_libunwind)}, {NAME("stats"), CTL(config_stats)}, - {NAME("swap"), CTL(config_swap)}, {NAME("sysv"), CTL(config_sysv)}, {NAME("tcache"), CTL(config_tcache)}, {NAME("tiny"), CTL(config_tiny)}, @@ -236,8 +227,7 @@ static const ctl_node_t opt_node[] = { {NAME("prof_gdump"), CTL(opt_prof_gdump)}, {NAME("prof_leak"), CTL(opt_prof_leak)}, {NAME("prof_accum"), CTL(opt_prof_accum)}, - {NAME("lg_prof_tcmax"), CTL(opt_lg_prof_tcmax)}, - {NAME("overcommit"), CTL(opt_overcommit)} + {NAME("lg_prof_tcmax"), CTL(opt_lg_prof_tcmax)} }; static const ctl_node_t arenas_bin_i_node[] = { @@ -391,13 +381,6 @@ static const ctl_node_t stats_node[] = { {NAME("arenas"), CHILD(stats_arenas)} }; -static const ctl_node_t swap_node[] = { - {NAME("avail"), CTL(swap_avail)}, - {NAME("prezeroed"), CTL(swap_prezeroed)}, - {NAME("nfds"), CTL(swap_nfds)}, - {NAME("fds"), CTL(swap_fds)} -}; - static const ctl_node_t root_node[] = { {NAME("version"), CTL(version)}, {NAME("epoch"), CTL(epoch)}, @@ -408,8 +391,6 @@ static const ctl_node_t root_node[] = { {NAME("arenas"), CHILD(arenas)}, {NAME("prof"), CHILD(prof)}, {NAME("stats"), CHILD(stats)} - , - {NAME("swap"), CHILD(swap)} }; static const ctl_node_t super_root_node[] = { {NAME(""), CHILD(root)} @@ -597,12 +578,6 @@ ctl_refresh(void) ctl_stats.active = (ctl_stats.arenas[narenas].pactive << PAGE_SHIFT) + ctl_stats.huge.allocated; ctl_stats.mapped = (ctl_stats.chunks.current << opt_lg_chunk); - - if (config_swap) { - malloc_mutex_lock(&swap_mtx); - ctl_stats.swap_avail = swap_avail; - malloc_mutex_unlock(&swap_mtx); - } } ctl_epoch++; @@ -1138,7 +1113,6 @@ CTL_RO_BOOL_CONFIG_GEN(config_prof) 
CTL_RO_BOOL_CONFIG_GEN(config_prof_libgcc) CTL_RO_BOOL_CONFIG_GEN(config_prof_libunwind) CTL_RO_BOOL_CONFIG_GEN(config_stats) -CTL_RO_BOOL_CONFIG_GEN(config_swap) CTL_RO_BOOL_CONFIG_GEN(config_sysv) CTL_RO_BOOL_CONFIG_GEN(config_tcache) CTL_RO_BOOL_CONFIG_GEN(config_tiny) @@ -1171,7 +1145,6 @@ CTL_RO_NL_CGEN(config_prof, opt_prof_gdump, opt_prof_gdump, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_leak, opt_prof_leak, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_accum, opt_prof_accum, bool) CTL_RO_NL_CGEN(config_prof, opt_lg_prof_tcmax, opt_lg_prof_tcmax, ssize_t) -CTL_RO_NL_CGEN(config_swap, opt_overcommit, opt_overcommit, bool) /******************************************************************************/ @@ -1450,85 +1423,3 @@ CTL_RO_CGEN(config_stats, stats_cactive, &stats_cactive, size_t *) CTL_RO_CGEN(config_stats, stats_allocated, ctl_stats.allocated, size_t) CTL_RO_CGEN(config_stats, stats_active, ctl_stats.active, size_t) CTL_RO_CGEN(config_stats, stats_mapped, ctl_stats.mapped, size_t) - -/******************************************************************************/ - -CTL_RO_CGEN(config_swap && config_stats, swap_avail, ctl_stats.swap_avail, - size_t) - -static int -swap_prezeroed_ctl(const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) -{ - int ret; - - if (config_swap == false) - return (ENOENT); - - malloc_mutex_lock(&ctl_mtx); - if (swap_enabled) { - READONLY(); - } else { - /* - * swap_prezeroed isn't actually used by the swap code until it - * is set during a successful chunk_swap_enabled() call. We - * use it here to store the value that we'll pass to - * chunk_swap_enable() in a swap.fds mallctl(). This is not - * very clean, but the obvious alternatives are even worse. 
- */ - WRITE(swap_prezeroed, bool); - } - - READ(swap_prezeroed, bool); - - ret = 0; -RETURN: - malloc_mutex_unlock(&ctl_mtx); - return (ret); -} - -CTL_RO_CGEN(config_swap, swap_nfds, swap_nfds, size_t) - -static int -swap_fds_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, - void *newp, size_t newlen) -{ - int ret; - - if (config_swap == false) - return (ENOENT); - - malloc_mutex_lock(&ctl_mtx); - if (swap_enabled) { - READONLY(); - } else if (newp != NULL) { - size_t nfds = newlen / sizeof(int); - - { - int fds[nfds]; - - memcpy(fds, newp, nfds * sizeof(int)); - if (chunk_swap_enable(fds, nfds, swap_prezeroed)) { - ret = EFAULT; - goto RETURN; - } - } - } - - if (oldp != NULL && oldlenp != NULL) { - if (*oldlenp != swap_nfds * sizeof(int)) { - size_t copylen = (swap_nfds * sizeof(int) <= *oldlenp) - ? swap_nfds * sizeof(int) : *oldlenp; - - memcpy(oldp, swap_fds, copylen); - ret = EINVAL; - goto RETURN; - } else - memcpy(oldp, swap_fds, *oldlenp); - } - - ret = 0; -RETURN: - malloc_mutex_unlock(&ctl_mtx); - return (ret); -} diff --git a/src/huge.c b/src/huge.c index 1eee436e..f2fba869 100644 --- a/src/huge.c +++ b/src/huge.c @@ -212,13 +212,11 @@ huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, /* * Use mremap(2) if this is a huge-->huge reallocation, and neither the - * source nor the destination are in swap or dss. + * source nor the destination are in dss. 
*/ #ifdef JEMALLOC_MREMAP_FIXED - if (oldsize >= chunksize && (config_swap == false || swap_enabled == - false || (chunk_in_swap(ptr) == false && chunk_in_swap(ret) == - false)) && (config_dss == false || (chunk_in_dss(ptr) == false && - chunk_in_dss(ret) == false))) { + if (oldsize >= chunksize && (config_dss == false || (chunk_in_dss(ptr) + == false && chunk_in_dss(ret) == false))) { size_t newsize = huge_salloc(ret); /* @@ -280,7 +278,7 @@ huge_dalloc(void *ptr, bool unmap) malloc_mutex_unlock(&huge_mtx); - if (unmap && config_fill && (config_swap || config_dss) && opt_junk) + if (unmap && config_fill && config_dss && opt_junk) memset(node->addr, 0x5a, node->size); chunk_dealloc(node->addr, node->size, unmap); diff --git a/src/jemalloc.c b/src/jemalloc.c index 9e1814de..a32ce1a7 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -610,9 +610,6 @@ malloc_conf_init(void) CONF_HANDLE_BOOL(prof_gdump) CONF_HANDLE_BOOL(prof_leak) } - if (config_swap) { - CONF_HANDLE_BOOL(overcommit) - } malloc_conf_error("Invalid conf pair", k, klen, v, vlen); #undef CONF_HANDLE_BOOL @@ -1629,9 +1626,6 @@ jemalloc_prefork(void) if (config_dss) malloc_mutex_lock(&dss_mtx); - - if (config_swap) - malloc_mutex_lock(&swap_mtx); } void @@ -1641,9 +1635,6 @@ jemalloc_postfork(void) /* Release all mutexes, now that fork() has completed. 
*/ - if (config_swap) - malloc_mutex_unlock(&swap_mtx); - if (config_dss) malloc_mutex_unlock(&dss_mtx); diff --git a/src/stats.c b/src/stats.c index e6446530..ad8cd13d 100644 --- a/src/stats.c +++ b/src/stats.c @@ -525,7 +525,6 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, OPT_WRITE_SSIZE_T(lg_prof_interval) OPT_WRITE_BOOL(prof_gdump) OPT_WRITE_BOOL(prof_leak) - OPT_WRITE_BOOL(overcommit) #undef OPT_WRITE_BOOL #undef OPT_WRITE_SIZE_T @@ -668,11 +667,10 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, } if (config_stats) { - int err; size_t sszp, ssz; size_t *cactive; size_t allocated, active, mapped; - size_t chunks_current, chunks_high, swap_avail; + size_t chunks_current, chunks_high; uint64_t chunks_total; size_t huge_allocated; uint64_t huge_nmalloc, huge_ndalloc; @@ -694,24 +692,10 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, CTL_GET("stats.chunks.total", &chunks_total, uint64_t); CTL_GET("stats.chunks.high", &chunks_high, size_t); CTL_GET("stats.chunks.current", &chunks_current, size_t); - if ((err = JEMALLOC_P(mallctl)("swap.avail", &swap_avail, &ssz, - NULL, 0)) == 0) { - size_t lg_chunk; - - malloc_cprintf(write_cb, cbopaque, "chunks: nchunks " - "highchunks curchunks swap_avail\n"); - CTL_GET("opt.lg_chunk", &lg_chunk, size_t); - malloc_cprintf(write_cb, cbopaque, - " %13"PRIu64"%13zu%13zu%13zu\n", - chunks_total, chunks_high, chunks_current, - swap_avail << lg_chunk); - } else { - malloc_cprintf(write_cb, cbopaque, "chunks: nchunks " - "highchunks curchunks\n"); - malloc_cprintf(write_cb, cbopaque, - " %13"PRIu64"%13zu%13zu\n", - chunks_total, chunks_high, chunks_current); - } + malloc_cprintf(write_cb, cbopaque, "chunks: nchunks " + "highchunks curchunks\n"); + malloc_cprintf(write_cb, cbopaque, " %13"PRIu64"%13zu%13zu\n", + chunks_total, chunks_high, chunks_current); /* Print huge stats. 
*/ CTL_GET("stats.huge.nmalloc", &huge_nmalloc, uint64_t); From 962463d9b57bcc65de2fa108a691b4183b9b2faf Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 13 Feb 2012 12:29:49 -0800 Subject: [PATCH 0045/3378] Streamline tcache-related malloc/free fast paths. tcache_get() is inlined, so do the config_tcache check inside tcache_get() and simplify its callers. Make arena_malloc() an inline function, since it is part of the malloc() fast path. Remove conditional logic that cause build issues if --disable-tcache was specified. --- include/jemalloc/internal/arena.h | 50 +++++++++++++++++------------- include/jemalloc/internal/tcache.h | 8 +++-- src/arena.c | 29 ----------------- src/tcache.c | 4 +-- 4 files changed, 34 insertions(+), 57 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 78ea2696..b8de12be 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -446,7 +446,6 @@ void arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, size_t binind, uint64_t prof_accumbytes); void *arena_malloc_small(arena_t *arena, size_t size, bool zero); void *arena_malloc_large(arena_t *arena, size_t size, bool zero); -void *arena_malloc(size_t size, bool zero); void *arena_palloc(arena_t *arena, size_t size, size_t alloc_size, size_t alignment, bool zero); size_t arena_salloc(const void *ptr); @@ -475,6 +474,7 @@ unsigned arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr); prof_ctx_t *arena_prof_ctx_get(const void *ptr); void arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx); +void *arena_malloc(size_t size, bool zero); void arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr); #endif @@ -630,11 +630,33 @@ arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx) chunk->map[pageind-map_bias].prof_ctx = ctx; } +JEMALLOC_INLINE void * +arena_malloc(size_t size, bool zero) +{ + tcache_t *tcache = tcache_get(); + + assert(size != 0); + 
assert(QUANTUM_CEILING(size) <= arena_maxclass); + + if (size <= small_maxclass) { + if (tcache != NULL) + return (tcache_alloc_small(tcache, size, zero)); + else + return (arena_malloc_small(choose_arena(), size, zero)); + } else { + if (tcache != NULL && size <= tcache_maxclass) + return (tcache_alloc_large(tcache, size, zero)); + else + return (arena_malloc_large(choose_arena(), size, zero)); + } +} + JEMALLOC_INLINE void arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr) { size_t pageind; arena_chunk_map_t *mapelm; + tcache_t *tcache = tcache_get(); assert(arena != NULL); assert(chunk->arena == arena); @@ -646,9 +668,7 @@ arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr) assert((mapelm->bits & CHUNK_MAP_ALLOCATED) != 0); if ((mapelm->bits & CHUNK_MAP_LARGE) == 0) { /* Small allocation. */ - tcache_t *tcache; - - if (config_tcache && (tcache = tcache_get()) != NULL) + if (tcache != NULL) tcache_dalloc_small(tcache, ptr); else { arena_run_t *run; @@ -671,27 +691,13 @@ arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr) malloc_mutex_unlock(&bin->lock); } } else { - if (config_tcache) { - size_t size = mapelm->bits & ~PAGE_MASK; + size_t size = mapelm->bits & ~PAGE_MASK; - assert(((uintptr_t)ptr & PAGE_MASK) == 0); - if (size <= tcache_maxclass) { - tcache_t *tcache; + assert(((uintptr_t)ptr & PAGE_MASK) == 0); - if ((tcache = tcache_get()) != NULL) - tcache_dalloc_large(tcache, ptr, size); - else { - malloc_mutex_lock(&arena->lock); - arena_dalloc_large(arena, chunk, ptr); - malloc_mutex_unlock(&arena->lock); - } - } else { - malloc_mutex_lock(&arena->lock); - arena_dalloc_large(arena, chunk, ptr); - malloc_mutex_unlock(&arena->lock); - } + if (size <= tcache_maxclass && tcache != NULL) { + tcache_dalloc_large(tcache, ptr, size); } else { - assert(((uintptr_t)ptr & PAGE_MASK) == 0); malloc_mutex_lock(&arena->lock); arena_dalloc_large(arena, chunk, ptr); malloc_mutex_unlock(&arena->lock); diff --git 
a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index 83e03d9d..717682d7 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -1,4 +1,3 @@ -#ifdef JEMALLOC_TCACHE /******************************************************************************/ #ifdef JEMALLOC_H_TYPES @@ -134,7 +133,11 @@ tcache_get(void) { tcache_t *tcache; - if ((isthreaded & opt_tcache) == false) + if (config_tcache == false) + return (NULL); + if (config_lazy_lock && (isthreaded & opt_tcache) == false) + return (NULL); + else if (opt_tcache == false) return (NULL); tcache = TCACHE_GET(); @@ -391,4 +394,3 @@ tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size) #endif /* JEMALLOC_H_INLINES */ /******************************************************************************/ -#endif /* JEMALLOC_TCACHE */ diff --git a/src/arena.c b/src/arena.c index c2632d97..8a158df2 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1455,35 +1455,6 @@ arena_malloc_large(arena_t *arena, size_t size, bool zero) return (ret); } -void * -arena_malloc(size_t size, bool zero) -{ - - assert(size != 0); - assert(QUANTUM_CEILING(size) <= arena_maxclass); - - if (size <= small_maxclass) { - tcache_t *tcache; - - if (config_tcache && (tcache = tcache_get()) != NULL) - return (tcache_alloc_small(tcache, size, zero)); - else - return (arena_malloc_small(choose_arena(), size, zero)); - } else { - if (config_tcache && size <= tcache_maxclass) { - tcache_t *tcache; - - if ((tcache = tcache_get()) != NULL) - return (tcache_alloc_large(tcache, size, zero)); - else { - return (arena_malloc_large(choose_arena(), - size, zero)); - } - } else - return (arena_malloc_large(choose_arena(), size, zero)); - } -} - /* Only handles large allocations that require more than page alignment. 
*/ void * arena_palloc(arena_t *arena, size_t size, size_t alloc_size, size_t alignment, diff --git a/src/tcache.c b/src/tcache.c index 398fc0aa..4f4ed6c6 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -1,6 +1,6 @@ #define JEMALLOC_TCACHE_C_ #include "jemalloc/internal/jemalloc_internal.h" -#ifdef JEMALLOC_TCACHE + /******************************************************************************/ /* Data. */ @@ -436,5 +436,3 @@ tcache_boot(void) return (false); } -/******************************************************************************/ -#endif /* JEMALLOC_TCACHE */ From 6ffbbeb5d60bdf16e15927cb1f173376fe355449 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 13 Feb 2012 12:31:30 -0800 Subject: [PATCH 0046/3378] Silence compiler warnings. --- src/jemalloc.c | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index a32ce1a7..796c8158 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -864,7 +864,11 @@ JEMALLOC_P(malloc)(size_t size) { void *ret; size_t usize; - prof_thr_cnt_t *cnt; + prof_thr_cnt_t *cnt +#ifdef JEMALLOC_CC_SILENCE + = NULL +#endif + ; if (malloc_init()) { ret = NULL; @@ -939,7 +943,11 @@ imemalign(void **memptr, size_t alignment, size_t size) int ret; size_t usize; void *result; - prof_thr_cnt_t *cnt; + prof_thr_cnt_t *cnt +#ifdef JEMALLOC_CC_SILENCE + = NULL +#endif + ; if (malloc_init()) result = NULL; @@ -1046,7 +1054,11 @@ JEMALLOC_P(calloc)(size_t num, size_t size) void *ret; size_t num_size; size_t usize; - prof_thr_cnt_t *cnt; + prof_thr_cnt_t *cnt +#ifdef JEMALLOC_CC_SILENCE + = NULL +#endif + ; if (malloc_init()) { num_size = 0; @@ -1121,8 +1133,16 @@ JEMALLOC_P(realloc)(void *ptr, size_t size) void *ret; size_t usize; size_t old_size = 0; - prof_thr_cnt_t *cnt; - prof_ctx_t *old_ctx; + prof_thr_cnt_t *cnt +#ifdef JEMALLOC_CC_SILENCE + = NULL +#endif + ; + prof_ctx_t *old_ctx +#ifdef JEMALLOC_CC_SILENCE + = NULL +#endif + ; if (size == 0) { if 
(config_sysv == false || opt_sysv == false) From 0fee70d718b9846cfab04225dc86a4b4216b963f Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 13 Feb 2012 12:36:11 -0800 Subject: [PATCH 0047/3378] Do not enable lazy locking by default. Do not enable lazy locking by default, because: - It's fragile (applications can subvert detection of multi-threaded mode). - Thread caching amortizes locking overhead in the default configuration. --- INSTALL | 4 ++-- configure.ac | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/INSTALL b/INSTALL index fa32d057..a210ec5a 100644 --- a/INSTALL +++ b/INSTALL @@ -128,8 +128,8 @@ any of the following arguments (not a definitive list) to 'configure': a measurable impact on performance, since the compiler is forced to load the page size from memory rather than embedding immediate values. ---disable-lazy-lock - Disable code that wraps pthread_create() to detect when an application +--enable-lazy-lock + Enable code that wraps pthread_create() to detect when an application switches from single-threaded to multi-threaded mode, so that it can avoid mutex locking/unlocking operations while in single-threaded mode. In practice, this feature usually has little impact on performance unless diff --git a/configure.ac b/configure.ac index cbcefdf3..e818f65a 100644 --- a/configure.ac +++ b/configure.ac @@ -730,17 +730,17 @@ AC_CHECK_LIB([pthread], [pthread_create], [LIBS="$LIBS -lpthread"], CPPFLAGS="$CPPFLAGS -D_REENTRANT" -dnl Enable lazy locking by default. +dnl Disable lazy locking by default. 
AC_ARG_ENABLE([lazy_lock], - [AS_HELP_STRING([--disable-lazy-lock], - [Disable lazy locking (always lock, even when single-threaded)])], + [AS_HELP_STRING([--enable-lazy-lock], + [Enable lazy locking (only lock when multi-threaded)])], [if test "x$enable_lazy_lock" = "xno" ; then enable_lazy_lock="0" else enable_lazy_lock="1" fi ], -[enable_lazy_lock="1"] +[enable_lazy_lock="0"] ) if test "x$enable_lazy_lock" = "x1" ; then AC_CHECK_HEADERS([dlfcn.h], , [AC_MSG_ERROR([dlfcn.h is missing])]) From ef8897b4b938111fcc9b54725067f1dbb33a4c20 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 13 Feb 2012 14:30:52 -0800 Subject: [PATCH 0048/3378] Make 8-byte tiny size class non-optional. When tiny size class support was first added, it was intended to support truly tiny size classes (even 2 bytes). However, this wasn't very useful in practice, so the minimum tiny size class has been limited to sizeof(void *) for a long time now. This is too small to be standards compliant, but other commonly used malloc implementations do not even bother using a 16-byte quantum on systems with vector units (SSE2+, AltiVEC, etc.). As such, it is safe in practice to support an 8-byte tiny size class on 64-bit systems that support 16-byte types. --- INSTALL | 7 -- configure.ac | 17 --- doc/jemalloc.xml.in | 23 ++-- include/jemalloc/internal/arena.h | 19 ++-- .../jemalloc/internal/jemalloc_internal.h.in | 7 -- include/jemalloc/jemalloc_defs.h.in | 6 -- src/arena.c | 101 +++++------------- src/ctl.c | 9 +- 8 files changed, 44 insertions(+), 145 deletions(-) diff --git a/INSTALL b/INSTALL index a210ec5a..9124ac34 100644 --- a/INSTALL +++ b/INSTALL @@ -90,13 +90,6 @@ any of the following arguments (not a definitive list) to 'configure': Statically link against the specified libunwind.a rather than dynamically linking with -lunwind. ---disable-tiny - Disable tiny (sub-quantum-sized) object support. 
Technically it is not - legal for a malloc implementation to allocate objects with less than - quantum alignment (8 or 16 bytes, depending on architecture), but in - practice it never causes any problems if, for example, 4-byte allocations - are 4-byte-aligned. - --disable-tcache Disable thread-specific caches for small objects. Objects are cached and released in bulk, thus reducing the total number of mutex operations. See diff --git a/configure.ac b/configure.ac index e818f65a..fdbf1bad 100644 --- a/configure.ac +++ b/configure.ac @@ -560,22 +560,6 @@ if test "x$enable_prof" = "x1" ; then fi AC_SUBST([enable_prof]) -dnl Enable tiny allocations by default. -AC_ARG_ENABLE([tiny], - [AS_HELP_STRING([--disable-tiny], [Disable tiny (sub-quantum) allocations])], -[if test "x$enable_tiny" = "xno" ; then - enable_tiny="0" -else - enable_tiny="1" -fi -], -[enable_tiny="1"] -) -if test "x$enable_tiny" = "x1" ; then - AC_DEFINE([JEMALLOC_TINY], [ ]) -fi -AC_SUBST([enable_tiny]) - dnl Enable thread-specific caching by default. AC_ARG_ENABLE([tcache], [AS_HELP_STRING([--disable-tcache], [Disable per thread caches])], @@ -934,7 +918,6 @@ AC_MSG_RESULT([prof : ${enable_prof}]) AC_MSG_RESULT([prof-libunwind : ${enable_prof_libunwind}]) AC_MSG_RESULT([prof-libgcc : ${enable_prof_libgcc}]) AC_MSG_RESULT([prof-gcc : ${enable_prof_gcc}]) -AC_MSG_RESULT([tiny : ${enable_tiny}]) AC_MSG_RESULT([tcache : ${enable_tcache}]) AC_MSG_RESULT([fill : ${enable_fill}]) AC_MSG_RESULT([xmalloc : ${enable_xmalloc}]) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index dc11642f..f9f14750 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -455,13 +455,12 @@ for (i = 0; i < nbins; i++) { allocations in constant time. Small objects are managed in groups by page runs. Each run maintains - a frontier and free list to track which regions are in use. 
Unless - is specified during configuration, - allocation requests that are no more than half the quantum (8 or 16, - depending on architecture) are rounded up to the nearest power of two that - is at least sizeof(void *). - Allocation requests that are more than half the quantum, but no more than - the minimum cacheline-multiple size class (see the sizeof(void *). Allocation requests + that are more than half the quantum, but no more than the minimum + cacheline-multiple size class (see the opt.lg_qspace_max option) are rounded up to the nearest multiple of the quantum. Allocation requests that are more than the minimum cacheline-multiple size class, but @@ -680,16 +679,6 @@ for (i = 0; i < nbins; i++) { during build configuration. - - - config.tiny - (bool) - r- - - was not specified - during build configuration. - - config.tls diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index b8de12be..cacb03f8 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -17,7 +17,7 @@ (((s) + SUBPAGE_MASK) & ~SUBPAGE_MASK) /* Smallest size class to support. */ -#define LG_TINY_MIN LG_SIZEOF_PTR +#define LG_TINY_MIN 3 #define TINY_MIN (1U << LG_TINY_MIN) /* @@ -418,18 +418,13 @@ extern uint8_t const *small_size2bin; extern arena_bin_info_t *arena_bin_info; /* Various bin-related settings. */ -#ifdef JEMALLOC_TINY /* Number of (2^n)-spaced tiny bins. */ -# define ntbins ((unsigned)(LG_QUANTUM - LG_TINY_MIN)) -#else -# define ntbins 0 -#endif + /* Number of (2^n)-spaced tiny bins. */ +#define ntbins ((unsigned)(LG_QUANTUM - LG_TINY_MIN)) extern unsigned nqbins; /* Number of quantum-spaced bins. */ extern unsigned ncbins; /* Number of cacheline-spaced bins. */ extern unsigned nsbins; /* Number of subpage-spaced bins. 
*/ extern unsigned nbins; -#ifdef JEMALLOC_TINY -# define tspace_max ((size_t)(QUANTUM >> 1)) -#endif +#define tspace_max ((size_t)(QUANTUM >> 1)) #define qspace_min QUANTUM extern size_t qspace_max; extern size_t cspace_min; @@ -633,18 +628,18 @@ arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx) JEMALLOC_INLINE void * arena_malloc(size_t size, bool zero) { - tcache_t *tcache = tcache_get(); + tcache_t *tcache; assert(size != 0); assert(QUANTUM_CEILING(size) <= arena_maxclass); if (size <= small_maxclass) { - if (tcache != NULL) + if ((tcache = tcache_get()) != NULL) return (tcache_alloc_small(tcache, size, zero)); else return (arena_malloc_small(choose_arena(), size, zero)); } else { - if (tcache != NULL && size <= tcache_maxclass) + if (size <= tcache_maxclass && (tcache = tcache_get()) != NULL) return (tcache_alloc_large(tcache, size, zero)); else return (arena_malloc_large(choose_arena(), size, zero)); diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 44415370..971336ec 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -118,13 +118,6 @@ static const bool config_tcache = false #endif ; -static const bool config_tiny = -#ifdef JEMALLOC_TINY - true -#else - false -#endif - ; static const bool config_tls = #ifdef JEMALLOC_TLS true diff --git a/include/jemalloc/jemalloc_defs.h.in b/include/jemalloc/jemalloc_defs.h.in index f78028b0..66da6f3d 100644 --- a/include/jemalloc/jemalloc_defs.h.in +++ b/include/jemalloc/jemalloc_defs.h.in @@ -79,12 +79,6 @@ /* Use gcc intrinsics for profile backtracing if defined. */ #undef JEMALLOC_PROF_GCC -/* - * JEMALLOC_TINY enables support for tiny objects, which are smaller than one - * quantum. - */ -#undef JEMALLOC_TINY - /* * JEMALLOC_TCACHE enables a thread-specific caching layer for small objects. 
* This makes it possible to allocate/deallocate objects without any locking diff --git a/src/arena.c b/src/arena.c index 8a158df2..32afd0cf 100644 --- a/src/arena.c +++ b/src/arena.c @@ -28,14 +28,7 @@ size_t mspace_mask; * const_small_size2bin is a static constant lookup table that in the common * case can be used as-is for small_size2bin. */ -#if (LG_TINY_MIN == 2) -#define S2B_4(i) i, -#define S2B_8(i) S2B_4(i) S2B_4(i) -#elif (LG_TINY_MIN == 3) #define S2B_8(i) i, -#else -# error "Unsupported LG_TINY_MIN" -#endif #define S2B_16(i) S2B_8(i) S2B_8(i) #define S2B_32(i) S2B_16(i) S2B_16(i) #define S2B_64(i) S2B_32(i) S2B_32(i) @@ -49,23 +42,9 @@ static JEMALLOC_ATTR(aligned(CACHELINE)) const uint8_t const_small_size2bin[] = { #if (LG_QUANTUM == 4) /* 16-byte quantum **********************/ -# ifdef JEMALLOC_TINY -# if (LG_TINY_MIN == 2) - S2B_4(0) /* 4 */ - S2B_4(1) /* 8 */ - S2B_8(2) /* 16 */ -# define S2B_QMIN 2 -# elif (LG_TINY_MIN == 3) - S2B_8(0) /* 8 */ - S2B_8(1) /* 16 */ -# define S2B_QMIN 1 -# else -# error "Unsupported LG_TINY_MIN" -# endif -# else - S2B_16(0) /* 16 */ -# define S2B_QMIN 0 -# endif + S2B_8(0) /* 8 */ + S2B_8(1) /* 16 */ +# define S2B_QMIN 1 S2B_16(S2B_QMIN + 1) /* 32 */ S2B_16(S2B_QMIN + 2) /* 48 */ S2B_16(S2B_QMIN + 3) /* 64 */ @@ -76,18 +55,8 @@ static JEMALLOC_ATTR(aligned(CACHELINE)) # define S2B_CMIN (S2B_QMIN + 8) #else /* 8-byte quantum ***********************/ -# ifdef JEMALLOC_TINY -# if (LG_TINY_MIN == 2) - S2B_4(0) /* 4 */ - S2B_4(1) /* 8 */ -# define S2B_QMIN 1 -# else -# error "Unsupported LG_TINY_MIN" -# endif -# else - S2B_8(0) /* 8 */ -# define S2B_QMIN 0 -# endif +# define S2B_QMIN 0 + S2B_8(S2B_QMIN + 0) /* 8 */ S2B_8(S2B_QMIN + 1) /* 16 */ S2B_8(S2B_QMIN + 2) /* 24 */ S2B_8(S2B_QMIN + 3) /* 32 */ @@ -2153,17 +2122,15 @@ small_size2bin_validate(void) i = 1; /* Tiny. 
*/ - if (config_tiny) { - for (; i < (1U << LG_TINY_MIN); i++) { - size = pow2_ceil(1U << LG_TINY_MIN); - binind = ffs((int)(size >> (LG_TINY_MIN + 1))); - assert(SMALL_SIZE2BIN(i) == binind); - } - for (; i < qspace_min; i++) { - size = pow2_ceil(i); - binind = ffs((int)(size >> (LG_TINY_MIN + 1))); - assert(SMALL_SIZE2BIN(i) == binind); - } + for (; i < TINY_MIN; i++) { + size = TINY_MIN; + binind = ffs((int)(size >> (LG_TINY_MIN + 1))); + assert(SMALL_SIZE2BIN(i) == binind); + } + for (; i < qspace_min; i++) { + size = pow2_ceil(i); + binind = ffs((int)(size >> (LG_TINY_MIN + 1))); + assert(SMALL_SIZE2BIN(i) == binind); } /* Quantum-spaced. */ for (; i <= qspace_max; i++) { @@ -2223,17 +2190,15 @@ small_size2bin_init_hard(void) i = 1; /* Tiny. */ - if (config_tiny) { - for (; i < (1U << LG_TINY_MIN); i += TINY_MIN) { - size = pow2_ceil(1U << LG_TINY_MIN); - binind = ffs((int)(size >> (LG_TINY_MIN + 1))); - CUSTOM_SMALL_SIZE2BIN(i) = binind; - } - for (; i < qspace_min; i += TINY_MIN) { - size = pow2_ceil(i); - binind = ffs((int)(size >> (LG_TINY_MIN + 1))); - CUSTOM_SMALL_SIZE2BIN(i) = binind; - } + for (; i < TINY_MIN; i += TINY_MIN) { + size = TINY_MIN; + binind = ffs((int)(size >> (LG_TINY_MIN + 1))); + CUSTOM_SMALL_SIZE2BIN(i) = binind; + } + for (; i < qspace_min; i += TINY_MIN) { + size = pow2_ceil(i); + binind = ffs((int)(size >> (LG_TINY_MIN + 1))); + CUSTOM_SMALL_SIZE2BIN(i) = binind; } /* Quantum-spaced. */ for (; i <= qspace_max; i += TINY_MIN) { @@ -2398,17 +2363,12 @@ bin_info_init(void) prev_run_size = PAGE_SIZE; i = 0; /* (2^n)-spaced tiny bins. 
*/ - if (config_tiny) { - for (; i < ntbins; i++) { - bin_info = &arena_bin_info[i]; - bin_info->reg_size = (1U << (LG_TINY_MIN + i)); - prev_run_size = bin_info_run_size_calc(bin_info, - prev_run_size); - bitmap_info_init(&bin_info->bitmap_info, - bin_info->nregs); - } + for (; i < ntbins; i++) { + bin_info = &arena_bin_info[i]; + bin_info->reg_size = (1U << (LG_TINY_MIN + i)); + prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size); + bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs); } - /* Quantum-spaced bins. */ for (; i < ntbins + nqbins; i++) { bin_info = &arena_bin_info[i]; @@ -2416,7 +2376,6 @@ bin_info_init(void) prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size); bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs); } - /* Cacheline-spaced bins. */ for (; i < ntbins + nqbins + ncbins; i++) { bin_info = &arena_bin_info[i]; @@ -2425,7 +2384,6 @@ bin_info_init(void) prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size); bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs); } - /* Subpage-spaced bins. 
*/ for (; i < nbins; i++) { bin_info = &arena_bin_info[i]; @@ -2456,8 +2414,7 @@ arena_boot(void) assert(sspace_min < PAGE_SIZE); sspace_max = PAGE_SIZE - SUBPAGE; - if (config_tiny) - assert(LG_QUANTUM >= LG_TINY_MIN); + assert(LG_QUANTUM >= LG_TINY_MIN); assert(ntbins <= LG_QUANTUM); nqbins = qspace_max >> LG_QUANTUM; ncbins = ((cspace_max - cspace_min) >> LG_CACHELINE) + 1; diff --git a/src/ctl.c b/src/ctl.c index 2ac2f66e..6d0423fa 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -56,7 +56,6 @@ CTL_PROTO(config_prof_libunwind) CTL_PROTO(config_stats) CTL_PROTO(config_sysv) CTL_PROTO(config_tcache) -CTL_PROTO(config_tiny) CTL_PROTO(config_tls) CTL_PROTO(config_xmalloc) CTL_PROTO(opt_abort) @@ -199,7 +198,6 @@ static const ctl_node_t config_node[] = { {NAME("stats"), CTL(config_stats)}, {NAME("sysv"), CTL(config_sysv)}, {NAME("tcache"), CTL(config_tcache)}, - {NAME("tiny"), CTL(config_tiny)}, {NAME("tls"), CTL(config_tls)}, {NAME("xmalloc"), CTL(config_xmalloc)} }; @@ -993,8 +991,6 @@ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ int ret; \ bool oldval; \ \ - if (n == false) \ - return (ENOENT); \ READONLY(); \ oldval = n; \ READ(oldval, bool); \ @@ -1115,7 +1111,6 @@ CTL_RO_BOOL_CONFIG_GEN(config_prof_libunwind) CTL_RO_BOOL_CONFIG_GEN(config_stats) CTL_RO_BOOL_CONFIG_GEN(config_sysv) CTL_RO_BOOL_CONFIG_GEN(config_tcache) -CTL_RO_BOOL_CONFIG_GEN(config_tiny) CTL_RO_BOOL_CONFIG_GEN(config_tls) CTL_RO_BOOL_CONFIG_GEN(config_xmalloc) @@ -1203,8 +1198,8 @@ CTL_RO_NL_GEN(arenas_cacheline, CACHELINE, size_t) CTL_RO_NL_GEN(arenas_subpage, SUBPAGE, size_t) CTL_RO_NL_GEN(arenas_pagesize, PAGE_SIZE, size_t) CTL_RO_NL_GEN(arenas_chunksize, chunksize, size_t) -CTL_RO_NL_CGEN(config_tiny, arenas_tspace_min, (1U << LG_TINY_MIN), size_t) -CTL_RO_NL_CGEN(config_tiny, arenas_tspace_max, (qspace_min >> 1), size_t) +CTL_RO_NL_GEN(arenas_tspace_min, TINY_MIN, size_t) +CTL_RO_NL_GEN(arenas_tspace_max, (qspace_min >> 1), size_t) CTL_RO_NL_GEN(arenas_qspace_min, 
qspace_min, size_t) CTL_RO_NL_GEN(arenas_qspace_max, qspace_max, size_t) CTL_RO_NL_GEN(arenas_cspace_min, cspace_min, size_t) From 746868929afae3e346b47d0fa8a78d7fb131d5a4 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 13 Feb 2012 15:18:19 -0800 Subject: [PATCH 0049/3378] Remove highruns statistics. --- doc/jemalloc.xml.in | 22 ---------------------- include/jemalloc/internal/arena.h | 4 ++++ include/jemalloc/internal/stats.h | 6 ------ src/arena.c | 29 ----------------------------- src/ctl.c | 10 ---------- src/stats.c | 28 +++++++++++----------------- 6 files changed, 15 insertions(+), 84 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index f9f14750..4c7023b5 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1866,17 +1866,6 @@ malloc_conf = "xmalloc:true";]]> to allocate changed. - - - stats.arenas.<i>.bins.<j>.highruns - (size_t) - r- - [] - - Maximum number of runs at any time thus far. - - - stats.arenas.<i>.bins.<j>.curruns @@ -1920,17 +1909,6 @@ malloc_conf = "xmalloc:true";]]> class. - - - stats.arenas.<i>.lruns.<j>.highruns - (size_t) - r- - [] - - Maximum number of runs at any time thus far for this - size class. - - stats.arenas.<i>.lruns.<j>.curruns diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index cacb03f8..4a87ef54 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -639,6 +639,10 @@ arena_malloc(size_t size, bool zero) else return (arena_malloc_small(choose_arena(), size, zero)); } else { + /* + * Initialize tcache after checking size in order to avoid + * infinite recursion during tcache initialization. 
+ */ if (size <= tcache_maxclass && (tcache = tcache_get()) != NULL) return (tcache_alloc_large(tcache, size, zero)); else diff --git a/include/jemalloc/internal/stats.h b/include/jemalloc/internal/stats.h index 64ba4bd7..4af23c33 100644 --- a/include/jemalloc/internal/stats.h +++ b/include/jemalloc/internal/stats.h @@ -59,9 +59,6 @@ struct malloc_bin_stats_s { */ uint64_t reruns; - /* High-water mark for this bin. */ - size_t highruns; - /* Current number of runs in this bin. */ size_t curruns; }; @@ -83,9 +80,6 @@ struct malloc_large_stats_s { */ uint64_t nrequests; - /* High-water mark for this size class. */ - size_t highruns; - /* Current number of runs of this size class. */ size_t curruns; }; diff --git a/src/arena.c b/src/arena.c index 32afd0cf..bd10de3b 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1196,8 +1196,6 @@ arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin) if (config_stats) { bin->stats.nruns++; bin->stats.curruns++; - if (bin->stats.curruns > bin->stats.highruns) - bin->stats.highruns = bin->stats.curruns; } return (run); } @@ -1401,12 +1399,6 @@ arena_malloc_large(arena_t *arena, size_t size, bool zero) arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nmalloc++; arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nrequests++; arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns++; - if (arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns > - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns) { - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns = - arena->stats.lstats[(size >> PAGE_SHIFT) - - 1].curruns; - } } if (config_prof) arena_prof_accum(arena, size); @@ -1477,12 +1469,6 @@ arena_palloc(arena_t *arena, size_t size, size_t alloc_size, size_t alignment, arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nmalloc++; arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nrequests++; arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns++; - if (arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns > - arena->stats.lstats[(size >> PAGE_SHIFT) 
- 1].highruns) { - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns = - arena->stats.lstats[(size >> PAGE_SHIFT) - - 1].curruns; - } } malloc_mutex_unlock(&arena->lock); @@ -1762,7 +1748,6 @@ arena_stats_merge(arena_t *arena, size_t *nactive, size_t *ndirty, lstats[i].nmalloc += arena->stats.lstats[i].nmalloc; lstats[i].ndalloc += arena->stats.lstats[i].ndalloc; lstats[i].nrequests += arena->stats.lstats[i].nrequests; - lstats[i].highruns += arena->stats.lstats[i].highruns; lstats[i].curruns += arena->stats.lstats[i].curruns; } malloc_mutex_unlock(&arena->lock); @@ -1781,7 +1766,6 @@ arena_stats_merge(arena_t *arena, size_t *nactive, size_t *ndirty, } bstats[i].nruns += bin->stats.nruns; bstats[i].reruns += bin->stats.reruns; - bstats[i].highruns += bin->stats.highruns; bstats[i].curruns += bin->stats.curruns; malloc_mutex_unlock(&bin->lock); } @@ -1835,12 +1819,6 @@ arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk, void *ptr, arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nmalloc++; arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nrequests++; arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns++; - if (arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns > - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns) { - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns = - arena->stats.lstats[(size >> PAGE_SHIFT) - - 1].curruns; - } } malloc_mutex_unlock(&arena->lock); } @@ -1909,13 +1887,6 @@ arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr, arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nrequests++; arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns++; - if (arena->stats.lstats[(size >> PAGE_SHIFT) - - 1].curruns > arena->stats.lstats[(size >> - PAGE_SHIFT) - 1].highruns) { - arena->stats.lstats[(size >> PAGE_SHIFT) - - 1].highruns = arena->stats.lstats[(size >> - PAGE_SHIFT) - 1].curruns; - } } malloc_mutex_unlock(&arena->lock); return (false); diff --git a/src/ctl.c b/src/ctl.c index 6d0423fa..e33ce67d 100644 --- 
a/src/ctl.c +++ b/src/ctl.c @@ -136,13 +136,11 @@ CTL_PROTO(stats_arenas_i_bins_j_nfills) CTL_PROTO(stats_arenas_i_bins_j_nflushes) CTL_PROTO(stats_arenas_i_bins_j_nruns) CTL_PROTO(stats_arenas_i_bins_j_nreruns) -CTL_PROTO(stats_arenas_i_bins_j_highruns) CTL_PROTO(stats_arenas_i_bins_j_curruns) INDEX_PROTO(stats_arenas_i_bins_j) CTL_PROTO(stats_arenas_i_lruns_j_nmalloc) CTL_PROTO(stats_arenas_i_lruns_j_ndalloc) CTL_PROTO(stats_arenas_i_lruns_j_nrequests) -CTL_PROTO(stats_arenas_i_lruns_j_highruns) CTL_PROTO(stats_arenas_i_lruns_j_curruns) INDEX_PROTO(stats_arenas_i_lruns_j) CTL_PROTO(stats_arenas_i_nthreads) @@ -322,7 +320,6 @@ static const ctl_node_t stats_arenas_i_bins_j_node[] = { {NAME("nflushes"), CTL(stats_arenas_i_bins_j_nflushes)}, {NAME("nruns"), CTL(stats_arenas_i_bins_j_nruns)}, {NAME("nreruns"), CTL(stats_arenas_i_bins_j_nreruns)}, - {NAME("highruns"), CTL(stats_arenas_i_bins_j_highruns)}, {NAME("curruns"), CTL(stats_arenas_i_bins_j_curruns)} }; static const ctl_node_t super_stats_arenas_i_bins_j_node[] = { @@ -337,7 +334,6 @@ static const ctl_node_t stats_arenas_i_lruns_j_node[] = { {NAME("nmalloc"), CTL(stats_arenas_i_lruns_j_nmalloc)}, {NAME("ndalloc"), CTL(stats_arenas_i_lruns_j_ndalloc)}, {NAME("nrequests"), CTL(stats_arenas_i_lruns_j_nrequests)}, - {NAME("highruns"), CTL(stats_arenas_i_lruns_j_highruns)}, {NAME("curruns"), CTL(stats_arenas_i_lruns_j_curruns)} }; static const ctl_node_t super_stats_arenas_i_lruns_j_node[] = { @@ -482,7 +478,6 @@ ctl_arena_stats_smerge(ctl_arena_stats_t *sstats, ctl_arena_stats_t *astats) sstats->lstats[i].nmalloc += astats->lstats[i].nmalloc; sstats->lstats[i].ndalloc += astats->lstats[i].ndalloc; sstats->lstats[i].nrequests += astats->lstats[i].nrequests; - sstats->lstats[i].highruns += astats->lstats[i].highruns; sstats->lstats[i].curruns += astats->lstats[i].curruns; } @@ -498,7 +493,6 @@ ctl_arena_stats_smerge(ctl_arena_stats_t *sstats, ctl_arena_stats_t *astats) } sstats->bstats[i].nruns += 
astats->bstats[i].nruns; sstats->bstats[i].reruns += astats->bstats[i].reruns; - sstats->bstats[i].highruns += astats->bstats[i].highruns; sstats->bstats[i].curruns += astats->bstats[i].curruns; } } @@ -1351,8 +1345,6 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nruns, ctl_stats.arenas[mib[2]].bstats[mib[4]].nruns, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nreruns, ctl_stats.arenas[mib[2]].bstats[mib[4]].reruns, uint64_t) -CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_highruns, - ctl_stats.arenas[mib[2]].bstats[mib[4]].highruns, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_curruns, ctl_stats.arenas[mib[2]].bstats[mib[4]].curruns, size_t) @@ -1373,8 +1365,6 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_lruns_j_nrequests, ctl_stats.arenas[mib[2]].lstats[mib[4]].nrequests, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_lruns_j_curruns, ctl_stats.arenas[mib[2]].lstats[mib[4]].curruns, size_t) -CTL_RO_CGEN(config_stats, stats_arenas_i_lruns_j_highruns, - ctl_stats.arenas[mib[2]].lstats[mib[4]].highruns, size_t) const ctl_node_t * stats_arenas_i_lruns_j_index(const size_t *mib, size_t miblen, size_t j) diff --git a/src/stats.c b/src/stats.c index ad8cd13d..1e907823 100644 --- a/src/stats.c +++ b/src/stats.c @@ -161,12 +161,11 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, malloc_cprintf(write_cb, cbopaque, "bins: bin size regs pgs allocated nmalloc" " ndalloc nrequests nfills nflushes" - " newruns reruns maxruns curruns\n"); + " newruns reruns curruns\n"); } else { malloc_cprintf(write_cb, cbopaque, "bins: bin size regs pgs allocated nmalloc" - " ndalloc newruns reruns maxruns" - " curruns\n"); + " ndalloc newruns reruns curruns\n"); } CTL_GET("arenas.nbins", &nbins, unsigned); for (j = 0, gap_start = UINT_MAX; j < nbins; j++) { @@ -182,7 +181,7 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, uint32_t nregs; uint64_t nmalloc, ndalloc, nrequests, nfills, nflushes; 
uint64_t reruns; - size_t highruns, curruns; + size_t curruns; if (gap_start != UINT_MAX) { if (j > gap_start + 1) { @@ -220,8 +219,6 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, } CTL_IJ_GET("stats.arenas.0.bins.0.nreruns", &reruns, uint64_t); - CTL_IJ_GET("stats.arenas.0.bins.0.highruns", &highruns, - size_t); CTL_IJ_GET("stats.arenas.0.bins.0.curruns", &curruns, size_t); if (config_tcache) { @@ -229,27 +226,26 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, "%13u %1s %5zu %4u %3zu %12zu %12"PRIu64 " %12"PRIu64" %12"PRIu64" %12"PRIu64 " %12"PRIu64" %12"PRIu64" %12"PRIu64 - " %12zu %12zu\n", + " %12zu\n", j, j < ntbins_ ? "T" : j < ntbins_ + nqbins ? "Q" : j < ntbins_ + nqbins + ncbins ? "C" : "S", reg_size, nregs, run_size / pagesize, allocated, nmalloc, ndalloc, nrequests, - nfills, nflushes, nruns, reruns, highruns, - curruns); + nfills, nflushes, nruns, reruns, curruns); } else { malloc_cprintf(write_cb, cbopaque, "%13u %1s %5zu %4u %3zu %12zu %12"PRIu64 " %12"PRIu64" %12"PRIu64" %12"PRIu64 - " %12zu %12zu\n", + " %12zu\n", j, j < ntbins_ ? "T" : j < ntbins_ + nqbins ? "Q" : j < ntbins_ + nqbins + ncbins ? 
"C" : "S", reg_size, nregs, run_size / pagesize, allocated, nmalloc, ndalloc, nruns, reruns, - highruns, curruns); + curruns); } } } @@ -276,11 +272,11 @@ stats_arena_lruns_print(void (*write_cb)(void *, const char *), void *cbopaque, malloc_cprintf(write_cb, cbopaque, "large: size pages nmalloc ndalloc nrequests" - " maxruns curruns\n"); + " curruns\n"); CTL_GET("arenas.nlruns", &nlruns, size_t); for (j = 0, gap_start = -1; j < nlruns; j++) { uint64_t nmalloc, ndalloc, nrequests; - size_t run_size, highruns, curruns; + size_t run_size, curruns; CTL_IJ_GET("stats.arenas.0.lruns.0.nmalloc", &nmalloc, uint64_t); @@ -293,8 +289,6 @@ stats_arena_lruns_print(void (*write_cb)(void *, const char *), void *cbopaque, gap_start = j; } else { CTL_J_GET("arenas.lrun.0.size", &run_size, size_t); - CTL_IJ_GET("stats.arenas.0.lruns.0.highruns", &highruns, - size_t); CTL_IJ_GET("stats.arenas.0.lruns.0.curruns", &curruns, size_t); if (gap_start != -1) { @@ -304,9 +298,9 @@ stats_arena_lruns_print(void (*write_cb)(void *, const char *), void *cbopaque, } malloc_cprintf(write_cb, cbopaque, "%13zu %5zu %12"PRIu64" %12"PRIu64" %12"PRIu64 - " %12zu %12zu\n", + " %12zu\n", run_size, run_size / pagesize, nmalloc, ndalloc, - nrequests, highruns, curruns); + nrequests, curruns); } } if (gap_start != -1) From e7a1058aaa6b2cbdd19da297bf2250f86dcdac89 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 13 Feb 2012 17:36:52 -0800 Subject: [PATCH 0050/3378] Fix bin->runcur management. Fix an interaction between arena_dissociate_bin_run() and arena_bin_lower_run() that made it possible for bin->runcur to point to a run other than the lowest non-full run. This bug violated jemalloc's layout policy, but did not affect correctness. 
--- src/arena.c | 140 ++++++++++++++++++++++++++++------------------------ 1 file changed, 75 insertions(+), 65 deletions(-) diff --git a/src/arena.c b/src/arena.c index bd10de3b..33f3f85e 100644 --- a/src/arena.c +++ b/src/arena.c @@ -142,6 +142,10 @@ static void arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, size_t oldsize, size_t newsize); static void arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, size_t oldsize, size_t newsize, bool dirty); +static arena_run_t *arena_bin_runs_first(arena_bin_t *bin); +static void arena_bin_runs_insert(arena_bin_t *bin, arena_run_t *run); +static void arena_bin_runs_remove(arena_bin_t *bin, arena_run_t *run); +static arena_run_t *arena_bin_nonfull_run_tryget(arena_bin_t *bin); static arena_run_t *arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin); static void *arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin); static void arena_dissociate_bin_run(arena_chunk_t *chunk, arena_run_t *run, @@ -1142,33 +1146,73 @@ arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, dirty); } +static arena_run_t * +arena_bin_runs_first(arena_bin_t *bin) +{ + arena_chunk_map_t *mapelm = arena_run_tree_first(&bin->runs); + if (mapelm != NULL) { + arena_chunk_t *chunk; + size_t pageind; + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(mapelm); + pageind = ((((uintptr_t)mapelm - (uintptr_t)chunk->map) / + sizeof(arena_chunk_map_t))) + map_bias; + arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + + (uintptr_t)((pageind - (mapelm->bits >> PAGE_SHIFT)) << + PAGE_SHIFT)); + return (run); + } + + return (NULL); +} + +static void +arena_bin_runs_insert(arena_bin_t *bin, arena_run_t *run) +{ + arena_chunk_t *chunk = CHUNK_ADDR2BASE(run); + size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> PAGE_SHIFT; + arena_chunk_map_t *mapelm = &chunk->map[pageind-map_bias]; + + assert(arena_run_tree_search(&bin->runs, mapelm) == NULL); + + arena_run_tree_insert(&bin->runs, 
mapelm); +} + +static void +arena_bin_runs_remove(arena_bin_t *bin, arena_run_t *run) +{ + arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); + size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> PAGE_SHIFT; + arena_chunk_map_t *mapelm = &chunk->map[pageind-map_bias]; + + assert(arena_run_tree_search(&bin->runs, mapelm) != NULL); + + arena_run_tree_remove(&bin->runs, mapelm); +} + +static arena_run_t * +arena_bin_nonfull_run_tryget(arena_bin_t *bin) +{ + arena_run_t *run = arena_bin_runs_first(bin); + if (run != NULL) { + arena_bin_runs_remove(bin, run); + if (config_stats) + bin->stats.reruns++; + } + return (run); +} + static arena_run_t * arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin) { - arena_chunk_map_t *mapelm; arena_run_t *run; size_t binind; arena_bin_info_t *bin_info; /* Look for a usable run. */ - mapelm = arena_run_tree_first(&bin->runs); - if (mapelm != NULL) { - arena_chunk_t *chunk; - size_t pageind; - - /* run is guaranteed to have available space. */ - arena_run_tree_remove(&bin->runs, mapelm); - - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(mapelm); - pageind = ((((uintptr_t)mapelm - (uintptr_t)chunk->map) / - sizeof(arena_chunk_map_t))) + map_bias; - run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - - (mapelm->bits >> PAGE_SHIFT)) - << PAGE_SHIFT)); - if (config_stats) - bin->stats.reruns++; + run = arena_bin_nonfull_run_tryget(bin); + if (run != NULL) return (run); - } /* No existing runs have any space available. */ binind = arena_bin_index(arena, bin); @@ -1205,24 +1249,9 @@ arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin) * sufficient memory available while this one dropped bin->lock above, * so search one more time. */ - mapelm = arena_run_tree_first(&bin->runs); - if (mapelm != NULL) { - arena_chunk_t *chunk; - size_t pageind; - - /* run is guaranteed to have available space. 
*/ - arena_run_tree_remove(&bin->runs, mapelm); - - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(mapelm); - pageind = ((((uintptr_t)mapelm - (uintptr_t)chunk->map) / - sizeof(arena_chunk_map_t))) + map_bias; - run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - - (mapelm->bits >> PAGE_SHIFT)) - << PAGE_SHIFT)); - if (config_stats) - bin->stats.reruns++; + run = arena_bin_nonfull_run_tryget(bin); + if (run != NULL) return (run); - } return (NULL); } @@ -1587,16 +1616,12 @@ arena_dissociate_bin_run(arena_chunk_t *chunk, arena_run_t *run, arena_bin_info_t *bin_info = &arena_bin_info[binind]; if (bin_info->nregs != 1) { - size_t run_pageind = (((uintptr_t)run - - (uintptr_t)chunk)) >> PAGE_SHIFT; - arena_chunk_map_t *run_mapelm = - &chunk->map[run_pageind-map_bias]; /* * This block's conditional is necessary because if the * run only contains one region, then it never gets * inserted into the non-full runs tree. */ - arena_run_tree_remove(&bin->runs, run_mapelm); + arena_bin_runs_remove(bin, run); } } } @@ -1660,34 +1685,19 @@ arena_bin_lower_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, { /* - * Make sure that bin->runcur always refers to the lowest non-full run, - * if one exists. + * Make sure that if bin->runcur is non-NULL, it refers to the lowest + * non-full run. It is okay to NULL runcur out rather than proactively + * keeping it pointing at the lowest non-full run. */ - if (bin->runcur == NULL) - bin->runcur = run; - else if ((uintptr_t)run < (uintptr_t)bin->runcur) { + if ((uintptr_t)run < (uintptr_t)bin->runcur) { /* Switch runcur. */ - if (bin->runcur->nfree > 0) { - arena_chunk_t *runcur_chunk = - CHUNK_ADDR2BASE(bin->runcur); - size_t runcur_pageind = (((uintptr_t)bin->runcur - - (uintptr_t)runcur_chunk)) >> PAGE_SHIFT; - arena_chunk_map_t *runcur_mapelm = - &runcur_chunk->map[runcur_pageind-map_bias]; - - /* Insert runcur. 
*/ - arena_run_tree_insert(&bin->runs, runcur_mapelm); - } + if (bin->runcur->nfree > 0) + arena_bin_runs_insert(bin, bin->runcur); bin->runcur = run; - } else { - size_t run_pageind = (((uintptr_t)run - - (uintptr_t)chunk)) >> PAGE_SHIFT; - arena_chunk_map_t *run_mapelm = - &chunk->map[run_pageind-map_bias]; - - assert(arena_run_tree_search(&bin->runs, run_mapelm) == NULL); - arena_run_tree_insert(&bin->runs, run_mapelm); - } + if (config_stats) + bin->stats.reruns++; + } else + arena_bin_runs_insert(bin, run); } void From 0b526ff94da7e59aa947a4d3529b2376794f8b01 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 13 Feb 2012 18:04:26 -0800 Subject: [PATCH 0051/3378] Remove the opt.lg_prof_tcmax option. Remove the opt.lg_prof_tcmax option and hard-code a cache size of 1024. This setting is something that users just shouldn't have to worry about. If lock contention actually ends up being a problem, the simple solution available to the user is to reduce sampling frequency. --- doc/jemalloc.xml.in | 26 ++------------------------ include/jemalloc/internal/prof.h | 5 +++-- src/ctl.c | 5 +---- src/jemalloc.c | 2 -- src/prof.c | 8 ++------ src/stats.c | 12 ------------ 6 files changed, 8 insertions(+), 50 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 4c7023b5..2e5f10e3 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -938,8 +938,6 @@ malloc_conf = "xmalloc:true";]]> option for probabilistic sampling control. See the opt.prof_accum option for control of cumulative sample reporting. See the opt.lg_prof_tcmax - option for control of per thread backtrace caching. See the opt.lg_prof_interval option for information on interval-triggered profile dumping, and the opt.prof_gdump @@ -1017,28 +1015,8 @@ malloc_conf = "xmalloc:true";]]> dumps enabled/disabled. If this option is enabled, every unique backtrace must be stored for the duration of execution. 
Depending on the application, this can impose a large memory overhead, and the - cumulative counts are not always of interest. See the - opt.lg_prof_tcmax - option for control of per thread backtrace caching, which has important - interactions. This option is enabled by default. - - - - - opt.lg_prof_tcmax - (ssize_t) - r- - [] - - Maximum per thread backtrace cache (log base 2) used - for heap profiling. A backtrace can only be discarded if the - opt.prof_accum - option is disabled, and no thread caches currently refer to the - backtrace. Therefore, a backtrace cache limit should be imposed if the - intention is to limit how much memory is used by backtraces. By - default, no limit is imposed (encoded as -1). - + cumulative counts are not always of interest. This option is enabled + by default. diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index 98f96546..ad8bcd2c 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -12,7 +12,9 @@ typedef struct prof_tdata_s prof_tdata_t; #define LG_PROF_BT_MAX_DEFAULT 7 #define LG_PROF_SAMPLE_DEFAULT 0 #define LG_PROF_INTERVAL_DEFAULT -1 -#define LG_PROF_TCMAX_DEFAULT -1 + +/* Maximum number of backtraces to store in each per thread LRU cache. */ +#define PROF_TCMAX 1024 /* * Hard limit on stack backtrace depth. Note that the version of @@ -167,7 +169,6 @@ extern ssize_t opt_lg_prof_interval; /* lg(prof_interval). */ extern bool opt_prof_gdump; /* High-water memory dumping. */ extern bool opt_prof_leak; /* Dump leak summary at exit. */ extern bool opt_prof_accum; /* Report cumulative bytes. 
*/ -extern ssize_t opt_lg_prof_tcmax; /* lg(max per thread bactrace cache) */ extern char opt_prof_prefix[PATH_MAX + 1]; /* diff --git a/src/ctl.c b/src/ctl.c index e33ce67d..12b41857 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -80,7 +80,6 @@ CTL_PROTO(opt_lg_prof_interval) CTL_PROTO(opt_prof_gdump) CTL_PROTO(opt_prof_leak) CTL_PROTO(opt_prof_accum) -CTL_PROTO(opt_lg_prof_tcmax) CTL_PROTO(arenas_bin_i_size) CTL_PROTO(arenas_bin_i_nregs) CTL_PROTO(arenas_bin_i_run_size) @@ -222,8 +221,7 @@ static const ctl_node_t opt_node[] = { {NAME("lg_prof_interval"), CTL(opt_lg_prof_interval)}, {NAME("prof_gdump"), CTL(opt_prof_gdump)}, {NAME("prof_leak"), CTL(opt_prof_leak)}, - {NAME("prof_accum"), CTL(opt_prof_accum)}, - {NAME("lg_prof_tcmax"), CTL(opt_lg_prof_tcmax)} + {NAME("prof_accum"), CTL(opt_prof_accum)} }; static const ctl_node_t arenas_bin_i_node[] = { @@ -1133,7 +1131,6 @@ CTL_RO_NL_CGEN(config_prof, opt_lg_prof_interval, opt_lg_prof_interval, ssize_t) CTL_RO_NL_CGEN(config_prof, opt_prof_gdump, opt_prof_gdump, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_leak, opt_prof_leak, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_accum, opt_prof_accum, bool) -CTL_RO_NL_CGEN(config_prof, opt_lg_prof_tcmax, opt_lg_prof_tcmax, ssize_t) /******************************************************************************/ diff --git a/src/jemalloc.c b/src/jemalloc.c index 796c8158..d2a6009f 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -603,8 +603,6 @@ malloc_conf_init(void) CONF_HANDLE_SSIZE_T(lg_prof_sample, 0, (sizeof(uint64_t) << 3) - 1) CONF_HANDLE_BOOL(prof_accum) - CONF_HANDLE_SSIZE_T(lg_prof_tcmax, -1, - (sizeof(size_t) << 3) - 1) CONF_HANDLE_SSIZE_T(lg_prof_interval, -1, (sizeof(uint64_t) << 3) - 1) CONF_HANDLE_BOOL(prof_gdump) diff --git a/src/prof.c b/src/prof.c index 113cf15a..a4012f04 100644 --- a/src/prof.c +++ b/src/prof.c @@ -22,7 +22,6 @@ ssize_t opt_lg_prof_interval = LG_PROF_INTERVAL_DEFAULT; bool opt_prof_gdump = false; bool opt_prof_leak = false; bool opt_prof_accum 
= true; -ssize_t opt_lg_prof_tcmax = LG_PROF_TCMAX_DEFAULT; char opt_prof_prefix[PATH_MAX + 1]; uint64_t prof_interval; @@ -519,8 +518,7 @@ prof_lookup(prof_bt_t *bt) prof_leave(); /* Link a prof_thd_cnt_t into ctx for this thread. */ - if (opt_lg_prof_tcmax >= 0 && ckh_count(&prof_tdata->bt2cnt) - == (ZU(1) << opt_lg_prof_tcmax)) { + if (ckh_count(&prof_tdata->bt2cnt) == PROF_TCMAX) { assert(ckh_count(&prof_tdata->bt2cnt) > 0); /* * Flush the least recently used cnt in order to keep @@ -535,9 +533,7 @@ prof_lookup(prof_bt_t *bt) prof_ctx_merge(ret.p->ctx, ret.p); /* ret can now be re-used. */ } else { - assert(opt_lg_prof_tcmax < 0 || - ckh_count(&prof_tdata->bt2cnt) < (ZU(1) << - opt_lg_prof_tcmax)); + assert(ckh_count(&prof_tdata->bt2cnt) < PROF_TCMAX); /* Allocate and partially initialize a new cnt. */ ret.v = imalloc(sizeof(prof_thr_cnt_t)); if (ret.p == NULL) { diff --git a/src/stats.c b/src/stats.c index 1e907823..86a48c60 100644 --- a/src/stats.c +++ b/src/stats.c @@ -515,7 +515,6 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, OPT_WRITE_BOOL(prof_active) OPT_WRITE_SSIZE_T(lg_prof_sample) OPT_WRITE_BOOL(prof_accum) - OPT_WRITE_SSIZE_T(lg_prof_tcmax) OPT_WRITE_SSIZE_T(lg_prof_interval) OPT_WRITE_BOOL(prof_gdump) OPT_WRITE_BOOL(prof_leak) @@ -622,17 +621,6 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, write_cb(cbopaque, u2s((1U << sv), 10, s)); write_cb(cbopaque, "\n"); - CTL_GET("opt.lg_prof_tcmax", &ssv, ssize_t); - write_cb(cbopaque, - "Maximum per thread backtrace cache: "); - if (ssv >= 0) { - write_cb(cbopaque, u2s((1U << ssv), 10, s)); - write_cb(cbopaque, " (2^"); - write_cb(cbopaque, u2s(ssv, 10, s)); - write_cb(cbopaque, ")\n"); - } else - write_cb(cbopaque, "N/A\n"); - CTL_GET("opt.lg_prof_sample", &sv, size_t); write_cb(cbopaque, "Average profile sample interval: "); write_cb(cbopaque, u2s((((uint64_t)1U) << sv), 10, s)); From 5389146191b279ca3b90028357dd6ad66b283def Mon Sep 17 00:00:00 2001 From: 
Jason Evans Date: Mon, 13 Feb 2012 18:23:41 -0800 Subject: [PATCH 0052/3378] Remove the opt.lg_prof_bt_max option. Remove opt.lg_prof_bt_max, and hard code it to 7. The original intention of this option was to enable faster backtracing by limiting backtrace depth. However, this makes graphical pprof output very difficult to interpret. In practice, decreasing sampling frequency is a better mechanism for limiting profiling overhead. --- doc/jemalloc.xml.in | 16 ---------------- include/jemalloc/internal/prof.h | 30 ++++++++++-------------------- src/ctl.c | 3 --- src/jemalloc.c | 2 -- src/prof.c | 22 ++++++++-------------- src/stats.c | 6 ------ 6 files changed, 18 insertions(+), 61 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 2e5f10e3..1e8c8005 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -930,8 +930,6 @@ malloc_conf = "xmalloc:true";]]> where <prefix> is controlled by the opt.prof_prefix option. See the opt.lg_prof_bt_max - option for backtrace depth control. See the opt.prof_active option for on-the-fly activation/deactivation. See the opt.lg_prof_sample @@ -962,17 +960,6 @@ malloc_conf = "xmalloc:true";]]> jeprof. - - - opt.lg_prof_bt_max - (size_t) - r- - [] - - Maximum backtrace depth (log base 2) when profiling - memory allocation activity. The default is 128 (2^7). - - opt.prof_active @@ -1067,9 +1054,6 @@ malloc_conf = "xmalloc:true";]]> atexit 3 function to report memory leaks detected by allocation sampling. See the - opt.lg_prof_bt_max - option for backtrace depth control. See the opt.prof option for information on analyzing heap profile output. This option is disabled by default. diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index ad8bcd2c..744d361e 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -9,25 +9,19 @@ typedef struct prof_tdata_s prof_tdata_t; /* Option defaults. 
*/ #define PROF_PREFIX_DEFAULT "jeprof" -#define LG_PROF_BT_MAX_DEFAULT 7 #define LG_PROF_SAMPLE_DEFAULT 0 #define LG_PROF_INTERVAL_DEFAULT -1 +/* + * Hard limit on stack backtrace depth. The version of prof_backtrace() that + * is based on __builtin_return_address() necessarily has a hard-coded number + * of backtrace frame handlers, and should be kept in sync with this setting. + */ +#define PROF_BT_MAX 128 + /* Maximum number of backtraces to store in each per thread LRU cache. */ #define PROF_TCMAX 1024 -/* - * Hard limit on stack backtrace depth. Note that the version of - * prof_backtrace() that is based on __builtin_return_address() necessarily has - * a hard-coded number of backtrace frame handlers. - */ -#if (defined(JEMALLOC_PROF_LIBGCC) || defined(JEMALLOC_PROF_LIBUNWIND)) -# define LG_PROF_BT_MAX ((ZU(1) << (LG_SIZEOF_PTR+3)) - 1) -#else -# define LG_PROF_BT_MAX 7 /* >= LG_PROF_BT_MAX_DEFAULT */ -#endif -#define PROF_BT_MAX (1U << LG_PROF_BT_MAX) - /* Initial hash table size. */ #define PROF_CKH_MINITEMS 64 @@ -163,7 +157,6 @@ extern bool opt_prof; * to notice state changes. */ extern bool opt_prof_active; -extern size_t opt_lg_prof_bt_max; /* Maximum backtrace depth. */ extern size_t opt_lg_prof_sample; /* Mean bytes between samples. */ extern ssize_t opt_lg_prof_interval; /* lg(prof_interval). */ extern bool opt_prof_gdump; /* High-water memory dumping. */ @@ -186,9 +179,6 @@ extern uint64_t prof_interval; */ extern bool prof_promote; -/* (1U << opt_lg_prof_bt_max). */ -extern unsigned prof_bt_max; - /* Thread-specific backtrace cache, used to reduce bt2ctx contention. 
*/ #ifndef NO_TLS extern __thread prof_tdata_t *prof_tdata_tls @@ -213,7 +203,7 @@ extern __thread prof_tdata_t *prof_tdata_tls extern pthread_key_t prof_tdata_tsd; void bt_init(prof_bt_t *bt, void **vec); -void prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max); +void prof_backtrace(prof_bt_t *bt, unsigned nignore); prof_thr_cnt_t *prof_lookup(prof_bt_t *bt); void prof_idump(void); bool prof_mdump(const char *filename); @@ -249,7 +239,7 @@ bool prof_boot2(void); /* Don't bother with sampling logic, since sampling */\ /* interval is 1. */\ bt_init(&bt, prof_tdata->vec); \ - prof_backtrace(&bt, nignore, prof_bt_max); \ + prof_backtrace(&bt, nignore); \ ret = prof_lookup(&bt); \ } else { \ if (prof_tdata->threshold == 0) { \ @@ -272,7 +262,7 @@ bool prof_boot2(void); if (size >= prof_tdata->threshold - \ prof_tdata->accum) { \ bt_init(&bt, prof_tdata->vec); \ - prof_backtrace(&bt, nignore, prof_bt_max); \ + prof_backtrace(&bt, nignore); \ ret = prof_lookup(&bt); \ } else \ ret = (prof_thr_cnt_t *)(uintptr_t)1U; \ diff --git a/src/ctl.c b/src/ctl.c index 12b41857..4938e10a 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -74,7 +74,6 @@ CTL_PROTO(opt_lg_tcache_gc_sweep) CTL_PROTO(opt_prof) CTL_PROTO(opt_prof_prefix) CTL_PROTO(opt_prof_active) -CTL_PROTO(opt_lg_prof_bt_max) CTL_PROTO(opt_lg_prof_sample) CTL_PROTO(opt_lg_prof_interval) CTL_PROTO(opt_prof_gdump) @@ -216,7 +215,6 @@ static const ctl_node_t opt_node[] = { {NAME("prof"), CTL(opt_prof)}, {NAME("prof_prefix"), CTL(opt_prof_prefix)}, {NAME("prof_active"), CTL(opt_prof_active)}, - {NAME("lg_prof_bt_max"), CTL(opt_lg_prof_bt_max)}, {NAME("lg_prof_sample"), CTL(opt_lg_prof_sample)}, {NAME("lg_prof_interval"), CTL(opt_lg_prof_interval)}, {NAME("prof_gdump"), CTL(opt_prof_gdump)}, @@ -1125,7 +1123,6 @@ CTL_RO_NL_CGEN(config_tcache, opt_lg_tcache_gc_sweep, opt_lg_tcache_gc_sweep, CTL_RO_NL_CGEN(config_prof, opt_prof, opt_prof, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_prefix, opt_prof_prefix, const char *) 
CTL_RO_CGEN(config_prof, opt_prof_active, opt_prof_active, bool) /* Mutable. */ -CTL_RO_NL_CGEN(config_prof, opt_lg_prof_bt_max, opt_lg_prof_bt_max, size_t) CTL_RO_NL_CGEN(config_prof, opt_lg_prof_sample, opt_lg_prof_sample, size_t) CTL_RO_NL_CGEN(config_prof, opt_lg_prof_interval, opt_lg_prof_interval, ssize_t) CTL_RO_NL_CGEN(config_prof, opt_prof_gdump, opt_prof_gdump, bool) diff --git a/src/jemalloc.c b/src/jemalloc.c index d2a6009f..81829fe7 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -597,8 +597,6 @@ malloc_conf_init(void) if (config_prof) { CONF_HANDLE_BOOL(prof) CONF_HANDLE_CHAR_P(prof_prefix, "jeprof") - CONF_HANDLE_SIZE_T(lg_prof_bt_max, 0, - LG_PROF_BT_MAX) CONF_HANDLE_BOOL(prof_active) CONF_HANDLE_SSIZE_T(lg_prof_sample, 0, (sizeof(uint64_t) << 3) - 1) diff --git a/src/prof.c b/src/prof.c index a4012f04..21b60e3e 100644 --- a/src/prof.c +++ b/src/prof.c @@ -16,7 +16,6 @@ bool opt_prof = false; bool opt_prof_active = true; -size_t opt_lg_prof_bt_max = LG_PROF_BT_MAX_DEFAULT; size_t opt_lg_prof_sample = LG_PROF_SAMPLE_DEFAULT; ssize_t opt_lg_prof_interval = LG_PROF_INTERVAL_DEFAULT; bool opt_prof_gdump = false; @@ -27,8 +26,6 @@ char opt_prof_prefix[PATH_MAX + 1]; uint64_t prof_interval; bool prof_promote; -unsigned prof_bt_max; - #ifndef NO_TLS __thread prof_tdata_t *prof_tdata_tls JEMALLOC_ATTR(tls_model("initial-exec")); @@ -179,7 +176,7 @@ prof_leave(void) #ifdef JEMALLOC_PROF_LIBUNWIND void -prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) +prof_backtrace(prof_bt_t *bt, unsigned nignore) { unw_context_t uc; unw_cursor_t cursor; @@ -189,7 +186,6 @@ prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) cassert(config_prof); assert(bt->len == 0); assert(bt->vec != NULL); - assert(max <= (1U << opt_lg_prof_bt_max)); unw_getcontext(&uc); unw_init_local(&cursor, &uc); @@ -205,7 +201,7 @@ prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) * Iterate over stack frames until there are no more, or until no space * remains 
in bt. */ - for (i = 0; i < max; i++) { + for (i = 0; i < PROF_BT_MAX; i++) { unw_get_reg(&cursor, UNW_REG_IP, (unw_word_t *)&bt->vec[i]); bt->len++; err = unw_step(&cursor); @@ -243,9 +239,9 @@ prof_unwind_callback(struct _Unwind_Context *context, void *arg) } void -prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) +prof_backtrace(prof_bt_t *bt, unsigned nignore) { - prof_unwind_data_t data = {bt, nignore, max}; + prof_unwind_data_t data = {bt, nignore, PROF_BT_MAX}; cassert(config_prof); @@ -253,10 +249,10 @@ prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) } #elif (defined(JEMALLOC_PROF_GCC)) void -prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) +prof_backtrace(prof_bt_t *bt, unsigned nignore) { #define BT_FRAME(i) \ - if ((i) < nignore + max) { \ + if ((i) < nignore + PROF_BT_MAX) { \ void *p; \ if (__builtin_frame_address(i) == 0) \ return; \ @@ -272,7 +268,6 @@ prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) cassert(config_prof); assert(nignore <= 3); - assert(max <= (1U << opt_lg_prof_bt_max)); BT_FRAME(0) BT_FRAME(1) @@ -423,7 +418,7 @@ prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) } #else void -prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) +prof_backtrace(prof_bt_t *bt, unsigned nignore) { cassert(config_prof); @@ -1168,7 +1163,7 @@ prof_tdata_init(void) } ql_new(&prof_tdata->lru_ql); - prof_tdata->vec = imalloc(sizeof(void *) * prof_bt_max); + prof_tdata->vec = imalloc(sizeof(void *) * PROF_BT_MAX); if (prof_tdata->vec == NULL) { ckh_delete(&prof_tdata->bt2cnt); idalloc(prof_tdata); @@ -1270,7 +1265,6 @@ prof_boot2(void) abort(); } - prof_bt_max = (1U << opt_lg_prof_bt_max); if (malloc_mutex_init(&prof_dump_seq_mtx)) return (true); diff --git a/src/stats.c b/src/stats.c index 86a48c60..6d9ba9d0 100644 --- a/src/stats.c +++ b/src/stats.c @@ -511,7 +511,6 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, OPT_WRITE_SSIZE_T(lg_tcache_max) 
OPT_WRITE_BOOL(prof) OPT_WRITE_CHAR_P(prof_prefix) - OPT_WRITE_SIZE_T(lg_prof_bt_max) OPT_WRITE_BOOL(prof_active) OPT_WRITE_SSIZE_T(lg_prof_sample) OPT_WRITE_BOOL(prof_accum) @@ -616,11 +615,6 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, } if ((err = JEMALLOC_P(mallctl)("opt.prof", &bv, &bsz, NULL, 0)) == 0 && bv) { - CTL_GET("opt.lg_prof_bt_max", &sv, size_t); - write_cb(cbopaque, "Maximum profile backtrace depth: "); - write_cb(cbopaque, u2s((1U << sv), 10, s)); - write_cb(cbopaque, "\n"); - CTL_GET("opt.lg_prof_sample", &sv, size_t); write_cb(cbopaque, "Average profile sample interval: "); write_cb(cbopaque, u2s((((uint64_t)1U) << sv), 10, s)); From b172610317babc7f365584ddd7fdaf4eb8d9d04c Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 28 Feb 2012 16:50:47 -0800 Subject: [PATCH 0053/3378] Simplify small size class infrastructure. Program-generate small size class tables for all valid combinations of LG_TINY_MIN, LG_QUANTUM, and PAGE_SHIFT. Use the appropriate table to generate all relevant data structures, and remove the distinction between tiny/quantum/cacheline/subpage bins. Remove --enable-dynamic-page-shift. This option didn't prove useful in practice, and it prevented optimizations. Add Tilera architecture support. 
--- .gitignore | 1 + INSTALL | 8 - configure.ac | 24 +- doc/jemalloc.xml.in | 198 ++-------- include/jemalloc/internal/arena.h | 98 +---- include/jemalloc/internal/atomic.h | 4 +- include/jemalloc/internal/ctl.h | 2 +- .../jemalloc/internal/jemalloc_internal.h.in | 98 ++--- include/jemalloc/internal/mb.h | 9 +- include/jemalloc/internal/size_classes.sh | 132 +++++++ include/jemalloc/internal/tcache.h | 16 +- include/jemalloc/jemalloc_defs.h.in | 3 - src/arena.c | 367 ++---------------- src/ctl.c | 69 +--- src/jemalloc.c | 53 +-- src/stats.c | 72 +--- src/tcache.c | 31 +- 17 files changed, 327 insertions(+), 858 deletions(-) create mode 100755 include/jemalloc/internal/size_classes.sh diff --git a/.gitignore b/.gitignore index 1a9bb068..e6e8bb00 100644 --- a/.gitignore +++ b/.gitignore @@ -11,6 +11,7 @@ /lib/ /Makefile /include/jemalloc/internal/jemalloc_internal\.h +/include/jemalloc/internal/size_classes\.h /include/jemalloc/jemalloc\.h /include/jemalloc/jemalloc_defs\.h /test/jemalloc_test\.h diff --git a/INSTALL b/INSTALL index 9124ac34..0ddcacb7 100644 --- a/INSTALL +++ b/INSTALL @@ -113,14 +113,6 @@ any of the following arguments (not a definitive list) to 'configure': rather than a minimal allocation. See the "opt.sysv" option documentation for usage details. ---enable-dynamic-page-shift - Under most conditions, the system page size never changes (usually 4KiB or - 8KiB, depending on architecture and configuration), and unless this option - is enabled, jemalloc assumes that page size can safely be determined during - configuration and hard-coded. Enabling dynamic page size determination has - a measurable impact on performance, since the compiler is forced to load - the page size from memory rather than embedding immediate values. 
- --enable-lazy-lock Enable code that wraps pthread_create() to detect when an application switches from single-threaded to multi-threaded mode, so that it can avoid diff --git a/configure.ac b/configure.ac index fdbf1bad..91caef46 100644 --- a/configure.ac +++ b/configure.ac @@ -367,8 +367,10 @@ cfgoutputs_tup="${cfgoutputs_tup} include/jemalloc/internal/jemalloc_internal.h" cfgoutputs_tup="${cfgoutputs_tup} test/jemalloc_test.h:test/jemalloc_test.h.in" cfghdrs_in="${srcroot}include/jemalloc/jemalloc_defs.h.in" +cfghdrs_in="${cfghdrs_in} ${srcroot}include/jemalloc/internal/size_classes.sh" cfghdrs_out="include/jemalloc/jemalloc_defs${install_suffix}.h" +cfghdrs_out="${cfghdrs_out} include/jemalloc/internal/size_classes.h" cfghdrs_tup="include/jemalloc/jemalloc_defs${install_suffix}.h:include/jemalloc/jemalloc_defs.h.in" @@ -640,23 +642,6 @@ if test "x$enable_sysv" = "x1" ; then fi AC_SUBST([enable_sysv]) -dnl Do not determine page shift at run time by default. -AC_ARG_ENABLE([dynamic_page_shift], - [AS_HELP_STRING([--enable-dynamic-page-shift], - [Determine page size at run time (don't trust configure result)])], -[if test "x$enable_dynamic_page_shift" = "xno" ; then - enable_dynamic_page_shift="0" -else - enable_dynamic_page_shift="1" -fi -], -[enable_dynamic_page_shift="0"] -) -if test "x$enable_dynamic_page_shift" = "x1" ; then - AC_DEFINE([DYNAMIC_PAGE_SHIFT], [ ]) -fi -AC_SUBST([enable_dynamic_page_shift]) - AC_MSG_CHECKING([STATIC_PAGE_SHIFT]) AC_RUN_IFELSE([AC_LANG_PROGRAM( [[#include @@ -866,6 +851,11 @@ dnl ============================================================================ dnl Check for typedefs, structures, and compiler characteristics. AC_HEADER_STDBOOL +AC_CONFIG_COMMANDS([include/jemalloc/internal/size_classes.h], [ + mkdir -p "include/jemalloc/internal" + "${srcdir}/include/jemalloc/internal/size_classes.sh" > "${objroot}include/jemalloc/internal/size_classes.h" +]) + dnl Process .in files. 
AC_SUBST([cfghdrs_in]) AC_SUBST([cfghdrs_out]) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 1e8c8005..cfe120fb 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -458,20 +458,11 @@ for (i = 0; i < nbins; i++) { a frontier and free list to track which regions are in use. Allocation requests that are no more than half the quantum (8 or 16, depending on architecture) are rounded up to the nearest power of two that is at least - sizeof(void *). Allocation requests - that are more than half the quantum, but no more than the minimum - cacheline-multiple size class (see the opt.lg_qspace_max - option) are rounded up to the nearest multiple of the quantum. Allocation - requests that are more than the minimum cacheline-multiple size class, but - no more than the minimum subpage-multiple size class (see the opt.lg_cspace_max - option) are rounded up to the nearest multiple of the cacheline size (64). - Allocation requests that are more than the minimum subpage-multiple size - class, but no more than the maximum subpage-multiple size class are rounded - up to the nearest multiple of the subpage size (256). Allocation requests - that are more than the maximum subpage-multiple size class, but small - enough to fit in an arena-managed chunk (see the sizeof(double). All other small + object size classes are multiples of the quantum, spaced such that internal + fragmentation is limited to approximately 25% for all but the smallest size + classes. Allocation requests that are larger than the maximum small size + class, but small enough to fit in an arena-managed chunk (see the opt.lg_chunk option), are rounded up to the nearest run size. 
Allocation requests that are too large to fit in an arena-managed chunk are rounded up to the nearest multiple of @@ -507,16 +498,28 @@ for (i = 0; i < nbins; i++) { [8] - Quantum-spaced + 16-spaced [16, 32, 48, ..., 128] - Cacheline-spaced - [192, 256, 320, ..., 512] + 32-spaced + [160, 192, 224, 256] - Subpage-spaced - [768, 1024, 1280, ..., 3840] + 64-spaced + [320, 384, 448, 512] + + + 128-spaced + [640, 768, 896, 1024] + + + 256-spaced + [1280, 1536, 1792, 2048] + + + 512-spaced + [2560, 3072, 3584] Large @@ -714,30 +717,6 @@ for (i = 0; i < nbins; i++) { - - - opt.lg_qspace_max - (size_t) - r- - - Size (log base 2) of the maximum size class that is a - multiple of the quantum (8 or 16 bytes, depending on architecture). - Above this size, cacheline spacing is used for size classes. The - default value is 128 bytes (2^7). - - - - - opt.lg_cspace_max - (size_t) - r- - - Size (log base 2) of the maximum size class that is a - multiple of the cacheline size (64). Above this size, subpage spacing - (256 bytes) is used for size classes. The default value is 512 bytes - (2^9). - - opt.lg_chunk @@ -1178,24 +1157,6 @@ malloc_conf = "xmalloc:true";]]> Quantum size. - - - arenas.cacheline - (size_t) - r- - - Assumed cacheline size. - - - - - arenas.subpage - (size_t) - r- - - Subpage size class interval. - - arenas.pagesize @@ -1214,80 +1175,6 @@ malloc_conf = "xmalloc:true";]]> Chunk size. - - - arenas.tspace_min - (size_t) - r- - - Minimum tiny size class. Tiny size classes are powers - of two. - - - - - arenas.tspace_max - (size_t) - r- - - Maximum tiny size class. Tiny size classes are powers - of two. - - - - - arenas.qspace_min - (size_t) - r- - - Minimum quantum-spaced size class. - - - - - arenas.qspace_max - (size_t) - r- - - Maximum quantum-spaced size class. - - - - - arenas.cspace_min - (size_t) - r- - - Minimum cacheline-spaced size class. - - - - - arenas.cspace_max - (size_t) - r- - - Maximum cacheline-spaced size class. 
- - - - - arenas.sspace_min - (size_t) - r- - - Minimum subpage-spaced size class. - - - - - arenas.sspace_max - (size_t) - r- - - Maximum subpage-spaced size class. - - arenas.tcache_max @@ -1298,52 +1185,13 @@ malloc_conf = "xmalloc:true";]]> Maximum thread-cached size class. - - - arenas.ntbins - (unsigned) - r- - - Number of tiny bin size classes. - - - - - arenas.nqbins - (unsigned) - r- - - Number of quantum-spaced bin size - classes. - - - - - arenas.ncbins - (unsigned) - r- - - Number of cacheline-spaced bin size - classes. - - - - - arenas.nsbins - (unsigned) - r- - - Number of subpage-spaced bin size - classes. - - arenas.nbins (unsigned) r- - Total number of bin size classes. + Number of bin size classes. diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 4a87ef54..16c2b1e6 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -1,39 +1,6 @@ /******************************************************************************/ #ifdef JEMALLOC_H_TYPES -/* - * Subpages are an artificially designated partitioning of pages. Their only - * purpose is to support subpage-spaced size classes. - * - * There must be at least 4 subpages per page, due to the way size classes are - * handled. - */ -#define LG_SUBPAGE 8 -#define SUBPAGE ((size_t)(1U << LG_SUBPAGE)) -#define SUBPAGE_MASK (SUBPAGE - 1) - -/* Return the smallest subpage multiple that is >= s. */ -#define SUBPAGE_CEILING(s) \ - (((s) + SUBPAGE_MASK) & ~SUBPAGE_MASK) - -/* Smallest size class to support. */ -#define LG_TINY_MIN 3 -#define TINY_MIN (1U << LG_TINY_MIN) - -/* - * Maximum size class that is a multiple of the quantum, but not (necessarily) - * a power of 2. Above this size, allocations are rounded up to the nearest - * power of 2. - */ -#define LG_QSPACE_MAX_DEFAULT 7 - -/* - * Maximum size class that is a multiple of the cacheline, but not (necessarily) - * a power of 2. 
Above this size, allocations are rounded up to the nearest - * power of 2. - */ -#define LG_CSPACE_MAX_DEFAULT 9 - /* * RUN_MAX_OVRHD indicates maximum desired run header overhead. Runs are sized * as small as possible such that this setting is still honored, without @@ -364,75 +331,26 @@ struct arena_s { arena_avail_tree_t runs_avail_clean; arena_avail_tree_t runs_avail_dirty; - /* - * bins is used to store trees of free regions of the following sizes, - * assuming a 64-bit system with 16-byte quantum, 4 KiB page size, and - * default MALLOC_CONF. - * - * bins[i] | size | - * --------+--------+ - * 0 | 8 | - * --------+--------+ - * 1 | 16 | - * 2 | 32 | - * 3 | 48 | - * : : - * 6 | 96 | - * 7 | 112 | - * 8 | 128 | - * --------+--------+ - * 9 | 192 | - * 10 | 256 | - * 11 | 320 | - * 12 | 384 | - * 13 | 448 | - * 14 | 512 | - * --------+--------+ - * 15 | 768 | - * 16 | 1024 | - * 17 | 1280 | - * : : - * 25 | 3328 | - * 26 | 3584 | - * 27 | 3840 | - * --------+--------+ - */ - arena_bin_t bins[1]; /* Dynamically sized. */ + /* bins is used to store trees of free regions. */ + arena_bin_t bins[NBINS]; }; #endif /* JEMALLOC_H_STRUCTS */ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -extern size_t opt_lg_qspace_max; -extern size_t opt_lg_cspace_max; extern ssize_t opt_lg_dirty_mult; /* * small_size2bin is a compact lookup table that rounds request sizes up to * size classes. In order to reduce cache footprint, the table is compressed, * and all accesses are via the SMALL_SIZE2BIN macro. */ -extern uint8_t const *small_size2bin; +extern uint8_t const small_size2bin[]; #define SMALL_SIZE2BIN(s) (small_size2bin[(s-1) >> LG_TINY_MIN]) -extern arena_bin_info_t *arena_bin_info; - -/* Various bin-related settings. */ - /* Number of (2^n)-spaced tiny bins. */ -#define ntbins ((unsigned)(LG_QUANTUM - LG_TINY_MIN)) -extern unsigned nqbins; /* Number of quantum-spaced bins. 
*/ -extern unsigned ncbins; /* Number of cacheline-spaced bins. */ -extern unsigned nsbins; /* Number of subpage-spaced bins. */ -extern unsigned nbins; -#define tspace_max ((size_t)(QUANTUM >> 1)) -#define qspace_min QUANTUM -extern size_t qspace_max; -extern size_t cspace_min; -extern size_t cspace_max; -extern size_t sspace_min; -extern size_t sspace_max; -#define small_maxclass sspace_max +extern arena_bin_info_t arena_bin_info[NBINS]; +/* Number of large size classes. */ #define nlclasses (chunk_npages - map_bias) void arena_purge_all(arena_t *arena); @@ -457,7 +375,7 @@ void *arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, void *arena_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, size_t alignment, bool zero); bool arena_new(arena_t *arena, unsigned ind); -bool arena_boot(void); +void arena_boot(void); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ @@ -478,7 +396,7 @@ JEMALLOC_INLINE size_t arena_bin_index(arena_t *arena, arena_bin_t *bin) { size_t binind = bin - arena->bins; - assert(binind < nbins); + assert(binind < NBINS); return (binind); } @@ -633,7 +551,7 @@ arena_malloc(size_t size, bool zero) assert(size != 0); assert(QUANTUM_CEILING(size) <= arena_maxclass); - if (size <= small_maxclass) { + if (size <= SMALL_MAXCLASS) { if ((tcache = tcache_get()) != NULL) return (tcache_alloc_small(tcache, size, zero)); else diff --git a/include/jemalloc/internal/atomic.h b/include/jemalloc/internal/atomic.h index 9a298623..8c685939 100644 --- a/include/jemalloc/internal/atomic.h +++ b/include/jemalloc/internal/atomic.h @@ -70,7 +70,7 @@ atomic_sub_uint64(uint64_t *p, uint64_t x) return (OSAtomicAdd64(-((int64_t)x), (int64_t *)p)); } -#elif (defined(__amd64_) || defined(__x86_64__)) +#elif (defined(__amd64__) || defined(__x86_64__)) JEMALLOC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x) { @@ -133,7 +133,7 @@ atomic_sub_uint32(uint32_t *p, uint32_t x) return 
(OSAtomicAdd32(-((int32_t)x), (int32_t *)p)); } -#elif (defined(__i386__) || defined(__amd64_) || defined(__x86_64__)) +#elif (defined(__i386__) || defined(__amd64__) || defined(__x86_64__)) JEMALLOC_INLINE uint32_t atomic_add_uint32(uint32_t *p, uint32_t x) { diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index de4b9412..28be2aef 100644 --- a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -40,7 +40,7 @@ struct ctl_arena_stats_s { uint64_t ndalloc_small; uint64_t nrequests_small; - malloc_bin_stats_t *bstats; /* nbins elements. */ + malloc_bin_stats_t bstats[NBINS]; malloc_large_stats_t *lstats; /* nlclasses elements. */ }; diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 971336ec..f43fcd20 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -229,33 +229,48 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s); /* Size of stack-allocated buffer passed to buferror(). */ #define BUFERROR_BUF 64 -/* Minimum alignment of allocations is 2^LG_QUANTUM bytes. */ -#ifdef __i386__ -# define LG_QUANTUM 4 -#endif -#ifdef __ia64__ -# define LG_QUANTUM 4 -#endif -#ifdef __alpha__ -# define LG_QUANTUM 4 -#endif -#ifdef __sparc64__ -# define LG_QUANTUM 4 -#endif -#if (defined(__amd64__) || defined(__x86_64__)) -# define LG_QUANTUM 4 -#endif -#ifdef __arm__ -# define LG_QUANTUM 3 -#endif -#ifdef __mips__ -# define LG_QUANTUM 3 -#endif -#ifdef __powerpc__ -# define LG_QUANTUM 4 -#endif -#ifdef __s390x__ -# define LG_QUANTUM 4 +/* Smallest size class to support. */ +#define LG_TINY_MIN 3 +#define TINY_MIN (1U << LG_TINY_MIN) + +/* + * Minimum alignment of allocations is 2^LG_QUANTUM bytes (ignoring tiny size + * classes). 
+ */ +#ifndef LG_QUANTUM +# ifdef __i386__ +# define LG_QUANTUM 4 +# endif +# ifdef __ia64__ +# define LG_QUANTUM 4 +# endif +# ifdef __alpha__ +# define LG_QUANTUM 4 +# endif +# ifdef __sparc64__ +# define LG_QUANTUM 4 +# endif +# if (defined(__amd64__) || defined(__x86_64__)) +# define LG_QUANTUM 4 +# endif +# ifdef __arm__ +# define LG_QUANTUM 3 +# endif +# ifdef __mips__ +# define LG_QUANTUM 3 +# endif +# ifdef __powerpc__ +# define LG_QUANTUM 4 +# endif +# ifdef __s390x__ +# define LG_QUANTUM 4 +# endif +# ifdef __tile__ +# define LG_QUANTUM 4 +# endif +# ifndef LG_QUANTUM +# error "No LG_QUANTUM definition for architecture; specify via CPPFLAGS" +# endif #endif #define QUANTUM ((size_t)(1U << LG_QUANTUM)) @@ -291,15 +306,9 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s); #define CACHELINE_CEILING(s) \ (((s) + CACHELINE_MASK) & ~CACHELINE_MASK) -/* - * Page size. STATIC_PAGE_SHIFT is determined by the configure script. If - * DYNAMIC_PAGE_SHIFT is enabled, only use the STATIC_PAGE_* macros where - * compile-time values are required for the purposes of defining data - * structures. - */ +/* Page size. STATIC_PAGE_SHIFT is determined by the configure script. */ #define STATIC_PAGE_SIZE ((size_t)(1U << STATIC_PAGE_SHIFT)) #define STATIC_PAGE_MASK ((size_t)(STATIC_PAGE_SIZE - 1)) - #ifdef PAGE_SHIFT # undef PAGE_SHIFT #endif @@ -309,16 +318,9 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s); #ifdef PAGE_MASK # undef PAGE_MASK #endif - -#ifdef DYNAMIC_PAGE_SHIFT -# define PAGE_SHIFT lg_pagesize -# define PAGE_SIZE pagesize -# define PAGE_MASK pagesize_mask -#else -# define PAGE_SHIFT STATIC_PAGE_SHIFT -# define PAGE_SIZE STATIC_PAGE_SIZE -# define PAGE_MASK STATIC_PAGE_MASK -#endif +#define PAGE_SHIFT STATIC_PAGE_SHIFT +#define PAGE_SIZE STATIC_PAGE_SIZE +#define PAGE_MASK STATIC_PAGE_MASK /* Return the smallest pagesize multiple that is >= s. 
*/ #define PAGE_CEILING(s) \ @@ -327,6 +329,7 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s); #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/prn.h" #include "jemalloc/internal/ckh.h" +#include "jemalloc/internal/size_classes.h" #include "jemalloc/internal/stats.h" #include "jemalloc/internal/ctl.h" #include "jemalloc/internal/mutex.h" @@ -352,6 +355,7 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s); #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/prn.h" #include "jemalloc/internal/ckh.h" +#include "jemalloc/internal/size_classes.h" #include "jemalloc/internal/stats.h" #include "jemalloc/internal/ctl.h" #include "jemalloc/internal/mutex.h" @@ -455,6 +459,7 @@ void jemalloc_postfork(void); #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/prn.h" #include "jemalloc/internal/ckh.h" +#include "jemalloc/internal/size_classes.h" #include "jemalloc/internal/stats.h" #include "jemalloc/internal/ctl.h" #include "jemalloc/internal/mutex.h" @@ -480,6 +485,7 @@ void jemalloc_postfork(void); #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/prn.h" #include "jemalloc/internal/ckh.h" +#include "jemalloc/internal/size_classes.h" #include "jemalloc/internal/stats.h" #include "jemalloc/internal/ctl.h" #include "jemalloc/internal/mutex.h" @@ -525,7 +531,7 @@ JEMALLOC_INLINE size_t s2u(size_t size) { - if (size <= small_maxclass) + if (size <= SMALL_MAXCLASS) return (arena_bin_info[SMALL_SIZE2BIN(size)].reg_size); if (size <= arena_maxclass) return (PAGE_CEILING(size)); @@ -570,7 +576,7 @@ sa2u(size_t size, size_t alignment, size_t *run_size_p) } if (usize <= arena_maxclass && alignment <= PAGE_SIZE) { - if (usize <= small_maxclass) + if (usize <= SMALL_MAXCLASS) return (arena_bin_info[SMALL_SIZE2BIN(usize)].reg_size); return (PAGE_CEILING(usize)); } else { diff --git a/include/jemalloc/internal/mb.h b/include/jemalloc/internal/mb.h index dc9f2a54..3cfa7872 
100644 --- a/include/jemalloc/internal/mb.h +++ b/include/jemalloc/internal/mb.h @@ -54,7 +54,7 @@ mb_write(void) ); #endif } -#elif (defined(__amd64_) || defined(__x86_64__)) +#elif (defined(__amd64__) || defined(__x86_64__)) JEMALLOC_INLINE void mb_write(void) { @@ -87,6 +87,13 @@ mb_write(void) : "memory" /* Clobbers. */ ); } +#elif defined(__tile__) +JEMALLOC_INLINE void +mb_write(void) +{ + + __sync_synchronize(); +} #else /* * This is much slower than a simple memory barrier, but the semantics of mutex diff --git a/include/jemalloc/internal/size_classes.sh b/include/jemalloc/internal/size_classes.sh new file mode 100755 index 00000000..d8306a58 --- /dev/null +++ b/include/jemalloc/internal/size_classes.sh @@ -0,0 +1,132 @@ +#!/bin/sh + +# The following limits are chosen such that they cover all supported platforms. + +# Range of quanta. +lg_qmin=3 +lg_qmax=4 + +# The range of tiny size classes is [2^lg_tmin..2^(lg_q-1)]. +lg_tmin=3 + +# Range of page sizes. +lg_pmin=12 +lg_pmax=16 + +function pow2() { + e=$1 + pow2_result=1 + while [ ${e} -gt 0 ] ; do + pow2_result=`expr ${pow2_result} + ${pow2_result}` + e=`expr ${e} - 1` + done +} + +cat < 255) +# error "Too many small size classes" +#endif + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ +EOF diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index 717682d7..b964a12e 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -91,7 +91,7 @@ extern 
__thread tcache_t *tcache_tls extern pthread_key_t tcache_tsd; /* - * Number of tcache bins. There are nbins small-object bins, plus 0 or more + * Number of tcache bins. There are NBINS small-object bins, plus 0 or more * large-object bins. */ extern size_t nhbins; @@ -181,7 +181,7 @@ tcache_event(tcache_t *tcache) * Flush (ceiling) 3/4 of the objects below the low * water mark. */ - if (binind < nbins) { + if (binind < NBINS) { tcache_bin_flush_small(tbin, binind, tbin->ncached - tbin->low_water + (tbin->low_water >> 2), tcache); @@ -238,7 +238,7 @@ tcache_alloc_small(tcache_t *tcache, size_t size, bool zero) tcache_bin_t *tbin; binind = SMALL_SIZE2BIN(size); - assert(binind < nbins); + assert(binind < NBINS); tbin = &tcache->tbins[binind]; ret = tcache_alloc_easy(tbin); if (ret == NULL) { @@ -275,7 +275,7 @@ tcache_alloc_large(tcache_t *tcache, size_t size, bool zero) size = PAGE_CEILING(size); assert(size <= tcache_maxclass); - binind = nbins + (size >> PAGE_SHIFT) - 1; + binind = NBINS + (size >> PAGE_SHIFT) - 1; assert(binind < nhbins); tbin = &tcache->tbins[binind]; ret = tcache_alloc_easy(tbin); @@ -328,7 +328,7 @@ tcache_dalloc_small(tcache_t *tcache, void *ptr) size_t pageind, binind; arena_chunk_map_t *mapelm; - assert(arena_salloc(ptr) <= small_maxclass); + assert(arena_salloc(ptr) <= SMALL_MAXCLASS); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); arena = chunk->arena; @@ -339,7 +339,7 @@ tcache_dalloc_small(tcache_t *tcache, void *ptr) bin = run->bin; binind = ((uintptr_t)bin - (uintptr_t)&arena->bins) / sizeof(arena_bin_t); - assert(binind < nbins); + assert(binind < NBINS); if (config_fill && opt_junk) memset(ptr, 0x5a, arena_bin_info[binind].reg_size); @@ -367,13 +367,13 @@ tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size) tcache_bin_info_t *tbin_info; assert((size & PAGE_MASK) == 0); - assert(arena_salloc(ptr) > small_maxclass); + assert(arena_salloc(ptr) > SMALL_MAXCLASS); assert(arena_salloc(ptr) <= tcache_maxclass); chunk = 
(arena_chunk_t *)CHUNK_ADDR2BASE(ptr); arena = chunk->arena; pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; - binind = nbins + (size >> PAGE_SHIFT) - 1; + binind = NBINS + (size >> PAGE_SHIFT) - 1; if (config_fill && opt_junk) memset(ptr, 0x5a, size); diff --git a/include/jemalloc/jemalloc_defs.h.in b/include/jemalloc/jemalloc_defs.h.in index 66da6f3d..53e85208 100644 --- a/include/jemalloc/jemalloc_defs.h.in +++ b/include/jemalloc/jemalloc_defs.h.in @@ -104,9 +104,6 @@ /* Support lazy locking (avoid locking unless a second thread is launched). */ #undef JEMALLOC_LAZY_LOCK -/* Determine page size at run time if defined. */ -#undef DYNAMIC_PAGE_SHIFT - /* One page is 2^STATIC_PAGE_SHIFT bytes. */ #undef STATIC_PAGE_SHIFT diff --git a/src/arena.c b/src/arena.c index 33f3f85e..72b7f449 100644 --- a/src/arena.c +++ b/src/arena.c @@ -4,128 +4,38 @@ /******************************************************************************/ /* Data. */ -size_t opt_lg_qspace_max = LG_QSPACE_MAX_DEFAULT; -size_t opt_lg_cspace_max = LG_CSPACE_MAX_DEFAULT; ssize_t opt_lg_dirty_mult = LG_DIRTY_MULT_DEFAULT; -uint8_t const *small_size2bin; -arena_bin_info_t *arena_bin_info; +arena_bin_info_t arena_bin_info[NBINS]; -/* Various bin-related settings. */ -unsigned nqbins; -unsigned ncbins; -unsigned nsbins; -unsigned nbins; -size_t qspace_max; -size_t cspace_min; -size_t cspace_max; -size_t sspace_min; -size_t sspace_max; - -size_t lg_mspace; -size_t mspace_mask; - -/* - * const_small_size2bin is a static constant lookup table that in the common - * case can be used as-is for small_size2bin. - */ +JEMALLOC_ATTR(aligned(CACHELINE)) +const uint8_t small_size2bin[] = { #define S2B_8(i) i, #define S2B_16(i) S2B_8(i) S2B_8(i) #define S2B_32(i) S2B_16(i) S2B_16(i) #define S2B_64(i) S2B_32(i) S2B_32(i) #define S2B_128(i) S2B_64(i) S2B_64(i) #define S2B_256(i) S2B_128(i) S2B_128(i) -/* - * The number of elements in const_small_size2bin is dependent on the - * definition for SUBPAGE. 
- */ -static JEMALLOC_ATTR(aligned(CACHELINE)) - const uint8_t const_small_size2bin[] = { -#if (LG_QUANTUM == 4) -/* 16-byte quantum **********************/ - S2B_8(0) /* 8 */ - S2B_8(1) /* 16 */ -# define S2B_QMIN 1 - S2B_16(S2B_QMIN + 1) /* 32 */ - S2B_16(S2B_QMIN + 2) /* 48 */ - S2B_16(S2B_QMIN + 3) /* 64 */ - S2B_16(S2B_QMIN + 4) /* 80 */ - S2B_16(S2B_QMIN + 5) /* 96 */ - S2B_16(S2B_QMIN + 6) /* 112 */ - S2B_16(S2B_QMIN + 7) /* 128 */ -# define S2B_CMIN (S2B_QMIN + 8) -#else -/* 8-byte quantum ***********************/ -# define S2B_QMIN 0 - S2B_8(S2B_QMIN + 0) /* 8 */ - S2B_8(S2B_QMIN + 1) /* 16 */ - S2B_8(S2B_QMIN + 2) /* 24 */ - S2B_8(S2B_QMIN + 3) /* 32 */ - S2B_8(S2B_QMIN + 4) /* 40 */ - S2B_8(S2B_QMIN + 5) /* 48 */ - S2B_8(S2B_QMIN + 6) /* 56 */ - S2B_8(S2B_QMIN + 7) /* 64 */ - S2B_8(S2B_QMIN + 8) /* 72 */ - S2B_8(S2B_QMIN + 9) /* 80 */ - S2B_8(S2B_QMIN + 10) /* 88 */ - S2B_8(S2B_QMIN + 11) /* 96 */ - S2B_8(S2B_QMIN + 12) /* 104 */ - S2B_8(S2B_QMIN + 13) /* 112 */ - S2B_8(S2B_QMIN + 14) /* 120 */ - S2B_8(S2B_QMIN + 15) /* 128 */ -# define S2B_CMIN (S2B_QMIN + 16) -#endif -/****************************************/ - S2B_64(S2B_CMIN + 0) /* 192 */ - S2B_64(S2B_CMIN + 1) /* 256 */ - S2B_64(S2B_CMIN + 2) /* 320 */ - S2B_64(S2B_CMIN + 3) /* 384 */ - S2B_64(S2B_CMIN + 4) /* 448 */ - S2B_64(S2B_CMIN + 5) /* 512 */ -# define S2B_SMIN (S2B_CMIN + 6) - S2B_256(S2B_SMIN + 0) /* 768 */ - S2B_256(S2B_SMIN + 1) /* 1024 */ - S2B_256(S2B_SMIN + 2) /* 1280 */ - S2B_256(S2B_SMIN + 3) /* 1536 */ - S2B_256(S2B_SMIN + 4) /* 1792 */ - S2B_256(S2B_SMIN + 5) /* 2048 */ - S2B_256(S2B_SMIN + 6) /* 2304 */ - S2B_256(S2B_SMIN + 7) /* 2560 */ - S2B_256(S2B_SMIN + 8) /* 2816 */ - S2B_256(S2B_SMIN + 9) /* 3072 */ - S2B_256(S2B_SMIN + 10) /* 3328 */ - S2B_256(S2B_SMIN + 11) /* 3584 */ - S2B_256(S2B_SMIN + 12) /* 3840 */ -#if (STATIC_PAGE_SHIFT == 13) - S2B_256(S2B_SMIN + 13) /* 4096 */ - S2B_256(S2B_SMIN + 14) /* 4352 */ - S2B_256(S2B_SMIN + 15) /* 4608 */ - S2B_256(S2B_SMIN + 16) /* 
4864 */ - S2B_256(S2B_SMIN + 17) /* 5120 */ - S2B_256(S2B_SMIN + 18) /* 5376 */ - S2B_256(S2B_SMIN + 19) /* 5632 */ - S2B_256(S2B_SMIN + 20) /* 5888 */ - S2B_256(S2B_SMIN + 21) /* 6144 */ - S2B_256(S2B_SMIN + 22) /* 6400 */ - S2B_256(S2B_SMIN + 23) /* 6656 */ - S2B_256(S2B_SMIN + 24) /* 6912 */ - S2B_256(S2B_SMIN + 25) /* 7168 */ - S2B_256(S2B_SMIN + 26) /* 7424 */ - S2B_256(S2B_SMIN + 27) /* 7680 */ - S2B_256(S2B_SMIN + 28) /* 7936 */ -#endif -}; -#undef S2B_1 -#undef S2B_2 -#undef S2B_4 +#define S2B_512(i) S2B_256(i) S2B_256(i) +#define S2B_1024(i) S2B_512(i) S2B_512(i) +#define S2B_2048(i) S2B_1024(i) S2B_1024(i) +#define S2B_4096(i) S2B_2048(i) S2B_2048(i) +#define S2B_8192(i) S2B_4096(i) S2B_4096(i) +#define SIZE_CLASS(bin, delta, size) \ + S2B_##delta(bin) + SIZE_CLASSES #undef S2B_8 #undef S2B_16 #undef S2B_32 #undef S2B_64 #undef S2B_128 #undef S2B_256 -#undef S2B_QMIN -#undef S2B_CMIN -#undef S2B_SMIN +#undef S2B_512 +#undef S2B_1024 +#undef S2B_2048 +#undef S2B_4096 +#undef S2B_8192 +#undef SIZE_CLASS +}; /******************************************************************************/ /* Function prototypes for non-inline static functions. 
*/ @@ -160,12 +70,9 @@ static bool arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr, size_t oldsize, size_t size, size_t extra, bool zero); static bool arena_ralloc_large(void *ptr, size_t oldsize, size_t size, size_t extra, bool zero); -static bool small_size2bin_init(void); -static void small_size2bin_validate(void); -static bool small_size2bin_init_hard(void); static size_t bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size); -static bool bin_info_init(void); +static void bin_info_init(void); /******************************************************************************/ @@ -1368,7 +1275,7 @@ arena_malloc_small(arena_t *arena, size_t size, bool zero) size_t binind; binind = SMALL_SIZE2BIN(size); - assert(binind < nbins); + assert(binind < NBINS); bin = &arena->bins[binind]; size = arena_bin_info[binind].reg_size; @@ -1553,12 +1460,12 @@ arena_prof_promoted(const void *ptr, size_t size) assert(ptr != NULL); assert(CHUNK_ADDR2BASE(ptr) != ptr); assert(isalloc(ptr) == PAGE_SIZE); - assert(size <= small_maxclass); + assert(size <= SMALL_MAXCLASS); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; binind = SMALL_SIZE2BIN(size); - assert(binind < nbins); + assert(binind < NBINS); chunk->map[pageind-map_bias].bits = (chunk->map[pageind-map_bias].bits & ~CHUNK_MAP_CLASS_MASK) | ((binind+1) << CHUNK_MAP_CLASS_SHIFT); } @@ -1594,7 +1501,7 @@ arena_salloc_demote(const void *ptr) CHUNK_MAP_CLASS_MASK) != 0) { size_t binind = ((mapbits & CHUNK_MAP_CLASS_MASK) >> CHUNK_MAP_CLASS_SHIFT) - 1; - assert(binind < nbins); + assert(binind < NBINS); ret = arena_bin_info[binind].reg_size; } assert(ret != 0); @@ -1762,7 +1669,7 @@ arena_stats_merge(arena_t *arena, size_t *nactive, size_t *ndirty, } malloc_mutex_unlock(&arena->lock); - for (i = 0; i < nbins; i++) { + for (i = 0; i < NBINS; i++) { arena_bin_t *bin = &arena->bins[i]; malloc_mutex_lock(&bin->lock); @@ -1963,10 +1870,10 @@ 
arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, * Avoid moving the allocation if the size class can be left the same. */ if (oldsize <= arena_maxclass) { - if (oldsize <= small_maxclass) { + if (oldsize <= SMALL_MAXCLASS) { assert(arena_bin_info[SMALL_SIZE2BIN(oldsize)].reg_size == oldsize); - if ((size + extra <= small_maxclass && + if ((size + extra <= SMALL_MAXCLASS && SMALL_SIZE2BIN(size + extra) == SMALL_SIZE2BIN(oldsize)) || (size <= oldsize && size + extra >= oldsize)) { @@ -1978,7 +1885,7 @@ arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, } } else { assert(size <= arena_maxclass); - if (size + extra > small_maxclass) { + if (size + extra > SMALL_MAXCLASS) { if (arena_ralloc_large(ptr, oldsize, size, extra, zero) == false) return (ptr); @@ -2083,7 +1990,7 @@ arena_new(arena_t *arena, unsigned ind) arena_avail_tree_new(&arena->runs_avail_dirty); /* Initialize bins. */ - for (i = 0; i < nbins; i++) { + for (i = 0; i < NBINS; i++) { bin = &arena->bins[i]; if (malloc_mutex_init(&bin->lock)) return (true); @@ -2096,119 +2003,6 @@ arena_new(arena_t *arena, unsigned ind) return (false); } -static void -small_size2bin_validate(void) -{ - size_t i, size, binind; - - i = 1; - /* Tiny. */ - for (; i < TINY_MIN; i++) { - size = TINY_MIN; - binind = ffs((int)(size >> (LG_TINY_MIN + 1))); - assert(SMALL_SIZE2BIN(i) == binind); - } - for (; i < qspace_min; i++) { - size = pow2_ceil(i); - binind = ffs((int)(size >> (LG_TINY_MIN + 1))); - assert(SMALL_SIZE2BIN(i) == binind); - } - /* Quantum-spaced. */ - for (; i <= qspace_max; i++) { - size = QUANTUM_CEILING(i); - binind = ntbins + (size >> LG_QUANTUM) - 1; - assert(SMALL_SIZE2BIN(i) == binind); - } - /* Cacheline-spaced. */ - for (; i <= cspace_max; i++) { - size = CACHELINE_CEILING(i); - binind = ntbins + nqbins + ((size - cspace_min) >> - LG_CACHELINE); - assert(SMALL_SIZE2BIN(i) == binind); - } - /* Sub-page. 
*/ - for (; i <= sspace_max; i++) { - size = SUBPAGE_CEILING(i); - binind = ntbins + nqbins + ncbins + ((size - sspace_min) - >> LG_SUBPAGE); - assert(SMALL_SIZE2BIN(i) == binind); - } -} - -static bool -small_size2bin_init(void) -{ - - if (opt_lg_qspace_max != LG_QSPACE_MAX_DEFAULT - || opt_lg_cspace_max != LG_CSPACE_MAX_DEFAULT - || (sizeof(const_small_size2bin) != ((small_maxclass-1) >> - LG_TINY_MIN) + 1)) - return (small_size2bin_init_hard()); - - small_size2bin = const_small_size2bin; - if (config_debug) - small_size2bin_validate(); - return (false); -} - -static bool -small_size2bin_init_hard(void) -{ - size_t i, size, binind; - uint8_t *custom_small_size2bin; -#define CUSTOM_SMALL_SIZE2BIN(s) \ - custom_small_size2bin[(s-1) >> LG_TINY_MIN] - - assert(opt_lg_qspace_max != LG_QSPACE_MAX_DEFAULT - || opt_lg_cspace_max != LG_CSPACE_MAX_DEFAULT - || (sizeof(const_small_size2bin) != ((small_maxclass-1) >> - LG_TINY_MIN) + 1)); - - custom_small_size2bin = (uint8_t *) - base_alloc(small_maxclass >> LG_TINY_MIN); - if (custom_small_size2bin == NULL) - return (true); - - i = 1; - /* Tiny. */ - for (; i < TINY_MIN; i += TINY_MIN) { - size = TINY_MIN; - binind = ffs((int)(size >> (LG_TINY_MIN + 1))); - CUSTOM_SMALL_SIZE2BIN(i) = binind; - } - for (; i < qspace_min; i += TINY_MIN) { - size = pow2_ceil(i); - binind = ffs((int)(size >> (LG_TINY_MIN + 1))); - CUSTOM_SMALL_SIZE2BIN(i) = binind; - } - /* Quantum-spaced. */ - for (; i <= qspace_max; i += TINY_MIN) { - size = QUANTUM_CEILING(i); - binind = ntbins + (size >> LG_QUANTUM) - 1; - CUSTOM_SMALL_SIZE2BIN(i) = binind; - } - /* Cacheline-spaced. */ - for (; i <= cspace_max; i += TINY_MIN) { - size = CACHELINE_CEILING(i); - binind = ntbins + nqbins + ((size - cspace_min) >> - LG_CACHELINE); - CUSTOM_SMALL_SIZE2BIN(i) = binind; - } - /* Sub-page. 
*/ - for (; i <= sspace_max; i += TINY_MIN) { - size = SUBPAGE_CEILING(i); - binind = ntbins + nqbins + ncbins + ((size - sspace_min) >> - LG_SUBPAGE); - CUSTOM_SMALL_SIZE2BIN(i) = binind; - } - - small_size2bin = custom_small_size2bin; - if (config_debug) - small_size2bin_validate(); - return (false); -#undef CUSTOM_SMALL_SIZE2BIN -} - /* * Calculate bin_info->run_size such that it meets the following constraints: * @@ -2330,104 +2124,27 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size) return (good_run_size); } -static bool +static void bin_info_init(void) { arena_bin_info_t *bin_info; - unsigned i; - size_t prev_run_size; + size_t prev_run_size = PAGE_SIZE; - arena_bin_info = base_alloc(sizeof(arena_bin_info_t) * nbins); - if (arena_bin_info == NULL) - return (true); - - prev_run_size = PAGE_SIZE; - i = 0; - /* (2^n)-spaced tiny bins. */ - for (; i < ntbins; i++) { - bin_info = &arena_bin_info[i]; - bin_info->reg_size = (1U << (LG_TINY_MIN + i)); - prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size); - bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs); - } - /* Quantum-spaced bins. */ - for (; i < ntbins + nqbins; i++) { - bin_info = &arena_bin_info[i]; - bin_info->reg_size = (i - ntbins + 1) << LG_QUANTUM; - prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size); - bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs); - } - /* Cacheline-spaced bins. */ - for (; i < ntbins + nqbins + ncbins; i++) { - bin_info = &arena_bin_info[i]; - bin_info->reg_size = cspace_min + ((i - (ntbins + nqbins)) << - LG_CACHELINE); - prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size); - bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs); - } - /* Subpage-spaced bins. 
*/ - for (; i < nbins; i++) { - bin_info = &arena_bin_info[i]; - bin_info->reg_size = sspace_min + ((i - (ntbins + nqbins + - ncbins)) << LG_SUBPAGE); - prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size); - bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs); - } - - return (false); +#define SIZE_CLASS(bin, delta, size) \ + bin_info = &arena_bin_info[bin]; \ + bin_info->reg_size = size; \ + prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size);\ + bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs); + SIZE_CLASSES +#undef SIZE_CLASS } -bool +void arena_boot(void) { size_t header_size; unsigned i; - /* Set variables according to the value of opt_lg_[qc]space_max. */ - qspace_max = (1U << opt_lg_qspace_max); - cspace_min = CACHELINE_CEILING(qspace_max); - if (cspace_min == qspace_max) - cspace_min += CACHELINE; - cspace_max = (1U << opt_lg_cspace_max); - sspace_min = SUBPAGE_CEILING(cspace_max); - if (sspace_min == cspace_max) - sspace_min += SUBPAGE; - assert(sspace_min < PAGE_SIZE); - sspace_max = PAGE_SIZE - SUBPAGE; - - assert(LG_QUANTUM >= LG_TINY_MIN); - assert(ntbins <= LG_QUANTUM); - nqbins = qspace_max >> LG_QUANTUM; - ncbins = ((cspace_max - cspace_min) >> LG_CACHELINE) + 1; - nsbins = ((sspace_max - sspace_min) >> LG_SUBPAGE) + 1; - nbins = ntbins + nqbins + ncbins + nsbins; - - /* - * The small_size2bin lookup table uses uint8_t to encode each bin - * index, so we cannot support more than 256 small size classes. This - * limit is difficult to exceed (not even possible with 16B quantum and - * 4KiB pages), and such configurations are impractical, but - * nonetheless we need to protect against this case in order to avoid - * undefined behavior. - * - * Further constrain nbins to 255 if prof_promote is true, since all - * small size classes, plus a "not small" size class must be stored in - * 8 bits of arena_chunk_map_t's bits field. 
- */ - if (config_prof && opt_prof && prof_promote && nbins > 255) { - char line_buf[UMAX2S_BUFSIZE]; - malloc_write(": Too many small size classes ("); - malloc_write(u2s(nbins, 10, line_buf)); - malloc_write(" > max 255)\n"); - abort(); - } else if (nbins > 256) { - char line_buf[UMAX2S_BUFSIZE]; - malloc_write(": Too many small size classes ("); - malloc_write(u2s(nbins, 10, line_buf)); - malloc_write(" > max 256)\n"); - abort(); - } - /* * Compute the header size such that it is large enough to contain the * page map. The page map is biased to omit entries for the header @@ -2451,11 +2168,5 @@ arena_boot(void) arena_maxclass = chunksize - (map_bias << PAGE_SHIFT); - if (small_size2bin_init()) - return (true); - - if (bin_info_init()) - return (true); - - return (false); + bin_info_init(); } diff --git a/src/ctl.c b/src/ctl.c index 4938e10a..0beeb3d0 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -47,7 +47,6 @@ CTL_PROTO(thread_deallocated) CTL_PROTO(thread_deallocatedp) CTL_PROTO(config_debug) CTL_PROTO(config_dss) -CTL_PROTO(config_dynamic_page_shift) CTL_PROTO(config_fill) CTL_PROTO(config_lazy_lock) CTL_PROTO(config_prof) @@ -59,8 +58,6 @@ CTL_PROTO(config_tcache) CTL_PROTO(config_tls) CTL_PROTO(config_xmalloc) CTL_PROTO(opt_abort) -CTL_PROTO(opt_lg_qspace_max) -CTL_PROTO(opt_lg_cspace_max) CTL_PROTO(opt_lg_chunk) CTL_PROTO(opt_narenas) CTL_PROTO(opt_lg_dirty_mult) @@ -88,23 +85,9 @@ INDEX_PROTO(arenas_lrun_i) CTL_PROTO(arenas_narenas) CTL_PROTO(arenas_initialized) CTL_PROTO(arenas_quantum) -CTL_PROTO(arenas_cacheline) -CTL_PROTO(arenas_subpage) CTL_PROTO(arenas_pagesize) CTL_PROTO(arenas_chunksize) -CTL_PROTO(arenas_tspace_min) -CTL_PROTO(arenas_tspace_max) -CTL_PROTO(arenas_qspace_min) -CTL_PROTO(arenas_qspace_max) -CTL_PROTO(arenas_cspace_min) -CTL_PROTO(arenas_cspace_max) -CTL_PROTO(arenas_sspace_min) -CTL_PROTO(arenas_sspace_max) CTL_PROTO(arenas_tcache_max) -CTL_PROTO(arenas_ntbins) -CTL_PROTO(arenas_nqbins) -CTL_PROTO(arenas_ncbins) 
-CTL_PROTO(arenas_nsbins) CTL_PROTO(arenas_nbins) CTL_PROTO(arenas_nhbins) CTL_PROTO(arenas_nlruns) @@ -185,7 +168,6 @@ static const ctl_node_t thread_node[] = { static const ctl_node_t config_node[] = { {NAME("debug"), CTL(config_debug)}, {NAME("dss"), CTL(config_dss)}, - {NAME("dynamic_page_shift"), CTL(config_dynamic_page_shift)}, {NAME("fill"), CTL(config_fill)}, {NAME("lazy_lock"), CTL(config_lazy_lock)}, {NAME("prof"), CTL(config_prof)}, @@ -200,8 +182,6 @@ static const ctl_node_t config_node[] = { static const ctl_node_t opt_node[] = { {NAME("abort"), CTL(opt_abort)}, - {NAME("lg_qspace_max"), CTL(opt_lg_qspace_max)}, - {NAME("lg_cspace_max"), CTL(opt_lg_cspace_max)}, {NAME("lg_chunk"), CTL(opt_lg_chunk)}, {NAME("narenas"), CTL(opt_narenas)}, {NAME("lg_dirty_mult"), CTL(opt_lg_dirty_mult)}, @@ -250,23 +230,9 @@ static const ctl_node_t arenas_node[] = { {NAME("narenas"), CTL(arenas_narenas)}, {NAME("initialized"), CTL(arenas_initialized)}, {NAME("quantum"), CTL(arenas_quantum)}, - {NAME("cacheline"), CTL(arenas_cacheline)}, - {NAME("subpage"), CTL(arenas_subpage)}, {NAME("pagesize"), CTL(arenas_pagesize)}, {NAME("chunksize"), CTL(arenas_chunksize)}, - {NAME("tspace_min"), CTL(arenas_tspace_min)}, - {NAME("tspace_max"), CTL(arenas_tspace_max)}, - {NAME("qspace_min"), CTL(arenas_qspace_min)}, - {NAME("qspace_max"), CTL(arenas_qspace_max)}, - {NAME("cspace_min"), CTL(arenas_cspace_min)}, - {NAME("cspace_max"), CTL(arenas_cspace_max)}, - {NAME("sspace_min"), CTL(arenas_sspace_min)}, - {NAME("sspace_max"), CTL(arenas_sspace_max)}, {NAME("tcache_max"), CTL(arenas_tcache_max)}, - {NAME("ntbins"), CTL(arenas_ntbins)}, - {NAME("nqbins"), CTL(arenas_nqbins)}, - {NAME("ncbins"), CTL(arenas_ncbins)}, - {NAME("nsbins"), CTL(arenas_nsbins)}, {NAME("nbins"), CTL(arenas_nbins)}, {NAME("nhbins"), CTL(arenas_nhbins)}, {NAME("bin"), CHILD(arenas_bin)}, @@ -397,12 +363,6 @@ static bool ctl_arena_init(ctl_arena_stats_t *astats) { - if (astats->bstats == NULL) { - astats->bstats = 
(malloc_bin_stats_t *)base_alloc(nbins * - sizeof(malloc_bin_stats_t)); - if (astats->bstats == NULL) - return (true); - } if (astats->lstats == NULL) { astats->lstats = (malloc_large_stats_t *)base_alloc(nlclasses * sizeof(malloc_large_stats_t)); @@ -425,7 +385,7 @@ ctl_arena_clear(ctl_arena_stats_t *astats) astats->nmalloc_small = 0; astats->ndalloc_small = 0; astats->nrequests_small = 0; - memset(astats->bstats, 0, nbins * sizeof(malloc_bin_stats_t)); + memset(astats->bstats, 0, NBINS * sizeof(malloc_bin_stats_t)); memset(astats->lstats, 0, nlclasses * sizeof(malloc_large_stats_t)); } @@ -439,7 +399,7 @@ ctl_arena_stats_amerge(ctl_arena_stats_t *cstats, arena_t *arena) arena_stats_merge(arena, &cstats->pactive, &cstats->pdirty, &cstats->astats, cstats->bstats, cstats->lstats); - for (i = 0; i < nbins; i++) { + for (i = 0; i < NBINS; i++) { cstats->allocated_small += cstats->bstats[i].allocated; cstats->nmalloc_small += cstats->bstats[i].nmalloc; cstats->ndalloc_small += cstats->bstats[i].ndalloc; @@ -477,7 +437,7 @@ ctl_arena_stats_smerge(ctl_arena_stats_t *sstats, ctl_arena_stats_t *astats) sstats->lstats[i].curruns += astats->lstats[i].curruns; } - for (i = 0; i < nbins; i++) { + for (i = 0; i < NBINS; i++) { sstats->bstats[i].allocated += astats->bstats[i].allocated; sstats->bstats[i].nmalloc += astats->bstats[i].nmalloc; sstats->bstats[i].ndalloc += astats->bstats[i].ndalloc; @@ -1092,7 +1052,6 @@ CTL_RO_NL_CGEN(config_stats, thread_deallocatedp, DEALLOCATEDP_GET(), CTL_RO_BOOL_CONFIG_GEN(config_debug) CTL_RO_BOOL_CONFIG_GEN(config_dss) -CTL_RO_BOOL_CONFIG_GEN(config_dynamic_page_shift) CTL_RO_BOOL_CONFIG_GEN(config_fill) CTL_RO_BOOL_CONFIG_GEN(config_lazy_lock) CTL_RO_BOOL_CONFIG_GEN(config_prof) @@ -1107,8 +1066,6 @@ CTL_RO_BOOL_CONFIG_GEN(config_xmalloc) /******************************************************************************/ CTL_RO_NL_GEN(opt_abort, opt_abort, bool) -CTL_RO_NL_GEN(opt_lg_qspace_max, opt_lg_qspace_max, size_t) 
-CTL_RO_NL_GEN(opt_lg_cspace_max, opt_lg_cspace_max, size_t) CTL_RO_NL_GEN(opt_lg_chunk, opt_lg_chunk, size_t) CTL_RO_NL_GEN(opt_narenas, opt_narenas, size_t) CTL_RO_NL_GEN(opt_lg_dirty_mult, opt_lg_dirty_mult, ssize_t) @@ -1138,7 +1095,7 @@ const ctl_node_t * arenas_bin_i_index(const size_t *mib, size_t miblen, size_t i) { - if (i > nbins) + if (i > NBINS) return (NULL); return (super_arenas_bin_i_node); } @@ -1182,24 +1139,10 @@ RETURN: } CTL_RO_NL_GEN(arenas_quantum, QUANTUM, size_t) -CTL_RO_NL_GEN(arenas_cacheline, CACHELINE, size_t) -CTL_RO_NL_GEN(arenas_subpage, SUBPAGE, size_t) CTL_RO_NL_GEN(arenas_pagesize, PAGE_SIZE, size_t) CTL_RO_NL_GEN(arenas_chunksize, chunksize, size_t) -CTL_RO_NL_GEN(arenas_tspace_min, TINY_MIN, size_t) -CTL_RO_NL_GEN(arenas_tspace_max, (qspace_min >> 1), size_t) -CTL_RO_NL_GEN(arenas_qspace_min, qspace_min, size_t) -CTL_RO_NL_GEN(arenas_qspace_max, qspace_max, size_t) -CTL_RO_NL_GEN(arenas_cspace_min, cspace_min, size_t) -CTL_RO_NL_GEN(arenas_cspace_max, cspace_max, size_t) -CTL_RO_NL_GEN(arenas_sspace_min, sspace_min, size_t) -CTL_RO_NL_GEN(arenas_sspace_max, sspace_max, size_t) CTL_RO_NL_CGEN(config_tcache, arenas_tcache_max, tcache_maxclass, size_t) -CTL_RO_NL_GEN(arenas_ntbins, ntbins, unsigned) -CTL_RO_NL_GEN(arenas_nqbins, nqbins, unsigned) -CTL_RO_NL_GEN(arenas_ncbins, ncbins, unsigned) -CTL_RO_NL_GEN(arenas_nsbins, nsbins, unsigned) -CTL_RO_NL_GEN(arenas_nbins, nbins, unsigned) +CTL_RO_NL_GEN(arenas_nbins, NBINS, unsigned) CTL_RO_NL_CGEN(config_tcache, arenas_nhbins, nhbins, unsigned) CTL_RO_NL_GEN(arenas_nlruns, nlclasses, size_t) @@ -1346,7 +1289,7 @@ const ctl_node_t * stats_arenas_i_bins_j_index(const size_t *mib, size_t miblen, size_t j) { - if (j > nbins) + if (j > NBINS) return (NULL); return (super_stats_arenas_i_bins_j_node); } diff --git a/src/jemalloc.c b/src/jemalloc.c index 81829fe7..08e5f31c 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -95,9 +95,7 @@ arenas_extend(unsigned ind) { arena_t *ret; - /* 
Allocate enough space for trailing bins. */ - ret = (arena_t *)base_alloc(offsetof(arena_t, bins) - + (sizeof(arena_bin_t) * nbins)); + ret = (arena_t *)base_alloc(sizeof(arena_t)); if (ret != NULL && arena_new(ret, ind) == false) { arenas[ind] = ret; return (ret); @@ -563,10 +561,6 @@ malloc_conf_init(void) } CONF_HANDLE_BOOL(abort) - CONF_HANDLE_SIZE_T(lg_qspace_max, LG_QUANTUM, - PAGE_SHIFT-1) - CONF_HANDLE_SIZE_T(lg_cspace_max, LG_QUANTUM, - PAGE_SHIFT-1) /* * Chunks always require at least one * header page, * plus one data page. @@ -613,14 +607,6 @@ malloc_conf_init(void) #undef CONF_HANDLE_SSIZE_T #undef CONF_HANDLE_CHAR_P } - - /* Validate configuration of options that are inter-related. */ - if (opt_lg_qspace_max+1 >= opt_lg_cspace_max) { - malloc_write(": Invalid lg_[qc]space_max " - "relationship; restoring defaults\n"); - opt_lg_qspace_max = LG_QSPACE_MAX_DEFAULT; - opt_lg_cspace_max = LG_CSPACE_MAX_DEFAULT; - } } } @@ -709,10 +695,7 @@ malloc_init_hard(void) if (config_prof) prof_boot1(); - if (arena_boot()) { - malloc_mutex_unlock(&init_lock); - return (true); - } + arena_boot(); if (config_tcache && tcache_boot()) { malloc_mutex_unlock(&init_lock); @@ -893,8 +876,8 @@ JEMALLOC_P(malloc)(size_t size) goto OOM; } if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && usize <= - small_maxclass) { - ret = imalloc(small_maxclass+1); + SMALL_MAXCLASS) { + ret = imalloc(SMALL_MAXCLASS+1); if (ret != NULL) arena_prof_promoted(ret, usize); } else @@ -992,10 +975,10 @@ imemalign(void **memptr, size_t alignment, size_t size) ret = EINVAL; } else { if (prof_promote && (uintptr_t)cnt != - (uintptr_t)1U && usize <= small_maxclass) { - assert(sa2u(small_maxclass+1, + (uintptr_t)1U && usize <= SMALL_MAXCLASS) { + assert(sa2u(SMALL_MAXCLASS+1, alignment, NULL) != 0); - result = ipalloc(sa2u(small_maxclass+1, + result = ipalloc(sa2u(SMALL_MAXCLASS+1, alignment, NULL), alignment, false); if (result != NULL) { arena_prof_promoted(result, @@ -1091,8 +1074,8 @@ 
JEMALLOC_P(calloc)(size_t num, size_t size) goto RETURN; } if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && usize - <= small_maxclass) { - ret = icalloc(small_maxclass+1); + <= SMALL_MAXCLASS) { + ret = icalloc(SMALL_MAXCLASS+1); if (ret != NULL) arena_prof_promoted(ret, usize); } else @@ -1177,8 +1160,8 @@ JEMALLOC_P(realloc)(void *ptr, size_t size) goto OOM; } if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && - usize <= small_maxclass) { - ret = iralloc(ptr, small_maxclass+1, 0, 0, + usize <= SMALL_MAXCLASS) { + ret = iralloc(ptr, SMALL_MAXCLASS+1, 0, 0, false, false); if (ret != NULL) arena_prof_promoted(ret, usize); @@ -1220,8 +1203,8 @@ OOM: else { if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && usize <= - small_maxclass) { - ret = imalloc(small_maxclass+1); + SMALL_MAXCLASS) { + ret = imalloc(SMALL_MAXCLASS+1); if (ret != NULL) { arena_prof_promoted(ret, usize); @@ -1436,9 +1419,9 @@ JEMALLOC_P(allocm)(void **ptr, size_t *rsize, size_t size, int flags) if (cnt == NULL) goto OOM; if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && usize <= - small_maxclass) { + SMALL_MAXCLASS) { size_t usize_promoted = (alignment == 0) ? - s2u(small_maxclass+1) : sa2u(small_maxclass+1, + s2u(SMALL_MAXCLASS+1) : sa2u(SMALL_MAXCLASS+1, alignment, NULL); assert(usize_promoted != 0); p = iallocm(usize_promoted, alignment, zero); @@ -1517,9 +1500,9 @@ JEMALLOC_P(rallocm)(void **ptr, size_t *rsize, size_t size, size_t extra, */ if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && ((alignment == 0) ? s2u(size) : sa2u(size, - alignment, NULL)) <= small_maxclass) { - q = iralloc(p, small_maxclass+1, (small_maxclass+1 >= - size+extra) ? 0 : size+extra - (small_maxclass+1), + alignment, NULL)) <= SMALL_MAXCLASS) { + q = iralloc(p, SMALL_MAXCLASS+1, (SMALL_MAXCLASS+1 >= + size+extra) ? 
0 : size+extra - (SMALL_MAXCLASS+1), alignment, zero, no_move); if (q == NULL) goto ERR; diff --git a/src/stats.c b/src/stats.c index 6d9ba9d0..e4500dfb 100644 --- a/src/stats.c +++ b/src/stats.c @@ -159,12 +159,12 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, CTL_GET("config.tcache", &config_tcache, bool); if (config_tcache) { malloc_cprintf(write_cb, cbopaque, - "bins: bin size regs pgs allocated nmalloc" + "bins: bin size regs pgs allocated nmalloc" " ndalloc nrequests nfills nflushes" " newruns reruns curruns\n"); } else { malloc_cprintf(write_cb, cbopaque, - "bins: bin size regs pgs allocated nmalloc" + "bins: bin size regs pgs allocated nmalloc" " ndalloc newruns reruns curruns\n"); } CTL_GET("arenas.nbins", &nbins, unsigned); @@ -176,7 +176,6 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, if (gap_start == UINT_MAX) gap_start = j; } else { - unsigned ntbins_, nqbins, ncbins, nsbins; size_t reg_size, run_size, allocated; uint32_t nregs; uint64_t nmalloc, ndalloc, nrequests, nfills, nflushes; @@ -196,10 +195,6 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, } gap_start = UINT_MAX; } - CTL_GET("arenas.ntbins", &ntbins_, unsigned); - CTL_GET("arenas.nqbins", &nqbins, unsigned); - CTL_GET("arenas.ncbins", &ncbins, unsigned); - CTL_GET("arenas.nsbins", &nsbins, unsigned); CTL_J_GET("arenas.bin.0.size", ®_size, size_t); CTL_J_GET("arenas.bin.0.nregs", &nregs, uint32_t); CTL_J_GET("arenas.bin.0.run_size", &run_size, size_t); @@ -223,27 +218,19 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, size_t); if (config_tcache) { malloc_cprintf(write_cb, cbopaque, - "%13u %1s %5zu %4u %3zu %12zu %12"PRIu64 + "%13u %5zu %4u %3zu %12zu %12"PRIu64 " %12"PRIu64" %12"PRIu64" %12"PRIu64 " %12"PRIu64" %12"PRIu64" %12"PRIu64 " %12zu\n", - j, - j < ntbins_ ? "T" : j < ntbins_ + nqbins ? - "Q" : j < ntbins_ + nqbins + ncbins ? 
"C" : - "S", - reg_size, nregs, run_size / pagesize, + j, reg_size, nregs, run_size / pagesize, allocated, nmalloc, ndalloc, nrequests, nfills, nflushes, nruns, reruns, curruns); } else { malloc_cprintf(write_cb, cbopaque, - "%13u %1s %5zu %4u %3zu %12zu %12"PRIu64 + "%13u %5zu %4u %3zu %12zu %12"PRIu64 " %12"PRIu64" %12"PRIu64" %12"PRIu64 " %12zu\n", - j, - j < ntbins_ ? "T" : j < ntbins_ + nqbins ? - "Q" : j < ntbins_ + nqbins + ncbins ? "C" : - "S", - reg_size, nregs, run_size / pagesize, + j, reg_size, nregs, run_size / pagesize, allocated, nmalloc, ndalloc, nruns, reruns, curruns); } @@ -496,8 +483,6 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, write_cb(cbopaque, "Run-time option settings:\n"); OPT_WRITE_BOOL(abort) - OPT_WRITE_SIZE_T(lg_qspace_max) - OPT_WRITE_SIZE_T(lg_cspace_max) OPT_WRITE_SIZE_T(lg_chunk) OPT_WRITE_SIZE_T(narenas) OPT_WRITE_SSIZE_T(lg_dirty_mult) @@ -541,51 +526,6 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, write_cb(cbopaque, u2s(sv, 10, s)); write_cb(cbopaque, "\n"); - CTL_GET("arenas.cacheline", &sv, size_t); - write_cb(cbopaque, "Cacheline size (assumed): "); - write_cb(cbopaque, u2s(sv, 10, s)); - write_cb(cbopaque, "\n"); - - CTL_GET("arenas.subpage", &sv, size_t); - write_cb(cbopaque, "Subpage spacing: "); - write_cb(cbopaque, u2s(sv, 10, s)); - write_cb(cbopaque, "\n"); - - if ((err = JEMALLOC_P(mallctl)("arenas.tspace_min", &sv, &ssz, - NULL, 0)) == 0) { - write_cb(cbopaque, "Tiny 2^n-spaced sizes: ["); - write_cb(cbopaque, u2s(sv, 10, s)); - write_cb(cbopaque, ".."); - - CTL_GET("arenas.tspace_max", &sv, size_t); - write_cb(cbopaque, u2s(sv, 10, s)); - write_cb(cbopaque, "]\n"); - } - - CTL_GET("arenas.qspace_min", &sv, size_t); - write_cb(cbopaque, "Quantum-spaced sizes: ["); - write_cb(cbopaque, u2s(sv, 10, s)); - write_cb(cbopaque, ".."); - CTL_GET("arenas.qspace_max", &sv, size_t); - write_cb(cbopaque, u2s(sv, 10, s)); - write_cb(cbopaque, "]\n"); - - 
CTL_GET("arenas.cspace_min", &sv, size_t); - write_cb(cbopaque, "Cacheline-spaced sizes: ["); - write_cb(cbopaque, u2s(sv, 10, s)); - write_cb(cbopaque, ".."); - CTL_GET("arenas.cspace_max", &sv, size_t); - write_cb(cbopaque, u2s(sv, 10, s)); - write_cb(cbopaque, "]\n"); - - CTL_GET("arenas.sspace_min", &sv, size_t); - write_cb(cbopaque, "Subpage-spaced sizes: ["); - write_cb(cbopaque, u2s(sv, 10, s)); - write_cb(cbopaque, ".."); - CTL_GET("arenas.sspace_max", &sv, size_t); - write_cb(cbopaque, u2s(sv, 10, s)); - write_cb(cbopaque, "]\n"); - CTL_GET("opt.lg_dirty_mult", &ssv, ssize_t); if (ssv >= 0) { write_cb(cbopaque, diff --git a/src/tcache.c b/src/tcache.c index 4f4ed6c6..fa05728e 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -55,7 +55,7 @@ tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem, unsigned i, nflush, ndeferred; bool merged_stats = false; - assert(binind < nbins); + assert(binind < NBINS); assert(rem <= tbin->ncached); for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) { @@ -152,7 +152,7 @@ tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem, merged_stats = true; arena->stats.nrequests_large += tbin->tstats.nrequests; - arena->stats.lstats[binind - nbins].nrequests += + arena->stats.lstats[binind - NBINS].nrequests += tbin->tstats.nrequests; tbin->tstats.nrequests = 0; } @@ -185,7 +185,7 @@ tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem, arena_t *arena = tcache->arena; malloc_mutex_lock(&arena->lock); arena->stats.nrequests_large += tbin->tstats.nrequests; - arena->stats.lstats[binind - nbins].nrequests += + arena->stats.lstats[binind - NBINS].nrequests += tbin->tstats.nrequests; tbin->tstats.nrequests = 0; malloc_mutex_unlock(&arena->lock); @@ -220,7 +220,7 @@ tcache_create(arena_t *arena) */ size = (size + CACHELINE_MASK) & (-CACHELINE); - if (size <= small_maxclass) + if (size <= SMALL_MAXCLASS) tcache = (tcache_t *)arena_malloc_small(arena, size, true); else if (size 
<= tcache_maxclass) tcache = (tcache_t *)arena_malloc_large(arena, size, true); @@ -266,7 +266,7 @@ tcache_destroy(tcache_t *tcache) tcache_stats_merge(tcache, tcache->arena); } - for (i = 0; i < nbins; i++) { + for (i = 0; i < NBINS; i++) { tcache_bin_t *tbin = &tcache->tbins[i]; tcache_bin_flush_small(tbin, i, 0, tcache); @@ -287,7 +287,7 @@ tcache_destroy(tcache_t *tcache) arena_t *arena = tcache->arena; malloc_mutex_lock(&arena->lock); arena->stats.nrequests_large += tbin->tstats.nrequests; - arena->stats.lstats[i - nbins].nrequests += + arena->stats.lstats[i - NBINS].nrequests += tbin->tstats.nrequests; malloc_mutex_unlock(&arena->lock); } @@ -300,7 +300,7 @@ tcache_destroy(tcache_t *tcache) } tcache_size = arena_salloc(tcache); - if (tcache_size <= small_maxclass) { + if (tcache_size <= SMALL_MAXCLASS) { arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache); arena_t *arena = chunk->arena; size_t pageind = ((uintptr_t)tcache - (uintptr_t)chunk) >> @@ -357,7 +357,7 @@ tcache_stats_merge(tcache_t *tcache, arena_t *arena) unsigned i; /* Merge and reset tcache stats. */ - for (i = 0; i < nbins; i++) { + for (i = 0; i < NBINS; i++) { arena_bin_t *bin = &arena->bins[i]; tcache_bin_t *tbin = &tcache->tbins[i]; malloc_mutex_lock(&bin->lock); @@ -367,7 +367,7 @@ tcache_stats_merge(tcache_t *tcache, arena_t *arena) } for (; i < nhbins; i++) { - malloc_large_stats_t *lstats = &arena->stats.lstats[i - nbins]; + malloc_large_stats_t *lstats = &arena->stats.lstats[i - NBINS]; tcache_bin_t *tbin = &tcache->tbins[i]; arena->stats.nrequests_large += tbin->tstats.nrequests; lstats->nrequests += tbin->tstats.nrequests; @@ -384,17 +384,18 @@ tcache_boot(void) /* * If necessary, clamp opt_lg_tcache_max, now that - * small_maxclass and arena_maxclass are known. + * SMALL_MAXCLASS and arena_maxclass are known. + * XXX Can this be done earlier? 
*/ if (opt_lg_tcache_max < 0 || (1U << - opt_lg_tcache_max) < small_maxclass) - tcache_maxclass = small_maxclass; + opt_lg_tcache_max) < SMALL_MAXCLASS) + tcache_maxclass = SMALL_MAXCLASS; else if ((1U << opt_lg_tcache_max) > arena_maxclass) tcache_maxclass = arena_maxclass; else tcache_maxclass = (1U << opt_lg_tcache_max); - nhbins = nbins + (tcache_maxclass >> PAGE_SHIFT); + nhbins = NBINS + (tcache_maxclass >> PAGE_SHIFT); /* Initialize tcache_bin_info. */ tcache_bin_info = (tcache_bin_info_t *)base_alloc(nhbins * @@ -402,7 +403,7 @@ tcache_boot(void) if (tcache_bin_info == NULL) return (true); stack_nelms = 0; - for (i = 0; i < nbins; i++) { + for (i = 0; i < NBINS; i++) { if ((arena_bin_info[i].nregs << 1) <= TCACHE_NSLOTS_SMALL_MAX) { tcache_bin_info[i].ncached_max = @@ -421,7 +422,7 @@ tcache_boot(void) /* Compute incremental GC event threshold. */ if (opt_lg_tcache_gc_sweep >= 0) { tcache_gc_incr = ((1U << opt_lg_tcache_gc_sweep) / - nbins) + (((1U << opt_lg_tcache_gc_sweep) % nbins == + NBINS) + (((1U << opt_lg_tcache_gc_sweep) % NBINS == 0) ? 0 : 1); } else tcache_gc_incr = 0; From f081b88dfbce94c3c7c8faf0b0f91b117fbdfcc6 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 28 Feb 2012 20:24:05 -0800 Subject: [PATCH 0054/3378] Fix realloc(p, 0) to act like free(p). Reported by Yoni Londer. 
--- src/jemalloc.c | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 08e5f31c..865c6236 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1124,23 +1124,28 @@ JEMALLOC_P(realloc)(void *ptr, size_t size) ; if (size == 0) { - if (config_sysv == false || opt_sysv == false) - size = 1; - else { - if (ptr != NULL) { - if (config_prof || config_stats) - old_size = isalloc(ptr); - if (config_prof && opt_prof) { - old_ctx = prof_ctx_get(ptr); - cnt = NULL; - } - idalloc(ptr); - } else if (config_prof && opt_prof) { - old_ctx = NULL; + if (ptr != NULL) { + /* realloc(ptr, 0) is equivalent to free(p). */ + if (config_prof || config_stats) + old_size = isalloc(ptr); + if (config_prof && opt_prof) { + old_ctx = prof_ctx_get(ptr); cnt = NULL; } + idalloc(ptr); ret = NULL; goto RETURN; + } else { + if (config_sysv == false || opt_sysv == false) + size = 1; + else { + if (config_prof && opt_prof) { + old_ctx = NULL; + cnt = NULL; + } + ret = NULL; + goto RETURN; + } } } @@ -1188,6 +1193,7 @@ OOM: errno = ENOMEM; } } else { + /* realloc(NULL, size) is equivalent to malloc(size). */ if (config_prof && opt_prof) old_ctx = NULL; if (malloc_init()) { From c90ad71237c05473bcb968beddebb0a487c36e75 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 28 Feb 2012 20:31:37 -0800 Subject: [PATCH 0055/3378] Remove the sysv option. --- INSTALL | 5 -- configure.ac | 17 ------ doc/jemalloc.xml.in | 26 --------- .../jemalloc/internal/jemalloc_internal.h.in | 8 --- include/jemalloc/jemalloc_defs.h.in | 3 -- src/ctl.c | 6 --- src/jemalloc.c | 54 +++---------------- src/stats.c | 1 - 8 files changed, 7 insertions(+), 113 deletions(-) diff --git a/INSTALL b/INSTALL index 0ddcacb7..f9c8a369 100644 --- a/INSTALL +++ b/INSTALL @@ -108,11 +108,6 @@ any of the following arguments (not a definitive list) to 'configure': errors, as is commonly implemented by "xmalloc" wrapper function for malloc. 
See the "opt.xmalloc" option documentation for usage details. ---enable-sysv - Enable support for System V semantics, wherein malloc(0) returns NULL - rather than a minimal allocation. See the "opt.sysv" option documentation - for usage details. - --enable-lazy-lock Enable code that wraps pthread_create() to detect when an application switches from single-threaded to multi-threaded mode, so that it can avoid diff --git a/configure.ac b/configure.ac index 91caef46..3fa24919 100644 --- a/configure.ac +++ b/configure.ac @@ -626,22 +626,6 @@ if test "x$enable_xmalloc" = "x1" ; then fi AC_SUBST([enable_xmalloc]) -dnl Do not support the SYSV option by default. -AC_ARG_ENABLE([sysv], - [AS_HELP_STRING([--enable-sysv], [Support SYSV semantics option])], -[if test "x$enable_sysv" = "xno" ; then - enable_sysv="0" -else - enable_sysv="1" -fi -], -[enable_sysv="0"] -) -if test "x$enable_sysv" = "x1" ; then - AC_DEFINE([JEMALLOC_SYSV], [ ]) -fi -AC_SUBST([enable_sysv]) - AC_MSG_CHECKING([STATIC_PAGE_SHIFT]) AC_RUN_IFELSE([AC_LANG_PROGRAM( [[#include @@ -911,7 +895,6 @@ AC_MSG_RESULT([prof-gcc : ${enable_prof_gcc}]) AC_MSG_RESULT([tcache : ${enable_tcache}]) AC_MSG_RESULT([fill : ${enable_fill}]) AC_MSG_RESULT([xmalloc : ${enable_xmalloc}]) -AC_MSG_RESULT([sysv : ${enable_sysv}]) AC_MSG_RESULT([dss : ${enable_dss}]) AC_MSG_RESULT([dynamic_page_shift : ${enable_dynamic_page_shift}]) AC_MSG_RESULT([lazy_lock : ${enable_lazy_lock}]) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index cfe120fb..e7cc6284 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -662,16 +662,6 @@ for (i = 0; i < nbins; i++) { build configuration. - - - config.sysv - (bool) - r- - - was specified during - build configuration. - - config.tcache @@ -808,22 +798,6 @@ for (i = 0; i < nbins; i++) { - - - opt.sysv - (bool) - r- - [] - - If enabled, attempting to allocate zero bytes will - return a NULL pointer instead of a valid pointer. 
- (The default behavior is to make a minimal allocation and return a - pointer to it.) This option is provided for System V compatibility. - This option is incompatible with the opt.xmalloc option. - This option is disabled by default. - - opt.xmalloc diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index f43fcd20..35c05f4e 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -104,13 +104,6 @@ static const bool config_stats = false #endif ; -static const bool config_sysv = -#ifdef JEMALLOC_SYSV - true -#else - false -#endif - ; static const bool config_tcache = #ifdef JEMALLOC_TCACHE true @@ -385,7 +378,6 @@ typedef struct { extern bool opt_abort; extern bool opt_junk; -extern bool opt_sysv; extern bool opt_xmalloc; extern bool opt_zero; extern size_t opt_narenas; diff --git a/include/jemalloc/jemalloc_defs.h.in b/include/jemalloc/jemalloc_defs.h.in index 53e85208..1360364a 100644 --- a/include/jemalloc/jemalloc_defs.h.in +++ b/include/jemalloc/jemalloc_defs.h.in @@ -98,9 +98,6 @@ /* Support optional abort() on OOM. */ #undef JEMALLOC_XMALLOC -/* Support SYSV semantics. */ -#undef JEMALLOC_SYSV - /* Support lazy locking (avoid locking unless a second thread is launched). 
*/ #undef JEMALLOC_LAZY_LOCK diff --git a/src/ctl.c b/src/ctl.c index 0beeb3d0..0fabd852 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -53,7 +53,6 @@ CTL_PROTO(config_prof) CTL_PROTO(config_prof_libgcc) CTL_PROTO(config_prof_libunwind) CTL_PROTO(config_stats) -CTL_PROTO(config_sysv) CTL_PROTO(config_tcache) CTL_PROTO(config_tls) CTL_PROTO(config_xmalloc) @@ -64,7 +63,6 @@ CTL_PROTO(opt_lg_dirty_mult) CTL_PROTO(opt_stats_print) CTL_PROTO(opt_junk) CTL_PROTO(opt_zero) -CTL_PROTO(opt_sysv) CTL_PROTO(opt_xmalloc) CTL_PROTO(opt_tcache) CTL_PROTO(opt_lg_tcache_gc_sweep) @@ -174,7 +172,6 @@ static const ctl_node_t config_node[] = { {NAME("prof_libgcc"), CTL(config_prof_libgcc)}, {NAME("prof_libunwind"), CTL(config_prof_libunwind)}, {NAME("stats"), CTL(config_stats)}, - {NAME("sysv"), CTL(config_sysv)}, {NAME("tcache"), CTL(config_tcache)}, {NAME("tls"), CTL(config_tls)}, {NAME("xmalloc"), CTL(config_xmalloc)} @@ -188,7 +185,6 @@ static const ctl_node_t opt_node[] = { {NAME("stats_print"), CTL(opt_stats_print)}, {NAME("junk"), CTL(opt_junk)}, {NAME("zero"), CTL(opt_zero)}, - {NAME("sysv"), CTL(opt_sysv)}, {NAME("xmalloc"), CTL(opt_xmalloc)}, {NAME("tcache"), CTL(opt_tcache)}, {NAME("lg_tcache_gc_sweep"), CTL(opt_lg_tcache_gc_sweep)}, @@ -1058,7 +1054,6 @@ CTL_RO_BOOL_CONFIG_GEN(config_prof) CTL_RO_BOOL_CONFIG_GEN(config_prof_libgcc) CTL_RO_BOOL_CONFIG_GEN(config_prof_libunwind) CTL_RO_BOOL_CONFIG_GEN(config_stats) -CTL_RO_BOOL_CONFIG_GEN(config_sysv) CTL_RO_BOOL_CONFIG_GEN(config_tcache) CTL_RO_BOOL_CONFIG_GEN(config_tls) CTL_RO_BOOL_CONFIG_GEN(config_xmalloc) @@ -1072,7 +1067,6 @@ CTL_RO_NL_GEN(opt_lg_dirty_mult, opt_lg_dirty_mult, ssize_t) CTL_RO_NL_GEN(opt_stats_print, opt_stats_print, bool) CTL_RO_NL_CGEN(config_fill, opt_junk, opt_junk, bool) CTL_RO_NL_CGEN(config_fill, opt_zero, opt_zero, bool) -CTL_RO_NL_CGEN(config_sysv, opt_sysv, opt_sysv, bool) CTL_RO_NL_CGEN(config_xmalloc, opt_xmalloc, opt_xmalloc, bool) CTL_RO_NL_CGEN(config_tcache, opt_tcache, opt_tcache, bool) 
CTL_RO_NL_CGEN(config_tcache, opt_lg_tcache_gc_sweep, opt_lg_tcache_gc_sweep, diff --git a/src/jemalloc.c b/src/jemalloc.c index 865c6236..cc0188c1 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -48,7 +48,6 @@ bool opt_junk = false; bool opt_abort = false; bool opt_junk = false; #endif -bool opt_sysv = false; bool opt_xmalloc = false; bool opt_zero = false; size_t opt_narenas = 0; @@ -575,9 +574,6 @@ malloc_conf_init(void) CONF_HANDLE_BOOL(junk) CONF_HANDLE_BOOL(zero) } - if (config_sysv) { - CONF_HANDLE_BOOL(sysv) - } if (config_xmalloc) { CONF_HANDLE_BOOL(xmalloc) } @@ -854,19 +850,8 @@ JEMALLOC_P(malloc)(size_t size) goto OOM; } - if (size == 0) { - if (config_sysv == false || opt_sysv == false) - size = 1; - else { - if (config_xmalloc && opt_xmalloc) { - malloc_write(": Error in malloc(): " - "invalid size 0\n"); - abort(); - } - ret = NULL; - goto RETURN; - } - } + if (size == 0) + size = 1; if (config_prof && opt_prof) { usize = s2u(size); @@ -931,22 +916,8 @@ imemalign(void **memptr, size_t alignment, size_t size) if (malloc_init()) result = NULL; else { - if (size == 0) { - if (config_sysv == false || opt_sysv == false) - size = 1; - else { - if (config_xmalloc && opt_xmalloc) { - malloc_write(": Error in " - "posix_memalign(): invalid size " - "0\n"); - abort(); - } - result = NULL; - *memptr = NULL; - ret = 0; - goto RETURN; - } - } + if (size == 0) + size = 1; /* Make sure that alignment is a large enough power of 2. 
*/ if (((alignment - 1) & alignment) != 0 @@ -1047,8 +1018,7 @@ JEMALLOC_P(calloc)(size_t num, size_t size) num_size = num * size; if (num_size == 0) { - if ((config_sysv == false || opt_sysv == false) - && ((num == 0) || (size == 0))) + if (num == 0 || size == 0) num_size = 1; else { ret = NULL; @@ -1135,18 +1105,8 @@ JEMALLOC_P(realloc)(void *ptr, size_t size) idalloc(ptr); ret = NULL; goto RETURN; - } else { - if (config_sysv == false || opt_sysv == false) - size = 1; - else { - if (config_prof && opt_prof) { - old_ctx = NULL; - cnt = NULL; - } - ret = NULL; - goto RETURN; - } - } + } else + size = 1; } if (ptr != NULL) { diff --git a/src/stats.c b/src/stats.c index e4500dfb..f6851a01 100644 --- a/src/stats.c +++ b/src/stats.c @@ -489,7 +489,6 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, OPT_WRITE_BOOL(stats_print) OPT_WRITE_BOOL(junk) OPT_WRITE_BOOL(zero) - OPT_WRITE_BOOL(sysv) OPT_WRITE_BOOL(xmalloc) OPT_WRITE_BOOL(tcache) OPT_WRITE_SSIZE_T(lg_tcache_gc_sweep) From d073a321091800e71ea56f98701253dc0969d879 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 28 Feb 2012 20:41:16 -0800 Subject: [PATCH 0056/3378] Enable the stats configuration option by default. --- INSTALL | 4 ++-- configure.ac | 5 ++--- src/jemalloc.c | 2 -- 3 files changed, 4 insertions(+), 7 deletions(-) diff --git a/INSTALL b/INSTALL index f9c8a369..b84fb1bf 100644 --- a/INSTALL +++ b/INSTALL @@ -62,8 +62,8 @@ any of the following arguments (not a definitive list) to 'configure': Enable assertions and validation code. This incurs a substantial performance hit, but is very useful during application development. ---enable-stats - Enable statistics gathering functionality. See the "opt.stats_print" +--disable-stats + Disable statistics gathering functionality. See the "opt.stats_print" option documentation for usage details. 
--enable-prof diff --git a/configure.ac b/configure.ac index 3fa24919..78f7c8e5 100644 --- a/configure.ac +++ b/configure.ac @@ -423,7 +423,7 @@ if test "x$enable_debug" = "x0" -a "x$no_CFLAGS" = "xyes" ; then fi fi -dnl Do not enable statistics calculation by default. +dnl Enable statistics calculation by default. AC_ARG_ENABLE([stats], [AS_HELP_STRING([--enable-stats], [Enable statistics calculation/reporting])], [if test "x$enable_stats" = "xno" ; then @@ -432,7 +432,7 @@ else enable_stats="1" fi ], -[enable_stats="0"] +[enable_stats="1"] ) if test "x$enable_stats" = "x1" ; then AC_DEFINE([JEMALLOC_STATS], [ ]) @@ -896,7 +896,6 @@ AC_MSG_RESULT([tcache : ${enable_tcache}]) AC_MSG_RESULT([fill : ${enable_fill}]) AC_MSG_RESULT([xmalloc : ${enable_xmalloc}]) AC_MSG_RESULT([dss : ${enable_dss}]) -AC_MSG_RESULT([dynamic_page_shift : ${enable_dynamic_page_shift}]) AC_MSG_RESULT([lazy_lock : ${enable_lazy_lock}]) AC_MSG_RESULT([tls : ${enable_tls}]) AC_MSG_RESULT([===============================================================================]) diff --git a/src/jemalloc.c b/src/jemalloc.c index cc0188c1..a3a9a70a 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -882,8 +882,6 @@ OOM: } errno = ENOMEM; } - -RETURN: if (config_prof && opt_prof && ret != NULL) prof_malloc(ret, usize, cnt); if (config_stats && ret != NULL) { From 777c191485452251fbecfe6638a4a54c651e25b3 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 28 Feb 2012 20:49:22 -0800 Subject: [PATCH 0057/3378] Enable support for junk/zero filling by default. --- INSTALL | 4 ++-- configure.ac | 9 +++++---- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/INSTALL b/INSTALL index b84fb1bf..4d3e1af9 100644 --- a/INSTALL +++ b/INSTALL @@ -99,8 +99,8 @@ any of the following arguments (not a definitive list) to 'configure': Enable support for page allocation/deallocation via sbrk(2), in addition to mmap(2). ---enable-fill - Enable support for junk/zero filling of memory. 
See the "opt.junk"/ +--disable-fill + Disable support for junk/zero filling of memory. See the "opt.junk"/ "opt.zero" option documentation for usage details. --enable-xmalloc diff --git a/configure.ac b/configure.ac index 78f7c8e5..b503f646 100644 --- a/configure.ac +++ b/configure.ac @@ -425,7 +425,8 @@ fi dnl Enable statistics calculation by default. AC_ARG_ENABLE([stats], - [AS_HELP_STRING([--enable-stats], [Enable statistics calculation/reporting])], + [AS_HELP_STRING([--disable-stats], + [Disable statistics calculation/reporting])], [if test "x$enable_stats" = "xno" ; then enable_stats="0" else @@ -594,16 +595,16 @@ if test "x$enable_dss" = "x1" ; then fi AC_SUBST([enable_dss]) -dnl Do not support the junk/zero filling option by default. +dnl Support the junk/zero filling option by default. AC_ARG_ENABLE([fill], - [AS_HELP_STRING([--enable-fill], [Support junk/zero filling option])], + [AS_HELP_STRING([--disable-fill], [Disable support for junk/zero filling])], [if test "x$enable_fill" = "xno" ; then enable_fill="0" else enable_fill="1" fi ], -[enable_fill="0"] +[enable_fill="1"] ) if test "x$enable_fill" = "x1" ; then AC_DEFINE([JEMALLOC_FILL], [ ]) From 3add8d8cda2993f58fd2eba6efbf4fa12d5c72f3 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 28 Feb 2012 21:08:19 -0800 Subject: [PATCH 0058/3378] Remove unused variables in tcache_dalloc_large(). Submitted by Mike Hommey. 
--- include/jemalloc/internal/tcache.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index b964a12e..77bca8d9 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -360,9 +360,7 @@ tcache_dalloc_small(tcache_t *tcache, void *ptr) JEMALLOC_INLINE void tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size) { - arena_t *arena; - arena_chunk_t *chunk; - size_t pageind, binind; + size_t binind; tcache_bin_t *tbin; tcache_bin_info_t *tbin_info; @@ -370,9 +368,6 @@ tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size) assert(arena_salloc(ptr) > SMALL_MAXCLASS); assert(arena_salloc(ptr) <= tcache_maxclass); - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - arena = chunk->arena; - pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; binind = NBINS + (size >> PAGE_SHIFT) - 1; if (config_fill && opt_junk) From bdcadf41e961a3c6bbb37d8d24e4b68a27f2b952 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 28 Feb 2012 21:11:03 -0800 Subject: [PATCH 0059/3378] Remove unused variable in arena_run_split(). Submitted by Mike Hommey. 
--- src/arena.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/arena.c b/src/arena.c index 72b7f449..c14cb2c2 100644 --- a/src/arena.c +++ b/src/arena.c @@ -188,12 +188,11 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, bool zero) { arena_chunk_t *chunk; - size_t old_ndirty, run_ind, total_pages, need_pages, rem_pages, i; + size_t run_ind, total_pages, need_pages, rem_pages, i; size_t flag_dirty; arena_avail_tree_t *runs_avail; chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); - old_ndirty = chunk->ndirty; run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk) >> PAGE_SHIFT); flag_dirty = chunk->map[run_ind-map_bias].bits & CHUNK_MAP_DIRTY; From 93c023d181269fd47ca3f8e773509bb5bd779684 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 28 Feb 2012 21:13:12 -0800 Subject: [PATCH 0060/3378] Remove unused variables in stats_print(). Submitted by Mike Hommey. --- src/stats.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/stats.c b/src/stats.c index f6851a01..2f61e7bc 100644 --- a/src/stats.c +++ b/src/stats.c @@ -582,7 +582,6 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, } if (config_stats) { - size_t sszp, ssz; size_t *cactive; size_t allocated, active, mapped; size_t chunks_current, chunks_high; @@ -590,9 +589,6 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, size_t huge_allocated; uint64_t huge_nmalloc, huge_ndalloc; - sszp = sizeof(size_t *); - ssz = sizeof(size_t); - CTL_GET("stats.cactive", &cactive, size_t *); CTL_GET("stats.allocated", &allocated, size_t); CTL_GET("stats.active", &active, size_t); From 5965631636c620fba2eb33698accee75fd207aab Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 28 Feb 2012 21:37:38 -0800 Subject: [PATCH 0061/3378] Do not enforce minimum alignment in memalign(). Do not enforce minimum alignment in memalign(). This is a non-standard function, and there is disagreement over whether to enforce minimum alignment. 
Solaris documentation (whence memalign() originated) says that minimum alignment is required: The value of alignment must be a power of two and must be greater than or equal to the size of a word. However, Linux's manual page says in its NOTES section: memalign() may not check that the boundary parameter is correct. This is descriptive rather than prescriptive, but applications with bad assumptions about memalign() exist, so be as forgiving as possible. Reported by Mike Hommey. --- src/jemalloc.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index a3a9a70a..535efaa1 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -68,7 +68,8 @@ static void malloc_conf_error(const char *msg, const char *k, size_t klen, const char *v, size_t vlen); static void malloc_conf_init(void); static bool malloc_init_hard(void); -static int imemalign(void **memptr, size_t alignment, size_t size); +static int imemalign(void **memptr, size_t alignment, size_t size, + bool enforce_min_alignment); /******************************************************************************/ /* malloc_message() setup. */ @@ -900,7 +901,8 @@ JEMALLOC_ATTR(nonnull(1)) JEMALLOC_ATTR(noinline) #endif static int -imemalign(void **memptr, size_t alignment, size_t size) +imemalign(void **memptr, size_t alignment, size_t size, + bool enforce_min_alignment) { int ret; size_t usize; @@ -919,7 +921,7 @@ imemalign(void **memptr, size_t alignment, size_t size) /* Make sure that alignment is a large enough power of 2. 
*/ if (((alignment - 1) & alignment) != 0 - || alignment < sizeof(void *)) { + || (enforce_min_alignment && alignment < sizeof(void *))) { if (config_xmalloc && opt_xmalloc) { malloc_write(": Error in " "posix_memalign(): invalid alignment\n"); @@ -991,7 +993,7 @@ int JEMALLOC_P(posix_memalign)(void **memptr, size_t alignment, size_t size) { - return imemalign(memptr, alignment, size); + return imemalign(memptr, alignment, size, true); } JEMALLOC_ATTR(malloc) @@ -1249,7 +1251,7 @@ JEMALLOC_P(memalign)(size_t alignment, size_t size) = NULL #endif ; - imemalign(&ret, alignment, size); + imemalign(&ret, alignment, size, false); return (ret); } #endif @@ -1265,7 +1267,7 @@ JEMALLOC_P(valloc)(size_t size) = NULL #endif ; - imemalign(&ret, PAGE_SIZE, size); + imemalign(&ret, PAGE_SIZE, size, false); return (ret); } #endif From 4bb09830133ffa8b27a95bc3727558007722c152 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 29 Feb 2012 10:37:27 -0800 Subject: [PATCH 0062/3378] Use glibc allocator hooks. When jemalloc is used as a libc malloc replacement (i.e. not prefixed), some particular setups may end up inconsistently calling malloc from libc and free from jemalloc, or the other way around. glibc provides hooks to make its functions use alternative implementations. Use them. Submitted by Karl Tomlinson and Mike Hommey. 
--- .../jemalloc/internal/jemalloc_internal.h.in | 4 ---- src/jemalloc.c | 24 +++++++++++++++++++ src/mutex.c | 4 ++++ 3 files changed, 28 insertions(+), 4 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 35c05f4e..c21c218b 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -144,10 +144,6 @@ static const bool config_ivsalloc = #include #endif -#ifdef JEMALLOC_LAZY_LOCK -#include -#endif - #define RB_COMPACT #include "jemalloc/internal/rb.h" #include "jemalloc/internal/qr.h" diff --git a/src/jemalloc.c b/src/jemalloc.c index 535efaa1..ccc3a209 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1272,6 +1272,30 @@ JEMALLOC_P(valloc)(size_t size) } #endif +#if defined(__GLIBC__) && !defined(__UCLIBC__) +/* + * glibc provides the RTLD_DEEPBIND flag for dlopen which can make it possible + * to inconsistently reference libc's malloc(3)-compatible functions + * (https://bugzilla.mozilla.org/show_bug.cgi?id=493541). + * + * These definitions interpose hooks in glibc.  The functions are actually + * passed an extra argument for the caller return address, which will be + * ignored. + */ +JEMALLOC_ATTR(visibility("default")) +void (* const __free_hook)(void *ptr) = JEMALLOC_P(free); + +JEMALLOC_ATTR(visibility("default")) +void *(* const __malloc_hook)(size_t size) = JEMALLOC_P(malloc); + +JEMALLOC_ATTR(visibility("default")) +void *(* const __realloc_hook)(void *ptr, size_t size) = JEMALLOC_P(realloc); + +JEMALLOC_ATTR(visibility("default")) +void *(* const __memalign_hook)(size_t alignment, size_t size) = + JEMALLOC_P(memalign); +#endif + #endif /* JEMALLOC_PREFIX */ /* * End non-standard override functions. 
diff --git a/src/mutex.c b/src/mutex.c index ca89ef1c..0e09060e 100644 --- a/src/mutex.c +++ b/src/mutex.c @@ -1,6 +1,10 @@ #define JEMALLOC_MUTEX_C_ #include "jemalloc/internal/jemalloc_internal.h" +#ifdef JEMALLOC_LAZY_LOCK +#include +#endif + /******************************************************************************/ /* Data. */ From 7e15dab94d3f008b0a6c296ad7afec9ed47ff1ac Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 29 Feb 2012 12:56:37 -0800 Subject: [PATCH 0063/3378] Add nallocm(). Add nallocm(), which computes the real allocation size that would result from the corresponding allocm() call. nallocm() is a functional superset of OS X's malloc_good_size(), in that it takes alignment constraints into account. --- doc/jemalloc.xml.in | 38 ++++++-- .../jemalloc/internal/jemalloc_internal.h.in | 4 - include/jemalloc/jemalloc.h.in | 1 + src/jemalloc.c | 22 +++++ test/allocm.c | 91 ++++++++++++++++--- 5 files changed, 131 insertions(+), 25 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index e7cc6284..6aa412a1 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -41,6 +41,7 @@ rallocm sallocm dallocm + nallocm --> general purpose memory allocation functions @@ -154,6 +155,12 @@ void *ptr int flags + + int nallocm + size_t *rsize + size_t size + int flags + @@ -301,8 +308,9 @@ for (i = 0; i < nbins; i++) { The allocm, rallocm, - sallocm, and - dallocm functions all have a + sallocm, + dallocm, and + nallocm functions all have a flags argument that can be used to specify options. The functions only check the options that are contextually relevant. Use bitwise or (|) operations to @@ -351,7 +359,9 @@ for (i = 0; i < nbins; i++) { least size bytes of memory, sets *ptr to the base address of the allocation, and sets *rsize to the real size of the allocation if - rsize is not NULL. + rsize is not NULL. Behavior + is undefined if size is + 0. 
The rallocm function resizes the allocation at *ptr to be at least @@ -364,7 +374,8 @@ for (i = 0; i < nbins; i++) { language="C">size + extra) bytes, though inability to allocate the extra byte(s) will not by itself result in failure. Behavior is - undefined if (size + + undefined if size is 0, or if + (size + extra > SIZE_T_MAX). @@ -374,6 +385,15 @@ for (i = 0; i < nbins; i++) { The dallocm function causes the memory referenced by ptr to be made available for future allocations. + + The nallocm function allocates no + memory, but it performs the same size computation as the + allocm function, and if + rsize is not NULL it sets + *rsize to the real size of the allocation that + would result from the equivalent allocm + function call. Behavior is undefined if + size is 0. @@ -1857,11 +1877,13 @@ malloc_conf = "xmalloc:true";]]> Experimental API The allocm, rallocm, - sallocm, and - dallocm functions return + sallocm, + dallocm, and + nallocm functions return ALLOCM_SUCCESS on success; otherwise they return an - error value. The allocm and - rallocm functions will fail if: + error value. The allocm, + rallocm, and + nallocm functions will fail if: ALLOCM_ERR_OOM diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index c21c218b..aa073467 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -548,10 +548,6 @@ sa2u(size_t size, size_t alignment, size_t *run_size_p) * 96 | 1100000 | 32 * 144 | 10100000 | 32 * 192 | 11000000 | 64 - * - * Depending on runtime settings, it is possible that arena_malloc() - * will further round up to a power of two, but that never causes - * correctness issues. 
*/ usize = (size + (alignment - 1)) & (-alignment); /* diff --git a/include/jemalloc/jemalloc.h.in b/include/jemalloc/jemalloc.h.in index 580a5ec5..428c0d39 100644 --- a/include/jemalloc/jemalloc.h.in +++ b/include/jemalloc/jemalloc.h.in @@ -59,6 +59,7 @@ int JEMALLOC_P(rallocm)(void **ptr, size_t *rsize, size_t size, int JEMALLOC_P(sallocm)(const void *ptr, size_t *rsize, int flags) JEMALLOC_ATTR(nonnull(1)); int JEMALLOC_P(dallocm)(void *ptr, int flags) JEMALLOC_ATTR(nonnull(1)); +int JEMALLOC_P(nallocm)(size_t *rsize, size_t size, int flags); #ifdef __cplusplus }; diff --git a/src/jemalloc.c b/src/jemalloc.c index ccc3a209..34fd1aa0 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1586,6 +1586,28 @@ JEMALLOC_P(dallocm)(void *ptr, int flags) return (ALLOCM_SUCCESS); } +JEMALLOC_ATTR(visibility("default")) +int +JEMALLOC_P(nallocm)(size_t *rsize, size_t size, int flags) +{ + size_t usize; + size_t alignment = (ZU(1) << (flags & ALLOCM_LG_ALIGN_MASK) + & (SIZE_T_MAX-1)); + + assert(size != 0); + + if (malloc_init()) + return (ALLOCM_ERR_OOM); + + usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment, NULL); + if (usize == 0) + return (ALLOCM_ERR_OOM); + + if (rsize != NULL) + *rsize = usize; + return (ALLOCM_SUCCESS); +} + /* * End non-standard functions. 
*/ diff --git a/test/allocm.c b/test/allocm.c index 59d0002e..762e350c 100644 --- a/test/allocm.c +++ b/test/allocm.c @@ -15,24 +15,33 @@ main(void) { int r; void *p; - size_t sz, alignment, total, tsz; + size_t nsz, rsz, sz, alignment, total; unsigned i; void *ps[NITER]; fprintf(stderr, "Test begin\n"); - sz = 0; - r = JEMALLOC_P(allocm)(&p, &sz, 42, 0); + sz = 42; + nsz = 0; + r = JEMALLOC_P(nallocm)(&nsz, sz, 0); + if (r != ALLOCM_SUCCESS) { + fprintf(stderr, "Unexpected nallocm() error\n"); + abort(); + } + rsz = 0; + r = JEMALLOC_P(allocm)(&p, &rsz, sz, 0); if (r != ALLOCM_SUCCESS) { fprintf(stderr, "Unexpected allocm() error\n"); abort(); } - if (sz < 42) + if (rsz < sz) fprintf(stderr, "Real size smaller than expected\n"); + if (nsz != rsz) + fprintf(stderr, "nallocm()/allocm() rsize mismatch\n"); if (JEMALLOC_P(dallocm)(p, 0) != ALLOCM_SUCCESS) fprintf(stderr, "Unexpected dallocm() error\n"); - r = JEMALLOC_P(allocm)(&p, NULL, 42, 0); + r = JEMALLOC_P(allocm)(&p, NULL, sz, 0); if (r != ALLOCM_SUCCESS) { fprintf(stderr, "Unexpected allocm() error\n"); abort(); @@ -40,11 +49,20 @@ main(void) if (JEMALLOC_P(dallocm)(p, 0) != ALLOCM_SUCCESS) fprintf(stderr, "Unexpected dallocm() error\n"); - r = JEMALLOC_P(allocm)(&p, NULL, 42, ALLOCM_ZERO); + nsz = 0; + r = JEMALLOC_P(nallocm)(&nsz, sz, ALLOCM_ZERO); + if (r != ALLOCM_SUCCESS) { + fprintf(stderr, "Unexpected nallocm() error\n"); + abort(); + } + rsz = 0; + r = JEMALLOC_P(allocm)(&p, &rsz, sz, ALLOCM_ZERO); if (r != ALLOCM_SUCCESS) { fprintf(stderr, "Unexpected allocm() error\n"); abort(); } + if (nsz != rsz) + fprintf(stderr, "nallocm()/allocm() rsize mismatch\n"); if (JEMALLOC_P(dallocm)(p, 0) != ALLOCM_SUCCESS) fprintf(stderr, "Unexpected dallocm() error\n"); @@ -55,12 +73,22 @@ main(void) alignment = 0x80000000LU; sz = 0x80000000LU; #endif - r = JEMALLOC_P(allocm)(&p, NULL, sz, ALLOCM_ALIGN(alignment)); + nsz = 0; + r = JEMALLOC_P(nallocm)(&nsz, sz, ALLOCM_ALIGN(alignment)); + if (r == ALLOCM_SUCCESS) { + 
fprintf(stderr, + "Expected error for nallocm(&nsz, %zu, 0x%x)\n", + sz, ALLOCM_ALIGN(alignment)); + } + rsz = 0; + r = JEMALLOC_P(allocm)(&p, &rsz, sz, ALLOCM_ALIGN(alignment)); if (r == ALLOCM_SUCCESS) { fprintf(stderr, "Expected error for allocm(&p, %zu, 0x%x)\n", sz, ALLOCM_ALIGN(alignment)); } + if (nsz != rsz) + fprintf(stderr, "nallocm()/allocm() rsize mismatch\n"); #if LG_SIZEOF_PTR == 3 alignment = 0x4000000000000000LLU; @@ -69,7 +97,12 @@ main(void) alignment = 0x40000000LU; sz = 0x84000001LU; #endif - r = JEMALLOC_P(allocm)(&p, NULL, sz, ALLOCM_ALIGN(alignment)); + nsz = 0; + r = JEMALLOC_P(nallocm)(&nsz, sz, ALLOCM_ALIGN(alignment)); + if (r != ALLOCM_SUCCESS) + fprintf(stderr, "Unexpected nallocm() error\n"); + rsz = 0; + r = JEMALLOC_P(allocm)(&p, &rsz, sz, ALLOCM_ALIGN(alignment)); if (r == ALLOCM_SUCCESS) { fprintf(stderr, "Expected error for allocm(&p, %zu, 0x%x)\n", @@ -82,12 +115,22 @@ main(void) #else sz = 0xfffffff0LU; #endif - r = JEMALLOC_P(allocm)(&p, NULL, sz, ALLOCM_ALIGN(alignment)); + nsz = 0; + r = JEMALLOC_P(nallocm)(&nsz, sz, ALLOCM_ALIGN(alignment)); + if (r == ALLOCM_SUCCESS) { + fprintf(stderr, + "Expected error for nallocm(&nsz, %zu, 0x%x)\n", + sz, ALLOCM_ALIGN(alignment)); + } + rsz = 0; + r = JEMALLOC_P(allocm)(&p, &rsz, sz, ALLOCM_ALIGN(alignment)); if (r == ALLOCM_SUCCESS) { fprintf(stderr, "Expected error for allocm(&p, %zu, 0x%x)\n", sz, ALLOCM_ALIGN(alignment)); } + if (nsz != rsz) + fprintf(stderr, "nallocm()/allocm() rsize mismatch\n"); for (i = 0; i < NITER; i++) ps[i] = NULL; @@ -101,21 +144,43 @@ main(void) sz < 3 * alignment && sz < (1U << 31); sz += (alignment >> (LG_SIZEOF_PTR-1)) - 1) { for (i = 0; i < NITER; i++) { - r = JEMALLOC_P(allocm)(&ps[i], NULL, sz, + nsz = 0; + r = JEMALLOC_P(nallocm)(&nsz, sz, ALLOCM_ALIGN(alignment) | ALLOCM_ZERO); if (r != ALLOCM_SUCCESS) { fprintf(stderr, - "Error for size %zu (0x%zx): %d\n", + "nallocm() error for size %zu" + " (0x%zx): %d\n", sz, sz, r); exit(1); } + rsz = 0; + r = 
JEMALLOC_P(allocm)(&ps[i], &rsz, sz, + ALLOCM_ALIGN(alignment) | ALLOCM_ZERO); + if (r != ALLOCM_SUCCESS) { + fprintf(stderr, + "allocm() error for size %zu" + " (0x%zx): %d\n", + sz, sz, r); + exit(1); + } + if (rsz < sz) { + fprintf(stderr, + "Real size smaller than" + " expected\n"); + } + if (nsz != rsz) { + fprintf(stderr, + "nallocm()/allocm() rsize" + " mismatch\n"); + } if ((uintptr_t)p & (alignment-1)) { fprintf(stderr, "%p inadequately aligned for" " alignment: %zu\n", p, alignment); } - JEMALLOC_P(sallocm)(ps[i], &tsz, 0); - total += tsz; + JEMALLOC_P(sallocm)(ps[i], &rsz, 0); + total += rsz; if (total >= (MAXALIGN << 1)) break; } From 166a745b395198c2b0d661caa717e6a9400291c6 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 29 Feb 2012 12:58:39 -0800 Subject: [PATCH 0064/3378] Simplify zone_good_size(). Simplify zone_good_size() to avoid memory allocation. Submitted by Mike Hommey. --- src/zone.c | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/src/zone.c b/src/zone.c index 2c1b2318..07f88617 100644 --- a/src/zone.c +++ b/src/zone.c @@ -133,22 +133,10 @@ zone_destroy(malloc_zone_t *zone) static size_t zone_good_size(malloc_zone_t *zone, size_t size) { - size_t ret; - void *p; - /* - * Actually create an object of the appropriate size, then find out - * how large it could have been without moving up to the next size - * class. - */ - p = JEMALLOC_P(malloc)(size); - if (p != NULL) { - ret = isalloc(p); - JEMALLOC_P(free)(p); - } else - ret = size; - - return (ret); + if (size == 0) + size = 1; + return (s2u(size)); } static void From 0a5489e37da88a1a50fbf8552e0d3a7f8fd93ffc Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 1 Mar 2012 17:19:20 -0800 Subject: [PATCH 0065/3378] Add --with-mangling. Add the --with-mangling configure option, which can be used to specify name mangling on a per public symbol basis that takes precedence over --with-jemalloc-prefix. 
Expose the memalign() and valloc() overrides even if --with-jemalloc-prefix is specified. This change does no real harm, and simplifies the code. --- INSTALL | 13 +++ configure.ac | 19 ++- include/jemalloc/internal/ctl.h | 6 +- .../jemalloc/internal/jemalloc_internal.h.in | 8 +- include/jemalloc/jemalloc.h.in | 110 ++++++++++++++---- include/jemalloc/jemalloc_defs.h.in | 45 ++++--- src/jemalloc.c | 65 +++++------ src/stats.c | 35 +++--- src/zone.c | 26 ++--- test/allocated.c | 26 ++--- test/allocm.c | 36 +++--- test/mremap.c | 3 +- test/posix_memalign.c | 16 +-- test/rallocm.c | 20 ++-- test/thread_arena.c | 13 +-- 15 files changed, 265 insertions(+), 176 deletions(-) diff --git a/INSTALL b/INSTALL index 4d3e1af9..6e32d715 100644 --- a/INSTALL +++ b/INSTALL @@ -26,6 +26,19 @@ any of the following arguments (not a definitive list) to 'configure': Embed one or more library paths, so that libjemalloc can find the libraries it is linked to. This works only on ELF-based systems. +--with-mangling= + Mangle public symbols specified in which is a comma-separated list of + name:mangled pairs. + + For example, to use ld's --wrap option as an alternative method for + overriding libc's malloc implementation, specify something like: + + --with-mangling=malloc:__wrap_malloc,free:__wrap_free[...] + + Note that mangling happens prior to application of the prefix specified by + --with-jemalloc-prefix, and mangled symbols are then ignored when applying + the prefix. + --with-jemalloc-prefix= Prefix all public APIs with . For example, if is "prefix_", API changes like the following occur: diff --git a/configure.ac b/configure.ac index b503f646..81ab233a 100644 --- a/configure.ac +++ b/configure.ac @@ -303,6 +303,16 @@ AC_PATH_PROG([AR], [ar], , [$PATH]) AC_PATH_PROG([LD], [ld], , [$PATH]) AC_PATH_PROG([AUTOCONF], [autoconf], , [$PATH]) +dnl Perform no name mangling by default. 
+AC_ARG_WITH([mangling], + [AS_HELP_STRING([--with-mangling=], [Mangle symbols in ])], + [mangling_map="$with_mangling"], [mangling_map=""]) +for nm in `echo ${mangling_map} |tr ',' ' '` ; do + n="je_`echo ${nm} |tr ':' ' ' |awk '{print $1}'`" + m=`echo ${nm} |tr ':' ' ' |awk '{print $2}'` + AC_DEFINE_UNQUOTED([${n}], [${m}]) +done + dnl Do not prefix public APIs by default. AC_ARG_WITH([jemalloc_prefix], [AS_HELP_STRING([--with-jemalloc-prefix=], [Prefix to prepend to all public APIs])], @@ -317,8 +327,15 @@ if test "x$JEMALLOC_PREFIX" != "x" ; then JEMALLOC_CPREFIX=`echo ${JEMALLOC_PREFIX} | tr "a-z" "A-Z"` AC_DEFINE_UNQUOTED([JEMALLOC_PREFIX], ["$JEMALLOC_PREFIX"]) AC_DEFINE_UNQUOTED([JEMALLOC_CPREFIX], ["$JEMALLOC_CPREFIX"]) - AC_DEFINE_UNQUOTED([JEMALLOC_P(string_that_no_one_should_want_to_use_as_a_jemalloc_API_prefix)], [${JEMALLOC_PREFIX}##string_that_no_one_should_want_to_use_as_a_jemalloc_API_prefix]) fi +dnl Generate macros to rename public symbols. All public symbols are prefixed +dnl with je_ in the source code, so these macro definitions are needed even if +dnl --with-jemalloc-prefix wasn't specified. +for stem in malloc_conf malloc_message malloc calloc posix_memalign realloc free malloc_usable_size malloc_stats_print mallctl mallctlnametomib mallctlbymib memalign valloc allocm dallocm nallocm rallocm sallocm; do + n="je_${stem}" + m="${JEMALLOC_PREFIX}${stem}" + AC_DEFINE_UNQUOTED([${n}], [${m}]) +done dnl Do not mangle library-private APIs by default. 
AC_ARG_WITH([private_namespace], diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index 28be2aef..8f72f7fa 100644 --- a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -74,7 +74,7 @@ int ctl_bymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, bool ctl_boot(void); #define xmallctl(name, oldp, oldlenp, newp, newlen) do { \ - if (JEMALLOC_P(mallctl)(name, oldp, oldlenp, newp, newlen) \ + if (je_mallctl(name, oldp, oldlenp, newp, newlen) \ != 0) { \ malloc_write(": Failure in xmallctl(\""); \ malloc_write(name); \ @@ -84,7 +84,7 @@ bool ctl_boot(void); } while (0) #define xmallctlnametomib(name, mibp, miblenp) do { \ - if (JEMALLOC_P(mallctlnametomib)(name, mibp, miblenp) != 0) { \ + if (je_mallctlnametomib(name, mibp, miblenp) != 0) { \ malloc_write( \ ": Failure in xmallctlnametomib(\""); \ malloc_write(name); \ @@ -94,7 +94,7 @@ bool ctl_boot(void); } while (0) #define xmallctlbymib(mib, miblen, oldp, oldlenp, newp, newlen) do { \ - if (JEMALLOC_P(mallctlbymib)(mib, miblen, oldp, oldlenp, newp, \ + if (je_mallctlbymib(mib, miblen, oldp, oldlenp, newp, \ newlen) != 0) { \ malloc_write( \ ": Failure in xmallctlbymib()\n"); \ diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index aa073467..f13b406e 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -30,7 +30,7 @@ #include #include -#define JEMALLOC_MANGLE +#define JEMALLOC_NO_DEMANGLE #include "../jemalloc@install_suffix@.h" #include "jemalloc/internal/private_namespace.h" @@ -149,7 +149,7 @@ static const bool config_ivsalloc = #include "jemalloc/internal/qr.h" #include "jemalloc/internal/ql.h" -extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s); +extern void (*je_malloc_message)(void *wcbopaque, const char *s); /* * Define a custom assert() in order to reduce the chances of deadlock during @@ 
-618,13 +618,13 @@ sa2u(size_t size, size_t alignment, size_t *run_size_p) /* * Wrapper around malloc_message() that avoids the need for - * JEMALLOC_P(malloc_message)(...) throughout the code. + * je_malloc_message(...) throughout the code. */ JEMALLOC_INLINE void malloc_write(const char *s) { - JEMALLOC_P(malloc_message)(NULL, s); + je_malloc_message(NULL, s); } /* diff --git a/include/jemalloc/jemalloc.h.in b/include/jemalloc/jemalloc.h.in index 428c0d39..b9301175 100644 --- a/include/jemalloc/jemalloc.h.in +++ b/include/jemalloc/jemalloc.h.in @@ -15,9 +15,6 @@ extern "C" { #define JEMALLOC_VERSION_GID "@jemalloc_version_gid@" #include "jemalloc_defs@install_suffix@.h" -#ifndef JEMALLOC_P -# define JEMALLOC_P(s) s -#endif #define ALLOCM_LG_ALIGN(la) (la) #if LG_SIZEOF_PTR == 2 @@ -32,34 +29,99 @@ extern "C" { #define ALLOCM_ERR_OOM 1 #define ALLOCM_ERR_NOT_MOVED 2 -extern const char *JEMALLOC_P(malloc_conf); -extern void (*JEMALLOC_P(malloc_message))(void *, const char *); +/* + * The je_ prefix on the following public symbol declarations is an artifact of + * namespace management, and should be omitted in application code unless + * JEMALLOC_NO_DEMANGLE is defined (see below). 
+ */ +extern const char *je_malloc_conf; +extern void (*je_malloc_message)(void *, const char *); -void *JEMALLOC_P(malloc)(size_t size) JEMALLOC_ATTR(malloc); -void *JEMALLOC_P(calloc)(size_t num, size_t size) JEMALLOC_ATTR(malloc); -int JEMALLOC_P(posix_memalign)(void **memptr, size_t alignment, size_t size) +void *je_malloc(size_t size) JEMALLOC_ATTR(malloc); +void *je_calloc(size_t num, size_t size) JEMALLOC_ATTR(malloc); +int je_posix_memalign(void **memptr, size_t alignment, size_t size) JEMALLOC_ATTR(nonnull(1)); -void *JEMALLOC_P(realloc)(void *ptr, size_t size); -void JEMALLOC_P(free)(void *ptr); +void *je_realloc(void *ptr, size_t size); +void je_free(void *ptr); -size_t JEMALLOC_P(malloc_usable_size)(const void *ptr); -void JEMALLOC_P(malloc_stats_print)(void (*write_cb)(void *, const char *), - void *cbopaque, const char *opts); -int JEMALLOC_P(mallctl)(const char *name, void *oldp, size_t *oldlenp, - void *newp, size_t newlen); -int JEMALLOC_P(mallctlnametomib)(const char *name, size_t *mibp, - size_t *miblenp); -int JEMALLOC_P(mallctlbymib)(const size_t *mib, size_t miblen, void *oldp, +size_t je_malloc_usable_size(const void *ptr); +void je_malloc_stats_print(void (*write_cb)(void *, const char *), + void *je_cbopaque, const char *opts); +int je_mallctl(const char *name, void *oldp, size_t *oldlenp, void *newp, + size_t newlen); +int je_mallctlnametomib(const char *name, size_t *mibp, size_t *miblenp); +int je_mallctlbymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen); -int JEMALLOC_P(allocm)(void **ptr, size_t *rsize, size_t size, int flags) +int je_allocm(void **ptr, size_t *rsize, size_t size, int flags) JEMALLOC_ATTR(nonnull(1)); -int JEMALLOC_P(rallocm)(void **ptr, size_t *rsize, size_t size, - size_t extra, int flags) JEMALLOC_ATTR(nonnull(1)); -int JEMALLOC_P(sallocm)(const void *ptr, size_t *rsize, int flags) +int je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, + int flags) 
JEMALLOC_ATTR(nonnull(1)); +int je_sallocm(const void *ptr, size_t *rsize, int flags) JEMALLOC_ATTR(nonnull(1)); -int JEMALLOC_P(dallocm)(void *ptr, int flags) JEMALLOC_ATTR(nonnull(1)); -int JEMALLOC_P(nallocm)(size_t *rsize, size_t size, int flags); +int je_dallocm(void *ptr, int flags) JEMALLOC_ATTR(nonnull(1)); +int je_nallocm(size_t *rsize, size_t size, int flags); + +/* + * By default application code must explicitly refer to mangled symbol names, + * so that it is possible to use jemalloc in conjunction with another allocator + * in the same application. Define JEMALLOC_MANGLE in order to cause automatic + * name mangling that matches the API prefixing that happened as a result of + * --with-mangling and/or --with-jemalloc-prefix configuration settings. + */ +#ifdef JEMALLOC_MANGLE +#ifndef JEMALLOC_NO_DEMANGLE +#define JEMALLOC_NO_DEMANGLE +#endif +#define malloc_conf je_malloc_conf +#define malloc_message je_malloc_message +#define malloc je_malloc +#define calloc je_calloc +#define posix_memalign je_posix_memalign +#define realloc je_realloc +#define free je_free +#define malloc_usable_size je_malloc_usable_size +#define malloc_stats_print je_malloc_stats_print +#define mallctl je_mallctl +#define mallctlnametomib je_mallctlnametomib +#define mallctlbymib je_mallctlbymib +#define memalign je_memalign +#define valloc je_valloc +#define allocm je_allocm +#define dallocm je_dallocm +#define nallocm je_nallocm +#define rallocm je_rallocm +#define sallocm je_sallocm +#endif + +/* + * The je_* macros can be used as stable alternative names for the public + * jemalloc API if JEMALLOC_NO_DEMANGLE is defined. This is primarily meant + * for use in jemalloc itself, but it can be used by application code to + * provide isolation from the name mangling specified via --with-mangling + * and/or --with-jemalloc-prefix. 
+ */ +#ifndef JEMALLOC_NO_DEMANGLE +#undef je_malloc_conf +#undef je_malloc_message +#undef je_malloc +#undef je_calloc +#undef je_posix_memalign +#undef je_realloc +#undef je_free +#undef je_malloc_usable_size +#undef je_malloc_stats_print +#undef je_mallctl +#undef je_mallctlnametomib +#undef je_mallctlbymib +#undef je_memalign +#undef je_valloc +#undef je_allocm +#undef je_dallocm +#undef je_nallocm +#undef je_rallocm +#undef je_sallocm +#endif #ifdef __cplusplus }; diff --git a/include/jemalloc/jemalloc_defs.h.in b/include/jemalloc/jemalloc_defs.h.in index 1360364a..3d1a8d30 100644 --- a/include/jemalloc/jemalloc_defs.h.in +++ b/include/jemalloc/jemalloc_defs.h.in @@ -1,22 +1,35 @@ -#ifndef JEMALLOC_DEFS_H_ -#define JEMALLOC_DEFS_H_ - /* - * If JEMALLOC_PREFIX is defined, it will cause all public APIs to be prefixed. - * This makes it possible, with some care, to use multiple allocators - * simultaneously. - * - * In many cases it is more convenient to manually prefix allocator function - * calls than to let macros do it automatically, particularly when using - * multiple allocators simultaneously. Define JEMALLOC_MANGLE before - * #include'ing jemalloc.h in order to cause name mangling that corresponds to - * the API prefixing. + * If JEMALLOC_PREFIX is defined via --with-jemalloc-prefix, it will cause all + * public APIs to be prefixed. This makes it possible, with some care, to use + * multiple allocators simultaneously. */ #undef JEMALLOC_PREFIX #undef JEMALLOC_CPREFIX -#if (defined(JEMALLOC_PREFIX) && defined(JEMALLOC_MANGLE)) -#undef JEMALLOC_P -#endif + +/* + * Name mangling for public symbols is controlled by --with-mangling and + * --with-jemalloc-prefix. With default settings the je_ prefix is stripped by + * these macro definitions. 
+ */ +#undef je_malloc_conf +#undef je_malloc_message +#undef je_malloc +#undef je_calloc +#undef je_posix_memalign +#undef je_realloc +#undef je_free +#undef je_malloc_usable_size +#undef je_malloc_stats_print +#undef je_mallctl +#undef je_mallctlnametomib +#undef je_mallctlbymib +#undef je_memalign +#undef je_valloc +#undef je_allocm +#undef je_dallocm +#undef je_nallocm +#undef je_rallocm +#undef je_sallocm /* * JEMALLOC_PRIVATE_NAMESPACE is used as a prefix for all library-private APIs. @@ -150,5 +163,3 @@ /* sizeof(long) == 2^LG_SIZEOF_LONG. */ #undef LG_SIZEOF_LONG - -#endif /* JEMALLOC_DEFS_H_ */ diff --git a/src/jemalloc.c b/src/jemalloc.c index 34fd1aa0..6e34706d 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -36,7 +36,7 @@ size_t lg_pagesize; unsigned ncpus; /* Runtime configuration options. */ -const char *JEMALLOC_P(malloc_conf) JEMALLOC_ATTR(visibility("default")); +const char *je_malloc_conf JEMALLOC_ATTR(visibility("default")); #ifdef JEMALLOC_DEBUG bool opt_abort = true; # ifdef JEMALLOC_FILL @@ -81,7 +81,7 @@ wrtmessage(void *cbopaque, const char *s) UNUSED int result = write(STDERR_FILENO, s, strlen(s)); } -void (*JEMALLOC_P(malloc_message))(void *, const char *s) +void (*je_malloc_message)(void *, const char *s) JEMALLOC_ATTR(visibility("default")) = wrtmessage; /******************************************************************************/ @@ -230,7 +230,7 @@ stats_print_atexit(void) } } } - JEMALLOC_P(malloc_stats_print)(NULL, NULL, NULL); + je_malloc_stats_print(NULL, NULL, NULL); } thread_allocated_t * @@ -422,12 +422,12 @@ malloc_conf_init(void) /* Get runtime configuration. */ switch (i) { case 0: - if (JEMALLOC_P(malloc_conf) != NULL) { + if (je_malloc_conf != NULL) { /* * Use options that were compiled into the * program. */ - opts = JEMALLOC_P(malloc_conf); + opts = je_malloc_conf; } else { /* No configuration specified. 
*/ buf[0] = '\0'; @@ -836,7 +836,7 @@ jemalloc_darwin_init(void) JEMALLOC_ATTR(malloc) JEMALLOC_ATTR(visibility("default")) void * -JEMALLOC_P(malloc)(size_t size) +je_malloc(size_t size) { void *ret; size_t usize; @@ -990,7 +990,7 @@ RETURN: JEMALLOC_ATTR(nonnull(1)) JEMALLOC_ATTR(visibility("default")) int -JEMALLOC_P(posix_memalign)(void **memptr, size_t alignment, size_t size) +je_posix_memalign(void **memptr, size_t alignment, size_t size) { return imemalign(memptr, alignment, size, true); @@ -999,7 +999,7 @@ JEMALLOC_P(posix_memalign)(void **memptr, size_t alignment, size_t size) JEMALLOC_ATTR(malloc) JEMALLOC_ATTR(visibility("default")) void * -JEMALLOC_P(calloc)(size_t num, size_t size) +je_calloc(size_t num, size_t size) { void *ret; size_t num_size; @@ -1077,7 +1077,7 @@ RETURN: JEMALLOC_ATTR(visibility("default")) void * -JEMALLOC_P(realloc)(void *ptr, size_t size) +je_realloc(void *ptr, size_t size) { void *ret; size_t usize; @@ -1207,7 +1207,7 @@ RETURN: JEMALLOC_ATTR(visibility("default")) void -JEMALLOC_P(free)(void *ptr) +je_free(void *ptr) { if (ptr != NULL) { @@ -1234,17 +1234,13 @@ JEMALLOC_P(free)(void *ptr) /******************************************************************************/ /* * Begin non-standard override functions. - * - * These overrides are omitted if the JEMALLOC_PREFIX is defined, since the - * entire point is to avoid accidental mixed allocator usage. 
*/ -#ifndef JEMALLOC_PREFIX #ifdef JEMALLOC_OVERRIDE_MEMALIGN JEMALLOC_ATTR(malloc) JEMALLOC_ATTR(visibility("default")) void * -JEMALLOC_P(memalign)(size_t alignment, size_t size) +je_memalign(size_t alignment, size_t size) { void *ret #ifdef JEMALLOC_CC_SILENCE @@ -1260,7 +1256,7 @@ JEMALLOC_P(memalign)(size_t alignment, size_t size) JEMALLOC_ATTR(malloc) JEMALLOC_ATTR(visibility("default")) void * -JEMALLOC_P(valloc)(size_t size) +je_valloc(size_t size) { void *ret #ifdef JEMALLOC_CC_SILENCE @@ -1272,7 +1268,7 @@ JEMALLOC_P(valloc)(size_t size) } #endif -#if defined(__GLIBC__) && !defined(__UCLIBC__) +#if (!defined(JEMALLOC_PREFIX) && defined(__GLIBC__) && !defined(__UCLIBC__)) /* * glibc provides the RTLD_DEEPBIND flag for dlopen which can make it possible * to inconsistently reference libc's malloc(3)-compatible functions @@ -1283,20 +1279,18 @@ JEMALLOC_P(valloc)(size_t size) * ignored. */ JEMALLOC_ATTR(visibility("default")) -void (* const __free_hook)(void *ptr) = JEMALLOC_P(free); +void (* const __free_hook)(void *ptr) = je_free; JEMALLOC_ATTR(visibility("default")) -void *(* const __malloc_hook)(size_t size) = JEMALLOC_P(malloc); +void *(* const __malloc_hook)(size_t size) = je_malloc; JEMALLOC_ATTR(visibility("default")) -void *(* const __realloc_hook)(void *ptr, size_t size) = JEMALLOC_P(realloc); +void *(* const __realloc_hook)(void *ptr, size_t size) = je_realloc; JEMALLOC_ATTR(visibility("default")) -void *(* const __memalign_hook)(size_t alignment, size_t size) = - JEMALLOC_P(memalign); +void *(* const __memalign_hook)(size_t alignment, size_t size) = je_memalign; #endif -#endif /* JEMALLOC_PREFIX */ /* * End non-standard override functions. 
*/ @@ -1307,7 +1301,7 @@ void *(* const __memalign_hook)(size_t alignment, size_t size) = JEMALLOC_ATTR(visibility("default")) size_t -JEMALLOC_P(malloc_usable_size)(const void *ptr) +je_malloc_usable_size(const void *ptr) { size_t ret; @@ -1325,8 +1319,8 @@ JEMALLOC_P(malloc_usable_size)(const void *ptr) JEMALLOC_ATTR(visibility("default")) void -JEMALLOC_P(malloc_stats_print)(void (*write_cb)(void *, const char *), - void *cbopaque, const char *opts) +je_malloc_stats_print(void (*write_cb)(void *, const char *), void *cbopaque, + const char *opts) { stats_print(write_cb, cbopaque, opts); @@ -1334,7 +1328,7 @@ JEMALLOC_P(malloc_stats_print)(void (*write_cb)(void *, const char *), JEMALLOC_ATTR(visibility("default")) int -JEMALLOC_P(mallctl)(const char *name, void *oldp, size_t *oldlenp, void *newp, +je_mallctl(const char *name, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { @@ -1346,7 +1340,7 @@ JEMALLOC_P(mallctl)(const char *name, void *oldp, size_t *oldlenp, void *newp, JEMALLOC_ATTR(visibility("default")) int -JEMALLOC_P(mallctlnametomib)(const char *name, size_t *mibp, size_t *miblenp) +je_mallctlnametomib(const char *name, size_t *mibp, size_t *miblenp) { if (malloc_init()) @@ -1357,8 +1351,8 @@ JEMALLOC_P(mallctlnametomib)(const char *name, size_t *mibp, size_t *miblenp) JEMALLOC_ATTR(visibility("default")) int -JEMALLOC_P(mallctlbymib)(const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) +je_mallctlbymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, + void *newp, size_t newlen) { if (malloc_init()) @@ -1385,7 +1379,7 @@ iallocm(size_t usize, size_t alignment, bool zero) JEMALLOC_ATTR(nonnull(1)) JEMALLOC_ATTR(visibility("default")) int -JEMALLOC_P(allocm)(void **ptr, size_t *rsize, size_t size, int flags) +je_allocm(void **ptr, size_t *rsize, size_t size, int flags) { void *p; size_t usize; @@ -1451,8 +1445,7 @@ OOM: JEMALLOC_ATTR(nonnull(1)) JEMALLOC_ATTR(visibility("default")) int 
-JEMALLOC_P(rallocm)(void **ptr, size_t *rsize, size_t size, size_t extra, - int flags) +je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, int flags) { void *p, *q; size_t usize; @@ -1544,7 +1537,7 @@ OOM: JEMALLOC_ATTR(nonnull(1)) JEMALLOC_ATTR(visibility("default")) int -JEMALLOC_P(sallocm)(const void *ptr, size_t *rsize, int flags) +je_sallocm(const void *ptr, size_t *rsize, int flags) { size_t sz; @@ -1565,7 +1558,7 @@ JEMALLOC_P(sallocm)(const void *ptr, size_t *rsize, int flags) JEMALLOC_ATTR(nonnull(1)) JEMALLOC_ATTR(visibility("default")) int -JEMALLOC_P(dallocm)(void *ptr, int flags) +je_dallocm(void *ptr, int flags) { size_t usize; @@ -1588,7 +1581,7 @@ JEMALLOC_P(dallocm)(void *ptr, int flags) JEMALLOC_ATTR(visibility("default")) int -JEMALLOC_P(nallocm)(size_t *rsize, size_t size, int flags) +je_nallocm(size_t *rsize, size_t size, int flags) { size_t usize; size_t alignment = (ZU(1) << (flags & ALLOCM_LG_ALIGN_MASK) diff --git a/src/stats.c b/src/stats.c index 2f61e7bc..f905d048 100644 --- a/src/stats.c +++ b/src/stats.c @@ -108,7 +108,7 @@ malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque, * function, so use the default one. malloc_write() is an * inline function, so use malloc_message() directly here. */ - write_cb = JEMALLOC_P(malloc_message); + write_cb = je_malloc_message; cbopaque = NULL; } @@ -376,8 +376,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, * */ epoch = 1; u64sz = sizeof(uint64_t); - err = JEMALLOC_P(mallctl)("epoch", &epoch, &u64sz, &epoch, - sizeof(uint64_t)); + err = je_mallctl("epoch", &epoch, &u64sz, &epoch, sizeof(uint64_t)); if (err != 0) { if (err == EAGAIN) { malloc_write(": Memory allocation failure in " @@ -395,7 +394,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, * function, so use the default one. malloc_write() is an * inline function, so use malloc_message() directly here. 
*/ - write_cb = JEMALLOC_P(malloc_message); + write_cb = je_malloc_message; cbopaque = NULL; } @@ -448,22 +447,22 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, write_cb(cbopaque, "\n"); #define OPT_WRITE_BOOL(n) \ - if ((err = JEMALLOC_P(mallctl)("opt."#n, &bv, &bsz, \ - NULL, 0)) == 0) { \ + if ((err = je_mallctl("opt."#n, &bv, &bsz, NULL, 0)) \ + == 0) { \ write_cb(cbopaque, " opt."#n": "); \ write_cb(cbopaque, bv ? "true" : "false"); \ write_cb(cbopaque, "\n"); \ } #define OPT_WRITE_SIZE_T(n) \ - if ((err = JEMALLOC_P(mallctl)("opt."#n, &sv, &ssz, \ - NULL, 0)) == 0) { \ + if ((err = je_mallctl("opt."#n, &sv, &ssz, NULL, 0)) \ + == 0) { \ write_cb(cbopaque, " opt."#n": "); \ write_cb(cbopaque, u2s(sv, 10, s)); \ write_cb(cbopaque, "\n"); \ } #define OPT_WRITE_SSIZE_T(n) \ - if ((err = JEMALLOC_P(mallctl)("opt."#n, &ssv, &sssz, \ - NULL, 0)) == 0) { \ + if ((err = je_mallctl("opt."#n, &ssv, &sssz, NULL, 0)) \ + == 0) { \ if (ssv >= 0) { \ write_cb(cbopaque, " opt."#n": "); \ write_cb(cbopaque, u2s(ssv, 10, s)); \ @@ -474,8 +473,8 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, write_cb(cbopaque, "\n"); \ } #define OPT_WRITE_CHAR_P(n) \ - if ((err = JEMALLOC_P(mallctl)("opt."#n, &cpv, &cpsz, \ - NULL, 0)) == 0) { \ + if ((err = je_mallctl("opt."#n, &cpv, &cpsz, NULL, 0)) \ + == 0) { \ write_cb(cbopaque, " opt."#n": \""); \ write_cb(cbopaque, cpv); \ write_cb(cbopaque, "\"\n"); \ @@ -535,15 +534,15 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, write_cb(cbopaque, "Min active:dirty page ratio per arena: N/A\n"); } - if ((err = JEMALLOC_P(mallctl)("arenas.tcache_max", &sv, - &ssz, NULL, 0)) == 0) { + if ((err = je_mallctl("arenas.tcache_max", &sv, &ssz, NULL, 0)) + == 0) { write_cb(cbopaque, "Maximum thread-cached size class: "); write_cb(cbopaque, u2s(sv, 10, s)); write_cb(cbopaque, "\n"); } - if ((err = JEMALLOC_P(mallctl)("opt.lg_tcache_gc_sweep", &ssv, - &ssz, NULL, 0)) == 0) { + if ((err = 
je_mallctl("opt.lg_tcache_gc_sweep", &ssv, &ssz, + NULL, 0)) == 0) { size_t tcache_gc_sweep = (1U << ssv); bool tcache_enabled; CTL_GET("opt.tcache", &tcache_enabled, bool); @@ -552,8 +551,8 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, u2s(tcache_gc_sweep, 10, s) : "N/A"); write_cb(cbopaque, "\n"); } - if ((err = JEMALLOC_P(mallctl)("opt.prof", &bv, &bsz, NULL, 0)) - == 0 && bv) { + if ((err = je_mallctl("opt.prof", &bv, &bsz, NULL, 0)) == 0 && + bv) { CTL_GET("opt.lg_prof_sample", &sv, size_t); write_cb(cbopaque, "Average profile sample interval: "); write_cb(cbopaque, u2s((((uint64_t)1U) << sv), 10, s)); diff --git a/src/zone.c b/src/zone.c index 07f88617..5beed5f3 100644 --- a/src/zone.c +++ b/src/zone.c @@ -67,14 +67,14 @@ static void * zone_malloc(malloc_zone_t *zone, size_t size) { - return (JEMALLOC_P(malloc)(size)); + return (je_malloc(size)); } static void * zone_calloc(malloc_zone_t *zone, size_t num, size_t size) { - return (JEMALLOC_P(calloc)(num, size)); + return (je_calloc(num, size)); } static void * @@ -82,7 +82,7 @@ zone_valloc(malloc_zone_t *zone, size_t size) { void *ret = NULL; /* Assignment avoids useless compiler warning. */ - JEMALLOC_P(posix_memalign)(&ret, PAGE_SIZE, size); + je_posix_memalign(&ret, PAGE_SIZE, size); return (ret); } @@ -91,14 +91,14 @@ static void zone_free(malloc_zone_t *zone, void *ptr) { - JEMALLOC_P(free)(ptr); + je_free(ptr); } static void * zone_realloc(malloc_zone_t *zone, void *ptr, size_t size) { - return (JEMALLOC_P(realloc)(ptr, size)); + return (je_realloc(ptr, size)); } #if (JEMALLOC_ZONE_VERSION >= 6) @@ -107,7 +107,7 @@ zone_memalign(malloc_zone_t *zone, size_t alignment, size_t size) { void *ret = NULL; /* Assignment avoids useless compiler warning. 
*/ - JEMALLOC_P(posix_memalign)(&ret, alignment, size); + je_posix_memalign(&ret, alignment, size); return (ret); } @@ -117,7 +117,7 @@ zone_free_definite_size(malloc_zone_t *zone, void *ptr, size_t size) { assert(ivsalloc(ptr) == size); - JEMALLOC_P(free)(ptr); + je_free(ptr); } #endif @@ -208,7 +208,7 @@ ozone_free(malloc_zone_t *zone, void *ptr) { if (ivsalloc(ptr) != 0) - JEMALLOC_P(free)(ptr); + je_free(ptr); else { size_t size = szone.size(zone, ptr); if (size != 0) @@ -222,17 +222,17 @@ ozone_realloc(malloc_zone_t *zone, void *ptr, size_t size) size_t oldsize; if (ptr == NULL) - return (JEMALLOC_P(malloc)(size)); + return (je_malloc(size)); oldsize = ivsalloc(ptr); if (oldsize != 0) - return (JEMALLOC_P(realloc)(ptr, size)); + return (je_realloc(ptr, size)); else { oldsize = szone.size(zone, ptr); if (oldsize == 0) - return (JEMALLOC_P(malloc)(size)); + return (je_malloc(size)); else { - void *ret = JEMALLOC_P(malloc)(size); + void *ret = je_malloc(size); if (ret != NULL) { memcpy(ret, ptr, (oldsize < size) ? 
oldsize : size); @@ -268,7 +268,7 @@ ozone_free_definite_size(malloc_zone_t *zone, void *ptr, size_t size) if (ivsalloc(ptr) != 0) { assert(ivsalloc(ptr) == size); - JEMALLOC_P(free)(ptr); + je_free(ptr); } else { assert(size == szone.size(zone, ptr)); szone.free_definite_size(zone, ptr, size); diff --git a/test/allocated.c b/test/allocated.c index b1e40e47..701c1754 100644 --- a/test/allocated.c +++ b/test/allocated.c @@ -20,8 +20,7 @@ thread_start(void *arg) size_t sz, usize; sz = sizeof(a0); - if ((err = JEMALLOC_P(mallctl)("thread.allocated", &a0, &sz, NULL, - 0))) { + if ((err = mallctl("thread.allocated", &a0, &sz, NULL, 0))) { if (err == ENOENT) { #ifdef JEMALLOC_STATS assert(false); @@ -33,8 +32,7 @@ thread_start(void *arg) exit(1); } sz = sizeof(ap0); - if ((err = JEMALLOC_P(mallctl)("thread.allocatedp", &ap0, &sz, NULL, - 0))) { + if ((err = mallctl("thread.allocatedp", &ap0, &sz, NULL, 0))) { if (err == ENOENT) { #ifdef JEMALLOC_STATS assert(false); @@ -48,8 +46,7 @@ thread_start(void *arg) assert(*ap0 == a0); sz = sizeof(d0); - if ((err = JEMALLOC_P(mallctl)("thread.deallocated", &d0, &sz, NULL, - 0))) { + if ((err = mallctl("thread.deallocated", &d0, &sz, NULL, 0))) { if (err == ENOENT) { #ifdef JEMALLOC_STATS assert(false); @@ -61,8 +58,7 @@ thread_start(void *arg) exit(1); } sz = sizeof(dp0); - if ((err = JEMALLOC_P(mallctl)("thread.deallocatedp", &dp0, &sz, NULL, - 0))) { + if ((err = mallctl("thread.deallocatedp", &dp0, &sz, NULL, 0))) { if (err == ENOENT) { #ifdef JEMALLOC_STATS assert(false); @@ -75,28 +71,28 @@ thread_start(void *arg) } assert(*dp0 == d0); - p = JEMALLOC_P(malloc)(1); + p = malloc(1); if (p == NULL) { fprintf(stderr, "%s(): Error in malloc()\n", __func__); exit(1); } sz = sizeof(a1); - JEMALLOC_P(mallctl)("thread.allocated", &a1, &sz, NULL, 0); + mallctl("thread.allocated", &a1, &sz, NULL, 0); sz = sizeof(ap1); - JEMALLOC_P(mallctl)("thread.allocatedp", &ap1, &sz, NULL, 0); + mallctl("thread.allocatedp", &ap1, &sz, NULL, 0); 
assert(*ap1 == a1); assert(ap0 == ap1); - usize = JEMALLOC_P(malloc_usable_size)(p); + usize = malloc_usable_size(p); assert(a0 + usize <= a1); - JEMALLOC_P(free)(p); + free(p); sz = sizeof(d1); - JEMALLOC_P(mallctl)("thread.deallocated", &d1, &sz, NULL, 0); + mallctl("thread.deallocated", &d1, &sz, NULL, 0); sz = sizeof(dp1); - JEMALLOC_P(mallctl)("thread.deallocatedp", &dp1, &sz, NULL, 0); + mallctl("thread.deallocatedp", &dp1, &sz, NULL, 0); assert(*dp1 == d1); assert(dp0 == dp1); diff --git a/test/allocm.c b/test/allocm.c index 762e350c..151f5749 100644 --- a/test/allocm.c +++ b/test/allocm.c @@ -23,13 +23,13 @@ main(void) sz = 42; nsz = 0; - r = JEMALLOC_P(nallocm)(&nsz, sz, 0); + r = nallocm(&nsz, sz, 0); if (r != ALLOCM_SUCCESS) { fprintf(stderr, "Unexpected nallocm() error\n"); abort(); } rsz = 0; - r = JEMALLOC_P(allocm)(&p, &rsz, sz, 0); + r = allocm(&p, &rsz, sz, 0); if (r != ALLOCM_SUCCESS) { fprintf(stderr, "Unexpected allocm() error\n"); abort(); @@ -38,32 +38,32 @@ main(void) fprintf(stderr, "Real size smaller than expected\n"); if (nsz != rsz) fprintf(stderr, "nallocm()/allocm() rsize mismatch\n"); - if (JEMALLOC_P(dallocm)(p, 0) != ALLOCM_SUCCESS) + if (dallocm(p, 0) != ALLOCM_SUCCESS) fprintf(stderr, "Unexpected dallocm() error\n"); - r = JEMALLOC_P(allocm)(&p, NULL, sz, 0); + r = allocm(&p, NULL, sz, 0); if (r != ALLOCM_SUCCESS) { fprintf(stderr, "Unexpected allocm() error\n"); abort(); } - if (JEMALLOC_P(dallocm)(p, 0) != ALLOCM_SUCCESS) + if (dallocm(p, 0) != ALLOCM_SUCCESS) fprintf(stderr, "Unexpected dallocm() error\n"); nsz = 0; - r = JEMALLOC_P(nallocm)(&nsz, sz, ALLOCM_ZERO); + r = nallocm(&nsz, sz, ALLOCM_ZERO); if (r != ALLOCM_SUCCESS) { fprintf(stderr, "Unexpected nallocm() error\n"); abort(); } rsz = 0; - r = JEMALLOC_P(allocm)(&p, &rsz, sz, ALLOCM_ZERO); + r = allocm(&p, &rsz, sz, ALLOCM_ZERO); if (r != ALLOCM_SUCCESS) { fprintf(stderr, "Unexpected allocm() error\n"); abort(); } if (nsz != rsz) fprintf(stderr, "nallocm()/allocm() 
rsize mismatch\n"); - if (JEMALLOC_P(dallocm)(p, 0) != ALLOCM_SUCCESS) + if (dallocm(p, 0) != ALLOCM_SUCCESS) fprintf(stderr, "Unexpected dallocm() error\n"); #if LG_SIZEOF_PTR == 3 @@ -74,14 +74,14 @@ main(void) sz = 0x80000000LU; #endif nsz = 0; - r = JEMALLOC_P(nallocm)(&nsz, sz, ALLOCM_ALIGN(alignment)); + r = nallocm(&nsz, sz, ALLOCM_ALIGN(alignment)); if (r == ALLOCM_SUCCESS) { fprintf(stderr, "Expected error for nallocm(&nsz, %zu, 0x%x)\n", sz, ALLOCM_ALIGN(alignment)); } rsz = 0; - r = JEMALLOC_P(allocm)(&p, &rsz, sz, ALLOCM_ALIGN(alignment)); + r = allocm(&p, &rsz, sz, ALLOCM_ALIGN(alignment)); if (r == ALLOCM_SUCCESS) { fprintf(stderr, "Expected error for allocm(&p, %zu, 0x%x)\n", @@ -98,11 +98,11 @@ main(void) sz = 0x84000001LU; #endif nsz = 0; - r = JEMALLOC_P(nallocm)(&nsz, sz, ALLOCM_ALIGN(alignment)); + r = nallocm(&nsz, sz, ALLOCM_ALIGN(alignment)); if (r != ALLOCM_SUCCESS) fprintf(stderr, "Unexpected nallocm() error\n"); rsz = 0; - r = JEMALLOC_P(allocm)(&p, &rsz, sz, ALLOCM_ALIGN(alignment)); + r = allocm(&p, &rsz, sz, ALLOCM_ALIGN(alignment)); if (r == ALLOCM_SUCCESS) { fprintf(stderr, "Expected error for allocm(&p, %zu, 0x%x)\n", @@ -116,14 +116,14 @@ main(void) sz = 0xfffffff0LU; #endif nsz = 0; - r = JEMALLOC_P(nallocm)(&nsz, sz, ALLOCM_ALIGN(alignment)); + r = nallocm(&nsz, sz, ALLOCM_ALIGN(alignment)); if (r == ALLOCM_SUCCESS) { fprintf(stderr, "Expected error for nallocm(&nsz, %zu, 0x%x)\n", sz, ALLOCM_ALIGN(alignment)); } rsz = 0; - r = JEMALLOC_P(allocm)(&p, &rsz, sz, ALLOCM_ALIGN(alignment)); + r = allocm(&p, &rsz, sz, ALLOCM_ALIGN(alignment)); if (r == ALLOCM_SUCCESS) { fprintf(stderr, "Expected error for allocm(&p, %zu, 0x%x)\n", @@ -145,7 +145,7 @@ main(void) sz += (alignment >> (LG_SIZEOF_PTR-1)) - 1) { for (i = 0; i < NITER; i++) { nsz = 0; - r = JEMALLOC_P(nallocm)(&nsz, sz, + r = nallocm(&nsz, sz, ALLOCM_ALIGN(alignment) | ALLOCM_ZERO); if (r != ALLOCM_SUCCESS) { fprintf(stderr, @@ -155,7 +155,7 @@ main(void) exit(1); } rsz = 0; - 
r = JEMALLOC_P(allocm)(&ps[i], &rsz, sz, + r = allocm(&ps[i], &rsz, sz, ALLOCM_ALIGN(alignment) | ALLOCM_ZERO); if (r != ALLOCM_SUCCESS) { fprintf(stderr, @@ -179,14 +179,14 @@ main(void) "%p inadequately aligned for" " alignment: %zu\n", p, alignment); } - JEMALLOC_P(sallocm)(ps[i], &rsz, 0); + sallocm(ps[i], &rsz, 0); total += rsz; if (total >= (MAXALIGN << 1)) break; } for (i = 0; i < NITER; i++) { if (ps[i] != NULL) { - JEMALLOC_P(dallocm)(ps[i], 0); + dallocm(ps[i], 0); ps[i] = NULL; } } diff --git a/test/mremap.c b/test/mremap.c index 146c66f4..8d35a64e 100644 --- a/test/mremap.c +++ b/test/mremap.c @@ -17,8 +17,7 @@ main(void) fprintf(stderr, "Test begin\n"); sz = sizeof(lg_chunk); - if ((err = JEMALLOC_P(mallctl)("opt.lg_chunk", &lg_chunk, &sz, NULL, - 0))) { + if ((err = mallctl("opt.lg_chunk", &lg_chunk, &sz, NULL, 0))) { assert(err != ENOENT); fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__, strerror(err)); diff --git a/test/posix_memalign.c b/test/posix_memalign.c index 3e306c01..789131ce 100644 --- a/test/posix_memalign.c +++ b/test/posix_memalign.c @@ -24,7 +24,7 @@ main(void) /* Test error conditions. 
*/ for (alignment = 0; alignment < sizeof(void *); alignment++) { - err = JEMALLOC_P(posix_memalign)(&p, alignment, 1); + err = posix_memalign(&p, alignment, 1); if (err != EINVAL) { fprintf(stderr, "Expected error for invalid alignment %zu\n", @@ -34,7 +34,7 @@ main(void) for (alignment = sizeof(size_t); alignment < MAXALIGN; alignment <<= 1) { - err = JEMALLOC_P(posix_memalign)(&p, alignment + 1, 1); + err = posix_memalign(&p, alignment + 1, 1); if (err == 0) { fprintf(stderr, "Expected error for invalid alignment %zu\n", @@ -49,7 +49,7 @@ main(void) alignment = 0x80000000LU; size = 0x80000000LU; #endif - err = JEMALLOC_P(posix_memalign)(&p, alignment, size); + err = posix_memalign(&p, alignment, size); if (err == 0) { fprintf(stderr, "Expected error for posix_memalign(&p, %zu, %zu)\n", @@ -63,7 +63,7 @@ main(void) alignment = 0x40000000LU; size = 0x84000001LU; #endif - err = JEMALLOC_P(posix_memalign)(&p, alignment, size); + err = posix_memalign(&p, alignment, size); if (err == 0) { fprintf(stderr, "Expected error for posix_memalign(&p, %zu, %zu)\n", @@ -76,7 +76,7 @@ main(void) #else size = 0xfffffff0LU; #endif - err = JEMALLOC_P(posix_memalign)(&p, alignment, size); + err = posix_memalign(&p, alignment, size); if (err == 0) { fprintf(stderr, "Expected error for posix_memalign(&p, %zu, %zu)\n", @@ -95,7 +95,7 @@ main(void) size < 3 * alignment && size < (1U << 31); size += (alignment >> (LG_SIZEOF_PTR-1)) - 1) { for (i = 0; i < NITER; i++) { - err = JEMALLOC_P(posix_memalign)(&ps[i], + err = posix_memalign(&ps[i], alignment, size); if (err) { fprintf(stderr, @@ -103,13 +103,13 @@ main(void) size, size, strerror(err)); exit(1); } - total += JEMALLOC_P(malloc_usable_size)(ps[i]); + total += malloc_usable_size(ps[i]); if (total >= (MAXALIGN << 1)) break; } for (i = 0; i < NITER; i++) { if (ps[i] != NULL) { - JEMALLOC_P(free)(ps[i]); + free(ps[i]); ps[i] = NULL; } } diff --git a/test/rallocm.c b/test/rallocm.c index ccf326bb..9c0df403 100644 --- a/test/rallocm.c 
+++ b/test/rallocm.c @@ -24,14 +24,14 @@ main(void) pagesize = (size_t)result; } - r = JEMALLOC_P(allocm)(&p, &sz, 42, 0); + r = allocm(&p, &sz, 42, 0); if (r != ALLOCM_SUCCESS) { fprintf(stderr, "Unexpected allocm() error\n"); abort(); } q = p; - r = JEMALLOC_P(rallocm)(&q, &tsz, sz, 0, ALLOCM_NO_MOVE); + r = rallocm(&q, &tsz, sz, 0, ALLOCM_NO_MOVE); if (r != ALLOCM_SUCCESS) fprintf(stderr, "Unexpected rallocm() error\n"); if (q != p) @@ -42,7 +42,7 @@ main(void) } q = p; - r = JEMALLOC_P(rallocm)(&q, &tsz, sz, 5, ALLOCM_NO_MOVE); + r = rallocm(&q, &tsz, sz, 5, ALLOCM_NO_MOVE); if (r != ALLOCM_SUCCESS) fprintf(stderr, "Unexpected rallocm() error\n"); if (q != p) @@ -53,7 +53,7 @@ main(void) } q = p; - r = JEMALLOC_P(rallocm)(&q, &tsz, sz + 5, 0, ALLOCM_NO_MOVE); + r = rallocm(&q, &tsz, sz + 5, 0, ALLOCM_NO_MOVE); if (r != ALLOCM_ERR_NOT_MOVED) fprintf(stderr, "Unexpected rallocm() result\n"); if (q != p) @@ -64,7 +64,7 @@ main(void) } q = p; - r = JEMALLOC_P(rallocm)(&q, &tsz, sz + 5, 0, 0); + r = rallocm(&q, &tsz, sz + 5, 0, 0); if (r != ALLOCM_SUCCESS) fprintf(stderr, "Unexpected rallocm() error\n"); if (q == p) @@ -76,7 +76,7 @@ main(void) p = q; sz = tsz; - r = JEMALLOC_P(rallocm)(&q, &tsz, pagesize*2, 0, 0); + r = rallocm(&q, &tsz, pagesize*2, 0, 0); if (r != ALLOCM_SUCCESS) fprintf(stderr, "Unexpected rallocm() error\n"); if (q == p) @@ -88,7 +88,7 @@ main(void) p = q; sz = tsz; - r = JEMALLOC_P(rallocm)(&q, &tsz, pagesize*4, 0, 0); + r = rallocm(&q, &tsz, pagesize*4, 0, 0); if (r != ALLOCM_SUCCESS) fprintf(stderr, "Unexpected rallocm() error\n"); if (tsz == sz) { @@ -98,7 +98,7 @@ main(void) p = q; sz = tsz; - r = JEMALLOC_P(rallocm)(&q, &tsz, pagesize*2, 0, ALLOCM_NO_MOVE); + r = rallocm(&q, &tsz, pagesize*2, 0, ALLOCM_NO_MOVE); if (r != ALLOCM_SUCCESS) fprintf(stderr, "Unexpected rallocm() error\n"); if (q != p) @@ -109,7 +109,7 @@ main(void) } sz = tsz; - r = JEMALLOC_P(rallocm)(&q, &tsz, pagesize*4, 0, ALLOCM_NO_MOVE); + r = rallocm(&q, &tsz, 
pagesize*4, 0, ALLOCM_NO_MOVE); if (r != ALLOCM_SUCCESS) fprintf(stderr, "Unexpected rallocm() error\n"); if (q != p) @@ -120,7 +120,7 @@ main(void) } sz = tsz; - JEMALLOC_P(dallocm)(p, 0); + dallocm(p, 0); fprintf(stderr, "Test end\n"); return (0); diff --git a/test/thread_arena.c b/test/thread_arena.c index ef8d6817..2922d1b4 100644 --- a/test/thread_arena.c +++ b/test/thread_arena.c @@ -18,22 +18,22 @@ thread_start(void *arg) size_t size; int err; - p = JEMALLOC_P(malloc)(1); + p = malloc(1); if (p == NULL) { fprintf(stderr, "%s(): Error in malloc()\n", __func__); return (void *)1; } size = sizeof(arena_ind); - if ((err = JEMALLOC_P(mallctl)("thread.arena", &arena_ind, &size, - &main_arena_ind, sizeof(main_arena_ind)))) { + if ((err = mallctl("thread.arena", &arena_ind, &size, &main_arena_ind, + sizeof(main_arena_ind)))) { fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__, strerror(err)); return (void *)1; } size = sizeof(arena_ind); - if ((err = JEMALLOC_P(mallctl)("thread.arena", &arena_ind, &size, NULL, + if ((err = mallctl("thread.arena", &arena_ind, &size, NULL, 0))) { fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__, strerror(err)); @@ -57,7 +57,7 @@ main(void) fprintf(stderr, "Test begin\n"); - p = JEMALLOC_P(malloc)(1); + p = malloc(1); if (p == NULL) { fprintf(stderr, "%s(): Error in malloc()\n", __func__); ret = 1; @@ -65,8 +65,7 @@ main(void) } size = sizeof(arena_ind); - if ((err = JEMALLOC_P(mallctl)("thread.arena", &arena_ind, &size, NULL, - 0))) { + if ((err = mallctl("thread.arena", &arena_ind, &size, NULL, 0))) { fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__, strerror(err)); ret = 1; From 62320b85517c472beb12bf0ba69660393712f51a Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 1 Mar 2012 17:53:16 -0800 Subject: [PATCH 0066/3378] Reorder macros. 
--- include/jemalloc/jemalloc.h.in | 8 ++++---- include/jemalloc/jemalloc_defs.h.in | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/include/jemalloc/jemalloc.h.in b/include/jemalloc/jemalloc.h.in index b9301175..f567ee57 100644 --- a/include/jemalloc/jemalloc.h.in +++ b/include/jemalloc/jemalloc.h.in @@ -88,10 +88,10 @@ int je_nallocm(size_t *rsize, size_t size, int flags); #define memalign je_memalign #define valloc je_valloc #define allocm je_allocm -#define dallocm je_dallocm -#define nallocm je_nallocm #define rallocm je_rallocm #define sallocm je_sallocm +#define dallocm je_dallocm +#define nallocm je_nallocm #endif /* @@ -117,10 +117,10 @@ int je_nallocm(size_t *rsize, size_t size, int flags); #undef je_memalign #undef je_valloc #undef je_allocm -#undef je_dallocm -#undef je_nallocm #undef je_rallocm #undef je_sallocm +#undef je_dallocm +#undef je_nallocm #endif #ifdef __cplusplus diff --git a/include/jemalloc/jemalloc_defs.h.in b/include/jemalloc/jemalloc_defs.h.in index 3d1a8d30..444950c8 100644 --- a/include/jemalloc/jemalloc_defs.h.in +++ b/include/jemalloc/jemalloc_defs.h.in @@ -26,10 +26,10 @@ #undef je_memalign #undef je_valloc #undef je_allocm -#undef je_dallocm -#undef je_nallocm #undef je_rallocm #undef je_sallocm +#undef je_dallocm +#undef je_nallocm /* * JEMALLOC_PRIVATE_NAMESPACE is used as a prefix for all library-private APIs. From 84f7cdb0c588322dfd50a26497fc1cb54b792018 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 2 Mar 2012 15:59:45 -0800 Subject: [PATCH 0067/3378] Rename prn to prng. Rename prn to prng so that Windows doesn't choke when trying to create a file named prn.h. 
--- include/jemalloc/internal/ckh.h | 2 +- include/jemalloc/internal/jemalloc_internal.h.in | 8 ++++---- include/jemalloc/internal/{prn.h => prng.h} | 8 ++++---- include/jemalloc/internal/prof.h | 6 +++--- src/ckh.c | 6 +++--- src/prof.c | 2 +- 6 files changed, 16 insertions(+), 16 deletions(-) rename include/jemalloc/internal/{prn.h => prng.h} (89%) diff --git a/include/jemalloc/internal/ckh.h b/include/jemalloc/internal/ckh.h index 65f30f56..05d1fc03 100644 --- a/include/jemalloc/internal/ckh.h +++ b/include/jemalloc/internal/ckh.h @@ -42,7 +42,7 @@ struct ckh_s { /* Used for pseudo-random number generation. */ #define CKH_A 1103515241 #define CKH_C 12347 - uint32_t prn_state; + uint32_t prng_state; /* Total number of items. */ size_t count; diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index f13b406e..403e4804 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -316,7 +316,7 @@ extern void (*je_malloc_message)(void *wcbopaque, const char *s); (((s) + PAGE_MASK) & ~PAGE_MASK) #include "jemalloc/internal/atomic.h" -#include "jemalloc/internal/prn.h" +#include "jemalloc/internal/prng.h" #include "jemalloc/internal/ckh.h" #include "jemalloc/internal/size_classes.h" #include "jemalloc/internal/stats.h" @@ -342,7 +342,7 @@ extern void (*je_malloc_message)(void *wcbopaque, const char *s); #define JEMALLOC_H_STRUCTS #include "jemalloc/internal/atomic.h" -#include "jemalloc/internal/prn.h" +#include "jemalloc/internal/prng.h" #include "jemalloc/internal/ckh.h" #include "jemalloc/internal/size_classes.h" #include "jemalloc/internal/stats.h" @@ -445,7 +445,7 @@ void jemalloc_prefork(void); void jemalloc_postfork(void); #include "jemalloc/internal/atomic.h" -#include "jemalloc/internal/prn.h" +#include "jemalloc/internal/prng.h" #include "jemalloc/internal/ckh.h" #include "jemalloc/internal/size_classes.h" #include 
"jemalloc/internal/stats.h" @@ -471,7 +471,7 @@ void jemalloc_postfork(void); #define JEMALLOC_H_INLINES #include "jemalloc/internal/atomic.h" -#include "jemalloc/internal/prn.h" +#include "jemalloc/internal/prng.h" #include "jemalloc/internal/ckh.h" #include "jemalloc/internal/size_classes.h" #include "jemalloc/internal/stats.h" diff --git a/include/jemalloc/internal/prn.h b/include/jemalloc/internal/prng.h similarity index 89% rename from include/jemalloc/internal/prn.h rename to include/jemalloc/internal/prng.h index 0709d708..83a5462b 100644 --- a/include/jemalloc/internal/prn.h +++ b/include/jemalloc/internal/prng.h @@ -4,7 +4,7 @@ /* * Simple linear congruential pseudo-random number generator: * - * prn(y) = (a*x + c) % m + * prng(y) = (a*x + c) % m * * where the following constants ensure maximal period: * @@ -25,7 +25,7 @@ * uint32_t state : Seed value. * const uint32_t a, c : See above discussion. */ -#define prn32(r, lg_range, state, a, c) do { \ +#define prng32(r, lg_range, state, a, c) do { \ assert(lg_range > 0); \ assert(lg_range <= 32); \ \ @@ -34,8 +34,8 @@ r >>= (32 - lg_range); \ } while (false) -/* Same as prn32(), but 64 bits of pseudo-randomness, using uint64_t. */ -#define prn64(r, lg_range, state, a, c) do { \ +/* Same as prng32(), but 64 bits of pseudo-randomness, using uint64_t. */ +#define prng64(r, lg_range, state, a, c) do { \ assert(lg_range > 0); \ assert(lg_range <= 64); \ \ diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index 744d361e..e08a50ab 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -140,7 +140,7 @@ struct prof_tdata_s { void **vec; /* Sampling state. */ - uint64_t prn_state; + uint64_t prng_state; uint64_t threshold; uint64_t accum; }; @@ -245,7 +245,7 @@ bool prof_boot2(void); if (prof_tdata->threshold == 0) { \ /* Initialize. Seed the prng differently for */\ /* each thread. 
*/\ - prof_tdata->prn_state = \ + prof_tdata->prng_state = \ (uint64_t)(uintptr_t)&size; \ prof_sample_threshold_update(prof_tdata); \ } \ @@ -307,7 +307,7 @@ prof_sample_threshold_update(prof_tdata_t *prof_tdata) * pp 500 * (http://cg.scs.carleton.ca/~luc/rnbookindex.html) */ - prn64(r, 53, prof_tdata->prn_state, + prng64(r, 53, prof_tdata->prng_state, (uint64_t)6364136223846793005LLU, (uint64_t)1442695040888963407LLU); u = (double)r * (1.0/9007199254740992.0L); prof_tdata->threshold = (uint64_t)(log(u) / diff --git a/src/ckh.c b/src/ckh.c index 4a6d1e37..bea6ef8a 100644 --- a/src/ckh.c +++ b/src/ckh.c @@ -99,7 +99,7 @@ ckh_try_bucket_insert(ckh_t *ckh, size_t bucket, const void *key, * Cycle through the cells in the bucket, starting at a random position. * The randomness avoids worst-case search overhead as buckets fill up. */ - prn32(offset, LG_CKH_BUCKET_CELLS, ckh->prn_state, CKH_A, CKH_C); + prng32(offset, LG_CKH_BUCKET_CELLS, ckh->prng_state, CKH_A, CKH_C); for (i = 0; i < (ZU(1) << LG_CKH_BUCKET_CELLS); i++) { cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) + ((i + offset) & ((ZU(1) << LG_CKH_BUCKET_CELLS) - 1))]; @@ -141,7 +141,7 @@ ckh_evict_reloc_insert(ckh_t *ckh, size_t argbucket, void const **argkey, * were an item for which both hashes indicated the same * bucket. */ - prn32(i, LG_CKH_BUCKET_CELLS, ckh->prn_state, CKH_A, CKH_C); + prng32(i, LG_CKH_BUCKET_CELLS, ckh->prng_state, CKH_A, CKH_C); cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) + i]; assert(cell->key != NULL); @@ -361,7 +361,7 @@ ckh_new(ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh_keycomp_t *keycomp) ckh->ninserts = 0; ckh->nrelocs = 0; #endif - ckh->prn_state = 42; /* Value doesn't really matter. */ + ckh->prng_state = 42; /* Value doesn't really matter. 
*/ ckh->count = 0; /* diff --git a/src/prof.c b/src/prof.c index 21b60e3e..c4a2d64d 100644 --- a/src/prof.c +++ b/src/prof.c @@ -1170,7 +1170,7 @@ prof_tdata_init(void) return (NULL); } - prof_tdata->prn_state = 0; + prof_tdata->prng_state = 0; prof_tdata->threshold = 0; prof_tdata->accum = 0; From 7b398aca3bfa558774548ffed6c1a8baba46cc79 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 2 Mar 2012 16:38:37 -0800 Subject: [PATCH 0068/3378] Add fine-grained build/install targets. --- INSTALL | 13 +++++++++++++ Makefile.in | 27 ++++++++++++++++++--------- 2 files changed, 31 insertions(+), 9 deletions(-) diff --git a/INSTALL b/INSTALL index 6e32d715..92cd0851 100644 --- a/INSTALL +++ b/INSTALL @@ -169,11 +169,24 @@ PATH="?" === Advanced compilation ======================================================= +To build only parts of jemalloc, use the following targets: + + build_lib_shared + build_lib_static + build_lib + build_doc_html + build_doc_man + build_doc + To install only parts of jemalloc, use the following targets: install_bin install_include + install_lib_shared + install_lib_static install_lib + install_doc_html + install_doc_man install_doc To clean up build results to varying degrees, use the following make targets: diff --git a/Makefile.in b/Makefile.in index ca4365e8..0accf102 100644 --- a/Makefile.in +++ b/Makefile.in @@ -76,9 +76,9 @@ CTESTS := @srcroot@test/allocated.c @srcroot@test/allocm.c \ .SECONDARY : $(CTESTS:@srcroot@%.c=@objroot@%.o) # Default target. 
-all: $(DSOS) $(STATIC_LIBS) +all: build -dist: doc +dist: build_doc @srcroot@doc/%.html : @objroot@doc/%.xml @srcroot@doc/stylesheet.xsl @objroot@doc/html.xsl @XSLTPROC@ -o $@ @objroot@doc/html.xsl $< @@ -86,9 +86,9 @@ dist: doc @srcroot@doc/%.3 : @objroot@doc/%.xml @srcroot@doc/stylesheet.xsl @objroot@doc/manpages.xsl @XSLTPROC@ -o $@ @objroot@doc/manpages.xsl $< -doc_html: $(DOCS_HTML) -doc_man: $(DOCS_MAN3) -doc: $(DOCS) +build_doc_html: $(DOCS_HTML) +build_doc_man: $(DOCS_MAN3) +build_doc: $(DOCS) # # Include generated dependency files. @@ -140,6 +140,10 @@ else $(CC) -o $@ $< -L@objroot@lib -ljemalloc@install_suffix@ -lpthread endif +build_lib_shared: $(DSOS) +build_lib_static: $(STATIC_LIBS) +build: build_lib_shared build_lib_static + install_bin: install -d $(BINDIR) @for b in $(BINS); do \ @@ -154,28 +158,33 @@ install_include: install -m 644 $$h $(INCLUDEDIR)/jemalloc; \ done -install_lib: $(DSOS) $(STATIC_LIBS) +install_lib_shared: $(DSOS) install -d $(LIBDIR) install -m 755 @objroot@lib/libjemalloc@install_suffix@.$(SO).$(REV) $(LIBDIR) ln -sf libjemalloc@install_suffix@.$(SO).$(REV) $(LIBDIR)/libjemalloc@install_suffix@.$(SO) + +install_lib_static: $(DSOS) $(STATIC_LIBS) + install -d $(LIBDIR) install -m 755 @objroot@lib/libjemalloc@install_suffix@_pic.a $(LIBDIR) install -m 755 @objroot@lib/libjemalloc@install_suffix@.a $(LIBDIR) -install_html: +install_lib: install_lib_shared install_lib_static + +install_doc_html: install -d $(DATADIR)/doc/jemalloc@install_suffix@ @for d in $(DOCS_HTML); do \ echo "install -m 644 $$d $(DATADIR)/doc/jemalloc@install_suffix@"; \ install -m 644 $$d $(DATADIR)/doc/jemalloc@install_suffix@; \ done -install_man: +install_doc_man: install -d $(MANDIR)/man3 @for d in $(DOCS_MAN3); do \ echo "install -m 644 $$d $(MANDIR)/man3"; \ install -m 644 $$d $(MANDIR)/man3; \ done -install_doc: install_html install_man +install_doc: install_doc_html install_doc_man install: install_bin install_include install_lib install_doc From 
7e77eaffffe5c73d44ee64b14ba4b3d7693179d6 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 2 Mar 2012 17:47:37 -0800 Subject: [PATCH 0069/3378] Add the --disable-experimental option. --- INSTALL | 3 +++ Makefile.in | 8 +++--- configure.ac | 39 ++++++++++++++++++++++------- doc/jemalloc.xml.in | 4 ++- include/jemalloc/jemalloc.h.in | 8 ++++++ include/jemalloc/jemalloc_defs.h.in | 3 +++ src/jemalloc.c | 12 ++++++++- 7 files changed, 63 insertions(+), 14 deletions(-) diff --git a/INSTALL b/INSTALL index 92cd0851..c0ae106a 100644 --- a/INSTALL +++ b/INSTALL @@ -116,6 +116,9 @@ any of the following arguments (not a definitive list) to 'configure': Disable support for junk/zero filling of memory. See the "opt.junk"/ "opt.zero" option documentation for usage details. +--disable-experimental + Disable support for the experimental API (*allocm()). + --enable-xmalloc Enable support for optional immediate termination due to out-of-memory errors, as is commonly implemented by "xmalloc" wrapper function for malloc. 
diff --git a/Makefile.in b/Makefile.in index 0accf102..1f1ffd33 100644 --- a/Makefile.in +++ b/Makefile.in @@ -63,10 +63,12 @@ DOCS_XML := @objroot@doc/jemalloc@install_suffix@.xml DOCS_HTML := $(DOCS_XML:@objroot@%.xml=@srcroot@%.html) DOCS_MAN3 := $(DOCS_XML:@objroot@%.xml=@srcroot@%.3) DOCS := $(DOCS_HTML) $(DOCS_MAN3) -CTESTS := @srcroot@test/allocated.c @srcroot@test/allocm.c \ - @srcroot@test/bitmap.c @srcroot@test/mremap.c \ - @srcroot@test/posix_memalign.c @srcroot@test/rallocm.c \ +CTESTS := @srcroot@test/allocated.c @srcroot@test/bitmap.c \ + @srcroot@test/mremap.c @srcroot@test/posix_memalign.c \ @srcroot@test/thread_arena.c +ifeq (@enable_experimental@, 1) +CTESTS += @srcroot@test/allocm.c @srcroot@test/rallocm.c +endif .PHONY: all dist doc_html doc_man doc .PHONY: install_bin install_include install_lib diff --git a/configure.ac b/configure.ac index 81ab233a..c61a665b 100644 --- a/configure.ac +++ b/configure.ac @@ -303,6 +303,34 @@ AC_PATH_PROG([AR], [ar], , [$PATH]) AC_PATH_PROG([LD], [ld], , [$PATH]) AC_PATH_PROG([AUTOCONF], [autoconf], , [$PATH]) +public_syms="malloc_conf malloc_message malloc calloc posix_memalign realloc free malloc_usable_size malloc_stats_print mallctl mallctlnametomib mallctlbymib" + +dnl Check for allocator-related functions that should be wrapped. +AC_CHECK_FUNC([memalign], + [AC_DEFINE([JEMALLOC_OVERRIDE_MEMALIGN]) + public_syms="${public_syms} memalign"]) +AC_CHECK_FUNC([valloc], + [AC_DEFINE([JEMALLOC_OVERRIDE_VALLOC]) + public_syms="${public_syms} valloc"]) + +dnl Support the experimental API by default. 
+AC_ARG_ENABLE([experimental], + [AS_HELP_STRING([--disable-experimental], + [Disable support for the experimental API])], +[if test "x$enable_experimental" = "xno" ; then + enable_experimental="0" +else + enable_experimental="1" +fi +], +[enable_experimental="1"] +) +if test "x$enable_experimental" = "x1" ; then + AC_DEFINE([JEMALLOC_EXPERIMENTAL], [ ]) + public_syms="${public_syms} allocm dallocm nallocm rallocm sallocm" +fi +AC_SUBST([enable_experimental]) + dnl Perform no name mangling by default. AC_ARG_WITH([mangling], [AS_HELP_STRING([--with-mangling=], [Mangle symbols in ])], @@ -331,7 +359,7 @@ fi dnl Generate macros to rename public symbols. All public symbols are prefixed dnl with je_ in the source code, so these macro definitions are needed even if dnl --with-jemalloc-prefix wasn't specified. -for stem in malloc_conf malloc_message malloc calloc posix_memalign realloc free malloc_usable_size malloc_stats_print mallctl mallctlnametomib mallctlbymib memalign valloc allocm dallocm nallocm rallocm sallocm; do +for stem in ${public_syms}; do n="je_${stem}" m="${JEMALLOC_PREFIX}${stem}" AC_DEFINE_UNQUOTED([${n}], [${m}]) @@ -795,14 +823,6 @@ if test "x${osspin}" = "xyes" ; then AC_DEFINE([JEMALLOC_OSSPIN]) fi -dnl ============================================================================ -dnl Check for allocator-related functions that should be wrapped. - -AC_CHECK_FUNC([memalign], - [AC_DEFINE([JEMALLOC_OVERRIDE_MEMALIGN])]) -AC_CHECK_FUNC([valloc], - [AC_DEFINE([JEMALLOC_OVERRIDE_VALLOC])]) - dnl ============================================================================ dnl Darwin-related configuration. 
@@ -903,6 +923,7 @@ AC_MSG_RESULT([JEMALLOC_PRIVATE_NAMESPACE]) AC_MSG_RESULT([ : ${JEMALLOC_PRIVATE_NAMESPACE}]) AC_MSG_RESULT([install_suffix : ${install_suffix}]) AC_MSG_RESULT([autogen : ${enable_autogen}]) +AC_MSG_RESULT([experimental : ${enable_experimental}]) AC_MSG_RESULT([cc-silence : ${enable_cc_silence}]) AC_MSG_RESULT([debug : ${enable_debug}]) AC_MSG_RESULT([stats : ${enable_stats}]) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 6aa412a1..ca13493a 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -304,7 +304,9 @@ for (i = 0; i < nbins; i++) { Experimental API The experimental API is subject to change or removal without regard - for backward compatibility. + for backward compatibility. If + is specified during configuration, the experimental API is + omitted. The allocm, rallocm, diff --git a/include/jemalloc/jemalloc.h.in b/include/jemalloc/jemalloc.h.in index f567ee57..742daddd 100644 --- a/include/jemalloc/jemalloc.h.in +++ b/include/jemalloc/jemalloc.h.in @@ -16,6 +16,7 @@ extern "C" { #include "jemalloc_defs@install_suffix@.h" +#ifdef JEMALLOC_EXPERIMENTAL #define ALLOCM_LG_ALIGN(la) (la) #if LG_SIZEOF_PTR == 2 #define ALLOCM_ALIGN(a) (ffs(a)-1) @@ -28,6 +29,7 @@ extern "C" { #define ALLOCM_SUCCESS 0 #define ALLOCM_ERR_OOM 1 #define ALLOCM_ERR_NOT_MOVED 2 +#endif /* * The je_ prefix on the following public symbol declarations is an artifact of @@ -53,6 +55,7 @@ int je_mallctlnametomib(const char *name, size_t *mibp, size_t *miblenp); int je_mallctlbymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen); +#ifdef JEMALLOC_EXPERIMENTAL int je_allocm(void **ptr, size_t *rsize, size_t size, int flags) JEMALLOC_ATTR(nonnull(1)); int je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, @@ -61,6 +64,7 @@ int je_sallocm(const void *ptr, size_t *rsize, int flags) JEMALLOC_ATTR(nonnull(1)); int je_dallocm(void *ptr, int flags) JEMALLOC_ATTR(nonnull(1)); int je_nallocm(size_t 
*rsize, size_t size, int flags); +#endif /* * By default application code must explicitly refer to mangled symbol names, @@ -87,12 +91,14 @@ int je_nallocm(size_t *rsize, size_t size, int flags); #define mallctlbymib je_mallctlbymib #define memalign je_memalign #define valloc je_valloc +#ifdef JEMALLOC_EXPERIMENTAL #define allocm je_allocm #define rallocm je_rallocm #define sallocm je_sallocm #define dallocm je_dallocm #define nallocm je_nallocm #endif +#endif /* * The je_* macros can be used as stable alternative names for the public @@ -116,12 +122,14 @@ int je_nallocm(size_t *rsize, size_t size, int flags); #undef je_mallctlbymib #undef je_memalign #undef je_valloc +#ifdef JEMALLOC_EXPERIMENTAL #undef je_allocm #undef je_rallocm #undef je_sallocm #undef je_dallocm #undef je_nallocm #endif +#endif #ifdef __cplusplus }; diff --git a/include/jemalloc/jemalloc_defs.h.in b/include/jemalloc/jemalloc_defs.h.in index 444950c8..049cf01a 100644 --- a/include/jemalloc/jemalloc_defs.h.in +++ b/include/jemalloc/jemalloc_defs.h.in @@ -108,6 +108,9 @@ /* Support memory filling (junk/zero). */ #undef JEMALLOC_FILL +/* Support the experimental API. */ +#undef JEMALLOC_EXPERIMENTAL + /* Support optional abort() on OOM. */ #undef JEMALLOC_XMALLOC diff --git a/src/jemalloc.c b/src/jemalloc.c index 6e34706d..3c39c857 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1361,6 +1361,15 @@ je_mallctlbymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, return (ctl_bymib(mib, miblen, oldp, oldlenp, newp, newlen)); } +/* + * End non-standard functions. + */ +/******************************************************************************/ +/* + * Begin experimental functions. + */ +#ifdef JEMALLOC_EXPERIMENTAL + JEMALLOC_INLINE void * iallocm(size_t usize, size_t alignment, bool zero) { @@ -1601,8 +1610,9 @@ je_nallocm(size_t *rsize, size_t size, int flags) return (ALLOCM_SUCCESS); } +#endif /* - * End non-standard functions. + * End experimental functions. 
*/ /******************************************************************************/ From 1af52cfe64fb6dc11b845a4fa8c6332b319286cc Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 2 Mar 2012 17:51:26 -0800 Subject: [PATCH 0070/3378] Update copyright dates. --- COPYING | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/COPYING b/COPYING index 10ade120..b8a48ff0 100644 --- a/COPYING +++ b/COPYING @@ -1,9 +1,9 @@ Unless otherwise specified, files in the jemalloc source distribution are subject to the following licenses: -------------------------------------------------------------------------------- -Copyright (C) 2002-2010 Jason Evans . +Copyright (C) 2002-2012 Jason Evans . All rights reserved. -Copyright (C) 2007-2010 Mozilla Foundation. All rights reserved. +Copyright (C) 2007-2012 Mozilla Foundation. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: @@ -24,7 +24,7 @@ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- -Copyright (C) 2009-2010 Facebook, Inc. +Copyright (C) 2009-2012 Facebook, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, From c71b9946cedec094b542016a15f752ff2fef997b Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 4 Mar 2012 10:46:12 -0800 Subject: [PATCH 0071/3378] Unify license. Update Facebook's software license (with permission from Facebook) to merge with that of the other copyright holders. 
--- COPYING | 28 ++-------------------------- 1 file changed, 2 insertions(+), 26 deletions(-) diff --git a/COPYING b/COPYING index b8a48ff0..e27fc4d6 100644 --- a/COPYING +++ b/COPYING @@ -1,9 +1,10 @@ Unless otherwise specified, files in the jemalloc source distribution are -subject to the following licenses: +subject to the following license: -------------------------------------------------------------------------------- Copyright (C) 2002-2012 Jason Evans . All rights reserved. Copyright (C) 2007-2012 Mozilla Foundation. All rights reserved. +Copyright (C) 2009-2012 Facebook, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: @@ -24,28 +25,3 @@ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- -Copyright (C) 2009-2012 Facebook, Inc. -All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. -* Redistributions in binary form must reproduce the above copyright notice, this - list of conditions and the following disclaimer in the documentation and/or - other materials provided with the distribution. -* Neither the name of Facebook, Inc. nor the names of its contributors may be - used to endorse or promote products derived from this software without - specific prior written permission. 
- -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR -ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON -ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. --------------------------------------------------------------------------------- From 6684cacfa8fa9d82762983bce11bb953ce62ba8e Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 5 Mar 2012 12:15:36 -0800 Subject: [PATCH 0072/3378] Tweak configure.ac to support cross-compiling. Submitted by Andreas Vinsander. 
--- configure.ac | 78 +++++++++++++++++++++++++++++++--------------------- 1 file changed, 46 insertions(+), 32 deletions(-) diff --git a/configure.ac b/configure.ac index c61a665b..5b6c6b37 100644 --- a/configure.ac +++ b/configure.ac @@ -14,7 +14,7 @@ if test "x${CFLAGS}" = "x" ; then else CFLAGS="${CFLAGS} $1" fi -AC_RUN_IFELSE([AC_LANG_PROGRAM( +AC_COMPILE_IFELSE([AC_LANG_PROGRAM( [[ ]], [[ return 0; @@ -28,14 +28,12 @@ AC_RUN_IFELSE([AC_LANG_PROGRAM( dnl JE_COMPILABLE(label, hcode, mcode, rvar) AC_DEFUN([JE_COMPILABLE], [ -AC_MSG_CHECKING([whether $1 is compilable]) -AC_RUN_IFELSE([AC_LANG_PROGRAM( -[$2], [$3])], - AC_MSG_RESULT([yes]) - [$4="yes"], - AC_MSG_RESULT([no]) - [$4="no"] -) +AC_CACHE_CHECK([whether $1 is compilable], + [$4], + [AC_COMPILE_IFELSE([AC_LANG_PROGRAM([$2], + [$3])], + [$4=yes], + [$4=no])]) ]) dnl ============================================================================ @@ -154,15 +152,15 @@ case "${host_cpu}" in ;; i686) JE_COMPILABLE([__asm__], [], [[__asm__ volatile("pause"); return 0;]], - [asm]) - if test "x${asm}" = "xyes" ; then + [je_cv_asm]) + if test "x${je_cv_asm}" = "xyes" ; then CPU_SPINWAIT='__asm__ volatile("pause")' fi ;; x86_64) JE_COMPILABLE([__asm__ syntax], [], - [[__asm__ volatile("pause"); return 0;]], [asm]) - if test "x${asm}" = "xyes" ; then + [[__asm__ volatile("pause"); return 0;]], [je_cv_asm]) + if test "x${je_cv_asm}" = "xyes" ; then CPU_SPINWAIT='__asm__ volatile("pause")' fi ;; @@ -254,8 +252,8 @@ AC_SUBST([so]) JE_COMPILABLE([__attribute__ syntax], [static __attribute__((unused)) void foo(void){}], [], - [attribute]) -if test "x${attribute}" = "xyes" ; then + [je_cv_attribute]) +if test "x${je_cv_attribute}" = "xyes" ; then AC_DEFINE([JEMALLOC_HAVE_ATTR], [ ]) if test "x${GCC}" = "xyes" -a "x${abi}" = "xelf"; then JE_CFLAGS_APPEND([-fvisibility=hidden]) @@ -267,8 +265,8 @@ JE_COMPILABLE([mremap(...MREMAP_FIXED...)], [ #include ], [ void *p = mremap((void *)0, 0, 0, MREMAP_MAYMOVE|MREMAP_FIXED, (void 
*)0); -], [mremap_fixed]) -if test "x${mremap_fixed}" = "xyes" ; then +], [je_cv_mremap_fixed]) +if test "x${je_cv_mremap_fixed}" = "xyes" ; then AC_DEFINE([JEMALLOC_MREMAP_FIXED]) fi @@ -672,12 +670,14 @@ if test "x$enable_xmalloc" = "x1" ; then fi AC_SUBST([enable_xmalloc]) -AC_MSG_CHECKING([STATIC_PAGE_SHIFT]) -AC_RUN_IFELSE([AC_LANG_PROGRAM( +AC_CACHE_CHECK([STATIC_PAGE_SHIFT], + [je_cv_static_page_shift], + AC_RUN_IFELSE([AC_LANG_PROGRAM( [[#include #include #include -]], [[ +]], +[[ long result; FILE *f; @@ -694,10 +694,14 @@ AC_RUN_IFELSE([AC_LANG_PROGRAM( return 0; ]])], - [STATIC_PAGE_SHIFT=`cat conftest.out`] - AC_MSG_RESULT([$STATIC_PAGE_SHIFT]) - AC_DEFINE_UNQUOTED([STATIC_PAGE_SHIFT], [$STATIC_PAGE_SHIFT]), - AC_MSG_RESULT([error])) + [je_cv_static_page_shift=`cat conftest.out`], + [je_cv_static_page_shift=undefined])) + +if test "x$je_cv_static_page_shift" != "xundefined"; then + AC_DEFINE_UNQUOTED([STATIC_PAGE_SHIFT], [$je_cv_static_page_shift]) +else + AC_MSG_ERROR([cannot determine value for STATIC_PAGE_SHIFT]) +fi dnl ============================================================================ dnl jemalloc configuration. @@ -761,7 +765,7 @@ enable_tls="1" ) if test "x${enable_tls}" = "x1" ; then AC_MSG_CHECKING([for TLS]) -AC_RUN_IFELSE([AC_LANG_PROGRAM( +AC_COMPILE_IFELSE([AC_LANG_PROGRAM( [[ __thread int x; ]], [[ @@ -782,9 +786,19 @@ dnl ============================================================================ dnl Check for ffsl(3), and fail if not found. This function exists on all dnl platforms that jemalloc currently has a chance of functioning on without dnl modification. 
- -AC_CHECK_FUNC([ffsl], [], - [AC_MSG_ERROR([Cannot build without ffsl(3)])]) +JE_COMPILABLE([a program using ffsl], + [ + #include + ], + [ + { + int rv = ffsl(0x08); + } + ], + [je_cv_function_ffsl]) +if test "x${je_cv_function_ffsl}" != "xyes" ; then + AC_MSG_ERROR([Cannot build without ffsl(3)]) +fi dnl ============================================================================ dnl Check for atomic(3) operations as provided on Darwin. @@ -803,8 +817,8 @@ JE_COMPILABLE([Darwin OSAtomic*()], [ volatile int64_t *x64p = &x64; OSAtomicAdd64(1, x64p); } -], [osatomic]) -if test "x${osatomic}" = "xyes" ; then +], [je_cv_osatomic]) +if test "x${je_cv_osatomic}" = "xyes" ; then AC_DEFINE([JEMALLOC_OSATOMIC]) fi @@ -818,8 +832,8 @@ JE_COMPILABLE([Darwin OSSpin*()], [ OSSpinLock lock = 0; OSSpinLockLock(&lock); OSSpinLockUnlock(&lock); -], [osspin]) -if test "x${osspin}" = "xyes" ; then +], [je_cv_osspin]) +if test "x${je_cv_osspin}" = "xyes" ; then AC_DEFINE([JEMALLOC_OSSPIN]) fi From 3492daf1ce6fb85040d28dfd4dcb51cbf6b0da51 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 5 Mar 2012 12:16:57 -0800 Subject: [PATCH 0073/3378] Add SH4 and mips architecture support. Submitted by Andreas Vinsander. 
--- include/jemalloc/internal/atomic.h | 16 ++++++++++++++++ include/jemalloc/internal/jemalloc_internal.h.in | 3 +++ 2 files changed, 19 insertions(+) diff --git a/include/jemalloc/internal/atomic.h b/include/jemalloc/internal/atomic.h index 8c685939..1dbb7d6a 100644 --- a/include/jemalloc/internal/atomic.h +++ b/include/jemalloc/internal/atomic.h @@ -160,6 +160,22 @@ atomic_sub_uint32(uint32_t *p, uint32_t x) return (x); } +#elif (defined __SH4__ || defined __mips__) && (__GNUC__ > 4 || \ + (__GNUC__ == 4 && (__GNUC_MINOR__ > 1 || (__GNUC_MINOR__ == 1 && \ + __GNUC_PATCHLEVEL__ > 1)))) +JEMALLOC_INLINE uint32_t +atomic_add_uint32(uint32_t *p, uint32_t x) +{ + + return (__sync_add_and_fetch(p, x)); +} + +JEMALLOC_INLINE uint32_t +atomic_sub_uint32(uint32_t *p, uint32_t x) +{ + + return (__sync_sub_and_fetch(p, x)); +} #else # error "Missing implementation for 32-bit atomic operations" #endif diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 403e4804..3e445f76 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -254,6 +254,9 @@ extern void (*je_malloc_message)(void *wcbopaque, const char *s); # ifdef __s390x__ # define LG_QUANTUM 4 # endif +# ifdef __SH4__ +# define LG_QUANTUM 4 +# endif # ifdef __tile__ # define LG_QUANTUM 4 # endif From b8c8be7f8abe72f4cb4f315f3078ad864fd6a2d8 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 5 Mar 2012 12:26:26 -0800 Subject: [PATCH 0074/3378] Use UINT64_C() rather than LLU for 64-bit constants. 
--- include/jemalloc/internal/hash.h | 2 +- include/jemalloc/internal/prof.h | 2 +- src/ckh.c | 8 ++++---- src/prof.c | 9 +++++---- test/allocm.c | 18 +++++++++--------- test/posix_memalign.c | 17 +++++++++-------- 6 files changed, 29 insertions(+), 27 deletions(-) diff --git a/include/jemalloc/internal/hash.h b/include/jemalloc/internal/hash.h index 8a46ce30..d695e77f 100644 --- a/include/jemalloc/internal/hash.h +++ b/include/jemalloc/internal/hash.h @@ -26,7 +26,7 @@ uint64_t hash(const void *key, size_t len, uint64_t seed); JEMALLOC_INLINE uint64_t hash(const void *key, size_t len, uint64_t seed) { - const uint64_t m = 0xc6a4a7935bd1e995LLU; + const uint64_t m = UINT64_C(0xc6a4a7935bd1e995); const int r = 47; uint64_t h = seed ^ (len * m); const uint64_t *data = (const uint64_t *)key; diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index e08a50ab..f647f637 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -308,7 +308,7 @@ prof_sample_threshold_update(prof_tdata_t *prof_tdata) * (http://cg.scs.carleton.ca/~luc/rnbookindex.html) */ prng64(r, 53, prof_tdata->prng_state, - (uint64_t)6364136223846793005LLU, (uint64_t)1442695040888963407LLU); + UINT64_C(6364136223846793005), UINT64_C(1442695040888963407)); u = (double)r * (1.0/9007199254740992.0L); prof_tdata->threshold = (uint64_t)(log(u) / log(1.0 - (1.0 / (double)((uint64_t)1U << opt_lg_prof_sample)))) diff --git a/src/ckh.c b/src/ckh.c index bea6ef8a..39925ced 100644 --- a/src/ckh.c +++ b/src/ckh.c @@ -535,7 +535,7 @@ ckh_string_hash(const void *key, unsigned minbits, size_t *hash1, size_t *hash2) assert(hash1 != NULL); assert(hash2 != NULL); - h = hash(key, strlen((const char *)key), 0x94122f335b332aeaLLU); + h = hash(key, strlen((const char *)key), UINT64_C(0x94122f335b332aea)); if (minbits <= 32) { /* * Avoid doing multiple hashes, since a single hash provides @@ -546,7 +546,7 @@ ckh_string_hash(const void *key, unsigned minbits, size_t 
*hash1, size_t *hash2) } else { ret1 = h; ret2 = hash(key, strlen((const char *)key), - 0x8432a476666bbc13LLU); + UINT64_C(0x8432a476666bbc13)); } *hash1 = ret1; @@ -583,7 +583,7 @@ ckh_pointer_hash(const void *key, unsigned minbits, size_t *hash1, u.i = 0; #endif u.v = key; - h = hash(&u.i, sizeof(u.i), 0xd983396e68886082LLU); + h = hash(&u.i, sizeof(u.i), UINT64_C(0xd983396e68886082)); if (minbits <= 32) { /* * Avoid doing multiple hashes, since a single hash provides @@ -594,7 +594,7 @@ ckh_pointer_hash(const void *key, unsigned minbits, size_t *hash1, } else { assert(SIZEOF_PTR == 8); ret1 = h; - ret2 = hash(&u.i, sizeof(u.i), 0x5e2be9aff8709a5dLLU); + ret2 = hash(&u.i, sizeof(u.i), UINT64_C(0x5e2be9aff8709a5d)); } *hash1 = ret1; diff --git a/src/prof.c b/src/prof.c index c4a2d64d..d78658d6 100644 --- a/src/prof.c +++ b/src/prof.c @@ -993,7 +993,7 @@ prof_dump_filename(char *filename, char v, int64_t vseq) filename[i] = v; i++; - if (vseq != 0xffffffffffffffffLLU) { + if (vseq != UINT64_C(0xffffffffffffffff)) { s = u2s(vseq, 10, buf); slen = strlen(s); memcpy(&filename[i], s, slen); @@ -1020,7 +1020,7 @@ prof_fdump(void) if (opt_prof_prefix[0] != '\0') { malloc_mutex_lock(&prof_dump_seq_mtx); - prof_dump_filename(filename, 'f', 0xffffffffffffffffLLU); + prof_dump_filename(filename, 'f', UINT64_C(0xffffffffffffffff)); malloc_mutex_unlock(&prof_dump_seq_mtx); prof_dump(filename, opt_prof_leak, false); } @@ -1113,7 +1113,8 @@ prof_bt_hash(const void *key, unsigned minbits, size_t *hash1, size_t *hash2) assert(hash1 != NULL); assert(hash2 != NULL); - h = hash(bt->vec, bt->len * sizeof(void *), 0x94122f335b332aeaLLU); + h = hash(bt->vec, bt->len * sizeof(void *), + UINT64_C(0x94122f335b332aea)); if (minbits <= 32) { /* * Avoid doing multiple hashes, since a single hash provides @@ -1124,7 +1125,7 @@ prof_bt_hash(const void *key, unsigned minbits, size_t *hash1, size_t *hash2) } else { ret1 = h; ret2 = hash(bt->vec, bt->len * sizeof(void *), - 0x8432a476666bbc13LLU); 
+ UINT64_C(0x8432a476666bbc13)); } *hash1 = ret1; diff --git a/test/allocm.c b/test/allocm.c index 151f5749..137e74c3 100644 --- a/test/allocm.c +++ b/test/allocm.c @@ -6,8 +6,8 @@ #include "jemalloc_test.h" #define CHUNK 0x400000 -/* #define MAXALIGN ((size_t)0x80000000000LLU) */ -#define MAXALIGN ((size_t)0x2000000LLU) +/* #define MAXALIGN ((size_t)UINT64_C(0x80000000000)) */ +#define MAXALIGN ((size_t)0x2000000LU) #define NITER 4 int @@ -67,8 +67,8 @@ main(void) fprintf(stderr, "Unexpected dallocm() error\n"); #if LG_SIZEOF_PTR == 3 - alignment = 0x8000000000000000LLU; - sz = 0x8000000000000000LLU; + alignment = UINT64_C(0x8000000000000000); + sz = UINT64_C(0x8000000000000000); #else alignment = 0x80000000LU; sz = 0x80000000LU; @@ -91,8 +91,8 @@ main(void) fprintf(stderr, "nallocm()/allocm() rsize mismatch\n"); #if LG_SIZEOF_PTR == 3 - alignment = 0x4000000000000000LLU; - sz = 0x8400000000000001LLU; + alignment = UINT64_C(0x4000000000000000); + sz = UINT64_C(0x8400000000000001); #else alignment = 0x40000000LU; sz = 0x84000001LU; @@ -109,11 +109,11 @@ main(void) sz, ALLOCM_ALIGN(alignment)); } - alignment = 0x10LLU; + alignment = 0x10LU; #if LG_SIZEOF_PTR == 3 - sz = 0xfffffffffffffff0LLU; + sz = UINT64_C(0xfffffffffffffff0); #else - sz = 0xfffffff0LU; + sz = 0xfffffff0LU; #endif nsz = 0; r = nallocm(&nsz, sz, ALLOCM_ALIGN(alignment)); diff --git a/test/posix_memalign.c b/test/posix_memalign.c index 789131ce..5abb4201 100644 --- a/test/posix_memalign.c +++ b/test/posix_memalign.c @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -8,8 +9,8 @@ #include "jemalloc_test.h" #define CHUNK 0x400000 -/* #define MAXALIGN ((size_t)0x80000000000LLU) */ -#define MAXALIGN ((size_t)0x2000000LLU) +/* #define MAXALIGN ((size_t)UINT64_C(0x80000000000)) */ +#define MAXALIGN ((size_t)0x2000000LU) #define NITER 4 int @@ -43,8 +44,8 @@ main(void) } #if LG_SIZEOF_PTR == 3 - alignment = 0x8000000000000000LLU; - size = 0x8000000000000000LLU; + alignment = 
UINT64_C(0x8000000000000000); + size = UINT64_C(0x8000000000000000); #else alignment = 0x80000000LU; size = 0x80000000LU; @@ -57,8 +58,8 @@ main(void) } #if LG_SIZEOF_PTR == 3 - alignment = 0x4000000000000000LLU; - size = 0x8400000000000001LLU; + alignment = UINT64_C(0x4000000000000000); + size = UINT64_C(0x8400000000000001); #else alignment = 0x40000000LU; size = 0x84000001LU; @@ -70,9 +71,9 @@ main(void) alignment, size); } - alignment = 0x10LLU; + alignment = 0x10LU; #if LG_SIZEOF_PTR == 3 - size = 0xfffffffffffffff0LLU; + size = UINT64_C(0xfffffffffffffff0); #else size = 0xfffffff0LU; #endif From 4507f34628dfae26e6b0a6faa13e5f9a49600616 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 5 Mar 2012 14:34:37 -0800 Subject: [PATCH 0075/3378] Remove the lg_tcache_gc_sweep option. Remove the lg_tcache_gc_sweep option, because it is no longer very useful. Prior to the addition of dynamic adjustment of tcache fill count, it was possible for fill/flush overhead to be a problem, but this problem no longer occurs. --- doc/jemalloc.xml.in | 20 +------------------- include/jemalloc/internal/tcache.h | 23 +++++++++++------------ src/ctl.c | 4 ---- src/jemalloc.c | 2 -- src/stats.c | 11 ----------- src/tcache.c | 10 ---------- 6 files changed, 12 insertions(+), 58 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index ca13493a..74da409f 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -853,29 +853,11 @@ malloc_conf = "xmalloc:true";]]> allocations to be satisfied without performing any thread synchronization, at the cost of increased memory use. See the opt.lg_tcache_gc_sweep - and opt.lg_tcache_max - options for related tuning information. This option is enabled by + option for related tuning information. This option is enabled by default. 
- - - opt.lg_tcache_gc_sweep - (ssize_t) - r- - [] - - Approximate interval (log base 2) between full - thread-specific cache garbage collection sweeps, counted in terms of - thread-specific cache allocation/deallocation events. Garbage - collection is actually performed incrementally, one size class at a - time, in order to avoid large collection pauses. The default sweep - interval is 8192 (2^13); setting this option to -1 will disable garbage - collection. - - opt.lg_tcache_max diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index 77bca8d9..e5f9518e 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -21,12 +21,15 @@ typedef struct tcache_s tcache_t; #define LG_TCACHE_MAXCLASS_DEFAULT 15 /* - * (1U << opt_lg_tcache_gc_sweep) is the approximate number of allocation - * events between full GC sweeps (-1: disabled). Integer rounding may cause - * the actual number to be slightly higher, since GC is performed - * incrementally. + * TCACHE_GC_SWEEP is the approximate number of allocation events between + * full GC sweeps. Integer rounding may cause the actual number to be + * slightly higher, since GC is performed incrementally. */ -#define LG_TCACHE_GC_SWEEP_DEFAULT 13 +#define TCACHE_GC_SWEEP 8192 + +/* Number of tcache allocation/deallocation events between incremental GCs. */ +#define TCACHE_GC_INCR \ + ((TCACHE_GC_SWEEP / NBINS) + ((TCACHE_GC_SWEEP / NBINS == 0) ? 0 : 1)) #endif /* JEMALLOC_H_TYPES */ /******************************************************************************/ @@ -69,7 +72,6 @@ struct tcache_s { extern bool opt_tcache; extern ssize_t opt_lg_tcache_max; -extern ssize_t opt_lg_tcache_gc_sweep; extern tcache_bin_info_t *tcache_bin_info; @@ -99,9 +101,6 @@ extern size_t nhbins; /* Maximum cached size class. */ extern size_t tcache_maxclass; -/* Number of tcache allocation/deallocation events between incremental GCs. 
*/ -extern unsigned tcache_gc_incr; - void tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem, tcache_t *tcache); void tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem, @@ -166,12 +165,12 @@ JEMALLOC_INLINE void tcache_event(tcache_t *tcache) { - if (tcache_gc_incr == 0) + if (TCACHE_GC_INCR == 0) return; tcache->ev_cnt++; - assert(tcache->ev_cnt <= tcache_gc_incr); - if (tcache->ev_cnt == tcache_gc_incr) { + assert(tcache->ev_cnt <= TCACHE_GC_INCR); + if (tcache->ev_cnt == TCACHE_GC_INCR) { size_t binind = tcache->next_gc_bin; tcache_bin_t *tbin = &tcache->tbins[binind]; tcache_bin_info_t *tbin_info = &tcache_bin_info[binind]; diff --git a/src/ctl.c b/src/ctl.c index 0fabd852..e7639d7f 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -65,7 +65,6 @@ CTL_PROTO(opt_junk) CTL_PROTO(opt_zero) CTL_PROTO(opt_xmalloc) CTL_PROTO(opt_tcache) -CTL_PROTO(opt_lg_tcache_gc_sweep) CTL_PROTO(opt_prof) CTL_PROTO(opt_prof_prefix) CTL_PROTO(opt_prof_active) @@ -187,7 +186,6 @@ static const ctl_node_t opt_node[] = { {NAME("zero"), CTL(opt_zero)}, {NAME("xmalloc"), CTL(opt_xmalloc)}, {NAME("tcache"), CTL(opt_tcache)}, - {NAME("lg_tcache_gc_sweep"), CTL(opt_lg_tcache_gc_sweep)}, {NAME("prof"), CTL(opt_prof)}, {NAME("prof_prefix"), CTL(opt_prof_prefix)}, {NAME("prof_active"), CTL(opt_prof_active)}, @@ -1069,8 +1067,6 @@ CTL_RO_NL_CGEN(config_fill, opt_junk, opt_junk, bool) CTL_RO_NL_CGEN(config_fill, opt_zero, opt_zero, bool) CTL_RO_NL_CGEN(config_xmalloc, opt_xmalloc, opt_xmalloc, bool) CTL_RO_NL_CGEN(config_tcache, opt_tcache, opt_tcache, bool) -CTL_RO_NL_CGEN(config_tcache, opt_lg_tcache_gc_sweep, opt_lg_tcache_gc_sweep, - ssize_t) CTL_RO_NL_CGEN(config_prof, opt_prof, opt_prof, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_prefix, opt_prof_prefix, const char *) CTL_RO_CGEN(config_prof, opt_prof_active, opt_prof_active, bool) /* Mutable. 
*/ diff --git a/src/jemalloc.c b/src/jemalloc.c index 3c39c857..ad1ee8ef 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -580,8 +580,6 @@ malloc_conf_init(void) } if (config_tcache) { CONF_HANDLE_BOOL(tcache) - CONF_HANDLE_SSIZE_T(lg_tcache_gc_sweep, -1, - (sizeof(size_t) << 3) - 1) CONF_HANDLE_SSIZE_T(lg_tcache_max, -1, (sizeof(size_t) << 3) - 1) } diff --git a/src/stats.c b/src/stats.c index f905d048..f9763785 100644 --- a/src/stats.c +++ b/src/stats.c @@ -490,7 +490,6 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, OPT_WRITE_BOOL(zero) OPT_WRITE_BOOL(xmalloc) OPT_WRITE_BOOL(tcache) - OPT_WRITE_SSIZE_T(lg_tcache_gc_sweep) OPT_WRITE_SSIZE_T(lg_tcache_max) OPT_WRITE_BOOL(prof) OPT_WRITE_CHAR_P(prof_prefix) @@ -541,16 +540,6 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, write_cb(cbopaque, u2s(sv, 10, s)); write_cb(cbopaque, "\n"); } - if ((err = je_mallctl("opt.lg_tcache_gc_sweep", &ssv, &ssz, - NULL, 0)) == 0) { - size_t tcache_gc_sweep = (1U << ssv); - bool tcache_enabled; - CTL_GET("opt.tcache", &tcache_enabled, bool); - write_cb(cbopaque, "Thread cache GC sweep interval: "); - write_cb(cbopaque, tcache_enabled && ssv >= 0 ? - u2s(tcache_gc_sweep, 10, s) : "N/A"); - write_cb(cbopaque, "\n"); - } if ((err = je_mallctl("opt.prof", &bv, &bsz, NULL, 0)) == 0 && bv) { CTL_GET("opt.lg_prof_sample", &sv, size_t); diff --git a/src/tcache.c b/src/tcache.c index fa05728e..478b7f5f 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -6,7 +6,6 @@ bool opt_tcache = true; ssize_t opt_lg_tcache_max = LG_TCACHE_MAXCLASS_DEFAULT; -ssize_t opt_lg_tcache_gc_sweep = LG_TCACHE_GC_SWEEP_DEFAULT; tcache_bin_info_t *tcache_bin_info; static unsigned stack_nelms; /* Total stack elms per tcache. */ @@ -24,7 +23,6 @@ pthread_key_t tcache_tsd; size_t nhbins; size_t tcache_maxclass; -unsigned tcache_gc_incr; /******************************************************************************/ /* Function prototypes for non-inline static functions. 
*/ @@ -419,14 +417,6 @@ tcache_boot(void) stack_nelms += tcache_bin_info[i].ncached_max; } - /* Compute incremental GC event threshold. */ - if (opt_lg_tcache_gc_sweep >= 0) { - tcache_gc_incr = ((1U << opt_lg_tcache_gc_sweep) / - NBINS) + (((1U << opt_lg_tcache_gc_sweep) % NBINS == - 0) ? 0 : 1); - } else - tcache_gc_incr = 0; - if (pthread_key_create(&tcache_tsd, tcache_thread_cleanup) != 0) { malloc_write( From d81e4bdd5c991bd5642c8b859ef1f752b51cd9be Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 6 Mar 2012 14:57:45 -0800 Subject: [PATCH 0076/3378] Implement malloc_vsnprintf(). Implement malloc_vsnprintf() (a subset of vsnprintf(3)) as well as several other printing functions based on it, so that formatted printing can be relied upon without concern for inducing a dependency on floating point runtime support. Replace malloc_write() calls with malloc_*printf() where doing so simplifies the code. Add name mangling for library-private symbols in the data and BSS sections. Adjust CONF_HANDLE_*() macros in malloc_conf_init() to expose all opt_* variable use to cpp so that proper mangling occurs. 
--- Makefile.in | 2 +- configure.ac | 12 + include/jemalloc/internal/ctl.h | 12 +- include/jemalloc/internal/hash.h | 16 +- .../jemalloc/internal/jemalloc_internal.h.in | 72 +-- include/jemalloc/internal/private_namespace.h | 28 +- include/jemalloc/internal/stats.h | 7 - include/jemalloc/internal/util.h | 130 +++++ include/jemalloc/jemalloc_defs.h.in | 3 + src/chunk_mmap.c | 9 +- src/huge.c | 5 +- src/jemalloc.c | 234 +++----- src/prof.c | 221 +++---- src/stats.c | 222 ++------ src/util.c | 539 ++++++++++++++++++ test/allocm.c | 14 +- test/posix_memalign.c | 2 +- 17 files changed, 958 insertions(+), 570 deletions(-) create mode 100644 include/jemalloc/internal/util.h create mode 100644 src/util.c diff --git a/Makefile.in b/Makefile.in index 1f1ffd33..62864556 100644 --- a/Makefile.in +++ b/Makefile.in @@ -50,7 +50,7 @@ CSRCS := @srcroot@src/jemalloc.c @srcroot@src/arena.c @srcroot@src/atomic.c \ @srcroot@src/ckh.c @srcroot@src/ctl.c @srcroot@src/extent.c \ @srcroot@src/hash.c @srcroot@src/huge.c @srcroot@src/mb.c \ @srcroot@src/mutex.c @srcroot@src/prof.c @srcroot@src/rtree.c \ - @srcroot@src/stats.c @srcroot@src/tcache.c + @srcroot@src/stats.c @srcroot@src/tcache.c @srcroot@src/util.c ifeq (macho, @abi@) CSRCS += @srcroot@src/zone.c endif diff --git a/configure.ac b/configure.ac index 5b6c6b37..5a11588f 100644 --- a/configure.ac +++ b/configure.ac @@ -144,6 +144,18 @@ else fi AC_DEFINE_UNQUOTED([LG_SIZEOF_LONG], [$LG_SIZEOF_LONG]) +AC_CHECK_SIZEOF([intmax_t]) +if test "x${ac_cv_sizeof_intmax_t}" = "x16" ; then + LG_SIZEOF_INTMAX_T=4 +elif test "x${ac_cv_sizeof_intmax_t}" = "x8" ; then + LG_SIZEOF_INTMAX_T=3 +elif test "x${ac_cv_sizeof_intmax_t}" = "x4" ; then + LG_SIZEOF_INTMAX_T=2 +else + AC_MSG_ERROR([Unsupported intmax_t size: ${ac_cv_sizeof_long}]) +fi +AC_DEFINE_UNQUOTED([LG_SIZEOF_INTMAX_T], [$LG_SIZEOF_INTMAX_T]) + AC_CANONICAL_HOST dnl CPU-specific settings. 
CPU_SPINWAIT="" diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index 8f72f7fa..a48d09fe 100644 --- a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -76,19 +76,17 @@ bool ctl_boot(void); #define xmallctl(name, oldp, oldlenp, newp, newlen) do { \ if (je_mallctl(name, oldp, oldlenp, newp, newlen) \ != 0) { \ - malloc_write(": Failure in xmallctl(\""); \ - malloc_write(name); \ - malloc_write("\", ...)\n"); \ + malloc_printf( \ + ": Failure in xmallctl(\"%s\", ...)\n", \ + name); \ abort(); \ } \ } while (0) #define xmallctlnametomib(name, mibp, miblenp) do { \ if (je_mallctlnametomib(name, mibp, miblenp) != 0) { \ - malloc_write( \ - ": Failure in xmallctlnametomib(\""); \ - malloc_write(name); \ - malloc_write("\", ...)\n"); \ + malloc_printf(": Failure in " \ + "xmallctlnametomib(\"%s\", ...)\n", name); \ abort(); \ } \ } while (0) diff --git a/include/jemalloc/internal/hash.h b/include/jemalloc/internal/hash.h index d695e77f..2f501f5d 100644 --- a/include/jemalloc/internal/hash.h +++ b/include/jemalloc/internal/hash.h @@ -48,14 +48,14 @@ hash(const void *key, size_t len, uint64_t seed) data2 = (const unsigned char *)data; switch(len & 7) { - case 7: h ^= ((uint64_t)(data2[6])) << 48; - case 6: h ^= ((uint64_t)(data2[5])) << 40; - case 5: h ^= ((uint64_t)(data2[4])) << 32; - case 4: h ^= ((uint64_t)(data2[3])) << 24; - case 3: h ^= ((uint64_t)(data2[2])) << 16; - case 2: h ^= ((uint64_t)(data2[1])) << 8; - case 1: h ^= ((uint64_t)(data2[0])); - h *= m; + case 7: h ^= ((uint64_t)(data2[6])) << 48; + case 6: h ^= ((uint64_t)(data2[5])) << 40; + case 5: h ^= ((uint64_t)(data2[4])) << 32; + case 4: h ^= ((uint64_t)(data2[3])) << 24; + case 3: h ^= ((uint64_t)(data2[2])) << 16; + case 2: h ^= ((uint64_t)(data2[1])) << 8; + case 1: h ^= ((uint64_t)(data2[0])); + h *= m; } h ^= h >> r; diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 
3e445f76..3774bb5d 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -149,39 +149,6 @@ static const bool config_ivsalloc = #include "jemalloc/internal/qr.h" #include "jemalloc/internal/ql.h" -extern void (*je_malloc_message)(void *wcbopaque, const char *s); - -/* - * Define a custom assert() in order to reduce the chances of deadlock during - * assertion failure. - */ -#ifndef assert -# ifdef JEMALLOC_DEBUG -# define assert(e) do { \ - if (!(e)) { \ - char line_buf[UMAX2S_BUFSIZE]; \ - malloc_write(": "); \ - malloc_write(__FILE__); \ - malloc_write(":"); \ - malloc_write(u2s(__LINE__, 10, line_buf)); \ - malloc_write(": Failed assertion: "); \ - malloc_write("\""); \ - malloc_write(#e); \ - malloc_write("\"\n"); \ - abort(); \ - } \ -} while (0) -# else -# define assert(e) -# endif -#endif - -/* Use to assert a particular configuration, e.g., cassert(config_debug). */ -#define cassert(c) do { \ - if ((c) == false) \ - assert(false); \ -} while (0) - /* * jemalloc can conceptually be broken into components (arena, tcache, etc.), * but there are circular dependencies that cannot be broken without @@ -215,9 +182,6 @@ extern void (*je_malloc_message)(void *wcbopaque, const char *s); # define JEMALLOC_INLINE static inline #endif -/* Size of stack-allocated buffer passed to buferror(). */ -#define BUFERROR_BUF 64 - /* Smallest size class to support. 
*/ #define LG_TINY_MIN 3 #define TINY_MIN (1U << LG_TINY_MIN) @@ -318,6 +282,7 @@ extern void (*je_malloc_message)(void *wcbopaque, const char *s); #define PAGE_CEILING(s) \ (((s) + PAGE_MASK) & ~PAGE_MASK) +#include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/prng.h" #include "jemalloc/internal/ckh.h" @@ -344,6 +309,7 @@ extern void (*je_malloc_message)(void *wcbopaque, const char *s); /******************************************************************************/ #define JEMALLOC_H_STRUCTS +#include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/prng.h" #include "jemalloc/internal/ckh.h" @@ -443,10 +409,10 @@ thread_allocated_t *thread_allocated_get_hard(void); arena_t *arenas_extend(unsigned ind); arena_t *choose_arena_hard(void); -int buferror(int errnum, char *buf, size_t buflen); void jemalloc_prefork(void); void jemalloc_postfork(void); +#include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/prng.h" #include "jemalloc/internal/ckh.h" @@ -473,6 +439,7 @@ void jemalloc_postfork(void); /******************************************************************************/ #define JEMALLOC_H_INLINES +#include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/prng.h" #include "jemalloc/internal/ckh.h" @@ -487,33 +454,13 @@ void jemalloc_postfork(void); #include "jemalloc/internal/huge.h" #ifndef JEMALLOC_ENABLE_INLINE -size_t pow2_ceil(size_t x); size_t s2u(size_t size); size_t sa2u(size_t size, size_t alignment, size_t *run_size_p); -void malloc_write(const char *s); arena_t *choose_arena(void); thread_allocated_t *thread_allocated_get(void); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) -/* Compute the smallest power of 2 that is >= x. 
*/ -JEMALLOC_INLINE size_t -pow2_ceil(size_t x) -{ - - x--; - x |= x >> 1; - x |= x >> 2; - x |= x >> 4; - x |= x >> 8; - x |= x >> 16; -#if (LG_SIZEOF_PTR == 3) - x |= x >> 32; -#endif - x++; - return (x); -} - /* * Compute usable size that would result from allocating an object with the * specified size. @@ -619,17 +566,6 @@ sa2u(size_t size, size_t alignment, size_t *run_size_p) } } -/* - * Wrapper around malloc_message() that avoids the need for - * je_malloc_message(...) throughout the code. - */ -JEMALLOC_INLINE void -malloc_write(const char *s) -{ - - je_malloc_message(NULL, s); -} - /* * Choose an arena based on a per-thread value (fast-path code, calls slow-path * code if necessary). diff --git a/include/jemalloc/internal/private_namespace.h b/include/jemalloc/internal/private_namespace.h index db2192e6..89d3b5ca 100644 --- a/include/jemalloc/internal/private_namespace.h +++ b/include/jemalloc/internal/private_namespace.h @@ -23,6 +23,7 @@ #define arenas_bin_i_index JEMALLOC_N(arenas_bin_i_index) #define arenas_extend JEMALLOC_N(arenas_extend) #define arenas_lrun_i_index JEMALLOC_N(arenas_lrun_i_index) +#define arenas_tls JEMALLOC_N(arenas_tls) #define atomic_add_uint32 JEMALLOC_N(atomic_add_uint32) #define atomic_add_uint64 JEMALLOC_N(atomic_add_uint64) #define atomic_sub_uint32 JEMALLOC_N(atomic_sub_uint32) @@ -137,8 +138,30 @@ #define malloc_mutex_trylock JEMALLOC_N(malloc_mutex_trylock) #define malloc_mutex_unlock JEMALLOC_N(malloc_mutex_unlock) #define malloc_printf JEMALLOC_N(malloc_printf) +#define malloc_snprintf JEMALLOC_N(malloc_snprintf) +#define malloc_tprintf JEMALLOC_N(malloc_tprintf) +#define malloc_vcprintf JEMALLOC_N(malloc_vcprintf) +#define malloc_vsnprintf JEMALLOC_N(malloc_vsnprintf) +#define malloc_vtprintf JEMALLOC_N(malloc_vtprintf) #define malloc_write JEMALLOC_N(malloc_write) #define mb_write JEMALLOC_N(mb_write) +#define opt_abort JEMALLOC_N(opt_abort) +#define opt_junk JEMALLOC_N(opt_junk) +#define opt_lg_chunk 
JEMALLOC_N(opt_lg_chunk) +#define opt_lg_dirty_mult JEMALLOC_N(opt_lg_dirty_mult) +#define opt_lg_prof_interval JEMALLOC_N(opt_lg_prof_interval) +#define opt_lg_prof_sample JEMALLOC_N(opt_lg_prof_sample) +#define opt_lg_tcache_max JEMALLOC_N(opt_lg_tcache_max) +#define opt_narenas JEMALLOC_N(opt_narenas) +#define opt_prof JEMALLOC_N(opt_prof) +#define opt_prof_accum JEMALLOC_N(opt_prof_accum) +#define opt_prof_active JEMALLOC_N(opt_prof_active) +#define opt_prof_gdump JEMALLOC_N(opt_prof_gdump) +#define opt_prof_leak JEMALLOC_N(opt_prof_leak) +#define opt_stats_print JEMALLOC_N(opt_stats_print) +#define opt_tcache JEMALLOC_N(opt_tcache) +#define opt_xmalloc JEMALLOC_N(opt_xmalloc) +#define opt_zero JEMALLOC_N(opt_zero) #define pow2_ceil JEMALLOC_N(pow2_ceil) #define prof_backtrace JEMALLOC_N(prof_backtrace) #define prof_boot0 JEMALLOC_N(prof_boot0) @@ -156,6 +179,7 @@ #define prof_sample_accum_update JEMALLOC_N(prof_sample_accum_update) #define prof_sample_threshold_update JEMALLOC_N(prof_sample_threshold_update) #define prof_tdata_init JEMALLOC_N(prof_tdata_init) +#define prof_tdata_tls JEMALLOC_N(prof_tdata_tls) #define pthread_create JEMALLOC_N(pthread_create) #define rtree_get JEMALLOC_N(rtree_get) #define rtree_get_locked JEMALLOC_N(rtree_get_locked) @@ -166,6 +190,7 @@ #define stats_arenas_i_bins_j_index JEMALLOC_N(stats_arenas_i_bins_j_index) #define stats_arenas_i_index JEMALLOC_N(stats_arenas_i_index) #define stats_arenas_i_lruns_j_index JEMALLOC_N(stats_arenas_i_lruns_j_index) +#define stats_cactive JEMALLOC_N(stats_cactive) #define stats_cactive_add JEMALLOC_N(stats_cactive_add) #define stats_cactive_get JEMALLOC_N(stats_cactive_get) #define stats_cactive_sub JEMALLOC_N(stats_cactive_sub) @@ -185,6 +210,7 @@ #define tcache_event JEMALLOC_N(tcache_event) #define tcache_get JEMALLOC_N(tcache_get) #define tcache_stats_merge JEMALLOC_N(tcache_stats_merge) +#define tcache_tls JEMALLOC_N(tcache_tls) #define thread_allocated_get JEMALLOC_N(thread_allocated_get) 
#define thread_allocated_get_hard JEMALLOC_N(thread_allocated_get_hard) -#define u2s JEMALLOC_N(u2s) +#define thread_allocated_tls JEMALLOC_N(thread_allocated_tls) diff --git a/include/jemalloc/internal/stats.h b/include/jemalloc/internal/stats.h index 4af23c33..27f68e36 100644 --- a/include/jemalloc/internal/stats.h +++ b/include/jemalloc/internal/stats.h @@ -1,8 +1,6 @@ /******************************************************************************/ #ifdef JEMALLOC_H_TYPES -#define UMAX2S_BUFSIZE 65 - typedef struct tcache_bin_stats_s tcache_bin_stats_t; typedef struct malloc_bin_stats_s malloc_bin_stats_t; typedef struct malloc_large_stats_s malloc_large_stats_t; @@ -135,11 +133,6 @@ extern bool opt_stats_print; extern size_t stats_cactive; -char *u2s(uint64_t x, unsigned base, char *s); -void malloc_cprintf(void (*write)(void *, const char *), void *cbopaque, - const char *format, ...) JEMALLOC_ATTR(format(printf, 3, 4)); -void malloc_printf(const char *format, ...) - JEMALLOC_ATTR(format(printf, 1, 2)); void stats_print(void (*write)(void *, const char *), void *cbopaque, const char *opts); diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h new file mode 100644 index 00000000..a268109c --- /dev/null +++ b/include/jemalloc/internal/util.h @@ -0,0 +1,130 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +/* Size of stack-allocated buffer passed to buferror(). */ +#define BUFERROR_BUF 64 + +/* + * Define a custom assert() in order to reduce the chances of deadlock during + * assertion failure. + */ +#ifndef assert +#define assert(e) do { \ + if (config_debug && !(e)) { \ + malloc_printf( \ + ": %s:%d: Failed assertion: \"%s\"\n", \ + __FILE__, __LINE__, #e); \ + abort(); \ + } \ +} while (0) +#endif + +/* Use to assert a particular configuration, e.g., cassert(config_debug). 
*/ +#define cassert(c) do { \ + if ((c) == false) \ + assert(false); \ +} while (0) + +#ifndef not_reached +#define not_reached() do { \ + if (config_debug) { \ + malloc_printf( \ + ": %s:%d: Unreachable code reached\n", \ + __FILE__, __LINE__); \ + abort(); \ + } \ +} while (0) +#endif + +#ifndef not_implemented +#define not_implemented() do { \ + if (config_debug) { \ + malloc_printf(": %s:%d: Not implemented\n", \ + __FILE__, __LINE__); \ + abort(); \ + } \ +} while (0) +#endif + +#define assert_not_implemented(e) do { \ + if (config_debug && !(e)) \ + not_implemented(); \ +} while (0) + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +extern void (*je_malloc_message)(void *wcbopaque, const char *s); + +int buferror(int errnum, char *buf, size_t buflen); + +/* + * malloc_vsnprintf() supports a subset of snprintf(3) that avoids floating + * point math. + */ +int malloc_vsnprintf(char *str, size_t size, const char *format, + va_list ap); +int malloc_snprintf(char *str, size_t size, const char *format, ...) + JEMALLOC_ATTR(format(printf, 3, 4)); +/* + * malloc_[v]tprintf() prints to a thread-local string buffer, so the result is + * overwritten by the next call to malloc_[v]{,c,t}printf(). + */ +const char * malloc_vtprintf(const char *format, va_list ap); +const char * malloc_tprintf(const char *format, ...) + JEMALLOC_ATTR(format(printf, 1, 2)); +void malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque, + const char *format, va_list ap); +void malloc_cprintf(void (*write)(void *, const char *), void *cbopaque, + const char *format, ...) JEMALLOC_ATTR(format(printf, 3, 4)); +void malloc_printf(const char *format, ...) 
+ JEMALLOC_ATTR(format(printf, 1, 2)); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#ifndef JEMALLOC_ENABLE_INLINE +size_t pow2_ceil(size_t x); +void malloc_write(const char *s); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_UTIL_C_)) +/* Compute the smallest power of 2 that is >= x. */ +JEMALLOC_INLINE size_t +pow2_ceil(size_t x) +{ + + x--; + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + x |= x >> 8; + x |= x >> 16; +#if (LG_SIZEOF_PTR == 3) + x |= x >> 32; +#endif + x++; + return (x); +} + +/* + * Wrapper around malloc_message() that avoids the need for + * je_malloc_message(...) throughout the code. + */ +JEMALLOC_INLINE void +malloc_write(const char *s) +{ + + je_malloc_message(NULL, s); +} + +#endif + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/include/jemalloc/jemalloc_defs.h.in b/include/jemalloc/jemalloc_defs.h.in index 049cf01a..6b2b0d01 100644 --- a/include/jemalloc/jemalloc_defs.h.in +++ b/include/jemalloc/jemalloc_defs.h.in @@ -166,3 +166,6 @@ /* sizeof(long) == 2^LG_SIZEOF_LONG. */ #undef LG_SIZEOF_LONG + +/* sizeof(intmax_t) == 2^LG_SIZEOF_INTMAX_T. 
*/ +#undef LG_SIZEOF_INTMAX_T diff --git a/src/chunk_mmap.c b/src/chunk_mmap.c index 164e86e7..c7409284 100644 --- a/src/chunk_mmap.c +++ b/src/chunk_mmap.c @@ -61,9 +61,8 @@ pages_map(void *addr, size_t size, bool noreserve) char buf[BUFERROR_BUF]; buferror(errno, buf, sizeof(buf)); - malloc_write(": Error in munmap(): "); - malloc_write(buf); - malloc_write("\n"); + malloc_printf(": Error in munmap(): "); - malloc_write(buf); - malloc_write("\n"); + malloc_printf(": Error in munmap(): %s\n", buf); if (opt_abort) abort(); } diff --git a/src/huge.c b/src/huge.c index f2fba869..2d51c529 100644 --- a/src/huge.c +++ b/src/huge.c @@ -239,9 +239,8 @@ huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, char buf[BUFERROR_BUF]; buferror(errno, buf, sizeof(buf)); - malloc_write(": Error in mremap(): "); - malloc_write(buf); - malloc_write("\n"); + malloc_printf(": Error in mremap(): %s\n", + buf); if (opt_abort) abort(); memcpy(ret, ptr, copysize); diff --git a/src/jemalloc.c b/src/jemalloc.c index ad1ee8ef..e148ae0e 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -55,7 +55,6 @@ size_t opt_narenas = 0; /******************************************************************************/ /* Function prototypes for non-inline static functions. */ -static void wrtmessage(void *cbopaque, const char *s); static void stats_print_atexit(void); static unsigned malloc_ncpus(void); static void arenas_cleanup(void *arg); @@ -71,19 +70,6 @@ static bool malloc_init_hard(void); static int imemalign(void **memptr, size_t alignment, size_t size, bool enforce_min_alignment); -/******************************************************************************/ -/* malloc_message() setup. 
*/ - -JEMALLOC_CATTR(visibility("hidden"), static) -void -wrtmessage(void *cbopaque, const char *s) -{ - UNUSED int result = write(STDERR_FILENO, s, strlen(s)); -} - -void (*je_malloc_message)(void *, const char *s) - JEMALLOC_ATTR(visibility("default")) = wrtmessage; - /******************************************************************************/ /* * Begin miscellaneous support functions. @@ -178,25 +164,6 @@ choose_arena_hard(void) return (ret); } -/* - * glibc provides a non-standard strerror_r() when _GNU_SOURCE is defined, so - * provide a wrapper. - */ -int -buferror(int errnum, char *buf, size_t buflen) -{ -#ifdef _GNU_SOURCE - char *b = strerror_r(errno, buf, buflen); - if (b != buf) { - strncpy(buf, b, buflen); - buf[buflen-1] = '\0'; - } - return (0); -#else - return (strerror_r(errno, buf, buflen)); -#endif -} - static void stats_print_atexit(void) { @@ -324,68 +291,64 @@ malloc_conf_next(char const **opts_p, char const **k_p, size_t *klen_p, for (accept = false; accept == false;) { switch (*opts) { - case 'A': case 'B': case 'C': case 'D': case 'E': - case 'F': case 'G': case 'H': case 'I': case 'J': - case 'K': case 'L': case 'M': case 'N': case 'O': - case 'P': case 'Q': case 'R': case 'S': case 'T': - case 'U': case 'V': case 'W': case 'X': case 'Y': - case 'Z': - case 'a': case 'b': case 'c': case 'd': case 'e': - case 'f': case 'g': case 'h': case 'i': case 'j': - case 'k': case 'l': case 'm': case 'n': case 'o': - case 'p': case 'q': case 'r': case 's': case 't': - case 'u': case 'v': case 'w': case 'x': case 'y': - case 'z': - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - case '_': - opts++; - break; - case ':': - opts++; - *klen_p = (uintptr_t)opts - 1 - (uintptr_t)*k_p; - *v_p = opts; - accept = true; - break; - case '\0': - if (opts != *opts_p) { - malloc_write(": Conf string " - "ends with key\n"); - } - return (true); - default: - malloc_write(": Malformed conf " - "string\n"); - 
return (true); + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': + case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': + case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': + case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': + case 'Y': case 'Z': + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': + case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': + case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': + case 's': case 't': case 'u': case 'v': case 'w': case 'x': + case 'y': case 'z': + case '0': case '1': case '2': case '3': case '4': case '5': + case '6': case '7': case '8': case '9': + case '_': + opts++; + break; + case ':': + opts++; + *klen_p = (uintptr_t)opts - 1 - (uintptr_t)*k_p; + *v_p = opts; + accept = true; + break; + case '\0': + if (opts != *opts_p) { + malloc_write(": Conf string ends " + "with key\n"); + } + return (true); + default: + malloc_write(": Malformed conf string\n"); + return (true); } } for (accept = false; accept == false;) { switch (*opts) { - case ',': - opts++; - /* - * Look ahead one character here, because the - * next time this function is called, it will - * assume that end of input has been cleanly - * reached if no input remains, but we have - * optimistically already consumed the comma if - * one exists. - */ - if (*opts == '\0') { - malloc_write(": Conf string " - "ends with comma\n"); - } - *vlen_p = (uintptr_t)opts - 1 - (uintptr_t)*v_p; - accept = true; - break; - case '\0': - *vlen_p = (uintptr_t)opts - (uintptr_t)*v_p; - accept = true; - break; - default: - opts++; - break; + case ',': + opts++; + /* + * Look ahead one character here, because the next time + * this function is called, it will assume that end of + * input has been cleanly reached if no input remains, + * but we have optimistically already consumed the + * comma if one exists. 
+ */ + if (*opts == '\0') { + malloc_write(": Conf string ends " + "with comma\n"); + } + *vlen_p = (uintptr_t)opts - 1 - (uintptr_t)*v_p; + accept = true; + break; + case '\0': + *vlen_p = (uintptr_t)opts - (uintptr_t)*v_p; + accept = true; + break; + default: + opts++; + break; } } @@ -397,17 +360,9 @@ static void malloc_conf_error(const char *msg, const char *k, size_t klen, const char *v, size_t vlen) { - char buf[PATH_MAX + 1]; - malloc_write(": "); - malloc_write(msg); - malloc_write(": "); - memcpy(buf, k, klen); - memcpy(&buf[klen], ":", 1); - memcpy(&buf[klen+1], v, vlen); - buf[klen+1+vlen] = '\0'; - malloc_write(buf); - malloc_write("\n"); + malloc_printf(": %s: %.*s:%.*s\n", msg, (int)klen, k, + (int)vlen, v); } static void @@ -458,8 +413,7 @@ malloc_conf_init(void) opts = buf; } break; - } - case 2: { + } case 2: { const char *envname = #ifdef JEMALLOC_PREFIX JEMALLOC_CPREFIX"MALLOC_CONF" @@ -480,8 +434,7 @@ malloc_conf_init(void) opts = buf; } break; - } - default: + } default: /* NOTREACHED */ assert(false); buf[0] = '\0'; @@ -490,15 +443,15 @@ malloc_conf_init(void) while (*opts != '\0' && malloc_conf_next(&opts, &k, &klen, &v, &vlen) == false) { -#define CONF_HANDLE_BOOL(n) \ +#define CONF_HANDLE_BOOL(o, n) \ if (sizeof(#n)-1 == klen && strncmp(#n, k, \ klen) == 0) { \ if (strncmp("true", v, vlen) == 0 && \ vlen == sizeof("true")-1) \ - opt_##n = true; \ + o = true; \ else if (strncmp("false", v, vlen) == \ 0 && vlen == sizeof("false")-1) \ - opt_##n = false; \ + o = false; \ else { \ malloc_conf_error( \ "Invalid conf value", \ @@ -506,7 +459,7 @@ malloc_conf_init(void) } \ continue; \ } -#define CONF_HANDLE_SIZE_T(n, min, max) \ +#define CONF_HANDLE_SIZE_T(o, n, min, max) \ if (sizeof(#n)-1 == klen && strncmp(#n, k, \ klen) == 0) { \ unsigned long ul; \ @@ -524,10 +477,10 @@ malloc_conf_init(void) "Out-of-range conf value", \ k, klen, v, vlen); \ } else \ - opt_##n = ul; \ + o = ul; \ continue; \ } -#define CONF_HANDLE_SSIZE_T(n, min, max) \ 
+#define CONF_HANDLE_SSIZE_T(o, n, min, max) \ if (sizeof(#n)-1 == klen && strncmp(#n, k, \ klen) == 0) { \ long l; \ @@ -546,54 +499,58 @@ malloc_conf_init(void) "Out-of-range conf value", \ k, klen, v, vlen); \ } else \ - opt_##n = l; \ + o = l; \ continue; \ } -#define CONF_HANDLE_CHAR_P(n, d) \ +#define CONF_HANDLE_CHAR_P(o, n, d) \ if (sizeof(#n)-1 == klen && strncmp(#n, k, \ klen) == 0) { \ size_t cpylen = (vlen <= \ - sizeof(opt_##n)-1) ? vlen : \ - sizeof(opt_##n)-1; \ - strncpy(opt_##n, v, cpylen); \ - opt_##n[cpylen] = '\0'; \ + sizeof(o)-1) ? vlen : \ + sizeof(o)-1; \ + strncpy(o, v, cpylen); \ + o[cpylen] = '\0'; \ continue; \ } - CONF_HANDLE_BOOL(abort) + CONF_HANDLE_BOOL(opt_abort, abort) /* * Chunks always require at least one * header page, * plus one data page. */ - CONF_HANDLE_SIZE_T(lg_chunk, PAGE_SHIFT+1, + CONF_HANDLE_SIZE_T(opt_lg_chunk, lg_chunk, PAGE_SHIFT+1, (sizeof(size_t) << 3) - 1) - CONF_HANDLE_SIZE_T(narenas, 1, SIZE_T_MAX) - CONF_HANDLE_SSIZE_T(lg_dirty_mult, -1, - (sizeof(size_t) << 3) - 1) - CONF_HANDLE_BOOL(stats_print) + CONF_HANDLE_SIZE_T(opt_narenas, narenas, 1, SIZE_T_MAX) + CONF_HANDLE_SSIZE_T(opt_lg_dirty_mult, lg_dirty_mult, + -1, (sizeof(size_t) << 3) - 1) + CONF_HANDLE_BOOL(opt_stats_print, stats_print) if (config_fill) { - CONF_HANDLE_BOOL(junk) - CONF_HANDLE_BOOL(zero) + CONF_HANDLE_BOOL(opt_junk, junk) + CONF_HANDLE_BOOL(opt_zero, zero) } if (config_xmalloc) { - CONF_HANDLE_BOOL(xmalloc) + CONF_HANDLE_BOOL(opt_xmalloc, xmalloc) } if (config_tcache) { - CONF_HANDLE_BOOL(tcache) - CONF_HANDLE_SSIZE_T(lg_tcache_max, -1, + CONF_HANDLE_BOOL(opt_tcache, tcache) + CONF_HANDLE_SSIZE_T(opt_lg_tcache_max, + lg_tcache_max, -1, (sizeof(size_t) << 3) - 1) } if (config_prof) { - CONF_HANDLE_BOOL(prof) - CONF_HANDLE_CHAR_P(prof_prefix, "jeprof") - CONF_HANDLE_BOOL(prof_active) - CONF_HANDLE_SSIZE_T(lg_prof_sample, 0, + CONF_HANDLE_BOOL(opt_prof, prof) + CONF_HANDLE_CHAR_P(opt_prof_prefix, prof_prefix, + "jeprof") + 
CONF_HANDLE_BOOL(opt_prof_active, prof_active) + CONF_HANDLE_SSIZE_T(opt_lg_prof_sample, + lg_prof_sample, 0, (sizeof(uint64_t) << 3) - 1) - CONF_HANDLE_BOOL(prof_accum) - CONF_HANDLE_SSIZE_T(lg_prof_interval, -1, + CONF_HANDLE_BOOL(opt_prof_accum, prof_accum) + CONF_HANDLE_SSIZE_T(opt_lg_prof_interval, + lg_prof_interval, -1, (sizeof(uint64_t) << 3) - 1) - CONF_HANDLE_BOOL(prof_gdump) - CONF_HANDLE_BOOL(prof_leak) + CONF_HANDLE_BOOL(opt_prof_gdump, prof_gdump) + CONF_HANDLE_BOOL(opt_prof_leak, prof_leak) } malloc_conf_error("Invalid conf pair", k, klen, v, vlen); @@ -773,12 +730,9 @@ malloc_init_hard(void) * machinery will fail to allocate memory at far lower limits. */ if (narenas > chunksize / sizeof(arena_t *)) { - char buf[UMAX2S_BUFSIZE]; - narenas = chunksize / sizeof(arena_t *); - malloc_write(": Reducing narenas to limit ("); - malloc_write(u2s(narenas, 10, buf)); - malloc_write(")\n"); + malloc_printf(": Reducing narenas to limit (%d)\n", + narenas); } /* Allocate and initialize arenas. */ diff --git a/src/prof.c b/src/prof.c index d78658d6..b57c5b8a 100644 --- a/src/prof.c +++ b/src/prof.c @@ -74,16 +74,18 @@ static _Unwind_Reason_Code prof_unwind_callback( struct _Unwind_Context *context, void *arg); #endif static bool prof_flush(bool propagate_err); -static bool prof_write(const char *s, bool propagate_err); +static bool prof_write(bool propagate_err, const char *s); +static bool prof_printf(bool propagate_err, const char *format, ...) 
+ JEMALLOC_ATTR(format(printf, 2, 3)); static void prof_ctx_sum(prof_ctx_t *ctx, prof_cnt_t *cnt_all, size_t *leak_nctx); static void prof_ctx_destroy(prof_ctx_t *ctx); static void prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt); -static bool prof_dump_ctx(prof_ctx_t *ctx, prof_bt_t *bt, - bool propagate_err); +static bool prof_dump_ctx(bool propagate_err, prof_ctx_t *ctx, + prof_bt_t *bt); static bool prof_dump_maps(bool propagate_err); -static bool prof_dump(const char *filename, bool leakcheck, - bool propagate_err); +static bool prof_dump(bool propagate_err, const char *filename, + bool leakcheck); static void prof_dump_filename(char *filename, char v, int64_t vseq); static void prof_fdump(void); static void prof_bt_hash(const void *key, unsigned minbits, size_t *hash1, @@ -587,7 +589,7 @@ prof_flush(bool propagate_err) } static bool -prof_write(const char *s, bool propagate_err) +prof_write(bool propagate_err, const char *s) { unsigned i, slen, n; @@ -616,6 +618,20 @@ prof_write(const char *s, bool propagate_err) return (false); } +JEMALLOC_ATTR(format(printf, 2, 3)) +static bool +prof_printf(bool propagate_err, const char *format, ...) 
+{ + bool ret; + va_list ap; + + va_start(ap, format); + ret = prof_write(propagate_err, malloc_vtprintf(format, ap)); + va_end(ap); + + return (ret); +} + static void prof_ctx_sum(prof_ctx_t *ctx, prof_cnt_t *cnt_all, size_t *leak_nctx) { @@ -744,9 +760,8 @@ prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt) } static bool -prof_dump_ctx(prof_ctx_t *ctx, prof_bt_t *bt, bool propagate_err) +prof_dump_ctx(bool propagate_err, prof_ctx_t *ctx, prof_bt_t *bt) { - char buf[UMAX2S_BUFSIZE]; unsigned i; cassert(config_prof); @@ -758,27 +773,19 @@ prof_dump_ctx(prof_ctx_t *ctx, prof_bt_t *bt, bool propagate_err) return (false); } - if (prof_write(u2s(ctx->cnt_summed.curobjs, 10, buf), propagate_err) - || prof_write(": ", propagate_err) - || prof_write(u2s(ctx->cnt_summed.curbytes, 10, buf), - propagate_err) - || prof_write(" [", propagate_err) - || prof_write(u2s(ctx->cnt_summed.accumobjs, 10, buf), - propagate_err) - || prof_write(": ", propagate_err) - || prof_write(u2s(ctx->cnt_summed.accumbytes, 10, buf), - propagate_err) - || prof_write("] @", propagate_err)) + if (prof_printf(propagate_err, "%"PRId64": %"PRId64 + " [%"PRIu64": %"PRIu64"] @", + ctx->cnt_summed.curobjs, ctx->cnt_summed.curbytes, + ctx->cnt_summed.accumobjs, ctx->cnt_summed.accumbytes)) return (true); for (i = 0; i < bt->len; i++) { - if (prof_write(" 0x", propagate_err) - || prof_write(u2s((uintptr_t)bt->vec[i], 16, buf), - propagate_err)) + if (prof_printf(propagate_err, " %#"PRIx64, + (uintptr_t)bt->vec[i])) return (true); } - if (prof_write("\n", propagate_err)) + if (prof_write(propagate_err, "\n")) return (true); return (false); @@ -788,39 +795,15 @@ static bool prof_dump_maps(bool propagate_err) { int mfd; - char buf[UMAX2S_BUFSIZE]; - char *s; - unsigned i, slen; - /* /proc//maps\0 */ - char mpath[6 + UMAX2S_BUFSIZE - + 5 + 1]; cassert(config_prof); - i = 0; - - s = "/proc/"; - slen = strlen(s); - memcpy(&mpath[i], s, slen); - i += slen; - - s = u2s(getpid(), 10, buf); - slen = strlen(s); - 
memcpy(&mpath[i], s, slen); - i += slen; - - s = "/maps"; - slen = strlen(s); - memcpy(&mpath[i], s, slen); - i += slen; - - mpath[i] = '\0'; - - mfd = open(mpath, O_RDONLY); + mfd = open(malloc_tprintf("/proc/%d/maps", (int)getpid()), + O_RDONLY); if (mfd != -1) { ssize_t nread; - if (prof_write("\nMAPPED_LIBRARIES:\n", propagate_err) && + if (prof_write(propagate_err, "\nMAPPED_LIBRARIES:\n") && propagate_err) return (true); nread = 0; @@ -842,7 +825,7 @@ prof_dump_maps(bool propagate_err) } static bool -prof_dump(const char *filename, bool leakcheck, bool propagate_err) +prof_dump(bool propagate_err, const char *filename, bool leakcheck) { prof_cnt_t cnt_all; size_t tabind; @@ -854,7 +837,6 @@ prof_dump(const char *filename, bool leakcheck, bool propagate_err) prof_ctx_t *p; void *v; } ctx; - char buf[UMAX2S_BUFSIZE]; size_t leak_nctx; cassert(config_prof); @@ -863,9 +845,9 @@ prof_dump(const char *filename, bool leakcheck, bool propagate_err) prof_dump_fd = creat(filename, 0644); if (prof_dump_fd == -1) { if (propagate_err == false) { - malloc_write(": creat(\""); - malloc_write(filename); - malloc_write("\", 0644) failed\n"); + malloc_printf( + ": creat(\"%s\"), 0644) failed\n", + filename); if (opt_abort) abort(); } @@ -879,31 +861,27 @@ prof_dump(const char *filename, bool leakcheck, bool propagate_err) prof_ctx_sum(ctx.p, &cnt_all, &leak_nctx); /* Dump profile header. 
*/ - if (prof_write("heap profile: ", propagate_err) - || prof_write(u2s(cnt_all.curobjs, 10, buf), propagate_err) - || prof_write(": ", propagate_err) - || prof_write(u2s(cnt_all.curbytes, 10, buf), propagate_err) - || prof_write(" [", propagate_err) - || prof_write(u2s(cnt_all.accumobjs, 10, buf), propagate_err) - || prof_write(": ", propagate_err) - || prof_write(u2s(cnt_all.accumbytes, 10, buf), propagate_err)) - goto ERROR; - if (opt_lg_prof_sample == 0) { - if (prof_write("] @ heapprofile\n", propagate_err)) + if (prof_printf(propagate_err, + "heap profile: %"PRId64": %"PRId64 + " [%"PRIu64": %"PRIu64"] @ heapprofile\n", + cnt_all.curobjs, cnt_all.curbytes, + cnt_all.accumobjs, cnt_all.accumbytes)) goto ERROR; } else { - if (prof_write("] @ heap_v2/", propagate_err) - || prof_write(u2s((uint64_t)1U << opt_lg_prof_sample, 10, - buf), propagate_err) - || prof_write("\n", propagate_err)) + if (prof_printf(propagate_err, + "heap profile: %"PRId64": %"PRId64 + " [%"PRIu64": %"PRIu64"] @ heap_v2/%"PRIu64"\n", + cnt_all.curobjs, cnt_all.curbytes, + cnt_all.accumobjs, cnt_all.accumbytes, + ((uint64_t)1U << opt_lg_prof_sample))) goto ERROR; } /* Dump per ctx profile stats. */ for (tabind = 0; ckh_iter(&bt2ctx, &tabind, &bt.v, &ctx.v) == false;) { - if (prof_dump_ctx(ctx.p, bt.p, propagate_err)) + if (prof_dump_ctx(propagate_err, ctx.p, bt.p)) goto ERROR; } @@ -917,17 +895,14 @@ prof_dump(const char *filename, bool leakcheck, bool propagate_err) prof_leave(); if (leakcheck && cnt_all.curbytes != 0) { - malloc_write(": Leak summary: "); - malloc_write(u2s(cnt_all.curbytes, 10, buf)); - malloc_write((cnt_all.curbytes != 1) ? " bytes, " : " byte, "); - malloc_write(u2s(cnt_all.curobjs, 10, buf)); - malloc_write((cnt_all.curobjs != 1) ? " objects, " : - " object, "); - malloc_write(u2s(leak_nctx, 10, buf)); - malloc_write((leak_nctx != 1) ? 
" contexts\n" : " context\n"); - malloc_write(": Run pprof on \""); - malloc_write(filename); - malloc_write("\" for leak detail\n"); + malloc_printf(": Leak summary: %"PRId64" byte%s, %" + PRId64" object%s, %zu context%s\n", + cnt_all.curbytes, (cnt_all.curbytes != 1) ? "s" : "", + cnt_all.curobjs, (cnt_all.curobjs != 1) ? "s" : "", + leak_nctx, (leak_nctx != 1) ? "s" : ""); + malloc_printf( + ": Run pprof on \"%s\" for leak detail\n", + filename); } return (false); @@ -936,76 +911,24 @@ ERROR: return (true); } -#define DUMP_FILENAME_BUFSIZE (PATH_MAX+ UMAX2S_BUFSIZE \ - + 1 \ - + UMAX2S_BUFSIZE \ - + 2 \ - + UMAX2S_BUFSIZE \ - + 5 + 1) +#define DUMP_FILENAME_BUFSIZE (PATH_MAX + 1) static void prof_dump_filename(char *filename, char v, int64_t vseq) { - char buf[UMAX2S_BUFSIZE]; - char *s; - unsigned i, slen; cassert(config_prof); - /* - * Construct a filename of the form: - * - * ...v.heap\0 - */ - - i = 0; - - s = opt_prof_prefix; - slen = strlen(s); - memcpy(&filename[i], s, slen); - i += slen; - - s = "."; - slen = strlen(s); - memcpy(&filename[i], s, slen); - i += slen; - - s = u2s(getpid(), 10, buf); - slen = strlen(s); - memcpy(&filename[i], s, slen); - i += slen; - - s = "."; - slen = strlen(s); - memcpy(&filename[i], s, slen); - i += slen; - - s = u2s(prof_dump_seq, 10, buf); - prof_dump_seq++; - slen = strlen(s); - memcpy(&filename[i], s, slen); - i += slen; - - s = "."; - slen = strlen(s); - memcpy(&filename[i], s, slen); - i += slen; - - filename[i] = v; - i++; - if (vseq != UINT64_C(0xffffffffffffffff)) { - s = u2s(vseq, 10, buf); - slen = strlen(s); - memcpy(&filename[i], s, slen); - i += slen; + /* "...v.heap" */ + malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE, + "%s.%d.%"PRIu64".%c%"PRId64".heap", + opt_prof_prefix, (int)getpid(), prof_dump_seq, v, vseq); + } else { + /* "....heap" */ + malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE, + "%s.%d.%"PRIu64".%c.heap", + opt_prof_prefix, (int)getpid(), prof_dump_seq, v); } - - s = ".heap"; - slen = 
strlen(s); - memcpy(&filename[i], s, slen); - i += slen; - - filename[i] = '\0'; } static void @@ -1022,14 +945,14 @@ prof_fdump(void) malloc_mutex_lock(&prof_dump_seq_mtx); prof_dump_filename(filename, 'f', UINT64_C(0xffffffffffffffff)); malloc_mutex_unlock(&prof_dump_seq_mtx); - prof_dump(filename, opt_prof_leak, false); + prof_dump(false, filename, opt_prof_leak); } } void prof_idump(void) { - char filename[DUMP_FILENAME_BUFSIZE]; + char filename[PATH_MAX + 1]; cassert(config_prof); @@ -1048,7 +971,7 @@ prof_idump(void) prof_dump_filename(filename, 'i', prof_dump_iseq); prof_dump_iseq++; malloc_mutex_unlock(&prof_dump_seq_mtx); - prof_dump(filename, false, false); + prof_dump(false, filename, false); } } @@ -1072,7 +995,7 @@ prof_mdump(const char *filename) malloc_mutex_unlock(&prof_dump_seq_mtx); filename = filename_buf; } - return (prof_dump(filename, false, true)); + return (prof_dump(true, filename, false)); } void @@ -1097,7 +1020,7 @@ prof_gdump(void) prof_dump_filename(filename, 'u', prof_dump_useq); prof_dump_useq++; malloc_mutex_unlock(&prof_dump_seq_mtx); - prof_dump(filename, false, false); + prof_dump(false, filename, false); } } diff --git a/src/stats.c b/src/stats.c index f9763785..38c8bb3c 100644 --- a/src/stats.c +++ b/src/stats.c @@ -44,8 +44,6 @@ size_t stats_cactive = 0; /******************************************************************************/ /* Function prototypes for non-inline static functions. */ -static void malloc_vcprintf(void (*write_cb)(void *, const char *), - void *cbopaque, const char *format, va_list ap); static void stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, unsigned i); static void stats_arena_lruns_print(void (*write_cb)(void *, const char *), @@ -55,97 +53,6 @@ static void stats_arena_print(void (*write_cb)(void *, const char *), /******************************************************************************/ -/* XXX Refactor by adding malloc_vsnprintf(). 
*/ -/* - * We don't want to depend on vsnprintf() for production builds, since that can - * cause unnecessary bloat for static binaries. u2s() provides minimal integer - * printing functionality, so that malloc_printf() use can be limited to - * JEMALLOC_STATS code. - */ -char * -u2s(uint64_t x, unsigned base, char *s) -{ - unsigned i; - - i = UMAX2S_BUFSIZE - 1; - s[i] = '\0'; - switch (base) { - case 10: - do { - i--; - s[i] = "0123456789"[x % (uint64_t)10]; - x /= (uint64_t)10; - } while (x > 0); - break; - case 16: - do { - i--; - s[i] = "0123456789abcdef"[x & 0xf]; - x >>= 4; - } while (x > 0); - break; - default: - do { - i--; - s[i] = "0123456789abcdefghijklmnopqrstuvwxyz"[x % - (uint64_t)base]; - x /= (uint64_t)base; - } while (x > 0); - } - - return (&s[i]); -} - -static void -malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque, - const char *format, va_list ap) -{ - char buf[4096]; - - if (write_cb == NULL) { - /* - * The caller did not provide an alternate write_cb callback - * function, so use the default one. malloc_write() is an - * inline function, so use malloc_message() directly here. - */ - write_cb = je_malloc_message; - cbopaque = NULL; - } - - vsnprintf(buf, sizeof(buf), format, ap); - write_cb(cbopaque, buf); -} - -/* - * Print to a callback function in such a way as to (hopefully) avoid memory - * allocation. - */ -JEMALLOC_ATTR(format(printf, 3, 4)) -void -malloc_cprintf(void (*write_cb)(void *, const char *), void *cbopaque, - const char *format, ...) -{ - va_list ap; - - va_start(ap, format); - malloc_vcprintf(write_cb, cbopaque, format, ap); - va_end(ap); -} - -/* - * Print to stderr in such a way as to (hopefully) avoid memory allocation. - */ -JEMALLOC_ATTR(format(printf, 1, 2)) -void -malloc_printf(const char *format, ...) 
-{ - va_list ap; - - va_start(ap, format); - malloc_vcprintf(NULL, NULL, format, ap); - va_end(ap); -} - static void stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, unsigned i) @@ -360,7 +267,6 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, int err; uint64_t epoch; size_t u64sz; - char s[UMAX2S_BUFSIZE]; bool general = true; bool merged = true; bool unmerged = true; @@ -403,22 +309,22 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, for (i = 0; opts[i] != '\0'; i++) { switch (opts[i]) { - case 'g': - general = false; - break; - case 'm': - merged = false; - break; - case 'a': - unmerged = false; - break; - case 'b': - bins = false; - break; - case 'l': - large = false; - break; - default:; + case 'g': + general = false; + break; + case 'm': + merged = false; + break; + case 'a': + unmerged = false; + break; + case 'b': + bins = false; + break; + case 'l': + large = false; + break; + default:; } } } @@ -438,46 +344,34 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, cpsz = sizeof(const char *); CTL_GET("version", &cpv, const char *); - write_cb(cbopaque, "Version: "); - write_cb(cbopaque, cpv); - write_cb(cbopaque, "\n"); + malloc_cprintf(write_cb, cbopaque, "Version: %s\n", cpv); CTL_GET("config.debug", &bv, bool); - write_cb(cbopaque, "Assertions "); - write_cb(cbopaque, bv ? "enabled" : "disabled"); - write_cb(cbopaque, "\n"); + malloc_cprintf(write_cb, cbopaque, "Assertions %s\n", + bv ? "enabled" : "disabled"); #define OPT_WRITE_BOOL(n) \ if ((err = je_mallctl("opt."#n, &bv, &bsz, NULL, 0)) \ == 0) { \ - write_cb(cbopaque, " opt."#n": "); \ - write_cb(cbopaque, bv ? "true" : "false"); \ - write_cb(cbopaque, "\n"); \ + malloc_cprintf(write_cb, cbopaque, \ + " opt."#n": %s\n", bv ? 
"true" : "false"); \ } #define OPT_WRITE_SIZE_T(n) \ if ((err = je_mallctl("opt."#n, &sv, &ssz, NULL, 0)) \ == 0) { \ - write_cb(cbopaque, " opt."#n": "); \ - write_cb(cbopaque, u2s(sv, 10, s)); \ - write_cb(cbopaque, "\n"); \ + malloc_cprintf(write_cb, cbopaque, \ + " opt."#n": %zu\n", sv); \ } #define OPT_WRITE_SSIZE_T(n) \ if ((err = je_mallctl("opt."#n, &ssv, &sssz, NULL, 0)) \ == 0) { \ - if (ssv >= 0) { \ - write_cb(cbopaque, " opt."#n": "); \ - write_cb(cbopaque, u2s(ssv, 10, s)); \ - } else { \ - write_cb(cbopaque, " opt."#n": -"); \ - write_cb(cbopaque, u2s(-ssv, 10, s)); \ - } \ - write_cb(cbopaque, "\n"); \ + malloc_cprintf(write_cb, cbopaque, \ + " opt."#n": %zd\n", ssv); \ } #define OPT_WRITE_CHAR_P(n) \ if ((err = je_mallctl("opt."#n, &cpv, &cpsz, NULL, 0)) \ == 0) { \ - write_cb(cbopaque, " opt."#n": \""); \ - write_cb(cbopaque, cpv); \ - write_cb(cbopaque, "\"\n"); \ + malloc_cprintf(write_cb, cbopaque, \ + " opt."#n": \"%s\"\n", cpv); \ } write_cb(cbopaque, "Run-time option settings:\n"); @@ -505,68 +399,52 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, #undef OPT_WRITE_SSIZE_T #undef OPT_WRITE_CHAR_P - write_cb(cbopaque, "CPUs: "); - write_cb(cbopaque, u2s(ncpus, 10, s)); - write_cb(cbopaque, "\n"); + malloc_cprintf(write_cb, cbopaque, "CPUs: %u\n", ncpus); CTL_GET("arenas.narenas", &uv, unsigned); - write_cb(cbopaque, "Max arenas: "); - write_cb(cbopaque, u2s(uv, 10, s)); - write_cb(cbopaque, "\n"); + malloc_cprintf(write_cb, cbopaque, "Max arenas: %u\n", uv); - write_cb(cbopaque, "Pointer size: "); - write_cb(cbopaque, u2s(sizeof(void *), 10, s)); - write_cb(cbopaque, "\n"); + malloc_cprintf(write_cb, cbopaque, "Pointer size: %zu\n", + sizeof(void *)); CTL_GET("arenas.quantum", &sv, size_t); - write_cb(cbopaque, "Quantum size: "); - write_cb(cbopaque, u2s(sv, 10, s)); - write_cb(cbopaque, "\n"); + malloc_cprintf(write_cb, cbopaque, "Quantum size: %zu\n", sv); CTL_GET("opt.lg_dirty_mult", &ssv, ssize_t); if (ssv >= 0) { - 
write_cb(cbopaque, - "Min active:dirty page ratio per arena: "); - write_cb(cbopaque, u2s((1U << ssv), 10, s)); - write_cb(cbopaque, ":1\n"); + malloc_cprintf(write_cb, cbopaque, + "Min active:dirty page ratio per arena: %u:1\n", + (1U << ssv)); } else { write_cb(cbopaque, "Min active:dirty page ratio per arena: N/A\n"); } if ((err = je_mallctl("arenas.tcache_max", &sv, &ssz, NULL, 0)) == 0) { - write_cb(cbopaque, - "Maximum thread-cached size class: "); - write_cb(cbopaque, u2s(sv, 10, s)); - write_cb(cbopaque, "\n"); + malloc_cprintf(write_cb, cbopaque, + "Maximum thread-cached size class: %zu\n", sv); } if ((err = je_mallctl("opt.prof", &bv, &bsz, NULL, 0)) == 0 && bv) { CTL_GET("opt.lg_prof_sample", &sv, size_t); - write_cb(cbopaque, "Average profile sample interval: "); - write_cb(cbopaque, u2s((((uint64_t)1U) << sv), 10, s)); - write_cb(cbopaque, " (2^"); - write_cb(cbopaque, u2s(sv, 10, s)); - write_cb(cbopaque, ")\n"); + malloc_cprintf(write_cb, cbopaque, + "Average profile sample interval: %"PRIu64 + " (2^%zu)\n", (((uint64_t)1U) << sv), sv); CTL_GET("opt.lg_prof_interval", &ssv, ssize_t); - write_cb(cbopaque, "Average profile dump interval: "); if (ssv >= 0) { - write_cb(cbopaque, u2s((((uint64_t)1U) << ssv), - 10, s)); - write_cb(cbopaque, " (2^"); - write_cb(cbopaque, u2s(ssv, 10, s)); - write_cb(cbopaque, ")\n"); - } else - write_cb(cbopaque, "N/A\n"); + malloc_cprintf(write_cb, cbopaque, + "Average profile dump interval: %"PRIu64 + " (2^%zd)\n", + (((uint64_t)1U) << ssv), ssv); + } else { + write_cb(cbopaque, + "Average profile dump interval: N/A\n"); + } } - CTL_GET("arenas.chunksize", &sv, size_t); - write_cb(cbopaque, "Chunk size: "); - write_cb(cbopaque, u2s(sv, 10, s)); CTL_GET("opt.lg_chunk", &sv, size_t); - write_cb(cbopaque, " (2^"); - write_cb(cbopaque, u2s(sv, 10, s)); - write_cb(cbopaque, ")\n"); + malloc_cprintf(write_cb, cbopaque, "Chunk size: %zu (2^%zu)\n", + (ZU(1) << sv), sv); } if (config_stats) { diff --git a/src/util.c b/src/util.c 
new file mode 100644 index 00000000..7c4c0d44 --- /dev/null +++ b/src/util.c @@ -0,0 +1,539 @@ +#define assert(e) do { \ + if (config_debug && !(e)) { \ + malloc_write(": Failed assertion\n"); \ + abort(); \ + } \ +} while (0) + +#define not_reached() do { \ + if (config_debug) { \ + malloc_write(": Unreachable code reached\n"); \ + abort(); \ + } \ +} while (0) + +#define not_implemented() do { \ + if (config_debug) { \ + malloc_write(": Not implemented\n"); \ + abort(); \ + } \ +} while (0) + +#define JEMALLOC_UTIL_C_ +#include "jemalloc/internal/jemalloc_internal.h" + +/******************************************************************************/ +/* Function prototypes for non-inline static functions. */ + +static void wrtmessage(void *cbopaque, const char *s); +#define U2S_BUFSIZE ((1U << (LG_SIZEOF_INTMAX_T + 3)) + 1) +static char *u2s(uintmax_t x, unsigned base, bool uppercase, char *s, + size_t *slen_p); +#define D2S_BUFSIZE (1 + U2S_BUFSIZE) +static char *d2s(intmax_t x, char sign, char *s, size_t *slen_p); +#define O2S_BUFSIZE (1 + U2S_BUFSIZE) +static char *o2s(uintmax_t x, bool alt_form, char *s, size_t *slen_p); +#define X2S_BUFSIZE (2 + U2S_BUFSIZE) +static char *x2s(uintmax_t x, bool alt_form, bool uppercase, char *s, + size_t *slen_p); + +/******************************************************************************/ + +/* malloc_message() setup. */ +JEMALLOC_CATTR(visibility("hidden"), static) +void +wrtmessage(void *cbopaque, const char *s) +{ + UNUSED int result = write(STDERR_FILENO, s, strlen(s)); +} + +void (*je_malloc_message)(void *, const char *s) + JEMALLOC_ATTR(visibility("default")) = wrtmessage; + +/* + * glibc provides a non-standard strerror_r() when _GNU_SOURCE is defined, so + * provide a wrapper. 
+ */ +int +buferror(int errnum, char *buf, size_t buflen) +{ +#ifdef _GNU_SOURCE + char *b = strerror_r(errno, buf, buflen); + if (b != buf) { + strncpy(buf, b, buflen); + buf[buflen-1] = '\0'; + } + return (0); +#else + return (strerror_r(errno, buf, buflen)); +#endif +} + +static char * +u2s(uintmax_t x, unsigned base, bool uppercase, char *s, size_t *slen_p) +{ + unsigned i; + + i = U2S_BUFSIZE - 1; + s[i] = '\0'; + switch (base) { + case 10: + do { + i--; + s[i] = "0123456789"[x % (uint64_t)10]; + x /= (uint64_t)10; + } while (x > 0); + break; + case 16: { + const char *digits = (uppercase) + ? "0123456789ABCDEF" + : "0123456789abcdef"; + + do { + i--; + s[i] = digits[x & 0xf]; + x >>= 4; + } while (x > 0); + break; + } default: { + const char *digits = (uppercase) + ? "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" + : "0123456789abcdefghijklmnopqrstuvwxyz"; + + assert(base >= 2 && base <= 36); + do { + i--; + s[i] = digits[x % (uint64_t)base]; + x /= (uint64_t)base; + } while (x > 0); + }} + + *slen_p = U2S_BUFSIZE - 1 - i; + return (&s[i]); +} + +static char * +d2s(intmax_t x, char sign, char *s, size_t *slen_p) +{ + bool neg; + + if ((neg = (x < 0))) + x = -x; + s = u2s(x, 10, false, s, slen_p); + if (neg) + sign = '-'; + switch (sign) { + case '-': + if (neg == false) + break; + /* Fall through. */ + case ' ': + case '+': + s--; + (*slen_p)++; + *s = sign; + break; + default: not_reached(); + } + return (s); +} + +static char * +o2s(uintmax_t x, bool alt_form, char *s, size_t *slen_p) +{ + + s = u2s(x, 8, false, s, slen_p); + if (alt_form && *s != '0') { + s--; + (*slen_p)++; + *s = '0'; + } + return (s); +} + +static char * +x2s(uintmax_t x, bool alt_form, bool uppercase, char *s, size_t *slen_p) +{ + + s = u2s(x, 16, uppercase, s, slen_p); + if (alt_form) { + s -= 2; + (*slen_p) += 2; + memcpy(s, uppercase ? 
"0X" : "0x", 2); + } + return (s); +} + +int +malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) +{ + int ret; + size_t i; + const char *f; + va_list tap; + +#define APPEND_C(c) do { \ + if (i < size) \ + str[i] = (c); \ + i++; \ +} while (0) +#define APPEND_S(s, slen) do { \ + if (i < size) { \ + size_t cpylen = (slen <= size - i) ? slen : size - i; \ + memcpy(&str[i], s, cpylen); \ + } \ + i += slen; \ +} while (0) +#define APPEND_PADDED_S(s, slen, width, left_justify) do { \ + /* Left padding. */ \ + size_t pad_len = (width == -1) ? 0 : ((slen < (size_t)width) ? \ + (size_t)width - slen : 0); \ + if (left_justify == false && pad_len != 0) { \ + size_t j; \ + for (j = 0; j < pad_len; j++) \ + APPEND_C(' '); \ + } \ + /* Value. */ \ + APPEND_S(s, slen); \ + /* Right padding. */ \ + if (left_justify && pad_len != 0) { \ + size_t j; \ + for (j = 0; j < pad_len; j++) \ + APPEND_C(' '); \ + } \ +} while (0) +#define GET_ARG_NUMERIC(val, len) do { \ + switch (len) { \ + case '?': \ + val = va_arg(ap, int); \ + break; \ + case 'l': \ + val = va_arg(ap, long); \ + break; \ + case 'q': \ + val = va_arg(ap, long long); \ + break; \ + case 'j': \ + val = va_arg(ap, intmax_t); \ + break; \ + case 't': \ + val = va_arg(ap, ptrdiff_t); \ + break; \ + case 'z': \ + val = va_arg(ap, size_t); \ + break; \ + default: not_reached(); \ + } \ +} while (0) + + if (config_debug) + va_copy(tap, ap); + + i = 0; + f = format; + while (true) { + switch (*f) { + case '\0': goto OUT; + case '%': { + bool alt_form = false; + bool zero_pad = false; + bool left_justify = false; + bool plus_space = false; + bool plus_plus = false; + int prec = -1; + int width = -1; + char len = '?'; + + f++; + if (*f == '%') { + /* %% */ + APPEND_C(*f); + break; + } + /* Flags. 
*/ + while (true) { + switch (*f) { + case '#': + assert(alt_form == false); + alt_form = true; + break; + case '0': + assert(zero_pad == false); + zero_pad = true; + break; + case '-': + assert(left_justify == false); + left_justify = true; + break; + case ' ': + assert(plus_space == false); + plus_space = true; + break; + case '+': + assert(plus_plus == false); + plus_plus = true; + break; + default: goto WIDTH; + } + f++; + } + /* Width. */ + WIDTH: + switch (*f) { + case '*': + width = va_arg(ap, int); + f++; + break; + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': { + unsigned long uwidth; + errno = 0; + uwidth = strtoul(f, (char **)&f, 10); + assert(uwidth != ULONG_MAX || errno != ERANGE); + width = (int)uwidth; + if (*f == '.') { + f++; + goto PRECISION; + } else + goto LENGTH; + break; + } case '.': + f++; + goto PRECISION; + default: goto LENGTH; + } + /* Precision. */ + PRECISION: + switch (*f) { + case '*': + prec = va_arg(ap, int); + f++; + break; + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': { + unsigned long uprec; + errno = 0; + uprec = strtoul(f, (char **)&f, 10); + assert(uprec != ULONG_MAX || errno != ERANGE); + prec = (int)uprec; + break; + } + default: break; + } + /* Length. */ + LENGTH: + switch (*f) { + case 'l': + f++; + if (*f == 'l') { + len = 'q'; + f++; + } else + len = 'l'; + break; + case 'j': + len = 'j'; + f++; + break; + case 't': + len = 't'; + f++; + break; + case 'z': + len = 'z'; + f++; + break; + default: break; + } + /* Conversion specifier. */ + switch (*f) { + char *s; + size_t slen; + case 'd': case 'i': { + intmax_t val; + char buf[D2S_BUFSIZE]; + + GET_ARG_NUMERIC(val, len); + s = d2s(val, (plus_plus ? '+' : (plus_space ? 
+ ' ' : '-')), buf, &slen); + APPEND_PADDED_S(s, slen, width, left_justify); + f++; + break; + } case 'o': { + uintmax_t val; + char buf[O2S_BUFSIZE]; + + GET_ARG_NUMERIC(val, len); + s = o2s(val, alt_form, buf, &slen); + APPEND_PADDED_S(s, slen, width, left_justify); + f++; + break; + } case 'u': { + uintmax_t val; + char buf[U2S_BUFSIZE]; + + GET_ARG_NUMERIC(val, len); + s = u2s(val, 10, false, buf, &slen); + APPEND_PADDED_S(s, slen, width, left_justify); + f++; + break; + } case 'x': case 'X': { + uintmax_t val; + char buf[X2S_BUFSIZE]; + + GET_ARG_NUMERIC(val, len); + s = x2s(val, alt_form, *f == 'X', buf, &slen); + APPEND_PADDED_S(s, slen, width, left_justify); + f++; + break; + } case 'c': { + unsigned char val; + char buf[2]; + + assert(len == '?' || len == 'l'); + assert_not_implemented(len != 'l'); + val = va_arg(ap, int); + buf[0] = val; + buf[1] = '\0'; + APPEND_PADDED_S(buf, 1, width, left_justify); + f++; + break; + } case 's': + assert(len == '?' || len == 'l'); + assert_not_implemented(len != 'l'); + s = va_arg(ap, char *); + slen = (prec == -1) ? strlen(s) : prec; + APPEND_PADDED_S(s, slen, width, left_justify); + f++; + break; + case 'p': { + uintmax_t val; + char buf[X2S_BUFSIZE]; + + GET_ARG_NUMERIC(val, len); + s = x2s(val, true, false, buf, &slen); + APPEND_PADDED_S(s, slen, width, left_justify); + f++; + break; + } + default: not_implemented(); + } + break; + } default: { + APPEND_C(*f); + f++; + break; + }} + } + OUT: + if (i < size) + str[i] = '\0'; + else + str[size - 1] = '\0'; + ret = i; + + if (config_debug) { + char buf[ret + 2]; + int tret; + + /* + * Verify that the resulting string matches what vsnprintf() + * would have created. 
+ */ + tret = vsnprintf(buf, sizeof(buf), format, tap); + assert(tret == ret); + assert(memcmp(str, buf, ret + 1) == 0); + } + } + +#undef APPEND_C +#undef APPEND_S +#undef APPEND_PADDED_S +#undef GET_ARG_NUMERIC + return (ret); +} + +JEMALLOC_ATTR(format(printf, 3, 4)) +int +malloc_snprintf(char *str, size_t size, const char *format, ...) +{ + int ret; + va_list ap; + + va_start(ap, format); + ret = malloc_vsnprintf(str, size, format, ap); + va_end(ap); + + return (ret); +} + +const char * +malloc_vtprintf(const char *format, va_list ap) +{ + /* buf must be large enough for all possible uses within jemalloc. */ + static __thread char buf[4096]; + + malloc_vsnprintf(buf, sizeof(buf), format, ap); + + return (buf); +} + +JEMALLOC_ATTR(format(printf, 1, 2)) +const char * +malloc_tprintf(const char *format, ...) +{ + const char *ret; + va_list ap; + + va_start(ap, format); + ret = malloc_vtprintf(format, ap); + va_end(ap); + + return (ret); +} + +void +malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque, + const char *format, va_list ap) +{ + + if (write_cb == NULL) { + /* + * The caller did not provide an alternate write_cb callback + * function, so use the default one. malloc_write() is an + * inline function, so use malloc_message() directly here. + */ + write_cb = je_malloc_message; + cbopaque = NULL; + } + + write_cb(cbopaque, malloc_vtprintf(format, ap)); +} + +/* + * Print to a callback function in such a way as to (hopefully) avoid memory + * allocation. + */ +JEMALLOC_ATTR(format(printf, 3, 4)) +void +malloc_cprintf(void (*write_cb)(void *, const char *), void *cbopaque, + const char *format, ...) +{ + va_list ap; + + va_start(ap, format); + malloc_vcprintf(write_cb, cbopaque, format, ap); + va_end(ap); +} + +/* Print to stderr in such a way as to avoid memory allocation. */ +JEMALLOC_ATTR(format(printf, 1, 2)) +void +malloc_printf(const char *format, ...) 
+{ + va_list ap; + + va_start(ap, format); + malloc_vcprintf(NULL, NULL, format, ap); + va_end(ap); +} diff --git a/test/allocm.c b/test/allocm.c index 137e74c3..3aa0fd23 100644 --- a/test/allocm.c +++ b/test/allocm.c @@ -77,14 +77,14 @@ main(void) r = nallocm(&nsz, sz, ALLOCM_ALIGN(alignment)); if (r == ALLOCM_SUCCESS) { fprintf(stderr, - "Expected error for nallocm(&nsz, %zu, 0x%x)\n", + "Expected error for nallocm(&nsz, %zu, %#x)\n", sz, ALLOCM_ALIGN(alignment)); } rsz = 0; r = allocm(&p, &rsz, sz, ALLOCM_ALIGN(alignment)); if (r == ALLOCM_SUCCESS) { fprintf(stderr, - "Expected error for allocm(&p, %zu, 0x%x)\n", + "Expected error for allocm(&p, %zu, %#x)\n", sz, ALLOCM_ALIGN(alignment)); } if (nsz != rsz) @@ -105,7 +105,7 @@ main(void) r = allocm(&p, &rsz, sz, ALLOCM_ALIGN(alignment)); if (r == ALLOCM_SUCCESS) { fprintf(stderr, - "Expected error for allocm(&p, %zu, 0x%x)\n", + "Expected error for allocm(&p, %zu, %#x)\n", sz, ALLOCM_ALIGN(alignment)); } @@ -119,14 +119,14 @@ main(void) r = nallocm(&nsz, sz, ALLOCM_ALIGN(alignment)); if (r == ALLOCM_SUCCESS) { fprintf(stderr, - "Expected error for nallocm(&nsz, %zu, 0x%x)\n", + "Expected error for nallocm(&nsz, %zu, %#x)\n", sz, ALLOCM_ALIGN(alignment)); } rsz = 0; r = allocm(&p, &rsz, sz, ALLOCM_ALIGN(alignment)); if (r == ALLOCM_SUCCESS) { fprintf(stderr, - "Expected error for allocm(&p, %zu, 0x%x)\n", + "Expected error for allocm(&p, %zu, %#x)\n", sz, ALLOCM_ALIGN(alignment)); } if (nsz != rsz) @@ -150,7 +150,7 @@ main(void) if (r != ALLOCM_SUCCESS) { fprintf(stderr, "nallocm() error for size %zu" - " (0x%zx): %d\n", + " (%#zx): %d\n", sz, sz, r); exit(1); } @@ -160,7 +160,7 @@ main(void) if (r != ALLOCM_SUCCESS) { fprintf(stderr, "allocm() error for size %zu" - " (0x%zx): %d\n", + " (%#zx): %d\n", sz, sz, r); exit(1); } diff --git a/test/posix_memalign.c b/test/posix_memalign.c index 5abb4201..0ea35c89 100644 --- a/test/posix_memalign.c +++ b/test/posix_memalign.c @@ -100,7 +100,7 @@ main(void) alignment, 
size); if (err) { fprintf(stderr, - "Error for size %zu (0x%zx): %s\n", + "Error for size %zu (%#zx): %s\n", size, size, strerror(err)); exit(1); } From 7cca6085754736c7e4e51d27e2f0234e84806628 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 12 Mar 2012 11:31:54 -0700 Subject: [PATCH 0077/3378] Remove extra '}'. --- src/util.c | 1 - 1 file changed, 1 deletion(-) diff --git a/src/util.c b/src/util.c index 7c4c0d44..7d658aaa 100644 --- a/src/util.c +++ b/src/util.c @@ -443,7 +443,6 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) tret = vsnprintf(buf, sizeof(buf), format, tap); assert(tret == ret); assert(memcmp(str, buf, ret + 1) == 0); - } } #undef APPEND_C From 125b93e43fe764e46c8a89fc2f3957a4e02c92e1 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 12 Mar 2012 11:33:59 -0700 Subject: [PATCH 0078/3378] Remove bashism. Submitted by Mike Hommey. --- include/jemalloc/internal/size_classes.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/jemalloc/internal/size_classes.sh b/include/jemalloc/internal/size_classes.sh index d8306a58..79b4ba23 100755 --- a/include/jemalloc/internal/size_classes.sh +++ b/include/jemalloc/internal/size_classes.sh @@ -13,7 +13,7 @@ lg_tmin=3 lg_pmin=12 lg_pmax=16 -function pow2() { +pow2() { e=$1 pow2_result=1 while [ ${e} -gt 0 ] ; do From 2bb6c7a632fc4c0afe2532ea4044d337d9b288ae Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 12 Mar 2012 13:38:00 -0700 Subject: [PATCH 0079/3378] s/PRIx64/PRIxPTR/ for uintptr_t printf() argument. 
--- src/prof.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/prof.c b/src/prof.c index b57c5b8a..2ca66c73 100644 --- a/src/prof.c +++ b/src/prof.c @@ -780,7 +780,7 @@ prof_dump_ctx(bool propagate_err, prof_ctx_t *ctx, prof_bt_t *bt) return (true); for (i = 0; i < bt->len; i++) { - if (prof_printf(propagate_err, " %#"PRIx64, + if (prof_printf(propagate_err, " %#"PRIxPTR, (uintptr_t)bt->vec[i])) return (true); } From 08fc3b2d5173512a2c1fdbe11cf00c8c70bad503 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 12 Mar 2012 15:07:53 -0700 Subject: [PATCH 0080/3378] Fix --with-mangling/--with-jemalloc-prefix interaction. Fix --with-mangling to remove mangled symbols from the set of functions to apply a prefix to. Prior to this change, the interaction was correct with autoconf 2.59, but incorrect with autoconf 2.65. --- configure.ac | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 5a11588f..5cf2855f 100644 --- a/configure.ac +++ b/configure.ac @@ -346,9 +346,12 @@ AC_ARG_WITH([mangling], [AS_HELP_STRING([--with-mangling=], [Mangle symbols in ])], [mangling_map="$with_mangling"], [mangling_map=""]) for nm in `echo ${mangling_map} |tr ',' ' '` ; do - n="je_`echo ${nm} |tr ':' ' ' |awk '{print $1}'`" + k="`echo ${nm} |tr ':' ' ' |awk '{print $1}'`" + n="je_${k}" m=`echo ${nm} |tr ':' ' ' |awk '{print $2}'` AC_DEFINE_UNQUOTED([${n}], [${m}]) + dnl Remove key from public_syms so that it isn't redefined later. + public_syms=`for sym in ${public_syms}; do echo "${sym}"; done |grep -v "^${k}\$" |tr '\n' ' '` done dnl Do not prefix public APIs by default. From 025d86118673f153b6ccd68e49054e58493b57f4 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 12 Mar 2012 15:57:24 -0700 Subject: [PATCH 0081/3378] Update config.guess and config.sub. 
--- config.guess | 1034 +++++++++++++++++++++++++++----------------------- config.sub | 432 ++++++++++++++++----- 2 files changed, 882 insertions(+), 584 deletions(-) diff --git a/config.guess b/config.guess index 0773d0f6..d622a44e 100755 --- a/config.guess +++ b/config.guess @@ -1,9 +1,10 @@ #! /bin/sh # Attempt to guess a canonical system name. # Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, -# 2000, 2001, 2002, 2003 Free Software Foundation, Inc. +# 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, +# 2011, 2012 Free Software Foundation, Inc. -timestamp='2004-03-03' +timestamp='2012-02-10' # This file is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by @@ -16,24 +17,24 @@ timestamp='2004-03-03' # General Public License for more details. # # You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# along with this program; if not, see . # # As a special exception to the GNU General Public License, if you # distribute this file as part of a program that contains a # configuration script generated by Autoconf, you may include it under # the same distribution terms that you use for the rest of that program. -# Originally written by Per Bothner . -# Please send patches to . Submit a context -# diff and a properly formatted ChangeLog entry. + +# Originally written by Per Bothner. Please send patches (context +# diff format) to and include a ChangeLog +# entry. # # This script attempts to guess a canonical system name similar to # config.sub. If it succeeds, it prints the system name on stdout, and # exits with 0. Otherwise, it exits with 1. # -# The plan is that this can be called by configure scripts if you -# don't specify an explicit build system type. 
+# You can get the latest version of this script from: +# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess;hb=HEAD me=`echo "$0" | sed -e 's,.*/,,'` @@ -53,7 +54,8 @@ version="\ GNU config.guess ($timestamp) Originally written by Per Bothner. -Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001 +Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, +2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO @@ -66,11 +68,11 @@ Try \`$me --help' for more information." while test $# -gt 0 ; do case $1 in --time-stamp | --time* | -t ) - echo "$timestamp" ; exit 0 ;; + echo "$timestamp" ; exit ;; --version | -v ) - echo "$version" ; exit 0 ;; + echo "$version" ; exit ;; --help | --h* | -h ) - echo "$usage"; exit 0 ;; + echo "$usage"; exit ;; -- ) # Stop option processing shift; break ;; - ) # Use stdin as input. @@ -104,7 +106,7 @@ set_cc_for_build=' trap "exitcode=\$?; (rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null) && exit \$exitcode" 0 ; trap "rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null; exit 1" 1 2 13 15 ; : ${TMPDIR=/tmp} ; - { tmp=`(umask 077 && mktemp -d -q "$TMPDIR/cgXXXXXX") 2>/dev/null` && test -n "$tmp" && test -d "$tmp" ; } || + { tmp=`(umask 077 && mktemp -d "$TMPDIR/cgXXXXXX") 2>/dev/null` && test -n "$tmp" && test -d "$tmp" ; } || { test -n "$RANDOM" && tmp=$TMPDIR/cg$$-$RANDOM && (umask 077 && mkdir $tmp) ; } || { tmp=$TMPDIR/cg-$$ && (umask 077 && mkdir $tmp) && echo "Warning: creating insecure temp directory" >&2 ; } || { echo "$me: cannot create a temporary directory in $TMPDIR" >&2 ; exit 1 ; } ; @@ -123,7 +125,7 @@ case $CC_FOR_BUILD,$HOST_CC,$CC in ;; ,,*) CC_FOR_BUILD=$CC ;; ,*,*) CC_FOR_BUILD=$HOST_CC ;; -esac ;' +esac ; set_cc_for_build= ;' # This is needed to find uname on a Pyramid OSx when run in the BSD universe. 
# (ghazi@noc.rutgers.edu 1994-08-24) @@ -141,7 +143,7 @@ UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in *:NetBSD:*:*) # NetBSD (nbsd) targets should (where applicable) match one or - # more of the tupples: *-*-netbsdelf*, *-*-netbsdaout*, + # more of the tuples: *-*-netbsdelf*, *-*-netbsdaout*, # *-*-netbsdecoff* and *-*-netbsd*. For targets that recently # switched to ELF, *-*-netbsd* would select the old # object file format. This provides both forward @@ -158,6 +160,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in arm*) machine=arm-unknown ;; sh3el) machine=shl-unknown ;; sh3eb) machine=sh-unknown ;; + sh5el) machine=sh5le-unknown ;; *) machine=${UNAME_MACHINE_ARCH}-unknown ;; esac # The Operating System including object format, if it has switched @@ -166,7 +169,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in arm*|i386|m68k|ns32k|sh3*|sparc|vax) eval $set_cc_for_build if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \ - | grep __ELF__ >/dev/null + | grep -q __ELF__ then # Once all utilities can be ECOFF (netbsdecoff) or a.out (netbsdaout). # Return netbsd for either. FIX? @@ -176,7 +179,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in fi ;; *) - os=netbsd + os=netbsd ;; esac # The OS release @@ -196,71 +199,30 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in # contains redundant information, the shorter form: # CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used. 
echo "${machine}-${os}${release}" - exit 0 ;; - amd64:OpenBSD:*:*) - echo x86_64-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - amiga:OpenBSD:*:*) - echo m68k-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - arc:OpenBSD:*:*) - echo mipsel-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - cats:OpenBSD:*:*) - echo arm-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - hp300:OpenBSD:*:*) - echo m68k-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - mac68k:OpenBSD:*:*) - echo m68k-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - macppc:OpenBSD:*:*) - echo powerpc-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - mvme68k:OpenBSD:*:*) - echo m68k-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - mvme88k:OpenBSD:*:*) - echo m88k-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - mvmeppc:OpenBSD:*:*) - echo powerpc-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - pegasos:OpenBSD:*:*) - echo powerpc-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - pmax:OpenBSD:*:*) - echo mipsel-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - sgi:OpenBSD:*:*) - echo mipseb-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - sun3:OpenBSD:*:*) - echo m68k-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - wgrisc:OpenBSD:*:*) - echo mipsel-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; + exit ;; *:OpenBSD:*:*) - echo ${UNAME_MACHINE}-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; + UNAME_MACHINE_ARCH=`arch | sed 's/OpenBSD.//'` + echo ${UNAME_MACHINE_ARCH}-unknown-openbsd${UNAME_RELEASE} + exit ;; *:ekkoBSD:*:*) echo ${UNAME_MACHINE}-unknown-ekkobsd${UNAME_RELEASE} - exit 0 ;; + exit ;; + *:SolidBSD:*:*) + echo ${UNAME_MACHINE}-unknown-solidbsd${UNAME_RELEASE} + exit ;; macppc:MirBSD:*:*) - echo powerppc-unknown-mirbsd${UNAME_RELEASE} - exit 0 ;; + echo powerpc-unknown-mirbsd${UNAME_RELEASE} + exit ;; *:MirBSD:*:*) echo ${UNAME_MACHINE}-unknown-mirbsd${UNAME_RELEASE} - exit 0 ;; + exit ;; alpha:OSF1:*:*) case $UNAME_RELEASE in *4.0) UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $3}'` ;; *5.*) - UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'` 
+ UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'` ;; esac # According to Compaq, /usr/sbin/psrinfo has been available on @@ -306,40 +268,46 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in # A Xn.n version is an unreleased experimental baselevel. # 1.2 uses "1.2" for uname -r. echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[PVTX]//' | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'` - exit 0 ;; - Alpha*:OpenVMS:*:*) - echo alpha-hp-vms - exit 0 ;; + # Reset EXIT trap before exiting to avoid spurious non-zero exit code. + exitcode=$? + trap '' 0 + exit $exitcode ;; Alpha\ *:Windows_NT*:*) # How do we know it's Interix rather than the generic POSIX subsystem? # Should we change UNAME_MACHINE based on the output of uname instead # of the specific Alpha model? echo alpha-pc-interix - exit 0 ;; + exit ;; 21064:Windows_NT:50:3) echo alpha-dec-winnt3.5 - exit 0 ;; + exit ;; Amiga*:UNIX_System_V:4.0:*) echo m68k-unknown-sysv4 - exit 0;; + exit ;; *:[Aa]miga[Oo][Ss]:*:*) echo ${UNAME_MACHINE}-unknown-amigaos - exit 0 ;; + exit ;; *:[Mm]orph[Oo][Ss]:*:*) echo ${UNAME_MACHINE}-unknown-morphos - exit 0 ;; + exit ;; *:OS/390:*:*) echo i370-ibm-openedition - exit 0 ;; + exit ;; + *:z/VM:*:*) + echo s390-ibm-zvmoe + exit ;; *:OS400:*:*) - echo powerpc-ibm-os400 - exit 0 ;; + echo powerpc-ibm-os400 + exit ;; arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*) echo arm-acorn-riscix${UNAME_RELEASE} - exit 0;; + exit ;; + arm:riscos:*:*|arm:RISCOS:*:*) + echo arm-unknown-riscos + exit ;; SR2?01:HI-UX/MPP:*:* | SR8000:HI-UX/MPP:*:*) echo hppa1.1-hitachi-hiuxmpp - exit 0;; + exit ;; Pyramid*:OSx*:*:* | MIS*:OSx*:*:* | MIS*:SMP_DC-OSx*:*:*) # akee@wpdis03.wpafb.af.mil (Earle F. Ake) contributed MIS and NILE. 
if test "`(/bin/universe) 2>/dev/null`" = att ; then @@ -347,32 +315,51 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in else echo pyramid-pyramid-bsd fi - exit 0 ;; + exit ;; NILE*:*:*:dcosx) echo pyramid-pyramid-svr4 - exit 0 ;; + exit ;; DRS?6000:unix:4.0:6*) echo sparc-icl-nx6 - exit 0 ;; - DRS?6000:UNIX_SV:4.2*:7*) + exit ;; + DRS?6000:UNIX_SV:4.2*:7* | DRS?6000:isis:4.2*:7*) case `/usr/bin/uname -p` in - sparc) echo sparc-icl-nx7 && exit 0 ;; + sparc) echo sparc-icl-nx7; exit ;; esac ;; + s390x:SunOS:*:*) + echo ${UNAME_MACHINE}-ibm-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit ;; sun4H:SunOS:5.*:*) echo sparc-hal-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` - exit 0 ;; + exit ;; sun4*:SunOS:5.*:* | tadpole*:SunOS:5.*:*) echo sparc-sun-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` - exit 0 ;; - i86pc:SunOS:5.*:*) - echo i386-pc-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` - exit 0 ;; + exit ;; + i86pc:AuroraUX:5.*:* | i86xen:AuroraUX:5.*:*) + echo i386-pc-auroraux${UNAME_RELEASE} + exit ;; + i86pc:SunOS:5.*:* | i86xen:SunOS:5.*:*) + eval $set_cc_for_build + SUN_ARCH="i386" + # If there is a compiler, see if it is configured for 64-bit objects. + # Note that the Sun cc does not turn __LP64__ into 1 like gcc does. + # This test works for both compilers. + if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then + if (echo '#ifdef __amd64'; echo IS_64BIT_ARCH; echo '#endif') | \ + (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \ + grep IS_64BIT_ARCH >/dev/null + then + SUN_ARCH="x86_64" + fi + fi + echo ${SUN_ARCH}-pc-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit ;; sun4*:SunOS:6*:*) # According to config.sub, this is the proper way to canonicalize # SunOS6. Hard to guess exactly what SunOS6 will be like, but # it's likely to be more like Solaris than SunOS4. 
echo sparc-sun-solaris3`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` - exit 0 ;; + exit ;; sun4*:SunOS:*:*) case "`/usr/bin/arch -k`" in Series*|S4*) @@ -381,10 +368,10 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in esac # Japanese Language versions have a version number like `4.1.3-JL'. echo sparc-sun-sunos`echo ${UNAME_RELEASE}|sed -e 's/-/_/'` - exit 0 ;; + exit ;; sun3*:SunOS:*:*) echo m68k-sun-sunos${UNAME_RELEASE} - exit 0 ;; + exit ;; sun*:*:4.2BSD:*) UNAME_RELEASE=`(sed 1q /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null` test "x${UNAME_RELEASE}" = "x" && UNAME_RELEASE=3 @@ -396,10 +383,10 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in echo sparc-sun-sunos${UNAME_RELEASE} ;; esac - exit 0 ;; + exit ;; aushp:SunOS:*:*) echo sparc-auspex-sunos${UNAME_RELEASE} - exit 0 ;; + exit ;; # The situation for MiNT is a little confusing. The machine name # can be virtually everything (everything which is not # "atarist" or "atariste" at least should have a processor @@ -409,41 +396,41 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in # MiNT. But MiNT is downward compatible to TOS, so this should # be no problem. 
atarist[e]:*MiNT:*:* | atarist[e]:*mint:*:* | atarist[e]:*TOS:*:*) - echo m68k-atari-mint${UNAME_RELEASE} - exit 0 ;; + echo m68k-atari-mint${UNAME_RELEASE} + exit ;; atari*:*MiNT:*:* | atari*:*mint:*:* | atarist[e]:*TOS:*:*) echo m68k-atari-mint${UNAME_RELEASE} - exit 0 ;; + exit ;; *falcon*:*MiNT:*:* | *falcon*:*mint:*:* | *falcon*:*TOS:*:*) - echo m68k-atari-mint${UNAME_RELEASE} - exit 0 ;; + echo m68k-atari-mint${UNAME_RELEASE} + exit ;; milan*:*MiNT:*:* | milan*:*mint:*:* | *milan*:*TOS:*:*) - echo m68k-milan-mint${UNAME_RELEASE} - exit 0 ;; + echo m68k-milan-mint${UNAME_RELEASE} + exit ;; hades*:*MiNT:*:* | hades*:*mint:*:* | *hades*:*TOS:*:*) - echo m68k-hades-mint${UNAME_RELEASE} - exit 0 ;; + echo m68k-hades-mint${UNAME_RELEASE} + exit ;; *:*MiNT:*:* | *:*mint:*:* | *:*TOS:*:*) - echo m68k-unknown-mint${UNAME_RELEASE} - exit 0 ;; + echo m68k-unknown-mint${UNAME_RELEASE} + exit ;; m68k:machten:*:*) echo m68k-apple-machten${UNAME_RELEASE} - exit 0 ;; + exit ;; powerpc:machten:*:*) echo powerpc-apple-machten${UNAME_RELEASE} - exit 0 ;; + exit ;; RISC*:Mach:*:*) echo mips-dec-mach_bsd4.3 - exit 0 ;; + exit ;; RISC*:ULTRIX:*:*) echo mips-dec-ultrix${UNAME_RELEASE} - exit 0 ;; + exit ;; VAX*:ULTRIX*:*:*) echo vax-dec-ultrix${UNAME_RELEASE} - exit 0 ;; + exit ;; 2020:CLIX:*:* | 2430:CLIX:*:*) echo clipper-intergraph-clix${UNAME_RELEASE} - exit 0 ;; + exit ;; mips:*:*:UMIPS | mips:*:*:RISCos) eval $set_cc_for_build sed 's/^ //' << EOF >$dummy.c @@ -467,35 +454,36 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in exit (-1); } EOF - $CC_FOR_BUILD -o $dummy $dummy.c \ - && $dummy `echo "${UNAME_RELEASE}" | sed -n 's/\([0-9]*\).*/\1/p'` \ - && exit 0 + $CC_FOR_BUILD -o $dummy $dummy.c && + dummyarg=`echo "${UNAME_RELEASE}" | sed -n 's/\([0-9]*\).*/\1/p'` && + SYSTEM_NAME=`$dummy $dummyarg` && + { echo "$SYSTEM_NAME"; exit; } echo mips-mips-riscos${UNAME_RELEASE} - exit 0 ;; + exit ;; Motorola:PowerMAX_OS:*:*) echo 
powerpc-motorola-powermax - exit 0 ;; + exit ;; Motorola:*:4.3:PL8-*) echo powerpc-harris-powermax - exit 0 ;; + exit ;; Night_Hawk:*:*:PowerMAX_OS | Synergy:PowerMAX_OS:*:*) echo powerpc-harris-powermax - exit 0 ;; + exit ;; Night_Hawk:Power_UNIX:*:*) echo powerpc-harris-powerunix - exit 0 ;; + exit ;; m88k:CX/UX:7*:*) echo m88k-harris-cxux7 - exit 0 ;; + exit ;; m88k:*:4*:R4*) echo m88k-motorola-sysv4 - exit 0 ;; + exit ;; m88k:*:3*:R3*) echo m88k-motorola-sysv3 - exit 0 ;; + exit ;; AViiON:dgux:*:*) - # DG/UX returns AViiON for all architectures - UNAME_PROCESSOR=`/usr/bin/uname -p` + # DG/UX returns AViiON for all architectures + UNAME_PROCESSOR=`/usr/bin/uname -p` if [ $UNAME_PROCESSOR = mc88100 ] || [ $UNAME_PROCESSOR = mc88110 ] then if [ ${TARGET_BINARY_INTERFACE}x = m88kdguxelfx ] || \ @@ -508,29 +496,29 @@ EOF else echo i586-dg-dgux${UNAME_RELEASE} fi - exit 0 ;; + exit ;; M88*:DolphinOS:*:*) # DolphinOS (SVR3) echo m88k-dolphin-sysv3 - exit 0 ;; + exit ;; M88*:*:R3*:*) # Delta 88k system running SVR3 echo m88k-motorola-sysv3 - exit 0 ;; + exit ;; XD88*:*:*:*) # Tektronix XD88 system running UTekV (SVR3) echo m88k-tektronix-sysv3 - exit 0 ;; + exit ;; Tek43[0-9][0-9]:UTek:*:*) # Tektronix 4300 system running UTek (BSD) echo m68k-tektronix-bsd - exit 0 ;; + exit ;; *:IRIX*:*:*) echo mips-sgi-irix`echo ${UNAME_RELEASE}|sed -e 's/-/_/g'` - exit 0 ;; + exit ;; ????????:AIX?:[12].1:2) # AIX 2.2.1 or AIX 2.1.1 is RT/PC AIX. 
- echo romp-ibm-aix # uname -m gives an 8 hex-code CPU id - exit 0 ;; # Note that: echo "'`uname -s`'" gives 'AIX ' + echo romp-ibm-aix # uname -m gives an 8 hex-code CPU id + exit ;; # Note that: echo "'`uname -s`'" gives 'AIX ' i*86:AIX:*:*) echo i386-ibm-aix - exit 0 ;; + exit ;; ia64:AIX:*:*) if [ -x /usr/bin/oslevel ] ; then IBM_REV=`/usr/bin/oslevel` @@ -538,7 +526,7 @@ EOF IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE} fi echo ${UNAME_MACHINE}-ibm-aix${IBM_REV} - exit 0 ;; + exit ;; *:AIX:2:3) if grep bos325 /usr/include/stdio.h >/dev/null 2>&1; then eval $set_cc_for_build @@ -553,15 +541,19 @@ EOF exit(0); } EOF - $CC_FOR_BUILD -o $dummy $dummy.c && $dummy && exit 0 - echo rs6000-ibm-aix3.2.5 + if $CC_FOR_BUILD -o $dummy $dummy.c && SYSTEM_NAME=`$dummy` + then + echo "$SYSTEM_NAME" + else + echo rs6000-ibm-aix3.2.5 + fi elif grep bos324 /usr/include/stdio.h >/dev/null 2>&1; then echo rs6000-ibm-aix3.2.4 else echo rs6000-ibm-aix3.2 fi - exit 0 ;; - *:AIX:*:[45]) + exit ;; + *:AIX:*:[4567]) IBM_CPU_ID=`/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }'` if /usr/sbin/lsattr -El ${IBM_CPU_ID} | grep ' POWER' >/dev/null 2>&1; then IBM_ARCH=rs6000 @@ -574,28 +566,28 @@ EOF IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE} fi echo ${IBM_ARCH}-ibm-aix${IBM_REV} - exit 0 ;; + exit ;; *:AIX:*:*) echo rs6000-ibm-aix - exit 0 ;; + exit ;; ibmrt:4.4BSD:*|romp-ibm:BSD:*) echo romp-ibm-bsd4.4 - exit 0 ;; + exit ;; ibmrt:*BSD:*|romp-ibm:BSD:*) # covers RT/PC BSD and echo romp-ibm-bsd${UNAME_RELEASE} # 4.3 with uname added to - exit 0 ;; # report: romp-ibm BSD 4.3 + exit ;; # report: romp-ibm BSD 4.3 *:BOSX:*:*) echo rs6000-bull-bosx - exit 0 ;; + exit ;; DPX/2?00:B.O.S.:*:*) echo m68k-bull-sysv3 - exit 0 ;; + exit ;; 9000/[34]??:4.3bsd:1.*:*) echo m68k-hp-bsd - exit 0 ;; + exit ;; hp300:4.4BSD:*:* | 9000/[34]??:4.3bsd:2.*:*) echo m68k-hp-bsd4.4 - exit 0 ;; + exit ;; 9000/[34678]??:HP-UX:*:*) HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'` case 
"${UNAME_MACHINE}" in @@ -604,52 +596,52 @@ EOF 9000/[678][0-9][0-9]) if [ -x /usr/bin/getconf ]; then sc_cpu_version=`/usr/bin/getconf SC_CPU_VERSION 2>/dev/null` - sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null` - case "${sc_cpu_version}" in - 523) HP_ARCH="hppa1.0" ;; # CPU_PA_RISC1_0 - 528) HP_ARCH="hppa1.1" ;; # CPU_PA_RISC1_1 - 532) # CPU_PA_RISC2_0 - case "${sc_kernel_bits}" in - 32) HP_ARCH="hppa2.0n" ;; - 64) HP_ARCH="hppa2.0w" ;; + sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null` + case "${sc_cpu_version}" in + 523) HP_ARCH="hppa1.0" ;; # CPU_PA_RISC1_0 + 528) HP_ARCH="hppa1.1" ;; # CPU_PA_RISC1_1 + 532) # CPU_PA_RISC2_0 + case "${sc_kernel_bits}" in + 32) HP_ARCH="hppa2.0n" ;; + 64) HP_ARCH="hppa2.0w" ;; '') HP_ARCH="hppa2.0" ;; # HP-UX 10.20 - esac ;; - esac + esac ;; + esac fi if [ "${HP_ARCH}" = "" ]; then eval $set_cc_for_build - sed 's/^ //' << EOF >$dummy.c + sed 's/^ //' << EOF >$dummy.c - #define _HPUX_SOURCE - #include - #include + #define _HPUX_SOURCE + #include + #include - int main () - { - #if defined(_SC_KERNEL_BITS) - long bits = sysconf(_SC_KERNEL_BITS); - #endif - long cpu = sysconf (_SC_CPU_VERSION); + int main () + { + #if defined(_SC_KERNEL_BITS) + long bits = sysconf(_SC_KERNEL_BITS); + #endif + long cpu = sysconf (_SC_CPU_VERSION); - switch (cpu) - { - case CPU_PA_RISC1_0: puts ("hppa1.0"); break; - case CPU_PA_RISC1_1: puts ("hppa1.1"); break; - case CPU_PA_RISC2_0: - #if defined(_SC_KERNEL_BITS) - switch (bits) - { - case 64: puts ("hppa2.0w"); break; - case 32: puts ("hppa2.0n"); break; - default: puts ("hppa2.0"); break; - } break; - #else /* !defined(_SC_KERNEL_BITS) */ - puts ("hppa2.0"); break; - #endif - default: puts ("hppa1.0"); break; - } - exit (0); - } + switch (cpu) + { + case CPU_PA_RISC1_0: puts ("hppa1.0"); break; + case CPU_PA_RISC1_1: puts ("hppa1.1"); break; + case CPU_PA_RISC2_0: + #if defined(_SC_KERNEL_BITS) + switch (bits) + { + case 64: puts ("hppa2.0w"); break; + case 32: puts 
("hppa2.0n"); break; + default: puts ("hppa2.0"); break; + } break; + #else /* !defined(_SC_KERNEL_BITS) */ + puts ("hppa2.0"); break; + #endif + default: puts ("hppa1.0"); break; + } + exit (0); + } EOF (CCOPTS= $CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null) && HP_ARCH=`$dummy` test -z "$HP_ARCH" && HP_ARCH=hppa @@ -657,9 +649,19 @@ EOF esac if [ ${HP_ARCH} = "hppa2.0w" ] then - # avoid double evaluation of $set_cc_for_build - test -n "$CC_FOR_BUILD" || eval $set_cc_for_build - if echo __LP64__ | (CCOPTS= $CC_FOR_BUILD -E -) | grep __LP64__ >/dev/null + eval $set_cc_for_build + + # hppa2.0w-hp-hpux* has a 64-bit kernel and a compiler generating + # 32-bit code. hppa64-hp-hpux* has the same kernel and a compiler + # generating 64-bit code. GNU and HP use different nomenclature: + # + # $ CC_FOR_BUILD=cc ./config.guess + # => hppa2.0w-hp-hpux11.23 + # $ CC_FOR_BUILD="cc +DA2.0w" ./config.guess + # => hppa64-hp-hpux11.23 + + if echo __LP64__ | (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | + grep -q __LP64__ then HP_ARCH="hppa2.0w" else @@ -667,11 +669,11 @@ EOF fi fi echo ${HP_ARCH}-hp-hpux${HPUX_REV} - exit 0 ;; + exit ;; ia64:HP-UX:*:*) HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'` echo ia64-hp-hpux${HPUX_REV} - exit 0 ;; + exit ;; 3050*:HI-UX:*:*) eval $set_cc_for_build sed 's/^ //' << EOF >$dummy.c @@ -699,207 +701,173 @@ EOF exit (0); } EOF - $CC_FOR_BUILD -o $dummy $dummy.c && $dummy && exit 0 + $CC_FOR_BUILD -o $dummy $dummy.c && SYSTEM_NAME=`$dummy` && + { echo "$SYSTEM_NAME"; exit; } echo unknown-hitachi-hiuxwe2 - exit 0 ;; + exit ;; 9000/7??:4.3bsd:*:* | 9000/8?[79]:4.3bsd:*:* ) echo hppa1.1-hp-bsd - exit 0 ;; + exit ;; 9000/8??:4.3bsd:*:*) echo hppa1.0-hp-bsd - exit 0 ;; + exit ;; *9??*:MPE/iX:*:* | *3000*:MPE/iX:*:*) echo hppa1.0-hp-mpeix - exit 0 ;; + exit ;; hp7??:OSF1:*:* | hp8?[79]:OSF1:*:* ) echo hppa1.1-hp-osf - exit 0 ;; + exit ;; hp8??:OSF1:*:*) echo hppa1.0-hp-osf - exit 0 ;; + exit ;; i*86:OSF1:*:*) if [ -x /usr/sbin/sysversion ] ; then 
echo ${UNAME_MACHINE}-unknown-osf1mk else echo ${UNAME_MACHINE}-unknown-osf1 fi - exit 0 ;; + exit ;; parisc*:Lites*:*:*) echo hppa1.1-hp-lites - exit 0 ;; + exit ;; C1*:ConvexOS:*:* | convex:ConvexOS:C1*:*) echo c1-convex-bsd - exit 0 ;; + exit ;; C2*:ConvexOS:*:* | convex:ConvexOS:C2*:*) if getsysinfo -f scalar_acc then echo c32-convex-bsd else echo c2-convex-bsd fi - exit 0 ;; + exit ;; C34*:ConvexOS:*:* | convex:ConvexOS:C34*:*) echo c34-convex-bsd - exit 0 ;; + exit ;; C38*:ConvexOS:*:* | convex:ConvexOS:C38*:*) echo c38-convex-bsd - exit 0 ;; + exit ;; C4*:ConvexOS:*:* | convex:ConvexOS:C4*:*) echo c4-convex-bsd - exit 0 ;; + exit ;; CRAY*Y-MP:*:*:*) echo ymp-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' - exit 0 ;; + exit ;; CRAY*[A-Z]90:*:*:*) echo ${UNAME_MACHINE}-cray-unicos${UNAME_RELEASE} \ | sed -e 's/CRAY.*\([A-Z]90\)/\1/' \ -e y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/ \ -e 's/\.[^.]*$/.X/' - exit 0 ;; + exit ;; CRAY*TS:*:*:*) echo t90-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' - exit 0 ;; + exit ;; CRAY*T3E:*:*:*) echo alphaev5-cray-unicosmk${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' - exit 0 ;; + exit ;; CRAY*SV1:*:*:*) echo sv1-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' - exit 0 ;; + exit ;; *:UNICOS/mp:*:*) - echo nv1-cray-unicosmp${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' - exit 0 ;; + echo craynv-cray-unicosmp${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + exit ;; F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*) FUJITSU_PROC=`uname -m | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'` - FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` - FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'` - echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" - exit 0 ;; + FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` + FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'` + echo 
"${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" + exit ;; 5000:UNIX_System_V:4.*:*) - FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` - FUJITSU_REL=`echo ${UNAME_RELEASE} | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/ /_/'` - echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" - exit 0 ;; + FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` + FUJITSU_REL=`echo ${UNAME_RELEASE} | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/ /_/'` + echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" + exit ;; i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*) echo ${UNAME_MACHINE}-pc-bsdi${UNAME_RELEASE} - exit 0 ;; + exit ;; sparc*:BSD/OS:*:*) echo sparc-unknown-bsdi${UNAME_RELEASE} - exit 0 ;; + exit ;; *:BSD/OS:*:*) echo ${UNAME_MACHINE}-unknown-bsdi${UNAME_RELEASE} - exit 0 ;; + exit ;; *:FreeBSD:*:*) - # Determine whether the default compiler uses glibc. - eval $set_cc_for_build - sed 's/^ //' << EOF >$dummy.c - #include - #if __GLIBC__ >= 2 - LIBC=gnu - #else - LIBC= - #endif -EOF - eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep ^LIBC=` - # GNU/KFreeBSD systems have a "k" prefix to indicate we are using - # FreeBSD's kernel, but not the complete OS. 
- case ${LIBC} in gnu) kernel_only='k' ;; esac - echo ${UNAME_MACHINE}-unknown-${kernel_only}freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`${LIBC:+-$LIBC} - exit 0 ;; + UNAME_PROCESSOR=`/usr/bin/uname -p` + case ${UNAME_PROCESSOR} in + amd64) + echo x86_64-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;; + *) + echo ${UNAME_PROCESSOR}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;; + esac + exit ;; i*:CYGWIN*:*) echo ${UNAME_MACHINE}-pc-cygwin - exit 0 ;; - i*:MINGW*:*) + exit ;; + *:MINGW*:*) echo ${UNAME_MACHINE}-pc-mingw32 - exit 0 ;; + exit ;; + i*:MSYS*:*) + echo ${UNAME_MACHINE}-pc-msys + exit ;; + i*:windows32*:*) + # uname -m includes "-pc" on this system. + echo ${UNAME_MACHINE}-mingw32 + exit ;; i*:PW*:*) echo ${UNAME_MACHINE}-pc-pw32 - exit 0 ;; - x86:Interix*:[34]*) - echo i586-pc-interix${UNAME_RELEASE}|sed -e 's/\..*//' - exit 0 ;; + exit ;; + *:Interix*:*) + case ${UNAME_MACHINE} in + x86) + echo i586-pc-interix${UNAME_RELEASE} + exit ;; + authenticamd | genuineintel | EM64T) + echo x86_64-unknown-interix${UNAME_RELEASE} + exit ;; + IA64) + echo ia64-unknown-interix${UNAME_RELEASE} + exit ;; + esac ;; [345]86:Windows_95:* | [345]86:Windows_98:* | [345]86:Windows_NT:*) echo i${UNAME_MACHINE}-pc-mks - exit 0 ;; + exit ;; + 8664:Windows_NT:*) + echo x86_64-pc-mks + exit ;; i*:Windows_NT*:* | Pentium*:Windows_NT*:*) # How do we know it's Interix rather than the generic POSIX subsystem? # It also conflicts with pre-2.0 versions of AT&T UWIN. Should we # UNAME_MACHINE based on the output of uname instead of i386? 
echo i586-pc-interix - exit 0 ;; + exit ;; i*:UWIN*:*) echo ${UNAME_MACHINE}-pc-uwin - exit 0 ;; + exit ;; + amd64:CYGWIN*:*:* | x86_64:CYGWIN*:*:*) + echo x86_64-unknown-cygwin + exit ;; p*:CYGWIN*:*) echo powerpcle-unknown-cygwin - exit 0 ;; + exit ;; prep*:SunOS:5.*:*) echo powerpcle-unknown-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` - exit 0 ;; + exit ;; *:GNU:*:*) # the GNU system echo `echo ${UNAME_MACHINE}|sed -e 's,[-/].*$,,'`-unknown-gnu`echo ${UNAME_RELEASE}|sed -e 's,/.*$,,'` - exit 0 ;; + exit ;; *:GNU/*:*:*) # other systems with GNU libc and userland echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr '[A-Z]' '[a-z]'``echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`-gnu - exit 0 ;; + exit ;; i*86:Minix:*:*) echo ${UNAME_MACHINE}-pc-minix - exit 0 ;; - arm*:Linux:*:*) + exit ;; + aarch64:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-gnu - exit 0 ;; - cris:Linux:*:*) - echo cris-axis-linux-gnu - exit 0 ;; - ia64:Linux:*:*) + exit ;; + aarch64_be:Linux:*:*) + UNAME_MACHINE=aarch64_be echo ${UNAME_MACHINE}-unknown-linux-gnu - exit 0 ;; - m68*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-gnu - exit 0 ;; - mips:Linux:*:*) - eval $set_cc_for_build - sed 's/^ //' << EOF >$dummy.c - #undef CPU - #undef mips - #undef mipsel - #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL) - CPU=mipsel - #else - #if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB) - CPU=mips - #else - CPU= - #endif - #endif -EOF - eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep ^CPU=` - test x"${CPU}" != x && echo "${CPU}-unknown-linux-gnu" && exit 0 - ;; - mips64:Linux:*:*) - eval $set_cc_for_build - sed 's/^ //' << EOF >$dummy.c - #undef CPU - #undef mips64 - #undef mips64el - #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL) - CPU=mips64el - #else - #if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB) - CPU=mips64 - #else - CPU= 
- #endif - #endif -EOF - eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep ^CPU=` - test x"${CPU}" != x && echo "${CPU}-unknown-linux-gnu" && exit 0 - ;; - ppc:Linux:*:*) - echo powerpc-unknown-linux-gnu - exit 0 ;; - ppc64:Linux:*:*) - echo powerpc64-unknown-linux-gnu - exit 0 ;; + exit ;; alpha:Linux:*:*) case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in EV5) UNAME_MACHINE=alphaev5 ;; @@ -909,11 +877,90 @@ EOF EV6) UNAME_MACHINE=alphaev6 ;; EV67) UNAME_MACHINE=alphaev67 ;; EV68*) UNAME_MACHINE=alphaev68 ;; - esac - objdump --private-headers /bin/sh | grep ld.so.1 >/dev/null + esac + objdump --private-headers /bin/sh | grep -q ld.so.1 if test "$?" = 0 ; then LIBC="libc1" ; else LIBC="" ; fi echo ${UNAME_MACHINE}-unknown-linux-gnu${LIBC} - exit 0 ;; + exit ;; + arm*:Linux:*:*) + eval $set_cc_for_build + if echo __ARM_EABI__ | $CC_FOR_BUILD -E - 2>/dev/null \ + | grep -q __ARM_EABI__ + then + echo ${UNAME_MACHINE}-unknown-linux-gnu + else + if echo __ARM_PCS_VFP | $CC_FOR_BUILD -E - 2>/dev/null \ + | grep -q __ARM_PCS_VFP + then + echo ${UNAME_MACHINE}-unknown-linux-gnueabi + else + echo ${UNAME_MACHINE}-unknown-linux-gnueabihf + fi + fi + exit ;; + avr32*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + cris:Linux:*:*) + echo ${UNAME_MACHINE}-axis-linux-gnu + exit ;; + crisv32:Linux:*:*) + echo ${UNAME_MACHINE}-axis-linux-gnu + exit ;; + frv:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + hexagon:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + i*86:Linux:*:*) + LIBC=gnu + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c + #ifdef __dietlibc__ + LIBC=dietlibc + #endif +EOF + eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^LIBC'` + echo "${UNAME_MACHINE}-pc-linux-${LIBC}" + exit ;; + ia64:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + m32r*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + m68*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + 
mips:Linux:*:* | mips64:Linux:*:*) + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c + #undef CPU + #undef ${UNAME_MACHINE} + #undef ${UNAME_MACHINE}el + #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL) + CPU=${UNAME_MACHINE}el + #else + #if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB) + CPU=${UNAME_MACHINE} + #else + CPU= + #endif + #endif +EOF + eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^CPU'` + test x"${CPU}" != x && { echo "${CPU}-unknown-linux-gnu"; exit; } + ;; + or32:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + padre:Linux:*:*) + echo sparc-unknown-linux-gnu + exit ;; + parisc64:Linux:*:* | hppa64:Linux:*:*) + echo hppa64-unknown-linux-gnu + exit ;; parisc:Linux:*:* | hppa:Linux:*:*) # Look for CPU level case `grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2` in @@ -921,115 +968,71 @@ EOF PA8*) echo hppa2.0-unknown-linux-gnu ;; *) echo hppa-unknown-linux-gnu ;; esac - exit 0 ;; - parisc64:Linux:*:* | hppa64:Linux:*:*) - echo hppa64-unknown-linux-gnu - exit 0 ;; + exit ;; + ppc64:Linux:*:*) + echo powerpc64-unknown-linux-gnu + exit ;; + ppc:Linux:*:*) + echo powerpc-unknown-linux-gnu + exit ;; s390:Linux:*:* | s390x:Linux:*:*) echo ${UNAME_MACHINE}-ibm-linux - exit 0 ;; + exit ;; sh64*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-gnu - exit 0 ;; + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; sh*:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-gnu - exit 0 ;; + exit ;; sparc:Linux:*:* | sparc64:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-gnu - exit 0 ;; + exit ;; + tile*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + vax:Linux:*:*) + echo ${UNAME_MACHINE}-dec-linux-gnu + exit ;; x86_64:Linux:*:*) - echo x86_64-unknown-linux-gnu - exit 0 ;; - i*86:Linux:*:*) - # The BFD linker knows what the default object file format is, so - # first see if it will tell us. 
cd to the root directory to prevent - # problems with other programs or directories called `ld' in the path. - # Set LC_ALL=C to ensure ld outputs messages in English. - ld_supported_targets=`cd /; LC_ALL=C ld --help 2>&1 \ - | sed -ne '/supported targets:/!d - s/[ ][ ]*/ /g - s/.*supported targets: *// - s/ .*// - p'` - case "$ld_supported_targets" in - elf32-i386) - TENTATIVE="${UNAME_MACHINE}-pc-linux-gnu" - ;; - a.out-i386-linux) - echo "${UNAME_MACHINE}-pc-linux-gnuaout" - exit 0 ;; - coff-i386) - echo "${UNAME_MACHINE}-pc-linux-gnucoff" - exit 0 ;; - "") - # Either a pre-BFD a.out linker (linux-gnuoldld) or - # one that does not give us useful --help. - echo "${UNAME_MACHINE}-pc-linux-gnuoldld" - exit 0 ;; - esac - # Determine whether the default compiler is a.out or elf - eval $set_cc_for_build - sed 's/^ //' << EOF >$dummy.c - #include - #ifdef __ELF__ - # ifdef __GLIBC__ - # if __GLIBC__ >= 2 - LIBC=gnu - # else - LIBC=gnulibc1 - # endif - # else - LIBC=gnulibc1 - # endif - #else - #ifdef __INTEL_COMPILER - LIBC=gnu - #else - LIBC=gnuaout - #endif - #endif - #ifdef __dietlibc__ - LIBC=dietlibc - #endif -EOF - eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep ^LIBC=` - test x"${LIBC}" != x && echo "${UNAME_MACHINE}-pc-linux-${LIBC}" && exit 0 - test x"${TENTATIVE}" != x && echo "${TENTATIVE}" && exit 0 - ;; + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + xtensa*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; i*86:DYNIX/ptx:4*:*) # ptx 4.0 does uname -s correctly, with DYNIX/ptx in there. # earlier versions are messed up and put the nodename in both # sysname and nodename. echo i386-sequent-sysv4 - exit 0 ;; + exit ;; i*86:UNIX_SV:4.2MP:2.*) - # Unixware is an offshoot of SVR4, but it has its own version - # number series starting with 2... - # I am not positive that other SVR4 systems won't match this, + # Unixware is an offshoot of SVR4, but it has its own version + # number series starting with 2... 
+ # I am not positive that other SVR4 systems won't match this, # I just have to hope. -- rms. - # Use sysv4.2uw... so that sysv4* matches it. + # Use sysv4.2uw... so that sysv4* matches it. echo ${UNAME_MACHINE}-pc-sysv4.2uw${UNAME_VERSION} - exit 0 ;; + exit ;; i*86:OS/2:*:*) # If we were able to find `uname', then EMX Unix compatibility # is probably installed. echo ${UNAME_MACHINE}-pc-os2-emx - exit 0 ;; + exit ;; i*86:XTS-300:*:STOP) echo ${UNAME_MACHINE}-unknown-stop - exit 0 ;; + exit ;; i*86:atheos:*:*) echo ${UNAME_MACHINE}-unknown-atheos - exit 0 ;; - i*86:syllable:*:*) + exit ;; + i*86:syllable:*:*) echo ${UNAME_MACHINE}-pc-syllable - exit 0 ;; - i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.0*:*) + exit ;; + i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.[02]*:*) echo i386-unknown-lynxos${UNAME_RELEASE} - exit 0 ;; + exit ;; i*86:*DOS:*:*) echo ${UNAME_MACHINE}-pc-msdosdjgpp - exit 0 ;; + exit ;; i*86:*:4.*:* | i*86:SYSTEM_V:4.*:*) UNAME_REL=`echo ${UNAME_RELEASE} | sed 's/\/MP$//'` if grep Novell /usr/include/link.h >/dev/null 2>/dev/null; then @@ -1037,15 +1040,16 @@ EOF else echo ${UNAME_MACHINE}-pc-sysv${UNAME_REL} fi - exit 0 ;; - i*86:*:5:[78]*) + exit ;; + i*86:*:5:[678]*) + # UnixWare 7.x, OpenUNIX and OpenServer 6. case `/bin/uname -X | grep "^Machine"` in *486*) UNAME_MACHINE=i486 ;; *Pentium) UNAME_MACHINE=i586 ;; *Pent*|*Celeron) UNAME_MACHINE=i686 ;; esac echo ${UNAME_MACHINE}-unknown-sysv${UNAME_RELEASE}${UNAME_SYSTEM}${UNAME_VERSION} - exit 0 ;; + exit ;; i*86:*:3.2:*) if test -f /usr/options/cb.name; then UNAME_REL=`sed -n 's/.*Version //p' /dev/null 2>&1 ; then echo i860-stardent-sysv${UNAME_RELEASE} # Stardent Vistra i860-SVR4 else # Add other i860-SVR4 vendors below as they are discovered. 
echo i860-unknown-sysv${UNAME_RELEASE} # Unknown i860-SVR4 fi - exit 0 ;; + exit ;; mini*:CTIX:SYS*5:*) # "miniframe" echo m68010-convergent-sysv - exit 0 ;; + exit ;; mc68k:UNIX:SYSTEM5:3.51m) echo m68k-convergent-sysv - exit 0 ;; + exit ;; M680?0:D-NIX:5.3:*) echo m68k-diab-dnix - exit 0 ;; - M68*:*:R3V[567]*:*) - test -r /sysV68 && echo 'm68k-motorola-sysv' && exit 0 ;; - 3[345]??:*:4.0:3.0 | 3[34]??A:*:4.0:3.0 | 3[34]??,*:*:4.0:3.0 | 3[34]??/*:*:4.0:3.0 | 4400:*:4.0:3.0 | 4850:*:4.0:3.0 | SKA40:*:4.0:3.0 | SDS2:*:4.0:3.0 | SHG2:*:4.0:3.0) + exit ;; + M68*:*:R3V[5678]*:*) + test -r /sysV68 && { echo 'm68k-motorola-sysv'; exit; } ;; + 3[345]??:*:4.0:3.0 | 3[34]??A:*:4.0:3.0 | 3[34]??,*:*:4.0:3.0 | 3[34]??/*:*:4.0:3.0 | 4400:*:4.0:3.0 | 4850:*:4.0:3.0 | SKA40:*:4.0:3.0 | SDS2:*:4.0:3.0 | SHG2:*:4.0:3.0 | S7501*:*:4.0:3.0) OS_REL='' test -r /etc/.relid \ && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid` /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ - && echo i486-ncr-sysv4.3${OS_REL} && exit 0 + && { echo i486-ncr-sysv4.3${OS_REL}; exit; } /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \ - && echo i586-ncr-sysv4.3${OS_REL} && exit 0 ;; + && { echo i586-ncr-sysv4.3${OS_REL}; exit; } ;; 3[34]??:*:4.0:* | 3[34]??,*:*:4.0:*) - /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ - && echo i486-ncr-sysv4 && exit 0 ;; + /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ + && { echo i486-ncr-sysv4; exit; } ;; + NCR*:*:4.2:* | MPRAS*:*:4.2:*) + OS_REL='.3' + test -r /etc/.relid \ + && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid` + /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ + && { echo i486-ncr-sysv4.3${OS_REL}; exit; } + /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \ + && { echo i586-ncr-sysv4.3${OS_REL}; exit; } + /bin/uname -p 2>/dev/null | /bin/grep pteron >/dev/null \ + && { echo i586-ncr-sysv4.3${OS_REL}; exit; } ;; m68*:LynxOS:2.*:* | m68*:LynxOS:3.0*:*) echo m68k-unknown-lynxos${UNAME_RELEASE} - 
exit 0 ;; + exit ;; mc68030:UNIX_System_V:4.*:*) echo m68k-atari-sysv4 - exit 0 ;; + exit ;; TSUNAMI:LynxOS:2.*:*) echo sparc-unknown-lynxos${UNAME_RELEASE} - exit 0 ;; + exit ;; rs6000:LynxOS:2.*:*) echo rs6000-unknown-lynxos${UNAME_RELEASE} - exit 0 ;; - PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.0*:*) + exit ;; + PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.[02]*:*) echo powerpc-unknown-lynxos${UNAME_RELEASE} - exit 0 ;; + exit ;; SM[BE]S:UNIX_SV:*:*) echo mips-dde-sysv${UNAME_RELEASE} - exit 0 ;; + exit ;; RM*:ReliantUNIX-*:*:*) echo mips-sni-sysv4 - exit 0 ;; + exit ;; RM*:SINIX-*:*:*) echo mips-sni-sysv4 - exit 0 ;; + exit ;; *:SINIX-*:*:*) if uname -p 2>/dev/null >/dev/null ; then UNAME_MACHINE=`(uname -p) 2>/dev/null` @@ -1137,68 +1154,94 @@ EOF else echo ns32k-sni-sysv fi - exit 0 ;; - PENTIUM:*:4.0*:*) # Unisys `ClearPath HMP IX 4000' SVR4/MP effort - # says - echo i586-unisys-sysv4 - exit 0 ;; + exit ;; + PENTIUM:*:4.0*:*) # Unisys `ClearPath HMP IX 4000' SVR4/MP effort + # says + echo i586-unisys-sysv4 + exit ;; *:UNIX_System_V:4*:FTX*) # From Gerald Hewes . # How about differentiating between stratus architectures? -djm echo hppa1.1-stratus-sysv4 - exit 0 ;; + exit ;; *:*:*:FTX*) # From seanf@swdc.stratus.com. echo i860-stratus-sysv4 - exit 0 ;; + exit ;; + i*86:VOS:*:*) + # From Paul.Green@stratus.com. + echo ${UNAME_MACHINE}-stratus-vos + exit ;; *:VOS:*:*) # From Paul.Green@stratus.com. 
echo hppa1.1-stratus-vos - exit 0 ;; + exit ;; mc68*:A/UX:*:*) echo m68k-apple-aux${UNAME_RELEASE} - exit 0 ;; + exit ;; news*:NEWS-OS:6*:*) echo mips-sony-newsos6 - exit 0 ;; + exit ;; R[34]000:*System_V*:*:* | R4000:UNIX_SYSV:*:* | R*000:UNIX_SV:*:*) if [ -d /usr/nec ]; then - echo mips-nec-sysv${UNAME_RELEASE} + echo mips-nec-sysv${UNAME_RELEASE} else - echo mips-unknown-sysv${UNAME_RELEASE} + echo mips-unknown-sysv${UNAME_RELEASE} fi - exit 0 ;; + exit ;; BeBox:BeOS:*:*) # BeOS running on hardware made by Be, PPC only. echo powerpc-be-beos - exit 0 ;; + exit ;; BeMac:BeOS:*:*) # BeOS running on Mac or Mac clone, PPC only. echo powerpc-apple-beos - exit 0 ;; + exit ;; BePC:BeOS:*:*) # BeOS running on Intel PC compatible. echo i586-pc-beos - exit 0 ;; + exit ;; + BePC:Haiku:*:*) # Haiku running on Intel PC compatible. + echo i586-pc-haiku + exit ;; SX-4:SUPER-UX:*:*) echo sx4-nec-superux${UNAME_RELEASE} - exit 0 ;; + exit ;; SX-5:SUPER-UX:*:*) echo sx5-nec-superux${UNAME_RELEASE} - exit 0 ;; + exit ;; SX-6:SUPER-UX:*:*) echo sx6-nec-superux${UNAME_RELEASE} - exit 0 ;; + exit ;; + SX-7:SUPER-UX:*:*) + echo sx7-nec-superux${UNAME_RELEASE} + exit ;; + SX-8:SUPER-UX:*:*) + echo sx8-nec-superux${UNAME_RELEASE} + exit ;; + SX-8R:SUPER-UX:*:*) + echo sx8r-nec-superux${UNAME_RELEASE} + exit ;; Power*:Rhapsody:*:*) echo powerpc-apple-rhapsody${UNAME_RELEASE} - exit 0 ;; + exit ;; *:Rhapsody:*:*) echo ${UNAME_MACHINE}-apple-rhapsody${UNAME_RELEASE} - exit 0 ;; + exit ;; *:Darwin:*:*) - case `uname -p` in - *86) UNAME_PROCESSOR=i686 ;; - powerpc) UNAME_PROCESSOR=powerpc ;; + UNAME_PROCESSOR=`uname -p` || UNAME_PROCESSOR=unknown + case $UNAME_PROCESSOR in + i386) + eval $set_cc_for_build + if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then + if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \ + (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \ + grep IS_64BIT_ARCH >/dev/null + then + UNAME_PROCESSOR="x86_64" + fi + fi ;; + unknown) UNAME_PROCESSOR=powerpc ;; esac 
echo ${UNAME_PROCESSOR}-apple-darwin${UNAME_RELEASE} - exit 0 ;; + exit ;; *:procnto*:*:* | *:QNX:[0123456789]*:*) UNAME_PROCESSOR=`uname -p` if test "$UNAME_PROCESSOR" = "x86"; then @@ -1206,22 +1249,28 @@ EOF UNAME_MACHINE=pc fi echo ${UNAME_PROCESSOR}-${UNAME_MACHINE}-nto-qnx${UNAME_RELEASE} - exit 0 ;; + exit ;; *:QNX:*:4*) echo i386-pc-qnx - exit 0 ;; + exit ;; + NEO-?:NONSTOP_KERNEL:*:*) + echo neo-tandem-nsk${UNAME_RELEASE} + exit ;; + NSE-?:NONSTOP_KERNEL:*:*) + echo nse-tandem-nsk${UNAME_RELEASE} + exit ;; NSR-?:NONSTOP_KERNEL:*:*) echo nsr-tandem-nsk${UNAME_RELEASE} - exit 0 ;; + exit ;; *:NonStop-UX:*:*) echo mips-compaq-nonstopux - exit 0 ;; + exit ;; BS2000:POSIX*:*:*) echo bs2000-siemens-sysv - exit 0 ;; + exit ;; DS/*:UNIX_System_V:*:*) echo ${UNAME_MACHINE}-${UNAME_SYSTEM}-${UNAME_RELEASE} - exit 0 ;; + exit ;; *:Plan9:*:*) # "uname -m" is not consistent, so use $cputype instead. 386 # is converted to i386 for consistency with other x86 @@ -1232,31 +1281,53 @@ EOF UNAME_MACHINE="$cputype" fi echo ${UNAME_MACHINE}-unknown-plan9 - exit 0 ;; + exit ;; *:TOPS-10:*:*) echo pdp10-unknown-tops10 - exit 0 ;; + exit ;; *:TENEX:*:*) echo pdp10-unknown-tenex - exit 0 ;; + exit ;; KS10:TOPS-20:*:* | KL10:TOPS-20:*:* | TYPE4:TOPS-20:*:*) echo pdp10-dec-tops20 - exit 0 ;; + exit ;; XKL-1:TOPS-20:*:* | TYPE5:TOPS-20:*:*) echo pdp10-xkl-tops20 - exit 0 ;; + exit ;; *:TOPS-20:*:*) echo pdp10-unknown-tops20 - exit 0 ;; + exit ;; *:ITS:*:*) echo pdp10-unknown-its - exit 0 ;; + exit ;; SEI:*:*:SEIUX) - echo mips-sei-seiux${UNAME_RELEASE} - exit 0 ;; + echo mips-sei-seiux${UNAME_RELEASE} + exit ;; *:DragonFly:*:*) echo ${UNAME_MACHINE}-unknown-dragonfly`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` - exit 0 ;; + exit ;; + *:*VMS:*:*) + UNAME_MACHINE=`(uname -p) 2>/dev/null` + case "${UNAME_MACHINE}" in + A*) echo alpha-dec-vms ; exit ;; + I*) echo ia64-dec-vms ; exit ;; + V*) echo vax-dec-vms ; exit ;; + esac ;; + *:XENIX:*:SysV) + echo i386-pc-xenix + exit ;; + 
i*86:skyos:*:*) + echo ${UNAME_MACHINE}-pc-skyos`echo ${UNAME_RELEASE}` | sed -e 's/ .*$//' + exit ;; + i*86:rdos:*:*) + echo ${UNAME_MACHINE}-pc-rdos + exit ;; + i*86:AROS:*:*) + echo ${UNAME_MACHINE}-pc-aros + exit ;; + x86_64:VMkernel:*:*) + echo ${UNAME_MACHINE}-unknown-esx + exit ;; esac #echo '(No uname command or uname output not recognized.)' 1>&2 @@ -1279,16 +1350,16 @@ main () #include printf ("m68k-sony-newsos%s\n", #ifdef NEWSOS4 - "4" + "4" #else - "" + "" #endif - ); exit (0); + ); exit (0); #endif #endif #if defined (__arm) && defined (__acorn) && defined (__unix) - printf ("arm-acorn-riscix"); exit (0); + printf ("arm-acorn-riscix\n"); exit (0); #endif #if defined (hp300) && !defined (hpux) @@ -1377,11 +1448,12 @@ main () } EOF -$CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null && $dummy && exit 0 +$CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null && SYSTEM_NAME=`$dummy` && + { echo "$SYSTEM_NAME"; exit; } # Apollos put the system type in the environment. -test -d /usr/apollo && { echo ${ISP}-apollo-${SYSTYPE}; exit 0; } +test -d /usr/apollo && { echo ${ISP}-apollo-${SYSTYPE}; exit; } # Convex versions that predate uname can use getsysinfo(1) @@ -1390,22 +1462,22 @@ then case `getsysinfo -f cpu_type` in c1*) echo c1-convex-bsd - exit 0 ;; + exit ;; c2*) if getsysinfo -f scalar_acc then echo c32-convex-bsd else echo c2-convex-bsd fi - exit 0 ;; + exit ;; c34*) echo c34-convex-bsd - exit 0 ;; + exit ;; c38*) echo c38-convex-bsd - exit 0 ;; + exit ;; c4*) echo c4-convex-bsd - exit 0 ;; + exit ;; esac fi @@ -1416,7 +1488,9 @@ This script, last modified $timestamp, has failed to recognize the operating system you are using. 
It is advised that you download the most up to date version of the config scripts from - ftp://ftp.gnu.org/pub/gnu/config/ + http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess;hb=HEAD +and + http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub;hb=HEAD If the version you run ($0) is already up to date, please send the following data and any information you think might be diff --git a/config.sub b/config.sub index 264f820a..c894da45 100755 --- a/config.sub +++ b/config.sub @@ -1,9 +1,10 @@ #! /bin/sh # Configuration validation subroutine script. # Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, -# 2000, 2001, 2002, 2003 Free Software Foundation, Inc. +# 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, +# 2011, 2012 Free Software Foundation, Inc. -timestamp='2004-02-23' +timestamp='2012-02-10' # This file is (in principle) common to ALL GNU software. # The presence of a machine in this file suggests that SOME GNU software @@ -20,23 +21,25 @@ timestamp='2004-02-23' # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, -# Boston, MA 02111-1307, USA. - +# along with this program; if not, see . +# # As a special exception to the GNU General Public License, if you # distribute this file as part of a program that contains a # configuration script generated by Autoconf, you may include it under # the same distribution terms that you use for the rest of that program. + # Please send patches to . Submit a context -# diff and a properly formatted ChangeLog entry. +# diff and a properly formatted GNU ChangeLog entry. # # Configuration subroutine to validate and canonicalize a configuration type. # Supply the specified configuration type as an argument. # If it is invalid, we print an error message on stderr and exit with code 1. 
# Otherwise, we print the canonical config type on stdout and succeed. +# You can get the latest version of this script from: +# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub;hb=HEAD + # This file is supposed to be the same for all GNU packages # and recognize all the CPU types, system types and aliases # that are meaningful with *any* GNU software. @@ -70,7 +73,8 @@ Report bugs and patches to ." version="\ GNU config.sub ($timestamp) -Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001 +Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, +2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO @@ -83,11 +87,11 @@ Try \`$me --help' for more information." while test $# -gt 0 ; do case $1 in --time-stamp | --time* | -t ) - echo "$timestamp" ; exit 0 ;; + echo "$timestamp" ; exit ;; --version | -v ) - echo "$version" ; exit 0 ;; + echo "$version" ; exit ;; --help | --h* | -h ) - echo "$usage"; exit 0 ;; + echo "$usage"; exit ;; -- ) # Stop option processing shift; break ;; - ) # Use stdin as input. @@ -99,7 +103,7 @@ while test $# -gt 0 ; do *local*) # First pass through any local machine types. echo $1 - exit 0;; + exit ;; * ) break ;; @@ -118,11 +122,18 @@ esac # Here we must recognize all the valid KERNEL-OS combinations. 
maybe_os=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'` case $maybe_os in - nto-qnx* | linux-gnu* | linux-dietlibc | linux-uclibc* | uclinux-uclibc* | uclinux-gnu* | \ - kfreebsd*-gnu* | knetbsd*-gnu* | netbsd*-gnu* | storm-chaos* | os2-emx* | rtmk-nova*) + nto-qnx* | linux-gnu* | linux-android* | linux-dietlibc | linux-newlib* | \ + linux-uclibc* | uclinux-uclibc* | uclinux-gnu* | kfreebsd*-gnu* | \ + knetbsd*-gnu* | netbsd*-gnu* | \ + kopensolaris*-gnu* | \ + storm-chaos* | os2-emx* | rtmk-nova*) os=-$maybe_os basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'` ;; + android-linux) + os=-linux-android + basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'`-unknown + ;; *) basic_machine=`echo $1 | sed 's/-[^-]*$//'` if [ $basic_machine != $1 ] @@ -145,10 +156,13 @@ case $os in -convergent* | -ncr* | -news | -32* | -3600* | -3100* | -hitachi* |\ -c[123]* | -convex* | -sun | -crds | -omron* | -dg | -ultra | -tti* | \ -harris | -dolphin | -highlevel | -gould | -cbm | -ns | -masscomp | \ - -apple | -axis) + -apple | -axis | -knuth | -cray | -microblaze) os= basic_machine=$1 ;; + -bluegene*) + os=-cnk + ;; -sim | -cisco | -oki | -wec | -winbond) os= basic_machine=$1 @@ -163,13 +177,17 @@ case $os in os=-chorusos basic_machine=$1 ;; - -chorusrdb) - os=-chorusrdb + -chorusrdb) + os=-chorusrdb basic_machine=$1 - ;; + ;; -hiux*) os=-hiuxwe2 ;; + -sco6) + os=-sco5v6 + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; -sco5) os=-sco3.2v5 basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` @@ -186,6 +204,10 @@ case $os in # Don't forget version if it is 3.2v4 or newer. basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` ;; + -sco5v6*) + # Don't forget version if it is 3.2v4 or newer. + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; -sco*) os=-sco3.2v2 basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` @@ -227,25 +249,36 @@ case $basic_machine in # Some are omitted here because they have special meanings below. 
1750a | 580 \ | a29k \ + | aarch64 | aarch64_be \ | alpha | alphaev[4-8] | alphaev56 | alphaev6[78] | alphapca5[67] \ | alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] | alpha64pca5[67] \ | am33_2.0 \ - | arc | arm | arm[bl]e | arme[lb] | armv[2345] | armv[345][lb] | avr \ + | arc | arm | arm[bl]e | arme[lb] | armv[2345] | armv[345][lb] | avr | avr32 \ + | be32 | be64 \ + | bfin \ | c4x | clipper \ | d10v | d30v | dlx | dsp16xx \ - | fr30 | frv \ + | epiphany \ + | fido | fr30 | frv \ | h8300 | h8500 | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \ + | hexagon \ | i370 | i860 | i960 | ia64 \ | ip2k | iq2000 \ - | m32r | m68000 | m68k | m88k | mcore \ + | le32 | le64 \ + | lm32 \ + | m32c | m32r | m32rle | m68000 | m68k | m88k \ + | maxq | mb | microblaze | mcore | mep | metag \ | mips | mipsbe | mipseb | mipsel | mipsle \ | mips16 \ | mips64 | mips64el \ - | mips64vr | mips64vrel \ + | mips64octeon | mips64octeonel \ | mips64orion | mips64orionel \ + | mips64r5900 | mips64r5900el \ + | mips64vr | mips64vrel \ | mips64vr4100 | mips64vr4100el \ | mips64vr4300 | mips64vr4300el \ | mips64vr5000 | mips64vr5000el \ + | mips64vr5900 | mips64vr5900el \ | mipsisa32 | mipsisa32el \ | mipsisa32r2 | mipsisa32r2el \ | mipsisa64 | mipsisa64el \ @@ -254,30 +287,65 @@ case $basic_machine in | mipsisa64sr71k | mipsisa64sr71kel \ | mipstx39 | mipstx39el \ | mn10200 | mn10300 \ + | moxie \ + | mt \ | msp430 \ + | nds32 | nds32le | nds32be \ + | nios | nios2 \ | ns16k | ns32k \ - | openrisc | or32 \ + | open8 \ + | or32 \ | pdp10 | pdp11 | pj | pjl \ - | powerpc | powerpc64 | powerpc64le | powerpcle | ppcbe \ + | powerpc | powerpc64 | powerpc64le | powerpcle \ | pyramid \ - | sh | sh[1234] | sh[23]e | sh[34]eb | shbe | shle | sh[1234]le | sh3ele \ + | rl78 | rx \ + | score \ + | sh | sh[1234] | sh[24]a | sh[24]aeb | sh[23]e | sh[34]eb | sheb | shbe | shle | sh[1234]le | sh3ele \ | sh64 | sh64le \ - | sparc | sparc64 | sparc86x | sparclet | sparclite | sparcv9 | sparcv9b 
\ - | strongarm \ - | tahoe | thumb | tic4x | tic80 | tron \ - | v850 | v850e \ + | sparc | sparc64 | sparc64b | sparc64v | sparc86x | sparclet | sparclite \ + | sparcv8 | sparcv9 | sparcv9b | sparcv9v \ + | spu \ + | tahoe | tic4x | tic54x | tic55x | tic6x | tic80 | tron \ + | ubicom32 \ + | v850 | v850e | v850e1 | v850e2 | v850es | v850e2v3 \ | we32k \ - | x86 | xscale | xstormy16 | xtensa \ - | z8k) + | x86 | xc16x | xstormy16 | xtensa \ + | z8k | z80) basic_machine=$basic_machine-unknown ;; - m6811 | m68hc11 | m6812 | m68hc12) - # Motorola 68HC11/12. + c54x) + basic_machine=tic54x-unknown + ;; + c55x) + basic_machine=tic55x-unknown + ;; + c6x) + basic_machine=tic6x-unknown + ;; + m6811 | m68hc11 | m6812 | m68hc12 | m68hcs12x | picochip) basic_machine=$basic_machine-unknown os=-none ;; m88110 | m680[12346]0 | m683?2 | m68360 | m5200 | v70 | w65 | z8k) ;; + ms1) + basic_machine=mt-unknown + ;; + + strongarm | thumb | xscale) + basic_machine=arm-unknown + ;; + xgate) + basic_machine=$basic_machine-unknown + os=-none + ;; + xscaleeb) + basic_machine=armeb-unknown + ;; + + xscaleel) + basic_machine=armel-unknown + ;; # We use `pc' rather than `unknown' # because (1) that's what they normally are, and @@ -293,32 +361,40 @@ case $basic_machine in # Recognize the basic CPU types with company name. 
580-* \ | a29k-* \ + | aarch64-* | aarch64_be-* \ | alpha-* | alphaev[4-8]-* | alphaev56-* | alphaev6[78]-* \ | alpha64-* | alpha64ev[4-8]-* | alpha64ev56-* | alpha64ev6[78]-* \ | alphapca5[67]-* | alpha64pca5[67]-* | arc-* \ | arm-* | armbe-* | armle-* | armeb-* | armv*-* \ - | avr-* \ - | bs2000-* \ - | c[123]* | c30-* | [cjt]90-* | c4x-* | c54x-* | c55x-* | c6x-* \ - | clipper-* | cydra-* \ + | avr-* | avr32-* \ + | be32-* | be64-* \ + | bfin-* | bs2000-* \ + | c[123]* | c30-* | [cjt]90-* | c4x-* \ + | clipper-* | craynv-* | cydra-* \ | d10v-* | d30v-* | dlx-* \ | elxsi-* \ - | f30[01]-* | f700-* | fr30-* | frv-* | fx80-* \ + | f30[01]-* | f700-* | fido-* | fr30-* | frv-* | fx80-* \ | h8300-* | h8500-* \ | hppa-* | hppa1.[01]-* | hppa2.0-* | hppa2.0[nw]-* | hppa64-* \ + | hexagon-* \ | i*86-* | i860-* | i960-* | ia64-* \ | ip2k-* | iq2000-* \ - | m32r-* \ + | le32-* | le64-* \ + | lm32-* \ + | m32c-* | m32r-* | m32rle-* \ | m68000-* | m680[012346]0-* | m68360-* | m683?2-* | m68k-* \ - | m88110-* | m88k-* | mcore-* \ + | m88110-* | m88k-* | maxq-* | mcore-* | metag-* | microblaze-* \ | mips-* | mipsbe-* | mipseb-* | mipsel-* | mipsle-* \ | mips16-* \ | mips64-* | mips64el-* \ - | mips64vr-* | mips64vrel-* \ + | mips64octeon-* | mips64octeonel-* \ | mips64orion-* | mips64orionel-* \ + | mips64r5900-* | mips64r5900el-* \ + | mips64vr-* | mips64vrel-* \ | mips64vr4100-* | mips64vr4100el-* \ | mips64vr4300-* | mips64vr4300el-* \ | mips64vr5000-* | mips64vr5000el-* \ + | mips64vr5900-* | mips64vr5900el-* \ | mipsisa32-* | mipsisa32el-* \ | mipsisa32r2-* | mipsisa32r2el-* \ | mipsisa64-* | mipsisa64el-* \ @@ -326,26 +402,39 @@ case $basic_machine in | mipsisa64sb1-* | mipsisa64sb1el-* \ | mipsisa64sr71k-* | mipsisa64sr71kel-* \ | mipstx39-* | mipstx39el-* \ + | mmix-* \ + | mt-* \ | msp430-* \ - | none-* | np1-* | nv1-* | ns16k-* | ns32k-* \ + | nds32-* | nds32le-* | nds32be-* \ + | nios-* | nios2-* \ + | none-* | np1-* | ns16k-* | ns32k-* \ + | open8-* \ | orion-* \ | 
pdp10-* | pdp11-* | pj-* | pjl-* | pn-* | power-* \ - | powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* | ppcbe-* \ + | powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* \ | pyramid-* \ - | romp-* | rs6000-* \ - | sh-* | sh[1234]-* | sh[23]e-* | sh[34]eb-* | shbe-* \ + | rl78-* | romp-* | rs6000-* | rx-* \ + | sh-* | sh[1234]-* | sh[24]a-* | sh[24]aeb-* | sh[23]e-* | sh[34]eb-* | sheb-* | shbe-* \ | shle-* | sh[1234]le-* | sh3ele-* | sh64-* | sh64le-* \ - | sparc-* | sparc64-* | sparc86x-* | sparclet-* | sparclite-* \ - | sparcv9-* | sparcv9b-* | strongarm-* | sv1-* | sx?-* \ - | tahoe-* | thumb-* \ + | sparc-* | sparc64-* | sparc64b-* | sparc64v-* | sparc86x-* | sparclet-* \ + | sparclite-* \ + | sparcv8-* | sparcv9-* | sparcv9b-* | sparcv9v-* | sv1-* | sx?-* \ + | tahoe-* \ | tic30-* | tic4x-* | tic54x-* | tic55x-* | tic6x-* | tic80-* \ + | tile*-* \ | tron-* \ - | v850-* | v850e-* | vax-* \ + | ubicom32-* \ + | v850-* | v850e-* | v850e1-* | v850es-* | v850e2-* | v850e2v3-* \ + | vax-* \ | we32k-* \ - | x86-* | x86_64-* | xps100-* | xscale-* | xstormy16-* \ - | xtensa-* \ + | x86-* | x86_64-* | xc16x-* | xps100-* \ + | xstormy16-* | xtensa*-* \ | ymp-* \ - | z8k-*) + | z8k-* | z80-*) + ;; + # Recognize the basic CPU types without company name, with glob match. + xtensa*) + basic_machine=$basic_machine-unknown ;; # Recognize the various machine names and aliases which stand # for a CPU type and a company and sometimes even an OS. 
@@ -363,7 +452,7 @@ case $basic_machine in basic_machine=a29k-amd os=-udi ;; - abacus) + abacus) basic_machine=abacus-unknown ;; adobe68k) @@ -409,6 +498,10 @@ case $basic_machine in basic_machine=m68k-apollo os=-bsd ;; + aros) + basic_machine=i386-pc + os=-aros + ;; aux) basic_machine=m68k-apple os=-aux @@ -417,10 +510,35 @@ case $basic_machine in basic_machine=ns32k-sequent os=-dynix ;; + blackfin) + basic_machine=bfin-unknown + os=-linux + ;; + blackfin-*) + basic_machine=bfin-`echo $basic_machine | sed 's/^[^-]*-//'` + os=-linux + ;; + bluegene*) + basic_machine=powerpc-ibm + os=-cnk + ;; + c54x-*) + basic_machine=tic54x-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + c55x-*) + basic_machine=tic55x-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + c6x-*) + basic_machine=tic6x-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; c90) basic_machine=c90-cray os=-unicos ;; + cegcc) + basic_machine=arm-unknown + os=-cegcc + ;; convex-c1) basic_machine=c1-convex os=-bsd @@ -445,13 +563,20 @@ case $basic_machine in basic_machine=j90-cray os=-unicos ;; - cr16c) - basic_machine=cr16c-unknown + craynv) + basic_machine=craynv-cray + os=-unicosmp + ;; + cr16 | cr16-*) + basic_machine=cr16-unknown os=-elf ;; crds | unos) basic_machine=m68k-crds ;; + crisv32 | crisv32-* | etraxfs*) + basic_machine=crisv32-axis + ;; cris | cris-* | etrax*) basic_machine=cris-axis ;; @@ -481,6 +606,14 @@ case $basic_machine in basic_machine=m88k-motorola os=-sysv3 ;; + dicos) + basic_machine=i686-pc + os=-dicos + ;; + djgpp) + basic_machine=i586-pc + os=-msdosdjgpp + ;; dpx20 | dpx20-*) basic_machine=rs6000-bull os=-bosx @@ -592,7 +725,6 @@ case $basic_machine in i370-ibm* | ibm*) basic_machine=i370-ibm ;; -# I'm not sure what "Sysv32" means. Should this be sysv3.2? 
i*86v32) basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` os=-sysv32 @@ -631,6 +763,14 @@ case $basic_machine in basic_machine=m68k-isi os=-sysv ;; + m68knommu) + basic_machine=m68k-unknown + os=-linux + ;; + m68knommu-*) + basic_machine=m68k-`echo $basic_machine | sed 's/^[^-]*-//'` + os=-linux + ;; m88k-omron*) basic_machine=m88k-omron ;; @@ -642,10 +782,17 @@ case $basic_machine in basic_machine=ns32k-utek os=-sysv ;; + microblaze) + basic_machine=microblaze-xilinx + ;; mingw32) basic_machine=i386-pc os=-mingw32 ;; + mingw32ce) + basic_machine=arm-unknown + os=-mingw32ce + ;; miniframe) basic_machine=m68000-convergent ;; @@ -659,10 +806,6 @@ case $basic_machine in mips3*) basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'`-unknown ;; - mmix*) - basic_machine=mmix-knuth - os=-mmixware - ;; monitor) basic_machine=m68k-rom68k os=-coff @@ -675,10 +818,21 @@ case $basic_machine in basic_machine=i386-pc os=-msdos ;; + ms1-*) + basic_machine=`echo $basic_machine | sed -e 's/ms1-/mt-/'` + ;; + msys) + basic_machine=i386-pc + os=-msys + ;; mvs) basic_machine=i370-ibm os=-mvs ;; + nacl) + basic_machine=le32-unknown + os=-nacl + ;; ncr3000) basic_machine=i486-ncr os=-sysv4 @@ -743,9 +897,11 @@ case $basic_machine in np1) basic_machine=np1-gould ;; - nv1) - basic_machine=nv1-cray - os=-unicosmp + neo-tandem) + basic_machine=neo-tandem + ;; + nse-tandem) + basic_machine=nse-tandem ;; nsr-tandem) basic_machine=nsr-tandem @@ -754,9 +910,8 @@ case $basic_machine in basic_machine=hppa1.1-oki os=-proelf ;; - or32 | or32-*) + openrisc | openrisc-*) basic_machine=or32-unknown - os=-coff ;; os400) basic_machine=powerpc-ibm @@ -778,6 +933,14 @@ case $basic_machine in basic_machine=i860-intel os=-osf ;; + parisc) + basic_machine=hppa-unknown + os=-linux + ;; + parisc-*) + basic_machine=hppa-`echo $basic_machine | sed 's/^[^-]*-//'` + os=-linux + ;; pbd) basic_machine=sparc-tti ;; @@ -787,6 +950,12 @@ case $basic_machine in pc532 | pc532-*) basic_machine=ns32k-pc532 ;; + 
pc98) + basic_machine=i386-pc + ;; + pc98-*) + basic_machine=i386-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; pentium | p5 | k5 | k6 | nexgen | viac3) basic_machine=i586-pc ;; @@ -816,9 +985,10 @@ case $basic_machine in ;; power) basic_machine=power-ibm ;; - ppc) basic_machine=powerpc-unknown + ppc | ppcbe) basic_machine=powerpc-unknown ;; - ppc-*) basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'` + ppc-* | ppcbe-*) + basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'` ;; ppcle | powerpclittle | ppc-le | powerpc-little) basic_machine=powerpcle-unknown @@ -843,6 +1013,10 @@ case $basic_machine in basic_machine=i586-unknown os=-pw32 ;; + rdos) + basic_machine=i386-pc + os=-rdos + ;; rom68k) basic_machine=m68k-rom68k os=-coff @@ -869,6 +1043,10 @@ case $basic_machine in sb1el) basic_machine=mipsisa64sb1el-unknown ;; + sde) + basic_machine=mipsisa32-sde + os=-elf + ;; sei) basic_machine=mips-sei os=-seiux @@ -880,6 +1058,9 @@ case $basic_machine in basic_machine=sh-hitachi os=-hms ;; + sh5el) + basic_machine=sh5le-unknown + ;; sh64) basic_machine=sh64-unknown ;; @@ -901,6 +1082,9 @@ case $basic_machine in basic_machine=i860-stratus os=-sysv4 ;; + strongarm-* | thumb-*) + basic_machine=arm-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; sun2) basic_machine=m68000-sun ;; @@ -957,17 +1141,9 @@ case $basic_machine in basic_machine=t90-cray os=-unicos ;; - tic54x | c54x*) - basic_machine=tic54x-unknown - os=-coff - ;; - tic55x | c55x*) - basic_machine=tic55x-unknown - os=-coff - ;; - tic6x | c6x*) - basic_machine=tic6x-unknown - os=-coff + tile*) + basic_machine=$basic_machine-unknown + os=-linux-gnu ;; tx39) basic_machine=mipstx39-unknown @@ -1029,9 +1205,16 @@ case $basic_machine in basic_machine=hppa1.1-winbond os=-proelf ;; + xbox) + basic_machine=i686-pc + os=-mingw32 + ;; xps | xps100) basic_machine=xps100-honeywell ;; + xscale-* | xscalee[bl]-*) + basic_machine=`echo $basic_machine | sed 's/^xscale/arm/'` + ;; ymp) basic_machine=ymp-cray 
os=-unicos @@ -1040,6 +1223,10 @@ case $basic_machine in basic_machine=z8k-unknown os=-sim ;; + z80-*-coff) + basic_machine=z80-unknown + os=-sim + ;; none) basic_machine=none-none os=-none @@ -1059,6 +1246,9 @@ case $basic_machine in romp) basic_machine=romp-ibm ;; + mmix) + basic_machine=mmix-knuth + ;; rs6000) basic_machine=rs6000-ibm ;; @@ -1075,13 +1265,10 @@ case $basic_machine in we32k) basic_machine=we32k-att ;; - sh3 | sh4 | sh[34]eb | sh[1234]le | sh[23]ele) + sh[1234] | sh[24]a | sh[24]aeb | sh[34]eb | sh[1234]le | sh[23]ele) basic_machine=sh-unknown ;; - sh64) - basic_machine=sh64-unknown - ;; - sparc | sparcv9 | sparcv9b) + sparc | sparcv8 | sparcv9 | sparcv9b | sparcv9v) basic_machine=sparc-sun ;; cydra) @@ -1125,9 +1312,12 @@ esac if [ x"$os" != x"" ] then case $os in - # First match some system type aliases - # that might get confused with valid system types. + # First match some system type aliases + # that might get confused with valid system types. # -solaris* is a basic system type, with this one exception. + -auroraux) + os=-auroraux + ;; -solaris1 | -solaris1.*) os=`echo $os | sed -e 's|solaris1|sunos4|'` ;; @@ -1148,26 +1338,31 @@ case $os in # Each alternative MUST END IN A *, to match a version number. # -sysv* is not here because it comes later, after sysvr4. 
-gnu* | -bsd* | -mach* | -minix* | -genix* | -ultrix* | -irix* \ - | -*vms* | -sco* | -esix* | -isc* | -aix* | -sunos | -sunos[34]*\ - | -hpux* | -unos* | -osf* | -luna* | -dgux* | -solaris* | -sym* \ + | -*vms* | -sco* | -esix* | -isc* | -aix* | -cnk* | -sunos | -sunos[34]*\ + | -hpux* | -unos* | -osf* | -luna* | -dgux* | -auroraux* | -solaris* \ + | -sym* | -kopensolaris* \ | -amigaos* | -amigados* | -msdos* | -newsos* | -unicos* | -aof* \ - | -aos* \ + | -aos* | -aros* \ | -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \ | -clix* | -riscos* | -uniplus* | -iris* | -rtu* | -xenix* \ - | -hiux* | -386bsd* | -knetbsd* | -mirbsd* | -netbsd* | -openbsd* \ + | -hiux* | -386bsd* | -knetbsd* | -mirbsd* | -netbsd* \ + | -openbsd* | -solidbsd* \ | -ekkobsd* | -kfreebsd* | -freebsd* | -riscix* | -lynxos* \ | -bosx* | -nextstep* | -cxux* | -aout* | -elf* | -oabi* \ | -ptx* | -coff* | -ecoff* | -winnt* | -domain* | -vsta* \ | -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* \ - | -chorusos* | -chorusrdb* \ - | -cygwin* | -pe* | -psos* | -moss* | -proelf* | -rtems* \ - | -mingw32* | -linux-gnu* | -linux-uclibc* | -uxpv* | -beos* | -mpeix* | -udk* \ + | -chorusos* | -chorusrdb* | -cegcc* \ + | -cygwin* | -msys* | -pe* | -psos* | -moss* | -proelf* | -rtems* \ + | -mingw32* | -linux-gnu* | -linux-android* \ + | -linux-newlib* | -linux-uclibc* \ + | -uxpv* | -beos* | -mpeix* | -udk* \ | -interix* | -uwin* | -mks* | -rhapsody* | -darwin* | -opened* \ | -openstep* | -oskit* | -conix* | -pw32* | -nonstopux* \ | -storm-chaos* | -tops10* | -tenex* | -tops20* | -its* \ | -os2* | -vos* | -palmos* | -uclinux* | -nucleus* \ | -morphos* | -superux* | -rtmk* | -rtmk-nova* | -windiss* \ - | -powermax* | -dnix* | -nx6 | -nx7 | -sei* | -dragonfly*) + | -powermax* | -dnix* | -nx6 | -nx7 | -sei* | -dragonfly* \ + | -skyos* | -haiku* | -rdos* | -toppers* | -drops* | -es*) # Remember, each alternative MUST END IN *, to match a version number. 
;; -qnx*) @@ -1185,7 +1380,7 @@ case $os in os=`echo $os | sed -e 's|nto|nto-qnx|'` ;; -sim | -es1800* | -hms* | -xray | -os68k* | -none* | -v88r* \ - | -windows* | -osx | -abug | -netware* | -os9* | -beos* \ + | -windows* | -osx | -abug | -netware* | -os9* | -beos* | -haiku* \ | -macos* | -mpw* | -magic* | -mmixware* | -mon960* | -lnews*) ;; -mac*) @@ -1206,7 +1401,7 @@ case $os in -opened*) os=-openedition ;; - -os400*) + -os400*) os=-os400 ;; -wince*) @@ -1255,7 +1450,7 @@ case $os in -sinix*) os=-sysv4 ;; - -tpf*) + -tpf*) os=-tpf ;; -triton*) @@ -1294,6 +1489,14 @@ case $os in -kaos*) os=-kaos ;; + -zvmoe) + os=-zvmoe + ;; + -dicos*) + os=-dicos + ;; + -nacl*) + ;; -none) ;; *) @@ -1316,6 +1519,12 @@ else # system, and we'll never get to this point. case $basic_machine in + score-*) + os=-elf + ;; + spu-*) + os=-elf + ;; *-acorn) os=-riscix1.2 ;; @@ -1325,9 +1534,18 @@ case $basic_machine in arm*-semi) os=-aout ;; - c4x-* | tic4x-*) - os=-coff - ;; + c4x-* | tic4x-*) + os=-coff + ;; + tic54x-*) + os=-coff + ;; + tic55x-*) + os=-coff + ;; + tic6x-*) + os=-coff + ;; # This must come before the *-dec entry. pdp10-*) os=-tops20 @@ -1346,13 +1564,13 @@ case $basic_machine in ;; m68000-sun) os=-sunos3 - # This also exists in the configure program, but was not the - # default. 
- # os=-sunos4 ;; m68*-cisco) os=-aout ;; + mep-*) + os=-elf + ;; mips*-cisco) os=-elf ;; @@ -1371,9 +1589,15 @@ case $basic_machine in *-be) os=-beos ;; + *-haiku) + os=-haiku + ;; *-ibm) os=-aix ;; + *-knuth) + os=-mmixware + ;; *-wec) os=-proelf ;; @@ -1476,7 +1700,7 @@ case $basic_machine in -sunos*) vendor=sun ;; - -aix*) + -cnk*|-aix*) vendor=ibm ;; -beos*) @@ -1539,7 +1763,7 @@ case $basic_machine in esac echo $basic_machine$os -exit 0 +exit # Local variables: # eval: (add-hook 'write-file-hooks 'time-stamp) From eb2398106fd1b43989c12796eb706aea79b11859 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 13 Mar 2012 08:46:12 -0700 Subject: [PATCH 0082/3378] Fix malloc_stats_print() option support. Fix malloc_stats_print() to honor 'b' and 'l' in the opts parameter. --- src/stats.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/stats.c b/src/stats.c index 38c8bb3c..f494974b 100644 --- a/src/stats.c +++ b/src/stats.c @@ -49,7 +49,7 @@ static void stats_arena_bins_print(void (*write_cb)(void *, const char *), static void stats_arena_lruns_print(void (*write_cb)(void *, const char *), void *cbopaque, unsigned i); static void stats_arena_print(void (*write_cb)(void *, const char *), - void *cbopaque, unsigned i); + void *cbopaque, unsigned i, bool bins, bool large); /******************************************************************************/ @@ -203,7 +203,7 @@ stats_arena_lruns_print(void (*write_cb)(void *, const char *), void *cbopaque, static void stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, - unsigned i) + unsigned i, bool bins, bool large) { unsigned nthreads; size_t pagesize, pactive, pdirty, mapped; @@ -256,8 +256,10 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, CTL_I_GET("stats.arenas.0.mapped", &mapped, size_t); malloc_cprintf(write_cb, cbopaque, "mapped: %12zu\n", mapped); - stats_arena_bins_print(write_cb, cbopaque, i); - 
stats_arena_lruns_print(write_cb, cbopaque, i); + if (bins) + stats_arena_bins_print(write_cb, cbopaque, i); + if (large) + stats_arena_lruns_print(write_cb, cbopaque, i); } void @@ -506,7 +508,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, malloc_cprintf(write_cb, cbopaque, "\nMerged arenas stats:\n"); stats_arena_print(write_cb, cbopaque, - narenas); + narenas, bins, large); } } } @@ -532,7 +534,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, cbopaque, "\narenas[%u]:\n", i); stats_arena_print(write_cb, - cbopaque, i); + cbopaque, i, bins, large); } } } From 4c2faa8a7c42a47a6bea509f5a23234bc5a66d40 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 13 Mar 2012 11:09:23 -0700 Subject: [PATCH 0083/3378] Fix a regression in JE_COMPILABLE(). Revert JE_COMPILABLE() so that it detects link errors. Cross-compiling should still work as long as a valid configure cache is provided. Clean up some comments/whitespace. --- configure.ac | 28 +++++++++---------- .../jemalloc/internal/jemalloc_internal.h.in | 9 ++---- src/jemalloc.c | 5 +--- 3 files changed, 18 insertions(+), 24 deletions(-) diff --git a/configure.ac b/configure.ac index 5cf2855f..a5ca859b 100644 --- a/configure.ac +++ b/configure.ac @@ -26,14 +26,17 @@ AC_COMPILE_IFELSE([AC_LANG_PROGRAM( ]) dnl JE_COMPILABLE(label, hcode, mcode, rvar) +dnl +dnl Use AC_RUN_IFELSE() rather than AC_COMPILE_IFELSE() so that linker errors +dnl cause failure. AC_DEFUN([JE_COMPILABLE], [ AC_CACHE_CHECK([whether $1 is compilable], [$4], - [AC_COMPILE_IFELSE([AC_LANG_PROGRAM([$2], - [$3])], - [$4=yes], - [$4=no])]) + [AC_RUN_IFELSE([AC_LANG_PROGRAM([$2], + [$3])], + [$4=yes], + [$4=no])]) ]) dnl ============================================================================ @@ -801,16 +804,13 @@ dnl ============================================================================ dnl Check for ffsl(3), and fail if not found. 
This function exists on all dnl platforms that jemalloc currently has a chance of functioning on without dnl modification. -JE_COMPILABLE([a program using ffsl], - [ - #include - ], - [ - { - int rv = ffsl(0x08); - } - ], - [je_cv_function_ffsl]) +JE_COMPILABLE([a program using ffsl], [ +#include +], [ + { + int rv = ffsl(0x08); + } +], [je_cv_function_ffsl]) if test "x${je_cv_function_ffsl}" != "xyes" ; then AC_MSG_ERROR([Cannot build without ffsl(3)]) fi diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 3774bb5d..5759ed58 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -240,14 +240,14 @@ static const bool config_ivsalloc = #define LONG_MASK (LONG - 1) /* Return the smallest long multiple that is >= a. */ -#define LONG_CEILING(a) \ +#define LONG_CEILING(a) \ (((a) + LONG_MASK) & ~LONG_MASK) #define SIZEOF_PTR (1U << LG_SIZEOF_PTR) #define PTR_MASK (SIZEOF_PTR - 1) /* Return the smallest (void *) multiple that is >= a. */ -#define PTR_CEILING(a) \ +#define PTR_CEILING(a) \ (((a) + PTR_MASK) & ~PTR_MASK) /* @@ -566,10 +566,7 @@ sa2u(size_t size, size_t alignment, size_t *run_size_p) } } -/* - * Choose an arena based on a per-thread value (fast-path code, calls slow-path - * code if necessary). - */ +/* Choose an arena based on a per-thread value. */ JEMALLOC_INLINE arena_t * choose_arena(void) { diff --git a/src/jemalloc.c b/src/jemalloc.c index e148ae0e..f564b65e 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -101,10 +101,7 @@ arenas_extend(unsigned ind) return (arenas[0]); } -/* - * Choose an arena based on a per-thread value (slow-path code only, called - * only by choose_arena()). - */ +/* Slow path, called only by choose_arena(). 
*/ arena_t * choose_arena_hard(void) { From 0a0bbf63e5d9bc60d6854c6d46b437fbeebd1470 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 13 Mar 2012 12:55:21 -0700 Subject: [PATCH 0084/3378] Implement aligned_alloc(). Implement aligned_alloc(), which was added in the C11 standard. The function is weakly specified to the point that a minimally compliant implementation would be painful to use (size must be an integral multiple of alignment!), which in practice makes posix_memalign() a safer choice. --- Makefile.in | 6 +- configure.ac | 2 +- doc/jemalloc.xml.in | 35 ++++++++ include/jemalloc/jemalloc.h.in | 3 + include/jemalloc/jemalloc_defs.h.in | 1 + src/jemalloc.c | 37 ++++++--- test/aligned_alloc.c | 123 ++++++++++++++++++++++++++++ test/aligned_alloc.exp | 25 ++++++ 8 files changed, 218 insertions(+), 14 deletions(-) create mode 100644 test/aligned_alloc.c create mode 100644 test/aligned_alloc.exp diff --git a/Makefile.in b/Makefile.in index 62864556..01ed083c 100644 --- a/Makefile.in +++ b/Makefile.in @@ -63,9 +63,9 @@ DOCS_XML := @objroot@doc/jemalloc@install_suffix@.xml DOCS_HTML := $(DOCS_XML:@objroot@%.xml=@srcroot@%.html) DOCS_MAN3 := $(DOCS_XML:@objroot@%.xml=@srcroot@%.3) DOCS := $(DOCS_HTML) $(DOCS_MAN3) -CTESTS := @srcroot@test/allocated.c @srcroot@test/bitmap.c \ - @srcroot@test/mremap.c @srcroot@test/posix_memalign.c \ - @srcroot@test/thread_arena.c +CTESTS := @srcroot@test/aligned_alloc.c @srcroot@test/allocated.c \ + @srcroot@test/bitmap.c @srcroot@test/mremap.c \ + @srcroot@test/posix_memalign.c @srcroot@test/thread_arena.c ifeq (@enable_experimental@, 1) CTESTS += @srcroot@test/allocm.c @srcroot@test/rallocm.c endif diff --git a/configure.ac b/configure.ac index a5ca859b..295e646d 100644 --- a/configure.ac +++ b/configure.ac @@ -316,7 +316,7 @@ AC_PATH_PROG([AR], [ar], , [$PATH]) AC_PATH_PROG([LD], [ld], , [$PATH]) AC_PATH_PROG([AUTOCONF], [autoconf], , [$PATH]) -public_syms="malloc_conf malloc_message malloc calloc posix_memalign realloc free 
malloc_usable_size malloc_stats_print mallctl mallctlnametomib mallctlbymib" +public_syms="malloc_conf malloc_message malloc calloc posix_memalign aligned_alloc realloc free malloc_usable_size malloc_stats_print mallctl mallctlnametomib mallctlbymib" dnl Check for allocator-related functions that should be wrapped. AC_CHECK_FUNC([memalign], diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 74da409f..926deafe 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -30,6 +30,7 @@ malloc calloc posix_memalign + aligned_alloc realloc free malloc_usable_size @@ -73,6 +74,11 @@ size_t alignment size_t size + + void *aligned_alloc + size_t alignment + size_t size + void *realloc void *ptr @@ -190,6 +196,14 @@ alignment must be a power of 2 at least as large as sizeof(void *). + The aligned_alloc function + allocates size bytes of memory such that the + allocation's base address is an even multiple of + alignment. The requested + alignment must be a power of 2. Behavior is + undefined if size is not an integral multiple of + alignment. + The realloc function changes the size of the previously allocated memory referenced by ptr to size bytes. The @@ -1789,6 +1803,27 @@ malloc_conf = "xmalloc:true";]]> + The aligned_alloc function returns + a pointer to the allocated memory if successful; otherwise a + NULL pointer is returned and + errno is set. The + aligned_alloc function will fail if: + + + EINVAL + + The alignment parameter is + not a power of 2. + + + + ENOMEM + + Memory allocation error. 
+ + + + The realloc function returns a pointer, possibly identical to ptr, to the allocated memory if successful; otherwise a NULL diff --git a/include/jemalloc/jemalloc.h.in b/include/jemalloc/jemalloc.h.in index 742daddd..f0581dbd 100644 --- a/include/jemalloc/jemalloc.h.in +++ b/include/jemalloc/jemalloc.h.in @@ -43,6 +43,7 @@ void *je_malloc(size_t size) JEMALLOC_ATTR(malloc); void *je_calloc(size_t num, size_t size) JEMALLOC_ATTR(malloc); int je_posix_memalign(void **memptr, size_t alignment, size_t size) JEMALLOC_ATTR(nonnull(1)); +void *je_aligned_alloc(size_t alignment, size_t size) JEMALLOC_ATTR(malloc); void *je_realloc(void *ptr, size_t size); void je_free(void *ptr); @@ -82,6 +83,7 @@ int je_nallocm(size_t *rsize, size_t size, int flags); #define malloc je_malloc #define calloc je_calloc #define posix_memalign je_posix_memalign +#define aligned_alloc je_aligned_alloc #define realloc je_realloc #define free je_free #define malloc_usable_size je_malloc_usable_size @@ -113,6 +115,7 @@ int je_nallocm(size_t *rsize, size_t size, int flags); #undef je_malloc #undef je_calloc #undef je_posix_memalign +#undef je_aligned_alloc #undef je_realloc #undef je_free #undef je_malloc_usable_size diff --git a/include/jemalloc/jemalloc_defs.h.in b/include/jemalloc/jemalloc_defs.h.in index 6b2b0d01..f8f80e62 100644 --- a/include/jemalloc/jemalloc_defs.h.in +++ b/include/jemalloc/jemalloc_defs.h.in @@ -16,6 +16,7 @@ #undef je_malloc #undef je_calloc #undef je_posix_memalign +#undef je_aligned_alloc #undef je_realloc #undef je_free #undef je_malloc_usable_size diff --git a/src/jemalloc.c b/src/jemalloc.c index f564b65e..2f3f3722 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -68,7 +68,7 @@ static void malloc_conf_error(const char *msg, const char *k, size_t klen, static void malloc_conf_init(void); static bool malloc_init_hard(void); static int imemalign(void **memptr, size_t alignment, size_t size, - bool enforce_min_alignment); + size_t min_alignment); 
/******************************************************************************/ /* @@ -851,7 +851,7 @@ JEMALLOC_ATTR(noinline) #endif static int imemalign(void **memptr, size_t alignment, size_t size, - bool enforce_min_alignment) + size_t min_alignment) { int ret; size_t usize; @@ -862,6 +862,8 @@ imemalign(void **memptr, size_t alignment, size_t size, #endif ; + assert(min_alignment != 0); + if (malloc_init()) result = NULL; else { @@ -870,10 +872,10 @@ imemalign(void **memptr, size_t alignment, size_t size, /* Make sure that alignment is a large enough power of 2. */ if (((alignment - 1) & alignment) != 0 - || (enforce_min_alignment && alignment < sizeof(void *))) { + || (alignment < min_alignment)) { if (config_xmalloc && opt_xmalloc) { - malloc_write(": Error in " - "posix_memalign(): invalid alignment\n"); + malloc_write(": Error allocating " + "aligned memory: invalid alignment\n"); abort(); } result = NULL; @@ -915,8 +917,8 @@ imemalign(void **memptr, size_t alignment, size_t size, if (result == NULL) { if (config_xmalloc && opt_xmalloc) { - malloc_write(": Error in posix_memalign(): " - "out of memory\n"); + malloc_write(": Error allocating aligned " + "memory: out of memory\n"); abort(); } ret = ENOMEM; @@ -942,7 +944,22 @@ int je_posix_memalign(void **memptr, size_t alignment, size_t size) { - return imemalign(memptr, alignment, size, true); + return imemalign(memptr, alignment, size, sizeof(void *)); +} + +JEMALLOC_ATTR(malloc) +JEMALLOC_ATTR(visibility("default")) +void * +je_aligned_alloc(size_t alignment, size_t size) +{ + void *ret; + int err; + + if ((err = imemalign(&ret, alignment, size, 1)) != 0) { + ret = NULL; + errno = err; + } + return (ret); } JEMALLOC_ATTR(malloc) @@ -1196,7 +1213,7 @@ je_memalign(size_t alignment, size_t size) = NULL #endif ; - imemalign(&ret, alignment, size, false); + imemalign(&ret, alignment, size, 1); return (ret); } #endif @@ -1212,7 +1229,7 @@ je_valloc(size_t size) = NULL #endif ; - imemalign(&ret, PAGE_SIZE, 
size, false); + imemalign(&ret, PAGE_SIZE, size, 1); return (ret); } #endif diff --git a/test/aligned_alloc.c b/test/aligned_alloc.c new file mode 100644 index 00000000..2a95604f --- /dev/null +++ b/test/aligned_alloc.c @@ -0,0 +1,123 @@ +#include +#include +#include +#include +#include +#include + +#define JEMALLOC_MANGLE +#include "jemalloc_test.h" + +#define CHUNK 0x400000 +/* #define MAXALIGN ((size_t)UINT64_C(0x80000000000)) */ +#define MAXALIGN ((size_t)0x2000000LU) +#define NITER 4 + +int +main(void) +{ + size_t alignment, size, total; + unsigned i; + void *p, *ps[NITER]; + + fprintf(stderr, "Test begin\n"); + + /* Test error conditions. */ + alignment = 0; + errno = 0; + p = aligned_alloc(alignment, 1); + if (p != NULL || errno != EINVAL) { + fprintf(stderr, + "Expected error for invalid alignment %zu\n", alignment); + } + + for (alignment = sizeof(size_t); alignment < MAXALIGN; + alignment <<= 1) { + errno = 0; + p = aligned_alloc(alignment + 1, 1); + if (p != NULL || errno != EINVAL) { + fprintf(stderr, + "Expected error for invalid alignment %zu\n", + alignment + 1); + } + } + +#if LG_SIZEOF_PTR == 3 + alignment = UINT64_C(0x8000000000000000); + size = UINT64_C(0x8000000000000000); +#else + alignment = 0x80000000LU; + size = 0x80000000LU; +#endif + errno = 0; + p = aligned_alloc(alignment, size); + if (p != NULL || errno != ENOMEM) { + fprintf(stderr, + "Expected error for aligned_alloc(%zu, %zu)\n", + alignment, size); + } + +#if LG_SIZEOF_PTR == 3 + alignment = UINT64_C(0x4000000000000000); + size = UINT64_C(0x8400000000000001); +#else + alignment = 0x40000000LU; + size = 0x84000001LU; +#endif + errno = 0; + p = aligned_alloc(alignment, size); + if (p != NULL || errno != ENOMEM) { + fprintf(stderr, + "Expected error for aligned_alloc(%zu, %zu)\n", + alignment, size); + } + + alignment = 0x10LU; +#if LG_SIZEOF_PTR == 3 + size = UINT64_C(0xfffffffffffffff0); +#else + size = 0xfffffff0LU; +#endif + errno = 0; + p = aligned_alloc(alignment, size); + if (p 
!= NULL || errno != ENOMEM) { + fprintf(stderr, + "Expected error for aligned_alloc(&p, %zu, %zu)\n", + alignment, size); + } + + for (i = 0; i < NITER; i++) + ps[i] = NULL; + + for (alignment = 8; + alignment <= MAXALIGN; + alignment <<= 1) { + total = 0; + fprintf(stderr, "Alignment: %zu\n", alignment); + for (size = 1; + size < 3 * alignment && size < (1U << 31); + size += (alignment >> (LG_SIZEOF_PTR-1)) - 1) { + for (i = 0; i < NITER; i++) { + ps[i] = aligned_alloc(alignment, size); + if (ps[i] == NULL) { + fprintf(stderr, + "Error for size %zu (%#zx): %s\n", + size, size, strerror(errno)); + exit(1); + } + total += malloc_usable_size(ps[i]); + if (total >= (MAXALIGN << 1)) + break; + } + for (i = 0; i < NITER; i++) { + if (ps[i] != NULL) { + free(ps[i]); + ps[i] = NULL; + } + } + } + } + + fprintf(stderr, "Test end\n"); + return (0); +} diff --git a/test/aligned_alloc.exp b/test/aligned_alloc.exp new file mode 100644 index 00000000..b5061c72 --- /dev/null +++ b/test/aligned_alloc.exp @@ -0,0 +1,25 @@ +Test begin +Alignment: 8 +Alignment: 16 +Alignment: 32 +Alignment: 64 +Alignment: 128 +Alignment: 256 +Alignment: 512 +Alignment: 1024 +Alignment: 2048 +Alignment: 4096 +Alignment: 8192 +Alignment: 16384 +Alignment: 32768 +Alignment: 65536 +Alignment: 131072 +Alignment: 262144 +Alignment: 524288 +Alignment: 1048576 +Alignment: 2097152 +Alignment: 4194304 +Alignment: 8388608 +Alignment: 16777216 +Alignment: 33554432 +Test end From 824d34e5b7f5cf00bf472ec79f7ec1c6e3474114 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 13 Mar 2012 13:19:04 -0700 Subject: [PATCH 0085/3378] Modify malloc_vsnprintf() validation code. Modify malloc_vsnprintf() validation code to verify that output is identical to vsnprintf() output, even if both outputs are truncated due to buffer exhaustion. 
--- include/jemalloc/internal/util.h | 6 ++++++ src/util.c | 7 +++---- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h index a268109c..c5f7520c 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util.h @@ -4,6 +4,12 @@ /* Size of stack-allocated buffer passed to buferror(). */ #define BUFERROR_BUF 64 +/* + * Size of static buffer used by malloc_[v]{,c,t}printf(). This must be large + * enough for all possible uses within jemalloc. + */ +#define MALLOC_PRINTF_BUFSIZE 4096 + /* * Define a custom assert() in order to reduce the chances of deadlock during * assertion failure. diff --git a/src/util.c b/src/util.c index 7d658aaa..47e7b66e 100644 --- a/src/util.c +++ b/src/util.c @@ -433,7 +433,7 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) ret = i; if (config_debug) { - char buf[ret + 2]; + char buf[MALLOC_PRINTF_BUFSIZE]; int tret; /* @@ -442,7 +442,7 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) */ tret = vsnprintf(buf, sizeof(buf), format, tap); assert(tret == ret); - assert(memcmp(str, buf, ret + 1) == 0); + assert(strcmp(buf, str) == 0); } #undef APPEND_C @@ -469,8 +469,7 @@ malloc_snprintf(char *str, size_t size, const char *format, ...) const char * malloc_vtprintf(const char *format, va_list ap) { - /* buf must be large enough for all possible uses within jemalloc. */ - static __thread char buf[4096]; + static __thread char buf[MALLOC_PRINTF_BUFSIZE]; malloc_vsnprintf(buf, sizeof(buf), format, ap); From 4e2e3dd9cf19ed5991938a708a8b50611aa5bbf8 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 13 Mar 2012 16:31:41 -0700 Subject: [PATCH 0086/3378] Fix fork-related bugs. Acquire/release arena bin locks as part of the prefork/postfork. This bug made deadlock in the child between fork and exec a possibility. 
Split jemalloc_postfork() into jemalloc_postfork_{parent,child}() so that the child can reinitialize mutexes rather than unlocking them. In practice, this bug tended not to cause problems. --- include/jemalloc/internal/arena.h | 3 ++ include/jemalloc/internal/base.h | 5 +- include/jemalloc/internal/chunk_dss.h | 9 ++-- include/jemalloc/internal/huge.h | 3 ++ .../jemalloc/internal/jemalloc_internal.h.in | 3 +- include/jemalloc/internal/mutex.h | 3 ++ include/jemalloc/internal/private_namespace.h | 18 ++++++- src/arena.c | 30 +++++++++++ src/base.c | 23 +++++++- src/chunk_dss.c | 36 +++++++++++-- src/huge.c | 21 ++++++++ src/jemalloc.c | 52 +++++++++++-------- src/mutex.c | 26 ++++++++++ 13 files changed, 194 insertions(+), 38 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 16c2b1e6..1609adcf 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -376,6 +376,9 @@ void *arena_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, size_t alignment, bool zero); bool arena_new(arena_t *arena, unsigned ind); void arena_boot(void); +void arena_prefork(arena_t *arena); +void arena_postfork_parent(arena_t *arena); +void arena_postfork_child(arena_t *arena); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ diff --git a/include/jemalloc/internal/base.h b/include/jemalloc/internal/base.h index e353f309..796a2835 100644 --- a/include/jemalloc/internal/base.h +++ b/include/jemalloc/internal/base.h @@ -9,12 +9,13 @@ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -extern malloc_mutex_t base_mtx; - void *base_alloc(size_t size); extent_node_t *base_node_alloc(void); void base_node_dealloc(extent_node_t *node); bool base_boot(void); +void base_prefork(void); +void base_postfork_parent(void); +void base_postfork_child(void); #endif /* JEMALLOC_H_EXTERNS */ 
/******************************************************************************/ diff --git a/include/jemalloc/internal/chunk_dss.h b/include/jemalloc/internal/chunk_dss.h index 35cd461a..a39a2031 100644 --- a/include/jemalloc/internal/chunk_dss.h +++ b/include/jemalloc/internal/chunk_dss.h @@ -9,16 +9,13 @@ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -/* - * Protects sbrk() calls. This avoids malloc races among threads, though it - * does not protect against races with threads that call sbrk() directly. - */ -extern malloc_mutex_t dss_mtx; - void *chunk_alloc_dss(size_t size, bool *zero); bool chunk_in_dss(void *chunk); bool chunk_dealloc_dss(void *chunk, size_t size); bool chunk_dss_boot(void); +void chunk_dss_prefork(void); +void chunk_dss_postfork_parent(void); +void chunk_dss_postfork_child(void); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ diff --git a/include/jemalloc/internal/huge.h b/include/jemalloc/internal/huge.h index 3a6b0b87..e8513c93 100644 --- a/include/jemalloc/internal/huge.h +++ b/include/jemalloc/internal/huge.h @@ -28,6 +28,9 @@ size_t huge_salloc(const void *ptr); prof_ctx_t *huge_prof_ctx_get(const void *ptr); void huge_prof_ctx_set(const void *ptr, prof_ctx_t *ctx); bool huge_boot(void); +void huge_prefork(void); +void huge_postfork_parent(void); +void huge_postfork_child(void); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 5759ed58..800d72c2 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -410,7 +410,8 @@ thread_allocated_t *thread_allocated_get_hard(void); arena_t *arenas_extend(unsigned ind); arena_t *choose_arena_hard(void); void jemalloc_prefork(void); 
-void jemalloc_postfork(void); +void jemalloc_postfork_parent(void); +void jemalloc_postfork_child(void); #include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" diff --git a/include/jemalloc/internal/mutex.h b/include/jemalloc/internal/mutex.h index 6a7b4fce..9d136585 100644 --- a/include/jemalloc/internal/mutex.h +++ b/include/jemalloc/internal/mutex.h @@ -29,6 +29,9 @@ extern bool isthreaded; bool malloc_mutex_init(malloc_mutex_t *mutex); void malloc_mutex_destroy(malloc_mutex_t *mutex); +void malloc_mutex_prefork(malloc_mutex_t *mutex); +void malloc_mutex_postfork_parent(malloc_mutex_t *mutex); +void malloc_mutex_postfork_child(malloc_mutex_t *mutex); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ diff --git a/include/jemalloc/internal/private_namespace.h b/include/jemalloc/internal/private_namespace.h index 89d3b5ca..e7370fec 100644 --- a/include/jemalloc/internal/private_namespace.h +++ b/include/jemalloc/internal/private_namespace.h @@ -8,6 +8,9 @@ #define arena_malloc_small JEMALLOC_N(arena_malloc_small) #define arena_new JEMALLOC_N(arena_new) #define arena_palloc JEMALLOC_N(arena_palloc) +#define arena_postfork_child JEMALLOC_N(arena_postfork_child) +#define arena_postfork_parent JEMALLOC_N(arena_postfork_parent) +#define arena_prefork JEMALLOC_N(arena_prefork) #define arena_prof_accum JEMALLOC_N(arena_prof_accum) #define arena_prof_ctx_get JEMALLOC_N(arena_prof_ctx_get) #define arena_prof_ctx_set JEMALLOC_N(arena_prof_ctx_set) @@ -32,6 +35,9 @@ #define base_boot JEMALLOC_N(base_boot) #define base_node_alloc JEMALLOC_N(base_node_alloc) #define base_node_dealloc JEMALLOC_N(base_node_dealloc) +#define base_postfork_child JEMALLOC_N(base_postfork_child) +#define base_postfork_parent JEMALLOC_N(base_postfork_parent) +#define base_prefork JEMALLOC_N(base_prefork) #define bitmap_full JEMALLOC_N(bitmap_full) #define bitmap_get JEMALLOC_N(bitmap_get) #define bitmap_info_init 
JEMALLOC_N(bitmap_info_init) @@ -54,6 +60,9 @@ #define chunk_dealloc_dss JEMALLOC_N(chunk_dealloc_dss) #define chunk_dealloc_mmap JEMALLOC_N(chunk_dealloc_mmap) #define chunk_dss_boot JEMALLOC_N(chunk_dss_boot) +#define chunk_dss_postfork_child JEMALLOC_N(chunk_dss_postfork_child) +#define chunk_dss_postfork_parent JEMALLOC_N(chunk_dss_postfork_parent) +#define chunk_dss_prefork JEMALLOC_N(chunk_dss_prefork) #define chunk_in_dss JEMALLOC_N(chunk_in_dss) #define chunk_mmap_boot JEMALLOC_N(chunk_mmap_boot) #define ckh_bucket_search JEMALLOC_N(ckh_bucket_search) @@ -115,6 +124,9 @@ #define huge_dalloc JEMALLOC_N(huge_dalloc) #define huge_malloc JEMALLOC_N(huge_malloc) #define huge_palloc JEMALLOC_N(huge_palloc) +#define huge_postfork_child JEMALLOC_N(huge_postfork_child) +#define huge_postfork_parent JEMALLOC_N(huge_postfork_parent) +#define huge_prefork JEMALLOC_N(huge_prefork) #define huge_prof_ctx_get JEMALLOC_N(huge_prof_ctx_get) #define huge_prof_ctx_set JEMALLOC_N(huge_prof_ctx_set) #define huge_ralloc JEMALLOC_N(huge_ralloc) @@ -129,12 +141,16 @@ #define isalloc JEMALLOC_N(isalloc) #define ivsalloc JEMALLOC_N(ivsalloc) #define jemalloc_darwin_init JEMALLOC_N(jemalloc_darwin_init) -#define jemalloc_postfork JEMALLOC_N(jemalloc_postfork) +#define jemalloc_postfork_child JEMALLOC_N(jemalloc_postfork_child) +#define jemalloc_postfork_parent JEMALLOC_N(jemalloc_postfork_parent) #define jemalloc_prefork JEMALLOC_N(jemalloc_prefork) #define malloc_cprintf JEMALLOC_N(malloc_cprintf) #define malloc_mutex_destroy JEMALLOC_N(malloc_mutex_destroy) #define malloc_mutex_init JEMALLOC_N(malloc_mutex_init) #define malloc_mutex_lock JEMALLOC_N(malloc_mutex_lock) +#define malloc_mutex_postfork_child JEMALLOC_N(malloc_mutex_postfork_child) +#define malloc_mutex_postfork_parent JEMALLOC_N(malloc_mutex_postfork_parent) +#define malloc_mutex_prefork JEMALLOC_N(malloc_mutex_prefork) #define malloc_mutex_trylock JEMALLOC_N(malloc_mutex_trylock) #define malloc_mutex_unlock 
JEMALLOC_N(malloc_mutex_unlock) #define malloc_printf JEMALLOC_N(malloc_printf) diff --git a/src/arena.c b/src/arena.c index c14cb2c2..898f8c7d 100644 --- a/src/arena.c +++ b/src/arena.c @@ -2169,3 +2169,33 @@ arena_boot(void) bin_info_init(); } + +void +arena_prefork(arena_t *arena) +{ + unsigned i; + + malloc_mutex_prefork(&arena->lock); + for (i = 0; i < NBINS; i++) + malloc_mutex_prefork(&arena->bins[i].lock); +} + +void +arena_postfork_parent(arena_t *arena) +{ + unsigned i; + + for (i = 0; i < NBINS; i++) + malloc_mutex_postfork_parent(&arena->bins[i].lock); + malloc_mutex_postfork_parent(&arena->lock); +} + +void +arena_postfork_child(arena_t *arena) +{ + unsigned i; + + for (i = 0; i < NBINS; i++) + malloc_mutex_postfork_child(&arena->bins[i].lock); + malloc_mutex_postfork_child(&arena->lock); +} diff --git a/src/base.c b/src/base.c index cc85e849..eb68334b 100644 --- a/src/base.c +++ b/src/base.c @@ -4,7 +4,7 @@ /******************************************************************************/ /* Data. */ -malloc_mutex_t base_mtx; +static malloc_mutex_t base_mtx; /* * Current pages that are being used for internal memory allocations. These @@ -104,3 +104,24 @@ base_boot(void) return (false); } + +void +base_prefork(void) +{ + + malloc_mutex_prefork(&base_mtx); +} + +void +base_postfork_parent(void) +{ + + malloc_mutex_postfork_parent(&base_mtx); +} + +void +base_postfork_child(void) +{ + + malloc_mutex_postfork_child(&base_mtx); +} diff --git a/src/chunk_dss.c b/src/chunk_dss.c index c25baea3..405dc29b 100644 --- a/src/chunk_dss.c +++ b/src/chunk_dss.c @@ -3,14 +3,18 @@ /******************************************************************************/ /* Data. */ -malloc_mutex_t dss_mtx; +/* + * Protects sbrk() calls. This avoids malloc races among threads, though it + * does not protect against races with threads that call sbrk() directly. + */ +static malloc_mutex_t dss_mtx; /* Base address of the DSS. 
*/ -static void *dss_base; +static void *dss_base; /* Current end of the DSS, or ((void *)-1) if the DSS is exhausted. */ -static void *dss_prev; +static void *dss_prev; /* Current upper limit on DSS addresses. */ -static void *dss_max; +static void *dss_max; /* * Trees of chunks that were previously allocated (trees differ only in node @@ -291,4 +295,28 @@ chunk_dss_boot(void) return (false); } +void +chunk_dss_prefork(void) +{ + + if (config_dss) + malloc_mutex_prefork(&dss_mtx); +} + +void +chunk_dss_postfork_parent(void) +{ + + if (config_dss) + malloc_mutex_postfork_parent(&dss_mtx); +} + +void +chunk_dss_postfork_child(void) +{ + + if (config_dss) + malloc_mutex_postfork_child(&dss_mtx); +} + /******************************************************************************/ diff --git a/src/huge.c b/src/huge.c index 2d51c529..a4e6cc8f 100644 --- a/src/huge.c +++ b/src/huge.c @@ -359,3 +359,24 @@ huge_boot(void) return (false); } + +void +huge_prefork(void) +{ + + malloc_mutex_prefork(&huge_mtx); +} + +void +huge_postfork_parent(void) +{ + + malloc_mutex_postfork_parent(&huge_mtx); +} + +void +huge_postfork_child(void) +{ + + malloc_mutex_postfork_child(&huge_mtx); +} diff --git a/src/jemalloc.c b/src/jemalloc.c index 2f3f3722..385eb03a 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -610,8 +610,8 @@ malloc_init_hard(void) malloc_conf_init(); /* Register fork handlers. */ - if (pthread_atfork(jemalloc_prefork, jemalloc_postfork, - jemalloc_postfork) != 0) { + if (pthread_atfork(jemalloc_prefork, jemalloc_postfork_parent, + jemalloc_postfork_child) != 0) { malloc_write(": Error in pthread_atfork()\n"); if (opt_abort) abort(); @@ -1593,40 +1593,46 @@ jemalloc_prefork(void) unsigned i; /* Acquire all mutexes in a safe order. 
*/ - - malloc_mutex_lock(&arenas_lock); + malloc_mutex_prefork(&arenas_lock); for (i = 0; i < narenas; i++) { if (arenas[i] != NULL) - malloc_mutex_lock(&arenas[i]->lock); + arena_prefork(arenas[i]); } - - malloc_mutex_lock(&base_mtx); - - malloc_mutex_lock(&huge_mtx); - - if (config_dss) - malloc_mutex_lock(&dss_mtx); + base_prefork(); + huge_prefork(); + chunk_dss_prefork(); } void -jemalloc_postfork(void) +jemalloc_postfork_parent(void) { unsigned i; /* Release all mutexes, now that fork() has completed. */ - - if (config_dss) - malloc_mutex_unlock(&dss_mtx); - - malloc_mutex_unlock(&huge_mtx); - - malloc_mutex_unlock(&base_mtx); - + chunk_dss_postfork_parent(); + huge_postfork_parent(); + base_postfork_parent(); for (i = 0; i < narenas; i++) { if (arenas[i] != NULL) - malloc_mutex_unlock(&arenas[i]->lock); + arena_postfork_parent(arenas[i]); } - malloc_mutex_unlock(&arenas_lock); + malloc_mutex_postfork_parent(&arenas_lock); +} + +void +jemalloc_postfork_child(void) +{ + unsigned i; + + /* Release all mutexes, now that fork() has completed. 
*/ + chunk_dss_postfork_child(); + huge_postfork_child(); + base_postfork_child(); + for (i = 0; i < narenas; i++) { + if (arenas[i] != NULL) + arena_postfork_child(arenas[i]); + } + malloc_mutex_postfork_child(&arenas_lock); } /******************************************************************************/ diff --git a/src/mutex.c b/src/mutex.c index 0e09060e..243b7129 100644 --- a/src/mutex.c +++ b/src/mutex.c @@ -92,3 +92,29 @@ malloc_mutex_destroy(malloc_mutex_t *mutex) } #endif } + +void +malloc_mutex_prefork(malloc_mutex_t *mutex) +{ + + malloc_mutex_lock(mutex); +} + +void +malloc_mutex_postfork_parent(malloc_mutex_t *mutex) +{ + + malloc_mutex_unlock(mutex); +} + +void +malloc_mutex_postfork_child(malloc_mutex_t *mutex) +{ + + if (malloc_mutex_init(mutex)) { + malloc_printf(": Error re-initializing mutex in " + "child\n"); + if (opt_abort) + abort(); + } +} From 6508bc6931b54b50aaa0976ee7f0681482b2a80a Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 15 Mar 2012 17:07:42 -0700 Subject: [PATCH 0087/3378] Remove #include . Remove #include , which is no longer needed (now using sysconf(3) to get number of CPUs). --- include/jemalloc/internal/jemalloc_internal.h.in | 1 - 1 file changed, 1 deletion(-) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 800d72c2..2c213120 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -2,7 +2,6 @@ #include #include #include -#include #include #include From 39006f990771518b1b7d4b8dfdfac72409ef26ca Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 16 Mar 2012 16:57:02 -0700 Subject: [PATCH 0088/3378] Look for pthreads functionality in libc. If there is no libpthread, look for pthreads functionality in libc before failing to configure pthreads. This is necessary on at least the Android platform. Reported by Mike Hommey. 
--- configure.ac | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 295e646d..47d5784b 100644 --- a/configure.ac +++ b/configure.ac @@ -746,8 +746,11 @@ dnl ============================================================================ dnl Configure pthreads. AC_CHECK_HEADERS([pthread.h], , [AC_MSG_ERROR([pthread.h is missing])]) +dnl Some systems may embed pthreads functionality in libc; check for libpthread +dnl first, but try libc too before failing. AC_CHECK_LIB([pthread], [pthread_create], [LIBS="$LIBS -lpthread"], - [AC_MSG_ERROR([libpthread is missing])]) + [AC_SEARCH_LIBS([pthread_create], , , + AC_MSG_ERROR([libpthread is missing]))]) CPPFLAGS="$CPPFLAGS -D_REENTRANT" From e7b8fa18d256e0bc18b61ee03b69af87fa3d7969 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 16 Mar 2012 17:09:32 -0700 Subject: [PATCH 0089/3378] Rename the "tcache.flush" mallctl to "thread.tcache.flush". --- doc/jemalloc.xml.in | 36 ++++++++++++++++++------------------ src/ctl.c | 12 ++++++------ 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 926deafe..ffc6c94e 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1030,24 +1030,6 @@ malloc_conf = "xmalloc:true";]]> by default. - - - tcache.flush - (void) - -- - [] - - Flush calling thread's tcache. This interface releases - all cached objects and internal data structures associated with the - calling thread's thread-specific cache. Ordinarily, this interface - need not be called, since automatic periodic incremental garbage - collection occurs, and the thread cache is automatically discarded when - a thread exits. However, garbage collection is triggered by allocation - activity, so it is possible for a thread that stops - allocating/deallocating to retain its cache indefinitely, in which case - the developer may find manual flushing useful. 
- - thread.arena @@ -1119,6 +1101,24 @@ malloc_conf = "xmalloc:true";]]> mallctl* calls. + + + thread.tcache.flush + (void) + -- + [] + + Flush calling thread's tcache. This interface releases + all cached objects and internal data structures associated with the + calling thread's thread-specific cache. Ordinarily, this interface + need not be called, since automatic periodic incremental garbage + collection occurs, and the thread cache is automatically discarded when + a thread exits. However, garbage collection is triggered by allocation + activity, so it is possible for a thread that stops + allocating/deallocating to retain its cache indefinitely, in which case + the developer may find manual flushing useful. + + arenas.narenas diff --git a/src/ctl.c b/src/ctl.c index e7639d7f..1ef84e80 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -39,7 +39,7 @@ static int ctl_lookup(const char *name, ctl_node_t const **nodesp, CTL_PROTO(version) CTL_PROTO(epoch) -CTL_PROTO(tcache_flush) +CTL_PROTO(thread_tcache_flush) CTL_PROTO(thread_arena) CTL_PROTO(thread_allocated) CTL_PROTO(thread_allocatedp) @@ -151,7 +151,7 @@ CTL_PROTO(stats_mapped) #define INDEX(i) false, {.indexed = {i##_index}}, NULL static const ctl_node_t tcache_node[] = { - {NAME("flush"), CTL(tcache_flush)} + {NAME("flush"), CTL(thread_tcache_flush)} }; static const ctl_node_t thread_node[] = { @@ -159,7 +159,8 @@ static const ctl_node_t thread_node[] = { {NAME("allocated"), CTL(thread_allocated)}, {NAME("allocatedp"), CTL(thread_allocatedp)}, {NAME("deallocated"), CTL(thread_deallocated)}, - {NAME("deallocatedp"), CTL(thread_deallocatedp)} + {NAME("deallocatedp"), CTL(thread_deallocatedp)}, + {NAME("tcache"), CHILD(tcache)} }; static const ctl_node_t config_node[] = { @@ -334,7 +335,6 @@ static const ctl_node_t stats_node[] = { static const ctl_node_t root_node[] = { {NAME("version"), CTL(version)}, {NAME("epoch"), CTL(epoch)}, - {NAME("tcache"), CHILD(tcache)}, {NAME("thread"), CHILD(thread)}, {NAME("config"), 
CHILD(config)}, {NAME("opt"), CHILD(opt)}, @@ -967,8 +967,8 @@ RETURN: } static int -tcache_flush_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, - void *newp, size_t newlen) +thread_tcache_flush_ctl(const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { int ret; tcache_t *tcache; From 7091b415bb41b9d7098a24cfe0a577299622f5db Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 19 Mar 2012 09:36:44 -0700 Subject: [PATCH 0090/3378] Fix various documentation formatting regressions. --- configure.ac | 17 +++++++++++++---- doc/jemalloc.xml.in | 38 ++++++++++++++++++++------------------ 2 files changed, 33 insertions(+), 22 deletions(-) diff --git a/configure.ac b/configure.ac index 47d5784b..4a068d09 100644 --- a/configure.ac +++ b/configure.ac @@ -87,14 +87,23 @@ AC_SUBST([MANDIR]) dnl Support for building documentation. AC_PATH_PROG([XSLTPROC], [xsltproc], , [$PATH]) +if test -d "/usr/share/xml/docbook/stylesheet/docbook-xsl" ; then + DEFAULT_XSLROOT="/usr/share/xml/docbook/stylesheet/docbook-xsl" +elif test -d "/usr/share/sgml/docbook/xsl-stylesheets" ; then + DEFAULT_XSLROOT="/usr/share/sgml/docbook/xsl-stylesheets" +else + dnl Documentation building will fail if this default gets used. + DEFAULT_XSLROOT="" +fi AC_ARG_WITH([xslroot], - [AS_HELP_STRING([--with-xslroot=], [XSL stylesheet root path])], + [AS_HELP_STRING([--with-xslroot=], [XSL stylesheet root path])], [ if test "x$with_xslroot" = "xno" ; then - XSLROOT="/usr/share/xml/docbook/stylesheet/docbook-xsl" + XSLROOT="${DEFAULT_XSLROOT}" else XSLROOT="${with_xslroot}" -fi, - XSLROOT="/usr/share/xml/docbook/stylesheet/docbook-xsl" +fi +], + XSLROOT="${DEFAULT_XSLROOT}" ) AC_SUBST([XSLROOT]) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index ffc6c94e..3cbc851f 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -377,7 +377,7 @@ for (i = 0; i < nbins; i++) { sets *rsize to the real size of the allocation if rsize is not NULL. 
Behavior is undefined if size is - 0. + 0. The rallocm function resizes the allocation at *ptr to be at least @@ -390,7 +390,7 @@ for (i = 0; i < nbins; i++) { language="C">size + extra) bytes, though inability to allocate the extra byte(s) will not by itself result in failure. Behavior is - undefined if size is 0, or if + undefined if size is 0, or if (size + extra > SIZE_T_MAX). @@ -409,7 +409,7 @@ for (i = 0; i < nbins; i++) { *rsize to the real size of the allocation that would result from the equivalent allocm function call. Behavior is undefined if - size is 0. + size is 0. @@ -516,53 +516,55 @@ for (i = 0; i < nbins; i++) { Size classes - - - - + + + + Category - Subcategory + Spacing Size - Small - Tiny + Small + lg [8] - 16-spaced + 16 [16, 32, 48, ..., 128] - 32-spaced + 32 [160, 192, 224, 256] - 64-spaced + 64 [320, 384, 448, 512] - 128-spaced + 128 [640, 768, 896, 1024] - 256-spaced + 256 [1280, 1536, 1792, 2048] - 512-spaced + 512 [2560, 3072, 3584] - Large + Large + 4 KiB [4 KiB, 8 KiB, 12 KiB, ..., 4072 KiB] - Huge + Huge + 4 MiB [4 MiB, 8 MiB, 12 MiB, ...] From f3e139a1ef17f300ebed0577a0ee7b3714404707 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 19 Mar 2012 09:54:20 -0700 Subject: [PATCH 0091/3378] Use AC_LINK_IFELSE() rather than AC_RUN_IFELSE() in JE_COMPILABLE(). Reported by Mike Hommey. --- configure.ac | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/configure.ac b/configure.ac index 4a068d09..c7110240 100644 --- a/configure.ac +++ b/configure.ac @@ -27,16 +27,16 @@ AC_COMPILE_IFELSE([AC_LANG_PROGRAM( dnl JE_COMPILABLE(label, hcode, mcode, rvar) dnl -dnl Use AC_RUN_IFELSE() rather than AC_COMPILE_IFELSE() so that linker errors +dnl Use AC_LINK_IFELSE() rather than AC_COMPILE_IFELSE() so that linker errors dnl cause failure. 
AC_DEFUN([JE_COMPILABLE], [ AC_CACHE_CHECK([whether $1 is compilable], [$4], - [AC_RUN_IFELSE([AC_LANG_PROGRAM([$2], - [$3])], - [$4=yes], - [$4=no])]) + [AC_LINK_IFELSE([AC_LANG_PROGRAM([$2], + [$3])], + [$4=yes], + [$4=no])]) ]) dnl ============================================================================ From e24c7af35d1e9d24d02166ac98cfca7cf807ff13 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 19 Mar 2012 10:21:17 -0700 Subject: [PATCH 0092/3378] Invert NO_TLS to JEMALLOC_TLS. --- configure.ac | 28 +++++++++---------- .../jemalloc/internal/jemalloc_internal.h.in | 4 +-- include/jemalloc/internal/prof.h | 2 +- include/jemalloc/internal/tcache.h | 2 +- include/jemalloc/jemalloc_defs.h.in | 2 +- src/chunk_mmap.c | 4 +-- src/jemalloc.c | 10 +++---- src/prof.c | 2 +- src/tcache.c | 2 +- 9 files changed, 28 insertions(+), 28 deletions(-) diff --git a/configure.ac b/configure.ac index c7110240..b92a7491 100644 --- a/configure.ac +++ b/configure.ac @@ -210,7 +210,7 @@ case "${host}" in *-*-darwin*) CFLAGS="$CFLAGS -fno-common -no-cpp-precomp" abi="macho" - AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE]) + AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) RPATH="" LD_PRELOAD_VAR="DYLD_INSERT_LIBRARIES" so="dylib" @@ -218,14 +218,14 @@ case "${host}" in *-*-freebsd*) CFLAGS="$CFLAGS" abi="elf" - AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE]) + AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) RPATH="-Wl,-rpath," ;; *-*-linux*) CFLAGS="$CFLAGS" CPPFLAGS="$CPPFLAGS -D_GNU_SOURCE" abi="elf" - AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED]) + AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED], [ ]) RPATH="-Wl,-rpath," ;; *-*-netbsd*) @@ -240,7 +240,7 @@ case "${host}" in [CFLAGS="$CFLAGS"; abi="elf"], [abi="aout"]) AC_MSG_RESULT([$abi]) - AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE]) + AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) RPATH="-Wl,-rpath," ;; *-*-solaris2*) @@ -291,7 +291,7 @@ JE_COMPILABLE([mremap(...MREMAP_FIXED...)], [ void *p = mremap((void *)0, 0, 0, MREMAP_MAYMOVE|MREMAP_FIXED, 
(void *)0); ], [je_cv_mremap_fixed]) if test "x${je_cv_mremap_fixed}" = "xyes" ; then - AC_DEFINE([JEMALLOC_MREMAP_FIXED]) + AC_DEFINE([JEMALLOC_MREMAP_FIXED], [ ]) fi dnl Support optional additions to rpath. @@ -329,10 +329,10 @@ public_syms="malloc_conf malloc_message malloc calloc posix_memalign aligned_all dnl Check for allocator-related functions that should be wrapped. AC_CHECK_FUNC([memalign], - [AC_DEFINE([JEMALLOC_OVERRIDE_MEMALIGN]) + [AC_DEFINE([JEMALLOC_OVERRIDE_MEMALIGN], [ ]) public_syms="${public_syms} memalign"]) AC_CHECK_FUNC([valloc], - [AC_DEFINE([JEMALLOC_OVERRIDE_VALLOC]) + [AC_DEFINE([JEMALLOC_OVERRIDE_VALLOC], [ ]) public_syms="${public_syms} valloc"]) dnl Support the experimental API by default. @@ -458,7 +458,7 @@ fi [enable_cc_silence="0"] ) if test "x$enable_cc_silence" = "x1" ; then - AC_DEFINE([JEMALLOC_CC_SILENCE]) + AC_DEFINE([JEMALLOC_CC_SILENCE], [ ]) fi dnl Do not compile with debugging by default. @@ -808,8 +808,8 @@ AC_COMPILE_IFELSE([AC_LANG_PROGRAM( enable_tls="0") fi AC_SUBST([enable_tls]) -if test "x${enable_tls}" = "x0" ; then - AC_DEFINE_UNQUOTED([NO_TLS], [ ]) +if test "x${enable_tls}" = "x1" ; then + AC_DEFINE_UNQUOTED([JEMALLOC_TLS], [ ]) fi dnl ============================================================================ @@ -846,7 +846,7 @@ JE_COMPILABLE([Darwin OSAtomic*()], [ } ], [je_cv_osatomic]) if test "x${je_cv_osatomic}" = "xyes" ; then - AC_DEFINE([JEMALLOC_OSATOMIC]) + AC_DEFINE([JEMALLOC_OSATOMIC], [ ]) fi dnl ============================================================================ @@ -861,15 +861,15 @@ JE_COMPILABLE([Darwin OSSpin*()], [ OSSpinLockUnlock(&lock); ], [je_cv_osspin]) if test "x${je_cv_osspin}" = "xyes" ; then - AC_DEFINE([JEMALLOC_OSSPIN]) + AC_DEFINE([JEMALLOC_OSSPIN], [ ]) fi dnl ============================================================================ dnl Darwin-related configuration. 
if test "x${abi}" = "xmacho" ; then - AC_DEFINE([JEMALLOC_IVSALLOC]) - AC_DEFINE([JEMALLOC_ZONE]) + AC_DEFINE([JEMALLOC_IVSALLOC], [ ]) + AC_DEFINE([JEMALLOC_ZONE], [ ]) dnl The szone version jumped from 3 to 6 between the OS X 10.5.x and 10.6 dnl releases. malloc_zone_t and malloc_introspection_t have new fields in diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 2c213120..dbfd3fc8 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -357,7 +357,7 @@ extern unsigned ncpus; extern malloc_mutex_t arenas_lock; /* Protects arenas initialization. */ extern pthread_key_t arenas_tsd; -#ifndef NO_TLS +#ifdef JEMALLOC_TLS /* * Map of pthread_self() --> arenas[???], used for selecting an arena to use * for allocations. @@ -382,7 +382,7 @@ extern __thread arena_t *arenas_tls JEMALLOC_ATTR(tls_model("initial-exec")); extern arena_t **arenas; extern unsigned narenas; -#ifndef NO_TLS +#ifdef JEMALLOC_TLS extern __thread thread_allocated_t thread_allocated_tls; # define ALLOCATED_GET() (thread_allocated_tls.allocated) # define ALLOCATEDP_GET() (&thread_allocated_tls.allocated) diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index f647f637..48231928 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -180,7 +180,7 @@ extern uint64_t prof_interval; extern bool prof_promote; /* Thread-specific backtrace cache, used to reduce bt2ctx contention. 
*/ -#ifndef NO_TLS +#ifdef JEMALLOC_TLS extern __thread prof_tdata_t *prof_tdata_tls JEMALLOC_ATTR(tls_model("initial-exec")); # define PROF_TCACHE_GET() prof_tdata_tls diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index e5f9518e..ed037cf9 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -76,7 +76,7 @@ extern ssize_t opt_lg_tcache_max; extern tcache_bin_info_t *tcache_bin_info; /* Map of thread-specific caches. */ -#ifndef NO_TLS +#ifdef JEMALLOC_TLS extern __thread tcache_t *tcache_tls JEMALLOC_ATTR(tls_model("initial-exec")); # define TCACHE_GET() tcache_tls diff --git a/include/jemalloc/jemalloc_defs.h.in b/include/jemalloc/jemalloc_defs.h.in index f8f80e62..434dd368 100644 --- a/include/jemalloc/jemalloc_defs.h.in +++ b/include/jemalloc/jemalloc_defs.h.in @@ -122,7 +122,7 @@ #undef STATIC_PAGE_SHIFT /* TLS is used to map arenas and magazine caches to threads. */ -#undef NO_TLS +#undef JEMALLOC_TLS /* * JEMALLOC_IVSALLOC enables ivsalloc(), which verifies that pointers reside diff --git a/src/chunk_mmap.c b/src/chunk_mmap.c index c7409284..6ea21180 100644 --- a/src/chunk_mmap.c +++ b/src/chunk_mmap.c @@ -8,7 +8,7 @@ * Used by chunk_alloc_mmap() to decide whether to attempt the fast path and * potentially avoid some system calls. 
*/ -#ifndef NO_TLS +#ifdef JEMALLOC_TLS static __thread bool mmap_unaligned_tls JEMALLOC_ATTR(tls_model("initial-exec")); #define MMAP_UNALIGNED_GET() mmap_unaligned_tls @@ -225,7 +225,7 @@ bool chunk_mmap_boot(void) { -#ifdef NO_TLS +#ifndef JEMALLOC_TLS if (pthread_key_create(&mmap_unaligned_tsd, NULL) != 0) { malloc_write(": Error in pthread_key_create()\n"); return (true); diff --git a/src/jemalloc.c b/src/jemalloc.c index 385eb03a..e2b61343 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -9,11 +9,11 @@ arena_t **arenas; unsigned narenas; pthread_key_t arenas_tsd; -#ifndef NO_TLS +#ifdef JEMALLOC_TLS __thread arena_t *arenas_tls JEMALLOC_ATTR(tls_model("initial-exec")); #endif -#ifndef NO_TLS +#ifdef JEMALLOC_TLS __thread thread_allocated_t thread_allocated_tls; #endif pthread_key_t thread_allocated_tsd; @@ -58,7 +58,7 @@ size_t opt_narenas = 0; static void stats_print_atexit(void); static unsigned malloc_ncpus(void); static void arenas_cleanup(void *arg); -#ifdef NO_TLS +#ifndef JEMALLOC_TLS static void thread_allocated_cleanup(void *arg); #endif static bool malloc_conf_next(char const **opts_p, char const **k_p, @@ -251,7 +251,7 @@ arenas_cleanup(void *arg) malloc_mutex_unlock(&arenas_lock); } -#ifdef NO_TLS +#ifndef JEMALLOC_TLS static void thread_allocated_cleanup(void *arg) { @@ -656,7 +656,7 @@ malloc_init_hard(void) return (true); } -#ifdef NO_TLS +#ifndef JEMALLOC_TLS /* Initialize allocation counters before any allocations can occur. 
*/ if (config_stats && pthread_key_create(&thread_allocated_tsd, thread_allocated_cleanup) != 0) { diff --git a/src/prof.c b/src/prof.c index 2ca66c73..9c327379 100644 --- a/src/prof.c +++ b/src/prof.c @@ -26,7 +26,7 @@ char opt_prof_prefix[PATH_MAX + 1]; uint64_t prof_interval; bool prof_promote; -#ifndef NO_TLS +#ifdef JEMALLOC_TLS __thread prof_tdata_t *prof_tdata_tls JEMALLOC_ATTR(tls_model("initial-exec")); #endif diff --git a/src/tcache.c b/src/tcache.c index 478b7f5f..f90308cd 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -11,7 +11,7 @@ tcache_bin_info_t *tcache_bin_info; static unsigned stack_nelms; /* Total stack elms per tcache. */ /* Map of thread-specific caches. */ -#ifndef NO_TLS +#ifdef JEMALLOC_TLS __thread tcache_t *tcache_tls JEMALLOC_ATTR(tls_model("initial-exec")); #endif From 650285d5be2aacd9c0e60260563dd0235b729af7 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 19 Mar 2012 10:25:27 -0700 Subject: [PATCH 0093/3378] Generalize dlsym() configuration. Generalize dlsym() configuration to succeed if dlsym() is in libc rather than libdl. 
--- configure.ac | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/configure.ac b/configure.ac index b92a7491..76cb6700 100644 --- a/configure.ac +++ b/configure.ac @@ -777,8 +777,10 @@ fi ) if test "x$enable_lazy_lock" = "x1" ; then AC_CHECK_HEADERS([dlfcn.h], , [AC_MSG_ERROR([dlfcn.h is missing])]) - AC_CHECK_LIB([dl], [dlopen], [LIBS="$LIBS -ldl"], - [AC_MSG_ERROR([libdl is missing])]) + AC_CHECK_FUNC([dlsym], [], + [AC_CHECK_LIB([dl], [dlsym], [LIBS="$LIBS -ldl"], + [AC_MSG_ERROR([libdl is missing])]) + ]) AC_DEFINE([JEMALLOC_LAZY_LOCK], [ ]) fi AC_SUBST([enable_lazy_lock]) From f4d0fc310effc6d1200223851897a8036123738c Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Tue, 20 Mar 2012 18:03:09 +0100 Subject: [PATCH 0094/3378] Unbreak mac after commit 4e2e3dd --- src/zone.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/zone.c b/src/zone.c index 5beed5f3..a0372e1a 100644 --- a/src/zone.c +++ b/src/zone.c @@ -152,7 +152,7 @@ zone_force_unlock(malloc_zone_t *zone) { if (isthreaded) - jemalloc_postfork(); + jemalloc_postfork_parent(); } malloc_zone_t * From 154829d2560a202ef6378b089655747585e44fb5 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Tue, 20 Mar 2012 18:01:38 +0100 Subject: [PATCH 0095/3378] Improve zone support for OSX I tested a build from 10.7 run on 10.7 and 10.6, and a build from 10.6 run on 10.6. The AC_COMPILE_IFELSE limbo is to avoid running a program during configure, which presumably makes it work when cross compiling for iOS. --- configure.ac | 57 +++++++-------- src/jemalloc.c | 20 +++-- src/zone.c | 193 ++++++------------------------------------------- 3 files changed, 63 insertions(+), 207 deletions(-) diff --git a/configure.ac b/configure.ac index 76cb6700..02d4f536 100644 --- a/configure.ac +++ b/configure.ac @@ -877,39 +877,32 @@ if test "x${abi}" = "xmacho" ; then dnl releases. 
malloc_zone_t and malloc_introspection_t have new fields in dnl 10.6, which is the only source-level indication of the change. AC_MSG_CHECKING([malloc zone version]) - AC_TRY_COMPILE([#include -#include ], [ - static malloc_zone_t zone; - static struct malloc_introspection_t zone_introspect; + AC_DEFUN([JE_ZONE_PROGRAM], + [AC_LANG_PROGRAM( + [#include ], + [static foo[[sizeof($1) $2 sizeof(void *) * $3 ? 1 : -1]]] + )]) - zone.size = NULL; - zone.malloc = NULL; - zone.calloc = NULL; - zone.valloc = NULL; - zone.free = NULL; - zone.realloc = NULL; - zone.destroy = NULL; - zone.zone_name = "jemalloc_zone"; - zone.batch_malloc = NULL; - zone.batch_free = NULL; - zone.introspect = &zone_introspect; - zone.version = 6; - zone.memalign = NULL; - zone.free_definite_size = NULL; - - zone_introspect.enumerator = NULL; - zone_introspect.good_size = NULL; - zone_introspect.check = NULL; - zone_introspect.print = NULL; - zone_introspect.log = NULL; - zone_introspect.force_lock = NULL; - zone_introspect.force_unlock = NULL; - zone_introspect.statistics = NULL; - zone_introspect.zone_locked = NULL; -], [AC_DEFINE_UNQUOTED([JEMALLOC_ZONE_VERSION], [6]) - AC_MSG_RESULT([6])], - [AC_DEFINE_UNQUOTED([JEMALLOC_ZONE_VERSION], [3]) - AC_MSG_RESULT([3])]) + AC_COMPILE_IFELSE([JE_ZONE_PROGRAM(malloc_zone_t,==,14)],[JEMALLOC_ZONE_VERSION=3],[ + AC_COMPILE_IFELSE([JE_ZONE_PROGRAM(malloc_zone_t,==,15)],[JEMALLOC_ZONE_VERSION=5],[ + AC_COMPILE_IFELSE([JE_ZONE_PROGRAM(malloc_zone_t,==,16)],[ + AC_COMPILE_IFELSE([JE_ZONE_PROGRAM(malloc_introspection_t,==,9)],[JEMALLOC_ZONE_VERSION=6],[ + AC_COMPILE_IFELSE([JE_ZONE_PROGRAM(malloc_introspection_t,==,13)],[JEMALLOC_ZONE_VERSION=7],[JEMALLOC_ZONE_VERSION=] + )])],[ + AC_COMPILE_IFELSE([JE_ZONE_PROGRAM(malloc_zone_t,==,17)],[JEMALLOC_ZONE_VERSION=8],[ + AC_COMPILE_IFELSE([JE_ZONE_PROGRAM(malloc_zone_t,>,17)],[JEMALLOC_ZONE_VERSION=9],[JEMALLOC_ZONE_VERSION=] + )])])])]) + if test "x${JEMALLOC_ZONE_VERSION}" = "x"; then + 
AC_MSG_RESULT([unsupported]) + AC_MSG_ERROR([Unsupported malloc zone version]) + fi + if test "${JEMALLOC_ZONE_VERSION}" = 9; then + JEMALLOC_ZONE_VERSION=8 + AC_MSG_RESULT([> 8]) + else + AC_MSG_RESULT([$JEMALLOC_ZONE_VERSION]) + fi + AC_DEFINE_UNQUOTED(JEMALLOC_ZONE_VERSION, [$JEMALLOC_ZONE_VERSION]) fi dnl ============================================================================ diff --git a/src/jemalloc.c b/src/jemalloc.c index e2b61343..2610452e 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -747,15 +747,23 @@ malloc_init_hard(void) arenas[0] = init_arenas[0]; #ifdef JEMALLOC_ZONE - /* Register the custom zone. */ - malloc_zone_register(create_zone()); + /* Register the custom zone. At this point it won't be the default. */ + malloc_zone_t *jemalloc_zone = create_zone(); + malloc_zone_register(jemalloc_zone); /* - * Convert the default szone to an "overlay zone" that is capable of - * deallocating szone-allocated objects, but allocating new objects - * from jemalloc. + * Unregister and reregister the default zone. On OSX >= 10.6, + * unregistering takes the last registered zone and places it at the + * location of the specified zone. Unregistering the default zone thus + * makes the last registered one the default. On OSX < 10.6, + * unregistering shifts all registered zones. The first registered zone + * then becomes the default. */ - szone2ozone(malloc_default_zone()); + do { + malloc_zone_t *default_zone = malloc_default_zone(); + malloc_zone_unregister(default_zone); + malloc_zone_register(default_zone); + } while (malloc_default_zone() != jemalloc_zone); #endif malloc_initialized = true; diff --git a/src/zone.c b/src/zone.c index a0372e1a..a8f09c98 100644 --- a/src/zone.c +++ b/src/zone.c @@ -6,8 +6,8 @@ /******************************************************************************/ /* Data. 
*/ -static malloc_zone_t zone, szone; -static struct malloc_introspection_t zone_introspect, ozone_introspect; +static malloc_zone_t zone; +static struct malloc_introspection_t zone_introspect; /******************************************************************************/ /* Function prototypes for non-inline static functions. */ @@ -18,8 +18,10 @@ static void *zone_calloc(malloc_zone_t *zone, size_t num, size_t size); static void *zone_valloc(malloc_zone_t *zone, size_t size); static void zone_free(malloc_zone_t *zone, void *ptr); static void *zone_realloc(malloc_zone_t *zone, void *ptr, size_t size); -#if (JEMALLOC_ZONE_VERSION >= 6) +#if (JEMALLOC_ZONE_VERSION >= 5) static void *zone_memalign(malloc_zone_t *zone, size_t alignment, +#endif +#if (JEMALLOC_ZONE_VERSION >= 6) size_t size); static void zone_free_definite_size(malloc_zone_t *zone, void *ptr, size_t size); @@ -28,19 +30,6 @@ static void *zone_destroy(malloc_zone_t *zone); static size_t zone_good_size(malloc_zone_t *zone, size_t size); static void zone_force_lock(malloc_zone_t *zone); static void zone_force_unlock(malloc_zone_t *zone); -static size_t ozone_size(malloc_zone_t *zone, void *ptr); -static void ozone_free(malloc_zone_t *zone, void *ptr); -static void *ozone_realloc(malloc_zone_t *zone, void *ptr, size_t size); -static unsigned ozone_batch_malloc(malloc_zone_t *zone, size_t size, - void **results, unsigned num_requested); -static void ozone_batch_free(malloc_zone_t *zone, void **to_be_freed, - unsigned num); -#if (JEMALLOC_ZONE_VERSION >= 6) -static void ozone_free_definite_size(malloc_zone_t *zone, void *ptr, - size_t size); -#endif -static void ozone_force_lock(malloc_zone_t *zone); -static void ozone_force_unlock(malloc_zone_t *zone); /******************************************************************************/ /* @@ -101,7 +90,7 @@ zone_realloc(malloc_zone_t *zone, void *ptr, size_t size) return (je_realloc(ptr, size)); } -#if (JEMALLOC_ZONE_VERSION >= 6) +#if (JEMALLOC_ZONE_VERSION 
>= 5) static void * zone_memalign(malloc_zone_t *zone, size_t alignment, size_t size) { @@ -111,7 +100,9 @@ zone_memalign(malloc_zone_t *zone, size_t alignment, size_t size) return (ret); } +#endif +#if (JEMALLOC_ZONE_VERSION >= 6) static void zone_free_definite_size(malloc_zone_t *zone, void *ptr, size_t size) { @@ -171,10 +162,15 @@ create_zone(void) zone.batch_free = NULL; zone.introspect = &zone_introspect; zone.version = JEMALLOC_ZONE_VERSION; -#if (JEMALLOC_ZONE_VERSION >= 6) +#if (JEMALLOC_ZONE_VERSION >= 5) zone.memalign = zone_memalign; +#endif +#if (JEMALLOC_ZONE_VERSION >= 6) zone.free_definite_size = zone_free_definite_size; #endif +#if (JEMALLOC_ZONE_VERSION >= 8) + zone.pressure_relief = NULL; +#endif zone_introspect.enumerator = NULL; zone_introspect.good_size = (void *)zone_good_size; @@ -187,156 +183,15 @@ create_zone(void) #if (JEMALLOC_ZONE_VERSION >= 6) zone_introspect.zone_locked = NULL; #endif - +#if (JEMALLOC_ZONE_VERSION >= 7) + zone_introspect.enable_discharge_checking = NULL; + zone_introspect.disable_discharge_checking = NULL; + zone_introspect.discharge = NULL; +#ifdef __BLOCKS__ + zone_introspect.enumerate_discharged_pointers = NULL; +#else + zone_introspect.enumerate_unavailable_without_blocks = NULL; +#endif +#endif return (&zone); } - -static size_t -ozone_size(malloc_zone_t *zone, void *ptr) -{ - size_t ret; - - ret = ivsalloc(ptr); - if (ret == 0) - ret = szone.size(zone, ptr); - - return (ret); -} - -static void -ozone_free(malloc_zone_t *zone, void *ptr) -{ - - if (ivsalloc(ptr) != 0) - je_free(ptr); - else { - size_t size = szone.size(zone, ptr); - if (size != 0) - (szone.free)(zone, ptr); - } -} - -static void * -ozone_realloc(malloc_zone_t *zone, void *ptr, size_t size) -{ - size_t oldsize; - - if (ptr == NULL) - return (je_malloc(size)); - - oldsize = ivsalloc(ptr); - if (oldsize != 0) - return (je_realloc(ptr, size)); - else { - oldsize = szone.size(zone, ptr); - if (oldsize == 0) - return (je_malloc(size)); - else { - void 
*ret = je_malloc(size); - if (ret != NULL) { - memcpy(ret, ptr, (oldsize < size) ? oldsize : - size); - (szone.free)(zone, ptr); - } - return (ret); - } - } -} - -static unsigned -ozone_batch_malloc(malloc_zone_t *zone, size_t size, void **results, - unsigned num_requested) -{ - - /* Don't bother implementing this interface, since it isn't required. */ - return (0); -} - -static void -ozone_batch_free(malloc_zone_t *zone, void **to_be_freed, unsigned num) -{ - unsigned i; - - for (i = 0; i < num; i++) - ozone_free(zone, to_be_freed[i]); -} - -#if (JEMALLOC_ZONE_VERSION >= 6) -static void -ozone_free_definite_size(malloc_zone_t *zone, void *ptr, size_t size) -{ - - if (ivsalloc(ptr) != 0) { - assert(ivsalloc(ptr) == size); - je_free(ptr); - } else { - assert(size == szone.size(zone, ptr)); - szone.free_definite_size(zone, ptr, size); - } -} -#endif - -static void -ozone_force_lock(malloc_zone_t *zone) -{ - - /* jemalloc locking is taken care of by the normal jemalloc zone. */ - szone.introspect->force_lock(zone); -} - -static void -ozone_force_unlock(malloc_zone_t *zone) -{ - - /* jemalloc locking is taken care of by the normal jemalloc zone. */ - szone.introspect->force_unlock(zone); -} - -/* - * Overlay the default scalable zone (szone) such that existing allocations are - * drained, and further allocations come from jemalloc. This is necessary - * because Core Foundation directly accesses and uses the szone before the - * jemalloc library is even loaded. - */ -void -szone2ozone(malloc_zone_t *zone) -{ - - /* - * Stash a copy of the original szone so that we can call its - * functions as needed. Note that the internally, the szone stores its - * bookkeeping data structures immediately following the malloc_zone_t - * header, so when calling szone functions, we need to pass a pointer - * to the original zone structure. 
- */ - memcpy(&szone, zone, sizeof(malloc_zone_t)); - - zone->size = (void *)ozone_size; - zone->malloc = (void *)zone_malloc; - zone->calloc = (void *)zone_calloc; - zone->valloc = (void *)zone_valloc; - zone->free = (void *)ozone_free; - zone->realloc = (void *)ozone_realloc; - zone->destroy = (void *)zone_destroy; - zone->zone_name = "jemalloc_ozone"; - zone->batch_malloc = ozone_batch_malloc; - zone->batch_free = ozone_batch_free; - zone->introspect = &ozone_introspect; - zone->version = JEMALLOC_ZONE_VERSION; -#if (JEMALLOC_ZONE_VERSION >= 6) - zone->memalign = zone_memalign; - zone->free_definite_size = ozone_free_definite_size; -#endif - - ozone_introspect.enumerator = NULL; - ozone_introspect.good_size = (void *)zone_good_size; - ozone_introspect.check = NULL; - ozone_introspect.print = NULL; - ozone_introspect.log = NULL; - ozone_introspect.force_lock = (void *)ozone_force_lock; - ozone_introspect.force_unlock = (void *)ozone_force_unlock; - ozone_introspect.statistics = NULL; -#if (JEMALLOC_ZONE_VERSION >= 6) - ozone_introspect.zone_locked = NULL; -#endif -} From cd9a1346e96f71bdecdc654ea50fc62d76371e74 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 21 Mar 2012 18:33:03 -0700 Subject: [PATCH 0096/3378] Implement tsd. Implement tsd, which is a TLS/TSD abstraction that uses one or both internally. Modify bootstrapping such that no tsd's are utilized until allocation is safe. Remove malloc_[v]tprintf(), and use malloc_snprintf() instead. Fix %p argument size handling in malloc_vsnprintf(). Fix a long-standing statistics-related bug in the "thread.arena" mallctl that could cause crashes due to linked list corruption. 
--- Makefile.in | 3 +- configure.ac | 20 ++ include/jemalloc/internal/arena.h | 16 +- include/jemalloc/internal/chunk.h | 3 +- .../jemalloc/internal/jemalloc_internal.h.in | 82 ++--- include/jemalloc/internal/private_namespace.h | 2 - include/jemalloc/internal/prof.h | 42 +-- include/jemalloc/internal/tcache.h | 35 +- include/jemalloc/internal/tsd.h | 319 ++++++++++++++++++ include/jemalloc/internal/util.h | 17 +- include/jemalloc/jemalloc_defs.h.in | 9 + src/chunk.c | 14 +- src/chunk_mmap.c | 40 +-- src/ctl.c | 42 ++- src/jemalloc.c | 171 ++++------ src/prof.c | 45 ++- src/tcache.c | 103 +++--- src/tsd.c | 72 ++++ src/util.c | 33 +- 19 files changed, 705 insertions(+), 363 deletions(-) create mode 100644 include/jemalloc/internal/tsd.h create mode 100644 src/tsd.c diff --git a/Makefile.in b/Makefile.in index 01ed083c..494ac9a6 100644 --- a/Makefile.in +++ b/Makefile.in @@ -50,7 +50,8 @@ CSRCS := @srcroot@src/jemalloc.c @srcroot@src/arena.c @srcroot@src/atomic.c \ @srcroot@src/ckh.c @srcroot@src/ctl.c @srcroot@src/extent.c \ @srcroot@src/hash.c @srcroot@src/huge.c @srcroot@src/mb.c \ @srcroot@src/mutex.c @srcroot@src/prof.c @srcroot@src/rtree.c \ - @srcroot@src/stats.c @srcroot@src/tcache.c @srcroot@src/util.c + @srcroot@src/stats.c @srcroot@src/tcache.c @srcroot@src/util.c \ + @srcroot@src/tsd.c ifeq (macho, @abi@) CSRCS += @srcroot@src/zone.c endif diff --git a/configure.ac b/configure.ac index 02d4f536..44ff6eec 100644 --- a/configure.ac +++ b/configure.ac @@ -763,6 +763,20 @@ AC_CHECK_LIB([pthread], [pthread_create], [LIBS="$LIBS -lpthread"], CPPFLAGS="$CPPFLAGS -D_REENTRANT" +dnl Check whether the BSD-specific _malloc_thread_cleanup() exists. If so, use +dnl it rather than pthreads TSD cleanup functions to support cleanup during +dnl thread exit, in order to avoid pthreads library recursion during +dnl bootstrapping. 
+force_tls="0" +AC_CHECK_FUNC([_malloc_thread_cleanup], + [have__malloc_thread_cleanup="1"], + [have__malloc_thread_cleanup="0"] + ) +if test "x$have__malloc_thread_cleanup" = "x1" ; then + AC_DEFINE([JEMALLOC_MALLOC_THREAD_CLEANUP], [ ]) + force_tls="1" +fi + dnl Disable lazy locking by default. AC_ARG_ENABLE([lazy_lock], [AS_HELP_STRING([--enable-lazy-lock], @@ -795,6 +809,10 @@ fi , enable_tls="1" ) +if test "x${enable_tls}" = "x0" -a "x${force_tls}" = "x1" ; then + AC_MSG_RESULT([Forcing TLS to avoid allocator/threading bootstrap issues]) + enable_tls="1" +fi if test "x${enable_tls}" = "x1" ; then AC_MSG_CHECKING([for TLS]) AC_COMPILE_IFELSE([AC_LANG_PROGRAM( @@ -812,6 +830,8 @@ fi AC_SUBST([enable_tls]) if test "x${enable_tls}" = "x1" ; then AC_DEFINE_UNQUOTED([JEMALLOC_TLS], [ ]) +elif test "x${force_tls}" = "x1" ; then + AC_MSG_ERROR([Failed to configure TLS, which is mandatory for correct function]) fi dnl ============================================================================ diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 1609adcf..c5214893 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -391,6 +391,7 @@ unsigned arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, prof_ctx_t *arena_prof_ctx_get(const void *ptr); void arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx); void *arena_malloc(size_t size, bool zero); +void *arena_malloc_prechosen(arena_t *arena, size_t size, bool zero); void arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr); #endif @@ -552,7 +553,7 @@ arena_malloc(size_t size, bool zero) tcache_t *tcache; assert(size != 0); - assert(QUANTUM_CEILING(size) <= arena_maxclass); + assert(size <= arena_maxclass); if (size <= SMALL_MAXCLASS) { if ((tcache = tcache_get()) != NULL) @@ -571,6 +572,19 @@ arena_malloc(size_t size, bool zero) } } +JEMALLOC_INLINE void * +arena_malloc_prechosen(arena_t *arena, size_t size, bool zero) +{ + + 
assert(size != 0); + assert(size <= arena_maxclass); + + if (size <= SMALL_MAXCLASS) + return (arena_malloc_small(arena, size, zero)); + else + return (arena_malloc_large(arena, size, zero)); +} + JEMALLOC_INLINE void arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr) { diff --git a/include/jemalloc/internal/chunk.h b/include/jemalloc/internal/chunk.h index 9a62ba18..8e24e8f3 100644 --- a/include/jemalloc/internal/chunk.h +++ b/include/jemalloc/internal/chunk.h @@ -44,7 +44,8 @@ extern size_t arena_maxclass; /* Max size class for arenas. */ void *chunk_alloc(size_t size, bool base, bool *zero); void chunk_dealloc(void *chunk, size_t size, bool unmap); -bool chunk_boot(void); +bool chunk_boot0(void); +bool chunk_boot1(void); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index dbfd3fc8..387aabbe 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -289,6 +289,7 @@ static const bool config_ivsalloc = #include "jemalloc/internal/stats.h" #include "jemalloc/internal/ctl.h" #include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/tsd.h" #include "jemalloc/internal/mb.h" #include "jemalloc/internal/extent.h" #include "jemalloc/internal/arena.h" @@ -316,6 +317,7 @@ static const bool config_ivsalloc = #include "jemalloc/internal/stats.h" #include "jemalloc/internal/ctl.h" #include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/tsd.h" #include "jemalloc/internal/mb.h" #include "jemalloc/internal/bitmap.h" #include "jemalloc/internal/extent.h" @@ -335,6 +337,11 @@ typedef struct { uint64_t allocated; uint64_t deallocated; } thread_allocated_t; +/* + * The JEMALLOC_CONCAT() wrapper is necessary to pass {0, 0} via a cpp macro + * argument. 
+ */ +#define THREAD_ALLOCATED_INITIALIZER JEMALLOC_CONCAT({0, 0}) #undef JEMALLOC_H_STRUCTS /******************************************************************************/ @@ -356,25 +363,6 @@ extern size_t lg_pagesize; extern unsigned ncpus; extern malloc_mutex_t arenas_lock; /* Protects arenas initialization. */ -extern pthread_key_t arenas_tsd; -#ifdef JEMALLOC_TLS -/* - * Map of pthread_self() --> arenas[???], used for selecting an arena to use - * for allocations. - */ -extern __thread arena_t *arenas_tls JEMALLOC_ATTR(tls_model("initial-exec")); -# define ARENA_GET() arenas_tls -# define ARENA_SET(v) do { \ - arenas_tls = (v); \ - pthread_setspecific(arenas_tsd, (void *)(v)); \ -} while (0) -#else -# define ARENA_GET() ((arena_t *)pthread_getspecific(arenas_tsd)) -# define ARENA_SET(v) do { \ - pthread_setspecific(arenas_tsd, (void *)(v)); \ -} while (0) -#endif - /* * Arenas that are used to service external requests. Not all elements of the * arenas array are necessarily used; arenas are created lazily as needed. 
@@ -382,31 +370,8 @@ extern __thread arena_t *arenas_tls JEMALLOC_ATTR(tls_model("initial-exec")); extern arena_t **arenas; extern unsigned narenas; -#ifdef JEMALLOC_TLS -extern __thread thread_allocated_t thread_allocated_tls; -# define ALLOCATED_GET() (thread_allocated_tls.allocated) -# define ALLOCATEDP_GET() (&thread_allocated_tls.allocated) -# define DEALLOCATED_GET() (thread_allocated_tls.deallocated) -# define DEALLOCATEDP_GET() (&thread_allocated_tls.deallocated) -# define ALLOCATED_ADD(a, d) do { \ - thread_allocated_tls.allocated += a; \ - thread_allocated_tls.deallocated += d; \ -} while (0) -#else -# define ALLOCATED_GET() (thread_allocated_get()->allocated) -# define ALLOCATEDP_GET() (&thread_allocated_get()->allocated) -# define DEALLOCATED_GET() (thread_allocated_get()->deallocated) -# define DEALLOCATEDP_GET() (&thread_allocated_get()->deallocated) -# define ALLOCATED_ADD(a, d) do { \ - thread_allocated_t *thread_allocated = thread_allocated_get(); \ - thread_allocated->allocated += (a); \ - thread_allocated->deallocated += (d); \ -} while (0) -#endif -extern pthread_key_t thread_allocated_tsd; -thread_allocated_t *thread_allocated_get_hard(void); - arena_t *arenas_extend(unsigned ind); +void arenas_cleanup(void *arg); arena_t *choose_arena_hard(void); void jemalloc_prefork(void); void jemalloc_postfork_parent(void); @@ -420,6 +385,7 @@ void jemalloc_postfork_child(void); #include "jemalloc/internal/stats.h" #include "jemalloc/internal/ctl.h" #include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/tsd.h" #include "jemalloc/internal/mb.h" #include "jemalloc/internal/bitmap.h" #include "jemalloc/internal/extent.h" @@ -447,6 +413,7 @@ void jemalloc_postfork_child(void); #include "jemalloc/internal/stats.h" #include "jemalloc/internal/ctl.h" #include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/tsd.h" #include "jemalloc/internal/mb.h" #include "jemalloc/internal/extent.h" #include "jemalloc/internal/base.h" @@ -454,13 +421,21 @@ 
void jemalloc_postfork_child(void); #include "jemalloc/internal/huge.h" #ifndef JEMALLOC_ENABLE_INLINE +malloc_tsd_protos(JEMALLOC_ATTR(unused), arenas, arena_t *) + size_t s2u(size_t size); size_t sa2u(size_t size, size_t alignment, size_t *run_size_p); arena_t *choose_arena(void); -thread_allocated_t *thread_allocated_get(void); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) +/* + * Map of pthread_self() --> arenas[???], used for selecting an arena to use + * for allocations. + */ +malloc_tsd_externs(arenas, arena_t *) +malloc_tsd_funcs(JEMALLOC_INLINE, arenas, arena_t *, NULL, arenas_cleanup) + /* * Compute usable size that would result from allocating an object with the * specified size. @@ -572,25 +547,13 @@ choose_arena(void) { arena_t *ret; - ret = ARENA_GET(); - if (ret == NULL) { + if ((ret = *arenas_tsd_get()) == NULL) { ret = choose_arena_hard(); assert(ret != NULL); } return (ret); } - -JEMALLOC_INLINE thread_allocated_t * -thread_allocated_get(void) -{ - thread_allocated_t *thread_allocated = (thread_allocated_t *) - pthread_getspecific(thread_allocated_tsd); - - if (thread_allocated == NULL) - return (thread_allocated_get_hard()); - return (thread_allocated); -} #endif #include "jemalloc/internal/bitmap.h" @@ -611,6 +574,7 @@ size_t ivsalloc(const void *ptr); void idalloc(void *ptr); void *iralloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero, bool no_move); +malloc_tsd_protos(JEMALLOC_ATTR(unused), thread_allocated, thread_allocated_t) #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) @@ -787,6 +751,10 @@ iralloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero, } } } + +malloc_tsd_externs(thread_allocated, thread_allocated_t) +malloc_tsd_funcs(JEMALLOC_INLINE, thread_allocated, thread_allocated_t, + THREAD_ALLOCATED_INITIALIZER, malloc_tsd_no_cleanup) #endif #include "jemalloc/internal/prof.h" diff --git a/include/jemalloc/internal/private_namespace.h 
b/include/jemalloc/internal/private_namespace.h index e7370fec..7103e680 100644 --- a/include/jemalloc/internal/private_namespace.h +++ b/include/jemalloc/internal/private_namespace.h @@ -155,10 +155,8 @@ #define malloc_mutex_unlock JEMALLOC_N(malloc_mutex_unlock) #define malloc_printf JEMALLOC_N(malloc_printf) #define malloc_snprintf JEMALLOC_N(malloc_snprintf) -#define malloc_tprintf JEMALLOC_N(malloc_tprintf) #define malloc_vcprintf JEMALLOC_N(malloc_vcprintf) #define malloc_vsnprintf JEMALLOC_N(malloc_vsnprintf) -#define malloc_vtprintf JEMALLOC_N(malloc_vtprintf) #define malloc_write JEMALLOC_N(malloc_write) #define mb_write JEMALLOC_N(mb_write) #define opt_abort JEMALLOC_N(opt_abort) diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index 48231928..231a3876 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -23,10 +23,13 @@ typedef struct prof_tdata_s prof_tdata_t; #define PROF_TCMAX 1024 /* Initial hash table size. */ -#define PROF_CKH_MINITEMS 64 +#define PROF_CKH_MINITEMS 64 /* Size of memory buffer to use when writing dump files. */ -#define PROF_DUMP_BUF_SIZE 65536 +#define PROF_DUMP_BUFSIZE 65536 + +/* Size of stack-allocated buffer used by prof_printf(). */ +#define PROF_PRINTF_BUFSIZE 128 #endif /* JEMALLOC_H_TYPES */ /******************************************************************************/ @@ -179,29 +182,6 @@ extern uint64_t prof_interval; */ extern bool prof_promote; -/* Thread-specific backtrace cache, used to reduce bt2ctx contention. 
*/ -#ifdef JEMALLOC_TLS -extern __thread prof_tdata_t *prof_tdata_tls - JEMALLOC_ATTR(tls_model("initial-exec")); -# define PROF_TCACHE_GET() prof_tdata_tls -# define PROF_TCACHE_SET(v) do { \ - prof_tdata_tls = (v); \ - pthread_setspecific(prof_tdata_tsd, (void *)(v)); \ -} while (0) -#else -# define PROF_TCACHE_GET() \ - ((prof_tdata_t *)pthread_getspecific(prof_tdata_tsd)) -# define PROF_TCACHE_SET(v) do { \ - pthread_setspecific(prof_tdata_tsd, (void *)(v)); \ -} while (0) -#endif -/* - * Same contents as b2cnt_tls, but initialized such that the TSD destructor is - * called when a thread exits, so that prof_tdata_tls contents can be merged, - * unlinked, and deallocated. - */ -extern pthread_key_t prof_tdata_tsd; - void bt_init(prof_bt_t *bt, void **vec); void prof_backtrace(prof_bt_t *bt, unsigned nignore); prof_thr_cnt_t *prof_lookup(prof_bt_t *bt); @@ -209,6 +189,7 @@ void prof_idump(void); bool prof_mdump(const char *filename); void prof_gdump(void); prof_tdata_t *prof_tdata_init(void); +void prof_tdata_cleanup(void *arg); void prof_boot0(void); void prof_boot1(void); bool prof_boot2(void); @@ -223,7 +204,7 @@ bool prof_boot2(void); \ assert(size == s2u(size)); \ \ - prof_tdata = PROF_TCACHE_GET(); \ + prof_tdata = *prof_tdata_tsd_get(); \ if (prof_tdata == NULL) { \ prof_tdata = prof_tdata_init(); \ if (prof_tdata == NULL) { \ @@ -270,6 +251,8 @@ bool prof_boot2(void); } while (0) #ifndef JEMALLOC_ENABLE_INLINE +malloc_tsd_protos(JEMALLOC_ATTR(unused), prof_tdata, prof_tdata_t *) + void prof_sample_threshold_update(prof_tdata_t *prof_tdata); prof_ctx_t *prof_ctx_get(const void *ptr); void prof_ctx_set(const void *ptr, prof_ctx_t *ctx); @@ -281,6 +264,11 @@ void prof_free(const void *ptr, size_t size); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PROF_C_)) +/* Thread-specific backtrace cache, used to reduce bt2ctx contention. 
*/ +malloc_tsd_externs(prof_tdata, prof_tdata_t *) +malloc_tsd_funcs(JEMALLOC_INLINE, prof_tdata, prof_tdata_t *, NULL, + prof_tdata_cleanup) + JEMALLOC_INLINE void prof_sample_threshold_update(prof_tdata_t *prof_tdata) { @@ -359,7 +347,7 @@ prof_sample_accum_update(size_t size) /* Sampling logic is unnecessary if the interval is 1. */ assert(opt_lg_prof_sample != 0); - prof_tdata = PROF_TCACHE_GET(); + prof_tdata = *prof_tdata_tsd_get(); assert(prof_tdata != NULL); /* Take care to avoid integer overflow. */ diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index ed037cf9..30e63a50 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -75,23 +75,6 @@ extern ssize_t opt_lg_tcache_max; extern tcache_bin_info_t *tcache_bin_info; -/* Map of thread-specific caches. */ -#ifdef JEMALLOC_TLS -extern __thread tcache_t *tcache_tls - JEMALLOC_ATTR(tls_model("initial-exec")); -# define TCACHE_GET() tcache_tls -# define TCACHE_SET(v) do { \ - tcache_tls = (tcache_t *)(v); \ - pthread_setspecific(tcache_tsd, (void *)(v)); \ -} while (0) -#else -# define TCACHE_GET() ((tcache_t *)pthread_getspecific(tcache_tsd)) -# define TCACHE_SET(v) do { \ - pthread_setspecific(tcache_tsd, (void *)(v)); \ -} while (0) -#endif -extern pthread_key_t tcache_tsd; - /* * Number of tcache bins. There are NBINS small-object bins, plus 0 or more * large-object bins. 
@@ -105,18 +88,24 @@ void tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem, tcache_t *tcache); void tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem, tcache_t *tcache); +void tcache_arena_associate(tcache_t *tcache, arena_t *arena); +void tcache_arena_dissociate(tcache_t *tcache); tcache_t *tcache_create(arena_t *arena); void *tcache_alloc_small_hard(tcache_t *tcache, tcache_bin_t *tbin, size_t binind); void tcache_destroy(tcache_t *tcache); +void tcache_thread_cleanup(void *arg); void tcache_stats_merge(tcache_t *tcache, arena_t *arena); -bool tcache_boot(void); +bool tcache_boot0(void); +bool tcache_boot1(void); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ #ifdef JEMALLOC_H_INLINES #ifndef JEMALLOC_ENABLE_INLINE +malloc_tsd_protos(JEMALLOC_ATTR(unused), tcache, tcache_t *) + void tcache_event(tcache_t *tcache); tcache_t *tcache_get(void); void *tcache_alloc_easy(tcache_bin_t *tbin); @@ -127,6 +116,11 @@ void tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_TCACHE_C_)) +/* Map of thread-specific caches. */ +malloc_tsd_externs(tcache, tcache_t *) +malloc_tsd_funcs(JEMALLOC_INLINE, tcache, tcache_t *, NULL, + tcache_thread_cleanup) + JEMALLOC_INLINE tcache_t * tcache_get(void) { @@ -139,7 +133,7 @@ tcache_get(void) else if (opt_tcache == false) return (NULL); - tcache = TCACHE_GET(); + tcache = *tcache_tsd_get(); if ((uintptr_t)tcache <= (uintptr_t)2) { if (tcache == NULL) { tcache = tcache_create(choose_arena()); @@ -152,7 +146,8 @@ tcache_get(void) * called after the tcache_thread_cleanup() was * called. 
*/ - TCACHE_SET((uintptr_t)2); + tcache = (tcache_t *)(uintptr_t)2; + tcache_tsd_set(&tcache); } return (NULL); } diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h new file mode 100644 index 00000000..5a174acd --- /dev/null +++ b/include/jemalloc/internal/tsd.h @@ -0,0 +1,319 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +/* Maximum number of malloc_tsd users with cleanup functions. */ +#define MALLOC_TSD_CLEANUPS_MAX 8 + +typedef struct malloc_tsd_cleanup_s malloc_tsd_cleanup_t; +struct malloc_tsd_cleanup_s { + bool (*f)(void *); + void *arg; +}; + +/* + * TLS/TSD-agnostic macro-based implementation of thread-specific data. There + * are four macros that support (at least) three use cases: file-private, + * library-private, and library-private inlined. Following is an example + * library-private tsd variable: + * + * In example.h: + * typedef struct { + * int x; + * int y; + * } example_t; + * #define EX_INITIALIZER JEMALLOC_CONCAT({0, 0}) + * malloc_tsd_protos(, example, example_t *) + * malloc_tsd_externs(example, example_t *) + * In example.c: + * malloc_tsd_data(, example, example_t *, EX_INITIALIZER) + * malloc_tsd_funcs(, example, example_t *, EX_INITIALIZER, + * example_tsd_cleanup) + * + * The result is a set of generated functions, e.g.: + * + * bool example_tsd_boot(void) {...} + * example_t **example_tsd_get() {...} + * void example_tsd_set(example_t **val) {...} + * + * Note that all of the functions deal in terms of (a_type *) rather than + * (a_type) so that it is possible to support non-pointer types (unlike + * pthreads TSD). example_tsd_cleanup() is passed an (a_type *) pointer that is + * cast to (void *). This means that the cleanup function needs to cast *and* + * dereference the function argument, e.g.: + * + * void + * example_tsd_cleanup(void *arg) + * { + * example_t *example = *(example_t **)arg; + * + * [...] 
+ * if ([want the cleanup function to be called again]) { + * example_tsd_set(&example); + * } + * } + * + * If example_tsd_set() is called within example_tsd_cleanup(), it will be + * called again. This is similar to how pthreads TSD destruction works, except + * that pthreads only calls the cleanup function again if the value was set to + * non-NULL. + */ + +/* malloc_tsd_protos(). */ +#define malloc_tsd_protos(a_attr, a_name, a_type) \ +a_attr bool \ +a_name##_tsd_boot(void); \ +a_attr a_type * \ +a_name##_tsd_get(void); \ +a_attr void \ +a_name##_tsd_set(a_type *val); + +/* malloc_tsd_externs(). */ +#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP +#define malloc_tsd_externs(a_name, a_type) \ +extern __thread a_type a_name##_tls; \ +extern __thread bool *a_name##_initialized; \ +extern bool a_name##_booted; +#elif (defined(JEMALLOC_TLS)) +#define malloc_tsd_externs(a_name, a_type) \ +extern __thread a_type a_name##_tls; \ +extern pthread_key_t a_name##_tsd; \ +extern bool a_name##_booted; +#else +#define malloc_tsd_externs(a_name, a_type) \ +extern pthread_key_t a_name##_tsd; \ +extern bool a_name##_booted; +#endif + +/* malloc_tsd_data(). */ +#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP +#define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ +a_attr __thread a_type JEMALLOC_ATTR(tls_model("initial-exec")) \ + a_name##_tls = a_initializer; \ +a_attr __thread bool JEMALLOC_ATTR(tls_model("initial-exec")) \ + a_name##_initialized = false; \ +a_attr bool a_name##_booted = false; +#elif (defined(JEMALLOC_TLS)) +#define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ +a_attr __thread a_type JEMALLOC_ATTR(tls_model("initial-exec")) \ + a_name##_tls = a_initializer; \ +a_attr pthread_key_t a_name##_tsd; \ +a_attr bool a_name##_booted = false; +#else +#define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ +a_attr pthread_key_t a_name##_tsd; \ +a_attr bool a_name##_booted = false; +#endif + +/* malloc_tsd_funcs(). 
*/ +#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP +#define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \ + a_cleanup) \ +/* Initialization/cleanup. */ \ +a_attr void \ +a_name##_tsd_cleanup_wrapper(void *arg) \ +{ \ + \ +} \ +bool \ +a_name##_tsd_cleanup_pending(void *arg) \ +{ \ + bool (*cleanup)(void *) = arg; \ + \ + if (a_name##_initialized) { \ + a_name##_initialized = false; \ + cleanup(&a_name##_tls); \ + } \ + return (a_name##_initialized); \ +} \ +a_attr bool \ +a_name##_tsd_boot(void) \ +{ \ + \ + if (a_cleanup != malloc_tsd_no_cleanup) { \ + malloc_tsd_cleanup_register( \ + &a_name##_tsd_cleanup_pending, a_cleanup); \ + } \ + a_name##_booted = true; \ + return (false); \ +} \ +/* Get/set. */ \ +a_attr a_type * \ +a_name##_tsd_get(void) \ +{ \ + \ + assert(a_name##_booted); \ + return (&a_name##_tls); \ +} \ +a_attr void \ +a_name##_tsd_set(a_type *val) \ +{ \ + \ + assert(a_name##_booted); \ + a_name##_tls = (*val); \ + if (a_cleanup != malloc_tsd_no_cleanup) \ + a_name##_initialized = true; \ +} +#elif (defined(JEMALLOC_TLS)) +#define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \ + a_cleanup) \ +/* Initialization/cleanup. */ \ +a_attr void \ +a_name##_tsd_cleanup_wrapper(void *arg) \ +{ \ + \ +} \ +a_attr bool \ +a_name##_tsd_boot(void) \ +{ \ + \ + if (a_cleanup != malloc_tsd_no_cleanup) { \ + if (pthread_key_create(&a_name##_tsd, a_cleanup) != 0) \ + return (true); \ + } \ + a_name##_booted = true; \ + return (false); \ +} \ +/* Get/set. 
*/ \ +a_attr a_type * \ +a_name##_tsd_get(void) \ +{ \ + \ + assert(a_name##_booted); \ + return (&a_name##_tls); \ +} \ +a_attr void \ +a_name##_tsd_set(a_type *val) \ +{ \ + \ + assert(a_name##_booted); \ + a_name##_tls = (*val); \ + if (a_cleanup != malloc_tsd_no_cleanup) { \ + if (pthread_setspecific(a_name##_tsd, \ + (void *)(&a_name##_tls))) { \ + malloc_write(": Error" \ + " setting TSD for "#a_name"\n"); \ + if (opt_abort) \ + abort(); \ + } \ + } \ +} +#else +#define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \ + a_cleanup) \ +/* Data structure. */ \ +typedef struct { \ + bool isstatic; \ + bool initialized; \ + a_type val; \ +} a_name##_tsd_wrapper_t; \ +/* Initialization/cleanup. */ \ +a_attr void \ +a_name##_tsd_cleanup_wrapper(void *arg) \ +{ \ + a_name##_tsd_wrapper_t *wrapper = (a_name##_tsd_wrapper_t *)arg;\ + \ + if (a_cleanup != malloc_tsd_no_cleanup && \ + wrapper->initialized) { \ + wrapper->initialized = false; \ + a_cleanup(&wrapper->val); \ + if (wrapper->initialized) { \ + /* Trigger another cleanup round. */ \ + if (pthread_setspecific(a_name##_tsd, \ + (void *)wrapper)) { \ + malloc_write(": Error" \ + " setting TSD for "#a_name"\n"); \ + if (opt_abort) \ + abort(); \ + } \ + return; \ + } \ + } \ + if (wrapper->isstatic == false) \ + malloc_tsd_dalloc(wrapper); \ +} \ +a_attr bool \ +a_name##_tsd_boot(void) \ +{ \ + \ + if (pthread_key_create(&a_name##_tsd, \ + a_name##_tsd_cleanup_wrapper) != 0) \ + return (true); \ + a_name##_booted = true; \ + return (false); \ +} \ +/* Get/set. 
*/ \ +a_attr a_name##_tsd_wrapper_t * \ +a_name##_tsd_get_wrapper(void) \ +{ \ + a_name##_tsd_wrapper_t *wrapper = (a_name##_tsd_wrapper_t *) \ + pthread_getspecific(a_name##_tsd); \ + \ + if (wrapper == NULL) { \ + wrapper = (a_name##_tsd_wrapper_t *) \ + malloc_tsd_malloc(sizeof(a_name##_tsd_wrapper_t)); \ + if (wrapper == NULL) { \ + static a_name##_tsd_wrapper_t \ + a_name##_tsd_static_data = \ + {true, false, a_initializer}; \ + malloc_write(": Error allocating" \ + " TSD for "#a_name"\n"); \ + if (opt_abort) \ + abort(); \ + wrapper = &a_name##_tsd_static_data; \ + } else { \ + static a_type tsd_static_data = a_initializer; \ + wrapper->isstatic = false; \ + wrapper->val = tsd_static_data; \ + } \ + if (pthread_setspecific(a_name##_tsd, \ + (void *)wrapper)) { \ + malloc_write(": Error setting" \ + " TSD for "#a_name"\n"); \ + if (opt_abort) \ + abort(); \ + } \ + } \ + return (wrapper); \ +} \ +a_attr a_type * \ +a_name##_tsd_get(void) \ +{ \ + a_name##_tsd_wrapper_t *wrapper; \ + \ + assert(a_name##_booted); \ + wrapper = a_name##_tsd_get_wrapper(); \ + return (&wrapper->val); \ +} \ +a_attr void \ +a_name##_tsd_set(a_type *val) \ +{ \ + a_name##_tsd_wrapper_t *wrapper; \ + \ + assert(a_name##_booted); \ + wrapper = a_name##_tsd_get_wrapper(); \ + wrapper->val = *(val); \ + if (a_cleanup != malloc_tsd_no_cleanup) \ + wrapper->initialized = true; \ +} +#endif + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +void *malloc_tsd_malloc(size_t size); +void malloc_tsd_dalloc(void *wrapper); +void malloc_tsd_no_cleanup(void *); +void malloc_tsd_cleanup_register(bool (*f)(void *), void *arg); +void malloc_tsd_boot(void); + +#endif /* JEMALLOC_H_EXTERNS */ 
+/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h index c5f7520c..fb354da5 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util.h @@ -5,11 +5,17 @@ #define BUFERROR_BUF 64 /* - * Size of static buffer used by malloc_[v]{,c,t}printf(). This must be large - * enough for all possible uses within jemalloc. + * Size of stack-allocated buffer used by malloc_{,v,vc}printf(). This must be + * large enough for all possible uses within jemalloc. */ #define MALLOC_PRINTF_BUFSIZE 4096 +/* + * Wrap a cpp argument that contains commas such that it isn't broken up into + * multiple arguments. + */ +#define JEMALLOC_CONCAT(...) __VA_ARGS__ + /* * Define a custom assert() in order to reduce the chances of deadlock during * assertion failure. @@ -77,13 +83,6 @@ int malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap); int malloc_snprintf(char *str, size_t size, const char *format, ...) JEMALLOC_ATTR(format(printf, 3, 4)); -/* - * malloc_[v]tprintf() prints to a thread-local string buffer, so the result is - * overwritten by the next call to malloc_[v]{,c,t}printf(). - */ -const char * malloc_vtprintf(const char *format, va_list ap); -const char * malloc_tprintf(const char *format, ...) 
- JEMALLOC_ATTR(format(printf, 1, 2)); void malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque, const char *format, va_list ap); void malloc_cprintf(void (*write)(void *, const char *), void *cbopaque, diff --git a/include/jemalloc/jemalloc_defs.h.in b/include/jemalloc/jemalloc_defs.h.in index 434dd368..838f5615 100644 --- a/include/jemalloc/jemalloc_defs.h.in +++ b/include/jemalloc/jemalloc_defs.h.in @@ -59,6 +59,15 @@ */ #undef JEMALLOC_OSSPIN +/* + * Defined if _malloc_thread_cleanup() exists. At least in the case of + * FreeBSD, pthread_key_create() allocates, which if used during malloc + * bootstrapping will cause recursion into the pthreads library. Therefore, if + * _malloc_thread_cleanup() exists, use it as the basis for thread cleanup in + * malloc_tsd. + */ +#undef JEMALLOC_MALLOC_THREAD_CLEANUP + /* Defined if __attribute__((...)) syntax is supported. */ #undef JEMALLOC_HAVE_ATTR #ifdef JEMALLOC_HAVE_ATTR diff --git a/src/chunk.c b/src/chunk.c index b9086509..f50e8409 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -100,7 +100,7 @@ chunk_dealloc(void *chunk, size_t size, bool unmap) } bool -chunk_boot(void) +chunk_boot0(void) { /* Set variables according to the value of opt_lg_chunk. */ @@ -114,8 +114,6 @@ chunk_boot(void) return (true); memset(&stats_chunks, 0, sizeof(chunk_stats_t)); } - if (chunk_mmap_boot()) - return (true); if (config_dss && chunk_dss_boot()) return (true); if (config_ivsalloc) { @@ -127,3 +125,13 @@ chunk_boot(void) return (false); } + +bool +chunk_boot1(void) +{ + + if (chunk_mmap_boot()) + return (true); + + return (false); +} diff --git a/src/chunk_mmap.c b/src/chunk_mmap.c index 6ea21180..749a2dac 100644 --- a/src/chunk_mmap.c +++ b/src/chunk_mmap.c @@ -8,20 +8,9 @@ * Used by chunk_alloc_mmap() to decide whether to attempt the fast path and * potentially avoid some system calls. 
*/ -#ifdef JEMALLOC_TLS -static __thread bool mmap_unaligned_tls - JEMALLOC_ATTR(tls_model("initial-exec")); -#define MMAP_UNALIGNED_GET() mmap_unaligned_tls -#define MMAP_UNALIGNED_SET(v) do { \ - mmap_unaligned_tls = (v); \ -} while (0) -#else -static pthread_key_t mmap_unaligned_tsd; -#define MMAP_UNALIGNED_GET() ((bool)pthread_getspecific(mmap_unaligned_tsd)) -#define MMAP_UNALIGNED_SET(v) do { \ - pthread_setspecific(mmap_unaligned_tsd, (void *)(v)); \ -} while (0) -#endif +malloc_tsd_data(static, mmap_unaligned, bool, false) +malloc_tsd_funcs(JEMALLOC_INLINE, mmap_unaligned, bool, false, + malloc_tsd_no_cleanup) /******************************************************************************/ /* Function prototypes for non-inline static functions. */ @@ -128,8 +117,10 @@ chunk_alloc_mmap_slow(size_t size, bool unaligned, bool noreserve) * the next chunk_alloc_mmap() execution tries the fast allocation * method. */ - if (unaligned == false) - MMAP_UNALIGNED_SET(false); + if (unaligned == false && mmap_unaligned_booted) { + bool mu = false; + mmap_unaligned_tsd_set(&mu); + } return (ret); } @@ -167,7 +158,7 @@ chunk_alloc_mmap_internal(size_t size, bool noreserve) * fast method next time. */ - if (MMAP_UNALIGNED_GET() == false) { + if (mmap_unaligned_booted && *mmap_unaligned_tsd_get() == false) { size_t offset; ret = pages_map(NULL, size, noreserve); @@ -176,7 +167,8 @@ chunk_alloc_mmap_internal(size_t size, bool noreserve) offset = CHUNK_ADDR2OFFSET(ret); if (offset != 0) { - MMAP_UNALIGNED_SET(true); + bool mu = true; + mmap_unaligned_tsd_set(&mu); /* Try to extend chunk boundary. 
*/ if (pages_map((void *)((uintptr_t)ret + size), chunksize - offset, noreserve) == NULL) { @@ -225,11 +217,15 @@ bool chunk_mmap_boot(void) { -#ifndef JEMALLOC_TLS - if (pthread_key_create(&mmap_unaligned_tsd, NULL) != 0) { - malloc_write(": Error in pthread_key_create()\n"); + /* + * XXX For the non-TLS implementation of tsd, the first access from + * each thread causes memory allocation. The result is a bootstrapping + * problem for this particular use case, so for now just disable it by + * leaving it in an unbooted state. + */ +#ifdef JEMALLOC_TLS + if (mmap_unaligned_tsd_boot()) return (true); - } #endif return (false); diff --git a/src/ctl.c b/src/ctl.c index 1ef84e80..e17e5034 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -978,13 +978,13 @@ thread_tcache_flush_ctl(const size_t *mib, size_t miblen, void *oldp, VOID(); - tcache = TCACHE_GET(); - if (tcache == NULL) { + if ((tcache = *tcache_tsd_get()) == NULL) { ret = 0; goto RETURN; } tcache_destroy(tcache); - TCACHE_SET(NULL); + tcache = NULL; + tcache_tsd_set(&tcache); ret = 0; RETURN: @@ -1012,23 +1012,26 @@ thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, /* Initialize arena if necessary. */ malloc_mutex_lock(&arenas_lock); - if ((arena = arenas[newind]) == NULL) - arena = arenas_extend(newind); - arenas[oldind]->nthreads--; - arenas[newind]->nthreads++; - malloc_mutex_unlock(&arenas_lock); - if (arena == NULL) { + if ((arena = arenas[newind]) == NULL && (arena = + arenas_extend(newind)) == NULL) { + malloc_mutex_unlock(&arenas_lock); ret = EAGAIN; goto RETURN; } + assert(arena == arenas[newind]); + arenas[oldind]->nthreads--; + arenas[newind]->nthreads++; + malloc_mutex_unlock(&arenas_lock); /* Set new arena association. 
*/ - ARENA_SET(arena); if (config_tcache) { - tcache_t *tcache = TCACHE_GET(); - if (tcache != NULL) - tcache->arena = arena; + tcache_t *tcache; + if ((tcache = *tcache_tsd_get()) != NULL) { + tcache_arena_dissociate(tcache); + tcache_arena_associate(tcache, arena); + } } + arenas_tsd_set(&arena); } ret = 0; @@ -1036,11 +1039,14 @@ RETURN: return (ret); } -CTL_RO_NL_CGEN(config_stats, thread_allocated, ALLOCATED_GET(), uint64_t) -CTL_RO_NL_CGEN(config_stats, thread_allocatedp, ALLOCATEDP_GET(), uint64_t *) -CTL_RO_NL_CGEN(config_stats, thread_deallocated, DEALLOCATED_GET(), uint64_t) -CTL_RO_NL_CGEN(config_stats, thread_deallocatedp, DEALLOCATEDP_GET(), - uint64_t *) +CTL_RO_NL_CGEN(config_stats, thread_allocated, + thread_allocated_tsd_get()->allocated, uint64_t) +CTL_RO_NL_CGEN(config_stats, thread_allocatedp, + &thread_allocated_tsd_get()->allocated, uint64_t *) +CTL_RO_NL_CGEN(config_stats, thread_deallocated, + thread_allocated_tsd_get()->deallocated, uint64_t) +CTL_RO_NL_CGEN(config_stats, thread_deallocatedp, + &thread_allocated_tsd_get()->deallocated, uint64_t *) /******************************************************************************/ diff --git a/src/jemalloc.c b/src/jemalloc.c index 2610452e..331e4737 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -4,36 +4,9 @@ /******************************************************************************/ /* Data. */ -malloc_mutex_t arenas_lock; -arena_t **arenas; -unsigned narenas; - -pthread_key_t arenas_tsd; -#ifdef JEMALLOC_TLS -__thread arena_t *arenas_tls JEMALLOC_ATTR(tls_model("initial-exec")); -#endif - -#ifdef JEMALLOC_TLS -__thread thread_allocated_t thread_allocated_tls; -#endif -pthread_key_t thread_allocated_tsd; - -/* Set to true once the allocator has been initialized. */ -static bool malloc_initialized = false; - -/* Used to let the initializing thread recursively allocate. */ -static pthread_t malloc_initializer = (unsigned long)0; - -/* Used to avoid initialization races. 
*/ -static malloc_mutex_t init_lock = MALLOC_MUTEX_INITIALIZER; - -#ifdef DYNAMIC_PAGE_SHIFT -size_t pagesize; -size_t pagesize_mask; -size_t lg_pagesize; -#endif - -unsigned ncpus; +malloc_tsd_data(, arenas, arena_t *, NULL) +malloc_tsd_data(, thread_allocated, thread_allocated_t, + THREAD_ALLOCATED_INITIALIZER) /* Runtime configuration options. */ const char *je_malloc_conf JEMALLOC_ATTR(visibility("default")); @@ -52,15 +25,32 @@ bool opt_xmalloc = false; bool opt_zero = false; size_t opt_narenas = 0; +#ifdef DYNAMIC_PAGE_SHIFT +size_t pagesize; +size_t pagesize_mask; +size_t lg_pagesize; +#endif + +unsigned ncpus; + +malloc_mutex_t arenas_lock; +arena_t **arenas; +unsigned narenas; + +/* Set to true once the allocator has been initialized. */ +static bool malloc_initialized = false; + +/* Used to let the initializing thread recursively allocate. */ +static pthread_t malloc_initializer = (unsigned long)0; + +/* Used to avoid initialization races. */ +static malloc_mutex_t init_lock = MALLOC_MUTEX_INITIALIZER; + /******************************************************************************/ /* Function prototypes for non-inline static functions. 
*/ static void stats_print_atexit(void); static unsigned malloc_ncpus(void); -static void arenas_cleanup(void *arg); -#ifndef JEMALLOC_TLS -static void thread_allocated_cleanup(void *arg); -#endif static bool malloc_conf_next(char const **opts_p, char const **k_p, size_t *klen_p, char const **v_p, size_t *vlen_p); static void malloc_conf_error(const char *msg, const char *k, size_t klen, @@ -156,7 +146,7 @@ choose_arena_hard(void) malloc_mutex_unlock(&arenas_lock); } - ARENA_SET(ret); + arenas_tsd_set(&ret); return (ret); } @@ -197,26 +187,6 @@ stats_print_atexit(void) je_malloc_stats_print(NULL, NULL, NULL); } -thread_allocated_t * -thread_allocated_get_hard(void) -{ - thread_allocated_t *thread_allocated = (thread_allocated_t *) - imalloc(sizeof(thread_allocated_t)); - if (thread_allocated == NULL) { - static thread_allocated_t static_thread_allocated = {0, 0}; - malloc_write(": Error allocating TSD;" - " mallctl(\"thread.{de,}allocated[p]\", ...)" - " will be inaccurate\n"); - if (opt_abort) - abort(); - return (&static_thread_allocated); - } - pthread_setspecific(thread_allocated_tsd, thread_allocated); - thread_allocated->allocated = 0; - thread_allocated->deallocated = 0; - return (thread_allocated); -} - /* * End miscellaneous support functions. */ @@ -241,32 +211,16 @@ malloc_ncpus(void) return (ret); } -static void +void arenas_cleanup(void *arg) { - arena_t *arena = (arena_t *)arg; + arena_t *arena = *(arena_t **)arg; malloc_mutex_lock(&arenas_lock); arena->nthreads--; malloc_mutex_unlock(&arenas_lock); } -#ifndef JEMALLOC_TLS -static void -thread_allocated_cleanup(void *arg) -{ - uint64_t *allocated = (uint64_t *)arg; - - if (allocated != NULL) - idalloc(allocated); -} -#endif - -/* - * FreeBSD's pthreads implementation calls malloc(3), so the malloc - * implementation has to take pains to avoid infinite recursion during - * initialization. 
- */ static inline bool malloc_init(void) { @@ -604,6 +558,7 @@ malloc_init_hard(void) } #endif + malloc_tsd_boot(); if (config_prof) prof_boot0(); @@ -631,7 +586,7 @@ malloc_init_hard(void) } } - if (chunk_boot()) { + if (chunk_boot0()) { malloc_mutex_unlock(&init_lock); return (true); } @@ -646,7 +601,7 @@ malloc_init_hard(void) arena_boot(); - if (config_tcache && tcache_boot()) { + if (config_tcache && tcache_boot0()) { malloc_mutex_unlock(&init_lock); return (true); } @@ -656,23 +611,9 @@ malloc_init_hard(void) return (true); } -#ifndef JEMALLOC_TLS - /* Initialize allocation counters before any allocations can occur. */ - if (config_stats && pthread_key_create(&thread_allocated_tsd, - thread_allocated_cleanup) != 0) { - malloc_mutex_unlock(&init_lock); - return (true); - } -#endif - if (malloc_mutex_init(&arenas_lock)) return (true); - if (pthread_key_create(&arenas_tsd, arenas_cleanup) != 0) { - malloc_mutex_unlock(&init_lock); - return (true); - } - /* * Create enough scaffolding to allow recursive allocation in * malloc_ncpus(). @@ -691,25 +632,38 @@ malloc_init_hard(void) return (true); } - /* - * Assign the initial arena to the initial thread, in order to avoid - * spurious creation of an extra arena if the application switches to - * threaded mode. - */ - ARENA_SET(arenas[0]); - arenas[0]->nthreads++; + /* Initialize allocation counters before any allocations can occur. */ + if (config_stats && thread_allocated_tsd_boot()) { + malloc_mutex_unlock(&init_lock); + return (true); + } if (config_prof && prof_boot2()) { malloc_mutex_unlock(&init_lock); return (true); } + if (arenas_tsd_boot()) { + malloc_mutex_unlock(&init_lock); + return (true); + } + + if (config_tcache && tcache_boot1()) { + malloc_mutex_unlock(&init_lock); + return (true); + } + /* Get number of CPUs. 
*/ malloc_initializer = pthread_self(); malloc_mutex_unlock(&init_lock); ncpus = malloc_ncpus(); malloc_mutex_lock(&init_lock); + if (chunk_boot1()) { + malloc_mutex_unlock(&init_lock); + return (true); + } + if (opt_narenas == 0) { /* * For SMP systems, create more than one arena per CPU by @@ -844,7 +798,7 @@ OOM: prof_malloc(ret, usize, cnt); if (config_stats && ret != NULL) { assert(usize == isalloc(ret)); - ALLOCATED_ADD(usize, 0); + thread_allocated_tsd_get()->allocated += usize; } return (ret); } @@ -939,7 +893,7 @@ imemalign(void **memptr, size_t alignment, size_t size, RETURN: if (config_stats && result != NULL) { assert(usize == isalloc(result)); - ALLOCATED_ADD(usize, 0); + thread_allocated_tsd_get()->allocated += usize; } if (config_prof && opt_prof && result != NULL) prof_malloc(result, usize, cnt); @@ -1044,7 +998,7 @@ RETURN: prof_malloc(ret, usize, cnt); if (config_stats && ret != NULL) { assert(usize == isalloc(ret)); - ALLOCATED_ADD(usize, 0); + thread_allocated_tsd_get()->allocated += usize; } return (ret); } @@ -1173,8 +1127,11 @@ RETURN: if (config_prof && opt_prof) prof_realloc(ret, usize, cnt, old_size, old_ctx); if (config_stats && ret != NULL) { + thread_allocated_t *ta; assert(usize == isalloc(ret)); - ALLOCATED_ADD(usize, old_size); + ta = thread_allocated_tsd_get(); + ta->allocated += usize; + ta->deallocated += old_size; } return (ret); } @@ -1197,7 +1154,7 @@ je_free(void *ptr) usize = isalloc(ptr); } if (config_stats) - ALLOCATED_ADD(0, usize); + thread_allocated_tsd_get()->deallocated += usize; idalloc(ptr); } } @@ -1412,7 +1369,7 @@ je_allocm(void **ptr, size_t *rsize, size_t size, int flags) *ptr = p; if (config_stats) { assert(usize == isalloc(p)); - ALLOCATED_ADD(usize, 0); + thread_allocated_tsd_get()->allocated += usize; } return (ALLOCM_SUCCESS); OOM: @@ -1502,8 +1459,12 @@ je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, int flags) } *ptr = q; - if (config_stats) - ALLOCATED_ADD(usize, old_size); + if 
(config_stats) { + thread_allocated_t *ta; + ta = thread_allocated_tsd_get(); + ta->allocated += usize; + ta->deallocated += old_size; + } return (ALLOCM_SUCCESS); ERR: if (no_move) @@ -1556,7 +1517,7 @@ je_dallocm(void *ptr, int flags) prof_free(ptr, usize); } if (config_stats) - ALLOCATED_ADD(0, usize); + thread_allocated_tsd_get()->deallocated += usize; idalloc(ptr); return (ALLOCM_SUCCESS); diff --git a/src/prof.c b/src/prof.c index 9c327379..ba0b64e1 100644 --- a/src/prof.c +++ b/src/prof.c @@ -14,6 +14,8 @@ /******************************************************************************/ /* Data. */ +malloc_tsd_data(, prof_tdata, prof_tdata_t *, NULL) + bool opt_prof = false; bool opt_prof_active = true; size_t opt_lg_prof_sample = LG_PROF_SAMPLE_DEFAULT; @@ -26,12 +28,6 @@ char opt_prof_prefix[PATH_MAX + 1]; uint64_t prof_interval; bool prof_promote; -#ifdef JEMALLOC_TLS -__thread prof_tdata_t *prof_tdata_tls - JEMALLOC_ATTR(tls_model("initial-exec")); -#endif -pthread_key_t prof_tdata_tsd; - /* * Global hash of (prof_bt_t *)-->(prof_ctx_t *). This is the master data * structure that knows about all backtraces currently captured. @@ -50,7 +46,7 @@ static uint64_t prof_dump_useq; * all profile dumps. The buffer is implicitly protected by bt2ctx_mtx, since * it must be locked anyway during dumping. 
*/ -static char prof_dump_buf[PROF_DUMP_BUF_SIZE]; +static char prof_dump_buf[PROF_DUMP_BUFSIZE]; static unsigned prof_dump_buf_end; static int prof_dump_fd; @@ -91,7 +87,6 @@ static void prof_fdump(void); static void prof_bt_hash(const void *key, unsigned minbits, size_t *hash1, size_t *hash2); static bool prof_bt_keycomp(const void *k1, const void *k2); -static void prof_tdata_cleanup(void *arg); /******************************************************************************/ @@ -439,7 +434,7 @@ prof_lookup(prof_bt_t *bt) cassert(config_prof); - prof_tdata = PROF_TCACHE_GET(); + prof_tdata = *prof_tdata_tsd_get(); if (prof_tdata == NULL) { prof_tdata = prof_tdata_init(); if (prof_tdata == NULL) @@ -599,16 +594,16 @@ prof_write(bool propagate_err, const char *s) slen = strlen(s); while (i < slen) { /* Flush the buffer if it is full. */ - if (prof_dump_buf_end == PROF_DUMP_BUF_SIZE) + if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) if (prof_flush(propagate_err) && propagate_err) return (true); - if (prof_dump_buf_end + slen <= PROF_DUMP_BUF_SIZE) { + if (prof_dump_buf_end + slen <= PROF_DUMP_BUFSIZE) { /* Finish writing. */ n = slen - i; } else { /* Write as much of s as will fit. */ - n = PROF_DUMP_BUF_SIZE - prof_dump_buf_end; + n = PROF_DUMP_BUFSIZE - prof_dump_buf_end; } memcpy(&prof_dump_buf[prof_dump_buf_end], &s[i], n); prof_dump_buf_end += n; @@ -624,10 +619,12 @@ prof_printf(bool propagate_err, const char *format, ...) 
{ bool ret; va_list ap; + char buf[PROF_PRINTF_BUFSIZE]; va_start(ap, format); - ret = prof_write(propagate_err, malloc_vtprintf(format, ap)); + malloc_snprintf(buf, sizeof(buf), format, ap); va_end(ap); + ret = prof_write(propagate_err, buf); return (ret); } @@ -795,11 +792,13 @@ static bool prof_dump_maps(bool propagate_err) { int mfd; + char filename[PATH_MAX + 1]; cassert(config_prof); - mfd = open(malloc_tprintf("/proc/%d/maps", (int)getpid()), - O_RDONLY); + malloc_snprintf(filename, sizeof(filename), "/proc/%d/maps", + (int)getpid()); + mfd = open(filename, O_RDONLY); if (mfd != -1) { ssize_t nread; @@ -809,13 +808,13 @@ prof_dump_maps(bool propagate_err) nread = 0; do { prof_dump_buf_end += nread; - if (prof_dump_buf_end == PROF_DUMP_BUF_SIZE) { + if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) { /* Make space in prof_dump_buf before read(). */ if (prof_flush(propagate_err) && propagate_err) return (true); } nread = read(mfd, &prof_dump_buf[prof_dump_buf_end], - PROF_DUMP_BUF_SIZE - prof_dump_buf_end); + PROF_DUMP_BUFSIZE - prof_dump_buf_end); } while (nread > 0); close(mfd); } else @@ -1098,16 +1097,16 @@ prof_tdata_init(void) prof_tdata->threshold = 0; prof_tdata->accum = 0; - PROF_TCACHE_SET(prof_tdata); + prof_tdata_tsd_set(&prof_tdata); return (prof_tdata); } -static void +void prof_tdata_cleanup(void *arg) { prof_thr_cnt_t *cnt; - prof_tdata_t *prof_tdata = (prof_tdata_t *)arg; + prof_tdata_t *prof_tdata = *(prof_tdata_t **)arg; cassert(config_prof); @@ -1127,7 +1126,8 @@ prof_tdata_cleanup(void *arg) idalloc(prof_tdata->vec); idalloc(prof_tdata); - PROF_TCACHE_SET(NULL); + prof_tdata = NULL; + prof_tdata_tsd_set(&prof_tdata); } void @@ -1182,8 +1182,7 @@ prof_boot2(void) return (true); if (malloc_mutex_init(&bt2ctx_mtx)) return (true); - if (pthread_key_create(&prof_tdata_tsd, prof_tdata_cleanup) - != 0) { + if (prof_tdata_tsd_boot()) { malloc_write( ": Error in pthread_key_create()\n"); abort(); diff --git a/src/tcache.c b/src/tcache.c index 
f90308cd..3442406d 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -4,30 +4,16 @@ /******************************************************************************/ /* Data. */ +malloc_tsd_data(, tcache, tcache_t *, NULL) + bool opt_tcache = true; ssize_t opt_lg_tcache_max = LG_TCACHE_MAXCLASS_DEFAULT; tcache_bin_info_t *tcache_bin_info; static unsigned stack_nelms; /* Total stack elms per tcache. */ -/* Map of thread-specific caches. */ -#ifdef JEMALLOC_TLS -__thread tcache_t *tcache_tls JEMALLOC_ATTR(tls_model("initial-exec")); -#endif - -/* - * Same contents as tcache, but initialized such that the TSD destructor is - * called when a thread exits, so that the cache can be cleaned up. - */ -pthread_key_t tcache_tsd; - -size_t nhbins; -size_t tcache_maxclass; - -/******************************************************************************/ -/* Function prototypes for non-inline static functions. */ - -static void tcache_thread_cleanup(void *arg); +size_t nhbins; +size_t tcache_maxclass; /******************************************************************************/ @@ -196,6 +182,33 @@ tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem, tbin->low_water = tbin->ncached; } +void +tcache_arena_associate(tcache_t *tcache, arena_t *arena) +{ + + if (config_stats) { + /* Link into list of extant tcaches. */ + malloc_mutex_lock(&arena->lock); + ql_elm_new(tcache, link); + ql_tail_insert(&arena->tcache_ql, tcache, link); + malloc_mutex_unlock(&arena->lock); + } + tcache->arena = arena; +} + +void +tcache_arena_dissociate(tcache_t *tcache) +{ + + if (config_stats) { + /* Unlink from list of extant tcaches. 
*/ + malloc_mutex_lock(&tcache->arena->lock); + ql_remove(&tcache->arena->tcache_ql, tcache, link); + malloc_mutex_unlock(&tcache->arena->lock); + tcache_stats_merge(tcache, tcache->arena); + } +} + tcache_t * tcache_create(arena_t *arena) { @@ -228,15 +241,8 @@ tcache_create(arena_t *arena) if (tcache == NULL) return (NULL); - if (config_stats) { - /* Link into list of extant tcaches. */ - malloc_mutex_lock(&arena->lock); - ql_elm_new(tcache, link); - ql_tail_insert(&arena->tcache_ql, tcache, link); - malloc_mutex_unlock(&arena->lock); - } + tcache_arena_associate(tcache, arena); - tcache->arena = arena; assert((TCACHE_NSLOTS_SMALL_MAX & 1U) == 0); for (i = 0; i < nhbins; i++) { tcache->tbins[i].lg_fill_div = 1; @@ -245,7 +251,7 @@ tcache_create(arena_t *arena) stack_offset += tcache_bin_info[i].ncached_max * sizeof(void *); } - TCACHE_SET(tcache); + tcache_tsd_set(&tcache); return (tcache); } @@ -256,13 +262,7 @@ tcache_destroy(tcache_t *tcache) unsigned i; size_t tcache_size; - if (config_stats) { - /* Unlink from list of extant tcaches. */ - malloc_mutex_lock(&tcache->arena->lock); - ql_remove(&tcache->arena->tcache_ql, tcache, link); - malloc_mutex_unlock(&tcache->arena->lock); - tcache_stats_merge(tcache, tcache->arena); - } + tcache_arena_dissociate(tcache); for (i = 0; i < NBINS; i++) { tcache_bin_t *tbin = &tcache->tbins[i]; @@ -323,10 +323,10 @@ tcache_destroy(tcache_t *tcache) idalloc(tcache); } -static void +void tcache_thread_cleanup(void *arg) { - tcache_t *tcache = (tcache_t *)arg; + tcache_t *tcache = *(tcache_t **)arg; if (tcache == (void *)(uintptr_t)1) { /* @@ -341,11 +341,13 @@ tcache_thread_cleanup(void *arg) * destructor was called. Reset tcache to 1 in order to * receive another callback. 
*/ - TCACHE_SET((uintptr_t)1); + tcache = (tcache_t *)(uintptr_t)1; + tcache_tsd_set(&tcache); } else if (tcache != NULL) { assert(tcache != (void *)(uintptr_t)1); tcache_destroy(tcache); - TCACHE_SET((uintptr_t)1); + tcache = (tcache_t *)(uintptr_t)1; + tcache_tsd_set(&tcache); } } @@ -374,7 +376,7 @@ tcache_stats_merge(tcache_t *tcache, arena_t *arena) } bool -tcache_boot(void) +tcache_boot0(void) { if (opt_tcache) { @@ -385,8 +387,8 @@ tcache_boot(void) * SMALL_MAXCLASS and arena_maxclass are known. * XXX Can this be done earlier? */ - if (opt_lg_tcache_max < 0 || (1U << - opt_lg_tcache_max) < SMALL_MAXCLASS) + if (opt_lg_tcache_max < 0 || (1U << opt_lg_tcache_max) < + SMALL_MAXCLASS) tcache_maxclass = SMALL_MAXCLASS; else if ((1U << opt_lg_tcache_max) > arena_maxclass) tcache_maxclass = arena_maxclass; @@ -416,13 +418,18 @@ tcache_boot(void) tcache_bin_info[i].ncached_max = TCACHE_NSLOTS_LARGE; stack_nelms += tcache_bin_info[i].ncached_max; } - - if (pthread_key_create(&tcache_tsd, tcache_thread_cleanup) != - 0) { - malloc_write( - ": Error in pthread_key_create()\n"); - abort(); - } + } + + return (false); +} + +bool +tcache_boot1(void) +{ + + if (opt_tcache) { + if (tcache_tsd_boot()) + return (true); } return (false); diff --git a/src/tsd.c b/src/tsd.c new file mode 100644 index 00000000..669ea8fc --- /dev/null +++ b/src/tsd.c @@ -0,0 +1,72 @@ +#define JEMALLOC_TSD_C_ +#include "jemalloc/internal/jemalloc_internal.h" + +/******************************************************************************/ +/* Data. */ + +static unsigned ncleanups; +static malloc_tsd_cleanup_t cleanups[MALLOC_TSD_CLEANUPS_MAX]; + +/******************************************************************************/ + +void * +malloc_tsd_malloc(size_t size) +{ + + /* Avoid choose_arena() in order to dodge bootstrapping issues. 
*/ + return arena_malloc_prechosen(arenas[0], size, false); +} + +void +malloc_tsd_dalloc(void *wrapper) +{ + + idalloc(wrapper); +} + +void +malloc_tsd_no_cleanup(void *arg) +{ + + not_reached(); +} + +#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP +void +_malloc_thread_cleanup(void) +{ + bool pending[ncleanups], again; + unsigned i; + + for (i = 0; i < ncleanups; i++) + pending[i] = true; + + do { + again = false; + for (i = 0; i < ncleanups; i++) { + if (pending[i]) { + pending[i] = cleanups[i].f(cleanups[i].arg); + if (pending[i]) + again = true; + } + } + } while (again); +} +#endif + +void +malloc_tsd_cleanup_register(bool (*f)(void *), void *arg) +{ + + assert(ncleanups < MALLOC_TSD_CLEANUPS_MAX); + cleanups[ncleanups].f = f; + cleanups[ncleanups].arg = arg; + ncleanups++; +} + +void +malloc_tsd_boot(void) +{ + + ncleanups = 0; +} diff --git a/src/util.c b/src/util.c index 47e7b66e..96c87f78 100644 --- a/src/util.c +++ b/src/util.c @@ -222,6 +222,9 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) case 'z': \ val = va_arg(ap, size_t); \ break; \ + case 'p': /* Synthetic; used for %p. */ \ + val = va_arg(ap, uintptr_t); \ + break; \ default: not_reached(); \ } \ } while (0) @@ -410,7 +413,7 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) uintmax_t val; char buf[X2S_BUFSIZE]; - GET_ARG_NUMERIC(val, len); + GET_ARG_NUMERIC(val, 'p'); s = x2s(val, true, false, buf, &slen); APPEND_PADDED_S(s, slen, width, left_justify); f++; @@ -466,34 +469,11 @@ malloc_snprintf(char *str, size_t size, const char *format, ...) return (ret); } -const char * -malloc_vtprintf(const char *format, va_list ap) -{ - static __thread char buf[MALLOC_PRINTF_BUFSIZE]; - - malloc_vsnprintf(buf, sizeof(buf), format, ap); - - return (buf); -} - -JEMALLOC_ATTR(format(printf, 1, 2)) -const char * -malloc_tprintf(const char *format, ...) 
-{ - const char *ret; - va_list ap; - - va_start(ap, format); - ret = malloc_vtprintf(format, ap); - va_end(ap); - - return (ret); -} - void malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque, const char *format, va_list ap) { + char buf[MALLOC_PRINTF_BUFSIZE]; if (write_cb == NULL) { /* @@ -505,7 +485,8 @@ malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque, cbopaque = NULL; } - write_cb(cbopaque, malloc_vtprintf(format, ap)); + malloc_vsnprintf(buf, sizeof(buf), format, ap); + write_cb(cbopaque, buf); } /* From 9225a1991a58190207cca2ff3cdba966bb322dd5 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 23 Mar 2012 15:39:07 -0700 Subject: [PATCH 0097/3378] Add JEMALLOC_CC_SILENCE_INIT(). Add JEMALLOC_CC_SILENCE_INIT(), which provides succinct syntax for initializing a variable to avoid a spurious compiler warning. --- .../jemalloc/internal/jemalloc_internal.h.in | 6 +-- include/jemalloc/internal/util.h | 11 +++++ src/jemalloc.c | 42 ++++--------------- src/util.c | 8 ++-- 4 files changed, 23 insertions(+), 44 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 387aabbe..e0558140 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -611,11 +611,7 @@ ipalloc(size_t usize, size_t alignment, bool zero) if (usize <= arena_maxclass && alignment <= PAGE_SIZE) ret = arena_malloc(usize, zero); else { - size_t run_size -#ifdef JEMALLOC_CC_SILENCE - = 0 -#endif - ; + size_t run_size JEMALLOC_CC_SILENCE_INIT(0); /* * Ideally we would only ever call sa2u() once per aligned diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h index fb354da5..5156399f 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util.h @@ -16,6 +16,17 @@ */ #define JEMALLOC_CONCAT(...) __VA_ARGS__ +/* + * Silence compiler warnings due to uninitialized values. 
This is used + * wherever the compiler fails to recognize that the variable is never used + * uninitialized. + */ +#ifdef JEMALLOC_CC_SILENCE +# define JEMALLOC_CC_SILENCE_INIT(v) = v +#else +# define JEMALLOC_CC_SILENCE_INIT(v) +#endif + /* * Define a custom assert() in order to reduce the chances of deadlock during * assertion failure. diff --git a/src/jemalloc.c b/src/jemalloc.c index 331e4737..f9451796 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -751,11 +751,7 @@ je_malloc(size_t size) { void *ret; size_t usize; - prof_thr_cnt_t *cnt -#ifdef JEMALLOC_CC_SILENCE - = NULL -#endif - ; + prof_thr_cnt_t *cnt JEMALLOC_CC_SILENCE_INIT(NULL); if (malloc_init()) { ret = NULL; @@ -818,11 +814,7 @@ imemalign(void **memptr, size_t alignment, size_t size, int ret; size_t usize; void *result; - prof_thr_cnt_t *cnt -#ifdef JEMALLOC_CC_SILENCE - = NULL -#endif - ; + prof_thr_cnt_t *cnt JEMALLOC_CC_SILENCE_INIT(NULL); assert(min_alignment != 0); @@ -932,11 +924,7 @@ je_calloc(size_t num, size_t size) void *ret; size_t num_size; size_t usize; - prof_thr_cnt_t *cnt -#ifdef JEMALLOC_CC_SILENCE - = NULL -#endif - ; + prof_thr_cnt_t *cnt JEMALLOC_CC_SILENCE_INIT(NULL); if (malloc_init()) { num_size = 0; @@ -1010,16 +998,8 @@ je_realloc(void *ptr, size_t size) void *ret; size_t usize; size_t old_size = 0; - prof_thr_cnt_t *cnt -#ifdef JEMALLOC_CC_SILENCE - = NULL -#endif - ; - prof_ctx_t *old_ctx -#ifdef JEMALLOC_CC_SILENCE - = NULL -#endif - ; + prof_thr_cnt_t *cnt JEMALLOC_CC_SILENCE_INIT(NULL); + prof_ctx_t *old_ctx JEMALLOC_CC_SILENCE_INIT(NULL); if (size == 0) { if (ptr != NULL) { @@ -1173,11 +1153,7 @@ JEMALLOC_ATTR(visibility("default")) void * je_memalign(size_t alignment, size_t size) { - void *ret -#ifdef JEMALLOC_CC_SILENCE - = NULL -#endif - ; + void *ret JEMALLOC_CC_SILENCE_INIT(NULL); imemalign(&ret, alignment, size, 1); return (ret); } @@ -1189,11 +1165,7 @@ JEMALLOC_ATTR(visibility("default")) void * je_valloc(size_t size) { - void *ret -#ifdef 
JEMALLOC_CC_SILENCE - = NULL -#endif - ; + void *ret JEMALLOC_CC_SILENCE_INIT(NULL); imemalign(&ret, PAGE_SIZE, size, 1); return (ret); } diff --git a/src/util.c b/src/util.c index 96c87f78..698b53a9 100644 --- a/src/util.c +++ b/src/util.c @@ -353,7 +353,7 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) char *s; size_t slen; case 'd': case 'i': { - intmax_t val; + intmax_t val JEMALLOC_CC_SILENCE_INIT(0); char buf[D2S_BUFSIZE]; GET_ARG_NUMERIC(val, len); @@ -363,7 +363,7 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) f++; break; } case 'o': { - uintmax_t val; + uintmax_t val JEMALLOC_CC_SILENCE_INIT(0); char buf[O2S_BUFSIZE]; GET_ARG_NUMERIC(val, len); @@ -372,7 +372,7 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) f++; break; } case 'u': { - uintmax_t val; + uintmax_t val JEMALLOC_CC_SILENCE_INIT(0); char buf[U2S_BUFSIZE]; GET_ARG_NUMERIC(val, len); @@ -381,7 +381,7 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) f++; break; } case 'x': case 'X': { - uintmax_t val; + uintmax_t val JEMALLOC_CC_SILENCE_INIT(0); char buf[X2S_BUFSIZE]; GET_ARG_NUMERIC(val, len); From 06304a97854dbbf09075278fe2c90365254480da Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 23 Mar 2012 16:09:56 -0700 Subject: [PATCH 0098/3378] Restructure atomic_*_z(). Restructure atomic_*_z() so that no casting within macros is necessary. This avoids warnings when compiling with clang. 
--- include/jemalloc/internal/atomic.h | 45 +++++++++++++++++++----------- 1 file changed, 29 insertions(+), 16 deletions(-) diff --git a/include/jemalloc/internal/atomic.h b/include/jemalloc/internal/atomic.h index 1dbb7d6a..afeb9cb7 100644 --- a/include/jemalloc/internal/atomic.h +++ b/include/jemalloc/internal/atomic.h @@ -11,22 +11,7 @@ #define atomic_read_uint64(p) atomic_add_uint64(p, 0) #define atomic_read_uint32(p) atomic_add_uint32(p, 0) - -#if (LG_SIZEOF_PTR == 3) -# define atomic_read_z(p) \ - (size_t)atomic_add_uint64((uint64_t *)p, (uint64_t)0) -# define atomic_add_z(p, x) \ - (size_t)atomic_add_uint64((uint64_t *)p, (uint64_t)x) -# define atomic_sub_z(p, x) \ - (size_t)atomic_sub_uint64((uint64_t *)p, (uint64_t)x) -#elif (LG_SIZEOF_PTR == 2) -# define atomic_read_z(p) \ - (size_t)atomic_add_uint32((uint32_t *)p, (uint32_t)0) -# define atomic_add_z(p, x) \ - (size_t)atomic_add_uint32((uint32_t *)p, (uint32_t)x) -# define atomic_sub_z(p, x) \ - (size_t)atomic_sub_uint32((uint32_t *)p, (uint32_t)x) -#endif +#define atomic_read_z(p) atomic_add_z(p, 0) #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ @@ -37,6 +22,8 @@ uint64_t atomic_add_uint64(uint64_t *p, uint64_t x); uint64_t atomic_sub_uint64(uint64_t *p, uint64_t x); uint32_t atomic_add_uint32(uint32_t *p, uint32_t x); uint32_t atomic_sub_uint32(uint32_t *p, uint32_t x); +size_t atomic_add_z(size_t *p, size_t x); +size_t atomic_sub_z(size_t *p, size_t x); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ATOMIC_C_)) @@ -179,6 +166,32 @@ atomic_sub_uint32(uint32_t *p, uint32_t x) #else # error "Missing implementation for 32-bit atomic operations" #endif + +/******************************************************************************/ +/* size_t operations. 
*/ +JEMALLOC_INLINE size_t +atomic_add_z(size_t *p, size_t x) +{ + +#if (LG_SIZEOF_PTR == 3) + return ((size_t)atomic_add_uint64((uint64_t *)p, (uint64_t)x)); +#elif (LG_SIZEOF_PTR == 2) + return ((size_t)atomic_add_uint32((uint32_t *)p, (uint32_t)x)); +#endif +} + +JEMALLOC_INLINE size_t +atomic_sub_z(size_t *p, size_t x) +{ + +#if (LG_SIZEOF_PTR == 3) + return ((size_t)atomic_add_uint64((uint64_t *)p, + (uint64_t)-((int64_t)x))); +#elif (LG_SIZEOF_PTR == 2) + return ((size_t)atomic_add_uint32((uint32_t *)p, + (uint32_t)-((int32_t)x))); +#endif +} #endif #endif /* JEMALLOC_H_INLINES */ From 9022bf9bfd6ab907e5b019fed09fdc3acdf1c280 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 23 Mar 2012 16:14:08 -0700 Subject: [PATCH 0099/3378] Remove -no-cpp-precomp compiler flag for OS X. Remove the -no-cpp-precomp compiler flag when compiling on OS X. clang does not support the flag, and gcc works fine without it. --- configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 44ff6eec..e1a399b0 100644 --- a/configure.ac +++ b/configure.ac @@ -208,7 +208,7 @@ dnl definitions need to be seen before any headers are included, which is a pain dnl to make happen otherwise. case "${host}" in *-*-darwin*) - CFLAGS="$CFLAGS -fno-common -no-cpp-precomp" + CFLAGS="$CFLAGS -fno-common" abi="macho" AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) RPATH="" From b80581d30928e04b3d12b1fec2b989da44a07e2c Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 23 Mar 2012 16:17:43 -0700 Subject: [PATCH 0100/3378] Forcibly disable TLS on OS X. Forcibly disable TLS on OS X. gcc and llvm-gcc on OS X do not support TLS, but clang does. Unfortunately, the implementation calls malloc() internally during TLS initialization, which causes an unresolvable bootstrapping issue. 
--- configure.ac | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index e1a399b0..7e4f2211 100644 --- a/configure.ac +++ b/configure.ac @@ -214,6 +214,7 @@ case "${host}" in RPATH="" LD_PRELOAD_VAR="DYLD_INSERT_LIBRARIES" so="dylib" + force_tls="0" ;; *-*-freebsd*) CFLAGS="$CFLAGS" @@ -767,7 +768,6 @@ dnl Check whether the BSD-specific _malloc_thread_cleanup() exists. If so, use dnl it rather than pthreads TSD cleanup functions to support cleanup during dnl thread exit, in order to avoid pthreads library recursion during dnl bootstrapping. -force_tls="0" AC_CHECK_FUNC([_malloc_thread_cleanup], [have__malloc_thread_cleanup="1"], [have__malloc_thread_cleanup="0"] @@ -813,6 +813,10 @@ if test "x${enable_tls}" = "x0" -a "x${force_tls}" = "x1" ; then AC_MSG_RESULT([Forcing TLS to avoid allocator/threading bootstrap issues]) enable_tls="1" fi +if test "x${enable_tls}" = "x1" -a "x${force_tls}" = "x0" ; then + AC_MSG_RESULT([Forcing no TLS to avoid allocator/threading bootstrap issues]) + enable_tls="0" +fi if test "x${enable_tls}" = "x1" ; then AC_MSG_CHECKING([for TLS]) AC_COMPILE_IFELSE([AC_LANG_PROGRAM( From 6da5418ded9170b087c35960e0010006430117c1 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 23 Mar 2012 18:05:51 -0700 Subject: [PATCH 0101/3378] Remove ephemeral mutexes. Remove ephemeral mutexes from the prof machinery, and remove malloc_mutex_destroy(). This simplifies mutex management on systems that call malloc()/free() inside pthread_mutex_{create,destroy}(). Add atomic_*_u() for operation on unsigned values. Fix prof_printf() to call malloc_vsnprintf() rather than malloc_snprintf(). 
--- include/jemalloc/internal/atomic.h | 30 ++++++++++++++ include/jemalloc/internal/mutex.h | 1 - include/jemalloc/internal/prof.h | 16 +++++--- src/jemalloc.c | 10 ++--- src/mutex.c | 12 ------ src/prof.c | 63 ++++++++++++++++++++---------- 6 files changed, 89 insertions(+), 43 deletions(-) diff --git a/include/jemalloc/internal/atomic.h b/include/jemalloc/internal/atomic.h index afeb9cb7..7a9cb61e 100644 --- a/include/jemalloc/internal/atomic.h +++ b/include/jemalloc/internal/atomic.h @@ -12,6 +12,7 @@ #define atomic_read_uint64(p) atomic_add_uint64(p, 0) #define atomic_read_uint32(p) atomic_add_uint32(p, 0) #define atomic_read_z(p) atomic_add_z(p, 0) +#define atomic_read_u(p) atomic_add_u(p, 0) #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ @@ -24,6 +25,8 @@ uint32_t atomic_add_uint32(uint32_t *p, uint32_t x); uint32_t atomic_sub_uint32(uint32_t *p, uint32_t x); size_t atomic_add_z(size_t *p, size_t x); size_t atomic_sub_z(size_t *p, size_t x); +unsigned atomic_add_u(unsigned *p, unsigned x); +unsigned atomic_sub_u(unsigned *p, unsigned x); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ATOMIC_C_)) @@ -192,6 +195,33 @@ atomic_sub_z(size_t *p, size_t x) (uint32_t)-((int32_t)x))); #endif } + +/******************************************************************************/ +/* unsigned operations. 
*/ +JEMALLOC_INLINE unsigned +atomic_add_u(unsigned *p, unsigned x) +{ + +#if (LG_SIZEOF_INT == 3) + return ((unsigned)atomic_add_uint64((uint64_t *)p, (uint64_t)x)); +#elif (LG_SIZEOF_INT == 2) + return ((unsigned)atomic_add_uint32((uint32_t *)p, (uint32_t)x)); +#endif +} + +JEMALLOC_INLINE unsigned +atomic_sub_u(unsigned *p, unsigned x) +{ + +#if (LG_SIZEOF_INT == 3) + return ((unsigned)atomic_add_uint64((uint64_t *)p, + (uint64_t)-((int64_t)x))); +#elif (LG_SIZEOF_INT == 2) + return ((unsigned)atomic_add_uint32((uint32_t *)p, + (uint32_t)-((int32_t)x))); +#endif +} +/******************************************************************************/ #endif #endif /* JEMALLOC_H_INLINES */ diff --git a/include/jemalloc/internal/mutex.h b/include/jemalloc/internal/mutex.h index 9d136585..10637e92 100644 --- a/include/jemalloc/internal/mutex.h +++ b/include/jemalloc/internal/mutex.h @@ -28,7 +28,6 @@ extern bool isthreaded; #endif bool malloc_mutex_init(malloc_mutex_t *mutex); -void malloc_mutex_destroy(malloc_mutex_t *mutex); void malloc_mutex_prefork(malloc_mutex_t *mutex); void malloc_mutex_postfork_parent(malloc_mutex_t *mutex); void malloc_mutex_postfork_child(malloc_mutex_t *mutex); diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index 231a3876..34929e7e 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -31,6 +31,12 @@ typedef struct prof_tdata_s prof_tdata_t; /* Size of stack-allocated buffer used by prof_printf(). */ #define PROF_PRINTF_BUFSIZE 128 +/* + * Number of mutexes shared among all ctx's. No space is allocated for these + * unless profiling is enabled, so it's okay to over-provision. + */ +#define PROF_NCTX_LOCKS 1024 + #endif /* JEMALLOC_H_TYPES */ /******************************************************************************/ #ifdef JEMALLOC_H_STRUCTS @@ -108,7 +114,7 @@ struct prof_ctx_s { prof_bt_t *bt; /* Protects cnt_merged and cnts_ql. 
*/ - malloc_mutex_t lock; + malloc_mutex_t *lock; /* Temporary storage for summation during dump. */ prof_cnt_t cnt_summed; @@ -444,10 +450,10 @@ prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt, * It's too late to propagate OOM for this realloc(), * so operate directly on old_cnt->ctx->cnt_merged. */ - malloc_mutex_lock(&old_ctx->lock); + malloc_mutex_lock(old_ctx->lock); old_ctx->cnt_merged.curobjs--; old_ctx->cnt_merged.curbytes -= old_size; - malloc_mutex_unlock(&old_ctx->lock); + malloc_mutex_unlock(old_ctx->lock); told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U; } } else @@ -516,10 +522,10 @@ prof_free(const void *ptr, size_t size) * OOM during free() cannot be propagated, so operate * directly on cnt->ctx->cnt_merged. */ - malloc_mutex_lock(&ctx->lock); + malloc_mutex_lock(ctx->lock); ctx->cnt_merged.curobjs--; ctx->cnt_merged.curbytes -= size; - malloc_mutex_unlock(&ctx->lock); + malloc_mutex_unlock(ctx->lock); } } } diff --git a/src/jemalloc.c b/src/jemalloc.c index f9451796..b3e898c1 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -638,11 +638,6 @@ malloc_init_hard(void) return (true); } - if (config_prof && prof_boot2()) { - malloc_mutex_unlock(&init_lock); - return (true); - } - if (arenas_tsd_boot()) { malloc_mutex_unlock(&init_lock); return (true); @@ -653,6 +648,11 @@ malloc_init_hard(void) return (true); } + if (config_prof && prof_boot2()) { + malloc_mutex_unlock(&init_lock); + return (true); + } + /* Get number of CPUs. 
*/ malloc_initializer = pthread_self(); malloc_mutex_unlock(&init_lock); diff --git a/src/mutex.c b/src/mutex.c index 243b7129..07d2a033 100644 --- a/src/mutex.c +++ b/src/mutex.c @@ -81,18 +81,6 @@ malloc_mutex_init(malloc_mutex_t *mutex) return (false); } -void -malloc_mutex_destroy(malloc_mutex_t *mutex) -{ - -#ifndef JEMALLOC_OSSPIN - if (pthread_mutex_destroy(mutex) != 0) { - malloc_write(": Error in pthread_mutex_destroy()\n"); - abort(); - } -#endif -} - void malloc_mutex_prefork(malloc_mutex_t *mutex) { diff --git a/src/prof.c b/src/prof.c index ba0b64e1..bc21d894 100644 --- a/src/prof.c +++ b/src/prof.c @@ -28,6 +28,16 @@ char opt_prof_prefix[PATH_MAX + 1]; uint64_t prof_interval; bool prof_promote; +/* + * Table of mutexes that are shared among ctx's. These are leaf locks, so + * there is no problem with using them for more than one ctx at the same time. + * The primary motivation for this sharing though is that ctx's are ephemeral, + * and destroying mutexes causes complications for systems that allocate when + * creating/destroying mutexes. + */ +static malloc_mutex_t *ctx_locks; +static unsigned cum_ctxs; /* Atomic counter. */ + /* * Global hash of (prof_bt_t *)-->(prof_ctx_t *). This is the master data * structure that knows about all backtraces currently captured. 
@@ -87,6 +97,7 @@ static void prof_fdump(void); static void prof_bt_hash(const void *key, unsigned minbits, size_t *hash1, size_t *hash2); static bool prof_bt_keycomp(const void *k1, const void *k2); +static malloc_mutex_t *prof_ctx_mutex_choose(void); /******************************************************************************/ @@ -471,18 +482,12 @@ prof_lookup(prof_bt_t *bt) return (NULL); } ctx.p->bt = btkey.p; - if (malloc_mutex_init(&ctx.p->lock)) { - prof_leave(); - idalloc(btkey.v); - idalloc(ctx.v); - return (NULL); - } + ctx.p->lock = prof_ctx_mutex_choose(); memset(&ctx.p->cnt_merged, 0, sizeof(prof_cnt_t)); ql_new(&ctx.p->cnts_ql); if (ckh_insert(&bt2ctx, btkey.v, ctx.v)) { /* OOM. */ prof_leave(); - malloc_mutex_destroy(&ctx.p->lock); idalloc(btkey.v); idalloc(ctx.v); return (NULL); @@ -502,9 +507,9 @@ prof_lookup(prof_bt_t *bt) * Artificially raise curobjs, in order to avoid a race * condition with prof_ctx_merge()/prof_ctx_destroy(). */ - malloc_mutex_lock(&ctx.p->lock); + malloc_mutex_lock(ctx.p->lock); ctx.p->cnt_merged.curobjs++; - malloc_mutex_unlock(&ctx.p->lock); + malloc_mutex_unlock(ctx.p->lock); new_ctx = false; } prof_leave(); @@ -547,10 +552,10 @@ prof_lookup(prof_bt_t *bt) return (NULL); } ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link); - malloc_mutex_lock(&ctx.p->lock); + malloc_mutex_lock(ctx.p->lock); ql_tail_insert(&ctx.p->cnts_ql, ret.p, cnts_link); ctx.p->cnt_merged.curobjs--; - malloc_mutex_unlock(&ctx.p->lock); + malloc_mutex_unlock(ctx.p->lock); } else { /* Move ret to the front of the LRU. */ ql_remove(&prof_tdata->lru_ql, ret.p, lru_link); @@ -622,7 +627,7 @@ prof_printf(bool propagate_err, const char *format, ...) 
char buf[PROF_PRINTF_BUFSIZE]; va_start(ap, format); - malloc_snprintf(buf, sizeof(buf), format, ap); + malloc_vsnprintf(buf, sizeof(buf), format, ap); va_end(ap); ret = prof_write(propagate_err, buf); @@ -637,7 +642,7 @@ prof_ctx_sum(prof_ctx_t *ctx, prof_cnt_t *cnt_all, size_t *leak_nctx) cassert(config_prof); - malloc_mutex_lock(&ctx->lock); + malloc_mutex_lock(ctx->lock); memcpy(&ctx->cnt_summed, &ctx->cnt_merged, sizeof(prof_cnt_t)); ql_foreach(thr_cnt, &ctx->cnts_ql, cnts_link) { @@ -676,7 +681,7 @@ prof_ctx_sum(prof_ctx_t *ctx, prof_cnt_t *cnt_all, size_t *leak_nctx) cnt_all->accumbytes += ctx->cnt_summed.accumbytes; } - malloc_mutex_unlock(&ctx->lock); + malloc_mutex_unlock(ctx->lock); } static void @@ -693,7 +698,7 @@ prof_ctx_destroy(prof_ctx_t *ctx) * prof_ctx_merge() and entry into this function. */ prof_enter(); - malloc_mutex_lock(&ctx->lock); + malloc_mutex_lock(ctx->lock); if (ql_first(&ctx->cnts_ql) == NULL && ctx->cnt_merged.curobjs == 1) { assert(ctx->cnt_merged.curbytes == 0); assert(ctx->cnt_merged.accumobjs == 0); @@ -703,9 +708,8 @@ prof_ctx_destroy(prof_ctx_t *ctx) assert(false); prof_leave(); /* Destroy ctx. */ - malloc_mutex_unlock(&ctx->lock); + malloc_mutex_unlock(ctx->lock); bt_destroy(ctx->bt); - malloc_mutex_destroy(&ctx->lock); idalloc(ctx); } else { /* @@ -713,7 +717,7 @@ prof_ctx_destroy(prof_ctx_t *ctx) * prof_lookup(). */ ctx->cnt_merged.curobjs--; - malloc_mutex_unlock(&ctx->lock); + malloc_mutex_unlock(ctx->lock); prof_leave(); } } @@ -726,7 +730,7 @@ prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt) cassert(config_prof); /* Merge cnt stats and detach from ctx. 
*/ - malloc_mutex_lock(&ctx->lock); + malloc_mutex_lock(ctx->lock); ctx->cnt_merged.curobjs += cnt->cnts.curobjs; ctx->cnt_merged.curbytes += cnt->cnts.curbytes; ctx->cnt_merged.accumobjs += cnt->cnts.accumobjs; @@ -751,7 +755,7 @@ prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt) destroy = true; } else destroy = false; - malloc_mutex_unlock(&ctx->lock); + malloc_mutex_unlock(ctx->lock); if (destroy) prof_ctx_destroy(ctx); } @@ -1067,6 +1071,14 @@ prof_bt_keycomp(const void *k1, const void *k2) return (memcmp(bt1->vec, bt2->vec, bt1->len * sizeof(void *)) == 0); } +static malloc_mutex_t * +prof_ctx_mutex_choose(void) +{ + unsigned nctxs = atomic_add_u(&cum_ctxs, 1); + + return (&ctx_locks[(nctxs - 1) % PROF_NCTX_LOCKS]); +} + prof_tdata_t * prof_tdata_init(void) { @@ -1177,6 +1189,8 @@ prof_boot2(void) cassert(config_prof); if (opt_prof) { + unsigned i; + if (ckh_new(&bt2ctx, PROF_CKH_MINITEMS, prof_bt_hash, prof_bt_keycomp)) return (true); @@ -1202,6 +1216,15 @@ prof_boot2(void) if (opt_abort) abort(); } + + ctx_locks = (malloc_mutex_t *)base_alloc(PROF_NCTX_LOCKS * + sizeof(malloc_mutex_t)); + if (ctx_locks == NULL) + return (true); + for (i = 0; i < PROF_NCTX_LOCKS; i++) { + if (malloc_mutex_init(&ctx_locks[i])) + return (true); + } } #ifdef JEMALLOC_PROF_LIBGCC From 41b6afb834b1f5250223678c52bd4f013d4234f6 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 2 Feb 2012 22:04:57 -0800 Subject: [PATCH 0102/3378] Port to FreeBSD. Use FreeBSD-specific functions (_pthread_mutex_init_calloc_cb(), _malloc_{pre,post}fork()) to avoid bootstrapping issues due to allocation in libc and libthr. Add malloc_strtoumax() and use it instead of strtoul(). Disable validation code in malloc_vsnprintf() and malloc_strtoumax() until jemalloc is initialized. This is necessary because locale initialization causes allocation for both vsnprintf() and strtoumax(). Force the lazy-lock feature on in order to avoid pthread_self(), because it causes allocation. 
Use syscall(SYS_write, ...) rather than write(...), because libthr wraps write() and causes allocation. Without this workaround, it would not be possible to print error messages in malloc_conf_init() without substantially reworking bootstrapping. Fix choose_arena_hard() to look at how many threads are assigned to the candidate choice, rather than checking whether the arena is uninitialized. This bug potentially caused more arenas to be initialized than necessary. --- README | 10 +- configure.ac | 11 ++ include/jemalloc/internal/base.h | 1 + .../jemalloc/internal/jemalloc_internal.h.in | 3 + include/jemalloc/internal/mutex.h | 5 +- include/jemalloc/internal/tsd.h | 2 +- include/jemalloc/internal/util.h | 1 + include/jemalloc/jemalloc_defs.h.in | 14 ++ src/base.c | 11 ++ src/ctl.c | 8 +- src/jemalloc.c | 64 ++++++--- src/mutex.c | 18 ++- src/util.c | 136 ++++++++++++++++-- 13 files changed, 237 insertions(+), 47 deletions(-) diff --git a/README b/README index 4d7b552b..a7864f33 100644 --- a/README +++ b/README @@ -1,10 +1,10 @@ jemalloc is a general-purpose scalable concurrent malloc(3) implementation. This distribution is a stand-alone "portable" implementation that currently -targets Linux and Apple OS X. jemalloc is included as the default allocator in -the FreeBSD and NetBSD operating systems, and it is used by the Mozilla Firefox -web browser on Microsoft Windows-related platforms. Depending on your needs, -one of the other divergent versions may suit your needs better than this -distribution. +targets FreeBSD, Linux and Apple OS X. jemalloc is included as the default +allocator in the FreeBSD and NetBSD operating systems, and it is used by the +Mozilla Firefox web browser on Microsoft Windows-related platforms. Depending +on your needs, one of the other divergent versions may suit your needs better +than this distribution. The COPYING file contains copyright and licensing information. 
diff --git a/configure.ac b/configure.ac index 7e4f2211..478ae9d4 100644 --- a/configure.ac +++ b/configure.ac @@ -777,6 +777,17 @@ if test "x$have__malloc_thread_cleanup" = "x1" ; then force_tls="1" fi +dnl Check whether the BSD-specific _pthread_mutex_init_calloc_cb() exists. If +dnl so, mutex initialization causes allocation, and we need to implement this +dnl callback function in order to prevent recursive allocation. +AC_CHECK_FUNC([_pthread_mutex_init_calloc_cb], + [have__pthread_mutex_init_calloc_cb="1"], + [have__pthread_mutex_init_calloc_cb="0"] + ) +if test "x$have__pthread_mutex_init_calloc_cb" = "x1" ; then + AC_DEFINE([JEMALLOC_MUTEX_INIT_CB]) +fi + dnl Disable lazy locking by default. AC_ARG_ENABLE([lazy_lock], [AS_HELP_STRING([--enable-lazy-lock], diff --git a/include/jemalloc/internal/base.h b/include/jemalloc/internal/base.h index 796a2835..9cf75ffb 100644 --- a/include/jemalloc/internal/base.h +++ b/include/jemalloc/internal/base.h @@ -10,6 +10,7 @@ #ifdef JEMALLOC_H_EXTERNS void *base_alloc(size_t size); +void *base_calloc(size_t number, size_t size); extent_node_t *base_node_alloc(void); void base_node_dealloc(extent_node_t *node); bool base_boot(void); diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index e0558140..4f557794 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -370,6 +371,8 @@ extern malloc_mutex_t arenas_lock; /* Protects arenas initialization. 
*/ extern arena_t **arenas; extern unsigned narenas; +extern bool malloc_initialized; + arena_t *arenas_extend(unsigned ind); void arenas_cleanup(void *arg); arena_t *choose_arena_hard(void); diff --git a/include/jemalloc/internal/mutex.h b/include/jemalloc/internal/mutex.h index 10637e92..98f2cba5 100644 --- a/include/jemalloc/internal/mutex.h +++ b/include/jemalloc/internal/mutex.h @@ -6,9 +6,12 @@ typedef OSSpinLock malloc_mutex_t; #define MALLOC_MUTEX_INITIALIZER 0 #else typedef pthread_mutex_t malloc_mutex_t; -# ifdef PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP +# if (defined(PTHREAD_MUTEX_ADAPTIVE_NP) && \ + defined(PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP)) +# define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_ADAPTIVE_NP # define MALLOC_MUTEX_INITIALIZER PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP # else +# define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_DEFAULT # define MALLOC_MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER # endif #endif diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 5a174acd..0e32c612 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -71,7 +71,7 @@ a_name##_tsd_set(a_type *val); #ifdef JEMALLOC_MALLOC_THREAD_CLEANUP #define malloc_tsd_externs(a_name, a_type) \ extern __thread a_type a_name##_tls; \ -extern __thread bool *a_name##_initialized; \ +extern __thread bool a_name##_initialized; \ extern bool a_name##_booted; #elif (defined(JEMALLOC_TLS)) #define malloc_tsd_externs(a_name, a_type) \ diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h index 5156399f..3d3ea3ab 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util.h @@ -85,6 +85,7 @@ extern void (*je_malloc_message)(void *wcbopaque, const char *s); int buferror(int errnum, char *buf, size_t buflen); +uintmax_t malloc_strtoumax(const char *nptr, char **endptr, int base); /* * malloc_vsnprintf() supports a subset of snprintf(3) that avoids floating diff --git 
a/include/jemalloc/jemalloc_defs.h.in b/include/jemalloc/jemalloc_defs.h.in index 838f5615..f150413e 100644 --- a/include/jemalloc/jemalloc_defs.h.in +++ b/include/jemalloc/jemalloc_defs.h.in @@ -68,6 +68,20 @@ */ #undef JEMALLOC_MALLOC_THREAD_CLEANUP +/* + * Defined if threaded initialization is known to be safe on this platform. + * Among other things, it must be possible to initialize a mutex without + * triggering allocation in order for threaded allocation to be safe. + */ +#undef JEMALLOC_THREADED_INIT + +/* + * Defined if the pthreads implementation defines + * _pthread_mutex_init_calloc_cb(), in which case the function is used in order + * to avoid recursive allocation during mutex initialization. + */ +#undef JEMALLOC_MUTEX_INIT_CB + /* Defined if __attribute__((...)) syntax is supported. */ #undef JEMALLOC_HAVE_ATTR #ifdef JEMALLOC_HAVE_ATTR diff --git a/src/base.c b/src/base.c index eb68334b..696c362a 100644 --- a/src/base.c +++ b/src/base.c @@ -66,6 +66,17 @@ base_alloc(size_t size) return (ret); } +void * +base_calloc(size_t number, size_t size) +{ + void *ret = base_alloc(number * size); + + if (ret != NULL) + memset(ret, 0, number * size); + + return (ret); +} + extent_node_t * base_node_alloc(void) { diff --git a/src/ctl.c b/src/ctl.c index e17e5034..943c2925 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -615,19 +615,19 @@ ctl_lookup(const char *name, ctl_node_t const **nodesp, size_t *mibp, goto RETURN; } } else { - unsigned long index; + uintmax_t index; const ctl_node_t *inode; /* Children are indexed. 
*/ - index = strtoul(elm, NULL, 10); - if (index == ULONG_MAX) { + index = malloc_strtoumax(elm, NULL, 10); + if (index == UINTMAX_MAX || index > SIZE_T_MAX) { ret = ENOENT; goto RETURN; } inode = &node->u.named.children[0]; node = inode->u.indexed.index(mibp, *depthp, - index); + (size_t)index); if (node == NULL) { ret = ENOENT; goto RETURN; diff --git a/src/jemalloc.c b/src/jemalloc.c index b3e898c1..3e168fd0 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -38,10 +38,18 @@ arena_t **arenas; unsigned narenas; /* Set to true once the allocator has been initialized. */ -static bool malloc_initialized = false; +bool malloc_initialized = false; +#ifdef JEMALLOC_THREADED_INIT /* Used to let the initializing thread recursively allocate. */ static pthread_t malloc_initializer = (unsigned long)0; +# define INITIALIZER pthread_self() +# define IS_INITIALIZER (malloc_initializer == pthread_self()) +#else +static bool malloc_initializer = false; +# define INITIALIZER true +# define IS_INITIALIZER malloc_initializer +#endif /* Used to avoid initialization races. */ static malloc_mutex_t init_lock = MALLOC_MUTEX_INITIALIZER; @@ -127,7 +135,7 @@ choose_arena_hard(void) } } - if (arenas[choose] == 0 || first_null == narenas) { + if (arenas[choose]->nthreads == 0 || first_null == narenas) { /* * Use an unloaded arena, or the least loaded arena if * all arenas are already initialized. 
@@ -413,22 +421,22 @@ malloc_conf_init(void) #define CONF_HANDLE_SIZE_T(o, n, min, max) \ if (sizeof(#n)-1 == klen && strncmp(#n, k, \ klen) == 0) { \ - unsigned long ul; \ + uintmax_t um; \ char *end; \ \ errno = 0; \ - ul = strtoul(v, &end, 0); \ + um = malloc_strtoumax(v, &end, 0); \ if (errno != 0 || (uintptr_t)end - \ (uintptr_t)v != vlen) { \ malloc_conf_error( \ "Invalid conf value", \ k, klen, v, vlen); \ - } else if (ul < min || ul > max) { \ + } else if (um < min || um > max) { \ malloc_conf_error( \ "Out-of-range conf value", \ k, klen, v, vlen); \ } else \ - o = ul; \ + o = um; \ continue; \ } #define CONF_HANDLE_SSIZE_T(o, n, min, max) \ @@ -519,7 +527,7 @@ malloc_init_hard(void) arena_t *init_arenas[1]; malloc_mutex_lock(&init_lock); - if (malloc_initialized || malloc_initializer == pthread_self()) { + if (malloc_initialized || IS_INITIALIZER) { /* * Another thread initialized the allocator before this one * acquired init_lock, or this thread is the initializing @@ -528,7 +536,8 @@ malloc_init_hard(void) malloc_mutex_unlock(&init_lock); return (false); } - if (malloc_initializer != (unsigned long)0) { +#ifdef JEMALLOC_THREADED_INIT + if (IS_INITIALIZER == false) { /* Busy-wait until the initializing thread completes. */ do { malloc_mutex_unlock(&init_lock); @@ -538,6 +547,8 @@ malloc_init_hard(void) malloc_mutex_unlock(&init_lock); return (false); } +#endif + malloc_initializer = INITIALIZER; #ifdef DYNAMIC_PAGE_SHIFT /* Get page size. */ @@ -564,6 +575,7 @@ malloc_init_hard(void) malloc_conf_init(); +#ifndef JEMALLOC_MUTEX_INIT_CB /* Register fork handlers. */ if (pthread_atfork(jemalloc_prefork, jemalloc_postfork_parent, jemalloc_postfork_child) != 0) { @@ -571,11 +583,7 @@ malloc_init_hard(void) if (opt_abort) abort(); } - - if (ctl_boot()) { - malloc_mutex_unlock(&init_lock); - return (true); - } +#endif if (opt_stats_print) { /* Print statistics at exit. 
*/ @@ -596,6 +604,11 @@ malloc_init_hard(void) return (true); } + if (ctl_boot()) { + malloc_mutex_unlock(&init_lock); + return (true); + } + if (config_prof) prof_boot1(); @@ -654,7 +667,6 @@ malloc_init_hard(void) } /* Get number of CPUs. */ - malloc_initializer = pthread_self(); malloc_mutex_unlock(&init_lock); ncpus = malloc_ncpus(); malloc_mutex_lock(&init_lock); @@ -1018,8 +1030,7 @@ je_realloc(void *ptr, size_t size) } if (ptr != NULL) { - assert(malloc_initialized || malloc_initializer == - pthread_self()); + assert(malloc_initialized || IS_INITIALIZER); if (config_prof || config_stats) old_size = isalloc(ptr); @@ -1124,8 +1135,7 @@ je_free(void *ptr) if (ptr != NULL) { size_t usize; - assert(malloc_initialized || malloc_initializer == - pthread_self()); + assert(malloc_initialized || IS_INITIALIZER); if (config_prof && opt_prof) { usize = isalloc(ptr); @@ -1208,7 +1218,7 @@ je_malloc_usable_size(const void *ptr) { size_t ret; - assert(malloc_initialized || malloc_initializer == pthread_self()); + assert(malloc_initialized || IS_INITIALIZER); if (config_ivsalloc) ret = ivsalloc(ptr); @@ -1372,7 +1382,7 @@ je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, int flags) assert(*ptr != NULL); assert(size != 0); assert(SIZE_T_MAX - size >= extra); - assert(malloc_initialized || malloc_initializer == pthread_self()); + assert(malloc_initialized || IS_INITIALIZER); p = *ptr; if (config_prof && opt_prof) { @@ -1457,7 +1467,7 @@ je_sallocm(const void *ptr, size_t *rsize, int flags) { size_t sz; - assert(malloc_initialized || malloc_initializer == pthread_self()); + assert(malloc_initialized || IS_INITIALIZER); if (config_ivsalloc) sz = ivsalloc(ptr); @@ -1479,7 +1489,7 @@ je_dallocm(void *ptr, int flags) size_t usize; assert(ptr != NULL); - assert(malloc_initialized || malloc_initializer == pthread_self()); + assert(malloc_initialized || IS_INITIALIZER); if (config_stats) usize = isalloc(ptr); @@ -1528,8 +1538,13 @@ je_nallocm(size_t *rsize, size_t 
size, int flags) * malloc during fork(). */ +#ifndef JEMALLOC_MUTEX_INIT_CB void jemalloc_prefork(void) +#else +void +_malloc_prefork(void) +#endif { unsigned i; @@ -1544,8 +1559,13 @@ jemalloc_prefork(void) chunk_dss_prefork(); } +#ifndef JEMALLOC_MUTEX_INIT_CB void jemalloc_postfork_parent(void) +#else +void +_malloc_postfork(void) +#endif { unsigned i; diff --git a/src/mutex.c b/src/mutex.c index 07d2a033..0b20bbf3 100644 --- a/src/mutex.c +++ b/src/mutex.c @@ -56,21 +56,25 @@ pthread_create(pthread_t *__restrict thread, /******************************************************************************/ +#ifdef JEMALLOC_MUTEX_INIT_CB +int _pthread_mutex_init_calloc_cb(pthread_mutex_t *mutex, + void *(calloc_cb)(size_t, size_t)); +#endif + bool malloc_mutex_init(malloc_mutex_t *mutex) { #ifdef JEMALLOC_OSSPIN *mutex = 0; +#elif (defined(JEMALLOC_MUTEX_INIT_CB)) + if (_pthread_mutex_init_calloc_cb(mutex, base_calloc) != 0) + return (true); #else pthread_mutexattr_t attr; if (pthread_mutexattr_init(&attr) != 0) return (true); -#ifdef PTHREAD_MUTEX_ADAPTIVE_NP - pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ADAPTIVE_NP); -#else - pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_DEFAULT); -#endif + pthread_mutexattr_settype(&attr, MALLOC_MUTEX_TYPE); if (pthread_mutex_init(mutex, &attr) != 0) { pthread_mutexattr_destroy(&attr); return (true); @@ -99,10 +103,14 @@ void malloc_mutex_postfork_child(malloc_mutex_t *mutex) { +#ifdef JEMALLOC_MUTEX_INIT_CB + malloc_mutex_unlock(mutex); +#else if (malloc_mutex_init(mutex)) { malloc_printf(": Error re-initializing mutex in " "child\n"); if (opt_abort) abort(); } +#endif } diff --git a/src/util.c b/src/util.c index 698b53a9..090e1f06 100644 --- a/src/util.c +++ b/src/util.c @@ -44,7 +44,7 @@ JEMALLOC_CATTR(visibility("hidden"), static) void wrtmessage(void *cbopaque, const char *s) { - UNUSED int result = write(STDERR_FILENO, s, strlen(s)); + UNUSED int result = syscall(SYS_write, STDERR_FILENO, s, strlen(s)); } void 
(*je_malloc_message)(void *, const char *s) @@ -69,6 +69,123 @@ buferror(int errnum, char *buf, size_t buflen) #endif } +uintmax_t +malloc_strtoumax(const char *nptr, char **endptr, int base) +{ + uintmax_t ret, digit; + int b; + bool neg; + const char *p, *ns; + + if (base < 0 || base == 1 || base > 36) { + errno = EINVAL; + return (UINTMAX_MAX); + } + b = base; + + /* Swallow leading whitespace and get sign, if any. */ + neg = false; + p = nptr; + while (true) { + switch (*p) { + case '\t': case '\n': case '\v': case '\f': case '\r': case ' ': + p++; + break; + case '-': + neg = true; + /* Fall through. */ + case '+': + p++; + /* Fall through. */ + default: + goto PREFIX; + } + } + + /* Get prefix, if any. */ + PREFIX: + /* + * Note where the first non-whitespace/sign character is so that it is + * possible to tell whether any digits are consumed (e.g., " 0" vs. + * " -x"). + */ + ns = p; + if (*p == '0') { + switch (p[1]) { + case '0': case '1': case '2': case '3': case '4': case '5': + case '6': case '7': + if (b == 0) + b = 8; + if (b == 8) + p++; + break; + case 'x': + switch (p[2]) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + case 'A': case 'B': case 'C': case 'D': case 'E': + case 'F': + case 'a': case 'b': case 'c': case 'd': case 'e': + case 'f': + if (b == 0) + b = 16; + if (b == 16) + p += 2; + break; + default: + break; + } + break; + default: + break; + } + } + if (b == 0) + b = 10; + + /* Convert. */ + ret = 0; + while ((*p >= '0' && *p <= '9' && (digit = *p - '0') < b) + || (*p >= 'A' && *p <= 'Z' && (digit = 10 + *p - 'A') < b) + || (*p >= 'a' && *p <= 'z' && (digit = 10 + *p - 'a') < b)) { + uintmax_t pret = ret; + ret *= b; + ret += digit; + if (ret < pret) { + /* Overflow. */ + errno = ERANGE; + return (UINTMAX_MAX); + } + p++; + } + if (neg) + ret = -ret; + + if (endptr != NULL) { + if (p == ns) { + /* No characters were converted. 
*/ + *endptr = (char *)nptr; + } else + *endptr = (char *)p; + } + + if (config_debug && malloc_initialized) { + uintmax_t tret; + int perrno; + char *pend; + + perrno = errno; + if (endptr != NULL) + pend = *endptr; + tret = strtoumax(nptr, endptr, base); + assert(tret == ret); + assert(errno == perrno); + assert(endptr == NULL || *endptr == pend); + } + + return (ret); +} + static char * u2s(uintmax_t x, unsigned base, bool uppercase, char *s, size_t *slen_p) { @@ -220,7 +337,7 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) val = va_arg(ap, ptrdiff_t); \ break; \ case 'z': \ - val = va_arg(ap, size_t); \ + val = va_arg(ap, ssize_t); \ break; \ case 'p': /* Synthetic; used for %p. */ \ val = va_arg(ap, uintptr_t); \ @@ -289,10 +406,11 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': { - unsigned long uwidth; + uintmax_t uwidth; errno = 0; - uwidth = strtoul(f, (char **)&f, 10); - assert(uwidth != ULONG_MAX || errno != ERANGE); + uwidth = malloc_strtoumax(f, (char **)&f, 10); + assert(uwidth != UINTMAX_MAX || errno != + ERANGE); width = (int)uwidth; if (*f == '.') { f++; @@ -314,10 +432,10 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': { - unsigned long uprec; + uintmax_t uprec; errno = 0; - uprec = strtoul(f, (char **)&f, 10); - assert(uprec != ULONG_MAX || errno != ERANGE); + uprec = malloc_strtoumax(f, (char **)&f, 10); + assert(uprec != UINTMAX_MAX || errno != ERANGE); prec = (int)uprec; break; } @@ -435,7 +553,7 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) str[size - 1] = '\0'; ret = i; - if (config_debug) { + if (config_debug && malloc_initialized) { char buf[MALLOC_PRINTF_BUFSIZE]; int tret; From 
1e6138c88c7f3bb1f0e8fb785080ac5abc24210c Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 24 Mar 2012 19:36:27 -0700 Subject: [PATCH 0103/3378] Remove malloc_mutex_trylock(). Remove malloc_mutex_trylock(); it has not been used for quite some time. --- include/jemalloc/internal/mutex.h | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/include/jemalloc/internal/mutex.h b/include/jemalloc/internal/mutex.h index 98f2cba5..ad4f9c24 100644 --- a/include/jemalloc/internal/mutex.h +++ b/include/jemalloc/internal/mutex.h @@ -41,7 +41,6 @@ void malloc_mutex_postfork_child(malloc_mutex_t *mutex); #ifndef JEMALLOC_ENABLE_INLINE void malloc_mutex_lock(malloc_mutex_t *mutex); -bool malloc_mutex_trylock(malloc_mutex_t *mutex); void malloc_mutex_unlock(malloc_mutex_t *mutex); #endif @@ -59,20 +58,6 @@ malloc_mutex_lock(malloc_mutex_t *mutex) } } -JEMALLOC_INLINE bool -malloc_mutex_trylock(malloc_mutex_t *mutex) -{ - - if (isthreaded) { -#ifdef JEMALLOC_OSSPIN - return (OSSpinLockTry(mutex) == false); -#else - return (pthread_mutex_trylock(mutex) != 0); -#endif - } else - return (false); -} - JEMALLOC_INLINE void malloc_mutex_unlock(malloc_mutex_t *mutex) { From c1e567bda042d94159026b96e7a77683606037fa Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Mon, 26 Mar 2012 17:03:41 +0200 Subject: [PATCH 0104/3378] Use __sync_add_and_fetch and __sync_sub_and_fetch when they are available These functions may be available as inlines or as libgcc functions. In the former case, a __GCC_HAVE_SYNC_COMPARE_AND_SWAP_n macro is defined. But we still want to use these functions in the latter case, when we don't have our own implementation. 
--- configure.ac | 34 +++++++++++++++++++++++++++++ include/jemalloc/internal/atomic.h | 18 ++++++++++++--- include/jemalloc/jemalloc_defs.h.in | 16 ++++++++++++++ 3 files changed, 65 insertions(+), 3 deletions(-) diff --git a/configure.ac b/configure.ac index 478ae9d4..5999a33a 100644 --- a/configure.ac +++ b/configure.ac @@ -886,6 +886,40 @@ if test "x${je_cv_osatomic}" = "xyes" ; then AC_DEFINE([JEMALLOC_OSATOMIC], [ ]) fi +dnl ============================================================================ +dnl Check whether __sync_{add,sub}_and_fetch() are available despite +dnl __GCC_HAVE_SYNC_COMPARE_AND_SWAP_n macros being undefined. + +AC_DEFUN([JE_SYNC_COMPARE_AND_SWAP_CHECK],[ + AC_CACHE_CHECK([whether to force $1-bit __sync_{add,sub}_and_fetch()], + [je_cv_sync_compare_and_swap_$2], + [AC_COMPILE_IFELSE([AC_LANG_PROGRAM([ + #include + ], + [ + #ifndef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_$2 + { + uint$1_t x$1 = 0; + __sync_add_and_fetch(&x$1, 42); + __sync_sub_and_fetch(&x$1, 1); + } + #else + #error __GCC_HAVE_SYNC_COMPARE_AND_SWAP_$2 is defined, no need to force + #endif + ])], + [je_cv_sync_compare_and_swap_$2=yes], + [je_cv_sync_compare_and_swap_$2=no])]) + + if test "x${je_cv_sync_compare_and_swap_$2}" = "xyes" ; then + AC_DEFINE([JE_FORCE_SYNC_COMPARE_AND_SWAP_$2], [ ]) + fi +]) + +if test "x${je_cv_osatomic}" != "xyes" ; then + JE_SYNC_COMPARE_AND_SWAP_CHECK(32, 4) + JE_SYNC_COMPARE_AND_SWAP_CHECK(64, 8) +fi + dnl ============================================================================ dnl Check for spinlock(3) operations as provided on Darwin. 
diff --git a/include/jemalloc/internal/atomic.h b/include/jemalloc/internal/atomic.h index 7a9cb61e..d8f6ca57 100644 --- a/include/jemalloc/internal/atomic.h +++ b/include/jemalloc/internal/atomic.h @@ -87,6 +87,20 @@ atomic_sub_uint64(uint64_t *p, uint64_t x) return (x); } +#elif (defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_8)) +JEMALLOC_INLINE uint64_t +atomic_add_uint64(uint64_t *p, uint64_t x) +{ + + return (__sync_add_and_fetch(p, x)); +} + +JEMALLOC_INLINE uint64_t +atomic_sub_uint64(uint64_t *p, uint64_t x) +{ + + return (__sync_sub_and_fetch(p, x)); +} #else # if (LG_SIZEOF_PTR == 3) # error "Missing implementation for 64-bit atomic operations" @@ -150,9 +164,7 @@ atomic_sub_uint32(uint32_t *p, uint32_t x) return (x); } -#elif (defined __SH4__ || defined __mips__) && (__GNUC__ > 4 || \ - (__GNUC__ == 4 && (__GNUC_MINOR__ > 1 || (__GNUC_MINOR__ == 1 && \ - __GNUC_PATCHLEVEL__ > 1)))) +#elif (defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_4)) JEMALLOC_INLINE uint32_t atomic_add_uint32(uint32_t *p, uint32_t x) { diff --git a/include/jemalloc/jemalloc_defs.h.in b/include/jemalloc/jemalloc_defs.h.in index f150413e..e4bfa04a 100644 --- a/include/jemalloc/jemalloc_defs.h.in +++ b/include/jemalloc/jemalloc_defs.h.in @@ -53,6 +53,22 @@ */ #undef JEMALLOC_OSATOMIC +/* + * Defined if __sync_add_and_fetch(uint32_t *, uint32_t) and + * __sync_sub_and_fetch(uint32_t *, uint32_t) are available, despite + * __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 not being defined (which means the + * functions are defined in libgcc instead of being inlines) + */ +#undef JE_FORCE_SYNC_COMPARE_AND_SWAP_4 + +/* + * Defined if __sync_add_and_fetch(uint64_t *, uint64_t) and + * __sync_sub_and_fetch(uint64_t *, uint64_t) are available, despite + * __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 not being defined (which means the + * functions are defined in libgcc instead of being inlines) + */ +#undef JE_FORCE_SYNC_COMPARE_AND_SWAP_8 + /* * Defined if OSSpin*() functions are available, as provided by Darwin, and * 
documented in the spinlock(3) manual page. From 5c89c50d1803dc0fb6544c1abd40552e76c8614d Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Mon, 26 Mar 2012 17:46:57 +0200 Subject: [PATCH 0105/3378] Fix glibc hooks when using both --with-jemalloc-prefix and --with-mangling --- src/jemalloc.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 3e168fd0..d08e103b 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1181,7 +1181,15 @@ je_valloc(size_t size) } #endif -#if (!defined(JEMALLOC_PREFIX) && defined(__GLIBC__) && !defined(__UCLIBC__)) +/* + * is_malloc(je_malloc) is some macro magic to detect if jemalloc_defs.h has + * #define je_malloc malloc + */ +#define malloc_is_malloc 1 +#define is_malloc_(a) malloc_is_ ## a +#define is_malloc(a) is_malloc_(a) + +#if ((is_malloc(je_malloc) == 1) && defined(__GLIBC__) && !defined(__UCLIBC__)) /* * glibc provides the RTLD_DEEPBIND flag for dlopen which can make it possible * to inconsistently reference libc's malloc(3)-compatible functions From 5b3db098f73f467a03f87a2242c692268f796a56 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Mon, 26 Mar 2012 18:39:35 +0200 Subject: [PATCH 0106/3378] Make zone_{free, realloc, free_definite_size} fallback to the system allocator if they are called with a pointer that jemalloc didn't allocate It turns out some OSX system libraries (like CoreGraphics on 10.6) like to call malloc_zone_* functions, but giving them pointers that weren't allocated with the zone they are using. Possibly, they do malloc_zone_malloc(malloc_default_zone()) before we register the jemalloc zone, and malloc_zone_realloc(malloc_default_zone()) after. malloc_default_zone() returning a different value in both cases. 
--- src/zone.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/src/zone.c b/src/zone.c index a8f09c98..d3107f85 100644 --- a/src/zone.c +++ b/src/zone.c @@ -80,14 +80,22 @@ static void zone_free(malloc_zone_t *zone, void *ptr) { - je_free(ptr); + if (ivsalloc(ptr) != 0) { + je_free(ptr); + return; + } + + free(ptr); } static void * zone_realloc(malloc_zone_t *zone, void *ptr, size_t size) { - return (je_realloc(ptr, size)); + if (ivsalloc(ptr) != 0) + return (je_realloc(ptr, size)); + + return (realloc(ptr, size)); } #if (JEMALLOC_ZONE_VERSION >= 5) @@ -107,8 +115,13 @@ static void zone_free_definite_size(malloc_zone_t *zone, void *ptr, size_t size) { - assert(ivsalloc(ptr) == size); - je_free(ptr); + if (ivsalloc(ptr) != 0) { + assert(ivsalloc(ptr) == size); + je_free(ptr); + return; + } + + free(ptr); } #endif From 2465bdf4937ffba309e7289014443c6b51566f22 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 26 Mar 2012 13:13:55 -0700 Subject: [PATCH 0107/3378] Check for NULL ptr in malloc_usable_size(). Check for NULL ptr in malloc_usable_size(), rather than just asserting that ptr is non-NULL. This matches behavior of other implementations (e.g., glibc and tcmalloc). --- src/jemalloc.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index d08e103b..ee771c78 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1230,10 +1230,8 @@ je_malloc_usable_size(const void *ptr) if (config_ivsalloc) ret = ivsalloc(ptr); - else { - assert(ptr != NULL); - ret = isalloc(ptr); - } + else + ret = (ptr != NULL) ? isalloc(ptr) : 0; return (ret); } From fd4fcefa004e04ea8672b11e280a6ced16c38dd2 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 23 Mar 2012 17:40:58 -0700 Subject: [PATCH 0108/3378] Force the lazy-lock feature on FreeBSD. Force the lazy-lock feature on FreeBSD in order to avoid pthread_self(), because it causes allocation. 
(This change was mistakenly omitted from 41b6afb834b1f5250223678c52bd4f013d4234f6.) --- configure.ac | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/configure.ac b/configure.ac index 5999a33a..2616b0a6 100644 --- a/configure.ac +++ b/configure.ac @@ -221,6 +221,7 @@ case "${host}" in abi="elf" AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) RPATH="-Wl,-rpath," + force_lazy_lock="1" ;; *-*-linux*) CFLAGS="$CFLAGS" @@ -800,6 +801,10 @@ fi ], [enable_lazy_lock="0"] ) +if test "x$enable_lazy_lock" = "x0" -a "x${force_lazy_lock}" = "x1" ; then + AC_MSG_RESULT([Forcing lazy-lock to avoid allocator/threading bootstrap issues]) + enable_lazy_lock="1" +fi if test "x$enable_lazy_lock" = "x1" ; then AC_CHECK_HEADERS([dlfcn.h], , [AC_MSG_ERROR([dlfcn.h is missing])]) AC_CHECK_FUNC([dlsym], [], From d4be8b7b6ee2e21d079180455d4ccbf45cc1cee7 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 26 Mar 2012 18:54:44 -0700 Subject: [PATCH 0109/3378] Add the "thread.tcache.enabled" mallctl. --- Makefile.in | 3 +- doc/jemalloc.xml.in | 14 ++++ include/jemalloc/internal/tcache.h | 116 +++++++++++++++++++++++++---- src/ctl.c | 36 +++++++-- src/tcache.c | 27 ++++--- test/thread_tcache_enabled.c | 109 +++++++++++++++++++++++++++ test/thread_tcache_enabled.exp | 2 + 7 files changed, 271 insertions(+), 36 deletions(-) create mode 100644 test/thread_tcache_enabled.c create mode 100644 test/thread_tcache_enabled.exp diff --git a/Makefile.in b/Makefile.in index 494ac9a6..821c0634 100644 --- a/Makefile.in +++ b/Makefile.in @@ -66,7 +66,8 @@ DOCS_MAN3 := $(DOCS_XML:@objroot@%.xml=@srcroot@%.3) DOCS := $(DOCS_HTML) $(DOCS_MAN3) CTESTS := @srcroot@test/aligned_alloc.c @srcroot@test/allocated.c \ @srcroot@test/bitmap.c @srcroot@test/mremap.c \ - @srcroot@test/posix_memalign.c @srcroot@test/thread_arena.c + @srcroot@test/posix_memalign.c @srcroot@test/thread_arena.c \ + @srcroot@test/thread_tcache_enabled.c ifeq (@enable_experimental@, 1) CTESTS += @srcroot@test/allocm.c 
@srcroot@test/rallocm.c endif diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 3cbc851f..0b468b04 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1103,6 +1103,20 @@ malloc_conf = "xmalloc:true";]]> mallctl* calls. + + + thread.tcache.enabled + (bool) + rw + [] + + Enable/disable calling thread's tcache. The tcache is + implicitly flushed as a side effect of becoming + disabled (see thread.tcache.flush). + + + thread.tcache.flush diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index 30e63a50..0d999f24 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -5,6 +5,16 @@ typedef struct tcache_bin_info_s tcache_bin_info_t; typedef struct tcache_bin_s tcache_bin_t; typedef struct tcache_s tcache_t; +/* + * tcache pointers close to NULL are used to encode state information that is + * used for two purposes: preventing thread caching on a per thread basis and + * cleaning up during thread shutdown. + */ +#define TCACHE_STATE_DISABLED ((tcache_t *)(uintptr_t)1) +#define TCACHE_STATE_REINCARNATED ((tcache_t *)(uintptr_t)2) +#define TCACHE_STATE_PURGATORY ((tcache_t *)(uintptr_t)3) +#define TCACHE_STATE_MAX TCACHE_STATE_PURGATORY + /* * Absolute maximum number of cache slots for each small bin in the thread * cache. This is an additional constraint beyond that imposed as: twice the @@ -35,6 +45,12 @@ typedef struct tcache_s tcache_t; /******************************************************************************/ #ifdef JEMALLOC_H_STRUCTS +typedef enum { + tcache_enabled_false = 0, /* Enable cast to/from bool. */ + tcache_enabled_true = 1, + tcache_enabled_default = 2 +} tcache_enabled_t; + /* * Read-only information associated with each element of tcache_t's tbins array * is stored separately, mainly to reduce memory usage. 
@@ -105,9 +121,13 @@ bool tcache_boot1(void); #ifndef JEMALLOC_ENABLE_INLINE malloc_tsd_protos(JEMALLOC_ATTR(unused), tcache, tcache_t *) +malloc_tsd_protos(JEMALLOC_ATTR(unused), tcache_enabled, tcache_enabled_t) void tcache_event(tcache_t *tcache); +void tcache_flush(void); +bool tcache_enabled_get(void); tcache_t *tcache_get(void); +void tcache_enabled_set(bool enabled); void *tcache_alloc_easy(tcache_bin_t *tbin); void *tcache_alloc_small(tcache_t *tcache, size_t size, bool zero); void *tcache_alloc_large(tcache_t *tcache, size_t size, bool zero); @@ -120,6 +140,69 @@ void tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size); malloc_tsd_externs(tcache, tcache_t *) malloc_tsd_funcs(JEMALLOC_INLINE, tcache, tcache_t *, NULL, tcache_thread_cleanup) +/* Per thread flag that allows thread caches to be disabled. */ +malloc_tsd_externs(tcache_enabled, tcache_enabled_t) +malloc_tsd_funcs(JEMALLOC_INLINE, tcache_enabled, tcache_enabled_t, + tcache_enabled_default, malloc_tsd_no_cleanup) + +JEMALLOC_INLINE void +tcache_flush(void) +{ + tcache_t *tcache; + + cassert(config_tcache); + + tcache = *tcache_tsd_get(); + if ((uintptr_t)tcache <= (uintptr_t)TCACHE_STATE_MAX) + return; + tcache_destroy(tcache); + tcache = NULL; + tcache_tsd_set(&tcache); +} + +JEMALLOC_INLINE bool +tcache_enabled_get(void) +{ + tcache_enabled_t tcache_enabled; + + cassert(config_tcache); + + tcache_enabled = *tcache_enabled_tsd_get(); + if (tcache_enabled == tcache_enabled_default) { + tcache_enabled = (tcache_enabled_t)opt_tcache; + tcache_enabled_tsd_set(&tcache_enabled); + } + + return ((bool)tcache_enabled); +} + +JEMALLOC_INLINE void +tcache_enabled_set(bool enabled) +{ + tcache_enabled_t tcache_enabled; + tcache_t *tcache; + + cassert(config_tcache); + + tcache_enabled = (tcache_enabled_t)enabled; + tcache_enabled_tsd_set(&tcache_enabled); + tcache = *tcache_tsd_get(); + if (enabled) { + if (tcache == TCACHE_STATE_DISABLED) { + tcache = NULL; + tcache_tsd_set(&tcache); + } + } else 
/* disabled */ { + if (tcache > TCACHE_STATE_MAX) { + tcache_destroy(tcache); + tcache = NULL; + } + if (tcache == NULL) { + tcache = TCACHE_STATE_DISABLED; + tcache_tsd_set(&tcache); + } + } +} JEMALLOC_INLINE tcache_t * tcache_get(void) @@ -128,29 +211,32 @@ tcache_get(void) if (config_tcache == false) return (NULL); - if (config_lazy_lock && (isthreaded & opt_tcache) == false) - return (NULL); - else if (opt_tcache == false) + if (config_lazy_lock && isthreaded == false) return (NULL); tcache = *tcache_tsd_get(); - if ((uintptr_t)tcache <= (uintptr_t)2) { + if ((uintptr_t)tcache <= (uintptr_t)TCACHE_STATE_MAX) { + if (tcache == TCACHE_STATE_DISABLED) + return (NULL); if (tcache == NULL) { - tcache = tcache_create(choose_arena()); - if (tcache == NULL) + if (tcache_enabled_get() == false) { + tcache_enabled_set(false); /* Memoize. */ return (NULL); - } else { - if (tcache == (void *)(uintptr_t)1) { - /* - * Make a note that an allocator function was - * called after the tcache_thread_cleanup() was - * called. - */ - tcache = (tcache_t *)(uintptr_t)2; - tcache_tsd_set(&tcache); } + return (tcache_create(choose_arena())); + } + if (tcache == TCACHE_STATE_PURGATORY) { + /* + * Make a note that an allocator function was called + * after tcache_thread_cleanup() was called. 
+ */ + tcache = TCACHE_STATE_REINCARNATED; + tcache_tsd_set(&tcache); return (NULL); } + if (tcache == TCACHE_STATE_REINCARNATED) + return (NULL); + not_reached(); } return (tcache); diff --git a/src/ctl.c b/src/ctl.c index 943c2925..08011616 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -39,6 +39,7 @@ static int ctl_lookup(const char *name, ctl_node_t const **nodesp, CTL_PROTO(version) CTL_PROTO(epoch) +CTL_PROTO(thread_tcache_enabled) CTL_PROTO(thread_tcache_flush) CTL_PROTO(thread_arena) CTL_PROTO(thread_allocated) @@ -151,6 +152,7 @@ CTL_PROTO(stats_mapped) #define INDEX(i) false, {.indexed = {i##_index}}, NULL static const ctl_node_t tcache_node[] = { + {NAME("enabled"), CTL(thread_tcache_enabled)}, {NAME("flush"), CTL(thread_tcache_flush)} }; @@ -966,25 +968,43 @@ RETURN: return (ret); } +static int +thread_tcache_enabled_ctl(const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) +{ + int ret; + bool oldval; + + if (config_tcache == false) + return (ENOENT); + + oldval = tcache_enabled_get(); + if (newp != NULL) { + if (newlen != sizeof(bool)) { + ret = EINVAL; + goto RETURN; + } + tcache_enabled_set(*(bool *)newp); + } + READ(oldval, bool); + +RETURN: + ret = 0; + return (ret); +} + static int thread_tcache_flush_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; - tcache_t *tcache; if (config_tcache == false) return (ENOENT); VOID(); - if ((tcache = *tcache_tsd_get()) == NULL) { - ret = 0; - goto RETURN; - } - tcache_destroy(tcache); - tcache = NULL; - tcache_tsd_set(&tcache); + tcache_flush(); ret = 0; RETURN: diff --git a/src/tcache.c b/src/tcache.c index 3442406d..bc911a6e 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -5,6 +5,7 @@ /* Data. 
*/ malloc_tsd_data(, tcache, tcache_t *, NULL) +malloc_tsd_data(, tcache_enabled, tcache_enabled_t, tcache_enabled_default) bool opt_tcache = true; ssize_t opt_lg_tcache_max = LG_TCACHE_MAXCLASS_DEFAULT; @@ -328,25 +329,27 @@ tcache_thread_cleanup(void *arg) { tcache_t *tcache = *(tcache_t **)arg; - if (tcache == (void *)(uintptr_t)1) { + if (tcache == TCACHE_STATE_DISABLED) { + /* Do nothing. */ + } else if (tcache == TCACHE_STATE_REINCARNATED) { + /* + * Another destructor called an allocator function after this + * destructor was called. Reset tcache to 1 in order to + * receive another callback. + */ + tcache = TCACHE_STATE_PURGATORY; + tcache_tsd_set(&tcache); + } else if (tcache == TCACHE_STATE_PURGATORY) { /* * The previous time this destructor was called, we set the key * to 1 so that other destructors wouldn't cause re-creation of * the tcache. This time, do nothing, so that the destructor * will not be called again. */ - } else if (tcache == (void *)(uintptr_t)2) { - /* - * Another destructor called an allocator function after this - * destructor was called. Reset tcache to 1 in order to - * receive another callback. 
- */ - tcache = (tcache_t *)(uintptr_t)1; - tcache_tsd_set(&tcache); } else if (tcache != NULL) { - assert(tcache != (void *)(uintptr_t)1); + assert(tcache != TCACHE_STATE_PURGATORY); tcache_destroy(tcache); - tcache = (tcache_t *)(uintptr_t)1; + tcache = TCACHE_STATE_PURGATORY; tcache_tsd_set(&tcache); } } @@ -428,7 +431,7 @@ tcache_boot1(void) { if (opt_tcache) { - if (tcache_tsd_boot()) + if (tcache_tsd_boot() || tcache_enabled_tsd_boot()) return (true); } diff --git a/test/thread_tcache_enabled.c b/test/thread_tcache_enabled.c new file mode 100644 index 00000000..46540385 --- /dev/null +++ b/test/thread_tcache_enabled.c @@ -0,0 +1,109 @@ +#include +#include +#include +#include +#include +#include + +#define JEMALLOC_MANGLE +#include "jemalloc_test.h" + +void * +thread_start(void *arg) +{ + int err; + size_t sz; + bool e0, e1; + + sz = sizeof(bool); + if ((err = mallctl("thread.tcache.enabled", &e0, &sz, NULL, 0))) { + if (err == ENOENT) { +#ifdef JEMALLOC_TCACHE + assert(false); +#endif + } + goto RETURN; + } + + if (e0) { + e1 = false; + assert(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz) + == 0); + assert(e0); + } + + e1 = true; + assert(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz) == 0); + assert(e0 == false); + + e1 = true; + assert(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz) == 0); + assert(e0); + + e1 = false; + assert(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz) == 0); + assert(e0); + + e1 = false; + assert(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz) == 0); + assert(e0 == false); + + free(malloc(1)); + e1 = true; + assert(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz) == 0); + assert(e0 == false); + + free(malloc(1)); + e1 = true; + assert(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz) == 0); + assert(e0); + + free(malloc(1)); + e1 = false; + assert(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz) == 0); + assert(e0); + + free(malloc(1)); + e1 = false; + assert(mallctl("thread.tcache.enabled", &e0, &sz, 
&e1, sz) == 0); + assert(e0 == false); + + free(malloc(1)); +RETURN: + return (NULL); +} + +int +main(void) +{ + int ret = 0; + pthread_t thread; + + fprintf(stderr, "Test begin\n"); + + thread_start(NULL); + + if (pthread_create(&thread, NULL, thread_start, NULL) + != 0) { + fprintf(stderr, "%s(): Error in pthread_create()\n", __func__); + ret = 1; + goto RETURN; + } + pthread_join(thread, (void *)&ret); + + thread_start(NULL); + + if (pthread_create(&thread, NULL, thread_start, NULL) + != 0) { + fprintf(stderr, "%s(): Error in pthread_create()\n", __func__); + ret = 1; + goto RETURN; + } + pthread_join(thread, (void *)&ret); + + thread_start(NULL); + +RETURN: + fprintf(stderr, "Test end\n"); + return (ret); +} diff --git a/test/thread_tcache_enabled.exp b/test/thread_tcache_enabled.exp new file mode 100644 index 00000000..369a88dd --- /dev/null +++ b/test/thread_tcache_enabled.exp @@ -0,0 +1,2 @@ +Test begin +Test end From e77fa59ece7e23de586f08980f627b8102511755 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Wed, 28 Mar 2012 09:53:16 +0200 Subject: [PATCH 0110/3378] Don't use pthread_atfork to register prefork/postfork handlers on OSX OSX libc calls zone allocators' force_lock/force_unlock already. --- src/jemalloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index ee771c78..9eae1372 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -575,7 +575,7 @@ malloc_init_hard(void) malloc_conf_init(); -#ifndef JEMALLOC_MUTEX_INIT_CB +#if (!defined(JEMALLOC_MUTEX_INIT_CB) && !defined(JEMALLOC_ZONE)) /* Register fork handlers. 
*/ if (pthread_atfork(jemalloc_prefork, jemalloc_postfork_parent, jemalloc_postfork_child) != 0) { From 1a0e7770243e0539fa8fef7bb1512f784f93389f Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Tue, 27 Mar 2012 14:48:58 +0200 Subject: [PATCH 0111/3378] Add a SYS_write definition on systems where it is not defined in headers Namely, in the Android NDK headers, SYS_write is not defined; but __NR_write is. --- include/jemalloc/internal/jemalloc_internal.h.in | 3 +++ src/util.c | 10 ++++++++++ 2 files changed, 13 insertions(+) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 4f557794..b7b8df8b 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -1,6 +1,9 @@ #include #include #include +#if !defined(SYS_write) && defined(__NR_write) +#define SYS_write __NR_write +#endif #include #include #include diff --git a/src/util.c b/src/util.c index 090e1f06..895aa198 100644 --- a/src/util.c +++ b/src/util.c @@ -44,7 +44,17 @@ JEMALLOC_CATTR(visibility("hidden"), static) void wrtmessage(void *cbopaque, const char *s) { + +#ifdef SYS_write + /* + * Use syscall(2) rather than write(2) when possible in order to avoid + * the possibility of memory allocation within libc. This is necessary + * on FreeBSD; most operating systems do not have this problem though. + */ UNUSED int result = syscall(SYS_write, STDERR_FILENO, s, strlen(s)); +#else + UNUSED int result = write(STDERR_FILENO, s, strlen(s)); +#endif } void (*je_malloc_message)(void *, const char *s) From 2cfe6d67ef6a622eeb47ba48b431bdafc0c45b35 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Tue, 27 Mar 2012 15:03:07 +0200 Subject: [PATCH 0112/3378] Change AC_COMPILE_IFELSE into AC_LINK_IFELSE for the __sync_{add, sub}_and_fetch() test With the Android NDK, __sync_{add,sub}_and_fetch() compile fine for uint64_t, but the corresponding libgcc function aren't there. 
--- configure.ac | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/configure.ac b/configure.ac index 2616b0a6..c1b46dc3 100644 --- a/configure.ac +++ b/configure.ac @@ -898,20 +898,20 @@ dnl __GCC_HAVE_SYNC_COMPARE_AND_SWAP_n macros being undefined. AC_DEFUN([JE_SYNC_COMPARE_AND_SWAP_CHECK],[ AC_CACHE_CHECK([whether to force $1-bit __sync_{add,sub}_and_fetch()], [je_cv_sync_compare_and_swap_$2], - [AC_COMPILE_IFELSE([AC_LANG_PROGRAM([ - #include - ], - [ - #ifndef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_$2 - { - uint$1_t x$1 = 0; - __sync_add_and_fetch(&x$1, 42); - __sync_sub_and_fetch(&x$1, 1); - } - #else - #error __GCC_HAVE_SYNC_COMPARE_AND_SWAP_$2 is defined, no need to force - #endif - ])], + [AC_LINK_IFELSE([AC_LANG_PROGRAM([ + #include + ], + [ + #ifndef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_$2 + { + uint$1_t x$1 = 0; + __sync_add_and_fetch(&x$1, 42); + __sync_sub_and_fetch(&x$1, 1); + } + #else + #error __GCC_HAVE_SYNC_COMPARE_AND_SWAP_$2 is defined, no need to force + #endif + ])], [je_cv_sync_compare_and_swap_$2=yes], [je_cv_sync_compare_and_swap_$2=no])]) From 71a93b8725fb52ae393ab88e2fccd5afa84c66a0 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Tue, 27 Mar 2012 14:20:12 +0200 Subject: [PATCH 0113/3378] Move zone registration to zone.c --- include/jemalloc/internal/private_namespace.h | 2 +- include/jemalloc/internal/zone.h | 3 +-- src/jemalloc.c | 24 ++----------------- src/zone.c | 22 ++++++++++++++--- 4 files changed, 23 insertions(+), 28 deletions(-) diff --git a/include/jemalloc/internal/private_namespace.h b/include/jemalloc/internal/private_namespace.h index 7103e680..ed34e328 100644 --- a/include/jemalloc/internal/private_namespace.h +++ b/include/jemalloc/internal/private_namespace.h @@ -82,7 +82,6 @@ #define ckh_string_keycomp JEMALLOC_N(ckh_string_keycomp) #define ckh_try_bucket_insert JEMALLOC_N(ckh_try_bucket_insert) #define ckh_try_insert JEMALLOC_N(ckh_try_insert) -#define create_zone 
JEMALLOC_N(create_zone) #define ctl_boot JEMALLOC_N(ctl_boot) #define ctl_bymib JEMALLOC_N(ctl_bymib) #define ctl_byname JEMALLOC_N(ctl_byname) @@ -195,6 +194,7 @@ #define prof_tdata_init JEMALLOC_N(prof_tdata_init) #define prof_tdata_tls JEMALLOC_N(prof_tdata_tls) #define pthread_create JEMALLOC_N(pthread_create) +#define register_zone JEMALLOC_N(register_zone) #define rtree_get JEMALLOC_N(rtree_get) #define rtree_get_locked JEMALLOC_N(rtree_get_locked) #define rtree_new JEMALLOC_N(rtree_new) diff --git a/include/jemalloc/internal/zone.h b/include/jemalloc/internal/zone.h index 859b529d..9eb4252f 100644 --- a/include/jemalloc/internal/zone.h +++ b/include/jemalloc/internal/zone.h @@ -12,8 +12,7 @@ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -malloc_zone_t *create_zone(void); -void szone2ozone(malloc_zone_t *zone); +void register_zone(void); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ diff --git a/src/jemalloc.c b/src/jemalloc.c index 9eae1372..908485a7 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -712,26 +712,6 @@ malloc_init_hard(void) /* Copy the pointer to the one arena that was already initialized. */ arenas[0] = init_arenas[0]; -#ifdef JEMALLOC_ZONE - /* Register the custom zone. At this point it won't be the default. */ - malloc_zone_t *jemalloc_zone = create_zone(); - malloc_zone_register(jemalloc_zone); - - /* - * Unregister and reregister the default zone. On OSX >= 10.6, - * unregistering takes the last registered zone and places it at the - * location of the specified zone. Unregistering the default zone thus - * makes the last registered one the default. On OSX < 10.6, - * unregistering shifts all registered zones. The first registered zone - * then becomes the default. 
- */ - do { - malloc_zone_t *default_zone = malloc_default_zone(); - malloc_zone_unregister(default_zone); - malloc_zone_register(default_zone); - } while (malloc_default_zone() != jemalloc_zone); -#endif - malloc_initialized = true; malloc_mutex_unlock(&init_lock); return (false); @@ -743,8 +723,8 @@ void jemalloc_darwin_init(void) { - if (malloc_init_hard()) - abort(); + if (malloc_init_hard() == false) + register_zone(); } #endif diff --git a/src/zone.c b/src/zone.c index d3107f85..4b6c75e4 100644 --- a/src/zone.c +++ b/src/zone.c @@ -159,8 +159,8 @@ zone_force_unlock(malloc_zone_t *zone) jemalloc_postfork_parent(); } -malloc_zone_t * -create_zone(void) +void +register_zone(void) { zone.size = (void *)zone_size; @@ -206,5 +206,21 @@ create_zone(void) zone_introspect.enumerate_unavailable_without_blocks = NULL; #endif #endif - return (&zone); + + /* Register the custom zone. At this point it won't be the default. */ + malloc_zone_register(&zone); + + /* + * Unregister and reregister the default zone. On OSX >= 10.6, + * unregistering takes the last registered zone and places it at the + * location of the specified zone. Unregistering the default zone thus + * makes the last registered one the default. On OSX < 10.6, + * unregistering shifts all registered zones. The first registered zone + * then becomes the default. 
+ */ + do { + malloc_zone_t *default_zone = malloc_default_zone(); + malloc_zone_unregister(default_zone); + malloc_zone_register(default_zone); + } while (malloc_default_zone() != &zone); } From 3c2ba0dcbc2f4896a892fad84d5dcf5bd4c30a81 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Tue, 27 Mar 2012 14:20:13 +0200 Subject: [PATCH 0114/3378] Avoid crashes when system libraries use the purgeable zone allocator --- src/jemalloc.c | 2 +- src/zone.c | 31 ++++++++++++++++++++++++++----- 2 files changed, 27 insertions(+), 6 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 908485a7..c0fe6c91 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1175,7 +1175,7 @@ je_valloc(size_t size) * to inconsistently reference libc's malloc(3)-compatible functions * (https://bugzilla.mozilla.org/show_bug.cgi?id=493541). * - * These definitions interpose hooks in glibc.  The functions are actually + * These definitions interpose hooks in glibc. The functions are actually * passed an extra argument for the caller return address, which will be * ignored. */ diff --git a/src/zone.c b/src/zone.c index 4b6c75e4..9fc87bb7 100644 --- a/src/zone.c +++ b/src/zone.c @@ -3,6 +3,13 @@ # error "This source file is for zones on Darwin (OS X)." #endif +/* + * The malloc_default_purgeable_zone function is only available on >= 10.6. + * We need to check whether it is present at runtime, thus the weak_import. + */ +extern malloc_zone_t *malloc_default_purgeable_zone(void) +JEMALLOC_ATTR(weak_import); + /******************************************************************************/ /* Data. */ @@ -207,15 +214,29 @@ register_zone(void) #endif #endif - /* Register the custom zone. At this point it won't be the default. */ + /* + * The default purgeable zone is created lazily by OSX's libc. It uses + * the default zone when it is created for "small" allocations + * (< 15 KiB), but assumes the default zone is a scalable_zone. 
This + * obviously fails when the default zone is the jemalloc zone, so + * malloc_default_purgeable_zone is called beforehand so that the + * default purgeable zone is created when the default zone is still + * a scalable_zone. As purgeable zones only exist on >= 10.6, we need + * to check for the existence of malloc_default_purgeable_zone() at + * run time. + */ + if (malloc_default_purgeable_zone != NULL) + malloc_default_purgeable_zone(); + + /* Register the custom zone. At this point it won't be the default. */ malloc_zone_register(&zone); /* - * Unregister and reregister the default zone. On OSX >= 10.6, + * Unregister and reregister the default zone. On OSX >= 10.6, * unregistering takes the last registered zone and places it at the - * location of the specified zone. Unregistering the default zone thus - * makes the last registered one the default. On OSX < 10.6, - * unregistering shifts all registered zones. The first registered zone + * location of the specified zone. Unregistering the default zone thus + * makes the last registered one the default. On OSX < 10.6, + * unregistering shifts all registered zones. The first registered zone * then becomes the default. */ do { From 09a0769ba7a3d139168e606e4295f8002861355f Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 30 Mar 2012 12:11:03 -0700 Subject: [PATCH 0115/3378] Work around TLS deallocation via free(). glibc uses memalign()/free() to allocate/deallocate TLS, which means that it is unsafe to set TLS variables as a side effect of free() -- they may already be deallocated. Work around this by avoiding tcache_create() within free(). Reported by Mike Hommey. 
--- include/jemalloc/internal/arena.h | 7 ++++--- include/jemalloc/internal/tcache.h | 16 ++++++++++++++-- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index c5214893..2592e89d 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -556,7 +556,7 @@ arena_malloc(size_t size, bool zero) assert(size <= arena_maxclass); if (size <= SMALL_MAXCLASS) { - if ((tcache = tcache_get()) != NULL) + if ((tcache = tcache_get(true)) != NULL) return (tcache_alloc_small(tcache, size, zero)); else return (arena_malloc_small(choose_arena(), size, zero)); @@ -565,7 +565,8 @@ arena_malloc(size_t size, bool zero) * Initialize tcache after checking size in order to avoid * infinite recursion during tcache initialization. */ - if (size <= tcache_maxclass && (tcache = tcache_get()) != NULL) + if (size <= tcache_maxclass && (tcache = tcache_get(true)) != + NULL) return (tcache_alloc_large(tcache, size, zero)); else return (arena_malloc_large(choose_arena(), size, zero)); @@ -590,7 +591,7 @@ arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr) { size_t pageind; arena_chunk_map_t *mapelm; - tcache_t *tcache = tcache_get(); + tcache_t *tcache = tcache_get(false); assert(arena != NULL); assert(chunk->arena == arena); diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index 0d999f24..12552d81 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -126,7 +126,7 @@ malloc_tsd_protos(JEMALLOC_ATTR(unused), tcache_enabled, tcache_enabled_t) void tcache_event(tcache_t *tcache); void tcache_flush(void); bool tcache_enabled_get(void); -tcache_t *tcache_get(void); +tcache_t *tcache_get(bool create); void tcache_enabled_set(bool enabled); void *tcache_alloc_easy(tcache_bin_t *tbin); void *tcache_alloc_small(tcache_t *tcache, size_t size, bool zero); @@ -205,7 +205,7 @@ tcache_enabled_set(bool enabled) } 
JEMALLOC_INLINE tcache_t * -tcache_get(void) +tcache_get(bool create) { tcache_t *tcache; @@ -219,6 +219,18 @@ tcache_get(void) if (tcache == TCACHE_STATE_DISABLED) return (NULL); if (tcache == NULL) { + if (create == false) { + /* + * Creating a tcache here would cause + * allocation as a side effect of free(). + * Ordinarily that would be okay since + * tcache_create() failure is a soft failure + * that doesn't propagate. However, if TLS + * data are freed via free() as in glibc, + * subtle TLS corruption could result. + */ + return (NULL); + } if (tcache_enabled_get() == false) { tcache_enabled_set(false); /* Memoize. */ return (NULL); From f2296deb57cdda01685f0d0ccf3c6e200378c673 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 30 Mar 2012 12:36:52 -0700 Subject: [PATCH 0116/3378] Clean up tsd (no functional changes). --- include/jemalloc/internal/tcache.h | 4 +++- include/jemalloc/internal/tsd.h | 14 ++------------ 2 files changed, 5 insertions(+), 13 deletions(-) diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index 12552d81..efae7003 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -227,7 +227,9 @@ tcache_get(bool create) * tcache_create() failure is a soft failure * that doesn't propagate. However, if TLS * data are freed via free() as in glibc, - * subtle TLS corruption could result. + * subtle corruption could result from setting + * a TLS variable after its backing memory is + * freed. */ return (NULL); } diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 0e32c612..60aaa427 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -109,13 +109,8 @@ a_attr bool a_name##_booted = false; #define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \ a_cleanup) \ /* Initialization/cleanup. 
*/ \ -a_attr void \ +a_attr bool \ a_name##_tsd_cleanup_wrapper(void *arg) \ -{ \ - \ -} \ -bool \ -a_name##_tsd_cleanup_pending(void *arg) \ { \ bool (*cleanup)(void *) = arg; \ \ @@ -131,7 +126,7 @@ a_name##_tsd_boot(void) \ \ if (a_cleanup != malloc_tsd_no_cleanup) { \ malloc_tsd_cleanup_register( \ - &a_name##_tsd_cleanup_pending, a_cleanup); \ + &a_name##_tsd_cleanup_wrapper, a_cleanup); \ } \ a_name##_booted = true; \ return (false); \ @@ -157,11 +152,6 @@ a_name##_tsd_set(a_type *val) \ #define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \ a_cleanup) \ /* Initialization/cleanup. */ \ -a_attr void \ -a_name##_tsd_cleanup_wrapper(void *arg) \ -{ \ - \ -} \ a_attr bool \ a_name##_tsd_boot(void) \ { \ From 4eeb52f080edb1f4b518249388f6c617386c00e5 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 2 Apr 2012 01:46:25 -0700 Subject: [PATCH 0117/3378] Remove vsnprintf() and strtoumax() validation. Remove code that validates malloc_vsnprintf() and malloc_strtoumax() against their namesakes. The validation code has adequately served its usefulness at this point, and it isn't worth dealing with the different formatting for %p with glibc versus other implementations for NULL pointers ("(nil)" vs. "0x0"). Reported by Mike Hommey. --- .../jemalloc/internal/jemalloc_internal.h.in | 2 -- src/jemalloc.c | 2 +- src/util.c | 27 ------------------- 3 files changed, 1 insertion(+), 30 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index b7b8df8b..9e57b62b 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -374,8 +374,6 @@ extern malloc_mutex_t arenas_lock; /* Protects arenas initialization. 
*/ extern arena_t **arenas; extern unsigned narenas; -extern bool malloc_initialized; - arena_t *arenas_extend(unsigned ind); void arenas_cleanup(void *arg); arena_t *choose_arena_hard(void); diff --git a/src/jemalloc.c b/src/jemalloc.c index c0fe6c91..5b1e0fdd 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -38,7 +38,7 @@ arena_t **arenas; unsigned narenas; /* Set to true once the allocator has been initialized. */ -bool malloc_initialized = false; +static bool malloc_initialized = false; #ifdef JEMALLOC_THREADED_INIT /* Used to let the initializing thread recursively allocate. */ diff --git a/src/util.c b/src/util.c index 895aa198..107bdcff 100644 --- a/src/util.c +++ b/src/util.c @@ -179,20 +179,6 @@ malloc_strtoumax(const char *nptr, char **endptr, int base) *endptr = (char *)p; } - if (config_debug && malloc_initialized) { - uintmax_t tret; - int perrno; - char *pend; - - perrno = errno; - if (endptr != NULL) - pend = *endptr; - tret = strtoumax(nptr, endptr, base); - assert(tret == ret); - assert(errno == perrno); - assert(endptr == NULL || *endptr == pend); - } - return (ret); } @@ -563,19 +549,6 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) str[size - 1] = '\0'; ret = i; - if (config_debug && malloc_initialized) { - char buf[MALLOC_PRINTF_BUFSIZE]; - int tret; - - /* - * Verify that the resulting string matches what vsnprintf() - * would have created. - */ - tret = vsnprintf(buf, sizeof(buf), format, tap); - assert(tret == ret); - assert(strcmp(buf, str) == 0); - } - #undef APPEND_C #undef APPEND_S #undef APPEND_PADDED_S From 722b370399fd6734de6781285ce9a0cffd547bdd Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 2 Apr 2012 14:09:07 -0700 Subject: [PATCH 0118/3378] Use ffsl() in ALLOCM_ALIGN(). Use ffsl() rather than ffs() plus bitshifting in ALLOCM_ALIGN(). The original rational for using ffs() was portability, but the bitmap code has since induced a hard dependency on ffsl(). 
--- include/jemalloc/jemalloc.h.in | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/include/jemalloc/jemalloc.h.in b/include/jemalloc/jemalloc.h.in index f0581dbd..8825a943 100644 --- a/include/jemalloc/jemalloc.h.in +++ b/include/jemalloc/jemalloc.h.in @@ -4,7 +4,6 @@ extern "C" { #endif -#include #include #define JEMALLOC_VERSION "@jemalloc_version@" @@ -18,11 +17,7 @@ extern "C" { #ifdef JEMALLOC_EXPERIMENTAL #define ALLOCM_LG_ALIGN(la) (la) -#if LG_SIZEOF_PTR == 2 -#define ALLOCM_ALIGN(a) (ffs(a)-1) -#else -#define ALLOCM_ALIGN(a) ((a < (size_t)INT_MAX) ? ffs(a)-1 : ffs(a>>32)+31) -#endif +#define ALLOCM_ALIGN(a) (ffsl(a)-1) #define ALLOCM_ZERO ((int)0x40) #define ALLOCM_NO_MOVE ((int)0x80) From 80b25932ca52e9506d4e2b8ee0fa58aa5ae3306d Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Mon, 2 Apr 2012 09:04:54 +0200 Subject: [PATCH 0119/3378] Move last bit of zone initialization in zone.c, and lazy-initialize --- .../jemalloc/internal/jemalloc_internal.h.in | 12 ---------- include/jemalloc/internal/zone.h | 22 ------------------- src/jemalloc.c | 11 ---------- src/zone.c | 1 + 4 files changed, 1 insertion(+), 45 deletions(-) delete mode 100644 include/jemalloc/internal/zone.h diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 9e57b62b..db2deb03 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -304,9 +304,6 @@ static const bool config_ivsalloc = #include "jemalloc/internal/rtree.h" #include "jemalloc/internal/tcache.h" #include "jemalloc/internal/hash.h" -#ifdef JEMALLOC_ZONE -#include "jemalloc/internal/zone.h" -#endif #include "jemalloc/internal/prof.h" #undef JEMALLOC_H_TYPES @@ -332,9 +329,6 @@ static const bool config_ivsalloc = #include "jemalloc/internal/rtree.h" #include "jemalloc/internal/tcache.h" #include "jemalloc/internal/hash.h" -#ifdef JEMALLOC_ZONE -#include "jemalloc/internal/zone.h" 
-#endif #include "jemalloc/internal/prof.h" typedef struct { @@ -400,9 +394,6 @@ void jemalloc_postfork_child(void); #include "jemalloc/internal/rtree.h" #include "jemalloc/internal/tcache.h" #include "jemalloc/internal/hash.h" -#ifdef JEMALLOC_ZONE -#include "jemalloc/internal/zone.h" -#endif #include "jemalloc/internal/prof.h" #undef JEMALLOC_H_EXTERNS @@ -565,9 +556,6 @@ choose_arena(void) #include "jemalloc/internal/tcache.h" #include "jemalloc/internal/arena.h" #include "jemalloc/internal/hash.h" -#ifdef JEMALLOC_ZONE -#include "jemalloc/internal/zone.h" -#endif #ifndef JEMALLOC_ENABLE_INLINE void *imalloc(size_t size); diff --git a/include/jemalloc/internal/zone.h b/include/jemalloc/internal/zone.h deleted file mode 100644 index 9eb4252f..00000000 --- a/include/jemalloc/internal/zone.h +++ /dev/null @@ -1,22 +0,0 @@ -#ifndef JEMALLOC_ZONE -# error "This source file is for zones on Darwin (OS X)." -#endif -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -void register_zone(void); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff --git a/src/jemalloc.c b/src/jemalloc.c index 5b1e0fdd..1deabcd9 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -717,17 +717,6 @@ malloc_init_hard(void) return (false); } -#ifdef JEMALLOC_ZONE -JEMALLOC_ATTR(constructor) -void -jemalloc_darwin_init(void) -{ - - if (malloc_init_hard() == false) - register_zone(); -} -#endif - /* * End initialization functions. 
*/ diff --git a/src/zone.c b/src/zone.c index 9fc87bb7..6c1e415b 100644 --- a/src/zone.c +++ b/src/zone.c @@ -166,6 +166,7 @@ zone_force_unlock(malloc_zone_t *zone) jemalloc_postfork_parent(); } +JEMALLOC_ATTR(constructor) void register_zone(void) { From 96d4120ac08db3f2d566e8e5c3bc134a24aa0afc Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 2 Apr 2012 14:50:03 -0700 Subject: [PATCH 0120/3378] Avoid NULL check in free() and malloc_usable_size(). Generalize isalloc() to handle NULL pointers in such a way that the NULL checking overhead is only paid when introspecting huge allocations (or NULL). This allows free() and malloc_usable_size() to no longer check for NULL. Submitted by Igor Bukanov and Mike Hommey. --- Makefile.in | 2 +- .../jemalloc/internal/jemalloc_internal.h.in | 10 +++---- src/jemalloc.c | 26 ++++++++----------- test/null.c | 19 ++++++++++++++ test/null.exp | 2 ++ 5 files changed, 37 insertions(+), 22 deletions(-) create mode 100644 test/null.c create mode 100644 test/null.exp diff --git a/Makefile.in b/Makefile.in index 821c0634..d8b671ad 100644 --- a/Makefile.in +++ b/Makefile.in @@ -65,7 +65,7 @@ DOCS_HTML := $(DOCS_XML:@objroot@%.xml=@srcroot@%.html) DOCS_MAN3 := $(DOCS_XML:@objroot@%.xml=@srcroot@%.3) DOCS := $(DOCS_HTML) $(DOCS_MAN3) CTESTS := @srcroot@test/aligned_alloc.c @srcroot@test/allocated.c \ - @srcroot@test/bitmap.c @srcroot@test/mremap.c \ + @srcroot@test/bitmap.c @srcroot@test/mremap.c @srcroot@test/null.c \ @srcroot@test/posix_memalign.c @srcroot@test/thread_arena.c \ @srcroot@test/thread_tcache_enabled.c ifeq (@enable_experimental@, 1) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index db2deb03..ed21bbe7 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -633,8 +633,6 @@ isalloc(const void *ptr) size_t ret; arena_chunk_t *chunk; - assert(ptr != NULL); - chunk = (arena_chunk_t 
*)CHUNK_ADDR2BASE(ptr); if (chunk != ptr) { /* Region. */ @@ -642,8 +640,10 @@ isalloc(const void *ptr) ret = arena_salloc_demote(ptr); else ret = arena_salloc(ptr); - } else + } else if (ptr != NULL) ret = huge_salloc(ptr); + else + ret = 0; return (ret); } @@ -664,12 +664,10 @@ idalloc(void *ptr) { arena_chunk_t *chunk; - assert(ptr != NULL); - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (chunk != ptr) arena_dalloc(chunk->arena, chunk, ptr); - else + else if (ptr != NULL) huge_dalloc(ptr, true); } diff --git a/src/jemalloc.c b/src/jemalloc.c index 1deabcd9..86ce695b 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1100,22 +1100,18 @@ JEMALLOC_ATTR(visibility("default")) void je_free(void *ptr) { + size_t usize; - if (ptr != NULL) { - size_t usize; + assert(malloc_initialized || IS_INITIALIZER); - assert(malloc_initialized || IS_INITIALIZER); - - if (config_prof && opt_prof) { - usize = isalloc(ptr); - prof_free(ptr, usize); - } else if (config_stats) { - usize = isalloc(ptr); - } - if (config_stats) - thread_allocated_tsd_get()->deallocated += usize; - idalloc(ptr); - } + if (config_prof && opt_prof) { + usize = isalloc(ptr); + prof_free(ptr, usize); + } else if (config_stats) + usize = isalloc(ptr); + if (config_stats) + thread_allocated_tsd_get()->deallocated += usize; + idalloc(ptr); } /* @@ -1200,7 +1196,7 @@ je_malloc_usable_size(const void *ptr) if (config_ivsalloc) ret = ivsalloc(ptr); else - ret = (ptr != NULL) ? 
isalloc(ptr) : 0; + ret = isalloc(ptr); return (ret); } diff --git a/test/null.c b/test/null.c new file mode 100644 index 00000000..ccd7ced3 --- /dev/null +++ b/test/null.c @@ -0,0 +1,19 @@ +#include +#include + +#define JEMALLOC_MANGLE +#include "jemalloc_test.h" + +int +main(void) +{ + + fprintf(stderr, "Test begin\n"); + + free(malloc(1)); + free(NULL); + assert(malloc_usable_size(NULL) == 0); + + fprintf(stderr, "Test end\n"); + return (0); +} diff --git a/test/null.exp b/test/null.exp new file mode 100644 index 00000000..369a88dd --- /dev/null +++ b/test/null.exp @@ -0,0 +1,2 @@ +Test begin +Test end From f0047372673da7f213f733465dab0d8825eb1c9f Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 2 Apr 2012 15:18:24 -0700 Subject: [PATCH 0121/3378] Revert "Avoid NULL check in free() and malloc_usable_size()." This reverts commit 96d4120ac08db3f2d566e8e5c3bc134a24aa0afc. ivsalloc() depends on chunks_rtree being initialized. This can be worked around via a NULL pointer check. However, thread_allocated_tsd_get() also depends on initialization having occurred, and there is no way to guard its call in free() that is cheaper than checking whether ptr is NULL. 
--- Makefile.in | 2 +- .../jemalloc/internal/jemalloc_internal.h.in | 10 ++++--- src/jemalloc.c | 26 +++++++++++-------- test/null.c | 19 -------------- test/null.exp | 2 -- 5 files changed, 22 insertions(+), 37 deletions(-) delete mode 100644 test/null.c delete mode 100644 test/null.exp diff --git a/Makefile.in b/Makefile.in index d8b671ad..821c0634 100644 --- a/Makefile.in +++ b/Makefile.in @@ -65,7 +65,7 @@ DOCS_HTML := $(DOCS_XML:@objroot@%.xml=@srcroot@%.html) DOCS_MAN3 := $(DOCS_XML:@objroot@%.xml=@srcroot@%.3) DOCS := $(DOCS_HTML) $(DOCS_MAN3) CTESTS := @srcroot@test/aligned_alloc.c @srcroot@test/allocated.c \ - @srcroot@test/bitmap.c @srcroot@test/mremap.c @srcroot@test/null.c \ + @srcroot@test/bitmap.c @srcroot@test/mremap.c \ @srcroot@test/posix_memalign.c @srcroot@test/thread_arena.c \ @srcroot@test/thread_tcache_enabled.c ifeq (@enable_experimental@, 1) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index ed21bbe7..db2deb03 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -633,6 +633,8 @@ isalloc(const void *ptr) size_t ret; arena_chunk_t *chunk; + assert(ptr != NULL); + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (chunk != ptr) { /* Region. 
*/ @@ -640,10 +642,8 @@ isalloc(const void *ptr) ret = arena_salloc_demote(ptr); else ret = arena_salloc(ptr); - } else if (ptr != NULL) + } else ret = huge_salloc(ptr); - else - ret = 0; return (ret); } @@ -664,10 +664,12 @@ idalloc(void *ptr) { arena_chunk_t *chunk; + assert(ptr != NULL); + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (chunk != ptr) arena_dalloc(chunk->arena, chunk, ptr); - else if (ptr != NULL) + else huge_dalloc(ptr, true); } diff --git a/src/jemalloc.c b/src/jemalloc.c index 86ce695b..1deabcd9 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1100,18 +1100,22 @@ JEMALLOC_ATTR(visibility("default")) void je_free(void *ptr) { - size_t usize; - assert(malloc_initialized || IS_INITIALIZER); + if (ptr != NULL) { + size_t usize; - if (config_prof && opt_prof) { - usize = isalloc(ptr); - prof_free(ptr, usize); - } else if (config_stats) - usize = isalloc(ptr); - if (config_stats) - thread_allocated_tsd_get()->deallocated += usize; - idalloc(ptr); + assert(malloc_initialized || IS_INITIALIZER); + + if (config_prof && opt_prof) { + usize = isalloc(ptr); + prof_free(ptr, usize); + } else if (config_stats) { + usize = isalloc(ptr); + } + if (config_stats) + thread_allocated_tsd_get()->deallocated += usize; + idalloc(ptr); + } } /* @@ -1196,7 +1200,7 @@ je_malloc_usable_size(const void *ptr) if (config_ivsalloc) ret = ivsalloc(ptr); else - ret = isalloc(ptr); + ret = (ptr != NULL) ? 
isalloc(ptr) : 0; return (ret); } diff --git a/test/null.c b/test/null.c deleted file mode 100644 index ccd7ced3..00000000 --- a/test/null.c +++ /dev/null @@ -1,19 +0,0 @@ -#include -#include - -#define JEMALLOC_MANGLE -#include "jemalloc_test.h" - -int -main(void) -{ - - fprintf(stderr, "Test begin\n"); - - free(malloc(1)); - free(NULL); - assert(malloc_usable_size(NULL) == 0); - - fprintf(stderr, "Test end\n"); - return (0); -} diff --git a/test/null.exp b/test/null.exp deleted file mode 100644 index 369a88dd..00000000 --- a/test/null.exp +++ /dev/null @@ -1,2 +0,0 @@ -Test begin -Test end From ae4c7b4b4092906c641d69b4bf9fcb4a7d50790d Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 2 Apr 2012 07:04:34 -0700 Subject: [PATCH 0122/3378] Clean up *PAGE* macros. s/PAGE_SHIFT/LG_PAGE/g and s/PAGE_SIZE/PAGE/g. Remove remnants of the dynamic-page-shift code. Rename the "arenas.pagesize" mallctl to "arenas.page". Remove the "arenas.chunksize" mallctl, which is redundant with "opt.lg_chunk". --- doc/jemalloc.xml.in | 11 +- include/jemalloc/internal/arena.h | 22 +- .../jemalloc/internal/jemalloc_internal.h.in | 45 +--- include/jemalloc/internal/size_classes.sh | 2 +- include/jemalloc/internal/tcache.h | 10 +- src/arena.c | 206 +++++++++--------- src/chunk.c | 4 +- src/ctl.c | 13 +- src/jemalloc.c | 31 +-- src/prof.c | 2 +- src/stats.c | 3 + src/tcache.c | 10 +- src/zone.c | 2 +- 13 files changed, 148 insertions(+), 213 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 0b468b04..28760b21 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1167,22 +1167,13 @@ malloc_conf = "xmalloc:true";]]> - arenas.pagesize + arenas.page (size_t) r- Page size. - - - arenas.chunksize - (size_t) - r- - - Chunk size. 
- - arenas.tcache_max diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 2592e89d..41df11fd 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -130,10 +130,10 @@ struct arena_chunk_map_s { * xxxxxxxx xxxxxxxx xxxx---- ----xxxx * -------- -------- -------- ----D-LA * - * Large (sampled, size <= PAGE_SIZE): + * Large (sampled, size <= PAGE): * ssssssss ssssssss sssscccc ccccD-LA * - * Large (not sampled, size == PAGE_SIZE): + * Large (not sampled, size == PAGE): * ssssssss ssssssss ssss---- ----D-LA */ size_t bits; @@ -486,7 +486,7 @@ arena_prof_ctx_get(const void *ptr) assert(CHUNK_ADDR2BASE(ptr) != ptr); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; + pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; mapbits = chunk->map[pageind-map_bias].bits; assert((mapbits & CHUNK_MAP_ALLOCATED) != 0); if ((mapbits & CHUNK_MAP_LARGE) == 0) { @@ -494,8 +494,8 @@ arena_prof_ctx_get(const void *ptr) ret = (prof_ctx_t *)(uintptr_t)1U; else { arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + - (uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) << - PAGE_SHIFT)); + (uintptr_t)((pageind - (mapbits >> LG_PAGE)) << + LG_PAGE)); size_t binind = arena_bin_index(chunk->arena, run->bin); arena_bin_info_t *bin_info = &arena_bin_info[binind]; unsigned regind; @@ -522,14 +522,14 @@ arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx) assert(CHUNK_ADDR2BASE(ptr) != ptr); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; + pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; mapbits = chunk->map[pageind-map_bias].bits; assert((mapbits & CHUNK_MAP_ALLOCATED) != 0); if ((mapbits & CHUNK_MAP_LARGE) == 0) { if (prof_promote == false) { arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + - (uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) << - PAGE_SHIFT)); + (uintptr_t)((pageind - 
(mapbits >> LG_PAGE)) << + LG_PAGE)); arena_bin_t *bin = run->bin; size_t binind; arena_bin_info_t *bin_info; @@ -598,7 +598,7 @@ arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr) assert(ptr != NULL); assert(CHUNK_ADDR2BASE(ptr) != ptr); - pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; + pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; mapelm = &chunk->map[pageind-map_bias]; assert((mapelm->bits & CHUNK_MAP_ALLOCATED) != 0); if ((mapelm->bits & CHUNK_MAP_LARGE) == 0) { @@ -610,8 +610,8 @@ arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr) arena_bin_t *bin; run = (arena_run_t *)((uintptr_t)chunk + - (uintptr_t)((pageind - (mapelm->bits >> - PAGE_SHIFT)) << PAGE_SHIFT)); + (uintptr_t)((pageind - (mapelm->bits >> LG_PAGE)) << + LG_PAGE)); bin = run->bin; if (config_debug) { size_t binind = arena_bin_index(arena, bin); diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index db2deb03..0c22bfb7 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -58,13 +58,6 @@ static const bool config_dss = false #endif ; -static const bool config_dynamic_page_shift = -#ifdef JEMALLOC_DYNAMIC_PAGE_SHIFT - true -#else - false -#endif - ; static const bool config_fill = #ifdef JEMALLOC_FILL true @@ -266,20 +259,12 @@ static const bool config_ivsalloc = (((s) + CACHELINE_MASK) & ~CACHELINE_MASK) /* Page size. STATIC_PAGE_SHIFT is determined by the configure script. 
*/ -#define STATIC_PAGE_SIZE ((size_t)(1U << STATIC_PAGE_SHIFT)) -#define STATIC_PAGE_MASK ((size_t)(STATIC_PAGE_SIZE - 1)) -#ifdef PAGE_SHIFT -# undef PAGE_SHIFT -#endif -#ifdef PAGE_SIZE -# undef PAGE_SIZE -#endif #ifdef PAGE_MASK # undef PAGE_MASK #endif -#define PAGE_SHIFT STATIC_PAGE_SHIFT -#define PAGE_SIZE STATIC_PAGE_SIZE -#define PAGE_MASK STATIC_PAGE_MASK +#define LG_PAGE STATIC_PAGE_SHIFT +#define PAGE ((size_t)(1U << STATIC_PAGE_SHIFT)) +#define PAGE_MASK ((size_t)(PAGE - 1)) /* Return the smallest pagesize multiple that is >= s. */ #define PAGE_CEILING(s) \ @@ -351,12 +336,6 @@ extern bool opt_xmalloc; extern bool opt_zero; extern size_t opt_narenas; -#ifdef DYNAMIC_PAGE_SHIFT -extern size_t pagesize; -extern size_t pagesize_mask; -extern size_t lg_pagesize; -#endif - /* Number of CPUs. */ extern unsigned ncpus; @@ -479,7 +458,7 @@ sa2u(size_t size, size_t alignment, size_t *run_size_p) return (0); } - if (usize <= arena_maxclass && alignment <= PAGE_SIZE) { + if (usize <= arena_maxclass && alignment <= PAGE) { if (usize <= SMALL_MAXCLASS) return (arena_bin_info[SMALL_SIZE2BIN(usize)].reg_size); return (PAGE_CEILING(usize)); @@ -494,7 +473,7 @@ sa2u(size_t size, size_t alignment, size_t *run_size_p) usize = PAGE_CEILING(size); /* * (usize < size) protects against very large sizes within - * PAGE_SIZE of SIZE_T_MAX. + * PAGE of SIZE_T_MAX. * * (usize + alignment < usize) protects against the * combination of maximal alignment and usize large enough @@ -514,18 +493,18 @@ sa2u(size_t size, size_t alignment, size_t *run_size_p) * would need to allocate in order to guarantee the alignment. */ if (usize >= alignment) - run_size = usize + alignment - PAGE_SIZE; + run_size = usize + alignment - PAGE; else { /* * It is possible that (alignment << 1) will cause * overflow, but it doesn't matter because we also - * subtract PAGE_SIZE, which in the case of overflow - * leaves us with a very large run_size. 
That causes - * the first conditional below to fail, which means - * that the bogus run_size value never gets used for + * subtract PAGE, which in the case of overflow leaves + * us with a very large run_size. That causes the + * first conditional below to fail, which means that + * the bogus run_size value never gets used for * anything important. */ - run_size = (alignment << 1) - PAGE_SIZE; + run_size = (alignment << 1) - PAGE; } if (run_size_p != NULL) *run_size_p = run_size; @@ -600,7 +579,7 @@ ipalloc(size_t usize, size_t alignment, bool zero) assert(usize != 0); assert(usize == sa2u(usize, alignment, NULL)); - if (usize <= arena_maxclass && alignment <= PAGE_SIZE) + if (usize <= arena_maxclass && alignment <= PAGE) ret = arena_malloc(usize, zero); else { size_t run_size JEMALLOC_CC_SILENCE_INIT(0); diff --git a/include/jemalloc/internal/size_classes.sh b/include/jemalloc/internal/size_classes.sh index 79b4ba23..9829a2b3 100755 --- a/include/jemalloc/internal/size_classes.sh +++ b/include/jemalloc/internal/size_classes.sh @@ -36,7 +36,7 @@ while [ ${lg_q} -le ${lg_qmax} ] ; do lg_p=${lg_pmin} while [ ${lg_p} -le ${lg_pmax} ] ; do cat <> PAGE_SHIFT) - 1; + binind = NBINS + (size >> LG_PAGE) - 1; assert(binind < nhbins); tbin = &tcache->tbins[binind]; ret = tcache_alloc_easy(tbin); @@ -386,7 +386,7 @@ tcache_alloc_large(tcache_t *tcache, size_t size, bool zero) arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ret); size_t pageind = (((uintptr_t)ret - (uintptr_t)chunk) >> - PAGE_SHIFT); + LG_PAGE); chunk->map[pageind-map_bias].bits &= ~CHUNK_MAP_CLASS_MASK; } @@ -426,10 +426,10 @@ tcache_dalloc_small(tcache_t *tcache, void *ptr) chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); arena = chunk->arena; - pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; + pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; mapelm = &chunk->map[pageind-map_bias]; run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - - (mapelm->bits >> PAGE_SHIFT)) << 
PAGE_SHIFT)); + (mapelm->bits >> LG_PAGE)) << LG_PAGE)); bin = run->bin; binind = ((uintptr_t)bin - (uintptr_t)&arena->bins) / sizeof(arena_bin_t); @@ -462,7 +462,7 @@ tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size) assert(arena_salloc(ptr) > SMALL_MAXCLASS); assert(arena_salloc(ptr) <= tcache_maxclass); - binind = NBINS + (size >> PAGE_SHIFT) - 1; + binind = NBINS + (size >> LG_PAGE) - 1; if (config_fill && opt_junk) memset(ptr, 0x5a, size); diff --git a/src/arena.c b/src/arena.c index 898f8c7d..b7e14228 100644 --- a/src/arena.c +++ b/src/arena.c @@ -176,10 +176,9 @@ static inline void arena_chunk_validate_zeroed(arena_chunk_t *chunk, size_t run_ind) { size_t i; - UNUSED size_t *p = (size_t *)((uintptr_t)chunk + (run_ind << - PAGE_SHIFT)); + UNUSED size_t *p = (size_t *)((uintptr_t)chunk + (run_ind << LG_PAGE)); - for (i = 0; i < PAGE_SIZE / sizeof(size_t); i++) + for (i = 0; i < PAGE / sizeof(size_t); i++) assert(p[i] == 0); } @@ -193,16 +192,15 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, arena_avail_tree_t *runs_avail; chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); - run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk) - >> PAGE_SHIFT); + run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE); flag_dirty = chunk->map[run_ind-map_bias].bits & CHUNK_MAP_DIRTY; runs_avail = (flag_dirty != 0) ? &arena->runs_avail_dirty : &arena->runs_avail_clean; total_pages = (chunk->map[run_ind-map_bias].bits & ~PAGE_MASK) >> - PAGE_SHIFT; + LG_PAGE; assert((chunk->map[run_ind+total_pages-1-map_bias].bits & CHUNK_MAP_DIRTY) == flag_dirty); - need_pages = (size >> PAGE_SHIFT); + need_pages = (size >> LG_PAGE); assert(need_pages > 0); assert(need_pages <= total_pages); rem_pages = total_pages - need_pages; @@ -214,8 +212,8 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, * multiple. 
*/ size_t cactive_diff = CHUNK_CEILING((arena->nactive + - need_pages) << PAGE_SHIFT) - CHUNK_CEILING(arena->nactive << - PAGE_SHIFT); + need_pages) << LG_PAGE) - CHUNK_CEILING(arena->nactive << + LG_PAGE); if (cactive_diff != 0) stats_cactive_add(cactive_diff); } @@ -225,16 +223,16 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, if (rem_pages > 0) { if (flag_dirty != 0) { chunk->map[run_ind+need_pages-map_bias].bits = - (rem_pages << PAGE_SHIFT) | CHUNK_MAP_DIRTY; + (rem_pages << LG_PAGE) | CHUNK_MAP_DIRTY; chunk->map[run_ind+total_pages-1-map_bias].bits = - (rem_pages << PAGE_SHIFT) | CHUNK_MAP_DIRTY; + (rem_pages << LG_PAGE) | CHUNK_MAP_DIRTY; } else { chunk->map[run_ind+need_pages-map_bias].bits = - (rem_pages << PAGE_SHIFT) | + (rem_pages << LG_PAGE) | (chunk->map[run_ind+need_pages-map_bias].bits & CHUNK_MAP_UNZEROED); chunk->map[run_ind+total_pages-1-map_bias].bits = - (rem_pages << PAGE_SHIFT) | + (rem_pages << LG_PAGE) | (chunk->map[run_ind+total_pages-1-map_bias].bits & CHUNK_MAP_UNZEROED); } @@ -264,8 +262,7 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, & CHUNK_MAP_UNZEROED) != 0) { memset((void *)((uintptr_t) chunk + ((run_ind+i) << - PAGE_SHIFT)), 0, - PAGE_SIZE); + LG_PAGE)), 0, PAGE); } else if (config_debug) { arena_chunk_validate_zeroed( chunk, run_ind+i); @@ -277,8 +274,7 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, * zeroed. 
*/ memset((void *)((uintptr_t)chunk + (run_ind << - PAGE_SHIFT)), 0, (need_pages << - PAGE_SHIFT)); + LG_PAGE)), 0, (need_pages << LG_PAGE)); } } @@ -310,7 +306,7 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, == 0) arena_chunk_validate_zeroed(chunk, run_ind); for (i = 1; i < need_pages - 1; i++) { - chunk->map[run_ind+i-map_bias].bits = (i << PAGE_SHIFT) + chunk->map[run_ind+i-map_bias].bits = (i << LG_PAGE) | (chunk->map[run_ind+i-map_bias].bits & CHUNK_MAP_UNZEROED) | CHUNK_MAP_ALLOCATED; if (config_debug && flag_dirty == 0 && @@ -319,7 +315,7 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, arena_chunk_validate_zeroed(chunk, run_ind+i); } chunk->map[run_ind+need_pages-1-map_bias].bits = ((need_pages - - 1) << PAGE_SHIFT) | + - 1) << LG_PAGE) | (chunk->map[run_ind+need_pages-1-map_bias].bits & CHUNK_MAP_UNZEROED) | CHUNK_MAP_ALLOCATED | flag_dirty; if (config_debug && flag_dirty == 0 && @@ -460,7 +456,7 @@ arena_run_alloc(arena_t *arena, size_t size, bool large, bool zero) + map_bias; run = (arena_run_t *)((uintptr_t)run_chunk + (pageind << - PAGE_SHIFT)); + LG_PAGE)); arena_run_split(arena, run, size, large, zero); return (run); } @@ -472,7 +468,7 @@ arena_run_alloc(arena_t *arena, size_t size, bool large, bool zero) + map_bias; run = (arena_run_t *)((uintptr_t)run_chunk + (pageind << - PAGE_SHIFT)); + LG_PAGE)); arena_run_split(arena, run, size, large, zero); return (run); } @@ -482,8 +478,7 @@ arena_run_alloc(arena_t *arena, size_t size, bool large, bool zero) */ chunk = arena_chunk_alloc(arena); if (chunk != NULL) { - run = (arena_run_t *)((uintptr_t)chunk + (map_bias << - PAGE_SHIFT)); + run = (arena_run_t *)((uintptr_t)chunk + (map_bias << LG_PAGE)); arena_run_split(arena, run, size, large, zero); return (run); } @@ -501,7 +496,7 @@ arena_run_alloc(arena_t *arena, size_t size, bool large, bool zero) + map_bias; run = (arena_run_t *)((uintptr_t)run_chunk + (pageind << - PAGE_SHIFT)); + LG_PAGE)); 
arena_run_split(arena, run, size, large, zero); return (run); } @@ -513,7 +508,7 @@ arena_run_alloc(arena_t *arena, size_t size, bool large, bool zero) + map_bias; run = (arena_run_t *)((uintptr_t)run_chunk + (pageind << - PAGE_SHIFT)); + LG_PAGE)); arena_run_split(arena, run, size, large, zero); return (run); } @@ -582,7 +577,7 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) if ((mapelm->bits & CHUNK_MAP_ALLOCATED) == 0) { size_t npages; - npages = mapelm->bits >> PAGE_SHIFT; + npages = mapelm->bits >> LG_PAGE; assert(pageind + npages <= chunk_npages); if (mapelm->bits & CHUNK_MAP_DIRTY) { size_t i; @@ -590,7 +585,7 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) arena_avail_tree_remove( &arena->runs_avail_dirty, mapelm); - mapelm->bits = (npages << PAGE_SHIFT) | + mapelm->bits = (npages << LG_PAGE) | flag_unzeroed | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; /* @@ -615,9 +610,9 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) */ size_t cactive_diff = CHUNK_CEILING((arena->nactive + - npages) << PAGE_SHIFT) - + npages) << LG_PAGE) - CHUNK_CEILING(arena->nactive << - PAGE_SHIFT); + LG_PAGE); if (cactive_diff != 0) stats_cactive_add(cactive_diff); } @@ -631,17 +626,17 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) } else { /* Skip allocated run. 
*/ if (mapelm->bits & CHUNK_MAP_LARGE) - pageind += mapelm->bits >> PAGE_SHIFT; + pageind += mapelm->bits >> LG_PAGE; else { arena_run_t *run = (arena_run_t *)((uintptr_t) - chunk + (uintptr_t)(pageind << PAGE_SHIFT)); + chunk + (uintptr_t)(pageind << LG_PAGE)); - assert((mapelm->bits >> PAGE_SHIFT) == 0); + assert((mapelm->bits >> LG_PAGE) == 0); size_t binind = arena_bin_index(arena, run->bin); arena_bin_info_t *bin_info = &arena_bin_info[binind]; - pageind += bin_info->run_size >> PAGE_SHIFT; + pageind += bin_info->run_size >> LG_PAGE; } } } @@ -662,7 +657,7 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) ql_foreach(mapelm, &mapelms, u.ql_link) { size_t pageind = (((uintptr_t)mapelm - (uintptr_t)chunk->map) / sizeof(arena_chunk_map_t)) + map_bias; - size_t npages = mapelm->bits >> PAGE_SHIFT; + size_t npages = mapelm->bits >> LG_PAGE; assert(pageind + npages <= chunk_npages); assert(ndirty >= npages); @@ -676,8 +671,8 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) #else # error "No method defined for purging unused dirty pages." 
#endif - madvise((void *)((uintptr_t)chunk + (pageind << PAGE_SHIFT)), - (npages << PAGE_SHIFT), MADV_PURGE); + madvise((void *)((uintptr_t)chunk + (pageind << LG_PAGE)), + (npages << LG_PAGE), MADV_PURGE); #undef MADV_PURGE if (config_stats) nmadvise++; @@ -693,7 +688,7 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) size_t pageind = (((uintptr_t)mapelm - (uintptr_t)chunk->map) / sizeof(arena_chunk_map_t)) + map_bias; arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + - (uintptr_t)(pageind << PAGE_SHIFT)); + (uintptr_t)(pageind << LG_PAGE)); ql_remove(&mapelms, mapelm, u.ql_link); arena_run_dalloc(arena, run, false); @@ -804,33 +799,31 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty) arena_avail_tree_t *runs_avail; chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); - run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk) - >> PAGE_SHIFT); + run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE); assert(run_ind >= map_bias); assert(run_ind < chunk_npages); if ((chunk->map[run_ind-map_bias].bits & CHUNK_MAP_LARGE) != 0) { size = chunk->map[run_ind-map_bias].bits & ~PAGE_MASK; - assert(size == PAGE_SIZE || - (chunk->map[run_ind+(size>>PAGE_SHIFT)-1-map_bias].bits & + assert(size == PAGE || + (chunk->map[run_ind+(size>>LG_PAGE)-1-map_bias].bits & ~PAGE_MASK) == 0); - assert((chunk->map[run_ind+(size>>PAGE_SHIFT)-1-map_bias].bits & + assert((chunk->map[run_ind+(size>>LG_PAGE)-1-map_bias].bits & CHUNK_MAP_LARGE) != 0); - assert((chunk->map[run_ind+(size>>PAGE_SHIFT)-1-map_bias].bits & + assert((chunk->map[run_ind+(size>>LG_PAGE)-1-map_bias].bits & CHUNK_MAP_ALLOCATED) != 0); } else { size_t binind = arena_bin_index(arena, run->bin); arena_bin_info_t *bin_info = &arena_bin_info[binind]; size = bin_info->run_size; } - run_pages = (size >> PAGE_SHIFT); + run_pages = (size >> LG_PAGE); if (config_stats) { /* * Update stats_cactive if nactive is crossing a chunk * multiple. 
*/ - size_t cactive_diff = CHUNK_CEILING(arena->nactive << - PAGE_SHIFT) - CHUNK_CEILING((arena->nactive - run_pages) << - PAGE_SHIFT); + size_t cactive_diff = CHUNK_CEILING(arena->nactive << LG_PAGE) - + CHUNK_CEILING((arena->nactive - run_pages) << LG_PAGE); if (cactive_diff != 0) stats_cactive_sub(cactive_diff); } @@ -869,7 +862,7 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty) CHUNK_MAP_DIRTY) == flag_dirty) { size_t nrun_size = chunk->map[run_ind+run_pages-map_bias].bits & ~PAGE_MASK; - size_t nrun_pages = nrun_size >> PAGE_SHIFT; + size_t nrun_pages = nrun_size >> LG_PAGE; /* * Remove successor from runs_avail; the coalesced run is @@ -900,7 +893,7 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty) CHUNK_MAP_DIRTY) == flag_dirty) { size_t prun_size = chunk->map[run_ind-1-map_bias].bits & ~PAGE_MASK; - size_t prun_pages = prun_size >> PAGE_SHIFT; + size_t prun_pages = prun_size >> LG_PAGE; run_ind -= prun_pages; @@ -970,8 +963,8 @@ static void arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, size_t oldsize, size_t newsize) { - size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> PAGE_SHIFT; - size_t head_npages = (oldsize - newsize) >> PAGE_SHIFT; + size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE; + size_t head_npages = (oldsize - newsize) >> LG_PAGE; size_t flag_dirty = chunk->map[pageind-map_bias].bits & CHUNK_MAP_DIRTY; assert(oldsize > newsize); @@ -991,7 +984,7 @@ arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, CHUNK_MAP_UNZEROED) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; if (config_debug) { - UNUSED size_t tail_npages = newsize >> PAGE_SHIFT; + UNUSED size_t tail_npages = newsize >> LG_PAGE; assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias] .bits & ~PAGE_MASK) == 0); assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias] @@ -1012,9 +1005,9 @@ static void arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, 
size_t oldsize, size_t newsize, bool dirty) { - size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> PAGE_SHIFT; - size_t head_npages = newsize >> PAGE_SHIFT; - size_t tail_npages = (oldsize - newsize) >> PAGE_SHIFT; + size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE; + size_t head_npages = newsize >> LG_PAGE; + size_t tail_npages = (oldsize - newsize) >> LG_PAGE; size_t flag_dirty = chunk->map[pageind-map_bias].bits & CHUNK_MAP_DIRTY; @@ -1064,8 +1057,8 @@ arena_bin_runs_first(arena_bin_t *bin) pageind = ((((uintptr_t)mapelm - (uintptr_t)chunk->map) / sizeof(arena_chunk_map_t))) + map_bias; arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + - (uintptr_t)((pageind - (mapelm->bits >> PAGE_SHIFT)) << - PAGE_SHIFT)); + (uintptr_t)((pageind - (mapelm->bits >> LG_PAGE)) << + LG_PAGE)); return (run); } @@ -1076,7 +1069,7 @@ static void arena_bin_runs_insert(arena_bin_t *bin, arena_run_t *run) { arena_chunk_t *chunk = CHUNK_ADDR2BASE(run); - size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> PAGE_SHIFT; + size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE; arena_chunk_map_t *mapelm = &chunk->map[pageind-map_bias]; assert(arena_run_tree_search(&bin->runs, mapelm) == NULL); @@ -1088,7 +1081,7 @@ static void arena_bin_runs_remove(arena_bin_t *bin, arena_run_t *run) { arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); - size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> PAGE_SHIFT; + size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE; arena_chunk_map_t *mapelm = &chunk->map[pageind-map_bias]; assert(arena_run_tree_search(&bin->runs, mapelm) != NULL); @@ -1331,9 +1324,9 @@ arena_malloc_large(arena_t *arena, size_t size, bool zero) arena->stats.nmalloc_large++; arena->stats.nrequests_large++; arena->stats.allocated_large += size; - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nmalloc++; - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nrequests++; - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns++; + 
arena->stats.lstats[(size >> LG_PAGE) - 1].nmalloc++; + arena->stats.lstats[(size >> LG_PAGE) - 1].nrequests++; + arena->stats.lstats[(size >> LG_PAGE) - 1].curruns++; } if (config_prof) arena_prof_accum(arena, size); @@ -1401,9 +1394,9 @@ arena_palloc(arena_t *arena, size_t size, size_t alloc_size, size_t alignment, arena->stats.nmalloc_large++; arena->stats.nrequests_large++; arena->stats.allocated_large += size; - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nmalloc++; - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nrequests++; - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns++; + arena->stats.lstats[(size >> LG_PAGE) - 1].nmalloc++; + arena->stats.lstats[(size >> LG_PAGE) - 1].nrequests++; + arena->stats.lstats[(size >> LG_PAGE) - 1].curruns++; } malloc_mutex_unlock(&arena->lock); @@ -1428,13 +1421,12 @@ arena_salloc(const void *ptr) assert(CHUNK_ADDR2BASE(ptr) != ptr); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; + pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; mapbits = chunk->map[pageind-map_bias].bits; assert((mapbits & CHUNK_MAP_ALLOCATED) != 0); if ((mapbits & CHUNK_MAP_LARGE) == 0) { arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + - (uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) << - PAGE_SHIFT)); + (uintptr_t)((pageind - (mapbits >> LG_PAGE)) << LG_PAGE)); size_t binind = arena_bin_index(chunk->arena, run->bin); arena_bin_info_t *bin_info = &arena_bin_info[binind]; assert(((uintptr_t)ptr - ((uintptr_t)run + @@ -1458,11 +1450,11 @@ arena_prof_promoted(const void *ptr, size_t size) assert(ptr != NULL); assert(CHUNK_ADDR2BASE(ptr) != ptr); - assert(isalloc(ptr) == PAGE_SIZE); + assert(isalloc(ptr) == PAGE); assert(size <= SMALL_MAXCLASS); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; + pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; binind = SMALL_SIZE2BIN(size); assert(binind < NBINS); 
chunk->map[pageind-map_bias].bits = (chunk->map[pageind-map_bias].bits & @@ -1480,13 +1472,12 @@ arena_salloc_demote(const void *ptr) assert(CHUNK_ADDR2BASE(ptr) != ptr); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; + pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; mapbits = chunk->map[pageind-map_bias].bits; assert((mapbits & CHUNK_MAP_ALLOCATED) != 0); if ((mapbits & CHUNK_MAP_LARGE) == 0) { arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + - (uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) << - PAGE_SHIFT)); + (uintptr_t)((pageind - (mapbits >> LG_PAGE)) << LG_PAGE)); size_t binind = arena_bin_index(chunk->arena, run->bin); arena_bin_info_t *bin_info = &arena_bin_info[binind]; assert(((uintptr_t)ptr - ((uintptr_t)run + @@ -1496,7 +1487,7 @@ arena_salloc_demote(const void *ptr) } else { assert(((uintptr_t)ptr & PAGE_MASK) == 0); ret = mapbits & ~PAGE_MASK; - if (prof_promote && ret == PAGE_SIZE && (mapbits & + if (prof_promote && ret == PAGE && (mapbits & CHUNK_MAP_CLASS_MASK) != 0) { size_t binind = ((mapbits & CHUNK_MAP_CLASS_MASK) >> CHUNK_MAP_CLASS_SHIFT) - 1; @@ -1542,18 +1533,18 @@ arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, assert(run != bin->runcur); assert(arena_run_tree_search(&bin->runs, &chunk->map[ - (((uintptr_t)run-(uintptr_t)chunk)>>PAGE_SHIFT)-map_bias]) == NULL); + (((uintptr_t)run-(uintptr_t)chunk)>>LG_PAGE)-map_bias]) == NULL); binind = arena_bin_index(chunk->arena, run->bin); bin_info = &arena_bin_info[binind]; malloc_mutex_unlock(&bin->lock); /******************************/ - npages = bin_info->run_size >> PAGE_SHIFT; - run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk) >> PAGE_SHIFT); + npages = bin_info->run_size >> LG_PAGE; + run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE); past = (size_t)(PAGE_CEILING((uintptr_t)run + (uintptr_t)bin_info->reg0_offset + (uintptr_t)(run->nextind * - bin_info->reg_size) - 
(uintptr_t)chunk) >> PAGE_SHIFT); + bin_info->reg_size) - (uintptr_t)chunk) >> LG_PAGE); malloc_mutex_lock(&arena->lock); /* @@ -1573,8 +1564,8 @@ arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, chunk->map[run_ind-map_bias].bits = bin_info->run_size | CHUNK_MAP_LARGE | (chunk->map[run_ind-map_bias].bits & CHUNK_MAP_FLAGS_MASK); - arena_run_trim_tail(arena, chunk, run, (npages << PAGE_SHIFT), - ((past - run_ind) << PAGE_SHIFT), false); + arena_run_trim_tail(arena, chunk, run, (npages << LG_PAGE), + ((past - run_ind) << LG_PAGE), false); /* npages = past - run_ind; */ } arena_run_dalloc(arena, run, true); @@ -1615,9 +1606,9 @@ arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, arena_bin_t *bin; size_t size; - pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; + pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - - (mapelm->bits >> PAGE_SHIFT)) << PAGE_SHIFT)); + (mapelm->bits >> LG_PAGE)) << LG_PAGE)); bin = run->bin; size_t binind = arena_bin_index(arena, bin); arena_bin_info_t *bin_info = &arena_bin_info[binind]; @@ -1692,8 +1683,7 @@ arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr) { if (config_fill || config_stats) { - size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> - PAGE_SHIFT; + size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; size_t size = chunk->map[pageind-map_bias].bits & ~PAGE_MASK; if (config_fill && config_stats && opt_junk) @@ -1701,8 +1691,8 @@ arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr) if (config_stats) { arena->stats.ndalloc_large++; arena->stats.allocated_large -= size; - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].ndalloc++; - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns--; + arena->stats.lstats[(size >> LG_PAGE) - 1].ndalloc++; + arena->stats.lstats[(size >> LG_PAGE) - 1].curruns--; } } @@ -1726,15 +1716,15 @@ arena_ralloc_large_shrink(arena_t 
*arena, arena_chunk_t *chunk, void *ptr, if (config_stats) { arena->stats.ndalloc_large++; arena->stats.allocated_large -= oldsize; - arena->stats.lstats[(oldsize >> PAGE_SHIFT) - 1].ndalloc++; - arena->stats.lstats[(oldsize >> PAGE_SHIFT) - 1].curruns--; + arena->stats.lstats[(oldsize >> LG_PAGE) - 1].ndalloc++; + arena->stats.lstats[(oldsize >> LG_PAGE) - 1].curruns--; arena->stats.nmalloc_large++; arena->stats.nrequests_large++; arena->stats.allocated_large += size; - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nmalloc++; - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nrequests++; - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns++; + arena->stats.lstats[(size >> LG_PAGE) - 1].nmalloc++; + arena->stats.lstats[(size >> LG_PAGE) - 1].nrequests++; + arena->stats.lstats[(size >> LG_PAGE) - 1].curruns++; } malloc_mutex_unlock(&arena->lock); } @@ -1743,8 +1733,8 @@ static bool arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr, size_t oldsize, size_t size, size_t extra, bool zero) { - size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; - size_t npages = oldsize >> PAGE_SHIFT; + size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; + size_t npages = oldsize >> LG_PAGE; size_t followsize; assert(oldsize == (chunk->map[pageind-map_bias].bits & ~PAGE_MASK)); @@ -1766,10 +1756,10 @@ arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr, size_t splitsize = (oldsize + followsize <= size + extra) ? 
followsize : size + extra - oldsize; arena_run_split(arena, (arena_run_t *)((uintptr_t)chunk + - ((pageind+npages) << PAGE_SHIFT)), splitsize, true, zero); + ((pageind+npages) << LG_PAGE)), splitsize, true, zero); size = oldsize + splitsize; - npages = size >> PAGE_SHIFT; + npages = size >> LG_PAGE; /* * Mark the extended run as dirty if either portion of the run @@ -1791,18 +1781,18 @@ arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr, if (config_stats) { arena->stats.ndalloc_large++; arena->stats.allocated_large -= oldsize; - arena->stats.lstats[(oldsize >> PAGE_SHIFT) + arena->stats.lstats[(oldsize >> LG_PAGE) - 1].ndalloc++; - arena->stats.lstats[(oldsize >> PAGE_SHIFT) + arena->stats.lstats[(oldsize >> LG_PAGE) - 1].curruns--; arena->stats.nmalloc_large++; arena->stats.nrequests_large++; arena->stats.allocated_large += size; - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nmalloc++; - arena->stats.lstats[(size >> PAGE_SHIFT) + arena->stats.lstats[(size >> LG_PAGE) - 1].nmalloc++; + arena->stats.lstats[(size >> LG_PAGE) - 1].nrequests++; - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns++; + arena->stats.lstats[(size >> LG_PAGE) - 1].curruns++; } malloc_mutex_unlock(&arena->lock); return (false); @@ -2023,7 +2013,7 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size) uint32_t try_ctx0_offset, good_ctx0_offset; uint32_t try_reg0_offset, good_reg0_offset; - assert(min_run_size >= PAGE_SIZE); + assert(min_run_size >= PAGE); assert(min_run_size <= arena_maxclass); /* @@ -2076,7 +2066,7 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size) good_reg0_offset = try_reg0_offset; /* Try more aggressive settings. */ - try_run_size += PAGE_SIZE; + try_run_size += PAGE; try_nregs = ((try_run_size - sizeof(arena_run_t)) / bin_info->reg_size) + 1; /* Counter-act try_nregs-- in loop. 
*/ @@ -2127,7 +2117,7 @@ static void bin_info_init(void) { arena_bin_info_t *bin_info; - size_t prev_run_size = PAGE_SIZE; + size_t prev_run_size = PAGE; #define SIZE_CLASS(bin, delta, size) \ bin_info = &arena_bin_info[bin]; \ @@ -2158,14 +2148,14 @@ arena_boot(void) */ map_bias = 0; for (i = 0; i < 3; i++) { - header_size = offsetof(arena_chunk_t, map) - + (sizeof(arena_chunk_map_t) * (chunk_npages-map_bias)); - map_bias = (header_size >> PAGE_SHIFT) + ((header_size & - PAGE_MASK) != 0); + header_size = offsetof(arena_chunk_t, map) + + (sizeof(arena_chunk_map_t) * (chunk_npages-map_bias)); + map_bias = (header_size >> LG_PAGE) + ((header_size & PAGE_MASK) + != 0); } assert(map_bias > 0); - arena_maxclass = chunksize - (map_bias << PAGE_SHIFT); + arena_maxclass = chunksize - (map_bias << LG_PAGE); bin_info_init(); } diff --git a/src/chunk.c b/src/chunk.c index f50e8409..8fcd61e4 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -105,9 +105,9 @@ chunk_boot0(void) /* Set variables according to the value of opt_lg_chunk. 
*/ chunksize = (ZU(1) << opt_lg_chunk); - assert(chunksize >= PAGE_SIZE); + assert(chunksize >= PAGE); chunksize_mask = chunksize - 1; - chunk_npages = (chunksize >> PAGE_SHIFT); + chunk_npages = (chunksize >> LG_PAGE); if (config_stats || config_prof) { if (malloc_mutex_init(&chunks_mtx)) diff --git a/src/ctl.c b/src/ctl.c index 08011616..2afca51a 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -83,8 +83,7 @@ INDEX_PROTO(arenas_lrun_i) CTL_PROTO(arenas_narenas) CTL_PROTO(arenas_initialized) CTL_PROTO(arenas_quantum) -CTL_PROTO(arenas_pagesize) -CTL_PROTO(arenas_chunksize) +CTL_PROTO(arenas_page) CTL_PROTO(arenas_tcache_max) CTL_PROTO(arenas_nbins) CTL_PROTO(arenas_nhbins) @@ -227,8 +226,7 @@ static const ctl_node_t arenas_node[] = { {NAME("narenas"), CTL(arenas_narenas)}, {NAME("initialized"), CTL(arenas_initialized)}, {NAME("quantum"), CTL(arenas_quantum)}, - {NAME("pagesize"), CTL(arenas_pagesize)}, - {NAME("chunksize"), CTL(arenas_chunksize)}, + {NAME("page"), CTL(arenas_page)}, {NAME("tcache_max"), CTL(arenas_tcache_max)}, {NAME("nbins"), CTL(arenas_nbins)}, {NAME("nhbins"), CTL(arenas_nhbins)}, @@ -520,7 +518,7 @@ ctl_refresh(void) + ctl_stats.arenas[narenas].astats.allocated_large + ctl_stats.huge.allocated; ctl_stats.active = (ctl_stats.arenas[narenas].pactive << - PAGE_SHIFT) + ctl_stats.huge.allocated; + LG_PAGE) + ctl_stats.huge.allocated; ctl_stats.mapped = (ctl_stats.chunks.current << opt_lg_chunk); } @@ -1116,7 +1114,7 @@ arenas_bin_i_index(const size_t *mib, size_t miblen, size_t i) return (super_arenas_bin_i_node); } -CTL_RO_NL_GEN(arenas_lrun_i_size, ((mib[2]+1) << PAGE_SHIFT), size_t) +CTL_RO_NL_GEN(arenas_lrun_i_size, ((mib[2]+1) << LG_PAGE), size_t) const ctl_node_t * arenas_lrun_i_index(const size_t *mib, size_t miblen, size_t i) { @@ -1155,8 +1153,7 @@ RETURN: } CTL_RO_NL_GEN(arenas_quantum, QUANTUM, size_t) -CTL_RO_NL_GEN(arenas_pagesize, PAGE_SIZE, size_t) -CTL_RO_NL_GEN(arenas_chunksize, chunksize, size_t) +CTL_RO_NL_GEN(arenas_page, PAGE, 
size_t) CTL_RO_NL_CGEN(config_tcache, arenas_tcache_max, tcache_maxclass, size_t) CTL_RO_NL_GEN(arenas_nbins, NBINS, unsigned) CTL_RO_NL_CGEN(config_tcache, arenas_nhbins, nhbins, unsigned) diff --git a/src/jemalloc.c b/src/jemalloc.c index 1deabcd9..c7019220 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -25,12 +25,6 @@ bool opt_xmalloc = false; bool opt_zero = false; size_t opt_narenas = 0; -#ifdef DYNAMIC_PAGE_SHIFT -size_t pagesize; -size_t pagesize_mask; -size_t lg_pagesize; -#endif - unsigned ncpus; malloc_mutex_t arenas_lock; @@ -477,7 +471,7 @@ malloc_conf_init(void) * Chunks always require at least one * header page, * plus one data page. */ - CONF_HANDLE_SIZE_T(opt_lg_chunk, lg_chunk, PAGE_SHIFT+1, + CONF_HANDLE_SIZE_T(opt_lg_chunk, lg_chunk, LG_PAGE+1, (sizeof(size_t) << 3) - 1) CONF_HANDLE_SIZE_T(opt_narenas, narenas, 1, SIZE_T_MAX) CONF_HANDLE_SSIZE_T(opt_lg_dirty_mult, lg_dirty_mult, @@ -550,25 +544,6 @@ malloc_init_hard(void) #endif malloc_initializer = INITIALIZER; -#ifdef DYNAMIC_PAGE_SHIFT - /* Get page size. */ - { - long result; - - result = sysconf(_SC_PAGESIZE); - assert(result != -1); - pagesize = (size_t)result; - - /* - * We assume that pagesize is a power of 2 when calculating - * pagesize_mask and lg_pagesize. 
- */ - assert(((result - 1) & result) == 0); - pagesize_mask = result - 1; - lg_pagesize = ffs((int)result) - 1; - } -#endif - malloc_tsd_boot(); if (config_prof) prof_boot0(); @@ -1145,7 +1120,7 @@ void * je_valloc(size_t size) { void *ret JEMALLOC_CC_SILENCE_INIT(NULL); - imemalign(&ret, PAGE_SIZE, size, 1); + imemalign(&ret, PAGE, size, 1); return (ret); } #endif @@ -1386,7 +1361,7 @@ je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, int flags) alignment, zero, no_move); if (q == NULL) goto ERR; - if (max_usize < PAGE_SIZE) { + if (max_usize < PAGE) { usize = max_usize; arena_prof_promoted(q, usize); } else diff --git a/src/prof.c b/src/prof.c index bc21d894..d2532ec1 100644 --- a/src/prof.c +++ b/src/prof.c @@ -1179,7 +1179,7 @@ prof_boot1(void) prof_interval = 0; } - prof_promote = (opt_prof && opt_lg_prof_sample > PAGE_SHIFT); + prof_promote = (opt_prof && opt_lg_prof_sample > LG_PAGE); } bool diff --git a/src/stats.c b/src/stats.c index f494974b..ca6d408c 100644 --- a/src/stats.c +++ b/src/stats.c @@ -412,6 +412,9 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, CTL_GET("arenas.quantum", &sv, size_t); malloc_cprintf(write_cb, cbopaque, "Quantum size: %zu\n", sv); + CTL_GET("arenas.page", &sv, size_t); + malloc_cprintf(write_cb, cbopaque, "Page size: %zu\n", sv); + CTL_GET("opt.lg_dirty_mult", &ssv, ssize_t); if (ssv >= 0) { malloc_cprintf(write_cb, cbopaque, diff --git a/src/tcache.c b/src/tcache.c index bc911a6e..6a7f17bf 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -72,7 +72,7 @@ tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem, chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (chunk->arena == arena) { size_t pageind = ((uintptr_t)ptr - - (uintptr_t)chunk) >> PAGE_SHIFT; + (uintptr_t)chunk) >> LG_PAGE; arena_chunk_map_t *mapelm = &chunk->map[pageind-map_bias]; arena_dalloc_bin(arena, chunk, ptr, mapelm); @@ -303,11 +303,11 @@ tcache_destroy(tcache_t *tcache) arena_chunk_t *chunk = 
CHUNK_ADDR2BASE(tcache); arena_t *arena = chunk->arena; size_t pageind = ((uintptr_t)tcache - (uintptr_t)chunk) >> - PAGE_SHIFT; + LG_PAGE; arena_chunk_map_t *mapelm = &chunk->map[pageind-map_bias]; arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + - (uintptr_t)((pageind - (mapelm->bits >> PAGE_SHIFT)) << - PAGE_SHIFT)); + (uintptr_t)((pageind - (mapelm->bits >> LG_PAGE)) << + LG_PAGE)); arena_bin_t *bin = run->bin; malloc_mutex_lock(&bin->lock); @@ -398,7 +398,7 @@ tcache_boot0(void) else tcache_maxclass = (1U << opt_lg_tcache_max); - nhbins = NBINS + (tcache_maxclass >> PAGE_SHIFT); + nhbins = NBINS + (tcache_maxclass >> LG_PAGE); /* Initialize tcache_bin_info. */ tcache_bin_info = (tcache_bin_info_t *)base_alloc(nhbins * diff --git a/src/zone.c b/src/zone.c index 6c1e415b..a50c129c 100644 --- a/src/zone.c +++ b/src/zone.c @@ -78,7 +78,7 @@ zone_valloc(malloc_zone_t *zone, size_t size) { void *ret = NULL; /* Assignment avoids useless compiler warning. */ - je_posix_memalign(&ret, PAGE_SIZE, size); + je_posix_memalign(&ret, PAGE, size); return (ret); } From 9d4d76874d888986e611bd9c9a0c905841956c4d Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 2 Apr 2012 07:15:42 -0700 Subject: [PATCH 0123/3378] Finish renaming "arenas.pagesize" to "arenas.page". 
--- src/stats.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/src/stats.c b/src/stats.c index ca6d408c..83baf568 100644 --- a/src/stats.c +++ b/src/stats.c @@ -57,11 +57,11 @@ static void stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, unsigned i) { - size_t pagesize; + size_t page; bool config_tcache; unsigned nbins, j, gap_start; - CTL_GET("arenas.pagesize", &pagesize, size_t); + CTL_GET("arenas.page", &page, size_t); CTL_GET("config.tcache", &config_tcache, bool); if (config_tcache) { @@ -129,7 +129,7 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, " %12"PRIu64" %12"PRIu64" %12"PRIu64 " %12"PRIu64" %12"PRIu64" %12"PRIu64 " %12zu\n", - j, reg_size, nregs, run_size / pagesize, + j, reg_size, nregs, run_size / page, allocated, nmalloc, ndalloc, nrequests, nfills, nflushes, nruns, reruns, curruns); } else { @@ -137,7 +137,7 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, "%13u %5zu %4u %3zu %12zu %12"PRIu64 " %12"PRIu64" %12"PRIu64" %12"PRIu64 " %12zu\n", - j, reg_size, nregs, run_size / pagesize, + j, reg_size, nregs, run_size / page, allocated, nmalloc, ndalloc, nruns, reruns, curruns); } @@ -159,10 +159,10 @@ static void stats_arena_lruns_print(void (*write_cb)(void *, const char *), void *cbopaque, unsigned i) { - size_t pagesize, nlruns, j; + size_t page, nlruns, j; ssize_t gap_start; - CTL_GET("arenas.pagesize", &pagesize, size_t); + CTL_GET("arenas.page", &page, size_t); malloc_cprintf(write_cb, cbopaque, "large: size pages nmalloc ndalloc nrequests" @@ -193,7 +193,7 @@ stats_arena_lruns_print(void (*write_cb)(void *, const char *), void *cbopaque, malloc_cprintf(write_cb, cbopaque, "%13zu %5zu %12"PRIu64" %12"PRIu64" %12"PRIu64 " %12zu\n", - run_size, run_size / pagesize, nmalloc, ndalloc, + run_size, run_size / page, nmalloc, ndalloc, nrequests, curruns); } } @@ -206,14 +206,14 @@ stats_arena_print(void 
(*write_cb)(void *, const char *), void *cbopaque, unsigned i, bool bins, bool large) { unsigned nthreads; - size_t pagesize, pactive, pdirty, mapped; + size_t page, pactive, pdirty, mapped; uint64_t npurge, nmadvise, purged; size_t small_allocated; uint64_t small_nmalloc, small_ndalloc, small_nrequests; size_t large_allocated; uint64_t large_nmalloc, large_ndalloc, large_nrequests; - CTL_GET("arenas.pagesize", &pagesize, size_t); + CTL_GET("arenas.page", &page, size_t); CTL_I_GET("stats.arenas.0.nthreads", &nthreads, unsigned); malloc_cprintf(write_cb, cbopaque, @@ -251,8 +251,7 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, small_nmalloc + large_nmalloc, small_ndalloc + large_ndalloc, small_nrequests + large_nrequests); - malloc_cprintf(write_cb, cbopaque, "active: %12zu\n", - pactive * pagesize ); + malloc_cprintf(write_cb, cbopaque, "active: %12zu\n", pactive * page); CTL_I_GET("stats.arenas.0.mapped", &mapped, size_t); malloc_cprintf(write_cb, cbopaque, "mapped: %12zu\n", mapped); From 12a6845b6c91cf76caf3199495d76b16bba1f2fe Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 3 Apr 2012 13:20:21 -0700 Subject: [PATCH 0124/3378] Use $((...)) instead of expr. Use $((...)) for math in size_classes.h rather than expr, because it is much faster. This is not supported syntax in the classic Bourne shell, but all modern sh implementations support it, including bash, zsh, and ash. 
--- include/jemalloc/internal/size_classes.sh | 30 +++++++++++------------ 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/include/jemalloc/internal/size_classes.sh b/include/jemalloc/internal/size_classes.sh index 9829a2b3..3d236136 100755 --- a/include/jemalloc/internal/size_classes.sh +++ b/include/jemalloc/internal/size_classes.sh @@ -17,8 +17,8 @@ pow2() { e=$1 pow2_result=1 while [ ${e} -gt 0 ] ; do - pow2_result=`expr ${pow2_result} + ${pow2_result}` - e=`expr ${e} - 1` + pow2_result=$((${pow2_result} + ${pow2_result})) + e=$((${e} - 1)) done } @@ -45,7 +45,7 @@ EOF bin=0 psz=0 sz=${t} - delta=`expr ${sz} - ${psz}` + delta=$((${sz} - ${psz})) cat < Date: Tue, 3 Apr 2012 01:33:55 -0700 Subject: [PATCH 0125/3378] Remove obsolete "config.dynamic_page_shift" mallctl documentation. --- doc/jemalloc.xml.in | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 28760b21..7a2d033d 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -630,16 +630,6 @@ for (i = 0; i < nbins; i++) { build configuration. - - - config.dynamic_page_shift - (bool) - r- - - was - specified during build configuration. - - config.fill From 633aaff96787db82c06d35baf012de197a1a1902 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 3 Apr 2012 08:47:07 -0700 Subject: [PATCH 0126/3378] Postpone mutex initialization on FreeBSD. Postpone mutex initialization on FreeBSD until after base allocation is safe. 
--- include/jemalloc/internal/mutex.h | 32 +++++++++++++++++++++-------- src/jemalloc.c | 5 +++++ src/mutex.c | 34 +++++++++++++++++++++++++++---- 3 files changed, 58 insertions(+), 13 deletions(-) diff --git a/include/jemalloc/internal/mutex.h b/include/jemalloc/internal/mutex.h index ad4f9c24..c46feee3 100644 --- a/include/jemalloc/internal/mutex.h +++ b/include/jemalloc/internal/mutex.h @@ -1,18 +1,20 @@ /******************************************************************************/ #ifdef JEMALLOC_H_TYPES +typedef struct malloc_mutex_s malloc_mutex_t; + #ifdef JEMALLOC_OSSPIN -typedef OSSpinLock malloc_mutex_t; -#define MALLOC_MUTEX_INITIALIZER 0 +#define MALLOC_MUTEX_INITIALIZER {0} +#elif (defined(JEMALLOC_MUTEX_INIT_CB)) +#define MALLOC_MUTEX_INITIALIZER {PTHREAD_MUTEX_INITIALIZER, NULL} #else -typedef pthread_mutex_t malloc_mutex_t; # if (defined(PTHREAD_MUTEX_ADAPTIVE_NP) && \ defined(PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP)) # define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_ADAPTIVE_NP -# define MALLOC_MUTEX_INITIALIZER PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP +# define MALLOC_MUTEX_INITIALIZER {PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP} # else # define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_DEFAULT -# define MALLOC_MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER +# define MALLOC_MUTEX_INITIALIZER {PTHREAD_MUTEX_INITIALIZER} # endif #endif @@ -20,6 +22,17 @@ typedef pthread_mutex_t malloc_mutex_t; /******************************************************************************/ #ifdef JEMALLOC_H_STRUCTS +struct malloc_mutex_s { +#ifdef JEMALLOC_OSSPIN + OSSpinLock lock; +#elif (defined(JEMALLOC_MUTEX_INIT_CB)) + pthread_mutex_t lock; + malloc_mutex_t *postponed_next; +#else + pthread_mutex_t lock; +#endif +}; + #endif /* JEMALLOC_H_STRUCTS */ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS @@ -34,6 +47,7 @@ bool malloc_mutex_init(malloc_mutex_t *mutex); void malloc_mutex_prefork(malloc_mutex_t *mutex); void 
malloc_mutex_postfork_parent(malloc_mutex_t *mutex); void malloc_mutex_postfork_child(malloc_mutex_t *mutex); +bool mutex_boot(void); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ @@ -51,9 +65,9 @@ malloc_mutex_lock(malloc_mutex_t *mutex) if (isthreaded) { #ifdef JEMALLOC_OSSPIN - OSSpinLockLock(mutex); + OSSpinLockLock(&mutex->lock); #else - pthread_mutex_lock(mutex); + pthread_mutex_lock(&mutex->lock); #endif } } @@ -64,9 +78,9 @@ malloc_mutex_unlock(malloc_mutex_t *mutex) if (isthreaded) { #ifdef JEMALLOC_OSSPIN - OSSpinLockUnlock(mutex); + OSSpinLockUnlock(&mutex->lock); #else - pthread_mutex_unlock(mutex); + pthread_mutex_unlock(&mutex->lock); #endif } } diff --git a/src/jemalloc.c b/src/jemalloc.c index c7019220..a6d2df57 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -651,6 +651,11 @@ malloc_init_hard(void) return (true); } + if (mutex_boot()) { + malloc_mutex_unlock(&init_lock); + return (true); + } + if (opt_narenas == 0) { /* * For SMP systems, create more than one arena per CPU by diff --git a/src/mutex.c b/src/mutex.c index 0b20bbf3..4b8ce570 100644 --- a/src/mutex.c +++ b/src/mutex.c @@ -11,6 +11,10 @@ #ifdef JEMALLOC_LAZY_LOCK bool isthreaded = false; #endif +#ifdef JEMALLOC_MUTEX_INIT_CB +static bool postpone_init = true; +static malloc_mutex_t *postponed_mutexes = NULL; +#endif #ifdef JEMALLOC_LAZY_LOCK static void pthread_create_once(void); @@ -65,17 +69,23 @@ bool malloc_mutex_init(malloc_mutex_t *mutex) { #ifdef JEMALLOC_OSSPIN - *mutex = 0; + mutex->lock = 0; #elif (defined(JEMALLOC_MUTEX_INIT_CB)) - if (_pthread_mutex_init_calloc_cb(mutex, base_calloc) != 0) - return (true); + if (postpone_init) { + mutex->postponed_next = postponed_mutexes; + postponed_mutexes = mutex; + } else { + if (_pthread_mutex_init_calloc_cb(&mutex->lock, base_calloc) != + 0) + return (true); + } #else pthread_mutexattr_t attr; if (pthread_mutexattr_init(&attr) != 0) return (true); 
pthread_mutexattr_settype(&attr, MALLOC_MUTEX_TYPE); - if (pthread_mutex_init(mutex, &attr) != 0) { + if (pthread_mutex_init(&mutex->lock, &attr) != 0) { pthread_mutexattr_destroy(&attr); return (true); } @@ -114,3 +124,19 @@ malloc_mutex_postfork_child(malloc_mutex_t *mutex) } #endif } + +bool +mutex_boot(void) +{ + +#ifdef JEMALLOC_MUTEX_INIT_CB + postpone_init = false; + while (postponed_mutexes != NULL) { + if (_pthread_mutex_init_calloc_cb(&postponed_mutexes->lock, + base_calloc) != 0) + return (true); + postponed_mutexes = postponed_mutexes->postponed_next; + } +#endif + return (false); +} From 01b3fe55ff3ac8e4aa689f09fcb0729da8037638 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 3 Apr 2012 09:28:00 -0700 Subject: [PATCH 0127/3378] Add a0malloc(), a0calloc(), and a0free(). Add a0malloc(), a0calloc(), and a0free(), which are used by FreeBSD's libc to allocate/deallocate TLS in static binaries. --- include/jemalloc/internal/arena.h | 36 +++++++------ .../jemalloc/internal/jemalloc_internal.h.in | 19 ++++--- include/jemalloc/internal/tcache.h | 2 +- src/arena.c | 6 +-- src/ctl.c | 2 +- src/jemalloc.c | 53 ++++++++++++++++++- 6 files changed, 89 insertions(+), 29 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 41df11fd..03e3f3ce 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -373,7 +373,7 @@ void arena_stats_merge(arena_t *arena, size_t *nactive, size_t *ndirty, void *arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, bool zero); void *arena_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, - size_t alignment, bool zero); + size_t alignment, bool zero, bool try_tcache); bool arena_new(arena_t *arena, unsigned ind); void arena_boot(void); void arena_prefork(arena_t *arena); @@ -390,9 +390,10 @@ unsigned arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr); prof_ctx_t *arena_prof_ctx_get(const void 
*ptr); void arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx); -void *arena_malloc(size_t size, bool zero); +void *arena_malloc(arena_t *arena, size_t size, bool zero, bool try_tcache); void *arena_malloc_prechosen(arena_t *arena, size_t size, bool zero); -void arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr); +void arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr, + bool try_tcache); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_)) @@ -548,7 +549,7 @@ arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx) } JEMALLOC_INLINE void * -arena_malloc(size_t size, bool zero) +arena_malloc(arena_t *arena, size_t size, bool zero, bool try_tcache) { tcache_t *tcache; @@ -556,20 +557,24 @@ arena_malloc(size_t size, bool zero) assert(size <= arena_maxclass); if (size <= SMALL_MAXCLASS) { - if ((tcache = tcache_get(true)) != NULL) + if (try_tcache && (tcache = tcache_get(true)) != NULL) return (tcache_alloc_small(tcache, size, zero)); - else - return (arena_malloc_small(choose_arena(), size, zero)); + else { + return (arena_malloc_small(choose_arena(arena), size, + zero)); + } } else { /* * Initialize tcache after checking size in order to avoid * infinite recursion during tcache initialization. 
*/ - if (size <= tcache_maxclass && (tcache = tcache_get(true)) != - NULL) + if (try_tcache && size <= tcache_maxclass && (tcache = + tcache_get(true)) != NULL) return (tcache_alloc_large(tcache, size, zero)); - else - return (arena_malloc_large(choose_arena(), size, zero)); + else { + return (arena_malloc_large(choose_arena(arena), size, + zero)); + } } } @@ -587,11 +592,11 @@ arena_malloc_prechosen(arena_t *arena, size_t size, bool zero) } JEMALLOC_INLINE void -arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr) +arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr, bool try_tcache) { size_t pageind; arena_chunk_map_t *mapelm; - tcache_t *tcache = tcache_get(false); + tcache_t *tcache; assert(arena != NULL); assert(chunk->arena == arena); @@ -603,7 +608,7 @@ arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr) assert((mapelm->bits & CHUNK_MAP_ALLOCATED) != 0); if ((mapelm->bits & CHUNK_MAP_LARGE) == 0) { /* Small allocation. */ - if (tcache != NULL) + if (try_tcache && (tcache = tcache_get(false)) != NULL) tcache_dalloc_small(tcache, ptr); else { arena_run_t *run; @@ -630,7 +635,8 @@ arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr) assert(((uintptr_t)ptr & PAGE_MASK) == 0); - if (size <= tcache_maxclass && tcache != NULL) { + if (try_tcache && size <= tcache_maxclass && (tcache = + tcache_get(false)) != NULL) { tcache_dalloc_large(tcache, ptr, size); } else { malloc_mutex_lock(&arena->lock); diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 0c22bfb7..c8e40198 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -399,7 +399,7 @@ malloc_tsd_protos(JEMALLOC_ATTR(unused), arenas, arena_t *) size_t s2u(size_t size); size_t sa2u(size_t size, size_t alignment, size_t *run_size_p); -arena_t *choose_arena(void); +arena_t *choose_arena(arena_t *arena); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || 
defined(JEMALLOC_C_)) @@ -517,10 +517,13 @@ sa2u(size_t size, size_t alignment, size_t *run_size_p) /* Choose an arena based on a per-thread value. */ JEMALLOC_INLINE arena_t * -choose_arena(void) +choose_arena(arena_t *arena) { arena_t *ret; + if (arena != NULL) + return (arena); + if ((ret = *arenas_tsd_get()) == NULL) { ret = choose_arena_hard(); assert(ret != NULL); @@ -556,7 +559,7 @@ imalloc(size_t size) assert(size != 0); if (size <= arena_maxclass) - return (arena_malloc(size, false)); + return (arena_malloc(NULL, size, false, true)); else return (huge_malloc(size, false)); } @@ -566,7 +569,7 @@ icalloc(size_t size) { if (size <= arena_maxclass) - return (arena_malloc(size, true)); + return (arena_malloc(NULL, size, true, true)); else return (huge_malloc(size, true)); } @@ -580,7 +583,7 @@ ipalloc(size_t usize, size_t alignment, bool zero) assert(usize == sa2u(usize, alignment, NULL)); if (usize <= arena_maxclass && alignment <= PAGE) - ret = arena_malloc(usize, zero); + ret = arena_malloc(NULL, usize, zero, true); else { size_t run_size JEMALLOC_CC_SILENCE_INIT(0); @@ -594,7 +597,7 @@ ipalloc(size_t usize, size_t alignment, bool zero) */ sa2u(usize, alignment, &run_size); if (run_size <= arena_maxclass) { - ret = arena_palloc(choose_arena(), usize, run_size, + ret = arena_palloc(choose_arena(NULL), usize, run_size, alignment, zero); } else if (alignment <= chunksize) ret = huge_malloc(usize, zero); @@ -647,7 +650,7 @@ idalloc(void *ptr) chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (chunk != ptr) - arena_dalloc(chunk->arena, chunk, ptr); + arena_dalloc(chunk->arena, chunk, ptr, true); else huge_dalloc(ptr, true); } @@ -711,7 +714,7 @@ iralloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero, } else { if (size + extra <= arena_maxclass) { return (arena_ralloc(ptr, oldsize, size, extra, - alignment, zero)); + alignment, zero, true)); } else { return (huge_ralloc(ptr, oldsize, size, extra, alignment, zero)); diff --git 
a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index a1d9aae3..93e721d5 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -237,7 +237,7 @@ tcache_get(bool create) tcache_enabled_set(false); /* Memoize. */ return (NULL); } - return (tcache_create(choose_arena())); + return (tcache_create(choose_arena(NULL))); } if (tcache == TCACHE_STATE_PURGATORY) { /* diff --git a/src/arena.c b/src/arena.c index b7e14228..64440996 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1888,7 +1888,7 @@ arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, void * arena_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, - size_t alignment, bool zero) + size_t alignment, bool zero, bool try_tcache) { void *ret; size_t copysize; @@ -1909,7 +1909,7 @@ arena_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, return (NULL); ret = ipalloc(usize, alignment, zero); } else - ret = arena_malloc(size + extra, zero); + ret = arena_malloc(NULL, size + extra, zero, try_tcache); if (ret == NULL) { if (extra == 0) @@ -1921,7 +1921,7 @@ arena_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, return (NULL); ret = ipalloc(usize, alignment, zero); } else - ret = arena_malloc(size, zero); + ret = arena_malloc(NULL, size, zero, try_tcache); if (ret == NULL) return (NULL); diff --git a/src/ctl.c b/src/ctl.c index 2afca51a..6777688a 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1016,7 +1016,7 @@ thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, int ret; unsigned newind, oldind; - newind = oldind = choose_arena()->ind; + newind = oldind = choose_arena(NULL)->ind; WRITE(newind, unsigned); READ(oldind, unsigned); if (newind != oldind) { diff --git a/src/jemalloc.c b/src/jemalloc.c index a6d2df57..690cf082 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1487,7 +1487,6 @@ je_nallocm(size_t *rsize, size_t size, int flags) * End experimental functions. 
*/ /******************************************************************************/ - /* * The following functions are used by threading libraries for protection of * malloc during fork(). @@ -1552,3 +1551,55 @@ jemalloc_postfork_child(void) } /******************************************************************************/ +/* + * The following functions are used for TLS allocation/deallocation in static + * binaries on FreeBSD. The primary difference between these and i[mcd]alloc() + * is that these avoid accessing TLS variables. + */ + +static void * +a0alloc(size_t size, bool zero) +{ + + if (malloc_init()) + return (NULL); + + if (size == 0) + size = 1; + + if (size <= arena_maxclass) + return (arena_malloc(arenas[0], size, zero, false)); + else + return (huge_malloc(size, zero)); +} + +void * +a0malloc(size_t size) +{ + + return (a0alloc(size, false)); +} + +void * +a0calloc(size_t num, size_t size) +{ + + return (a0alloc(num * size, true)); +} + +void +a0free(void *ptr) +{ + arena_chunk_t *chunk; + + if (ptr == NULL) + return; + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + if (chunk != ptr) + arena_dalloc(chunk->arena, chunk, ptr, false); + else + huge_dalloc(ptr, true); +} + +/******************************************************************************/ From 3cc1f1aa6981d6647aa01cec725fb2c134c1b0e9 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 3 Apr 2012 22:30:05 -0700 Subject: [PATCH 0128/3378] Add tls_model configuration. The tls_model attribute isn't supporte by clang (yet?), so add a configure test that defines JEMALLOC_TLS_MODEL appropriately. 
--- configure.ac | 17 ++++++++++++++++- include/jemalloc/internal/tsd.h | 6 +++--- include/jemalloc/jemalloc_defs.h.in | 3 +++ 3 files changed, 22 insertions(+), 4 deletions(-) diff --git a/configure.ac b/configure.ac index c1b46dc3..e704d2de 100644 --- a/configure.ac +++ b/configure.ac @@ -285,6 +285,21 @@ if test "x${je_cv_attribute}" = "xyes" ; then JE_CFLAGS_APPEND([-fvisibility=hidden]) fi fi +dnl Check for tls_model attribute support (clang 3.0 still lacks support). +SAVED_CFLAGS="${CFLAGS}" +JE_CFLAGS_APPEND([-Werror]) +JE_COMPILABLE([tls_model attribute], [], + [static __thread int + __attribute__((tls_model("initial-exec"))) foo; + foo = 0;], + [je_cv_tls_model]) +CFLAGS="${SAVED_CFLAGS}" +if test "x${je_cv_tls_model}" = "xyes" ; then + AC_DEFINE([JEMALLOC_TLS_MODEL], + [__attribute__((tls_model("initial-exec")))]) +else + AC_DEFINE([JEMALLOC_TLS_MODEL], [ ]) +fi JE_COMPILABLE([mremap(...MREMAP_FIXED...)], [ #define _GNU_SOURCE @@ -719,7 +734,7 @@ AC_CACHE_CHECK([STATIC_PAGE_SHIFT], return 1; } fprintf(f, "%u\n", ffs((int)result) - 1); - close(f); + fclose(f); return 0; ]])], diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 60aaa427..35ae5e3c 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -87,14 +87,14 @@ extern bool a_name##_booted; /* malloc_tsd_data(). 
*/ #ifdef JEMALLOC_MALLOC_THREAD_CLEANUP #define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ -a_attr __thread a_type JEMALLOC_ATTR(tls_model("initial-exec")) \ +a_attr __thread a_type JEMALLOC_TLS_MODEL \ a_name##_tls = a_initializer; \ -a_attr __thread bool JEMALLOC_ATTR(tls_model("initial-exec")) \ +a_attr __thread bool JEMALLOC_TLS_MODEL \ a_name##_initialized = false; \ a_attr bool a_name##_booted = false; #elif (defined(JEMALLOC_TLS)) #define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ -a_attr __thread a_type JEMALLOC_ATTR(tls_model("initial-exec")) \ +a_attr __thread a_type JEMALLOC_TLS_MODEL \ a_name##_tls = a_initializer; \ a_attr pthread_key_t a_name##_tsd; \ a_attr bool a_name##_booted = false; diff --git a/include/jemalloc/jemalloc_defs.h.in b/include/jemalloc/jemalloc_defs.h.in index e4bfa04a..040753a6 100644 --- a/include/jemalloc/jemalloc_defs.h.in +++ b/include/jemalloc/jemalloc_defs.h.in @@ -108,6 +108,9 @@ # define JEMALLOC_ATTR(s) JEMALLOC_CATTR(s,) #endif +/* Non-empty if the tls_model attribute is supported. */ +#undef JEMALLOC_TLS_MODEL + /* JEMALLOC_CC_SILENCE enables code that silences unuseful compiler warnings. */ #undef JEMALLOC_CC_SILENCE From bbe53b1c16d523d3c70cf8e942249f7d76f90e73 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 4 Apr 2012 15:24:01 -0700 Subject: [PATCH 0129/3378] Revert "Use ffsl() in ALLOCM_ALIGN()." This reverts commit 722b370399fd6734de6781285ce9a0cffd547bdd. Unfortunately, glibc requires _GNU_SOURCE to be defined before including string.h, but there is no reliable way to get the prototype within jemalloc.h unless _GNU_SOURCE was already defined. 
--- include/jemalloc/jemalloc.h.in | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/jemalloc.h.in b/include/jemalloc/jemalloc.h.in index 8825a943..f0581dbd 100644 --- a/include/jemalloc/jemalloc.h.in +++ b/include/jemalloc/jemalloc.h.in @@ -4,6 +4,7 @@ extern "C" { #endif +#include #include #define JEMALLOC_VERSION "@jemalloc_version@" @@ -17,7 +18,11 @@ extern "C" { #ifdef JEMALLOC_EXPERIMENTAL #define ALLOCM_LG_ALIGN(la) (la) -#define ALLOCM_ALIGN(a) (ffsl(a)-1) +#if LG_SIZEOF_PTR == 2 +#define ALLOCM_ALIGN(a) (ffs(a)-1) +#else +#define ALLOCM_ALIGN(a) ((a < (size_t)INT_MAX) ? ffs(a)-1 : ffs(a>>32)+31) +#endif #define ALLOCM_ZERO ((int)0x40) #define ALLOCM_NO_MOVE ((int)0x80) From 382132eeacd9311a7a25d5b8f126d82ef453d60d Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 4 Apr 2012 15:25:43 -0700 Subject: [PATCH 0130/3378] Add missing include for ffsl() test. --- configure.ac | 1 + 1 file changed, 1 insertion(+) diff --git a/configure.ac b/configure.ac index e704d2de..8e6a3099 100644 --- a/configure.ac +++ b/configure.ac @@ -874,6 +874,7 @@ dnl Check for ffsl(3), and fail if not found. This function exists on all dnl platforms that jemalloc currently has a chance of functioning on without dnl modification. JE_COMPILABLE([a program using ffsl], [ +#include #include ], [ { From f3ca7c8386f6a21347aed68053117c2c59939551 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 4 Apr 2012 16:16:09 -0700 Subject: [PATCH 0131/3378] Add missing "opt.lg_tcache_max" mallctl implementation. 
--- src/ctl.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/ctl.c b/src/ctl.c index 6777688a..df7affdc 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -66,6 +66,7 @@ CTL_PROTO(opt_junk) CTL_PROTO(opt_zero) CTL_PROTO(opt_xmalloc) CTL_PROTO(opt_tcache) +CTL_PROTO(opt_lg_tcache_max) CTL_PROTO(opt_prof) CTL_PROTO(opt_prof_prefix) CTL_PROTO(opt_prof_active) @@ -188,6 +189,7 @@ static const ctl_node_t opt_node[] = { {NAME("zero"), CTL(opt_zero)}, {NAME("xmalloc"), CTL(opt_xmalloc)}, {NAME("tcache"), CTL(opt_tcache)}, + {NAME("lg_tcache_max"), CTL(opt_lg_tcache_max)}, {NAME("prof"), CTL(opt_prof)}, {NAME("prof_prefix"), CTL(opt_prof_prefix)}, {NAME("prof_active"), CTL(opt_prof_active)}, @@ -1091,6 +1093,7 @@ CTL_RO_NL_CGEN(config_fill, opt_junk, opt_junk, bool) CTL_RO_NL_CGEN(config_fill, opt_zero, opt_zero, bool) CTL_RO_NL_CGEN(config_xmalloc, opt_xmalloc, opt_xmalloc, bool) CTL_RO_NL_CGEN(config_tcache, opt_tcache, opt_tcache, bool) +CTL_RO_NL_CGEN(config_tcache, opt_lg_tcache_max, opt_lg_tcache_max, ssize_t) CTL_RO_NL_CGEN(config_prof, opt_prof, opt_prof, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_prefix, opt_prof_prefix, const char *) CTL_RO_CGEN(config_prof, opt_prof_active, opt_prof_active, bool) /* Mutable. */ From 02b231205e802a7c4f33899a569adcb1312a85d5 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 5 Apr 2012 11:06:23 -0700 Subject: [PATCH 0132/3378] Fix threaded initialization and enable it on Linux. Reported by Mike Hommey. 
--- configure.ac | 1 + src/jemalloc.c | 8 +++++--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/configure.ac b/configure.ac index 8e6a3099..16f03784 100644 --- a/configure.ac +++ b/configure.ac @@ -228,6 +228,7 @@ case "${host}" in CPPFLAGS="$CPPFLAGS -D_GNU_SOURCE" abi="elf" AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED], [ ]) + AC_DEFINE([JEMALLOC_THREADED_INIT], [ ]) RPATH="-Wl,-rpath," ;; *-*-netbsd*) diff --git a/src/jemalloc.c b/src/jemalloc.c index 690cf082..a531a216 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -36,13 +36,15 @@ static bool malloc_initialized = false; #ifdef JEMALLOC_THREADED_INIT /* Used to let the initializing thread recursively allocate. */ -static pthread_t malloc_initializer = (unsigned long)0; +# define NO_INITIALIZER ((unsigned long)0) # define INITIALIZER pthread_self() # define IS_INITIALIZER (malloc_initializer == pthread_self()) +static pthread_t malloc_initializer = NO_INITIALIZER; #else -static bool malloc_initializer = false; +# define NO_INITIALIZER false # define INITIALIZER true # define IS_INITIALIZER malloc_initializer +static bool malloc_initializer = NO_INITIALIZER; #endif /* Used to avoid initialization races. */ @@ -531,7 +533,7 @@ malloc_init_hard(void) return (false); } #ifdef JEMALLOC_THREADED_INIT - if (IS_INITIALIZER == false) { + if (malloc_initializer != NO_INITIALIZER && IS_INITIALIZER == false) { /* Busy-wait until the initializing thread completes. */ do { malloc_mutex_unlock(&init_lock); From b147611b5253921a873191bb0589d3b18f613946 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 5 Apr 2012 13:36:17 -0700 Subject: [PATCH 0133/3378] Add utrace(2)-based tracing (--enable-utrace). 
--- INSTALL | 4 ++ configure.ac | 29 ++++++++++++++ doc/jemalloc.xml.in | 25 ++++++++++++ .../jemalloc/internal/jemalloc_internal.h.in | 12 ++++++ include/jemalloc/internal/private_namespace.h | 1 + include/jemalloc/jemalloc_defs.h.in | 3 ++ src/ctl.c | 6 +++ src/jemalloc.c | 38 ++++++++++++++++++- src/stats.c | 1 + 9 files changed, 118 insertions(+), 1 deletion(-) diff --git a/INSTALL b/INSTALL index c0ae106a..8a825df9 100644 --- a/INSTALL +++ b/INSTALL @@ -119,6 +119,10 @@ any of the following arguments (not a definitive list) to 'configure': --disable-experimental Disable support for the experimental API (*allocm()). +--enable-utrace + Enable utrace(2)-based allocation tracing. This feature is not broadly + portable (FreeBSD has it, but Linux and OS X do not). + --enable-xmalloc Enable support for optional immediate termination due to out-of-memory errors, as is commonly implemented by "xmalloc" wrapper function for malloc. diff --git a/configure.ac b/configure.ac index 16f03784..8e94b5c9 100644 --- a/configure.ac +++ b/configure.ac @@ -699,6 +699,34 @@ if test "x$enable_fill" = "x1" ; then fi AC_SUBST([enable_fill]) +dnl Disable utrace(2)-based tracing by default. +AC_ARG_ENABLE([utrace], + [AS_HELP_STRING([--enable-utrace], [Enable utrace(2)-based tracing])], +[if test "x$enable_utrace" = "xno" ; then + enable_utrace="0" +else + enable_utrace="1" +fi +], +[enable_utrace="0"] +) +JE_COMPILABLE([utrace(2)], [ +#include +#include +#include +#include +#include +], [ + utrace((void *)0, 0); +], [je_cv_utrace]) +if test "x${je_cv_utrace}" = "xno" ; then + enable_utrace="0" +fi +if test "x$enable_utrace" = "x1" ; then + AC_DEFINE([JEMALLOC_UTRACE], [ ]) +fi +AC_SUBST([enable_utrace]) + dnl Do not support the xmalloc option by default. 
AC_ARG_ENABLE([xmalloc], [AS_HELP_STRING([--enable-xmalloc], [Support xmalloc option])], @@ -1061,6 +1089,7 @@ AC_MSG_RESULT([prof-gcc : ${enable_prof_gcc}]) AC_MSG_RESULT([tcache : ${enable_tcache}]) AC_MSG_RESULT([fill : ${enable_fill}]) AC_MSG_RESULT([xmalloc : ${enable_xmalloc}]) +AC_MSG_RESULT([utrace : ${enable_utrace}]) AC_MSG_RESULT([dss : ${enable_dss}]) AC_MSG_RESULT([lazy_lock : ${enable_lazy_lock}]) AC_MSG_RESULT([tls : ${enable_tls}]) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 7a2d033d..8ae82621 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -710,6 +710,16 @@ for (i = 0; i < nbins; i++) { build configuration. + + + config.utrace + (bool) + r- + + was specified during + build configuration. + + config.xmalloc @@ -826,6 +836,19 @@ for (i = 0; i < nbins; i++) { + + + opt.utrace + (bool) + r- + [] + + Allocation tracing based on + utrace + 2 enabled/disabled. This option + is disabled by default. + + opt.xmalloc @@ -1958,6 +1981,8 @@ malloc_conf = "lg_chunk:24";]]> 2, sbrk 2, + utrace + 2, alloca 3, atexit diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index c8e40198..66dd357f 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -36,6 +36,10 @@ #define JEMALLOC_NO_DEMANGLE #include "../jemalloc@install_suffix@.h" +#ifdef JEMALLOC_UTRACE +#include +#endif + #include "jemalloc/internal/private_namespace.h" #ifdef JEMALLOC_CC_SILENCE @@ -114,6 +118,13 @@ static const bool config_tls = false #endif ; +static const bool config_utrace = +#ifdef JEMALLOC_UTRACE + true +#else + false +#endif + ; static const bool config_xmalloc = #ifdef JEMALLOC_XMALLOC true @@ -332,6 +343,7 @@ typedef struct { extern bool opt_abort; extern bool opt_junk; +extern bool opt_utrace; extern bool opt_xmalloc; extern bool opt_zero; extern size_t opt_narenas; diff --git a/include/jemalloc/internal/private_namespace.h 
b/include/jemalloc/internal/private_namespace.h index ed34e328..de3042eb 100644 --- a/include/jemalloc/internal/private_namespace.h +++ b/include/jemalloc/internal/private_namespace.h @@ -173,6 +173,7 @@ #define opt_prof_leak JEMALLOC_N(opt_prof_leak) #define opt_stats_print JEMALLOC_N(opt_stats_print) #define opt_tcache JEMALLOC_N(opt_tcache) +#define opt_utrace JEMALLOC_N(opt_utrace) #define opt_xmalloc JEMALLOC_N(opt_xmalloc) #define opt_zero JEMALLOC_N(opt_zero) #define pow2_ceil JEMALLOC_N(pow2_ceil) diff --git a/include/jemalloc/jemalloc_defs.h.in b/include/jemalloc/jemalloc_defs.h.in index 040753a6..8e7442d6 100644 --- a/include/jemalloc/jemalloc_defs.h.in +++ b/include/jemalloc/jemalloc_defs.h.in @@ -154,6 +154,9 @@ /* Support the experimental API. */ #undef JEMALLOC_EXPERIMENTAL +/* Support utrace(2)-based tracing. */ +#undef JEMALLOC_UTRACE + /* Support optional abort() on OOM. */ #undef JEMALLOC_XMALLOC diff --git a/src/ctl.c b/src/ctl.c index df7affdc..1aaf1971 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -56,6 +56,7 @@ CTL_PROTO(config_prof_libunwind) CTL_PROTO(config_stats) CTL_PROTO(config_tcache) CTL_PROTO(config_tls) +CTL_PROTO(config_utrace) CTL_PROTO(config_xmalloc) CTL_PROTO(opt_abort) CTL_PROTO(opt_lg_chunk) @@ -64,6 +65,7 @@ CTL_PROTO(opt_lg_dirty_mult) CTL_PROTO(opt_stats_print) CTL_PROTO(opt_junk) CTL_PROTO(opt_zero) +CTL_PROTO(opt_utrace) CTL_PROTO(opt_xmalloc) CTL_PROTO(opt_tcache) CTL_PROTO(opt_lg_tcache_max) @@ -176,6 +178,7 @@ static const ctl_node_t config_node[] = { {NAME("stats"), CTL(config_stats)}, {NAME("tcache"), CTL(config_tcache)}, {NAME("tls"), CTL(config_tls)}, + {NAME("utrace"), CTL(config_utrace)}, {NAME("xmalloc"), CTL(config_xmalloc)} }; @@ -187,6 +190,7 @@ static const ctl_node_t opt_node[] = { {NAME("stats_print"), CTL(opt_stats_print)}, {NAME("junk"), CTL(opt_junk)}, {NAME("zero"), CTL(opt_zero)}, + {NAME("utrace"), CTL(opt_utrace)}, {NAME("xmalloc"), CTL(opt_xmalloc)}, {NAME("tcache"), CTL(opt_tcache)}, 
{NAME("lg_tcache_max"), CTL(opt_lg_tcache_max)}, @@ -1080,6 +1084,7 @@ CTL_RO_BOOL_CONFIG_GEN(config_prof_libunwind) CTL_RO_BOOL_CONFIG_GEN(config_stats) CTL_RO_BOOL_CONFIG_GEN(config_tcache) CTL_RO_BOOL_CONFIG_GEN(config_tls) +CTL_RO_BOOL_CONFIG_GEN(config_utrace) CTL_RO_BOOL_CONFIG_GEN(config_xmalloc) /******************************************************************************/ @@ -1091,6 +1096,7 @@ CTL_RO_NL_GEN(opt_lg_dirty_mult, opt_lg_dirty_mult, ssize_t) CTL_RO_NL_GEN(opt_stats_print, opt_stats_print, bool) CTL_RO_NL_CGEN(config_fill, opt_junk, opt_junk, bool) CTL_RO_NL_CGEN(config_fill, opt_zero, opt_zero, bool) +CTL_RO_NL_CGEN(config_utrace, opt_utrace, opt_utrace, bool) CTL_RO_NL_CGEN(config_xmalloc, opt_xmalloc, opt_xmalloc, bool) CTL_RO_NL_CGEN(config_tcache, opt_tcache, opt_tcache, bool) CTL_RO_NL_CGEN(config_tcache, opt_lg_tcache_max, opt_lg_tcache_max, ssize_t) diff --git a/src/jemalloc.c b/src/jemalloc.c index a531a216..e0510209 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -21,6 +21,7 @@ bool opt_junk = false; bool opt_abort = false; bool opt_junk = false; #endif +bool opt_utrace = false; bool opt_xmalloc = false; bool opt_zero = false; size_t opt_narenas = 0; @@ -50,6 +51,26 @@ static bool malloc_initializer = NO_INITIALIZER; /* Used to avoid initialization races. */ static malloc_mutex_t init_lock = MALLOC_MUTEX_INITIALIZER; +typedef struct { + void *p; /* Input pointer (as in realloc(p, s)). */ + size_t s; /* Request size. */ + void *r; /* Result pointer. */ +} malloc_utrace_t; + +#ifdef JEMALLOC_UTRACE +# define UTRACE(a, b, c) do { \ + if (opt_utrace) { \ + malloc_utrace_t ut; \ + ut.p = (a); \ + ut.s = (b); \ + ut.r = (c); \ + utrace(&ut, sizeof(ut)); \ + } \ +} while (0) +#else +# define UTRACE(a, b, c) +#endif + /******************************************************************************/ /* Function prototypes for non-inline static functions. 
*/ @@ -483,6 +504,9 @@ malloc_conf_init(void) CONF_HANDLE_BOOL(opt_junk, junk) CONF_HANDLE_BOOL(opt_zero, zero) } + if (config_utrace) { + CONF_HANDLE_BOOL(opt_utrace, utrace) + } if (config_xmalloc) { CONF_HANDLE_BOOL(opt_xmalloc, xmalloc) } @@ -759,6 +783,7 @@ OOM: assert(usize == isalloc(ret)); thread_allocated_tsd_get()->allocated += usize; } + UTRACE(0, size, ret); return (ret); } @@ -852,6 +877,7 @@ RETURN: } if (config_prof && opt_prof && result != NULL) prof_malloc(result, usize, cnt); + UTRACE(0, size, result); return (ret); } @@ -951,6 +977,7 @@ RETURN: assert(usize == isalloc(ret)); thread_allocated_tsd_get()->allocated += usize; } + UTRACE(0, num_size, ret); return (ret); } @@ -1075,6 +1102,7 @@ RETURN: ta->allocated += usize; ta->deallocated += old_size; } + UTRACE(ptr, size, ret); return (ret); } @@ -1083,6 +1111,7 @@ void je_free(void *ptr) { + UTRACE(ptr, 0, 0); if (ptr != NULL) { size_t usize; @@ -1310,6 +1339,7 @@ je_allocm(void **ptr, size_t *rsize, size_t size, int flags) assert(usize == isalloc(p)); thread_allocated_tsd_get()->allocated += usize; } + UTRACE(0, size, p); return (ALLOCM_SUCCESS); OOM: if (config_xmalloc && opt_xmalloc) { @@ -1318,6 +1348,7 @@ OOM: abort(); } *ptr = NULL; + UTRACE(0, size, 0); return (ALLOCM_ERR_OOM); } @@ -1404,16 +1435,20 @@ je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, int flags) ta->allocated += usize; ta->deallocated += old_size; } + UTRACE(p, size, q); return (ALLOCM_SUCCESS); ERR: - if (no_move) + if (no_move) { + UTRACE(p, size, q); return (ALLOCM_ERR_NOT_MOVED); + } OOM: if (config_xmalloc && opt_xmalloc) { malloc_write(": Error in rallocm(): " "out of memory\n"); abort(); } + UTRACE(p, size, 0); return (ALLOCM_ERR_OOM); } @@ -1448,6 +1483,7 @@ je_dallocm(void *ptr, int flags) assert(ptr != NULL); assert(malloc_initialized || IS_INITIALIZER); + UTRACE(ptr, 0, 0); if (config_stats) usize = isalloc(ptr); if (config_prof && opt_prof) { diff --git a/src/stats.c b/src/stats.c index 
83baf568..0cd70b0d 100644 --- a/src/stats.c +++ b/src/stats.c @@ -383,6 +383,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, OPT_WRITE_BOOL(stats_print) OPT_WRITE_BOOL(junk) OPT_WRITE_BOOL(zero) + OPT_WRITE_BOOL(utrace) OPT_WRITE_BOOL(xmalloc) OPT_WRITE_BOOL(tcache) OPT_WRITE_SSIZE_T(lg_tcache_max) From fad100bc35efba262f2c98cefc134899d393c734 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 6 Apr 2012 12:24:46 -0700 Subject: [PATCH 0134/3378] Remove arena_malloc_prechosen(). Remove arena_malloc_prechosen(), now that arena_malloc() can be invoked in a way that is semantically equivalent. --- include/jemalloc/internal/arena.h | 14 -------------- src/tsd.c | 2 +- 2 files changed, 1 insertion(+), 15 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 03e3f3ce..d25a2b1d 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -391,7 +391,6 @@ unsigned arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, prof_ctx_t *arena_prof_ctx_get(const void *ptr); void arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx); void *arena_malloc(arena_t *arena, size_t size, bool zero, bool try_tcache); -void *arena_malloc_prechosen(arena_t *arena, size_t size, bool zero); void arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr, bool try_tcache); #endif @@ -578,19 +577,6 @@ arena_malloc(arena_t *arena, size_t size, bool zero, bool try_tcache) } } -JEMALLOC_INLINE void * -arena_malloc_prechosen(arena_t *arena, size_t size, bool zero) -{ - - assert(size != 0); - assert(size <= arena_maxclass); - - if (size <= SMALL_MAXCLASS) - return (arena_malloc_small(arena, size, zero)); - else - return (arena_malloc_large(arena, size, zero)); -} - JEMALLOC_INLINE void arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr, bool try_tcache) { diff --git a/src/tsd.c b/src/tsd.c index 669ea8fc..0838dc86 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -14,7 +14,7 @@ 
malloc_tsd_malloc(size_t size) { /* Avoid choose_arena() in order to dodge bootstrapping issues. */ - return arena_malloc_prechosen(arenas[0], size, false); + return arena_malloc(arenas[0], size, false, false); } void From 3701367e4ca6b77109e1cce0a5b98a8ac69cf505 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 6 Apr 2012 12:41:55 -0700 Subject: [PATCH 0135/3378] Always initialize tcache data structures. Always initialize tcache data structures if the tcache configuration option is enabled, regardless of opt_tcache. This fixes "thread.tcache.enabled" mallctl manipulation in the case when opt_tcache is false. --- src/tcache.c | 82 ++++++++++++++++++++++++---------------------------- 1 file changed, 37 insertions(+), 45 deletions(-) diff --git a/src/tcache.c b/src/tcache.c index 6a7f17bf..99a657b6 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -334,17 +334,17 @@ tcache_thread_cleanup(void *arg) } else if (tcache == TCACHE_STATE_REINCARNATED) { /* * Another destructor called an allocator function after this - * destructor was called. Reset tcache to 1 in order to - * receive another callback. + * destructor was called. Reset tcache to + * TCACHE_STATE_PURGATORY in order to receive another callback. */ tcache = TCACHE_STATE_PURGATORY; tcache_tsd_set(&tcache); } else if (tcache == TCACHE_STATE_PURGATORY) { /* * The previous time this destructor was called, we set the key - * to 1 so that other destructors wouldn't cause re-creation of - * the tcache. This time, do nothing, so that the destructor - * will not be called again. + * to TCACHE_STATE_PURGATORY so that other destructors wouldn't + * cause re-creation of the tcache. This time, do nothing, so + * that the destructor will not be called again. 
*/ } else if (tcache != NULL) { assert(tcache != TCACHE_STATE_PURGATORY); @@ -381,46 +381,40 @@ tcache_stats_merge(tcache_t *tcache, arena_t *arena) bool tcache_boot0(void) { + unsigned i; - if (opt_tcache) { - unsigned i; + /* + * If necessary, clamp opt_lg_tcache_max, now that arena_maxclass is + * known. + */ + if (opt_lg_tcache_max < 0 || (1U << opt_lg_tcache_max) < SMALL_MAXCLASS) + tcache_maxclass = SMALL_MAXCLASS; + else if ((1U << opt_lg_tcache_max) > arena_maxclass) + tcache_maxclass = arena_maxclass; + else + tcache_maxclass = (1U << opt_lg_tcache_max); - /* - * If necessary, clamp opt_lg_tcache_max, now that - * SMALL_MAXCLASS and arena_maxclass are known. - * XXX Can this be done earlier? - */ - if (opt_lg_tcache_max < 0 || (1U << opt_lg_tcache_max) < - SMALL_MAXCLASS) - tcache_maxclass = SMALL_MAXCLASS; - else if ((1U << opt_lg_tcache_max) > arena_maxclass) - tcache_maxclass = arena_maxclass; - else - tcache_maxclass = (1U << opt_lg_tcache_max); + nhbins = NBINS + (tcache_maxclass >> LG_PAGE); - nhbins = NBINS + (tcache_maxclass >> LG_PAGE); - - /* Initialize tcache_bin_info. */ - tcache_bin_info = (tcache_bin_info_t *)base_alloc(nhbins * - sizeof(tcache_bin_info_t)); - if (tcache_bin_info == NULL) - return (true); - stack_nelms = 0; - for (i = 0; i < NBINS; i++) { - if ((arena_bin_info[i].nregs << 1) <= - TCACHE_NSLOTS_SMALL_MAX) { - tcache_bin_info[i].ncached_max = - (arena_bin_info[i].nregs << 1); - } else { - tcache_bin_info[i].ncached_max = - TCACHE_NSLOTS_SMALL_MAX; - } - stack_nelms += tcache_bin_info[i].ncached_max; - } - for (; i < nhbins; i++) { - tcache_bin_info[i].ncached_max = TCACHE_NSLOTS_LARGE; - stack_nelms += tcache_bin_info[i].ncached_max; + /* Initialize tcache_bin_info. 
*/ + tcache_bin_info = (tcache_bin_info_t *)base_alloc(nhbins * + sizeof(tcache_bin_info_t)); + if (tcache_bin_info == NULL) + return (true); + stack_nelms = 0; + for (i = 0; i < NBINS; i++) { + if ((arena_bin_info[i].nregs << 1) <= TCACHE_NSLOTS_SMALL_MAX) { + tcache_bin_info[i].ncached_max = + (arena_bin_info[i].nregs << 1); + } else { + tcache_bin_info[i].ncached_max = + TCACHE_NSLOTS_SMALL_MAX; } + stack_nelms += tcache_bin_info[i].ncached_max; + } + for (; i < nhbins; i++) { + tcache_bin_info[i].ncached_max = TCACHE_NSLOTS_LARGE; + stack_nelms += tcache_bin_info[i].ncached_max; } return (false); @@ -430,10 +424,8 @@ bool tcache_boot1(void) { - if (opt_tcache) { - if (tcache_tsd_boot() || tcache_enabled_tsd_boot()) - return (true); - } + if (tcache_tsd_boot() || tcache_enabled_tsd_boot()) + return (true); return (false); } From a8683fbaf9056c16a1c5bb2606316ebc35f592a4 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Tue, 10 Apr 2012 15:29:18 +0200 Subject: [PATCH 0136/3378] Ignore whitespaces when comparing test results with expected output In mingw, the test result may contain CRLF while the .exp files don't, or the other way around. --- Makefile.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile.in b/Makefile.in index 821c0634..7df4fc60 100644 --- a/Makefile.in +++ b/Makefile.in @@ -205,7 +205,7 @@ check: tests $(TEST_LIBRARY_PATH) $${t} @abs_srcroot@ @abs_objroot@ \ > @objroot@$${t}.out 2>&1; \ if test -e "@srcroot@$${t}.exp"; then \ - diff -u @srcroot@$${t}.exp \ + diff -w -u @srcroot@$${t}.exp \ @objroot@$${t}.out >/dev/null 2>&1; \ fail=$$?; \ if test "$${fail}" -eq "1" ; then \ From c5851eaf6e0edb35a499d62d30199e336da5ccb6 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Tue, 10 Apr 2012 18:19:45 +0200 Subject: [PATCH 0137/3378] Remove MAP_NORESERVE support It was only used by the swap feature, and that is gone. 
--- include/jemalloc/internal/chunk_mmap.h | 1 - src/chunk_mmap.c | 41 +++++++++----------------- 2 files changed, 14 insertions(+), 28 deletions(-) diff --git a/include/jemalloc/internal/chunk_mmap.h b/include/jemalloc/internal/chunk_mmap.h index 07b50a4d..3f603158 100644 --- a/include/jemalloc/internal/chunk_mmap.h +++ b/include/jemalloc/internal/chunk_mmap.h @@ -10,7 +10,6 @@ #ifdef JEMALLOC_H_EXTERNS void *chunk_alloc_mmap(size_t size); -void *chunk_alloc_mmap_noreserve(size_t size); void chunk_dealloc_mmap(void *chunk, size_t size); bool chunk_mmap_boot(void); diff --git a/src/chunk_mmap.c b/src/chunk_mmap.c index 749a2dac..3a8105df 100644 --- a/src/chunk_mmap.c +++ b/src/chunk_mmap.c @@ -15,16 +15,15 @@ malloc_tsd_funcs(JEMALLOC_INLINE, mmap_unaligned, bool, false, /******************************************************************************/ /* Function prototypes for non-inline static functions. */ -static void *pages_map(void *addr, size_t size, bool noreserve); +static void *pages_map(void *addr, size_t size); static void pages_unmap(void *addr, size_t size); -static void *chunk_alloc_mmap_slow(size_t size, bool unaligned, - bool noreserve); -static void *chunk_alloc_mmap_internal(size_t size, bool noreserve); +static void *chunk_alloc_mmap_slow(size_t size, bool unaligned); +static void *chunk_alloc_mmap_internal(size_t size); /******************************************************************************/ static void * -pages_map(void *addr, size_t size, bool noreserve) +pages_map(void *addr, size_t size) { void *ret; @@ -32,12 +31,8 @@ pages_map(void *addr, size_t size, bool noreserve) * We don't use MAP_FIXED here, because it can cause the *replacement* * of existing mappings, and we only want to create new mappings. 
*/ - int flags = MAP_PRIVATE | MAP_ANON; -#ifdef MAP_NORESERVE - if (noreserve) - flags |= MAP_NORESERVE; -#endif - ret = mmap(addr, size, PROT_READ | PROT_WRITE, flags, -1, 0); + ret = mmap(addr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, + -1, 0); assert(ret != NULL); if (ret == MAP_FAILED) @@ -78,7 +73,7 @@ pages_unmap(void *addr, size_t size) } static void * -chunk_alloc_mmap_slow(size_t size, bool unaligned, bool noreserve) +chunk_alloc_mmap_slow(size_t size, bool unaligned) { void *ret; size_t offset; @@ -87,7 +82,7 @@ chunk_alloc_mmap_slow(size_t size, bool unaligned, bool noreserve) if (size + chunksize <= size) return (NULL); - ret = pages_map(NULL, size + chunksize, noreserve); + ret = pages_map(NULL, size + chunksize); if (ret == NULL) return (NULL); @@ -126,7 +121,7 @@ chunk_alloc_mmap_slow(size_t size, bool unaligned, bool noreserve) } static void * -chunk_alloc_mmap_internal(size_t size, bool noreserve) +chunk_alloc_mmap_internal(size_t size) { void *ret; @@ -161,7 +156,7 @@ chunk_alloc_mmap_internal(size_t size, bool noreserve) if (mmap_unaligned_booted && *mmap_unaligned_tsd_get() == false) { size_t offset; - ret = pages_map(NULL, size, noreserve); + ret = pages_map(NULL, size); if (ret == NULL) return (NULL); @@ -171,14 +166,13 @@ chunk_alloc_mmap_internal(size_t size, bool noreserve) mmap_unaligned_tsd_set(&mu); /* Try to extend chunk boundary. */ if (pages_map((void *)((uintptr_t)ret + size), - chunksize - offset, noreserve) == NULL) { + chunksize - offset) == NULL) { /* * Extension failed. Clean up, then revert to * the reliable-but-expensive method. */ pages_unmap(ret, size); - ret = chunk_alloc_mmap_slow(size, true, - noreserve); + ret = chunk_alloc_mmap_slow(size, true); } else { /* Clean up unneeded leading space. 
*/ pages_unmap(ret, chunksize - offset); @@ -187,7 +181,7 @@ chunk_alloc_mmap_internal(size_t size, bool noreserve) } } } else - ret = chunk_alloc_mmap_slow(size, false, noreserve); + ret = chunk_alloc_mmap_slow(size, false); return (ret); } @@ -196,14 +190,7 @@ void * chunk_alloc_mmap(size_t size) { - return (chunk_alloc_mmap_internal(size, false)); -} - -void * -chunk_alloc_mmap_noreserve(size_t size) -{ - - return (chunk_alloc_mmap_internal(size, true)); + return (chunk_alloc_mmap_internal(size)); } void From eae269036c9f702d9fa9be497a1a2aa1be13a29e Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Tue, 10 Apr 2012 19:50:33 +0200 Subject: [PATCH 0138/3378] Add alignment support to chunk_alloc(). --- include/jemalloc/internal/chunk.h | 2 +- include/jemalloc/internal/chunk_dss.h | 2 +- include/jemalloc/internal/chunk_mmap.h | 2 +- src/arena.c | 3 +- src/base.c | 2 +- src/chunk.c | 7 +- src/chunk_dss.c | 80 ++++++++++++++-------- src/chunk_mmap.c | 35 +++++----- src/huge.c | 92 +++----------------------- 9 files changed, 87 insertions(+), 138 deletions(-) diff --git a/include/jemalloc/internal/chunk.h b/include/jemalloc/internal/chunk.h index 8e24e8f3..e047c2b1 100644 --- a/include/jemalloc/internal/chunk.h +++ b/include/jemalloc/internal/chunk.h @@ -42,7 +42,7 @@ extern size_t chunk_npages; extern size_t map_bias; /* Number of arena chunk header pages. */ extern size_t arena_maxclass; /* Max size class for arenas. 
*/ -void *chunk_alloc(size_t size, bool base, bool *zero); +void *chunk_alloc(size_t size, size_t alignment, bool base, bool *zero); void chunk_dealloc(void *chunk, size_t size, bool unmap); bool chunk_boot0(void); bool chunk_boot1(void); diff --git a/include/jemalloc/internal/chunk_dss.h b/include/jemalloc/internal/chunk_dss.h index a39a2031..16ea9542 100644 --- a/include/jemalloc/internal/chunk_dss.h +++ b/include/jemalloc/internal/chunk_dss.h @@ -9,7 +9,7 @@ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -void *chunk_alloc_dss(size_t size, bool *zero); +void *chunk_alloc_dss(size_t size, size_t alignment, bool *zero); bool chunk_in_dss(void *chunk); bool chunk_dealloc_dss(void *chunk, size_t size); bool chunk_dss_boot(void); diff --git a/include/jemalloc/internal/chunk_mmap.h b/include/jemalloc/internal/chunk_mmap.h index 3f603158..148fefef 100644 --- a/include/jemalloc/internal/chunk_mmap.h +++ b/include/jemalloc/internal/chunk_mmap.h @@ -9,7 +9,7 @@ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -void *chunk_alloc_mmap(size_t size); +void *chunk_alloc_mmap(size_t size, size_t alignment); void chunk_dealloc_mmap(void *chunk, size_t size); bool chunk_mmap_boot(void); diff --git a/src/arena.c b/src/arena.c index 64440996..c84aaf47 100644 --- a/src/arena.c +++ b/src/arena.c @@ -357,7 +357,8 @@ arena_chunk_alloc(arena_t *arena) zero = false; malloc_mutex_unlock(&arena->lock); - chunk = (arena_chunk_t *)chunk_alloc(chunksize, false, &zero); + chunk = (arena_chunk_t *)chunk_alloc(chunksize, chunksize, + false, &zero); malloc_mutex_lock(&arena->lock); if (chunk == NULL) return (NULL); diff --git a/src/base.c b/src/base.c index 696c362a..bafaa743 100644 --- a/src/base.c +++ b/src/base.c @@ -32,7 +32,7 @@ base_pages_alloc(size_t minsize) assert(minsize != 0); csize = CHUNK_CEILING(minsize); zero = false; - base_pages = chunk_alloc(csize, true, &zero); 
+ base_pages = chunk_alloc(csize, chunksize, true, &zero); if (base_pages == NULL) return (true); base_next_addr = base_pages; diff --git a/src/chunk.c b/src/chunk.c index 8fcd61e4..797bd341 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -27,19 +27,20 @@ size_t arena_maxclass; /* Max size class for arenas. */ * advantage of them if they are returned. */ void * -chunk_alloc(size_t size, bool base, bool *zero) +chunk_alloc(size_t size, size_t alignment, bool base, bool *zero) { void *ret; assert(size != 0); assert((size & chunksize_mask) == 0); + assert((alignment & chunksize_mask) == 0); if (config_dss) { - ret = chunk_alloc_dss(size, zero); + ret = chunk_alloc_dss(size, alignment, zero); if (ret != NULL) goto RETURN; } - ret = chunk_alloc_mmap(size); + ret = chunk_alloc_mmap(size, alignment); if (ret != NULL) { *zero = true; goto RETURN; diff --git a/src/chunk_dss.c b/src/chunk_dss.c index 405dc29b..a81a271c 100644 --- a/src/chunk_dss.c +++ b/src/chunk_dss.c @@ -28,41 +28,50 @@ static extent_tree_t dss_chunks_ad; /******************************************************************************/ /* Function prototypes for non-inline static functions. 
*/ -static void *chunk_recycle_dss(size_t size, bool *zero); +static void *chunk_recycle_dss(size_t size, size_t alignment, bool *zero); static extent_node_t *chunk_dealloc_dss_record(void *chunk, size_t size); /******************************************************************************/ static void * -chunk_recycle_dss(size_t size, bool *zero) +chunk_recycle_dss(size_t size, size_t alignment, bool *zero) { extent_node_t *node, key; cassert(config_dss); key.addr = NULL; - key.size = size; + key.size = size + alignment - chunksize; malloc_mutex_lock(&dss_mtx); node = extent_tree_szad_nsearch(&dss_chunks_szad, &key); if (node != NULL) { - void *ret = node->addr; + size_t offset = (size_t)((uintptr_t)(node->addr) & (alignment - + 1)); + void *ret; + if (offset > 0) + offset = alignment - offset; + ret = (void *)((uintptr_t)(node->addr) + offset); /* Remove node from the tree. */ extent_tree_szad_remove(&dss_chunks_szad, node); - if (node->size == size) { - extent_tree_ad_remove(&dss_chunks_ad, node); - base_node_dealloc(node); - } else { - /* - * Insert the remainder of node's address range as a - * smaller chunk. Its position within dss_chunks_ad - * does not change. - */ - assert(node->size > size); - node->addr = (void *)((uintptr_t)node->addr + size); - node->size -= size; + extent_tree_ad_remove(&dss_chunks_ad, node); + if (offset > 0) { + /* Insert the leading space as a smaller chunk. */ + node->size = offset; extent_tree_szad_insert(&dss_chunks_szad, node); + extent_tree_ad_insert(&dss_chunks_ad, node); } + if (alignment - chunksize > offset) { + if (offset > 0) + node = base_node_alloc(); + /* Insert the trailing space as a smaller chunk. 
*/ + node->addr = (void *)((uintptr_t)(ret) + size); + node->size = alignment - chunksize - offset; + extent_tree_szad_insert(&dss_chunks_szad, node); + extent_tree_ad_insert(&dss_chunks_ad, node); + } else if (offset == 0) + base_node_dealloc(node); + malloc_mutex_unlock(&dss_mtx); if (*zero) @@ -75,13 +84,15 @@ chunk_recycle_dss(size_t size, bool *zero) } void * -chunk_alloc_dss(size_t size, bool *zero) +chunk_alloc_dss(size_t size, size_t alignment, bool *zero) { void *ret; cassert(config_dss); + assert(size > 0 && (size & chunksize_mask) == 0); + assert(alignment > 0 && (alignment & chunksize_mask) == 0); - ret = chunk_recycle_dss(size, zero); + ret = chunk_recycle_dss(size, alignment, zero); if (ret != NULL) return (ret); @@ -94,6 +105,8 @@ chunk_alloc_dss(size_t size, bool *zero) malloc_mutex_lock(&dss_mtx); if (dss_prev != (void *)-1) { + size_t gap_size, cpad_size; + void *cpad, *dss_next; intptr_t incr; /* @@ -104,25 +117,36 @@ chunk_alloc_dss(size_t size, bool *zero) do { /* Get the current end of the DSS. */ dss_max = sbrk(0); - /* * Calculate how much padding is necessary to * chunk-align the end of the DSS. */ - incr = (intptr_t)size - - (intptr_t)CHUNK_ADDR2OFFSET(dss_max); - if (incr == (intptr_t)size) - ret = dss_max; - else { - ret = (void *)((intptr_t)dss_max + incr); - incr += size; + gap_size = (chunksize - CHUNK_ADDR2OFFSET(dss_max)) & + chunksize_mask; + /* + * Compute how much chunk-aligned pad space (if any) is + * necessary to satisfy alignment. This space can be + * recycled for later use. + */ + cpad = (void *)((uintptr_t)dss_max + gap_size); + ret = (void *)(((uintptr_t)dss_max + (alignment - 1)) & + ~(alignment - 1)); + cpad_size = (uintptr_t)ret - (uintptr_t)cpad; + dss_next = (void *)((uintptr_t)ret + size); + if ((uintptr_t)ret < (uintptr_t)dss_max || + (uintptr_t)dss_next < (uintptr_t)dss_max) { + /* Wrap-around. 
*/ + malloc_mutex_unlock(&dss_mtx); + return (NULL); } - + incr = gap_size + cpad_size + size; dss_prev = sbrk(incr); if (dss_prev == dss_max) { /* Success. */ - dss_max = (void *)((intptr_t)dss_prev + incr); + dss_max = dss_next; malloc_mutex_unlock(&dss_mtx); + if (cpad_size != 0) + chunk_dealloc_dss(cpad, cpad_size); *zero = true; return (ret); } diff --git a/src/chunk_mmap.c b/src/chunk_mmap.c index 3a8105df..37dad204 100644 --- a/src/chunk_mmap.c +++ b/src/chunk_mmap.c @@ -17,8 +17,9 @@ malloc_tsd_funcs(JEMALLOC_INLINE, mmap_unaligned, bool, false, static void *pages_map(void *addr, size_t size); static void pages_unmap(void *addr, size_t size); -static void *chunk_alloc_mmap_slow(size_t size, bool unaligned); -static void *chunk_alloc_mmap_internal(size_t size); +static void *chunk_alloc_mmap_slow(size_t size, size_t alignment, + bool unaligned); +static void *chunk_alloc_mmap_internal(size_t size, size_t alignment); /******************************************************************************/ @@ -73,7 +74,7 @@ pages_unmap(void *addr, size_t size) } static void * -chunk_alloc_mmap_slow(size_t size, bool unaligned) +chunk_alloc_mmap_slow(size_t size, size_t alignment, bool unaligned) { void *ret; size_t offset; @@ -82,29 +83,26 @@ chunk_alloc_mmap_slow(size_t size, bool unaligned) if (size + chunksize <= size) return (NULL); - ret = pages_map(NULL, size + chunksize); + ret = pages_map(NULL, size + alignment); if (ret == NULL) return (NULL); /* Clean up unneeded leading/trailing space. */ - offset = CHUNK_ADDR2OFFSET(ret); + offset = (size_t)((uintptr_t)(ret) & (alignment - 1)); if (offset != 0) { /* Note that mmap() returned an unaligned mapping. */ unaligned = true; /* Leading space. */ - pages_unmap(ret, chunksize - offset); + pages_unmap(ret, alignment - offset); - ret = (void *)((uintptr_t)ret + - (chunksize - offset)); + ret = (void *)((uintptr_t)ret + (alignment - offset)); /* Trailing space. 
*/ - pages_unmap((void *)((uintptr_t)ret + size), - offset); + pages_unmap((void *)((uintptr_t)ret + size), offset); } else { /* Trailing space only. */ - pages_unmap((void *)((uintptr_t)ret + size), - chunksize); + pages_unmap((void *)((uintptr_t)ret + size), alignment); } /* @@ -121,7 +119,7 @@ chunk_alloc_mmap_slow(size_t size, bool unaligned) } static void * -chunk_alloc_mmap_internal(size_t size) +chunk_alloc_mmap_internal(size_t size, size_t alignment) { void *ret; @@ -160,7 +158,7 @@ chunk_alloc_mmap_internal(size_t size) if (ret == NULL) return (NULL); - offset = CHUNK_ADDR2OFFSET(ret); + offset = (size_t)((uintptr_t)(ret) & (alignment - 1)); if (offset != 0) { bool mu = true; mmap_unaligned_tsd_set(&mu); @@ -172,7 +170,8 @@ chunk_alloc_mmap_internal(size_t size) * the reliable-but-expensive method. */ pages_unmap(ret, size); - ret = chunk_alloc_mmap_slow(size, true); + ret = chunk_alloc_mmap_slow(size, alignment, + true); } else { /* Clean up unneeded leading space. */ pages_unmap(ret, chunksize - offset); @@ -181,16 +180,16 @@ chunk_alloc_mmap_internal(size_t size) } } } else - ret = chunk_alloc_mmap_slow(size, false); + ret = chunk_alloc_mmap_slow(size, alignment, false); return (ret); } void * -chunk_alloc_mmap(size_t size) +chunk_alloc_mmap(size_t size, size_t alignment) { - return (chunk_alloc_mmap_internal(size)); + return (chunk_alloc_mmap_internal(size, alignment)); } void diff --git a/src/huge.c b/src/huge.c index a4e6cc8f..43c8f3b0 100644 --- a/src/huge.c +++ b/src/huge.c @@ -17,6 +17,13 @@ static extent_tree_t huge; void * huge_malloc(size_t size, bool zero) +{ + + return (huge_palloc(size, chunksize, zero)); +} + +void * +huge_palloc(size_t size, size_t alignment, bool zero) { void *ret; size_t csize; @@ -35,7 +42,7 @@ huge_malloc(size_t size, bool zero) if (node == NULL) return (NULL); - ret = chunk_alloc(csize, false, &zero); + ret = chunk_alloc(csize, alignment, false, &zero); if (ret == NULL) { base_node_dealloc(node); return (NULL); @@ 
-64,89 +71,6 @@ huge_malloc(size_t size, bool zero) return (ret); } -/* Only handles large allocations that require more than chunk alignment. */ -void * -huge_palloc(size_t size, size_t alignment, bool zero) -{ - void *ret; - size_t alloc_size, chunk_size, offset; - extent_node_t *node; - - /* - * This allocation requires alignment that is even larger than chunk - * alignment. This means that huge_malloc() isn't good enough. - * - * Allocate almost twice as many chunks as are demanded by the size or - * alignment, in order to assure the alignment can be achieved, then - * unmap leading and trailing chunks. - */ - assert(alignment > chunksize); - - chunk_size = CHUNK_CEILING(size); - - if (size >= alignment) - alloc_size = chunk_size + alignment - chunksize; - else - alloc_size = (alignment << 1) - chunksize; - - /* Allocate an extent node with which to track the chunk. */ - node = base_node_alloc(); - if (node == NULL) - return (NULL); - - ret = chunk_alloc(alloc_size, false, &zero); - if (ret == NULL) { - base_node_dealloc(node); - return (NULL); - } - - offset = (uintptr_t)ret & (alignment - 1); - assert((offset & chunksize_mask) == 0); - assert(offset < alloc_size); - if (offset == 0) { - /* Trim trailing space. */ - chunk_dealloc((void *)((uintptr_t)ret + chunk_size), alloc_size - - chunk_size, true); - } else { - size_t trailsize; - - /* Trim leading space. */ - chunk_dealloc(ret, alignment - offset, true); - - ret = (void *)((uintptr_t)ret + (alignment - offset)); - - trailsize = alloc_size - (alignment - offset) - chunk_size; - if (trailsize != 0) { - /* Trim trailing space. */ - assert(trailsize < alloc_size); - chunk_dealloc((void *)((uintptr_t)ret + chunk_size), - trailsize, true); - } - } - - /* Insert node into huge. 
*/ - node->addr = ret; - node->size = chunk_size; - - malloc_mutex_lock(&huge_mtx); - extent_tree_ad_insert(&huge, node); - if (config_stats) { - stats_cactive_add(chunk_size); - huge_nmalloc++; - huge_allocated += chunk_size; - } - malloc_mutex_unlock(&huge_mtx); - - if (config_fill && zero == false) { - if (opt_junk) - memset(ret, 0xa5, chunk_size); - else if (opt_zero) - memset(ret, 0, chunk_size); - } - - return (ret); -} - void * huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra) { From a1ee7838e14b321a97bfacb1f1cf5004198f2203 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 10 Apr 2012 15:07:44 -0700 Subject: [PATCH 0139/3378] Rename labels. Rename labels from FOO to label_foo in order to avoid system macro definitions, in particular OUT and ERROR on mingw. Reported by Mike Hommey. --- src/chunk.c | 6 +-- src/chunk_dss.c | 4 +- src/ckh.c | 16 +++---- src/ctl.c | 92 ++++++++++++++++++------------------ src/jemalloc.c | 58 +++++++++++------------ src/prof.c | 14 +++--- src/util.c | 24 +++++----- test/allocated.c | 16 +++---- test/mremap.c | 10 ++-- test/thread_arena.c | 8 ++-- test/thread_tcache_enabled.c | 10 ++-- 11 files changed, 129 insertions(+), 129 deletions(-) diff --git a/src/chunk.c b/src/chunk.c index 797bd341..b0641294 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -38,17 +38,17 @@ chunk_alloc(size_t size, size_t alignment, bool base, bool *zero) if (config_dss) { ret = chunk_alloc_dss(size, alignment, zero); if (ret != NULL) - goto RETURN; + goto label_return; } ret = chunk_alloc_mmap(size, alignment); if (ret != NULL) { *zero = true; - goto RETURN; + goto label_return; } /* All strategies for allocation failed. 
*/ ret = NULL; -RETURN: +label_return: if (config_ivsalloc && base == false && ret != NULL) { if (rtree_set(chunks_rtree, (uintptr_t)ret, ret)) { chunk_dealloc(ret, size, true); diff --git a/src/chunk_dss.c b/src/chunk_dss.c index a81a271c..ccd86b91 100644 --- a/src/chunk_dss.c +++ b/src/chunk_dss.c @@ -293,11 +293,11 @@ chunk_dealloc_dss(void *chunk, size_t size) madvise(chunk, size, MADV_DONTNEED); ret = false; - goto RETURN; + goto label_return; } ret = true; -RETURN: +label_return: malloc_mutex_unlock(&dss_mtx); return (ret); } diff --git a/src/ckh.c b/src/ckh.c index 39925ced..169fc0d4 100644 --- a/src/ckh.c +++ b/src/ckh.c @@ -267,12 +267,12 @@ ckh_grow(ckh_t *ckh) usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE, NULL); if (usize == 0) { ret = true; - goto RETURN; + goto label_return; } tab = (ckhc_t *)ipalloc(usize, CACHELINE, true); if (tab == NULL) { ret = true; - goto RETURN; + goto label_return; } /* Swap in new table. */ ttab = ckh->tab; @@ -292,7 +292,7 @@ ckh_grow(ckh_t *ckh) } ret = false; -RETURN: +label_return: return (ret); } @@ -385,16 +385,16 @@ ckh_new(ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh_keycomp_t *keycomp) usize = sa2u(sizeof(ckhc_t) << lg_mincells, CACHELINE, NULL); if (usize == 0) { ret = true; - goto RETURN; + goto label_return; } ckh->tab = (ckhc_t *)ipalloc(usize, CACHELINE, true); if (ckh->tab == NULL) { ret = true; - goto RETURN; + goto label_return; } ret = false; -RETURN: +label_return: return (ret); } @@ -466,12 +466,12 @@ ckh_insert(ckh_t *ckh, const void *key, const void *data) while (ckh_try_insert(ckh, &key, &data)) { if (ckh_grow(ckh)) { ret = true; - goto RETURN; + goto label_return; } } ret = false; -RETURN: +label_return: return (ret); } diff --git a/src/ctl.c b/src/ctl.c index 1aaf1971..a75ffef3 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -546,7 +546,7 @@ ctl_init(void) (narenas + 1) * sizeof(ctl_arena_stats_t)); if (ctl_stats.arenas == NULL) { ret = true; - goto RETURN; + goto label_return; } 
memset(ctl_stats.arenas, 0, (narenas + 1) * sizeof(ctl_arena_stats_t)); @@ -561,7 +561,7 @@ ctl_init(void) for (i = 0; i <= narenas; i++) { if (ctl_arena_init(&ctl_stats.arenas[i])) { ret = true; - goto RETURN; + goto label_return; } } } @@ -573,7 +573,7 @@ ctl_init(void) } ret = false; -RETURN: +label_return: malloc_mutex_unlock(&ctl_mtx); return (ret); } @@ -593,7 +593,7 @@ ctl_lookup(const char *name, ctl_node_t const **nodesp, size_t *mibp, elen = (size_t)((uintptr_t)dot - (uintptr_t)elm); if (elen == 0) { ret = ENOENT; - goto RETURN; + goto label_return; } node = super_root_node; for (i = 0; i < *depthp; i++) { @@ -618,7 +618,7 @@ ctl_lookup(const char *name, ctl_node_t const **nodesp, size_t *mibp, } if (node == pnode) { ret = ENOENT; - goto RETURN; + goto label_return; } } else { uintmax_t index; @@ -628,7 +628,7 @@ ctl_lookup(const char *name, ctl_node_t const **nodesp, size_t *mibp, index = malloc_strtoumax(elm, NULL, 10); if (index == UINTMAX_MAX || index > SIZE_T_MAX) { ret = ENOENT; - goto RETURN; + goto label_return; } inode = &node->u.named.children[0]; @@ -636,7 +636,7 @@ ctl_lookup(const char *name, ctl_node_t const **nodesp, size_t *mibp, (size_t)index); if (node == NULL) { ret = ENOENT; - goto RETURN; + goto label_return; } if (nodesp != NULL) @@ -652,7 +652,7 @@ ctl_lookup(const char *name, ctl_node_t const **nodesp, size_t *mibp, * in this path through the tree. */ ret = ENOENT; - goto RETURN; + goto label_return; } /* Complete lookup successful. */ *depthp = i + 1; @@ -663,7 +663,7 @@ ctl_lookup(const char *name, ctl_node_t const **nodesp, size_t *mibp, if (*dot == '\0') { /* No more elements. */ ret = ENOENT; - goto RETURN; + goto label_return; } elm = &dot[1]; dot = ((tdot = strchr(elm, '.')) != NULL) ? 
tdot : @@ -672,7 +672,7 @@ ctl_lookup(const char *name, ctl_node_t const **nodesp, size_t *mibp, } ret = 0; -RETURN: +label_return: return (ret); } @@ -687,22 +687,22 @@ ctl_byname(const char *name, void *oldp, size_t *oldlenp, void *newp, if (ctl_initialized == false && ctl_init()) { ret = EAGAIN; - goto RETURN; + goto label_return; } depth = CTL_MAX_DEPTH; ret = ctl_lookup(name, nodes, mib, &depth); if (ret != 0) - goto RETURN; + goto label_return; if (nodes[depth-1]->ctl == NULL) { /* The name refers to a partial path through the ctl tree. */ ret = ENOENT; - goto RETURN; + goto label_return; } ret = nodes[depth-1]->ctl(mib, depth, oldp, oldlenp, newp, newlen); -RETURN: +label_return: return(ret); } @@ -713,11 +713,11 @@ ctl_nametomib(const char *name, size_t *mibp, size_t *miblenp) if (ctl_initialized == false && ctl_init()) { ret = EAGAIN; - goto RETURN; + goto label_return; } ret = ctl_lookup(name, NULL, mibp, miblenp); -RETURN: +label_return: return(ret); } @@ -731,7 +731,7 @@ ctl_bymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, if (ctl_initialized == false && ctl_init()) { ret = EAGAIN; - goto RETURN; + goto label_return; } /* Iterate down the tree. */ @@ -741,7 +741,7 @@ ctl_bymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, /* Children are named. */ if (node->u.named.nchildren <= mib[i]) { ret = ENOENT; - goto RETURN; + goto label_return; } node = &node->u.named.children[mib[i]]; } else { @@ -752,7 +752,7 @@ ctl_bymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, node = inode->u.indexed.index(mib, miblen, mib[i]); if (node == NULL) { ret = ENOENT; - goto RETURN; + goto label_return; } } } @@ -761,11 +761,11 @@ ctl_bymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, if (node->ctl == NULL) { /* Partial MIB. 
*/ ret = ENOENT; - goto RETURN; + goto label_return; } ret = node->ctl(mib, miblen, oldp, oldlenp, newp, newlen); -RETURN: +label_return: return(ret); } @@ -787,14 +787,14 @@ ctl_boot(void) #define READONLY() do { \ if (newp != NULL || newlen != 0) { \ ret = EPERM; \ - goto RETURN; \ + goto label_return; \ } \ } while (0) #define WRITEONLY() do { \ if (oldp != NULL || oldlenp != NULL) { \ ret = EPERM; \ - goto RETURN; \ + goto label_return; \ } \ } while (0) @@ -810,7 +810,7 @@ ctl_boot(void) ? sizeof(t) : *oldlenp; \ memcpy(oldp, (void *)&v, copylen); \ ret = EINVAL; \ - goto RETURN; \ + goto label_return; \ } else \ *(t *)oldp = v; \ } \ @@ -820,7 +820,7 @@ ctl_boot(void) if (newp != NULL) { \ if (newlen != sizeof(t)) { \ ret = EINVAL; \ - goto RETURN; \ + goto label_return; \ } \ v = *(t *)newp; \ } \ @@ -847,7 +847,7 @@ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ READ(oldval, t); \ \ ret = 0; \ -RETURN: \ +label_return: \ if (l) \ malloc_mutex_unlock(&ctl_mtx); \ return (ret); \ @@ -869,7 +869,7 @@ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ READ(oldval, t); \ \ ret = 0; \ -RETURN: \ +label_return: \ malloc_mutex_unlock(&ctl_mtx); \ return (ret); \ } @@ -888,7 +888,7 @@ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ READ(oldval, t); \ \ ret = 0; \ -RETURN: \ +label_return: \ malloc_mutex_unlock(&ctl_mtx); \ return (ret); \ } @@ -912,7 +912,7 @@ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ READ(oldval, t); \ \ ret = 0; \ -RETURN: \ +label_return: \ return (ret); \ } @@ -929,7 +929,7 @@ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ READ(oldval, t); \ \ ret = 0; \ -RETURN: \ +label_return: \ return (ret); \ } @@ -946,7 +946,7 @@ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ READ(oldval, bool); \ \ ret = 0; \ -RETURN: \ +label_return: \ return (ret); \ } @@ -967,7 +967,7 @@ epoch_ctl(const size_t *mib, 
size_t miblen, void *oldp, size_t *oldlenp, READ(ctl_epoch, uint64_t); ret = 0; -RETURN: +label_return: malloc_mutex_unlock(&ctl_mtx); return (ret); } @@ -986,13 +986,13 @@ thread_tcache_enabled_ctl(const size_t *mib, size_t miblen, void *oldp, if (newp != NULL) { if (newlen != sizeof(bool)) { ret = EINVAL; - goto RETURN; + goto label_return; } tcache_enabled_set(*(bool *)newp); } READ(oldval, bool); -RETURN: +label_return: ret = 0; return (ret); } @@ -1011,7 +1011,7 @@ thread_tcache_flush_ctl(const size_t *mib, size_t miblen, void *oldp, tcache_flush(); ret = 0; -RETURN: +label_return: return (ret); } @@ -1031,7 +1031,7 @@ thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, if (newind >= narenas) { /* New arena index is out of range. */ ret = EFAULT; - goto RETURN; + goto label_return; } /* Initialize arena if necessary. */ @@ -1040,7 +1040,7 @@ thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, arenas_extend(newind)) == NULL) { malloc_mutex_unlock(&arenas_lock); ret = EAGAIN; - goto RETURN; + goto label_return; } assert(arena == arenas[newind]); arenas[oldind]->nthreads--; @@ -1059,7 +1059,7 @@ thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, } ret = 0; -RETURN: +label_return: return (ret); } @@ -1156,7 +1156,7 @@ arenas_initialized_ctl(const size_t *mib, size_t miblen, void *oldp, for (i = 0; i < nread; i++) ((bool *)oldp)[i] = ctl_stats.arenas[i].initialized; -RETURN: +label_return: malloc_mutex_unlock(&ctl_mtx); return (ret); } @@ -1180,7 +1180,7 @@ arenas_purge_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, WRITE(arena, unsigned); if (newp != NULL && arena >= narenas) { ret = EFAULT; - goto RETURN; + goto label_return; } else { arena_t *tarenas[narenas]; @@ -1202,7 +1202,7 @@ arenas_purge_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, } ret = 0; -RETURN: +label_return: return (ret); } @@ -1232,7 +1232,7 @@ prof_active_ctl(const 
size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, READ(oldval, bool); ret = 0; -RETURN: +label_return: malloc_mutex_unlock(&ctl_mtx); return (ret); } @@ -1252,11 +1252,11 @@ prof_dump_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, if (prof_mdump(filename)) { ret = EFAULT; - goto RETURN; + goto label_return; } ret = 0; -RETURN: +label_return: return (ret); } @@ -1354,11 +1354,11 @@ stats_arenas_i_index(const size_t *mib, size_t miblen, size_t i) malloc_mutex_lock(&ctl_mtx); if (ctl_stats.arenas[i].initialized == false) { ret = NULL; - goto RETURN; + goto label_return; } ret = super_stats_arenas_i_node; -RETURN: +label_return: malloc_mutex_unlock(&ctl_mtx); return (ret); } diff --git a/src/jemalloc.c b/src/jemalloc.c index e0510209..cde998c4 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -742,7 +742,7 @@ je_malloc(size_t size) if (malloc_init()) { ret = NULL; - goto OOM; + goto label_oom; } if (size == 0) @@ -753,7 +753,7 @@ je_malloc(size_t size) PROF_ALLOC_PREP(1, usize, cnt); if (cnt == NULL) { ret = NULL; - goto OOM; + goto label_oom; } if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && usize <= SMALL_MAXCLASS) { @@ -768,7 +768,7 @@ je_malloc(size_t size) ret = imalloc(size); } -OOM: +label_oom: if (ret == NULL) { if (config_xmalloc && opt_xmalloc) { malloc_write(": Error in malloc(): " @@ -822,14 +822,14 @@ imemalign(void **memptr, size_t alignment, size_t size, } result = NULL; ret = EINVAL; - goto RETURN; + goto label_return; } usize = sa2u(size, alignment, NULL); if (usize == 0) { result = NULL; ret = ENOMEM; - goto RETURN; + goto label_return; } if (config_prof && opt_prof) { @@ -864,13 +864,13 @@ imemalign(void **memptr, size_t alignment, size_t size, abort(); } ret = ENOMEM; - goto RETURN; + goto label_return; } *memptr = result; ret = 0; -RETURN: +label_return: if (config_stats && result != NULL) { assert(usize == isalloc(result)); thread_allocated_tsd_get()->allocated += usize; @@ -918,7 +918,7 @@ je_calloc(size_t num, 
size_t size) if (malloc_init()) { num_size = 0; ret = NULL; - goto RETURN; + goto label_return; } num_size = num * size; @@ -927,7 +927,7 @@ je_calloc(size_t num, size_t size) num_size = 1; else { ret = NULL; - goto RETURN; + goto label_return; } /* * Try to avoid division here. We know that it isn't possible to @@ -938,7 +938,7 @@ je_calloc(size_t num, size_t size) && (num_size / size != num)) { /* size_t overflow. */ ret = NULL; - goto RETURN; + goto label_return; } if (config_prof && opt_prof) { @@ -946,7 +946,7 @@ je_calloc(size_t num, size_t size) PROF_ALLOC_PREP(1, usize, cnt); if (cnt == NULL) { ret = NULL; - goto RETURN; + goto label_return; } if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && usize <= SMALL_MAXCLASS) { @@ -961,7 +961,7 @@ je_calloc(size_t num, size_t size) ret = icalloc(num_size); } -RETURN: +label_return: if (ret == NULL) { if (config_xmalloc && opt_xmalloc) { malloc_write(": Error in calloc(): out of " @@ -1002,7 +1002,7 @@ je_realloc(void *ptr, size_t size) } idalloc(ptr); ret = NULL; - goto RETURN; + goto label_return; } else size = 1; } @@ -1019,7 +1019,7 @@ je_realloc(void *ptr, size_t size) if (cnt == NULL) { old_ctx = NULL; ret = NULL; - goto OOM; + goto label_oom; } if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && usize <= SMALL_MAXCLASS) { @@ -1040,7 +1040,7 @@ je_realloc(void *ptr, size_t size) ret = iralloc(ptr, size, 0, 0, false, false); } -OOM: +label_oom: if (ret == NULL) { if (config_xmalloc && opt_xmalloc) { malloc_write(": Error in realloc(): " @@ -1092,7 +1092,7 @@ OOM: } } -RETURN: +label_return: if (config_prof && opt_prof) prof_realloc(ret, usize, cnt, old_size, old_ctx); if (config_stats && ret != NULL) { @@ -1300,16 +1300,16 @@ je_allocm(void **ptr, size_t *rsize, size_t size, int flags) assert(size != 0); if (malloc_init()) - goto OOM; + goto label_oom; usize = (alignment == 0) ? 
s2u(size) : sa2u(size, alignment, NULL); if (usize == 0) - goto OOM; + goto label_oom; if (config_prof && opt_prof) { PROF_ALLOC_PREP(1, usize, cnt); if (cnt == NULL) - goto OOM; + goto label_oom; if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && usize <= SMALL_MAXCLASS) { size_t usize_promoted = (alignment == 0) ? @@ -1318,18 +1318,18 @@ je_allocm(void **ptr, size_t *rsize, size_t size, int flags) assert(usize_promoted != 0); p = iallocm(usize_promoted, alignment, zero); if (p == NULL) - goto OOM; + goto label_oom; arena_prof_promoted(p, usize); } else { p = iallocm(usize, alignment, zero); if (p == NULL) - goto OOM; + goto label_oom; } prof_malloc(p, usize, cnt); } else { p = iallocm(usize, alignment, zero); if (p == NULL) - goto OOM; + goto label_oom; } if (rsize != NULL) *rsize = usize; @@ -1341,7 +1341,7 @@ je_allocm(void **ptr, size_t *rsize, size_t size, int flags) } UTRACE(0, size, p); return (ALLOCM_SUCCESS); -OOM: +label_oom: if (config_xmalloc && opt_xmalloc) { malloc_write(": Error in allocm(): " "out of memory\n"); @@ -1387,7 +1387,7 @@ je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, int flags) old_size = isalloc(p); PROF_ALLOC_PREP(1, max_usize, cnt); if (cnt == NULL) - goto OOM; + goto label_oom; /* * Use minimum usize to determine whether promotion may happen. */ @@ -1398,7 +1398,7 @@ je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, int flags) size+extra) ? 
0 : size+extra - (SMALL_MAXCLASS+1), alignment, zero, no_move); if (q == NULL) - goto ERR; + goto label_err; if (max_usize < PAGE) { usize = max_usize; arena_prof_promoted(q, usize); @@ -1407,7 +1407,7 @@ je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, int flags) } else { q = iralloc(p, size, extra, alignment, zero, no_move); if (q == NULL) - goto ERR; + goto label_err; usize = isalloc(q); } prof_realloc(q, usize, cnt, old_size, old_ctx); @@ -1418,7 +1418,7 @@ je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, int flags) old_size = isalloc(p); q = iralloc(p, size, extra, alignment, zero, no_move); if (q == NULL) - goto ERR; + goto label_err; if (config_stats) usize = isalloc(q); if (rsize != NULL) { @@ -1437,12 +1437,12 @@ je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, int flags) } UTRACE(p, size, q); return (ALLOCM_SUCCESS); -ERR: +label_err: if (no_move) { UTRACE(p, size, q); return (ALLOCM_ERR_NOT_MOVED); } -OOM: +label_oom: if (config_xmalloc && opt_xmalloc) { malloc_write(": Error in rallocm(): " "out of memory\n"); diff --git a/src/prof.c b/src/prof.c index d2532ec1..b509aaef 100644 --- a/src/prof.c +++ b/src/prof.c @@ -854,7 +854,7 @@ prof_dump(bool propagate_err, const char *filename, bool leakcheck) if (opt_abort) abort(); } - goto ERROR; + goto label_error; } /* Merge per thread profile stats, and sum them in cnt_all. */ @@ -870,7 +870,7 @@ prof_dump(bool propagate_err, const char *filename, bool leakcheck) " [%"PRIu64": %"PRIu64"] @ heapprofile\n", cnt_all.curobjs, cnt_all.curbytes, cnt_all.accumobjs, cnt_all.accumbytes)) - goto ERROR; + goto label_error; } else { if (prof_printf(propagate_err, "heap profile: %"PRId64": %"PRId64 @@ -878,22 +878,22 @@ prof_dump(bool propagate_err, const char *filename, bool leakcheck) cnt_all.curobjs, cnt_all.curbytes, cnt_all.accumobjs, cnt_all.accumbytes, ((uint64_t)1U << opt_lg_prof_sample))) - goto ERROR; + goto label_error; } /* Dump per ctx profile stats. 
*/ for (tabind = 0; ckh_iter(&bt2ctx, &tabind, &bt.v, &ctx.v) == false;) { if (prof_dump_ctx(propagate_err, ctx.p, bt.p)) - goto ERROR; + goto label_error; } /* Dump /proc//maps if possible. */ if (prof_dump_maps(propagate_err)) - goto ERROR; + goto label_error; if (prof_flush(propagate_err)) - goto ERROR; + goto label_error; close(prof_dump_fd); prof_leave(); @@ -909,7 +909,7 @@ prof_dump(bool propagate_err, const char *filename, bool leakcheck) } return (false); -ERROR: +label_error: prof_leave(); return (true); } diff --git a/src/util.c b/src/util.c index 107bdcff..2aab61fe 100644 --- a/src/util.c +++ b/src/util.c @@ -108,12 +108,12 @@ malloc_strtoumax(const char *nptr, char **endptr, int base) p++; /* Fall through. */ default: - goto PREFIX; + goto label_prefix; } } /* Get prefix, if any. */ - PREFIX: + label_prefix: /* * Note where the first non-whitespace/sign character is so that it is * possible to tell whether any digits are consumed (e.g., " 0" vs. @@ -349,7 +349,7 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) f = format; while (true) { switch (*f) { - case '\0': goto OUT; + case '\0': goto label_out; case '%': { bool alt_form = false; bool zero_pad = false; @@ -389,12 +389,12 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) assert(plus_plus == false); plus_plus = true; break; - default: goto WIDTH; + default: goto label_width; } f++; } /* Width. */ - WIDTH: + label_width: switch (*f) { case '*': width = va_arg(ap, int); @@ -410,17 +410,17 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) width = (int)uwidth; if (*f == '.') { f++; - goto PRECISION; + goto label_precision; } else - goto LENGTH; + goto label_length; break; } case '.': f++; - goto PRECISION; - default: goto LENGTH; + goto label_precision; + default: goto label_length; } /* Precision. 
*/ - PRECISION: + label_precision: switch (*f) { case '*': prec = va_arg(ap, int); @@ -438,7 +438,7 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) default: break; } /* Length. */ - LENGTH: + label_length: switch (*f) { case 'l': f++; @@ -542,7 +542,7 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) break; }} } - OUT: + label_out: if (i < size) str[i] = '\0'; else diff --git a/test/allocated.c b/test/allocated.c index 701c1754..921ab3ae 100644 --- a/test/allocated.c +++ b/test/allocated.c @@ -25,7 +25,7 @@ thread_start(void *arg) #ifdef JEMALLOC_STATS assert(false); #endif - goto RETURN; + goto label_return; } fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__, strerror(err)); @@ -37,7 +37,7 @@ thread_start(void *arg) #ifdef JEMALLOC_STATS assert(false); #endif - goto RETURN; + goto label_return; } fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__, strerror(err)); @@ -51,7 +51,7 @@ thread_start(void *arg) #ifdef JEMALLOC_STATS assert(false); #endif - goto RETURN; + goto label_return; } fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__, strerror(err)); @@ -63,7 +63,7 @@ thread_start(void *arg) #ifdef JEMALLOC_STATS assert(false); #endif - goto RETURN; + goto label_return; } fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__, strerror(err)); @@ -98,7 +98,7 @@ thread_start(void *arg) assert(d0 + usize <= d1); -RETURN: +label_return: return (NULL); } @@ -116,7 +116,7 @@ main(void) != 0) { fprintf(stderr, "%s(): Error in pthread_create()\n", __func__); ret = 1; - goto RETURN; + goto label_return; } pthread_join(thread, (void *)&ret); @@ -126,13 +126,13 @@ main(void) != 0) { fprintf(stderr, "%s(): Error in pthread_create()\n", __func__); ret = 1; - goto RETURN; + goto label_return; } pthread_join(thread, (void *)&ret); thread_start(NULL); -RETURN: +label_return: fprintf(stderr, "Test end\n"); return (ret); } diff --git a/test/mremap.c b/test/mremap.c index 8d35a64e..cac3bd82 100644 
--- a/test/mremap.c +++ b/test/mremap.c @@ -22,7 +22,7 @@ main(void) fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__, strerror(err)); ret = 1; - goto RETURN; + goto label_return; } chunksize = ((size_t)1U) << lg_chunk; @@ -30,7 +30,7 @@ main(void) if (p == NULL) { fprintf(stderr, "malloc(%zu) --> %p\n", chunksize, p); ret = 1; - goto RETURN; + goto label_return; } memset(p, 'a', chunksize); @@ -39,7 +39,7 @@ main(void) fprintf(stderr, "realloc(%p, %zu) --> %p\n", p, chunksize * 2, q); ret = 1; - goto RETURN; + goto label_return; } for (i = 0; i < chunksize; i++) { assert(q[i] == 'a'); @@ -51,7 +51,7 @@ main(void) if (q == NULL) { fprintf(stderr, "realloc(%p, %zu) --> %p\n", p, chunksize, q); ret = 1; - goto RETURN; + goto label_return; } for (i = 0; i < chunksize; i++) { assert(q[i] == 'a'); @@ -60,7 +60,7 @@ main(void) free(q); ret = 0; -RETURN: +label_return: fprintf(stderr, "Test end\n"); return (ret); } diff --git a/test/thread_arena.c b/test/thread_arena.c index 2922d1b4..9b7b2ddc 100644 --- a/test/thread_arena.c +++ b/test/thread_arena.c @@ -61,7 +61,7 @@ main(void) if (p == NULL) { fprintf(stderr, "%s(): Error in malloc()\n", __func__); ret = 1; - goto RETURN; + goto label_return; } size = sizeof(arena_ind); @@ -69,7 +69,7 @@ main(void) fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__, strerror(err)); ret = 1; - goto RETURN; + goto label_return; } for (i = 0; i < NTHREADS; i++) { @@ -78,14 +78,14 @@ main(void) fprintf(stderr, "%s(): Error in pthread_create()\n", __func__); ret = 1; - goto RETURN; + goto label_return; } } for (i = 0; i < NTHREADS; i++) pthread_join(threads[i], (void *)&ret); -RETURN: +label_return: fprintf(stderr, "Test end\n"); return (ret); } diff --git a/test/thread_tcache_enabled.c b/test/thread_tcache_enabled.c index 46540385..0a3e45a9 100644 --- a/test/thread_tcache_enabled.c +++ b/test/thread_tcache_enabled.c @@ -22,7 +22,7 @@ thread_start(void *arg) assert(false); #endif } - goto RETURN; + goto label_return; } if 
(e0) { @@ -69,7 +69,7 @@ thread_start(void *arg) assert(e0 == false); free(malloc(1)); -RETURN: +label_return: return (NULL); } @@ -87,7 +87,7 @@ main(void) != 0) { fprintf(stderr, "%s(): Error in pthread_create()\n", __func__); ret = 1; - goto RETURN; + goto label_return; } pthread_join(thread, (void *)&ret); @@ -97,13 +97,13 @@ main(void) != 0) { fprintf(stderr, "%s(): Error in pthread_create()\n", __func__); ret = 1; - goto RETURN; + goto label_return; } pthread_join(thread, (void *)&ret); thread_start(NULL); -RETURN: +label_return: fprintf(stderr, "Test end\n"); return (ret); } From 122449b073bcbaa504c4f592ea2d733503c272d2 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 6 Apr 2012 00:35:09 -0700 Subject: [PATCH 0140/3378] Implement Valgrind support, redzones, and quarantine. Implement Valgrind support, as well as the redzone and quarantine features, which help Valgrind detect memory errors. Redzones are only implemented for small objects because the changes necessary to support redzones around large and huge objects are complicated by in-place reallocation, to the point that it isn't clear that the maintenance burden is worth the incremental improvement to Valgrind support. Merge arena_salloc() and arena_salloc_demote(). Refactor i[v]salloc() to expose the 'demote' option. 
--- INSTALL | 8 +- Makefile.in | 6 +- configure.ac | 38 +++- doc/jemalloc.xml.in | 79 ++++++- include/jemalloc/internal/arena.h | 81 +++++-- .../jemalloc/internal/jemalloc_internal.h.in | 153 ++++++++++++- include/jemalloc/internal/private_namespace.h | 8 +- include/jemalloc/internal/prof.h | 6 +- include/jemalloc/internal/quarantine.h | 24 ++ include/jemalloc/internal/tcache.h | 29 ++- include/jemalloc/internal/util.h | 1 - include/jemalloc/jemalloc_defs.h.in | 5 +- src/arena.c | 208 ++++++++++++------ src/ctl.c | 15 +- src/huge.c | 2 +- src/jemalloc.c | 165 ++++++++++---- src/quarantine.c | 163 ++++++++++++++ src/stats.c | 3 + src/tcache.c | 6 +- src/zone.c | 10 +- 20 files changed, 840 insertions(+), 170 deletions(-) create mode 100644 include/jemalloc/internal/quarantine.h create mode 100644 src/quarantine.c diff --git a/INSTALL b/INSTALL index 8a825df9..a5942ec8 100644 --- a/INSTALL +++ b/INSTALL @@ -113,8 +113,12 @@ any of the following arguments (not a definitive list) to 'configure': mmap(2). --disable-fill - Disable support for junk/zero filling of memory. See the "opt.junk"/ - "opt.zero" option documentation for usage details. + Disable support for junk/zero filling of memory, quarantine, and redzones. + See the "opt.junk", "opt.zero", "opt.quarantine", and "opt.redzone" option + documentation for usage details. + +--disable-valgrind + Disable support for Valgrind. --disable-experimental Disable support for the experimental API (*allocm()). 
diff --git a/Makefile.in b/Makefile.in index 7df4fc60..8aa94253 100644 --- a/Makefile.in +++ b/Makefile.in @@ -49,9 +49,9 @@ CSRCS := @srcroot@src/jemalloc.c @srcroot@src/arena.c @srcroot@src/atomic.c \ @srcroot@src/chunk_dss.c @srcroot@src/chunk_mmap.c \ @srcroot@src/ckh.c @srcroot@src/ctl.c @srcroot@src/extent.c \ @srcroot@src/hash.c @srcroot@src/huge.c @srcroot@src/mb.c \ - @srcroot@src/mutex.c @srcroot@src/prof.c @srcroot@src/rtree.c \ - @srcroot@src/stats.c @srcroot@src/tcache.c @srcroot@src/util.c \ - @srcroot@src/tsd.c + @srcroot@src/mutex.c @srcroot@src/prof.c @srcroot@src/quarantine.c \ + @srcroot@src/rtree.c @srcroot@src/stats.c @srcroot@src/tcache.c \ + @srcroot@src/util.c @srcroot@src/tsd.c ifeq (macho, @abi@) CSRCS += @srcroot@src/zone.c endif diff --git a/configure.ac b/configure.ac index 8e94b5c9..a272ecd0 100644 --- a/configure.ac +++ b/configure.ac @@ -685,7 +685,8 @@ AC_SUBST([enable_dss]) dnl Support the junk/zero filling option by default. AC_ARG_ENABLE([fill], - [AS_HELP_STRING([--disable-fill], [Disable support for junk/zero filling])], + [AS_HELP_STRING([--disable-fill], + [Disable support for junk/zero filling, quarantine, and redzones])], [if test "x$enable_fill" = "xno" ; then enable_fill="0" else @@ -727,6 +728,38 @@ if test "x$enable_utrace" = "x1" ; then fi AC_SUBST([enable_utrace]) +dnl Support Valgrind by default. 
+AC_ARG_ENABLE([valgrind], + [AS_HELP_STRING([--disable-valgrind], [Disable support for Valgrind])], +[if test "x$enable_valgrind" = "xno" ; then + enable_valgrind="0" +else + enable_valgrind="1" +fi +], +[enable_valgrind="1"] +) +if test "x$enable_valgrind" = "x1" ; then + JE_COMPILABLE([valgrind], [ +#include +#include + +#if defined(__VALGRIND_MAJOR__) && defined(__VALGRIND_MINOR__) \ + && (__VALGRIND_MAJOR__ > 3 || (__VALGRIND_MAJOR__ == 3 && \ + __VALGRIND_MINOR__ >= 6)) +#else +# error "Incompatible Valgrind version" +#endif +], [], [je_cv_valgrind]) + if test "x${je_cv_valgrind}" = "xno" ; then + enable_valgrind="0" + fi + if test "x$enable_valgrind" = "x1" ; then + AC_DEFINE([JEMALLOC_VALGRIND], [ ]) + fi +fi +AC_SUBST([enable_valgrind]) + dnl Do not support the xmalloc option by default. AC_ARG_ENABLE([xmalloc], [AS_HELP_STRING([--enable-xmalloc], [Support xmalloc option])], @@ -1088,8 +1121,9 @@ AC_MSG_RESULT([prof-libgcc : ${enable_prof_libgcc}]) AC_MSG_RESULT([prof-gcc : ${enable_prof_gcc}]) AC_MSG_RESULT([tcache : ${enable_tcache}]) AC_MSG_RESULT([fill : ${enable_fill}]) -AC_MSG_RESULT([xmalloc : ${enable_xmalloc}]) AC_MSG_RESULT([utrace : ${enable_utrace}]) +AC_MSG_RESULT([valgrind : ${enable_valgrind}]) +AC_MSG_RESULT([xmalloc : ${enable_xmalloc}]) AC_MSG_RESULT([dss : ${enable_dss}]) AC_MSG_RESULT([lazy_lock : ${enable_lazy_lock}]) AC_MSG_RESULT([tls : ${enable_tls}]) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 8ae82621..a47c7635 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -720,6 +720,16 @@ for (i = 0; i < nbins; i++) { build configuration. + + + config.valgrind + (bool) + r- + + was specified during + build configuration. + + config.xmalloc @@ -819,6 +829,47 @@ for (i = 0; i < nbins; i++) { configuration, in which case it is enabled by default. + + + opt.quarantine + (size_t) + r- + [] + + Per thread quarantine size in bytes. 
If non-zero, each + thread maintains a FIFO object quarantine that stores up to the + specified number of bytes of memory. The quarantined memory is not + freed until it is released from quarantine, though it is immediately + junk-filled if the opt.junk option is + enabled. This feature is of particular use in combination with Valgrind, which can detect + attempts to access quarantined objects. This is intended for debugging + and will impact performance negatively. The default quarantine size is + 0. + + + + + opt.redzone + (bool) + r- + [] + + Redzones enabled/disabled. If enabled, small + allocations have redzones before and after them. Furthermore, if the + opt.junk option is + enabled, the redzones are checked for corruption during deallocation. + However, the primary intended purpose of this feature is to be used in + combination with Valgrind, which needs + redzones in order to do effective buffer overflow/underflow detection. + This option is intended for debugging and will impact performance + negatively. This option is disabled by default unless + is specified during configuration, in + which case it is enabled by default. + + opt.zero @@ -849,6 +900,25 @@ for (i = 0; i < nbins; i++) { is disabled by default. + + + opt.valgrind + (bool) + r- + [] + + Valgrind support + enabled/disabled. If enabled, several other options are automatically + modified during options processing to work well with Valgrind: opt.junk and opt.zero are set to false, + opt.quarantine + is set to 16 MiB, and opt.redzone is set to + true. This option is disabled by default. + + opt.xmalloc @@ -1764,10 +1834,11 @@ malloc_conf = "xmalloc:true";]]> This implementation does not provide much detail about the problems it detects, because the performance impact for storing such information - would be prohibitive. 
There are a number of allocator implementations - available on the Internet which focus on detecting and pinpointing problems - by trading performance for extra sanity checks and detailed - diagnostics. + would be prohibitive. However, jemalloc does integrate with the most + excellent Valgrind tool if + the configuration option is enabled and + the opt.valgrind + option is enabled. DIAGNOSTIC MESSAGES diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index d25a2b1d..f52fac42 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -16,7 +16,7 @@ * constraint is relaxed (ignored) for runs that are so small that the * per-region overhead is greater than: * - * (RUN_MAX_OVRHD / (reg_size << (3+RUN_BFP)) + * (RUN_MAX_OVRHD / (reg_interval << (3+RUN_BFP)) */ #define RUN_BFP 12 /* \/ Implicit binary fixed point. */ @@ -27,6 +27,12 @@ #define LG_RUN_MAXREGS 11 #define RUN_MAXREGS (1U << LG_RUN_MAXREGS) +/* + * Minimum redzone size. Redzones may be larger than this if necessary to + * preserve region alignment. + */ +#define REDZONE_MINSIZE 16 + /* * The minimum ratio of active:dirty pages per arena is computed as: * @@ -192,11 +198,50 @@ struct arena_run_s { * Read-only information associated with each element of arena_t's bins array * is stored separately, partly to reduce memory usage (only one copy, rather * than one per arena), but mainly to avoid false cacheline sharing. + * + * Each run has the following layout: + * + * /--------------------\ + * | arena_run_t header | + * | ... | + * bitmap_offset | bitmap | + * | ... | + * ctx0_offset | ctx map | + * | ... | + * |--------------------| + * | redzone | + * reg0_offset | region 0 | + * | redzone | + * |--------------------| \ + * | redzone | | + * | region 1 | > reg_interval + * | redzone | / + * |--------------------| + * | ... | + * | ... | + * | ... 
| + * |--------------------| + * | redzone | + * | region nregs-1 | + * | redzone | + * |--------------------| + * | alignment pad? | + * \--------------------/ + * + * reg_interval has at least the same minimum alignment as reg_size; this + * preserves the alignment constraint that sa2u() depends on. Alignment pad is + * either 0 or redzone_size; it is present only if needed to align reg0_offset. */ struct arena_bin_info_s { /* Size of regions in a run for this bin's size class. */ size_t reg_size; + /* Redzone size. */ + size_t redzone_size; + + /* Interval between regions (reg_size + (redzone_size << 1)). */ + size_t reg_interval; + /* Total size of a run for this bin's size class. */ size_t run_size; @@ -357,13 +402,15 @@ void arena_purge_all(arena_t *arena); void arena_prof_accum(arena_t *arena, uint64_t accumbytes); void arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, size_t binind, uint64_t prof_accumbytes); +void arena_alloc_junk_small(void *ptr, arena_bin_info_t *bin_info, + bool zero); +void arena_dalloc_junk_small(void *ptr, arena_bin_info_t *bin_info); void *arena_malloc_small(arena_t *arena, size_t size, bool zero); void *arena_malloc_large(arena_t *arena, size_t size, bool zero); void *arena_palloc(arena_t *arena, size_t size, size_t alloc_size, size_t alignment, bool zero); -size_t arena_salloc(const void *ptr); +size_t arena_salloc(const void *ptr, bool demote); void arena_prof_promoted(const void *ptr, size_t size); -size_t arena_salloc_demote(const void *ptr); void arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, arena_chunk_map_t *mapelm); void arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr); @@ -408,7 +455,7 @@ JEMALLOC_INLINE unsigned arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr) { unsigned shift, diff, regind; - size_t size; + size_t interval; /* * Freeing a pointer lower than region zero can cause assertion @@ -425,12 +472,12 @@ arena_run_regind(arena_run_t 
*run, arena_bin_info_t *bin_info, const void *ptr) bin_info->reg0_offset); /* Rescale (factor powers of 2 out of the numerator and denominator). */ - size = bin_info->reg_size; - shift = ffs(size) - 1; + interval = bin_info->reg_interval; + shift = ffs(interval) - 1; diff >>= shift; - size >>= shift; + interval >>= shift; - if (size == 1) { + if (interval == 1) { /* The divisor was a power of 2. */ regind = diff; } else { @@ -442,7 +489,7 @@ arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr) * * becomes * - * (X * size_invs[D - 3]) >> SIZE_INV_SHIFT + * (X * interval_invs[D - 3]) >> SIZE_INV_SHIFT * * We can omit the first three elements, because we never * divide by 0, and 1 and 2 are both powers of two, which are @@ -450,7 +497,7 @@ arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr) */ #define SIZE_INV_SHIFT ((sizeof(unsigned) << 3) - LG_RUN_MAXREGS) #define SIZE_INV(s) (((1U << SIZE_INV_SHIFT) / (s)) + 1) - static const unsigned size_invs[] = { + static const unsigned interval_invs[] = { SIZE_INV(3), SIZE_INV(4), SIZE_INV(5), SIZE_INV(6), SIZE_INV(7), SIZE_INV(8), SIZE_INV(9), SIZE_INV(10), SIZE_INV(11), @@ -461,14 +508,16 @@ arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr) SIZE_INV(28), SIZE_INV(29), SIZE_INV(30), SIZE_INV(31) }; - if (size <= ((sizeof(size_invs) / sizeof(unsigned)) + 2)) - regind = (diff * size_invs[size - 3]) >> SIZE_INV_SHIFT; - else - regind = diff / size; + if (interval <= ((sizeof(interval_invs) / sizeof(unsigned)) + + 2)) { + regind = (diff * interval_invs[interval - 3]) >> + SIZE_INV_SHIFT; + } else + regind = diff / interval; #undef SIZE_INV #undef SIZE_INV_SHIFT } - assert(diff == regind * size); + assert(diff == regind * interval); assert(regind < bin_info->nregs); return (regind); @@ -610,7 +659,7 @@ arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr, bool try_tcache) &arena_bin_info[binind]; assert(((uintptr_t)ptr - ((uintptr_t)run + 
(uintptr_t)bin_info->reg0_offset)) % - bin_info->reg_size == 0); + bin_info->reg_interval == 0); } malloc_mutex_lock(&bin->lock); arena_dalloc_bin(arena, chunk, ptr, mapelm); diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 66dd357f..a16e5e27 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -40,6 +40,11 @@ #include #endif +#ifdef JEMALLOC_VALGRIND +#include +#include +#endif + #include "jemalloc/internal/private_namespace.h" #ifdef JEMALLOC_CC_SILENCE @@ -125,6 +130,13 @@ static const bool config_utrace = false #endif ; +static const bool config_valgrind = +#ifdef JEMALLOC_VALGRIND + true +#else + false +#endif + ; static const bool config_xmalloc = #ifdef JEMALLOC_XMALLOC true @@ -281,6 +293,77 @@ static const bool config_ivsalloc = #define PAGE_CEILING(s) \ (((s) + PAGE_MASK) & ~PAGE_MASK) +#ifdef JEMALLOC_VALGRIND +/* + * The JEMALLOC_VALGRIND_*() macros must be macros rather than functions + * so that when Valgrind reports errors, there are no extra stack frames + * in the backtraces. + * + * The size that is reported to valgrind must be consistent through a chain of + * malloc..realloc..realloc calls. Request size isn't recorded anywhere in + * jemalloc, so it is critical that all callers of these macros provide usize + * rather than request size. As a result, buffer overflow detection is + * technically weakened for the standard API, though it is generally accepted + * practice to consider any extra bytes reported by malloc_usable_size() as + * usable space. 
+ */ +#define JEMALLOC_VALGRIND_MALLOC(cond, ptr, usize, zero) do { \ + if (config_valgrind && opt_valgrind && cond) \ + VALGRIND_MALLOCLIKE_BLOCK(ptr, usize, p2rz(ptr), zero); \ +} while (0) +#define JEMALLOC_VALGRIND_REALLOC(ptr, usize, old_ptr, old_usize, \ + old_rzsize, zero) do { \ + if (config_valgrind && opt_valgrind) { \ + size_t rzsize = p2rz(ptr); \ + \ + if (ptr == old_ptr) { \ + VALGRIND_RESIZEINPLACE_BLOCK(ptr, old_usize, \ + usize, rzsize); \ + if (zero && old_usize < usize) { \ + VALGRIND_MAKE_MEM_DEFINED( \ + (void *)((uintptr_t)ptr + \ + old_usize), usize - old_usize); \ + } \ + } else { \ + if (old_ptr != NULL) { \ + VALGRIND_FREELIKE_BLOCK(old_ptr, \ + old_rzsize); \ + } \ + if (ptr != NULL) { \ + size_t copy_size = (old_usize < usize) \ + ? old_usize : usize; \ + size_t tail_size = usize - copy_size; \ + VALGRIND_MALLOCLIKE_BLOCK(ptr, usize, \ + rzsize, false); \ + if (copy_size > 0) { \ + VALGRIND_MAKE_MEM_DEFINED(ptr, \ + copy_size); \ + } \ + if (zero && tail_size > 0) { \ + VALGRIND_MAKE_MEM_DEFINED( \ + (void *)((uintptr_t)ptr + \ + copy_size), tail_size); \ + } \ + } \ + } \ + } \ +} while (0) +#define JEMALLOC_VALGRIND_FREE(ptr, rzsize) do { \ + if (config_valgrind && opt_valgrind) \ + VALGRIND_FREELIKE_BLOCK(ptr, rzsize); \ +} while (0) +#else +#define VALGRIND_MALLOCLIKE_BLOCK(addr, sizeB, rzB, is_zeroed) +#define VALGRIND_RESIZEINPLACE_BLOCK(addr, oldSizeB, newSizeB, rzB) +#define VALGRIND_FREELIKE_BLOCK(addr, rzB) +#define VALGRIND_MAKE_MEM_UNDEFINED(_qzz_addr, _qzz_len) +#define VALGRIND_MAKE_MEM_DEFINED(_qzz_addr, _qzz_len) +#define JEMALLOC_VALGRIND_MALLOC(cond, ptr, usize, zero) +#define JEMALLOC_VALGRIND_REALLOC(ptr, usize, old_ptr, old_usize, \ + old_rzsize, zero) +#define JEMALLOC_VALGRIND_FREE(ptr, rzsize) +#endif + #include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/prng.h" @@ -300,6 +383,7 @@ static const bool config_ivsalloc = #include "jemalloc/internal/rtree.h" #include 
"jemalloc/internal/tcache.h" #include "jemalloc/internal/hash.h" +#include "jemalloc/internal/quarantine.h" #include "jemalloc/internal/prof.h" #undef JEMALLOC_H_TYPES @@ -325,6 +409,7 @@ static const bool config_ivsalloc = #include "jemalloc/internal/rtree.h" #include "jemalloc/internal/tcache.h" #include "jemalloc/internal/hash.h" +#include "jemalloc/internal/quarantine.h" #include "jemalloc/internal/prof.h" typedef struct { @@ -343,7 +428,10 @@ typedef struct { extern bool opt_abort; extern bool opt_junk; +extern size_t opt_quarantine; +extern bool opt_redzone; extern bool opt_utrace; +extern bool opt_valgrind; extern bool opt_xmalloc; extern bool opt_zero; extern size_t opt_narenas; @@ -385,6 +473,7 @@ void jemalloc_postfork_child(void); #include "jemalloc/internal/rtree.h" #include "jemalloc/internal/tcache.h" #include "jemalloc/internal/hash.h" +#include "jemalloc/internal/quarantine.h" #include "jemalloc/internal/prof.h" #undef JEMALLOC_H_EXTERNS @@ -550,14 +639,18 @@ choose_arena(arena_t *arena) #include "jemalloc/internal/tcache.h" #include "jemalloc/internal/arena.h" #include "jemalloc/internal/hash.h" +#include "jemalloc/internal/quarantine.h" #ifndef JEMALLOC_ENABLE_INLINE void *imalloc(size_t size); void *icalloc(size_t size); void *ipalloc(size_t usize, size_t alignment, bool zero); -size_t isalloc(const void *ptr); -size_t ivsalloc(const void *ptr); +size_t isalloc(const void *ptr, bool demote); +size_t ivsalloc(const void *ptr, bool demote); +size_t u2rz(size_t usize); +size_t p2rz(const void *ptr); void idalloc(void *ptr); +void iqalloc(void *ptr); void *iralloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero, bool no_move); malloc_tsd_protos(JEMALLOC_ATTR(unused), thread_allocated, thread_allocated_t) @@ -621,21 +714,25 @@ ipalloc(size_t usize, size_t alignment, bool zero) return (ret); } +/* + * Typical usage: + * void *ptr = [...] 
+ * size_t sz = isalloc(ptr, config_prof); + */ JEMALLOC_INLINE size_t -isalloc(const void *ptr) +isalloc(const void *ptr, bool demote) { size_t ret; arena_chunk_t *chunk; assert(ptr != NULL); + /* Demotion only makes sense if config_prof is true. */ + assert(config_prof || demote == false); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (chunk != ptr) { /* Region. */ - if (config_prof) - ret = arena_salloc_demote(ptr); - else - ret = arena_salloc(ptr); + ret = arena_salloc(ptr, demote); } else ret = huge_salloc(ptr); @@ -643,14 +740,36 @@ isalloc(const void *ptr) } JEMALLOC_INLINE size_t -ivsalloc(const void *ptr) +ivsalloc(const void *ptr, bool demote) { /* Return 0 if ptr is not within a chunk managed by jemalloc. */ if (rtree_get(chunks_rtree, (uintptr_t)CHUNK_ADDR2BASE(ptr)) == NULL) return (0); - return (isalloc(ptr)); + return (isalloc(ptr, demote)); +} + +JEMALLOC_INLINE size_t +u2rz(size_t usize) +{ + size_t ret; + + if (usize <= SMALL_MAXCLASS) { + size_t binind = SMALL_SIZE2BIN(usize); + ret = arena_bin_info[binind].redzone_size; + } else + ret = 0; + + return (ret); +} + +JEMALLOC_INLINE size_t +p2rz(const void *ptr) +{ + size_t usize = isalloc(ptr, false); + + return (u2rz(usize)); } JEMALLOC_INLINE void @@ -667,6 +786,16 @@ idalloc(void *ptr) huge_dalloc(ptr, true); } +JEMALLOC_INLINE void +iqalloc(void *ptr) +{ + + if (config_fill && opt_quarantine) + quarantine(ptr); + else + idalloc(ptr); +} + JEMALLOC_INLINE void * iralloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero, bool no_move) @@ -677,14 +806,14 @@ iralloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero, assert(ptr != NULL); assert(size != 0); - oldsize = isalloc(ptr); + oldsize = isalloc(ptr, config_prof); if (alignment != 0 && ((uintptr_t)ptr & ((uintptr_t)alignment-1)) != 0) { size_t usize, copysize; /* - * Existing object alignment is inadquate; allocate new space + * Existing object alignment is inadequate; allocate new space * and copy. 
*/ if (no_move) @@ -711,7 +840,7 @@ iralloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero, */ copysize = (size < oldsize) ? size : oldsize; memcpy(ret, ptr, copysize); - idalloc(ptr); + iqalloc(ptr); return (ret); } diff --git a/include/jemalloc/internal/private_namespace.h b/include/jemalloc/internal/private_namespace.h index de3042eb..a962192c 100644 --- a/include/jemalloc/internal/private_namespace.h +++ b/include/jemalloc/internal/private_namespace.h @@ -1,7 +1,9 @@ +#define arena_alloc_junk_small JEMALLOC_N(arena_alloc_junk_small) #define arena_bin_index JEMALLOC_N(arena_bin_index) #define arena_boot JEMALLOC_N(arena_boot) #define arena_dalloc JEMALLOC_N(arena_dalloc) #define arena_dalloc_bin JEMALLOC_N(arena_dalloc_bin) +#define arena_dalloc_junk_small JEMALLOC_N(arena_dalloc_junk_small) #define arena_dalloc_large JEMALLOC_N(arena_dalloc_large) #define arena_malloc JEMALLOC_N(arena_malloc) #define arena_malloc_large JEMALLOC_N(arena_malloc_large) @@ -20,7 +22,6 @@ #define arena_ralloc_no_move JEMALLOC_N(arena_ralloc_no_move) #define arena_run_regind JEMALLOC_N(arena_run_regind) #define arena_salloc JEMALLOC_N(arena_salloc) -#define arena_salloc_demote JEMALLOC_N(arena_salloc_demote) #define arena_stats_merge JEMALLOC_N(arena_stats_merge) #define arena_tcache_fill_small JEMALLOC_N(arena_tcache_fill_small) #define arenas_bin_i_index JEMALLOC_N(arenas_bin_i_index) @@ -136,6 +137,7 @@ #define idalloc JEMALLOC_N(idalloc) #define imalloc JEMALLOC_N(imalloc) #define ipalloc JEMALLOC_N(ipalloc) +#define iqalloc JEMALLOC_N(iqalloc) #define iralloc JEMALLOC_N(iralloc) #define isalloc JEMALLOC_N(isalloc) #define ivsalloc JEMALLOC_N(ivsalloc) @@ -176,6 +178,7 @@ #define opt_utrace JEMALLOC_N(opt_utrace) #define opt_xmalloc JEMALLOC_N(opt_xmalloc) #define opt_zero JEMALLOC_N(opt_zero) +#define p2rz JEMALLOC_N(p2rz) #define pow2_ceil JEMALLOC_N(pow2_ceil) #define prof_backtrace JEMALLOC_N(prof_backtrace) #define prof_boot0 JEMALLOC_N(prof_boot0) @@ 
-195,6 +198,8 @@ #define prof_tdata_init JEMALLOC_N(prof_tdata_init) #define prof_tdata_tls JEMALLOC_N(prof_tdata_tls) #define pthread_create JEMALLOC_N(pthread_create) +#define quarantine JEMALLOC_N(quarantine) +#define quarantine_boot JEMALLOC_N(quarantine_boot) #define register_zone JEMALLOC_N(register_zone) #define rtree_get JEMALLOC_N(rtree_get) #define rtree_get_locked JEMALLOC_N(rtree_get_locked) @@ -229,3 +234,4 @@ #define thread_allocated_get JEMALLOC_N(thread_allocated_get) #define thread_allocated_get_hard JEMALLOC_N(thread_allocated_get_hard) #define thread_allocated_tls JEMALLOC_N(thread_allocated_tls) +#define u2rz JEMALLOC_N(u2rz) diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index 34929e7e..a37bb448 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -378,7 +378,7 @@ prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt) cassert(config_prof); assert(ptr != NULL); - assert(size == isalloc(ptr)); + assert(size == isalloc(ptr, true)); if (opt_lg_prof_sample != 0) { if (prof_sample_accum_update(size)) { @@ -427,7 +427,7 @@ prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt, assert(ptr != NULL || (uintptr_t)cnt <= (uintptr_t)1U); if (ptr != NULL) { - assert(size == isalloc(ptr)); + assert(size == isalloc(ptr, true)); if (opt_lg_prof_sample != 0) { if (prof_sample_accum_update(size)) { /* @@ -500,7 +500,7 @@ prof_free(const void *ptr, size_t size) cassert(config_prof); if ((uintptr_t)ctx > (uintptr_t)1) { - assert(size == isalloc(ptr)); + assert(size == isalloc(ptr, true)); prof_thr_cnt_t *tcnt = prof_lookup(ctx->bt); if (tcnt != NULL) { diff --git a/include/jemalloc/internal/quarantine.h b/include/jemalloc/internal/quarantine.h new file mode 100644 index 00000000..38f3d696 --- /dev/null +++ b/include/jemalloc/internal/quarantine.h @@ -0,0 +1,24 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +/* 
Default per thread quarantine size if valgrind is enabled. */ +#define JEMALLOC_VALGRIND_QUARANTINE_DEFAULT (ZU(1) << 24) + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +void quarantine(void *ptr); +bool quarantine_boot(void); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ + diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index 93e721d5..9d8c992d 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -340,17 +340,24 @@ tcache_alloc_small(tcache_t *tcache, size_t size, bool zero) if (ret == NULL) return (NULL); } - assert(arena_salloc(ret) == arena_bin_info[binind].reg_size); + assert(arena_salloc(ret, false) == arena_bin_info[binind].reg_size); if (zero == false) { if (config_fill) { - if (opt_junk) - memset(ret, 0xa5, size); - else if (opt_zero) + if (opt_junk) { + arena_alloc_junk_small(ret, + &arena_bin_info[binind], false); + } else if (opt_zero) memset(ret, 0, size); } - } else + } else { + if (config_fill && opt_junk) { + arena_alloc_junk_small(ret, &arena_bin_info[binind], + true); + } + VALGRIND_MAKE_MEM_UNDEFINED(ret, size); memset(ret, 0, size); + } if (config_stats) tbin->tstats.nrequests++; @@ -397,8 +404,10 @@ tcache_alloc_large(tcache_t *tcache, size_t size, bool zero) else if (opt_zero) memset(ret, 0, size); } - } else + } else { + VALGRIND_MAKE_MEM_UNDEFINED(ret, size); memset(ret, 0, size); + } if (config_stats) tbin->tstats.nrequests++; @@ -422,7 +431,7 @@ tcache_dalloc_small(tcache_t *tcache, void *ptr) size_t pageind, binind; 
arena_chunk_map_t *mapelm; - assert(arena_salloc(ptr) <= SMALL_MAXCLASS); + assert(arena_salloc(ptr, false) <= SMALL_MAXCLASS); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); arena = chunk->arena; @@ -436,7 +445,7 @@ tcache_dalloc_small(tcache_t *tcache, void *ptr) assert(binind < NBINS); if (config_fill && opt_junk) - memset(ptr, 0x5a, arena_bin_info[binind].reg_size); + arena_dalloc_junk_small(ptr, &arena_bin_info[binind]); tbin = &tcache->tbins[binind]; tbin_info = &tcache_bin_info[binind]; @@ -459,8 +468,8 @@ tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size) tcache_bin_info_t *tbin_info; assert((size & PAGE_MASK) == 0); - assert(arena_salloc(ptr) > SMALL_MAXCLASS); - assert(arena_salloc(ptr) <= tcache_maxclass); + assert(arena_salloc(ptr, false) > SMALL_MAXCLASS); + assert(arena_salloc(ptr, false) <= tcache_maxclass); binind = NBINS + (size >> LG_PAGE) - 1; diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h index 3d3ea3ab..d360ae3f 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util.h @@ -140,7 +140,6 @@ malloc_write(const char *s) je_malloc_message(NULL, s); } - #endif #endif /* JEMALLOC_H_INLINES */ diff --git a/include/jemalloc/jemalloc_defs.h.in b/include/jemalloc/jemalloc_defs.h.in index 8e7442d6..7770a7af 100644 --- a/include/jemalloc/jemalloc_defs.h.in +++ b/include/jemalloc/jemalloc_defs.h.in @@ -148,7 +148,7 @@ */ #undef JEMALLOC_DSS -/* Support memory filling (junk/zero). */ +/* Support memory filling (junk/zero/quarantine/redzone). */ #undef JEMALLOC_FILL /* Support the experimental API. */ @@ -157,6 +157,9 @@ /* Support utrace(2)-based tracing. */ #undef JEMALLOC_UTRACE +/* Support Valgrind. */ +#undef JEMALLOC_VALGRIND + /* Support optional abort() on OOM. 
*/ #undef JEMALLOC_XMALLOC diff --git a/src/arena.c b/src/arena.c index c84aaf47..1d4f61ea 100644 --- a/src/arena.c +++ b/src/arena.c @@ -140,7 +140,7 @@ arena_run_reg_alloc(arena_run_t *run, arena_bin_info_t *bin_info) regind = bitmap_sfu(bitmap, &bin_info->bitmap_info); ret = (void *)((uintptr_t)run + (uintptr_t)bin_info->reg0_offset + - (uintptr_t)(bin_info->reg_size * regind)); + (uintptr_t)(bin_info->reg_interval * regind)); run->nfree--; if (regind == run->nextind) run->nextind++; @@ -161,8 +161,8 @@ arena_run_reg_dalloc(arena_run_t *run, void *ptr) assert(run->nfree < bin_info->nregs); /* Freeing an interior pointer can cause assertion failure. */ assert(((uintptr_t)ptr - ((uintptr_t)run + - (uintptr_t)bin_info->reg0_offset)) % (uintptr_t)bin_info->reg_size - == 0); + (uintptr_t)bin_info->reg0_offset)) % + (uintptr_t)bin_info->reg_interval == 0); assert((uintptr_t)ptr >= (uintptr_t)run + (uintptr_t)bin_info->reg0_offset); /* Freeing an unallocated pointer can cause assertion failure. */ @@ -260,10 +260,18 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, for (i = 0; i < need_pages; i++) { if ((chunk->map[run_ind+i-map_bias].bits & CHUNK_MAP_UNZEROED) != 0) { + VALGRIND_MAKE_MEM_UNDEFINED( + (void *)((uintptr_t) + chunk + ((run_ind+i) << + LG_PAGE)), PAGE); memset((void *)((uintptr_t) chunk + ((run_ind+i) << LG_PAGE)), 0, PAGE); } else if (config_debug) { + VALGRIND_MAKE_MEM_DEFINED( + (void *)((uintptr_t) + chunk + ((run_ind+i) << + LG_PAGE)), PAGE); arena_chunk_validate_zeroed( chunk, run_ind+i); } @@ -273,6 +281,9 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, * The run is dirty, so all pages must be * zeroed. 
*/ + VALGRIND_MAKE_MEM_UNDEFINED((void + *)((uintptr_t)chunk + (run_ind << + LG_PAGE)), (need_pages << LG_PAGE)); memset((void *)((uintptr_t)chunk + (run_ind << LG_PAGE)), 0, (need_pages << LG_PAGE)); } @@ -1245,6 +1256,10 @@ arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, size_t binind, ptr = arena_bin_malloc_hard(arena, bin); if (ptr == NULL) break; + if (config_fill && opt_junk) { + arena_alloc_junk_small(ptr, &arena_bin_info[binind], + true); + } /* Insert such that low regions get used first. */ tbin->avail[nfill - 1 - i] = ptr; } @@ -1259,6 +1274,55 @@ arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, size_t binind, tbin->ncached = i; } +void +arena_alloc_junk_small(void *ptr, arena_bin_info_t *bin_info, bool zero) +{ + + if (zero) { + size_t redzone_size = bin_info->redzone_size; + memset((void *)((uintptr_t)ptr - redzone_size), 0xa5, + redzone_size); + memset((void *)((uintptr_t)ptr + bin_info->reg_size), 0xa5, + redzone_size); + } else { + memset((void *)((uintptr_t)ptr - bin_info->redzone_size), 0xa5, + bin_info->reg_interval); + } +} + +void +arena_dalloc_junk_small(void *ptr, arena_bin_info_t *bin_info) +{ + size_t size = bin_info->reg_size; + size_t redzone_size = bin_info->redzone_size; + size_t i; + bool error = false; + + for (i = 1; i <= redzone_size; i++) { + unsigned byte; + if ((byte = *(uint8_t *)((uintptr_t)ptr - i)) != 0xa5) { + error = true; + malloc_printf(": Corrupt redzone " + "%zu byte%s before %p (size %zu), byte=%#x\n", i, + (i == 1) ? "" : "s", ptr, size, byte); + } + } + for (i = 0; i < redzone_size; i++) { + unsigned byte; + if ((byte = *(uint8_t *)((uintptr_t)ptr + size + i)) != 0xa5) { + error = true; + malloc_printf(": Corrupt redzone " + "%zu byte%s after end of %p (size %zu), byte=%#x\n", + i, (i == 1) ? 
"" : "s", ptr, size, byte); + } + } + if (opt_abort && error) + abort(); + + memset((void *)((uintptr_t)ptr - redzone_size), 0x5a, + bin_info->reg_interval); +} + void * arena_malloc_small(arena_t *arena, size_t size, bool zero) { @@ -1297,13 +1361,20 @@ arena_malloc_small(arena_t *arena, size_t size, bool zero) if (zero == false) { if (config_fill) { - if (opt_junk) - memset(ret, 0xa5, size); - else if (opt_zero) + if (opt_junk) { + arena_alloc_junk_small(ret, + &arena_bin_info[binind], false); + } else if (opt_zero) memset(ret, 0, size); } - } else + } else { + if (config_fill && opt_junk) { + arena_alloc_junk_small(ret, &arena_bin_info[binind], + true); + } + VALGRIND_MAKE_MEM_UNDEFINED(ret, size); memset(ret, 0, size); + } return (ret); } @@ -1412,7 +1483,7 @@ arena_palloc(arena_t *arena, size_t size, size_t alloc_size, size_t alignment, /* Return the size of the allocation pointed to by ptr. */ size_t -arena_salloc(const void *ptr) +arena_salloc(const void *ptr, bool demote) { size_t ret; arena_chunk_t *chunk; @@ -1431,12 +1502,19 @@ arena_salloc(const void *ptr) size_t binind = arena_bin_index(chunk->arena, run->bin); arena_bin_info_t *bin_info = &arena_bin_info[binind]; assert(((uintptr_t)ptr - ((uintptr_t)run + - (uintptr_t)bin_info->reg0_offset)) % bin_info->reg_size == - 0); + (uintptr_t)bin_info->reg0_offset)) % bin_info->reg_interval + == 0); ret = bin_info->reg_size; } else { assert(((uintptr_t)ptr & PAGE_MASK) == 0); ret = mapbits & ~PAGE_MASK; + if (demote && prof_promote && ret == PAGE && (mapbits & + CHUNK_MAP_CLASS_MASK) != 0) { + size_t binind = ((mapbits & CHUNK_MAP_CLASS_MASK) >> + CHUNK_MAP_CLASS_SHIFT) - 1; + assert(binind < NBINS); + ret = arena_bin_info[binind].reg_size; + } assert(ret != 0); } @@ -1449,9 +1527,11 @@ arena_prof_promoted(const void *ptr, size_t size) arena_chunk_t *chunk; size_t pageind, binind; + assert(config_prof); assert(ptr != NULL); assert(CHUNK_ADDR2BASE(ptr) != ptr); - assert(isalloc(ptr) == PAGE); + 
assert(isalloc(ptr, false) == PAGE); + assert(isalloc(ptr, true) == PAGE); assert(size <= SMALL_MAXCLASS); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); @@ -1460,45 +1540,9 @@ arena_prof_promoted(const void *ptr, size_t size) assert(binind < NBINS); chunk->map[pageind-map_bias].bits = (chunk->map[pageind-map_bias].bits & ~CHUNK_MAP_CLASS_MASK) | ((binind+1) << CHUNK_MAP_CLASS_SHIFT); -} -size_t -arena_salloc_demote(const void *ptr) -{ - size_t ret; - arena_chunk_t *chunk; - size_t pageind, mapbits; - - assert(ptr != NULL); - assert(CHUNK_ADDR2BASE(ptr) != ptr); - - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - mapbits = chunk->map[pageind-map_bias].bits; - assert((mapbits & CHUNK_MAP_ALLOCATED) != 0); - if ((mapbits & CHUNK_MAP_LARGE) == 0) { - arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + - (uintptr_t)((pageind - (mapbits >> LG_PAGE)) << LG_PAGE)); - size_t binind = arena_bin_index(chunk->arena, run->bin); - arena_bin_info_t *bin_info = &arena_bin_info[binind]; - assert(((uintptr_t)ptr - ((uintptr_t)run + - (uintptr_t)bin_info->reg0_offset)) % bin_info->reg_size == - 0); - ret = bin_info->reg_size; - } else { - assert(((uintptr_t)ptr & PAGE_MASK) == 0); - ret = mapbits & ~PAGE_MASK; - if (prof_promote && ret == PAGE && (mapbits & - CHUNK_MAP_CLASS_MASK) != 0) { - size_t binind = ((mapbits & CHUNK_MAP_CLASS_MASK) >> - CHUNK_MAP_CLASS_SHIFT) - 1; - assert(binind < NBINS); - ret = arena_bin_info[binind].reg_size; - } - assert(ret != 0); - } - - return (ret); + assert(isalloc(ptr, false) == PAGE); + assert(isalloc(ptr, true) == size); } static void @@ -1545,7 +1589,8 @@ arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE); past = (size_t)(PAGE_CEILING((uintptr_t)run + (uintptr_t)bin_info->reg0_offset + (uintptr_t)(run->nextind * - bin_info->reg_size) - (uintptr_t)chunk) >> LG_PAGE); + bin_info->reg_interval 
- bin_info->redzone_size) - + (uintptr_t)chunk) >> LG_PAGE); malloc_mutex_lock(&arena->lock); /* @@ -1617,7 +1662,7 @@ arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, size = bin_info->reg_size; if (config_fill && opt_junk) - memset(ptr, 0x5a, size); + arena_dalloc_junk_small(ptr, bin_info); arena_run_reg_dalloc(run, ptr); if (run->nfree == bin_info->nregs) { @@ -1936,7 +1981,7 @@ arena_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, */ copysize = (size < oldsize) ? size : oldsize; memcpy(ret, ptr, copysize); - idalloc(ptr); + iqalloc(ptr); return (ret); } @@ -2007,16 +2052,40 @@ arena_new(arena_t *arena, unsigned ind) static size_t bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size) { + size_t pad_size; size_t try_run_size, good_run_size; uint32_t try_nregs, good_nregs; uint32_t try_hdr_size, good_hdr_size; uint32_t try_bitmap_offset, good_bitmap_offset; uint32_t try_ctx0_offset, good_ctx0_offset; - uint32_t try_reg0_offset, good_reg0_offset; + uint32_t try_redzone0_offset, good_redzone0_offset; assert(min_run_size >= PAGE); assert(min_run_size <= arena_maxclass); + /* + * Determine redzone size based on minimum alignment and minimum + * redzone size. Add padding to the end of the run if it is needed to + * align the regions. The padding allows each redzone to be half the + * minimum alignment; without the padding, each redzone would have to + * be twice as large in order to maintain alignment. 
+ */ + if (config_fill && opt_redzone) { + size_t align_min = ZU(1) << (ffs(bin_info->reg_size) - 1); + if (align_min <= REDZONE_MINSIZE) { + bin_info->redzone_size = REDZONE_MINSIZE; + pad_size = 0; + } else { + bin_info->redzone_size = align_min >> 1; + pad_size = bin_info->redzone_size; + } + } else { + bin_info->redzone_size = 0; + pad_size = 0; + } + bin_info->reg_interval = bin_info->reg_size + + (bin_info->redzone_size << 1); + /* * Calculate known-valid settings before entering the run_size * expansion loop, so that the first part of the loop always copies @@ -2028,7 +2097,8 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size) * header's mask length and the number of regions. */ try_run_size = min_run_size; - try_nregs = ((try_run_size - sizeof(arena_run_t)) / bin_info->reg_size) + try_nregs = ((try_run_size - sizeof(arena_run_t)) / + bin_info->reg_interval) + 1; /* Counter-act try_nregs-- in loop. */ if (try_nregs > RUN_MAXREGS) { try_nregs = RUN_MAXREGS @@ -2050,9 +2120,9 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size) try_hdr_size += try_nregs * sizeof(prof_ctx_t *); } else try_ctx0_offset = 0; - try_reg0_offset = try_run_size - (try_nregs * - bin_info->reg_size); - } while (try_hdr_size > try_reg0_offset); + try_redzone0_offset = try_run_size - (try_nregs * + bin_info->reg_interval) - pad_size; + } while (try_hdr_size > try_redzone0_offset); /* run_size expansion loop. */ do { @@ -2064,12 +2134,12 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size) good_hdr_size = try_hdr_size; good_bitmap_offset = try_bitmap_offset; good_ctx0_offset = try_ctx0_offset; - good_reg0_offset = try_reg0_offset; + good_redzone0_offset = try_redzone0_offset; /* Try more aggressive settings. 
*/ try_run_size += PAGE; - try_nregs = ((try_run_size - sizeof(arena_run_t)) / - bin_info->reg_size) + try_nregs = ((try_run_size - sizeof(arena_run_t) - pad_size) / + bin_info->reg_interval) + 1; /* Counter-act try_nregs-- in loop. */ if (try_nregs > RUN_MAXREGS) { try_nregs = RUN_MAXREGS @@ -2093,23 +2163,27 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size) try_hdr_size += try_nregs * sizeof(prof_ctx_t *); } - try_reg0_offset = try_run_size - (try_nregs * - bin_info->reg_size); - } while (try_hdr_size > try_reg0_offset); + try_redzone0_offset = try_run_size - (try_nregs * + bin_info->reg_interval) - pad_size; + } while (try_hdr_size > try_redzone0_offset); } while (try_run_size <= arena_maxclass && try_run_size <= arena_maxclass - && RUN_MAX_OVRHD * (bin_info->reg_size << 3) > RUN_MAX_OVRHD_RELAX - && (try_reg0_offset << RUN_BFP) > RUN_MAX_OVRHD * try_run_size + && RUN_MAX_OVRHD * (bin_info->reg_interval << 3) > + RUN_MAX_OVRHD_RELAX + && (try_redzone0_offset << RUN_BFP) > RUN_MAX_OVRHD * try_run_size && try_nregs < RUN_MAXREGS); - assert(good_hdr_size <= good_reg0_offset); + assert(good_hdr_size <= good_redzone0_offset); /* Copy final settings. 
*/ bin_info->run_size = good_run_size; bin_info->nregs = good_nregs; bin_info->bitmap_offset = good_bitmap_offset; bin_info->ctx0_offset = good_ctx0_offset; - bin_info->reg0_offset = good_reg0_offset; + bin_info->reg0_offset = good_redzone0_offset + bin_info->redzone_size; + + assert(bin_info->reg0_offset - bin_info->redzone_size + (bin_info->nregs + * bin_info->reg_interval) + pad_size == bin_info->run_size); return (good_run_size); } diff --git a/src/ctl.c b/src/ctl.c index a75ffef3..6be40561 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -57,6 +57,7 @@ CTL_PROTO(config_stats) CTL_PROTO(config_tcache) CTL_PROTO(config_tls) CTL_PROTO(config_utrace) +CTL_PROTO(config_valgrind) CTL_PROTO(config_xmalloc) CTL_PROTO(opt_abort) CTL_PROTO(opt_lg_chunk) @@ -65,7 +66,10 @@ CTL_PROTO(opt_lg_dirty_mult) CTL_PROTO(opt_stats_print) CTL_PROTO(opt_junk) CTL_PROTO(opt_zero) +CTL_PROTO(opt_quarantine) +CTL_PROTO(opt_redzone) CTL_PROTO(opt_utrace) +CTL_PROTO(opt_valgrind) CTL_PROTO(opt_xmalloc) CTL_PROTO(opt_tcache) CTL_PROTO(opt_lg_tcache_max) @@ -179,6 +183,7 @@ static const ctl_node_t config_node[] = { {NAME("tcache"), CTL(config_tcache)}, {NAME("tls"), CTL(config_tls)}, {NAME("utrace"), CTL(config_utrace)}, + {NAME("valgrind"), CTL(config_valgrind)}, {NAME("xmalloc"), CTL(config_xmalloc)} }; @@ -190,7 +195,10 @@ static const ctl_node_t opt_node[] = { {NAME("stats_print"), CTL(opt_stats_print)}, {NAME("junk"), CTL(opt_junk)}, {NAME("zero"), CTL(opt_zero)}, + {NAME("quarantine"), CTL(opt_quarantine)}, + {NAME("redzone"), CTL(opt_redzone)}, {NAME("utrace"), CTL(opt_utrace)}, + {NAME("valgrind"), CTL(opt_valgrind)}, {NAME("xmalloc"), CTL(opt_xmalloc)}, {NAME("tcache"), CTL(opt_tcache)}, {NAME("lg_tcache_max"), CTL(opt_lg_tcache_max)}, @@ -1050,7 +1058,8 @@ thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, /* Set new arena association. 
*/ if (config_tcache) { tcache_t *tcache; - if ((tcache = *tcache_tsd_get()) != NULL) { + if ((uintptr_t)(tcache = *tcache_tsd_get()) > + (uintptr_t)TCACHE_STATE_MAX) { tcache_arena_dissociate(tcache); tcache_arena_associate(tcache, arena); } @@ -1085,6 +1094,7 @@ CTL_RO_BOOL_CONFIG_GEN(config_stats) CTL_RO_BOOL_CONFIG_GEN(config_tcache) CTL_RO_BOOL_CONFIG_GEN(config_tls) CTL_RO_BOOL_CONFIG_GEN(config_utrace) +CTL_RO_BOOL_CONFIG_GEN(config_valgrind) CTL_RO_BOOL_CONFIG_GEN(config_xmalloc) /******************************************************************************/ @@ -1096,7 +1106,10 @@ CTL_RO_NL_GEN(opt_lg_dirty_mult, opt_lg_dirty_mult, ssize_t) CTL_RO_NL_GEN(opt_stats_print, opt_stats_print, bool) CTL_RO_NL_CGEN(config_fill, opt_junk, opt_junk, bool) CTL_RO_NL_CGEN(config_fill, opt_zero, opt_zero, bool) +CTL_RO_NL_CGEN(config_fill, opt_quarantine, opt_quarantine, size_t) +CTL_RO_NL_CGEN(config_fill, opt_redzone, opt_redzone, bool) CTL_RO_NL_CGEN(config_utrace, opt_utrace, opt_utrace, bool) +CTL_RO_NL_CGEN(config_valgrind, opt_valgrind, opt_valgrind, bool) CTL_RO_NL_CGEN(config_xmalloc, opt_xmalloc, opt_xmalloc, bool) CTL_RO_NL_CGEN(config_tcache, opt_tcache, opt_tcache, bool) CTL_RO_NL_CGEN(config_tcache, opt_lg_tcache_max, opt_lg_tcache_max, ssize_t) diff --git a/src/huge.c b/src/huge.c index 43c8f3b0..daf0c622 100644 --- a/src/huge.c +++ b/src/huge.c @@ -174,7 +174,7 @@ huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, #endif { memcpy(ret, ptr, copysize); - idalloc(ptr); + iqalloc(ptr); } return (ret); } diff --git a/src/jemalloc.c b/src/jemalloc.c index cde998c4..237dd589 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -14,14 +14,19 @@ const char *je_malloc_conf JEMALLOC_ATTR(visibility("default")); bool opt_abort = true; # ifdef JEMALLOC_FILL bool opt_junk = true; +bool opt_redzone = true; # else bool opt_junk = false; +bool opt_redzone = false; # endif #else bool opt_abort = false; bool opt_junk = false; +bool opt_redzone = false; 
#endif +size_t opt_quarantine = ZU(0); bool opt_utrace = false; +bool opt_valgrind = false; bool opt_xmalloc = false; bool opt_zero = false; size_t opt_narenas = 0; @@ -419,7 +424,7 @@ malloc_conf_init(void) while (*opts != '\0' && malloc_conf_next(&opts, &k, &klen, &v, &vlen) == false) { -#define CONF_HANDLE_BOOL(o, n) \ +#define CONF_HANDLE_BOOL_HIT(o, n, hit) \ if (sizeof(#n)-1 == klen && strncmp(#n, k, \ klen) == 0) { \ if (strncmp("true", v, vlen) == 0 && \ @@ -433,12 +438,19 @@ malloc_conf_init(void) "Invalid conf value", \ k, klen, v, vlen); \ } \ + hit = true; \ + } else \ + hit = false; +#define CONF_HANDLE_BOOL(o, n) { \ + bool hit; \ + CONF_HANDLE_BOOL_HIT(o, n, hit); \ + if (hit) \ continue; \ - } +} #define CONF_HANDLE_SIZE_T(o, n, min, max) \ if (sizeof(#n)-1 == klen && strncmp(#n, k, \ klen) == 0) { \ - uintmax_t um; \ + uintmax_t um; \ char *end; \ \ errno = 0; \ @@ -502,11 +514,30 @@ malloc_conf_init(void) CONF_HANDLE_BOOL(opt_stats_print, stats_print) if (config_fill) { CONF_HANDLE_BOOL(opt_junk, junk) + CONF_HANDLE_SIZE_T(opt_quarantine, quarantine, + 0, SIZE_T_MAX) + CONF_HANDLE_BOOL(opt_redzone, redzone) CONF_HANDLE_BOOL(opt_zero, zero) } if (config_utrace) { CONF_HANDLE_BOOL(opt_utrace, utrace) } + if (config_valgrind) { + bool hit; + CONF_HANDLE_BOOL_HIT(opt_valgrind, + valgrind, hit) + if (config_fill && opt_valgrind && hit) { + opt_junk = false; + opt_zero = false; + if (opt_quarantine == 0) { + opt_quarantine = + JEMALLOC_VALGRIND_QUARANTINE_DEFAULT; + } + opt_redzone = true; + } + if (hit) + continue; + } if (config_xmalloc) { CONF_HANDLE_BOOL(opt_xmalloc, xmalloc) } @@ -662,6 +693,11 @@ malloc_init_hard(void) return (true); } + if (config_fill && quarantine_boot()) { + malloc_mutex_unlock(&init_lock); + return (true); + } + if (config_prof && prof_boot2()) { malloc_mutex_unlock(&init_lock); return (true); @@ -763,7 +799,7 @@ je_malloc(size_t size) } else ret = imalloc(size); } else { - if (config_stats) + if (config_stats || 
(config_valgrind && opt_valgrind)) usize = s2u(size); ret = imalloc(size); } @@ -780,10 +816,11 @@ label_oom: if (config_prof && opt_prof && ret != NULL) prof_malloc(ret, usize, cnt); if (config_stats && ret != NULL) { - assert(usize == isalloc(ret)); + assert(usize == isalloc(ret, config_prof)); thread_allocated_tsd_get()->allocated += usize; } UTRACE(0, size, ret); + JEMALLOC_VALGRIND_MALLOC(ret != NULL, ret, usize, false); return (ret); } @@ -872,7 +909,7 @@ imemalign(void **memptr, size_t alignment, size_t size, label_return: if (config_stats && result != NULL) { - assert(usize == isalloc(result)); + assert(usize == isalloc(result, config_prof)); thread_allocated_tsd_get()->allocated += usize; } if (config_prof && opt_prof && result != NULL) @@ -886,8 +923,10 @@ JEMALLOC_ATTR(visibility("default")) int je_posix_memalign(void **memptr, size_t alignment, size_t size) { - - return imemalign(memptr, alignment, size, sizeof(void *)); + int ret = imemalign(memptr, alignment, size, sizeof(void *)); + JEMALLOC_VALGRIND_MALLOC(ret == 0, *memptr, isalloc(*memptr, + config_prof), false); + return (ret); } JEMALLOC_ATTR(malloc) @@ -902,6 +941,8 @@ je_aligned_alloc(size_t alignment, size_t size) ret = NULL; errno = err; } + JEMALLOC_VALGRIND_MALLOC(err == 0, ret, isalloc(ret, config_prof), + false); return (ret); } @@ -956,7 +997,7 @@ je_calloc(size_t num, size_t size) } else ret = icalloc(num_size); } else { - if (config_stats) + if (config_stats || (config_valgrind && opt_valgrind)) usize = s2u(num_size); ret = icalloc(num_size); } @@ -974,10 +1015,11 @@ label_return: if (config_prof && opt_prof && ret != NULL) prof_malloc(ret, usize, cnt); if (config_stats && ret != NULL) { - assert(usize == isalloc(ret)); + assert(usize == isalloc(ret, config_prof)); thread_allocated_tsd_get()->allocated += usize; } UTRACE(0, num_size, ret); + JEMALLOC_VALGRIND_MALLOC(ret != NULL, ret, usize, true); return (ret); } @@ -988,19 +1030,30 @@ je_realloc(void *ptr, size_t size) void *ret; 
size_t usize; size_t old_size = 0; + size_t old_rzsize JEMALLOC_CC_SILENCE_INIT(0); prof_thr_cnt_t *cnt JEMALLOC_CC_SILENCE_INIT(NULL); prof_ctx_t *old_ctx JEMALLOC_CC_SILENCE_INIT(NULL); if (size == 0) { if (ptr != NULL) { /* realloc(ptr, 0) is equivalent to free(p). */ - if (config_prof || config_stats) - old_size = isalloc(ptr); + if (config_prof) { + old_size = isalloc(ptr, true); + if (config_valgrind && opt_valgrind) + old_rzsize = p2rz(ptr); + } else if (config_stats) { + old_size = isalloc(ptr, false); + if (config_valgrind && opt_valgrind) + old_rzsize = u2rz(old_size); + } else if (config_valgrind && opt_valgrind) { + old_size = isalloc(ptr, false); + old_rzsize = u2rz(old_size); + } if (config_prof && opt_prof) { old_ctx = prof_ctx_get(ptr); cnt = NULL; } - idalloc(ptr); + iqalloc(ptr); ret = NULL; goto label_return; } else @@ -1010,8 +1063,18 @@ je_realloc(void *ptr, size_t size) if (ptr != NULL) { assert(malloc_initialized || IS_INITIALIZER); - if (config_prof || config_stats) - old_size = isalloc(ptr); + if (config_prof) { + old_size = isalloc(ptr, true); + if (config_valgrind && opt_valgrind) + old_rzsize = p2rz(ptr); + } else if (config_stats) { + old_size = isalloc(ptr, false); + if (config_valgrind && opt_valgrind) + old_rzsize = u2rz(old_size); + } else if (config_valgrind && opt_valgrind) { + old_size = isalloc(ptr, false); + old_rzsize = u2rz(old_size); + } if (config_prof && opt_prof) { usize = s2u(size); old_ctx = prof_ctx_get(ptr); @@ -1035,7 +1098,7 @@ je_realloc(void *ptr, size_t size) old_ctx = NULL; } } else { - if (config_stats) + if (config_stats || (config_valgrind && opt_valgrind)) usize = s2u(size); ret = iralloc(ptr, size, 0, 0, false, false); } @@ -1076,7 +1139,8 @@ label_oom: ret = imalloc(size); } } else { - if (config_stats) + if (config_stats || (config_valgrind && + opt_valgrind)) usize = s2u(size); ret = imalloc(size); } @@ -1097,12 +1161,13 @@ label_return: prof_realloc(ret, usize, cnt, old_size, old_ctx); if (config_stats 
&& ret != NULL) { thread_allocated_t *ta; - assert(usize == isalloc(ret)); + assert(usize == isalloc(ret, config_prof)); ta = thread_allocated_tsd_get(); ta->allocated += usize; ta->deallocated += old_size; } UTRACE(ptr, size, ret); + JEMALLOC_VALGRIND_REALLOC(ret, usize, ptr, old_size, old_rzsize, false); return (ret); } @@ -1114,18 +1179,21 @@ je_free(void *ptr) UTRACE(ptr, 0, 0); if (ptr != NULL) { size_t usize; + size_t rzsize JEMALLOC_CC_SILENCE_INIT(0); assert(malloc_initialized || IS_INITIALIZER); if (config_prof && opt_prof) { - usize = isalloc(ptr); + usize = isalloc(ptr, config_prof); prof_free(ptr, usize); - } else if (config_stats) { - usize = isalloc(ptr); - } + } else if (config_stats || config_valgrind) + usize = isalloc(ptr, config_prof); if (config_stats) thread_allocated_tsd_get()->deallocated += usize; - idalloc(ptr); + if (config_valgrind && opt_valgrind) + rzsize = p2rz(ptr); + iqalloc(ptr); + JEMALLOC_VALGRIND_FREE(ptr, rzsize); } } @@ -1145,6 +1213,7 @@ je_memalign(size_t alignment, size_t size) { void *ret JEMALLOC_CC_SILENCE_INIT(NULL); imemalign(&ret, alignment, size, 1); + JEMALLOC_VALGRIND_MALLOC(ret != NULL, ret, size, false); return (ret); } #endif @@ -1157,6 +1226,7 @@ je_valloc(size_t size) { void *ret JEMALLOC_CC_SILENCE_INIT(NULL); imemalign(&ret, PAGE, size, 1); + JEMALLOC_VALGRIND_MALLOC(ret != NULL, ret, size, false); return (ret); } #endif @@ -1209,9 +1279,9 @@ je_malloc_usable_size(const void *ptr) assert(malloc_initialized || IS_INITIALIZER); if (config_ivsalloc) - ret = ivsalloc(ptr); + ret = ivsalloc(ptr, config_prof); else - ret = (ptr != NULL) ? isalloc(ptr) : 0; + ret = (ptr != NULL) ? 
isalloc(ptr, config_prof) : 0; return (ret); } @@ -1336,10 +1406,11 @@ je_allocm(void **ptr, size_t *rsize, size_t size, int flags) *ptr = p; if (config_stats) { - assert(usize == isalloc(p)); + assert(usize == isalloc(p, config_prof)); thread_allocated_tsd_get()->allocated += usize; } UTRACE(0, size, p); + JEMALLOC_VALGRIND_MALLOC(true, p, usize, zero); return (ALLOCM_SUCCESS); label_oom: if (config_xmalloc && opt_xmalloc) { @@ -1360,6 +1431,7 @@ je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, int flags) void *p, *q; size_t usize; size_t old_size; + size_t old_rzsize JEMALLOC_CC_SILENCE_INIT(0); size_t alignment = (ZU(1) << (flags & ALLOCM_LG_ALIGN_MASK) & (SIZE_T_MAX-1)); bool zero = flags & ALLOCM_ZERO; @@ -1384,7 +1456,9 @@ je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, int flags) size_t max_usize = (alignment == 0) ? s2u(size+extra) : sa2u(size+extra, alignment, NULL); prof_ctx_t *old_ctx = prof_ctx_get(p); - old_size = isalloc(p); + old_size = isalloc(p, true); + if (config_valgrind && opt_valgrind) + old_rzsize = p2rz(p); PROF_ALLOC_PREP(1, max_usize, cnt); if (cnt == NULL) goto label_oom; @@ -1403,27 +1477,33 @@ je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, int flags) usize = max_usize; arena_prof_promoted(q, usize); } else - usize = isalloc(q); + usize = isalloc(q, config_prof); } else { q = iralloc(p, size, extra, alignment, zero, no_move); if (q == NULL) goto label_err; - usize = isalloc(q); + usize = isalloc(q, config_prof); } prof_realloc(q, usize, cnt, old_size, old_ctx); if (rsize != NULL) *rsize = usize; } else { - if (config_stats) - old_size = isalloc(p); + if (config_stats) { + old_size = isalloc(p, false); + if (config_valgrind && opt_valgrind) + old_rzsize = u2rz(old_size); + } else if (config_valgrind && opt_valgrind) { + old_size = isalloc(p, false); + old_rzsize = u2rz(old_size); + } q = iralloc(p, size, extra, alignment, zero, no_move); if (q == NULL) goto label_err; if (config_stats) - 
usize = isalloc(q); + usize = isalloc(q, config_prof); if (rsize != NULL) { if (config_stats == false) - usize = isalloc(q); + usize = isalloc(q, config_prof); *rsize = usize; } } @@ -1436,6 +1516,7 @@ je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, int flags) ta->deallocated += old_size; } UTRACE(p, size, q); + JEMALLOC_VALGRIND_REALLOC(q, usize, p, old_size, old_rzsize, zero); return (ALLOCM_SUCCESS); label_err: if (no_move) { @@ -1462,10 +1543,10 @@ je_sallocm(const void *ptr, size_t *rsize, int flags) assert(malloc_initialized || IS_INITIALIZER); if (config_ivsalloc) - sz = ivsalloc(ptr); + sz = ivsalloc(ptr, config_prof); else { assert(ptr != NULL); - sz = isalloc(ptr); + sz = isalloc(ptr, config_prof); } assert(rsize != NULL); *rsize = sz; @@ -1479,21 +1560,25 @@ int je_dallocm(void *ptr, int flags) { size_t usize; + size_t rzsize JEMALLOC_CC_SILENCE_INIT(0); assert(ptr != NULL); assert(malloc_initialized || IS_INITIALIZER); UTRACE(ptr, 0, 0); - if (config_stats) - usize = isalloc(ptr); + if (config_stats || config_valgrind) + usize = isalloc(ptr, config_prof); if (config_prof && opt_prof) { - if (config_stats == false) - usize = isalloc(ptr); + if (config_stats == false && config_valgrind == false) + usize = isalloc(ptr, config_prof); prof_free(ptr, usize); } if (config_stats) thread_allocated_tsd_get()->deallocated += usize; - idalloc(ptr); + if (config_valgrind && opt_valgrind) + rzsize = p2rz(ptr); + iqalloc(ptr); + JEMALLOC_VALGRIND_FREE(ptr, rzsize); return (ALLOCM_SUCCESS); } diff --git a/src/quarantine.c b/src/quarantine.c new file mode 100644 index 00000000..89a25c6a --- /dev/null +++ b/src/quarantine.c @@ -0,0 +1,163 @@ +#include "jemalloc/internal/jemalloc_internal.h" + +/******************************************************************************/ +/* Data. 
*/ + +typedef struct quarantine_s quarantine_t; + +struct quarantine_s { + size_t curbytes; + size_t curobjs; + size_t first; +#define LG_MAXOBJS_INIT 10 + size_t lg_maxobjs; + void *objs[1]; /* Dynamically sized ring buffer. */ +}; + +static void quarantine_cleanup(void *arg); + +malloc_tsd_data(static, quarantine, quarantine_t *, NULL) +malloc_tsd_funcs(JEMALLOC_INLINE, quarantine, quarantine_t *, NULL, + quarantine_cleanup) + +/******************************************************************************/ +/* Function prototypes for non-inline static functions. */ + +static quarantine_t *quarantine_init(size_t lg_maxobjs); +static quarantine_t *quarantine_grow(quarantine_t *quarantine); +static void quarantine_drain(quarantine_t *quarantine, size_t upper_bound); + +/******************************************************************************/ + +static quarantine_t * +quarantine_init(size_t lg_maxobjs) +{ + quarantine_t *quarantine; + + quarantine = (quarantine_t *)imalloc(offsetof(quarantine_t, objs) + + ((ZU(1) << lg_maxobjs) * sizeof(void *))); + if (quarantine == NULL) + return (NULL); + quarantine->curbytes = 0; + quarantine->curobjs = 0; + quarantine->first = 0; + quarantine->lg_maxobjs = lg_maxobjs; + + quarantine_tsd_set(&quarantine); + + return (quarantine); +} + +static quarantine_t * +quarantine_grow(quarantine_t *quarantine) +{ + quarantine_t *ret; + + ret = quarantine_init(quarantine->lg_maxobjs + 1); + if (ret == NULL) + return (quarantine); + + ret->curbytes = quarantine->curbytes; + if (quarantine->first + quarantine->curobjs < (ZU(1) << + quarantine->lg_maxobjs)) { + /* objs ring buffer data are contiguous. */ + memcpy(ret->objs, &quarantine->objs[quarantine->first], + quarantine->curobjs * sizeof(void *)); + ret->curobjs = quarantine->curobjs; + } else { + /* objs ring buffer data wrap around. 
*/ + size_t ncopy = (ZU(1) << quarantine->lg_maxobjs) - + quarantine->first; + memcpy(ret->objs, &quarantine->objs[quarantine->first], ncopy * + sizeof(void *)); + ret->curobjs = ncopy; + if (quarantine->curobjs != 0) { + memcpy(&ret->objs[ret->curobjs], quarantine->objs, + quarantine->curobjs - ncopy); + } + } + + return (ret); +} + +static void +quarantine_drain(quarantine_t *quarantine, size_t upper_bound) +{ + + while (quarantine->curbytes > upper_bound && quarantine->curobjs > 0) { + void *ptr = quarantine->objs[quarantine->first]; + size_t usize = isalloc(ptr, config_prof); + idalloc(ptr); + quarantine->curbytes -= usize; + quarantine->curobjs--; + quarantine->first = (quarantine->first + 1) & ((ZU(1) << + quarantine->lg_maxobjs) - 1); + } +} + +void +quarantine(void *ptr) +{ + quarantine_t *quarantine; + size_t usize = isalloc(ptr, config_prof); + + assert(config_fill); + assert(opt_quarantine); + + quarantine = *quarantine_tsd_get(); + if (quarantine == NULL && (quarantine = + quarantine_init(LG_MAXOBJS_INIT)) == NULL) { + idalloc(ptr); + return; + } + /* + * Drain one or more objects if the quarantine size limit would be + * exceeded by appending ptr. + */ + if (quarantine->curbytes + usize > opt_quarantine) { + size_t upper_bound = (opt_quarantine >= usize) ? opt_quarantine + - usize : 0; + quarantine_drain(quarantine, upper_bound); + } + /* Grow the quarantine ring buffer if it's full. */ + if (quarantine->curobjs == (ZU(1) << quarantine->lg_maxobjs)) + quarantine = quarantine_grow(quarantine); + /* quarantine_grow() must free a slot if it fails to grow. */ + assert(quarantine->curobjs < (ZU(1) << quarantine->lg_maxobjs)); + /* Append ptr if its size doesn't exceed the quarantine size. 
*/ + if (quarantine->curbytes + usize <= opt_quarantine) { + size_t offset = (quarantine->first + quarantine->curobjs) & + ((ZU(1) << quarantine->lg_maxobjs) - 1); + quarantine->objs[offset] = ptr; + quarantine->curbytes += usize; + quarantine->curobjs++; + if (opt_junk) + memset(ptr, 0x5a, usize); + } else { + assert(quarantine->curbytes == 0); + idalloc(ptr); + } +} + +static void +quarantine_cleanup(void *arg) +{ + quarantine_t *quarantine = *(quarantine_t **)arg; + + if (quarantine != NULL) { + quarantine_drain(quarantine, 0); + idalloc(quarantine); + } +} + +bool +quarantine_boot(void) +{ + + assert(config_fill); + + if (quarantine_tsd_boot()) + return (true); + + return (false); +} diff --git a/src/stats.c b/src/stats.c index 0cd70b0d..4cad214f 100644 --- a/src/stats.c +++ b/src/stats.c @@ -382,8 +382,11 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, OPT_WRITE_SSIZE_T(lg_dirty_mult) OPT_WRITE_BOOL(stats_print) OPT_WRITE_BOOL(junk) + OPT_WRITE_SIZE_T(quarantine) + OPT_WRITE_BOOL(redzone) OPT_WRITE_BOOL(zero) OPT_WRITE_BOOL(utrace) + OPT_WRITE_BOOL(valgrind) OPT_WRITE_BOOL(xmalloc) OPT_WRITE_BOOL(tcache) OPT_WRITE_SSIZE_T(lg_tcache_max) diff --git a/src/tcache.c b/src/tcache.c index 99a657b6..be26b59c 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -75,6 +75,10 @@ tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem, (uintptr_t)chunk) >> LG_PAGE; arena_chunk_map_t *mapelm = &chunk->map[pageind-map_bias]; + if (config_fill && opt_junk) { + arena_alloc_junk_small(ptr, + &arena_bin_info[binind], true); + } arena_dalloc_bin(arena, chunk, ptr, mapelm); } else { /* @@ -298,7 +302,7 @@ tcache_destroy(tcache_t *tcache) malloc_mutex_unlock(&tcache->arena->lock); } - tcache_size = arena_salloc(tcache); + tcache_size = arena_salloc(tcache, false); if (tcache_size <= SMALL_MAXCLASS) { arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache); arena_t *arena = chunk->arena; diff --git a/src/zone.c b/src/zone.c index a50c129c..cde5d49a 100644 
--- a/src/zone.c +++ b/src/zone.c @@ -56,7 +56,7 @@ zone_size(malloc_zone_t *zone, void *ptr) * not work in practice, we must check all pointers to assure that they * reside within a mapped chunk before determining size. */ - return (ivsalloc(ptr)); + return (ivsalloc(ptr, config_prof)); } static void * @@ -87,7 +87,7 @@ static void zone_free(malloc_zone_t *zone, void *ptr) { - if (ivsalloc(ptr) != 0) { + if (ivsalloc(ptr, config_prof) != 0) { je_free(ptr); return; } @@ -99,7 +99,7 @@ static void * zone_realloc(malloc_zone_t *zone, void *ptr, size_t size) { - if (ivsalloc(ptr) != 0) + if (ivsalloc(ptr, config_prof) != 0) return (je_realloc(ptr, size)); return (realloc(ptr, size)); @@ -122,8 +122,8 @@ static void zone_free_definite_size(malloc_zone_t *zone, void *ptr, size_t size) { - if (ivsalloc(ptr) != 0) { - assert(ivsalloc(ptr) == size); + if (ivsalloc(ptr, config_prof) != 0) { + assert(ivsalloc(ptr, config_prof) == size); je_free(ptr); return; } From 5ff709c264e52651de25b788692c62ff1f6f389c Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 11 Apr 2012 18:13:45 -0700 Subject: [PATCH 0141/3378] Normalize aligned allocation algorithms. Normalize arena_palloc(), chunk_alloc_mmap_slow(), and chunk_recycle_dss() to use the same algorithm for trimming over-allocation. Add the ALIGNMENT_ADDR2BASE(), ALIGNMENT_ADDR2OFFSET(), and ALIGNMENT_CEILING() macros, and use them where appropriate. Remove the run_size_p parameter from sa2u(). Fix a potential deadlock in chunk_recycle_dss() that was introduced by eae269036c9f702d9fa9be497a1a2aa1be13a29e (Add alignment support to chunk_alloc()). 
--- include/jemalloc/internal/arena.h | 3 +- .../jemalloc/internal/jemalloc_internal.h.in | 65 ++++++------- include/jemalloc/internal/private_namespace.h | 1 - src/arena.c | 52 +++++----- src/chunk_dss.c | 94 +++++++++++-------- src/chunk_mmap.c | 50 ++++------ src/ckh.c | 6 +- src/jemalloc.c | 22 ++--- 8 files changed, 138 insertions(+), 155 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index f52fac42..3790818c 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -407,8 +407,7 @@ void arena_alloc_junk_small(void *ptr, arena_bin_info_t *bin_info, void arena_dalloc_junk_small(void *ptr, arena_bin_info_t *bin_info); void *arena_malloc_small(arena_t *arena, size_t size, bool zero); void *arena_malloc_large(arena_t *arena, size_t size, bool zero); -void *arena_palloc(arena_t *arena, size_t size, size_t alloc_size, - size_t alignment, bool zero); +void *arena_palloc(arena_t *arena, size_t size, size_t alignment, bool zero); size_t arena_salloc(const void *ptr, bool demote); void arena_prof_promoted(const void *ptr, size_t size); void arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index a16e5e27..57895fb3 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -293,6 +293,18 @@ static const bool config_ivsalloc = #define PAGE_CEILING(s) \ (((s) + PAGE_MASK) & ~PAGE_MASK) +/* Return the nearest aligned address at or below a. */ +#define ALIGNMENT_ADDR2BASE(a, alignment) \ + ((void *)((uintptr_t)(a) & (-(alignment)))) + +/* Return the offset between a and the nearest aligned address at or below a. */ +#define ALIGNMENT_ADDR2OFFSET(a, alignment) \ + ((size_t)((uintptr_t)(a) & (alignment - 1))) + +/* Return the smallest alignment multiple that is >= s. 
*/ +#define ALIGNMENT_CEILING(s, alignment) \ + (((s) + (alignment - 1)) & (-(alignment))) + #ifdef JEMALLOC_VALGRIND /* * The JEMALLOC_VALGRIND_*() macros must be macros rather than functions @@ -499,7 +511,7 @@ void jemalloc_postfork_child(void); malloc_tsd_protos(JEMALLOC_ATTR(unused), arenas, arena_t *) size_t s2u(size_t size); -size_t sa2u(size_t size, size_t alignment, size_t *run_size_p); +size_t sa2u(size_t size, size_t alignment); arena_t *choose_arena(arena_t *arena); #endif @@ -531,10 +543,12 @@ s2u(size_t size) * specified size and alignment. */ JEMALLOC_INLINE size_t -sa2u(size_t size, size_t alignment, size_t *run_size_p) +sa2u(size_t size, size_t alignment) { size_t usize; + assert(alignment != 0 && ((alignment - 1) & alignment) == 0); + /* * Round size up to the nearest multiple of alignment. * @@ -549,7 +563,7 @@ sa2u(size_t size, size_t alignment, size_t *run_size_p) * 144 | 10100000 | 32 * 192 | 11000000 | 64 */ - usize = (size + (alignment - 1)) & (-alignment); + usize = ALIGNMENT_CEILING(size, alignment); /* * (usize < size) protects against the combination of maximal * alignment and size greater than maximal alignment. @@ -592,24 +606,10 @@ sa2u(size_t size, size_t alignment, size_t *run_size_p) /* * Calculate the size of the over-size run that arena_palloc() * would need to allocate in order to guarantee the alignment. + * If the run wouldn't fit within a chunk, round up to a huge + * allocation size. */ - if (usize >= alignment) - run_size = usize + alignment - PAGE; - else { - /* - * It is possible that (alignment << 1) will cause - * overflow, but it doesn't matter because we also - * subtract PAGE, which in the case of overflow leaves - * us with a very large run_size. That causes the - * first conditional below to fail, which means that - * the bogus run_size value never gets used for - * anything important. 
- */ - run_size = (alignment << 1) - PAGE; - } - if (run_size_p != NULL) - *run_size_p = run_size; - + run_size = usize + alignment - PAGE; if (run_size <= arena_maxclass) return (PAGE_CEILING(usize)); return (CHUNK_CEILING(usize)); @@ -685,32 +685,21 @@ ipalloc(size_t usize, size_t alignment, bool zero) void *ret; assert(usize != 0); - assert(usize == sa2u(usize, alignment, NULL)); + assert(usize == sa2u(usize, alignment)); if (usize <= arena_maxclass && alignment <= PAGE) ret = arena_malloc(NULL, usize, zero, true); else { - size_t run_size JEMALLOC_CC_SILENCE_INIT(0); - - /* - * Ideally we would only ever call sa2u() once per aligned - * allocation request, and the caller of this function has - * already done so once. However, it's rather burdensome to - * require every caller to pass in run_size, especially given - * that it's only relevant to large allocations. Therefore, - * just call it again here in order to get run_size. - */ - sa2u(usize, alignment, &run_size); - if (run_size <= arena_maxclass) { - ret = arena_palloc(choose_arena(NULL), usize, run_size, - alignment, zero); + if (usize <= arena_maxclass) { + ret = arena_palloc(choose_arena(NULL), usize, alignment, + zero); } else if (alignment <= chunksize) ret = huge_malloc(usize, zero); else ret = huge_palloc(usize, alignment, zero); } - assert(((uintptr_t)ret & (alignment - 1)) == 0); + assert(ALIGNMENT_ADDR2BASE(ret, alignment) == ret); return (ret); } @@ -818,7 +807,7 @@ iralloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero, */ if (no_move) return (NULL); - usize = sa2u(size + extra, alignment, NULL); + usize = sa2u(size + extra, alignment); if (usize == 0) return (NULL); ret = ipalloc(usize, alignment, zero); @@ -826,7 +815,7 @@ iralloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero, if (extra == 0) return (NULL); /* Try again, without extra this time. 
*/ - usize = sa2u(size, alignment, NULL); + usize = sa2u(size, alignment); if (usize == 0) return (NULL); ret = ipalloc(usize, alignment, zero); diff --git a/include/jemalloc/internal/private_namespace.h b/include/jemalloc/internal/private_namespace.h index a962192c..fca65950 100644 --- a/include/jemalloc/internal/private_namespace.h +++ b/include/jemalloc/internal/private_namespace.h @@ -55,7 +55,6 @@ #define chunk_alloc JEMALLOC_N(chunk_alloc) #define chunk_alloc_dss JEMALLOC_N(chunk_alloc_dss) #define chunk_alloc_mmap JEMALLOC_N(chunk_alloc_mmap) -#define chunk_alloc_mmap_noreserve JEMALLOC_N(chunk_alloc_mmap_noreserve) #define chunk_boot JEMALLOC_N(chunk_boot) #define chunk_dealloc JEMALLOC_N(chunk_dealloc) #define chunk_dealloc_dss JEMALLOC_N(chunk_dealloc_dss) diff --git a/src/arena.c b/src/arena.c index 1d4f61ea..1a108db5 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1418,48 +1418,38 @@ arena_malloc_large(arena_t *arena, size_t size, bool zero) /* Only handles large allocations that require more than page alignment. 
*/ void * -arena_palloc(arena_t *arena, size_t size, size_t alloc_size, size_t alignment, - bool zero) +arena_palloc(arena_t *arena, size_t size, size_t alignment, bool zero) { void *ret; - size_t offset; + size_t alloc_size, leadsize, trailsize; + arena_run_t *run; arena_chunk_t *chunk; assert((size & PAGE_MASK) == 0); alignment = PAGE_CEILING(alignment); + alloc_size = size + alignment - PAGE; malloc_mutex_lock(&arena->lock); - ret = (void *)arena_run_alloc(arena, alloc_size, true, zero); - if (ret == NULL) { + run = arena_run_alloc(arena, alloc_size, true, zero); + if (run == NULL) { malloc_mutex_unlock(&arena->lock); return (NULL); } + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ret); - - offset = (uintptr_t)ret & (alignment - 1); - assert((offset & PAGE_MASK) == 0); - assert(offset < alloc_size); - if (offset == 0) - arena_run_trim_tail(arena, chunk, ret, alloc_size, size, false); - else { - size_t leadsize, trailsize; - - leadsize = alignment - offset; - if (leadsize > 0) { - arena_run_trim_head(arena, chunk, ret, alloc_size, - alloc_size - leadsize); - ret = (void *)((uintptr_t)ret + leadsize); - } - - trailsize = alloc_size - leadsize - size; - if (trailsize != 0) { - /* Trim trailing space. */ - assert(trailsize < alloc_size); - arena_run_trim_tail(arena, chunk, ret, size + trailsize, - size, false); - } + leadsize = ALIGNMENT_CEILING((uintptr_t)run, alignment) - + (uintptr_t)run; + assert(alloc_size >= leadsize + size); + trailsize = alloc_size - leadsize - size; + ret = (void *)((uintptr_t)run + leadsize); + if (leadsize != 0) { + arena_run_trim_head(arena, chunk, run, alloc_size, alloc_size - + leadsize); + } + if (trailsize != 0) { + arena_run_trim_tail(arena, chunk, ret, size + trailsize, size, + false); } if (config_stats) { @@ -1950,7 +1940,7 @@ arena_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, * copying. 
*/ if (alignment != 0) { - size_t usize = sa2u(size + extra, alignment, NULL); + size_t usize = sa2u(size + extra, alignment); if (usize == 0) return (NULL); ret = ipalloc(usize, alignment, zero); @@ -1962,7 +1952,7 @@ arena_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, return (NULL); /* Try again, this time without extra. */ if (alignment != 0) { - size_t usize = sa2u(size, alignment, NULL); + size_t usize = sa2u(size, alignment); if (usize == 0) return (NULL); ret = ipalloc(usize, alignment, zero); diff --git a/src/chunk_dss.c b/src/chunk_dss.c index ccd86b91..7c034092 100644 --- a/src/chunk_dss.c +++ b/src/chunk_dss.c @@ -36,51 +36,71 @@ static extent_node_t *chunk_dealloc_dss_record(void *chunk, size_t size); static void * chunk_recycle_dss(size_t size, size_t alignment, bool *zero) { - extent_node_t *node, key; + void *ret; + extent_node_t *node; + extent_node_t key; + size_t alloc_size, leadsize, trailsize; cassert(config_dss); + alloc_size = size + alignment - chunksize; + /* Beware size_t wrap-around. */ + if (alloc_size < size) + return (NULL); key.addr = NULL; - key.size = size + alignment - chunksize; + key.size = alloc_size; malloc_mutex_lock(&dss_mtx); node = extent_tree_szad_nsearch(&dss_chunks_szad, &key); - if (node != NULL) { - size_t offset = (size_t)((uintptr_t)(node->addr) & (alignment - - 1)); - void *ret; - if (offset > 0) - offset = alignment - offset; - ret = (void *)((uintptr_t)(node->addr) + offset); - - /* Remove node from the tree. */ - extent_tree_szad_remove(&dss_chunks_szad, node); - extent_tree_ad_remove(&dss_chunks_ad, node); - if (offset > 0) { - /* Insert the leading space as a smaller chunk. */ - node->size = offset; - extent_tree_szad_insert(&dss_chunks_szad, node); - extent_tree_ad_insert(&dss_chunks_ad, node); - } - if (alignment - chunksize > offset) { - if (offset > 0) - node = base_node_alloc(); - /* Insert the trailing space as a smaller chunk. 
*/ - node->addr = (void *)((uintptr_t)(ret) + size); - node->size = alignment - chunksize - offset; - extent_tree_szad_insert(&dss_chunks_szad, node); - extent_tree_ad_insert(&dss_chunks_ad, node); - } else if (offset == 0) - base_node_dealloc(node); - + if (node == NULL) { malloc_mutex_unlock(&dss_mtx); - - if (*zero) - memset(ret, 0, size); - return (ret); + return (NULL); + } + leadsize = ALIGNMENT_CEILING((uintptr_t)node->addr, alignment) - + (uintptr_t)node->addr; + assert(alloc_size >= leadsize + size); + trailsize = alloc_size - leadsize - size; + ret = (void *)((uintptr_t)node->addr + leadsize); + /* Remove node from the tree. */ + extent_tree_szad_remove(&dss_chunks_szad, node); + extent_tree_ad_remove(&dss_chunks_ad, node); + if (leadsize != 0) { + /* Insert the leading space as a smaller chunk. */ + node->size = leadsize; + extent_tree_szad_insert(&dss_chunks_szad, node); + extent_tree_ad_insert(&dss_chunks_ad, node); + node = NULL; + } + if (trailsize != 0) { + /* Insert the trailing space as a smaller chunk. */ + if (node == NULL) { + /* + * An additional node is required, but + * base_node_alloc() can cause a new base chunk to be + * allocated. Drop dss_mtx in order to avoid deadlock, + * and if node allocation fails, deallocate the result + * before returning an error. + */ + malloc_mutex_unlock(&dss_mtx); + node = base_node_alloc(); + if (node == NULL) { + chunk_dealloc_dss(ret, size); + return (NULL); + } + malloc_mutex_lock(&dss_mtx); + } + node->addr = (void *)((uintptr_t)(ret) + size); + node->size = trailsize; + extent_tree_szad_insert(&dss_chunks_szad, node); + extent_tree_ad_insert(&dss_chunks_ad, node); + node = NULL; } malloc_mutex_unlock(&dss_mtx); - return (NULL); + if (node != NULL) + base_node_dealloc(node); + if (*zero) + memset(ret, 0, size); + return (ret); } void * @@ -129,8 +149,8 @@ chunk_alloc_dss(size_t size, size_t alignment, bool *zero) * recycled for later use. 
*/ cpad = (void *)((uintptr_t)dss_max + gap_size); - ret = (void *)(((uintptr_t)dss_max + (alignment - 1)) & - ~(alignment - 1)); + ret = (void *)ALIGNMENT_CEILING((uintptr_t)dss_max, + alignment); cpad_size = (uintptr_t)ret - (uintptr_t)cpad; dss_next = (void *)((uintptr_t)ret + size); if ((uintptr_t)ret < (uintptr_t)dss_max || diff --git a/src/chunk_mmap.c b/src/chunk_mmap.c index 37dad204..6cbf094a 100644 --- a/src/chunk_mmap.c +++ b/src/chunk_mmap.c @@ -19,7 +19,6 @@ static void *pages_map(void *addr, size_t size); static void pages_unmap(void *addr, size_t size); static void *chunk_alloc_mmap_slow(size_t size, size_t alignment, bool unaligned); -static void *chunk_alloc_mmap_internal(size_t size, size_t alignment); /******************************************************************************/ @@ -76,34 +75,28 @@ pages_unmap(void *addr, size_t size) static void * chunk_alloc_mmap_slow(size_t size, size_t alignment, bool unaligned) { - void *ret; - size_t offset; + void *ret, *pages; + size_t alloc_size, leadsize, trailsize; + alloc_size = size + alignment - PAGE; /* Beware size_t wrap-around. */ - if (size + chunksize <= size) + if (alloc_size < size) return (NULL); - - ret = pages_map(NULL, size + alignment); - if (ret == NULL) + pages = pages_map(NULL, alloc_size); + if (pages == NULL) return (NULL); - - /* Clean up unneeded leading/trailing space. */ - offset = (size_t)((uintptr_t)(ret) & (alignment - 1)); - if (offset != 0) { + leadsize = ALIGNMENT_CEILING((uintptr_t)pages, alignment) - + (uintptr_t)pages; + assert(alloc_size >= leadsize + size); + trailsize = alloc_size - leadsize - size; + ret = (void *)((uintptr_t)pages + leadsize); + if (leadsize != 0) { /* Note that mmap() returned an unaligned mapping. */ unaligned = true; - - /* Leading space. */ - pages_unmap(ret, alignment - offset); - - ret = (void *)((uintptr_t)ret + (alignment - offset)); - - /* Trailing space. 
*/ - pages_unmap((void *)((uintptr_t)ret + size), offset); - } else { - /* Trailing space only. */ - pages_unmap((void *)((uintptr_t)ret + size), alignment); + pages_unmap(pages, leadsize); } + if (trailsize != 0) + pages_unmap((void *)((uintptr_t)ret + size), trailsize); /* * If mmap() returned an aligned mapping, reset mmap_unaligned so that @@ -118,8 +111,8 @@ chunk_alloc_mmap_slow(size_t size, size_t alignment, bool unaligned) return (ret); } -static void * -chunk_alloc_mmap_internal(size_t size, size_t alignment) +void * +chunk_alloc_mmap(size_t size, size_t alignment) { void *ret; @@ -158,7 +151,7 @@ chunk_alloc_mmap_internal(size_t size, size_t alignment) if (ret == NULL) return (NULL); - offset = (size_t)((uintptr_t)(ret) & (alignment - 1)); + offset = ALIGNMENT_ADDR2OFFSET(ret, alignment); if (offset != 0) { bool mu = true; mmap_unaligned_tsd_set(&mu); @@ -185,13 +178,6 @@ chunk_alloc_mmap_internal(size_t size, size_t alignment) return (ret); } -void * -chunk_alloc_mmap(size_t size, size_t alignment) -{ - - return (chunk_alloc_mmap_internal(size, alignment)); -} - void chunk_dealloc_mmap(void *chunk, size_t size) { diff --git a/src/ckh.c b/src/ckh.c index 169fc0d4..742a950b 100644 --- a/src/ckh.c +++ b/src/ckh.c @@ -264,7 +264,7 @@ ckh_grow(ckh_t *ckh) size_t usize; lg_curcells++; - usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE, NULL); + usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE); if (usize == 0) { ret = true; goto label_return; @@ -309,7 +309,7 @@ ckh_shrink(ckh_t *ckh) */ lg_prevbuckets = ckh->lg_curbuckets; lg_curcells = ckh->lg_curbuckets + LG_CKH_BUCKET_CELLS - 1; - usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE, NULL); + usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE); if (usize == 0) return; tab = (ckhc_t *)ipalloc(usize, CACHELINE, true); @@ -382,7 +382,7 @@ ckh_new(ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh_keycomp_t *keycomp) ckh->hash = hash; ckh->keycomp = keycomp; - usize = sa2u(sizeof(ckhc_t) << 
lg_mincells, CACHELINE, NULL); + usize = sa2u(sizeof(ckhc_t) << lg_mincells, CACHELINE); if (usize == 0) { ret = true; goto label_return; diff --git a/src/jemalloc.c b/src/jemalloc.c index 237dd589..8e10c556 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -862,7 +862,7 @@ imemalign(void **memptr, size_t alignment, size_t size, goto label_return; } - usize = sa2u(size, alignment, NULL); + usize = sa2u(size, alignment); if (usize == 0) { result = NULL; ret = ENOMEM; @@ -878,9 +878,9 @@ imemalign(void **memptr, size_t alignment, size_t size, if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && usize <= SMALL_MAXCLASS) { assert(sa2u(SMALL_MAXCLASS+1, - alignment, NULL) != 0); + alignment) != 0); result = ipalloc(sa2u(SMALL_MAXCLASS+1, - alignment, NULL), alignment, false); + alignment), alignment, false); if (result != NULL) { arena_prof_promoted(result, usize); @@ -1343,8 +1343,8 @@ JEMALLOC_INLINE void * iallocm(size_t usize, size_t alignment, bool zero) { - assert(usize == ((alignment == 0) ? s2u(usize) : sa2u(usize, alignment, - NULL))); + assert(usize == ((alignment == 0) ? s2u(usize) : sa2u(usize, + alignment))); if (alignment != 0) return (ipalloc(usize, alignment, zero)); @@ -1372,7 +1372,7 @@ je_allocm(void **ptr, size_t *rsize, size_t size, int flags) if (malloc_init()) goto label_oom; - usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment, NULL); + usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment); if (usize == 0) goto label_oom; @@ -1384,7 +1384,7 @@ je_allocm(void **ptr, size_t *rsize, size_t size, int flags) SMALL_MAXCLASS) { size_t usize_promoted = (alignment == 0) ? s2u(SMALL_MAXCLASS+1) : sa2u(SMALL_MAXCLASS+1, - alignment, NULL); + alignment); assert(usize_promoted != 0); p = iallocm(usize_promoted, alignment, zero); if (p == NULL) @@ -1454,7 +1454,7 @@ je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, int flags) * decide whether to sample. */ size_t max_usize = (alignment == 0) ? 
s2u(size+extra) : - sa2u(size+extra, alignment, NULL); + sa2u(size+extra, alignment); prof_ctx_t *old_ctx = prof_ctx_get(p); old_size = isalloc(p, true); if (config_valgrind && opt_valgrind) @@ -1466,8 +1466,8 @@ je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, int flags) * Use minimum usize to determine whether promotion may happen. */ if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U - && ((alignment == 0) ? s2u(size) : sa2u(size, - alignment, NULL)) <= SMALL_MAXCLASS) { + && ((alignment == 0) ? s2u(size) : sa2u(size, alignment)) + <= SMALL_MAXCLASS) { q = iralloc(p, SMALL_MAXCLASS+1, (SMALL_MAXCLASS+1 >= size+extra) ? 0 : size+extra - (SMALL_MAXCLASS+1), alignment, zero, no_move); @@ -1596,7 +1596,7 @@ je_nallocm(size_t *rsize, size_t size, int flags) if (malloc_init()) return (ALLOCM_ERR_OOM); - usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment, NULL); + usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment); if (usize == 0) return (ALLOCM_ERR_OOM); From 83c324acd8bd5f32e0ce9b4d3df2f1a0ae46f487 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Thu, 12 Apr 2012 10:13:03 +0200 Subject: [PATCH 0142/3378] Use a stub replacement and disable dss when sbrk is not supported --- configure.ac | 8 ++++++++ include/jemalloc/jemalloc_defs.h.in | 3 +++ src/chunk_dss.c | 11 +++++++++++ 3 files changed, 22 insertions(+) diff --git a/configure.ac b/configure.ac index a272ecd0..739cfa9a 100644 --- a/configure.ac +++ b/configure.ac @@ -678,6 +678,14 @@ fi ], [enable_dss="0"] ) +dnl Check whether the BSD/SUSv1 sbrk() exists. If not, disable DSS support. 
+AC_CHECK_FUNC([sbrk], [have_sbrk="1"], [have_sbrk="0"]) +if test "x$have_sbrk" = "x1" ; then + AC_DEFINE([JEMALLOC_HAVE_SBRK], [ ]) +else + enable_dss="0" +fi + if test "x$enable_dss" = "x1" ; then AC_DEFINE([JEMALLOC_DSS], [ ]) fi diff --git a/include/jemalloc/jemalloc_defs.h.in b/include/jemalloc/jemalloc_defs.h.in index 7770a7af..28fe5e9a 100644 --- a/include/jemalloc/jemalloc_defs.h.in +++ b/include/jemalloc/jemalloc_defs.h.in @@ -108,6 +108,9 @@ # define JEMALLOC_ATTR(s) JEMALLOC_CATTR(s,) #endif +/* Defined if sbrk() is supported. */ +#undef JEMALLOC_HAVE_SBRK + /* Non-empty if the tls_model attribute is supported. */ #undef JEMALLOC_TLS_MODEL diff --git a/src/chunk_dss.c b/src/chunk_dss.c index 7c034092..5fb6a73d 100644 --- a/src/chunk_dss.c +++ b/src/chunk_dss.c @@ -3,6 +3,17 @@ /******************************************************************************/ /* Data. */ +#ifndef JEMALLOC_HAVE_SBRK +void * +sbrk(intptr_t increment) +{ + + not_implemented(); + + return (NULL); +} +#endif + /* * Protects sbrk() calls. This avoids malloc races among threads, though it * does not protect against races with threads that call sbrk() directly. From b8325f9cb031285585567cdeb1338aeca4185f6c Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Thu, 12 Apr 2012 15:15:35 +0200 Subject: [PATCH 0143/3378] Call base_boot before chunk_boot0 Chunk_boot0 calls rtree_new, which calls base_alloc, which locks the base_mtx mutex. That mutex is initialized in base_boot. 
--- src/jemalloc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 8e10c556..1622937d 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -626,12 +626,12 @@ malloc_init_hard(void) } } - if (chunk_boot0()) { + if (base_boot()) { malloc_mutex_unlock(&init_lock); return (true); } - if (base_boot()) { + if (chunk_boot0()) { malloc_mutex_unlock(&init_lock); return (true); } From 927893b4784f732a2b8006a0490293ab18d0c2cf Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Thu, 12 Apr 2012 17:21:58 +0200 Subject: [PATCH 0144/3378] Remove bogus dependency test/bitmap.c #includes src/bitmap.c, which is correctly detected by gcc -MM, but building test/bitmap.o doesn't require src/bitmap.o. --- Makefile.in | 3 --- 1 file changed, 3 deletions(-) diff --git a/Makefile.in b/Makefile.in index 8aa94253..3bdfeaf3 100644 --- a/Makefile.in +++ b/Makefile.in @@ -132,9 +132,6 @@ build_doc: $(DOCS) $(CC) $(CFLAGS) -c $(CPPFLAGS) -I@objroot@test -o $@ $< @$(SHELL) -ec "$(CC) -MM $(CPPFLAGS) -I@objroot@test $< | sed \"s/\($(subst /,\/,$(notdir $(basename $@)))\)\.o\([ :]*\)/$(subst /,\/,$(strip $(dir $@)))\1.o \2/g\" > $(@:%.o=%.d)" -# Automatic dependency generation misses #include "*.c". 
-@objroot@test/bitmap.o : @objroot@src/bitmap.o - @objroot@test/%: @objroot@test/%.o \ @objroot@lib/libjemalloc@install_suffix@.$(SO) @mkdir -p $(@D) From fd5c36466d0665b26bd96badc1416b61bd55802e Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Thu, 12 Apr 2012 17:19:20 +0200 Subject: [PATCH 0145/3378] Use -MT options to build dependency files --- Makefile.in | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Makefile.in b/Makefile.in index 3bdfeaf3..59ac103f 100644 --- a/Makefile.in +++ b/Makefile.in @@ -104,12 +104,12 @@ build_doc: $(DOCS) @objroot@src/%.o: @srcroot@src/%.c @mkdir -p $(@D) $(CC) $(CFLAGS) -c $(CPPFLAGS) -o $@ $< - @$(SHELL) -ec "$(CC) -MM $(CPPFLAGS) $< | sed \"s/\($(subst /,\/,$(notdir $(basename $@)))\)\.o\([ :]*\)/$(subst /,\/,$(strip $(dir $@)))\1.o \2/g\" > $(@:%.o=%.d)" + $(CC) -MM $(CPPFLAGS) -MT $@ -o $(@:%.o=%.d) $< @objroot@src/%.pic.o: @srcroot@src/%.c @mkdir -p $(@D) $(CC) $(CFLAGS) -fPIC -DPIC -c $(CPPFLAGS) -o $@ $< - @$(SHELL) -ec "$(CC) -MM $(CPPFLAGS) $< | sed \"s/\($(subst /,\/,$(notdir $(basename $(basename $@))))\)\.o\([ :]*\)/$(subst /,\/,$(strip $(dir $@)))\1.pic.o \2/g\" > $(@:%.o=%.d)" + $(CC) -MM $(CPPFLAGS) -MT $@ -o $(@:%.o=%.d) $< %.$(SO) : %.$(SO).$(REV) @mkdir -p $(@D) @@ -130,7 +130,7 @@ build_doc: $(DOCS) @objroot@test/%.o: @srcroot@test/%.c @mkdir -p $(@D) $(CC) $(CFLAGS) -c $(CPPFLAGS) -I@objroot@test -o $@ $< - @$(SHELL) -ec "$(CC) -MM $(CPPFLAGS) -I@objroot@test $< | sed \"s/\($(subst /,\/,$(notdir $(basename $@)))\)\.o\([ :]*\)/$(subst /,\/,$(strip $(dir $@)))\1.o \2/g\" > $(@:%.o=%.d)" + $(CC) -MM $(CPPFLAGS) -I@objroot@test -MT $@ -o $(@:%.o=%.d) $< @objroot@test/%: @objroot@test/%.o \ @objroot@lib/libjemalloc@install_suffix@.$(SO) From c751b1c2b03db82a3041ffdd110cd90cb50aa34f Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 12 Apr 2012 17:08:45 -0700 Subject: [PATCH 0146/3378] Re-silence -MM compiler invocations. 
--- Makefile.in | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Makefile.in b/Makefile.in index 59ac103f..8828d7f7 100644 --- a/Makefile.in +++ b/Makefile.in @@ -104,12 +104,12 @@ build_doc: $(DOCS) @objroot@src/%.o: @srcroot@src/%.c @mkdir -p $(@D) $(CC) $(CFLAGS) -c $(CPPFLAGS) -o $@ $< - $(CC) -MM $(CPPFLAGS) -MT $@ -o $(@:%.o=%.d) $< + @$(CC) -MM $(CPPFLAGS) -MT $@ -o $(@:%.o=%.d) $< @objroot@src/%.pic.o: @srcroot@src/%.c @mkdir -p $(@D) $(CC) $(CFLAGS) -fPIC -DPIC -c $(CPPFLAGS) -o $@ $< - $(CC) -MM $(CPPFLAGS) -MT $@ -o $(@:%.o=%.d) $< + @$(CC) -MM $(CPPFLAGS) -MT $@ -o $(@:%.o=%.d) $< %.$(SO) : %.$(SO).$(REV) @mkdir -p $(@D) @@ -130,7 +130,7 @@ build_doc: $(DOCS) @objroot@test/%.o: @srcroot@test/%.c @mkdir -p $(@D) $(CC) $(CFLAGS) -c $(CPPFLAGS) -I@objroot@test -o $@ $< - $(CC) -MM $(CPPFLAGS) -I@objroot@test -MT $@ -o $(@:%.o=%.d) $< + @$(CC) -MM $(CPPFLAGS) -I@objroot@test -MT $@ -o $(@:%.o=%.d) $< @objroot@test/%: @objroot@test/%.o \ @objroot@lib/libjemalloc@install_suffix@.$(SO) From d6abcbb14b8d1c8beb1c61bfc5a24cb54578b85c Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 12 Apr 2012 17:09:54 -0700 Subject: [PATCH 0147/3378] Always disable redzone by default. Always disable redzone by default, even when --enable-debug is specified. The memory overhead for redzones can be substantial, which makes this feature something that should only be opted into. --- doc/jemalloc.xml.in | 4 +--- src/jemalloc.c | 4 +--- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index a47c7635..ee60c98a 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -865,9 +865,7 @@ for (i = 0; i < nbins; i++) { url="http://http://valgrind.org/">Valgrind, which needs redzones in order to do effective buffer overflow/underflow detection. This option is intended for debugging and will impact performance - negatively. 
This option is disabled by default unless - is specified during configuration, in - which case it is enabled by default. + negatively. This option is disabled by default. diff --git a/src/jemalloc.c b/src/jemalloc.c index 1622937d..9b8b52d2 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -14,17 +14,15 @@ const char *je_malloc_conf JEMALLOC_ATTR(visibility("default")); bool opt_abort = true; # ifdef JEMALLOC_FILL bool opt_junk = true; -bool opt_redzone = true; # else bool opt_junk = false; -bool opt_redzone = false; # endif #else bool opt_abort = false; bool opt_junk = false; -bool opt_redzone = false; #endif size_t opt_quarantine = ZU(0); +bool opt_redzone = false; bool opt_utrace = false; bool opt_valgrind = false; bool opt_xmalloc = false; From 7ca0fdfb85b2a9fc7a112e158892c098e004385b Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 12 Apr 2012 20:20:58 -0700 Subject: [PATCH 0148/3378] Disable munmap() if it causes VM map holes. Add a configure test to determine whether common mmap()/munmap() patterns cause VM map holes, and only use munmap() to discard unused chunks if the problem does not exist. Unify the chunk caching for mmap and dss. Fix options processing to limit lg_chunk to be large enough that redzones will always fit. 
--- configure.ac | 67 +++++ include/jemalloc/internal/chunk_dss.h | 1 - include/jemalloc/internal/chunk_mmap.h | 2 +- .../jemalloc/internal/jemalloc_internal.h.in | 7 + include/jemalloc/internal/private_namespace.h | 1 - include/jemalloc/jemalloc_defs.h.in | 15 ++ src/arena.c | 10 +- src/chunk.c | 170 ++++++++++++- src/chunk_dss.c | 230 +----------------- src/chunk_mmap.c | 7 +- src/jemalloc.c | 11 +- 11 files changed, 277 insertions(+), 244 deletions(-) diff --git a/configure.ac b/configure.ac index 739cfa9a..8d20659f 100644 --- a/configure.ac +++ b/configure.ac @@ -817,6 +817,73 @@ else AC_MSG_ERROR([cannot determine value for STATIC_PAGE_SHIFT]) fi +dnl Determine whether common sequences of mmap()/munmap() calls will leave +dnl semi-permanent VM map holes. If so, disable munmap. +AC_CACHE_CHECK([whether munmap() leaves semi-permanent VM map holes], + [je_cv_vmmap_hole], + AC_RUN_IFELSE([AC_LANG_PROGRAM( +[[#include +#include +#include + +#define NPTRS 11 +#define MMAP_SIZE ((size_t)(1U << 22)) + +static void * +do_mmap(size_t size) +{ + void *ret; + + ret = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, + 0); + if (ret == MAP_FAILED) { + fprintf(stderr, "mmap() error\n"); + exit(1); + } + + return (ret); +} + +static void +do_munmap(void *ptr, size_t size) +{ + if (munmap(ptr, size) == -1) { + fprintf(stderr, "munmap() error\n"); + exit(1); + } +} +]], +[[ + void *p0, *p1, *p2, *p3, *p4; + FILE *f; + + f = fopen("conftest.out", "w"); + if (f == NULL) + exit(1); + + p0 = do_mmap(MMAP_SIZE); + p1 = do_mmap(MMAP_SIZE); + p2 = do_mmap(MMAP_SIZE); + do_munmap(p1, MMAP_SIZE); + p3 = do_mmap(MMAP_SIZE * 2); + do_munmap(p3, MMAP_SIZE * 2); + p4 = do_mmap(MMAP_SIZE); + if (p4 != p1) { + fprintf(stderr, "Hoped for %p, got %p\n", p1, p4); + fprintf(stderr, "%p..%p..%p..%p..%p\n", p0, p1, p2, p3, p4); + fprintf(f, "yes\n"); + } else + fprintf(f, "no\n"); + + fclose(f); + return (0); +]])], + [je_cv_vmmap_hole=`cat conftest.out`], + 
[je_cv_vmmap_hole=unknown])) +if test "x$je_cv_vmmap_hole" = "xno" ; then + AC_DEFINE([JEMALLOC_MUNMAP], [ ]) +fi + dnl ============================================================================ dnl jemalloc configuration. dnl diff --git a/include/jemalloc/internal/chunk_dss.h b/include/jemalloc/internal/chunk_dss.h index 16ea9542..6e2643b2 100644 --- a/include/jemalloc/internal/chunk_dss.h +++ b/include/jemalloc/internal/chunk_dss.h @@ -11,7 +11,6 @@ void *chunk_alloc_dss(size_t size, size_t alignment, bool *zero); bool chunk_in_dss(void *chunk); -bool chunk_dealloc_dss(void *chunk, size_t size); bool chunk_dss_boot(void); void chunk_dss_prefork(void); void chunk_dss_postfork_parent(void); diff --git a/include/jemalloc/internal/chunk_mmap.h b/include/jemalloc/internal/chunk_mmap.h index 148fefef..04e86af9 100644 --- a/include/jemalloc/internal/chunk_mmap.h +++ b/include/jemalloc/internal/chunk_mmap.h @@ -10,7 +10,7 @@ #ifdef JEMALLOC_H_EXTERNS void *chunk_alloc_mmap(size_t size, size_t alignment); -void chunk_dealloc_mmap(void *chunk, size_t size); +bool chunk_dealloc_mmap(void *chunk, size_t size); bool chunk_mmap_boot(void); diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 57895fb3..aa21aa5d 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -102,6 +102,13 @@ static const bool config_prof_libunwind = false #endif ; +static const bool config_munmap = +#ifdef JEMALLOC_MUNMAP + true +#else + false +#endif + ; static const bool config_stats = #ifdef JEMALLOC_STATS true diff --git a/include/jemalloc/internal/private_namespace.h b/include/jemalloc/internal/private_namespace.h index fca65950..742d1605 100644 --- a/include/jemalloc/internal/private_namespace.h +++ b/include/jemalloc/internal/private_namespace.h @@ -57,7 +57,6 @@ #define chunk_alloc_mmap JEMALLOC_N(chunk_alloc_mmap) #define chunk_boot JEMALLOC_N(chunk_boot) #define 
chunk_dealloc JEMALLOC_N(chunk_dealloc) -#define chunk_dealloc_dss JEMALLOC_N(chunk_dealloc_dss) #define chunk_dealloc_mmap JEMALLOC_N(chunk_dealloc_mmap) #define chunk_dss_boot JEMALLOC_N(chunk_dss_boot) #define chunk_dss_postfork_child JEMALLOC_N(chunk_dss_postfork_child) diff --git a/include/jemalloc/jemalloc_defs.h.in b/include/jemalloc/jemalloc_defs.h.in index 28fe5e9a..b6e5593b 100644 --- a/include/jemalloc/jemalloc_defs.h.in +++ b/include/jemalloc/jemalloc_defs.h.in @@ -172,6 +172,14 @@ /* One page is 2^STATIC_PAGE_SHIFT bytes. */ #undef STATIC_PAGE_SHIFT +/* + * If defined, use munmap() to unmap freed chunks, rather than storing them for + * later reuse. This is automatically disabled if configuration determines + * that common sequences of mmap()/munmap() calls will cause virtual memory map + * holes. + */ +#undef JEMALLOC_MUNMAP + /* TLS is used to map arenas and magazine caches to threads. */ #undef JEMALLOC_TLS @@ -209,6 +217,13 @@ */ #undef JEMALLOC_PURGE_MADVISE_DONTNEED #undef JEMALLOC_PURGE_MADVISE_FREE +#ifdef JEMALLOC_PURGE_MADVISE_DONTNEED +# define JEMALLOC_MADV_PURGE MADV_DONTNEED +#elif defined(JEMALLOC_PURGE_MADVISE_FREE) +# define JEMALLOC_MADV_PURGE MADV_FREE +#else +# error "No method defined for purging unused dirty pages." +#endif /* sizeof(void *) == 2^LG_SIZEOF_PTR. */ #undef LG_SIZEOF_PTR diff --git a/src/arena.c b/src/arena.c index 1a108db5..989034d4 100644 --- a/src/arena.c +++ b/src/arena.c @@ -676,16 +676,8 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) if (config_debug) ndirty -= npages; -#ifdef JEMALLOC_PURGE_MADVISE_DONTNEED -# define MADV_PURGE MADV_DONTNEED -#elif defined(JEMALLOC_PURGE_MADVISE_FREE) -# define MADV_PURGE MADV_FREE -#else -# error "No method defined for purging unused dirty pages." 
-#endif madvise((void *)((uintptr_t)chunk + (pageind << LG_PAGE)), - (npages << LG_PAGE), MADV_PURGE); -#undef MADV_PURGE + (npages << LG_PAGE), JEMALLOC_MADV_PURGE); if (config_stats) nmadvise++; } diff --git a/src/chunk.c b/src/chunk.c index b0641294..67e0d503 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -9,6 +9,15 @@ size_t opt_lg_chunk = LG_CHUNK_DEFAULT; malloc_mutex_t chunks_mtx; chunk_stats_t stats_chunks; +/* + * Trees of chunks that were previously allocated (trees differ only in node + * ordering). These are used when allocating chunks, in an attempt to re-use + * address space. Depending on function, different tree orderings are needed, + * which is why there are two trees with the same contents. + */ +static extent_tree_t chunks_szad; +static extent_tree_t chunks_ad; + rtree_t *chunks_rtree; /* Various chunk-related settings. */ @@ -19,6 +28,84 @@ size_t map_bias; size_t arena_maxclass; /* Max size class for arenas. */ /******************************************************************************/ +/* Function prototypes for non-inline static functions. */ + +static void *chunk_recycle(size_t size, size_t alignment, bool *zero); +static void chunk_record(void *chunk, size_t size); + +/******************************************************************************/ + +static void * +chunk_recycle(size_t size, size_t alignment, bool *zero) +{ + void *ret; + extent_node_t *node; + extent_node_t key; + size_t alloc_size, leadsize, trailsize; + + alloc_size = size + alignment - chunksize; + /* Beware size_t wrap-around. 
*/ + if (alloc_size < size) + return (NULL); + key.addr = NULL; + key.size = alloc_size; + malloc_mutex_lock(&chunks_mtx); + node = extent_tree_szad_nsearch(&chunks_szad, &key); + if (node == NULL) { + malloc_mutex_unlock(&chunks_mtx); + return (NULL); + } + leadsize = ALIGNMENT_CEILING((uintptr_t)node->addr, alignment) - + (uintptr_t)node->addr; + assert(alloc_size >= leadsize + size); + trailsize = alloc_size - leadsize - size; + ret = (void *)((uintptr_t)node->addr + leadsize); + /* Remove node from the tree. */ + extent_tree_szad_remove(&chunks_szad, node); + extent_tree_ad_remove(&chunks_ad, node); + if (leadsize != 0) { + /* Insert the leading space as a smaller chunk. */ + node->size = leadsize; + extent_tree_szad_insert(&chunks_szad, node); + extent_tree_ad_insert(&chunks_ad, node); + node = NULL; + } + if (trailsize != 0) { + /* Insert the trailing space as a smaller chunk. */ + if (node == NULL) { + /* + * An additional node is required, but + * base_node_alloc() can cause a new base chunk to be + * allocated. Drop chunks_mtx in order to avoid + * deadlock, and if node allocation fails, deallocate + * the result before returning an error. 
+ */ + malloc_mutex_unlock(&chunks_mtx); + node = base_node_alloc(); + if (node == NULL) { + chunk_dealloc(ret, size, true); + return (NULL); + } + malloc_mutex_lock(&chunks_mtx); + } + node->addr = (void *)((uintptr_t)(ret) + size); + node->size = trailsize; + extent_tree_szad_insert(&chunks_szad, node); + extent_tree_ad_insert(&chunks_ad, node); + node = NULL; + } + malloc_mutex_unlock(&chunks_mtx); + + if (node != NULL) + base_node_dealloc(node); +#ifdef JEMALLOC_PURGE_MADVISE_FREE + if (*zero) { + VALGRIND_MAKE_MEM_UNDEFINED(ret, size); + memset(ret, 0, size); + } +#endif + return (ret); +} /* * If the caller specifies (*zero == false), it is still possible to receive @@ -35,6 +122,9 @@ chunk_alloc(size_t size, size_t alignment, bool base, bool *zero) assert((size & chunksize_mask) == 0); assert((alignment & chunksize_mask) == 0); + ret = chunk_recycle(size, alignment, zero); + if (ret != NULL) + goto label_return; if (config_dss) { ret = chunk_alloc_dss(size, alignment, zero); if (ret != NULL) @@ -76,6 +166,80 @@ label_return: return (ret); } +static void +chunk_record(void *chunk, size_t size) +{ + extent_node_t *xnode, *node, *prev, key; + + madvise(chunk, size, JEMALLOC_MADV_PURGE); + + xnode = NULL; + malloc_mutex_lock(&chunks_mtx); + while (true) { + key.addr = (void *)((uintptr_t)chunk + size); + node = extent_tree_ad_nsearch(&chunks_ad, &key); + /* Try to coalesce forward. */ + if (node != NULL && node->addr == key.addr) { + /* + * Coalesce chunk with the following address range. + * This does not change the position within chunks_ad, + * so only remove/insert from/into chunks_szad. 
+ */ + extent_tree_szad_remove(&chunks_szad, node); + node->addr = chunk; + node->size += size; + extent_tree_szad_insert(&chunks_szad, node); + break; + } else if (xnode == NULL) { + /* + * It is possible that base_node_alloc() will cause a + * new base chunk to be allocated, so take care not to + * deadlock on chunks_mtx, and recover if another thread + * deallocates an adjacent chunk while this one is busy + * allocating xnode. + */ + malloc_mutex_unlock(&chunks_mtx); + xnode = base_node_alloc(); + if (xnode == NULL) + return; + malloc_mutex_lock(&chunks_mtx); + } else { + /* Coalescing forward failed, so insert a new node. */ + node = xnode; + xnode = NULL; + node->addr = chunk; + node->size = size; + extent_tree_ad_insert(&chunks_ad, node); + extent_tree_szad_insert(&chunks_szad, node); + break; + } + } + /* Discard xnode if it ended up unused due to a race. */ + if (xnode != NULL) + base_node_dealloc(xnode); + + /* Try to coalesce backward. */ + prev = extent_tree_ad_prev(&chunks_ad, node); + if (prev != NULL && (void *)((uintptr_t)prev->addr + prev->size) == + chunk) { + /* + * Coalesce chunk with the previous address range. This does + * not change the position within chunks_ad, so only + * remove/insert node from/into chunks_szad. 
+ */ + extent_tree_szad_remove(&chunks_szad, prev); + extent_tree_ad_remove(&chunks_ad, prev); + + extent_tree_szad_remove(&chunks_szad, node); + node->addr = prev->addr; + node->size += prev->size; + extent_tree_szad_insert(&chunks_szad, node); + + base_node_dealloc(prev); + } + malloc_mutex_unlock(&chunks_mtx); +} + void chunk_dealloc(void *chunk, size_t size, bool unmap) { @@ -94,9 +258,9 @@ chunk_dealloc(void *chunk, size_t size, bool unmap) } if (unmap) { - if (config_dss && chunk_dealloc_dss(chunk, size) == false) + if (chunk_dealloc_mmap(chunk, size) == false) return; - chunk_dealloc_mmap(chunk, size); + chunk_record(chunk, size); } } @@ -117,6 +281,8 @@ chunk_boot0(void) } if (config_dss && chunk_dss_boot()) return (true); + extent_tree_szad_new(&chunks_szad); + extent_tree_ad_new(&chunks_ad); if (config_ivsalloc) { chunks_rtree = rtree_new((ZU(1) << (LG_SIZEOF_PTR+3)) - opt_lg_chunk); diff --git a/src/chunk_dss.c b/src/chunk_dss.c index 5fb6a73d..b05509a5 100644 --- a/src/chunk_dss.c +++ b/src/chunk_dss.c @@ -3,17 +3,6 @@ /******************************************************************************/ /* Data. */ -#ifndef JEMALLOC_HAVE_SBRK -void * -sbrk(intptr_t increment) -{ - - not_implemented(); - - return (NULL); -} -#endif - /* * Protects sbrk() calls. This avoids malloc races among threads, though it * does not protect against races with threads that call sbrk() directly. @@ -27,92 +16,18 @@ static void *dss_prev; /* Current upper limit on DSS addresses. */ static void *dss_max; -/* - * Trees of chunks that were previously allocated (trees differ only in node - * ordering). These are used when allocating chunks, in an attempt to re-use - * address space. Depending on function, different tree orderings are needed, - * which is why there are two trees with the same contents. 
- */ -static extent_tree_t dss_chunks_szad; -static extent_tree_t dss_chunks_ad; - -/******************************************************************************/ -/* Function prototypes for non-inline static functions. */ - -static void *chunk_recycle_dss(size_t size, size_t alignment, bool *zero); -static extent_node_t *chunk_dealloc_dss_record(void *chunk, size_t size); - /******************************************************************************/ +#ifndef JEMALLOC_HAVE_SBRK static void * -chunk_recycle_dss(size_t size, size_t alignment, bool *zero) +sbrk(intptr_t increment) { - void *ret; - extent_node_t *node; - extent_node_t key; - size_t alloc_size, leadsize, trailsize; - cassert(config_dss); + not_implemented(); - alloc_size = size + alignment - chunksize; - /* Beware size_t wrap-around. */ - if (alloc_size < size) - return (NULL); - key.addr = NULL; - key.size = alloc_size; - malloc_mutex_lock(&dss_mtx); - node = extent_tree_szad_nsearch(&dss_chunks_szad, &key); - if (node == NULL) { - malloc_mutex_unlock(&dss_mtx); - return (NULL); - } - leadsize = ALIGNMENT_CEILING((uintptr_t)node->addr, alignment) - - (uintptr_t)node->addr; - assert(alloc_size >= leadsize + size); - trailsize = alloc_size - leadsize - size; - ret = (void *)((uintptr_t)node->addr + leadsize); - /* Remove node from the tree. */ - extent_tree_szad_remove(&dss_chunks_szad, node); - extent_tree_ad_remove(&dss_chunks_ad, node); - if (leadsize != 0) { - /* Insert the leading space as a smaller chunk. */ - node->size = leadsize; - extent_tree_szad_insert(&dss_chunks_szad, node); - extent_tree_ad_insert(&dss_chunks_ad, node); - node = NULL; - } - if (trailsize != 0) { - /* Insert the trailing space as a smaller chunk. */ - if (node == NULL) { - /* - * An additional node is required, but - * base_node_alloc() can cause a new base chunk to be - * allocated. Drop dss_mtx in order to avoid deadlock, - * and if node allocation fails, deallocate the result - * before returning an error. 
- */ - malloc_mutex_unlock(&dss_mtx); - node = base_node_alloc(); - if (node == NULL) { - chunk_dealloc_dss(ret, size); - return (NULL); - } - malloc_mutex_lock(&dss_mtx); - } - node->addr = (void *)((uintptr_t)(ret) + size); - node->size = trailsize; - extent_tree_szad_insert(&dss_chunks_szad, node); - extent_tree_ad_insert(&dss_chunks_ad, node); - node = NULL; - } - malloc_mutex_unlock(&dss_mtx); - - if (node != NULL) - base_node_dealloc(node); - if (*zero) - memset(ret, 0, size); - return (ret); + return (NULL); } +#endif void * chunk_alloc_dss(size_t size, size_t alignment, bool *zero) @@ -123,10 +38,6 @@ chunk_alloc_dss(size_t size, size_t alignment, bool *zero) assert(size > 0 && (size & chunksize_mask) == 0); assert(alignment > 0 && (alignment & chunksize_mask) == 0); - ret = chunk_recycle_dss(size, alignment, zero); - if (ret != NULL) - return (ret); - /* * sbrk() uses a signed increment argument, so take care not to * interpret a huge allocation request as a negative increment. @@ -177,7 +88,7 @@ chunk_alloc_dss(size_t size, size_t alignment, bool *zero) dss_max = dss_next; malloc_mutex_unlock(&dss_mtx); if (cpad_size != 0) - chunk_dealloc_dss(cpad, cpad_size); + chunk_dealloc(cpad, cpad_size, true); *zero = true; return (ret); } @@ -188,81 +99,6 @@ chunk_alloc_dss(size_t size, size_t alignment, bool *zero) return (NULL); } -static extent_node_t * -chunk_dealloc_dss_record(void *chunk, size_t size) -{ - extent_node_t *xnode, *node, *prev, key; - - cassert(config_dss); - - xnode = NULL; - while (true) { - key.addr = (void *)((uintptr_t)chunk + size); - node = extent_tree_ad_nsearch(&dss_chunks_ad, &key); - /* Try to coalesce forward. */ - if (node != NULL && node->addr == key.addr) { - /* - * Coalesce chunk with the following address range. - * This does not change the position within - * dss_chunks_ad, so only remove/insert from/into - * dss_chunks_szad. 
- */ - extent_tree_szad_remove(&dss_chunks_szad, node); - node->addr = chunk; - node->size += size; - extent_tree_szad_insert(&dss_chunks_szad, node); - break; - } else if (xnode == NULL) { - /* - * It is possible that base_node_alloc() will cause a - * new base chunk to be allocated, so take care not to - * deadlock on dss_mtx, and recover if another thread - * deallocates an adjacent chunk while this one is busy - * allocating xnode. - */ - malloc_mutex_unlock(&dss_mtx); - xnode = base_node_alloc(); - malloc_mutex_lock(&dss_mtx); - if (xnode == NULL) - return (NULL); - } else { - /* Coalescing forward failed, so insert a new node. */ - node = xnode; - xnode = NULL; - node->addr = chunk; - node->size = size; - extent_tree_ad_insert(&dss_chunks_ad, node); - extent_tree_szad_insert(&dss_chunks_szad, node); - break; - } - } - /* Discard xnode if it ended up unused do to a race. */ - if (xnode != NULL) - base_node_dealloc(xnode); - - /* Try to coalesce backward. */ - prev = extent_tree_ad_prev(&dss_chunks_ad, node); - if (prev != NULL && (void *)((uintptr_t)prev->addr + prev->size) == - chunk) { - /* - * Coalesce chunk with the previous address range. This does - * not change the position within dss_chunks_ad, so only - * remove/insert node from/into dss_chunks_szad. - */ - extent_tree_szad_remove(&dss_chunks_szad, prev); - extent_tree_ad_remove(&dss_chunks_ad, prev); - - extent_tree_szad_remove(&dss_chunks_szad, node); - node->addr = prev->addr; - node->size += prev->size; - extent_tree_szad_insert(&dss_chunks_szad, node); - - base_node_dealloc(prev); - } - - return (node); -} - bool chunk_in_dss(void *chunk) { @@ -281,58 +117,6 @@ chunk_in_dss(void *chunk) return (ret); } -bool -chunk_dealloc_dss(void *chunk, size_t size) -{ - bool ret; - - cassert(config_dss); - - malloc_mutex_lock(&dss_mtx); - if ((uintptr_t)chunk >= (uintptr_t)dss_base - && (uintptr_t)chunk < (uintptr_t)dss_max) { - extent_node_t *node; - - /* Try to coalesce with other unused chunks. 
*/ - node = chunk_dealloc_dss_record(chunk, size); - if (node != NULL) { - chunk = node->addr; - size = node->size; - } - - /* Get the current end of the DSS. */ - dss_max = sbrk(0); - - /* - * Try to shrink the DSS if this chunk is at the end of the - * DSS. The sbrk() call here is subject to a race condition - * with threads that use brk(2) or sbrk(2) directly, but the - * alternative would be to leak memory for the sake of poorly - * designed multi-threaded programs. - */ - if ((void *)((uintptr_t)chunk + size) == dss_max - && (dss_prev = sbrk(-(intptr_t)size)) == dss_max) { - /* Success. */ - dss_max = (void *)((intptr_t)dss_prev - (intptr_t)size); - - if (node != NULL) { - extent_tree_szad_remove(&dss_chunks_szad, node); - extent_tree_ad_remove(&dss_chunks_ad, node); - base_node_dealloc(node); - } - } else - madvise(chunk, size, MADV_DONTNEED); - - ret = false; - goto label_return; - } - - ret = true; -label_return: - malloc_mutex_unlock(&dss_mtx); - return (ret); -} - bool chunk_dss_boot(void) { @@ -344,8 +128,6 @@ chunk_dss_boot(void) dss_base = sbrk(0); dss_prev = dss_base; dss_max = dss_base; - extent_tree_szad_new(&dss_chunks_szad); - extent_tree_ad_new(&dss_chunks_ad); return (false); } diff --git a/src/chunk_mmap.c b/src/chunk_mmap.c index 6cbf094a..e11cc0e6 100644 --- a/src/chunk_mmap.c +++ b/src/chunk_mmap.c @@ -178,11 +178,14 @@ chunk_alloc_mmap(size_t size, size_t alignment) return (ret); } -void +bool chunk_dealloc_mmap(void *chunk, size_t size) { - pages_unmap(chunk, size); + if (config_munmap) + pages_unmap(chunk, size); + + return (config_munmap == false); } bool diff --git a/src/jemalloc.c b/src/jemalloc.c index 9b8b52d2..0decd8a8 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -501,11 +501,14 @@ malloc_conf_init(void) CONF_HANDLE_BOOL(opt_abort, abort) /* - * Chunks always require at least one * header page, - * plus one data page. 
+ * Chunks always require at least one header page, plus + * one data page in the absence of redzones, or three + * pages in the presence of redzones. In order to + * simplify options processing, fix the limit based on + * config_fill. */ - CONF_HANDLE_SIZE_T(opt_lg_chunk, lg_chunk, LG_PAGE+1, - (sizeof(size_t) << 3) - 1) + CONF_HANDLE_SIZE_T(opt_lg_chunk, lg_chunk, LG_PAGE + + (config_fill ? 2 : 1), (sizeof(size_t) << 3) - 1) CONF_HANDLE_SIZE_T(opt_narenas, narenas, 1, SIZE_T_MAX) CONF_HANDLE_SSIZE_T(opt_lg_dirty_mult, lg_dirty_mult, -1, (sizeof(size_t) << 3) - 1) From 1dbfd5a209bed1a3d4bacaa31726a4179a4f1215 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 14 Apr 2012 00:16:02 -0700 Subject: [PATCH 0149/3378] Add/remove missing/cruft entries to/from private_namespace.h. --- include/jemalloc/internal/private_namespace.h | 65 +++++++++++++++---- 1 file changed, 52 insertions(+), 13 deletions(-) diff --git a/include/jemalloc/internal/private_namespace.h b/include/jemalloc/internal/private_namespace.h index 742d1605..a69482b6 100644 --- a/include/jemalloc/internal/private_namespace.h +++ b/include/jemalloc/internal/private_namespace.h @@ -25,15 +25,25 @@ #define arena_stats_merge JEMALLOC_N(arena_stats_merge) #define arena_tcache_fill_small JEMALLOC_N(arena_tcache_fill_small) #define arenas_bin_i_index JEMALLOC_N(arenas_bin_i_index) +#define arenas_cleanup JEMALLOC_N(arenas_cleanup) #define arenas_extend JEMALLOC_N(arenas_extend) #define arenas_lrun_i_index JEMALLOC_N(arenas_lrun_i_index) #define arenas_tls JEMALLOC_N(arenas_tls) +#define arenas_tsd_boot JEMALLOC_N(arenas_tsd_boot) +#define arenas_tsd_cleanup_wrapper JEMALLOC_N(arenas_tsd_cleanup_wrapper) +#define arenas_tsd_get JEMALLOC_N(arenas_tsd_get) +#define arenas_tsd_set JEMALLOC_N(arenas_tsd_set) +#define atomic_add_u JEMALLOC_N(atomic_add_u) #define atomic_add_uint32 JEMALLOC_N(atomic_add_uint32) #define atomic_add_uint64 JEMALLOC_N(atomic_add_uint64) +#define atomic_add_z JEMALLOC_N(atomic_add_z) 
+#define atomic_sub_u JEMALLOC_N(atomic_sub_u) #define atomic_sub_uint32 JEMALLOC_N(atomic_sub_uint32) #define atomic_sub_uint64 JEMALLOC_N(atomic_sub_uint64) +#define atomic_sub_z JEMALLOC_N(atomic_sub_z) #define base_alloc JEMALLOC_N(base_alloc) #define base_boot JEMALLOC_N(base_boot) +#define base_calloc JEMALLOC_N(base_calloc) #define base_node_alloc JEMALLOC_N(base_node_alloc) #define base_node_dealloc JEMALLOC_N(base_node_dealloc) #define base_postfork_child JEMALLOC_N(base_postfork_child) @@ -55,7 +65,8 @@ #define chunk_alloc JEMALLOC_N(chunk_alloc) #define chunk_alloc_dss JEMALLOC_N(chunk_alloc_dss) #define chunk_alloc_mmap JEMALLOC_N(chunk_alloc_mmap) -#define chunk_boot JEMALLOC_N(chunk_boot) +#define chunk_boot0 JEMALLOC_N(chunk_boot0) +#define chunk_boot1 JEMALLOC_N(chunk_boot1) #define chunk_dealloc JEMALLOC_N(chunk_dealloc) #define chunk_dealloc_mmap JEMALLOC_N(chunk_dealloc_mmap) #define chunk_dss_boot JEMALLOC_N(chunk_dss_boot) @@ -139,25 +150,33 @@ #define iralloc JEMALLOC_N(iralloc) #define isalloc JEMALLOC_N(isalloc) #define ivsalloc JEMALLOC_N(ivsalloc) -#define jemalloc_darwin_init JEMALLOC_N(jemalloc_darwin_init) #define jemalloc_postfork_child JEMALLOC_N(jemalloc_postfork_child) #define jemalloc_postfork_parent JEMALLOC_N(jemalloc_postfork_parent) #define jemalloc_prefork JEMALLOC_N(jemalloc_prefork) #define malloc_cprintf JEMALLOC_N(malloc_cprintf) -#define malloc_mutex_destroy JEMALLOC_N(malloc_mutex_destroy) #define malloc_mutex_init JEMALLOC_N(malloc_mutex_init) #define malloc_mutex_lock JEMALLOC_N(malloc_mutex_lock) #define malloc_mutex_postfork_child JEMALLOC_N(malloc_mutex_postfork_child) #define malloc_mutex_postfork_parent JEMALLOC_N(malloc_mutex_postfork_parent) #define malloc_mutex_prefork JEMALLOC_N(malloc_mutex_prefork) -#define malloc_mutex_trylock JEMALLOC_N(malloc_mutex_trylock) #define malloc_mutex_unlock JEMALLOC_N(malloc_mutex_unlock) #define malloc_printf JEMALLOC_N(malloc_printf) #define malloc_snprintf 
JEMALLOC_N(malloc_snprintf) +#define malloc_strtoumax JEMALLOC_N(malloc_strtoumax) +#define malloc_tsd_boot JEMALLOC_N(malloc_tsd_boot) +#define malloc_tsd_cleanup_register JEMALLOC_N(malloc_tsd_cleanup_register) +#define malloc_tsd_dalloc JEMALLOC_N(malloc_tsd_dalloc) +#define malloc_tsd_malloc JEMALLOC_N(malloc_tsd_malloc) +#define malloc_tsd_no_cleanup JEMALLOC_N(malloc_tsd_no_cleanup) #define malloc_vcprintf JEMALLOC_N(malloc_vcprintf) #define malloc_vsnprintf JEMALLOC_N(malloc_vsnprintf) #define malloc_write JEMALLOC_N(malloc_write) #define mb_write JEMALLOC_N(mb_write) +#define mmap_unaligned_tsd_boot JEMALLOC_N(mmap_unaligned_tsd_boot) +#define mmap_unaligned_tsd_cleanup_wrapper JEMALLOC_N(mmap_unaligned_tsd_cleanup_wrapper) +#define mmap_unaligned_tsd_get JEMALLOC_N(mmap_unaligned_tsd_get) +#define mmap_unaligned_tsd_set JEMALLOC_N(mmap_unaligned_tsd_set) +#define mutex_boot JEMALLOC_N(mutex_boot) #define opt_abort JEMALLOC_N(opt_abort) #define opt_junk JEMALLOC_N(opt_junk) #define opt_lg_chunk JEMALLOC_N(opt_lg_chunk) @@ -193,11 +212,18 @@ #define prof_realloc JEMALLOC_N(prof_realloc) #define prof_sample_accum_update JEMALLOC_N(prof_sample_accum_update) #define prof_sample_threshold_update JEMALLOC_N(prof_sample_threshold_update) -#define prof_tdata_init JEMALLOC_N(prof_tdata_init) -#define prof_tdata_tls JEMALLOC_N(prof_tdata_tls) +#define prof_tdata_cleanup JEMALLOC_N(prof_tdata_cleanup) +#define prof_tdata_tsd_boot JEMALLOC_N(prof_tdata_tsd_boot) +#define prof_tdata_tsd_cleanup_wrapper JEMALLOC_N(prof_tdata_tsd_cleanup_wrapper) +#define prof_tdata_tsd_get JEMALLOC_N(prof_tdata_tsd_get) +#define prof_tdata_tsd_set JEMALLOC_N(prof_tdata_tsd_set) #define pthread_create JEMALLOC_N(pthread_create) #define quarantine JEMALLOC_N(quarantine) #define quarantine_boot JEMALLOC_N(quarantine_boot) +#define quarantine_tsd_boot JEMALLOC_N(quarantine_tsd_boot) +#define quarantine_tsd_cleanup_wrapper JEMALLOC_N(quarantine_tsd_cleanup_wrapper) +#define quarantine_tsd_get 
JEMALLOC_N(quarantine_tsd_get) +#define quarantine_tsd_set JEMALLOC_N(quarantine_tsd_set) #define register_zone JEMALLOC_N(register_zone) #define rtree_get JEMALLOC_N(rtree_get) #define rtree_get_locked JEMALLOC_N(rtree_get_locked) @@ -213,23 +239,36 @@ #define stats_cactive_get JEMALLOC_N(stats_cactive_get) #define stats_cactive_sub JEMALLOC_N(stats_cactive_sub) #define stats_print JEMALLOC_N(stats_print) -#define szone2ozone JEMALLOC_N(szone2ozone) #define tcache_alloc_easy JEMALLOC_N(tcache_alloc_easy) #define tcache_alloc_large JEMALLOC_N(tcache_alloc_large) #define tcache_alloc_small JEMALLOC_N(tcache_alloc_small) #define tcache_alloc_small_hard JEMALLOC_N(tcache_alloc_small_hard) +#define tcache_arena_associate JEMALLOC_N(tcache_arena_associate) +#define tcache_arena_dissociate JEMALLOC_N(tcache_arena_dissociate) #define tcache_bin_flush_large JEMALLOC_N(tcache_bin_flush_large) #define tcache_bin_flush_small JEMALLOC_N(tcache_bin_flush_small) -#define tcache_boot JEMALLOC_N(tcache_boot) +#define tcache_boot0 JEMALLOC_N(tcache_boot0) +#define tcache_boot1 JEMALLOC_N(tcache_boot1) #define tcache_create JEMALLOC_N(tcache_create) #define tcache_dalloc_large JEMALLOC_N(tcache_dalloc_large) #define tcache_dalloc_small JEMALLOC_N(tcache_dalloc_small) #define tcache_destroy JEMALLOC_N(tcache_destroy) +#define tcache_enabled_get JEMALLOC_N(tcache_enabled_get) +#define tcache_enabled_set JEMALLOC_N(tcache_enabled_set) +#define tcache_enabled_tsd_boot JEMALLOC_N(tcache_enabled_tsd_boot) +#define tcache_enabled_tsd_cleanup_wrapper JEMALLOC_N(tcache_enabled_tsd_cleanup_wrapper) +#define tcache_enabled_tsd_get JEMALLOC_N(tcache_enabled_tsd_get) +#define tcache_enabled_tsd_set JEMALLOC_N(tcache_enabled_tsd_set) #define tcache_event JEMALLOC_N(tcache_event) -#define tcache_get JEMALLOC_N(tcache_get) +#define tcache_flush JEMALLOC_N(tcache_flush) #define tcache_stats_merge JEMALLOC_N(tcache_stats_merge) -#define tcache_tls JEMALLOC_N(tcache_tls) -#define thread_allocated_get 
JEMALLOC_N(thread_allocated_get) -#define thread_allocated_get_hard JEMALLOC_N(thread_allocated_get_hard) -#define thread_allocated_tls JEMALLOC_N(thread_allocated_tls) +#define tcache_thread_cleanup JEMALLOC_N(tcache_thread_cleanup) +#define tcache_tsd_boot JEMALLOC_N(tcache_tsd_boot) +#define tcache_tsd_cleanup_wrapper JEMALLOC_N(tcache_tsd_cleanup_wrapper) +#define tcache_tsd_get JEMALLOC_N(tcache_tsd_get) +#define tcache_tsd_set JEMALLOC_N(tcache_tsd_set) +#define thread_allocated_tsd_boot JEMALLOC_N(thread_allocated_tsd_boot) +#define thread_allocated_tsd_cleanup_wrapper JEMALLOC_N(thread_allocated_tsd_cleanup_wrapper) +#define thread_allocated_tsd_get JEMALLOC_N(thread_allocated_tsd_get) +#define thread_allocated_tsd_set JEMALLOC_N(thread_allocated_tsd_set) #define u2rz JEMALLOC_N(u2rz) From a398a6b46e53035a4ef660b4c7a1c406f3abe645 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 16 Apr 2012 12:39:45 -0700 Subject: [PATCH 0150/3378] Remove configure test cruft. --- configure.ac | 1 - 1 file changed, 1 deletion(-) diff --git a/configure.ac b/configure.ac index 8d20659f..5d0c7262 100644 --- a/configure.ac +++ b/configure.ac @@ -826,7 +826,6 @@ AC_CACHE_CHECK([whether munmap() leaves semi-permanent VM map holes], #include #include -#define NPTRS 11 #define MMAP_SIZE ((size_t)(1U << 22)) static void * From 59ae2766af88bad07ac721c4ee427b171e897bcb Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 16 Apr 2012 17:52:27 -0700 Subject: [PATCH 0151/3378] Add the --disable-munmap option. Add the --disable-munmap option, remove the configure test that attempted to detect the VM allocation quirk known to exist on Linux x86[_64], and make --disable-munmap implicit on Linux. 
--- INSTALL | 7 ++++ configure.ac | 85 ++++++++++----------------------------------- doc/jemalloc.xml.in | 10 ++++++ src/ctl.c | 3 ++ 4 files changed, 39 insertions(+), 66 deletions(-) diff --git a/INSTALL b/INSTALL index a5942ec8..04671a1d 100644 --- a/INSTALL +++ b/INSTALL @@ -108,6 +108,13 @@ any of the following arguments (not a definitive list) to 'configure': released in bulk, thus reducing the total number of mutex operations. See the "opt.tcache" option for usage details. +--disable-munmap + Disable virtual memory deallocation via munmap(2); instead keep track of + the virtual memory for later use. munmap() is disabled by default (i.e. + --disable-munmap is implied) on Linux, which has a quirk in its virtual + memory allocation algorithm that causes semi-permanent VM map holes under + normal jemalloc operation. + --enable-dss Enable support for page allocation/deallocation via sbrk(2), in addition to mmap(2). diff --git a/configure.ac b/configure.ac index 5d0c7262..90235f7b 100644 --- a/configure.ac +++ b/configure.ac @@ -206,6 +206,7 @@ dnl dnl Define cpp macros in CPPFLAGS, rather than doing AC_DEFINE(macro), since the dnl definitions need to be seen before any headers are included, which is a pain dnl to make happen otherwise. +default_munmap="1" case "${host}" in *-*-darwin*) CFLAGS="$CFLAGS -fno-common" @@ -230,6 +231,7 @@ case "${host}" in AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED], [ ]) AC_DEFINE([JEMALLOC_THREADED_INIT], [ ]) RPATH="-Wl,-rpath," + default_munmap="0" ;; *-*-netbsd*) AC_MSG_CHECKING([ABI]) @@ -667,6 +669,22 @@ if test "x$enable_tcache" = "x1" ; then fi AC_SUBST([enable_tcache]) +dnl Enable VM deallocation via munmap() by default. 
+AC_ARG_ENABLE([munmap], + [AS_HELP_STRING([--disable-munmap], [Disable VM deallocation via munmap(2)])], +[if test "x$enable_munmap" = "xno" ; then + enable_munmap="0" +else + enable_munmap="1" +fi +], +[enable_munmap="${default_munmap}"] +) +if test "x$enable_munmap" = "x1" ; then + AC_DEFINE([JEMALLOC_MUNMAP], [ ]) +fi +AC_SUBST([enable_munmap]) + dnl Do not enable allocation from DSS by default. AC_ARG_ENABLE([dss], [AS_HELP_STRING([--enable-dss], [Enable allocation from DSS])], @@ -817,72 +835,6 @@ else AC_MSG_ERROR([cannot determine value for STATIC_PAGE_SHIFT]) fi -dnl Determine whether common sequences of mmap()/munmap() calls will leave -dnl semi-permanent VM map holes. If so, disable munmap. -AC_CACHE_CHECK([whether munmap() leaves semi-permanent VM map holes], - [je_cv_vmmap_hole], - AC_RUN_IFELSE([AC_LANG_PROGRAM( -[[#include -#include -#include - -#define MMAP_SIZE ((size_t)(1U << 22)) - -static void * -do_mmap(size_t size) -{ - void *ret; - - ret = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, - 0); - if (ret == MAP_FAILED) { - fprintf(stderr, "mmap() error\n"); - exit(1); - } - - return (ret); -} - -static void -do_munmap(void *ptr, size_t size) -{ - if (munmap(ptr, size) == -1) { - fprintf(stderr, "munmap() error\n"); - exit(1); - } -} -]], -[[ - void *p0, *p1, *p2, *p3, *p4; - FILE *f; - - f = fopen("conftest.out", "w"); - if (f == NULL) - exit(1); - - p0 = do_mmap(MMAP_SIZE); - p1 = do_mmap(MMAP_SIZE); - p2 = do_mmap(MMAP_SIZE); - do_munmap(p1, MMAP_SIZE); - p3 = do_mmap(MMAP_SIZE * 2); - do_munmap(p3, MMAP_SIZE * 2); - p4 = do_mmap(MMAP_SIZE); - if (p4 != p1) { - fprintf(stderr, "Hoped for %p, got %p\n", p1, p4); - fprintf(stderr, "%p..%p..%p..%p..%p\n", p0, p1, p2, p3, p4); - fprintf(f, "yes\n"); - } else - fprintf(f, "no\n"); - - fclose(f); - return (0); -]])], - [je_cv_vmmap_hole=`cat conftest.out`], - [je_cv_vmmap_hole=unknown])) -if test "x$je_cv_vmmap_hole" = "xno" ; then - AC_DEFINE([JEMALLOC_MUNMAP], [ ]) -fi - dnl 
============================================================================ dnl jemalloc configuration. dnl @@ -1198,6 +1150,7 @@ AC_MSG_RESULT([fill : ${enable_fill}]) AC_MSG_RESULT([utrace : ${enable_utrace}]) AC_MSG_RESULT([valgrind : ${enable_valgrind}]) AC_MSG_RESULT([xmalloc : ${enable_xmalloc}]) +AC_MSG_RESULT([munmap : ${enable_munmap}]) AC_MSG_RESULT([dss : ${enable_dss}]) AC_MSG_RESULT([lazy_lock : ${enable_lazy_lock}]) AC_MSG_RESULT([tls : ${enable_tls}]) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index ee60c98a..98d0ba41 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -650,6 +650,16 @@ for (i = 0; i < nbins; i++) { during build configuration. + + + config.munmap + (bool) + r- + + was specified during + build configuration. + + config.prof diff --git a/src/ctl.c b/src/ctl.c index 6be40561..a6a02cc5 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -50,6 +50,7 @@ CTL_PROTO(config_debug) CTL_PROTO(config_dss) CTL_PROTO(config_fill) CTL_PROTO(config_lazy_lock) +CTL_PROTO(config_munmap) CTL_PROTO(config_prof) CTL_PROTO(config_prof_libgcc) CTL_PROTO(config_prof_libunwind) @@ -176,6 +177,7 @@ static const ctl_node_t config_node[] = { {NAME("dss"), CTL(config_dss)}, {NAME("fill"), CTL(config_fill)}, {NAME("lazy_lock"), CTL(config_lazy_lock)}, + {NAME("munmap"), CTL(config_munmap)}, {NAME("prof"), CTL(config_prof)}, {NAME("prof_libgcc"), CTL(config_prof_libgcc)}, {NAME("prof_libunwind"), CTL(config_prof_libunwind)}, @@ -1087,6 +1089,7 @@ CTL_RO_BOOL_CONFIG_GEN(config_debug) CTL_RO_BOOL_CONFIG_GEN(config_dss) CTL_RO_BOOL_CONFIG_GEN(config_fill) CTL_RO_BOOL_CONFIG_GEN(config_lazy_lock) +CTL_RO_BOOL_CONFIG_GEN(config_munmap) CTL_RO_BOOL_CONFIG_GEN(config_prof) CTL_RO_BOOL_CONFIG_GEN(config_prof_libgcc) CTL_RO_BOOL_CONFIG_GEN(config_prof_libunwind) From 9ef7f5dc34ff02f50d401e41c8d9a4a928e7c2aa Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 16 Apr 2012 18:16:48 -0700 Subject: [PATCH 0152/3378] Start preparing ChangeLog for 3.0.0 release. 
Start preparing ChangeLog for 3.0.0 release. Additional fixes and changes are yet to come, so this is not a complete ChangeLog. --- ChangeLog | 72 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) diff --git a/ChangeLog b/ChangeLog index 326ee7a9..8f6edd5a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -6,6 +6,78 @@ found in the git revision history: http://www.canonware.com/cgi-bin/gitweb.cgi?p=jemalloc.git git://canonware.com/jemalloc.git +* 3.0.0 (XXX not yet released) + + Although this version adds some major new features, the primary focus is on + internal code cleanup that facilitates maintainability and portability, most + of which is not reflected in the ChangeLog. This is the first release to + incorporate substantial contributions from numerous other developers, and the + result is a more broadly useful allocator (see the git revision history for + contribution details). Note that the license has been unified, thanks to + Facebook granting a license under the same terms as the other copyright + holders (see COPYING). + + New features: + - Implement Valgrind support, redzones, and quarantine. + - Add support for additional operating systems: + + FreeBSD + + Mac OS X Lion + - Add support for additional architectures: + + MIPS + + SH4 + + Tilera + - Add support for cross compiling. + - Add nallocm(), which rounds a request size up to the nearest size class + without actually allocating. + - Implement aligned_alloc() (blame C11). + - Add the --disable-munmap option, and make it the default on Linux. + - Add the --with-mangling option. + - Add the --disable-experimental option. + - Add the "thread.tcache.enabled" mallctl. + + Incompatible changes: + - Enable stats by default. + - Enable fill by default. + - Disable lazy locking by default. + - Rename the "tcache.flush" mallctl to "thread.tcache.flush". + - Rename the "arenas.pagesize" mallctl to "arenas.page". 
+ + Removed features: + - Remove the swap feature, including the "config.swap", "swap.avail", + "swap.prezeroed", "swap.nfds", and "swap.fds" mallctls. + - Remove highruns statistics, including the + "stats.arenas..bins..highruns" and + "stats.arenas..lruns..highruns" mallctls. + - As part of small size class refactoring, remove the "opt.lg_[qc]space_max", + "arenas.cacheline", "arenas.subpage", "arenas.[tqcs]space_{min,max}", and + "arenas.[tqcs]bins" mallctls. + - Remove the "arenas.chunksize" mallctl. + - Remove the "opt.lg_prof_tcmax" option. + - Remove the "opt.lg_prof_bt_max" option. + - Remove the "opt.lg_tcache_gc_sweep" option. + - Remove the --disable-tiny option, including the "config.tiny" mallctl. + - Remove the --enable-dynamic-page-shift configure option. + - Remove the --enable-sysv configure option. + + Bug fixes: + - Fix fork-related bugs that could cause deadlock in children between fork + and exec. + - Fix a statistics-related bug in the "thread.arena" mallctl that could cause + invalid statistics and crashes. + - Work around TLS dallocation via free() on Linux. This bug could cause + write-after-free memory corruption. + - Fix malloc_stats_print() to honor 'b' and 'l' in the opts parameter. + - Fix realloc(p, 0) to act like free(p). + - Do not enforce minimum alignment in memalign(). + - Check for NULL pointer in malloc_usable_size(). + - Fix bin->runcur management to fix a layout policy bug. This bug did not + affect correctness. + - Fix a bug in choose_arena_hard() that potentially caused more arenas to be + initialized than necessary. + - Add missing "opt.lg_tcache_max" mallctl implementation. + - Use glibc allocator hooks to make mixed allocator usage less likely. + - Fix build issues for --disable-tcache. 
+ * 2.2.5 (November 14, 2011) Bug fixes: From f5e0f526ec2079bf9b734f500df1cbc090f33b39 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Mon, 16 Apr 2012 16:30:20 +0200 Subject: [PATCH 0153/3378] Remove -dynamic CFLAG on OSX It is a linker flag, so it doesn't make sense in CFLAGS, and it's the default when invoking the linker for shared libraries. --- Makefile.in | 3 --- 1 file changed, 3 deletions(-) diff --git a/Makefile.in b/Makefile.in index 8828d7f7..64798571 100644 --- a/Makefile.in +++ b/Makefile.in @@ -21,9 +21,6 @@ MANDIR := $(DESTDIR)@MANDIR@ # Build parameters. CPPFLAGS := @CPPFLAGS@ -I@srcroot@include -I@objroot@include CFLAGS := @CFLAGS@ -ifeq (macho, @abi@) -CFLAGS += -dynamic -endif LDFLAGS := @LDFLAGS@ LIBS := @LIBS@ RPATH_EXTRA := @RPATH_EXTRA@ From 6f2ed70f5a33c4d6bae93b7f18d6b636843a6495 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Mon, 16 Apr 2012 16:30:21 +0200 Subject: [PATCH 0154/3378] Use $(LIBS) instead of -lpthread when linking tests This will allow linking for win32 without pthreads more easily --- Makefile.in | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile.in b/Makefile.in index 64798571..8cd0418c 100644 --- a/Makefile.in +++ b/Makefile.in @@ -133,9 +133,9 @@ build_doc: $(DOCS) @objroot@lib/libjemalloc@install_suffix@.$(SO) @mkdir -p $(@D) ifneq (@RPATH@, ) - $(CC) -o $@ $< @RPATH@@objroot@lib -L@objroot@lib -ljemalloc@install_suffix@ -lpthread + $(CC) -o $@ $< @RPATH@@objroot@lib -L@objroot@lib -ljemalloc@install_suffix@ $(LIBS) else - $(CC) -o $@ $< -L@objroot@lib -ljemalloc@install_suffix@ -lpthread + $(CC) -o $@ $< -L@objroot@lib -ljemalloc@install_suffix@ $(LIBS) endif build_lib_shared: $(DSOS) From 2d04f5e5ffb607e270356e6fa258e708dafb5b86 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Mon, 16 Apr 2012 16:30:22 +0200 Subject: [PATCH 0155/3378] Use make variables instead of preprocessing --- Makefile.in | 202 ++++++++++++++++++++++++++++------------------------ 1 file changed, 109 insertions(+), 93 
deletions(-) diff --git a/Makefile.in b/Makefile.in index 8cd0418c..1c30f328 100644 --- a/Makefile.in +++ b/Makefile.in @@ -17,9 +17,13 @@ INCLUDEDIR := $(DESTDIR)@INCLUDEDIR@ LIBDIR := $(DESTDIR)@LIBDIR@ DATADIR := $(DESTDIR)@DATADIR@ MANDIR := $(DESTDIR)@MANDIR@ +srcroot := @srcroot@ +objroot := @objroot@ +abs_srcroot := @abs_srcroot@ +abs_objroot := @abs_objroot@ # Build parameters. -CPPFLAGS := @CPPFLAGS@ -I@srcroot@include -I@objroot@include +CPPFLAGS := @CPPFLAGS@ -I$(srcroot)include -I$(objroot)include CFLAGS := @CFLAGS@ LDFLAGS := @LDFLAGS@ LIBS := @LIBS@ @@ -31,42 +35,54 @@ else WL_SONAME := soname endif REV := @rev@ -ifeq (macho, @abi@) -TEST_LIBRARY_PATH := DYLD_FALLBACK_LIBRARY_PATH=@objroot@lib +install_suffix := @install_suffix@ +ABI := @abi@ +XSLTPROC := @XSLTPROC@ +AUTOCONF := @AUTOCONF@ +RPATH := @RPATH@ +cfghdrs_in := @cfghdrs_in@ +cfghdrs_out := @cfghdrs_out@ +cfgoutputs_in := @cfgoutputs_in@ +cfgoutputs_out := @cfgoutputs_out@ +enable_autogen := @enable_autogen@ +enable_experimental := @enable_experimental@ + +ifeq (macho, $(ABI)) +TEST_LIBRARY_PATH := DYLD_FALLBACK_LIBRARY_PATH=$(objroot)lib else TEST_LIBRARY_PATH := endif # Lists of files. 
-BINS := @srcroot@bin/pprof @objroot@bin/jemalloc.sh -CHDRS := @objroot@include/jemalloc/jemalloc@install_suffix@.h \ - @objroot@include/jemalloc/jemalloc_defs@install_suffix@.h -CSRCS := @srcroot@src/jemalloc.c @srcroot@src/arena.c @srcroot@src/atomic.c \ - @srcroot@src/base.c @srcroot@src/bitmap.c @srcroot@src/chunk.c \ - @srcroot@src/chunk_dss.c @srcroot@src/chunk_mmap.c \ - @srcroot@src/ckh.c @srcroot@src/ctl.c @srcroot@src/extent.c \ - @srcroot@src/hash.c @srcroot@src/huge.c @srcroot@src/mb.c \ - @srcroot@src/mutex.c @srcroot@src/prof.c @srcroot@src/quarantine.c \ - @srcroot@src/rtree.c @srcroot@src/stats.c @srcroot@src/tcache.c \ - @srcroot@src/util.c @srcroot@src/tsd.c -ifeq (macho, @abi@) -CSRCS += @srcroot@src/zone.c +BINS := $(srcroot)bin/pprof $(objroot)bin/jemalloc.sh +CHDRS := $(objroot)include/jemalloc/jemalloc$(install_suffix).h \ + $(objroot)include/jemalloc/jemalloc_defs$(install_suffix).h +CSRCS := $(srcroot)src/jemalloc.c $(srcroot)src/arena.c $(srcroot)src/atomic.c \ + $(srcroot)src/base.c $(srcroot)src/bitmap.c $(srcroot)src/chunk.c \ + $(srcroot)src/chunk_dss.c $(srcroot)src/chunk_mmap.c \ + $(srcroot)src/ckh.c $(srcroot)src/ctl.c $(srcroot)src/extent.c \ + $(srcroot)src/hash.c $(srcroot)src/huge.c $(srcroot)src/mb.c \ + $(srcroot)src/mutex.c $(srcroot)src/prof.c $(srcroot)src/quarantine.c \ + $(srcroot)src/rtree.c $(srcroot)src/stats.c $(srcroot)src/tcache.c \ + $(srcroot)src/util.c $(srcroot)src/tsd.c +ifeq (macho, $(ABI)) +CSRCS += $(srcroot)src/zone.c endif -STATIC_LIBS := @objroot@lib/libjemalloc@install_suffix@.a -DSOS := @objroot@lib/libjemalloc@install_suffix@.$(SO).$(REV) \ - @objroot@lib/libjemalloc@install_suffix@.$(SO) \ - @objroot@lib/libjemalloc@install_suffix@_pic.a -MAN3 := @objroot@doc/jemalloc@install_suffix@.3 -DOCS_XML := @objroot@doc/jemalloc@install_suffix@.xml -DOCS_HTML := $(DOCS_XML:@objroot@%.xml=@srcroot@%.html) -DOCS_MAN3 := $(DOCS_XML:@objroot@%.xml=@srcroot@%.3) +STATIC_LIBS := 
$(objroot)lib/libjemalloc$(install_suffix).a +DSOS := $(objroot)lib/libjemalloc$(install_suffix).$(SO).$(REV) \ + $(objroot)lib/libjemalloc$(install_suffix).$(SO) \ + $(objroot)lib/libjemalloc$(install_suffix)_pic.a +MAN3 := $(objroot)doc/jemalloc$(install_suffix).3 +DOCS_XML := $(objroot)doc/jemalloc$(install_suffix).xml +DOCS_HTML := $(DOCS_XML:$(objroot)%.xml=$(srcroot)%.html) +DOCS_MAN3 := $(DOCS_XML:$(objroot)%.xml=$(srcroot)%.3) DOCS := $(DOCS_HTML) $(DOCS_MAN3) -CTESTS := @srcroot@test/aligned_alloc.c @srcroot@test/allocated.c \ - @srcroot@test/bitmap.c @srcroot@test/mremap.c \ - @srcroot@test/posix_memalign.c @srcroot@test/thread_arena.c \ - @srcroot@test/thread_tcache_enabled.c -ifeq (@enable_experimental@, 1) -CTESTS += @srcroot@test/allocm.c @srcroot@test/rallocm.c +CTESTS := $(srcroot)test/aligned_alloc.c $(srcroot)test/allocated.c \ + $(srcroot)test/bitmap.c $(srcroot)test/mremap.c \ + $(srcroot)test/posix_memalign.c $(srcroot)test/thread_arena.c \ + $(srcroot)test/thread_tcache_enabled.c +ifeq ($(enable_experimental), 1) +CTESTS += $(srcroot)test/allocm.c $(srcroot)test/rallocm.c endif .PHONY: all dist doc_html doc_man doc @@ -74,18 +90,18 @@ endif .PHONY: install_html install_man install_doc install .PHONY: tests check clean distclean relclean -.SECONDARY : $(CTESTS:@srcroot@%.c=@objroot@%.o) +.SECONDARY : $(CTESTS:$(srcroot)%.c=$(objroot)%.o) # Default target. 
all: build dist: build_doc -@srcroot@doc/%.html : @objroot@doc/%.xml @srcroot@doc/stylesheet.xsl @objroot@doc/html.xsl - @XSLTPROC@ -o $@ @objroot@doc/html.xsl $< +$(srcroot)doc/%.html : $(objroot)doc/%.xml $(srcroot)doc/stylesheet.xsl $(objroot)doc/html.xsl + $(XSLTPROC) -o $@ $(objroot)doc/html.xsl $< -@srcroot@doc/%.3 : @objroot@doc/%.xml @srcroot@doc/stylesheet.xsl @objroot@doc/manpages.xsl - @XSLTPROC@ -o $@ @objroot@doc/manpages.xsl $< +$(srcroot)doc/%.3 : $(objroot)doc/%.xml $(srcroot)doc/stylesheet.xsl $(objroot)doc/manpages.xsl + $(XSLTPROC) -o $@ $(objroot)doc/manpages.xsl $< build_doc_html: $(DOCS_HTML) build_doc_man: $(DOCS_MAN3) @@ -94,16 +110,16 @@ build_doc: $(DOCS) # # Include generated dependency files. # --include $(CSRCS:@srcroot@%.c=@objroot@%.d) --include $(CSRCS:@srcroot@%.c=@objroot@%.pic.d) --include $(CTESTS:@srcroot@%.c=@objroot@%.d) +-include $(CSRCS:$(srcroot)%.c=$(objroot)%.d) +-include $(CSRCS:$(srcroot)%.c=$(objroot)%.pic.d) +-include $(CTESTS:$(srcroot)%.c=$(objroot)%.d) -@objroot@src/%.o: @srcroot@src/%.c +$(objroot)src/%.o: $(srcroot)src/%.c @mkdir -p $(@D) $(CC) $(CFLAGS) -c $(CPPFLAGS) -o $@ $< @$(CC) -MM $(CPPFLAGS) -MT $@ -o $(@:%.o=%.d) $< -@objroot@src/%.pic.o: @srcroot@src/%.c +$(objroot)src/%.pic.o: $(srcroot)src/%.c @mkdir -p $(@D) $(CC) $(CFLAGS) -fPIC -DPIC -c $(CPPFLAGS) -o $@ $< @$(CC) -MM $(CPPFLAGS) -MT $@ -o $(@:%.o=%.d) $< @@ -112,30 +128,30 @@ build_doc: $(DOCS) @mkdir -p $(@D) ln -sf $( @objroot@$${t}.out 2>&1; \ - if test -e "@srcroot@$${t}.exp"; then \ - diff -w -u @srcroot@$${t}.exp \ - @objroot@$${t}.out >/dev/null 2>&1; \ + $(TEST_LIBRARY_PATH) $${t} $(abs_srcroot) $(abs_objroot) \ + > $(objroot)$${t}.out 2>&1; \ + if test -e "$(srcroot)$${t}.exp"; then \ + diff -w -u $(srcroot)$${t}.exp \ + $(objroot)$${t}.out >/dev/null 2>&1; \ fail=$$?; \ if test "$${fail}" -eq "1" ; then \ failures=`expr $${failures} + 1`; \ @@ -217,49 +233,49 @@ check: tests echo "Failures: $${failures}/$${total}"' clean: - rm -f 
$(CSRCS:@srcroot@%.c=@objroot@%.o) - rm -f $(CSRCS:@srcroot@%.c=@objroot@%.pic.o) - rm -f $(CSRCS:@srcroot@%.c=@objroot@%.d) - rm -f $(CSRCS:@srcroot@%.c=@objroot@%.pic.d) - rm -f $(CTESTS:@srcroot@%.c=@objroot@%) - rm -f $(CTESTS:@srcroot@%.c=@objroot@%.o) - rm -f $(CTESTS:@srcroot@%.c=@objroot@%.d) - rm -f $(CTESTS:@srcroot@%.c=@objroot@%.out) + rm -f $(CSRCS:$(srcroot)%.c=$(objroot)%.o) + rm -f $(CSRCS:$(srcroot)%.c=$(objroot)%.pic.o) + rm -f $(CSRCS:$(srcroot)%.c=$(objroot)%.d) + rm -f $(CSRCS:$(srcroot)%.c=$(objroot)%.pic.d) + rm -f $(CTESTS:$(srcroot)%.c=$(objroot)%) + rm -f $(CTESTS:$(srcroot)%.c=$(objroot)%.o) + rm -f $(CTESTS:$(srcroot)%.c=$(objroot)%.d) + rm -f $(CTESTS:$(srcroot)%.c=$(objroot)%.out) rm -f $(DSOS) $(STATIC_LIBS) distclean: clean - rm -rf @objroot@autom4te.cache - rm -f @objroot@config.log - rm -f @objroot@config.status - rm -f @objroot@config.stamp - rm -f @cfghdrs_out@ - rm -f @cfgoutputs_out@ + rm -rf $(objroot)autom4te.cache + rm -f $(objroot)config.log + rm -f $(objroot)config.status + rm -f $(objroot)config.stamp + rm -f $(cfghdrs_out) + rm -f $(cfgoutputs_out) relclean: distclean - rm -f @objroot@configure - rm -f @srcroot@VERSION + rm -f $(objroot)configure + rm -f $(srcroot)VERSION rm -f $(DOCS_HTML) rm -f $(DOCS_MAN3) #=============================================================================== # Re-configuration rules. 
-ifeq (@enable_autogen@, 1) -@srcroot@configure : @srcroot@configure.ac - cd ./@srcroot@ && @AUTOCONF@ +ifeq ($(enable_autogen), 1) +$(srcroot)configure : $(srcroot)configure.ac + cd ./$(srcroot) && $(AUTOCONF) -@objroot@config.status : @srcroot@configure - ./@objroot@config.status --recheck +$(objroot)config.status : $(srcroot)configure + ./$(objroot)config.status --recheck -@srcroot@config.stamp.in : @srcroot@configure.ac - echo stamp > @srcroot@config.stamp.in +$(srcroot)config.stamp.in : $(srcroot)configure.ac + echo stamp > $(srcroot)config.stamp.in -@objroot@config.stamp : @cfgoutputs_in@ @cfghdrs_in@ @srcroot@configure - ./@objroot@config.status +$(objroot)config.stamp : $(cfgoutputs_in) $(cfghdrs_in) $(srcroot)configure + ./$(objroot)config.status @touch $@ # There must be some action in order for make to re-read Makefile when it is # out of date. -@cfgoutputs_out@ @cfghdrs_out@ : @objroot@config.stamp +$(cfgoutputs_out) $(cfghdrs_out) : $(objroot)config.stamp @true endif From 72ca7220f21ef32f17d12cfde1bd9732d56fb872 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Mon, 16 Apr 2012 16:30:23 +0200 Subject: [PATCH 0156/3378] Use echo instead of cat in loops in size_classes.sh This avoids fork/exec()ing in loops, as echo is a builtin, and makes size_classes.sh much faster (from > 10s to < 0.2s on mingw on my machine). 
--- include/jemalloc/internal/size_classes.sh | 32 ++++++++--------------- 1 file changed, 11 insertions(+), 21 deletions(-) diff --git a/include/jemalloc/internal/size_classes.sh b/include/jemalloc/internal/size_classes.sh index 3d236136..29c80c1f 100755 --- a/include/jemalloc/internal/size_classes.sh +++ b/include/jemalloc/internal/size_classes.sh @@ -35,10 +35,8 @@ while [ ${lg_q} -le ${lg_qmax} ] ; do while [ ${lg_t} -le ${lg_q} ] ; do lg_p=${lg_pmin} while [ ${lg_p} -le ${lg_pmax} ] ; do - cat < Date: Mon, 16 Apr 2012 16:30:24 +0200 Subject: [PATCH 0157/3378] Add variables for library prefix, and static library, object and executable suffixes This makes hacking on Makefile easier. --- Makefile.in | 58 +++++++++++++++++++++++++++++----------------------- configure.ac | 8 ++++++++ 2 files changed, 40 insertions(+), 26 deletions(-) diff --git a/Makefile.in b/Makefile.in index 1c30f328..146f57fd 100644 --- a/Makefile.in +++ b/Makefile.in @@ -29,6 +29,10 @@ LDFLAGS := @LDFLAGS@ LIBS := @LIBS@ RPATH_EXTRA := @RPATH_EXTRA@ SO := @so@ +O := @o@ +A := @a@ +EXE := @exe@ +LIB := @lib@ ifeq (macho, @abi@) WL_SONAME := dylib_install_name else @@ -53,6 +57,8 @@ else TEST_LIBRARY_PATH := endif +LIBJEMALLOC := $(LIB)jemalloc$(install_suffix) + # Lists of files. 
BINS := $(srcroot)bin/pprof $(objroot)bin/jemalloc.sh CHDRS := $(objroot)include/jemalloc/jemalloc$(install_suffix).h \ @@ -68,10 +74,10 @@ CSRCS := $(srcroot)src/jemalloc.c $(srcroot)src/arena.c $(srcroot)src/atomic.c \ ifeq (macho, $(ABI)) CSRCS += $(srcroot)src/zone.c endif -STATIC_LIBS := $(objroot)lib/libjemalloc$(install_suffix).a -DSOS := $(objroot)lib/libjemalloc$(install_suffix).$(SO).$(REV) \ - $(objroot)lib/libjemalloc$(install_suffix).$(SO) \ - $(objroot)lib/libjemalloc$(install_suffix)_pic.a +STATIC_LIBS := $(objroot)lib/$(LIBJEMALLOC).$(A) +DSOS := $(objroot)lib/$(LIBJEMALLOC).$(SO).$(REV) \ + $(objroot)lib/$(LIBJEMALLOC).$(SO) \ + $(objroot)lib/$(LIBJEMALLOC)_pic.$(A) MAN3 := $(objroot)doc/jemalloc$(install_suffix).3 DOCS_XML := $(objroot)doc/jemalloc$(install_suffix).xml DOCS_HTML := $(DOCS_XML:$(objroot)%.xml=$(srcroot)%.html) @@ -90,7 +96,7 @@ endif .PHONY: install_html install_man install_doc install .PHONY: tests check clean distclean relclean -.SECONDARY : $(CTESTS:$(srcroot)%.c=$(objroot)%.o) +.SECONDARY : $(CTESTS:$(srcroot)%.c=$(objroot)%.$(O)) # Default target. 
all: build @@ -114,39 +120,39 @@ build_doc: $(DOCS) -include $(CSRCS:$(srcroot)%.c=$(objroot)%.pic.d) -include $(CTESTS:$(srcroot)%.c=$(objroot)%.d) -$(objroot)src/%.o: $(srcroot)src/%.c +$(objroot)src/%.$(O): $(srcroot)src/%.c @mkdir -p $(@D) $(CC) $(CFLAGS) -c $(CPPFLAGS) -o $@ $< - @$(CC) -MM $(CPPFLAGS) -MT $@ -o $(@:%.o=%.d) $< + @$(CC) -MM $(CPPFLAGS) -MT $@ -o $(@:%.$(O)=%.d) $< -$(objroot)src/%.pic.o: $(srcroot)src/%.c +$(objroot)src/%.pic.$(O): $(srcroot)src/%.c @mkdir -p $(@D) $(CC) $(CFLAGS) -fPIC -DPIC -c $(CPPFLAGS) -o $@ $< - @$(CC) -MM $(CPPFLAGS) -MT $@ -o $(@:%.o=%.d) $< + @$(CC) -MM $(CPPFLAGS) -MT $@ -o $(@:%.$(O)=%.d) $< %.$(SO) : %.$(SO).$(REV) @mkdir -p $(@D) ln -sf $( $(objroot)$${t}.out 2>&1; \ if test -e "$(srcroot)$${t}.exp"; then \ diff -w -u $(srcroot)$${t}.exp \ @@ -233,12 +239,12 @@ check: tests echo "Failures: $${failures}/$${total}"' clean: - rm -f $(CSRCS:$(srcroot)%.c=$(objroot)%.o) - rm -f $(CSRCS:$(srcroot)%.c=$(objroot)%.pic.o) + rm -f $(CSRCS:$(srcroot)%.c=$(objroot)%.$(O)) + rm -f $(CSRCS:$(srcroot)%.c=$(objroot)%.pic.$(O)) rm -f $(CSRCS:$(srcroot)%.c=$(objroot)%.d) rm -f $(CSRCS:$(srcroot)%.c=$(objroot)%.pic.d) - rm -f $(CTESTS:$(srcroot)%.c=$(objroot)%) - rm -f $(CTESTS:$(srcroot)%.c=$(objroot)%.o) + rm -f $(CTESTS:$(srcroot)%.c=$(objroot)%$(EXE)) + rm -f $(CTESTS:$(srcroot)%.c=$(objroot)%.$(O)) rm -f $(CTESTS:$(srcroot)%.c=$(objroot)%.d) rm -f $(CTESTS:$(srcroot)%.c=$(objroot)%.out) rm -f $(DSOS) $(STATIC_LIBS) diff --git a/configure.ac b/configure.ac index 90235f7b..55efcea0 100644 --- a/configure.ac +++ b/configure.ac @@ -195,6 +195,10 @@ AC_DEFINE_UNQUOTED([CPU_SPINWAIT], [$CPU_SPINWAIT]) LD_PRELOAD_VAR="LD_PRELOAD" so="so" +o="o" +a="a" +exe= +lib="lib" dnl Heap profiling uses the log(3) function. 
LIBS="$LIBS -lm" @@ -277,6 +281,10 @@ AC_SUBST([abi]) AC_SUBST([RPATH]) AC_SUBST([LD_PRELOAD_VAR]) AC_SUBST([so]) +AC_SUBST([o]) +AC_SUBST([a]) +AC_SUBST([exe]) +AC_SUBST([lib]) JE_COMPILABLE([__attribute__ syntax], [static __attribute__((unused)) void foo(void){}], From fa08da752bf91c146f77fff59b4ed09b42633260 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Mon, 16 Apr 2012 16:30:25 +0200 Subject: [PATCH 0158/3378] Limit the number of flags directly given to the linker, and refactor rpath This will make things easier for MSVC support. --- Makefile.in | 17 +++++------------ configure.ac | 11 +++++------ 2 files changed, 10 insertions(+), 18 deletions(-) diff --git a/Makefile.in b/Makefile.in index 146f57fd..ef8f7175 100644 --- a/Makefile.in +++ b/Makefile.in @@ -33,23 +33,20 @@ O := @o@ A := @a@ EXE := @exe@ LIB := @lib@ -ifeq (macho, @abi@) -WL_SONAME := dylib_install_name -else -WL_SONAME := soname -endif REV := @rev@ install_suffix := @install_suffix@ ABI := @abi@ XSLTPROC := @XSLTPROC@ AUTOCONF := @AUTOCONF@ -RPATH := @RPATH@ +_RPATH = @RPATH@ +RPATH = $(if $(1),$(call _RPATH,$(1))) cfghdrs_in := @cfghdrs_in@ cfghdrs_out := @cfghdrs_out@ cfgoutputs_in := @cfgoutputs_in@ cfgoutputs_out := @cfgoutputs_out@ enable_autogen := @enable_autogen@ enable_experimental := @enable_experimental@ +DSO_LDFLAGS = @DSO_LDFLAGS@ ifeq (macho, $(ABI)) TEST_LIBRARY_PATH := DYLD_FALLBACK_LIBRARY_PATH=$(objroot)lib @@ -136,7 +133,7 @@ $(objroot)src/%.pic.$(O): $(srcroot)src/%.c $(objroot)lib/$(LIBJEMALLOC).$(SO).$(REV) : $(CSRCS:$(srcroot)%.c=$(objroot)%.pic.$(O)) @mkdir -p $(@D) - $(CC) -shared -Wl,-$(WL_SONAME),$(@F) $(RPATH_EXTRA:%=$(RPATH)%) -o $@ $+ $(LDFLAGS) $(LIBS) + $(CC) $(DSO_LDFLAGS) $(call RPATH,$(RPATH_EXTRA)) -o $@ $+ $(LDFLAGS) $(LIBS) $(objroot)lib/$(LIBJEMALLOC)_pic.$(A) : $(CSRCS:$(srcroot)%.c=$(objroot)%.pic.$(O)) @mkdir -p $(@D) @@ -154,11 +151,7 @@ $(objroot)test/%.$(O): $(srcroot)test/%.c $(objroot)test/%$(EXE): $(objroot)test/%.$(O) \ 
$(objroot)lib/$(LIBJEMALLOC).$(SO) @mkdir -p $(@D) -ifneq ($(RPATH), ) - $(CC) -o $@ $< $(RPATH)$(objroot)lib -L$(objroot)lib -ljemalloc$(install_suffix) $(LIBS) -else - $(CC) -o $@ $< -L$(objroot)lib -ljemalloc$(install_suffix) $(LIBS) -endif + $(CC) -o $@ $< $(call RPATH,$(objroot)lib) -L$(objroot)lib -ljemalloc$(install_suffix) $(LIBS) build_lib_shared: $(DSOS) build_lib_static: $(STATIC_LIBS) diff --git a/configure.ac b/configure.ac index 55efcea0..3a7a2457 100644 --- a/configure.ac +++ b/configure.ac @@ -199,6 +199,8 @@ o="o" a="a" exe= lib="lib" +DSO_LDFLAGS='-shared -Wl,-soname,$(@F)' +RPATH='-Wl,-rpath,$(1)' dnl Heap profiling uses the log(3) function. LIBS="$LIBS -lm" @@ -220,12 +222,12 @@ case "${host}" in LD_PRELOAD_VAR="DYLD_INSERT_LIBRARIES" so="dylib" force_tls="0" + DSO_LDFLAGS='-shared -Wl,-dylib_install_name,$(@F)' ;; *-*-freebsd*) CFLAGS="$CFLAGS" abi="elf" AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) - RPATH="-Wl,-rpath," force_lazy_lock="1" ;; *-*-linux*) @@ -234,7 +236,6 @@ case "${host}" in abi="elf" AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED], [ ]) AC_DEFINE([JEMALLOC_THREADED_INIT], [ ]) - RPATH="-Wl,-rpath," default_munmap="0" ;; *-*-netbsd*) @@ -250,12 +251,11 @@ case "${host}" in [abi="aout"]) AC_MSG_RESULT([$abi]) AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) - RPATH="-Wl,-rpath," ;; *-*-solaris2*) CFLAGS="$CFLAGS" abi="elf" - RPATH="-Wl,-R," + RPATH='-Wl,-R,$(1)' dnl Solaris needs this for sigwait(). 
CPPFLAGS="$CPPFLAGS -D_POSIX_PTHREAD_SEMANTICS" LIBS="$LIBS -lposix4 -lsocket -lnsl" @@ -269,12 +269,10 @@ case "${host}" in LD_PRELOAD_VAR="LDR_PRELOAD" fi abi="xcoff" - RPATH="-Wl,-rpath," ;; *) AC_MSG_RESULT([Unsupported operating system: ${host}]) abi="elf" - RPATH="-Wl,-rpath," ;; esac AC_SUBST([abi]) @@ -285,6 +283,7 @@ AC_SUBST([o]) AC_SUBST([a]) AC_SUBST([exe]) AC_SUBST([lib]) +AC_SUBST([DSO_LDFLAGS]) JE_COMPILABLE([__attribute__ syntax], [static __attribute__((unused)) void foo(void){}], From 45f208e112fcb82e0c98d572fc34259d65d6b6c1 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Mon, 16 Apr 2012 16:30:26 +0200 Subject: [PATCH 0159/3378] Replace fprintf with malloc_printf in tests. --- Makefile.in | 6 ++- .../jemalloc/internal/jemalloc_internal.h.in | 3 ++ test/aligned_alloc.c | 18 +++---- test/allocated.c | 18 +++---- test/allocm.c | 54 +++++++++---------- test/bitmap.c | 10 +--- test/jemalloc_test.h.in | 1 + test/mremap.c | 12 ++--- test/posix_memalign.c | 18 +++---- test/rallocm.c | 52 +++++++++--------- test/thread_arena.c | 16 +++--- test/thread_tcache_enabled.c | 8 +-- 12 files changed, 108 insertions(+), 108 deletions(-) diff --git a/Makefile.in b/Makefile.in index ef8f7175..0dfddea0 100644 --- a/Makefile.in +++ b/Makefile.in @@ -148,10 +148,12 @@ $(objroot)test/%.$(O): $(srcroot)test/%.c $(CC) $(CFLAGS) -c $(CPPFLAGS) -I$(objroot)test -o $@ $< @$(CC) -MM $(CPPFLAGS) -I$(objroot)test -MT $@ -o $(@:%.$(O)=%.d) $< -$(objroot)test/%$(EXE): $(objroot)test/%.$(O) \ +$(objroot)test/bitmap$(EXE): $(objroot)src/bitmap.$(O) + +$(objroot)test/%$(EXE): $(objroot)test/%.$(O) $(objroot)src/util.$(O) \ $(objroot)lib/$(LIBJEMALLOC).$(SO) @mkdir -p $(@D) - $(CC) -o $@ $< $(call RPATH,$(objroot)lib) -L$(objroot)lib -ljemalloc$(install_suffix) $(LIBS) + $(CC) -o $@ $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) -L$(objroot)lib -ljemalloc$(install_suffix) $(LIBS) build_lib_shared: $(DSOS) build_lib_static: $(STATIC_LIBS) diff --git 
a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index aa21aa5d..51d40fb8 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -1,3 +1,5 @@ +#ifndef JEMALLOC_INTERNAL_H +#define JEMALLOC_INTERNAL_H #include #include #include @@ -868,3 +870,4 @@ malloc_tsd_funcs(JEMALLOC_INLINE, thread_allocated, thread_allocated_t, #undef JEMALLOC_H_INLINES /******************************************************************************/ +#endif /* JEMALLOC_INTERNAL_H */ diff --git a/test/aligned_alloc.c b/test/aligned_alloc.c index 2a95604f..81b8f933 100644 --- a/test/aligned_alloc.c +++ b/test/aligned_alloc.c @@ -20,14 +20,14 @@ main(void) unsigned i; void *p, *ps[NITER]; - fprintf(stderr, "Test begin\n"); + malloc_printf("Test begin\n"); /* Test error conditions. */ alignment = 0; errno = 0; p = aligned_alloc(alignment, 1); if (p != NULL || errno != EINVAL) { - fprintf(stderr, + malloc_printf( "Expected error for invalid alignment %zu\n", alignment); } @@ -36,7 +36,7 @@ main(void) errno = 0; p = aligned_alloc(alignment + 1, 1); if (p != NULL || errno != EINVAL) { - fprintf(stderr, + malloc_printf( "Expected error for invalid alignment %zu\n", alignment + 1); } @@ -52,7 +52,7 @@ main(void) errno = 0; p = aligned_alloc(alignment, size); if (p != NULL || errno != ENOMEM) { - fprintf(stderr, + malloc_printf( "Expected error for aligned_alloc(%zu, %zu)\n", alignment, size); } @@ -67,7 +67,7 @@ main(void) errno = 0; p = aligned_alloc(alignment, size); if (p != NULL || errno != ENOMEM) { - fprintf(stderr, + malloc_printf( "Expected error for aligned_alloc(%zu, %zu)\n", alignment, size); } @@ -81,7 +81,7 @@ main(void) errno = 0; p = aligned_alloc(alignment, size); if (p != NULL || errno != ENOMEM) { - fprintf(stderr, + malloc_printf( "Expected error for aligned_alloc(&p, %zu, %zu)\n", alignment, size); } @@ -93,14 +93,14 @@ main(void) alignment <= MAXALIGN; alignment 
<<= 1) { total = 0; - fprintf(stderr, "Alignment: %zu\n", alignment); + malloc_printf("Alignment: %zu\n", alignment); for (size = 1; size < 3 * alignment && size < (1U << 31); size += (alignment >> (LG_SIZEOF_PTR-1)) - 1) { for (i = 0; i < NITER; i++) { ps[i] = aligned_alloc(alignment, size); if (ps[i] == NULL) { - fprintf(stderr, + malloc_printf( "Error for size %zu (%#zx): %s\n", size, size, strerror(errno)); exit(1); @@ -118,6 +118,6 @@ main(void) } } - fprintf(stderr, "Test end\n"); + malloc_printf("Test end\n"); return (0); } diff --git a/test/allocated.c b/test/allocated.c index 921ab3ae..81cd4ca9 100644 --- a/test/allocated.c +++ b/test/allocated.c @@ -27,7 +27,7 @@ thread_start(void *arg) #endif goto label_return; } - fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__, + malloc_printf("%s(): Error in mallctl(): %s\n", __func__, strerror(err)); exit(1); } @@ -39,7 +39,7 @@ thread_start(void *arg) #endif goto label_return; } - fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__, + malloc_printf("%s(): Error in mallctl(): %s\n", __func__, strerror(err)); exit(1); } @@ -53,7 +53,7 @@ thread_start(void *arg) #endif goto label_return; } - fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__, + malloc_printf("%s(): Error in mallctl(): %s\n", __func__, strerror(err)); exit(1); } @@ -65,7 +65,7 @@ thread_start(void *arg) #endif goto label_return; } - fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__, + malloc_printf("%s(): Error in mallctl(): %s\n", __func__, strerror(err)); exit(1); } @@ -73,7 +73,7 @@ thread_start(void *arg) p = malloc(1); if (p == NULL) { - fprintf(stderr, "%s(): Error in malloc()\n", __func__); + malloc_printf("%s(): Error in malloc()\n", __func__); exit(1); } @@ -108,13 +108,13 @@ main(void) int ret = 0; pthread_t thread; - fprintf(stderr, "Test begin\n"); + malloc_printf("Test begin\n"); thread_start(NULL); if (pthread_create(&thread, NULL, thread_start, NULL) != 0) { - fprintf(stderr, "%s(): Error in 
pthread_create()\n", __func__); + malloc_printf("%s(): Error in pthread_create()\n", __func__); ret = 1; goto label_return; } @@ -124,7 +124,7 @@ main(void) if (pthread_create(&thread, NULL, thread_start, NULL) != 0) { - fprintf(stderr, "%s(): Error in pthread_create()\n", __func__); + malloc_printf("%s(): Error in pthread_create()\n", __func__); ret = 1; goto label_return; } @@ -133,6 +133,6 @@ main(void) thread_start(NULL); label_return: - fprintf(stderr, "Test end\n"); + malloc_printf("Test end\n"); return (ret); } diff --git a/test/allocm.c b/test/allocm.c index 3aa0fd23..c6bc6f83 100644 --- a/test/allocm.c +++ b/test/allocm.c @@ -19,52 +19,52 @@ main(void) unsigned i; void *ps[NITER]; - fprintf(stderr, "Test begin\n"); + malloc_printf("Test begin\n"); sz = 42; nsz = 0; r = nallocm(&nsz, sz, 0); if (r != ALLOCM_SUCCESS) { - fprintf(stderr, "Unexpected nallocm() error\n"); + malloc_printf("Unexpected nallocm() error\n"); abort(); } rsz = 0; r = allocm(&p, &rsz, sz, 0); if (r != ALLOCM_SUCCESS) { - fprintf(stderr, "Unexpected allocm() error\n"); + malloc_printf("Unexpected allocm() error\n"); abort(); } if (rsz < sz) - fprintf(stderr, "Real size smaller than expected\n"); + malloc_printf("Real size smaller than expected\n"); if (nsz != rsz) - fprintf(stderr, "nallocm()/allocm() rsize mismatch\n"); + malloc_printf("nallocm()/allocm() rsize mismatch\n"); if (dallocm(p, 0) != ALLOCM_SUCCESS) - fprintf(stderr, "Unexpected dallocm() error\n"); + malloc_printf("Unexpected dallocm() error\n"); r = allocm(&p, NULL, sz, 0); if (r != ALLOCM_SUCCESS) { - fprintf(stderr, "Unexpected allocm() error\n"); + malloc_printf("Unexpected allocm() error\n"); abort(); } if (dallocm(p, 0) != ALLOCM_SUCCESS) - fprintf(stderr, "Unexpected dallocm() error\n"); + malloc_printf("Unexpected dallocm() error\n"); nsz = 0; r = nallocm(&nsz, sz, ALLOCM_ZERO); if (r != ALLOCM_SUCCESS) { - fprintf(stderr, "Unexpected nallocm() error\n"); + malloc_printf("Unexpected nallocm() error\n"); abort(); } 
rsz = 0; r = allocm(&p, &rsz, sz, ALLOCM_ZERO); if (r != ALLOCM_SUCCESS) { - fprintf(stderr, "Unexpected allocm() error\n"); + malloc_printf("Unexpected allocm() error\n"); abort(); } if (nsz != rsz) - fprintf(stderr, "nallocm()/allocm() rsize mismatch\n"); + malloc_printf("nallocm()/allocm() rsize mismatch\n"); if (dallocm(p, 0) != ALLOCM_SUCCESS) - fprintf(stderr, "Unexpected dallocm() error\n"); + malloc_printf("Unexpected dallocm() error\n"); #if LG_SIZEOF_PTR == 3 alignment = UINT64_C(0x8000000000000000); @@ -76,19 +76,19 @@ main(void) nsz = 0; r = nallocm(&nsz, sz, ALLOCM_ALIGN(alignment)); if (r == ALLOCM_SUCCESS) { - fprintf(stderr, + malloc_printf( "Expected error for nallocm(&nsz, %zu, %#x)\n", sz, ALLOCM_ALIGN(alignment)); } rsz = 0; r = allocm(&p, &rsz, sz, ALLOCM_ALIGN(alignment)); if (r == ALLOCM_SUCCESS) { - fprintf(stderr, + malloc_printf( "Expected error for allocm(&p, %zu, %#x)\n", sz, ALLOCM_ALIGN(alignment)); } if (nsz != rsz) - fprintf(stderr, "nallocm()/allocm() rsize mismatch\n"); + malloc_printf("nallocm()/allocm() rsize mismatch\n"); #if LG_SIZEOF_PTR == 3 alignment = UINT64_C(0x4000000000000000); @@ -100,11 +100,11 @@ main(void) nsz = 0; r = nallocm(&nsz, sz, ALLOCM_ALIGN(alignment)); if (r != ALLOCM_SUCCESS) - fprintf(stderr, "Unexpected nallocm() error\n"); + malloc_printf("Unexpected nallocm() error\n"); rsz = 0; r = allocm(&p, &rsz, sz, ALLOCM_ALIGN(alignment)); if (r == ALLOCM_SUCCESS) { - fprintf(stderr, + malloc_printf( "Expected error for allocm(&p, %zu, %#x)\n", sz, ALLOCM_ALIGN(alignment)); } @@ -118,19 +118,19 @@ main(void) nsz = 0; r = nallocm(&nsz, sz, ALLOCM_ALIGN(alignment)); if (r == ALLOCM_SUCCESS) { - fprintf(stderr, + malloc_printf( "Expected error for nallocm(&nsz, %zu, %#x)\n", sz, ALLOCM_ALIGN(alignment)); } rsz = 0; r = allocm(&p, &rsz, sz, ALLOCM_ALIGN(alignment)); if (r == ALLOCM_SUCCESS) { - fprintf(stderr, + malloc_printf( "Expected error for allocm(&p, %zu, %#x)\n", sz, ALLOCM_ALIGN(alignment)); } if (nsz != 
rsz) - fprintf(stderr, "nallocm()/allocm() rsize mismatch\n"); + malloc_printf("nallocm()/allocm() rsize mismatch\n"); for (i = 0; i < NITER; i++) ps[i] = NULL; @@ -139,7 +139,7 @@ main(void) alignment <= MAXALIGN; alignment <<= 1) { total = 0; - fprintf(stderr, "Alignment: %zu\n", alignment); + malloc_printf("Alignment: %zu\n", alignment); for (sz = 1; sz < 3 * alignment && sz < (1U << 31); sz += (alignment >> (LG_SIZEOF_PTR-1)) - 1) { @@ -148,7 +148,7 @@ main(void) r = nallocm(&nsz, sz, ALLOCM_ALIGN(alignment) | ALLOCM_ZERO); if (r != ALLOCM_SUCCESS) { - fprintf(stderr, + malloc_printf( "nallocm() error for size %zu" " (%#zx): %d\n", sz, sz, r); @@ -158,24 +158,24 @@ main(void) r = allocm(&ps[i], &rsz, sz, ALLOCM_ALIGN(alignment) | ALLOCM_ZERO); if (r != ALLOCM_SUCCESS) { - fprintf(stderr, + malloc_printf( "allocm() error for size %zu" " (%#zx): %d\n", sz, sz, r); exit(1); } if (rsz < sz) { - fprintf(stderr, + malloc_printf( "Real size smaller than" " expected\n"); } if (nsz != rsz) { - fprintf(stderr, + malloc_printf( "nallocm()/allocm() rsize" " mismatch\n"); } if ((uintptr_t)p & (alignment-1)) { - fprintf(stderr, + malloc_printf( "%p inadequately aligned for" " alignment: %zu\n", p, alignment); } @@ -193,6 +193,6 @@ main(void) } } - fprintf(stderr, "Test end\n"); + malloc_printf("Test end\n"); return (0); } diff --git a/test/bitmap.c b/test/bitmap.c index adfaacfe..ee9b1ecc 100644 --- a/test/bitmap.c +++ b/test/bitmap.c @@ -7,12 +7,6 @@ * */ #include -/* - * Directly include the bitmap code, since it isn't exposed outside - * libjemalloc. 
- */ -#include "../src/bitmap.c" - #if (LG_BITMAP_MAXBITS > 12) # define MAXBITS 4500 #else @@ -144,7 +138,7 @@ test_bitmap_sfu(void) int main(void) { - fprintf(stderr, "Test begin\n"); + malloc_printf("Test begin\n"); test_bitmap_size(); test_bitmap_init(); @@ -152,6 +146,6 @@ main(void) test_bitmap_unset(); test_bitmap_sfu(); - fprintf(stderr, "Test end\n"); + malloc_printf("Test end\n"); return (0); } diff --git a/test/jemalloc_test.h.in b/test/jemalloc_test.h.in index 0c48895e..58fa08e4 100644 --- a/test/jemalloc_test.h.in +++ b/test/jemalloc_test.h.in @@ -4,3 +4,4 @@ * have a different name. */ #include "jemalloc/jemalloc@install_suffix@.h" +#include "jemalloc/internal/jemalloc_internal.h" diff --git a/test/mremap.c b/test/mremap.c index cac3bd82..84c03491 100644 --- a/test/mremap.c +++ b/test/mremap.c @@ -14,12 +14,12 @@ main(void) size_t sz, lg_chunk, chunksize, i; char *p, *q; - fprintf(stderr, "Test begin\n"); + malloc_printf("Test begin\n"); sz = sizeof(lg_chunk); if ((err = mallctl("opt.lg_chunk", &lg_chunk, &sz, NULL, 0))) { assert(err != ENOENT); - fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__, + malloc_printf("%s(): Error in mallctl(): %s\n", __func__, strerror(err)); ret = 1; goto label_return; @@ -28,7 +28,7 @@ main(void) p = (char *)malloc(chunksize); if (p == NULL) { - fprintf(stderr, "malloc(%zu) --> %p\n", chunksize, p); + malloc_printf("malloc(%zu) --> %p\n", chunksize, p); ret = 1; goto label_return; } @@ -36,7 +36,7 @@ main(void) q = (char *)realloc(p, chunksize * 2); if (q == NULL) { - fprintf(stderr, "realloc(%p, %zu) --> %p\n", p, chunksize * 2, + malloc_printf("realloc(%p, %zu) --> %p\n", p, chunksize * 2, q); ret = 1; goto label_return; @@ -49,7 +49,7 @@ main(void) q = (char *)realloc(p, chunksize); if (q == NULL) { - fprintf(stderr, "realloc(%p, %zu) --> %p\n", p, chunksize, q); + malloc_printf("realloc(%p, %zu) --> %p\n", p, chunksize, q); ret = 1; goto label_return; } @@ -61,6 +61,6 @@ main(void) ret = 0; label_return: - 
fprintf(stderr, "Test end\n"); + malloc_printf("Test end\n"); return (ret); } diff --git a/test/posix_memalign.c b/test/posix_memalign.c index 0ea35c89..e1302df2 100644 --- a/test/posix_memalign.c +++ b/test/posix_memalign.c @@ -21,13 +21,13 @@ main(void) int err; void *p, *ps[NITER]; - fprintf(stderr, "Test begin\n"); + malloc_printf("Test begin\n"); /* Test error conditions. */ for (alignment = 0; alignment < sizeof(void *); alignment++) { err = posix_memalign(&p, alignment, 1); if (err != EINVAL) { - fprintf(stderr, + malloc_printf( "Expected error for invalid alignment %zu\n", alignment); } @@ -37,7 +37,7 @@ main(void) alignment <<= 1) { err = posix_memalign(&p, alignment + 1, 1); if (err == 0) { - fprintf(stderr, + malloc_printf( "Expected error for invalid alignment %zu\n", alignment + 1); } @@ -52,7 +52,7 @@ main(void) #endif err = posix_memalign(&p, alignment, size); if (err == 0) { - fprintf(stderr, + malloc_printf( "Expected error for posix_memalign(&p, %zu, %zu)\n", alignment, size); } @@ -66,7 +66,7 @@ main(void) #endif err = posix_memalign(&p, alignment, size); if (err == 0) { - fprintf(stderr, + malloc_printf( "Expected error for posix_memalign(&p, %zu, %zu)\n", alignment, size); } @@ -79,7 +79,7 @@ main(void) #endif err = posix_memalign(&p, alignment, size); if (err == 0) { - fprintf(stderr, + malloc_printf( "Expected error for posix_memalign(&p, %zu, %zu)\n", alignment, size); } @@ -91,7 +91,7 @@ main(void) alignment <= MAXALIGN; alignment <<= 1) { total = 0; - fprintf(stderr, "Alignment: %zu\n", alignment); + malloc_printf("Alignment: %zu\n", alignment); for (size = 1; size < 3 * alignment && size < (1U << 31); size += (alignment >> (LG_SIZEOF_PTR-1)) - 1) { @@ -99,7 +99,7 @@ main(void) err = posix_memalign(&ps[i], alignment, size); if (err) { - fprintf(stderr, + malloc_printf( "Error for size %zu (%#zx): %s\n", size, size, strerror(err)); exit(1); @@ -117,6 +117,6 @@ main(void) } } - fprintf(stderr, "Test end\n"); + malloc_printf("Test end\n"); 
return (0); } diff --git a/test/rallocm.c b/test/rallocm.c index 9c0df403..18db5eec 100644 --- a/test/rallocm.c +++ b/test/rallocm.c @@ -15,7 +15,7 @@ main(void) size_t sz, tsz; int r; - fprintf(stderr, "Test begin\n"); + malloc_printf("Test begin\n"); /* Get page size. */ { @@ -26,51 +26,51 @@ main(void) r = allocm(&p, &sz, 42, 0); if (r != ALLOCM_SUCCESS) { - fprintf(stderr, "Unexpected allocm() error\n"); + malloc_printf("Unexpected allocm() error\n"); abort(); } q = p; r = rallocm(&q, &tsz, sz, 0, ALLOCM_NO_MOVE); if (r != ALLOCM_SUCCESS) - fprintf(stderr, "Unexpected rallocm() error\n"); + malloc_printf("Unexpected rallocm() error\n"); if (q != p) - fprintf(stderr, "Unexpected object move\n"); + malloc_printf("Unexpected object move\n"); if (tsz != sz) { - fprintf(stderr, "Unexpected size change: %zu --> %zu\n", + malloc_printf("Unexpected size change: %zu --> %zu\n", sz, tsz); } q = p; r = rallocm(&q, &tsz, sz, 5, ALLOCM_NO_MOVE); if (r != ALLOCM_SUCCESS) - fprintf(stderr, "Unexpected rallocm() error\n"); + malloc_printf("Unexpected rallocm() error\n"); if (q != p) - fprintf(stderr, "Unexpected object move\n"); + malloc_printf("Unexpected object move\n"); if (tsz != sz) { - fprintf(stderr, "Unexpected size change: %zu --> %zu\n", + malloc_printf("Unexpected size change: %zu --> %zu\n", sz, tsz); } q = p; r = rallocm(&q, &tsz, sz + 5, 0, ALLOCM_NO_MOVE); if (r != ALLOCM_ERR_NOT_MOVED) - fprintf(stderr, "Unexpected rallocm() result\n"); + malloc_printf("Unexpected rallocm() result\n"); if (q != p) - fprintf(stderr, "Unexpected object move\n"); + malloc_printf("Unexpected object move\n"); if (tsz != sz) { - fprintf(stderr, "Unexpected size change: %zu --> %zu\n", + malloc_printf("Unexpected size change: %zu --> %zu\n", sz, tsz); } q = p; r = rallocm(&q, &tsz, sz + 5, 0, 0); if (r != ALLOCM_SUCCESS) - fprintf(stderr, "Unexpected rallocm() error\n"); + malloc_printf("Unexpected rallocm() error\n"); if (q == p) - fprintf(stderr, "Expected object move\n"); + 
malloc_printf("Expected object move\n"); if (tsz == sz) { - fprintf(stderr, "Expected size change: %zu --> %zu\n", + malloc_printf("Expected size change: %zu --> %zu\n", sz, tsz); } p = q; @@ -78,11 +78,11 @@ main(void) r = rallocm(&q, &tsz, pagesize*2, 0, 0); if (r != ALLOCM_SUCCESS) - fprintf(stderr, "Unexpected rallocm() error\n"); + malloc_printf("Unexpected rallocm() error\n"); if (q == p) - fprintf(stderr, "Expected object move\n"); + malloc_printf("Expected object move\n"); if (tsz == sz) { - fprintf(stderr, "Expected size change: %zu --> %zu\n", + malloc_printf("Expected size change: %zu --> %zu\n", sz, tsz); } p = q; @@ -90,9 +90,9 @@ main(void) r = rallocm(&q, &tsz, pagesize*4, 0, 0); if (r != ALLOCM_SUCCESS) - fprintf(stderr, "Unexpected rallocm() error\n"); + malloc_printf("Unexpected rallocm() error\n"); if (tsz == sz) { - fprintf(stderr, "Expected size change: %zu --> %zu\n", + malloc_printf("Expected size change: %zu --> %zu\n", sz, tsz); } p = q; @@ -100,28 +100,28 @@ main(void) r = rallocm(&q, &tsz, pagesize*2, 0, ALLOCM_NO_MOVE); if (r != ALLOCM_SUCCESS) - fprintf(stderr, "Unexpected rallocm() error\n"); + malloc_printf("Unexpected rallocm() error\n"); if (q != p) - fprintf(stderr, "Unexpected object move\n"); + malloc_printf("Unexpected object move\n"); if (tsz == sz) { - fprintf(stderr, "Expected size change: %zu --> %zu\n", + malloc_printf("Expected size change: %zu --> %zu\n", sz, tsz); } sz = tsz; r = rallocm(&q, &tsz, pagesize*4, 0, ALLOCM_NO_MOVE); if (r != ALLOCM_SUCCESS) - fprintf(stderr, "Unexpected rallocm() error\n"); + malloc_printf("Unexpected rallocm() error\n"); if (q != p) - fprintf(stderr, "Unexpected object move\n"); + malloc_printf("Unexpected object move\n"); if (tsz == sz) { - fprintf(stderr, "Expected size change: %zu --> %zu\n", + malloc_printf("Expected size change: %zu --> %zu\n", sz, tsz); } sz = tsz; dallocm(p, 0); - fprintf(stderr, "Test end\n"); + malloc_printf("Test end\n"); return (0); } diff --git 
a/test/thread_arena.c b/test/thread_arena.c index 9b7b2ddc..e443b712 100644 --- a/test/thread_arena.c +++ b/test/thread_arena.c @@ -20,14 +20,14 @@ thread_start(void *arg) p = malloc(1); if (p == NULL) { - fprintf(stderr, "%s(): Error in malloc()\n", __func__); + malloc_printf("%s(): Error in malloc()\n", __func__); return (void *)1; } size = sizeof(arena_ind); if ((err = mallctl("thread.arena", &arena_ind, &size, &main_arena_ind, sizeof(main_arena_ind)))) { - fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__, + malloc_printf("%s(): Error in mallctl(): %s\n", __func__, strerror(err)); return (void *)1; } @@ -35,7 +35,7 @@ thread_start(void *arg) size = sizeof(arena_ind); if ((err = mallctl("thread.arena", &arena_ind, &size, NULL, 0))) { - fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__, + malloc_printf("%s(): Error in mallctl(): %s\n", __func__, strerror(err)); return (void *)1; } @@ -55,18 +55,18 @@ main(void) pthread_t threads[NTHREADS]; unsigned i; - fprintf(stderr, "Test begin\n"); + malloc_printf("Test begin\n"); p = malloc(1); if (p == NULL) { - fprintf(stderr, "%s(): Error in malloc()\n", __func__); + malloc_printf("%s(): Error in malloc()\n", __func__); ret = 1; goto label_return; } size = sizeof(arena_ind); if ((err = mallctl("thread.arena", &arena_ind, &size, NULL, 0))) { - fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__, + malloc_printf("%s(): Error in mallctl(): %s\n", __func__, strerror(err)); ret = 1; goto label_return; @@ -75,7 +75,7 @@ main(void) for (i = 0; i < NTHREADS; i++) { if (pthread_create(&threads[i], NULL, thread_start, (void *)&arena_ind) != 0) { - fprintf(stderr, "%s(): Error in pthread_create()\n", + malloc_printf("%s(): Error in pthread_create()\n", __func__); ret = 1; goto label_return; @@ -86,6 +86,6 @@ main(void) pthread_join(threads[i], (void *)&ret); label_return: - fprintf(stderr, "Test end\n"); + malloc_printf("Test end\n"); return (ret); } diff --git a/test/thread_tcache_enabled.c 
b/test/thread_tcache_enabled.c index 0a3e45a9..59b76a27 100644 --- a/test/thread_tcache_enabled.c +++ b/test/thread_tcache_enabled.c @@ -79,13 +79,13 @@ main(void) int ret = 0; pthread_t thread; - fprintf(stderr, "Test begin\n"); + malloc_printf("Test begin\n"); thread_start(NULL); if (pthread_create(&thread, NULL, thread_start, NULL) != 0) { - fprintf(stderr, "%s(): Error in pthread_create()\n", __func__); + malloc_printf("%s(): Error in pthread_create()\n", __func__); ret = 1; goto label_return; } @@ -95,7 +95,7 @@ main(void) if (pthread_create(&thread, NULL, thread_start, NULL) != 0) { - fprintf(stderr, "%s(): Error in pthread_create()\n", __func__); + malloc_printf("%s(): Error in pthread_create()\n", __func__); ret = 1; goto label_return; } @@ -104,6 +104,6 @@ main(void) thread_start(NULL); label_return: - fprintf(stderr, "Test end\n"); + malloc_printf("Test end\n"); return (ret); } From b57d3ec571c6551231be62b7bf92c084a8c8291c Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 17 Apr 2012 13:17:54 -0700 Subject: [PATCH 0160/3378] Add atomic(9) implementations of atomic operations. Add atomic(9) implementations of atomic operations. These are used on FreeBSD for non-x86 architectures. --- configure.ac | 25 +++++++++- include/jemalloc/internal/atomic.h | 46 +++++++++++++++++-- .../jemalloc/internal/jemalloc_internal.h.in | 4 ++ include/jemalloc/jemalloc_defs.h.in | 3 ++ 4 files changed, 72 insertions(+), 6 deletions(-) diff --git a/configure.ac b/configure.ac index 3a7a2457..f4c25060 100644 --- a/configure.ac +++ b/configure.ac @@ -980,6 +980,29 @@ if test "x${je_cv_function_ffsl}" != "xyes" ; then AC_MSG_ERROR([Cannot build without ffsl(3)]) fi +dnl ============================================================================ +dnl Check for atomic(9) operations as provided on FreeBSD. 
+ +JE_COMPILABLE([atomic(9)], [ +#include +#include +#include +], [ + { + uint32_t x32 = 0; + volatile uint32_t *x32p = &x32; + atomic_fetchadd_32(x32p, 1); + } + { + unsigned long xlong = 0; + volatile unsigned long *xlongp = &xlong; + atomic_fetchadd_long(xlongp, 1); + } +], [je_cv_atomic9]) +if test "x${je_cv_atomic9}" = "xyes" ; then + AC_DEFINE([JEMALLOC_ATOMIC9]) +fi + dnl ============================================================================ dnl Check for atomic(3) operations as provided on Darwin. @@ -1031,7 +1054,7 @@ AC_DEFUN([JE_SYNC_COMPARE_AND_SWAP_CHECK],[ fi ]) -if test "x${je_cv_osatomic}" != "xyes" ; then +if test "x${je_cv_atomic9}" != "xyes" -a "x${je_cv_osatomic}" != "xyes" ; then JE_SYNC_COMPARE_AND_SWAP_CHECK(32, 4) JE_SYNC_COMPARE_AND_SWAP_CHECK(64, 8) fi diff --git a/include/jemalloc/internal/atomic.h b/include/jemalloc/internal/atomic.h index d8f6ca57..016c472a 100644 --- a/include/jemalloc/internal/atomic.h +++ b/include/jemalloc/internal/atomic.h @@ -32,7 +32,8 @@ unsigned atomic_sub_u(unsigned *p, unsigned x); #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ATOMIC_C_)) /******************************************************************************/ /* 64-bit operations. 
*/ -#ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 +#if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3) +# ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 JEMALLOC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x) { @@ -60,7 +61,7 @@ atomic_sub_uint64(uint64_t *p, uint64_t x) return (OSAtomicAdd64(-((int64_t)x), (int64_t *)p)); } -#elif (defined(__amd64__) || defined(__x86_64__)) +# elif (defined(__amd64__) || defined(__x86_64__)) JEMALLOC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x) { @@ -87,7 +88,29 @@ atomic_sub_uint64(uint64_t *p, uint64_t x) return (x); } -#elif (defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_8)) +# elif (defined(JEMALLOC_ATOMIC9)) +JEMALLOC_INLINE uint64_t +atomic_add_uint64(uint64_t *p, uint64_t x) +{ + + /* + * atomic_fetchadd_64() doesn't exist, but we only ever use this + * function on LP64 systems, so atomic_fetchadd_long() will do. + */ + assert(sizeof(uint64_t) == sizeof(unsigned long)); + + return (atomic_fetchadd_long(p, (unsigned long)x) + x); +} + +JEMALLOC_INLINE uint64_t +atomic_sub_uint64(uint64_t *p, uint64_t x) +{ + + assert(sizeof(uint64_t) == sizeof(unsigned long)); + + return (atomic_fetchadd_long(p, (unsigned long)(-(long)x)) - x); +} +# elif (defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_8)) JEMALLOC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x) { @@ -101,8 +124,7 @@ atomic_sub_uint64(uint64_t *p, uint64_t x) return (__sync_sub_and_fetch(p, x)); } -#else -# if (LG_SIZEOF_PTR == 3) +# else # error "Missing implementation for 64-bit atomic operations" # endif #endif @@ -164,6 +186,20 @@ atomic_sub_uint32(uint32_t *p, uint32_t x) return (x); } +#elif (defined(JEMALLOC_ATOMIC9)) +JEMALLOC_INLINE uint32_t +atomic_add_uint32(uint32_t *p, uint32_t x) +{ + + return (atomic_fetchadd_32(p, x) + x); +} + +JEMALLOC_INLINE uint32_t +atomic_sub_uint32(uint32_t *p, uint32_t x) +{ + + return (atomic_fetchadd_32(p, (uint32_t)(-(int32_t)x)) - x); +} #elif (defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_4)) JEMALLOC_INLINE uint32_t 
atomic_add_uint32(uint32_t *p, uint32_t x) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 51d40fb8..905653a2 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -161,6 +161,10 @@ static const bool config_ivsalloc = #endif ; +#ifdef JEMALLOC_ATOMIC9 +#include +#endif + #if (defined(JEMALLOC_OSATOMIC) || defined(JEMALLOC_OSSPIN)) #include #endif diff --git a/include/jemalloc/jemalloc_defs.h.in b/include/jemalloc/jemalloc_defs.h.in index b6e5593b..90baa355 100644 --- a/include/jemalloc/jemalloc_defs.h.in +++ b/include/jemalloc/jemalloc_defs.h.in @@ -47,6 +47,9 @@ */ #undef CPU_SPINWAIT +/* Defined if the equivalent of FreeBSD's atomic(9) functions are available. */ +#undef JEMALLOC_ATOMIC9 + /* * Defined if OSAtomic*() functions are available, as provided by Darwin, and * documented in the atomic(3) manual page. From 25a000e89649d9ce5aacc1089408b8b3bafeb5e4 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 17 Apr 2012 15:49:30 -0700 Subject: [PATCH 0161/3378] Update pprof (from gperftools 2.0). --- ChangeLog | 1 + bin/pprof | 839 ++++++++++++++++++++++++++++++++++---------- doc/jemalloc.xml.in | 2 +- 3 files changed, 649 insertions(+), 193 deletions(-) diff --git a/ChangeLog b/ChangeLog index 8f6edd5a..1db47d8c 100644 --- a/ChangeLog +++ b/ChangeLog @@ -34,6 +34,7 @@ found in the git revision history: - Add the --with-mangling option. - Add the --disable-experimental option. - Add the "thread.tcache.enabled" mallctl. + - Update pprof (from gperftools 2.0). Incompatible changes: - Enable stats by default. 
diff --git a/bin/pprof b/bin/pprof index 280ddcc8..727eb437 100755 --- a/bin/pprof +++ b/bin/pprof @@ -72,7 +72,7 @@ use strict; use warnings; use Getopt::Long; -my $PPROF_VERSION = "1.7"; +my $PPROF_VERSION = "2.0"; # These are the object tools we use which can come from a # user-specified location using --tools, from the PPROF_TOOLS @@ -87,13 +87,14 @@ my %obj_tool_map = ( #"addr2line_pdb" => "addr2line-pdb", # ditto #"otool" => "otool", # equivalent of objdump on OS X ); -my $DOT = "dot"; # leave non-absolute, since it may be in /usr/local -my $GV = "gv"; -my $EVINCE = "evince"; # could also be xpdf or perhaps acroread -my $KCACHEGRIND = "kcachegrind"; -my $PS2PDF = "ps2pdf"; +# NOTE: these are lists, so you can put in commandline flags if you want. +my @DOT = ("dot"); # leave non-absolute, since it may be in /usr/local +my @GV = ("gv"); +my @EVINCE = ("evince"); # could also be xpdf or perhaps acroread +my @KCACHEGRIND = ("kcachegrind"); +my @PS2PDF = ("ps2pdf"); # These are used for dynamic profiles -my $URL_FETCHER = "curl -s"; +my @URL_FETCHER = ("curl", "-s"); # These are the web pages that servers need to support for dynamic profiles my $HEAP_PAGE = "/pprof/heap"; @@ -104,7 +105,10 @@ my $GROWTH_PAGE = "/pprof/growth"; my $CONTENTION_PAGE = "/pprof/contention"; my $WALL_PAGE = "/pprof/wall(?:\\?.*)?"; # accepts options like namefilter my $FILTEREDPROFILE_PAGE = "/pprof/filteredprofile(?:\\?.*)?"; -my $CENSUSPROFILE_PAGE = "/pprof/censusprofile"; # must support "?seconds=#" +my $CENSUSPROFILE_PAGE = "/pprof/censusprofile(?:\\?.*)?"; # must support cgi-param + # "?seconds=#", + # "?tags_regexp=#" and + # "?type=#". my $SYMBOL_PAGE = "/pprof/symbol"; # must support symbol lookup via POST my $PROGRAM_NAME_PAGE = "/pprof/cmdline"; @@ -122,6 +126,11 @@ my $UNKNOWN_BINARY = "(unknown)"; # 64-bit profiles. To err on the safe size, default to 64-bit here: my $address_length = 16; +my $dev_null = "/dev/null"; +if (! 
-e $dev_null && $^O =~ /MSWin/) { # $^O is the OS perl was built for + $dev_null = "nul"; +} + # A list of paths to search for shared object files my @prefix_list = (); @@ -151,7 +160,8 @@ pprof [options] The / can be $HEAP_PAGE, $PROFILE_PAGE, /pprof/pmuprofile, $GROWTH_PAGE, $CONTENTION_PAGE, /pprof/wall, $CENSUSPROFILE_PAGE, or /pprof/filteredprofile. - For instance: "pprof http://myserver.com:80$HEAP_PAGE". + For instance: + pprof http://myserver.com:80$HEAP_PAGE If / is omitted, the service defaults to $PROFILE_PAGE (cpu profiling). pprof --symbols Maps addresses to symbol names. In this mode, stdin should be a @@ -162,7 +172,7 @@ pprof --symbols For more help with querying remote servers, including how to add the necessary server-side support code, see this filename (or one like it): - /usr/doc/google-perftools-$PPROF_VERSION/pprof_remote_servers.html + /usr/doc/gperftools-$PPROF_VERSION/pprof_remote_servers.html Options: --cum Sort by cumulative data @@ -260,7 +270,7 @@ EOF sub version_string { return < 0) { + if (IsProfileURL($ARGV[0])) { + $main::use_symbol_page = 1; + } elsif (IsSymbolizedProfileFile($ARGV[0])) { + $main::use_symbolized_profile = 1; + $main::prog = $UNKNOWN_BINARY; # will be set later from the profile file + } } if ($main::use_symbol_page || $main::use_symbolized_profile) { @@ -540,7 +552,7 @@ sub Init() { ConfigureObjTools($main::prog) } - # Break the opt_list_prefix into the prefix_list array + # Break the opt_lib_prefix into the prefix_list array @prefix_list = split (',', $main::opt_lib_prefix); # Remove trailing / from the prefixes, in the list to prevent @@ -636,9 +648,9 @@ sub Main() { # Print if (!$main::opt_interactive) { if ($main::opt_disasm) { - PrintDisassembly($libs, $flat, $cumulative, $main::opt_disasm, $total); + PrintDisassembly($libs, $flat, $cumulative, $main::opt_disasm); } elsif ($main::opt_list) { - PrintListing($libs, $flat, $cumulative, $main::opt_list); + PrintListing($total, $libs, $flat, $cumulative, 
$main::opt_list, 0); } elsif ($main::opt_text) { # Make sure the output is empty when have nothing to report # (only matters when --heapcheck is given but we must be @@ -646,7 +658,7 @@ sub Main() { if ($total != 0) { printf("Total: %s %s\n", Unparse($total), Units()); } - PrintText($symbols, $flat, $cumulative, $total, -1); + PrintText($symbols, $flat, $cumulative, -1); } elsif ($main::opt_raw) { PrintSymbolizedProfile($symbols, $profile, $main::prog); } elsif ($main::opt_callgrind) { @@ -656,7 +668,7 @@ sub Main() { if ($main::opt_gv) { RunGV(TempName($main::next_tmpfile, "ps"), ""); } elsif ($main::opt_evince) { - RunEvince(TempName($main::next_tmpfile, "pdf"), ""); + RunEvince(TempName($main::next_tmpfile, "pdf"), ""); } elsif ($main::opt_web) { my $tmp = TempName($main::next_tmpfile, "svg"); RunWeb($tmp); @@ -705,24 +717,25 @@ sub ReadlineMightFail { sub RunGV { my $fname = shift; my $bg = shift; # "" or " &" if we should run in background - if (!system("$GV --version >/dev/null 2>&1")) { + if (!system(ShellEscape(@GV, "--version") . " >$dev_null 2>&1")) { # Options using double dash are supported by this gv version. # Also, turn on noantialias to better handle bug in gv for # postscript files with large dimensions. # TODO: Maybe we should not pass the --noantialias flag # if the gv version is known to work properly without the flag. - system("$GV --scale=$main::opt_scale --noantialias " . $fname . $bg); + system(ShellEscape(@GV, "--scale=$main::opt_scale", "--noantialias", $fname) + . $bg); } else { # Old gv version - only supports options that use single dash. - print STDERR "$GV -scale $main::opt_scale\n"; - system("$GV -scale $main::opt_scale " . $fname . $bg); + print STDERR ShellEscape(@GV, "-scale", $main::opt_scale) . "\n"; + system(ShellEscape(@GV, "-scale", "$main::opt_scale", $fname) . $bg); } } sub RunEvince { my $fname = shift; my $bg = shift; # "" or " &" if we should run in background - system("$EVINCE " . $fname . 
$bg); + system(ShellEscape(@EVINCE, $fname) . $bg); } sub RunWeb { @@ -756,8 +769,8 @@ sub RunWeb { sub RunKcachegrind { my $fname = shift; my $bg = shift; # "" or " &" if we should run in background - print STDERR "Starting '$KCACHEGRIND " . $fname . $bg . "'\n"; - system("$KCACHEGRIND " . $fname . $bg); + print STDERR "Starting '@KCACHEGRIND " . $fname . $bg . "'\n"; + system(ShellEscape(@KCACHEGRIND, $fname) . $bg); } @@ -834,14 +847,14 @@ sub InteractiveCommand { my $ignore; ($routine, $ignore) = ParseInteractiveArgs($3); - my $profile = ProcessProfile($orig_profile, $symbols, "", $ignore); + my $profile = ProcessProfile($total, $orig_profile, $symbols, "", $ignore); my $reduced = ReduceProfile($symbols, $profile); # Get derived profiles my $flat = FlatProfile($reduced); my $cumulative = CumulativeProfile($reduced); - PrintText($symbols, $flat, $cumulative, $total, $line_limit); + PrintText($symbols, $flat, $cumulative, $line_limit); return 1; } if (m/^\s*callgrind\s*([^ \n]*)/) { @@ -861,21 +874,22 @@ sub InteractiveCommand { return 1; } - if (m/^\s*list\s*(.+)/) { + if (m/^\s*(web)?list\s*(.+)/) { + my $html = (defined($1) && ($1 eq "web")); $main::opt_list = 1; my $routine; my $ignore; - ($routine, $ignore) = ParseInteractiveArgs($1); + ($routine, $ignore) = ParseInteractiveArgs($2); - my $profile = ProcessProfile($orig_profile, $symbols, "", $ignore); + my $profile = ProcessProfile($total, $orig_profile, $symbols, "", $ignore); my $reduced = ReduceProfile($symbols, $profile); # Get derived profiles my $flat = FlatProfile($reduced); my $cumulative = CumulativeProfile($reduced); - PrintListing($libs, $flat, $cumulative, $routine); + PrintListing($total, $libs, $flat, $cumulative, $routine, $html); return 1; } if (m/^\s*disasm\s*(.+)/) { @@ -886,14 +900,14 @@ sub InteractiveCommand { ($routine, $ignore) = ParseInteractiveArgs($1); # Process current profile to account for various settings - my $profile = ProcessProfile($orig_profile, $symbols, "", $ignore); + 
my $profile = ProcessProfile($total, $orig_profile, $symbols, "", $ignore); my $reduced = ReduceProfile($symbols, $profile); # Get derived profiles my $flat = FlatProfile($reduced); my $cumulative = CumulativeProfile($reduced); - PrintDisassembly($libs, $flat, $cumulative, $routine, $total); + PrintDisassembly($libs, $flat, $cumulative, $routine); return 1; } if (m/^\s*(gv|web|evince)\s*(.*)/) { @@ -913,7 +927,8 @@ sub InteractiveCommand { ($focus, $ignore) = ParseInteractiveArgs($2); # Process current profile to account for various settings - my $profile = ProcessProfile($orig_profile, $symbols, $focus, $ignore); + my $profile = ProcessProfile($total, $orig_profile, $symbols, + $focus, $ignore); my $reduced = ReduceProfile($symbols, $profile); # Get derived profiles @@ -941,6 +956,7 @@ sub InteractiveCommand { sub ProcessProfile { + my $total_count = shift; my $orig_profile = shift; my $symbols = shift; my $focus = shift; @@ -948,7 +964,6 @@ sub ProcessProfile { # Process current profile to account for various settings my $profile = $orig_profile; - my $total_count = TotalProfile($profile); printf("Total: %s %s\n", Unparse($total_count), Units()); if ($focus ne '') { $profile = FocusProfile($symbols, $profile, $focus); @@ -995,6 +1010,11 @@ Commands: list [routine_regexp] [-ignore1] [-ignore2] Show source listing of routines whose names match "routine_regexp" + weblist [routine_regexp] [-ignore1] [-ignore2] + Displays a source listing of routines whose names match "routine_regexp" + in a web browser. You can click on source lines to view the + corresponding disassembly. + top [--cum] [-ignore1] [-ignore2] top20 [--cum] [-ignore1] [-ignore2] top37 [--cum] [-ignore1] [-ignore2] @@ -1019,8 +1039,8 @@ parameters will be ignored. 
Further pprof details are available at this location (or one similar): - /usr/doc/google-perftools-$PPROF_VERSION/cpu_profiler.html - /usr/doc/google-perftools-$PPROF_VERSION/heap_profiler.html + /usr/doc/gperftools-$PPROF_VERSION/cpu_profiler.html + /usr/doc/gperftools-$PPROF_VERSION/heap_profiler.html ENDOFHELP } @@ -1137,9 +1157,10 @@ sub PrintText { my $symbols = shift; my $flat = shift; my $cumulative = shift; - my $total = shift; my $line_limit = shift; + my $total = TotalProfile($flat); + # Which profile to sort by? my $s = $main::opt_cum ? $cumulative : $flat; @@ -1169,7 +1190,29 @@ sub PrintText { $sym); } $lines++; - last if ($line_limit >= 0 && $lines > $line_limit); + last if ($line_limit >= 0 && $lines >= $line_limit); + } +} + +# Callgrind format has a compression for repeated function and file +# names. You show the name the first time, and just use its number +# subsequently. This can cut down the file to about a third or a +# quarter of its uncompressed size. $key and $val are the key/value +# pair that would normally be printed by callgrind; $map is a map from +# value to number. +sub CompressedCGName { + my($key, $val, $map) = @_; + my $idx = $map->{$val}; + # For very short keys, providing an index hurts rather than helps. + if (length($val) <= 3) { + return "$key=$val\n"; + } elsif (defined($idx)) { + return "$key=($idx)\n"; + } else { + # scalar(keys $map) gives the number of items in the map. 
+ $idx = scalar(keys(%{$map})) + 1; + $map->{$val} = $idx; + return "$key=($idx) $val\n"; } } @@ -1177,13 +1220,16 @@ sub PrintText { sub PrintCallgrind { my $calls = shift; my $filename; + my %filename_to_index_map; + my %fnname_to_index_map; + if ($main::opt_interactive) { $filename = shift; print STDERR "Writing callgrind file to '$filename'.\n" } else { $filename = "&STDOUT"; } - open(CG, ">".$filename ); + open(CG, ">$filename"); printf CG ("events: Hits\n\n"); foreach my $call ( map { $_->[0] } sort { $a->[1] cmp $b ->[1] || @@ -1197,11 +1243,14 @@ sub PrintCallgrind { $callee_file, $callee_line, $callee_function ) = ( $1, $2, $3, $5, $6, $7 ); - - printf CG ("fl=$caller_file\nfn=$caller_function\n"); + # TODO(csilvers): for better compression, collect all the + # caller/callee_files and functions first, before printing + # anything, and only compress those referenced more than once. + printf CG CompressedCGName("fl", $caller_file, \%filename_to_index_map); + printf CG CompressedCGName("fn", $caller_function, \%fnname_to_index_map); if (defined $6) { - printf CG ("cfl=$callee_file\n"); - printf CG ("cfn=$callee_function\n"); + printf CG CompressedCGName("cfl", $callee_file, \%filename_to_index_map); + printf CG CompressedCGName("cfn", $callee_function, \%fnname_to_index_map); printf CG ("calls=$count $callee_line\n"); } printf CG ("$caller_line $count\n\n"); @@ -1214,7 +1263,8 @@ sub PrintDisassembly { my $flat = shift; my $cumulative = shift; my $disasm_opts = shift; - my $total = shift; + + my $total = TotalProfile($flat); foreach my $lib (@{$libs}) { my $symbol_table = GetProcedureBoundaries($lib->[0], $disasm_opts); @@ -1249,10 +1299,10 @@ sub Disassemble { my $end_addr = shift; my $objdump = $obj_tool_map{"objdump"}; - my $cmd = sprintf("$objdump -C -d -l --no-show-raw-insn " . - "--start-address=0x$start_addr " . 
- "--stop-address=0x$end_addr $prog"); - open(OBJDUMP, "$cmd |") || error("$objdump: $!\n"); + my $cmd = ShellEscape($objdump, "-C", "-d", "-l", "--no-show-raw-insn", + "--start-address=0x$start_addr", + "--stop-address=0x$end_addr", $prog); + open(OBJDUMP, "$cmd |") || error("$cmd: $!\n"); my @result = (); my $filename = ""; my $linenumber = -1; @@ -1315,13 +1365,33 @@ sub ByName { return ShortFunctionName($a) cmp ShortFunctionName($b); } -# Print source-listing for all all routines that match $main::opt_list +# Print source-listing for all all routines that match $list_opts sub PrintListing { + my $total = shift; my $libs = shift; my $flat = shift; my $cumulative = shift; my $list_opts = shift; + my $html = shift; + my $output = \*STDOUT; + my $fname = ""; + + if ($html) { + # Arrange to write the output to a temporary file + $fname = TempName($main::next_tmpfile, "html"); + $main::next_tmpfile++; + if (!open(TEMP, ">$fname")) { + print STDERR "$fname: $!\n"; + return; + } + $output = \*TEMP; + print $output HtmlListingHeader(); + printf $output ("
%s
Total: %s %s
\n", + $main::prog, Unparse($total), Units()); + } + + my $listed = 0; foreach my $lib (@{$libs}) { my $symbol_table = GetProcedureBoundaries($lib->[0], $list_opts); my $offset = AddressSub($lib->[1], $lib->[3]); @@ -1333,15 +1403,113 @@ sub PrintListing { my $addr = AddressAdd($start_addr, $offset); for (my $i = 0; $i < $length; $i++) { if (defined($cumulative->{$addr})) { - PrintSource($lib->[0], $offset, - $routine, $flat, $cumulative, - $start_addr, $end_addr); + $listed += PrintSource( + $lib->[0], $offset, + $routine, $flat, $cumulative, + $start_addr, $end_addr, + $html, + $output); last; } $addr = AddressInc($addr); } } } + + if ($html) { + if ($listed > 0) { + print $output HtmlListingFooter(); + close($output); + RunWeb($fname); + } else { + close($output); + unlink($fname); + } + } +} + +sub HtmlListingHeader { + return <<'EOF'; + + + +Pprof listing + + + + +EOF +} + +sub HtmlListingFooter { + return <<'EOF'; + + +EOF +} + +sub HtmlEscape { + my $text = shift; + $text =~ s/&/&/g; + $text =~ s//>/g; + return $text; } # Returns the indentation of the line, if it has any non-whitespace @@ -1355,6 +1523,45 @@ sub Indentation { } } +# If the symbol table contains inlining info, Disassemble() may tag an +# instruction with a location inside an inlined function. But for +# source listings, we prefer to use the location in the function we +# are listing. So use MapToSymbols() to fetch full location +# information for each instruction and then pick out the first +# location from a location list (location list contains callers before +# callees in case of inlining). 
+# +# After this routine has run, each entry in $instructions contains: +# [0] start address +# [1] filename for function we are listing +# [2] line number for function we are listing +# [3] disassembly +# [4] limit address +# [5] most specific filename (may be different from [1] due to inlining) +# [6] most specific line number (may be different from [2] due to inlining) +sub GetTopLevelLineNumbers { + my ($lib, $offset, $instructions) = @_; + my $pcs = []; + for (my $i = 0; $i <= $#{$instructions}; $i++) { + push(@{$pcs}, $instructions->[$i]->[0]); + } + my $symbols = {}; + MapToSymbols($lib, $offset, $pcs, $symbols); + for (my $i = 0; $i <= $#{$instructions}; $i++) { + my $e = $instructions->[$i]; + push(@{$e}, $e->[1]); + push(@{$e}, $e->[2]); + my $addr = $e->[0]; + my $sym = $symbols->{$addr}; + if (defined($sym)) { + if ($#{$sym} >= 2 && $sym->[1] =~ m/^(.*):(\d+)$/) { + $e->[1] = $1; # File name + $e->[2] = $2; # Line number + } + } + } +} + # Print source-listing for one routine sub PrintSource { my $prog = shift; @@ -1364,9 +1571,12 @@ sub PrintSource { my $cumulative = shift; my $start_addr = shift; my $end_addr = shift; + my $html = shift; + my $output = shift; # Disassemble all instructions (just to get line numbers) my @instructions = Disassemble($prog, $offset, $start_addr, $end_addr); + GetTopLevelLineNumbers($prog, $offset, \@instructions); # Hack 1: assume that the first source file encountered in the # disassembly contains the routine @@ -1379,7 +1589,7 @@ sub PrintSource { } if (!defined($filename)) { print STDERR "no filename found in $routine\n"; - return; + return 0; } # Hack 2: assume that the largest line number from $filename is the @@ -1412,7 +1622,7 @@ sub PrintSource { { if (!open(FILE, "<$filename")) { print STDERR "$filename: $!\n"; - return; + return 0; } my $l = 0; my $first_indentation = -1; @@ -1440,12 +1650,24 @@ sub PrintSource { # Assign all samples to the range $firstline,$lastline, # Hack 4: If an instruction does not occur 
in the range, its samples # are moved to the next instruction that occurs in the range. - my $samples1 = {}; - my $samples2 = {}; - my $running1 = 0; # Unassigned flat counts - my $running2 = 0; # Unassigned cumulative counts - my $total1 = 0; # Total flat counts - my $total2 = 0; # Total cumulative counts + my $samples1 = {}; # Map from line number to flat count + my $samples2 = {}; # Map from line number to cumulative count + my $running1 = 0; # Unassigned flat counts + my $running2 = 0; # Unassigned cumulative counts + my $total1 = 0; # Total flat counts + my $total2 = 0; # Total cumulative counts + my %disasm = (); # Map from line number to disassembly + my $running_disasm = ""; # Unassigned disassembly + my $skip_marker = "---\n"; + if ($html) { + $skip_marker = ""; + for (my $l = $firstline; $l <= $lastline; $l++) { + $disasm{$l} = ""; + } + } + my $last_dis_filename = ''; + my $last_dis_linenum = -1; + my $last_touched_line = -1; # To detect gaps in disassembly for a line foreach my $e (@instructions) { # Add up counts for all address that fall inside this instruction my $c1 = 0; @@ -1454,6 +1676,38 @@ sub PrintSource { $c1 += GetEntry($flat, $a); $c2 += GetEntry($cumulative, $a); } + + if ($html) { + my $dis = sprintf(" %6s %6s \t\t%8s: %s ", + HtmlPrintNumber($c1), + HtmlPrintNumber($c2), + UnparseAddress($offset, $e->[0]), + CleanDisassembly($e->[3])); + + # Append the most specific source line associated with this instruction + if (length($dis) < 80) { $dis .= (' ' x (80 - length($dis))) }; + $dis = HtmlEscape($dis); + my $f = $e->[5]; + my $l = $e->[6]; + if ($f ne $last_dis_filename) { + $dis .= sprintf("%s:%d", + HtmlEscape(CleanFileName($f)), $l); + } elsif ($l ne $last_dis_linenum) { + # De-emphasize the unchanged file name portion + $dis .= sprintf("%s" . 
+ ":%d", + HtmlEscape(CleanFileName($f)), $l); + } else { + # De-emphasize the entire location + $dis .= sprintf("%s:%d", + HtmlEscape(CleanFileName($f)), $l); + } + $last_dis_filename = $f; + $last_dis_linenum = $l; + $running_disasm .= $dis; + $running_disasm .= "\n"; + } + $running1 += $c1; $running2 += $c2; $total1 += $c1; @@ -1468,23 +1722,49 @@ sub PrintSource { AddEntry($samples2, $line, $running2); $running1 = 0; $running2 = 0; + if ($html) { + if ($line != $last_touched_line && $disasm{$line} ne '') { + $disasm{$line} .= "\n"; + } + $disasm{$line} .= $running_disasm; + $running_disasm = ''; + $last_touched_line = $line; + } } } # Assign any leftover samples to $lastline AddEntry($samples1, $lastline, $running1); AddEntry($samples2, $lastline, $running2); + if ($html) { + if ($lastline != $last_touched_line && $disasm{$lastline} ne '') { + $disasm{$lastline} .= "\n"; + } + $disasm{$lastline} .= $running_disasm; + } - printf("ROUTINE ====================== %s in %s\n" . - "%6s %6s Total %s (flat / cumulative)\n", - ShortFunctionName($routine), - $filename, - Units(), - Unparse($total1), - Unparse($total2)); + if ($html) { + printf $output ( + "

%s

%s\n
\n" .
+      "Total:%6s %6s (flat / cumulative %s)\n",
+      HtmlEscape(ShortFunctionName($routine)),
+      HtmlEscape(CleanFileName($filename)),
+      Unparse($total1),
+      Unparse($total2),
+      Units());
+  } else {
+    printf $output (
+      "ROUTINE ====================== %s in %s\n" .
+      "%6s %6s Total %s (flat / cumulative)\n",
+      ShortFunctionName($routine),
+      CleanFileName($filename),
+      Unparse($total1),
+      Unparse($total2),
+      Units());
+  }
   if (!open(FILE, "<$filename")) {
     print STDERR "$filename: $!\n";
-    return;
+    return 0;
   }
   my $l = 0;
   while () {
@@ -1494,16 +1774,47 @@ sub PrintSource {
         (($l <= $oldlastline + 5) || ($l <= $lastline))) {
       chop;
       my $text = $_;
-      if ($l == $firstline) { printf("---\n"); }
-      printf("%6s %6s %4d: %s\n",
-             UnparseAlt(GetEntry($samples1, $l)),
-             UnparseAlt(GetEntry($samples2, $l)),
-             $l,
-             $text);
-      if ($l == $lastline)  { printf("---\n"); }
+      if ($l == $firstline) { print $output $skip_marker; }
+      my $n1 = GetEntry($samples1, $l);
+      my $n2 = GetEntry($samples2, $l);
+      if ($html) {
+        # Emit a span that has one of the following classes:
+        #    livesrc -- has samples
+        #    deadsrc -- has disassembly, but with no samples
+        #    nop     -- has no matching disasembly
+        # Also emit an optional span containing disassembly.
+        my $dis = $disasm{$l};
+        my $asm = "";
+        if (defined($dis) && $dis ne '') {
+          $asm = "" . $dis . "";
+        }
+        my $source_class = (($n1 + $n2 > 0) 
+                            ? "livesrc" 
+                            : (($asm ne "") ? "deadsrc" : "nop"));
+        printf $output (
+          "%5d " .
+          "%6s %6s %s%s\n",
+          $l, $source_class,
+          HtmlPrintNumber($n1),
+          HtmlPrintNumber($n2),
+          HtmlEscape($text),
+          $asm);
+      } else {
+        printf $output(
+          "%6s %6s %4d: %s\n",
+          UnparseAlt($n1),
+          UnparseAlt($n2),
+          $l,
+          $text);
+      }
+      if ($l == $lastline)  { print $output $skip_marker; }
     };
   }
   close(FILE);
+  if ($html) {
+    print $output "
\n"; + } + return 1; } # Return the source line for the specified file/linenumber. @@ -1646,21 +1957,11 @@ sub PrintDisassembledFunction { # Print disassembly for (my $x = $first_inst; $x <= $last_inst; $x++) { my $e = $instructions[$x]; - my $address = $e->[0]; - $address = AddressSub($address, $offset); # Make relative to section - $address =~ s/^0x//; - $address =~ s/^0*//; - - # Trim symbols - my $d = $e->[3]; - while ($d =~ s/\([^()%]*\)(\s*const)?//g) { } # Argument types, not (%rax) - while ($d =~ s/(\w+)<[^<>]*>/$1/g) { } # Remove template arguments - printf("%6s %6s %8s: %6s\n", UnparseAlt($flat_count[$x]), UnparseAlt($cum_count[$x]), - $address, - $d); + UnparseAddress($offset, $e->[0]), + CleanDisassembly($e->[3])); } } } @@ -1706,19 +2007,24 @@ sub PrintDot { # Open DOT output file my $output; + my $escaped_dot = ShellEscape(@DOT); + my $escaped_ps2pdf = ShellEscape(@PS2PDF); if ($main::opt_gv) { - $output = "| $DOT -Tps2 >" . TempName($main::next_tmpfile, "ps"); + my $escaped_outfile = ShellEscape(TempName($main::next_tmpfile, "ps")); + $output = "| $escaped_dot -Tps2 >$escaped_outfile"; } elsif ($main::opt_evince) { - $output = "| $DOT -Tps2 | $PS2PDF - " . TempName($main::next_tmpfile, "pdf"); + my $escaped_outfile = ShellEscape(TempName($main::next_tmpfile, "pdf")); + $output = "| $escaped_dot -Tps2 | $escaped_ps2pdf - $escaped_outfile"; } elsif ($main::opt_ps) { - $output = "| $DOT -Tps2"; + $output = "| $escaped_dot -Tps2"; } elsif ($main::opt_pdf) { - $output = "| $DOT -Tps2 | $PS2PDF - -"; + $output = "| $escaped_dot -Tps2 | $escaped_ps2pdf - -"; } elsif ($main::opt_web || $main::opt_svg) { # We need to post-process the SVG, so write to a temporary file always. - $output = "| $DOT -Tsvg >" . 
TempName($main::next_tmpfile, "svg"); + my $escaped_outfile = ShellEscape(TempName($main::next_tmpfile, "svg")); + $output = "| $escaped_dot -Tsvg >$escaped_outfile"; } elsif ($main::opt_gif) { - $output = "| $DOT -Tgif"; + $output = "| $escaped_dot -Tgif"; } else { $output = ">&STDOUT"; } @@ -1770,7 +2076,7 @@ sub PrintDot { if ($f != $c) { $extra = sprintf("\\rof %s (%s)", Unparse($c), - Percent($c, $overall_total)); + Percent($c, $local_total)); } my $style = ""; if ($main::opt_heapcheck) { @@ -1789,7 +2095,7 @@ sub PrintDot { $node{$a}, $sym, Unparse($f), - Percent($f, $overall_total), + Percent($f, $local_total), $extra, $fs, $style, @@ -1799,10 +2105,12 @@ sub PrintDot { # Get edges and counts per edge my %edge = (); my $n; + my $fullname_to_shortname_map = {}; + FillFullnameToShortnameMap($symbols, $fullname_to_shortname_map); foreach my $k (keys(%{$raw})) { # TODO: omit low %age edges $n = $raw->{$k}; - my @translated = TranslateStack($symbols, $k); + my @translated = TranslateStack($symbols, $fullname_to_shortname_map, $k); for (my $i = 1; $i <= $#translated; $i++) { my $src = $translated[$i]; my $dst = $translated[$i-1]; @@ -2186,6 +2494,50 @@ function handleMouseUp(evt) { EOF } +# Provides a map from fullname to shortname for cases where the +# shortname is ambiguous. The symlist has both the fullname and +# shortname for all symbols, which is usually fine, but sometimes -- +# such as overloaded functions -- two different fullnames can map to +# the same shortname. In that case, we use the address of the +# function to disambiguate the two. This function fills in a map that +# maps fullnames to modified shortnames in such cases. If a fullname +# is not present in the map, the 'normal' shortname provided by the +# symlist is the appropriate one to use. 
+sub FillFullnameToShortnameMap { + my $symbols = shift; + my $fullname_to_shortname_map = shift; + my $shortnames_seen_once = {}; + my $shortnames_seen_more_than_once = {}; + + foreach my $symlist (values(%{$symbols})) { + # TODO(csilvers): deal with inlined symbols too. + my $shortname = $symlist->[0]; + my $fullname = $symlist->[2]; + if ($fullname !~ /<[0-9a-fA-F]+>$/) { # fullname doesn't end in an address + next; # the only collisions we care about are when addresses differ + } + if (defined($shortnames_seen_once->{$shortname}) && + $shortnames_seen_once->{$shortname} ne $fullname) { + $shortnames_seen_more_than_once->{$shortname} = 1; + } else { + $shortnames_seen_once->{$shortname} = $fullname; + } + } + + foreach my $symlist (values(%{$symbols})) { + my $shortname = $symlist->[0]; + my $fullname = $symlist->[2]; + # TODO(csilvers): take in a list of addresses we care about, and only + # store in the map if $symlist->[1] is in that list. Saves space. + next if defined($fullname_to_shortname_map->{$fullname}); + if (defined($shortnames_seen_more_than_once->{$shortname})) { + if ($fullname =~ /<0*([^>]*)>$/) { # fullname has address at end of it + $fullname_to_shortname_map->{$fullname} = "$shortname\@$1"; + } + } + } +} + # Return a small number that identifies the argument. # Multiple calls with the same argument will return the same number. # Calls with different arguments will return different numbers. 
@@ -2202,6 +2554,7 @@ sub ShortIdFor { # Translate a stack of addresses into a stack of symbols sub TranslateStack { my $symbols = shift; + my $fullname_to_shortname_map = shift; my $k = shift; my @addrs = split(/\n/, $k); @@ -2233,6 +2586,9 @@ sub TranslateStack { my $func = $symlist->[$j-2]; my $fileline = $symlist->[$j-1]; my $fullfunc = $symlist->[$j]; + if (defined($fullname_to_shortname_map->{$fullfunc})) { + $func = $fullname_to_shortname_map->{$fullfunc}; + } if ($j > 2) { $func = "$func (inline)"; } @@ -2319,6 +2675,16 @@ sub UnparseAlt { } } +# Alternate pretty-printed form: 0 maps to "" +sub HtmlPrintNumber { + my $num = shift; + if ($num == 0) { + return ""; + } else { + return Unparse($num); + } +} + # Return output units sub Units { if ($main::profile_type eq 'heap' || $main::profile_type eq 'growth') { @@ -2475,6 +2841,13 @@ sub RemoveUninterestingFrames { '__builtin_vec_new', 'operator new', 'operator new[]', + # The entry to our memory-allocation routines on OS X + 'malloc_zone_malloc', + 'malloc_zone_calloc', + 'malloc_zone_valloc', + 'malloc_zone_realloc', + 'malloc_zone_memalign', + 'malloc_zone_free', # These mark the beginning/end of our custom sections '__start_google_malloc', '__stop_google_malloc', @@ -2566,9 +2939,11 @@ sub ReduceProfile { my $symbols = shift; my $profile = shift; my $result = {}; + my $fullname_to_shortname_map = {}; + FillFullnameToShortnameMap($symbols, $fullname_to_shortname_map); foreach my $k (keys(%{$profile})) { my $count = $profile->{$k}; - my @translated = TranslateStack($symbols, $k); + my @translated = TranslateStack($symbols, $fullname_to_shortname_map, $k); my @path = (); my %seen = (); $seen{''} = 1; # So that empty keys are skipped @@ -2775,7 +3150,8 @@ sub AddEntries { sub CheckSymbolPage { my $url = SymbolPageURL(); - open(SYMBOL, "$URL_FETCHER '$url' |"); + my $command = ShellEscape(@URL_FETCHER, $url); + open(SYMBOL, "$command |") or error($command); my $line = ; $line =~ s/\r//g; # turn windows-looking 
lines into unix-looking lines close(SYMBOL); @@ -2832,7 +3208,7 @@ sub SymbolPageURL { sub FetchProgramName() { my ($host, $baseURL, $path) = ParseProfileURL($main::pfile_args[0]); my $url = "$baseURL$PROGRAM_NAME_PAGE"; - my $command_line = "$URL_FETCHER '$url'"; + my $command_line = ShellEscape(@URL_FETCHER, $url); open(CMDLINE, "$command_line |") or error($command_line); my $cmdline = ; $cmdline =~ s/\r//g; # turn windows-looking lines into unix-looking lines @@ -2849,7 +3225,7 @@ sub FetchProgramName() { # curl. Redirection happens on borg hosts. sub ResolveRedirectionForCurl { my $url = shift; - my $command_line = "$URL_FETCHER --head '$url'"; + my $command_line = ShellEscape(@URL_FETCHER, "--head", $url); open(CMDLINE, "$command_line |") or error($command_line); while () { s/\r//g; # turn windows-looking lines into unix-looking lines @@ -2861,18 +3237,18 @@ sub ResolveRedirectionForCurl { return $url; } -# Add a timeout flat to URL_FETCHER +# Add a timeout flat to URL_FETCHER. Returns a new list. 
sub AddFetchTimeout { - my $fetcher = shift; my $timeout = shift; + my @fetcher = shift; if (defined($timeout)) { - if ($fetcher =~ m/\bcurl -s/) { - $fetcher .= sprintf(" --max-time %d", $timeout); - } elsif ($fetcher =~ m/\brpcget\b/) { - $fetcher .= sprintf(" --deadline=%d", $timeout); + if (join(" ", @fetcher) =~ m/\bcurl -s/) { + push(@fetcher, "--max-time", sprintf("%d", $timeout)); + } elsif (join(" ", @fetcher) =~ m/\brpcget\b/) { + push(@fetcher, sprintf("--deadline=%d", $timeout)); } } - return $fetcher; + return @fetcher; } # Reads a symbol map from the file handle name given as $1, returning @@ -2932,15 +3308,17 @@ sub FetchSymbols { my $url = SymbolPageURL(); my $command_line; - if ($URL_FETCHER =~ m/\bcurl -s/) { + if (join(" ", @URL_FETCHER) =~ m/\bcurl -s/) { $url = ResolveRedirectionForCurl($url); - $command_line = "$URL_FETCHER -d '\@$main::tmpfile_sym' '$url'"; + $command_line = ShellEscape(@URL_FETCHER, "-d", "\@$main::tmpfile_sym", + $url); } else { - $command_line = "$URL_FETCHER --post '$url' < '$main::tmpfile_sym'"; + $command_line = (ShellEscape(@URL_FETCHER, "--post", $url) + . " < " . ShellEscape($main::tmpfile_sym)); } # We use c++filt in case $SYMBOL_PAGE gives us mangled symbols. - my $cppfilt = $obj_tool_map{"c++filt"}; - open(SYMBOL, "$command_line | $cppfilt |") or error($command_line); + my $escaped_cppfilt = ShellEscape($obj_tool_map{"c++filt"}); + open(SYMBOL, "$command_line | $escaped_cppfilt |") or error($command_line); $symbol_map = ReadSymbols(*SYMBOL{IO}); close(SYMBOL); } @@ -2956,8 +3334,8 @@ sub FetchSymbols { my $shortpc = $pc; $shortpc =~ s/^0*//; # Each line may have a list of names, which includes the function - # and also other functions it has inlined. They are separated - # (in PrintSymbolizedFile), by --, which is illegal in function names. + # and also other functions it has inlined. They are separated (in + # PrintSymbolizedProfile), by --, which is illegal in function names. 
my $fullnames; if (defined($symbol_map->{$shortpc})) { $fullnames = $symbol_map->{$shortpc}; @@ -3035,8 +3413,8 @@ sub FetchDynamicProfile { return $real_profile; } - my $fetcher = AddFetchTimeout($URL_FETCHER, $fetch_timeout); - my $cmd = "$fetcher '$url' > '$tmp_profile'"; + my @fetcher = AddFetchTimeout($fetch_timeout, @URL_FETCHER); + my $cmd = ShellEscape(@fetcher, $url) . " > " . ShellEscape($tmp_profile); if ($path =~ m/$PROFILE_PAGE|$PMUPROFILE_PAGE|$CENSUSPROFILE_PAGE/){ print STDERR "Gathering CPU profile from $url for $main::opt_seconds seconds to\n ${real_profile}\n"; if ($encourage_patience) { @@ -3047,7 +3425,7 @@ sub FetchDynamicProfile { } (system($cmd) == 0) || error("Failed to get profile: $cmd: $!\n"); - (system("mv $tmp_profile $real_profile") == 0) || error("Unable to rename profile\n"); + (system("mv", $tmp_profile, $real_profile) == 0) || error("Unable to rename profile\n"); print STDERR "Wrote profile to $real_profile\n"; $main::collected_profile = $real_profile; return $main::collected_profile; @@ -3161,7 +3539,7 @@ BEGIN { my $has_q = 0; eval { $has_q = pack("Q", "1") ? 1 : 1; }; if (!$has_q) { - $self->{perl_is_64bit} = 0; + $self->{perl_is_64bit} = 0; } read($self->{file}, $str, 8); if (substr($str, 4, 4) eq chr(0)x4) { @@ -3197,17 +3575,17 @@ BEGIN { # TODO(csilvers): if this is a 32-bit perl, the math below # could end up in a too-large int, which perl will promote # to a double, losing necessary precision. Deal with that. - # Right now, we just die. - my ($lo, $hi) = ($b32_values[$i], $b32_values[$i+1]); + # Right now, we just die. 
+ my ($lo, $hi) = ($b32_values[$i], $b32_values[$i+1]); if ($self->{unpack_code} eq 'N') { # big-endian - ($lo, $hi) = ($hi, $lo); - } - my $value = $lo + $hi * (2**32); - if (!$self->{perl_is_64bit} && # check value is exactly represented - (($value % (2**32)) != $lo || int($value / (2**32)) != $hi)) { - ::error("Need a 64-bit perl to process this 64-bit profile.\n"); - } - push(@b64_values, $value); + ($lo, $hi) = ($hi, $lo); + } + my $value = $lo + $hi * (2**32); + if (!$self->{perl_is_64bit} && # check value is exactly represented + (($value % (2**32)) != $lo || int($value / (2**32)) != $hi)) { + ::error("Need a 64-bit perl to process this 64-bit profile.\n"); + } + push(@b64_values, $value); } @$slots = @b64_values; } @@ -3335,7 +3713,7 @@ sub ReadProfile { if (!$main::use_symbolized_profile) { # we have both a binary and symbolized profiles, abort error("FATAL ERROR: Symbolized profile\n $fname\ncannot be used with " . - "a binary arg. Try again without passing\n $prog\n"); + "a binary arg. Try again without passing\n $prog\n"); } # Read the symbol section of the symbolized profile file. $symbols = ReadSymbols(*PROFILE{IO}); @@ -3636,18 +4014,18 @@ sub ReadHeapProfile { # The sampling frequency is the rate of a Poisson process. 
# This means that the probability of sampling an allocation of # size X with sampling rate Y is 1 - exp(-X/Y) - if ($n1 != 0) { - my $ratio = (($s1*1.0)/$n1)/($sample_adjustment); - my $scale_factor = 1/(1 - exp(-$ratio)); - $n1 *= $scale_factor; - $s1 *= $scale_factor; - } - if ($n2 != 0) { - my $ratio = (($s2*1.0)/$n2)/($sample_adjustment); - my $scale_factor = 1/(1 - exp(-$ratio)); - $n2 *= $scale_factor; - $s2 *= $scale_factor; - } + if ($n1 != 0) { + my $ratio = (($s1*1.0)/$n1)/($sample_adjustment); + my $scale_factor = 1/(1 - exp(-$ratio)); + $n1 *= $scale_factor; + $s1 *= $scale_factor; + } + if ($n2 != 0) { + my $ratio = (($s2*1.0)/$n2)/($sample_adjustment); + my $scale_factor = 1/(1 - exp(-$ratio)); + $n2 *= $scale_factor; + $s2 *= $scale_factor; + } } else { # Remote-heap version 1 my $ratio; @@ -3771,19 +4149,19 @@ sub ReadSynchProfile { return $r; } -# Given a hex value in the form "0x1abcd" return "0001abcd" or -# "000000000001abcd", depending on the current address length. -# There's probably a more idiomatic (or faster) way to do this... +# Given a hex value in the form "0x1abcd" or "1abcd", return either +# "0001abcd" or "000000000001abcd", depending on the current (global) +# address length. sub HexExtend { my $addr = shift; - $addr =~ s/^0x//; - - if (length $addr > $address_length) { - printf STDERR "Warning: address $addr is longer than address length $address_length\n"; + $addr =~ s/^(0x)?0*//; + my $zeros_needed = $address_length - length($addr); + if ($zeros_needed < 0) { + printf STDERR "Warning: address $addr is longer than address length $address_length\n"; + return $addr; } - - return substr("000000000000000".$addr, -$address_length); + return ("0" x $zeros_needed) . $addr; } ##### Symbol extraction ##### @@ -3834,9 +4212,8 @@ sub ParseTextSectionHeaderFromObjdump { my $file_offset; # Get objdump output from the library file to figure out how to # map between mapped addresses and addresses in the library. 
- my $objdump = $obj_tool_map{"objdump"}; - open(OBJDUMP, "$objdump -h $lib |") - || error("$objdump $lib: $!\n"); + my $cmd = ShellEscape($obj_tool_map{"objdump"}, "-h", $lib); + open(OBJDUMP, "$cmd |") || error("$cmd: $!\n"); while () { s/\r//g; # turn windows-looking lines into unix-looking lines # Idx Name Size VMA LMA File off Algn @@ -3874,9 +4251,8 @@ sub ParseTextSectionHeaderFromOtool { my $file_offset = undef; # Get otool output from the library file to figure out how to # map between mapped addresses and addresses in the library. - my $otool = $obj_tool_map{"otool"}; - open(OTOOL, "$otool -l $lib |") - || error("$otool $lib: $!\n"); + my $command = ShellEscape($obj_tool_map{"otool"}, "-l", $lib); + open(OTOOL, "$command |") || error("$command: $!\n"); my $cmd = ""; my $sectname = ""; my $segname = ""; @@ -4218,18 +4594,18 @@ sub ExtractSymbols { my ($start_pc_index, $finish_pc_index); # Find smallest finish_pc_index such that $finish < $pc[$finish_pc_index]. for ($finish_pc_index = $#pcs + 1; $finish_pc_index > 0; - $finish_pc_index--) { + $finish_pc_index--) { last if $pcs[$finish_pc_index - 1] le $finish; } # Find smallest start_pc_index such that $start <= $pc[$start_pc_index]. for ($start_pc_index = $finish_pc_index; $start_pc_index > 0; - $start_pc_index--) { + $start_pc_index--) { last if $pcs[$start_pc_index - 1] lt $start; } # This keeps PC values higher than $pc[$finish_pc_index] in @pcs, # in case there are overlaps in libraries and the main binary. 
@{$contained} = splice(@pcs, $start_pc_index, - $finish_pc_index - $start_pc_index); + $finish_pc_index - $start_pc_index); # Map to symbols MapToSymbols($libname, AddressSub($start, $offset), $contained, $symbols); } @@ -4251,15 +4627,15 @@ sub MapToSymbols { # Figure out the addr2line command to use my $addr2line = $obj_tool_map{"addr2line"}; - my $cmd = "$addr2line -f -C -e $image"; + my $cmd = ShellEscape($addr2line, "-f", "-C", "-e", $image); if (exists $obj_tool_map{"addr2line_pdb"}) { $addr2line = $obj_tool_map{"addr2line_pdb"}; - $cmd = "$addr2line --demangle -f -C -e $image"; + $cmd = ShellEscape($addr2line, "--demangle", "-f", "-C", "-e", $image); } # If "addr2line" isn't installed on the system at all, just use # nm to get what info we can (function names, but not line numbers). - if (system("$addr2line --help >/dev/null 2>&1") != 0) { + if (system(ShellEscape($addr2line, "--help") . " >$dev_null 2>&1") != 0) { MapSymbolsWithNM($image, $offset, $pclist, $symbols); return; } @@ -4273,11 +4649,10 @@ sub MapToSymbols { $sep_address = undef; # May be filled in by MapSymbolsWithNM() my $nm_symbols = {}; MapSymbolsWithNM($image, $offset, $pclist, $nm_symbols); - # TODO(csilvers): only add '-i' if addr2line supports it. if (defined($sep_address)) { # Only add " -i" to addr2line if the binary supports it. # addr2line --help returns 0, but not if it sees an unknown flag first. - if (system("$cmd -i --help >/dev/null 2>&1") == 0) { + if (system("$cmd -i --help >$dev_null 2>&1") == 0) { $cmd .= " -i"; } else { $sep_address = undef; # no need for sep_address if we don't support -i @@ -4299,13 +4674,14 @@ sub MapToSymbols { close(ADDRESSES); if ($debug) { print("----\n"); - system("cat $main::tmpfile_sym"); + system("cat", $main::tmpfile_sym); print("----\n"); - system("$cmd <$main::tmpfile_sym"); + system("$cmd < " . ShellEscape($main::tmpfile_sym)); print("----\n"); } - open(SYMBOLS, "$cmd <$main::tmpfile_sym |") || error("$cmd: $!\n"); + open(SYMBOLS, "$cmd <" . 
ShellEscape($main::tmpfile_sym) . " |") + || error("$cmd: $!\n"); my $count = 0; # Index in pclist while () { # Read fullfunction and filelineinfo from next pair of lines @@ -4325,15 +4701,29 @@ sub MapToSymbols { my $pcstr = $pclist->[$count]; my $function = ShortFunctionName($fullfunction); - if ($fullfunction eq '??') { - # See if nm found a symbol - my $nms = $nm_symbols->{$pcstr}; - if (defined($nms)) { + my $nms = $nm_symbols->{$pcstr}; + if (defined($nms)) { + if ($fullfunction eq '??') { + # nm found a symbol for us. $function = $nms->[0]; $fullfunction = $nms->[2]; + } else { + # MapSymbolsWithNM tags each routine with its starting address, + # useful in case the image has multiple occurrences of this + # routine. (It uses a syntax that resembles template paramters, + # that are automatically stripped out by ShortFunctionName().) + # addr2line does not provide the same information. So we check + # if nm disambiguated our symbol, and if so take the annotated + # (nm) version of the routine-name. TODO(csilvers): this won't + # catch overloaded, inlined symbols, which nm doesn't see. + # Better would be to do a check similar to nm's, in this fn. 
+ if ($nms->[2] =~ m/^\Q$function\E/) { # sanity check it's the right fn + $function = $nms->[0]; + $fullfunction = $nms->[2]; + } } } - + # Prepend to accumulated symbols for pcstr # (so that caller comes before callee) my $sym = $symbols->{$pcstr}; @@ -4344,7 +4734,7 @@ sub MapToSymbols { unshift(@{$sym}, $function, $filelinenum, $fullfunction); if ($debug) { printf STDERR ("%s => [%s]\n", $pcstr, join(" ", @{$sym})); } if (!defined($sep_address)) { - # Inlining is off, se this entry ends immediately + # Inlining is off, so this entry ends immediately $count++; } } @@ -4407,6 +4797,31 @@ sub ShortFunctionName { return $function; } +# Trim overly long symbols found in disassembler output +sub CleanDisassembly { + my $d = shift; + while ($d =~ s/\([^()%]*\)(\s*const)?//g) { } # Argument types, not (%rax) + while ($d =~ s/(\w+)<[^<>]*>/$1/g) { } # Remove template arguments + return $d; +} + +# Clean file name for display +sub CleanFileName { + my ($f) = @_; + $f =~ s|^/proc/self/cwd/||; + $f =~ s|^\./||; + return $f; +} + +# Make address relative to section and clean up for display +sub UnparseAddress { + my ($offset, $address) = @_; + $address = AddressSub($address, $offset); + $address =~ s/^0x//; + $address =~ s/^0*//; + return $address; +} + ##### Miscellaneous ##### # Find the right versions of the above object tools to use. The @@ -4423,8 +4838,18 @@ sub ConfigureObjTools { # predictably return error status in prod. (-e $prog_file) || error("$prog_file does not exist.\n"); - # Follow symlinks (at least for systems where "file" supports that) - my $file_type = `/usr/bin/file -L $prog_file 2>/dev/null || /usr/bin/file $prog_file`; + my $file_type = undef; + if (-e "/usr/bin/file") { + # Follow symlinks (at least for systems where "file" supports that). 
+ my $escaped_prog_file = ShellEscape($prog_file); + $file_type = `/usr/bin/file -L $escaped_prog_file 2>$dev_null || + /usr/bin/file $escaped_prog_file`; + } elsif ($^O == "MSWin32") { + $file_type = "MS Windows"; + } else { + print STDERR "WARNING: Can't determine the file type of $prog_file"; + } + if ($file_type =~ /64-bit/) { # Change $address_length to 16 if the program file is ELF 64-bit. # We can't detect this from many (most?) heap or lock contention @@ -4500,6 +4925,19 @@ sub ConfigureTool { return $path; } +sub ShellEscape { + my @escaped_words = (); + foreach my $word (@_) { + my $escaped_word = $word; + if ($word =~ m![^a-zA-Z0-9/.,_=-]!) { # check for anything not in whitelist + $escaped_word =~ s/'/'\\''/; + $escaped_word = "'$escaped_word'"; + } + push(@escaped_words, $escaped_word); + } + return join(" ", @escaped_words); +} + sub cleanup { unlink($main::tmpfile_sym); unlink(keys %main::tempnames); @@ -4537,11 +4975,11 @@ sub error { # names match "$regexp" and returns them in a hashtable mapping from # procedure name to a two-element vector of [start address, end address] sub GetProcedureBoundariesViaNm { - my $nm_command = shift; + my $escaped_nm_command = shift; # shell-escaped my $regexp = shift; my $symbol_table = {}; - open(NM, "$nm_command |") || error("$nm_command: $!\n"); + open(NM, "$escaped_nm_command |") || error("$escaped_nm_command: $!\n"); my $last_start = "0"; my $routine = ""; while () { @@ -4619,6 +5057,21 @@ sub GetProcedureBoundaries { my $image = shift; my $regexp = shift; + # If $image doesn't start with /, then put ./ in front of it. This works + # around an obnoxious bug in our probing of nm -f behavior. + # "nm -f $image" is supposed to fail on GNU nm, but if: + # + # a. $image starts with [BbSsPp] (for example, bin/foo/bar), AND + # b. 
you have a.out in your current directory (a not uncommon occurence) + # + # then "nm -f $image" succeeds because -f only looks at the first letter of + # the argument, which looks valid because it's [BbSsPp], and then since + # there's no image provided, it looks for a.out and finds it. + # + # This regex makes sure that $image starts with . or /, forcing the -f + # parsing to fail since . and / are not valid formats. + $image =~ s#^[^/]#./$&#; + # For libc libraries, the copy in /usr/lib/debug contains debugging symbols my $debugging = DebuggingLibrary($image); if ($debugging) { @@ -4636,28 +5089,29 @@ sub GetProcedureBoundaries { # --demangle and -f. my $demangle_flag = ""; my $cppfilt_flag = ""; - if (system("$nm --demangle $image >/dev/null 2>&1") == 0) { + my $to_devnull = ">$dev_null 2>&1"; + if (system(ShellEscape($nm, "--demangle", "image") . $to_devnull) == 0) { # In this mode, we do "nm --demangle " $demangle_flag = "--demangle"; $cppfilt_flag = ""; - } elsif (system("$cppfilt $image >/dev/null 2>&1") == 0) { + } elsif (system(ShellEscape($cppfilt, $image) . $to_devnull) == 0) { # In this mode, we do "nm | c++filt" - $cppfilt_flag = " | $cppfilt"; + $cppfilt_flag = " | " . ShellEscape($cppfilt); }; my $flatten_flag = ""; - if (system("$nm -f $image >/dev/null 2>&1") == 0) { + if (system(ShellEscape($nm, "-f", $image) . $to_devnull) == 0) { $flatten_flag = "-f"; } # Finally, in the case $imagie isn't a debug library, we try again with # -D to at least get *exported* symbols. If we can't use --demangle, # we use c++filt instead, if it exists on this system. - my @nm_commands = ("$nm -n $flatten_flag $demangle_flag" . - " $image 2>/dev/null $cppfilt_flag", - "$nm -D -n $flatten_flag $demangle_flag" . - " $image 2>/dev/null $cppfilt_flag", + my @nm_commands = (ShellEscape($nm, "-n", $flatten_flag, $demangle_flag, + $image) . " 2>$dev_null $cppfilt_flag", + ShellEscape($nm, "-D", "-n", $flatten_flag, $demangle_flag, + $image) . 
" 2>$dev_null $cppfilt_flag", # 6nm is for Go binaries - "6nm $image 2>/dev/null | sort", + ShellEscape("6nm", "$image") . " 2>$dev_null | sort", ); # If the executable is an MS Windows PDB-format executable, we'll @@ -4665,8 +5119,9 @@ sub GetProcedureBoundaries { # want to use both unix nm and windows-specific nm_pdb, since # PDB-format executables can apparently include dwarf .o files. if (exists $obj_tool_map{"nm_pdb"}) { - my $nm_pdb = $obj_tool_map{"nm_pdb"}; - push(@nm_commands, "$nm_pdb --demangle $image 2>/dev/null"); + push(@nm_commands, + ShellEscape($obj_tool_map{"nm_pdb"}, "--demangle", $image) + . " 2>$dev_null"); } foreach my $nm_command (@nm_commands) { diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 98d0ba41..3a6781b3 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1006,7 +1006,7 @@ malloc_conf = "xmalloc:true";]]> option for information on high-water-triggered profile dumping. Profile output is compatible with the included pprof Perl script, which originates from the google-perftools + url="http://code.google.com/p/gperftools/">gperftools package.
From 0b25fe79aaf8840a5acda7e3160a053d42349872 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 17 Apr 2012 16:39:33 -0700 Subject: [PATCH 0162/3378] Update prof defaults to match common usage. Change the "opt.lg_prof_sample" default from 0 to 19 (1 B to 512 KiB). Change the "opt.prof_accum" default from true to false. Add the "opt.prof_final" mallctl, so that "opt.prof_prefix" need not be abused to disable final profile dumping. --- ChangeLog | 3 +++ doc/jemalloc.xml.in | 45 ++++++++++++++++++++------------ include/jemalloc/internal/prof.h | 3 ++- src/ctl.c | 3 +++ src/jemalloc.c | 1 + src/prof.c | 5 ++-- src/stats.c | 1 + 7 files changed, 41 insertions(+), 20 deletions(-) diff --git a/ChangeLog b/ChangeLog index 1db47d8c..1fed914f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -34,6 +34,7 @@ found in the git revision history: - Add the --with-mangling option. - Add the --disable-experimental option. - Add the "thread.tcache.enabled" mallctl. + - Add the "opt.prof_final" mallctl. - Update pprof (from gperftools 2.0). Incompatible changes: @@ -42,6 +43,8 @@ found in the git revision history: - Disable lazy locking by default. - Rename the "tcache.flush" mallctl to "thread.tcache.flush". - Rename the "arenas.pagesize" mallctl to "arenas.page". + - Change the "opt.lg_prof_sample" default from 0 to 19 (1 B to 512 KiB). + - Change the "opt.prof_accum" default from true to false. Removed features: - Remove the swap feature, including the "config.swap", "swap.avail", diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 3a6781b3..f78f423c 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -986,14 +986,7 @@ malloc_conf = "xmalloc:true";]]> []
Memory profiling enabled/disabled. If enabled, profile - memory allocation activity, and use an - atexit - 3 function to dump final memory - usage to a file named according to the pattern - <prefix>.<pid>.<seq>.f.heap, - where <prefix> is controlled by the opt.prof_prefix - option. See the opt.prof_active option for on-the-fly activation/deactivation. See the opt.lg_prof_sample @@ -1001,12 +994,13 @@ malloc_conf = "xmalloc:true";]]> linkend="opt.prof_accum">opt.prof_accum option for control of cumulative sample reporting. See the opt.lg_prof_interval - option for information on interval-triggered profile dumping, and the - opt.prof_gdump - option for information on high-water-triggered profile dumping. - Profile output is compatible with the included pprof - Perl script, which originates from the gperftools + option for information on interval-triggered profile dumping, the opt.prof_gdump + option for information on high-water-triggered profile dumping, and the + opt.prof_final + option for final profile dumping. Profile output is compatible with + the included pprof Perl script, which originates + from the gperftools package.
@@ -1051,8 +1045,8 @@ malloc_conf = "xmalloc:true";]]> Average interval (log base 2) between allocation samples, as measured in bytes of allocation activity. Increasing the sampling interval decreases profile fidelity, but also decreases the - computational overhead. The default sample interval is 1 (2^0) (i.e. - all allocations are sampled). + computational overhead. The default sample interval is 512 KiB (2^19 + B). @@ -1066,7 +1060,7 @@ malloc_conf = "xmalloc:true";]]> dumps enabled/disabled. If this option is enabled, every unique backtrace must be stored for the duration of execution. Depending on the application, this can impose a large memory overhead, and the - cumulative counts are not always of interest. This option is enabled + cumulative counts are not always of interest. This option is disabled by default. @@ -1107,6 +1101,23 @@ malloc_conf = "xmalloc:true";]]> option. This option is disabled by default. + + + opt.prof_final + (bool) + r- + [] + + Use an + atexit + 3 function to dump final memory + usage to a file named according to the pattern + <prefix>.<pid>.<seq>.f.heap, + where <prefix> is controlled by the opt.prof_prefix + option. This option is enabled by default. + + opt.prof_leak diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index a37bb448..a4c563cc 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -9,7 +9,7 @@ typedef struct prof_tdata_s prof_tdata_t; /* Option defaults. */ #define PROF_PREFIX_DEFAULT "jeprof" -#define LG_PROF_SAMPLE_DEFAULT 0 +#define LG_PROF_SAMPLE_DEFAULT 19 #define LG_PROF_INTERVAL_DEFAULT -1 /* @@ -169,6 +169,7 @@ extern bool opt_prof_active; extern size_t opt_lg_prof_sample; /* Mean bytes between samples. */ extern ssize_t opt_lg_prof_interval; /* lg(prof_interval). */ extern bool opt_prof_gdump; /* High-water memory dumping. */ +extern bool opt_prof_final; /* Final profile dumping. */ extern bool opt_prof_leak; /* Dump leak summary at exit. 
*/ extern bool opt_prof_accum; /* Report cumulative bytes. */ extern char opt_prof_prefix[PATH_MAX + 1]; diff --git a/src/ctl.c b/src/ctl.c index a6a02cc5..98ea3d1c 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -80,6 +80,7 @@ CTL_PROTO(opt_prof_active) CTL_PROTO(opt_lg_prof_sample) CTL_PROTO(opt_lg_prof_interval) CTL_PROTO(opt_prof_gdump) +CTL_PROTO(opt_prof_final) CTL_PROTO(opt_prof_leak) CTL_PROTO(opt_prof_accum) CTL_PROTO(arenas_bin_i_size) @@ -210,6 +211,7 @@ static const ctl_node_t opt_node[] = { {NAME("lg_prof_sample"), CTL(opt_lg_prof_sample)}, {NAME("lg_prof_interval"), CTL(opt_lg_prof_interval)}, {NAME("prof_gdump"), CTL(opt_prof_gdump)}, + {NAME("prof_final"), CTL(opt_prof_final)}, {NAME("prof_leak"), CTL(opt_prof_leak)}, {NAME("prof_accum"), CTL(opt_prof_accum)} }; @@ -1122,6 +1124,7 @@ CTL_RO_CGEN(config_prof, opt_prof_active, opt_prof_active, bool) /* Mutable. */ CTL_RO_NL_CGEN(config_prof, opt_lg_prof_sample, opt_lg_prof_sample, size_t) CTL_RO_NL_CGEN(config_prof, opt_lg_prof_interval, opt_lg_prof_interval, ssize_t) CTL_RO_NL_CGEN(config_prof, opt_prof_gdump, opt_prof_gdump, bool) +CTL_RO_NL_CGEN(config_prof, opt_prof_final, opt_prof_final, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_leak, opt_prof_leak, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_accum, opt_prof_accum, bool) diff --git a/src/jemalloc.c b/src/jemalloc.c index 0decd8a8..d4b681b4 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -561,6 +561,7 @@ malloc_conf_init(void) lg_prof_interval, -1, (sizeof(uint64_t) << 3) - 1) CONF_HANDLE_BOOL(opt_prof_gdump, prof_gdump) + CONF_HANDLE_BOOL(opt_prof_final, prof_final) CONF_HANDLE_BOOL(opt_prof_leak, prof_leak) } malloc_conf_error("Invalid conf pair", k, klen, v, diff --git a/src/prof.c b/src/prof.c index b509aaef..227560b8 100644 --- a/src/prof.c +++ b/src/prof.c @@ -21,8 +21,9 @@ bool opt_prof_active = true; size_t opt_lg_prof_sample = LG_PROF_SAMPLE_DEFAULT; ssize_t opt_lg_prof_interval = LG_PROF_INTERVAL_DEFAULT; bool opt_prof_gdump = false; +bool 
opt_prof_final = true; bool opt_prof_leak = false; -bool opt_prof_accum = true; +bool opt_prof_accum = false; char opt_prof_prefix[PATH_MAX + 1]; uint64_t prof_interval; @@ -944,7 +945,7 @@ prof_fdump(void) if (prof_booted == false) return; - if (opt_prof_prefix[0] != '\0') { + if (opt_prof_final && opt_prof_prefix[0] != '\0') { malloc_mutex_lock(&prof_dump_seq_mtx); prof_dump_filename(filename, 'f', UINT64_C(0xffffffffffffffff)); malloc_mutex_unlock(&prof_dump_seq_mtx); diff --git a/src/stats.c b/src/stats.c index 4cad214f..08f7098c 100644 --- a/src/stats.c +++ b/src/stats.c @@ -397,6 +397,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, OPT_WRITE_BOOL(prof_accum) OPT_WRITE_SSIZE_T(lg_prof_interval) OPT_WRITE_BOOL(prof_gdump) + OPT_WRITE_BOOL(prof_final) OPT_WRITE_BOOL(prof_leak) #undef OPT_WRITE_BOOL From 78f7352259768f670f8e1f9b000388dd32b62493 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 18 Apr 2012 13:38:40 -0700 Subject: [PATCH 0163/3378] Clean up a few config-related conditionals/asserts. Clean up a few config-related conditionals to avoid unnecessary dependencies on prof symbols. Use cassert() rather than assert() everywhere that it's appropriate. 
--- src/arena.c | 10 ++++++---- src/quarantine.c | 4 ++-- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/arena.c b/src/arena.c index 989034d4..0f15562a 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1213,7 +1213,9 @@ void arena_prof_accum(arena_t *arena, uint64_t accumbytes) { - if (prof_interval != 0) { + cassert(config_prof); + + if (config_prof && prof_interval != 0) { arena->prof_accumbytes += accumbytes; if (arena->prof_accumbytes >= prof_interval) { prof_idump(); @@ -1490,8 +1492,8 @@ arena_salloc(const void *ptr, bool demote) } else { assert(((uintptr_t)ptr & PAGE_MASK) == 0); ret = mapbits & ~PAGE_MASK; - if (demote && prof_promote && ret == PAGE && (mapbits & - CHUNK_MAP_CLASS_MASK) != 0) { + if (config_prof && demote && prof_promote && ret == PAGE && + (mapbits & CHUNK_MAP_CLASS_MASK) != 0) { size_t binind = ((mapbits & CHUNK_MAP_CLASS_MASK) >> CHUNK_MAP_CLASS_SHIFT) - 1; assert(binind < NBINS); @@ -1509,7 +1511,7 @@ arena_prof_promoted(const void *ptr, size_t size) arena_chunk_t *chunk; size_t pageind, binind; - assert(config_prof); + cassert(config_prof); assert(ptr != NULL); assert(CHUNK_ADDR2BASE(ptr) != ptr); assert(isalloc(ptr, false) == PAGE); diff --git a/src/quarantine.c b/src/quarantine.c index 89a25c6a..5fb6c390 100644 --- a/src/quarantine.c +++ b/src/quarantine.c @@ -101,7 +101,7 @@ quarantine(void *ptr) quarantine_t *quarantine; size_t usize = isalloc(ptr, config_prof); - assert(config_fill); + cassert(config_fill); assert(opt_quarantine); quarantine = *quarantine_tsd_get(); @@ -154,7 +154,7 @@ bool quarantine_boot(void) { - assert(config_fill); + cassert(config_fill); if (quarantine_tsd_boot()) return (true); From 85221d5d75be26ce8941cc08a798e69ecdd0a57c Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Wed, 18 Apr 2012 18:29:40 +0200 Subject: [PATCH 0164/3378] Make versioned shared library suffix configurable This allows for different patterns for file names: - lib.so.version for e.g. 
Linux - lib.version.dylib for OSX (which is much more common than lib.dylib.version) - lib.dll for Windows (no version at all). --- Makefile.in | 21 ++++++++++++++------- bin/jemalloc.sh.in | 2 +- configure.ac | 3 +++ 3 files changed, 18 insertions(+), 8 deletions(-) diff --git a/Makefile.in b/Makefile.in index 0dfddea0..d426cbdb 100644 --- a/Makefile.in +++ b/Makefile.in @@ -47,6 +47,7 @@ cfgoutputs_out := @cfgoutputs_out@ enable_autogen := @enable_autogen@ enable_experimental := @enable_experimental@ DSO_LDFLAGS = @DSO_LDFLAGS@ +SOREV = @SOREV@ ifeq (macho, $(ABI)) TEST_LIBRARY_PATH := DYLD_FALLBACK_LIBRARY_PATH=$(objroot)lib @@ -72,9 +73,11 @@ ifeq (macho, $(ABI)) CSRCS += $(srcroot)src/zone.c endif STATIC_LIBS := $(objroot)lib/$(LIBJEMALLOC).$(A) -DSOS := $(objroot)lib/$(LIBJEMALLOC).$(SO).$(REV) \ - $(objroot)lib/$(LIBJEMALLOC).$(SO) \ - $(objroot)lib/$(LIBJEMALLOC)_pic.$(A) +DSOS := $(objroot)lib/$(LIBJEMALLOC)_pic.$(A) \ + $(objroot)lib/$(LIBJEMALLOC).$(SOREV) +ifneq ($(SOREV),$(SO)) +DSOS += $(objroot)lib/$(LIBJEMALLOC).$(SO) +endif MAN3 := $(objroot)doc/jemalloc$(install_suffix).3 DOCS_XML := $(objroot)doc/jemalloc$(install_suffix).xml DOCS_HTML := $(DOCS_XML:$(objroot)%.xml=$(srcroot)%.html) @@ -127,11 +130,13 @@ $(objroot)src/%.pic.$(O): $(srcroot)src/%.c $(CC) $(CFLAGS) -fPIC -DPIC -c $(CPPFLAGS) -o $@ $< @$(CC) -MM $(CPPFLAGS) -MT $@ -o $(@:%.$(O)=%.d) $< -%.$(SO) : %.$(SO).$(REV) +ifneq ($(SOREV),$(SO)) +%.$(SO) : %.$(SOREV) @mkdir -p $(@D) ln -sf $( Date: Wed, 18 Apr 2012 18:29:41 +0200 Subject: [PATCH 0165/3378] Refactor object and library build, and only build PIC libraries when PIC_CFLAGS is defined --- Makefile.in | 75 +++++++++++++++++++++++++++------------------------- configure.ac | 2 ++ 2 files changed, 41 insertions(+), 36 deletions(-) diff --git a/Makefile.in b/Makefile.in index d426cbdb..8a34928b 100644 --- a/Makefile.in +++ b/Makefile.in @@ -48,6 +48,7 @@ enable_autogen := @enable_autogen@ enable_experimental := @enable_experimental@ 
DSO_LDFLAGS = @DSO_LDFLAGS@ SOREV = @SOREV@ +PIC_CFLAGS = @PIC_CFLAGS@ ifeq (macho, $(ABI)) TEST_LIBRARY_PATH := DYLD_FALLBACK_LIBRARY_PATH=$(objroot)lib @@ -73,8 +74,10 @@ ifeq (macho, $(ABI)) CSRCS += $(srcroot)src/zone.c endif STATIC_LIBS := $(objroot)lib/$(LIBJEMALLOC).$(A) -DSOS := $(objroot)lib/$(LIBJEMALLOC)_pic.$(A) \ - $(objroot)lib/$(LIBJEMALLOC).$(SOREV) +ifdef PIC_CFLAGS +STATIC_LIBS += $(objroot)lib/$(LIBJEMALLOC)_pic.$(A) +endif +DSOS := $(objroot)lib/$(LIBJEMALLOC).$(SOREV) ifneq ($(SOREV),$(SO)) DSOS += $(objroot)lib/$(LIBJEMALLOC).$(SO) endif @@ -91,12 +94,16 @@ ifeq ($(enable_experimental), 1) CTESTS += $(srcroot)test/allocm.c $(srcroot)test/rallocm.c endif +COBJS := $(CSRCS:$(srcroot)%.c=$(objroot)%.$(O)) +CPICOBJS := $(CSRCS:$(srcroot)%.c=$(objroot)%.pic.$(O)) +CTESTOBJS := $(CTESTS:$(srcroot)%.c=$(objroot)%.$(O)) + .PHONY: all dist doc_html doc_man doc .PHONY: install_bin install_include install_lib .PHONY: install_html install_man install_doc install .PHONY: tests check clean distclean relclean -.SECONDARY : $(CTESTS:$(srcroot)%.c=$(objroot)%.$(O)) +.SECONDARY : $(CTESTOBJS) # Default target. all: build @@ -116,47 +123,41 @@ build_doc: $(DOCS) # # Include generated dependency files. 
# --include $(CSRCS:$(srcroot)%.c=$(objroot)%.d) --include $(CSRCS:$(srcroot)%.c=$(objroot)%.pic.d) --include $(CTESTS:$(srcroot)%.c=$(objroot)%.d) +-include $(COBJS:%.$(O)=%.d) +-include $(CPICOBJS:%.$(O)=%.d) +-include $(CTESTOBJS:%.$(O)=%.d) -$(objroot)src/%.$(O): $(srcroot)src/%.c +$(COBJS): $(objroot)src/%.$(O): $(srcroot)src/%.c +$(CPICOBJS): $(objroot)src/%.pic.$(O): $(srcroot)src/%.c +$(CPICOBJS): CFLAGS += $(PIC_CFLAGS) +$(CTESTOBJS): $(objroot)test/%.$(O): $(srcroot)test/%.c +$(CTESTOBJS): CPPFLAGS += -I$(objroot)test + +$(COBJS) $(CPICOBJS) $(CTESTOBJS): %.$(O): @mkdir -p $(@D) $(CC) $(CFLAGS) -c $(CPPFLAGS) -o $@ $< @$(CC) -MM $(CPPFLAGS) -MT $@ -o $(@:%.$(O)=%.d) $< -$(objroot)src/%.pic.$(O): $(srcroot)src/%.c - @mkdir -p $(@D) - $(CC) $(CFLAGS) -fPIC -DPIC -c $(CPPFLAGS) -o $@ $< - @$(CC) -MM $(CPPFLAGS) -MT $@ -o $(@:%.$(O)=%.d) $< - ifneq ($(SOREV),$(SO)) %.$(SO) : %.$(SOREV) @mkdir -p $(@D) ln -sf $( Date: Wed, 18 Apr 2012 18:29:42 +0200 Subject: [PATCH 0166/3378] Add an abstraction layer for threading in tests --- test/allocated.c | 30 +++++++++--------------------- test/jemalloc_test.h.in | 22 ++++++++++++++++++++++ test/thread_arena.c | 18 +++++------------- test/thread_tcache_enabled.c | 30 +++++++++--------------------- 4 files changed, 45 insertions(+), 55 deletions(-) diff --git a/test/allocated.c b/test/allocated.c index 81cd4ca9..00039ed8 100644 --- a/test/allocated.c +++ b/test/allocated.c @@ -2,7 +2,6 @@ #include #include #include -#include #include #include #include @@ -11,7 +10,7 @@ #include "jemalloc_test.h" void * -thread_start(void *arg) +je_thread_start(void *arg) { int err; void *p; @@ -106,33 +105,22 @@ int main(void) { int ret = 0; - pthread_t thread; + je_thread_t thread; malloc_printf("Test begin\n"); - thread_start(NULL); + je_thread_start(NULL); - if (pthread_create(&thread, NULL, thread_start, NULL) - != 0) { - malloc_printf("%s(): Error in pthread_create()\n", __func__); - ret = 1; - goto label_return; - } - 
pthread_join(thread, (void *)&ret); + je_thread_create(&thread, je_thread_start, NULL); + je_thread_join(thread, (void *)&ret); - thread_start(NULL); + je_thread_start(NULL); - if (pthread_create(&thread, NULL, thread_start, NULL) - != 0) { - malloc_printf("%s(): Error in pthread_create()\n", __func__); - ret = 1; - goto label_return; - } - pthread_join(thread, (void *)&ret); + je_thread_create(&thread, je_thread_start, NULL); + je_thread_join(thread, (void *)&ret); - thread_start(NULL); + je_thread_start(NULL); -label_return: malloc_printf("Test end\n"); return (ret); } diff --git a/test/jemalloc_test.h.in b/test/jemalloc_test.h.in index 58fa08e4..8833a03e 100644 --- a/test/jemalloc_test.h.in +++ b/test/jemalloc_test.h.in @@ -5,3 +5,25 @@ */ #include "jemalloc/jemalloc@install_suffix@.h" #include "jemalloc/internal/jemalloc_internal.h" + +/* Abstraction layer for threading in tests */ +#include + +typedef pthread_t je_thread_t; + +void +je_thread_create(je_thread_t *thread, void *(*proc)(void *), void *arg) +{ + + if (pthread_create(thread, NULL, proc, arg) != 0) { + malloc_printf("Error in pthread_create()\n"); + exit(1); + } +} + +void +je_thread_join(je_thread_t thread, void **ret) +{ + + pthread_join(thread, ret); +} diff --git a/test/thread_arena.c b/test/thread_arena.c index e443b712..98354282 100644 --- a/test/thread_arena.c +++ b/test/thread_arena.c @@ -1,6 +1,5 @@ #include #include -#include #include #include @@ -10,7 +9,7 @@ #define NTHREADS 10 void * -thread_start(void *arg) +je_thread_start(void *arg) { unsigned main_arena_ind = *(unsigned *)arg; void *p; @@ -52,7 +51,7 @@ main(void) unsigned arena_ind; size_t size; int err; - pthread_t threads[NTHREADS]; + je_thread_t threads[NTHREADS]; unsigned i; malloc_printf("Test begin\n"); @@ -72,18 +71,11 @@ main(void) goto label_return; } - for (i = 0; i < NTHREADS; i++) { - if (pthread_create(&threads[i], NULL, thread_start, - (void *)&arena_ind) != 0) { - malloc_printf("%s(): Error in pthread_create()\n", - 
__func__); - ret = 1; - goto label_return; - } - } + for (i = 0; i < NTHREADS; i++) + je_thread_create(&threads[i], je_thread_start, (void *)&arena_ind); for (i = 0; i < NTHREADS; i++) - pthread_join(threads[i], (void *)&ret); + je_thread_join(threads[i], (void *)&ret); label_return: malloc_printf("Test end\n"); diff --git a/test/thread_tcache_enabled.c b/test/thread_tcache_enabled.c index 59b76a27..9f765841 100644 --- a/test/thread_tcache_enabled.c +++ b/test/thread_tcache_enabled.c @@ -1,7 +1,6 @@ #include #include #include -#include #include #include @@ -9,7 +8,7 @@ #include "jemalloc_test.h" void * -thread_start(void *arg) +je_thread_start(void *arg) { int err; size_t sz; @@ -77,33 +76,22 @@ int main(void) { int ret = 0; - pthread_t thread; + je_thread_t thread; malloc_printf("Test begin\n"); - thread_start(NULL); + je_thread_start(NULL); - if (pthread_create(&thread, NULL, thread_start, NULL) - != 0) { - malloc_printf("%s(): Error in pthread_create()\n", __func__); - ret = 1; - goto label_return; - } - pthread_join(thread, (void *)&ret); + je_thread_create(&thread, je_thread_start, NULL); + je_thread_join(thread, (void *)&ret); - thread_start(NULL); + je_thread_start(NULL); - if (pthread_create(&thread, NULL, thread_start, NULL) - != 0) { - malloc_printf("%s(): Error in pthread_create()\n", __func__); - ret = 1; - goto label_return; - } - pthread_join(thread, (void *)&ret); + je_thread_create(&thread, je_thread_start, NULL); + je_thread_join(thread, (void *)&ret); - thread_start(NULL); + je_thread_start(NULL); -label_return: malloc_printf("Test end\n"); return (ret); } From 666c5bf7a8baaa842da69cb402948411432a9d00 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Wed, 18 Apr 2012 18:29:43 +0200 Subject: [PATCH 0167/3378] Add a pages_purge function to wrap madvise(JEMALLOC_MADV_PURGE) calls This will be used to implement the feature on mingw, which doesn't have madvise. 
--- include/jemalloc/internal/chunk_mmap.h | 2 ++ include/jemalloc/internal/private_namespace.h | 1 + include/jemalloc/jemalloc_defs.h.in | 7 ------- src/arena.c | 4 ++-- src/chunk.c | 2 +- src/chunk_mmap.c | 14 ++++++++++++++ 6 files changed, 20 insertions(+), 10 deletions(-) diff --git a/include/jemalloc/internal/chunk_mmap.h b/include/jemalloc/internal/chunk_mmap.h index 04e86af9..2d01ac22 100644 --- a/include/jemalloc/internal/chunk_mmap.h +++ b/include/jemalloc/internal/chunk_mmap.h @@ -9,6 +9,8 @@ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS +void pages_purge(void *addr, size_t length); + void *chunk_alloc_mmap(size_t size, size_t alignment); bool chunk_dealloc_mmap(void *chunk, size_t size); diff --git a/include/jemalloc/internal/private_namespace.h b/include/jemalloc/internal/private_namespace.h index a69482b6..a2171ed2 100644 --- a/include/jemalloc/internal/private_namespace.h +++ b/include/jemalloc/internal/private_namespace.h @@ -196,6 +196,7 @@ #define opt_xmalloc JEMALLOC_N(opt_xmalloc) #define opt_zero JEMALLOC_N(opt_zero) #define p2rz JEMALLOC_N(p2rz) +#define pages_purge JEMALLOC_N(pages_purge) #define pow2_ceil JEMALLOC_N(pow2_ceil) #define prof_backtrace JEMALLOC_N(prof_backtrace) #define prof_boot0 JEMALLOC_N(prof_boot0) diff --git a/include/jemalloc/jemalloc_defs.h.in b/include/jemalloc/jemalloc_defs.h.in index 90baa355..6e816557 100644 --- a/include/jemalloc/jemalloc_defs.h.in +++ b/include/jemalloc/jemalloc_defs.h.in @@ -220,13 +220,6 @@ */ #undef JEMALLOC_PURGE_MADVISE_DONTNEED #undef JEMALLOC_PURGE_MADVISE_FREE -#ifdef JEMALLOC_PURGE_MADVISE_DONTNEED -# define JEMALLOC_MADV_PURGE MADV_DONTNEED -#elif defined(JEMALLOC_PURGE_MADVISE_FREE) -# define JEMALLOC_MADV_PURGE MADV_FREE -#else -# error "No method defined for purging unused dirty pages." -#endif /* sizeof(void *) == 2^LG_SIZEOF_PTR. 
*/ #undef LG_SIZEOF_PTR diff --git a/src/arena.c b/src/arena.c index 0f15562a..ed47824a 100644 --- a/src/arena.c +++ b/src/arena.c @@ -676,8 +676,8 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) if (config_debug) ndirty -= npages; - madvise((void *)((uintptr_t)chunk + (pageind << LG_PAGE)), - (npages << LG_PAGE), JEMALLOC_MADV_PURGE); + pages_purge((void *)((uintptr_t)chunk + (pageind << LG_PAGE)), + (npages << LG_PAGE)); if (config_stats) nmadvise++; } diff --git a/src/chunk.c b/src/chunk.c index 67e0d503..bcaedea4 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -171,7 +171,7 @@ chunk_record(void *chunk, size_t size) { extent_node_t *xnode, *node, *prev, key; - madvise(chunk, size, JEMALLOC_MADV_PURGE); + pages_purge(chunk, size); xnode = NULL; malloc_mutex_lock(&chunks_mtx); diff --git a/src/chunk_mmap.c b/src/chunk_mmap.c index e11cc0e6..9dea8318 100644 --- a/src/chunk_mmap.c +++ b/src/chunk_mmap.c @@ -72,6 +72,20 @@ pages_unmap(void *addr, size_t size) } } +void +pages_purge(void *addr, size_t length) +{ + +#ifdef JEMALLOC_PURGE_MADVISE_DONTNEED +# define JEMALLOC_MADV_PURGE MADV_DONTNEED +#elif defined(JEMALLOC_PURGE_MADVISE_FREE) +# define JEMALLOC_MADV_PURGE MADV_FREE +#else +# error "No method defined for purging unused dirty pages." +#endif + madvise(addr, length, JEMALLOC_MADV_PURGE); +} + static void * chunk_alloc_mmap_slow(size_t size, size_t alignment, bool unaligned) { From 1ad56385adc40cfbca1b14c240a9c647135ac641 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Wed, 18 Apr 2012 18:29:44 +0200 Subject: [PATCH 0168/3378] Fix malloc_vsnprintf handling of %o, %u and %x These flags take unsigned values, but they were fed with signed values taken with va_arg, and that led to sign extension in cases where the corresponding value has the most significant bit set. 
--- src/util.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/src/util.c b/src/util.c index 2aab61fe..99ae26dd 100644 --- a/src/util.c +++ b/src/util.c @@ -320,12 +320,21 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) case '?': \ val = va_arg(ap, int); \ break; \ + case '?' | 0x80: \ + val = va_arg(ap, unsigned int); \ + break; \ case 'l': \ val = va_arg(ap, long); \ break; \ + case 'l' | 0x80: \ + val = va_arg(ap, unsigned long); \ + break; \ case 'q': \ val = va_arg(ap, long long); \ break; \ + case 'q' | 0x80: \ + val = va_arg(ap, unsigned long long); \ + break; \ case 'j': \ val = va_arg(ap, intmax_t); \ break; \ @@ -335,6 +344,9 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) case 'z': \ val = va_arg(ap, ssize_t); \ break; \ + case 'z' | 0x80: \ + val = va_arg(ap, size_t); \ + break; \ case 'p': /* Synthetic; used for %p. */ \ val = va_arg(ap, uintptr_t); \ break; \ @@ -358,7 +370,7 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) bool plus_plus = false; int prec = -1; int width = -1; - char len = '?'; + unsigned char len = '?'; f++; if (*f == '%') { @@ -480,7 +492,7 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) uintmax_t val JEMALLOC_CC_SILENCE_INIT(0); char buf[O2S_BUFSIZE]; - GET_ARG_NUMERIC(val, len); + GET_ARG_NUMERIC(val, len | 0x80); s = o2s(val, alt_form, buf, &slen); APPEND_PADDED_S(s, slen, width, left_justify); f++; @@ -489,7 +501,7 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) uintmax_t val JEMALLOC_CC_SILENCE_INIT(0); char buf[U2S_BUFSIZE]; - GET_ARG_NUMERIC(val, len); + GET_ARG_NUMERIC(val, len | 0x80); s = u2s(val, 10, false, buf, &slen); APPEND_PADDED_S(s, slen, width, left_justify); f++; @@ -498,7 +510,7 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) uintmax_t val JEMALLOC_CC_SILENCE_INIT(0); char buf[X2S_BUFSIZE]; - 
GET_ARG_NUMERIC(val, len); + GET_ARG_NUMERIC(val, len | 0x80); s = x2s(val, alt_form, *f == 'X', buf, &slen); APPEND_PADDED_S(s, slen, width, left_justify); f++; From 86e58583bb443fcfe885a1a96b466ab5933cb443 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 18 Apr 2012 19:01:00 -0700 Subject: [PATCH 0169/3378] Make special FreeBSD function overrides visible. Make special FreeBSD libc/libthr function overrides for _malloc_prefork(), _malloc_postfork(), and _malloc_thread_cleanup() visible. --- include/jemalloc/internal/private_namespace.h | 3 +++ src/jemalloc.c | 2 ++ src/tsd.c | 1 + 3 files changed, 6 insertions(+) diff --git a/include/jemalloc/internal/private_namespace.h b/include/jemalloc/internal/private_namespace.h index a2171ed2..b1e8330e 100644 --- a/include/jemalloc/internal/private_namespace.h +++ b/include/jemalloc/internal/private_namespace.h @@ -1,3 +1,6 @@ +#define a0calloc JEMALLOC_N(a0calloc) +#define a0free JEMALLOC_N(a0free) +#define a0malloc JEMALLOC_N(a0malloc) #define arena_alloc_junk_small JEMALLOC_N(arena_alloc_junk_small) #define arena_bin_index JEMALLOC_N(arena_bin_index) #define arena_boot JEMALLOC_N(arena_boot) diff --git a/src/jemalloc.c b/src/jemalloc.c index d4b681b4..6669c110 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1621,6 +1621,7 @@ je_nallocm(size_t *rsize, size_t size, int flags) void jemalloc_prefork(void) #else +JEMALLOC_ATTR(visibility("default")) void _malloc_prefork(void) #endif @@ -1642,6 +1643,7 @@ _malloc_prefork(void) void jemalloc_postfork_parent(void) #else +JEMALLOC_ATTR(visibility("default")) void _malloc_postfork(void) #endif diff --git a/src/tsd.c b/src/tsd.c index 0838dc86..f63493d3 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -32,6 +32,7 @@ malloc_tsd_no_cleanup(void *arg) } #ifdef JEMALLOC_MALLOC_THREAD_CLEANUP +JEMALLOC_ATTR(visibility("default")) void _malloc_thread_cleanup(void) { From 7ff1ce4131651fea1df7b1c010d71667bc574816 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Wed, 18 Apr 2012 18:29:47 
+0200 Subject: [PATCH 0170/3378] Initialize all members of non-TLS tsd wrapper when creating it Not setting the initialized member leads to randomly calling the cleanup function in cases it shouldn't be called (and isn't called in other implementations). --- include/jemalloc/internal/tsd.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 35ae5e3c..3f953f9a 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -253,6 +253,7 @@ a_name##_tsd_get_wrapper(void) \ } else { \ static a_type tsd_static_data = a_initializer; \ wrapper->isstatic = false; \ + wrapper->initialized = false; \ wrapper->val = tsd_static_data; \ } \ if (pthread_setspecific(a_name##_tsd, \ From 8ad483fe60a803acdbd403d88bb30b548ee1b5f9 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Wed, 18 Apr 2012 18:29:48 +0200 Subject: [PATCH 0171/3378] Remove initialization of the non-TLS tsd wrapper from static memory Using static memory when malloc_tsd_malloc fails means all threads share the same wrapper and thus the same wrapped value. This defeats the purpose of TSD. --- include/jemalloc/internal/tsd.h | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 3f953f9a..5888b377 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -192,7 +192,6 @@ a_name##_tsd_set(a_type *val) \ a_cleanup) \ /* Data structure. 
*/ \ typedef struct { \ - bool isstatic; \ bool initialized; \ a_type val; \ } a_name##_tsd_wrapper_t; \ @@ -218,8 +217,7 @@ a_name##_tsd_cleanup_wrapper(void *arg) \ return; \ } \ } \ - if (wrapper->isstatic == false) \ - malloc_tsd_dalloc(wrapper); \ + malloc_tsd_dalloc(wrapper); \ } \ a_attr bool \ a_name##_tsd_boot(void) \ @@ -242,17 +240,11 @@ a_name##_tsd_get_wrapper(void) \ wrapper = (a_name##_tsd_wrapper_t *) \ malloc_tsd_malloc(sizeof(a_name##_tsd_wrapper_t)); \ if (wrapper == NULL) { \ - static a_name##_tsd_wrapper_t \ - a_name##_tsd_static_data = \ - {true, false, a_initializer}; \ malloc_write(": Error allocating" \ " TSD for "#a_name"\n"); \ - if (opt_abort) \ - abort(); \ - wrapper = &a_name##_tsd_static_data; \ + abort(); \ } else { \ static a_type tsd_static_data = a_initializer; \ - wrapper->isstatic = false; \ wrapper->initialized = false; \ wrapper->val = tsd_static_data; \ } \ @@ -260,8 +252,7 @@ a_name##_tsd_get_wrapper(void) \ (void *)wrapper)) { \ malloc_write(": Error setting" \ " TSD for "#a_name"\n"); \ - if (opt_abort) \ - abort(); \ + abort(); \ } \ } \ return (wrapper); \ From 13067ec8350f213c3accc2e5fb70ca5a503e0e17 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Wed, 18 Apr 2012 18:29:49 +0200 Subject: [PATCH 0172/3378] Remove extra argument for malloc_tsd_cleanup_register Bookkeeping an extra argument that actually only stores a function pointer for a function we already have is not very useful. --- include/jemalloc/internal/tsd.h | 15 +++++---------- src/tsd.c | 7 +++---- 2 files changed, 8 insertions(+), 14 deletions(-) diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 5888b377..5e904cbb 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -4,11 +4,7 @@ /* Maximum number of malloc_tsd users with cleanup functions. 
*/ #define MALLOC_TSD_CLEANUPS_MAX 8 -typedef struct malloc_tsd_cleanup_s malloc_tsd_cleanup_t; -struct malloc_tsd_cleanup_s { - bool (*f)(void *); - void *arg; -}; +typedef bool (*malloc_tsd_cleanup_t)(void); /* * TLS/TSD-agnostic macro-based implementation of thread-specific data. There @@ -110,13 +106,12 @@ a_attr bool a_name##_booted = false; a_cleanup) \ /* Initialization/cleanup. */ \ a_attr bool \ -a_name##_tsd_cleanup_wrapper(void *arg) \ +a_name##_tsd_cleanup_wrapper(void) \ { \ - bool (*cleanup)(void *) = arg; \ \ if (a_name##_initialized) { \ a_name##_initialized = false; \ - cleanup(&a_name##_tls); \ + a_cleanup(&a_name##_tls); \ } \ return (a_name##_initialized); \ } \ @@ -126,7 +121,7 @@ a_name##_tsd_boot(void) \ \ if (a_cleanup != malloc_tsd_no_cleanup) { \ malloc_tsd_cleanup_register( \ - &a_name##_tsd_cleanup_wrapper, a_cleanup); \ + &a_name##_tsd_cleanup_wrapper); \ } \ a_name##_booted = true; \ return (false); \ @@ -290,7 +285,7 @@ a_name##_tsd_set(a_type *val) \ void *malloc_tsd_malloc(size_t size); void malloc_tsd_dalloc(void *wrapper); void malloc_tsd_no_cleanup(void *); -void malloc_tsd_cleanup_register(bool (*f)(void *), void *arg); +void malloc_tsd_cleanup_register(bool (*f)(void)); void malloc_tsd_boot(void); #endif /* JEMALLOC_H_EXTERNS */ diff --git a/src/tsd.c b/src/tsd.c index f63493d3..281a2e9b 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -46,7 +46,7 @@ _malloc_thread_cleanup(void) again = false; for (i = 0; i < ncleanups; i++) { if (pending[i]) { - pending[i] = cleanups[i].f(cleanups[i].arg); + pending[i] = cleanups[i](); if (pending[i]) again = true; } @@ -56,12 +56,11 @@ _malloc_thread_cleanup(void) #endif void -malloc_tsd_cleanup_register(bool (*f)(void *), void *arg) +malloc_tsd_cleanup_register(bool (*f)(void)) { assert(ncleanups < MALLOC_TSD_CLEANUPS_MAX); - cleanups[ncleanups].f = f; - cleanups[ncleanups].arg = arg; + cleanups[ncleanups] = f; ncleanups++; } From f7088e6c992d079bc3162e0c48ed4dc5def6d263 Mon Sep 17 00:00:00 2001 
From: Jason Evans Date: Thu, 19 Apr 2012 18:28:03 -0700 Subject: [PATCH 0173/3378] Make arena_salloc() an inline function. --- include/jemalloc/internal/arena.h | 42 ++++++++++++++++++- .../jemalloc/internal/jemalloc_internal.h.in | 5 +-- include/jemalloc/internal/tcache.h | 13 +++--- src/arena.c | 40 ------------------ src/tcache.c | 6 +++ 5 files changed, 56 insertions(+), 50 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 3790818c..2eb41cd9 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -408,7 +408,6 @@ void arena_dalloc_junk_small(void *ptr, arena_bin_info_t *bin_info); void *arena_malloc_small(arena_t *arena, size_t size, bool zero); void *arena_malloc_large(arena_t *arena, size_t size, bool zero); void *arena_palloc(arena_t *arena, size_t size, size_t alignment, bool zero); -size_t arena_salloc(const void *ptr, bool demote); void arena_prof_promoted(const void *ptr, size_t size); void arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, arena_chunk_map_t *mapelm); @@ -437,6 +436,7 @@ unsigned arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, prof_ctx_t *arena_prof_ctx_get(const void *ptr); void arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx); void *arena_malloc(arena_t *arena, size_t size, bool zero, bool try_tcache); +size_t arena_salloc(const void *ptr, bool demote); void arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr, bool try_tcache); #endif @@ -625,6 +625,46 @@ arena_malloc(arena_t *arena, size_t size, bool zero, bool try_tcache) } } +/* Return the size of the allocation pointed to by ptr. 
*/ +JEMALLOC_INLINE size_t +arena_salloc(const void *ptr, bool demote) +{ + size_t ret; + arena_chunk_t *chunk; + size_t pageind, mapbits; + + assert(ptr != NULL); + assert(CHUNK_ADDR2BASE(ptr) != ptr); + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; + mapbits = chunk->map[pageind-map_bias].bits; + assert((mapbits & CHUNK_MAP_ALLOCATED) != 0); + if ((mapbits & CHUNK_MAP_LARGE) == 0) { + arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + + (uintptr_t)((pageind - (mapbits >> LG_PAGE)) << LG_PAGE)); + size_t binind = arena_bin_index(chunk->arena, run->bin); + arena_bin_info_t *bin_info = &arena_bin_info[binind]; + assert(((uintptr_t)ptr - ((uintptr_t)run + + (uintptr_t)bin_info->reg0_offset)) % bin_info->reg_interval + == 0); + ret = bin_info->reg_size; + } else { + assert(((uintptr_t)ptr & PAGE_MASK) == 0); + ret = mapbits & ~PAGE_MASK; + if (config_prof && demote && prof_promote && ret == PAGE && + (mapbits & CHUNK_MAP_CLASS_MASK) != 0) { + size_t binind = ((mapbits & CHUNK_MAP_CLASS_MASK) >> + CHUNK_MAP_CLASS_SHIFT) - 1; + assert(binind < NBINS); + ret = arena_bin_info[binind].reg_size; + } + assert(ret != 0); + } + + return (ret); +} + JEMALLOC_INLINE void arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr, bool try_tcache) { diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 905653a2..b61abe84 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -732,10 +732,9 @@ isalloc(const void *ptr, bool demote) assert(config_prof || demote == false); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - if (chunk != ptr) { - /* Region. 
*/ + if (chunk != ptr) ret = arena_salloc(ptr, demote); - } else + else ret = huge_salloc(ptr); return (ret); diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index 9d8c992d..cfb17c28 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -100,6 +100,9 @@ extern size_t nhbins; /* Maximum cached size class. */ extern size_t tcache_maxclass; +size_t tcache_salloc(const void *ptr); +void *tcache_alloc_small_hard(tcache_t *tcache, tcache_bin_t *tbin, + size_t binind); void tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem, tcache_t *tcache); void tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem, @@ -107,8 +110,6 @@ void tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem, void tcache_arena_associate(tcache_t *tcache, arena_t *arena); void tcache_arena_dissociate(tcache_t *tcache); tcache_t *tcache_create(arena_t *arena); -void *tcache_alloc_small_hard(tcache_t *tcache, tcache_bin_t *tbin, - size_t binind); void tcache_destroy(tcache_t *tcache); void tcache_thread_cleanup(void *arg); void tcache_stats_merge(tcache_t *tcache, arena_t *arena); @@ -340,7 +341,7 @@ tcache_alloc_small(tcache_t *tcache, size_t size, bool zero) if (ret == NULL) return (NULL); } - assert(arena_salloc(ret, false) == arena_bin_info[binind].reg_size); + assert(tcache_salloc(ret) == arena_bin_info[binind].reg_size); if (zero == false) { if (config_fill) { @@ -431,7 +432,7 @@ tcache_dalloc_small(tcache_t *tcache, void *ptr) size_t pageind, binind; arena_chunk_map_t *mapelm; - assert(arena_salloc(ptr, false) <= SMALL_MAXCLASS); + assert(tcache_salloc(ptr) <= SMALL_MAXCLASS); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); arena = chunk->arena; @@ -468,8 +469,8 @@ tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size) tcache_bin_info_t *tbin_info; assert((size & PAGE_MASK) == 0); - assert(arena_salloc(ptr, false) > SMALL_MAXCLASS); - assert(arena_salloc(ptr, 
false) <= tcache_maxclass); + assert(tcache_salloc(ptr) > SMALL_MAXCLASS); + assert(tcache_salloc(ptr) <= tcache_maxclass); binind = NBINS + (size >> LG_PAGE) - 1; diff --git a/src/arena.c b/src/arena.c index ed47824a..6f28abe9 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1465,46 +1465,6 @@ arena_palloc(arena_t *arena, size_t size, size_t alignment, bool zero) return (ret); } -/* Return the size of the allocation pointed to by ptr. */ -size_t -arena_salloc(const void *ptr, bool demote) -{ - size_t ret; - arena_chunk_t *chunk; - size_t pageind, mapbits; - - assert(ptr != NULL); - assert(CHUNK_ADDR2BASE(ptr) != ptr); - - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - mapbits = chunk->map[pageind-map_bias].bits; - assert((mapbits & CHUNK_MAP_ALLOCATED) != 0); - if ((mapbits & CHUNK_MAP_LARGE) == 0) { - arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + - (uintptr_t)((pageind - (mapbits >> LG_PAGE)) << LG_PAGE)); - size_t binind = arena_bin_index(chunk->arena, run->bin); - arena_bin_info_t *bin_info = &arena_bin_info[binind]; - assert(((uintptr_t)ptr - ((uintptr_t)run + - (uintptr_t)bin_info->reg0_offset)) % bin_info->reg_interval - == 0); - ret = bin_info->reg_size; - } else { - assert(((uintptr_t)ptr & PAGE_MASK) == 0); - ret = mapbits & ~PAGE_MASK; - if (config_prof && demote && prof_promote && ret == PAGE && - (mapbits & CHUNK_MAP_CLASS_MASK) != 0) { - size_t binind = ((mapbits & CHUNK_MAP_CLASS_MASK) >> - CHUNK_MAP_CLASS_SHIFT) - 1; - assert(binind < NBINS); - ret = arena_bin_info[binind].reg_size; - } - assert(ret != 0); - } - - return (ret); -} - void arena_prof_promoted(const void *ptr, size_t size) { diff --git a/src/tcache.c b/src/tcache.c index be26b59c..9c4970c5 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -18,6 +18,12 @@ size_t tcache_maxclass; /******************************************************************************/ +size_t tcache_salloc(const void *ptr) +{ + + return 
(arena_salloc(ptr, false)); +} + void * tcache_alloc_small_hard(tcache_t *tcache, tcache_bin_t *tbin, size_t binind) { From 7d20fbc44a28209e8b70e2d8efe12254962c6fd6 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 20 Apr 2012 13:06:39 -0700 Subject: [PATCH 0174/3378] Don't mangle pthread_create(). Don't mangle pthread_create(); it's an exported symbol when defined. --- include/jemalloc/internal/private_namespace.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/jemalloc/internal/private_namespace.h b/include/jemalloc/internal/private_namespace.h index b1e8330e..547ead99 100644 --- a/include/jemalloc/internal/private_namespace.h +++ b/include/jemalloc/internal/private_namespace.h @@ -221,7 +221,6 @@ #define prof_tdata_tsd_cleanup_wrapper JEMALLOC_N(prof_tdata_tsd_cleanup_wrapper) #define prof_tdata_tsd_get JEMALLOC_N(prof_tdata_tsd_get) #define prof_tdata_tsd_set JEMALLOC_N(prof_tdata_tsd_set) -#define pthread_create JEMALLOC_N(pthread_create) #define quarantine JEMALLOC_N(quarantine) #define quarantine_boot JEMALLOC_N(quarantine_boot) #define quarantine_tsd_boot JEMALLOC_N(quarantine_tsd_boot) From 918d6e20b760da13776ca0faf8bc00b4647a482c Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 20 Apr 2012 13:42:21 -0700 Subject: [PATCH 0175/3378] Add missing private namespace mangling. --- ChangeLog | 1 + include/jemalloc/internal/private_namespace.h | 46 +++++++++++++++++++ 2 files changed, 47 insertions(+) diff --git a/ChangeLog b/ChangeLog index 1fed914f..6d5670f5 100644 --- a/ChangeLog +++ b/ChangeLog @@ -81,6 +81,7 @@ found in the git revision history: - Add missing "opt.lg_tcache_max" mallctl implementation. - Use glibc allocator hooks to make mixed allocator usage less likely. - Fix build issues for --disable-tcache. + - Don't mangle pthread_create() when --with-private-namespace is specified. 
* 2.2.5 (November 14, 2011) diff --git a/include/jemalloc/internal/private_namespace.h b/include/jemalloc/internal/private_namespace.h index 547ead99..15fe3c51 100644 --- a/include/jemalloc/internal/private_namespace.h +++ b/include/jemalloc/internal/private_namespace.h @@ -3,6 +3,7 @@ #define a0malloc JEMALLOC_N(a0malloc) #define arena_alloc_junk_small JEMALLOC_N(arena_alloc_junk_small) #define arena_bin_index JEMALLOC_N(arena_bin_index) +#define arena_bin_info JEMALLOC_N(arena_bin_info) #define arena_boot JEMALLOC_N(arena_boot) #define arena_dalloc JEMALLOC_N(arena_dalloc) #define arena_dalloc_bin JEMALLOC_N(arena_dalloc_bin) @@ -11,6 +12,7 @@ #define arena_malloc JEMALLOC_N(arena_malloc) #define arena_malloc_large JEMALLOC_N(arena_malloc_large) #define arena_malloc_small JEMALLOC_N(arena_malloc_small) +#define arena_maxclass JEMALLOC_N(arena_maxclass) #define arena_new JEMALLOC_N(arena_new) #define arena_palloc JEMALLOC_N(arena_palloc) #define arena_postfork_child JEMALLOC_N(arena_postfork_child) @@ -27,9 +29,13 @@ #define arena_salloc JEMALLOC_N(arena_salloc) #define arena_stats_merge JEMALLOC_N(arena_stats_merge) #define arena_tcache_fill_small JEMALLOC_N(arena_tcache_fill_small) +#define arenas JEMALLOC_N(arenas) #define arenas_bin_i_index JEMALLOC_N(arenas_bin_i_index) +#define arenas_booted JEMALLOC_N(arenas_booted) #define arenas_cleanup JEMALLOC_N(arenas_cleanup) #define arenas_extend JEMALLOC_N(arenas_extend) +#define arenas_initialized JEMALLOC_N(arenas_initialized) +#define arenas_lock JEMALLOC_N(arenas_lock) #define arenas_lrun_i_index JEMALLOC_N(arenas_lrun_i_index) #define arenas_tls JEMALLOC_N(arenas_tls) #define arenas_tsd_boot JEMALLOC_N(arenas_tsd_boot) @@ -78,6 +84,11 @@ #define chunk_dss_prefork JEMALLOC_N(chunk_dss_prefork) #define chunk_in_dss JEMALLOC_N(chunk_in_dss) #define chunk_mmap_boot JEMALLOC_N(chunk_mmap_boot) +#define chunk_npages JEMALLOC_N(chunk_npages) +#define chunks_mtx JEMALLOC_N(chunks_mtx) +#define chunks_rtree 
JEMALLOC_N(chunks_rtree) +#define chunksize JEMALLOC_N(chunksize) +#define chunksize_mask JEMALLOC_N(chunksize_mask) #define ckh_bucket_search JEMALLOC_N(ckh_bucket_search) #define ckh_count JEMALLOC_N(ckh_count) #define ckh_delete JEMALLOC_N(ckh_delete) @@ -132,9 +143,13 @@ #define extent_tree_szad_reverse_iter_start JEMALLOC_N(extent_tree_szad_reverse_iter_start) #define extent_tree_szad_search JEMALLOC_N(extent_tree_szad_search) #define hash JEMALLOC_N(hash) +#define huge_allocated JEMALLOC_N(huge_allocated) #define huge_boot JEMALLOC_N(huge_boot) #define huge_dalloc JEMALLOC_N(huge_dalloc) #define huge_malloc JEMALLOC_N(huge_malloc) +#define huge_mtx JEMALLOC_N(huge_mtx) +#define huge_ndalloc JEMALLOC_N(huge_ndalloc) +#define huge_nmalloc JEMALLOC_N(huge_nmalloc) #define huge_palloc JEMALLOC_N(huge_palloc) #define huge_postfork_child JEMALLOC_N(huge_postfork_child) #define huge_postfork_parent JEMALLOC_N(huge_postfork_parent) @@ -152,6 +167,7 @@ #define iqalloc JEMALLOC_N(iqalloc) #define iralloc JEMALLOC_N(iralloc) #define isalloc JEMALLOC_N(isalloc) +#define isthreaded JEMALLOC_N(isthreaded) #define ivsalloc JEMALLOC_N(ivsalloc) #define jemalloc_postfork_child JEMALLOC_N(jemalloc_postfork_child) #define jemalloc_postfork_parent JEMALLOC_N(jemalloc_postfork_parent) @@ -174,12 +190,16 @@ #define malloc_vcprintf JEMALLOC_N(malloc_vcprintf) #define malloc_vsnprintf JEMALLOC_N(malloc_vsnprintf) #define malloc_write JEMALLOC_N(malloc_write) +#define map_bias JEMALLOC_N(map_bias) #define mb_write JEMALLOC_N(mb_write) #define mmap_unaligned_tsd_boot JEMALLOC_N(mmap_unaligned_tsd_boot) #define mmap_unaligned_tsd_cleanup_wrapper JEMALLOC_N(mmap_unaligned_tsd_cleanup_wrapper) #define mmap_unaligned_tsd_get JEMALLOC_N(mmap_unaligned_tsd_get) #define mmap_unaligned_tsd_set JEMALLOC_N(mmap_unaligned_tsd_set) #define mutex_boot JEMALLOC_N(mutex_boot) +#define narenas JEMALLOC_N(narenas) +#define ncpus JEMALLOC_N(ncpus) +#define nhbins JEMALLOC_N(nhbins) #define opt_abort 
JEMALLOC_N(opt_abort) #define opt_junk JEMALLOC_N(opt_junk) #define opt_lg_chunk JEMALLOC_N(opt_lg_chunk) @@ -191,11 +211,16 @@ #define opt_prof JEMALLOC_N(opt_prof) #define opt_prof_accum JEMALLOC_N(opt_prof_accum) #define opt_prof_active JEMALLOC_N(opt_prof_active) +#define opt_prof_final JEMALLOC_N(opt_prof_final) #define opt_prof_gdump JEMALLOC_N(opt_prof_gdump) #define opt_prof_leak JEMALLOC_N(opt_prof_leak) +#define opt_prof_prefix JEMALLOC_N(opt_prof_prefix) +#define opt_quarantine JEMALLOC_N(opt_quarantine) +#define opt_redzone JEMALLOC_N(opt_redzone) #define opt_stats_print JEMALLOC_N(opt_stats_print) #define opt_tcache JEMALLOC_N(opt_tcache) #define opt_utrace JEMALLOC_N(opt_utrace) +#define opt_valgrind JEMALLOC_N(opt_valgrind) #define opt_xmalloc JEMALLOC_N(opt_xmalloc) #define opt_zero JEMALLOC_N(opt_zero) #define p2rz JEMALLOC_N(p2rz) @@ -210,13 +235,20 @@ #define prof_free JEMALLOC_N(prof_free) #define prof_gdump JEMALLOC_N(prof_gdump) #define prof_idump JEMALLOC_N(prof_idump) +#define prof_interval JEMALLOC_N(prof_interval) #define prof_lookup JEMALLOC_N(prof_lookup) #define prof_malloc JEMALLOC_N(prof_malloc) #define prof_mdump JEMALLOC_N(prof_mdump) +#define prof_lookup JEMALLOC_N(prof_lookup) +#define prof_promote JEMALLOC_N(prof_promote) #define prof_realloc JEMALLOC_N(prof_realloc) #define prof_sample_accum_update JEMALLOC_N(prof_sample_accum_update) #define prof_sample_threshold_update JEMALLOC_N(prof_sample_threshold_update) +#define prof_tdata_booted JEMALLOC_N(prof_tdata_booted) #define prof_tdata_cleanup JEMALLOC_N(prof_tdata_cleanup) +#define prof_tdata_init JEMALLOC_N(prof_tdata_init) +#define prof_tdata_initialized JEMALLOC_N(prof_tdata_initialized) +#define prof_tdata_tls JEMALLOC_N(prof_tdata_tls) #define prof_tdata_tsd_boot JEMALLOC_N(prof_tdata_tsd_boot) #define prof_tdata_tsd_cleanup_wrapper JEMALLOC_N(prof_tdata_tsd_cleanup_wrapper) #define prof_tdata_tsd_get JEMALLOC_N(prof_tdata_tsd_get) @@ -241,6 +273,7 @@ #define 
stats_cactive_add JEMALLOC_N(stats_cactive_add) #define stats_cactive_get JEMALLOC_N(stats_cactive_get) #define stats_cactive_sub JEMALLOC_N(stats_cactive_sub) +#define stats_chunks JEMALLOC_N(stats_chunks) #define stats_print JEMALLOC_N(stats_print) #define tcache_alloc_easy JEMALLOC_N(tcache_alloc_easy) #define tcache_alloc_large JEMALLOC_N(tcache_alloc_large) @@ -250,26 +283,39 @@ #define tcache_arena_dissociate JEMALLOC_N(tcache_arena_dissociate) #define tcache_bin_flush_large JEMALLOC_N(tcache_bin_flush_large) #define tcache_bin_flush_small JEMALLOC_N(tcache_bin_flush_small) +#define tcache_bin_info JEMALLOC_N(tcache_bin_info) #define tcache_boot0 JEMALLOC_N(tcache_boot0) #define tcache_boot1 JEMALLOC_N(tcache_boot1) +#define tcache_booted JEMALLOC_N(tcache_booted) #define tcache_create JEMALLOC_N(tcache_create) #define tcache_dalloc_large JEMALLOC_N(tcache_dalloc_large) #define tcache_dalloc_small JEMALLOC_N(tcache_dalloc_small) #define tcache_destroy JEMALLOC_N(tcache_destroy) +#define tcache_enabled_booted JEMALLOC_N(tcache_enabled_booted) #define tcache_enabled_get JEMALLOC_N(tcache_enabled_get) +#define tcache_enabled_initialized JEMALLOC_N(tcache_enabled_initialized) #define tcache_enabled_set JEMALLOC_N(tcache_enabled_set) +#define tcache_enabled_tls JEMALLOC_N(tcache_enabled_tls) #define tcache_enabled_tsd_boot JEMALLOC_N(tcache_enabled_tsd_boot) #define tcache_enabled_tsd_cleanup_wrapper JEMALLOC_N(tcache_enabled_tsd_cleanup_wrapper) #define tcache_enabled_tsd_get JEMALLOC_N(tcache_enabled_tsd_get) #define tcache_enabled_tsd_set JEMALLOC_N(tcache_enabled_tsd_set) #define tcache_event JEMALLOC_N(tcache_event) +#define tcache_initialized JEMALLOC_N(tcache_initialized) #define tcache_flush JEMALLOC_N(tcache_flush) +#define tcache_get JEMALLOC_N(tcache_get) +#define tcache_maxclass JEMALLOC_N(tcache_maxclass) #define tcache_stats_merge JEMALLOC_N(tcache_stats_merge) +#define tcache_salloc JEMALLOC_N(tcache_salloc) #define tcache_thread_cleanup 
JEMALLOC_N(tcache_thread_cleanup) +#define tcache_tls JEMALLOC_N(tcache_tls) #define tcache_tsd_boot JEMALLOC_N(tcache_tsd_boot) #define tcache_tsd_cleanup_wrapper JEMALLOC_N(tcache_tsd_cleanup_wrapper) #define tcache_tsd_get JEMALLOC_N(tcache_tsd_get) #define tcache_tsd_set JEMALLOC_N(tcache_tsd_set) +#define thread_allocated_booted JEMALLOC_N(thread_allocated_booted) +#define thread_allocated_initialized JEMALLOC_N(thread_allocated_initialized) +#define thread_allocated_tls JEMALLOC_N(thread_allocated_tls) #define thread_allocated_tsd_boot JEMALLOC_N(thread_allocated_tsd_boot) #define thread_allocated_tsd_cleanup_wrapper JEMALLOC_N(thread_allocated_tsd_cleanup_wrapper) #define thread_allocated_tsd_get JEMALLOC_N(thread_allocated_tsd_get) From bedceea2a8aef427d96a77762e9d4bda9f0cc0c3 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 20 Apr 2012 14:12:30 -0700 Subject: [PATCH 0176/3378] Fix isthreaded-related build breakage. --- include/jemalloc/internal/mutex.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/jemalloc/internal/mutex.h b/include/jemalloc/internal/mutex.h index c46feee3..8837ef57 100644 --- a/include/jemalloc/internal/mutex.h +++ b/include/jemalloc/internal/mutex.h @@ -40,6 +40,7 @@ struct malloc_mutex_s { #ifdef JEMALLOC_LAZY_LOCK extern bool isthreaded; #else +# undef isthreaded /* Undo private_namespace.h definition. */ # define isthreaded true #endif From 606f1fdc3cdbc700717133ca56685313caea24bb Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 20 Apr 2012 21:39:14 -0700 Subject: [PATCH 0177/3378] Put CONF_HANDLE_*() keys in quotes. Put CONF_HANDLE_*() keys in quotes, so that they aren't mangled when --with-private-namespace is used. 
--- src/jemalloc.c | 57 +++++++++++++++++++++++++------------------------- 1 file changed, 29 insertions(+), 28 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 6669c110..00c2b23c 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -423,7 +423,7 @@ malloc_conf_init(void) while (*opts != '\0' && malloc_conf_next(&opts, &k, &klen, &v, &vlen) == false) { #define CONF_HANDLE_BOOL_HIT(o, n, hit) \ - if (sizeof(#n)-1 == klen && strncmp(#n, k, \ + if (sizeof(n)-1 == klen && strncmp(n, k, \ klen) == 0) { \ if (strncmp("true", v, vlen) == 0 && \ vlen == sizeof("true")-1) \ @@ -446,7 +446,7 @@ malloc_conf_init(void) continue; \ } #define CONF_HANDLE_SIZE_T(o, n, min, max) \ - if (sizeof(#n)-1 == klen && strncmp(#n, k, \ + if (sizeof(n)-1 == klen && strncmp(n, k, \ klen) == 0) { \ uintmax_t um; \ char *end; \ @@ -467,7 +467,7 @@ malloc_conf_init(void) continue; \ } #define CONF_HANDLE_SSIZE_T(o, n, min, max) \ - if (sizeof(#n)-1 == klen && strncmp(#n, k, \ + if (sizeof(n)-1 == klen && strncmp(n, k, \ klen) == 0) { \ long l; \ char *end; \ @@ -489,7 +489,7 @@ malloc_conf_init(void) continue; \ } #define CONF_HANDLE_CHAR_P(o, n, d) \ - if (sizeof(#n)-1 == klen && strncmp(#n, k, \ + if (sizeof(n)-1 == klen && strncmp(n, k, \ klen) == 0) { \ size_t cpylen = (vlen <= \ sizeof(o)-1) ? vlen : \ @@ -499,7 +499,7 @@ malloc_conf_init(void) continue; \ } - CONF_HANDLE_BOOL(opt_abort, abort) + CONF_HANDLE_BOOL(opt_abort, "abort") /* * Chunks always require at least one header page, plus * one data page in the absence of redzones, or three @@ -507,26 +507,27 @@ malloc_conf_init(void) * simplify options processing, fix the limit based on * config_fill. */ - CONF_HANDLE_SIZE_T(opt_lg_chunk, lg_chunk, LG_PAGE + + CONF_HANDLE_SIZE_T(opt_lg_chunk, "lg_chunk", LG_PAGE + (config_fill ? 
2 : 1), (sizeof(size_t) << 3) - 1) - CONF_HANDLE_SIZE_T(opt_narenas, narenas, 1, SIZE_T_MAX) - CONF_HANDLE_SSIZE_T(opt_lg_dirty_mult, lg_dirty_mult, + CONF_HANDLE_SIZE_T(opt_narenas, "narenas", 1, + SIZE_T_MAX) + CONF_HANDLE_SSIZE_T(opt_lg_dirty_mult, "lg_dirty_mult", -1, (sizeof(size_t) << 3) - 1) - CONF_HANDLE_BOOL(opt_stats_print, stats_print) + CONF_HANDLE_BOOL(opt_stats_print, "stats_print") if (config_fill) { - CONF_HANDLE_BOOL(opt_junk, junk) - CONF_HANDLE_SIZE_T(opt_quarantine, quarantine, + CONF_HANDLE_BOOL(opt_junk, "junk") + CONF_HANDLE_SIZE_T(opt_quarantine, "quarantine", 0, SIZE_T_MAX) - CONF_HANDLE_BOOL(opt_redzone, redzone) - CONF_HANDLE_BOOL(opt_zero, zero) + CONF_HANDLE_BOOL(opt_redzone, "redzone") + CONF_HANDLE_BOOL(opt_zero, "zero") } if (config_utrace) { - CONF_HANDLE_BOOL(opt_utrace, utrace) + CONF_HANDLE_BOOL(opt_utrace, "utrace") } if (config_valgrind) { bool hit; CONF_HANDLE_BOOL_HIT(opt_valgrind, - valgrind, hit) + "valgrind", hit) if (config_fill && opt_valgrind && hit) { opt_junk = false; opt_zero = false; @@ -540,29 +541,29 @@ malloc_conf_init(void) continue; } if (config_xmalloc) { - CONF_HANDLE_BOOL(opt_xmalloc, xmalloc) + CONF_HANDLE_BOOL(opt_xmalloc, "xmalloc") } if (config_tcache) { - CONF_HANDLE_BOOL(opt_tcache, tcache) + CONF_HANDLE_BOOL(opt_tcache, "tcache") CONF_HANDLE_SSIZE_T(opt_lg_tcache_max, - lg_tcache_max, -1, + "lg_tcache_max", -1, (sizeof(size_t) << 3) - 1) } if (config_prof) { - CONF_HANDLE_BOOL(opt_prof, prof) - CONF_HANDLE_CHAR_P(opt_prof_prefix, prof_prefix, - "jeprof") - CONF_HANDLE_BOOL(opt_prof_active, prof_active) + CONF_HANDLE_BOOL(opt_prof, "prof") + CONF_HANDLE_CHAR_P(opt_prof_prefix, + "prof_prefix", "jeprof") + CONF_HANDLE_BOOL(opt_prof_active, "prof_active") CONF_HANDLE_SSIZE_T(opt_lg_prof_sample, - lg_prof_sample, 0, + "lg_prof_sample", 0, (sizeof(uint64_t) << 3) - 1) - CONF_HANDLE_BOOL(opt_prof_accum, prof_accum) + CONF_HANDLE_BOOL(opt_prof_accum, "prof_accum") CONF_HANDLE_SSIZE_T(opt_lg_prof_interval, - 
lg_prof_interval, -1, + "lg_prof_interval", -1, (sizeof(uint64_t) << 3) - 1) - CONF_HANDLE_BOOL(opt_prof_gdump, prof_gdump) - CONF_HANDLE_BOOL(opt_prof_final, prof_final) - CONF_HANDLE_BOOL(opt_prof_leak, prof_leak) + CONF_HANDLE_BOOL(opt_prof_gdump, "prof_gdump") + CONF_HANDLE_BOOL(opt_prof_final, "prof_final") + CONF_HANDLE_BOOL(opt_prof_leak, "prof_leak") } malloc_conf_error("Invalid conf pair", k, klen, v, vlen); From 8f0e0eb1c01d5d934586ea62e519ca8b8637aebc Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 21 Apr 2012 13:33:48 -0700 Subject: [PATCH 0178/3378] Fix a memory corruption bug in chunk_alloc_dss(). Fix a memory corruption bug in chunk_alloc_dss() that was due to claiming newly allocated memory is zeroed. Reverse order of preference between mmap() and sbrk() to prefer mmap(). Clean up management of 'zero' parameter in chunk_alloc*(). --- ChangeLog | 2 ++ doc/jemalloc.xml.in | 4 ++-- include/jemalloc/internal/chunk_mmap.h | 2 +- src/chunk.c | 10 +++++----- src/chunk_dss.c | 1 - src/chunk_mmap.c | 16 ++++++++++------ 6 files changed, 20 insertions(+), 15 deletions(-) diff --git a/ChangeLog b/ChangeLog index 6d5670f5..b71fa165 100644 --- a/ChangeLog +++ b/ChangeLog @@ -70,6 +70,8 @@ found in the git revision history: invalid statistics and crashes. - Work around TLS dallocation via free() on Linux. This bug could cause write-after-free memory corruption. + - Fix chunk_alloc_dss() to stop claiming memory is zeroed. This bug could + cause memory corruption and crashes with --enable-dss specified. - Fix malloc_stats_print() to honor 'b' and 'l' in the opts parameter. - Fix realloc(p, 0) to act like free(p). - Do not enforce minimum alignment in memalign(). 
diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index f78f423c..e8a57225 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -444,9 +444,9 @@ for (i = 0; i < nbins; i++) { suboptimal for several reasons, including race conditions, increased fragmentation, and artificial limitations on maximum usable memory. If is specified during configuration, this - allocator uses both sbrk + allocator uses both mmap 2 and - mmap + sbrk 2, in that order of preference; otherwise only mmap 2 is used. diff --git a/include/jemalloc/internal/chunk_mmap.h b/include/jemalloc/internal/chunk_mmap.h index 2d01ac22..8224430a 100644 --- a/include/jemalloc/internal/chunk_mmap.h +++ b/include/jemalloc/internal/chunk_mmap.h @@ -11,7 +11,7 @@ void pages_purge(void *addr, size_t length); -void *chunk_alloc_mmap(size_t size, size_t alignment); +void *chunk_alloc_mmap(size_t size, size_t alignment, bool *zero); bool chunk_dealloc_mmap(void *chunk, size_t size); bool chunk_mmap_boot(void); diff --git a/src/chunk.c b/src/chunk.c index bcaedea4..31485058 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -125,16 +125,16 @@ chunk_alloc(size_t size, size_t alignment, bool base, bool *zero) ret = chunk_recycle(size, alignment, zero); if (ret != NULL) goto label_return; + + ret = chunk_alloc_mmap(size, alignment, zero); + if (ret != NULL) + goto label_return; + if (config_dss) { ret = chunk_alloc_dss(size, alignment, zero); if (ret != NULL) goto label_return; } - ret = chunk_alloc_mmap(size, alignment); - if (ret != NULL) { - *zero = true; - goto label_return; - } /* All strategies for allocation failed. 
*/ ret = NULL; diff --git a/src/chunk_dss.c b/src/chunk_dss.c index b05509a5..bd4a724b 100644 --- a/src/chunk_dss.c +++ b/src/chunk_dss.c @@ -89,7 +89,6 @@ chunk_alloc_dss(size_t size, size_t alignment, bool *zero) malloc_mutex_unlock(&dss_mtx); if (cpad_size != 0) chunk_dealloc(cpad, cpad_size, true); - *zero = true; return (ret); } } while (dss_prev != (void *)-1); diff --git a/src/chunk_mmap.c b/src/chunk_mmap.c index 9dea8318..126406ae 100644 --- a/src/chunk_mmap.c +++ b/src/chunk_mmap.c @@ -18,7 +18,7 @@ malloc_tsd_funcs(JEMALLOC_INLINE, mmap_unaligned, bool, false, static void *pages_map(void *addr, size_t size); static void pages_unmap(void *addr, size_t size); static void *chunk_alloc_mmap_slow(size_t size, size_t alignment, - bool unaligned); + bool unaligned, bool *zero); /******************************************************************************/ @@ -87,7 +87,7 @@ pages_purge(void *addr, size_t length) } static void * -chunk_alloc_mmap_slow(size_t size, size_t alignment, bool unaligned) +chunk_alloc_mmap_slow(size_t size, size_t alignment, bool unaligned, bool *zero) { void *ret, *pages; size_t alloc_size, leadsize, trailsize; @@ -122,11 +122,13 @@ chunk_alloc_mmap_slow(size_t size, size_t alignment, bool unaligned) mmap_unaligned_tsd_set(&mu); } + assert(ret != NULL); + *zero = true; return (ret); } void * -chunk_alloc_mmap(size_t size, size_t alignment) +chunk_alloc_mmap(size_t size, size_t alignment, bool *zero) { void *ret; @@ -177,8 +179,8 @@ chunk_alloc_mmap(size_t size, size_t alignment) * the reliable-but-expensive method. */ pages_unmap(ret, size); - ret = chunk_alloc_mmap_slow(size, alignment, - true); + return (chunk_alloc_mmap_slow(size, alignment, + true, zero)); } else { /* Clean up unneeded leading space. 
*/ pages_unmap(ret, chunksize - offset); @@ -187,8 +189,10 @@ chunk_alloc_mmap(size_t size, size_t alignment) } } } else - ret = chunk_alloc_mmap_slow(size, alignment, false); + return (chunk_alloc_mmap_slow(size, alignment, false, zero)); + assert(ret != NULL); + *zero = true; return (ret); } From 7ad54c1c30e0805e0758690115875f982de46cf2 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 21 Apr 2012 16:04:51 -0700 Subject: [PATCH 0179/3378] Fix chunk allocation/deallocation bugs. Fix chunk_alloc_dss() to zero memory when requested. Fix chunk_dealloc() to avoid chunk_dealloc_mmap() for dss-allocated memory. Fix huge_palloc() to always junk fill when requested. Improve chunk_recycle() to report that memory is zeroed as a side effect of pages_purge(). --- include/jemalloc/internal/tsd.h | 2 +- src/chunk.c | 17 +++++++++++++---- src/chunk_dss.c | 4 ++++ src/huge.c | 10 ++++++++-- 4 files changed, 26 insertions(+), 7 deletions(-) diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 5e904cbb..20491c88 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -111,7 +111,7 @@ a_name##_tsd_cleanup_wrapper(void) \ \ if (a_name##_initialized) { \ a_name##_initialized = false; \ - a_cleanup(&a_name##_tls); \ + a_cleanup(&a_name##_tls); \ } \ return (a_name##_initialized); \ } \ diff --git a/src/chunk.c b/src/chunk.c index 31485058..0fccd0ce 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -98,7 +98,10 @@ chunk_recycle(size_t size, size_t alignment, bool *zero) if (node != NULL) base_node_dealloc(node); -#ifdef JEMALLOC_PURGE_MADVISE_FREE +#ifdef JEMALLOC_PURGE_MADVISE_DONTNEED + /* Pages are zeroed as a side effect of pages_purge(). 
*/ + *zero = true; +#else if (*zero) { VALGRIND_MAKE_MEM_UNDEFINED(ret, size); memset(ret, 0, size); @@ -161,7 +164,13 @@ label_return: if (config_prof && opt_prof && opt_prof_gdump && gdump) prof_gdump(); } + if (config_debug && *zero && ret != NULL) { + size_t i; + size_t *p = (size_t *)(uintptr_t)ret; + for (i = 0; i < size / sizeof(size_t); i++) + assert(p[i] == 0); + } assert(CHUNK_ADDR2BASE(ret) == ret); return (ret); } @@ -258,9 +267,9 @@ chunk_dealloc(void *chunk, size_t size, bool unmap) } if (unmap) { - if (chunk_dealloc_mmap(chunk, size) == false) - return; - chunk_record(chunk, size); + if ((config_dss && chunk_in_dss(chunk)) || + chunk_dealloc_mmap(chunk, size)) + chunk_record(chunk, size); } } diff --git a/src/chunk_dss.c b/src/chunk_dss.c index bd4a724b..2d68e480 100644 --- a/src/chunk_dss.c +++ b/src/chunk_dss.c @@ -89,6 +89,10 @@ chunk_alloc_dss(size_t size, size_t alignment, bool *zero) malloc_mutex_unlock(&dss_mtx); if (cpad_size != 0) chunk_dealloc(cpad, cpad_size, true); + if (*zero) { + VALGRIND_MAKE_MEM_UNDEFINED(ret, size); + memset(ret, 0, size); + } return (ret); } } while (dss_prev != (void *)-1); diff --git a/src/huge.c b/src/huge.c index daf0c622..23eb074a 100644 --- a/src/huge.c +++ b/src/huge.c @@ -28,6 +28,7 @@ huge_palloc(size_t size, size_t alignment, bool zero) void *ret; size_t csize; extent_node_t *node; + bool is_zeroed; /* Allocate one or more contiguous chunks for this request. */ @@ -42,7 +43,12 @@ huge_palloc(size_t size, size_t alignment, bool zero) if (node == NULL) return (NULL); - ret = chunk_alloc(csize, alignment, false, &zero); + /* + * Copy zero into is_zeroed and pass the copy to chunk_alloc(), so that + * it is possible to make correct junk/zero fill decisions below. 
+ */ + is_zeroed = zero; + ret = chunk_alloc(csize, alignment, false, &is_zeroed); if (ret == NULL) { base_node_dealloc(node); return (NULL); @@ -64,7 +70,7 @@ huge_palloc(size_t size, size_t alignment, bool zero) if (config_fill && zero == false) { if (opt_junk) memset(ret, 0xa5, csize); - else if (opt_zero) + else if (opt_zero && is_zeroed == false) memset(ret, 0, csize); } From a8f8d7540d66ddee7337db80c92890916e1063ca Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 21 Apr 2012 19:17:21 -0700 Subject: [PATCH 0180/3378] Remove mmap_unaligned. Remove mmap_unaligned, which was used to heuristically decide whether to optimistically call mmap() in such a way that could reduce the total number of system calls. If I remember correctly, the intention of mmap_unaligned was to avoid always executing the slow path in the presence of ASLR. However, that reasoning seems to have been based on a flawed understanding of how ASLR actually works. Although ASLR apparently causes mmap() to ignore address requests, it does not cause total placement randomness, so there is a reasonable expectation that iterative mmap() calls will start returning chunk-aligned mappings once the first chunk has been properly aligned. --- include/jemalloc/internal/chunk.h | 3 +- include/jemalloc/internal/chunk_mmap.h | 2 - include/jemalloc/internal/private_namespace.h | 8 +- src/chunk.c | 12 +-- src/chunk_mmap.c | 98 +++++-------------- src/jemalloc.c | 7 +- 6 files changed, 29 insertions(+), 101 deletions(-) diff --git a/include/jemalloc/internal/chunk.h b/include/jemalloc/internal/chunk.h index e047c2b1..8fb1fe6d 100644 --- a/include/jemalloc/internal/chunk.h +++ b/include/jemalloc/internal/chunk.h @@ -44,8 +44,7 @@ extern size_t arena_maxclass; /* Max size class for arenas. 
*/ void *chunk_alloc(size_t size, size_t alignment, bool base, bool *zero); void chunk_dealloc(void *chunk, size_t size, bool unmap); -bool chunk_boot0(void); -bool chunk_boot1(void); +bool chunk_boot(void); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ diff --git a/include/jemalloc/internal/chunk_mmap.h b/include/jemalloc/internal/chunk_mmap.h index 8224430a..b29f39e9 100644 --- a/include/jemalloc/internal/chunk_mmap.h +++ b/include/jemalloc/internal/chunk_mmap.h @@ -14,8 +14,6 @@ void pages_purge(void *addr, size_t length); void *chunk_alloc_mmap(size_t size, size_t alignment, bool *zero); bool chunk_dealloc_mmap(void *chunk, size_t size); -bool chunk_mmap_boot(void); - #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ #ifdef JEMALLOC_H_INLINES diff --git a/include/jemalloc/internal/private_namespace.h b/include/jemalloc/internal/private_namespace.h index 15fe3c51..bb1b63e9 100644 --- a/include/jemalloc/internal/private_namespace.h +++ b/include/jemalloc/internal/private_namespace.h @@ -74,8 +74,7 @@ #define chunk_alloc JEMALLOC_N(chunk_alloc) #define chunk_alloc_dss JEMALLOC_N(chunk_alloc_dss) #define chunk_alloc_mmap JEMALLOC_N(chunk_alloc_mmap) -#define chunk_boot0 JEMALLOC_N(chunk_boot0) -#define chunk_boot1 JEMALLOC_N(chunk_boot1) +#define chunk_boot JEMALLOC_N(chunk_boot) #define chunk_dealloc JEMALLOC_N(chunk_dealloc) #define chunk_dealloc_mmap JEMALLOC_N(chunk_dealloc_mmap) #define chunk_dss_boot JEMALLOC_N(chunk_dss_boot) @@ -83,7 +82,6 @@ #define chunk_dss_postfork_parent JEMALLOC_N(chunk_dss_postfork_parent) #define chunk_dss_prefork JEMALLOC_N(chunk_dss_prefork) #define chunk_in_dss JEMALLOC_N(chunk_in_dss) -#define chunk_mmap_boot JEMALLOC_N(chunk_mmap_boot) #define chunk_npages JEMALLOC_N(chunk_npages) #define chunks_mtx JEMALLOC_N(chunks_mtx) #define chunks_rtree JEMALLOC_N(chunks_rtree) @@ -192,10 +190,6 @@ #define 
malloc_write JEMALLOC_N(malloc_write) #define map_bias JEMALLOC_N(map_bias) #define mb_write JEMALLOC_N(mb_write) -#define mmap_unaligned_tsd_boot JEMALLOC_N(mmap_unaligned_tsd_boot) -#define mmap_unaligned_tsd_cleanup_wrapper JEMALLOC_N(mmap_unaligned_tsd_cleanup_wrapper) -#define mmap_unaligned_tsd_get JEMALLOC_N(mmap_unaligned_tsd_get) -#define mmap_unaligned_tsd_set JEMALLOC_N(mmap_unaligned_tsd_set) #define mutex_boot JEMALLOC_N(mutex_boot) #define narenas JEMALLOC_N(narenas) #define ncpus JEMALLOC_N(ncpus) diff --git a/src/chunk.c b/src/chunk.c index 0fccd0ce..5426b027 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -274,7 +274,7 @@ chunk_dealloc(void *chunk, size_t size, bool unmap) } bool -chunk_boot0(void) +chunk_boot(void) { /* Set variables according to the value of opt_lg_chunk. */ @@ -301,13 +301,3 @@ chunk_boot0(void) return (false); } - -bool -chunk_boot1(void) -{ - - if (chunk_mmap_boot()) - return (true); - - return (false); -} diff --git a/src/chunk_mmap.c b/src/chunk_mmap.c index 126406ae..9ff7480a 100644 --- a/src/chunk_mmap.c +++ b/src/chunk_mmap.c @@ -1,17 +1,6 @@ #define JEMALLOC_CHUNK_MMAP_C_ #include "jemalloc/internal/jemalloc_internal.h" -/******************************************************************************/ -/* Data. */ - -/* - * Used by chunk_alloc_mmap() to decide whether to attempt the fast path and - * potentially avoid some system calls. - */ -malloc_tsd_data(static, mmap_unaligned, bool, false) -malloc_tsd_funcs(JEMALLOC_INLINE, mmap_unaligned, bool, false, - malloc_tsd_no_cleanup) - /******************************************************************************/ /* Function prototypes for non-inline static functions. 
*/ @@ -112,16 +101,6 @@ chunk_alloc_mmap_slow(size_t size, size_t alignment, bool unaligned, bool *zero) if (trailsize != 0) pages_unmap((void *)((uintptr_t)ret + size), trailsize); - /* - * If mmap() returned an aligned mapping, reset mmap_unaligned so that - * the next chunk_alloc_mmap() execution tries the fast allocation - * method. - */ - if (unaligned == false && mmap_unaligned_booted) { - bool mu = false; - mmap_unaligned_tsd_set(&mu); - } - assert(ret != NULL); *zero = true; return (ret); @@ -131,6 +110,7 @@ void * chunk_alloc_mmap(size_t size, size_t alignment, bool *zero) { void *ret; + size_t offset; /* * Ideally, there would be a way to specify alignment to mmap() (like @@ -152,44 +132,34 @@ chunk_alloc_mmap(size_t size, size_t alignment, bool *zero) * * Another possible confounding factor is address space layout * randomization (ASLR), which causes mmap(2) to disregard the - * requested address. mmap_unaligned tracks whether the previous - * chunk_alloc_mmap() execution received any unaligned or relocated - * mappings, and if so, the current execution will immediately fall - * back to the slow method. However, we keep track of whether the fast - * method would have succeeded, and if so, we make a note to try the - * fast method next time. + * requested address. As such, repeatedly trying to extend unaligned + * mappings could result in an infinite loop, so if extension fails, + * immediately fall back to the reliable method of over-allocation + * followed by trimming. */ - if (mmap_unaligned_booted && *mmap_unaligned_tsd_get() == false) { - size_t offset; + ret = pages_map(NULL, size); + if (ret == NULL) + return (NULL); - ret = pages_map(NULL, size); - if (ret == NULL) - return (NULL); - - offset = ALIGNMENT_ADDR2OFFSET(ret, alignment); - if (offset != 0) { - bool mu = true; - mmap_unaligned_tsd_set(&mu); - /* Try to extend chunk boundary. */ - if (pages_map((void *)((uintptr_t)ret + size), - chunksize - offset) == NULL) { - /* - * Extension failed. 
Clean up, then revert to - * the reliable-but-expensive method. - */ - pages_unmap(ret, size); - return (chunk_alloc_mmap_slow(size, alignment, - true, zero)); - } else { - /* Clean up unneeded leading space. */ - pages_unmap(ret, chunksize - offset); - ret = (void *)((uintptr_t)ret + (chunksize - - offset)); - } + offset = ALIGNMENT_ADDR2OFFSET(ret, alignment); + if (offset != 0) { + /* Try to extend chunk boundary. */ + if (pages_map((void *)((uintptr_t)ret + size), chunksize - + offset) == NULL) { + /* + * Extension failed. Clean up, then fall back to the + * reliable-but-expensive method. + */ + pages_unmap(ret, size); + return (chunk_alloc_mmap_slow(size, alignment, true, + zero)); + } else { + /* Clean up unneeded leading space. */ + pages_unmap(ret, chunksize - offset); + ret = (void *)((uintptr_t)ret + (chunksize - offset)); } - } else - return (chunk_alloc_mmap_slow(size, alignment, false, zero)); + } assert(ret != NULL); *zero = true; @@ -205,21 +175,3 @@ chunk_dealloc_mmap(void *chunk, size_t size) return (config_munmap == false); } - -bool -chunk_mmap_boot(void) -{ - - /* - * XXX For the non-TLS implementation of tsd, the first access from - * each thread causes memory allocation. The result is a bootstrapping - * problem for this particular use case, so for now just disable it by - * leaving it in an unbooted state. 
- */ -#ifdef JEMALLOC_TLS - if (mmap_unaligned_tsd_boot()) - return (true); -#endif - - return (false); -} diff --git a/src/jemalloc.c b/src/jemalloc.c index 00c2b23c..f9c89168 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -634,7 +634,7 @@ malloc_init_hard(void) return (true); } - if (chunk_boot0()) { + if (chunk_boot()) { malloc_mutex_unlock(&init_lock); return (true); } @@ -711,11 +711,6 @@ malloc_init_hard(void) ncpus = malloc_ncpus(); malloc_mutex_lock(&init_lock); - if (chunk_boot1()) { - malloc_mutex_unlock(&init_lock); - return (true); - } - if (mutex_boot()) { malloc_mutex_unlock(&init_lock); return (true); From a19e87fbad020e8dd3d26682032929e8e5ae71c1 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Sat, 21 Apr 2012 21:27:46 -0700 Subject: [PATCH 0181/3378] Add support for Mingw --- Makefile.in | 10 +- configure.ac | 60 +++++++--- .../jemalloc/internal/jemalloc_internal.h.in | 17 +-- include/jemalloc/internal/mutex.h | 20 +++- include/jemalloc/internal/tsd.h | 101 ++++++++++++++++ src/chunk_mmap.c | 108 +++++++++++++----- src/jemalloc.c | 32 +++++- src/mutex.c | 16 ++- src/tsd.c | 27 ++++- src/util.c | 7 +- test/jemalloc_test.h.in | 24 ++++ test/rallocm.c | 6 + 12 files changed, 357 insertions(+), 71 deletions(-) diff --git a/Makefile.in b/Makefile.in index 8a34928b..b9917da4 100644 --- a/Makefile.in +++ b/Makefile.in @@ -32,7 +32,7 @@ SO := @so@ O := @o@ A := @a@ EXE := @exe@ -LIB := @lib@ +LIBPREFIX := @libprefix@ REV := @rev@ install_suffix := @install_suffix@ ABI := @abi@ @@ -51,12 +51,16 @@ SOREV = @SOREV@ PIC_CFLAGS = @PIC_CFLAGS@ ifeq (macho, $(ABI)) -TEST_LIBRARY_PATH := DYLD_FALLBACK_LIBRARY_PATH=$(objroot)lib +TEST_LIBRARY_PATH := DYLD_FALLBACK_LIBRARY_PATH="$(objroot)lib" +else +ifeq (pecoff, $(ABI)) +TEST_LIBRARY_PATH := PATH="$(PATH):$(objroot)lib" else TEST_LIBRARY_PATH := endif +endif -LIBJEMALLOC := $(LIB)jemalloc$(install_suffix) +LIBJEMALLOC := $(LIBPREFIX)jemalloc$(install_suffix) # Lists of files. 
BINS := $(srcroot)bin/pprof $(objroot)bin/jemalloc.sh diff --git a/configure.ac b/configure.ac index 0ed0494b..5bdddfab 100644 --- a/configure.ac +++ b/configure.ac @@ -198,7 +198,7 @@ so="so" o="o" a="a" exe= -lib="lib" +libprefix="lib" DSO_LDFLAGS='-shared -Wl,-soname,$(@F)' RPATH='-Wl,-rpath,$(1)' SOREV='$(SO).$(REV)' @@ -273,6 +273,19 @@ case "${host}" in fi abi="xcoff" ;; + *-*-mingw*) + abi="pecoff" + force_tls="0" + RPATH="" + so="dll" + DSO_LDFLAGS="-shared" + o="obj" + a="lib" + libprefix="" + exe=".exe" + SOREV='$(SO)' + PIC_CFLAGS="" + ;; *) AC_MSG_RESULT([Unsupported operating system: ${host}]) abi="elf" @@ -285,7 +298,7 @@ AC_SUBST([so]) AC_SUBST([o]) AC_SUBST([a]) AC_SUBST([exe]) -AC_SUBST([lib]) +AC_SUBST([libprefix]) AC_SUBST([DSO_LDFLAGS]) AC_SUBST([SOREV]) AC_SUBST([PIC_CFLAGS]) @@ -817,23 +830,36 @@ AC_SUBST([enable_xmalloc]) AC_CACHE_CHECK([STATIC_PAGE_SHIFT], [je_cv_static_page_shift], AC_RUN_IFELSE([AC_LANG_PROGRAM( -[[#include +[[ +#ifdef _WIN32 +#include +#else #include #include +#endif +#include ]], [[ long result; FILE *f; +#ifdef _WIN32 + SYSTEM_INFO si; + GetSystemInfo(&si); + result = si.dwPageSize; +#else result = sysconf(_SC_PAGESIZE); +#endif if (result == -1) { return 1; } + result = ffsl(result) - 1; + f = fopen("conftest.out", "w"); if (f == NULL) { return 1; } - fprintf(f, "%u\n", ffs((int)result) - 1); + fprintf(f, "%u\n", result); fclose(f); return 0; @@ -871,12 +897,14 @@ AC_SUBST([jemalloc_version_gid]) dnl ============================================================================ dnl Configure pthreads. -AC_CHECK_HEADERS([pthread.h], , [AC_MSG_ERROR([pthread.h is missing])]) -dnl Some systems may embed pthreads functionality in libc; check for libpthread -dnl first, but try libc too before failing. 
-AC_CHECK_LIB([pthread], [pthread_create], [LIBS="$LIBS -lpthread"], - [AC_SEARCH_LIBS([pthread_create], , , - AC_MSG_ERROR([libpthread is missing]))]) +if test "x$abi" != "xpecoff" ; then + AC_CHECK_HEADERS([pthread.h], , [AC_MSG_ERROR([pthread.h is missing])]) + dnl Some systems may embed pthreads functionality in libc; check for libpthread + dnl first, but try libc too before failing. + AC_CHECK_LIB([pthread], [pthread_create], [LIBS="$LIBS -lpthread"], + [AC_SEARCH_LIBS([pthread_create], , , + AC_MSG_ERROR([libpthread is missing]))]) +fi CPPFLAGS="$CPPFLAGS -D_REENTRANT" @@ -921,11 +949,13 @@ if test "x$enable_lazy_lock" = "x0" -a "x${force_lazy_lock}" = "x1" ; then enable_lazy_lock="1" fi if test "x$enable_lazy_lock" = "x1" ; then - AC_CHECK_HEADERS([dlfcn.h], , [AC_MSG_ERROR([dlfcn.h is missing])]) - AC_CHECK_FUNC([dlsym], [], - [AC_CHECK_LIB([dl], [dlsym], [LIBS="$LIBS -ldl"], - [AC_MSG_ERROR([libdl is missing])]) - ]) + if test "x$abi" != "xpecoff" ; then + AC_CHECK_HEADERS([dlfcn.h], , [AC_MSG_ERROR([dlfcn.h is missing])]) + AC_CHECK_FUNC([dlsym], [], + [AC_CHECK_LIB([dl], [dlsym], [LIBS="$LIBS -ldl"], + [AC_MSG_ERROR([libdl is missing])]) + ]) + fi AC_DEFINE([JEMALLOC_LAZY_LOCK], [ ]) fi AC_SUBST([enable_lazy_lock]) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index b61abe84..fd5de725 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -1,21 +1,25 @@ #ifndef JEMALLOC_INTERNAL_H #define JEMALLOC_INTERNAL_H -#include #include -#include -#if !defined(SYS_write) && defined(__NR_write) -#define SYS_write __NR_write +#ifdef _WIN32 +# include +#else +# include +# include +# if !defined(SYS_write) && defined(__NR_write) +# define SYS_write __NR_write +# endif +# include +# include #endif #include #include -#include #include #include #ifndef SIZE_T_MAX # define SIZE_T_MAX SIZE_MAX #endif -#include #include #include #include @@ 
-32,7 +36,6 @@ #include #include #include -#include #include #define JEMALLOC_NO_DEMANGLE diff --git a/include/jemalloc/internal/mutex.h b/include/jemalloc/internal/mutex.h index 8837ef57..de44e143 100644 --- a/include/jemalloc/internal/mutex.h +++ b/include/jemalloc/internal/mutex.h @@ -3,10 +3,12 @@ typedef struct malloc_mutex_s malloc_mutex_t; -#ifdef JEMALLOC_OSSPIN -#define MALLOC_MUTEX_INITIALIZER {0} +#ifdef _WIN32 +# define MALLOC_MUTEX_INITIALIZER +#elif (defined(JEMALLOC_OSSPIN)) +# define MALLOC_MUTEX_INITIALIZER {0} #elif (defined(JEMALLOC_MUTEX_INIT_CB)) -#define MALLOC_MUTEX_INITIALIZER {PTHREAD_MUTEX_INITIALIZER, NULL} +# define MALLOC_MUTEX_INITIALIZER {PTHREAD_MUTEX_INITIALIZER, NULL} #else # if (defined(PTHREAD_MUTEX_ADAPTIVE_NP) && \ defined(PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP)) @@ -23,7 +25,9 @@ typedef struct malloc_mutex_s malloc_mutex_t; #ifdef JEMALLOC_H_STRUCTS struct malloc_mutex_s { -#ifdef JEMALLOC_OSSPIN +#ifdef _WIN32 + CRITICAL_SECTION lock; +#elif (defined(JEMALLOC_OSSPIN)) OSSpinLock lock; #elif (defined(JEMALLOC_MUTEX_INIT_CB)) pthread_mutex_t lock; @@ -65,7 +69,9 @@ malloc_mutex_lock(malloc_mutex_t *mutex) { if (isthreaded) { -#ifdef JEMALLOC_OSSPIN +#ifdef _WIN32 + EnterCriticalSection(&mutex->lock); +#elif (defined(JEMALLOC_OSSPIN)) OSSpinLockLock(&mutex->lock); #else pthread_mutex_lock(&mutex->lock); @@ -78,7 +84,9 @@ malloc_mutex_unlock(malloc_mutex_t *mutex) { if (isthreaded) { -#ifdef JEMALLOC_OSSPIN +#ifdef _WIN32 + LeaveCriticalSection(&mutex->lock); +#elif (defined(JEMALLOC_OSSPIN)) OSSpinLockUnlock(&mutex->lock); #else pthread_mutex_unlock(&mutex->lock); diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 20491c88..0037cf35 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -74,6 +74,10 @@ extern bool a_name##_booted; extern __thread a_type a_name##_tls; \ extern pthread_key_t a_name##_tsd; \ extern bool a_name##_booted; +#elif (defined(_WIN32)) 
+#define malloc_tsd_externs(a_name, a_type) \ +extern DWORD a_name##_tsd; \ +extern bool a_name##_booted; #else #define malloc_tsd_externs(a_name, a_type) \ extern pthread_key_t a_name##_tsd; \ @@ -94,6 +98,10 @@ a_attr __thread a_type JEMALLOC_TLS_MODEL \ a_name##_tls = a_initializer; \ a_attr pthread_key_t a_name##_tsd; \ a_attr bool a_name##_booted = false; +#elif (defined(_WIN32)) +#define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ +a_attr DWORD a_name##_tsd; \ +a_attr bool a_name##_booted = false; #else #define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ a_attr pthread_key_t a_name##_tsd; \ @@ -182,6 +190,99 @@ a_name##_tsd_set(a_type *val) \ } \ } \ } +#elif (defined(_WIN32)) +#define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \ + a_cleanup) \ +/* Data structure. */ \ +typedef struct { \ + bool initialized; \ + a_type val; \ +} a_name##_tsd_wrapper_t; \ +/* Initialization/cleanup. */ \ +a_attr bool \ +a_name##_tsd_cleanup_wrapper(void) \ +{ \ + a_name##_tsd_wrapper_t *wrapper; \ + \ + wrapper = (a_name##_tsd_wrapper_t *) TlsGetValue(a_name##_tsd); \ + if (wrapper == NULL) \ + return (false); \ + if (a_cleanup != malloc_tsd_no_cleanup && \ + wrapper->initialized) { \ + a_type val = wrapper->val; \ + a_type tsd_static_data = a_initializer; \ + wrapper->initialized = false; \ + wrapper->val = tsd_static_data; \ + a_cleanup(&val); \ + if (wrapper->initialized) { \ + /* Trigger another cleanup round. */ \ + return (true); \ + } \ + } \ + malloc_tsd_dalloc(wrapper); \ + return (false); \ +} \ +a_attr bool \ +a_name##_tsd_boot(void) \ +{ \ + \ + a_name##_tsd = TlsAlloc(); \ + if (a_name##_tsd == TLS_OUT_OF_INDEXES) \ + return (true); \ + if (a_cleanup != malloc_tsd_no_cleanup) { \ + malloc_tsd_cleanup_register( \ + &a_name##_tsd_cleanup_wrapper); \ + } \ + a_name##_booted = true; \ + return (false); \ +} \ +/* Get/set. 
*/ \ +a_attr a_name##_tsd_wrapper_t * \ +a_name##_tsd_get_wrapper(void) \ +{ \ + a_name##_tsd_wrapper_t *wrapper = (a_name##_tsd_wrapper_t *) \ + TlsGetValue(a_name##_tsd); \ + \ + if (wrapper == NULL) { \ + wrapper = (a_name##_tsd_wrapper_t *) \ + malloc_tsd_malloc(sizeof(a_name##_tsd_wrapper_t)); \ + if (wrapper == NULL) { \ + malloc_write(": Error allocating" \ + " TSD for "#a_name"\n"); \ + abort(); \ + } else { \ + static a_type tsd_static_data = a_initializer; \ + wrapper->initialized = false; \ + wrapper->val = tsd_static_data; \ + } \ + if (!TlsSetValue(a_name##_tsd, (void *)wrapper)) { \ + malloc_write(": Error setting" \ + " TSD for "#a_name"\n"); \ + abort(); \ + } \ + } \ + return (wrapper); \ +} \ +a_attr a_type * \ +a_name##_tsd_get(void) \ +{ \ + a_name##_tsd_wrapper_t *wrapper; \ + \ + assert(a_name##_booted); \ + wrapper = a_name##_tsd_get_wrapper(); \ + return (&wrapper->val); \ +} \ +a_attr void \ +a_name##_tsd_set(a_type *val) \ +{ \ + a_name##_tsd_wrapper_t *wrapper; \ + \ + assert(a_name##_booted); \ + wrapper = a_name##_tsd_get_wrapper(); \ + wrapper->val = *(val); \ + if (a_cleanup != malloc_tsd_no_cleanup) \ + wrapper->initialized = true; \ +} #else #define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \ a_cleanup) \ diff --git a/src/chunk_mmap.c b/src/chunk_mmap.c index 9ff7480a..0ad65a10 100644 --- a/src/chunk_mmap.c +++ b/src/chunk_mmap.c @@ -7,7 +7,7 @@ static void *pages_map(void *addr, size_t size); static void pages_unmap(void *addr, size_t size); static void *chunk_alloc_mmap_slow(size_t size, size_t alignment, - bool unaligned, bool *zero); + bool *zero); /******************************************************************************/ @@ -16,6 +16,14 @@ pages_map(void *addr, size_t size) { void *ret; +#ifdef _WIN32 + /* + * If VirtualAlloc can't allocate at the given address when one is + * given, it fails and returns NULL. 
+ */ + ret = VirtualAlloc(addr, size, MEM_COMMIT | MEM_RESERVE, + PAGE_READWRITE); +#else /* * We don't use MAP_FIXED here, because it can cause the *replacement* * of existing mappings, and we only want to create new mappings. @@ -41,7 +49,7 @@ pages_map(void *addr, size_t size) } ret = NULL; } - +#endif assert(ret == NULL || (addr == NULL && ret != addr) || (addr != NULL && ret == addr)); return (ret); @@ -51,55 +59,94 @@ static void pages_unmap(void *addr, size_t size) { - if (munmap(addr, size) == -1) { +#ifdef _WIN32 + if (VirtualFree(addr, 0, MEM_RELEASE) == 0) +#else + if (munmap(addr, size) == -1) +#endif + { char buf[BUFERROR_BUF]; buferror(errno, buf, sizeof(buf)); - malloc_printf(": Error in munmap(): %s\n", buf); + malloc_printf(": Error in " +#ifdef _WIN32 + "VirtualFree" +#else + "munmap" +#endif + "(): %s\n", buf); if (opt_abort) abort(); } } +static void * +pages_trim(void *addr, size_t alloc_size, size_t leadsize, size_t size) +{ + void *ret = (void *)((uintptr_t)addr + leadsize); + + assert(alloc_size >= leadsize + size); +#ifdef _WIN32 + { + void *new_addr; + + pages_unmap(addr, alloc_size); + new_addr = pages_map(ret, size); + if (new_addr == ret) + return (ret); + if (new_addr) + pages_unmap(new_addr, size); + return (NULL); + } +#else + { + size_t trailsize = alloc_size - leadsize - size; + + if (leadsize != 0) + pages_unmap(addr, leadsize); + if (trailsize != 0) + pages_unmap((void *)((uintptr_t)ret + size), trailsize); + return (ret); + } +#endif +} + void pages_purge(void *addr, size_t length) { -#ifdef JEMALLOC_PURGE_MADVISE_DONTNEED -# define JEMALLOC_MADV_PURGE MADV_DONTNEED -#elif defined(JEMALLOC_PURGE_MADVISE_FREE) -# define JEMALLOC_MADV_PURGE MADV_FREE +#ifdef _WIN32 + VirtualAlloc(addr, length, MEM_RESET, PAGE_READWRITE); #else -# error "No method defined for purging unused dirty pages." 
-#endif +# ifdef JEMALLOC_PURGE_MADVISE_DONTNEED +# define JEMALLOC_MADV_PURGE MADV_DONTNEED +# elif defined(JEMALLOC_PURGE_MADVISE_FREE) +# define JEMALLOC_MADV_PURGE MADV_FREE +# else +# error "No method defined for purging unused dirty pages." +# endif madvise(addr, length, JEMALLOC_MADV_PURGE); +#endif } static void * -chunk_alloc_mmap_slow(size_t size, size_t alignment, bool unaligned, bool *zero) +chunk_alloc_mmap_slow(size_t size, size_t alignment, bool *zero) { void *ret, *pages; - size_t alloc_size, leadsize, trailsize; + size_t alloc_size, leadsize; alloc_size = size + alignment - PAGE; /* Beware size_t wrap-around. */ if (alloc_size < size) return (NULL); - pages = pages_map(NULL, alloc_size); - if (pages == NULL) - return (NULL); - leadsize = ALIGNMENT_CEILING((uintptr_t)pages, alignment) - - (uintptr_t)pages; - assert(alloc_size >= leadsize + size); - trailsize = alloc_size - leadsize - size; - ret = (void *)((uintptr_t)pages + leadsize); - if (leadsize != 0) { - /* Note that mmap() returned an unaligned mapping. */ - unaligned = true; - pages_unmap(pages, leadsize); - } - if (trailsize != 0) - pages_unmap((void *)((uintptr_t)ret + size), trailsize); + do { + pages = pages_map(NULL, alloc_size); + if (pages == NULL) + return (NULL); + leadsize = ALIGNMENT_CEILING((uintptr_t)pages, alignment) - + (uintptr_t)pages; + ret = pages_trim(pages, alloc_size, leadsize, size); + } while (ret == NULL); assert(ret != NULL); *zero = true; @@ -144,6 +191,9 @@ chunk_alloc_mmap(size_t size, size_t alignment, bool *zero) offset = ALIGNMENT_ADDR2OFFSET(ret, alignment); if (offset != 0) { +#ifdef _WIN32 + return (chunk_alloc_mmap_slow(size, alignment, zero)); +#else /* Try to extend chunk boundary. */ if (pages_map((void *)((uintptr_t)ret + size), chunksize - offset) == NULL) { @@ -152,13 +202,13 @@ chunk_alloc_mmap(size_t size, size_t alignment, bool *zero) * reliable-but-expensive method. 
*/ pages_unmap(ret, size); - return (chunk_alloc_mmap_slow(size, alignment, true, - zero)); + return (chunk_alloc_mmap_slow(size, alignment, zero)); } else { /* Clean up unneeded leading space. */ pages_unmap(ret, chunksize - offset); ret = (void *)((uintptr_t)ret + (chunksize - offset)); } +#endif } assert(ret != NULL); diff --git a/src/jemalloc.c b/src/jemalloc.c index f9c89168..67ac90b2 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -52,7 +52,19 @@ static bool malloc_initializer = NO_INITIALIZER; #endif /* Used to avoid initialization races. */ +#ifdef _WIN32 +static malloc_mutex_t init_lock; + +JEMALLOC_ATTR(constructor) +static void +init_init_lock() +{ + + malloc_mutex_init(&init_lock); +} +#else static malloc_mutex_t init_lock = MALLOC_MUTEX_INITIALIZER; +#endif typedef struct { void *p; /* Input pointer (as in realloc(p, s)). */ @@ -229,11 +241,17 @@ malloc_ncpus(void) unsigned ret; long result; +#ifdef _WIN32 + SYSTEM_INFO si; + GetSystemInfo(&si); + result = si.dwNumberOfProcessors; +#else result = sysconf(_SC_NPROCESSORS_ONLN); if (result == -1) { /* Error. */ ret = 1; } +#endif ret = (unsigned)result; return (ret); @@ -369,13 +387,14 @@ malloc_conf_init(void) } break; case 1: { +#ifndef _WIN32 int linklen; const char *linkname = -#ifdef JEMALLOC_PREFIX +# ifdef JEMALLOC_PREFIX "/etc/"JEMALLOC_PREFIX"malloc.conf" -#else +# else "/etc/malloc.conf" -#endif +# endif ; if ((linklen = readlink(linkname, buf, @@ -386,7 +405,9 @@ malloc_conf_init(void) */ buf[linklen] = '\0'; opts = buf; - } else { + } else +#endif + { /* No configuration specified. */ buf[0] = '\0'; opts = buf; @@ -610,7 +631,8 @@ malloc_init_hard(void) malloc_conf_init(); -#if (!defined(JEMALLOC_MUTEX_INIT_CB) && !defined(JEMALLOC_ZONE)) +#if (!defined(JEMALLOC_MUTEX_INIT_CB) && !defined(JEMALLOC_ZONE) \ + && !defined(_WIN32)) /* Register fork handlers. 
*/ if (pthread_atfork(jemalloc_prefork, jemalloc_postfork_parent, jemalloc_postfork_child) != 0) { diff --git a/src/mutex.c b/src/mutex.c index 4b8ce570..159d82a3 100644 --- a/src/mutex.c +++ b/src/mutex.c @@ -1,10 +1,14 @@ #define JEMALLOC_MUTEX_C_ #include "jemalloc/internal/jemalloc_internal.h" -#ifdef JEMALLOC_LAZY_LOCK +#if defined(JEMALLOC_LAZY_LOCK) && !defined(_WIN32) #include #endif +#ifndef _CRT_SPINCOUNT +#define _CRT_SPINCOUNT 4000 +#endif + /******************************************************************************/ /* Data. */ @@ -16,7 +20,7 @@ static bool postpone_init = true; static malloc_mutex_t *postponed_mutexes = NULL; #endif -#ifdef JEMALLOC_LAZY_LOCK +#if defined(JEMALLOC_LAZY_LOCK) && !defined(_WIN32) static void pthread_create_once(void); #endif @@ -26,7 +30,7 @@ static void pthread_create_once(void); * process goes multi-threaded. */ -#ifdef JEMALLOC_LAZY_LOCK +#if defined(JEMALLOC_LAZY_LOCK) && !defined(_WIN32) static int (*pthread_create_fptr)(pthread_t *__restrict, const pthread_attr_t *, void *(*)(void *), void *__restrict); @@ -68,7 +72,11 @@ int _pthread_mutex_init_calloc_cb(pthread_mutex_t *mutex, bool malloc_mutex_init(malloc_mutex_t *mutex) { -#ifdef JEMALLOC_OSSPIN +#ifdef _WIN32 + if (!InitializeCriticalSectionAndSpinCount(&mutex->lock, + _CRT_SPINCOUNT)) + return (true); +#elif (defined(JEMALLOC_OSSPIN)) mutex->lock = 0; #elif (defined(JEMALLOC_MUTEX_INIT_CB)) if (postpone_init) { diff --git a/src/tsd.c b/src/tsd.c index 281a2e9b..09f06e88 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -31,7 +31,7 @@ malloc_tsd_no_cleanup(void *arg) not_reached(); } -#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP +#if defined(JEMALLOC_MALLOC_THREAD_CLEANUP) || defined(_WIN32) JEMALLOC_ATTR(visibility("default")) void _malloc_thread_cleanup(void) @@ -70,3 +70,28 @@ malloc_tsd_boot(void) ncleanups = 0; } + +#ifdef _WIN32 +static BOOL WINAPI +_tls_callback(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved) +{ + + switch (fdwReason) { +#ifdef 
JEMALLOC_LAZY_LOCK + case DLL_THREAD_ATTACH: + isthreaded = true; + break; +#endif + case DLL_THREAD_DETACH: + _malloc_thread_cleanup(); + break; + default: + break; + } + return (true); +} + +JEMALLOC_ATTR(section(".CRT$XLY")) JEMALLOC_ATTR(used) +static const BOOL (WINAPI *tls_callback)(HINSTANCE hinstDLL, + DWORD fdwReason, LPVOID lpvReserved) = _tls_callback; +#endif diff --git a/src/util.c b/src/util.c index 99ae26dd..ee9efdfa 100644 --- a/src/util.c +++ b/src/util.c @@ -67,7 +67,12 @@ void (*je_malloc_message)(void *, const char *s) int buferror(int errnum, char *buf, size_t buflen) { -#ifdef _GNU_SOURCE + +#ifdef _WIN32 + FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM, NULL, GetLastError(), 0, + (LPSTR)buf, buflen, NULL); + return (0); +#elif defined(_GNU_SOURCE) char *b = strerror_r(errno, buf, buflen); if (b != buf) { strncpy(buf, b, buflen); diff --git a/test/jemalloc_test.h.in b/test/jemalloc_test.h.in index 8833a03e..e38b48ef 100644 --- a/test/jemalloc_test.h.in +++ b/test/jemalloc_test.h.in @@ -7,6 +7,29 @@ #include "jemalloc/internal/jemalloc_internal.h" /* Abstraction layer for threading in tests */ +#ifdef _WIN32 +#include + +typedef HANDLE je_thread_t; + +void +je_thread_create(je_thread_t *thread, void *(*proc)(void *), void *arg) +{ + LPTHREAD_START_ROUTINE routine = (LPTHREAD_START_ROUTINE)proc; + *thread = CreateThread(NULL, 0, routine, arg, 0, NULL); + if (*thread == NULL) { + malloc_printf("Error in CreateThread()\n"); + exit(1); + } +} + +void +je_thread_join(je_thread_t thread, void **ret) +{ + WaitForSingleObject(thread, INFINITE); +} + +#else #include typedef pthread_t je_thread_t; @@ -27,3 +50,4 @@ je_thread_join(je_thread_t thread, void **ret) pthread_join(thread, ret); } +#endif diff --git a/test/rallocm.c b/test/rallocm.c index 18db5eec..f2a47708 100644 --- a/test/rallocm.c +++ b/test/rallocm.c @@ -19,9 +19,15 @@ main(void) /* Get page size. 
*/ { +#ifdef _WIN32 + SYSTEM_INFO si; + GetSystemInfo(&si); + pagesize = (size_t)si.dwPageSize; +#else long result = sysconf(_SC_PAGESIZE); assert(result != -1); pagesize = (size_t)result; +#endif } r = allocm(&p, &sz, 42, 0); From 08e2221e99ef96277331f43e87d3f3ff770d27a7 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Fri, 20 Apr 2012 08:38:38 +0200 Subject: [PATCH 0182/3378] Remove leftovers from the vsnprintf check in malloc_vsnprintf Commit 4eeb52f removed vsnprintf validation, but left a now unused va_copy. It so happens that MSVC doesn't support va_copy. --- src/util.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/util.c b/src/util.c index ee9efdfa..2042329c 100644 --- a/src/util.c +++ b/src/util.c @@ -288,7 +288,6 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) int ret; size_t i; const char *f; - va_list tap; #define APPEND_C(c) do { \ if (i < size) \ @@ -359,9 +358,6 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) } \ } while (0) - if (config_debug) - va_copy(tap, ap); - i = 0; f = format; while (true) { From 14103d3598e7b828e79a81f2978dc08348677b02 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Fri, 20 Apr 2012 08:38:39 +0200 Subject: [PATCH 0183/3378] Fix intmax_t configure error message --- configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 5bdddfab..396b5ef3 100644 --- a/configure.ac +++ b/configure.ac @@ -164,7 +164,7 @@ elif test "x${ac_cv_sizeof_intmax_t}" = "x8" ; then elif test "x${ac_cv_sizeof_intmax_t}" = "x4" ; then LG_SIZEOF_INTMAX_T=2 else - AC_MSG_ERROR([Unsupported intmax_t size: ${ac_cv_sizeof_long}]) + AC_MSG_ERROR([Unsupported intmax_t size: ${ac_cv_sizeof_intmax_t}]) fi AC_DEFINE_UNQUOTED([LG_SIZEOF_INTMAX_T], [$LG_SIZEOF_INTMAX_T]) From 834f8770ee780ef439d7ee639caa668a5e23cf76 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Fri, 20 Apr 2012 08:38:40 +0200 Subject: [PATCH 0184/3378] Remove #includes in 
tests Since we're now including jemalloc_internal.h, all the required headers are already pulled. This will avoid having to fiddle with headers that can or can't be used with MSVC. Also, now that we use malloc_printf, we can use util.h's definition of assert instead of assert.h's. --- test/aligned_alloc.c | 7 ------- test/allocated.c | 8 -------- test/allocm.c | 4 ---- test/bitmap.c | 6 ------ test/mremap.c | 6 ------ test/posix_memalign.c | 7 ------- test/rallocm.c | 6 ------ test/thread_arena.c | 5 ----- test/thread_tcache_enabled.c | 6 ------ 9 files changed, 55 deletions(-) diff --git a/test/aligned_alloc.c b/test/aligned_alloc.c index 81b8f933..81caa0ad 100644 --- a/test/aligned_alloc.c +++ b/test/aligned_alloc.c @@ -1,10 +1,3 @@ -#include -#include -#include -#include -#include -#include - #define JEMALLOC_MANGLE #include "jemalloc_test.h" diff --git a/test/allocated.c b/test/allocated.c index 00039ed8..9884905d 100644 --- a/test/allocated.c +++ b/test/allocated.c @@ -1,11 +1,3 @@ -#include -#include -#include -#include -#include -#include -#include - #define JEMALLOC_MANGLE #include "jemalloc_test.h" diff --git a/test/allocm.c b/test/allocm.c index c6bc6f83..80be673b 100644 --- a/test/allocm.c +++ b/test/allocm.c @@ -1,7 +1,3 @@ -#include -#include -#include - #define JEMALLOC_MANGLE #include "jemalloc_test.h" diff --git a/test/bitmap.c b/test/bitmap.c index ee9b1ecc..ff50ecb3 100644 --- a/test/bitmap.c +++ b/test/bitmap.c @@ -1,12 +1,6 @@ #define JEMALLOC_MANGLE #include "jemalloc_test.h" -/* - * Avoid using the assert() from jemalloc_internal.h, since it requires - * internal libjemalloc functionality. 
- * */ -#include - #if (LG_BITMAP_MAXBITS > 12) # define MAXBITS 4500 #else diff --git a/test/mremap.c b/test/mremap.c index 84c03491..47efa7c4 100644 --- a/test/mremap.c +++ b/test/mremap.c @@ -1,9 +1,3 @@ -#include -#include -#include -#include -#include - #define JEMALLOC_MANGLE #include "jemalloc_test.h" diff --git a/test/posix_memalign.c b/test/posix_memalign.c index e1302df2..2185bcf7 100644 --- a/test/posix_memalign.c +++ b/test/posix_memalign.c @@ -1,10 +1,3 @@ -#include -#include -#include -#include -#include -#include - #define JEMALLOC_MANGLE #include "jemalloc_test.h" diff --git a/test/rallocm.c b/test/rallocm.c index f2a47708..c5dedf48 100644 --- a/test/rallocm.c +++ b/test/rallocm.c @@ -1,9 +1,3 @@ -#include -#include -#include -#include -#include - #define JEMALLOC_MANGLE #include "jemalloc_test.h" diff --git a/test/thread_arena.c b/test/thread_arena.c index 98354282..2020d994 100644 --- a/test/thread_arena.c +++ b/test/thread_arena.c @@ -1,8 +1,3 @@ -#include -#include -#include -#include - #define JEMALLOC_MANGLE #include "jemalloc_test.h" diff --git a/test/thread_tcache_enabled.c b/test/thread_tcache_enabled.c index 9f765841..2061b7bb 100644 --- a/test/thread_tcache_enabled.c +++ b/test/thread_tcache_enabled.c @@ -1,9 +1,3 @@ -#include -#include -#include -#include -#include - #define JEMALLOC_MANGLE #include "jemalloc_test.h" From a5288ca93434d98f91438de40d99177ffdfd2a17 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Fri, 20 Apr 2012 08:38:41 +0200 Subject: [PATCH 0185/3378] Remove unused #includes --- include/jemalloc/internal/jemalloc_internal.h.in | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index fd5de725..691f50a9 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -12,7 +12,6 @@ # include # include #endif -#include #include #include @@ -20,7 +19,6 @@ #ifndef 
SIZE_T_MAX # define SIZE_T_MAX SIZE_MAX #endif -#include #include #include #include From 52386b2dc689db3bf71307424c4e1a2b7044c363 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 22 Apr 2012 16:00:11 -0700 Subject: [PATCH 0186/3378] Fix heap profiling bugs. Fix a potential deadlock that could occur during interval- and growth-triggered heap profile dumps. Fix an off-by-one heap profile statistics bug that could be observed in interval- and growth-triggered heap profiles. Fix heap profile dump filename sequence numbers (regression during conversion to malloc_snprintf()). --- ChangeLog | 8 +- include/jemalloc/internal/private_namespace.h | 1 + include/jemalloc/internal/prof.h | 43 ++++- src/prof.c | 164 ++++++++++-------- 4 files changed, 129 insertions(+), 87 deletions(-) diff --git a/ChangeLog b/ChangeLog index b71fa165..42fc5197 100644 --- a/ChangeLog +++ b/ChangeLog @@ -64,18 +64,22 @@ found in the git revision history: - Remove the --enable-sysv configure option. Bug fixes: - - Fix fork-related bugs that could cause deadlock in children between fork - and exec. - Fix a statistics-related bug in the "thread.arena" mallctl that could cause invalid statistics and crashes. - Work around TLS dallocation via free() on Linux. This bug could cause write-after-free memory corruption. + - Fix a potential deadlock that could occur during interval- and + growth-triggered heap profile dumps. - Fix chunk_alloc_dss() to stop claiming memory is zeroed. This bug could cause memory corruption and crashes with --enable-dss specified. + - Fix fork-related bugs that could cause deadlock in children between fork + and exec. - Fix malloc_stats_print() to honor 'b' and 'l' in the opts parameter. - Fix realloc(p, 0) to act like free(p). - Do not enforce minimum alignment in memalign(). - Check for NULL pointer in malloc_usable_size(). + - Fix an off-by-one heap profile statistics bug that could be observed in + interval- and growth-triggered heap profiles. 
- Fix bin->runcur management to fix a layout policy bug. This bug did not affect correctness. - Fix a bug in choose_arena_hard() that potentially caused more arenas to be diff --git a/include/jemalloc/internal/private_namespace.h b/include/jemalloc/internal/private_namespace.h index bb1b63e9..c467153a 100644 --- a/include/jemalloc/internal/private_namespace.h +++ b/include/jemalloc/internal/private_namespace.h @@ -240,6 +240,7 @@ #define prof_sample_threshold_update JEMALLOC_N(prof_sample_threshold_update) #define prof_tdata_booted JEMALLOC_N(prof_tdata_booted) #define prof_tdata_cleanup JEMALLOC_N(prof_tdata_cleanup) +#define prof_tdata_get JEMALLOC_N(prof_tdata_get) #define prof_tdata_init JEMALLOC_N(prof_tdata_init) #define prof_tdata_initialized JEMALLOC_N(prof_tdata_initialized) #define prof_tdata_tls JEMALLOC_N(prof_tdata_tls) diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index a4c563cc..093ac93c 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -113,9 +113,19 @@ struct prof_ctx_s { /* Associated backtrace. */ prof_bt_t *bt; - /* Protects cnt_merged and cnts_ql. */ + /* Protects nlimbo, cnt_merged, and cnts_ql. */ malloc_mutex_t *lock; + /* + * Number of threads that currently cause this ctx to be in a state of + * limbo due to one of: + * - Initializing per thread counters associated with this ctx. + * - Preparing to destroy this ctx. + * nlimbo must be 1 (single destroyer) in order to safely destroy the + * ctx. + */ + unsigned nlimbo; + /* Temporary storage for summation during dump. */ prof_cnt_t cnt_summed; @@ -152,6 +162,11 @@ struct prof_tdata_s { uint64_t prng_state; uint64_t threshold; uint64_t accum; + + /* State used to avoid dumping while operating on prof internals. 
*/ + bool enq; + bool enq_idump; + bool enq_gdump; }; #endif /* JEMALLOC_H_STRUCTS */ @@ -211,14 +226,9 @@ bool prof_boot2(void); \ assert(size == s2u(size)); \ \ - prof_tdata = *prof_tdata_tsd_get(); \ - if (prof_tdata == NULL) { \ - prof_tdata = prof_tdata_init(); \ - if (prof_tdata == NULL) { \ - ret = NULL; \ - break; \ - } \ - } \ + prof_tdata = prof_tdata_get(); \ + if (prof_tdata == NULL) \ + break; \ \ if (opt_prof_active == false) { \ /* Sampling is currently inactive, so avoid sampling. */\ @@ -260,6 +270,7 @@ bool prof_boot2(void); #ifndef JEMALLOC_ENABLE_INLINE malloc_tsd_protos(JEMALLOC_ATTR(unused), prof_tdata, prof_tdata_t *) +prof_tdata_t *prof_tdata_get(void); void prof_sample_threshold_update(prof_tdata_t *prof_tdata); prof_ctx_t *prof_ctx_get(const void *ptr); void prof_ctx_set(const void *ptr, prof_ctx_t *ctx); @@ -276,6 +287,20 @@ malloc_tsd_externs(prof_tdata, prof_tdata_t *) malloc_tsd_funcs(JEMALLOC_INLINE, prof_tdata, prof_tdata_t *, NULL, prof_tdata_cleanup) +JEMALLOC_INLINE prof_tdata_t * +prof_tdata_get(void) +{ + prof_tdata_t *prof_tdata; + + cassert(config_prof); + + prof_tdata = *prof_tdata_tsd_get(); + if (prof_tdata == NULL) + prof_tdata = prof_tdata_init(); + + return (prof_tdata); +} + JEMALLOC_INLINE void prof_sample_threshold_update(prof_tdata_t *prof_tdata) { diff --git a/src/prof.c b/src/prof.c index 227560b8..187bda7d 100644 --- a/src/prof.c +++ b/src/prof.c @@ -64,11 +64,6 @@ static int prof_dump_fd; /* Do not dump any profiles until bootstrapping is complete. */ static bool prof_booted = false; -static malloc_mutex_t enq_mtx; -static bool enq; -static bool enq_idump; -static bool enq_gdump; - /******************************************************************************/ /* Function prototypes for non-inline static functions. 
*/ @@ -148,20 +143,19 @@ bt_dup(prof_bt_t *bt) } static inline void -prof_enter(void) +prof_enter(prof_tdata_t *prof_tdata) { cassert(config_prof); - malloc_mutex_lock(&enq_mtx); - enq = true; - malloc_mutex_unlock(&enq_mtx); + assert(prof_tdata->enq == false); + prof_tdata->enq = true; malloc_mutex_lock(&bt2ctx_mtx); } static inline void -prof_leave(void) +prof_leave(prof_tdata_t *prof_tdata) { bool idump, gdump; @@ -169,13 +163,12 @@ prof_leave(void) malloc_mutex_unlock(&bt2ctx_mtx); - malloc_mutex_lock(&enq_mtx); - enq = false; - idump = enq_idump; - enq_idump = false; - gdump = enq_gdump; - enq_gdump = false; - malloc_mutex_unlock(&enq_mtx); + assert(prof_tdata->enq); + prof_tdata->enq = false; + idump = prof_tdata->enq_idump; + prof_tdata->enq_idump = false; + gdump = prof_tdata->enq_gdump; + prof_tdata->enq_gdump = false; if (idump) prof_idump(); @@ -446,12 +439,9 @@ prof_lookup(prof_bt_t *bt) cassert(config_prof); - prof_tdata = *prof_tdata_tsd_get(); - if (prof_tdata == NULL) { - prof_tdata = prof_tdata_init(); - if (prof_tdata == NULL) - return (NULL); - } + prof_tdata = prof_tdata_get(); + if (prof_tdata == NULL) + return (NULL); if (ckh_search(&prof_tdata->bt2cnt, bt, NULL, &ret.v)) { union { @@ -468,52 +458,48 @@ prof_lookup(prof_bt_t *bt) * This thread's cache lacks bt. Look for it in the global * cache. */ - prof_enter(); + prof_enter(prof_tdata); if (ckh_search(&bt2ctx, bt, &btkey.v, &ctx.v)) { /* bt has never been seen before. Insert it. */ ctx.v = imalloc(sizeof(prof_ctx_t)); if (ctx.v == NULL) { - prof_leave(); + prof_leave(prof_tdata); return (NULL); } btkey.p = bt_dup(bt); if (btkey.v == NULL) { - prof_leave(); + prof_leave(prof_tdata); idalloc(ctx.v); return (NULL); } ctx.p->bt = btkey.p; ctx.p->lock = prof_ctx_mutex_choose(); + /* + * Set nlimbo to 1, in order to avoid a race condition + * with prof_ctx_merge()/prof_ctx_destroy(). 
+ */ + ctx.p->nlimbo = 1; memset(&ctx.p->cnt_merged, 0, sizeof(prof_cnt_t)); ql_new(&ctx.p->cnts_ql); if (ckh_insert(&bt2ctx, btkey.v, ctx.v)) { /* OOM. */ - prof_leave(); + prof_leave(prof_tdata); idalloc(btkey.v); idalloc(ctx.v); return (NULL); } - /* - * Artificially raise curobjs, in order to avoid a race - * condition with prof_ctx_merge()/prof_ctx_destroy(). - * - * No locking is necessary for ctx here because no other - * threads have had the opportunity to fetch it from - * bt2ctx yet. - */ - ctx.p->cnt_merged.curobjs++; new_ctx = true; } else { /* - * Artificially raise curobjs, in order to avoid a race - * condition with prof_ctx_merge()/prof_ctx_destroy(). + * Increment nlimbo, in order to avoid a race condition + * with prof_ctx_merge()/prof_ctx_destroy(). */ malloc_mutex_lock(ctx.p->lock); - ctx.p->cnt_merged.curobjs++; + ctx.p->nlimbo++; malloc_mutex_unlock(ctx.p->lock); new_ctx = false; } - prof_leave(); + prof_leave(prof_tdata); /* Link a prof_thd_cnt_t into ctx for this thread. */ if (ckh_count(&prof_tdata->bt2cnt) == PROF_TCMAX) { @@ -555,7 +541,7 @@ prof_lookup(prof_bt_t *bt) ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link); malloc_mutex_lock(ctx.p->lock); ql_tail_insert(&ctx.p->cnts_ql, ret.p, cnts_link); - ctx.p->cnt_merged.curobjs--; + ctx.p->nlimbo--; malloc_mutex_unlock(ctx.p->lock); } else { /* Move ret to the front of the LRU. */ @@ -688,26 +674,30 @@ prof_ctx_sum(prof_ctx_t *ctx, prof_cnt_t *cnt_all, size_t *leak_nctx) static void prof_ctx_destroy(prof_ctx_t *ctx) { + prof_tdata_t *prof_tdata; cassert(config_prof); /* * Check that ctx is still unused by any thread cache before destroying - * it. prof_lookup() artificially raises ctx->cnt_merge.curobjs in - * order to avoid a race condition with this function, as does - * prof_ctx_merge() in order to avoid a race between the main body of - * prof_ctx_merge() and entry into this function. + * it. 
prof_lookup() increments ctx->nlimbo in order to avoid a race + * condition with this function, as does prof_ctx_merge() in order to + * avoid a race between the main body of prof_ctx_merge() and entry + * into this function. */ - prof_enter(); + prof_tdata = *prof_tdata_tsd_get(); + assert(prof_tdata != NULL); + prof_enter(prof_tdata); malloc_mutex_lock(ctx->lock); - if (ql_first(&ctx->cnts_ql) == NULL && ctx->cnt_merged.curobjs == 1) { + if (ql_first(&ctx->cnts_ql) == NULL && ctx->cnt_merged.curobjs == 0 && + ctx->nlimbo == 1) { assert(ctx->cnt_merged.curbytes == 0); assert(ctx->cnt_merged.accumobjs == 0); assert(ctx->cnt_merged.accumbytes == 0); /* Remove ctx from bt2ctx. */ if (ckh_remove(&bt2ctx, ctx->bt, NULL, NULL)) assert(false); - prof_leave(); + prof_leave(prof_tdata); /* Destroy ctx. */ malloc_mutex_unlock(ctx->lock); bt_destroy(ctx->bt); @@ -717,9 +707,9 @@ prof_ctx_destroy(prof_ctx_t *ctx) * Compensate for increment in prof_ctx_merge() or * prof_lookup(). */ - ctx->cnt_merged.curobjs--; + ctx->nlimbo--; malloc_mutex_unlock(ctx->lock); - prof_leave(); + prof_leave(prof_tdata); } } @@ -738,12 +728,12 @@ prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt) ctx->cnt_merged.accumbytes += cnt->cnts.accumbytes; ql_remove(&ctx->cnts_ql, cnt, cnts_link); if (opt_prof_accum == false && ql_first(&ctx->cnts_ql) == NULL && - ctx->cnt_merged.curobjs == 0) { + ctx->cnt_merged.curobjs == 0 && ctx->nlimbo == 0) { /* - * Artificially raise ctx->cnt_merged.curobjs in order to keep - * another thread from winning the race to destroy ctx while - * this one has ctx->lock dropped. Without this, it would be - * possible for another thread to: + * Increment ctx->nlimbo in order to keep another thread from + * winning the race to destroy ctx while this one has ctx->lock + * dropped. Without this, it would be possible for another + * thread to: * * 1) Sample an allocation associated with ctx. * 2) Deallocate the sampled object. 
@@ -752,7 +742,7 @@ prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt) * The result would be that ctx no longer exists by the time * this thread accesses it in prof_ctx_destroy(). */ - ctx->cnt_merged.curobjs++; + ctx->nlimbo++; destroy = true; } else destroy = false; @@ -768,7 +758,16 @@ prof_dump_ctx(bool propagate_err, prof_ctx_t *ctx, prof_bt_t *bt) cassert(config_prof); - if (opt_prof_accum == false && ctx->cnt_summed.curobjs == 0) { + /* + * Current statistics can sum to 0 as a result of unmerged per thread + * statistics. Additionally, interval- and growth-triggered dumps can + * occur between the time a ctx is created and when its statistics are + * filled in. Avoid dumping any ctx that is an artifact of either + * implementation detail. + */ + if ((opt_prof_accum == false && ctx->cnt_summed.curobjs == 0) || + (opt_prof_accum && ctx->cnt_summed.accumobjs == 0)) { + assert(ctx->cnt_summed.curobjs == 0); assert(ctx->cnt_summed.curbytes == 0); assert(ctx->cnt_summed.accumobjs == 0); assert(ctx->cnt_summed.accumbytes == 0); @@ -831,6 +830,7 @@ prof_dump_maps(bool propagate_err) static bool prof_dump(bool propagate_err, const char *filename, bool leakcheck) { + prof_tdata_t *prof_tdata; prof_cnt_t cnt_all; size_t tabind; union { @@ -845,7 +845,10 @@ prof_dump(bool propagate_err, const char *filename, bool leakcheck) cassert(config_prof); - prof_enter(); + prof_tdata = prof_tdata_get(); + if (prof_tdata == NULL) + return (true); + prof_enter(prof_tdata); prof_dump_fd = creat(filename, 0644); if (prof_dump_fd == -1) { if (propagate_err == false) { @@ -896,7 +899,7 @@ prof_dump(bool propagate_err, const char *filename, bool leakcheck) if (prof_flush(propagate_err)) goto label_error; close(prof_dump_fd); - prof_leave(); + prof_leave(prof_tdata); if (leakcheck && cnt_all.curbytes != 0) { malloc_printf(": Leak summary: %"PRId64" byte%s, %" @@ -911,7 +914,7 @@ prof_dump(bool propagate_err, const char *filename, bool leakcheck) return (false); label_error: - 
prof_leave(); + prof_leave(prof_tdata); return (true); } @@ -933,6 +936,7 @@ prof_dump_filename(char *filename, char v, int64_t vseq) "%s.%d.%"PRIu64".%c.heap", opt_prof_prefix, (int)getpid(), prof_dump_seq, v); } + prof_dump_seq++; } static void @@ -956,19 +960,24 @@ prof_fdump(void) void prof_idump(void) { + prof_tdata_t *prof_tdata; char filename[PATH_MAX + 1]; cassert(config_prof); if (prof_booted == false) return; - malloc_mutex_lock(&enq_mtx); - if (enq) { - enq_idump = true; - malloc_mutex_unlock(&enq_mtx); + /* + * Don't call prof_tdata_get() here, because it could cause recursive + * allocation. + */ + prof_tdata = *prof_tdata_tsd_get(); + if (prof_tdata == NULL) + return; + if (prof_tdata->enq) { + prof_tdata->enq_idump = true; return; } - malloc_mutex_unlock(&enq_mtx); if (opt_prof_prefix[0] != '\0') { malloc_mutex_lock(&prof_dump_seq_mtx); @@ -1005,19 +1014,24 @@ prof_mdump(const char *filename) void prof_gdump(void) { + prof_tdata_t *prof_tdata; char filename[DUMP_FILENAME_BUFSIZE]; cassert(config_prof); if (prof_booted == false) return; - malloc_mutex_lock(&enq_mtx); - if (enq) { - enq_gdump = true; - malloc_mutex_unlock(&enq_mtx); + /* + * Don't call prof_tdata_get() here, because it could cause recursive + * allocation. 
+ */ + prof_tdata = *prof_tdata_tsd_get(); + if (prof_tdata == NULL) + return; + if (prof_tdata->enq) { + prof_tdata->enq_gdump = true; return; } - malloc_mutex_unlock(&enq_mtx); if (opt_prof_prefix[0] != '\0') { malloc_mutex_lock(&prof_dump_seq_mtx); @@ -1110,6 +1124,10 @@ prof_tdata_init(void) prof_tdata->threshold = 0; prof_tdata->accum = 0; + prof_tdata->enq = false; + prof_tdata->enq_idump = false; + prof_tdata->enq_gdump = false; + prof_tdata_tsd_set(&prof_tdata); return (prof_tdata); @@ -1206,12 +1224,6 @@ prof_boot2(void) if (malloc_mutex_init(&prof_dump_seq_mtx)) return (true); - if (malloc_mutex_init(&enq_mtx)) - return (true); - enq = false; - enq_idump = false; - enq_gdump = false; - if (atexit(prof_fdump) != 0) { malloc_write(": Error in atexit()\n"); if (opt_abort) From 40f514fd92d50320075bf9fd8748edb71092a1d8 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 22 Apr 2012 16:21:06 -0700 Subject: [PATCH 0187/3378] Document MinGW support. --- ChangeLog | 1 + README | 12 ++++++------ 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/ChangeLog b/ChangeLog index 42fc5197..97a90468 100644 --- a/ChangeLog +++ b/ChangeLog @@ -22,6 +22,7 @@ found in the git revision history: - Add support for additional operating systems: + FreeBSD + Mac OS X Lion + + MinGW - Add support for additional architectures: + MIPS + SH4 diff --git a/README b/README index a7864f33..411d52d4 100644 --- a/README +++ b/README @@ -1,10 +1,10 @@ jemalloc is a general-purpose scalable concurrent malloc(3) implementation. -This distribution is a stand-alone "portable" implementation that currently -targets FreeBSD, Linux and Apple OS X. jemalloc is included as the default -allocator in the FreeBSD and NetBSD operating systems, and it is used by the -Mozilla Firefox web browser on Microsoft Windows-related platforms. Depending -on your needs, one of the other divergent versions may suit your needs better -than this distribution. 
+This distribution is a "portable" implementation that currently +targets FreeBSD, Linux, Apple OS X, and MinGW. jemalloc is included as the +default allocator in the FreeBSD and NetBSD operating systems, and it is used +by the Mozilla Firefox web browser on Microsoft Windows-related platforms. +Depending on your needs, one of the other divergent versions may suit your +needs better than this distribution. The COPYING file contains copyright and licensing information. From 461ad5c87ae5f89cd086e47b31372e9123dcfcdf Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Fri, 20 Apr 2012 08:38:42 +0200 Subject: [PATCH 0188/3378] Avoid using a union for ctl_node_s MSVC doesn't support C99, and as such doesn't support designated initialization of structs and unions. As there is never a mix of indexed and named nodes, it is pretty straightforward to use a different type for each. --- include/jemalloc/internal/ctl.h | 27 ++--- src/ctl.c | 168 +++++++++++++++++++------------- 2 files changed, 114 insertions(+), 81 deletions(-) diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index a48d09fe..c06b9af0 100644 --- a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -2,6 +2,8 @@ #ifdef JEMALLOC_H_TYPES typedef struct ctl_node_s ctl_node_t; +typedef struct ctl_named_node_s ctl_named_node_t; +typedef struct ctl_indexed_node_s ctl_indexed_node_t; typedef struct ctl_arena_stats_s ctl_arena_stats_t; typedef struct ctl_stats_s ctl_stats_t; @@ -11,22 +13,23 @@ typedef struct ctl_stats_s ctl_stats_t; struct ctl_node_s { bool named; - union { - struct { - const char *name; - /* If (nchildren == 0), this is a terminal node. */ - unsigned nchildren; - const ctl_node_t *children; - } named; - struct { - const ctl_node_t *(*index)(const size_t *, size_t, - size_t); - } indexed; - } u; +}; + +struct ctl_named_node_s { + struct ctl_node_s node; + const char *name; + /* If (nchildren == 0), this is a terminal node. 
*/ + unsigned nchildren; + const ctl_node_t *children; int (*ctl)(const size_t *, size_t, void *, size_t *, void *, size_t); }; +struct ctl_indexed_node_s { + struct ctl_node_s node; + const ctl_named_node_t *(*index)(const size_t *, size_t, size_t); +}; + struct ctl_arena_stats_s { bool initialized; unsigned nthreads; diff --git a/src/ctl.c b/src/ctl.c index 98ea3d1c..52c85945 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -14,6 +14,32 @@ static bool ctl_initialized; static uint64_t ctl_epoch; static ctl_stats_t ctl_stats; +/******************************************************************************/ +/* Helpers for named and indexed nodes. */ + +static inline const ctl_named_node_t * +ctl_named_node(const ctl_node_t *node) +{ + + return ((node->named) ? (const ctl_named_node_t *)node : NULL); +} + +static inline const ctl_named_node_t * +ctl_named_children(const ctl_named_node_t *node, int index) +{ + const ctl_named_node_t *children = ctl_named_node(node->children); + + return (children ? &children[index] : NULL); +} + +static inline const ctl_indexed_node_t * +ctl_indexed_node(const ctl_node_t *node) +{ + + return ((node->named == false) ? (const ctl_indexed_node_t *)node : + NULL); +} + /******************************************************************************/ /* Function prototypes for non-inline static functions. */ @@ -22,7 +48,7 @@ static int n##_ctl(const size_t *mib, size_t miblen, void *oldp, \ size_t *oldlenp, void *newp, size_t newlen); #define INDEX_PROTO(n) \ -const ctl_node_t *n##_index(const size_t *mib, size_t miblen, \ +const ctl_named_node_t *n##_index(const size_t *mib, size_t miblen, \ size_t i); static bool ctl_arena_init(ctl_arena_stats_t *astats); @@ -149,22 +175,23 @@ CTL_PROTO(stats_mapped) /* Maximum tree depth. 
*/ #define CTL_MAX_DEPTH 6 -#define NAME(n) true, {.named = {n -#define CHILD(c) sizeof(c##_node) / sizeof(ctl_node_t), c##_node}}, NULL -#define CTL(c) 0, NULL}}, c##_ctl +#define NAME(n) {true}, n +#define CHILD(c) \ + sizeof(c##_node) / sizeof(ctl_node_t), (ctl_node_t *)c##_node, NULL +#define CTL(c) 0, NULL, c##_ctl /* * Only handles internal indexed nodes, since there are currently no external * ones. */ -#define INDEX(i) false, {.indexed = {i##_index}}, NULL +#define INDEX(i) {false}, i##_index -static const ctl_node_t tcache_node[] = { +static const ctl_named_node_t tcache_node[] = { {NAME("enabled"), CTL(thread_tcache_enabled)}, {NAME("flush"), CTL(thread_tcache_flush)} }; -static const ctl_node_t thread_node[] = { +static const ctl_named_node_t thread_node[] = { {NAME("arena"), CTL(thread_arena)}, {NAME("allocated"), CTL(thread_allocated)}, {NAME("allocatedp"), CTL(thread_allocatedp)}, @@ -173,7 +200,7 @@ static const ctl_node_t thread_node[] = { {NAME("tcache"), CHILD(tcache)} }; -static const ctl_node_t config_node[] = { +static const ctl_named_node_t config_node[] = { {NAME("debug"), CTL(config_debug)}, {NAME("dss"), CTL(config_dss)}, {NAME("fill"), CTL(config_fill)}, @@ -190,7 +217,7 @@ static const ctl_node_t config_node[] = { {NAME("xmalloc"), CTL(config_xmalloc)} }; -static const ctl_node_t opt_node[] = { +static const ctl_named_node_t opt_node[] = { {NAME("abort"), CTL(opt_abort)}, {NAME("lg_chunk"), CTL(opt_lg_chunk)}, {NAME("narenas"), CTL(opt_narenas)}, @@ -216,31 +243,31 @@ static const ctl_node_t opt_node[] = { {NAME("prof_accum"), CTL(opt_prof_accum)} }; -static const ctl_node_t arenas_bin_i_node[] = { +static const ctl_named_node_t arenas_bin_i_node[] = { {NAME("size"), CTL(arenas_bin_i_size)}, {NAME("nregs"), CTL(arenas_bin_i_nregs)}, {NAME("run_size"), CTL(arenas_bin_i_run_size)} }; -static const ctl_node_t super_arenas_bin_i_node[] = { +static const ctl_named_node_t super_arenas_bin_i_node[] = { {NAME(""), CHILD(arenas_bin_i)} }; -static 
const ctl_node_t arenas_bin_node[] = { +static const ctl_indexed_node_t arenas_bin_node[] = { {INDEX(arenas_bin_i)} }; -static const ctl_node_t arenas_lrun_i_node[] = { +static const ctl_named_node_t arenas_lrun_i_node[] = { {NAME("size"), CTL(arenas_lrun_i_size)} }; -static const ctl_node_t super_arenas_lrun_i_node[] = { +static const ctl_named_node_t super_arenas_lrun_i_node[] = { {NAME(""), CHILD(arenas_lrun_i)} }; -static const ctl_node_t arenas_lrun_node[] = { +static const ctl_indexed_node_t arenas_lrun_node[] = { {INDEX(arenas_lrun_i)} }; -static const ctl_node_t arenas_node[] = { +static const ctl_named_node_t arenas_node[] = { {NAME("narenas"), CTL(arenas_narenas)}, {NAME("initialized"), CTL(arenas_initialized)}, {NAME("quantum"), CTL(arenas_quantum)}, @@ -254,39 +281,39 @@ static const ctl_node_t arenas_node[] = { {NAME("purge"), CTL(arenas_purge)} }; -static const ctl_node_t prof_node[] = { +static const ctl_named_node_t prof_node[] = { {NAME("active"), CTL(prof_active)}, {NAME("dump"), CTL(prof_dump)}, {NAME("interval"), CTL(prof_interval)} }; -static const ctl_node_t stats_chunks_node[] = { +static const ctl_named_node_t stats_chunks_node[] = { {NAME("current"), CTL(stats_chunks_current)}, {NAME("total"), CTL(stats_chunks_total)}, {NAME("high"), CTL(stats_chunks_high)} }; -static const ctl_node_t stats_huge_node[] = { +static const ctl_named_node_t stats_huge_node[] = { {NAME("allocated"), CTL(stats_huge_allocated)}, {NAME("nmalloc"), CTL(stats_huge_nmalloc)}, {NAME("ndalloc"), CTL(stats_huge_ndalloc)} }; -static const ctl_node_t stats_arenas_i_small_node[] = { +static const ctl_named_node_t stats_arenas_i_small_node[] = { {NAME("allocated"), CTL(stats_arenas_i_small_allocated)}, {NAME("nmalloc"), CTL(stats_arenas_i_small_nmalloc)}, {NAME("ndalloc"), CTL(stats_arenas_i_small_ndalloc)}, {NAME("nrequests"), CTL(stats_arenas_i_small_nrequests)} }; -static const ctl_node_t stats_arenas_i_large_node[] = { +static const ctl_named_node_t 
stats_arenas_i_large_node[] = { {NAME("allocated"), CTL(stats_arenas_i_large_allocated)}, {NAME("nmalloc"), CTL(stats_arenas_i_large_nmalloc)}, {NAME("ndalloc"), CTL(stats_arenas_i_large_ndalloc)}, {NAME("nrequests"), CTL(stats_arenas_i_large_nrequests)} }; -static const ctl_node_t stats_arenas_i_bins_j_node[] = { +static const ctl_named_node_t stats_arenas_i_bins_j_node[] = { {NAME("allocated"), CTL(stats_arenas_i_bins_j_allocated)}, {NAME("nmalloc"), CTL(stats_arenas_i_bins_j_nmalloc)}, {NAME("ndalloc"), CTL(stats_arenas_i_bins_j_ndalloc)}, @@ -297,29 +324,29 @@ static const ctl_node_t stats_arenas_i_bins_j_node[] = { {NAME("nreruns"), CTL(stats_arenas_i_bins_j_nreruns)}, {NAME("curruns"), CTL(stats_arenas_i_bins_j_curruns)} }; -static const ctl_node_t super_stats_arenas_i_bins_j_node[] = { +static const ctl_named_node_t super_stats_arenas_i_bins_j_node[] = { {NAME(""), CHILD(stats_arenas_i_bins_j)} }; -static const ctl_node_t stats_arenas_i_bins_node[] = { +static const ctl_indexed_node_t stats_arenas_i_bins_node[] = { {INDEX(stats_arenas_i_bins_j)} }; -static const ctl_node_t stats_arenas_i_lruns_j_node[] = { +static const ctl_named_node_t stats_arenas_i_lruns_j_node[] = { {NAME("nmalloc"), CTL(stats_arenas_i_lruns_j_nmalloc)}, {NAME("ndalloc"), CTL(stats_arenas_i_lruns_j_ndalloc)}, {NAME("nrequests"), CTL(stats_arenas_i_lruns_j_nrequests)}, {NAME("curruns"), CTL(stats_arenas_i_lruns_j_curruns)} }; -static const ctl_node_t super_stats_arenas_i_lruns_j_node[] = { +static const ctl_named_node_t super_stats_arenas_i_lruns_j_node[] = { {NAME(""), CHILD(stats_arenas_i_lruns_j)} }; -static const ctl_node_t stats_arenas_i_lruns_node[] = { +static const ctl_indexed_node_t stats_arenas_i_lruns_node[] = { {INDEX(stats_arenas_i_lruns_j)} }; -static const ctl_node_t stats_arenas_i_node[] = { +static const ctl_named_node_t stats_arenas_i_node[] = { {NAME("nthreads"), CTL(stats_arenas_i_nthreads)}, {NAME("pactive"), CTL(stats_arenas_i_pactive)}, {NAME("pdirty"), 
CTL(stats_arenas_i_pdirty)}, @@ -332,15 +359,15 @@ static const ctl_node_t stats_arenas_i_node[] = { {NAME("bins"), CHILD(stats_arenas_i_bins)}, {NAME("lruns"), CHILD(stats_arenas_i_lruns)} }; -static const ctl_node_t super_stats_arenas_i_node[] = { +static const ctl_named_node_t super_stats_arenas_i_node[] = { {NAME(""), CHILD(stats_arenas_i)} }; -static const ctl_node_t stats_arenas_node[] = { +static const ctl_indexed_node_t stats_arenas_node[] = { {INDEX(stats_arenas_i)} }; -static const ctl_node_t stats_node[] = { +static const ctl_named_node_t stats_node[] = { {NAME("cactive"), CTL(stats_cactive)}, {NAME("allocated"), CTL(stats_allocated)}, {NAME("active"), CTL(stats_active)}, @@ -350,7 +377,7 @@ static const ctl_node_t stats_node[] = { {NAME("arenas"), CHILD(stats_arenas)} }; -static const ctl_node_t root_node[] = { +static const ctl_named_node_t root_node[] = { {NAME("version"), CTL(version)}, {NAME("epoch"), CTL(epoch)}, {NAME("thread"), CHILD(thread)}, @@ -360,7 +387,7 @@ static const ctl_node_t root_node[] = { {NAME("prof"), CHILD(prof)}, {NAME("stats"), CHILD(stats)} }; -static const ctl_node_t super_root_node[] = { +static const ctl_named_node_t super_root_node[] = { {NAME(""), CHILD(root)} }; @@ -597,7 +624,7 @@ ctl_lookup(const char *name, ctl_node_t const **nodesp, size_t *mibp, int ret; const char *elm, *tdot, *dot; size_t elen, i, j; - const ctl_node_t *node; + const ctl_named_node_t *node; elm = name; /* Equivalent to strchrnul(). */ @@ -609,21 +636,21 @@ ctl_lookup(const char *name, ctl_node_t const **nodesp, size_t *mibp, } node = super_root_node; for (i = 0; i < *depthp; i++) { - assert(node->named); - assert(node->u.named.nchildren > 0); - if (node->u.named.children[0].named) { - const ctl_node_t *pnode = node; + assert(node); + assert(node->nchildren > 0); + if (ctl_named_node(node->children) != NULL) { + const ctl_named_node_t *pnode = node; /* Children are named. 
*/ - for (j = 0; j < node->u.named.nchildren; j++) { - const ctl_node_t *child = - &node->u.named.children[j]; - if (strlen(child->u.named.name) == elen - && strncmp(elm, child->u.named.name, - elen) == 0) { + for (j = 0; j < node->nchildren; j++) { + const ctl_named_node_t *child = + ctl_named_children(node, j); + if (strlen(child->name) == elen && + strncmp(elm, child->name, elen) == 0) { node = child; if (nodesp != NULL) - nodesp[i] = node; + nodesp[i] = + (const ctl_node_t *)node; mibp[i] = j; break; } @@ -634,7 +661,7 @@ ctl_lookup(const char *name, ctl_node_t const **nodesp, size_t *mibp, } } else { uintmax_t index; - const ctl_node_t *inode; + const ctl_indexed_node_t *inode; /* Children are indexed. */ index = malloc_strtoumax(elm, NULL, 10); @@ -643,16 +670,15 @@ ctl_lookup(const char *name, ctl_node_t const **nodesp, size_t *mibp, goto label_return; } - inode = &node->u.named.children[0]; - node = inode->u.indexed.index(mibp, *depthp, - (size_t)index); + inode = ctl_indexed_node(node->children); + node = inode->index(mibp, *depthp, (size_t)index); if (node == NULL) { ret = ENOENT; goto label_return; } if (nodesp != NULL) - nodesp[i] = node; + nodesp[i] = (const ctl_node_t *)node; mibp[i] = (size_t)index; } @@ -696,6 +722,7 @@ ctl_byname(const char *name, void *oldp, size_t *oldlenp, void *newp, size_t depth; ctl_node_t const *nodes[CTL_MAX_DEPTH]; size_t mib[CTL_MAX_DEPTH]; + const ctl_named_node_t *node; if (ctl_initialized == false && ctl_init()) { ret = EAGAIN; @@ -707,13 +734,14 @@ ctl_byname(const char *name, void *oldp, size_t *oldlenp, void *newp, if (ret != 0) goto label_return; - if (nodes[depth-1]->ctl == NULL) { + node = ctl_named_node(nodes[depth-1]); + if (node != NULL && node->ctl) + ret = node->ctl(mib, depth, oldp, oldlenp, newp, newlen); + else { /* The name refers to a partial path through the ctl tree. 
*/ ret = ENOENT; - goto label_return; } - ret = nodes[depth-1]->ctl(mib, depth, oldp, oldlenp, newp, newlen); label_return: return(ret); } @@ -738,7 +766,7 @@ ctl_bymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; - const ctl_node_t *node; + const ctl_named_node_t *node; size_t i; if (ctl_initialized == false && ctl_init()) { @@ -749,19 +777,21 @@ ctl_bymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, /* Iterate down the tree. */ node = super_root_node; for (i = 0; i < miblen; i++) { - if (node->u.named.children[0].named) { + assert(node); + assert(node->nchildren > 0); + if (ctl_named_node(node->children) != NULL) { /* Children are named. */ - if (node->u.named.nchildren <= mib[i]) { + if (node->nchildren <= mib[i]) { ret = ENOENT; goto label_return; } - node = &node->u.named.children[mib[i]]; + node = ctl_named_children(node, mib[i]); } else { - const ctl_node_t *inode; + const ctl_indexed_node_t *inode; /* Indexed element. */ - inode = &node->u.named.children[0]; - node = inode->u.indexed.index(mib, miblen, mib[i]); + inode = ctl_indexed_node(node->children); + node = inode->index(mib, miblen, mib[i]); if (node == NULL) { ret = ENOENT; goto label_return; @@ -770,12 +800,12 @@ ctl_bymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, } /* Call the ctl function. */ - if (node->ctl == NULL) { + if (node && node->ctl) + ret = node->ctl(mib, miblen, oldp, oldlenp, newp, newlen); + else { /* Partial MIB. 
*/ ret = ENOENT; - goto label_return; } - ret = node->ctl(mib, miblen, oldp, oldlenp, newp, newlen); label_return: return(ret); @@ -1133,7 +1163,7 @@ CTL_RO_NL_CGEN(config_prof, opt_prof_accum, opt_prof_accum, bool) CTL_RO_NL_GEN(arenas_bin_i_size, arena_bin_info[mib[2]].reg_size, size_t) CTL_RO_NL_GEN(arenas_bin_i_nregs, arena_bin_info[mib[2]].nregs, uint32_t) CTL_RO_NL_GEN(arenas_bin_i_run_size, arena_bin_info[mib[2]].run_size, size_t) -const ctl_node_t * +const ctl_named_node_t * arenas_bin_i_index(const size_t *mib, size_t miblen, size_t i) { @@ -1143,7 +1173,7 @@ arenas_bin_i_index(const size_t *mib, size_t miblen, size_t i) } CTL_RO_NL_GEN(arenas_lrun_i_size, ((mib[2]+1) << LG_PAGE), size_t) -const ctl_node_t * +const ctl_named_node_t * arenas_lrun_i_index(const size_t *mib, size_t miblen, size_t i) { @@ -1326,7 +1356,7 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nreruns, CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_curruns, ctl_stats.arenas[mib[2]].bstats[mib[4]].curruns, size_t) -const ctl_node_t * +const ctl_named_node_t * stats_arenas_i_bins_j_index(const size_t *mib, size_t miblen, size_t j) { @@ -1344,7 +1374,7 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_lruns_j_nrequests, CTL_RO_CGEN(config_stats, stats_arenas_i_lruns_j_curruns, ctl_stats.arenas[mib[2]].lstats[mib[4]].curruns, size_t) -const ctl_node_t * +const ctl_named_node_t * stats_arenas_i_lruns_j_index(const size_t *mib, size_t miblen, size_t j) { @@ -1365,10 +1395,10 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_nmadvise, CTL_RO_CGEN(config_stats, stats_arenas_i_purged, ctl_stats.arenas[mib[2]].astats.purged, uint64_t) -const ctl_node_t * +const ctl_named_node_t * stats_arenas_i_index(const size_t *mib, size_t miblen, size_t i) { - const ctl_node_t * ret; + const ctl_named_node_t * ret; malloc_mutex_lock(&ctl_mtx); if (ctl_stats.arenas[i].initialized == false) { From a4936ce4d635ef129be201f53cdc0786315ac3b6 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 23 Apr 2012 12:46:46 -0700 
Subject: [PATCH 0189/3378] Fix jemalloc.sh code generation. Fix jemalloc.sh code generation by adding @sorev@ and using it instead of @SOREV@ (which contains Makefile-specific variables). --- bin/jemalloc.sh.in | 2 +- configure.ac | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/bin/jemalloc.sh.in b/bin/jemalloc.sh.in index cdf36737..58683f5d 100644 --- a/bin/jemalloc.sh.in +++ b/bin/jemalloc.sh.in @@ -4,6 +4,6 @@ prefix=@prefix@ exec_prefix=@exec_prefix@ libdir=@libdir@ -@LD_PRELOAD_VAR@=${libdir}/libjemalloc.@SOREV@ +@LD_PRELOAD_VAR@=${libdir}/libjemalloc.@sorev@ export @LD_PRELOAD_VAR@ exec "$@" diff --git a/configure.ac b/configure.ac index 396b5ef3..73389905 100644 --- a/configure.ac +++ b/configure.ac @@ -202,6 +202,7 @@ libprefix="lib" DSO_LDFLAGS='-shared -Wl,-soname,$(@F)' RPATH='-Wl,-rpath,$(1)' SOREV='$(SO).$(REV)' +sorev="${so}.${rev}" PIC_CFLAGS='-fPIC -DPIC' dnl Heap profiling uses the log(3) function. @@ -226,6 +227,7 @@ case "${host}" in force_tls="0" DSO_LDFLAGS='-shared -Wl,-dylib_install_name,$(@F)' SOREV='$(REV).$(SO)' + sorev="${rev}.${so}" ;; *-*-freebsd*) CFLAGS="$CFLAGS" @@ -284,6 +286,7 @@ case "${host}" in libprefix="" exe=".exe" SOREV='$(SO)' + sorev="${so}" PIC_CFLAGS="" ;; *) @@ -301,6 +304,7 @@ AC_SUBST([exe]) AC_SUBST([libprefix]) AC_SUBST([DSO_LDFLAGS]) AC_SUBST([SOREV]) +AC_SUBST([sorev]) AC_SUBST([PIC_CFLAGS]) JE_COMPILABLE([__attribute__ syntax], From 079687bb87e2ac13274c2c4ff1134d42a78e9c7a Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 23 Apr 2012 12:49:23 -0700 Subject: [PATCH 0190/3378] Clean up documentation and formatting. --- ChangeLog | 4 ++-- INSTALL | 6 +----- Makefile.in | 4 ++-- README | 12 ++++++------ 4 files changed, 11 insertions(+), 15 deletions(-) diff --git a/ChangeLog b/ChangeLog index 97a90468..c93f5df2 100644 --- a/ChangeLog +++ b/ChangeLog @@ -19,7 +19,7 @@ found in the git revision history: New features: - Implement Valgrind support, redzones, and quarantine. 
- - Add support for additional operating systems: + - Add support for additional platforms: + FreeBSD + Mac OS X Lion + MinGW @@ -67,7 +67,7 @@ found in the git revision history: Bug fixes: - Fix a statistics-related bug in the "thread.arena" mallctl that could cause invalid statistics and crashes. - - Work around TLS dallocation via free() on Linux. This bug could cause + - Work around TLS deallocation via free() on Linux. This bug could cause write-after-free memory corruption. - Fix a potential deadlock that could occur during interval- and growth-triggered heap profile dumps. diff --git a/INSTALL b/INSTALL index 04671a1d..7e3051ac 100644 --- a/INSTALL +++ b/INSTALL @@ -267,10 +267,6 @@ directory, issue configuration and build commands: The manual page is generated in both html and roff formats. Any web browser can be used to view the html manual. The roff manual page can be formatted -prior to installation via any of the following commands: +prior to installation via the following command: nroff -man -t doc/jemalloc.3 - - groff -man -t -Tps doc/jemalloc.3 | ps2pdf - doc/jemalloc.3.pdf - - (cd doc; groff -man -man-ext -t -Thtml jemalloc.3 > jemalloc.3.html) diff --git a/Makefile.in b/Makefile.in index b9917da4..e39b8ea0 100644 --- a/Makefile.in +++ b/Makefile.in @@ -227,8 +227,8 @@ check: tests for t in $(CTESTS:$(srcroot)%.c=$(objroot)%); do \ total=`expr $$total + 1`; \ /bin/echo -n "$${t} ... "; \ - $(TEST_LIBRARY_PATH) $${t}$(EXE) $(abs_srcroot) $(abs_objroot) \ - > $(objroot)$${t}.out 2>&1; \ + $(TEST_LIBRARY_PATH) $${t}$(EXE) $(abs_srcroot) \ + $(abs_objroot) > $(objroot)$${t}.out 2>&1; \ if test -e "$(srcroot)$${t}.exp"; then \ diff -w -u $(srcroot)$${t}.exp \ $(objroot)$${t}.out >/dev/null 2>&1; \ diff --git a/README b/README index 411d52d4..7661683b 100644 --- a/README +++ b/README @@ -1,10 +1,10 @@ jemalloc is a general-purpose scalable concurrent malloc(3) implementation. 
-This distribution is a "portable" implementation that currently -targets FreeBSD, Linux, Apple OS X, and MinGW. jemalloc is included as the -default allocator in the FreeBSD and NetBSD operating systems, and it is used -by the Mozilla Firefox web browser on Microsoft Windows-related platforms. -Depending on your needs, one of the other divergent versions may suit your -needs better than this distribution. +This distribution is a "portable" implementation that currently targets +FreeBSD, Linux, Apple OS X, and MinGW. jemalloc is included as the default +allocator in the FreeBSD and NetBSD operating systems, and it is used by the +Mozilla Firefox web browser on Microsoft Windows-related platforms. Depending +on your needs, one of the other divergent versions may suit your needs better +than this distribution. The COPYING file contains copyright and licensing information. From 6716aa83526b3f866d73a033970cc920bc61c13f Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 23 Apr 2012 13:04:55 -0700 Subject: [PATCH 0191/3378] Force use of TLS if heap profiling is enabled. --- configure.ac | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/configure.ac b/configure.ac index 73389905..98211c8c 100644 --- a/configure.ac +++ b/configure.ac @@ -678,6 +678,10 @@ fi AC_MSG_CHECKING([configured backtracing method]) AC_MSG_RESULT([$backtrace_method]) if test "x$enable_prof" = "x1" ; then + if test "x${force_tls}" = "x0" ; then + AC_MSG_ERROR([Heap profiling requires TLS]); + fi + force_tls="1" AC_DEFINE([JEMALLOC_PROF], [ ]) fi AC_SUBST([enable_prof]) From 8694e2e7b901eb3254a7da2461709ba2ce135aba Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 23 Apr 2012 13:05:32 -0700 Subject: [PATCH 0192/3378] Silence compiler warnings. 
--- src/jemalloc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 67ac90b2..d9fecef5 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -793,7 +793,7 @@ void * je_malloc(size_t size) { void *ret; - size_t usize; + size_t usize JEMALLOC_CC_SILENCE_INIT(0); prof_thr_cnt_t *cnt JEMALLOC_CC_SILENCE_INIT(NULL); if (malloc_init()) { @@ -973,7 +973,7 @@ je_calloc(size_t num, size_t size) { void *ret; size_t num_size; - size_t usize; + size_t usize JEMALLOC_CC_SILENCE_INIT(0); prof_thr_cnt_t *cnt JEMALLOC_CC_SILENCE_INIT(NULL); if (malloc_init()) { @@ -1048,7 +1048,7 @@ void * je_realloc(void *ptr, size_t size) { void *ret; - size_t usize; + size_t usize JEMALLOC_CC_SILENCE_INIT(0); size_t old_size = 0; size_t old_rzsize JEMALLOC_CC_SILENCE_INIT(0); prof_thr_cnt_t *cnt JEMALLOC_CC_SILENCE_INIT(NULL); From 598779aa554dd4356a8c4464b67b99b29e9a8489 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 23 Apr 2012 18:04:52 -0700 Subject: [PATCH 0193/3378] Don't link tests with superfluous libraries. Don't link tests with libraries that only libjemalloc needs to be linked to. --- Makefile.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile.in b/Makefile.in index e39b8ea0..7685f156 100644 --- a/Makefile.in +++ b/Makefile.in @@ -163,7 +163,7 @@ $(objroot)test/bitmap$(EXE): $(objroot)src/bitmap.$(O) $(objroot)test/%$(EXE): $(objroot)test/%.$(O) $(objroot)src/util.$(O) $(DSOS) @mkdir -p $(@D) - $(CC) -o $@ $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) -L$(objroot)lib -ljemalloc$(install_suffix) $(LIBS) + $(CC) -o $@ $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) -L$(objroot)lib -ljemalloc$(install_suffix) $(filter -lpthread,$(LIBS)) build_lib_shared: $(DSOS) build_lib_static: $(STATIC_LIBS) From 65f343a632aa1f6bd9b8a65761706391469d2620 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 23 Apr 2012 19:31:45 -0700 Subject: [PATCH 0194/3378] Fix ctl regression. 
Fix ctl to correctly compute the number of children at each level of the ctl tree. --- include/jemalloc/internal/ctl.h | 12 ++++---- src/ctl.c | 50 +++++++++++++++++---------------- 2 files changed, 32 insertions(+), 30 deletions(-) diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index c06b9af0..adf3827f 100644 --- a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -17,17 +17,17 @@ struct ctl_node_s { struct ctl_named_node_s { struct ctl_node_s node; - const char *name; + const char *name; /* If (nchildren == 0), this is a terminal node. */ - unsigned nchildren; - const ctl_node_t *children; - int (*ctl)(const size_t *, size_t, void *, size_t *, void *, - size_t); + unsigned nchildren; + const ctl_node_t *children; + int (*ctl)(const size_t *, size_t, void *, size_t *, + void *, size_t); }; struct ctl_indexed_node_s { struct ctl_node_s node; - const ctl_named_node_t *(*index)(const size_t *, size_t, size_t); + const ctl_named_node_t *(*index)(const size_t *, size_t, size_t); }; struct ctl_arena_stats_s { diff --git a/src/ctl.c b/src/ctl.c index 52c85945..59499731 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -176,8 +176,10 @@ CTL_PROTO(stats_mapped) #define CTL_MAX_DEPTH 6 #define NAME(n) {true}, n -#define CHILD(c) \ - sizeof(c##_node) / sizeof(ctl_node_t), (ctl_node_t *)c##_node, NULL +#define CHILD(t, c) \ + sizeof(c##_node) / sizeof(ctl_##t##_node_t), \ + (ctl_node_t *)c##_node, \ + NULL #define CTL(c) 0, NULL, c##_ctl /* @@ -197,7 +199,7 @@ static const ctl_named_node_t thread_node[] = { {NAME("allocatedp"), CTL(thread_allocatedp)}, {NAME("deallocated"), CTL(thread_deallocated)}, {NAME("deallocatedp"), CTL(thread_deallocatedp)}, - {NAME("tcache"), CHILD(tcache)} + {NAME("tcache"), CHILD(named, tcache)} }; static const ctl_named_node_t config_node[] = { @@ -249,7 +251,7 @@ static const ctl_named_node_t arenas_bin_i_node[] = { {NAME("run_size"), CTL(arenas_bin_i_run_size)} }; static const ctl_named_node_t 
super_arenas_bin_i_node[] = { - {NAME(""), CHILD(arenas_bin_i)} + {NAME(""), CHILD(named, arenas_bin_i)} }; static const ctl_indexed_node_t arenas_bin_node[] = { @@ -260,7 +262,7 @@ static const ctl_named_node_t arenas_lrun_i_node[] = { {NAME("size"), CTL(arenas_lrun_i_size)} }; static const ctl_named_node_t super_arenas_lrun_i_node[] = { - {NAME(""), CHILD(arenas_lrun_i)} + {NAME(""), CHILD(named, arenas_lrun_i)} }; static const ctl_indexed_node_t arenas_lrun_node[] = { @@ -275,9 +277,9 @@ static const ctl_named_node_t arenas_node[] = { {NAME("tcache_max"), CTL(arenas_tcache_max)}, {NAME("nbins"), CTL(arenas_nbins)}, {NAME("nhbins"), CTL(arenas_nhbins)}, - {NAME("bin"), CHILD(arenas_bin)}, + {NAME("bin"), CHILD(indexed, arenas_bin)}, {NAME("nlruns"), CTL(arenas_nlruns)}, - {NAME("lrun"), CHILD(arenas_lrun)}, + {NAME("lrun"), CHILD(indexed, arenas_lrun)}, {NAME("purge"), CTL(arenas_purge)} }; @@ -325,7 +327,7 @@ static const ctl_named_node_t stats_arenas_i_bins_j_node[] = { {NAME("curruns"), CTL(stats_arenas_i_bins_j_curruns)} }; static const ctl_named_node_t super_stats_arenas_i_bins_j_node[] = { - {NAME(""), CHILD(stats_arenas_i_bins_j)} + {NAME(""), CHILD(named, stats_arenas_i_bins_j)} }; static const ctl_indexed_node_t stats_arenas_i_bins_node[] = { @@ -339,7 +341,7 @@ static const ctl_named_node_t stats_arenas_i_lruns_j_node[] = { {NAME("curruns"), CTL(stats_arenas_i_lruns_j_curruns)} }; static const ctl_named_node_t super_stats_arenas_i_lruns_j_node[] = { - {NAME(""), CHILD(stats_arenas_i_lruns_j)} + {NAME(""), CHILD(named, stats_arenas_i_lruns_j)} }; static const ctl_indexed_node_t stats_arenas_i_lruns_node[] = { @@ -354,13 +356,13 @@ static const ctl_named_node_t stats_arenas_i_node[] = { {NAME("npurge"), CTL(stats_arenas_i_npurge)}, {NAME("nmadvise"), CTL(stats_arenas_i_nmadvise)}, {NAME("purged"), CTL(stats_arenas_i_purged)}, - {NAME("small"), CHILD(stats_arenas_i_small)}, - {NAME("large"), CHILD(stats_arenas_i_large)}, - {NAME("bins"), 
CHILD(stats_arenas_i_bins)}, - {NAME("lruns"), CHILD(stats_arenas_i_lruns)} + {NAME("small"), CHILD(named, stats_arenas_i_small)}, + {NAME("large"), CHILD(named, stats_arenas_i_large)}, + {NAME("bins"), CHILD(named, stats_arenas_i_bins)}, + {NAME("lruns"), CHILD(named, stats_arenas_i_lruns)} }; static const ctl_named_node_t super_stats_arenas_i_node[] = { - {NAME(""), CHILD(stats_arenas_i)} + {NAME(""), CHILD(named, stats_arenas_i)} }; static const ctl_indexed_node_t stats_arenas_node[] = { @@ -372,23 +374,23 @@ static const ctl_named_node_t stats_node[] = { {NAME("allocated"), CTL(stats_allocated)}, {NAME("active"), CTL(stats_active)}, {NAME("mapped"), CTL(stats_mapped)}, - {NAME("chunks"), CHILD(stats_chunks)}, - {NAME("huge"), CHILD(stats_huge)}, - {NAME("arenas"), CHILD(stats_arenas)} + {NAME("chunks"), CHILD(named, stats_chunks)}, + {NAME("huge"), CHILD(named, stats_huge)}, + {NAME("arenas"), CHILD(indexed, stats_arenas)} }; static const ctl_named_node_t root_node[] = { {NAME("version"), CTL(version)}, {NAME("epoch"), CTL(epoch)}, - {NAME("thread"), CHILD(thread)}, - {NAME("config"), CHILD(config)}, - {NAME("opt"), CHILD(opt)}, - {NAME("arenas"), CHILD(arenas)}, - {NAME("prof"), CHILD(prof)}, - {NAME("stats"), CHILD(stats)} + {NAME("thread"), CHILD(named, thread)}, + {NAME("config"), CHILD(named, config)}, + {NAME("opt"), CHILD(named, opt)}, + {NAME("arenas"), CHILD(named, arenas)}, + {NAME("prof"), CHILD(named, prof)}, + {NAME("stats"), CHILD(named, stats)} }; static const ctl_named_node_t super_root_node[] = { - {NAME(""), CHILD(root)} + {NAME(""), CHILD(named, root)} }; #undef NAME From 87667a86a072ed5127343cc7698c3144cac37e05 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 23 Apr 2012 19:54:15 -0700 Subject: [PATCH 0195/3378] Fix two CHILD() macro calls in the ctl tree. 
--- src/ctl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ctl.c b/src/ctl.c index 59499731..4dbbefc2 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -358,8 +358,8 @@ static const ctl_named_node_t stats_arenas_i_node[] = { {NAME("purged"), CTL(stats_arenas_i_purged)}, {NAME("small"), CHILD(named, stats_arenas_i_small)}, {NAME("large"), CHILD(named, stats_arenas_i_large)}, - {NAME("bins"), CHILD(named, stats_arenas_i_bins)}, - {NAME("lruns"), CHILD(named, stats_arenas_i_lruns)} + {NAME("bins"), CHILD(indexed, stats_arenas_i_bins)}, + {NAME("lruns"), CHILD(indexed, stats_arenas_i_lruns)} }; static const ctl_named_node_t super_stats_arenas_i_node[] = { {NAME(""), CHILD(named, stats_arenas_i)} From 577dd84660351c727a62db99b308e35d9da224a1 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 23 Apr 2012 21:14:26 -0700 Subject: [PATCH 0196/3378] Handle quarantine resurrection during thread exit. Handle quarantine resurrection during thread exit in much the same way as tcache resurrection is handled. --- src/quarantine.c | 50 +++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 45 insertions(+), 5 deletions(-) diff --git a/src/quarantine.c b/src/quarantine.c index 5fb6c390..26ec5896 100644 --- a/src/quarantine.c +++ b/src/quarantine.c @@ -1,5 +1,13 @@ #include "jemalloc/internal/jemalloc_internal.h" +/* + * quarantine pointers close to NULL are used to encode state information that + * is used for cleaning up during thread shutdown. + */ +#define QUARANTINE_STATE_REINCARNATED ((quarantine_t *)(uintptr_t)1) +#define QUARANTINE_STATE_PURGATORY ((quarantine_t *)(uintptr_t)2) +#define QUARANTINE_STATE_MAX QUARANTINE_STATE_PURGATORY + /******************************************************************************/ /* Data. 
*/ @@ -105,10 +113,25 @@ quarantine(void *ptr) assert(opt_quarantine); quarantine = *quarantine_tsd_get(); - if (quarantine == NULL && (quarantine = - quarantine_init(LG_MAXOBJS_INIT)) == NULL) { - idalloc(ptr); - return; + if ((uintptr_t)quarantine <= (uintptr_t)QUARANTINE_STATE_MAX) { + if (quarantine == NULL) { + if ((quarantine = quarantine_init(LG_MAXOBJS_INIT)) == + NULL) { + idalloc(ptr); + return; + } + } else { + if (quarantine == QUARANTINE_STATE_PURGATORY) { + /* + * Make a note that quarantine() was called + * after quarantine_cleanup() was called. + */ + quarantine = QUARANTINE_STATE_REINCARNATED; + quarantine_tsd_set(&quarantine); + } + idalloc(ptr); + return; + } } /* * Drain one or more objects if the quarantine size limit would be @@ -144,9 +167,26 @@ quarantine_cleanup(void *arg) { quarantine_t *quarantine = *(quarantine_t **)arg; - if (quarantine != NULL) { + if (quarantine == QUARANTINE_STATE_REINCARNATED) { + /* + * Another destructor deallocated memory after this destructor + * was called. Reset quarantine to QUARANTINE_STATE_PURGATORY + * in order to receive another callback. + */ + quarantine = QUARANTINE_STATE_PURGATORY; + quarantine_tsd_set(&quarantine); + } else if (quarantine == QUARANTINE_STATE_PURGATORY) { + /* + * The previous time this destructor was called, we set the key + * to QUARANTINE_STATE_PURGATORY so that other destructors + * wouldn't cause re-creation of the quarantine. This time, do + * nothing, so that the destructor will not be called again. + */ + } else if (quarantine != NULL) { quarantine_drain(quarantine, 0); idalloc(quarantine); + quarantine = QUARANTINE_STATE_PURGATORY; + quarantine_tsd_set(&quarantine); } } From 9cd351d147d1e79bff6b89586f168e81c0be034e Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 23 Apr 2012 21:43:18 -0700 Subject: [PATCH 0197/3378] Add usize sanity checking to quarantine. 
--- src/quarantine.c | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/src/quarantine.c b/src/quarantine.c index 26ec5896..88d0c77e 100644 --- a/src/quarantine.c +++ b/src/quarantine.c @@ -11,15 +11,21 @@ /******************************************************************************/ /* Data. */ +typedef struct quarantine_obj_s quarantine_obj_t; typedef struct quarantine_s quarantine_t; +struct quarantine_obj_s { + void *ptr; + size_t usize; +}; + struct quarantine_s { - size_t curbytes; - size_t curobjs; - size_t first; + size_t curbytes; + size_t curobjs; + size_t first; #define LG_MAXOBJS_INIT 10 - size_t lg_maxobjs; - void *objs[1]; /* Dynamically sized ring buffer. */ + size_t lg_maxobjs; + quarantine_obj_t objs[1]; /* Dynamically sized ring buffer. */ }; static void quarantine_cleanup(void *arg); @@ -43,7 +49,7 @@ quarantine_init(size_t lg_maxobjs) quarantine_t *quarantine; quarantine = (quarantine_t *)imalloc(offsetof(quarantine_t, objs) + - ((ZU(1) << lg_maxobjs) * sizeof(void *))); + ((ZU(1) << lg_maxobjs) * sizeof(quarantine_obj_t))); if (quarantine == NULL) return (NULL); quarantine->curbytes = 0; @@ -70,14 +76,14 @@ quarantine_grow(quarantine_t *quarantine) quarantine->lg_maxobjs)) { /* objs ring buffer data are contiguous. */ memcpy(ret->objs, &quarantine->objs[quarantine->first], - quarantine->curobjs * sizeof(void *)); + quarantine->curobjs * sizeof(quarantine_obj_t)); ret->curobjs = quarantine->curobjs; } else { /* objs ring buffer data wrap around. 
*/ size_t ncopy = (ZU(1) << quarantine->lg_maxobjs) - quarantine->first; memcpy(ret->objs, &quarantine->objs[quarantine->first], ncopy * - sizeof(void *)); + sizeof(quarantine_obj_t)); ret->curobjs = ncopy; if (quarantine->curobjs != 0) { memcpy(&ret->objs[ret->curobjs], quarantine->objs, @@ -93,10 +99,10 @@ quarantine_drain(quarantine_t *quarantine, size_t upper_bound) { while (quarantine->curbytes > upper_bound && quarantine->curobjs > 0) { - void *ptr = quarantine->objs[quarantine->first]; - size_t usize = isalloc(ptr, config_prof); - idalloc(ptr); - quarantine->curbytes -= usize; + quarantine_obj_t *obj = &quarantine->objs[quarantine->first]; + assert(obj->usize == isalloc(obj->ptr, config_prof)); + idalloc(obj->ptr); + quarantine->curbytes -= obj->usize; quarantine->curobjs--; quarantine->first = (quarantine->first + 1) & ((ZU(1) << quarantine->lg_maxobjs) - 1); @@ -151,7 +157,9 @@ quarantine(void *ptr) if (quarantine->curbytes + usize <= opt_quarantine) { size_t offset = (quarantine->first + quarantine->curobjs) & ((ZU(1) << quarantine->lg_maxobjs) - 1); - quarantine->objs[offset] = ptr; + quarantine_obj_t *obj = &quarantine->objs[offset]; + obj->ptr = ptr; + obj->usize = usize; quarantine->curbytes += usize; quarantine->curobjs++; if (opt_junk) From 7e060397a38e301d7d3317fbf138f342c2bbd1b9 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 23 Apr 2012 22:07:30 -0700 Subject: [PATCH 0198/3378] Fix quarantine_grow() bugs. 
--- src/quarantine.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/src/quarantine.c b/src/quarantine.c index 88d0c77e..9005ab3b 100644 --- a/src/quarantine.c +++ b/src/quarantine.c @@ -72,23 +72,22 @@ quarantine_grow(quarantine_t *quarantine) return (quarantine); ret->curbytes = quarantine->curbytes; - if (quarantine->first + quarantine->curobjs < (ZU(1) << + ret->curobjs = quarantine->curobjs; + if (quarantine->first + quarantine->curobjs <= (ZU(1) << quarantine->lg_maxobjs)) { /* objs ring buffer data are contiguous. */ memcpy(ret->objs, &quarantine->objs[quarantine->first], quarantine->curobjs * sizeof(quarantine_obj_t)); - ret->curobjs = quarantine->curobjs; } else { /* objs ring buffer data wrap around. */ - size_t ncopy = (ZU(1) << quarantine->lg_maxobjs) - + size_t ncopy_a = (ZU(1) << quarantine->lg_maxobjs) - quarantine->first; - memcpy(ret->objs, &quarantine->objs[quarantine->first], ncopy * + size_t ncopy_b = quarantine->curobjs - ncopy_a; + + memcpy(ret->objs, &quarantine->objs[quarantine->first], ncopy_a + * sizeof(quarantine_obj_t)); + memcpy(&ret->objs[ncopy_a], quarantine->objs, ncopy_b * sizeof(quarantine_obj_t)); - ret->curobjs = ncopy; - if (quarantine->curobjs != 0) { - memcpy(&ret->objs[ret->curobjs], quarantine->objs, - quarantine->curobjs - ncopy); - } } return (ret); From f54166e7ef5313c3b5c773cbb0ca2af95f5a15ae Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 23 Apr 2012 22:41:36 -0700 Subject: [PATCH 0199/3378] Add missing Valgrind annotations. --- src/arena.c | 3 +++ src/chunk.c | 1 + 2 files changed, 4 insertions(+) diff --git a/src/arena.c b/src/arena.c index 6f28abe9..f13b5e1e 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1131,6 +1131,8 @@ arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin) (uintptr_t)bin_info->bitmap_offset); /* Initialize run internals. 
*/ + VALGRIND_MAKE_MEM_UNDEFINED(run, bin_info->reg0_offset - + bin_info->redzone_size); run->bin = bin; run->nextind = 0; run->nfree = bin_info->nregs; @@ -1924,6 +1926,7 @@ arena_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, * expectation that the extra bytes will be reliably preserved. */ copysize = (size < oldsize) ? size : oldsize; + VALGRIND_MAKE_MEM_UNDEFINED(ret, copysize); memcpy(ret, ptr, copysize); iqalloc(ptr); return (ret); diff --git a/src/chunk.c b/src/chunk.c index 5426b027..7ac229cb 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -168,6 +168,7 @@ label_return: size_t i; size_t *p = (size_t *)(uintptr_t)ret; + VALGRIND_MAKE_MEM_DEFINED(ret, size); for (i = 0; i < size / sizeof(size_t); i++) assert(p[i] == 0); } From 6b9ed67b4b9d65731d1eeb7937989ef96288b706 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 25 Apr 2012 13:12:46 -0700 Subject: [PATCH 0200/3378] Fix the "epoch" mallctl. Fix the "epoch" mallctl to update cached stats even if the passed in epoch is 0. --- ChangeLog | 2 ++ src/ctl.c | 23 +++++++++++------------ 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/ChangeLog b/ChangeLog index c93f5df2..691630bc 100644 --- a/ChangeLog +++ b/ChangeLog @@ -81,6 +81,8 @@ found in the git revision history: - Check for NULL pointer in malloc_usable_size(). - Fix an off-by-one heap profile statistics bug that could be observed in interval- and growth-triggered heap profiles. + - Fix the "epoch" mallctl to update cached stats even if the passed in epoch + is 0. - Fix bin->runcur management to fix a layout policy bug. This bug did not affect correctness. 
- Fix a bug in choose_arena_hard() that potentially caused more arenas to be diff --git a/src/ctl.c b/src/ctl.c index 4dbbefc2..4b41d1d3 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -831,14 +831,14 @@ ctl_boot(void) #define READONLY() do { \ if (newp != NULL || newlen != 0) { \ ret = EPERM; \ - goto label_return; \ + goto label_return; \ } \ } while (0) #define WRITEONLY() do { \ if (oldp != NULL || oldlenp != NULL) { \ ret = EPERM; \ - goto label_return; \ + goto label_return; \ } \ } while (0) @@ -854,7 +854,7 @@ ctl_boot(void) ? sizeof(t) : *oldlenp; \ memcpy(oldp, (void *)&v, copylen); \ ret = EINVAL; \ - goto label_return; \ + goto label_return; \ } else \ *(t *)oldp = v; \ } \ @@ -864,7 +864,7 @@ ctl_boot(void) if (newp != NULL) { \ if (newlen != sizeof(t)) { \ ret = EINVAL; \ - goto label_return; \ + goto label_return; \ } \ v = *(t *)newp; \ } \ @@ -891,7 +891,7 @@ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ READ(oldval, t); \ \ ret = 0; \ -label_return: \ +label_return: \ if (l) \ malloc_mutex_unlock(&ctl_mtx); \ return (ret); \ @@ -913,7 +913,7 @@ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ READ(oldval, t); \ \ ret = 0; \ -label_return: \ +label_return: \ malloc_mutex_unlock(&ctl_mtx); \ return (ret); \ } @@ -932,7 +932,7 @@ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ READ(oldval, t); \ \ ret = 0; \ -label_return: \ +label_return: \ malloc_mutex_unlock(&ctl_mtx); \ return (ret); \ } @@ -956,7 +956,7 @@ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ READ(oldval, t); \ \ ret = 0; \ -label_return: \ +label_return: \ return (ret); \ } @@ -973,7 +973,7 @@ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ READ(oldval, t); \ \ ret = 0; \ -label_return: \ +label_return: \ return (ret); \ } @@ -990,7 +990,7 @@ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ READ(oldval, bool); \ \ ret = 0; \ -label_return: \ 
+label_return: \ return (ret); \ } @@ -1004,9 +1004,8 @@ epoch_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, uint64_t newval; malloc_mutex_lock(&ctl_mtx); - newval = 0; WRITE(newval, uint64_t); - if (newval != 0) + if (newp != NULL) ctl_refresh(); READ(ctl_epoch, uint64_t); From 3fb50b0407ff7dfe14727995706e2b42836f0f7e Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 25 Apr 2012 13:13:44 -0700 Subject: [PATCH 0201/3378] Fix a PROF_ALLOC_PREP() error path. Fix a PROF_ALLOC_PREP() error path to initialize the return value to NULL. --- include/jemalloc/internal/prof.h | 4 +++- src/jemalloc.c | 6 ++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index 093ac93c..4c398515 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -227,8 +227,10 @@ bool prof_boot2(void); assert(size == s2u(size)); \ \ prof_tdata = prof_tdata_get(); \ - if (prof_tdata == NULL) \ + if (prof_tdata == NULL) { \ + ret = NULL; \ break; \ + } \ \ if (opt_prof_active == false) { \ /* Sampling is currently inactive, so avoid sampling. 
*/\ diff --git a/src/jemalloc.c b/src/jemalloc.c index d9fecef5..52296e07 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1384,7 +1384,6 @@ je_allocm(void **ptr, size_t *rsize, size_t size, int flags) size_t alignment = (ZU(1) << (flags & ALLOCM_LG_ALIGN_MASK) & (SIZE_T_MAX-1)); bool zero = flags & ALLOCM_ZERO; - prof_thr_cnt_t *cnt; assert(ptr != NULL); assert(size != 0); @@ -1397,6 +1396,8 @@ je_allocm(void **ptr, size_t *rsize, size_t size, int flags) goto label_oom; if (config_prof && opt_prof) { + prof_thr_cnt_t *cnt; + PROF_ALLOC_PREP(1, usize, cnt); if (cnt == NULL) goto label_oom; @@ -1456,7 +1457,6 @@ je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, int flags) & (SIZE_T_MAX-1)); bool zero = flags & ALLOCM_ZERO; bool no_move = flags & ALLOCM_NO_MOVE; - prof_thr_cnt_t *cnt; assert(ptr != NULL); assert(*ptr != NULL); @@ -1466,6 +1466,8 @@ je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, int flags) p = *ptr; if (config_prof && opt_prof) { + prof_thr_cnt_t *cnt; + /* * usize isn't knowable before iralloc() returns when extra is * non-zero. Therefore, compute its maximum possible value and From d926c90500d47c5e18811a944c70924c09b5890e Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 25 Apr 2012 23:17:57 -0700 Subject: [PATCH 0202/3378] Fix Valgrind URL in documentation. Reported by Daichi GOTO. --- doc/jemalloc.xml.in | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index e8a57225..02961f6b 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -853,9 +853,9 @@ for (i = 0; i < nbins; i++) { junk-filled if the opt.junk option is enabled. This feature is of particular use in combination with Valgrind, which can detect - attempts to access quarantined objects. This is intended for debugging - and will impact performance negatively. 
The default quarantine size is + url="http://valgrind.org/">Valgrind, which can detect attempts + to access quarantined objects. This is intended for debugging and will + impact performance negatively. The default quarantine size is 0. @@ -871,11 +871,11 @@ for (i = 0; i < nbins; i++) { opt.junk option is enabled, the redzones are checked for corruption during deallocation. However, the primary intended purpose of this feature is to be used in - combination with Valgrind, which needs - redzones in order to do effective buffer overflow/underflow detection. - This option is intended for debugging and will impact performance - negatively. This option is disabled by default. + combination with Valgrind, + which needs redzones in order to do effective buffer overflow/underflow + detection. This option is intended for debugging and will impact + performance negatively. This option is disabled by + default. @@ -915,14 +915,14 @@ for (i = 0; i < nbins; i++) { r- [] - Valgrind support - enabled/disabled. If enabled, several other options are automatically - modified during options processing to work well with Valgrind: opt.junk and opt.zero are set to false, - opt.quarantine - is set to 16 MiB, and Valgrind + support enabled/disabled. If enabled, several other options are + automatically modified during options processing to work well with + Valgrind: opt.junk + and opt.zero are set + to false, opt.quarantine is + set to 16 MiB, and opt.redzone is set to true. This option is disabled by default. @@ -1854,10 +1854,10 @@ malloc_conf = "xmalloc:true";]]> This implementation does not provide much detail about the problems it detects, because the performance impact for storing such information would be prohibitive. However, jemalloc does integrate with the most - excellent Valgrind tool if - the configuration option is enabled and - the opt.valgrind - option is enabled. + excellent Valgrind tool if the + configuration option is enabled and the + opt.valgrind option + is enabled. 
DIAGNOSTIC MESSAGES From 95ff6aadca1e91641e093c61091e70344d048b50 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 28 Apr 2012 14:15:28 -0700 Subject: [PATCH 0203/3378] Don't set prof_tdata during thread cleanup. Don't set prof_tdata during thread cleanup, because doing so will cause the cleanup function to be called again, the second time with a NULL argument. --- src/prof.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/prof.c b/src/prof.c index 187bda7d..cad56929 100644 --- a/src/prof.c +++ b/src/prof.c @@ -1155,10 +1155,7 @@ prof_tdata_cleanup(void *arg) } idalloc(prof_tdata->vec); - idalloc(prof_tdata); - prof_tdata = NULL; - prof_tdata_tsd_set(&prof_tdata); } void From 0050a0f7e6ea5a33c9aed769e2652afe20714194 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 28 Apr 2012 18:14:24 -0700 Subject: [PATCH 0204/3378] Handle prof_tdata resurrection. Handle prof_tdata resurrection during thread shutdown, similarly to how tcache and quarantine handle resurrection. --- include/jemalloc/internal/prof.h | 18 +++++++++-- src/prof.c | 54 ++++++++++++++++++++++---------- 2 files changed, 52 insertions(+), 20 deletions(-) diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index 4c398515..b165e196 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -37,6 +37,14 @@ typedef struct prof_tdata_s prof_tdata_t; */ #define PROF_NCTX_LOCKS 1024 +/* + * prof_tdata pointers close to NULL are used to encode state information that + * is used for cleaning up during thread shutdown. 
+ */ +#define PROF_TDATA_STATE_REINCARNATED ((prof_tdata_t *)(uintptr_t)1) +#define PROF_TDATA_STATE_PURGATORY ((prof_tdata_t *)(uintptr_t)2) +#define PROF_TDATA_STATE_MAX PROF_TDATA_STATE_PURGATORY + #endif /* JEMALLOC_H_TYPES */ /******************************************************************************/ #ifdef JEMALLOC_H_STRUCTS @@ -297,8 +305,12 @@ prof_tdata_get(void) cassert(config_prof); prof_tdata = *prof_tdata_tsd_get(); - if (prof_tdata == NULL) - prof_tdata = prof_tdata_init(); + if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) { + if (prof_tdata == NULL) + prof_tdata = prof_tdata_init(); + else + prof_tdata = NULL; + } return (prof_tdata); } @@ -382,7 +394,7 @@ prof_sample_accum_update(size_t size) assert(opt_lg_prof_sample != 0); prof_tdata = *prof_tdata_tsd_get(); - assert(prof_tdata != NULL); + assert((uintptr_t)prof_tdata > (uintptr_t)PROF_TDATA_STATE_MAX); /* Take care to avoid integer overflow. */ if (size >= prof_tdata->threshold - prof_tdata->accum) { diff --git a/src/prof.c b/src/prof.c index cad56929..e3549662 100644 --- a/src/prof.c +++ b/src/prof.c @@ -686,7 +686,7 @@ prof_ctx_destroy(prof_ctx_t *ctx) * into this function. */ prof_tdata = *prof_tdata_tsd_get(); - assert(prof_tdata != NULL); + assert((uintptr_t)prof_tdata > (uintptr_t)PROF_TDATA_STATE_MAX); prof_enter(prof_tdata); malloc_mutex_lock(ctx->lock); if (ql_first(&ctx->cnts_ql) == NULL && ctx->cnt_merged.curobjs == 0 && @@ -972,7 +972,7 @@ prof_idump(void) * allocation. */ prof_tdata = *prof_tdata_tsd_get(); - if (prof_tdata == NULL) + if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) return; if (prof_tdata->enq) { prof_tdata->enq_idump = true; @@ -1026,7 +1026,7 @@ prof_gdump(void) * allocation. 
*/ prof_tdata = *prof_tdata_tsd_get(); - if (prof_tdata == NULL) + if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) return; if (prof_tdata->enq) { prof_tdata->enq_gdump = true; @@ -1141,21 +1141,41 @@ prof_tdata_cleanup(void *arg) cassert(config_prof); - /* - * Delete the hash table. All of its contents can still be iterated - * over via the LRU. - */ - ckh_delete(&prof_tdata->bt2cnt); - - /* Iteratively merge cnt's into the global stats and delete them. */ - while ((cnt = ql_last(&prof_tdata->lru_ql, lru_link)) != NULL) { - ql_remove(&prof_tdata->lru_ql, cnt, lru_link); - prof_ctx_merge(cnt->ctx, cnt); - idalloc(cnt); + if (prof_tdata == PROF_TDATA_STATE_REINCARNATED) { + /* + * Another destructor deallocated memory after this destructor + * was called. Reset prof_tdata to PROF_TDATA_STATE_PURGATORY + * in order to receive another callback. + */ + prof_tdata = PROF_TDATA_STATE_PURGATORY; + prof_tdata_tsd_set(&prof_tdata); + } else if (prof_tdata == PROF_TDATA_STATE_PURGATORY) { + /* + * The previous time this destructor was called, we set the key + * to PROF_TDATA_STATE_PURGATORY so that other destructors + * wouldn't cause re-creation of the prof_tdata. This time, do + * nothing, so that the destructor will not be called again. + */ + } else if (prof_tdata != NULL) { + /* + * Delete the hash table. All of its contents can still be + * iterated over via the LRU. + */ + ckh_delete(&prof_tdata->bt2cnt); + /* + * Iteratively merge cnt's into the global stats and delete + * them. 
+ */ + while ((cnt = ql_last(&prof_tdata->lru_ql, lru_link)) != NULL) { + ql_remove(&prof_tdata->lru_ql, cnt, lru_link); + prof_ctx_merge(cnt->ctx, cnt); + idalloc(cnt); + } + idalloc(prof_tdata->vec); + idalloc(prof_tdata); + prof_tdata = PROF_TDATA_STATE_PURGATORY; + prof_tdata_tsd_set(&prof_tdata); } - - idalloc(prof_tdata->vec); - idalloc(prof_tdata); } void From f27899402914065a6c1484ea8d81a2c8b70aa659 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 28 Apr 2012 23:27:13 -0700 Subject: [PATCH 0205/3378] Fix more prof_tdata resurrection corner cases. --- include/jemalloc/internal/prof.h | 12 +++++++----- src/prof.c | 4 ++-- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index b165e196..41a66923 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -235,8 +235,11 @@ bool prof_boot2(void); assert(size == s2u(size)); \ \ prof_tdata = prof_tdata_get(); \ - if (prof_tdata == NULL) { \ - ret = NULL; \ + if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) { \ + if (prof_tdata != NULL) \ + ret = (prof_thr_cnt_t *)(uintptr_t)1U; \ + else \ + ret = NULL; \ break; \ } \ \ @@ -308,8 +311,6 @@ prof_tdata_get(void) if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) { if (prof_tdata == NULL) prof_tdata = prof_tdata_init(); - else - prof_tdata = NULL; } return (prof_tdata); @@ -394,7 +395,8 @@ prof_sample_accum_update(size_t size) assert(opt_lg_prof_sample != 0); prof_tdata = *prof_tdata_tsd_get(); - assert((uintptr_t)prof_tdata > (uintptr_t)PROF_TDATA_STATE_MAX); + if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) + return (true); /* Take care to avoid integer overflow. 
*/ if (size >= prof_tdata->threshold - prof_tdata->accum) { diff --git a/src/prof.c b/src/prof.c index e3549662..de1d3929 100644 --- a/src/prof.c +++ b/src/prof.c @@ -440,7 +440,7 @@ prof_lookup(prof_bt_t *bt) cassert(config_prof); prof_tdata = prof_tdata_get(); - if (prof_tdata == NULL) + if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) return (NULL); if (ckh_search(&prof_tdata->bt2cnt, bt, NULL, &ret.v)) { @@ -846,7 +846,7 @@ prof_dump(bool propagate_err, const char *filename, bool leakcheck) cassert(config_prof); prof_tdata = prof_tdata_get(); - if (prof_tdata == NULL) + if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) return (true); prof_enter(prof_tdata); prof_dump_fd = creat(filename, 0644); From 8b49971d0ce0819af78aa2a278c26ecb298ee134 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Tue, 24 Apr 2012 23:22:02 +0200 Subject: [PATCH 0206/3378] Avoid variable length arrays and remove declarations within code MSVC doesn't support C99, and building as C++ to be able to use them is dangerous, as C++ and C99 are incompatible. Introduce a VARIABLE_ARRAY macro that either uses VLA when supported, or alloca() otherwise. Note that using alloca() inside loops doesn't quite work like VLAs, thus the use of VARIABLE_ARRAY there is discouraged. It might be worth investigating ways to check whether VARIABLE_ARRAY is used in such context at runtime in debug builds and bail out if that happens. 
--- .../jemalloc/internal/jemalloc_internal.h.in | 14 ++++++++++++++ include/jemalloc/internal/prof.h | 3 ++- src/arena.c | 18 ++++++++++-------- src/ctl.c | 4 ++-- src/stats.c | 4 ++-- src/tsd.c | 2 +- test/bitmap.c | 16 ++++++++++++---- 7 files changed, 43 insertions(+), 18 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 691f50a9..a364d7a3 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -319,6 +319,20 @@ static const bool config_ivsalloc = #define ALIGNMENT_CEILING(s, alignment) \ (((s) + (alignment - 1)) & (-(alignment))) +/* Declare a variable length array */ +#if __STDC_VERSION__ < 199901L +# ifdef _MSC_VER +# include +# define alloca _alloca +# else +# include +# endif +# define VARIABLE_ARRAY(type, name, count) \ + type *name = alloca(sizeof(type) * count) +#else +# define VARIABLE_ARRAY(type, name, count) type name[count] +#endif + #ifdef JEMALLOC_VALGRIND /* * The JEMALLOC_VALGRIND_*() macros must be macros rather than functions diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index 41a66923..c3e3f9e4 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -542,8 +542,9 @@ prof_free(const void *ptr, size_t size) cassert(config_prof); if ((uintptr_t)ctx > (uintptr_t)1) { + prof_thr_cnt_t *tcnt; assert(size == isalloc(ptr, true)); - prof_thr_cnt_t *tcnt = prof_lookup(ctx->bt); + tcnt = prof_lookup(ctx->bt); if (tcnt != NULL) { tcnt->epoch++; diff --git a/src/arena.c b/src/arena.c index f13b5e1e..7fac3619 100644 --- a/src/arena.c +++ b/src/arena.c @@ -640,14 +640,14 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) if (mapelm->bits & CHUNK_MAP_LARGE) pageind += mapelm->bits >> LG_PAGE; else { + size_t binind; + arena_bin_info_t *bin_info; arena_run_t *run = (arena_run_t *)((uintptr_t) chunk + (uintptr_t)(pageind << LG_PAGE)); 
assert((mapelm->bits >> LG_PAGE) == 0); - size_t binind = arena_bin_index(arena, - run->bin); - arena_bin_info_t *bin_info = - &arena_bin_info[binind]; + binind = arena_bin_index(arena, run->bin); + bin_info = &arena_bin_info[binind]; pageind += bin_info->run_size >> LG_PAGE; } } @@ -1056,11 +1056,12 @@ arena_bin_runs_first(arena_bin_t *bin) if (mapelm != NULL) { arena_chunk_t *chunk; size_t pageind; + arena_run_t *run; chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(mapelm); pageind = ((((uintptr_t)mapelm - (uintptr_t)chunk->map) / sizeof(arena_chunk_map_t))) + map_bias; - arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + + run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - (mapelm->bits >> LG_PAGE)) << LG_PAGE)); return (run); @@ -1596,14 +1597,15 @@ arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, size_t pageind; arena_run_t *run; arena_bin_t *bin; - size_t size; + arena_bin_info_t *bin_info; + size_t size, binind; pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - (mapelm->bits >> LG_PAGE)) << LG_PAGE)); bin = run->bin; - size_t binind = arena_bin_index(arena, bin); - arena_bin_info_t *bin_info = &arena_bin_info[binind]; + binind = arena_bin_index(arena, bin); + bin_info = &arena_bin_info[binind]; if (config_fill || config_stats) size = bin_info->reg_size; diff --git a/src/ctl.c b/src/ctl.c index 4b41d1d3..27346045 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -520,7 +520,7 @@ static void ctl_refresh(void) { unsigned i; - arena_t *tarenas[narenas]; + VARIABLE_ARRAY(arena_t *, tarenas, narenas); if (config_stats) { malloc_mutex_lock(&chunks_mtx); @@ -1232,7 +1232,7 @@ arenas_purge_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, ret = EFAULT; goto label_return; } else { - arena_t *tarenas[narenas]; + VARIABLE_ARRAY(arena_t *, tarenas, narenas); malloc_mutex_lock(&arenas_lock); memcpy(tarenas, arenas, sizeof(arena_t *) * narenas); diff --git 
a/src/stats.c b/src/stats.c index 08f7098c..2854b309 100644 --- a/src/stats.c +++ b/src/stats.c @@ -498,7 +498,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, CTL_GET("arenas.narenas", &narenas, unsigned); { - bool initialized[narenas]; + VARIABLE_ARRAY(bool, initialized, narenas); size_t isz; unsigned i, ninitialized; @@ -527,7 +527,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, CTL_GET("arenas.narenas", &narenas, unsigned); { - bool initialized[narenas]; + VARIABLE_ARRAY(bool, initialized, narenas); size_t isz; unsigned i; diff --git a/src/tsd.c b/src/tsd.c index 09f06e88..d7714b02 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -36,7 +36,7 @@ JEMALLOC_ATTR(visibility("default")) void _malloc_thread_cleanup(void) { - bool pending[ncleanups], again; + bool pending[MALLOC_TSD_CLEANUPS_MAX], again; unsigned i; for (i = 0; i < ncleanups; i++) diff --git a/test/bitmap.c b/test/bitmap.c index ff50ecb3..b2cb6300 100644 --- a/test/bitmap.c +++ b/test/bitmap.c @@ -30,11 +30,13 @@ test_bitmap_init(void) bitmap_info_init(&binfo, i); { size_t j; - bitmap_t bitmap[bitmap_info_ngroups(&binfo)]; + bitmap_t *bitmap = malloc(sizeof(bitmap_t) * + bitmap_info_ngroups(&binfo)); bitmap_init(bitmap, &binfo); for (j = 0; j < i; j++) assert(bitmap_get(bitmap, &binfo, j) == false); + free(bitmap); } } @@ -50,12 +52,14 @@ test_bitmap_set(void) bitmap_info_init(&binfo, i); { size_t j; - bitmap_t bitmap[bitmap_info_ngroups(&binfo)]; + bitmap_t *bitmap = malloc(sizeof(bitmap_t) * + bitmap_info_ngroups(&binfo)); bitmap_init(bitmap, &binfo); for (j = 0; j < i; j++) bitmap_set(bitmap, &binfo, j); assert(bitmap_full(bitmap, &binfo)); + free(bitmap); } } } @@ -70,7 +74,8 @@ test_bitmap_unset(void) bitmap_info_init(&binfo, i); { size_t j; - bitmap_t bitmap[bitmap_info_ngroups(&binfo)]; + bitmap_t *bitmap = malloc(sizeof(bitmap_t) * + bitmap_info_ngroups(&binfo)); bitmap_init(bitmap, &binfo); for (j = 0; j < i; j++) @@ -81,6 +86,7 @@ 
test_bitmap_unset(void) for (j = 0; j < i; j++) bitmap_set(bitmap, &binfo, j); assert(bitmap_full(bitmap, &binfo)); + free(bitmap); } } } @@ -95,7 +101,8 @@ test_bitmap_sfu(void) bitmap_info_init(&binfo, i); { ssize_t j; - bitmap_t bitmap[bitmap_info_ngroups(&binfo)]; + bitmap_t *bitmap = malloc(sizeof(bitmap_t) * + bitmap_info_ngroups(&binfo)); bitmap_init(bitmap, &binfo); /* Iteratively set bits starting at the beginning. */ @@ -125,6 +132,7 @@ test_bitmap_sfu(void) } assert(bitmap_sfu(bitmap, &binfo) == i - 1); assert(bitmap_full(bitmap, &binfo)); + free(bitmap); } } } From af04b744bda40842631d80ad04e1510308b13e54 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Mon, 30 Apr 2012 12:38:25 +0200 Subject: [PATCH 0207/3378] Remove the VOID macro Windows headers define a VOID macro. --- src/ctl.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/ctl.c b/src/ctl.c index 27346045..dddf3bee 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -842,11 +842,6 @@ ctl_boot(void) } \ } while (0) -#define VOID() do { \ - READONLY(); \ - WRITEONLY(); \ -} while (0) - #define READ(v, t) do { \ if (oldp != NULL && oldlenp != NULL) { \ if (*oldlenp != sizeof(t)) { \ @@ -1049,7 +1044,8 @@ thread_tcache_flush_ctl(const size_t *mib, size_t miblen, void *oldp, if (config_tcache == false) return (ENOENT); - VOID(); + READONLY(); + WRITEONLY(); tcache_flush(); From a14bce85e885f83c96116cc5438ae52d740f3727 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Mon, 30 Apr 2012 12:38:26 +0200 Subject: [PATCH 0208/3378] Use Get/SetLastError on Win32 Using errno on win32 doesn't quite work, because the value set in a shared library can't be read from e.g. an executable calling the function setting errno. At the same time, since buferror always uses errno/GetLastError, don't pass it. 
--- .../jemalloc/internal/jemalloc_internal.h.in | 12 ++++++-- include/jemalloc/internal/util.h | 28 ++++++++++++++++++- src/chunk_mmap.c | 4 +-- src/huge.c | 2 +- src/jemalloc.c | 18 ++++++------ src/util.c | 15 +++++----- test/aligned_alloc.c | 25 +++++++++-------- 7 files changed, 71 insertions(+), 33 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index a364d7a3..d4c4b4cb 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -1,8 +1,17 @@ #ifndef JEMALLOC_INTERNAL_H #define JEMALLOC_INTERNAL_H #include +#include #ifdef _WIN32 # include +# define ENOENT ERROR_PATH_NOT_FOUND +# define EINVAL ERROR_BAD_ARGUMENTS +# define EAGAIN ERROR_OUTOFMEMORY +# define EPERM ERROR_WRITE_FAULT +# define EFAULT ERROR_INVALID_ADDRESS +# define ENOMEM ERROR_NOT_ENOUGH_MEMORY +# undef ERANGE +# define ERANGE ERROR_INVALID_DATA #else # include # include @@ -11,10 +20,10 @@ # endif # include # include +# include #endif #include -#include #include #ifndef SIZE_T_MAX # define SIZE_T_MAX SIZE_MAX @@ -34,7 +43,6 @@ #include #include #include -#include #define JEMALLOC_NO_DEMANGLE #include "../jemalloc@install_suffix@.h" diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h index d360ae3f..9661c7b1 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util.h @@ -84,7 +84,7 @@ extern void (*je_malloc_message)(void *wcbopaque, const char *s); -int buferror(int errnum, char *buf, size_t buflen); +int buferror(char *buf, size_t buflen); uintmax_t malloc_strtoumax(const char *nptr, char **endptr, int base); /* @@ -109,6 +109,8 @@ void malloc_printf(const char *format, ...) 
#ifndef JEMALLOC_ENABLE_INLINE size_t pow2_ceil(size_t x); void malloc_write(const char *s); +void set_errno(int errnum); +int get_errno(void); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_UTIL_C_)) @@ -140,6 +142,30 @@ malloc_write(const char *s) je_malloc_message(NULL, s); } + +/* Sets error code */ +JEMALLOC_INLINE void +set_errno(int errnum) +{ + +#ifdef _WIN32 + SetLastError(errnum); +#else + errno = errnum; +#endif +} + +/* Get last error code */ +JEMALLOC_INLINE int +get_errno(void) +{ + +#ifdef _WIN32 + return GetLastError(); +#else + return errno; +#endif +} #endif #endif /* JEMALLOC_H_INLINES */ diff --git a/src/chunk_mmap.c b/src/chunk_mmap.c index 0ad65a10..9f388d28 100644 --- a/src/chunk_mmap.c +++ b/src/chunk_mmap.c @@ -41,7 +41,7 @@ pages_map(void *addr, size_t size) if (munmap(ret, size) == -1) { char buf[BUFERROR_BUF]; - buferror(errno, buf, sizeof(buf)); + buferror(buf, sizeof(buf)); malloc_printf(": Error in " #ifdef _WIN32 "VirtualFree" diff --git a/src/huge.c b/src/huge.c index 23eb074a..67b282d1 100644 --- a/src/huge.c +++ b/src/huge.c @@ -168,7 +168,7 @@ huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, */ char buf[BUFERROR_BUF]; - buferror(errno, buf, sizeof(buf)); + buferror(buf, sizeof(buf)); malloc_printf(": Error in mremap(): %s\n", buf); if (opt_abort) diff --git a/src/jemalloc.c b/src/jemalloc.c index 52296e07..cae0098b 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -472,9 +472,9 @@ malloc_conf_init(void) uintmax_t um; \ char *end; \ \ - errno = 0; \ + set_errno(0); \ um = malloc_strtoumax(v, &end, 0); \ - if (errno != 0 || (uintptr_t)end - \ + if (get_errno() != 0 || (uintptr_t)end -\ (uintptr_t)v != vlen) { \ malloc_conf_error( \ "Invalid conf value", \ @@ -493,9 +493,9 @@ malloc_conf_init(void) long l; \ char *end; \ \ - errno = 0; \ + set_errno(0); \ l = strtol(v, &end, 0); \ - if (errno != 0 || (uintptr_t)end - \ + if (get_errno() != 0 || (uintptr_t)end -\ (uintptr_t)v != vlen) { \ 
malloc_conf_error( \ "Invalid conf value", \ @@ -831,7 +831,7 @@ label_oom: "out of memory\n"); abort(); } - errno = ENOMEM; + set_errno(ENOMEM); } if (config_prof && opt_prof && ret != NULL) prof_malloc(ret, usize, cnt); @@ -959,7 +959,7 @@ je_aligned_alloc(size_t alignment, size_t size) if ((err = imemalign(&ret, alignment, size, 1)) != 0) { ret = NULL; - errno = err; + set_errno(err); } JEMALLOC_VALGRIND_MALLOC(err == 0, ret, isalloc(ret, config_prof), false); @@ -1029,7 +1029,7 @@ label_return: "memory\n"); abort(); } - errno = ENOMEM; + set_errno(ENOMEM); } if (config_prof && opt_prof && ret != NULL) @@ -1130,7 +1130,7 @@ label_oom: "out of memory\n"); abort(); } - errno = ENOMEM; + set_errno(ENOMEM); } } else { /* realloc(NULL, size) is equivalent to malloc(size). */ @@ -1172,7 +1172,7 @@ label_oom: "out of memory\n"); abort(); } - errno = ENOMEM; + set_errno(ENOMEM); } } diff --git a/src/util.c b/src/util.c index 2042329c..64d53dd9 100644 --- a/src/util.c +++ b/src/util.c @@ -65,7 +65,7 @@ void (*je_malloc_message)(void *, const char *s) * provide a wrapper. */ int -buferror(int errnum, char *buf, size_t buflen) +buferror(char *buf, size_t buflen) { #ifdef _WIN32 @@ -93,7 +93,7 @@ malloc_strtoumax(const char *nptr, char **endptr, int base) const char *p, *ns; if (base < 0 || base == 1 || base > 36) { - errno = EINVAL; + set_errno(EINVAL); return (UINTMAX_MAX); } b = base; @@ -168,7 +168,7 @@ malloc_strtoumax(const char *nptr, char **endptr, int base) ret += digit; if (ret < pret) { /* Overflow. 
*/ - errno = ERANGE; + set_errno(ERANGE); return (UINTMAX_MAX); } p++; @@ -416,9 +416,9 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': { uintmax_t uwidth; - errno = 0; + set_errno(0); uwidth = malloc_strtoumax(f, (char **)&f, 10); - assert(uwidth != UINTMAX_MAX || errno != + assert(uwidth != UINTMAX_MAX || get_errno() != ERANGE); width = (int)uwidth; if (*f == '.') { @@ -442,9 +442,10 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': { uintmax_t uprec; - errno = 0; + set_errno(0); uprec = malloc_strtoumax(f, (char **)&f, 10); - assert(uprec != UINTMAX_MAX || errno != ERANGE); + assert(uprec != UINTMAX_MAX || get_errno() != + ERANGE); prec = (int)uprec; break; } diff --git a/test/aligned_alloc.c b/test/aligned_alloc.c index 81caa0ad..5a9b0cae 100644 --- a/test/aligned_alloc.c +++ b/test/aligned_alloc.c @@ -17,18 +17,18 @@ main(void) /* Test error conditions. 
*/ alignment = 0; - errno = 0; + set_errno(0); p = aligned_alloc(alignment, 1); - if (p != NULL || errno != EINVAL) { + if (p != NULL || get_errno() != EINVAL) { malloc_printf( "Expected error for invalid alignment %zu\n", alignment); } for (alignment = sizeof(size_t); alignment < MAXALIGN; alignment <<= 1) { - errno = 0; + set_errno(0); p = aligned_alloc(alignment + 1, 1); - if (p != NULL || errno != EINVAL) { + if (p != NULL || get_errno() != EINVAL) { malloc_printf( "Expected error for invalid alignment %zu\n", alignment + 1); @@ -42,9 +42,9 @@ main(void) alignment = 0x80000000LU; size = 0x80000000LU; #endif - errno = 0; + set_errno(0); p = aligned_alloc(alignment, size); - if (p != NULL || errno != ENOMEM) { + if (p != NULL || get_errno() != ENOMEM) { malloc_printf( "Expected error for aligned_alloc(%zu, %zu)\n", alignment, size); @@ -57,9 +57,9 @@ main(void) alignment = 0x40000000LU; size = 0x84000001LU; #endif - errno = 0; + set_errno(0); p = aligned_alloc(alignment, size); - if (p != NULL || errno != ENOMEM) { + if (p != NULL || get_errno() != ENOMEM) { malloc_printf( "Expected error for aligned_alloc(%zu, %zu)\n", alignment, size); @@ -71,9 +71,9 @@ main(void) #else size = 0xfffffff0LU; #endif - errno = 0; + set_errno(0); p = aligned_alloc(alignment, size); - if (p != NULL || errno != ENOMEM) { + if (p != NULL || get_errno() != ENOMEM) { malloc_printf( "Expected error for aligned_alloc(&p, %zu, %zu)\n", alignment, size); @@ -93,9 +93,12 @@ main(void) for (i = 0; i < NITER; i++) { ps[i] = aligned_alloc(alignment, size); if (ps[i] == NULL) { + char buf[BUFERROR_BUF]; + + buferror(buf, sizeof(buf)); malloc_printf( "Error for size %zu (%#zx): %s\n", - size, size, strerror(errno)); + size, size, buf); exit(1); } total += malloc_usable_size(ps[i]); From 7cdea3973cab8640d1f44c7638ed5e30ed18be24 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Mon, 30 Apr 2012 12:38:27 +0200 Subject: [PATCH 0209/3378] Few configure.ac adjustments - Use the extensions autoconf finds 
for object and executable files. - Remove the sorev variable, and replace SOREV definition with sorev's. - Default to je_ prefix on win32. --- bin/jemalloc.sh.in | 2 +- configure.ac | 18 ++++++------------ include/jemalloc/internal/util.h | 4 ++-- 3 files changed, 9 insertions(+), 15 deletions(-) diff --git a/bin/jemalloc.sh.in b/bin/jemalloc.sh.in index 58683f5d..cdf36737 100644 --- a/bin/jemalloc.sh.in +++ b/bin/jemalloc.sh.in @@ -4,6 +4,6 @@ prefix=@prefix@ exec_prefix=@exec_prefix@ libdir=@libdir@ -@LD_PRELOAD_VAR@=${libdir}/libjemalloc.@sorev@ +@LD_PRELOAD_VAR@=${libdir}/libjemalloc.@SOREV@ export @LD_PRELOAD_VAR@ exec "$@" diff --git a/configure.ac b/configure.ac index 98211c8c..6e74238f 100644 --- a/configure.ac +++ b/configure.ac @@ -195,14 +195,13 @@ AC_DEFINE_UNQUOTED([CPU_SPINWAIT], [$CPU_SPINWAIT]) LD_PRELOAD_VAR="LD_PRELOAD" so="so" -o="o" +o="$ac_objext" a="a" -exe= +exe="$ac_exeext" libprefix="lib" DSO_LDFLAGS='-shared -Wl,-soname,$(@F)' RPATH='-Wl,-rpath,$(1)' -SOREV='$(SO).$(REV)' -sorev="${so}.${rev}" +SOREV="${so}.${rev}" PIC_CFLAGS='-fPIC -DPIC' dnl Heap profiling uses the log(3) function. @@ -226,8 +225,7 @@ case "${host}" in so="dylib" force_tls="0" DSO_LDFLAGS='-shared -Wl,-dylib_install_name,$(@F)' - SOREV='$(REV).$(SO)' - sorev="${rev}.${so}" + SOREV="${rev}.${so}" ;; *-*-freebsd*) CFLAGS="$CFLAGS" @@ -281,12 +279,9 @@ case "${host}" in RPATH="" so="dll" DSO_LDFLAGS="-shared" - o="obj" a="lib" libprefix="" - exe=".exe" - SOREV='$(SO)' - sorev="${so}" + SOREV="${so}" PIC_CFLAGS="" ;; *) @@ -304,7 +299,6 @@ AC_SUBST([exe]) AC_SUBST([libprefix]) AC_SUBST([DSO_LDFLAGS]) AC_SUBST([SOREV]) -AC_SUBST([sorev]) AC_SUBST([PIC_CFLAGS]) JE_COMPILABLE([__attribute__ syntax], @@ -419,7 +413,7 @@ dnl Do not prefix public APIs by default. 
AC_ARG_WITH([jemalloc_prefix], [AS_HELP_STRING([--with-jemalloc-prefix=], [Prefix to prepend to all public APIs])], [JEMALLOC_PREFIX="$with_jemalloc_prefix"], - [if test "x$abi" != "xmacho" ; then + [if test "x$abi" != "xmacho" -a "x$abi" != "xpecoff"; then JEMALLOC_PREFIX="" else JEMALLOC_PREFIX="je_" diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h index 9661c7b1..d84c7a98 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util.h @@ -161,9 +161,9 @@ get_errno(void) { #ifdef _WIN32 - return GetLastError(); + return (GetLastError()); #else - return errno; + return (errno); #endif } #endif From da99e31105eb709ef4ec8a120b115c32a6b9723a Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Mon, 30 Apr 2012 12:38:29 +0200 Subject: [PATCH 0210/3378] Replace JEMALLOC_ATTR with various different macros when it makes sense Theses newly added macros will be used to implement the equivalent under MSVC. Also, move the definitions to headers, where they make more sense, and for some, are even more useful there (e.g. malloc). 
--- include/jemalloc/internal/util.h | 2 -- include/jemalloc/jemalloc.h.in | 49 ++++++++++++++------------ include/jemalloc/jemalloc_defs.h.in | 14 +++++--- src/arena.c | 2 +- src/jemalloc.c | 53 +++++------------------------ src/mutex.c | 5 ++- src/tsd.c | 6 ++-- src/util.c | 7 ++-- 8 files changed, 56 insertions(+), 82 deletions(-) diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h index d84c7a98..fa88bf3f 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util.h @@ -82,8 +82,6 @@ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -extern void (*je_malloc_message)(void *wcbopaque, const char *s); - int buferror(char *buf, size_t buflen); uintmax_t malloc_strtoumax(const char *nptr, char **endptr, int base); diff --git a/include/jemalloc/jemalloc.h.in b/include/jemalloc/jemalloc.h.in index f0581dbd..47a4b9b7 100644 --- a/include/jemalloc/jemalloc.h.in +++ b/include/jemalloc/jemalloc.h.in @@ -36,35 +36,40 @@ extern "C" { * namespace management, and should be omitted in application code unless * JEMALLOC_NO_DEMANGLE is defined (see below). 
*/ -extern const char *je_malloc_conf; -extern void (*je_malloc_message)(void *, const char *); +extern JEMALLOC_EXPORT const char *je_malloc_conf; +extern JEMALLOC_EXPORT void (*je_malloc_message)(void *cbopaque, + const char *s); -void *je_malloc(size_t size) JEMALLOC_ATTR(malloc); -void *je_calloc(size_t num, size_t size) JEMALLOC_ATTR(malloc); -int je_posix_memalign(void **memptr, size_t alignment, size_t size) - JEMALLOC_ATTR(nonnull(1)); -void *je_aligned_alloc(size_t alignment, size_t size) JEMALLOC_ATTR(malloc); -void *je_realloc(void *ptr, size_t size); -void je_free(void *ptr); +JEMALLOC_EXPORT void *je_malloc(size_t size) JEMALLOC_ATTR(malloc); +JEMALLOC_EXPORT void *je_calloc(size_t num, size_t size) + JEMALLOC_ATTR(malloc); +JEMALLOC_EXPORT int je_posix_memalign(void **memptr, size_t alignment, + size_t size) JEMALLOC_ATTR(nonnull(1)); +JEMALLOC_EXPORT void *je_aligned_alloc(size_t alignment, size_t size) + JEMALLOC_ATTR(malloc); +JEMALLOC_EXPORT void *je_realloc(void *ptr, size_t size); +JEMALLOC_EXPORT void je_free(void *ptr); -size_t je_malloc_usable_size(const void *ptr); -void je_malloc_stats_print(void (*write_cb)(void *, const char *), - void *je_cbopaque, const char *opts); -int je_mallctl(const char *name, void *oldp, size_t *oldlenp, void *newp, - size_t newlen); -int je_mallctlnametomib(const char *name, size_t *mibp, size_t *miblenp); -int je_mallctlbymib(const size_t *mib, size_t miblen, void *oldp, +JEMALLOC_EXPORT size_t je_malloc_usable_size(const void *ptr); +JEMALLOC_EXPORT void je_malloc_stats_print(void (*write_cb)(void *, + const char *), void *je_cbopaque, const char *opts); +JEMALLOC_EXPORT int je_mallctl(const char *name, void *oldp, size_t *oldlenp, void *newp, size_t newlen); +JEMALLOC_EXPORT int je_mallctlnametomib(const char *name, size_t *mibp, + size_t *miblenp); +JEMALLOC_EXPORT int je_mallctlbymib(const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen); #ifdef JEMALLOC_EXPERIMENTAL 
-int je_allocm(void **ptr, size_t *rsize, size_t size, int flags) - JEMALLOC_ATTR(nonnull(1)); -int je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, +JEMALLOC_EXPORT int je_allocm(void **ptr, size_t *rsize, size_t size, int flags) JEMALLOC_ATTR(nonnull(1)); -int je_sallocm(const void *ptr, size_t *rsize, int flags) +JEMALLOC_EXPORT int je_rallocm(void **ptr, size_t *rsize, size_t size, + size_t extra, int flags) JEMALLOC_ATTR(nonnull(1)); +JEMALLOC_EXPORT int je_sallocm(const void *ptr, size_t *rsize, int flags) JEMALLOC_ATTR(nonnull(1)); -int je_dallocm(void *ptr, int flags) JEMALLOC_ATTR(nonnull(1)); -int je_nallocm(size_t *rsize, size_t size, int flags); +JEMALLOC_EXPORT int je_dallocm(void *ptr, int flags) + JEMALLOC_ATTR(nonnull(1)); +JEMALLOC_EXPORT int je_nallocm(size_t *rsize, size_t size, int flags); #endif /* diff --git a/include/jemalloc/jemalloc_defs.h.in b/include/jemalloc/jemalloc_defs.h.in index 6e816557..126f6b76 100644 --- a/include/jemalloc/jemalloc_defs.h.in +++ b/include/jemalloc/jemalloc_defs.h.in @@ -104,11 +104,17 @@ /* Defined if __attribute__((...)) syntax is supported. */ #undef JEMALLOC_HAVE_ATTR #ifdef JEMALLOC_HAVE_ATTR -# define JEMALLOC_CATTR(s, a) __attribute__((s)) -# define JEMALLOC_ATTR(s) JEMALLOC_CATTR(s,) +# define JEMALLOC_ATTR(s) __attribute__((s)) +# define JEMALLOC_EXPORT JEMALLOC_ATTR(visibility("default")) +# define JEMALLOC_ALIGNED(s) JEMALLOC_ATTR(aligned(s)) +# define JEMALLOC_SECTION(s) JEMALLOC_ATTR(section(s)) +# define JEMALLOC_NOINLINE JEMALLOC_ATTR(noinline) #else -# define JEMALLOC_CATTR(s, a) a -# define JEMALLOC_ATTR(s) JEMALLOC_CATTR(s,) +# define JEMALLOC_ATTR(s) +# define JEMALLOC_EXPORT +# define JEMALLOC_ALIGNED(s) +# define JEMALLOC_SECTION(s) +# define JEMALLOC_NOINLINE #endif /* Defined if sbrk() is supported. 
*/ diff --git a/src/arena.c b/src/arena.c index 7fac3619..51c268c2 100644 --- a/src/arena.c +++ b/src/arena.c @@ -7,7 +7,7 @@ ssize_t opt_lg_dirty_mult = LG_DIRTY_MULT_DEFAULT; arena_bin_info_t arena_bin_info[NBINS]; -JEMALLOC_ATTR(aligned(CACHELINE)) +JEMALLOC_ALIGNED(CACHELINE) const uint8_t small_size2bin[] = { #define S2B_8(i) i, #define S2B_16(i) S2B_8(i) S2B_8(i) diff --git a/src/jemalloc.c b/src/jemalloc.c index cae0098b..2f858c3c 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -9,7 +9,7 @@ malloc_tsd_data(, thread_allocated, thread_allocated_t, THREAD_ALLOCATED_INITIALIZER) /* Runtime configuration options. */ -const char *je_malloc_conf JEMALLOC_ATTR(visibility("default")); +const char *je_malloc_conf; #ifdef JEMALLOC_DEBUG bool opt_abort = true; # ifdef JEMALLOC_FILL @@ -787,8 +787,6 @@ malloc_init_hard(void) * Begin malloc(3)-compatible functions. */ -JEMALLOC_ATTR(malloc) -JEMALLOC_ATTR(visibility("default")) void * je_malloc(size_t size) { @@ -938,8 +936,6 @@ label_return: return (ret); } -JEMALLOC_ATTR(nonnull(1)) -JEMALLOC_ATTR(visibility("default")) int je_posix_memalign(void **memptr, size_t alignment, size_t size) { @@ -949,8 +945,6 @@ je_posix_memalign(void **memptr, size_t alignment, size_t size) return (ret); } -JEMALLOC_ATTR(malloc) -JEMALLOC_ATTR(visibility("default")) void * je_aligned_alloc(size_t alignment, size_t size) { @@ -966,8 +960,6 @@ je_aligned_alloc(size_t alignment, size_t size) return (ret); } -JEMALLOC_ATTR(malloc) -JEMALLOC_ATTR(visibility("default")) void * je_calloc(size_t num, size_t size) { @@ -1043,7 +1035,6 @@ label_return: return (ret); } -JEMALLOC_ATTR(visibility("default")) void * je_realloc(void *ptr, size_t size) { @@ -1191,7 +1182,6 @@ label_return: return (ret); } -JEMALLOC_ATTR(visibility("default")) void je_free(void *ptr) { @@ -1226,8 +1216,6 @@ je_free(void *ptr) */ #ifdef JEMALLOC_OVERRIDE_MEMALIGN -JEMALLOC_ATTR(malloc) -JEMALLOC_ATTR(visibility("default")) void * je_memalign(size_t alignment, size_t 
size) { @@ -1239,8 +1227,6 @@ je_memalign(size_t alignment, size_t size) #endif #ifdef JEMALLOC_OVERRIDE_VALLOC -JEMALLOC_ATTR(malloc) -JEMALLOC_ATTR(visibility("default")) void * je_valloc(size_t size) { @@ -1269,17 +1255,12 @@ je_valloc(size_t size) * passed an extra argument for the caller return address, which will be * ignored. */ -JEMALLOC_ATTR(visibility("default")) -void (* const __free_hook)(void *ptr) = je_free; - -JEMALLOC_ATTR(visibility("default")) -void *(* const __malloc_hook)(size_t size) = je_malloc; - -JEMALLOC_ATTR(visibility("default")) -void *(* const __realloc_hook)(void *ptr, size_t size) = je_realloc; - -JEMALLOC_ATTR(visibility("default")) -void *(* const __memalign_hook)(size_t alignment, size_t size) = je_memalign; +JEMALLOC_EXPORT void (* const __free_hook)(void *ptr) = je_free; +JEMALLOC_EXPORT void *(* const __malloc_hook)(size_t size) = je_malloc; +JEMALLOC_EXPORT void *(* const __realloc_hook)(void *ptr, size_t size) = + je_realloc; +JEMALLOC_EXPORT void *(* const __memalign_hook)(size_t alignment, size_t size) = + je_memalign; #endif /* @@ -1290,7 +1271,6 @@ void *(* const __memalign_hook)(size_t alignment, size_t size) = je_memalign; * Begin non-standard functions. 
*/ -JEMALLOC_ATTR(visibility("default")) size_t je_malloc_usable_size(const void *ptr) { @@ -1306,7 +1286,6 @@ je_malloc_usable_size(const void *ptr) return (ret); } -JEMALLOC_ATTR(visibility("default")) void je_malloc_stats_print(void (*write_cb)(void *, const char *), void *cbopaque, const char *opts) @@ -1315,7 +1294,6 @@ je_malloc_stats_print(void (*write_cb)(void *, const char *), void *cbopaque, stats_print(write_cb, cbopaque, opts); } -JEMALLOC_ATTR(visibility("default")) int je_mallctl(const char *name, void *oldp, size_t *oldlenp, void *newp, size_t newlen) @@ -1327,7 +1305,6 @@ je_mallctl(const char *name, void *oldp, size_t *oldlenp, void *newp, return (ctl_byname(name, oldp, oldlenp, newp, newlen)); } -JEMALLOC_ATTR(visibility("default")) int je_mallctlnametomib(const char *name, size_t *mibp, size_t *miblenp) { @@ -1338,7 +1315,6 @@ je_mallctlnametomib(const char *name, size_t *mibp, size_t *miblenp) return (ctl_nametomib(name, mibp, miblenp)); } -JEMALLOC_ATTR(visibility("default")) int je_mallctlbymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) @@ -1374,8 +1350,6 @@ iallocm(size_t usize, size_t alignment, bool zero) return (imalloc(usize)); } -JEMALLOC_ATTR(nonnull(1)) -JEMALLOC_ATTR(visibility("default")) int je_allocm(void **ptr, size_t *rsize, size_t size, int flags) { @@ -1444,8 +1418,6 @@ label_oom: return (ALLOCM_ERR_OOM); } -JEMALLOC_ATTR(nonnull(1)) -JEMALLOC_ATTR(visibility("default")) int je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, int flags) { @@ -1555,8 +1527,6 @@ label_oom: return (ALLOCM_ERR_OOM); } -JEMALLOC_ATTR(nonnull(1)) -JEMALLOC_ATTR(visibility("default")) int je_sallocm(const void *ptr, size_t *rsize, int flags) { @@ -1576,8 +1546,6 @@ je_sallocm(const void *ptr, size_t *rsize, int flags) return (ALLOCM_SUCCESS); } -JEMALLOC_ATTR(nonnull(1)) -JEMALLOC_ATTR(visibility("default")) int je_dallocm(void *ptr, int flags) { @@ -1605,7 +1573,6 @@ je_dallocm(void *ptr, 
int flags) return (ALLOCM_SUCCESS); } -JEMALLOC_ATTR(visibility("default")) int je_nallocm(size_t *rsize, size_t size, int flags) { @@ -1641,8 +1608,7 @@ je_nallocm(size_t *rsize, size_t size, int flags) void jemalloc_prefork(void) #else -JEMALLOC_ATTR(visibility("default")) -void +JEMALLOC_EXPORT void _malloc_prefork(void) #endif { @@ -1663,8 +1629,7 @@ _malloc_prefork(void) void jemalloc_postfork_parent(void) #else -JEMALLOC_ATTR(visibility("default")) -void +JEMALLOC_EXPORT void _malloc_postfork(void) #endif { diff --git a/src/mutex.c b/src/mutex.c index 159d82a3..37a843e6 100644 --- a/src/mutex.c +++ b/src/mutex.c @@ -48,8 +48,7 @@ pthread_create_once(void) isthreaded = true; } -JEMALLOC_ATTR(visibility("default")) -int +JEMALLOC_EXPORT int pthread_create(pthread_t *__restrict thread, const pthread_attr_t *__restrict attr, void *(*start_routine)(void *), void *__restrict arg) @@ -72,6 +71,7 @@ int _pthread_mutex_init_calloc_cb(pthread_mutex_t *mutex, bool malloc_mutex_init(malloc_mutex_t *mutex) { + #ifdef _WIN32 if (!InitializeCriticalSectionAndSpinCount(&mutex->lock, _CRT_SPINCOUNT)) @@ -98,7 +98,6 @@ malloc_mutex_init(malloc_mutex_t *mutex) return (true); } pthread_mutexattr_destroy(&attr); - #endif return (false); } diff --git a/src/tsd.c b/src/tsd.c index d7714b02..0506c8a0 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -32,7 +32,9 @@ malloc_tsd_no_cleanup(void *arg) } #if defined(JEMALLOC_MALLOC_THREAD_CLEANUP) || defined(_WIN32) -JEMALLOC_ATTR(visibility("default")) +#ifndef _WIN32 +JEMALLOC_EXPORT +#endif void _malloc_thread_cleanup(void) { @@ -91,7 +93,7 @@ _tls_callback(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved) return (true); } -JEMALLOC_ATTR(section(".CRT$XLY")) JEMALLOC_ATTR(used) +JEMALLOC_SECTION(".CRT$XLY") JEMALLOC_ATTR(used) static const BOOL (WINAPI *tls_callback)(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved) = _tls_callback; #endif diff --git a/src/util.c b/src/util.c index 64d53dd9..3c92ad2c 100644 --- a/src/util.c +++ 
b/src/util.c @@ -40,8 +40,7 @@ static char *x2s(uintmax_t x, bool alt_form, bool uppercase, char *s, /******************************************************************************/ /* malloc_message() setup. */ -JEMALLOC_CATTR(visibility("hidden"), static) -void +static void wrtmessage(void *cbopaque, const char *s) { @@ -57,8 +56,8 @@ wrtmessage(void *cbopaque, const char *s) #endif } -void (*je_malloc_message)(void *, const char *s) - JEMALLOC_ATTR(visibility("default")) = wrtmessage; +JEMALLOC_EXPORT void (*je_malloc_message)(void *, const char *s) = + wrtmessage; /* * glibc provides a non-standard strerror_r() when _GNU_SOURCE is defined, so From b45c57ecaf2bf7ff8abe013b856be90fb4e365f3 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Mon, 30 Apr 2012 12:38:30 +0200 Subject: [PATCH 0211/3378] Import msinttypes http://code.google.com/p/msinttypes/ --- include/msc_compat/inttypes.h | 305 ++++++++++++++++++++++++++++++++++ include/msc_compat/stdint.h | 247 +++++++++++++++++++++++++++ 2 files changed, 552 insertions(+) create mode 100644 include/msc_compat/inttypes.h create mode 100644 include/msc_compat/stdint.h diff --git a/include/msc_compat/inttypes.h b/include/msc_compat/inttypes.h new file mode 100644 index 00000000..4b3828a2 --- /dev/null +++ b/include/msc_compat/inttypes.h @@ -0,0 +1,305 @@ +// ISO C9x compliant inttypes.h for Microsoft Visual Studio +// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124 +// +// Copyright (c) 2006 Alexander Chemeris +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. The name of the author may be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED +// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO +// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef _MSC_VER // [ +#error "Use this header only with Microsoft Visual C++ compilers!" 
+#endif // _MSC_VER ] + +#ifndef _MSC_INTTYPES_H_ // [ +#define _MSC_INTTYPES_H_ + +#if _MSC_VER > 1000 +#pragma once +#endif + +#include "stdint.h" + +// 7.8 Format conversion of integer types + +typedef struct { + intmax_t quot; + intmax_t rem; +} imaxdiv_t; + +// 7.8.1 Macros for format specifiers + +#if !defined(__cplusplus) || defined(__STDC_FORMAT_MACROS) // [ See footnote 185 at page 198 + +// The fprintf macros for signed integers are: +#define PRId8 "d" +#define PRIi8 "i" +#define PRIdLEAST8 "d" +#define PRIiLEAST8 "i" +#define PRIdFAST8 "d" +#define PRIiFAST8 "i" + +#define PRId16 "hd" +#define PRIi16 "hi" +#define PRIdLEAST16 "hd" +#define PRIiLEAST16 "hi" +#define PRIdFAST16 "hd" +#define PRIiFAST16 "hi" + +#define PRId32 "I32d" +#define PRIi32 "I32i" +#define PRIdLEAST32 "I32d" +#define PRIiLEAST32 "I32i" +#define PRIdFAST32 "I32d" +#define PRIiFAST32 "I32i" + +#define PRId64 "I64d" +#define PRIi64 "I64i" +#define PRIdLEAST64 "I64d" +#define PRIiLEAST64 "I64i" +#define PRIdFAST64 "I64d" +#define PRIiFAST64 "I64i" + +#define PRIdMAX "I64d" +#define PRIiMAX "I64i" + +#define PRIdPTR "Id" +#define PRIiPTR "Ii" + +// The fprintf macros for unsigned integers are: +#define PRIo8 "o" +#define PRIu8 "u" +#define PRIx8 "x" +#define PRIX8 "X" +#define PRIoLEAST8 "o" +#define PRIuLEAST8 "u" +#define PRIxLEAST8 "x" +#define PRIXLEAST8 "X" +#define PRIoFAST8 "o" +#define PRIuFAST8 "u" +#define PRIxFAST8 "x" +#define PRIXFAST8 "X" + +#define PRIo16 "ho" +#define PRIu16 "hu" +#define PRIx16 "hx" +#define PRIX16 "hX" +#define PRIoLEAST16 "ho" +#define PRIuLEAST16 "hu" +#define PRIxLEAST16 "hx" +#define PRIXLEAST16 "hX" +#define PRIoFAST16 "ho" +#define PRIuFAST16 "hu" +#define PRIxFAST16 "hx" +#define PRIXFAST16 "hX" + +#define PRIo32 "I32o" +#define PRIu32 "I32u" +#define PRIx32 "I32x" +#define PRIX32 "I32X" +#define PRIoLEAST32 "I32o" +#define PRIuLEAST32 "I32u" +#define PRIxLEAST32 "I32x" +#define PRIXLEAST32 "I32X" +#define PRIoFAST32 "I32o" +#define PRIuFAST32 
"I32u" +#define PRIxFAST32 "I32x" +#define PRIXFAST32 "I32X" + +#define PRIo64 "I64o" +#define PRIu64 "I64u" +#define PRIx64 "I64x" +#define PRIX64 "I64X" +#define PRIoLEAST64 "I64o" +#define PRIuLEAST64 "I64u" +#define PRIxLEAST64 "I64x" +#define PRIXLEAST64 "I64X" +#define PRIoFAST64 "I64o" +#define PRIuFAST64 "I64u" +#define PRIxFAST64 "I64x" +#define PRIXFAST64 "I64X" + +#define PRIoMAX "I64o" +#define PRIuMAX "I64u" +#define PRIxMAX "I64x" +#define PRIXMAX "I64X" + +#define PRIoPTR "Io" +#define PRIuPTR "Iu" +#define PRIxPTR "Ix" +#define PRIXPTR "IX" + +// The fscanf macros for signed integers are: +#define SCNd8 "d" +#define SCNi8 "i" +#define SCNdLEAST8 "d" +#define SCNiLEAST8 "i" +#define SCNdFAST8 "d" +#define SCNiFAST8 "i" + +#define SCNd16 "hd" +#define SCNi16 "hi" +#define SCNdLEAST16 "hd" +#define SCNiLEAST16 "hi" +#define SCNdFAST16 "hd" +#define SCNiFAST16 "hi" + +#define SCNd32 "ld" +#define SCNi32 "li" +#define SCNdLEAST32 "ld" +#define SCNiLEAST32 "li" +#define SCNdFAST32 "ld" +#define SCNiFAST32 "li" + +#define SCNd64 "I64d" +#define SCNi64 "I64i" +#define SCNdLEAST64 "I64d" +#define SCNiLEAST64 "I64i" +#define SCNdFAST64 "I64d" +#define SCNiFAST64 "I64i" + +#define SCNdMAX "I64d" +#define SCNiMAX "I64i" + +#ifdef _WIN64 // [ +# define SCNdPTR "I64d" +# define SCNiPTR "I64i" +#else // _WIN64 ][ +# define SCNdPTR "ld" +# define SCNiPTR "li" +#endif // _WIN64 ] + +// The fscanf macros for unsigned integers are: +#define SCNo8 "o" +#define SCNu8 "u" +#define SCNx8 "x" +#define SCNX8 "X" +#define SCNoLEAST8 "o" +#define SCNuLEAST8 "u" +#define SCNxLEAST8 "x" +#define SCNXLEAST8 "X" +#define SCNoFAST8 "o" +#define SCNuFAST8 "u" +#define SCNxFAST8 "x" +#define SCNXFAST8 "X" + +#define SCNo16 "ho" +#define SCNu16 "hu" +#define SCNx16 "hx" +#define SCNX16 "hX" +#define SCNoLEAST16 "ho" +#define SCNuLEAST16 "hu" +#define SCNxLEAST16 "hx" +#define SCNXLEAST16 "hX" +#define SCNoFAST16 "ho" +#define SCNuFAST16 "hu" +#define SCNxFAST16 "hx" +#define 
SCNXFAST16 "hX" + +#define SCNo32 "lo" +#define SCNu32 "lu" +#define SCNx32 "lx" +#define SCNX32 "lX" +#define SCNoLEAST32 "lo" +#define SCNuLEAST32 "lu" +#define SCNxLEAST32 "lx" +#define SCNXLEAST32 "lX" +#define SCNoFAST32 "lo" +#define SCNuFAST32 "lu" +#define SCNxFAST32 "lx" +#define SCNXFAST32 "lX" + +#define SCNo64 "I64o" +#define SCNu64 "I64u" +#define SCNx64 "I64x" +#define SCNX64 "I64X" +#define SCNoLEAST64 "I64o" +#define SCNuLEAST64 "I64u" +#define SCNxLEAST64 "I64x" +#define SCNXLEAST64 "I64X" +#define SCNoFAST64 "I64o" +#define SCNuFAST64 "I64u" +#define SCNxFAST64 "I64x" +#define SCNXFAST64 "I64X" + +#define SCNoMAX "I64o" +#define SCNuMAX "I64u" +#define SCNxMAX "I64x" +#define SCNXMAX "I64X" + +#ifdef _WIN64 // [ +# define SCNoPTR "I64o" +# define SCNuPTR "I64u" +# define SCNxPTR "I64x" +# define SCNXPTR "I64X" +#else // _WIN64 ][ +# define SCNoPTR "lo" +# define SCNuPTR "lu" +# define SCNxPTR "lx" +# define SCNXPTR "lX" +#endif // _WIN64 ] + +#endif // __STDC_FORMAT_MACROS ] + +// 7.8.2 Functions for greatest-width integer types + +// 7.8.2.1 The imaxabs function +#define imaxabs _abs64 + +// 7.8.2.2 The imaxdiv function + +// This is modified version of div() function from Microsoft's div.c found +// in %MSVC.NET%\crt\src\div.c +#ifdef STATIC_IMAXDIV // [ +static +#else // STATIC_IMAXDIV ][ +_inline +#endif // STATIC_IMAXDIV ] +imaxdiv_t __cdecl imaxdiv(intmax_t numer, intmax_t denom) +{ + imaxdiv_t result; + + result.quot = numer / denom; + result.rem = numer % denom; + + if (numer < 0 && result.rem > 0) { + // did division wrong; must fix up + ++result.quot; + result.rem -= denom; + } + + return result; +} + +// 7.8.2.3 The strtoimax and strtoumax functions +#define strtoimax _strtoi64 +#define strtoumax _strtoui64 + +// 7.8.2.4 The wcstoimax and wcstoumax functions +#define wcstoimax _wcstoi64 +#define wcstoumax _wcstoui64 + + +#endif // _MSC_INTTYPES_H_ ] diff --git a/include/msc_compat/stdint.h b/include/msc_compat/stdint.h new file mode 
100644 index 00000000..d02608a5 --- /dev/null +++ b/include/msc_compat/stdint.h @@ -0,0 +1,247 @@ +// ISO C9x compliant stdint.h for Microsoft Visual Studio +// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124 +// +// Copyright (c) 2006-2008 Alexander Chemeris +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. The name of the author may be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED +// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO +// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef _MSC_VER // [ +#error "Use this header only with Microsoft Visual C++ compilers!" 
+#endif // _MSC_VER ] + +#ifndef _MSC_STDINT_H_ // [ +#define _MSC_STDINT_H_ + +#if _MSC_VER > 1000 +#pragma once +#endif + +#include + +// For Visual Studio 6 in C++ mode and for many Visual Studio versions when +// compiling for ARM we should wrap include with 'extern "C++" {}' +// or compiler give many errors like this: +// error C2733: second C linkage of overloaded function 'wmemchr' not allowed +#ifdef __cplusplus +extern "C" { +#endif +# include +#ifdef __cplusplus +} +#endif + +// Define _W64 macros to mark types changing their size, like intptr_t. +#ifndef _W64 +# if !defined(__midl) && (defined(_X86_) || defined(_M_IX86)) && _MSC_VER >= 1300 +# define _W64 __w64 +# else +# define _W64 +# endif +#endif + + +// 7.18.1 Integer types + +// 7.18.1.1 Exact-width integer types + +// Visual Studio 6 and Embedded Visual C++ 4 doesn't +// realize that, e.g. char has the same size as __int8 +// so we give up on __intX for them. +#if (_MSC_VER < 1300) + typedef signed char int8_t; + typedef signed short int16_t; + typedef signed int int32_t; + typedef unsigned char uint8_t; + typedef unsigned short uint16_t; + typedef unsigned int uint32_t; +#else + typedef signed __int8 int8_t; + typedef signed __int16 int16_t; + typedef signed __int32 int32_t; + typedef unsigned __int8 uint8_t; + typedef unsigned __int16 uint16_t; + typedef unsigned __int32 uint32_t; +#endif +typedef signed __int64 int64_t; +typedef unsigned __int64 uint64_t; + + +// 7.18.1.2 Minimum-width integer types +typedef int8_t int_least8_t; +typedef int16_t int_least16_t; +typedef int32_t int_least32_t; +typedef int64_t int_least64_t; +typedef uint8_t uint_least8_t; +typedef uint16_t uint_least16_t; +typedef uint32_t uint_least32_t; +typedef uint64_t uint_least64_t; + +// 7.18.1.3 Fastest minimum-width integer types +typedef int8_t int_fast8_t; +typedef int16_t int_fast16_t; +typedef int32_t int_fast32_t; +typedef int64_t int_fast64_t; +typedef uint8_t uint_fast8_t; +typedef uint16_t uint_fast16_t; 
+typedef uint32_t uint_fast32_t; +typedef uint64_t uint_fast64_t; + +// 7.18.1.4 Integer types capable of holding object pointers +#ifdef _WIN64 // [ + typedef signed __int64 intptr_t; + typedef unsigned __int64 uintptr_t; +#else // _WIN64 ][ + typedef _W64 signed int intptr_t; + typedef _W64 unsigned int uintptr_t; +#endif // _WIN64 ] + +// 7.18.1.5 Greatest-width integer types +typedef int64_t intmax_t; +typedef uint64_t uintmax_t; + + +// 7.18.2 Limits of specified-width integer types + +#if !defined(__cplusplus) || defined(__STDC_LIMIT_MACROS) // [ See footnote 220 at page 257 and footnote 221 at page 259 + +// 7.18.2.1 Limits of exact-width integer types +#define INT8_MIN ((int8_t)_I8_MIN) +#define INT8_MAX _I8_MAX +#define INT16_MIN ((int16_t)_I16_MIN) +#define INT16_MAX _I16_MAX +#define INT32_MIN ((int32_t)_I32_MIN) +#define INT32_MAX _I32_MAX +#define INT64_MIN ((int64_t)_I64_MIN) +#define INT64_MAX _I64_MAX +#define UINT8_MAX _UI8_MAX +#define UINT16_MAX _UI16_MAX +#define UINT32_MAX _UI32_MAX +#define UINT64_MAX _UI64_MAX + +// 7.18.2.2 Limits of minimum-width integer types +#define INT_LEAST8_MIN INT8_MIN +#define INT_LEAST8_MAX INT8_MAX +#define INT_LEAST16_MIN INT16_MIN +#define INT_LEAST16_MAX INT16_MAX +#define INT_LEAST32_MIN INT32_MIN +#define INT_LEAST32_MAX INT32_MAX +#define INT_LEAST64_MIN INT64_MIN +#define INT_LEAST64_MAX INT64_MAX +#define UINT_LEAST8_MAX UINT8_MAX +#define UINT_LEAST16_MAX UINT16_MAX +#define UINT_LEAST32_MAX UINT32_MAX +#define UINT_LEAST64_MAX UINT64_MAX + +// 7.18.2.3 Limits of fastest minimum-width integer types +#define INT_FAST8_MIN INT8_MIN +#define INT_FAST8_MAX INT8_MAX +#define INT_FAST16_MIN INT16_MIN +#define INT_FAST16_MAX INT16_MAX +#define INT_FAST32_MIN INT32_MIN +#define INT_FAST32_MAX INT32_MAX +#define INT_FAST64_MIN INT64_MIN +#define INT_FAST64_MAX INT64_MAX +#define UINT_FAST8_MAX UINT8_MAX +#define UINT_FAST16_MAX UINT16_MAX +#define UINT_FAST32_MAX UINT32_MAX +#define UINT_FAST64_MAX UINT64_MAX + 
+// 7.18.2.4 Limits of integer types capable of holding object pointers +#ifdef _WIN64 // [ +# define INTPTR_MIN INT64_MIN +# define INTPTR_MAX INT64_MAX +# define UINTPTR_MAX UINT64_MAX +#else // _WIN64 ][ +# define INTPTR_MIN INT32_MIN +# define INTPTR_MAX INT32_MAX +# define UINTPTR_MAX UINT32_MAX +#endif // _WIN64 ] + +// 7.18.2.5 Limits of greatest-width integer types +#define INTMAX_MIN INT64_MIN +#define INTMAX_MAX INT64_MAX +#define UINTMAX_MAX UINT64_MAX + +// 7.18.3 Limits of other integer types + +#ifdef _WIN64 // [ +# define PTRDIFF_MIN _I64_MIN +# define PTRDIFF_MAX _I64_MAX +#else // _WIN64 ][ +# define PTRDIFF_MIN _I32_MIN +# define PTRDIFF_MAX _I32_MAX +#endif // _WIN64 ] + +#define SIG_ATOMIC_MIN INT_MIN +#define SIG_ATOMIC_MAX INT_MAX + +#ifndef SIZE_MAX // [ +# ifdef _WIN64 // [ +# define SIZE_MAX _UI64_MAX +# else // _WIN64 ][ +# define SIZE_MAX _UI32_MAX +# endif // _WIN64 ] +#endif // SIZE_MAX ] + +// WCHAR_MIN and WCHAR_MAX are also defined in +#ifndef WCHAR_MIN // [ +# define WCHAR_MIN 0 +#endif // WCHAR_MIN ] +#ifndef WCHAR_MAX // [ +# define WCHAR_MAX _UI16_MAX +#endif // WCHAR_MAX ] + +#define WINT_MIN 0 +#define WINT_MAX _UI16_MAX + +#endif // __STDC_LIMIT_MACROS ] + + +// 7.18.4 Limits of other integer types + +#if !defined(__cplusplus) || defined(__STDC_CONSTANT_MACROS) // [ See footnote 224 at page 260 + +// 7.18.4.1 Macros for minimum-width integer constants + +#define INT8_C(val) val##i8 +#define INT16_C(val) val##i16 +#define INT32_C(val) val##i32 +#define INT64_C(val) val##i64 + +#define UINT8_C(val) val##ui8 +#define UINT16_C(val) val##ui16 +#define UINT32_C(val) val##ui32 +#define UINT64_C(val) val##ui64 + +// 7.18.4.2 Macros for greatest-width integer constants +#define INTMAX_C INT64_C +#define UINTMAX_C UINT64_C + +#endif // __STDC_CONSTANT_MACROS ] + + +#endif // _MSC_STDINT_H_ ] From fd97b1dfc76647c3f90f28dc63cc987041fe20df Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Mon, 30 Apr 2012 12:38:31 +0200 Subject: [PATCH 
0212/3378] Add support for MSVC Tested with MSVC 8 32 and 64 bits. --- Makefile.in | 21 ++++++-- configure.ac | 54 +++++++++++++++++-- include/jemalloc/internal/atomic.h | 28 ++++++++++ .../jemalloc/internal/jemalloc_internal.h.in | 21 ++++++-- include/jemalloc/jemalloc_defs.h.in | 10 ++++ .../{msc_compat => msvc_compat}/inttypes.h | 0 include/msvc_compat/stdbool.h | 16 ++++++ include/{msc_compat => msvc_compat}/stdint.h | 0 include/msvc_compat/strings.h | 23 ++++++++ src/jemalloc.c | 11 +++- src/tsd.c | 8 +++ 11 files changed, 177 insertions(+), 15 deletions(-) rename include/{msc_compat => msvc_compat}/inttypes.h (100%) create mode 100644 include/msvc_compat/stdbool.h rename include/{msc_compat => msvc_compat}/stdint.h (100%) create mode 100644 include/msvc_compat/strings.h diff --git a/Makefile.in b/Makefile.in index 7685f156..94f48699 100644 --- a/Makefile.in +++ b/Makefile.in @@ -26,9 +26,11 @@ abs_objroot := @abs_objroot@ CPPFLAGS := @CPPFLAGS@ -I$(srcroot)include -I$(objroot)include CFLAGS := @CFLAGS@ LDFLAGS := @LDFLAGS@ +EXTRA_LDFLAGS := @EXTRA_LDFLAGS@ LIBS := @LIBS@ RPATH_EXTRA := @RPATH_EXTRA@ SO := @so@ +IMPORTLIB := @importlib@ O := @o@ A := @a@ EXE := @exe@ @@ -49,6 +51,9 @@ enable_experimental := @enable_experimental@ DSO_LDFLAGS = @DSO_LDFLAGS@ SOREV = @SOREV@ PIC_CFLAGS = @PIC_CFLAGS@ +CTARGET = @CTARGET@ +LDTARGET = @LDTARGET@ +MKLIB = @MKLIB@ ifeq (macho, $(ABI)) TEST_LIBRARY_PATH := DYLD_FALLBACK_LIBRARY_PATH="$(objroot)lib" @@ -77,9 +82,13 @@ CSRCS := $(srcroot)src/jemalloc.c $(srcroot)src/arena.c $(srcroot)src/atomic.c \ ifeq (macho, $(ABI)) CSRCS += $(srcroot)src/zone.c endif +ifeq ($(IMPORTLIB),$(SO)) STATIC_LIBS := $(objroot)lib/$(LIBJEMALLOC).$(A) +endif ifdef PIC_CFLAGS STATIC_LIBS += $(objroot)lib/$(LIBJEMALLOC)_pic.$(A) +else +STATIC_LIBS += $(objroot)lib/$(LIBJEMALLOC)_s.$(A) endif DSOS := $(objroot)lib/$(LIBJEMALLOC).$(SOREV) ifneq ($(SOREV),$(SO)) @@ -136,10 +145,13 @@ $(CPICOBJS): $(objroot)src/%.pic.$(O): $(srcroot)src/%.c 
$(CPICOBJS): CFLAGS += $(PIC_CFLAGS) $(CTESTOBJS): $(objroot)test/%.$(O): $(srcroot)test/%.c $(CTESTOBJS): CPPFLAGS += -I$(objroot)test +ifneq ($(IMPORTLIB),$(SO)) +$(COBJS): CPPFLAGS += -DDLLEXPORT +endif $(COBJS) $(CPICOBJS) $(CTESTOBJS): %.$(O): @mkdir -p $(@D) - $(CC) $(CFLAGS) -c $(CPPFLAGS) -o $@ $< + $(CC) $(CFLAGS) -c $(CPPFLAGS) $(CTARGET) $< @$(CC) -MM $(CPPFLAGS) -MT $@ -o $(@:%.$(O)=%.d) $< ifneq ($(SOREV),$(SO)) @@ -150,20 +162,21 @@ endif $(objroot)lib/$(LIBJEMALLOC).$(SOREV) : $(if $(PIC_CFLAGS),$(CPICOBJS),$(COBJS)) @mkdir -p $(@D) - $(CC) $(DSO_LDFLAGS) $(call RPATH,$(RPATH_EXTRA)) -o $@ $+ $(LDFLAGS) $(LIBS) + $(CC) $(DSO_LDFLAGS) $(call RPATH,$(RPATH_EXTRA)) $(LDTARGET) $+ $(LDFLAGS) $(LIBS) $(EXTRA_LDFLAGS) $(objroot)lib/$(LIBJEMALLOC)_pic.$(A) : $(CPICOBJS) $(objroot)lib/$(LIBJEMALLOC).$(A) : $(COBJS) +$(objroot)lib/$(LIBJEMALLOC)_s.$(A) : $(COBJS) $(STATIC_LIBS): @mkdir -p $(@D) - ar crus $@ $+ + $(MKLIB) $+ $(objroot)test/bitmap$(EXE): $(objroot)src/bitmap.$(O) $(objroot)test/%$(EXE): $(objroot)test/%.$(O) $(objroot)src/util.$(O) $(DSOS) @mkdir -p $(@D) - $(CC) -o $@ $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) -L$(objroot)lib -ljemalloc$(install_suffix) $(filter -lpthread,$(LIBS)) + $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(filter -lpthread,$(LIBS)) $(EXTRA_LDFLAGS) build_lib_shared: $(DSOS) build_lib_static: $(STATIC_LIBS) diff --git a/configure.ac b/configure.ac index 6e74238f..50eaae65 100644 --- a/configure.ac +++ b/configure.ac @@ -111,6 +111,19 @@ dnl If CFLAGS isn't defined, set CFLAGS to something reasonable. Otherwise, dnl just prevent autoconf from molesting CFLAGS. 
CFLAGS=$CFLAGS AC_PROG_CC +if test "x$GCC" != "xyes" ; then + AC_CACHE_CHECK([whether compiler is MSVC], + [je_cv_msvc], + [AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], + [ +#ifndef _MSC_VER + int fail[-1]; +#endif +])], + [je_cv_msvc=yes], + [je_cv_msvc=no])]) +fi + if test "x$CFLAGS" = "x" ; then no_CFLAGS="yes" if test "x$GCC" = "xyes" ; then @@ -118,6 +131,12 @@ if test "x$CFLAGS" = "x" ; then JE_CFLAGS_APPEND([-Wall]) JE_CFLAGS_APPEND([-pipe]) JE_CFLAGS_APPEND([-g3]) + elif test "x$je_cv_msvc" = "xyes" ; then + CC="$CC -nologo" + JE_CFLAGS_APPEND([-Zi]) + JE_CFLAGS_APPEND([-MT]) + JE_CFLAGS_APPEND([-W3]) + CPPFLAGS="$CPPFLAGS -I${srcroot}/include/msvc_compat" fi fi dnl Append EXTRA_CFLAGS to CFLAGS, if defined. @@ -195,6 +214,7 @@ AC_DEFINE_UNQUOTED([CPU_SPINWAIT], [$CPU_SPINWAIT]) LD_PRELOAD_VAR="LD_PRELOAD" so="so" +importlib="${so}" o="$ac_objext" a="a" exe="$ac_exeext" @@ -203,9 +223,10 @@ DSO_LDFLAGS='-shared -Wl,-soname,$(@F)' RPATH='-Wl,-rpath,$(1)' SOREV="${so}.${rev}" PIC_CFLAGS='-fPIC -DPIC' - -dnl Heap profiling uses the log(3) function. -LIBS="$LIBS -lm" +CTARGET='-o $@' +LDTARGET='-o $@' +EXTRA_LDFLAGS= +MKLIB='ar crus $@' dnl Platform-specific settings. 
abi and RPATH can probably be determined dnl programmatically, but doing so is error-prone, which makes it generally @@ -223,6 +244,7 @@ case "${host}" in RPATH="" LD_PRELOAD_VAR="DYLD_INSERT_LIBRARIES" so="dylib" + importlib="${so}" force_tls="0" DSO_LDFLAGS='-shared -Wl,-dylib_install_name,$(@F)' SOREV="${rev}.${so}" @@ -278,7 +300,17 @@ case "${host}" in force_tls="0" RPATH="" so="dll" - DSO_LDFLAGS="-shared" + if test "x$je_cv_msvc" = "xyes" ; then + importlib="lib" + DSO_LDFLAGS="-LD" + EXTRA_LDFLAGS="-link -DEBUG" + CTARGET='-Fo$@' + LDTARGET='-Fe$@' + MKLIB='lib -nologo -out:$@' + else + importlib="${so}" + DSO_LDFLAGS="-shared" + fi a="lib" libprefix="" SOREV="${so}" @@ -293,13 +325,23 @@ AC_SUBST([abi]) AC_SUBST([RPATH]) AC_SUBST([LD_PRELOAD_VAR]) AC_SUBST([so]) +AC_SUBST([importlib]) AC_SUBST([o]) AC_SUBST([a]) AC_SUBST([exe]) AC_SUBST([libprefix]) AC_SUBST([DSO_LDFLAGS]) +AC_SUBST([EXTRA_LDFLAGS]) AC_SUBST([SOREV]) AC_SUBST([PIC_CFLAGS]) +AC_SUBST([CTARGET]) +AC_SUBST([LDTARGET]) +AC_SUBST([MKLIB]) + +if test "x$abi" != "xpecoff"; then + dnl Heap profiling uses the log(3) function. 
+ LIBS="$LIBS -lm" +fi JE_COMPILABLE([__attribute__ syntax], [static __attribute__((unused)) void foo(void){}], @@ -530,6 +572,8 @@ if test "x$enable_debug" = "x0" -a "x$no_CFLAGS" = "xyes" ; then if test "x$GCC" = "xyes" ; then JE_CFLAGS_APPEND([-O3]) JE_CFLAGS_APPEND([-funroll-loops]) + elif test "x$je_cv_msvc" = "xyes" ; then + JE_CFLAGS_APPEND([-O2]) else JE_CFLAGS_APPEND([-O]) fi @@ -833,11 +877,11 @@ AC_CACHE_CHECK([STATIC_PAGE_SHIFT], [je_cv_static_page_shift], AC_RUN_IFELSE([AC_LANG_PROGRAM( [[ +#include #ifdef _WIN32 #include #else #include -#include #endif #include ]], diff --git a/include/jemalloc/internal/atomic.h b/include/jemalloc/internal/atomic.h index 016c472a..11a7b47f 100644 --- a/include/jemalloc/internal/atomic.h +++ b/include/jemalloc/internal/atomic.h @@ -47,6 +47,20 @@ atomic_sub_uint64(uint64_t *p, uint64_t x) return (__sync_sub_and_fetch(p, x)); } +#elif (defined(_MSC_VER)) +JEMALLOC_INLINE uint64_t +atomic_add_uint64(uint64_t *p, uint64_t x) +{ + + return (InterlockedExchangeAdd64(p, x)); +} + +JEMALLOC_INLINE uint64_t +atomic_sub_uint64(uint64_t *p, uint64_t x) +{ + + return (InterlockedExchangeAdd64(p, -((int64_t)x))); +} #elif (defined(JEMALLOC_OSATOMIC)) JEMALLOC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x) @@ -145,6 +159,20 @@ atomic_sub_uint32(uint32_t *p, uint32_t x) return (__sync_sub_and_fetch(p, x)); } +#elif (defined(_MSC_VER)) +JEMALLOC_INLINE uint32_t +atomic_add_uint32(uint32_t *p, uint32_t x) +{ + + return (InterlockedExchangeAdd(p, x)); +} + +JEMALLOC_INLINE uint32_t +atomic_sub_uint32(uint32_t *p, uint32_t x) +{ + + return (InterlockedExchangeAdd(p, -((int32_t)x))); +} #elif (defined(JEMALLOC_OSATOMIC)) JEMALLOC_INLINE uint32_t atomic_add_uint32(uint32_t *p, uint32_t x) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index d4c4b4cb..18861476 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ 
b/include/jemalloc/internal/jemalloc_internal.h.in @@ -1,6 +1,5 @@ #ifndef JEMALLOC_INTERNAL_H #define JEMALLOC_INTERNAL_H -#include #include #ifdef _WIN32 # include @@ -13,6 +12,7 @@ # undef ERANGE # define ERANGE ERROR_INVALID_DATA #else +# include # include # include # if !defined(SYS_write) && defined(__NR_write) @@ -41,7 +41,17 @@ #include #include #include -#include +#ifdef _MSC_VER +# include +typedef intptr_t ssize_t; +# define PATH_MAX 1024 +# define STDERR_FILENO 2 +# define __func__ __FUNCTION__ +/* Disable warnings about deprecated system functions */ +# pragma warning(disable: 4996) +#else +# include +#endif #include #define JEMALLOC_NO_DEMANGLE @@ -221,6 +231,9 @@ static const bool config_ivsalloc = #else # define JEMALLOC_ENABLE_INLINE # define JEMALLOC_INLINE static inline +# ifdef _MSC_VER +# define inline _inline +# endif #endif /* Smallest size class to support. */ @@ -232,7 +245,7 @@ static const bool config_ivsalloc = * classes). */ #ifndef LG_QUANTUM -# ifdef __i386__ +# if (defined(__i386__) || defined(_M_IX86)) # define LG_QUANTUM 4 # endif # ifdef __ia64__ @@ -244,7 +257,7 @@ static const bool config_ivsalloc = # ifdef __sparc64__ # define LG_QUANTUM 4 # endif -# if (defined(__amd64__) || defined(__x86_64__)) +# if (defined(__amd64__) || defined(__x86_64__) || defined(_M_X64)) # define LG_QUANTUM 4 # endif # ifdef __arm__ diff --git a/include/jemalloc/jemalloc_defs.h.in b/include/jemalloc/jemalloc_defs.h.in index 126f6b76..e61597c0 100644 --- a/include/jemalloc/jemalloc_defs.h.in +++ b/include/jemalloc/jemalloc_defs.h.in @@ -109,6 +109,16 @@ # define JEMALLOC_ALIGNED(s) JEMALLOC_ATTR(aligned(s)) # define JEMALLOC_SECTION(s) JEMALLOC_ATTR(section(s)) # define JEMALLOC_NOINLINE JEMALLOC_ATTR(noinline) +#elif _MSC_VER +# define JEMALLOC_ATTR(s) +#ifdef DLLEXPORT +# define EXPORT __declspec(dllexport) +#else +# define EXPORT __declspec(dllimport) +#endif +# define JEMALLOC_ALIGNED(s) __declspec(align(s)) +# define JEMALLOC_SECTION(s) 
__declspec(allocate(s)) +# define JEMALLOC_NOINLINE __declspec(noinline) #else # define JEMALLOC_ATTR(s) # define JEMALLOC_EXPORT diff --git a/include/msc_compat/inttypes.h b/include/msvc_compat/inttypes.h similarity index 100% rename from include/msc_compat/inttypes.h rename to include/msvc_compat/inttypes.h diff --git a/include/msvc_compat/stdbool.h b/include/msvc_compat/stdbool.h new file mode 100644 index 00000000..da9ee8b8 --- /dev/null +++ b/include/msvc_compat/stdbool.h @@ -0,0 +1,16 @@ +#ifndef stdbool_h +#define stdbool_h + +#include + +/* MSVC doesn't define _Bool or bool in C, but does have BOOL */ +/* Note this doesn't pass autoconf's test because (bool) 0.5 != true */ +typedef BOOL _Bool; + +#define bool _Bool +#define true 1 +#define false 0 + +#define __bool_true_false_are_defined 1 + +#endif /* stdbool_h */ diff --git a/include/msc_compat/stdint.h b/include/msvc_compat/stdint.h similarity index 100% rename from include/msc_compat/stdint.h rename to include/msvc_compat/stdint.h diff --git a/include/msvc_compat/strings.h b/include/msvc_compat/strings.h new file mode 100644 index 00000000..c84975b6 --- /dev/null +++ b/include/msvc_compat/strings.h @@ -0,0 +1,23 @@ +#ifndef strings_h +#define strings_h + +/* MSVC doesn't define ffs/ffsl. 
This dummy strings.h header is provided + * for both */ +#include +#pragma intrinsic(_BitScanForward) +static __forceinline int ffsl(long x) +{ + unsigned long i; + + if (_BitScanForward(&i, x)) + return (i + 1); + return (0); +} + +static __forceinline int ffs(int x) +{ + + return (ffsl(x)); +} + +#endif diff --git a/src/jemalloc.c b/src/jemalloc.c index 2f858c3c..d42e91db 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -56,12 +56,19 @@ static bool malloc_initializer = NO_INITIALIZER; static malloc_mutex_t init_lock; JEMALLOC_ATTR(constructor) -static void -init_init_lock() +static void WINAPI +_init_init_lock(void) { malloc_mutex_init(&init_lock); } + +#ifdef _MSC_VER +# pragma section(".CRT$XCU", read) +JEMALLOC_SECTION(".CRT$XCU") JEMALLOC_ATTR(used) +static const void (WINAPI *init_init_lock)(void) = _init_init_lock; +#endif + #else static malloc_mutex_t init_lock = MALLOC_MUTEX_INITIALIZER; #endif diff --git a/src/tsd.c b/src/tsd.c index 0506c8a0..00d7b45d 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -93,6 +93,14 @@ _tls_callback(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved) return (true); } +#ifdef _MSC_VER +# ifdef _M_IX86 +# pragma comment(linker, "/INCLUDE:__tls_used") +# else +# pragma comment(linker, "/INCLUDE:_tls_used") +# endif +# pragma section(".CRT$XLY",long,read) +#endif JEMALLOC_SECTION(".CRT$XLY") JEMALLOC_ATTR(used) static const BOOL (WINAPI *tls_callback)(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved) = _tls_callback; From 203484e2ea267e068a68fd2922263f0ff1d5ac6f Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 2 May 2012 00:30:36 -0700 Subject: [PATCH 0213/3378] Optimize malloc() and free() fast paths. Embed the bin index for small page runs into the chunk page map, in order to omit [...] in the following dependent load sequence: ptr-->mapelm-->[run-->bin-->]bin_info Move various non-critcal code out of the inlined function chain into helper functions (tcache_event_hard(), arena_dalloc_small(), and locking). 
--- include/jemalloc/internal/arena.h | 353 ++++++++++--- .../jemalloc/internal/jemalloc_internal.h.in | 11 +- include/jemalloc/internal/private_namespace.h | 21 + include/jemalloc/internal/tcache.h | 71 +-- src/arena.c | 483 +++++++++--------- src/tcache.c | 59 ++- src/tsd.c | 2 +- 7 files changed, 613 insertions(+), 387 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 2eb41cd9..067b75a8 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -109,7 +109,8 @@ struct arena_chunk_map_s { * * p : run page offset * s : run size - * c : (binind+1) for size class (used only if prof_promote is true) + * n : binind for size class; large objects set these to BININD_INVALID + * except for promoted allocations (see prof_promote) * x : don't care * - : 0 * + : 1 @@ -117,35 +118,38 @@ struct arena_chunk_map_s { * [dula] : bit unset * * Unallocated (clean): - * ssssssss ssssssss ssss---- ----du-a - * xxxxxxxx xxxxxxxx xxxx---- -----Uxx - * ssssssss ssssssss ssss---- ----dU-a + * ssssssss ssssssss ssss1111 1111du-a + * xxxxxxxx xxxxxxxx xxxxxxxx xxxx-Uxx + * ssssssss ssssssss ssss1111 1111dU-a * * Unallocated (dirty): - * ssssssss ssssssss ssss---- ----D--a - * xxxxxxxx xxxxxxxx xxxx---- ----xxxx - * ssssssss ssssssss ssss---- ----D--a + * ssssssss ssssssss ssss1111 1111D--a + * xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx + * ssssssss ssssssss ssss1111 1111D--a * * Small: - * pppppppp pppppppp pppp---- ----d--A - * pppppppp pppppppp pppp---- -------A - * pppppppp pppppppp pppp---- ----d--A + * pppppppp pppppppp ppppnnnn nnnnd--A + * pppppppp pppppppp ppppnnnn nnnn---A + * pppppppp pppppppp ppppnnnn nnnnd--A * * Large: - * ssssssss ssssssss ssss---- ----D-LA - * xxxxxxxx xxxxxxxx xxxx---- ----xxxx - * -------- -------- -------- ----D-LA + * ssssssss ssssssss ssss1111 1111D-LA + * xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx + * -------- -------- ----1111 1111D-LA * * Large (sampled, size <= PAGE): - * ssssssss 
ssssssss sssscccc ccccD-LA + * ssssssss ssssssss ssssnnnn nnnnD-LA * * Large (not sampled, size == PAGE): - * ssssssss ssssssss ssss---- ----D-LA + * ssssssss ssssssss ssss1111 1111D-LA */ size_t bits; -#define CHUNK_MAP_CLASS_SHIFT 4 -#define CHUNK_MAP_CLASS_MASK ((size_t)0xff0U) -#define CHUNK_MAP_FLAGS_MASK ((size_t)0xfU) +#define CHUNK_MAP_BININD_SHIFT 4 +#define BININD_INVALID ((size_t)0xffU) +/* CHUNK_MAP_BININD_MASK == (BININD_INVALID << CHUNK_MAP_BININD_SHIFT) */ +#define CHUNK_MAP_BININD_MASK ((size_t)0xff0U) +#define CHUNK_MAP_BININD_INVALID CHUNK_MAP_BININD_MASK +#define CHUNK_MAP_FLAGS_MASK ((size_t)0xcU) #define CHUNK_MAP_DIRTY ((size_t)0x8U) #define CHUNK_MAP_UNZEROED ((size_t)0x4U) #define CHUNK_MAP_LARGE ((size_t)0x2U) @@ -409,8 +413,14 @@ void *arena_malloc_small(arena_t *arena, size_t size, bool zero); void *arena_malloc_large(arena_t *arena, size_t size, bool zero); void *arena_palloc(arena_t *arena, size_t size, size_t alignment, bool zero); void arena_prof_promoted(const void *ptr, size_t size); -void arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, +void arena_dalloc_bin_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr, arena_chunk_map_t *mapelm); +void arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, + size_t pageind, arena_chunk_map_t *mapelm); +void arena_dalloc_small(arena_t *arena, arena_chunk_t *chunk, void *ptr, + size_t pageind); +void arena_dalloc_large_locked(arena_t *arena, arena_chunk_t *chunk, + void *ptr); void arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr); void arena_stats_merge(arena_t *arena, size_t *nactive, size_t *ndirty, arena_stats_t *astats, malloc_bin_stats_t *bstats, @@ -430,6 +440,30 @@ void arena_postfork_child(arena_t *arena); #ifdef JEMALLOC_H_INLINES #ifndef JEMALLOC_ENABLE_INLINE +arena_chunk_map_t *arena_mapp_get(arena_chunk_t *chunk, size_t pageind); +size_t *arena_mapbitsp_get(arena_chunk_t *chunk, size_t pageind); +size_t 
arena_mapbits_get(arena_chunk_t *chunk, size_t pageind); +size_t arena_mapbits_unallocated_size_get(arena_chunk_t *chunk, + size_t pageind); +size_t arena_mapbits_large_size_get(arena_chunk_t *chunk, size_t pageind); +size_t arena_mapbits_small_runind_get(arena_chunk_t *chunk, size_t pageind); +size_t arena_mapbits_dirty_get(arena_chunk_t *chunk, size_t pageind); +size_t arena_mapbits_unzeroed_get(arena_chunk_t *chunk, size_t pageind); +size_t arena_mapbits_large_get(arena_chunk_t *chunk, size_t pageind); +size_t arena_mapbits_allocated_get(arena_chunk_t *chunk, size_t pageind); +void arena_mapbits_unallocated_set(arena_chunk_t *chunk, size_t pageind, + size_t size, size_t flags); +void arena_mapbits_unallocated_size_set(arena_chunk_t *chunk, size_t pageind, + size_t size); +void arena_mapbits_large_set(arena_chunk_t *chunk, size_t pageind, + size_t size, size_t flags); +void arena_mapbits_large_binind_set(arena_chunk_t *chunk, size_t pageind, + size_t binind); +void arena_mapbits_small_set(arena_chunk_t *chunk, size_t pageind, + size_t runind, size_t binind, size_t flags); +void arena_mapbits_unzeroed_set(arena_chunk_t *chunk, size_t pageind, + size_t unzeroed); +size_t arena_ptr_binind(const void *ptr, size_t mapbits); size_t arena_bin_index(arena_t *arena, arena_bin_t *bin); unsigned arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr); @@ -442,6 +476,203 @@ void arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr, #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_)) +# ifdef JEMALLOC_ARENA_INLINE_A +JEMALLOC_INLINE arena_chunk_map_t * +arena_mapp_get(arena_chunk_t *chunk, size_t pageind) +{ + + assert(pageind >= map_bias); + assert(pageind < chunk_npages); + + return (&chunk->map[pageind-map_bias]); +} + +JEMALLOC_INLINE size_t * +arena_mapbitsp_get(arena_chunk_t *chunk, size_t pageind) +{ + + return (&arena_mapp_get(chunk, pageind)->bits); +} + +JEMALLOC_INLINE size_t +arena_mapbits_get(arena_chunk_t 
*chunk, size_t pageind) +{ + + return (*arena_mapbitsp_get(chunk, pageind)); +} + +JEMALLOC_INLINE size_t +arena_mapbits_unallocated_size_get(arena_chunk_t *chunk, size_t pageind) +{ + size_t mapbits; + + mapbits = arena_mapbits_get(chunk, pageind); + assert((mapbits & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) == 0); + return (mapbits & ~PAGE_MASK); +} + +JEMALLOC_INLINE size_t +arena_mapbits_large_size_get(arena_chunk_t *chunk, size_t pageind) +{ + size_t mapbits; + + mapbits = arena_mapbits_get(chunk, pageind); + assert((mapbits & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) == + (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)); + return (mapbits & ~PAGE_MASK); +} + +JEMALLOC_INLINE size_t +arena_mapbits_small_runind_get(arena_chunk_t *chunk, size_t pageind) +{ + size_t mapbits; + + mapbits = arena_mapbits_get(chunk, pageind); + assert((mapbits & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) == + CHUNK_MAP_ALLOCATED); + return (mapbits >> LG_PAGE); +} + +JEMALLOC_INLINE size_t +arena_mapbits_dirty_get(arena_chunk_t *chunk, size_t pageind) +{ + size_t mapbits; + + mapbits = arena_mapbits_get(chunk, pageind); + return (mapbits & CHUNK_MAP_DIRTY); +} + +JEMALLOC_INLINE size_t +arena_mapbits_unzeroed_get(arena_chunk_t *chunk, size_t pageind) +{ + size_t mapbits; + + mapbits = arena_mapbits_get(chunk, pageind); + return (mapbits & CHUNK_MAP_UNZEROED); +} + +JEMALLOC_INLINE size_t +arena_mapbits_large_get(arena_chunk_t *chunk, size_t pageind) +{ + size_t mapbits; + + mapbits = arena_mapbits_get(chunk, pageind); + return (mapbits & CHUNK_MAP_LARGE); +} + +JEMALLOC_INLINE size_t +arena_mapbits_allocated_get(arena_chunk_t *chunk, size_t pageind) +{ + size_t mapbits; + + mapbits = arena_mapbits_get(chunk, pageind); + return (mapbits & CHUNK_MAP_ALLOCATED); +} + +JEMALLOC_INLINE void +arena_mapbits_unallocated_set(arena_chunk_t *chunk, size_t pageind, size_t size, + size_t flags) +{ + size_t *mapbitsp; + + mapbitsp = arena_mapbitsp_get(chunk, pageind); + assert((size & PAGE_MASK) == 0); + 
assert((flags & ~CHUNK_MAP_FLAGS_MASK) == 0); + *mapbitsp = size | CHUNK_MAP_BININD_INVALID | flags; +} + +JEMALLOC_INLINE void +arena_mapbits_unallocated_size_set(arena_chunk_t *chunk, size_t pageind, + size_t size) +{ + size_t *mapbitsp; + + mapbitsp = arena_mapbitsp_get(chunk, pageind); + assert((size & PAGE_MASK) == 0); + assert((*mapbitsp & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) == 0); + *mapbitsp = size | (*mapbitsp & PAGE_MASK); +} + +JEMALLOC_INLINE void +arena_mapbits_large_set(arena_chunk_t *chunk, size_t pageind, size_t size, + size_t flags) +{ + size_t *mapbitsp; + + mapbitsp = arena_mapbitsp_get(chunk, pageind); + assert((size & PAGE_MASK) == 0); + assert((flags & ~CHUNK_MAP_FLAGS_MASK) == 0); + *mapbitsp = size | CHUNK_MAP_BININD_INVALID | flags | CHUNK_MAP_LARGE | + CHUNK_MAP_ALLOCATED; +} + +JEMALLOC_INLINE void +arena_mapbits_large_binind_set(arena_chunk_t *chunk, size_t pageind, + size_t binind) +{ + size_t *mapbitsp; + + assert(binind <= BININD_INVALID); + mapbitsp = arena_mapbitsp_get(chunk, pageind); + assert(arena_mapbits_large_size_get(chunk, pageind) == PAGE); + *mapbitsp = (*mapbitsp & ~CHUNK_MAP_BININD_MASK) | (binind << + CHUNK_MAP_BININD_SHIFT); +} + +JEMALLOC_INLINE void +arena_mapbits_small_set(arena_chunk_t *chunk, size_t pageind, size_t runind, + size_t binind, size_t flags) +{ + size_t *mapbitsp; + + assert(binind < BININD_INVALID); + mapbitsp = arena_mapbitsp_get(chunk, pageind); + assert(pageind - runind >= map_bias); + assert((flags & ~CHUNK_MAP_FLAGS_MASK) == 0); + *mapbitsp = (runind << LG_PAGE) | (binind << CHUNK_MAP_BININD_SHIFT) | + flags | CHUNK_MAP_ALLOCATED; +} + +JEMALLOC_INLINE void +arena_mapbits_unzeroed_set(arena_chunk_t *chunk, size_t pageind, + size_t unzeroed) +{ + size_t *mapbitsp; + + mapbitsp = arena_mapbitsp_get(chunk, pageind); + *mapbitsp = (*mapbitsp & ~CHUNK_MAP_UNZEROED) | unzeroed; +} + +JEMALLOC_INLINE size_t +arena_ptr_binind(const void *ptr, size_t mapbits) +{ + size_t binind; + + binind = (mapbits & 
CHUNK_MAP_BININD_MASK) >> CHUNK_MAP_BININD_SHIFT; + + if (config_debug) { + arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + arena_t *arena = chunk->arena; + size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; + size_t actual_mapbits = arena_mapbits_get(chunk, pageind); + arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + + (uintptr_t)((pageind - (actual_mapbits >> LG_PAGE)) << + LG_PAGE)); arena_bin_t *bin = run->bin; + size_t actual_binind = bin - arena->bins; + arena_bin_info_t *bin_info = &arena_bin_info[actual_binind]; + + assert(mapbits == actual_mapbits); + assert(binind == actual_binind); + assert(((uintptr_t)ptr - ((uintptr_t)run + + (uintptr_t)bin_info->reg0_offset)) % bin_info->reg_interval + == 0); + } + + return (binind); +} +# endif /* JEMALLOC_ARENA_INLINE_A */ + +# ifdef JEMALLOC_ARENA_INLINE_B JEMALLOC_INLINE size_t arena_bin_index(arena_t *arena, arena_bin_t *bin) { @@ -535,7 +766,7 @@ arena_prof_ctx_get(const void *ptr) chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - mapbits = chunk->map[pageind-map_bias].bits; + mapbits = arena_mapbits_get(chunk, pageind); assert((mapbits & CHUNK_MAP_ALLOCATED) != 0); if ((mapbits & CHUNK_MAP_LARGE) == 0) { if (prof_promote) @@ -544,7 +775,7 @@ arena_prof_ctx_get(const void *ptr) arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - (mapbits >> LG_PAGE)) << LG_PAGE)); - size_t binind = arena_bin_index(chunk->arena, run->bin); + size_t binind = arena_ptr_binind(ptr, mapbits); arena_bin_info_t *bin_info = &arena_bin_info[binind]; unsigned regind; @@ -554,7 +785,7 @@ arena_prof_ctx_get(const void *ptr) sizeof(prof_ctx_t *))); } } else - ret = chunk->map[pageind-map_bias].prof_ctx; + ret = arena_mapp_get(chunk, pageind)->prof_ctx; return (ret); } @@ -571,19 +802,18 @@ arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx) chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); pageind = ((uintptr_t)ptr - 
(uintptr_t)chunk) >> LG_PAGE; - mapbits = chunk->map[pageind-map_bias].bits; + mapbits = arena_mapbits_get(chunk, pageind); assert((mapbits & CHUNK_MAP_ALLOCATED) != 0); if ((mapbits & CHUNK_MAP_LARGE) == 0) { if (prof_promote == false) { arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - (mapbits >> LG_PAGE)) << LG_PAGE)); - arena_bin_t *bin = run->bin; size_t binind; arena_bin_info_t *bin_info; unsigned regind; - binind = arena_bin_index(chunk->arena, bin); + binind = arena_ptr_binind(ptr, mapbits); bin_info = &arena_bin_info[binind]; regind = arena_run_regind(run, bin_info, ptr); @@ -592,7 +822,7 @@ arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx) } else assert((uintptr_t)ctx == (uintptr_t)1U); } else - chunk->map[pageind-map_bias].prof_ctx = ctx; + arena_mapp_get(chunk, pageind)->prof_ctx = ctx; } JEMALLOC_INLINE void * @@ -638,26 +868,24 @@ arena_salloc(const void *ptr, bool demote) chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - mapbits = chunk->map[pageind-map_bias].bits; - assert((mapbits & CHUNK_MAP_ALLOCATED) != 0); + assert(arena_mapbits_allocated_get(chunk, pageind) != 0); + mapbits = arena_mapbits_get(chunk, pageind); if ((mapbits & CHUNK_MAP_LARGE) == 0) { - arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + - (uintptr_t)((pageind - (mapbits >> LG_PAGE)) << LG_PAGE)); - size_t binind = arena_bin_index(chunk->arena, run->bin); - arena_bin_info_t *bin_info = &arena_bin_info[binind]; - assert(((uintptr_t)ptr - ((uintptr_t)run + - (uintptr_t)bin_info->reg0_offset)) % bin_info->reg_interval - == 0); - ret = bin_info->reg_size; + size_t binind = arena_ptr_binind(ptr, mapbits); + ret = arena_bin_info[binind].reg_size; } else { assert(((uintptr_t)ptr & PAGE_MASK) == 0); - ret = mapbits & ~PAGE_MASK; + ret = arena_mapbits_large_size_get(chunk, pageind); if (config_prof && demote && prof_promote && ret == PAGE && - (mapbits & CHUNK_MAP_CLASS_MASK) != 0) { - size_t binind 
= ((mapbits & CHUNK_MAP_CLASS_MASK) >> - CHUNK_MAP_CLASS_SHIFT) - 1; + (mapbits & CHUNK_MAP_BININD_MASK) != + CHUNK_MAP_BININD_MASK) { + size_t binind = ((mapbits & CHUNK_MAP_BININD_MASK) >> + CHUNK_MAP_BININD_SHIFT); assert(binind < NBINS); ret = arena_bin_info[binind].reg_size; + } else { + assert(demote == false || (mapbits & + CHUNK_MAP_BININD_MASK) == CHUNK_MAP_BININD_MASK); } assert(ret != 0); } @@ -668,8 +896,7 @@ arena_salloc(const void *ptr, bool demote) JEMALLOC_INLINE void arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr, bool try_tcache) { - size_t pageind; - arena_chunk_map_t *mapelm; + size_t pageind, mapbits; tcache_t *tcache; assert(arena != NULL); @@ -678,47 +905,31 @@ arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr, bool try_tcache) assert(CHUNK_ADDR2BASE(ptr) != ptr); pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - mapelm = &chunk->map[pageind-map_bias]; - assert((mapelm->bits & CHUNK_MAP_ALLOCATED) != 0); - if ((mapelm->bits & CHUNK_MAP_LARGE) == 0) { + mapbits = arena_mapbits_get(chunk, pageind); + assert(arena_mapbits_allocated_get(chunk, pageind) != 0); + if ((mapbits & CHUNK_MAP_LARGE) == 0) { /* Small allocation. 
*/ - if (try_tcache && (tcache = tcache_get(false)) != NULL) - tcache_dalloc_small(tcache, ptr); - else { - arena_run_t *run; - arena_bin_t *bin; + if (try_tcache && (tcache = tcache_get(false)) != NULL) { + size_t binind; - run = (arena_run_t *)((uintptr_t)chunk + - (uintptr_t)((pageind - (mapelm->bits >> LG_PAGE)) << - LG_PAGE)); - bin = run->bin; - if (config_debug) { - size_t binind = arena_bin_index(arena, bin); - UNUSED arena_bin_info_t *bin_info = - &arena_bin_info[binind]; - assert(((uintptr_t)ptr - ((uintptr_t)run + - (uintptr_t)bin_info->reg0_offset)) % - bin_info->reg_interval == 0); - } - malloc_mutex_lock(&bin->lock); - arena_dalloc_bin(arena, chunk, ptr, mapelm); - malloc_mutex_unlock(&bin->lock); - } + binind = arena_ptr_binind(ptr, mapbits); + assert(binind < NBINS); + tcache_dalloc_small(tcache, ptr, binind); + } else + arena_dalloc_small(arena, chunk, ptr, pageind); } else { - size_t size = mapelm->bits & ~PAGE_MASK; + size_t size = arena_mapbits_large_size_get(chunk, pageind); assert(((uintptr_t)ptr & PAGE_MASK) == 0); if (try_tcache && size <= tcache_maxclass && (tcache = tcache_get(false)) != NULL) { tcache_dalloc_large(tcache, ptr, size); - } else { - malloc_mutex_lock(&arena->lock); + } else arena_dalloc_large(arena, chunk, ptr); - malloc_mutex_unlock(&arena->lock); - } } } +# endif /* JEMALLOC_ARENA_INLINE_B */ #endif #endif /* JEMALLOC_H_INLINES */ diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 18861476..ccecfaa1 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -685,8 +685,17 @@ choose_arena(arena_t *arena) #include "jemalloc/internal/bitmap.h" #include "jemalloc/internal/rtree.h" -#include "jemalloc/internal/tcache.h" +/* + * Include arena.h twice in order to resolve circular dependencies with + * tcache.h. 
+ */ +#define JEMALLOC_ARENA_INLINE_A #include "jemalloc/internal/arena.h" +#undef JEMALLOC_ARENA_INLINE_A +#include "jemalloc/internal/tcache.h" +#define JEMALLOC_ARENA_INLINE_B +#include "jemalloc/internal/arena.h" +#undef JEMALLOC_ARENA_INLINE_B #include "jemalloc/internal/hash.h" #include "jemalloc/internal/quarantine.h" diff --git a/include/jemalloc/internal/private_namespace.h b/include/jemalloc/internal/private_namespace.h index c467153a..6ac07631 100644 --- a/include/jemalloc/internal/private_namespace.h +++ b/include/jemalloc/internal/private_namespace.h @@ -7,11 +7,30 @@ #define arena_boot JEMALLOC_N(arena_boot) #define arena_dalloc JEMALLOC_N(arena_dalloc) #define arena_dalloc_bin JEMALLOC_N(arena_dalloc_bin) +#define arena_dalloc_bin_locked JEMALLOC_N(arena_dalloc_bin_locked) #define arena_dalloc_junk_small JEMALLOC_N(arena_dalloc_junk_small) #define arena_dalloc_large JEMALLOC_N(arena_dalloc_large) +#define arena_dalloc_large_locked JEMALLOC_N(arena_dalloc_large_locked) +#define arena_dalloc_small JEMALLOC_N(arena_dalloc_small) #define arena_malloc JEMALLOC_N(arena_malloc) #define arena_malloc_large JEMALLOC_N(arena_malloc_large) #define arena_malloc_small JEMALLOC_N(arena_malloc_small) +#define arena_mapbitsp_get JEMALLOC_N(arena_mapbitsp_get) +#define arena_mapbits_get JEMALLOC_N(arena_mapbits_get) +#define arena_mapbits_unallocated_size_get JEMALLOC_N(arena_mapbits_unallocated_size_get) +#define arena_mapbits_large_size_get JEMALLOC_N(arena_mapbits_large_size_get) +#define arena_mapbits_small_runind_get JEMALLOC_N(arena_mapbits_small_runind_get) +#define arena_mapbits_dirty_get JEMALLOC_N(arena_mapbits_dirty_get) +#define arena_mapbits_unzeroed_get JEMALLOC_N(arena_mapbits_unzeroed_get) +#define arena_mapbits_large_get JEMALLOC_N(arena_mapbits_large_get) +#define arena_mapbits_allocated_get JEMALLOC_N(arena_mapbits_allocated_get) +#define arena_mapbits_unallocated_set JEMALLOC_N(arena_mapbits_unallocated_set) +#define 
arena_mapbits_unallocated_size_set JEMALLOC_N(arena_mapbits_unallocated_size_set) +#define arena_mapbits_large_set JEMALLOC_N(arena_mapbits_large_set) +#define arena_mapbits_large_binind_set JEMALLOC_N(arena_mapbits_large_binind_set) +#define arena_mapbits_small_set JEMALLOC_N(arena_mapbits_small_set) +#define arena_mapbits_unzeroed_set JEMALLOC_N(arena_mapbits_unzeroed_set) +#define arena_mapp_get JEMALLOC_N(arena_mapp_get) #define arena_maxclass JEMALLOC_N(arena_maxclass) #define arena_new JEMALLOC_N(arena_new) #define arena_palloc JEMALLOC_N(arena_palloc) @@ -22,6 +41,7 @@ #define arena_prof_ctx_get JEMALLOC_N(arena_prof_ctx_get) #define arena_prof_ctx_set JEMALLOC_N(arena_prof_ctx_set) #define arena_prof_promoted JEMALLOC_N(arena_prof_promoted) +#define arena_ptr_binind JEMALLOC_N(arena_ptr_binind) #define arena_purge_all JEMALLOC_N(arena_purge_all) #define arena_ralloc JEMALLOC_N(arena_ralloc) #define arena_ralloc_no_move JEMALLOC_N(arena_ralloc_no_move) @@ -296,6 +316,7 @@ #define tcache_enabled_tsd_get JEMALLOC_N(tcache_enabled_tsd_get) #define tcache_enabled_tsd_set JEMALLOC_N(tcache_enabled_tsd_set) #define tcache_event JEMALLOC_N(tcache_event) +#define tcache_event_hard JEMALLOC_N(tcache_event_hard) #define tcache_initialized JEMALLOC_N(tcache_initialized) #define tcache_flush JEMALLOC_N(tcache_flush) #define tcache_get JEMALLOC_N(tcache_get) diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index cfb17c28..38d735c8 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -101,6 +101,7 @@ extern size_t nhbins; extern size_t tcache_maxclass; size_t tcache_salloc(const void *ptr); +void tcache_event_hard(tcache_t *tcache); void *tcache_alloc_small_hard(tcache_t *tcache, tcache_bin_t *tbin, size_t binind); void tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem, @@ -132,7 +133,7 @@ void tcache_enabled_set(bool enabled); void *tcache_alloc_easy(tcache_bin_t *tbin); void 
*tcache_alloc_small(tcache_t *tcache, size_t size, bool zero); void *tcache_alloc_large(tcache_t *tcache, size_t size, bool zero); -void tcache_dalloc_small(tcache_t *tcache, void *ptr); +void tcache_dalloc_small(tcache_t *tcache, void *ptr, size_t binind); void tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size); #endif @@ -266,47 +267,8 @@ tcache_event(tcache_t *tcache) tcache->ev_cnt++; assert(tcache->ev_cnt <= TCACHE_GC_INCR); - if (tcache->ev_cnt == TCACHE_GC_INCR) { - size_t binind = tcache->next_gc_bin; - tcache_bin_t *tbin = &tcache->tbins[binind]; - tcache_bin_info_t *tbin_info = &tcache_bin_info[binind]; - - if (tbin->low_water > 0) { - /* - * Flush (ceiling) 3/4 of the objects below the low - * water mark. - */ - if (binind < NBINS) { - tcache_bin_flush_small(tbin, binind, - tbin->ncached - tbin->low_water + - (tbin->low_water >> 2), tcache); - } else { - tcache_bin_flush_large(tbin, binind, - tbin->ncached - tbin->low_water + - (tbin->low_water >> 2), tcache); - } - /* - * Reduce fill count by 2X. Limit lg_fill_div such that - * the fill count is always at least 1. - */ - if ((tbin_info->ncached_max >> (tbin->lg_fill_div+1)) - >= 1) - tbin->lg_fill_div++; - } else if (tbin->low_water < 0) { - /* - * Increase fill count by 2X. Make sure lg_fill_div - * stays greater than 0. 
- */ - if (tbin->lg_fill_div > 1) - tbin->lg_fill_div--; - } - tbin->low_water = tbin->ncached; - - tcache->next_gc_bin++; - if (tcache->next_gc_bin == nhbins) - tcache->next_gc_bin = 0; - tcache->ev_cnt = 0; - } + if (tcache->ev_cnt == TCACHE_GC_INCR) + tcache_event_hard(tcache); } JEMALLOC_INLINE void * @@ -390,13 +352,13 @@ tcache_alloc_large(tcache_t *tcache, size_t size, bool zero) if (ret == NULL) return (NULL); } else { - if (config_prof) { + if (config_prof && prof_promote && size == PAGE) { arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ret); size_t pageind = (((uintptr_t)ret - (uintptr_t)chunk) >> LG_PAGE); - chunk->map[pageind-map_bias].bits &= - ~CHUNK_MAP_CLASS_MASK; + arena_mapbits_large_binind_set(chunk, pageind, + BININD_INVALID); } if (zero == false) { if (config_fill) { @@ -421,30 +383,13 @@ tcache_alloc_large(tcache_t *tcache, size_t size, bool zero) } JEMALLOC_INLINE void -tcache_dalloc_small(tcache_t *tcache, void *ptr) +tcache_dalloc_small(tcache_t *tcache, void *ptr, size_t binind) { - arena_t *arena; - arena_chunk_t *chunk; - arena_run_t *run; - arena_bin_t *bin; tcache_bin_t *tbin; tcache_bin_info_t *tbin_info; - size_t pageind, binind; - arena_chunk_map_t *mapelm; assert(tcache_salloc(ptr) <= SMALL_MAXCLASS); - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - arena = chunk->arena; - pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - mapelm = &chunk->map[pageind-map_bias]; - run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - - (mapelm->bits >> LG_PAGE)) << LG_PAGE)); - bin = run->bin; - binind = ((uintptr_t)bin - (uintptr_t)&arena->bins) / - sizeof(arena_bin_t); - assert(binind < NBINS); - if (config_fill && opt_junk) arena_dalloc_junk_small(ptr, &arena_bin_info[binind]); diff --git a/src/arena.c b/src/arena.c index 51c268c2..3ef4b8fb 100644 --- a/src/arena.c +++ b/src/arena.c @@ -41,11 +41,11 @@ const uint8_t small_size2bin[] = { /* Function prototypes for non-inline static functions. 
*/ static void arena_run_split(arena_t *arena, arena_run_t *run, size_t size, - bool large, bool zero); + bool large, size_t binind, bool zero); static arena_chunk_t *arena_chunk_alloc(arena_t *arena); static void arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk); static arena_run_t *arena_run_alloc(arena_t *arena, size_t size, bool large, - bool zero); + size_t binind, bool zero); static void arena_purge(arena_t *arena, bool all); static void arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty); static void arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, @@ -152,7 +152,9 @@ static inline void arena_run_reg_dalloc(arena_run_t *run, void *ptr) { arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); - size_t binind = arena_bin_index(chunk->arena, run->bin); + size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; + size_t mapbits = arena_mapbits_get(chunk, pageind); + size_t binind = arena_ptr_binind(ptr, mapbits); arena_bin_info_t *bin_info = &arena_bin_info[binind]; unsigned regind = arena_run_regind(run, bin_info, ptr); bitmap_t *bitmap = (bitmap_t *)((uintptr_t)run + @@ -184,28 +186,31 @@ arena_chunk_validate_zeroed(arena_chunk_t *chunk, size_t run_ind) static void arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, - bool zero) + size_t binind, bool zero) { arena_chunk_t *chunk; size_t run_ind, total_pages, need_pages, rem_pages, i; size_t flag_dirty; arena_avail_tree_t *runs_avail; + assert((large && binind == BININD_INVALID) || (large == false && binind + != BININD_INVALID)); + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE); - flag_dirty = chunk->map[run_ind-map_bias].bits & CHUNK_MAP_DIRTY; + flag_dirty = arena_mapbits_dirty_get(chunk, run_ind); runs_avail = (flag_dirty != 0) ? 
&arena->runs_avail_dirty : &arena->runs_avail_clean; - total_pages = (chunk->map[run_ind-map_bias].bits & ~PAGE_MASK) >> + total_pages = arena_mapbits_unallocated_size_get(chunk, run_ind) >> LG_PAGE; - assert((chunk->map[run_ind+total_pages-1-map_bias].bits & - CHUNK_MAP_DIRTY) == flag_dirty); + assert(arena_mapbits_dirty_get(chunk, run_ind+total_pages-1) == + flag_dirty); need_pages = (size >> LG_PAGE); assert(need_pages > 0); assert(need_pages <= total_pages); rem_pages = total_pages - need_pages; - arena_avail_tree_remove(runs_avail, &chunk->map[run_ind-map_bias]); + arena_avail_tree_remove(runs_avail, arena_mapp_get(chunk, run_ind)); if (config_stats) { /* * Update stats_cactive if nactive is crossing a chunk @@ -222,22 +227,23 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, /* Keep track of trailing unused pages for later use. */ if (rem_pages > 0) { if (flag_dirty != 0) { - chunk->map[run_ind+need_pages-map_bias].bits = - (rem_pages << LG_PAGE) | CHUNK_MAP_DIRTY; - chunk->map[run_ind+total_pages-1-map_bias].bits = - (rem_pages << LG_PAGE) | CHUNK_MAP_DIRTY; + arena_mapbits_unallocated_set(chunk, run_ind+need_pages, + (rem_pages << LG_PAGE), CHUNK_MAP_DIRTY); + arena_mapbits_unallocated_set(chunk, + run_ind+total_pages-1, (rem_pages << LG_PAGE), + CHUNK_MAP_DIRTY); } else { - chunk->map[run_ind+need_pages-map_bias].bits = - (rem_pages << LG_PAGE) | - (chunk->map[run_ind+need_pages-map_bias].bits & - CHUNK_MAP_UNZEROED); - chunk->map[run_ind+total_pages-1-map_bias].bits = - (rem_pages << LG_PAGE) | - (chunk->map[run_ind+total_pages-1-map_bias].bits & - CHUNK_MAP_UNZEROED); + arena_mapbits_unallocated_set(chunk, run_ind+need_pages, + (rem_pages << LG_PAGE), + arena_mapbits_unzeroed_get(chunk, + run_ind+need_pages)); + arena_mapbits_unallocated_set(chunk, + run_ind+total_pages-1, (rem_pages << LG_PAGE), + arena_mapbits_unzeroed_get(chunk, + run_ind+total_pages-1)); } - arena_avail_tree_insert(runs_avail, - 
&chunk->map[run_ind+need_pages-map_bias]); + arena_avail_tree_insert(runs_avail, arena_mapp_get(chunk, + run_ind+need_pages)); } /* Update dirty page accounting. */ @@ -258,8 +264,8 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, * zeroed (i.e. never before touched). */ for (i = 0; i < need_pages; i++) { - if ((chunk->map[run_ind+i-map_bias].bits - & CHUNK_MAP_UNZEROED) != 0) { + if (arena_mapbits_unzeroed_get(chunk, + run_ind+i) != 0) { VALGRIND_MAKE_MEM_UNDEFINED( (void *)((uintptr_t) chunk + ((run_ind+i) << @@ -293,10 +299,9 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, * Set the last element first, in case the run only contains one * page (i.e. both statements set the same element). */ - chunk->map[run_ind+need_pages-1-map_bias].bits = - CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED | flag_dirty; - chunk->map[run_ind-map_bias].bits = size | flag_dirty | - CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; + arena_mapbits_large_set(chunk, run_ind+need_pages-1, 0, + flag_dirty); + arena_mapbits_large_set(chunk, run_ind, size, flag_dirty); } else { assert(zero == false); /* @@ -304,34 +309,30 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, * small run, so that arena_dalloc_bin_run() has the ability to * conditionally trim clean pages. */ - chunk->map[run_ind-map_bias].bits = - (chunk->map[run_ind-map_bias].bits & CHUNK_MAP_UNZEROED) | - CHUNK_MAP_ALLOCATED | flag_dirty; + arena_mapbits_small_set(chunk, run_ind, 0, binind, + arena_mapbits_unzeroed_get(chunk, run_ind) | flag_dirty); /* * The first page will always be dirtied during small run * initialization, so a validation failure here would not * actually cause an observable failure. 
*/ if (config_debug && flag_dirty == 0 && - (chunk->map[run_ind-map_bias].bits & CHUNK_MAP_UNZEROED) - == 0) + arena_mapbits_unzeroed_get(chunk, run_ind) == 0) arena_chunk_validate_zeroed(chunk, run_ind); for (i = 1; i < need_pages - 1; i++) { - chunk->map[run_ind+i-map_bias].bits = (i << LG_PAGE) - | (chunk->map[run_ind+i-map_bias].bits & - CHUNK_MAP_UNZEROED) | CHUNK_MAP_ALLOCATED; + arena_mapbits_small_set(chunk, run_ind+i, i, + binind, arena_mapbits_unzeroed_get(chunk, + run_ind+i)); if (config_debug && flag_dirty == 0 && - (chunk->map[run_ind+i-map_bias].bits & - CHUNK_MAP_UNZEROED) == 0) + arena_mapbits_unzeroed_get(chunk, run_ind+i) == 0) arena_chunk_validate_zeroed(chunk, run_ind+i); } - chunk->map[run_ind+need_pages-1-map_bias].bits = ((need_pages - - 1) << LG_PAGE) | - (chunk->map[run_ind+need_pages-1-map_bias].bits & - CHUNK_MAP_UNZEROED) | CHUNK_MAP_ALLOCATED | flag_dirty; + arena_mapbits_small_set(chunk, run_ind+need_pages-1, + need_pages-1, binind, arena_mapbits_unzeroed_get(chunk, + run_ind+need_pages-1) | flag_dirty); if (config_debug && flag_dirty == 0 && - (chunk->map[run_ind+need_pages-1-map_bias].bits & - CHUNK_MAP_UNZEROED) == 0) { + arena_mapbits_unzeroed_get(chunk, run_ind+need_pages-1) == + 0) { arena_chunk_validate_zeroed(chunk, run_ind+need_pages-1); } @@ -351,17 +352,18 @@ arena_chunk_alloc(arena_t *arena) arena->spare = NULL; /* Insert the run into the appropriate runs_avail_* tree. 
*/ - if ((chunk->map[0].bits & CHUNK_MAP_DIRTY) == 0) + if (arena_mapbits_dirty_get(chunk, map_bias) == 0) runs_avail = &arena->runs_avail_clean; else runs_avail = &arena->runs_avail_dirty; - assert((chunk->map[0].bits & ~PAGE_MASK) == arena_maxclass); - assert((chunk->map[chunk_npages-1-map_bias].bits & ~PAGE_MASK) - == arena_maxclass); - assert((chunk->map[0].bits & CHUNK_MAP_DIRTY) == - (chunk->map[chunk_npages-1-map_bias].bits & - CHUNK_MAP_DIRTY)); - arena_avail_tree_insert(runs_avail, &chunk->map[0]); + assert(arena_mapbits_unallocated_size_get(chunk, map_bias) == + arena_maxclass); + assert(arena_mapbits_unallocated_size_get(chunk, + chunk_npages-1) == arena_maxclass); + assert(arena_mapbits_dirty_get(chunk, map_bias) == + arena_mapbits_dirty_get(chunk, chunk_npages-1)); + arena_avail_tree_insert(runs_avail, arena_mapp_get(chunk, + map_bias)); } else { bool zero; size_t unzeroed; @@ -392,24 +394,27 @@ arena_chunk_alloc(arena_t *arena) * chunk. */ unzeroed = zero ? 0 : CHUNK_MAP_UNZEROED; - chunk->map[0].bits = arena_maxclass | unzeroed; + arena_mapbits_unallocated_set(chunk, map_bias, arena_maxclass, + unzeroed); /* * There is no need to initialize the internal page map entries * unless the chunk is not zeroed. */ if (zero == false) { for (i = map_bias+1; i < chunk_npages-1; i++) - chunk->map[i-map_bias].bits = unzeroed; + arena_mapbits_unzeroed_set(chunk, i, unzeroed); } else if (config_debug) { - for (i = map_bias+1; i < chunk_npages-1; i++) - assert(chunk->map[i-map_bias].bits == unzeroed); + for (i = map_bias+1; i < chunk_npages-1; i++) { + assert(arena_mapbits_unzeroed_get(chunk, i) == + unzeroed); + } } - chunk->map[chunk_npages-1-map_bias].bits = arena_maxclass | - unzeroed; + arena_mapbits_unallocated_set(chunk, chunk_npages-1, + arena_maxclass, unzeroed); /* Insert the run into the runs_avail_clean tree. 
*/ arena_avail_tree_insert(&arena->runs_avail_clean, - &chunk->map[0]); + arena_mapp_get(chunk, map_bias)); } return (chunk); @@ -424,11 +429,11 @@ arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk) * Remove run from the appropriate runs_avail_* tree, so that the arena * does not use it. */ - if ((chunk->map[0].bits & CHUNK_MAP_DIRTY) == 0) + if (arena_mapbits_dirty_get(chunk, map_bias) == 0) runs_avail = &arena->runs_avail_clean; else runs_avail = &arena->runs_avail_dirty; - arena_avail_tree_remove(runs_avail, &chunk->map[0]); + arena_avail_tree_remove(runs_avail, arena_mapp_get(chunk, map_bias)); if (arena->spare != NULL) { arena_chunk_t *spare = arena->spare; @@ -449,7 +454,8 @@ arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk) } static arena_run_t * -arena_run_alloc(arena_t *arena, size_t size, bool large, bool zero) +arena_run_alloc(arena_t *arena, size_t size, bool large, size_t binind, + bool zero) { arena_chunk_t *chunk; arena_run_t *run; @@ -457,6 +463,8 @@ arena_run_alloc(arena_t *arena, size_t size, bool large, bool zero) assert(size <= arena_maxclass); assert((size & PAGE_MASK) == 0); + assert((large && binind == BININD_INVALID) || (large == false && binind + != BININD_INVALID)); /* Search the arena's chunks for the lowest best fit. 
*/ key.bits = size | CHUNK_MAP_KEY; @@ -469,7 +477,7 @@ arena_run_alloc(arena_t *arena, size_t size, bool large, bool zero) run = (arena_run_t *)((uintptr_t)run_chunk + (pageind << LG_PAGE)); - arena_run_split(arena, run, size, large, zero); + arena_run_split(arena, run, size, large, binind, zero); return (run); } mapelm = arena_avail_tree_nsearch(&arena->runs_avail_clean, &key); @@ -481,7 +489,7 @@ arena_run_alloc(arena_t *arena, size_t size, bool large, bool zero) run = (arena_run_t *)((uintptr_t)run_chunk + (pageind << LG_PAGE)); - arena_run_split(arena, run, size, large, zero); + arena_run_split(arena, run, size, large, binind, zero); return (run); } @@ -491,7 +499,7 @@ arena_run_alloc(arena_t *arena, size_t size, bool large, bool zero) chunk = arena_chunk_alloc(arena); if (chunk != NULL) { run = (arena_run_t *)((uintptr_t)chunk + (map_bias << LG_PAGE)); - arena_run_split(arena, run, size, large, zero); + arena_run_split(arena, run, size, large, binind, zero); return (run); } @@ -509,7 +517,7 @@ arena_run_alloc(arena_t *arena, size_t size, bool large, bool zero) run = (arena_run_t *)((uintptr_t)run_chunk + (pageind << LG_PAGE)); - arena_run_split(arena, run, size, large, zero); + arena_run_split(arena, run, size, large, binind, zero); return (run); } mapelm = arena_avail_tree_nsearch(&arena->runs_avail_clean, &key); @@ -521,7 +529,7 @@ arena_run_alloc(arena_t *arena, size_t size, bool large, bool zero) run = (arena_run_t *)((uintptr_t)run_chunk + (pageind << LG_PAGE)); - arena_run_split(arena, run, size, large, zero); + arena_run_split(arena, run, size, large, binind, zero); return (run); } @@ -579,40 +587,38 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) * run. */ if (chunk == arena->spare) { - assert((chunk->map[0].bits & CHUNK_MAP_DIRTY) != 0); + assert(arena_mapbits_dirty_get(chunk, map_bias) != 0); arena_chunk_alloc(arena); } /* Temporarily allocate all free dirty runs within chunk. 
*/ for (pageind = map_bias; pageind < chunk_npages;) { - mapelm = &chunk->map[pageind-map_bias]; - if ((mapelm->bits & CHUNK_MAP_ALLOCATED) == 0) { + mapelm = arena_mapp_get(chunk, pageind); + if (arena_mapbits_allocated_get(chunk, pageind) == 0) { size_t npages; - npages = mapelm->bits >> LG_PAGE; + npages = arena_mapbits_unallocated_size_get(chunk, + pageind) >> LG_PAGE; assert(pageind + npages <= chunk_npages); - if (mapelm->bits & CHUNK_MAP_DIRTY) { + if (arena_mapbits_dirty_get(chunk, pageind)) { size_t i; arena_avail_tree_remove( &arena->runs_avail_dirty, mapelm); - mapelm->bits = (npages << LG_PAGE) | - flag_unzeroed | CHUNK_MAP_LARGE | - CHUNK_MAP_ALLOCATED; + arena_mapbits_large_set(chunk, pageind, + (npages << LG_PAGE), flag_unzeroed); /* * Update internal elements in the page map, so * that CHUNK_MAP_UNZEROED is properly set. */ for (i = 1; i < npages - 1; i++) { - chunk->map[pageind+i-map_bias].bits = - flag_unzeroed; + arena_mapbits_unzeroed_set(chunk, + pageind+i, flag_unzeroed); } if (npages > 1) { - chunk->map[ - pageind+npages-1-map_bias].bits = - flag_unzeroed | CHUNK_MAP_LARGE | - CHUNK_MAP_ALLOCATED; + arena_mapbits_large_set(chunk, + pageind+npages-1, 0, flag_unzeroed); } if (config_stats) { @@ -637,15 +643,17 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) pageind += npages; } else { /* Skip allocated run. 
*/ - if (mapelm->bits & CHUNK_MAP_LARGE) - pageind += mapelm->bits >> LG_PAGE; + if (arena_mapbits_large_get(chunk, pageind)) + pageind += arena_mapbits_large_size_get(chunk, + pageind) >> LG_PAGE; else { size_t binind; arena_bin_info_t *bin_info; arena_run_t *run = (arena_run_t *)((uintptr_t) chunk + (uintptr_t)(pageind << LG_PAGE)); - assert((mapelm->bits >> LG_PAGE) == 0); + assert(arena_mapbits_small_runind_get(chunk, + pageind) == 0); binind = arena_bin_index(arena, run->bin); bin_info = &arena_bin_info[binind]; pageind += bin_info->run_size >> LG_PAGE; @@ -669,7 +677,8 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) ql_foreach(mapelm, &mapelms, u.ql_link) { size_t pageind = (((uintptr_t)mapelm - (uintptr_t)chunk->map) / sizeof(arena_chunk_map_t)) + map_bias; - size_t npages = mapelm->bits >> LG_PAGE; + size_t npages = arena_mapbits_large_size_get(chunk, pageind) >> + LG_PAGE; assert(pageind + npages <= chunk_npages); assert(ndirty >= npages); @@ -806,15 +815,11 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty) run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE); assert(run_ind >= map_bias); assert(run_ind < chunk_npages); - if ((chunk->map[run_ind-map_bias].bits & CHUNK_MAP_LARGE) != 0) { - size = chunk->map[run_ind-map_bias].bits & ~PAGE_MASK; + if (arena_mapbits_large_get(chunk, run_ind) != 0) { + size = arena_mapbits_large_size_get(chunk, run_ind); assert(size == PAGE || - (chunk->map[run_ind+(size>>LG_PAGE)-1-map_bias].bits & - ~PAGE_MASK) == 0); - assert((chunk->map[run_ind+(size>>LG_PAGE)-1-map_bias].bits & - CHUNK_MAP_LARGE) != 0); - assert((chunk->map[run_ind+(size>>LG_PAGE)-1-map_bias].bits & - CHUNK_MAP_ALLOCATED) != 0); + arena_mapbits_large_size_get(chunk, + run_ind+(size>>LG_PAGE)-1) == 0); } else { size_t binind = arena_bin_index(arena, run->bin); arena_bin_info_t *bin_info = &arena_bin_info[binind]; @@ -837,7 +842,7 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty) * The run is dirty if 
the caller claims to have dirtied it, as well as * if it was already dirty before being allocated. */ - if ((chunk->map[run_ind-map_bias].bits & CHUNK_MAP_DIRTY) != 0) + if (arena_mapbits_dirty_get(chunk, run_ind) != 0) dirty = true; flag_dirty = dirty ? CHUNK_MAP_DIRTY : 0; runs_avail = dirty ? &arena->runs_avail_dirty : @@ -845,58 +850,52 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty) /* Mark pages as unallocated in the chunk map. */ if (dirty) { - chunk->map[run_ind-map_bias].bits = size | CHUNK_MAP_DIRTY; - chunk->map[run_ind+run_pages-1-map_bias].bits = size | - CHUNK_MAP_DIRTY; + arena_mapbits_unallocated_set(chunk, run_ind, size, + CHUNK_MAP_DIRTY); + arena_mapbits_unallocated_set(chunk, run_ind+run_pages-1, size, + CHUNK_MAP_DIRTY); chunk->ndirty += run_pages; arena->ndirty += run_pages; } else { - chunk->map[run_ind-map_bias].bits = size | - (chunk->map[run_ind-map_bias].bits & CHUNK_MAP_UNZEROED); - chunk->map[run_ind+run_pages-1-map_bias].bits = size | - (chunk->map[run_ind+run_pages-1-map_bias].bits & - CHUNK_MAP_UNZEROED); + arena_mapbits_unallocated_set(chunk, run_ind, size, + arena_mapbits_unzeroed_get(chunk, run_ind)); + arena_mapbits_unallocated_set(chunk, run_ind+run_pages-1, size, + arena_mapbits_unzeroed_get(chunk, run_ind+run_pages-1)); } /* Try to coalesce forward. */ if (run_ind + run_pages < chunk_npages && - (chunk->map[run_ind+run_pages-map_bias].bits & CHUNK_MAP_ALLOCATED) - == 0 && (chunk->map[run_ind+run_pages-map_bias].bits & - CHUNK_MAP_DIRTY) == flag_dirty) { - size_t nrun_size = chunk->map[run_ind+run_pages-map_bias].bits & - ~PAGE_MASK; + arena_mapbits_allocated_get(chunk, run_ind+run_pages) == 0 && + arena_mapbits_dirty_get(chunk, run_ind+run_pages) == flag_dirty) { + size_t nrun_size = arena_mapbits_unallocated_size_get(chunk, + run_ind+run_pages); size_t nrun_pages = nrun_size >> LG_PAGE; /* * Remove successor from runs_avail; the coalesced run is * inserted later. 
*/ - assert((chunk->map[run_ind+run_pages+nrun_pages-1-map_bias].bits - & ~PAGE_MASK) == nrun_size); - assert((chunk->map[run_ind+run_pages+nrun_pages-1-map_bias].bits - & CHUNK_MAP_ALLOCATED) == 0); - assert((chunk->map[run_ind+run_pages+nrun_pages-1-map_bias].bits - & CHUNK_MAP_DIRTY) == flag_dirty); + assert(arena_mapbits_unallocated_size_get(chunk, + run_ind+run_pages+nrun_pages-1) == nrun_size); + assert(arena_mapbits_dirty_get(chunk, + run_ind+run_pages+nrun_pages-1) == flag_dirty); arena_avail_tree_remove(runs_avail, - &chunk->map[run_ind+run_pages-map_bias]); + arena_mapp_get(chunk, run_ind+run_pages)); size += nrun_size; run_pages += nrun_pages; - chunk->map[run_ind-map_bias].bits = size | - (chunk->map[run_ind-map_bias].bits & CHUNK_MAP_FLAGS_MASK); - chunk->map[run_ind+run_pages-1-map_bias].bits = size | - (chunk->map[run_ind+run_pages-1-map_bias].bits & - CHUNK_MAP_FLAGS_MASK); + arena_mapbits_unallocated_size_set(chunk, run_ind, size); + arena_mapbits_unallocated_size_set(chunk, run_ind+run_pages-1, + size); } /* Try to coalesce backward. */ - if (run_ind > map_bias && (chunk->map[run_ind-1-map_bias].bits & - CHUNK_MAP_ALLOCATED) == 0 && (chunk->map[run_ind-1-map_bias].bits & - CHUNK_MAP_DIRTY) == flag_dirty) { - size_t prun_size = chunk->map[run_ind-1-map_bias].bits & - ~PAGE_MASK; + if (run_ind > map_bias && arena_mapbits_allocated_get(chunk, run_ind-1) + == 0 && arena_mapbits_dirty_get(chunk, run_ind-1) == flag_dirty) { + size_t prun_size = arena_mapbits_unallocated_size_get(chunk, + run_ind-1); size_t prun_pages = prun_size >> LG_PAGE; run_ind -= prun_pages; @@ -905,31 +904,26 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty) * Remove predecessor from runs_avail; the coalesced run is * inserted later. 
*/ - assert((chunk->map[run_ind-map_bias].bits & ~PAGE_MASK) - == prun_size); - assert((chunk->map[run_ind-map_bias].bits & CHUNK_MAP_ALLOCATED) - == 0); - assert((chunk->map[run_ind-map_bias].bits & CHUNK_MAP_DIRTY) - == flag_dirty); - arena_avail_tree_remove(runs_avail, - &chunk->map[run_ind-map_bias]); + assert(arena_mapbits_unallocated_size_get(chunk, run_ind) == + prun_size); + assert(arena_mapbits_dirty_get(chunk, run_ind) == flag_dirty); + arena_avail_tree_remove(runs_avail, arena_mapp_get(chunk, + run_ind)); size += prun_size; run_pages += prun_pages; - chunk->map[run_ind-map_bias].bits = size | - (chunk->map[run_ind-map_bias].bits & CHUNK_MAP_FLAGS_MASK); - chunk->map[run_ind+run_pages-1-map_bias].bits = size | - (chunk->map[run_ind+run_pages-1-map_bias].bits & - CHUNK_MAP_FLAGS_MASK); + arena_mapbits_unallocated_size_set(chunk, run_ind, size); + arena_mapbits_unallocated_size_set(chunk, run_ind+run_pages-1, + size); } /* Insert into runs_avail, now that coalescing is complete. */ - assert((chunk->map[run_ind-map_bias].bits & ~PAGE_MASK) == - (chunk->map[run_ind+run_pages-1-map_bias].bits & ~PAGE_MASK)); - assert((chunk->map[run_ind-map_bias].bits & CHUNK_MAP_DIRTY) == - (chunk->map[run_ind+run_pages-1-map_bias].bits & CHUNK_MAP_DIRTY)); - arena_avail_tree_insert(runs_avail, &chunk->map[run_ind-map_bias]); + assert(arena_mapbits_unallocated_size_get(chunk, run_ind) == + arena_mapbits_unallocated_size_get(chunk, run_ind+run_pages-1)); + assert(arena_mapbits_dirty_get(chunk, run_ind) == + arena_mapbits_dirty_get(chunk, run_ind+run_pages-1)); + arena_avail_tree_insert(runs_avail, arena_mapp_get(chunk, run_ind)); if (dirty) { /* @@ -943,14 +937,15 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty) } } - /* - * Deallocate chunk if it is now completely unused. The bit - * manipulation checks whether the first run is unallocated and extends - * to the end of the chunk. 
- */ - if ((chunk->map[0].bits & (~PAGE_MASK | CHUNK_MAP_ALLOCATED)) == - arena_maxclass) + /* Deallocate chunk if it is now completely unused. */ + if (size == arena_maxclass) { + assert(run_ind == map_bias); + assert(run_pages == (arena_maxclass >> LG_PAGE)); + assert(arena_mapbits_allocated_get(chunk, map_bias) == 0); + assert(arena_mapbits_unallocated_size_get(chunk, map_bias) == + arena_maxclass); arena_chunk_dealloc(arena, chunk); + } /* * It is okay to do dirty page processing here even if the chunk was @@ -969,7 +964,7 @@ arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, { size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE; size_t head_npages = (oldsize - newsize) >> LG_PAGE; - size_t flag_dirty = chunk->map[pageind-map_bias].bits & CHUNK_MAP_DIRTY; + size_t flag_dirty = arena_mapbits_dirty_get(chunk, pageind); assert(oldsize > newsize); @@ -978,29 +973,21 @@ arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, * leading run as separately allocated. Set the last element of each * run first, in case of single-page runs. 
*/ - assert((chunk->map[pageind-map_bias].bits & CHUNK_MAP_LARGE) != 0); - assert((chunk->map[pageind-map_bias].bits & CHUNK_MAP_ALLOCATED) != 0); - chunk->map[pageind+head_npages-1-map_bias].bits = flag_dirty | - (chunk->map[pageind+head_npages-1-map_bias].bits & - CHUNK_MAP_UNZEROED) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; - chunk->map[pageind-map_bias].bits = (oldsize - newsize) - | flag_dirty | (chunk->map[pageind-map_bias].bits & - CHUNK_MAP_UNZEROED) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; + assert(arena_mapbits_large_size_get(chunk, pageind) == oldsize); + arena_mapbits_large_set(chunk, pageind+head_npages-1, 0, flag_dirty | + arena_mapbits_unzeroed_get(chunk, pageind+head_npages-1)); + arena_mapbits_large_set(chunk, pageind, oldsize-newsize, flag_dirty | + arena_mapbits_unzeroed_get(chunk, pageind)); if (config_debug) { UNUSED size_t tail_npages = newsize >> LG_PAGE; - assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias] - .bits & ~PAGE_MASK) == 0); - assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias] - .bits & CHUNK_MAP_DIRTY) == flag_dirty); - assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias] - .bits & CHUNK_MAP_LARGE) != 0); - assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias] - .bits & CHUNK_MAP_ALLOCATED) != 0); + assert(arena_mapbits_large_size_get(chunk, + pageind+head_npages+tail_npages-1) == 0); + assert(arena_mapbits_dirty_get(chunk, + pageind+head_npages+tail_npages-1) == flag_dirty); } - chunk->map[pageind+head_npages-map_bias].bits = newsize | flag_dirty | - (chunk->map[pageind+head_npages-map_bias].bits & - CHUNK_MAP_FLAGS_MASK) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; + arena_mapbits_large_set(chunk, pageind+head_npages, newsize, flag_dirty + | arena_mapbits_unzeroed_get(chunk, pageind+head_npages)); arena_run_dalloc(arena, run, false); } @@ -1011,9 +998,7 @@ arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, { size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> 
LG_PAGE; size_t head_npages = newsize >> LG_PAGE; - size_t tail_npages = (oldsize - newsize) >> LG_PAGE; - size_t flag_dirty = chunk->map[pageind-map_bias].bits & - CHUNK_MAP_DIRTY; + size_t flag_dirty = arena_mapbits_dirty_get(chunk, pageind); assert(oldsize > newsize); @@ -1022,28 +1007,22 @@ arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, * trailing run as separately allocated. Set the last element of each * run first, in case of single-page runs. */ - assert((chunk->map[pageind-map_bias].bits & CHUNK_MAP_LARGE) != 0); - assert((chunk->map[pageind-map_bias].bits & CHUNK_MAP_ALLOCATED) != 0); - chunk->map[pageind+head_npages-1-map_bias].bits = flag_dirty | - (chunk->map[pageind+head_npages-1-map_bias].bits & - CHUNK_MAP_UNZEROED) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; - chunk->map[pageind-map_bias].bits = newsize | flag_dirty | - (chunk->map[pageind-map_bias].bits & CHUNK_MAP_UNZEROED) | - CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; + assert(arena_mapbits_large_size_get(chunk, pageind) == oldsize); + arena_mapbits_large_set(chunk, pageind+head_npages-1, 0, flag_dirty | + arena_mapbits_unzeroed_get(chunk, pageind+head_npages-1)); + arena_mapbits_large_set(chunk, pageind, newsize, flag_dirty | + arena_mapbits_unzeroed_get(chunk, pageind)); - assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias].bits & - ~PAGE_MASK) == 0); - assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias].bits & - CHUNK_MAP_LARGE) != 0); - assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias].bits & - CHUNK_MAP_ALLOCATED) != 0); - chunk->map[pageind+head_npages+tail_npages-1-map_bias].bits = - flag_dirty | - (chunk->map[pageind+head_npages+tail_npages-1-map_bias].bits & - CHUNK_MAP_UNZEROED) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; - chunk->map[pageind+head_npages-map_bias].bits = (oldsize - newsize) | - flag_dirty | (chunk->map[pageind+head_npages-map_bias].bits & - CHUNK_MAP_UNZEROED) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; + if 
(config_debug) { + UNUSED size_t tail_npages = (oldsize - newsize) >> LG_PAGE; + assert(arena_mapbits_large_size_get(chunk, + pageind+head_npages+tail_npages-1) == 0); + assert(arena_mapbits_dirty_get(chunk, + pageind+head_npages+tail_npages-1) == flag_dirty); + } + arena_mapbits_large_set(chunk, pageind+head_npages, oldsize-newsize, + flag_dirty | arena_mapbits_unzeroed_get(chunk, + pageind+head_npages)); arena_run_dalloc(arena, (arena_run_t *)((uintptr_t)run + newsize), dirty); @@ -1061,8 +1040,8 @@ arena_bin_runs_first(arena_bin_t *bin) chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(mapelm); pageind = ((((uintptr_t)mapelm - (uintptr_t)chunk->map) / sizeof(arena_chunk_map_t))) + map_bias; - run = (arena_run_t *)((uintptr_t)chunk + - (uintptr_t)((pageind - (mapelm->bits >> LG_PAGE)) << + run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - + arena_mapbits_small_runind_get(chunk, pageind)) << LG_PAGE)); return (run); } @@ -1075,7 +1054,7 @@ arena_bin_runs_insert(arena_bin_t *bin, arena_run_t *run) { arena_chunk_t *chunk = CHUNK_ADDR2BASE(run); size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE; - arena_chunk_map_t *mapelm = &chunk->map[pageind-map_bias]; + arena_chunk_map_t *mapelm = arena_mapp_get(chunk, pageind); assert(arena_run_tree_search(&bin->runs, mapelm) == NULL); @@ -1087,7 +1066,7 @@ arena_bin_runs_remove(arena_bin_t *bin, arena_run_t *run) { arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE; - arena_chunk_map_t *mapelm = &chunk->map[pageind-map_bias]; + arena_chunk_map_t *mapelm = arena_mapp_get(chunk, pageind); assert(arena_run_tree_search(&bin->runs, mapelm) != NULL); @@ -1126,7 +1105,7 @@ arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin) malloc_mutex_unlock(&bin->lock); /******************************/ malloc_mutex_lock(&arena->lock); - run = arena_run_alloc(arena, bin_info->run_size, false, false); + run = arena_run_alloc(arena, 
bin_info->run_size, false, binind, false); if (run != NULL) { bitmap_t *bitmap = (bitmap_t *)((uintptr_t)run + (uintptr_t)bin_info->bitmap_offset); @@ -1384,7 +1363,7 @@ arena_malloc_large(arena_t *arena, size_t size, bool zero) /* Large allocation. */ size = PAGE_CEILING(size); malloc_mutex_lock(&arena->lock); - ret = (void *)arena_run_alloc(arena, size, true, zero); + ret = (void *)arena_run_alloc(arena, size, true, BININD_INVALID, zero); if (ret == NULL) { malloc_mutex_unlock(&arena->lock); return (NULL); @@ -1428,7 +1407,7 @@ arena_palloc(arena_t *arena, size_t size, size_t alignment, bool zero) alloc_size = size + alignment - PAGE; malloc_mutex_lock(&arena->lock); - run = arena_run_alloc(arena, alloc_size, true, zero); + run = arena_run_alloc(arena, alloc_size, true, BININD_INVALID, zero); if (run == NULL) { malloc_mutex_unlock(&arena->lock); return (NULL); @@ -1485,8 +1464,7 @@ arena_prof_promoted(const void *ptr, size_t size) pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; binind = SMALL_SIZE2BIN(size); assert(binind < NBINS); - chunk->map[pageind-map_bias].bits = (chunk->map[pageind-map_bias].bits & - ~CHUNK_MAP_CLASS_MASK) | ((binind+1) << CHUNK_MAP_CLASS_SHIFT); + arena_mapbits_large_binind_set(chunk, pageind, binind); assert(isalloc(ptr, false) == PAGE); assert(isalloc(ptr, true) == size); @@ -1524,8 +1502,9 @@ arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, size_t npages, run_ind, past; assert(run != bin->runcur); - assert(arena_run_tree_search(&bin->runs, &chunk->map[ - (((uintptr_t)run-(uintptr_t)chunk)>>LG_PAGE)-map_bias]) == NULL); + assert(arena_run_tree_search(&bin->runs, + arena_mapp_get(chunk, ((uintptr_t)run-(uintptr_t)chunk)>>LG_PAGE)) + == NULL); binind = arena_bin_index(chunk->arena, run->bin); bin_info = &arena_bin_info[binind]; @@ -1545,18 +1524,16 @@ arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, * trim the clean pages before deallocating the dirty portion of the * run. 
*/ - if ((chunk->map[run_ind-map_bias].bits & CHUNK_MAP_DIRTY) == 0 && past - - run_ind < npages) { + if (arena_mapbits_dirty_get(chunk, run_ind) == 0 && past - run_ind < + npages) { /* * Trim clean pages. Convert to large run beforehand. Set the * last map element first, in case this is a one-page run. */ - chunk->map[run_ind+npages-1-map_bias].bits = CHUNK_MAP_LARGE | - (chunk->map[run_ind+npages-1-map_bias].bits & - CHUNK_MAP_FLAGS_MASK); - chunk->map[run_ind-map_bias].bits = bin_info->run_size | - CHUNK_MAP_LARGE | (chunk->map[run_ind-map_bias].bits & - CHUNK_MAP_FLAGS_MASK); + arena_mapbits_large_set(chunk, run_ind+npages-1, 0, + arena_mapbits_unzeroed_get(chunk, run_ind+npages-1)); + arena_mapbits_large_set(chunk, run_ind, bin_info->run_size, + arena_mapbits_unzeroed_get(chunk, run_ind)); arena_run_trim_tail(arena, chunk, run, (npages << LG_PAGE), ((past - run_ind) << LG_PAGE), false); /* npages = past - run_ind; */ @@ -1591,7 +1568,7 @@ arena_bin_lower_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, } void -arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, +arena_dalloc_bin_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr, arena_chunk_map_t *mapelm) { size_t pageind; @@ -1602,9 +1579,9 @@ arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - - (mapelm->bits >> LG_PAGE)) << LG_PAGE)); + arena_mapbits_small_runind_get(chunk, pageind)) << LG_PAGE)); bin = run->bin; - binind = arena_bin_index(arena, bin); + binind = arena_ptr_binind(ptr, mapelm->bits); bin_info = &arena_bin_info[binind]; if (config_fill || config_stats) size = bin_info->reg_size; @@ -1625,6 +1602,34 @@ arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, } } +void +arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, + size_t pageind, arena_chunk_map_t *mapelm) +{ + arena_run_t *run; + arena_bin_t *bin; + 
+ run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - + arena_mapbits_small_runind_get(chunk, pageind)) << LG_PAGE)); + bin = run->bin; + malloc_mutex_lock(&bin->lock); + arena_dalloc_bin_locked(arena, chunk, ptr, mapelm); + malloc_mutex_unlock(&bin->lock); +} + +void +arena_dalloc_small(arena_t *arena, arena_chunk_t *chunk, void *ptr, + size_t pageind) +{ + arena_chunk_map_t *mapelm; + + if (config_debug) { + assert(arena_ptr_binind(ptr, arena_mapbits_get(chunk, pageind)) + != BININD_INVALID); + } + mapelm = arena_mapp_get(chunk, pageind); + arena_dalloc_bin(arena, chunk, ptr, pageind, mapelm); +} void arena_stats_merge(arena_t *arena, size_t *nactive, size_t *ndirty, arena_stats_t *astats, malloc_bin_stats_t *bstats, @@ -1673,12 +1678,12 @@ arena_stats_merge(arena_t *arena, size_t *nactive, size_t *ndirty, } void -arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr) +arena_dalloc_large_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr) { if (config_fill || config_stats) { size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - size_t size = chunk->map[pageind-map_bias].bits & ~PAGE_MASK; + size_t size = arena_mapbits_large_size_get(chunk, pageind); if (config_fill && config_stats && opt_junk) memset(ptr, 0x5a, size); @@ -1693,6 +1698,15 @@ arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr) arena_run_dalloc(arena, (arena_run_t *)ptr, true); } +void +arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr) +{ + + malloc_mutex_lock(&arena->lock); + arena_dalloc_large_locked(arena, chunk, ptr); + malloc_mutex_unlock(&arena->lock); +} + static void arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk, void *ptr, size_t oldsize, size_t size) @@ -1731,16 +1745,15 @@ arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr, size_t npages = oldsize >> LG_PAGE; size_t followsize; - assert(oldsize == (chunk->map[pageind-map_bias].bits & ~PAGE_MASK)); + assert(oldsize == 
arena_mapbits_large_size_get(chunk, pageind)); /* Try to extend the run. */ assert(size + extra > oldsize); malloc_mutex_lock(&arena->lock); if (pageind + npages < chunk_npages && - (chunk->map[pageind+npages-map_bias].bits - & CHUNK_MAP_ALLOCATED) == 0 && (followsize = - chunk->map[pageind+npages-map_bias].bits & ~PAGE_MASK) >= size - - oldsize) { + arena_mapbits_allocated_get(chunk, pageind+npages) == 0 && + (followsize = arena_mapbits_unallocated_size_get(chunk, + pageind+npages)) >= size - oldsize) { /* * The next run is available and sufficiently large. Split the * following run, then merge the first part with the existing @@ -1750,7 +1763,8 @@ arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr, size_t splitsize = (oldsize + followsize <= size + extra) ? followsize : size + extra - oldsize; arena_run_split(arena, (arena_run_t *)((uintptr_t)chunk + - ((pageind+npages) << LG_PAGE)), splitsize, true, zero); + ((pageind+npages) << LG_PAGE)), splitsize, true, + BININD_INVALID, zero); size = oldsize + splitsize; npages = size >> LG_PAGE; @@ -1763,29 +1777,22 @@ arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr, * arena_run_dalloc() with the dirty argument set to false * (which is when dirty flag consistency would really matter). 
*/ - flag_dirty = (chunk->map[pageind-map_bias].bits & - CHUNK_MAP_DIRTY) | - (chunk->map[pageind+npages-1-map_bias].bits & - CHUNK_MAP_DIRTY); - chunk->map[pageind-map_bias].bits = size | flag_dirty - | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; - chunk->map[pageind+npages-1-map_bias].bits = flag_dirty | - CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; + flag_dirty = arena_mapbits_dirty_get(chunk, pageind) | + arena_mapbits_dirty_get(chunk, pageind+npages-1); + arena_mapbits_large_set(chunk, pageind, size, flag_dirty); + arena_mapbits_large_set(chunk, pageind+npages-1, 0, flag_dirty); if (config_stats) { arena->stats.ndalloc_large++; arena->stats.allocated_large -= oldsize; - arena->stats.lstats[(oldsize >> LG_PAGE) - - 1].ndalloc++; - arena->stats.lstats[(oldsize >> LG_PAGE) - - 1].curruns--; + arena->stats.lstats[(oldsize >> LG_PAGE) - 1].ndalloc++; + arena->stats.lstats[(oldsize >> LG_PAGE) - 1].curruns--; arena->stats.nmalloc_large++; arena->stats.nrequests_large++; arena->stats.allocated_large += size; arena->stats.lstats[(size >> LG_PAGE) - 1].nmalloc++; - arena->stats.lstats[(size >> LG_PAGE) - - 1].nrequests++; + arena->stats.lstats[(size >> LG_PAGE) - 1].nrequests++; arena->stats.lstats[(size >> LG_PAGE) - 1].curruns++; } malloc_mutex_unlock(&arena->lock); diff --git a/src/tcache.c b/src/tcache.c index 9c4970c5..60244c45 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -24,6 +24,46 @@ size_t tcache_salloc(const void *ptr) return (arena_salloc(ptr, false)); } +void +tcache_event_hard(tcache_t *tcache) +{ + size_t binind = tcache->next_gc_bin; + tcache_bin_t *tbin = &tcache->tbins[binind]; + tcache_bin_info_t *tbin_info = &tcache_bin_info[binind]; + + if (tbin->low_water > 0) { + /* + * Flush (ceiling) 3/4 of the objects below the low water mark. 
+ */ + if (binind < NBINS) { + tcache_bin_flush_small(tbin, binind, tbin->ncached - + tbin->low_water + (tbin->low_water >> 2), tcache); + } else { + tcache_bin_flush_large(tbin, binind, tbin->ncached - + tbin->low_water + (tbin->low_water >> 2), tcache); + } + /* + * Reduce fill count by 2X. Limit lg_fill_div such that the + * fill count is always at least 1. + */ + if ((tbin_info->ncached_max >> (tbin->lg_fill_div+1)) >= 1) + tbin->lg_fill_div++; + } else if (tbin->low_water < 0) { + /* + * Increase fill count by 2X. Make sure lg_fill_div stays + * greater than 0. + */ + if (tbin->lg_fill_div > 1) + tbin->lg_fill_div--; + } + tbin->low_water = tbin->ncached; + + tcache->next_gc_bin++; + if (tcache->next_gc_bin == nhbins) + tcache->next_gc_bin = 0; + tcache->ev_cnt = 0; +} + void * tcache_alloc_small_hard(tcache_t *tcache, tcache_bin_t *tbin, size_t binind) { @@ -80,12 +120,13 @@ tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem, size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; arena_chunk_map_t *mapelm = - &chunk->map[pageind-map_bias]; + arena_mapp_get(chunk, pageind); if (config_fill && opt_junk) { arena_alloc_junk_small(ptr, &arena_bin_info[binind], true); } - arena_dalloc_bin(arena, chunk, ptr, mapelm); + arena_dalloc_bin_locked(arena, chunk, ptr, + mapelm); } else { /* * This object was allocated via a different @@ -158,7 +199,7 @@ tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem, assert(ptr != NULL); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (chunk->arena == arena) - arena_dalloc_large(arena, chunk, ptr); + arena_dalloc_large_locked(arena, chunk, ptr); else { /* * This object was allocated via a different @@ -314,22 +355,14 @@ tcache_destroy(tcache_t *tcache) arena_t *arena = chunk->arena; size_t pageind = ((uintptr_t)tcache - (uintptr_t)chunk) >> LG_PAGE; - arena_chunk_map_t *mapelm = &chunk->map[pageind-map_bias]; - arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + - 
(uintptr_t)((pageind - (mapelm->bits >> LG_PAGE)) << - LG_PAGE)); - arena_bin_t *bin = run->bin; + arena_chunk_map_t *mapelm = arena_mapp_get(chunk, pageind); - malloc_mutex_lock(&bin->lock); - arena_dalloc_bin(arena, chunk, tcache, mapelm); - malloc_mutex_unlock(&bin->lock); + arena_dalloc_bin(arena, chunk, tcache, pageind, mapelm); } else if (tcache_size <= tcache_maxclass) { arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache); arena_t *arena = chunk->arena; - malloc_mutex_lock(&arena->lock); arena_dalloc_large(arena, chunk, tcache); - malloc_mutex_unlock(&arena->lock); } else idalloc(tcache); } diff --git a/src/tsd.c b/src/tsd.c index 00d7b45d..961a5463 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -14,7 +14,7 @@ malloc_tsd_malloc(size_t size) { /* Avoid choose_arena() in order to dodge bootstrapping issues. */ - return arena_malloc(arenas[0], size, false, false); + return (arena_malloc(arenas[0], size, false, false)); } void From 8d5865eb578e99369382d90bdd1e557e5b233277 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 2 May 2012 01:22:16 -0700 Subject: [PATCH 0214/3378] Make CACHELINE a raw constant. Make CACHELINE a raw constant in order to work around a __declspec(align()) limitation. Submitted by Mike Hommey. --- include/jemalloc/internal/jemalloc_internal.h.in | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index ccecfaa1..e441285b 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -307,9 +307,12 @@ static const bool config_ivsalloc = /* * Maximum size of L1 cache line. This is used to avoid cache line aliasing. * In addition, this controls the spacing of cacheline-spaced size classes. + * + * CACHELINE cannot be based on LG_CACHELINE because __declspec(align()) can + * only handle raw constants. 
*/ #define LG_CACHELINE 6 -#define CACHELINE ((size_t)(1U << LG_CACHELINE)) +#define CACHELINE 64 #define CACHELINE_MASK (CACHELINE - 1) /* Return the smallest cacheline multiple that is >= s. */ From 889ec59bd3ae3190fb715e64d8d15b6a1b47314a Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 2 May 2012 02:08:03 -0700 Subject: [PATCH 0215/3378] Make malloc_write() non-inline. Make malloc_write() non-inline, in order to resolve its dependency on je_malloc_write(). --- include/jemalloc/internal/util.h | 12 +----------- src/util.c | 11 +++++++++++ 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h index fa88bf3f..84796936 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util.h @@ -84,6 +84,7 @@ int buferror(char *buf, size_t buflen); uintmax_t malloc_strtoumax(const char *nptr, char **endptr, int base); +void malloc_write(const char *s); /* * malloc_vsnprintf() supports a subset of snprintf(3) that avoids floating @@ -130,17 +131,6 @@ pow2_ceil(size_t x) return (x); } -/* - * Wrapper around malloc_message() that avoids the need for - * je_malloc_message(...) throughout the code. - */ -JEMALLOC_INLINE void -malloc_write(const char *s) -{ - - je_malloc_message(NULL, s); -} - /* Sets error code */ JEMALLOC_INLINE void set_errno(int errnum) diff --git a/src/util.c b/src/util.c index 3c92ad2c..4f716957 100644 --- a/src/util.c +++ b/src/util.c @@ -59,6 +59,17 @@ wrtmessage(void *cbopaque, const char *s) JEMALLOC_EXPORT void (*je_malloc_message)(void *, const char *s) = wrtmessage; +/* + * Wrapper around malloc_message() that avoids the need for + * je_malloc_message(...) throughout the code. + */ +void +malloc_write(const char *s) +{ + + je_malloc_message(NULL, s); +} + /* * glibc provides a non-standard strerror_r() when _GNU_SOURCE is defined, so * provide a wrapper. 
From 9a7944f8aba8806c2806c7bb4d61b1b6df59f044 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 2 May 2012 02:16:51 -0700 Subject: [PATCH 0216/3378] Update private namespace mangling. --- include/jemalloc/internal/private_namespace.h | 25 +++++++++---------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/include/jemalloc/internal/private_namespace.h b/include/jemalloc/internal/private_namespace.h index 6ac07631..e599d64e 100644 --- a/include/jemalloc/internal/private_namespace.h +++ b/include/jemalloc/internal/private_namespace.h @@ -15,21 +15,21 @@ #define arena_malloc JEMALLOC_N(arena_malloc) #define arena_malloc_large JEMALLOC_N(arena_malloc_large) #define arena_malloc_small JEMALLOC_N(arena_malloc_small) -#define arena_mapbitsp_get JEMALLOC_N(arena_mapbitsp_get) +#define arena_mapbits_allocated_get JEMALLOC_N(arena_mapbits_allocated_get) +#define arena_mapbits_dirty_get JEMALLOC_N(arena_mapbits_dirty_get) #define arena_mapbits_get JEMALLOC_N(arena_mapbits_get) -#define arena_mapbits_unallocated_size_get JEMALLOC_N(arena_mapbits_unallocated_size_get) +#define arena_mapbits_large_binind_set JEMALLOC_N(arena_mapbits_large_binind_set) +#define arena_mapbits_large_get JEMALLOC_N(arena_mapbits_large_get) +#define arena_mapbits_large_set JEMALLOC_N(arena_mapbits_large_set) #define arena_mapbits_large_size_get JEMALLOC_N(arena_mapbits_large_size_get) #define arena_mapbits_small_runind_get JEMALLOC_N(arena_mapbits_small_runind_get) -#define arena_mapbits_dirty_get JEMALLOC_N(arena_mapbits_dirty_get) -#define arena_mapbits_unzeroed_get JEMALLOC_N(arena_mapbits_unzeroed_get) -#define arena_mapbits_large_get JEMALLOC_N(arena_mapbits_large_get) -#define arena_mapbits_allocated_get JEMALLOC_N(arena_mapbits_allocated_get) -#define arena_mapbits_unallocated_set JEMALLOC_N(arena_mapbits_unallocated_set) -#define arena_mapbits_unallocated_size_set JEMALLOC_N(arena_mapbits_unallocated_size_set) -#define arena_mapbits_large_set 
JEMALLOC_N(arena_mapbits_large_set) -#define arena_mapbits_large_binind_set JEMALLOC_N(arena_mapbits_large_binind_set) #define arena_mapbits_small_set JEMALLOC_N(arena_mapbits_small_set) +#define arena_mapbits_unallocated_set JEMALLOC_N(arena_mapbits_unallocated_set) +#define arena_mapbits_unallocated_size_get JEMALLOC_N(arena_mapbits_unallocated_size_get) +#define arena_mapbits_unallocated_size_set JEMALLOC_N(arena_mapbits_unallocated_size_set) +#define arena_mapbits_unzeroed_get JEMALLOC_N(arena_mapbits_unzeroed_get) #define arena_mapbits_unzeroed_set JEMALLOC_N(arena_mapbits_unzeroed_set) +#define arena_mapbitsp_get JEMALLOC_N(arena_mapbitsp_get) #define arena_mapp_get JEMALLOC_N(arena_mapp_get) #define arena_maxclass JEMALLOC_N(arena_maxclass) #define arena_new JEMALLOC_N(arena_new) @@ -253,7 +253,6 @@ #define prof_lookup JEMALLOC_N(prof_lookup) #define prof_malloc JEMALLOC_N(prof_malloc) #define prof_mdump JEMALLOC_N(prof_mdump) -#define prof_lookup JEMALLOC_N(prof_lookup) #define prof_promote JEMALLOC_N(prof_promote) #define prof_realloc JEMALLOC_N(prof_realloc) #define prof_sample_accum_update JEMALLOC_N(prof_sample_accum_update) @@ -317,12 +316,12 @@ #define tcache_enabled_tsd_set JEMALLOC_N(tcache_enabled_tsd_set) #define tcache_event JEMALLOC_N(tcache_event) #define tcache_event_hard JEMALLOC_N(tcache_event_hard) -#define tcache_initialized JEMALLOC_N(tcache_initialized) #define tcache_flush JEMALLOC_N(tcache_flush) #define tcache_get JEMALLOC_N(tcache_get) +#define tcache_initialized JEMALLOC_N(tcache_initialized) #define tcache_maxclass JEMALLOC_N(tcache_maxclass) -#define tcache_stats_merge JEMALLOC_N(tcache_stats_merge) #define tcache_salloc JEMALLOC_N(tcache_salloc) +#define tcache_stats_merge JEMALLOC_N(tcache_stats_merge) #define tcache_thread_cleanup JEMALLOC_N(tcache_thread_cleanup) #define tcache_tls JEMALLOC_N(tcache_tls) #define tcache_tsd_boot JEMALLOC_N(tcache_tsd_boot) From 1b523da21c8aded36dbe669a9e9ca78c3c96cad7 Mon Sep 17 00:00:00 2001 
From: Jason Evans Date: Wed, 2 May 2012 03:02:53 -0700 Subject: [PATCH 0217/3378] Fix partial rename of s/EXPORT/JEMALLOC_EXPORT/g. --- include/jemalloc/jemalloc_defs.h.in | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/include/jemalloc/jemalloc_defs.h.in b/include/jemalloc/jemalloc_defs.h.in index e61597c0..c9ab6468 100644 --- a/include/jemalloc/jemalloc_defs.h.in +++ b/include/jemalloc/jemalloc_defs.h.in @@ -111,11 +111,11 @@ # define JEMALLOC_NOINLINE JEMALLOC_ATTR(noinline) #elif _MSC_VER # define JEMALLOC_ATTR(s) -#ifdef DLLEXPORT -# define EXPORT __declspec(dllexport) -#else -# define EXPORT __declspec(dllimport) -#endif +# ifdef DLLEXPORT +# define JEMALLOC_EXPORT __declspec(dllexport) +# else +# define JEMALLOC_EXPORT __declspec(dllimport) +# endif # define JEMALLOC_ALIGNED(s) __declspec(align(s)) # define JEMALLOC_SECTION(s) __declspec(allocate(s)) # define JEMALLOC_NOINLINE __declspec(noinline) From 7bfecf412dab69e771c9bbbaa01160bb69af8ec0 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Mon, 30 Apr 2012 15:15:15 +0200 Subject: [PATCH 0218/3378] Check for VALGRIND_RESIZEINPLACE_BLOCK support VALGRIND_RESIZEINPLACE_BLOCK was added in valgrind 3.7. 
Unfortunately, the __VALGRIND_MINOR__ macro still says 6 in 3.7 :( --- configure.ac | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/configure.ac b/configure.ac index 50eaae65..eeb2a290 100644 --- a/configure.ac +++ b/configure.ac @@ -841,10 +841,7 @@ if test "x$enable_valgrind" = "x1" ; then #include #include -#if defined(__VALGRIND_MAJOR__) && defined(__VALGRIND_MINOR__) \ - && (__VALGRIND_MAJOR__ > 3 || (__VALGRIND_MAJOR__ == 3 && \ - __VALGRIND_MINOR__ >= 6)) -#else +#if !defined(VALGRIND_RESIZEINPLACE_BLOCK) # error "Incompatible Valgrind version" #endif ], [], [je_cv_valgrind]) From 80737c3323dabc45232affcaeb99ac2bad6ea647 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 2 May 2012 16:11:03 -0700 Subject: [PATCH 0219/3378] Further optimize and harden arena_salloc(). Further optimize arena_salloc() to only look at the binind chunk map bits in the common case. Add more sanity checks to arena_salloc() that detect chunk map inconsistencies for large allocations (whether due to allocator bugs or application bugs). 
--- include/jemalloc/internal/arena.h | 98 +++++++++++++------ include/jemalloc/internal/private_namespace.h | 3 +- src/arena.c | 9 +- 3 files changed, 73 insertions(+), 37 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 067b75a8..264b5d3a 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -447,6 +447,7 @@ size_t arena_mapbits_unallocated_size_get(arena_chunk_t *chunk, size_t pageind); size_t arena_mapbits_large_size_get(arena_chunk_t *chunk, size_t pageind); size_t arena_mapbits_small_runind_get(arena_chunk_t *chunk, size_t pageind); +size_t arena_mapbits_binind_get(arena_chunk_t *chunk, size_t pageind); size_t arena_mapbits_dirty_get(arena_chunk_t *chunk, size_t pageind); size_t arena_mapbits_unzeroed_get(arena_chunk_t *chunk, size_t pageind); size_t arena_mapbits_large_get(arena_chunk_t *chunk, size_t pageind); @@ -463,7 +464,7 @@ void arena_mapbits_small_set(arena_chunk_t *chunk, size_t pageind, size_t runind, size_t binind, size_t flags); void arena_mapbits_unzeroed_set(arena_chunk_t *chunk, size_t pageind, size_t unzeroed); -size_t arena_ptr_binind(const void *ptr, size_t mapbits); +size_t arena_ptr_small_binind_get(const void *ptr, size_t mapbits); size_t arena_bin_index(arena_t *arena, arena_bin_t *bin); unsigned arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr); @@ -533,6 +534,18 @@ arena_mapbits_small_runind_get(arena_chunk_t *chunk, size_t pageind) return (mapbits >> LG_PAGE); } +JEMALLOC_INLINE size_t +arena_mapbits_binind_get(arena_chunk_t *chunk, size_t pageind) +{ + size_t mapbits; + size_t binind; + + mapbits = arena_mapbits_get(chunk, pageind); + binind = (mapbits & CHUNK_MAP_BININD_MASK) >> CHUNK_MAP_BININD_SHIFT; + assert(binind < NBINS || binind == BININD_INVALID); + return (binind); +} + JEMALLOC_INLINE size_t arena_mapbits_dirty_get(arena_chunk_t *chunk, size_t pageind) { @@ -644,25 +657,37 @@ 
arena_mapbits_unzeroed_set(arena_chunk_t *chunk, size_t pageind, } JEMALLOC_INLINE size_t -arena_ptr_binind(const void *ptr, size_t mapbits) +arena_ptr_small_binind_get(const void *ptr, size_t mapbits) { size_t binind; binind = (mapbits & CHUNK_MAP_BININD_MASK) >> CHUNK_MAP_BININD_SHIFT; if (config_debug) { - arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - arena_t *arena = chunk->arena; - size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - size_t actual_mapbits = arena_mapbits_get(chunk, pageind); - arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + - (uintptr_t)((pageind - (actual_mapbits >> LG_PAGE)) << - LG_PAGE)); arena_bin_t *bin = run->bin; - size_t actual_binind = bin - arena->bins; - arena_bin_info_t *bin_info = &arena_bin_info[actual_binind]; + arena_chunk_t *chunk; + arena_t *arena; + size_t pageind; + size_t actual_mapbits; + arena_run_t *run; + arena_bin_t *bin; + size_t actual_binind; + arena_bin_info_t *bin_info; + assert(binind != BININD_INVALID); + assert(binind < NBINS); + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + arena = chunk->arena; + pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; + actual_mapbits = arena_mapbits_get(chunk, pageind); assert(mapbits == actual_mapbits); + assert(arena_mapbits_large_get(chunk, pageind) == 0); + assert(arena_mapbits_allocated_get(chunk, pageind) != 0); + run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - + (actual_mapbits >> LG_PAGE)) << LG_PAGE)); + bin = run->bin; + actual_binind = bin - arena->bins; assert(binind == actual_binind); + bin_info = &arena_bin_info[actual_binind]; assert(((uintptr_t)ptr - ((uintptr_t)run + (uintptr_t)bin_info->reg0_offset)) % bin_info->reg_interval == 0); @@ -775,7 +800,8 @@ arena_prof_ctx_get(const void *ptr) arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - (mapbits >> LG_PAGE)) << LG_PAGE)); - size_t binind = arena_ptr_binind(ptr, mapbits); + size_t binind = arena_ptr_small_binind_get(ptr, + 
mapbits); arena_bin_info_t *bin_info = &arena_bin_info[binind]; unsigned regind; @@ -813,7 +839,7 @@ arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx) arena_bin_info_t *bin_info; unsigned regind; - binind = arena_ptr_binind(ptr, mapbits); + binind = arena_ptr_small_binind_get(ptr, mapbits); bin_info = &arena_bin_info[binind]; regind = arena_run_regind(run, bin_info, ptr); @@ -861,7 +887,7 @@ arena_salloc(const void *ptr, bool demote) { size_t ret; arena_chunk_t *chunk; - size_t pageind, mapbits; + size_t pageind, binind; assert(ptr != NULL); assert(CHUNK_ADDR2BASE(ptr) != ptr); @@ -869,25 +895,34 @@ arena_salloc(const void *ptr, bool demote) chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; assert(arena_mapbits_allocated_get(chunk, pageind) != 0); - mapbits = arena_mapbits_get(chunk, pageind); - if ((mapbits & CHUNK_MAP_LARGE) == 0) { - size_t binind = arena_ptr_binind(ptr, mapbits); - ret = arena_bin_info[binind].reg_size; - } else { + binind = arena_mapbits_binind_get(chunk, pageind); + if (binind == BININD_INVALID || (config_prof && demote == false && + prof_promote && arena_mapbits_large_get(chunk, pageind) != 0)) { + /* + * Large allocation. In the common case (demote == true), and + * as this is an inline function, most callers will only end up + * looking at binind to determine that ptr is a small + * allocation. 
+ */ assert(((uintptr_t)ptr & PAGE_MASK) == 0); ret = arena_mapbits_large_size_get(chunk, pageind); - if (config_prof && demote && prof_promote && ret == PAGE && - (mapbits & CHUNK_MAP_BININD_MASK) != - CHUNK_MAP_BININD_MASK) { - size_t binind = ((mapbits & CHUNK_MAP_BININD_MASK) >> - CHUNK_MAP_BININD_SHIFT); - assert(binind < NBINS); - ret = arena_bin_info[binind].reg_size; - } else { - assert(demote == false || (mapbits & - CHUNK_MAP_BININD_MASK) == CHUNK_MAP_BININD_MASK); - } assert(ret != 0); + assert(pageind + (ret>>LG_PAGE) <= chunk_npages); + assert(ret == PAGE || arena_mapbits_large_size_get(chunk, + pageind+(ret>>LG_PAGE)-1) == 0); + assert(binind == arena_mapbits_binind_get(chunk, + pageind+(ret>>LG_PAGE)-1)); + assert(arena_mapbits_dirty_get(chunk, pageind) == + arena_mapbits_dirty_get(chunk, pageind+(ret>>LG_PAGE)-1)); + } else { + /* + * Small allocation (possibly promoted to a large object due to + * prof_promote). + */ + assert(arena_mapbits_large_get(chunk, pageind) != 0 || + arena_ptr_small_binind_get(ptr, arena_mapbits_get(chunk, + pageind)) == binind); + ret = arena_bin_info[binind].reg_size; } return (ret); @@ -912,8 +947,7 @@ arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr, bool try_tcache) if (try_tcache && (tcache = tcache_get(false)) != NULL) { size_t binind; - binind = arena_ptr_binind(ptr, mapbits); - assert(binind < NBINS); + binind = arena_ptr_small_binind_get(ptr, mapbits); tcache_dalloc_small(tcache, ptr, binind); } else arena_dalloc_small(arena, chunk, ptr, pageind); diff --git a/include/jemalloc/internal/private_namespace.h b/include/jemalloc/internal/private_namespace.h index e599d64e..b8166470 100644 --- a/include/jemalloc/internal/private_namespace.h +++ b/include/jemalloc/internal/private_namespace.h @@ -16,6 +16,7 @@ #define arena_malloc_large JEMALLOC_N(arena_malloc_large) #define arena_malloc_small JEMALLOC_N(arena_malloc_small) #define arena_mapbits_allocated_get JEMALLOC_N(arena_mapbits_allocated_get) +#define 
arena_mapbits_binind_get JEMALLOC_N(arena_mapbits_binind_get) #define arena_mapbits_dirty_get JEMALLOC_N(arena_mapbits_dirty_get) #define arena_mapbits_get JEMALLOC_N(arena_mapbits_get) #define arena_mapbits_large_binind_set JEMALLOC_N(arena_mapbits_large_binind_set) @@ -41,7 +42,7 @@ #define arena_prof_ctx_get JEMALLOC_N(arena_prof_ctx_get) #define arena_prof_ctx_set JEMALLOC_N(arena_prof_ctx_set) #define arena_prof_promoted JEMALLOC_N(arena_prof_promoted) -#define arena_ptr_binind JEMALLOC_N(arena_ptr_binind) +#define arena_ptr_small_binind_get JEMALLOC_N(arena_ptr_small_binind_get) #define arena_purge_all JEMALLOC_N(arena_purge_all) #define arena_ralloc JEMALLOC_N(arena_ralloc) #define arena_ralloc_no_move JEMALLOC_N(arena_ralloc_no_move) diff --git a/src/arena.c b/src/arena.c index 3ef4b8fb..9f24e7ca 100644 --- a/src/arena.c +++ b/src/arena.c @@ -154,7 +154,7 @@ arena_run_reg_dalloc(arena_run_t *run, void *ptr) arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; size_t mapbits = arena_mapbits_get(chunk, pageind); - size_t binind = arena_ptr_binind(ptr, mapbits); + size_t binind = arena_ptr_small_binind_get(ptr, mapbits); arena_bin_info_t *bin_info = &arena_bin_info[binind]; unsigned regind = arena_run_regind(run, bin_info, ptr); bitmap_t *bitmap = (bitmap_t *)((uintptr_t)run + @@ -1581,7 +1581,7 @@ arena_dalloc_bin_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr, run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - arena_mapbits_small_runind_get(chunk, pageind)) << LG_PAGE)); bin = run->bin; - binind = arena_ptr_binind(ptr, mapelm->bits); + binind = arena_ptr_small_binind_get(ptr, mapelm->bits); bin_info = &arena_bin_info[binind]; if (config_fill || config_stats) size = bin_info->reg_size; @@ -1624,8 +1624,9 @@ arena_dalloc_small(arena_t *arena, arena_chunk_t *chunk, void *ptr, arena_chunk_map_t *mapelm; if (config_debug) { - assert(arena_ptr_binind(ptr, 
arena_mapbits_get(chunk, pageind)) - != BININD_INVALID); + /* arena_ptr_small_binind_get() does extra sanity checking. */ + assert(arena_ptr_small_binind_get(ptr, arena_mapbits_get(chunk, + pageind)) != BININD_INVALID); } mapelm = arena_mapp_get(chunk, pageind); arena_dalloc_bin(arena, chunk, ptr, pageind, mapelm); From 3597e91482c804592105ea078a0825fdb7c68dff Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Wed, 2 May 2012 13:15:00 +0200 Subject: [PATCH 0220/3378] Allow je_malloc_message to be overridden when linking statically If an application wants to override je_malloc_message, it is better to define the symbol locally than to change its value in main(), which might be too late for various reasons. Due to je_malloc_message being initialized in util.c, statically linking jemalloc with an application defining je_malloc_message fails due to "multiple definition of" the symbol. Defining it without a value (like je_malloc_conf) makes it more easily overridable. --- src/stats.c | 22 +++++++--------------- src/util.c | 11 +++++++---- 2 files changed, 14 insertions(+), 19 deletions(-) diff --git a/src/stats.c b/src/stats.c index 2854b309..1234e565 100644 --- a/src/stats.c +++ b/src/stats.c @@ -295,16 +295,6 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, abort(); } - if (write_cb == NULL) { - /* - * The caller did not provide an alternate write_cb callback - * function, so use the default one. malloc_write() is an - * inline function, so use malloc_message() directly here. 
- */ - write_cb = je_malloc_message; - cbopaque = NULL; - } - if (opts != NULL) { unsigned i; @@ -330,7 +320,8 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, } } - write_cb(cbopaque, "___ Begin jemalloc statistics ___\n"); + malloc_cprintf(write_cb, cbopaque, + "___ Begin jemalloc statistics ___\n"); if (general) { int err; const char *cpv; @@ -375,7 +366,8 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, " opt."#n": \"%s\"\n", cpv); \ } - write_cb(cbopaque, "Run-time option settings:\n"); + malloc_cprintf(write_cb, cbopaque, + "Run-time option settings:\n"); OPT_WRITE_BOOL(abort) OPT_WRITE_SIZE_T(lg_chunk) OPT_WRITE_SIZE_T(narenas) @@ -425,7 +417,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, "Min active:dirty page ratio per arena: %u:1\n", (1U << ssv)); } else { - write_cb(cbopaque, + malloc_cprintf(write_cb, cbopaque, "Min active:dirty page ratio per arena: N/A\n"); } if ((err = je_mallctl("arenas.tcache_max", &sv, &ssz, NULL, 0)) @@ -447,7 +439,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, " (2^%zd)\n", (((uint64_t)1U) << ssv), ssv); } else { - write_cb(cbopaque, + malloc_cprintf(write_cb, cbopaque, "Average profile dump interval: N/A\n"); } } @@ -547,5 +539,5 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, } } } - write_cb(cbopaque, "--- End jemalloc statistics ---\n"); + malloc_cprintf(write_cb, cbopaque, "--- End jemalloc statistics ---\n"); } diff --git a/src/util.c b/src/util.c index 4f716957..9b73c3ec 100644 --- a/src/util.c +++ b/src/util.c @@ -56,8 +56,7 @@ wrtmessage(void *cbopaque, const char *s) #endif } -JEMALLOC_EXPORT void (*je_malloc_message)(void *, const char *s) = - wrtmessage; +JEMALLOC_EXPORT void (*je_malloc_message)(void *, const char *s); /* * Wrapper around malloc_message() that avoids the need for @@ -67,7 +66,10 @@ void malloc_write(const char *s) { - je_malloc_message(NULL, s); + if (je_malloc_message != NULL) + 
je_malloc_message(NULL, s); + else + wrtmessage(NULL, s); } /* @@ -606,7 +608,8 @@ malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque, * function, so use the default one. malloc_write() is an * inline function, so use malloc_message() directly here. */ - write_cb = je_malloc_message; + write_cb = (je_malloc_message != NULL) ? je_malloc_message : + wrtmessage; cbopaque = NULL; } From 79c4bca7d10e967ff524f17a6990e5c630116198 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Wed, 2 May 2012 21:30:51 +0200 Subject: [PATCH 0221/3378] Use "hardcoded" dependencies when compiler doesn't support -MM --- Makefile.in | 14 ++++++++++++++ configure.ac | 3 +++ 2 files changed, 17 insertions(+) diff --git a/Makefile.in b/Makefile.in index 94f48699..6675b596 100644 --- a/Makefile.in +++ b/Makefile.in @@ -54,6 +54,7 @@ PIC_CFLAGS = @PIC_CFLAGS@ CTARGET = @CTARGET@ LDTARGET = @LDTARGET@ MKLIB = @MKLIB@ +CC_MM = @CC_MM@ ifeq (macho, $(ABI)) TEST_LIBRARY_PATH := DYLD_FALLBACK_LIBRARY_PATH="$(objroot)lib" @@ -136,9 +137,11 @@ build_doc: $(DOCS) # # Include generated dependency files. 
# +ifdef CC_MM -include $(COBJS:%.$(O)=%.d) -include $(CPICOBJS:%.$(O)=%.d) -include $(CTESTOBJS:%.$(O)=%.d) +endif $(COBJS): $(objroot)src/%.$(O): $(srcroot)src/%.c $(CPICOBJS): $(objroot)src/%.pic.$(O): $(srcroot)src/%.c @@ -149,10 +152,21 @@ ifneq ($(IMPORTLIB),$(SO)) $(COBJS): CPPFLAGS += -DDLLEXPORT endif +ifndef CC_MM +# Dependencies +HEADER_DIRS = $(srcroot)include/jemalloc/internal \ + $(objroot)include/jemalloc $(objroot)include/jemalloc/internal +HEADERS = $(wildcard $(foreach dir,$(HEADER_DIRS),$(dir)/*.h)) +$(COBJS) $(CPICOBJS) $(CTESTOBJS): $(HEADERS) +$(CTESTOBJS): $(objroot)test/jemalloc_test.h +endif + $(COBJS) $(CPICOBJS) $(CTESTOBJS): %.$(O): @mkdir -p $(@D) $(CC) $(CFLAGS) -c $(CPPFLAGS) $(CTARGET) $< +ifdef CC_MM @$(CC) -MM $(CPPFLAGS) -MT $@ -o $(@:%.$(O)=%.d) $< +endif ifneq ($(SOREV),$(SO)) %.$(SO) : %.$(SOREV) diff --git a/configure.ac b/configure.ac index eeb2a290..3b32b884 100644 --- a/configure.ac +++ b/configure.ac @@ -227,6 +227,7 @@ CTARGET='-o $@' LDTARGET='-o $@' EXTRA_LDFLAGS= MKLIB='ar crus $@' +CC_MM=1 dnl Platform-specific settings. abi and RPATH can probably be determined dnl programmatically, but doing so is error-prone, which makes it generally @@ -307,6 +308,7 @@ case "${host}" in CTARGET='-Fo$@' LDTARGET='-Fe$@' MKLIB='lib -nologo -out:$@' + CC_MM= else importlib="${so}" DSO_LDFLAGS="-shared" @@ -337,6 +339,7 @@ AC_SUBST([PIC_CFLAGS]) AC_SUBST([CTARGET]) AC_SUBST([LDTARGET]) AC_SUBST([MKLIB]) +AC_SUBST([CC_MM]) if test "x$abi" != "xpecoff"; then dnl Heap profiling uses the log(3) function. From 1d01206bbc07a87ed8bf62d4d854ff3ff7dcae9a Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Wed, 2 May 2012 21:30:52 +0200 Subject: [PATCH 0222/3378] Use "standard" printf prefixes instead of MSVC ones in inttypes.h We don't use MSVC's printf, but ours, and it doesn't support the I32 and I64 prefixes. 
--- include/msvc_compat/inttypes.h | 104 ++++++++++++++++++--------------- 1 file changed, 56 insertions(+), 48 deletions(-) diff --git a/include/msvc_compat/inttypes.h b/include/msvc_compat/inttypes.h index 4b3828a2..a4e6b75c 100644 --- a/include/msvc_compat/inttypes.h +++ b/include/msvc_compat/inttypes.h @@ -53,6 +53,14 @@ typedef struct { #if !defined(__cplusplus) || defined(__STDC_FORMAT_MACROS) // [ See footnote 185 at page 198 +#ifdef _WIN64 +# define __PRI64_PREFIX "l" +# define __PRIPTR_PREFIX "l" +#else +# define __PRI64_PREFIX "ll" +# define __PRIPTR_PREFIX +#endif + // The fprintf macros for signed integers are: #define PRId8 "d" #define PRIi8 "i" @@ -68,25 +76,25 @@ typedef struct { #define PRIdFAST16 "hd" #define PRIiFAST16 "hi" -#define PRId32 "I32d" -#define PRIi32 "I32i" -#define PRIdLEAST32 "I32d" -#define PRIiLEAST32 "I32i" -#define PRIdFAST32 "I32d" -#define PRIiFAST32 "I32i" +#define PRId32 "d" +#define PRIi32 "i" +#define PRIdLEAST32 "d" +#define PRIiLEAST32 "i" +#define PRIdFAST32 "d" +#define PRIiFAST32 "i" -#define PRId64 "I64d" -#define PRIi64 "I64i" -#define PRIdLEAST64 "I64d" -#define PRIiLEAST64 "I64i" -#define PRIdFAST64 "I64d" -#define PRIiFAST64 "I64i" +#define PRId64 __PRI64_PREFIX "d" +#define PRIi64 __PRI64_PREFIX "i" +#define PRIdLEAST64 __PRI64_PREFIX "d" +#define PRIiLEAST64 __PRI64_PREFIX "i" +#define PRIdFAST64 __PRI64_PREFIX "d" +#define PRIiFAST64 __PRI64_PREFIX "i" -#define PRIdMAX "I64d" -#define PRIiMAX "I64i" +#define PRIdMAX __PRI64_PREFIX "d" +#define PRIiMAX __PRI64_PREFIX "i" -#define PRIdPTR "Id" -#define PRIiPTR "Ii" +#define PRIdPTR __PRIPTR_PREFIX "d" +#define PRIiPTR __PRIPTR_PREFIX "i" // The fprintf macros for unsigned integers are: #define PRIo8 "o" @@ -115,41 +123,41 @@ typedef struct { #define PRIxFAST16 "hx" #define PRIXFAST16 "hX" -#define PRIo32 "I32o" -#define PRIu32 "I32u" -#define PRIx32 "I32x" -#define PRIX32 "I32X" -#define PRIoLEAST32 "I32o" -#define PRIuLEAST32 "I32u" -#define PRIxLEAST32 "I32x" 
-#define PRIXLEAST32 "I32X" -#define PRIoFAST32 "I32o" -#define PRIuFAST32 "I32u" -#define PRIxFAST32 "I32x" -#define PRIXFAST32 "I32X" +#define PRIo32 "o" +#define PRIu32 "u" +#define PRIx32 "x" +#define PRIX32 "X" +#define PRIoLEAST32 "o" +#define PRIuLEAST32 "u" +#define PRIxLEAST32 "x" +#define PRIXLEAST32 "X" +#define PRIoFAST32 "o" +#define PRIuFAST32 "u" +#define PRIxFAST32 "x" +#define PRIXFAST32 "X" -#define PRIo64 "I64o" -#define PRIu64 "I64u" -#define PRIx64 "I64x" -#define PRIX64 "I64X" -#define PRIoLEAST64 "I64o" -#define PRIuLEAST64 "I64u" -#define PRIxLEAST64 "I64x" -#define PRIXLEAST64 "I64X" -#define PRIoFAST64 "I64o" -#define PRIuFAST64 "I64u" -#define PRIxFAST64 "I64x" -#define PRIXFAST64 "I64X" +#define PRIo64 __PRI64_PREFIX "o" +#define PRIu64 __PRI64_PREFIX "u" +#define PRIx64 __PRI64_PREFIX "x" +#define PRIX64 __PRI64_PREFIX "X" +#define PRIoLEAST64 __PRI64_PREFIX "o" +#define PRIuLEAST64 __PRI64_PREFIX "u" +#define PRIxLEAST64 __PRI64_PREFIX "x" +#define PRIXLEAST64 __PRI64_PREFIX "X" +#define PRIoFAST64 __PRI64_PREFIX "o" +#define PRIuFAST64 __PRI64_PREFIX "u" +#define PRIxFAST64 __PRI64_PREFIX "x" +#define PRIXFAST64 __PRI64_PREFIX "X" -#define PRIoMAX "I64o" -#define PRIuMAX "I64u" -#define PRIxMAX "I64x" -#define PRIXMAX "I64X" +#define PRIoMAX __PRI64_PREFIX "o" +#define PRIuMAX __PRI64_PREFIX "u" +#define PRIxMAX __PRI64_PREFIX "x" +#define PRIXMAX __PRI64_PREFIX "X" -#define PRIoPTR "Io" -#define PRIuPTR "Iu" -#define PRIxPTR "Ix" -#define PRIXPTR "IX" +#define PRIoPTR __PRIPTR_PREFIX "o" +#define PRIuPTR __PRIPTR_PREFIX "u" +#define PRIxPTR __PRIPTR_PREFIX "x" +#define PRIXPTR __PRIPTR_PREFIX "X" // The fscanf macros for signed integers are: #define SCNd8 "d" From c584fc75bb57ba275605c66522cbae45098d56f0 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Wed, 2 May 2012 21:30:53 +0200 Subject: [PATCH 0223/3378] Don't use sizeof() on a VARIABLE_ARRAY In the alloca() case, this fails to be the right size. 
--- src/stats.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/stats.c b/src/stats.c index 1234e565..433b80d1 100644 --- a/src/stats.c +++ b/src/stats.c @@ -494,7 +494,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, size_t isz; unsigned i, ninitialized; - isz = sizeof(initialized); + isz = sizeof(bool) * narenas; xmallctl("arenas.initialized", initialized, &isz, NULL, 0); for (i = ninitialized = 0; i < narenas; i++) { @@ -523,7 +523,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, size_t isz; unsigned i; - isz = sizeof(initialized); + isz = sizeof(bool) * narenas; xmallctl("arenas.initialized", initialized, &isz, NULL, 0); From 34a8cf6c4029c09e1db776b7027a9c1b31f9e5b4 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 2 May 2012 20:41:42 -0700 Subject: [PATCH 0224/3378] Fix a base allocator deadlock. Fix a base allocator deadlock due to chunk_recycle() calling back into the base allocator. --- src/chunk.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/src/chunk.c b/src/chunk.c index 7ac229cb..166d1eab 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -30,19 +30,30 @@ size_t arena_maxclass; /* Max size class for arenas. */ /******************************************************************************/ /* Function prototypes for non-inline static functions. 
*/ -static void *chunk_recycle(size_t size, size_t alignment, bool *zero); +static void *chunk_recycle(size_t size, size_t alignment, bool base, + bool *zero); static void chunk_record(void *chunk, size_t size); /******************************************************************************/ static void * -chunk_recycle(size_t size, size_t alignment, bool *zero) +chunk_recycle(size_t size, size_t alignment, bool base, bool *zero) { void *ret; extent_node_t *node; extent_node_t key; size_t alloc_size, leadsize, trailsize; + if (base) { + /* + * This function may need to call base_node_{,de}alloc(), but + * the current chunk allocation request is on behalf of the + * base allocator. Avoid deadlock (and if that weren't an + * issue, potential for infinite recursion) by returning NULL. + */ + return (NULL); + } + alloc_size = size + alignment - chunksize; /* Beware size_t wrap-around. */ if (alloc_size < size) @@ -125,7 +136,7 @@ chunk_alloc(size_t size, size_t alignment, bool base, bool *zero) assert((size & chunksize_mask) == 0); assert((alignment & chunksize_mask) == 0); - ret = chunk_recycle(size, alignment, zero); + ret = chunk_recycle(size, alignment, base, zero); if (ret != NULL) goto label_return; From de6fbdb72c6e1401b36f8f2073404645bac6cd2b Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 9 May 2012 13:05:04 -0700 Subject: [PATCH 0225/3378] Fix chunk_alloc_mmap() bugs. Simplify chunk_alloc_mmap() to no longer attempt map extension. The extra complexity isn't warranted, because although in the success case it saves one system call as compared to immediately falling back to chunk_alloc_mmap_slow(), it also makes the failure case even more expensive. This simplification removes two bugs: - For Windows platforms, pages_unmap() wasn't being called for unaligned mappings prior to falling back to chunk_alloc_mmap_slow(). This caused permanent virtual memory leaks. 
- For non-Windows platforms, alignment greater than chunksize caused pages_map() to be called with size 0 when attempting map extension. This always resulted in an mmap() error, and subsequent fallback to chunk_alloc_mmap_slow(). --- src/chunk.c | 1 + src/chunk_mmap.c | 45 ++++++++++----------------------------------- 2 files changed, 11 insertions(+), 35 deletions(-) diff --git a/src/chunk.c b/src/chunk.c index 166d1eab..bb6189ee 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -134,6 +134,7 @@ chunk_alloc(size_t size, size_t alignment, bool base, bool *zero) assert(size != 0); assert((size & chunksize_mask) == 0); + assert(alignment != 0); assert((alignment & chunksize_mask) == 0); ret = chunk_recycle(size, alignment, base, zero); diff --git a/src/chunk_mmap.c b/src/chunk_mmap.c index 9f388d28..c8da6556 100644 --- a/src/chunk_mmap.c +++ b/src/chunk_mmap.c @@ -16,6 +16,8 @@ pages_map(void *addr, size_t size) { void *ret; + assert(size != 0); + #ifdef _WIN32 /* * If VirtualAlloc can't allocate at the given address when one is @@ -164,51 +166,24 @@ chunk_alloc_mmap(size_t size, size_t alignment, bool *zero) * NetBSD has), but in the absence of such a feature, we have to work * hard to efficiently create aligned mappings. The reliable, but * slow method is to create a mapping that is over-sized, then trim the - * excess. However, that always results in at least one call to + * excess. However, that always results in one or two calls to * pages_unmap(). * - * A more optimistic approach is to try mapping precisely the right - * amount, then try to append another mapping if alignment is off. In - * practice, this works out well as long as the application is not - * interleaving mappings via direct mmap() calls. If we do run into a - * situation where there is an interleaved mapping and we are unable to - * extend an unaligned mapping, our best option is to switch to the - * slow method until mmap() returns another aligned mapping. 
This will - * tend to leave a gap in the memory map that is too small to cause - * later problems for the optimistic method. - * - * Another possible confounding factor is address space layout - * randomization (ASLR), which causes mmap(2) to disregard the - * requested address. As such, repeatedly trying to extend unaligned - * mappings could result in an infinite loop, so if extension fails, - * immediately fall back to the reliable method of over-allocation - * followed by trimming. + * Optimistically try mapping precisely the right amount before falling + * back to the slow method, with the expectation that the optimistic + * approach works most of the time. */ + assert(alignment != 0); + assert((alignment & chunksize_mask) == 0); + ret = pages_map(NULL, size); if (ret == NULL) return (NULL); - offset = ALIGNMENT_ADDR2OFFSET(ret, alignment); if (offset != 0) { -#ifdef _WIN32 + pages_unmap(ret, size); return (chunk_alloc_mmap_slow(size, alignment, zero)); -#else - /* Try to extend chunk boundary. */ - if (pages_map((void *)((uintptr_t)ret + size), chunksize - - offset) == NULL) { - /* - * Extension failed. Clean up, then fall back to the - * reliable-but-expensive method. - */ - pages_unmap(ret, size); - return (chunk_alloc_mmap_slow(size, alignment, zero)); - } else { - /* Clean up unneeded leading space. */ - pages_unmap(ret, chunksize - offset); - ret = (void *)((uintptr_t)ret + (chunksize - offset)); - } -#endif } assert(ret != NULL); From 374d26a43bcceb12eb56ed7cc47815d7f933901c Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 9 May 2012 14:48:35 -0700 Subject: [PATCH 0226/3378] Fix chunk_recycle() to stop leaking trailing chunks. Fix chunk_recycle() to correctly compute trailsize and re-insert trailing chunks. This fixes a major virtual memory leak. Simplify chunk_record() to avoid dropping/re-acquiring chunks_mtx. 
--- src/chunk.c | 78 ++++++++++++++++++++++++++--------------------------- 1 file changed, 38 insertions(+), 40 deletions(-) diff --git a/src/chunk.c b/src/chunk.c index bb6189ee..6bc24544 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -68,8 +68,8 @@ chunk_recycle(size_t size, size_t alignment, bool base, bool *zero) } leadsize = ALIGNMENT_CEILING((uintptr_t)node->addr, alignment) - (uintptr_t)node->addr; - assert(alloc_size >= leadsize + size); - trailsize = alloc_size - leadsize - size; + assert(node->size >= leadsize + size); + trailsize = node->size - leadsize - size; ret = (void *)((uintptr_t)node->addr + leadsize); /* Remove node from the tree. */ extent_tree_szad_remove(&chunks_szad, node); @@ -195,50 +195,48 @@ chunk_record(void *chunk, size_t size) pages_purge(chunk, size); - xnode = NULL; + /* + * Allocate a node before acquiring chunks_mtx even though it might not + * be needed, because base_node_alloc() may cause a new base chunk to + * be allocated, which could cause deadlock if chunks_mtx were already + * held. + */ + xnode = base_node_alloc(); + malloc_mutex_lock(&chunks_mtx); - while (true) { - key.addr = (void *)((uintptr_t)chunk + size); - node = extent_tree_ad_nsearch(&chunks_ad, &key); - /* Try to coalesce forward. */ - if (node != NULL && node->addr == key.addr) { + key.addr = (void *)((uintptr_t)chunk + size); + node = extent_tree_ad_nsearch(&chunks_ad, &key); + /* Try to coalesce forward. */ + if (node != NULL && node->addr == key.addr) { + /* + * Coalesce chunk with the following address range. This does + * not change the position within chunks_ad, so only + * remove/insert from/into chunks_szad. + */ + extent_tree_szad_remove(&chunks_szad, node); + node->addr = chunk; + node->size += size; + extent_tree_szad_insert(&chunks_szad, node); + if (xnode != NULL) + base_node_dealloc(xnode); + } else { + /* Coalescing forward failed, so insert a new node. */ + if (xnode == NULL) { /* - * Coalesce chunk with the following address range. 
- * This does not change the position within chunks_ad, - * so only remove/insert from/into chunks_szad. - */ - extent_tree_szad_remove(&chunks_szad, node); - node->addr = chunk; - node->size += size; - extent_tree_szad_insert(&chunks_szad, node); - break; - } else if (xnode == NULL) { - /* - * It is possible that base_node_alloc() will cause a - * new base chunk to be allocated, so take care not to - * deadlock on chunks_mtx, and recover if another thread - * deallocates an adjacent chunk while this one is busy - * allocating xnode. + * base_node_alloc() failed, which is an exceedingly + * unlikely failure. Leak chunk; its pages have + * already been purged, so this is only a virtual + * memory leak. */ malloc_mutex_unlock(&chunks_mtx); - xnode = base_node_alloc(); - if (xnode == NULL) - return; - malloc_mutex_lock(&chunks_mtx); - } else { - /* Coalescing forward failed, so insert a new node. */ - node = xnode; - xnode = NULL; - node->addr = chunk; - node->size = size; - extent_tree_ad_insert(&chunks_ad, node); - extent_tree_szad_insert(&chunks_szad, node); - break; + return; } + node = xnode; + node->addr = chunk; + node->size = size; + extent_tree_ad_insert(&chunks_ad, node); + extent_tree_szad_insert(&chunks_szad, node); } - /* Discard xnode if it ended up unused due to a race. */ - if (xnode != NULL) - base_node_dealloc(xnode); /* Try to coalesce backward. */ prev = extent_tree_ad_prev(&chunks_ad, node); From 2e671ffbadc02fc7de8cbafdd1031e3b0ad73c5b Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 9 May 2012 16:12:00 -0700 Subject: [PATCH 0227/3378] Add the --enable-mremap option. Add the --enable-mremap option, and disable the use of mremap(2) by default, for the same reason that freeing chunks via munmap(2) is disabled by default on Linux: semi-permanent VM map fragmentation. 
--- INSTALL | 6 +++ configure.ac | 38 ++++++++++++++----- doc/jemalloc.xml.in | 10 +++++ .../jemalloc/internal/jemalloc_internal.h.in | 7 ++++ include/jemalloc/jemalloc_defs.h.in | 15 +++++--- src/ctl.c | 3 ++ src/huge.c | 2 +- 7 files changed, 64 insertions(+), 17 deletions(-) diff --git a/INSTALL b/INSTALL index 7e3051ac..e40a7edd 100644 --- a/INSTALL +++ b/INSTALL @@ -108,6 +108,12 @@ any of the following arguments (not a definitive list) to 'configure': released in bulk, thus reducing the total number of mutex operations. See the "opt.tcache" option for usage details. +--enable-mremap + Enable huge realloc() via mremap(2). mremap() is disabled by default + because the flavor used is specific to Linux, which has a quirk in its + virtual memory allocation algorithm that causes semi-permanent VM map holes + under normal jemalloc operation. + --disable-munmap Disable virtual memory deallocation via munmap(2); instead keep track of the virtual memory for later use. munmap() is disabled by default (i.e. diff --git a/configure.ac b/configure.ac index 3b32b884..a09db7d0 100644 --- a/configure.ac +++ b/configure.ac @@ -372,16 +372,6 @@ else AC_DEFINE([JEMALLOC_TLS_MODEL], [ ]) fi -JE_COMPILABLE([mremap(...MREMAP_FIXED...)], [ -#define _GNU_SOURCE -#include -], [ -void *p = mremap((void *)0, 0, 0, MREMAP_MAYMOVE|MREMAP_FIXED, (void *)0); -], [je_cv_mremap_fixed]) -if test "x${je_cv_mremap_fixed}" = "xyes" ; then - AC_DEFINE([JEMALLOC_MREMAP_FIXED], [ ]) -fi - dnl Support optional additions to rpath. AC_ARG_WITH([rpath], [AS_HELP_STRING([--with-rpath=], [Colon-separated rpath (ELF systems only)])], @@ -743,6 +733,33 @@ if test "x$enable_tcache" = "x1" ; then fi AC_SUBST([enable_tcache]) +dnl Disable mremap() for huge realloc() by default. 
+AC_ARG_ENABLE([mremap], + [AS_HELP_STRING([--enable-mremap], [Enable mremap(2) for huge realloc()])], +[if test "x$enable_mremap" = "xno" ; then + enable_mremap="0" +else + enable_mremap="1" +fi +], +[enable_mremap="0"] +) +if test "x$enable_mremap" = "x1" ; then + JE_COMPILABLE([mremap(...MREMAP_FIXED...)], [ +#define _GNU_SOURCE +#include +], [ +void *p = mremap((void *)0, 0, 0, MREMAP_MAYMOVE|MREMAP_FIXED, (void *)0); +], [je_cv_mremap_fixed]) + if test "x${je_cv_mremap_fixed}" = "xno" ; then + enable_mremap="0" + fi +fi +if test "x$enable_mremap" = "x1" ; then + AC_DEFINE([JEMALLOC_MREMAP], [ ]) +fi +AC_SUBST([enable_mremap]) + dnl Enable VM deallocation via munmap() by default. AC_ARG_ENABLE([munmap], [AS_HELP_STRING([--disable-munmap], [Disable VM deallocation via munmap(2)])], @@ -1261,6 +1278,7 @@ AC_MSG_RESULT([fill : ${enable_fill}]) AC_MSG_RESULT([utrace : ${enable_utrace}]) AC_MSG_RESULT([valgrind : ${enable_valgrind}]) AC_MSG_RESULT([xmalloc : ${enable_xmalloc}]) +AC_MSG_RESULT([mremap : ${enable_mremap}]) AC_MSG_RESULT([munmap : ${enable_munmap}]) AC_MSG_RESULT([dss : ${enable_dss}]) AC_MSG_RESULT([lazy_lock : ${enable_lazy_lock}]) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 02961f6b..93c16dcf 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -650,6 +650,16 @@ for (i = 0; i < nbins; i++) { during build configuration. + + + config.mremap + (bool) + r- + + was specified during + build configuration. 
+ + config.munmap diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index e441285b..268cd146 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -123,6 +123,13 @@ static const bool config_prof_libunwind = false #endif ; +static const bool config_mremap = +#ifdef JEMALLOC_MREMAP + true +#else + false +#endif + ; static const bool config_munmap = #ifdef JEMALLOC_MUNMAP true diff --git a/include/jemalloc/jemalloc_defs.h.in b/include/jemalloc/jemalloc_defs.h.in index c9ab6468..c469142a 100644 --- a/include/jemalloc/jemalloc_defs.h.in +++ b/include/jemalloc/jemalloc_defs.h.in @@ -193,12 +193,18 @@ /* * If defined, use munmap() to unmap freed chunks, rather than storing them for - * later reuse. This is automatically disabled if configuration determines - * that common sequences of mmap()/munmap() calls will cause virtual memory map - * holes. + * later reuse. This is disabled by default on Linux because common sequences + * of mmap()/munmap() calls will cause virtual memory map holes. */ #undef JEMALLOC_MUNMAP +/* + * If defined, use mremap(...MREMAP_FIXED...) for huge realloc(). This is + * disabled by default because it is Linux-specific and it will cause virtual + * memory map holes, much like munmap(2) does. + */ +#undef JEMALLOC_MREMAP + /* TLS is used to map arenas and magazine caches to threads. */ #undef JEMALLOC_TLS @@ -221,9 +227,6 @@ #undef JEMALLOC_ZONE #undef JEMALLOC_ZONE_VERSION -/* If defined, use mremap(...MREMAP_FIXED...) for huge realloc(). */ -#undef JEMALLOC_MREMAP_FIXED - /* * Methods for purging unused pages differ between operating systems. 
* diff --git a/src/ctl.c b/src/ctl.c index dddf3bee..55e76677 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -76,6 +76,7 @@ CTL_PROTO(config_debug) CTL_PROTO(config_dss) CTL_PROTO(config_fill) CTL_PROTO(config_lazy_lock) +CTL_PROTO(config_mremap) CTL_PROTO(config_munmap) CTL_PROTO(config_prof) CTL_PROTO(config_prof_libgcc) @@ -207,6 +208,7 @@ static const ctl_named_node_t config_node[] = { {NAME("dss"), CTL(config_dss)}, {NAME("fill"), CTL(config_fill)}, {NAME("lazy_lock"), CTL(config_lazy_lock)}, + {NAME("mremap"), CTL(config_mremap)}, {NAME("munmap"), CTL(config_munmap)}, {NAME("prof"), CTL(config_prof)}, {NAME("prof_libgcc"), CTL(config_prof_libgcc)}, @@ -1118,6 +1120,7 @@ CTL_RO_BOOL_CONFIG_GEN(config_debug) CTL_RO_BOOL_CONFIG_GEN(config_dss) CTL_RO_BOOL_CONFIG_GEN(config_fill) CTL_RO_BOOL_CONFIG_GEN(config_lazy_lock) +CTL_RO_BOOL_CONFIG_GEN(config_mremap) CTL_RO_BOOL_CONFIG_GEN(config_munmap) CTL_RO_BOOL_CONFIG_GEN(config_prof) CTL_RO_BOOL_CONFIG_GEN(config_prof_libgcc) diff --git a/src/huge.c b/src/huge.c index 67b282d1..8a4ec942 100644 --- a/src/huge.c +++ b/src/huge.c @@ -140,11 +140,11 @@ huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, */ copysize = (size < oldsize) ? size : oldsize; +#ifdef JEMALLOC_MREMAP /* * Use mremap(2) if this is a huge-->huge reallocation, and neither the * source nor the destination are in dss. */ -#ifdef JEMALLOC_MREMAP_FIXED if (oldsize >= chunksize && (config_dss == false || (chunk_in_dss(ptr) == false && chunk_in_dss(ret) == false))) { size_t newsize = huge_salloc(ret); From a6770a70493bc25495d17bc8b4a0246b3877918b Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Thu, 3 May 2012 14:12:49 +0200 Subject: [PATCH 0228/3378] Remove -fno-common compiler flag for OS X. It doesn't allow the je_malloc_message and je_malloc_conf symbols to be overridden when linking statically. 
--- configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index a09db7d0..a72019e5 100644 --- a/configure.ac +++ b/configure.ac @@ -239,7 +239,7 @@ dnl to make happen otherwise. default_munmap="1" case "${host}" in *-*-darwin*) - CFLAGS="$CFLAGS -fno-common" + CFLAGS="$CFLAGS" abi="macho" AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) RPATH="" From 37b6f95dcd866f51c91488531a2efc3ed4c2b754 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Mon, 7 May 2012 16:08:34 +0200 Subject: [PATCH 0229/3378] Export je_memalign and je_valloc da99e31 removed attributes on je_memalign and je_valloc, while they didn't have a definition in the jemalloc.h header, thus making them non-exported. Export them again, by defining them in the jemalloc.h header. --- include/jemalloc/jemalloc.h.in | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/include/jemalloc/jemalloc.h.in b/include/jemalloc/jemalloc.h.in index 47a4b9b7..ad069485 100644 --- a/include/jemalloc/jemalloc.h.in +++ b/include/jemalloc/jemalloc.h.in @@ -50,6 +50,15 @@ JEMALLOC_EXPORT void *je_aligned_alloc(size_t alignment, size_t size) JEMALLOC_EXPORT void *je_realloc(void *ptr, size_t size); JEMALLOC_EXPORT void je_free(void *ptr); +#ifdef JEMALLOC_OVERRIDE_MEMALIGN +JEMALLOC_EXPORT void * je_memalign(size_t alignment, size_t size) + JEMALLOC_ATTR(malloc); +#endif + +#ifdef JEMALLOC_OVERRIDE_VALLOC +JEMALLOC_EXPORT void * je_valloc(size_t size) JEMALLOC_ATTR(malloc); +#endif + JEMALLOC_EXPORT size_t je_malloc_usable_size(const void *ptr); JEMALLOC_EXPORT void je_malloc_stats_print(void (*write_cb)(void *, const char *), void *je_cbopaque, const char *opts); From 80fe0478e6b0b3a0c84ea1dfdeec1fc5685841dc Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 9 May 2012 23:08:48 -0700 Subject: [PATCH 0230/3378] Generalize "stats.mapped" documentation. 
Generalize "stats.mapped" documentation to state that all inactive chunks are omitted, now that it is possible for mmap'ed chunks to be omitted in addition to DSS chunks. --- doc/jemalloc.xml.in | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 93c16dcf..877c500f 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1471,7 +1471,7 @@ malloc_conf = "xmalloc:true";]]> application. This is a multiple of the chunk size, and is at least as large as stats.active. This - does not include inactive chunks embedded in the DSS. + does not include inactive chunks. @@ -1482,7 +1482,7 @@ malloc_conf = "xmalloc:true";]]> [] Total number of chunks actively mapped on behalf of the - application. This does not include inactive chunks embedded in the DSS. + application. This does not include inactive chunks. From 53bd42c1fe35c25ea299b96d546a9d0089c6f78d Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 10 May 2012 00:18:46 -0700 Subject: [PATCH 0231/3378] Update a comment. --- include/jemalloc/internal/arena.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 264b5d3a..9ce08e5c 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -93,13 +93,13 @@ struct arena_chunk_map_s { * Run address (or size) and various flags are stored together. The bit * layout looks like (assuming 32-bit system): * - * ???????? ???????? ????---- ----dula + * ???????? ???????? ????nnnn nnnndula * * ? : Unallocated: Run address for first/last pages, unset for internal * pages. * Small: Run page offset. * Large: Run size for first page, unset for trailing pages. - * - : Unused. + * n : binind for small size class, BININD_INVALID for large size class. * d : dirty? * u : unzeroed? * l : large? 
@@ -118,14 +118,14 @@ struct arena_chunk_map_s { * [dula] : bit unset * * Unallocated (clean): - * ssssssss ssssssss ssss1111 1111du-a + * ssssssss ssssssss ssss++++ ++++du-a * xxxxxxxx xxxxxxxx xxxxxxxx xxxx-Uxx - * ssssssss ssssssss ssss1111 1111dU-a + * ssssssss ssssssss ssss++++ ++++dU-a * * Unallocated (dirty): - * ssssssss ssssssss ssss1111 1111D--a + * ssssssss ssssssss ssss++++ ++++D--a * xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx - * ssssssss ssssssss ssss1111 1111D--a + * ssssssss ssssssss ssss++++ ++++D--a * * Small: * pppppppp pppppppp ppppnnnn nnnnd--A @@ -133,15 +133,15 @@ struct arena_chunk_map_s { * pppppppp pppppppp ppppnnnn nnnnd--A * * Large: - * ssssssss ssssssss ssss1111 1111D-LA + * ssssssss ssssssss ssss++++ ++++D-LA * xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx - * -------- -------- ----1111 1111D-LA + * -------- -------- ----++++ ++++D-LA * * Large (sampled, size <= PAGE): * ssssssss ssssssss ssssnnnn nnnnD-LA * * Large (not sampled, size == PAGE): - * ssssssss ssssssss ssss1111 1111D-LA + * ssssssss ssssssss ssss++++ ++++D-LA */ size_t bits; #define CHUNK_MAP_BININD_SHIFT 4 From 5b0c99649fa71674daadf4dd53b1ab05428483fb Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 10 May 2012 15:47:24 -0700 Subject: [PATCH 0232/3378] Refactor arena_run_alloc(). Refactor duplicated arena_run_alloc() code into arena_run_alloc_helper(). 
--- src/arena.c | 58 ++++++++++++++++++++++------------------------------- 1 file changed, 24 insertions(+), 34 deletions(-) diff --git a/src/arena.c b/src/arena.c index 9f24e7ca..94410aab 100644 --- a/src/arena.c +++ b/src/arena.c @@ -44,6 +44,8 @@ static void arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, size_t binind, bool zero); static arena_chunk_t *arena_chunk_alloc(arena_t *arena); static void arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk); +static arena_run_t *arena_run_alloc_helper(arena_t *arena, size_t size, + bool large, size_t binind, bool zero); static arena_run_t *arena_run_alloc(arena_t *arena, size_t size, bool large, size_t binind, bool zero); static void arena_purge(arena_t *arena, bool all); @@ -454,19 +456,12 @@ arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk) } static arena_run_t * -arena_run_alloc(arena_t *arena, size_t size, bool large, size_t binind, +arena_run_alloc_helper(arena_t *arena, size_t size, bool large, size_t binind, bool zero) { - arena_chunk_t *chunk; arena_run_t *run; arena_chunk_map_t *mapelm, key; - assert(size <= arena_maxclass); - assert((size & PAGE_MASK) == 0); - assert((large && binind == BININD_INVALID) || (large == false && binind - != BININD_INVALID)); - - /* Search the arena's chunks for the lowest best fit. */ key.bits = size | CHUNK_MAP_KEY; mapelm = arena_avail_tree_nsearch(&arena->runs_avail_dirty, &key); if (mapelm != NULL) { @@ -493,6 +488,26 @@ arena_run_alloc(arena_t *arena, size_t size, bool large, size_t binind, return (run); } + return (NULL); +} + +static arena_run_t * +arena_run_alloc(arena_t *arena, size_t size, bool large, size_t binind, + bool zero) +{ + arena_chunk_t *chunk; + arena_run_t *run; + + assert(size <= arena_maxclass); + assert((size & PAGE_MASK) == 0); + assert((large && binind == BININD_INVALID) || (large == false && binind + != BININD_INVALID)); + + /* Search the arena's chunks for the lowest best fit. 
*/ + run = arena_run_alloc_helper(arena, size, large, binind, zero); + if (run != NULL) + return (run); + /* * No usable runs. Create a new chunk from which to allocate the run. */ @@ -508,32 +523,7 @@ arena_run_alloc(arena_t *arena, size_t size, bool large, size_t binind, * sufficient memory available while this one dropped arena->lock in * arena_chunk_alloc(), so search one more time. */ - mapelm = arena_avail_tree_nsearch(&arena->runs_avail_dirty, &key); - if (mapelm != NULL) { - arena_chunk_t *run_chunk = CHUNK_ADDR2BASE(mapelm); - size_t pageind = (((uintptr_t)mapelm - - (uintptr_t)run_chunk->map) / sizeof(arena_chunk_map_t)) - + map_bias; - - run = (arena_run_t *)((uintptr_t)run_chunk + (pageind << - LG_PAGE)); - arena_run_split(arena, run, size, large, binind, zero); - return (run); - } - mapelm = arena_avail_tree_nsearch(&arena->runs_avail_clean, &key); - if (mapelm != NULL) { - arena_chunk_t *run_chunk = CHUNK_ADDR2BASE(mapelm); - size_t pageind = (((uintptr_t)mapelm - - (uintptr_t)run_chunk->map) / sizeof(arena_chunk_map_t)) - + map_bias; - - run = (arena_run_t *)((uintptr_t)run_chunk + (pageind << - LG_PAGE)); - arena_run_split(arena, run, size, large, binind, zero); - return (run); - } - - return (NULL); + return (arena_run_alloc_helper(arena, size, large, binind, zero)); } static inline void From 30fe12b866edbc2cf9aaef299063b392ea125aac Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 10 May 2012 17:09:17 -0700 Subject: [PATCH 0233/3378] Add arena chunk map assertions. --- src/arena.c | 45 ++++++++++++++++++++++++++++++--------------- 1 file changed, 30 insertions(+), 15 deletions(-) diff --git a/src/arena.c b/src/arena.c index 94410aab..021a0e2d 100644 --- a/src/arena.c +++ b/src/arena.c @@ -353,17 +353,20 @@ arena_chunk_alloc(arena_t *arena) chunk = arena->spare; arena->spare = NULL; - /* Insert the run into the appropriate runs_avail_* tree. 
*/ - if (arena_mapbits_dirty_get(chunk, map_bias) == 0) - runs_avail = &arena->runs_avail_clean; - else - runs_avail = &arena->runs_avail_dirty; + assert(arena_mapbits_allocated_get(chunk, map_bias) == 0); + assert(arena_mapbits_allocated_get(chunk, chunk_npages-1) == 0); assert(arena_mapbits_unallocated_size_get(chunk, map_bias) == arena_maxclass); assert(arena_mapbits_unallocated_size_get(chunk, chunk_npages-1) == arena_maxclass); assert(arena_mapbits_dirty_get(chunk, map_bias) == arena_mapbits_dirty_get(chunk, chunk_npages-1)); + + /* Insert the run into the appropriate runs_avail_* tree. */ + if (arena_mapbits_dirty_get(chunk, map_bias) == 0) + runs_avail = &arena->runs_avail_clean; + else + runs_avail = &arena->runs_avail_dirty; arena_avail_tree_insert(runs_avail, arena_mapp_get(chunk, map_bias)); } else { @@ -427,6 +430,15 @@ arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk) { arena_avail_tree_t *runs_avail; + assert(arena_mapbits_allocated_get(chunk, map_bias) == 0); + assert(arena_mapbits_allocated_get(chunk, chunk_npages-1) == 0); + assert(arena_mapbits_unallocated_size_get(chunk, map_bias) == + arena_maxclass); + assert(arena_mapbits_unallocated_size_get(chunk, chunk_npages-1) == + arena_maxclass); + assert(arena_mapbits_dirty_get(chunk, map_bias) == + arena_mapbits_dirty_get(chunk, chunk_npages-1)); + /* * Remove run from the appropriate runs_avail_* tree, so that the arena * does not use it. 
@@ -578,6 +590,8 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) */ if (chunk == arena->spare) { assert(arena_mapbits_dirty_get(chunk, map_bias) != 0); + assert(arena_mapbits_dirty_get(chunk, chunk_npages-1) != 0); + arena_chunk_alloc(arena); } @@ -590,7 +604,9 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) npages = arena_mapbits_unallocated_size_get(chunk, pageind) >> LG_PAGE; assert(pageind + npages <= chunk_npages); - if (arena_mapbits_dirty_get(chunk, pageind)) { + assert(arena_mapbits_dirty_get(chunk, pageind) == + arena_mapbits_dirty_get(chunk, pageind+npages-1)); + if (arena_mapbits_dirty_get(chunk, pageind) != 0) { size_t i; arena_avail_tree_remove( @@ -832,6 +848,8 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty) * The run is dirty if the caller claims to have dirtied it, as well as * if it was already dirty before being allocated. */ + assert(arena_mapbits_dirty_get(chunk, run_ind) == + arena_mapbits_dirty_get(chunk, run_ind+run_pages-1)); if (arena_mapbits_dirty_get(chunk, run_ind) != 0) dirty = true; flag_dirty = dirty ? CHUNK_MAP_DIRTY : 0; @@ -931,9 +949,6 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty) if (size == arena_maxclass) { assert(run_ind == map_bias); assert(run_pages == (arena_maxclass >> LG_PAGE)); - assert(arena_mapbits_allocated_get(chunk, map_bias) == 0); - assert(arena_mapbits_unallocated_size_get(chunk, map_bias) == - arena_maxclass); arena_chunk_dealloc(arena, chunk); } @@ -1514,16 +1529,16 @@ arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, * trim the clean pages before deallocating the dirty portion of the * run. */ + assert(arena_mapbits_dirty_get(chunk, run_ind) == + arena_mapbits_dirty_get(chunk, run_ind+npages-1)); if (arena_mapbits_dirty_get(chunk, run_ind) == 0 && past - run_ind < npages) { - /* - * Trim clean pages. Convert to large run beforehand. Set the - * last map element first, in case this is a one-page run. 
- */ - arena_mapbits_large_set(chunk, run_ind+npages-1, 0, - arena_mapbits_unzeroed_get(chunk, run_ind+npages-1)); + /* Trim clean pages. Convert to large run beforehand. */ + assert(npages > 0); arena_mapbits_large_set(chunk, run_ind, bin_info->run_size, arena_mapbits_unzeroed_get(chunk, run_ind)); + arena_mapbits_large_set(chunk, run_ind+npages-1, 0, + arena_mapbits_unzeroed_get(chunk, run_ind+npages-1)); arena_run_trim_tail(arena, chunk, run, (npages << LG_PAGE), ((past - run_ind) << LG_PAGE), false); /* npages = past - run_ind; */ From d8ceef6c5558fdab8f9448376ae065a9e5ffcbdd Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 10 May 2012 20:59:39 -0700 Subject: [PATCH 0234/3378] Fix large calloc() zeroing bugs. Refactor code such that arena_mapbits_{large,small}_set() always preserves the unzeroed flag, and manually manipulate the unzeroed flag in the one case where it actually gets reset (in arena_chunk_purge()). This fixes unzeroed preservation bugs in arena_run_split() and arena_ralloc_large_grow(). These bugs caused large calloc() to return non-zeroed memory under some circumstances. --- ChangeLog | 1 + include/jemalloc/internal/arena.h | 15 +++++++---- src/arena.c | 43 +++++++++++++------------------ 3 files changed, 29 insertions(+), 30 deletions(-) diff --git a/ChangeLog b/ChangeLog index 691630bc..0a2b2ca5 100644 --- a/ChangeLog +++ b/ChangeLog @@ -71,6 +71,7 @@ found in the git revision history: write-after-free memory corruption. - Fix a potential deadlock that could occur during interval- and growth-triggered heap profile dumps. + - Fix large calloc() zeroing bugs due to dropping chunk map unzeroed flags. - Fix chunk_alloc_dss() to stop claiming memory is zeroed. This bug could cause memory corruption and crashes with --enable-dss specified. 
- Fix fork-related bugs that could cause deadlock in children between fork diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 9ce08e5c..0b0f640a 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -591,6 +591,7 @@ arena_mapbits_unallocated_set(arena_chunk_t *chunk, size_t pageind, size_t size, mapbitsp = arena_mapbitsp_get(chunk, pageind); assert((size & PAGE_MASK) == 0); assert((flags & ~CHUNK_MAP_FLAGS_MASK) == 0); + assert((flags & (CHUNK_MAP_DIRTY|CHUNK_MAP_UNZEROED)) == flags); *mapbitsp = size | CHUNK_MAP_BININD_INVALID | flags; } @@ -611,12 +612,14 @@ arena_mapbits_large_set(arena_chunk_t *chunk, size_t pageind, size_t size, size_t flags) { size_t *mapbitsp; + size_t unzeroed; mapbitsp = arena_mapbitsp_get(chunk, pageind); assert((size & PAGE_MASK) == 0); - assert((flags & ~CHUNK_MAP_FLAGS_MASK) == 0); - *mapbitsp = size | CHUNK_MAP_BININD_INVALID | flags | CHUNK_MAP_LARGE | - CHUNK_MAP_ALLOCATED; + assert((flags & CHUNK_MAP_DIRTY) == flags); + unzeroed = *mapbitsp & CHUNK_MAP_UNZEROED; /* Preserve unzeroed. */ + *mapbitsp = size | CHUNK_MAP_BININD_INVALID | flags | unzeroed | + CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; } JEMALLOC_INLINE void @@ -637,13 +640,15 @@ arena_mapbits_small_set(arena_chunk_t *chunk, size_t pageind, size_t runind, size_t binind, size_t flags) { size_t *mapbitsp; + size_t unzeroed; assert(binind < BININD_INVALID); mapbitsp = arena_mapbitsp_get(chunk, pageind); assert(pageind - runind >= map_bias); - assert((flags & ~CHUNK_MAP_FLAGS_MASK) == 0); + assert((flags & CHUNK_MAP_DIRTY) == flags); + unzeroed = *mapbitsp & CHUNK_MAP_UNZEROED; /* Preserve unzeroed. 
*/ *mapbitsp = (runind << LG_PAGE) | (binind << CHUNK_MAP_BININD_SHIFT) | - flags | CHUNK_MAP_ALLOCATED; + flags | unzeroed | CHUNK_MAP_ALLOCATED; } JEMALLOC_INLINE void diff --git a/src/arena.c b/src/arena.c index 021a0e2d..2a6150f3 100644 --- a/src/arena.c +++ b/src/arena.c @@ -311,8 +311,7 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, * small run, so that arena_dalloc_bin_run() has the ability to * conditionally trim clean pages. */ - arena_mapbits_small_set(chunk, run_ind, 0, binind, - arena_mapbits_unzeroed_get(chunk, run_ind) | flag_dirty); + arena_mapbits_small_set(chunk, run_ind, 0, binind, flag_dirty); /* * The first page will always be dirtied during small run * initialization, so a validation failure here would not @@ -322,16 +321,13 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, arena_mapbits_unzeroed_get(chunk, run_ind) == 0) arena_chunk_validate_zeroed(chunk, run_ind); for (i = 1; i < need_pages - 1; i++) { - arena_mapbits_small_set(chunk, run_ind+i, i, - binind, arena_mapbits_unzeroed_get(chunk, - run_ind+i)); + arena_mapbits_small_set(chunk, run_ind+i, i, binind, 0); if (config_debug && flag_dirty == 0 && arena_mapbits_unzeroed_get(chunk, run_ind+i) == 0) arena_chunk_validate_zeroed(chunk, run_ind+i); } arena_mapbits_small_set(chunk, run_ind+need_pages-1, - need_pages-1, binind, arena_mapbits_unzeroed_get(chunk, - run_ind+need_pages-1) | flag_dirty); + need_pages-1, binind, flag_dirty); if (config_debug && flag_dirty == 0 && arena_mapbits_unzeroed_get(chunk, run_ind+need_pages-1) == 0) { @@ -612,8 +608,10 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) arena_avail_tree_remove( &arena->runs_avail_dirty, mapelm); + arena_mapbits_unzeroed_set(chunk, pageind, + flag_unzeroed); arena_mapbits_large_set(chunk, pageind, - (npages << LG_PAGE), flag_unzeroed); + (npages << LG_PAGE), 0); /* * Update internal elements in the page map, so * that CHUNK_MAP_UNZEROED is properly set. 
@@ -623,8 +621,10 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) pageind+i, flag_unzeroed); } if (npages > 1) { + arena_mapbits_unzeroed_set(chunk, + pageind+npages-1, flag_unzeroed); arena_mapbits_large_set(chunk, - pageind+npages-1, 0, flag_unzeroed); + pageind+npages-1, 0, 0); } if (config_stats) { @@ -979,10 +979,8 @@ arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, * run first, in case of single-page runs. */ assert(arena_mapbits_large_size_get(chunk, pageind) == oldsize); - arena_mapbits_large_set(chunk, pageind+head_npages-1, 0, flag_dirty | - arena_mapbits_unzeroed_get(chunk, pageind+head_npages-1)); - arena_mapbits_large_set(chunk, pageind, oldsize-newsize, flag_dirty | - arena_mapbits_unzeroed_get(chunk, pageind)); + arena_mapbits_large_set(chunk, pageind+head_npages-1, 0, flag_dirty); + arena_mapbits_large_set(chunk, pageind, oldsize-newsize, flag_dirty); if (config_debug) { UNUSED size_t tail_npages = newsize >> LG_PAGE; @@ -991,8 +989,8 @@ arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, assert(arena_mapbits_dirty_get(chunk, pageind+head_npages+tail_npages-1) == flag_dirty); } - arena_mapbits_large_set(chunk, pageind+head_npages, newsize, flag_dirty - | arena_mapbits_unzeroed_get(chunk, pageind+head_npages)); + arena_mapbits_large_set(chunk, pageind+head_npages, newsize, + flag_dirty); arena_run_dalloc(arena, run, false); } @@ -1013,10 +1011,8 @@ arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, * run first, in case of single-page runs. 
*/ assert(arena_mapbits_large_size_get(chunk, pageind) == oldsize); - arena_mapbits_large_set(chunk, pageind+head_npages-1, 0, flag_dirty | - arena_mapbits_unzeroed_get(chunk, pageind+head_npages-1)); - arena_mapbits_large_set(chunk, pageind, newsize, flag_dirty | - arena_mapbits_unzeroed_get(chunk, pageind)); + arena_mapbits_large_set(chunk, pageind+head_npages-1, 0, flag_dirty); + arena_mapbits_large_set(chunk, pageind, newsize, flag_dirty); if (config_debug) { UNUSED size_t tail_npages = (oldsize - newsize) >> LG_PAGE; @@ -1026,8 +1022,7 @@ arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, pageind+head_npages+tail_npages-1) == flag_dirty); } arena_mapbits_large_set(chunk, pageind+head_npages, oldsize-newsize, - flag_dirty | arena_mapbits_unzeroed_get(chunk, - pageind+head_npages)); + flag_dirty); arena_run_dalloc(arena, (arena_run_t *)((uintptr_t)run + newsize), dirty); @@ -1535,10 +1530,8 @@ arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, npages) { /* Trim clean pages. Convert to large run beforehand. */ assert(npages > 0); - arena_mapbits_large_set(chunk, run_ind, bin_info->run_size, - arena_mapbits_unzeroed_get(chunk, run_ind)); - arena_mapbits_large_set(chunk, run_ind+npages-1, 0, - arena_mapbits_unzeroed_get(chunk, run_ind+npages-1)); + arena_mapbits_large_set(chunk, run_ind, bin_info->run_size, 0); + arena_mapbits_large_set(chunk, run_ind+npages-1, 0, 0); arena_run_trim_tail(arena, chunk, run, (npages << LG_PAGE), ((past - run_ind) << LG_PAGE), false); /* npages = past - run_ind; */ From 58ad1e4956affe0f9949445dce4410ad70b4cdac Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 11 May 2012 17:40:16 -0700 Subject: [PATCH 0235/3378] Return early in _malloc_{pre,post}fork() if uninitialized. Avoid mutex operations in _malloc_{pre,post}fork() unless jemalloc has been initialized. Reported by David Xu. 
--- src/jemalloc.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/jemalloc.c b/src/jemalloc.c index d42e91db..bc54cd7c 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1621,6 +1621,12 @@ _malloc_prefork(void) { unsigned i; +#ifdef JEMALLOC_MUTEX_INIT_CB + if (malloc_initialized == false) + return; +#endif + assert(malloc_initialized); + /* Acquire all mutexes in a safe order. */ malloc_mutex_prefork(&arenas_lock); for (i = 0; i < narenas; i++) { @@ -1642,6 +1648,12 @@ _malloc_postfork(void) { unsigned i; +#ifdef JEMALLOC_MUTEX_INIT_CB + if (malloc_initialized == false) + return; +#endif + assert(malloc_initialized); + /* Release all mutexes, now that fork() has completed. */ chunk_dss_postfork_parent(); huge_postfork_parent(); @@ -1658,6 +1670,8 @@ jemalloc_postfork_child(void) { unsigned i; + assert(malloc_initialized); + /* Release all mutexes, now that fork() has completed. */ chunk_dss_postfork_child(); huge_postfork_child(); From cbb71caceb1e53d0fd21284ce298885327c211b4 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 11 May 2012 17:00:20 -0700 Subject: [PATCH 0236/3378] Update ChangeLog for 3.0.0. 
--- ChangeLog | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/ChangeLog b/ChangeLog index 0a2b2ca5..231dd6da 100644 --- a/ChangeLog +++ b/ChangeLog @@ -6,7 +6,7 @@ found in the git revision history: http://www.canonware.com/cgi-bin/gitweb.cgi?p=jemalloc.git git://canonware.com/jemalloc.git -* 3.0.0 (XXX not yet released) +* 3.0.0 (May 11, 2012) Although this version adds some major new features, the primary focus is on internal code cleanup that facilitates maintainability and portability, most @@ -23,6 +23,7 @@ found in the git revision history: + FreeBSD + Mac OS X Lion + MinGW + + Windows (no support yet for replacing the system malloc) - Add support for additional architectures: + MIPS + SH4 @@ -31,12 +32,13 @@ found in the git revision history: - Add nallocm(), which rounds a request size up to the nearest size class without actually allocating. - Implement aligned_alloc() (blame C11). - - Add the --disable-munmap option, and make it the default on Linux. - - Add the --with-mangling option. - - Add the --disable-experimental option. - Add the "thread.tcache.enabled" mallctl. - Add the "opt.prof_final" mallctl. - Update pprof (from gperftools 2.0). + - Add the --with-mangling option. + - Add the --disable-experimental option. + - Add the --disable-munmap option, and make it the default on Linux. + - Add the --enable-mremap option, which disables use of mremap(2) by default. Incompatible changes: - Enable stats by default. From 3860eac17023933180ef5dfb5bd24077cda57dfe Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 15 May 2012 13:53:21 -0700 Subject: [PATCH 0237/3378] Fix heap profiling crash for realloc(p, 0) case. Fix prof_realloc() to not call prof_ctx_set() if a sampled object is being freed via realloc(p, 0). 
--- ChangeLog | 5 +++++ include/jemalloc/internal/prof.h | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 231dd6da..829482fe 100644 --- a/ChangeLog +++ b/ChangeLog @@ -6,6 +6,11 @@ found in the git revision history: http://www.canonware.com/cgi-bin/gitweb.cgi?p=jemalloc.git git://canonware.com/jemalloc.git +* 3.x.x (XXX not yet released) + + Bug fixes: + - Fix heap profiling crash if sampled object is freed via realloc(p, 0). + * 3.0.0 (May 11, 2012) Although this version adds some major new features, the primary focus is on diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index c3e3f9e4..6bed90b9 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -506,7 +506,7 @@ prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt, if ((uintptr_t)cnt > (uintptr_t)1U) { prof_ctx_set(ptr, cnt->ctx); cnt->epoch++; - } else + } else if (ptr != NULL) prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U); /*********/ mb_write(); From 781fe75e0a03f13bc1f5403acbbf87796ceea1dc Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 15 May 2012 14:48:14 -0700 Subject: [PATCH 0238/3378] Auto-detect whether running inside Valgrind. Auto-detect whether running inside Valgrind, thus removing the need to manually specify MALLOC_CONF=valgrind:true. --- ChangeLog | 4 +++ doc/jemalloc.xml.in | 27 +++++++---------- .../jemalloc/internal/jemalloc_internal.h.in | 1 + src/jemalloc.c | 29 ++++++++++--------- 4 files changed, 31 insertions(+), 30 deletions(-) diff --git a/ChangeLog b/ChangeLog index 829482fe..c8865059 100644 --- a/ChangeLog +++ b/ChangeLog @@ -8,6 +8,10 @@ found in the git revision history: * 3.x.x (XXX not yet released) + New features: + - Auto-detect whether running inside Valgrind, thus removing the need to + manually specify MALLOC_CONF=valgrind:true. + Bug fixes: - Fix heap profiling crash if sampled object is freed via realloc(p, 0). 
diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 877c500f..8a13a22c 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -846,7 +846,9 @@ for (i = 0; i < nbins; i++) { 0x5a. This is intended for debugging and will impact performance negatively. This option is disabled by default unless is specified during - configuration, in which case it is enabled by default. + configuration, in which case it is enabled by default unless running + inside Valgrind. @@ -865,8 +867,9 @@ for (i = 0; i < nbins; i++) { enabled. This feature is of particular use in combination with Valgrind, which can detect attempts to access quarantined objects. This is intended for debugging and will - impact performance negatively. The default quarantine size is - 0. + impact performance negatively. The default quarantine size is 0 unless + running inside Valgrind, in which case the default is 16 + MiB. @@ -885,7 +888,7 @@ for (i = 0; i < nbins; i++) { which needs redzones in order to do effective buffer overflow/underflow detection. This option is intended for debugging and will impact performance negatively. This option is disabled by - default. + default unless running inside Valgrind. @@ -926,15 +929,9 @@ for (i = 0; i < nbins; i++) { [] Valgrind - support enabled/disabled. If enabled, several other options are - automatically modified during options processing to work well with - Valgrind: opt.junk - and opt.zero are set - to false, opt.quarantine is - set to 16 MiB, and opt.redzone is set to - true. This option is disabled by default. + support enabled/disabled. This option is vestigal because jemalloc + auto-detects whether it is running inside Valgrind. This option is + disabled by default, unless running inside Valgrind. @@ -1865,9 +1862,7 @@ malloc_conf = "xmalloc:true";]]> it detects, because the performance impact for storing such information would be prohibitive. 
However, jemalloc does integrate with the most excellent Valgrind tool if the - configuration option is enabled and the - opt.valgrind option - is enabled. + configuration option is enabled. DIAGNOSTIC MESSAGES diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 268cd146..c6714ec1 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -424,6 +424,7 @@ static const bool config_ivsalloc = VALGRIND_FREELIKE_BLOCK(ptr, rzsize); \ } while (0) #else +#define RUNNING_ON_VALGRIND ((unsigned)0) #define VALGRIND_MALLOCLIKE_BLOCK(addr, sizeB, rzB, is_zeroed) #define VALGRIND_RESIZEINPLACE_BLOCK(addr, oldSizeB, newSizeB, rzB) #define VALGRIND_FREELIKE_BLOCK(addr, rzB) diff --git a/src/jemalloc.c b/src/jemalloc.c index bc54cd7c..77ea8c81 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -377,6 +377,20 @@ malloc_conf_init(void) const char *opts, *k, *v; size_t klen, vlen; + /* + * Automatically configure valgrind before processing options. The + * valgrind option remains in jemalloc 3.x for compatibility reasons. + */ + if (config_valgrind) { + opt_valgrind = (RUNNING_ON_VALGRIND != 0) ? true : false; + if (config_fill && opt_valgrind) { + opt_junk = false; + assert(opt_zero == false); + opt_quarantine = JEMALLOC_VALGRIND_QUARANTINE_DEFAULT; + opt_redzone = true; + } + } + for (i = 0; i < 3; i++) { /* Get runtime configuration. 
*/ switch (i) { @@ -553,20 +567,7 @@ malloc_conf_init(void) CONF_HANDLE_BOOL(opt_utrace, "utrace") } if (config_valgrind) { - bool hit; - CONF_HANDLE_BOOL_HIT(opt_valgrind, - "valgrind", hit) - if (config_fill && opt_valgrind && hit) { - opt_junk = false; - opt_zero = false; - if (opt_quarantine == 0) { - opt_quarantine = - JEMALLOC_VALGRIND_QUARANTINE_DEFAULT; - } - opt_redzone = true; - } - if (hit) - continue; + CONF_HANDLE_BOOL(opt_valgrind, "valgrind") } if (config_xmalloc) { CONF_HANDLE_BOOL(opt_xmalloc, "xmalloc") From 174b70efb4942be112b1ea38db1e5c6ca7599e5d Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 15 May 2012 23:31:53 -0700 Subject: [PATCH 0239/3378] Disable tcache by default if running inside Valgrind. Disable tcache by default if running inside Valgrind, in order to avoid making unallocated objects appear reachable to Valgrind. --- ChangeLog | 4 ++++ doc/jemalloc.xml.in | 3 ++- src/jemalloc.c | 2 ++ 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index c8865059..80a21e79 100644 --- a/ChangeLog +++ b/ChangeLog @@ -12,6 +12,10 @@ found in the git revision history: - Auto-detect whether running inside Valgrind, thus removing the need to manually specify MALLOC_CONF=valgrind:true. + Incompatible changes: + - Disable tcache by default if running inside Valgrind, in order to avoid + making unallocated objects appear reachable to Valgrind. + Bug fixes: - Fix heap profiling crash if sampled object is freed via realloc(p, 0). diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 8a13a22c..b34467be 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -969,7 +969,8 @@ malloc_conf = "xmalloc:true";]]> opt.lg_tcache_max option for related tuning information. This option is enabled by - default. + default unless running inside Valgrind. 
diff --git a/src/jemalloc.c b/src/jemalloc.c index 77ea8c81..481936dc 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -389,6 +389,8 @@ malloc_conf_init(void) opt_quarantine = JEMALLOC_VALGRIND_QUARANTINE_DEFAULT; opt_redzone = true; } + if (config_tcache && opt_valgrind) + opt_tcache = false; } for (i = 0; i < 3; i++) { From f1966e1dc7543543e98386180f2b8530bf9725ab Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 14 May 2012 15:03:48 -0700 Subject: [PATCH 0240/3378] Update a comment. --- src/arena.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/arena.c b/src/arena.c index 2a6150f3..bf1614b3 100644 --- a/src/arena.c +++ b/src/arena.c @@ -561,7 +561,7 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) #ifdef JEMALLOC_PURGE_MADVISE_DONTNEED /* * madvise(..., MADV_DONTNEED) results in zero-filled pages for anonymous - * mappings, but not for file-backed mappings. + * mappings. */ 0 #else From 5c710cee783a44061fa2c467ffd8984b8047b90e Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 23 May 2012 16:09:22 -0700 Subject: [PATCH 0241/3378] Remove const from __*_hook variable declarations. Remove const from __*_hook variable declarations, so that glibc can modify them during process forking. --- ChangeLog | 2 ++ src/jemalloc.c | 9 ++++----- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/ChangeLog b/ChangeLog index 80a21e79..66f1d691 100644 --- a/ChangeLog +++ b/ChangeLog @@ -18,6 +18,8 @@ found in the git revision history: Bug fixes: - Fix heap profiling crash if sampled object is freed via realloc(p, 0). + - Remove const from __*_hook variable declarations, so that glibc can modify + them during process forking. * 3.0.0 (May 11, 2012) diff --git a/src/jemalloc.c b/src/jemalloc.c index 481936dc..1ab8a1cf 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1265,11 +1265,10 @@ je_valloc(size_t size) * passed an extra argument for the caller return address, which will be * ignored. 
*/ -JEMALLOC_EXPORT void (* const __free_hook)(void *ptr) = je_free; -JEMALLOC_EXPORT void *(* const __malloc_hook)(size_t size) = je_malloc; -JEMALLOC_EXPORT void *(* const __realloc_hook)(void *ptr, size_t size) = - je_realloc; -JEMALLOC_EXPORT void *(* const __memalign_hook)(size_t alignment, size_t size) = +JEMALLOC_EXPORT void (* __free_hook)(void *ptr) = je_free; +JEMALLOC_EXPORT void *(* __malloc_hook)(size_t size) = je_malloc; +JEMALLOC_EXPORT void *(* __realloc_hook)(void *ptr, size_t size) = je_realloc; +JEMALLOC_EXPORT void *(* __memalign_hook)(size_t alignment, size_t size) = je_memalign; #endif From dd03a2e37702b21fd692bf05d46ee9f97133ab78 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 8 Oct 2012 15:41:06 -0700 Subject: [PATCH 0242/3378] Define LG_QUANTUM for hppa. Submitted by Jory Pratt. --- include/jemalloc/internal/jemalloc_internal.h.in | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index c6714ec1..b5b09e28 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -270,6 +270,9 @@ static const bool config_ivsalloc = # ifdef __arm__ # define LG_QUANTUM 3 # endif +# ifdef __hppa__ +# define LG_QUANTUM 4 +# endif # ifdef __mips__ # define LG_QUANTUM 3 # endif From 35579afb55c0a53261743b3e292f60e76046ff16 Mon Sep 17 00:00:00 2001 From: Corey Richardson Date: Wed, 26 Sep 2012 16:06:14 -0400 Subject: [PATCH 0243/3378] Remove unused variable and branch (reported by clang-analzyer) --- src/util.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/util.c b/src/util.c index 9b73c3ec..b3a01143 100644 --- a/src/util.c +++ b/src/util.c @@ -377,7 +377,6 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) case '\0': goto label_out; case '%': { bool alt_form = false; - bool zero_pad = false; bool left_justify = false; bool plus_space = false; bool plus_plus = 
false; @@ -398,10 +397,6 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) assert(alt_form == false); alt_form = true; break; - case '0': - assert(zero_pad == false); - zero_pad = true; - break; case '-': assert(left_justify == false); left_justify = true; From 1d553f72cbbcbacc1802d2cc96a4024315e616b3 Mon Sep 17 00:00:00 2001 From: Corey Richardson Date: Wed, 26 Sep 2012 16:28:29 -0400 Subject: [PATCH 0244/3378] If sysconf() fails, the number of CPUs is reported as UINT_MAX, not 1 as it should be --- src/jemalloc.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 1ab8a1cf..7fa07449 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -254,12 +254,13 @@ malloc_ncpus(void) result = si.dwNumberOfProcessors; #else result = sysconf(_SC_NPROCESSORS_ONLN); +#endif if (result == -1) { /* Error. */ ret = 1; - } -#endif - ret = (unsigned)result; + } else { + ret = (unsigned)result; + } return (ret); } From f4c3f8545beed9f7e606cef7b1d06fae3f630269 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 8 Oct 2012 15:48:04 -0700 Subject: [PATCH 0245/3378] Fix error return value in thread_tcache_enabled_ctl(). Reported by Corey Richardson. --- src/ctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ctl.c b/src/ctl.c index 55e76677..5be066a2 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1032,8 +1032,8 @@ thread_tcache_enabled_ctl(const size_t *mib, size_t miblen, void *oldp, } READ(oldval, bool); -label_return: ret = 0; +label_return: return (ret); } From 7de92767c20cb72c94609b9c78985526fb84a679 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 8 Oct 2012 17:56:11 -0700 Subject: [PATCH 0246/3378] Fix mlockall()/madvise() interaction. mlockall(2) can cause purging via madvise(2) to fail. Fix purging code to check whether madvise() succeeded, and base zeroed page metadata on the result. Reported by Olivier Lecomte. 
--- include/jemalloc/internal/chunk_mmap.h | 2 +- include/jemalloc/internal/extent.h | 3 ++ src/arena.c | 50 +++++++++++--------------- src/chunk.c | 22 +++++++----- src/chunk_mmap.c | 12 +++++-- 5 files changed, 48 insertions(+), 41 deletions(-) diff --git a/include/jemalloc/internal/chunk_mmap.h b/include/jemalloc/internal/chunk_mmap.h index b29f39e9..f24abac7 100644 --- a/include/jemalloc/internal/chunk_mmap.h +++ b/include/jemalloc/internal/chunk_mmap.h @@ -9,7 +9,7 @@ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -void pages_purge(void *addr, size_t length); +bool pages_purge(void *addr, size_t length); void *chunk_alloc_mmap(size_t size, size_t alignment, bool *zero); bool chunk_dealloc_mmap(void *chunk, size_t size); diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index 36af8be8..ba95ca81 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -23,6 +23,9 @@ struct extent_node_s { /* Total region size. */ size_t size; + + /* True if zero-filled; used by chunk recycling code. */ + bool zeroed; }; typedef rb_tree(extent_node_t) extent_tree_t; diff --git a/src/arena.c b/src/arena.c index bf1614b3..674ffe90 100644 --- a/src/arena.c +++ b/src/arena.c @@ -551,24 +551,12 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) { ql_head(arena_chunk_map_t) mapelms; arena_chunk_map_t *mapelm; - size_t pageind, flag_unzeroed; + size_t pageind; size_t ndirty; size_t nmadvise; ql_new(&mapelms); - flag_unzeroed = -#ifdef JEMALLOC_PURGE_MADVISE_DONTNEED - /* - * madvise(..., MADV_DONTNEED) results in zero-filled pages for anonymous - * mappings. 
- */ - 0 -#else - CHUNK_MAP_UNZEROED -#endif - ; - /* * If chunk is the spare, temporarily re-allocate it, 1) so that its * run is reinserted into runs_avail_dirty, and 2) so that it cannot be @@ -603,26 +591,12 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) assert(arena_mapbits_dirty_get(chunk, pageind) == arena_mapbits_dirty_get(chunk, pageind+npages-1)); if (arena_mapbits_dirty_get(chunk, pageind) != 0) { - size_t i; - arena_avail_tree_remove( &arena->runs_avail_dirty, mapelm); - arena_mapbits_unzeroed_set(chunk, pageind, - flag_unzeroed); arena_mapbits_large_set(chunk, pageind, (npages << LG_PAGE), 0); - /* - * Update internal elements in the page map, so - * that CHUNK_MAP_UNZEROED is properly set. - */ - for (i = 1; i < npages - 1; i++) { - arena_mapbits_unzeroed_set(chunk, - pageind+i, flag_unzeroed); - } if (npages > 1) { - arena_mapbits_unzeroed_set(chunk, - pageind+npages-1, flag_unzeroed); arena_mapbits_large_set(chunk, pageind+npages-1, 0, 0); } @@ -685,14 +659,30 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) sizeof(arena_chunk_map_t)) + map_bias; size_t npages = arena_mapbits_large_size_get(chunk, pageind) >> LG_PAGE; + bool unzeroed; + size_t flag_unzeroed, i; assert(pageind + npages <= chunk_npages); assert(ndirty >= npages); if (config_debug) ndirty -= npages; - - pages_purge((void *)((uintptr_t)chunk + (pageind << LG_PAGE)), - (npages << LG_PAGE)); + unzeroed = pages_purge((void *)((uintptr_t)chunk + (pageind << + LG_PAGE)), (npages << LG_PAGE)); + flag_unzeroed = unzeroed ? CHUNK_MAP_UNZEROED : 0; + /* + * Set the unzeroed flag for all pages, now that pages_purge() + * has returned whether the pages were zeroed as a side effect + * of purging. This chunk map modification is safe even though + * the arena mutex isn't currently owned by this thread, + * because the run is marked as allocated, thus protecting it + * from being modified by any other thread. 
As long as these + * writes don't perturb the first and last elements' + * CHUNK_MAP_ALLOCATED bits, behavior is well defined. + */ + for (i = 0; i < npages; i++) { + arena_mapbits_unzeroed_set(chunk, pageind+i, + flag_unzeroed); + } if (config_stats) nmadvise++; } diff --git a/src/chunk.c b/src/chunk.c index 6bc24544..b43f9507 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -43,6 +43,7 @@ chunk_recycle(size_t size, size_t alignment, bool base, bool *zero) extent_node_t *node; extent_node_t key; size_t alloc_size, leadsize, trailsize; + bool zeroed; if (base) { /* @@ -107,17 +108,18 @@ chunk_recycle(size_t size, size_t alignment, bool base, bool *zero) } malloc_mutex_unlock(&chunks_mtx); - if (node != NULL) + zeroed = false; + if (node != NULL) { + if (node->zeroed) { + zeroed = true; + *zero = true; + } base_node_dealloc(node); -#ifdef JEMALLOC_PURGE_MADVISE_DONTNEED - /* Pages are zeroed as a side effect of pages_purge(). */ - *zero = true; -#else - if (*zero) { + } + if (zeroed == false && *zero) { VALGRIND_MAKE_MEM_UNDEFINED(ret, size); memset(ret, 0, size); } -#endif return (ret); } @@ -191,9 +193,10 @@ label_return: static void chunk_record(void *chunk, size_t size) { + bool unzeroed; extent_node_t *xnode, *node, *prev, key; - pages_purge(chunk, size); + unzeroed = pages_purge(chunk, size); /* * Allocate a node before acquiring chunks_mtx even though it might not @@ -216,6 +219,7 @@ chunk_record(void *chunk, size_t size) extent_tree_szad_remove(&chunks_szad, node); node->addr = chunk; node->size += size; + node->zeroed = (node->zeroed && (unzeroed == false)); extent_tree_szad_insert(&chunks_szad, node); if (xnode != NULL) base_node_dealloc(xnode); @@ -234,6 +238,7 @@ chunk_record(void *chunk, size_t size) node = xnode; node->addr = chunk; node->size = size; + node->zeroed = (unzeroed == false); extent_tree_ad_insert(&chunks_ad, node); extent_tree_szad_insert(&chunks_szad, node); } @@ -253,6 +258,7 @@ chunk_record(void *chunk, size_t size) 
extent_tree_szad_remove(&chunks_szad, node); node->addr = prev->addr; node->size += prev->size; + node->zeroed = (node->zeroed && prev->zeroed); extent_tree_szad_insert(&chunks_szad, node); base_node_dealloc(prev); diff --git a/src/chunk_mmap.c b/src/chunk_mmap.c index c8da6556..8a42e759 100644 --- a/src/chunk_mmap.c +++ b/src/chunk_mmap.c @@ -113,22 +113,30 @@ pages_trim(void *addr, size_t alloc_size, size_t leadsize, size_t size) #endif } -void +bool pages_purge(void *addr, size_t length) { + bool unzeroed; #ifdef _WIN32 VirtualAlloc(addr, length, MEM_RESET, PAGE_READWRITE); + unzeroed = true; #else # ifdef JEMALLOC_PURGE_MADVISE_DONTNEED # define JEMALLOC_MADV_PURGE MADV_DONTNEED +# define JEMALLOC_MADV_ZEROS true # elif defined(JEMALLOC_PURGE_MADVISE_FREE) # define JEMALLOC_MADV_PURGE MADV_FREE +# define JEMALLOC_MADV_ZEROS false # else # error "No method defined for purging unused dirty pages." # endif - madvise(addr, length, JEMALLOC_MADV_PURGE); + int err = madvise(addr, length, JEMALLOC_MADV_PURGE); + unzeroed = (JEMALLOC_MADV_ZEROS == false || err != 0); +# undef JEMALLOC_MADV_PURGE +# undef JEMALLOC_MADV_ZEROS #endif + return (unzeroed); } static void * From 20f1fc95adb35ea63dc61f47f2b0ffbd37d39f32 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 9 Oct 2012 14:46:22 -0700 Subject: [PATCH 0247/3378] Fix fork(2)-related deadlocks. Add a library constructor for jemalloc that initializes the allocator. This fixes a race that could occur if threads were created by the main thread prior to any memory allocation, followed by fork(2), and then memory allocation in the child process. Fix the prefork/postfork functions to acquire/release the ctl, prof, and rtree mutexes. This fixes various fork() child process deadlocks, but one possible deadlock remains (intentionally) unaddressed: prof backtracing can acquire runtime library mutexes, so deadlock is still possible if heap profiling is enabled during fork(). 
This deadlock is known to be a real issue in at least the case of libgcc-based backtracing. Reported by tfengjun. --- include/jemalloc/internal/chunk.h | 3 ++ include/jemalloc/internal/ctl.h | 3 ++ include/jemalloc/internal/private_namespace.h | 19 +++++++++ include/jemalloc/internal/prof.h | 3 ++ include/jemalloc/internal/rtree.h | 3 ++ src/chunk.c | 30 +++++++++++++ src/ctl.c | 21 ++++++++++ src/jemalloc.c | 33 +++++++++++++-- src/prof.c | 42 +++++++++++++++++++ src/rtree.c | 21 ++++++++++ 10 files changed, 175 insertions(+), 3 deletions(-) diff --git a/include/jemalloc/internal/chunk.h b/include/jemalloc/internal/chunk.h index 8fb1fe6d..c3c3e9cd 100644 --- a/include/jemalloc/internal/chunk.h +++ b/include/jemalloc/internal/chunk.h @@ -45,6 +45,9 @@ extern size_t arena_maxclass; /* Max size class for arenas. */ void *chunk_alloc(size_t size, size_t alignment, bool base, bool *zero); void chunk_dealloc(void *chunk, size_t size, bool unmap); bool chunk_boot(void); +void chunk_prefork(void); +void chunk_postfork_parent(void); +void chunk_postfork_child(void); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index adf3827f..1d0c76a0 100644 --- a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -75,6 +75,9 @@ int ctl_nametomib(const char *name, size_t *mibp, size_t *miblenp); int ctl_bymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen); bool ctl_boot(void); +void ctl_prefork(void); +void ctl_postfork_parent(void); +void ctl_postfork_child(void); #define xmallctl(name, oldp, oldlenp, newp, newlen) do { \ if (je_mallctl(name, oldp, oldlenp, newp, newlen) \ diff --git a/include/jemalloc/internal/private_namespace.h b/include/jemalloc/internal/private_namespace.h index b8166470..28686dce 100644 --- a/include/jemalloc/internal/private_namespace.h +++ 
b/include/jemalloc/internal/private_namespace.h @@ -59,6 +59,7 @@ #define arenas_lock JEMALLOC_N(arenas_lock) #define arenas_lrun_i_index JEMALLOC_N(arenas_lrun_i_index) #define arenas_tls JEMALLOC_N(arenas_tls) +#define arenas_tsd JEMALLOC_N(arenas_tsd) #define arenas_tsd_boot JEMALLOC_N(arenas_tsd_boot) #define arenas_tsd_cleanup_wrapper JEMALLOC_N(arenas_tsd_cleanup_wrapper) #define arenas_tsd_get JEMALLOC_N(arenas_tsd_get) @@ -104,6 +105,9 @@ #define chunk_dss_prefork JEMALLOC_N(chunk_dss_prefork) #define chunk_in_dss JEMALLOC_N(chunk_in_dss) #define chunk_npages JEMALLOC_N(chunk_npages) +#define chunk_postfork_child JEMALLOC_N(chunk_postfork_child) +#define chunk_postfork_parent JEMALLOC_N(chunk_postfork_parent) +#define chunk_prefork JEMALLOC_N(chunk_prefork) #define chunks_mtx JEMALLOC_N(chunks_mtx) #define chunks_rtree JEMALLOC_N(chunks_rtree) #define chunksize JEMALLOC_N(chunksize) @@ -129,6 +133,9 @@ #define ctl_bymib JEMALLOC_N(ctl_bymib) #define ctl_byname JEMALLOC_N(ctl_byname) #define ctl_nametomib JEMALLOC_N(ctl_nametomib) +#define ctl_postfork_child JEMALLOC_N(ctl_postfork_child) +#define ctl_postfork_parent JEMALLOC_N(ctl_postfork_parent) +#define ctl_prefork JEMALLOC_N(ctl_prefork) #define extent_tree_ad_first JEMALLOC_N(extent_tree_ad_first) #define extent_tree_ad_insert JEMALLOC_N(extent_tree_ad_insert) #define extent_tree_ad_iter JEMALLOC_N(extent_tree_ad_iter) @@ -161,6 +168,7 @@ #define extent_tree_szad_reverse_iter_recurse JEMALLOC_N(extent_tree_szad_reverse_iter_recurse) #define extent_tree_szad_reverse_iter_start JEMALLOC_N(extent_tree_szad_reverse_iter_start) #define extent_tree_szad_search JEMALLOC_N(extent_tree_szad_search) +#define get_errno JEMALLOC_N(get_errno) #define hash JEMALLOC_N(hash) #define huge_allocated JEMALLOC_N(huge_allocated) #define huge_boot JEMALLOC_N(huge_boot) @@ -254,6 +262,9 @@ #define prof_lookup JEMALLOC_N(prof_lookup) #define prof_malloc JEMALLOC_N(prof_malloc) #define prof_mdump JEMALLOC_N(prof_mdump) 
+#define prof_postfork_child JEMALLOC_N(prof_postfork_child) +#define prof_postfork_parent JEMALLOC_N(prof_postfork_parent) +#define prof_prefork JEMALLOC_N(prof_prefork) #define prof_promote JEMALLOC_N(prof_promote) #define prof_realloc JEMALLOC_N(prof_realloc) #define prof_sample_accum_update JEMALLOC_N(prof_sample_accum_update) @@ -264,6 +275,7 @@ #define prof_tdata_init JEMALLOC_N(prof_tdata_init) #define prof_tdata_initialized JEMALLOC_N(prof_tdata_initialized) #define prof_tdata_tls JEMALLOC_N(prof_tdata_tls) +#define prof_tdata_tsd JEMALLOC_N(prof_tdata_tsd) #define prof_tdata_tsd_boot JEMALLOC_N(prof_tdata_tsd_boot) #define prof_tdata_tsd_cleanup_wrapper JEMALLOC_N(prof_tdata_tsd_cleanup_wrapper) #define prof_tdata_tsd_get JEMALLOC_N(prof_tdata_tsd_get) @@ -278,9 +290,13 @@ #define rtree_get JEMALLOC_N(rtree_get) #define rtree_get_locked JEMALLOC_N(rtree_get_locked) #define rtree_new JEMALLOC_N(rtree_new) +#define rtree_postfork_child JEMALLOC_N(rtree_postfork_child) +#define rtree_postfork_parent JEMALLOC_N(rtree_postfork_parent) +#define rtree_prefork JEMALLOC_N(rtree_prefork) #define rtree_set JEMALLOC_N(rtree_set) #define s2u JEMALLOC_N(s2u) #define sa2u JEMALLOC_N(sa2u) +#define set_errno JEMALLOC_N(set_errno) #define stats_arenas_i_bins_j_index JEMALLOC_N(stats_arenas_i_bins_j_index) #define stats_arenas_i_index JEMALLOC_N(stats_arenas_i_index) #define stats_arenas_i_lruns_j_index JEMALLOC_N(stats_arenas_i_lruns_j_index) @@ -311,6 +327,7 @@ #define tcache_enabled_initialized JEMALLOC_N(tcache_enabled_initialized) #define tcache_enabled_set JEMALLOC_N(tcache_enabled_set) #define tcache_enabled_tls JEMALLOC_N(tcache_enabled_tls) +#define tcache_enabled_tsd JEMALLOC_N(tcache_enabled_tsd) #define tcache_enabled_tsd_boot JEMALLOC_N(tcache_enabled_tsd_boot) #define tcache_enabled_tsd_cleanup_wrapper JEMALLOC_N(tcache_enabled_tsd_cleanup_wrapper) #define tcache_enabled_tsd_get JEMALLOC_N(tcache_enabled_tsd_get) @@ -325,6 +342,7 @@ #define tcache_stats_merge 
JEMALLOC_N(tcache_stats_merge) #define tcache_thread_cleanup JEMALLOC_N(tcache_thread_cleanup) #define tcache_tls JEMALLOC_N(tcache_tls) +#define tcache_tsd JEMALLOC_N(tcache_tsd) #define tcache_tsd_boot JEMALLOC_N(tcache_tsd_boot) #define tcache_tsd_cleanup_wrapper JEMALLOC_N(tcache_tsd_cleanup_wrapper) #define tcache_tsd_get JEMALLOC_N(tcache_tsd_get) @@ -332,6 +350,7 @@ #define thread_allocated_booted JEMALLOC_N(thread_allocated_booted) #define thread_allocated_initialized JEMALLOC_N(thread_allocated_initialized) #define thread_allocated_tls JEMALLOC_N(thread_allocated_tls) +#define thread_allocated_tsd JEMALLOC_N(thread_allocated_tsd) #define thread_allocated_tsd_boot JEMALLOC_N(thread_allocated_tsd_boot) #define thread_allocated_tsd_cleanup_wrapper JEMALLOC_N(thread_allocated_tsd_cleanup_wrapper) #define thread_allocated_tsd_get JEMALLOC_N(thread_allocated_tsd_get) diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index 6bed90b9..47f22ad2 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -223,6 +223,9 @@ void prof_tdata_cleanup(void *arg); void prof_boot0(void); void prof_boot1(void); bool prof_boot2(void); +void prof_prefork(void); +void prof_postfork_parent(void); +void prof_postfork_child(void); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ diff --git a/include/jemalloc/internal/rtree.h b/include/jemalloc/internal/rtree.h index 95d6355a..9bd98548 100644 --- a/include/jemalloc/internal/rtree.h +++ b/include/jemalloc/internal/rtree.h @@ -36,6 +36,9 @@ struct rtree_s { #ifdef JEMALLOC_H_EXTERNS rtree_t *rtree_new(unsigned bits); +void rtree_prefork(rtree_t *rtree); +void rtree_postfork_parent(rtree_t *rtree); +void rtree_postfork_child(rtree_t *rtree); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ diff --git a/src/chunk.c b/src/chunk.c index b43f9507..1730452f 
100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -318,3 +318,33 @@ chunk_boot(void) return (false); } + +void +chunk_prefork(void) +{ + + malloc_mutex_lock(&chunks_mtx); + if (config_ivsalloc) + rtree_prefork(chunks_rtree); + chunk_dss_prefork(); +} + +void +chunk_postfork_parent(void) +{ + + chunk_dss_postfork_parent(); + if (config_ivsalloc) + rtree_postfork_parent(chunks_rtree); + malloc_mutex_postfork_parent(&chunks_mtx); +} + +void +chunk_postfork_child(void) +{ + + chunk_dss_postfork_child(); + if (config_ivsalloc) + rtree_postfork_child(chunks_rtree); + malloc_mutex_postfork_child(&chunks_mtx); +} diff --git a/src/ctl.c b/src/ctl.c index 5be066a2..dec98832 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -827,6 +827,27 @@ ctl_boot(void) return (false); } +void +ctl_prefork(void) +{ + + malloc_mutex_lock(&ctl_mtx); +} + +void +ctl_postfork_parent(void) +{ + + malloc_mutex_postfork_parent(&ctl_mtx); +} + +void +ctl_postfork_child(void) +{ + + malloc_mutex_postfork_child(&ctl_mtx); +} + /******************************************************************************/ /* *_ctl() functions. */ diff --git a/src/jemalloc.c b/src/jemalloc.c index 7fa07449..4ea1f759 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1614,6 +1614,27 @@ je_nallocm(size_t *rsize, size_t size, int flags) * malloc during fork(). */ +/* + * If an application creates a thread before doing any allocation in the main + * thread, then calls fork(2) in the main thread followed by memory allocation + * in the child process, a race can occur that results in deadlock within the + * child: the main thread may have forked while the created thread had + * partially initialized the allocator. Ordinarily jemalloc prevents + * fork/malloc races via the following functions it registers during + * initialization using pthread_atfork(), but of course that does no good if + * the allocator isn't fully initialized at fork time. The following library + * constructor is a partial solution to this problem. 
It may still possible to + * trigger the deadlock described above, but doing so would involve forking via + * a library constructor that runs before jemalloc's runs. + */ +JEMALLOC_ATTR(constructor) +static void +jemalloc_constructor(void) +{ + + malloc_init(); +} + #ifndef JEMALLOC_MUTEX_INIT_CB void jemalloc_prefork(void) @@ -1631,14 +1652,16 @@ _malloc_prefork(void) assert(malloc_initialized); /* Acquire all mutexes in a safe order. */ + ctl_prefork(); malloc_mutex_prefork(&arenas_lock); for (i = 0; i < narenas; i++) { if (arenas[i] != NULL) arena_prefork(arenas[i]); } + prof_prefork(); base_prefork(); huge_prefork(); - chunk_dss_prefork(); + chunk_prefork(); } #ifndef JEMALLOC_MUTEX_INIT_CB @@ -1658,14 +1681,16 @@ _malloc_postfork(void) assert(malloc_initialized); /* Release all mutexes, now that fork() has completed. */ - chunk_dss_postfork_parent(); + chunk_postfork_parent(); huge_postfork_parent(); base_postfork_parent(); + prof_postfork_parent(); for (i = 0; i < narenas; i++) { if (arenas[i] != NULL) arena_postfork_parent(arenas[i]); } malloc_mutex_postfork_parent(&arenas_lock); + ctl_postfork_parent(); } void @@ -1676,14 +1701,16 @@ jemalloc_postfork_child(void) assert(malloc_initialized); /* Release all mutexes, now that fork() has completed. 
*/ - chunk_dss_postfork_child(); + chunk_postfork_child(); huge_postfork_child(); base_postfork_child(); + prof_postfork_child(); for (i = 0; i < narenas; i++) { if (arenas[i] != NULL) arena_postfork_child(arenas[i]); } malloc_mutex_postfork_child(&arenas_lock); + ctl_postfork_child(); } /******************************************************************************/ diff --git a/src/prof.c b/src/prof.c index de1d3929..04964ef7 100644 --- a/src/prof.c +++ b/src/prof.c @@ -1270,4 +1270,46 @@ prof_boot2(void) return (false); } +void +prof_prefork(void) +{ + + if (opt_prof) { + unsigned i; + + malloc_mutex_lock(&bt2ctx_mtx); + malloc_mutex_lock(&prof_dump_seq_mtx); + for (i = 0; i < PROF_NCTX_LOCKS; i++) + malloc_mutex_lock(&ctx_locks[i]); + } +} + +void +prof_postfork_parent(void) +{ + + if (opt_prof) { + unsigned i; + + for (i = 0; i < PROF_NCTX_LOCKS; i++) + malloc_mutex_postfork_parent(&ctx_locks[i]); + malloc_mutex_postfork_parent(&prof_dump_seq_mtx); + malloc_mutex_postfork_parent(&bt2ctx_mtx); + } +} + +void +prof_postfork_child(void) +{ + + if (opt_prof) { + unsigned i; + + for (i = 0; i < PROF_NCTX_LOCKS; i++) + malloc_mutex_postfork_child(&ctx_locks[i]); + malloc_mutex_postfork_child(&prof_dump_seq_mtx); + malloc_mutex_postfork_child(&bt2ctx_mtx); + } +} + /******************************************************************************/ diff --git a/src/rtree.c b/src/rtree.c index eb0ff1e2..90c6935a 100644 --- a/src/rtree.c +++ b/src/rtree.c @@ -44,3 +44,24 @@ rtree_new(unsigned bits) return (ret); } + +void +rtree_prefork(rtree_t *rtree) +{ + + malloc_mutex_prefork(&rtree->mutex); +} + +void +rtree_postfork_parent(rtree_t *rtree) +{ + + malloc_mutex_postfork_parent(&rtree->mutex); +} + +void +rtree_postfork_child(rtree_t *rtree) +{ + + malloc_mutex_postfork_child(&rtree->mutex); +} From b5225928fe106a7d809bd34e849abcd6941e93c7 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 9 Oct 2012 16:16:00 -0700 Subject: [PATCH 0248/3378] Fix fork(2)-related mutex 
acquisition order. Fix mutex acquisition order inversion for the chunks rtree and the base mutex. Chunks rtree acquisition was introduced by the previous commit, so this bug was short-lived. --- src/jemalloc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 4ea1f759..b04da188 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1659,9 +1659,9 @@ _malloc_prefork(void) arena_prefork(arenas[i]); } prof_prefork(); + chunk_prefork(); base_prefork(); huge_prefork(); - chunk_prefork(); } #ifndef JEMALLOC_MUTEX_INIT_CB @@ -1681,9 +1681,9 @@ _malloc_postfork(void) assert(malloc_initialized); /* Release all mutexes, now that fork() has completed. */ - chunk_postfork_parent(); huge_postfork_parent(); base_postfork_parent(); + chunk_postfork_parent(); prof_postfork_parent(); for (i = 0; i < narenas; i++) { if (arenas[i] != NULL) @@ -1701,9 +1701,9 @@ jemalloc_postfork_child(void) assert(malloc_initialized); /* Release all mutexes, now that fork() has completed. */ - chunk_postfork_child(); huge_postfork_child(); base_postfork_child(); + chunk_postfork_child(); prof_postfork_child(); for (i = 0; i < narenas; i++) { if (arenas[i] != NULL) From 247d1248478561c669a85d831e9758089f93a076 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 9 Oct 2012 16:20:10 -0700 Subject: [PATCH 0249/3378] Drop const from malloc_usable_size() argument on Linux. Drop const from malloc_usable_size() argument on Linux, in order to match the prototype in Linux's malloc.h. 
--- configure.ac | 3 +++ include/jemalloc/jemalloc.h.in | 3 ++- include/jemalloc/jemalloc_defs.h.in | 9 +++++++++ 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index a72019e5..1c52439e 100644 --- a/configure.ac +++ b/configure.ac @@ -237,6 +237,7 @@ dnl Define cpp macros in CPPFLAGS, rather than doing AC_DEFINE(macro), since the dnl definitions need to be seen before any headers are included, which is a pain dnl to make happen otherwise. default_munmap="1" +JEMALLOC_USABLE_SIZE_CONST="const" case "${host}" in *-*-darwin*) CFLAGS="$CFLAGS" @@ -262,6 +263,7 @@ case "${host}" in abi="elf" AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED], [ ]) AC_DEFINE([JEMALLOC_THREADED_INIT], [ ]) + JEMALLOC_USABLE_SIZE_CONST="" default_munmap="0" ;; *-*-netbsd*) @@ -323,6 +325,7 @@ case "${host}" in abi="elf" ;; esac +AC_DEFINE_UNQUOTED([JEMALLOC_USABLE_SIZE_CONST], [$JEMALLOC_USABLE_SIZE_CONST]) AC_SUBST([abi]) AC_SUBST([RPATH]) AC_SUBST([LD_PRELOAD_VAR]) diff --git a/include/jemalloc/jemalloc.h.in b/include/jemalloc/jemalloc.h.in index ad069485..aeb5d2b8 100644 --- a/include/jemalloc/jemalloc.h.in +++ b/include/jemalloc/jemalloc.h.in @@ -59,7 +59,8 @@ JEMALLOC_EXPORT void * je_memalign(size_t alignment, size_t size) JEMALLOC_EXPORT void * je_valloc(size_t size) JEMALLOC_ATTR(malloc); #endif -JEMALLOC_EXPORT size_t je_malloc_usable_size(const void *ptr); +JEMALLOC_EXPORT size_t je_malloc_usable_size( + JEMALLOC_USABLE_SIZE_CONST void *ptr); JEMALLOC_EXPORT void je_malloc_stats_print(void (*write_cb)(void *, const char *), void *je_cbopaque, const char *opts); JEMALLOC_EXPORT int je_mallctl(const char *name, void *oldp, diff --git a/include/jemalloc/jemalloc_defs.h.in b/include/jemalloc/jemalloc_defs.h.in index c469142a..1cd60254 100644 --- a/include/jemalloc/jemalloc_defs.h.in +++ b/include/jemalloc/jemalloc_defs.h.in @@ -221,6 +221,15 @@ #undef JEMALLOC_OVERRIDE_MEMALIGN #undef JEMALLOC_OVERRIDE_VALLOC +/* + * At least Linux omits the 
"const" in: + * + * size_t malloc_usable_size(const void *ptr); + * + * Match the operating system's prototype. + */ +#undef JEMALLOC_USABLE_SIZE_CONST + /* * Darwin (OS X) uses zones to work around Mach-O symbol override shortcomings. */ From 2cc11ff83748be63302b0289a3abb1d86e1e437f Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 9 Oct 2012 16:29:21 -0700 Subject: [PATCH 0250/3378] Make malloc_usable_size() implementation consistent with prototype. Use JEMALLOC_USABLE_SIZE_CONST for the malloc_usable_size() implementation as well as the prototype, for consistency's sake. --- src/jemalloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index b04da188..b2daa306 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1282,7 +1282,7 @@ JEMALLOC_EXPORT void *(* __memalign_hook)(size_t alignment, size_t size) = */ size_t -je_malloc_usable_size(const void *ptr) +je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr) { size_t ret; From d0ffd8ed4f6aa4cf7248028eddfcb35f93247fe4 Mon Sep 17 00:00:00 2001 From: Jan Beich Date: Tue, 18 Sep 2012 07:40:31 -0500 Subject: [PATCH 0251/3378] mark _pthread_mutex_init_calloc_cb as public explicitly Mozilla build hides everything by default using visibility pragma and unhides only explicitly listed headers. But this doesn't work on FreeBSD because _pthread_mutex_init_calloc_cb is neither documented nor exposed via any header. 
--- src/mutex.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mutex.c b/src/mutex.c index 37a843e6..55e18c23 100644 --- a/src/mutex.c +++ b/src/mutex.c @@ -64,7 +64,7 @@ pthread_create(pthread_t *__restrict thread, /******************************************************************************/ #ifdef JEMALLOC_MUTEX_INIT_CB -int _pthread_mutex_init_calloc_cb(pthread_mutex_t *mutex, +JEMALLOC_EXPORT int _pthread_mutex_init_calloc_cb(pthread_mutex_t *mutex, void *(calloc_cb)(size_t, size_t)); #endif From 609ae595f0358157b19311b0f9f9591db7cee705 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 11 Oct 2012 13:53:15 -0700 Subject: [PATCH 0252/3378] Add arena-specific and selective dss allocation. Add the "arenas.extend" mallctl, so that it is possible to create new arenas that are outside the set that jemalloc automatically multiplexes threads onto. Add the ALLOCM_ARENA() flag for {,r,d}allocm(), so that it is possible to explicitly allocate from a particular arena. Add the "opt.dss" mallctl, which controls the default precedence of dss allocation relative to mmap allocation. Add the "arena..dss" mallctl, which makes it possible to set the default dss precedence on a per arena or global basis. Add the "arena..purge" mallctl, which obsoletes "arenas.purge". Add the "stats.arenas..dss" mallctl. 
--- .gitignore | 2 +- Makefile.in | 6 +- doc/jemalloc.xml.in | 89 ++++- include/jemalloc/internal/arena.h | 15 +- include/jemalloc/internal/chunk.h | 5 +- include/jemalloc/internal/chunk_dss.h | 14 + include/jemalloc/internal/ctl.h | 2 + include/jemalloc/internal/huge.h | 2 +- .../jemalloc/internal/jemalloc_internal.h.in | 127 +++++-- include/jemalloc/internal/private_namespace.h | 21 +- include/jemalloc/jemalloc.h.in | 2 + src/arena.c | 134 ++++--- src/base.c | 3 +- src/chunk.c | 127 ++++--- src/chunk_dss.c | 37 +- src/ctl.c | 344 +++++++++++++++--- src/huge.c | 7 +- src/jemalloc.c | 130 +++++-- src/stats.c | 10 +- src/tcache.c | 4 +- test/ALLOCM_ARENA.c | 66 ++++ test/ALLOCM_ARENA.exp | 2 + test/thread_arena.c | 8 +- 23 files changed, 911 insertions(+), 246 deletions(-) create mode 100644 test/ALLOCM_ARENA.c create mode 100644 test/ALLOCM_ARENA.exp diff --git a/.gitignore b/.gitignore index e6e8bb00..6607a5fd 100644 --- a/.gitignore +++ b/.gitignore @@ -18,7 +18,7 @@ /src/*.[od] /test/*.[od] /test/*.out -/test/[a-z]* +/test/[a-zA-Z_]* !test/*.c !test/*.exp /VERSION diff --git a/Makefile.in b/Makefile.in index 6675b596..36448189 100644 --- a/Makefile.in +++ b/Makefile.in @@ -101,9 +101,9 @@ DOCS_HTML := $(DOCS_XML:$(objroot)%.xml=$(srcroot)%.html) DOCS_MAN3 := $(DOCS_XML:$(objroot)%.xml=$(srcroot)%.3) DOCS := $(DOCS_HTML) $(DOCS_MAN3) CTESTS := $(srcroot)test/aligned_alloc.c $(srcroot)test/allocated.c \ - $(srcroot)test/bitmap.c $(srcroot)test/mremap.c \ - $(srcroot)test/posix_memalign.c $(srcroot)test/thread_arena.c \ - $(srcroot)test/thread_tcache_enabled.c + $(srcroot)test/ALLOCM_ARENA.c $(srcroot)test/bitmap.c \ + $(srcroot)test/mremap.c $(srcroot)test/posix_memalign.c \ + $(srcroot)test/thread_arena.c $(srcroot)test/thread_tcache_enabled.c ifeq ($(enable_experimental), 1) CTESTS += $(srcroot)test/allocm.c $(srcroot)test/rallocm.c endif diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index b34467be..441c1a4a 100644 --- a/doc/jemalloc.xml.in +++ 
b/doc/jemalloc.xml.in @@ -368,6 +368,15 @@ for (i = 0; i < nbins; i++) { object. This constraint can apply to both growth and shrinkage. + + ALLOCM_ARENA(a) + + + Use the arena specified by the index + a. This macro does not validate that + a specifies an arena in the valid + range. + @@ -785,15 +794,29 @@ for (i = 0; i < nbins; i++) { chunk size is 4 MiB (2^22). + + + opt.dss + (const char *) + r- + + dss (sbrk + 2) allocation precedence as + related to mmap + 2 allocation. The following + settings are supported: “disabled”, “primary”, + and “secondary” (default). + + opt.narenas (size_t) r- - Maximum number of arenas to use. The default maximum - number of arenas is four times the number of CPUs, or one if there is a - single CPU. + Maximum number of arenas to use for automatic + multiplexing of threads and arenas. The default is four times the + number of CPUs, or one if there is a single CPU. @@ -1149,11 +1172,8 @@ malloc_conf = "xmalloc:true";]]> rw Get or set the arena associated with the calling - thread. The arena index must be less than the maximum number of arenas - (see the arenas.narenas - mallctl). If the specified arena was not initialized beforehand (see - the arenas.initialized mallctl), it will be automatically initialized as a side effect of calling this interface. @@ -1245,13 +1265,40 @@ malloc_conf = "xmalloc:true";]]> the developer may find manual flushing useful. + + + arena.<i>.purge + (unsigned) + -- + + Purge unused dirty pages for arena <i>, or for + all arenas if <i> equals arenas.narenas. + + + + + + arena.<i>.dss + (const char *) + rw + + Set the precedence of dss allocation as related to mmap + allocation for arena <i>, or for all arenas if <i> equals + arenas.narenas. See + opt.dss for supported + settings. + + + arenas.narenas (unsigned) r- - Maximum number of arenas. + Current limit on number of arenas. @@ -1370,6 +1417,16 @@ malloc_conf = "xmalloc:true";]]> for all arenas if none is specified. 
+ + + arenas.extend + (unsigned) + r- + + Extend the array of arenas by appending a new arena, + and returning the new arena index. + + prof.active @@ -1538,6 +1595,20 @@ malloc_conf = "xmalloc:true";]]> + + + stats.arenas.<i>.dss + (const char *) + r- + + dss (sbrk + 2) allocation precedence as + related to mmap + 2 allocation. See opt.dss for details. + + + stats.arenas.<i>.nthreads diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 0b0f640a..49213e32 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -331,6 +331,8 @@ struct arena_s { uint64_t prof_accumbytes; + dss_prec_t dss_prec; + /* List of dirty-page-containing chunks this arena manages. */ ql_head(arena_chunk_t) chunks_dirty; @@ -422,13 +424,16 @@ void arena_dalloc_small(arena_t *arena, arena_chunk_t *chunk, void *ptr, void arena_dalloc_large_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr); void arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr); -void arena_stats_merge(arena_t *arena, size_t *nactive, size_t *ndirty, - arena_stats_t *astats, malloc_bin_stats_t *bstats, - malloc_large_stats_t *lstats); void *arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, bool zero); -void *arena_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, - size_t alignment, bool zero, bool try_tcache); +void *arena_ralloc(arena_t *arena, void *ptr, size_t oldsize, size_t size, + size_t extra, size_t alignment, bool zero, bool try_tcache_alloc, + bool try_tcache_dalloc); +dss_prec_t arena_dss_prec_get(arena_t *arena); +void arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec); +void arena_stats_merge(arena_t *arena, const char **dss, size_t *nactive, + size_t *ndirty, arena_stats_t *astats, malloc_bin_stats_t *bstats, + malloc_large_stats_t *lstats); bool arena_new(arena_t *arena, unsigned ind); void arena_boot(void); void arena_prefork(arena_t *arena); diff --git 
a/include/jemalloc/internal/chunk.h b/include/jemalloc/internal/chunk.h index c3c3e9cd..87d8700d 100644 --- a/include/jemalloc/internal/chunk.h +++ b/include/jemalloc/internal/chunk.h @@ -28,6 +28,7 @@ #ifdef JEMALLOC_H_EXTERNS extern size_t opt_lg_chunk; +extern const char *opt_dss; /* Protects stats_chunks; currently not used for any other purpose. */ extern malloc_mutex_t chunks_mtx; @@ -42,7 +43,9 @@ extern size_t chunk_npages; extern size_t map_bias; /* Number of arena chunk header pages. */ extern size_t arena_maxclass; /* Max size class for arenas. */ -void *chunk_alloc(size_t size, size_t alignment, bool base, bool *zero); +void *chunk_alloc(size_t size, size_t alignment, bool base, bool *zero, + dss_prec_t dss_prec); +void chunk_unmap(void *chunk, size_t size); void chunk_dealloc(void *chunk, size_t size, bool unmap); bool chunk_boot(void); void chunk_prefork(void); diff --git a/include/jemalloc/internal/chunk_dss.h b/include/jemalloc/internal/chunk_dss.h index 6e2643b2..6585f071 100644 --- a/include/jemalloc/internal/chunk_dss.h +++ b/include/jemalloc/internal/chunk_dss.h @@ -1,14 +1,28 @@ /******************************************************************************/ #ifdef JEMALLOC_H_TYPES +typedef enum { + dss_prec_disabled = 0, + dss_prec_primary = 1, + dss_prec_secondary = 2, + + dss_prec_limit = 3 +} dss_prec_t ; +#define DSS_PREC_DEFAULT dss_prec_secondary +#define DSS_DEFAULT "secondary" + #endif /* JEMALLOC_H_TYPES */ /******************************************************************************/ #ifdef JEMALLOC_H_STRUCTS +extern const char *dss_prec_names[]; + #endif /* JEMALLOC_H_STRUCTS */ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS +dss_prec_t chunk_dss_prec_get(void); +bool chunk_dss_prec_set(dss_prec_t dss_prec); void *chunk_alloc_dss(size_t size, size_t alignment, bool *zero); bool chunk_in_dss(void *chunk); bool chunk_dss_boot(void); diff --git 
a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index 1d0c76a0..0ffecc5f 100644 --- a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -33,6 +33,7 @@ struct ctl_indexed_node_s { struct ctl_arena_stats_s { bool initialized; unsigned nthreads; + const char *dss; size_t pactive; size_t pdirty; arena_stats_t astats; @@ -61,6 +62,7 @@ struct ctl_stats_s { uint64_t nmalloc; /* huge_nmalloc */ uint64_t ndalloc; /* huge_ndalloc */ } huge; + unsigned narenas; ctl_arena_stats_t *arenas; /* (narenas + 1) elements. */ }; diff --git a/include/jemalloc/internal/huge.h b/include/jemalloc/internal/huge.h index e8513c93..d987d370 100644 --- a/include/jemalloc/internal/huge.h +++ b/include/jemalloc/internal/huge.h @@ -22,7 +22,7 @@ void *huge_palloc(size_t size, size_t alignment, bool zero); void *huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra); void *huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, - size_t alignment, bool zero); + size_t alignment, bool zero, bool try_tcache_dalloc); void huge_dalloc(void *ptr, bool unmap); size_t huge_salloc(const void *ptr); prof_ctx_t *huge_prof_ctx_get(const void *ptr); diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index b5b09e28..475821ac 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -514,13 +514,19 @@ extern size_t opt_narenas; /* Number of CPUs. */ extern unsigned ncpus; -extern malloc_mutex_t arenas_lock; /* Protects arenas initialization. */ +/* Protects arenas initialization (arenas, arenas_total). */ +extern malloc_mutex_t arenas_lock; /* * Arenas that are used to service external requests. Not all elements of the * arenas array are necessarily used; arenas are created lazily as needed. + * + * arenas[0..narenas_auto) are used for automatic multiplexing of threads and + * arenas. 
arenas[narenas_auto..narenas_total) are only used if the application + * takes some action to create them and allocate from them. */ extern arena_t **arenas; -extern unsigned narenas; +extern unsigned narenas_total; +extern unsigned narenas_auto; /* Read-only after initialization. */ arena_t *arenas_extend(unsigned ind); void arenas_cleanup(void *arg); @@ -575,6 +581,7 @@ malloc_tsd_protos(JEMALLOC_ATTR(unused), arenas, arena_t *) size_t s2u(size_t size); size_t sa2u(size_t size, size_t alignment); +unsigned narenas_total_get(void); arena_t *choose_arena(arena_t *arena); #endif @@ -679,6 +686,18 @@ sa2u(size_t size, size_t alignment) } } +JEMALLOC_INLINE unsigned +narenas_total_get(void) +{ + unsigned narenas; + + malloc_mutex_lock(&arenas_lock); + narenas = narenas_total; + malloc_mutex_unlock(&arenas_lock); + + return (narenas); +} + /* Choose an arena based on a per-thread value. */ JEMALLOC_INLINE arena_t * choose_arena(arena_t *arena) @@ -714,15 +733,24 @@ choose_arena(arena_t *arena) #include "jemalloc/internal/quarantine.h" #ifndef JEMALLOC_ENABLE_INLINE +void *imallocx(size_t size, bool try_tcache, arena_t *arena); void *imalloc(size_t size); +void *icallocx(size_t size, bool try_tcache, arena_t *arena); void *icalloc(size_t size); +void *ipallocx(size_t usize, size_t alignment, bool zero, bool try_tcache, + arena_t *arena); void *ipalloc(size_t usize, size_t alignment, bool zero); size_t isalloc(const void *ptr, bool demote); size_t ivsalloc(const void *ptr, bool demote); size_t u2rz(size_t usize); size_t p2rz(const void *ptr); +void idallocx(void *ptr, bool try_tcache); void idalloc(void *ptr); +void iqallocx(void *ptr, bool try_tcache); void iqalloc(void *ptr); +void *irallocx(void *ptr, size_t size, size_t extra, size_t alignment, + bool zero, bool no_move, bool try_tcache_alloc, bool try_tcache_dalloc, + arena_t *arena); void *iralloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero, bool no_move); 
malloc_tsd_protos(JEMALLOC_ATTR(unused), thread_allocated, thread_allocated_t) @@ -730,29 +758,44 @@ malloc_tsd_protos(JEMALLOC_ATTR(unused), thread_allocated, thread_allocated_t) #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) JEMALLOC_INLINE void * -imalloc(size_t size) +imallocx(size_t size, bool try_tcache, arena_t *arena) { assert(size != 0); if (size <= arena_maxclass) - return (arena_malloc(NULL, size, false, true)); + return (arena_malloc(arena, size, false, try_tcache)); else return (huge_malloc(size, false)); } +JEMALLOC_INLINE void * +imalloc(size_t size) +{ + + return (imallocx(size, true, NULL)); +} + +JEMALLOC_INLINE void * +icallocx(size_t size, bool try_tcache, arena_t *arena) +{ + + if (size <= arena_maxclass) + return (arena_malloc(arena, size, true, try_tcache)); + else + return (huge_malloc(size, true)); +} + JEMALLOC_INLINE void * icalloc(size_t size) { - if (size <= arena_maxclass) - return (arena_malloc(NULL, size, true, true)); - else - return (huge_malloc(size, true)); + return (icallocx(size, true, NULL)); } JEMALLOC_INLINE void * -ipalloc(size_t usize, size_t alignment, bool zero) +ipallocx(size_t usize, size_t alignment, bool zero, bool try_tcache, + arena_t *arena) { void *ret; @@ -760,11 +803,11 @@ ipalloc(size_t usize, size_t alignment, bool zero) assert(usize == sa2u(usize, alignment)); if (usize <= arena_maxclass && alignment <= PAGE) - ret = arena_malloc(NULL, usize, zero, true); + ret = arena_malloc(arena, usize, zero, try_tcache); else { if (usize <= arena_maxclass) { - ret = arena_palloc(choose_arena(NULL), usize, alignment, - zero); + ret = arena_palloc(choose_arena(arena), usize, + alignment, zero); } else if (alignment <= chunksize) ret = huge_malloc(usize, zero); else @@ -775,6 +818,13 @@ ipalloc(size_t usize, size_t alignment, bool zero) return (ret); } +JEMALLOC_INLINE void * +ipalloc(size_t usize, size_t alignment, bool zero) +{ + + return (ipallocx(usize, alignment, zero, true, NULL)); +} + /* * Typical 
usage: * void *ptr = [...] @@ -833,7 +883,7 @@ p2rz(const void *ptr) } JEMALLOC_INLINE void -idalloc(void *ptr) +idallocx(void *ptr, bool try_tcache) { arena_chunk_t *chunk; @@ -841,24 +891,38 @@ idalloc(void *ptr) chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (chunk != ptr) - arena_dalloc(chunk->arena, chunk, ptr, true); + arena_dalloc(chunk->arena, chunk, ptr, try_tcache); else huge_dalloc(ptr, true); } +JEMALLOC_INLINE void +idalloc(void *ptr) +{ + + idallocx(ptr, true); +} + +JEMALLOC_INLINE void +iqallocx(void *ptr, bool try_tcache) +{ + + if (config_fill && opt_quarantine) + quarantine(ptr); + else + idallocx(ptr, try_tcache); +} + JEMALLOC_INLINE void iqalloc(void *ptr) { - if (config_fill && opt_quarantine) - quarantine(ptr); - else - idalloc(ptr); + iqallocx(ptr, true); } JEMALLOC_INLINE void * -iralloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero, - bool no_move) +irallocx(void *ptr, size_t size, size_t extra, size_t alignment, bool zero, + bool no_move, bool try_tcache_alloc, bool try_tcache_dalloc, arena_t *arena) { void *ret; size_t oldsize; @@ -881,7 +945,7 @@ iralloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero, usize = sa2u(size + extra, alignment); if (usize == 0) return (NULL); - ret = ipalloc(usize, alignment, zero); + ret = ipallocx(usize, alignment, zero, try_tcache_alloc, arena); if (ret == NULL) { if (extra == 0) return (NULL); @@ -889,7 +953,8 @@ iralloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero, usize = sa2u(size, alignment); if (usize == 0) return (NULL); - ret = ipalloc(usize, alignment, zero); + ret = ipallocx(usize, alignment, zero, try_tcache_alloc, + arena); if (ret == NULL) return (NULL); } @@ -900,7 +965,7 @@ iralloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero, */ copysize = (size < oldsize) ? 
size : oldsize; memcpy(ret, ptr, copysize); - iqalloc(ptr); + iqallocx(ptr, try_tcache_dalloc); return (ret); } @@ -914,15 +979,25 @@ iralloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero, } } else { if (size + extra <= arena_maxclass) { - return (arena_ralloc(ptr, oldsize, size, extra, - alignment, zero, true)); + return (arena_ralloc(arena, ptr, oldsize, size, extra, + alignment, zero, try_tcache_alloc, + try_tcache_dalloc)); } else { return (huge_ralloc(ptr, oldsize, size, extra, - alignment, zero)); + alignment, zero, try_tcache_dalloc)); } } } +JEMALLOC_INLINE void * +iralloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero, + bool no_move) +{ + + return (irallocx(ptr, size, extra, alignment, zero, no_move, true, true, + NULL)); +} + malloc_tsd_externs(thread_allocated, thread_allocated_t) malloc_tsd_funcs(JEMALLOC_INLINE, thread_allocated, thread_allocated_t, THREAD_ALLOCATED_INITIALIZER, malloc_tsd_no_cleanup) diff --git a/include/jemalloc/internal/private_namespace.h b/include/jemalloc/internal/private_namespace.h index 28686dce..06241cd2 100644 --- a/include/jemalloc/internal/private_namespace.h +++ b/include/jemalloc/internal/private_namespace.h @@ -12,6 +12,8 @@ #define arena_dalloc_large JEMALLOC_N(arena_dalloc_large) #define arena_dalloc_large_locked JEMALLOC_N(arena_dalloc_large_locked) #define arena_dalloc_small JEMALLOC_N(arena_dalloc_small) +#define arena_dss_prec_get JEMALLOC_N(arena_dss_prec_get) +#define arena_dss_prec_set JEMALLOC_N(arena_dss_prec_set) #define arena_malloc JEMALLOC_N(arena_malloc) #define arena_malloc_large JEMALLOC_N(arena_malloc_large) #define arena_malloc_small JEMALLOC_N(arena_malloc_small) @@ -51,13 +53,11 @@ #define arena_stats_merge JEMALLOC_N(arena_stats_merge) #define arena_tcache_fill_small JEMALLOC_N(arena_tcache_fill_small) #define arenas JEMALLOC_N(arenas) -#define arenas_bin_i_index JEMALLOC_N(arenas_bin_i_index) #define arenas_booted JEMALLOC_N(arenas_booted) #define 
arenas_cleanup JEMALLOC_N(arenas_cleanup) #define arenas_extend JEMALLOC_N(arenas_extend) #define arenas_initialized JEMALLOC_N(arenas_initialized) #define arenas_lock JEMALLOC_N(arenas_lock) -#define arenas_lrun_i_index JEMALLOC_N(arenas_lrun_i_index) #define arenas_tls JEMALLOC_N(arenas_tls) #define arenas_tsd JEMALLOC_N(arenas_tsd) #define arenas_tsd_boot JEMALLOC_N(arenas_tsd_boot) @@ -102,12 +102,15 @@ #define chunk_dss_boot JEMALLOC_N(chunk_dss_boot) #define chunk_dss_postfork_child JEMALLOC_N(chunk_dss_postfork_child) #define chunk_dss_postfork_parent JEMALLOC_N(chunk_dss_postfork_parent) +#define chunk_dss_prec_get JEMALLOC_N(chunk_dss_prec_get) +#define chunk_dss_prec_set JEMALLOC_N(chunk_dss_prec_set) #define chunk_dss_prefork JEMALLOC_N(chunk_dss_prefork) #define chunk_in_dss JEMALLOC_N(chunk_in_dss) #define chunk_npages JEMALLOC_N(chunk_npages) #define chunk_postfork_child JEMALLOC_N(chunk_postfork_child) #define chunk_postfork_parent JEMALLOC_N(chunk_postfork_parent) #define chunk_prefork JEMALLOC_N(chunk_prefork) +#define chunk_unmap JEMALLOC_N(chunk_unmap) #define chunks_mtx JEMALLOC_N(chunks_mtx) #define chunks_rtree JEMALLOC_N(chunks_rtree) #define chunksize JEMALLOC_N(chunksize) @@ -136,6 +139,7 @@ #define ctl_postfork_child JEMALLOC_N(ctl_postfork_child) #define ctl_postfork_parent JEMALLOC_N(ctl_postfork_parent) #define ctl_prefork JEMALLOC_N(ctl_prefork) +#define dss_prec_names JEMALLOC_N(dss_prec_names) #define extent_tree_ad_first JEMALLOC_N(extent_tree_ad_first) #define extent_tree_ad_insert JEMALLOC_N(extent_tree_ad_insert) #define extent_tree_ad_iter JEMALLOC_N(extent_tree_ad_iter) @@ -188,11 +192,17 @@ #define huge_salloc JEMALLOC_N(huge_salloc) #define iallocm JEMALLOC_N(iallocm) #define icalloc JEMALLOC_N(icalloc) +#define icallocx JEMALLOC_N(icallocx) #define idalloc JEMALLOC_N(idalloc) +#define idallocx JEMALLOC_N(idallocx) #define imalloc JEMALLOC_N(imalloc) +#define imallocx JEMALLOC_N(imallocx) #define ipalloc JEMALLOC_N(ipalloc) 
+#define ipallocx JEMALLOC_N(ipallocx) #define iqalloc JEMALLOC_N(iqalloc) +#define iqallocx JEMALLOC_N(iqallocx) #define iralloc JEMALLOC_N(iralloc) +#define irallocx JEMALLOC_N(irallocx) #define isalloc JEMALLOC_N(isalloc) #define isthreaded JEMALLOC_N(isthreaded) #define ivsalloc JEMALLOC_N(ivsalloc) @@ -220,7 +230,9 @@ #define map_bias JEMALLOC_N(map_bias) #define mb_write JEMALLOC_N(mb_write) #define mutex_boot JEMALLOC_N(mutex_boot) -#define narenas JEMALLOC_N(narenas) +#define narenas_auto JEMALLOC_N(narenas_auto) +#define narenas_total JEMALLOC_N(narenas_total) +#define narenas_total_get JEMALLOC_N(narenas_total_get) #define ncpus JEMALLOC_N(ncpus) #define nhbins JEMALLOC_N(nhbins) #define opt_abort JEMALLOC_N(opt_abort) @@ -297,9 +309,6 @@ #define s2u JEMALLOC_N(s2u) #define sa2u JEMALLOC_N(sa2u) #define set_errno JEMALLOC_N(set_errno) -#define stats_arenas_i_bins_j_index JEMALLOC_N(stats_arenas_i_bins_j_index) -#define stats_arenas_i_index JEMALLOC_N(stats_arenas_i_index) -#define stats_arenas_i_lruns_j_index JEMALLOC_N(stats_arenas_i_lruns_j_index) #define stats_cactive JEMALLOC_N(stats_cactive) #define stats_cactive_add JEMALLOC_N(stats_cactive_add) #define stats_cactive_get JEMALLOC_N(stats_cactive_get) diff --git a/include/jemalloc/jemalloc.h.in b/include/jemalloc/jemalloc.h.in index aeb5d2b8..31b1304a 100644 --- a/include/jemalloc/jemalloc.h.in +++ b/include/jemalloc/jemalloc.h.in @@ -25,6 +25,8 @@ extern "C" { #endif #define ALLOCM_ZERO ((int)0x40) #define ALLOCM_NO_MOVE ((int)0x80) +/* Bias arena index bits so that 0 encodes "ALLOCM_ARENA() unspecified". 
*/ +#define ALLOCM_ARENA(a) ((int)(((a)+1) << 8)) #define ALLOCM_SUCCESS 0 #define ALLOCM_ERR_OOM 1 diff --git a/src/arena.c b/src/arena.c index 674ffe90..1e6964a8 100644 --- a/src/arena.c +++ b/src/arena.c @@ -372,7 +372,7 @@ arena_chunk_alloc(arena_t *arena) zero = false; malloc_mutex_unlock(&arena->lock); chunk = (arena_chunk_t *)chunk_alloc(chunksize, chunksize, - false, &zero); + false, &zero, arena->dss_prec); malloc_mutex_lock(&arena->lock); if (chunk == NULL) return (NULL); @@ -1619,52 +1619,6 @@ arena_dalloc_small(arena_t *arena, arena_chunk_t *chunk, void *ptr, mapelm = arena_mapp_get(chunk, pageind); arena_dalloc_bin(arena, chunk, ptr, pageind, mapelm); } -void -arena_stats_merge(arena_t *arena, size_t *nactive, size_t *ndirty, - arena_stats_t *astats, malloc_bin_stats_t *bstats, - malloc_large_stats_t *lstats) -{ - unsigned i; - - malloc_mutex_lock(&arena->lock); - *nactive += arena->nactive; - *ndirty += arena->ndirty; - - astats->mapped += arena->stats.mapped; - astats->npurge += arena->stats.npurge; - astats->nmadvise += arena->stats.nmadvise; - astats->purged += arena->stats.purged; - astats->allocated_large += arena->stats.allocated_large; - astats->nmalloc_large += arena->stats.nmalloc_large; - astats->ndalloc_large += arena->stats.ndalloc_large; - astats->nrequests_large += arena->stats.nrequests_large; - - for (i = 0; i < nlclasses; i++) { - lstats[i].nmalloc += arena->stats.lstats[i].nmalloc; - lstats[i].ndalloc += arena->stats.lstats[i].ndalloc; - lstats[i].nrequests += arena->stats.lstats[i].nrequests; - lstats[i].curruns += arena->stats.lstats[i].curruns; - } - malloc_mutex_unlock(&arena->lock); - - for (i = 0; i < NBINS; i++) { - arena_bin_t *bin = &arena->bins[i]; - - malloc_mutex_lock(&bin->lock); - bstats[i].allocated += bin->stats.allocated; - bstats[i].nmalloc += bin->stats.nmalloc; - bstats[i].ndalloc += bin->stats.ndalloc; - bstats[i].nrequests += bin->stats.nrequests; - if (config_tcache) { - bstats[i].nfills += bin->stats.nfills; - 
bstats[i].nflushes += bin->stats.nflushes; - } - bstats[i].nruns += bin->stats.nruns; - bstats[i].reruns += bin->stats.reruns; - bstats[i].curruns += bin->stats.curruns; - malloc_mutex_unlock(&bin->lock); - } -} void arena_dalloc_large_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr) @@ -1877,8 +1831,9 @@ arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, } void * -arena_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, - size_t alignment, bool zero, bool try_tcache) +arena_ralloc(arena_t *arena, void *ptr, size_t oldsize, size_t size, + size_t extra, size_t alignment, bool zero, bool try_tcache_alloc, + bool try_tcache_dalloc) { void *ret; size_t copysize; @@ -1897,9 +1852,9 @@ arena_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, size_t usize = sa2u(size + extra, alignment); if (usize == 0) return (NULL); - ret = ipalloc(usize, alignment, zero); + ret = ipallocx(usize, alignment, zero, try_tcache_alloc, arena); } else - ret = arena_malloc(NULL, size + extra, zero, try_tcache); + ret = arena_malloc(arena, size + extra, zero, try_tcache_alloc); if (ret == NULL) { if (extra == 0) @@ -1909,9 +1864,10 @@ arena_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, size_t usize = sa2u(size, alignment); if (usize == 0) return (NULL); - ret = ipalloc(usize, alignment, zero); + ret = ipallocx(usize, alignment, zero, try_tcache_alloc, + arena); } else - ret = arena_malloc(NULL, size, zero, try_tcache); + ret = arena_malloc(arena, size, zero, try_tcache_alloc); if (ret == NULL) return (NULL); @@ -1926,10 +1882,78 @@ arena_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, copysize = (size < oldsize) ? 
size : oldsize; VALGRIND_MAKE_MEM_UNDEFINED(ret, copysize); memcpy(ret, ptr, copysize); - iqalloc(ptr); + iqallocx(ptr, try_tcache_dalloc); return (ret); } +dss_prec_t +arena_dss_prec_get(arena_t *arena) +{ + dss_prec_t ret; + + malloc_mutex_lock(&arena->lock); + ret = arena->dss_prec; + malloc_mutex_unlock(&arena->lock); + return (ret); +} + +void +arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec) +{ + + malloc_mutex_lock(&arena->lock); + arena->dss_prec = dss_prec; + malloc_mutex_unlock(&arena->lock); +} + +void +arena_stats_merge(arena_t *arena, const char **dss, size_t *nactive, + size_t *ndirty, arena_stats_t *astats, malloc_bin_stats_t *bstats, + malloc_large_stats_t *lstats) +{ + unsigned i; + + malloc_mutex_lock(&arena->lock); + *dss = dss_prec_names[arena->dss_prec]; + *nactive += arena->nactive; + *ndirty += arena->ndirty; + + astats->mapped += arena->stats.mapped; + astats->npurge += arena->stats.npurge; + astats->nmadvise += arena->stats.nmadvise; + astats->purged += arena->stats.purged; + astats->allocated_large += arena->stats.allocated_large; + astats->nmalloc_large += arena->stats.nmalloc_large; + astats->ndalloc_large += arena->stats.ndalloc_large; + astats->nrequests_large += arena->stats.nrequests_large; + + for (i = 0; i < nlclasses; i++) { + lstats[i].nmalloc += arena->stats.lstats[i].nmalloc; + lstats[i].ndalloc += arena->stats.lstats[i].ndalloc; + lstats[i].nrequests += arena->stats.lstats[i].nrequests; + lstats[i].curruns += arena->stats.lstats[i].curruns; + } + malloc_mutex_unlock(&arena->lock); + + for (i = 0; i < NBINS; i++) { + arena_bin_t *bin = &arena->bins[i]; + + malloc_mutex_lock(&bin->lock); + bstats[i].allocated += bin->stats.allocated; + bstats[i].nmalloc += bin->stats.nmalloc; + bstats[i].ndalloc += bin->stats.ndalloc; + bstats[i].nrequests += bin->stats.nrequests; + if (config_tcache) { + bstats[i].nfills += bin->stats.nfills; + bstats[i].nflushes += bin->stats.nflushes; + } + bstats[i].nruns += bin->stats.nruns; + 
bstats[i].reruns += bin->stats.reruns; + bstats[i].curruns += bin->stats.curruns; + malloc_mutex_unlock(&bin->lock); + } +} + bool arena_new(arena_t *arena, unsigned ind) { @@ -1958,6 +1982,8 @@ arena_new(arena_t *arena, unsigned ind) if (config_prof) arena->prof_accumbytes = 0; + arena->dss_prec = chunk_dss_prec_get(); + /* Initialize chunks. */ ql_new(&arena->chunks_dirty); arena->spare = NULL; diff --git a/src/base.c b/src/base.c index bafaa743..b1a5945e 100644 --- a/src/base.c +++ b/src/base.c @@ -32,7 +32,8 @@ base_pages_alloc(size_t minsize) assert(minsize != 0); csize = CHUNK_CEILING(minsize); zero = false; - base_pages = chunk_alloc(csize, chunksize, true, &zero); + base_pages = chunk_alloc(csize, chunksize, true, &zero, + chunk_dss_prec_get()); if (base_pages == NULL) return (true); base_next_addr = base_pages; diff --git a/src/chunk.c b/src/chunk.c index 1730452f..5fc9e75c 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -4,7 +4,8 @@ /******************************************************************************/ /* Data. */ -size_t opt_lg_chunk = LG_CHUNK_DEFAULT; +const char *opt_dss = DSS_DEFAULT; +size_t opt_lg_chunk = LG_CHUNK_DEFAULT; malloc_mutex_t chunks_mtx; chunk_stats_t stats_chunks; @@ -15,8 +16,10 @@ chunk_stats_t stats_chunks; * address space. Depending on function, different tree orderings are needed, * which is why there are two trees with the same contents. */ -static extent_tree_t chunks_szad; -static extent_tree_t chunks_ad; +static extent_tree_t chunks_szad_mmap; +static extent_tree_t chunks_ad_mmap; +static extent_tree_t chunks_szad_dss; +static extent_tree_t chunks_ad_dss; rtree_t *chunks_rtree; @@ -30,14 +33,17 @@ size_t arena_maxclass; /* Max size class for arenas. */ /******************************************************************************/ /* Function prototypes for non-inline static functions. 
*/ -static void *chunk_recycle(size_t size, size_t alignment, bool base, +static void *chunk_recycle(extent_tree_t *chunks_szad, + extent_tree_t *chunks_ad, size_t size, size_t alignment, bool base, bool *zero); -static void chunk_record(void *chunk, size_t size); +static void chunk_record(extent_tree_t *chunks_szad, + extent_tree_t *chunks_ad, void *chunk, size_t size); /******************************************************************************/ static void * -chunk_recycle(size_t size, size_t alignment, bool base, bool *zero) +chunk_recycle(extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, size_t size, + size_t alignment, bool base, bool *zero) { void *ret; extent_node_t *node; @@ -62,7 +68,7 @@ chunk_recycle(size_t size, size_t alignment, bool base, bool *zero) key.addr = NULL; key.size = alloc_size; malloc_mutex_lock(&chunks_mtx); - node = extent_tree_szad_nsearch(&chunks_szad, &key); + node = extent_tree_szad_nsearch(chunks_szad, &key); if (node == NULL) { malloc_mutex_unlock(&chunks_mtx); return (NULL); @@ -73,13 +79,13 @@ chunk_recycle(size_t size, size_t alignment, bool base, bool *zero) trailsize = node->size - leadsize - size; ret = (void *)((uintptr_t)node->addr + leadsize); /* Remove node from the tree. */ - extent_tree_szad_remove(&chunks_szad, node); - extent_tree_ad_remove(&chunks_ad, node); + extent_tree_szad_remove(chunks_szad, node); + extent_tree_ad_remove(chunks_ad, node); if (leadsize != 0) { /* Insert the leading space as a smaller chunk. 
*/ node->size = leadsize; - extent_tree_szad_insert(&chunks_szad, node); - extent_tree_ad_insert(&chunks_ad, node); + extent_tree_szad_insert(chunks_szad, node); + extent_tree_ad_insert(chunks_ad, node); node = NULL; } if (trailsize != 0) { @@ -102,8 +108,8 @@ chunk_recycle(size_t size, size_t alignment, bool base, bool *zero) } node->addr = (void *)((uintptr_t)(ret) + size); node->size = trailsize; - extent_tree_szad_insert(&chunks_szad, node); - extent_tree_ad_insert(&chunks_ad, node); + extent_tree_szad_insert(chunks_szad, node); + extent_tree_ad_insert(chunks_ad, node); node = NULL; } malloc_mutex_unlock(&chunks_mtx); @@ -130,7 +136,8 @@ chunk_recycle(size_t size, size_t alignment, bool base, bool *zero) * advantage of them if they are returned. */ void * -chunk_alloc(size_t size, size_t alignment, bool base, bool *zero) +chunk_alloc(size_t size, size_t alignment, bool base, bool *zero, + dss_prec_t dss_prec) { void *ret; @@ -139,19 +146,40 @@ chunk_alloc(size_t size, size_t alignment, bool base, bool *zero) assert(alignment != 0); assert((alignment & chunksize_mask) == 0); - ret = chunk_recycle(size, alignment, base, zero); - if (ret != NULL) + /* + * Try to recycle an existing mapping. + */ + + /* "primary" dss. */ + if (config_dss && dss_prec == dss_prec_primary && (ret = + chunk_recycle(&chunks_szad_dss, &chunks_ad_dss, size, alignment, + base, zero)) != NULL) + goto label_return; + /* mmap. */ + if ((ret = chunk_recycle(&chunks_szad_mmap, &chunks_ad_mmap, size, + alignment, base, zero)) != NULL) + goto label_return; + /* "secondary" dss. */ + if (config_dss && dss_prec == dss_prec_secondary && (ret = + chunk_recycle(&chunks_szad_dss, &chunks_ad_dss, size, alignment, + base, zero)) != NULL) goto label_return; - ret = chunk_alloc_mmap(size, alignment, zero); - if (ret != NULL) - goto label_return; + /* + * Try to allocate a new mapping. 
+ */ - if (config_dss) { - ret = chunk_alloc_dss(size, alignment, zero); - if (ret != NULL) - goto label_return; - } + /* "primary" dss. */ + if (config_dss && dss_prec == dss_prec_primary && (ret = + chunk_alloc_dss(size, alignment, zero)) != NULL) + goto label_return; + /* mmap. */ + if ((ret = chunk_alloc_mmap(size, alignment, zero)) != NULL) + goto label_return; + /* "secondary" dss. */ + if (config_dss && dss_prec == dss_prec_secondary && (ret = + chunk_alloc_dss(size, alignment, zero)) != NULL) + goto label_return; /* All strategies for allocation failed. */ ret = NULL; @@ -191,7 +219,8 @@ label_return: } static void -chunk_record(void *chunk, size_t size) +chunk_record(extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, void *chunk, + size_t size) { bool unzeroed; extent_node_t *xnode, *node, *prev, key; @@ -208,7 +237,7 @@ chunk_record(void *chunk, size_t size) malloc_mutex_lock(&chunks_mtx); key.addr = (void *)((uintptr_t)chunk + size); - node = extent_tree_ad_nsearch(&chunks_ad, &key); + node = extent_tree_ad_nsearch(chunks_ad, &key); /* Try to coalesce forward. */ if (node != NULL && node->addr == key.addr) { /* @@ -216,11 +245,11 @@ chunk_record(void *chunk, size_t size) * not change the position within chunks_ad, so only * remove/insert from/into chunks_szad. */ - extent_tree_szad_remove(&chunks_szad, node); + extent_tree_szad_remove(chunks_szad, node); node->addr = chunk; node->size += size; node->zeroed = (node->zeroed && (unzeroed == false)); - extent_tree_szad_insert(&chunks_szad, node); + extent_tree_szad_insert(chunks_szad, node); if (xnode != NULL) base_node_dealloc(xnode); } else { @@ -239,12 +268,12 @@ chunk_record(void *chunk, size_t size) node->addr = chunk; node->size = size; node->zeroed = (unzeroed == false); - extent_tree_ad_insert(&chunks_ad, node); - extent_tree_szad_insert(&chunks_szad, node); + extent_tree_ad_insert(chunks_ad, node); + extent_tree_szad_insert(chunks_szad, node); } /* Try to coalesce backward. 
*/ - prev = extent_tree_ad_prev(&chunks_ad, node); + prev = extent_tree_ad_prev(chunks_ad, node); if (prev != NULL && (void *)((uintptr_t)prev->addr + prev->size) == chunk) { /* @@ -252,20 +281,34 @@ chunk_record(void *chunk, size_t size) * not change the position within chunks_ad, so only * remove/insert node from/into chunks_szad. */ - extent_tree_szad_remove(&chunks_szad, prev); - extent_tree_ad_remove(&chunks_ad, prev); + extent_tree_szad_remove(chunks_szad, prev); + extent_tree_ad_remove(chunks_ad, prev); - extent_tree_szad_remove(&chunks_szad, node); + extent_tree_szad_remove(chunks_szad, node); node->addr = prev->addr; node->size += prev->size; node->zeroed = (node->zeroed && prev->zeroed); - extent_tree_szad_insert(&chunks_szad, node); + extent_tree_szad_insert(chunks_szad, node); base_node_dealloc(prev); } malloc_mutex_unlock(&chunks_mtx); } +void +chunk_unmap(void *chunk, size_t size) +{ + assert(chunk != NULL); + assert(CHUNK_ADDR2BASE(chunk) == chunk); + assert(size != 0); + assert((size & chunksize_mask) == 0); + + if (config_dss && chunk_in_dss(chunk)) + chunk_record(&chunks_szad_dss, &chunks_ad_dss, chunk, size); + else if (chunk_dealloc_mmap(chunk, size)) + chunk_record(&chunks_szad_mmap, &chunks_ad_mmap, chunk, size); +} + void chunk_dealloc(void *chunk, size_t size, bool unmap) { @@ -279,15 +322,13 @@ chunk_dealloc(void *chunk, size_t size, bool unmap) rtree_set(chunks_rtree, (uintptr_t)chunk, NULL); if (config_stats || config_prof) { malloc_mutex_lock(&chunks_mtx); + assert(stats_chunks.curchunks >= (size / chunksize)); stats_chunks.curchunks -= (size / chunksize); malloc_mutex_unlock(&chunks_mtx); } - if (unmap) { - if ((config_dss && chunk_in_dss(chunk)) || - chunk_dealloc_mmap(chunk, size)) - chunk_record(chunk, size); - } + if (unmap) + chunk_unmap(chunk, size); } bool @@ -307,8 +348,10 @@ chunk_boot(void) } if (config_dss && chunk_dss_boot()) return (true); - extent_tree_szad_new(&chunks_szad); - extent_tree_ad_new(&chunks_ad); + 
extent_tree_szad_new(&chunks_szad_mmap); + extent_tree_ad_new(&chunks_ad_mmap); + extent_tree_szad_new(&chunks_szad_dss); + extent_tree_ad_new(&chunks_ad_dss); if (config_ivsalloc) { chunks_rtree = rtree_new((ZU(1) << (LG_SIZEOF_PTR+3)) - opt_lg_chunk); diff --git a/src/chunk_dss.c b/src/chunk_dss.c index 2d68e480..24781cc5 100644 --- a/src/chunk_dss.c +++ b/src/chunk_dss.c @@ -3,6 +3,16 @@ /******************************************************************************/ /* Data. */ +const char *dss_prec_names[] = { + "disabled", + "primary", + "secondary", + "N/A" +}; + +/* Current dss precedence default, used when creating new arenas. */ +static dss_prec_t dss_prec_default = DSS_PREC_DEFAULT; + /* * Protects sbrk() calls. This avoids malloc races among threads, though it * does not protect against races with threads that call sbrk() directly. @@ -29,6 +39,31 @@ sbrk(intptr_t increment) } #endif +dss_prec_t +chunk_dss_prec_get(void) +{ + dss_prec_t ret; + + if (config_dss == false) + return (dss_prec_disabled); + malloc_mutex_lock(&dss_mtx); + ret = dss_prec_default; + malloc_mutex_unlock(&dss_mtx); + return (ret); +} + +bool +chunk_dss_prec_set(dss_prec_t dss_prec) +{ + + if (config_dss == false) + return (true); + malloc_mutex_lock(&dss_mtx); + dss_prec_default = dss_prec; + malloc_mutex_unlock(&dss_mtx); + return (false); +} + void * chunk_alloc_dss(size_t size, size_t alignment, bool *zero) { @@ -88,7 +123,7 @@ chunk_alloc_dss(size_t size, size_t alignment, bool *zero) dss_max = dss_next; malloc_mutex_unlock(&dss_mtx); if (cpad_size != 0) - chunk_dealloc(cpad, cpad_size, true); + chunk_unmap(cpad, cpad_size); if (*zero) { VALGRIND_MAKE_MEM_UNDEFINED(ret, size); memset(ret, 0, size); diff --git a/src/ctl.c b/src/ctl.c index dec98832..30a5a46a 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -48,8 +48,8 @@ static int n##_ctl(const size_t *mib, size_t miblen, void *oldp, \ size_t *oldlenp, void *newp, size_t newlen); #define INDEX_PROTO(n) \ -const ctl_named_node_t 
*n##_index(const size_t *mib, size_t miblen, \ - size_t i); +static const ctl_named_node_t *n##_index(const size_t *mib, \ + size_t miblen, size_t i); static bool ctl_arena_init(ctl_arena_stats_t *astats); static void ctl_arena_clear(ctl_arena_stats_t *astats); @@ -58,6 +58,7 @@ static void ctl_arena_stats_amerge(ctl_arena_stats_t *cstats, static void ctl_arena_stats_smerge(ctl_arena_stats_t *sstats, ctl_arena_stats_t *astats); static void ctl_arena_refresh(arena_t *arena, unsigned i); +static bool ctl_grow(void); static void ctl_refresh(void); static bool ctl_init(void); static int ctl_lookup(const char *name, ctl_node_t const **nodesp, @@ -88,6 +89,7 @@ CTL_PROTO(config_utrace) CTL_PROTO(config_valgrind) CTL_PROTO(config_xmalloc) CTL_PROTO(opt_abort) +CTL_PROTO(opt_dss) CTL_PROTO(opt_lg_chunk) CTL_PROTO(opt_narenas) CTL_PROTO(opt_lg_dirty_mult) @@ -110,6 +112,10 @@ CTL_PROTO(opt_prof_gdump) CTL_PROTO(opt_prof_final) CTL_PROTO(opt_prof_leak) CTL_PROTO(opt_prof_accum) +CTL_PROTO(arena_i_purge) +static int arena_purge(unsigned arena_ind); +CTL_PROTO(arena_i_dss) +INDEX_PROTO(arena_i) CTL_PROTO(arenas_bin_i_size) CTL_PROTO(arenas_bin_i_nregs) CTL_PROTO(arenas_bin_i_run_size) @@ -125,6 +131,7 @@ CTL_PROTO(arenas_nbins) CTL_PROTO(arenas_nhbins) CTL_PROTO(arenas_nlruns) CTL_PROTO(arenas_purge) +CTL_PROTO(arenas_extend) CTL_PROTO(prof_active) CTL_PROTO(prof_dump) CTL_PROTO(prof_interval) @@ -158,6 +165,7 @@ CTL_PROTO(stats_arenas_i_lruns_j_nrequests) CTL_PROTO(stats_arenas_i_lruns_j_curruns) INDEX_PROTO(stats_arenas_i_lruns_j) CTL_PROTO(stats_arenas_i_nthreads) +CTL_PROTO(stats_arenas_i_dss) CTL_PROTO(stats_arenas_i_pactive) CTL_PROTO(stats_arenas_i_pdirty) CTL_PROTO(stats_arenas_i_mapped) @@ -223,6 +231,7 @@ static const ctl_named_node_t config_node[] = { static const ctl_named_node_t opt_node[] = { {NAME("abort"), CTL(opt_abort)}, + {NAME("dss"), CTL(opt_dss)}, {NAME("lg_chunk"), CTL(opt_lg_chunk)}, {NAME("narenas"), CTL(opt_narenas)}, {NAME("lg_dirty_mult"), 
CTL(opt_lg_dirty_mult)}, @@ -247,6 +256,18 @@ static const ctl_named_node_t opt_node[] = { {NAME("prof_accum"), CTL(opt_prof_accum)} }; +static const ctl_named_node_t arena_i_node[] = { + {NAME("purge"), CTL(arena_i_purge)}, + {NAME("dss"), CTL(arena_i_dss)} +}; +static const ctl_named_node_t super_arena_i_node[] = { + {NAME(""), CHILD(named, arena_i)} +}; + +static const ctl_indexed_node_t arena_node[] = { + {INDEX(arena_i)} +}; + static const ctl_named_node_t arenas_bin_i_node[] = { {NAME("size"), CTL(arenas_bin_i_size)}, {NAME("nregs"), CTL(arenas_bin_i_nregs)}, @@ -282,7 +303,8 @@ static const ctl_named_node_t arenas_node[] = { {NAME("bin"), CHILD(indexed, arenas_bin)}, {NAME("nlruns"), CTL(arenas_nlruns)}, {NAME("lrun"), CHILD(indexed, arenas_lrun)}, - {NAME("purge"), CTL(arenas_purge)} + {NAME("purge"), CTL(arenas_purge)}, + {NAME("extend"), CTL(arenas_extend)} }; static const ctl_named_node_t prof_node[] = { @@ -352,6 +374,7 @@ static const ctl_indexed_node_t stats_arenas_i_lruns_node[] = { static const ctl_named_node_t stats_arenas_i_node[] = { {NAME("nthreads"), CTL(stats_arenas_i_nthreads)}, + {NAME("dss"), CTL(stats_arenas_i_dss)}, {NAME("pactive"), CTL(stats_arenas_i_pactive)}, {NAME("pdirty"), CTL(stats_arenas_i_pdirty)}, {NAME("mapped"), CTL(stats_arenas_i_mapped)}, @@ -387,6 +410,7 @@ static const ctl_named_node_t root_node[] = { {NAME("thread"), CHILD(named, thread)}, {NAME("config"), CHILD(named, config)}, {NAME("opt"), CHILD(named, opt)}, + {NAME("arena"), CHILD(indexed, arena)}, {NAME("arenas"), CHILD(named, arenas)}, {NAME("prof"), CHILD(named, prof)}, {NAME("stats"), CHILD(named, stats)} @@ -420,6 +444,7 @@ static void ctl_arena_clear(ctl_arena_stats_t *astats) { + astats->dss = dss_prec_names[dss_prec_limit]; astats->pactive = 0; astats->pdirty = 0; if (config_stats) { @@ -439,8 +464,8 @@ ctl_arena_stats_amerge(ctl_arena_stats_t *cstats, arena_t *arena) { unsigned i; - arena_stats_merge(arena, &cstats->pactive, &cstats->pdirty, - 
&cstats->astats, cstats->bstats, cstats->lstats); + arena_stats_merge(arena, &cstats->dss, &cstats->pactive, + &cstats->pdirty, &cstats->astats, cstats->bstats, cstats->lstats); for (i = 0; i < NBINS; i++) { cstats->allocated_small += cstats->bstats[i].allocated; @@ -500,7 +525,7 @@ static void ctl_arena_refresh(arena_t *arena, unsigned i) { ctl_arena_stats_t *astats = &ctl_stats.arenas[i]; - ctl_arena_stats_t *sstats = &ctl_stats.arenas[narenas]; + ctl_arena_stats_t *sstats = &ctl_stats.arenas[ctl_stats.narenas]; ctl_arena_clear(astats); @@ -518,11 +543,72 @@ ctl_arena_refresh(arena_t *arena, unsigned i) } } +static bool +ctl_grow(void) +{ + size_t astats_size; + ctl_arena_stats_t *astats; + arena_t **tarenas; + + /* Extend arena stats and arenas arrays. */ + astats_size = (ctl_stats.narenas + 2) * sizeof(ctl_arena_stats_t); + if (ctl_stats.narenas == narenas_auto) { + /* ctl_stats.arenas and arenas came from base_alloc(). */ + astats = (ctl_arena_stats_t *)imalloc(astats_size); + if (astats == NULL) + return (true); + memcpy(astats, ctl_stats.arenas, (ctl_stats.narenas + 1) * + sizeof(ctl_arena_stats_t)); + + tarenas = (arena_t **)imalloc((ctl_stats.narenas + 1) * + sizeof(arena_t *)); + if (tarenas == NULL) { + idalloc(astats); + return (true); + } + memcpy(tarenas, arenas, ctl_stats.narenas * sizeof(arena_t *)); + } else { + astats = (ctl_arena_stats_t *)iralloc(ctl_stats.arenas, + astats_size, 0, 0, false, false); + if (astats == NULL) + return (true); + + tarenas = (arena_t **)iralloc(arenas, (ctl_stats.narenas + 1) * + sizeof(arena_t *), 0, 0, false, false); + if (tarenas == NULL) + return (true); + } + /* Initialize the new astats and arenas elements. */ + memset(&astats[ctl_stats.narenas + 1], 0, sizeof(ctl_arena_stats_t)); + if (ctl_arena_init(&astats[ctl_stats.narenas + 1])) + return (true); + tarenas[ctl_stats.narenas] = NULL; + /* Swap merged stats to their new location. 
*/ + { + ctl_arena_stats_t tstats; + memcpy(&tstats, &astats[ctl_stats.narenas], + sizeof(ctl_arena_stats_t)); + memcpy(&astats[ctl_stats.narenas], + &astats[ctl_stats.narenas + 1], sizeof(ctl_arena_stats_t)); + memcpy(&astats[ctl_stats.narenas + 1], &tstats, + sizeof(ctl_arena_stats_t)); + } + ctl_stats.arenas = astats; + ctl_stats.narenas++; + malloc_mutex_lock(&arenas_lock); + arenas = tarenas; + narenas_total++; + arenas_extend(narenas_total - 1); + malloc_mutex_unlock(&arenas_lock); + + return (false); +} + static void ctl_refresh(void) { unsigned i; - VARIABLE_ARRAY(arena_t *, tarenas, narenas); + VARIABLE_ARRAY(arena_t *, tarenas, ctl_stats.narenas); if (config_stats) { malloc_mutex_lock(&chunks_mtx); @@ -542,19 +628,19 @@ ctl_refresh(void) * Clear sum stats, since they will be merged into by * ctl_arena_refresh(). */ - ctl_stats.arenas[narenas].nthreads = 0; - ctl_arena_clear(&ctl_stats.arenas[narenas]); + ctl_stats.arenas[ctl_stats.narenas].nthreads = 0; + ctl_arena_clear(&ctl_stats.arenas[ctl_stats.narenas]); malloc_mutex_lock(&arenas_lock); - memcpy(tarenas, arenas, sizeof(arena_t *) * narenas); - for (i = 0; i < narenas; i++) { + memcpy(tarenas, arenas, sizeof(arena_t *) * ctl_stats.narenas); + for (i = 0; i < ctl_stats.narenas; i++) { if (arenas[i] != NULL) ctl_stats.arenas[i].nthreads = arenas[i]->nthreads; else ctl_stats.arenas[i].nthreads = 0; } malloc_mutex_unlock(&arenas_lock); - for (i = 0; i < narenas; i++) { + for (i = 0; i < ctl_stats.narenas; i++) { bool initialized = (tarenas[i] != NULL); ctl_stats.arenas[i].initialized = initialized; @@ -563,11 +649,13 @@ ctl_refresh(void) } if (config_stats) { - ctl_stats.allocated = ctl_stats.arenas[narenas].allocated_small - + ctl_stats.arenas[narenas].astats.allocated_large + ctl_stats.allocated = + ctl_stats.arenas[ctl_stats.narenas].allocated_small + + ctl_stats.arenas[ctl_stats.narenas].astats.allocated_large + + ctl_stats.huge.allocated; + ctl_stats.active = + 
(ctl_stats.arenas[ctl_stats.narenas].pactive << LG_PAGE) + ctl_stats.huge.allocated; - ctl_stats.active = (ctl_stats.arenas[narenas].pactive << - LG_PAGE) + ctl_stats.huge.allocated; ctl_stats.mapped = (ctl_stats.chunks.current << opt_lg_chunk); } @@ -585,13 +673,15 @@ ctl_init(void) * Allocate space for one extra arena stats element, which * contains summed stats across all arenas. */ + assert(narenas_auto == narenas_total_get()); + ctl_stats.narenas = narenas_auto; ctl_stats.arenas = (ctl_arena_stats_t *)base_alloc( - (narenas + 1) * sizeof(ctl_arena_stats_t)); + (ctl_stats.narenas + 1) * sizeof(ctl_arena_stats_t)); if (ctl_stats.arenas == NULL) { ret = true; goto label_return; } - memset(ctl_stats.arenas, 0, (narenas + 1) * + memset(ctl_stats.arenas, 0, (ctl_stats.narenas + 1) * sizeof(ctl_arena_stats_t)); /* @@ -601,14 +691,14 @@ ctl_init(void) */ if (config_stats) { unsigned i; - for (i = 0; i <= narenas; i++) { + for (i = 0; i <= ctl_stats.narenas; i++) { if (ctl_arena_init(&ctl_stats.arenas[i])) { ret = true; goto label_return; } } } - ctl_stats.arenas[narenas].initialized = true; + ctl_stats.arenas[ctl_stats.narenas].initialized = true; ctl_epoch = 0; ctl_refresh(); @@ -1084,13 +1174,14 @@ thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, int ret; unsigned newind, oldind; + malloc_mutex_lock(&ctl_mtx); newind = oldind = choose_arena(NULL)->ind; WRITE(newind, unsigned); READ(oldind, unsigned); if (newind != oldind) { arena_t *arena; - if (newind >= narenas) { + if (newind >= ctl_stats.narenas) { /* New arena index is out of range. 
*/ ret = EFAULT; goto label_return; @@ -1123,6 +1214,7 @@ thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, ret = 0; label_return: + malloc_mutex_unlock(&ctl_mtx); return (ret); } @@ -1156,6 +1248,7 @@ CTL_RO_BOOL_CONFIG_GEN(config_xmalloc) /******************************************************************************/ CTL_RO_NL_GEN(opt_abort, opt_abort, bool) +CTL_RO_NL_GEN(opt_dss, opt_dss, const char *) CTL_RO_NL_GEN(opt_lg_chunk, opt_lg_chunk, size_t) CTL_RO_NL_GEN(opt_narenas, opt_narenas, size_t) CTL_RO_NL_GEN(opt_lg_dirty_mult, opt_lg_dirty_mult, ssize_t) @@ -1179,12 +1272,126 @@ CTL_RO_NL_CGEN(config_prof, opt_prof_final, opt_prof_final, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_leak, opt_prof_leak, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_accum, opt_prof_accum, bool) +/******************************************************************************/ + +static int +arena_purge(unsigned arena_ind) +{ + int ret; + + malloc_mutex_lock(&ctl_mtx); + { + VARIABLE_ARRAY(arena_t *, tarenas, ctl_stats.narenas); + + malloc_mutex_lock(&arenas_lock); + memcpy(tarenas, arenas, sizeof(arena_t *) * ctl_stats.narenas); + malloc_mutex_unlock(&arenas_lock); + + if (arena_ind == ctl_stats.narenas) { + unsigned i; + for (i = 0; i < ctl_stats.narenas; i++) { + if (tarenas[i] != NULL) + arena_purge_all(tarenas[i]); + } + } else { + assert(arena_ind < ctl_stats.narenas); + if (tarenas[arena_ind] != NULL) + arena_purge_all(tarenas[arena_ind]); + } + } + + ret = 0; + malloc_mutex_unlock(&ctl_mtx); + return (ret); +} + +static int +arena_i_purge_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, + void *newp, size_t newlen) +{ + int ret; + + READONLY(); + WRITEONLY(); + ret = arena_purge(mib[1]); + +label_return: + return (ret); +} + +static int +arena_i_dss_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, + void *newp, size_t newlen) +{ + int ret, i; + bool match, err; + const char *dss; + unsigned arena_ind = 
mib[1]; + dss_prec_t dss_prec_old = dss_prec_limit; + dss_prec_t dss_prec = dss_prec_limit; + + WRITE(dss, const char *); + match = false; + for (i = 0; i < dss_prec_limit; i++) { + if (strcmp(dss_prec_names[i], dss) == 0) { + dss_prec = i; + match = true; + break; + } + } + if (match == false) { + ret = EINVAL; + goto label_return; + } + + if (arena_ind < ctl_stats.narenas) { + arena_t *arena = arenas[arena_ind]; + if (arena != NULL) { + dss_prec_old = arena_dss_prec_get(arena); + arena_dss_prec_set(arena, dss_prec); + err = false; + } else + err = true; + } else { + dss_prec_old = chunk_dss_prec_get(); + err = chunk_dss_prec_set(dss_prec); + } + dss = dss_prec_names[dss_prec_old]; + READ(dss, const char *); + if (err) { + ret = EFAULT; + goto label_return; + } + + ret = 0; +label_return: + return (ret); +} + +static const ctl_named_node_t * +arena_i_index(const size_t *mib, size_t miblen, size_t i) +{ + const ctl_named_node_t * ret; + + malloc_mutex_lock(&ctl_mtx); + if (i > ctl_stats.narenas) { + ret = NULL; + goto label_return; + } + + ret = super_arena_i_node; +label_return: + malloc_mutex_unlock(&ctl_mtx); + return (ret); +} + + /******************************************************************************/ CTL_RO_NL_GEN(arenas_bin_i_size, arena_bin_info[mib[2]].reg_size, size_t) CTL_RO_NL_GEN(arenas_bin_i_nregs, arena_bin_info[mib[2]].nregs, uint32_t) CTL_RO_NL_GEN(arenas_bin_i_run_size, arena_bin_info[mib[2]].run_size, size_t) -const ctl_named_node_t * +static const ctl_named_node_t * arenas_bin_i_index(const size_t *mib, size_t miblen, size_t i) { @@ -1194,7 +1401,7 @@ arenas_bin_i_index(const size_t *mib, size_t miblen, size_t i) } CTL_RO_NL_GEN(arenas_lrun_i_size, ((mib[2]+1) << LG_PAGE), size_t) -const ctl_named_node_t * +static const ctl_named_node_t * arenas_lrun_i_index(const size_t *mib, size_t miblen, size_t i) { @@ -1203,7 +1410,27 @@ arenas_lrun_i_index(const size_t *mib, size_t miblen, size_t i) return (super_arenas_lrun_i_node); } 
-CTL_RO_NL_GEN(arenas_narenas, narenas, unsigned) +static int +arenas_narenas_ctl(const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) +{ + int ret; + unsigned narenas; + + malloc_mutex_lock(&ctl_mtx); + READONLY(); + if (*oldlenp != sizeof(unsigned)) { + ret = EINVAL; + goto label_return; + } + narenas = ctl_stats.narenas; + READ(narenas, unsigned); + + ret = 0; +label_return: + malloc_mutex_unlock(&ctl_mtx); + return (ret); +} static int arenas_initialized_ctl(const size_t *mib, size_t miblen, void *oldp, @@ -1214,13 +1441,13 @@ arenas_initialized_ctl(const size_t *mib, size_t miblen, void *oldp, malloc_mutex_lock(&ctl_mtx); READONLY(); - if (*oldlenp != narenas * sizeof(bool)) { + if (*oldlenp != ctl_stats.narenas * sizeof(bool)) { ret = EINVAL; - nread = (*oldlenp < narenas * sizeof(bool)) - ? (*oldlenp / sizeof(bool)) : narenas; + nread = (*oldlenp < ctl_stats.narenas * sizeof(bool)) + ? (*oldlenp / sizeof(bool)) : ctl_stats.narenas; } else { ret = 0; - nread = narenas; + nread = ctl_stats.narenas; } for (i = 0; i < nread; i++) @@ -1243,36 +1470,42 @@ arenas_purge_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; - unsigned arena; + unsigned arena_ind; + malloc_mutex_lock(&ctl_mtx); WRITEONLY(); - arena = UINT_MAX; - WRITE(arena, unsigned); - if (newp != NULL && arena >= narenas) { + arena_ind = UINT_MAX; + WRITE(arena_ind, unsigned); + if (newp != NULL && arena_ind >= ctl_stats.narenas) ret = EFAULT; - goto label_return; - } else { - VARIABLE_ARRAY(arena_t *, tarenas, narenas); - - malloc_mutex_lock(&arenas_lock); - memcpy(tarenas, arenas, sizeof(arena_t *) * narenas); - malloc_mutex_unlock(&arenas_lock); - - if (arena == UINT_MAX) { - unsigned i; - for (i = 0; i < narenas; i++) { - if (tarenas[i] != NULL) - arena_purge_all(tarenas[i]); - } - } else { - assert(arena < narenas); - if (tarenas[arena] != NULL) - arena_purge_all(tarenas[arena]); - } + else { + if 
(arena_ind == UINT_MAX) + arena_ind = ctl_stats.narenas; + ret = arena_purge(arena_ind); } +label_return: + malloc_mutex_unlock(&ctl_mtx); + return (ret); +} + +static int +arenas_extend_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, + void *newp, size_t newlen) +{ + int ret; + + malloc_mutex_lock(&ctl_mtx); + READONLY(); + if (ctl_grow()) { + ret = EAGAIN; + goto label_return; + } + READ(ctl_stats.narenas - 1, unsigned); + ret = 0; label_return: + malloc_mutex_unlock(&ctl_mtx); return (ret); } @@ -1377,7 +1610,7 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nreruns, CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_curruns, ctl_stats.arenas[mib[2]].bstats[mib[4]].curruns, size_t) -const ctl_named_node_t * +static const ctl_named_node_t * stats_arenas_i_bins_j_index(const size_t *mib, size_t miblen, size_t j) { @@ -1395,7 +1628,7 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_lruns_j_nrequests, CTL_RO_CGEN(config_stats, stats_arenas_i_lruns_j_curruns, ctl_stats.arenas[mib[2]].lstats[mib[4]].curruns, size_t) -const ctl_named_node_t * +static const ctl_named_node_t * stats_arenas_i_lruns_j_index(const size_t *mib, size_t miblen, size_t j) { @@ -1405,6 +1638,7 @@ stats_arenas_i_lruns_j_index(const size_t *mib, size_t miblen, size_t j) } CTL_RO_GEN(stats_arenas_i_nthreads, ctl_stats.arenas[mib[2]].nthreads, unsigned) +CTL_RO_GEN(stats_arenas_i_dss, ctl_stats.arenas[mib[2]].dss, const char *) CTL_RO_GEN(stats_arenas_i_pactive, ctl_stats.arenas[mib[2]].pactive, size_t) CTL_RO_GEN(stats_arenas_i_pdirty, ctl_stats.arenas[mib[2]].pdirty, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_mapped, @@ -1416,13 +1650,13 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_nmadvise, CTL_RO_CGEN(config_stats, stats_arenas_i_purged, ctl_stats.arenas[mib[2]].astats.purged, uint64_t) -const ctl_named_node_t * +static const ctl_named_node_t * stats_arenas_i_index(const size_t *mib, size_t miblen, size_t i) { const ctl_named_node_t * ret; malloc_mutex_lock(&ctl_mtx); - if 
(ctl_stats.arenas[i].initialized == false) { + if (i > ctl_stats.narenas || ctl_stats.arenas[i].initialized == false) { ret = NULL; goto label_return; } diff --git a/src/huge.c b/src/huge.c index 8a4ec942..aa08d43d 100644 --- a/src/huge.c +++ b/src/huge.c @@ -48,7 +48,8 @@ huge_palloc(size_t size, size_t alignment, bool zero) * it is possible to make correct junk/zero fill decisions below. */ is_zeroed = zero; - ret = chunk_alloc(csize, alignment, false, &is_zeroed); + ret = chunk_alloc(csize, alignment, false, &is_zeroed, + chunk_dss_prec_get()); if (ret == NULL) { base_node_dealloc(node); return (NULL); @@ -101,7 +102,7 @@ huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra) void * huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, - size_t alignment, bool zero) + size_t alignment, bool zero, bool try_tcache_dalloc) { void *ret; size_t copysize; @@ -180,7 +181,7 @@ huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, #endif { memcpy(ret, ptr, copysize); - iqalloc(ptr); + iqallocx(ptr, try_tcache_dalloc); } return (ret); } diff --git a/src/jemalloc.c b/src/jemalloc.c index b2daa306..8a667b62 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -33,7 +33,8 @@ unsigned ncpus; malloc_mutex_t arenas_lock; arena_t **arenas; -unsigned narenas; +unsigned narenas_total; +unsigned narenas_auto; /* Set to true once the allocator has been initialized. 
*/ static bool malloc_initialized = false; @@ -144,14 +145,14 @@ choose_arena_hard(void) { arena_t *ret; - if (narenas > 1) { + if (narenas_auto > 1) { unsigned i, choose, first_null; choose = 0; - first_null = narenas; + first_null = narenas_auto; malloc_mutex_lock(&arenas_lock); assert(arenas[0] != NULL); - for (i = 1; i < narenas; i++) { + for (i = 1; i < narenas_auto; i++) { if (arenas[i] != NULL) { /* * Choose the first arena that has the lowest @@ -160,7 +161,7 @@ choose_arena_hard(void) if (arenas[i]->nthreads < arenas[choose]->nthreads) choose = i; - } else if (first_null == narenas) { + } else if (first_null == narenas_auto) { /* * Record the index of the first uninitialized * arena, in case all extant arenas are in use. @@ -174,7 +175,8 @@ choose_arena_hard(void) } } - if (arenas[choose]->nthreads == 0 || first_null == narenas) { + if (arenas[choose]->nthreads == 0 + || first_null == narenas_auto) { /* * Use an unloaded arena, or the least loaded arena if * all arenas are already initialized. @@ -203,7 +205,7 @@ stats_print_atexit(void) { if (config_tcache && config_stats) { - unsigned i; + unsigned narenas, i; /* * Merge stats from extant threads. This is racy, since @@ -212,7 +214,7 @@ stats_print_atexit(void) * out of date by the time they are reported, if other threads * continue to allocate. */ - for (i = 0; i < narenas; i++) { + for (i = 0, narenas = narenas_total_get(); i < narenas; i++) { arena_t *arena = arenas[i]; if (arena != NULL) { tcache_t *tcache; @@ -554,6 +556,30 @@ malloc_conf_init(void) */ CONF_HANDLE_SIZE_T(opt_lg_chunk, "lg_chunk", LG_PAGE + (config_fill ? 
2 : 1), (sizeof(size_t) << 3) - 1) + if (strncmp("dss", k, klen) == 0) { + int i; + bool match = false; + for (i = 0; i < dss_prec_limit; i++) { + if (strncmp(dss_prec_names[i], v, vlen) + == 0) { + if (chunk_dss_prec_set(i)) { + malloc_conf_error( + "Error setting dss", + k, klen, v, vlen); + } else { + opt_dss = + dss_prec_names[i]; + match = true; + break; + } + } + } + if (match == false) { + malloc_conf_error("Invalid conf value", + k, klen, v, vlen); + } + continue; + } CONF_HANDLE_SIZE_T(opt_narenas, "narenas", 1, SIZE_T_MAX) CONF_HANDLE_SSIZE_T(opt_lg_dirty_mult, "lg_dirty_mult", @@ -699,9 +725,9 @@ malloc_init_hard(void) * Create enough scaffolding to allow recursive allocation in * malloc_ncpus(). */ - narenas = 1; + narenas_total = narenas_auto = 1; arenas = init_arenas; - memset(arenas, 0, sizeof(arena_t *) * narenas); + memset(arenas, 0, sizeof(arena_t *) * narenas_auto); /* * Initialize one arena here. The rest are lazily created in @@ -759,20 +785,21 @@ malloc_init_hard(void) else opt_narenas = 1; } - narenas = opt_narenas; + narenas_auto = opt_narenas; /* * Make sure that the arenas array can be allocated. In practice, this * limit is enough to allow the allocator to function, but the ctl * machinery will fail to allocate memory at far lower limits. */ - if (narenas > chunksize / sizeof(arena_t *)) { - narenas = chunksize / sizeof(arena_t *); + if (narenas_auto > chunksize / sizeof(arena_t *)) { + narenas_auto = chunksize / sizeof(arena_t *); malloc_printf(": Reducing narenas to limit (%d)\n", - narenas); + narenas_auto); } + narenas_total = narenas_auto; /* Allocate and initialize arenas. */ - arenas = (arena_t **)base_alloc(sizeof(arena_t *) * narenas); + arenas = (arena_t **)base_alloc(sizeof(arena_t *) * narenas_total); if (arenas == NULL) { malloc_mutex_unlock(&init_lock); return (true); @@ -781,7 +808,7 @@ malloc_init_hard(void) * Zero the array. In practice, this should always be pre-zeroed, * since it was just mmap()ed, but let's be sure. 
*/ - memset(arenas, 0, sizeof(arena_t *) * narenas); + memset(arenas, 0, sizeof(arena_t *) * narenas_total); /* Copy the pointer to the one arena that was already initialized. */ arenas[0] = init_arenas[0]; @@ -1346,18 +1373,19 @@ je_mallctlbymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, #ifdef JEMALLOC_EXPERIMENTAL JEMALLOC_INLINE void * -iallocm(size_t usize, size_t alignment, bool zero) +iallocm(size_t usize, size_t alignment, bool zero, bool try_tcache, + arena_t *arena) { assert(usize == ((alignment == 0) ? s2u(usize) : sa2u(usize, alignment))); if (alignment != 0) - return (ipalloc(usize, alignment, zero)); + return (ipallocx(usize, alignment, zero, try_tcache, arena)); else if (zero) - return (icalloc(usize)); + return (icallocx(usize, try_tcache, arena)); else - return (imalloc(usize)); + return (imallocx(usize, try_tcache, arena)); } int @@ -1368,6 +1396,9 @@ je_allocm(void **ptr, size_t *rsize, size_t size, int flags) size_t alignment = (ZU(1) << (flags & ALLOCM_LG_ALIGN_MASK) & (SIZE_T_MAX-1)); bool zero = flags & ALLOCM_ZERO; + unsigned arena_ind = ((unsigned)(flags >> 8)) - 1; + arena_t *arena; + bool try_tcache; assert(ptr != NULL); assert(size != 0); @@ -1375,6 +1406,14 @@ je_allocm(void **ptr, size_t *rsize, size_t size, int flags) if (malloc_init()) goto label_oom; + if (arena_ind != UINT_MAX) { + arena = arenas[arena_ind]; + try_tcache = false; + } else { + arena = NULL; + try_tcache = true; + } + usize = (alignment == 0) ? 
s2u(size) : sa2u(size, alignment); if (usize == 0) goto label_oom; @@ -1391,18 +1430,19 @@ je_allocm(void **ptr, size_t *rsize, size_t size, int flags) s2u(SMALL_MAXCLASS+1) : sa2u(SMALL_MAXCLASS+1, alignment); assert(usize_promoted != 0); - p = iallocm(usize_promoted, alignment, zero); + p = iallocm(usize_promoted, alignment, zero, + try_tcache, arena); if (p == NULL) goto label_oom; arena_prof_promoted(p, usize); } else { - p = iallocm(usize, alignment, zero); + p = iallocm(usize, alignment, zero, try_tcache, arena); if (p == NULL) goto label_oom; } prof_malloc(p, usize, cnt); } else { - p = iallocm(usize, alignment, zero); + p = iallocm(usize, alignment, zero, try_tcache, arena); if (p == NULL) goto label_oom; } @@ -1439,6 +1479,9 @@ je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, int flags) & (SIZE_T_MAX-1)); bool zero = flags & ALLOCM_ZERO; bool no_move = flags & ALLOCM_NO_MOVE; + unsigned arena_ind = ((unsigned)(flags >> 8)) - 1; + bool try_tcache_alloc, try_tcache_dalloc; + arena_t *arena; assert(ptr != NULL); assert(*ptr != NULL); @@ -1446,6 +1489,19 @@ je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, int flags) assert(SIZE_T_MAX - size >= extra); assert(malloc_initialized || IS_INITIALIZER); + if (arena_ind != UINT_MAX) { + arena_chunk_t *chunk; + try_tcache_alloc = true; + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(*ptr); + try_tcache_dalloc = (chunk == *ptr || chunk->arena != + arenas[arena_ind]); + arena = arenas[arena_ind]; + } else { + try_tcache_alloc = true; + try_tcache_dalloc = true; + arena = NULL; + } + p = *ptr; if (config_prof && opt_prof) { prof_thr_cnt_t *cnt; @@ -1472,9 +1528,10 @@ je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, int flags) if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && ((alignment == 0) ? s2u(size) : sa2u(size, alignment)) <= SMALL_MAXCLASS) { - q = iralloc(p, SMALL_MAXCLASS+1, (SMALL_MAXCLASS+1 >= + q = irallocx(p, SMALL_MAXCLASS+1, (SMALL_MAXCLASS+1 >= size+extra) ? 
0 : size+extra - (SMALL_MAXCLASS+1), - alignment, zero, no_move); + alignment, zero, no_move, try_tcache_alloc, + try_tcache_dalloc, arena); if (q == NULL) goto label_err; if (max_usize < PAGE) { @@ -1483,7 +1540,8 @@ je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, int flags) } else usize = isalloc(q, config_prof); } else { - q = iralloc(p, size, extra, alignment, zero, no_move); + q = irallocx(p, size, extra, alignment, zero, no_move, + try_tcache_alloc, try_tcache_dalloc, arena); if (q == NULL) goto label_err; usize = isalloc(q, config_prof); @@ -1500,7 +1558,8 @@ je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, int flags) old_size = isalloc(p, false); old_rzsize = u2rz(old_size); } - q = iralloc(p, size, extra, alignment, zero, no_move); + q = irallocx(p, size, extra, alignment, zero, no_move, + try_tcache_alloc, try_tcache_dalloc, arena); if (q == NULL) goto label_err; if (config_stats) @@ -1561,10 +1620,19 @@ je_dallocm(void *ptr, int flags) { size_t usize; size_t rzsize JEMALLOC_CC_SILENCE_INIT(0); + unsigned arena_ind = ((unsigned)(flags >> 8)) - 1; + bool try_tcache; assert(ptr != NULL); assert(malloc_initialized || IS_INITIALIZER); + if (arena_ind != UINT_MAX) { + arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + try_tcache = (chunk == ptr || chunk->arena != + arenas[arena_ind]); + } else + try_tcache = true; + UTRACE(ptr, 0, 0); if (config_stats || config_valgrind) usize = isalloc(ptr, config_prof); @@ -1577,7 +1645,7 @@ je_dallocm(void *ptr, int flags) thread_allocated_tsd_get()->deallocated += usize; if (config_valgrind && opt_valgrind) rzsize = p2rz(ptr); - iqalloc(ptr); + iqallocx(ptr, try_tcache); JEMALLOC_VALGRIND_FREE(ptr, rzsize); return (ALLOCM_SUCCESS); @@ -1654,7 +1722,7 @@ _malloc_prefork(void) /* Acquire all mutexes in a safe order. 
*/ ctl_prefork(); malloc_mutex_prefork(&arenas_lock); - for (i = 0; i < narenas; i++) { + for (i = 0; i < narenas_total; i++) { if (arenas[i] != NULL) arena_prefork(arenas[i]); } @@ -1685,7 +1753,7 @@ _malloc_postfork(void) base_postfork_parent(); chunk_postfork_parent(); prof_postfork_parent(); - for (i = 0; i < narenas; i++) { + for (i = 0; i < narenas_total; i++) { if (arenas[i] != NULL) arena_postfork_parent(arenas[i]); } @@ -1705,7 +1773,7 @@ jemalloc_postfork_child(void) base_postfork_child(); chunk_postfork_child(); prof_postfork_child(); - for (i = 0; i < narenas; i++) { + for (i = 0; i < narenas_total; i++) { if (arenas[i] != NULL) arena_postfork_child(arenas[i]); } diff --git a/src/stats.c b/src/stats.c index 433b80d1..43f87af6 100644 --- a/src/stats.c +++ b/src/stats.c @@ -206,6 +206,7 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, unsigned i, bool bins, bool large) { unsigned nthreads; + const char *dss; size_t page, pactive, pdirty, mapped; uint64_t npurge, nmadvise, purged; size_t small_allocated; @@ -218,6 +219,9 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, CTL_I_GET("stats.arenas.0.nthreads", &nthreads, unsigned); malloc_cprintf(write_cb, cbopaque, "assigned threads: %u\n", nthreads); + CTL_I_GET("stats.arenas.0.dss", &dss, const char *); + malloc_cprintf(write_cb, cbopaque, "dss allocation precedence: %s\n", + dss); CTL_I_GET("stats.arenas.0.pactive", &pactive, size_t); CTL_I_GET("stats.arenas.0.pdirty", &pdirty, size_t); CTL_I_GET("stats.arenas.0.npurge", &npurge, uint64_t); @@ -370,6 +374,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, "Run-time option settings:\n"); OPT_WRITE_BOOL(abort) OPT_WRITE_SIZE_T(lg_chunk) + OPT_WRITE_CHAR_P(dss) OPT_WRITE_SIZE_T(narenas) OPT_WRITE_SSIZE_T(lg_dirty_mult) OPT_WRITE_BOOL(stats_print) @@ -400,7 +405,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, malloc_cprintf(write_cb, cbopaque, "CPUs: %u\n", 
ncpus); CTL_GET("arenas.narenas", &uv, unsigned); - malloc_cprintf(write_cb, cbopaque, "Max arenas: %u\n", uv); + malloc_cprintf(write_cb, cbopaque, "Arenas: %u\n", uv); malloc_cprintf(write_cb, cbopaque, "Pointer size: %zu\n", sizeof(void *)); @@ -472,7 +477,8 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, CTL_GET("stats.chunks.current", &chunks_current, size_t); malloc_cprintf(write_cb, cbopaque, "chunks: nchunks " "highchunks curchunks\n"); - malloc_cprintf(write_cb, cbopaque, " %13"PRIu64"%13zu%13zu\n", + malloc_cprintf(write_cb, cbopaque, + " %13"PRIu64" %12zu %12zu\n", chunks_total, chunks_high, chunks_current); /* Print huge stats. */ diff --git a/src/tcache.c b/src/tcache.c index 60244c45..47e14f30 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -288,7 +288,7 @@ tcache_create(arena_t *arena) else if (size <= tcache_maxclass) tcache = (tcache_t *)arena_malloc_large(arena, size, true); else - tcache = (tcache_t *)icalloc(size); + tcache = (tcache_t *)icallocx(size, false, arena); if (tcache == NULL) return (NULL); @@ -364,7 +364,7 @@ tcache_destroy(tcache_t *tcache) arena_dalloc_large(arena, chunk, tcache); } else - idalloc(tcache); + idallocx(tcache, false); } void diff --git a/test/ALLOCM_ARENA.c b/test/ALLOCM_ARENA.c new file mode 100644 index 00000000..15856908 --- /dev/null +++ b/test/ALLOCM_ARENA.c @@ -0,0 +1,66 @@ +#define JEMALLOC_MANGLE +#include "jemalloc_test.h" + +#define NTHREADS 10 + +void * +je_thread_start(void *arg) +{ + unsigned thread_ind = (unsigned)(uintptr_t)arg; + unsigned arena_ind; + int r; + void *p; + size_t rsz, sz; + + sz = sizeof(arena_ind); + if (mallctl("arenas.extend", &arena_ind, &sz, NULL, 0) + != 0) { + malloc_printf("Error in arenas.extend\n"); + abort(); + } + + if (thread_ind % 4 != 3) { + size_t mib[3]; + size_t miblen = sizeof(mib) / sizeof(size_t); + const char *dss_precs[] = {"disabled", "primary", "secondary"}; + const char *dss = dss_precs[thread_ind % 4]; + if 
(mallctlnametomib("arena.0.dss", mib, &miblen) != 0) { + malloc_printf("Error in mallctlnametomib()\n"); + abort(); + } + mib[1] = arena_ind; + if (mallctlbymib(mib, miblen, NULL, NULL, (void *)&dss, + sizeof(const char *))) { + malloc_printf("Error in mallctlbymib()\n"); + abort(); + } + } + + r = allocm(&p, &rsz, 1, ALLOCM_ARENA(arena_ind)); + if (r != ALLOCM_SUCCESS) { + malloc_printf("Unexpected allocm() error\n"); + abort(); + } + + return (NULL); +} + +int +main(void) +{ + je_thread_t threads[NTHREADS]; + unsigned i; + + malloc_printf("Test begin\n"); + + for (i = 0; i < NTHREADS; i++) { + je_thread_create(&threads[i], je_thread_start, + (void *)(uintptr_t)i); + } + + for (i = 0; i < NTHREADS; i++) + je_thread_join(threads[i], NULL); + + malloc_printf("Test end\n"); + return (0); +} diff --git a/test/ALLOCM_ARENA.exp b/test/ALLOCM_ARENA.exp new file mode 100644 index 00000000..369a88dd --- /dev/null +++ b/test/ALLOCM_ARENA.exp @@ -0,0 +1,2 @@ +Test begin +Test end diff --git a/test/thread_arena.c b/test/thread_arena.c index 2020d994..2ffdb5e8 100644 --- a/test/thread_arena.c +++ b/test/thread_arena.c @@ -1,7 +1,7 @@ #define JEMALLOC_MANGLE #include "jemalloc_test.h" -#define NTHREADS 10 +#define NTHREADS 10 void * je_thread_start(void *arg) @@ -66,8 +66,10 @@ main(void) goto label_return; } - for (i = 0; i < NTHREADS; i++) - je_thread_create(&threads[i], je_thread_start, (void *)&arena_ind); + for (i = 0; i < NTHREADS; i++) { + je_thread_create(&threads[i], je_thread_start, + (void *)&arena_ind); + } for (i = 0; i < NTHREADS; i++) je_thread_join(threads[i], (void *)&ret); From a5c80f893edbf44d70336e8a3d558bac9d057929 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 15 Oct 2012 12:48:59 -0700 Subject: [PATCH 0253/3378] Add ctl_mutex protection to arena_i_dss_ctl(). Add ctl_mutex protection to arena_i_dss_ctl(), since ctl_stats.narenas is accessed. 
--- src/ctl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/ctl.c b/src/ctl.c index 30a5a46a..e3ab9da9 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1330,6 +1330,7 @@ arena_i_dss_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, dss_prec_t dss_prec_old = dss_prec_limit; dss_prec_t dss_prec = dss_prec_limit; + malloc_mutex_lock(&ctl_mtx); WRITE(dss, const char *); match = false; for (i = 0; i < dss_prec_limit; i++) { @@ -1365,6 +1366,7 @@ arena_i_dss_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, ret = 0; label_return: + malloc_mutex_unlock(&ctl_mtx); return (ret); } From 2b592b0f0bd043c0d14f8923f3c16009e5e312d5 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 16 Oct 2012 10:12:40 -0700 Subject: [PATCH 0254/3378] Update ChangeLog for 3.1.0. --- ChangeLog | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 66f1d691..0ae4f118 100644 --- a/ChangeLog +++ b/ChangeLog @@ -6,20 +6,31 @@ found in the git revision history: http://www.canonware.com/cgi-bin/gitweb.cgi?p=jemalloc.git git://canonware.com/jemalloc.git -* 3.x.x (XXX not yet released) +* 3.1.0 (October 16, 2012) New features: - Auto-detect whether running inside Valgrind, thus removing the need to manually specify MALLOC_CONF=valgrind:true. + - Add the "arenas.extend" mallctl, which allows applications to create + manually managed arenas. + - Add the ALLOCM_ARENA() flag for {,r,d}allocm(). + - Add the "opt.dss", "arena..dss", and "stats.arenas..dss" mallctls, + which provide control over dss/mmap precedence. + - Add the "arena..purge" mallctl, which obsoletes "arenas.purge". + - Define LG_QUANTUM for hppa. Incompatible changes: - Disable tcache by default if running inside Valgrind, in order to avoid making unallocated objects appear reachable to Valgrind. + - Drop const from malloc_usable_size() argument on Linux. Bug fixes: - Fix heap profiling crash if sampled object is freed via realloc(p, 0). 
- Remove const from __*_hook variable declarations, so that glibc can modify them during process forking. + - Fix mlockall(2)/madvise(2) interaction. + - Fix fork(2)-related deadlocks. + - Fix error return value for "thread.tcache.enabled" mallctl. * 3.0.0 (May 11, 2012) From 12efefb1953062795f5a971c1a72706787c7895c Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 16 Oct 2012 22:06:56 -0700 Subject: [PATCH 0255/3378] Fix dss/mmap allocation precedence code. Fix dss/mmap allocation precedence code to use recyclable mmap memory only after primary dss allocation fails. --- ChangeLog | 6 ++++++ src/chunk.c | 40 ++++++++++++++-------------------------- 2 files changed, 20 insertions(+), 26 deletions(-) diff --git a/ChangeLog b/ChangeLog index 0ae4f118..9a618724 100644 --- a/ChangeLog +++ b/ChangeLog @@ -6,6 +6,12 @@ found in the git revision history: http://www.canonware.com/cgi-bin/gitweb.cgi?p=jemalloc.git git://canonware.com/jemalloc.git +* 3.x.x (XXX Not released) + + Bug fixes: + - Fix dss/mmap allocation precedence code to use recyclable mmap memory only + after primary dss allocation fails. + * 3.1.0 (October 16, 2012) New features: diff --git a/src/chunk.c b/src/chunk.c index 5fc9e75c..1a3bb4f6 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -146,40 +146,28 @@ chunk_alloc(size_t size, size_t alignment, bool base, bool *zero, assert(alignment != 0); assert((alignment & chunksize_mask) == 0); - /* - * Try to recycle an existing mapping. - */ - /* "primary" dss. */ - if (config_dss && dss_prec == dss_prec_primary && (ret = - chunk_recycle(&chunks_szad_dss, &chunks_ad_dss, size, alignment, - base, zero)) != NULL) - goto label_return; + if (config_dss && dss_prec == dss_prec_primary) { + if ((ret = chunk_recycle(&chunks_szad_dss, &chunks_ad_dss, size, + alignment, base, zero)) != NULL) + goto label_return; + if ((ret = chunk_alloc_dss(size, alignment, zero)) != NULL) + goto label_return; + } /* mmap. 
*/ if ((ret = chunk_recycle(&chunks_szad_mmap, &chunks_ad_mmap, size, alignment, base, zero)) != NULL) goto label_return; - /* "secondary" dss. */ - if (config_dss && dss_prec == dss_prec_secondary && (ret = - chunk_recycle(&chunks_szad_dss, &chunks_ad_dss, size, alignment, - base, zero)) != NULL) - goto label_return; - - /* - * Try to allocate a new mapping. - */ - - /* "primary" dss. */ - if (config_dss && dss_prec == dss_prec_primary && (ret = - chunk_alloc_dss(size, alignment, zero)) != NULL) - goto label_return; - /* mmap. */ if ((ret = chunk_alloc_mmap(size, alignment, zero)) != NULL) goto label_return; /* "secondary" dss. */ - if (config_dss && dss_prec == dss_prec_secondary && (ret = - chunk_alloc_dss(size, alignment, zero)) != NULL) - goto label_return; + if (config_dss && dss_prec == dss_prec_secondary) { + if ((ret = chunk_recycle(&chunks_szad_dss, &chunks_ad_dss, size, + alignment, base, zero)) != NULL) + goto label_return; + if ((ret = chunk_alloc_dss(size, alignment, zero)) != NULL) + goto label_return; + } /* All strategies for allocation failed. */ ret = NULL; From 34457f51448e81f32a1bff16bbf600b79dd9ec5a Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 3 Nov 2012 21:18:28 -0700 Subject: [PATCH 0256/3378] Fix deadlock in the arenas.purge mallctl. Fix deadlock in the arenas.purge mallctl due to recursive mutex acquisition. 
--- src/ctl.c | 48 ++++++++++++++++++++++-------------------------- 1 file changed, 22 insertions(+), 26 deletions(-) diff --git a/src/ctl.c b/src/ctl.c index e3ab9da9..6e01b1e2 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -113,7 +113,7 @@ CTL_PROTO(opt_prof_final) CTL_PROTO(opt_prof_leak) CTL_PROTO(opt_prof_accum) CTL_PROTO(arena_i_purge) -static int arena_purge(unsigned arena_ind); +static void arena_purge(unsigned arena_ind); CTL_PROTO(arena_i_dss) INDEX_PROTO(arena_i) CTL_PROTO(arenas_bin_i_size) @@ -1274,35 +1274,27 @@ CTL_RO_NL_CGEN(config_prof, opt_prof_accum, opt_prof_accum, bool) /******************************************************************************/ -static int +/* ctl_mutex must be held during execution of this function. */ +static void arena_purge(unsigned arena_ind) { - int ret; + VARIABLE_ARRAY(arena_t *, tarenas, ctl_stats.narenas); - malloc_mutex_lock(&ctl_mtx); - { - VARIABLE_ARRAY(arena_t *, tarenas, ctl_stats.narenas); + malloc_mutex_lock(&arenas_lock); + memcpy(tarenas, arenas, sizeof(arena_t *) * ctl_stats.narenas); + malloc_mutex_unlock(&arenas_lock); - malloc_mutex_lock(&arenas_lock); - memcpy(tarenas, arenas, sizeof(arena_t *) * ctl_stats.narenas); - malloc_mutex_unlock(&arenas_lock); - - if (arena_ind == ctl_stats.narenas) { - unsigned i; - for (i = 0; i < ctl_stats.narenas; i++) { - if (tarenas[i] != NULL) - arena_purge_all(tarenas[i]); - } - } else { - assert(arena_ind < ctl_stats.narenas); - if (tarenas[arena_ind] != NULL) - arena_purge_all(tarenas[arena_ind]); + if (arena_ind == ctl_stats.narenas) { + unsigned i; + for (i = 0; i < ctl_stats.narenas; i++) { + if (tarenas[i] != NULL) + arena_purge_all(tarenas[i]); } + } else { + assert(arena_ind < ctl_stats.narenas); + if (tarenas[arena_ind] != NULL) + arena_purge_all(tarenas[arena_ind]); } - - ret = 0; - malloc_mutex_unlock(&ctl_mtx); - return (ret); } static int @@ -1313,8 +1305,11 @@ arena_i_purge_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, READONLY(); 
WRITEONLY(); - ret = arena_purge(mib[1]); + malloc_mutex_lock(&ctl_mtx); + arena_purge(mib[1]); + malloc_mutex_unlock(&ctl_mtx); + ret = 0; label_return: return (ret); } @@ -1483,7 +1478,8 @@ arenas_purge_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, else { if (arena_ind == UINT_MAX) arena_ind = ctl_stats.narenas; - ret = arena_purge(arena_ind); + arena_purge(arena_ind); + ret = 0; } label_return: From e3d13060c8a04f08764b16b003169eb205fa09eb Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 30 Oct 2012 15:42:37 -0700 Subject: [PATCH 0257/3378] Purge unused dirty pages in a fragmentation-reducing order. Purge unused dirty pages in an order that first performs clean/dirty run defragmentation, in order to mitigate available run fragmentation. Remove the limitation that prevented purging unless at least one chunk worth of dirty pages had accumulated in an arena. This limitation was intended to avoid excessive purging for small applications, but the threshold was arbitrary, and the effect of questionable utility. Relax opt_lg_dirty_mult from 5 to 3. This compensates for increased likelihood of allocating clean runs, given the same ratio of clean:dirty runs, and reduces the potential for repeated purging in pathological large malloc/free loops that push the active:dirty page ratio just over the purge threshold. --- ChangeLog | 5 + doc/jemalloc.xml.in | 2 +- include/jemalloc/internal/arena.h | 53 ++-- src/arena.c | 496 ++++++++++++++++++------------ 4 files changed, 336 insertions(+), 220 deletions(-) diff --git a/ChangeLog b/ChangeLog index 9a618724..edba5269 100644 --- a/ChangeLog +++ b/ChangeLog @@ -8,9 +8,14 @@ found in the git revision history: * 3.x.x (XXX Not released) + Incompatible changes: + - Change the "opt.lg_dirty_mult" from 5 to 3 (32:1 to 8:1). + Bug fixes: - Fix dss/mmap allocation precedence code to use recyclable mmap memory only after primary dss allocation fails. + - Fix deadlock in the "arenas.purge" mallctl. 
This regression was introduced + in 3.1.0 by the addition of the "arena..purge" mallctl. * 3.1.0 (October 16, 2012) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 441c1a4a..754fdcc7 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -833,7 +833,7 @@ for (i = 0; i < nbins; i++) { 2 or a similar system call. This provides the kernel with sufficient information to recycle dirty pages if physical memory becomes scarce and the pages remain unused. The - default minimum ratio is 32:1 (2^5:1); an option value of -1 will + default minimum ratio is 8:1 (2^3:1); an option value of -1 will disable dirty page purging. diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 49213e32..561c9b6f 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -38,10 +38,10 @@ * * (nactive >> opt_lg_dirty_mult) >= ndirty * - * So, supposing that opt_lg_dirty_mult is 5, there can be no less than 32 - * times as many active pages as dirty pages. + * So, supposing that opt_lg_dirty_mult is 3, there can be no less than 8 times + * as many active pages as dirty pages. */ -#define LG_DIRTY_MULT_DEFAULT 5 +#define LG_DIRTY_MULT_DEFAULT 3 typedef struct arena_chunk_map_s arena_chunk_map_t; typedef struct arena_chunk_s arena_chunk_t; @@ -69,7 +69,7 @@ struct arena_chunk_map_s { /* * Linkage for run trees. There are two disjoint uses: * - * 1) arena_t's runs_avail_{clean,dirty} trees. + * 1) arena_t's runs_avail tree. * 2) arena_run_t conceptually uses this linkage for in-use * non-full runs, rather than directly embedding linkage. */ @@ -162,20 +162,24 @@ typedef rb_tree(arena_chunk_map_t) arena_run_tree_t; /* Arena chunk header. */ struct arena_chunk_s { /* Arena that owns the chunk. */ - arena_t *arena; + arena_t *arena; - /* Linkage for the arena's chunks_dirty list. 
*/ - ql_elm(arena_chunk_t) link_dirty; - - /* - * True if the chunk is currently in the chunks_dirty list, due to - * having at some point contained one or more dirty pages. Removal - * from chunks_dirty is lazy, so (dirtied && ndirty == 0) is possible. - */ - bool dirtied; + /* Linkage for tree of arena chunks that contain dirty runs. */ + rb_node(arena_chunk_t) dirty_link; /* Number of dirty pages. */ - size_t ndirty; + size_t ndirty; + + /* Number of available runs. */ + size_t nruns_avail; + + /* + * Number of available run adjacencies. Clean and dirty available runs + * are not coalesced, which causes virtual memory fragmentation. The + * ratio of (nruns_avail-nruns_adjac):nruns_adjac is used for tracking + * this fragmentation. + * */ + size_t nruns_adjac; /* * Map of pages within chunk that keeps track of free/large/small. The @@ -183,7 +187,7 @@ struct arena_chunk_s { * need to be tracked in the map. This omission saves a header page * for common chunk sizes (e.g. 4 MiB). */ - arena_chunk_map_t map[1]; /* Dynamically sized. */ + arena_chunk_map_t map[1]; /* Dynamically sized. */ }; typedef rb_tree(arena_chunk_t) arena_chunk_tree_t; @@ -333,8 +337,8 @@ struct arena_s { dss_prec_t dss_prec; - /* List of dirty-page-containing chunks this arena manages. */ - ql_head(arena_chunk_t) chunks_dirty; + /* Tree of dirty-page-containing chunks this arena manages. */ + arena_chunk_tree_t chunks_dirty; /* * In order to avoid rapid chunk allocation/deallocation when an arena @@ -369,18 +373,9 @@ struct arena_s { /* * Size/address-ordered trees of this arena's available runs. The trees - * are used for first-best-fit run allocation. The dirty tree contains - * runs with dirty pages (i.e. very likely to have been touched and - * therefore have associated physical pages), whereas the clean tree - * contains runs with pages that either have no associated physical - * pages, or have pages that the kernel may recycle at any time due to - * previous madvise(2) calls. 
The dirty tree is used in preference to - * the clean tree for allocations, because using dirty pages reduces - * the amount of dirty purging necessary to keep the active:dirty page - * ratio below the purge threshold. + * are used for first-best-fit run allocation. */ - arena_avail_tree_t runs_avail_clean; - arena_avail_tree_t runs_avail_dirty; + arena_avail_tree_t runs_avail; /* bins is used to store trees of free regions. */ arena_bin_t bins[NBINS]; diff --git a/src/arena.c b/src/arena.c index 1e6964a8..b93a6799 100644 --- a/src/arena.c +++ b/src/arena.c @@ -40,6 +40,12 @@ const uint8_t small_size2bin[] = { /******************************************************************************/ /* Function prototypes for non-inline static functions. */ +static void arena_avail_insert(arena_t *arena, arena_chunk_t *chunk, + size_t pageind, size_t npages, bool maybe_adjac_pred, + bool maybe_adjac_succ); +static void arena_avail_remove(arena_t *arena, arena_chunk_t *chunk, + size_t pageind, size_t npages, bool maybe_adjac_pred, + bool maybe_adjac_succ); static void arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, size_t binind, bool zero); static arena_chunk_t *arena_chunk_alloc(arena_t *arena); @@ -48,8 +54,11 @@ static arena_run_t *arena_run_alloc_helper(arena_t *arena, size_t size, bool large, size_t binind, bool zero); static arena_run_t *arena_run_alloc(arena_t *arena, size_t size, bool large, size_t binind, bool zero); +static arena_chunk_t *chunks_dirty_iter_cb(arena_chunk_tree_t *tree, + arena_chunk_t *chunk, void *arg); static void arena_purge(arena_t *arena, bool all); -static void arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty); +static void arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, + bool cleaned); static void arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, size_t oldsize, size_t newsize); static void arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, @@ -101,9 
+110,6 @@ arena_avail_comp(arena_chunk_map_t *a, arena_chunk_map_t *b) size_t a_size = a->bits & ~PAGE_MASK; size_t b_size = b->bits & ~PAGE_MASK; - assert((a->bits & CHUNK_MAP_KEY) == CHUNK_MAP_KEY || (a->bits & - CHUNK_MAP_DIRTY) == (b->bits & CHUNK_MAP_DIRTY)); - ret = (a_size > b_size) - (a_size < b_size); if (ret == 0) { uintptr_t a_mapelm, b_mapelm; @@ -129,6 +135,159 @@ arena_avail_comp(arena_chunk_map_t *a, arena_chunk_map_t *b) rb_gen(static UNUSED, arena_avail_tree_, arena_avail_tree_t, arena_chunk_map_t, u.rb_link, arena_avail_comp) +static inline int +arena_chunk_dirty_comp(arena_chunk_t *a, arena_chunk_t *b) +{ + size_t a_val, b_val; + + assert(a != NULL); + assert(b != NULL); + + /* + * Order such that chunks with higher fragmentation are "less than" + * those with lower fragmentation. Fragmentation is measured as: + * + * mean current avail run size + * -------------------------------- + * mean defragmented avail run size + * + * navail + * ----------- + * nruns_avail nruns_avail-nruns_adjac + * = ========================= = ----------------------- + * navail nruns_avail + * ----------------------- + * nruns_avail-nruns_adjac + * + * The following code multiplies away the denominator prior to + * comparison, in order to avoid division. + * + */ + a_val = (a->nruns_avail - a->nruns_adjac) * b->nruns_avail; + b_val = (b->nruns_avail - b->nruns_adjac) * a->nruns_avail; + if (a_val < b_val) + return (1); + if (a_val > b_val) + return (-1); + /* Break ties by chunk address. */ + { + uintptr_t a_chunk = (uintptr_t)a; + uintptr_t b_chunk = (uintptr_t)b; + + return ((a_chunk > b_chunk) - (a_chunk < b_chunk)); + } +} + +/* Generate red-black tree functions. 
*/ +rb_gen(static UNUSED, arena_chunk_dirty_, arena_chunk_tree_t, arena_chunk_t, + dirty_link, arena_chunk_dirty_comp) + +static inline bool +arena_avail_adjac_pred(arena_chunk_t *chunk, size_t pageind) +{ + bool ret; + + if (pageind-1 < map_bias) + ret = false; + else { + ret = (arena_mapbits_allocated_get(chunk, pageind-1) == 0); + assert(ret == false || arena_mapbits_dirty_get(chunk, + pageind-1) != arena_mapbits_dirty_get(chunk, pageind)); + } + return (ret); +} + +static inline bool +arena_avail_adjac_succ(arena_chunk_t *chunk, size_t pageind, size_t npages) +{ + bool ret; + + if (pageind+npages == chunk_npages) + ret = false; + else { + assert(pageind+npages < chunk_npages); + ret = (arena_mapbits_allocated_get(chunk, pageind+npages) == 0); + assert(ret == false || arena_mapbits_dirty_get(chunk, pageind) + != arena_mapbits_dirty_get(chunk, pageind+npages)); + } + return (ret); +} + +static inline bool +arena_avail_adjac(arena_chunk_t *chunk, size_t pageind, size_t npages) +{ + + return (arena_avail_adjac_pred(chunk, pageind) || + arena_avail_adjac_succ(chunk, pageind, npages)); +} + +static void +arena_avail_insert(arena_t *arena, arena_chunk_t *chunk, size_t pageind, + size_t npages, bool maybe_adjac_pred, bool maybe_adjac_succ) +{ + + assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >> + LG_PAGE)); + + /* + * chunks_dirty is keyed by nruns_{avail,adjac}, so the chunk must be + * removed and reinserted even if the run to be inserted is clean. 
+ */ + if (chunk->ndirty != 0) + arena_chunk_dirty_remove(&arena->chunks_dirty, chunk); + + if (maybe_adjac_pred && arena_avail_adjac_pred(chunk, pageind)) + chunk->nruns_adjac++; + if (maybe_adjac_succ && arena_avail_adjac_succ(chunk, pageind, npages)) + chunk->nruns_adjac++; + chunk->nruns_avail++; + assert(chunk->nruns_avail > chunk->nruns_adjac); + + if (arena_mapbits_dirty_get(chunk, pageind) != 0) { + arena->ndirty += npages; + chunk->ndirty += npages; + } + if (chunk->ndirty != 0) + arena_chunk_dirty_insert(&arena->chunks_dirty, chunk); + + arena_avail_tree_insert(&arena->runs_avail, arena_mapp_get(chunk, + pageind)); +} + +static void +arena_avail_remove(arena_t *arena, arena_chunk_t *chunk, size_t pageind, + size_t npages, bool maybe_adjac_pred, bool maybe_adjac_succ) +{ + + assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >> + LG_PAGE)); + + /* + * chunks_dirty is keyed by nruns_{avail,adjac}, so the chunk must be + * removed and reinserted even if the run to be removed is clean. 
+ */ + if (chunk->ndirty != 0) + arena_chunk_dirty_remove(&arena->chunks_dirty, chunk); + + if (maybe_adjac_pred && arena_avail_adjac_pred(chunk, pageind)) + chunk->nruns_adjac--; + if (maybe_adjac_succ && arena_avail_adjac_succ(chunk, pageind, npages)) + chunk->nruns_adjac--; + chunk->nruns_avail--; + assert(chunk->nruns_avail > chunk->nruns_adjac || (chunk->nruns_avail + == 0 && chunk->nruns_adjac == 0)); + + if (arena_mapbits_dirty_get(chunk, pageind) != 0) { + arena->ndirty -= npages; + chunk->ndirty -= npages; + } + if (chunk->ndirty != 0) + arena_chunk_dirty_insert(&arena->chunks_dirty, chunk); + + arena_avail_tree_remove(&arena->runs_avail, arena_mapp_get(chunk, + pageind)); +} + static inline void * arena_run_reg_alloc(arena_run_t *run, arena_bin_info_t *bin_info) { @@ -193,7 +352,6 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, arena_chunk_t *chunk; size_t run_ind, total_pages, need_pages, rem_pages, i; size_t flag_dirty; - arena_avail_tree_t *runs_avail; assert((large && binind == BININD_INVALID) || (large == false && binind != BININD_INVALID)); @@ -201,8 +359,6 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE); flag_dirty = arena_mapbits_dirty_get(chunk, run_ind); - runs_avail = (flag_dirty != 0) ? 
&arena->runs_avail_dirty : - &arena->runs_avail_clean; total_pages = arena_mapbits_unallocated_size_get(chunk, run_ind) >> LG_PAGE; assert(arena_mapbits_dirty_get(chunk, run_ind+total_pages-1) == @@ -212,7 +368,7 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, assert(need_pages <= total_pages); rem_pages = total_pages - need_pages; - arena_avail_tree_remove(runs_avail, arena_mapp_get(chunk, run_ind)); + arena_avail_remove(arena, chunk, run_ind, total_pages, true, true); if (config_stats) { /* * Update stats_cactive if nactive is crossing a chunk @@ -244,14 +400,8 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, arena_mapbits_unzeroed_get(chunk, run_ind+total_pages-1)); } - arena_avail_tree_insert(runs_avail, arena_mapp_get(chunk, - run_ind+need_pages)); - } - - /* Update dirty page accounting. */ - if (flag_dirty != 0) { - chunk->ndirty -= need_pages; - arena->ndirty -= need_pages; + arena_avail_insert(arena, chunk, run_ind+need_pages, rem_pages, + false, true); } /* @@ -344,8 +494,6 @@ arena_chunk_alloc(arena_t *arena) size_t i; if (arena->spare != NULL) { - arena_avail_tree_t *runs_avail; - chunk = arena->spare; arena->spare = NULL; @@ -357,14 +505,6 @@ arena_chunk_alloc(arena_t *arena) chunk_npages-1) == arena_maxclass); assert(arena_mapbits_dirty_get(chunk, map_bias) == arena_mapbits_dirty_get(chunk, chunk_npages-1)); - - /* Insert the run into the appropriate runs_avail_* tree. 
*/ - if (arena_mapbits_dirty_get(chunk, map_bias) == 0) - runs_avail = &arena->runs_avail_clean; - else - runs_avail = &arena->runs_avail_dirty; - arena_avail_tree_insert(runs_avail, arena_mapp_get(chunk, - map_bias)); } else { bool zero; size_t unzeroed; @@ -380,8 +520,6 @@ arena_chunk_alloc(arena_t *arena) arena->stats.mapped += chunksize; chunk->arena = arena; - ql_elm_new(chunk, link_dirty); - chunk->dirtied = false; /* * Claim that no pages are in use, since the header is merely @@ -389,6 +527,9 @@ arena_chunk_alloc(arena_t *arena) */ chunk->ndirty = 0; + chunk->nruns_avail = 0; + chunk->nruns_adjac = 0; + /* * Initialize the map to contain one maximal free untouched run. * Mark the pages as zeroed iff chunk_alloc() returned a zeroed @@ -412,20 +553,18 @@ arena_chunk_alloc(arena_t *arena) } arena_mapbits_unallocated_set(chunk, chunk_npages-1, arena_maxclass, unzeroed); - - /* Insert the run into the runs_avail_clean tree. */ - arena_avail_tree_insert(&arena->runs_avail_clean, - arena_mapp_get(chunk, map_bias)); } + /* Insert the run into the runs_avail tree. */ + arena_avail_insert(arena, chunk, map_bias, chunk_npages-map_bias, + false, false); + return (chunk); } static void arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk) { - arena_avail_tree_t *runs_avail; - assert(arena_mapbits_allocated_get(chunk, map_bias) == 0); assert(arena_mapbits_allocated_get(chunk, chunk_npages-1) == 0); assert(arena_mapbits_unallocated_size_get(chunk, map_bias) == @@ -436,24 +575,16 @@ arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk) arena_mapbits_dirty_get(chunk, chunk_npages-1)); /* - * Remove run from the appropriate runs_avail_* tree, so that the arena - * does not use it. + * Remove run from the runs_avail tree, so that the arena does not use + * it. 
*/ - if (arena_mapbits_dirty_get(chunk, map_bias) == 0) - runs_avail = &arena->runs_avail_clean; - else - runs_avail = &arena->runs_avail_dirty; - arena_avail_tree_remove(runs_avail, arena_mapp_get(chunk, map_bias)); + arena_avail_remove(arena, chunk, map_bias, chunk_npages-map_bias, + false, false); if (arena->spare != NULL) { arena_chunk_t *spare = arena->spare; arena->spare = chunk; - if (spare->dirtied) { - ql_remove(&chunk->arena->chunks_dirty, spare, - link_dirty); - arena->ndirty -= spare->ndirty; - } malloc_mutex_unlock(&arena->lock); chunk_dealloc((void *)spare, chunksize, true); malloc_mutex_lock(&arena->lock); @@ -471,19 +602,7 @@ arena_run_alloc_helper(arena_t *arena, size_t size, bool large, size_t binind, arena_chunk_map_t *mapelm, key; key.bits = size | CHUNK_MAP_KEY; - mapelm = arena_avail_tree_nsearch(&arena->runs_avail_dirty, &key); - if (mapelm != NULL) { - arena_chunk_t *run_chunk = CHUNK_ADDR2BASE(mapelm); - size_t pageind = (((uintptr_t)mapelm - - (uintptr_t)run_chunk->map) / sizeof(arena_chunk_map_t)) - + map_bias; - - run = (arena_run_t *)((uintptr_t)run_chunk + (pageind << - LG_PAGE)); - arena_run_split(arena, run, size, large, binind, zero); - return (run); - } - mapelm = arena_avail_tree_nsearch(&arena->runs_avail_clean, &key); + mapelm = arena_avail_tree_nsearch(&arena->runs_avail, &key); if (mapelm != NULL) { arena_chunk_t *run_chunk = CHUNK_ADDR2BASE(mapelm); size_t pageind = (((uintptr_t)mapelm - @@ -537,29 +656,40 @@ arena_run_alloc(arena_t *arena, size_t size, bool large, size_t binind, static inline void arena_maybe_purge(arena_t *arena) { + size_t npurgeable, threshold; - /* Enforce opt_lg_dirty_mult. */ - if (opt_lg_dirty_mult >= 0 && arena->ndirty > arena->npurgatory && - (arena->ndirty - arena->npurgatory) > chunk_npages && - (arena->nactive >> opt_lg_dirty_mult) < (arena->ndirty - - arena->npurgatory)) - arena_purge(arena, false); + /* Don't purge if the option is disabled. 
*/ + if (opt_lg_dirty_mult < 0) + return; + /* Don't purge if all dirty pages are already being purged. */ + if (arena->ndirty <= arena->npurgatory) + return; + npurgeable = arena->ndirty - arena->npurgatory; + threshold = (arena->nactive >> opt_lg_dirty_mult); + /* + * Don't purge unless the number of purgeable pages exceeds the + * threshold. + */ + if (npurgeable <= threshold) + return; + + arena_purge(arena, false); } -static inline void -arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) +static inline size_t +arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk, bool all) { + size_t npurged; ql_head(arena_chunk_map_t) mapelms; arena_chunk_map_t *mapelm; - size_t pageind; - size_t ndirty; + size_t pageind, npages; size_t nmadvise; ql_new(&mapelms); /* * If chunk is the spare, temporarily re-allocate it, 1) so that its - * run is reinserted into runs_avail_dirty, and 2) so that it cannot be + * run is reinserted into runs_avail, and 2) so that it cannot be * completely discarded by another thread while arena->lock is dropped * by this thread. Note that the arena_run_dalloc() call will * implicitly deallocate the chunk, so no explicit action is required @@ -579,54 +709,50 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) arena_chunk_alloc(arena); } - /* Temporarily allocate all free dirty runs within chunk. */ - for (pageind = map_bias; pageind < chunk_npages;) { + if (config_stats) + arena->stats.purged += chunk->ndirty; + + /* + * Operate on all dirty runs if there is no clean/dirty run + * fragmentation. + */ + if (chunk->nruns_adjac == 0) + all = true; + + /* + * Temporarily allocate free dirty runs within chunk. If all is false, + * only operate on dirty runs that are fragments; otherwise operate on + * all dirty runs. 
+ */ + for (pageind = map_bias; pageind < chunk_npages; pageind += npages) { mapelm = arena_mapp_get(chunk, pageind); if (arena_mapbits_allocated_get(chunk, pageind) == 0) { - size_t npages; + size_t run_size = + arena_mapbits_unallocated_size_get(chunk, pageind); - npages = arena_mapbits_unallocated_size_get(chunk, - pageind) >> LG_PAGE; + npages = run_size >> LG_PAGE; assert(pageind + npages <= chunk_npages); assert(arena_mapbits_dirty_get(chunk, pageind) == arena_mapbits_dirty_get(chunk, pageind+npages-1)); - if (arena_mapbits_dirty_get(chunk, pageind) != 0) { - arena_avail_tree_remove( - &arena->runs_avail_dirty, mapelm); - arena_mapbits_large_set(chunk, pageind, - (npages << LG_PAGE), 0); - if (npages > 1) { - arena_mapbits_large_set(chunk, - pageind+npages-1, 0, 0); - } + if (arena_mapbits_dirty_get(chunk, pageind) != 0 && + (all || arena_avail_adjac(chunk, pageind, + npages))) { + arena_run_t *run = (arena_run_t *)((uintptr_t) + chunk + (uintptr_t)(pageind << LG_PAGE)); - if (config_stats) { - /* - * Update stats_cactive if nactive is - * crossing a chunk multiple. - */ - size_t cactive_diff = - CHUNK_CEILING((arena->nactive + - npages) << LG_PAGE) - - CHUNK_CEILING(arena->nactive << - LG_PAGE); - if (cactive_diff != 0) - stats_cactive_add(cactive_diff); - } - arena->nactive += npages; + arena_run_split(arena, run, run_size, true, + BININD_INVALID, false); /* Append to list for later processing. */ ql_elm_new(mapelm, u.ql_link); ql_tail_insert(&mapelms, mapelm, u.ql_link); } - - pageind += npages; } else { - /* Skip allocated run. */ - if (arena_mapbits_large_get(chunk, pageind)) - pageind += arena_mapbits_large_size_get(chunk, + /* Skip run. 
*/ + if (arena_mapbits_large_get(chunk, pageind) != 0) { + npages = arena_mapbits_large_size_get(chunk, pageind) >> LG_PAGE; - else { + } else { size_t binind; arena_bin_info_t *bin_info; arena_run_t *run = (arena_run_t *)((uintptr_t) @@ -636,36 +762,27 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) pageind) == 0); binind = arena_bin_index(arena, run->bin); bin_info = &arena_bin_info[binind]; - pageind += bin_info->run_size >> LG_PAGE; + npages = bin_info->run_size >> LG_PAGE; } } } assert(pageind == chunk_npages); - - if (config_debug) - ndirty = chunk->ndirty; - if (config_stats) - arena->stats.purged += chunk->ndirty; - arena->ndirty -= chunk->ndirty; - chunk->ndirty = 0; - ql_remove(&arena->chunks_dirty, chunk, link_dirty); - chunk->dirtied = false; + assert(chunk->ndirty == 0 || all == false); + assert(chunk->nruns_adjac == 0); malloc_mutex_unlock(&arena->lock); if (config_stats) nmadvise = 0; + npurged = 0; ql_foreach(mapelm, &mapelms, u.ql_link) { - size_t pageind = (((uintptr_t)mapelm - (uintptr_t)chunk->map) / - sizeof(arena_chunk_map_t)) + map_bias; - size_t npages = arena_mapbits_large_size_get(chunk, pageind) >> - LG_PAGE; bool unzeroed; size_t flag_unzeroed, i; + pageind = (((uintptr_t)mapelm - (uintptr_t)chunk->map) / + sizeof(arena_chunk_map_t)) + map_bias; + npages = arena_mapbits_large_size_get(chunk, pageind) >> + LG_PAGE; assert(pageind + npages <= chunk_npages); - assert(ndirty >= npages); - if (config_debug) - ndirty -= npages; unzeroed = pages_purge((void *)((uintptr_t)chunk + (pageind << LG_PAGE)), (npages << LG_PAGE)); flag_unzeroed = unzeroed ? 
CHUNK_MAP_UNZEROED : 0; @@ -683,10 +800,10 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) arena_mapbits_unzeroed_set(chunk, pageind+i, flag_unzeroed); } + npurged += npages; if (config_stats) nmadvise++; } - assert(ndirty == 0); malloc_mutex_lock(&arena->lock); if (config_stats) arena->stats.nmadvise += nmadvise; @@ -694,14 +811,27 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) /* Deallocate runs. */ for (mapelm = ql_first(&mapelms); mapelm != NULL; mapelm = ql_first(&mapelms)) { - size_t pageind = (((uintptr_t)mapelm - (uintptr_t)chunk->map) / - sizeof(arena_chunk_map_t)) + map_bias; - arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + - (uintptr_t)(pageind << LG_PAGE)); + arena_run_t *run; + pageind = (((uintptr_t)mapelm - (uintptr_t)chunk->map) / + sizeof(arena_chunk_map_t)) + map_bias; + run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)(pageind << + LG_PAGE)); ql_remove(&mapelms, mapelm, u.ql_link); - arena_run_dalloc(arena, run, false); + arena_run_dalloc(arena, run, false, true); } + + return (npurged); +} + +static arena_chunk_t * +chunks_dirty_iter_cb(arena_chunk_tree_t *tree, arena_chunk_t *chunk, void *arg) +{ + size_t *ndirty = (size_t *)arg; + + assert(chunk->ndirty != 0); + *ndirty += chunk->ndirty; + return (NULL); } static void @@ -712,14 +842,11 @@ arena_purge(arena_t *arena, bool all) if (config_debug) { size_t ndirty = 0; - ql_foreach(chunk, &arena->chunks_dirty, link_dirty) { - assert(chunk->dirtied); - ndirty += chunk->ndirty; - } + arena_chunk_dirty_iter(&arena->chunks_dirty, NULL, + chunks_dirty_iter_cb, (void *)&ndirty); assert(ndirty == arena->ndirty); } assert(arena->ndirty > arena->npurgatory || all); - assert(arena->ndirty - arena->npurgatory > chunk_npages || all); assert((arena->nactive >> opt_lg_dirty_mult) < (arena->ndirty - arena->npurgatory) || all); @@ -731,16 +858,24 @@ arena_purge(arena_t *arena, bool all) * purge, and add the result to arena->npurgatory. 
This will keep * multiple threads from racing to reduce ndirty below the threshold. */ - npurgatory = arena->ndirty - arena->npurgatory; - if (all == false) { - assert(npurgatory >= arena->nactive >> opt_lg_dirty_mult); - npurgatory -= arena->nactive >> opt_lg_dirty_mult; + { + size_t npurgeable = arena->ndirty - arena->npurgatory; + + if (all == false) { + size_t threshold = (arena->nactive >> + opt_lg_dirty_mult); + + npurgatory = npurgeable - threshold; + } else + npurgatory = npurgeable; } arena->npurgatory += npurgatory; while (npurgatory > 0) { + size_t npurgeable, npurged, nunpurged; + /* Get next chunk with dirty pages. */ - chunk = ql_first(&arena->chunks_dirty); + chunk = arena_chunk_dirty_first(&arena->chunks_dirty); if (chunk == NULL) { /* * This thread was unable to purge as many pages as @@ -751,23 +886,15 @@ arena_purge(arena_t *arena, bool all) arena->npurgatory -= npurgatory; return; } - while (chunk->ndirty == 0) { - ql_remove(&arena->chunks_dirty, chunk, link_dirty); - chunk->dirtied = false; - chunk = ql_first(&arena->chunks_dirty); - if (chunk == NULL) { - /* Same logic as for above. */ - arena->npurgatory -= npurgatory; - return; - } - } + npurgeable = chunk->ndirty; + assert(npurgeable != 0); - if (chunk->ndirty > npurgatory) { + if (npurgeable > npurgatory && chunk->nruns_adjac == 0) { /* - * This thread will, at a minimum, purge all the dirty - * pages in chunk, so set npurgatory to reflect this - * thread's commitment to purge the pages. This tends - * to reduce the chances of the following scenario: + * This thread will purge all the dirty pages in chunk, + * so set npurgatory to reflect this thread's intent to + * purge the pages. This tends to reduce the chances + * of the following scenario: * * 1) This thread sets arena->npurgatory such that * (arena->ndirty - arena->npurgatory) is at the @@ -781,13 +908,20 @@ arena_purge(arena_t *arena, bool all) * because all of the purging work being done really * needs to happen. 
*/ - arena->npurgatory += chunk->ndirty - npurgatory; - npurgatory = chunk->ndirty; + arena->npurgatory += npurgeable - npurgatory; + npurgatory = npurgeable; } - arena->npurgatory -= chunk->ndirty; - npurgatory -= chunk->ndirty; - arena_chunk_purge(arena, chunk); + /* + * Keep track of how many pages are purgeable, versus how many + * actually get purged, and adjust counters accordingly. + */ + arena->npurgatory -= npurgeable; + npurgatory -= npurgeable; + npurged = arena_chunk_purge(arena, chunk, all); + nunpurged = npurgeable - npurged; + arena->npurgatory += nunpurged; + npurgatory += nunpurged; } } @@ -801,11 +935,10 @@ arena_purge_all(arena_t *arena) } static void -arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty) +arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, bool cleaned) { arena_chunk_t *chunk; size_t size, run_ind, run_pages, flag_dirty; - arena_avail_tree_t *runs_avail; chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE); @@ -836,15 +969,14 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty) /* * The run is dirty if the caller claims to have dirtied it, as well as - * if it was already dirty before being allocated. + * if it was already dirty before being allocated and the caller + * doesn't claim to have cleaned it. */ assert(arena_mapbits_dirty_get(chunk, run_ind) == arena_mapbits_dirty_get(chunk, run_ind+run_pages-1)); - if (arena_mapbits_dirty_get(chunk, run_ind) != 0) + if (cleaned == false && arena_mapbits_dirty_get(chunk, run_ind) != 0) dirty = true; flag_dirty = dirty ? CHUNK_MAP_DIRTY : 0; - runs_avail = dirty ? &arena->runs_avail_dirty : - &arena->runs_avail_clean; /* Mark pages as unallocated in the chunk map. 
*/ if (dirty) { @@ -852,9 +984,6 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty) CHUNK_MAP_DIRTY); arena_mapbits_unallocated_set(chunk, run_ind+run_pages-1, size, CHUNK_MAP_DIRTY); - - chunk->ndirty += run_pages; - arena->ndirty += run_pages; } else { arena_mapbits_unallocated_set(chunk, run_ind, size, arena_mapbits_unzeroed_get(chunk, run_ind)); @@ -878,8 +1007,8 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty) run_ind+run_pages+nrun_pages-1) == nrun_size); assert(arena_mapbits_dirty_get(chunk, run_ind+run_pages+nrun_pages-1) == flag_dirty); - arena_avail_tree_remove(runs_avail, - arena_mapp_get(chunk, run_ind+run_pages)); + arena_avail_remove(arena, chunk, run_ind+run_pages, nrun_pages, + false, true); size += nrun_size; run_pages += nrun_pages; @@ -905,8 +1034,8 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty) assert(arena_mapbits_unallocated_size_get(chunk, run_ind) == prun_size); assert(arena_mapbits_dirty_get(chunk, run_ind) == flag_dirty); - arena_avail_tree_remove(runs_avail, arena_mapp_get(chunk, - run_ind)); + arena_avail_remove(arena, chunk, run_ind, prun_pages, true, + false); size += prun_size; run_pages += prun_pages; @@ -921,19 +1050,7 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty) arena_mapbits_unallocated_size_get(chunk, run_ind+run_pages-1)); assert(arena_mapbits_dirty_get(chunk, run_ind) == arena_mapbits_dirty_get(chunk, run_ind+run_pages-1)); - arena_avail_tree_insert(runs_avail, arena_mapp_get(chunk, run_ind)); - - if (dirty) { - /* - * Insert into chunks_dirty before potentially calling - * arena_chunk_dealloc(), so that chunks_dirty and - * arena->ndirty are consistent. - */ - if (chunk->dirtied == false) { - ql_tail_insert(&arena->chunks_dirty, chunk, link_dirty); - chunk->dirtied = true; - } - } + arena_avail_insert(arena, chunk, run_ind, run_pages, true, true); /* Deallocate chunk if it is now completely unused. 
*/ if (size == arena_maxclass) { @@ -982,7 +1099,7 @@ arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, arena_mapbits_large_set(chunk, pageind+head_npages, newsize, flag_dirty); - arena_run_dalloc(arena, run, false); + arena_run_dalloc(arena, run, false, false); } static void @@ -1015,7 +1132,7 @@ arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, flag_dirty); arena_run_dalloc(arena, (arena_run_t *)((uintptr_t)run + newsize), - dirty); + dirty, false); } static arena_run_t * @@ -1526,7 +1643,7 @@ arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, ((past - run_ind) << LG_PAGE), false); /* npages = past - run_ind; */ } - arena_run_dalloc(arena, run, true); + arena_run_dalloc(arena, run, true, false); malloc_mutex_unlock(&arena->lock); /****************************/ malloc_mutex_lock(&bin->lock); @@ -1638,7 +1755,7 @@ arena_dalloc_large_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr) } } - arena_run_dalloc(arena, (arena_run_t *)ptr, true); + arena_run_dalloc(arena, (arena_run_t *)ptr, true, false); } void @@ -1985,15 +2102,14 @@ arena_new(arena_t *arena, unsigned ind) arena->dss_prec = chunk_dss_prec_get(); /* Initialize chunks. */ - ql_new(&arena->chunks_dirty); + arena_chunk_dirty_new(&arena->chunks_dirty); arena->spare = NULL; arena->nactive = 0; arena->ndirty = 0; arena->npurgatory = 0; - arena_avail_tree_new(&arena->runs_avail_clean); - arena_avail_tree_new(&arena->runs_avail_dirty); + arena_avail_tree_new(&arena->runs_avail); /* Initialize bins. 
*/ for (i = 0; i < NBINS; i++) { From 847ff223dedad6b0f5186f904c817c0306ce599f Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Tue, 23 Oct 2012 08:42:48 +0200 Subject: [PATCH 0258/3378] Don't register jemalloc's zone allocator if something else already replaced the system default zone --- src/zone.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/zone.c b/src/zone.c index cde5d49a..c62c183f 100644 --- a/src/zone.c +++ b/src/zone.c @@ -171,6 +171,16 @@ void register_zone(void) { + /* + * If something else replaced the system default zone allocator, don't + * register jemalloc's. + */ + malloc_zone_t *default_zone = malloc_default_zone(); + if (!default_zone->zone_name || + strcmp(default_zone->zone_name, "DefaultMallocZone") != 0) { + return; + } + zone.size = (void *)zone_size; zone.malloc = (void *)zone_malloc; zone.calloc = (void *)zone_calloc; @@ -241,7 +251,7 @@ register_zone(void) * then becomes the default. */ do { - malloc_zone_t *default_zone = malloc_default_zone(); + default_zone = malloc_default_zone(); malloc_zone_unregister(default_zone); malloc_zone_register(default_zone); } while (malloc_default_zone() != &zone); From ed90c97332e34e9552cdde102d8b4a9cd11bb5cb Mon Sep 17 00:00:00 2001 From: Jan Beich Date: Thu, 18 Oct 2012 00:06:32 +0400 Subject: [PATCH 0259/3378] document what stats.active does not track Based on http://www.canonware.com/pipermail/jemalloc-discuss/2012-March/000164.html --- doc/jemalloc.xml.in | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 754fdcc7..54b87474 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1512,7 +1512,9 @@ malloc_conf = "xmalloc:true";]]> application. This is a multiple of the page size, and greater than or equal to stats.allocated. - + This does not include + stats.arenas.<i>.pdirty and pages + entirely devoted to allocator metadata. 
@@ -1628,7 +1630,7 @@ malloc_conf = "xmalloc:true";]]> Number of pages in active runs. - + stats.arenas.<i>.pdirty (size_t) From abf6739317742ca4677bf885178984a8757ee14a Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 7 Nov 2012 10:05:04 -0800 Subject: [PATCH 0260/3378] Tweak chunk purge order according to fragmentation. Tweak chunk purge order to purge unfragmented chunks from high to low memory. This facilitates dirty run reuse. --- src/arena.c | 45 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 34 insertions(+), 11 deletions(-) diff --git a/src/arena.c b/src/arena.c index b93a6799..0c53b071 100644 --- a/src/arena.c +++ b/src/arena.c @@ -138,14 +138,22 @@ rb_gen(static UNUSED, arena_avail_tree_, arena_avail_tree_t, arena_chunk_map_t, static inline int arena_chunk_dirty_comp(arena_chunk_t *a, arena_chunk_t *b) { - size_t a_val, b_val; assert(a != NULL); assert(b != NULL); + /* + * Short-circuit for self comparison. The following comparison code + * would come to the same result, but at the cost of executing the slow + * path. + */ + if (a == b) + return (0); + /* * Order such that chunks with higher fragmentation are "less than" - * those with lower fragmentation. Fragmentation is measured as: + * those with lower fragmentation -- purging order is from "least" to + * "greatest". Fragmentation is measured as: * * mean current avail run size * -------------------------------- @@ -163,18 +171,33 @@ arena_chunk_dirty_comp(arena_chunk_t *a, arena_chunk_t *b) * comparison, in order to avoid division. * */ - a_val = (a->nruns_avail - a->nruns_adjac) * b->nruns_avail; - b_val = (b->nruns_avail - b->nruns_adjac) * a->nruns_avail; - if (a_val < b_val) - return (1); - if (a_val > b_val) - return (-1); - /* Break ties by chunk address. 
*/ + { + size_t a_val = (a->nruns_avail - a->nruns_adjac) * + b->nruns_avail; + size_t b_val = (b->nruns_avail - b->nruns_adjac) * + a->nruns_avail; + + if (a_val < b_val) + return (1); + if (a_val > b_val) + return (-1); + } + /* + * Break ties by chunk address. For fragmented chunks, report lower + * addresses as "lower", so that fragmentation reduction happens first + * at lower addresses. However, use the opposite ordering for + * unfragmented chunks, in order to increase the chances of + * re-allocating dirty runs. + */ { uintptr_t a_chunk = (uintptr_t)a; uintptr_t b_chunk = (uintptr_t)b; - - return ((a_chunk > b_chunk) - (a_chunk < b_chunk)); + int ret = ((a_chunk > b_chunk) - (a_chunk < b_chunk)); + if (a->nruns_adjac == 0) { + assert(b->nruns_adjac == 0); + ret = -ret; + } + return (ret); } } From 556ddc7fa94f13c388ec6c9d2d54ace250540f2c Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 7 Nov 2012 15:16:29 -0800 Subject: [PATCH 0261/3378] Update ChangeLog for 3.2.0. --- ChangeLog | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index edba5269..ab3476c6 100644 --- a/ChangeLog +++ b/ChangeLog @@ -6,10 +6,14 @@ found in the git revision history: http://www.canonware.com/cgi-bin/gitweb.cgi?p=jemalloc.git git://canonware.com/jemalloc.git -* 3.x.x (XXX Not released) +* 3.2.0 (November 9, 2012) + + In addition to a couple of bug fixes, this version modifies page run + allocation and dirty page purging algorithms in order to better control + page-level virtual memory fragmentation. Incompatible changes: - - Change the "opt.lg_dirty_mult" from 5 to 3 (32:1 to 8:1). + - Change the "opt.lg_dirty_mult" default from 5 to 3 (32:1 to 8:1). 
Bug fixes: - Fix dss/mmap allocation precedence code to use recyclable mmap memory only From a3b3386ddde8048b9d6b54c397bb93da5e806cef Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 13 Nov 2012 12:56:27 -0800 Subject: [PATCH 0262/3378] Avoid arena_prof_accum()-related locking when possible. Refactor arena_prof_accum() and its callers to avoid arena locking when prof_interval is 0 (as when profiling is disabled). Reported by Ben Maurer. --- include/jemalloc/internal/arena.h | 42 ++++++++++++++++++- include/jemalloc/internal/private_namespace.h | 2 + src/arena.c | 27 ++---------- src/prof.c | 6 +-- src/tcache.c | 9 +--- 5 files changed, 50 insertions(+), 36 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 561c9b6f..5ba16406 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -400,7 +400,6 @@ extern arena_bin_info_t arena_bin_info[NBINS]; #define nlclasses (chunk_npages - map_bias) void arena_purge_all(arena_t *arena); -void arena_prof_accum(arena_t *arena, uint64_t accumbytes); void arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, size_t binind, uint64_t prof_accumbytes); void arena_alloc_junk_small(void *ptr, arena_bin_info_t *bin_info, @@ -464,6 +463,9 @@ void arena_mapbits_small_set(arena_chunk_t *chunk, size_t pageind, size_t runind, size_t binind, size_t flags); void arena_mapbits_unzeroed_set(arena_chunk_t *chunk, size_t pageind, size_t unzeroed); +void arena_prof_accum_impl(arena_t *arena, uint64_t accumbytes); +void arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes); +void arena_prof_accum(arena_t *arena, uint64_t accumbytes); size_t arena_ptr_small_binind_get(const void *ptr, size_t mapbits); size_t arena_bin_index(arena_t *arena, arena_bin_t *bin); unsigned arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, @@ -661,6 +663,44 @@ arena_mapbits_unzeroed_set(arena_chunk_t *chunk, size_t pageind, *mapbitsp = (*mapbitsp & 
~CHUNK_MAP_UNZEROED) | unzeroed; } +JEMALLOC_INLINE void +arena_prof_accum_impl(arena_t *arena, uint64_t accumbytes) +{ + + cassert(config_prof); + assert(prof_interval != 0); + + arena->prof_accumbytes += accumbytes; + if (arena->prof_accumbytes >= prof_interval) { + prof_idump(); + arena->prof_accumbytes -= prof_interval; + } +} + +JEMALLOC_INLINE void +arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes) +{ + + cassert(config_prof); + + if (prof_interval == 0) + return; + arena_prof_accum_impl(arena, accumbytes); +} + +JEMALLOC_INLINE void +arena_prof_accum(arena_t *arena, uint64_t accumbytes) +{ + + cassert(config_prof); + + if (prof_interval == 0) + return; + malloc_mutex_lock(&arena->lock); + arena_prof_accum_impl(arena, accumbytes); + malloc_mutex_unlock(&arena->lock); +} + JEMALLOC_INLINE size_t arena_ptr_small_binind_get(const void *ptr, size_t mapbits) { diff --git a/include/jemalloc/internal/private_namespace.h b/include/jemalloc/internal/private_namespace.h index 06241cd2..951df24b 100644 --- a/include/jemalloc/internal/private_namespace.h +++ b/include/jemalloc/internal/private_namespace.h @@ -41,6 +41,8 @@ #define arena_postfork_parent JEMALLOC_N(arena_postfork_parent) #define arena_prefork JEMALLOC_N(arena_prefork) #define arena_prof_accum JEMALLOC_N(arena_prof_accum) +#define arena_prof_accum_impl JEMALLOC_N(arena_prof_accum_impl) +#define arena_prof_accum_locked JEMALLOC_N(arena_prof_accum_locked) #define arena_prof_ctx_get JEMALLOC_N(arena_prof_ctx_get) #define arena_prof_ctx_set JEMALLOC_N(arena_prof_ctx_set) #define arena_prof_promoted JEMALLOC_N(arena_prof_promoted) diff --git a/src/arena.c b/src/arena.c index 0c53b071..f9406c79 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1321,21 +1321,6 @@ arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin) return (arena_run_reg_alloc(bin->runcur, bin_info)); } -void -arena_prof_accum(arena_t *arena, uint64_t accumbytes) -{ - - cassert(config_prof); - - if (config_prof && prof_interval != 0) { - 
arena->prof_accumbytes += accumbytes; - if (arena->prof_accumbytes >= prof_interval) { - prof_idump(); - arena->prof_accumbytes -= prof_interval; - } - } -} - void arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, size_t binind, uint64_t prof_accumbytes) @@ -1347,11 +1332,8 @@ arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, size_t binind, assert(tbin->ncached == 0); - if (config_prof) { - malloc_mutex_lock(&arena->lock); + if (config_prof) arena_prof_accum(arena, prof_accumbytes); - malloc_mutex_unlock(&arena->lock); - } bin = &arena->bins[binind]; malloc_mutex_lock(&bin->lock); for (i = 0, nfill = (tcache_bin_info[binind].ncached_max >> @@ -1459,11 +1441,8 @@ arena_malloc_small(arena_t *arena, size_t size, bool zero) bin->stats.nrequests++; } malloc_mutex_unlock(&bin->lock); - if (config_prof && isthreaded == false) { - malloc_mutex_lock(&arena->lock); + if (config_prof && isthreaded == false) arena_prof_accum(arena, size); - malloc_mutex_unlock(&arena->lock); - } if (zero == false) { if (config_fill) { @@ -1507,7 +1486,7 @@ arena_malloc_large(arena_t *arena, size_t size, bool zero) arena->stats.lstats[(size >> LG_PAGE) - 1].curruns++; } if (config_prof) - arena_prof_accum(arena, size); + arena_prof_accum_locked(arena, size); malloc_mutex_unlock(&arena->lock); if (zero == false) { diff --git a/src/prof.c b/src/prof.c index 04964ef7..e21d1667 100644 --- a/src/prof.c +++ b/src/prof.c @@ -26,7 +26,7 @@ bool opt_prof_leak = false; bool opt_prof_accum = false; char opt_prof_prefix[PATH_MAX + 1]; -uint64_t prof_interval; +uint64_t prof_interval = 0; bool prof_promote; /* @@ -1206,13 +1206,11 @@ prof_boot1(void) */ opt_prof = true; opt_prof_gdump = false; - prof_interval = 0; } else if (opt_prof) { if (opt_lg_prof_interval >= 0) { prof_interval = (((uint64_t)1U) << opt_lg_prof_interval); - } else - prof_interval = 0; + } } prof_promote = (opt_prof && opt_lg_prof_sample > LG_PAGE); diff --git a/src/tcache.c b/src/tcache.c index 47e14f30..7befdc86 
100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -97,9 +97,7 @@ tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem, arena_bin_t *bin = &arena->bins[binind]; if (config_prof && arena == tcache->arena) { - malloc_mutex_lock(&arena->lock); arena_prof_accum(arena, tcache->prof_accumbytes); - malloc_mutex_unlock(&arena->lock); tcache->prof_accumbytes = 0; } @@ -180,7 +178,7 @@ tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem, malloc_mutex_lock(&arena->lock); if ((config_prof || config_stats) && arena == tcache->arena) { if (config_prof) { - arena_prof_accum(arena, + arena_prof_accum_locked(arena, tcache->prof_accumbytes); tcache->prof_accumbytes = 0; } @@ -343,11 +341,8 @@ tcache_destroy(tcache_t *tcache) } } - if (config_prof && tcache->prof_accumbytes > 0) { - malloc_mutex_lock(&tcache->arena->lock); + if (config_prof && tcache->prof_accumbytes > 0) arena_prof_accum(tcache->arena, tcache->prof_accumbytes); - malloc_mutex_unlock(&tcache->arena->lock); - } tcache_size = arena_salloc(tcache, false); if (tcache_size <= SMALL_MAXCLASS) { From 9906660eb7365abb54e4495407ffddb1069ef654 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Mon, 19 Nov 2012 10:55:26 +0100 Subject: [PATCH 0263/3378] Allow to build without exporting symbols When statically linking jemalloc, it may be beneficial not to export its symbols if it makes sense, which allows the compiler and the linker to do some further optimizations. --- INSTALL | 5 +++++ configure.ac | 7 +++++++ 2 files changed, 12 insertions(+) diff --git a/INSTALL b/INSTALL index e40a7edd..7c2ed686 100644 --- a/INSTALL +++ b/INSTALL @@ -55,6 +55,11 @@ any of the following arguments (not a definitive list) to 'configure': jemalloc overlays the default malloc zone, but makes no attempt to actually replace the "malloc", "calloc", etc. symbols. +--without-export + Don't export public APIs. 
This can be useful when building jemalloc as a + static library, or to avoid exporting public APIs when using the zone + allocator on OSX. + --with-private-namespace= Prefix all library-private APIs with . For shared libraries, symbol visibility mechanisms prevent these symbols from being exported, but diff --git a/configure.ac b/configure.ac index 1c52439e..8558961c 100644 --- a/configure.ac +++ b/configure.ac @@ -471,6 +471,13 @@ for stem in ${public_syms}; do AC_DEFINE_UNQUOTED([${n}], [${m}]) done +AC_ARG_WITH([export], + [AS_HELP_STRING([--without-export], [disable exporting jemalloc public APIs])], + [if test "x$with_export" = "xno"; then + AC_DEFINE([JEMALLOC_EXPORT],[])] +fi] +) + dnl Do not mangle library-private APIs by default. AC_ARG_WITH([private_namespace], [AS_HELP_STRING([--with-private-namespace=], [Prefix to prepend to all library-private APIs])], From 6eb84fbe315add1e1d4f8deedc25d260fff3ae97 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 29 Nov 2012 22:13:04 -0800 Subject: [PATCH 0264/3378] Fix "arenas.extend" mallctl to return the number of arenas. Reported by Mike Hommey. --- ChangeLog | 5 +++++ src/ctl.c | 20 +++++++++++--------- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/ChangeLog b/ChangeLog index ab3476c6..3dff01e1 100644 --- a/ChangeLog +++ b/ChangeLog @@ -6,6 +6,11 @@ found in the git revision history: http://www.canonware.com/cgi-bin/gitweb.cgi?p=jemalloc.git git://canonware.com/jemalloc.git +* 3.x.x (Not yet released) + + Bug fixes: + - Fix "arenas.extend" mallctl to output the number of arenas. + * 3.2.0 (November 9, 2012) In addition to a couple of bug fixes, this version modifies page run diff --git a/src/ctl.c b/src/ctl.c index 6e01b1e2..f2ef4e60 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -960,11 +960,11 @@ ctl_postfork_child(void) if (*oldlenp != sizeof(t)) { \ size_t copylen = (sizeof(t) <= *oldlenp) \ ? 
sizeof(t) : *oldlenp; \ - memcpy(oldp, (void *)&v, copylen); \ + memcpy(oldp, (void *)&(v), copylen); \ ret = EINVAL; \ goto label_return; \ } else \ - *(t *)oldp = v; \ + *(t *)oldp = (v); \ } \ } while (0) @@ -974,7 +974,7 @@ ctl_postfork_child(void) ret = EINVAL; \ goto label_return; \ } \ - v = *(t *)newp; \ + (v) = *(t *)newp; \ } \ } while (0) @@ -995,7 +995,7 @@ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ if (l) \ malloc_mutex_lock(&ctl_mtx); \ READONLY(); \ - oldval = v; \ + oldval = (v); \ READ(oldval, t); \ \ ret = 0; \ @@ -1017,7 +1017,7 @@ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ return (ENOENT); \ malloc_mutex_lock(&ctl_mtx); \ READONLY(); \ - oldval = v; \ + oldval = (v); \ READ(oldval, t); \ \ ret = 0; \ @@ -1036,7 +1036,7 @@ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ \ malloc_mutex_lock(&ctl_mtx); \ READONLY(); \ - oldval = v; \ + oldval = (v); \ READ(oldval, t); \ \ ret = 0; \ @@ -1060,7 +1060,7 @@ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ if ((c) == false) \ return (ENOENT); \ READONLY(); \ - oldval = v; \ + oldval = (v); \ READ(oldval, t); \ \ ret = 0; \ @@ -1077,7 +1077,7 @@ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ t oldval; \ \ READONLY(); \ - oldval = v; \ + oldval = (v); \ READ(oldval, t); \ \ ret = 0; \ @@ -1492,6 +1492,7 @@ arenas_extend_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; + unsigned narenas; malloc_mutex_lock(&ctl_mtx); READONLY(); @@ -1499,7 +1500,8 @@ arenas_extend_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, ret = EAGAIN; goto label_return; } - READ(ctl_stats.narenas - 1, unsigned); + narenas = ctl_stats.narenas - 1; + READ(narenas, unsigned); ret = 0; label_return: From 1271185b87fcf54afb37dc05e7e0c58e5fb8f06a Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 12 Dec 2012 10:12:18 -0800 
Subject: [PATCH 0265/3378] Fix chunk_recycle() Valgrind integration. Fix chunk_recycyle() to unconditionally inform Valgrind that returned memory is undefined. This fixes Valgrind warnings that would result from a huge allocation being freed, then recycled for use as an arena chunk. The arena code would write metadata to the chunk header, and Valgrind would consider these invalid writes. --- ChangeLog | 2 ++ src/chunk.c | 5 ++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/ChangeLog b/ChangeLog index 3dff01e1..374459de 100644 --- a/ChangeLog +++ b/ChangeLog @@ -10,6 +10,8 @@ found in the git revision history: Bug fixes: - Fix "arenas.extend" mallctl to output the number of arenas. + - Fix chunk_recycyle() to unconditionally inform Valgrind that returned memory + is undefined. * 3.2.0 (November 9, 2012) diff --git a/src/chunk.c b/src/chunk.c index 1a3bb4f6..40c108a4 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -122,10 +122,9 @@ chunk_recycle(extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, size_t size, } base_node_dealloc(node); } - if (zeroed == false && *zero) { - VALGRIND_MAKE_MEM_UNDEFINED(ret, size); + VALGRIND_MAKE_MEM_UNDEFINED(ret, size); + if (zeroed == false && *zero) memset(ret, 0, size); - } return (ret); } From 1bf2743e08ba66cc141e296812839947223e4370 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 23 Dec 2012 08:51:48 -0800 Subject: [PATCH 0266/3378] Add clipping support to lg_chunk option processing. Modify processing of the lg_chunk option so that it clips an out-of-range input to the edge of the valid range. This makes it possible to request the minimum possible chunk size without intimate knowledge of allocator internals. Submitted by Ian Lepore (see FreeBSD PR bin/174641). 
--- doc/jemalloc.xml.in | 7 +++++-- src/jemalloc.c | 42 +++++++++++++++++++++++------------------- 2 files changed, 28 insertions(+), 21 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 54b87474..09305801 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -790,8 +790,11 @@ for (i = 0; i < nbins; i++) { (size_t) r- - Virtual memory chunk size (log base 2). The default - chunk size is 4 MiB (2^22). + Virtual memory chunk size (log base 2). If a chunk + size outside the supported size range is specified, the size is + silently clipped to the minimum/maximum supported size. The default + chunk size is 4 MiB (2^22). + diff --git a/src/jemalloc.c b/src/jemalloc.c index 8a667b62..cecd012c 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -469,7 +469,7 @@ malloc_conf_init(void) while (*opts != '\0' && malloc_conf_next(&opts, &k, &klen, &v, &vlen) == false) { -#define CONF_HANDLE_BOOL_HIT(o, n, hit) \ +#define CONF_HANDLE_BOOL(o, n) \ if (sizeof(n)-1 == klen && strncmp(n, k, \ klen) == 0) { \ if (strncmp("true", v, vlen) == 0 && \ @@ -483,16 +483,9 @@ malloc_conf_init(void) "Invalid conf value", \ k, klen, v, vlen); \ } \ - hit = true; \ - } else \ - hit = false; -#define CONF_HANDLE_BOOL(o, n) { \ - bool hit; \ - CONF_HANDLE_BOOL_HIT(o, n, hit); \ - if (hit) \ continue; \ -} -#define CONF_HANDLE_SIZE_T(o, n, min, max) \ + } +#define CONF_HANDLE_SIZE_T(o, n, min, max, clip) \ if (sizeof(n)-1 == klen && strncmp(n, k, \ klen) == 0) { \ uintmax_t um; \ @@ -505,12 +498,22 @@ malloc_conf_init(void) malloc_conf_error( \ "Invalid conf value", \ k, klen, v, vlen); \ - } else if (um < min || um > max) { \ - malloc_conf_error( \ - "Out-of-range conf value", \ - k, klen, v, vlen); \ - } else \ - o = um; \ + } else if (clip) { \ + if (um < min) \ + o = min; \ + else if (um > max) \ + o = max; \ + else \ + o = um; \ + } else { \ + if (um < min || um > max) { \ + malloc_conf_error( \ + "Out-of-range " \ + "conf value", \ + k, klen, v, vlen); \ + } 
else \ + o = um; \ + } \ continue; \ } #define CONF_HANDLE_SSIZE_T(o, n, min, max) \ @@ -555,7 +558,8 @@ malloc_conf_init(void) * config_fill. */ CONF_HANDLE_SIZE_T(opt_lg_chunk, "lg_chunk", LG_PAGE + - (config_fill ? 2 : 1), (sizeof(size_t) << 3) - 1) + (config_fill ? 2 : 1), (sizeof(size_t) << 3) - 1, + true) if (strncmp("dss", k, klen) == 0) { int i; bool match = false; @@ -581,14 +585,14 @@ malloc_conf_init(void) continue; } CONF_HANDLE_SIZE_T(opt_narenas, "narenas", 1, - SIZE_T_MAX) + SIZE_T_MAX, false) CONF_HANDLE_SSIZE_T(opt_lg_dirty_mult, "lg_dirty_mult", -1, (sizeof(size_t) << 3) - 1) CONF_HANDLE_BOOL(opt_stats_print, "stats_print") if (config_fill) { CONF_HANDLE_BOOL(opt_junk, "junk") CONF_HANDLE_SIZE_T(opt_quarantine, "quarantine", - 0, SIZE_T_MAX) + 0, SIZE_T_MAX, false) CONF_HANDLE_BOOL(opt_redzone, "redzone") CONF_HANDLE_BOOL(opt_zero, "zero") } From d0357f7a09a6fcbf1df461b07851f61a7f0bdc2d Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Mon, 26 Nov 2012 18:52:41 +0100 Subject: [PATCH 0267/3378] Allow to disable the zone allocator on Darwin --- INSTALL | 4 ++++ Makefile.in | 3 ++- configure.ac | 21 ++++++++++++++++++++- 3 files changed, 26 insertions(+), 2 deletions(-) diff --git a/INSTALL b/INSTALL index 7c2ed686..9bd1dac0 100644 --- a/INSTALL +++ b/INSTALL @@ -141,6 +141,10 @@ any of the following arguments (not a definitive list) to 'configure': --disable-experimental Disable support for the experimental API (*allocm()). +--disable-zone-allocator + Disable zone allocator for Darwin. This means jemalloc won't be hooked as + the default allocator on OSX/iOS. + --enable-utrace Enable utrace(2)-based allocation tracing. This feature is not broadly portable (FreeBSD has it, but Linux and OS X do not). 
diff --git a/Makefile.in b/Makefile.in index 36448189..0062747c 100644 --- a/Makefile.in +++ b/Makefile.in @@ -48,6 +48,7 @@ cfgoutputs_in := @cfgoutputs_in@ cfgoutputs_out := @cfgoutputs_out@ enable_autogen := @enable_autogen@ enable_experimental := @enable_experimental@ +enable_zone_allocator := @enable_zone_allocator@ DSO_LDFLAGS = @DSO_LDFLAGS@ SOREV = @SOREV@ PIC_CFLAGS = @PIC_CFLAGS@ @@ -80,7 +81,7 @@ CSRCS := $(srcroot)src/jemalloc.c $(srcroot)src/arena.c $(srcroot)src/atomic.c \ $(srcroot)src/mutex.c $(srcroot)src/prof.c $(srcroot)src/quarantine.c \ $(srcroot)src/rtree.c $(srcroot)src/stats.c $(srcroot)src/tcache.c \ $(srcroot)src/util.c $(srcroot)src/tsd.c -ifeq (macho, $(ABI)) +ifeq ($(enable_zone_allocator), 1) CSRCS += $(srcroot)src/zone.c endif ifeq ($(IMPORTLIB),$(SO)) diff --git a/configure.ac b/configure.ac index 8558961c..249a66c4 100644 --- a/configure.ac +++ b/configure.ac @@ -1185,7 +1185,26 @@ fi dnl ============================================================================ dnl Darwin-related configuration. 
-if test "x${abi}" = "xmacho" ; then +AC_ARG_ENABLE([zone-allocator], + [AS_HELP_STRING([--disable-zone-allocator], + [Disable zone allocator for Darwin])], +[if test "x$enable_zone_allocator" = "xno" ; then + enable_zone_allocator="0" +else + enable_zone_allocator="1" +fi +], +[if test "x${abi}" = "xmacho"; then + enable_zone_allocator="1" +fi +] +) +AC_SUBST([enable_zone_allocator]) + +if test "x${enable_zone_allocator}" = "x1" ; then + if test "x${abi}" != "xmacho"; then + AC_MSG_ERROR([--enable-zone-allocator is only supported on Darwin]) + fi AC_DEFINE([JEMALLOC_IVSALLOC], [ ]) AC_DEFINE([JEMALLOC_ZONE], [ ]) From 5135e34062584f67ae2d12b1f3940a9fad32ca9f Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Thu, 6 Dec 2012 22:16:26 +0100 Subject: [PATCH 0268/3378] Allow to enable ivsalloc independently --- INSTALL | 6 ++++++ configure.ac | 19 +++++++++++++++++-- 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/INSTALL b/INSTALL index 9bd1dac0..6e371ce5 100644 --- a/INSTALL +++ b/INSTALL @@ -79,6 +79,12 @@ any of the following arguments (not a definitive list) to 'configure': --enable-debug Enable assertions and validation code. This incurs a substantial performance hit, but is very useful during application development. + Implies --enable-ivsalloc. + +--enable-ivsalloc + Enable validation code, which verifies that pointers reside within + jemalloc-owned chunks before dereferencing them. This incurs a substantial + performance hit. --disable-stats Disable statistics gathering functionality. See the "opt.stats_print" diff --git a/configure.ac b/configure.ac index 249a66c4..9d062a98 100644 --- a/configure.ac +++ b/configure.ac @@ -551,7 +551,7 @@ fi dnl Do not compile with debugging by default. 
AC_ARG_ENABLE([debug], - [AS_HELP_STRING([--enable-debug], [Build debugging code])], + [AS_HELP_STRING([--enable-debug], [Build debugging code (implies --enable-ivsalloc)])], [if test "x$enable_debug" = "xno" ; then enable_debug="0" else @@ -562,10 +562,25 @@ fi ) if test "x$enable_debug" = "x1" ; then AC_DEFINE([JEMALLOC_DEBUG], [ ]) - AC_DEFINE([JEMALLOC_IVSALLOC], [ ]) + enable_ivsalloc="1" fi AC_SUBST([enable_debug]) +dnl Do not validate pointers by default. +AC_ARG_ENABLE([ivsalloc], + [AS_HELP_STRING([--enable-ivsalloc], [Validate pointers passed through the public API])], +[if test "x$enable_ivsalloc" = "xno" ; then + enable_ivsalloc="0" +else + enable_ivsalloc="1" +fi +], +[enable_ivsalloc="0"] +) +if test "x$enable_ivsalloc" = "x1" ; then + AC_DEFINE([JEMALLOC_IVSALLOC], [ ]) +fi + dnl Only optimize if not debugging. if test "x$enable_debug" = "x0" -a "x$no_CFLAGS" = "xyes" ; then dnl Make sure that an optimization flag was not specified in EXTRA_CFLAGS. From 6e6164ae159d9c3bd4f44bd2cba6fc3237687c80 Mon Sep 17 00:00:00 2001 From: Garrett Cooper Date: Sun, 2 Dec 2012 17:56:25 -0800 Subject: [PATCH 0269/3378] Don't mangle errno with free(3) if utrace(2) fails This ensures POLA on FreeBSD (at least) as free(3) is generally assumed to not fiddle around with errno. 
Signed-off-by: Garrett Cooper --- src/jemalloc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/jemalloc.c b/src/jemalloc.c index cecd012c..ec88700a 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -83,11 +83,13 @@ typedef struct { #ifdef JEMALLOC_UTRACE # define UTRACE(a, b, c) do { \ if (opt_utrace) { \ + int utrace_serrno = errno; \ malloc_utrace_t ut; \ ut.p = (a); \ ut.s = (b); \ ut.r = (c); \ utrace(&ut, sizeof(ut)); \ + errno = utrace_serrno; \ } \ } while (0) #else From 72c1e59fd249f99dcf5d3992cbdd570a381a67ce Mon Sep 17 00:00:00 2001 From: Garrett Cooper Date: Sun, 2 Dec 2012 17:57:28 -0800 Subject: [PATCH 0270/3378] Improve configure tests for ffsl In particular: - ffsl always returns int, not long, on FreeBSD, Linux, and OSX. - Mute compiler warnings about rv being unused (and the potential for compilers optimizing out the call completely) by dumping the value with printf(3). Signed-off-by: Garrett Cooper --- configure.ac | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/configure.ac b/configure.ac index 9d062a98..88297c5f 100644 --- a/configure.ac +++ b/configure.ac @@ -928,7 +928,7 @@ AC_CACHE_CHECK([STATIC_PAGE_SHIFT], #include ]], [[ - long result; + int result; FILE *f; #ifdef _WIN32 @@ -947,7 +947,7 @@ AC_CACHE_CHECK([STATIC_PAGE_SHIFT], if (f == NULL) { return 1; } - fprintf(f, "%u\n", result); + fprintf(f, "%d\n", result); fclose(f); return 0; @@ -1092,11 +1092,13 @@ dnl Check for ffsl(3), and fail if not found. This function exists on all dnl platforms that jemalloc currently has a chance of functioning on without dnl modification. 
JE_COMPILABLE([a program using ffsl], [ +#include #include #include ], [ { int rv = ffsl(0x08); + printf("%d\n", rv); } ], [je_cv_function_ffsl]) if test "x${je_cv_function_ffsl}" != "xyes" ; then From 13e4e24c42d17492f85cdd550c1e13d6f929307e Mon Sep 17 00:00:00 2001 From: Garrett Cooper Date: Sun, 2 Dec 2012 17:58:40 -0800 Subject: [PATCH 0271/3378] Fix build break on *BSD Linux uses alloca.h; many other operating systems define alloca(3) in stdlib.h. Signed-off-by: Garrett Cooper --- configure.ac | 1 + include/jemalloc/internal/jemalloc_internal.h.in | 6 +++++- include/jemalloc/jemalloc_defs.h.in | 5 +++++ 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 88297c5f..aee74828 100644 --- a/configure.ac +++ b/configure.ac @@ -261,6 +261,7 @@ case "${host}" in CFLAGS="$CFLAGS" CPPFLAGS="$CPPFLAGS -D_GNU_SOURCE" abi="elf" + AC_DEFINE([JEMALLOC_HAS_ALLOCA_H]) AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED], [ ]) AC_DEFINE([JEMALLOC_THREADED_INIT], [ ]) JEMALLOC_USABLE_SIZE_CONST="" diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 475821ac..484f351b 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -359,7 +359,11 @@ static const bool config_ivsalloc = # include # define alloca _alloca # else -# include +# ifdef JEMALLOC_HAS_ALLOCA_H +# include +# else +# include +# endif # endif # define VARIABLE_ARRAY(type, name, count) \ type *name = alloca(sizeof(type) * count) diff --git a/include/jemalloc/jemalloc_defs.h.in b/include/jemalloc/jemalloc_defs.h.in index 1cd60254..3fcf93ce 100644 --- a/include/jemalloc/jemalloc_defs.h.in +++ b/include/jemalloc/jemalloc_defs.h.in @@ -249,6 +249,11 @@ #undef JEMALLOC_PURGE_MADVISE_DONTNEED #undef JEMALLOC_PURGE_MADVISE_FREE +/* + * Define if operating system has alloca.h header. + */ +#undef JEMALLOC_HAS_ALLOCA_H + /* sizeof(void *) == 2^LG_SIZEOF_PTR. 
*/ #undef LG_SIZEOF_PTR From 14a2c6a698a207ac3f3825443cf3441c8842e990 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 21 Jan 2013 19:56:34 -0800 Subject: [PATCH 0272/3378] Avoid validating freshly mapped memory. Move validation of supposedly zeroed pages from chunk_alloc() to chunk_recycle(). There is little point to validating newly mapped memory returned by chunk_alloc_mmap(), and memory that comes from sbrk() is explicitly zeroed, so there is little risk to assuming that chunk_alloc_dss() actually does the zeroing properly. This relaxation of validation can make a big difference to application startup time and overall system usage on platforms that use jemalloc as the system allocator (namely FreeBSD). Submitted by Ian Lepore . --- src/chunk.c | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/src/chunk.c b/src/chunk.c index 40c108a4..46e387e1 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -78,6 +78,9 @@ chunk_recycle(extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, size_t size, assert(node->size >= leadsize + size); trailsize = node->size - leadsize - size; ret = (void *)((uintptr_t)node->addr + leadsize); + zeroed = node->zeroed; + if (zeroed) + *zero = true; /* Remove node from the tree. 
*/ extent_tree_szad_remove(chunks_szad, node); extent_tree_ad_remove(chunks_ad, node); @@ -114,17 +117,22 @@ chunk_recycle(extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, size_t size, } malloc_mutex_unlock(&chunks_mtx); - zeroed = false; - if (node != NULL) { - if (node->zeroed) { - zeroed = true; - *zero = true; - } + if (node != NULL) base_node_dealloc(node); - } VALGRIND_MAKE_MEM_UNDEFINED(ret, size); - if (zeroed == false && *zero) - memset(ret, 0, size); + if (*zero) { + if (zeroed == false) + memset(ret, 0, size); + else if (config_debug) { + size_t i; + size_t *p = (size_t *)(uintptr_t)ret; + + VALGRIND_MAKE_MEM_DEFINED(ret, size); + for (i = 0; i < size / sizeof(size_t); i++) + assert(p[i] == 0); + VALGRIND_MAKE_MEM_UNDEFINED(ret, size); + } + } return (ret); } @@ -193,14 +201,6 @@ label_return: if (config_prof && opt_prof && opt_prof_gdump && gdump) prof_gdump(); } - if (config_debug && *zero && ret != NULL) { - size_t i; - size_t *p = (size_t *)(uintptr_t)ret; - - VALGRIND_MAKE_MEM_DEFINED(ret, size); - for (i = 0; i < size / sizeof(size_t); i++) - assert(p[i] == 0); - } assert(CHUNK_ADDR2BASE(ret) == ret); return (ret); } From 38067483c542adfe092644d1ecc103c6bc74add0 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 21 Jan 2013 20:04:42 -0800 Subject: [PATCH 0273/3378] Tighten valgrind integration. Tighten valgrind integration such that immediately after memory is validated or zeroed, valgrind is told to forget the memory's 'defined' state. The only place newly allocated memory should be left marked as 'defined' is in the public functions (e.g. calloc() and realloc()). 
--- include/jemalloc/internal/tcache.h | 2 ++ src/arena.c | 50 +++++++++++++++++------------- src/chunk_dss.c | 1 + 3 files changed, 31 insertions(+), 22 deletions(-) diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index 38d735c8..6957da3f 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -320,6 +320,7 @@ tcache_alloc_small(tcache_t *tcache, size_t size, bool zero) } VALGRIND_MAKE_MEM_UNDEFINED(ret, size); memset(ret, 0, size); + VALGRIND_MAKE_MEM_UNDEFINED(ret, size); } if (config_stats) @@ -370,6 +371,7 @@ tcache_alloc_large(tcache_t *tcache, size_t size, bool zero) } else { VALGRIND_MAKE_MEM_UNDEFINED(ret, size); memset(ret, 0, size); + VALGRIND_MAKE_MEM_UNDEFINED(ret, size); } if (config_stats) diff --git a/src/arena.c b/src/arena.c index f9406c79..8d50f4d4 100644 --- a/src/arena.c +++ b/src/arena.c @@ -359,13 +359,29 @@ arena_run_reg_dalloc(arena_run_t *run, void *ptr) } static inline void -arena_chunk_validate_zeroed(arena_chunk_t *chunk, size_t run_ind) +arena_run_zero(arena_chunk_t *chunk, size_t run_ind, size_t npages) +{ + + VALGRIND_MAKE_MEM_UNDEFINED((void *)((uintptr_t)chunk + (run_ind << + LG_PAGE)), (npages << LG_PAGE)); + memset((void *)((uintptr_t)chunk + (run_ind << LG_PAGE)), 0, + (npages << LG_PAGE)); + VALGRIND_MAKE_MEM_UNDEFINED((void *)((uintptr_t)chunk + (run_ind << + LG_PAGE)), (npages << LG_PAGE)); +} + +static inline void +arena_run_page_validate_zeroed(arena_chunk_t *chunk, size_t run_ind) { size_t i; UNUSED size_t *p = (size_t *)((uintptr_t)chunk + (run_ind << LG_PAGE)); + VALGRIND_MAKE_MEM_DEFINED((void *)((uintptr_t)chunk + (run_ind << + LG_PAGE)), PAGE); for (i = 0; i < PAGE / sizeof(size_t); i++) assert(p[i] == 0); + VALGRIND_MAKE_MEM_UNDEFINED((void *)((uintptr_t)chunk + (run_ind << + LG_PAGE)), PAGE); } static void @@ -441,19 +457,10 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, for (i = 0; i < need_pages; i++) { if 
(arena_mapbits_unzeroed_get(chunk, run_ind+i) != 0) { - VALGRIND_MAKE_MEM_UNDEFINED( - (void *)((uintptr_t) - chunk + ((run_ind+i) << - LG_PAGE)), PAGE); - memset((void *)((uintptr_t) - chunk + ((run_ind+i) << - LG_PAGE)), 0, PAGE); + arena_run_zero(chunk, run_ind+i, + 1); } else if (config_debug) { - VALGRIND_MAKE_MEM_DEFINED( - (void *)((uintptr_t) - chunk + ((run_ind+i) << - LG_PAGE)), PAGE); - arena_chunk_validate_zeroed( + arena_run_page_validate_zeroed( chunk, run_ind+i); } } @@ -462,11 +469,7 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, * The run is dirty, so all pages must be * zeroed. */ - VALGRIND_MAKE_MEM_UNDEFINED((void - *)((uintptr_t)chunk + (run_ind << - LG_PAGE)), (need_pages << LG_PAGE)); - memset((void *)((uintptr_t)chunk + (run_ind << - LG_PAGE)), 0, (need_pages << LG_PAGE)); + arena_run_zero(chunk, run_ind, need_pages); } } @@ -492,19 +495,21 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, */ if (config_debug && flag_dirty == 0 && arena_mapbits_unzeroed_get(chunk, run_ind) == 0) - arena_chunk_validate_zeroed(chunk, run_ind); + arena_run_page_validate_zeroed(chunk, run_ind); for (i = 1; i < need_pages - 1; i++) { arena_mapbits_small_set(chunk, run_ind+i, i, binind, 0); if (config_debug && flag_dirty == 0 && - arena_mapbits_unzeroed_get(chunk, run_ind+i) == 0) - arena_chunk_validate_zeroed(chunk, run_ind+i); + arena_mapbits_unzeroed_get(chunk, run_ind+i) == 0) { + arena_run_page_validate_zeroed(chunk, + run_ind+i); + } } arena_mapbits_small_set(chunk, run_ind+need_pages-1, need_pages-1, binind, flag_dirty); if (config_debug && flag_dirty == 0 && arena_mapbits_unzeroed_get(chunk, run_ind+need_pages-1) == 0) { - arena_chunk_validate_zeroed(chunk, + arena_run_page_validate_zeroed(chunk, run_ind+need_pages-1); } } @@ -1459,6 +1464,7 @@ arena_malloc_small(arena_t *arena, size_t size, bool zero) } VALGRIND_MAKE_MEM_UNDEFINED(ret, size); memset(ret, 0, size); + 
VALGRIND_MAKE_MEM_UNDEFINED(ret, size); } return (ret); diff --git a/src/chunk_dss.c b/src/chunk_dss.c index 24781cc5..d1aea930 100644 --- a/src/chunk_dss.c +++ b/src/chunk_dss.c @@ -127,6 +127,7 @@ chunk_alloc_dss(size_t size, size_t alignment, bool *zero) if (*zero) { VALGRIND_MAKE_MEM_UNDEFINED(ret, size); memset(ret, 0, size); + VALGRIND_MAKE_MEM_UNDEFINED(ret, size); } return (ret); } From 88393cb0eb9a046000d20809809d4adac11957ab Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 22 Jan 2013 08:45:43 -0800 Subject: [PATCH 0274/3378] Add and use JEMALLOC_ALWAYS_INLINE. Add JEMALLOC_ALWAYS_INLINE and use it to guarantee that the entire fast paths of the primary allocation/deallocation functions are inlined. --- include/jemalloc/internal/arena.h | 42 +++++++++--------- .../jemalloc/internal/jemalloc_internal.h.in | 44 +++++++++++-------- include/jemalloc/internal/tcache.h | 18 ++++---- src/jemalloc.c | 6 +-- 4 files changed, 59 insertions(+), 51 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 5ba16406..8fdee931 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -480,7 +480,7 @@ void arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr, #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_)) # ifdef JEMALLOC_ARENA_INLINE_A -JEMALLOC_INLINE arena_chunk_map_t * +JEMALLOC_ALWAYS_INLINE arena_chunk_map_t * arena_mapp_get(arena_chunk_t *chunk, size_t pageind) { @@ -490,21 +490,21 @@ arena_mapp_get(arena_chunk_t *chunk, size_t pageind) return (&chunk->map[pageind-map_bias]); } -JEMALLOC_INLINE size_t * +JEMALLOC_ALWAYS_INLINE size_t * arena_mapbitsp_get(arena_chunk_t *chunk, size_t pageind) { return (&arena_mapp_get(chunk, pageind)->bits); } -JEMALLOC_INLINE size_t +JEMALLOC_ALWAYS_INLINE size_t arena_mapbits_get(arena_chunk_t *chunk, size_t pageind) { return (*arena_mapbitsp_get(chunk, pageind)); } -JEMALLOC_INLINE size_t +JEMALLOC_ALWAYS_INLINE size_t 
arena_mapbits_unallocated_size_get(arena_chunk_t *chunk, size_t pageind) { size_t mapbits; @@ -514,7 +514,7 @@ arena_mapbits_unallocated_size_get(arena_chunk_t *chunk, size_t pageind) return (mapbits & ~PAGE_MASK); } -JEMALLOC_INLINE size_t +JEMALLOC_ALWAYS_INLINE size_t arena_mapbits_large_size_get(arena_chunk_t *chunk, size_t pageind) { size_t mapbits; @@ -525,7 +525,7 @@ arena_mapbits_large_size_get(arena_chunk_t *chunk, size_t pageind) return (mapbits & ~PAGE_MASK); } -JEMALLOC_INLINE size_t +JEMALLOC_ALWAYS_INLINE size_t arena_mapbits_small_runind_get(arena_chunk_t *chunk, size_t pageind) { size_t mapbits; @@ -536,7 +536,7 @@ arena_mapbits_small_runind_get(arena_chunk_t *chunk, size_t pageind) return (mapbits >> LG_PAGE); } -JEMALLOC_INLINE size_t +JEMALLOC_ALWAYS_INLINE size_t arena_mapbits_binind_get(arena_chunk_t *chunk, size_t pageind) { size_t mapbits; @@ -548,7 +548,7 @@ arena_mapbits_binind_get(arena_chunk_t *chunk, size_t pageind) return (binind); } -JEMALLOC_INLINE size_t +JEMALLOC_ALWAYS_INLINE size_t arena_mapbits_dirty_get(arena_chunk_t *chunk, size_t pageind) { size_t mapbits; @@ -557,7 +557,7 @@ arena_mapbits_dirty_get(arena_chunk_t *chunk, size_t pageind) return (mapbits & CHUNK_MAP_DIRTY); } -JEMALLOC_INLINE size_t +JEMALLOC_ALWAYS_INLINE size_t arena_mapbits_unzeroed_get(arena_chunk_t *chunk, size_t pageind) { size_t mapbits; @@ -566,7 +566,7 @@ arena_mapbits_unzeroed_get(arena_chunk_t *chunk, size_t pageind) return (mapbits & CHUNK_MAP_UNZEROED); } -JEMALLOC_INLINE size_t +JEMALLOC_ALWAYS_INLINE size_t arena_mapbits_large_get(arena_chunk_t *chunk, size_t pageind) { size_t mapbits; @@ -575,7 +575,7 @@ arena_mapbits_large_get(arena_chunk_t *chunk, size_t pageind) return (mapbits & CHUNK_MAP_LARGE); } -JEMALLOC_INLINE size_t +JEMALLOC_ALWAYS_INLINE size_t arena_mapbits_allocated_get(arena_chunk_t *chunk, size_t pageind) { size_t mapbits; @@ -584,7 +584,7 @@ arena_mapbits_allocated_get(arena_chunk_t *chunk, size_t pageind) return (mapbits & 
CHUNK_MAP_ALLOCATED); } -JEMALLOC_INLINE void +JEMALLOC_ALWAYS_INLINE void arena_mapbits_unallocated_set(arena_chunk_t *chunk, size_t pageind, size_t size, size_t flags) { @@ -597,7 +597,7 @@ arena_mapbits_unallocated_set(arena_chunk_t *chunk, size_t pageind, size_t size, *mapbitsp = size | CHUNK_MAP_BININD_INVALID | flags; } -JEMALLOC_INLINE void +JEMALLOC_ALWAYS_INLINE void arena_mapbits_unallocated_size_set(arena_chunk_t *chunk, size_t pageind, size_t size) { @@ -609,7 +609,7 @@ arena_mapbits_unallocated_size_set(arena_chunk_t *chunk, size_t pageind, *mapbitsp = size | (*mapbitsp & PAGE_MASK); } -JEMALLOC_INLINE void +JEMALLOC_ALWAYS_INLINE void arena_mapbits_large_set(arena_chunk_t *chunk, size_t pageind, size_t size, size_t flags) { @@ -624,7 +624,7 @@ arena_mapbits_large_set(arena_chunk_t *chunk, size_t pageind, size_t size, CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; } -JEMALLOC_INLINE void +JEMALLOC_ALWAYS_INLINE void arena_mapbits_large_binind_set(arena_chunk_t *chunk, size_t pageind, size_t binind) { @@ -637,7 +637,7 @@ arena_mapbits_large_binind_set(arena_chunk_t *chunk, size_t pageind, CHUNK_MAP_BININD_SHIFT); } -JEMALLOC_INLINE void +JEMALLOC_ALWAYS_INLINE void arena_mapbits_small_set(arena_chunk_t *chunk, size_t pageind, size_t runind, size_t binind, size_t flags) { @@ -653,7 +653,7 @@ arena_mapbits_small_set(arena_chunk_t *chunk, size_t pageind, size_t runind, flags | unzeroed | CHUNK_MAP_ALLOCATED; } -JEMALLOC_INLINE void +JEMALLOC_ALWAYS_INLINE void arena_mapbits_unzeroed_set(arena_chunk_t *chunk, size_t pageind, size_t unzeroed) { @@ -701,7 +701,7 @@ arena_prof_accum(arena_t *arena, uint64_t accumbytes) malloc_mutex_unlock(&arena->lock); } -JEMALLOC_INLINE size_t +JEMALLOC_ALWAYS_INLINE size_t arena_ptr_small_binind_get(const void *ptr, size_t mapbits) { size_t binind; @@ -896,7 +896,7 @@ arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx) arena_mapp_get(chunk, pageind)->prof_ctx = ctx; } -JEMALLOC_INLINE void * +JEMALLOC_ALWAYS_INLINE void * 
arena_malloc(arena_t *arena, size_t size, bool zero, bool try_tcache) { tcache_t *tcache; @@ -927,7 +927,7 @@ arena_malloc(arena_t *arena, size_t size, bool zero, bool try_tcache) } /* Return the size of the allocation pointed to by ptr. */ -JEMALLOC_INLINE size_t +JEMALLOC_ALWAYS_INLINE size_t arena_salloc(const void *ptr, bool demote) { size_t ret; @@ -973,7 +973,7 @@ arena_salloc(const void *ptr, bool demote) return (ret); } -JEMALLOC_INLINE void +JEMALLOC_ALWAYS_INLINE void arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr, bool try_tcache) { size_t pageind, mapbits; diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 484f351b..fb53e131 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -233,10 +233,17 @@ static const bool config_ivsalloc = #ifdef JEMALLOC_DEBUG /* Disable inlining to make debugging easier. */ +# define JEMALLOC_ALWAYS_INLINE # define JEMALLOC_INLINE # define inline #else # define JEMALLOC_ENABLE_INLINE +# ifdef JEMALLOC_HAVE_ATTR +# define JEMALLOC_ALWAYS_INLINE \ + static JEMALLOC_ATTR(unused) JEMALLOC_ATTR(always_inline) +# else +# define JEMALLOC_ALWAYS_INLINE static inline +# endif # define JEMALLOC_INLINE static inline # ifdef _MSC_VER # define inline _inline @@ -595,13 +602,14 @@ arena_t *choose_arena(arena_t *arena); * for allocations. */ malloc_tsd_externs(arenas, arena_t *) -malloc_tsd_funcs(JEMALLOC_INLINE, arenas, arena_t *, NULL, arenas_cleanup) +malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, arenas, arena_t *, NULL, + arenas_cleanup) /* * Compute usable size that would result from allocating an object with the * specified size. */ -JEMALLOC_INLINE size_t +JEMALLOC_ALWAYS_INLINE size_t s2u(size_t size) { @@ -616,7 +624,7 @@ s2u(size_t size) * Compute usable size that would result from allocating an object with the * specified size and alignment. 
*/ -JEMALLOC_INLINE size_t +JEMALLOC_ALWAYS_INLINE size_t sa2u(size_t size, size_t alignment) { size_t usize; @@ -761,7 +769,7 @@ malloc_tsd_protos(JEMALLOC_ATTR(unused), thread_allocated, thread_allocated_t) #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) -JEMALLOC_INLINE void * +JEMALLOC_ALWAYS_INLINE void * imallocx(size_t size, bool try_tcache, arena_t *arena) { @@ -773,14 +781,14 @@ imallocx(size_t size, bool try_tcache, arena_t *arena) return (huge_malloc(size, false)); } -JEMALLOC_INLINE void * +JEMALLOC_ALWAYS_INLINE void * imalloc(size_t size) { return (imallocx(size, true, NULL)); } -JEMALLOC_INLINE void * +JEMALLOC_ALWAYS_INLINE void * icallocx(size_t size, bool try_tcache, arena_t *arena) { @@ -790,14 +798,14 @@ icallocx(size_t size, bool try_tcache, arena_t *arena) return (huge_malloc(size, true)); } -JEMALLOC_INLINE void * +JEMALLOC_ALWAYS_INLINE void * icalloc(size_t size) { return (icallocx(size, true, NULL)); } -JEMALLOC_INLINE void * +JEMALLOC_ALWAYS_INLINE void * ipallocx(size_t usize, size_t alignment, bool zero, bool try_tcache, arena_t *arena) { @@ -822,7 +830,7 @@ ipallocx(size_t usize, size_t alignment, bool zero, bool try_tcache, return (ret); } -JEMALLOC_INLINE void * +JEMALLOC_ALWAYS_INLINE void * ipalloc(size_t usize, size_t alignment, bool zero) { @@ -834,7 +842,7 @@ ipalloc(size_t usize, size_t alignment, bool zero) * void *ptr = [...] 
* size_t sz = isalloc(ptr, config_prof); */ -JEMALLOC_INLINE size_t +JEMALLOC_ALWAYS_INLINE size_t isalloc(const void *ptr, bool demote) { size_t ret; @@ -853,7 +861,7 @@ isalloc(const void *ptr, bool demote) return (ret); } -JEMALLOC_INLINE size_t +JEMALLOC_ALWAYS_INLINE size_t ivsalloc(const void *ptr, bool demote) { @@ -886,7 +894,7 @@ p2rz(const void *ptr) return (u2rz(usize)); } -JEMALLOC_INLINE void +JEMALLOC_ALWAYS_INLINE void idallocx(void *ptr, bool try_tcache) { arena_chunk_t *chunk; @@ -900,14 +908,14 @@ idallocx(void *ptr, bool try_tcache) huge_dalloc(ptr, true); } -JEMALLOC_INLINE void +JEMALLOC_ALWAYS_INLINE void idalloc(void *ptr) { idallocx(ptr, true); } -JEMALLOC_INLINE void +JEMALLOC_ALWAYS_INLINE void iqallocx(void *ptr, bool try_tcache) { @@ -917,14 +925,14 @@ iqallocx(void *ptr, bool try_tcache) idallocx(ptr, try_tcache); } -JEMALLOC_INLINE void +JEMALLOC_ALWAYS_INLINE void iqalloc(void *ptr) { iqallocx(ptr, true); } -JEMALLOC_INLINE void * +JEMALLOC_ALWAYS_INLINE void * irallocx(void *ptr, size_t size, size_t extra, size_t alignment, bool zero, bool no_move, bool try_tcache_alloc, bool try_tcache_dalloc, arena_t *arena) { @@ -993,7 +1001,7 @@ irallocx(void *ptr, size_t size, size_t extra, size_t alignment, bool zero, } } -JEMALLOC_INLINE void * +JEMALLOC_ALWAYS_INLINE void * iralloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero, bool no_move) { @@ -1003,7 +1011,7 @@ iralloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero, } malloc_tsd_externs(thread_allocated, thread_allocated_t) -malloc_tsd_funcs(JEMALLOC_INLINE, thread_allocated, thread_allocated_t, +malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, thread_allocated, thread_allocated_t, THREAD_ALLOCATED_INITIALIZER, malloc_tsd_no_cleanup) #endif diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index 6957da3f..71900c2f 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -140,11 +140,11 @@ 
void tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size); #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_TCACHE_C_)) /* Map of thread-specific caches. */ malloc_tsd_externs(tcache, tcache_t *) -malloc_tsd_funcs(JEMALLOC_INLINE, tcache, tcache_t *, NULL, +malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, tcache, tcache_t *, NULL, tcache_thread_cleanup) /* Per thread flag that allows thread caches to be disabled. */ malloc_tsd_externs(tcache_enabled, tcache_enabled_t) -malloc_tsd_funcs(JEMALLOC_INLINE, tcache_enabled, tcache_enabled_t, +malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, tcache_enabled, tcache_enabled_t, tcache_enabled_default, malloc_tsd_no_cleanup) JEMALLOC_INLINE void @@ -206,7 +206,7 @@ tcache_enabled_set(bool enabled) } } -JEMALLOC_INLINE tcache_t * +JEMALLOC_ALWAYS_INLINE tcache_t * tcache_get(bool create) { tcache_t *tcache; @@ -258,7 +258,7 @@ tcache_get(bool create) return (tcache); } -JEMALLOC_INLINE void +JEMALLOC_ALWAYS_INLINE void tcache_event(tcache_t *tcache) { @@ -271,7 +271,7 @@ tcache_event(tcache_t *tcache) tcache_event_hard(tcache); } -JEMALLOC_INLINE void * +JEMALLOC_ALWAYS_INLINE void * tcache_alloc_easy(tcache_bin_t *tbin) { void *ret; @@ -287,7 +287,7 @@ tcache_alloc_easy(tcache_bin_t *tbin) return (ret); } -JEMALLOC_INLINE void * +JEMALLOC_ALWAYS_INLINE void * tcache_alloc_small(tcache_t *tcache, size_t size, bool zero) { void *ret; @@ -331,7 +331,7 @@ tcache_alloc_small(tcache_t *tcache, size_t size, bool zero) return (ret); } -JEMALLOC_INLINE void * +JEMALLOC_ALWAYS_INLINE void * tcache_alloc_large(tcache_t *tcache, size_t size, bool zero) { void *ret; @@ -384,7 +384,7 @@ tcache_alloc_large(tcache_t *tcache, size_t size, bool zero) return (ret); } -JEMALLOC_INLINE void +JEMALLOC_ALWAYS_INLINE void tcache_dalloc_small(tcache_t *tcache, void *ptr, size_t binind) { tcache_bin_t *tbin; @@ -408,7 +408,7 @@ tcache_dalloc_small(tcache_t *tcache, void *ptr, size_t binind) tcache_event(tcache); } -JEMALLOC_INLINE void 
+JEMALLOC_ALWAYS_INLINE void tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size) { size_t binind; diff --git a/src/jemalloc.c b/src/jemalloc.c index ec88700a..58e18df4 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -279,7 +279,7 @@ arenas_cleanup(void *arg) malloc_mutex_unlock(&arenas_lock); } -static inline bool +static JEMALLOC_ATTR(always_inline) bool malloc_init(void) { @@ -892,7 +892,7 @@ JEMALLOC_ATTR(nonnull(1)) * Avoid any uncertainty as to how many backtrace frames to ignore in * PROF_ALLOC_PREP(). */ -JEMALLOC_ATTR(noinline) +JEMALLOC_NOINLINE #endif static int imemalign(void **memptr, size_t alignment, size_t size, @@ -1378,7 +1378,7 @@ je_mallctlbymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, */ #ifdef JEMALLOC_EXPERIMENTAL -JEMALLOC_INLINE void * +static JEMALLOC_ATTR(always_inline) void * iallocm(size_t usize, size_t alignment, bool zero, bool try_tcache, arena_t *arena) { From 7329a4f038ed096f3cfa11cb60433f44009fbe16 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 22 Jan 2013 10:53:29 -0800 Subject: [PATCH 0275/3378] Fix AC_PATH_PROG() calls to specify default. Fix AC_PATH_PROG() calls to specify 'false' as the default, so that if the configure script fails to find a program, the false program is instead called, and an error occurs. Prior to this fix, if xsltproc could not be found, make would not report an error due to the leading -o in the xsltproc invocation. Reported by David Reiss. --- configure.ac | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/configure.ac b/configure.ac index aee74828..8c130827 100644 --- a/configure.ac +++ b/configure.ac @@ -86,7 +86,7 @@ MANDIR=`eval echo $MANDIR` AC_SUBST([MANDIR]) dnl Support for building documentation. 
-AC_PATH_PROG([XSLTPROC], [xsltproc], , [$PATH]) +AC_PATH_PROG([XSLTPROC], [xsltproc], [false], [$PATH]) if test -d "/usr/share/xml/docbook/stylesheet/docbook-xsl" ; then DEFAULT_XSLROOT="/usr/share/xml/docbook/stylesheet/docbook-xsl" elif test -d "/usr/share/sgml/docbook/xsl-stylesheets" ; then @@ -403,9 +403,9 @@ AC_SUBST([enable_autogen]) AC_PROG_INSTALL AC_PROG_RANLIB -AC_PATH_PROG([AR], [ar], , [$PATH]) -AC_PATH_PROG([LD], [ld], , [$PATH]) -AC_PATH_PROG([AUTOCONF], [autoconf], , [$PATH]) +AC_PATH_PROG([AR], [ar], [false], [$PATH]) +AC_PATH_PROG([LD], [ld], [false], [$PATH]) +AC_PATH_PROG([AUTOCONF], [autoconf], [false], [$PATH]) public_syms="malloc_conf malloc_message malloc calloc posix_memalign aligned_alloc realloc free malloc_usable_size malloc_stats_print mallctl mallctlnametomib mallctlbymib" From ae03bf6a57f0dd6a009288fa6477a300cabf6d5e Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 22 Jan 2013 12:02:08 -0800 Subject: [PATCH 0276/3378] Update hash from MurmurHash2 to MurmurHash3. Update hash from MurmurHash2 to MurmurHash3, primarily because the latter generates 128 bits in a single call for no extra cost, which simplifies integration with cuckoo hashing. --- include/jemalloc/internal/ckh.h | 8 +- include/jemalloc/internal/hash.h | 333 ++++++++++++++++-- .../jemalloc/internal/jemalloc_internal.h.in | 1 + include/jemalloc/internal/private_namespace.h | 15 + src/ckh.c | 81 +---- src/prof.c | 28 +- 6 files changed, 337 insertions(+), 129 deletions(-) diff --git a/include/jemalloc/internal/ckh.h b/include/jemalloc/internal/ckh.h index 05d1fc03..50c39ed9 100644 --- a/include/jemalloc/internal/ckh.h +++ b/include/jemalloc/internal/ckh.h @@ -5,7 +5,7 @@ typedef struct ckh_s ckh_t; typedef struct ckhc_s ckhc_t; /* Typedefs to allow easy function pointer passing. 
*/ -typedef void ckh_hash_t (const void *, unsigned, size_t *, size_t *); +typedef void ckh_hash_t (const void *, size_t[2]); typedef bool ckh_keycomp_t (const void *, const void *); /* Maintain counters used to get an idea of performance. */ @@ -75,11 +75,9 @@ bool ckh_insert(ckh_t *ckh, const void *key, const void *data); bool ckh_remove(ckh_t *ckh, const void *searchkey, void **key, void **data); bool ckh_search(ckh_t *ckh, const void *seachkey, void **key, void **data); -void ckh_string_hash(const void *key, unsigned minbits, size_t *hash1, - size_t *hash2); +void ckh_string_hash(const void *key, size_t r_hash[2]); bool ckh_string_keycomp(const void *k1, const void *k2); -void ckh_pointer_hash(const void *key, unsigned minbits, size_t *hash1, - size_t *hash2); +void ckh_pointer_hash(const void *key, size_t r_hash[2]); bool ckh_pointer_keycomp(const void *k1, const void *k2); #endif /* JEMALLOC_H_EXTERNS */ diff --git a/include/jemalloc/internal/hash.h b/include/jemalloc/internal/hash.h index 2f501f5d..56ecc793 100644 --- a/include/jemalloc/internal/hash.h +++ b/include/jemalloc/internal/hash.h @@ -1,3 +1,8 @@ +/* + * The following hash function is based on MurmurHash3, placed into the public + * domain by Austin Appleby. See http://code.google.com/p/smhasher/ for + * details. + */ /******************************************************************************/ #ifdef JEMALLOC_H_TYPES @@ -14,55 +19,311 @@ #ifdef JEMALLOC_H_INLINES #ifndef JEMALLOC_ENABLE_INLINE -uint64_t hash(const void *key, size_t len, uint64_t seed); +void hash(const void *key, size_t len, const uint32_t seed, + size_t r_hash[2]); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_HASH_C_)) -/* - * The following hash function is based on MurmurHash64A(), placed into the - * public domain by Austin Appleby. See http://murmurhash.googlepages.com/ for - * details. 
- */ -JEMALLOC_INLINE uint64_t -hash(const void *key, size_t len, uint64_t seed) +/******************************************************************************/ +/* Internal implementation. */ +JEMALLOC_INLINE uint32_t +hash_rotl_32(uint32_t x, int8_t r) { - const uint64_t m = UINT64_C(0xc6a4a7935bd1e995); - const int r = 47; - uint64_t h = seed ^ (len * m); - const uint64_t *data = (const uint64_t *)key; - const uint64_t *end = data + (len/8); - const unsigned char *data2; - assert(((uintptr_t)key & 0x7) == 0); + return (x << r) | (x >> (32 - r)); +} - while(data != end) { - uint64_t k = *data++; +JEMALLOC_INLINE uint64_t +hash_rotl_64(uint64_t x, int8_t r) +{ + return (x << r) | (x >> (64 - r)); +} - k *= m; - k ^= k >> r; - k *= m; +JEMALLOC_INLINE uint32_t +hash_get_block_32(const uint32_t *p, int i) +{ - h ^= k; - h *= m; + return p[i]; +} + +JEMALLOC_INLINE uint64_t +hash_get_block_64(const uint64_t *p, int i) +{ + + return p[i]; +} + +JEMALLOC_INLINE uint32_t +hash_fmix_32(uint32_t h) +{ + + h ^= h >> 16; + h *= 0x85ebca6b; + h ^= h >> 13; + h *= 0xc2b2ae35; + h ^= h >> 16; + + return h; +} + +JEMALLOC_INLINE uint64_t +hash_fmix_64(uint64_t k) +{ + + k ^= k >> 33; + k *= QU(0xff51afd7ed558ccdLLU); + k ^= k >> 33; + k *= QU(0xc4ceb9fe1a85ec53LLU); + k ^= k >> 33; + + return k; +} + +JEMALLOC_INLINE uint32_t +hash_x86_32(const void *key, int len, uint32_t seed) +{ + const uint8_t *data = (const uint8_t *) key; + const int nblocks = len / 4; + + uint32_t h1 = seed; + + const uint32_t c1 = 0xcc9e2d51; + const uint32_t c2 = 0x1b873593; + + /* body */ + { + const uint32_t *blocks = (const uint32_t *) (data + nblocks*4); + int i; + + for (i = -nblocks; i; i++) { + uint32_t k1 = hash_get_block_32(blocks, i); + + k1 *= c1; + k1 = hash_rotl_32(k1, 15); + k1 *= c2; + + h1 ^= k1; + h1 = hash_rotl_32(h1, 13); + h1 = h1*5 + 0xe6546b64; + } } - data2 = (const unsigned char *)data; - switch(len & 7) { - case 7: h ^= ((uint64_t)(data2[6])) << 48; - case 6: h ^= 
((uint64_t)(data2[5])) << 40; - case 5: h ^= ((uint64_t)(data2[4])) << 32; - case 4: h ^= ((uint64_t)(data2[3])) << 24; - case 3: h ^= ((uint64_t)(data2[2])) << 16; - case 2: h ^= ((uint64_t)(data2[1])) << 8; - case 1: h ^= ((uint64_t)(data2[0])); - h *= m; + /* tail */ + { + const uint8_t *tail = (const uint8_t *) (data + nblocks*4); + + uint32_t k1 = 0; + + switch (len & 3) { + case 3: k1 ^= tail[2] << 16; + case 2: k1 ^= tail[1] << 8; + case 1: k1 ^= tail[0]; k1 *= c1; k1 = hash_rotl_32(k1, 15); + k1 *= c2; h1 ^= k1; + } } - h ^= h >> r; - h *= m; - h ^= h >> r; + /* finalization */ + h1 ^= len; - return (h); + h1 = hash_fmix_32(h1); + + return h1; +} + +UNUSED JEMALLOC_INLINE void +hash_x86_128(const void *key, const int len, uint32_t seed, + uint64_t r_out[2]) +{ + const uint8_t * data = (const uint8_t *) key; + const int nblocks = len / 16; + + uint32_t h1 = seed; + uint32_t h2 = seed; + uint32_t h3 = seed; + uint32_t h4 = seed; + + const uint32_t c1 = 0x239b961b; + const uint32_t c2 = 0xab0e9789; + const uint32_t c3 = 0x38b34ae5; + const uint32_t c4 = 0xa1e38b93; + + /* body */ + { + const uint32_t *blocks = (const uint32_t *) (data + nblocks*16); + int i; + + for (i = -nblocks; i; i++) { + uint32_t k1 = hash_get_block_32(blocks, i*4 + 0); + uint32_t k2 = hash_get_block_32(blocks, i*4 + 1); + uint32_t k3 = hash_get_block_32(blocks, i*4 + 2); + uint32_t k4 = hash_get_block_32(blocks, i*4 + 3); + + k1 *= c1; k1 = hash_rotl_32(k1, 15); k1 *= c2; h1 ^= k1; + + h1 = hash_rotl_32(h1, 19); h1 += h2; + h1 = h1*5 + 0x561ccd1b; + + k2 *= c2; k2 = hash_rotl_32(k2, 16); k2 *= c3; h2 ^= k2; + + h2 = hash_rotl_32(h2, 17); h2 += h3; + h2 = h2*5 + 0x0bcaa747; + + k3 *= c3; k3 = hash_rotl_32(k3, 17); k3 *= c4; h3 ^= k3; + + h3 = hash_rotl_32(h3, 15); h3 += h4; + h3 = h3*5 + 0x96cd1c35; + + k4 *= c4; k4 = hash_rotl_32(k4, 18); k4 *= c1; h4 ^= k4; + + h4 = hash_rotl_32(h4, 13); h4 += h1; + h4 = h4*5 + 0x32ac3b17; + } + } + + /* tail */ + { + const uint8_t *tail = (const 
uint8_t *) (data + nblocks*16); + uint32_t k1 = 0; + uint32_t k2 = 0; + uint32_t k3 = 0; + uint32_t k4 = 0; + + switch (len & 15) { + case 15: k4 ^= tail[14] << 16; + case 14: k4 ^= tail[13] << 8; + case 13: k4 ^= tail[12] << 0; + k4 *= c4; k4 = hash_rotl_32(k4, 18); k4 *= c1; h4 ^= k4; + + case 12: k3 ^= tail[11] << 24; + case 11: k3 ^= tail[10] << 16; + case 10: k3 ^= tail[ 9] << 8; + case 9: k3 ^= tail[ 8] << 0; + k3 *= c3; k3 = hash_rotl_32(k3, 17); k3 *= c4; h3 ^= k3; + + case 8: k2 ^= tail[ 7] << 24; + case 7: k2 ^= tail[ 6] << 16; + case 6: k2 ^= tail[ 5] << 8; + case 5: k2 ^= tail[ 4] << 0; + k2 *= c2; k2 = hash_rotl_32(k2, 16); k2 *= c3; h2 ^= k2; + + case 4: k1 ^= tail[ 3] << 24; + case 3: k1 ^= tail[ 2] << 16; + case 2: k1 ^= tail[ 1] << 8; + case 1: k1 ^= tail[ 0] << 0; + k1 *= c1; k1 = hash_rotl_32(k1, 15); k1 *= c2; h1 ^= k1; + } + } + + /* finalization */ + h1 ^= len; h2 ^= len; h3 ^= len; h4 ^= len; + + h1 += h2; h1 += h3; h1 += h4; + h2 += h1; h3 += h1; h4 += h1; + + h1 = hash_fmix_32(h1); + h2 = hash_fmix_32(h2); + h3 = hash_fmix_32(h3); + h4 = hash_fmix_32(h4); + + h1 += h2; h1 += h3; h1 += h4; + h2 += h1; h3 += h1; h4 += h1; + + r_out[0] = (((uint64_t) h2) << 32) | h1; + r_out[1] = (((uint64_t) h4) << 32) | h3; +} + +UNUSED JEMALLOC_INLINE void +hash_x64_128(const void *key, const int len, const uint32_t seed, + uint64_t r_out[2]) +{ + const uint8_t *data = (const uint8_t *) key; + const int nblocks = len / 16; + + uint64_t h1 = seed; + uint64_t h2 = seed; + + const uint64_t c1 = QU(0x87c37b91114253d5LLU); + const uint64_t c2 = QU(0x4cf5ad432745937fLLU); + + /* body */ + { + const uint64_t *blocks = (const uint64_t *) (data); + int i; + + for (i = 0; i < nblocks; i++) { + uint64_t k1 = hash_get_block_64(blocks, i*2 + 0); + uint64_t k2 = hash_get_block_64(blocks, i*2 + 1); + + k1 *= c1; k1 = hash_rotl_64(k1, 31); k1 *= c2; h1 ^= k1; + + h1 = hash_rotl_64(h1, 27); h1 += h2; + h1 = h1*5 + 0x52dce729; + + k2 *= c2; k2 = hash_rotl_64(k2, 33); k2 *= 
c1; h2 ^= k2; + + h2 = hash_rotl_64(h2, 31); h2 += h1; + h2 = h2*5 + 0x38495ab5; + } + } + + /* tail */ + { + const uint8_t *tail = (const uint8_t*)(data + nblocks*16); + uint64_t k1 = 0; + uint64_t k2 = 0; + + switch (len & 15) { + case 15: k2 ^= ((uint64_t)(tail[14])) << 48; + case 14: k2 ^= ((uint64_t)(tail[13])) << 40; + case 13: k2 ^= ((uint64_t)(tail[12])) << 32; + case 12: k2 ^= ((uint64_t)(tail[11])) << 24; + case 11: k2 ^= ((uint64_t)(tail[10])) << 16; + case 10: k2 ^= ((uint64_t)(tail[ 9])) << 8; + case 9: k2 ^= ((uint64_t)(tail[ 8])) << 0; + k2 *= c2; k2 = hash_rotl_64(k2, 33); k2 *= c1; h2 ^= k2; + + case 8: k1 ^= ((uint64_t)(tail[ 7])) << 56; + case 7: k1 ^= ((uint64_t)(tail[ 6])) << 48; + case 6: k1 ^= ((uint64_t)(tail[ 5])) << 40; + case 5: k1 ^= ((uint64_t)(tail[ 4])) << 32; + case 4: k1 ^= ((uint64_t)(tail[ 3])) << 24; + case 3: k1 ^= ((uint64_t)(tail[ 2])) << 16; + case 2: k1 ^= ((uint64_t)(tail[ 1])) << 8; + case 1: k1 ^= ((uint64_t)(tail[ 0])) << 0; + k1 *= c1; k1 = hash_rotl_64(k1, 31); k1 *= c2; h1 ^= k1; + } + } + + /* finalization */ + h1 ^= len; h2 ^= len; + + h1 += h2; + h2 += h1; + + h1 = hash_fmix_64(h1); + h2 = hash_fmix_64(h2); + + h1 += h2; + h2 += h1; + + r_out[0] = h1; + r_out[1] = h2; +} + + +/******************************************************************************/ +/* API. 
*/ +JEMALLOC_INLINE void +hash(const void *key, size_t len, const uint32_t seed, size_t r_hash[2]) +{ +#if (LG_SIZEOF_PTR == 3) + hash_x64_128(key, len, seed, (uint64_t *)r_hash); +#else + uint64_t hashes[2]; + hash_x86_128(key, len, seed, hashes); + r_hash[0] = (size_t)hashes[0]; + r_hash[1] = (size_t)hashes[1]; +#endif } #endif diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index fb53e131..13a2ffb7 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -226,6 +226,7 @@ static const bool config_ivsalloc = #define ALLOCM_LG_ALIGN_MASK ((int)0x3f) #define ZU(z) ((size_t)z) +#define QU(q) ((uint64_t)q) #ifndef __DECONST # define __DECONST(type, var) ((type)(uintptr_t)(const void *)(var)) diff --git a/include/jemalloc/internal/private_namespace.h b/include/jemalloc/internal/private_namespace.h index 951df24b..903fb4df 100644 --- a/include/jemalloc/internal/private_namespace.h +++ b/include/jemalloc/internal/private_namespace.h @@ -65,6 +65,7 @@ #define arenas_tsd_boot JEMALLOC_N(arenas_tsd_boot) #define arenas_tsd_cleanup_wrapper JEMALLOC_N(arenas_tsd_cleanup_wrapper) #define arenas_tsd_get JEMALLOC_N(arenas_tsd_get) +#define arenas_tsd_get_wrapper JEMALLOC_N(arenas_tsd_get_wrapper) #define arenas_tsd_set JEMALLOC_N(arenas_tsd_set) #define atomic_add_u JEMALLOC_N(atomic_add_u) #define atomic_add_uint32 JEMALLOC_N(atomic_add_uint32) @@ -176,6 +177,15 @@ #define extent_tree_szad_search JEMALLOC_N(extent_tree_szad_search) #define get_errno JEMALLOC_N(get_errno) #define hash JEMALLOC_N(hash) +#define hash_fmix_32 JEMALLOC_N(hash_fmix_32) +#define hash_fmix_64 JEMALLOC_N(hash_fmix_64) +#define hash_get_block_32 JEMALLOC_N(hash_get_block_32) +#define hash_get_block_64 JEMALLOC_N(hash_get_block_64) +#define hash_rotl_32 JEMALLOC_N(hash_rotl_32) +#define hash_rotl_64 JEMALLOC_N(hash_rotl_64) +#define hash_x64_128 JEMALLOC_N(hash_x64_128) +#define 
hash_x86_128 JEMALLOC_N(hash_x86_128) +#define hash_x86_32 JEMALLOC_N(hash_x86_32) #define huge_allocated JEMALLOC_N(huge_allocated) #define huge_boot JEMALLOC_N(huge_boot) #define huge_dalloc JEMALLOC_N(huge_dalloc) @@ -293,12 +303,14 @@ #define prof_tdata_tsd_boot JEMALLOC_N(prof_tdata_tsd_boot) #define prof_tdata_tsd_cleanup_wrapper JEMALLOC_N(prof_tdata_tsd_cleanup_wrapper) #define prof_tdata_tsd_get JEMALLOC_N(prof_tdata_tsd_get) +#define prof_tdata_tsd_get_wrapper JEMALLOC_N(prof_tdata_tsd_get_wrapper) #define prof_tdata_tsd_set JEMALLOC_N(prof_tdata_tsd_set) #define quarantine JEMALLOC_N(quarantine) #define quarantine_boot JEMALLOC_N(quarantine_boot) #define quarantine_tsd_boot JEMALLOC_N(quarantine_tsd_boot) #define quarantine_tsd_cleanup_wrapper JEMALLOC_N(quarantine_tsd_cleanup_wrapper) #define quarantine_tsd_get JEMALLOC_N(quarantine_tsd_get) +#define quarantine_tsd_get_wrapper JEMALLOC_N(quarantine_tsd_get_wrapper) #define quarantine_tsd_set JEMALLOC_N(quarantine_tsd_set) #define register_zone JEMALLOC_N(register_zone) #define rtree_get JEMALLOC_N(rtree_get) @@ -342,6 +354,7 @@ #define tcache_enabled_tsd_boot JEMALLOC_N(tcache_enabled_tsd_boot) #define tcache_enabled_tsd_cleanup_wrapper JEMALLOC_N(tcache_enabled_tsd_cleanup_wrapper) #define tcache_enabled_tsd_get JEMALLOC_N(tcache_enabled_tsd_get) +#define tcache_enabled_tsd_get_wrapper JEMALLOC_N(tcache_enabled_tsd_get_wrapper) #define tcache_enabled_tsd_set JEMALLOC_N(tcache_enabled_tsd_set) #define tcache_event JEMALLOC_N(tcache_event) #define tcache_event_hard JEMALLOC_N(tcache_event_hard) @@ -357,6 +370,7 @@ #define tcache_tsd_boot JEMALLOC_N(tcache_tsd_boot) #define tcache_tsd_cleanup_wrapper JEMALLOC_N(tcache_tsd_cleanup_wrapper) #define tcache_tsd_get JEMALLOC_N(tcache_tsd_get) +#define tcache_tsd_get_wrapper JEMALLOC_N(tcache_tsd_get_wrapper) #define tcache_tsd_set JEMALLOC_N(tcache_tsd_set) #define thread_allocated_booted JEMALLOC_N(thread_allocated_booted) #define thread_allocated_initialized 
JEMALLOC_N(thread_allocated_initialized) @@ -365,5 +379,6 @@ #define thread_allocated_tsd_boot JEMALLOC_N(thread_allocated_tsd_boot) #define thread_allocated_tsd_cleanup_wrapper JEMALLOC_N(thread_allocated_tsd_cleanup_wrapper) #define thread_allocated_tsd_get JEMALLOC_N(thread_allocated_tsd_get) +#define thread_allocated_tsd_get_wrapper JEMALLOC_N(thread_allocated_tsd_get_wrapper) #define thread_allocated_tsd_set JEMALLOC_N(thread_allocated_tsd_set) #define u2rz JEMALLOC_N(u2rz) diff --git a/src/ckh.c b/src/ckh.c index 742a950b..e58980de 100644 --- a/src/ckh.c +++ b/src/ckh.c @@ -70,20 +70,20 @@ ckh_bucket_search(ckh_t *ckh, size_t bucket, const void *key) JEMALLOC_INLINE size_t ckh_isearch(ckh_t *ckh, const void *key) { - size_t hash1, hash2, bucket, cell; + size_t hashes[2], bucket, cell; assert(ckh != NULL); - ckh->hash(key, ckh->lg_curbuckets, &hash1, &hash2); + ckh->hash(key, hashes); /* Search primary bucket. */ - bucket = hash1 & ((ZU(1) << ckh->lg_curbuckets) - 1); + bucket = hashes[0] & ((ZU(1) << ckh->lg_curbuckets) - 1); cell = ckh_bucket_search(ckh, bucket, key); if (cell != SIZE_T_MAX) return (cell); /* Search secondary bucket. */ - bucket = hash2 & ((ZU(1) << ckh->lg_curbuckets) - 1); + bucket = hashes[1] & ((ZU(1) << ckh->lg_curbuckets) - 1); cell = ckh_bucket_search(ckh, bucket, key); return (cell); } @@ -126,7 +126,7 @@ ckh_evict_reloc_insert(ckh_t *ckh, size_t argbucket, void const **argkey, { const void *key, *data, *tkey, *tdata; ckhc_t *cell; - size_t hash1, hash2, bucket, tbucket; + size_t hashes[2], bucket, tbucket; unsigned i; bucket = argbucket; @@ -155,10 +155,11 @@ ckh_evict_reloc_insert(ckh_t *ckh, size_t argbucket, void const **argkey, #endif /* Find the alternate bucket for the evicted item. 
*/ - ckh->hash(key, ckh->lg_curbuckets, &hash1, &hash2); - tbucket = hash2 & ((ZU(1) << ckh->lg_curbuckets) - 1); + ckh->hash(key, hashes); + tbucket = hashes[1] & ((ZU(1) << ckh->lg_curbuckets) - 1); if (tbucket == bucket) { - tbucket = hash1 & ((ZU(1) << ckh->lg_curbuckets) - 1); + tbucket = hashes[0] & ((ZU(1) << ckh->lg_curbuckets) + - 1); /* * It may be that (tbucket == bucket) still, if the * item's hashes both indicate this bucket. However, @@ -192,19 +193,19 @@ ckh_evict_reloc_insert(ckh_t *ckh, size_t argbucket, void const **argkey, JEMALLOC_INLINE bool ckh_try_insert(ckh_t *ckh, void const**argkey, void const**argdata) { - size_t hash1, hash2, bucket; + size_t hashes[2], bucket; const void *key = *argkey; const void *data = *argdata; - ckh->hash(key, ckh->lg_curbuckets, &hash1, &hash2); + ckh->hash(key, hashes); /* Try to insert in primary bucket. */ - bucket = hash1 & ((ZU(1) << ckh->lg_curbuckets) - 1); + bucket = hashes[0] & ((ZU(1) << ckh->lg_curbuckets) - 1); if (ckh_try_bucket_insert(ckh, bucket, key, data) == false) return (false); /* Try to insert in secondary bucket. */ - bucket = hash2 & ((ZU(1) << ckh->lg_curbuckets) - 1); + bucket = hashes[1] & ((ZU(1) << ckh->lg_curbuckets) - 1); if (ckh_try_bucket_insert(ckh, bucket, key, data) == false) return (false); @@ -526,31 +527,10 @@ ckh_search(ckh_t *ckh, const void *searchkey, void **key, void **data) } void -ckh_string_hash(const void *key, unsigned minbits, size_t *hash1, size_t *hash2) +ckh_string_hash(const void *key, size_t r_hash[2]) { - size_t ret1, ret2; - uint64_t h; - assert(minbits <= 32 || (SIZEOF_PTR == 8 && minbits <= 64)); - assert(hash1 != NULL); - assert(hash2 != NULL); - - h = hash(key, strlen((const char *)key), UINT64_C(0x94122f335b332aea)); - if (minbits <= 32) { - /* - * Avoid doing multiple hashes, since a single hash provides - * enough bits. 
- */ - ret1 = h & ZU(0xffffffffU); - ret2 = h >> 32; - } else { - ret1 = h; - ret2 = hash(key, strlen((const char *)key), - UINT64_C(0x8432a476666bbc13)); - } - - *hash1 = ret1; - *hash2 = ret2; + hash(key, strlen((const char *)key), 0x94122f33U, r_hash); } bool @@ -564,41 +544,16 @@ ckh_string_keycomp(const void *k1, const void *k2) } void -ckh_pointer_hash(const void *key, unsigned minbits, size_t *hash1, - size_t *hash2) +ckh_pointer_hash(const void *key, size_t r_hash[2]) { - size_t ret1, ret2; - uint64_t h; union { const void *v; - uint64_t i; + size_t i; } u; - assert(minbits <= 32 || (SIZEOF_PTR == 8 && minbits <= 64)); - assert(hash1 != NULL); - assert(hash2 != NULL); - assert(sizeof(u.v) == sizeof(u.i)); -#if (LG_SIZEOF_PTR != LG_SIZEOF_INT) - u.i = 0; -#endif u.v = key; - h = hash(&u.i, sizeof(u.i), UINT64_C(0xd983396e68886082)); - if (minbits <= 32) { - /* - * Avoid doing multiple hashes, since a single hash provides - * enough bits. - */ - ret1 = h & ZU(0xffffffffU); - ret2 = h >> 32; - } else { - assert(SIZEOF_PTR == 8); - ret1 = h; - ret2 = hash(&u.i, sizeof(u.i), UINT64_C(0x5e2be9aff8709a5d)); - } - - *hash1 = ret1; - *hash2 = ret2; + hash(&u.i, sizeof(u.i), 0xd983396eU, r_hash); } bool diff --git a/src/prof.c b/src/prof.c index e21d1667..b9f03a0d 100644 --- a/src/prof.c +++ b/src/prof.c @@ -90,8 +90,7 @@ static bool prof_dump(bool propagate_err, const char *filename, bool leakcheck); static void prof_dump_filename(char *filename, char v, int64_t vseq); static void prof_fdump(void); -static void prof_bt_hash(const void *key, unsigned minbits, size_t *hash1, - size_t *hash2); +static void prof_bt_hash(const void *key, size_t r_hash[2]); static bool prof_bt_keycomp(const void *k1, const void *k2); static malloc_mutex_t *prof_ctx_mutex_choose(void); @@ -1043,34 +1042,13 @@ prof_gdump(void) } static void -prof_bt_hash(const void *key, unsigned minbits, size_t *hash1, size_t *hash2) +prof_bt_hash(const void *key, size_t r_hash[2]) { - size_t ret1, ret2; - 
uint64_t h; prof_bt_t *bt = (prof_bt_t *)key; cassert(config_prof); - assert(minbits <= 32 || (SIZEOF_PTR == 8 && minbits <= 64)); - assert(hash1 != NULL); - assert(hash2 != NULL); - h = hash(bt->vec, bt->len * sizeof(void *), - UINT64_C(0x94122f335b332aea)); - if (minbits <= 32) { - /* - * Avoid doing multiple hashes, since a single hash provides - * enough bits. - */ - ret1 = h & ZU(0xffffffffU); - ret2 = h >> 32; - } else { - ret1 = h; - ret2 = hash(bt->vec, bt->len * sizeof(void *), - UINT64_C(0x8432a476666bbc13)); - } - - *hash1 = ret1; - *hash2 = ret2; + hash(bt->vec, bt->len * sizeof(void *), 0x94122f33U, r_hash); } static bool From ba175a2bfb236d79404012d9b5bb6e9b3c8be8dd Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 22 Jan 2013 12:14:45 -0800 Subject: [PATCH 0277/3378] Use config_* instead of JEMALLOC_*. Convert a couple of stragglers from JEMALLOC_* to use config_*. --- src/ckh.c | 5 ++--- src/jemalloc.c | 13 ++----------- 2 files changed, 4 insertions(+), 14 deletions(-) diff --git a/src/ckh.c b/src/ckh.c index e58980de..2f38348b 100644 --- a/src/ckh.c +++ b/src/ckh.c @@ -418,9 +418,8 @@ ckh_delete(ckh_t *ckh) #endif idalloc(ckh->tab); -#ifdef JEMALLOC_DEBUG - memset(ckh, 0x5a, sizeof(ckh_t)); -#endif + if (config_debug) + memset(ckh, 0x5a, sizeof(ckh_t)); } size_t diff --git a/src/jemalloc.c b/src/jemalloc.c index 58e18df4..2d56e4aa 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -10,17 +10,8 @@ malloc_tsd_data(, thread_allocated, thread_allocated_t, /* Runtime configuration options. 
*/ const char *je_malloc_conf; -#ifdef JEMALLOC_DEBUG -bool opt_abort = true; -# ifdef JEMALLOC_FILL -bool opt_junk = true; -# else -bool opt_junk = false; -# endif -#else -bool opt_abort = false; -bool opt_junk = false; -#endif +bool opt_abort = config_debug; +bool opt_junk = (config_debug && config_fill); size_t opt_quarantine = ZU(0); bool opt_redzone = false; bool opt_utrace = false; From 42ba90eb7f9e12d9cf6d7f9be82e239f0ffb04f5 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 22 Jan 2013 12:55:42 -0800 Subject: [PATCH 0278/3378] Update phony targets. Submitted by Frederik Deweerdt. --- Makefile.in | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile.in b/Makefile.in index 0062747c..74810472 100644 --- a/Makefile.in +++ b/Makefile.in @@ -113,9 +113,9 @@ COBJS := $(CSRCS:$(srcroot)%.c=$(objroot)%.$(O)) CPICOBJS := $(CSRCS:$(srcroot)%.c=$(objroot)%.pic.$(O)) CTESTOBJS := $(CTESTS:$(srcroot)%.c=$(objroot)%.$(O)) -.PHONY: all dist doc_html doc_man doc +.PHONY: all dist build_doc_html build_doc_man build_doc .PHONY: install_bin install_include install_lib -.PHONY: install_html install_man install_doc install +.PHONY: install_doc_html install_doc_man install_doc install .PHONY: tests check clean distclean relclean .SECONDARY : $(CTESTOBJS) From 2625c8968e88de435d6452e6f202c8dbdeb1775b Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 22 Jan 2013 16:46:27 -0800 Subject: [PATCH 0279/3378] Fix quoting bug in --without-export implementation. 
--- configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 8c130827..c270662b 100644 --- a/configure.ac +++ b/configure.ac @@ -475,7 +475,7 @@ done AC_ARG_WITH([export], [AS_HELP_STRING([--without-export], [disable exporting jemalloc public APIs])], [if test "x$with_export" = "xno"; then - AC_DEFINE([JEMALLOC_EXPORT],[])] + AC_DEFINE([JEMALLOC_EXPORT],[]) fi] ) From d1b6e18a99caf7e0c38707f4aed7ec8c492e0424 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 22 Jan 2013 16:54:26 -0800 Subject: [PATCH 0280/3378] Revert opt_abort and opt_junk refactoring. Revert refactoring of opt_abort and opt_junk declarations. clang accepts the config_*-based declarations (and generates correct code), but gcc complains with: error: initializer element is not constant --- src/jemalloc.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 2d56e4aa..c117685a 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -10,8 +10,20 @@ malloc_tsd_data(, thread_allocated, thread_allocated_t, /* Runtime configuration options. */ const char *je_malloc_conf; -bool opt_abort = config_debug; -bool opt_junk = (config_debug && config_fill); +bool opt_abort = +#ifdef JEMALLOC_DEBUG + true +#else + false +#endif + ; +bool opt_junk = +#if (defined(JEMALLOC_DEBUG) && defined(JEMALLOC_FILL)) + true +#else + false +#endif + ; size_t opt_quarantine = ZU(0); bool opt_redzone = false; bool opt_utrace = false; From dd0438ee6b7b3640516d5a48feec1490ca2f1cc3 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 22 Jan 2013 20:43:04 -0800 Subject: [PATCH 0281/3378] Specify 'inline' in addition to always_inline attribute. Specify both inline and __attribute__((always_inline)), in order to avoid warnings when using newer versions of gcc. 
--- include/jemalloc/internal/jemalloc_internal.h.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 13a2ffb7..c606c122 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -241,7 +241,7 @@ static const bool config_ivsalloc = # define JEMALLOC_ENABLE_INLINE # ifdef JEMALLOC_HAVE_ATTR # define JEMALLOC_ALWAYS_INLINE \ - static JEMALLOC_ATTR(unused) JEMALLOC_ATTR(always_inline) + static inline JEMALLOC_ATTR(unused) JEMALLOC_ATTR(always_inline) # else # define JEMALLOC_ALWAYS_INLINE static inline # endif From b5681fb20c17478f2193fead19b7788807e39996 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 22 Jan 2013 22:45:09 -0800 Subject: [PATCH 0282/3378] Updated ChangeLog for 3.3.0. --- COPYING | 4 ++-- ChangeLog | 12 +++++++++++- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/COPYING b/COPYING index e27fc4d6..019e8132 100644 --- a/COPYING +++ b/COPYING @@ -1,10 +1,10 @@ Unless otherwise specified, files in the jemalloc source distribution are subject to the following license: -------------------------------------------------------------------------------- -Copyright (C) 2002-2012 Jason Evans . +Copyright (C) 2002-2013 Jason Evans . All rights reserved. Copyright (C) 2007-2012 Mozilla Foundation. All rights reserved. -Copyright (C) 2009-2012 Facebook, Inc. All rights reserved. +Copyright (C) 2009-2013 Facebook, Inc. All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: diff --git a/ChangeLog b/ChangeLog index 374459de..65782253 100644 --- a/ChangeLog +++ b/ChangeLog @@ -6,12 +6,22 @@ found in the git revision history: http://www.canonware.com/cgi-bin/gitweb.cgi?p=jemalloc.git git://canonware.com/jemalloc.git -* 3.x.x (Not yet released) +* 3.3.0 (January 23, 2013) + + This version includes a few minor performance improvements in addition to the + listed new features and bug fixes. + + New features: + - Add clipping support to lg_chunk option processing. + - Add the --enable-ivsalloc option. + - Add the --without-export option. + - Add the --disable-zone-allocator option. Bug fixes: - Fix "arenas.extend" mallctl to output the number of arenas. - Fix chunk_recycyle() to unconditionally inform Valgrind that returned memory is undefined. + - Fix build break on FreeBSD related to alloca.h. * 3.2.0 (November 9, 2012) From bbe29d374d0fa5f4684621f16c099294e56c26ef Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 30 Jan 2013 15:03:11 -0800 Subject: [PATCH 0283/3378] Fix potential TLS-related memory corruption. Avoid writing to uninitialized TLS as a side effect of deallocation. Initializing TLS during deallocation is unsafe because it is possible that a thread never did any allocation, and that TLS has already been deallocated by the threads library, resulting in write-after-free corruption. These fixes affect prof_tdata and quarantine; all other uses of TLS are already safe, whether intentionally (as for tcache) or unintentionally (as for arenas). 
--- ChangeLog | 7 +++ include/jemalloc/internal/private_namespace.h | 6 ++ include/jemalloc/internal/prof.h | 14 ++--- include/jemalloc/internal/quarantine.h | 43 +++++++++++++++ src/jemalloc.c | 27 ++++++++- src/prof.c | 18 ++---- src/quarantine.c | 55 +++++-------------- 7 files changed, 105 insertions(+), 65 deletions(-) diff --git a/ChangeLog b/ChangeLog index 65782253..ae7d0bfe 100644 --- a/ChangeLog +++ b/ChangeLog @@ -6,6 +6,13 @@ found in the git revision history: http://www.canonware.com/cgi-bin/gitweb.cgi?p=jemalloc.git git://canonware.com/jemalloc.git +* 3.x.x (XXX Not yet released) + + Bug fixes: + - Fix TLS-related memory corruption that could occur during thread exit if the + thread never allocated memory. Only the quarantine and prof facilities were + susceptible. + * 3.3.0 (January 23, 2013) This version includes a few minor performance improvements in addition to the diff --git a/include/jemalloc/internal/private_namespace.h b/include/jemalloc/internal/private_namespace.h index 903fb4df..65de3163 100644 --- a/include/jemalloc/internal/private_namespace.h +++ b/include/jemalloc/internal/private_namespace.h @@ -306,7 +306,13 @@ #define prof_tdata_tsd_get_wrapper JEMALLOC_N(prof_tdata_tsd_get_wrapper) #define prof_tdata_tsd_set JEMALLOC_N(prof_tdata_tsd_set) #define quarantine JEMALLOC_N(quarantine) +#define quarantine_alloc_hook JEMALLOC_N(quarantine_alloc_hook) #define quarantine_boot JEMALLOC_N(quarantine_boot) +#define quarantine_booted JEMALLOC_N(quarantine_booted) +#define quarantine_cleanup JEMALLOC_N(quarantine_cleanup) +#define quarantine_init JEMALLOC_N(quarantine_init) +#define quarantine_tls JEMALLOC_N(quarantine_tls) +#define quarantine_tsd JEMALLOC_N(quarantine_tsd) #define quarantine_tsd_boot JEMALLOC_N(quarantine_tsd_boot) #define quarantine_tsd_cleanup_wrapper JEMALLOC_N(quarantine_tsd_cleanup_wrapper) #define quarantine_tsd_get JEMALLOC_N(quarantine_tsd_get) diff --git a/include/jemalloc/internal/prof.h 
b/include/jemalloc/internal/prof.h index 47f22ad2..119a5b1b 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -237,7 +237,7 @@ void prof_postfork_child(void); \ assert(size == s2u(size)); \ \ - prof_tdata = prof_tdata_get(); \ + prof_tdata = prof_tdata_get(true); \ if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) { \ if (prof_tdata != NULL) \ ret = (prof_thr_cnt_t *)(uintptr_t)1U; \ @@ -286,7 +286,7 @@ void prof_postfork_child(void); #ifndef JEMALLOC_ENABLE_INLINE malloc_tsd_protos(JEMALLOC_ATTR(unused), prof_tdata, prof_tdata_t *) -prof_tdata_t *prof_tdata_get(void); +prof_tdata_t *prof_tdata_get(bool create); void prof_sample_threshold_update(prof_tdata_t *prof_tdata); prof_ctx_t *prof_ctx_get(const void *ptr); void prof_ctx_set(const void *ptr, prof_ctx_t *ctx); @@ -304,17 +304,15 @@ malloc_tsd_funcs(JEMALLOC_INLINE, prof_tdata, prof_tdata_t *, NULL, prof_tdata_cleanup) JEMALLOC_INLINE prof_tdata_t * -prof_tdata_get(void) +prof_tdata_get(bool create) { prof_tdata_t *prof_tdata; cassert(config_prof); prof_tdata = *prof_tdata_tsd_get(); - if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) { - if (prof_tdata == NULL) - prof_tdata = prof_tdata_init(); - } + if (create && prof_tdata == NULL) + prof_tdata = prof_tdata_init(); return (prof_tdata); } @@ -397,7 +395,7 @@ prof_sample_accum_update(size_t size) /* Sampling logic is unnecessary if the interval is 1. 
*/ assert(opt_lg_prof_sample != 0); - prof_tdata = *prof_tdata_tsd_get(); + prof_tdata = prof_tdata_get(false); if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) return (true); diff --git a/include/jemalloc/internal/quarantine.h b/include/jemalloc/internal/quarantine.h index 38f3d696..16f677f7 100644 --- a/include/jemalloc/internal/quarantine.h +++ b/include/jemalloc/internal/quarantine.h @@ -1,6 +1,9 @@ /******************************************************************************/ #ifdef JEMALLOC_H_TYPES +typedef struct quarantine_obj_s quarantine_obj_t; +typedef struct quarantine_s quarantine_t; + /* Default per thread quarantine size if valgrind is enabled. */ #define JEMALLOC_VALGRIND_QUARANTINE_DEFAULT (ZU(1) << 24) @@ -8,17 +11,57 @@ /******************************************************************************/ #ifdef JEMALLOC_H_STRUCTS +struct quarantine_obj_s { + void *ptr; + size_t usize; +}; + +struct quarantine_s { + size_t curbytes; + size_t curobjs; + size_t first; +#define LG_MAXOBJS_INIT 10 + size_t lg_maxobjs; + quarantine_obj_t objs[1]; /* Dynamically sized ring buffer. 
*/ +}; + #endif /* JEMALLOC_H_STRUCTS */ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS +quarantine_t *quarantine_init(size_t lg_maxobjs); void quarantine(void *ptr); +void quarantine_cleanup(void *arg); bool quarantine_boot(void); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ #ifdef JEMALLOC_H_INLINES +#ifndef JEMALLOC_ENABLE_INLINE +malloc_tsd_protos(JEMALLOC_ATTR(unused), quarantine, quarantine_t *) + +void quarantine_alloc_hook(void); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_QUARANTINE_C_)) +malloc_tsd_externs(quarantine, quarantine_t *) +malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, quarantine, quarantine_t *, NULL, + quarantine_cleanup) + +JEMALLOC_ALWAYS_INLINE void +quarantine_alloc_hook(void) +{ + quarantine_t *quarantine; + + assert(config_fill && opt_quarantine); + + quarantine = *quarantine_tsd_get(); + if (quarantine == NULL) + quarantine_init(LG_MAXOBJS_INIT); +} +#endif + #endif /* JEMALLOC_H_INLINES */ /******************************************************************************/ diff --git a/src/jemalloc.c b/src/jemalloc.c index c117685a..6f6464db 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -282,12 +282,30 @@ arenas_cleanup(void *arg) malloc_mutex_unlock(&arenas_lock); } +static JEMALLOC_ATTR(always_inline) void +malloc_thread_init(void) +{ + + /* + * TSD initialization can't be safely done as a side effect of + * deallocation, because it is possible for a thread to do nothing but + * deallocate its TLS data via free(), in which case writing to TLS + * would cause write-after-free memory corruption. The quarantine + * facility *only* gets used as a side effect of deallocation, so make + * a best effort attempt at initializing its TSD by hooking all + * allocation events. 
+ */ + if (config_fill && opt_quarantine) + quarantine_alloc_hook(); +} + static JEMALLOC_ATTR(always_inline) bool malloc_init(void) { - if (malloc_initialized == false) - return (malloc_init_hard()); + if (malloc_initialized == false && malloc_init_hard()) + return (true); + malloc_thread_init(); return (false); } @@ -1095,6 +1113,7 @@ je_realloc(void *ptr, size_t size) if (size == 0) { if (ptr != NULL) { /* realloc(ptr, 0) is equivalent to free(p). */ + assert(malloc_initialized || IS_INITIALIZER); if (config_prof) { old_size = isalloc(ptr, true); if (config_valgrind && opt_valgrind) @@ -1120,6 +1139,7 @@ je_realloc(void *ptr, size_t size) if (ptr != NULL) { assert(malloc_initialized || IS_INITIALIZER); + malloc_thread_init(); if (config_prof) { old_size = isalloc(ptr, true); @@ -1323,6 +1343,7 @@ je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr) size_t ret; assert(malloc_initialized || IS_INITIALIZER); + malloc_thread_init(); if (config_ivsalloc) ret = ivsalloc(ptr, config_prof); @@ -1497,6 +1518,7 @@ je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, int flags) assert(size != 0); assert(SIZE_T_MAX - size >= extra); assert(malloc_initialized || IS_INITIALIZER); + malloc_thread_init(); if (arena_ind != UINT_MAX) { arena_chunk_t *chunk; @@ -1611,6 +1633,7 @@ je_sallocm(const void *ptr, size_t *rsize, int flags) size_t sz; assert(malloc_initialized || IS_INITIALIZER); + malloc_thread_init(); if (config_ivsalloc) sz = ivsalloc(ptr, config_prof); diff --git a/src/prof.c b/src/prof.c index b9f03a0d..c133b95c 100644 --- a/src/prof.c +++ b/src/prof.c @@ -438,7 +438,7 @@ prof_lookup(prof_bt_t *bt) cassert(config_prof); - prof_tdata = prof_tdata_get(); + prof_tdata = prof_tdata_get(false); if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) return (NULL); @@ -684,7 +684,7 @@ prof_ctx_destroy(prof_ctx_t *ctx) * avoid a race between the main body of prof_ctx_merge() and entry * into this function. 
*/ - prof_tdata = *prof_tdata_tsd_get(); + prof_tdata = prof_tdata_get(false); assert((uintptr_t)prof_tdata > (uintptr_t)PROF_TDATA_STATE_MAX); prof_enter(prof_tdata); malloc_mutex_lock(ctx->lock); @@ -844,7 +844,7 @@ prof_dump(bool propagate_err, const char *filename, bool leakcheck) cassert(config_prof); - prof_tdata = prof_tdata_get(); + prof_tdata = prof_tdata_get(false); if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) return (true); prof_enter(prof_tdata); @@ -966,11 +966,7 @@ prof_idump(void) if (prof_booted == false) return; - /* - * Don't call prof_tdata_get() here, because it could cause recursive - * allocation. - */ - prof_tdata = *prof_tdata_tsd_get(); + prof_tdata = prof_tdata_get(false); if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) return; if (prof_tdata->enq) { @@ -1020,11 +1016,7 @@ prof_gdump(void) if (prof_booted == false) return; - /* - * Don't call prof_tdata_get() here, because it could cause recursive - * allocation. - */ - prof_tdata = *prof_tdata_tsd_get(); + prof_tdata = prof_tdata_get(false); if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) return; if (prof_tdata->enq) { diff --git a/src/quarantine.c b/src/quarantine.c index 9005ab3b..cab7e169 100644 --- a/src/quarantine.c +++ b/src/quarantine.c @@ -1,3 +1,4 @@ +#define JEMALLOC_QUARANTINE_C_ #include "jemalloc/internal/jemalloc_internal.h" /* @@ -11,39 +12,17 @@ /******************************************************************************/ /* Data. */ -typedef struct quarantine_obj_s quarantine_obj_t; -typedef struct quarantine_s quarantine_t; - -struct quarantine_obj_s { - void *ptr; - size_t usize; -}; - -struct quarantine_s { - size_t curbytes; - size_t curobjs; - size_t first; -#define LG_MAXOBJS_INIT 10 - size_t lg_maxobjs; - quarantine_obj_t objs[1]; /* Dynamically sized ring buffer. 
*/ -}; - -static void quarantine_cleanup(void *arg); - -malloc_tsd_data(static, quarantine, quarantine_t *, NULL) -malloc_tsd_funcs(JEMALLOC_INLINE, quarantine, quarantine_t *, NULL, - quarantine_cleanup) +malloc_tsd_data(, quarantine, quarantine_t *, NULL) /******************************************************************************/ /* Function prototypes for non-inline static functions. */ -static quarantine_t *quarantine_init(size_t lg_maxobjs); static quarantine_t *quarantine_grow(quarantine_t *quarantine); static void quarantine_drain(quarantine_t *quarantine, size_t upper_bound); /******************************************************************************/ -static quarantine_t * +quarantine_t * quarantine_init(size_t lg_maxobjs) { quarantine_t *quarantine; @@ -119,24 +98,16 @@ quarantine(void *ptr) quarantine = *quarantine_tsd_get(); if ((uintptr_t)quarantine <= (uintptr_t)QUARANTINE_STATE_MAX) { - if (quarantine == NULL) { - if ((quarantine = quarantine_init(LG_MAXOBJS_INIT)) == - NULL) { - idalloc(ptr); - return; - } - } else { - if (quarantine == QUARANTINE_STATE_PURGATORY) { - /* - * Make a note that quarantine() was called - * after quarantine_cleanup() was called. - */ - quarantine = QUARANTINE_STATE_REINCARNATED; - quarantine_tsd_set(&quarantine); - } - idalloc(ptr); - return; + if (quarantine == QUARANTINE_STATE_PURGATORY) { + /* + * Make a note that quarantine() was called after + * quarantine_cleanup() was called. + */ + quarantine = QUARANTINE_STATE_REINCARNATED; + quarantine_tsd_set(&quarantine); } + idalloc(ptr); + return; } /* * Drain one or more objects if the quarantine size limit would be @@ -169,7 +140,7 @@ quarantine(void *ptr) } } -static void +void quarantine_cleanup(void *arg) { quarantine_t *quarantine = *(quarantine_t **)arg; From d0e942e4669b8600b0bd7e5ae132ae26d10a40ed Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 31 Jan 2013 14:42:41 -0800 Subject: [PATCH 0284/3378] Fix two quarantine bugs. 
Internal reallocation of the quarantined object array leaked the old array. Reallocation failure for internal reallocation of the quarantined object array (very unlikely) resulted in memory corruption. --- ChangeLog | 5 +++++ src/quarantine.c | 29 +++++++++++++++++++---------- 2 files changed, 24 insertions(+), 10 deletions(-) diff --git a/ChangeLog b/ChangeLog index ae7d0bfe..5f2cc455 100644 --- a/ChangeLog +++ b/ChangeLog @@ -12,6 +12,11 @@ found in the git revision history: - Fix TLS-related memory corruption that could occur during thread exit if the thread never allocated memory. Only the quarantine and prof facilities were susceptible. + - Fix two quarantine bugs: + + Internal reallocation of the quarantined object array leaked the old + array. + + Reallocation failure for internal reallocation of the quarantined object + array (very unlikely) resulted in memory corruption. * 3.3.0 (January 23, 2013) diff --git a/src/quarantine.c b/src/quarantine.c index cab7e169..f96a948d 100644 --- a/src/quarantine.c +++ b/src/quarantine.c @@ -18,6 +18,7 @@ malloc_tsd_data(, quarantine, quarantine_t *, NULL) /* Function prototypes for non-inline static functions. 
*/ static quarantine_t *quarantine_grow(quarantine_t *quarantine); +static void quarantine_drain_one(quarantine_t *quarantine); static void quarantine_drain(quarantine_t *quarantine, size_t upper_bound); /******************************************************************************/ @@ -47,8 +48,10 @@ quarantine_grow(quarantine_t *quarantine) quarantine_t *ret; ret = quarantine_init(quarantine->lg_maxobjs + 1); - if (ret == NULL) + if (ret == NULL) { + quarantine_drain_one(quarantine); return (quarantine); + } ret->curbytes = quarantine->curbytes; ret->curobjs = quarantine->curobjs; @@ -68,23 +71,29 @@ quarantine_grow(quarantine_t *quarantine) memcpy(&ret->objs[ncopy_a], quarantine->objs, ncopy_b * sizeof(quarantine_obj_t)); } + idalloc(quarantine); return (ret); } +static void +quarantine_drain_one(quarantine_t *quarantine) +{ + quarantine_obj_t *obj = &quarantine->objs[quarantine->first]; + assert(obj->usize == isalloc(obj->ptr, config_prof)); + idalloc(obj->ptr); + quarantine->curbytes -= obj->usize; + quarantine->curobjs--; + quarantine->first = (quarantine->first + 1) & ((ZU(1) << + quarantine->lg_maxobjs) - 1); +} + static void quarantine_drain(quarantine_t *quarantine, size_t upper_bound) { - while (quarantine->curbytes > upper_bound && quarantine->curobjs > 0) { - quarantine_obj_t *obj = &quarantine->objs[quarantine->first]; - assert(obj->usize == isalloc(obj->ptr, config_prof)); - idalloc(obj->ptr); - quarantine->curbytes -= obj->usize; - quarantine->curobjs--; - quarantine->first = (quarantine->first + 1) & ((ZU(1) << - quarantine->lg_maxobjs) - 1); - } + while (quarantine->curbytes > upper_bound && quarantine->curobjs > 0) + quarantine_drain_one(quarantine); } void From a7a28c334e5526ba716bf6046eab8d60598183eb Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 31 Jan 2013 16:53:58 -0800 Subject: [PATCH 0285/3378] Fix a chunk recycling bug. Fix a chunk recycling bug that could cause the allocator to lose track of whether a chunk was zeroed. 
On FreeBSD, NetBSD, and OS X, it could cause corruption if allocating via sbrk(2) (unlikely unless running with the "dss:primary" option specified). This was completely harmless on Linux unless using mlockall(2) (and unlikely even then, unless the --disable-munmap configure option or the "dss:primary" option was specified). This regression was introduced in 3.1.0 by the mlockall(2)/madvise(2) interaction fix. --- ChangeLog | 8 ++++++++ src/chunk.c | 1 + 2 files changed, 9 insertions(+) diff --git a/ChangeLog b/ChangeLog index 5f2cc455..ee63cb48 100644 --- a/ChangeLog +++ b/ChangeLog @@ -9,6 +9,14 @@ found in the git revision history: * 3.x.x (XXX Not yet released) Bug fixes: + - Fix a chunk recycling bug that could cause the allocator to lose track of + whether a chunk was zeroed. On FreeBSD, NetBSD, and OS X, it could cause + corruption if allocating via sbrk(2) (unlikely unless running with the + "dss:primary" option specified). This was completely harmless on Linux + unless using mlockall(2) (and unlikely even then, unless the + --disable-munmap configure option or the "dss:primary" option was + specified). This regression was introduced in 3.1.0 by the + mlockall(2)/madvise(2) interaction fix. - Fix TLS-related memory corruption that could occur during thread exit if the thread never allocated memory. Only the quarantine and prof facilities were susceptible. diff --git a/src/chunk.c b/src/chunk.c index 46e387e1..8cff240a 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -111,6 +111,7 @@ chunk_recycle(extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, size_t size, } node->addr = (void *)((uintptr_t)(ret) + size); node->size = trailsize; + node->zeroed = zeroed; extent_tree_szad_insert(chunks_szad, node); extent_tree_ad_insert(chunks_ad, node); node = NULL; From 06912756cccd0064a9c5c59992dbac1cec68ba3f Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 31 Jan 2013 17:02:53 -0800 Subject: [PATCH 0286/3378] Fix Valgrind integration. 
Fix Valgrind integration to annotate all internally allocated memory in a way that keeps Valgrind happy about internal data structure access. --- ChangeLog | 2 + .../jemalloc/internal/jemalloc_internal.h.in | 1 + include/jemalloc/internal/tcache.h | 4 +- src/arena.c | 15 +++--- src/base.c | 3 ++ src/chunk.c | 48 ++++++++++--------- src/chunk_dss.c | 1 - test/ALLOCM_ARENA.c | 1 + test/thread_arena.c | 1 + 9 files changed, 44 insertions(+), 32 deletions(-) diff --git a/ChangeLog b/ChangeLog index ee63cb48..3c0af684 100644 --- a/ChangeLog +++ b/ChangeLog @@ -25,6 +25,8 @@ found in the git revision history: array. + Reallocation failure for internal reallocation of the quarantined object array (very unlikely) resulted in memory corruption. + - Fix Valgrind integration to annotate all internally allocated memory in a + way that keeps Valgrind happy about internal data structure access. * 3.3.0 (January 23, 2013) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index c606c122..6270a08e 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -443,6 +443,7 @@ static const bool config_ivsalloc = #define VALGRIND_MALLOCLIKE_BLOCK(addr, sizeB, rzB, is_zeroed) #define VALGRIND_RESIZEINPLACE_BLOCK(addr, oldSizeB, newSizeB, rzB) #define VALGRIND_FREELIKE_BLOCK(addr, rzB) +#define VALGRIND_MAKE_MEM_NOACCESS(_qzz_addr, _qzz_len) #define VALGRIND_MAKE_MEM_UNDEFINED(_qzz_addr, _qzz_len) #define VALGRIND_MAKE_MEM_DEFINED(_qzz_addr, _qzz_len) #define JEMALLOC_VALGRIND_MALLOC(cond, ptr, usize, zero) diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index 71900c2f..ba36204f 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -320,8 +320,8 @@ tcache_alloc_small(tcache_t *tcache, size_t size, bool zero) } VALGRIND_MAKE_MEM_UNDEFINED(ret, size); memset(ret, 0, size); - 
VALGRIND_MAKE_MEM_UNDEFINED(ret, size); } + VALGRIND_MAKE_MEM_UNDEFINED(ret, size); if (config_stats) tbin->tstats.nrequests++; @@ -371,8 +371,8 @@ tcache_alloc_large(tcache_t *tcache, size_t size, bool zero) } else { VALGRIND_MAKE_MEM_UNDEFINED(ret, size); memset(ret, 0, size); - VALGRIND_MAKE_MEM_UNDEFINED(ret, size); } + VALGRIND_MAKE_MEM_UNDEFINED(ret, size); if (config_stats) tbin->tstats.nrequests++; diff --git a/src/arena.c b/src/arena.c index 8d50f4d4..d79e0358 100644 --- a/src/arena.c +++ b/src/arena.c @@ -366,8 +366,6 @@ arena_run_zero(arena_chunk_t *chunk, size_t run_ind, size_t npages) LG_PAGE)), (npages << LG_PAGE)); memset((void *)((uintptr_t)chunk + (run_ind << LG_PAGE)), 0, (npages << LG_PAGE)); - VALGRIND_MAKE_MEM_UNDEFINED((void *)((uintptr_t)chunk + (run_ind << - LG_PAGE)), (npages << LG_PAGE)); } static inline void @@ -380,8 +378,6 @@ arena_run_page_validate_zeroed(arena_chunk_t *chunk, size_t run_ind) LG_PAGE)), PAGE); for (i = 0; i < PAGE / sizeof(size_t); i++) assert(p[i] == 0); - VALGRIND_MAKE_MEM_UNDEFINED((void *)((uintptr_t)chunk + (run_ind << - LG_PAGE)), PAGE); } static void @@ -513,6 +509,8 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, run_ind+need_pages-1); } } + VALGRIND_MAKE_MEM_UNDEFINED((void *)((uintptr_t)chunk + (run_ind << + LG_PAGE)), (need_pages << LG_PAGE)); } static arena_chunk_t * @@ -574,6 +572,11 @@ arena_chunk_alloc(arena_t *arena) for (i = map_bias+1; i < chunk_npages-1; i++) arena_mapbits_unzeroed_set(chunk, i, unzeroed); } else if (config_debug) { + VALGRIND_MAKE_MEM_DEFINED( + (void *)arena_mapp_get(chunk, map_bias+1), + (void *)((uintptr_t) + arena_mapp_get(chunk, chunk_npages-1) + - (uintptr_t)arena_mapp_get(chunk, map_bias+1))); for (i = map_bias+1; i < chunk_npages-1; i++) { assert(arena_mapbits_unzeroed_get(chunk, i) == unzeroed); @@ -1246,8 +1249,6 @@ arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin) (uintptr_t)bin_info->bitmap_offset); /* Initialize run internals. 
*/ - VALGRIND_MAKE_MEM_UNDEFINED(run, bin_info->reg0_offset - - bin_info->redzone_size); run->bin = bin; run->nextind = 0; run->nfree = bin_info->nregs; @@ -1464,8 +1465,8 @@ arena_malloc_small(arena_t *arena, size_t size, bool zero) } VALGRIND_MAKE_MEM_UNDEFINED(ret, size); memset(ret, 0, size); - VALGRIND_MAKE_MEM_UNDEFINED(ret, size); } + VALGRIND_MAKE_MEM_UNDEFINED(ret, size); return (ret); } diff --git a/src/base.c b/src/base.c index b1a5945e..4e62e8fa 100644 --- a/src/base.c +++ b/src/base.c @@ -63,6 +63,7 @@ base_alloc(size_t size) ret = base_next_addr; base_next_addr = (void *)((uintptr_t)base_next_addr + csize); malloc_mutex_unlock(&base_mtx); + VALGRIND_MAKE_MEM_UNDEFINED(ret, csize); return (ret); } @@ -88,6 +89,7 @@ base_node_alloc(void) ret = base_nodes; base_nodes = *(extent_node_t **)ret; malloc_mutex_unlock(&base_mtx); + VALGRIND_MAKE_MEM_UNDEFINED(ret, sizeof(extent_node_t)); } else { malloc_mutex_unlock(&base_mtx); ret = (extent_node_t *)base_alloc(sizeof(extent_node_t)); @@ -100,6 +102,7 @@ void base_node_dealloc(extent_node_t *node) { + VALGRIND_MAKE_MEM_UNDEFINED(node, sizeof(extent_node_t)); malloc_mutex_lock(&base_mtx); *(extent_node_t **)node = base_nodes; base_nodes = node; diff --git a/src/chunk.c b/src/chunk.c index 8cff240a..044f76be 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -120,7 +120,6 @@ chunk_recycle(extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, size_t size, if (node != NULL) base_node_dealloc(node); - VALGRIND_MAKE_MEM_UNDEFINED(ret, size); if (*zero) { if (zeroed == false) memset(ret, 0, size); @@ -131,7 +130,6 @@ chunk_recycle(extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, size_t size, VALGRIND_MAKE_MEM_DEFINED(ret, size); for (i = 0; i < size / sizeof(size_t); i++) assert(p[i] == 0); - VALGRIND_MAKE_MEM_UNDEFINED(ret, size); } } return (ret); @@ -180,27 +178,32 @@ chunk_alloc(size_t size, size_t alignment, bool base, bool *zero, /* All strategies for allocation failed. 
*/ ret = NULL; label_return: - if (config_ivsalloc && base == false && ret != NULL) { - if (rtree_set(chunks_rtree, (uintptr_t)ret, ret)) { - chunk_dealloc(ret, size, true); - return (NULL); + if (ret != NULL) { + if (config_ivsalloc && base == false) { + if (rtree_set(chunks_rtree, (uintptr_t)ret, ret)) { + chunk_dealloc(ret, size, true); + return (NULL); + } } - } - if ((config_stats || config_prof) && ret != NULL) { - bool gdump; - malloc_mutex_lock(&chunks_mtx); - if (config_stats) - stats_chunks.nchunks += (size / chunksize); - stats_chunks.curchunks += (size / chunksize); - if (stats_chunks.curchunks > stats_chunks.highchunks) { - stats_chunks.highchunks = stats_chunks.curchunks; - if (config_prof) - gdump = true; - } else if (config_prof) - gdump = false; - malloc_mutex_unlock(&chunks_mtx); - if (config_prof && opt_prof && opt_prof_gdump && gdump) - prof_gdump(); + if (config_stats || config_prof) { + bool gdump; + malloc_mutex_lock(&chunks_mtx); + if (config_stats) + stats_chunks.nchunks += (size / chunksize); + stats_chunks.curchunks += (size / chunksize); + if (stats_chunks.curchunks > stats_chunks.highchunks) { + stats_chunks.highchunks = + stats_chunks.curchunks; + if (config_prof) + gdump = true; + } else if (config_prof) + gdump = false; + malloc_mutex_unlock(&chunks_mtx); + if (config_prof && opt_prof && opt_prof_gdump && gdump) + prof_gdump(); + } + if (config_valgrind) + VALGRIND_MAKE_MEM_UNDEFINED(ret, size); } assert(CHUNK_ADDR2BASE(ret) == ret); return (ret); @@ -214,6 +217,7 @@ chunk_record(extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, void *chunk, extent_node_t *xnode, *node, *prev, key; unzeroed = pages_purge(chunk, size); + VALGRIND_MAKE_MEM_NOACCESS(chunk, size); /* * Allocate a node before acquiring chunks_mtx even though it might not diff --git a/src/chunk_dss.c b/src/chunk_dss.c index d1aea930..24781cc5 100644 --- a/src/chunk_dss.c +++ b/src/chunk_dss.c @@ -127,7 +127,6 @@ chunk_alloc_dss(size_t size, size_t alignment, bool 
*zero) if (*zero) { VALGRIND_MAKE_MEM_UNDEFINED(ret, size); memset(ret, 0, size); - VALGRIND_MAKE_MEM_UNDEFINED(ret, size); } return (ret); } diff --git a/test/ALLOCM_ARENA.c b/test/ALLOCM_ARENA.c index 15856908..2c52485e 100644 --- a/test/ALLOCM_ARENA.c +++ b/test/ALLOCM_ARENA.c @@ -41,6 +41,7 @@ je_thread_start(void *arg) malloc_printf("Unexpected allocm() error\n"); abort(); } + dallocm(p, 0); return (NULL); } diff --git a/test/thread_arena.c b/test/thread_arena.c index 2ffdb5e8..c5a21fa0 100644 --- a/test/thread_arena.c +++ b/test/thread_arena.c @@ -17,6 +17,7 @@ je_thread_start(void *arg) malloc_printf("%s(): Error in malloc()\n", __func__); return (void *)1; } + free(p); size = sizeof(arena_ind); if ((err = mallctl("thread.arena", &arena_ind, &size, &main_arena_ind, From 88c222c8e91499bf5d3fba53b24222df0cda5771 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 6 Feb 2013 11:59:30 -0800 Subject: [PATCH 0287/3378] Fix a prof-related locking order bug. Fix a locking order bug that could cause deadlock during fork if heap profiling were enabled. --- ChangeLog | 2 ++ include/jemalloc/internal/arena.h | 33 +++++++++++++++++++------------ src/arena.c | 13 +++++++----- src/jemalloc.c | 6 +++--- src/tcache.c | 15 ++++++++++---- 5 files changed, 44 insertions(+), 25 deletions(-) diff --git a/ChangeLog b/ChangeLog index 3c0af684..bf96306a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -9,6 +9,8 @@ found in the git revision history: * 3.x.x (XXX Not yet released) Bug fixes: + - Fix a locking order bug that could cause deadlock during fork if heap + profiling were enabled. - Fix a chunk recycling bug that could cause the allocator to lose track of whether a chunk was zeroed. 
On FreeBSD, NetBSD, and OS X, it could cause corruption if allocating via sbrk(2) (unlikely unless running with the diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 8fdee931..f2c18f43 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -463,9 +463,9 @@ void arena_mapbits_small_set(arena_chunk_t *chunk, size_t pageind, size_t runind, size_t binind, size_t flags); void arena_mapbits_unzeroed_set(arena_chunk_t *chunk, size_t pageind, size_t unzeroed); -void arena_prof_accum_impl(arena_t *arena, uint64_t accumbytes); -void arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes); -void arena_prof_accum(arena_t *arena, uint64_t accumbytes); +bool arena_prof_accum_impl(arena_t *arena, uint64_t accumbytes); +bool arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes); +bool arena_prof_accum(arena_t *arena, uint64_t accumbytes); size_t arena_ptr_small_binind_get(const void *ptr, size_t mapbits); size_t arena_bin_index(arena_t *arena, arena_bin_t *bin); unsigned arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, @@ -663,7 +663,7 @@ arena_mapbits_unzeroed_set(arena_chunk_t *chunk, size_t pageind, *mapbitsp = (*mapbitsp & ~CHUNK_MAP_UNZEROED) | unzeroed; } -JEMALLOC_INLINE void +JEMALLOC_INLINE bool arena_prof_accum_impl(arena_t *arena, uint64_t accumbytes) { @@ -672,33 +672,40 @@ arena_prof_accum_impl(arena_t *arena, uint64_t accumbytes) arena->prof_accumbytes += accumbytes; if (arena->prof_accumbytes >= prof_interval) { - prof_idump(); arena->prof_accumbytes -= prof_interval; + return (true); } + return (false); } -JEMALLOC_INLINE void +JEMALLOC_INLINE bool arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes) { cassert(config_prof); if (prof_interval == 0) - return; - arena_prof_accum_impl(arena, accumbytes); + return (false); + return (arena_prof_accum_impl(arena, accumbytes)); } -JEMALLOC_INLINE void +JEMALLOC_INLINE bool arena_prof_accum(arena_t *arena, uint64_t 
accumbytes) { cassert(config_prof); if (prof_interval == 0) - return; - malloc_mutex_lock(&arena->lock); - arena_prof_accum_impl(arena, accumbytes); - malloc_mutex_unlock(&arena->lock); + return (false); + + { + bool ret; + + malloc_mutex_lock(&arena->lock); + ret = arena_prof_accum_impl(arena, accumbytes); + malloc_mutex_unlock(&arena->lock); + return (ret); + } } JEMALLOC_ALWAYS_INLINE size_t diff --git a/src/arena.c b/src/arena.c index d79e0358..05a787f8 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1338,8 +1338,8 @@ arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, size_t binind, assert(tbin->ncached == 0); - if (config_prof) - arena_prof_accum(arena, prof_accumbytes); + if (config_prof && arena_prof_accum(arena, prof_accumbytes)) + prof_idump(); bin = &arena->bins[binind]; malloc_mutex_lock(&bin->lock); for (i = 0, nfill = (tcache_bin_info[binind].ncached_max >> @@ -1447,8 +1447,8 @@ arena_malloc_small(arena_t *arena, size_t size, bool zero) bin->stats.nrequests++; } malloc_mutex_unlock(&bin->lock); - if (config_prof && isthreaded == false) - arena_prof_accum(arena, size); + if (config_prof && isthreaded == false && arena_prof_accum(arena, size)) + prof_idump(); if (zero == false) { if (config_fill) { @@ -1475,6 +1475,7 @@ void * arena_malloc_large(arena_t *arena, size_t size, bool zero) { void *ret; + UNUSED bool idump; /* Large allocation. */ size = PAGE_CEILING(size); @@ -1493,8 +1494,10 @@ arena_malloc_large(arena_t *arena, size_t size, bool zero) arena->stats.lstats[(size >> LG_PAGE) - 1].curruns++; } if (config_prof) - arena_prof_accum_locked(arena, size); + idump = arena_prof_accum_locked(arena, size); malloc_mutex_unlock(&arena->lock); + if (config_prof && idump) + prof_idump(); if (zero == false) { if (config_fill) { diff --git a/src/jemalloc.c b/src/jemalloc.c index 6f6464db..bc350ed9 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1753,12 +1753,12 @@ _malloc_prefork(void) /* Acquire all mutexes in a safe order. 
*/ ctl_prefork(); + prof_prefork(); malloc_mutex_prefork(&arenas_lock); for (i = 0; i < narenas_total; i++) { if (arenas[i] != NULL) arena_prefork(arenas[i]); } - prof_prefork(); chunk_prefork(); base_prefork(); huge_prefork(); @@ -1784,12 +1784,12 @@ _malloc_postfork(void) huge_postfork_parent(); base_postfork_parent(); chunk_postfork_parent(); - prof_postfork_parent(); for (i = 0; i < narenas_total; i++) { if (arenas[i] != NULL) arena_postfork_parent(arenas[i]); } malloc_mutex_postfork_parent(&arenas_lock); + prof_postfork_parent(); ctl_postfork_parent(); } @@ -1804,12 +1804,12 @@ jemalloc_postfork_child(void) huge_postfork_child(); base_postfork_child(); chunk_postfork_child(); - prof_postfork_child(); for (i = 0; i < narenas_total; i++) { if (arenas[i] != NULL) arena_postfork_child(arenas[i]); } malloc_mutex_postfork_child(&arenas_lock); + prof_postfork_child(); ctl_postfork_child(); } diff --git a/src/tcache.c b/src/tcache.c index 7befdc86..98ed19ed 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -97,7 +97,8 @@ tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem, arena_bin_t *bin = &arena->bins[binind]; if (config_prof && arena == tcache->arena) { - arena_prof_accum(arena, tcache->prof_accumbytes); + if (arena_prof_accum(arena, tcache->prof_accumbytes)) + prof_idump(); tcache->prof_accumbytes = 0; } @@ -174,11 +175,14 @@ tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem, arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE( tbin->avail[0]); arena_t *arena = chunk->arena; + UNUSED bool idump; + if (config_prof) + idump = false; malloc_mutex_lock(&arena->lock); if ((config_prof || config_stats) && arena == tcache->arena) { if (config_prof) { - arena_prof_accum_locked(arena, + idump = arena_prof_accum_locked(arena, tcache->prof_accumbytes); tcache->prof_accumbytes = 0; } @@ -210,6 +214,8 @@ tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem, } } malloc_mutex_unlock(&arena->lock); + if (config_prof && 
idump) + prof_idump(); } if (config_stats && merged_stats == false) { /* @@ -341,8 +347,9 @@ tcache_destroy(tcache_t *tcache) } } - if (config_prof && tcache->prof_accumbytes > 0) - arena_prof_accum(tcache->arena, tcache->prof_accumbytes); + if (config_prof && tcache->prof_accumbytes > 0 && + arena_prof_accum(tcache->arena, tcache->prof_accumbytes)) + prof_idump(); tcache_size = arena_salloc(tcache, false); if (tcache_size <= SMALL_MAXCLASS) { From 9f9897ad4275e540cf1bea5a6de762c809b7695c Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Mon, 28 Jan 2013 15:19:34 -0500 Subject: [PATCH 0288/3378] fix building for s390 systems Checking for __s390x__ means you work on s390x, but not s390 (32bit) systems. So use __s390__ which works for both. With this, `make check` passes on s390. Signed-off-by: Mike Frysinger --- include/jemalloc/internal/jemalloc_internal.h.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 6270a08e..381bd609 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -287,7 +287,7 @@ static const bool config_ivsalloc = # ifdef __powerpc__ # define LG_QUANTUM 4 # endif -# ifdef __s390x__ +# ifdef __s390__ # define LG_QUANTUM 4 # endif # ifdef __SH4__ From a4915851577c948cf9051d60152944ca2d1b2d59 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 6 Mar 2013 11:19:31 -0800 Subject: [PATCH 0289/3378] Add no-op bodies to VALGRIND_*() macro stubs. 
Add no-op bodies to VALGRIND_*() macro stubs so that they can be used in contexts like the following without generating a compiler warning about the 'if' statement having an empty body: if (config_valgrind) VALGRIND_MAKE_MEM_UNDEFINED(ret, size); --- .../jemalloc/internal/jemalloc_internal.h.in | 20 ++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 381bd609..50d84cab 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -440,16 +440,18 @@ static const bool config_ivsalloc = } while (0) #else #define RUNNING_ON_VALGRIND ((unsigned)0) -#define VALGRIND_MALLOCLIKE_BLOCK(addr, sizeB, rzB, is_zeroed) -#define VALGRIND_RESIZEINPLACE_BLOCK(addr, oldSizeB, newSizeB, rzB) -#define VALGRIND_FREELIKE_BLOCK(addr, rzB) -#define VALGRIND_MAKE_MEM_NOACCESS(_qzz_addr, _qzz_len) -#define VALGRIND_MAKE_MEM_UNDEFINED(_qzz_addr, _qzz_len) -#define VALGRIND_MAKE_MEM_DEFINED(_qzz_addr, _qzz_len) -#define JEMALLOC_VALGRIND_MALLOC(cond, ptr, usize, zero) +#define VALGRIND_MALLOCLIKE_BLOCK(addr, sizeB, rzB, is_zeroed) \ + do {} while (0) +#define VALGRIND_RESIZEINPLACE_BLOCK(addr, oldSizeB, newSizeB, rzB) \ + do {} while (0) +#define VALGRIND_FREELIKE_BLOCK(addr, rzB) do {} while (0) +#define VALGRIND_MAKE_MEM_NOACCESS(_qzz_addr, _qzz_len) do {} while (0) +#define VALGRIND_MAKE_MEM_UNDEFINED(_qzz_addr, _qzz_len) do {} while (0) +#define VALGRIND_MAKE_MEM_DEFINED(_qzz_addr, _qzz_len) do {} while (0) +#define JEMALLOC_VALGRIND_MALLOC(cond, ptr, usize, zero) do {} while (0) #define JEMALLOC_VALGRIND_REALLOC(ptr, usize, old_ptr, old_usize, \ - old_rzsize, zero) -#define JEMALLOC_VALGRIND_FREE(ptr, rzsize) + old_rzsize, zero) do {} while (0) +#define JEMALLOC_VALGRIND_FREE(ptr, rzsize) do {} while (0) #endif #include "jemalloc/internal/util.h" From 2298835e70ae79577a1c691020874bb11eefc039 Mon 
Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 6 Mar 2013 11:11:17 -0800 Subject: [PATCH 0290/3378] Update ChangeLog for 3.3.1. --- ChangeLog | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index bf96306a..fc096d8f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -6,7 +6,10 @@ found in the git revision history: http://www.canonware.com/cgi-bin/gitweb.cgi?p=jemalloc.git git://canonware.com/jemalloc.git -* 3.x.x (XXX Not yet released) +* 3.3.1 (March 6, 2013) + + This version fixes bugs that are typically encountered only when utilizing + custom run-time options. Bug fixes: - Fix a locking order bug that could cause deadlock during fork if heap @@ -29,6 +32,7 @@ found in the git revision history: array (very unlikely) resulted in memory corruption. - Fix Valgrind integration to annotate all internally allocated memory in a way that keeps Valgrind happy about internal data structure access. + - Fix building for s390 systems. * 3.3.0 (January 23, 2013) From 705328ca46ac2ae2c1d2e172917a9278107d1288 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 19 Mar 2013 16:28:41 -0700 Subject: [PATCH 0291/3378] Clarify how to use malloc_conf. Clarify that malloc_conf is intended only for compile-time configuration, since jemalloc may be initialized before main() is entered. --- doc/jemalloc.xml.in | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 09305801..abd5e6fc 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -432,7 +432,14 @@ for (i = 0; i < nbins; i++) { referenced by the symbolic link named /etc/malloc.conf, and the value of the environment variable MALLOC_CONF, will be interpreted, in - that order, from left to right as options. + that order, from left to right as options. 
Note that + malloc_conf may be read before + main is entered, so the declaration of + malloc_conf should specify an initializer that contains + the final value to be read by jemalloc. malloc_conf is + a compile-time setting, whereas /etc/malloc.conf and MALLOC_CONF + can be safely set any time prior to program invocation. An options string is a comma-separated list of option:value pairs. There is one key corresponding to each Date: Wed, 17 Apr 2013 09:57:11 -0700 Subject: [PATCH 0292/3378] Fix deadlock related to chunk_record(). Fix chunk_record() to unlock chunks_mtx before deallocating a base node, in order to avoid potential deadlock. Reported by Tudor Bosman. --- src/chunk.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/chunk.c b/src/chunk.c index 044f76be..e8fc473c 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -242,8 +242,6 @@ chunk_record(extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, void *chunk, node->size += size; node->zeroed = (node->zeroed && (unzeroed == false)); extent_tree_szad_insert(chunks_szad, node); - if (xnode != NULL) - base_node_dealloc(xnode); } else { /* Coalescing forward failed, so insert a new node. */ if (xnode == NULL) { @@ -253,10 +251,10 @@ chunk_record(extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, void *chunk, * already been purged, so this is only a virtual * memory leak. */ - malloc_mutex_unlock(&chunks_mtx); - return; + goto label_return; } node = xnode; + xnode = NULL; /* Prevent deallocation below. */ node->addr = chunk; node->size = size; node->zeroed = (unzeroed == false); @@ -284,7 +282,16 @@ chunk_record(extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, void *chunk, base_node_dealloc(prev); } + +label_return: malloc_mutex_unlock(&chunks_mtx); + if (xnode != NULL) { + /* + * Deallocate xnode after unlocking chunks_mtx in order to + * avoid potential deadlock. 
+ */ + base_node_dealloc(xnode); + } } void From 4f929aa94853ecd7da2791f462d1b972ee66db8e Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 22 Apr 2013 22:36:18 -0700 Subject: [PATCH 0293/3378] Fix another deadlock related to chunk_record(). Fix chunk_record() to unlock chunks_mtx before deallocating a base node, in order to avoid potential deadlock. This fix addresses the second of two similar bugs. --- src/chunk.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/src/chunk.c b/src/chunk.c index e8fc473c..aef3fede 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -214,7 +214,7 @@ chunk_record(extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, void *chunk, size_t size) { bool unzeroed; - extent_node_t *xnode, *node, *prev, key; + extent_node_t *xnode, *node, *prev, *xprev, key; unzeroed = pages_purge(chunk, size); VALGRIND_MAKE_MEM_NOACCESS(chunk, size); @@ -226,6 +226,8 @@ chunk_record(extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, void *chunk, * held. */ xnode = base_node_alloc(); + /* Use xprev to implement conditional deferred deallocation of prev. */ + xprev = NULL; malloc_mutex_lock(&chunks_mtx); key.addr = (void *)((uintptr_t)chunk + size); @@ -280,18 +282,19 @@ chunk_record(extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, void *chunk, node->zeroed = (node->zeroed && prev->zeroed); extent_tree_szad_insert(chunks_szad, node); - base_node_dealloc(prev); + xprev = prev; } label_return: malloc_mutex_unlock(&chunks_mtx); - if (xnode != NULL) { - /* - * Deallocate xnode after unlocking chunks_mtx in order to - * avoid potential deadlock. - */ + /* + * Deallocate xnode and/or xprev after unlocking chunks_mtx in order to + * avoid potential deadlock. 
+ */ + if (xnode != NULL) base_node_dealloc(xnode); - } + if (xprev != NULL) + base_node_dealloc(prev); } void From daf6d0446ce64fb563b7d96fda077e6406c602be Mon Sep 17 00:00:00 2001 From: Riku Voipio Date: Mon, 18 Mar 2013 16:40:20 +0200 Subject: [PATCH 0294/3378] Add aarch64 LG_QUANTUM size definition Signed-off-by: Riku Voipio --- include/jemalloc/internal/jemalloc_internal.h.in | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 50d84cab..e46ac544 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -278,6 +278,9 @@ static const bool config_ivsalloc = # ifdef __arm__ # define LG_QUANTUM 3 # endif +# ifdef __aarch64__ +# define LG_QUANTUM 4 +# endif # ifdef __hppa__ # define LG_QUANTUM 4 # endif From 765cc2b58377551c820e2f2ffc0a311ed31a386c Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 2 Jun 2013 20:58:00 -0700 Subject: [PATCH 0295/3378] Update ChangeLog for 3.4.0. --- ChangeLog | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index fc096d8f..8ab88487 100644 --- a/ChangeLog +++ b/ChangeLog @@ -6,6 +6,19 @@ found in the git revision history: http://www.canonware.com/cgi-bin/gitweb.cgi?p=jemalloc.git git://canonware.com/jemalloc.git +* 3.4.0 (June 2, 2013) + + This version is essentially a small bugfix release, but the addition of + aarch64 support requires that the minor version be incremented. + + Bug fixes: + - Fix race-triggered deadlocks in chunk_record(). These deadlocks were + typically triggered by multiple threads concurrently deallocating huge + objects. + + New features: + - Add support for the aarch64 architecture. 
+ * 3.3.1 (March 6, 2013) This version fixes bugs that are typically encountered only when utilizing @@ -15,7 +28,7 @@ found in the git revision history: - Fix a locking order bug that could cause deadlock during fork if heap profiling were enabled. - Fix a chunk recycling bug that could cause the allocator to lose track of - whether a chunk was zeroed. On FreeBSD, NetBSD, and OS X, it could cause + whether a chunk was zeroed. On FreeBSD, NetBSD, and OS X, it could cause corruption if allocating via sbrk(2) (unlikely unless running with the "dss:primary" option specified). This was completely harmless on Linux unless using mlockall(2) (and unlikely even then, unless the From ad505e0ec622883fbb0650763ea8b54f64a770c9 Mon Sep 17 00:00:00 2001 From: "Jory A. Pratt" Date: Sun, 11 Aug 2013 09:44:59 -0500 Subject: [PATCH 0296/3378] Allow toolchain to determine ar --- Makefile.in | 4 +++- configure.ac | 10 +++++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/Makefile.in b/Makefile.in index 74810472..478becbe 100644 --- a/Makefile.in +++ b/Makefile.in @@ -55,6 +55,8 @@ PIC_CFLAGS = @PIC_CFLAGS@ CTARGET = @CTARGET@ LDTARGET = @LDTARGET@ MKLIB = @MKLIB@ +AR = @AR@ +ARFLAGS = crus CC_MM = @CC_MM@ ifeq (macho, $(ABI)) @@ -185,7 +187,7 @@ $(objroot)lib/$(LIBJEMALLOC)_s.$(A) : $(COBJS) $(STATIC_LIBS): @mkdir -p $(@D) - $(MKLIB) $+ + $(AR) $(ARFLAGS) $@ $+ $(objroot)test/bitmap$(EXE): $(objroot)src/bitmap.$(O) diff --git a/configure.ac b/configure.ac index c270662b..f4b4c21f 100644 --- a/configure.ac +++ b/configure.ac @@ -226,9 +226,13 @@ PIC_CFLAGS='-fPIC -DPIC' CTARGET='-o $@' LDTARGET='-o $@' EXTRA_LDFLAGS= -MKLIB='ar crus $@' CC_MM=1 +AN_MAKEVAR([AR], [AC_PROG_AR]) +AN_PROGRAM([ar], [AC_PROG_AR]) +AC_DEFUN([AC_PROG_AR], [AC_CHECK_TOOL(AR, ar, :)]) +AC_PROG_AR + dnl Platform-specific settings. abi and RPATH can probably be determined dnl programmatically, but doing so is error-prone, which makes it generally dnl not worth the trouble. 
@@ -310,7 +314,8 @@ case "${host}" in EXTRA_LDFLAGS="-link -DEBUG" CTARGET='-Fo$@' LDTARGET='-Fe$@' - MKLIB='lib -nologo -out:$@' + AR='lib' + ARFLAGS='-nologo -out:' CC_MM= else importlib="${so}" @@ -403,7 +408,6 @@ AC_SUBST([enable_autogen]) AC_PROG_INSTALL AC_PROG_RANLIB -AC_PATH_PROG([AR], [ar], [false], [$PATH]) AC_PATH_PROG([LD], [ld], [false], [$PATH]) AC_PATH_PROG([AUTOCONF], [autoconf], [false], [$PATH]) From 80ddf498eb166cad45c8592973eb4f949f176688 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 20 Aug 2013 11:48:19 +0100 Subject: [PATCH 0297/3378] Fix build break for MSVC. Introduce AROUT to control whether there is space between ARFLAGS and $@. This regression was introduced by ad505e0ec622883fbb0650763ea8b54f64a770c9. Reported by Mike Hommey. --- Makefile.in | 4 ++-- configure.ac | 5 +++++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/Makefile.in b/Makefile.in index 478becbe..5909416e 100644 --- a/Makefile.in +++ b/Makefile.in @@ -56,7 +56,7 @@ CTARGET = @CTARGET@ LDTARGET = @LDTARGET@ MKLIB = @MKLIB@ AR = @AR@ -ARFLAGS = crus +ARFLAGS = @ARFLAGS@ CC_MM = @CC_MM@ ifeq (macho, $(ABI)) @@ -187,7 +187,7 @@ $(objroot)lib/$(LIBJEMALLOC)_s.$(A) : $(COBJS) $(STATIC_LIBS): @mkdir -p $(@D) - $(AR) $(ARFLAGS) $@ $+ + $(AR) $(ARFLAGS)@AROUT@ $+ $(objroot)test/bitmap$(EXE): $(objroot)src/bitmap.$(O) diff --git a/configure.ac b/configure.ac index f4b4c21f..73d3f94f 100644 --- a/configure.ac +++ b/configure.ac @@ -226,6 +226,8 @@ PIC_CFLAGS='-fPIC -DPIC' CTARGET='-o $@' LDTARGET='-o $@' EXTRA_LDFLAGS= +ARFLAGS='crus' +AROUT=' $@' CC_MM=1 AN_MAKEVAR([AR], [AC_PROG_AR]) @@ -316,6 +318,7 @@ case "${host}" in LDTARGET='-Fe$@' AR='lib' ARFLAGS='-nologo -out:' + AROUT='$@' CC_MM= else importlib="${so}" @@ -348,6 +351,8 @@ AC_SUBST([PIC_CFLAGS]) AC_SUBST([CTARGET]) AC_SUBST([LDTARGET]) AC_SUBST([MKLIB]) +AC_SUBST([ARFLAGS]) +AC_SUBST([AROUT]) AC_SUBST([CC_MM]) if test "x$abi" != "xpecoff"; then From a33488d648ebe6e56b266210fc8d468fbf48a6a2 Mon Sep 17 
00:00:00 2001 From: Jason Evans Date: Thu, 3 Oct 2013 14:38:39 -0700 Subject: [PATCH 0298/3378] Fix typo. --- ChangeLog | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 8ab88487..d758be17 100644 --- a/ChangeLog +++ b/ChangeLog @@ -60,7 +60,7 @@ found in the git revision history: Bug fixes: - Fix "arenas.extend" mallctl to output the number of arenas. - - Fix chunk_recycyle() to unconditionally inform Valgrind that returned memory + - Fix chunk_recycle() to unconditionally inform Valgrind that returned memory is undefined. - Fix build break on FreeBSD related to alloca.h. From dd6ef0302f3980200ed602ec600e211f55e58694 Mon Sep 17 00:00:00 2001 From: Alexandre Perrin Date: Fri, 20 Sep 2013 19:58:11 +0200 Subject: [PATCH 0299/3378] malloc_conf_init: revert errno value when readlink(2) fail. --- src/jemalloc.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index bc350ed9..e3991da2 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -436,8 +436,9 @@ malloc_conf_init(void) } break; case 1: { + int linklen = 0; #ifndef _WIN32 - int linklen; + int saved_errno = errno; const char *linkname = # ifdef JEMALLOC_PREFIX "/etc/"JEMALLOC_PREFIX"malloc.conf" @@ -446,21 +447,20 @@ malloc_conf_init(void) # endif ; - if ((linklen = readlink(linkname, buf, - sizeof(buf) - 1)) != -1) { - /* - * Use the contents of the "/etc/malloc.conf" - * symbolic link's name. - */ - buf[linklen] = '\0'; - opts = buf; - } else -#endif - { + /* + * Try to use the contents of the "/etc/malloc.conf" + * symbolic link's name. + */ + linklen = readlink(linkname, buf, sizeof(buf) - 1); + if (linklen == -1) { /* No configuration specified. 
*/ - buf[0] = '\0'; - opts = buf; + linklen = 0; + /* restore errno */ + set_errno(saved_errno); } +#endif + buf[linklen] = '\0'; + opts = buf; break; } case 2: { const char *envname = From 3ab682d341f033017d042e8498578c2332eacd69 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 19 Oct 2013 17:19:49 -0700 Subject: [PATCH 0300/3378] Silence an unused variable warning. Reported by Ricardo Nabinger Sanchez. --- src/ctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ctl.c b/src/ctl.c index f2ef4e60..f278105a 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1109,7 +1109,7 @@ epoch_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; - uint64_t newval; + UNUSED uint64_t newval; malloc_mutex_lock(&ctl_mtx); WRITE(newval, uint64_t); From 543abf7e6c7de06fe9654e91190b5c44a11b065e Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 19 Oct 2013 17:20:18 -0700 Subject: [PATCH 0301/3378] Fix inlining warning. Add the JEMALLOC_ALWAYS_INLINE_C macro and use it for always-inlined functions declared in .c files. This fixes a function attribute inconsistency for debug builds that resulted in (harmless) compiler warnings about functions not being inlinable. Reported by Ricardo Nabinger Sanchez. 
--- include/jemalloc/internal/jemalloc_internal.h.in | 12 ++++++++++++ src/jemalloc.c | 6 +++--- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index e46ac544..53c135c2 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -232,9 +232,18 @@ static const bool config_ivsalloc = # define __DECONST(type, var) ((type)(uintptr_t)(const void *)(var)) #endif +/* + * JEMALLOC_ALWAYS_INLINE is used within header files for functions that are + * static inline functions if inlining is enabled, and single-definition + * library-private functions if inlining is disabled. + * + * JEMALLOC_ALWAYS_INLINE_C is for use in .c files, in which case the denoted + * functions are always static, regardless of whether inlining is enabled. + */ #ifdef JEMALLOC_DEBUG /* Disable inlining to make debugging easier. */ # define JEMALLOC_ALWAYS_INLINE +# define JEMALLOC_ALWAYS_INLINE_C static # define JEMALLOC_INLINE # define inline #else @@ -242,8 +251,11 @@ static const bool config_ivsalloc = # ifdef JEMALLOC_HAVE_ATTR # define JEMALLOC_ALWAYS_INLINE \ static inline JEMALLOC_ATTR(unused) JEMALLOC_ATTR(always_inline) +# define JEMALLOC_ALWAYS_INLINE_C \ + static inline JEMALLOC_ATTR(always_inline) # else # define JEMALLOC_ALWAYS_INLINE static inline +# define JEMALLOC_ALWAYS_INLINE_C static inline # endif # define JEMALLOC_INLINE static inline # ifdef _MSC_VER diff --git a/src/jemalloc.c b/src/jemalloc.c index e3991da2..ae56db6b 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -282,7 +282,7 @@ arenas_cleanup(void *arg) malloc_mutex_unlock(&arenas_lock); } -static JEMALLOC_ATTR(always_inline) void +JEMALLOC_ALWAYS_INLINE_C void malloc_thread_init(void) { @@ -299,7 +299,7 @@ malloc_thread_init(void) quarantine_alloc_hook(); } -static JEMALLOC_ATTR(always_inline) bool +JEMALLOC_ALWAYS_INLINE_C bool malloc_init(void) { 
@@ -1402,7 +1402,7 @@ je_mallctlbymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, */ #ifdef JEMALLOC_EXPERIMENTAL -static JEMALLOC_ATTR(always_inline) void * +JEMALLOC_ALWAYS_INLINE_C void * iallocm(size_t usize, size_t alignment, bool zero, bool try_tcache, arena_t *arena) { From 87a02d2bb18dbcb2955541b849bc95862e864803 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 19 Oct 2013 21:40:20 -0700 Subject: [PATCH 0302/3378] Fix a Valgrind integration flaw. Fix a Valgrind integration flaw that caused Valgrind warnings about reads of uninitialized memory in arena chunk headers. --- include/jemalloc/internal/arena.h | 66 ++++++++++++------- include/jemalloc/internal/private_namespace.h | 2 + src/arena.c | 21 ++++-- 3 files changed, 57 insertions(+), 32 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index f2c18f43..bbcfedac 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -441,6 +441,7 @@ void arena_postfork_child(arena_t *arena); #ifndef JEMALLOC_ENABLE_INLINE arena_chunk_map_t *arena_mapp_get(arena_chunk_t *chunk, size_t pageind); size_t *arena_mapbitsp_get(arena_chunk_t *chunk, size_t pageind); +size_t arena_mapbitsp_read(size_t *mapbitsp); size_t arena_mapbits_get(arena_chunk_t *chunk, size_t pageind); size_t arena_mapbits_unallocated_size_get(arena_chunk_t *chunk, size_t pageind); @@ -451,6 +452,7 @@ size_t arena_mapbits_dirty_get(arena_chunk_t *chunk, size_t pageind); size_t arena_mapbits_unzeroed_get(arena_chunk_t *chunk, size_t pageind); size_t arena_mapbits_large_get(arena_chunk_t *chunk, size_t pageind); size_t arena_mapbits_allocated_get(arena_chunk_t *chunk, size_t pageind); +void arena_mapbitsp_write(size_t *mapbitsp, size_t mapbits); void arena_mapbits_unallocated_set(arena_chunk_t *chunk, size_t pageind, size_t size, size_t flags); void arena_mapbits_unallocated_size_set(arena_chunk_t *chunk, size_t pageind, @@ -497,11 +499,18 @@ 
arena_mapbitsp_get(arena_chunk_t *chunk, size_t pageind) return (&arena_mapp_get(chunk, pageind)->bits); } +JEMALLOC_ALWAYS_INLINE size_t +arena_mapbitsp_read(size_t *mapbitsp) +{ + + return (*mapbitsp); +} + JEMALLOC_ALWAYS_INLINE size_t arena_mapbits_get(arena_chunk_t *chunk, size_t pageind) { - return (*arena_mapbitsp_get(chunk, pageind)); + return (arena_mapbitsp_read(arena_mapbitsp_get(chunk, pageind))); } JEMALLOC_ALWAYS_INLINE size_t @@ -584,83 +593,90 @@ arena_mapbits_allocated_get(arena_chunk_t *chunk, size_t pageind) return (mapbits & CHUNK_MAP_ALLOCATED); } +JEMALLOC_ALWAYS_INLINE void +arena_mapbitsp_write(size_t *mapbitsp, size_t mapbits) +{ + + *mapbitsp = mapbits; +} + JEMALLOC_ALWAYS_INLINE void arena_mapbits_unallocated_set(arena_chunk_t *chunk, size_t pageind, size_t size, size_t flags) { - size_t *mapbitsp; + size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind); - mapbitsp = arena_mapbitsp_get(chunk, pageind); assert((size & PAGE_MASK) == 0); assert((flags & ~CHUNK_MAP_FLAGS_MASK) == 0); assert((flags & (CHUNK_MAP_DIRTY|CHUNK_MAP_UNZEROED)) == flags); - *mapbitsp = size | CHUNK_MAP_BININD_INVALID | flags; + arena_mapbitsp_write(mapbitsp, size | CHUNK_MAP_BININD_INVALID | flags); } JEMALLOC_ALWAYS_INLINE void arena_mapbits_unallocated_size_set(arena_chunk_t *chunk, size_t pageind, size_t size) { - size_t *mapbitsp; + size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind); + size_t mapbits = arena_mapbitsp_read(mapbitsp); - mapbitsp = arena_mapbitsp_get(chunk, pageind); assert((size & PAGE_MASK) == 0); - assert((*mapbitsp & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) == 0); - *mapbitsp = size | (*mapbitsp & PAGE_MASK); + assert((mapbits & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) == 0); + arena_mapbitsp_write(mapbitsp, size | (mapbits & PAGE_MASK)); } JEMALLOC_ALWAYS_INLINE void arena_mapbits_large_set(arena_chunk_t *chunk, size_t pageind, size_t size, size_t flags) { - size_t *mapbitsp; + size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind); + size_t 
mapbits = arena_mapbitsp_read(mapbitsp); size_t unzeroed; - mapbitsp = arena_mapbitsp_get(chunk, pageind); assert((size & PAGE_MASK) == 0); assert((flags & CHUNK_MAP_DIRTY) == flags); - unzeroed = *mapbitsp & CHUNK_MAP_UNZEROED; /* Preserve unzeroed. */ - *mapbitsp = size | CHUNK_MAP_BININD_INVALID | flags | unzeroed | - CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; + unzeroed = mapbits & CHUNK_MAP_UNZEROED; /* Preserve unzeroed. */ + arena_mapbitsp_write(mapbitsp, size | CHUNK_MAP_BININD_INVALID | flags + | unzeroed | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED); } JEMALLOC_ALWAYS_INLINE void arena_mapbits_large_binind_set(arena_chunk_t *chunk, size_t pageind, size_t binind) { - size_t *mapbitsp; + size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind); + size_t mapbits = arena_mapbitsp_read(mapbitsp); assert(binind <= BININD_INVALID); - mapbitsp = arena_mapbitsp_get(chunk, pageind); assert(arena_mapbits_large_size_get(chunk, pageind) == PAGE); - *mapbitsp = (*mapbitsp & ~CHUNK_MAP_BININD_MASK) | (binind << - CHUNK_MAP_BININD_SHIFT); + arena_mapbitsp_write(mapbitsp, (mapbits & ~CHUNK_MAP_BININD_MASK) | + (binind << CHUNK_MAP_BININD_SHIFT)); } JEMALLOC_ALWAYS_INLINE void arena_mapbits_small_set(arena_chunk_t *chunk, size_t pageind, size_t runind, size_t binind, size_t flags) { - size_t *mapbitsp; + size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind); + size_t mapbits = arena_mapbitsp_read(mapbitsp); size_t unzeroed; assert(binind < BININD_INVALID); - mapbitsp = arena_mapbitsp_get(chunk, pageind); assert(pageind - runind >= map_bias); assert((flags & CHUNK_MAP_DIRTY) == flags); - unzeroed = *mapbitsp & CHUNK_MAP_UNZEROED; /* Preserve unzeroed. */ - *mapbitsp = (runind << LG_PAGE) | (binind << CHUNK_MAP_BININD_SHIFT) | - flags | unzeroed | CHUNK_MAP_ALLOCATED; + unzeroed = mapbits & CHUNK_MAP_UNZEROED; /* Preserve unzeroed. 
*/ + arena_mapbitsp_write(mapbitsp, (runind << LG_PAGE) | (binind << + CHUNK_MAP_BININD_SHIFT) | flags | unzeroed | CHUNK_MAP_ALLOCATED); } JEMALLOC_ALWAYS_INLINE void arena_mapbits_unzeroed_set(arena_chunk_t *chunk, size_t pageind, size_t unzeroed) { - size_t *mapbitsp; + size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind); + size_t mapbits = arena_mapbitsp_read(mapbitsp); - mapbitsp = arena_mapbitsp_get(chunk, pageind); - *mapbitsp = (*mapbitsp & ~CHUNK_MAP_UNZEROED) | unzeroed; + arena_mapbitsp_write(mapbitsp, (mapbits & ~CHUNK_MAP_UNZEROED) | + unzeroed); } JEMALLOC_INLINE bool diff --git a/include/jemalloc/internal/private_namespace.h b/include/jemalloc/internal/private_namespace.h index 65de3163..cdb0b0eb 100644 --- a/include/jemalloc/internal/private_namespace.h +++ b/include/jemalloc/internal/private_namespace.h @@ -33,6 +33,8 @@ #define arena_mapbits_unzeroed_get JEMALLOC_N(arena_mapbits_unzeroed_get) #define arena_mapbits_unzeroed_set JEMALLOC_N(arena_mapbits_unzeroed_set) #define arena_mapbitsp_get JEMALLOC_N(arena_mapbitsp_get) +#define arena_mapbitsp_read JEMALLOC_N(arena_mapbitsp_read) +#define arena_mapbitsp_write JEMALLOC_N(arena_mapbitsp_write) #define arena_mapp_get JEMALLOC_N(arena_mapp_get) #define arena_maxclass JEMALLOC_N(arena_maxclass) #define arena_new JEMALLOC_N(arena_new) diff --git a/src/arena.c b/src/arena.c index 05a787f8..0a73be2a 100644 --- a/src/arena.c +++ b/src/arena.c @@ -569,17 +569,24 @@ arena_chunk_alloc(arena_t *arena) * unless the chunk is not zeroed. 
*/ if (zero == false) { + VALGRIND_MAKE_MEM_UNDEFINED( + (void *)arena_mapp_get(chunk, map_bias+1), + (size_t)((uintptr_t) arena_mapp_get(chunk, + chunk_npages-1) - (uintptr_t)arena_mapp_get(chunk, + map_bias+1))); for (i = map_bias+1; i < chunk_npages-1; i++) arena_mapbits_unzeroed_set(chunk, i, unzeroed); - } else if (config_debug) { + } else { VALGRIND_MAKE_MEM_DEFINED( (void *)arena_mapp_get(chunk, map_bias+1), - (void *)((uintptr_t) - arena_mapp_get(chunk, chunk_npages-1) - - (uintptr_t)arena_mapp_get(chunk, map_bias+1))); - for (i = map_bias+1; i < chunk_npages-1; i++) { - assert(arena_mapbits_unzeroed_get(chunk, i) == - unzeroed); + (size_t)((uintptr_t) arena_mapp_get(chunk, + chunk_npages-1) - (uintptr_t)arena_mapp_get(chunk, + map_bias+1))); + if (config_debug) { + for (i = map_bias+1; i < chunk_npages-1; i++) { + assert(arena_mapbits_unzeroed_get(chunk, + i) == unzeroed); + } } } arena_mapbits_unallocated_set(chunk, chunk_npages-1, From ff08ef7046563ed3a2bf2bfb2acdcf91218df88e Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 19 Oct 2013 21:41:10 -0700 Subject: [PATCH 0303/3378] Update ChangeLog. --- ChangeLog | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/ChangeLog b/ChangeLog index d758be17..36614273 100644 --- a/ChangeLog +++ b/ChangeLog @@ -6,6 +6,16 @@ found in the git revision history: http://www.canonware.com/cgi-bin/gitweb.cgi?p=jemalloc.git git://canonware.com/jemalloc.git +* 3.4.1 (XXX) + + Bug fixes: + - Fix a Valgrind integration flaw that caused Valgrind warnings about reads of + uninitialized memory in arena chunk headers. + - Preserve errno during the first allocation. A readlink(2) call during + initialization fails unless /etc/malloc.conf exists, so errno was typically + set during the first allocation prior to this fix. + - Fix compilation warnings reported by gcc 4.8.1. 
+ * 3.4.0 (June 2, 2013) This version is essentially a small bugfix release, but the addition of From dda90f59e2b67903668a2799970f64df163e9ccf Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 19 Oct 2013 23:48:40 -0700 Subject: [PATCH 0304/3378] Fix a Valgrind integration flaw. Fix a Valgrind integration flaw that caused Valgrind warnings about reads of uninitialized memory in internal zero-initialized data structures (relevant to tcache and prof code). --- ChangeLog | 7 +++++-- include/jemalloc/internal/tcache.h | 4 ++-- src/arena.c | 23 ++++++++++++++++++----- 3 files changed, 25 insertions(+), 9 deletions(-) diff --git a/ChangeLog b/ChangeLog index 36614273..9b51fd57 100644 --- a/ChangeLog +++ b/ChangeLog @@ -9,8 +9,11 @@ found in the git revision history: * 3.4.1 (XXX) Bug fixes: - - Fix a Valgrind integration flaw that caused Valgrind warnings about reads of - uninitialized memory in arena chunk headers. + - Fix Valgrind integration flaws that caused Valgrind warnings about reads of + uninitialized memory in: + + arena chunk headers + + internal zero-initialized data structures (relevant to tcache and prof + code) - Preserve errno during the first allocation. A readlink(2) call during initialization fails unless /etc/malloc.conf exists, so errno was typically set during the first allocation prior to this fix. 
diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index ba36204f..d4eecdee 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -313,6 +313,7 @@ tcache_alloc_small(tcache_t *tcache, size_t size, bool zero) } else if (opt_zero) memset(ret, 0, size); } + VALGRIND_MAKE_MEM_UNDEFINED(ret, size); } else { if (config_fill && opt_junk) { arena_alloc_junk_small(ret, &arena_bin_info[binind], @@ -321,7 +322,6 @@ tcache_alloc_small(tcache_t *tcache, size_t size, bool zero) VALGRIND_MAKE_MEM_UNDEFINED(ret, size); memset(ret, 0, size); } - VALGRIND_MAKE_MEM_UNDEFINED(ret, size); if (config_stats) tbin->tstats.nrequests++; @@ -368,11 +368,11 @@ tcache_alloc_large(tcache_t *tcache, size_t size, bool zero) else if (opt_zero) memset(ret, 0, size); } + VALGRIND_MAKE_MEM_UNDEFINED(ret, size); } else { VALGRIND_MAKE_MEM_UNDEFINED(ret, size); memset(ret, 0, size); } - VALGRIND_MAKE_MEM_UNDEFINED(ret, size); if (config_stats) tbin->tstats.nrequests++; diff --git a/src/arena.c b/src/arena.c index 0a73be2a..d28b629a 100644 --- a/src/arena.c +++ b/src/arena.c @@ -368,14 +368,21 @@ arena_run_zero(arena_chunk_t *chunk, size_t run_ind, size_t npages) (npages << LG_PAGE)); } +static inline void +arena_run_page_mark_zeroed(arena_chunk_t *chunk, size_t run_ind) +{ + + VALGRIND_MAKE_MEM_DEFINED((void *)((uintptr_t)chunk + (run_ind << + LG_PAGE)), PAGE); +} + static inline void arena_run_page_validate_zeroed(arena_chunk_t *chunk, size_t run_ind) { size_t i; UNUSED size_t *p = (size_t *)((uintptr_t)chunk + (run_ind << LG_PAGE)); - VALGRIND_MAKE_MEM_DEFINED((void *)((uintptr_t)chunk + (run_ind << - LG_PAGE)), PAGE); + arena_run_page_mark_zeroed(chunk, run_ind); for (i = 0; i < PAGE / sizeof(size_t); i++) assert(p[i] == 0); } @@ -458,6 +465,9 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, } else if (config_debug) { arena_run_page_validate_zeroed( chunk, run_ind+i); + } else { + 
arena_run_page_mark_zeroed( + chunk, run_ind+i); } } } else { @@ -467,6 +477,9 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, */ arena_run_zero(chunk, run_ind, need_pages); } + } else { + VALGRIND_MAKE_MEM_UNDEFINED((void *)((uintptr_t)chunk + + (run_ind << LG_PAGE)), (need_pages << LG_PAGE)); } /* @@ -508,9 +521,9 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, arena_run_page_validate_zeroed(chunk, run_ind+need_pages-1); } + VALGRIND_MAKE_MEM_UNDEFINED((void *)((uintptr_t)chunk + + (run_ind << LG_PAGE)), (need_pages << LG_PAGE)); } - VALGRIND_MAKE_MEM_UNDEFINED((void *)((uintptr_t)chunk + (run_ind << - LG_PAGE)), (need_pages << LG_PAGE)); } static arena_chunk_t * @@ -1465,6 +1478,7 @@ arena_malloc_small(arena_t *arena, size_t size, bool zero) } else if (opt_zero) memset(ret, 0, size); } + VALGRIND_MAKE_MEM_UNDEFINED(ret, size); } else { if (config_fill && opt_junk) { arena_alloc_junk_small(ret, &arena_bin_info[binind], @@ -1473,7 +1487,6 @@ arena_malloc_small(arena_t *arena, size_t size, bool zero) VALGRIND_MAKE_MEM_UNDEFINED(ret, size); memset(ret, 0, size); } - VALGRIND_MAKE_MEM_UNDEFINED(ret, size); return (ret); } From 8edaf86b67579f480343f720b89704456a20d1d6 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 20 Oct 2013 14:07:18 -0700 Subject: [PATCH 0305/3378] Fix dangerous casts in tests. Fix dangerous casts of int variables to pointers in thread join function calls. On LP64 systems, int and pointers are different sizes, so writes can corrupt memory. 
--- test/allocated.c | 4 ++-- test/thread_arena.c | 8 ++++++-- test/thread_tcache_enabled.c | 4 ++-- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/test/allocated.c b/test/allocated.c index 9884905d..b1a9cfd9 100644 --- a/test/allocated.c +++ b/test/allocated.c @@ -104,12 +104,12 @@ main(void) je_thread_start(NULL); je_thread_create(&thread, je_thread_start, NULL); - je_thread_join(thread, (void *)&ret); + je_thread_join(thread, NULL); je_thread_start(NULL); je_thread_create(&thread, je_thread_start, NULL); - je_thread_join(thread, (void *)&ret); + je_thread_join(thread, NULL); je_thread_start(NULL); diff --git a/test/thread_arena.c b/test/thread_arena.c index c5a21fa0..6b9bc9cf 100644 --- a/test/thread_arena.c +++ b/test/thread_arena.c @@ -72,8 +72,12 @@ main(void) (void *)&arena_ind); } - for (i = 0; i < NTHREADS; i++) - je_thread_join(threads[i], (void *)&ret); + for (i = 0; i < NTHREADS; i++) { + intptr_t join_ret; + je_thread_join(threads[i], (void *)&join_ret); + if (join_ret != 0) + ret = 1; + } label_return: malloc_printf("Test end\n"); diff --git a/test/thread_tcache_enabled.c b/test/thread_tcache_enabled.c index 2061b7bb..586b5330 100644 --- a/test/thread_tcache_enabled.c +++ b/test/thread_tcache_enabled.c @@ -77,12 +77,12 @@ main(void) je_thread_start(NULL); je_thread_create(&thread, je_thread_start, NULL); - je_thread_join(thread, (void *)&ret); + je_thread_join(thread, NULL); je_thread_start(NULL); je_thread_create(&thread, je_thread_start, NULL); - je_thread_join(thread, (void *)&ret); + je_thread_join(thread, NULL); je_thread_start(NULL); From 0f1d8ec300f746d5c9618904aa1d5568a6f524b5 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 20 Oct 2013 14:09:01 -0700 Subject: [PATCH 0306/3378] Fix an off-by-one flaw in a test. 
--- test/ALLOCM_ARENA.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/ALLOCM_ARENA.c b/test/ALLOCM_ARENA.c index 2c52485e..ca91b621 100644 --- a/test/ALLOCM_ARENA.c +++ b/test/ALLOCM_ARENA.c @@ -23,7 +23,8 @@ je_thread_start(void *arg) size_t mib[3]; size_t miblen = sizeof(mib) / sizeof(size_t); const char *dss_precs[] = {"disabled", "primary", "secondary"}; - const char *dss = dss_precs[thread_ind % 4]; + const char *dss = dss_precs[thread_ind % + (sizeof(dss_precs)/sizeof(char*))]; if (mallctlnametomib("arena.0.dss", mib, &miblen) != 0) { malloc_printf("Error in mallctlnametomib()\n"); abort(); From 7b65180b32558fc4f2bc7b6ac5602f306ed3a014 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 20 Oct 2013 14:09:54 -0700 Subject: [PATCH 0307/3378] Fix a race condition in the "arenas.extend" mallctl. Fix a race condition in the "arenas.extend" mallctl that could lead to internal data structure corruption. The race could be hit if one thread called the "arenas.extend" mallctl while another thread concurrently triggered initialization of one of the lazily created arenas. --- ChangeLog | 2 ++ src/ctl.c | 84 ++++++++++++++++++++++++++++++------------------------- 2 files changed, 48 insertions(+), 38 deletions(-) diff --git a/ChangeLog b/ChangeLog index 9b51fd57..def7685e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -9,6 +9,8 @@ found in the git revision history: * 3.4.1 (XXX) Bug fixes: + - Fix a race in the "arenas.extend" mallctl that could cause memory corruption + of internal data structures and subsequent crashes. - Fix Valgrind integration flaws that caused Valgrind warnings about reads of uninitialized memory in: + arena chunk headers diff --git a/src/ctl.c b/src/ctl.c index f278105a..ebba7c25 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -546,43 +546,30 @@ ctl_arena_refresh(arena_t *arena, unsigned i) static bool ctl_grow(void) { - size_t astats_size; ctl_arena_stats_t *astats; arena_t **tarenas; - /* Extend arena stats and arenas arrays. 
*/ - astats_size = (ctl_stats.narenas + 2) * sizeof(ctl_arena_stats_t); - if (ctl_stats.narenas == narenas_auto) { - /* ctl_stats.arenas and arenas came from base_alloc(). */ - astats = (ctl_arena_stats_t *)imalloc(astats_size); - if (astats == NULL) - return (true); - memcpy(astats, ctl_stats.arenas, (ctl_stats.narenas + 1) * - sizeof(ctl_arena_stats_t)); - - tarenas = (arena_t **)imalloc((ctl_stats.narenas + 1) * - sizeof(arena_t *)); - if (tarenas == NULL) { - idalloc(astats); - return (true); - } - memcpy(tarenas, arenas, ctl_stats.narenas * sizeof(arena_t *)); - } else { - astats = (ctl_arena_stats_t *)iralloc(ctl_stats.arenas, - astats_size, 0, 0, false, false); - if (astats == NULL) - return (true); - - tarenas = (arena_t **)iralloc(arenas, (ctl_stats.narenas + 1) * - sizeof(arena_t *), 0, 0, false, false); - if (tarenas == NULL) - return (true); - } - /* Initialize the new astats and arenas elements. */ - memset(&astats[ctl_stats.narenas + 1], 0, sizeof(ctl_arena_stats_t)); - if (ctl_arena_init(&astats[ctl_stats.narenas + 1])) + /* Allocate extended arena stats and arenas arrays. */ + astats = (ctl_arena_stats_t *)imalloc((ctl_stats.narenas + 2) * + sizeof(ctl_arena_stats_t)); + if (astats == NULL) return (true); - tarenas[ctl_stats.narenas] = NULL; + tarenas = (arena_t **)imalloc((ctl_stats.narenas + 1) * + sizeof(arena_t *)); + if (tarenas == NULL) { + idalloc(astats); + return (true); + } + + /* Initialize the new astats element. */ + memcpy(astats, ctl_stats.arenas, (ctl_stats.narenas + 1) * + sizeof(ctl_arena_stats_t)); + memset(&astats[ctl_stats.narenas + 1], 0, sizeof(ctl_arena_stats_t)); + if (ctl_arena_init(&astats[ctl_stats.narenas + 1])) { + idalloc(tarenas); + idalloc(astats); + return (true); + } /* Swap merged stats to their new location. */ { ctl_arena_stats_t tstats; @@ -593,13 +580,34 @@ ctl_grow(void) memcpy(&astats[ctl_stats.narenas + 1], &tstats, sizeof(ctl_arena_stats_t)); } + /* Initialize the new arenas element. 
*/ + tarenas[ctl_stats.narenas] = NULL; + { + arena_t **arenas_old = arenas; + /* + * Swap extended arenas array into place. Although ctl_mtx + * protects this function from other threads extending the + * array, it does not protect from other threads mutating it + * (i.e. initializing arenas and setting array elements to + * point to them). Therefore, array copying must happen under + * the protection of arenas_lock. + */ + malloc_mutex_lock(&arenas_lock); + arenas = tarenas; + memcpy(arenas, arenas_old, ctl_stats.narenas * + sizeof(arena_t *)); + narenas_total++; + arenas_extend(narenas_total - 1); + malloc_mutex_unlock(&arenas_lock); + /* + * Deallocate arenas_old only if it came from imalloc() (not + * base_alloc()). + */ + if (ctl_stats.narenas != narenas_auto) + idalloc(arenas_old); + } ctl_stats.arenas = astats; ctl_stats.narenas++; - malloc_mutex_lock(&arenas_lock); - arenas = tarenas; - narenas_total++; - arenas_extend(narenas_total - 1); - malloc_mutex_unlock(&arenas_lock); return (false); } From d504477935151ed7befb77930f3ca64fa4d4102b Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 20 Oct 2013 15:11:01 -0700 Subject: [PATCH 0308/3378] Fix a compiler warning. Fix a compiler warning in chunk_record() that was due to reading node rather than xnode. In practice this did not cause any correctness issue, but dataflow analysis in some compilers cannot tell that node and xnode are always equal in cases that the read is reached. --- src/chunk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/chunk.c b/src/chunk.c index aef3fede..b17f43f0 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -294,7 +294,7 @@ label_return: if (xnode != NULL) base_node_dealloc(xnode); if (xprev != NULL) - base_node_dealloc(prev); + base_node_dealloc(xprev); } void From 7a9c8d10b6c412338b39342026831bc444e15c7d Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 20 Oct 2013 19:38:19 -0700 Subject: [PATCH 0309/3378] Update README. 
--- README | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/README b/README index 7661683b..9b268f42 100644 --- a/README +++ b/README @@ -1,10 +1,14 @@ -jemalloc is a general-purpose scalable concurrent malloc(3) implementation. -This distribution is a "portable" implementation that currently targets -FreeBSD, Linux, Apple OS X, and MinGW. jemalloc is included as the default -allocator in the FreeBSD and NetBSD operating systems, and it is used by the -Mozilla Firefox web browser on Microsoft Windows-related platforms. Depending -on your needs, one of the other divergent versions may suit your needs better -than this distribution. +jemalloc is a general purpose malloc(3) implementation that emphasizes +fragmentation avoidance and scalable concurrency support. jemalloc first came +into use as the FreeBSD libc allocator in 2005, and since then it has found its +way into numerous applications that rely on its predictable behavior. In 2010 +jemalloc development efforts broadened to include developer support features +such as heap profiling, Valgrind integration, and extensive monitoring/tuning +hooks. Modern jemalloc releases continue to be integrated back into FreeBSD, +and therefore versatility remains critical. Ongoing development efforts trend +toward making jemalloc among the best allocators for a broad range of demanding +applications, and eliminating/mitigating weaknesses that have practical +repercussions for real world applications. The COPYING file contains copyright and licensing information. From 0f7ba3ff2a3f05c990b369bbf67b8bcc9bfbf35b Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 20 Oct 2013 19:40:09 -0700 Subject: [PATCH 0310/3378] Update ChangeLog for 3.4.1. 
--- ChangeLog | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index def7685e..0efc7426 100644 --- a/ChangeLog +++ b/ChangeLog @@ -6,7 +6,7 @@ found in the git revision history: http://www.canonware.com/cgi-bin/gitweb.cgi?p=jemalloc.git git://canonware.com/jemalloc.git -* 3.4.1 (XXX) +* 3.4.1 (October 20, 2013) Bug fixes: - Fix a race in the "arenas.extend" mallctl that could cause memory corruption From 6556e28be15d9acd8f3835fb9fad90145e1edbff Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 21 Oct 2013 14:56:27 -0700 Subject: [PATCH 0311/3378] Prefer not_reached() over assert(false) where appropriate. --- include/jemalloc/internal/util.h | 12 ++++++------ src/jemalloc.c | 3 +-- src/prof.c | 6 +++--- src/util.c | 2 +- src/zone.c | 2 +- 5 files changed, 12 insertions(+), 13 deletions(-) diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h index 84796936..969e3e39 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util.h @@ -42,12 +42,6 @@ } while (0) #endif -/* Use to assert a particular configuration, e.g., cassert(config_debug). */ -#define cassert(c) do { \ - if ((c) == false) \ - assert(false); \ -} while (0) - #ifndef not_reached #define not_reached() do { \ if (config_debug) { \ @@ -74,6 +68,12 @@ not_implemented(); \ } while (0) +/* Use to assert a particular configuration, e.g., cassert(config_debug). 
*/ +#define cassert(c) do { \ + if ((c) == false) \ + not_reached(); \ +} while (0) + #endif /* JEMALLOC_H_TYPES */ /******************************************************************************/ #ifdef JEMALLOC_H_STRUCTS diff --git a/src/jemalloc.c b/src/jemalloc.c index ae56db6b..5cbfc3f6 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -484,8 +484,7 @@ malloc_conf_init(void) } break; } default: - /* NOTREACHED */ - assert(false); + not_reached(); buf[0] = '\0'; opts = buf; } diff --git a/src/prof.c b/src/prof.c index c133b95c..4a12d1f9 100644 --- a/src/prof.c +++ b/src/prof.c @@ -423,7 +423,7 @@ prof_backtrace(prof_bt_t *bt, unsigned nignore) { cassert(config_prof); - assert(false); + not_reached(); } #endif @@ -511,7 +511,7 @@ prof_lookup(prof_bt_t *bt) assert(ret.v != NULL); if (ckh_remove(&prof_tdata->bt2cnt, ret.p->ctx->bt, NULL, NULL)) - assert(false); + not_reached(); ql_remove(&prof_tdata->lru_ql, ret.p, lru_link); prof_ctx_merge(ret.p->ctx, ret.p); /* ret can now be re-used. */ @@ -695,7 +695,7 @@ prof_ctx_destroy(prof_ctx_t *ctx) assert(ctx->cnt_merged.accumbytes == 0); /* Remove ctx from bt2ctx. */ if (ckh_remove(&bt2ctx, ctx->bt, NULL, NULL)) - assert(false); + not_reached(); prof_leave(prof_tdata); /* Destroy ctx. */ malloc_mutex_unlock(ctx->lock); diff --git a/src/util.c b/src/util.c index b3a01143..679fa763 100644 --- a/src/util.c +++ b/src/util.c @@ -554,7 +554,7 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) f++; break; } - default: not_implemented(); + default: not_reached(); } break; } default: { diff --git a/src/zone.c b/src/zone.c index c62c183f..e0302ef4 100644 --- a/src/zone.c +++ b/src/zone.c @@ -137,7 +137,7 @@ zone_destroy(malloc_zone_t *zone) { /* This function should never be called. 
*/ - assert(false); + not_reached(); return (NULL); } From f1c3da8b02489b7728d4275ac011336299eace62 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 21 Oct 2013 14:59:10 -0700 Subject: [PATCH 0312/3378] Consistently use malloc_mutex_prefork(). Consistently use malloc_mutex_prefork() instead of malloc_mutex_lock() in all prefork functions. --- src/chunk.c | 2 +- src/ctl.c | 2 +- src/prof.c | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/chunk.c b/src/chunk.c index b17f43f0..a93d28aa 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -368,7 +368,7 @@ void chunk_prefork(void) { - malloc_mutex_lock(&chunks_mtx); + malloc_mutex_prefork(&chunks_mtx); if (config_ivsalloc) rtree_prefork(chunks_rtree); chunk_dss_prefork(); diff --git a/src/ctl.c b/src/ctl.c index ebba7c25..68ffba35 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -929,7 +929,7 @@ void ctl_prefork(void) { - malloc_mutex_lock(&ctl_mtx); + malloc_mutex_prefork(&ctl_mtx); } void diff --git a/src/prof.c b/src/prof.c index 4a12d1f9..c0dad860 100644 --- a/src/prof.c +++ b/src/prof.c @@ -1245,10 +1245,10 @@ prof_prefork(void) if (opt_prof) { unsigned i; - malloc_mutex_lock(&bt2ctx_mtx); - malloc_mutex_lock(&prof_dump_seq_mtx); + malloc_mutex_prefork(&bt2ctx_mtx); + malloc_mutex_prefork(&prof_dump_seq_mtx); for (i = 0; i < PROF_NCTX_LOCKS; i++) - malloc_mutex_lock(&ctx_locks[i]); + malloc_mutex_prefork(&ctx_locks[i]); } } From 30e7cb11186554eb3ee860856eb5b8d541d7740c Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 21 Oct 2013 15:00:06 -0700 Subject: [PATCH 0313/3378] Fix a data race for large allocation stats counters. Reported by Pat Lynch. --- src/tcache.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/tcache.c b/src/tcache.c index 98ed19ed..88ec4810 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -260,8 +260,8 @@ tcache_arena_dissociate(tcache_t *tcache) /* Unlink from list of extant tcaches. 
*/ malloc_mutex_lock(&tcache->arena->lock); ql_remove(&tcache->arena->tcache_ql, tcache, link); - malloc_mutex_unlock(&tcache->arena->lock); tcache_stats_merge(tcache, tcache->arena); + malloc_mutex_unlock(&tcache->arena->lock); } } @@ -399,11 +399,14 @@ tcache_thread_cleanup(void *arg) } } +/* Caller must own arena->lock. */ void tcache_stats_merge(tcache_t *tcache, arena_t *arena) { unsigned i; + cassert(config_stats); + /* Merge and reset tcache stats. */ for (i = 0; i < NBINS; i++) { arena_bin_t *bin = &arena->bins[i]; From e2985a23819670866c041ba07964099eeb9e0e07 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 21 Oct 2013 15:01:44 -0700 Subject: [PATCH 0314/3378] Avoid (x < 0) comparison for unsigned x. Avoid (min < 0) comparison for unsigned min in malloc_conf_init(). This bug had no practical consequences. Reported by Pat Lynch. --- src/jemalloc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 5cbfc3f6..eb4bb7b3 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -521,14 +521,15 @@ malloc_conf_init(void) "Invalid conf value", \ k, klen, v, vlen); \ } else if (clip) { \ - if (um < min) \ + if (min != 0 && um < min) \ o = min; \ else if (um > max) \ o = max; \ else \ o = um; \ } else { \ - if (um < min || um > max) { \ + if ((min != 0 && um < min) || \ + um > max) { \ malloc_conf_error( \ "Out-of-range " \ "conf value", \ From 1d1cee127aebc6ca25207435ddc6ae5d9bb90d41 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 21 Oct 2013 15:04:12 -0700 Subject: [PATCH 0315/3378] Add a missing mutex unlock in malloc_init_hard() error path. Add a missing mutex unlock in a malloc_init_hard() error path (failed mutex initialization). In practice this bug was very unlikely to ever trigger, but if it did, application deadlock would likely result. Reported by Pat Lynch. 
--- src/jemalloc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index eb4bb7b3..0014a34a 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -745,8 +745,10 @@ malloc_init_hard(void) return (true); } - if (malloc_mutex_init(&arenas_lock)) + if (malloc_mutex_init(&arenas_lock)) { + malloc_mutex_unlock(&init_lock); return (true); + } /* * Create enough scaffolding to allow recursive allocation in From 93f39f8d23bf0a8554b16962a43dd75258e8e337 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 21 Oct 2013 15:07:40 -0700 Subject: [PATCH 0316/3378] Fix a file descriptor leak in a prof_dump_maps() error path. Reported by Pat Lynch. --- src/prof.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/src/prof.c b/src/prof.c index c0dad860..d246a32c 100644 --- a/src/prof.c +++ b/src/prof.c @@ -794,6 +794,7 @@ prof_dump_ctx(bool propagate_err, prof_ctx_t *ctx, prof_bt_t *bt) static bool prof_dump_maps(bool propagate_err) { + bool ret; int mfd; char filename[PATH_MAX + 1]; @@ -806,24 +807,34 @@ prof_dump_maps(bool propagate_err) ssize_t nread; if (prof_write(propagate_err, "\nMAPPED_LIBRARIES:\n") && - propagate_err) - return (true); + propagate_err) { + ret = true; + goto label_return; + } nread = 0; do { prof_dump_buf_end += nread; if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) { /* Make space in prof_dump_buf before read(). 
*/ - if (prof_flush(propagate_err) && propagate_err) - return (true); + if (prof_flush(propagate_err) && + propagate_err) { + ret = true; + goto label_return; + } } nread = read(mfd, &prof_dump_buf[prof_dump_buf_end], PROF_DUMP_BUFSIZE - prof_dump_buf_end); } while (nread > 0); - close(mfd); - } else - return (true); + } else { + ret = true; + goto label_return; + } - return (false); + ret = false; +label_return: + if (mfd != -1) + close(mfd); + return (ret); } static bool From ac4403cacb225c0cf2c926179af39c21bd7bfc3a Mon Sep 17 00:00:00 2001 From: Leonard Crestez Date: Tue, 22 Oct 2013 00:11:09 +0300 Subject: [PATCH 0317/3378] Delay pthread_atfork registering. This function causes recursive allocation on LinuxThreads. Signed-off-by: Crestez Dan Leonard --- src/jemalloc.c | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 0014a34a..972f4546 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -695,17 +695,6 @@ malloc_init_hard(void) malloc_conf_init(); -#if (!defined(JEMALLOC_MUTEX_INIT_CB) && !defined(JEMALLOC_ZONE) \ - && !defined(_WIN32)) - /* Register fork handlers. */ - if (pthread_atfork(jemalloc_prefork, jemalloc_postfork_parent, - jemalloc_postfork_child) != 0) { - malloc_write(": Error in pthread_atfork()\n"); - if (opt_abort) - abort(); - } -#endif - if (opt_stats_print) { /* Print statistics at exit. */ if (atexit(stats_print_atexit) != 0) { @@ -794,9 +783,25 @@ malloc_init_hard(void) return (true); } - /* Get number of CPUs. */ malloc_mutex_unlock(&init_lock); + /**********************************************************************/ + /* Recursive allocation may follow. */ + ncpus = malloc_ncpus(); + +#if (!defined(JEMALLOC_MUTEX_INIT_CB) && !defined(JEMALLOC_ZONE) \ + && !defined(_WIN32)) + /* LinuxThreads's pthread_atfork() allocates. 
*/ + if (pthread_atfork(jemalloc_prefork, jemalloc_postfork_parent, + jemalloc_postfork_child) != 0) { + malloc_write(": Error in pthread_atfork()\n"); + if (opt_abort) + abort(); + } +#endif + + /* Done recursively allocating. */ + /**********************************************************************/ malloc_mutex_lock(&init_lock); if (mutex_boot()) { @@ -843,6 +848,7 @@ malloc_init_hard(void) malloc_initialized = true; malloc_mutex_unlock(&init_lock); + return (false); } From cb17fc6a8f1ce29be18de7af6d03e66056751fb2 Mon Sep 17 00:00:00 2001 From: Leonard Crestez Date: Tue, 22 Oct 2013 00:12:16 +0300 Subject: [PATCH 0318/3378] Add support for LinuxThreads. When using LinuxThreads pthread_setspecific triggers recursive allocation on all threads. Work around this by creating a global linked list of in-progress tsd initializations. This modifies the _tsd_get_wrapper macro-generated function. When it has to initialize an TSD object it will push the item to the linked list first. If this causes a recursive allocation then the _get_wrapper request is satisfied from the list. When pthread_setspecific returns the item is removed from the list. This effectively adds a very poor substitute for real TLS used only during pthread_setspecific allocation recursion. Signed-off-by: Crestez Dan Leonard --- include/jemalloc/internal/tsd.h | 37 +++++++++++++++++++++++++++++++++ src/tsd.c | 34 ++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+) diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 0037cf35..eec14445 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -6,6 +6,12 @@ typedef bool (*malloc_tsd_cleanup_t)(void); +#if (!defined(JEMALLOC_MALLOC_THREAD_CLEANUP) && !defined(JEMALLOC_TLS) && \ + !defined(_WIN32)) +typedef struct tsd_init_block_s tsd_init_block_t; +typedef struct tsd_init_head_s tsd_init_head_t; +#endif + /* * TLS/TSD-agnostic macro-based implementation of thread-specific data. 
There * are four macros that support (at least) three use cases: file-private, @@ -81,6 +87,7 @@ extern bool a_name##_booted; #else #define malloc_tsd_externs(a_name, a_type) \ extern pthread_key_t a_name##_tsd; \ +extern tsd_init_head_t a_name##_tsd_init_head; \ extern bool a_name##_booted; #endif @@ -105,6 +112,10 @@ a_attr bool a_name##_booted = false; #else #define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ a_attr pthread_key_t a_name##_tsd; \ +a_attr tsd_init_head_t a_name##_tsd_init_head = { \ + ql_head_initializer(blocks), \ + MALLOC_MUTEX_INITIALIZER \ +}; \ a_attr bool a_name##_booted = false; #endif @@ -333,8 +344,14 @@ a_name##_tsd_get_wrapper(void) \ pthread_getspecific(a_name##_tsd); \ \ if (wrapper == NULL) { \ + tsd_init_block_t block; \ + wrapper = tsd_init_check_recursion( \ + &a_name##_tsd_init_head, &block); \ + if (wrapper) \ + return wrapper; \ wrapper = (a_name##_tsd_wrapper_t *) \ malloc_tsd_malloc(sizeof(a_name##_tsd_wrapper_t)); \ + block.data = wrapper; \ if (wrapper == NULL) { \ malloc_write(": Error allocating" \ " TSD for "#a_name"\n"); \ @@ -350,6 +367,7 @@ a_name##_tsd_get_wrapper(void) \ " TSD for "#a_name"\n"); \ abort(); \ } \ + tsd_init_finish(&a_name##_tsd_init_head, &block); \ } \ return (wrapper); \ } \ @@ -379,6 +397,19 @@ a_name##_tsd_set(a_type *val) \ /******************************************************************************/ #ifdef JEMALLOC_H_STRUCTS +#if (!defined(JEMALLOC_MALLOC_THREAD_CLEANUP) && !defined(JEMALLOC_TLS) && \ + !defined(_WIN32)) +struct tsd_init_block_s { + ql_elm(tsd_init_block_t) link; + pthread_t thread; + void *data; +}; +struct tsd_init_head_s { + ql_head(tsd_init_block_t) blocks; + malloc_mutex_t lock; +}; +#endif + #endif /* JEMALLOC_H_STRUCTS */ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS @@ -388,6 +419,12 @@ void malloc_tsd_dalloc(void *wrapper); void malloc_tsd_no_cleanup(void *); void 
malloc_tsd_cleanup_register(bool (*f)(void)); void malloc_tsd_boot(void); +#if (!defined(JEMALLOC_MALLOC_THREAD_CLEANUP) && !defined(JEMALLOC_TLS) && \ + !defined(_WIN32)) +void *tsd_init_check_recursion(tsd_init_head_t *head, + tsd_init_block_t *block); +void tsd_init_finish(tsd_init_head_t *head, tsd_init_block_t *block); +#endif #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ diff --git a/src/tsd.c b/src/tsd.c index 961a5463..59e8da32 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -105,3 +105,37 @@ JEMALLOC_SECTION(".CRT$XLY") JEMALLOC_ATTR(used) static const BOOL (WINAPI *tls_callback)(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved) = _tls_callback; #endif + +#if (!defined(JEMALLOC_MALLOC_THREAD_CLEANUP) && !defined(JEMALLOC_TLS) && \ + !defined(_WIN32)) +void * +tsd_init_check_recursion(tsd_init_head_t *head, tsd_init_block_t *block) +{ + pthread_t self = pthread_self(); + tsd_init_block_t *iter; + + /* Check whether this thread has already inserted into the list. */ + malloc_mutex_lock(&head->lock); + ql_foreach(iter, &head->blocks, link) { + if (iter->thread == self) { + malloc_mutex_unlock(&head->lock); + return (iter->data); + } + } + /* Insert block into list. */ + ql_elm_new(block, link); + block->thread = self; + ql_tail_insert(&head->blocks, block, link); + malloc_mutex_unlock(&head->lock); + return (NULL); +} + +void +tsd_init_finish(tsd_init_head_t *head, tsd_init_block_t *block) +{ + + malloc_mutex_lock(&head->lock); + ql_remove(&head->blocks, block, link); + malloc_mutex_unlock(&head->lock); +} +#endif From 239692b18edb5bc1b2557f1e215509072866d2bb Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 28 Oct 2013 12:41:37 -0700 Subject: [PATCH 0319/3378] Fix whitespace. 
--- src/prof.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/prof.c b/src/prof.c index d246a32c..40718968 100644 --- a/src/prof.c +++ b/src/prof.c @@ -895,7 +895,7 @@ prof_dump(bool propagate_err, const char *filename, bool leakcheck) goto label_error; } - /* Dump per ctx profile stats. */ + /* Dump per ctx profile stats. */ for (tabind = 0; ckh_iter(&bt2ctx, &tabind, &bt.v, &ctx.v) == false;) { if (prof_dump_ctx(propagate_err, ctx.p, bt.p)) From 96eeaec5dd5ac4d11be36945240fa823abc0c3f9 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 28 Oct 2013 12:44:16 -0700 Subject: [PATCH 0320/3378] Update autoconf support files. --- config.guess | 184 +++++++++++++++++++++++++++++---------------------- config.sub | 110 +++++++++++++++++------------- 2 files changed, 171 insertions(+), 123 deletions(-) diff --git a/config.guess b/config.guess index d622a44e..b79252d6 100755 --- a/config.guess +++ b/config.guess @@ -1,14 +1,12 @@ #! /bin/sh # Attempt to guess a canonical system name. -# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, -# 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, -# 2011, 2012 Free Software Foundation, Inc. +# Copyright 1992-2013 Free Software Foundation, Inc. -timestamp='2012-02-10' +timestamp='2013-06-10' # This file is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or +# the Free Software Foundation; either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but @@ -22,19 +20,17 @@ timestamp='2012-02-10' # As a special exception to the GNU General Public License, if you # distribute this file as part of a program that contains a # configuration script generated by Autoconf, you may include it under -# the same distribution terms that you use for the rest of that program. 
- - -# Originally written by Per Bothner. Please send patches (context -# diff format) to and include a ChangeLog -# entry. +# the same distribution terms that you use for the rest of that +# program. This Exception is an additional permission under section 7 +# of the GNU General Public License, version 3 ("GPLv3"). # -# This script attempts to guess a canonical system name similar to -# config.sub. If it succeeds, it prints the system name on stdout, and -# exits with 0. Otherwise, it exits with 1. +# Originally written by Per Bothner. # # You can get the latest version of this script from: # http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess;hb=HEAD +# +# Please send patches with a ChangeLog entry to config-patches@gnu.org. + me=`echo "$0" | sed -e 's,.*/,,'` @@ -54,9 +50,7 @@ version="\ GNU config.guess ($timestamp) Originally written by Per Bothner. -Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, -2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 -Free Software Foundation, Inc. +Copyright 1992-2013 Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." @@ -138,6 +132,27 @@ UNAME_RELEASE=`(uname -r) 2>/dev/null` || UNAME_RELEASE=unknown UNAME_SYSTEM=`(uname -s) 2>/dev/null` || UNAME_SYSTEM=unknown UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown +case "${UNAME_SYSTEM}" in +Linux|GNU|GNU/*) + # If the system lacks a compiler, then just pick glibc. + # We could probably try harder. + LIBC=gnu + + eval $set_cc_for_build + cat <<-EOF > $dummy.c + #include + #if defined(__UCLIBC__) + LIBC=uclibc + #elif defined(__dietlibc__) + LIBC=dietlibc + #else + LIBC=gnu + #endif + EOF + eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^LIBC'` + ;; +esac + # Note: order is significant - the case branches are not exclusive. 
case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in @@ -200,6 +215,10 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in # CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used. echo "${machine}-${os}${release}" exit ;; + *:Bitrig:*:*) + UNAME_MACHINE_ARCH=`arch | sed 's/Bitrig.//'` + echo ${UNAME_MACHINE_ARCH}-unknown-bitrig${UNAME_RELEASE} + exit ;; *:OpenBSD:*:*) UNAME_MACHINE_ARCH=`arch | sed 's/OpenBSD.//'` echo ${UNAME_MACHINE_ARCH}-unknown-openbsd${UNAME_RELEASE} @@ -302,7 +321,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*) echo arm-acorn-riscix${UNAME_RELEASE} exit ;; - arm:riscos:*:*|arm:RISCOS:*:*) + arm*:riscos:*:*|arm*:RISCOS:*:*) echo arm-unknown-riscos exit ;; SR2?01:HI-UX/MPP:*:* | SR8000:HI-UX/MPP:*:*) @@ -801,6 +820,9 @@ EOF i*:CYGWIN*:*) echo ${UNAME_MACHINE}-pc-cygwin exit ;; + *:MINGW64*:*) + echo ${UNAME_MACHINE}-pc-mingw64 + exit ;; *:MINGW*:*) echo ${UNAME_MACHINE}-pc-mingw32 exit ;; @@ -852,21 +874,21 @@ EOF exit ;; *:GNU:*:*) # the GNU system - echo `echo ${UNAME_MACHINE}|sed -e 's,[-/].*$,,'`-unknown-gnu`echo ${UNAME_RELEASE}|sed -e 's,/.*$,,'` + echo `echo ${UNAME_MACHINE}|sed -e 's,[-/].*$,,'`-unknown-${LIBC}`echo ${UNAME_RELEASE}|sed -e 's,/.*$,,'` exit ;; *:GNU/*:*:*) # other systems with GNU libc and userland - echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr '[A-Z]' '[a-z]'``echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`-gnu + echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr '[A-Z]' '[a-z]'``echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`-${LIBC} exit ;; i*86:Minix:*:*) echo ${UNAME_MACHINE}-pc-minix exit ;; aarch64:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-gnu + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} exit ;; aarch64_be:Linux:*:*) UNAME_MACHINE=aarch64_be - echo ${UNAME_MACHINE}-unknown-linux-gnu + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} exit ;; 
alpha:Linux:*:*) case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in @@ -879,59 +901,54 @@ EOF EV68*) UNAME_MACHINE=alphaev68 ;; esac objdump --private-headers /bin/sh | grep -q ld.so.1 - if test "$?" = 0 ; then LIBC="libc1" ; else LIBC="" ; fi - echo ${UNAME_MACHINE}-unknown-linux-gnu${LIBC} + if test "$?" = 0 ; then LIBC="gnulibc1" ; fi + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + arc:Linux:*:* | arceb:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} exit ;; arm*:Linux:*:*) eval $set_cc_for_build if echo __ARM_EABI__ | $CC_FOR_BUILD -E - 2>/dev/null \ | grep -q __ARM_EABI__ then - echo ${UNAME_MACHINE}-unknown-linux-gnu + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} else if echo __ARM_PCS_VFP | $CC_FOR_BUILD -E - 2>/dev/null \ | grep -q __ARM_PCS_VFP then - echo ${UNAME_MACHINE}-unknown-linux-gnueabi + echo ${UNAME_MACHINE}-unknown-linux-${LIBC}eabi else - echo ${UNAME_MACHINE}-unknown-linux-gnueabihf + echo ${UNAME_MACHINE}-unknown-linux-${LIBC}eabihf fi fi exit ;; avr32*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-gnu + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} exit ;; cris:Linux:*:*) - echo ${UNAME_MACHINE}-axis-linux-gnu + echo ${UNAME_MACHINE}-axis-linux-${LIBC} exit ;; crisv32:Linux:*:*) - echo ${UNAME_MACHINE}-axis-linux-gnu + echo ${UNAME_MACHINE}-axis-linux-${LIBC} exit ;; frv:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-gnu + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} exit ;; hexagon:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-gnu + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} exit ;; i*86:Linux:*:*) - LIBC=gnu - eval $set_cc_for_build - sed 's/^ //' << EOF >$dummy.c - #ifdef __dietlibc__ - LIBC=dietlibc - #endif -EOF - eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^LIBC'` - echo "${UNAME_MACHINE}-pc-linux-${LIBC}" + echo ${UNAME_MACHINE}-pc-linux-${LIBC} exit ;; ia64:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-gnu + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} exit ;; m32r*:Linux:*:*) - echo 
${UNAME_MACHINE}-unknown-linux-gnu + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} exit ;; m68*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-gnu + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} exit ;; mips:Linux:*:* | mips64:Linux:*:*) eval $set_cc_for_build @@ -950,54 +967,63 @@ EOF #endif EOF eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^CPU'` - test x"${CPU}" != x && { echo "${CPU}-unknown-linux-gnu"; exit; } + test x"${CPU}" != x && { echo "${CPU}-unknown-linux-${LIBC}"; exit; } ;; + or1k:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; or32:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-gnu + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} exit ;; padre:Linux:*:*) - echo sparc-unknown-linux-gnu + echo sparc-unknown-linux-${LIBC} exit ;; parisc64:Linux:*:* | hppa64:Linux:*:*) - echo hppa64-unknown-linux-gnu + echo hppa64-unknown-linux-${LIBC} exit ;; parisc:Linux:*:* | hppa:Linux:*:*) # Look for CPU level case `grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2` in - PA7*) echo hppa1.1-unknown-linux-gnu ;; - PA8*) echo hppa2.0-unknown-linux-gnu ;; - *) echo hppa-unknown-linux-gnu ;; + PA7*) echo hppa1.1-unknown-linux-${LIBC} ;; + PA8*) echo hppa2.0-unknown-linux-${LIBC} ;; + *) echo hppa-unknown-linux-${LIBC} ;; esac exit ;; ppc64:Linux:*:*) - echo powerpc64-unknown-linux-gnu + echo powerpc64-unknown-linux-${LIBC} exit ;; ppc:Linux:*:*) - echo powerpc-unknown-linux-gnu + echo powerpc-unknown-linux-${LIBC} + exit ;; + ppc64le:Linux:*:*) + echo powerpc64le-unknown-linux-${LIBC} + exit ;; + ppcle:Linux:*:*) + echo powerpcle-unknown-linux-${LIBC} exit ;; s390:Linux:*:* | s390x:Linux:*:*) - echo ${UNAME_MACHINE}-ibm-linux + echo ${UNAME_MACHINE}-ibm-linux-${LIBC} exit ;; sh64*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-gnu + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} exit ;; sh*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-gnu + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} exit ;; sparc:Linux:*:* | sparc64:Linux:*:*) - 
echo ${UNAME_MACHINE}-unknown-linux-gnu + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} exit ;; tile*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-gnu + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} exit ;; vax:Linux:*:*) - echo ${UNAME_MACHINE}-dec-linux-gnu + echo ${UNAME_MACHINE}-dec-linux-${LIBC} exit ;; x86_64:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-gnu + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} exit ;; xtensa*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-gnu + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} exit ;; i*86:DYNIX/ptx:4*:*) # ptx 4.0 does uname -s correctly, with DYNIX/ptx in there. @@ -1201,6 +1227,9 @@ EOF BePC:Haiku:*:*) # Haiku running on Intel PC compatible. echo i586-pc-haiku exit ;; + x86_64:Haiku:*:*) + echo x86_64-unknown-haiku + exit ;; SX-4:SUPER-UX:*:*) echo sx4-nec-superux${UNAME_RELEASE} exit ;; @@ -1227,19 +1256,21 @@ EOF exit ;; *:Darwin:*:*) UNAME_PROCESSOR=`uname -p` || UNAME_PROCESSOR=unknown - case $UNAME_PROCESSOR in - i386) - eval $set_cc_for_build - if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then - if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \ - (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \ - grep IS_64BIT_ARCH >/dev/null - then - UNAME_PROCESSOR="x86_64" - fi - fi ;; - unknown) UNAME_PROCESSOR=powerpc ;; - esac + eval $set_cc_for_build + if test "$UNAME_PROCESSOR" = unknown ; then + UNAME_PROCESSOR=powerpc + fi + if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then + if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \ + (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \ + grep IS_64BIT_ARCH >/dev/null + then + case $UNAME_PROCESSOR in + i386) UNAME_PROCESSOR=x86_64 ;; + powerpc) UNAME_PROCESSOR=powerpc64 ;; + esac + fi + fi echo ${UNAME_PROCESSOR}-apple-darwin${UNAME_RELEASE} exit ;; *:procnto*:*:* | *:QNX:[0123456789]*:*) @@ -1256,7 +1287,7 @@ EOF NEO-?:NONSTOP_KERNEL:*:*) echo neo-tandem-nsk${UNAME_RELEASE} exit ;; - NSE-?:NONSTOP_KERNEL:*:*) + NSE-*:NONSTOP_KERNEL:*:*) echo 
nse-tandem-nsk${UNAME_RELEASE} exit ;; NSR-?:NONSTOP_KERNEL:*:*) @@ -1330,9 +1361,6 @@ EOF exit ;; esac -#echo '(No uname command or uname output not recognized.)' 1>&2 -#echo "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" 1>&2 - eval $set_cc_for_build cat >$dummy.c <. @@ -26,11 +20,12 @@ timestamp='2012-02-10' # As a special exception to the GNU General Public License, if you # distribute this file as part of a program that contains a # configuration script generated by Autoconf, you may include it under -# the same distribution terms that you use for the rest of that program. +# the same distribution terms that you use for the rest of that +# program. This Exception is an additional permission under section 7 +# of the GNU General Public License, version 3 ("GPLv3"). -# Please send patches to . Submit a context -# diff and a properly formatted GNU ChangeLog entry. +# Please send patches with a ChangeLog entry to config-patches@gnu.org. # # Configuration subroutine to validate and canonicalize a configuration type. # Supply the specified configuration type as an argument. @@ -73,9 +68,7 @@ Report bugs and patches to ." version="\ GNU config.sub ($timestamp) -Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, -2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 -Free Software Foundation, Inc. +Copyright 1992-2013 Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." 
@@ -123,7 +116,7 @@ esac maybe_os=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'` case $maybe_os in nto-qnx* | linux-gnu* | linux-android* | linux-dietlibc | linux-newlib* | \ - linux-uclibc* | uclinux-uclibc* | uclinux-gnu* | kfreebsd*-gnu* | \ + linux-musl* | linux-uclibc* | uclinux-uclibc* | uclinux-gnu* | kfreebsd*-gnu* | \ knetbsd*-gnu* | netbsd*-gnu* | \ kopensolaris*-gnu* | \ storm-chaos* | os2-emx* | rtmk-nova*) @@ -156,7 +149,7 @@ case $os in -convergent* | -ncr* | -news | -32* | -3600* | -3100* | -hitachi* |\ -c[123]* | -convex* | -sun | -crds | -omron* | -dg | -ultra | -tti* | \ -harris | -dolphin | -highlevel | -gould | -cbm | -ns | -masscomp | \ - -apple | -axis | -knuth | -cray | -microblaze) + -apple | -axis | -knuth | -cray | -microblaze*) os= basic_machine=$1 ;; @@ -225,6 +218,12 @@ case $os in -isc*) basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` ;; + -lynx*178) + os=-lynxos178 + ;; + -lynx*5) + os=-lynxos5 + ;; -lynx*) os=-lynxos ;; @@ -253,10 +252,12 @@ case $basic_machine in | alpha | alphaev[4-8] | alphaev56 | alphaev6[78] | alphapca5[67] \ | alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] | alpha64pca5[67] \ | am33_2.0 \ - | arc | arm | arm[bl]e | arme[lb] | armv[2345] | armv[345][lb] | avr | avr32 \ - | be32 | be64 \ + | arc | arceb \ + | arm | arm[bl]e | arme[lb] | armv[2-8] | armv[3-8][lb] | armv7[arm] \ + | avr | avr32 \ + | be32 | be64 \ | bfin \ - | c4x | clipper \ + | c4x | c8051 | clipper \ | d10v | d30v | dlx | dsp16xx \ | epiphany \ | fido | fr30 | frv \ @@ -264,10 +265,11 @@ case $basic_machine in | hexagon \ | i370 | i860 | i960 | ia64 \ | ip2k | iq2000 \ + | k1om \ | le32 | le64 \ | lm32 \ | m32c | m32r | m32rle | m68000 | m68k | m88k \ - | maxq | mb | microblaze | mcore | mep | metag \ + | maxq | mb | microblaze | microblazeel | mcore | mep | metag \ | mips | mipsbe | mipseb | mipsel | mipsle \ | mips16 \ | mips64 | mips64el \ @@ -285,16 +287,17 @@ case $basic_machine in | mipsisa64r2 | mipsisa64r2el \ | mipsisa64sb1 
| mipsisa64sb1el \ | mipsisa64sr71k | mipsisa64sr71kel \ + | mipsr5900 | mipsr5900el \ | mipstx39 | mipstx39el \ | mn10200 | mn10300 \ | moxie \ | mt \ | msp430 \ | nds32 | nds32le | nds32be \ - | nios | nios2 \ + | nios | nios2 | nios2eb | nios2el \ | ns16k | ns32k \ | open8 \ - | or32 \ + | or1k | or32 \ | pdp10 | pdp11 | pj | pjl \ | powerpc | powerpc64 | powerpc64le | powerpcle \ | pyramid \ @@ -322,7 +325,7 @@ case $basic_machine in c6x) basic_machine=tic6x-unknown ;; - m6811 | m68hc11 | m6812 | m68hc12 | m68hcs12x | picochip) + m6811 | m68hc11 | m6812 | m68hc12 | m68hcs12x | nvptx | picochip) basic_machine=$basic_machine-unknown os=-none ;; @@ -364,13 +367,13 @@ case $basic_machine in | aarch64-* | aarch64_be-* \ | alpha-* | alphaev[4-8]-* | alphaev56-* | alphaev6[78]-* \ | alpha64-* | alpha64ev[4-8]-* | alpha64ev56-* | alpha64ev6[78]-* \ - | alphapca5[67]-* | alpha64pca5[67]-* | arc-* \ + | alphapca5[67]-* | alpha64pca5[67]-* | arc-* | arceb-* \ | arm-* | armbe-* | armle-* | armeb-* | armv*-* \ | avr-* | avr32-* \ | be32-* | be64-* \ | bfin-* | bs2000-* \ | c[123]* | c30-* | [cjt]90-* | c4x-* \ - | clipper-* | craynv-* | cydra-* \ + | c8051-* | clipper-* | craynv-* | cydra-* \ | d10v-* | d30v-* | dlx-* \ | elxsi-* \ | f30[01]-* | f700-* | fido-* | fr30-* | frv-* | fx80-* \ @@ -379,11 +382,13 @@ case $basic_machine in | hexagon-* \ | i*86-* | i860-* | i960-* | ia64-* \ | ip2k-* | iq2000-* \ + | k1om-* \ | le32-* | le64-* \ | lm32-* \ | m32c-* | m32r-* | m32rle-* \ | m68000-* | m680[012346]0-* | m68360-* | m683?2-* | m68k-* \ - | m88110-* | m88k-* | maxq-* | mcore-* | metag-* | microblaze-* \ + | m88110-* | m88k-* | maxq-* | mcore-* | metag-* \ + | microblaze-* | microblazeel-* \ | mips-* | mipsbe-* | mipseb-* | mipsel-* | mipsle-* \ | mips16-* \ | mips64-* | mips64el-* \ @@ -401,12 +406,13 @@ case $basic_machine in | mipsisa64r2-* | mipsisa64r2el-* \ | mipsisa64sb1-* | mipsisa64sb1el-* \ | mipsisa64sr71k-* | mipsisa64sr71kel-* \ + | mipsr5900-* | 
mipsr5900el-* \ | mipstx39-* | mipstx39el-* \ | mmix-* \ | mt-* \ | msp430-* \ | nds32-* | nds32le-* | nds32be-* \ - | nios-* | nios2-* \ + | nios-* | nios2-* | nios2eb-* | nios2el-* \ | none-* | np1-* | ns16k-* | ns32k-* \ | open8-* \ | orion-* \ @@ -782,11 +788,15 @@ case $basic_machine in basic_machine=ns32k-utek os=-sysv ;; - microblaze) + microblaze*) basic_machine=microblaze-xilinx ;; + mingw64) + basic_machine=x86_64-pc + os=-mingw64 + ;; mingw32) - basic_machine=i386-pc + basic_machine=i686-pc os=-mingw32 ;; mingw32ce) @@ -822,7 +832,7 @@ case $basic_machine in basic_machine=`echo $basic_machine | sed -e 's/ms1-/mt-/'` ;; msys) - basic_machine=i386-pc + basic_machine=i686-pc os=-msys ;; mvs) @@ -1013,7 +1023,11 @@ case $basic_machine in basic_machine=i586-unknown os=-pw32 ;; - rdos) + rdos | rdos64) + basic_machine=x86_64-pc + os=-rdos + ;; + rdos32) basic_machine=i386-pc os=-rdos ;; @@ -1340,21 +1354,21 @@ case $os in -gnu* | -bsd* | -mach* | -minix* | -genix* | -ultrix* | -irix* \ | -*vms* | -sco* | -esix* | -isc* | -aix* | -cnk* | -sunos | -sunos[34]*\ | -hpux* | -unos* | -osf* | -luna* | -dgux* | -auroraux* | -solaris* \ - | -sym* | -kopensolaris* \ + | -sym* | -kopensolaris* | -plan9* \ | -amigaos* | -amigados* | -msdos* | -newsos* | -unicos* | -aof* \ | -aos* | -aros* \ | -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \ | -clix* | -riscos* | -uniplus* | -iris* | -rtu* | -xenix* \ | -hiux* | -386bsd* | -knetbsd* | -mirbsd* | -netbsd* \ - | -openbsd* | -solidbsd* \ + | -bitrig* | -openbsd* | -solidbsd* \ | -ekkobsd* | -kfreebsd* | -freebsd* | -riscix* | -lynxos* \ | -bosx* | -nextstep* | -cxux* | -aout* | -elf* | -oabi* \ | -ptx* | -coff* | -ecoff* | -winnt* | -domain* | -vsta* \ | -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* \ | -chorusos* | -chorusrdb* | -cegcc* \ | -cygwin* | -msys* | -pe* | -psos* | -moss* | -proelf* | -rtems* \ - | -mingw32* | -linux-gnu* | -linux-android* \ - | -linux-newlib* | -linux-uclibc* \ + | -mingw32* | 
-mingw64* | -linux-gnu* | -linux-android* \ + | -linux-newlib* | -linux-musl* | -linux-uclibc* \ | -uxpv* | -beos* | -mpeix* | -udk* \ | -interix* | -uwin* | -mks* | -rhapsody* | -darwin* | -opened* \ | -openstep* | -oskit* | -conix* | -pw32* | -nonstopux* \ @@ -1486,9 +1500,6 @@ case $os in -aros*) os=-aros ;; - -kaos*) - os=-kaos - ;; -zvmoe) os=-zvmoe ;; @@ -1537,6 +1548,12 @@ case $basic_machine in c4x-* | tic4x-*) os=-coff ;; + c8051-*) + os=-elf + ;; + hexagon-*) + os=-elf + ;; tic54x-*) os=-coff ;; @@ -1577,6 +1594,9 @@ case $basic_machine in mips*-*) os=-elf ;; + or1k-*) + os=-elf + ;; or32-*) os=-coff ;; From c368f8c8a243248feb7771f4d32691e7b2aa6f1a Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 29 Oct 2013 18:17:42 -0700 Subject: [PATCH 0321/3378] Remove unnecessary zeroing in arena_palloc(). --- include/jemalloc/internal/arena.h | 11 +-- src/arena.c | 114 +++++++++++++++++++----------- 2 files changed, 78 insertions(+), 47 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index bbcfedac..075c263a 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -174,11 +174,12 @@ struct arena_chunk_s { size_t nruns_avail; /* - * Number of available run adjacencies. Clean and dirty available runs - * are not coalesced, which causes virtual memory fragmentation. The - * ratio of (nruns_avail-nruns_adjac):nruns_adjac is used for tracking - * this fragmentation. - * */ + * Number of available run adjacencies that purging could coalesce. + * Clean and dirty available runs are not coalesced, which causes + * virtual memory fragmentation. The ratio of + * (nruns_avail-nruns_adjac):nruns_adjac is used for tracking this + * fragmentation. 
+ */ size_t nruns_adjac; /* diff --git a/src/arena.c b/src/arena.c index d28b629a..145de863 100644 --- a/src/arena.c +++ b/src/arena.c @@ -46,8 +46,12 @@ static void arena_avail_insert(arena_t *arena, arena_chunk_t *chunk, static void arena_avail_remove(arena_t *arena, arena_chunk_t *chunk, size_t pageind, size_t npages, bool maybe_adjac_pred, bool maybe_adjac_succ); +static void arena_run_split_helper(arena_t *arena, arena_run_t *run, + size_t size, bool large, size_t binind, bool remove, bool zero); static void arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, size_t binind, bool zero); +static void arena_run_init(arena_t *arena, arena_run_t *run, size_t size, + bool large, size_t binind, bool zero); static arena_chunk_t *arena_chunk_alloc(arena_t *arena); static void arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk); static arena_run_t *arena_run_alloc_helper(arena_t *arena, size_t size, @@ -388,62 +392,70 @@ arena_run_page_validate_zeroed(arena_chunk_t *chunk, size_t run_ind) } static void -arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, - size_t binind, bool zero) +arena_run_split_helper(arena_t *arena, arena_run_t *run, size_t size, + bool large, size_t binind, bool remove, bool zero) { arena_chunk_t *chunk; - size_t run_ind, total_pages, need_pages, rem_pages, i; + size_t run_ind, need_pages, i; size_t flag_dirty; + assert(large || remove); assert((large && binind == BININD_INVALID) || (large == false && binind != BININD_INVALID)); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE); flag_dirty = arena_mapbits_dirty_get(chunk, run_ind); - total_pages = arena_mapbits_unallocated_size_get(chunk, run_ind) >> - LG_PAGE; - assert(arena_mapbits_dirty_get(chunk, run_ind+total_pages-1) == - flag_dirty); need_pages = (size >> LG_PAGE); assert(need_pages > 0); - assert(need_pages <= total_pages); - rem_pages = total_pages - need_pages; - 
arena_avail_remove(arena, chunk, run_ind, total_pages, true, true); - if (config_stats) { - /* - * Update stats_cactive if nactive is crossing a chunk - * multiple. - */ - size_t cactive_diff = CHUNK_CEILING((arena->nactive + - need_pages) << LG_PAGE) - CHUNK_CEILING(arena->nactive << - LG_PAGE); - if (cactive_diff != 0) - stats_cactive_add(cactive_diff); - } - arena->nactive += need_pages; + if (remove) { + size_t total_pages, rem_pages; - /* Keep track of trailing unused pages for later use. */ - if (rem_pages > 0) { - if (flag_dirty != 0) { - arena_mapbits_unallocated_set(chunk, run_ind+need_pages, - (rem_pages << LG_PAGE), CHUNK_MAP_DIRTY); - arena_mapbits_unallocated_set(chunk, - run_ind+total_pages-1, (rem_pages << LG_PAGE), - CHUNK_MAP_DIRTY); - } else { - arena_mapbits_unallocated_set(chunk, run_ind+need_pages, - (rem_pages << LG_PAGE), - arena_mapbits_unzeroed_get(chunk, - run_ind+need_pages)); - arena_mapbits_unallocated_set(chunk, - run_ind+total_pages-1, (rem_pages << LG_PAGE), - arena_mapbits_unzeroed_get(chunk, - run_ind+total_pages-1)); + total_pages = arena_mapbits_unallocated_size_get(chunk, + run_ind) >> LG_PAGE; + assert(arena_mapbits_dirty_get(chunk, run_ind+total_pages-1) == + flag_dirty); + assert(need_pages <= total_pages); + rem_pages = total_pages - need_pages; + + arena_avail_remove(arena, chunk, run_ind, total_pages, true, + true); + if (config_stats) { + /* + * Update stats_cactive if nactive is crossing a chunk + * multiple. + */ + size_t cactive_diff = CHUNK_CEILING((arena->nactive + + need_pages) << LG_PAGE) - + CHUNK_CEILING(arena->nactive << LG_PAGE); + if (cactive_diff != 0) + stats_cactive_add(cactive_diff); + } + arena->nactive += need_pages; + + /* Keep track of trailing unused pages for later use. 
*/ + if (rem_pages > 0) { + if (flag_dirty != 0) { + arena_mapbits_unallocated_set(chunk, + run_ind+need_pages, (rem_pages << LG_PAGE), + flag_dirty); + arena_mapbits_unallocated_set(chunk, + run_ind+total_pages-1, (rem_pages << + LG_PAGE), flag_dirty); + } else { + arena_mapbits_unallocated_set(chunk, + run_ind+need_pages, (rem_pages << LG_PAGE), + arena_mapbits_unzeroed_get(chunk, + run_ind+need_pages)); + arena_mapbits_unallocated_set(chunk, + run_ind+total_pages-1, (rem_pages << + LG_PAGE), arena_mapbits_unzeroed_get(chunk, + run_ind+total_pages-1)); + } + arena_avail_insert(arena, chunk, run_ind+need_pages, + rem_pages, false, true); } - arena_avail_insert(arena, chunk, run_ind+need_pages, rem_pages, - false, true); } /* @@ -526,6 +538,22 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, } } +static void +arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, + size_t binind, bool zero) +{ + + arena_run_split_helper(arena, run, size, large, binind, true, zero); +} + +static void +arena_run_init(arena_t *arena, arena_run_t *run, size_t size, bool large, + size_t binind, bool zero) +{ + + arena_run_split_helper(arena, run, size, large, binind, false, zero); +} + static arena_chunk_t * arena_chunk_alloc(arena_t *arena) { @@ -1546,7 +1574,7 @@ arena_palloc(arena_t *arena, size_t size, size_t alignment, bool zero) alloc_size = size + alignment - PAGE; malloc_mutex_lock(&arena->lock); - run = arena_run_alloc(arena, alloc_size, true, BININD_INVALID, zero); + run = arena_run_alloc(arena, alloc_size, true, BININD_INVALID, false); if (run == NULL) { malloc_mutex_unlock(&arena->lock); return (NULL); @@ -1566,6 +1594,8 @@ arena_palloc(arena_t *arena, size_t size, size_t alignment, bool zero) arena_run_trim_tail(arena, chunk, ret, size + trailsize, size, false); } + arena_run_init(arena, (arena_run_t *)ret, size, true, BININD_INVALID, + zero); if (config_stats) { arena->stats.nmalloc_large++; From 
aabaf851b2be141e2932f736042577ba6586c8d6 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 30 Oct 2013 14:52:09 -0700 Subject: [PATCH 0322/3378] Add ids for all mallctl entries. Add ids for all mallctl entries, so that external documents can link to arbitrary mallctl entries. --- doc/jemalloc.xml.in | 138 ++++++++++++++++++++++---------------------- 1 file changed, 69 insertions(+), 69 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index abd5e6fc..8837d343 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -605,7 +605,7 @@ for (i = 0; i < nbins; i++) { which controls refreshing of cached dynamic statistics. - + version (const char *) @@ -626,7 +626,7 @@ for (i = 0; i < nbins; i++) { detecting whether another thread caused a refresh. - + config.debug (bool) @@ -636,7 +636,7 @@ for (i = 0; i < nbins; i++) { build configuration. - + config.dss (bool) @@ -646,7 +646,7 @@ for (i = 0; i < nbins; i++) { build configuration. - + config.fill (bool) @@ -656,7 +656,7 @@ for (i = 0; i < nbins; i++) { build configuration. - + config.lazy_lock (bool) @@ -666,7 +666,7 @@ for (i = 0; i < nbins; i++) { during build configuration. - + config.mremap (bool) @@ -676,7 +676,7 @@ for (i = 0; i < nbins; i++) { build configuration. - + config.munmap (bool) @@ -686,7 +686,7 @@ for (i = 0; i < nbins; i++) { build configuration. - + config.prof (bool) @@ -696,7 +696,7 @@ for (i = 0; i < nbins; i++) { build configuration. - + config.prof_libgcc (bool) @@ -706,7 +706,7 @@ for (i = 0; i < nbins; i++) { specified during build configuration. - + config.prof_libunwind (bool) @@ -716,7 +716,7 @@ for (i = 0; i < nbins; i++) { during build configuration. - + config.stats (bool) @@ -726,7 +726,7 @@ for (i = 0; i < nbins; i++) { build configuration. - + config.tcache (bool) @@ -736,7 +736,7 @@ for (i = 0; i < nbins; i++) { during build configuration. - + config.tls (bool) @@ -746,7 +746,7 @@ for (i = 0; i < nbins; i++) { build configuration. 
- + config.utrace (bool) @@ -756,7 +756,7 @@ for (i = 0; i < nbins; i++) { build configuration. - + config.valgrind (bool) @@ -766,7 +766,7 @@ for (i = 0; i < nbins; i++) { build configuration. - + config.xmalloc (bool) @@ -1175,7 +1175,7 @@ malloc_conf = "xmalloc:true";]]> by default. - + thread.arena (unsigned) @@ -1202,7 +1202,7 @@ malloc_conf = "xmalloc:true";]]> cases. - + thread.allocatedp (uint64_t *) @@ -1229,7 +1229,7 @@ malloc_conf = "xmalloc:true";]]> cases. - + thread.deallocatedp (uint64_t *) @@ -1243,7 +1243,7 @@ malloc_conf = "xmalloc:true";]]> mallctl* calls. - + thread.tcache.enabled (bool) @@ -1257,7 +1257,7 @@ malloc_conf = "xmalloc:true";]]> - + thread.tcache.flush (void) @@ -1323,7 +1323,7 @@ malloc_conf = "xmalloc:true";]]> initialized. - + arenas.quantum (size_t) @@ -1332,7 +1332,7 @@ malloc_conf = "xmalloc:true";]]> Quantum size. - + arenas.page (size_t) @@ -1341,7 +1341,7 @@ malloc_conf = "xmalloc:true";]]> Page size. - + arenas.tcache_max (size_t) @@ -1351,7 +1351,7 @@ malloc_conf = "xmalloc:true";]]> Maximum thread-cached size class. - + arenas.nbins (unsigned) @@ -1360,7 +1360,7 @@ malloc_conf = "xmalloc:true";]]> Number of bin size classes. - + arenas.nhbins (unsigned) @@ -1380,7 +1380,7 @@ malloc_conf = "xmalloc:true";]]> Maximum size supported by size class. - + arenas.bin.<i>.nregs (uint32_t) @@ -1389,7 +1389,7 @@ malloc_conf = "xmalloc:true";]]> Number of regions per page run. - + arenas.bin.<i>.run_size (size_t) @@ -1398,7 +1398,7 @@ malloc_conf = "xmalloc:true";]]> Number of bytes per page run. - + arenas.nlruns (size_t) @@ -1407,7 +1407,7 @@ malloc_conf = "xmalloc:true";]]> Total number of large size classes. - + arenas.lrun.<i>.size (size_t) @@ -1417,7 +1417,7 @@ malloc_conf = "xmalloc:true";]]> class. - + arenas.purge (unsigned) @@ -1427,7 +1427,7 @@ malloc_conf = "xmalloc:true";]]> for all arenas if none is specified. 
- + arenas.extend (unsigned) @@ -1451,7 +1451,7 @@ malloc_conf = "xmalloc:true";]]> - + prof.dump (const char *) @@ -1467,7 +1467,7 @@ malloc_conf = "xmalloc:true";]]> option. - + prof.interval (uint64_t) @@ -1527,7 +1527,7 @@ malloc_conf = "xmalloc:true";]]> entirely devoted to allocator metadata. - + stats.mapped (size_t) @@ -1541,7 +1541,7 @@ malloc_conf = "xmalloc:true";]]> does not include inactive chunks. - + stats.chunks.current (size_t) @@ -1553,7 +1553,7 @@ malloc_conf = "xmalloc:true";]]> - + stats.chunks.total (uint64_t) @@ -1563,7 +1563,7 @@ malloc_conf = "xmalloc:true";]]> Cumulative number of chunks allocated. - + stats.chunks.high (size_t) @@ -1574,7 +1574,7 @@ malloc_conf = "xmalloc:true";]]> - + stats.huge.allocated (size_t) @@ -1585,7 +1585,7 @@ malloc_conf = "xmalloc:true";]]> - + stats.huge.nmalloc (uint64_t) @@ -1596,7 +1596,7 @@ malloc_conf = "xmalloc:true";]]> - + stats.huge.ndalloc (uint64_t) @@ -1607,7 +1607,7 @@ malloc_conf = "xmalloc:true";]]> - + stats.arenas.<i>.dss (const char *) @@ -1621,7 +1621,7 @@ malloc_conf = "xmalloc:true";]]> - + stats.arenas.<i>.nthreads (unsigned) @@ -1631,7 +1631,7 @@ malloc_conf = "xmalloc:true";]]> arena. - + stats.arenas.<i>.pactive (size_t) @@ -1652,7 +1652,7 @@ malloc_conf = "xmalloc:true";]]> similar has not been called. - + stats.arenas.<i>.mapped (size_t) @@ -1662,7 +1662,7 @@ malloc_conf = "xmalloc:true";]]> Number of mapped bytes. - + stats.arenas.<i>.npurge (uint64_t) @@ -1673,7 +1673,7 @@ malloc_conf = "xmalloc:true";]]> - + stats.arenas.<i>.nmadvise (uint64_t) @@ -1685,7 +1685,7 @@ malloc_conf = "xmalloc:true";]]> similar calls made to purge dirty pages. - + stats.arenas.<i>.npurged (uint64_t) @@ -1695,7 +1695,7 @@ malloc_conf = "xmalloc:true";]]> Number of pages purged. - + stats.arenas.<i>.small.allocated (size_t) @@ -1706,7 +1706,7 @@ malloc_conf = "xmalloc:true";]]> - + stats.arenas.<i>.small.nmalloc (uint64_t) @@ -1717,7 +1717,7 @@ malloc_conf = "xmalloc:true";]]> small bins. 
- + stats.arenas.<i>.small.ndalloc (uint64_t) @@ -1728,7 +1728,7 @@ malloc_conf = "xmalloc:true";]]> - + stats.arenas.<i>.small.nrequests (uint64_t) @@ -1739,7 +1739,7 @@ malloc_conf = "xmalloc:true";]]> - + stats.arenas.<i>.large.allocated (size_t) @@ -1750,7 +1750,7 @@ malloc_conf = "xmalloc:true";]]> - + stats.arenas.<i>.large.nmalloc (uint64_t) @@ -1761,7 +1761,7 @@ malloc_conf = "xmalloc:true";]]> directly by the arena. - + stats.arenas.<i>.large.ndalloc (uint64_t) @@ -1772,7 +1772,7 @@ malloc_conf = "xmalloc:true";]]> directly by the arena. - + stats.arenas.<i>.large.nrequests (uint64_t) @@ -1783,7 +1783,7 @@ malloc_conf = "xmalloc:true";]]> - + stats.arenas.<i>.bins.<j>.allocated (size_t) @@ -1794,7 +1794,7 @@ malloc_conf = "xmalloc:true";]]> bin. - + stats.arenas.<i>.bins.<j>.nmalloc (uint64_t) @@ -1805,7 +1805,7 @@ malloc_conf = "xmalloc:true";]]> - + stats.arenas.<i>.bins.<j>.ndalloc (uint64_t) @@ -1816,7 +1816,7 @@ malloc_conf = "xmalloc:true";]]> - + stats.arenas.<i>.bins.<j>.nrequests (uint64_t) @@ -1827,7 +1827,7 @@ malloc_conf = "xmalloc:true";]]> requests. - + stats.arenas.<i>.bins.<j>.nfills (uint64_t) @@ -1837,7 +1837,7 @@ malloc_conf = "xmalloc:true";]]> Cumulative number of tcache fills. - + stats.arenas.<i>.bins.<j>.nflushes (uint64_t) @@ -1847,7 +1847,7 @@ malloc_conf = "xmalloc:true";]]> Cumulative number of tcache flushes. - + stats.arenas.<i>.bins.<j>.nruns (uint64_t) @@ -1857,7 +1857,7 @@ malloc_conf = "xmalloc:true";]]> Cumulative number of runs created. - + stats.arenas.<i>.bins.<j>.nreruns (uint64_t) @@ -1868,7 +1868,7 @@ malloc_conf = "xmalloc:true";]]> to allocate changed. - + stats.arenas.<i>.bins.<j>.curruns (size_t) @@ -1878,7 +1878,7 @@ malloc_conf = "xmalloc:true";]]> Current number of runs. - + stats.arenas.<i>.lruns.<j>.nmalloc (uint64_t) @@ -1889,7 +1889,7 @@ malloc_conf = "xmalloc:true";]]> class served directly by the arena. 
- + stats.arenas.<i>.lruns.<j>.ndalloc (uint64_t) @@ -1900,7 +1900,7 @@ malloc_conf = "xmalloc:true";]]> size class served directly by the arena. - + stats.arenas.<i>.lruns.<j>.nrequests (uint64_t) @@ -1911,7 +1911,7 @@ malloc_conf = "xmalloc:true";]]> class. - + stats.arenas.<i>.lruns.<j>.curruns (size_t) From d6df91438a1cf25ea248c3897da463c51709c580 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 19 Nov 2013 18:01:45 -0800 Subject: [PATCH 0323/3378] Fix a potential infinite loop during thread exit. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix malloc_tsd_dalloc() to bypass tcache when dallocating, so that there is no danger of causing tcache reincarnation during thread exit. Whether this infinite loop occurs depends on the pthreads TSD implementation; it is known to occur on Solaris. Submitted by Markus Eberspächer. --- src/tsd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tsd.c b/src/tsd.c index 59e8da32..8431751f 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -21,7 +21,7 @@ void malloc_tsd_dalloc(void *wrapper) { - idalloc(wrapper); + idallocx(wrapper, false); } void From 39e7fd0580a140912fa1170de7a7699c86afe45d Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 25 Nov 2013 18:02:35 -0800 Subject: [PATCH 0324/3378] Fix ALLOCM_ARENA(a) handling in rallocm(). Fix rallocm() to use the specified arena for allocation, not just deallocation. Clarify ALLOCM_ARENA(a) documentation. --- doc/jemalloc.xml.in | 10 ++++++---- src/jemalloc.c | 2 +- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 8837d343..596f6458 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -358,7 +358,7 @@ for (i = 0; i < nbins; i++) { Initialize newly allocated memory to contain zero bytes. 
In the growing reallocation case, the real size prior to reallocation defines the boundary between untouched bytes and those - that are initialized to contain zero bytes. If this option is + that are initialized to contain zero bytes. If this macro is absent, newly allocated memory is uninitialized. @@ -373,9 +373,11 @@ for (i = 0; i < nbins; i++) { Use the arena specified by the index - a. This macro does not validate that - a specifies an arena in the valid - range. + a (and by necessity bypass the thread + cache). This macro has no effect for huge regions, nor for regions + that were allocated via an arena other than the one specified. + This macro does not validate that a + specifies an arena index in the valid range. diff --git a/src/jemalloc.c b/src/jemalloc.c index 972f4546..491ec324 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1530,7 +1530,7 @@ je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, int flags) if (arena_ind != UINT_MAX) { arena_chunk_t *chunk; - try_tcache_alloc = true; + try_tcache_alloc = false; chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(*ptr); try_tcache_dalloc = (chunk == *ptr || chunk->arena != arenas[arena_ind]); From addad093f887cecddd462b7130125a0e08060e1f Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 29 Nov 2013 16:19:44 -0800 Subject: [PATCH 0325/3378] Clean up malloc_ncpus(). Clean up malloc_ncpus() by replacing incorrectly indented if..else branches with a ?: expression. Submitted by Igor Podlesny. --- src/jemalloc.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 491ec324..d1521eaf 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -262,14 +262,7 @@ malloc_ncpus(void) #else result = sysconf(_SC_NPROCESSORS_ONLN); #endif - if (result == -1) { - /* Error. */ - ret = 1; - } else { - ret = (unsigned)result; - } - - return (ret); + return ((result == -1) ? 
1 : (unsigned)result); } void From 52b30691f9a98fe7c8c59d587eb6285a3bacaabc Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 2 Dec 2013 15:16:39 -0800 Subject: [PATCH 0326/3378] Remove unused variable. --- src/jemalloc.c | 1 - 1 file changed, 1 deletion(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index d1521eaf..57a5e359 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -252,7 +252,6 @@ stats_print_atexit(void) static unsigned malloc_ncpus(void) { - unsigned ret; long result; #ifdef _WIN32 From 66688535969c6dcb234448e590f27df38b4eebdf Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 3 Dec 2013 21:49:36 -0800 Subject: [PATCH 0327/3378] Avoid deprecated sbrk(2) on OS X. Avoid referencing sbrk(2) on OS X, because it is deprecated as of OS X 10.9 (Mavericks), and the compiler warns against using it. --- configure.ac | 8 +++++++- src/chunk_dss.c | 15 ++++++++------- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/configure.ac b/configure.ac index 73d3f94f..8b134a16 100644 --- a/configure.ac +++ b/configure.ac @@ -256,6 +256,7 @@ case "${host}" in force_tls="0" DSO_LDFLAGS='-shared -Wl,-dylib_install_name,$(@F)' SOREV="${rev}.${so}" + sbrk_deprecated="1" ;; *-*-freebsd*) CFLAGS="$CFLAGS" @@ -825,7 +826,12 @@ fi dnl Check whether the BSD/SUSv1 sbrk() exists. If not, disable DSS support. 
AC_CHECK_FUNC([sbrk], [have_sbrk="1"], [have_sbrk="0"]) if test "x$have_sbrk" = "x1" ; then - AC_DEFINE([JEMALLOC_HAVE_SBRK], [ ]) + if test "x$sbrk_deprecated" == "x1" ; then + AC_MSG_RESULT([Disabling dss allocation because sbrk is deprecated]) + enable_dss="0" + else + AC_DEFINE([JEMALLOC_HAVE_SBRK], [ ]) + fi else enable_dss="0" fi diff --git a/src/chunk_dss.c b/src/chunk_dss.c index 24781cc5..510bb8be 100644 --- a/src/chunk_dss.c +++ b/src/chunk_dss.c @@ -28,16 +28,17 @@ static void *dss_max; /******************************************************************************/ -#ifndef JEMALLOC_HAVE_SBRK static void * -sbrk(intptr_t increment) +chunk_dss_sbrk(intptr_t increment) { +#ifdef JEMALLOC_HAVE_SBRK + return (sbrk(increment)); +#else not_implemented(); - return (NULL); -} #endif +} dss_prec_t chunk_dss_prec_get(void) @@ -93,7 +94,7 @@ chunk_alloc_dss(size_t size, size_t alignment, bool *zero) */ do { /* Get the current end of the DSS. */ - dss_max = sbrk(0); + dss_max = chunk_dss_sbrk(0); /* * Calculate how much padding is necessary to * chunk-align the end of the DSS. @@ -117,7 +118,7 @@ chunk_alloc_dss(size_t size, size_t alignment, bool *zero) return (NULL); } incr = gap_size + cpad_size + size; - dss_prev = sbrk(incr); + dss_prev = chunk_dss_sbrk(incr); if (dss_prev == dss_max) { /* Success. */ dss_max = dss_next; @@ -163,7 +164,7 @@ chunk_dss_boot(void) if (malloc_mutex_init(&dss_mtx)) return (true); - dss_base = sbrk(0); + dss_base = chunk_dss_sbrk(0); dss_prev = dss_base; dss_max = dss_base; From 86abd0dcd8e478759fe409d338d11558c4cec427 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 30 Nov 2013 15:25:42 -0800 Subject: [PATCH 0328/3378] Refactor to support more varied testing. Refactor the test harness to support three types of tests: - unit: White box unit tests. These tests have full access to all internal jemalloc library symbols. 
Though in actuality all symbols are prefixed by jet_, macro-based name mangling abstracts this away from test code. - integration: Black box integration tests. These tests link with the installable shared jemalloc library, and with the exception of some utility code and configure-generated macro definitions, they have no access to jemalloc internals. - stress: Black box stress tests. These tests link with the installable shared jemalloc library, as well as with an internal allocator with symbols prefixed by jet_ (same as for unit tests) that can be used to allocate data structures that are internal to the test code. Move existing tests into test/{unit,integration}/ as appropriate. Split out internal parts of jemalloc_defs.h.in and put them in jemalloc_internal_defs.h.in. This reduces internals exposure to applications that #include . Refactor jemalloc.h header generation so that a single header file results, and the prototypes can be used to generate jet_ prototypes for tests. Split jemalloc.h.in into multiple parts (jemalloc_defs.h.in, jemalloc_macros.h.in, jemalloc_protos.h.in, jemalloc_mangle.h.in) and use a shell script to generate a unified jemalloc.h at configure time. Change the default private namespace prefix from "" to "je_". Add missing private namespace mangling. Remove hard-coded private_namespace.h. Instead generate it and private_unnamespace.h from private_symbols.txt. Use similar logic for public symbols, which aids in name mangling for jet_ symbols. Add test_warn() and test_fail(). Replace existing exit(1) calls with test_fail() calls. 
--- .gitignore | 53 ++- ChangeLog | 8 + INSTALL | 4 +- Makefile.in | 187 ++++---- configure.ac | 88 +++- .../jemalloc/internal/jemalloc_internal.h.in | 17 +- .../internal/jemalloc_internal_defs.h.in | 202 +++++++++ include/jemalloc/internal/private_namespace.h | 392 ----------------- .../jemalloc/internal/private_namespace.sh | 5 + include/jemalloc/internal/private_symbols.txt | 402 ++++++++++++++++++ .../jemalloc/internal/private_unnamespace.sh | 5 + include/jemalloc/internal/public_namespace.sh | 5 + include/jemalloc/internal/public_symbols.txt | 21 + .../jemalloc/internal/public_unnamespace.sh | 5 + include/jemalloc/internal/util.h | 5 +- include/jemalloc/jemalloc.h.in | 157 ------- include/jemalloc/jemalloc.sh | 24 ++ include/jemalloc/jemalloc_defs.h.in | 246 +---------- include/jemalloc/jemalloc_macros.h.in | 28 ++ include/jemalloc/jemalloc_mangle.h.in | 66 +++ include/jemalloc/jemalloc_protos.h.in | 50 +++ test/include/test/jemalloc_test.h.in | 111 +++++ test/include/test/test.h | 2 + test/include/test/thread.h | 12 + test/{ => integration}/ALLOCM_ARENA.c | 3 +- test/{ => integration}/ALLOCM_ARENA.exp | 0 test/{ => integration}/aligned_alloc.c | 6 +- test/{ => integration}/aligned_alloc.exp | 0 test/{ => integration}/allocated.c | 21 +- test/{ => integration}/allocated.exp | 0 test/{ => integration}/allocm.c | 9 +- test/{ => integration}/allocm.exp | 0 .../jemalloc_integration.h.in} | 15 +- test/{ => integration}/mremap.c | 3 +- test/{ => integration}/mremap.exp | 0 test/{ => integration}/posix_memalign.c | 6 +- test/{ => integration}/posix_memalign.exp | 0 test/{ => integration}/rallocm.c | 5 +- test/{ => integration}/rallocm.exp | 0 test/{ => integration}/thread_arena.c | 3 +- test/{ => integration}/thread_arena.exp | 0 .../{ => integration}/thread_tcache_enabled.c | 3 +- .../thread_tcache_enabled.exp | 0 test/src/test.c | 28 ++ test/src/thread.c | 35 ++ test/test.sh.in | 37 ++ test/{ => unit}/bitmap.c | 4 +- test/{ => unit}/bitmap.exp | 0 48 files changed, 
1335 insertions(+), 938 deletions(-) create mode 100644 include/jemalloc/internal/jemalloc_internal_defs.h.in delete mode 100644 include/jemalloc/internal/private_namespace.h create mode 100755 include/jemalloc/internal/private_namespace.sh create mode 100644 include/jemalloc/internal/private_symbols.txt create mode 100755 include/jemalloc/internal/private_unnamespace.sh create mode 100755 include/jemalloc/internal/public_namespace.sh create mode 100644 include/jemalloc/internal/public_symbols.txt create mode 100755 include/jemalloc/internal/public_unnamespace.sh delete mode 100644 include/jemalloc/jemalloc.h.in create mode 100755 include/jemalloc/jemalloc.sh create mode 100644 include/jemalloc/jemalloc_macros.h.in create mode 100644 include/jemalloc/jemalloc_mangle.h.in create mode 100644 include/jemalloc/jemalloc_protos.h.in create mode 100644 test/include/test/jemalloc_test.h.in create mode 100644 test/include/test/test.h create mode 100644 test/include/test/thread.h rename test/{ => integration}/ALLOCM_ARENA.c (96%) rename test/{ => integration}/ALLOCM_ARENA.exp (100%) rename test/{ => integration}/aligned_alloc.c (96%) rename test/{ => integration}/aligned_alloc.exp (100%) rename test/{ => integration}/allocated.c (81%) rename test/{ => integration}/allocated.exp (100%) rename test/{ => integration}/allocm.c (97%) rename test/{ => integration}/allocm.exp (100%) rename test/{jemalloc_test.h.in => integration/jemalloc_integration.h.in} (78%) rename test/{ => integration}/mremap.c (95%) rename test/{ => integration}/mremap.exp (100%) rename test/{ => integration}/posix_memalign.c (96%) rename test/{ => integration}/posix_memalign.exp (100%) rename test/{ => integration}/rallocm.c (98%) rename test/{ => integration}/rallocm.exp (100%) rename test/{ => integration}/thread_arena.c (96%) rename test/{ => integration}/thread_arena.exp (100%) rename test/{ => integration}/thread_tcache_enabled.c (97%) rename test/{ => integration}/thread_tcache_enabled.exp (100%) 
create mode 100644 test/src/test.c create mode 100644 test/src/thread.c create mode 100644 test/test.sh.in rename test/{ => unit}/bitmap.c (98%) rename test/{ => unit}/bitmap.exp (100%) diff --git a/.gitignore b/.gitignore index 6607a5fd..0a9ca185 100644 --- a/.gitignore +++ b/.gitignore @@ -1,25 +1,56 @@ /autom4te.cache/ + +/bin/jemalloc.sh + /config.stamp /config.log /config.status /configure + /doc/html.xsl /doc/manpages.xsl /doc/jemalloc.xml /doc/jemalloc.html /doc/jemalloc.3 + /lib/ + /Makefile -/include/jemalloc/internal/jemalloc_internal\.h -/include/jemalloc/internal/size_classes\.h -/include/jemalloc/jemalloc\.h -/include/jemalloc/jemalloc_defs\.h -/test/jemalloc_test\.h + +/include/jemalloc/internal/jemalloc_internal.h +/include/jemalloc/internal/jemalloc_internal_defs.h +/include/jemalloc/internal/private_namespace.h +/include/jemalloc/internal/private_unnamespace.h +/include/jemalloc/internal/public_namespace.h +/include/jemalloc/internal/public_unnamespace.h +/include/jemalloc/internal/size_classes.h +/include/jemalloc/jemalloc.h +/include/jemalloc/jemalloc_defs.h +/include/jemalloc/jemalloc_macros.h +/include/jemalloc/jemalloc_protos.h +/include/jemalloc/jemalloc_protos_jet.h +/include/jemalloc/jemalloc_mangle.h + /src/*.[od] -/test/*.[od] -/test/*.out -/test/[a-zA-Z_]* -!test/*.c -!test/*.exp + +/test/test.sh +test/include/test/jemalloc_test.h + +/test/integration/[A-Za-z]* +!/test/integration/*.* +/test/integration/*.[od] +/test/integration/*.out + +/test/src/*.[od] + +/test/stress/[A-Za-z]* +!/test/stress/*.* +/test/stress/*.[od] +/test/stress/*.out + +/test/unit/[A-Za-z]* +!/test/unit/*.* +/test/unit/*.[od] +/test/unit/*.out + /VERSION -/bin/jemalloc.sh diff --git a/ChangeLog b/ChangeLog index 0efc7426..90ab107a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -6,6 +6,14 @@ found in the git revision history: http://www.canonware.com/cgi-bin/gitweb.cgi?p=jemalloc.git git://canonware.com/jemalloc.git +* 3.5.0 (XXX) + + Bug fixes: + - Change the default 
private namespace prefix from empty to je_, and change + --with-private-namespace-prefix so that it prepends an additional prefix + rather than replacing je_. This reduces the likelihood of applications + which statically link jemalloc experiencing symbol name collisions. + * 3.4.1 (October 20, 2013) Bug fixes: diff --git a/INSTALL b/INSTALL index 6e371ce5..39ad26db 100644 --- a/INSTALL +++ b/INSTALL @@ -61,10 +61,10 @@ any of the following arguments (not a definitive list) to 'configure': allocator on OSX. --with-private-namespace= - Prefix all library-private APIs with . For shared libraries, + Prefix all library-private APIs with je_. For shared libraries, symbol visibility mechanisms prevent these symbols from being exported, but for static libraries, naming collisions are a real possibility. By - default, the prefix is "" (empty string). + default, is empty, which results in a symbol prefix of je_ . --with-install-suffix= Append to the base name of all installed files, such that multiple diff --git a/Makefile.in b/Makefile.in index 5909416e..57020ad7 100644 --- a/Makefile.in +++ b/Makefile.in @@ -73,18 +73,17 @@ LIBJEMALLOC := $(LIBPREFIX)jemalloc$(install_suffix) # Lists of files. 
BINS := $(srcroot)bin/pprof $(objroot)bin/jemalloc.sh -CHDRS := $(objroot)include/jemalloc/jemalloc$(install_suffix).h \ - $(objroot)include/jemalloc/jemalloc_defs$(install_suffix).h -CSRCS := $(srcroot)src/jemalloc.c $(srcroot)src/arena.c $(srcroot)src/atomic.c \ - $(srcroot)src/base.c $(srcroot)src/bitmap.c $(srcroot)src/chunk.c \ - $(srcroot)src/chunk_dss.c $(srcroot)src/chunk_mmap.c \ - $(srcroot)src/ckh.c $(srcroot)src/ctl.c $(srcroot)src/extent.c \ - $(srcroot)src/hash.c $(srcroot)src/huge.c $(srcroot)src/mb.c \ - $(srcroot)src/mutex.c $(srcroot)src/prof.c $(srcroot)src/quarantine.c \ - $(srcroot)src/rtree.c $(srcroot)src/stats.c $(srcroot)src/tcache.c \ - $(srcroot)src/util.c $(srcroot)src/tsd.c +C_HDRS := $(objroot)include/jemalloc/jemalloc$(install_suffix).h +C_SRCS := $(srcroot)src/jemalloc.c $(srcroot)src/arena.c \ + $(srcroot)src/atomic.c $(srcroot)src/base.c $(srcroot)src/bitmap.c \ + $(srcroot)src/chunk.c $(srcroot)src/chunk_dss.c \ + $(srcroot)src/chunk_mmap.c $(srcroot)src/ckh.c $(srcroot)src/ctl.c \ + $(srcroot)src/extent.c $(srcroot)src/hash.c $(srcroot)src/huge.c \ + $(srcroot)src/mb.c $(srcroot)src/mutex.c $(srcroot)src/prof.c \ + $(srcroot)src/quarantine.c $(srcroot)src/rtree.c $(srcroot)src/stats.c \ + $(srcroot)src/tcache.c $(srcroot)src/util.c $(srcroot)src/tsd.c ifeq ($(enable_zone_allocator), 1) -CSRCS += $(srcroot)src/zone.c +C_SRCS += $(srcroot)src/zone.c endif ifeq ($(IMPORTLIB),$(SO)) STATIC_LIBS := $(objroot)lib/$(LIBJEMALLOC).$(A) @@ -103,24 +102,43 @@ DOCS_XML := $(objroot)doc/jemalloc$(install_suffix).xml DOCS_HTML := $(DOCS_XML:$(objroot)%.xml=$(srcroot)%.html) DOCS_MAN3 := $(DOCS_XML:$(objroot)%.xml=$(srcroot)%.3) DOCS := $(DOCS_HTML) $(DOCS_MAN3) -CTESTS := $(srcroot)test/aligned_alloc.c $(srcroot)test/allocated.c \ - $(srcroot)test/ALLOCM_ARENA.c $(srcroot)test/bitmap.c \ - $(srcroot)test/mremap.c $(srcroot)test/posix_memalign.c \ - $(srcroot)test/thread_arena.c $(srcroot)test/thread_tcache_enabled.c +C_TESTLIB_SRCS := 
$(srcroot)test/src/test.c $(srcroot)test/src/thread.c +C_UTIL_INTEGRATION_SRCS := $(srcroot)src/util.c +TESTS_UNIT := $(srcroot)test/unit/bitmap.c +TESTS_INTEGRATION := $(srcroot)test/integration/aligned_alloc.c \ + $(srcroot)test/integration/allocated.c \ + $(srcroot)test/integration/ALLOCM_ARENA.c \ + $(srcroot)test/integration/mremap.c \ + $(srcroot)test/integration/posix_memalign.c \ + $(srcroot)test/integration/thread_arena.c \ + $(srcroot)test/integration/thread_tcache_enabled.c ifeq ($(enable_experimental), 1) -CTESTS += $(srcroot)test/allocm.c $(srcroot)test/rallocm.c +TESTS_INTEGRATION += $(srcroot)test/integration/allocm.c \ + $(srcroot)test/integration/rallocm.c endif +TESTS_STRESS := +TESTS := $(TESTS_UNIT) $(TESTS_INTEGRATION) $(TESTS_STRESS) -COBJS := $(CSRCS:$(srcroot)%.c=$(objroot)%.$(O)) -CPICOBJS := $(CSRCS:$(srcroot)%.c=$(objroot)%.pic.$(O)) -CTESTOBJS := $(CTESTS:$(srcroot)%.c=$(objroot)%.$(O)) +C_OBJS := $(C_SRCS:$(srcroot)%.c=$(objroot)%.$(O)) +C_PIC_OBJS := $(C_SRCS:$(srcroot)%.c=$(objroot)%.pic.$(O)) +C_JET_OBJS := $(C_SRCS:$(srcroot)%.c=$(objroot)%.jet.$(O)) +C_TESTLIB_UNIT_OBJS := $(C_TESTLIB_SRCS:$(srcroot)%.c=$(objroot)%.unit.$(O)) +C_TESTLIB_INTEGRATION_OBJS := $(C_TESTLIB_SRCS:$(srcroot)%.c=$(objroot)%.integration.$(O)) +C_UTIL_INTEGRATION_OBJS := $(C_UTIL_INTEGRATION_SRCS:$(srcroot)%.c=$(objroot)%.integration.$(O)) +C_TESTLIB_STRESS_OBJS := $(C_TESTLIB_SRCS:$(srcroot)%.c=$(objroot)%.stress.$(O)) +C_TESTLIB_OBJS := $(C_TESTLIB_UNIT_OBJS) $(C_TESTLIB_INTEGRATION_OBJS) $(C_UTIL_INTEGRATION_OBJS) $(C_TESTLIB_STRESS_OBJS) + +TESTS_UNIT_OBJS := $(TESTS_UNIT:$(srcroot)%.c=$(objroot)%.$(O)) +TESTS_INTEGRATION_OBJS := $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%.$(O)) +TESTS_STRESS_OBJS := $(TESTS_STRESS:$(srcroot)%.c=$(objroot)%.$(O)) +TESTS_OBJS := $(TESTS_UNIT_OBJS) $(TESTS_INTEGRATION_OBJS) $(TESTS_STRESS_OBJS) .PHONY: all dist build_doc_html build_doc_man build_doc .PHONY: install_bin install_include install_lib .PHONY: install_doc_html 
install_doc_man install_doc install .PHONY: tests check clean distclean relclean -.SECONDARY : $(CTESTOBJS) +.SECONDARY : $(TESTS_OBJS) # Default target. all: build @@ -141,30 +159,44 @@ build_doc: $(DOCS) # Include generated dependency files. # ifdef CC_MM --include $(COBJS:%.$(O)=%.d) --include $(CPICOBJS:%.$(O)=%.d) --include $(CTESTOBJS:%.$(O)=%.d) +-include $(C_OBJS:%.$(O)=%.d) +-include $(C_PIC_OBJS:%.$(O)=%.d) +-include $(C_JET_OBJS:%.$(O)=%.d) +-include $(TESTS_OBJS:%.$(O)=%.d) endif -$(COBJS): $(objroot)src/%.$(O): $(srcroot)src/%.c -$(CPICOBJS): $(objroot)src/%.pic.$(O): $(srcroot)src/%.c -$(CPICOBJS): CFLAGS += $(PIC_CFLAGS) -$(CTESTOBJS): $(objroot)test/%.$(O): $(srcroot)test/%.c -$(CTESTOBJS): CPPFLAGS += -I$(objroot)test +$(C_OBJS): $(objroot)src/%.$(O): $(srcroot)src/%.c +$(C_PIC_OBJS): $(objroot)src/%.pic.$(O): $(srcroot)src/%.c +$(C_PIC_OBJS): CFLAGS += $(PIC_CFLAGS) +$(C_JET_OBJS): $(objroot)src/%.jet.$(O): $(srcroot)src/%.c +$(C_JET_OBJS): CFLAGS += -DJEMALLOC_JET +$(C_TESTLIB_UNIT_OBJS): $(objroot)test/src/%.unit.$(O): $(srcroot)test/src/%.c +$(C_TESTLIB_UNIT_OBJS): CPPFLAGS += -DJEMALLOC_UNIT_TEST +$(C_TESTLIB_INTEGRATION_OBJS): $(objroot)test/src/%.integration.$(O): $(srcroot)test/src/%.c +$(C_TESTLIB_INTEGRATION_OBJS): CPPFLAGS += -DJEMALLOC_INTEGRATION_TEST +$(C_UTIL_INTEGRATION_OBJS): $(objroot)src/%.integration.$(O): $(srcroot)src/%.c +$(C_TESTLIB_STRESS_OBJS): $(objroot)test/src/%.stress.$(O): $(srcroot)test/src/%.c +$(C_TESTLIB_STRESS_OBJS): CPPFLAGS += -DJEMALLOC_STRESS_TEST +$(C_TESTLIB_OBJS): CPPFLAGS += -I$(objroot)test/include +$(TESTS_UNIT_OBJS): CPPFLAGS += -DJEMALLOC_UNIT_TEST +$(TESTS_INTEGRATION_OBJS): CPPFLAGS += -DJEMALLOC_INTEGRATION_TEST +$(TESTS_STRESS_OBJS): CPPFLAGS += -DJEMALLOC_STRESS_TEST +$(TESTS_OBJS): $(objroot)test/%.$(O): $(srcroot)test/%.c +$(TESTS_OBJS): CPPFLAGS += -I$(objroot)test/include ifneq ($(IMPORTLIB),$(SO)) -$(COBJS): CPPFLAGS += -DDLLEXPORT +$(C_OBJS): CPPFLAGS += -DDLLEXPORT endif ifndef CC_MM -# 
Dependencies +# Dependencies. HEADER_DIRS = $(srcroot)include/jemalloc/internal \ $(objroot)include/jemalloc $(objroot)include/jemalloc/internal HEADERS = $(wildcard $(foreach dir,$(HEADER_DIRS),$(dir)/*.h)) -$(COBJS) $(CPICOBJS) $(CTESTOBJS): $(HEADERS) -$(CTESTOBJS): $(objroot)test/jemalloc_test.h +$(C_OBJS) $(C_PIC_OBJS) $(C_JET_OBJS) $(C_TESTLIB_OBJS) $(TESTS_OBJS): $(HEADERS) +$(TESTS_OBJS): $(objroot)test/unit/jemalloc_test.h endif -$(COBJS) $(CPICOBJS) $(CTESTOBJS): %.$(O): +$(C_OBJS) $(C_PIC_OBJS) $(C_JET_OBJS) $(C_TESTLIB_OBJS) $(TESTS_OBJS): %.$(O): @mkdir -p $(@D) $(CC) $(CFLAGS) -c $(CPPFLAGS) $(CTARGET) $< ifdef CC_MM @@ -177,21 +209,27 @@ ifneq ($(SOREV),$(SO)) ln -sf $( $(objroot)$${t}.out 2>&1; \ - if test -e "$(srcroot)$${t}.exp"; then \ - diff -w -u $(srcroot)$${t}.exp \ - $(objroot)$${t}.out >/dev/null 2>&1; \ - fail=$$?; \ - if test "$${fail}" -eq "1" ; then \ - failures=`expr $${failures} + 1`; \ - echo "*** FAIL ***"; \ - else \ - echo "pass"; \ - fi; \ - else \ - echo "*** FAIL *** (.exp file is missing)"; \ - failures=`expr $${failures} + 1`; \ - fi; \ - done; \ - echo "========================================="; \ - echo "Failures: $${failures}/$${total}"' +check_unit_dir: + @mkdir -p $(objroot)test/unit +check_integration_dir: + @mkdir -p $(objroot)test/integration +check_stress_dir: + @mkdir -p $(objroot)test/stress +check_dir: check_unit_dir check_integration_dir check_stress_dir + +check_unit: tests_unit check_unit_dir + @$(SHELL) $(objroot)test/test.sh $(TESTS_UNIT:$(srcroot)%.c=$(objroot)%) +check_integration: tests_integration check_integration_dir + @$(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) +check_stress: tests_stress check_stress_dir + @$(SHELL) $(objroot)test/test.sh $(TESTS_STRESS:$(srcroot)%.c=$(objroot)%) +check: tests check_dir + @$(SHELL) $(objroot)test/test.sh $(TESTS:$(srcroot)%.c=$(objroot)%) clean: - rm -f $(COBJS) - rm -f $(CPICOBJS) - rm -f $(COBJS:%.$(O)=%.d) - rm -f 
$(CPICOBJS:%.$(O)=%.d) - rm -f $(CTESTOBJS:%.$(O)=%$(EXE)) - rm -f $(CTESTOBJS) - rm -f $(CTESTOBJS:%.$(O)=%.d) - rm -f $(CTESTOBJS:%.$(O)=%.out) + rm -f $(C_OBJS) + rm -f $(C_PIC_OBJS) + rm -f $(C_JET_OBJS) + rm -f $(C_TESTLIB_OBJS) + rm -f $(C_OBJS:%.$(O)=%.d) + rm -f $(C_PIC_OBJS:%.$(O)=%.d) + rm -f $(C_JET_OBJS:%.$(O)=%.d) + rm -f $(C_TESTLIB_OBJS:%.$(O)=%.d) + rm -f $(TESTS_OBJS:%.$(O)=%$(EXE)) + rm -f $(TESTS_OBJS) + rm -f $(TESTS_OBJS:%.$(O)=%.d) + rm -f $(TESTS_OBJS:%.$(O)=%.out) rm -f $(DSOS) $(STATIC_LIBS) distclean: clean rm -rf $(objroot)autom4te.cache + rm -f $(objroot)bin/jemalloc.sh rm -f $(objroot)config.log rm -f $(objroot)config.status rm -f $(objroot)config.stamp diff --git a/configure.ac b/configure.ac index 8b134a16..570a7f56 100644 --- a/configure.ac +++ b/configure.ac @@ -489,18 +489,15 @@ AC_ARG_WITH([export], fi] ) -dnl Do not mangle library-private APIs by default. +dnl Mangle library-private APIs. AC_ARG_WITH([private_namespace], [AS_HELP_STRING([--with-private-namespace=], [Prefix to prepend to all library-private APIs])], - [JEMALLOC_PRIVATE_NAMESPACE="$with_private_namespace"], - [JEMALLOC_PRIVATE_NAMESPACE=""] + [JEMALLOC_PRIVATE_NAMESPACE="${with_private_namespace}je_"], + [JEMALLOC_PRIVATE_NAMESPACE="je_"] ) -AC_DEFINE_UNQUOTED([JEMALLOC_PRIVATE_NAMESPACE], ["$JEMALLOC_PRIVATE_NAMESPACE"]) -if test "x$JEMALLOC_PRIVATE_NAMESPACE" != "x" ; then - AC_DEFINE_UNQUOTED([JEMALLOC_N(string_that_no_one_should_want_to_use_as_a_jemalloc_private_namespace_prefix)], [${JEMALLOC_PRIVATE_NAMESPACE}##string_that_no_one_should_want_to_use_as_a_jemalloc_private_namespace_prefix]) -else - AC_DEFINE_UNQUOTED([JEMALLOC_N(string_that_no_one_should_want_to_use_as_a_jemalloc_private_namespace_prefix)], [string_that_no_one_should_want_to_use_as_a_jemalloc_private_namespace_prefix]) -fi +AC_DEFINE_UNQUOTED([JEMALLOC_PRIVATE_NAMESPACE], [$JEMALLOC_PRIVATE_NAMESPACE]) +private_namespace="$JEMALLOC_PRIVATE_NAMESPACE" +AC_SUBST([private_namespace]) dnl Do not 
add suffix to installed files by default. AC_ARG_WITH([install_suffix], @@ -511,37 +508,64 @@ AC_ARG_WITH([install_suffix], install_suffix="$INSTALL_SUFFIX" AC_SUBST([install_suffix]) +dnl Substitute @je_@ in jemalloc_protos.h.in, primarily to make generation of +dnl jemalloc_protos_jet.h easy. +je_="je_" +AC_SUBST([je_]) + cfgoutputs_in="${srcroot}Makefile.in" cfgoutputs_in="${cfgoutputs_in} ${srcroot}doc/html.xsl.in" cfgoutputs_in="${cfgoutputs_in} ${srcroot}doc/manpages.xsl.in" cfgoutputs_in="${cfgoutputs_in} ${srcroot}doc/jemalloc.xml.in" -cfgoutputs_in="${cfgoutputs_in} ${srcroot}include/jemalloc/jemalloc.h.in" +cfgoutputs_in="${cfgoutputs_in} ${srcroot}include/jemalloc/jemalloc_macros.h.in" +cfgoutputs_in="${cfgoutputs_in} ${srcroot}include/jemalloc/jemalloc_protos.h.in" +cfgoutputs_in="${cfgoutputs_in} ${srcroot}include/jemalloc/jemalloc_mangle.h.in" cfgoutputs_in="${cfgoutputs_in} ${srcroot}include/jemalloc/internal/jemalloc_internal.h.in" -cfgoutputs_in="${cfgoutputs_in} ${srcroot}test/jemalloc_test.h.in" +cfgoutputs_in="${cfgoutputs_in} ${srcroot}test/test.sh.in" +cfgoutputs_in="${cfgoutputs_in} ${srcroot}test/include/test/jemalloc_test.h.in" cfgoutputs_out="Makefile" cfgoutputs_out="${cfgoutputs_out} doc/html.xsl" cfgoutputs_out="${cfgoutputs_out} doc/manpages.xsl" -cfgoutputs_out="${cfgoutputs_out} doc/jemalloc${install_suffix}.xml" -cfgoutputs_out="${cfgoutputs_out} include/jemalloc/jemalloc${install_suffix}.h" +cfgoutputs_out="${cfgoutputs_out} doc/jemalloc.xml" +cfgoutputs_out="${cfgoutputs_out} include/jemalloc/jemalloc_macros.h" +cfgoutputs_out="${cfgoutputs_out} include/jemalloc/jemalloc_protos.h" +cfgoutputs_out="${cfgoutputs_out} include/jemalloc/jemalloc_mangle.h" cfgoutputs_out="${cfgoutputs_out} include/jemalloc/internal/jemalloc_internal.h" -cfgoutputs_out="${cfgoutputs_out} test/jemalloc_test.h" +cfgoutputs_out="${cfgoutputs_out} test/test.sh" +cfgoutputs_out="${cfgoutputs_out} test/include/test/jemalloc_test.h" cfgoutputs_tup="Makefile" 
cfgoutputs_tup="${cfgoutputs_tup} doc/html.xsl:doc/html.xsl.in" cfgoutputs_tup="${cfgoutputs_tup} doc/manpages.xsl:doc/manpages.xsl.in" -cfgoutputs_tup="${cfgoutputs_tup} doc/jemalloc${install_suffix}.xml:doc/jemalloc.xml.in" -cfgoutputs_tup="${cfgoutputs_tup} include/jemalloc/jemalloc${install_suffix}.h:include/jemalloc/jemalloc.h.in" +cfgoutputs_tup="${cfgoutputs_tup} doc/jemalloc.xml:doc/jemalloc.xml.in" +cfgoutputs_tup="${cfgoutputs_tup} include/jemalloc/jemalloc_macros.h:include/jemalloc/jemalloc_macros.h.in" +cfgoutputs_tup="${cfgoutputs_tup} include/jemalloc/jemalloc_protos.h:include/jemalloc/jemalloc_protos.h.in" +cfgoutputs_tup="${cfgoutputs_tup} include/jemalloc/jemalloc_mangle.h:include/jemalloc/jemalloc_mangle.h.in" cfgoutputs_tup="${cfgoutputs_tup} include/jemalloc/internal/jemalloc_internal.h" -cfgoutputs_tup="${cfgoutputs_tup} test/jemalloc_test.h:test/jemalloc_test.h.in" +cfgoutputs_tup="${cfgoutputs_tup} test/test.sh:test/test.sh.in" +cfgoutputs_tup="${cfgoutputs_tup} test/include/test/jemalloc_test.h:test/include/test/jemalloc_test.h.in" cfghdrs_in="${srcroot}include/jemalloc/jemalloc_defs.h.in" +cfghdrs_in="${cfghdrs_in} ${srcroot}include/jemalloc/internal/jemalloc_internal_defs.h.in" +cfghdrs_in="${cfghdrs_in} ${srcroot}include/jemalloc/internal/private_namespace.sh" +cfghdrs_in="${cfghdrs_in} ${srcroot}include/jemalloc/internal/private_unnamespace.sh" +cfghdrs_in="${cfghdrs_in} ${srcroot}include/jemalloc/internal/public_namespace.sh" +cfghdrs_in="${cfghdrs_in} ${srcroot}include/jemalloc/internal/public_unnamespace.sh" cfghdrs_in="${cfghdrs_in} ${srcroot}include/jemalloc/internal/size_classes.sh" -cfghdrs_out="include/jemalloc/jemalloc_defs${install_suffix}.h" +cfghdrs_out="include/jemalloc/jemalloc_defs.h" +cfghdrs_out="${cfghdrs_out} include/jemalloc/jemalloc${install_suffix}.h" +cfghdrs_out="${cfghdrs_out} include/jemalloc/jemalloc_protos_jet.h" +cfghdrs_out="${cfghdrs_out} include/jemalloc/internal/jemalloc_internal_defs.h" 
+cfghdrs_out="${cfghdrs_out} include/jemalloc/internal/private_namespace.h" +cfghdrs_out="${cfghdrs_out} include/jemalloc/internal/private_unnamespace.h" +cfghdrs_out="${cfghdrs_out} include/jemalloc/internal/public_namespace.h" +cfghdrs_out="${cfghdrs_out} include/jemalloc/internal/public_unnamespace.h" cfghdrs_out="${cfghdrs_out} include/jemalloc/internal/size_classes.h" -cfghdrs_tup="include/jemalloc/jemalloc_defs${install_suffix}.h:include/jemalloc/jemalloc_defs.h.in" +cfghdrs_tup="include/jemalloc/jemalloc_defs.h:include/jemalloc/jemalloc_defs.h.in" +cfghdrs_tup="${cfghdrs_tup} include/jemalloc/internal/jemalloc_internal_defs.h:${srcroot}include/jemalloc/internal/jemalloc_internal_defs.h.in" dnl Do not silence irrelevant compiler warnings by default, since enabling this dnl option incurs a performance penalty. @@ -1277,10 +1301,34 @@ dnl ============================================================================ dnl Check for typedefs, structures, and compiler characteristics. AC_HEADER_STDBOOL +AC_CONFIG_COMMANDS([include/jemalloc/internal/private_namespace.h], [ + mkdir -p "${objroot}include/jemalloc/internal" + "${srcdir}/include/jemalloc/internal/private_namespace.sh" "${srcdir}/include/jemalloc/internal/private_symbols.txt" > "${objroot}include/jemalloc/internal/private_namespace.h" +]) +AC_CONFIG_COMMANDS([include/jemalloc/internal/private_unnamespace.h], [ + mkdir -p "${objroot}include/jemalloc/internal" + "${srcdir}/include/jemalloc/internal/private_unnamespace.sh" "${srcdir}/include/jemalloc/internal/private_symbols.txt" > "${objroot}include/jemalloc/internal/private_unnamespace.h" +]) +AC_CONFIG_COMMANDS([include/jemalloc/internal/public_namespace.h], [ + mkdir -p "${objroot}include/jemalloc/internal" + "${srcdir}/include/jemalloc/internal/public_namespace.sh" "${srcdir}/include/jemalloc/internal/public_symbols.txt" > "${objroot}include/jemalloc/internal/public_namespace.h" +]) +AC_CONFIG_COMMANDS([include/jemalloc/internal/public_unnamespace.h], [ 
+ mkdir -p "${objroot}include/jemalloc/internal" + "${srcdir}/include/jemalloc/internal/public_unnamespace.sh" "${srcdir}/include/jemalloc/internal/public_symbols.txt" > "${objroot}include/jemalloc/internal/public_unnamespace.h" +]) AC_CONFIG_COMMANDS([include/jemalloc/internal/size_classes.h], [ - mkdir -p "include/jemalloc/internal" + mkdir -p "${objroot}include/jemalloc/internal" "${srcdir}/include/jemalloc/internal/size_classes.sh" > "${objroot}include/jemalloc/internal/size_classes.h" ]) +AC_CONFIG_COMMANDS([include/jemalloc/jemalloc_protos_jet.h], [ + mkdir -p "${objroot}include/jemalloc" + cat "${srcdir}/include/jemalloc/jemalloc_protos.h.in" | sed -e 's/@je_@/jet_/g' > "${objroot}include/jemalloc/jemalloc_protos_jet.h" +]) +AC_CONFIG_COMMANDS([include/jemalloc/jemalloc.h], [ + mkdir -p "${objroot}include/jemalloc" + "${srcdir}/include/jemalloc/jemalloc.sh" "${objroot}" > "${objroot}include/jemalloc/jemalloc${install_suffix}.h" +]) dnl Process .in files. AC_SUBST([cfghdrs_in]) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 53c135c2..e6303103 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -54,8 +54,7 @@ typedef intptr_t ssize_t; #endif #include -#define JEMALLOC_NO_DEMANGLE -#include "../jemalloc@install_suffix@.h" +#include "jemalloc_internal_defs.h" #ifdef JEMALLOC_UTRACE #include @@ -66,6 +65,16 @@ typedef intptr_t ssize_t; #include #endif +#define JEMALLOC_NO_DEMANGLE +#ifdef JEMALLOC_JET +# define JEMALLOC_N(n) jet_##n +# include "jemalloc/internal/public_namespace.h" +# define JEMALLOC_NO_RENAME +# include "../jemalloc@install_suffix@.h" +#else +# define JEMALLOC_N(n) @private_namespace@##n +# include "../jemalloc@install_suffix@.h" +#endif #include "jemalloc/internal/private_namespace.h" #ifdef JEMALLOC_CC_SILENCE @@ -522,10 +531,10 @@ typedef struct { uint64_t deallocated; } thread_allocated_t; /* - * The 
JEMALLOC_CONCAT() wrapper is necessary to pass {0, 0} via a cpp macro + * The JEMALLOC_ARG_CONCAT() wrapper is necessary to pass {0, 0} via a cpp macro * argument. */ -#define THREAD_ALLOCATED_INITIALIZER JEMALLOC_CONCAT({0, 0}) +#define THREAD_ALLOCATED_INITIALIZER JEMALLOC_ARG_CONCAT({0, 0}) #undef JEMALLOC_H_STRUCTS /******************************************************************************/ diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in new file mode 100644 index 00000000..36826d85 --- /dev/null +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -0,0 +1,202 @@ +/* + * If JEMALLOC_PREFIX is defined via --with-jemalloc-prefix, it will cause all + * public APIs to be prefixed. This makes it possible, with some care, to use + * multiple allocators simultaneously. + */ +#undef JEMALLOC_PREFIX +#undef JEMALLOC_CPREFIX + +/* + * JEMALLOC_PRIVATE_NAMESPACE is used as a prefix for all library-private APIs. + * For shared libraries, symbol visibility mechanisms prevent these symbols + * from being exported, but for static libraries, naming collisions are a real + * possibility. + */ +#undef JEMALLOC_PRIVATE_NAMESPACE + +/* + * Hyper-threaded CPUs may need a special instruction inside spin loops in + * order to yield to another virtual CPU. + */ +#undef CPU_SPINWAIT + +/* Defined if the equivalent of FreeBSD's atomic(9) functions are available. */ +#undef JEMALLOC_ATOMIC9 + +/* + * Defined if OSAtomic*() functions are available, as provided by Darwin, and + * documented in the atomic(3) manual page. 
+ */ +#undef JEMALLOC_OSATOMIC + +/* + * Defined if __sync_add_and_fetch(uint32_t *, uint32_t) and + * __sync_sub_and_fetch(uint32_t *, uint32_t) are available, despite + * __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 not being defined (which means the + * functions are defined in libgcc instead of being inlines) + */ +#undef JE_FORCE_SYNC_COMPARE_AND_SWAP_4 + +/* + * Defined if __sync_add_and_fetch(uint64_t *, uint64_t) and + * __sync_sub_and_fetch(uint64_t *, uint64_t) are available, despite + * __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 not being defined (which means the + * functions are defined in libgcc instead of being inlines) + */ +#undef JE_FORCE_SYNC_COMPARE_AND_SWAP_8 + +/* + * Defined if OSSpin*() functions are available, as provided by Darwin, and + * documented in the spinlock(3) manual page. + */ +#undef JEMALLOC_OSSPIN + +/* + * Defined if _malloc_thread_cleanup() exists. At least in the case of + * FreeBSD, pthread_key_create() allocates, which if used during malloc + * bootstrapping will cause recursion into the pthreads library. Therefore, if + * _malloc_thread_cleanup() exists, use it as the basis for thread cleanup in + * malloc_tsd. + */ +#undef JEMALLOC_MALLOC_THREAD_CLEANUP + +/* + * Defined if threaded initialization is known to be safe on this platform. + * Among other things, it must be possible to initialize a mutex without + * triggering allocation in order for threaded allocation to be safe. + */ +#undef JEMALLOC_THREADED_INIT + +/* + * Defined if the pthreads implementation defines + * _pthread_mutex_init_calloc_cb(), in which case the function is used in order + * to avoid recursive allocation during mutex initialization. + */ +#undef JEMALLOC_MUTEX_INIT_CB + +/* Defined if sbrk() is supported. */ +#undef JEMALLOC_HAVE_SBRK + +/* Non-empty if the tls_model attribute is supported. */ +#undef JEMALLOC_TLS_MODEL + +/* JEMALLOC_CC_SILENCE enables code that silences unuseful compiler warnings. 
*/ +#undef JEMALLOC_CC_SILENCE + +/* + * JEMALLOC_DEBUG enables assertions and other sanity checks, and disables + * inline functions. + */ +#undef JEMALLOC_DEBUG + +/* JEMALLOC_STATS enables statistics calculation. */ +#undef JEMALLOC_STATS + +/* JEMALLOC_PROF enables allocation profiling. */ +#undef JEMALLOC_PROF + +/* Use libunwind for profile backtracing if defined. */ +#undef JEMALLOC_PROF_LIBUNWIND + +/* Use libgcc for profile backtracing if defined. */ +#undef JEMALLOC_PROF_LIBGCC + +/* Use gcc intrinsics for profile backtracing if defined. */ +#undef JEMALLOC_PROF_GCC + +/* + * JEMALLOC_TCACHE enables a thread-specific caching layer for small objects. + * This makes it possible to allocate/deallocate objects without any locking + * when the cache is in the steady state. + */ +#undef JEMALLOC_TCACHE + +/* + * JEMALLOC_DSS enables use of sbrk(2) to allocate chunks from the data storage + * segment (DSS). + */ +#undef JEMALLOC_DSS + +/* Support memory filling (junk/zero/quarantine/redzone). */ +#undef JEMALLOC_FILL + +/* Support utrace(2)-based tracing. */ +#undef JEMALLOC_UTRACE + +/* Support Valgrind. */ +#undef JEMALLOC_VALGRIND + +/* Support optional abort() on OOM. */ +#undef JEMALLOC_XMALLOC + +/* Support lazy locking (avoid locking unless a second thread is launched). */ +#undef JEMALLOC_LAZY_LOCK + +/* One page is 2^STATIC_PAGE_SHIFT bytes. */ +#undef STATIC_PAGE_SHIFT + +/* + * If defined, use munmap() to unmap freed chunks, rather than storing them for + * later reuse. This is disabled by default on Linux because common sequences + * of mmap()/munmap() calls will cause virtual memory map holes. + */ +#undef JEMALLOC_MUNMAP + +/* + * If defined, use mremap(...MREMAP_FIXED...) for huge realloc(). This is + * disabled by default because it is Linux-specific and it will cause virtual + * memory map holes, much like munmap(2) does. + */ +#undef JEMALLOC_MREMAP + +/* TLS is used to map arenas and magazine caches to threads. 
*/ +#undef JEMALLOC_TLS + +/* + * JEMALLOC_IVSALLOC enables ivsalloc(), which verifies that pointers reside + * within jemalloc-owned chunks before dereferencing them. + */ +#undef JEMALLOC_IVSALLOC + +/* + * Define overrides for non-standard allocator-related functions if they + * are present on the system. + */ +#undef JEMALLOC_OVERRIDE_MEMALIGN +#undef JEMALLOC_OVERRIDE_VALLOC + +/* + * Darwin (OS X) uses zones to work around Mach-O symbol override shortcomings. + */ +#undef JEMALLOC_ZONE +#undef JEMALLOC_ZONE_VERSION + +/* + * Methods for purging unused pages differ between operating systems. + * + * madvise(..., MADV_DONTNEED) : On Linux, this immediately discards pages, + * such that new pages will be demand-zeroed if + * the address region is later touched. + * madvise(..., MADV_FREE) : On FreeBSD and Darwin, this marks pages as being + * unused, such that they will be discarded rather + * than swapped out. + */ +#undef JEMALLOC_PURGE_MADVISE_DONTNEED +#undef JEMALLOC_PURGE_MADVISE_FREE + +/* + * Define if operating system has alloca.h header. + */ +#undef JEMALLOC_HAS_ALLOCA_H + +/* sizeof(void *) == 2^LG_SIZEOF_PTR. */ +#undef LG_SIZEOF_PTR + +/* sizeof(int) == 2^LG_SIZEOF_INT. */ +#undef LG_SIZEOF_INT + +/* sizeof(long) == 2^LG_SIZEOF_LONG. */ +#undef LG_SIZEOF_LONG + +/* sizeof(intmax_t) == 2^LG_SIZEOF_INTMAX_T. 
*/ +#undef LG_SIZEOF_INTMAX_T diff --git a/include/jemalloc/internal/private_namespace.h b/include/jemalloc/internal/private_namespace.h deleted file mode 100644 index cdb0b0eb..00000000 --- a/include/jemalloc/internal/private_namespace.h +++ /dev/null @@ -1,392 +0,0 @@ -#define a0calloc JEMALLOC_N(a0calloc) -#define a0free JEMALLOC_N(a0free) -#define a0malloc JEMALLOC_N(a0malloc) -#define arena_alloc_junk_small JEMALLOC_N(arena_alloc_junk_small) -#define arena_bin_index JEMALLOC_N(arena_bin_index) -#define arena_bin_info JEMALLOC_N(arena_bin_info) -#define arena_boot JEMALLOC_N(arena_boot) -#define arena_dalloc JEMALLOC_N(arena_dalloc) -#define arena_dalloc_bin JEMALLOC_N(arena_dalloc_bin) -#define arena_dalloc_bin_locked JEMALLOC_N(arena_dalloc_bin_locked) -#define arena_dalloc_junk_small JEMALLOC_N(arena_dalloc_junk_small) -#define arena_dalloc_large JEMALLOC_N(arena_dalloc_large) -#define arena_dalloc_large_locked JEMALLOC_N(arena_dalloc_large_locked) -#define arena_dalloc_small JEMALLOC_N(arena_dalloc_small) -#define arena_dss_prec_get JEMALLOC_N(arena_dss_prec_get) -#define arena_dss_prec_set JEMALLOC_N(arena_dss_prec_set) -#define arena_malloc JEMALLOC_N(arena_malloc) -#define arena_malloc_large JEMALLOC_N(arena_malloc_large) -#define arena_malloc_small JEMALLOC_N(arena_malloc_small) -#define arena_mapbits_allocated_get JEMALLOC_N(arena_mapbits_allocated_get) -#define arena_mapbits_binind_get JEMALLOC_N(arena_mapbits_binind_get) -#define arena_mapbits_dirty_get JEMALLOC_N(arena_mapbits_dirty_get) -#define arena_mapbits_get JEMALLOC_N(arena_mapbits_get) -#define arena_mapbits_large_binind_set JEMALLOC_N(arena_mapbits_large_binind_set) -#define arena_mapbits_large_get JEMALLOC_N(arena_mapbits_large_get) -#define arena_mapbits_large_set JEMALLOC_N(arena_mapbits_large_set) -#define arena_mapbits_large_size_get JEMALLOC_N(arena_mapbits_large_size_get) -#define arena_mapbits_small_runind_get JEMALLOC_N(arena_mapbits_small_runind_get) -#define 
arena_mapbits_small_set JEMALLOC_N(arena_mapbits_small_set) -#define arena_mapbits_unallocated_set JEMALLOC_N(arena_mapbits_unallocated_set) -#define arena_mapbits_unallocated_size_get JEMALLOC_N(arena_mapbits_unallocated_size_get) -#define arena_mapbits_unallocated_size_set JEMALLOC_N(arena_mapbits_unallocated_size_set) -#define arena_mapbits_unzeroed_get JEMALLOC_N(arena_mapbits_unzeroed_get) -#define arena_mapbits_unzeroed_set JEMALLOC_N(arena_mapbits_unzeroed_set) -#define arena_mapbitsp_get JEMALLOC_N(arena_mapbitsp_get) -#define arena_mapbitsp_read JEMALLOC_N(arena_mapbitsp_read) -#define arena_mapbitsp_write JEMALLOC_N(arena_mapbitsp_write) -#define arena_mapp_get JEMALLOC_N(arena_mapp_get) -#define arena_maxclass JEMALLOC_N(arena_maxclass) -#define arena_new JEMALLOC_N(arena_new) -#define arena_palloc JEMALLOC_N(arena_palloc) -#define arena_postfork_child JEMALLOC_N(arena_postfork_child) -#define arena_postfork_parent JEMALLOC_N(arena_postfork_parent) -#define arena_prefork JEMALLOC_N(arena_prefork) -#define arena_prof_accum JEMALLOC_N(arena_prof_accum) -#define arena_prof_accum_impl JEMALLOC_N(arena_prof_accum_impl) -#define arena_prof_accum_locked JEMALLOC_N(arena_prof_accum_locked) -#define arena_prof_ctx_get JEMALLOC_N(arena_prof_ctx_get) -#define arena_prof_ctx_set JEMALLOC_N(arena_prof_ctx_set) -#define arena_prof_promoted JEMALLOC_N(arena_prof_promoted) -#define arena_ptr_small_binind_get JEMALLOC_N(arena_ptr_small_binind_get) -#define arena_purge_all JEMALLOC_N(arena_purge_all) -#define arena_ralloc JEMALLOC_N(arena_ralloc) -#define arena_ralloc_no_move JEMALLOC_N(arena_ralloc_no_move) -#define arena_run_regind JEMALLOC_N(arena_run_regind) -#define arena_salloc JEMALLOC_N(arena_salloc) -#define arena_stats_merge JEMALLOC_N(arena_stats_merge) -#define arena_tcache_fill_small JEMALLOC_N(arena_tcache_fill_small) -#define arenas JEMALLOC_N(arenas) -#define arenas_booted JEMALLOC_N(arenas_booted) -#define arenas_cleanup JEMALLOC_N(arenas_cleanup) 
-#define arenas_extend JEMALLOC_N(arenas_extend) -#define arenas_initialized JEMALLOC_N(arenas_initialized) -#define arenas_lock JEMALLOC_N(arenas_lock) -#define arenas_tls JEMALLOC_N(arenas_tls) -#define arenas_tsd JEMALLOC_N(arenas_tsd) -#define arenas_tsd_boot JEMALLOC_N(arenas_tsd_boot) -#define arenas_tsd_cleanup_wrapper JEMALLOC_N(arenas_tsd_cleanup_wrapper) -#define arenas_tsd_get JEMALLOC_N(arenas_tsd_get) -#define arenas_tsd_get_wrapper JEMALLOC_N(arenas_tsd_get_wrapper) -#define arenas_tsd_set JEMALLOC_N(arenas_tsd_set) -#define atomic_add_u JEMALLOC_N(atomic_add_u) -#define atomic_add_uint32 JEMALLOC_N(atomic_add_uint32) -#define atomic_add_uint64 JEMALLOC_N(atomic_add_uint64) -#define atomic_add_z JEMALLOC_N(atomic_add_z) -#define atomic_sub_u JEMALLOC_N(atomic_sub_u) -#define atomic_sub_uint32 JEMALLOC_N(atomic_sub_uint32) -#define atomic_sub_uint64 JEMALLOC_N(atomic_sub_uint64) -#define atomic_sub_z JEMALLOC_N(atomic_sub_z) -#define base_alloc JEMALLOC_N(base_alloc) -#define base_boot JEMALLOC_N(base_boot) -#define base_calloc JEMALLOC_N(base_calloc) -#define base_node_alloc JEMALLOC_N(base_node_alloc) -#define base_node_dealloc JEMALLOC_N(base_node_dealloc) -#define base_postfork_child JEMALLOC_N(base_postfork_child) -#define base_postfork_parent JEMALLOC_N(base_postfork_parent) -#define base_prefork JEMALLOC_N(base_prefork) -#define bitmap_full JEMALLOC_N(bitmap_full) -#define bitmap_get JEMALLOC_N(bitmap_get) -#define bitmap_info_init JEMALLOC_N(bitmap_info_init) -#define bitmap_info_ngroups JEMALLOC_N(bitmap_info_ngroups) -#define bitmap_init JEMALLOC_N(bitmap_init) -#define bitmap_set JEMALLOC_N(bitmap_set) -#define bitmap_sfu JEMALLOC_N(bitmap_sfu) -#define bitmap_size JEMALLOC_N(bitmap_size) -#define bitmap_unset JEMALLOC_N(bitmap_unset) -#define bt_init JEMALLOC_N(bt_init) -#define buferror JEMALLOC_N(buferror) -#define choose_arena JEMALLOC_N(choose_arena) -#define choose_arena_hard JEMALLOC_N(choose_arena_hard) -#define chunk_alloc 
JEMALLOC_N(chunk_alloc) -#define chunk_alloc_dss JEMALLOC_N(chunk_alloc_dss) -#define chunk_alloc_mmap JEMALLOC_N(chunk_alloc_mmap) -#define chunk_boot JEMALLOC_N(chunk_boot) -#define chunk_dealloc JEMALLOC_N(chunk_dealloc) -#define chunk_dealloc_mmap JEMALLOC_N(chunk_dealloc_mmap) -#define chunk_dss_boot JEMALLOC_N(chunk_dss_boot) -#define chunk_dss_postfork_child JEMALLOC_N(chunk_dss_postfork_child) -#define chunk_dss_postfork_parent JEMALLOC_N(chunk_dss_postfork_parent) -#define chunk_dss_prec_get JEMALLOC_N(chunk_dss_prec_get) -#define chunk_dss_prec_set JEMALLOC_N(chunk_dss_prec_set) -#define chunk_dss_prefork JEMALLOC_N(chunk_dss_prefork) -#define chunk_in_dss JEMALLOC_N(chunk_in_dss) -#define chunk_npages JEMALLOC_N(chunk_npages) -#define chunk_postfork_child JEMALLOC_N(chunk_postfork_child) -#define chunk_postfork_parent JEMALLOC_N(chunk_postfork_parent) -#define chunk_prefork JEMALLOC_N(chunk_prefork) -#define chunk_unmap JEMALLOC_N(chunk_unmap) -#define chunks_mtx JEMALLOC_N(chunks_mtx) -#define chunks_rtree JEMALLOC_N(chunks_rtree) -#define chunksize JEMALLOC_N(chunksize) -#define chunksize_mask JEMALLOC_N(chunksize_mask) -#define ckh_bucket_search JEMALLOC_N(ckh_bucket_search) -#define ckh_count JEMALLOC_N(ckh_count) -#define ckh_delete JEMALLOC_N(ckh_delete) -#define ckh_evict_reloc_insert JEMALLOC_N(ckh_evict_reloc_insert) -#define ckh_insert JEMALLOC_N(ckh_insert) -#define ckh_isearch JEMALLOC_N(ckh_isearch) -#define ckh_iter JEMALLOC_N(ckh_iter) -#define ckh_new JEMALLOC_N(ckh_new) -#define ckh_pointer_hash JEMALLOC_N(ckh_pointer_hash) -#define ckh_pointer_keycomp JEMALLOC_N(ckh_pointer_keycomp) -#define ckh_rebuild JEMALLOC_N(ckh_rebuild) -#define ckh_remove JEMALLOC_N(ckh_remove) -#define ckh_search JEMALLOC_N(ckh_search) -#define ckh_string_hash JEMALLOC_N(ckh_string_hash) -#define ckh_string_keycomp JEMALLOC_N(ckh_string_keycomp) -#define ckh_try_bucket_insert JEMALLOC_N(ckh_try_bucket_insert) -#define ckh_try_insert JEMALLOC_N(ckh_try_insert) 
-#define ctl_boot JEMALLOC_N(ctl_boot) -#define ctl_bymib JEMALLOC_N(ctl_bymib) -#define ctl_byname JEMALLOC_N(ctl_byname) -#define ctl_nametomib JEMALLOC_N(ctl_nametomib) -#define ctl_postfork_child JEMALLOC_N(ctl_postfork_child) -#define ctl_postfork_parent JEMALLOC_N(ctl_postfork_parent) -#define ctl_prefork JEMALLOC_N(ctl_prefork) -#define dss_prec_names JEMALLOC_N(dss_prec_names) -#define extent_tree_ad_first JEMALLOC_N(extent_tree_ad_first) -#define extent_tree_ad_insert JEMALLOC_N(extent_tree_ad_insert) -#define extent_tree_ad_iter JEMALLOC_N(extent_tree_ad_iter) -#define extent_tree_ad_iter_recurse JEMALLOC_N(extent_tree_ad_iter_recurse) -#define extent_tree_ad_iter_start JEMALLOC_N(extent_tree_ad_iter_start) -#define extent_tree_ad_last JEMALLOC_N(extent_tree_ad_last) -#define extent_tree_ad_new JEMALLOC_N(extent_tree_ad_new) -#define extent_tree_ad_next JEMALLOC_N(extent_tree_ad_next) -#define extent_tree_ad_nsearch JEMALLOC_N(extent_tree_ad_nsearch) -#define extent_tree_ad_prev JEMALLOC_N(extent_tree_ad_prev) -#define extent_tree_ad_psearch JEMALLOC_N(extent_tree_ad_psearch) -#define extent_tree_ad_remove JEMALLOC_N(extent_tree_ad_remove) -#define extent_tree_ad_reverse_iter JEMALLOC_N(extent_tree_ad_reverse_iter) -#define extent_tree_ad_reverse_iter_recurse JEMALLOC_N(extent_tree_ad_reverse_iter_recurse) -#define extent_tree_ad_reverse_iter_start JEMALLOC_N(extent_tree_ad_reverse_iter_start) -#define extent_tree_ad_search JEMALLOC_N(extent_tree_ad_search) -#define extent_tree_szad_first JEMALLOC_N(extent_tree_szad_first) -#define extent_tree_szad_insert JEMALLOC_N(extent_tree_szad_insert) -#define extent_tree_szad_iter JEMALLOC_N(extent_tree_szad_iter) -#define extent_tree_szad_iter_recurse JEMALLOC_N(extent_tree_szad_iter_recurse) -#define extent_tree_szad_iter_start JEMALLOC_N(extent_tree_szad_iter_start) -#define extent_tree_szad_last JEMALLOC_N(extent_tree_szad_last) -#define extent_tree_szad_new JEMALLOC_N(extent_tree_szad_new) -#define 
extent_tree_szad_next JEMALLOC_N(extent_tree_szad_next) -#define extent_tree_szad_nsearch JEMALLOC_N(extent_tree_szad_nsearch) -#define extent_tree_szad_prev JEMALLOC_N(extent_tree_szad_prev) -#define extent_tree_szad_psearch JEMALLOC_N(extent_tree_szad_psearch) -#define extent_tree_szad_remove JEMALLOC_N(extent_tree_szad_remove) -#define extent_tree_szad_reverse_iter JEMALLOC_N(extent_tree_szad_reverse_iter) -#define extent_tree_szad_reverse_iter_recurse JEMALLOC_N(extent_tree_szad_reverse_iter_recurse) -#define extent_tree_szad_reverse_iter_start JEMALLOC_N(extent_tree_szad_reverse_iter_start) -#define extent_tree_szad_search JEMALLOC_N(extent_tree_szad_search) -#define get_errno JEMALLOC_N(get_errno) -#define hash JEMALLOC_N(hash) -#define hash_fmix_32 JEMALLOC_N(hash_fmix_32) -#define hash_fmix_64 JEMALLOC_N(hash_fmix_64) -#define hash_get_block_32 JEMALLOC_N(hash_get_block_32) -#define hash_get_block_64 JEMALLOC_N(hash_get_block_64) -#define hash_rotl_32 JEMALLOC_N(hash_rotl_32) -#define hash_rotl_64 JEMALLOC_N(hash_rotl_64) -#define hash_x64_128 JEMALLOC_N(hash_x64_128) -#define hash_x86_128 JEMALLOC_N(hash_x86_128) -#define hash_x86_32 JEMALLOC_N(hash_x86_32) -#define huge_allocated JEMALLOC_N(huge_allocated) -#define huge_boot JEMALLOC_N(huge_boot) -#define huge_dalloc JEMALLOC_N(huge_dalloc) -#define huge_malloc JEMALLOC_N(huge_malloc) -#define huge_mtx JEMALLOC_N(huge_mtx) -#define huge_ndalloc JEMALLOC_N(huge_ndalloc) -#define huge_nmalloc JEMALLOC_N(huge_nmalloc) -#define huge_palloc JEMALLOC_N(huge_palloc) -#define huge_postfork_child JEMALLOC_N(huge_postfork_child) -#define huge_postfork_parent JEMALLOC_N(huge_postfork_parent) -#define huge_prefork JEMALLOC_N(huge_prefork) -#define huge_prof_ctx_get JEMALLOC_N(huge_prof_ctx_get) -#define huge_prof_ctx_set JEMALLOC_N(huge_prof_ctx_set) -#define huge_ralloc JEMALLOC_N(huge_ralloc) -#define huge_ralloc_no_move JEMALLOC_N(huge_ralloc_no_move) -#define huge_salloc JEMALLOC_N(huge_salloc) -#define iallocm 
JEMALLOC_N(iallocm) -#define icalloc JEMALLOC_N(icalloc) -#define icallocx JEMALLOC_N(icallocx) -#define idalloc JEMALLOC_N(idalloc) -#define idallocx JEMALLOC_N(idallocx) -#define imalloc JEMALLOC_N(imalloc) -#define imallocx JEMALLOC_N(imallocx) -#define ipalloc JEMALLOC_N(ipalloc) -#define ipallocx JEMALLOC_N(ipallocx) -#define iqalloc JEMALLOC_N(iqalloc) -#define iqallocx JEMALLOC_N(iqallocx) -#define iralloc JEMALLOC_N(iralloc) -#define irallocx JEMALLOC_N(irallocx) -#define isalloc JEMALLOC_N(isalloc) -#define isthreaded JEMALLOC_N(isthreaded) -#define ivsalloc JEMALLOC_N(ivsalloc) -#define jemalloc_postfork_child JEMALLOC_N(jemalloc_postfork_child) -#define jemalloc_postfork_parent JEMALLOC_N(jemalloc_postfork_parent) -#define jemalloc_prefork JEMALLOC_N(jemalloc_prefork) -#define malloc_cprintf JEMALLOC_N(malloc_cprintf) -#define malloc_mutex_init JEMALLOC_N(malloc_mutex_init) -#define malloc_mutex_lock JEMALLOC_N(malloc_mutex_lock) -#define malloc_mutex_postfork_child JEMALLOC_N(malloc_mutex_postfork_child) -#define malloc_mutex_postfork_parent JEMALLOC_N(malloc_mutex_postfork_parent) -#define malloc_mutex_prefork JEMALLOC_N(malloc_mutex_prefork) -#define malloc_mutex_unlock JEMALLOC_N(malloc_mutex_unlock) -#define malloc_printf JEMALLOC_N(malloc_printf) -#define malloc_snprintf JEMALLOC_N(malloc_snprintf) -#define malloc_strtoumax JEMALLOC_N(malloc_strtoumax) -#define malloc_tsd_boot JEMALLOC_N(malloc_tsd_boot) -#define malloc_tsd_cleanup_register JEMALLOC_N(malloc_tsd_cleanup_register) -#define malloc_tsd_dalloc JEMALLOC_N(malloc_tsd_dalloc) -#define malloc_tsd_malloc JEMALLOC_N(malloc_tsd_malloc) -#define malloc_tsd_no_cleanup JEMALLOC_N(malloc_tsd_no_cleanup) -#define malloc_vcprintf JEMALLOC_N(malloc_vcprintf) -#define malloc_vsnprintf JEMALLOC_N(malloc_vsnprintf) -#define malloc_write JEMALLOC_N(malloc_write) -#define map_bias JEMALLOC_N(map_bias) -#define mb_write JEMALLOC_N(mb_write) -#define mutex_boot JEMALLOC_N(mutex_boot) -#define narenas_auto 
JEMALLOC_N(narenas_auto) -#define narenas_total JEMALLOC_N(narenas_total) -#define narenas_total_get JEMALLOC_N(narenas_total_get) -#define ncpus JEMALLOC_N(ncpus) -#define nhbins JEMALLOC_N(nhbins) -#define opt_abort JEMALLOC_N(opt_abort) -#define opt_junk JEMALLOC_N(opt_junk) -#define opt_lg_chunk JEMALLOC_N(opt_lg_chunk) -#define opt_lg_dirty_mult JEMALLOC_N(opt_lg_dirty_mult) -#define opt_lg_prof_interval JEMALLOC_N(opt_lg_prof_interval) -#define opt_lg_prof_sample JEMALLOC_N(opt_lg_prof_sample) -#define opt_lg_tcache_max JEMALLOC_N(opt_lg_tcache_max) -#define opt_narenas JEMALLOC_N(opt_narenas) -#define opt_prof JEMALLOC_N(opt_prof) -#define opt_prof_accum JEMALLOC_N(opt_prof_accum) -#define opt_prof_active JEMALLOC_N(opt_prof_active) -#define opt_prof_final JEMALLOC_N(opt_prof_final) -#define opt_prof_gdump JEMALLOC_N(opt_prof_gdump) -#define opt_prof_leak JEMALLOC_N(opt_prof_leak) -#define opt_prof_prefix JEMALLOC_N(opt_prof_prefix) -#define opt_quarantine JEMALLOC_N(opt_quarantine) -#define opt_redzone JEMALLOC_N(opt_redzone) -#define opt_stats_print JEMALLOC_N(opt_stats_print) -#define opt_tcache JEMALLOC_N(opt_tcache) -#define opt_utrace JEMALLOC_N(opt_utrace) -#define opt_valgrind JEMALLOC_N(opt_valgrind) -#define opt_xmalloc JEMALLOC_N(opt_xmalloc) -#define opt_zero JEMALLOC_N(opt_zero) -#define p2rz JEMALLOC_N(p2rz) -#define pages_purge JEMALLOC_N(pages_purge) -#define pow2_ceil JEMALLOC_N(pow2_ceil) -#define prof_backtrace JEMALLOC_N(prof_backtrace) -#define prof_boot0 JEMALLOC_N(prof_boot0) -#define prof_boot1 JEMALLOC_N(prof_boot1) -#define prof_boot2 JEMALLOC_N(prof_boot2) -#define prof_ctx_get JEMALLOC_N(prof_ctx_get) -#define prof_ctx_set JEMALLOC_N(prof_ctx_set) -#define prof_free JEMALLOC_N(prof_free) -#define prof_gdump JEMALLOC_N(prof_gdump) -#define prof_idump JEMALLOC_N(prof_idump) -#define prof_interval JEMALLOC_N(prof_interval) -#define prof_lookup JEMALLOC_N(prof_lookup) -#define prof_malloc JEMALLOC_N(prof_malloc) -#define prof_mdump 
JEMALLOC_N(prof_mdump) -#define prof_postfork_child JEMALLOC_N(prof_postfork_child) -#define prof_postfork_parent JEMALLOC_N(prof_postfork_parent) -#define prof_prefork JEMALLOC_N(prof_prefork) -#define prof_promote JEMALLOC_N(prof_promote) -#define prof_realloc JEMALLOC_N(prof_realloc) -#define prof_sample_accum_update JEMALLOC_N(prof_sample_accum_update) -#define prof_sample_threshold_update JEMALLOC_N(prof_sample_threshold_update) -#define prof_tdata_booted JEMALLOC_N(prof_tdata_booted) -#define prof_tdata_cleanup JEMALLOC_N(prof_tdata_cleanup) -#define prof_tdata_get JEMALLOC_N(prof_tdata_get) -#define prof_tdata_init JEMALLOC_N(prof_tdata_init) -#define prof_tdata_initialized JEMALLOC_N(prof_tdata_initialized) -#define prof_tdata_tls JEMALLOC_N(prof_tdata_tls) -#define prof_tdata_tsd JEMALLOC_N(prof_tdata_tsd) -#define prof_tdata_tsd_boot JEMALLOC_N(prof_tdata_tsd_boot) -#define prof_tdata_tsd_cleanup_wrapper JEMALLOC_N(prof_tdata_tsd_cleanup_wrapper) -#define prof_tdata_tsd_get JEMALLOC_N(prof_tdata_tsd_get) -#define prof_tdata_tsd_get_wrapper JEMALLOC_N(prof_tdata_tsd_get_wrapper) -#define prof_tdata_tsd_set JEMALLOC_N(prof_tdata_tsd_set) -#define quarantine JEMALLOC_N(quarantine) -#define quarantine_alloc_hook JEMALLOC_N(quarantine_alloc_hook) -#define quarantine_boot JEMALLOC_N(quarantine_boot) -#define quarantine_booted JEMALLOC_N(quarantine_booted) -#define quarantine_cleanup JEMALLOC_N(quarantine_cleanup) -#define quarantine_init JEMALLOC_N(quarantine_init) -#define quarantine_tls JEMALLOC_N(quarantine_tls) -#define quarantine_tsd JEMALLOC_N(quarantine_tsd) -#define quarantine_tsd_boot JEMALLOC_N(quarantine_tsd_boot) -#define quarantine_tsd_cleanup_wrapper JEMALLOC_N(quarantine_tsd_cleanup_wrapper) -#define quarantine_tsd_get JEMALLOC_N(quarantine_tsd_get) -#define quarantine_tsd_get_wrapper JEMALLOC_N(quarantine_tsd_get_wrapper) -#define quarantine_tsd_set JEMALLOC_N(quarantine_tsd_set) -#define register_zone JEMALLOC_N(register_zone) -#define 
rtree_get JEMALLOC_N(rtree_get) -#define rtree_get_locked JEMALLOC_N(rtree_get_locked) -#define rtree_new JEMALLOC_N(rtree_new) -#define rtree_postfork_child JEMALLOC_N(rtree_postfork_child) -#define rtree_postfork_parent JEMALLOC_N(rtree_postfork_parent) -#define rtree_prefork JEMALLOC_N(rtree_prefork) -#define rtree_set JEMALLOC_N(rtree_set) -#define s2u JEMALLOC_N(s2u) -#define sa2u JEMALLOC_N(sa2u) -#define set_errno JEMALLOC_N(set_errno) -#define stats_cactive JEMALLOC_N(stats_cactive) -#define stats_cactive_add JEMALLOC_N(stats_cactive_add) -#define stats_cactive_get JEMALLOC_N(stats_cactive_get) -#define stats_cactive_sub JEMALLOC_N(stats_cactive_sub) -#define stats_chunks JEMALLOC_N(stats_chunks) -#define stats_print JEMALLOC_N(stats_print) -#define tcache_alloc_easy JEMALLOC_N(tcache_alloc_easy) -#define tcache_alloc_large JEMALLOC_N(tcache_alloc_large) -#define tcache_alloc_small JEMALLOC_N(tcache_alloc_small) -#define tcache_alloc_small_hard JEMALLOC_N(tcache_alloc_small_hard) -#define tcache_arena_associate JEMALLOC_N(tcache_arena_associate) -#define tcache_arena_dissociate JEMALLOC_N(tcache_arena_dissociate) -#define tcache_bin_flush_large JEMALLOC_N(tcache_bin_flush_large) -#define tcache_bin_flush_small JEMALLOC_N(tcache_bin_flush_small) -#define tcache_bin_info JEMALLOC_N(tcache_bin_info) -#define tcache_boot0 JEMALLOC_N(tcache_boot0) -#define tcache_boot1 JEMALLOC_N(tcache_boot1) -#define tcache_booted JEMALLOC_N(tcache_booted) -#define tcache_create JEMALLOC_N(tcache_create) -#define tcache_dalloc_large JEMALLOC_N(tcache_dalloc_large) -#define tcache_dalloc_small JEMALLOC_N(tcache_dalloc_small) -#define tcache_destroy JEMALLOC_N(tcache_destroy) -#define tcache_enabled_booted JEMALLOC_N(tcache_enabled_booted) -#define tcache_enabled_get JEMALLOC_N(tcache_enabled_get) -#define tcache_enabled_initialized JEMALLOC_N(tcache_enabled_initialized) -#define tcache_enabled_set JEMALLOC_N(tcache_enabled_set) -#define tcache_enabled_tls 
JEMALLOC_N(tcache_enabled_tls) -#define tcache_enabled_tsd JEMALLOC_N(tcache_enabled_tsd) -#define tcache_enabled_tsd_boot JEMALLOC_N(tcache_enabled_tsd_boot) -#define tcache_enabled_tsd_cleanup_wrapper JEMALLOC_N(tcache_enabled_tsd_cleanup_wrapper) -#define tcache_enabled_tsd_get JEMALLOC_N(tcache_enabled_tsd_get) -#define tcache_enabled_tsd_get_wrapper JEMALLOC_N(tcache_enabled_tsd_get_wrapper) -#define tcache_enabled_tsd_set JEMALLOC_N(tcache_enabled_tsd_set) -#define tcache_event JEMALLOC_N(tcache_event) -#define tcache_event_hard JEMALLOC_N(tcache_event_hard) -#define tcache_flush JEMALLOC_N(tcache_flush) -#define tcache_get JEMALLOC_N(tcache_get) -#define tcache_initialized JEMALLOC_N(tcache_initialized) -#define tcache_maxclass JEMALLOC_N(tcache_maxclass) -#define tcache_salloc JEMALLOC_N(tcache_salloc) -#define tcache_stats_merge JEMALLOC_N(tcache_stats_merge) -#define tcache_thread_cleanup JEMALLOC_N(tcache_thread_cleanup) -#define tcache_tls JEMALLOC_N(tcache_tls) -#define tcache_tsd JEMALLOC_N(tcache_tsd) -#define tcache_tsd_boot JEMALLOC_N(tcache_tsd_boot) -#define tcache_tsd_cleanup_wrapper JEMALLOC_N(tcache_tsd_cleanup_wrapper) -#define tcache_tsd_get JEMALLOC_N(tcache_tsd_get) -#define tcache_tsd_get_wrapper JEMALLOC_N(tcache_tsd_get_wrapper) -#define tcache_tsd_set JEMALLOC_N(tcache_tsd_set) -#define thread_allocated_booted JEMALLOC_N(thread_allocated_booted) -#define thread_allocated_initialized JEMALLOC_N(thread_allocated_initialized) -#define thread_allocated_tls JEMALLOC_N(thread_allocated_tls) -#define thread_allocated_tsd JEMALLOC_N(thread_allocated_tsd) -#define thread_allocated_tsd_boot JEMALLOC_N(thread_allocated_tsd_boot) -#define thread_allocated_tsd_cleanup_wrapper JEMALLOC_N(thread_allocated_tsd_cleanup_wrapper) -#define thread_allocated_tsd_get JEMALLOC_N(thread_allocated_tsd_get) -#define thread_allocated_tsd_get_wrapper JEMALLOC_N(thread_allocated_tsd_get_wrapper) -#define thread_allocated_tsd_set JEMALLOC_N(thread_allocated_tsd_set) 
-#define u2rz JEMALLOC_N(u2rz) diff --git a/include/jemalloc/internal/private_namespace.sh b/include/jemalloc/internal/private_namespace.sh new file mode 100755 index 00000000..cd25eb30 --- /dev/null +++ b/include/jemalloc/internal/private_namespace.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +for symbol in `cat $1` ; do + echo "#define ${symbol} JEMALLOC_N(${symbol})" +done diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt new file mode 100644 index 00000000..9fbc625f --- /dev/null +++ b/include/jemalloc/internal/private_symbols.txt @@ -0,0 +1,402 @@ +a0calloc +a0free +a0malloc +arena_alloc_junk_small +arena_bin_index +arena_bin_info +arena_boot +arena_dalloc +arena_dalloc_bin +arena_dalloc_bin_locked +arena_dalloc_junk_small +arena_dalloc_large +arena_dalloc_large_locked +arena_dalloc_small +arena_dss_prec_get +arena_dss_prec_set +arena_malloc +arena_malloc_large +arena_malloc_small +arena_mapbits_allocated_get +arena_mapbits_binind_get +arena_mapbits_dirty_get +arena_mapbits_get +arena_mapbits_large_binind_set +arena_mapbits_large_get +arena_mapbits_large_set +arena_mapbits_large_size_get +arena_mapbits_small_runind_get +arena_mapbits_small_set +arena_mapbits_unallocated_set +arena_mapbits_unallocated_size_get +arena_mapbits_unallocated_size_set +arena_mapbits_unzeroed_get +arena_mapbits_unzeroed_set +arena_mapbitsp_get +arena_mapbitsp_read +arena_mapbitsp_write +arena_mapp_get +arena_maxclass +arena_new +arena_palloc +arena_postfork_child +arena_postfork_parent +arena_prefork +arena_prof_accum +arena_prof_accum_impl +arena_prof_accum_locked +arena_prof_ctx_get +arena_prof_ctx_set +arena_prof_promoted +arena_ptr_small_binind_get +arena_purge_all +arena_ralloc +arena_ralloc_no_move +arena_run_regind +arena_salloc +arena_stats_merge +arena_tcache_fill_small +arenas +arenas_booted +arenas_cleanup +arenas_extend +arenas_initialized +arenas_lock +arenas_tls +arenas_tsd +arenas_tsd_boot +arenas_tsd_cleanup_wrapper 
+arenas_tsd_get +arenas_tsd_get_wrapper +arenas_tsd_init_head +arenas_tsd_set +atomic_add_u +atomic_add_uint32 +atomic_add_uint64 +atomic_add_z +atomic_sub_u +atomic_sub_uint32 +atomic_sub_uint64 +atomic_sub_z +base_alloc +base_boot +base_calloc +base_node_alloc +base_node_dealloc +base_postfork_child +base_postfork_parent +base_prefork +bitmap_full +bitmap_get +bitmap_info_init +bitmap_info_ngroups +bitmap_init +bitmap_set +bitmap_sfu +bitmap_size +bitmap_unset +bt_init +buferror +choose_arena +choose_arena_hard +chunk_alloc +chunk_alloc_dss +chunk_alloc_mmap +chunk_boot +chunk_dealloc +chunk_dealloc_mmap +chunk_dss_boot +chunk_dss_postfork_child +chunk_dss_postfork_parent +chunk_dss_prec_get +chunk_dss_prec_set +chunk_dss_prefork +chunk_in_dss +chunk_npages +chunk_postfork_child +chunk_postfork_parent +chunk_prefork +chunk_unmap +chunks_mtx +chunks_rtree +chunksize +chunksize_mask +ckh_bucket_search +ckh_count +ckh_delete +ckh_evict_reloc_insert +ckh_insert +ckh_isearch +ckh_iter +ckh_new +ckh_pointer_hash +ckh_pointer_keycomp +ckh_rebuild +ckh_remove +ckh_search +ckh_string_hash +ckh_string_keycomp +ckh_try_bucket_insert +ckh_try_insert +ctl_boot +ctl_bymib +ctl_byname +ctl_nametomib +ctl_postfork_child +ctl_postfork_parent +ctl_prefork +dss_prec_names +extent_tree_ad_first +extent_tree_ad_insert +extent_tree_ad_iter +extent_tree_ad_iter_recurse +extent_tree_ad_iter_start +extent_tree_ad_last +extent_tree_ad_new +extent_tree_ad_next +extent_tree_ad_nsearch +extent_tree_ad_prev +extent_tree_ad_psearch +extent_tree_ad_remove +extent_tree_ad_reverse_iter +extent_tree_ad_reverse_iter_recurse +extent_tree_ad_reverse_iter_start +extent_tree_ad_search +extent_tree_szad_first +extent_tree_szad_insert +extent_tree_szad_iter +extent_tree_szad_iter_recurse +extent_tree_szad_iter_start +extent_tree_szad_last +extent_tree_szad_new +extent_tree_szad_next +extent_tree_szad_nsearch +extent_tree_szad_prev +extent_tree_szad_psearch +extent_tree_szad_remove 
+extent_tree_szad_reverse_iter +extent_tree_szad_reverse_iter_recurse +extent_tree_szad_reverse_iter_start +extent_tree_szad_search +get_errno +hash +hash_fmix_32 +hash_fmix_64 +hash_get_block_32 +hash_get_block_64 +hash_rotl_32 +hash_rotl_64 +hash_x64_128 +hash_x86_128 +hash_x86_32 +huge_allocated +huge_boot +huge_dalloc +huge_malloc +huge_mtx +huge_ndalloc +huge_nmalloc +huge_palloc +huge_postfork_child +huge_postfork_parent +huge_prefork +huge_prof_ctx_get +huge_prof_ctx_set +huge_ralloc +huge_ralloc_no_move +huge_salloc +iallocm +icalloc +icallocx +idalloc +idallocx +imalloc +imallocx +ipalloc +ipallocx +iqalloc +iqallocx +iralloc +irallocx +isalloc +isthreaded +ivsalloc +jemalloc_postfork_child +jemalloc_postfork_parent +jemalloc_prefork +malloc_cprintf +malloc_mutex_init +malloc_mutex_lock +malloc_mutex_postfork_child +malloc_mutex_postfork_parent +malloc_mutex_prefork +malloc_mutex_unlock +malloc_printf +malloc_snprintf +malloc_strtoumax +malloc_tsd_boot +malloc_tsd_cleanup_register +malloc_tsd_dalloc +malloc_tsd_malloc +malloc_tsd_no_cleanup +malloc_vcprintf +malloc_vsnprintf +malloc_write +map_bias +mb_write +mutex_boot +narenas_auto +narenas_total +narenas_total_get +ncpus +nhbins +opt_abort +opt_dss +opt_junk +opt_lg_chunk +opt_lg_dirty_mult +opt_lg_prof_interval +opt_lg_prof_sample +opt_lg_tcache_max +opt_narenas +opt_prof +opt_prof_accum +opt_prof_active +opt_prof_final +opt_prof_gdump +opt_prof_leak +opt_prof_prefix +opt_quarantine +opt_redzone +opt_stats_print +opt_tcache +opt_utrace +opt_valgrind +opt_xmalloc +opt_zero +p2rz +pages_purge +pow2_ceil +prof_backtrace +prof_boot0 +prof_boot1 +prof_boot2 +prof_ctx_get +prof_ctx_set +prof_free +prof_gdump +prof_idump +prof_interval +prof_lookup +prof_malloc +prof_mdump +prof_postfork_child +prof_postfork_parent +prof_prefork +prof_promote +prof_realloc +prof_sample_accum_update +prof_sample_threshold_update +prof_tdata_booted +prof_tdata_cleanup +prof_tdata_get +prof_tdata_init +prof_tdata_initialized 
+prof_tdata_tls +prof_tdata_tsd +prof_tdata_tsd_boot +prof_tdata_tsd_cleanup_wrapper +prof_tdata_tsd_get +prof_tdata_tsd_get_wrapper +prof_tdata_tsd_init_head +prof_tdata_tsd_set +quarantine +quarantine_alloc_hook +quarantine_boot +quarantine_booted +quarantine_cleanup +quarantine_init +quarantine_tls +quarantine_tsd +quarantine_tsd_boot +quarantine_tsd_cleanup_wrapper +quarantine_tsd_get +quarantine_tsd_get_wrapper +quarantine_tsd_init_head +quarantine_tsd_set +register_zone +rtree_get +rtree_get_locked +rtree_new +rtree_postfork_child +rtree_postfork_parent +rtree_prefork +rtree_set +s2u +sa2u +set_errno +small_size2bin +stats_cactive +stats_cactive_add +stats_cactive_get +stats_cactive_sub +stats_chunks +stats_print +tcache_alloc_easy +tcache_alloc_large +tcache_alloc_small +tcache_alloc_small_hard +tcache_arena_associate +tcache_arena_dissociate +tcache_bin_flush_large +tcache_bin_flush_small +tcache_bin_info +tcache_boot0 +tcache_boot1 +tcache_booted +tcache_create +tcache_dalloc_large +tcache_dalloc_small +tcache_destroy +tcache_enabled_booted +tcache_enabled_get +tcache_enabled_initialized +tcache_enabled_set +tcache_enabled_tls +tcache_enabled_tsd +tcache_enabled_tsd_boot +tcache_enabled_tsd_cleanup_wrapper +tcache_enabled_tsd_get +tcache_enabled_tsd_get_wrapper +tcache_enabled_tsd_init_head +tcache_enabled_tsd_set +tcache_event +tcache_event_hard +tcache_flush +tcache_get +tcache_initialized +tcache_maxclass +tcache_salloc +tcache_stats_merge +tcache_thread_cleanup +tcache_tls +tcache_tsd +tcache_tsd_boot +tcache_tsd_cleanup_wrapper +tcache_tsd_get +tcache_tsd_get_wrapper +tcache_tsd_init_head +tcache_tsd_set +thread_allocated_booted +thread_allocated_initialized +thread_allocated_tls +thread_allocated_tsd +thread_allocated_tsd_boot +thread_allocated_tsd_cleanup_wrapper +thread_allocated_tsd_get +thread_allocated_tsd_get_wrapper +thread_allocated_tsd_init_head +thread_allocated_tsd_set +tsd_init_check_recursion +tsd_init_finish +u2rz diff --git 
a/include/jemalloc/internal/private_unnamespace.sh b/include/jemalloc/internal/private_unnamespace.sh new file mode 100755 index 00000000..23fed8e8 --- /dev/null +++ b/include/jemalloc/internal/private_unnamespace.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +for symbol in `cat $1` ; do + echo "#undef ${symbol}" +done diff --git a/include/jemalloc/internal/public_namespace.sh b/include/jemalloc/internal/public_namespace.sh new file mode 100755 index 00000000..6d6de619 --- /dev/null +++ b/include/jemalloc/internal/public_namespace.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +for symbol in `cat $1` ; do + echo "#define je_${symbol} JEMALLOC_N(${symbol})" +done diff --git a/include/jemalloc/internal/public_symbols.txt b/include/jemalloc/internal/public_symbols.txt new file mode 100644 index 00000000..7d097422 --- /dev/null +++ b/include/jemalloc/internal/public_symbols.txt @@ -0,0 +1,21 @@ +malloc_conf +malloc_message +malloc +calloc +posix_memalign +aligned_alloc +realloc +free +malloc_usable_size +malloc_stats_print +mallctl +mallctlnametomib +mallctlbymib +memalign +valloc +allocm +rallocm +sallocm +dallocm +nallocm + diff --git a/include/jemalloc/internal/public_unnamespace.sh b/include/jemalloc/internal/public_unnamespace.sh new file mode 100755 index 00000000..f42ff6e2 --- /dev/null +++ b/include/jemalloc/internal/public_unnamespace.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +for symbol in `cat $1` ; do + echo "#undef je_${symbol}" +done diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h index 969e3e39..bf18d3c9 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util.h @@ -14,7 +14,7 @@ * Wrap a cpp argument that contains commas such that it isn't broken up into * multiple arguments. */ -#define JEMALLOC_CONCAT(...) __VA_ARGS__ +#define JEMALLOC_ARG_CONCAT(...) __VA_ARGS__ /* * Silence compiler warnings due to uninitialized values. 
This is used @@ -63,10 +63,12 @@ } while (0) #endif +#ifndef assert_not_implemented #define assert_not_implemented(e) do { \ if (config_debug && !(e)) \ not_implemented(); \ } while (0) +#endif /* Use to assert a particular configuration, e.g., cassert(config_debug). */ #define cassert(c) do { \ @@ -107,7 +109,6 @@ void malloc_printf(const char *format, ...) #ifndef JEMALLOC_ENABLE_INLINE size_t pow2_ceil(size_t x); -void malloc_write(const char *s); void set_errno(int errnum); int get_errno(void); #endif diff --git a/include/jemalloc/jemalloc.h.in b/include/jemalloc/jemalloc.h.in deleted file mode 100644 index 31b1304a..00000000 --- a/include/jemalloc/jemalloc.h.in +++ /dev/null @@ -1,157 +0,0 @@ -#ifndef JEMALLOC_H_ -#define JEMALLOC_H_ -#ifdef __cplusplus -extern "C" { -#endif - -#include -#include - -#define JEMALLOC_VERSION "@jemalloc_version@" -#define JEMALLOC_VERSION_MAJOR @jemalloc_version_major@ -#define JEMALLOC_VERSION_MINOR @jemalloc_version_minor@ -#define JEMALLOC_VERSION_BUGFIX @jemalloc_version_bugfix@ -#define JEMALLOC_VERSION_NREV @jemalloc_version_nrev@ -#define JEMALLOC_VERSION_GID "@jemalloc_version_gid@" - -#include "jemalloc_defs@install_suffix@.h" - -#ifdef JEMALLOC_EXPERIMENTAL -#define ALLOCM_LG_ALIGN(la) (la) -#if LG_SIZEOF_PTR == 2 -#define ALLOCM_ALIGN(a) (ffs(a)-1) -#else -#define ALLOCM_ALIGN(a) ((a < (size_t)INT_MAX) ? ffs(a)-1 : ffs(a>>32)+31) -#endif -#define ALLOCM_ZERO ((int)0x40) -#define ALLOCM_NO_MOVE ((int)0x80) -/* Bias arena index bits so that 0 encodes "ALLOCM_ARENA() unspecified". */ -#define ALLOCM_ARENA(a) ((int)(((a)+1) << 8)) - -#define ALLOCM_SUCCESS 0 -#define ALLOCM_ERR_OOM 1 -#define ALLOCM_ERR_NOT_MOVED 2 -#endif - -/* - * The je_ prefix on the following public symbol declarations is an artifact of - * namespace management, and should be omitted in application code unless - * JEMALLOC_NO_DEMANGLE is defined (see below). 
- */ -extern JEMALLOC_EXPORT const char *je_malloc_conf; -extern JEMALLOC_EXPORT void (*je_malloc_message)(void *cbopaque, - const char *s); - -JEMALLOC_EXPORT void *je_malloc(size_t size) JEMALLOC_ATTR(malloc); -JEMALLOC_EXPORT void *je_calloc(size_t num, size_t size) - JEMALLOC_ATTR(malloc); -JEMALLOC_EXPORT int je_posix_memalign(void **memptr, size_t alignment, - size_t size) JEMALLOC_ATTR(nonnull(1)); -JEMALLOC_EXPORT void *je_aligned_alloc(size_t alignment, size_t size) - JEMALLOC_ATTR(malloc); -JEMALLOC_EXPORT void *je_realloc(void *ptr, size_t size); -JEMALLOC_EXPORT void je_free(void *ptr); - -#ifdef JEMALLOC_OVERRIDE_MEMALIGN -JEMALLOC_EXPORT void * je_memalign(size_t alignment, size_t size) - JEMALLOC_ATTR(malloc); -#endif - -#ifdef JEMALLOC_OVERRIDE_VALLOC -JEMALLOC_EXPORT void * je_valloc(size_t size) JEMALLOC_ATTR(malloc); -#endif - -JEMALLOC_EXPORT size_t je_malloc_usable_size( - JEMALLOC_USABLE_SIZE_CONST void *ptr); -JEMALLOC_EXPORT void je_malloc_stats_print(void (*write_cb)(void *, - const char *), void *je_cbopaque, const char *opts); -JEMALLOC_EXPORT int je_mallctl(const char *name, void *oldp, - size_t *oldlenp, void *newp, size_t newlen); -JEMALLOC_EXPORT int je_mallctlnametomib(const char *name, size_t *mibp, - size_t *miblenp); -JEMALLOC_EXPORT int je_mallctlbymib(const size_t *mib, size_t miblen, - void *oldp, size_t *oldlenp, void *newp, size_t newlen); - -#ifdef JEMALLOC_EXPERIMENTAL -JEMALLOC_EXPORT int je_allocm(void **ptr, size_t *rsize, size_t size, - int flags) JEMALLOC_ATTR(nonnull(1)); -JEMALLOC_EXPORT int je_rallocm(void **ptr, size_t *rsize, size_t size, - size_t extra, int flags) JEMALLOC_ATTR(nonnull(1)); -JEMALLOC_EXPORT int je_sallocm(const void *ptr, size_t *rsize, int flags) - JEMALLOC_ATTR(nonnull(1)); -JEMALLOC_EXPORT int je_dallocm(void *ptr, int flags) - JEMALLOC_ATTR(nonnull(1)); -JEMALLOC_EXPORT int je_nallocm(size_t *rsize, size_t size, int flags); -#endif - -/* - * By default application code must explicitly refer 
to mangled symbol names, - * so that it is possible to use jemalloc in conjunction with another allocator - * in the same application. Define JEMALLOC_MANGLE in order to cause automatic - * name mangling that matches the API prefixing that happened as a result of - * --with-mangling and/or --with-jemalloc-prefix configuration settings. - */ -#ifdef JEMALLOC_MANGLE -#ifndef JEMALLOC_NO_DEMANGLE -#define JEMALLOC_NO_DEMANGLE -#endif -#define malloc_conf je_malloc_conf -#define malloc_message je_malloc_message -#define malloc je_malloc -#define calloc je_calloc -#define posix_memalign je_posix_memalign -#define aligned_alloc je_aligned_alloc -#define realloc je_realloc -#define free je_free -#define malloc_usable_size je_malloc_usable_size -#define malloc_stats_print je_malloc_stats_print -#define mallctl je_mallctl -#define mallctlnametomib je_mallctlnametomib -#define mallctlbymib je_mallctlbymib -#define memalign je_memalign -#define valloc je_valloc -#ifdef JEMALLOC_EXPERIMENTAL -#define allocm je_allocm -#define rallocm je_rallocm -#define sallocm je_sallocm -#define dallocm je_dallocm -#define nallocm je_nallocm -#endif -#endif - -/* - * The je_* macros can be used as stable alternative names for the public - * jemalloc API if JEMALLOC_NO_DEMANGLE is defined. This is primarily meant - * for use in jemalloc itself, but it can be used by application code to - * provide isolation from the name mangling specified via --with-mangling - * and/or --with-jemalloc-prefix. 
- */ -#ifndef JEMALLOC_NO_DEMANGLE -#undef je_malloc_conf -#undef je_malloc_message -#undef je_malloc -#undef je_calloc -#undef je_posix_memalign -#undef je_aligned_alloc -#undef je_realloc -#undef je_free -#undef je_malloc_usable_size -#undef je_malloc_stats_print -#undef je_mallctl -#undef je_mallctlnametomib -#undef je_mallctlbymib -#undef je_memalign -#undef je_valloc -#ifdef JEMALLOC_EXPERIMENTAL -#undef je_allocm -#undef je_rallocm -#undef je_sallocm -#undef je_dallocm -#undef je_nallocm -#endif -#endif - -#ifdef __cplusplus -}; -#endif -#endif /* JEMALLOC_H_ */ diff --git a/include/jemalloc/jemalloc.sh b/include/jemalloc/jemalloc.sh new file mode 100755 index 00000000..00a0b76c --- /dev/null +++ b/include/jemalloc/jemalloc.sh @@ -0,0 +1,24 @@ +#!/bin/sh + +objroot=$1 + +cat < +#include + +#define JEMALLOC_VERSION "@jemalloc_version@" +#define JEMALLOC_VERSION_MAJOR @jemalloc_version_major@ +#define JEMALLOC_VERSION_MINOR @jemalloc_version_minor@ +#define JEMALLOC_VERSION_BUGFIX @jemalloc_version_bugfix@ +#define JEMALLOC_VERSION_NREV @jemalloc_version_nrev@ +#define JEMALLOC_VERSION_GID "@jemalloc_version_gid@" + +#include "jemalloc_defs@install_suffix@.h" + +#ifdef JEMALLOC_EXPERIMENTAL +#define ALLOCM_LG_ALIGN(la) (la) +#if LG_SIZEOF_PTR == 2 +#define ALLOCM_ALIGN(a) (ffs(a)-1) +#else +#define ALLOCM_ALIGN(a) ((a < (size_t)INT_MAX) ? ffs(a)-1 : ffs(a>>32)+31) +#endif +#define ALLOCM_ZERO ((int)0x40) +#define ALLOCM_NO_MOVE ((int)0x80) +/* Bias arena index bits so that 0 encodes "ALLOCM_ARENA() unspecified". 
*/ +#define ALLOCM_ARENA(a) ((int)(((a)+1) << 8)) + +#define ALLOCM_SUCCESS 0 +#define ALLOCM_ERR_OOM 1 +#define ALLOCM_ERR_NOT_MOVED 2 +#endif diff --git a/include/jemalloc/jemalloc_mangle.h.in b/include/jemalloc/jemalloc_mangle.h.in new file mode 100644 index 00000000..ad4c27c0 --- /dev/null +++ b/include/jemalloc/jemalloc_mangle.h.in @@ -0,0 +1,66 @@ +/* + * By default application code must explicitly refer to mangled symbol names, + * so that it is possible to use jemalloc in conjunction with another allocator + * in the same application. Define JEMALLOC_MANGLE in order to cause automatic + * name mangling that matches the API prefixing that happened as a result of + * --with-mangling and/or --with-jemalloc-prefix configuration settings. + */ +#ifdef JEMALLOC_MANGLE +#ifndef JEMALLOC_NO_DEMANGLE +#define JEMALLOC_NO_DEMANGLE +#endif +#define malloc_conf je_malloc_conf +#define malloc_message je_malloc_message +#define malloc je_malloc +#define calloc je_calloc +#define posix_memalign je_posix_memalign +#define aligned_alloc je_aligned_alloc +#define realloc je_realloc +#define free je_free +#define malloc_usable_size je_malloc_usable_size +#define malloc_stats_print je_malloc_stats_print +#define mallctl je_mallctl +#define mallctlnametomib je_mallctlnametomib +#define mallctlbymib je_mallctlbymib +#define memalign je_memalign +#define valloc je_valloc +#ifdef JEMALLOC_EXPERIMENTAL +#define allocm je_allocm +#define rallocm je_rallocm +#define sallocm je_sallocm +#define dallocm je_dallocm +#define nallocm je_nallocm +#endif +#endif + +/* + * The je_* macros can be used as stable alternative names for the public + * jemalloc API if JEMALLOC_NO_DEMANGLE is defined. This is primarily meant + * for use in jemalloc itself, but it can be used by application code to + * provide isolation from the name mangling specified via --with-mangling + * and/or --with-jemalloc-prefix. 
+ */ +#ifndef JEMALLOC_NO_DEMANGLE +#undef je_malloc_conf +#undef je_malloc_message +#undef je_malloc +#undef je_calloc +#undef je_posix_memalign +#undef je_aligned_alloc +#undef je_realloc +#undef je_free +#undef je_malloc_usable_size +#undef je_malloc_stats_print +#undef je_mallctl +#undef je_mallctlnametomib +#undef je_mallctlbymib +#undef je_memalign +#undef je_valloc +#ifdef JEMALLOC_EXPERIMENTAL +#undef je_allocm +#undef je_rallocm +#undef je_sallocm +#undef je_dallocm +#undef je_nallocm +#endif +#endif diff --git a/include/jemalloc/jemalloc_protos.h.in b/include/jemalloc/jemalloc_protos.h.in new file mode 100644 index 00000000..3dad8596 --- /dev/null +++ b/include/jemalloc/jemalloc_protos.h.in @@ -0,0 +1,50 @@ +/* + * The @je_@ prefix on the following public symbol declarations is an artifact + * of namespace management, and should be omitted in application code unless + * JEMALLOC_NO_DEMANGLE is defined (see jemalloc_mangle@install_suffix@.h). + */ +extern JEMALLOC_EXPORT const char *@je_@malloc_conf; +extern JEMALLOC_EXPORT void (*@je_@malloc_message)(void *cbopaque, + const char *s); + +JEMALLOC_EXPORT void *@je_@malloc(size_t size) JEMALLOC_ATTR(malloc); +JEMALLOC_EXPORT void *@je_@calloc(size_t num, size_t size) + JEMALLOC_ATTR(malloc); +JEMALLOC_EXPORT int @je_@posix_memalign(void **memptr, size_t alignment, + size_t size) JEMALLOC_ATTR(nonnull(1)); +JEMALLOC_EXPORT void *@je_@aligned_alloc(size_t alignment, size_t size) + JEMALLOC_ATTR(malloc); +JEMALLOC_EXPORT void *@je_@realloc(void *ptr, size_t size); +JEMALLOC_EXPORT void @je_@free(void *ptr); + +#ifdef JEMALLOC_OVERRIDE_MEMALIGN +JEMALLOC_EXPORT void * @je_@memalign(size_t alignment, size_t size) + JEMALLOC_ATTR(malloc); +#endif + +#ifdef JEMALLOC_OVERRIDE_VALLOC +JEMALLOC_EXPORT void * @je_@valloc(size_t size) JEMALLOC_ATTR(malloc); +#endif + +JEMALLOC_EXPORT size_t @je_@malloc_usable_size( + JEMALLOC_USABLE_SIZE_CONST void *ptr); +JEMALLOC_EXPORT void @je_@malloc_stats_print(void 
(*write_cb)(void *, + const char *), void *@je_@cbopaque, const char *opts); +JEMALLOC_EXPORT int @je_@mallctl(const char *name, void *oldp, + size_t *oldlenp, void *newp, size_t newlen); +JEMALLOC_EXPORT int @je_@mallctlnametomib(const char *name, size_t *mibp, + size_t *miblenp); +JEMALLOC_EXPORT int @je_@mallctlbymib(const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen); + +#ifdef JEMALLOC_EXPERIMENTAL +JEMALLOC_EXPORT int @je_@allocm(void **ptr, size_t *rsize, size_t size, + int flags) JEMALLOC_ATTR(nonnull(1)); +JEMALLOC_EXPORT int @je_@rallocm(void **ptr, size_t *rsize, size_t size, + size_t extra, int flags) JEMALLOC_ATTR(nonnull(1)); +JEMALLOC_EXPORT int @je_@sallocm(const void *ptr, size_t *rsize, int flags) + JEMALLOC_ATTR(nonnull(1)); +JEMALLOC_EXPORT int @je_@dallocm(void *ptr, int flags) + JEMALLOC_ATTR(nonnull(1)); +JEMALLOC_EXPORT int @je_@nallocm(size_t *rsize, size_t size, int flags); +#endif diff --git a/test/include/test/jemalloc_test.h.in b/test/include/test/jemalloc_test.h.in new file mode 100644 index 00000000..1d18eec7 --- /dev/null +++ b/test/include/test/jemalloc_test.h.in @@ -0,0 +1,111 @@ +#include +#include + +/******************************************************************************/ +/* + * Define always-enabled assertion macros, so that test assertions execute even + * if assertions are disabled in the library code. These definitions must + * exist prior to including "jemalloc/internal/util.h". 
+ */ +#define assert(e) do { \ + if (!(e)) { \ + malloc_printf( \ + ": %s:%d: Failed assertion: \"%s\"\n", \ + __FILE__, __LINE__, #e); \ + abort(); \ + } \ +} while (0) + +#define not_reached() do { \ + malloc_printf( \ + ": %s:%d: Unreachable code reached\n", \ + __FILE__, __LINE__); \ + abort(); \ +} while (0) + +#define not_implemented() do { \ + malloc_printf(": %s:%d: Not implemented\n", \ + __FILE__, __LINE__); \ + abort(); \ +} while (0) + +#define assert_not_implemented(e) do { \ + if (!(e)) \ + not_implemented(); \ +} while (0) + +/******************************************************************************/ +/* + * For unit tests, expose all public and private interfaces. + */ +#ifdef JEMALLOC_UNIT_TEST +# define JEMALLOC_JET +# include "jemalloc/internal/jemalloc_internal.h" + +/******************************************************************************/ +/* + * For integration tests, expose the public jemalloc interfaces, but only + * expose the minimum necessary internal utility code (to avoid re-implementing + * essentially identical code within the test infrastructure). + */ +#elif defined(JEMALLOC_INTEGRATION_TEST) +# define JEMALLOC_MANGLE +# include "jemalloc/jemalloc@install_suffix@.h" +# include "jemalloc/internal/jemalloc_internal_defs.h" + +# define JEMALLOC_N(n) @private_namespace@##n +# include "jemalloc/internal/private_namespace.h" + +# include +# include +# include +# define JEMALLOC_CC_SILENCE +# define JEMALLOC_H_TYPES +# define JEMALLOC_H_STRUCTS +# define JEMALLOC_H_EXTERNS +# define JEMALLOC_H_INLINES +# include "jemalloc/internal/util.h" +# undef JEMALLOC_H_TYPES +# undef JEMALLOC_H_STRUCTS +# undef JEMALLOC_H_EXTERNS +# undef JEMALLOC_H_INLINES +# undef JEMALLOC_CC_SILENCE + +/******************************************************************************/ +/* + * For stress tests, expose the public jemalloc interfaces with name mangling + * so that they can be tested as e.g. malloc() and free(). 
Also expose the + * public jemalloc interfaces with jet_ prefixes, so that stress tests can use + * a separate allocator for their internal data structures. + */ +#elif defined(JEMALLOC_STRESS_TEST) +# define JEMALLOC_NO_DEMANGLE +# include "jemalloc/jemalloc@install_suffix@.h" +# include "jemalloc/internal/public_unnamespace.h" +# undef JEMALLOC_NO_DEMANGLE + +# include "jemalloc/jemalloc_protos_jet.h" + +# define JEMALLOC_JET +# include "jemalloc/internal/jemalloc_internal.h" +# include "jemalloc/internal/public_unnamespace.h" +# undef JEMALLOC_JET + +# define JEMALLOC_MANGLE +# include "jemalloc/jemalloc_mangle@install_suffix@.h" + +/******************************************************************************/ +/* + * This header does dangerous things, the effects of which only test code + * should be subject to. + */ +#else +# error "This header cannot be included outside a testing context" +#endif + +/******************************************************************************/ +/* + * Common test utilities. + */ +#include "test/test.h" +#include "test/thread.h" diff --git a/test/include/test/test.h b/test/include/test/test.h new file mode 100644 index 00000000..ddbc55f7 --- /dev/null +++ b/test/include/test/test.h @@ -0,0 +1,2 @@ +void test_fail(const char *format, ...) JEMALLOC_ATTR(format(printf, 1, 2)); +void test_skip(const char *format, ...) 
JEMALLOC_ATTR(format(printf, 1, 2)); diff --git a/test/include/test/thread.h b/test/include/test/thread.h new file mode 100644 index 00000000..e3c0e270 --- /dev/null +++ b/test/include/test/thread.h @@ -0,0 +1,12 @@ + +/* Abstraction layer for threading in tests */ +#ifdef _WIN32 +#include +typedef HANDLE je_thread_t; +#else +#include +typedef pthread_t je_thread_t; +#endif + +void je_thread_create(je_thread_t *thread, void *(*proc)(void *), void *arg); +void je_thread_join(je_thread_t thread, void **ret); diff --git a/test/ALLOCM_ARENA.c b/test/integration/ALLOCM_ARENA.c similarity index 96% rename from test/ALLOCM_ARENA.c rename to test/integration/ALLOCM_ARENA.c index ca91b621..e83056c0 100644 --- a/test/ALLOCM_ARENA.c +++ b/test/integration/ALLOCM_ARENA.c @@ -1,5 +1,4 @@ -#define JEMALLOC_MANGLE -#include "jemalloc_test.h" +#include "test/jemalloc_test.h" #define NTHREADS 10 diff --git a/test/ALLOCM_ARENA.exp b/test/integration/ALLOCM_ARENA.exp similarity index 100% rename from test/ALLOCM_ARENA.exp rename to test/integration/ALLOCM_ARENA.exp diff --git a/test/aligned_alloc.c b/test/integration/aligned_alloc.c similarity index 96% rename from test/aligned_alloc.c rename to test/integration/aligned_alloc.c index 5a9b0cae..2c44751b 100644 --- a/test/aligned_alloc.c +++ b/test/integration/aligned_alloc.c @@ -1,5 +1,4 @@ -#define JEMALLOC_MANGLE -#include "jemalloc_test.h" +#include "test/jemalloc_test.h" #define CHUNK 0x400000 /* #define MAXALIGN ((size_t)UINT64_C(0x80000000000)) */ @@ -96,10 +95,9 @@ main(void) char buf[BUFERROR_BUF]; buferror(buf, sizeof(buf)); - malloc_printf( + test_fail( "Error for size %zu (%#zx): %s\n", size, size, buf); - exit(1); } total += malloc_usable_size(ps[i]); if (total >= (MAXALIGN << 1)) diff --git a/test/aligned_alloc.exp b/test/integration/aligned_alloc.exp similarity index 100% rename from test/aligned_alloc.exp rename to test/integration/aligned_alloc.exp diff --git a/test/allocated.c b/test/integration/allocated.c similarity 
index 81% rename from test/allocated.c rename to test/integration/allocated.c index b1a9cfd9..73ea738d 100644 --- a/test/allocated.c +++ b/test/integration/allocated.c @@ -1,5 +1,4 @@ -#define JEMALLOC_MANGLE -#include "jemalloc_test.h" +#include "test/jemalloc_test.h" void * je_thread_start(void *arg) @@ -18,9 +17,8 @@ je_thread_start(void *arg) #endif goto label_return; } - malloc_printf("%s(): Error in mallctl(): %s\n", __func__, + test_fail("%s(): Error in mallctl(): %s\n", __func__, strerror(err)); - exit(1); } sz = sizeof(ap0); if ((err = mallctl("thread.allocatedp", &ap0, &sz, NULL, 0))) { @@ -30,9 +28,8 @@ je_thread_start(void *arg) #endif goto label_return; } - malloc_printf("%s(): Error in mallctl(): %s\n", __func__, + test_fail("%s(): Error in mallctl(): %s\n", __func__, strerror(err)); - exit(1); } assert(*ap0 == a0); @@ -44,9 +41,8 @@ je_thread_start(void *arg) #endif goto label_return; } - malloc_printf("%s(): Error in mallctl(): %s\n", __func__, + test_fail("%s(): Error in mallctl(): %s\n", __func__, strerror(err)); - exit(1); } sz = sizeof(dp0); if ((err = mallctl("thread.deallocatedp", &dp0, &sz, NULL, 0))) { @@ -56,17 +52,14 @@ je_thread_start(void *arg) #endif goto label_return; } - malloc_printf("%s(): Error in mallctl(): %s\n", __func__, + test_fail("%s(): Error in mallctl(): %s\n", __func__, strerror(err)); - exit(1); } assert(*dp0 == d0); p = malloc(1); - if (p == NULL) { - malloc_printf("%s(): Error in malloc()\n", __func__); - exit(1); - } + if (p == NULL) + test_fail("%s(): Error in malloc()\n", __func__); sz = sizeof(a1); mallctl("thread.allocated", &a1, &sz, NULL, 0); diff --git a/test/allocated.exp b/test/integration/allocated.exp similarity index 100% rename from test/allocated.exp rename to test/integration/allocated.exp diff --git a/test/allocm.c b/test/integration/allocm.c similarity index 97% rename from test/allocm.c rename to test/integration/allocm.c index 80be673b..3b892827 100644 --- a/test/allocm.c +++ 
b/test/integration/allocm.c @@ -1,5 +1,4 @@ -#define JEMALLOC_MANGLE -#include "jemalloc_test.h" +#include "test/jemalloc_test.h" #define CHUNK 0x400000 /* #define MAXALIGN ((size_t)UINT64_C(0x80000000000)) */ @@ -144,21 +143,19 @@ main(void) r = nallocm(&nsz, sz, ALLOCM_ALIGN(alignment) | ALLOCM_ZERO); if (r != ALLOCM_SUCCESS) { - malloc_printf( + test_fail( "nallocm() error for size %zu" " (%#zx): %d\n", sz, sz, r); - exit(1); } rsz = 0; r = allocm(&ps[i], &rsz, sz, ALLOCM_ALIGN(alignment) | ALLOCM_ZERO); if (r != ALLOCM_SUCCESS) { - malloc_printf( + test_fail( "allocm() error for size %zu" " (%#zx): %d\n", sz, sz, r); - exit(1); } if (rsz < sz) { malloc_printf( diff --git a/test/allocm.exp b/test/integration/allocm.exp similarity index 100% rename from test/allocm.exp rename to test/integration/allocm.exp diff --git a/test/jemalloc_test.h.in b/test/integration/jemalloc_integration.h.in similarity index 78% rename from test/jemalloc_test.h.in rename to test/integration/jemalloc_integration.h.in index e38b48ef..4730aab1 100644 --- a/test/jemalloc_test.h.in +++ b/test/integration/jemalloc_integration.h.in @@ -6,7 +6,7 @@ #include "jemalloc/jemalloc@install_suffix@.h" #include "jemalloc/internal/jemalloc_internal.h" -/* Abstraction layer for threading in tests */ +/* Abstraction layer for threading in tests. 
*/ #ifdef _WIN32 #include @@ -17,15 +17,14 @@ je_thread_create(je_thread_t *thread, void *(*proc)(void *), void *arg) { LPTHREAD_START_ROUTINE routine = (LPTHREAD_START_ROUTINE)proc; *thread = CreateThread(NULL, 0, routine, arg, 0, NULL); - if (*thread == NULL) { - malloc_printf("Error in CreateThread()\n"); - exit(1); - } + if (*thread == NULL) + test_fail("Error in CreateThread()\n"); } void je_thread_join(je_thread_t thread, void **ret) { + WaitForSingleObject(thread, INFINITE); } @@ -38,10 +37,8 @@ void je_thread_create(je_thread_t *thread, void *(*proc)(void *), void *arg) { - if (pthread_create(thread, NULL, proc, arg) != 0) { - malloc_printf("Error in pthread_create()\n"); - exit(1); - } + if (pthread_create(thread, NULL, proc, arg) != 0) + test_fail("Error in pthread_create()\n"); } void diff --git a/test/mremap.c b/test/integration/mremap.c similarity index 95% rename from test/mremap.c rename to test/integration/mremap.c index 47efa7c4..cdef9de0 100644 --- a/test/mremap.c +++ b/test/integration/mremap.c @@ -1,5 +1,4 @@ -#define JEMALLOC_MANGLE -#include "jemalloc_test.h" +#include "test/jemalloc_test.h" int main(void) diff --git a/test/mremap.exp b/test/integration/mremap.exp similarity index 100% rename from test/mremap.exp rename to test/integration/mremap.exp diff --git a/test/posix_memalign.c b/test/integration/posix_memalign.c similarity index 96% rename from test/posix_memalign.c rename to test/integration/posix_memalign.c index 2185bcf7..dc5cd0e2 100644 --- a/test/posix_memalign.c +++ b/test/integration/posix_memalign.c @@ -1,5 +1,4 @@ -#define JEMALLOC_MANGLE -#include "jemalloc_test.h" +#include "test/jemalloc_test.h" #define CHUNK 0x400000 /* #define MAXALIGN ((size_t)UINT64_C(0x80000000000)) */ @@ -92,10 +91,9 @@ main(void) err = posix_memalign(&ps[i], alignment, size); if (err) { - malloc_printf( + test_fail( "Error for size %zu (%#zx): %s\n", size, size, strerror(err)); - exit(1); } total += malloc_usable_size(ps[i]); if (total >= (MAXALIGN 
<< 1)) diff --git a/test/posix_memalign.exp b/test/integration/posix_memalign.exp similarity index 100% rename from test/posix_memalign.exp rename to test/integration/posix_memalign.exp diff --git a/test/rallocm.c b/test/integration/rallocm.c similarity index 98% rename from test/rallocm.c rename to test/integration/rallocm.c index c5dedf48..2c10dbae 100644 --- a/test/rallocm.c +++ b/test/integration/rallocm.c @@ -1,5 +1,6 @@ -#define JEMALLOC_MANGLE -#include "jemalloc_test.h" +#include + +#include "test/jemalloc_test.h" int main(void) diff --git a/test/rallocm.exp b/test/integration/rallocm.exp similarity index 100% rename from test/rallocm.exp rename to test/integration/rallocm.exp diff --git a/test/thread_arena.c b/test/integration/thread_arena.c similarity index 96% rename from test/thread_arena.c rename to test/integration/thread_arena.c index 6b9bc9cf..eb5b0988 100644 --- a/test/thread_arena.c +++ b/test/integration/thread_arena.c @@ -1,5 +1,4 @@ -#define JEMALLOC_MANGLE -#include "jemalloc_test.h" +#include "test/jemalloc_test.h" #define NTHREADS 10 diff --git a/test/thread_arena.exp b/test/integration/thread_arena.exp similarity index 100% rename from test/thread_arena.exp rename to test/integration/thread_arena.exp diff --git a/test/thread_tcache_enabled.c b/test/integration/thread_tcache_enabled.c similarity index 97% rename from test/thread_tcache_enabled.c rename to test/integration/thread_tcache_enabled.c index 586b5330..f9da0526 100644 --- a/test/thread_tcache_enabled.c +++ b/test/integration/thread_tcache_enabled.c @@ -1,5 +1,4 @@ -#define JEMALLOC_MANGLE -#include "jemalloc_test.h" +#include "test/jemalloc_test.h" void * je_thread_start(void *arg) diff --git a/test/thread_tcache_enabled.exp b/test/integration/thread_tcache_enabled.exp similarity index 100% rename from test/thread_tcache_enabled.exp rename to test/integration/thread_tcache_enabled.exp diff --git a/test/src/test.c b/test/src/test.c new file mode 100644 index 00000000..1bc34b4a --- 
/dev/null +++ b/test/src/test.c @@ -0,0 +1,28 @@ +#include "test/jemalloc_test.h" + +#define JEMALLOC_TEST_EXIT_FAIL 1 +#define JEMALLOC_TEST_EXIT_SKIP 2 + +JEMALLOC_ATTR(format(printf, 1, 2)) +void +test_fail(const char *format, ...) +{ + va_list ap; + + va_start(ap, format); + malloc_vcprintf(NULL, NULL, format, ap); + va_end(ap); + exit(JEMALLOC_TEST_EXIT_FAIL); +} + +JEMALLOC_ATTR(format(printf, 1, 2)) +void +test_skip(const char *format, ...) +{ + va_list ap; + + va_start(ap, format); + malloc_vcprintf(NULL, NULL, format, ap); + va_end(ap); + exit(JEMALLOC_TEST_EXIT_SKIP); +} diff --git a/test/src/thread.c b/test/src/thread.c new file mode 100644 index 00000000..5a91e27e --- /dev/null +++ b/test/src/thread.c @@ -0,0 +1,35 @@ +#include "test/jemalloc_test.h" + +#ifdef _WIN32 +void +je_thread_create(je_thread_t *thread, void *(*proc)(void *), void *arg) +{ + LPTHREAD_START_ROUTINE routine = (LPTHREAD_START_ROUTINE)proc; + *thread = CreateThread(NULL, 0, routine, arg, 0, NULL); + if (*thread == NULL) + test_fail("Error in CreateThread()\n"); +} + +void +je_thread_join(je_thread_t thread, void **ret) +{ + + WaitForSingleObject(thread, INFINITE); +} + +#else +void +je_thread_create(je_thread_t *thread, void *(*proc)(void *), void *arg) +{ + + if (pthread_create(thread, NULL, proc, arg) != 0) + test_fail("Error in pthread_create()\n"); +} + +void +je_thread_join(je_thread_t thread, void **ret) +{ + + pthread_join(thread, ret); +} +#endif diff --git a/test/test.sh.in b/test/test.sh.in new file mode 100644 index 00000000..726cd636 --- /dev/null +++ b/test/test.sh.in @@ -0,0 +1,37 @@ +#!/bin/sh + +case @abi@ in + macho) + export DYLD_FALLBACK_LIBRARY_PATH="@objroot@lib" + ;; + pecoff) + export PATH="${PATH}:@objroot@lib" + ;; + *) + ;; +esac + +total=0 +failures=0 +echo "=========================================" +for t in $@; do + total=`expr $total + 1` + /bin/echo -n "${t} ... " + ${t}@exe@ @abs_srcroot@ @abs_objroot@ > @objroot@${t}.out 2>&1 + result=$? 
+ if [ -e "@srcroot@${t}.exp" ] ; then + diff -w -u @srcroot@${t}.exp @objroot@${t}.out >/dev/null 2>&1 + fail=$? + if [ "${fail}" -eq "1" ] ; then + failures=`expr ${failures} + 1` + echo "*** FAIL ***" + else + echo "pass" + fi + else + echo "*** FAIL *** (.exp file is missing)" + failures=`expr ${failures} + 1` + fi +done +echo "=========================================" +echo "Failures: ${failures}/${total}" diff --git a/test/bitmap.c b/test/unit/bitmap.c similarity index 98% rename from test/bitmap.c rename to test/unit/bitmap.c index b2cb6300..37c3043c 100644 --- a/test/bitmap.c +++ b/test/unit/bitmap.c @@ -1,5 +1,4 @@ -#define JEMALLOC_MANGLE -#include "jemalloc_test.h" +#include "test/jemalloc_test.h" #if (LG_BITMAP_MAXBITS > 12) # define MAXBITS 4500 @@ -37,7 +36,6 @@ test_bitmap_init(void) for (j = 0; j < i; j++) assert(bitmap_get(bitmap, &binfo, j) == false); free(bitmap); - } } } diff --git a/test/bitmap.exp b/test/unit/bitmap.exp similarity index 100% rename from test/bitmap.exp rename to test/unit/bitmap.exp From 95424fc1884112d6b90193481e8ad26247463b4b Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 3 Dec 2013 22:28:10 -0800 Subject: [PATCH 0329/3378] Fix build target to be build_lib, as documented. Reported by Michael Truog. --- Makefile.in | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile.in b/Makefile.in index 57020ad7..8d3e22a5 100644 --- a/Makefile.in +++ b/Makefile.in @@ -141,7 +141,7 @@ TESTS_OBJS := $(TESTS_UNIT_OBJS) $(TESTS_INTEGRATION_OBJS) $(TESTS_STRESS_OBJS) .SECONDARY : $(TESTS_OBJS) # Default target. 
-all: build +all: build_lib dist: build_doc @@ -235,7 +235,7 @@ $(objroot)test/stress/%$(EXE): $(objroot)test/stress/%.$(O) $(C_JET_OBJS) $(C_TE build_lib_shared: $(DSOS) build_lib_static: $(STATIC_LIBS) -build: build_lib_shared build_lib_static +build_lib: build_lib_shared build_lib_static install_bin: install -d $(BINDIR) From 72284f03357d6fb4a7ff82542dd1a41d567b0bb2 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 4 Dec 2013 17:40:49 -0800 Subject: [PATCH 0330/3378] Add tsd test. Submitted by Mike Hommey. --- .gitignore | 6 +++--- Makefile.in | 3 ++- test/unit/tsd.c | 54 +++++++++++++++++++++++++++++++++++++++++++++++ test/unit/tsd.exp | 9 ++++++++ 4 files changed, 68 insertions(+), 4 deletions(-) create mode 100644 test/unit/tsd.c create mode 100644 test/unit/tsd.exp diff --git a/.gitignore b/.gitignore index 0a9ca185..afde57ad 100644 --- a/.gitignore +++ b/.gitignore @@ -37,19 +37,19 @@ test/include/test/jemalloc_test.h /test/integration/[A-Za-z]* -!/test/integration/*.* +!/test/integration/[A-Za-z]*.* /test/integration/*.[od] /test/integration/*.out /test/src/*.[od] /test/stress/[A-Za-z]* -!/test/stress/*.* +!/test/stress/[A-Za-z]*.* /test/stress/*.[od] /test/stress/*.out /test/unit/[A-Za-z]* -!/test/unit/*.* +!/test/unit/[A-Za-z]*.* /test/unit/*.[od] /test/unit/*.out diff --git a/Makefile.in b/Makefile.in index 8d3e22a5..ab6662cf 100644 --- a/Makefile.in +++ b/Makefile.in @@ -104,7 +104,8 @@ DOCS_MAN3 := $(DOCS_XML:$(objroot)%.xml=$(srcroot)%.3) DOCS := $(DOCS_HTML) $(DOCS_MAN3) C_TESTLIB_SRCS := $(srcroot)test/src/test.c $(srcroot)test/src/thread.c C_UTIL_INTEGRATION_SRCS := $(srcroot)src/util.c -TESTS_UNIT := $(srcroot)test/unit/bitmap.c +TESTS_UNIT := $(srcroot)test/unit/bitmap.c \ + $(srcroot)test/unit/tsd.c TESTS_INTEGRATION := $(srcroot)test/integration/aligned_alloc.c \ $(srcroot)test/integration/allocated.c \ $(srcroot)test/integration/ALLOCM_ARENA.c \ diff --git a/test/unit/tsd.c b/test/unit/tsd.c new file mode 100644 index 00000000..dacddfff 
--- /dev/null +++ b/test/unit/tsd.c @@ -0,0 +1,54 @@ +#include "test/jemalloc_test.h" + +#define THREAD_DATA 0x72b65c10 + +typedef unsigned int data_t; + +void +data_cleanup(void *arg) +{ + data_t *data = (data_t *)arg; + + malloc_printf("Cleanup for data %x.\n", *data); +} + +malloc_tsd_protos(, data, data_t) +malloc_tsd_externs(data, data_t) +#define DATA_INIT 0x12345678 +malloc_tsd_data(, data, data_t, DATA_INIT) +malloc_tsd_funcs(, data, data_t, DATA_INIT, data_cleanup) + +void * +je_thread_start(void *arg) +{ + data_t d = (data_t)(uintptr_t) arg; + malloc_printf("Initial tsd_get returns %x. Expected %x.\n", + *data_tsd_get(), DATA_INIT); + + data_tsd_set(&d); + malloc_printf("After tsd_set: %x. Expected %x.\n", + *data_tsd_get(), d); + + d = 0; + malloc_printf("After resetting local data: %x. Expected %x.\n", + *data_tsd_get(), (data_t)(uintptr_t) arg); + + return NULL; +} + +int +main(void) +{ + je_thread_t thread; + + malloc_printf("Test begin\n"); + + data_tsd_boot(); + je_thread_start((void *) 0xa5f3e329); + + je_thread_create(&thread, je_thread_start, (void *) THREAD_DATA); + je_thread_join(thread, NULL); + + malloc_printf("Test end\n"); + return (0); +} diff --git a/test/unit/tsd.exp b/test/unit/tsd.exp new file mode 100644 index 00000000..b4abedcf --- /dev/null +++ b/test/unit/tsd.exp @@ -0,0 +1,9 @@ +Test begin +Initial tsd_get returns 12345678. Expected 12345678. +After tsd_set: a5f3e329. Expected a5f3e329. +After resetting local data: a5f3e329. Expected a5f3e329. +Initial tsd_get returns 12345678. Expected 12345678. +After tsd_set: 72b65c10. Expected 72b65c10. +After resetting local data: 72b65c10. Expected 72b65c10. +Cleanup for data 72b65c10. +Test end From 14990b83d1dffe04638df0c09eb1a5c3b1118462 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 5 Dec 2013 17:58:32 -0800 Subject: [PATCH 0331/3378] Fix test refactoring issues for Linux. 
--- Makefile.in | 4 +-- .../jemalloc/internal/jemalloc_internal.h.in | 31 ------------------- .../internal/jemalloc_internal_defs.h.in | 31 +++++++++++++++++++ test/include/test/jemalloc_test.h.in | 3 +- 4 files changed, 34 insertions(+), 35 deletions(-) diff --git a/Makefile.in b/Makefile.in index ab6662cf..16a9ba4d 100644 --- a/Makefile.in +++ b/Makefile.in @@ -224,7 +224,7 @@ $(STATIC_LIBS): $(objroot)test/unit/%$(EXE): $(objroot)test/unit/%.$(O) $(C_JET_OBJS) $(C_TESTLIB_UNIT_OBJS) @mkdir -p $(@D) - $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(filter -lpthread,$(LIBS)) $(EXTRA_LDFLAGS) + $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(LIBS) $(EXTRA_LDFLAGS) $(objroot)test/integration/%$(EXE): $(objroot)test/integration/%.$(O) $(C_TESTLIB_INTEGRATION_OBJS) $(C_UTIL_INTEGRATION_OBJS) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) @mkdir -p $(@D) @@ -232,7 +232,7 @@ $(objroot)test/integration/%$(EXE): $(objroot)test/integration/%.$(O) $(C_TESTLI $(objroot)test/stress/%$(EXE): $(objroot)test/stress/%.$(O) $(C_JET_OBJS) $(C_TESTLIB_STRESS_OBJS) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) @mkdir -p $(@D) - $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(filter -lpthread,$(LIBS)) $(EXTRA_LDFLAGS) + $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LIBS) $(EXTRA_LDFLAGS) build_lib_shared: $(DSOS) build_lib_static: $(STATIC_LIBS) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index e6303103..989c19b0 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -241,37 +241,6 @@ static const bool config_ivsalloc = # define __DECONST(type, var) ((type)(uintptr_t)(const void *)(var)) #endif -/* - * JEMALLOC_ALWAYS_INLINE is used within header files for functions that are - * static inline 
functions if inlining is enabled, and single-definition - * library-private functions if inlining is disabled. - * - * JEMALLOC_ALWAYS_INLINE_C is for use in .c files, in which case the denoted - * functions are always static, regardless of whether inlining is enabled. - */ -#ifdef JEMALLOC_DEBUG - /* Disable inlining to make debugging easier. */ -# define JEMALLOC_ALWAYS_INLINE -# define JEMALLOC_ALWAYS_INLINE_C static -# define JEMALLOC_INLINE -# define inline -#else -# define JEMALLOC_ENABLE_INLINE -# ifdef JEMALLOC_HAVE_ATTR -# define JEMALLOC_ALWAYS_INLINE \ - static inline JEMALLOC_ATTR(unused) JEMALLOC_ATTR(always_inline) -# define JEMALLOC_ALWAYS_INLINE_C \ - static inline JEMALLOC_ATTR(always_inline) -# else -# define JEMALLOC_ALWAYS_INLINE static inline -# define JEMALLOC_ALWAYS_INLINE_C static inline -# endif -# define JEMALLOC_INLINE static inline -# ifdef _MSC_VER -# define inline _inline -# endif -#endif - /* Smallest size class to support. */ #define LG_TINY_MIN 3 #define TINY_MIN (1U << LG_TINY_MIN) diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 36826d85..477b55b5 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -89,6 +89,37 @@ */ #undef JEMALLOC_DEBUG +/* + * JEMALLOC_ALWAYS_INLINE is used within header files for functions that are + * static inline functions if inlining is enabled, and single-definition + * library-private functions if inlining is disabled. + * + * JEMALLOC_ALWAYS_INLINE_C is for use in .c files, in which case the denoted + * functions are always static, regardless of whether inlining is enabled. + */ +#ifdef JEMALLOC_DEBUG + /* Disable inlining to make debugging easier. 
*/ +# define JEMALLOC_ALWAYS_INLINE +# define JEMALLOC_ALWAYS_INLINE_C static +# define JEMALLOC_INLINE +# define inline +#else +# define JEMALLOC_ENABLE_INLINE +# ifdef JEMALLOC_HAVE_ATTR +# define JEMALLOC_ALWAYS_INLINE \ + static inline JEMALLOC_ATTR(unused) JEMALLOC_ATTR(always_inline) +# define JEMALLOC_ALWAYS_INLINE_C \ + static inline JEMALLOC_ATTR(always_inline) +# else +# define JEMALLOC_ALWAYS_INLINE static inline +# define JEMALLOC_ALWAYS_INLINE_C static inline +# endif +# define JEMALLOC_INLINE static inline +# ifdef _MSC_VER +# define inline _inline +# endif +#endif + /* JEMALLOC_STATS enables statistics calculation. */ #undef JEMALLOC_STATS diff --git a/test/include/test/jemalloc_test.h.in b/test/include/test/jemalloc_test.h.in index 1d18eec7..1eb7a0b3 100644 --- a/test/include/test/jemalloc_test.h.in +++ b/test/include/test/jemalloc_test.h.in @@ -1,5 +1,6 @@ #include #include +#include /******************************************************************************/ /* @@ -59,7 +60,6 @@ # include # include # include -# define JEMALLOC_CC_SILENCE # define JEMALLOC_H_TYPES # define JEMALLOC_H_STRUCTS # define JEMALLOC_H_EXTERNS @@ -69,7 +69,6 @@ # undef JEMALLOC_H_STRUCTS # undef JEMALLOC_H_EXTERNS # undef JEMALLOC_H_INLINES -# undef JEMALLOC_CC_SILENCE /******************************************************************************/ /* From dc1bed62272045651e4bbf2cd85f6fccaf7b1331 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 5 Dec 2013 21:43:46 -0800 Subject: [PATCH 0332/3378] Fix more test refactoring issues. 
--- configure.ac | 1 + .../jemalloc/internal/jemalloc_internal.h.in | 15 +---- .../internal/jemalloc_internal_defs.h.in | 41 ------------- .../internal/jemalloc_internal_macros.h | 44 +++++++++++++ include/jemalloc/jemalloc.sh | 5 +- include/jemalloc/jemalloc_defs.h.in | 61 +++---------------- include/jemalloc/jemalloc_macros.h.in | 54 +++++++++++++++- test/include/test/jemalloc_test.h.in | 1 + 8 files changed, 113 insertions(+), 109 deletions(-) create mode 100644 include/jemalloc/internal/jemalloc_internal_macros.h diff --git a/configure.ac b/configure.ac index 570a7f56..7f0fecce 100644 --- a/configure.ac +++ b/configure.ac @@ -553,6 +553,7 @@ cfghdrs_in="${cfghdrs_in} ${srcroot}include/jemalloc/internal/private_unnamespac cfghdrs_in="${cfghdrs_in} ${srcroot}include/jemalloc/internal/public_namespace.sh" cfghdrs_in="${cfghdrs_in} ${srcroot}include/jemalloc/internal/public_unnamespace.sh" cfghdrs_in="${cfghdrs_in} ${srcroot}include/jemalloc/internal/size_classes.sh" +cfghdrs_in="${cfghdrs_in} ${srcroot}include/jemalloc/jemalloc.sh" cfghdrs_out="include/jemalloc/jemalloc_defs.h" cfghdrs_out="${cfghdrs_out} include/jemalloc/jemalloc${install_suffix}.h" diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 989c19b0..edb40a29 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -77,12 +77,6 @@ typedef intptr_t ssize_t; #endif #include "jemalloc/internal/private_namespace.h" -#ifdef JEMALLOC_CC_SILENCE -#define UNUSED JEMALLOC_ATTR(unused) -#else -#define UNUSED -#endif - static const bool config_debug = #ifdef JEMALLOC_DEBUG true @@ -232,15 +226,10 @@ static const bool config_ivsalloc = /******************************************************************************/ #define JEMALLOC_H_TYPES +#include "jemalloc/internal/jemalloc_internal_macros.h" + #define ALLOCM_LG_ALIGN_MASK ((int)0x3f) -#define ZU(z) ((size_t)z) -#define QU(q) 
((uint64_t)q) - -#ifndef __DECONST -# define __DECONST(type, var) ((type)(uintptr_t)(const void *)(var)) -#endif - /* Smallest size class to support. */ #define LG_TINY_MIN 3 #define TINY_MIN (1U << LG_TINY_MIN) diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 477b55b5..346e39e2 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -89,37 +89,6 @@ */ #undef JEMALLOC_DEBUG -/* - * JEMALLOC_ALWAYS_INLINE is used within header files for functions that are - * static inline functions if inlining is enabled, and single-definition - * library-private functions if inlining is disabled. - * - * JEMALLOC_ALWAYS_INLINE_C is for use in .c files, in which case the denoted - * functions are always static, regardless of whether inlining is enabled. - */ -#ifdef JEMALLOC_DEBUG - /* Disable inlining to make debugging easier. */ -# define JEMALLOC_ALWAYS_INLINE -# define JEMALLOC_ALWAYS_INLINE_C static -# define JEMALLOC_INLINE -# define inline -#else -# define JEMALLOC_ENABLE_INLINE -# ifdef JEMALLOC_HAVE_ATTR -# define JEMALLOC_ALWAYS_INLINE \ - static inline JEMALLOC_ATTR(unused) JEMALLOC_ATTR(always_inline) -# define JEMALLOC_ALWAYS_INLINE_C \ - static inline JEMALLOC_ATTR(always_inline) -# else -# define JEMALLOC_ALWAYS_INLINE static inline -# define JEMALLOC_ALWAYS_INLINE_C static inline -# endif -# define JEMALLOC_INLINE static inline -# ifdef _MSC_VER -# define inline _inline -# endif -#endif - /* JEMALLOC_STATS enables statistics calculation. */ #undef JEMALLOC_STATS @@ -189,13 +158,6 @@ */ #undef JEMALLOC_IVSALLOC -/* - * Define overrides for non-standard allocator-related functions if they - * are present on the system. - */ -#undef JEMALLOC_OVERRIDE_MEMALIGN -#undef JEMALLOC_OVERRIDE_VALLOC - /* * Darwin (OS X) uses zones to work around Mach-O symbol override shortcomings. 
*/ @@ -220,9 +182,6 @@ */ #undef JEMALLOC_HAS_ALLOCA_H -/* sizeof(void *) == 2^LG_SIZEOF_PTR. */ -#undef LG_SIZEOF_PTR - /* sizeof(int) == 2^LG_SIZEOF_INT. */ #undef LG_SIZEOF_INT diff --git a/include/jemalloc/internal/jemalloc_internal_macros.h b/include/jemalloc/internal/jemalloc_internal_macros.h new file mode 100644 index 00000000..f2780982 --- /dev/null +++ b/include/jemalloc/internal/jemalloc_internal_macros.h @@ -0,0 +1,44 @@ +/* + * JEMALLOC_ALWAYS_INLINE is used within header files for functions that are + * static inline functions if inlining is enabled, and single-definition + * library-private functions if inlining is disabled. + * + * JEMALLOC_ALWAYS_INLINE_C is for use in .c files, in which case the denoted + * functions are always static, regardless of whether inlining is enabled. + */ +#ifdef JEMALLOC_DEBUG + /* Disable inlining to make debugging easier. */ +# define JEMALLOC_ALWAYS_INLINE +# define JEMALLOC_ALWAYS_INLINE_C static +# define JEMALLOC_INLINE +# define inline +#else +# define JEMALLOC_ENABLE_INLINE +# ifdef JEMALLOC_HAVE_ATTR +# define JEMALLOC_ALWAYS_INLINE \ + static inline JEMALLOC_ATTR(unused) JEMALLOC_ATTR(always_inline) +# define JEMALLOC_ALWAYS_INLINE_C \ + static inline JEMALLOC_ATTR(always_inline) +# else +# define JEMALLOC_ALWAYS_INLINE static inline +# define JEMALLOC_ALWAYS_INLINE_C static inline +# endif +# define JEMALLOC_INLINE static inline +# ifdef _MSC_VER +# define inline _inline +# endif +#endif + +#ifdef JEMALLOC_CC_SILENCE +#define UNUSED JEMALLOC_ATTR(unused) +#else +#define UNUSED +#endif + +#define ZU(z) ((size_t)z) +#define QU(q) ((uint64_t)q) + +#ifndef __DECONST +# define __DECONST(type, var) ((type)(uintptr_t)(const void *)(var)) +#endif + diff --git a/include/jemalloc/jemalloc.sh b/include/jemalloc/jemalloc.sh index 00a0b76c..f5da79c8 100755 --- a/include/jemalloc/jemalloc.sh +++ b/include/jemalloc/jemalloc.sh @@ -11,8 +11,9 @@ extern "C" { EOF -for hdr in jemalloc_macros.h jemalloc_protos.h 
jemalloc_mangle.h ; do - cat "${objroot}include/jemalloc/${hdr}" +for hdr in jemalloc_defs.h jemalloc_macros.h jemalloc_protos.h \ + jemalloc_mangle.h ; do + cat "${objroot}include/jemalloc/${hdr}" | grep -v 'Generated from .* by configure\.' echo done diff --git a/include/jemalloc/jemalloc_defs.h.in b/include/jemalloc/jemalloc_defs.h.in index 921fa8c5..bc3153b8 100644 --- a/include/jemalloc/jemalloc_defs.h.in +++ b/include/jemalloc/jemalloc_defs.h.in @@ -1,60 +1,16 @@ -/* - * Name mangling for public symbols is controlled by --with-mangling and - * --with-jemalloc-prefix. With default settings the je_ prefix is stripped by - * these macro definitions. - */ -#ifndef JEMALLOC_NO_RENAME -# undef je_malloc_conf -# undef je_malloc_message -# undef je_malloc -# undef je_calloc -# undef je_posix_memalign -# undef je_aligned_alloc -# undef je_realloc -# undef je_free -# undef je_malloc_usable_size -# undef je_malloc_stats_print -# undef je_mallctl -# undef je_mallctlnametomib -# undef je_mallctlbymib -# undef je_memalign -# undef je_valloc -# undef je_allocm -# undef je_rallocm -# undef je_sallocm -# undef je_dallocm -# undef je_nallocm -#endif - /* Defined if __attribute__((...)) syntax is supported. 
*/ #undef JEMALLOC_HAVE_ATTR -#ifdef JEMALLOC_HAVE_ATTR -# define JEMALLOC_ATTR(s) __attribute__((s)) -# define JEMALLOC_EXPORT JEMALLOC_ATTR(visibility("default")) -# define JEMALLOC_ALIGNED(s) JEMALLOC_ATTR(aligned(s)) -# define JEMALLOC_SECTION(s) JEMALLOC_ATTR(section(s)) -# define JEMALLOC_NOINLINE JEMALLOC_ATTR(noinline) -#elif _MSC_VER -# define JEMALLOC_ATTR(s) -# ifdef DLLEXPORT -# define JEMALLOC_EXPORT __declspec(dllexport) -# else -# define JEMALLOC_EXPORT __declspec(dllimport) -# endif -# define JEMALLOC_ALIGNED(s) __declspec(align(s)) -# define JEMALLOC_SECTION(s) __declspec(allocate(s)) -# define JEMALLOC_NOINLINE __declspec(noinline) -#else -# define JEMALLOC_ATTR(s) -# define JEMALLOC_EXPORT -# define JEMALLOC_ALIGNED(s) -# define JEMALLOC_SECTION(s) -# define JEMALLOC_NOINLINE -#endif /* Support the experimental API. */ #undef JEMALLOC_EXPERIMENTAL +/* + * Define overrides for non-standard allocator-related functions if they + * are present on the system. + */ +#undef JEMALLOC_OVERRIDE_MEMALIGN +#undef JEMALLOC_OVERRIDE_VALLOC + /* * At least Linux omits the "const" in: * @@ -63,3 +19,6 @@ * Match the operating system's prototype. */ #undef JEMALLOC_USABLE_SIZE_CONST + +/* sizeof(void *) == 2^LG_SIZEOF_PTR. */ +#undef LG_SIZEOF_PTR diff --git a/include/jemalloc/jemalloc_macros.h.in b/include/jemalloc/jemalloc_macros.h.in index 2443a539..a4923af1 100644 --- a/include/jemalloc/jemalloc_macros.h.in +++ b/include/jemalloc/jemalloc_macros.h.in @@ -8,8 +8,6 @@ #define JEMALLOC_VERSION_NREV @jemalloc_version_nrev@ #define JEMALLOC_VERSION_GID "@jemalloc_version_gid@" -#include "jemalloc_defs@install_suffix@.h" - #ifdef JEMALLOC_EXPERIMENTAL #define ALLOCM_LG_ALIGN(la) (la) #if LG_SIZEOF_PTR == 2 @@ -26,3 +24,55 @@ #define ALLOCM_ERR_OOM 1 #define ALLOCM_ERR_NOT_MOVED 2 #endif + +/* + * Name mangling for public symbols is controlled by --with-mangling and + * --with-jemalloc-prefix. 
With default settings the je_ prefix is stripped by + * these macro definitions. + */ +#ifndef JEMALLOC_NO_RENAME +# undef je_malloc_conf +# undef je_malloc_message +# undef je_malloc +# undef je_calloc +# undef je_posix_memalign +# undef je_aligned_alloc +# undef je_realloc +# undef je_free +# undef je_malloc_usable_size +# undef je_malloc_stats_print +# undef je_mallctl +# undef je_mallctlnametomib +# undef je_mallctlbymib +# undef je_memalign +# undef je_valloc +# undef je_allocm +# undef je_rallocm +# undef je_sallocm +# undef je_dallocm +# undef je_nallocm +#endif + +#ifdef JEMALLOC_HAVE_ATTR +# define JEMALLOC_ATTR(s) __attribute__((s)) +# define JEMALLOC_EXPORT JEMALLOC_ATTR(visibility("default")) +# define JEMALLOC_ALIGNED(s) JEMALLOC_ATTR(aligned(s)) +# define JEMALLOC_SECTION(s) JEMALLOC_ATTR(section(s)) +# define JEMALLOC_NOINLINE JEMALLOC_ATTR(noinline) +#elif _MSC_VER +# define JEMALLOC_ATTR(s) +# ifdef DLLEXPORT +# define JEMALLOC_EXPORT __declspec(dllexport) +# else +# define JEMALLOC_EXPORT __declspec(dllimport) +# endif +# define JEMALLOC_ALIGNED(s) __declspec(align(s)) +# define JEMALLOC_SECTION(s) __declspec(allocate(s)) +# define JEMALLOC_NOINLINE __declspec(noinline) +#else +# define JEMALLOC_ATTR(s) +# define JEMALLOC_EXPORT +# define JEMALLOC_ALIGNED(s) +# define JEMALLOC_SECTION(s) +# define JEMALLOC_NOINLINE +#endif diff --git a/test/include/test/jemalloc_test.h.in b/test/include/test/jemalloc_test.h.in index 1eb7a0b3..20ccba76 100644 --- a/test/include/test/jemalloc_test.h.in +++ b/test/include/test/jemalloc_test.h.in @@ -53,6 +53,7 @@ # define JEMALLOC_MANGLE # include "jemalloc/jemalloc@install_suffix@.h" # include "jemalloc/internal/jemalloc_internal_defs.h" +# include "jemalloc/internal/jemalloc_internal_macros.h" # define JEMALLOC_N(n) @private_namespace@##n # include "jemalloc/internal/private_namespace.h" From d37d5adee4e4570cfda83e5f1b948a25b9226224 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 5 Dec 2013 23:01:50 -0800 
Subject: [PATCH 0333/3378] Disable floating point code/linking when possible. Unless heap profiling is enabled, disable floating point code and don't link with libm. This, in combination with e.g. EXTRA_CFLAGS=-mno-sse on x64 systems, makes it possible to completely disable floating point register use. Some versions of glibc neglect to save/restore caller-saved floating point registers during dynamic lazy symbol loading, and the symbol loading code uses whatever malloc the application happens to have linked/loaded with, the result being potential floating point register corruption. --- ChangeLog | 7 +++++++ configure.ac | 11 ++++++----- include/jemalloc/internal/prof.h | 15 +++++++++++++++ 3 files changed, 28 insertions(+), 5 deletions(-) diff --git a/ChangeLog b/ChangeLog index 90ab107a..34d017e9 100644 --- a/ChangeLog +++ b/ChangeLog @@ -9,6 +9,13 @@ found in the git revision history: * 3.5.0 (XXX) Bug fixes: + - Unless heap profiling is enabled, disable floating point code and don't link + with libm. This, in combination with e.g. EXTRA_CFLAGS=-mno-sse on x64 + systems, makes it possible to completely disable floating point register + use. Some versions of glibc neglect to save/restore caller-saved floating + point registers during dynamic lazy symbol loading, and the symbol loading + code uses whatever malloc the application happens to have linked/loaded + with, the result being potential floating point register corruption. - Change the default private namespace prefix from empty to je_, and change --with-private-namespace-prefix so that it prepends an additional prefix rather than replacing je_. This reduces the likelihood of applications diff --git a/configure.ac b/configure.ac index 7f0fecce..1103cc77 100644 --- a/configure.ac +++ b/configure.ac @@ -356,11 +356,6 @@ AC_SUBST([ARFLAGS]) AC_SUBST([AROUT]) AC_SUBST([CC_MM]) -if test "x$abi" != "xpecoff"; then - dnl Heap profiling uses the log(3) function. 
- LIBS="$LIBS -lm" -fi - JE_COMPILABLE([__attribute__ syntax], [static __attribute__((unused)) void foo(void){}], [], @@ -774,6 +769,12 @@ if test "x$enable_prof" = "x1" ; then AC_MSG_ERROR([Heap profiling requires TLS]); fi force_tls="1" + + if test "x$abi" != "xpecoff"; then + dnl Heap profiling uses the log(3) function. + LIBS="$LIBS -lm" + fi + AC_DEFINE([JEMALLOC_PROF], [ ]) fi AC_SUBST([enable_prof]) diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index 119a5b1b..38a761bf 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -320,6 +320,20 @@ prof_tdata_get(bool create) JEMALLOC_INLINE void prof_sample_threshold_update(prof_tdata_t *prof_tdata) { + /* + * The body of this function is compiled out unless heap profiling is + * enabled, so that it is possible to compile jemalloc with floating + * point support completely disabled. Avoiding floating point code is + * important on memory-constrained systems, but it also enables a + * workaround for versions of glibc that don't properly save/restore + * floating point registers during dynamic lazy symbol loading (which + * internally calls into whatever malloc implementation happens to be + * integrated into the application). Note that some compilers (e.g. + * gcc 4.8) may use floating point registers for fast memory moves, so + * jemalloc must be compiled with such optimizations disabled (e.g. + * -mno-sse) in order for the workaround to be complete. + */ +#ifdef JEMALLOC_PROF uint64_t r; double u; @@ -349,6 +363,7 @@ prof_sample_threshold_update(prof_tdata_t *prof_tdata) prof_tdata->threshold = (uint64_t)(log(u) / log(1.0 - (1.0 / (double)((uint64_t)1U << opt_lg_prof_sample)))) + (uint64_t)1U; +#endif } JEMALLOC_INLINE prof_ctx_t * From 748dfac7788e3cbc2fc6d36196a81d3f002669f6 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 6 Dec 2013 18:27:33 -0800 Subject: [PATCH 0334/3378] Add test code coverage analysis. 
Add test code coverage analysis based on gcov. --- .gitignore | 12 ++++ INSTALL | 13 +++++ Makefile.in | 56 ++++++++++++++++--- configure.ac | 33 ++++++++++- coverage.sh | 16 ++++++ .../internal/jemalloc_internal_defs.h.in | 3 + .../internal/jemalloc_internal_macros.h | 4 +- 7 files changed, 127 insertions(+), 10 deletions(-) create mode 100755 coverage.sh diff --git a/.gitignore b/.gitignore index afde57ad..b7715e8d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ +/*.gcov.* + /autom4te.cache/ /bin/jemalloc.sh @@ -32,6 +34,8 @@ /include/jemalloc/jemalloc_mangle.h /src/*.[od] +/src/*.gcda +/src/*.gcno /test/test.sh test/include/test/jemalloc_test.h @@ -39,18 +43,26 @@ test/include/test/jemalloc_test.h /test/integration/[A-Za-z]* !/test/integration/[A-Za-z]*.* /test/integration/*.[od] +/test/integration/*.gcda +/test/integration/*.gcno /test/integration/*.out /test/src/*.[od] +/test/src/*.gcda +/test/src/*.gcno /test/stress/[A-Za-z]* !/test/stress/[A-Za-z]*.* /test/stress/*.[od] +/test/stress/*.gcda +/test/stress/*.gcno /test/stress/*.out /test/unit/[A-Za-z]* !/test/unit/[A-Za-z]*.* /test/unit/*.[od] +/test/unit/*.gcda +/test/unit/*.gcno /test/unit/*.out /VERSION diff --git a/INSTALL b/INSTALL index 39ad26db..841704d2 100644 --- a/INSTALL +++ b/INSTALL @@ -81,6 +81,19 @@ any of the following arguments (not a definitive list) to 'configure': performance hit, but is very useful during application development. Implies --enable-ivsalloc. +--enable-code-coverage + Enable code coverage support, for use during jemalloc test development. + Additional testing targets are available if this option is enabled: + + coverage + coverage_unit + coverage_integration + coverage_stress + + These targets do not clear code coverage results from previous runs, and + there are interactions between the various coverage targets, so it is + usually advisable to run 'make clean' between repeated code coverage runs. 
+ --enable-ivsalloc Enable validation code, which verifies that pointers reside within jemalloc-owned chunks before dereferencing them. This incurs a substantial diff --git a/Makefile.in b/Makefile.in index 16a9ba4d..242331c6 100644 --- a/Makefile.in +++ b/Makefile.in @@ -47,6 +47,7 @@ cfghdrs_out := @cfghdrs_out@ cfgoutputs_in := @cfgoutputs_in@ cfgoutputs_out := @cfgoutputs_out@ enable_autogen := @enable_autogen@ +enable_code_coverage := @enable_code_coverage@ enable_experimental := @enable_experimental@ enable_zone_allocator := @enable_zone_allocator@ DSO_LDFLAGS = @DSO_LDFLAGS@ @@ -224,15 +225,15 @@ $(STATIC_LIBS): $(objroot)test/unit/%$(EXE): $(objroot)test/unit/%.$(O) $(C_JET_OBJS) $(C_TESTLIB_UNIT_OBJS) @mkdir -p $(@D) - $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(LIBS) $(EXTRA_LDFLAGS) + $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(LDFLAGS) $(LIBS) $(EXTRA_LDFLAGS) $(objroot)test/integration/%$(EXE): $(objroot)test/integration/%.$(O) $(C_TESTLIB_INTEGRATION_OBJS) $(C_UTIL_INTEGRATION_OBJS) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) @mkdir -p $(@D) - $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(filter -lpthread,$(LIBS)) $(EXTRA_LDFLAGS) + $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter -lpthread,$(LIBS)) $(EXTRA_LDFLAGS) $(objroot)test/stress/%$(EXE): $(objroot)test/stress/%.$(O) $(C_JET_OBJS) $(C_TESTLIB_STRESS_OBJS) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) @mkdir -p $(@D) - $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LIBS) $(EXTRA_LDFLAGS) + $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(LIBS) $(EXTRA_LDFLAGS) build_lib_shared: $(DSOS) build_lib_static: $(STATIC_LIBS) @@ -300,13 +301,43 @@ check_stress_dir: check_dir: 
check_unit_dir check_integration_dir check_stress_dir check_unit: tests_unit check_unit_dir - @$(SHELL) $(objroot)test/test.sh $(TESTS_UNIT:$(srcroot)%.c=$(objroot)%) + $(SHELL) $(objroot)test/test.sh $(TESTS_UNIT:$(srcroot)%.c=$(objroot)%) check_integration: tests_integration check_integration_dir - @$(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) + $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) check_stress: tests_stress check_stress_dir - @$(SHELL) $(objroot)test/test.sh $(TESTS_STRESS:$(srcroot)%.c=$(objroot)%) + $(SHELL) $(objroot)test/test.sh $(TESTS_STRESS:$(srcroot)%.c=$(objroot)%) check: tests check_dir - @$(SHELL) $(objroot)test/test.sh $(TESTS:$(srcroot)%.c=$(objroot)%) + $(SHELL) $(objroot)test/test.sh $(TESTS:$(srcroot)%.c=$(objroot)%) + +ifeq ($(enable_code_coverage), 1) +coverage_unit: check_unit + $(SHELL) $(srcroot)coverage.sh $(srcroot)src jet $(C_JET_OBJS) + $(SHELL) $(srcroot)coverage.sh $(srcroot)test/src unit $(C_TESTLIB_UNIT_OBJS) + $(SHELL) $(srcroot)coverage.sh $(srcroot)test/unit unit $(TESTS_UNIT_OBJS) + +coverage_integration: check_integration + $(SHELL) $(srcroot)coverage.sh $(srcroot)src pic $(C_PIC_OBJS) + $(SHELL) $(srcroot)coverage.sh $(srcroot)src integration $(C_UTIL_INTEGRATION_OBJS) + $(SHELL) $(srcroot)coverage.sh $(srcroot)test/src integration $(C_TESTLIB_INTEGRATION_OBJS) + $(SHELL) $(srcroot)coverage.sh $(srcroot)test/integration integration $(TESTS_INTEGRATION_OBJS) + +coverage_stress: check_stress + $(SHELL) $(srcroot)coverage.sh $(srcroot)src pic $(C_PIC_OBJS) + $(SHELL) $(srcroot)coverage.sh $(srcroot)src jet $(C_JET_OBJS) + $(SHELL) $(srcroot)coverage.sh $(srcroot)test/src stress $(C_TESTLIB_STRESS_OBJS) + $(SHELL) $(srcroot)coverage.sh $(srcroot)test/stress stress $(TESTS_STRESS_OBJS) + +coverage: check + $(SHELL) $(srcroot)coverage.sh $(srcroot)src pic $(C_PIC_OBJS) + $(SHELL) $(srcroot)coverage.sh $(srcroot)src jet $(C_JET_OBJS) + $(SHELL) 
$(srcroot)coverage.sh $(srcroot)src integration $(C_UTIL_INTEGRATION_OBJS) + $(SHELL) $(srcroot)coverage.sh $(srcroot)test/src unit $(C_TESTLIB_UNIT_OBJS) + $(SHELL) $(srcroot)coverage.sh $(srcroot)test/src integration $(C_TESTLIB_INTEGRATION_OBJS) + $(SHELL) $(srcroot)coverage.sh $(srcroot)test/src stress $(C_TESTLIB_STRESS_OBJS) + $(SHELL) $(srcroot)coverage.sh $(srcroot)test/unit unit $(TESTS_UNIT_OBJS) + $(SHELL) $(srcroot)coverage.sh $(srcroot)test/integration integration $(TESTS_INTEGRATION_OBJS) + $(SHELL) $(srcroot)coverage.sh $(srcroot)test/stress integration $(TESTS_STRESS_OBJS) +endif clean: rm -f $(C_OBJS) @@ -314,14 +345,25 @@ clean: rm -f $(C_JET_OBJS) rm -f $(C_TESTLIB_OBJS) rm -f $(C_OBJS:%.$(O)=%.d) + rm -f $(C_OBJS:%.$(O)=%.gcda) + rm -f $(C_OBJS:%.$(O)=%.gcno) rm -f $(C_PIC_OBJS:%.$(O)=%.d) + rm -f $(C_PIC_OBJS:%.$(O)=%.gcda) + rm -f $(C_PIC_OBJS:%.$(O)=%.gcno) rm -f $(C_JET_OBJS:%.$(O)=%.d) + rm -f $(C_JET_OBJS:%.$(O)=%.gcda) + rm -f $(C_JET_OBJS:%.$(O)=%.gcno) rm -f $(C_TESTLIB_OBJS:%.$(O)=%.d) + rm -f $(C_TESTLIB_OBJS:%.$(O)=%.gcda) + rm -f $(C_TESTLIB_OBJS:%.$(O)=%.gcno) rm -f $(TESTS_OBJS:%.$(O)=%$(EXE)) rm -f $(TESTS_OBJS) rm -f $(TESTS_OBJS:%.$(O)=%.d) + rm -f $(TESTS_OBJS:%.$(O)=%.gcda) + rm -f $(TESTS_OBJS:%.$(O)=%.gcno) rm -f $(TESTS_OBJS:%.$(O)=%.out) rm -f $(DSOS) $(STATIC_LIBS) + rm -f $(objroot)*.gcov.* distclean: clean rm -rf $(objroot)autom4te.cache diff --git a/configure.ac b/configure.ac index 1103cc77..45b510c5 100644 --- a/configure.ac +++ b/configure.ac @@ -440,6 +440,31 @@ if test "x$enable_experimental" = "x1" ; then fi AC_SUBST([enable_experimental]) +dnl Do not compute test code coverage by default. 
+GCOV_FLAGS= +AC_ARG_ENABLE([code-coverage], + [AS_HELP_STRING([--enable-code-coverage], + [Enable code coverage])], +[if test "x$enable_code_coverage" = "xno" ; then + enable_code_coverage="0" +else + enable_code_coverage="1" +fi +], +[enable_code_coverage="0"] +) +if test "x$enable_code_coverage" = "x1" ; then + deoptimize="no" + echo "$CFLAGS $EXTRA_CFLAGS" | grep '\-O' >/dev/null || deoptimize="yes" + if test "x${deoptimize}" = "xyes" ; then + JE_CFLAGS_APPEND([-O0]) + fi + JE_CFLAGS_APPEND([-fprofile-arcs -ftest-coverage]) + EXTRA_LDFLAGS="$EXTRA_LDFLAGS -fprofile-arcs -ftest-coverage" + AC_DEFINE([JEMALLOC_CODE_COVERAGE], [ ]) +fi +AC_SUBST([enable_code_coverage]) + dnl Perform no name mangling by default. AC_ARG_WITH([mangling], [AS_HELP_STRING([--with-mangling=], [Mangle symbols in ])], @@ -616,7 +641,7 @@ dnl Only optimize if not debugging. if test "x$enable_debug" = "x0" -a "x$no_CFLAGS" = "xyes" ; then dnl Make sure that an optimization flag was not specified in EXTRA_CFLAGS. optimize="no" - echo "$EXTRA_CFLAGS" | grep "\-O" >/dev/null || optimize="yes" + echo "$CFLAGS $EXTRA_CFLAGS" | grep '\-O' >/dev/null || optimize="yes" if test "x${optimize}" = "xyes" ; then if test "x$GCC" = "xyes" ; then JE_CFLAGS_APPEND([-O3]) @@ -1303,6 +1328,9 @@ dnl ============================================================================ dnl Check for typedefs, structures, and compiler characteristics. AC_HEADER_STDBOOL +dnl ============================================================================ +dnl Define commands that generate output files. 
+ AC_CONFIG_COMMANDS([include/jemalloc/internal/private_namespace.h], [ mkdir -p "${objroot}include/jemalloc/internal" "${srcdir}/include/jemalloc/internal/private_namespace.sh" "${srcdir}/include/jemalloc/internal/private_symbols.txt" > "${objroot}include/jemalloc/internal/private_namespace.h" @@ -1339,6 +1367,7 @@ AC_CONFIG_HEADERS([$cfghdrs_tup]) dnl ============================================================================ dnl Generate outputs. + AC_CONFIG_FILES([$cfgoutputs_tup config.stamp bin/jemalloc.sh]) AC_SUBST([cfgoutputs_in]) AC_SUBST([cfgoutputs_out]) @@ -1354,6 +1383,7 @@ AC_MSG_RESULT([CC : ${CC}]) AC_MSG_RESULT([CPPFLAGS : ${CPPFLAGS}]) AC_MSG_RESULT([CFLAGS : ${CFLAGS}]) AC_MSG_RESULT([LDFLAGS : ${LDFLAGS}]) +AC_MSG_RESULT([EXTRA_LDFLAGS : ${EXTRA_LDFLAGS}]) AC_MSG_RESULT([LIBS : ${LIBS}]) AC_MSG_RESULT([RPATH_EXTRA : ${RPATH_EXTRA}]) AC_MSG_RESULT([]) @@ -1380,6 +1410,7 @@ AC_MSG_RESULT([autogen : ${enable_autogen}]) AC_MSG_RESULT([experimental : ${enable_experimental}]) AC_MSG_RESULT([cc-silence : ${enable_cc_silence}]) AC_MSG_RESULT([debug : ${enable_debug}]) +AC_MSG_RESULT([code-coverage : ${enable_code_coverage}]) AC_MSG_RESULT([stats : ${enable_stats}]) AC_MSG_RESULT([prof : ${enable_prof}]) AC_MSG_RESULT([prof-libunwind : ${enable_prof_libunwind}]) diff --git a/coverage.sh b/coverage.sh new file mode 100755 index 00000000..6d1362a8 --- /dev/null +++ b/coverage.sh @@ -0,0 +1,16 @@ +#!/bin/sh + +set -e + +objdir=$1 +suffix=$2 +shift 2 +objs=$@ + +gcov -b -p -f -o "${objdir}" ${objs} + +# Move gcov outputs so that subsequent gcov invocations won't clobber results +# for the same sources with different compilation flags. +for f in `find . 
-maxdepth 1 -type f -name '*.gcov'` ; do + mv "${f}" "${f}.${suffix}" +done diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 346e39e2..3b72b35f 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -83,6 +83,9 @@ /* JEMALLOC_CC_SILENCE enables code that silences unuseful compiler warnings. */ #undef JEMALLOC_CC_SILENCE +/* JEMALLOC_CODE_COVERAGE enables test code coverage analysis. */ +#undef JEMALLOC_CODE_COVERAGE + /* * JEMALLOC_DEBUG enables assertions and other sanity checks, and disables * inline functions. diff --git a/include/jemalloc/internal/jemalloc_internal_macros.h b/include/jemalloc/internal/jemalloc_internal_macros.h index f2780982..ebb62168 100644 --- a/include/jemalloc/internal/jemalloc_internal_macros.h +++ b/include/jemalloc/internal/jemalloc_internal_macros.h @@ -6,8 +6,8 @@ * JEMALLOC_ALWAYS_INLINE_C is for use in .c files, in which case the denoted * functions are always static, regardless of whether inlining is enabled. */ -#ifdef JEMALLOC_DEBUG - /* Disable inlining to make debugging easier. */ +#if defined(JEMALLOC_DEBUG) || defined(JEMALLOC_CODE_COVERAGE) + /* Disable inlining to make debugging/profiling easier. */ # define JEMALLOC_ALWAYS_INLINE # define JEMALLOC_ALWAYS_INLINE_C static # define JEMALLOC_INLINE From 9f35a71a81adcfd6c0ea6461ecd2b84ac384e34f Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 7 Dec 2013 11:53:26 -0800 Subject: [PATCH 0335/3378] Make jemalloc.h formatting more consistent. 
--- include/jemalloc/jemalloc.sh | 5 +- include/jemalloc/jemalloc_defs.h.in | 4 +- include/jemalloc/jemalloc_macros.h.in | 26 ++++---- include/jemalloc/jemalloc_mangle.h.in | 94 +++++++++++++-------------- 4 files changed, 66 insertions(+), 63 deletions(-) diff --git a/include/jemalloc/jemalloc.sh b/include/jemalloc/jemalloc.sh index f5da79c8..ad8376e7 100755 --- a/include/jemalloc/jemalloc.sh +++ b/include/jemalloc/jemalloc.sh @@ -13,7 +13,10 @@ EOF for hdr in jemalloc_defs.h jemalloc_macros.h jemalloc_protos.h \ jemalloc_mangle.h ; do - cat "${objroot}include/jemalloc/${hdr}" | grep -v 'Generated from .* by configure\.' + cat "${objroot}include/jemalloc/${hdr}" \ + | grep -v 'Generated from .* by configure\.' \ + | sed -e 's/^#define /#define /g' \ + | sed -e 's/ $//g' echo done diff --git a/include/jemalloc/jemalloc_defs.h.in b/include/jemalloc/jemalloc_defs.h.in index bc3153b8..eb38d710 100644 --- a/include/jemalloc/jemalloc_defs.h.in +++ b/include/jemalloc/jemalloc_defs.h.in @@ -5,8 +5,8 @@ #undef JEMALLOC_EXPERIMENTAL /* - * Define overrides for non-standard allocator-related functions if they - * are present on the system. + * Define overrides for non-standard allocator-related functions if they are + * present on the system. */ #undef JEMALLOC_OVERRIDE_MEMALIGN #undef JEMALLOC_OVERRIDE_VALLOC diff --git a/include/jemalloc/jemalloc_macros.h.in b/include/jemalloc/jemalloc_macros.h.in index a4923af1..d1455319 100644 --- a/include/jemalloc/jemalloc_macros.h.in +++ b/include/jemalloc/jemalloc_macros.h.in @@ -9,20 +9,20 @@ #define JEMALLOC_VERSION_GID "@jemalloc_version_gid@" #ifdef JEMALLOC_EXPERIMENTAL -#define ALLOCM_LG_ALIGN(la) (la) -#if LG_SIZEOF_PTR == 2 -#define ALLOCM_ALIGN(a) (ffs(a)-1) -#else -#define ALLOCM_ALIGN(a) ((a < (size_t)INT_MAX) ? 
ffs(a)-1 : ffs(a>>32)+31) -#endif -#define ALLOCM_ZERO ((int)0x40) -#define ALLOCM_NO_MOVE ((int)0x80) +# define ALLOCM_LG_ALIGN(la) (la) +# if LG_SIZEOF_PTR == 2 +# define ALLOCM_ALIGN(a) (ffs(a)-1) +# else +# define ALLOCM_ALIGN(a) \ + ((a < (size_t)INT_MAX) ? ffs(a)-1 : ffs(a>>32)+31) +# endif +# define ALLOCM_ZERO ((int)0x40) +# define ALLOCM_NO_MOVE ((int)0x80) /* Bias arena index bits so that 0 encodes "ALLOCM_ARENA() unspecified". */ -#define ALLOCM_ARENA(a) ((int)(((a)+1) << 8)) - -#define ALLOCM_SUCCESS 0 -#define ALLOCM_ERR_OOM 1 -#define ALLOCM_ERR_NOT_MOVED 2 +# define ALLOCM_ARENA(a) ((int)(((a)+1) << 8)) +# define ALLOCM_SUCCESS 0 +# define ALLOCM_ERR_OOM 1 +# define ALLOCM_ERR_NOT_MOVED 2 #endif /* diff --git a/include/jemalloc/jemalloc_mangle.h.in b/include/jemalloc/jemalloc_mangle.h.in index ad4c27c0..215de9d0 100644 --- a/include/jemalloc/jemalloc_mangle.h.in +++ b/include/jemalloc/jemalloc_mangle.h.in @@ -6,31 +6,31 @@ * --with-mangling and/or --with-jemalloc-prefix configuration settings. 
*/ #ifdef JEMALLOC_MANGLE -#ifndef JEMALLOC_NO_DEMANGLE -#define JEMALLOC_NO_DEMANGLE -#endif -#define malloc_conf je_malloc_conf -#define malloc_message je_malloc_message -#define malloc je_malloc -#define calloc je_calloc -#define posix_memalign je_posix_memalign -#define aligned_alloc je_aligned_alloc -#define realloc je_realloc -#define free je_free -#define malloc_usable_size je_malloc_usable_size -#define malloc_stats_print je_malloc_stats_print -#define mallctl je_mallctl -#define mallctlnametomib je_mallctlnametomib -#define mallctlbymib je_mallctlbymib -#define memalign je_memalign -#define valloc je_valloc -#ifdef JEMALLOC_EXPERIMENTAL -#define allocm je_allocm -#define rallocm je_rallocm -#define sallocm je_sallocm -#define dallocm je_dallocm -#define nallocm je_nallocm -#endif +# ifndef JEMALLOC_NO_DEMANGLE +# define JEMALLOC_NO_DEMANGLE +# endif +# define malloc_conf je_malloc_conf +# define malloc_message je_malloc_message +# define malloc je_malloc +# define calloc je_calloc +# define posix_memalign je_posix_memalign +# define aligned_alloc je_aligned_alloc +# define realloc je_realloc +# define free je_free +# define malloc_usable_size je_malloc_usable_size +# define malloc_stats_print je_malloc_stats_print +# define mallctl je_mallctl +# define mallctlnametomib je_mallctlnametomib +# define mallctlbymib je_mallctlbymib +# define memalign je_memalign +# define valloc je_valloc +# ifdef JEMALLOC_EXPERIMENTAL +# define allocm je_allocm +# define rallocm je_rallocm +# define sallocm je_sallocm +# define dallocm je_dallocm +# define nallocm je_nallocm +# endif #endif /* @@ -41,26 +41,26 @@ * and/or --with-jemalloc-prefix. 
*/ #ifndef JEMALLOC_NO_DEMANGLE -#undef je_malloc_conf -#undef je_malloc_message -#undef je_malloc -#undef je_calloc -#undef je_posix_memalign -#undef je_aligned_alloc -#undef je_realloc -#undef je_free -#undef je_malloc_usable_size -#undef je_malloc_stats_print -#undef je_mallctl -#undef je_mallctlnametomib -#undef je_mallctlbymib -#undef je_memalign -#undef je_valloc -#ifdef JEMALLOC_EXPERIMENTAL -#undef je_allocm -#undef je_rallocm -#undef je_sallocm -#undef je_dallocm -#undef je_nallocm -#endif +# undef je_malloc_conf +# undef je_malloc_message +# undef je_malloc +# undef je_calloc +# undef je_posix_memalign +# undef je_aligned_alloc +# undef je_realloc +# undef je_free +# undef je_malloc_usable_size +# undef je_malloc_stats_print +# undef je_mallctl +# undef je_mallctlnametomib +# undef je_mallctlbymib +# undef je_memalign +# undef je_valloc +# ifdef JEMALLOC_EXPERIMENTAL +# undef je_allocm +# undef je_rallocm +# undef je_sallocm +# undef je_dallocm +# undef je_nallocm +# endif #endif From 2a83ed0284e92c7ba4bd4efe9df149ac724b2f26 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 8 Dec 2013 20:52:21 -0800 Subject: [PATCH 0336/3378] Refactor tests. Refactor tests to use explicit testing assertions, rather than diff'ing test output. This makes the test code a bit shorter, more explicitly encodes testing intent, and makes test failure diagnosis more straightforward. 
--- Makefile.in | 4 +- include/jemalloc/internal/chunk_dss.h | 2 +- include/jemalloc/internal/util.h | 2 +- src/chunk_mmap.c | 4 +- src/huge.c | 2 +- src/util.c | 6 +- test/include/test/test.h | 177 +++++++++++++++- test/integration/ALLOCM_ARENA.c | 45 ++--- test/integration/ALLOCM_ARENA.exp | 2 - test/integration/aligned_alloc.c | 86 ++++---- test/integration/aligned_alloc.exp | 25 --- test/integration/allocated.c | 128 ++++++------ test/integration/allocated.exp | 2 - test/integration/allocm.c | 203 ++++++++----------- test/integration/allocm.exp | 25 --- test/integration/mremap.c | 56 ++---- test/integration/mremap.exp | 2 - test/integration/posix_memalign.c | 96 ++++----- test/integration/posix_memalign.exp | 25 --- test/integration/rallocm.c | 223 ++++++++++----------- test/integration/rallocm.exp | 2 - test/integration/thread_arena.c | 61 +++--- test/integration/thread_arena.exp | 2 - test/integration/thread_tcache_enabled.c | 107 ++++++---- test/integration/thread_tcache_enabled.exp | 2 - test/src/test.c | 98 +++++++-- test/test.sh.in | 57 ++++-- test/unit/bitmap.c | 80 +++++--- test/unit/bitmap.exp | 2 - test/unit/tsd.c | 53 +++-- test/unit/tsd.exp | 9 - 31 files changed, 880 insertions(+), 708 deletions(-) delete mode 100644 test/integration/ALLOCM_ARENA.exp delete mode 100644 test/integration/aligned_alloc.exp delete mode 100644 test/integration/allocated.exp delete mode 100644 test/integration/allocm.exp delete mode 100644 test/integration/mremap.exp delete mode 100644 test/integration/posix_memalign.exp delete mode 100644 test/integration/rallocm.exp delete mode 100644 test/integration/thread_arena.exp delete mode 100644 test/integration/thread_tcache_enabled.exp delete mode 100644 test/unit/bitmap.exp delete mode 100644 test/unit/tsd.exp diff --git a/Makefile.in b/Makefile.in index 242331c6..ca160402 100644 --- a/Makefile.in +++ b/Makefile.in @@ -179,12 +179,12 @@ $(C_TESTLIB_INTEGRATION_OBJS): CPPFLAGS += -DJEMALLOC_INTEGRATION_TEST 
$(C_UTIL_INTEGRATION_OBJS): $(objroot)src/%.integration.$(O): $(srcroot)src/%.c $(C_TESTLIB_STRESS_OBJS): $(objroot)test/src/%.stress.$(O): $(srcroot)test/src/%.c $(C_TESTLIB_STRESS_OBJS): CPPFLAGS += -DJEMALLOC_STRESS_TEST -$(C_TESTLIB_OBJS): CPPFLAGS += -I$(objroot)test/include +$(C_TESTLIB_OBJS): CPPFLAGS += -I$(srcroot)test/include -I$(objroot)test/include $(TESTS_UNIT_OBJS): CPPFLAGS += -DJEMALLOC_UNIT_TEST $(TESTS_INTEGRATION_OBJS): CPPFLAGS += -DJEMALLOC_INTEGRATION_TEST $(TESTS_STRESS_OBJS): CPPFLAGS += -DJEMALLOC_STRESS_TEST $(TESTS_OBJS): $(objroot)test/%.$(O): $(srcroot)test/%.c -$(TESTS_OBJS): CPPFLAGS += -I$(objroot)test/include +$(TESTS_OBJS): CPPFLAGS += -I$(srcroot)test/include -I$(objroot)test/include ifneq ($(IMPORTLIB),$(SO)) $(C_OBJS): CPPFLAGS += -DDLLEXPORT endif diff --git a/include/jemalloc/internal/chunk_dss.h b/include/jemalloc/internal/chunk_dss.h index 6585f071..4535ce09 100644 --- a/include/jemalloc/internal/chunk_dss.h +++ b/include/jemalloc/internal/chunk_dss.h @@ -7,7 +7,7 @@ typedef enum { dss_prec_secondary = 2, dss_prec_limit = 3 -} dss_prec_t ; +} dss_prec_t; #define DSS_PREC_DEFAULT dss_prec_secondary #define DSS_DEFAULT "secondary" diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h index bf18d3c9..a8904d0c 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util.h @@ -84,7 +84,7 @@ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -int buferror(char *buf, size_t buflen); +int buferror(int err, char *buf, size_t buflen); uintmax_t malloc_strtoumax(const char *nptr, char **endptr, int base); void malloc_write(const char *s); diff --git a/src/chunk_mmap.c b/src/chunk_mmap.c index 8a42e759..2056d793 100644 --- a/src/chunk_mmap.c +++ b/src/chunk_mmap.c @@ -43,7 +43,7 @@ pages_map(void *addr, size_t size) if (munmap(ret, size) == -1) { char buf[BUFERROR_BUF]; - buferror(buf, sizeof(buf)); + buferror(get_errno(), buf, 
sizeof(buf)); malloc_printf(": Error in " #ifdef _WIN32 "VirtualFree" diff --git a/src/huge.c b/src/huge.c index aa08d43d..443b4007 100644 --- a/src/huge.c +++ b/src/huge.c @@ -169,7 +169,7 @@ huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, */ char buf[BUFERROR_BUF]; - buferror(buf, sizeof(buf)); + buferror(get_errno(), buf, sizeof(buf)); malloc_printf(": Error in mremap(): %s\n", buf); if (opt_abort) diff --git a/src/util.c b/src/util.c index 679fa763..d2ca4f21 100644 --- a/src/util.c +++ b/src/util.c @@ -77,7 +77,7 @@ malloc_write(const char *s) * provide a wrapper. */ int -buferror(char *buf, size_t buflen) +buferror(int err, char *buf, size_t buflen) { #ifdef _WIN32 @@ -85,14 +85,14 @@ buferror(char *buf, size_t buflen) (LPSTR)buf, buflen, NULL); return (0); #elif defined(_GNU_SOURCE) - char *b = strerror_r(errno, buf, buflen); + char *b = strerror_r(err, buf, buflen); if (b != buf) { strncpy(buf, b, buflen); buf[buflen-1] = '\0'; } return (0); #else - return (strerror_r(errno, buf, buflen)); + return (strerror_r(err, buf, buflen)); #endif } diff --git a/test/include/test/test.h b/test/include/test/test.h index ddbc55f7..3376d33a 100644 --- a/test/include/test/test.h +++ b/test/include/test/test.h @@ -1,2 +1,177 @@ -void test_fail(const char *format, ...) JEMALLOC_ATTR(format(printf, 1, 2)); +#define assert_cmp(t, a, b, cmp, neg_cmp, pri, fmt...) do { \ + t a_ = (a); \ + t b_ = (b); \ + if (!(a_ cmp b_)) { \ + p_test_fail( \ + "%s:%s:%d: Failed assertion: " \ + "(%s) "#cmp" (%s) --> " \ + "%"pri" "#neg_cmp" %"pri": ", \ + __func__, __FILE__, __LINE__, \ + #a, #b, a_, b_, fmt); \ + } \ +} while (0) + +#define assert_ptr_eq(a, b, fmt...) assert_cmp(void *, a, b, ==, \ + !=, "p", fmt) +#define assert_ptr_ne(a, b, fmt...) assert_cmp(void *, a, b, !=, \ + ==, "p", fmt) +#define assert_ptr_null(a, fmt...) assert_cmp(void *, a, NULL, ==, \ + !=, "p", fmt) +#define assert_ptr_not_null(a, fmt...) 
assert_cmp(void *, a, NULL, !=, \ + ==, "p", fmt) + +#define assert_c_eq(a, b, fmt...) assert_cmp(char, a, b, ==, !=, "c", fmt) +#define assert_c_ne(a, b, fmt...) assert_cmp(char, a, b, !=, ==, "c", fmt) +#define assert_c_lt(a, b, fmt...) assert_cmp(char, a, b, <, >=, "c", fmt) +#define assert_c_le(a, b, fmt...) assert_cmp(char, a, b, <=, >, "c", fmt) +#define assert_c_ge(a, b, fmt...) assert_cmp(char, a, b, >=, <, "c", fmt) +#define assert_c_gt(a, b, fmt...) assert_cmp(char, a, b, >, <=, "c", fmt) + +#define assert_x_eq(a, b, fmt...) assert_cmp(int, a, b, ==, !=, "#x", fmt) +#define assert_x_ne(a, b, fmt...) assert_cmp(int, a, b, !=, ==, "#x", fmt) +#define assert_x_lt(a, b, fmt...) assert_cmp(int, a, b, <, >=, "#x", fmt) +#define assert_x_le(a, b, fmt...) assert_cmp(int, a, b, <=, >, "#x", fmt) +#define assert_x_ge(a, b, fmt...) assert_cmp(int, a, b, >=, <, "#x", fmt) +#define assert_x_gt(a, b, fmt...) assert_cmp(int, a, b, >, <=, "#x", fmt) + +#define assert_d_eq(a, b, fmt...) assert_cmp(int, a, b, ==, !=, "d", fmt) +#define assert_d_ne(a, b, fmt...) assert_cmp(int, a, b, !=, ==, "d", fmt) +#define assert_d_lt(a, b, fmt...) assert_cmp(int, a, b, <, >=, "d", fmt) +#define assert_d_le(a, b, fmt...) assert_cmp(int, a, b, <=, >, "d", fmt) +#define assert_d_ge(a, b, fmt...) assert_cmp(int, a, b, >=, <, "d", fmt) +#define assert_d_gt(a, b, fmt...) assert_cmp(int, a, b, >, <=, "d", fmt) + +#define assert_u_eq(a, b, fmt...) assert_cmp(int, a, b, ==, !=, "u", fmt) +#define assert_u_ne(a, b, fmt...) assert_cmp(int, a, b, !=, ==, "u", fmt) +#define assert_u_lt(a, b, fmt...) assert_cmp(int, a, b, <, >=, "u", fmt) +#define assert_u_le(a, b, fmt...) assert_cmp(int, a, b, <=, >, "u", fmt) +#define assert_u_ge(a, b, fmt...) assert_cmp(int, a, b, >=, <, "u", fmt) +#define assert_u_gt(a, b, fmt...) assert_cmp(int, a, b, >, <=, "u", fmt) + +#define assert_zd_eq(a, b, fmt...) assert_cmp(ssize_t, a, b, ==, \ + !=, "zd", fmt) +#define assert_zd_ne(a, b, fmt...) 
assert_cmp(ssize_t, a, b, !=, \ + ==, "zd", fmt) +#define assert_zd_lt(a, b, fmt...) assert_cmp(ssize_t, a, b, <, \ + >=, "zd", fmt) +#define assert_zd_le(a, b, fmt...) assert_cmp(ssize_t, a, b, <=, \ + >, "zd", fmt) +#define assert_zd_ge(a, b, fmt...) assert_cmp(ssize_t, a, b, >=, \ + <, "zd", fmt) +#define assert_zd_gt(a, b, fmt...) assert_cmp(ssize_t, a, b, >, \ + <=, "zd", fmt) + +#define assert_zu_eq(a, b, fmt...) assert_cmp(size_t, a, b, ==, \ + !=, "zu", fmt) +#define assert_zu_ne(a, b, fmt...) assert_cmp(size_t, a, b, !=, \ + ==, "zu", fmt) +#define assert_zu_lt(a, b, fmt...) assert_cmp(size_t, a, b, <, \ + >=, "zu", fmt) +#define assert_zu_le(a, b, fmt...) assert_cmp(size_t, a, b, <=, \ + >, "zu", fmt) +#define assert_zu_ge(a, b, fmt...) assert_cmp(size_t, a, b, >=, \ + <, "zu", fmt) +#define assert_zu_gt(a, b, fmt...) assert_cmp(size_t, a, b, >, \ + <=, "zu", fmt) + +#define assert_d64_eq(a, b, fmt...) assert_cmp(int64_t, a, b, ==, \ + !=, PRId64, fmt) +#define assert_d64_ne(a, b, fmt...) assert_cmp(int64_t, a, b, !=, \ + ==, PRId64, fmt) +#define assert_d64_lt(a, b, fmt...) assert_cmp(int64_t, a, b, <, \ + >=, PRId64, fmt) +#define assert_d64_le(a, b, fmt...) assert_cmp(int64_t, a, b, <=, \ + >, PRId64, fmt) +#define assert_d64_ge(a, b, fmt...) assert_cmp(int64_t, a, b, >=, \ + <, PRId64, fmt) +#define assert_d64_gt(a, b, fmt...) assert_cmp(int64_t, a, b, >, \ + <=, PRId64, fmt) + +#define assert_u64_eq(a, b, fmt...) assert_cmp(uint64_t, a, b, ==, \ + !=, PRIu64, fmt) +#define assert_u64_ne(a, b, fmt...) assert_cmp(uint64_t, a, b, !=, \ + ==, PRIu64, fmt) +#define assert_u64_lt(a, b, fmt...) assert_cmp(uint64_t, a, b, <, \ + >=, PRIu64, fmt) +#define assert_u64_le(a, b, fmt...) assert_cmp(uint64_t, a, b, <=, \ + >, PRIu64, fmt) +#define assert_u64_ge(a, b, fmt...) assert_cmp(uint64_t, a, b, >=, \ + <, PRIu64, fmt) +#define assert_u64_gt(a, b, fmt...) assert_cmp(uint64_t, a, b, >, \ + <=, PRIu64, fmt) + +#define assert_true(a, fmt...) 
do { \ + bool a_ = (a); \ + if (!(a_ == true)) { \ + p_test_fail( \ + "%s:%s:%d: Failed assertion: " \ + "(%s) == true --> %s != true: %s\n", \ + __func__, __FILE__, __LINE__, \ + #a, a_ ? "true" : "false", fmt); \ + } \ +} while (0) +#define assert_false(a, fmt...) do { \ + bool a_ = (a); \ + if (!(a_ == false)) { \ + p_test_fail( \ + "%s:%s:%d: Failed assertion: " \ + "(%s) == false --> %s != false: %s\n", \ + __func__, __FILE__, __LINE__, \ + #a, a_ ? "true" : "false", fmt); \ + } \ +} while (0) + +#define assert_str_eq(a, b, fmt...) do { \ + if (strcmp((a), (b))) { \ + p_test_fail( \ + "%s:%s:%d: Failed assertion: " \ + "(%s) same as (%s) --> " \ + "\"%s\" differs from \"%s\": %s\n", \ + __func__, __FILE__, __LINE__, #a, #b, a, b, fmt); \ + } \ +} while (0) +#define assert_str_ne(a, b, fmt...) do { \ + if (!strcmp((a), (b))) { \ + p_test_fail( \ + "%s:%s:%d: Failed assertion: " \ + "(%s) differs from (%s) --> " \ + "\"%s\" same as \"%s\": %s\n", \ + __func__, __FILE__, __LINE__, #a, #b, a, b, fmt); \ + } \ +} while (0) + +/* + * If this enum changes, corresponding changes in test/test.sh.in are also + * necessary. + */ +typedef enum { + test_status_pass = 0, + test_status_skip = 1, + test_status_fail = 2, + + test_status_count = 3 +} test_status_t; + +typedef void (test_t)(void); + +#define TEST_BEGIN(f) \ +static void \ +f(void) \ +{ \ + p_test_init(#f); + +#define TEST_END \ + p_test_fini(); \ +} + +#define test(tests...) \ + p_test(tests, NULL) + void test_skip(const char *format, ...) JEMALLOC_ATTR(format(printf, 1, 2)); +void test_fail(const char *format, ...) JEMALLOC_ATTR(format(printf, 1, 2)); + +/* For private use by macros. 
*/ +test_status_t p_test(test_t* t, ...); +void p_test_init(const char *name); +void p_test_fini(void); +void p_test_fail(const char *format, ...); diff --git a/test/integration/ALLOCM_ARENA.c b/test/integration/ALLOCM_ARENA.c index e83056c0..ec91c59b 100644 --- a/test/integration/ALLOCM_ARENA.c +++ b/test/integration/ALLOCM_ARENA.c @@ -7,16 +7,12 @@ je_thread_start(void *arg) { unsigned thread_ind = (unsigned)(uintptr_t)arg; unsigned arena_ind; - int r; void *p; size_t rsz, sz; sz = sizeof(arena_ind); - if (mallctl("arenas.extend", &arena_ind, &sz, NULL, 0) - != 0) { - malloc_printf("Error in arenas.extend\n"); - abort(); - } + assert_d_eq(mallctl("arenas.extend", &arena_ind, &sz, NULL, 0), 0, + "Error in arenas.extend"); if (thread_ind % 4 != 3) { size_t mib[3]; @@ -24,36 +20,25 @@ je_thread_start(void *arg) const char *dss_precs[] = {"disabled", "primary", "secondary"}; const char *dss = dss_precs[thread_ind % (sizeof(dss_precs)/sizeof(char*))]; - if (mallctlnametomib("arena.0.dss", mib, &miblen) != 0) { - malloc_printf("Error in mallctlnametomib()\n"); - abort(); - } + assert_d_eq(mallctlnametomib("arena.0.dss", mib, &miblen), 0, + "Error in mallctlnametomib()"); mib[1] = arena_ind; - if (mallctlbymib(mib, miblen, NULL, NULL, (void *)&dss, - sizeof(const char *))) { - malloc_printf("Error in mallctlbymib()\n"); - abort(); - } + assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, (void *)&dss, + sizeof(const char *)), 0, "Error in mallctlbymib()"); } - r = allocm(&p, &rsz, 1, ALLOCM_ARENA(arena_ind)); - if (r != ALLOCM_SUCCESS) { - malloc_printf("Unexpected allocm() error\n"); - abort(); - } + assert_d_eq(allocm(&p, &rsz, 1, ALLOCM_ARENA(arena_ind)), + ALLOCM_SUCCESS, "Unexpected allocm() error"); dallocm(p, 0); return (NULL); } -int -main(void) +TEST_BEGIN(test_ALLOCM_ARENA) { je_thread_t threads[NTHREADS]; unsigned i; - malloc_printf("Test begin\n"); - for (i = 0; i < NTHREADS; i++) { je_thread_create(&threads[i], je_thread_start, (void *)(uintptr_t)i); @@ -61,7 
+46,13 @@ main(void) for (i = 0; i < NTHREADS; i++) je_thread_join(threads[i], NULL); - - malloc_printf("Test end\n"); - return (0); +} +TEST_END + +int +main(void) +{ + + return (test( + test_ALLOCM_ARENA)); } diff --git a/test/integration/ALLOCM_ARENA.exp b/test/integration/ALLOCM_ARENA.exp deleted file mode 100644 index 369a88dd..00000000 --- a/test/integration/ALLOCM_ARENA.exp +++ /dev/null @@ -1,2 +0,0 @@ -Test begin -Test end diff --git a/test/integration/aligned_alloc.c b/test/integration/aligned_alloc.c index 2c44751b..73f73f8e 100644 --- a/test/integration/aligned_alloc.c +++ b/test/integration/aligned_alloc.c @@ -5,34 +5,32 @@ #define MAXALIGN ((size_t)0x2000000LU) #define NITER 4 -int -main(void) +TEST_BEGIN(test_alignment_errors) { - size_t alignment, size, total; - unsigned i; - void *p, *ps[NITER]; + size_t alignment; + void *p; - malloc_printf("Test begin\n"); - - /* Test error conditions. */ alignment = 0; set_errno(0); p = aligned_alloc(alignment, 1); - if (p != NULL || get_errno() != EINVAL) { - malloc_printf( - "Expected error for invalid alignment %zu\n", alignment); - } + assert_false(p != NULL || get_errno() != EINVAL, + "Expected error for invalid alignment %zu", alignment); for (alignment = sizeof(size_t); alignment < MAXALIGN; alignment <<= 1) { set_errno(0); p = aligned_alloc(alignment + 1, 1); - if (p != NULL || get_errno() != EINVAL) { - malloc_printf( - "Expected error for invalid alignment %zu\n", - alignment + 1); - } + assert_false(p != NULL || get_errno() != EINVAL, + "Expected error for invalid alignment %zu", + alignment + 1); } +} +TEST_END + +TEST_BEGIN(test_oom_errors) +{ + size_t alignment, size; + void *p; #if LG_SIZEOF_PTR == 3 alignment = UINT64_C(0x8000000000000000); @@ -43,11 +41,9 @@ main(void) #endif set_errno(0); p = aligned_alloc(alignment, size); - if (p != NULL || get_errno() != ENOMEM) { - malloc_printf( - "Expected error for aligned_alloc(%zu, %zu)\n", - alignment, size); - } + assert_false(p != NULL || 
get_errno() != ENOMEM, + "Expected error for aligned_alloc(%zu, %zu)", + alignment, size); #if LG_SIZEOF_PTR == 3 alignment = UINT64_C(0x4000000000000000); @@ -58,11 +54,9 @@ main(void) #endif set_errno(0); p = aligned_alloc(alignment, size); - if (p != NULL || get_errno() != ENOMEM) { - malloc_printf( - "Expected error for aligned_alloc(%zu, %zu)\n", - alignment, size); - } + assert_false(p != NULL || get_errno() != ENOMEM, + "Expected error for aligned_alloc(%zu, %zu)", + alignment, size); alignment = 0x10LU; #if LG_SIZEOF_PTR == 3 @@ -72,11 +66,17 @@ main(void) #endif set_errno(0); p = aligned_alloc(alignment, size); - if (p != NULL || get_errno() != ENOMEM) { - malloc_printf( - "Expected error for aligned_alloc(&p, %zu, %zu)\n", - alignment, size); - } + assert_false(p != NULL || get_errno() != ENOMEM, + "Expected error for aligned_alloc(&p, %zu, %zu)", + alignment, size); +} +TEST_END + +TEST_BEGIN(test_alignment_and_size) +{ + size_t alignment, size, total; + unsigned i; + void *ps[NITER]; for (i = 0; i < NITER; i++) ps[i] = NULL; @@ -85,7 +85,6 @@ main(void) alignment <= MAXALIGN; alignment <<= 1) { total = 0; - malloc_printf("Alignment: %zu\n", alignment); for (size = 1; size < 3 * alignment && size < (1U << 31); size += (alignment >> (LG_SIZEOF_PTR-1)) - 1) { @@ -94,10 +93,11 @@ main(void) if (ps[i] == NULL) { char buf[BUFERROR_BUF]; - buferror(buf, sizeof(buf)); + buferror(get_errno(), buf, sizeof(buf)); test_fail( - "Error for size %zu (%#zx): %s\n", - size, size, buf); + "Error for alignment=%zu, " + "size=%zu (%#zx): %s", + alignment, size, size, buf); } total += malloc_usable_size(ps[i]); if (total >= (MAXALIGN << 1)) @@ -111,7 +111,15 @@ main(void) } } } - - malloc_printf("Test end\n"); - return (0); +} +TEST_END + +int +main(void) +{ + + return (test( + test_alignment_errors, + test_oom_errors, + test_alignment_and_size)); } diff --git a/test/integration/aligned_alloc.exp b/test/integration/aligned_alloc.exp deleted file mode 100644 index 
b5061c72..00000000 --- a/test/integration/aligned_alloc.exp +++ /dev/null @@ -1,25 +0,0 @@ -Test begin -Alignment: 8 -Alignment: 16 -Alignment: 32 -Alignment: 64 -Alignment: 128 -Alignment: 256 -Alignment: 512 -Alignment: 1024 -Alignment: 2048 -Alignment: 4096 -Alignment: 8192 -Alignment: 16384 -Alignment: 32768 -Alignment: 65536 -Alignment: 131072 -Alignment: 262144 -Alignment: 524288 -Alignment: 1048576 -Alignment: 2097152 -Alignment: 4194304 -Alignment: 8388608 -Alignment: 16777216 -Alignment: 33554432 -Test end diff --git a/test/integration/allocated.c b/test/integration/allocated.c index 73ea738d..156451dc 100644 --- a/test/integration/allocated.c +++ b/test/integration/allocated.c @@ -1,5 +1,13 @@ #include "test/jemalloc_test.h" +static const bool config_stats = +#ifdef JEMALLOC_STATS + true +#else + false +#endif + ; + void * je_thread_start(void *arg) { @@ -11,65 +19,57 @@ je_thread_start(void *arg) sz = sizeof(a0); if ((err = mallctl("thread.allocated", &a0, &sz, NULL, 0))) { - if (err == ENOENT) { -#ifdef JEMALLOC_STATS - assert(false); -#endif - goto label_return; - } - test_fail("%s(): Error in mallctl(): %s\n", __func__, + if (err == ENOENT) + goto label_ENOENT; + test_fail("%s(): Error in mallctl(): %s", __func__, strerror(err)); } sz = sizeof(ap0); if ((err = mallctl("thread.allocatedp", &ap0, &sz, NULL, 0))) { - if (err == ENOENT) { -#ifdef JEMALLOC_STATS - assert(false); -#endif - goto label_return; - } - test_fail("%s(): Error in mallctl(): %s\n", __func__, + if (err == ENOENT) + goto label_ENOENT; + test_fail("%s(): Error in mallctl(): %s", __func__, strerror(err)); } - assert(*ap0 == a0); + assert_u64_eq(*ap0, a0, + "\"thread.allocatedp\" should provide a pointer to internal " + "storage"); sz = sizeof(d0); if ((err = mallctl("thread.deallocated", &d0, &sz, NULL, 0))) { - if (err == ENOENT) { -#ifdef JEMALLOC_STATS - assert(false); -#endif - goto label_return; - } - test_fail("%s(): Error in mallctl(): %s\n", __func__, + if (err == ENOENT) + 
goto label_ENOENT; + test_fail("%s(): Error in mallctl(): %s", __func__, strerror(err)); } sz = sizeof(dp0); if ((err = mallctl("thread.deallocatedp", &dp0, &sz, NULL, 0))) { - if (err == ENOENT) { -#ifdef JEMALLOC_STATS - assert(false); -#endif - goto label_return; - } - test_fail("%s(): Error in mallctl(): %s\n", __func__, + if (err == ENOENT) + goto label_ENOENT; + test_fail("%s(): Error in mallctl(): %s", __func__, strerror(err)); } - assert(*dp0 == d0); + assert_u64_eq(*dp0, d0, + "\"thread.deallocatedp\" should provide a pointer to internal " + "storage"); p = malloc(1); - if (p == NULL) - test_fail("%s(): Error in malloc()\n", __func__); + assert_ptr_not_null(p, "Unexpected malloc() error"); sz = sizeof(a1); mallctl("thread.allocated", &a1, &sz, NULL, 0); sz = sizeof(ap1); mallctl("thread.allocatedp", &ap1, &sz, NULL, 0); - assert(*ap1 == a1); - assert(ap0 == ap1); + assert_u64_eq(*ap1, a1, + "Dereferenced \"thread.allocatedp\" value should equal " + "\"thread.allocated\" value"); + assert_ptr_eq(ap0, ap1, + "Pointer returned by \"thread.allocatedp\" should not change"); usize = malloc_usable_size(p); - assert(a0 + usize <= a1); + assert_u64_le(a0 + usize, a1, + "Allocated memory counter should increase by at least the amount " + "explicitly allocated"); free(p); @@ -77,35 +77,49 @@ je_thread_start(void *arg) mallctl("thread.deallocated", &d1, &sz, NULL, 0); sz = sizeof(dp1); mallctl("thread.deallocatedp", &dp1, &sz, NULL, 0); - assert(*dp1 == d1); - assert(dp0 == dp1); + assert_u64_eq(*dp1, d1, + "Dereferenced \"thread.deallocatedp\" value should equal " + "\"thread.deallocated\" value"); + assert_ptr_eq(dp0, dp1, + "Pointer returned by \"thread.deallocatedp\" should not change"); - assert(d0 + usize <= d1); + assert_u64_le(d0 + usize, d1, + "Deallocated memory counter should increase by at least the amount " + "explicitly deallocated"); -label_return: + return (NULL); +label_ENOENT: + assert_false(config_stats, + "ENOENT should only be returned if stats 
are disabled"); + test_skip("\"thread.allocated\" mallctl not available"); return (NULL); } +TEST_BEGIN(test_main_thread) +{ + + je_thread_start(NULL); +} +TEST_END + +TEST_BEGIN(test_subthread) +{ + je_thread_t thread; + + je_thread_create(&thread, je_thread_start, NULL); + je_thread_join(thread, NULL); +} +TEST_END + int main(void) { - int ret = 0; - je_thread_t thread; - malloc_printf("Test begin\n"); - - je_thread_start(NULL); - - je_thread_create(&thread, je_thread_start, NULL); - je_thread_join(thread, NULL); - - je_thread_start(NULL); - - je_thread_create(&thread, je_thread_start, NULL); - je_thread_join(thread, NULL); - - je_thread_start(NULL); - - malloc_printf("Test end\n"); - return (ret); + /* Run tests multiple times to check for bad interactions. */ + return (test( + test_main_thread, + test_subthread, + test_main_thread, + test_subthread, + test_main_thread)); } diff --git a/test/integration/allocated.exp b/test/integration/allocated.exp deleted file mode 100644 index 369a88dd..00000000 --- a/test/integration/allocated.exp +++ /dev/null @@ -1,2 +0,0 @@ -Test begin -Test end diff --git a/test/integration/allocm.c b/test/integration/allocm.c index 3b892827..59dfcf1c 100644 --- a/test/integration/allocm.c +++ b/test/integration/allocm.c @@ -5,61 +5,44 @@ #define MAXALIGN ((size_t)0x2000000LU) #define NITER 4 -int -main(void) +TEST_BEGIN(test_basic) { - int r; + size_t nsz, rsz, sz; void *p; - size_t nsz, rsz, sz, alignment, total; - unsigned i; - void *ps[NITER]; - - malloc_printf("Test begin\n"); sz = 42; nsz = 0; - r = nallocm(&nsz, sz, 0); - if (r != ALLOCM_SUCCESS) { - malloc_printf("Unexpected nallocm() error\n"); - abort(); - } + assert_d_eq(nallocm(&nsz, sz, 0), ALLOCM_SUCCESS, + "Unexpected nallocm() error"); rsz = 0; - r = allocm(&p, &rsz, sz, 0); - if (r != ALLOCM_SUCCESS) { - malloc_printf("Unexpected allocm() error\n"); - abort(); - } - if (rsz < sz) - malloc_printf("Real size smaller than expected\n"); - if (nsz != rsz) - 
malloc_printf("nallocm()/allocm() rsize mismatch\n"); - if (dallocm(p, 0) != ALLOCM_SUCCESS) - malloc_printf("Unexpected dallocm() error\n"); + assert_d_eq(allocm(&p, &rsz, sz, 0), ALLOCM_SUCCESS, + "Unexpected allocm() error"); + assert_zu_ge(rsz, sz, "Real size smaller than expected"); + assert_zu_eq(nsz, rsz, "nallocm()/allocm() rsize mismatch"); + assert_d_eq(dallocm(p, 0), ALLOCM_SUCCESS, + "Unexpected dallocm() error"); - r = allocm(&p, NULL, sz, 0); - if (r != ALLOCM_SUCCESS) { - malloc_printf("Unexpected allocm() error\n"); - abort(); - } - if (dallocm(p, 0) != ALLOCM_SUCCESS) - malloc_printf("Unexpected dallocm() error\n"); + assert_d_eq(allocm(&p, NULL, sz, 0), ALLOCM_SUCCESS, + "Unexpected allocm() error"); + assert_d_eq(dallocm(p, 0), ALLOCM_SUCCESS, + "Unexpected dallocm() error"); nsz = 0; - r = nallocm(&nsz, sz, ALLOCM_ZERO); - if (r != ALLOCM_SUCCESS) { - malloc_printf("Unexpected nallocm() error\n"); - abort(); - } + assert_d_eq(nallocm(&nsz, sz, ALLOCM_ZERO), ALLOCM_SUCCESS, + "Unexpected nallocm() error"); rsz = 0; - r = allocm(&p, &rsz, sz, ALLOCM_ZERO); - if (r != ALLOCM_SUCCESS) { - malloc_printf("Unexpected allocm() error\n"); - abort(); - } - if (nsz != rsz) - malloc_printf("nallocm()/allocm() rsize mismatch\n"); - if (dallocm(p, 0) != ALLOCM_SUCCESS) - malloc_printf("Unexpected dallocm() error\n"); + assert_d_eq(allocm(&p, &rsz, sz, ALLOCM_ZERO), ALLOCM_SUCCESS, + "Unexpected allocm() error"); + assert_zu_eq(nsz, rsz, "nallocm()/allocm() rsize mismatch"); + assert_d_eq(dallocm(p, 0), ALLOCM_SUCCESS, + "Unexpected dallocm() error"); +} +TEST_END + +TEST_BEGIN(test_alignment_errors) +{ + void *p; + size_t nsz, rsz, sz, alignment; #if LG_SIZEOF_PTR == 3 alignment = UINT64_C(0x8000000000000000); @@ -69,21 +52,14 @@ main(void) sz = 0x80000000LU; #endif nsz = 0; - r = nallocm(&nsz, sz, ALLOCM_ALIGN(alignment)); - if (r == ALLOCM_SUCCESS) { - malloc_printf( - "Expected error for nallocm(&nsz, %zu, %#x)\n", - sz, ALLOCM_ALIGN(alignment)); - } + 
assert_d_ne(nallocm(&nsz, sz, ALLOCM_ALIGN(alignment)), ALLOCM_SUCCESS, + "Expected error for nallocm(&nsz, %zu, %#x)", + sz, ALLOCM_ALIGN(alignment)); rsz = 0; - r = allocm(&p, &rsz, sz, ALLOCM_ALIGN(alignment)); - if (r == ALLOCM_SUCCESS) { - malloc_printf( - "Expected error for allocm(&p, %zu, %#x)\n", - sz, ALLOCM_ALIGN(alignment)); - } - if (nsz != rsz) - malloc_printf("nallocm()/allocm() rsize mismatch\n"); + assert_d_ne(allocm(&p, &rsz, sz, ALLOCM_ALIGN(alignment)), + ALLOCM_SUCCESS, "Expected error for allocm(&p, %zu, %#x)", + sz, ALLOCM_ALIGN(alignment)); + assert_zu_eq(nsz, rsz, "nallocm()/allocm() rsize mismatch"); #if LG_SIZEOF_PTR == 3 alignment = UINT64_C(0x4000000000000000); @@ -93,16 +69,12 @@ main(void) sz = 0x84000001LU; #endif nsz = 0; - r = nallocm(&nsz, sz, ALLOCM_ALIGN(alignment)); - if (r != ALLOCM_SUCCESS) - malloc_printf("Unexpected nallocm() error\n"); + assert_d_eq(nallocm(&nsz, sz, ALLOCM_ALIGN(alignment)), ALLOCM_SUCCESS, + "Unexpected nallocm() error"); rsz = 0; - r = allocm(&p, &rsz, sz, ALLOCM_ALIGN(alignment)); - if (r == ALLOCM_SUCCESS) { - malloc_printf( - "Expected error for allocm(&p, %zu, %#x)\n", - sz, ALLOCM_ALIGN(alignment)); - } + assert_d_ne(allocm(&p, &rsz, sz, ALLOCM_ALIGN(alignment)), + ALLOCM_SUCCESS, "Expected error for allocm(&p, %zu, %#x)", + sz, ALLOCM_ALIGN(alignment)); alignment = 0x10LU; #if LG_SIZEOF_PTR == 3 @@ -111,21 +83,23 @@ main(void) sz = 0xfffffff0LU; #endif nsz = 0; - r = nallocm(&nsz, sz, ALLOCM_ALIGN(alignment)); - if (r == ALLOCM_SUCCESS) { - malloc_printf( - "Expected error for nallocm(&nsz, %zu, %#x)\n", - sz, ALLOCM_ALIGN(alignment)); - } + assert_d_ne(nallocm(&nsz, sz, ALLOCM_ALIGN(alignment)), ALLOCM_SUCCESS, + "Expected error for nallocm(&nsz, %zu, %#x)", + sz, ALLOCM_ALIGN(alignment)); rsz = 0; - r = allocm(&p, &rsz, sz, ALLOCM_ALIGN(alignment)); - if (r == ALLOCM_SUCCESS) { - malloc_printf( - "Expected error for allocm(&p, %zu, %#x)\n", - sz, ALLOCM_ALIGN(alignment)); - } - if (nsz != rsz) - 
malloc_printf("nallocm()/allocm() rsize mismatch\n"); + assert_d_ne(allocm(&p, &rsz, sz, ALLOCM_ALIGN(alignment)), + ALLOCM_SUCCESS, "Expected error for allocm(&p, %zu, %#x)", + sz, ALLOCM_ALIGN(alignment)); + assert_zu_eq(nsz, rsz, "nallocm()/allocm() rsize mismatch"); +} +TEST_END + +TEST_BEGIN(test_alignment_and_size) +{ + int r; + size_t nsz, rsz, sz, alignment, total; + unsigned i; + void *ps[NITER]; for (i = 0; i < NITER; i++) ps[i] = NULL; @@ -134,44 +108,35 @@ main(void) alignment <= MAXALIGN; alignment <<= 1) { total = 0; - malloc_printf("Alignment: %zu\n", alignment); for (sz = 1; sz < 3 * alignment && sz < (1U << 31); sz += (alignment >> (LG_SIZEOF_PTR-1)) - 1) { for (i = 0; i < NITER; i++) { nsz = 0; - r = nallocm(&nsz, sz, - ALLOCM_ALIGN(alignment) | ALLOCM_ZERO); - if (r != ALLOCM_SUCCESS) { - test_fail( - "nallocm() error for size %zu" - " (%#zx): %d\n", - sz, sz, r); - } + r = nallocm(&nsz, sz, ALLOCM_ALIGN(alignment) | + ALLOCM_ZERO); + assert_d_eq(r, ALLOCM_SUCCESS, + "nallocm() error for alignment=%zu, " + "size=%zu (%#zx): %d", + alignment, sz, sz, r); rsz = 0; r = allocm(&ps[i], &rsz, sz, ALLOCM_ALIGN(alignment) | ALLOCM_ZERO); - if (r != ALLOCM_SUCCESS) { - test_fail( - "allocm() error for size %zu" - " (%#zx): %d\n", - sz, sz, r); - } - if (rsz < sz) { - malloc_printf( - "Real size smaller than" - " expected\n"); - } - if (nsz != rsz) { - malloc_printf( - "nallocm()/allocm() rsize" - " mismatch\n"); - } - if ((uintptr_t)p & (alignment-1)) { - malloc_printf( - "%p inadequately aligned for" - " alignment: %zu\n", p, alignment); - } + assert_d_eq(r, ALLOCM_SUCCESS, + "allocm() error for alignment=%zu, " + "size=%zu (%#zx): %d", + alignment, sz, sz, r); + assert_zu_ge(rsz, sz, + "Real size smaller than expected for " + "alignment=%zu, size=%zu", alignment, sz); + assert_zu_eq(nsz, rsz, + "nallocm()/allocm() rsize mismatch for " + "alignment=%zu, size=%zu", alignment, sz); + assert_ptr_null( + (void *)((uintptr_t)ps[i] & (alignment-1)), + "%p 
inadequately aligned for" + " alignment=%zu, size=%zu", ps[i], + alignment, sz); sallocm(ps[i], &rsz, 0); total += rsz; if (total >= (MAXALIGN << 1)) @@ -185,7 +150,15 @@ main(void) } } } - - malloc_printf("Test end\n"); - return (0); +} +TEST_END + +int +main(void) +{ + + return (test( + test_basic, + test_alignment_errors, + test_alignment_and_size)); } diff --git a/test/integration/allocm.exp b/test/integration/allocm.exp deleted file mode 100644 index b5061c72..00000000 --- a/test/integration/allocm.exp +++ /dev/null @@ -1,25 +0,0 @@ -Test begin -Alignment: 8 -Alignment: 16 -Alignment: 32 -Alignment: 64 -Alignment: 128 -Alignment: 256 -Alignment: 512 -Alignment: 1024 -Alignment: 2048 -Alignment: 4096 -Alignment: 8192 -Alignment: 16384 -Alignment: 32768 -Alignment: 65536 -Alignment: 131072 -Alignment: 262144 -Alignment: 524288 -Alignment: 1048576 -Alignment: 2097152 -Alignment: 4194304 -Alignment: 8388608 -Alignment: 16777216 -Alignment: 33554432 -Test end diff --git a/test/integration/mremap.c b/test/integration/mremap.c index cdef9de0..a7fb7ef0 100644 --- a/test/integration/mremap.c +++ b/test/integration/mremap.c @@ -1,59 +1,45 @@ #include "test/jemalloc_test.h" -int -main(void) +TEST_BEGIN(test_mremap) { - int ret, err; + int err; size_t sz, lg_chunk, chunksize, i; char *p, *q; - malloc_printf("Test begin\n"); - sz = sizeof(lg_chunk); - if ((err = mallctl("opt.lg_chunk", &lg_chunk, &sz, NULL, 0))) { - assert(err != ENOENT); - malloc_printf("%s(): Error in mallctl(): %s\n", __func__, - strerror(err)); - ret = 1; - goto label_return; - } + err = mallctl("opt.lg_chunk", &lg_chunk, &sz, NULL, 0); + assert_d_eq(err, 0, "Error in mallctl(): %s", strerror(err)); chunksize = ((size_t)1U) << lg_chunk; p = (char *)malloc(chunksize); - if (p == NULL) { - malloc_printf("malloc(%zu) --> %p\n", chunksize, p); - ret = 1; - goto label_return; - } + assert_ptr_not_null(p, "malloc(%zu) --> %p", chunksize, p); memset(p, 'a', chunksize); q = (char *)realloc(p, chunksize * 2); - 
if (q == NULL) { - malloc_printf("realloc(%p, %zu) --> %p\n", p, chunksize * 2, - q); - ret = 1; - goto label_return; - } + assert_ptr_not_null(q, "realloc(%p, %zu) --> %p", p, chunksize * 2, + q); for (i = 0; i < chunksize; i++) { - assert(q[i] == 'a'); + assert_c_eq(q[i], 'a', + "realloc() should preserve existing bytes across copies"); } p = q; q = (char *)realloc(p, chunksize); - if (q == NULL) { - malloc_printf("realloc(%p, %zu) --> %p\n", p, chunksize, q); - ret = 1; - goto label_return; - } + assert_ptr_not_null(q, "realloc(%p, %zu) --> %p", p, chunksize, q); for (i = 0; i < chunksize; i++) { - assert(q[i] == 'a'); + assert_c_eq(q[i], 'a', + "realloc() should preserve existing bytes across copies"); } free(q); - - ret = 0; -label_return: - malloc_printf("Test end\n"); - return (ret); +} +TEST_END + +int +main(void) +{ + + return (test( + test_mremap)); } diff --git a/test/integration/mremap.exp b/test/integration/mremap.exp deleted file mode 100644 index 369a88dd..00000000 --- a/test/integration/mremap.exp +++ /dev/null @@ -1,2 +0,0 @@ -Test begin -Test end diff --git a/test/integration/posix_memalign.c b/test/integration/posix_memalign.c index dc5cd0e2..48ca0b35 100644 --- a/test/integration/posix_memalign.c +++ b/test/integration/posix_memalign.c @@ -5,35 +5,30 @@ #define MAXALIGN ((size_t)0x2000000LU) #define NITER 4 -int -main(void) +TEST_BEGIN(test_alignment_errors) { - size_t alignment, size, total; - unsigned i; - int err; - void *p, *ps[NITER]; + size_t alignment; + void *p; - malloc_printf("Test begin\n"); - - /* Test error conditions. 
*/ for (alignment = 0; alignment < sizeof(void *); alignment++) { - err = posix_memalign(&p, alignment, 1); - if (err != EINVAL) { - malloc_printf( - "Expected error for invalid alignment %zu\n", - alignment); - } + assert_d_eq(posix_memalign(&p, alignment, 1), EINVAL, + "Expected error for invalid alignment %zu", + alignment); } for (alignment = sizeof(size_t); alignment < MAXALIGN; alignment <<= 1) { - err = posix_memalign(&p, alignment + 1, 1); - if (err == 0) { - malloc_printf( - "Expected error for invalid alignment %zu\n", - alignment + 1); - } + assert_d_ne(posix_memalign(&p, alignment + 1, 1), 0, + "Expected error for invalid alignment %zu", + alignment + 1); } +} +TEST_END + +TEST_BEGIN(test_oom_errors) +{ + size_t alignment, size; + void *p; #if LG_SIZEOF_PTR == 3 alignment = UINT64_C(0x8000000000000000); @@ -42,12 +37,9 @@ main(void) alignment = 0x80000000LU; size = 0x80000000LU; #endif - err = posix_memalign(&p, alignment, size); - if (err == 0) { - malloc_printf( - "Expected error for posix_memalign(&p, %zu, %zu)\n", - alignment, size); - } + assert_d_ne(posix_memalign(&p, alignment, size), 0, + "Expected error for posix_memalign(&p, %zu, %zu)", + alignment, size); #if LG_SIZEOF_PTR == 3 alignment = UINT64_C(0x4000000000000000); @@ -56,12 +48,9 @@ main(void) alignment = 0x40000000LU; size = 0x84000001LU; #endif - err = posix_memalign(&p, alignment, size); - if (err == 0) { - malloc_printf( - "Expected error for posix_memalign(&p, %zu, %zu)\n", - alignment, size); - } + assert_d_ne(posix_memalign(&p, alignment, size), 0, + "Expected error for posix_memalign(&p, %zu, %zu)", + alignment, size); alignment = 0x10LU; #if LG_SIZEOF_PTR == 3 @@ -69,12 +58,18 @@ main(void) #else size = 0xfffffff0LU; #endif - err = posix_memalign(&p, alignment, size); - if (err == 0) { - malloc_printf( - "Expected error for posix_memalign(&p, %zu, %zu)\n", - alignment, size); - } + assert_d_ne(posix_memalign(&p, alignment, size), 0, + "Expected error for posix_memalign(&p, %zu, 
%zu)", + alignment, size); +} +TEST_END + +TEST_BEGIN(test_alignment_and_size) +{ + size_t alignment, size, total; + unsigned i; + int err; + void *ps[NITER]; for (i = 0; i < NITER; i++) ps[i] = NULL; @@ -83,7 +78,6 @@ main(void) alignment <= MAXALIGN; alignment <<= 1) { total = 0; - malloc_printf("Alignment: %zu\n", alignment); for (size = 1; size < 3 * alignment && size < (1U << 31); size += (alignment >> (LG_SIZEOF_PTR-1)) - 1) { @@ -91,9 +85,13 @@ main(void) err = posix_memalign(&ps[i], alignment, size); if (err) { + char buf[BUFERROR_BUF]; + + buferror(get_errno(), buf, sizeof(buf)); test_fail( - "Error for size %zu (%#zx): %s\n", - size, size, strerror(err)); + "Error for alignment=%zu, " + "size=%zu (%#zx): %s", + alignment, size, size, buf); } total += malloc_usable_size(ps[i]); if (total >= (MAXALIGN << 1)) @@ -107,7 +105,15 @@ main(void) } } } - - malloc_printf("Test end\n"); - return (0); +} +TEST_END + +int +main(void) +{ + + return (test( + test_alignment_errors, + test_oom_errors, + test_alignment_and_size)); } diff --git a/test/integration/posix_memalign.exp b/test/integration/posix_memalign.exp deleted file mode 100644 index b5061c72..00000000 --- a/test/integration/posix_memalign.exp +++ /dev/null @@ -1,25 +0,0 @@ -Test begin -Alignment: 8 -Alignment: 16 -Alignment: 32 -Alignment: 64 -Alignment: 128 -Alignment: 256 -Alignment: 512 -Alignment: 1024 -Alignment: 2048 -Alignment: 4096 -Alignment: 8192 -Alignment: 16384 -Alignment: 32768 -Alignment: 65536 -Alignment: 131072 -Alignment: 262144 -Alignment: 524288 -Alignment: 1048576 -Alignment: 2097152 -Alignment: 4194304 -Alignment: 8388608 -Alignment: 16777216 -Alignment: 33554432 -Test end diff --git a/test/integration/rallocm.c b/test/integration/rallocm.c index 2c10dbae..c13cd699 100644 --- a/test/integration/rallocm.c +++ b/test/integration/rallocm.c @@ -2,127 +2,112 @@ #include "test/jemalloc_test.h" +TEST_BEGIN(test_same_size) +{ + void *p, *q; + size_t sz, tsz; + + assert_d_eq(allocm(&p, &sz, 42, 
0), ALLOCM_SUCCESS, + "Unexpected allocm() error"); + + q = p; + assert_d_eq(rallocm(&q, &tsz, sz, 0, ALLOCM_NO_MOVE), ALLOCM_SUCCESS, + "Unexpected rallocm() error"); + assert_ptr_eq(q, p, "Unexpected object move"); + assert_zu_eq(tsz, sz, "Unexpected size change: %zu --> %zu", sz, tsz); + + assert_d_eq(dallocm(p, 0), ALLOCM_SUCCESS, + "Unexpected dallocm() error"); +} +TEST_END + +TEST_BEGIN(test_extra_no_move) +{ + void *p, *q; + size_t sz, tsz; + + assert_d_eq(allocm(&p, &sz, 42, 0), ALLOCM_SUCCESS, + "Unexpected allocm() error"); + + q = p; + assert_d_eq(rallocm(&q, &tsz, sz, sz-42, ALLOCM_NO_MOVE), + ALLOCM_SUCCESS, "Unexpected rallocm() error"); + assert_ptr_eq(q, p, "Unexpected object move"); + assert_zu_eq(tsz, sz, "Unexpected size change: %zu --> %zu", sz, tsz); + + assert_d_eq(dallocm(p, 0), ALLOCM_SUCCESS, + "Unexpected dallocm() error"); +} +TEST_END + +TEST_BEGIN(test_no_move_fail) +{ + void *p, *q; + size_t sz, tsz; + + assert_d_eq(allocm(&p, &sz, 42, 0), ALLOCM_SUCCESS, + "Unexpected allocm() error"); + + q = p; + assert_d_eq(rallocm(&q, &tsz, sz + 5, 0, ALLOCM_NO_MOVE), + ALLOCM_ERR_NOT_MOVED, "Unexpected rallocm() result"); + assert_ptr_eq(q, p, "Unexpected object move"); + assert_zu_eq(tsz, sz, "Unexpected size change: %zu --> %zu", sz, tsz); + + assert_d_eq(dallocm(p, 0), ALLOCM_SUCCESS, + "Unexpected dallocm() error"); +} +TEST_END + +TEST_BEGIN(test_grow_and_shrink) +{ + void *p, *q; + size_t tsz; +#define NCYCLES 3 + unsigned i, j; +#define NSZS 2500 + size_t szs[NSZS]; +#define MAXSZ ZU(12 * 1024 * 1024) + + assert_d_eq(allocm(&p, &szs[0], 1, 0), ALLOCM_SUCCESS, + "Unexpected allocm() error"); + + for (i = 0; i < NCYCLES; i++) { + for (j = 1; j < NSZS && szs[j-1] < MAXSZ; j++) { + q = p; + assert_d_eq(rallocm(&q, &szs[j], szs[j-1]+1, 0, 0), + ALLOCM_SUCCESS, + "Unexpected rallocm() error for size=%zu-->%zu", + szs[j-1], szs[j-1]+1); + assert_zu_ne(szs[j], szs[j-1]+1, + "Expected size to at least: %zu", szs[j-1]+1); + p = q; + } + + for (j--; 
j > 0; j--) { + q = p; + assert_d_eq(rallocm(&q, &tsz, szs[j-1], 0, 0), + ALLOCM_SUCCESS, + "Unexpected rallocm() error for size=%zu-->%zu", + szs[j], szs[j-1]); + assert_zu_eq(tsz, szs[j-1], + "Expected size=%zu, got size=%zu", szs[j-1], tsz); + p = q; + } + } + + assert_d_eq(dallocm(p, 0), ALLOCM_SUCCESS, + "Unexpected dallocm() error"); +} +TEST_END + int main(void) { - size_t pagesize; - void *p, *q; - size_t sz, tsz; - int r; - malloc_printf("Test begin\n"); - - /* Get page size. */ - { -#ifdef _WIN32 - SYSTEM_INFO si; - GetSystemInfo(&si); - pagesize = (size_t)si.dwPageSize; -#else - long result = sysconf(_SC_PAGESIZE); - assert(result != -1); - pagesize = (size_t)result; -#endif - } - - r = allocm(&p, &sz, 42, 0); - if (r != ALLOCM_SUCCESS) { - malloc_printf("Unexpected allocm() error\n"); - abort(); - } - - q = p; - r = rallocm(&q, &tsz, sz, 0, ALLOCM_NO_MOVE); - if (r != ALLOCM_SUCCESS) - malloc_printf("Unexpected rallocm() error\n"); - if (q != p) - malloc_printf("Unexpected object move\n"); - if (tsz != sz) { - malloc_printf("Unexpected size change: %zu --> %zu\n", - sz, tsz); - } - - q = p; - r = rallocm(&q, &tsz, sz, 5, ALLOCM_NO_MOVE); - if (r != ALLOCM_SUCCESS) - malloc_printf("Unexpected rallocm() error\n"); - if (q != p) - malloc_printf("Unexpected object move\n"); - if (tsz != sz) { - malloc_printf("Unexpected size change: %zu --> %zu\n", - sz, tsz); - } - - q = p; - r = rallocm(&q, &tsz, sz + 5, 0, ALLOCM_NO_MOVE); - if (r != ALLOCM_ERR_NOT_MOVED) - malloc_printf("Unexpected rallocm() result\n"); - if (q != p) - malloc_printf("Unexpected object move\n"); - if (tsz != sz) { - malloc_printf("Unexpected size change: %zu --> %zu\n", - sz, tsz); - } - - q = p; - r = rallocm(&q, &tsz, sz + 5, 0, 0); - if (r != ALLOCM_SUCCESS) - malloc_printf("Unexpected rallocm() error\n"); - if (q == p) - malloc_printf("Expected object move\n"); - if (tsz == sz) { - malloc_printf("Expected size change: %zu --> %zu\n", - sz, tsz); - } - p = q; - sz = tsz; - - r = 
rallocm(&q, &tsz, pagesize*2, 0, 0); - if (r != ALLOCM_SUCCESS) - malloc_printf("Unexpected rallocm() error\n"); - if (q == p) - malloc_printf("Expected object move\n"); - if (tsz == sz) { - malloc_printf("Expected size change: %zu --> %zu\n", - sz, tsz); - } - p = q; - sz = tsz; - - r = rallocm(&q, &tsz, pagesize*4, 0, 0); - if (r != ALLOCM_SUCCESS) - malloc_printf("Unexpected rallocm() error\n"); - if (tsz == sz) { - malloc_printf("Expected size change: %zu --> %zu\n", - sz, tsz); - } - p = q; - sz = tsz; - - r = rallocm(&q, &tsz, pagesize*2, 0, ALLOCM_NO_MOVE); - if (r != ALLOCM_SUCCESS) - malloc_printf("Unexpected rallocm() error\n"); - if (q != p) - malloc_printf("Unexpected object move\n"); - if (tsz == sz) { - malloc_printf("Expected size change: %zu --> %zu\n", - sz, tsz); - } - sz = tsz; - - r = rallocm(&q, &tsz, pagesize*4, 0, ALLOCM_NO_MOVE); - if (r != ALLOCM_SUCCESS) - malloc_printf("Unexpected rallocm() error\n"); - if (q != p) - malloc_printf("Unexpected object move\n"); - if (tsz == sz) { - malloc_printf("Expected size change: %zu --> %zu\n", - sz, tsz); - } - sz = tsz; - - dallocm(p, 0); - - malloc_printf("Test end\n"); - return (0); + return (test( + test_same_size, + test_extra_no_move, + test_no_move_fail, + test_grow_and_shrink)); } diff --git a/test/integration/rallocm.exp b/test/integration/rallocm.exp deleted file mode 100644 index 369a88dd..00000000 --- a/test/integration/rallocm.exp +++ /dev/null @@ -1,2 +0,0 @@ -Test begin -Test end diff --git a/test/integration/thread_arena.c b/test/integration/thread_arena.c index eb5b0988..cf0aad73 100644 --- a/test/integration/thread_arena.c +++ b/test/integration/thread_arena.c @@ -12,36 +12,33 @@ je_thread_start(void *arg) int err; p = malloc(1); - if (p == NULL) { - malloc_printf("%s(): Error in malloc()\n", __func__); - return (void *)1; - } + assert_ptr_not_null(p, "Error in malloc()"); free(p); size = sizeof(arena_ind); if ((err = mallctl("thread.arena", &arena_ind, &size, &main_arena_ind, 
sizeof(main_arena_ind)))) { - malloc_printf("%s(): Error in mallctl(): %s\n", __func__, - strerror(err)); - return (void *)1; + char buf[BUFERROR_BUF]; + + buferror(err, buf, sizeof(buf)); + test_fail("Error in mallctl(): %s", buf); } size = sizeof(arena_ind); - if ((err = mallctl("thread.arena", &arena_ind, &size, NULL, - 0))) { - malloc_printf("%s(): Error in mallctl(): %s\n", __func__, - strerror(err)); - return (void *)1; + if ((err = mallctl("thread.arena", &arena_ind, &size, NULL, 0))) { + char buf[BUFERROR_BUF]; + + buferror(err, buf, sizeof(buf)); + test_fail("Error in mallctl(): %s", buf); } - assert(arena_ind == main_arena_ind); + assert_u_eq(arena_ind, main_arena_ind, + "Arena index should be same as for main thread"); return (NULL); } -int -main(void) +TEST_BEGIN(test_thread_arena) { - int ret = 0; void *p; unsigned arena_ind; size_t size; @@ -49,21 +46,15 @@ main(void) je_thread_t threads[NTHREADS]; unsigned i; - malloc_printf("Test begin\n"); - p = malloc(1); - if (p == NULL) { - malloc_printf("%s(): Error in malloc()\n", __func__); - ret = 1; - goto label_return; - } + assert_ptr_not_null(p, "Error in malloc()"); size = sizeof(arena_ind); if ((err = mallctl("thread.arena", &arena_ind, &size, NULL, 0))) { - malloc_printf("%s(): Error in mallctl(): %s\n", __func__, - strerror(err)); - ret = 1; - goto label_return; + char buf[BUFERROR_BUF]; + + buferror(err, buf, sizeof(buf)); + test_fail("Error in mallctl(): %s", buf); } for (i = 0; i < NTHREADS; i++) { @@ -74,11 +65,15 @@ main(void) for (i = 0; i < NTHREADS; i++) { intptr_t join_ret; je_thread_join(threads[i], (void *)&join_ret); - if (join_ret != 0) - ret = 1; + assert_zd_eq(join_ret, 0, "Unexpected thread join error"); } - -label_return: - malloc_printf("Test end\n"); - return (ret); +} +TEST_END + +int +main(void) +{ + + return (test( + test_thread_arena)); } diff --git a/test/integration/thread_arena.exp b/test/integration/thread_arena.exp deleted file mode 100644 index 369a88dd..00000000 --- 
a/test/integration/thread_arena.exp +++ /dev/null @@ -1,2 +0,0 @@ -Test begin -Test end diff --git a/test/integration/thread_tcache_enabled.c b/test/integration/thread_tcache_enabled.c index f9da0526..52e8a11c 100644 --- a/test/integration/thread_tcache_enabled.c +++ b/test/integration/thread_tcache_enabled.c @@ -1,5 +1,13 @@ #include "test/jemalloc_test.h" +static const bool config_tcache = +#ifdef JEMALLOC_TCACHE + true +#else + false +#endif + ; + void * je_thread_start(void *arg) { @@ -10,81 +18,96 @@ je_thread_start(void *arg) sz = sizeof(bool); if ((err = mallctl("thread.tcache.enabled", &e0, &sz, NULL, 0))) { if (err == ENOENT) { -#ifdef JEMALLOC_TCACHE - assert(false); -#endif + assert_false(config_tcache, + "ENOENT should only be returned if tcache is " + "disabled"); } - goto label_return; + goto label_ENOENT; } if (e0) { e1 = false; - assert(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz) - == 0); - assert(e0); + assert_d_eq(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz), + 0, "Unexpected mallctl() error"); + assert_true(e0, "tcache should be enabled"); } e1 = true; - assert(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz) == 0); - assert(e0 == false); + assert_d_eq(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz), 0, + "Unexpected mallctl() error"); + assert_false(e0, "tcache should be disabled"); e1 = true; - assert(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz) == 0); - assert(e0); + assert_d_eq(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz), 0, + "Unexpected mallctl() error"); + assert_true(e0, "tcache should be enabled"); e1 = false; - assert(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz) == 0); - assert(e0); + assert_d_eq(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz), 0, + "Unexpected mallctl() error"); + assert_true(e0, "tcache should be enabled"); e1 = false; - assert(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz) == 0); - assert(e0 == false); + assert_d_eq(mallctl("thread.tcache.enabled", &e0, &sz, &e1, 
sz), 0, + "Unexpected mallctl() error"); + assert_false(e0, "tcache should be disabled"); free(malloc(1)); e1 = true; - assert(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz) == 0); - assert(e0 == false); + assert_d_eq(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz), 0, + "Unexpected mallctl() error"); + assert_false(e0, "tcache should be disabled"); free(malloc(1)); e1 = true; - assert(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz) == 0); - assert(e0); + assert_d_eq(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz), 0, + "Unexpected mallctl() error"); + assert_true(e0, "tcache should be enabled"); free(malloc(1)); e1 = false; - assert(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz) == 0); - assert(e0); + assert_d_eq(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz), 0, + "Unexpected mallctl() error"); + assert_true(e0, "tcache should be enabled"); free(malloc(1)); e1 = false; - assert(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz) == 0); - assert(e0 == false); + assert_d_eq(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz), 0, + "Unexpected mallctl() error"); + assert_false(e0, "tcache should be disabled"); free(malloc(1)); -label_return: + return (NULL); +label_ENOENT: + test_skip("\"thread.tcache.enabled\" mallctl not available"); return (NULL); } +TEST_BEGIN(test_main_thread) +{ + + je_thread_start(NULL); +} +TEST_END + +TEST_BEGIN(test_subthread) +{ + je_thread_t thread; + + je_thread_create(&thread, je_thread_start, NULL); + je_thread_join(thread, NULL); +} +TEST_END + int main(void) { - int ret = 0; - je_thread_t thread; - malloc_printf("Test begin\n"); - - je_thread_start(NULL); - - je_thread_create(&thread, je_thread_start, NULL); - je_thread_join(thread, NULL); - - je_thread_start(NULL); - - je_thread_create(&thread, je_thread_start, NULL); - je_thread_join(thread, NULL); - - je_thread_start(NULL); - - malloc_printf("Test end\n"); - return (ret); + /* Run tests multiple times to check for bad interactions. 
*/ + return (test( + test_main_thread, + test_subthread, + test_main_thread, + test_subthread, + test_main_thread)); } diff --git a/test/integration/thread_tcache_enabled.exp b/test/integration/thread_tcache_enabled.exp deleted file mode 100644 index 369a88dd..00000000 --- a/test/integration/thread_tcache_enabled.exp +++ /dev/null @@ -1,2 +0,0 @@ -Test begin -Test end diff --git a/test/src/test.c b/test/src/test.c index 1bc34b4a..eb1f5ef9 100644 --- a/test/src/test.c +++ b/test/src/test.c @@ -1,19 +1,9 @@ #include "test/jemalloc_test.h" -#define JEMALLOC_TEST_EXIT_FAIL 1 -#define JEMALLOC_TEST_EXIT_SKIP 2 - -JEMALLOC_ATTR(format(printf, 1, 2)) -void -test_fail(const char *format, ...) -{ - va_list ap; - - va_start(ap, format); - malloc_vcprintf(NULL, NULL, format, ap); - va_end(ap); - exit(JEMALLOC_TEST_EXIT_FAIL); -} +static unsigned test_count = 0; +static test_status_t test_counts[test_status_count] = {0, 0, 0}; +static test_status_t test_status = test_status_pass; +static const char * test_name = ""; JEMALLOC_ATTR(format(printf, 1, 2)) void @@ -24,5 +14,83 @@ test_skip(const char *format, ...) va_start(ap, format); malloc_vcprintf(NULL, NULL, format, ap); va_end(ap); - exit(JEMALLOC_TEST_EXIT_SKIP); + test_status = test_status_skip; +} + +JEMALLOC_ATTR(format(printf, 1, 2)) +void +test_fail(const char *format, ...) 
+{ + va_list ap; + + va_start(ap, format); + malloc_vcprintf(NULL, NULL, format, ap); + va_end(ap); + test_status = test_status_fail; +} + +static const char * +test_status_string(test_status_t test_status) +{ + + switch (test_status) { + case test_status_pass: return "pass"; + case test_status_skip: return "skip"; + case test_status_fail: return "fail"; + default: not_reached(); + } +} + +void +p_test_init(const char *name) +{ + + test_count++; + test_status = test_status_pass; + test_name = name; +} + +void +p_test_fini(void) +{ + + test_counts[test_status]++; + malloc_printf("%s: %s\n", test_name, test_status_string(test_status)); +} + +test_status_t +p_test(test_t* t, ...) +{ + test_status_t ret = test_status_pass; + va_list ap; + + va_start(ap, t); + for (; t != NULL; t = va_arg(ap, test_t*)) { + t(); + if (test_status > ret) + ret = test_status; + } + va_end(ap); + + malloc_printf("tests: %u, pass: %u, skip: %u, fail: %u\n", + test_count, + test_counts[test_status_pass], + test_counts[test_status_skip], + test_counts[test_status_fail]); + + return (ret); +} + +void +p_test_fail(const char *format, ...) +{ + va_list ap; + + va_start(ap, format); + malloc_vcprintf(NULL, NULL, format, ap); + format = va_arg(ap, const char *); + malloc_vcprintf(NULL, NULL, format, ap); + va_end(ap); + malloc_printf("\n"); + test_status = test_status_fail; } diff --git a/test/test.sh.in b/test/test.sh.in index 726cd636..93c1978f 100644 --- a/test/test.sh.in +++ b/test/test.sh.in @@ -11,27 +11,42 @@ case @abi@ in ;; esac -total=0 -failures=0 -echo "=========================================" +# Corresponds to test_status_t. +pass_code=0 +skip_code=1 +fail_code=2 + +echo "================================================================================" +pass_count=0 +skip_count=0 +fail_count=0 for t in $@; do - total=`expr $total + 1` - /bin/echo -n "${t} ... " + echo "${t}:" ${t}@exe@ @abs_srcroot@ @abs_objroot@ > @objroot@${t}.out 2>&1 - result=$? 
- if [ -e "@srcroot@${t}.exp" ] ; then - diff -w -u @srcroot@${t}.exp @objroot@${t}.out >/dev/null 2>&1 - fail=$? - if [ "${fail}" -eq "1" ] ; then - failures=`expr ${failures} + 1` - echo "*** FAIL ***" - else - echo "pass" - fi - else - echo "*** FAIL *** (.exp file is missing)" - failures=`expr ${failures} + 1` - fi + result_code=$? + /bin/echo -n " " + tail -n 1 @objroot@${t}.out + case ${result_code} in + ${pass_code}) + pass_count=$((pass_count+1)) + ;; + ${skip_code}) + skip_count=$((skip_count+1)) + ;; + ${fail_code}) + fail_count=$((fail_count+1)) + echo " *** ${t} failure; see @objroot@${t}.out for full output ***" 1>&2 + ;; + *) + echo "Test harness error" 1>&2 + exit 1 + esac done -echo "=========================================" -echo "Failures: ${failures}/${total}" +echo "================================================================================" +echo "Test suite summary: pass: ${pass_count}, skip: ${skip_count}, fail: ${fail_count}" + +if [ ${fail_count} -eq 0 ] ; then + exit 0 +else + exit 1 +fi diff --git a/test/unit/bitmap.c b/test/unit/bitmap.c index 37c3043c..8086b888 100644 --- a/test/unit/bitmap.c +++ b/test/unit/bitmap.c @@ -6,21 +6,21 @@ # define MAXBITS (1U << LG_BITMAP_MAXBITS) #endif -static void -test_bitmap_size(void) +TEST_BEGIN(test_bitmap_size) { size_t i, prev_size; prev_size = 0; for (i = 1; i <= MAXBITS; i++) { size_t size = bitmap_size(i); - assert(size >= prev_size); + assert_true(size >= prev_size, + "Bitmap size is smaller than expected"); prev_size = size; } } +TEST_END -static void -test_bitmap_init(void) +TEST_BEGIN(test_bitmap_init) { size_t i; @@ -33,15 +33,17 @@ test_bitmap_init(void) bitmap_info_ngroups(&binfo)); bitmap_init(bitmap, &binfo); - for (j = 0; j < i; j++) - assert(bitmap_get(bitmap, &binfo, j) == false); + for (j = 0; j < i; j++) { + assert_false(bitmap_get(bitmap, &binfo, j), + "Bit should be unset"); + } free(bitmap); } } } +TEST_END -static void -test_bitmap_set(void) +TEST_BEGIN(test_bitmap_set) 
{ size_t i; @@ -56,14 +58,15 @@ test_bitmap_set(void) for (j = 0; j < i; j++) bitmap_set(bitmap, &binfo, j); - assert(bitmap_full(bitmap, &binfo)); + assert_true(bitmap_full(bitmap, &binfo), + "All bits should be set"); free(bitmap); } } } +TEST_END -static void -test_bitmap_unset(void) +TEST_BEGIN(test_bitmap_unset) { size_t i; @@ -78,19 +81,21 @@ test_bitmap_unset(void) for (j = 0; j < i; j++) bitmap_set(bitmap, &binfo, j); - assert(bitmap_full(bitmap, &binfo)); + assert_true(bitmap_full(bitmap, &binfo), + "All bits should be set"); for (j = 0; j < i; j++) bitmap_unset(bitmap, &binfo, j); for (j = 0; j < i; j++) bitmap_set(bitmap, &binfo, j); - assert(bitmap_full(bitmap, &binfo)); + assert_true(bitmap_full(bitmap, &binfo), + "All bits should be set"); free(bitmap); } } } +TEST_END -static void -test_bitmap_sfu(void) +TEST_BEGIN(test_bitmap_sfu) { size_t i; @@ -104,9 +109,13 @@ test_bitmap_sfu(void) bitmap_init(bitmap, &binfo); /* Iteratively set bits starting at the beginning. */ - for (j = 0; j < i; j++) - assert(bitmap_sfu(bitmap, &binfo) == j); - assert(bitmap_full(bitmap, &binfo)); + for (j = 0; j < i; j++) { + assert_zd_eq(bitmap_sfu(bitmap, &binfo), j, + "First unset bit should be just after " + "previous first unset bit"); + } + assert_true(bitmap_full(bitmap, &binfo), + "All bits should be set"); /* * Iteratively unset bits starting at the end, and @@ -114,10 +123,13 @@ test_bitmap_sfu(void) */ for (j = i - 1; j >= 0; j--) { bitmap_unset(bitmap, &binfo, j); - assert(bitmap_sfu(bitmap, &binfo) == j); + assert_zd_eq(bitmap_sfu(bitmap, &binfo), j, + "First unset bit should the bit previously " + "unset"); bitmap_unset(bitmap, &binfo, j); } - assert(bitmap_get(bitmap, &binfo, 0) == false); + assert_false(bitmap_get(bitmap, &binfo, 0), + "Bit should be unset"); /* * Iteratively set bits starting at the beginning, and @@ -125,27 +137,29 @@ test_bitmap_sfu(void) */ for (j = 1; j < i; j++) { bitmap_set(bitmap, &binfo, j - 1); - assert(bitmap_sfu(bitmap, &binfo) 
== j); + assert_zd_eq(bitmap_sfu(bitmap, &binfo), j, + "First unset bit should be just after the " + "bit previously set"); bitmap_unset(bitmap, &binfo, j); } - assert(bitmap_sfu(bitmap, &binfo) == i - 1); - assert(bitmap_full(bitmap, &binfo)); + assert_zd_eq(bitmap_sfu(bitmap, &binfo), i - 1, + "First unset bit should be the last bit"); + assert_true(bitmap_full(bitmap, &binfo), + "All bits should be set"); free(bitmap); } } } +TEST_END int main(void) { - malloc_printf("Test begin\n"); - test_bitmap_size(); - test_bitmap_init(); - test_bitmap_set(); - test_bitmap_unset(); - test_bitmap_sfu(); - - malloc_printf("Test end\n"); - return (0); + return (test( + test_bitmap_size, + test_bitmap_init, + test_bitmap_set, + test_bitmap_unset, + test_bitmap_sfu)); } diff --git a/test/unit/bitmap.exp b/test/unit/bitmap.exp deleted file mode 100644 index 369a88dd..00000000 --- a/test/unit/bitmap.exp +++ /dev/null @@ -1,2 +0,0 @@ -Test begin -Test end diff --git a/test/unit/tsd.c b/test/unit/tsd.c index dacddfff..1160a79b 100644 --- a/test/unit/tsd.c +++ b/test/unit/tsd.c @@ -4,12 +4,16 @@ typedef unsigned int data_t; +static bool data_cleanup_executed; + void data_cleanup(void *arg) { data_t *data = (data_t *)arg; - malloc_printf("Cleanup for data %x.\n", *data); + assert_x_eq(*data, THREAD_DATA, + "Argument passed into cleanup function should match tsd value"); + data_cleanup_executed = true; } malloc_tsd_protos(, data, data_t) @@ -21,34 +25,47 @@ malloc_tsd_funcs(, data, data_t, DATA_INIT, data_cleanup) void * je_thread_start(void *arg) { - data_t d = (data_t)(uintptr_t) arg; - malloc_printf("Initial tsd_get returns %x. Expected %x.\n", - *data_tsd_get(), DATA_INIT); + data_t d = (data_t)(uintptr_t)arg; + assert_x_eq(*data_tsd_get(), DATA_INIT, + "Initial tsd get should return initialization value"); data_tsd_set(&d); - malloc_printf("After tsd_set: %x. 
Expected %x.\n", - *data_tsd_get(), d); + assert_x_eq(*data_tsd_get(), d, + "After tsd set, tsd get should return value that was set"); d = 0; - malloc_printf("After resetting local data: %x. Expected %x.\n", - *data_tsd_get(), (data_t)(uintptr_t) arg); + assert_x_eq(*data_tsd_get(), (data_t)(uintptr_t)arg, + "Resetting local data should have no effect on tsd"); return NULL; } -int -main(void) +TEST_BEGIN(test_tsd_main_thread) +{ + + je_thread_start((void *) 0xa5f3e329); +} +TEST_END + +TEST_BEGIN(test_tsd_sub_thread) { je_thread_t thread; - malloc_printf("Test begin\n"); - - data_tsd_boot(); - je_thread_start((void *) 0xa5f3e329); - + data_cleanup_executed = false; je_thread_create(&thread, je_thread_start, (void *) THREAD_DATA); je_thread_join(thread, NULL); - - malloc_printf("Test end\n"); - return (0); + assert_true(data_cleanup_executed, + "Cleanup function should have executed"); +} +TEST_END + +int +main(void) +{ + + data_tsd_boot(); + + return (test( + test_tsd_main_thread, + test_tsd_sub_thread)); } diff --git a/test/unit/tsd.exp b/test/unit/tsd.exp deleted file mode 100644 index b4abedcf..00000000 --- a/test/unit/tsd.exp +++ /dev/null @@ -1,9 +0,0 @@ -Test begin -Initial tsd_get returns 12345678. Expected 12345678. -After tsd_set: a5f3e329. Expected a5f3e329. -After resetting local data: a5f3e329. Expected a5f3e329. -Initial tsd_get returns 12345678. Expected 12345678. -After tsd_set: 72b65c10. Expected 72b65c10. -After resetting local data: 72b65c10. Expected 72b65c10. -Cleanup for data 72b65c10. -Test end From a4f124f59fa5f702231432a7e5fa45140ba81e2a Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 8 Dec 2013 22:28:27 -0800 Subject: [PATCH 0337/3378] Normalize #define whitespace. Consistently use a tab rather than a space following #define. 
--- configure.ac | 2 +- include/jemalloc/internal/ckh.h | 2 +- .../jemalloc/internal/jemalloc_internal.h.in | 10 +++--- include/jemalloc/internal/prng.h | 4 +-- include/jemalloc/internal/ql.h | 36 +++++++++---------- include/jemalloc/internal/qr.h | 22 ++++++------ include/jemalloc/internal/tsd.h | 2 +- include/jemalloc/internal/util.h | 2 +- src/bitmap.c | 2 +- src/mutex.c | 2 +- src/stats.c | 8 ++--- src/util.c | 2 +- test/integration/aligned_alloc.c | 6 ++-- test/integration/allocm.c | 6 ++-- test/integration/posix_memalign.c | 6 ++-- test/unit/tsd.c | 4 +-- 16 files changed, 58 insertions(+), 58 deletions(-) diff --git a/configure.ac b/configure.ac index 45b510c5..0b547165 100644 --- a/configure.ac +++ b/configure.ac @@ -833,7 +833,7 @@ fi ) if test "x$enable_mremap" = "x1" ; then JE_COMPILABLE([mremap(...MREMAP_FIXED...)], [ -#define _GNU_SOURCE +#define _GNU_SOURCE #include ], [ void *p = mremap((void *)0, 0, 0, MREMAP_MAYMOVE|MREMAP_FIXED, (void *)0); diff --git a/include/jemalloc/internal/ckh.h b/include/jemalloc/internal/ckh.h index 50c39ed9..58712a6a 100644 --- a/include/jemalloc/internal/ckh.h +++ b/include/jemalloc/internal/ckh.h @@ -17,7 +17,7 @@ typedef bool ckh_keycomp_t (const void *, const void *); * There are 2^LG_CKH_BUCKET_CELLS cells in each hash table bucket. Try to fit * one bucket per L1 cache line. 
*/ -#define LG_CKH_BUCKET_CELLS (LG_CACHELINE - LG_SIZEOF_PTR - 1) +#define LG_CKH_BUCKET_CELLS (LG_CACHELINE - LG_SIZEOF_PTR - 1) #endif /* JEMALLOC_H_TYPES */ /******************************************************************************/ diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index edb40a29..3dd9761d 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -1,5 +1,5 @@ #ifndef JEMALLOC_INTERNAL_H -#define JEMALLOC_INTERNAL_H +#define JEMALLOC_INTERNAL_H #include #ifdef _WIN32 # include @@ -224,7 +224,7 @@ static const bool config_ivsalloc = * JEMALLOC_H_INLINES : Inline functions. */ /******************************************************************************/ -#define JEMALLOC_H_TYPES +#define JEMALLOC_H_TYPES #include "jemalloc/internal/jemalloc_internal_macros.h" @@ -460,7 +460,7 @@ static const bool config_ivsalloc = #undef JEMALLOC_H_TYPES /******************************************************************************/ -#define JEMALLOC_H_STRUCTS +#define JEMALLOC_H_STRUCTS #include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" @@ -496,7 +496,7 @@ typedef struct { #undef JEMALLOC_H_STRUCTS /******************************************************************************/ -#define JEMALLOC_H_EXTERNS +#define JEMALLOC_H_EXTERNS extern bool opt_abort; extern bool opt_junk; @@ -556,7 +556,7 @@ void jemalloc_postfork_child(void); #undef JEMALLOC_H_EXTERNS /******************************************************************************/ -#define JEMALLOC_H_INLINES +#define JEMALLOC_H_INLINES #include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" diff --git a/include/jemalloc/internal/prng.h b/include/jemalloc/internal/prng.h index 83a5462b..7b2b0651 100644 --- a/include/jemalloc/internal/prng.h +++ b/include/jemalloc/internal/prng.h @@ -25,7 +25,7 @@ * uint32_t state : Seed value. 
* const uint32_t a, c : See above discussion. */ -#define prng32(r, lg_range, state, a, c) do { \ +#define prng32(r, lg_range, state, a, c) do { \ assert(lg_range > 0); \ assert(lg_range <= 32); \ \ @@ -35,7 +35,7 @@ } while (false) /* Same as prng32(), but 64 bits of pseudo-randomness, using uint64_t. */ -#define prng64(r, lg_range, state, a, c) do { \ +#define prng64(r, lg_range, state, a, c) do { \ assert(lg_range > 0); \ assert(lg_range <= 64); \ \ diff --git a/include/jemalloc/internal/ql.h b/include/jemalloc/internal/ql.h index a9ed2393..f70c5f6f 100644 --- a/include/jemalloc/internal/ql.h +++ b/include/jemalloc/internal/ql.h @@ -1,61 +1,61 @@ /* * List definitions. */ -#define ql_head(a_type) \ +#define ql_head(a_type) \ struct { \ a_type *qlh_first; \ } -#define ql_head_initializer(a_head) {NULL} +#define ql_head_initializer(a_head) {NULL} -#define ql_elm(a_type) qr(a_type) +#define ql_elm(a_type) qr(a_type) /* List functions. */ -#define ql_new(a_head) do { \ +#define ql_new(a_head) do { \ (a_head)->qlh_first = NULL; \ } while (0) -#define ql_elm_new(a_elm, a_field) qr_new((a_elm), a_field) +#define ql_elm_new(a_elm, a_field) qr_new((a_elm), a_field) -#define ql_first(a_head) ((a_head)->qlh_first) +#define ql_first(a_head) ((a_head)->qlh_first) -#define ql_last(a_head, a_field) \ +#define ql_last(a_head, a_field) \ ((ql_first(a_head) != NULL) \ ? qr_prev(ql_first(a_head), a_field) : NULL) -#define ql_next(a_head, a_elm, a_field) \ +#define ql_next(a_head, a_elm, a_field) \ ((ql_last(a_head, a_field) != (a_elm)) \ ? qr_next((a_elm), a_field) : NULL) -#define ql_prev(a_head, a_elm, a_field) \ +#define ql_prev(a_head, a_elm, a_field) \ ((ql_first(a_head) != (a_elm)) ? 
qr_prev((a_elm), a_field) \ : NULL) -#define ql_before_insert(a_head, a_qlelm, a_elm, a_field) do { \ +#define ql_before_insert(a_head, a_qlelm, a_elm, a_field) do { \ qr_before_insert((a_qlelm), (a_elm), a_field); \ if (ql_first(a_head) == (a_qlelm)) { \ ql_first(a_head) = (a_elm); \ } \ } while (0) -#define ql_after_insert(a_qlelm, a_elm, a_field) \ +#define ql_after_insert(a_qlelm, a_elm, a_field) \ qr_after_insert((a_qlelm), (a_elm), a_field) -#define ql_head_insert(a_head, a_elm, a_field) do { \ +#define ql_head_insert(a_head, a_elm, a_field) do { \ if (ql_first(a_head) != NULL) { \ qr_before_insert(ql_first(a_head), (a_elm), a_field); \ } \ ql_first(a_head) = (a_elm); \ } while (0) -#define ql_tail_insert(a_head, a_elm, a_field) do { \ +#define ql_tail_insert(a_head, a_elm, a_field) do { \ if (ql_first(a_head) != NULL) { \ qr_before_insert(ql_first(a_head), (a_elm), a_field); \ } \ ql_first(a_head) = qr_next((a_elm), a_field); \ } while (0) -#define ql_remove(a_head, a_elm, a_field) do { \ +#define ql_remove(a_head, a_elm, a_field) do { \ if (ql_first(a_head) == (a_elm)) { \ ql_first(a_head) = qr_next(ql_first(a_head), a_field); \ } \ @@ -66,18 +66,18 @@ struct { \ } \ } while (0) -#define ql_head_remove(a_head, a_type, a_field) do { \ +#define ql_head_remove(a_head, a_type, a_field) do { \ a_type *t = ql_first(a_head); \ ql_remove((a_head), t, a_field); \ } while (0) -#define ql_tail_remove(a_head, a_type, a_field) do { \ +#define ql_tail_remove(a_head, a_type, a_field) do { \ a_type *t = ql_last(a_head, a_field); \ ql_remove((a_head), t, a_field); \ } while (0) -#define ql_foreach(a_var, a_head, a_field) \ +#define ql_foreach(a_var, a_head, a_field) \ qr_foreach((a_var), ql_first(a_head), a_field) -#define ql_reverse_foreach(a_var, a_head, a_field) \ +#define ql_reverse_foreach(a_var, a_head, a_field) \ qr_reverse_foreach((a_var), ql_first(a_head), a_field) diff --git a/include/jemalloc/internal/qr.h b/include/jemalloc/internal/qr.h index fe22352f..602944b9 
100644 --- a/include/jemalloc/internal/qr.h +++ b/include/jemalloc/internal/qr.h @@ -1,28 +1,28 @@ /* Ring definitions. */ -#define qr(a_type) \ +#define qr(a_type) \ struct { \ a_type *qre_next; \ a_type *qre_prev; \ } /* Ring functions. */ -#define qr_new(a_qr, a_field) do { \ +#define qr_new(a_qr, a_field) do { \ (a_qr)->a_field.qre_next = (a_qr); \ (a_qr)->a_field.qre_prev = (a_qr); \ } while (0) -#define qr_next(a_qr, a_field) ((a_qr)->a_field.qre_next) +#define qr_next(a_qr, a_field) ((a_qr)->a_field.qre_next) -#define qr_prev(a_qr, a_field) ((a_qr)->a_field.qre_prev) +#define qr_prev(a_qr, a_field) ((a_qr)->a_field.qre_prev) -#define qr_before_insert(a_qrelm, a_qr, a_field) do { \ +#define qr_before_insert(a_qrelm, a_qr, a_field) do { \ (a_qr)->a_field.qre_prev = (a_qrelm)->a_field.qre_prev; \ (a_qr)->a_field.qre_next = (a_qrelm); \ (a_qr)->a_field.qre_prev->a_field.qre_next = (a_qr); \ (a_qrelm)->a_field.qre_prev = (a_qr); \ } while (0) -#define qr_after_insert(a_qrelm, a_qr, a_field) \ +#define qr_after_insert(a_qrelm, a_qr, a_field) \ do \ { \ (a_qr)->a_field.qre_next = (a_qrelm)->a_field.qre_next; \ @@ -31,7 +31,7 @@ struct { \ (a_qrelm)->a_field.qre_next = (a_qr); \ } while (0) -#define qr_meld(a_qr_a, a_qr_b, a_field) do { \ +#define qr_meld(a_qr_a, a_qr_b, a_field) do { \ void *t; \ (a_qr_a)->a_field.qre_prev->a_field.qre_next = (a_qr_b); \ (a_qr_b)->a_field.qre_prev->a_field.qre_next = (a_qr_a); \ @@ -42,10 +42,10 @@ struct { \ /* qr_meld() and qr_split() are functionally equivalent, so there's no need to * have two copies of the code. 
*/ -#define qr_split(a_qr_a, a_qr_b, a_field) \ +#define qr_split(a_qr_a, a_qr_b, a_field) \ qr_meld((a_qr_a), (a_qr_b), a_field) -#define qr_remove(a_qr, a_field) do { \ +#define qr_remove(a_qr, a_field) do { \ (a_qr)->a_field.qre_prev->a_field.qre_next \ = (a_qr)->a_field.qre_next; \ (a_qr)->a_field.qre_next->a_field.qre_prev \ @@ -54,13 +54,13 @@ struct { \ (a_qr)->a_field.qre_prev = (a_qr); \ } while (0) -#define qr_foreach(var, a_qr, a_field) \ +#define qr_foreach(var, a_qr, a_field) \ for ((var) = (a_qr); \ (var) != NULL; \ (var) = (((var)->a_field.qre_next != (a_qr)) \ ? (var)->a_field.qre_next : NULL)) -#define qr_reverse_foreach(var, a_qr, a_field) \ +#define qr_reverse_foreach(var, a_qr, a_field) \ for ((var) = ((a_qr) != NULL) ? qr_prev(a_qr, a_field) : NULL; \ (var) != NULL; \ (var) = (((var) != (a_qr)) \ diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index eec14445..5f7ad1c5 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -81,7 +81,7 @@ extern __thread a_type a_name##_tls; \ extern pthread_key_t a_name##_tsd; \ extern bool a_name##_booted; #elif (defined(_WIN32)) -#define malloc_tsd_externs(a_name, a_type) \ +#define malloc_tsd_externs(a_name, a_type) \ extern DWORD a_name##_tsd; \ extern bool a_name##_booted; #else diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h index a8904d0c..302444d5 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util.h @@ -14,7 +14,7 @@ * Wrap a cpp argument that contains commas such that it isn't broken up into * multiple arguments. */ -#define JEMALLOC_ARG_CONCAT(...) __VA_ARGS__ +#define JEMALLOC_ARG_CONCAT(...) __VA_ARGS__ /* * Silence compiler warnings due to uninitialized values. 
This is used diff --git a/src/bitmap.c b/src/bitmap.c index b47e2629..e2bd907d 100644 --- a/src/bitmap.c +++ b/src/bitmap.c @@ -1,4 +1,4 @@ -#define JEMALLOC_BITMAP_C_ +#define JEMALLOC_BITMAP_C_ #include "jemalloc/internal/jemalloc_internal.h" /******************************************************************************/ diff --git a/src/mutex.c b/src/mutex.c index 55e18c23..788eca38 100644 --- a/src/mutex.c +++ b/src/mutex.c @@ -6,7 +6,7 @@ #endif #ifndef _CRT_SPINCOUNT -#define _CRT_SPINCOUNT 4000 +#define _CRT_SPINCOUNT 4000 #endif /******************************************************************************/ diff --git a/src/stats.c b/src/stats.c index 43f87af6..bef2ab33 100644 --- a/src/stats.c +++ b/src/stats.c @@ -345,25 +345,25 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, malloc_cprintf(write_cb, cbopaque, "Assertions %s\n", bv ? "enabled" : "disabled"); -#define OPT_WRITE_BOOL(n) \ +#define OPT_WRITE_BOOL(n) \ if ((err = je_mallctl("opt."#n, &bv, &bsz, NULL, 0)) \ == 0) { \ malloc_cprintf(write_cb, cbopaque, \ " opt."#n": %s\n", bv ? 
"true" : "false"); \ } -#define OPT_WRITE_SIZE_T(n) \ +#define OPT_WRITE_SIZE_T(n) \ if ((err = je_mallctl("opt."#n, &sv, &ssz, NULL, 0)) \ == 0) { \ malloc_cprintf(write_cb, cbopaque, \ " opt."#n": %zu\n", sv); \ } -#define OPT_WRITE_SSIZE_T(n) \ +#define OPT_WRITE_SSIZE_T(n) \ if ((err = je_mallctl("opt."#n, &ssv, &sssz, NULL, 0)) \ == 0) { \ malloc_cprintf(write_cb, cbopaque, \ " opt."#n": %zd\n", ssv); \ } -#define OPT_WRITE_CHAR_P(n) \ +#define OPT_WRITE_CHAR_P(n) \ if ((err = je_mallctl("opt."#n, &cpv, &cpsz, NULL, 0)) \ == 0) { \ malloc_cprintf(write_cb, cbopaque, \ diff --git a/src/util.c b/src/util.c index d2ca4f21..6cedf8c2 100644 --- a/src/util.c +++ b/src/util.c @@ -331,7 +331,7 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) APPEND_C(' '); \ } \ } while (0) -#define GET_ARG_NUMERIC(val, len) do { \ +#define GET_ARG_NUMERIC(val, len) do { \ switch (len) { \ case '?': \ val = va_arg(ap, int); \ diff --git a/test/integration/aligned_alloc.c b/test/integration/aligned_alloc.c index 73f73f8e..17c2151c 100644 --- a/test/integration/aligned_alloc.c +++ b/test/integration/aligned_alloc.c @@ -1,9 +1,9 @@ #include "test/jemalloc_test.h" -#define CHUNK 0x400000 +#define CHUNK 0x400000 /* #define MAXALIGN ((size_t)UINT64_C(0x80000000000)) */ -#define MAXALIGN ((size_t)0x2000000LU) -#define NITER 4 +#define MAXALIGN ((size_t)0x2000000LU) +#define NITER 4 TEST_BEGIN(test_alignment_errors) { diff --git a/test/integration/allocm.c b/test/integration/allocm.c index 59dfcf1c..3886280a 100644 --- a/test/integration/allocm.c +++ b/test/integration/allocm.c @@ -1,9 +1,9 @@ #include "test/jemalloc_test.h" -#define CHUNK 0x400000 +#define CHUNK 0x400000 /* #define MAXALIGN ((size_t)UINT64_C(0x80000000000)) */ -#define MAXALIGN ((size_t)0x2000000LU) -#define NITER 4 +#define MAXALIGN ((size_t)0x2000000LU) +#define NITER 4 TEST_BEGIN(test_basic) { diff --git a/test/integration/posix_memalign.c b/test/integration/posix_memalign.c index 
48ca0b35..c88a4dcb 100644 --- a/test/integration/posix_memalign.c +++ b/test/integration/posix_memalign.c @@ -1,9 +1,9 @@ #include "test/jemalloc_test.h" -#define CHUNK 0x400000 +#define CHUNK 0x400000 /* #define MAXALIGN ((size_t)UINT64_C(0x80000000000)) */ -#define MAXALIGN ((size_t)0x2000000LU) -#define NITER 4 +#define MAXALIGN ((size_t)0x2000000LU) +#define NITER 4 TEST_BEGIN(test_alignment_errors) { diff --git a/test/unit/tsd.c b/test/unit/tsd.c index 1160a79b..71feb847 100644 --- a/test/unit/tsd.c +++ b/test/unit/tsd.c @@ -1,6 +1,6 @@ #include "test/jemalloc_test.h" -#define THREAD_DATA 0x72b65c10 +#define THREAD_DATA 0x72b65c10 typedef unsigned int data_t; @@ -18,7 +18,7 @@ data_cleanup(void *arg) malloc_tsd_protos(, data, data_t) malloc_tsd_externs(data, data_t) -#define DATA_INIT 0x12345678 +#define DATA_INIT 0x12345678 malloc_tsd_data(, data, data_t, DATA_INIT) malloc_tsd_funcs(, data, data_t, DATA_INIT, data_cleanup) From 80061b6df0a8bef0cedbd947d74932ff1c2511e8 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 9 Dec 2013 13:21:08 -0800 Subject: [PATCH 0338/3378] Integrate SFMT 1.3.3 into test infrastructure. Integrate the SIMD-oriented Fast Mersenne Twister (SFMT) 1.3.3 into the test infrastructure. The sfmt_t state encapsulation modification comes from Crux (http://www.canonware.com/Crux/) and enables multiple concurrent PRNGs. test/unit/SFMT.c is an adaptation of SFMT's test.c that performs all the same validation, both for 32- and 64-bit generation. 
--- .gitignore | 1 + Makefile.in | 4 +- configure.ac | 8 + test/include/test/SFMT-alti.h | 191 +++ test/include/test/SFMT-params.h | 132 ++ test/include/test/SFMT-params11213.h | 81 ++ test/include/test/SFMT-params1279.h | 81 ++ test/include/test/SFMT-params132049.h | 81 ++ test/include/test/SFMT-params19937.h | 81 ++ test/include/test/SFMT-params216091.h | 81 ++ test/include/test/SFMT-params2281.h | 81 ++ test/include/test/SFMT-params4253.h | 81 ++ test/include/test/SFMT-params44497.h | 81 ++ test/include/test/SFMT-params607.h | 81 ++ test/include/test/SFMT-params86243.h | 81 ++ test/include/test/SFMT-sse2.h | 160 ++ test/include/test/SFMT.h | 213 +++ test/include/test/jemalloc_test.h.in | 4 + test/include/test/jemalloc_test_defs.h.in | 3 + test/include/test/test.h | 26 + test/src/SFMT.c | 717 +++++++++ test/unit/SFMT.c | 1608 +++++++++++++++++++++ 22 files changed, 3876 insertions(+), 1 deletion(-) create mode 100644 test/include/test/SFMT-alti.h create mode 100644 test/include/test/SFMT-params.h create mode 100644 test/include/test/SFMT-params11213.h create mode 100644 test/include/test/SFMT-params1279.h create mode 100644 test/include/test/SFMT-params132049.h create mode 100644 test/include/test/SFMT-params19937.h create mode 100644 test/include/test/SFMT-params216091.h create mode 100644 test/include/test/SFMT-params2281.h create mode 100644 test/include/test/SFMT-params4253.h create mode 100644 test/include/test/SFMT-params44497.h create mode 100644 test/include/test/SFMT-params607.h create mode 100644 test/include/test/SFMT-params86243.h create mode 100644 test/include/test/SFMT-sse2.h create mode 100644 test/include/test/SFMT.h create mode 100644 test/include/test/jemalloc_test_defs.h.in create mode 100644 test/src/SFMT.c create mode 100644 test/unit/SFMT.c diff --git a/.gitignore b/.gitignore index b7715e8d..1d593e76 100644 --- a/.gitignore +++ b/.gitignore @@ -39,6 +39,7 @@ /test/test.sh test/include/test/jemalloc_test.h 
+test/include/test/jemalloc_test_defs.h /test/integration/[A-Za-z]* !/test/integration/[A-Za-z]*.* diff --git a/Makefile.in b/Makefile.in index ca160402..0dd54a72 100644 --- a/Makefile.in +++ b/Makefile.in @@ -103,9 +103,11 @@ DOCS_XML := $(objroot)doc/jemalloc$(install_suffix).xml DOCS_HTML := $(DOCS_XML:$(objroot)%.xml=$(srcroot)%.html) DOCS_MAN3 := $(DOCS_XML:$(objroot)%.xml=$(srcroot)%.3) DOCS := $(DOCS_HTML) $(DOCS_MAN3) -C_TESTLIB_SRCS := $(srcroot)test/src/test.c $(srcroot)test/src/thread.c +C_TESTLIB_SRCS := $(srcroot)test/src/SFMT.c $(srcroot)test/src/test.c \ + $(srcroot)test/src/thread.c C_UTIL_INTEGRATION_SRCS := $(srcroot)src/util.c TESTS_UNIT := $(srcroot)test/unit/bitmap.c \ + $(srcroot)test/unit/SFMT.c \ $(srcroot)test/unit/tsd.c TESTS_INTEGRATION := $(srcroot)test/integration/aligned_alloc.c \ $(srcroot)test/integration/allocated.c \ diff --git a/configure.ac b/configure.ac index 0b547165..02842b63 100644 --- a/configure.ac +++ b/configure.ac @@ -199,6 +199,7 @@ case "${host_cpu}" in if test "x${je_cv_asm}" = "xyes" ; then CPU_SPINWAIT='__asm__ volatile("pause")' fi + AC_DEFINE_UNQUOTED([HAVE_SSE2], [ ]) ;; x86_64) JE_COMPILABLE([__asm__ syntax], [], @@ -206,6 +207,10 @@ case "${host_cpu}" in if test "x${je_cv_asm}" = "xyes" ; then CPU_SPINWAIT='__asm__ volatile("pause")' fi + AC_DEFINE_UNQUOTED([HAVE_SSE2], [ ]) + ;; + powerpc) + AC_DEFINE_UNQUOTED([HAVE_ALTIVEC], [ ]) ;; *) ;; @@ -574,6 +579,7 @@ cfghdrs_in="${cfghdrs_in} ${srcroot}include/jemalloc/internal/public_namespace.s cfghdrs_in="${cfghdrs_in} ${srcroot}include/jemalloc/internal/public_unnamespace.sh" cfghdrs_in="${cfghdrs_in} ${srcroot}include/jemalloc/internal/size_classes.sh" cfghdrs_in="${cfghdrs_in} ${srcroot}include/jemalloc/jemalloc.sh" +cfghdrs_in="${cfghdrs_in} ${srcroot}test/include/test/jemalloc_test_defs.h.in" cfghdrs_out="include/jemalloc/jemalloc_defs.h" cfghdrs_out="${cfghdrs_out} include/jemalloc/jemalloc${install_suffix}.h" @@ -584,9 +590,11 @@ cfghdrs_out="${cfghdrs_out} 
include/jemalloc/internal/private_unnamespace.h" cfghdrs_out="${cfghdrs_out} include/jemalloc/internal/public_namespace.h" cfghdrs_out="${cfghdrs_out} include/jemalloc/internal/public_unnamespace.h" cfghdrs_out="${cfghdrs_out} include/jemalloc/internal/size_classes.h" +cfghdrs_out="${cfghdrs_out} test/include/test/jemalloc_test_defs.h" cfghdrs_tup="include/jemalloc/jemalloc_defs.h:include/jemalloc/jemalloc_defs.h.in" cfghdrs_tup="${cfghdrs_tup} include/jemalloc/internal/jemalloc_internal_defs.h:${srcroot}include/jemalloc/internal/jemalloc_internal_defs.h.in" +cfghdrs_tup="${cfghdrs_tup} test/include/test/jemalloc_test_defs.h:${srcroot}test/include/test/jemalloc_test_defs.h.in" dnl Do not silence irrelevant compiler warnings by default, since enabling this dnl option incurs a performance penalty. diff --git a/test/include/test/SFMT-alti.h b/test/include/test/SFMT-alti.h new file mode 100644 index 00000000..3942bbcf --- /dev/null +++ b/test/include/test/SFMT-alti.h @@ -0,0 +1,191 @@ +/* + * This file derives from SFMT 1.3.3 + * (http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/SFMT/index.html), which was + * released under the terms of the following license: + * + * Copyright (c) 2006,2007 Mutsuo Saito, Makoto Matsumoto and Hiroshima + * University. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. 
+ * * Neither the name of the Hiroshima University nor the names of + * its contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +/** + * @file SFMT-alti.h + * + * @brief SIMD oriented Fast Mersenne Twister(SFMT) + * pseudorandom number generator + * + * @author Mutsuo Saito (Hiroshima University) + * @author Makoto Matsumoto (Hiroshima University) + * + * Copyright (C) 2007 Mutsuo Saito, Makoto Matsumoto and Hiroshima + * University. All rights reserved. + * + * The new BSD License is applied to this software. + * see LICENSE.txt + */ + +#ifndef SFMT_ALTI_H +#define SFMT_ALTI_H + +inline static vector unsigned int vec_recursion(vector unsigned int a, + vector unsigned int b, + vector unsigned int c, + vector unsigned int d) + ALWAYSINLINE; + +/** + * This function represents the recursion formula in AltiVec and BIG ENDIAN. 
+ * @param a a 128-bit part of the interal state array + * @param b a 128-bit part of the interal state array + * @param c a 128-bit part of the interal state array + * @param d a 128-bit part of the interal state array + * @return output + */ +inline static vector unsigned int vec_recursion(vector unsigned int a, + vector unsigned int b, + vector unsigned int c, + vector unsigned int d) { + + const vector unsigned int sl1 = ALTI_SL1; + const vector unsigned int sr1 = ALTI_SR1; +#ifdef ONLY64 + const vector unsigned int mask = ALTI_MSK64; + const vector unsigned char perm_sl = ALTI_SL2_PERM64; + const vector unsigned char perm_sr = ALTI_SR2_PERM64; +#else + const vector unsigned int mask = ALTI_MSK; + const vector unsigned char perm_sl = ALTI_SL2_PERM; + const vector unsigned char perm_sr = ALTI_SR2_PERM; +#endif + vector unsigned int v, w, x, y, z; + x = vec_perm(a, (vector unsigned int)perm_sl, perm_sl); + v = a; + y = vec_sr(b, sr1); + z = vec_perm(c, (vector unsigned int)perm_sr, perm_sr); + w = vec_sl(d, sl1); + z = vec_xor(z, w); + y = vec_and(y, mask); + v = vec_xor(v, x); + z = vec_xor(z, y); + z = vec_xor(z, v); + return z; +} + +/** + * This function fills the internal state array with pseudorandom + * integers. + */ +inline static void gen_rand_all(sfmt_t *ctx) { + int i; + vector unsigned int r, r1, r2; + + r1 = ctx->sfmt[N - 2].s; + r2 = ctx->sfmt[N - 1].s; + for (i = 0; i < N - POS1; i++) { + r = vec_recursion(ctx->sfmt[i].s, ctx->sfmt[i + POS1].s, r1, r2); + ctx->sfmt[i].s = r; + r1 = r2; + r2 = r; + } + for (; i < N; i++) { + r = vec_recursion(ctx->sfmt[i].s, ctx->sfmt[i + POS1 - N].s, r1, r2); + ctx->sfmt[i].s = r; + r1 = r2; + r2 = r; + } +} + +/** + * This function fills the user-specified array with pseudorandom + * integers. + * + * @param array an 128-bit array to be filled by pseudorandom numbers. + * @param size number of 128-bit pesudorandom numbers to be generated. 
+ */ +inline static void gen_rand_array(sfmt_t *ctx, w128_t *array, int size) { + int i, j; + vector unsigned int r, r1, r2; + + r1 = ctx->sfmt[N - 2].s; + r2 = ctx->sfmt[N - 1].s; + for (i = 0; i < N - POS1; i++) { + r = vec_recursion(ctx->sfmt[i].s, ctx->sfmt[i + POS1].s, r1, r2); + array[i].s = r; + r1 = r2; + r2 = r; + } + for (; i < N; i++) { + r = vec_recursion(ctx->sfmt[i].s, array[i + POS1 - N].s, r1, r2); + array[i].s = r; + r1 = r2; + r2 = r; + } + /* main loop */ + for (; i < size - N; i++) { + r = vec_recursion(array[i - N].s, array[i + POS1 - N].s, r1, r2); + array[i].s = r; + r1 = r2; + r2 = r; + } + for (j = 0; j < 2 * N - size; j++) { + ctx->sfmt[j].s = array[j + size - N].s; + } + for (; i < size; i++) { + r = vec_recursion(array[i - N].s, array[i + POS1 - N].s, r1, r2); + array[i].s = r; + ctx->sfmt[j++].s = r; + r1 = r2; + r2 = r; + } +} + +#ifndef ONLY64 +#if defined(__APPLE__) +#define ALTI_SWAP (vector unsigned char) \ + (4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11) +#else +#define ALTI_SWAP {4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11} +#endif +/** + * This function swaps high and low 32-bit of 64-bit integers in user + * specified array. + * + * @param array an 128-bit array to be swaped. + * @param size size of 128-bit array. + */ +inline static void swap(w128_t *array, int size) { + int i; + const vector unsigned char perm = ALTI_SWAP; + + for (i = 0; i < size; i++) { + array[i].s = vec_perm(array[i].s, (vector unsigned int)perm, perm); + } +} +#endif + +#endif diff --git a/test/include/test/SFMT-params.h b/test/include/test/SFMT-params.h new file mode 100644 index 00000000..ade66222 --- /dev/null +++ b/test/include/test/SFMT-params.h @@ -0,0 +1,132 @@ +/* + * This file derives from SFMT 1.3.3 + * (http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/SFMT/index.html), which was + * released under the terms of the following license: + * + * Copyright (c) 2006,2007 Mutsuo Saito, Makoto Matsumoto and Hiroshima + * University. 
All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * * Neither the name of the Hiroshima University nor the names of + * its contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef SFMT_PARAMS_H +#define SFMT_PARAMS_H + +#if !defined(MEXP) +#ifdef __GNUC__ + #warning "MEXP is not defined. I assume MEXP is 19937." +#endif + #define MEXP 19937 +#endif +/*----------------- + BASIC DEFINITIONS + -----------------*/ +/** Mersenne Exponent. The period of the sequence + * is a multiple of 2^MEXP-1. 
+ * #define MEXP 19937 */ +/** SFMT generator has an internal state array of 128-bit integers, + * and N is its size. */ +#define N (MEXP / 128 + 1) +/** N32 is the size of internal state array when regarded as an array + * of 32-bit integers.*/ +#define N32 (N * 4) +/** N64 is the size of internal state array when regarded as an array + * of 64-bit integers.*/ +#define N64 (N * 2) + +/*---------------------- + the parameters of SFMT + following definitions are in paramsXXXX.h file. + ----------------------*/ +/** the pick up position of the array. +#define POS1 122 +*/ + +/** the parameter of shift left as four 32-bit registers. +#define SL1 18 + */ + +/** the parameter of shift left as one 128-bit register. + * The 128-bit integer is shifted by (SL2 * 8) bits. +#define SL2 1 +*/ + +/** the parameter of shift right as four 32-bit registers. +#define SR1 11 +*/ + +/** the parameter of shift right as one 128-bit register. + * The 128-bit integer is shifted by (SL2 * 8) bits. +#define SR2 1 +*/ + +/** A bitmask, used in the recursion. These parameters are introduced + * to break symmetry of SIMD. +#define MSK1 0xdfffffefU +#define MSK2 0xddfecb7fU +#define MSK3 0xbffaffffU +#define MSK4 0xbffffff6U +*/ + +/** These definitions are part of a 128-bit period certification vector. 
+#define PARITY1 0x00000001U +#define PARITY2 0x00000000U +#define PARITY3 0x00000000U +#define PARITY4 0xc98e126aU +*/ + +#if MEXP == 607 + #include "test/SFMT-params607.h" +#elif MEXP == 1279 + #include "test/SFMT-params1279.h" +#elif MEXP == 2281 + #include "test/SFMT-params2281.h" +#elif MEXP == 4253 + #include "test/SFMT-params4253.h" +#elif MEXP == 11213 + #include "test/SFMT-params11213.h" +#elif MEXP == 19937 + #include "test/SFMT-params19937.h" +#elif MEXP == 44497 + #include "test/SFMT-params44497.h" +#elif MEXP == 86243 + #include "test/SFMT-params86243.h" +#elif MEXP == 132049 + #include "test/SFMT-params132049.h" +#elif MEXP == 216091 + #include "test/SFMT-params216091.h" +#else +#ifdef __GNUC__ + #error "MEXP is not valid." + #undef MEXP +#else + #undef MEXP +#endif + +#endif + +#endif /* SFMT_PARAMS_H */ diff --git a/test/include/test/SFMT-params11213.h b/test/include/test/SFMT-params11213.h new file mode 100644 index 00000000..2994bd21 --- /dev/null +++ b/test/include/test/SFMT-params11213.h @@ -0,0 +1,81 @@ +/* + * This file derives from SFMT 1.3.3 + * (http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/SFMT/index.html), which was + * released under the terms of the following license: + * + * Copyright (c) 2006,2007 Mutsuo Saito, Makoto Matsumoto and Hiroshima + * University. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. 
+ * * Neither the name of the Hiroshima University nor the names of + * its contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#ifndef SFMT_PARAMS11213_H +#define SFMT_PARAMS11213_H + +#define POS1 68 +#define SL1 14 +#define SL2 3 +#define SR1 7 +#define SR2 3 +#define MSK1 0xeffff7fbU +#define MSK2 0xffffffefU +#define MSK3 0xdfdfbfffU +#define MSK4 0x7fffdbfdU +#define PARITY1 0x00000001U +#define PARITY2 0x00000000U +#define PARITY3 0xe8148000U +#define PARITY4 0xd0c7afa3U + + +/* PARAMETERS FOR ALTIVEC */ +#if defined(__APPLE__) /* For OSX */ + #define ALTI_SL1 (vector unsigned int)(SL1, SL1, SL1, SL1) + #define ALTI_SR1 (vector unsigned int)(SR1, SR1, SR1, SR1) + #define ALTI_MSK (vector unsigned int)(MSK1, MSK2, MSK3, MSK4) + #define ALTI_MSK64 \ + (vector unsigned int)(MSK2, MSK1, MSK4, MSK3) + #define ALTI_SL2_PERM \ + (vector unsigned char)(3,21,21,21,7,0,1,2,11,4,5,6,15,8,9,10) + #define ALTI_SL2_PERM64 \ + (vector unsigned char)(3,4,5,6,7,29,29,29,11,12,13,14,15,0,1,2) + #define ALTI_SR2_PERM \ + (vector unsigned char)(5,6,7,0,9,10,11,4,13,14,15,8,19,19,19,12) + #define ALTI_SR2_PERM64 \ + (vector unsigned char)(13,14,15,0,1,2,3,4,19,19,19,8,9,10,11,12) +#else /* For OTHER OSs(Linux?) 
*/ + #define ALTI_SL1 {SL1, SL1, SL1, SL1} + #define ALTI_SR1 {SR1, SR1, SR1, SR1} + #define ALTI_MSK {MSK1, MSK2, MSK3, MSK4} + #define ALTI_MSK64 {MSK2, MSK1, MSK4, MSK3} + #define ALTI_SL2_PERM {3,21,21,21,7,0,1,2,11,4,5,6,15,8,9,10} + #define ALTI_SL2_PERM64 {3,4,5,6,7,29,29,29,11,12,13,14,15,0,1,2} + #define ALTI_SR2_PERM {5,6,7,0,9,10,11,4,13,14,15,8,19,19,19,12} + #define ALTI_SR2_PERM64 {13,14,15,0,1,2,3,4,19,19,19,8,9,10,11,12} +#endif /* For OSX */ +#define IDSTR "SFMT-11213:68-14-3-7-3:effff7fb-ffffffef-dfdfbfff-7fffdbfd" + +#endif /* SFMT_PARAMS11213_H */ diff --git a/test/include/test/SFMT-params1279.h b/test/include/test/SFMT-params1279.h new file mode 100644 index 00000000..d7959f98 --- /dev/null +++ b/test/include/test/SFMT-params1279.h @@ -0,0 +1,81 @@ +/* + * This file derives from SFMT 1.3.3 + * (http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/SFMT/index.html), which was + * released under the terms of the following license: + * + * Copyright (c) 2006,2007 Mutsuo Saito, Makoto Matsumoto and Hiroshima + * University. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * * Neither the name of the Hiroshima University nor the names of + * its contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef SFMT_PARAMS1279_H +#define SFMT_PARAMS1279_H + +#define POS1 7 +#define SL1 14 +#define SL2 3 +#define SR1 5 +#define SR2 1 +#define MSK1 0xf7fefffdU +#define MSK2 0x7fefcfffU +#define MSK3 0xaff3ef3fU +#define MSK4 0xb5ffff7fU +#define PARITY1 0x00000001U +#define PARITY2 0x00000000U +#define PARITY3 0x00000000U +#define PARITY4 0x20000000U + + +/* PARAMETERS FOR ALTIVEC */ +#if defined(__APPLE__) /* For OSX */ + #define ALTI_SL1 (vector unsigned int)(SL1, SL1, SL1, SL1) + #define ALTI_SR1 (vector unsigned int)(SR1, SR1, SR1, SR1) + #define ALTI_MSK (vector unsigned int)(MSK1, MSK2, MSK3, MSK4) + #define ALTI_MSK64 \ + (vector unsigned int)(MSK2, MSK1, MSK4, MSK3) + #define ALTI_SL2_PERM \ + (vector unsigned char)(3,21,21,21,7,0,1,2,11,4,5,6,15,8,9,10) + #define ALTI_SL2_PERM64 \ + (vector unsigned char)(3,4,5,6,7,29,29,29,11,12,13,14,15,0,1,2) + #define ALTI_SR2_PERM \ + (vector unsigned char)(7,0,1,2,11,4,5,6,15,8,9,10,17,12,13,14) + #define ALTI_SR2_PERM64 \ + (vector unsigned char)(15,0,1,2,3,4,5,6,17,8,9,10,11,12,13,14) +#else /* For OTHER OSs(Linux?) 
*/ + #define ALTI_SL1 {SL1, SL1, SL1, SL1} + #define ALTI_SR1 {SR1, SR1, SR1, SR1} + #define ALTI_MSK {MSK1, MSK2, MSK3, MSK4} + #define ALTI_MSK64 {MSK2, MSK1, MSK4, MSK3} + #define ALTI_SL2_PERM {3,21,21,21,7,0,1,2,11,4,5,6,15,8,9,10} + #define ALTI_SL2_PERM64 {3,4,5,6,7,29,29,29,11,12,13,14,15,0,1,2} + #define ALTI_SR2_PERM {7,0,1,2,11,4,5,6,15,8,9,10,17,12,13,14} + #define ALTI_SR2_PERM64 {15,0,1,2,3,4,5,6,17,8,9,10,11,12,13,14} +#endif /* For OSX */ +#define IDSTR "SFMT-1279:7-14-3-5-1:f7fefffd-7fefcfff-aff3ef3f-b5ffff7f" + +#endif /* SFMT_PARAMS1279_H */ diff --git a/test/include/test/SFMT-params132049.h b/test/include/test/SFMT-params132049.h new file mode 100644 index 00000000..a1dcec39 --- /dev/null +++ b/test/include/test/SFMT-params132049.h @@ -0,0 +1,81 @@ +/* + * This file derives from SFMT 1.3.3 + * (http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/SFMT/index.html), which was + * released under the terms of the following license: + * + * Copyright (c) 2006,2007 Mutsuo Saito, Makoto Matsumoto and Hiroshima + * University. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * * Neither the name of the Hiroshima University nor the names of + * its contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef SFMT_PARAMS132049_H +#define SFMT_PARAMS132049_H + +#define POS1 110 +#define SL1 19 +#define SL2 1 +#define SR1 21 +#define SR2 1 +#define MSK1 0xffffbb5fU +#define MSK2 0xfb6ebf95U +#define MSK3 0xfffefffaU +#define MSK4 0xcff77fffU +#define PARITY1 0x00000001U +#define PARITY2 0x00000000U +#define PARITY3 0xcb520000U +#define PARITY4 0xc7e91c7dU + + +/* PARAMETERS FOR ALTIVEC */ +#if defined(__APPLE__) /* For OSX */ + #define ALTI_SL1 (vector unsigned int)(SL1, SL1, SL1, SL1) + #define ALTI_SR1 (vector unsigned int)(SR1, SR1, SR1, SR1) + #define ALTI_MSK (vector unsigned int)(MSK1, MSK2, MSK3, MSK4) + #define ALTI_MSK64 \ + (vector unsigned int)(MSK2, MSK1, MSK4, MSK3) + #define ALTI_SL2_PERM \ + (vector unsigned char)(1,2,3,23,5,6,7,0,9,10,11,4,13,14,15,8) + #define ALTI_SL2_PERM64 \ + (vector unsigned char)(1,2,3,4,5,6,7,31,9,10,11,12,13,14,15,0) + #define ALTI_SR2_PERM \ + (vector unsigned char)(7,0,1,2,11,4,5,6,15,8,9,10,17,12,13,14) + #define ALTI_SR2_PERM64 \ + (vector unsigned char)(15,0,1,2,3,4,5,6,17,8,9,10,11,12,13,14) +#else /* For OTHER OSs(Linux?) 
*/ + #define ALTI_SL1 {SL1, SL1, SL1, SL1} + #define ALTI_SR1 {SR1, SR1, SR1, SR1} + #define ALTI_MSK {MSK1, MSK2, MSK3, MSK4} + #define ALTI_MSK64 {MSK2, MSK1, MSK4, MSK3} + #define ALTI_SL2_PERM {1,2,3,23,5,6,7,0,9,10,11,4,13,14,15,8} + #define ALTI_SL2_PERM64 {1,2,3,4,5,6,7,31,9,10,11,12,13,14,15,0} + #define ALTI_SR2_PERM {7,0,1,2,11,4,5,6,15,8,9,10,17,12,13,14} + #define ALTI_SR2_PERM64 {15,0,1,2,3,4,5,6,17,8,9,10,11,12,13,14} +#endif /* For OSX */ +#define IDSTR "SFMT-132049:110-19-1-21-1:ffffbb5f-fb6ebf95-fffefffa-cff77fff" + +#endif /* SFMT_PARAMS132049_H */ diff --git a/test/include/test/SFMT-params19937.h b/test/include/test/SFMT-params19937.h new file mode 100644 index 00000000..fb92b4c9 --- /dev/null +++ b/test/include/test/SFMT-params19937.h @@ -0,0 +1,81 @@ +/* + * This file derives from SFMT 1.3.3 + * (http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/SFMT/index.html), which was + * released under the terms of the following license: + * + * Copyright (c) 2006,2007 Mutsuo Saito, Makoto Matsumoto and Hiroshima + * University. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * * Neither the name of the Hiroshima University nor the names of + * its contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef SFMT_PARAMS19937_H +#define SFMT_PARAMS19937_H + +#define POS1 122 +#define SL1 18 +#define SL2 1 +#define SR1 11 +#define SR2 1 +#define MSK1 0xdfffffefU +#define MSK2 0xddfecb7fU +#define MSK3 0xbffaffffU +#define MSK4 0xbffffff6U +#define PARITY1 0x00000001U +#define PARITY2 0x00000000U +#define PARITY3 0x00000000U +#define PARITY4 0x13c9e684U + + +/* PARAMETERS FOR ALTIVEC */ +#if defined(__APPLE__) /* For OSX */ + #define ALTI_SL1 (vector unsigned int)(SL1, SL1, SL1, SL1) + #define ALTI_SR1 (vector unsigned int)(SR1, SR1, SR1, SR1) + #define ALTI_MSK (vector unsigned int)(MSK1, MSK2, MSK3, MSK4) + #define ALTI_MSK64 \ + (vector unsigned int)(MSK2, MSK1, MSK4, MSK3) + #define ALTI_SL2_PERM \ + (vector unsigned char)(1,2,3,23,5,6,7,0,9,10,11,4,13,14,15,8) + #define ALTI_SL2_PERM64 \ + (vector unsigned char)(1,2,3,4,5,6,7,31,9,10,11,12,13,14,15,0) + #define ALTI_SR2_PERM \ + (vector unsigned char)(7,0,1,2,11,4,5,6,15,8,9,10,17,12,13,14) + #define ALTI_SR2_PERM64 \ + (vector unsigned char)(15,0,1,2,3,4,5,6,17,8,9,10,11,12,13,14) +#else /* For OTHER OSs(Linux?) 
*/ + #define ALTI_SL1 {SL1, SL1, SL1, SL1} + #define ALTI_SR1 {SR1, SR1, SR1, SR1} + #define ALTI_MSK {MSK1, MSK2, MSK3, MSK4} + #define ALTI_MSK64 {MSK2, MSK1, MSK4, MSK3} + #define ALTI_SL2_PERM {1,2,3,23,5,6,7,0,9,10,11,4,13,14,15,8} + #define ALTI_SL2_PERM64 {1,2,3,4,5,6,7,31,9,10,11,12,13,14,15,0} + #define ALTI_SR2_PERM {7,0,1,2,11,4,5,6,15,8,9,10,17,12,13,14} + #define ALTI_SR2_PERM64 {15,0,1,2,3,4,5,6,17,8,9,10,11,12,13,14} +#endif /* For OSX */ +#define IDSTR "SFMT-19937:122-18-1-11-1:dfffffef-ddfecb7f-bffaffff-bffffff6" + +#endif /* SFMT_PARAMS19937_H */ diff --git a/test/include/test/SFMT-params216091.h b/test/include/test/SFMT-params216091.h new file mode 100644 index 00000000..125ce282 --- /dev/null +++ b/test/include/test/SFMT-params216091.h @@ -0,0 +1,81 @@ +/* + * This file derives from SFMT 1.3.3 + * (http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/SFMT/index.html), which was + * released under the terms of the following license: + * + * Copyright (c) 2006,2007 Mutsuo Saito, Makoto Matsumoto and Hiroshima + * University. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * * Neither the name of the Hiroshima University nor the names of + * its contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef SFMT_PARAMS216091_H +#define SFMT_PARAMS216091_H + +#define POS1 627 +#define SL1 11 +#define SL2 3 +#define SR1 10 +#define SR2 1 +#define MSK1 0xbff7bff7U +#define MSK2 0xbfffffffU +#define MSK3 0xbffffa7fU +#define MSK4 0xffddfbfbU +#define PARITY1 0xf8000001U +#define PARITY2 0x89e80709U +#define PARITY3 0x3bd2b64bU +#define PARITY4 0x0c64b1e4U + + +/* PARAMETERS FOR ALTIVEC */ +#if defined(__APPLE__) /* For OSX */ + #define ALTI_SL1 (vector unsigned int)(SL1, SL1, SL1, SL1) + #define ALTI_SR1 (vector unsigned int)(SR1, SR1, SR1, SR1) + #define ALTI_MSK (vector unsigned int)(MSK1, MSK2, MSK3, MSK4) + #define ALTI_MSK64 \ + (vector unsigned int)(MSK2, MSK1, MSK4, MSK3) + #define ALTI_SL2_PERM \ + (vector unsigned char)(3,21,21,21,7,0,1,2,11,4,5,6,15,8,9,10) + #define ALTI_SL2_PERM64 \ + (vector unsigned char)(3,4,5,6,7,29,29,29,11,12,13,14,15,0,1,2) + #define ALTI_SR2_PERM \ + (vector unsigned char)(7,0,1,2,11,4,5,6,15,8,9,10,17,12,13,14) + #define ALTI_SR2_PERM64 \ + (vector unsigned char)(15,0,1,2,3,4,5,6,17,8,9,10,11,12,13,14) +#else /* For OTHER OSs(Linux?) 
*/ + #define ALTI_SL1 {SL1, SL1, SL1, SL1} + #define ALTI_SR1 {SR1, SR1, SR1, SR1} + #define ALTI_MSK {MSK1, MSK2, MSK3, MSK4} + #define ALTI_MSK64 {MSK2, MSK1, MSK4, MSK3} + #define ALTI_SL2_PERM {3,21,21,21,7,0,1,2,11,4,5,6,15,8,9,10} + #define ALTI_SL2_PERM64 {3,4,5,6,7,29,29,29,11,12,13,14,15,0,1,2} + #define ALTI_SR2_PERM {7,0,1,2,11,4,5,6,15,8,9,10,17,12,13,14} + #define ALTI_SR2_PERM64 {15,0,1,2,3,4,5,6,17,8,9,10,11,12,13,14} +#endif /* For OSX */ +#define IDSTR "SFMT-216091:627-11-3-10-1:bff7bff7-bfffffff-bffffa7f-ffddfbfb" + +#endif /* SFMT_PARAMS216091_H */ diff --git a/test/include/test/SFMT-params2281.h b/test/include/test/SFMT-params2281.h new file mode 100644 index 00000000..0ef85c40 --- /dev/null +++ b/test/include/test/SFMT-params2281.h @@ -0,0 +1,81 @@ +/* + * This file derives from SFMT 1.3.3 + * (http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/SFMT/index.html), which was + * released under the terms of the following license: + * + * Copyright (c) 2006,2007 Mutsuo Saito, Makoto Matsumoto and Hiroshima + * University. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * * Neither the name of the Hiroshima University nor the names of + * its contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef SFMT_PARAMS2281_H +#define SFMT_PARAMS2281_H + +#define POS1 12 +#define SL1 19 +#define SL2 1 +#define SR1 5 +#define SR2 1 +#define MSK1 0xbff7ffbfU +#define MSK2 0xfdfffffeU +#define MSK3 0xf7ffef7fU +#define MSK4 0xf2f7cbbfU +#define PARITY1 0x00000001U +#define PARITY2 0x00000000U +#define PARITY3 0x00000000U +#define PARITY4 0x41dfa600U + + +/* PARAMETERS FOR ALTIVEC */ +#if defined(__APPLE__) /* For OSX */ + #define ALTI_SL1 (vector unsigned int)(SL1, SL1, SL1, SL1) + #define ALTI_SR1 (vector unsigned int)(SR1, SR1, SR1, SR1) + #define ALTI_MSK (vector unsigned int)(MSK1, MSK2, MSK3, MSK4) + #define ALTI_MSK64 \ + (vector unsigned int)(MSK2, MSK1, MSK4, MSK3) + #define ALTI_SL2_PERM \ + (vector unsigned char)(1,2,3,23,5,6,7,0,9,10,11,4,13,14,15,8) + #define ALTI_SL2_PERM64 \ + (vector unsigned char)(1,2,3,4,5,6,7,31,9,10,11,12,13,14,15,0) + #define ALTI_SR2_PERM \ + (vector unsigned char)(7,0,1,2,11,4,5,6,15,8,9,10,17,12,13,14) + #define ALTI_SR2_PERM64 \ + (vector unsigned char)(15,0,1,2,3,4,5,6,17,8,9,10,11,12,13,14) +#else /* For OTHER OSs(Linux?) 
*/ + #define ALTI_SL1 {SL1, SL1, SL1, SL1} + #define ALTI_SR1 {SR1, SR1, SR1, SR1} + #define ALTI_MSK {MSK1, MSK2, MSK3, MSK4} + #define ALTI_MSK64 {MSK2, MSK1, MSK4, MSK3} + #define ALTI_SL2_PERM {1,2,3,23,5,6,7,0,9,10,11,4,13,14,15,8} + #define ALTI_SL2_PERM64 {1,2,3,4,5,6,7,31,9,10,11,12,13,14,15,0} + #define ALTI_SR2_PERM {7,0,1,2,11,4,5,6,15,8,9,10,17,12,13,14} + #define ALTI_SR2_PERM64 {15,0,1,2,3,4,5,6,17,8,9,10,11,12,13,14} +#endif /* For OSX */ +#define IDSTR "SFMT-2281:12-19-1-5-1:bff7ffbf-fdfffffe-f7ffef7f-f2f7cbbf" + +#endif /* SFMT_PARAMS2281_H */ diff --git a/test/include/test/SFMT-params4253.h b/test/include/test/SFMT-params4253.h new file mode 100644 index 00000000..9f07bc67 --- /dev/null +++ b/test/include/test/SFMT-params4253.h @@ -0,0 +1,81 @@ +/* + * This file derives from SFMT 1.3.3 + * (http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/SFMT/index.html), which was + * released under the terms of the following license: + * + * Copyright (c) 2006,2007 Mutsuo Saito, Makoto Matsumoto and Hiroshima + * University. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * * Neither the name of the Hiroshima University nor the names of + * its contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef SFMT_PARAMS4253_H +#define SFMT_PARAMS4253_H + +#define POS1 17 +#define SL1 20 +#define SL2 1 +#define SR1 7 +#define SR2 1 +#define MSK1 0x9f7bffffU +#define MSK2 0x9fffff5fU +#define MSK3 0x3efffffbU +#define MSK4 0xfffff7bbU +#define PARITY1 0xa8000001U +#define PARITY2 0xaf5390a3U +#define PARITY3 0xb740b3f8U +#define PARITY4 0x6c11486dU + + +/* PARAMETERS FOR ALTIVEC */ +#if defined(__APPLE__) /* For OSX */ + #define ALTI_SL1 (vector unsigned int)(SL1, SL1, SL1, SL1) + #define ALTI_SR1 (vector unsigned int)(SR1, SR1, SR1, SR1) + #define ALTI_MSK (vector unsigned int)(MSK1, MSK2, MSK3, MSK4) + #define ALTI_MSK64 \ + (vector unsigned int)(MSK2, MSK1, MSK4, MSK3) + #define ALTI_SL2_PERM \ + (vector unsigned char)(1,2,3,23,5,6,7,0,9,10,11,4,13,14,15,8) + #define ALTI_SL2_PERM64 \ + (vector unsigned char)(1,2,3,4,5,6,7,31,9,10,11,12,13,14,15,0) + #define ALTI_SR2_PERM \ + (vector unsigned char)(7,0,1,2,11,4,5,6,15,8,9,10,17,12,13,14) + #define ALTI_SR2_PERM64 \ + (vector unsigned char)(15,0,1,2,3,4,5,6,17,8,9,10,11,12,13,14) +#else /* For OTHER OSs(Linux?) 
*/ + #define ALTI_SL1 {SL1, SL1, SL1, SL1} + #define ALTI_SR1 {SR1, SR1, SR1, SR1} + #define ALTI_MSK {MSK1, MSK2, MSK3, MSK4} + #define ALTI_MSK64 {MSK2, MSK1, MSK4, MSK3} + #define ALTI_SL2_PERM {1,2,3,23,5,6,7,0,9,10,11,4,13,14,15,8} + #define ALTI_SL2_PERM64 {1,2,3,4,5,6,7,31,9,10,11,12,13,14,15,0} + #define ALTI_SR2_PERM {7,0,1,2,11,4,5,6,15,8,9,10,17,12,13,14} + #define ALTI_SR2_PERM64 {15,0,1,2,3,4,5,6,17,8,9,10,11,12,13,14} +#endif /* For OSX */ +#define IDSTR "SFMT-4253:17-20-1-7-1:9f7bffff-9fffff5f-3efffffb-fffff7bb" + +#endif /* SFMT_PARAMS4253_H */ diff --git a/test/include/test/SFMT-params44497.h b/test/include/test/SFMT-params44497.h new file mode 100644 index 00000000..85598fed --- /dev/null +++ b/test/include/test/SFMT-params44497.h @@ -0,0 +1,81 @@ +/* + * This file derives from SFMT 1.3.3 + * (http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/SFMT/index.html), which was + * released under the terms of the following license: + * + * Copyright (c) 2006,2007 Mutsuo Saito, Makoto Matsumoto and Hiroshima + * University. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * * Neither the name of the Hiroshima University nor the names of + * its contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef SFMT_PARAMS44497_H +#define SFMT_PARAMS44497_H + +#define POS1 330 +#define SL1 5 +#define SL2 3 +#define SR1 9 +#define SR2 3 +#define MSK1 0xeffffffbU +#define MSK2 0xdfbebfffU +#define MSK3 0xbfbf7befU +#define MSK4 0x9ffd7bffU +#define PARITY1 0x00000001U +#define PARITY2 0x00000000U +#define PARITY3 0xa3ac4000U +#define PARITY4 0xecc1327aU + + +/* PARAMETERS FOR ALTIVEC */ +#if defined(__APPLE__) /* For OSX */ + #define ALTI_SL1 (vector unsigned int)(SL1, SL1, SL1, SL1) + #define ALTI_SR1 (vector unsigned int)(SR1, SR1, SR1, SR1) + #define ALTI_MSK (vector unsigned int)(MSK1, MSK2, MSK3, MSK4) + #define ALTI_MSK64 \ + (vector unsigned int)(MSK2, MSK1, MSK4, MSK3) + #define ALTI_SL2_PERM \ + (vector unsigned char)(3,21,21,21,7,0,1,2,11,4,5,6,15,8,9,10) + #define ALTI_SL2_PERM64 \ + (vector unsigned char)(3,4,5,6,7,29,29,29,11,12,13,14,15,0,1,2) + #define ALTI_SR2_PERM \ + (vector unsigned char)(5,6,7,0,9,10,11,4,13,14,15,8,19,19,19,12) + #define ALTI_SR2_PERM64 \ + (vector unsigned char)(13,14,15,0,1,2,3,4,19,19,19,8,9,10,11,12) +#else /* For OTHER OSs(Linux?) 
*/ + #define ALTI_SL1 {SL1, SL1, SL1, SL1} + #define ALTI_SR1 {SR1, SR1, SR1, SR1} + #define ALTI_MSK {MSK1, MSK2, MSK3, MSK4} + #define ALTI_MSK64 {MSK2, MSK1, MSK4, MSK3} + #define ALTI_SL2_PERM {3,21,21,21,7,0,1,2,11,4,5,6,15,8,9,10} + #define ALTI_SL2_PERM64 {3,4,5,6,7,29,29,29,11,12,13,14,15,0,1,2} + #define ALTI_SR2_PERM {5,6,7,0,9,10,11,4,13,14,15,8,19,19,19,12} + #define ALTI_SR2_PERM64 {13,14,15,0,1,2,3,4,19,19,19,8,9,10,11,12} +#endif /* For OSX */ +#define IDSTR "SFMT-44497:330-5-3-9-3:effffffb-dfbebfff-bfbf7bef-9ffd7bff" + +#endif /* SFMT_PARAMS44497_H */ diff --git a/test/include/test/SFMT-params607.h b/test/include/test/SFMT-params607.h new file mode 100644 index 00000000..bc76485f --- /dev/null +++ b/test/include/test/SFMT-params607.h @@ -0,0 +1,81 @@ +/* + * This file derives from SFMT 1.3.3 + * (http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/SFMT/index.html), which was + * released under the terms of the following license: + * + * Copyright (c) 2006,2007 Mutsuo Saito, Makoto Matsumoto and Hiroshima + * University. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * * Neither the name of the Hiroshima University nor the names of + * its contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef SFMT_PARAMS607_H +#define SFMT_PARAMS607_H + +#define POS1 2 +#define SL1 15 +#define SL2 3 +#define SR1 13 +#define SR2 3 +#define MSK1 0xfdff37ffU +#define MSK2 0xef7f3f7dU +#define MSK3 0xff777b7dU +#define MSK4 0x7ff7fb2fU +#define PARITY1 0x00000001U +#define PARITY2 0x00000000U +#define PARITY3 0x00000000U +#define PARITY4 0x5986f054U + + +/* PARAMETERS FOR ALTIVEC */ +#if defined(__APPLE__) /* For OSX */ + #define ALTI_SL1 (vector unsigned int)(SL1, SL1, SL1, SL1) + #define ALTI_SR1 (vector unsigned int)(SR1, SR1, SR1, SR1) + #define ALTI_MSK (vector unsigned int)(MSK1, MSK2, MSK3, MSK4) + #define ALTI_MSK64 \ + (vector unsigned int)(MSK2, MSK1, MSK4, MSK3) + #define ALTI_SL2_PERM \ + (vector unsigned char)(3,21,21,21,7,0,1,2,11,4,5,6,15,8,9,10) + #define ALTI_SL2_PERM64 \ + (vector unsigned char)(3,4,5,6,7,29,29,29,11,12,13,14,15,0,1,2) + #define ALTI_SR2_PERM \ + (vector unsigned char)(5,6,7,0,9,10,11,4,13,14,15,8,19,19,19,12) + #define ALTI_SR2_PERM64 \ + (vector unsigned char)(13,14,15,0,1,2,3,4,19,19,19,8,9,10,11,12) +#else /* For OTHER OSs(Linux?) 
*/ + #define ALTI_SL1 {SL1, SL1, SL1, SL1} + #define ALTI_SR1 {SR1, SR1, SR1, SR1} + #define ALTI_MSK {MSK1, MSK2, MSK3, MSK4} + #define ALTI_MSK64 {MSK2, MSK1, MSK4, MSK3} + #define ALTI_SL2_PERM {3,21,21,21,7,0,1,2,11,4,5,6,15,8,9,10} + #define ALTI_SL2_PERM64 {3,4,5,6,7,29,29,29,11,12,13,14,15,0,1,2} + #define ALTI_SR2_PERM {5,6,7,0,9,10,11,4,13,14,15,8,19,19,19,12} + #define ALTI_SR2_PERM64 {13,14,15,0,1,2,3,4,19,19,19,8,9,10,11,12} +#endif /* For OSX */ +#define IDSTR "SFMT-607:2-15-3-13-3:fdff37ff-ef7f3f7d-ff777b7d-7ff7fb2f" + +#endif /* SFMT_PARAMS607_H */ diff --git a/test/include/test/SFMT-params86243.h b/test/include/test/SFMT-params86243.h new file mode 100644 index 00000000..5e4d783c --- /dev/null +++ b/test/include/test/SFMT-params86243.h @@ -0,0 +1,81 @@ +/* + * This file derives from SFMT 1.3.3 + * (http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/SFMT/index.html), which was + * released under the terms of the following license: + * + * Copyright (c) 2006,2007 Mutsuo Saito, Makoto Matsumoto and Hiroshima + * University. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * * Neither the name of the Hiroshima University nor the names of + * its contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef SFMT_PARAMS86243_H +#define SFMT_PARAMS86243_H + +#define POS1 366 +#define SL1 6 +#define SL2 7 +#define SR1 19 +#define SR2 1 +#define MSK1 0xfdbffbffU +#define MSK2 0xbff7ff3fU +#define MSK3 0xfd77efffU +#define MSK4 0xbf9ff3ffU +#define PARITY1 0x00000001U +#define PARITY2 0x00000000U +#define PARITY3 0x00000000U +#define PARITY4 0xe9528d85U + + +/* PARAMETERS FOR ALTIVEC */ +#if defined(__APPLE__) /* For OSX */ + #define ALTI_SL1 (vector unsigned int)(SL1, SL1, SL1, SL1) + #define ALTI_SR1 (vector unsigned int)(SR1, SR1, SR1, SR1) + #define ALTI_MSK (vector unsigned int)(MSK1, MSK2, MSK3, MSK4) + #define ALTI_MSK64 \ + (vector unsigned int)(MSK2, MSK1, MSK4, MSK3) + #define ALTI_SL2_PERM \ + (vector unsigned char)(25,25,25,25,3,25,25,25,7,0,1,2,11,4,5,6) + #define ALTI_SL2_PERM64 \ + (vector unsigned char)(7,25,25,25,25,25,25,25,15,0,1,2,3,4,5,6) + #define ALTI_SR2_PERM \ + (vector unsigned char)(7,0,1,2,11,4,5,6,15,8,9,10,17,12,13,14) + #define ALTI_SR2_PERM64 \ + (vector unsigned char)(15,0,1,2,3,4,5,6,17,8,9,10,11,12,13,14) +#else /* For OTHER OSs(Linux?) 
*/ + #define ALTI_SL1 {SL1, SL1, SL1, SL1} + #define ALTI_SR1 {SR1, SR1, SR1, SR1} + #define ALTI_MSK {MSK1, MSK2, MSK3, MSK4} + #define ALTI_MSK64 {MSK2, MSK1, MSK4, MSK3} + #define ALTI_SL2_PERM {25,25,25,25,3,25,25,25,7,0,1,2,11,4,5,6} + #define ALTI_SL2_PERM64 {7,25,25,25,25,25,25,25,15,0,1,2,3,4,5,6} + #define ALTI_SR2_PERM {7,0,1,2,11,4,5,6,15,8,9,10,17,12,13,14} + #define ALTI_SR2_PERM64 {15,0,1,2,3,4,5,6,17,8,9,10,11,12,13,14} +#endif /* For OSX */ +#define IDSTR "SFMT-86243:366-6-7-19-1:fdbffbff-bff7ff3f-fd77efff-bf9ff3ff" + +#endif /* SFMT_PARAMS86243_H */ diff --git a/test/include/test/SFMT-sse2.h b/test/include/test/SFMT-sse2.h new file mode 100644 index 00000000..19131806 --- /dev/null +++ b/test/include/test/SFMT-sse2.h @@ -0,0 +1,160 @@ +/* + * This file derives from SFMT 1.3.3 + * (http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/SFMT/index.html), which was + * released under the terms of the following license: + * + * Copyright (c) 2006,2007 Mutsuo Saito, Makoto Matsumoto and Hiroshima + * University. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * * Neither the name of the Hiroshima University nor the names of + * its contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +/** + * @file SFMT-sse2.h + * @brief SIMD oriented Fast Mersenne Twister(SFMT) for Intel SSE2 + * + * @author Mutsuo Saito (Hiroshima University) + * @author Makoto Matsumoto (Hiroshima University) + * + * @note We assume LITTLE ENDIAN in this file + * + * Copyright (C) 2006, 2007 Mutsuo Saito, Makoto Matsumoto and Hiroshima + * University. All rights reserved. + * + * The new BSD License is applied to this software, see LICENSE.txt + */ + +#ifndef SFMT_SSE2_H +#define SFMT_SSE2_H + +PRE_ALWAYS static __m128i mm_recursion(__m128i *a, __m128i *b, __m128i c, + __m128i d, __m128i mask) ALWAYSINLINE; + +/** + * This function represents the recursion formula. 
+ * @param a a 128-bit part of the interal state array + * @param b a 128-bit part of the interal state array + * @param c a 128-bit part of the interal state array + * @param d a 128-bit part of the interal state array + * @param mask 128-bit mask + * @return output + */ +PRE_ALWAYS static __m128i mm_recursion(__m128i *a, __m128i *b, + __m128i c, __m128i d, __m128i mask) { + __m128i v, x, y, z; + + x = _mm_load_si128(a); + y = _mm_srli_epi32(*b, SR1); + z = _mm_srli_si128(c, SR2); + v = _mm_slli_epi32(d, SL1); + z = _mm_xor_si128(z, x); + z = _mm_xor_si128(z, v); + x = _mm_slli_si128(x, SL2); + y = _mm_and_si128(y, mask); + z = _mm_xor_si128(z, x); + z = _mm_xor_si128(z, y); + return z; +} + +/** + * This function fills the internal state array with pseudorandom + * integers. + */ +inline static void gen_rand_all(sfmt_t *ctx) { + int i; + __m128i r, r1, r2, mask; + mask = _mm_set_epi32(MSK4, MSK3, MSK2, MSK1); + + r1 = _mm_load_si128(&ctx->sfmt[N - 2].si); + r2 = _mm_load_si128(&ctx->sfmt[N - 1].si); + for (i = 0; i < N - POS1; i++) { + r = mm_recursion(&ctx->sfmt[i].si, &ctx->sfmt[i + POS1].si, r1, r2, + mask); + _mm_store_si128(&ctx->sfmt[i].si, r); + r1 = r2; + r2 = r; + } + for (; i < N; i++) { + r = mm_recursion(&ctx->sfmt[i].si, &ctx->sfmt[i + POS1 - N].si, r1, r2, + mask); + _mm_store_si128(&ctx->sfmt[i].si, r); + r1 = r2; + r2 = r; + } +} + +/** + * This function fills the user-specified array with pseudorandom + * integers. + * + * @param array an 128-bit array to be filled by pseudorandom numbers. + * @param size number of 128-bit pesudorandom numbers to be generated. 
+ */ +inline static void gen_rand_array(sfmt_t *ctx, w128_t *array, int size) { + int i, j; + __m128i r, r1, r2, mask; + mask = _mm_set_epi32(MSK4, MSK3, MSK2, MSK1); + + r1 = _mm_load_si128(&ctx->sfmt[N - 2].si); + r2 = _mm_load_si128(&ctx->sfmt[N - 1].si); + for (i = 0; i < N - POS1; i++) { + r = mm_recursion(&ctx->sfmt[i].si, &ctx->sfmt[i + POS1].si, r1, r2, + mask); + _mm_store_si128(&array[i].si, r); + r1 = r2; + r2 = r; + } + for (; i < N; i++) { + r = mm_recursion(&ctx->sfmt[i].si, &array[i + POS1 - N].si, r1, r2, + mask); + _mm_store_si128(&array[i].si, r); + r1 = r2; + r2 = r; + } + /* main loop */ + for (; i < size - N; i++) { + r = mm_recursion(&array[i - N].si, &array[i + POS1 - N].si, r1, r2, + mask); + _mm_store_si128(&array[i].si, r); + r1 = r2; + r2 = r; + } + for (j = 0; j < 2 * N - size; j++) { + r = _mm_load_si128(&array[j + size - N].si); + _mm_store_si128(&ctx->sfmt[j].si, r); + } + for (; i < size; i++) { + r = mm_recursion(&array[i - N].si, &array[i + POS1 - N].si, r1, r2, + mask); + _mm_store_si128(&array[i].si, r); + _mm_store_si128(&ctx->sfmt[j++].si, r); + r1 = r2; + r2 = r; + } +} + +#endif diff --git a/test/include/test/SFMT.h b/test/include/test/SFMT.h new file mode 100644 index 00000000..3cb350d1 --- /dev/null +++ b/test/include/test/SFMT.h @@ -0,0 +1,213 @@ +/* + * This file derives from SFMT 1.3.3 + * (http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/SFMT/index.html), which was + * released under the terms of the following license: + * + * Copyright (c) 2006,2007 Mutsuo Saito, Makoto Matsumoto and Hiroshima + * University. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * * Neither the name of the Hiroshima University nor the names of + * its contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +/** + * @file SFMT.h + * + * @brief SIMD oriented Fast Mersenne Twister(SFMT) pseudorandom + * number generator + * + * @author Mutsuo Saito (Hiroshima University) + * @author Makoto Matsumoto (Hiroshima University) + * + * Copyright (C) 2006, 2007 Mutsuo Saito, Makoto Matsumoto and Hiroshima + * University. All rights reserved. + * + * The new BSD License is applied to this software. + * see LICENSE.txt + * + * @note We assume that your system has inttypes.h. 
If your system + * doesn't have inttypes.h, you have to typedef uint32_t and uint64_t, + * and you have to define PRIu64 and PRIx64 in this file as follows: + * @verbatim + typedef unsigned int uint32_t + typedef unsigned long long uint64_t + #define PRIu64 "llu" + #define PRIx64 "llx" +@endverbatim + * uint32_t must be exactly 32-bit unsigned integer type (no more, no + * less), and uint64_t must be exactly 64-bit unsigned integer type. + * PRIu64 and PRIx64 are used for printf function to print 64-bit + * unsigned int and 64-bit unsigned int in hexadecimal format. + */ + +#ifndef SFMT_H +#define SFMT_H + +#include +#include + +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) + #include +#elif defined(_MSC_VER) || defined(__BORLANDC__) + typedef unsigned int uint32_t; + typedef unsigned __int64 uint64_t; + #define inline __inline +#else + #include + #if defined(__GNUC__) + #define inline __inline__ + #endif +#endif + +#ifndef PRIu64 + #if defined(_MSC_VER) || defined(__BORLANDC__) + #define PRIu64 "I64u" + #define PRIx64 "I64x" + #else + #define PRIu64 "llu" + #define PRIx64 "llx" + #endif +#endif + +#if defined(__GNUC__) +#define ALWAYSINLINE __attribute__((always_inline)) +#else +#define ALWAYSINLINE +#endif + +#if defined(_MSC_VER) + #if _MSC_VER >= 1200 + #define PRE_ALWAYS __forceinline + #else + #define PRE_ALWAYS inline + #endif +#else + #define PRE_ALWAYS inline +#endif + +typedef struct sfmt_s sfmt_t; + +uint32_t gen_rand32(sfmt_t *ctx); +uint32_t gen_rand32_range(sfmt_t *ctx, uint32_t limit); +uint64_t gen_rand64(sfmt_t *ctx); +uint64_t gen_rand64_range(sfmt_t *ctx, uint64_t limit); +void fill_array32(sfmt_t *ctx, uint32_t *array, int size); +void fill_array64(sfmt_t *ctx, uint64_t *array, int size); +sfmt_t *init_gen_rand(uint32_t seed); +sfmt_t *init_by_array(uint32_t *init_key, int key_length); +void fini_gen_rand(sfmt_t *ctx); +const char *get_idstring(void); +int get_min_array_size32(void); +int get_min_array_size64(void); + +#ifndef 
JEMALLOC_ENABLE_INLINE +double to_real1(uint32_t v); +double genrand_real1(sfmt_t *ctx); +double to_real2(uint32_t v); +double genrand_real2(sfmt_t *ctx); +double to_real3(uint32_t v); +double genrand_real3(sfmt_t *ctx); +double to_res53(uint64_t v); +double to_res53_mix(uint32_t x, uint32_t y); +double genrand_res53(sfmt_t *ctx); +double genrand_res53_mix(sfmt_t *ctx); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(SFMT_C_)) +/* These real versions are due to Isaku Wada */ +/** generates a random number on [0,1]-real-interval */ +JEMALLOC_INLINE double to_real1(uint32_t v) +{ + return v * (1.0/4294967295.0); + /* divided by 2^32-1 */ +} + +/** generates a random number on [0,1]-real-interval */ +JEMALLOC_INLINE double genrand_real1(sfmt_t *ctx) +{ + return to_real1(gen_rand32(ctx)); +} + +/** generates a random number on [0,1)-real-interval */ +JEMALLOC_INLINE double to_real2(uint32_t v) +{ + return v * (1.0/4294967296.0); + /* divided by 2^32 */ +} + +/** generates a random number on [0,1)-real-interval */ +JEMALLOC_INLINE double genrand_real2(sfmt_t *ctx) +{ + return to_real2(gen_rand32(ctx)); +} + +/** generates a random number on (0,1)-real-interval */ +JEMALLOC_INLINE double to_real3(uint32_t v) +{ + return (((double)v) + 0.5)*(1.0/4294967296.0); + /* divided by 2^32 */ +} + +/** generates a random number on (0,1)-real-interval */ +JEMALLOC_INLINE double genrand_real3(sfmt_t *ctx) +{ + return to_real3(gen_rand32(ctx)); +} +/** These real versions are due to Isaku Wada */ + +/** generates a random number on [0,1) with 53-bit resolution*/ +JEMALLOC_INLINE double to_res53(uint64_t v) +{ + return v * (1.0/18446744073709551616.0L); +} + +/** generates a random number on [0,1) with 53-bit resolution from two + * 32 bit integers */ +JEMALLOC_INLINE double to_res53_mix(uint32_t x, uint32_t y) +{ + return to_res53(x | ((uint64_t)y << 32)); +} + +/** generates a random number on [0,1) with 53-bit resolution + */ +JEMALLOC_INLINE double genrand_res53(sfmt_t 
*ctx) +{ + return to_res53(gen_rand64(ctx)); +} + +/** generates a random number on [0,1) with 53-bit resolution + using 32bit integer. + */ +JEMALLOC_INLINE double genrand_res53_mix(sfmt_t *ctx) +{ + uint32_t x, y; + + x = gen_rand32(ctx); + y = gen_rand32(ctx); + return to_res53_mix(x, y); +} +#endif +#endif diff --git a/test/include/test/jemalloc_test.h.in b/test/include/test/jemalloc_test.h.in index 20ccba76..026866b0 100644 --- a/test/include/test/jemalloc_test.h.in +++ b/test/include/test/jemalloc_test.h.in @@ -35,6 +35,8 @@ not_implemented(); \ } while (0) +#include "test/jemalloc_test_defs.h" + /******************************************************************************/ /* * For unit tests, expose all public and private interfaces. @@ -109,3 +111,5 @@ */ #include "test/test.h" #include "test/thread.h" +#define MEXP 19937 +#include "test/SFMT.h" diff --git a/test/include/test/jemalloc_test_defs.h.in b/test/include/test/jemalloc_test_defs.h.in new file mode 100644 index 00000000..093e2f23 --- /dev/null +++ b/test/include/test/jemalloc_test_defs.h.in @@ -0,0 +1,3 @@ +/* For use by SFMT. */ +#undef HAVE_SSE2 +#undef HAVE_ALTIVEC diff --git a/test/include/test/test.h b/test/include/test/test.h index 3376d33a..6f5e3436 100644 --- a/test/include/test/test.h +++ b/test/include/test/test.h @@ -74,6 +74,32 @@ #define assert_zu_gt(a, b, fmt...) assert_cmp(size_t, a, b, >, \ <=, "zu", fmt) +#define assert_d32_eq(a, b, fmt...) assert_cmp(int32_t, a, b, ==, \ + !=, PRId32, fmt) +#define assert_d32_ne(a, b, fmt...) assert_cmp(int32_t, a, b, !=, \ + ==, PRId32, fmt) +#define assert_d32_lt(a, b, fmt...) assert_cmp(int32_t, a, b, <, \ + >=, PRId32, fmt) +#define assert_d32_le(a, b, fmt...) assert_cmp(int32_t, a, b, <=, \ + >, PRId32, fmt) +#define assert_d32_ge(a, b, fmt...) assert_cmp(int32_t, a, b, >=, \ + <, PRId32, fmt) +#define assert_d32_gt(a, b, fmt...) assert_cmp(int32_t, a, b, >, \ + <=, PRId32, fmt) + +#define assert_u32_eq(a, b, fmt...) 
assert_cmp(uint32_t, a, b, ==, \ + !=, PRIu32, fmt) +#define assert_u32_ne(a, b, fmt...) assert_cmp(uint32_t, a, b, !=, \ + ==, PRIu32, fmt) +#define assert_u32_lt(a, b, fmt...) assert_cmp(uint32_t, a, b, <, \ + >=, PRIu32, fmt) +#define assert_u32_le(a, b, fmt...) assert_cmp(uint32_t, a, b, <=, \ + >, PRIu32, fmt) +#define assert_u32_ge(a, b, fmt...) assert_cmp(uint32_t, a, b, >=, \ + <, PRIu32, fmt) +#define assert_u32_gt(a, b, fmt...) assert_cmp(uint32_t, a, b, >, \ + <=, PRIu32, fmt) + #define assert_d64_eq(a, b, fmt...) assert_cmp(int64_t, a, b, ==, \ !=, PRId64, fmt) #define assert_d64_ne(a, b, fmt...) assert_cmp(int64_t, a, b, !=, \ diff --git a/test/src/SFMT.c b/test/src/SFMT.c new file mode 100644 index 00000000..cd48c3a9 --- /dev/null +++ b/test/src/SFMT.c @@ -0,0 +1,717 @@ +/* + * This file derives from SFMT 1.3.3 + * (http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/SFMT/index.html), which was + * released under the terms of the following license: + * + * Copyright (c) 2006,2007 Mutsuo Saito, Makoto Matsumoto and Hiroshima + * University. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * * Neither the name of the Hiroshima University nor the names of + * its contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +/** + * @file SFMT.c + * @brief SIMD oriented Fast Mersenne Twister(SFMT) + * + * @author Mutsuo Saito (Hiroshima University) + * @author Makoto Matsumoto (Hiroshima University) + * + * Copyright (C) 2006,2007 Mutsuo Saito, Makoto Matsumoto and Hiroshima + * University. All rights reserved. 
+ * + * The new BSD License is applied to this software, see LICENSE.txt + */ +#define SFMT_C_ +#include "test/jemalloc_test.h" +#include "test/SFMT-params.h" + +#if defined(__BIG_ENDIAN__) && !defined(__amd64) && !defined(BIG_ENDIAN64) +#define BIG_ENDIAN64 1 +#endif +#if defined(HAVE_ALTIVEC) && !defined(BIG_ENDIAN64) +#define BIG_ENDIAN64 1 +#endif +#if defined(ONLY64) && !defined(BIG_ENDIAN64) + #if defined(__GNUC__) + #error "-DONLY64 must be specified with -DBIG_ENDIAN64" + #endif +#undef ONLY64 +#endif +/*------------------------------------------------------ + 128-bit SIMD data type for Altivec, SSE2 or standard C + ------------------------------------------------------*/ +#if defined(HAVE_ALTIVEC) + #if !defined(__APPLE__) + #include + #endif +/** 128-bit data structure */ +union W128_T { + vector unsigned int s; + uint32_t u[4]; +}; +/** 128-bit data type */ +typedef union W128_T w128_t; + +#elif defined(HAVE_SSE2) + #include + +/** 128-bit data structure */ +union W128_T { + __m128i si; + uint32_t u[4]; +}; +/** 128-bit data type */ +typedef union W128_T w128_t; + +#else + +/** 128-bit data structure */ +struct W128_T { + uint32_t u[4]; +}; +/** 128-bit data type */ +typedef struct W128_T w128_t; + +#endif + +struct sfmt_s { + /** the 128-bit internal state array */ + w128_t sfmt[N]; + /** index counter to the 32-bit internal state array */ + int idx; + /** a flag: it is 0 if and only if the internal state is not yet + * initialized. 
*/ + int initialized; +}; + +/*-------------------------------------- + FILE GLOBAL VARIABLES + internal state, index counter and flag + --------------------------------------*/ + +/** a parity check vector which certificate the period of 2^{MEXP} */ +static uint32_t parity[4] = {PARITY1, PARITY2, PARITY3, PARITY4}; + +/*---------------- + STATIC FUNCTIONS + ----------------*/ +inline static int idxof(int i); +#if (!defined(HAVE_ALTIVEC)) && (!defined(HAVE_SSE2)) +inline static void rshift128(w128_t *out, w128_t const *in, int shift); +inline static void lshift128(w128_t *out, w128_t const *in, int shift); +#endif +inline static void gen_rand_all(sfmt_t *ctx); +inline static void gen_rand_array(sfmt_t *ctx, w128_t *array, int size); +inline static uint32_t func1(uint32_t x); +inline static uint32_t func2(uint32_t x); +static void period_certification(sfmt_t *ctx); +#if defined(BIG_ENDIAN64) && !defined(ONLY64) +inline static void swap(w128_t *array, int size); +#endif + +#if defined(HAVE_ALTIVEC) + #include "test/SFMT-alti.h" +#elif defined(HAVE_SSE2) + #include "test/SFMT-sse2.h" +#endif + +/** + * This function simulate a 64-bit index of LITTLE ENDIAN + * in BIG ENDIAN machine. + */ +#ifdef ONLY64 +inline static int idxof(int i) { + return i ^ 1; +} +#else +inline static int idxof(int i) { + return i; +} +#endif +/** + * This function simulates SIMD 128-bit right shift by the standard C. + * The 128-bit integer given in in is shifted by (shift * 8) bits. + * This function simulates the LITTLE ENDIAN SIMD. 
+ * @param out the output of this function + * @param in the 128-bit data to be shifted + * @param shift the shift value + */ +#if (!defined(HAVE_ALTIVEC)) && (!defined(HAVE_SSE2)) +#ifdef ONLY64 +inline static void rshift128(w128_t *out, w128_t const *in, int shift) { + uint64_t th, tl, oh, ol; + + th = ((uint64_t)in->u[2] << 32) | ((uint64_t)in->u[3]); + tl = ((uint64_t)in->u[0] << 32) | ((uint64_t)in->u[1]); + + oh = th >> (shift * 8); + ol = tl >> (shift * 8); + ol |= th << (64 - shift * 8); + out->u[0] = (uint32_t)(ol >> 32); + out->u[1] = (uint32_t)ol; + out->u[2] = (uint32_t)(oh >> 32); + out->u[3] = (uint32_t)oh; +} +#else +inline static void rshift128(w128_t *out, w128_t const *in, int shift) { + uint64_t th, tl, oh, ol; + + th = ((uint64_t)in->u[3] << 32) | ((uint64_t)in->u[2]); + tl = ((uint64_t)in->u[1] << 32) | ((uint64_t)in->u[0]); + + oh = th >> (shift * 8); + ol = tl >> (shift * 8); + ol |= th << (64 - shift * 8); + out->u[1] = (uint32_t)(ol >> 32); + out->u[0] = (uint32_t)ol; + out->u[3] = (uint32_t)(oh >> 32); + out->u[2] = (uint32_t)oh; +} +#endif +/** + * This function simulates SIMD 128-bit left shift by the standard C. + * The 128-bit integer given in in is shifted by (shift * 8) bits. + * This function simulates the LITTLE ENDIAN SIMD. 
+ * @param out the output of this function + * @param in the 128-bit data to be shifted + * @param shift the shift value + */ +#ifdef ONLY64 +inline static void lshift128(w128_t *out, w128_t const *in, int shift) { + uint64_t th, tl, oh, ol; + + th = ((uint64_t)in->u[2] << 32) | ((uint64_t)in->u[3]); + tl = ((uint64_t)in->u[0] << 32) | ((uint64_t)in->u[1]); + + oh = th << (shift * 8); + ol = tl << (shift * 8); + oh |= tl >> (64 - shift * 8); + out->u[0] = (uint32_t)(ol >> 32); + out->u[1] = (uint32_t)ol; + out->u[2] = (uint32_t)(oh >> 32); + out->u[3] = (uint32_t)oh; +} +#else +inline static void lshift128(w128_t *out, w128_t const *in, int shift) { + uint64_t th, tl, oh, ol; + + th = ((uint64_t)in->u[3] << 32) | ((uint64_t)in->u[2]); + tl = ((uint64_t)in->u[1] << 32) | ((uint64_t)in->u[0]); + + oh = th << (shift * 8); + ol = tl << (shift * 8); + oh |= tl >> (64 - shift * 8); + out->u[1] = (uint32_t)(ol >> 32); + out->u[0] = (uint32_t)ol; + out->u[3] = (uint32_t)(oh >> 32); + out->u[2] = (uint32_t)oh; +} +#endif +#endif + +/** + * This function represents the recursion formula. 
+ * @param r output + * @param a a 128-bit part of the internal state array + * @param b a 128-bit part of the internal state array + * @param c a 128-bit part of the internal state array + * @param d a 128-bit part of the internal state array + */ +#if (!defined(HAVE_ALTIVEC)) && (!defined(HAVE_SSE2)) +#ifdef ONLY64 +inline static void do_recursion(w128_t *r, w128_t *a, w128_t *b, w128_t *c, + w128_t *d) { + w128_t x; + w128_t y; + + lshift128(&x, a, SL2); + rshift128(&y, c, SR2); + r->u[0] = a->u[0] ^ x.u[0] ^ ((b->u[0] >> SR1) & MSK2) ^ y.u[0] + ^ (d->u[0] << SL1); + r->u[1] = a->u[1] ^ x.u[1] ^ ((b->u[1] >> SR1) & MSK1) ^ y.u[1] + ^ (d->u[1] << SL1); + r->u[2] = a->u[2] ^ x.u[2] ^ ((b->u[2] >> SR1) & MSK4) ^ y.u[2] + ^ (d->u[2] << SL1); + r->u[3] = a->u[3] ^ x.u[3] ^ ((b->u[3] >> SR1) & MSK3) ^ y.u[3] + ^ (d->u[3] << SL1); +} +#else +inline static void do_recursion(w128_t *r, w128_t *a, w128_t *b, w128_t *c, + w128_t *d) { + w128_t x; + w128_t y; + + lshift128(&x, a, SL2); + rshift128(&y, c, SR2); + r->u[0] = a->u[0] ^ x.u[0] ^ ((b->u[0] >> SR1) & MSK1) ^ y.u[0] + ^ (d->u[0] << SL1); + r->u[1] = a->u[1] ^ x.u[1] ^ ((b->u[1] >> SR1) & MSK2) ^ y.u[1] + ^ (d->u[1] << SL1); + r->u[2] = a->u[2] ^ x.u[2] ^ ((b->u[2] >> SR1) & MSK3) ^ y.u[2] + ^ (d->u[2] << SL1); + r->u[3] = a->u[3] ^ x.u[3] ^ ((b->u[3] >> SR1) & MSK4) ^ y.u[3] + ^ (d->u[3] << SL1); +} +#endif +#endif + +#if (!defined(HAVE_ALTIVEC)) && (!defined(HAVE_SSE2)) +/** + * This function fills the internal state array with pseudorandom + * integers. 
+ */ +inline static void gen_rand_all(sfmt_t *ctx) { + int i; + w128_t *r1, *r2; + + r1 = &ctx->sfmt[N - 2]; + r2 = &ctx->sfmt[N - 1]; + for (i = 0; i < N - POS1; i++) { + do_recursion(&ctx->sfmt[i], &ctx->sfmt[i], &ctx->sfmt[i + POS1], r1, + r2); + r1 = r2; + r2 = &ctx->sfmt[i]; + } + for (; i < N; i++) { + do_recursion(&ctx->sfmt[i], &ctx->sfmt[i], &ctx->sfmt[i + POS1 - N], r1, + r2); + r1 = r2; + r2 = &ctx->sfmt[i]; + } +} + +/** + * This function fills the user-specified array with pseudorandom + * integers. + * + * @param array an 128-bit array to be filled by pseudorandom numbers. + * @param size number of 128-bit pseudorandom numbers to be generated. + */ +inline static void gen_rand_array(sfmt_t *ctx, w128_t *array, int size) { + int i, j; + w128_t *r1, *r2; + + r1 = &ctx->sfmt[N - 2]; + r2 = &ctx->sfmt[N - 1]; + for (i = 0; i < N - POS1; i++) { + do_recursion(&array[i], &ctx->sfmt[i], &ctx->sfmt[i + POS1], r1, r2); + r1 = r2; + r2 = &array[i]; + } + for (; i < N; i++) { + do_recursion(&array[i], &ctx->sfmt[i], &array[i + POS1 - N], r1, r2); + r1 = r2; + r2 = &array[i]; + } + for (; i < size - N; i++) { + do_recursion(&array[i], &array[i - N], &array[i + POS1 - N], r1, r2); + r1 = r2; + r2 = &array[i]; + } + for (j = 0; j < 2 * N - size; j++) { + ctx->sfmt[j] = array[j + size - N]; + } + for (; i < size; i++, j++) { + do_recursion(&array[i], &array[i - N], &array[i + POS1 - N], r1, r2); + r1 = r2; + r2 = &array[i]; + ctx->sfmt[j] = array[i]; + } +} +#endif + +#if defined(BIG_ENDIAN64) && !defined(ONLY64) && !defined(HAVE_ALTIVEC) +inline static void swap(w128_t *array, int size) { + int i; + uint32_t x, y; + + for (i = 0; i < size; i++) { + x = array[i].u[0]; + y = array[i].u[2]; + array[i].u[0] = array[i].u[1]; + array[i].u[2] = array[i].u[3]; + array[i].u[1] = x; + array[i].u[3] = y; + } +} +#endif +/** + * This function represents a function used in the initialization + * by init_by_array + * @param x 32-bit integer + * @return 32-bit integer + */ 
+static uint32_t func1(uint32_t x) { + return (x ^ (x >> 27)) * (uint32_t)1664525UL; +} + +/** + * This function represents a function used in the initialization + * by init_by_array + * @param x 32-bit integer + * @return 32-bit integer + */ +static uint32_t func2(uint32_t x) { + return (x ^ (x >> 27)) * (uint32_t)1566083941UL; +} + +/** + * This function certificate the period of 2^{MEXP} + */ +static void period_certification(sfmt_t *ctx) { + int inner = 0; + int i, j; + uint32_t work; + uint32_t *psfmt32 = &ctx->sfmt[0].u[0]; + + for (i = 0; i < 4; i++) + inner ^= psfmt32[idxof(i)] & parity[i]; + for (i = 16; i > 0; i >>= 1) + inner ^= inner >> i; + inner &= 1; + /* check OK */ + if (inner == 1) { + return; + } + /* check NG, and modification */ + for (i = 0; i < 4; i++) { + work = 1; + for (j = 0; j < 32; j++) { + if ((work & parity[i]) != 0) { + psfmt32[idxof(i)] ^= work; + return; + } + work = work << 1; + } + } +} + +/*---------------- + PUBLIC FUNCTIONS + ----------------*/ +/** + * This function returns the identification string. + * The string shows the word size, the Mersenne exponent, + * and all parameters of this generator. + */ +const char *get_idstring(void) { + return IDSTR; +} + +/** + * This function returns the minimum size of array used for \b + * fill_array32() function. + * @return minimum size of array used for fill_array32() function. + */ +int get_min_array_size32(void) { + return N32; +} + +/** + * This function returns the minimum size of array used for \b + * fill_array64() function. + * @return minimum size of array used for fill_array64() function. + */ +int get_min_array_size64(void) { + return N64; +} + +#ifndef ONLY64 +/** + * This function generates and returns 32-bit pseudorandom number. + * init_gen_rand or init_by_array must be called before this function. 
+ * @return 32-bit pseudorandom number + */ +uint32_t gen_rand32(sfmt_t *ctx) { + uint32_t r; + uint32_t *psfmt32 = &ctx->sfmt[0].u[0]; + + assert(ctx->initialized); + if (ctx->idx >= N32) { + gen_rand_all(ctx); + ctx->idx = 0; + } + r = psfmt32[ctx->idx++]; + return r; +} + +/* Generate a random integer in [0..limit). */ +uint32_t gen_rand32_range(sfmt_t *ctx, uint32_t limit) { + uint32_t ret, above; + + above = 0xffffffffU - (0xffffffffU % limit); + while (1) { + ret = gen_rand32(ctx); + if (ret < above) { + ret %= limit; + break; + } + } + return ret; +} +#endif +/** + * This function generates and returns 64-bit pseudorandom number. + * init_gen_rand or init_by_array must be called before this function. + * The function gen_rand64 should not be called after gen_rand32, + * unless an initialization is again executed. + * @return 64-bit pseudorandom number + */ +uint64_t gen_rand64(sfmt_t *ctx) { +#if defined(BIG_ENDIAN64) && !defined(ONLY64) + uint32_t r1, r2; + uint32_t *psfmt32 = &ctx->sfmt[0].u[0]; +#else + uint64_t r; + uint64_t *psfmt64 = (uint64_t *)&ctx->sfmt[0].u[0]; +#endif + + assert(ctx->initialized); + assert(ctx->idx % 2 == 0); + + if (ctx->idx >= N32) { + gen_rand_all(ctx); + ctx->idx = 0; + } +#if defined(BIG_ENDIAN64) && !defined(ONLY64) + r1 = psfmt32[ctx->idx]; + r2 = psfmt32[ctx->idx + 1]; + ctx->idx += 2; + return ((uint64_t)r2 << 32) | r1; +#else + r = psfmt64[ctx->idx / 2]; + ctx->idx += 2; + return r; +#endif +} + +/* Generate a random integer in [0..limit). */ +uint64_t gen_rand64_range(sfmt_t *ctx, uint64_t limit) { + uint64_t ret, above; + + above = 0xffffffffffffffffLLU - (0xffffffffffffffffLLU % limit); + while (1) { + ret = gen_rand64(ctx); + if (ret < above) { + ret %= limit; + break; + } + } + return ret; +} + +#ifndef ONLY64 +/** + * This function generates pseudorandom 32-bit integers in the + * specified array[] by one call. 
The number of pseudorandom integers + * is specified by the argument size, which must be at least 624 and a + * multiple of four. The generation by this function is much faster + * than the following gen_rand function. + * + * For initialization, init_gen_rand or init_by_array must be called + * before the first call of this function. This function can not be + * used after calling gen_rand function, without initialization. + * + * @param array an array where pseudorandom 32-bit integers are filled + * by this function. The pointer to the array must be \b "aligned" + * (namely, must be a multiple of 16) in the SIMD version, since it + * refers to the address of a 128-bit integer. In the standard C + * version, the pointer is arbitrary. + * + * @param size the number of 32-bit pseudorandom integers to be + * generated. size must be a multiple of 4, and greater than or equal + * to (MEXP / 128 + 1) * 4. + * + * @note \b memalign or \b posix_memalign is available to get aligned + * memory. Mac OSX doesn't have these functions, but \b malloc of OSX + * returns the pointer to the aligned memory block. + */ +void fill_array32(sfmt_t *ctx, uint32_t *array, int size) { + assert(ctx->initialized); + assert(ctx->idx == N32); + assert(size % 4 == 0); + assert(size >= N32); + + gen_rand_array(ctx, (w128_t *)array, size / 4); + ctx->idx = N32; +} +#endif + +/** + * This function generates pseudorandom 64-bit integers in the + * specified array[] by one call. The number of pseudorandom integers + * is specified by the argument size, which must be at least 312 and a + * multiple of two. The generation by this function is much faster + * than the following gen_rand function. + * + * For initialization, init_gen_rand or init_by_array must be called + * before the first call of this function. This function can not be + * used after calling gen_rand function, without initialization. + * + * @param array an array where pseudorandom 64-bit integers are filled + * by this function. 
The pointer to the array must be "aligned" + * (namely, must be a multiple of 16) in the SIMD version, since it + * refers to the address of a 128-bit integer. In the standard C + * version, the pointer is arbitrary. + * + * @param size the number of 64-bit pseudorandom integers to be + * generated. size must be a multiple of 2, and greater than or equal + * to (MEXP / 128 + 1) * 2 + * + * @note \b memalign or \b posix_memalign is available to get aligned + * memory. Mac OSX doesn't have these functions, but \b malloc of OSX + * returns the pointer to the aligned memory block. + */ +void fill_array64(sfmt_t *ctx, uint64_t *array, int size) { + assert(ctx->initialized); + assert(ctx->idx == N32); + assert(size % 2 == 0); + assert(size >= N64); + + gen_rand_array(ctx, (w128_t *)array, size / 2); + ctx->idx = N32; + +#if defined(BIG_ENDIAN64) && !defined(ONLY64) + swap((w128_t *)array, size /2); +#endif +} + +/** + * This function initializes the internal state array with a 32-bit + * integer seed. + * + * @param seed a 32-bit integer used as the seed. + */ +sfmt_t *init_gen_rand(uint32_t seed) { + sfmt_t *ctx; + int i; + uint32_t *psfmt32; + + if (posix_memalign((void **)&ctx, sizeof(w128_t), sizeof(sfmt_t)) != 0) { + return NULL; + } + psfmt32 = &ctx->sfmt[0].u[0]; + + psfmt32[idxof(0)] = seed; + for (i = 1; i < N32; i++) { + psfmt32[idxof(i)] = 1812433253UL * (psfmt32[idxof(i - 1)] + ^ (psfmt32[idxof(i - 1)] >> 30)) + + i; + } + ctx->idx = N32; + period_certification(ctx); + ctx->initialized = 1; + + return ctx; +} + +/** + * This function initializes the internal state array, + * with an array of 32-bit integers used as the seeds + * @param init_key the array of 32-bit integers, used as a seed. + * @param key_length the length of init_key. 
+ */ +sfmt_t *init_by_array(uint32_t *init_key, int key_length) { + sfmt_t *ctx; + int i, j, count; + uint32_t r; + int lag; + int mid; + int size = N * 4; + uint32_t *psfmt32; + + if (posix_memalign((void **)&ctx, sizeof(w128_t), sizeof(sfmt_t)) != 0) { + return NULL; + } + psfmt32 = &ctx->sfmt[0].u[0]; + + if (size >= 623) { + lag = 11; + } else if (size >= 68) { + lag = 7; + } else if (size >= 39) { + lag = 5; + } else { + lag = 3; + } + mid = (size - lag) / 2; + + memset(ctx->sfmt, 0x8b, sizeof(ctx->sfmt)); + if (key_length + 1 > N32) { + count = key_length + 1; + } else { + count = N32; + } + r = func1(psfmt32[idxof(0)] ^ psfmt32[idxof(mid)] + ^ psfmt32[idxof(N32 - 1)]); + psfmt32[idxof(mid)] += r; + r += key_length; + psfmt32[idxof(mid + lag)] += r; + psfmt32[idxof(0)] = r; + + count--; + for (i = 1, j = 0; (j < count) && (j < key_length); j++) { + r = func1(psfmt32[idxof(i)] ^ psfmt32[idxof((i + mid) % N32)] + ^ psfmt32[idxof((i + N32 - 1) % N32)]); + psfmt32[idxof((i + mid) % N32)] += r; + r += init_key[j] + i; + psfmt32[idxof((i + mid + lag) % N32)] += r; + psfmt32[idxof(i)] = r; + i = (i + 1) % N32; + } + for (; j < count; j++) { + r = func1(psfmt32[idxof(i)] ^ psfmt32[idxof((i + mid) % N32)] + ^ psfmt32[idxof((i + N32 - 1) % N32)]); + psfmt32[idxof((i + mid) % N32)] += r; + r += i; + psfmt32[idxof((i + mid + lag) % N32)] += r; + psfmt32[idxof(i)] = r; + i = (i + 1) % N32; + } + for (j = 0; j < N32; j++) { + r = func2(psfmt32[idxof(i)] + psfmt32[idxof((i + mid) % N32)] + + psfmt32[idxof((i + N32 - 1) % N32)]); + psfmt32[idxof((i + mid) % N32)] ^= r; + r -= i; + psfmt32[idxof((i + mid + lag) % N32)] ^= r; + psfmt32[idxof(i)] = r; + i = (i + 1) % N32; + } + + ctx->idx = N32; + period_certification(ctx); + ctx->initialized = 1; + + return ctx; +} + +void fini_gen_rand(sfmt_t *ctx) { + assert(ctx != NULL); + + ctx->initialized = 0; + free(ctx); +} diff --git a/test/unit/SFMT.c b/test/unit/SFMT.c new file mode 100644 index 00000000..a4759376 --- /dev/null +++ 
b/test/unit/SFMT.c @@ -0,0 +1,1608 @@ +/* + * This file derives from SFMT 1.3.3 + * (http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/SFMT/index.html), which was + * released under the terms of the following license: + * + * Copyright (c) 2006,2007 Mutsuo Saito, Makoto Matsumoto and Hiroshima + * University. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * * Neither the name of the Hiroshima University nor the names of + * its contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#include "test/jemalloc_test.h" + +#define BLOCK_SIZE 100000 +#define BLOCK_SIZE64 50000 +#define COUNT 1000 + +static const uint32_t init_gen_rand_32_expected[] = { + 3440181298U, 1564997079U, 1510669302U, 2930277156U, 1452439940U, + 3796268453U, 423124208U, 2143818589U, 3827219408U, 2987036003U, + 2674978610U, 1536842514U, 2027035537U, 2534897563U, 1686527725U, + 545368292U, 1489013321U, 1370534252U, 4231012796U, 3994803019U, + 1764869045U, 824597505U, 862581900U, 2469764249U, 812862514U, + 359318673U, 116957936U, 3367389672U, 2327178354U, 1898245200U, + 3206507879U, 2378925033U, 1040214787U, 2524778605U, 3088428700U, + 1417665896U, 964324147U, 2282797708U, 2456269299U, 313400376U, + 2245093271U, 1015729427U, 2694465011U, 3246975184U, 1992793635U, + 463679346U, 3721104591U, 3475064196U, 856141236U, 1499559719U, + 3522818941U, 3721533109U, 1954826617U, 1282044024U, 1543279136U, + 1301863085U, 2669145051U, 4221477354U, 3896016841U, 3392740262U, + 462466863U, 1037679449U, 1228140306U, 922298197U, 1205109853U, + 1872938061U, 3102547608U, 2742766808U, 1888626088U, 4028039414U, + 157593879U, 1136901695U, 4038377686U, 3572517236U, 4231706728U, + 2997311961U, 1189931652U, 3981543765U, 2826166703U, 87159245U, + 1721379072U, 3897926942U, 1790395498U, 2569178939U, 1047368729U, + 2340259131U, 3144212906U, 2301169789U, 2442885464U, 3034046771U, + 3667880593U, 3935928400U, 2372805237U, 1666397115U, 2460584504U, + 513866770U, 3810869743U, 2147400037U, 2792078025U, 2941761810U, + 3212265810U, 984692259U, 346590253U, 1804179199U, 3298543443U, + 750108141U, 2880257022U, 243310542U, 1869036465U, 1588062513U, + 2983949551U, 1931450364U, 4034505847U, 2735030199U, 1628461061U, + 2539522841U, 127965585U, 3992448871U, 913388237U, 559130076U, + 1202933193U, 4087643167U, 2590021067U, 2256240196U, 1746697293U, + 1013913783U, 1155864921U, 2715773730U, 915061862U, 1948766573U, + 2322882854U, 3761119102U, 1343405684U, 3078711943U, 3067431651U, + 3245156316U, 3588354584U, 3484623306U, 
3899621563U, 4156689741U, + 3237090058U, 3880063844U, 862416318U, 4039923869U, 2303788317U, + 3073590536U, 701653667U, 2131530884U, 3169309950U, 2028486980U, + 747196777U, 3620218225U, 432016035U, 1449580595U, 2772266392U, + 444224948U, 1662832057U, 3184055582U, 3028331792U, 1861686254U, + 1104864179U, 342430307U, 1350510923U, 3024656237U, 1028417492U, + 2870772950U, 290847558U, 3675663500U, 508431529U, 4264340390U, + 2263569913U, 1669302976U, 519511383U, 2706411211U, 3764615828U, + 3883162495U, 4051445305U, 2412729798U, 3299405164U, 3991911166U, + 2348767304U, 2664054906U, 3763609282U, 593943581U, 3757090046U, + 2075338894U, 2020550814U, 4287452920U, 4290140003U, 1422957317U, + 2512716667U, 2003485045U, 2307520103U, 2288472169U, 3940751663U, + 4204638664U, 2892583423U, 1710068300U, 3904755993U, 2363243951U, + 3038334120U, 547099465U, 771105860U, 3199983734U, 4282046461U, + 2298388363U, 934810218U, 2837827901U, 3952500708U, 2095130248U, + 3083335297U, 26885281U, 3932155283U, 1531751116U, 1425227133U, + 495654159U, 3279634176U, 3855562207U, 3957195338U, 4159985527U, + 893375062U, 1875515536U, 1327247422U, 3754140693U, 1028923197U, + 1729880440U, 805571298U, 448971099U, 2726757106U, 2749436461U, + 2485987104U, 175337042U, 3235477922U, 3882114302U, 2020970972U, + 943926109U, 2762587195U, 1904195558U, 3452650564U, 108432281U, + 3893463573U, 3977583081U, 2636504348U, 1110673525U, 3548479841U, + 4258854744U, 980047703U, 4057175418U, 3890008292U, 145653646U, + 3141868989U, 3293216228U, 1194331837U, 1254570642U, 3049934521U, + 2868313360U, 2886032750U, 1110873820U, 279553524U, 3007258565U, + 1104807822U, 3186961098U, 315764646U, 2163680838U, 3574508994U, + 3099755655U, 191957684U, 3642656737U, 3317946149U, 3522087636U, + 444526410U, 779157624U, 1088229627U, 1092460223U, 1856013765U, + 3659877367U, 368270451U, 503570716U, 3000984671U, 2742789647U, + 928097709U, 2914109539U, 308843566U, 2816161253U, 3667192079U, + 2762679057U, 3395240989U, 2928925038U, 1491465914U, 
3458702834U, + 3787782576U, 2894104823U, 1296880455U, 1253636503U, 989959407U, + 2291560361U, 2776790436U, 1913178042U, 1584677829U, 689637520U, + 1898406878U, 688391508U, 3385234998U, 845493284U, 1943591856U, + 2720472050U, 222695101U, 1653320868U, 2904632120U, 4084936008U, + 1080720688U, 3938032556U, 387896427U, 2650839632U, 99042991U, + 1720913794U, 1047186003U, 1877048040U, 2090457659U, 517087501U, + 4172014665U, 2129713163U, 2413533132U, 2760285054U, 4129272496U, + 1317737175U, 2309566414U, 2228873332U, 3889671280U, 1110864630U, + 3576797776U, 2074552772U, 832002644U, 3097122623U, 2464859298U, + 2679603822U, 1667489885U, 3237652716U, 1478413938U, 1719340335U, + 2306631119U, 639727358U, 3369698270U, 226902796U, 2099920751U, + 1892289957U, 2201594097U, 3508197013U, 3495811856U, 3900381493U, + 841660320U, 3974501451U, 3360949056U, 1676829340U, 728899254U, + 2047809627U, 2390948962U, 670165943U, 3412951831U, 4189320049U, + 1911595255U, 2055363086U, 507170575U, 418219594U, 4141495280U, + 2692088692U, 4203630654U, 3540093932U, 791986533U, 2237921051U, + 2526864324U, 2956616642U, 1394958700U, 1983768223U, 1893373266U, + 591653646U, 228432437U, 1611046598U, 3007736357U, 1040040725U, + 2726180733U, 2789804360U, 4263568405U, 829098158U, 3847722805U, + 1123578029U, 1804276347U, 997971319U, 4203797076U, 4185199713U, + 2811733626U, 2343642194U, 2985262313U, 1417930827U, 3759587724U, + 1967077982U, 1585223204U, 1097475516U, 1903944948U, 740382444U, + 1114142065U, 1541796065U, 1718384172U, 1544076191U, 1134682254U, + 3519754455U, 2866243923U, 341865437U, 645498576U, 2690735853U, + 1046963033U, 2493178460U, 1187604696U, 1619577821U, 488503634U, + 3255768161U, 2306666149U, 1630514044U, 2377698367U, 2751503746U, + 3794467088U, 1796415981U, 3657173746U, 409136296U, 1387122342U, + 1297726519U, 219544855U, 4270285558U, 437578827U, 1444698679U, + 2258519491U, 963109892U, 3982244073U, 3351535275U, 385328496U, + 1804784013U, 698059346U, 3920535147U, 708331212U, 784338163U, + 
785678147U, 1238376158U, 1557298846U, 2037809321U, 271576218U, + 4145155269U, 1913481602U, 2763691931U, 588981080U, 1201098051U, + 3717640232U, 1509206239U, 662536967U, 3180523616U, 1133105435U, + 2963500837U, 2253971215U, 3153642623U, 1066925709U, 2582781958U, + 3034720222U, 1090798544U, 2942170004U, 4036187520U, 686972531U, + 2610990302U, 2641437026U, 1837562420U, 722096247U, 1315333033U, + 2102231203U, 3402389208U, 3403698140U, 1312402831U, 2898426558U, + 814384596U, 385649582U, 1916643285U, 1924625106U, 2512905582U, + 2501170304U, 4275223366U, 2841225246U, 1467663688U, 3563567847U, + 2969208552U, 884750901U, 102992576U, 227844301U, 3681442994U, + 3502881894U, 4034693299U, 1166727018U, 1697460687U, 1737778332U, + 1787161139U, 1053003655U, 1215024478U, 2791616766U, 2525841204U, + 1629323443U, 3233815U, 2003823032U, 3083834263U, 2379264872U, + 3752392312U, 1287475550U, 3770904171U, 3004244617U, 1502117784U, + 918698423U, 2419857538U, 3864502062U, 1751322107U, 2188775056U, + 4018728324U, 983712955U, 440071928U, 3710838677U, 2001027698U, + 3994702151U, 22493119U, 3584400918U, 3446253670U, 4254789085U, + 1405447860U, 1240245579U, 1800644159U, 1661363424U, 3278326132U, + 3403623451U, 67092802U, 2609352193U, 3914150340U, 1814842761U, + 3610830847U, 591531412U, 3880232807U, 1673505890U, 2585326991U, + 1678544474U, 3148435887U, 3457217359U, 1193226330U, 2816576908U, + 154025329U, 121678860U, 1164915738U, 973873761U, 269116100U, + 52087970U, 744015362U, 498556057U, 94298882U, 1563271621U, + 2383059628U, 4197367290U, 3958472990U, 2592083636U, 2906408439U, + 1097742433U, 3924840517U, 264557272U, 2292287003U, 3203307984U, + 4047038857U, 3820609705U, 2333416067U, 1839206046U, 3600944252U, + 3412254904U, 583538222U, 2390557166U, 4140459427U, 2810357445U, + 226777499U, 2496151295U, 2207301712U, 3283683112U, 611630281U, + 1933218215U, 3315610954U, 3889441987U, 3719454256U, 3957190521U, + 1313998161U, 2365383016U, 3146941060U, 1801206260U, 796124080U, + 2076248581U, 1747472464U, 
3254365145U, 595543130U, 3573909503U, + 3758250204U, 2020768540U, 2439254210U, 93368951U, 3155792250U, + 2600232980U, 3709198295U, 3894900440U, 2971850836U, 1578909644U, + 1443493395U, 2581621665U, 3086506297U, 2443465861U, 558107211U, + 1519367835U, 249149686U, 908102264U, 2588765675U, 1232743965U, + 1001330373U, 3561331654U, 2259301289U, 1564977624U, 3835077093U, + 727244906U, 4255738067U, 1214133513U, 2570786021U, 3899704621U, + 1633861986U, 1636979509U, 1438500431U, 58463278U, 2823485629U, + 2297430187U, 2926781924U, 3371352948U, 1864009023U, 2722267973U, + 1444292075U, 437703973U, 1060414512U, 189705863U, 910018135U, + 4077357964U, 884213423U, 2644986052U, 3973488374U, 1187906116U, + 2331207875U, 780463700U, 3713351662U, 3854611290U, 412805574U, + 2978462572U, 2176222820U, 829424696U, 2790788332U, 2750819108U, + 1594611657U, 3899878394U, 3032870364U, 1702887682U, 1948167778U, + 14130042U, 192292500U, 947227076U, 90719497U, 3854230320U, + 784028434U, 2142399787U, 1563449646U, 2844400217U, 819143172U, + 2883302356U, 2328055304U, 1328532246U, 2603885363U, 3375188924U, + 933941291U, 3627039714U, 2129697284U, 2167253953U, 2506905438U, + 1412424497U, 2981395985U, 1418359660U, 2925902456U, 52752784U, + 3713667988U, 3924669405U, 648975707U, 1145520213U, 4018650664U, + 3805915440U, 2380542088U, 2013260958U, 3262572197U, 2465078101U, + 1114540067U, 3728768081U, 2396958768U, 590672271U, 904818725U, + 4263660715U, 700754408U, 1042601829U, 4094111823U, 4274838909U, + 2512692617U, 2774300207U, 2057306915U, 3470942453U, 99333088U, + 1142661026U, 2889931380U, 14316674U, 2201179167U, 415289459U, + 448265759U, 3515142743U, 3254903683U, 246633281U, 1184307224U, + 2418347830U, 2092967314U, 2682072314U, 2558750234U, 2000352263U, + 1544150531U, 399010405U, 1513946097U, 499682937U, 461167460U, + 3045570638U, 1633669705U, 851492362U, 4052801922U, 2055266765U, + 635556996U, 368266356U, 2385737383U, 3218202352U, 2603772408U, + 349178792U, 226482567U, 3102426060U, 3575998268U, 
2103001871U, + 3243137071U, 225500688U, 1634718593U, 4283311431U, 4292122923U, + 3842802787U, 811735523U, 105712518U, 663434053U, 1855889273U, + 2847972595U, 1196355421U, 2552150115U, 4254510614U, 3752181265U, + 3430721819U, 3828705396U, 3436287905U, 3441964937U, 4123670631U, + 353001539U, 459496439U, 3799690868U, 1293777660U, 2761079737U, + 498096339U, 3398433374U, 4080378380U, 2304691596U, 2995729055U, + 4134660419U, 3903444024U, 3576494993U, 203682175U, 3321164857U, + 2747963611U, 79749085U, 2992890370U, 1240278549U, 1772175713U, + 2111331972U, 2655023449U, 1683896345U, 2836027212U, 3482868021U, + 2489884874U, 756853961U, 2298874501U, 4013448667U, 4143996022U, + 2948306858U, 4132920035U, 1283299272U, 995592228U, 3450508595U, + 1027845759U, 1766942720U, 3861411826U, 1446861231U, 95974993U, + 3502263554U, 1487532194U, 601502472U, 4129619129U, 250131773U, + 2050079547U, 3198903947U, 3105589778U, 4066481316U, 3026383978U, + 2276901713U, 365637751U, 2260718426U, 1394775634U, 1791172338U, + 2690503163U, 2952737846U, 1568710462U, 732623190U, 2980358000U, + 1053631832U, 1432426951U, 3229149635U, 1854113985U, 3719733532U, + 3204031934U, 735775531U, 107468620U, 3734611984U, 631009402U, + 3083622457U, 4109580626U, 159373458U, 1301970201U, 4132389302U, + 1293255004U, 847182752U, 4170022737U, 96712900U, 2641406755U, + 1381727755U, 405608287U, 4287919625U, 1703554290U, 3589580244U, + 2911403488U, 2166565U, 2647306451U, 2330535117U, 1200815358U, + 1165916754U, 245060911U, 4040679071U, 3684908771U, 2452834126U, + 2486872773U, 2318678365U, 2940627908U, 1837837240U, 3447897409U, + 4270484676U, 1495388728U, 3754288477U, 4204167884U, 1386977705U, + 2692224733U, 3076249689U, 4109568048U, 4170955115U, 4167531356U, + 4020189950U, 4261855038U, 3036907575U, 3410399885U, 3076395737U, + 1046178638U, 144496770U, 230725846U, 3349637149U, 17065717U, + 2809932048U, 2054581785U, 3608424964U, 3259628808U, 134897388U, + 3743067463U, 257685904U, 3795656590U, 1562468719U, 3589103904U, + 
3120404710U, 254684547U, 2653661580U, 3663904795U, 2631942758U, + 1063234347U, 2609732900U, 2332080715U, 3521125233U, 1180599599U, + 1935868586U, 4110970440U, 296706371U, 2128666368U, 1319875791U, + 1570900197U, 3096025483U, 1799882517U, 1928302007U, 1163707758U, + 1244491489U, 3533770203U, 567496053U, 2757924305U, 2781639343U, + 2818420107U, 560404889U, 2619609724U, 4176035430U, 2511289753U, + 2521842019U, 3910553502U, 2926149387U, 3302078172U, 4237118867U, + 330725126U, 367400677U, 888239854U, 545570454U, 4259590525U, + 134343617U, 1102169784U, 1647463719U, 3260979784U, 1518840883U, + 3631537963U, 3342671457U, 1301549147U, 2083739356U, 146593792U, + 3217959080U, 652755743U, 2032187193U, 3898758414U, 1021358093U, + 4037409230U, 2176407931U, 3427391950U, 2883553603U, 985613827U, + 3105265092U, 3423168427U, 3387507672U, 467170288U, 2141266163U, + 3723870208U, 916410914U, 1293987799U, 2652584950U, 769160137U, + 3205292896U, 1561287359U, 1684510084U, 3136055621U, 3765171391U, + 639683232U, 2639569327U, 1218546948U, 4263586685U, 3058215773U, + 2352279820U, 401870217U, 2625822463U, 1529125296U, 2981801895U, + 1191285226U, 4027725437U, 3432700217U, 4098835661U, 971182783U, + 2443861173U, 3881457123U, 3874386651U, 457276199U, 2638294160U, + 4002809368U, 421169044U, 1112642589U, 3076213779U, 3387033971U, + 2499610950U, 3057240914U, 1662679783U, 461224431U, 1168395933U +}; +static const uint32_t init_by_array_32_expected[] = { + 2920711183U, 3885745737U, 3501893680U, 856470934U, 1421864068U, + 277361036U, 1518638004U, 2328404353U, 3355513634U, 64329189U, + 1624587673U, 3508467182U, 2481792141U, 3706480799U, 1925859037U, + 2913275699U, 882658412U, 384641219U, 422202002U, 1873384891U, + 2006084383U, 3924929912U, 1636718106U, 3108838742U, 1245465724U, + 4195470535U, 779207191U, 1577721373U, 1390469554U, 2928648150U, + 121399709U, 3170839019U, 4044347501U, 953953814U, 3821710850U, + 3085591323U, 3666535579U, 3577837737U, 2012008410U, 3565417471U, + 4044408017U, 433600965U, 
1637785608U, 1798509764U, 860770589U, + 3081466273U, 3982393409U, 2451928325U, 3437124742U, 4093828739U, + 3357389386U, 2154596123U, 496568176U, 2650035164U, 2472361850U, + 3438299U, 2150366101U, 1577256676U, 3802546413U, 1787774626U, + 4078331588U, 3706103141U, 170391138U, 3806085154U, 1680970100U, + 1961637521U, 3316029766U, 890610272U, 1453751581U, 1430283664U, + 3051057411U, 3597003186U, 542563954U, 3796490244U, 1690016688U, + 3448752238U, 440702173U, 347290497U, 1121336647U, 2540588620U, + 280881896U, 2495136428U, 213707396U, 15104824U, 2946180358U, + 659000016U, 566379385U, 2614030979U, 2855760170U, 334526548U, + 2315569495U, 2729518615U, 564745877U, 1263517638U, 3157185798U, + 1604852056U, 1011639885U, 2950579535U, 2524219188U, 312951012U, + 1528896652U, 1327861054U, 2846910138U, 3966855905U, 2536721582U, + 855353911U, 1685434729U, 3303978929U, 1624872055U, 4020329649U, + 3164802143U, 1642802700U, 1957727869U, 1792352426U, 3334618929U, + 2631577923U, 3027156164U, 842334259U, 3353446843U, 1226432104U, + 1742801369U, 3552852535U, 3471698828U, 1653910186U, 3380330939U, + 2313782701U, 3351007196U, 2129839995U, 1800682418U, 4085884420U, + 1625156629U, 3669701987U, 615211810U, 3294791649U, 4131143784U, + 2590843588U, 3207422808U, 3275066464U, 561592872U, 3957205738U, + 3396578098U, 48410678U, 3505556445U, 1005764855U, 3920606528U, + 2936980473U, 2378918600U, 2404449845U, 1649515163U, 701203563U, + 3705256349U, 83714199U, 3586854132U, 922978446U, 2863406304U, + 3523398907U, 2606864832U, 2385399361U, 3171757816U, 4262841009U, + 3645837721U, 1169579486U, 3666433897U, 3174689479U, 1457866976U, + 3803895110U, 3346639145U, 1907224409U, 1978473712U, 1036712794U, + 980754888U, 1302782359U, 1765252468U, 459245755U, 3728923860U, + 1512894209U, 2046491914U, 207860527U, 514188684U, 2288713615U, + 1597354672U, 3349636117U, 2357291114U, 3995796221U, 945364213U, + 1893326518U, 3770814016U, 1691552714U, 2397527410U, 967486361U, + 776416472U, 4197661421U, 951150819U, 1852770983U, 
4044624181U, + 1399439738U, 4194455275U, 2284037669U, 1550734958U, 3321078108U, + 1865235926U, 2912129961U, 2664980877U, 1357572033U, 2600196436U, + 2486728200U, 2372668724U, 1567316966U, 2374111491U, 1839843570U, + 20815612U, 3727008608U, 3871996229U, 824061249U, 1932503978U, + 3404541726U, 758428924U, 2609331364U, 1223966026U, 1299179808U, + 648499352U, 2180134401U, 880821170U, 3781130950U, 113491270U, + 1032413764U, 4185884695U, 2490396037U, 1201932817U, 4060951446U, + 4165586898U, 1629813212U, 2887821158U, 415045333U, 628926856U, + 2193466079U, 3391843445U, 2227540681U, 1907099846U, 2848448395U, + 1717828221U, 1372704537U, 1707549841U, 2294058813U, 2101214437U, + 2052479531U, 1695809164U, 3176587306U, 2632770465U, 81634404U, + 1603220563U, 644238487U, 302857763U, 897352968U, 2613146653U, + 1391730149U, 4245717312U, 4191828749U, 1948492526U, 2618174230U, + 3992984522U, 2178852787U, 3596044509U, 3445573503U, 2026614616U, + 915763564U, 3415689334U, 2532153403U, 3879661562U, 2215027417U, + 3111154986U, 2929478371U, 668346391U, 1152241381U, 2632029711U, + 3004150659U, 2135025926U, 948690501U, 2799119116U, 4228829406U, + 1981197489U, 4209064138U, 684318751U, 3459397845U, 201790843U, + 4022541136U, 3043635877U, 492509624U, 3263466772U, 1509148086U, + 921459029U, 3198857146U, 705479721U, 3835966910U, 3603356465U, + 576159741U, 1742849431U, 594214882U, 2055294343U, 3634861861U, + 449571793U, 3246390646U, 3868232151U, 1479156585U, 2900125656U, + 2464815318U, 3960178104U, 1784261920U, 18311476U, 3627135050U, + 644609697U, 424968996U, 919890700U, 2986824110U, 816423214U, + 4003562844U, 1392714305U, 1757384428U, 2569030598U, 995949559U, + 3875659880U, 2933807823U, 2752536860U, 2993858466U, 4030558899U, + 2770783427U, 2775406005U, 2777781742U, 1931292655U, 472147933U, + 3865853827U, 2726470545U, 2668412860U, 2887008249U, 408979190U, + 3578063323U, 3242082049U, 1778193530U, 27981909U, 2362826515U, + 389875677U, 1043878156U, 581653903U, 3830568952U, 389535942U, + 3713523185U, 
2768373359U, 2526101582U, 1998618197U, 1160859704U, + 3951172488U, 1098005003U, 906275699U, 3446228002U, 2220677963U, + 2059306445U, 132199571U, 476838790U, 1868039399U, 3097344807U, + 857300945U, 396345050U, 2835919916U, 1782168828U, 1419519470U, + 4288137521U, 819087232U, 596301494U, 872823172U, 1526888217U, + 805161465U, 1116186205U, 2829002754U, 2352620120U, 620121516U, + 354159268U, 3601949785U, 209568138U, 1352371732U, 2145977349U, + 4236871834U, 1539414078U, 3558126206U, 3224857093U, 4164166682U, + 3817553440U, 3301780278U, 2682696837U, 3734994768U, 1370950260U, + 1477421202U, 2521315749U, 1330148125U, 1261554731U, 2769143688U, + 3554756293U, 4235882678U, 3254686059U, 3530579953U, 1215452615U, + 3574970923U, 4057131421U, 589224178U, 1000098193U, 171190718U, + 2521852045U, 2351447494U, 2284441580U, 2646685513U, 3486933563U, + 3789864960U, 1190528160U, 1702536782U, 1534105589U, 4262946827U, + 2726686826U, 3584544841U, 2348270128U, 2145092281U, 2502718509U, + 1027832411U, 3571171153U, 1287361161U, 4011474411U, 3241215351U, + 2419700818U, 971242709U, 1361975763U, 1096842482U, 3271045537U, + 81165449U, 612438025U, 3912966678U, 1356929810U, 733545735U, + 537003843U, 1282953084U, 884458241U, 588930090U, 3930269801U, + 2961472450U, 1219535534U, 3632251943U, 268183903U, 1441240533U, + 3653903360U, 3854473319U, 2259087390U, 2548293048U, 2022641195U, + 2105543911U, 1764085217U, 3246183186U, 482438805U, 888317895U, + 2628314765U, 2466219854U, 717546004U, 2322237039U, 416725234U, + 1544049923U, 1797944973U, 3398652364U, 3111909456U, 485742908U, + 2277491072U, 1056355088U, 3181001278U, 129695079U, 2693624550U, + 1764438564U, 3797785470U, 195503713U, 3266519725U, 2053389444U, + 1961527818U, 3400226523U, 3777903038U, 2597274307U, 4235851091U, + 4094406648U, 2171410785U, 1781151386U, 1378577117U, 654643266U, + 3424024173U, 3385813322U, 679385799U, 479380913U, 681715441U, + 3096225905U, 276813409U, 3854398070U, 2721105350U, 831263315U, + 3276280337U, 2628301522U, 3984868494U, 
1466099834U, 2104922114U, + 1412672743U, 820330404U, 3491501010U, 942735832U, 710652807U, + 3972652090U, 679881088U, 40577009U, 3705286397U, 2815423480U, + 3566262429U, 663396513U, 3777887429U, 4016670678U, 404539370U, + 1142712925U, 1140173408U, 2913248352U, 2872321286U, 263751841U, + 3175196073U, 3162557581U, 2878996619U, 75498548U, 3836833140U, + 3284664959U, 1157523805U, 112847376U, 207855609U, 1337979698U, + 1222578451U, 157107174U, 901174378U, 3883717063U, 1618632639U, + 1767889440U, 4264698824U, 1582999313U, 884471997U, 2508825098U, + 3756370771U, 2457213553U, 3565776881U, 3709583214U, 915609601U, + 460833524U, 1091049576U, 85522880U, 2553251U, 132102809U, + 2429882442U, 2562084610U, 1386507633U, 4112471229U, 21965213U, + 1981516006U, 2418435617U, 3054872091U, 4251511224U, 2025783543U, + 1916911512U, 2454491136U, 3938440891U, 3825869115U, 1121698605U, + 3463052265U, 802340101U, 1912886800U, 4031997367U, 3550640406U, + 1596096923U, 610150600U, 431464457U, 2541325046U, 486478003U, + 739704936U, 2862696430U, 3037903166U, 1129749694U, 2611481261U, + 1228993498U, 510075548U, 3424962587U, 2458689681U, 818934833U, + 4233309125U, 1608196251U, 3419476016U, 1858543939U, 2682166524U, + 3317854285U, 631986188U, 3008214764U, 613826412U, 3567358221U, + 3512343882U, 1552467474U, 3316162670U, 1275841024U, 4142173454U, + 565267881U, 768644821U, 198310105U, 2396688616U, 1837659011U, + 203429334U, 854539004U, 4235811518U, 3338304926U, 3730418692U, + 3852254981U, 3032046452U, 2329811860U, 2303590566U, 2696092212U, + 3894665932U, 145835667U, 249563655U, 1932210840U, 2431696407U, + 3312636759U, 214962629U, 2092026914U, 3020145527U, 4073039873U, + 2739105705U, 1308336752U, 855104522U, 2391715321U, 67448785U, + 547989482U, 854411802U, 3608633740U, 431731530U, 537375589U, + 3888005760U, 696099141U, 397343236U, 1864511780U, 44029739U, + 1729526891U, 1993398655U, 2010173426U, 2591546756U, 275223291U, + 1503900299U, 4217765081U, 2185635252U, 1122436015U, 3550155364U, + 681707194U, 
3260479338U, 933579397U, 2983029282U, 2505504587U, + 2667410393U, 2962684490U, 4139721708U, 2658172284U, 2452602383U, + 2607631612U, 1344296217U, 3075398709U, 2949785295U, 1049956168U, + 3917185129U, 2155660174U, 3280524475U, 1503827867U, 674380765U, + 1918468193U, 3843983676U, 634358221U, 2538335643U, 1873351298U, + 3368723763U, 2129144130U, 3203528633U, 3087174986U, 2691698871U, + 2516284287U, 24437745U, 1118381474U, 2816314867U, 2448576035U, + 4281989654U, 217287825U, 165872888U, 2628995722U, 3533525116U, + 2721669106U, 872340568U, 3429930655U, 3309047304U, 3916704967U, + 3270160355U, 1348884255U, 1634797670U, 881214967U, 4259633554U, + 174613027U, 1103974314U, 1625224232U, 2678368291U, 1133866707U, + 3853082619U, 4073196549U, 1189620777U, 637238656U, 930241537U, + 4042750792U, 3842136042U, 2417007212U, 2524907510U, 1243036827U, + 1282059441U, 3764588774U, 1394459615U, 2323620015U, 1166152231U, + 3307479609U, 3849322257U, 3507445699U, 4247696636U, 758393720U, + 967665141U, 1095244571U, 1319812152U, 407678762U, 2640605208U, + 2170766134U, 3663594275U, 4039329364U, 2512175520U, 725523154U, + 2249807004U, 3312617979U, 2414634172U, 1278482215U, 349206484U, + 1573063308U, 1196429124U, 3873264116U, 2400067801U, 268795167U, + 226175489U, 2961367263U, 1968719665U, 42656370U, 1010790699U, + 561600615U, 2422453992U, 3082197735U, 1636700484U, 3977715296U, + 3125350482U, 3478021514U, 2227819446U, 1540868045U, 3061908980U, + 1087362407U, 3625200291U, 361937537U, 580441897U, 1520043666U, + 2270875402U, 1009161260U, 2502355842U, 4278769785U, 473902412U, + 1057239083U, 1905829039U, 1483781177U, 2080011417U, 1207494246U, + 1806991954U, 2194674403U, 3455972205U, 807207678U, 3655655687U, + 674112918U, 195425752U, 3917890095U, 1874364234U, 1837892715U, + 3663478166U, 1548892014U, 2570748714U, 2049929836U, 2167029704U, + 697543767U, 3499545023U, 3342496315U, 1725251190U, 3561387469U, + 2905606616U, 1580182447U, 3934525927U, 4103172792U, 1365672522U, + 1534795737U, 3308667416U, 
2841911405U, 3943182730U, 4072020313U, + 3494770452U, 3332626671U, 55327267U, 478030603U, 411080625U, + 3419529010U, 1604767823U, 3513468014U, 570668510U, 913790824U, + 2283967995U, 695159462U, 3825542932U, 4150698144U, 1829758699U, + 202895590U, 1609122645U, 1267651008U, 2910315509U, 2511475445U, + 2477423819U, 3932081579U, 900879979U, 2145588390U, 2670007504U, + 580819444U, 1864996828U, 2526325979U, 1019124258U, 815508628U, + 2765933989U, 1277301341U, 3006021786U, 855540956U, 288025710U, + 1919594237U, 2331223864U, 177452412U, 2475870369U, 2689291749U, + 865194284U, 253432152U, 2628531804U, 2861208555U, 2361597573U, + 1653952120U, 1039661024U, 2159959078U, 3709040440U, 3564718533U, + 2596878672U, 2041442161U, 31164696U, 2662962485U, 3665637339U, + 1678115244U, 2699839832U, 3651968520U, 3521595541U, 458433303U, + 2423096824U, 21831741U, 380011703U, 2498168716U, 861806087U, + 1673574843U, 4188794405U, 2520563651U, 2632279153U, 2170465525U, + 4171949898U, 3886039621U, 1661344005U, 3424285243U, 992588372U, + 2500984144U, 2993248497U, 3590193895U, 1535327365U, 515645636U, + 131633450U, 3729760261U, 1613045101U, 3254194278U, 15889678U, + 1493590689U, 244148718U, 2991472662U, 1401629333U, 777349878U, + 2501401703U, 4285518317U, 3794656178U, 955526526U, 3442142820U, + 3970298374U, 736025417U, 2737370764U, 1271509744U, 440570731U, + 136141826U, 1596189518U, 923399175U, 257541519U, 3505774281U, + 2194358432U, 2518162991U, 1379893637U, 2667767062U, 3748146247U, + 1821712620U, 3923161384U, 1947811444U, 2392527197U, 4127419685U, + 1423694998U, 4156576871U, 1382885582U, 3420127279U, 3617499534U, + 2994377493U, 4038063986U, 1918458672U, 2983166794U, 4200449033U, + 353294540U, 1609232588U, 243926648U, 2332803291U, 507996832U, + 2392838793U, 4075145196U, 2060984340U, 4287475136U, 88232602U, + 2491531140U, 4159725633U, 2272075455U, 759298618U, 201384554U, + 838356250U, 1416268324U, 674476934U, 90795364U, 141672229U, + 3660399588U, 4196417251U, 3249270244U, 3774530247U, 59587265U, 
+ 3683164208U, 19392575U, 1463123697U, 1882205379U, 293780489U, + 2553160622U, 2933904694U, 675638239U, 2851336944U, 1435238743U, + 2448730183U, 804436302U, 2119845972U, 322560608U, 4097732704U, + 2987802540U, 641492617U, 2575442710U, 4217822703U, 3271835300U, + 2836418300U, 3739921620U, 2138378768U, 2879771855U, 4294903423U, + 3121097946U, 2603440486U, 2560820391U, 1012930944U, 2313499967U, + 584489368U, 3431165766U, 897384869U, 2062537737U, 2847889234U, + 3742362450U, 2951174585U, 4204621084U, 1109373893U, 3668075775U, + 2750138839U, 3518055702U, 733072558U, 4169325400U, 788493625U +}; +static const uint64_t init_gen_rand_64_expected[] = { + QU(16924766246869039260LLU), QU( 8201438687333352714LLU), + QU( 2265290287015001750LLU), QU(18397264611805473832LLU), + QU( 3375255223302384358LLU), QU( 6345559975416828796LLU), + QU(18229739242790328073LLU), QU( 7596792742098800905LLU), + QU( 255338647169685981LLU), QU( 2052747240048610300LLU), + QU(18328151576097299343LLU), QU(12472905421133796567LLU), + QU(11315245349717600863LLU), QU(16594110197775871209LLU), + QU(15708751964632456450LLU), QU(10452031272054632535LLU), + QU(11097646720811454386LLU), QU( 4556090668445745441LLU), + QU(17116187693090663106LLU), QU(14931526836144510645LLU), + QU( 9190752218020552591LLU), QU( 9625800285771901401LLU), + QU(13995141077659972832LLU), QU( 5194209094927829625LLU), + QU( 4156788379151063303LLU), QU( 8523452593770139494LLU), + QU(14082382103049296727LLU), QU( 2462601863986088483LLU), + QU( 3030583461592840678LLU), QU( 5221622077872827681LLU), + QU( 3084210671228981236LLU), QU(13956758381389953823LLU), + QU(13503889856213423831LLU), QU(15696904024189836170LLU), + QU( 4612584152877036206LLU), QU( 6231135538447867881LLU), + QU(10172457294158869468LLU), QU( 6452258628466708150LLU), + QU(14044432824917330221LLU), QU( 370168364480044279LLU), + QU(10102144686427193359LLU), QU( 667870489994776076LLU), + QU( 2732271956925885858LLU), QU(18027788905977284151LLU), + QU(15009842788582923859LLU), 
QU( 7136357960180199542LLU), + QU(15901736243475578127LLU), QU(16951293785352615701LLU), + QU(10551492125243691632LLU), QU(17668869969146434804LLU), + QU(13646002971174390445LLU), QU( 9804471050759613248LLU), + QU( 5511670439655935493LLU), QU(18103342091070400926LLU), + QU(17224512747665137533LLU), QU(15534627482992618168LLU), + QU( 1423813266186582647LLU), QU(15821176807932930024LLU), + QU( 30323369733607156LLU), QU(11599382494723479403LLU), + QU( 653856076586810062LLU), QU( 3176437395144899659LLU), + QU(14028076268147963917LLU), QU(16156398271809666195LLU), + QU( 3166955484848201676LLU), QU( 5746805620136919390LLU), + QU(17297845208891256593LLU), QU(11691653183226428483LLU), + QU(17900026146506981577LLU), QU(15387382115755971042LLU), + QU(16923567681040845943LLU), QU( 8039057517199388606LLU), + QU(11748409241468629263LLU), QU( 794358245539076095LLU), + QU(13438501964693401242LLU), QU(14036803236515618962LLU), + QU( 5252311215205424721LLU), QU(17806589612915509081LLU), + QU( 6802767092397596006LLU), QU(14212120431184557140LLU), + QU( 1072951366761385712LLU), QU(13098491780722836296LLU), + QU( 9466676828710797353LLU), QU(12673056849042830081LLU), + QU(12763726623645357580LLU), QU(16468961652999309493LLU), + QU(15305979875636438926LLU), QU(17444713151223449734LLU), + QU( 5692214267627883674LLU), QU(13049589139196151505LLU), + QU( 880115207831670745LLU), QU( 1776529075789695498LLU), + QU(16695225897801466485LLU), QU(10666901778795346845LLU), + QU( 6164389346722833869LLU), QU( 2863817793264300475LLU), + QU( 9464049921886304754LLU), QU( 3993566636740015468LLU), + QU( 9983749692528514136LLU), QU(16375286075057755211LLU), + QU(16042643417005440820LLU), QU(11445419662923489877LLU), + QU( 7999038846885158836LLU), QU( 6721913661721511535LLU), + QU( 5363052654139357320LLU), QU( 1817788761173584205LLU), + QU(13290974386445856444LLU), QU( 4650350818937984680LLU), + QU( 8219183528102484836LLU), QU( 1569862923500819899LLU), + QU( 4189359732136641860LLU), 
QU(14202822961683148583LLU), + QU( 4457498315309429058LLU), QU(13089067387019074834LLU), + QU(11075517153328927293LLU), QU(10277016248336668389LLU), + QU( 7070509725324401122LLU), QU(17808892017780289380LLU), + QU(13143367339909287349LLU), QU( 1377743745360085151LLU), + QU( 5749341807421286485LLU), QU(14832814616770931325LLU), + QU( 7688820635324359492LLU), QU(10960474011539770045LLU), + QU( 81970066653179790LLU), QU(12619476072607878022LLU), + QU( 4419566616271201744LLU), QU(15147917311750568503LLU), + QU( 5549739182852706345LLU), QU( 7308198397975204770LLU), + QU(13580425496671289278LLU), QU(17070764785210130301LLU), + QU( 8202832846285604405LLU), QU( 6873046287640887249LLU), + QU( 6927424434308206114LLU), QU( 6139014645937224874LLU), + QU(10290373645978487639LLU), QU(15904261291701523804LLU), + QU( 9628743442057826883LLU), QU(18383429096255546714LLU), + QU( 4977413265753686967LLU), QU( 7714317492425012869LLU), + QU( 9025232586309926193LLU), QU(14627338359776709107LLU), + QU(14759849896467790763LLU), QU(10931129435864423252LLU), + QU( 4588456988775014359LLU), QU(10699388531797056724LLU), + QU( 468652268869238792LLU), QU( 5755943035328078086LLU), + QU( 2102437379988580216LLU), QU( 9986312786506674028LLU), + QU( 2654207180040945604LLU), QU( 8726634790559960062LLU), + QU( 100497234871808137LLU), QU( 2800137176951425819LLU), + QU( 6076627612918553487LLU), QU( 5780186919186152796LLU), + QU( 8179183595769929098LLU), QU( 6009426283716221169LLU), + QU( 2796662551397449358LLU), QU( 1756961367041986764LLU), + QU( 6972897917355606205LLU), QU(14524774345368968243LLU), + QU( 2773529684745706940LLU), QU( 4853632376213075959LLU), + QU( 4198177923731358102LLU), QU( 8271224913084139776LLU), + QU( 2741753121611092226LLU), QU(16782366145996731181LLU), + QU(15426125238972640790LLU), QU(13595497100671260342LLU), + QU( 3173531022836259898LLU), QU( 6573264560319511662LLU), + QU(18041111951511157441LLU), QU( 2351433581833135952LLU), + QU( 3113255578908173487LLU), QU( 
1739371330877858784LLU), + QU(16046126562789165480LLU), QU( 8072101652214192925LLU), + QU(15267091584090664910LLU), QU( 9309579200403648940LLU), + QU( 5218892439752408722LLU), QU(14492477246004337115LLU), + QU(17431037586679770619LLU), QU( 7385248135963250480LLU), + QU( 9580144956565560660LLU), QU( 4919546228040008720LLU), + QU(15261542469145035584LLU), QU(18233297270822253102LLU), + QU( 5453248417992302857LLU), QU( 9309519155931460285LLU), + QU(10342813012345291756LLU), QU(15676085186784762381LLU), + QU(15912092950691300645LLU), QU( 9371053121499003195LLU), + QU( 9897186478226866746LLU), QU(14061858287188196327LLU), + QU( 122575971620788119LLU), QU(12146750969116317754LLU), + QU( 4438317272813245201LLU), QU( 8332576791009527119LLU), + QU(13907785691786542057LLU), QU(10374194887283287467LLU), + QU( 2098798755649059566LLU), QU( 3416235197748288894LLU), + QU( 8688269957320773484LLU), QU( 7503964602397371571LLU), + QU(16724977015147478236LLU), QU( 9461512855439858184LLU), + QU(13259049744534534727LLU), QU( 3583094952542899294LLU), + QU( 8764245731305528292LLU), QU(13240823595462088985LLU), + QU(13716141617617910448LLU), QU(18114969519935960955LLU), + QU( 2297553615798302206LLU), QU( 4585521442944663362LLU), + QU(17776858680630198686LLU), QU( 4685873229192163363LLU), + QU( 152558080671135627LLU), QU(15424900540842670088LLU), + QU(13229630297130024108LLU), QU(17530268788245718717LLU), + QU(16675633913065714144LLU), QU( 3158912717897568068LLU), + QU(15399132185380087288LLU), QU( 7401418744515677872LLU), + QU(13135412922344398535LLU), QU( 6385314346100509511LLU), + QU(13962867001134161139LLU), QU(10272780155442671999LLU), + QU(12894856086597769142LLU), QU(13340877795287554994LLU), + QU(12913630602094607396LLU), QU(12543167911119793857LLU), + QU(17343570372251873096LLU), QU(10959487764494150545LLU), + QU( 6966737953093821128LLU), QU(13780699135496988601LLU), + QU( 4405070719380142046LLU), QU(14923788365607284982LLU), + QU( 2869487678905148380LLU), QU( 
6416272754197188403LLU), + QU(15017380475943612591LLU), QU( 1995636220918429487LLU), + QU( 3402016804620122716LLU), QU(15800188663407057080LLU), + QU(11362369990390932882LLU), QU(15262183501637986147LLU), + QU(10239175385387371494LLU), QU( 9352042420365748334LLU), + QU( 1682457034285119875LLU), QU( 1724710651376289644LLU), + QU( 2038157098893817966LLU), QU( 9897825558324608773LLU), + QU( 1477666236519164736LLU), QU(16835397314511233640LLU), + QU(10370866327005346508LLU), QU(10157504370660621982LLU), + QU(12113904045335882069LLU), QU(13326444439742783008LLU), + QU(11302769043000765804LLU), QU(13594979923955228484LLU), + QU(11779351762613475968LLU), QU( 3786101619539298383LLU), + QU( 8021122969180846063LLU), QU(15745904401162500495LLU), + QU(10762168465993897267LLU), QU(13552058957896319026LLU), + QU(11200228655252462013LLU), QU( 5035370357337441226LLU), + QU( 7593918984545500013LLU), QU( 5418554918361528700LLU), + QU( 4858270799405446371LLU), QU( 9974659566876282544LLU), + QU(18227595922273957859LLU), QU( 2772778443635656220LLU), + QU(14285143053182085385LLU), QU( 9939700992429600469LLU), + QU(12756185904545598068LLU), QU( 2020783375367345262LLU), + QU( 57026775058331227LLU), QU( 950827867930065454LLU), + QU( 6602279670145371217LLU), QU( 2291171535443566929LLU), + QU( 5832380724425010313LLU), QU( 1220343904715982285LLU), + QU(17045542598598037633LLU), QU(15460481779702820971LLU), + QU(13948388779949365130LLU), QU(13975040175430829518LLU), + QU(17477538238425541763LLU), QU(11104663041851745725LLU), + QU(15860992957141157587LLU), QU(14529434633012950138LLU), + QU( 2504838019075394203LLU), QU( 7512113882611121886LLU), + QU( 4859973559980886617LLU), QU( 1258601555703250219LLU), + QU(15594548157514316394LLU), QU( 4516730171963773048LLU), + QU(11380103193905031983LLU), QU( 6809282239982353344LLU), + QU(18045256930420065002LLU), QU( 2453702683108791859LLU), + QU( 977214582986981460LLU), QU( 2006410402232713466LLU), + QU( 6192236267216378358LLU), QU( 
3429468402195675253LLU), + QU(18146933153017348921LLU), QU(17369978576367231139LLU), + QU( 1246940717230386603LLU), QU(11335758870083327110LLU), + QU(14166488801730353682LLU), QU( 9008573127269635732LLU), + QU(10776025389820643815LLU), QU(15087605441903942962LLU), + QU( 1359542462712147922LLU), QU(13898874411226454206LLU), + QU(17911176066536804411LLU), QU( 9435590428600085274LLU), + QU( 294488509967864007LLU), QU( 8890111397567922046LLU), + QU( 7987823476034328778LLU), QU(13263827582440967651LLU), + QU( 7503774813106751573LLU), QU(14974747296185646837LLU), + QU( 8504765037032103375LLU), QU(17340303357444536213LLU), + QU( 7704610912964485743LLU), QU( 8107533670327205061LLU), + QU( 9062969835083315985LLU), QU(16968963142126734184LLU), + QU(12958041214190810180LLU), QU( 2720170147759570200LLU), + QU( 2986358963942189566LLU), QU(14884226322219356580LLU), + QU( 286224325144368520LLU), QU(11313800433154279797LLU), + QU(18366849528439673248LLU), QU(17899725929482368789LLU), + QU( 3730004284609106799LLU), QU( 1654474302052767205LLU), + QU( 5006698007047077032LLU), QU( 8196893913601182838LLU), + QU(15214541774425211640LLU), QU(17391346045606626073LLU), + QU( 8369003584076969089LLU), QU( 3939046733368550293LLU), + QU(10178639720308707785LLU), QU( 2180248669304388697LLU), + QU( 62894391300126322LLU), QU( 9205708961736223191LLU), + QU( 6837431058165360438LLU), QU( 3150743890848308214LLU), + QU(17849330658111464583LLU), QU(12214815643135450865LLU), + QU(13410713840519603402LLU), QU( 3200778126692046802LLU), + QU(13354780043041779313LLU), QU( 800850022756886036LLU), + QU(15660052933953067433LLU), QU( 6572823544154375676LLU), + QU(11030281857015819266LLU), QU(12682241941471433835LLU), + QU(11654136407300274693LLU), QU( 4517795492388641109LLU), + QU( 9757017371504524244LLU), QU(17833043400781889277LLU), + QU(12685085201747792227LLU), QU(10408057728835019573LLU), + QU( 98370418513455221LLU), QU( 6732663555696848598LLU), + QU(13248530959948529780LLU), QU( 3530441401230622826LLU), + 
QU(18188251992895660615LLU), QU( 1847918354186383756LLU), + QU( 1127392190402660921LLU), QU(11293734643143819463LLU), + QU( 3015506344578682982LLU), QU(13852645444071153329LLU), + QU( 2121359659091349142LLU), QU( 1294604376116677694LLU), + QU( 5616576231286352318LLU), QU( 7112502442954235625LLU), + QU(11676228199551561689LLU), QU(12925182803007305359LLU), + QU( 7852375518160493082LLU), QU( 1136513130539296154LLU), + QU( 5636923900916593195LLU), QU( 3221077517612607747LLU), + QU(17784790465798152513LLU), QU( 3554210049056995938LLU), + QU(17476839685878225874LLU), QU( 3206836372585575732LLU), + QU( 2765333945644823430LLU), QU(10080070903718799528LLU), + QU( 5412370818878286353LLU), QU( 9689685887726257728LLU), + QU( 8236117509123533998LLU), QU( 1951139137165040214LLU), + QU( 4492205209227980349LLU), QU(16541291230861602967LLU), + QU( 1424371548301437940LLU), QU( 9117562079669206794LLU), + QU(14374681563251691625LLU), QU(13873164030199921303LLU), + QU( 6680317946770936731LLU), QU(15586334026918276214LLU), + QU(10896213950976109802LLU), QU( 9506261949596413689LLU), + QU( 9903949574308040616LLU), QU( 6038397344557204470LLU), + QU( 174601465422373648LLU), QU(15946141191338238030LLU), + QU(17142225620992044937LLU), QU( 7552030283784477064LLU), + QU( 2947372384532947997LLU), QU( 510797021688197711LLU), + QU( 4962499439249363461LLU), QU( 23770320158385357LLU), + QU( 959774499105138124LLU), QU( 1468396011518788276LLU), + QU( 2015698006852312308LLU), QU( 4149400718489980136LLU), + QU( 5992916099522371188LLU), QU(10819182935265531076LLU), + QU(16189787999192351131LLU), QU( 342833961790261950LLU), + QU(12470830319550495336LLU), QU(18128495041912812501LLU), + QU( 1193600899723524337LLU), QU( 9056793666590079770LLU), + QU( 2154021227041669041LLU), QU( 4963570213951235735LLU), + QU( 4865075960209211409LLU), QU( 2097724599039942963LLU), + QU( 2024080278583179845LLU), QU(11527054549196576736LLU), + QU(10650256084182390252LLU), QU( 4808408648695766755LLU), + QU( 
1642839215013788844LLU), QU(10607187948250398390LLU), + QU( 7076868166085913508LLU), QU( 730522571106887032LLU), + QU(12500579240208524895LLU), QU( 4484390097311355324LLU), + QU(15145801330700623870LLU), QU( 8055827661392944028LLU), + QU( 5865092976832712268LLU), QU(15159212508053625143LLU), + QU( 3560964582876483341LLU), QU( 4070052741344438280LLU), + QU( 6032585709886855634LLU), QU(15643262320904604873LLU), + QU( 2565119772293371111LLU), QU( 318314293065348260LLU), + QU(15047458749141511872LLU), QU( 7772788389811528730LLU), + QU( 7081187494343801976LLU), QU( 6465136009467253947LLU), + QU(10425940692543362069LLU), QU( 554608190318339115LLU), + QU(14796699860302125214LLU), QU( 1638153134431111443LLU), + QU(10336967447052276248LLU), QU( 8412308070396592958LLU), + QU( 4004557277152051226LLU), QU( 8143598997278774834LLU), + QU(16413323996508783221LLU), QU(13139418758033994949LLU), + QU( 9772709138335006667LLU), QU( 2818167159287157659LLU), + QU(17091740573832523669LLU), QU(14629199013130751608LLU), + QU(18268322711500338185LLU), QU( 8290963415675493063LLU), + QU( 8830864907452542588LLU), QU( 1614839084637494849LLU), + QU(14855358500870422231LLU), QU( 3472996748392519937LLU), + QU(15317151166268877716LLU), QU( 5825895018698400362LLU), + QU(16730208429367544129LLU), QU(10481156578141202800LLU), + QU( 4746166512382823750LLU), QU(12720876014472464998LLU), + QU( 8825177124486735972LLU), QU(13733447296837467838LLU), + QU( 6412293741681359625LLU), QU( 8313213138756135033LLU), + QU(11421481194803712517LLU), QU( 7997007691544174032LLU), + QU( 6812963847917605930LLU), QU( 9683091901227558641LLU), + QU(14703594165860324713LLU), QU( 1775476144519618309LLU), + QU( 2724283288516469519LLU), QU( 717642555185856868LLU), + QU( 8736402192215092346LLU), QU(11878800336431381021LLU), + QU( 4348816066017061293LLU), QU( 6115112756583631307LLU), + QU( 9176597239667142976LLU), QU(12615622714894259204LLU), + QU(10283406711301385987LLU), QU( 5111762509485379420LLU), + QU( 
3118290051198688449LLU), QU( 7345123071632232145LLU), + QU( 9176423451688682359LLU), QU( 4843865456157868971LLU), + QU(12008036363752566088LLU), QU(12058837181919397720LLU), + QU( 2145073958457347366LLU), QU( 1526504881672818067LLU), + QU( 3488830105567134848LLU), QU(13208362960674805143LLU), + QU( 4077549672899572192LLU), QU( 7770995684693818365LLU), + QU( 1398532341546313593LLU), QU(12711859908703927840LLU), + QU( 1417561172594446813LLU), QU(17045191024194170604LLU), + QU( 4101933177604931713LLU), QU(14708428834203480320LLU), + QU(17447509264469407724LLU), QU(14314821973983434255LLU), + QU(17990472271061617265LLU), QU( 5087756685841673942LLU), + QU(12797820586893859939LLU), QU( 1778128952671092879LLU), + QU( 3535918530508665898LLU), QU( 9035729701042481301LLU), + QU(14808661568277079962LLU), QU(14587345077537747914LLU), + QU(11920080002323122708LLU), QU( 6426515805197278753LLU), + QU( 3295612216725984831LLU), QU(11040722532100876120LLU), + QU(12305952936387598754LLU), QU(16097391899742004253LLU), + QU( 4908537335606182208LLU), QU(12446674552196795504LLU), + QU(16010497855816895177LLU), QU( 9194378874788615551LLU), + QU( 3382957529567613384LLU), QU( 5154647600754974077LLU), + QU( 9801822865328396141LLU), QU( 9023662173919288143LLU), + QU(17623115353825147868LLU), QU( 8238115767443015816LLU), + QU(15811444159859002560LLU), QU( 9085612528904059661LLU), + QU( 6888601089398614254LLU), QU( 258252992894160189LLU), + QU( 6704363880792428622LLU), QU( 6114966032147235763LLU), + QU(11075393882690261875LLU), QU( 8797664238933620407LLU), + QU( 5901892006476726920LLU), QU( 5309780159285518958LLU), + QU(14940808387240817367LLU), QU(14642032021449656698LLU), + QU( 9808256672068504139LLU), QU( 3670135111380607658LLU), + QU(11211211097845960152LLU), QU( 1474304506716695808LLU), + QU(15843166204506876239LLU), QU( 7661051252471780561LLU), + QU(10170905502249418476LLU), QU( 7801416045582028589LLU), + QU( 2763981484737053050LLU), QU( 9491377905499253054LLU), + 
QU(16201395896336915095LLU), QU( 9256513756442782198LLU), + QU( 5411283157972456034LLU), QU( 5059433122288321676LLU), + QU( 4327408006721123357LLU), QU( 9278544078834433377LLU), + QU( 7601527110882281612LLU), QU(11848295896975505251LLU), + QU(12096998801094735560LLU), QU(14773480339823506413LLU), + QU(15586227433895802149LLU), QU(12786541257830242872LLU), + QU( 6904692985140503067LLU), QU( 5309011515263103959LLU), + QU(12105257191179371066LLU), QU(14654380212442225037LLU), + QU( 2556774974190695009LLU), QU( 4461297399927600261LLU), + QU(14888225660915118646LLU), QU(14915459341148291824LLU), + QU( 2738802166252327631LLU), QU( 6047155789239131512LLU), + QU(12920545353217010338LLU), QU(10697617257007840205LLU), + QU( 2751585253158203504LLU), QU(13252729159780047496LLU), + QU(14700326134672815469LLU), QU(14082527904374600529LLU), + QU(16852962273496542070LLU), QU(17446675504235853907LLU), + QU(15019600398527572311LLU), QU(12312781346344081551LLU), + QU(14524667935039810450LLU), QU( 5634005663377195738LLU), + QU(11375574739525000569LLU), QU( 2423665396433260040LLU), + QU( 5222836914796015410LLU), QU( 4397666386492647387LLU), + QU( 4619294441691707638LLU), QU( 665088602354770716LLU), + QU(13246495665281593610LLU), QU( 6564144270549729409LLU), + QU(10223216188145661688LLU), QU( 3961556907299230585LLU), + QU(11543262515492439914LLU), QU(16118031437285993790LLU), + QU( 7143417964520166465LLU), QU(13295053515909486772LLU), + QU( 40434666004899675LLU), QU(17127804194038347164LLU), + QU( 8599165966560586269LLU), QU( 8214016749011284903LLU), + QU(13725130352140465239LLU), QU( 5467254474431726291LLU), + QU( 7748584297438219877LLU), QU(16933551114829772472LLU), + QU( 2169618439506799400LLU), QU( 2169787627665113463LLU), + QU(17314493571267943764LLU), QU(18053575102911354912LLU), + QU(11928303275378476973LLU), QU(11593850925061715550LLU), + QU(17782269923473589362LLU), QU( 3280235307704747039LLU), + QU( 6145343578598685149LLU), QU(17080117031114086090LLU), + 
QU(18066839902983594755LLU), QU( 6517508430331020706LLU), + QU( 8092908893950411541LLU), QU(12558378233386153732LLU), + QU( 4476532167973132976LLU), QU(16081642430367025016LLU), + QU( 4233154094369139361LLU), QU( 8693630486693161027LLU), + QU(11244959343027742285LLU), QU(12273503967768513508LLU), + QU(14108978636385284876LLU), QU( 7242414665378826984LLU), + QU( 6561316938846562432LLU), QU( 8601038474994665795LLU), + QU(17532942353612365904LLU), QU(17940076637020912186LLU), + QU( 7340260368823171304LLU), QU( 7061807613916067905LLU), + QU(10561734935039519326LLU), QU(17990796503724650862LLU), + QU( 6208732943911827159LLU), QU( 359077562804090617LLU), + QU(14177751537784403113LLU), QU(10659599444915362902LLU), + QU(15081727220615085833LLU), QU(13417573895659757486LLU), + QU(15513842342017811524LLU), QU(11814141516204288231LLU), + QU( 1827312513875101814LLU), QU( 2804611699894603103LLU), + QU(17116500469975602763LLU), QU(12270191815211952087LLU), + QU(12256358467786024988LLU), QU(18435021722453971267LLU), + QU( 671330264390865618LLU), QU( 476504300460286050LLU), + QU(16465470901027093441LLU), QU( 4047724406247136402LLU), + QU( 1322305451411883346LLU), QU( 1388308688834322280LLU), + QU( 7303989085269758176LLU), QU( 9323792664765233642LLU), + QU( 4542762575316368936LLU), QU(17342696132794337618LLU), + QU( 4588025054768498379LLU), QU(13415475057390330804LLU), + QU(17880279491733405570LLU), QU(10610553400618620353LLU), + QU( 3180842072658960139LLU), QU(13002966655454270120LLU), + QU( 1665301181064982826LLU), QU( 7083673946791258979LLU), + QU( 190522247122496820LLU), QU(17388280237250677740LLU), + QU( 8430770379923642945LLU), QU(12987180971921668584LLU), + QU( 2311086108365390642LLU), QU( 2870984383579822345LLU), + QU(14014682609164653318LLU), QU(14467187293062251484LLU), + QU( 192186361147413298LLU), QU(15171951713531796524LLU), + QU( 9900305495015948728LLU), QU(17958004775615466344LLU), + QU(14346380954498606514LLU), QU(18040047357617407096LLU), + QU( 
5035237584833424532LLU), QU(15089555460613972287LLU), + QU( 4131411873749729831LLU), QU( 1329013581168250330LLU), + QU(10095353333051193949LLU), QU(10749518561022462716LLU), + QU( 9050611429810755847LLU), QU(15022028840236655649LLU), + QU( 8775554279239748298LLU), QU(13105754025489230502LLU), + QU(15471300118574167585LLU), QU( 89864764002355628LLU), + QU( 8776416323420466637LLU), QU( 5280258630612040891LLU), + QU( 2719174488591862912LLU), QU( 7599309137399661994LLU), + QU(15012887256778039979LLU), QU(14062981725630928925LLU), + QU(12038536286991689603LLU), QU( 7089756544681775245LLU), + QU(10376661532744718039LLU), QU( 1265198725901533130LLU), + QU(13807996727081142408LLU), QU( 2935019626765036403LLU), + QU( 7651672460680700141LLU), QU( 3644093016200370795LLU), + QU( 2840982578090080674LLU), QU(17956262740157449201LLU), + QU(18267979450492880548LLU), QU(11799503659796848070LLU), + QU( 9942537025669672388LLU), QU(11886606816406990297LLU), + QU( 5488594946437447576LLU), QU( 7226714353282744302LLU), + QU( 3784851653123877043LLU), QU( 878018453244803041LLU), + QU(12110022586268616085LLU), QU( 734072179404675123LLU), + QU(11869573627998248542LLU), QU( 469150421297783998LLU), + QU( 260151124912803804LLU), QU(11639179410120968649LLU), + QU( 9318165193840846253LLU), QU(12795671722734758075LLU), + QU(15318410297267253933LLU), QU( 691524703570062620LLU), + QU( 5837129010576994601LLU), QU(15045963859726941052LLU), + QU( 5850056944932238169LLU), QU(12017434144750943807LLU), + QU( 7447139064928956574LLU), QU( 3101711812658245019LLU), + QU(16052940704474982954LLU), QU(18195745945986994042LLU), + QU( 8932252132785575659LLU), QU(13390817488106794834LLU), + QU(11582771836502517453LLU), QU( 4964411326683611686LLU), + QU( 2195093981702694011LLU), QU(14145229538389675669LLU), + QU(16459605532062271798LLU), QU( 866316924816482864LLU), + QU( 4593041209937286377LLU), QU( 8415491391910972138LLU), + QU( 4171236715600528969LLU), QU(16637569303336782889LLU), + QU( 2002011073439212680LLU), 
QU(17695124661097601411LLU), + QU( 4627687053598611702LLU), QU( 7895831936020190403LLU), + QU( 8455951300917267802LLU), QU( 2923861649108534854LLU), + QU( 8344557563927786255LLU), QU( 6408671940373352556LLU), + QU(12210227354536675772LLU), QU(14294804157294222295LLU), + QU(10103022425071085127LLU), QU(10092959489504123771LLU), + QU( 6554774405376736268LLU), QU(12629917718410641774LLU), + QU( 6260933257596067126LLU), QU( 2460827021439369673LLU), + QU( 2541962996717103668LLU), QU( 597377203127351475LLU), + QU( 5316984203117315309LLU), QU( 4811211393563241961LLU), + QU(13119698597255811641LLU), QU( 8048691512862388981LLU), + QU(10216818971194073842LLU), QU( 4612229970165291764LLU), + QU(10000980798419974770LLU), QU( 6877640812402540687LLU), + QU( 1488727563290436992LLU), QU( 2227774069895697318LLU), + QU(11237754507523316593LLU), QU(13478948605382290972LLU), + QU( 1963583846976858124LLU), QU( 5512309205269276457LLU), + QU( 3972770164717652347LLU), QU( 3841751276198975037LLU), + QU(10283343042181903117LLU), QU( 8564001259792872199LLU), + QU(16472187244722489221LLU), QU( 8953493499268945921LLU), + QU( 3518747340357279580LLU), QU( 4003157546223963073LLU), + QU( 3270305958289814590LLU), QU( 3966704458129482496LLU), + QU( 8122141865926661939LLU), QU(14627734748099506653LLU), + QU(13064426990862560568LLU), QU( 2414079187889870829LLU), + QU( 5378461209354225306LLU), QU(10841985740128255566LLU), + QU( 538582442885401738LLU), QU( 7535089183482905946LLU), + QU(16117559957598879095LLU), QU( 8477890721414539741LLU), + QU( 1459127491209533386LLU), QU(17035126360733620462LLU), + QU( 8517668552872379126LLU), QU(10292151468337355014LLU), + QU(17081267732745344157LLU), QU(13751455337946087178LLU), + QU(14026945459523832966LLU), QU( 6653278775061723516LLU), + QU(10619085543856390441LLU), QU( 2196343631481122885LLU), + QU(10045966074702826136LLU), QU(10082317330452718282LLU), + QU( 5920859259504831242LLU), QU( 9951879073426540617LLU), + QU( 7074696649151414158LLU), 
QU(15808193543879464318LLU), + QU( 7385247772746953374LLU), QU( 3192003544283864292LLU), + QU(18153684490917593847LLU), QU(12423498260668568905LLU), + QU(10957758099756378169LLU), QU(11488762179911016040LLU), + QU( 2099931186465333782LLU), QU(11180979581250294432LLU), + QU( 8098916250668367933LLU), QU( 3529200436790763465LLU), + QU(12988418908674681745LLU), QU( 6147567275954808580LLU), + QU( 3207503344604030989LLU), QU(10761592604898615360LLU), + QU( 229854861031893504LLU), QU( 8809853962667144291LLU), + QU(13957364469005693860LLU), QU( 7634287665224495886LLU), + QU(12353487366976556874LLU), QU( 1134423796317152034LLU), + QU( 2088992471334107068LLU), QU( 7393372127190799698LLU), + QU( 1845367839871058391LLU), QU( 207922563987322884LLU), + QU(11960870813159944976LLU), QU(12182120053317317363LLU), + QU(17307358132571709283LLU), QU(13871081155552824936LLU), + QU(18304446751741566262LLU), QU( 7178705220184302849LLU), + QU(10929605677758824425LLU), QU(16446976977835806844LLU), + QU(13723874412159769044LLU), QU( 6942854352100915216LLU), + QU( 1726308474365729390LLU), QU( 2150078766445323155LLU), + QU(15345558947919656626LLU), QU(12145453828874527201LLU), + QU( 2054448620739726849LLU), QU( 2740102003352628137LLU), + QU(11294462163577610655LLU), QU( 756164283387413743LLU), + QU(17841144758438810880LLU), QU(10802406021185415861LLU), + QU( 8716455530476737846LLU), QU( 6321788834517649606LLU), + QU(14681322910577468426LLU), QU(17330043563884336387LLU), + QU(12701802180050071614LLU), QU(14695105111079727151LLU), + QU( 5112098511654172830LLU), QU( 4957505496794139973LLU), + QU( 8270979451952045982LLU), QU(12307685939199120969LLU), + QU(12425799408953443032LLU), QU( 8376410143634796588LLU), + QU(16621778679680060464LLU), QU( 3580497854566660073LLU), + QU( 1122515747803382416LLU), QU( 857664980960597599LLU), + QU( 6343640119895925918LLU), QU(12878473260854462891LLU), + QU(10036813920765722626LLU), QU(14451335468363173812LLU), + QU( 5476809692401102807LLU), 
QU(16442255173514366342LLU), + QU(13060203194757167104LLU), QU(14354124071243177715LLU), + QU(15961249405696125227LLU), QU(13703893649690872584LLU), + QU( 363907326340340064LLU), QU( 6247455540491754842LLU), + QU(12242249332757832361LLU), QU( 156065475679796717LLU), + QU( 9351116235749732355LLU), QU( 4590350628677701405LLU), + QU( 1671195940982350389LLU), QU(13501398458898451905LLU), + QU( 6526341991225002255LLU), QU( 1689782913778157592LLU), + QU( 7439222350869010334LLU), QU(13975150263226478308LLU), + QU(11411961169932682710LLU), QU(17204271834833847277LLU), + QU( 541534742544435367LLU), QU( 6591191931218949684LLU), + QU( 2645454775478232486LLU), QU( 4322857481256485321LLU), + QU( 8477416487553065110LLU), QU(12902505428548435048LLU), + QU( 971445777981341415LLU), QU(14995104682744976712LLU), + QU( 4243341648807158063LLU), QU( 8695061252721927661LLU), + QU( 5028202003270177222LLU), QU( 2289257340915567840LLU), + QU(13870416345121866007LLU), QU(13994481698072092233LLU), + QU( 6912785400753196481LLU), QU( 2278309315841980139LLU), + QU( 4329765449648304839LLU), QU( 5963108095785485298LLU), + QU( 4880024847478722478LLU), QU(16015608779890240947LLU), + QU( 1866679034261393544LLU), QU( 914821179919731519LLU), + QU( 9643404035648760131LLU), QU( 2418114953615593915LLU), + QU( 944756836073702374LLU), QU(15186388048737296834LLU), + QU( 7723355336128442206LLU), QU( 7500747479679599691LLU), + QU(18013961306453293634LLU), QU( 2315274808095756456LLU), + QU(13655308255424029566LLU), QU(17203800273561677098LLU), + QU( 1382158694422087756LLU), QU( 5090390250309588976LLU), + QU( 517170818384213989LLU), QU( 1612709252627729621LLU), + QU( 1330118955572449606LLU), QU( 300922478056709885LLU), + QU(18115693291289091987LLU), QU(13491407109725238321LLU), + QU(15293714633593827320LLU), QU( 5151539373053314504LLU), + QU( 5951523243743139207LLU), QU(14459112015249527975LLU), + QU( 5456113959000700739LLU), QU( 3877918438464873016LLU), + QU(12534071654260163555LLU), 
QU(15871678376893555041LLU), + QU(11005484805712025549LLU), QU(16353066973143374252LLU), + QU( 4358331472063256685LLU), QU( 8268349332210859288LLU), + QU(12485161590939658075LLU), QU(13955993592854471343LLU), + QU( 5911446886848367039LLU), QU(14925834086813706974LLU), + QU( 6590362597857994805LLU), QU( 1280544923533661875LLU), + QU( 1637756018947988164LLU), QU( 4734090064512686329LLU), + QU(16693705263131485912LLU), QU( 6834882340494360958LLU), + QU( 8120732176159658505LLU), QU( 2244371958905329346LLU), + QU(10447499707729734021LLU), QU( 7318742361446942194LLU), + QU( 8032857516355555296LLU), QU(14023605983059313116LLU), + QU( 1032336061815461376LLU), QU( 9840995337876562612LLU), + QU( 9869256223029203587LLU), QU(12227975697177267636LLU), + QU(12728115115844186033LLU), QU( 7752058479783205470LLU), + QU( 729733219713393087LLU), QU(12954017801239007622LLU) +}; +static const uint64_t init_by_array_64_expected[] = { + QU( 2100341266307895239LLU), QU( 8344256300489757943LLU), + QU(15687933285484243894LLU), QU( 8268620370277076319LLU), + QU(12371852309826545459LLU), QU( 8800491541730110238LLU), + QU(18113268950100835773LLU), QU( 2886823658884438119LLU), + QU( 3293667307248180724LLU), QU( 9307928143300172731LLU), + QU( 7688082017574293629LLU), QU( 900986224735166665LLU), + QU( 9977972710722265039LLU), QU( 6008205004994830552LLU), + QU( 546909104521689292LLU), QU( 7428471521869107594LLU), + QU(14777563419314721179LLU), QU(16116143076567350053LLU), + QU( 5322685342003142329LLU), QU( 4200427048445863473LLU), + QU( 4693092150132559146LLU), QU(13671425863759338582LLU), + QU( 6747117460737639916LLU), QU( 4732666080236551150LLU), + QU( 5912839950611941263LLU), QU( 3903717554504704909LLU), + QU( 2615667650256786818LLU), QU(10844129913887006352LLU), + QU(13786467861810997820LLU), QU(14267853002994021570LLU), + QU(13767807302847237439LLU), QU(16407963253707224617LLU), + QU( 4802498363698583497LLU), QU( 2523802839317209764LLU), + QU( 3822579397797475589LLU), QU( 
8950320572212130610LLU), + QU( 3745623504978342534LLU), QU(16092609066068482806LLU), + QU( 9817016950274642398LLU), QU(10591660660323829098LLU), + QU(11751606650792815920LLU), QU( 5122873818577122211LLU), + QU(17209553764913936624LLU), QU( 6249057709284380343LLU), + QU(15088791264695071830LLU), QU(15344673071709851930LLU), + QU( 4345751415293646084LLU), QU( 2542865750703067928LLU), + QU(13520525127852368784LLU), QU(18294188662880997241LLU), + QU( 3871781938044881523LLU), QU( 2873487268122812184LLU), + QU(15099676759482679005LLU), QU(15442599127239350490LLU), + QU( 6311893274367710888LLU), QU( 3286118760484672933LLU), + QU( 4146067961333542189LLU), QU(13303942567897208770LLU), + QU( 8196013722255630418LLU), QU( 4437815439340979989LLU), + QU(15433791533450605135LLU), QU( 4254828956815687049LLU), + QU( 1310903207708286015LLU), QU(10529182764462398549LLU), + QU(14900231311660638810LLU), QU( 9727017277104609793LLU), + QU( 1821308310948199033LLU), QU(11628861435066772084LLU), + QU( 9469019138491546924LLU), QU( 3145812670532604988LLU), + QU( 9938468915045491919LLU), QU( 1562447430672662142LLU), + QU(13963995266697989134LLU), QU( 3356884357625028695LLU), + QU( 4499850304584309747LLU), QU( 8456825817023658122LLU), + QU(10859039922814285279LLU), QU( 8099512337972526555LLU), + QU( 348006375109672149LLU), QU(11919893998241688603LLU), + QU( 1104199577402948826LLU), QU(16689191854356060289LLU), + QU(10992552041730168078LLU), QU( 7243733172705465836LLU), + QU( 5668075606180319560LLU), QU(18182847037333286970LLU), + QU( 4290215357664631322LLU), QU( 4061414220791828613LLU), + QU(13006291061652989604LLU), QU( 7140491178917128798LLU), + QU(12703446217663283481LLU), QU( 5500220597564558267LLU), + QU(10330551509971296358LLU), QU(15958554768648714492LLU), + QU( 5174555954515360045LLU), QU( 1731318837687577735LLU), + QU( 3557700801048354857LLU), QU(13764012341928616198LLU), + QU(13115166194379119043LLU), QU( 7989321021560255519LLU), + QU( 2103584280905877040LLU), QU( 
9230788662155228488LLU), + QU(16396629323325547654LLU), QU( 657926409811318051LLU), + QU(15046700264391400727LLU), QU( 5120132858771880830LLU), + QU( 7934160097989028561LLU), QU( 6963121488531976245LLU), + QU(17412329602621742089LLU), QU(15144843053931774092LLU), + QU(17204176651763054532LLU), QU(13166595387554065870LLU), + QU( 8590377810513960213LLU), QU( 5834365135373991938LLU), + QU( 7640913007182226243LLU), QU( 3479394703859418425LLU), + QU(16402784452644521040LLU), QU( 4993979809687083980LLU), + QU(13254522168097688865LLU), QU(15643659095244365219LLU), + QU( 5881437660538424982LLU), QU(11174892200618987379LLU), + QU( 254409966159711077LLU), QU(17158413043140549909LLU), + QU( 3638048789290376272LLU), QU( 1376816930299489190LLU), + QU( 4622462095217761923LLU), QU(15086407973010263515LLU), + QU(13253971772784692238LLU), QU( 5270549043541649236LLU), + QU(11182714186805411604LLU), QU(12283846437495577140LLU), + QU( 5297647149908953219LLU), QU(10047451738316836654LLU), + QU( 4938228100367874746LLU), QU(12328523025304077923LLU), + QU( 3601049438595312361LLU), QU( 9313624118352733770LLU), + QU(13322966086117661798LLU), QU(16660005705644029394LLU), + QU(11337677526988872373LLU), QU(13869299102574417795LLU), + QU(15642043183045645437LLU), QU( 3021755569085880019LLU), + QU( 4979741767761188161LLU), QU(13679979092079279587LLU), + QU( 3344685842861071743LLU), QU(13947960059899588104LLU), + QU( 305806934293368007LLU), QU( 5749173929201650029LLU), + QU(11123724852118844098LLU), QU(15128987688788879802LLU), + QU(15251651211024665009LLU), QU( 7689925933816577776LLU), + QU(16732804392695859449LLU), QU(17087345401014078468LLU), + QU(14315108589159048871LLU), QU( 4820700266619778917LLU), + QU(16709637539357958441LLU), QU( 4936227875177351374LLU), + QU( 2137907697912987247LLU), QU(11628565601408395420LLU), + QU( 2333250549241556786LLU), QU( 5711200379577778637LLU), + QU( 5170680131529031729LLU), QU(12620392043061335164LLU), + QU( 95363390101096078LLU), QU( 5487981914081709462LLU), 
+ QU( 1763109823981838620LLU), QU( 3395861271473224396LLU), + QU( 1300496844282213595LLU), QU( 6894316212820232902LLU), + QU(10673859651135576674LLU), QU( 5911839658857903252LLU), + QU(17407110743387299102LLU), QU( 8257427154623140385LLU), + QU(11389003026741800267LLU), QU( 4070043211095013717LLU), + QU(11663806997145259025LLU), QU(15265598950648798210LLU), + QU( 630585789434030934LLU), QU( 3524446529213587334LLU), + QU( 7186424168495184211LLU), QU(10806585451386379021LLU), + QU(11120017753500499273LLU), QU( 1586837651387701301LLU), + QU(17530454400954415544LLU), QU( 9991670045077880430LLU), + QU( 7550997268990730180LLU), QU( 8640249196597379304LLU), + QU( 3522203892786893823LLU), QU(10401116549878854788LLU), + QU(13690285544733124852LLU), QU( 8295785675455774586LLU), + QU(15535716172155117603LLU), QU( 3112108583723722511LLU), + QU(17633179955339271113LLU), QU(18154208056063759375LLU), + QU( 1866409236285815666LLU), QU(13326075895396412882LLU), + QU( 8756261842948020025LLU), QU( 6281852999868439131LLU), + QU(15087653361275292858LLU), QU(10333923911152949397LLU), + QU( 5265567645757408500LLU), QU(12728041843210352184LLU), + QU( 6347959327507828759LLU), QU( 154112802625564758LLU), + QU(18235228308679780218LLU), QU( 3253805274673352418LLU), + QU( 4849171610689031197LLU), QU(17948529398340432518LLU), + QU(13803510475637409167LLU), QU(13506570190409883095LLU), + QU(15870801273282960805LLU), QU( 8451286481299170773LLU), + QU( 9562190620034457541LLU), QU( 8518905387449138364LLU), + QU(12681306401363385655LLU), QU( 3788073690559762558LLU), + QU( 5256820289573487769LLU), QU( 2752021372314875467LLU), + QU( 6354035166862520716LLU), QU( 4328956378309739069LLU), + QU( 449087441228269600LLU), QU( 5533508742653090868LLU), + QU( 1260389420404746988LLU), QU(18175394473289055097LLU), + QU( 1535467109660399420LLU), QU( 8818894282874061442LLU), + QU(12140873243824811213LLU), QU(15031386653823014946LLU), + QU( 1286028221456149232LLU), QU( 6329608889367858784LLU), + QU( 
9419654354945132725LLU), QU( 6094576547061672379LLU), + QU(17706217251847450255LLU), QU( 1733495073065878126LLU), + QU(16918923754607552663LLU), QU( 8881949849954945044LLU), + QU(12938977706896313891LLU), QU(14043628638299793407LLU), + QU(18393874581723718233LLU), QU( 6886318534846892044LLU), + QU(14577870878038334081LLU), QU(13541558383439414119LLU), + QU(13570472158807588273LLU), QU(18300760537910283361LLU), + QU( 818368572800609205LLU), QU( 1417000585112573219LLU), + QU(12337533143867683655LLU), QU(12433180994702314480LLU), + QU( 778190005829189083LLU), QU(13667356216206524711LLU), + QU( 9866149895295225230LLU), QU(11043240490417111999LLU), + QU( 1123933826541378598LLU), QU( 6469631933605123610LLU), + QU(14508554074431980040LLU), QU(13918931242962026714LLU), + QU( 2870785929342348285LLU), QU(14786362626740736974LLU), + QU(13176680060902695786LLU), QU( 9591778613541679456LLU), + QU( 9097662885117436706LLU), QU( 749262234240924947LLU), + QU( 1944844067793307093LLU), QU( 4339214904577487742LLU), + QU( 8009584152961946551LLU), QU(16073159501225501777LLU), + QU( 3335870590499306217LLU), QU(17088312653151202847LLU), + QU( 3108893142681931848LLU), QU(16636841767202792021LLU), + QU(10423316431118400637LLU), QU( 8008357368674443506LLU), + QU(11340015231914677875LLU), QU(17687896501594936090LLU), + QU(15173627921763199958LLU), QU( 542569482243721959LLU), + QU(15071714982769812975LLU), QU( 4466624872151386956LLU), + QU( 1901780715602332461LLU), QU( 9822227742154351098LLU), + QU( 1479332892928648780LLU), QU( 6981611948382474400LLU), + QU( 7620824924456077376LLU), QU(14095973329429406782LLU), + QU( 7902744005696185404LLU), QU(15830577219375036920LLU), + QU(10287076667317764416LLU), QU(12334872764071724025LLU), + QU( 4419302088133544331LLU), QU(14455842851266090520LLU), + QU(12488077416504654222LLU), QU( 7953892017701886766LLU), + QU( 6331484925529519007LLU), QU( 4902145853785030022LLU), + QU(17010159216096443073LLU), QU(11945354668653886087LLU), + 
QU(15112022728645230829LLU), QU(17363484484522986742LLU), + QU( 4423497825896692887LLU), QU( 8155489510809067471LLU), + QU( 258966605622576285LLU), QU( 5462958075742020534LLU), + QU( 6763710214913276228LLU), QU( 2368935183451109054LLU), + QU(14209506165246453811LLU), QU( 2646257040978514881LLU), + QU( 3776001911922207672LLU), QU( 1419304601390147631LLU), + QU(14987366598022458284LLU), QU( 3977770701065815721LLU), + QU( 730820417451838898LLU), QU( 3982991703612885327LLU), + QU( 2803544519671388477LLU), QU(17067667221114424649LLU), + QU( 2922555119737867166LLU), QU( 1989477584121460932LLU), + QU(15020387605892337354LLU), QU( 9293277796427533547LLU), + QU(10722181424063557247LLU), QU(16704542332047511651LLU), + QU( 5008286236142089514LLU), QU(16174732308747382540LLU), + QU(17597019485798338402LLU), QU(13081745199110622093LLU), + QU( 8850305883842258115LLU), QU(12723629125624589005LLU), + QU( 8140566453402805978LLU), QU(15356684607680935061LLU), + QU(14222190387342648650LLU), QU(11134610460665975178LLU), + QU( 1259799058620984266LLU), QU(13281656268025610041LLU), + QU( 298262561068153992LLU), QU(12277871700239212922LLU), + QU(13911297774719779438LLU), QU(16556727962761474934LLU), + QU(17903010316654728010LLU), QU( 9682617699648434744LLU), + QU(14757681836838592850LLU), QU( 1327242446558524473LLU), + QU(11126645098780572792LLU), QU( 1883602329313221774LLU), + QU( 2543897783922776873LLU), QU(15029168513767772842LLU), + QU(12710270651039129878LLU), QU(16118202956069604504LLU), + QU(15010759372168680524LLU), QU( 2296827082251923948LLU), + QU(10793729742623518101LLU), QU(13829764151845413046LLU), + QU(17769301223184451213LLU), QU( 3118268169210783372LLU), + QU(17626204544105123127LLU), QU( 7416718488974352644LLU), + QU(10450751996212925994LLU), QU( 9352529519128770586LLU), + QU( 259347569641110140LLU), QU( 8048588892269692697LLU), + QU( 1774414152306494058LLU), QU(10669548347214355622LLU), + QU(13061992253816795081LLU), QU(18432677803063861659LLU), + QU( 
8879191055593984333LLU), QU(12433753195199268041LLU), + QU(14919392415439730602LLU), QU( 6612848378595332963LLU), + QU( 6320986812036143628LLU), QU(10465592420226092859LLU), + QU( 4196009278962570808LLU), QU( 3747816564473572224LLU), + QU(17941203486133732898LLU), QU( 2350310037040505198LLU), + QU( 5811779859134370113LLU), QU(10492109599506195126LLU), + QU( 7699650690179541274LLU), QU( 1954338494306022961LLU), + QU(14095816969027231152LLU), QU( 5841346919964852061LLU), + QU(14945969510148214735LLU), QU( 3680200305887550992LLU), + QU( 6218047466131695792LLU), QU( 8242165745175775096LLU), + QU(11021371934053307357LLU), QU( 1265099502753169797LLU), + QU( 4644347436111321718LLU), QU( 3609296916782832859LLU), + QU( 8109807992218521571LLU), QU(18387884215648662020LLU), + QU(14656324896296392902LLU), QU(17386819091238216751LLU), + QU(17788300878582317152LLU), QU( 7919446259742399591LLU), + QU( 4466613134576358004LLU), QU(12928181023667938509LLU), + QU(13147446154454932030LLU), QU(16552129038252734620LLU), + QU( 8395299403738822450LLU), QU(11313817655275361164LLU), + QU( 434258809499511718LLU), QU( 2074882104954788676LLU), + QU( 7929892178759395518LLU), QU( 9006461629105745388LLU), + QU( 5176475650000323086LLU), QU(11128357033468341069LLU), + QU(12026158851559118955LLU), QU(14699716249471156500LLU), + QU( 448982497120206757LLU), QU( 4156475356685519900LLU), + QU( 6063816103417215727LLU), QU(10073289387954971479LLU), + QU( 8174466846138590962LLU), QU( 2675777452363449006LLU), + QU( 9090685420572474281LLU), QU( 6659652652765562060LLU), + QU(12923120304018106621LLU), QU(11117480560334526775LLU), + QU( 937910473424587511LLU), QU( 1838692113502346645LLU), + QU(11133914074648726180LLU), QU( 7922600945143884053LLU), + QU(13435287702700959550LLU), QU( 5287964921251123332LLU), + QU(11354875374575318947LLU), QU(17955724760748238133LLU), + QU(13728617396297106512LLU), QU( 4107449660118101255LLU), + QU( 1210269794886589623LLU), QU(11408687205733456282LLU), + QU( 
4538354710392677887LLU), QU(13566803319341319267LLU), + QU(17870798107734050771LLU), QU( 3354318982568089135LLU), + QU( 9034450839405133651LLU), QU(13087431795753424314LLU), + QU( 950333102820688239LLU), QU( 1968360654535604116LLU), + QU(16840551645563314995LLU), QU( 8867501803892924995LLU), + QU(11395388644490626845LLU), QU( 1529815836300732204LLU), + QU(13330848522996608842LLU), QU( 1813432878817504265LLU), + QU( 2336867432693429560LLU), QU(15192805445973385902LLU), + QU( 2528593071076407877LLU), QU( 128459777936689248LLU), + QU( 9976345382867214866LLU), QU( 6208885766767996043LLU), + QU(14982349522273141706LLU), QU( 3099654362410737822LLU), + QU(13776700761947297661LLU), QU( 8806185470684925550LLU), + QU( 8151717890410585321LLU), QU( 640860591588072925LLU), + QU(14592096303937307465LLU), QU( 9056472419613564846LLU), + QU(14861544647742266352LLU), QU(12703771500398470216LLU), + QU( 3142372800384138465LLU), QU( 6201105606917248196LLU), + QU(18337516409359270184LLU), QU(15042268695665115339LLU), + QU(15188246541383283846LLU), QU(12800028693090114519LLU), + QU( 5992859621101493472LLU), QU(18278043971816803521LLU), + QU( 9002773075219424560LLU), QU( 7325707116943598353LLU), + QU( 7930571931248040822LLU), QU( 5645275869617023448LLU), + QU( 7266107455295958487LLU), QU( 4363664528273524411LLU), + QU(14313875763787479809LLU), QU(17059695613553486802LLU), + QU( 9247761425889940932LLU), QU(13704726459237593128LLU), + QU( 2701312427328909832LLU), QU(17235532008287243115LLU), + QU(14093147761491729538LLU), QU( 6247352273768386516LLU), + QU( 8268710048153268415LLU), QU( 7985295214477182083LLU), + QU(15624495190888896807LLU), QU( 3772753430045262788LLU), + QU( 9133991620474991698LLU), QU( 5665791943316256028LLU), + QU( 7551996832462193473LLU), QU(13163729206798953877LLU), + QU( 9263532074153846374LLU), QU( 1015460703698618353LLU), + QU(17929874696989519390LLU), QU(18257884721466153847LLU), + QU(16271867543011222991LLU), QU( 3905971519021791941LLU), + 
QU(16814488397137052085LLU), QU( 1321197685504621613LLU), + QU( 2870359191894002181LLU), QU(14317282970323395450LLU), + QU(13663920845511074366LLU), QU( 2052463995796539594LLU), + QU(14126345686431444337LLU), QU( 1727572121947022534LLU), + QU(17793552254485594241LLU), QU( 6738857418849205750LLU), + QU( 1282987123157442952LLU), QU(16655480021581159251LLU), + QU( 6784587032080183866LLU), QU(14726758805359965162LLU), + QU( 7577995933961987349LLU), QU(12539609320311114036LLU), + QU(10789773033385439494LLU), QU( 8517001497411158227LLU), + QU(10075543932136339710LLU), QU(14838152340938811081LLU), + QU( 9560840631794044194LLU), QU(17445736541454117475LLU), + QU(10633026464336393186LLU), QU(15705729708242246293LLU), + QU( 1117517596891411098LLU), QU( 4305657943415886942LLU), + QU( 4948856840533979263LLU), QU(16071681989041789593LLU), + QU(13723031429272486527LLU), QU( 7639567622306509462LLU), + QU(12670424537483090390LLU), QU( 9715223453097197134LLU), + QU( 5457173389992686394LLU), QU( 289857129276135145LLU), + QU(17048610270521972512LLU), QU( 692768013309835485LLU), + QU(14823232360546632057LLU), QU(18218002361317895936LLU), + QU( 3281724260212650204LLU), QU(16453957266549513795LLU), + QU( 8592711109774511881LLU), QU( 929825123473369579LLU), + QU(15966784769764367791LLU), QU( 9627344291450607588LLU), + QU(10849555504977813287LLU), QU( 9234566913936339275LLU), + QU( 6413807690366911210LLU), QU(10862389016184219267LLU), + QU(13842504799335374048LLU), QU( 1531994113376881174LLU), + QU( 2081314867544364459LLU), QU(16430628791616959932LLU), + QU( 8314714038654394368LLU), QU( 9155473892098431813LLU), + QU(12577843786670475704LLU), QU( 4399161106452401017LLU), + QU( 1668083091682623186LLU), QU( 1741383777203714216LLU), + QU( 2162597285417794374LLU), QU(15841980159165218736LLU), + QU( 1971354603551467079LLU), QU( 1206714764913205968LLU), + QU( 4790860439591272330LLU), QU(14699375615594055799LLU), + QU( 8374423871657449988LLU), QU(10950685736472937738LLU), + QU( 
697344331343267176LLU), QU(10084998763118059810LLU), + QU(12897369539795983124LLU), QU(12351260292144383605LLU), + QU( 1268810970176811234LLU), QU( 7406287800414582768LLU), + QU( 516169557043807831LLU), QU( 5077568278710520380LLU), + QU( 3828791738309039304LLU), QU( 7721974069946943610LLU), + QU( 3534670260981096460LLU), QU( 4865792189600584891LLU), + QU(16892578493734337298LLU), QU( 9161499464278042590LLU), + QU(11976149624067055931LLU), QU(13219479887277343990LLU), + QU(14161556738111500680LLU), QU(14670715255011223056LLU), + QU( 4671205678403576558LLU), QU(12633022931454259781LLU), + QU(14821376219869187646LLU), QU( 751181776484317028LLU), + QU( 2192211308839047070LLU), QU(11787306362361245189LLU), + QU(10672375120744095707LLU), QU( 4601972328345244467LLU), + QU(15457217788831125879LLU), QU( 8464345256775460809LLU), + QU(10191938789487159478LLU), QU( 6184348739615197613LLU), + QU(11425436778806882100LLU), QU( 2739227089124319793LLU), + QU( 461464518456000551LLU), QU( 4689850170029177442LLU), + QU( 6120307814374078625LLU), QU(11153579230681708671LLU), + QU( 7891721473905347926LLU), QU(10281646937824872400LLU), + QU( 3026099648191332248LLU), QU( 8666750296953273818LLU), + QU(14978499698844363232LLU), QU(13303395102890132065LLU), + QU( 8182358205292864080LLU), QU(10560547713972971291LLU), + QU(11981635489418959093LLU), QU( 3134621354935288409LLU), + QU(11580681977404383968LLU), QU(14205530317404088650LLU), + QU( 5997789011854923157LLU), QU(13659151593432238041LLU), + QU(11664332114338865086LLU), QU( 7490351383220929386LLU), + QU( 7189290499881530378LLU), QU(15039262734271020220LLU), + QU( 2057217285976980055LLU), QU( 555570804905355739LLU), + QU(11235311968348555110LLU), QU(13824557146269603217LLU), + QU(16906788840653099693LLU), QU( 7222878245455661677LLU), + QU( 5245139444332423756LLU), QU( 4723748462805674292LLU), + QU(12216509815698568612LLU), QU(17402362976648951187LLU), + QU(17389614836810366768LLU), QU( 4880936484146667711LLU), + QU( 9085007839292639880LLU), 
QU(13837353458498535449LLU), + QU(11914419854360366677LLU), QU(16595890135313864103LLU), + QU( 6313969847197627222LLU), QU(18296909792163910431LLU), + QU(10041780113382084042LLU), QU( 2499478551172884794LLU), + QU(11057894246241189489LLU), QU( 9742243032389068555LLU), + QU(12838934582673196228LLU), QU(13437023235248490367LLU), + QU(13372420669446163240LLU), QU( 6752564244716909224LLU), + QU( 7157333073400313737LLU), QU(12230281516370654308LLU), + QU( 1182884552219419117LLU), QU( 2955125381312499218LLU), + QU(10308827097079443249LLU), QU( 1337648572986534958LLU), + QU(16378788590020343939LLU), QU( 108619126514420935LLU), + QU( 3990981009621629188LLU), QU( 5460953070230946410LLU), + QU( 9703328329366531883LLU), QU(13166631489188077236LLU), + QU( 1104768831213675170LLU), QU( 3447930458553877908LLU), + QU( 8067172487769945676LLU), QU( 5445802098190775347LLU), + QU( 3244840981648973873LLU), QU(17314668322981950060LLU), + QU( 5006812527827763807LLU), QU(18158695070225526260LLU), + QU( 2824536478852417853LLU), QU(13974775809127519886LLU), + QU( 9814362769074067392LLU), QU(17276205156374862128LLU), + QU(11361680725379306967LLU), QU( 3422581970382012542LLU), + QU(11003189603753241266LLU), QU(11194292945277862261LLU), + QU( 6839623313908521348LLU), QU(11935326462707324634LLU), + QU( 1611456788685878444LLU), QU(13112620989475558907LLU), + QU( 517659108904450427LLU), QU(13558114318574407624LLU), + QU(15699089742731633077LLU), QU( 4988979278862685458LLU), + QU( 8111373583056521297LLU), QU( 3891258746615399627LLU), + QU( 8137298251469718086LLU), QU(12748663295624701649LLU), + QU( 4389835683495292062LLU), QU( 5775217872128831729LLU), + QU( 9462091896405534927LLU), QU( 8498124108820263989LLU), + QU( 8059131278842839525LLU), QU(10503167994254090892LLU), + QU(11613153541070396656LLU), QU(18069248738504647790LLU), + QU( 570657419109768508LLU), QU( 3950574167771159665LLU), + QU( 5514655599604313077LLU), QU( 2908460854428484165LLU), + QU(10777722615935663114LLU), 
QU(12007363304839279486LLU), + QU( 9800646187569484767LLU), QU( 8795423564889864287LLU), + QU(14257396680131028419LLU), QU( 6405465117315096498LLU), + QU( 7939411072208774878LLU), QU(17577572378528990006LLU), + QU(14785873806715994850LLU), QU(16770572680854747390LLU), + QU(18127549474419396481LLU), QU(11637013449455757750LLU), + QU(14371851933996761086LLU), QU( 3601181063650110280LLU), + QU( 4126442845019316144LLU), QU(10198287239244320669LLU), + QU(18000169628555379659LLU), QU(18392482400739978269LLU), + QU( 6219919037686919957LLU), QU( 3610085377719446052LLU), + QU( 2513925039981776336LLU), QU(16679413537926716955LLU), + QU(12903302131714909434LLU), QU( 5581145789762985009LLU), + QU(12325955044293303233LLU), QU(17216111180742141204LLU), + QU( 6321919595276545740LLU), QU( 3507521147216174501LLU), + QU( 9659194593319481840LLU), QU(11473976005975358326LLU), + QU(14742730101435987026LLU), QU( 492845897709954780LLU), + QU(16976371186162599676LLU), QU(17712703422837648655LLU), + QU( 9881254778587061697LLU), QU( 8413223156302299551LLU), + QU( 1563841828254089168LLU), QU( 9996032758786671975LLU), + QU( 138877700583772667LLU), QU(13003043368574995989LLU), + QU( 4390573668650456587LLU), QU( 8610287390568126755LLU), + QU(15126904974266642199LLU), QU( 6703637238986057662LLU), + QU( 2873075592956810157LLU), QU( 6035080933946049418LLU), + QU(13382846581202353014LLU), QU( 7303971031814642463LLU), + QU(18418024405307444267LLU), QU( 5847096731675404647LLU), + QU( 4035880699639842500LLU), QU(11525348625112218478LLU), + QU( 3041162365459574102LLU), QU( 2604734487727986558LLU), + QU(15526341771636983145LLU), QU(14556052310697370254LLU), + QU(12997787077930808155LLU), QU( 9601806501755554499LLU), + QU(11349677952521423389LLU), QU(14956777807644899350LLU), + QU(16559736957742852721LLU), QU(12360828274778140726LLU), + QU( 6685373272009662513LLU), QU(16932258748055324130LLU), + QU(15918051131954158508LLU), QU( 1692312913140790144LLU), + QU( 546653826801637367LLU), QU( 
5341587076045986652LLU), + QU(14975057236342585662LLU), QU(12374976357340622412LLU), + QU(10328833995181940552LLU), QU(12831807101710443149LLU), + QU(10548514914382545716LLU), QU( 2217806727199715993LLU), + QU(12627067369242845138LLU), QU( 4598965364035438158LLU), + QU( 150923352751318171LLU), QU(14274109544442257283LLU), + QU( 4696661475093863031LLU), QU( 1505764114384654516LLU), + QU(10699185831891495147LLU), QU( 2392353847713620519LLU), + QU( 3652870166711788383LLU), QU( 8640653276221911108LLU), + QU( 3894077592275889704LLU), QU( 4918592872135964845LLU), + QU(16379121273281400789LLU), QU(12058465483591683656LLU), + QU(11250106829302924945LLU), QU( 1147537556296983005LLU), + QU( 6376342756004613268LLU), QU(14967128191709280506LLU), + QU(18007449949790627628LLU), QU( 9497178279316537841LLU), + QU( 7920174844809394893LLU), QU(10037752595255719907LLU), + QU(15875342784985217697LLU), QU(15311615921712850696LLU), + QU( 9552902652110992950LLU), QU(14054979450099721140LLU), + QU( 5998709773566417349LLU), QU(18027910339276320187LLU), + QU( 8223099053868585554LLU), QU( 7842270354824999767LLU), + QU( 4896315688770080292LLU), QU(12969320296569787895LLU), + QU( 2674321489185759961LLU), QU( 4053615936864718439LLU), + QU(11349775270588617578LLU), QU( 4743019256284553975LLU), + QU( 5602100217469723769LLU), QU(14398995691411527813LLU), + QU( 7412170493796825470LLU), QU( 836262406131744846LLU), + QU( 8231086633845153022LLU), QU( 5161377920438552287LLU), + QU( 8828731196169924949LLU), QU(16211142246465502680LLU), + QU( 3307990879253687818LLU), QU( 5193405406899782022LLU), + QU( 8510842117467566693LLU), QU( 6070955181022405365LLU), + QU(14482950231361409799LLU), QU(12585159371331138077LLU), + QU( 3511537678933588148LLU), QU( 2041849474531116417LLU), + QU(10944936685095345792LLU), QU(18303116923079107729LLU), + QU( 2720566371239725320LLU), QU( 4958672473562397622LLU), + QU( 3032326668253243412LLU), QU(13689418691726908338LLU), + QU( 1895205511728843996LLU), QU( 
8146303515271990527LLU), + QU(16507343500056113480LLU), QU( 473996939105902919LLU), + QU( 9897686885246881481LLU), QU(14606433762712790575LLU), + QU( 6732796251605566368LLU), QU( 1399778120855368916LLU), + QU( 935023885182833777LLU), QU(16066282816186753477LLU), + QU( 7291270991820612055LLU), QU(17530230393129853844LLU), + QU(10223493623477451366LLU), QU(15841725630495676683LLU), + QU(17379567246435515824LLU), QU( 8588251429375561971LLU), + QU(18339511210887206423LLU), QU(17349587430725976100LLU), + QU(12244876521394838088LLU), QU( 6382187714147161259LLU), + QU(12335807181848950831LLU), QU(16948885622305460665LLU), + QU(13755097796371520506LLU), QU(14806740373324947801LLU), + QU( 4828699633859287703LLU), QU( 8209879281452301604LLU), + QU(12435716669553736437LLU), QU(13970976859588452131LLU), + QU( 6233960842566773148LLU), QU(12507096267900505759LLU), + QU( 1198713114381279421LLU), QU(14989862731124149015LLU), + QU(15932189508707978949LLU), QU( 2526406641432708722LLU), + QU( 29187427817271982LLU), QU( 1499802773054556353LLU), + QU(10816638187021897173LLU), QU( 5436139270839738132LLU), + QU( 6659882287036010082LLU), QU( 2154048955317173697LLU), + QU(10887317019333757642LLU), QU(16281091802634424955LLU), + QU(10754549879915384901LLU), QU(10760611745769249815LLU), + QU( 2161505946972504002LLU), QU( 5243132808986265107LLU), + QU(10129852179873415416LLU), QU( 710339480008649081LLU), + QU( 7802129453068808528LLU), QU(17967213567178907213LLU), + QU(15730859124668605599LLU), QU(13058356168962376502LLU), + QU( 3701224985413645909LLU), QU(14464065869149109264LLU), + QU( 9959272418844311646LLU), QU(10157426099515958752LLU), + QU(14013736814538268528LLU), QU(17797456992065653951LLU), + QU(17418878140257344806LLU), QU(15457429073540561521LLU), + QU( 2184426881360949378LLU), QU( 2062193041154712416LLU), + QU( 8553463347406931661LLU), QU( 4913057625202871854LLU), + QU( 2668943682126618425LLU), QU(17064444737891172288LLU), + QU( 4997115903913298637LLU), QU(12019402608892327416LLU), 
+ QU(17603584559765897352LLU), QU(11367529582073647975LLU), + QU( 8211476043518436050LLU), QU( 8676849804070323674LLU), + QU(18431829230394475730LLU), QU(10490177861361247904LLU), + QU( 9508720602025651349LLU), QU( 7409627448555722700LLU), + QU( 5804047018862729008LLU), QU(11943858176893142594LLU), + QU(11908095418933847092LLU), QU( 5415449345715887652LLU), + QU( 1554022699166156407LLU), QU( 9073322106406017161LLU), + QU( 7080630967969047082LLU), QU(18049736940860732943LLU), + QU(12748714242594196794LLU), QU( 1226992415735156741LLU), + QU(17900981019609531193LLU), QU(11720739744008710999LLU), + QU( 3006400683394775434LLU), QU(11347974011751996028LLU), + QU( 3316999628257954608LLU), QU( 8384484563557639101LLU), + QU(18117794685961729767LLU), QU( 1900145025596618194LLU), + QU(17459527840632892676LLU), QU( 5634784101865710994LLU), + QU( 7918619300292897158LLU), QU( 3146577625026301350LLU), + QU( 9955212856499068767LLU), QU( 1873995843681746975LLU), + QU( 1561487759967972194LLU), QU( 8322718804375878474LLU), + QU(11300284215327028366LLU), QU( 4667391032508998982LLU), + QU( 9820104494306625580LLU), QU(17922397968599970610LLU), + QU( 1784690461886786712LLU), QU(14940365084341346821LLU), + QU( 5348719575594186181LLU), QU(10720419084507855261LLU), + QU(14210394354145143274LLU), QU( 2426468692164000131LLU), + QU(16271062114607059202LLU), QU(14851904092357070247LLU), + QU( 6524493015693121897LLU), QU( 9825473835127138531LLU), + QU(14222500616268569578LLU), QU(15521484052007487468LLU), + QU(14462579404124614699LLU), QU(11012375590820665520LLU), + QU(11625327350536084927LLU), QU(14452017765243785417LLU), + QU( 9989342263518766305LLU), QU( 3640105471101803790LLU), + QU( 4749866455897513242LLU), QU(13963064946736312044LLU), + QU(10007416591973223791LLU), QU(18314132234717431115LLU), + QU( 3286596588617483450LLU), QU( 7726163455370818765LLU), + QU( 7575454721115379328LLU), QU( 5308331576437663422LLU), + QU(18288821894903530934LLU), QU( 8028405805410554106LLU), + 
QU(15744019832103296628LLU), QU( 149765559630932100LLU), + QU( 6137705557200071977LLU), QU(14513416315434803615LLU), + QU(11665702820128984473LLU), QU( 218926670505601386LLU), + QU( 6868675028717769519LLU), QU(15282016569441512302LLU), + QU( 5707000497782960236LLU), QU( 6671120586555079567LLU), + QU( 2194098052618985448LLU), QU(16849577895477330978LLU), + QU(12957148471017466283LLU), QU( 1997805535404859393LLU), + QU( 1180721060263860490LLU), QU(13206391310193756958LLU), + QU(12980208674461861797LLU), QU( 3825967775058875366LLU), + QU(17543433670782042631LLU), QU( 1518339070120322730LLU), + QU(16344584340890991669LLU), QU( 2611327165318529819LLU), + QU(11265022723283422529LLU), QU( 4001552800373196817LLU), + QU(14509595890079346161LLU), QU( 3528717165416234562LLU), + QU(18153222571501914072LLU), QU( 9387182977209744425LLU), + QU(10064342315985580021LLU), QU(11373678413215253977LLU), + QU( 2308457853228798099LLU), QU( 9729042942839545302LLU), + QU( 7833785471140127746LLU), QU( 6351049900319844436LLU), + QU(14454610627133496067LLU), QU(12533175683634819111LLU), + QU(15570163926716513029LLU), QU(13356980519185762498LLU) +}; + +TEST_BEGIN(test_gen_rand_32) +{ + uint64_t array1[BLOCK_SIZE / 4][2] JEMALLOC_ATTR(aligned(16)); + uint64_t array2[10000 / 4][2] JEMALLOC_ATTR(aligned(16)); + int i; + uint32_t *array32 = (uint32_t *)array1; + uint32_t *array32_2 = (uint32_t *)array2; + uint32_t r32; + sfmt_t *ctx; + + assert_d_le(get_min_array_size32(), 10000, "Array size too small"); + ctx = init_gen_rand(1234); + fill_array32(ctx, array32, 10000); + fill_array32(ctx, array32_2, 10000); + fini_gen_rand(ctx); + + ctx = init_gen_rand(1234); + for (i = 0; i < 10000; i++) { + if (i < 1000) { + assert_u32_eq(array32[i], init_gen_rand_32_expected[i], + "Output mismatch for i=%d", i); + } + r32 = gen_rand32(ctx); + assert_u32_eq(r32, array32[i], + "Mismatch at array32[%d]=%x, gen=%x", i, array32[i], r32); + } + for (i = 0; i < 700; i++) { + r32 = gen_rand32(ctx); + assert_u32_eq(r32, 
array32_2[i], + "Mismatch at array32_2[%d]=%x, gen=%x", i, array32_2[i], + r32); + } + fini_gen_rand(ctx); +} +TEST_END + +TEST_BEGIN(test_by_array_32) +{ + uint64_t array1[BLOCK_SIZE / 4][2] JEMALLOC_ATTR(aligned(16)); + uint64_t array2[10000 / 4][2] JEMALLOC_ATTR(aligned(16)); + int i; + uint32_t *array32 = (uint32_t *)array1; + uint32_t *array32_2 = (uint32_t *)array2; + uint32_t ini[4] = {0x1234, 0x5678, 0x9abc, 0xdef0}; + uint32_t r32; + sfmt_t *ctx; + + assert_d_le(get_min_array_size32(), 10000, "Array size too small"); + ctx = init_by_array(ini, 4); + fill_array32(ctx, array32, 10000); + fill_array32(ctx, array32_2, 10000); + fini_gen_rand(ctx); + + ctx = init_by_array(ini, 4); + for (i = 0; i < 10000; i++) { + if (i < 1000) { + assert_u32_eq(array32[i], init_by_array_32_expected[i], + "Output mismatch for i=%d", i); + } + r32 = gen_rand32(ctx); + assert_u32_eq(r32, array32[i], + "Mismatch at array32[%d]=%x, gen=%x", i, array32[i], r32); + } + for (i = 0; i < 700; i++) { + r32 = gen_rand32(ctx); + assert_u32_eq(r32, array32_2[i], + "Mismatch at array32_2[%d]=%x, gen=%x", i, array32_2[i], + r32); + } + fini_gen_rand(ctx); +} +TEST_END + +TEST_BEGIN(test_gen_rand_64) +{ + uint64_t array1[BLOCK_SIZE / 4][2] JEMALLOC_ATTR(aligned(16)); + uint64_t array2[10000 / 4][2] JEMALLOC_ATTR(aligned(16)); + int i; + uint64_t *array64 = (uint64_t *)array1; + uint64_t *array64_2 = (uint64_t *)array2; + uint64_t r; + sfmt_t *ctx; + + assert_d_le(get_min_array_size64(), 5000, "Array size too small"); + ctx = init_gen_rand(4321); + fill_array64(ctx, array64, 5000); + fill_array64(ctx, array64_2, 5000); + fini_gen_rand(ctx); + + ctx = init_gen_rand(4321); + for (i = 0; i < 5000; i++) { + if (i < 1000) { + assert_u64_eq(array64[i], init_gen_rand_64_expected[i], + "Output mismatch for i=%d", i); + } + r = gen_rand64(ctx); + assert_u64_eq(r, array64[i], + "Mismatch at array64[%d]=%"PRIx64", gen=%"PRIx64, i, + array64[i], r); + } + for (i = 0; i < 700; i++) { + r = gen_rand64(ctx); 
+ assert_u64_eq(r, array64_2[i], + "Mismatch at array64_2[%d]=%"PRIx64" gen=%"PRIx64"", i, + array64_2[i], r); + } + fini_gen_rand(ctx); +} +TEST_END + +TEST_BEGIN(test_by_array_64) +{ + uint64_t array1[BLOCK_SIZE / 4][2] JEMALLOC_ATTR(aligned(16)); + uint64_t array2[10000 / 4][2] JEMALLOC_ATTR(aligned(16)); + int i; + uint64_t *array64 = (uint64_t *)array1; + uint64_t *array64_2 = (uint64_t *)array2; + uint64_t r; + uint32_t ini[] = {5, 4, 3, 2, 1}; + sfmt_t *ctx; + + assert_d_le(get_min_array_size64(), 5000, "Array size too small"); + ctx = init_by_array(ini, 5); + fill_array64(ctx, array64, 5000); + fill_array64(ctx, array64_2, 5000); + fini_gen_rand(ctx); + + ctx = init_by_array(ini, 5); + for (i = 0; i < 5000; i++) { + if (i < 1000) { + assert_u64_eq(array64[i], init_by_array_64_expected[i], + "Output mismatch for i=%d"); + } + r = gen_rand64(ctx); + assert_u64_eq(r, array64[i], + "Mismatch at array64[%d]=%"PRIx64" gen=%"PRIx64, i, + array64[i], r); + } + for (i = 0; i < 700; i++) { + r = gen_rand64(ctx); + assert_u64_eq(r, array64_2[i], + "Mismatch at array64_2[%d]=%"PRIx64" gen=%"PRIx64, i, + array64_2[i], r); + } + fini_gen_rand(ctx); +} +TEST_END + +int +main(void) +{ + + return (test( + test_gen_rand_32, + test_by_array_32, + test_gen_rand_64, + test_by_array_64)); +} From b1941c615023cab9baf0a78a28df1e3b4972434f Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 9 Dec 2013 23:36:37 -0800 Subject: [PATCH 0339/3378] Add probabability distribution utility code. Add probabability distribution utility code that enables generation of random deviates drawn from normal, Chi-square, and Gamma distributions. Fix format strings in several of the assert_* macros (remove a %s). Clean up header issues; it's critical that system headers are not included after internal definitions potentially do things like: #define inline Fix the build system to incorporate header dependencies for the test library C files. 
--- Makefile.in | 16 +- .../internal/jemalloc_internal_macros.h | 1 + include/jemalloc/internal/prof.h | 2 +- test/include/test/SFMT.h | 26 -- test/include/test/jemalloc_test.h.in | 17 +- test/include/test/math.h | 311 ++++++++++++++ test/include/test/test.h | 8 +- test/include/test/thread.h | 3 - test/src/SFMT.c | 5 - test/src/math.c | 2 + test/unit/math.c | 388 ++++++++++++++++++ 11 files changed, 731 insertions(+), 48 deletions(-) create mode 100644 test/include/test/math.h create mode 100644 test/src/math.c create mode 100644 test/unit/math.c diff --git a/Makefile.in b/Makefile.in index 0dd54a72..78554433 100644 --- a/Makefile.in +++ b/Makefile.in @@ -103,12 +103,11 @@ DOCS_XML := $(objroot)doc/jemalloc$(install_suffix).xml DOCS_HTML := $(DOCS_XML:$(objroot)%.xml=$(srcroot)%.html) DOCS_MAN3 := $(DOCS_XML:$(objroot)%.xml=$(srcroot)%.3) DOCS := $(DOCS_HTML) $(DOCS_MAN3) -C_TESTLIB_SRCS := $(srcroot)test/src/SFMT.c $(srcroot)test/src/test.c \ - $(srcroot)test/src/thread.c +C_TESTLIB_SRCS := $(srcroot)test/src/math.c $(srcroot)test/src/SFMT.c \ + $(srcroot)test/src/test.c $(srcroot)test/src/thread.c C_UTIL_INTEGRATION_SRCS := $(srcroot)src/util.c -TESTS_UNIT := $(srcroot)test/unit/bitmap.c \ - $(srcroot)test/unit/SFMT.c \ - $(srcroot)test/unit/tsd.c +TESTS_UNIT := $(srcroot)test/unit/bitmap.c $(srcroot)test/unit/math.c \ + $(srcroot)test/unit/SFMT.c $(srcroot)test/unit/tsd.c TESTS_INTEGRATION := $(srcroot)test/integration/aligned_alloc.c \ $(srcroot)test/integration/allocated.c \ $(srcroot)test/integration/ALLOCM_ARENA.c \ @@ -166,6 +165,7 @@ ifdef CC_MM -include $(C_OBJS:%.$(O)=%.d) -include $(C_PIC_OBJS:%.$(O)=%.d) -include $(C_JET_OBJS:%.$(O)=%.d) +-include $(C_TESTLIB_OBJS:%.$(O)=%.d) -include $(TESTS_OBJS:%.$(O)=%.d) endif @@ -227,15 +227,15 @@ $(STATIC_LIBS): $(objroot)test/unit/%$(EXE): $(objroot)test/unit/%.$(O) $(C_JET_OBJS) $(C_TESTLIB_UNIT_OBJS) @mkdir -p $(@D) - $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(LDFLAGS) $(LIBS) 
$(EXTRA_LDFLAGS) + $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(LDFLAGS) $(filter-out -lm,$(LIBS)) -lm $(EXTRA_LDFLAGS) $(objroot)test/integration/%$(EXE): $(objroot)test/integration/%.$(O) $(C_TESTLIB_INTEGRATION_OBJS) $(C_UTIL_INTEGRATION_OBJS) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) @mkdir -p $(@D) - $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter -lpthread,$(LIBS)) $(EXTRA_LDFLAGS) + $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(filter -lpthread,$(LIBS))) -lm $(EXTRA_LDFLAGS) $(objroot)test/stress/%$(EXE): $(objroot)test/stress/%.$(O) $(C_JET_OBJS) $(C_TESTLIB_STRESS_OBJS) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) @mkdir -p $(@D) - $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(LIBS) $(EXTRA_LDFLAGS) + $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(LIBS)) -lm $(EXTRA_LDFLAGS) build_lib_shared: $(DSOS) build_lib_static: $(STATIC_LIBS) diff --git a/include/jemalloc/internal/jemalloc_internal_macros.h b/include/jemalloc/internal/jemalloc_internal_macros.h index ebb62168..82f827da 100644 --- a/include/jemalloc/internal/jemalloc_internal_macros.h +++ b/include/jemalloc/internal/jemalloc_internal_macros.h @@ -37,6 +37,7 @@ #define ZU(z) ((size_t)z) #define QU(q) ((uint64_t)q) +#define QI(q) ((int64_t)q) #ifndef __DECONST # define __DECONST(type, var) ((type)(uintptr_t)(const void *)(var)) diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index 38a761bf..28ad37af 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -355,7 +355,7 @@ prof_sample_threshold_update(prof_tdata_t *prof_tdata) * Luc Devroye * Springer-Verlag, New York, 1986 * pp 500 - * 
(http://cg.scs.carleton.ca/~luc/rnbookindex.html) + * (http://luc.devroye.org/rnbookindex.html) */ prng64(r, 53, prof_tdata->prng_state, UINT64_C(6364136223846793005), UINT64_C(1442695040888963407)); diff --git a/test/include/test/SFMT.h b/test/include/test/SFMT.h index 3cb350d1..3dbf9422 100644 --- a/test/include/test/SFMT.h +++ b/test/include/test/SFMT.h @@ -66,32 +66,6 @@ #ifndef SFMT_H #define SFMT_H -#include -#include - -#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) - #include -#elif defined(_MSC_VER) || defined(__BORLANDC__) - typedef unsigned int uint32_t; - typedef unsigned __int64 uint64_t; - #define inline __inline -#else - #include - #if defined(__GNUC__) - #define inline __inline__ - #endif -#endif - -#ifndef PRIu64 - #if defined(_MSC_VER) || defined(__BORLANDC__) - #define PRIu64 "I64u" - #define PRIx64 "I64x" - #else - #define PRIu64 "llu" - #define PRIx64 "llx" - #endif -#endif - #if defined(__GNUC__) #define ALWAYSINLINE __attribute__((always_inline)) #else diff --git a/test/include/test/jemalloc_test.h.in b/test/include/test/jemalloc_test.h.in index 026866b0..9743cd52 100644 --- a/test/include/test/jemalloc_test.h.in +++ b/test/include/test/jemalloc_test.h.in @@ -1,6 +1,14 @@ #include #include #include +#include +#include + +#ifdef _WIN32 +# include +#else +# include +#endif /******************************************************************************/ /* @@ -37,6 +45,13 @@ #include "test/jemalloc_test_defs.h" +#if defined(HAVE_ALTIVEC) && !defined(__APPLE__) +# include +#endif +#ifdef HAVE_SSE2 +# include +#endif + /******************************************************************************/ /* * For unit tests, expose all public and private interfaces. @@ -60,7 +75,6 @@ # define JEMALLOC_N(n) @private_namespace@##n # include "jemalloc/internal/private_namespace.h" -# include # include # include # define JEMALLOC_H_TYPES @@ -109,6 +123,7 @@ /* * Common test utilities. 
*/ +#include "test/math.h" #include "test/test.h" #include "test/thread.h" #define MEXP 19937 diff --git a/test/include/test/math.h b/test/include/test/math.h new file mode 100644 index 00000000..a862ed7d --- /dev/null +++ b/test/include/test/math.h @@ -0,0 +1,311 @@ +#ifndef JEMALLOC_ENABLE_INLINE +double ln_gamma(double x); +double i_gamma(double x, double p, double ln_gamma_p); +double pt_norm(double p); +double pt_chi2(double p, double df, double ln_gamma_df_2); +double pt_gamma(double p, double shape, double scale, double ln_gamma_shape); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(MATH_C_)) +/* + * Compute the natural log of Gamma(x), accurate to 10 decimal places. + * + * This implementation is based on: + * + * Pike, M.C., I.D. Hill (1966) Algorithm 291: Logarithm of Gamma function + * [S14]. Communications of the ACM 9(9):684. + */ +JEMALLOC_INLINE double +ln_gamma(double x) +{ + double f, z; + + assert(x > 0.0); + + if (x < 7.0) { + f = 1.0; + z = x; + while (z < 7.0) { + f *= z; + z += 1.0; + } + x = z; + f = -log(f); + } else + f = 0.0; + + z = 1.0 / (x * x); + + return (f + (x-0.5) * log(x) - x + 0.918938533204673 + + (((-0.000595238095238 * z + 0.000793650793651) * z - + 0.002777777777778) * z + 0.083333333333333) / x); +} + +/* + * Compute the incomplete Gamma ratio for [0..x], where p is the shape + * parameter, and ln_gamma_p is ln_gamma(p). + * + * This implementation is based on: + * + * Bhattacharjee, G.P. (1970) Algorithm AS 32: The incomplete Gamma integral. + * Applied Statistics 19:285-287. + */ +JEMALLOC_INLINE double +i_gamma(double x, double p, double ln_gamma_p) +{ + double acu, factor, oflo, gin, term, rn, a, b, an, dif; + double pn[6]; + unsigned i; + + assert(p > 0.0); + assert(x >= 0.0); + + if (x == 0.0) + return (0.0); + + acu = 1.0e-10; + oflo = 1.0e30; + gin = 0.0; + factor = exp(p * log(x) - x - ln_gamma_p); + + if (x <= 1.0 || x < p) { + /* Calculation by series expansion. 
*/ + gin = 1.0; + term = 1.0; + rn = p; + + while (true) { + rn += 1.0; + term *= x / rn; + gin += term; + if (term <= acu) { + gin *= factor / p; + return (gin); + } + } + } else { + /* Calculation by continued fraction. */ + a = 1.0 - p; + b = a + x + 1.0; + term = 0.0; + pn[0] = 1.0; + pn[1] = x; + pn[2] = x + 1.0; + pn[3] = x * b; + gin = pn[2] / pn[3]; + + while (true) { + a += 1.0; + b += 2.0; + term += 1.0; + an = a * term; + for (i = 0; i < 2; i++) + pn[i+4] = b * pn[i+2] - an * pn[i]; + if (pn[5] != 0.0) { + rn = pn[4] / pn[5]; + dif = fabs(gin - rn); + if (dif <= acu && dif <= acu * rn) { + gin = 1.0 - factor * gin; + return (gin); + } + gin = rn; + } + for (i = 0; i < 4; i++) + pn[i] = pn[i+2]; + + if (fabs(pn[4]) >= oflo) { + for (i = 0; i < 4; i++) + pn[i] /= oflo; + } + } + } +} + +/* + * Given a value p in [0..1] of the lower tail area of the normal distribution, + * compute the limit on the definite integral from [-inf..z] that satisfies p, + * accurate to 16 decimal places. + * + * This implementation is based on: + * + * Wichura, M.J. (1988) Algorithm AS 241: The percentage points of the normal + * distribution. Applied Statistics 37(3):477-484. + */ +JEMALLOC_INLINE double +pt_norm(double p) +{ + double q, r, ret; + + assert(p > 0.0 && p < 1.0); + + q = p - 0.5; + if (fabs(q) <= 0.425) { + /* p close to 1/2. 
*/ + r = 0.180625 - q * q; + return (q * (((((((2.5090809287301226727e3 * r + + 3.3430575583588128105e4) * r + 6.7265770927008700853e4) * r + + 4.5921953931549871457e4) * r + 1.3731693765509461125e4) * + r + 1.9715909503065514427e3) * r + 1.3314166789178437745e2) + * r + 3.3871328727963666080e0) / + (((((((5.2264952788528545610e3 * r + + 2.8729085735721942674e4) * r + 3.9307895800092710610e4) * r + + 2.1213794301586595867e4) * r + 5.3941960214247511077e3) * + r + 6.8718700749205790830e2) * r + 4.2313330701600911252e1) + * r + 1.0)); + } else { + if (q < 0.0) + r = p; + else + r = 1.0 - p; + assert(r > 0.0); + + r = sqrt(-log(r)); + if (r <= 5.0) { + /* p neither close to 1/2 nor 0 or 1. */ + r -= 1.6; + ret = ((((((((7.74545014278341407640e-4 * r + + 2.27238449892691845833e-2) * r + + 2.41780725177450611770e-1) * r + + 1.27045825245236838258e0) * r + + 3.64784832476320460504e0) * r + + 5.76949722146069140550e0) * r + + 4.63033784615654529590e0) * r + + 1.42343711074968357734e0) / + (((((((1.05075007164441684324e-9 * r + + 5.47593808499534494600e-4) * r + + 1.51986665636164571966e-2) + * r + 1.48103976427480074590e-1) * r + + 6.89767334985100004550e-1) * r + + 1.67638483018380384940e0) * r + + 2.05319162663775882187e0) * r + 1.0)); + } else { + /* p near 0 or 1. 
*/ + r -= 5.0; + ret = ((((((((2.01033439929228813265e-7 * r + + 2.71155556874348757815e-5) * r + + 1.24266094738807843860e-3) * r + + 2.65321895265761230930e-2) * r + + 2.96560571828504891230e-1) * r + + 1.78482653991729133580e0) * r + + 5.46378491116411436990e0) * r + + 6.65790464350110377720e0) / + (((((((2.04426310338993978564e-15 * r + + 1.42151175831644588870e-7) * r + + 1.84631831751005468180e-5) * r + + 7.86869131145613259100e-4) * r + + 1.48753612908506148525e-2) * r + + 1.36929880922735805310e-1) * r + + 5.99832206555887937690e-1) + * r + 1.0)); + } + if (q < 0.0) + ret = -ret; + return (ret); + } +} + +/* + * Given a value p in [0..1] of the lower tail area of the Chi^2 distribution + * with df degrees of freedom, where ln_gamma_df_2 is ln_gamma(df/2.0), compute + * the upper limit on the definite integral from [0..z] that satisfies p, + * accurate to 12 decimal places. + * + * This implementation is based on: + * + * Best, D.J., D.E. Roberts (1975) Algorithm AS 91: The percentage points of + * the Chi^2 distribution. Applied Statistics 24(3):385-388. + * + * Shea, B.L. (1991) Algorithm AS R85: A remark on AS 91: The percentage + * points of the Chi^2 distribution. Applied Statistics 40(1):233-235. + */ +JEMALLOC_INLINE double +pt_chi2(double p, double df, double ln_gamma_df_2) +{ + double e, aa, xx, c, ch, a, q, p1, p2, t, x, b, s1, s2, s3, s4, s5, s6; + unsigned i; + + assert(p >= 0.0 && p < 1.0); + assert(df > 0.0); + + e = 5.0e-7; + aa = 0.6931471805; + + xx = 0.5 * df; + c = xx - 1.0; + + if (df < -1.24 * log(p)) { + /* Starting approximation for small Chi^2. */ + ch = pow(p * xx * exp(ln_gamma_df_2 + xx * aa), 1.0 / xx); + if (ch - e < 0.0) + return (ch); + } else { + if (df > 0.32) { + x = pt_norm(p); + /* + * Starting approximation using Wilson and Hilferty + * estimate. + */ + p1 = 0.222222 / df; + ch = df * pow(x * sqrt(p1) + 1.0 - p1, 3.0); + /* Starting approximation for p tending to 1. 
*/ + if (ch > 2.2 * df + 6.0) { + ch = -2.0 * (log(1.0 - p) - c * log(0.5 * ch) + + ln_gamma_df_2); + } + } else { + ch = 0.4; + a = log(1.0 - p); + while (true) { + q = ch; + p1 = 1.0 + ch * (4.67 + ch); + p2 = ch * (6.73 + ch * (6.66 + ch)); + t = -0.5 + (4.67 + 2.0 * ch) / p1 - (6.73 + ch + * (13.32 + 3.0 * ch)) / p2; + ch -= (1.0 - exp(a + ln_gamma_df_2 + 0.5 * ch + + c * aa) * p2 / p1) / t; + if (fabs(q / ch - 1.0) - 0.01 <= 0.0) + break; + } + } + } + + for (i = 0; i < 20; i++) { + /* Calculation of seven-term Taylor series. */ + q = ch; + p1 = 0.5 * ch; + if (p1 < 0.0) + return (-1.0); + p2 = p - i_gamma(p1, xx, ln_gamma_df_2); + t = p2 * exp(xx * aa + ln_gamma_df_2 + p1 - c * log(ch)); + b = t / ch; + a = 0.5 * t - b * c; + s1 = (210.0 + a * (140.0 + a * (105.0 + a * (84.0 + a * (70.0 + + 60.0 * a))))) / 420.0; + s2 = (420.0 + a * (735.0 + a * (966.0 + a * (1141.0 + 1278.0 * + a)))) / 2520.0; + s3 = (210.0 + a * (462.0 + a * (707.0 + 932.0 * a))) / 2520.0; + s4 = (252.0 + a * (672.0 + 1182.0 * a) + c * (294.0 + a * + (889.0 + 1740.0 * a))) / 5040.0; + s5 = (84.0 + 264.0 * a + c * (175.0 + 606.0 * a)) / 2520.0; + s6 = (120.0 + c * (346.0 + 127.0 * c)) / 5040.0; + ch += t * (1.0 + 0.5 * t * s1 - b * c * (s1 - b * (s2 - b * (s3 + - b * (s4 - b * (s5 - b * s6)))))); + if (fabs(q / ch - 1.0) <= e) + break; + } + + return (ch); +} + +/* + * Given a value p in [0..1] and Gamma distribution shape and scale parameters, + * compute the upper limit on the definite integeral from [0..z] that satisfies + * p. 
+ */ +JEMALLOC_INLINE double +pt_gamma(double p, double shape, double scale, double ln_gamma_shape) +{ + + return (pt_chi2(p, shape * 2.0, ln_gamma_shape) * 0.5 * scale); +} +#endif diff --git a/test/include/test/test.h b/test/include/test/test.h index 6f5e3436..d7601f8b 100644 --- a/test/include/test/test.h +++ b/test/include/test/test.h @@ -131,7 +131,7 @@ if (!(a_ == true)) { \ p_test_fail( \ "%s:%s:%d: Failed assertion: " \ - "(%s) == true --> %s != true: %s\n", \ + "(%s) == true --> %s != true: ", \ __func__, __FILE__, __LINE__, \ #a, a_ ? "true" : "false", fmt); \ } \ @@ -141,7 +141,7 @@ if (!(a_ == false)) { \ p_test_fail( \ "%s:%s:%d: Failed assertion: " \ - "(%s) == false --> %s != false: %s\n", \ + "(%s) == false --> %s != false: ", \ __func__, __FILE__, __LINE__, \ #a, a_ ? "true" : "false", fmt); \ } \ @@ -152,7 +152,7 @@ p_test_fail( \ "%s:%s:%d: Failed assertion: " \ "(%s) same as (%s) --> " \ - "\"%s\" differs from \"%s\": %s\n", \ + "\"%s\" differs from \"%s\": ", \ __func__, __FILE__, __LINE__, #a, #b, a, b, fmt); \ } \ } while (0) @@ -161,7 +161,7 @@ p_test_fail( \ "%s:%s:%d: Failed assertion: " \ "(%s) differs from (%s) --> " \ - "\"%s\" same as \"%s\": %s\n", \ + "\"%s\" same as \"%s\": ", \ __func__, __FILE__, __LINE__, #a, #b, a, b, fmt); \ } \ } while (0) diff --git a/test/include/test/thread.h b/test/include/test/thread.h index e3c0e270..3b02a011 100644 --- a/test/include/test/thread.h +++ b/test/include/test/thread.h @@ -1,10 +1,7 @@ - /* Abstraction layer for threading in tests */ #ifdef _WIN32 -#include typedef HANDLE je_thread_t; #else -#include typedef pthread_t je_thread_t; #endif diff --git a/test/src/SFMT.c b/test/src/SFMT.c index cd48c3a9..bfd763c0 100644 --- a/test/src/SFMT.c +++ b/test/src/SFMT.c @@ -65,9 +65,6 @@ 128-bit SIMD data type for Altivec, SSE2 or standard C ------------------------------------------------------*/ #if defined(HAVE_ALTIVEC) - #if !defined(__APPLE__) - #include - #endif /** 128-bit data structure */ union 
W128_T { vector unsigned int s; @@ -77,8 +74,6 @@ union W128_T { typedef union W128_T w128_t; #elif defined(HAVE_SSE2) - #include - /** 128-bit data structure */ union W128_T { __m128i si; diff --git a/test/src/math.c b/test/src/math.c new file mode 100644 index 00000000..887a3639 --- /dev/null +++ b/test/src/math.c @@ -0,0 +1,2 @@ +#define MATH_C_ +#include "test/jemalloc_test.h" diff --git a/test/unit/math.c b/test/unit/math.c new file mode 100644 index 00000000..a1b288ea --- /dev/null +++ b/test/unit/math.c @@ -0,0 +1,388 @@ +#include "test/jemalloc_test.h" + +#define MAX_REL_ERR 1.0e-9 +#define MAX_ABS_ERR 1.0e-9 + +static bool +double_eq_rel(double a, double b, double max_rel_err, double max_abs_err) +{ + double rel_err; + + if (fabs(a - b) < max_abs_err) + return (true); + rel_err = (fabs(b) > fabs(a)) ? fabs((a-b)/b) : fabs((a-b)/a); + return (rel_err < max_rel_err); +} + +static uint64_t +factorial(unsigned x) +{ + uint64_t ret = 1; + unsigned i; + + for (i = 2; i <= x; i++) + ret *= (uint64_t)i; + + return (ret); +} + +TEST_BEGIN(test_ln_gamma_factorial) +{ + unsigned x; + + /* exp(ln_gamma(x)) == (x-1)! for integer x. */ + for (x = 1; x <= 21; x++) { + assert_true(double_eq_rel(exp(ln_gamma(x)), + (double)factorial(x-1), MAX_REL_ERR, MAX_ABS_ERR), + "Incorrect factorial result for x=%u", x); + } +} +TEST_END + +/* Expected ln_gamma([0.0..100.0] increment=0.25). 
*/ +static const double ln_gamma_misc_expected[] = { + INFINITY, + 1.28802252469807743, 0.57236494292470008, 0.20328095143129538, + 0.00000000000000000, -0.09827183642181320, -0.12078223763524518, + -0.08440112102048555, 0.00000000000000000, 0.12487171489239651, + 0.28468287047291918, 0.47521466691493719, 0.69314718055994529, + 0.93580193110872523, 1.20097360234707429, 1.48681557859341718, + 1.79175946922805496, 2.11445692745037128, 2.45373657084244234, + 2.80857141857573644, 3.17805383034794575, 3.56137591038669710, + 3.95781396761871651, 4.36671603662228680, 4.78749174278204581, + 5.21960398699022932, 5.66256205985714178, 6.11591589143154568, + 6.57925121201010121, 7.05218545073853953, 7.53436423675873268, + 8.02545839631598312, 8.52516136106541467, 9.03318691960512332, + 9.54926725730099690, 10.07315123968123949, 10.60460290274525086, + 11.14340011995171231, 11.68933342079726856, 12.24220494005076176, + 12.80182748008146909, 13.36802367147604720, 13.94062521940376342, + 14.51947222506051816, 15.10441257307551943, 15.69530137706046524, + 16.29200047656724237, 16.89437797963419285, 17.50230784587389010, + 18.11566950571089407, 18.73434751193644843, 19.35823122022435427, + 19.98721449566188468, 20.62119544270163018, 21.26007615624470048, + 21.90376249182879320, 22.55216385312342098, 23.20519299513386002, + 23.86276584168908954, 24.52480131594137802, 25.19122118273868338, + 25.86194990184851861, 26.53691449111561340, 27.21604439872720604, + 27.89927138384089389, 28.58652940490193828, 29.27775451504081516, + 29.97288476399884871, 30.67186010608067548, 31.37462231367769050, + 32.08111489594735843, 32.79128302226991565, 33.50507345013689076, + 34.22243445715505317, 34.94331577687681545, 35.66766853819134298, + 36.39544520803305261, 37.12659953718355865, 37.86108650896109395, + 38.59886229060776230, 39.33988418719949465, 40.08411059791735198, + 40.83150097453079752, 41.58201578195490100, 42.33561646075348506, + 43.09226539146988699, 43.85192586067515208, 
44.61456202863158893, + 45.38013889847690052, 46.14862228684032885, 46.91997879580877395, + 47.69417578616628361, 48.47118135183522014, 49.25096429545256882, + 50.03349410501914463, 50.81874093156324790, 51.60667556776436982, + 52.39726942748592364, 53.19049452616926743, 53.98632346204390586, + 54.78472939811231157, 55.58568604486942633, 56.38916764371992940, + 57.19514895105859864, 58.00360522298051080, 58.81451220059079787, + 59.62784609588432261, 60.44358357816834371, 61.26170176100199427, + 62.08217818962842927, 62.90499082887649962, 63.73011805151035958, + 64.55753862700632340, 65.38723171073768015, 66.21917683354901385, + 67.05335389170279825, 67.88974313718154008, 68.72832516833013017, + 69.56908092082363737, 70.41199165894616385, 71.25703896716800045, + 72.10420474200799390, 72.95347118416940191, 73.80482079093779646, + 74.65823634883015814, 75.51370092648485866, 76.37119786778275454, + 77.23071078519033961, 78.09222355331530707, 78.95572030266725960, + 79.82118541361435859, 80.68860351052903468, 81.55795945611502873, + 82.42923834590904164, 83.30242550295004378, 84.17750647261028973, + 85.05446701758152983, 85.93329311301090456, 86.81397094178107920, + 87.69648688992882057, 88.58082754219766741, 89.46697967771913795, + 90.35493026581838194, 91.24466646193963015, 92.13617560368709292, + 93.02944520697742803, 93.92446296229978486, 94.82121673107967297, + 95.71969454214321615, 96.61988458827809723, 97.52177522288820910, + 98.42535495673848800, 99.33061245478741341, 100.23753653310367895, + 101.14611615586458981, 102.05634043243354370, 102.96819861451382394, + 103.88168009337621811, 104.79677439715833032, 105.71347118823287303, + 106.63176026064346047, 107.55163153760463501, 108.47307506906540198, + 109.39608102933323153, 110.32063971475740516, 111.24674154146920557, + 112.17437704317786995, 113.10353686902013237, 114.03421178146170689, + 114.96639265424990128, 115.90007047041454769, 116.83523632031698014, + 117.77188139974506953, 118.70999700805310795, 
119.64957454634490830, + 120.59060551569974962, 121.53308151543865279, 122.47699424143097247, + 123.42233548443955726, 124.36909712850338394, 125.31727114935689826, + 126.26684961288492559, 127.21782467361175861, 128.17018857322420899, + 129.12393363912724453, 130.07905228303084755, 131.03553699956862033, + 131.99338036494577864, 132.95257503561629164, 133.91311374698926784, + 134.87498931216194364, 135.83819462068046846, 136.80272263732638294, + 137.76856640092901785, 138.73571902320256299, 139.70417368760718091, + 140.67392364823425055, 141.64496222871400732, 142.61728282114600574, + 143.59087888505104047, 144.56574394634486680, 145.54187159633210058, + 146.51925549072063859, 147.49788934865566148, 148.47776695177302031, + 149.45888214327129617, 150.44122882700193600, 151.42480096657754984, + 152.40959258449737490, 153.39559776128982094, 154.38281063467164245, + 155.37122539872302696, 156.36083630307879844, 157.35163765213474107, + 158.34362380426921391, 159.33678917107920370, 160.33112821663092973, + 161.32663545672428995, 162.32330545817117695, 163.32113283808695314, + 164.32011226319519892, 165.32023844914485267, 166.32150615984036790, + 167.32391020678358018, 168.32744544842768164, 169.33210678954270634, + 170.33788918059275375, 171.34478761712384198, 172.35279713916281707, + 173.36191283062726143, 174.37212981874515094, 175.38344327348534080, + 176.39584840699734514, 177.40934047306160437, 178.42391476654847793, + 179.43956662288721304, 180.45629141754378111, 181.47408456550741107, + 182.49294152078630304, 183.51285777591152737, 184.53382886144947861, + 185.55585034552262869, 186.57891783333786861, 187.60302696672312095, + 188.62817342367162610, 189.65435291789341932, 190.68156119837468054, + 191.70979404894376330, 192.73904728784492590, 193.76931676731820176, + 194.80059837318714244, 195.83288802445184729, 196.86618167288995096, + 197.90047530266301123, 198.93576492992946214, 199.97204660246373464, + 201.00931639928148797, 202.04757043027063901, 
203.08680483582807597, + 204.12701578650228385, 205.16819948264117102, 206.21035215404597807, + 207.25347005962987623, 208.29754948708190909, 209.34258675253678916, + 210.38857820024875878, 211.43552020227099320, 212.48340915813977858, + 213.53224149456323744, 214.58201366511514152, 215.63272214993284592, + 216.68436345542014010, 217.73693411395422004, 218.79043068359703739, + 219.84484974781133815, 220.90018791517996988, 221.95644181913033322, + 223.01360811766215875, 224.07168349307951871, 225.13066465172661879, + 226.19054832372759734, 227.25133126272962159, 228.31301024565024704, + 229.37558207242807384, 230.43904356577689896, 231.50339157094342113, + 232.56862295546847008, 233.63473460895144740, 234.70172344281823484, + 235.76958639009222907, 236.83832040516844586, 237.90792246359117712, + 238.97838956183431947, 240.04971871708477238, 241.12190696702904802, + 242.19495136964280846, 243.26884900298270509, 244.34359696498191283, + 245.41919237324782443, 246.49563236486270057, 247.57291409618682110, + 248.65103474266476269, 249.72999149863338175, 250.80978157713354904, + 251.89040220972316320, 252.97185064629374551, 254.05412415488834199, + 255.13722002152300661, 256.22113555000953511, 257.30586806178126835, + 258.39141489572085675, 259.47777340799029844, 260.56494097186322279, + 261.65291497755913497, 262.74169283208021852, 263.83127195904967266, + 264.92164979855277807, 266.01282380697938379, 267.10479145686849733, + 268.19755023675537586, 269.29109765101975427, 270.38543121973674488, + 271.48054847852881721, 272.57644697842033565, 273.67312428569374561, + 274.77057798174683967, 275.86880566295326389, 276.96780494052313770, + 278.06757344036617496, 279.16810880295668085, 280.26940868320008349, + 281.37147075030043197, 282.47429268763045229, 283.57787219260217171, + 284.68220697654078322, 285.78729476455760050, 286.89313329542699194, + 287.99972032146268930, 289.10705360839756395, 290.21513093526289140, + 291.32395009427028754, 292.43350889069523646, 
293.54380514276073200, + 294.65483668152336350, 295.76660135076059532, 296.87909700685889902, + 297.99232151870342022, 299.10627276756946458, 300.22094864701409733, + 301.33634706277030091, 302.45246593264130297, 303.56930318639643929, + 304.68685676566872189, 305.80512462385280514, 306.92410472600477078, + 308.04379504874236773, 309.16419358014690033, 310.28529831966631036, + 311.40710727801865687, 312.52961847709792664, 313.65282994987899201, + 314.77673974032603610, 315.90134590329950015, 317.02664650446632777, + 318.15263962020929966, 319.27932333753892635, 320.40669575400545455, + 321.53475497761127144, 322.66349912672620803, 323.79292633000159185, + 324.92303472628691452, 326.05382246454587403, 327.18528770377525916, + 328.31742861292224234, 329.45024337080525356, 330.58373016603343331, + 331.71788719692847280, 332.85271267144611329, 333.98820480709991898, + 335.12436183088397001, 336.26118197919845443, 337.39866349777429377, + 338.53680464159958774, 339.67560367484657036, 340.81505887079896411, + 341.95516851178109619, 343.09593088908627578, 344.23734430290727460, + 345.37940706226686416, 346.52211748494903532, 347.66547389743118401, + 348.80947463481720661, 349.95411804077025408, 351.09940246744753267, + 352.24532627543504759, 353.39188783368263103, 354.53908551944078908, + 355.68691771819692349, 356.83538282361303118, 357.98447923746385868, + 359.13420536957539753 +}; + +TEST_BEGIN(test_ln_gamma_misc) +{ + unsigned i; + + for (i = 1; i < sizeof(ln_gamma_misc_expected)/sizeof(double); i++) { + double x = (double)i * 0.25; + assert_true(double_eq_rel(ln_gamma(x), + ln_gamma_misc_expected[i], MAX_REL_ERR, MAX_ABS_ERR), + "Incorrect ln_gamma result for i=%u", i); + } +} +TEST_END + +/* Expected pt_norm([0.01..0.99] increment=0.01). 
*/ +static const double pt_norm_expected[] = { + -INFINITY, + -2.32634787404084076, -2.05374891063182252, -1.88079360815125085, + -1.75068607125216946, -1.64485362695147264, -1.55477359459685305, + -1.47579102817917063, -1.40507156030963221, -1.34075503369021654, + -1.28155156554460081, -1.22652812003661049, -1.17498679206608991, + -1.12639112903880045, -1.08031934081495606, -1.03643338949378938, + -0.99445788320975281, -0.95416525314619416, -0.91536508784281390, + -0.87789629505122846, -0.84162123357291418, -0.80642124701824025, + -0.77219321418868492, -0.73884684918521371, -0.70630256284008752, + -0.67448975019608171, -0.64334540539291685, -0.61281299101662701, + -0.58284150727121620, -0.55338471955567281, -0.52440051270804067, + -0.49585034734745320, -0.46769879911450812, -0.43991316567323380, + -0.41246312944140462, -0.38532046640756751, -0.35845879325119373, + -0.33185334643681652, -0.30548078809939738, -0.27931903444745404, + -0.25334710313579978, -0.22754497664114931, -0.20189347914185077, + -0.17637416478086135, -0.15096921549677725, -0.12566134685507399, + -0.10043372051146975, -0.07526986209982976, -0.05015358346473352, + -0.02506890825871106, 0.00000000000000000, 0.02506890825871106, + 0.05015358346473366, 0.07526986209982990, 0.10043372051146990, + 0.12566134685507413, 0.15096921549677739, 0.17637416478086146, + 0.20189347914185105, 0.22754497664114931, 0.25334710313579978, + 0.27931903444745404, 0.30548078809939738, 0.33185334643681652, + 0.35845879325119373, 0.38532046640756762, 0.41246312944140484, + 0.43991316567323391, 0.46769879911450835, 0.49585034734745348, + 0.52440051270804111, 0.55338471955567303, 0.58284150727121620, + 0.61281299101662701, 0.64334540539291685, 0.67448975019608171, + 0.70630256284008752, 0.73884684918521371, 0.77219321418868492, + 0.80642124701824036, 0.84162123357291441, 0.87789629505122879, + 0.91536508784281423, 0.95416525314619460, 0.99445788320975348, + 1.03643338949378938, 1.08031934081495606, 1.12639112903880045, + 
1.17498679206608991, 1.22652812003661049, 1.28155156554460081, + 1.34075503369021654, 1.40507156030963265, 1.47579102817917085, + 1.55477359459685394, 1.64485362695147308, 1.75068607125217102, + 1.88079360815125041, 2.05374891063182208, 2.32634787404084076 +}; + +TEST_BEGIN(test_pt_norm) +{ + unsigned i; + + for (i = 1; i < sizeof(pt_norm_expected)/sizeof(double); i++) { + double p = (double)i * 0.01; + assert_true(double_eq_rel(pt_norm(p), pt_norm_expected[i], + MAX_REL_ERR, MAX_ABS_ERR), + "Incorrect pt_norm result for i=%u", i); + } +} +TEST_END + +/* + * Expected pt_chi2(p=[0.01..0.99] increment=0.07, + * df={0.1, 1.1, 10.1, 100.1, 1000.1}). + */ +static const double pt_chi2_df[] = {0.1, 1.1, 10.1, 100.1, 1000.1}; +static const double pt_chi2_expected[] = { + 1.168926411457320e-40, 1.347680397072034e-22, 3.886980416666260e-17, + 8.245951724356564e-14, 2.068936347497604e-11, 1.562561743309233e-09, + 5.459543043426564e-08, 1.114775688149252e-06, 1.532101202364371e-05, + 1.553884683726585e-04, 1.239396954915939e-03, 8.153872320255721e-03, + 4.631183739647523e-02, 2.473187311701327e-01, 2.175254800183617e+00, + + 0.0003729887888876379, 0.0164409238228929513, 0.0521523015190650113, + 0.1064701372271216612, 0.1800913735793082115, 0.2748704281195626931, + 0.3939246282787986497, 0.5420727552260817816, 0.7267265822221973259, + 0.9596554296000253670, 1.2607440376386165326, 1.6671185084541604304, + 2.2604828984738705167, 3.2868613342148607082, 6.9298574921692139839, + + 2.606673548632508, 4.602913725294877, 5.646152813924212, + 6.488971315540869, 7.249823275816285, 7.977314231410841, + 8.700354939944047, 9.441728024225892, 10.224338321374127, + 11.076435368801061, 12.039320937038386, 13.183878752697167, + 14.657791935084575, 16.885728216339373, 23.361991680031817, + + 70.14844087392152, 80.92379498849355, 85.53325420085891, + 88.94433120715347, 91.83732712857017, 94.46719943606301, + 96.96896479994635, 99.43412843510363, 101.94074719829733, + 104.57228644307247, 
107.43900093448734, 110.71844673417287, + 114.76616819871325, 120.57422505959563, 135.92318818757556, + + 899.0072447849649, 937.9271278858220, 953.8117189560207, + 965.3079371501154, 974.8974061207954, 983.4936235182347, + 991.5691170518946, 999.4334123954690, 1007.3391826856553, + 1015.5445154999951, 1024.3777075619569, 1034.3538789836223, + 1046.4872561869577, 1063.5717461999654, 1107.0741966053859 +}; + +TEST_BEGIN(test_pt_chi2) +{ + unsigned i, j; + unsigned e = 0; + + for (i = 0; i < sizeof(pt_chi2_df)/sizeof(double); i++) { + double df = pt_chi2_df[i]; + double ln_gamma_df = ln_gamma(df * 0.5); + for (j = 1; j < 100; j += 7) { + double p = (double)j * 0.01; + assert_true(double_eq_rel(pt_chi2(p, df, ln_gamma_df), + pt_chi2_expected[e], MAX_REL_ERR, MAX_ABS_ERR), + "Incorrect pt_chi2 result for i=%u, j=%u", i, j); + e++; + } + } +} +TEST_END + +/* + * Expected pt_gamma(p=[0.1..0.99] increment=0.07, + * shape=[0.5..3.0] increment=0.5). + */ +static const double pt_gamma_shape[] = {0.5, 1.0, 1.5, 2.0, 2.5, 3.0}; +static const double pt_gamma_expected[] = { + 7.854392895485103e-05, 5.043466107888016e-03, 1.788288957794883e-02, + 3.900956150232906e-02, 6.913847560638034e-02, 1.093710833465766e-01, + 1.613412523825817e-01, 2.274682115597864e-01, 3.114117323127083e-01, + 4.189466220207417e-01, 5.598106789059246e-01, 7.521856146202706e-01, + 1.036125427911119e+00, 1.532450860038180e+00, 3.317448300510606e+00, + + 0.01005033585350144, 0.08338160893905107, 0.16251892949777497, + 0.24846135929849966, 0.34249030894677596, 0.44628710262841947, + 0.56211891815354142, 0.69314718055994529, 0.84397007029452920, + 1.02165124753198167, 1.23787435600161766, 1.51412773262977574, + 1.89711998488588196, 2.52572864430825783, 4.60517018598809091, + + 0.05741590094955853, 0.24747378084860744, 0.39888572212236084, + 0.54394139997444901, 0.69048812513915159, 0.84311389861296104, + 1.00580622221479898, 1.18298694218766931, 1.38038096305861213, + 1.60627736383027453, 1.87396970522337947, 
2.20749220408081070, + 2.65852391865854942, 3.37934630984842244, 5.67243336507218476, + + 0.1485547402532659, 0.4657458011640391, 0.6832386130709406, + 0.8794297834672100, 1.0700752852474524, 1.2629614217350744, + 1.4638400448580779, 1.6783469900166610, 1.9132338090606940, + 2.1778589228618777, 2.4868823970010991, 2.8664695666264195, + 3.3724415436062114, 4.1682658512758071, 6.6383520679938108, + + 0.2771490383641385, 0.7195001279643727, 0.9969081732265243, + 1.2383497880608061, 1.4675206597269927, 1.6953064251816552, + 1.9291243435606809, 2.1757300955477641, 2.4428032131216391, + 2.7406534569230616, 3.0851445039665513, 3.5043101122033367, + 4.0575997065264637, 4.9182956424675286, 7.5431362346944937, + + 0.4360451650782932, 0.9983600902486267, 1.3306365880734528, + 1.6129750834753802, 1.8767241606994294, 2.1357032436097660, + 2.3988853336865565, 2.6740603137235603, 2.9697561737517959, + 3.2971457713883265, 3.6731795898504660, 4.1275751617770631, + 4.7230515633946677, 5.6417477865306020, 8.4059469148854635 +}; + +TEST_BEGIN(test_pt_gamma_shape) +{ + unsigned i, j; + unsigned e = 0; + + for (i = 0; i < sizeof(pt_gamma_shape)/sizeof(double); i++) { + double shape = pt_gamma_shape[i]; + double ln_gamma_shape = ln_gamma(shape); + for (j = 1; j < 100; j += 7) { + double p = (double)j * 0.01; + assert_true(double_eq_rel(pt_gamma(p, shape, 1.0, + ln_gamma_shape), pt_gamma_expected[e], MAX_REL_ERR, + MAX_ABS_ERR), + "Incorrect pt_gamma result for i=%u, j=%u", i, j); + e++; + } + } +} +TEST_END + +TEST_BEGIN(test_pt_gamma_scale) +{ + double shape = 1.0; + double ln_gamma_shape = ln_gamma(shape); + + assert_true(double_eq_rel( + pt_gamma(0.5, shape, 1.0, ln_gamma_shape) * 10.0, + pt_gamma(0.5, shape, 10.0, ln_gamma_shape), MAX_REL_ERR, + MAX_ABS_ERR), + "Scale should be trivially equivalent to external multiplication"); +} +TEST_END + +int +main(void) +{ + + return (test( + test_ln_gamma_factorial, + test_ln_gamma_misc, + test_pt_norm, + test_pt_chi2, + test_pt_gamma_shape, + 
test_pt_gamma_scale)); +} From 736923254409aed1a4a226e0ba7429f573c1f372 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 10 Dec 2013 13:51:52 -0800 Subject: [PATCH 0340/3378] Silence some unused variable warnings. --- src/jemalloc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 57a5e359..f13a7d8c 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1106,7 +1106,7 @@ je_realloc(void *ptr, size_t size) void *ret; size_t usize JEMALLOC_CC_SILENCE_INIT(0); size_t old_size = 0; - size_t old_rzsize JEMALLOC_CC_SILENCE_INIT(0); + UNUSED size_t old_rzsize JEMALLOC_CC_SILENCE_INIT(0); prof_thr_cnt_t *cnt JEMALLOC_CC_SILENCE_INIT(NULL); prof_ctx_t *old_ctx JEMALLOC_CC_SILENCE_INIT(NULL); @@ -1256,7 +1256,7 @@ je_free(void *ptr) UTRACE(ptr, 0, 0); if (ptr != NULL) { size_t usize; - size_t rzsize JEMALLOC_CC_SILENCE_INIT(0); + UNUSED size_t rzsize JEMALLOC_CC_SILENCE_INIT(0); assert(malloc_initialized || IS_INITIALIZER); @@ -1504,7 +1504,7 @@ je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, int flags) void *p, *q; size_t usize; size_t old_size; - size_t old_rzsize JEMALLOC_CC_SILENCE_INIT(0); + UNUSED size_t old_rzsize JEMALLOC_CC_SILENCE_INIT(0); size_t alignment = (ZU(1) << (flags & ALLOCM_LG_ALIGN_MASK) & (SIZE_T_MAX-1)); bool zero = flags & ALLOCM_ZERO; @@ -1651,7 +1651,7 @@ int je_dallocm(void *ptr, int flags) { size_t usize; - size_t rzsize JEMALLOC_CC_SILENCE_INIT(0); + UNUSED size_t rzsize JEMALLOC_CC_SILENCE_INIT(0); unsigned arena_ind = ((unsigned)(flags >> 8)) - 1; bool try_tcache; From 6edc97db15311fdac189798ec24e3eb39dc75d8e Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 10 Dec 2013 14:23:10 -0800 Subject: [PATCH 0341/3378] Fix inline-related macro issues. Add JEMALLOC_INLINE_C and use it instead of JEMALLOC_INLINE in .c files, so that the annotated functions are always static. 
Remove SFMT's inline-related macros and use jemalloc's instead, so that there's no danger of interactions with jemalloc's definitions that disable inlining for debug builds. --- .../internal/jemalloc_internal_macros.h | 18 +++++---- src/ckh.c | 12 +++--- test/include/test/SFMT-alti.h | 15 +++----- test/include/test/SFMT-sse2.h | 9 ++--- test/include/test/SFMT.h | 16 -------- test/src/SFMT.c | 38 +++++++++---------- 6 files changed, 43 insertions(+), 65 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal_macros.h b/include/jemalloc/internal/jemalloc_internal_macros.h index 82f827da..70602ee8 100644 --- a/include/jemalloc/internal/jemalloc_internal_macros.h +++ b/include/jemalloc/internal/jemalloc_internal_macros.h @@ -1,16 +1,18 @@ /* - * JEMALLOC_ALWAYS_INLINE is used within header files for functions that are - * static inline functions if inlining is enabled, and single-definition - * library-private functions if inlining is disabled. + * JEMALLOC_ALWAYS_INLINE and JEMALLOC_INLINE are used within header files for + * functions that are static inline functions if inlining is enabled, and + * single-definition library-private functions if inlining is disabled. * - * JEMALLOC_ALWAYS_INLINE_C is for use in .c files, in which case the denoted - * functions are always static, regardless of whether inlining is enabled. + * JEMALLOC_ALWAYS_INLINE_C and JEMALLOC_INLINE_C are for use in .c files, in + * which case the denoted functions are always static, regardless of whether + * inlining is enabled. */ #if defined(JEMALLOC_DEBUG) || defined(JEMALLOC_CODE_COVERAGE) /* Disable inlining to make debugging/profiling easier. 
*/ # define JEMALLOC_ALWAYS_INLINE # define JEMALLOC_ALWAYS_INLINE_C static # define JEMALLOC_INLINE +# define JEMALLOC_INLINE_C static # define inline #else # define JEMALLOC_ENABLE_INLINE @@ -24,15 +26,16 @@ # define JEMALLOC_ALWAYS_INLINE_C static inline # endif # define JEMALLOC_INLINE static inline +# define JEMALLOC_INLINE_C static inline # ifdef _MSC_VER # define inline _inline # endif #endif #ifdef JEMALLOC_CC_SILENCE -#define UNUSED JEMALLOC_ATTR(unused) +# define UNUSED JEMALLOC_ATTR(unused) #else -#define UNUSED +# define UNUSED #endif #define ZU(z) ((size_t)z) @@ -42,4 +45,3 @@ #ifndef __DECONST # define __DECONST(type, var) ((type)(uintptr_t)(const void *)(var)) #endif - diff --git a/src/ckh.c b/src/ckh.c index 2f38348b..04c52966 100644 --- a/src/ckh.c +++ b/src/ckh.c @@ -49,7 +49,7 @@ static void ckh_shrink(ckh_t *ckh); * Search bucket for key and return the cell number if found; SIZE_T_MAX * otherwise. */ -JEMALLOC_INLINE size_t +JEMALLOC_INLINE_C size_t ckh_bucket_search(ckh_t *ckh, size_t bucket, const void *key) { ckhc_t *cell; @@ -67,7 +67,7 @@ ckh_bucket_search(ckh_t *ckh, size_t bucket, const void *key) /* * Search table for key and return cell number if found; SIZE_T_MAX otherwise. */ -JEMALLOC_INLINE size_t +JEMALLOC_INLINE_C size_t ckh_isearch(ckh_t *ckh, const void *key) { size_t hashes[2], bucket, cell; @@ -88,7 +88,7 @@ ckh_isearch(ckh_t *ckh, const void *key) return (cell); } -JEMALLOC_INLINE bool +JEMALLOC_INLINE_C bool ckh_try_bucket_insert(ckh_t *ckh, size_t bucket, const void *key, const void *data) { @@ -120,7 +120,7 @@ ckh_try_bucket_insert(ckh_t *ckh, size_t bucket, const void *key, * eviction/relocation procedure until either success or detection of an * eviction/relocation bucket cycle. 
*/ -JEMALLOC_INLINE bool +JEMALLOC_INLINE_C bool ckh_evict_reloc_insert(ckh_t *ckh, size_t argbucket, void const **argkey, void const **argdata) { @@ -190,7 +190,7 @@ ckh_evict_reloc_insert(ckh_t *ckh, size_t argbucket, void const **argkey, } } -JEMALLOC_INLINE bool +JEMALLOC_INLINE_C bool ckh_try_insert(ckh_t *ckh, void const**argkey, void const**argdata) { size_t hashes[2], bucket; @@ -219,7 +219,7 @@ ckh_try_insert(ckh_t *ckh, void const**argkey, void const**argdata) * Try to rebuild the hash table from scratch by inserting all items from the * old table into the new. */ -JEMALLOC_INLINE bool +JEMALLOC_INLINE_C bool ckh_rebuild(ckh_t *ckh, ckhc_t *aTab) { size_t count, i, nins; diff --git a/test/include/test/SFMT-alti.h b/test/include/test/SFMT-alti.h index 3942bbcf..2f86f67d 100644 --- a/test/include/test/SFMT-alti.h +++ b/test/include/test/SFMT-alti.h @@ -52,12 +52,6 @@ #ifndef SFMT_ALTI_H #define SFMT_ALTI_H -inline static vector unsigned int vec_recursion(vector unsigned int a, - vector unsigned int b, - vector unsigned int c, - vector unsigned int d) - ALWAYSINLINE; - /** * This function represents the recursion formula in AltiVec and BIG ENDIAN. * @param a a 128-bit part of the interal state array @@ -66,7 +60,8 @@ inline static vector unsigned int vec_recursion(vector unsigned int a, * @param d a 128-bit part of the interal state array * @return output */ -inline static vector unsigned int vec_recursion(vector unsigned int a, +JEMALLOC_ALWAYS_INLINE +static vector unsigned int vec_recursion(vector unsigned int a, vector unsigned int b, vector unsigned int c, vector unsigned int d) { @@ -100,7 +95,7 @@ inline static vector unsigned int vec_recursion(vector unsigned int a, * This function fills the internal state array with pseudorandom * integers. 
*/ -inline static void gen_rand_all(sfmt_t *ctx) { +JEMALLOC_INLINE void gen_rand_all(sfmt_t *ctx) { int i; vector unsigned int r, r1, r2; @@ -127,7 +122,7 @@ inline static void gen_rand_all(sfmt_t *ctx) { * @param array an 128-bit array to be filled by pseudorandom numbers. * @param size number of 128-bit pesudorandom numbers to be generated. */ -inline static void gen_rand_array(sfmt_t *ctx, w128_t *array, int size) { +JEMALLOC_INLINE void gen_rand_array(sfmt_t *ctx, w128_t *array, int size) { int i, j; vector unsigned int r, r1, r2; @@ -178,7 +173,7 @@ inline static void gen_rand_array(sfmt_t *ctx, w128_t *array, int size) { * @param array an 128-bit array to be swaped. * @param size size of 128-bit array. */ -inline static void swap(w128_t *array, int size) { +JEMALLOC_INLINE void swap(w128_t *array, int size) { int i; const vector unsigned char perm = ALTI_SWAP; diff --git a/test/include/test/SFMT-sse2.h b/test/include/test/SFMT-sse2.h index 19131806..0314a163 100644 --- a/test/include/test/SFMT-sse2.h +++ b/test/include/test/SFMT-sse2.h @@ -51,9 +51,6 @@ #ifndef SFMT_SSE2_H #define SFMT_SSE2_H -PRE_ALWAYS static __m128i mm_recursion(__m128i *a, __m128i *b, __m128i c, - __m128i d, __m128i mask) ALWAYSINLINE; - /** * This function represents the recursion formula. * @param a a 128-bit part of the interal state array @@ -63,7 +60,7 @@ PRE_ALWAYS static __m128i mm_recursion(__m128i *a, __m128i *b, __m128i c, * @param mask 128-bit mask * @return output */ -PRE_ALWAYS static __m128i mm_recursion(__m128i *a, __m128i *b, +JEMALLOC_ALWAYS_INLINE __m128i mm_recursion(__m128i *a, __m128i *b, __m128i c, __m128i d, __m128i mask) { __m128i v, x, y, z; @@ -84,7 +81,7 @@ PRE_ALWAYS static __m128i mm_recursion(__m128i *a, __m128i *b, * This function fills the internal state array with pseudorandom * integers. 
*/ -inline static void gen_rand_all(sfmt_t *ctx) { +JEMALLOC_INLINE void gen_rand_all(sfmt_t *ctx) { int i; __m128i r, r1, r2, mask; mask = _mm_set_epi32(MSK4, MSK3, MSK2, MSK1); @@ -114,7 +111,7 @@ inline static void gen_rand_all(sfmt_t *ctx) { * @param array an 128-bit array to be filled by pseudorandom numbers. * @param size number of 128-bit pesudorandom numbers to be generated. */ -inline static void gen_rand_array(sfmt_t *ctx, w128_t *array, int size) { +JEMALLOC_INLINE void gen_rand_array(sfmt_t *ctx, w128_t *array, int size) { int i, j; __m128i r, r1, r2, mask; mask = _mm_set_epi32(MSK4, MSK3, MSK2, MSK1); diff --git a/test/include/test/SFMT.h b/test/include/test/SFMT.h index 3dbf9422..09c1607d 100644 --- a/test/include/test/SFMT.h +++ b/test/include/test/SFMT.h @@ -66,22 +66,6 @@ #ifndef SFMT_H #define SFMT_H -#if defined(__GNUC__) -#define ALWAYSINLINE __attribute__((always_inline)) -#else -#define ALWAYSINLINE -#endif - -#if defined(_MSC_VER) - #if _MSC_VER >= 1200 - #define PRE_ALWAYS __forceinline - #else - #define PRE_ALWAYS inline - #endif -#else - #define PRE_ALWAYS inline -#endif - typedef struct sfmt_s sfmt_t; uint32_t gen_rand32(sfmt_t *ctx); diff --git a/test/src/SFMT.c b/test/src/SFMT.c index bfd763c0..9fade282 100644 --- a/test/src/SFMT.c +++ b/test/src/SFMT.c @@ -114,18 +114,18 @@ static uint32_t parity[4] = {PARITY1, PARITY2, PARITY3, PARITY4}; /*---------------- STATIC FUNCTIONS ----------------*/ -inline static int idxof(int i); +JEMALLOC_INLINE_C int idxof(int i); #if (!defined(HAVE_ALTIVEC)) && (!defined(HAVE_SSE2)) -inline static void rshift128(w128_t *out, w128_t const *in, int shift); -inline static void lshift128(w128_t *out, w128_t const *in, int shift); +JEMALLOC_INLINE_C void rshift128(w128_t *out, w128_t const *in, int shift); +JEMALLOC_INLINE_C void lshift128(w128_t *out, w128_t const *in, int shift); #endif -inline static void gen_rand_all(sfmt_t *ctx); -inline static void gen_rand_array(sfmt_t *ctx, w128_t *array, int size); 
-inline static uint32_t func1(uint32_t x); -inline static uint32_t func2(uint32_t x); +JEMALLOC_INLINE_C void gen_rand_all(sfmt_t *ctx); +JEMALLOC_INLINE_C void gen_rand_array(sfmt_t *ctx, w128_t *array, int size); +JEMALLOC_INLINE_C uint32_t func1(uint32_t x); +JEMALLOC_INLINE_C uint32_t func2(uint32_t x); static void period_certification(sfmt_t *ctx); #if defined(BIG_ENDIAN64) && !defined(ONLY64) -inline static void swap(w128_t *array, int size); +JEMALLOC_INLINE_C void swap(w128_t *array, int size); #endif #if defined(HAVE_ALTIVEC) @@ -139,11 +139,11 @@ inline static void swap(w128_t *array, int size); * in BIG ENDIAN machine. */ #ifdef ONLY64 -inline static int idxof(int i) { +JEMALLOC_INLINE_C int idxof(int i) { return i ^ 1; } #else -inline static int idxof(int i) { +JEMALLOC_INLINE_C int idxof(int i) { return i; } #endif @@ -157,7 +157,7 @@ inline static int idxof(int i) { */ #if (!defined(HAVE_ALTIVEC)) && (!defined(HAVE_SSE2)) #ifdef ONLY64 -inline static void rshift128(w128_t *out, w128_t const *in, int shift) { +JEMALLOC_INLINE_C void rshift128(w128_t *out, w128_t const *in, int shift) { uint64_t th, tl, oh, ol; th = ((uint64_t)in->u[2] << 32) | ((uint64_t)in->u[3]); @@ -172,7 +172,7 @@ inline static void rshift128(w128_t *out, w128_t const *in, int shift) { out->u[3] = (uint32_t)oh; } #else -inline static void rshift128(w128_t *out, w128_t const *in, int shift) { +JEMALLOC_INLINE_C void rshift128(w128_t *out, w128_t const *in, int shift) { uint64_t th, tl, oh, ol; th = ((uint64_t)in->u[3] << 32) | ((uint64_t)in->u[2]); @@ -196,7 +196,7 @@ inline static void rshift128(w128_t *out, w128_t const *in, int shift) { * @param shift the shift value */ #ifdef ONLY64 -inline static void lshift128(w128_t *out, w128_t const *in, int shift) { +JEMALLOC_INLINE_C void lshift128(w128_t *out, w128_t const *in, int shift) { uint64_t th, tl, oh, ol; th = ((uint64_t)in->u[2] << 32) | ((uint64_t)in->u[3]); @@ -211,7 +211,7 @@ inline static void lshift128(w128_t *out, w128_t 
const *in, int shift) { out->u[3] = (uint32_t)oh; } #else -inline static void lshift128(w128_t *out, w128_t const *in, int shift) { +JEMALLOC_INLINE_C void lshift128(w128_t *out, w128_t const *in, int shift) { uint64_t th, tl, oh, ol; th = ((uint64_t)in->u[3] << 32) | ((uint64_t)in->u[2]); @@ -238,7 +238,7 @@ inline static void lshift128(w128_t *out, w128_t const *in, int shift) { */ #if (!defined(HAVE_ALTIVEC)) && (!defined(HAVE_SSE2)) #ifdef ONLY64 -inline static void do_recursion(w128_t *r, w128_t *a, w128_t *b, w128_t *c, +JEMALLOC_INLINE_C void do_recursion(w128_t *r, w128_t *a, w128_t *b, w128_t *c, w128_t *d) { w128_t x; w128_t y; @@ -255,7 +255,7 @@ inline static void do_recursion(w128_t *r, w128_t *a, w128_t *b, w128_t *c, ^ (d->u[3] << SL1); } #else -inline static void do_recursion(w128_t *r, w128_t *a, w128_t *b, w128_t *c, +JEMALLOC_INLINE_C void do_recursion(w128_t *r, w128_t *a, w128_t *b, w128_t *c, w128_t *d) { w128_t x; w128_t y; @@ -279,7 +279,7 @@ inline static void do_recursion(w128_t *r, w128_t *a, w128_t *b, w128_t *c, * This function fills the internal state array with pseudorandom * integers. */ -inline static void gen_rand_all(sfmt_t *ctx) { +JEMALLOC_INLINE_C void gen_rand_all(sfmt_t *ctx) { int i; w128_t *r1, *r2; @@ -306,7 +306,7 @@ inline static void gen_rand_all(sfmt_t *ctx) { * @param array an 128-bit array to be filled by pseudorandom numbers. * @param size number of 128-bit pseudorandom numbers to be generated. 
*/ -inline static void gen_rand_array(sfmt_t *ctx, w128_t *array, int size) { +JEMALLOC_INLINE_C void gen_rand_array(sfmt_t *ctx, w128_t *array, int size) { int i, j; w128_t *r1, *r2; @@ -340,7 +340,7 @@ inline static void gen_rand_array(sfmt_t *ctx, w128_t *array, int size) { #endif #if defined(BIG_ENDIAN64) && !defined(ONLY64) && !defined(HAVE_ALTIVEC) -inline static void swap(w128_t *array, int size) { +JEMALLOC_INLINE_C void swap(w128_t *array, int size) { int i; uint32_t x, y; From 19609724f9dce1ac644b6cbf89acb740319eb498 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 10 Dec 2013 15:05:24 -0800 Subject: [PATCH 0342/3378] Clean up SFMT test. Refactor array declarations to remove some dubious casts. Reduce array size to what is actually used. Extract magic numbers into cpp macro definitions. --- test/unit/SFMT.c | 83 +++++++++++++++++++++++------------------------- 1 file changed, 40 insertions(+), 43 deletions(-) diff --git a/test/unit/SFMT.c b/test/unit/SFMT.c index a4759376..4805f8e4 100644 --- a/test/unit/SFMT.c +++ b/test/unit/SFMT.c @@ -35,9 +35,10 @@ */ #include "test/jemalloc_test.h" -#define BLOCK_SIZE 100000 -#define BLOCK_SIZE64 50000 -#define COUNT 1000 +#define BLOCK_SIZE 10000 +#define BLOCK_SIZE64 (BLOCK_SIZE / 2) +#define COUNT_1 1000 +#define COUNT_2 700 static const uint32_t init_gen_rand_32_expected[] = { 3440181298U, 1564997079U, 1510669302U, 2930277156U, 1452439940U, @@ -1450,23 +1451,22 @@ static const uint64_t init_by_array_64_expected[] = { TEST_BEGIN(test_gen_rand_32) { - uint64_t array1[BLOCK_SIZE / 4][2] JEMALLOC_ATTR(aligned(16)); - uint64_t array2[10000 / 4][2] JEMALLOC_ATTR(aligned(16)); + uint32_t array32[BLOCK_SIZE] JEMALLOC_ATTR(aligned(16)); + uint32_t array32_2[BLOCK_SIZE] JEMALLOC_ATTR(aligned(16)); int i; - uint32_t *array32 = (uint32_t *)array1; - uint32_t *array32_2 = (uint32_t *)array2; uint32_t r32; sfmt_t *ctx; - assert_d_le(get_min_array_size32(), 10000, "Array size too small"); + 
assert_d_le(get_min_array_size32(), BLOCK_SIZE, + "Array size too small"); ctx = init_gen_rand(1234); - fill_array32(ctx, array32, 10000); - fill_array32(ctx, array32_2, 10000); + fill_array32(ctx, array32, BLOCK_SIZE); + fill_array32(ctx, array32_2, BLOCK_SIZE); fini_gen_rand(ctx); ctx = init_gen_rand(1234); - for (i = 0; i < 10000; i++) { - if (i < 1000) { + for (i = 0; i < BLOCK_SIZE; i++) { + if (i < COUNT_1) { assert_u32_eq(array32[i], init_gen_rand_32_expected[i], "Output mismatch for i=%d", i); } @@ -1474,7 +1474,7 @@ TEST_BEGIN(test_gen_rand_32) assert_u32_eq(r32, array32[i], "Mismatch at array32[%d]=%x, gen=%x", i, array32[i], r32); } - for (i = 0; i < 700; i++) { + for (i = 0; i < COUNT_2; i++) { r32 = gen_rand32(ctx); assert_u32_eq(r32, array32_2[i], "Mismatch at array32_2[%d]=%x, gen=%x", i, array32_2[i], @@ -1486,24 +1486,23 @@ TEST_END TEST_BEGIN(test_by_array_32) { - uint64_t array1[BLOCK_SIZE / 4][2] JEMALLOC_ATTR(aligned(16)); - uint64_t array2[10000 / 4][2] JEMALLOC_ATTR(aligned(16)); + uint32_t array32[BLOCK_SIZE] JEMALLOC_ATTR(aligned(16)); + uint32_t array32_2[BLOCK_SIZE] JEMALLOC_ATTR(aligned(16)); int i; - uint32_t *array32 = (uint32_t *)array1; - uint32_t *array32_2 = (uint32_t *)array2; uint32_t ini[4] = {0x1234, 0x5678, 0x9abc, 0xdef0}; uint32_t r32; sfmt_t *ctx; - assert_d_le(get_min_array_size32(), 10000, "Array size too small"); + assert_d_le(get_min_array_size32(), BLOCK_SIZE, + "Array size too small"); ctx = init_by_array(ini, 4); - fill_array32(ctx, array32, 10000); - fill_array32(ctx, array32_2, 10000); + fill_array32(ctx, array32, BLOCK_SIZE); + fill_array32(ctx, array32_2, BLOCK_SIZE); fini_gen_rand(ctx); ctx = init_by_array(ini, 4); - for (i = 0; i < 10000; i++) { - if (i < 1000) { + for (i = 0; i < BLOCK_SIZE; i++) { + if (i < COUNT_1) { assert_u32_eq(array32[i], init_by_array_32_expected[i], "Output mismatch for i=%d", i); } @@ -1511,7 +1510,7 @@ TEST_BEGIN(test_by_array_32) assert_u32_eq(r32, array32[i], "Mismatch at 
array32[%d]=%x, gen=%x", i, array32[i], r32); } - for (i = 0; i < 700; i++) { + for (i = 0; i < COUNT_2; i++) { r32 = gen_rand32(ctx); assert_u32_eq(r32, array32_2[i], "Mismatch at array32_2[%d]=%x, gen=%x", i, array32_2[i], @@ -1523,23 +1522,22 @@ TEST_END TEST_BEGIN(test_gen_rand_64) { - uint64_t array1[BLOCK_SIZE / 4][2] JEMALLOC_ATTR(aligned(16)); - uint64_t array2[10000 / 4][2] JEMALLOC_ATTR(aligned(16)); + uint64_t array64[BLOCK_SIZE64] JEMALLOC_ATTR(aligned(16)); + uint64_t array64_2[BLOCK_SIZE64] JEMALLOC_ATTR(aligned(16)); int i; - uint64_t *array64 = (uint64_t *)array1; - uint64_t *array64_2 = (uint64_t *)array2; uint64_t r; sfmt_t *ctx; - assert_d_le(get_min_array_size64(), 5000, "Array size too small"); + assert_d_le(get_min_array_size64(), BLOCK_SIZE64, + "Array size too small"); ctx = init_gen_rand(4321); - fill_array64(ctx, array64, 5000); - fill_array64(ctx, array64_2, 5000); + fill_array64(ctx, array64, BLOCK_SIZE64); + fill_array64(ctx, array64_2, BLOCK_SIZE64); fini_gen_rand(ctx); ctx = init_gen_rand(4321); - for (i = 0; i < 5000; i++) { - if (i < 1000) { + for (i = 0; i < BLOCK_SIZE64; i++) { + if (i < COUNT_1) { assert_u64_eq(array64[i], init_gen_rand_64_expected[i], "Output mismatch for i=%d", i); } @@ -1548,7 +1546,7 @@ TEST_BEGIN(test_gen_rand_64) "Mismatch at array64[%d]=%"PRIx64", gen=%"PRIx64, i, array64[i], r); } - for (i = 0; i < 700; i++) { + for (i = 0; i < COUNT_2; i++) { r = gen_rand64(ctx); assert_u64_eq(r, array64_2[i], "Mismatch at array64_2[%d]=%"PRIx64" gen=%"PRIx64"", i, @@ -1560,24 +1558,23 @@ TEST_END TEST_BEGIN(test_by_array_64) { - uint64_t array1[BLOCK_SIZE / 4][2] JEMALLOC_ATTR(aligned(16)); - uint64_t array2[10000 / 4][2] JEMALLOC_ATTR(aligned(16)); + uint64_t array64[BLOCK_SIZE64] JEMALLOC_ATTR(aligned(16)); + uint64_t array64_2[BLOCK_SIZE64] JEMALLOC_ATTR(aligned(16)); int i; - uint64_t *array64 = (uint64_t *)array1; - uint64_t *array64_2 = (uint64_t *)array2; uint64_t r; uint32_t ini[] = {5, 4, 3, 2, 1}; sfmt_t *ctx; 
- assert_d_le(get_min_array_size64(), 5000, "Array size too small"); + assert_d_le(get_min_array_size64(), BLOCK_SIZE64, + "Array size too small"); ctx = init_by_array(ini, 5); - fill_array64(ctx, array64, 5000); - fill_array64(ctx, array64_2, 5000); + fill_array64(ctx, array64, BLOCK_SIZE64); + fill_array64(ctx, array64_2, BLOCK_SIZE64); fini_gen_rand(ctx); ctx = init_by_array(ini, 5); - for (i = 0; i < 5000; i++) { - if (i < 1000) { + for (i = 0; i < BLOCK_SIZE64; i++) { + if (i < COUNT_1) { assert_u64_eq(array64[i], init_by_array_64_expected[i], "Output mismatch for i=%d"); } @@ -1586,7 +1583,7 @@ TEST_BEGIN(test_by_array_64) "Mismatch at array64[%d]=%"PRIx64" gen=%"PRIx64, i, array64[i], r); } - for (i = 0; i < 700; i++) { + for (i = 0; i < COUNT_2; i++) { r = gen_rand64(ctx); assert_u64_eq(r, array64_2[i], "Mismatch at array64_2[%d]=%"PRIx64" gen=%"PRIx64, i, From 0f4f1efd94d33a4bbf766d3d4e7e349fa7c0d3b9 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 12 Dec 2013 14:41:02 -0800 Subject: [PATCH 0343/3378] Add mq (message queue) to test infrastructure. Add mtx (mutex) to test infrastructure, in order to avoid bootstrapping complications that would result from directly using malloc_mutex. Rename test infrastructure's thread abstraction from je_thread to thd. Fix some header ordering issues. 
--- Makefile.in | 6 +- include/jemalloc/internal/hash.h | 10 +- .../internal/jemalloc_internal_defs.h.in | 4 + include/jemalloc/internal/tsd.h | 2 +- test/include/test/jemalloc_test.h.in | 16 ++- test/include/test/jemalloc_test_defs.h.in | 2 + test/include/test/mq.h | 110 ++++++++++++++++++ test/include/test/mtx.h | 21 ++++ test/include/test/thd.h | 9 ++ test/include/test/thread.h | 9 -- test/integration/ALLOCM_ARENA.c | 8 +- test/integration/allocated.c | 10 +- test/integration/jemalloc_integration.h.in | 50 -------- test/integration/thread_arena.c | 8 +- test/integration/thread_tcache_enabled.c | 10 +- test/src/mtx.c | 62 ++++++++++ test/src/thd.c | 35 ++++++ test/src/thread.c | 35 ------ test/unit/mq.c | 91 +++++++++++++++ test/unit/mtx.c | 60 ++++++++++ test/unit/tsd.c | 14 +-- 21 files changed, 441 insertions(+), 131 deletions(-) create mode 100644 test/include/test/mq.h create mode 100644 test/include/test/mtx.h create mode 100644 test/include/test/thd.h delete mode 100644 test/include/test/thread.h delete mode 100644 test/integration/jemalloc_integration.h.in create mode 100644 test/src/mtx.c create mode 100644 test/src/thd.c delete mode 100644 test/src/thread.c create mode 100644 test/unit/mq.c create mode 100644 test/unit/mtx.c diff --git a/Makefile.in b/Makefile.in index 78554433..cd137fd9 100644 --- a/Makefile.in +++ b/Makefile.in @@ -103,10 +103,12 @@ DOCS_XML := $(objroot)doc/jemalloc$(install_suffix).xml DOCS_HTML := $(DOCS_XML:$(objroot)%.xml=$(srcroot)%.html) DOCS_MAN3 := $(DOCS_XML:$(objroot)%.xml=$(srcroot)%.3) DOCS := $(DOCS_HTML) $(DOCS_MAN3) -C_TESTLIB_SRCS := $(srcroot)test/src/math.c $(srcroot)test/src/SFMT.c \ - $(srcroot)test/src/test.c $(srcroot)test/src/thread.c +C_TESTLIB_SRCS := $(srcroot)test/src/math.c $(srcroot)test/src/mtx.c \ + $(srcroot)test/src/SFMT.c $(srcroot)test/src/test.c \ + $(srcroot)test/src/thd.c C_UTIL_INTEGRATION_SRCS := $(srcroot)src/util.c TESTS_UNIT := $(srcroot)test/unit/bitmap.c $(srcroot)test/unit/math.c \ + 
$(srcroot)test/unit/mq.c $(srcroot)test/unit/mtx.c \ $(srcroot)test/unit/SFMT.c $(srcroot)test/unit/tsd.c TESTS_INTEGRATION := $(srcroot)test/integration/aligned_alloc.c \ $(srcroot)test/integration/allocated.c \ diff --git a/include/jemalloc/internal/hash.h b/include/jemalloc/internal/hash.h index 56ecc793..6b8d9cd5 100644 --- a/include/jemalloc/internal/hash.h +++ b/include/jemalloc/internal/hash.h @@ -43,14 +43,14 @@ JEMALLOC_INLINE uint32_t hash_get_block_32(const uint32_t *p, int i) { - return p[i]; + return (p[i]); } JEMALLOC_INLINE uint64_t hash_get_block_64(const uint64_t *p, int i) { - return p[i]; + return (p[i]); } JEMALLOC_INLINE uint32_t @@ -63,7 +63,7 @@ hash_fmix_32(uint32_t h) h *= 0xc2b2ae35; h ^= h >> 16; - return h; + return (h); } JEMALLOC_INLINE uint64_t @@ -76,7 +76,7 @@ hash_fmix_64(uint64_t k) k *= QU(0xc4ceb9fe1a85ec53LLU); k ^= k >> 33; - return k; + return (k); } JEMALLOC_INLINE uint32_t @@ -127,7 +127,7 @@ hash_x86_32(const void *key, int len, uint32_t seed) h1 = hash_fmix_32(h1); - return h1; + return (h1); } UNUSED JEMALLOC_INLINE void diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 3b72b35f..752bb103 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -1,3 +1,5 @@ +#ifndef JEMALLOC_INTERNAL_DEFS_H_ +#define JEMALLOC_INTERNAL_DEFS_H_ /* * If JEMALLOC_PREFIX is defined via --with-jemalloc-prefix, it will cause all * public APIs to be prefixed. This makes it possible, with some care, to use @@ -193,3 +195,5 @@ /* sizeof(intmax_t) == 2^LG_SIZEOF_INTMAX_T. 
*/ #undef LG_SIZEOF_INTMAX_T + +#endif /* JEMALLOC_INTERNAL_DEFS_H_ */ diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 5f7ad1c5..9fb4a23e 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -348,7 +348,7 @@ a_name##_tsd_get_wrapper(void) \ wrapper = tsd_init_check_recursion( \ &a_name##_tsd_init_head, &block); \ if (wrapper) \ - return wrapper; \ + return (wrapper); \ wrapper = (a_name##_tsd_wrapper_t *) \ malloc_tsd_malloc(sizeof(a_name##_tsd_wrapper_t)); \ block.data = wrapper; \ diff --git a/test/include/test/jemalloc_test.h.in b/test/include/test/jemalloc_test.h.in index 9743cd52..9f7dfa46 100644 --- a/test/include/test/jemalloc_test.h.in +++ b/test/include/test/jemalloc_test.h.in @@ -1,8 +1,10 @@ #include +#include #include -#include +#include #include #include +#include #ifdef _WIN32 # include @@ -45,6 +47,10 @@ #include "test/jemalloc_test_defs.h" +#ifdef JEMALLOC_OSSPIN +# include +#endif + #if defined(HAVE_ALTIVEC) && !defined(__APPLE__) # include #endif @@ -75,13 +81,13 @@ # define JEMALLOC_N(n) @private_namespace@##n # include "jemalloc/internal/private_namespace.h" -# include -# include # define JEMALLOC_H_TYPES # define JEMALLOC_H_STRUCTS # define JEMALLOC_H_EXTERNS # define JEMALLOC_H_INLINES # include "jemalloc/internal/util.h" +# include "jemalloc/internal/qr.h" +# include "jemalloc/internal/ql.h" # undef JEMALLOC_H_TYPES # undef JEMALLOC_H_STRUCTS # undef JEMALLOC_H_EXTERNS @@ -124,7 +130,9 @@ * Common test utilities. 
*/ #include "test/math.h" +#include "test/mtx.h" +#include "test/mq.h" #include "test/test.h" -#include "test/thread.h" +#include "test/thd.h" #define MEXP 19937 #include "test/SFMT.h" diff --git a/test/include/test/jemalloc_test_defs.h.in b/test/include/test/jemalloc_test_defs.h.in index 093e2f23..18a9773d 100644 --- a/test/include/test/jemalloc_test_defs.h.in +++ b/test/include/test/jemalloc_test_defs.h.in @@ -1,3 +1,5 @@ +#include "jemalloc/internal/jemalloc_internal_defs.h" + /* For use by SFMT. */ #undef HAVE_SSE2 #undef HAVE_ALTIVEC diff --git a/test/include/test/mq.h b/test/include/test/mq.h new file mode 100644 index 00000000..11188653 --- /dev/null +++ b/test/include/test/mq.h @@ -0,0 +1,110 @@ +/* + * Simple templated message queue implementation that relies on only mutexes for + * synchronization (which reduces portability issues). Given the following + * setup: + * + * typedef struct mq_msg_s mq_msg_t; + * struct mq_msg_s { + * mq_msg(mq_msg_t) link; + * [message data] + * }; + * mq_gen(, mq_, mq_t, mq_msg_t, link) + * + * The API is as follows: + * + * bool mq_init(mq_t *mq); + * void mq_fini(mq_t *mq); + * unsigned mq_count(mq_t *mq); + * mq_msg_t *mq_tryget(mq_t *mq); + * mq_msg_t *mq_get(mq_t *mq); + * void mq_put(mq_t *mq, mq_msg_t *msg); + * + * The message queue linkage embedded in each message is to be treated as + * externally opaque (no need to initialize or clean up externally). mq_fini() + * does not perform any cleanup of messages, since it knows nothing of their + * payloads. 
+ */ +#define mq_msg(a_mq_msg_type) ql_elm(a_mq_msg_type) + +#define mq_gen(a_attr, a_prefix, a_mq_type, a_mq_msg_type, a_field) \ +typedef struct { \ + mtx_t lock; \ + ql_head(a_mq_msg_type) msgs; \ + unsigned count; \ +} a_mq_type; \ +a_attr bool \ +a_prefix##init(a_mq_type *mq) { \ + \ + if (mtx_init(&mq->lock)) \ + return (true); \ + ql_new(&mq->msgs); \ + mq->count = 0; \ + return (false); \ +} \ +a_attr void \ +a_prefix##fini(a_mq_type *mq) \ +{ \ + \ + mtx_fini(&mq->lock); \ +} \ +a_attr unsigned \ +a_prefix##count(a_mq_type *mq) \ +{ \ + unsigned count; \ + \ + mtx_lock(&mq->lock); \ + count = mq->count; \ + mtx_unlock(&mq->lock); \ + return (count); \ +} \ +a_attr a_mq_msg_type * \ +a_prefix##tryget(a_mq_type *mq) \ +{ \ + a_mq_msg_type *msg; \ + \ + mtx_lock(&mq->lock); \ + msg = ql_first(&mq->msgs); \ + if (msg != NULL) { \ + ql_head_remove(&mq->msgs, a_mq_msg_type, a_field); \ + mq->count--; \ + } \ + mtx_unlock(&mq->lock); \ + return (msg); \ +} \ +a_attr a_mq_msg_type * \ +a_prefix##get(a_mq_type *mq) \ +{ \ + a_mq_msg_type *msg; \ + struct timespec timeout; \ + \ + msg = a_prefix##tryget(mq); \ + if (msg != NULL) \ + return (msg); \ + \ + timeout.tv_sec = 0; \ + timeout.tv_nsec = 1; \ + while (true) { \ + nanosleep(&timeout, NULL); \ + msg = a_prefix##tryget(mq); \ + if (msg != NULL) \ + return (msg); \ + if (timeout.tv_sec == 0) { \ + /* Double sleep time, up to max 1 second. 
*/ \ + timeout.tv_nsec <<= 1; \ + if (timeout.tv_nsec >= 1000*1000*1000) { \ + timeout.tv_sec = 1; \ + timeout.tv_nsec = 0; \ + } \ + } \ + } \ +} \ +a_attr void \ +a_prefix##put(a_mq_type *mq, a_mq_msg_type *msg) \ +{ \ + \ + mtx_lock(&mq->lock); \ + ql_elm_new(msg, a_field); \ + ql_tail_insert(&mq->msgs, msg, a_field); \ + mq->count++; \ + mtx_unlock(&mq->lock); \ +} diff --git a/test/include/test/mtx.h b/test/include/test/mtx.h new file mode 100644 index 00000000..bbe822f5 --- /dev/null +++ b/test/include/test/mtx.h @@ -0,0 +1,21 @@ +/* + * mtx is a slightly simplified version of malloc_mutex. This code duplication + * is unfortunate, but there are allocator bootstrapping considerations that + * would leak into the test infrastructure if malloc_mutex were used directly + * in tests. + */ + +typedef struct { +#ifdef _WIN32 + CRITICAL_SECTION lock; +#elif (defined(JEMALLOC_OSSPIN)) + OSSpinLock lock; +#else + pthread_mutex_t lock; +#endif +} mtx_t; + +bool mtx_init(mtx_t *mtx); +void mtx_fini(mtx_t *mtx); +void mtx_lock(mtx_t *mtx); +void mtx_unlock(mtx_t *mtx); diff --git a/test/include/test/thd.h b/test/include/test/thd.h new file mode 100644 index 00000000..f941d7a7 --- /dev/null +++ b/test/include/test/thd.h @@ -0,0 +1,9 @@ +/* Abstraction layer for threading in tests */ +#ifdef _WIN32 +typedef HANDLE thd_t; +#else +typedef pthread_t thd_t; +#endif + +void thd_create(thd_t *thd, void *(*proc)(void *), void *arg); +void thd_join(thd_t thd, void **ret); diff --git a/test/include/test/thread.h b/test/include/test/thread.h deleted file mode 100644 index 3b02a011..00000000 --- a/test/include/test/thread.h +++ /dev/null @@ -1,9 +0,0 @@ -/* Abstraction layer for threading in tests */ -#ifdef _WIN32 -typedef HANDLE je_thread_t; -#else -typedef pthread_t je_thread_t; -#endif - -void je_thread_create(je_thread_t *thread, void *(*proc)(void *), void *arg); -void je_thread_join(je_thread_t thread, void **ret); diff --git a/test/integration/ALLOCM_ARENA.c 
b/test/integration/ALLOCM_ARENA.c index ec91c59b..5bf3c4ab 100644 --- a/test/integration/ALLOCM_ARENA.c +++ b/test/integration/ALLOCM_ARENA.c @@ -3,7 +3,7 @@ #define NTHREADS 10 void * -je_thread_start(void *arg) +thd_start(void *arg) { unsigned thread_ind = (unsigned)(uintptr_t)arg; unsigned arena_ind; @@ -36,16 +36,16 @@ je_thread_start(void *arg) TEST_BEGIN(test_ALLOCM_ARENA) { - je_thread_t threads[NTHREADS]; + thd_t thds[NTHREADS]; unsigned i; for (i = 0; i < NTHREADS; i++) { - je_thread_create(&threads[i], je_thread_start, + thd_create(&thds[i], thd_start, (void *)(uintptr_t)i); } for (i = 0; i < NTHREADS; i++) - je_thread_join(threads[i], NULL); + thd_join(thds[i], NULL); } TEST_END diff --git a/test/integration/allocated.c b/test/integration/allocated.c index 156451dc..3630e80c 100644 --- a/test/integration/allocated.c +++ b/test/integration/allocated.c @@ -9,7 +9,7 @@ static const bool config_stats = ; void * -je_thread_start(void *arg) +thd_start(void *arg) { int err; void *p; @@ -98,16 +98,16 @@ label_ENOENT: TEST_BEGIN(test_main_thread) { - je_thread_start(NULL); + thd_start(NULL); } TEST_END TEST_BEGIN(test_subthread) { - je_thread_t thread; + thd_t thd; - je_thread_create(&thread, je_thread_start, NULL); - je_thread_join(thread, NULL); + thd_create(&thd, thd_start, NULL); + thd_join(thd, NULL); } TEST_END diff --git a/test/integration/jemalloc_integration.h.in b/test/integration/jemalloc_integration.h.in deleted file mode 100644 index 4730aab1..00000000 --- a/test/integration/jemalloc_integration.h.in +++ /dev/null @@ -1,50 +0,0 @@ -/* - * This header should be included by tests, rather than directly including - * jemalloc/jemalloc.h, because --with-install-suffix may cause the header to - * have a different name. - */ -#include "jemalloc/jemalloc@install_suffix@.h" -#include "jemalloc/internal/jemalloc_internal.h" - -/* Abstraction layer for threading in tests. 
*/ -#ifdef _WIN32 -#include - -typedef HANDLE je_thread_t; - -void -je_thread_create(je_thread_t *thread, void *(*proc)(void *), void *arg) -{ - LPTHREAD_START_ROUTINE routine = (LPTHREAD_START_ROUTINE)proc; - *thread = CreateThread(NULL, 0, routine, arg, 0, NULL); - if (*thread == NULL) - test_fail("Error in CreateThread()\n"); -} - -void -je_thread_join(je_thread_t thread, void **ret) -{ - - WaitForSingleObject(thread, INFINITE); -} - -#else -#include - -typedef pthread_t je_thread_t; - -void -je_thread_create(je_thread_t *thread, void *(*proc)(void *), void *arg) -{ - - if (pthread_create(thread, NULL, proc, arg) != 0) - test_fail("Error in pthread_create()\n"); -} - -void -je_thread_join(je_thread_t thread, void **ret) -{ - - pthread_join(thread, ret); -} -#endif diff --git a/test/integration/thread_arena.c b/test/integration/thread_arena.c index cf0aad73..67be5351 100644 --- a/test/integration/thread_arena.c +++ b/test/integration/thread_arena.c @@ -3,7 +3,7 @@ #define NTHREADS 10 void * -je_thread_start(void *arg) +thd_start(void *arg) { unsigned main_arena_ind = *(unsigned *)arg; void *p; @@ -43,7 +43,7 @@ TEST_BEGIN(test_thread_arena) unsigned arena_ind; size_t size; int err; - je_thread_t threads[NTHREADS]; + thd_t thds[NTHREADS]; unsigned i; p = malloc(1); @@ -58,13 +58,13 @@ TEST_BEGIN(test_thread_arena) } for (i = 0; i < NTHREADS; i++) { - je_thread_create(&threads[i], je_thread_start, + thd_create(&thds[i], thd_start, (void *)&arena_ind); } for (i = 0; i < NTHREADS; i++) { intptr_t join_ret; - je_thread_join(threads[i], (void *)&join_ret); + thd_join(thds[i], (void *)&join_ret); assert_zd_eq(join_ret, 0, "Unexpected thread join error"); } } diff --git a/test/integration/thread_tcache_enabled.c b/test/integration/thread_tcache_enabled.c index 52e8a11c..f4e89c68 100644 --- a/test/integration/thread_tcache_enabled.c +++ b/test/integration/thread_tcache_enabled.c @@ -9,7 +9,7 @@ static const bool config_tcache = ; void * -je_thread_start(void *arg) 
+thd_start(void *arg) { int err; size_t sz; @@ -86,16 +86,16 @@ label_ENOENT: TEST_BEGIN(test_main_thread) { - je_thread_start(NULL); + thd_start(NULL); } TEST_END TEST_BEGIN(test_subthread) { - je_thread_t thread; + thd_t thd; - je_thread_create(&thread, je_thread_start, NULL); - je_thread_join(thread, NULL); + thd_create(&thd, thd_start, NULL); + thd_join(thd, NULL); } TEST_END diff --git a/test/src/mtx.c b/test/src/mtx.c new file mode 100644 index 00000000..8d9e0ca1 --- /dev/null +++ b/test/src/mtx.c @@ -0,0 +1,62 @@ +#include "test/jemalloc_test.h" + +bool +mtx_init(mtx_t *mtx) +{ + +#ifdef _WIN32 + if (!InitializeCriticalSectionAndSpinCount(&mtx->lock, _CRT_SPINCOUNT)) + return (true); +#elif (defined(JEMALLOC_OSSPIN)) + mtx->lock = 0; +#else + pthread_mutexattr_t attr; + + if (pthread_mutexattr_init(&attr) != 0) + return (true); + pthread_mutexattr_settype(&attr, MALLOC_MUTEX_TYPE); + if (pthread_mutex_init(&mtx->lock, &attr) != 0) { + pthread_mutexattr_destroy(&attr); + return (true); + } + pthread_mutexattr_destroy(&attr); +#endif + return (false); +} + +void +mtx_fini(mtx_t *mtx) +{ + +#ifdef _WIN32 +#elif (defined(JEMALLOC_OSSPIN)) +#else + pthread_mutex_destroy(&mtx->lock); +#endif +} + +void +mtx_lock(mtx_t *mtx) +{ + +#ifdef _WIN32 + EnterCriticalSection(&mtx->lock); +#elif (defined(JEMALLOC_OSSPIN)) + OSSpinLockLock(&mtx->lock); +#else + pthread_mutex_lock(&mtx->lock); +#endif +} + +void +mtx_unlock(mtx_t *mtx) +{ + +#ifdef _WIN32 + LeaveCriticalSection(&mtx->lock); +#elif (defined(JEMALLOC_OSSPIN)) + OSSpinLockUnlock(&mtx->lock); +#else + pthread_mutex_unlock(&mtx->lock); +#endif +} diff --git a/test/src/thd.c b/test/src/thd.c new file mode 100644 index 00000000..233242a1 --- /dev/null +++ b/test/src/thd.c @@ -0,0 +1,35 @@ +#include "test/jemalloc_test.h" + +#ifdef _WIN32 +void +thd_create(thd_t *thd, void *(*proc)(void *), void *arg) +{ + LPTHREAD_START_ROUTINE routine = (LPTHREAD_START_ROUTINE)proc; + *thd = CreateThread(NULL, 0, routine, arg, 0, 
NULL); + if (*thd == NULL) + test_fail("Error in CreateThread()\n"); +} + +void +thd_join(thd_t thd, void **ret) +{ + + WaitForSingleObject(thd, INFINITE); +} + +#else +void +thd_create(thd_t *thd, void *(*proc)(void *), void *arg) +{ + + if (pthread_create(thd, NULL, proc, arg) != 0) + test_fail("Error in pthread_create()\n"); +} + +void +thd_join(thd_t thd, void **ret) +{ + + pthread_join(thd, ret); +} +#endif diff --git a/test/src/thread.c b/test/src/thread.c deleted file mode 100644 index 5a91e27e..00000000 --- a/test/src/thread.c +++ /dev/null @@ -1,35 +0,0 @@ -#include "test/jemalloc_test.h" - -#ifdef _WIN32 -void -je_thread_create(je_thread_t *thread, void *(*proc)(void *), void *arg) -{ - LPTHREAD_START_ROUTINE routine = (LPTHREAD_START_ROUTINE)proc; - *thread = CreateThread(NULL, 0, routine, arg, 0, NULL); - if (*thread == NULL) - test_fail("Error in CreateThread()\n"); -} - -void -je_thread_join(je_thread_t thread, void **ret) -{ - - WaitForSingleObject(thread, INFINITE); -} - -#else -void -je_thread_create(je_thread_t *thread, void *(*proc)(void *), void *arg) -{ - - if (pthread_create(thread, NULL, proc, arg) != 0) - test_fail("Error in pthread_create()\n"); -} - -void -je_thread_join(je_thread_t thread, void **ret) -{ - - pthread_join(thread, ret); -} -#endif diff --git a/test/unit/mq.c b/test/unit/mq.c new file mode 100644 index 00000000..f0f49905 --- /dev/null +++ b/test/unit/mq.c @@ -0,0 +1,91 @@ +#include "test/jemalloc_test.h" + +#define NSENDERS 3 +#define NMSGS 100000 + +typedef struct mq_msg_s mq_msg_t; +struct mq_msg_s { + mq_msg(mq_msg_t) link; +}; +mq_gen(static, mq_, mq_t, mq_msg_t, link) + +TEST_BEGIN(test_mq_basic) +{ + mq_t mq; + mq_msg_t msg; + + assert_false(mq_init(&mq), "Unexpected mq_init() failure"); + assert_u_eq(mq_count(&mq), 0, "mq should be empty"); + assert_ptr_null(mq_tryget(&mq), + "mq_tryget() should fail when the queue is empty"); + + mq_put(&mq, &msg); + assert_u_eq(mq_count(&mq), 1, "mq should contain one message"); + 
assert_ptr_eq(mq_tryget(&mq), &msg, "mq_tryget() should return msg"); + + mq_put(&mq, &msg); + assert_ptr_eq(mq_get(&mq), &msg, "mq_get() should return msg"); + + mq_fini(&mq); +} +TEST_END + +static void * +thd_receiver_start(void *arg) +{ + mq_t *mq = (mq_t *)arg; + unsigned i; + + for (i = 0; i < (NSENDERS * NMSGS); i++) { + mq_msg_t *msg = mq_get(mq); + assert_ptr_not_null(msg, "mq_get() should never return NULL"); + assert_d_eq(jet_dallocm(msg, 0), ALLOCM_SUCCESS, + "Unexpected dallocm() failure"); + } + return (NULL); +} + +static void * +thd_sender_start(void *arg) +{ + mq_t *mq = (mq_t *)arg; + unsigned i; + + for (i = 0; i < NMSGS; i++) { + mq_msg_t *msg; + assert_d_eq(jet_allocm((void **)&msg, NULL, sizeof(mq_msg_t), + 0), ALLOCM_SUCCESS, "Unexpected allocm() failure"); + mq_put(mq, msg); + } + return (NULL); +} + +TEST_BEGIN(test_mq_threaded) +{ + mq_t mq; + thd_t receiver; + thd_t senders[NSENDERS]; + unsigned i; + + assert_false(mq_init(&mq), "Unexpected mq_init() failure"); + + thd_create(&receiver, thd_receiver_start, (void *)&mq); + for (i = 0; i < NSENDERS; i++) + thd_create(&senders[i], thd_sender_start, (void *)&mq); + + thd_join(receiver, NULL); + for (i = 0; i < NSENDERS; i++) + thd_join(senders[i], NULL); + + mq_fini(&mq); +} +TEST_END + +int +main(void) +{ + return (test( + test_mq_basic, + test_mq_threaded)); +} + diff --git a/test/unit/mtx.c b/test/unit/mtx.c new file mode 100644 index 00000000..96ff6948 --- /dev/null +++ b/test/unit/mtx.c @@ -0,0 +1,60 @@ +#include "test/jemalloc_test.h" + +#define NTHREADS 2 +#define NINCRS 2000000 + +TEST_BEGIN(test_mtx_basic) +{ + mtx_t mtx; + + assert_false(mtx_init(&mtx), "Unexpected mtx_init() failure"); + mtx_lock(&mtx); + mtx_unlock(&mtx); + mtx_fini(&mtx); +} +TEST_END + +typedef struct { + mtx_t mtx; + unsigned x; +} thd_start_arg_t; + +static void * +thd_start(void *varg) +{ + thd_start_arg_t *arg = (thd_start_arg_t *)varg; + unsigned i; + + for (i = 0; i < NINCRS; i++) { + mtx_lock(&arg->mtx); 
+ arg->x++; + mtx_unlock(&arg->mtx); + } + return (NULL); +} + +TEST_BEGIN(test_mtx_race) +{ + thd_start_arg_t arg; + thd_t thds[NTHREADS]; + unsigned i; + + assert_false(mtx_init(&arg.mtx), "Unexpected mtx_init() failure"); + arg.x = 0; + for (i = 0; i < NTHREADS; i++) + thd_create(&thds[i], thd_start, (void *)&arg); + for (i = 0; i < NTHREADS; i++) + thd_join(thds[i], NULL); + assert_u_eq(arg.x, NTHREADS * NINCRS, + "Race-related counter corruption"); +} +TEST_END + +int +main(void) +{ + + return (test( + test_mtx_basic, + test_mtx_race)); +} diff --git a/test/unit/tsd.c b/test/unit/tsd.c index 71feb847..f421c1a3 100644 --- a/test/unit/tsd.c +++ b/test/unit/tsd.c @@ -22,8 +22,8 @@ malloc_tsd_externs(data, data_t) malloc_tsd_data(, data, data_t, DATA_INIT) malloc_tsd_funcs(, data, data_t, DATA_INIT, data_cleanup) -void * -je_thread_start(void *arg) +static void * +thd_start(void *arg) { data_t d = (data_t)(uintptr_t)arg; assert_x_eq(*data_tsd_get(), DATA_INIT, @@ -37,23 +37,23 @@ je_thread_start(void *arg) assert_x_eq(*data_tsd_get(), (data_t)(uintptr_t)arg, "Resetting local data should have no effect on tsd"); - return NULL; + return (NULL); } TEST_BEGIN(test_tsd_main_thread) { - je_thread_start((void *) 0xa5f3e329); + thd_start((void *) 0xa5f3e329); } TEST_END TEST_BEGIN(test_tsd_sub_thread) { - je_thread_t thread; + thd_t thd; data_cleanup_executed = false; - je_thread_create(&thread, je_thread_start, (void *) THREAD_DATA); - je_thread_join(thread, NULL); + thd_create(&thd, thd_start, (void *)THREAD_DATA); + thd_join(thd, NULL); assert_true(data_cleanup_executed, "Cleanup function should have executed"); } From 00a9cc7b6dd04cf8f2e4406cf5262dd8ded315c9 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 12 Dec 2013 14:58:26 -0800 Subject: [PATCH 0344/3378] Streamline test output. 
--- test/src/test.c | 12 +++++++----- test/test.sh.in | 17 +++++++++-------- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/test/src/test.c b/test/src/test.c index eb1f5ef9..74eb9dce 100644 --- a/test/src/test.c +++ b/test/src/test.c @@ -72,11 +72,13 @@ p_test(test_t* t, ...) } va_end(ap); - malloc_printf("tests: %u, pass: %u, skip: %u, fail: %u\n", - test_count, - test_counts[test_status_pass], - test_counts[test_status_skip], - test_counts[test_status_fail]); + malloc_printf("--- %s: %u/%u, %s: %u/%u, %s: %u/%u ---\n", + test_status_string(test_status_pass), + test_counts[test_status_pass], test_count, + test_status_string(test_status_skip), + test_counts[test_status_skip], test_count, + test_status_string(test_status_fail), + test_counts[test_status_fail], test_count); return (ret); } diff --git a/test/test.sh.in b/test/test.sh.in index 93c1978f..a39f99f6 100644 --- a/test/test.sh.in +++ b/test/test.sh.in @@ -16,16 +16,16 @@ pass_code=0 skip_code=1 fail_code=2 -echo "================================================================================" pass_count=0 skip_count=0 fail_count=0 for t in $@; do - echo "${t}:" - ${t}@exe@ @abs_srcroot@ @abs_objroot@ > @objroot@${t}.out 2>&1 + if [ $pass_count -ne 0 -o $skip_count -ne 0 -o $fail_count != 0 ] ; then + echo + fi + echo "=== ${t} ===" + ${t}@exe@ @abs_srcroot@ @abs_objroot@ result_code=$? 
- /bin/echo -n " " - tail -n 1 @objroot@${t}.out case ${result_code} in ${pass_code}) pass_count=$((pass_count+1)) @@ -35,15 +35,16 @@ for t in $@; do ;; ${fail_code}) fail_count=$((fail_count+1)) - echo " *** ${t} failure; see @objroot@${t}.out for full output ***" 1>&2 ;; *) echo "Test harness error" 1>&2 exit 1 esac done -echo "================================================================================" -echo "Test suite summary: pass: ${pass_count}, skip: ${skip_count}, fail: ${fail_count}" + +total_count=`expr ${pass_count} + ${skip_count} + ${fail_count}` +echo +echo "Test suite summary: pass: ${pass_count}/${total_count}, skip: ${skip_count}/${total_count}, fail: ${fail_count}/${total_count}" if [ ${fail_count} -eq 0 ] ; then exit 0 From dfecadf4b28da0d582357b5a5180b1cc8d57c748 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 12 Dec 2013 15:05:24 -0800 Subject: [PATCH 0345/3378] Fix a strict aliasing violation. --- test/src/SFMT.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/test/src/SFMT.c b/test/src/SFMT.c index 9fade282..433d7f6e 100644 --- a/test/src/SFMT.c +++ b/test/src/SFMT.c @@ -602,13 +602,15 @@ void fill_array64(sfmt_t *ctx, uint64_t *array, int size) { * @param seed a 32-bit integer used as the seed. */ sfmt_t *init_gen_rand(uint32_t seed) { + void *p; sfmt_t *ctx; int i; uint32_t *psfmt32; - if (posix_memalign((void **)&ctx, sizeof(w128_t), sizeof(sfmt_t)) != 0) { + if (posix_memalign(&p, sizeof(w128_t), sizeof(sfmt_t)) != 0) { return NULL; } + ctx = (sfmt_t *)p; psfmt32 = &ctx->sfmt[0].u[0]; psfmt32[idxof(0)] = seed; @@ -631,6 +633,7 @@ sfmt_t *init_gen_rand(uint32_t seed) { * @param key_length the length of init_key. 
*/ sfmt_t *init_by_array(uint32_t *init_key, int key_length) { + void *p; sfmt_t *ctx; int i, j, count; uint32_t r; @@ -639,9 +642,10 @@ sfmt_t *init_by_array(uint32_t *init_key, int key_length) { int size = N * 4; uint32_t *psfmt32; - if (posix_memalign((void **)&ctx, sizeof(w128_t), sizeof(sfmt_t)) != 0) { + if (posix_memalign(&p, sizeof(w128_t), sizeof(sfmt_t)) != 0) { return NULL; } + ctx = (sfmt_t *)p; psfmt32 = &ctx->sfmt[0].u[0]; if (size >= 623) { From a2be4779b19a491a5686cfda067f98d7d70a9056 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 12 Dec 2013 15:14:51 -0800 Subject: [PATCH 0346/3378] Fix a malloc_mutex dependency in mtx. --- test/src/mtx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/src/mtx.c b/test/src/mtx.c index 8d9e0ca1..41b95d59 100644 --- a/test/src/mtx.c +++ b/test/src/mtx.c @@ -14,7 +14,7 @@ mtx_init(mtx_t *mtx) if (pthread_mutexattr_init(&attr) != 0) return (true); - pthread_mutexattr_settype(&attr, MALLOC_MUTEX_TYPE); + pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_DEFAULT); if (pthread_mutex_init(&mtx->lock, &attr) != 0) { pthread_mutexattr_destroy(&attr); return (true); From 0ac396a06a10f8a8c1d41c8771367625e7d49d07 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 12 Dec 2013 15:20:49 -0800 Subject: [PATCH 0347/3378] Fix a strict aliasing violation. 
--- test/unit/mq.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/test/unit/mq.c b/test/unit/mq.c index f0f49905..01e72fd1 100644 --- a/test/unit/mq.c +++ b/test/unit/mq.c @@ -53,8 +53,10 @@ thd_sender_start(void *arg) for (i = 0; i < NMSGS; i++) { mq_msg_t *msg; - assert_d_eq(jet_allocm((void **)&msg, NULL, sizeof(mq_msg_t), - 0), ALLOCM_SUCCESS, "Unexpected allocm() failure"); + void *p; + assert_d_eq(jet_allocm(&p, NULL, sizeof(mq_msg_t), 0), + ALLOCM_SUCCESS, "Unexpected allocm() failure"); + msg = (mq_msg_t *)p; mq_put(mq, msg); } return (NULL); From d82a5e6a34f20698ab9368bb2b4953b81d175552 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 12 Dec 2013 22:35:52 -0800 Subject: [PATCH 0348/3378] Implement the *allocx() API. Implement the *allocx() API, which is a successor to the *allocm() API. The *allocx() functions are slightly simpler to use because they have fewer parameters, they directly return the results of primary interest, and mallocx()/rallocx() avoid the strict aliasing pitfall that allocm()/rallocm() share with posix_memalign(). 
The following code violates strict aliasing rules: foo_t *foo; allocm((void **)&foo, NULL, 42, 0); whereas the following is safe: foo_t *foo; void *p; allocm(&p, NULL, 42, 0); foo = (foo_t *)p; mallocx() does not have this problem: foo_t *foo = (foo_t *)mallocx(42, 0); --- Makefile.in | 7 +- configure.ac | 2 +- doc/jemalloc.xml.in | 248 +++++-- .../jemalloc/internal/jemalloc_internal.h.in | 45 +- include/jemalloc/internal/private_symbols.txt | 12 +- include/jemalloc/internal/public_symbols.txt | 10 +- include/jemalloc/jemalloc_macros.h.in | 21 +- include/jemalloc/jemalloc_mangle.h.in | 16 +- include/jemalloc/jemalloc_protos.h.in | 30 +- src/arena.c | 6 +- src/huge.c | 2 +- src/jemalloc.c | 646 +++++++++++------- src/tcache.c | 4 +- src/tsd.c | 2 +- test/integration/mallocx.c | 149 ++++ test/integration/rallocm.c | 2 - test/integration/rallocx.c | 51 ++ test/integration/xallocx.c | 59 ++ test/unit/mq.c | 7 +- 19 files changed, 954 insertions(+), 365 deletions(-) create mode 100644 test/integration/mallocx.c create mode 100644 test/integration/rallocx.c create mode 100644 test/integration/xallocx.c diff --git a/Makefile.in b/Makefile.in index cd137fd9..af60a21c 100644 --- a/Makefile.in +++ b/Makefile.in @@ -112,13 +112,16 @@ TESTS_UNIT := $(srcroot)test/unit/bitmap.c $(srcroot)test/unit/math.c \ $(srcroot)test/unit/SFMT.c $(srcroot)test/unit/tsd.c TESTS_INTEGRATION := $(srcroot)test/integration/aligned_alloc.c \ $(srcroot)test/integration/allocated.c \ - $(srcroot)test/integration/ALLOCM_ARENA.c \ + $(srcroot)test/integration/mallocx.c \ $(srcroot)test/integration/mremap.c \ $(srcroot)test/integration/posix_memalign.c \ + $(srcroot)test/integration/rallocx.c \ $(srcroot)test/integration/thread_arena.c \ - $(srcroot)test/integration/thread_tcache_enabled.c + $(srcroot)test/integration/thread_tcache_enabled.c \ + $(srcroot)test/integration/xallocx.c ifeq ($(enable_experimental), 1) TESTS_INTEGRATION += $(srcroot)test/integration/allocm.c \ + 
$(srcroot)test/integration/ALLOCM_ARENA.c \ $(srcroot)test/integration/rallocm.c endif TESTS_STRESS := diff --git a/configure.ac b/configure.ac index 02842b63..724bc1a2 100644 --- a/configure.ac +++ b/configure.ac @@ -417,7 +417,7 @@ AC_PROG_RANLIB AC_PATH_PROG([LD], [ld], [false], [$PATH]) AC_PATH_PROG([AUTOCONF], [autoconf], [false], [$PATH]) -public_syms="malloc_conf malloc_message malloc calloc posix_memalign aligned_alloc realloc free malloc_usable_size malloc_stats_print mallctl mallctlnametomib mallctlbymib" +public_syms="malloc_conf malloc_message malloc calloc posix_memalign aligned_alloc realloc free mallocx rallocx xallocx sallocx dallocx nallocx mallctl mallctlnametomib mallctlbymib malloc_stats_print malloc_usable_size" dnl Check for allocator-related functions that should be wrapped. AC_CHECK_FUNC([memalign], diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 596f6458..d6f72722 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -33,11 +33,17 @@ aligned_alloc realloc free - malloc_usable_size - malloc_stats_print + mallocx + rallocx + xallocx + sallocx + dallocx + nallocx mallctl mallctlnametomib mallctlbymib + malloc_stats_print + malloc_usable_size allocm rallocm sallocm @@ -92,16 +98,37 @@ Non-standard API - size_t malloc_usable_size - const void *ptr + void *mallocx + size_t size + int flags - void malloc_stats_print - void (*write_cb) - void *, const char * - - void *cbopaque - const char *opts + void *rallocx + void *ptr + size_t size + int flags + + + size_t xallocx + void *ptr + size_t size + size_t extra + int flags + + + size_t sallocx + void *ptr + int flags + + + void dallocx + void *ptr + int flags + + + size_t nallocx + size_t size + int flags int mallctl @@ -126,6 +153,18 @@ void *newp size_t newlen + + void malloc_stats_print + void (*write_cb) + void *, const char * + + void *cbopaque + const char *opts + + + size_t malloc_usable_size + const void *ptr + void (*malloc_message) void *cbopaque @@ -225,41 +264,99 @@ 
Non-standard API + The mallocx, + rallocx, + xallocx, + sallocx, + dallocx, and + nallocx functions all have a + flags argument that can be used to specify + options. The functions only check the options that are contextually + relevant. Use bitwise or (|) operations to + specify one or more of the following: + + + MALLOCX_LG_ALIGN(la) + - The malloc_usable_size function - returns the usable size of the allocation pointed to by - ptr. The return value may be larger than the size - that was requested during allocation. The - malloc_usable_size function is not a - mechanism for in-place realloc; rather - it is provided solely as a tool for introspection purposes. Any - discrepancy between the requested allocation size and the size reported - by malloc_usable_size should not be - depended on, since such behavior is entirely implementation-dependent. + Align the memory allocation to start at an address + that is a multiple of (1 << + la). This macro does not validate + that la is within the valid + range. + + + MALLOCX_ALIGN(a) + + + Align the memory allocation to start at an address + that is a multiple of a, where + a is a power of two. This macro does not + validate that a is a power of 2. + + + + MALLOCX_ZERO + + Initialize newly allocated memory to contain zero + bytes. In the growing reallocation case, the real size prior to + reallocation defines the boundary between untouched bytes and those + that are initialized to contain zero bytes. If this macro is + absent, newly allocated memory is uninitialized. + + + MALLOCX_ARENA(a) + + + Use the arena specified by the index + a (and by necessity bypass the thread + cache). This macro has no effect for huge regions, nor for regions + that were allocated via an arena other than the one specified. + This macro does not validate that a + specifies an arena index in the valid range. 
+ + - The malloc_stats_print function - writes human-readable summary statistics via the - write_cb callback function pointer and - cbopaque data passed to - write_cb, or - malloc_message if - write_cb is NULL. This - function can be called repeatedly. General information that never - changes during execution can be omitted by specifying "g" as a character - within the opts string. Note that - malloc_message uses the - mallctl* functions internally, so - inconsistent statistics can be reported if multiple threads use these - functions simultaneously. If is - specified during configuration, “m” and “a” can - be specified to omit merged arena and per arena statistics, respectively; - “b” and “l” can be specified to omit per size - class statistics for bins and large objects, respectively. Unrecognized - characters are silently ignored. Note that thread caching may prevent - some statistics from being completely up to date, since extra locking - would be required to merge counters that track thread cache operations. - + The mallocx function allocates at + least size bytes of memory, and returns a pointer + to the base address of the allocation. Behavior is undefined if + size is 0. + + The rallocx function resizes the + allocation at ptr to be at least + size bytes, and returns a pointer to the base + address of the resulting allocation, which may or may not have moved from + its original location. Behavior is undefined if + size is 0. + + The xallocx function resizes the + allocation at ptr in place to be at least + size bytes, and returns the real size of the + allocation. If extra is non-zero, an attempt is + made to resize the allocation to be at least (size + + extra) bytes, though inability to allocate + the extra byte(s) will not by itself result in failure to resize. + Behavior is undefined if size is + 0, or if (size + extra + > SIZE_T_MAX). + + The sallocx function returns the + real size of the allocation at ptr. 
+ + The dallocx function causes the + memory referenced by ptr to be made available for + future allocations. + + The nallocx function allocates no + memory, but it performs the same size computation as the + mallocx function, and returns the real + size of the allocation that would result from the equivalent + mallocx function call. Behavior is + undefined if size is + 0. The mallctl function provides a general interface for introspecting the memory allocator, as well as @@ -314,6 +411,41 @@ for (i = 0; i < nbins; i++) { mallctlbymib(mib, miblen, &bin_size, &len, NULL, 0); /* Do something with bin_size... */ }]]> + + The malloc_stats_print function + writes human-readable summary statistics via the + write_cb callback function pointer and + cbopaque data passed to + write_cb, or + malloc_message if + write_cb is NULL. This + function can be called repeatedly. General information that never + changes during execution can be omitted by specifying "g" as a character + within the opts string. Note that + malloc_message uses the + mallctl* functions internally, so + inconsistent statistics can be reported if multiple threads use these + functions simultaneously. If is + specified during configuration, “m” and “a” can + be specified to omit merged arena and per arena statistics, respectively; + “b” and “l” can be specified to omit per size + class statistics for bins and large objects, respectively. Unrecognized + characters are silently ignored. Note that thread caching may prevent + some statistics from being completely up to date, since extra locking + would be required to merge counters that track thread cache operations. + + + The malloc_usable_size function + returns the usable size of the allocation pointed to by + ptr. The return value may be larger than the size + that was requested during allocation. The + malloc_usable_size function is not a + mechanism for in-place realloc; rather + it is provided solely as a tool for introspection purposes. 
Any + discrepancy between the requested allocation size and the size reported + by malloc_usable_size should not be + depended on, since such behavior is entirely implementation-dependent. + Experimental API @@ -398,7 +530,7 @@ for (i = 0; i < nbins; i++) { rsize is not NULL. If extra is non-zero, an attempt is made to resize the allocation to be at least size + + language="C">(size + extra) bytes, though inability to allocate the extra byte(s) will not by itself result in failure. Behavior is undefined if size is 0, or if @@ -936,7 +1068,8 @@ for (i = 0; i < nbins; i++) { Zero filling enabled/disabled. If enabled, each byte of uninitialized allocated memory will be initialized to 0. Note that this initialization only happens once for each byte, so - realloc and + realloc, + rallocx and rallocm calls do not zero memory that was previously allocated. This is intended for debugging and will impact performance negatively. This option is disabled by default. @@ -2039,9 +2172,26 @@ malloc_conf = "xmalloc:true";]]> Non-standard API - The malloc_usable_size function - returns the usable size of the allocation pointed to by - ptr. + The mallocx and + rallocx functions return a pointer to + the allocated memory if successful; otherwise a NULL + pointer is returned to indicate insufficient contiguous memory was + available to service the allocation request. + + The xallocx function returns the + real size of the resulting resized allocation pointed to by + ptr, which is a value less than + size if the allocation could not be adequately + grown in place. + + The sallocx function returns the + real size of the allocation pointed to by ptr. + + + The nallocx returns the real size + that would result from a successful equivalent + mallocx function call, or zero if + insufficient memory is available to perform the size computation. 
The mallctl, mallctlnametomib, and @@ -2092,6 +2242,10 @@ malloc_conf = "xmalloc:true";]]> + + The malloc_usable_size function + returns the usable size of the allocation pointed to by + ptr. Experimental API diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 3dd9761d..f380bbfb 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -228,6 +228,7 @@ static const bool config_ivsalloc = #include "jemalloc/internal/jemalloc_internal_macros.h" +#define MALLOCX_LG_ALIGN_MASK ((int)0x3f) #define ALLOCM_LG_ALIGN_MASK ((int)0x3f) /* Smallest size class to support. */ @@ -731,22 +732,22 @@ choose_arena(arena_t *arena) #include "jemalloc/internal/quarantine.h" #ifndef JEMALLOC_ENABLE_INLINE -void *imallocx(size_t size, bool try_tcache, arena_t *arena); +void *imalloct(size_t size, bool try_tcache, arena_t *arena); void *imalloc(size_t size); -void *icallocx(size_t size, bool try_tcache, arena_t *arena); +void *icalloct(size_t size, bool try_tcache, arena_t *arena); void *icalloc(size_t size); -void *ipallocx(size_t usize, size_t alignment, bool zero, bool try_tcache, +void *ipalloct(size_t usize, size_t alignment, bool zero, bool try_tcache, arena_t *arena); void *ipalloc(size_t usize, size_t alignment, bool zero); size_t isalloc(const void *ptr, bool demote); size_t ivsalloc(const void *ptr, bool demote); size_t u2rz(size_t usize); size_t p2rz(const void *ptr); -void idallocx(void *ptr, bool try_tcache); +void idalloct(void *ptr, bool try_tcache); void idalloc(void *ptr); -void iqallocx(void *ptr, bool try_tcache); +void iqalloct(void *ptr, bool try_tcache); void iqalloc(void *ptr); -void *irallocx(void *ptr, size_t size, size_t extra, size_t alignment, +void *iralloct(void *ptr, size_t size, size_t extra, size_t alignment, bool zero, bool no_move, bool try_tcache_alloc, bool try_tcache_dalloc, arena_t *arena); void *iralloc(void *ptr, size_t 
size, size_t extra, size_t alignment, @@ -756,7 +757,7 @@ malloc_tsd_protos(JEMALLOC_ATTR(unused), thread_allocated, thread_allocated_t) #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) JEMALLOC_ALWAYS_INLINE void * -imallocx(size_t size, bool try_tcache, arena_t *arena) +imalloct(size_t size, bool try_tcache, arena_t *arena) { assert(size != 0); @@ -771,11 +772,11 @@ JEMALLOC_ALWAYS_INLINE void * imalloc(size_t size) { - return (imallocx(size, true, NULL)); + return (imalloct(size, true, NULL)); } JEMALLOC_ALWAYS_INLINE void * -icallocx(size_t size, bool try_tcache, arena_t *arena) +icalloct(size_t size, bool try_tcache, arena_t *arena) { if (size <= arena_maxclass) @@ -788,11 +789,11 @@ JEMALLOC_ALWAYS_INLINE void * icalloc(size_t size) { - return (icallocx(size, true, NULL)); + return (icalloct(size, true, NULL)); } JEMALLOC_ALWAYS_INLINE void * -ipallocx(size_t usize, size_t alignment, bool zero, bool try_tcache, +ipalloct(size_t usize, size_t alignment, bool zero, bool try_tcache, arena_t *arena) { void *ret; @@ -820,7 +821,7 @@ JEMALLOC_ALWAYS_INLINE void * ipalloc(size_t usize, size_t alignment, bool zero) { - return (ipallocx(usize, alignment, zero, true, NULL)); + return (ipalloct(usize, alignment, zero, true, NULL)); } /* @@ -881,7 +882,7 @@ p2rz(const void *ptr) } JEMALLOC_ALWAYS_INLINE void -idallocx(void *ptr, bool try_tcache) +idalloct(void *ptr, bool try_tcache) { arena_chunk_t *chunk; @@ -898,28 +899,28 @@ JEMALLOC_ALWAYS_INLINE void idalloc(void *ptr) { - idallocx(ptr, true); + idalloct(ptr, true); } JEMALLOC_ALWAYS_INLINE void -iqallocx(void *ptr, bool try_tcache) +iqalloct(void *ptr, bool try_tcache) { if (config_fill && opt_quarantine) quarantine(ptr); else - idallocx(ptr, try_tcache); + idalloct(ptr, try_tcache); } JEMALLOC_ALWAYS_INLINE void iqalloc(void *ptr) { - iqallocx(ptr, true); + iqalloct(ptr, true); } JEMALLOC_ALWAYS_INLINE void * -irallocx(void *ptr, size_t size, size_t extra, size_t alignment, bool zero, +iralloct(void 
*ptr, size_t size, size_t extra, size_t alignment, bool zero, bool no_move, bool try_tcache_alloc, bool try_tcache_dalloc, arena_t *arena) { void *ret; @@ -943,7 +944,7 @@ irallocx(void *ptr, size_t size, size_t extra, size_t alignment, bool zero, usize = sa2u(size + extra, alignment); if (usize == 0) return (NULL); - ret = ipallocx(usize, alignment, zero, try_tcache_alloc, arena); + ret = ipalloct(usize, alignment, zero, try_tcache_alloc, arena); if (ret == NULL) { if (extra == 0) return (NULL); @@ -951,7 +952,7 @@ irallocx(void *ptr, size_t size, size_t extra, size_t alignment, bool zero, usize = sa2u(size, alignment); if (usize == 0) return (NULL); - ret = ipallocx(usize, alignment, zero, try_tcache_alloc, + ret = ipalloct(usize, alignment, zero, try_tcache_alloc, arena); if (ret == NULL) return (NULL); @@ -963,7 +964,7 @@ irallocx(void *ptr, size_t size, size_t extra, size_t alignment, bool zero, */ copysize = (size < oldsize) ? size : oldsize; memcpy(ret, ptr, copysize); - iqallocx(ptr, try_tcache_dalloc); + iqalloct(ptr, try_tcache_dalloc); return (ret); } @@ -992,7 +993,7 @@ iralloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero, bool no_move) { - return (irallocx(ptr, size, extra, alignment, zero, no_move, true, true, + return (iralloct(ptr, size, extra, alignment, zero, no_move, true, true, NULL)); } diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 9fbc625f..541e1b2c 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -207,17 +207,17 @@ huge_ralloc_no_move huge_salloc iallocm icalloc -icallocx +icalloct idalloc -idallocx +idalloct imalloc -imallocx +imalloct ipalloc -ipallocx +ipalloct iqalloc -iqallocx +iqalloct iralloc -irallocx +iralloct isalloc isthreaded ivsalloc diff --git a/include/jemalloc/internal/public_symbols.txt b/include/jemalloc/internal/public_symbols.txt index 7d097422..e27c0e5b 100644 --- 
a/include/jemalloc/internal/public_symbols.txt +++ b/include/jemalloc/internal/public_symbols.txt @@ -6,11 +6,17 @@ posix_memalign aligned_alloc realloc free -malloc_usable_size -malloc_stats_print +mallocx +rallocx +xallocx +sallocx +dallocx +nallocx mallctl mallctlnametomib mallctlbymib +malloc_stats_print +malloc_usable_size memalign valloc allocm diff --git a/include/jemalloc/jemalloc_macros.h.in b/include/jemalloc/jemalloc_macros.h.in index d1455319..9773bcbc 100644 --- a/include/jemalloc/jemalloc_macros.h.in +++ b/include/jemalloc/jemalloc_macros.h.in @@ -8,6 +8,17 @@ #define JEMALLOC_VERSION_NREV @jemalloc_version_nrev@ #define JEMALLOC_VERSION_GID "@jemalloc_version_gid@" +# define MALLOCX_LG_ALIGN(la) (la) +# if LG_SIZEOF_PTR == 2 +# define MALLOCX_ALIGN(a) (ffs(a)-1) +# else +# define MALLOCX_ALIGN(a) \ + ((a < (size_t)INT_MAX) ? ffs(a)-1 : ffs(a>>32)+31) +# endif +# define MALLOCX_ZERO ((int)0x40) +/* Bias arena index bits so that 0 encodes "MALLOCX_ARENA() unspecified". */ +# define MALLOCX_ARENA(a) ((int)(((a)+1) << 8)) + #ifdef JEMALLOC_EXPERIMENTAL # define ALLOCM_LG_ALIGN(la) (la) # if LG_SIZEOF_PTR == 2 @@ -39,11 +50,17 @@ # undef je_aligned_alloc # undef je_realloc # undef je_free -# undef je_malloc_usable_size -# undef je_malloc_stats_print +# undef je_mallocx +# undef je_rallocx +# undef je_xallocx +# undef je_sallocx +# undef je_dallocx +# undef je_nallocx # undef je_mallctl # undef je_mallctlnametomib # undef je_mallctlbymib +# undef je_malloc_stats_print +# undef je_malloc_usable_size # undef je_memalign # undef je_valloc # undef je_allocm diff --git a/include/jemalloc/jemalloc_mangle.h.in b/include/jemalloc/jemalloc_mangle.h.in index 215de9d0..7018a752 100644 --- a/include/jemalloc/jemalloc_mangle.h.in +++ b/include/jemalloc/jemalloc_mangle.h.in @@ -17,11 +17,17 @@ # define aligned_alloc je_aligned_alloc # define realloc je_realloc # define free je_free -# define malloc_usable_size je_malloc_usable_size -# define malloc_stats_print 
je_malloc_stats_print +# define mallocx je_mallocx +# define rallocx je_rallocx +# define xallocx je_xallocx +# define sallocx je_sallocx +# define dallocx je_dallocx +# define nallocx je_nallocx # define mallctl je_mallctl # define mallctlnametomib je_mallctlnametomib # define mallctlbymib je_mallctlbymib +# define malloc_stats_print je_malloc_stats_print +# define malloc_usable_size je_malloc_usable_size # define memalign je_memalign # define valloc je_valloc # ifdef JEMALLOC_EXPERIMENTAL @@ -56,6 +62,12 @@ # undef je_mallctlbymib # undef je_memalign # undef je_valloc +# undef je_mallocx +# undef je_rallocx +# undef je_xallocx +# undef je_sallocx +# undef je_dallocx +# undef je_nallocx # ifdef JEMALLOC_EXPERIMENTAL # undef je_allocm # undef je_rallocm diff --git a/include/jemalloc/jemalloc_protos.h.in b/include/jemalloc/jemalloc_protos.h.in index 3dad8596..25446de3 100644 --- a/include/jemalloc/jemalloc_protos.h.in +++ b/include/jemalloc/jemalloc_protos.h.in @@ -17,6 +17,25 @@ JEMALLOC_EXPORT void *@je_@aligned_alloc(size_t alignment, size_t size) JEMALLOC_EXPORT void *@je_@realloc(void *ptr, size_t size); JEMALLOC_EXPORT void @je_@free(void *ptr); +JEMALLOC_EXPORT void *@je_@mallocx(size_t size, int flags); +JEMALLOC_EXPORT void *@je_@rallocx(void *ptr, size_t size, int flags); +JEMALLOC_EXPORT size_t @je_@xallocx(void *ptr, size_t size, size_t extra, + int flags); +JEMALLOC_EXPORT size_t @je_@sallocx(const void *ptr, int flags); +JEMALLOC_EXPORT void @je_@dallocx(void *ptr, int flags); +JEMALLOC_EXPORT size_t @je_@nallocx(size_t size, int flags); + +JEMALLOC_EXPORT int @je_@mallctl(const char *name, void *oldp, + size_t *oldlenp, void *newp, size_t newlen); +JEMALLOC_EXPORT int @je_@mallctlnametomib(const char *name, size_t *mibp, + size_t *miblenp); +JEMALLOC_EXPORT int @je_@mallctlbymib(const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen); +JEMALLOC_EXPORT void @je_@malloc_stats_print(void (*write_cb)(void *, + const 
char *), void *@je_@cbopaque, const char *opts); +JEMALLOC_EXPORT size_t @je_@malloc_usable_size( + JEMALLOC_USABLE_SIZE_CONST void *ptr); + #ifdef JEMALLOC_OVERRIDE_MEMALIGN JEMALLOC_EXPORT void * @je_@memalign(size_t alignment, size_t size) JEMALLOC_ATTR(malloc); @@ -26,17 +45,6 @@ JEMALLOC_EXPORT void * @je_@memalign(size_t alignment, size_t size) JEMALLOC_EXPORT void * @je_@valloc(size_t size) JEMALLOC_ATTR(malloc); #endif -JEMALLOC_EXPORT size_t @je_@malloc_usable_size( - JEMALLOC_USABLE_SIZE_CONST void *ptr); -JEMALLOC_EXPORT void @je_@malloc_stats_print(void (*write_cb)(void *, - const char *), void *@je_@cbopaque, const char *opts); -JEMALLOC_EXPORT int @je_@mallctl(const char *name, void *oldp, - size_t *oldlenp, void *newp, size_t newlen); -JEMALLOC_EXPORT int @je_@mallctlnametomib(const char *name, size_t *mibp, - size_t *miblenp); -JEMALLOC_EXPORT int @je_@mallctlbymib(const size_t *mib, size_t miblen, - void *oldp, size_t *oldlenp, void *newp, size_t newlen); - #ifdef JEMALLOC_EXPERIMENTAL JEMALLOC_EXPORT int @je_@allocm(void **ptr, size_t *rsize, size_t size, int flags) JEMALLOC_ATTR(nonnull(1)); diff --git a/src/arena.c b/src/arena.c index 145de863..4a460130 100644 --- a/src/arena.c +++ b/src/arena.c @@ -2031,7 +2031,7 @@ arena_ralloc(arena_t *arena, void *ptr, size_t oldsize, size_t size, size_t usize = sa2u(size + extra, alignment); if (usize == 0) return (NULL); - ret = ipallocx(usize, alignment, zero, try_tcache_alloc, arena); + ret = ipalloct(usize, alignment, zero, try_tcache_alloc, arena); } else ret = arena_malloc(arena, size + extra, zero, try_tcache_alloc); @@ -2043,7 +2043,7 @@ arena_ralloc(arena_t *arena, void *ptr, size_t oldsize, size_t size, size_t usize = sa2u(size, alignment); if (usize == 0) return (NULL); - ret = ipallocx(usize, alignment, zero, try_tcache_alloc, + ret = ipalloct(usize, alignment, zero, try_tcache_alloc, arena); } else ret = arena_malloc(arena, size, zero, try_tcache_alloc); @@ -2061,7 +2061,7 @@ 
arena_ralloc(arena_t *arena, void *ptr, size_t oldsize, size_t size, copysize = (size < oldsize) ? size : oldsize; VALGRIND_MAKE_MEM_UNDEFINED(ret, copysize); memcpy(ret, ptr, copysize); - iqallocx(ptr, try_tcache_dalloc); + iqalloct(ptr, try_tcache_dalloc); return (ret); } diff --git a/src/huge.c b/src/huge.c index 443b4007..33fab684 100644 --- a/src/huge.c +++ b/src/huge.c @@ -181,7 +181,7 @@ huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, #endif { memcpy(ret, ptr, copysize); - iqallocx(ptr, try_tcache_dalloc); + iqalloct(ptr, try_tcache_dalloc); } return (ret); } diff --git a/src/jemalloc.c b/src/jemalloc.c index f13a7d8c..f8c8119d 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1337,28 +1337,363 @@ JEMALLOC_EXPORT void *(* __memalign_hook)(size_t alignment, size_t size) = * Begin non-standard functions. */ -size_t -je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr) +JEMALLOC_ALWAYS_INLINE_C void * +imallocx(size_t usize, size_t alignment, bool zero, bool try_tcache, + arena_t *arena) { - size_t ret; + + assert(usize == ((alignment == 0) ? s2u(usize) : sa2u(usize, + alignment))); + + if (alignment != 0) + return (ipalloct(usize, alignment, zero, try_tcache, arena)); + else if (zero) + return (icalloct(usize, try_tcache, arena)); + else + return (imalloct(usize, try_tcache, arena)); +} + +void * +je_mallocx(size_t size, int flags) +{ + void *p; + size_t usize; + size_t alignment = (ZU(1) << (flags & MALLOCX_LG_ALIGN_MASK) + & (SIZE_T_MAX-1)); + bool zero = flags & MALLOCX_ZERO; + unsigned arena_ind = ((unsigned)(flags >> 8)) - 1; + arena_t *arena; + bool try_tcache; + + assert(size != 0); + + if (malloc_init()) + goto label_oom; + + if (arena_ind != UINT_MAX) { + arena = arenas[arena_ind]; + try_tcache = false; + } else { + arena = NULL; + try_tcache = true; + } + + usize = (alignment == 0) ? 
s2u(size) : sa2u(size, alignment); + if (usize == 0) + goto label_oom; + + if (config_prof && opt_prof) { + prof_thr_cnt_t *cnt; + + PROF_ALLOC_PREP(1, usize, cnt); + if (cnt == NULL) + goto label_oom; + if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && usize <= + SMALL_MAXCLASS) { + size_t usize_promoted = (alignment == 0) ? + s2u(SMALL_MAXCLASS+1) : sa2u(SMALL_MAXCLASS+1, + alignment); + assert(usize_promoted != 0); + p = imallocx(usize_promoted, alignment, zero, + try_tcache, arena); + if (p == NULL) + goto label_oom; + arena_prof_promoted(p, usize); + } else { + p = imallocx(usize, alignment, zero, try_tcache, arena); + if (p == NULL) + goto label_oom; + } + prof_malloc(p, usize, cnt); + } else { + p = imallocx(usize, alignment, zero, try_tcache, arena); + if (p == NULL) + goto label_oom; + } + + if (config_stats) { + assert(usize == isalloc(p, config_prof)); + thread_allocated_tsd_get()->allocated += usize; + } + UTRACE(0, size, p); + JEMALLOC_VALGRIND_MALLOC(true, p, usize, zero); + return (p); +label_oom: + if (config_xmalloc && opt_xmalloc) { + malloc_write(": Error in mallocx(): out of memory\n"); + abort(); + } + UTRACE(0, size, 0); + return (NULL); +} + +void * +je_rallocx(void *ptr, size_t size, int flags) +{ + void *p; + size_t usize; + size_t old_size; + UNUSED size_t old_rzsize JEMALLOC_CC_SILENCE_INIT(0); + size_t alignment = (ZU(1) << (flags & MALLOCX_LG_ALIGN_MASK) + & (SIZE_T_MAX-1)); + bool zero = flags & MALLOCX_ZERO; + unsigned arena_ind = ((unsigned)(flags >> 8)) - 1; + bool try_tcache_alloc, try_tcache_dalloc; + arena_t *arena; + + assert(ptr != NULL); + assert(size != 0); + assert(malloc_initialized || IS_INITIALIZER); + malloc_thread_init(); + + if (arena_ind != UINT_MAX) { + arena_chunk_t *chunk; + try_tcache_alloc = false; + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + try_tcache_dalloc = (chunk == ptr || chunk->arena != + arenas[arena_ind]); + arena = arenas[arena_ind]; + } else { + try_tcache_alloc = true; + try_tcache_dalloc 
= true; + arena = NULL; + } + + if (config_prof && opt_prof) { + prof_thr_cnt_t *cnt; + + usize = (alignment == 0) ? s2u(size) : sa2u(size, + alignment); + prof_ctx_t *old_ctx = prof_ctx_get(ptr); + old_size = isalloc(ptr, true); + if (config_valgrind && opt_valgrind) + old_rzsize = p2rz(ptr); + PROF_ALLOC_PREP(1, usize, cnt); + if (cnt == NULL) + goto label_oom; + if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && usize <= + SMALL_MAXCLASS) { + p = iralloct(ptr, SMALL_MAXCLASS+1, (SMALL_MAXCLASS+1 >= + size) ? 0 : size - (SMALL_MAXCLASS+1), alignment, + zero, false, try_tcache_alloc, try_tcache_dalloc, + arena); + if (p == NULL) + goto label_oom; + if (usize < PAGE) + arena_prof_promoted(p, usize); + } else { + p = iralloct(ptr, size, 0, alignment, zero, false, + try_tcache_alloc, try_tcache_dalloc, arena); + if (p == NULL) + goto label_oom; + } + prof_realloc(p, usize, cnt, old_size, old_ctx); + } else { + if (config_stats) { + old_size = isalloc(ptr, false); + if (config_valgrind && opt_valgrind) + old_rzsize = u2rz(old_size); + } else if (config_valgrind && opt_valgrind) { + old_size = isalloc(ptr, false); + old_rzsize = u2rz(old_size); + } + p = iralloct(ptr, size, 0, alignment, zero, false, + try_tcache_alloc, try_tcache_dalloc, arena); + if (p == NULL) + goto label_oom; + if (config_stats || (config_valgrind && opt_valgrind)) + usize = isalloc(p, config_prof); + } + + if (config_stats) { + thread_allocated_t *ta; + ta = thread_allocated_tsd_get(); + ta->allocated += usize; + ta->deallocated += old_size; + } + UTRACE(ptr, size, p); + JEMALLOC_VALGRIND_REALLOC(p, usize, ptr, old_size, old_rzsize, zero); + return (p); +label_oom: + if (config_xmalloc && opt_xmalloc) { + malloc_write(": Error in rallocx(): out of memory\n"); + abort(); + } + UTRACE(ptr, size, 0); + return (NULL); +} + +size_t +je_xallocx(void *ptr, size_t size, size_t extra, int flags) +{ + size_t usize; + size_t old_size; + UNUSED size_t old_rzsize JEMALLOC_CC_SILENCE_INIT(0); + size_t 
alignment = (ZU(1) << (flags & MALLOCX_LG_ALIGN_MASK) + & (SIZE_T_MAX-1)); + bool zero = flags & MALLOCX_ZERO; + unsigned arena_ind = ((unsigned)(flags >> 8)) - 1; + bool try_tcache_alloc, try_tcache_dalloc; + arena_t *arena; + + assert(ptr != NULL); + assert(size != 0); + assert(SIZE_T_MAX - size >= extra); + assert(malloc_initialized || IS_INITIALIZER); + malloc_thread_init(); + + if (arena_ind != UINT_MAX) { + arena_chunk_t *chunk; + try_tcache_alloc = false; + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + try_tcache_dalloc = (chunk == ptr || chunk->arena != + arenas[arena_ind]); + arena = arenas[arena_ind]; + } else { + try_tcache_alloc = true; + try_tcache_dalloc = true; + arena = NULL; + } + + if (config_prof && opt_prof) { + prof_thr_cnt_t *cnt; + + /* + * usize isn't knowable before iralloc() returns when extra is + * non-zero. Therefore, compute its maximum possible value and + * use that in PROF_ALLOC_PREP() to decide whether to capture a + * backtrace. prof_realloc() will use the actual usize to + * decide whether to sample. + */ + size_t max_usize = (alignment == 0) ? s2u(size+extra) : + sa2u(size+extra, alignment); + prof_ctx_t *old_ctx = prof_ctx_get(ptr); + old_size = isalloc(ptr, true); + if (config_valgrind && opt_valgrind) + old_rzsize = p2rz(ptr); + PROF_ALLOC_PREP(1, max_usize, cnt); + if (cnt == NULL) { + usize = isalloc(ptr, config_prof); + goto label_not_moved; + } + /* + * Use minimum usize to determine whether promotion may happen. + */ + if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U + && ((alignment == 0) ? s2u(size) : sa2u(size, alignment)) + <= SMALL_MAXCLASS) { + if (iralloct(ptr, SMALL_MAXCLASS+1, (SMALL_MAXCLASS+1 >= + size+extra) ? 
0 : size+extra - (SMALL_MAXCLASS+1), + alignment, zero, true, try_tcache_alloc, + try_tcache_dalloc, arena) == NULL) + goto label_not_moved; + if (max_usize < PAGE) { + usize = max_usize; + arena_prof_promoted(ptr, usize); + } else + usize = isalloc(ptr, config_prof); + } else { + if (iralloct(ptr, size, extra, alignment, zero, true, + try_tcache_alloc, try_tcache_dalloc, arena) == NULL) + goto label_not_moved; + usize = isalloc(ptr, config_prof); + } + prof_realloc(ptr, usize, cnt, old_size, old_ctx); + } else { + if (config_stats) { + old_size = isalloc(ptr, false); + if (config_valgrind && opt_valgrind) + old_rzsize = u2rz(old_size); + } else if (config_valgrind && opt_valgrind) { + old_size = isalloc(ptr, false); + old_rzsize = u2rz(old_size); + } + if (iralloct(ptr, size, extra, alignment, zero, true, + try_tcache_alloc, try_tcache_dalloc, arena) == NULL) { + usize = isalloc(ptr, config_prof); + goto label_not_moved; + } + usize = isalloc(ptr, config_prof); + } + + if (config_stats) { + thread_allocated_t *ta; + ta = thread_allocated_tsd_get(); + ta->allocated += usize; + ta->deallocated += old_size; + } + JEMALLOC_VALGRIND_REALLOC(ptr, usize, ptr, old_size, old_rzsize, zero); +label_not_moved: + UTRACE(ptr, size, ptr); + return (usize); +} + +size_t +je_sallocx(const void *ptr, int flags) +{ + size_t usize; assert(malloc_initialized || IS_INITIALIZER); malloc_thread_init(); if (config_ivsalloc) - ret = ivsalloc(ptr, config_prof); - else - ret = (ptr != NULL) ? 
isalloc(ptr, config_prof) : 0; + usize = ivsalloc(ptr, config_prof); + else { + assert(ptr != NULL); + usize = isalloc(ptr, config_prof); + } - return (ret); + return (usize); } void -je_malloc_stats_print(void (*write_cb)(void *, const char *), void *cbopaque, - const char *opts) +je_dallocx(void *ptr, int flags) { + size_t usize; + UNUSED size_t rzsize JEMALLOC_CC_SILENCE_INIT(0); + unsigned arena_ind = ((unsigned)(flags >> 8)) - 1; + bool try_tcache; - stats_print(write_cb, cbopaque, opts); + assert(ptr != NULL); + assert(malloc_initialized || IS_INITIALIZER); + + if (arena_ind != UINT_MAX) { + arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + try_tcache = (chunk == ptr || chunk->arena != + arenas[arena_ind]); + } else + try_tcache = true; + + UTRACE(ptr, 0, 0); + if (config_stats || config_valgrind) + usize = isalloc(ptr, config_prof); + if (config_prof && opt_prof) { + if (config_stats == false && config_valgrind == false) + usize = isalloc(ptr, config_prof); + prof_free(ptr, usize); + } + if (config_stats) + thread_allocated_tsd_get()->deallocated += usize; + if (config_valgrind && opt_valgrind) + rzsize = p2rz(ptr); + iqalloct(ptr, try_tcache); + JEMALLOC_VALGRIND_FREE(ptr, rzsize); +} + +size_t +je_nallocx(size_t size, int flags) +{ + size_t usize; + size_t alignment = (ZU(1) << (flags & MALLOCX_LG_ALIGN_MASK) + & (SIZE_T_MAX-1)); + + assert(size != 0); + + if (malloc_init()) + return (0); + + usize = (alignment == 0) ? 
s2u(size) : sa2u(size, alignment); + return (usize); } int @@ -1393,6 +1728,30 @@ je_mallctlbymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, return (ctl_bymib(mib, miblen, oldp, oldlenp, newp, newlen)); } +void +je_malloc_stats_print(void (*write_cb)(void *, const char *), void *cbopaque, + const char *opts) +{ + + stats_print(write_cb, cbopaque, opts); +} + +size_t +je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr) +{ + size_t ret; + + assert(malloc_initialized || IS_INITIALIZER); + malloc_thread_init(); + + if (config_ivsalloc) + ret = ivsalloc(ptr, config_prof); + else + ret = (ptr != NULL) ? isalloc(ptr, config_prof) : 0; + + return (ret); +} + /* * End non-standard functions. */ @@ -1402,284 +1761,65 @@ je_mallctlbymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, */ #ifdef JEMALLOC_EXPERIMENTAL -JEMALLOC_ALWAYS_INLINE_C void * -iallocm(size_t usize, size_t alignment, bool zero, bool try_tcache, - arena_t *arena) -{ - - assert(usize == ((alignment == 0) ? s2u(usize) : sa2u(usize, - alignment))); - - if (alignment != 0) - return (ipallocx(usize, alignment, zero, try_tcache, arena)); - else if (zero) - return (icallocx(usize, try_tcache, arena)); - else - return (imallocx(usize, try_tcache, arena)); -} - int je_allocm(void **ptr, size_t *rsize, size_t size, int flags) { void *p; - size_t usize; - size_t alignment = (ZU(1) << (flags & ALLOCM_LG_ALIGN_MASK) - & (SIZE_T_MAX-1)); - bool zero = flags & ALLOCM_ZERO; - unsigned arena_ind = ((unsigned)(flags >> 8)) - 1; - arena_t *arena; - bool try_tcache; assert(ptr != NULL); - assert(size != 0); - if (malloc_init()) - goto label_oom; - - if (arena_ind != UINT_MAX) { - arena = arenas[arena_ind]; - try_tcache = false; - } else { - arena = NULL; - try_tcache = true; - } - - usize = (alignment == 0) ? 
s2u(size) : sa2u(size, alignment); - if (usize == 0) - goto label_oom; - - if (config_prof && opt_prof) { - prof_thr_cnt_t *cnt; - - PROF_ALLOC_PREP(1, usize, cnt); - if (cnt == NULL) - goto label_oom; - if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && usize <= - SMALL_MAXCLASS) { - size_t usize_promoted = (alignment == 0) ? - s2u(SMALL_MAXCLASS+1) : sa2u(SMALL_MAXCLASS+1, - alignment); - assert(usize_promoted != 0); - p = iallocm(usize_promoted, alignment, zero, - try_tcache, arena); - if (p == NULL) - goto label_oom; - arena_prof_promoted(p, usize); - } else { - p = iallocm(usize, alignment, zero, try_tcache, arena); - if (p == NULL) - goto label_oom; - } - prof_malloc(p, usize, cnt); - } else { - p = iallocm(usize, alignment, zero, try_tcache, arena); - if (p == NULL) - goto label_oom; - } + p = je_mallocx(size, flags); + if (p == NULL) + return (ALLOCM_ERR_OOM); if (rsize != NULL) - *rsize = usize; - + *rsize = isalloc(p, config_prof); *ptr = p; - if (config_stats) { - assert(usize == isalloc(p, config_prof)); - thread_allocated_tsd_get()->allocated += usize; - } - UTRACE(0, size, p); - JEMALLOC_VALGRIND_MALLOC(true, p, usize, zero); return (ALLOCM_SUCCESS); -label_oom: - if (config_xmalloc && opt_xmalloc) { - malloc_write(": Error in allocm(): " - "out of memory\n"); - abort(); - } - *ptr = NULL; - UTRACE(0, size, 0); - return (ALLOCM_ERR_OOM); } int je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, int flags) { - void *p, *q; - size_t usize; - size_t old_size; - UNUSED size_t old_rzsize JEMALLOC_CC_SILENCE_INIT(0); - size_t alignment = (ZU(1) << (flags & ALLOCM_LG_ALIGN_MASK) - & (SIZE_T_MAX-1)); - bool zero = flags & ALLOCM_ZERO; + int ret; bool no_move = flags & ALLOCM_NO_MOVE; - unsigned arena_ind = ((unsigned)(flags >> 8)) - 1; - bool try_tcache_alloc, try_tcache_dalloc; - arena_t *arena; assert(ptr != NULL); assert(*ptr != NULL); assert(size != 0); assert(SIZE_T_MAX - size >= extra); - assert(malloc_initialized || IS_INITIALIZER); - 
malloc_thread_init(); - if (arena_ind != UINT_MAX) { - arena_chunk_t *chunk; - try_tcache_alloc = false; - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(*ptr); - try_tcache_dalloc = (chunk == *ptr || chunk->arena != - arenas[arena_ind]); - arena = arenas[arena_ind]; - } else { - try_tcache_alloc = true; - try_tcache_dalloc = true; - arena = NULL; - } - - p = *ptr; - if (config_prof && opt_prof) { - prof_thr_cnt_t *cnt; - - /* - * usize isn't knowable before iralloc() returns when extra is - * non-zero. Therefore, compute its maximum possible value and - * use that in PROF_ALLOC_PREP() to decide whether to capture a - * backtrace. prof_realloc() will use the actual usize to - * decide whether to sample. - */ - size_t max_usize = (alignment == 0) ? s2u(size+extra) : - sa2u(size+extra, alignment); - prof_ctx_t *old_ctx = prof_ctx_get(p); - old_size = isalloc(p, true); - if (config_valgrind && opt_valgrind) - old_rzsize = p2rz(p); - PROF_ALLOC_PREP(1, max_usize, cnt); - if (cnt == NULL) - goto label_oom; - /* - * Use minimum usize to determine whether promotion may happen. - */ - if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U - && ((alignment == 0) ? s2u(size) : sa2u(size, alignment)) - <= SMALL_MAXCLASS) { - q = irallocx(p, SMALL_MAXCLASS+1, (SMALL_MAXCLASS+1 >= - size+extra) ? 0 : size+extra - (SMALL_MAXCLASS+1), - alignment, zero, no_move, try_tcache_alloc, - try_tcache_dalloc, arena); - if (q == NULL) - goto label_err; - if (max_usize < PAGE) { - usize = max_usize; - arena_prof_promoted(q, usize); - } else - usize = isalloc(q, config_prof); - } else { - q = irallocx(p, size, extra, alignment, zero, no_move, - try_tcache_alloc, try_tcache_dalloc, arena); - if (q == NULL) - goto label_err; - usize = isalloc(q, config_prof); - } - prof_realloc(q, usize, cnt, old_size, old_ctx); + if (no_move) { + size_t usize = je_xallocx(*ptr, size, extra, flags); + ret = (usize >= size) ? 
ALLOCM_SUCCESS : ALLOCM_ERR_NOT_MOVED; if (rsize != NULL) *rsize = usize; } else { - if (config_stats) { - old_size = isalloc(p, false); - if (config_valgrind && opt_valgrind) - old_rzsize = u2rz(old_size); - } else if (config_valgrind && opt_valgrind) { - old_size = isalloc(p, false); - old_rzsize = u2rz(old_size); - } - q = irallocx(p, size, extra, alignment, zero, no_move, - try_tcache_alloc, try_tcache_dalloc, arena); - if (q == NULL) - goto label_err; - if (config_stats) - usize = isalloc(q, config_prof); - if (rsize != NULL) { - if (config_stats == false) - usize = isalloc(q, config_prof); - *rsize = usize; - } + void *p = je_rallocx(*ptr, size+extra, flags); + if (p != NULL) { + *ptr = p; + ret = ALLOCM_SUCCESS; + } else + ret = ALLOCM_ERR_OOM; + if (rsize != NULL) + *rsize = isalloc(*ptr, config_prof); } - - *ptr = q; - if (config_stats) { - thread_allocated_t *ta; - ta = thread_allocated_tsd_get(); - ta->allocated += usize; - ta->deallocated += old_size; - } - UTRACE(p, size, q); - JEMALLOC_VALGRIND_REALLOC(q, usize, p, old_size, old_rzsize, zero); - return (ALLOCM_SUCCESS); -label_err: - if (no_move) { - UTRACE(p, size, q); - return (ALLOCM_ERR_NOT_MOVED); - } -label_oom: - if (config_xmalloc && opt_xmalloc) { - malloc_write(": Error in rallocm(): " - "out of memory\n"); - abort(); - } - UTRACE(p, size, 0); - return (ALLOCM_ERR_OOM); + return (ret); } int je_sallocm(const void *ptr, size_t *rsize, int flags) { - size_t sz; - assert(malloc_initialized || IS_INITIALIZER); - malloc_thread_init(); - - if (config_ivsalloc) - sz = ivsalloc(ptr, config_prof); - else { - assert(ptr != NULL); - sz = isalloc(ptr, config_prof); - } assert(rsize != NULL); - *rsize = sz; - + *rsize = je_sallocx(ptr, flags); return (ALLOCM_SUCCESS); } int je_dallocm(void *ptr, int flags) { - size_t usize; - UNUSED size_t rzsize JEMALLOC_CC_SILENCE_INIT(0); - unsigned arena_ind = ((unsigned)(flags >> 8)) - 1; - bool try_tcache; - - assert(ptr != NULL); - assert(malloc_initialized || 
IS_INITIALIZER); - - if (arena_ind != UINT_MAX) { - arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - try_tcache = (chunk == ptr || chunk->arena != - arenas[arena_ind]); - } else - try_tcache = true; - - UTRACE(ptr, 0, 0); - if (config_stats || config_valgrind) - usize = isalloc(ptr, config_prof); - if (config_prof && opt_prof) { - if (config_stats == false && config_valgrind == false) - usize = isalloc(ptr, config_prof); - prof_free(ptr, usize); - } - if (config_stats) - thread_allocated_tsd_get()->deallocated += usize; - if (config_valgrind && opt_valgrind) - rzsize = p2rz(ptr); - iqallocx(ptr, try_tcache); - JEMALLOC_VALGRIND_FREE(ptr, rzsize); + je_dallocx(ptr, flags); return (ALLOCM_SUCCESS); } @@ -1687,18 +1827,10 @@ int je_nallocm(size_t *rsize, size_t size, int flags) { size_t usize; - size_t alignment = (ZU(1) << (flags & ALLOCM_LG_ALIGN_MASK) - & (SIZE_T_MAX-1)); - assert(size != 0); - - if (malloc_init()) - return (ALLOCM_ERR_OOM); - - usize = (alignment == 0) ? 
s2u(size) : sa2u(size, alignment); + usize = je_nallocx(size, flags); if (usize == 0) return (ALLOCM_ERR_OOM); - if (rsize != NULL) *rsize = usize; return (ALLOCM_SUCCESS); diff --git a/src/tcache.c b/src/tcache.c index 88ec4810..6de92960 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -292,7 +292,7 @@ tcache_create(arena_t *arena) else if (size <= tcache_maxclass) tcache = (tcache_t *)arena_malloc_large(arena, size, true); else - tcache = (tcache_t *)icallocx(size, false, arena); + tcache = (tcache_t *)icalloct(size, false, arena); if (tcache == NULL) return (NULL); @@ -366,7 +366,7 @@ tcache_destroy(tcache_t *tcache) arena_dalloc_large(arena, chunk, tcache); } else - idallocx(tcache, false); + idalloct(tcache, false); } void diff --git a/src/tsd.c b/src/tsd.c index 8431751f..700caabf 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -21,7 +21,7 @@ void malloc_tsd_dalloc(void *wrapper) { - idallocx(wrapper, false); + idalloct(wrapper, false); } void diff --git a/test/integration/mallocx.c b/test/integration/mallocx.c new file mode 100644 index 00000000..f12855e7 --- /dev/null +++ b/test/integration/mallocx.c @@ -0,0 +1,149 @@ +#include "test/jemalloc_test.h" + +#define CHUNK 0x400000 +/* #define MAXALIGN ((size_t)UINT64_C(0x80000000000)) */ +#define MAXALIGN ((size_t)0x2000000LU) +#define NITER 4 + +TEST_BEGIN(test_basic) +{ + size_t nsz, rsz, sz; + void *p; + + sz = 42; + nsz = nallocx(sz, 0); + assert_zu_ne(nsz, 0, "Unexpected nallocx() error"); + p = mallocx(sz, 0); + assert_ptr_not_null(p, "Unexpected mallocx() error"); + rsz = sallocx(p, 0); + assert_zu_ge(rsz, sz, "Real size smaller than expected"); + assert_zu_eq(nsz, rsz, "nallocx()/sallocx() size mismatch"); + dallocx(p, 0); + + p = mallocx(sz, 0); + assert_ptr_not_null(p, "Unexpected mallocx() error"); + dallocx(p, 0); + + nsz = nallocx(sz, MALLOCX_ZERO); + assert_zu_ne(nsz, 0, "Unexpected nallocx() error"); + p = mallocx(sz, MALLOCX_ZERO); + assert_ptr_not_null(p, "Unexpected mallocx() error"); + rsz = 
sallocx(p, 0); + assert_zu_eq(nsz, rsz, "nallocx()/sallocx() rsize mismatch"); + dallocx(p, 0); +} +TEST_END + +TEST_BEGIN(test_alignment_errors) +{ + void *p; + size_t nsz, sz, alignment; + +#if LG_SIZEOF_PTR == 3 + alignment = UINT64_C(0x8000000000000000); + sz = UINT64_C(0x8000000000000000); +#else + alignment = 0x80000000LU; + sz = 0x80000000LU; +#endif + nsz = nallocx(sz, MALLOCX_ALIGN(alignment)); + assert_zu_eq(nsz, 0, "Expected error for nallocx(%zu, %#x)", sz, + MALLOCX_ALIGN(alignment)); + p = mallocx(sz, MALLOCX_ALIGN(alignment)); + assert_ptr_null(p, "Expected error for mallocx(%zu, %#x)", sz, + MALLOCX_ALIGN(alignment)); + +#if LG_SIZEOF_PTR == 3 + alignment = UINT64_C(0x4000000000000000); + sz = UINT64_C(0x8400000000000001); +#else + alignment = 0x40000000LU; + sz = 0x84000001LU; +#endif + nsz = nallocx(sz, MALLOCX_ALIGN(alignment)); + assert_zu_ne(nsz, 0, "Unexpected nallocx() error"); + p = mallocx(sz, MALLOCX_ALIGN(alignment)); + assert_ptr_null(p, "Expected error for mallocx(%zu, %#x)", sz, + MALLOCX_ALIGN(alignment)); + + alignment = 0x10LU; +#if LG_SIZEOF_PTR == 3 + sz = UINT64_C(0xfffffffffffffff0); +#else + sz = 0xfffffff0LU; +#endif + nsz = nallocx(sz, MALLOCX_ALIGN(alignment)); + assert_zu_eq(nsz, 0, "Expected error for nallocx(%zu, %#x)", sz, + MALLOCX_ALIGN(alignment)); + nsz = nallocx(sz, MALLOCX_ALIGN(alignment)); + assert_zu_eq(nsz, 0, "Expected error for nallocx(%zu, %#x)", sz, + MALLOCX_ALIGN(alignment)); + p = mallocx(sz, MALLOCX_ALIGN(alignment)); + assert_ptr_null(p, "Expected error for mallocx(%zu, %#x)", sz, + MALLOCX_ALIGN(alignment)); +} +TEST_END + +TEST_BEGIN(test_alignment_and_size) +{ + size_t nsz, rsz, sz, alignment, total; + unsigned i; + void *ps[NITER]; + + for (i = 0; i < NITER; i++) + ps[i] = NULL; + + for (alignment = 8; + alignment <= MAXALIGN; + alignment <<= 1) { + total = 0; + for (sz = 1; + sz < 3 * alignment && sz < (1U << 31); + sz += (alignment >> (LG_SIZEOF_PTR-1)) - 1) { + for (i = 0; i < NITER; i++) { + 
nsz = nallocx(sz, MALLOCX_ALIGN(alignment) | + MALLOCX_ZERO); + assert_zu_ne(nsz, 0, + "nallocx() error for alignment=%zu, " + "size=%zu (%#zx)", alignment, sz, sz); + ps[i] = mallocx(sz, MALLOCX_ALIGN(alignment) | + MALLOCX_ZERO); + assert_ptr_not_null(ps[i], + "mallocx() error for alignment=%zu, " + "size=%zu (%#zx)", alignment, sz, sz); + rsz = sallocx(ps[i], 0); + assert_zu_ge(rsz, sz, + "Real size smaller than expected for " + "alignment=%zu, size=%zu", alignment, sz); + assert_zu_eq(nsz, rsz, + "nallocx()/sallocx() size mismatch for " + "alignment=%zu, size=%zu", alignment, sz); + assert_ptr_null( + (void *)((uintptr_t)ps[i] & (alignment-1)), + "%p inadequately aligned for" + " alignment=%zu, size=%zu", ps[i], + alignment, sz); + total += rsz; + if (total >= (MAXALIGN << 1)) + break; + } + for (i = 0; i < NITER; i++) { + if (ps[i] != NULL) { + dallocx(ps[i], 0); + ps[i] = NULL; + } + } + } + } +} +TEST_END + +int +main(void) +{ + + return (test( + test_basic, + test_alignment_errors, + test_alignment_and_size)); +} diff --git a/test/integration/rallocm.c b/test/integration/rallocm.c index c13cd699..33c11bb7 100644 --- a/test/integration/rallocm.c +++ b/test/integration/rallocm.c @@ -1,5 +1,3 @@ -#include - #include "test/jemalloc_test.h" TEST_BEGIN(test_same_size) diff --git a/test/integration/rallocx.c b/test/integration/rallocx.c new file mode 100644 index 00000000..cc9138ba --- /dev/null +++ b/test/integration/rallocx.c @@ -0,0 +1,51 @@ +#include "test/jemalloc_test.h" + +TEST_BEGIN(test_grow_and_shrink) +{ + void *p, *q; + size_t tsz; +#define NCYCLES 3 + unsigned i, j; +#define NSZS 2500 + size_t szs[NSZS]; +#define MAXSZ ZU(12 * 1024 * 1024) + + p = mallocx(1, 0); + assert_ptr_not_null(p, "Unexpected mallocx() error"); + szs[0] = sallocx(p, 0); + + for (i = 0; i < NCYCLES; i++) { + for (j = 1; j < NSZS && szs[j-1] < MAXSZ; j++) { + q = rallocx(p, szs[j-1]+1, 0); + assert_ptr_not_null(q, + "Unexpected rallocx() error for size=%zu-->%zu", + szs[j-1], 
szs[j-1]+1); + szs[j] = sallocx(q, 0); + assert_zu_ne(szs[j], szs[j-1]+1, + "Expected size to at least: %zu", szs[j-1]+1); + p = q; + } + + for (j--; j > 0; j--) { + q = rallocx(p, szs[j-1], 0); + assert_ptr_not_null(q, + "Unexpected rallocx() error for size=%zu-->%zu", + szs[j], szs[j-1]); + tsz = sallocx(q, 0); + assert_zu_eq(tsz, szs[j-1], + "Expected size=%zu, got size=%zu", szs[j-1], tsz); + p = q; + } + } + + dallocx(p, 0); +} +TEST_END + +int +main(void) +{ + + return (test( + test_grow_and_shrink)); +} diff --git a/test/integration/xallocx.c b/test/integration/xallocx.c new file mode 100644 index 00000000..ab4cf945 --- /dev/null +++ b/test/integration/xallocx.c @@ -0,0 +1,59 @@ +#include "test/jemalloc_test.h" + +TEST_BEGIN(test_same_size) +{ + void *p; + size_t sz, tsz; + + p = mallocx(42, 0); + assert_ptr_not_null(p, "Unexpected mallocx() error"); + sz = sallocx(p, 0); + + tsz = xallocx(p, sz, 0, 0); + assert_zu_eq(tsz, sz, "Unexpected size change: %zu --> %zu", sz, tsz); + + dallocx(p, 0); +} +TEST_END + +TEST_BEGIN(test_extra_no_move) +{ + void *p; + size_t sz, tsz; + + p = mallocx(42, 0); + assert_ptr_not_null(p, "Unexpected mallocx() error"); + sz = sallocx(p, 0); + + tsz = xallocx(p, sz, sz-42, 0); + assert_zu_eq(tsz, sz, "Unexpected size change: %zu --> %zu", sz, tsz); + + dallocx(p, 0); +} +TEST_END + +TEST_BEGIN(test_no_move_fail) +{ + void *p; + size_t sz, tsz; + + p = mallocx(42, 0); + assert_ptr_not_null(p, "Unexpected mallocx() error"); + sz = sallocx(p, 0); + + tsz = xallocx(p, sz + 5, 0, 0); + assert_zu_eq(tsz, sz, "Unexpected size change: %zu --> %zu", sz, tsz); + + dallocx(p, 0); +} +TEST_END + +int +main(void) +{ + + return (test( + test_same_size, + test_extra_no_move, + test_no_move_fail)); +} diff --git a/test/unit/mq.c b/test/unit/mq.c index 01e72fd1..e6cba101 100644 --- a/test/unit/mq.c +++ b/test/unit/mq.c @@ -39,8 +39,7 @@ thd_receiver_start(void *arg) for (i = 0; i < (NSENDERS * NMSGS); i++) { mq_msg_t *msg = mq_get(mq); 
assert_ptr_not_null(msg, "mq_get() should never return NULL"); - assert_d_eq(jet_dallocm(msg, 0), ALLOCM_SUCCESS, - "Unexpected dallocm() failure"); + jet_dallocx(msg, 0); } return (NULL); } @@ -54,8 +53,8 @@ thd_sender_start(void *arg) for (i = 0; i < NMSGS; i++) { mq_msg_t *msg; void *p; - assert_d_eq(jet_allocm(&p, NULL, sizeof(mq_msg_t), 0), - ALLOCM_SUCCESS, "Unexpected allocm() failure"); + p = jet_mallocx(sizeof(mq_msg_t), 0); + assert_ptr_not_null(p, "Unexpected allocm() failure"); msg = (mq_msg_t *)p; mq_put(mq, msg); } From 34779914400988922d36815b7085893fbcc89a2e Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 13 Dec 2013 15:07:43 -0800 Subject: [PATCH 0349/3378] Fix name mangling issues. Move je_* definitions from jemalloc_macros.h.in to jemalloc_defs.h.in, because only the latter is an autoconf header (#undef substitution occurs). Fix unit tests to use automatic mangling, so that e.g. mallocx is macro-substituted to becom jet_mallocx. --- include/jemalloc/jemalloc_defs.h.in | 34 +++++++++++++++++++++++++++ include/jemalloc/jemalloc_macros.h.in | 34 --------------------------- test/include/test/jemalloc_test.h.in | 1 + test/unit/mq.c | 4 ++-- 4 files changed, 37 insertions(+), 36 deletions(-) diff --git a/include/jemalloc/jemalloc_defs.h.in b/include/jemalloc/jemalloc_defs.h.in index eb38d710..71e23af7 100644 --- a/include/jemalloc/jemalloc_defs.h.in +++ b/include/jemalloc/jemalloc_defs.h.in @@ -22,3 +22,37 @@ /* sizeof(void *) == 2^LG_SIZEOF_PTR. */ #undef LG_SIZEOF_PTR + +/* + * Name mangling for public symbols is controlled by --with-mangling and + * --with-jemalloc-prefix. With default settings the je_ prefix is stripped by + * these macro definitions. 
+ */ +#ifndef JEMALLOC_NO_RENAME +# undef je_malloc_conf +# undef je_malloc_message +# undef je_malloc +# undef je_calloc +# undef je_posix_memalign +# undef je_aligned_alloc +# undef je_realloc +# undef je_free +# undef je_mallocx +# undef je_rallocx +# undef je_xallocx +# undef je_sallocx +# undef je_dallocx +# undef je_nallocx +# undef je_mallctl +# undef je_mallctlnametomib +# undef je_mallctlbymib +# undef je_malloc_stats_print +# undef je_malloc_usable_size +# undef je_memalign +# undef je_valloc +# undef je_allocm +# undef je_rallocm +# undef je_sallocm +# undef je_dallocm +# undef je_nallocm +#endif diff --git a/include/jemalloc/jemalloc_macros.h.in b/include/jemalloc/jemalloc_macros.h.in index 9773bcbc..13dbdd91 100644 --- a/include/jemalloc/jemalloc_macros.h.in +++ b/include/jemalloc/jemalloc_macros.h.in @@ -36,40 +36,6 @@ # define ALLOCM_ERR_NOT_MOVED 2 #endif -/* - * Name mangling for public symbols is controlled by --with-mangling and - * --with-jemalloc-prefix. With default settings the je_ prefix is stripped by - * these macro definitions. 
- */ -#ifndef JEMALLOC_NO_RENAME -# undef je_malloc_conf -# undef je_malloc_message -# undef je_malloc -# undef je_calloc -# undef je_posix_memalign -# undef je_aligned_alloc -# undef je_realloc -# undef je_free -# undef je_mallocx -# undef je_rallocx -# undef je_xallocx -# undef je_sallocx -# undef je_dallocx -# undef je_nallocx -# undef je_mallctl -# undef je_mallctlnametomib -# undef je_mallctlbymib -# undef je_malloc_stats_print -# undef je_malloc_usable_size -# undef je_memalign -# undef je_valloc -# undef je_allocm -# undef je_rallocm -# undef je_sallocm -# undef je_dallocm -# undef je_nallocm -#endif - #ifdef JEMALLOC_HAVE_ATTR # define JEMALLOC_ATTR(s) __attribute__((s)) # define JEMALLOC_EXPORT JEMALLOC_ATTR(visibility("default")) diff --git a/test/include/test/jemalloc_test.h.in b/test/include/test/jemalloc_test.h.in index 9f7dfa46..38db000c 100644 --- a/test/include/test/jemalloc_test.h.in +++ b/test/include/test/jemalloc_test.h.in @@ -64,6 +64,7 @@ */ #ifdef JEMALLOC_UNIT_TEST # define JEMALLOC_JET +# define JEMALLOC_MANGLE # include "jemalloc/internal/jemalloc_internal.h" /******************************************************************************/ diff --git a/test/unit/mq.c b/test/unit/mq.c index e6cba101..f57e96af 100644 --- a/test/unit/mq.c +++ b/test/unit/mq.c @@ -39,7 +39,7 @@ thd_receiver_start(void *arg) for (i = 0; i < (NSENDERS * NMSGS); i++) { mq_msg_t *msg = mq_get(mq); assert_ptr_not_null(msg, "mq_get() should never return NULL"); - jet_dallocx(msg, 0); + dallocx(msg, 0); } return (NULL); } @@ -53,7 +53,7 @@ thd_sender_start(void *arg) for (i = 0; i < NMSGS; i++) { mq_msg_t *msg; void *p; - p = jet_mallocx(sizeof(mq_msg_t), 0); + p = mallocx(sizeof(mq_msg_t), 0); assert_ptr_not_null(p, "Unexpected allocm() failure"); msg = (mq_msg_t *)p; mq_put(mq, msg); From 5a658b9c7517d62fa39759f5ff6119d26dfc4cb7 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 15 Dec 2013 15:54:18 -0800 Subject: [PATCH 0350/3378] Add zero/align tests for 
rallocx(). --- test/integration/rallocx.c | 121 ++++++++++++++++++++++++++++++++++++- test/src/test.c | 2 + 2 files changed, 122 insertions(+), 1 deletion(-) diff --git a/test/integration/rallocx.c b/test/integration/rallocx.c index cc9138ba..438e9a5f 100644 --- a/test/integration/rallocx.c +++ b/test/integration/rallocx.c @@ -39,6 +39,122 @@ TEST_BEGIN(test_grow_and_shrink) } dallocx(p, 0); +#undef MAXSZ +#undef NSZS +#undef NCYCLES +} +TEST_END + +static bool +validate_fill(const void *p, uint8_t c, size_t offset, size_t len) +{ + bool ret = false; + const uint8_t *buf = (const uint8_t *)p; + size_t i; + + for (i = 0; i < len; i++) { + uint8_t b = buf[offset+i]; + if (b != c) { + test_fail("Allocation at %p contains %#x rather than " + "%#x at offset %zu", p, b, c, offset+i); + ret = true; + } + } + + return (ret); +} + +TEST_BEGIN(test_zero) +{ + void *p, *q; + size_t psz, qsz, i, j; + size_t start_sizes[] = {1, 3*1024, 63*1024, 4095*1024}; +#define FILL_BYTE 0xaaU +#define RANGE 2048 + + for (i = 0; i < sizeof(start_sizes)/sizeof(size_t); i++) { + size_t start_size = start_sizes[i]; + p = mallocx(start_size, MALLOCX_ZERO); + assert_ptr_not_null(p, "Unexpected mallocx() error"); + psz = sallocx(p, 0); + + assert_false(validate_fill(p, 0, 0, psz), + "Expected zeroed memory"); + memset(p, FILL_BYTE, psz); + assert_false(validate_fill(p, FILL_BYTE, 0, psz), + "Expected filled memory"); + + for (j = 1; j < RANGE; j++) { + q = rallocx(p, start_size+j, MALLOCX_ZERO); + assert_ptr_not_null(q, "Unexpected rallocx() error"); + qsz = sallocx(q, 0); + if (q != p || qsz != psz) { + assert_false(validate_fill(q, FILL_BYTE, 0, + psz), "Expected filled memory"); + assert_false(validate_fill(q, 0, psz, qsz-psz), + "Expected zeroed memory"); + } + if (psz != qsz) { + memset(q+psz, FILL_BYTE, qsz-psz); + psz = qsz; + } + p = q; + } + assert_false(validate_fill(p, FILL_BYTE, 0, psz), + "Expected filled memory"); + dallocx(p, 0); + } +#undef FILL_BYTE +} +TEST_END + 
+TEST_BEGIN(test_align) +{ + void *p, *q; + size_t align; +#define MAX_ALIGN (ZU(1) << 29) + + align = ZU(1); + p = mallocx(1, MALLOCX_ALIGN(align)); + assert_ptr_not_null(p, "Unexpected mallocx() error"); + + for (align <<= 1; align <= MAX_ALIGN; align <<= 1) { + q = rallocx(p, 1, MALLOCX_ALIGN(align)); + assert_ptr_not_null(q, + "Unexpected rallocx() error for align=%zu", align); + assert_ptr_null( + (void *)((uintptr_t)q & (align-1)), + "%p inadequately aligned for align=%zu", + q, align); + p = q; + } + dallocx(p, 0); +#undef MAX_ALIGN +} +TEST_END + +TEST_BEGIN(test_lg_align) +{ + void *p, *q; + size_t lg_align; +#define MAX_LG_ALIGN 29 + + lg_align = ZU(0); + p = mallocx(1, MALLOCX_LG_ALIGN(lg_align)); + assert_ptr_not_null(p, "Unexpected mallocx() error"); + + for (lg_align++; lg_align <= MAX_LG_ALIGN; lg_align++) { + q = rallocx(p, 1, MALLOCX_LG_ALIGN(lg_align)); + assert_ptr_not_null(q, + "Unexpected rallocx() error for lg_align=%zu", lg_align); + assert_ptr_null( + (void *)((uintptr_t)q & ((ZU(1) << lg_align)-1)), + "%p inadequately aligned for lg_align=%zu", + q, lg_align); + p = q; + } + dallocx(p, 0); +#undef MAX_LG_ALIGN } TEST_END @@ -47,5 +163,8 @@ main(void) { return (test( - test_grow_and_shrink)); + test_grow_and_shrink, + test_zero, + test_align, + test_lg_align)); } diff --git a/test/src/test.c b/test/src/test.c index 74eb9dce..6552e377 100644 --- a/test/src/test.c +++ b/test/src/test.c @@ -14,6 +14,7 @@ test_skip(const char *format, ...) va_start(ap, format); malloc_vcprintf(NULL, NULL, format, ap); va_end(ap); + malloc_printf("\n"); test_status = test_status_skip; } @@ -26,6 +27,7 @@ test_fail(const char *format, ...) va_start(ap, format); malloc_vcprintf(NULL, NULL, format, ap); va_end(ap); + malloc_printf("\n"); test_status = test_status_fail; } From 665769357cd77b74e00a146f196fff19243b33c4 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 15 Dec 2013 16:21:30 -0800 Subject: [PATCH 0351/3378] Optimize arena_prof_ctx_set(). 
Refactor such that arena_prof_ctx_set() receives usize as an argument, and use it to determine whether to handle ptr as a small region, rather than reading the chunk page map. --- include/jemalloc/internal/arena.h | 24 ++++--- include/jemalloc/internal/prof.h | 52 +++++++-------- src/jemalloc.c | 101 +++++++++++++++++------------- 3 files changed, 99 insertions(+), 78 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 075c263a..28540a47 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -474,7 +474,7 @@ size_t arena_bin_index(arena_t *arena, arena_bin_t *bin); unsigned arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr); prof_ctx_t *arena_prof_ctx_get(const void *ptr); -void arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx); +void arena_prof_ctx_set(const void *ptr, size_t usize, prof_ctx_t *ctx); void *arena_malloc(arena_t *arena, size_t size, bool zero, bool try_tcache); size_t arena_salloc(const void *ptr, bool demote); void arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr, @@ -886,7 +886,7 @@ arena_prof_ctx_get(const void *ptr) } JEMALLOC_INLINE void -arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx) +arena_prof_ctx_set(const void *ptr, size_t usize, prof_ctx_t *ctx) { arena_chunk_t *chunk; size_t pageind, mapbits; @@ -899,7 +899,14 @@ arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx) pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; mapbits = arena_mapbits_get(chunk, pageind); assert((mapbits & CHUNK_MAP_ALLOCATED) != 0); - if ((mapbits & CHUNK_MAP_LARGE) == 0) { + + if (usize > SMALL_MAXCLASS || (prof_promote && + ((uintptr_t)ctx != (uintptr_t)1U || ((mapbits & CHUNK_MAP_LARGE) != + 0)))) { + assert((mapbits & CHUNK_MAP_LARGE) != 0); + arena_mapp_get(chunk, pageind)->prof_ctx = ctx; + } else { + assert((mapbits & CHUNK_MAP_LARGE) == 0); if (prof_promote == false) { arena_run_t *run = (arena_run_t 
*)((uintptr_t)chunk + (uintptr_t)((pageind - (mapbits >> LG_PAGE)) << @@ -912,12 +919,11 @@ arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx) bin_info = &arena_bin_info[binind]; regind = arena_run_regind(run, bin_info, ptr); - *((prof_ctx_t **)((uintptr_t)run + bin_info->ctx0_offset - + (regind * sizeof(prof_ctx_t *)))) = ctx; - } else - assert((uintptr_t)ctx == (uintptr_t)1U); - } else - arena_mapp_get(chunk, pageind)->prof_ctx = ctx; + *((prof_ctx_t **)((uintptr_t)run + + bin_info->ctx0_offset + (regind * sizeof(prof_ctx_t + *)))) = ctx; + } + } } JEMALLOC_ALWAYS_INLINE void * diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index 28ad37af..8b240999 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -289,11 +289,11 @@ malloc_tsd_protos(JEMALLOC_ATTR(unused), prof_tdata, prof_tdata_t *) prof_tdata_t *prof_tdata_get(bool create); void prof_sample_threshold_update(prof_tdata_t *prof_tdata); prof_ctx_t *prof_ctx_get(const void *ptr); -void prof_ctx_set(const void *ptr, prof_ctx_t *ctx); +void prof_ctx_set(const void *ptr, size_t usize, prof_ctx_t *ctx); bool prof_sample_accum_update(size_t size); -void prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt); -void prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt, - size_t old_size, prof_ctx_t *old_ctx); +void prof_malloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt); +void prof_realloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt, + size_t old_usize, prof_ctx_t *old_ctx); void prof_free(const void *ptr, size_t size); #endif @@ -386,7 +386,7 @@ prof_ctx_get(const void *ptr) } JEMALLOC_INLINE void -prof_ctx_set(const void *ptr, prof_ctx_t *ctx) +prof_ctx_set(const void *ptr, size_t usize, prof_ctx_t *ctx) { arena_chunk_t *chunk; @@ -396,7 +396,7 @@ prof_ctx_set(const void *ptr, prof_ctx_t *ctx) chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (chunk != ptr) { /* Region. 
*/ - arena_prof_ctx_set(ptr, ctx); + arena_prof_ctx_set(ptr, usize, ctx); } else huge_prof_ctx_set(ptr, ctx); } @@ -431,20 +431,20 @@ prof_sample_accum_update(size_t size) } JEMALLOC_INLINE void -prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt) +prof_malloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt) { cassert(config_prof); assert(ptr != NULL); - assert(size == isalloc(ptr, true)); + assert(usize == isalloc(ptr, true)); if (opt_lg_prof_sample != 0) { - if (prof_sample_accum_update(size)) { + if (prof_sample_accum_update(usize)) { /* * Don't sample. For malloc()-like allocation, it is * always possible to tell in advance how large an * object's usable size will be, so there should never - * be a difference between the size passed to + * be a difference between the usize passed to * PROF_ALLOC_PREP() and prof_malloc(). */ assert((uintptr_t)cnt == (uintptr_t)1U); @@ -452,17 +452,17 @@ prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt) } if ((uintptr_t)cnt > (uintptr_t)1U) { - prof_ctx_set(ptr, cnt->ctx); + prof_ctx_set(ptr, usize, cnt->ctx); cnt->epoch++; /*********/ mb_write(); /*********/ cnt->cnts.curobjs++; - cnt->cnts.curbytes += size; + cnt->cnts.curbytes += usize; if (opt_prof_accum) { cnt->cnts.accumobjs++; - cnt->cnts.accumbytes += size; + cnt->cnts.accumbytes += usize; } /*********/ mb_write(); @@ -472,12 +472,12 @@ prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt) mb_write(); /*********/ } else - prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U); + prof_ctx_set(ptr, usize, (prof_ctx_t *)(uintptr_t)1U); } JEMALLOC_INLINE void -prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt, - size_t old_size, prof_ctx_t *old_ctx) +prof_realloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt, + size_t old_usize, prof_ctx_t *old_ctx) { prof_thr_cnt_t *told_cnt; @@ -485,15 +485,15 @@ prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt, assert(ptr != NULL || (uintptr_t)cnt <= (uintptr_t)1U); if (ptr != 
NULL) { - assert(size == isalloc(ptr, true)); + assert(usize == isalloc(ptr, true)); if (opt_lg_prof_sample != 0) { - if (prof_sample_accum_update(size)) { + if (prof_sample_accum_update(usize)) { /* - * Don't sample. The size passed to + * Don't sample. The usize passed to * PROF_ALLOC_PREP() was larger than what * actually got allocated, so a backtrace was * captured for this allocation, even though - * its actual size was insufficient to cross + * its actual usize was insufficient to cross * the sample threshold. */ cnt = (prof_thr_cnt_t *)(uintptr_t)1U; @@ -510,7 +510,7 @@ prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt, */ malloc_mutex_lock(old_ctx->lock); old_ctx->cnt_merged.curobjs--; - old_ctx->cnt_merged.curbytes -= old_size; + old_ctx->cnt_merged.curbytes -= old_usize; malloc_mutex_unlock(old_ctx->lock); told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U; } @@ -520,23 +520,23 @@ prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt, if ((uintptr_t)told_cnt > (uintptr_t)1U) told_cnt->epoch++; if ((uintptr_t)cnt > (uintptr_t)1U) { - prof_ctx_set(ptr, cnt->ctx); + prof_ctx_set(ptr, usize, cnt->ctx); cnt->epoch++; } else if (ptr != NULL) - prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U); + prof_ctx_set(ptr, usize, (prof_ctx_t *)(uintptr_t)1U); /*********/ mb_write(); /*********/ if ((uintptr_t)told_cnt > (uintptr_t)1U) { told_cnt->cnts.curobjs--; - told_cnt->cnts.curbytes -= old_size; + told_cnt->cnts.curbytes -= old_usize; } if ((uintptr_t)cnt > (uintptr_t)1U) { cnt->cnts.curobjs++; - cnt->cnts.curbytes += size; + cnt->cnts.curbytes += usize; if (opt_prof_accum) { cnt->cnts.accumobjs++; - cnt->cnts.accumbytes += size; + cnt->cnts.accumbytes += usize; } } /*********/ diff --git a/src/jemalloc.c b/src/jemalloc.c index f8c8119d..b8a4fb07 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1105,7 +1105,7 @@ je_realloc(void *ptr, size_t size) { void *ret; size_t usize JEMALLOC_CC_SILENCE_INIT(0); - size_t old_size = 0; + size_t old_usize = 0; 
UNUSED size_t old_rzsize JEMALLOC_CC_SILENCE_INIT(0); prof_thr_cnt_t *cnt JEMALLOC_CC_SILENCE_INIT(NULL); prof_ctx_t *old_ctx JEMALLOC_CC_SILENCE_INIT(NULL); @@ -1115,16 +1115,16 @@ je_realloc(void *ptr, size_t size) /* realloc(ptr, 0) is equivalent to free(p). */ assert(malloc_initialized || IS_INITIALIZER); if (config_prof) { - old_size = isalloc(ptr, true); + old_usize = isalloc(ptr, true); if (config_valgrind && opt_valgrind) old_rzsize = p2rz(ptr); } else if (config_stats) { - old_size = isalloc(ptr, false); + old_usize = isalloc(ptr, false); if (config_valgrind && opt_valgrind) - old_rzsize = u2rz(old_size); + old_rzsize = u2rz(old_usize); } else if (config_valgrind && opt_valgrind) { - old_size = isalloc(ptr, false); - old_rzsize = u2rz(old_size); + old_usize = isalloc(ptr, false); + old_rzsize = u2rz(old_usize); } if (config_prof && opt_prof) { old_ctx = prof_ctx_get(ptr); @@ -1142,16 +1142,16 @@ je_realloc(void *ptr, size_t size) malloc_thread_init(); if (config_prof) { - old_size = isalloc(ptr, true); + old_usize = isalloc(ptr, true); if (config_valgrind && opt_valgrind) old_rzsize = p2rz(ptr); } else if (config_stats) { - old_size = isalloc(ptr, false); + old_usize = isalloc(ptr, false); if (config_valgrind && opt_valgrind) - old_rzsize = u2rz(old_size); + old_rzsize = u2rz(old_usize); } else if (config_valgrind && opt_valgrind) { - old_size = isalloc(ptr, false); - old_rzsize = u2rz(old_size); + old_usize = isalloc(ptr, false); + old_rzsize = u2rz(old_usize); } if (config_prof && opt_prof) { usize = s2u(size); @@ -1236,16 +1236,17 @@ label_oom: label_return: if (config_prof && opt_prof) - prof_realloc(ret, usize, cnt, old_size, old_ctx); + prof_realloc(ret, usize, cnt, old_usize, old_ctx); if (config_stats && ret != NULL) { thread_allocated_t *ta; assert(usize == isalloc(ret, config_prof)); ta = thread_allocated_tsd_get(); ta->allocated += usize; - ta->deallocated += old_size; + ta->deallocated += old_usize; } UTRACE(ptr, size, ret); - 
JEMALLOC_VALGRIND_REALLOC(ret, usize, ptr, old_size, old_rzsize, false); + JEMALLOC_VALGRIND_REALLOC(ret, usize, ptr, old_usize, old_rzsize, + false); return (ret); } @@ -1431,8 +1432,7 @@ void * je_rallocx(void *ptr, size_t size, int flags) { void *p; - size_t usize; - size_t old_size; + size_t usize, old_usize; UNUSED size_t old_rzsize JEMALLOC_CC_SILENCE_INIT(0); size_t alignment = (ZU(1) << (flags & MALLOCX_LG_ALIGN_MASK) & (SIZE_T_MAX-1)); @@ -1465,7 +1465,7 @@ je_rallocx(void *ptr, size_t size, int flags) usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment); prof_ctx_t *old_ctx = prof_ctx_get(ptr); - old_size = isalloc(ptr, true); + old_usize = isalloc(ptr, true); if (config_valgrind && opt_valgrind) old_rzsize = p2rz(ptr); PROF_ALLOC_PREP(1, usize, cnt); @@ -1487,15 +1487,28 @@ je_rallocx(void *ptr, size_t size, int flags) if (p == NULL) goto label_oom; } - prof_realloc(p, usize, cnt, old_size, old_ctx); + if (p == ptr && alignment != 0) { + /* + * The allocation did not move, so it is possible that + * the size class is smaller than would guarantee the + * requested alignment, and that the alignment + * constraint was serendipitously satisfied. + * Additionally, old_usize may not be the same as the + * current usize because of in-place large + * reallocation. Therefore, query the actual value of + * usize. 
+ */ + usize = isalloc(p, true); + } + prof_realloc(p, usize, cnt, old_usize, old_ctx); } else { if (config_stats) { - old_size = isalloc(ptr, false); + old_usize = isalloc(ptr, false); if (config_valgrind && opt_valgrind) - old_rzsize = u2rz(old_size); + old_rzsize = u2rz(old_usize); } else if (config_valgrind && opt_valgrind) { - old_size = isalloc(ptr, false); - old_rzsize = u2rz(old_size); + old_usize = isalloc(ptr, false); + old_rzsize = u2rz(old_usize); } p = iralloct(ptr, size, 0, alignment, zero, false, try_tcache_alloc, try_tcache_dalloc, arena); @@ -1509,10 +1522,10 @@ je_rallocx(void *ptr, size_t size, int flags) thread_allocated_t *ta; ta = thread_allocated_tsd_get(); ta->allocated += usize; - ta->deallocated += old_size; + ta->deallocated += old_usize; } UTRACE(ptr, size, p); - JEMALLOC_VALGRIND_REALLOC(p, usize, ptr, old_size, old_rzsize, zero); + JEMALLOC_VALGRIND_REALLOC(p, usize, ptr, old_usize, old_rzsize, zero); return (p); label_oom: if (config_xmalloc && opt_xmalloc) { @@ -1526,8 +1539,7 @@ label_oom: size_t je_xallocx(void *ptr, size_t size, size_t extra, int flags) { - size_t usize; - size_t old_size; + size_t usize, old_usize; UNUSED size_t old_rzsize JEMALLOC_CC_SILENCE_INIT(0); size_t alignment = (ZU(1) << (flags & MALLOCX_LG_ALIGN_MASK) & (SIZE_T_MAX-1)); @@ -1568,12 +1580,12 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) size_t max_usize = (alignment == 0) ? s2u(size+extra) : sa2u(size+extra, alignment); prof_ctx_t *old_ctx = prof_ctx_get(ptr); - old_size = isalloc(ptr, true); + old_usize = isalloc(ptr, true); if (config_valgrind && opt_valgrind) old_rzsize = p2rz(ptr); PROF_ALLOC_PREP(1, max_usize, cnt); if (cnt == NULL) { - usize = isalloc(ptr, config_prof); + usize = old_usize; goto label_not_moved; } /* @@ -1585,32 +1597,35 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) if (iralloct(ptr, SMALL_MAXCLASS+1, (SMALL_MAXCLASS+1 >= size+extra) ? 
0 : size+extra - (SMALL_MAXCLASS+1), alignment, zero, true, try_tcache_alloc, - try_tcache_dalloc, arena) == NULL) + try_tcache_dalloc, arena) == NULL) { + usize = old_usize; goto label_not_moved; - if (max_usize < PAGE) { - usize = max_usize; + } + usize = isalloc(ptr, true); + if (max_usize < PAGE) arena_prof_promoted(ptr, usize); - } else - usize = isalloc(ptr, config_prof); } else { if (iralloct(ptr, size, extra, alignment, zero, true, - try_tcache_alloc, try_tcache_dalloc, arena) == NULL) + try_tcache_alloc, try_tcache_dalloc, arena) == + NULL) { + usize = old_usize; goto label_not_moved; - usize = isalloc(ptr, config_prof); + } + usize = isalloc(ptr, true); } - prof_realloc(ptr, usize, cnt, old_size, old_ctx); + prof_realloc(ptr, usize, cnt, old_usize, old_ctx); } else { if (config_stats) { - old_size = isalloc(ptr, false); + old_usize = isalloc(ptr, false); if (config_valgrind && opt_valgrind) - old_rzsize = u2rz(old_size); + old_rzsize = u2rz(old_usize); } else if (config_valgrind && opt_valgrind) { - old_size = isalloc(ptr, false); - old_rzsize = u2rz(old_size); + old_usize = isalloc(ptr, false); + old_rzsize = u2rz(old_usize); } if (iralloct(ptr, size, extra, alignment, zero, true, try_tcache_alloc, try_tcache_dalloc, arena) == NULL) { - usize = isalloc(ptr, config_prof); + usize = old_usize; goto label_not_moved; } usize = isalloc(ptr, config_prof); @@ -1620,9 +1635,9 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) thread_allocated_t *ta; ta = thread_allocated_tsd_get(); ta->allocated += usize; - ta->deallocated += old_size; + ta->deallocated += old_usize; } - JEMALLOC_VALGRIND_REALLOC(ptr, usize, ptr, old_size, old_rzsize, zero); + JEMALLOC_VALGRIND_REALLOC(ptr, usize, ptr, old_usize, old_rzsize, zero); label_not_moved: UTRACE(ptr, size, ptr); return (usize); From 6e62984ef6ca4312cf0a2e49ea2cc38feb94175b Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 15 Dec 2013 21:49:40 -0800 Subject: [PATCH 0352/3378] Don't junk-fill 
reallocations unless usize changes. Don't junk fill reallocations for which the request size is less than the current usable size, but not enough smaller to cause a size class change. Unlike malloc()/calloc()/realloc(), *allocx() contractually treats the full usize as the allocation, so a caller can ask for zeroed memory via mallocx() and a series of rallocx() calls that all specify MALLOCX_ZERO, and be assured that all newly allocated bytes will be zeroed and made available to the application without danger of allocator mutation until the size class decreases enough to cause usize reduction. --- include/jemalloc/internal/tcache.h | 1 + src/arena.c | 15 +++------------ src/huge.c | 4 ---- 3 files changed, 4 insertions(+), 16 deletions(-) diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index d4eecdee..c3d4b58d 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -297,6 +297,7 @@ tcache_alloc_small(tcache_t *tcache, size_t size, bool zero) binind = SMALL_SIZE2BIN(size); assert(binind < NBINS); tbin = &tcache->tbins[binind]; + size = arena_bin_info[binind].reg_size; ret = tcache_alloc_easy(tbin); if (ret == NULL) { ret = tcache_alloc_small_hard(tcache, tbin, binind); diff --git a/src/arena.c b/src/arena.c index 4a460130..406cf5de 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1938,10 +1938,6 @@ arena_ralloc_large(void *ptr, size_t oldsize, size_t size, size_t extra, psize = PAGE_CEILING(size + extra); if (psize == oldsize) { /* Same size class. */ - if (config_fill && opt_junk && size < oldsize) { - memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize - - size); - } return (false); } else { arena_chunk_t *chunk; @@ -1953,8 +1949,8 @@ arena_ralloc_large(void *ptr, size_t oldsize, size_t size, size_t extra, if (psize < oldsize) { /* Fill before shrinking in order avoid a race. 
*/ if (config_fill && opt_junk) { - memset((void *)((uintptr_t)ptr + size), 0x5a, - oldsize - size); + memset((void *)((uintptr_t)ptr + psize), 0x5a, + oldsize - psize); } arena_ralloc_large_shrink(arena, chunk, ptr, oldsize, psize); @@ -1988,13 +1984,8 @@ arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, if ((size + extra <= SMALL_MAXCLASS && SMALL_SIZE2BIN(size + extra) == SMALL_SIZE2BIN(oldsize)) || (size <= oldsize && - size + extra >= oldsize)) { - if (config_fill && opt_junk && size < oldsize) { - memset((void *)((uintptr_t)ptr + size), - 0x5a, oldsize - size); - } + size + extra >= oldsize)) return (ptr); - } } else { assert(size <= arena_maxclass); if (size + extra > SMALL_MAXCLASS) { diff --git a/src/huge.c b/src/huge.c index 33fab684..ea9a2ad2 100644 --- a/src/huge.c +++ b/src/huge.c @@ -89,10 +89,6 @@ huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra) && CHUNK_CEILING(oldsize) >= CHUNK_CEILING(size) && CHUNK_CEILING(oldsize) <= CHUNK_CEILING(size+extra)) { assert(CHUNK_CEILING(oldsize) == oldsize); - if (config_fill && opt_junk && size < oldsize) { - memset((void *)((uintptr_t)ptr + size), 0x5a, - oldsize - size); - } return (ptr); } From 5fbad0902b845b1a6b311994468d0b9962e4fd30 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 15 Dec 2013 22:08:44 -0800 Subject: [PATCH 0353/3378] Finish arena_prof_ctx_set() optimization. Delay reading the mapbits until it's unavoidable. 
--- include/jemalloc/internal/arena.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 28540a47..e43aed10 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -889,7 +889,7 @@ JEMALLOC_INLINE void arena_prof_ctx_set(const void *ptr, size_t usize, prof_ctx_t *ctx) { arena_chunk_t *chunk; - size_t pageind, mapbits; + size_t pageind; cassert(config_prof); assert(ptr != NULL); @@ -897,17 +897,17 @@ arena_prof_ctx_set(const void *ptr, size_t usize, prof_ctx_t *ctx) chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - mapbits = arena_mapbits_get(chunk, pageind); - assert((mapbits & CHUNK_MAP_ALLOCATED) != 0); + assert(arena_mapbits_allocated_get(chunk, pageind) != 0); if (usize > SMALL_MAXCLASS || (prof_promote && - ((uintptr_t)ctx != (uintptr_t)1U || ((mapbits & CHUNK_MAP_LARGE) != - 0)))) { - assert((mapbits & CHUNK_MAP_LARGE) != 0); + ((uintptr_t)ctx != (uintptr_t)1U || arena_mapbits_large_get(chunk, + pageind) != 0))) { + assert(arena_mapbits_large_get(chunk, pageind) != 0); arena_mapp_get(chunk, pageind)->prof_ctx = ctx; } else { - assert((mapbits & CHUNK_MAP_LARGE) == 0); + assert(arena_mapbits_large_get(chunk, pageind) == 0); if (prof_promote == false) { + size_t mapbits = arena_mapbits_get(chunk, pageind); arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - (mapbits >> LG_PAGE)) << LG_PAGE)); From e935c07e0066e5c7b8ae51e68ebcc4321eabcb7c Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 16 Dec 2013 13:37:21 -0800 Subject: [PATCH 0354/3378] Add rallocx() test of both alignment and zeroing. 
--- test/integration/rallocx.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/test/integration/rallocx.c b/test/integration/rallocx.c index 438e9a5f..b4b67802 100644 --- a/test/integration/rallocx.c +++ b/test/integration/rallocx.c @@ -133,27 +133,39 @@ TEST_BEGIN(test_align) } TEST_END -TEST_BEGIN(test_lg_align) +TEST_BEGIN(test_lg_align_and_zero) { void *p, *q; - size_t lg_align; + size_t lg_align, sz; #define MAX_LG_ALIGN 29 +#define MAX_VALIDATE (ZU(1) << 22) lg_align = ZU(0); - p = mallocx(1, MALLOCX_LG_ALIGN(lg_align)); + p = mallocx(1, MALLOCX_LG_ALIGN(lg_align)|MALLOCX_ZERO); assert_ptr_not_null(p, "Unexpected mallocx() error"); for (lg_align++; lg_align <= MAX_LG_ALIGN; lg_align++) { - q = rallocx(p, 1, MALLOCX_LG_ALIGN(lg_align)); + q = rallocx(p, 1, MALLOCX_LG_ALIGN(lg_align)|MALLOCX_ZERO); assert_ptr_not_null(q, "Unexpected rallocx() error for lg_align=%zu", lg_align); assert_ptr_null( (void *)((uintptr_t)q & ((ZU(1) << lg_align)-1)), "%p inadequately aligned for lg_align=%zu", q, lg_align); + sz = sallocx(q, 0); + if ((sz << 1) <= MAX_VALIDATE) { + assert_false(validate_fill(q, 0, 0, sz), + "Expected zeroed memory"); + } else { + assert_false(validate_fill(q, 0, 0, MAX_VALIDATE), + "Expected zeroed memory"); + assert_false(validate_fill(q+sz-MAX_VALIDATE, 0, 0, + MAX_VALIDATE), "Expected zeroed memory"); + } p = q; } dallocx(p, 0); +#undef MAX_VALIDATE #undef MAX_LG_ALIGN } TEST_END @@ -166,5 +178,5 @@ main(void) test_grow_and_shrink, test_zero, test_align, - test_lg_align)); + test_lg_align_and_zero)); } From e948fa6439a19f0f3eb44012fd0b3274ebf82b8f Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 16 Dec 2013 18:04:23 -0800 Subject: [PATCH 0355/3378] Add ckh unit tests. 
--- Makefile.in | 7 +- test/unit/ckh.c | 206 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 210 insertions(+), 3 deletions(-) create mode 100644 test/unit/ckh.c diff --git a/Makefile.in b/Makefile.in index af60a21c..590ab11c 100644 --- a/Makefile.in +++ b/Makefile.in @@ -107,9 +107,10 @@ C_TESTLIB_SRCS := $(srcroot)test/src/math.c $(srcroot)test/src/mtx.c \ $(srcroot)test/src/SFMT.c $(srcroot)test/src/test.c \ $(srcroot)test/src/thd.c C_UTIL_INTEGRATION_SRCS := $(srcroot)src/util.c -TESTS_UNIT := $(srcroot)test/unit/bitmap.c $(srcroot)test/unit/math.c \ - $(srcroot)test/unit/mq.c $(srcroot)test/unit/mtx.c \ - $(srcroot)test/unit/SFMT.c $(srcroot)test/unit/tsd.c +TESTS_UNIT := $(srcroot)test/unit/bitmap.c $(srcroot)test/unit/ckh.c \ + $(srcroot)test/unit/math.c $(srcroot)test/unit/mq.c \ + $(srcroot)test/unit/mtx.c $(srcroot)test/unit/SFMT.c \ + $(srcroot)test/unit/tsd.c TESTS_INTEGRATION := $(srcroot)test/integration/aligned_alloc.c \ $(srcroot)test/integration/allocated.c \ $(srcroot)test/integration/mallocx.c \ diff --git a/test/unit/ckh.c b/test/unit/ckh.c new file mode 100644 index 00000000..69fd7f52 --- /dev/null +++ b/test/unit/ckh.c @@ -0,0 +1,206 @@ +#include "test/jemalloc_test.h" + +TEST_BEGIN(test_new_delete) +{ + ckh_t ckh; + + assert_false(ckh_new(&ckh, 2, ckh_string_hash, ckh_string_keycomp), + "Unexpected ckh_new() error"); + ckh_delete(&ckh); + + assert_false(ckh_new(&ckh, 3, ckh_pointer_hash, ckh_pointer_keycomp), + "Unexpected ckh_new() error"); + ckh_delete(&ckh); +} +TEST_END + +TEST_BEGIN(test_count_insert_search_remove) +{ + ckh_t ckh; + const char *strs[] = { + "a string", + "A string", + "a string.", + "A string." + }; + const char *missing = "A string not in the hash table."; + size_t i; + + assert_false(ckh_new(&ckh, 2, ckh_string_hash, ckh_string_keycomp), + "Unexpected ckh_new() error"); + assert_zu_eq(ckh_count(&ckh), 0, + "ckh_count() should return %zu, but it returned %zu", 0, + ckh_count(&ckh)); + + /* Insert. 
*/ + for (i = 0; i < sizeof(strs)/sizeof(const char *); i++) { + ckh_insert(&ckh, strs[i], strs[i]); + assert_zu_eq(ckh_count(&ckh), i+1, + "ckh_count() should return %zu, but it returned %zu", i+1, + ckh_count(&ckh)); + } + + /* Search. */ + for (i = 0; i < sizeof(strs)/sizeof(const char *); i++) { + union { + void *p; + const char *s; + } k, v; + void **kp, **vp; + const char *ks, *vs; + + kp = (i & 1) ? &k.p : NULL; + vp = (i & 2) ? &v.p : NULL; + k.p = NULL; + v.p = NULL; + assert_false(ckh_search(&ckh, strs[i], kp, vp), + "Unexpected ckh_search() error"); + + ks = (i & 1) ? strs[i] : (const char *)NULL; + vs = (i & 2) ? strs[i] : (const char *)NULL; + assert_ptr_eq((void *)ks, (void *)k.s, + "Key mismatch, i=%zu", i); + assert_ptr_eq((void *)vs, (void *)v.s, + "Value mismatch, i=%zu", i); + } + assert_true(ckh_search(&ckh, missing, NULL, NULL), + "Unexpected ckh_search() success"); + + /* Remove. */ + for (i = 0; i < sizeof(strs)/sizeof(const char *); i++) { + union { + void *p; + const char *s; + } k, v; + void **kp, **vp; + const char *ks, *vs; + + kp = (i & 1) ? &k.p : NULL; + vp = (i & 2) ? &v.p : NULL; + k.p = NULL; + v.p = NULL; + assert_false(ckh_remove(&ckh, strs[i], kp, vp), + "Unexpected ckh_remove() error"); + + ks = (i & 1) ? strs[i] : (const char *)NULL; + vs = (i & 2) ? 
strs[i] : (const char *)NULL; + assert_ptr_eq((void *)ks, (void *)k.s, + "Key mismatch, i=%zu", i); + assert_ptr_eq((void *)vs, (void *)v.s, + "Value mismatch, i=%zu", i); + assert_zu_eq(ckh_count(&ckh), + sizeof(strs)/sizeof(const char *) - i - 1, + "ckh_count() should return %zu, but it returned %zu", + sizeof(strs)/sizeof(const char *) - i - 1, + ckh_count(&ckh)); + } + + ckh_delete(&ckh); +} +TEST_END + +TEST_BEGIN(test_insert_iter_remove) +{ +#define NITEMS 1000 + ckh_t ckh; + void **p[NITEMS]; + void *q, *r; + unsigned i; + + assert_false(ckh_new(&ckh, 2, ckh_pointer_hash, ckh_pointer_keycomp), + "Unexpected ckh_new() error"); + + for (i = 0; i < NITEMS; i++) { + p[i] = mallocx(i+1, 0); + assert_ptr_not_null(p[i], "Unexpected mallocx() failure"); + } + + for (i = 0; i < NITEMS; i++) { + unsigned j; + + for (j = i; j < NITEMS; j++) { + assert_false(ckh_insert(&ckh, p[j], p[j]), + "Unexpected ckh_insert() failure"); + assert_false(ckh_search(&ckh, p[j], &q, &r), + "Unexpected ckh_search() failure"); + assert_ptr_eq(p[j], q, "Key pointer mismatch"); + assert_ptr_eq(p[j], r, "Value pointer mismatch"); + } + + assert_zu_eq(ckh_count(&ckh), NITEMS, + "ckh_count() should return %zu, but it returned %zu", + NITEMS, ckh_count(&ckh)); + + for (j = i + 1; j < NITEMS; j++) { + assert_false(ckh_search(&ckh, p[j], NULL, NULL), + "Unexpected ckh_search() failure"); + assert_false(ckh_remove(&ckh, p[j], &q, &r), + "Unexpected ckh_remove() failure"); + assert_ptr_eq(p[j], q, "Key pointer mismatch"); + assert_ptr_eq(p[j], r, "Value pointer mismatch"); + assert_true(ckh_search(&ckh, p[j], NULL, NULL), + "Unexpected ckh_search() success"); + assert_true(ckh_remove(&ckh, p[j], &q, &r), + "Unexpected ckh_remove() success"); + } + + { + bool seen[NITEMS]; + size_t tabind; + + memset(seen, 0, sizeof(seen)); + + for (tabind = 0; ckh_iter(&ckh, &tabind, &q, &r) == + false;) { + unsigned k; + + assert_ptr_eq(q, r, "Key and val not equal"); + + for (k = 0; k < NITEMS; k++) { + if (p[k] 
== q) { + assert_false(seen[k], + "Item %zu already seen", k); + seen[k] = true; + break; + } + } + } + + for (j = 0; j < i + 1; j++) + assert_true(seen[j], "Item %zu not seen", j); + for (; j < NITEMS; j++) + assert_false(seen[j], "Item %zu seen", j); + } + } + + for (i = 0; i < NITEMS; i++) { + assert_false(ckh_search(&ckh, p[i], NULL, NULL), + "Unexpected ckh_search() failure"); + assert_false(ckh_remove(&ckh, p[i], &q, &r), + "Unexpected ckh_remove() failure"); + assert_ptr_eq(p[i], q, "Key pointer mismatch"); + assert_ptr_eq(p[i], r, "Value pointer mismatch"); + assert_true(ckh_search(&ckh, p[i], NULL, NULL), + "Unexpected ckh_search() success"); + assert_true(ckh_remove(&ckh, p[i], &q, &r), + "Unexpected ckh_remove() success"); + dallocx(p[i], 0); + } + + assert_zu_eq(ckh_count(&ckh), 0, + "ckh_count() should return %zu, but it returned %zu", 0, + ckh_count(&ckh)); + ckh_delete(&ckh); +#undef NITEMS +} +TEST_END + +int +main(void) +{ + + return (test( + test_new_delete, + test_count_insert_search_remove, + test_insert_iter_remove)); +} From e6b7aa4a609d4b514d772ee4a1340778a195f6f7 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 16 Dec 2013 22:55:41 -0800 Subject: [PATCH 0356/3378] Add hash (MurmurHash3) tests. Add hash tests that are based on SMHasher's VerificationTest() function. 
--- Makefile.in | 6 +- include/jemalloc/internal/hash.h | 1 - test/unit/hash.c | 167 +++++++++++++++++++++++++++++++ 3 files changed, 170 insertions(+), 4 deletions(-) create mode 100644 test/unit/hash.c diff --git a/Makefile.in b/Makefile.in index 590ab11c..08dad5fc 100644 --- a/Makefile.in +++ b/Makefile.in @@ -108,9 +108,9 @@ C_TESTLIB_SRCS := $(srcroot)test/src/math.c $(srcroot)test/src/mtx.c \ $(srcroot)test/src/thd.c C_UTIL_INTEGRATION_SRCS := $(srcroot)src/util.c TESTS_UNIT := $(srcroot)test/unit/bitmap.c $(srcroot)test/unit/ckh.c \ - $(srcroot)test/unit/math.c $(srcroot)test/unit/mq.c \ - $(srcroot)test/unit/mtx.c $(srcroot)test/unit/SFMT.c \ - $(srcroot)test/unit/tsd.c + $(srcroot)test/unit/hash.c $(srcroot)test/unit/math.c \ + $(srcroot)test/unit/mq.c $(srcroot)test/unit/mtx.c \ + $(srcroot)test/unit/SFMT.c $(srcroot)test/unit/tsd.c TESTS_INTEGRATION := $(srcroot)test/integration/aligned_alloc.c \ $(srcroot)test/integration/allocated.c \ $(srcroot)test/integration/mallocx.c \ diff --git a/include/jemalloc/internal/hash.h b/include/jemalloc/internal/hash.h index 6b8d9cd5..3ef10fdd 100644 --- a/include/jemalloc/internal/hash.h +++ b/include/jemalloc/internal/hash.h @@ -310,7 +310,6 @@ hash_x64_128(const void *key, const int len, const uint32_t seed, r_out[1] = h2; } - /******************************************************************************/ /* API. */ JEMALLOC_INLINE void diff --git a/test/unit/hash.c b/test/unit/hash.c new file mode 100644 index 00000000..46b4decf --- /dev/null +++ b/test/unit/hash.c @@ -0,0 +1,167 @@ +/* + * This file is based on code that is part of SMHasher + * (https://code.google.com/p/smhasher/), and is subject to the MIT license + * (http://www.opensource.org/licenses/mit-license.php). Both email addresses + * associated with the source code's revision history belong to Austin Appleby, + * and the revision history ranges from 2010 to 2012. 
Therefore the copyright + * and license are here taken to be: + * + * Copyright (c) 2010-2012 Austin Appleby + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include "test/jemalloc_test.h" + +typedef enum { + hash_variant_x86_32, + hash_variant_x86_128, + hash_variant_x64_128 +} hash_variant_t; + +static size_t +hash_variant_bits(hash_variant_t variant) +{ + + switch (variant) { + case hash_variant_x86_32: return (32); + case hash_variant_x86_128: return (128); + case hash_variant_x64_128: return (128); + default: not_reached(); + } +} + +static const char * +hash_variant_string(hash_variant_t variant) +{ + + switch (variant) { + case hash_variant_x86_32: return ("hash_x86_32"); + case hash_variant_x86_128: return ("hash_x86_128"); + case hash_variant_x64_128: return ("hash_64_128"); + default: not_reached(); + } +} + +static void +hash_variant_verify(hash_variant_t variant) +{ + const size_t hashbytes = hash_variant_bits(variant) / 8; + uint8_t key[256]; + uint8_t hashes[hashbytes * 256]; + uint8_t final[hashbytes]; + unsigned i; + uint32_t computed, expected; + + memset(key, 0, sizeof(key)); + memset(hashes, 0, sizeof(hashes)); + memset(final, 0, sizeof(final)); + + /* + * Hash keys of the form {0}, {0,1}, {0,1,2}, ..., {0,1,...,255} as the + * seed. + */ + for (i = 0; i < 256; i++) { + key[i] = (uint8_t)i; + switch (variant) { + case hash_variant_x86_32: { + uint32_t out; + out = hash_x86_32(key, i, 256-i); + memcpy(&hashes[i*hashbytes], &out, hashbytes); + break; + } case hash_variant_x86_128: { + uint64_t out[2]; + hash_x86_128(key, i, 256-i, out); + memcpy(&hashes[i*hashbytes], out, hashbytes); + break; + } + case hash_variant_x64_128: { + uint64_t out[2]; + hash_x64_128(key, i, 256-i, out); + memcpy(&hashes[i*hashbytes], out, hashbytes); + break; + } + default: not_reached(); + } + } + + /* Hash the result array. 
*/ + switch (variant) { + case hash_variant_x86_32: { + uint32_t out = hash_x86_32(hashes, hashbytes*256, 0); + memcpy(final, &out, sizeof(out)); + break; + } case hash_variant_x86_128: { + uint64_t out[2]; + hash_x86_128(hashes, hashbytes*256, 0, out); + memcpy(final, out, sizeof(out)); + break; + } case hash_variant_x64_128: { + uint64_t out[2]; + hash_x64_128(hashes, hashbytes*256, 0, out); + memcpy(final, out, sizeof(out)); + break; + } default: not_reached(); + } + + computed = (final[0] << 0) | (final[1] << 8) | (final[2] << 16) | + (final[3] << 24); + + switch (variant) { + case hash_variant_x86_32: expected = 0xb0f57ee3U; break; + case hash_variant_x86_128: expected = 0xb3ece62aU; break; + case hash_variant_x64_128: expected = 0x6384ba69U; break; + default: not_reached(); + } + + assert_u32_eq(computed, expected, + "Hash mismatch for %s(): expected %#x but got %#x", + hash_variant_string(variant), expected, computed); +} + +TEST_BEGIN(test_hash_x86_32) +{ + + hash_variant_verify(hash_variant_x86_32); +} +TEST_END + +TEST_BEGIN(test_hash_x86_128) +{ + + hash_variant_verify(hash_variant_x86_128); +} +TEST_END + +TEST_BEGIN(test_hash_x64_128) +{ + + hash_variant_verify(hash_variant_x64_128); +} +TEST_END + +int +main(void) +{ + + return (test( + test_hash_x86_32, + test_hash_x86_128, + test_hash_x64_128)); +} From eca367b77909ebd649fbd0430e1e9b80dded14e0 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 17 Dec 2013 09:14:39 -0800 Subject: [PATCH 0357/3378] Fix a typo in a string constant. 
--- test/unit/hash.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/unit/hash.c b/test/unit/hash.c index 46b4decf..939bf9b3 100644 --- a/test/unit/hash.c +++ b/test/unit/hash.c @@ -54,7 +54,7 @@ hash_variant_string(hash_variant_t variant) switch (variant) { case hash_variant_x86_32: return ("hash_x86_32"); case hash_variant_x86_128: return ("hash_x86_128"); - case hash_variant_x64_128: return ("hash_64_128"); + case hash_variant_x64_128: return ("hash_x64_128"); default: not_reached(); } } From 0d6c5d8bd0d866a0ce4ce321259cec65d6459821 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 17 Dec 2013 15:14:36 -0800 Subject: [PATCH 0358/3378] Add quarantine unit tests. Verify that freed regions are quarantined, and that redzone corruption is detected. Introduce a testing idiom for intercepting/replacing internal functions. In this case the replaced function is ordinarily a static function, but the idiom should work similarly for library-private functions. --- Makefile.in | 3 +- include/jemalloc/internal/arena.h | 6 + include/jemalloc/internal/private_symbols.txt | 2 + src/arena.c | 66 +++++++++-- src/quarantine.c | 13 ++- test/include/test/test.h | 10 ++ test/unit/quarantine.c | 108 ++++++++++++++++++ 7 files changed, 193 insertions(+), 15 deletions(-) create mode 100644 test/unit/quarantine.c diff --git a/Makefile.in b/Makefile.in index 08dad5fc..afa7bde4 100644 --- a/Makefile.in +++ b/Makefile.in @@ -110,7 +110,8 @@ C_UTIL_INTEGRATION_SRCS := $(srcroot)src/util.c TESTS_UNIT := $(srcroot)test/unit/bitmap.c $(srcroot)test/unit/ckh.c \ $(srcroot)test/unit/hash.c $(srcroot)test/unit/math.c \ $(srcroot)test/unit/mq.c $(srcroot)test/unit/mtx.c \ - $(srcroot)test/unit/SFMT.c $(srcroot)test/unit/tsd.c + $(srcroot)test/unit/quarantine.c $(srcroot)test/unit/SFMT.c \ + $(srcroot)test/unit/tsd.c TESTS_INTEGRATION := $(srcroot)test/integration/aligned_alloc.c \ $(srcroot)test/integration/allocated.c \ $(srcroot)test/integration/mallocx.c \ diff --git 
a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index e43aed10..41517510 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -405,7 +405,13 @@ void arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, size_t binind, uint64_t prof_accumbytes); void arena_alloc_junk_small(void *ptr, arena_bin_info_t *bin_info, bool zero); +#ifdef JEMALLOC_JET +typedef void (arena_redzone_corruption_t)(void *, size_t, bool, size_t, + uint8_t); +extern arena_redzone_corruption_t *arena_redzone_corruption_fptr; +#endif void arena_dalloc_junk_small(void *ptr, arena_bin_info_t *bin_info); +void arena_quarantine_junk_small(void *ptr, size_t usize); void *arena_malloc_small(arena_t *arena, size_t size, bool zero); void *arena_malloc_large(arena_t *arena, size_t size, bool zero); void *arena_palloc(arena_t *arena, size_t size, size_t alignment, bool zero); diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 541e1b2c..fae648cd 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -50,8 +50,10 @@ arena_prof_ctx_set arena_prof_promoted arena_ptr_small_binind_get arena_purge_all +arena_quarantine_junk_small arena_ralloc arena_ralloc_no_move +arena_redzone_corruption arena_run_regind arena_salloc arena_stats_merge diff --git a/src/arena.c b/src/arena.c index 406cf5de..71057aad 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1432,8 +1432,28 @@ arena_alloc_junk_small(void *ptr, arena_bin_info_t *bin_info, bool zero) } } -void -arena_dalloc_junk_small(void *ptr, arena_bin_info_t *bin_info) +#ifdef JEMALLOC_JET +#undef arena_redzone_corruption +#define arena_redzone_corruption JEMALLOC_N(arena_redzone_corruption_impl) +#endif +static void +arena_redzone_corruption(void *ptr, size_t usize, bool after, + size_t offset, uint8_t byte) +{ + + malloc_printf(": Corrupt redzone %zu byte%s %s %p " + "(size %zu), 
byte=%#x\n", offset, (offset == 1) ? "" : "s", + after ? "after" : "before", ptr, usize, byte); +} +#ifdef JEMALLOC_JET +arena_redzone_corruption_t *arena_redzone_corruption_fptr = + arena_redzone_corruption; +#undef arena_redzone_corruption +#define arena_redzone_corruption arena_redzone_corruption_fptr +#endif + +static void +arena_redzones_validate(void *ptr, arena_bin_info_t *bin_info, bool reset) { size_t size = bin_info->reg_size; size_t redzone_size = bin_info->redzone_size; @@ -1441,30 +1461,52 @@ arena_dalloc_junk_small(void *ptr, arena_bin_info_t *bin_info) bool error = false; for (i = 1; i <= redzone_size; i++) { - unsigned byte; - if ((byte = *(uint8_t *)((uintptr_t)ptr - i)) != 0xa5) { + uint8_t *byte = (uint8_t *)((uintptr_t)ptr - i); + if (*byte != 0xa5) { error = true; - malloc_printf(": Corrupt redzone " - "%zu byte%s before %p (size %zu), byte=%#x\n", i, - (i == 1) ? "" : "s", ptr, size, byte); + arena_redzone_corruption(ptr, size, false, i, *byte); + if (reset) + *byte = 0xa5; } } for (i = 0; i < redzone_size; i++) { - unsigned byte; - if ((byte = *(uint8_t *)((uintptr_t)ptr + size + i)) != 0xa5) { + uint8_t *byte = (uint8_t *)((uintptr_t)ptr + size + i); + if (*byte != 0xa5) { error = true; - malloc_printf(": Corrupt redzone " - "%zu byte%s after end of %p (size %zu), byte=%#x\n", - i, (i == 1) ? 
"" : "s", ptr, size, byte); + arena_redzone_corruption(ptr, size, true, i, *byte); + if (reset) + *byte = 0xa5; } } if (opt_abort && error) abort(); +} +void +arena_dalloc_junk_small(void *ptr, arena_bin_info_t *bin_info) +{ + size_t redzone_size = bin_info->redzone_size; + + arena_redzones_validate(ptr, bin_info, false); memset((void *)((uintptr_t)ptr - redzone_size), 0x5a, bin_info->reg_interval); } +void +arena_quarantine_junk_small(void *ptr, size_t usize) +{ + size_t binind; + arena_bin_info_t *bin_info; + cassert(config_fill); + assert(opt_junk); + assert(opt_quarantine); + assert(usize <= SMALL_MAXCLASS); + + binind = SMALL_SIZE2BIN(usize); + bin_info = &arena_bin_info[binind]; + arena_redzones_validate(ptr, bin_info, true); +} + void * arena_malloc_small(arena_t *arena, size_t size, bool zero) { diff --git a/src/quarantine.c b/src/quarantine.c index f96a948d..54315116 100644 --- a/src/quarantine.c +++ b/src/quarantine.c @@ -141,8 +141,17 @@ quarantine(void *ptr) obj->usize = usize; quarantine->curbytes += usize; quarantine->curobjs++; - if (opt_junk) - memset(ptr, 0x5a, usize); + if (config_fill && opt_junk) { + /* + * Only do redzone validation if Valgrind isn't in + * operation. + */ + if ((config_valgrind == false || opt_valgrind == false) + && usize <= SMALL_MAXCLASS) + arena_quarantine_junk_small(ptr, usize); + else + memset(ptr, 0x5a, usize); + } } else { assert(quarantine->curbytes == 0); idalloc(ptr); diff --git a/test/include/test/test.h b/test/include/test/test.h index d7601f8b..e0bbfacb 100644 --- a/test/include/test/test.h +++ b/test/include/test/test.h @@ -187,12 +187,22 @@ f(void) \ p_test_init(#f); #define TEST_END \ + goto label_test_end; \ +label_test_end: \ p_test_fini(); \ } #define test(tests...) \ p_test(tests, NULL) +#define test_skip_if(e) do { \ + if (e) { \ + test_skip("%s:%s:%d: Test skipped: (%s)", \ + __func__, __FILE__, __LINE__, #e); \ + goto label_test_end; \ + } \ +} while (0) + void test_skip(const char *format, ...) 
JEMALLOC_ATTR(format(printf, 1, 2)); void test_fail(const char *format, ...) JEMALLOC_ATTR(format(printf, 1, 2)); diff --git a/test/unit/quarantine.c b/test/unit/quarantine.c new file mode 100644 index 00000000..d8a65e28 --- /dev/null +++ b/test/unit/quarantine.c @@ -0,0 +1,108 @@ +#include "test/jemalloc_test.h" + +#define QUARANTINE_SIZE 8192 +#define STRINGIFY_HELPER(x) #x +#define STRINGIFY(x) STRINGIFY_HELPER(x) + +#ifdef JEMALLOC_FILL +const char *malloc_conf = "abort:false,junk:true,redzone:true,quarantine:" + STRINGIFY(QUARANTINE_SIZE); +#endif + +void +quarantine_clear(void) +{ + void *p; + + p = mallocx(QUARANTINE_SIZE*2, 0); + assert_ptr_not_null(p, "Unexpected mallocx() failure"); + dallocx(p, 0); +} + +TEST_BEGIN(test_quarantine) +{ +#define SZ 256 +#define NQUARANTINED (QUARANTINE_SIZE/SZ) + void *quarantined[NQUARANTINED+1]; + size_t i, j; + + test_skip_if(!config_fill); + + assert_zu_eq(nallocx(SZ, 0), SZ, + "SZ=%zu does not precisely equal a size class", SZ); + + quarantine_clear(); + + /* + * Allocate enough regions to completely fill the quarantine, plus one + * more. The last iteration occurs with a completely full quarantine, + * but no regions should be drained from the quarantine until the last + * deallocation occurs. Therefore no region recycling should occur + * until after this loop completes. 
+ */ + for (i = 0; i < NQUARANTINED+1; i++) { + void *p = mallocx(SZ, 0); + assert_ptr_not_null(p, "Unexpected mallocx() failure"); + quarantined[i] = p; + dallocx(p, 0); + for (j = 0; j < i; j++) { + assert_ptr_ne(p, quarantined[j], + "Quarantined region recycled too early; " + "i=%zu, j=%zu", i, j); + } + } +#undef NQUARANTINED +#undef SZ +} +TEST_END + +static bool detected_redzone_corruption; + +static void +arena_redzone_corruption_replacement(void *ptr, size_t usize, bool after, + size_t offset, uint8_t byte) +{ + + detected_redzone_corruption = true; +} + +TEST_BEGIN(test_quarantine_redzone) +{ + char *s; + arena_redzone_corruption_t *arena_redzone_corruption_orig; + + test_skip_if(!config_fill); + + arena_redzone_corruption_orig = arena_redzone_corruption_fptr; + arena_redzone_corruption_fptr = arena_redzone_corruption_replacement; + + /* Test underflow. */ + detected_redzone_corruption = false; + s = (char *)mallocx(1, 0); + assert_ptr_not_null((void *)s, "Unexpected mallocx() failure"); + s[-1] = 0xbb; + dallocx(s, 0); + assert_true(detected_redzone_corruption, + "Did not detect redzone corruption"); + + /* Test overflow. */ + detected_redzone_corruption = false; + s = (char *)mallocx(1, 0); + assert_ptr_not_null((void *)s, "Unexpected mallocx() failure"); + s[sallocx(s, 0)] = 0xbb; + dallocx(s, 0); + assert_true(detected_redzone_corruption, + "Did not detect redzone corruption"); + + arena_redzone_corruption_fptr = arena_redzone_corruption_orig; +} +TEST_END + +int +main(void) +{ + + return (test( + test_quarantine, + test_quarantine_redzone)); +} From 1b75b4e6d11814f470e797be4a610a2e3ae323d5 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 17 Dec 2013 15:30:49 -0800 Subject: [PATCH 0359/3378] Add missing prototypes. 
--- include/jemalloc/internal/hash.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/hash.h b/include/jemalloc/internal/hash.h index 3ef10fdd..09b69df5 100644 --- a/include/jemalloc/internal/hash.h +++ b/include/jemalloc/internal/hash.h @@ -19,6 +19,11 @@ #ifdef JEMALLOC_H_INLINES #ifndef JEMALLOC_ENABLE_INLINE +uint32_t hash_x86_32(const void *key, int len, uint32_t seed); +void hash_x86_128(const void *key, const int len, uint32_t seed, + uint64_t r_out[2]); +void hash_x64_128(const void *key, const int len, const uint32_t seed, + uint64_t r_out[2]); void hash(const void *key, size_t len, const uint32_t seed, size_t r_hash[2]); #endif @@ -132,7 +137,7 @@ hash_x86_32(const void *key, int len, uint32_t seed) UNUSED JEMALLOC_INLINE void hash_x86_128(const void *key, const int len, uint32_t seed, - uint64_t r_out[2]) + uint64_t r_out[2]) { const uint8_t * data = (const uint8_t *) key; const int nblocks = len / 16; @@ -234,7 +239,7 @@ hash_x86_128(const void *key, const int len, uint32_t seed, UNUSED JEMALLOC_INLINE void hash_x64_128(const void *key, const int len, const uint32_t seed, - uint64_t r_out[2]) + uint64_t r_out[2]) { const uint8_t *data = (const uint8_t *) key; const int nblocks = len / 16; From 1393d79a4c50fd5302a6844a5795de8c8ce83c16 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 18 Dec 2013 15:35:45 -0800 Subject: [PATCH 0360/3378] Remove ENOMEM from the documented set of *mallctl() errors. *mallctl() always returns EINVAL and does partial result copying when *oldlenp is to short to hold the requested value, rather than returning ENOMEM. Therefore remove ENOMEM from the documented set of possible errors. 
--- doc/jemalloc.xml.in | 6 ------ 1 file changed, 6 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index d6f72722..9fb1b76f 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -2208,12 +2208,6 @@ malloc_conf = "xmalloc:true";]]> is too large or too small; in this case as much data as possible are read despite the error. - - ENOMEM - - *oldlenp is too short to - hold the requested value. - ENOENT From de73296d6b56abe70b08654342e136c2612d1dbd Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 18 Dec 2013 23:21:42 -0800 Subject: [PATCH 0361/3378] Add mallctl*() unit tests. --- Makefile.in | 8 +- doc/jemalloc.xml.in | 5 +- test/include/test/test.h | 22 ++- test/unit/mallctl.c | 415 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 435 insertions(+), 15 deletions(-) create mode 100644 test/unit/mallctl.c diff --git a/Makefile.in b/Makefile.in index afa7bde4..0c9b9d20 100644 --- a/Makefile.in +++ b/Makefile.in @@ -108,10 +108,10 @@ C_TESTLIB_SRCS := $(srcroot)test/src/math.c $(srcroot)test/src/mtx.c \ $(srcroot)test/src/thd.c C_UTIL_INTEGRATION_SRCS := $(srcroot)src/util.c TESTS_UNIT := $(srcroot)test/unit/bitmap.c $(srcroot)test/unit/ckh.c \ - $(srcroot)test/unit/hash.c $(srcroot)test/unit/math.c \ - $(srcroot)test/unit/mq.c $(srcroot)test/unit/mtx.c \ - $(srcroot)test/unit/quarantine.c $(srcroot)test/unit/SFMT.c \ - $(srcroot)test/unit/tsd.c + $(srcroot)test/unit/hash.c $(srcroot)test/unit/mallctl.c \ + $(srcroot)test/unit/math.c $(srcroot)test/unit/mq.c \ + $(srcroot)test/unit/mtx.c $(srcroot)test/unit/quarantine.c \ + $(srcroot)test/unit/SFMT.c $(srcroot)test/unit/tsd.c TESTS_INTEGRATION := $(srcroot)test/integration/aligned_alloc.c \ $(srcroot)test/integration/allocated.c \ $(srcroot)test/integration/mallocx.c \ diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 9fb1b76f..2740c82b 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -394,15 +394,14 @@ it is legitimate to construct code like the following: 
, \ <=, PRIu64, fmt) -#define assert_true(a, fmt...) do { \ +#define assert_b_eq(a, b, fmt...) do { \ bool a_ = (a); \ - if (!(a_ == true)) { \ + bool b_ = (b); \ + if (!(a_ == b_)) { \ p_test_fail( \ "%s:%s:%d: Failed assertion: " \ - "(%s) == true --> %s != true: ", \ + "(%s) == (%s) --> %s != %s: ", \ __func__, __FILE__, __LINE__, \ - #a, a_ ? "true" : "false", fmt); \ + #a, #b, a_ ? "true" : "false", \ + b_ ? "true" : "false", fmt); \ } \ } while (0) -#define assert_false(a, fmt...) do { \ +#define assert_b_ne(a, b, fmt...) do { \ bool a_ = (a); \ - if (!(a_ == false)) { \ + bool b_ = (b); \ + if (!(a_ != b_)) { \ p_test_fail( \ "%s:%s:%d: Failed assertion: " \ - "(%s) == false --> %s != false: ", \ + "(%s) != (%s) --> %s == %s: ", \ __func__, __FILE__, __LINE__, \ - #a, a_ ? "true" : "false", fmt); \ + #a, #b, a_ ? "true" : "false", \ + b_ ? "true" : "false", fmt); \ } \ } while (0) +#define assert_true(a, fmt...) assert_b_eq(a, true, fmt) +#define assert_false(a, fmt...) assert_b_eq(a, false, fmt) #define assert_str_eq(a, b, fmt...) 
do { \ if (strcmp((a), (b))) { \ diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c new file mode 100644 index 00000000..31fb8105 --- /dev/null +++ b/test/unit/mallctl.c @@ -0,0 +1,415 @@ +#include "test/jemalloc_test.h" + +TEST_BEGIN(test_mallctl_errors) +{ + uint64_t epoch; + size_t sz; + + assert_d_eq(mallctl("no_such_name", NULL, NULL, NULL, 0), ENOENT, + "mallctl() should return ENOENT for non-existent names"); + + assert_d_eq(mallctl("version", NULL, NULL, "0.0.0", strlen("0.0.0")), + EPERM, "mallctl() should return EPERM on attempt to write " + "read-only value"); + + assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch)-1), + EINVAL, "mallctl() should return EINVAL for input size mismatch"); + assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch)+1), + EINVAL, "mallctl() should return EINVAL for input size mismatch"); + + sz = sizeof(epoch)-1; + assert_d_eq(mallctl("epoch", &epoch, &sz, NULL, 0), EINVAL, + "mallctl() should return EINVAL for output size mismatch"); + sz = sizeof(epoch)+1; + assert_d_eq(mallctl("epoch", &epoch, &sz, NULL, 0), EINVAL, + "mallctl() should return EINVAL for output size mismatch"); +} +TEST_END + +TEST_BEGIN(test_mallctlnametomib_errors) +{ + size_t mib[1]; + size_t miblen; + + miblen = sizeof(mib)/sizeof(size_t); + assert_d_eq(mallctlnametomib("no_such_name", mib, &miblen), ENOENT, + "mallctlnametomib() should return ENOENT for non-existent names"); +} +TEST_END + +TEST_BEGIN(test_mallctlbymib_errors) +{ + uint64_t epoch; + size_t sz; + size_t mib[1]; + size_t miblen; + + miblen = sizeof(mib)/sizeof(size_t); + assert_d_eq(mallctlnametomib("version", mib, &miblen), 0, + "Unexpected mallctlnametomib() failure"); + + assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, "0.0.0", + strlen("0.0.0")), EPERM, "mallctl() should return EPERM on " + "attempt to write read-only value"); + + miblen = sizeof(mib)/sizeof(size_t); + assert_d_eq(mallctlnametomib("epoch", mib, &miblen), 0, + "Unexpected mallctlnametomib() 
failure"); + + assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, &epoch, + sizeof(epoch)-1), EINVAL, + "mallctlbymib() should return EINVAL for input size mismatch"); + assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, &epoch, + sizeof(epoch)+1), EINVAL, + "mallctlbymib() should return EINVAL for input size mismatch"); + + sz = sizeof(epoch)-1; + assert_d_eq(mallctlbymib(mib, miblen, &epoch, &sz, NULL, 0), EINVAL, + "mallctlbymib() should return EINVAL for output size mismatch"); + sz = sizeof(epoch)+1; + assert_d_eq(mallctlbymib(mib, miblen, &epoch, &sz, NULL, 0), EINVAL, + "mallctlbymib() should return EINVAL for output size mismatch"); +} +TEST_END + +TEST_BEGIN(test_mallctl_read_write) +{ + uint64_t old_epoch, new_epoch; + size_t sz = sizeof(old_epoch); + + /* Blind. */ + assert_d_eq(mallctl("epoch", NULL, NULL, NULL, 0), 0, + "Unexpected mallctl() failure"); + assert_zu_eq(sz, sizeof(old_epoch), "Unexpected output size"); + + /* Read. */ + assert_d_eq(mallctl("epoch", &old_epoch, &sz, NULL, 0), 0, + "Unexpected mallctl() failure"); + assert_zu_eq(sz, sizeof(old_epoch), "Unexpected output size"); + + /* Write. */ + assert_d_eq(mallctl("epoch", NULL, NULL, &new_epoch, sizeof(new_epoch)), + 0, "Unexpected mallctl() failure"); + assert_zu_eq(sz, sizeof(old_epoch), "Unexpected output size"); + + /* Read+write. 
*/ + assert_d_eq(mallctl("epoch", &old_epoch, &sz, &new_epoch, + sizeof(new_epoch)), 0, "Unexpected mallctl() failure"); + assert_zu_eq(sz, sizeof(old_epoch), "Unexpected output size"); +} +TEST_END + +TEST_BEGIN(test_mallctlnametomib_short_mib) +{ + size_t mib[4]; + size_t miblen; + + miblen = 3; + mib[3] = 42; + assert_d_eq(mallctlnametomib("arenas.bin.0.nregs", mib, &miblen), 0, + "Unexpected mallctlnametomib() failure"); + assert_zu_eq(miblen, 3, "Unexpected mib output length"); + assert_zu_eq(mib[3], 42, + "mallctlnametomib() wrote past the end of the input mib"); +} +TEST_END + +TEST_BEGIN(test_mallctl_config) +{ + +#define TEST_MALLCTL_CONFIG(config) do { \ + bool oldval; \ + size_t sz = sizeof(oldval); \ + assert_d_eq(mallctl("config."#config, &oldval, &sz, NULL, 0), \ + 0, "Unexpected mallctl() failure"); \ + assert_b_eq(oldval, config_##config, "Incorrect config value"); \ + assert_zu_eq(sz, sizeof(oldval), "Unexpected output size"); \ +} while (0) + + TEST_MALLCTL_CONFIG(debug); + TEST_MALLCTL_CONFIG(dss); + TEST_MALLCTL_CONFIG(fill); + TEST_MALLCTL_CONFIG(lazy_lock); + TEST_MALLCTL_CONFIG(mremap); + TEST_MALLCTL_CONFIG(munmap); + TEST_MALLCTL_CONFIG(prof); + TEST_MALLCTL_CONFIG(prof_libgcc); + TEST_MALLCTL_CONFIG(prof_libunwind); + TEST_MALLCTL_CONFIG(stats); + TEST_MALLCTL_CONFIG(tcache); + TEST_MALLCTL_CONFIG(tls); + TEST_MALLCTL_CONFIG(utrace); + TEST_MALLCTL_CONFIG(valgrind); + TEST_MALLCTL_CONFIG(xmalloc); + +#undef TEST_MALLCTL_CONFIG +} +TEST_END + +TEST_BEGIN(test_mallctl_opt) +{ + bool config_always = true; + +#define TEST_MALLCTL_OPT(t, opt, config) do { \ + t oldval; \ + size_t sz = sizeof(oldval); \ + int expected = config_##config ? 
0 : ENOENT; \ + int result = mallctl("opt."#opt, &oldval, &sz, NULL, 0); \ + assert_d_eq(result, expected, \ + "Unexpected mallctl() result for opt."#opt); \ + assert_zu_eq(sz, sizeof(oldval), "Unexpected output size"); \ +} while (0) + + TEST_MALLCTL_OPT(bool, abort, always); + TEST_MALLCTL_OPT(size_t, lg_chunk, always); + TEST_MALLCTL_OPT(const char *, dss, always); + TEST_MALLCTL_OPT(size_t, narenas, always); + TEST_MALLCTL_OPT(ssize_t, lg_dirty_mult, always); + TEST_MALLCTL_OPT(bool, stats_print, always); + TEST_MALLCTL_OPT(bool, junk, fill); + TEST_MALLCTL_OPT(size_t, quarantine, fill); + TEST_MALLCTL_OPT(bool, redzone, fill); + TEST_MALLCTL_OPT(bool, zero, fill); + TEST_MALLCTL_OPT(bool, utrace, utrace); + TEST_MALLCTL_OPT(bool, valgrind, valgrind); + TEST_MALLCTL_OPT(bool, xmalloc, xmalloc); + TEST_MALLCTL_OPT(bool, tcache, tcache); + TEST_MALLCTL_OPT(size_t, lg_tcache_max, tcache); + TEST_MALLCTL_OPT(bool, prof, prof); + TEST_MALLCTL_OPT(const char *, prof_prefix, prof); + TEST_MALLCTL_OPT(bool, prof_active, prof); + TEST_MALLCTL_OPT(ssize_t, lg_prof_sample, prof); + TEST_MALLCTL_OPT(bool, prof_accum, prof); + TEST_MALLCTL_OPT(ssize_t, lg_prof_interval, prof); + TEST_MALLCTL_OPT(bool, prof_gdump, prof); + TEST_MALLCTL_OPT(bool, prof_final, prof); + TEST_MALLCTL_OPT(bool, prof_leak, prof); + +#undef TEST_MALLCTL_OPT +} +TEST_END + +TEST_BEGIN(test_manpage_example) +{ + unsigned nbins, i; + size_t mib[4]; + size_t len, miblen; + + len = sizeof(nbins); + assert_d_eq(mallctl("arenas.nbins", &nbins, &len, NULL, 0), 0, + "Unexpected mallctl() failure"); + + miblen = 4; + assert_d_eq(mallctlnametomib("arenas.bin.0.size", mib, &miblen), 0, + "Unexpected mallctlnametomib() failure"); + for (i = 0; i < nbins; i++) { + size_t bin_size; + + mib[2] = i; + len = sizeof(bin_size); + assert_d_eq(mallctlbymib(mib, miblen, &bin_size, &len, NULL, 0), + 0, "Unexpected mallctlbymib() failure"); + /* Do something with bin_size... 
*/ + } +} +TEST_END + +TEST_BEGIN(test_thread_arena) +{ + unsigned arena_old, arena_new, narenas; + size_t sz = sizeof(unsigned); + + assert_d_eq(mallctl("arenas.narenas", &narenas, &sz, NULL, 0), 0, + "Unexpected mallctl() failure"); + assert_u_eq(narenas, opt_narenas, "Number of arenas incorrect"); + arena_new = narenas - 1; + assert_d_eq(mallctl("thread.arena", &arena_old, &sz, &arena_new, + sizeof(unsigned)), 0, "Unexpected mallctl() failure"); + arena_new = 0; + assert_d_eq(mallctl("thread.arena", &arena_old, &sz, &arena_new, + sizeof(unsigned)), 0, "Unexpected mallctl() failure"); +} +TEST_END + +TEST_BEGIN(test_arena_i_purge) +{ + unsigned narenas; + size_t sz = sizeof(unsigned); + size_t mib[3]; + size_t miblen = 3; + + assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, + "Unexpected mallctl() failure"); + + assert_d_eq(mallctl("arenas.narenas", &narenas, &sz, NULL, 0), 0, + "Unexpected mallctl() failure"); + assert_d_eq(mallctlnametomib("arena.0.purge", mib, &miblen), 0, + "Unexpected mallctlnametomib() failure"); + mib[1] = narenas; + assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, + "Unexpected mallctlbymib() failure"); +} +TEST_END + +TEST_BEGIN(test_arena_i_dss) +{ + const char *dss_prec_old, *dss_prec_new; + size_t sz = sizeof(dss_prec_old); + + dss_prec_new = "primary"; + assert_d_eq(mallctl("arena.0.dss", &dss_prec_old, &sz, &dss_prec_new, + sizeof(dss_prec_new)), 0, "Unexpected mallctl() failure"); + assert_str_ne(dss_prec_old, "primary", + "Unexpected default for dss precedence"); + + assert_d_eq(mallctl("arena.0.dss", &dss_prec_new, &sz, &dss_prec_old, + sizeof(dss_prec_old)), 0, "Unexpected mallctl() failure"); +} +TEST_END + +TEST_BEGIN(test_arenas_purge) +{ + unsigned arena = 0; + + assert_d_eq(mallctl("arenas.purge", NULL, NULL, &arena, sizeof(arena)), + 0, "Unexpected mallctl() failure"); + + assert_d_eq(mallctl("arenas.purge", NULL, NULL, NULL, 0), 0, + "Unexpected mallctl() failure"); +} +TEST_END + 
+TEST_BEGIN(test_arenas_initialized) +{ + unsigned narenas; + size_t sz = sizeof(narenas); + + assert_d_eq(mallctl("arenas.narenas", &narenas, &sz, NULL, 0), 0, + "Unexpected mallctl() failure"); + { + bool initialized[narenas]; + + sz = narenas * sizeof(bool); + assert_d_eq(mallctl("arenas.initialized", initialized, &sz, + NULL, 0), 0, "Unexpected mallctl() failure"); + } +} +TEST_END + +TEST_BEGIN(test_arenas_constants) +{ + +#define TEST_ARENAS_CONSTANT(t, name, expected) do { \ + t name; \ + size_t sz = sizeof(t); \ + assert_d_eq(mallctl("arenas."#name, &name, &sz, NULL, 0), 0, \ + "Unexpected mallctl() failure"); \ + assert_zu_eq(name, expected, "Incorrect "#name" size"); \ +} while (0) + + TEST_ARENAS_CONSTANT(size_t, quantum, QUANTUM); + TEST_ARENAS_CONSTANT(size_t, page, PAGE); + TEST_ARENAS_CONSTANT(unsigned, nbins, NBINS); + TEST_ARENAS_CONSTANT(size_t, nlruns, nlclasses); + +#undef TEST_ARENAS_CONSTANT +} +TEST_END + +TEST_BEGIN(test_arenas_bin_constants) +{ + +#define TEST_ARENAS_BIN_CONSTANT(t, name, expected) do { \ + t name; \ + size_t sz = sizeof(t); \ + assert_d_eq(mallctl("arenas.bin.0."#name, &name, &sz, NULL, 0), \ + 0, "Unexpected mallctl() failure"); \ + assert_zu_eq(name, expected, "Incorrect "#name" size"); \ +} while (0) + + TEST_ARENAS_BIN_CONSTANT(size_t, size, arena_bin_info[0].reg_size); + TEST_ARENAS_BIN_CONSTANT(uint32_t, nregs, arena_bin_info[0].nregs); + TEST_ARENAS_BIN_CONSTANT(size_t, run_size, arena_bin_info[0].run_size); + +#undef TEST_ARENAS_BIN_CONSTANT +} +TEST_END + +TEST_BEGIN(test_arenas_lrun_constants) +{ + +#define TEST_ARENAS_LRUN_CONSTANT(t, name, expected) do { \ + t name; \ + size_t sz = sizeof(t); \ + assert_d_eq(mallctl("arenas.lrun.0."#name, &name, &sz, NULL, \ + 0), 0, "Unexpected mallctl() failure"); \ + assert_zu_eq(name, expected, "Incorrect "#name" size"); \ +} while (0) + + TEST_ARENAS_LRUN_CONSTANT(size_t, size, (1 << LG_PAGE)); + +#undef TEST_ARENAS_LRUN_CONSTANT +} +TEST_END + 
+TEST_BEGIN(test_arenas_extend) +{ + unsigned narenas_before, arena, narenas_after; + size_t sz = sizeof(unsigned); + + assert_d_eq(mallctl("arenas.narenas", &narenas_before, &sz, NULL, 0), 0, + "Unexpected mallctl() failure"); + assert_d_eq(mallctl("arenas.extend", &arena, &sz, NULL, 0), 0, + "Unexpected mallctl() failure"); + assert_d_eq(mallctl("arenas.narenas", &narenas_after, &sz, NULL, 0), 0, + "Unexpected mallctl() failure"); + + assert_u_eq(narenas_before+1, narenas_after, + "Unexpected number of arenas before versus after extension"); + assert_u_eq(arena, narenas_after-1, "Unexpected arena index"); +} +TEST_END + +TEST_BEGIN(test_stats_arenas) +{ + +#define TEST_STATS_ARENAS(t, name) do { \ + t name; \ + size_t sz = sizeof(t); \ + assert_d_eq(mallctl("stats.arenas.0."#name, &name, &sz, NULL, \ + 0), 0, "Unexpected mallctl() failure"); \ +} while (0) + + TEST_STATS_ARENAS(const char *, dss); + TEST_STATS_ARENAS(unsigned, nthreads); + TEST_STATS_ARENAS(size_t, pactive); + TEST_STATS_ARENAS(size_t, pdirty); + +#undef TEST_STATS_ARENAS +} +TEST_END + +int +main(void) +{ + + return (test( + test_mallctl_errors, + test_mallctlnametomib_errors, + test_mallctlbymib_errors, + test_mallctl_read_write, + test_mallctlnametomib_short_mib, + test_mallctl_config, + test_mallctl_opt, + test_manpage_example, + test_thread_arena, + test_arena_i_purge, + test_arena_i_dss, + test_arenas_purge, + test_arenas_initialized, + test_arenas_constants, + test_arenas_bin_constants, + test_arenas_lrun_constants, + test_arenas_extend, + test_stats_arenas)); +} From d8a390020c4750a9c5f3d9780e21bfc2b0d64cdf Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 19 Dec 2013 21:40:41 -0800 Subject: [PATCH 0362/3378] Fix a few mallctl() documentation errors. Normalize mallctl() order (code and documentation). 
--- doc/jemalloc.xml.in | 37 ++++--- src/ctl.c | 245 ++++++++++++++++++++++---------------------- 2 files changed, 144 insertions(+), 138 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 2740c82b..5fc76534 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -924,19 +924,6 @@ for (i = 0; i < nbins; i++) { - - - opt.lg_chunk - (size_t) - r- - - Virtual memory chunk size (log base 2). If a chunk - size outside the supported size range is specified, the size is - silently clipped to the minimum/maximum supported size. The default - chunk size is 4 MiB (2^22). - - - opt.dss @@ -948,7 +935,23 @@ for (i = 0; i < nbins; i++) { related to mmap 2 allocation. The following settings are supported: “disabled”, “primary”, - and “secondary” (default). + and “secondary”. The default is “secondary” if + config.dss is + true, “disabled” otherwise. + + + + + + opt.lg_chunk + (size_t) + r- + + Virtual memory chunk size (log base 2). If a chunk + size outside the supported size range is specified, the size is + silently clipped to the minimum/maximum supported size. The default + chunk size is 4 MiB (2^22). + @@ -1197,7 +1200,7 @@ malloc_conf = "xmalloc:true";]]> opt.prof_active (bool) - r- + rw [] Profiling activated/deactivated. This is a secondary @@ -1819,9 +1822,9 @@ malloc_conf = "xmalloc:true";]]> similar calls made to purge dirty pages. 
- + - stats.arenas.<i>.npurged + stats.arenas.<i>.purged (uint64_t) r- [] diff --git a/src/ctl.c b/src/ctl.c index 68ffba35..cc2c5aef 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1110,6 +1110,8 @@ label_return: \ return (ret); \ } +/******************************************************************************/ + CTL_RO_NL_GEN(version, JEMALLOC_VERSION, const char *) static int @@ -1131,49 +1133,52 @@ label_return: return (ret); } -static int -thread_tcache_enabled_ctl(const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) -{ - int ret; - bool oldval; +/******************************************************************************/ - if (config_tcache == false) - return (ENOENT); +CTL_RO_BOOL_CONFIG_GEN(config_debug) +CTL_RO_BOOL_CONFIG_GEN(config_dss) +CTL_RO_BOOL_CONFIG_GEN(config_fill) +CTL_RO_BOOL_CONFIG_GEN(config_lazy_lock) +CTL_RO_BOOL_CONFIG_GEN(config_mremap) +CTL_RO_BOOL_CONFIG_GEN(config_munmap) +CTL_RO_BOOL_CONFIG_GEN(config_prof) +CTL_RO_BOOL_CONFIG_GEN(config_prof_libgcc) +CTL_RO_BOOL_CONFIG_GEN(config_prof_libunwind) +CTL_RO_BOOL_CONFIG_GEN(config_stats) +CTL_RO_BOOL_CONFIG_GEN(config_tcache) +CTL_RO_BOOL_CONFIG_GEN(config_tls) +CTL_RO_BOOL_CONFIG_GEN(config_utrace) +CTL_RO_BOOL_CONFIG_GEN(config_valgrind) +CTL_RO_BOOL_CONFIG_GEN(config_xmalloc) - oldval = tcache_enabled_get(); - if (newp != NULL) { - if (newlen != sizeof(bool)) { - ret = EINVAL; - goto label_return; - } - tcache_enabled_set(*(bool *)newp); - } - READ(oldval, bool); +/******************************************************************************/ - ret = 0; -label_return: - return (ret); -} +CTL_RO_NL_GEN(opt_abort, opt_abort, bool) +CTL_RO_NL_GEN(opt_dss, opt_dss, const char *) +CTL_RO_NL_GEN(opt_lg_chunk, opt_lg_chunk, size_t) +CTL_RO_NL_GEN(opt_narenas, opt_narenas, size_t) +CTL_RO_NL_GEN(opt_lg_dirty_mult, opt_lg_dirty_mult, ssize_t) +CTL_RO_NL_GEN(opt_stats_print, opt_stats_print, bool) +CTL_RO_NL_CGEN(config_fill, opt_junk, opt_junk, bool) 
+CTL_RO_NL_CGEN(config_fill, opt_quarantine, opt_quarantine, size_t) +CTL_RO_NL_CGEN(config_fill, opt_redzone, opt_redzone, bool) +CTL_RO_NL_CGEN(config_fill, opt_zero, opt_zero, bool) +CTL_RO_NL_CGEN(config_utrace, opt_utrace, opt_utrace, bool) +CTL_RO_NL_CGEN(config_valgrind, opt_valgrind, opt_valgrind, bool) +CTL_RO_NL_CGEN(config_xmalloc, opt_xmalloc, opt_xmalloc, bool) +CTL_RO_NL_CGEN(config_tcache, opt_tcache, opt_tcache, bool) +CTL_RO_NL_CGEN(config_tcache, opt_lg_tcache_max, opt_lg_tcache_max, ssize_t) +CTL_RO_NL_CGEN(config_prof, opt_prof, opt_prof, bool) +CTL_RO_NL_CGEN(config_prof, opt_prof_prefix, opt_prof_prefix, const char *) +CTL_RO_CGEN(config_prof, opt_prof_active, opt_prof_active, bool) /* Mutable. */ +CTL_RO_NL_CGEN(config_prof, opt_lg_prof_sample, opt_lg_prof_sample, size_t) +CTL_RO_NL_CGEN(config_prof, opt_prof_accum, opt_prof_accum, bool) +CTL_RO_NL_CGEN(config_prof, opt_lg_prof_interval, opt_lg_prof_interval, ssize_t) +CTL_RO_NL_CGEN(config_prof, opt_prof_gdump, opt_prof_gdump, bool) +CTL_RO_NL_CGEN(config_prof, opt_prof_final, opt_prof_final, bool) +CTL_RO_NL_CGEN(config_prof, opt_prof_leak, opt_prof_leak, bool) -static int -thread_tcache_flush_ctl(const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) -{ - int ret; - - if (config_tcache == false) - return (ENOENT); - - READONLY(); - WRITEONLY(); - - tcache_flush(); - - ret = 0; -label_return: - return (ret); -} +/******************************************************************************/ static int thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, @@ -1235,50 +1240,49 @@ CTL_RO_NL_CGEN(config_stats, thread_deallocated, CTL_RO_NL_CGEN(config_stats, thread_deallocatedp, &thread_allocated_tsd_get()->deallocated, uint64_t *) -/******************************************************************************/ +static int +thread_tcache_enabled_ctl(const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, 
size_t newlen) +{ + int ret; + bool oldval; -CTL_RO_BOOL_CONFIG_GEN(config_debug) -CTL_RO_BOOL_CONFIG_GEN(config_dss) -CTL_RO_BOOL_CONFIG_GEN(config_fill) -CTL_RO_BOOL_CONFIG_GEN(config_lazy_lock) -CTL_RO_BOOL_CONFIG_GEN(config_mremap) -CTL_RO_BOOL_CONFIG_GEN(config_munmap) -CTL_RO_BOOL_CONFIG_GEN(config_prof) -CTL_RO_BOOL_CONFIG_GEN(config_prof_libgcc) -CTL_RO_BOOL_CONFIG_GEN(config_prof_libunwind) -CTL_RO_BOOL_CONFIG_GEN(config_stats) -CTL_RO_BOOL_CONFIG_GEN(config_tcache) -CTL_RO_BOOL_CONFIG_GEN(config_tls) -CTL_RO_BOOL_CONFIG_GEN(config_utrace) -CTL_RO_BOOL_CONFIG_GEN(config_valgrind) -CTL_RO_BOOL_CONFIG_GEN(config_xmalloc) + if (config_tcache == false) + return (ENOENT); -/******************************************************************************/ + oldval = tcache_enabled_get(); + if (newp != NULL) { + if (newlen != sizeof(bool)) { + ret = EINVAL; + goto label_return; + } + tcache_enabled_set(*(bool *)newp); + } + READ(oldval, bool); -CTL_RO_NL_GEN(opt_abort, opt_abort, bool) -CTL_RO_NL_GEN(opt_dss, opt_dss, const char *) -CTL_RO_NL_GEN(opt_lg_chunk, opt_lg_chunk, size_t) -CTL_RO_NL_GEN(opt_narenas, opt_narenas, size_t) -CTL_RO_NL_GEN(opt_lg_dirty_mult, opt_lg_dirty_mult, ssize_t) -CTL_RO_NL_GEN(opt_stats_print, opt_stats_print, bool) -CTL_RO_NL_CGEN(config_fill, opt_junk, opt_junk, bool) -CTL_RO_NL_CGEN(config_fill, opt_zero, opt_zero, bool) -CTL_RO_NL_CGEN(config_fill, opt_quarantine, opt_quarantine, size_t) -CTL_RO_NL_CGEN(config_fill, opt_redzone, opt_redzone, bool) -CTL_RO_NL_CGEN(config_utrace, opt_utrace, opt_utrace, bool) -CTL_RO_NL_CGEN(config_valgrind, opt_valgrind, opt_valgrind, bool) -CTL_RO_NL_CGEN(config_xmalloc, opt_xmalloc, opt_xmalloc, bool) -CTL_RO_NL_CGEN(config_tcache, opt_tcache, opt_tcache, bool) -CTL_RO_NL_CGEN(config_tcache, opt_lg_tcache_max, opt_lg_tcache_max, ssize_t) -CTL_RO_NL_CGEN(config_prof, opt_prof, opt_prof, bool) -CTL_RO_NL_CGEN(config_prof, opt_prof_prefix, opt_prof_prefix, const char *) -CTL_RO_CGEN(config_prof, 
opt_prof_active, opt_prof_active, bool) /* Mutable. */ -CTL_RO_NL_CGEN(config_prof, opt_lg_prof_sample, opt_lg_prof_sample, size_t) -CTL_RO_NL_CGEN(config_prof, opt_lg_prof_interval, opt_lg_prof_interval, ssize_t) -CTL_RO_NL_CGEN(config_prof, opt_prof_gdump, opt_prof_gdump, bool) -CTL_RO_NL_CGEN(config_prof, opt_prof_final, opt_prof_final, bool) -CTL_RO_NL_CGEN(config_prof, opt_prof_leak, opt_prof_leak, bool) -CTL_RO_NL_CGEN(config_prof, opt_prof_accum, opt_prof_accum, bool) + ret = 0; +label_return: + return (ret); +} + +static int +thread_tcache_flush_ctl(const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) +{ + int ret; + + if (config_tcache == false) + return (ENOENT); + + READONLY(); + WRITEONLY(); + + tcache_flush(); + + ret = 0; +label_return: + return (ret); +} /******************************************************************************/ @@ -1390,31 +1394,8 @@ label_return: return (ret); } - /******************************************************************************/ -CTL_RO_NL_GEN(arenas_bin_i_size, arena_bin_info[mib[2]].reg_size, size_t) -CTL_RO_NL_GEN(arenas_bin_i_nregs, arena_bin_info[mib[2]].nregs, uint32_t) -CTL_RO_NL_GEN(arenas_bin_i_run_size, arena_bin_info[mib[2]].run_size, size_t) -static const ctl_named_node_t * -arenas_bin_i_index(const size_t *mib, size_t miblen, size_t i) -{ - - if (i > NBINS) - return (NULL); - return (super_arenas_bin_i_node); -} - -CTL_RO_NL_GEN(arenas_lrun_i_size, ((mib[2]+1) << LG_PAGE), size_t) -static const ctl_named_node_t * -arenas_lrun_i_index(const size_t *mib, size_t miblen, size_t i) -{ - - if (i > nlclasses) - return (NULL); - return (super_arenas_lrun_i_node); -} - static int arenas_narenas_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) @@ -1468,7 +1449,28 @@ CTL_RO_NL_GEN(arenas_page, PAGE, size_t) CTL_RO_NL_CGEN(config_tcache, arenas_tcache_max, tcache_maxclass, size_t) CTL_RO_NL_GEN(arenas_nbins, NBINS, unsigned) 
CTL_RO_NL_CGEN(config_tcache, arenas_nhbins, nhbins, unsigned) +CTL_RO_NL_GEN(arenas_bin_i_size, arena_bin_info[mib[2]].reg_size, size_t) +CTL_RO_NL_GEN(arenas_bin_i_nregs, arena_bin_info[mib[2]].nregs, uint32_t) +CTL_RO_NL_GEN(arenas_bin_i_run_size, arena_bin_info[mib[2]].run_size, size_t) +static const ctl_named_node_t * +arenas_bin_i_index(const size_t *mib, size_t miblen, size_t i) +{ + + if (i > NBINS) + return (NULL); + return (super_arenas_bin_i_node); +} + CTL_RO_NL_GEN(arenas_nlruns, nlclasses, size_t) +CTL_RO_NL_GEN(arenas_lrun_i_size, ((mib[2]+1) << LG_PAGE), size_t) +static const ctl_named_node_t * +arenas_lrun_i_index(const size_t *mib, size_t miblen, size_t i) +{ + + if (i > nlclasses) + return (NULL); + return (super_arenas_lrun_i_node); +} static int arenas_purge_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, @@ -1575,6 +1577,11 @@ CTL_RO_NL_CGEN(config_prof, prof_interval, prof_interval, uint64_t) /******************************************************************************/ +CTL_RO_CGEN(config_stats, stats_cactive, &stats_cactive, size_t *) +CTL_RO_CGEN(config_stats, stats_allocated, ctl_stats.allocated, size_t) +CTL_RO_CGEN(config_stats, stats_active, ctl_stats.active, size_t) +CTL_RO_CGEN(config_stats, stats_mapped, ctl_stats.mapped, size_t) + CTL_RO_CGEN(config_stats, stats_chunks_current, ctl_stats.chunks.current, size_t) CTL_RO_CGEN(config_stats, stats_chunks_total, ctl_stats.chunks.total, uint64_t) @@ -1582,6 +1589,20 @@ CTL_RO_CGEN(config_stats, stats_chunks_high, ctl_stats.chunks.high, size_t) CTL_RO_CGEN(config_stats, stats_huge_allocated, huge_allocated, size_t) CTL_RO_CGEN(config_stats, stats_huge_nmalloc, huge_nmalloc, uint64_t) CTL_RO_CGEN(config_stats, stats_huge_ndalloc, huge_ndalloc, uint64_t) + +CTL_RO_GEN(stats_arenas_i_dss, ctl_stats.arenas[mib[2]].dss, const char *) +CTL_RO_GEN(stats_arenas_i_nthreads, ctl_stats.arenas[mib[2]].nthreads, unsigned) +CTL_RO_GEN(stats_arenas_i_pactive, 
ctl_stats.arenas[mib[2]].pactive, size_t) +CTL_RO_GEN(stats_arenas_i_pdirty, ctl_stats.arenas[mib[2]].pdirty, size_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_mapped, + ctl_stats.arenas[mib[2]].astats.mapped, size_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_npurge, + ctl_stats.arenas[mib[2]].astats.npurge, uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_nmadvise, + ctl_stats.arenas[mib[2]].astats.nmadvise, uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_purged, + ctl_stats.arenas[mib[2]].astats.purged, uint64_t) + CTL_RO_CGEN(config_stats, stats_arenas_i_small_allocated, ctl_stats.arenas[mib[2]].allocated_small, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_small_nmalloc, @@ -1645,19 +1666,6 @@ stats_arenas_i_lruns_j_index(const size_t *mib, size_t miblen, size_t j) return (super_stats_arenas_i_lruns_j_node); } -CTL_RO_GEN(stats_arenas_i_nthreads, ctl_stats.arenas[mib[2]].nthreads, unsigned) -CTL_RO_GEN(stats_arenas_i_dss, ctl_stats.arenas[mib[2]].dss, const char *) -CTL_RO_GEN(stats_arenas_i_pactive, ctl_stats.arenas[mib[2]].pactive, size_t) -CTL_RO_GEN(stats_arenas_i_pdirty, ctl_stats.arenas[mib[2]].pdirty, size_t) -CTL_RO_CGEN(config_stats, stats_arenas_i_mapped, - ctl_stats.arenas[mib[2]].astats.mapped, size_t) -CTL_RO_CGEN(config_stats, stats_arenas_i_npurge, - ctl_stats.arenas[mib[2]].astats.npurge, uint64_t) -CTL_RO_CGEN(config_stats, stats_arenas_i_nmadvise, - ctl_stats.arenas[mib[2]].astats.nmadvise, uint64_t) -CTL_RO_CGEN(config_stats, stats_arenas_i_purged, - ctl_stats.arenas[mib[2]].astats.purged, uint64_t) - static const ctl_named_node_t * stats_arenas_i_index(const size_t *mib, size_t miblen, size_t i) { @@ -1674,8 +1682,3 @@ label_return: malloc_mutex_unlock(&ctl_mtx); return (ret); } - -CTL_RO_CGEN(config_stats, stats_cactive, &stats_cactive, size_t *) -CTL_RO_CGEN(config_stats, stats_allocated, ctl_stats.allocated, size_t) -CTL_RO_CGEN(config_stats, stats_active, ctl_stats.active, size_t) -CTL_RO_CGEN(config_stats, stats_mapped, 
ctl_stats.mapped, size_t) From 0a8696658f8135ae39eeb86e57f7cecc01144894 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 20 Dec 2013 15:47:16 -0800 Subject: [PATCH 0363/3378] Add stats unit tests. --- Makefile.in | 3 +- test/unit/stats.c | 350 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 352 insertions(+), 1 deletion(-) create mode 100644 test/unit/stats.c diff --git a/Makefile.in b/Makefile.in index 0c9b9d20..f3f0e167 100644 --- a/Makefile.in +++ b/Makefile.in @@ -111,7 +111,8 @@ TESTS_UNIT := $(srcroot)test/unit/bitmap.c $(srcroot)test/unit/ckh.c \ $(srcroot)test/unit/hash.c $(srcroot)test/unit/mallctl.c \ $(srcroot)test/unit/math.c $(srcroot)test/unit/mq.c \ $(srcroot)test/unit/mtx.c $(srcroot)test/unit/quarantine.c \ - $(srcroot)test/unit/SFMT.c $(srcroot)test/unit/tsd.c + $(srcroot)test/unit/SFMT.c $(srcroot)test/unit/stats.c \ + $(srcroot)test/unit/tsd.c TESTS_INTEGRATION := $(srcroot)test/integration/aligned_alloc.c \ $(srcroot)test/integration/allocated.c \ $(srcroot)test/integration/mallocx.c \ diff --git a/test/unit/stats.c b/test/unit/stats.c new file mode 100644 index 00000000..3fa08172 --- /dev/null +++ b/test/unit/stats.c @@ -0,0 +1,350 @@ +#include "test/jemalloc_test.h" + +TEST_BEGIN(test_stats_summary) +{ + size_t *cactive; + size_t sz, allocated, active, mapped; + int expected = config_stats ? 
0 : ENOENT; + + sz = sizeof(cactive); + assert_d_eq(mallctl("stats.cactive", &cactive, &sz, NULL, 0), expected, + "Unexpected mallctl() result"); + + sz = sizeof(size_t); + assert_d_eq(mallctl("stats.allocated", &allocated, &sz, NULL, 0), + expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.active", &active, &sz, NULL, 0), expected, + "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.mapped", &mapped, &sz, NULL, 0), expected, + "Unexpected mallctl() result"); + + if (config_stats) { + assert_zu_le(active, *cactive, + "active should be no larger than cactive"); + assert_zu_le(allocated, active, + "allocated should be no larger than active"); + assert_zu_le(active, mapped, + "active should be no larger than mapped"); + } +} +TEST_END + +TEST_BEGIN(test_stats_chunks) +{ + size_t current, total, high; + size_t sz = sizeof(size_t); + int expected = config_stats ? 0 : ENOENT; + + assert_d_eq(mallctl("stats.chunks.current", ¤t, &sz, NULL, 0), + expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.chunks.total", &total, &sz, NULL, 0), + expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.chunks.high", &high, &sz, NULL, 0), expected, + "Unexpected mallctl() result"); + + if (config_stats) { + assert_zu_le(current, high, + "current should be no larger than high"); + assert_zu_le(high, total, + "high should be no larger than total"); + } +} +TEST_END + +TEST_BEGIN(test_stats_huge) +{ + void *p; + uint64_t epoch; + size_t allocated; + uint64_t nmalloc, ndalloc; + size_t sz; + int expected = config_stats ? 
0 : ENOENT; + + p = mallocx(arena_maxclass+1, 0); + assert_ptr_not_null(p, "Unexpected mallocx() failure"); + + assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch)), 0, + "Unexpected mallctl() failure"); + + sz = sizeof(size_t); + assert_d_eq(mallctl("stats.huge.allocated", &allocated, &sz, NULL, 0), + expected, "Unexpected mallctl() result"); + sz = sizeof(uint64_t); + assert_d_eq(mallctl("stats.huge.nmalloc", &nmalloc, &sz, NULL, 0), + expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.huge.ndalloc", &ndalloc, &sz, NULL, 0), + expected, "Unexpected mallctl() result"); + + if (config_stats) { + assert_zu_gt(allocated, 0, + "allocated should be greater than zero"); + assert_u64_ge(nmalloc, ndalloc, + "nmalloc should be at least as large as ndalloc"); + } + + dallocx(p, 0); +} +TEST_END + +TEST_BEGIN(test_stats_arenas_summary) +{ + unsigned arena; + void *small, *large; + uint64_t epoch; + size_t sz; + int expected = config_stats ? 0 : ENOENT; + size_t mapped; + uint64_t npurge, nmadvise, purged; + + arena = 0; + assert_d_eq(mallctl("thread.arena", NULL, NULL, &arena, sizeof(arena)), + 0, "Unexpected mallctl() failure"); + + small = mallocx(SMALL_MAXCLASS, 0); + assert_ptr_not_null(small, "Unexpected mallocx() failure"); + large = mallocx(arena_maxclass, 0); + assert_ptr_not_null(large, "Unexpected mallocx() failure"); + + assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, + "Unexpected mallctl() failure"); + + assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch)), 0, + "Unexpected mallctl() failure"); + + sz = sizeof(size_t); + assert_d_eq(mallctl("stats.arenas.0.mapped", &mapped, &sz, NULL, 0), + expected, "Unexepected mallctl() result"); + sz = sizeof(uint64_t); + assert_d_eq(mallctl("stats.arenas.0.npurge", &npurge, &sz, NULL, 0), + expected, "Unexepected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.nmadvise", &nmadvise, &sz, NULL, 0), + expected, "Unexepected mallctl() result"); + 
assert_d_eq(mallctl("stats.arenas.0.purged", &purged, &sz, NULL, 0), + expected, "Unexepected mallctl() result"); + + if (config_stats) { + assert_u64_gt(npurge, 0, "At least one purge occurred"); + assert_u64_le(nmadvise, purged, + "nmadvise should be no greater than purged"); + } + + dallocx(small, 0); + dallocx(large, 0); +} +TEST_END + +TEST_BEGIN(test_stats_arenas_small) +{ + unsigned arena; + void *p; + size_t sz, allocated; + uint64_t epoch, nmalloc, ndalloc, nrequests; + int expected = config_stats ? 0 : ENOENT; + + arena = 0; + assert_d_eq(mallctl("thread.arena", NULL, NULL, &arena, sizeof(arena)), + 0, "Unexpected mallctl() failure"); + + p = mallocx(SMALL_MAXCLASS, 0); + assert_ptr_not_null(p, "Unexpected mallocx() failure"); + + assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch)), 0, + "Unexpected mallctl() failure"); + + sz = sizeof(size_t); + assert_d_eq(mallctl("stats.arenas.0.small.allocated", &allocated, &sz, + NULL, 0), expected, "Unexpected mallctl() result"); + sz = sizeof(uint64_t); + assert_d_eq(mallctl("stats.arenas.0.small.nmalloc", &nmalloc, &sz, + NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.small.ndalloc", &ndalloc, &sz, + NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.small.nrequests", &nrequests, &sz, + NULL, 0), expected, "Unexpected mallctl() result"); + + if (config_stats) { + assert_zu_gt(allocated, 0, + "allocated should be greated than zero"); + assert_u64_gt(nmalloc, 0, + "nmalloc should be no greater than zero"); + assert_u64_ge(nmalloc, ndalloc, + "nmalloc should be at least as large as ndalloc"); + assert_u64_gt(nrequests, 0, + "nrequests should be no greater than zero"); + } + + dallocx(p, 0); +} +TEST_END + +TEST_BEGIN(test_stats_arenas_large) +{ + unsigned arena; + void *p; + size_t sz, allocated; + uint64_t epoch, nmalloc, ndalloc, nrequests; + int expected = config_stats ? 
0 : ENOENT; + + arena = 0; + assert_d_eq(mallctl("thread.arena", NULL, NULL, &arena, sizeof(arena)), + 0, "Unexpected mallctl() failure"); + + p = mallocx(arena_maxclass, 0); + assert_ptr_not_null(p, "Unexpected mallocx() failure"); + + assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch)), 0, + "Unexpected mallctl() failure"); + + sz = sizeof(size_t); + assert_d_eq(mallctl("stats.arenas.0.large.allocated", &allocated, &sz, + NULL, 0), expected, "Unexpected mallctl() result"); + sz = sizeof(uint64_t); + assert_d_eq(mallctl("stats.arenas.0.large.nmalloc", &nmalloc, &sz, + NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.large.ndalloc", &ndalloc, &sz, + NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.large.nrequests", &nrequests, &sz, + NULL, 0), expected, "Unexpected mallctl() result"); + + if (config_stats) { + assert_zu_gt(allocated, 0, + "allocated should be greated than zero"); + assert_zu_gt(nmalloc, 0, + "nmalloc should be no greater than zero"); + assert_zu_ge(nmalloc, ndalloc, + "nmalloc should be at least as large as ndalloc"); + assert_zu_gt(nrequests, 0, + "nrequests should be no greater than zero"); + } + + dallocx(p, 0); +} +TEST_END + +TEST_BEGIN(test_stats_arenas_bins) +{ + unsigned arena; + void *p; + size_t sz, allocated; + uint64_t epoch, nmalloc, ndalloc, nrequests, nfills, nflushes; + uint64_t nruns, nreruns, curruns; + int expected = config_stats ? 0 : ENOENT; + + arena = 0; + assert_d_eq(mallctl("thread.arena", NULL, NULL, &arena, sizeof(arena)), + 0, "Unexpected mallctl() failure"); + + p = mallocx(arena_bin_info[0].reg_size, 0); + assert_ptr_not_null(p, "Unexpected mallocx() failure"); + + assert_d_eq(mallctl("thread.tcache.flush", NULL, NULL, NULL, 0), + config_tcache ? 
0 : ENOENT, "Unexpected mallctl() result"); + + assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch)), 0, + "Unexpected mallctl() failure"); + + sz = sizeof(size_t); + assert_d_eq(mallctl("stats.arenas.0.bins.0.allocated", &allocated, &sz, + NULL, 0), expected, "Unexpected mallctl() result"); + sz = sizeof(uint64_t); + assert_d_eq(mallctl("stats.arenas.0.bins.0.nmalloc", &nmalloc, &sz, + NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.bins.0.ndalloc", &ndalloc, &sz, + NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.bins.0.nrequests", &nrequests, &sz, + NULL, 0), expected, "Unexpected mallctl() result"); + + assert_d_eq(mallctl("stats.arenas.0.bins.0.nfills", &nfills, &sz, + NULL, 0), config_tcache ? expected : ENOENT, + "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.bins.0.nflushes", &nflushes, &sz, + NULL, 0), config_tcache ? expected : ENOENT, + "Unexpected mallctl() result"); + + assert_d_eq(mallctl("stats.arenas.0.bins.0.nruns", &nruns, &sz, + NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.bins.0.nreruns", &nreruns, &sz, + NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.bins.0.curruns", &curruns, &sz, + NULL, 0), expected, "Unexpected mallctl() result"); + + if (config_stats) { + assert_zu_gt(allocated, 0, + "allocated should be greated than zero"); + assert_u64_gt(nmalloc, 0, + "nmalloc should be no greater than zero"); + assert_u64_ge(nmalloc, ndalloc, + "nmalloc should be at least as large as ndalloc"); + assert_u64_gt(nrequests, 0, + "nrequests should be no greater than zero"); + if (config_tcache) { + assert_u64_gt(nfills, 0, + "At least one fill has occurred"); + assert_u64_gt(nflushes, 0, + "At least one flush has occurred"); + } + assert_u64_gt(nruns, 0, "At least one run has been allocated"); + assert_u64_gt(curruns, 0, + "At least one run is currently 
allocated"); + } + + dallocx(p, 0); +} +TEST_END + +TEST_BEGIN(test_stats_arenas_lruns) +{ + unsigned arena; + void *p; + uint64_t epoch, nmalloc, ndalloc, nrequests, curruns; + size_t sz = sizeof(uint64_t); + int expected = config_stats ? 0 : ENOENT; + + arena = 0; + assert_d_eq(mallctl("thread.arena", NULL, NULL, &arena, sizeof(arena)), + 0, "Unexpected mallctl() failure"); + + p = mallocx(SMALL_MAXCLASS+1, 0); + assert_ptr_not_null(p, "Unexpected mallocx() failure"); + + assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch)), 0, + "Unexpected mallctl() failure"); + + assert_d_eq(mallctl("stats.arenas.0.lruns.0.nmalloc", &nmalloc, &sz, + NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.lruns.0.ndalloc", &ndalloc, &sz, + NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.lruns.0.nrequests", &nrequests, &sz, + NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.lruns.0.curruns", &curruns, &sz, + NULL, 0), expected, "Unexpected mallctl() result"); + + if (config_stats) { + assert_u64_gt(nmalloc, 0, + "nmalloc should be no greater than zero"); + assert_u64_ge(nmalloc, ndalloc, + "nmalloc should be at least as large as ndalloc"); + assert_u64_gt(nrequests, 0, + "nrequests should be no greater than zero"); + assert_u64_gt(curruns, 0, + "At least one run is currently allocated"); + } + + dallocx(p, 0); +} +TEST_END + +int +main(void) +{ + + return (test( + test_stats_summary, + test_stats_chunks, + test_stats_huge, + test_stats_arenas_summary, + test_stats_arenas_small, + test_stats_arenas_large, + test_stats_arenas_bins, + test_stats_arenas_lruns)); +} From 040531292147e8e4847d6c666746a426403d0cf5 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 20 Dec 2013 15:52:01 -0800 Subject: [PATCH 0364/3378] Fix an uninitialized variable read in xallocx(). 
--- src/jemalloc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/jemalloc.c b/src/jemalloc.c index b8a4fb07..5845fe9a 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1625,6 +1625,9 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) } if (iralloct(ptr, size, extra, alignment, zero, true, try_tcache_alloc, try_tcache_dalloc, arena) == NULL) { + if (config_stats == false && (config_valgrind == false + || opt_valgrind == false)) + old_usize = isalloc(ptr, false); usize = old_usize; goto label_not_moved; } From 5aeeda6f927005294c2e23605b57c5d601a80a8c Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 2 Jan 2014 13:38:23 -0800 Subject: [PATCH 0365/3378] Clean up code formatting. --- test/unit/hash.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/test/unit/hash.c b/test/unit/hash.c index 939bf9b3..0446e524 100644 --- a/test/unit/hash.c +++ b/test/unit/hash.c @@ -90,14 +90,12 @@ hash_variant_verify(hash_variant_t variant) hash_x86_128(key, i, 256-i, out); memcpy(&hashes[i*hashbytes], out, hashbytes); break; - } - case hash_variant_x64_128: { + } case hash_variant_x64_128: { uint64_t out[2]; hash_x64_128(key, i, 256-i, out); memcpy(&hashes[i*hashbytes], out, hashbytes); break; - } - default: not_reached(); + } default: not_reached(); } } From b980cc774a9ccb208a82f4e9ccdcc695d06a960a Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 2 Jan 2014 16:08:28 -0800 Subject: [PATCH 0366/3378] Add rtree unit tests. 
--- Makefile.in | 4 +- include/jemalloc/internal/private_symbols.txt | 1 + include/jemalloc/internal/rtree.h | 14 ++- src/chunk.c | 2 +- src/rtree.c | 35 +++++- test/unit/rtree.c | 119 ++++++++++++++++++ 6 files changed, 165 insertions(+), 10 deletions(-) create mode 100644 test/unit/rtree.c diff --git a/Makefile.in b/Makefile.in index f3f0e167..6dd91743 100644 --- a/Makefile.in +++ b/Makefile.in @@ -111,8 +111,8 @@ TESTS_UNIT := $(srcroot)test/unit/bitmap.c $(srcroot)test/unit/ckh.c \ $(srcroot)test/unit/hash.c $(srcroot)test/unit/mallctl.c \ $(srcroot)test/unit/math.c $(srcroot)test/unit/mq.c \ $(srcroot)test/unit/mtx.c $(srcroot)test/unit/quarantine.c \ - $(srcroot)test/unit/SFMT.c $(srcroot)test/unit/stats.c \ - $(srcroot)test/unit/tsd.c + $(srcroot)test/unit/rtree.c $(srcroot)test/unit/SFMT.c \ + $(srcroot)test/unit/stats.c $(srcroot)test/unit/tsd.c TESTS_INTEGRATION := $(srcroot)test/integration/aligned_alloc.c \ $(srcroot)test/integration/allocated.c \ $(srcroot)test/integration/mallocx.c \ diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index fae648cd..0ad600b9 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -327,6 +327,7 @@ quarantine_tsd_get_wrapper quarantine_tsd_init_head quarantine_tsd_set register_zone +rtree_delete rtree_get rtree_get_locked rtree_new diff --git a/include/jemalloc/internal/rtree.h b/include/jemalloc/internal/rtree.h index 9bd98548..2fa59867 100644 --- a/include/jemalloc/internal/rtree.h +++ b/include/jemalloc/internal/rtree.h @@ -20,11 +20,16 @@ typedef struct rtree_s rtree_t; # define RTREE_NODESIZE CACHELINE #endif +typedef void *(rtree_alloc_t)(size_t); +typedef void (rtree_dalloc_t)(void *); + #endif /* JEMALLOC_H_TYPES */ /******************************************************************************/ #ifdef JEMALLOC_H_STRUCTS struct rtree_s { + rtree_alloc_t *alloc; + rtree_dalloc_t *dalloc; 
malloc_mutex_t mutex; void **root; unsigned height; @@ -35,7 +40,8 @@ struct rtree_s { /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -rtree_t *rtree_new(unsigned bits); +rtree_t *rtree_new(unsigned bits, rtree_alloc_t *alloc, rtree_dalloc_t *dalloc); +void rtree_delete(rtree_t *rtree); void rtree_prefork(rtree_t *rtree); void rtree_postfork_parent(rtree_t *rtree); void rtree_postfork_child(rtree_t *rtree); @@ -45,7 +51,7 @@ void rtree_postfork_child(rtree_t *rtree); #ifdef JEMALLOC_H_INLINES #ifndef JEMALLOC_ENABLE_INLINE -#ifndef JEMALLOC_DEBUG +#ifdef JEMALLOC_DEBUG void *rtree_get_locked(rtree_t *rtree, uintptr_t key); #endif void *rtree_get(rtree_t *rtree, uintptr_t key); @@ -68,7 +74,7 @@ f(rtree_t *rtree, uintptr_t key) \ i < height - 1; \ i++, lshift += bits, node = child) { \ bits = rtree->level2bits[i]; \ - subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR + \ + subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR + \ 3)) - bits); \ child = (void**)node[subkey]; \ if (child == NULL) { \ @@ -138,7 +144,7 @@ rtree_set(rtree_t *rtree, uintptr_t key, void *val) bits); child = (void**)node[subkey]; if (child == NULL) { - child = (void**)base_alloc(sizeof(void *) << + child = (void**)rtree->alloc(sizeof(void *) << rtree->level2bits[i+1]); if (child == NULL) { malloc_mutex_unlock(&rtree->mutex); diff --git a/src/chunk.c b/src/chunk.c index a93d28aa..71bad5a0 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -356,7 +356,7 @@ chunk_boot(void) extent_tree_ad_new(&chunks_ad_dss); if (config_ivsalloc) { chunks_rtree = rtree_new((ZU(1) << (LG_SIZEOF_PTR+3)) - - opt_lg_chunk); + opt_lg_chunk, base_alloc, NULL); if (chunks_rtree == NULL) return (true); } diff --git a/src/rtree.c b/src/rtree.c index 90c6935a..4e26766a 100644 --- a/src/rtree.c +++ b/src/rtree.c @@ -2,24 +2,28 @@ #include "jemalloc/internal/jemalloc_internal.h" rtree_t * -rtree_new(unsigned bits) +rtree_new(unsigned bits, rtree_alloc_t *alloc, 
rtree_dalloc_t *dalloc) { rtree_t *ret; unsigned bits_per_level, height, i; + assert(bits > 0 && bits <= (sizeof(uintptr_t) << 3)); + bits_per_level = ffs(pow2_ceil((RTREE_NODESIZE / sizeof(void *)))) - 1; height = bits / bits_per_level; if (height * bits_per_level != bits) height++; assert(height * bits_per_level >= bits); - ret = (rtree_t*)base_alloc(offsetof(rtree_t, level2bits) + + ret = (rtree_t*)alloc(offsetof(rtree_t, level2bits) + (sizeof(unsigned) * height)); if (ret == NULL) return (NULL); memset(ret, 0, offsetof(rtree_t, level2bits) + (sizeof(unsigned) * height)); + ret->alloc = alloc; + ret->dalloc = dalloc; if (malloc_mutex_init(&ret->mutex)) { /* Leak the rtree. */ return (NULL); @@ -32,7 +36,7 @@ rtree_new(unsigned bits) for (i = 1; i < height; i++) ret->level2bits[i] = bits_per_level; - ret->root = (void**)base_alloc(sizeof(void *) << ret->level2bits[0]); + ret->root = (void**)alloc(sizeof(void *) << ret->level2bits[0]); if (ret->root == NULL) { /* * We leak the rtree here, since there's no generic base @@ -45,6 +49,31 @@ rtree_new(unsigned bits) return (ret); } +static void +rtree_delete_subtree(rtree_t *rtree, void **node, unsigned level) +{ + + if (level < rtree->height - 1) { + size_t nchildren, i; + + nchildren = ZU(1) << rtree->level2bits[level]; + for (i = 0; i < nchildren; i++) { + void **child = (void **)node[i]; + if (child != NULL) + rtree_delete_subtree(rtree, child, level + 1); + } + } + rtree->dalloc(node); +} + +void +rtree_delete(rtree_t *rtree) +{ + + rtree_delete_subtree(rtree, rtree->root, 0); + rtree->dalloc(rtree); +} + void rtree_prefork(rtree_t *rtree) { diff --git a/test/unit/rtree.c b/test/unit/rtree.c new file mode 100644 index 00000000..c12a7442 --- /dev/null +++ b/test/unit/rtree.c @@ -0,0 +1,119 @@ +#include "test/jemalloc_test.h" + +TEST_BEGIN(test_rtree_get_empty) +{ + unsigned i; + + for (i = 1; i <= (sizeof(uintptr_t) << 3); i++) { + rtree_t *rtree = rtree_new(i, imalloc, idalloc); + assert_ptr_null(rtree_get(rtree, 
0), + "rtree_get() should return NULL for empty tree"); + rtree_delete(rtree); + } +} +TEST_END + +TEST_BEGIN(test_rtree_extrema) +{ + unsigned i; + + for (i = 1; i <= (sizeof(uintptr_t) << 3); i++) { + rtree_t *rtree = rtree_new(i, imalloc, idalloc); + + rtree_set(rtree, 0, (void *)1); + assert_ptr_eq(rtree_get(rtree, 0), (void *)1, + "rtree_get() should return previously set value"); + + rtree_set(rtree, ~((uintptr_t)0), (void *)1); + assert_ptr_eq(rtree_get(rtree, ~((uintptr_t)0)), (void *)1, + "rtree_get() should return previously set value"); + + rtree_delete(rtree); + } +} +TEST_END + +TEST_BEGIN(test_rtree_bits) +{ + unsigned i, j, k; + + for (i = 1; i < (sizeof(uintptr_t) << 3); i++) { + uintptr_t keys[] = {0, 1, + (((uintptr_t)1) << (sizeof(uintptr_t)*8-i)) - 1}; + rtree_t *rtree = rtree_new(i, imalloc, idalloc); + + for (j = 0; j < sizeof(keys)/sizeof(uintptr_t); j++) { + rtree_set(rtree, keys[j], (void *)1); + for (k = 0; k < sizeof(keys)/sizeof(uintptr_t); k++) { + assert_ptr_eq(rtree_get(rtree, keys[k]), + (void *)1, + "rtree_get() should return previously set " + "value and ignore insignificant key bits; " + "i=%u, j=%u, k=%u, set key=%#x, " + "get key=%#x", i, j, k, keys[j], keys[k]); + } + assert_ptr_eq(rtree_get(rtree, + (((uintptr_t)1) << (sizeof(uintptr_t)*8-i))), + (void *)0, + "Only leftmost rtree leaf should be set; " + "i=%u, j=%u", i, j); + rtree_set(rtree, keys[j], (void *)0); + } + + rtree_delete(rtree); + } +} +TEST_END + +TEST_BEGIN(test_rtree_random) +{ + unsigned i; + sfmt_t *sfmt; +#define NSET 100 +#define SEED 42 + + sfmt = init_gen_rand(SEED); + for (i = 1; i <= (sizeof(uintptr_t) << 3); i++) { + rtree_t *rtree = rtree_new(i, imalloc, idalloc); + uintptr_t keys[NSET]; + unsigned j; + + for (j = 0; j < NSET; j++) { + keys[j] = (uintptr_t)gen_rand64(sfmt); + rtree_set(rtree, keys[j], (void *)1); + assert_ptr_eq(rtree_get(rtree, keys[j]), (void *)1, + "rtree_get() should return previously set value"); + } + for (j = 0; j < NSET; j++) 
{ + assert_ptr_eq(rtree_get(rtree, keys[j]), (void *)1, + "rtree_get() should return previously set value"); + } + + for (j = 0; j < NSET; j++) { + rtree_set(rtree, keys[j], (void *)0); + assert_ptr_eq(rtree_get(rtree, keys[j]), (void *)0, + "rtree_get() should return previously set value"); + } + for (j = 0; j < NSET; j++) { + assert_ptr_eq(rtree_get(rtree, keys[j]), (void *)0, + "rtree_get() should return previously set value"); + } + + rtree_delete(rtree); + } + fini_gen_rand(sfmt); +#undef NSET +#undef SEED +} +TEST_END + +int +main(void) +{ + + return (test( + test_rtree_get_empty, + test_rtree_extrema, + test_rtree_bits, + test_rtree_random)); +} From b954bc5d3a65966df0ce7801cd6102542b5e894b Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 2 Jan 2014 17:36:38 -0800 Subject: [PATCH 0367/3378] Convert rtree from (void *) to (uint8_t) storage. Reduce rtree memory usage by storing booleans (1 byte each) rather than pointers. The rtree code is only used to record whether jemalloc manages a chunk of memory, so there's no need to store pointers in the rtree. Increase rtree node size to 64 KiB in order to reduce tree depth from 13 to 3 on 64-bit systems. The conversion to more compact leaf nodes was enough by itself to make the rtree depth 1 on 32-bit systems; due to the fact that root nodes are smaller than the specified node size if possible, the node size change has no impact on 32-bit systems (assuming default chunk size). 
--- .../jemalloc/internal/jemalloc_internal.h.in | 2 +- include/jemalloc/internal/rtree.h | 38 +++++++++-------- src/chunk.c | 4 +- src/rtree.c | 41 +++++++++++-------- test/unit/rtree.c | 34 ++++++++------- 5 files changed, 64 insertions(+), 55 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index f380bbfb..caadc1e1 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -853,7 +853,7 @@ ivsalloc(const void *ptr, bool demote) { /* Return 0 if ptr is not within a chunk managed by jemalloc. */ - if (rtree_get(chunks_rtree, (uintptr_t)CHUNK_ADDR2BASE(ptr)) == NULL) + if (rtree_get(chunks_rtree, (uintptr_t)CHUNK_ADDR2BASE(ptr)) == 0) return (0); return (isalloc(ptr, demote)); diff --git a/include/jemalloc/internal/rtree.h b/include/jemalloc/internal/rtree.h index 2fa59867..bc74769f 100644 --- a/include/jemalloc/internal/rtree.h +++ b/include/jemalloc/internal/rtree.h @@ -14,11 +14,7 @@ typedef struct rtree_s rtree_t; * Size of each radix tree node (must be a power of 2). This impacts tree * depth. */ -#if (LG_SIZEOF_PTR == 2) -# define RTREE_NODESIZE (1U << 14) -#else -# define RTREE_NODESIZE CACHELINE -#endif +#define RTREE_NODESIZE (1U << 16) typedef void *(rtree_alloc_t)(size_t); typedef void (rtree_dalloc_t)(void *); @@ -52,19 +48,19 @@ void rtree_postfork_child(rtree_t *rtree); #ifndef JEMALLOC_ENABLE_INLINE #ifdef JEMALLOC_DEBUG -void *rtree_get_locked(rtree_t *rtree, uintptr_t key); +uint8_t rtree_get_locked(rtree_t *rtree, uintptr_t key); #endif -void *rtree_get(rtree_t *rtree, uintptr_t key); -bool rtree_set(rtree_t *rtree, uintptr_t key, void *val); +uint8_t rtree_get(rtree_t *rtree, uintptr_t key); +bool rtree_set(rtree_t *rtree, uintptr_t key, uint8_t val); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_RTREE_C_)) #define RTREE_GET_GENERATE(f) \ /* The least significant bits of the key are ignored. 
*/ \ -JEMALLOC_INLINE void * \ +JEMALLOC_INLINE uint8_t \ f(rtree_t *rtree, uintptr_t key) \ { \ - void *ret; \ + uint8_t ret; \ uintptr_t subkey; \ unsigned i, lshift, height, bits; \ void **node, **child; \ @@ -79,7 +75,7 @@ f(rtree_t *rtree, uintptr_t key) \ child = (void**)node[subkey]; \ if (child == NULL) { \ RTREE_UNLOCK(&rtree->mutex); \ - return (NULL); \ + return (0); \ } \ } \ \ @@ -90,7 +86,10 @@ f(rtree_t *rtree, uintptr_t key) \ bits = rtree->level2bits[i]; \ subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR+3)) - \ bits); \ - ret = node[subkey]; \ + { \ + uint8_t *leaf = (uint8_t *)node; \ + ret = leaf[subkey]; \ + } \ RTREE_UNLOCK(&rtree->mutex); \ \ RTREE_GET_VALIDATE \ @@ -129,7 +128,7 @@ RTREE_GET_GENERATE(rtree_get) #undef RTREE_GET_VALIDATE JEMALLOC_INLINE bool -rtree_set(rtree_t *rtree, uintptr_t key, void *val) +rtree_set(rtree_t *rtree, uintptr_t key, uint8_t val) { uintptr_t subkey; unsigned i, lshift, height, bits; @@ -144,14 +143,14 @@ rtree_set(rtree_t *rtree, uintptr_t key, void *val) bits); child = (void**)node[subkey]; if (child == NULL) { - child = (void**)rtree->alloc(sizeof(void *) << - rtree->level2bits[i+1]); + size_t size = ((i + 1 < height - 1) ? sizeof(void *) + : (sizeof(uint8_t))) << rtree->level2bits[i+1]; + child = (void**)rtree->alloc(size); if (child == NULL) { malloc_mutex_unlock(&rtree->mutex); return (true); } - memset(child, 0, sizeof(void *) << - rtree->level2bits[i+1]); + memset(child, 0, size); node[subkey] = child; } } @@ -159,7 +158,10 @@ rtree_set(rtree_t *rtree, uintptr_t key, void *val) /* node is a leaf, so it contains values rather than node pointers. 
*/ bits = rtree->level2bits[i]; subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR+3)) - bits); - node[subkey] = val; + { + uint8_t *leaf = (uint8_t *)node; + leaf[subkey] = val; + } malloc_mutex_unlock(&rtree->mutex); return (false); diff --git a/src/chunk.c b/src/chunk.c index 71bad5a0..90ab116a 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -180,7 +180,7 @@ chunk_alloc(size_t size, size_t alignment, bool base, bool *zero, label_return: if (ret != NULL) { if (config_ivsalloc && base == false) { - if (rtree_set(chunks_rtree, (uintptr_t)ret, ret)) { + if (rtree_set(chunks_rtree, (uintptr_t)ret, 1)) { chunk_dealloc(ret, size, true); return (NULL); } @@ -321,7 +321,7 @@ chunk_dealloc(void *chunk, size_t size, bool unmap) assert((size & chunksize_mask) == 0); if (config_ivsalloc) - rtree_set(chunks_rtree, (uintptr_t)chunk, NULL); + rtree_set(chunks_rtree, (uintptr_t)chunk, 0); if (config_stats || config_prof) { malloc_mutex_lock(&chunks_mtx); assert(stats_chunks.curchunks >= (size / chunksize)); diff --git a/src/rtree.c b/src/rtree.c index 4e26766a..205957ac 100644 --- a/src/rtree.c +++ b/src/rtree.c @@ -5,15 +5,20 @@ rtree_t * rtree_new(unsigned bits, rtree_alloc_t *alloc, rtree_dalloc_t *dalloc) { rtree_t *ret; - unsigned bits_per_level, height, i; + unsigned bits_per_level, bits_in_leaf, height, i; assert(bits > 0 && bits <= (sizeof(uintptr_t) << 3)); bits_per_level = ffs(pow2_ceil((RTREE_NODESIZE / sizeof(void *)))) - 1; - height = bits / bits_per_level; - if (height * bits_per_level != bits) - height++; - assert(height * bits_per_level >= bits); + bits_in_leaf = ffs(pow2_ceil((RTREE_NODESIZE / sizeof(uint8_t)))) - 1; + if (bits > bits_in_leaf) { + height = 1 + (bits - bits_in_leaf) / bits_per_level; + if ((height-1) * bits_per_level + bits_in_leaf != bits) + height++; + } else { + height = 1; + } + assert((height-1) * bits_per_level + bits_in_leaf >= bits); ret = (rtree_t*)alloc(offsetof(rtree_t, level2bits) + (sizeof(unsigned) * height)); @@ -25,23 +30,27 @@ 
rtree_new(unsigned bits, rtree_alloc_t *alloc, rtree_dalloc_t *dalloc) ret->alloc = alloc; ret->dalloc = dalloc; if (malloc_mutex_init(&ret->mutex)) { - /* Leak the rtree. */ + if (dalloc != NULL) + dalloc(ret); return (NULL); } ret->height = height; - if (bits_per_level * height > bits) - ret->level2bits[0] = bits % bits_per_level; - else - ret->level2bits[0] = bits_per_level; - for (i = 1; i < height; i++) - ret->level2bits[i] = bits_per_level; + if (height > 1) { + if ((height-1) * bits_per_level + bits_in_leaf > bits) { + ret->level2bits[0] = (bits - bits_in_leaf) % + bits_per_level; + } else + ret->level2bits[0] = bits_per_level; + for (i = 1; i < height-1; i++) + ret->level2bits[i] = bits_per_level; + ret->level2bits[height-1] = bits_in_leaf; + } else + ret->level2bits[0] = bits; ret->root = (void**)alloc(sizeof(void *) << ret->level2bits[0]); if (ret->root == NULL) { - /* - * We leak the rtree here, since there's no generic base - * deallocation. - */ + if (dalloc != NULL) + dalloc(ret); return (NULL); } memset(ret->root, 0, sizeof(void *) << ret->level2bits[0]); diff --git a/test/unit/rtree.c b/test/unit/rtree.c index c12a7442..5e7a4113 100644 --- a/test/unit/rtree.c +++ b/test/unit/rtree.c @@ -6,7 +6,7 @@ TEST_BEGIN(test_rtree_get_empty) for (i = 1; i <= (sizeof(uintptr_t) << 3); i++) { rtree_t *rtree = rtree_new(i, imalloc, idalloc); - assert_ptr_null(rtree_get(rtree, 0), + assert_u_eq(rtree_get(rtree, 0), 0, "rtree_get() should return NULL for empty tree"); rtree_delete(rtree); } @@ -20,12 +20,12 @@ TEST_BEGIN(test_rtree_extrema) for (i = 1; i <= (sizeof(uintptr_t) << 3); i++) { rtree_t *rtree = rtree_new(i, imalloc, idalloc); - rtree_set(rtree, 0, (void *)1); - assert_ptr_eq(rtree_get(rtree, 0), (void *)1, + rtree_set(rtree, 0, 1); + assert_u_eq(rtree_get(rtree, 0), 1, "rtree_get() should return previously set value"); - rtree_set(rtree, ~((uintptr_t)0), (void *)1); - assert_ptr_eq(rtree_get(rtree, ~((uintptr_t)0)), (void *)1, + rtree_set(rtree, 
~((uintptr_t)0), 1); + assert_u_eq(rtree_get(rtree, ~((uintptr_t)0)), 1, "rtree_get() should return previously set value"); rtree_delete(rtree); @@ -43,21 +43,19 @@ TEST_BEGIN(test_rtree_bits) rtree_t *rtree = rtree_new(i, imalloc, idalloc); for (j = 0; j < sizeof(keys)/sizeof(uintptr_t); j++) { - rtree_set(rtree, keys[j], (void *)1); + rtree_set(rtree, keys[j], 1); for (k = 0; k < sizeof(keys)/sizeof(uintptr_t); k++) { - assert_ptr_eq(rtree_get(rtree, keys[k]), - (void *)1, + assert_u_eq(rtree_get(rtree, keys[k]), 1, "rtree_get() should return previously set " "value and ignore insignificant key bits; " "i=%u, j=%u, k=%u, set key=%#x, " "get key=%#x", i, j, k, keys[j], keys[k]); } - assert_ptr_eq(rtree_get(rtree, - (((uintptr_t)1) << (sizeof(uintptr_t)*8-i))), - (void *)0, + assert_u_eq(rtree_get(rtree, + (((uintptr_t)1) << (sizeof(uintptr_t)*8-i))), 0, "Only leftmost rtree leaf should be set; " "i=%u, j=%u", i, j); - rtree_set(rtree, keys[j], (void *)0); + rtree_set(rtree, keys[j], 0); } rtree_delete(rtree); @@ -80,22 +78,22 @@ TEST_BEGIN(test_rtree_random) for (j = 0; j < NSET; j++) { keys[j] = (uintptr_t)gen_rand64(sfmt); - rtree_set(rtree, keys[j], (void *)1); - assert_ptr_eq(rtree_get(rtree, keys[j]), (void *)1, + rtree_set(rtree, keys[j], 1); + assert_u_eq(rtree_get(rtree, keys[j]), 1, "rtree_get() should return previously set value"); } for (j = 0; j < NSET; j++) { - assert_ptr_eq(rtree_get(rtree, keys[j]), (void *)1, + assert_u_eq(rtree_get(rtree, keys[j]), 1, "rtree_get() should return previously set value"); } for (j = 0; j < NSET; j++) { - rtree_set(rtree, keys[j], (void *)0); - assert_ptr_eq(rtree_get(rtree, keys[j]), (void *)0, + rtree_set(rtree, keys[j], 0); + assert_u_eq(rtree_get(rtree, keys[j]), 0, "rtree_get() should return previously set value"); } for (j = 0; j < NSET; j++) { - assert_ptr_eq(rtree_get(rtree, keys[j]), (void *)0, + assert_u_eq(rtree_get(rtree, keys[j]), 0, "rtree_get() should return previously set value"); } From 
981bb499d92fee940d5be79cec3abdd3de3891b8 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 3 Jan 2014 16:35:03 -0800 Subject: [PATCH 0368/3378] Add unit tests for qr, ql, and rb. --- Makefile.in | 8 +- test/unit/ql.c | 209 +++++++++++++++++++++++++++++++ test/unit/qr.c | 248 +++++++++++++++++++++++++++++++++++++ test/unit/rb.c | 327 +++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 789 insertions(+), 3 deletions(-) create mode 100644 test/unit/ql.c create mode 100644 test/unit/qr.c create mode 100644 test/unit/rb.c diff --git a/Makefile.in b/Makefile.in index 6dd91743..78f16af6 100644 --- a/Makefile.in +++ b/Makefile.in @@ -110,9 +110,11 @@ C_UTIL_INTEGRATION_SRCS := $(srcroot)src/util.c TESTS_UNIT := $(srcroot)test/unit/bitmap.c $(srcroot)test/unit/ckh.c \ $(srcroot)test/unit/hash.c $(srcroot)test/unit/mallctl.c \ $(srcroot)test/unit/math.c $(srcroot)test/unit/mq.c \ - $(srcroot)test/unit/mtx.c $(srcroot)test/unit/quarantine.c \ - $(srcroot)test/unit/rtree.c $(srcroot)test/unit/SFMT.c \ - $(srcroot)test/unit/stats.c $(srcroot)test/unit/tsd.c + $(srcroot)test/unit/mtx.c $(srcroot)test/unit/ql.c \ + $(srcroot)test/unit/qr.c $(srcroot)test/unit/quarantine.c \ + $(srcroot)test/unit/rb.c $(srcroot)test/unit/rtree.c \ + $(srcroot)test/unit/SFMT.c $(srcroot)test/unit/stats.c \ + $(srcroot)test/unit/tsd.c TESTS_INTEGRATION := $(srcroot)test/integration/aligned_alloc.c \ $(srcroot)test/integration/allocated.c \ $(srcroot)test/integration/mallocx.c \ diff --git a/test/unit/ql.c b/test/unit/ql.c new file mode 100644 index 00000000..05fad450 --- /dev/null +++ b/test/unit/ql.c @@ -0,0 +1,209 @@ +#include "test/jemalloc_test.h" + +/* Number of ring entries, in [2..26]. 
*/ +#define NENTRIES 9 + +typedef struct list_s list_t; +typedef ql_head(list_t) list_head_t; + +struct list_s { + ql_elm(list_t) link; + char id; +}; + +static void +test_empty_list(list_head_t *head) +{ + list_t *t; + unsigned i; + + assert_ptr_null(ql_first(head), "Unexpected element for empty list"); + assert_ptr_null(ql_last(head, link), + "Unexpected element for empty list"); + + i = 0; + ql_foreach(t, head, link) { + i++; + } + assert_u_eq(i, 0, "Unexpected element for empty list"); + + i = 0; + ql_reverse_foreach(t, head, link) { + i++; + } + assert_u_eq(i, 0, "Unexpected element for empty list"); +} + +TEST_BEGIN(test_ql_empty) +{ + list_head_t head; + + ql_new(&head); + test_empty_list(&head); +} +TEST_END + +static void +init_entries(list_t *entries, unsigned nentries) +{ + unsigned i; + + for (i = 0; i < nentries; i++) { + entries[i].id = 'a' + i; + ql_elm_new(&entries[i], link); + } +} + +static void +test_entries_list(list_head_t *head, list_t *entries, unsigned nentries) +{ + list_t *t; + unsigned i; + + assert_c_eq(ql_first(head)->id, entries[0].id, "Element id mismatch"); + assert_c_eq(ql_last(head, link)->id, entries[nentries-1].id, + "Element id mismatch"); + + i = 0; + ql_foreach(t, head, link) { + assert_c_eq(t->id, entries[i].id, "Element id mismatch"); + i++; + } + + i = 0; + ql_reverse_foreach(t, head, link) { + assert_c_eq(t->id, entries[nentries-i-1].id, + "Element id mismatch"); + i++; + } + + for (i = 0; i < nentries-1; i++) { + t = ql_next(head, &entries[i], link); + assert_c_eq(t->id, entries[i+1].id, "Element id mismatch"); + } + assert_ptr_null(ql_next(head, &entries[nentries-1], link), + "Unexpected element"); + + assert_ptr_null(ql_prev(head, &entries[0], link), "Unexpected element"); + for (i = 1; i < nentries; i++) { + t = ql_prev(head, &entries[i], link); + assert_c_eq(t->id, entries[i-1].id, "Element id mismatch"); + } +} + +TEST_BEGIN(test_ql_tail_insert) +{ + list_head_t head; + list_t entries[NENTRIES]; + unsigned i; + + 
ql_new(&head); + init_entries(entries, sizeof(entries)/sizeof(list_t)); + for (i = 0; i < NENTRIES; i++) + ql_tail_insert(&head, &entries[i], link); + + test_entries_list(&head, entries, NENTRIES); +} +TEST_END + +TEST_BEGIN(test_ql_tail_remove) +{ + list_head_t head; + list_t entries[NENTRIES]; + unsigned i; + + ql_new(&head); + init_entries(entries, sizeof(entries)/sizeof(list_t)); + for (i = 0; i < NENTRIES; i++) + ql_tail_insert(&head, &entries[i], link); + + for (i = 0; i < NENTRIES; i++) { + test_entries_list(&head, entries, NENTRIES-i); + ql_tail_remove(&head, list_t, link); + } + test_empty_list(&head); +} +TEST_END + +TEST_BEGIN(test_ql_head_insert) +{ + list_head_t head; + list_t entries[NENTRIES]; + unsigned i; + + ql_new(&head); + init_entries(entries, sizeof(entries)/sizeof(list_t)); + for (i = 0; i < NENTRIES; i++) + ql_head_insert(&head, &entries[NENTRIES-i-1], link); + + test_entries_list(&head, entries, NENTRIES); +} +TEST_END + +TEST_BEGIN(test_ql_head_remove) +{ + list_head_t head; + list_t entries[NENTRIES]; + unsigned i; + + ql_new(&head); + init_entries(entries, sizeof(entries)/sizeof(list_t)); + for (i = 0; i < NENTRIES; i++) + ql_head_insert(&head, &entries[NENTRIES-i-1], link); + + for (i = 0; i < NENTRIES; i++) { + test_entries_list(&head, &entries[i], NENTRIES-i); + ql_head_remove(&head, list_t, link); + } + test_empty_list(&head); +} +TEST_END + +TEST_BEGIN(test_ql_insert) +{ + list_head_t head; + list_t entries[8]; + list_t *a, *b, *c, *d, *e, *f, *g, *h; + + ql_new(&head); + init_entries(entries, sizeof(entries)/sizeof(list_t)); + a = &entries[0]; + b = &entries[1]; + c = &entries[2]; + d = &entries[3]; + e = &entries[4]; + f = &entries[5]; + g = &entries[6]; + h = &entries[7]; + + /* + * ql_remove(), ql_before_insert(), and ql_after_insert() are used + * internally by other macros that are already tested, so there's no + * need to test them completely. 
However, insertion/deletion from the + * middle of lists is not otherwise tested; do so here. + */ + ql_tail_insert(&head, f, link); + ql_before_insert(&head, f, b, link); + ql_before_insert(&head, f, c, link); + ql_after_insert(f, h, link); + ql_after_insert(f, g, link); + ql_before_insert(&head, b, a, link); + ql_after_insert(c, d, link); + ql_before_insert(&head, f, e, link); + + test_entries_list(&head, entries, sizeof(entries)/sizeof(list_t)); +} +TEST_END + +int +main(void) +{ + + return (test( + test_ql_empty, + test_ql_tail_insert, + test_ql_tail_remove, + test_ql_head_insert, + test_ql_head_remove, + test_ql_insert)); +} diff --git a/test/unit/qr.c b/test/unit/qr.c new file mode 100644 index 00000000..a2a2d902 --- /dev/null +++ b/test/unit/qr.c @@ -0,0 +1,248 @@ +#include "test/jemalloc_test.h" + +/* Number of ring entries, in [2..26]. */ +#define NENTRIES 9 +/* Split index, in [1..NENTRIES). */ +#define SPLIT_INDEX 5 + +typedef struct ring_s ring_t; + +struct ring_s { + qr(ring_t) link; + char id; +}; + +static void +init_entries(ring_t *entries) +{ + unsigned i; + + for (i = 0; i < NENTRIES; i++) { + qr_new(&entries[i], link); + entries[i].id = 'a' + i; + } +} + +static void +test_independent_entries(ring_t *entries) +{ + ring_t *t; + unsigned i, j; + + for (i = 0; i < NENTRIES; i++) { + j = 0; + qr_foreach(t, &entries[i], link) { + j++; + } + assert_u_eq(j, 1, + "Iteration over single-element ring should visit precisely " + "one element"); + } + for (i = 0; i < NENTRIES; i++) { + j = 0; + qr_reverse_foreach(t, &entries[i], link) { + j++; + } + assert_u_eq(j, 1, + "Iteration over single-element ring should visit precisely " + "one element"); + } + for (i = 0; i < NENTRIES; i++) { + t = qr_next(&entries[i], link); + assert_ptr_eq(t, &entries[i], + "Next element in single-element ring should be same as " + "current element"); + } + for (i = 0; i < NENTRIES; i++) { + t = qr_prev(&entries[i], link); + assert_ptr_eq(t, &entries[i], + "Previous element in 
single-element ring should be same as " + "current element"); + } +} + +TEST_BEGIN(test_qr_one) +{ + ring_t entries[NENTRIES]; + + init_entries(entries); + test_independent_entries(entries); +} +TEST_END + +static void +test_entries_ring(ring_t *entries) +{ + ring_t *t; + unsigned i, j; + + for (i = 0; i < NENTRIES; i++) { + j = 0; + qr_foreach(t, &entries[i], link) { + assert_c_eq(t->id, entries[(i+j) % NENTRIES].id, + "Element id mismatch"); + j++; + } + } + for (i = 0; i < NENTRIES; i++) { + j = 0; + qr_reverse_foreach(t, &entries[i], link) { + assert_c_eq(t->id, entries[(NENTRIES+i-j-1) % + NENTRIES].id, "Element id mismatch"); + j++; + } + } + for (i = 0; i < NENTRIES; i++) { + t = qr_next(&entries[i], link); + assert_c_eq(t->id, entries[(i+1) % NENTRIES].id, + "Element id mismatch"); + } + for (i = 0; i < NENTRIES; i++) { + t = qr_prev(&entries[i], link); + assert_c_eq(t->id, entries[(NENTRIES+i-1) % NENTRIES].id, + "Element id mismatch"); + } +} + +TEST_BEGIN(test_qr_after_insert) +{ + ring_t entries[NENTRIES]; + unsigned i; + + init_entries(entries); + for (i = 1; i < NENTRIES; i++) + qr_after_insert(&entries[i - 1], &entries[i], link); + test_entries_ring(entries); +} +TEST_END + +TEST_BEGIN(test_qr_remove) +{ + ring_t entries[NENTRIES]; + ring_t *t; + unsigned i, j; + + init_entries(entries); + for (i = 1; i < NENTRIES; i++) + qr_after_insert(&entries[i - 1], &entries[i], link); + + for (i = 0; i < NENTRIES; i++) { + j = 0; + qr_foreach(t, &entries[i], link) { + assert_c_eq(t->id, entries[i+j].id, + "Element id mismatch"); + j++; + } + j = 0; + qr_reverse_foreach(t, &entries[i], link) { + assert_c_eq(t->id, entries[NENTRIES - 1 - j].id, + "Element id mismatch"); + j++; + } + qr_remove(&entries[i], link); + } + test_independent_entries(entries); +} +TEST_END + +TEST_BEGIN(test_qr_before_insert) +{ + ring_t entries[NENTRIES]; + ring_t *t; + unsigned i, j; + + init_entries(entries); + for (i = 1; i < NENTRIES; i++) + qr_before_insert(&entries[i - 1], 
&entries[i], link); + for (i = 0; i < NENTRIES; i++) { + j = 0; + qr_foreach(t, &entries[i], link) { + assert_c_eq(t->id, entries[(NENTRIES+i-j) % + NENTRIES].id, "Element id mismatch"); + j++; + } + } + for (i = 0; i < NENTRIES; i++) { + j = 0; + qr_reverse_foreach(t, &entries[i], link) { + assert_c_eq(t->id, entries[(i+j+1) % NENTRIES].id, + "Element id mismatch"); + j++; + } + } + for (i = 0; i < NENTRIES; i++) { + t = qr_next(&entries[i], link); + assert_c_eq(t->id, entries[(NENTRIES+i-1) % NENTRIES].id, + "Element id mismatch"); + } + for (i = 0; i < NENTRIES; i++) { + t = qr_prev(&entries[i], link); + assert_c_eq(t->id, entries[(i+1) % NENTRIES].id, + "Element id mismatch"); + } +} +TEST_END + +static void +test_split_entries(ring_t *entries) +{ + ring_t *t; + unsigned i, j; + + for (i = 0; i < NENTRIES; i++) { + j = 0; + qr_foreach(t, &entries[i], link) { + if (i < SPLIT_INDEX) { + assert_c_eq(t->id, + entries[(i+j) % SPLIT_INDEX].id, + "Element id mismatch"); + } else { + assert_c_eq(t->id, entries[(i+j-SPLIT_INDEX) % + (NENTRIES-SPLIT_INDEX) + SPLIT_INDEX].id, + "Element id mismatch"); + } + j++; + } + } +} + +TEST_BEGIN(test_qr_meld_split) +{ + ring_t entries[NENTRIES]; + unsigned i; + + init_entries(entries); + for (i = 1; i < NENTRIES; i++) + qr_after_insert(&entries[i - 1], &entries[i], link); + + qr_split(&entries[0], &entries[SPLIT_INDEX], link); + test_split_entries(entries); + + qr_meld(&entries[0], &entries[SPLIT_INDEX], link); + test_entries_ring(entries); + + qr_meld(&entries[0], &entries[SPLIT_INDEX], link); + test_split_entries(entries); + + qr_split(&entries[0], &entries[SPLIT_INDEX], link); + test_entries_ring(entries); + + qr_split(&entries[0], &entries[0], link); + test_entries_ring(entries); + + qr_meld(&entries[0], &entries[0], link); + test_entries_ring(entries); +} +TEST_END + +int +main(void) +{ + + return (test( + test_qr_one, + test_qr_after_insert, + test_qr_remove, + test_qr_before_insert, + test_qr_meld_split)); +} diff --git 
a/test/unit/rb.c b/test/unit/rb.c new file mode 100644 index 00000000..a41eee1b --- /dev/null +++ b/test/unit/rb.c @@ -0,0 +1,327 @@ +#include "test/jemalloc_test.h" + +#define rbtn_black_height(a_type, a_field, a_rbt, r_height) do { \ + a_type *rbp_bh_t; \ + for (rbp_bh_t = (a_rbt)->rbt_root, (r_height) = 0; \ + rbp_bh_t != &(a_rbt)->rbt_nil; \ + rbp_bh_t = rbtn_left_get(a_type, a_field, rbp_bh_t)) { \ + if (rbtn_red_get(a_type, a_field, rbp_bh_t) == false) { \ + (r_height)++; \ + } \ + } \ +} while (0) + +typedef struct node_s node_t; + +struct node_s { +#define NODE_MAGIC 0x9823af7e + uint32_t magic; + rb_node(node_t) link; + uint64_t key; +}; + +static int +node_cmp(node_t *a, node_t *b) { + int ret; + + assert_u32_eq(a->magic, NODE_MAGIC, "Bad magic"); + assert_u32_eq(b->magic, NODE_MAGIC, "Bad magic"); + + ret = (a->key > b->key) - (a->key < b->key); + if (ret == 0) { + /* + * Duplicates are not allowed in the tree, so force an + * arbitrary ordering for non-identical items with equal keys. 
+ */ + ret = (((uintptr_t)a) > ((uintptr_t)b)) + - (((uintptr_t)a) < ((uintptr_t)b)); + } + return (ret); +} + +typedef rb_tree(node_t) tree_t; +rb_gen(static, tree_, tree_t, node_t, link, node_cmp); + +TEST_BEGIN(test_rb_empty) +{ + tree_t tree; + node_t key; + + tree_new(&tree); + + assert_ptr_null(tree_first(&tree), "Unexpected node"); + assert_ptr_null(tree_last(&tree), "Unexpected node"); + + key.key = 0; + key.magic = NODE_MAGIC; + assert_ptr_null(tree_search(&tree, &key), "Unexpected node"); + + key.key = 0; + key.magic = NODE_MAGIC; + assert_ptr_null(tree_nsearch(&tree, &key), "Unexpected node"); + + key.key = 0; + key.magic = NODE_MAGIC; + assert_ptr_null(tree_psearch(&tree, &key), "Unexpected node"); +} +TEST_END + +static unsigned +tree_recurse(node_t *node, unsigned black_height, unsigned black_depth, + node_t *nil) +{ + unsigned ret = 0; + node_t *left_node = rbtn_left_get(node_t, link, node); + node_t *right_node = rbtn_right_get(node_t, link, node); + + if (rbtn_red_get(node_t, link, node) == false) + black_depth++; + + /* Red nodes must be interleaved with black nodes. */ + if (rbtn_red_get(node_t, link, node)) { + node_t *t_node = rbtn_left_get(node_t, link, left_node); + assert_false(rbtn_red_get(node_t, link, left_node), + "Node should be black"); + t_node = rbtn_right_get(node_t, link, left_node); + assert_false(rbtn_red_get(node_t, link, left_node), + "Node should be black"); + } + + if (node == nil) + return (ret); + /* Self. */ + assert_u32_eq(node->magic, NODE_MAGIC, "Bad magic"); + + /* Left subtree. */ + if (left_node != nil) + ret += tree_recurse(left_node, black_height, black_depth, nil); + else + ret += (black_depth != black_height); + + /* Right subtree. 
*/ + if (right_node != nil) + ret += tree_recurse(right_node, black_height, black_depth, nil); + else + ret += (black_depth != black_height); + + return (ret); +} + +static node_t * +tree_iterate_cb(tree_t *tree, node_t *node, void *data) +{ + unsigned *i = (unsigned *)data; + node_t *search_node; + + assert_u32_eq(node->magic, NODE_MAGIC, "Bad magic"); + + /* Test rb_search(). */ + search_node = tree_search(tree, node); + assert_ptr_eq(search_node, node, + "tree_search() returned unexpected node"); + + /* Test rb_nsearch(). */ + search_node = tree_nsearch(tree, node); + assert_ptr_eq(search_node, node, + "tree_nsearch() returned unexpected node"); + + /* Test rb_psearch(). */ + search_node = tree_psearch(tree, node); + assert_ptr_eq(search_node, node, + "tree_psearch() returned unexpected node"); + + (*i)++; + + return (NULL); +} + +static unsigned +tree_iterate(tree_t *tree) +{ + unsigned i; + + i = 0; + tree_iter(tree, NULL, tree_iterate_cb, (void *)&i); + + return (i); +} + +static unsigned +tree_iterate_reverse(tree_t *tree) +{ + unsigned i; + + i = 0; + tree_reverse_iter(tree, NULL, tree_iterate_cb, (void *)&i); + + return (i); +} + +static void +node_remove(tree_t *tree, node_t *node, unsigned nnodes) +{ + node_t *search_node; + unsigned black_height, imbalances; + + tree_remove(tree, node); + + /* Test rb_nsearch(). */ + search_node = tree_nsearch(tree, node); + assert(search_node == NULL || search_node->key >= node->key); + + /* Test rb_psearch(). 
*/ + search_node = tree_psearch(tree, node); + assert(search_node == NULL || search_node->key <= node->key); + + node->magic = 0; + + rbtn_black_height(node_t, link, tree, black_height); + imbalances = tree_recurse(tree->rbt_root, black_height, 0, + &(tree->rbt_nil)); + assert_u_eq(imbalances, 0, "Tree is unbalanced"); + assert(nnodes - 1 == tree_iterate(tree)); + assert(nnodes - 1 == tree_iterate_reverse(tree)); +} + +static node_t * +remove_iterate_cb(tree_t *tree, node_t *node, void *data) +{ + unsigned *nnodes = (unsigned *)data; + node_t *ret = tree_next(tree, node); + + node_remove(tree, node, *nnodes); + + return (ret); +} + +static node_t * +remove_reverse_iterate_cb(tree_t *tree, node_t *node, void *data) +{ + unsigned *nnodes = (unsigned *)data; + node_t *ret = tree_prev(tree, node); + + node_remove(tree, node, *nnodes); + + return (ret); +} + +TEST_BEGIN(test_rb_random) +{ +#define NNODES 25 +#define NBAGS 250 +#define SEED 42 + sfmt_t *sfmt; + uint64_t bag[NNODES]; + tree_t tree; + node_t nodes[NNODES]; + unsigned i, j, k, black_height, imbalances; + + sfmt = init_gen_rand(SEED); + for (i = 0; i < NBAGS; i++) { + switch (i) { + case 0: + /* Insert in order. */ + for (j = 0; j < NNODES; j++) + bag[j] = j; + break; + case 1: + /* Insert in reverse order. */ + for (j = 0; j < NNODES; j++) + bag[j] = NNODES - j - 1; + break; + default: + for (j = 0; j < NNODES; j++) + bag[j] = gen_rand64_range(sfmt, NNODES); + } + + for (j = 1; j <= NNODES; j++) { + /* Initialize tree and nodes. */ + tree_new(&tree); + tree.rbt_nil.magic = 0; + for (k = 0; k < j; k++) { + nodes[k].magic = NODE_MAGIC; + nodes[k].key = bag[k]; + } + + /* Insert nodes. 
*/ + for (k = 0; k < j; k++) { + tree_insert(&tree, &nodes[k]); + + rbtn_black_height(node_t, link, &tree, + black_height); + imbalances = tree_recurse(tree.rbt_root, + black_height, 0, &(tree.rbt_nil)); + assert_u_eq(imbalances, 0, + "Tree is unbalanced"); + + assert_u_eq(tree_iterate(&tree), k+1, + "Unexpected node iteration count"); + assert_u_eq(tree_iterate_reverse(&tree), k+1, + "Unexpected node iteration count"); + + assert_ptr_not_null(tree_first(&tree), + "Tree should not be empty"); + assert_ptr_not_null(tree_last(&tree), + "Tree should not be empty"); + + tree_next(&tree, &nodes[k]); + tree_prev(&tree, &nodes[k]); + } + + /* Remove nodes. */ + switch (i % 4) { + case 0: + for (k = 0; k < j; k++) + node_remove(&tree, &nodes[k], j - k); + break; + case 1: + for (k = j; k > 0; k--) + node_remove(&tree, &nodes[k-1], k); + break; + case 2: { + node_t *start; + unsigned nnodes = j; + + start = NULL; + do { + start = tree_iter(&tree, start, + remove_iterate_cb, (void *)&nnodes); + nnodes--; + } while (start != NULL); + assert_u_eq(nnodes, 0, + "Removal terminated early"); + break; + } case 3: { + node_t *start; + unsigned nnodes = j; + + start = NULL; + do { + start = tree_reverse_iter(&tree, start, + remove_reverse_iterate_cb, + (void *)&nnodes); + nnodes--; + } while (start != NULL); + assert_u_eq(nnodes, 0, + "Removal terminated early"); + break; + } default: + not_reached(); + } + } + } + fini_gen_rand(sfmt); +#undef NNODES +#undef NBAGS +#undef SEED +} +TEST_END + +int +main(void) +{ + + return (test( + test_rb_empty, + test_rb_random)); +} From 8cd0d949779930b63d763c3642de157c9f77e1fd Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 3 Jan 2014 17:07:58 -0800 Subject: [PATCH 0369/3378] Convert assert() in test code to assert_*(). 
--- test/unit/rb.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/test/unit/rb.c b/test/unit/rb.c index a41eee1b..d79ec527 100644 --- a/test/unit/rb.c +++ b/test/unit/rb.c @@ -167,11 +167,17 @@ node_remove(tree_t *tree, node_t *node, unsigned nnodes) /* Test rb_nsearch(). */ search_node = tree_nsearch(tree, node); - assert(search_node == NULL || search_node->key >= node->key); + if (search_node != NULL) { + assert_u64_ge(search_node->key, node->key, + "Key ordering error"); + } /* Test rb_psearch(). */ search_node = tree_psearch(tree, node); - assert(search_node == NULL || search_node->key <= node->key); + if (search_node != NULL) { + assert_u64_le(search_node->key, node->key, + "Key ordering error"); + } node->magic = 0; @@ -179,8 +185,10 @@ node_remove(tree_t *tree, node_t *node, unsigned nnodes) imbalances = tree_recurse(tree->rbt_root, black_height, 0, &(tree->rbt_nil)); assert_u_eq(imbalances, 0, "Tree is unbalanced"); - assert(nnodes - 1 == tree_iterate(tree)); - assert(nnodes - 1 == tree_iterate_reverse(tree)); + assert_u_eq(tree_iterate(tree), nnodes-1, + "Unexpected node iteration count"); + assert_u_eq(tree_iterate_reverse(tree), nnodes-1, + "Unexpected node iteration count"); } static node_t * From e18c25d23de0e845f0ee7e11d02c1be044738a3c Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 6 Jan 2014 20:33:48 -0800 Subject: [PATCH 0370/3378] Add util unit tests, and fix discovered bugs. Add unit tests for pow2_ceil(), malloc_strtoumax(), and malloc_snprintf(). Fix numerous bugs in malloc_strotumax() error handling/reporting. These bugs could have caused application-visible issues for some seldom used (0X... and 0... prefixes) or malformed MALLOC_CONF or mallctl() argument strings, but otherwise they had no impact. Fix numerous bugs in malloc_snprintf(). These bugs were not exercised by existing malloc_*printf() calls, so they had no impact. 
--- Makefile.in | 2 +- include/jemalloc/internal/util.h | 3 +- src/util.c | 64 ++++--- test/include/test/test.h | 83 +++++++++ test/unit/util.c | 294 +++++++++++++++++++++++++++++++ 5 files changed, 416 insertions(+), 30 deletions(-) create mode 100644 test/unit/util.c diff --git a/Makefile.in b/Makefile.in index 78f16af6..470495c8 100644 --- a/Makefile.in +++ b/Makefile.in @@ -114,7 +114,7 @@ TESTS_UNIT := $(srcroot)test/unit/bitmap.c $(srcroot)test/unit/ckh.c \ $(srcroot)test/unit/qr.c $(srcroot)test/unit/quarantine.c \ $(srcroot)test/unit/rb.c $(srcroot)test/unit/rtree.c \ $(srcroot)test/unit/SFMT.c $(srcroot)test/unit/stats.c \ - $(srcroot)test/unit/tsd.c + $(srcroot)test/unit/tsd.c $(srcroot)test/unit/util.c TESTS_INTEGRATION := $(srcroot)test/integration/aligned_alloc.c \ $(srcroot)test/integration/allocated.c \ $(srcroot)test/integration/mallocx.c \ diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h index 302444d5..6b938f74 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util.h @@ -85,7 +85,8 @@ #ifdef JEMALLOC_H_EXTERNS int buferror(int err, char *buf, size_t buflen); -uintmax_t malloc_strtoumax(const char *nptr, char **endptr, int base); +uintmax_t malloc_strtoumax(const char *restrict nptr, + char **restrict endptr, int base); void malloc_write(const char *s); /* diff --git a/src/util.c b/src/util.c index 6cedf8c2..20062553 100644 --- a/src/util.c +++ b/src/util.c @@ -97,22 +97,24 @@ buferror(int err, char *buf, size_t buflen) } uintmax_t -malloc_strtoumax(const char *nptr, char **endptr, int base) +malloc_strtoumax(const char *restrict nptr, char **restrict endptr, int base) { uintmax_t ret, digit; int b; bool neg; const char *p, *ns; + p = nptr; if (base < 0 || base == 1 || base > 36) { + ns = p; set_errno(EINVAL); - return (UINTMAX_MAX); + ret = UINTMAX_MAX; + goto label_return; } b = base; /* Swallow leading whitespace and get sign, if any. 
*/ neg = false; - p = nptr; while (true) { switch (*p) { case '\t': case '\n': case '\v': case '\f': case '\r': case ' ': @@ -146,7 +148,7 @@ malloc_strtoumax(const char *nptr, char **endptr, int base) if (b == 8) p++; break; - case 'x': + case 'X': case 'x': switch (p[2]) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': @@ -164,7 +166,9 @@ malloc_strtoumax(const char *nptr, char **endptr, int base) } break; default: - break; + p++; + ret = 0; + goto label_return; } } if (b == 0) @@ -181,13 +185,22 @@ malloc_strtoumax(const char *nptr, char **endptr, int base) if (ret < pret) { /* Overflow. */ set_errno(ERANGE); - return (UINTMAX_MAX); + ret = UINTMAX_MAX; + goto label_return; } p++; } if (neg) ret = -ret; + if (p == ns) { + /* No conversion performed. */ + set_errno(EINVAL); + ret = UINTMAX_MAX; + goto label_return; + } + +label_return: if (endptr != NULL) { if (p == ns) { /* No characters were converted. */ @@ -195,7 +208,6 @@ malloc_strtoumax(const char *nptr, char **endptr, int base) } else *endptr = (char *)p; } - return (ret); } @@ -354,6 +366,9 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) case 'j': \ val = va_arg(ap, intmax_t); \ break; \ + case 'j' | 0x80: \ + val = va_arg(ap, uintmax_t); \ + break; \ case 't': \ val = va_arg(ap, ptrdiff_t); \ break; \ @@ -419,6 +434,10 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) case '*': width = va_arg(ap, int); f++; + if (width < 0) { + left_justify = true; + width = -width; + } break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': { @@ -428,19 +447,16 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) assert(uwidth != UINTMAX_MAX || get_errno() != ERANGE); width = (int)uwidth; - if (*f == '.') { - f++; - goto label_precision; - } else - goto label_length; break; - } case '.': - f++; - goto label_precision; - default: goto 
label_length; + } default: + break; } + /* Width/precision separator. */ + if (*f == '.') + f++; + else + goto label_length; /* Precision. */ - label_precision: switch (*f) { case '*': prec = va_arg(ap, int); @@ -469,16 +485,8 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) } else len = 'l'; break; - case 'j': - len = 'j'; - f++; - break; - case 't': - len = 't'; - f++; - break; - case 'z': - len = 'z'; + case 'q': case 'j': case 't': case 'z': + len = *f; f++; break; default: break; @@ -540,7 +548,7 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) assert(len == '?' || len == 'l'); assert_not_implemented(len != 'l'); s = va_arg(ap, char *); - slen = (prec == -1) ? strlen(s) : prec; + slen = (prec < 0) ? strlen(s) : prec; APPEND_PADDED_S(s, slen, width, left_justify); f++; break; diff --git a/test/include/test/test.h b/test/include/test/test.h index 5f985751..8cc97af5 100644 --- a/test/include/test/test.h +++ b/test/include/test/test.h @@ -48,6 +48,84 @@ #define assert_u_ge(a, b, fmt...) assert_cmp(int, a, b, >=, <, "u", fmt) #define assert_u_gt(a, b, fmt...) assert_cmp(int, a, b, >, <=, "u", fmt) +#define assert_ld_eq(a, b, fmt...) assert_cmp(long, a, b, ==, \ + !=, "ld", fmt) +#define assert_ld_ne(a, b, fmt...) assert_cmp(long, a, b, !=, \ + ==, "ld", fmt) +#define assert_ld_lt(a, b, fmt...) assert_cmp(long, a, b, <, \ + >=, "ld", fmt) +#define assert_ld_le(a, b, fmt...) assert_cmp(long, a, b, <=, \ + >, "ld", fmt) +#define assert_ld_ge(a, b, fmt...) assert_cmp(long, a, b, >=, \ + <, "ld", fmt) +#define assert_ld_gt(a, b, fmt...) assert_cmp(long, a, b, >, \ + <=, "ld", fmt) + +#define assert_lu_eq(a, b, fmt...) assert_cmp(unsigned long, \ + a, b, ==, !=, "lu", fmt) +#define assert_lu_ne(a, b, fmt...) assert_cmp(unsigned long, \ + a, b, !=, ==, "lu", fmt) +#define assert_lu_lt(a, b, fmt...) assert_cmp(unsigned long, \ + a, b, <, >=, "lu", fmt) +#define assert_lu_le(a, b, fmt...) 
assert_cmp(unsigned long, \ + a, b, <=, >, "lu", fmt) +#define assert_lu_ge(a, b, fmt...) assert_cmp(unsigned long, \ + a, b, >=, <, "lu", fmt) +#define assert_lu_gt(a, b, fmt...) assert_cmp(unsigned long, \ + a, b, >, <=, "lu", fmt) + +#define assert_qd_eq(a, b, fmt...) assert_cmp(long long, a, b, ==, \ + !=, "qd", fmt) +#define assert_qd_ne(a, b, fmt...) assert_cmp(long long, a, b, !=, \ + ==, "qd", fmt) +#define assert_qd_lt(a, b, fmt...) assert_cmp(long long, a, b, <, \ + >=, "qd", fmt) +#define assert_qd_le(a, b, fmt...) assert_cmp(long long, a, b, <=, \ + >, "qd", fmt) +#define assert_qd_ge(a, b, fmt...) assert_cmp(long long, a, b, >=, \ + <, "qd", fmt) +#define assert_qd_gt(a, b, fmt...) assert_cmp(long long, a, b, >, \ + <=, "qd", fmt) + +#define assert_qu_eq(a, b, fmt...) assert_cmp(unsigned long long, \ + a, b, ==, !=, "qu", fmt) +#define assert_qu_ne(a, b, fmt...) assert_cmp(unsigned long long, \ + a, b, !=, ==, "qu", fmt) +#define assert_qu_lt(a, b, fmt...) assert_cmp(unsigned long long, \ + a, b, <, >=, "qu", fmt) +#define assert_qu_le(a, b, fmt...) assert_cmp(unsigned long long, \ + a, b, <=, >, "qu", fmt) +#define assert_qu_ge(a, b, fmt...) assert_cmp(unsigned long long, \ + a, b, >=, <, "qu", fmt) +#define assert_qu_gt(a, b, fmt...) assert_cmp(unsigned long long, \ + a, b, >, <=, "qu", fmt) + +#define assert_jd_eq(a, b, fmt...) assert_cmp(intmax_t, a, b, ==, \ + !=, "jd", fmt) +#define assert_jd_ne(a, b, fmt...) assert_cmp(intmax_t, a, b, !=, \ + ==, "jd", fmt) +#define assert_jd_lt(a, b, fmt...) assert_cmp(intmax_t, a, b, <, \ + >=, "jd", fmt) +#define assert_jd_le(a, b, fmt...) assert_cmp(intmax_t, a, b, <=, \ + >, "jd", fmt) +#define assert_jd_ge(a, b, fmt...) assert_cmp(intmax_t, a, b, >=, \ + <, "jd", fmt) +#define assert_jd_gt(a, b, fmt...) assert_cmp(intmax_t, a, b, >, \ + <=, "jd", fmt) + +#define assert_ju_eq(a, b, fmt...) assert_cmp(uintmax_t, a, b, ==, \ + !=, "ju", fmt) +#define assert_ju_ne(a, b, fmt...) 
assert_cmp(uintmax_t, a, b, !=, \ + ==, "ju", fmt) +#define assert_ju_lt(a, b, fmt...) assert_cmp(uintmax_t, a, b, <, \ + >=, "ju", fmt) +#define assert_ju_le(a, b, fmt...) assert_cmp(uintmax_t, a, b, <=, \ + >, "ju", fmt) +#define assert_ju_ge(a, b, fmt...) assert_cmp(uintmax_t, a, b, >=, \ + <, "ju", fmt) +#define assert_ju_gt(a, b, fmt...) assert_cmp(uintmax_t, a, b, >, \ + <=, "ju", fmt) + #define assert_zd_eq(a, b, fmt...) assert_cmp(ssize_t, a, b, ==, \ !=, "zd", fmt) #define assert_zd_ne(a, b, fmt...) assert_cmp(ssize_t, a, b, !=, \ @@ -172,6 +250,11 @@ } \ } while (0) +#define assert_not_reached(fmt...) do { \ + p_test_fail("%s:%s:%d: Unreachable code reached: ", \ + __func__, __FILE__, __LINE__, fmt); \ +} while (0) + /* * If this enum changes, corresponding changes in test/test.sh.in are also * necessary. diff --git a/test/unit/util.c b/test/unit/util.c new file mode 100644 index 00000000..1f2f5759 --- /dev/null +++ b/test/unit/util.c @@ -0,0 +1,294 @@ +#include "test/jemalloc_test.h" + +TEST_BEGIN(test_pow2_ceil) +{ + unsigned i, pow2; + size_t x; + + assert_zu_eq(pow2_ceil(0), 0, "Unexpected result"); + + for (i = 0; i < sizeof(size_t) * 8; i++) { + assert_zu_eq(pow2_ceil(ZU(1) << i), ZU(1) << i, + "Unexpected result"); + } + + for (i = 2; i < sizeof(size_t) * 8; i++) { + assert_zu_eq(pow2_ceil((ZU(1) << i) - 1), ZU(1) << i, + "Unexpected result"); + } + + for (i = 0; i < sizeof(size_t) * 8 - 1; i++) { + assert_zu_eq(pow2_ceil((ZU(1) << i) + 1), ZU(1) << (i+1), + "Unexpected result"); + } + + for (pow2 = 1; pow2 < 25; pow2++) { + for (x = (ZU(1) << (pow2-1)) + 1; x <= ZU(1) << pow2; x++) { + assert_zu_eq(pow2_ceil(x), ZU(1) << pow2, + "Unexpected result, x=%zu", x); + } + } +} +TEST_END + +TEST_BEGIN(test_malloc_strtoumax_no_endptr) +{ + int err; + + set_errno(0); + assert_ju_eq(malloc_strtoumax("0", NULL, 0), 0, "Unexpected result"); + err = get_errno(); + assert_d_eq(err, 0, "Unexpected failure"); +} +TEST_END + +TEST_BEGIN(test_malloc_strtoumax) +{ + 
struct test_s { + const char *input; + const char *expected_remainder; + int base; + int expected_errno; + const char *expected_errno_name; + uintmax_t expected_x; + }; +#define ERR(e) e, #e +#define UMAX(x) ((uintmax_t)x##ULL) + struct test_s tests[] = { + {"0", "0", -1, ERR(EINVAL), UINTMAX_MAX}, + {"0", "0", 1, ERR(EINVAL), UINTMAX_MAX}, + {"0", "0", 37, ERR(EINVAL), UINTMAX_MAX}, + + {"", "", 0, ERR(EINVAL), UINTMAX_MAX}, + {"+", "+", 0, ERR(EINVAL), UINTMAX_MAX}, + {"++3", "++3", 0, ERR(EINVAL), UINTMAX_MAX}, + {"-", "-", 0, ERR(EINVAL), UINTMAX_MAX}, + + {"42", "", 0, ERR(0), UMAX(42)}, + {"+42", "", 0, ERR(0), UMAX(42)}, + {"-42", "", 0, ERR(0), UMAX(-42)}, + {"042", "", 0, ERR(0), UMAX(042)}, + {"+042", "", 0, ERR(0), UMAX(042)}, + {"-042", "", 0, ERR(0), UMAX(-042)}, + {"0x42", "", 0, ERR(0), UMAX(0x42)}, + {"+0x42", "", 0, ERR(0), UMAX(0x42)}, + {"-0x42", "", 0, ERR(0), UMAX(-0x42)}, + + {"0", "", 0, ERR(0), UMAX(0)}, + {"1", "", 0, ERR(0), UMAX(1)}, + + {"42", "", 0, ERR(0), UMAX(42)}, + {" 42", "", 0, ERR(0), UMAX(42)}, + {"42 ", " ", 0, ERR(0), UMAX(42)}, + {"0x", "x", 0, ERR(0), UMAX(0)}, + {"42x", "x", 0, ERR(0), UMAX(42)}, + + {"07", "", 0, ERR(0), UMAX(7)}, + {"010", "", 0, ERR(0), UMAX(8)}, + {"08", "8", 0, ERR(0), UMAX(0)}, + {"0_", "_", 0, ERR(0), UMAX(0)}, + + {"0x", "x", 0, ERR(0), UMAX(0)}, + {"0X", "X", 0, ERR(0), UMAX(0)}, + {"0xg", "xg", 0, ERR(0), UMAX(0)}, + {"0XA", "", 0, ERR(0), UMAX(10)}, + + {"010", "", 10, ERR(0), UMAX(10)}, + {"0x3", "x3", 10, ERR(0), UMAX(0)}, + + {"12", "2", 2, ERR(0), UMAX(1)}, + {"78", "8", 8, ERR(0), UMAX(7)}, + {"9a", "a", 10, ERR(0), UMAX(9)}, + {"9A", "A", 10, ERR(0), UMAX(9)}, + {"fg", "g", 16, ERR(0), UMAX(15)}, + {"FG", "G", 16, ERR(0), UMAX(15)}, + {"0xfg", "g", 16, ERR(0), UMAX(15)}, + {"0XFG", "G", 16, ERR(0), UMAX(15)}, + {"z_", "_", 36, ERR(0), UMAX(35)}, + {"Z_", "_", 36, ERR(0), UMAX(35)} + }; +#undef ERR +#undef UMAX + unsigned i; + + for (i = 0; i < sizeof(tests)/sizeof(struct test_s); i++) { + 
struct test_s *test = &tests[i]; + int err; + uintmax_t result; + char *remainder; + + set_errno(0); + result = malloc_strtoumax(test->input, &remainder, test->base); + err = get_errno(); + assert_d_eq(err, test->expected_errno, + "Expected errno %s for \"%s\", base %d", + test->expected_errno_name, test->input, test->base); + assert_str_eq(remainder, test->expected_remainder, + "Unexpected remainder for \"%s\", base %d", + test->input, test->base); + if (err == 0) { + assert_ju_eq(result, test->expected_x, + "Unexpected result for \"%s\", base %d", + test->input, test->base); + } + } +} +TEST_END + +TEST_BEGIN(test_malloc_snprintf_truncated) +{ +#define BUFLEN 15 + char buf[BUFLEN]; + int result; + size_t len; +#define TEST(expected_str_untruncated, fmt...) do { \ + result = malloc_snprintf(buf, len, fmt); \ + assert_d_eq(strncmp(buf, expected_str_untruncated, len-1), 0, \ + "Unexpected string inequality (\"%s\" vs \"%s\")", \ + buf, expected_str_untruncated); \ + assert_d_eq(result, strlen(expected_str_untruncated), \ + "Unexpected result"); \ +} while (0) + + for (len = 1; len < BUFLEN; len++) { + TEST("", ""); + TEST("012346789", "012346789"); + TEST("a0123b", "a%sb", "0123"); + TEST("a01234567", "a%s%s", "0123", "4567"); + TEST("a0123 ", "a%-6s", "0123"); + TEST("a 0123", "a%6s", "0123"); + TEST("a 012", "a%6.3s", "0123"); + TEST("a 012", "a%*.*s", 6, 3, "0123"); + TEST("a 123b", "a% db", 123); + TEST("a123b", "a%-db", 123); + TEST("a-123b", "a%-db", -123); + TEST("a+123b", "a%+db", 123); + } +#undef BUFLEN +#undef TEST +} +TEST_END + +TEST_BEGIN(test_malloc_snprintf) +{ +#define BUFLEN 128 + char buf[BUFLEN]; + int result; +#define TEST(expected_str, fmt...) 
do { \ + result = malloc_snprintf(buf, sizeof(buf), fmt); \ + assert_str_eq(buf, expected_str, "Unexpected output"); \ + assert_d_eq(result, strlen(expected_str), "Unexpected result"); \ +} while (0) + + TEST("", ""); + TEST("hello", "hello"); + + TEST("a0123b", "a%sb", "0123"); + + TEST("a 0123b", "a%5sb", "0123"); + TEST("a 0123b", "a%*sb", 5, "0123"); + + TEST("a0123 b", "a%-5sb", "0123"); + TEST("a0123b", "a%*sb", -1, "0123"); + TEST("a0123 b", "a%*sb", -5, "0123"); + TEST("a0123 b", "a%-*sb", -5, "0123"); + + TEST("a012b", "a%.3sb", "0123"); + TEST("a012b", "a%.*sb", 3, "0123"); + TEST("a0123b", "a%.*sb", -3, "0123"); + + TEST("a 012b", "a%5.3sb", "0123"); + TEST("a 012b", "a%5.*sb", 3, "0123"); + TEST("a 012b", "a%*.3sb", 5, "0123"); + TEST("a 012b", "a%*.*sb", 5, 3, "0123"); + TEST("a 0123b", "a%*.*sb", 5, -3, "0123"); + + TEST("_abcd_", "_%x_", 0xabcd); + TEST("_0xabcd_", "_%#x_", 0xabcd); + TEST("_1234_", "_%o_", 01234); + TEST("_01234_", "_%#o_", 01234); + TEST("_1234_", "_%u_", 1234); + + TEST("_1234_", "_%d_", 1234); + TEST("_ 1234_", "_% d_", 1234); + TEST("_+1234_", "_%+d_", 1234); + TEST("_-1234_", "_%d_", -1234); + TEST("_-1234_", "_% d_", -1234); + TEST("_-1234_", "_%+d_", -1234); + + TEST("_-1234_", "_%d_", -1234); + TEST("_1234_", "_%d_", 1234); + TEST("_-1234_", "_%i_", -1234); + TEST("_1234_", "_%i_", 1234); + TEST("_01234_", "_%#o_", 01234); + TEST("_1234_", "_%u_", 1234); + TEST("_0x1234abc_", "_%#x_", 0x1234abc); + TEST("_0X1234ABC_", "_%#X_", 0x1234abc); + TEST("_c_", "_%c_", 'c'); + TEST("_string_", "_%s_", "string"); + TEST("_0x42_", "_%p_", ((void *)0x42)); + + TEST("_-1234_", "_%ld_", ((long)-1234)); + TEST("_1234_", "_%ld_", ((long)1234)); + TEST("_-1234_", "_%li_", ((long)-1234)); + TEST("_1234_", "_%li_", ((long)1234)); + TEST("_01234_", "_%#lo_", ((long)01234)); + TEST("_1234_", "_%lu_", ((long)1234)); + TEST("_0x1234abc_", "_%#lx_", ((long)0x1234abc)); + TEST("_0X1234ABC_", "_%#lX_", ((long)0x1234ABC)); + + TEST("_-1234_", 
"_%lld_", ((long long)-1234)); + TEST("_1234_", "_%lld_", ((long long)1234)); + TEST("_-1234_", "_%lli_", ((long long)-1234)); + TEST("_1234_", "_%lli_", ((long long)1234)); + TEST("_01234_", "_%#llo_", ((long long)01234)); + TEST("_1234_", "_%llu_", ((long long)1234)); + TEST("_0x1234abc_", "_%#llx_", ((long long)0x1234abc)); + TEST("_0X1234ABC_", "_%#llX_", ((long long)0x1234ABC)); + + TEST("_-1234_", "_%qd_", ((long long)-1234)); + TEST("_1234_", "_%qd_", ((long long)1234)); + TEST("_-1234_", "_%qi_", ((long long)-1234)); + TEST("_1234_", "_%qi_", ((long long)1234)); + TEST("_01234_", "_%#qo_", ((long long)01234)); + TEST("_1234_", "_%qu_", ((long long)1234)); + TEST("_0x1234abc_", "_%#qx_", ((long long)0x1234abc)); + TEST("_0X1234ABC_", "_%#qX_", ((long long)0x1234ABC)); + + TEST("_-1234_", "_%jd_", ((intmax_t)-1234)); + TEST("_1234_", "_%jd_", ((intmax_t)1234)); + TEST("_-1234_", "_%ji_", ((intmax_t)-1234)); + TEST("_1234_", "_%ji_", ((intmax_t)1234)); + TEST("_01234_", "_%#jo_", ((intmax_t)01234)); + TEST("_1234_", "_%ju_", ((intmax_t)1234)); + TEST("_0x1234abc_", "_%#jx_", ((intmax_t)0x1234abc)); + TEST("_0X1234ABC_", "_%#jX_", ((intmax_t)0x1234ABC)); + + TEST("_1234_", "_%td_", ((ptrdiff_t)1234)); + TEST("_-1234_", "_%td_", ((ptrdiff_t)-1234)); + TEST("_1234_", "_%ti_", ((ptrdiff_t)1234)); + TEST("_-1234_", "_%ti_", ((ptrdiff_t)-1234)); + + TEST("_-1234_", "_%zd_", ((ssize_t)-1234)); + TEST("_1234_", "_%zd_", ((ssize_t)1234)); + TEST("_-1234_", "_%zi_", ((ssize_t)-1234)); + TEST("_1234_", "_%zi_", ((ssize_t)1234)); + TEST("_01234_", "_%#zo_", ((ssize_t)01234)); + TEST("_1234_", "_%zu_", ((ssize_t)1234)); + TEST("_0x1234abc_", "_%#zx_", ((ssize_t)0x1234abc)); + TEST("_0X1234ABC_", "_%#zX_", ((ssize_t)0x1234ABC)); +#undef BUFLEN +} +TEST_END + +int +main(void) +{ + + return (test( + test_pow2_ceil, + test_malloc_strtoumax_no_endptr, + test_malloc_strtoumax, + test_malloc_snprintf_truncated, + test_malloc_snprintf)); +} From 
6b694c4d47278cddfaaedeb7ee49fa5757e35ed5 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 7 Jan 2014 16:47:56 -0800 Subject: [PATCH 0371/3378] Add junk/zero filling unit tests, and fix discovered bugs. Fix growing large reallocation to junk fill new space. Fix huge deallocation to junk fill when munmap is disabled. --- Makefile.in | 15 +- include/jemalloc/internal/arena.h | 15 +- include/jemalloc/internal/huge.h | 4 + include/jemalloc/internal/private_symbols.txt | 3 + src/arena.c | 84 +++++-- src/huge.c | 27 ++- test/unit/junk.c | 219 ++++++++++++++++++ test/unit/quarantine.c | 6 +- test/unit/zero.c | 78 +++++++ 9 files changed, 420 insertions(+), 31 deletions(-) create mode 100644 test/unit/junk.c create mode 100644 test/unit/zero.c diff --git a/Makefile.in b/Makefile.in index 470495c8..89d82555 100644 --- a/Makefile.in +++ b/Makefile.in @@ -108,13 +108,14 @@ C_TESTLIB_SRCS := $(srcroot)test/src/math.c $(srcroot)test/src/mtx.c \ $(srcroot)test/src/thd.c C_UTIL_INTEGRATION_SRCS := $(srcroot)src/util.c TESTS_UNIT := $(srcroot)test/unit/bitmap.c $(srcroot)test/unit/ckh.c \ - $(srcroot)test/unit/hash.c $(srcroot)test/unit/mallctl.c \ - $(srcroot)test/unit/math.c $(srcroot)test/unit/mq.c \ - $(srcroot)test/unit/mtx.c $(srcroot)test/unit/ql.c \ - $(srcroot)test/unit/qr.c $(srcroot)test/unit/quarantine.c \ - $(srcroot)test/unit/rb.c $(srcroot)test/unit/rtree.c \ - $(srcroot)test/unit/SFMT.c $(srcroot)test/unit/stats.c \ - $(srcroot)test/unit/tsd.c $(srcroot)test/unit/util.c + $(srcroot)test/unit/hash.c $(srcroot)test/unit/junk.c \ + $(srcroot)test/unit/mallctl.c $(srcroot)test/unit/math.c \ + $(srcroot)test/unit/mq.c $(srcroot)test/unit/mtx.c \ + $(srcroot)test/unit/ql.c $(srcroot)test/unit/qr.c \ + $(srcroot)test/unit/quarantine.c $(srcroot)test/unit/rb.c \ + $(srcroot)test/unit/rtree.c $(srcroot)test/unit/SFMT.c \ + $(srcroot)test/unit/stats.c $(srcroot)test/unit/tsd.c \ + $(srcroot)test/unit/util.c $(srcroot)test/unit/zero.c TESTS_INTEGRATION := 
$(srcroot)test/integration/aligned_alloc.c \ $(srcroot)test/integration/allocated.c \ $(srcroot)test/integration/mallocx.c \ diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 41517510..f092155c 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -408,9 +408,12 @@ void arena_alloc_junk_small(void *ptr, arena_bin_info_t *bin_info, #ifdef JEMALLOC_JET typedef void (arena_redzone_corruption_t)(void *, size_t, bool, size_t, uint8_t); -extern arena_redzone_corruption_t *arena_redzone_corruption_fptr; -#endif +extern arena_redzone_corruption_t *arena_redzone_corruption; +typedef void (arena_dalloc_junk_small_t)(void *, arena_bin_info_t *); +extern arena_dalloc_junk_small_t *arena_dalloc_junk_small; +#else void arena_dalloc_junk_small(void *ptr, arena_bin_info_t *bin_info); +#endif void arena_quarantine_junk_small(void *ptr, size_t usize); void *arena_malloc_small(arena_t *arena, size_t size, bool zero); void *arena_malloc_large(arena_t *arena, size_t size, bool zero); @@ -422,9 +425,17 @@ void arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, size_t pageind, arena_chunk_map_t *mapelm); void arena_dalloc_small(arena_t *arena, arena_chunk_t *chunk, void *ptr, size_t pageind); +#ifdef JEMALLOC_JET +typedef void (arena_dalloc_junk_large_t)(void *, size_t); +extern arena_dalloc_junk_large_t *arena_dalloc_junk_large; +#endif void arena_dalloc_large_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr); void arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr); +#ifdef JEMALLOC_JET +typedef void (arena_ralloc_junk_large_t)(void *, size_t, size_t); +extern arena_ralloc_junk_large_t *arena_ralloc_junk_large; +#endif void *arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, bool zero); void *arena_ralloc(arena_t *arena, void *ptr, size_t oldsize, size_t size, diff --git a/include/jemalloc/internal/huge.h b/include/jemalloc/internal/huge.h index 
d987d370..dac23c68 100644 --- a/include/jemalloc/internal/huge.h +++ b/include/jemalloc/internal/huge.h @@ -23,6 +23,10 @@ void *huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra); void *huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, size_t alignment, bool zero, bool try_tcache_dalloc); +#ifdef JEMALLOC_JET +typedef void (huge_dalloc_junk_t)(void *, size_t); +extern huge_dalloc_junk_t *huge_dalloc_junk; +#endif void huge_dalloc(void *ptr, bool unmap); size_t huge_salloc(const void *ptr); prof_ctx_t *huge_prof_ctx_get(const void *ptr); diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 0ad600b9..10ac5493 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -8,6 +8,7 @@ arena_boot arena_dalloc arena_dalloc_bin arena_dalloc_bin_locked +arena_dalloc_junk_large arena_dalloc_junk_small arena_dalloc_large arena_dalloc_large_locked @@ -52,6 +53,7 @@ arena_ptr_small_binind_get arena_purge_all arena_quarantine_junk_small arena_ralloc +arena_ralloc_junk_large arena_ralloc_no_move arena_redzone_corruption arena_run_regind @@ -194,6 +196,7 @@ hash_x86_32 huge_allocated huge_boot huge_dalloc +huge_dalloc_junk huge_malloc huge_mtx huge_ndalloc diff --git a/src/arena.c b/src/arena.c index 71057aad..536be296 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1446,10 +1446,10 @@ arena_redzone_corruption(void *ptr, size_t usize, bool after, after ? 
"after" : "before", ptr, usize, byte); } #ifdef JEMALLOC_JET -arena_redzone_corruption_t *arena_redzone_corruption_fptr = - arena_redzone_corruption; #undef arena_redzone_corruption -#define arena_redzone_corruption arena_redzone_corruption_fptr +#define arena_redzone_corruption JEMALLOC_N(arena_redzone_corruption) +arena_redzone_corruption_t *arena_redzone_corruption = + JEMALLOC_N(arena_redzone_corruption_impl); #endif static void @@ -1482,6 +1482,10 @@ arena_redzones_validate(void *ptr, arena_bin_info_t *bin_info, bool reset) abort(); } +#ifdef JEMALLOC_JET +#undef arena_dalloc_junk_small +#define arena_dalloc_junk_small JEMALLOC_N(arena_dalloc_junk_small_impl) +#endif void arena_dalloc_junk_small(void *ptr, arena_bin_info_t *bin_info) { @@ -1491,6 +1495,12 @@ arena_dalloc_junk_small(void *ptr, arena_bin_info_t *bin_info) memset((void *)((uintptr_t)ptr - redzone_size), 0x5a, bin_info->reg_interval); } +#ifdef JEMALLOC_JET +#undef arena_dalloc_junk_small +#define arena_dalloc_junk_small JEMALLOC_N(arena_dalloc_junk_small) +arena_dalloc_junk_small_t *arena_dalloc_junk_small = + JEMALLOC_N(arena_dalloc_junk_small_impl); +#endif void arena_quarantine_junk_small(void *ptr, size_t usize) @@ -1841,21 +1851,38 @@ arena_dalloc_small(arena_t *arena, arena_chunk_t *chunk, void *ptr, arena_dalloc_bin(arena, chunk, ptr, pageind, mapelm); } +#ifdef JEMALLOC_JET +#undef arena_dalloc_junk_large +#define arena_dalloc_junk_large JEMALLOC_N(arena_dalloc_junk_large_impl) +#endif +static void +arena_dalloc_junk_large(void *ptr, size_t usize) +{ + + if (config_fill && opt_junk) + memset(ptr, 0x5a, usize); +} +#ifdef JEMALLOC_JET +#undef arena_dalloc_junk_large +#define arena_dalloc_junk_large JEMALLOC_N(arena_dalloc_junk_large) +arena_dalloc_junk_large_t *arena_dalloc_junk_large = + JEMALLOC_N(arena_dalloc_junk_large_impl); +#endif + void arena_dalloc_large_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr) { if (config_fill || config_stats) { size_t pageind = ((uintptr_t)ptr - 
(uintptr_t)chunk) >> LG_PAGE; - size_t size = arena_mapbits_large_size_get(chunk, pageind); + size_t usize = arena_mapbits_large_size_get(chunk, pageind); - if (config_fill && config_stats && opt_junk) - memset(ptr, 0x5a, size); + arena_dalloc_junk_large(ptr, usize); if (config_stats) { arena->stats.ndalloc_large++; - arena->stats.allocated_large -= size; - arena->stats.lstats[(size >> LG_PAGE) - 1].ndalloc++; - arena->stats.lstats[(size >> LG_PAGE) - 1].curruns--; + arena->stats.allocated_large -= usize; + arena->stats.lstats[(usize >> LG_PAGE) - 1].ndalloc++; + arena->stats.lstats[(usize >> LG_PAGE) - 1].curruns--; } } @@ -1967,6 +1994,26 @@ arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr, return (true); } +#ifdef JEMALLOC_JET +#undef arena_ralloc_junk_large +#define arena_ralloc_junk_large JEMALLOC_N(arena_ralloc_junk_large_impl) +#endif +static void +arena_ralloc_junk_large(void *ptr, size_t old_usize, size_t usize) +{ + + if (config_fill && opt_junk) { + memset((void *)((uintptr_t)ptr + usize), 0x5a, + old_usize - usize); + } +} +#ifdef JEMALLOC_JET +#undef arena_ralloc_junk_large +#define arena_ralloc_junk_large JEMALLOC_N(arena_ralloc_junk_large) +arena_ralloc_junk_large_t *arena_ralloc_junk_large = + JEMALLOC_N(arena_ralloc_junk_large_impl); +#endif + /* * Try to resize a large allocation, in order to avoid copying. This will * always fail if growing an object, and the following run is already in use. @@ -1990,10 +2037,7 @@ arena_ralloc_large(void *ptr, size_t oldsize, size_t size, size_t extra, if (psize < oldsize) { /* Fill before shrinking in order avoid a race. 
*/ - if (config_fill && opt_junk) { - memset((void *)((uintptr_t)ptr + psize), 0x5a, - oldsize - psize); - } + arena_ralloc_junk_large(ptr, oldsize, psize); arena_ralloc_large_shrink(arena, chunk, ptr, oldsize, psize); return (false); @@ -2001,10 +2045,16 @@ arena_ralloc_large(void *ptr, size_t oldsize, size_t size, size_t extra, bool ret = arena_ralloc_large_grow(arena, chunk, ptr, oldsize, PAGE_CEILING(size), psize - PAGE_CEILING(size), zero); - if (config_fill && ret == false && zero == false && - opt_zero) { - memset((void *)((uintptr_t)ptr + oldsize), 0, - size - oldsize); + if (config_fill && ret == false && zero == false) { + if (opt_junk) { + memset((void *)((uintptr_t)ptr + + oldsize), 0xa5, isalloc(ptr, + config_prof) - oldsize); + } else if (opt_zero) { + memset((void *)((uintptr_t)ptr + + oldsize), 0, isalloc(ptr, + config_prof) - oldsize); + } } return (ret); } diff --git a/src/huge.c b/src/huge.c index ea9a2ad2..766c80c6 100644 --- a/src/huge.c +++ b/src/huge.c @@ -182,6 +182,29 @@ huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, return (ret); } +#ifdef JEMALLOC_JET +#undef huge_dalloc_junk +#define huge_dalloc_junk JEMALLOC_N(huge_dalloc_junk_impl) +#endif +static void +huge_dalloc_junk(void *ptr, size_t usize) +{ + + if (config_fill && config_dss && opt_junk) { + /* + * Only bother junk filling if the chunk isn't about to be + * unmapped. 
+ */ + if (config_munmap == false || (config_dss && chunk_in_dss(ptr))) + memset(ptr, 0x5a, usize); + } +} +#ifdef JEMALLOC_JET +#undef huge_dalloc_junk +#define huge_dalloc_junk JEMALLOC_N(huge_dalloc_junk) +huge_dalloc_junk_t *huge_dalloc_junk = JEMALLOC_N(huge_dalloc_junk_impl); +#endif + void huge_dalloc(void *ptr, bool unmap) { @@ -204,8 +227,8 @@ huge_dalloc(void *ptr, bool unmap) malloc_mutex_unlock(&huge_mtx); - if (unmap && config_fill && config_dss && opt_junk) - memset(node->addr, 0x5a, node->size); + if (unmap) + huge_dalloc_junk(node->addr, node->size); chunk_dealloc(node->addr, node->size, unmap); diff --git a/test/unit/junk.c b/test/unit/junk.c new file mode 100644 index 00000000..e27db2fe --- /dev/null +++ b/test/unit/junk.c @@ -0,0 +1,219 @@ +#include "test/jemalloc_test.h" + +#ifdef JEMALLOC_FILL +const char *malloc_conf = + "abort:false,junk:true,zero:false,redzone:true,quarantine:0"; +#endif + +static arena_dalloc_junk_small_t *arena_dalloc_junk_small_orig; +static arena_dalloc_junk_large_t *arena_dalloc_junk_large_orig; +static huge_dalloc_junk_t *huge_dalloc_junk_orig; +static void *most_recently_junked; + +static void +arena_dalloc_junk_small_intercept(void *ptr, arena_bin_info_t *bin_info) +{ + size_t i; + + arena_dalloc_junk_small_orig(ptr, bin_info); + for (i = 0; i < bin_info->reg_size; i++) { + assert_c_eq(((char *)ptr)[i], 0x5a, + "Missing junk fill for byte %zu/%zu of deallocated region", + i, bin_info->reg_size); + } + most_recently_junked = ptr; +} + +static void +arena_dalloc_junk_large_intercept(void *ptr, size_t usize) +{ + size_t i; + + arena_dalloc_junk_large_orig(ptr, usize); + for (i = 0; i < usize; i++) { + assert_c_eq(((char *)ptr)[i], 0x5a, + "Missing junk fill for byte %zu/%zu of deallocated region", + i, usize); + } + most_recently_junked = ptr; +} + +static void +huge_dalloc_junk_intercept(void *ptr, size_t usize) +{ + + huge_dalloc_junk_orig(ptr, usize); + /* + * The conditions under which junk filling actually occurs 
are nuanced + * enough that it doesn't make sense to duplicate the decision logic in + * test code, so don't actually check that the region is junk-filled. + */ + most_recently_junked = ptr; +} + +static void +test_junk(size_t sz_min, size_t sz_max) +{ + char *s; + size_t sz_prev, sz, i; + + arena_dalloc_junk_small_orig = arena_dalloc_junk_small; + arena_dalloc_junk_small = arena_dalloc_junk_small_intercept; + arena_dalloc_junk_large_orig = arena_dalloc_junk_large; + arena_dalloc_junk_large = arena_dalloc_junk_large_intercept; + huge_dalloc_junk_orig = huge_dalloc_junk; + huge_dalloc_junk = huge_dalloc_junk_intercept; + + sz_prev = 0; + s = (char *)mallocx(sz_min, 0); + assert_ptr_not_null((void *)s, "Unexpected mallocx() failure"); + + for (sz = sallocx(s, 0); sz <= sz_max; + sz_prev = sz, sz = sallocx(s, 0)) { + if (sz_prev > 0) { + assert_c_eq(s[0], 'a', + "Previously allocated byte %zu/%zu is corrupted", + 0, sz_prev); + assert_c_eq(s[sz_prev-1], 'a', + "Previously allocated byte %zu/%zu is corrupted", + sz_prev-1, sz_prev); + } + + for (i = sz_prev; i < sz; i++) { + assert_c_eq(s[i], 0xa5, + "Newly allocated byte %zu/%zu isn't junk-filled", + i, sz); + s[i] = 'a'; + } + + if (xallocx(s, sz+1, 0, 0) == sz) { + void *junked = (void *)s; + + s = (char *)rallocx(s, sz+1, 0); + assert_ptr_not_null((void *)s, + "Unexpected rallocx() failure"); + assert_ptr_eq(most_recently_junked, junked, + "Expected region of size %zu to be junk-filled", + sz); + } + } + + dallocx(s, 0); + assert_ptr_eq(most_recently_junked, (void *)s, + "Expected region of size %zu to be junk-filled", sz); + + arena_dalloc_junk_small = arena_dalloc_junk_small_orig; + arena_dalloc_junk_large = arena_dalloc_junk_large_orig; + huge_dalloc_junk = huge_dalloc_junk_orig; +} + +TEST_BEGIN(test_junk_small) +{ + + test_skip_if(!config_fill); + test_junk(1, SMALL_MAXCLASS-1); +} +TEST_END + +TEST_BEGIN(test_junk_large) +{ + + test_skip_if(!config_fill); + test_junk(SMALL_MAXCLASS+1, arena_maxclass); +} 
+TEST_END + +TEST_BEGIN(test_junk_huge) +{ + + test_skip_if(!config_fill); + test_junk(arena_maxclass+1, chunksize*2); +} +TEST_END + +arena_ralloc_junk_large_t *arena_ralloc_junk_large_orig; +static void *most_recently_trimmed; + +static void +arena_ralloc_junk_large_intercept(void *ptr, size_t old_usize, size_t usize) +{ + + arena_ralloc_junk_large_orig(ptr, old_usize, usize); + assert_zu_eq(old_usize, arena_maxclass, "Unexpected old_usize"); + assert_zu_eq(usize, arena_maxclass-PAGE, "Unexpected usize"); + most_recently_trimmed = ptr; +} + +TEST_BEGIN(test_junk_large_ralloc_shrink) +{ + void *p1, *p2; + + p1 = mallocx(arena_maxclass, 0); + assert_ptr_not_null(p1, "Unexpected mallocx() failure"); + + arena_ralloc_junk_large_orig = arena_ralloc_junk_large; + arena_ralloc_junk_large = arena_ralloc_junk_large_intercept; + + p2 = rallocx(p1, arena_maxclass-PAGE, 0); + assert_ptr_eq(p1, p2, "Unexpected move during shrink"); + + arena_ralloc_junk_large = arena_ralloc_junk_large_orig; + + assert_ptr_eq(most_recently_trimmed, p1, + "Expected trimmed portion of region to be junk-filled"); +} +TEST_END + +static bool detected_redzone_corruption; + +static void +arena_redzone_corruption_replacement(void *ptr, size_t usize, bool after, + size_t offset, uint8_t byte) +{ + + detected_redzone_corruption = true; +} + +TEST_BEGIN(test_junk_redzone) +{ + char *s; + arena_redzone_corruption_t *arena_redzone_corruption_orig; + + test_skip_if(!config_fill); + + arena_redzone_corruption_orig = arena_redzone_corruption; + arena_redzone_corruption = arena_redzone_corruption_replacement; + + /* Test underflow. */ + detected_redzone_corruption = false; + s = (char *)mallocx(1, 0); + assert_ptr_not_null((void *)s, "Unexpected mallocx() failure"); + s[-1] = 0xbb; + dallocx(s, 0); + assert_true(detected_redzone_corruption, + "Did not detect redzone corruption"); + + /* Test overflow. 
*/ + detected_redzone_corruption = false; + s = (char *)mallocx(1, 0); + assert_ptr_not_null((void *)s, "Unexpected mallocx() failure"); + s[sallocx(s, 0)] = 0xbb; + dallocx(s, 0); + assert_true(detected_redzone_corruption, + "Did not detect redzone corruption"); + + arena_redzone_corruption = arena_redzone_corruption_orig; +} +TEST_END + +int +main(void) +{ + + return (test( + test_junk_small, + test_junk_large, + test_junk_huge, + test_junk_large_ralloc_shrink, + test_junk_redzone)); +} diff --git a/test/unit/quarantine.c b/test/unit/quarantine.c index d8a65e28..45349237 100644 --- a/test/unit/quarantine.c +++ b/test/unit/quarantine.c @@ -73,8 +73,8 @@ TEST_BEGIN(test_quarantine_redzone) test_skip_if(!config_fill); - arena_redzone_corruption_orig = arena_redzone_corruption_fptr; - arena_redzone_corruption_fptr = arena_redzone_corruption_replacement; + arena_redzone_corruption_orig = arena_redzone_corruption; + arena_redzone_corruption = arena_redzone_corruption_replacement; /* Test underflow. 
*/ detected_redzone_corruption = false; @@ -94,7 +94,7 @@ TEST_BEGIN(test_quarantine_redzone) assert_true(detected_redzone_corruption, "Did not detect redzone corruption"); - arena_redzone_corruption_fptr = arena_redzone_corruption_orig; + arena_redzone_corruption = arena_redzone_corruption_orig; } TEST_END diff --git a/test/unit/zero.c b/test/unit/zero.c new file mode 100644 index 00000000..2fdae2fd --- /dev/null +++ b/test/unit/zero.c @@ -0,0 +1,78 @@ +#include "test/jemalloc_test.h" + +#ifdef JEMALLOC_FILL +const char *malloc_conf = + "abort:false,junk:false,zero:true,redzone:false,quarantine:0"; +#endif + +static void +test_zero(size_t sz_min, size_t sz_max) +{ + char *s; + size_t sz_prev, sz, i; + + sz_prev = 0; + s = (char *)mallocx(sz_min, 0); + assert_ptr_not_null((void *)s, "Unexpected mallocx() failure"); + + for (sz = sallocx(s, 0); sz <= sz_max; + sz_prev = sz, sz = sallocx(s, 0)) { + if (sz_prev > 0) { + assert_c_eq(s[0], 'a', + "Previously allocated byte %zu/%zu is corrupted", + 0, sz_prev); + assert_c_eq(s[sz_prev-1], 'a', + "Previously allocated byte %zu/%zu is corrupted", + sz_prev-1, sz_prev); + } + + for (i = sz_prev; i < sz; i++) { + assert_c_eq(s[i], 0x0, + "Newly allocated byte %zu/%zu isn't zero-filled", + i, sz); + s[i] = 'a'; + } + + if (xallocx(s, sz+1, 0, 0) == sz) { + s = (char *)rallocx(s, sz+1, 0); + assert_ptr_not_null((void *)s, + "Unexpected rallocx() failure"); + } + } + + dallocx(s, 0); +} + +TEST_BEGIN(test_zero_small) +{ + + test_skip_if(!config_fill); + test_zero(1, SMALL_MAXCLASS-1); +} +TEST_END + +TEST_BEGIN(test_zero_large) +{ + + test_skip_if(!config_fill); + test_zero(SMALL_MAXCLASS+1, arena_maxclass); +} +TEST_END + +TEST_BEGIN(test_zero_huge) +{ + + test_skip_if(!config_fill); + test_zero(arena_maxclass+1, chunksize*2); +} +TEST_END + +int +main(void) +{ + + return (test( + test_zero_small, + test_zero_large, + test_zero_huge)); +} From b2c31660be917ea6d59cd54e6f650b06b5e812ed Mon Sep 17 00:00:00 2001 From: Jason Evans 
Date: Sun, 12 Jan 2014 15:05:44 -0800 Subject: [PATCH 0372/3378] Extract profiling code from [re]allocation functions. Extract profiling code from malloc(), imemalign(), calloc(), realloc(), mallocx(), rallocx(), and xallocx(). This slightly reduces the amount of code compiled into the fast paths, but the primary benefit is the combinatorial complexity reduction. Simplify iralloc[t]() by creating a separate ixalloc() that handles the no-move cases. Further simplify [mrxn]allocx() (and by implication [mrn]allocm()) to make request size overflows due to size class and/or alignment constraints trigger undefined behavior (detected by debug-only assertions). Report ENOMEM rather than EINVAL if an OOM occurs during heap profiling backtrace creation in imemalign(). This bug impacted posix_memalign() and aligned_alloc(). --- doc/jemalloc.xml.in | 26 +- include/jemalloc/internal/arena.h | 2 +- include/jemalloc/internal/huge.h | 2 +- .../jemalloc/internal/jemalloc_internal.h.in | 128 +-- include/jemalloc/internal/private_symbols.txt | 2 + src/arena.c | 13 +- src/huge.c | 11 +- src/jemalloc.c | 803 ++++++++++-------- test/integration/allocm.c | 33 - test/integration/mallocx.c | 30 - 10 files changed, 553 insertions(+), 497 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 5fc76534..c7e2e872 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -321,14 +321,16 @@ The mallocx function allocates at least size bytes of memory, and returns a pointer to the base address of the allocation. Behavior is undefined if - size is 0. + size is 0, or if request size + overflows due to size class and/or alignment constraints. The rallocx function resizes the allocation at ptr to be at least size bytes, and returns a pointer to the base address of the resulting allocation, which may or may not have moved from its original location. Behavior is undefined if - size is 0. + size is 0, or if request size + overflows due to size class and/or alignment constraints. 
The xallocx function resizes the allocation at ptr in place to be at least @@ -355,8 +357,9 @@ mallocx function, and returns the real size of the allocation that would result from the equivalent mallocx function call. Behavior is - undefined if size is - 0. + undefined if size is 0, or if + request size overflows due to size class and/or alignment + constraints. The mallctl function provides a general interface for introspecting the memory allocator, as well as @@ -518,8 +521,9 @@ for (i = 0; i < nbins; i++) { *ptr to the base address of the allocation, and sets *rsize to the real size of the allocation if rsize is not NULL. Behavior - is undefined if size is - 0. + is undefined if size is 0, or + if request size overflows due to size class and/or alignment + constraints. The rallocm function resizes the allocation at *ptr to be at least @@ -532,8 +536,9 @@ for (i = 0; i < nbins; i++) { language="C">(size + extra) bytes, though inability to allocate the extra byte(s) will not by itself result in failure. Behavior is - undefined if size is 0, or if - (size + + undefined if size is 0, if + request size overflows due to size class and/or alignment constraints, or + if (size + extra > SIZE_T_MAX). @@ -550,8 +555,9 @@ for (i = 0; i < nbins; i++) { rsize is not NULL it sets *rsize to the real size of the allocation that would result from the equivalent allocm - function call. Behavior is undefined if - size is 0. + function call. Behavior is undefined if size is + 0, or if request size overflows due to size class + and/or alignment constraints. 
diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index f092155c..20dfd8cc 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -436,7 +436,7 @@ void arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr); typedef void (arena_ralloc_junk_large_t)(void *, size_t, size_t); extern arena_ralloc_junk_large_t *arena_ralloc_junk_large; #endif -void *arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, +bool arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, bool zero); void *arena_ralloc(arena_t *arena, void *ptr, size_t oldsize, size_t size, size_t extra, size_t alignment, bool zero, bool try_tcache_alloc, diff --git a/include/jemalloc/internal/huge.h b/include/jemalloc/internal/huge.h index dac23c68..ddf13138 100644 --- a/include/jemalloc/internal/huge.h +++ b/include/jemalloc/internal/huge.h @@ -19,7 +19,7 @@ extern malloc_mutex_t huge_mtx; void *huge_malloc(size_t size, bool zero); void *huge_palloc(size_t size, size_t alignment, bool zero); -void *huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, +bool huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra); void *huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, size_t alignment, bool zero, bool try_tcache_dalloc); diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index caadc1e1..7c4397fb 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -747,11 +747,15 @@ void idalloct(void *ptr, bool try_tcache); void idalloc(void *ptr); void iqalloct(void *ptr, bool try_tcache); void iqalloc(void *ptr); -void *iralloct(void *ptr, size_t size, size_t extra, size_t alignment, - bool zero, bool no_move, bool try_tcache_alloc, bool try_tcache_dalloc, +void *iralloct_realign(void *ptr, size_t oldsize, size_t size, size_t extra, + size_t alignment, 
bool zero, bool try_tcache_alloc, bool try_tcache_dalloc, arena_t *arena); +void *iralloct(void *ptr, size_t size, size_t extra, size_t alignment, + bool zero, bool try_tcache_alloc, bool try_tcache_dalloc, arena_t *arena); void *iralloc(void *ptr, size_t size, size_t extra, size_t alignment, - bool zero, bool no_move); + bool zero); +bool ixalloc(void *ptr, size_t size, size_t extra, size_t alignment, + bool zero); malloc_tsd_protos(JEMALLOC_ATTR(unused), thread_allocated, thread_allocated_t) #endif @@ -920,10 +924,42 @@ iqalloc(void *ptr) } JEMALLOC_ALWAYS_INLINE void * -iralloct(void *ptr, size_t size, size_t extra, size_t alignment, bool zero, - bool no_move, bool try_tcache_alloc, bool try_tcache_dalloc, arena_t *arena) +iralloct_realign(void *ptr, size_t oldsize, size_t size, size_t extra, + size_t alignment, bool zero, bool try_tcache_alloc, bool try_tcache_dalloc, + arena_t *arena) +{ + void *p; + size_t usize, copysize; + + usize = sa2u(size + extra, alignment); + if (usize == 0) + return (NULL); + p = ipalloct(usize, alignment, zero, try_tcache_alloc, arena); + if (p == NULL) { + if (extra == 0) + return (NULL); + /* Try again, without extra this time. */ + usize = sa2u(size, alignment); + if (usize == 0) + return (NULL); + p = ipalloct(usize, alignment, zero, try_tcache_alloc, arena); + if (p == NULL) + return (NULL); + } + /* + * Copy at most size bytes (not size+extra), since the caller has no + * expectation that the extra bytes will be reliably preserved. + */ + copysize = (size < oldsize) ? 
size : oldsize; + memcpy(p, ptr, copysize); + iqalloct(ptr, try_tcache_dalloc); + return (p); +} + +JEMALLOC_ALWAYS_INLINE void * +iralloct(void *ptr, size_t size, size_t extra, size_t alignment, bool zero, + bool try_tcache_alloc, bool try_tcache_dalloc, arena_t *arena) { - void *ret; size_t oldsize; assert(ptr != NULL); @@ -933,68 +969,50 @@ iralloct(void *ptr, size_t size, size_t extra, size_t alignment, bool zero, if (alignment != 0 && ((uintptr_t)ptr & ((uintptr_t)alignment-1)) != 0) { - size_t usize, copysize; - /* * Existing object alignment is inadequate; allocate new space * and copy. */ - if (no_move) - return (NULL); - usize = sa2u(size + extra, alignment); - if (usize == 0) - return (NULL); - ret = ipalloct(usize, alignment, zero, try_tcache_alloc, arena); - if (ret == NULL) { - if (extra == 0) - return (NULL); - /* Try again, without extra this time. */ - usize = sa2u(size, alignment); - if (usize == 0) - return (NULL); - ret = ipalloct(usize, alignment, zero, try_tcache_alloc, - arena); - if (ret == NULL) - return (NULL); - } - /* - * Copy at most size bytes (not size+extra), since the caller - * has no expectation that the extra bytes will be reliably - * preserved. - */ - copysize = (size < oldsize) ? 
size : oldsize; - memcpy(ret, ptr, copysize); - iqalloct(ptr, try_tcache_dalloc); - return (ret); + return (iralloct_realign(ptr, oldsize, size, extra, alignment, + zero, try_tcache_alloc, try_tcache_dalloc, arena)); } - if (no_move) { - if (size <= arena_maxclass) { - return (arena_ralloc_no_move(ptr, oldsize, size, - extra, zero)); - } else { - return (huge_ralloc_no_move(ptr, oldsize, size, - extra)); - } + if (size + extra <= arena_maxclass) { + return (arena_ralloc(arena, ptr, oldsize, size, extra, + alignment, zero, try_tcache_alloc, + try_tcache_dalloc)); } else { - if (size + extra <= arena_maxclass) { - return (arena_ralloc(arena, ptr, oldsize, size, extra, - alignment, zero, try_tcache_alloc, - try_tcache_dalloc)); - } else { - return (huge_ralloc(ptr, oldsize, size, extra, - alignment, zero, try_tcache_dalloc)); - } + return (huge_ralloc(ptr, oldsize, size, extra, + alignment, zero, try_tcache_dalloc)); } } JEMALLOC_ALWAYS_INLINE void * -iralloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero, - bool no_move) +iralloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero) { - return (iralloct(ptr, size, extra, alignment, zero, no_move, true, true, - NULL)); + return (iralloct(ptr, size, extra, alignment, zero, true, true, NULL)); +} + +JEMALLOC_ALWAYS_INLINE bool +ixalloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero) +{ + size_t oldsize; + + assert(ptr != NULL); + assert(size != 0); + + oldsize = isalloc(ptr, config_prof); + if (alignment != 0 && ((uintptr_t)ptr & ((uintptr_t)alignment-1)) + != 0) { + /* Existing object alignment is inadequate. 
*/ + return (true); + } + + if (size <= arena_maxclass) + return (arena_ralloc_no_move(ptr, oldsize, size, extra, zero)); + else + return (huge_ralloc_no_move(ptr, oldsize, size, extra)); } malloc_tsd_externs(thread_allocated, thread_allocated_t) diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 10ac5493..6cc811dd 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -223,9 +223,11 @@ iqalloc iqalloct iralloc iralloct +iralloct_realign isalloc isthreaded ivsalloc +ixalloc jemalloc_postfork_child jemalloc_postfork_parent jemalloc_prefork diff --git a/src/arena.c b/src/arena.c index 536be296..ca5b4fe4 100644 --- a/src/arena.c +++ b/src/arena.c @@ -2061,7 +2061,7 @@ arena_ralloc_large(void *ptr, size_t oldsize, size_t size, size_t extra, } } -void * +bool arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, bool zero) { @@ -2077,19 +2077,19 @@ arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, SMALL_SIZE2BIN(size + extra) == SMALL_SIZE2BIN(oldsize)) || (size <= oldsize && size + extra >= oldsize)) - return (ptr); + return (false); } else { assert(size <= arena_maxclass); if (size + extra > SMALL_MAXCLASS) { if (arena_ralloc_large(ptr, oldsize, size, extra, zero) == false) - return (ptr); + return (false); } } } /* Reallocation would require a move. */ - return (NULL); + return (true); } void * @@ -2101,9 +2101,8 @@ arena_ralloc(arena_t *arena, void *ptr, size_t oldsize, size_t size, size_t copysize; /* Try to avoid moving the allocation. 
*/ - ret = arena_ralloc_no_move(ptr, oldsize, size, extra, zero); - if (ret != NULL) - return (ret); + if (arena_ralloc_no_move(ptr, oldsize, size, extra, zero) == false) + return (ptr); /* * size and oldsize are different enough that we need to move the diff --git a/src/huge.c b/src/huge.c index 766c80c6..cecaf2df 100644 --- a/src/huge.c +++ b/src/huge.c @@ -78,7 +78,7 @@ huge_palloc(size_t size, size_t alignment, bool zero) return (ret); } -void * +bool huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra) { @@ -89,11 +89,11 @@ huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra) && CHUNK_CEILING(oldsize) >= CHUNK_CEILING(size) && CHUNK_CEILING(oldsize) <= CHUNK_CEILING(size+extra)) { assert(CHUNK_CEILING(oldsize) == oldsize); - return (ptr); + return (false); } /* Reallocation would require a move. */ - return (NULL); + return (true); } void * @@ -104,9 +104,8 @@ huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, size_t copysize; /* Try to avoid moving the allocation. */ - ret = huge_ralloc_no_move(ptr, oldsize, size, extra); - if (ret != NULL) - return (ret); + if (huge_ralloc_no_move(ptr, oldsize, size, extra) == false) + return (ptr); /* * size and oldsize are different enough that we need to use a diff --git a/src/jemalloc.c b/src/jemalloc.c index 5845fe9a..9fc9b8d1 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -100,18 +100,12 @@ typedef struct { #endif /******************************************************************************/ -/* Function prototypes for non-inline static functions. */ +/* + * Function prototypes for static functions that are referenced prior to + * definition. 
+ */ -static void stats_print_atexit(void); -static unsigned malloc_ncpus(void); -static bool malloc_conf_next(char const **opts_p, char const **k_p, - size_t *klen_p, char const **v_p, size_t *vlen_p); -static void malloc_conf_error(const char *msg, const char *k, size_t klen, - const char *v, size_t vlen); -static void malloc_conf_init(void); static bool malloc_init_hard(void); -static int imemalign(void **memptr, size_t alignment, size_t size, - size_t min_alignment); /******************************************************************************/ /* @@ -852,42 +846,88 @@ malloc_init_hard(void) * Begin malloc(3)-compatible functions. */ +static void * +imalloc_prof_sample(size_t usize, prof_thr_cnt_t *cnt) +{ + void *p; + + if (cnt == NULL) + return (NULL); + if (prof_promote && usize <= SMALL_MAXCLASS) { + p = imalloc(SMALL_MAXCLASS+1); + if (p == NULL) + return (NULL); + arena_prof_promoted(p, usize); + } else + p = imalloc(usize); + + return (p); +} + +JEMALLOC_ALWAYS_INLINE_C void * +imalloc_prof(size_t usize, prof_thr_cnt_t *cnt) +{ + void *p; + + if ((uintptr_t)cnt != (uintptr_t)1U) + p = imalloc_prof_sample(usize, cnt); + else + p = imalloc(usize); + if (p == NULL) + return (NULL); + prof_malloc(p, usize, cnt); + + return (p); +} + +/* + * MALLOC_BODY() is a macro rather than a function because its contents are in + * the fast path, but inlining would cause reliability issues when determining + * how many frames to discard from heap profiling backtraces. + */ +#define MALLOC_BODY(ret, size, usize) do { \ + if (malloc_init()) \ + ret = NULL; \ + else { \ + if (config_prof && opt_prof) { \ + prof_thr_cnt_t *cnt; \ + \ + usize = s2u(size); \ + /* \ + * Call PROF_ALLOC_PREP() here rather than in \ + * imalloc_prof() so that imalloc_prof() can be \ + * inlined without introducing uncertainty \ + * about the number of backtrace frames to \ + * ignore. 
imalloc_prof() is in the fast path \ + * when heap profiling is enabled, so inlining \ + * is critical to performance. (For \ + * consistency all callers of PROF_ALLOC_PREP() \ + * are structured similarly, even though e.g. \ + * realloc() isn't called enough for inlining \ + * to be critical.) \ + */ \ + PROF_ALLOC_PREP(1, usize, cnt); \ + ret = imalloc_prof(usize, cnt); \ + } else { \ + if (config_stats || (config_valgrind && \ + opt_valgrind)) \ + usize = s2u(size); \ + ret = imalloc(size); \ + } \ + } \ +} while (0) + void * je_malloc(size_t size) { void *ret; size_t usize JEMALLOC_CC_SILENCE_INIT(0); - prof_thr_cnt_t *cnt JEMALLOC_CC_SILENCE_INIT(NULL); - - if (malloc_init()) { - ret = NULL; - goto label_oom; - } if (size == 0) size = 1; - if (config_prof && opt_prof) { - usize = s2u(size); - PROF_ALLOC_PREP(1, usize, cnt); - if (cnt == NULL) { - ret = NULL; - goto label_oom; - } - if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && usize <= - SMALL_MAXCLASS) { - ret = imalloc(SMALL_MAXCLASS+1); - if (ret != NULL) - arena_prof_promoted(ret, usize); - } else - ret = imalloc(size); - } else { - if (config_stats || (config_valgrind && opt_valgrind)) - usize = s2u(size); - ret = imalloc(size); - } + MALLOC_BODY(ret, size, usize); -label_oom: if (ret == NULL) { if (config_xmalloc && opt_xmalloc) { malloc_write(": Error in malloc(): " @@ -896,8 +936,6 @@ label_oom: } set_errno(ENOMEM); } - if (config_prof && opt_prof && ret != NULL) - prof_malloc(ret, usize, cnt); if (config_stats && ret != NULL) { assert(usize == isalloc(ret, config_prof)); thread_allocated_tsd_get()->allocated += usize; @@ -907,6 +945,42 @@ label_oom: return (ret); } +static void * +imemalign_prof_sample(size_t alignment, size_t usize, prof_thr_cnt_t *cnt) +{ + void *p; + + if (cnt == NULL) + return (NULL); + if (prof_promote && usize <= SMALL_MAXCLASS) { + assert(sa2u(SMALL_MAXCLASS+1, alignment) != 0); + p = ipalloc(sa2u(SMALL_MAXCLASS+1, alignment), alignment, + false); + if (p == NULL) + 
return (NULL); + arena_prof_promoted(p, usize); + } else + p = ipalloc(usize, alignment, false); + + return (p); +} + +JEMALLOC_ALWAYS_INLINE_C void * +imemalign_prof(size_t alignment, size_t usize, prof_thr_cnt_t *cnt) +{ + void *p; + + if ((uintptr_t)cnt != (uintptr_t)1U) + p = imemalign_prof_sample(alignment, usize, cnt); + else + p = ipalloc(usize, alignment, false); + if (p == NULL) + return (NULL); + prof_malloc(p, usize, cnt); + + return (p); +} + JEMALLOC_ATTR(nonnull(1)) #ifdef JEMALLOC_PROF /* @@ -916,19 +990,18 @@ JEMALLOC_ATTR(nonnull(1)) JEMALLOC_NOINLINE #endif static int -imemalign(void **memptr, size_t alignment, size_t size, - size_t min_alignment) +imemalign(void **memptr, size_t alignment, size_t size, size_t min_alignment) { int ret; size_t usize; void *result; - prof_thr_cnt_t *cnt JEMALLOC_CC_SILENCE_INIT(NULL); assert(min_alignment != 0); - if (malloc_init()) + if (malloc_init()) { result = NULL; - else { + goto label_oom; + } else { if (size == 0) size = 1; @@ -948,57 +1021,38 @@ imemalign(void **memptr, size_t alignment, size_t size, usize = sa2u(size, alignment); if (usize == 0) { result = NULL; - ret = ENOMEM; - goto label_return; + goto label_oom; } if (config_prof && opt_prof) { + prof_thr_cnt_t *cnt; + PROF_ALLOC_PREP(2, usize, cnt); - if (cnt == NULL) { - result = NULL; - ret = EINVAL; - } else { - if (prof_promote && (uintptr_t)cnt != - (uintptr_t)1U && usize <= SMALL_MAXCLASS) { - assert(sa2u(SMALL_MAXCLASS+1, - alignment) != 0); - result = ipalloc(sa2u(SMALL_MAXCLASS+1, - alignment), alignment, false); - if (result != NULL) { - arena_prof_promoted(result, - usize); - } - } else { - result = ipalloc(usize, alignment, - false); - } - } + result = imemalign_prof(alignment, usize, cnt); } else result = ipalloc(usize, alignment, false); - } - - if (result == NULL) { - if (config_xmalloc && opt_xmalloc) { - malloc_write(": Error allocating aligned " - "memory: out of memory\n"); - abort(); - } - ret = ENOMEM; - goto label_return; + if 
(result == NULL) + goto label_oom; } *memptr = result; ret = 0; - label_return: if (config_stats && result != NULL) { assert(usize == isalloc(result, config_prof)); thread_allocated_tsd_get()->allocated += usize; } - if (config_prof && opt_prof && result != NULL) - prof_malloc(result, usize, cnt); UTRACE(0, size, result); return (ret); +label_oom: + assert(result == NULL); + if (config_xmalloc && opt_xmalloc) { + malloc_write(": Error allocating aligned memory: " + "out of memory\n"); + abort(); + } + ret = ENOMEM; + goto label_return; } int @@ -1025,13 +1079,46 @@ je_aligned_alloc(size_t alignment, size_t size) return (ret); } +static void * +icalloc_prof_sample(size_t usize, prof_thr_cnt_t *cnt) +{ + void *p; + + if (cnt == NULL) + return (NULL); + if (prof_promote && usize <= SMALL_MAXCLASS) { + p = icalloc(SMALL_MAXCLASS+1); + if (p == NULL) + return (NULL); + arena_prof_promoted(p, usize); + } else + p = icalloc(usize); + + return (p); +} + +JEMALLOC_ALWAYS_INLINE_C void * +icalloc_prof(size_t usize, prof_thr_cnt_t *cnt) +{ + void *p; + + if ((uintptr_t)cnt != (uintptr_t)1U) + p = icalloc_prof_sample(usize, cnt); + else + p = icalloc(usize); + if (p == NULL) + return (NULL); + prof_malloc(p, usize, cnt); + + return (p); +} + void * je_calloc(size_t num, size_t size) { void *ret; size_t num_size; size_t usize JEMALLOC_CC_SILENCE_INIT(0); - prof_thr_cnt_t *cnt JEMALLOC_CC_SILENCE_INIT(NULL); if (malloc_init()) { num_size = 0; @@ -1060,19 +1147,11 @@ je_calloc(size_t num, size_t size) } if (config_prof && opt_prof) { + prof_thr_cnt_t *cnt; + usize = s2u(num_size); PROF_ALLOC_PREP(1, usize, cnt); - if (cnt == NULL) { - ret = NULL; - goto label_return; - } - if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && usize - <= SMALL_MAXCLASS) { - ret = icalloc(SMALL_MAXCLASS+1); - if (ret != NULL) - arena_prof_promoted(ret, usize); - } else - ret = icalloc(num_size); + ret = icalloc_prof(usize, cnt); } else { if (config_stats || (config_valgrind && opt_valgrind)) usize 
= s2u(num_size); @@ -1088,9 +1167,6 @@ label_return: } set_errno(ENOMEM); } - - if (config_prof && opt_prof && ret != NULL) - prof_malloc(ret, usize, cnt); if (config_stats && ret != NULL) { assert(usize == isalloc(ret, config_prof)); thread_allocated_tsd_get()->allocated += usize; @@ -1100,6 +1176,64 @@ label_return: return (ret); } +static void * +irealloc_prof_sample(void *oldptr, size_t usize, prof_thr_cnt_t *cnt) +{ + void *p; + + if (cnt == NULL) + return (NULL); + if (prof_promote && usize <= SMALL_MAXCLASS) { + p = iralloc(oldptr, SMALL_MAXCLASS+1, 0, 0, false); + if (p == NULL) + return (NULL); + arena_prof_promoted(p, usize); + } else + p = iralloc(oldptr, usize, 0, 0, false); + + return (p); +} + +JEMALLOC_ALWAYS_INLINE_C void * +irealloc_prof(void *oldptr, size_t old_usize, size_t usize, prof_thr_cnt_t *cnt) +{ + void *p; + prof_ctx_t *old_ctx; + + old_ctx = prof_ctx_get(oldptr); + if ((uintptr_t)cnt != (uintptr_t)1U) + p = irealloc_prof_sample(oldptr, usize, cnt); + else + p = iralloc(oldptr, usize, 0, 0, false); + if (p == NULL) + return (NULL); + prof_realloc(p, usize, cnt, old_usize, old_ctx); + + return (p); +} + +JEMALLOC_INLINE_C void +ifree(void *ptr) +{ + size_t usize; + UNUSED size_t rzsize JEMALLOC_CC_SILENCE_INIT(0); + + assert(ptr != NULL); + assert(malloc_initialized || IS_INITIALIZER); + + if (config_prof && opt_prof) { + usize = isalloc(ptr, config_prof); + prof_free(ptr, usize); + } else if (config_stats || config_valgrind) + usize = isalloc(ptr, config_prof); + if (config_stats) + thread_allocated_tsd_get()->deallocated += usize; + if (config_valgrind && opt_valgrind) + rzsize = p2rz(ptr); + iqalloc(ptr); + JEMALLOC_VALGRIND_FREE(ptr, rzsize); +} + void * je_realloc(void *ptr, size_t size) { @@ -1107,136 +1241,51 @@ je_realloc(void *ptr, size_t size) size_t usize JEMALLOC_CC_SILENCE_INIT(0); size_t old_usize = 0; UNUSED size_t old_rzsize JEMALLOC_CC_SILENCE_INIT(0); - prof_thr_cnt_t *cnt JEMALLOC_CC_SILENCE_INIT(NULL); - prof_ctx_t 
*old_ctx JEMALLOC_CC_SILENCE_INIT(NULL); if (size == 0) { if (ptr != NULL) { - /* realloc(ptr, 0) is equivalent to free(p). */ - assert(malloc_initialized || IS_INITIALIZER); - if (config_prof) { - old_usize = isalloc(ptr, true); - if (config_valgrind && opt_valgrind) - old_rzsize = p2rz(ptr); - } else if (config_stats) { - old_usize = isalloc(ptr, false); - if (config_valgrind && opt_valgrind) - old_rzsize = u2rz(old_usize); - } else if (config_valgrind && opt_valgrind) { - old_usize = isalloc(ptr, false); - old_rzsize = u2rz(old_usize); - } - if (config_prof && opt_prof) { - old_ctx = prof_ctx_get(ptr); - cnt = NULL; - } - iqalloc(ptr); - ret = NULL; - goto label_return; - } else - size = 1; + /* realloc(ptr, 0) is equivalent to free(ptr). */ + UTRACE(ptr, 0, 0); + ifree(ptr); + return (NULL); + } + size = 1; } if (ptr != NULL) { assert(malloc_initialized || IS_INITIALIZER); malloc_thread_init(); - if (config_prof) { - old_usize = isalloc(ptr, true); - if (config_valgrind && opt_valgrind) - old_rzsize = p2rz(ptr); - } else if (config_stats) { - old_usize = isalloc(ptr, false); - if (config_valgrind && opt_valgrind) - old_rzsize = u2rz(old_usize); - } else if (config_valgrind && opt_valgrind) { - old_usize = isalloc(ptr, false); - old_rzsize = u2rz(old_usize); - } + if ((config_prof && opt_prof) || config_stats || + (config_valgrind && opt_valgrind)) + old_usize = isalloc(ptr, config_prof); + if (config_valgrind && opt_valgrind) + old_rzsize = config_prof ? 
p2rz(ptr) : u2rz(old_usize); + if (config_prof && opt_prof) { + prof_thr_cnt_t *cnt; + usize = s2u(size); - old_ctx = prof_ctx_get(ptr); PROF_ALLOC_PREP(1, usize, cnt); - if (cnt == NULL) { - old_ctx = NULL; - ret = NULL; - goto label_oom; - } - if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && - usize <= SMALL_MAXCLASS) { - ret = iralloc(ptr, SMALL_MAXCLASS+1, 0, 0, - false, false); - if (ret != NULL) - arena_prof_promoted(ret, usize); - else - old_ctx = NULL; - } else { - ret = iralloc(ptr, size, 0, 0, false, false); - if (ret == NULL) - old_ctx = NULL; - } + ret = irealloc_prof(ptr, old_usize, usize, cnt); } else { if (config_stats || (config_valgrind && opt_valgrind)) usize = s2u(size); - ret = iralloc(ptr, size, 0, 0, false, false); - } - -label_oom: - if (ret == NULL) { - if (config_xmalloc && opt_xmalloc) { - malloc_write(": Error in realloc(): " - "out of memory\n"); - abort(); - } - set_errno(ENOMEM); + ret = iralloc(ptr, size, 0, 0, false); } } else { /* realloc(NULL, size) is equivalent to malloc(size). 
*/ - if (config_prof && opt_prof) - old_ctx = NULL; - if (malloc_init()) { - if (config_prof && opt_prof) - cnt = NULL; - ret = NULL; - } else { - if (config_prof && opt_prof) { - usize = s2u(size); - PROF_ALLOC_PREP(1, usize, cnt); - if (cnt == NULL) - ret = NULL; - else { - if (prof_promote && (uintptr_t)cnt != - (uintptr_t)1U && usize <= - SMALL_MAXCLASS) { - ret = imalloc(SMALL_MAXCLASS+1); - if (ret != NULL) { - arena_prof_promoted(ret, - usize); - } - } else - ret = imalloc(size); - } - } else { - if (config_stats || (config_valgrind && - opt_valgrind)) - usize = s2u(size); - ret = imalloc(size); - } - } - - if (ret == NULL) { - if (config_xmalloc && opt_xmalloc) { - malloc_write(": Error in realloc(): " - "out of memory\n"); - abort(); - } - set_errno(ENOMEM); - } + MALLOC_BODY(ret, size, usize); } -label_return: - if (config_prof && opt_prof) - prof_realloc(ret, usize, cnt, old_usize, old_ctx); + if (ret == NULL) { + if (config_xmalloc && opt_xmalloc) { + malloc_write(": Error in realloc(): " + "out of memory\n"); + abort(); + } + set_errno(ENOMEM); + } if (config_stats && ret != NULL) { thread_allocated_t *ta; assert(usize == isalloc(ret, config_prof)); @@ -1255,24 +1304,8 @@ je_free(void *ptr) { UTRACE(ptr, 0, 0); - if (ptr != NULL) { - size_t usize; - UNUSED size_t rzsize JEMALLOC_CC_SILENCE_INIT(0); - - assert(malloc_initialized || IS_INITIALIZER); - - if (config_prof && opt_prof) { - usize = isalloc(ptr, config_prof); - prof_free(ptr, usize); - } else if (config_stats || config_valgrind) - usize = isalloc(ptr, config_prof); - if (config_stats) - thread_allocated_tsd_get()->deallocated += usize; - if (config_valgrind && opt_valgrind) - rzsize = p2rz(ptr); - iqalloc(ptr); - JEMALLOC_VALGRIND_FREE(ptr, rzsize); - } + if (ptr != NULL) + ifree(ptr); } /* @@ -1354,6 +1387,47 @@ imallocx(size_t usize, size_t alignment, bool zero, bool try_tcache, return (imalloct(usize, try_tcache, arena)); } +static void * +imallocx_prof_sample(size_t usize, size_t 
alignment, bool zero, bool try_tcache, + arena_t *arena, prof_thr_cnt_t *cnt) +{ + void *p; + + if (cnt == NULL) + return (NULL); + if (prof_promote && usize <= SMALL_MAXCLASS) { + size_t usize_promoted = (alignment == 0) ? + s2u(SMALL_MAXCLASS+1) : sa2u(SMALL_MAXCLASS+1, alignment); + assert(usize_promoted != 0); + p = imallocx(usize_promoted, alignment, zero, try_tcache, + arena); + if (p == NULL) + return (NULL); + arena_prof_promoted(p, usize); + } else + p = imallocx(usize, alignment, zero, try_tcache, arena); + + return (p); +} + +JEMALLOC_ALWAYS_INLINE_C void * +imallocx_prof(size_t usize, size_t alignment, bool zero, bool try_tcache, + arena_t *arena, prof_thr_cnt_t *cnt) +{ + void *p; + + if ((uintptr_t)cnt != (uintptr_t)1U) { + p = imallocx_prof_sample(usize, alignment, zero, try_tcache, + arena, cnt); + } else + p = imallocx(usize, alignment, zero, try_tcache, arena); + if (p == NULL) + return (NULL); + prof_malloc(p, usize, cnt); + + return (p); +} + void * je_mallocx(size_t size, int flags) { @@ -1380,37 +1454,18 @@ je_mallocx(size_t size, int flags) } usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment); - if (usize == 0) - goto label_oom; + assert(usize != 0); if (config_prof && opt_prof) { prof_thr_cnt_t *cnt; PROF_ALLOC_PREP(1, usize, cnt); - if (cnt == NULL) - goto label_oom; - if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && usize <= - SMALL_MAXCLASS) { - size_t usize_promoted = (alignment == 0) ? 
- s2u(SMALL_MAXCLASS+1) : sa2u(SMALL_MAXCLASS+1, - alignment); - assert(usize_promoted != 0); - p = imallocx(usize_promoted, alignment, zero, - try_tcache, arena); - if (p == NULL) - goto label_oom; - arena_prof_promoted(p, usize); - } else { - p = imallocx(usize, alignment, zero, try_tcache, arena); - if (p == NULL) - goto label_oom; - } - prof_malloc(p, usize, cnt); - } else { + p = imallocx_prof(usize, alignment, zero, try_tcache, arena, + cnt); + } else p = imallocx(usize, alignment, zero, try_tcache, arena); - if (p == NULL) - goto label_oom; - } + if (p == NULL) + goto label_oom; if (config_stats) { assert(usize == isalloc(p, config_prof)); @@ -1428,6 +1483,65 @@ label_oom: return (NULL); } +static void * +irallocx_prof_sample(void *oldptr, size_t size, size_t alignment, size_t usize, + bool zero, bool try_tcache_alloc, bool try_tcache_dalloc, arena_t *arena, + prof_thr_cnt_t *cnt) +{ + void *p; + + if (cnt == NULL) + return (NULL); + if (prof_promote && usize <= SMALL_MAXCLASS) { + p = iralloct(oldptr, SMALL_MAXCLASS+1, (SMALL_MAXCLASS+1 >= + size) ? 
0 : size - (SMALL_MAXCLASS+1), alignment, zero, + try_tcache_alloc, try_tcache_dalloc, arena); + if (p == NULL) + return (NULL); + arena_prof_promoted(p, usize); + } else { + p = iralloct(oldptr, size, 0, alignment, zero, + try_tcache_alloc, try_tcache_dalloc, arena); + } + + return (p); +} + +JEMALLOC_ALWAYS_INLINE_C void * +irallocx_prof(void *oldptr, size_t old_usize, size_t size, size_t alignment, + size_t *usize, bool zero, bool try_tcache_alloc, bool try_tcache_dalloc, + arena_t *arena, prof_thr_cnt_t *cnt) +{ + void *p; + prof_ctx_t *old_ctx; + + old_ctx = prof_ctx_get(oldptr); + if ((uintptr_t)cnt != (uintptr_t)1U) + p = irallocx_prof_sample(oldptr, size, alignment, *usize, zero, + try_tcache_alloc, try_tcache_dalloc, arena, cnt); + else { + p = iralloct(oldptr, size, 0, alignment, zero, + try_tcache_alloc, try_tcache_dalloc, arena); + } + if (p == NULL) + return (NULL); + + if (p == oldptr && alignment != 0) { + /* + * The allocation did not move, so it is possible that the size + * class is smaller than would guarantee the requested + * alignment, and that the alignment constraint was + * serendipitously satisfied. Additionally, old_usize may not + * be the same as the current usize because of in-place large + * reallocation. Therefore, query the actual value of usize. + */ + *usize = isalloc(p, config_prof); + } + prof_realloc(p, *usize, cnt, old_usize, old_ctx); + + return (p); +} + void * je_rallocx(void *ptr, size_t size, int flags) { @@ -1459,59 +1573,25 @@ je_rallocx(void *ptr, size_t size, int flags) arena = NULL; } + if ((config_prof && opt_prof) || config_stats || + (config_valgrind && opt_valgrind)) + old_usize = isalloc(ptr, config_prof); + if (config_valgrind && opt_valgrind) + old_rzsize = u2rz(old_usize); + if (config_prof && opt_prof) { prof_thr_cnt_t *cnt; - usize = (alignment == 0) ? 
s2u(size) : sa2u(size, - alignment); - prof_ctx_t *old_ctx = prof_ctx_get(ptr); - old_usize = isalloc(ptr, true); - if (config_valgrind && opt_valgrind) - old_rzsize = p2rz(ptr); + usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment); + assert(usize != 0); PROF_ALLOC_PREP(1, usize, cnt); - if (cnt == NULL) + p = irallocx_prof(ptr, old_usize, size, alignment, &usize, zero, + try_tcache_alloc, try_tcache_dalloc, arena, cnt); + if (p == NULL) goto label_oom; - if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && usize <= - SMALL_MAXCLASS) { - p = iralloct(ptr, SMALL_MAXCLASS+1, (SMALL_MAXCLASS+1 >= - size) ? 0 : size - (SMALL_MAXCLASS+1), alignment, - zero, false, try_tcache_alloc, try_tcache_dalloc, - arena); - if (p == NULL) - goto label_oom; - if (usize < PAGE) - arena_prof_promoted(p, usize); - } else { - p = iralloct(ptr, size, 0, alignment, zero, false, - try_tcache_alloc, try_tcache_dalloc, arena); - if (p == NULL) - goto label_oom; - } - if (p == ptr && alignment != 0) { - /* - * The allocation did not move, so it is possible that - * the size class is smaller than would guarantee the - * requested alignment, and that the alignment - * constraint was serendipitously satisfied. - * Additionally, old_usize may not be the same as the - * current usize because of in-place large - * reallocation. Therefore, query the actual value of - * usize. 
- */ - usize = isalloc(p, true); - } - prof_realloc(p, usize, cnt, old_usize, old_ctx); } else { - if (config_stats) { - old_usize = isalloc(ptr, false); - if (config_valgrind && opt_valgrind) - old_rzsize = u2rz(old_usize); - } else if (config_valgrind && opt_valgrind) { - old_usize = isalloc(ptr, false); - old_rzsize = u2rz(old_usize); - } - p = iralloct(ptr, size, 0, alignment, zero, false, - try_tcache_alloc, try_tcache_dalloc, arena); + p = iralloct(ptr, size, 0, alignment, zero, try_tcache_alloc, + try_tcache_dalloc, arena); if (p == NULL) goto label_oom; if (config_stats || (config_valgrind && opt_valgrind)) @@ -1536,6 +1616,69 @@ label_oom: return (NULL); } +JEMALLOC_ALWAYS_INLINE_C size_t +ixallocx_helper(void *ptr, size_t old_usize, size_t size, size_t extra, + size_t alignment, bool zero, arena_t *arena) +{ + size_t usize; + + if (ixalloc(ptr, size, extra, alignment, zero)) + return (old_usize); + usize = isalloc(ptr, config_prof); + + return (usize); +} + +static size_t +ixallocx_prof_sample(void *ptr, size_t old_usize, size_t size, size_t extra, + size_t alignment, size_t max_usize, bool zero, arena_t *arena, + prof_thr_cnt_t *cnt) +{ + size_t usize; + + if (cnt == NULL) + return (old_usize); + /* Use minimum usize to determine whether promotion may happen. */ + if (prof_promote && ((alignment == 0) ? s2u(size) : sa2u(size, + alignment)) <= SMALL_MAXCLASS) { + if (ixalloc(ptr, SMALL_MAXCLASS+1, (SMALL_MAXCLASS+1 >= + size+extra) ? 
0 : size+extra - (SMALL_MAXCLASS+1), + alignment, zero)) + return (old_usize); + usize = isalloc(ptr, config_prof); + if (max_usize < PAGE) + arena_prof_promoted(ptr, usize); + } else { + usize = ixallocx_helper(ptr, old_usize, size, extra, alignment, + zero, arena); + } + + return (usize); +} + +JEMALLOC_ALWAYS_INLINE_C size_t +ixallocx_prof(void *ptr, size_t old_usize, size_t size, size_t extra, + size_t alignment, size_t max_usize, bool zero, arena_t *arena, + prof_thr_cnt_t *cnt) +{ + size_t usize; + prof_ctx_t *old_ctx; + + old_ctx = prof_ctx_get(ptr); + if ((uintptr_t)cnt != (uintptr_t)1U) { + usize = ixallocx_prof_sample(ptr, old_usize, size, extra, + alignment, zero, max_usize, arena, cnt); + } else { + usize = ixallocx_helper(ptr, old_usize, size, extra, alignment, + zero, arena); + } + if (usize == old_usize) + return (usize); + prof_realloc(ptr, usize, cnt, old_usize, old_ctx); + + return (usize); +} + size_t je_xallocx(void *ptr, size_t size, size_t extra, int flags) { @@ -1545,7 +1688,6 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) & (SIZE_T_MAX-1)); bool zero = flags & MALLOCX_ZERO; unsigned arena_ind = ((unsigned)(flags >> 8)) - 1; - bool try_tcache_alloc, try_tcache_dalloc; arena_t *arena; assert(ptr != NULL); @@ -1556,22 +1698,19 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) if (arena_ind != UINT_MAX) { arena_chunk_t *chunk; - try_tcache_alloc = false; chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - try_tcache_dalloc = (chunk == ptr || chunk->arena != - arenas[arena_ind]); arena = arenas[arena_ind]; - } else { - try_tcache_alloc = true; - try_tcache_dalloc = true; + } else arena = NULL; - } + + old_usize = isalloc(ptr, config_prof); + if (config_valgrind && opt_valgrind) + old_rzsize = u2rz(old_usize); if (config_prof && opt_prof) { prof_thr_cnt_t *cnt; - /* - * usize isn't knowable before iralloc() returns when extra is + * usize isn't knowable before ixalloc() returns when extra is * non-zero. 
Therefore, compute its maximum possible value and * use that in PROF_ALLOC_PREP() to decide whether to capture a * backtrace. prof_realloc() will use the actual usize to @@ -1579,60 +1718,15 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) */ size_t max_usize = (alignment == 0) ? s2u(size+extra) : sa2u(size+extra, alignment); - prof_ctx_t *old_ctx = prof_ctx_get(ptr); - old_usize = isalloc(ptr, true); - if (config_valgrind && opt_valgrind) - old_rzsize = p2rz(ptr); PROF_ALLOC_PREP(1, max_usize, cnt); - if (cnt == NULL) { - usize = old_usize; - goto label_not_moved; - } - /* - * Use minimum usize to determine whether promotion may happen. - */ - if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U - && ((alignment == 0) ? s2u(size) : sa2u(size, alignment)) - <= SMALL_MAXCLASS) { - if (iralloct(ptr, SMALL_MAXCLASS+1, (SMALL_MAXCLASS+1 >= - size+extra) ? 0 : size+extra - (SMALL_MAXCLASS+1), - alignment, zero, true, try_tcache_alloc, - try_tcache_dalloc, arena) == NULL) { - usize = old_usize; - goto label_not_moved; - } - usize = isalloc(ptr, true); - if (max_usize < PAGE) - arena_prof_promoted(ptr, usize); - } else { - if (iralloct(ptr, size, extra, alignment, zero, true, - try_tcache_alloc, try_tcache_dalloc, arena) == - NULL) { - usize = old_usize; - goto label_not_moved; - } - usize = isalloc(ptr, true); - } - prof_realloc(ptr, usize, cnt, old_usize, old_ctx); + usize = ixallocx_prof(ptr, old_usize, size, extra, alignment, + max_usize, zero, arena, cnt); } else { - if (config_stats) { - old_usize = isalloc(ptr, false); - if (config_valgrind && opt_valgrind) - old_rzsize = u2rz(old_usize); - } else if (config_valgrind && opt_valgrind) { - old_usize = isalloc(ptr, false); - old_rzsize = u2rz(old_usize); - } - if (iralloct(ptr, size, extra, alignment, zero, true, - try_tcache_alloc, try_tcache_dalloc, arena) == NULL) { - if (config_stats == false && (config_valgrind == false - || opt_valgrind == false)) - old_usize = isalloc(ptr, false); - usize = 
old_usize; - goto label_not_moved; - } - usize = isalloc(ptr, config_prof); + usize = ixallocx_helper(ptr, old_usize, size, extra, alignment, + zero, arena); } + if (usize == old_usize) + goto label_not_resized; if (config_stats) { thread_allocated_t *ta; @@ -1641,7 +1735,7 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) ta->deallocated += old_usize; } JEMALLOC_VALGRIND_REALLOC(ptr, usize, ptr, old_usize, old_rzsize, zero); -label_not_moved: +label_not_resized: UTRACE(ptr, size, ptr); return (usize); } @@ -1711,6 +1805,7 @@ je_nallocx(size_t size, int flags) return (0); usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment); + assert(usize != 0); return (usize); } diff --git a/test/integration/allocm.c b/test/integration/allocm.c index 3886280a..bd7a3ca5 100644 --- a/test/integration/allocm.c +++ b/test/integration/allocm.c @@ -44,23 +44,6 @@ TEST_BEGIN(test_alignment_errors) void *p; size_t nsz, rsz, sz, alignment; -#if LG_SIZEOF_PTR == 3 - alignment = UINT64_C(0x8000000000000000); - sz = UINT64_C(0x8000000000000000); -#else - alignment = 0x80000000LU; - sz = 0x80000000LU; -#endif - nsz = 0; - assert_d_ne(nallocm(&nsz, sz, ALLOCM_ALIGN(alignment)), ALLOCM_SUCCESS, - "Expected error for nallocm(&nsz, %zu, %#x)", - sz, ALLOCM_ALIGN(alignment)); - rsz = 0; - assert_d_ne(allocm(&p, &rsz, sz, ALLOCM_ALIGN(alignment)), - ALLOCM_SUCCESS, "Expected error for allocm(&p, %zu, %#x)", - sz, ALLOCM_ALIGN(alignment)); - assert_zu_eq(nsz, rsz, "nallocm()/allocm() rsize mismatch"); - #if LG_SIZEOF_PTR == 3 alignment = UINT64_C(0x4000000000000000); sz = UINT64_C(0x8400000000000001); @@ -75,22 +58,6 @@ TEST_BEGIN(test_alignment_errors) assert_d_ne(allocm(&p, &rsz, sz, ALLOCM_ALIGN(alignment)), ALLOCM_SUCCESS, "Expected error for allocm(&p, %zu, %#x)", sz, ALLOCM_ALIGN(alignment)); - - alignment = 0x10LU; -#if LG_SIZEOF_PTR == 3 - sz = UINT64_C(0xfffffffffffffff0); -#else - sz = 0xfffffff0LU; -#endif - nsz = 0; - assert_d_ne(nallocm(&nsz, sz, 
ALLOCM_ALIGN(alignment)), ALLOCM_SUCCESS, - "Expected error for nallocm(&nsz, %zu, %#x)", - sz, ALLOCM_ALIGN(alignment)); - rsz = 0; - assert_d_ne(allocm(&p, &rsz, sz, ALLOCM_ALIGN(alignment)), - ALLOCM_SUCCESS, "Expected error for allocm(&p, %zu, %#x)", - sz, ALLOCM_ALIGN(alignment)); - assert_zu_eq(nsz, rsz, "nallocm()/allocm() rsize mismatch"); } TEST_END diff --git a/test/integration/mallocx.c b/test/integration/mallocx.c index f12855e7..c26f6c56 100644 --- a/test/integration/mallocx.c +++ b/test/integration/mallocx.c @@ -39,20 +39,6 @@ TEST_BEGIN(test_alignment_errors) void *p; size_t nsz, sz, alignment; -#if LG_SIZEOF_PTR == 3 - alignment = UINT64_C(0x8000000000000000); - sz = UINT64_C(0x8000000000000000); -#else - alignment = 0x80000000LU; - sz = 0x80000000LU; -#endif - nsz = nallocx(sz, MALLOCX_ALIGN(alignment)); - assert_zu_eq(nsz, 0, "Expected error for nallocx(%zu, %#x)", sz, - MALLOCX_ALIGN(alignment)); - p = mallocx(sz, MALLOCX_ALIGN(alignment)); - assert_ptr_null(p, "Expected error for mallocx(%zu, %#x)", sz, - MALLOCX_ALIGN(alignment)); - #if LG_SIZEOF_PTR == 3 alignment = UINT64_C(0x4000000000000000); sz = UINT64_C(0x8400000000000001); @@ -65,22 +51,6 @@ TEST_BEGIN(test_alignment_errors) p = mallocx(sz, MALLOCX_ALIGN(alignment)); assert_ptr_null(p, "Expected error for mallocx(%zu, %#x)", sz, MALLOCX_ALIGN(alignment)); - - alignment = 0x10LU; -#if LG_SIZEOF_PTR == 3 - sz = UINT64_C(0xfffffffffffffff0); -#else - sz = 0xfffffff0LU; -#endif - nsz = nallocx(sz, MALLOCX_ALIGN(alignment)); - assert_zu_eq(nsz, 0, "Expected error for nallocx(%zu, %#x)", sz, - MALLOCX_ALIGN(alignment)); - nsz = nallocx(sz, MALLOCX_ALIGN(alignment)); - assert_zu_eq(nsz, 0, "Expected error for nallocx(%zu, %#x)", sz, - MALLOCX_ALIGN(alignment)); - p = mallocx(sz, MALLOCX_ALIGN(alignment)); - assert_ptr_null(p, "Expected error for mallocx(%zu, %#x)", sz, - MALLOCX_ALIGN(alignment)); } TEST_END From aa5113b1fdafd1129c22512837c6c3d66c295fc8 Mon Sep 17 00:00:00 2001 From: Jason 
Evans Date: Tue, 14 Jan 2014 16:23:03 -0800 Subject: [PATCH 0373/3378] Refactor overly large/complex functions. Refactor overly large functions by breaking out helper functions. Refactor overly complex multi-purpose functions into separate more specific functions. --- include/jemalloc/internal/arena.h | 1 + src/arena.c | 990 ++++++++++++++++-------------- 2 files changed, 535 insertions(+), 456 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 20dfd8cc..9d000c03 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -158,6 +158,7 @@ struct arena_chunk_map_s { }; typedef rb_tree(arena_chunk_map_t) arena_avail_tree_t; typedef rb_tree(arena_chunk_map_t) arena_run_tree_t; +typedef ql_head(arena_chunk_map_t) arena_chunk_mapelms_t; /* Arena chunk header. */ struct arena_chunk_s { diff --git a/src/arena.c b/src/arena.c index ca5b4fe4..4da6d50c 100644 --- a/src/arena.c +++ b/src/arena.c @@ -38,56 +38,18 @@ const uint8_t small_size2bin[] = { }; /******************************************************************************/ -/* Function prototypes for non-inline static functions. */ +/* + * Function prototypes for static functions that are referenced prior to + * definition. 
+ */ -static void arena_avail_insert(arena_t *arena, arena_chunk_t *chunk, - size_t pageind, size_t npages, bool maybe_adjac_pred, - bool maybe_adjac_succ); -static void arena_avail_remove(arena_t *arena, arena_chunk_t *chunk, - size_t pageind, size_t npages, bool maybe_adjac_pred, - bool maybe_adjac_succ); -static void arena_run_split_helper(arena_t *arena, arena_run_t *run, - size_t size, bool large, size_t binind, bool remove, bool zero); -static void arena_run_split(arena_t *arena, arena_run_t *run, size_t size, - bool large, size_t binind, bool zero); -static void arena_run_init(arena_t *arena, arena_run_t *run, size_t size, - bool large, size_t binind, bool zero); -static arena_chunk_t *arena_chunk_alloc(arena_t *arena); -static void arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk); -static arena_run_t *arena_run_alloc_helper(arena_t *arena, size_t size, - bool large, size_t binind, bool zero); -static arena_run_t *arena_run_alloc(arena_t *arena, size_t size, bool large, - size_t binind, bool zero); -static arena_chunk_t *chunks_dirty_iter_cb(arena_chunk_tree_t *tree, - arena_chunk_t *chunk, void *arg); static void arena_purge(arena_t *arena, bool all); static void arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, bool cleaned); -static void arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, - arena_run_t *run, size_t oldsize, size_t newsize); -static void arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, - arena_run_t *run, size_t oldsize, size_t newsize, bool dirty); -static arena_run_t *arena_bin_runs_first(arena_bin_t *bin); -static void arena_bin_runs_insert(arena_bin_t *bin, arena_run_t *run); -static void arena_bin_runs_remove(arena_bin_t *bin, arena_run_t *run); -static arena_run_t *arena_bin_nonfull_run_tryget(arena_bin_t *bin); -static arena_run_t *arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin); -static void *arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin); -static void 
arena_dissociate_bin_run(arena_chunk_t *chunk, arena_run_t *run, - arena_bin_t *bin); static void arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, arena_bin_t *bin); static void arena_bin_lower_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, arena_bin_t *bin); -static void arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk, - void *ptr, size_t oldsize, size_t size); -static bool arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, - void *ptr, size_t oldsize, size_t size, size_t extra, bool zero); -static bool arena_ralloc_large(void *ptr, size_t oldsize, size_t size, - size_t extra, bool zero); -static size_t bin_info_run_size_calc(arena_bin_info_t *bin_info, - size_t min_run_size); -static void bin_info_init(void); /******************************************************************************/ @@ -392,16 +354,65 @@ arena_run_page_validate_zeroed(arena_chunk_t *chunk, size_t run_ind) } static void -arena_run_split_helper(arena_t *arena, arena_run_t *run, size_t size, - bool large, size_t binind, bool remove, bool zero) +arena_cactive_update(arena_t *arena, size_t add_pages, size_t sub_pages) +{ + + if (config_stats) { + ssize_t cactive_diff = CHUNK_CEILING((arena->nactive + + add_pages) << LG_PAGE) - CHUNK_CEILING((arena->nactive - + sub_pages) << LG_PAGE); + if (cactive_diff != 0) + stats_cactive_add(cactive_diff); + } +} + +static void +arena_run_split_remove(arena_t *arena, arena_chunk_t *chunk, size_t run_ind, + size_t flag_dirty, size_t need_pages) +{ + size_t total_pages, rem_pages; + + total_pages = arena_mapbits_unallocated_size_get(chunk, run_ind) >> + LG_PAGE; + assert(arena_mapbits_dirty_get(chunk, run_ind+total_pages-1) == + flag_dirty); + assert(need_pages <= total_pages); + rem_pages = total_pages - need_pages; + + arena_avail_remove(arena, chunk, run_ind, total_pages, true, true); + arena_cactive_update(arena, need_pages, 0); + arena->nactive += need_pages; + + /* Keep track of trailing 
unused pages for later use. */ + if (rem_pages > 0) { + if (flag_dirty != 0) { + arena_mapbits_unallocated_set(chunk, + run_ind+need_pages, (rem_pages << LG_PAGE), + flag_dirty); + arena_mapbits_unallocated_set(chunk, + run_ind+total_pages-1, (rem_pages << LG_PAGE), + flag_dirty); + } else { + arena_mapbits_unallocated_set(chunk, run_ind+need_pages, + (rem_pages << LG_PAGE), + arena_mapbits_unzeroed_get(chunk, + run_ind+need_pages)); + arena_mapbits_unallocated_set(chunk, + run_ind+total_pages-1, (rem_pages << LG_PAGE), + arena_mapbits_unzeroed_get(chunk, + run_ind+total_pages-1)); + } + arena_avail_insert(arena, chunk, run_ind+need_pages, rem_pages, + false, true); + } +} + +static void +arena_run_split_large_helper(arena_t *arena, arena_run_t *run, size_t size, + bool remove, bool zero) { arena_chunk_t *chunk; - size_t run_ind, need_pages, i; - size_t flag_dirty; - - assert(large || remove); - assert((large && binind == BININD_INVALID) || (large == false && binind - != BININD_INVALID)); + size_t flag_dirty, run_ind, need_pages, i; chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE); @@ -410,229 +421,201 @@ arena_run_split_helper(arena_t *arena, arena_run_t *run, size_t size, assert(need_pages > 0); if (remove) { - size_t total_pages, rem_pages; - - total_pages = arena_mapbits_unallocated_size_get(chunk, - run_ind) >> LG_PAGE; - assert(arena_mapbits_dirty_get(chunk, run_ind+total_pages-1) == - flag_dirty); - assert(need_pages <= total_pages); - rem_pages = total_pages - need_pages; - - arena_avail_remove(arena, chunk, run_ind, total_pages, true, - true); - if (config_stats) { - /* - * Update stats_cactive if nactive is crossing a chunk - * multiple. 
- */ - size_t cactive_diff = CHUNK_CEILING((arena->nactive + - need_pages) << LG_PAGE) - - CHUNK_CEILING(arena->nactive << LG_PAGE); - if (cactive_diff != 0) - stats_cactive_add(cactive_diff); - } - arena->nactive += need_pages; - - /* Keep track of trailing unused pages for later use. */ - if (rem_pages > 0) { - if (flag_dirty != 0) { - arena_mapbits_unallocated_set(chunk, - run_ind+need_pages, (rem_pages << LG_PAGE), - flag_dirty); - arena_mapbits_unallocated_set(chunk, - run_ind+total_pages-1, (rem_pages << - LG_PAGE), flag_dirty); - } else { - arena_mapbits_unallocated_set(chunk, - run_ind+need_pages, (rem_pages << LG_PAGE), - arena_mapbits_unzeroed_get(chunk, - run_ind+need_pages)); - arena_mapbits_unallocated_set(chunk, - run_ind+total_pages-1, (rem_pages << - LG_PAGE), arena_mapbits_unzeroed_get(chunk, - run_ind+total_pages-1)); - } - arena_avail_insert(arena, chunk, run_ind+need_pages, - rem_pages, false, true); - } + arena_run_split_remove(arena, chunk, run_ind, flag_dirty, + need_pages); } - /* - * Update the page map separately for large vs. small runs, since it is - * possible to avoid iteration for large mallocs. - */ - if (large) { - if (zero) { - if (flag_dirty == 0) { - /* - * The run is clean, so some pages may be - * zeroed (i.e. never before touched). - */ - for (i = 0; i < need_pages; i++) { - if (arena_mapbits_unzeroed_get(chunk, - run_ind+i) != 0) { - arena_run_zero(chunk, run_ind+i, - 1); - } else if (config_debug) { - arena_run_page_validate_zeroed( - chunk, run_ind+i); - } else { - arena_run_page_mark_zeroed( - chunk, run_ind+i); - } + if (zero) { + if (flag_dirty == 0) { + /* + * The run is clean, so some pages may be zeroed (i.e. + * never before touched). 
+ */ + for (i = 0; i < need_pages; i++) { + if (arena_mapbits_unzeroed_get(chunk, run_ind+i) + != 0) + arena_run_zero(chunk, run_ind+i, 1); + else if (config_debug) { + arena_run_page_validate_zeroed(chunk, + run_ind+i); + } else { + arena_run_page_mark_zeroed(chunk, + run_ind+i); } - } else { - /* - * The run is dirty, so all pages must be - * zeroed. - */ - arena_run_zero(chunk, run_ind, need_pages); } } else { - VALGRIND_MAKE_MEM_UNDEFINED((void *)((uintptr_t)chunk + - (run_ind << LG_PAGE)), (need_pages << LG_PAGE)); + /* The run is dirty, so all pages must be zeroed. */ + arena_run_zero(chunk, run_ind, need_pages); } - - /* - * Set the last element first, in case the run only contains one - * page (i.e. both statements set the same element). - */ - arena_mapbits_large_set(chunk, run_ind+need_pages-1, 0, - flag_dirty); - arena_mapbits_large_set(chunk, run_ind, size, flag_dirty); } else { - assert(zero == false); - /* - * Propagate the dirty and unzeroed flags to the allocated - * small run, so that arena_dalloc_bin_run() has the ability to - * conditionally trim clean pages. - */ - arena_mapbits_small_set(chunk, run_ind, 0, binind, flag_dirty); - /* - * The first page will always be dirtied during small run - * initialization, so a validation failure here would not - * actually cause an observable failure. 
- */ - if (config_debug && flag_dirty == 0 && - arena_mapbits_unzeroed_get(chunk, run_ind) == 0) - arena_run_page_validate_zeroed(chunk, run_ind); - for (i = 1; i < need_pages - 1; i++) { - arena_mapbits_small_set(chunk, run_ind+i, i, binind, 0); - if (config_debug && flag_dirty == 0 && - arena_mapbits_unzeroed_get(chunk, run_ind+i) == 0) { - arena_run_page_validate_zeroed(chunk, - run_ind+i); - } - } - arena_mapbits_small_set(chunk, run_ind+need_pages-1, - need_pages-1, binind, flag_dirty); - if (config_debug && flag_dirty == 0 && - arena_mapbits_unzeroed_get(chunk, run_ind+need_pages-1) == - 0) { - arena_run_page_validate_zeroed(chunk, - run_ind+need_pages-1); - } VALGRIND_MAKE_MEM_UNDEFINED((void *)((uintptr_t)chunk + (run_ind << LG_PAGE)), (need_pages << LG_PAGE)); } + + /* + * Set the last element first, in case the run only contains one page + * (i.e. both statements set the same element). + */ + arena_mapbits_large_set(chunk, run_ind+need_pages-1, 0, flag_dirty); + arena_mapbits_large_set(chunk, run_ind, size, flag_dirty); } static void -arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, - size_t binind, bool zero) +arena_run_split_large(arena_t *arena, arena_run_t *run, size_t size, bool zero) { - arena_run_split_helper(arena, run, size, large, binind, true, zero); + arena_run_split_large_helper(arena, run, size, true, zero); } static void -arena_run_init(arena_t *arena, arena_run_t *run, size_t size, bool large, - size_t binind, bool zero) +arena_run_init_large(arena_t *arena, arena_run_t *run, size_t size, bool zero) { - arena_run_split_helper(arena, run, size, large, binind, false, zero); + arena_run_split_large_helper(arena, run, size, false, zero); +} + +static void +arena_run_split_small(arena_t *arena, arena_run_t *run, size_t size, + size_t binind) +{ + arena_chunk_t *chunk; + size_t flag_dirty, run_ind, need_pages, i; + + assert(binind != BININD_INVALID); + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); + run_ind = 
(unsigned)(((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE); + flag_dirty = arena_mapbits_dirty_get(chunk, run_ind); + need_pages = (size >> LG_PAGE); + assert(need_pages > 0); + + arena_run_split_remove(arena, chunk, run_ind, flag_dirty, need_pages); + + /* + * Propagate the dirty and unzeroed flags to the allocated small run, + * so that arena_dalloc_bin_run() has the ability to conditionally trim + * clean pages. + */ + arena_mapbits_small_set(chunk, run_ind, 0, binind, flag_dirty); + /* + * The first page will always be dirtied during small run + * initialization, so a validation failure here would not actually + * cause an observable failure. + */ + if (config_debug && flag_dirty == 0 && arena_mapbits_unzeroed_get(chunk, + run_ind) == 0) + arena_run_page_validate_zeroed(chunk, run_ind); + for (i = 1; i < need_pages - 1; i++) { + arena_mapbits_small_set(chunk, run_ind+i, i, binind, 0); + if (config_debug && flag_dirty == 0 && + arena_mapbits_unzeroed_get(chunk, run_ind+i) == 0) + arena_run_page_validate_zeroed(chunk, run_ind+i); + } + arena_mapbits_small_set(chunk, run_ind+need_pages-1, need_pages-1, + binind, flag_dirty); + if (config_debug && flag_dirty == 0 && arena_mapbits_unzeroed_get(chunk, + run_ind+need_pages-1) == 0) + arena_run_page_validate_zeroed(chunk, run_ind+need_pages-1); + VALGRIND_MAKE_MEM_UNDEFINED((void *)((uintptr_t)chunk + + (run_ind << LG_PAGE)), (need_pages << LG_PAGE)); +} + +static arena_chunk_t * +arena_chunk_init_spare(arena_t *arena) +{ + arena_chunk_t *chunk; + + assert(arena->spare != NULL); + + chunk = arena->spare; + arena->spare = NULL; + + assert(arena_mapbits_allocated_get(chunk, map_bias) == 0); + assert(arena_mapbits_allocated_get(chunk, chunk_npages-1) == 0); + assert(arena_mapbits_unallocated_size_get(chunk, map_bias) == + arena_maxclass); + assert(arena_mapbits_unallocated_size_get(chunk, chunk_npages-1) == + arena_maxclass); + assert(arena_mapbits_dirty_get(chunk, map_bias) == + arena_mapbits_dirty_get(chunk, 
chunk_npages-1)); + + return (chunk); +} + +static arena_chunk_t * +arena_chunk_init_hard(arena_t *arena) +{ + arena_chunk_t *chunk; + bool zero; + size_t unzeroed, i; + + assert(arena->spare == NULL); + + zero = false; + malloc_mutex_unlock(&arena->lock); + chunk = (arena_chunk_t *)chunk_alloc(chunksize, chunksize, false, + &zero, arena->dss_prec); + malloc_mutex_lock(&arena->lock); + if (chunk == NULL) + return (NULL); + if (config_stats) + arena->stats.mapped += chunksize; + + chunk->arena = arena; + + /* + * Claim that no pages are in use, since the header is merely overhead. + */ + chunk->ndirty = 0; + + chunk->nruns_avail = 0; + chunk->nruns_adjac = 0; + + /* + * Initialize the map to contain one maximal free untouched run. Mark + * the pages as zeroed iff chunk_alloc() returned a zeroed chunk. + */ + unzeroed = zero ? 0 : CHUNK_MAP_UNZEROED; + arena_mapbits_unallocated_set(chunk, map_bias, arena_maxclass, + unzeroed); + /* + * There is no need to initialize the internal page map entries unless + * the chunk is not zeroed. 
+ */ + if (zero == false) { + VALGRIND_MAKE_MEM_UNDEFINED((void *)arena_mapp_get(chunk, + map_bias+1), (size_t)((uintptr_t) arena_mapp_get(chunk, + chunk_npages-1) - (uintptr_t)arena_mapp_get(chunk, + map_bias+1))); + for (i = map_bias+1; i < chunk_npages-1; i++) + arena_mapbits_unzeroed_set(chunk, i, unzeroed); + } else { + VALGRIND_MAKE_MEM_DEFINED((void *)arena_mapp_get(chunk, + map_bias+1), (size_t)((uintptr_t) arena_mapp_get(chunk, + chunk_npages-1) - (uintptr_t)arena_mapp_get(chunk, + map_bias+1))); + if (config_debug) { + for (i = map_bias+1; i < chunk_npages-1; i++) { + assert(arena_mapbits_unzeroed_get(chunk, i) == + unzeroed); + } + } + } + arena_mapbits_unallocated_set(chunk, chunk_npages-1, arena_maxclass, + unzeroed); + + return (chunk); } static arena_chunk_t * arena_chunk_alloc(arena_t *arena) { arena_chunk_t *chunk; - size_t i; - if (arena->spare != NULL) { - chunk = arena->spare; - arena->spare = NULL; - - assert(arena_mapbits_allocated_get(chunk, map_bias) == 0); - assert(arena_mapbits_allocated_get(chunk, chunk_npages-1) == 0); - assert(arena_mapbits_unallocated_size_get(chunk, map_bias) == - arena_maxclass); - assert(arena_mapbits_unallocated_size_get(chunk, - chunk_npages-1) == arena_maxclass); - assert(arena_mapbits_dirty_get(chunk, map_bias) == - arena_mapbits_dirty_get(chunk, chunk_npages-1)); - } else { - bool zero; - size_t unzeroed; - - zero = false; - malloc_mutex_unlock(&arena->lock); - chunk = (arena_chunk_t *)chunk_alloc(chunksize, chunksize, - false, &zero, arena->dss_prec); - malloc_mutex_lock(&arena->lock); - if (chunk == NULL) - return (NULL); - if (config_stats) - arena->stats.mapped += chunksize; - - chunk->arena = arena; - - /* - * Claim that no pages are in use, since the header is merely - * overhead. - */ - chunk->ndirty = 0; - - chunk->nruns_avail = 0; - chunk->nruns_adjac = 0; - - /* - * Initialize the map to contain one maximal free untouched run. - * Mark the pages as zeroed iff chunk_alloc() returned a zeroed - * chunk. 
- */ - unzeroed = zero ? 0 : CHUNK_MAP_UNZEROED; - arena_mapbits_unallocated_set(chunk, map_bias, arena_maxclass, - unzeroed); - /* - * There is no need to initialize the internal page map entries - * unless the chunk is not zeroed. - */ - if (zero == false) { - VALGRIND_MAKE_MEM_UNDEFINED( - (void *)arena_mapp_get(chunk, map_bias+1), - (size_t)((uintptr_t) arena_mapp_get(chunk, - chunk_npages-1) - (uintptr_t)arena_mapp_get(chunk, - map_bias+1))); - for (i = map_bias+1; i < chunk_npages-1; i++) - arena_mapbits_unzeroed_set(chunk, i, unzeroed); - } else { - VALGRIND_MAKE_MEM_DEFINED( - (void *)arena_mapp_get(chunk, map_bias+1), - (size_t)((uintptr_t) arena_mapp_get(chunk, - chunk_npages-1) - (uintptr_t)arena_mapp_get(chunk, - map_bias+1))); - if (config_debug) { - for (i = map_bias+1; i < chunk_npages-1; i++) { - assert(arena_mapbits_unzeroed_get(chunk, - i) == unzeroed); - } - } - } - arena_mapbits_unallocated_set(chunk, chunk_npages-1, - arena_maxclass, unzeroed); - } + if (arena->spare != NULL) + chunk = arena_chunk_init_spare(arena); + else + chunk = arena_chunk_init_hard(arena); /* Insert the run into the runs_avail tree. 
*/ arena_avail_insert(arena, chunk, map_bias, chunk_npages-map_bias, @@ -674,8 +657,7 @@ arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk) } static arena_run_t * -arena_run_alloc_helper(arena_t *arena, size_t size, bool large, size_t binind, - bool zero) +arena_run_alloc_large_helper(arena_t *arena, size_t size, bool zero) { arena_run_t *run; arena_chunk_map_t *mapelm, key; @@ -690,7 +672,7 @@ arena_run_alloc_helper(arena_t *arena, size_t size, bool large, size_t binind, run = (arena_run_t *)((uintptr_t)run_chunk + (pageind << LG_PAGE)); - arena_run_split(arena, run, size, large, binind, zero); + arena_run_split_large(arena, run, size, zero); return (run); } @@ -698,19 +680,16 @@ arena_run_alloc_helper(arena_t *arena, size_t size, bool large, size_t binind, } static arena_run_t * -arena_run_alloc(arena_t *arena, size_t size, bool large, size_t binind, - bool zero) +arena_run_alloc_large(arena_t *arena, size_t size, bool zero) { arena_chunk_t *chunk; arena_run_t *run; assert(size <= arena_maxclass); assert((size & PAGE_MASK) == 0); - assert((large && binind == BININD_INVALID) || (large == false && binind - != BININD_INVALID)); /* Search the arena's chunks for the lowest best fit. */ - run = arena_run_alloc_helper(arena, size, large, binind, zero); + run = arena_run_alloc_large_helper(arena, size, zero); if (run != NULL) return (run); @@ -720,7 +699,7 @@ arena_run_alloc(arena_t *arena, size_t size, bool large, size_t binind, chunk = arena_chunk_alloc(arena); if (chunk != NULL) { run = (arena_run_t *)((uintptr_t)chunk + (map_bias << LG_PAGE)); - arena_run_split(arena, run, size, large, binind, zero); + arena_run_split_large(arena, run, size, zero); return (run); } @@ -729,7 +708,63 @@ arena_run_alloc(arena_t *arena, size_t size, bool large, size_t binind, * sufficient memory available while this one dropped arena->lock in * arena_chunk_alloc(), so search one more time. 
*/ - return (arena_run_alloc_helper(arena, size, large, binind, zero)); + return (arena_run_alloc_large_helper(arena, size, zero)); +} + +static arena_run_t * +arena_run_alloc_small_helper(arena_t *arena, size_t size, size_t binind) +{ + arena_run_t *run; + arena_chunk_map_t *mapelm, key; + + key.bits = size | CHUNK_MAP_KEY; + mapelm = arena_avail_tree_nsearch(&arena->runs_avail, &key); + if (mapelm != NULL) { + arena_chunk_t *run_chunk = CHUNK_ADDR2BASE(mapelm); + size_t pageind = (((uintptr_t)mapelm - + (uintptr_t)run_chunk->map) / sizeof(arena_chunk_map_t)) + + map_bias; + + run = (arena_run_t *)((uintptr_t)run_chunk + (pageind << + LG_PAGE)); + arena_run_split_small(arena, run, size, binind); + return (run); + } + + return (NULL); +} + +static arena_run_t * +arena_run_alloc_small(arena_t *arena, size_t size, size_t binind) +{ + arena_chunk_t *chunk; + arena_run_t *run; + + assert(size <= arena_maxclass); + assert((size & PAGE_MASK) == 0); + assert(binind != BININD_INVALID); + + /* Search the arena's chunks for the lowest best fit. */ + run = arena_run_alloc_small_helper(arena, size, binind); + if (run != NULL) + return (run); + + /* + * No usable runs. Create a new chunk from which to allocate the run. + */ + chunk = arena_chunk_alloc(arena); + if (chunk != NULL) { + run = (arena_run_t *)((uintptr_t)chunk + (map_bias << LG_PAGE)); + arena_run_split_small(arena, run, size, binind); + return (run); + } + + /* + * arena_chunk_alloc() failed, but another thread may have made + * sufficient memory available while this one dropped arena->lock in + * arena_chunk_alloc(), so search one more time. 
+ */ + return (arena_run_alloc_small_helper(arena, size, binind)); } static inline void @@ -755,14 +790,169 @@ arena_maybe_purge(arena_t *arena) arena_purge(arena, false); } +static arena_chunk_t * +chunks_dirty_iter_cb(arena_chunk_tree_t *tree, arena_chunk_t *chunk, void *arg) +{ + size_t *ndirty = (size_t *)arg; + + assert(chunk->ndirty != 0); + *ndirty += chunk->ndirty; + return (NULL); +} + +static size_t +arena_compute_npurgatory(arena_t *arena, bool all) +{ + size_t npurgatory, npurgeable; + + /* + * Compute the minimum number of pages that this thread should try to + * purge. + */ + npurgeable = arena->ndirty - arena->npurgatory; + + if (all == false) { + size_t threshold = (arena->nactive >> opt_lg_dirty_mult); + + npurgatory = npurgeable - threshold; + } else + npurgatory = npurgeable; + + return (npurgatory); +} + +static void +arena_chunk_stash_dirty(arena_t *arena, arena_chunk_t *chunk, bool all, + arena_chunk_mapelms_t *mapelms) +{ + size_t pageind, npages; + + /* + * Temporarily allocate free dirty runs within chunk. If all is false, + * only operate on dirty runs that are fragments; otherwise operate on + * all dirty runs. + */ + for (pageind = map_bias; pageind < chunk_npages; pageind += npages) { + arena_chunk_map_t *mapelm = arena_mapp_get(chunk, pageind); + if (arena_mapbits_allocated_get(chunk, pageind) == 0) { + size_t run_size = + arena_mapbits_unallocated_size_get(chunk, pageind); + + npages = run_size >> LG_PAGE; + assert(pageind + npages <= chunk_npages); + assert(arena_mapbits_dirty_get(chunk, pageind) == + arena_mapbits_dirty_get(chunk, pageind+npages-1)); + + if (arena_mapbits_dirty_get(chunk, pageind) != 0 && + (all || arena_avail_adjac(chunk, pageind, + npages))) { + arena_run_t *run = (arena_run_t *)((uintptr_t) + chunk + (uintptr_t)(pageind << LG_PAGE)); + + arena_run_split_large(arena, run, run_size, + false); + /* Append to list for later processing. 
*/ + ql_elm_new(mapelm, u.ql_link); + ql_tail_insert(mapelms, mapelm, u.ql_link); + } + } else { + /* Skip run. */ + if (arena_mapbits_large_get(chunk, pageind) != 0) { + npages = arena_mapbits_large_size_get(chunk, + pageind) >> LG_PAGE; + } else { + size_t binind; + arena_bin_info_t *bin_info; + arena_run_t *run = (arena_run_t *)((uintptr_t) + chunk + (uintptr_t)(pageind << LG_PAGE)); + + assert(arena_mapbits_small_runind_get(chunk, + pageind) == 0); + binind = arena_bin_index(arena, run->bin); + bin_info = &arena_bin_info[binind]; + npages = bin_info->run_size >> LG_PAGE; + } + } + } + assert(pageind == chunk_npages); + assert(chunk->ndirty == 0 || all == false); + assert(chunk->nruns_adjac == 0); +} + +static size_t +arena_chunk_purge_stashed(arena_t *arena, arena_chunk_t *chunk, + arena_chunk_mapelms_t *mapelms) +{ + size_t npurged, pageind, npages, nmadvise; + arena_chunk_map_t *mapelm; + + malloc_mutex_unlock(&arena->lock); + if (config_stats) + nmadvise = 0; + npurged = 0; + ql_foreach(mapelm, mapelms, u.ql_link) { + bool unzeroed; + size_t flag_unzeroed, i; + + pageind = (((uintptr_t)mapelm - (uintptr_t)chunk->map) / + sizeof(arena_chunk_map_t)) + map_bias; + npages = arena_mapbits_large_size_get(chunk, pageind) >> + LG_PAGE; + assert(pageind + npages <= chunk_npages); + unzeroed = pages_purge((void *)((uintptr_t)chunk + (pageind << + LG_PAGE)), (npages << LG_PAGE)); + flag_unzeroed = unzeroed ? CHUNK_MAP_UNZEROED : 0; + /* + * Set the unzeroed flag for all pages, now that pages_purge() + * has returned whether the pages were zeroed as a side effect + * of purging. This chunk map modification is safe even though + * the arena mutex isn't currently owned by this thread, + * because the run is marked as allocated, thus protecting it + * from being modified by any other thread. As long as these + * writes don't perturb the first and last elements' + * CHUNK_MAP_ALLOCATED bits, behavior is well defined. 
+ */ + for (i = 0; i < npages; i++) { + arena_mapbits_unzeroed_set(chunk, pageind+i, + flag_unzeroed); + } + npurged += npages; + if (config_stats) + nmadvise++; + } + malloc_mutex_lock(&arena->lock); + if (config_stats) + arena->stats.nmadvise += nmadvise; + + return (npurged); +} + +static void +arena_chunk_unstash_purged(arena_t *arena, arena_chunk_t *chunk, + arena_chunk_mapelms_t *mapelms) +{ + arena_chunk_map_t *mapelm; + size_t pageind; + + /* Deallocate runs. */ + for (mapelm = ql_first(mapelms); mapelm != NULL; + mapelm = ql_first(mapelms)) { + arena_run_t *run; + + pageind = (((uintptr_t)mapelm - (uintptr_t)chunk->map) / + sizeof(arena_chunk_map_t)) + map_bias; + run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)(pageind << + LG_PAGE)); + ql_remove(mapelms, mapelm, u.ql_link); + arena_run_dalloc(arena, run, false, true); + } +} + static inline size_t arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk, bool all) { size_t npurged; - ql_head(arena_chunk_map_t) mapelms; - arena_chunk_map_t *mapelm; - size_t pageind, npages; - size_t nmadvise; + arena_chunk_mapelms_t mapelms; ql_new(&mapelms); @@ -798,121 +988,13 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk, bool all) if (chunk->nruns_adjac == 0) all = true; - /* - * Temporarily allocate free dirty runs within chunk. If all is false, - * only operate on dirty runs that are fragments; otherwise operate on - * all dirty runs. 
- */ - for (pageind = map_bias; pageind < chunk_npages; pageind += npages) { - mapelm = arena_mapp_get(chunk, pageind); - if (arena_mapbits_allocated_get(chunk, pageind) == 0) { - size_t run_size = - arena_mapbits_unallocated_size_get(chunk, pageind); - - npages = run_size >> LG_PAGE; - assert(pageind + npages <= chunk_npages); - assert(arena_mapbits_dirty_get(chunk, pageind) == - arena_mapbits_dirty_get(chunk, pageind+npages-1)); - - if (arena_mapbits_dirty_get(chunk, pageind) != 0 && - (all || arena_avail_adjac(chunk, pageind, - npages))) { - arena_run_t *run = (arena_run_t *)((uintptr_t) - chunk + (uintptr_t)(pageind << LG_PAGE)); - - arena_run_split(arena, run, run_size, true, - BININD_INVALID, false); - /* Append to list for later processing. */ - ql_elm_new(mapelm, u.ql_link); - ql_tail_insert(&mapelms, mapelm, u.ql_link); - } - } else { - /* Skip run. */ - if (arena_mapbits_large_get(chunk, pageind) != 0) { - npages = arena_mapbits_large_size_get(chunk, - pageind) >> LG_PAGE; - } else { - size_t binind; - arena_bin_info_t *bin_info; - arena_run_t *run = (arena_run_t *)((uintptr_t) - chunk + (uintptr_t)(pageind << LG_PAGE)); - - assert(arena_mapbits_small_runind_get(chunk, - pageind) == 0); - binind = arena_bin_index(arena, run->bin); - bin_info = &arena_bin_info[binind]; - npages = bin_info->run_size >> LG_PAGE; - } - } - } - assert(pageind == chunk_npages); - assert(chunk->ndirty == 0 || all == false); - assert(chunk->nruns_adjac == 0); - - malloc_mutex_unlock(&arena->lock); - if (config_stats) - nmadvise = 0; - npurged = 0; - ql_foreach(mapelm, &mapelms, u.ql_link) { - bool unzeroed; - size_t flag_unzeroed, i; - - pageind = (((uintptr_t)mapelm - (uintptr_t)chunk->map) / - sizeof(arena_chunk_map_t)) + map_bias; - npages = arena_mapbits_large_size_get(chunk, pageind) >> - LG_PAGE; - assert(pageind + npages <= chunk_npages); - unzeroed = pages_purge((void *)((uintptr_t)chunk + (pageind << - LG_PAGE)), (npages << LG_PAGE)); - flag_unzeroed = unzeroed ? 
CHUNK_MAP_UNZEROED : 0; - /* - * Set the unzeroed flag for all pages, now that pages_purge() - * has returned whether the pages were zeroed as a side effect - * of purging. This chunk map modification is safe even though - * the arena mutex isn't currently owned by this thread, - * because the run is marked as allocated, thus protecting it - * from being modified by any other thread. As long as these - * writes don't perturb the first and last elements' - * CHUNK_MAP_ALLOCATED bits, behavior is well defined. - */ - for (i = 0; i < npages; i++) { - arena_mapbits_unzeroed_set(chunk, pageind+i, - flag_unzeroed); - } - npurged += npages; - if (config_stats) - nmadvise++; - } - malloc_mutex_lock(&arena->lock); - if (config_stats) - arena->stats.nmadvise += nmadvise; - - /* Deallocate runs. */ - for (mapelm = ql_first(&mapelms); mapelm != NULL; - mapelm = ql_first(&mapelms)) { - arena_run_t *run; - - pageind = (((uintptr_t)mapelm - (uintptr_t)chunk->map) / - sizeof(arena_chunk_map_t)) + map_bias; - run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)(pageind << - LG_PAGE)); - ql_remove(&mapelms, mapelm, u.ql_link); - arena_run_dalloc(arena, run, false, true); - } + arena_chunk_stash_dirty(arena, chunk, all, &mapelms); + npurged = arena_chunk_purge_stashed(arena, chunk, &mapelms); + arena_chunk_unstash_purged(arena, chunk, &mapelms); return (npurged); } -static arena_chunk_t * -chunks_dirty_iter_cb(arena_chunk_tree_t *tree, arena_chunk_t *chunk, void *arg) -{ - size_t *ndirty = (size_t *)arg; - - assert(chunk->ndirty != 0); - *ndirty += chunk->ndirty; - return (NULL); -} - static void arena_purge(arena_t *arena, bool all) { @@ -933,21 +1015,11 @@ arena_purge(arena_t *arena, bool all) arena->stats.npurge++; /* - * Compute the minimum number of pages that this thread should try to - * purge, and add the result to arena->npurgatory. This will keep - * multiple threads from racing to reduce ndirty below the threshold. 
+ * Add the minimum number of pages this thread should try to purge to + * arena->npurgatory. This will keep multiple threads from racing to + * reduce ndirty below the threshold. */ - { - size_t npurgeable = arena->ndirty - arena->npurgatory; - - if (all == false) { - size_t threshold = (arena->nactive >> - opt_lg_dirty_mult); - - npurgatory = npurgeable - threshold; - } else - npurgatory = npurgeable; - } + npurgatory = arena_compute_npurgatory(arena, all); arena->npurgatory += npurgatory; while (npurgatory > 0) { @@ -1014,61 +1086,12 @@ arena_purge_all(arena_t *arena) } static void -arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, bool cleaned) +arena_run_coalesce(arena_t *arena, arena_chunk_t *chunk, size_t *p_size, + size_t *p_run_ind, size_t *p_run_pages, size_t flag_dirty) { - arena_chunk_t *chunk; - size_t size, run_ind, run_pages, flag_dirty; - - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); - run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE); - assert(run_ind >= map_bias); - assert(run_ind < chunk_npages); - if (arena_mapbits_large_get(chunk, run_ind) != 0) { - size = arena_mapbits_large_size_get(chunk, run_ind); - assert(size == PAGE || - arena_mapbits_large_size_get(chunk, - run_ind+(size>>LG_PAGE)-1) == 0); - } else { - size_t binind = arena_bin_index(arena, run->bin); - arena_bin_info_t *bin_info = &arena_bin_info[binind]; - size = bin_info->run_size; - } - run_pages = (size >> LG_PAGE); - if (config_stats) { - /* - * Update stats_cactive if nactive is crossing a chunk - * multiple. - */ - size_t cactive_diff = CHUNK_CEILING(arena->nactive << LG_PAGE) - - CHUNK_CEILING((arena->nactive - run_pages) << LG_PAGE); - if (cactive_diff != 0) - stats_cactive_sub(cactive_diff); - } - arena->nactive -= run_pages; - - /* - * The run is dirty if the caller claims to have dirtied it, as well as - * if it was already dirty before being allocated and the caller - * doesn't claim to have cleaned it. 
- */ - assert(arena_mapbits_dirty_get(chunk, run_ind) == - arena_mapbits_dirty_get(chunk, run_ind+run_pages-1)); - if (cleaned == false && arena_mapbits_dirty_get(chunk, run_ind) != 0) - dirty = true; - flag_dirty = dirty ? CHUNK_MAP_DIRTY : 0; - - /* Mark pages as unallocated in the chunk map. */ - if (dirty) { - arena_mapbits_unallocated_set(chunk, run_ind, size, - CHUNK_MAP_DIRTY); - arena_mapbits_unallocated_set(chunk, run_ind+run_pages-1, size, - CHUNK_MAP_DIRTY); - } else { - arena_mapbits_unallocated_set(chunk, run_ind, size, - arena_mapbits_unzeroed_get(chunk, run_ind)); - arena_mapbits_unallocated_set(chunk, run_ind+run_pages-1, size, - arena_mapbits_unzeroed_get(chunk, run_ind+run_pages-1)); - } + size_t size = *p_size; + size_t run_ind = *p_run_ind; + size_t run_pages = *p_run_pages; /* Try to coalesce forward. */ if (run_ind + run_pages < chunk_npages && @@ -1098,8 +1121,9 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, bool cleaned) } /* Try to coalesce backward. 
*/ - if (run_ind > map_bias && arena_mapbits_allocated_get(chunk, run_ind-1) - == 0 && arena_mapbits_dirty_get(chunk, run_ind-1) == flag_dirty) { + if (run_ind > map_bias && arena_mapbits_allocated_get(chunk, + run_ind-1) == 0 && arena_mapbits_dirty_get(chunk, run_ind-1) == + flag_dirty) { size_t prun_size = arena_mapbits_unallocated_size_get(chunk, run_ind-1); size_t prun_pages = prun_size >> LG_PAGE; @@ -1124,6 +1148,62 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, bool cleaned) size); } + *p_size = size; + *p_run_ind = run_ind; + *p_run_pages = run_pages; +} + +static void +arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, bool cleaned) +{ + arena_chunk_t *chunk; + size_t size, run_ind, run_pages, flag_dirty; + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); + run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE); + assert(run_ind >= map_bias); + assert(run_ind < chunk_npages); + if (arena_mapbits_large_get(chunk, run_ind) != 0) { + size = arena_mapbits_large_size_get(chunk, run_ind); + assert(size == PAGE || + arena_mapbits_large_size_get(chunk, + run_ind+(size>>LG_PAGE)-1) == 0); + } else { + size_t binind = arena_bin_index(arena, run->bin); + arena_bin_info_t *bin_info = &arena_bin_info[binind]; + size = bin_info->run_size; + } + run_pages = (size >> LG_PAGE); + arena_cactive_update(arena, 0, run_pages); + arena->nactive -= run_pages; + + /* + * The run is dirty if the caller claims to have dirtied it, as well as + * if it was already dirty before being allocated and the caller + * doesn't claim to have cleaned it. + */ + assert(arena_mapbits_dirty_get(chunk, run_ind) == + arena_mapbits_dirty_get(chunk, run_ind+run_pages-1)); + if (cleaned == false && arena_mapbits_dirty_get(chunk, run_ind) != 0) + dirty = true; + flag_dirty = dirty ? CHUNK_MAP_DIRTY : 0; + + /* Mark pages as unallocated in the chunk map. 
*/ + if (dirty) { + arena_mapbits_unallocated_set(chunk, run_ind, size, + CHUNK_MAP_DIRTY); + arena_mapbits_unallocated_set(chunk, run_ind+run_pages-1, size, + CHUNK_MAP_DIRTY); + } else { + arena_mapbits_unallocated_set(chunk, run_ind, size, + arena_mapbits_unzeroed_get(chunk, run_ind)); + arena_mapbits_unallocated_set(chunk, run_ind+run_pages-1, size, + arena_mapbits_unzeroed_get(chunk, run_ind+run_pages-1)); + } + + arena_run_coalesce(arena, chunk, &size, &run_ind, &run_pages, + flag_dirty); + /* Insert into runs_avail, now that coalescing is complete. */ assert(arena_mapbits_unallocated_size_get(chunk, run_ind) == arena_mapbits_unallocated_size_get(chunk, run_ind+run_pages-1)); @@ -1291,7 +1371,7 @@ arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin) malloc_mutex_unlock(&bin->lock); /******************************/ malloc_mutex_lock(&arena->lock); - run = arena_run_alloc(arena, bin_info->run_size, false, binind, false); + run = arena_run_alloc_small(arena, bin_info->run_size, binind); if (run != NULL) { bitmap_t *bitmap = (bitmap_t *)((uintptr_t)run + (uintptr_t)bin_info->bitmap_offset); @@ -1314,7 +1394,7 @@ arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin) } /* - * arena_run_alloc() failed, but another thread may have made + * arena_run_alloc_small() failed, but another thread may have made * sufficient memory available while this one dropped bin->lock above, * so search one more time. */ @@ -1349,12 +1429,12 @@ arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin) arena_chunk_t *chunk; /* - * arena_run_alloc() may have allocated run, or it may - * have pulled run from the bin's run tree. Therefore - * it is unsafe to make any assumptions about how run - * has previously been used, and arena_bin_lower_run() - * must be called, as if a region were just deallocated - * from the run. + * arena_run_alloc_small() may have allocated run, or + * it may have pulled run from the bin's run tree. 
+ * Therefore it is unsafe to make any assumptions about + * how run has previously been used, and + * arena_bin_lower_run() must be called, as if a region + * were just deallocated from the run. */ chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); if (run->nfree == bin_info->nregs) @@ -1580,7 +1660,7 @@ arena_malloc_large(arena_t *arena, size_t size, bool zero) /* Large allocation. */ size = PAGE_CEILING(size); malloc_mutex_lock(&arena->lock); - ret = (void *)arena_run_alloc(arena, size, true, BININD_INVALID, zero); + ret = (void *)arena_run_alloc_large(arena, size, zero); if (ret == NULL) { malloc_mutex_unlock(&arena->lock); return (NULL); @@ -1626,7 +1706,7 @@ arena_palloc(arena_t *arena, size_t size, size_t alignment, bool zero) alloc_size = size + alignment - PAGE; malloc_mutex_lock(&arena->lock); - run = arena_run_alloc(arena, alloc_size, true, BININD_INVALID, false); + run = arena_run_alloc_large(arena, alloc_size, false); if (run == NULL) { malloc_mutex_unlock(&arena->lock); return (NULL); @@ -1646,8 +1726,7 @@ arena_palloc(arena_t *arena, size_t size, size_t alignment, bool zero) arena_run_trim_tail(arena, chunk, ret, size + trailsize, size, false); } - arena_run_init(arena, (arena_run_t *)ret, size, true, BININD_INVALID, - zero); + arena_run_init_large(arena, (arena_run_t *)ret, size, zero); if (config_stats) { arena->stats.nmalloc_large++; @@ -1953,9 +2032,8 @@ arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr, size_t flag_dirty; size_t splitsize = (oldsize + followsize <= size + extra) ? 
followsize : size + extra - oldsize; - arena_run_split(arena, (arena_run_t *)((uintptr_t)chunk + - ((pageind+npages) << LG_PAGE)), splitsize, true, - BININD_INVALID, zero); + arena_run_split_large(arena, (arena_run_t *)((uintptr_t)chunk + + ((pageind+npages) << LG_PAGE)), splitsize, zero); size = oldsize + splitsize; npages = size >> LG_PAGE; From fb1775e47e91e2e5cef8d4adcb19473309a75f28 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 14 Jan 2014 17:04:34 -0800 Subject: [PATCH 0374/3378] Refactor prof_lookup() by extracting prof_lookup_global(). --- src/prof.c | 161 ++++++++++++++++++++++++++--------------------------- 1 file changed, 79 insertions(+), 82 deletions(-) diff --git a/src/prof.c b/src/prof.c index 40718968..0b0555ad 100644 --- a/src/prof.c +++ b/src/prof.c @@ -65,33 +65,13 @@ static int prof_dump_fd; static bool prof_booted = false; /******************************************************************************/ -/* Function prototypes for non-inline static functions. */ +/* + * Function prototypes for static functions that are referenced prior to + * definition. + */ -static prof_bt_t *bt_dup(prof_bt_t *bt); -static void bt_destroy(prof_bt_t *bt); -#ifdef JEMALLOC_PROF_LIBGCC -static _Unwind_Reason_Code prof_unwind_init_callback( - struct _Unwind_Context *context, void *arg); -static _Unwind_Reason_Code prof_unwind_callback( - struct _Unwind_Context *context, void *arg); -#endif -static bool prof_flush(bool propagate_err); -static bool prof_write(bool propagate_err, const char *s); -static bool prof_printf(bool propagate_err, const char *format, ...) 
- JEMALLOC_ATTR(format(printf, 2, 3)); -static void prof_ctx_sum(prof_ctx_t *ctx, prof_cnt_t *cnt_all, - size_t *leak_nctx); static void prof_ctx_destroy(prof_ctx_t *ctx); static void prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt); -static bool prof_dump_ctx(bool propagate_err, prof_ctx_t *ctx, - prof_bt_t *bt); -static bool prof_dump_maps(bool propagate_err); -static bool prof_dump(bool propagate_err, const char *filename, - bool leakcheck); -static void prof_dump_filename(char *filename, char v, int64_t vseq); -static void prof_fdump(void); -static void prof_bt_hash(const void *key, size_t r_hash[2]); -static bool prof_bt_keycomp(const void *k1, const void *k2); static malloc_mutex_t *prof_ctx_mutex_choose(void); /******************************************************************************/ @@ -427,6 +407,69 @@ prof_backtrace(prof_bt_t *bt, unsigned nignore) } #endif +static bool +prof_lookup_global(prof_bt_t *bt, prof_tdata_t *prof_tdata, void **p_btkey, + prof_ctx_t **p_ctx, bool *p_new_ctx) +{ + union { + prof_ctx_t *p; + void *v; + } ctx; + union { + prof_bt_t *p; + void *v; + } btkey; + bool new_ctx; + + prof_enter(prof_tdata); + if (ckh_search(&bt2ctx, bt, &btkey.v, &ctx.v)) { + /* bt has never been seen before. Insert it. */ + ctx.v = imalloc(sizeof(prof_ctx_t)); + if (ctx.v == NULL) { + prof_leave(prof_tdata); + return (true); + } + btkey.p = bt_dup(bt); + if (btkey.v == NULL) { + prof_leave(prof_tdata); + idalloc(ctx.v); + return (true); + } + ctx.p->bt = btkey.p; + ctx.p->lock = prof_ctx_mutex_choose(); + /* + * Set nlimbo to 1, in order to avoid a race condition with + * prof_ctx_merge()/prof_ctx_destroy(). + */ + ctx.p->nlimbo = 1; + memset(&ctx.p->cnt_merged, 0, sizeof(prof_cnt_t)); + ql_new(&ctx.p->cnts_ql); + if (ckh_insert(&bt2ctx, btkey.v, ctx.v)) { + /* OOM. 
*/ + prof_leave(prof_tdata); + idalloc(btkey.v); + idalloc(ctx.v); + return (true); + } + new_ctx = true; + } else { + /* + * Increment nlimbo, in order to avoid a race condition with + * prof_ctx_merge()/prof_ctx_destroy(). + */ + malloc_mutex_lock(ctx.p->lock); + ctx.p->nlimbo++; + malloc_mutex_unlock(ctx.p->lock); + new_ctx = false; + } + prof_leave(prof_tdata); + + *p_btkey = btkey.v; + *p_ctx = ctx.p; + *p_new_ctx = new_ctx; + return (false); +} + prof_thr_cnt_t * prof_lookup(prof_bt_t *bt) { @@ -443,62 +486,16 @@ prof_lookup(prof_bt_t *bt) return (NULL); if (ckh_search(&prof_tdata->bt2cnt, bt, NULL, &ret.v)) { - union { - prof_bt_t *p; - void *v; - } btkey; - union { - prof_ctx_t *p; - void *v; - } ctx; + void *btkey; + prof_ctx_t *ctx; bool new_ctx; /* * This thread's cache lacks bt. Look for it in the global * cache. */ - prof_enter(prof_tdata); - if (ckh_search(&bt2ctx, bt, &btkey.v, &ctx.v)) { - /* bt has never been seen before. Insert it. */ - ctx.v = imalloc(sizeof(prof_ctx_t)); - if (ctx.v == NULL) { - prof_leave(prof_tdata); - return (NULL); - } - btkey.p = bt_dup(bt); - if (btkey.v == NULL) { - prof_leave(prof_tdata); - idalloc(ctx.v); - return (NULL); - } - ctx.p->bt = btkey.p; - ctx.p->lock = prof_ctx_mutex_choose(); - /* - * Set nlimbo to 1, in order to avoid a race condition - * with prof_ctx_merge()/prof_ctx_destroy(). - */ - ctx.p->nlimbo = 1; - memset(&ctx.p->cnt_merged, 0, sizeof(prof_cnt_t)); - ql_new(&ctx.p->cnts_ql); - if (ckh_insert(&bt2ctx, btkey.v, ctx.v)) { - /* OOM. */ - prof_leave(prof_tdata); - idalloc(btkey.v); - idalloc(ctx.v); - return (NULL); - } - new_ctx = true; - } else { - /* - * Increment nlimbo, in order to avoid a race condition - * with prof_ctx_merge()/prof_ctx_destroy(). 
- */ - malloc_mutex_lock(ctx.p->lock); - ctx.p->nlimbo++; - malloc_mutex_unlock(ctx.p->lock); - new_ctx = false; - } - prof_leave(prof_tdata); + if (prof_lookup_global(bt, prof_tdata, &btkey, &ctx, &new_ctx)) + return (NULL); /* Link a prof_thd_cnt_t into ctx for this thread. */ if (ckh_count(&prof_tdata->bt2cnt) == PROF_TCMAX) { @@ -521,27 +518,27 @@ prof_lookup(prof_bt_t *bt) ret.v = imalloc(sizeof(prof_thr_cnt_t)); if (ret.p == NULL) { if (new_ctx) - prof_ctx_destroy(ctx.p); + prof_ctx_destroy(ctx); return (NULL); } ql_elm_new(ret.p, cnts_link); ql_elm_new(ret.p, lru_link); } /* Finish initializing ret. */ - ret.p->ctx = ctx.p; + ret.p->ctx = ctx; ret.p->epoch = 0; memset(&ret.p->cnts, 0, sizeof(prof_cnt_t)); - if (ckh_insert(&prof_tdata->bt2cnt, btkey.v, ret.v)) { + if (ckh_insert(&prof_tdata->bt2cnt, btkey, ret.v)) { if (new_ctx) - prof_ctx_destroy(ctx.p); + prof_ctx_destroy(ctx); idalloc(ret.v); return (NULL); } ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link); - malloc_mutex_lock(ctx.p->lock); - ql_tail_insert(&ctx.p->cnts_ql, ret.p, cnts_link); - ctx.p->nlimbo--; - malloc_mutex_unlock(ctx.p->lock); + malloc_mutex_lock(ctx->lock); + ql_tail_insert(&ctx->cnts_ql, ret.p, cnts_link); + ctx->nlimbo--; + malloc_mutex_unlock(ctx->lock); } else { /* Move ret to the front of the LRU. */ ql_remove(&prof_tdata->lru_ql, ret.p, lru_link); From 35f1bc4e4bd14e07257f6eddbbbcf0c01b52ba5c Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 14 Jan 2014 17:49:37 -0800 Subject: [PATCH 0375/3378] Fix warnings and a test failure exposed on CentOS 6.3. 
--- test/unit/stats.c | 7 +++++-- test/unit/util.c | 2 -- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/test/unit/stats.c b/test/unit/stats.c index 3fa08172..6ebe369f 100644 --- a/test/unit/stats.c +++ b/test/unit/stats.c @@ -148,6 +148,9 @@ TEST_BEGIN(test_stats_arenas_small) p = mallocx(SMALL_MAXCLASS, 0); assert_ptr_not_null(p, "Unexpected mallocx() failure"); + assert_d_eq(mallctl("thread.tcache.flush", NULL, NULL, NULL, 0), + config_tcache ? 0 : ENOENT, "Unexpected mallctl() result"); + assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch)), 0, "Unexpected mallctl() failure"); @@ -164,13 +167,13 @@ TEST_BEGIN(test_stats_arenas_small) if (config_stats) { assert_zu_gt(allocated, 0, - "allocated should be greated than zero"); + "allocated should be greater than zero"); assert_u64_gt(nmalloc, 0, "nmalloc should be no greater than zero"); assert_u64_ge(nmalloc, ndalloc, "nmalloc should be at least as large as ndalloc"); assert_u64_gt(nrequests, 0, - "nrequests should be no greater than zero"); + "nrequests should be greater than zero"); } dallocx(p, 0); diff --git a/test/unit/util.c b/test/unit/util.c index 1f2f5759..4b4d19da 100644 --- a/test/unit/util.c +++ b/test/unit/util.c @@ -151,7 +151,6 @@ TEST_BEGIN(test_malloc_snprintf_truncated) } while (0) for (len = 1; len < BUFLEN; len++) { - TEST("", ""); TEST("012346789", "012346789"); TEST("a0123b", "a%sb", "0123"); TEST("a01234567", "a%s%s", "0123", "4567"); @@ -180,7 +179,6 @@ TEST_BEGIN(test_malloc_snprintf) assert_d_eq(result, strlen(expected_str), "Unexpected result"); \ } while (0) - TEST("", ""); TEST("hello", "hello"); TEST("a0123b", "a%sb", "0123"); From 4f37ef693e3d5903ce07dc0b61c0da320b35e3d9 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 16 Jan 2014 13:23:56 -0800 Subject: [PATCH 0376/3378] Refactor prof_dump() to reduce contention. Refactor prof_dump() to use a two pass algorithm, and prof_leave() prior to the second pass. 
This avoids write(2) system calls while holding critical prof resources. Fix prof_dump() to close the dump file descriptor for all relevant error paths. Minimize the size of prof-related static buffers when prof is disabled. This saves roughly 65 KiB of application memory for non-prof builds. Refactor prof_ctx_init() out of prof_lookup_global(). --- include/jemalloc/internal/prof.h | 5 + src/prof.c | 449 +++++++++++++++++++------------ 2 files changed, 280 insertions(+), 174 deletions(-) diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index 8b240999..4a8073fc 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -129,6 +129,7 @@ struct prof_ctx_s { * limbo due to one of: * - Initializing per thread counters associated with this ctx. * - Preparing to destroy this ctx. + * - Dumping a heap profile that includes this ctx. * nlimbo must be 1 (single destroyer) in order to safely destroy the * ctx. */ @@ -145,7 +146,11 @@ struct prof_ctx_s { * this context. */ ql_head(prof_thr_cnt_t) cnts_ql; + + /* Linkage for list of contexts to be dumped. */ + ql_elm(prof_ctx_t) dump_link; }; +typedef ql_head(prof_ctx_t) prof_ctx_list_t; struct prof_tdata_s { /* diff --git a/src/prof.c b/src/prof.c index 0b0555ad..45cb67fa 100644 --- a/src/prof.c +++ b/src/prof.c @@ -24,7 +24,13 @@ bool opt_prof_gdump = false; bool opt_prof_final = true; bool opt_prof_leak = false; bool opt_prof_accum = false; -char opt_prof_prefix[PATH_MAX + 1]; +char opt_prof_prefix[ + /* Minimize memory bloat for non-prof builds. */ +#ifdef JEMALLOC_PROF + PATH_MAX + +#endif + 1 +]; uint64_t prof_interval = 0; bool prof_promote; @@ -54,26 +60,23 @@ static uint64_t prof_dump_useq; /* * This buffer is rather large for stack allocation, so use a single buffer for - * all profile dumps. The buffer is implicitly protected by bt2ctx_mtx, since - * it must be locked anyway during dumping. + * all profile dumps. 
*/ -static char prof_dump_buf[PROF_DUMP_BUFSIZE]; +static malloc_mutex_t prof_dump_mtx; +static char prof_dump_buf[ + /* Minimize memory bloat for non-prof builds. */ +#ifdef JEMALLOC_PROF + PROF_DUMP_BUFSIZE +#else + 1 +#endif +]; static unsigned prof_dump_buf_end; static int prof_dump_fd; /* Do not dump any profiles until bootstrapping is complete. */ static bool prof_booted = false; -/******************************************************************************/ -/* - * Function prototypes for static functions that are referenced prior to - * definition. - */ - -static void prof_ctx_destroy(prof_ctx_t *ctx); -static void prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt); -static malloc_mutex_t *prof_ctx_mutex_choose(void); - /******************************************************************************/ void @@ -407,6 +410,110 @@ prof_backtrace(prof_bt_t *bt, unsigned nignore) } #endif +static malloc_mutex_t * +prof_ctx_mutex_choose(void) +{ + unsigned nctxs = atomic_add_u(&cum_ctxs, 1); + + return (&ctx_locks[(nctxs - 1) % PROF_NCTX_LOCKS]); +} + +static void +prof_ctx_init(prof_ctx_t *ctx, prof_bt_t *bt) +{ + + ctx->bt = bt; + ctx->lock = prof_ctx_mutex_choose(); + /* + * Set nlimbo to 1, in order to avoid a race condition with + * prof_ctx_merge()/prof_ctx_destroy(). + */ + ctx->nlimbo = 1; + ql_elm_new(ctx, dump_link); + memset(&ctx->cnt_merged, 0, sizeof(prof_cnt_t)); + ql_new(&ctx->cnts_ql); +} + +static void +prof_ctx_destroy(prof_ctx_t *ctx) +{ + prof_tdata_t *prof_tdata; + + cassert(config_prof); + + /* + * Check that ctx is still unused by any thread cache before destroying + * it. prof_lookup() increments ctx->nlimbo in order to avoid a race + * condition with this function, as does prof_ctx_merge() in order to + * avoid a race between the main body of prof_ctx_merge() and entry + * into this function. 
+ */ + prof_tdata = prof_tdata_get(false); + assert((uintptr_t)prof_tdata > (uintptr_t)PROF_TDATA_STATE_MAX); + prof_enter(prof_tdata); + malloc_mutex_lock(ctx->lock); + if (ql_first(&ctx->cnts_ql) == NULL && ctx->cnt_merged.curobjs == 0 && + ctx->nlimbo == 1) { + assert(ctx->cnt_merged.curbytes == 0); + assert(ctx->cnt_merged.accumobjs == 0); + assert(ctx->cnt_merged.accumbytes == 0); + /* Remove ctx from bt2ctx. */ + if (ckh_remove(&bt2ctx, ctx->bt, NULL, NULL)) + not_reached(); + prof_leave(prof_tdata); + /* Destroy ctx. */ + malloc_mutex_unlock(ctx->lock); + bt_destroy(ctx->bt); + idalloc(ctx); + } else { + /* + * Compensate for increment in prof_ctx_merge() or + * prof_lookup(). + */ + ctx->nlimbo--; + malloc_mutex_unlock(ctx->lock); + prof_leave(prof_tdata); + } +} + +static void +prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt) +{ + bool destroy; + + cassert(config_prof); + + /* Merge cnt stats and detach from ctx. */ + malloc_mutex_lock(ctx->lock); + ctx->cnt_merged.curobjs += cnt->cnts.curobjs; + ctx->cnt_merged.curbytes += cnt->cnts.curbytes; + ctx->cnt_merged.accumobjs += cnt->cnts.accumobjs; + ctx->cnt_merged.accumbytes += cnt->cnts.accumbytes; + ql_remove(&ctx->cnts_ql, cnt, cnts_link); + if (opt_prof_accum == false && ql_first(&ctx->cnts_ql) == NULL && + ctx->cnt_merged.curobjs == 0 && ctx->nlimbo == 0) { + /* + * Increment ctx->nlimbo in order to keep another thread from + * winning the race to destroy ctx while this one has ctx->lock + * dropped. Without this, it would be possible for another + * thread to: + * + * 1) Sample an allocation associated with ctx. + * 2) Deallocate the sampled object. + * 3) Successfully prof_ctx_destroy(ctx). + * + * The result would be that ctx no longer exists by the time + * this thread accesses it in prof_ctx_destroy(). 
+ */ + ctx->nlimbo++; + destroy = true; + } else + destroy = false; + malloc_mutex_unlock(ctx->lock); + if (destroy) + prof_ctx_destroy(ctx); +} + static bool prof_lookup_global(prof_bt_t *bt, prof_tdata_t *prof_tdata, void **p_btkey, prof_ctx_t **p_ctx, bool *p_new_ctx) @@ -435,15 +542,7 @@ prof_lookup_global(prof_bt_t *bt, prof_tdata_t *prof_tdata, void **p_btkey, idalloc(ctx.v); return (true); } - ctx.p->bt = btkey.p; - ctx.p->lock = prof_ctx_mutex_choose(); - /* - * Set nlimbo to 1, in order to avoid a race condition with - * prof_ctx_merge()/prof_ctx_destroy(). - */ - ctx.p->nlimbo = 1; - memset(&ctx.p->cnt_merged, 0, sizeof(prof_cnt_t)); - ql_new(&ctx.p->cnts_ql); + prof_ctx_init(ctx.p, btkey.p); if (ckh_insert(&bt2ctx, btkey.v, ctx.v)) { /* OOM. */ prof_leave(prof_tdata); @@ -549,7 +648,26 @@ prof_lookup(prof_bt_t *bt) } static bool -prof_flush(bool propagate_err) +prof_dump_open(bool propagate_err, const char *filename) +{ + + prof_dump_fd = creat(filename, 0644); + if (prof_dump_fd == -1) { + if (propagate_err == false) { + malloc_printf( + ": creat(\"%s\"), 0644) failed\n", + filename); + if (opt_abort) + abort(); + } + return (true); + } + + return (false); +} + +static bool +prof_dump_flush(bool propagate_err) { bool ret = false; ssize_t err; @@ -572,7 +690,20 @@ prof_flush(bool propagate_err) } static bool -prof_write(bool propagate_err, const char *s) +prof_dump_close(bool propagate_err) +{ + bool ret; + + assert(prof_dump_fd != -1); + ret = prof_dump_flush(propagate_err); + close(prof_dump_fd); + prof_dump_fd = -1; + + return (ret); +} + +static bool +prof_dump_write(bool propagate_err, const char *s) { unsigned i, slen, n; @@ -583,7 +714,7 @@ prof_write(bool propagate_err, const char *s) while (i < slen) { /* Flush the buffer if it is full. 
*/ if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) - if (prof_flush(propagate_err) && propagate_err) + if (prof_dump_flush(propagate_err) && propagate_err) return (true); if (prof_dump_buf_end + slen <= PROF_DUMP_BUFSIZE) { @@ -603,7 +734,7 @@ prof_write(bool propagate_err, const char *s) JEMALLOC_ATTR(format(printf, 2, 3)) static bool -prof_printf(bool propagate_err, const char *format, ...) +prof_dump_printf(bool propagate_err, const char *format, ...) { bool ret; va_list ap; @@ -612,13 +743,14 @@ prof_printf(bool propagate_err, const char *format, ...) va_start(ap, format); malloc_vsnprintf(buf, sizeof(buf), format, ap); va_end(ap); - ret = prof_write(propagate_err, buf); + ret = prof_dump_write(propagate_err, buf); return (ret); } static void -prof_ctx_sum(prof_ctx_t *ctx, prof_cnt_t *cnt_all, size_t *leak_nctx) +prof_dump_ctx_prep(prof_ctx_t *ctx, prof_cnt_t *cnt_all, size_t *leak_nctx, + prof_ctx_list_t *ctx_ql) { prof_thr_cnt_t *thr_cnt; prof_cnt_t tcnt; @@ -627,6 +759,14 @@ prof_ctx_sum(prof_ctx_t *ctx, prof_cnt_t *cnt_all, size_t *leak_nctx) malloc_mutex_lock(ctx->lock); + /* + * Increment nlimbo so that ctx won't go away before dump. + * Additionally, link ctx into the dump list so that it is included in + * prof_dump()'s second pass. + */ + ctx->nlimbo++; + ql_tail_insert(ctx_ql, ctx, dump_link); + memcpy(&ctx->cnt_summed, &ctx->cnt_merged, sizeof(prof_cnt_t)); ql_foreach(thr_cnt, &ctx->cnts_ql, cnts_link) { volatile unsigned *epoch = &thr_cnt->epoch; @@ -667,89 +807,52 @@ prof_ctx_sum(prof_ctx_t *ctx, prof_cnt_t *cnt_all, size_t *leak_nctx) malloc_mutex_unlock(ctx->lock); } -static void -prof_ctx_destroy(prof_ctx_t *ctx) +static bool +prof_dump_header(bool propagate_err, const prof_cnt_t *cnt_all) { - prof_tdata_t *prof_tdata; - cassert(config_prof); - - /* - * Check that ctx is still unused by any thread cache before destroying - * it. 
prof_lookup() increments ctx->nlimbo in order to avoid a race - * condition with this function, as does prof_ctx_merge() in order to - * avoid a race between the main body of prof_ctx_merge() and entry - * into this function. - */ - prof_tdata = prof_tdata_get(false); - assert((uintptr_t)prof_tdata > (uintptr_t)PROF_TDATA_STATE_MAX); - prof_enter(prof_tdata); - malloc_mutex_lock(ctx->lock); - if (ql_first(&ctx->cnts_ql) == NULL && ctx->cnt_merged.curobjs == 0 && - ctx->nlimbo == 1) { - assert(ctx->cnt_merged.curbytes == 0); - assert(ctx->cnt_merged.accumobjs == 0); - assert(ctx->cnt_merged.accumbytes == 0); - /* Remove ctx from bt2ctx. */ - if (ckh_remove(&bt2ctx, ctx->bt, NULL, NULL)) - not_reached(); - prof_leave(prof_tdata); - /* Destroy ctx. */ - malloc_mutex_unlock(ctx->lock); - bt_destroy(ctx->bt); - idalloc(ctx); + if (opt_lg_prof_sample == 0) { + if (prof_dump_printf(propagate_err, + "heap profile: %"PRId64": %"PRId64 + " [%"PRIu64": %"PRIu64"] @ heapprofile\n", + cnt_all->curobjs, cnt_all->curbytes, + cnt_all->accumobjs, cnt_all->accumbytes)) + return (true); } else { - /* - * Compensate for increment in prof_ctx_merge() or - * prof_lookup(). - */ - ctx->nlimbo--; - malloc_mutex_unlock(ctx->lock); - prof_leave(prof_tdata); + if (prof_dump_printf(propagate_err, + "heap profile: %"PRId64": %"PRId64 + " [%"PRIu64": %"PRIu64"] @ heap_v2/%"PRIu64"\n", + cnt_all->curobjs, cnt_all->curbytes, + cnt_all->accumobjs, cnt_all->accumbytes, + ((uint64_t)1U << opt_lg_prof_sample))) + return (true); } + + return (false); } static void -prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt) +prof_dump_ctx_cleanup_locked(prof_ctx_t *ctx, prof_ctx_list_t *ctx_ql) { - bool destroy; - cassert(config_prof); + ctx->nlimbo--; + ql_remove(ctx_ql, ctx, dump_link); +} + +static void +prof_dump_ctx_cleanup(prof_ctx_t *ctx, prof_ctx_list_t *ctx_ql) +{ - /* Merge cnt stats and detach from ctx. 
*/ malloc_mutex_lock(ctx->lock); - ctx->cnt_merged.curobjs += cnt->cnts.curobjs; - ctx->cnt_merged.curbytes += cnt->cnts.curbytes; - ctx->cnt_merged.accumobjs += cnt->cnts.accumobjs; - ctx->cnt_merged.accumbytes += cnt->cnts.accumbytes; - ql_remove(&ctx->cnts_ql, cnt, cnts_link); - if (opt_prof_accum == false && ql_first(&ctx->cnts_ql) == NULL && - ctx->cnt_merged.curobjs == 0 && ctx->nlimbo == 0) { - /* - * Increment ctx->nlimbo in order to keep another thread from - * winning the race to destroy ctx while this one has ctx->lock - * dropped. Without this, it would be possible for another - * thread to: - * - * 1) Sample an allocation associated with ctx. - * 2) Deallocate the sampled object. - * 3) Successfully prof_ctx_destroy(ctx). - * - * The result would be that ctx no longer exists by the time - * this thread accesses it in prof_ctx_destroy(). - */ - ctx->nlimbo++; - destroy = true; - } else - destroy = false; + prof_dump_ctx_cleanup_locked(ctx, ctx_ql); malloc_mutex_unlock(ctx->lock); - if (destroy) - prof_ctx_destroy(ctx); } static bool -prof_dump_ctx(bool propagate_err, prof_ctx_t *ctx, prof_bt_t *bt) +prof_dump_ctx(bool propagate_err, prof_ctx_t *ctx, const prof_bt_t *bt, + prof_ctx_list_t *ctx_ql) { + bool ret; unsigned i; cassert(config_prof); @@ -761,31 +864,42 @@ prof_dump_ctx(bool propagate_err, prof_ctx_t *ctx, prof_bt_t *bt) * filled in. Avoid dumping any ctx that is an artifact of either * implementation detail. 
*/ + malloc_mutex_lock(ctx->lock); if ((opt_prof_accum == false && ctx->cnt_summed.curobjs == 0) || (opt_prof_accum && ctx->cnt_summed.accumobjs == 0)) { assert(ctx->cnt_summed.curobjs == 0); assert(ctx->cnt_summed.curbytes == 0); assert(ctx->cnt_summed.accumobjs == 0); assert(ctx->cnt_summed.accumbytes == 0); - return (false); + ret = false; + goto label_return; } - if (prof_printf(propagate_err, "%"PRId64": %"PRId64 + if (prof_dump_printf(propagate_err, "%"PRId64": %"PRId64 " [%"PRIu64": %"PRIu64"] @", ctx->cnt_summed.curobjs, ctx->cnt_summed.curbytes, - ctx->cnt_summed.accumobjs, ctx->cnt_summed.accumbytes)) - return (true); - - for (i = 0; i < bt->len; i++) { - if (prof_printf(propagate_err, " %#"PRIxPTR, - (uintptr_t)bt->vec[i])) - return (true); + ctx->cnt_summed.accumobjs, ctx->cnt_summed.accumbytes)) { + ret = true; + goto label_return; } - if (prof_write(propagate_err, "\n")) - return (true); + for (i = 0; i < bt->len; i++) { + if (prof_dump_printf(propagate_err, " %#"PRIxPTR, + (uintptr_t)bt->vec[i])) { + ret = true; + goto label_return; + } + } - return (false); + if (prof_dump_write(propagate_err, "\n")) { + ret = true; + goto label_return; + } + +label_return: + prof_dump_ctx_cleanup_locked(ctx, ctx_ql); + malloc_mutex_unlock(ctx->lock); + return (ret); } static bool @@ -803,7 +917,7 @@ prof_dump_maps(bool propagate_err) if (mfd != -1) { ssize_t nread; - if (prof_write(propagate_err, "\nMAPPED_LIBRARIES:\n") && + if (prof_dump_write(propagate_err, "\nMAPPED_LIBRARIES:\n") && propagate_err) { ret = true; goto label_return; @@ -813,7 +927,7 @@ prof_dump_maps(bool propagate_err) prof_dump_buf_end += nread; if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) { /* Make space in prof_dump_buf before read(). 
*/ - if (prof_flush(propagate_err) && + if (prof_dump_flush(propagate_err) && propagate_err) { ret = true; goto label_return; @@ -834,105 +948,98 @@ label_return: return (ret); } +static void +prof_leakcheck(const prof_cnt_t *cnt_all, size_t leak_nctx, + const char *filename) +{ + + if (cnt_all->curbytes != 0) { + malloc_printf(": Leak summary: %"PRId64" byte%s, %" + PRId64" object%s, %zu context%s\n", + cnt_all->curbytes, (cnt_all->curbytes != 1) ? "s" : "", + cnt_all->curobjs, (cnt_all->curobjs != 1) ? "s" : "", + leak_nctx, (leak_nctx != 1) ? "s" : ""); + malloc_printf( + ": Run pprof on \"%s\" for leak detail\n", + filename); + } +} + static bool prof_dump(bool propagate_err, const char *filename, bool leakcheck) { prof_tdata_t *prof_tdata; prof_cnt_t cnt_all; size_t tabind; - union { - prof_bt_t *p; - void *v; - } bt; union { prof_ctx_t *p; void *v; } ctx; size_t leak_nctx; + prof_ctx_list_t ctx_ql; cassert(config_prof); prof_tdata = prof_tdata_get(false); if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) return (true); - prof_enter(prof_tdata); - prof_dump_fd = creat(filename, 0644); - if (prof_dump_fd == -1) { - if (propagate_err == false) { - malloc_printf( - ": creat(\"%s\"), 0644) failed\n", - filename); - if (opt_abort) - abort(); - } - goto label_error; - } + + malloc_mutex_lock(&prof_dump_mtx); /* Merge per thread profile stats, and sum them in cnt_all. */ memset(&cnt_all, 0, sizeof(prof_cnt_t)); leak_nctx = 0; + ql_new(&ctx_ql); + prof_enter(prof_tdata); for (tabind = 0; ckh_iter(&bt2ctx, &tabind, NULL, &ctx.v) == false;) - prof_ctx_sum(ctx.p, &cnt_all, &leak_nctx); + prof_dump_ctx_prep(ctx.p, &cnt_all, &leak_nctx, &ctx_ql); + prof_leave(prof_tdata); + + /* Create dump file. */ + if (prof_dump_open(propagate_err, filename)) + goto label_open_close_error; /* Dump profile header. 
*/ - if (opt_lg_prof_sample == 0) { - if (prof_printf(propagate_err, - "heap profile: %"PRId64": %"PRId64 - " [%"PRIu64": %"PRIu64"] @ heapprofile\n", - cnt_all.curobjs, cnt_all.curbytes, - cnt_all.accumobjs, cnt_all.accumbytes)) - goto label_error; - } else { - if (prof_printf(propagate_err, - "heap profile: %"PRId64": %"PRId64 - " [%"PRIu64": %"PRIu64"] @ heap_v2/%"PRIu64"\n", - cnt_all.curobjs, cnt_all.curbytes, - cnt_all.accumobjs, cnt_all.accumbytes, - ((uint64_t)1U << opt_lg_prof_sample))) - goto label_error; - } + if (prof_dump_header(propagate_err, &cnt_all)) + goto label_write_error; /* Dump per ctx profile stats. */ - for (tabind = 0; ckh_iter(&bt2ctx, &tabind, &bt.v, &ctx.v) - == false;) { - if (prof_dump_ctx(propagate_err, ctx.p, bt.p)) - goto label_error; + while ((ctx.p = ql_first(&ctx_ql)) != NULL) { + if (prof_dump_ctx(propagate_err, ctx.p, ctx.p->bt, &ctx_ql)) + goto label_write_error; } /* Dump /proc//maps if possible. */ if (prof_dump_maps(propagate_err)) - goto label_error; + goto label_write_error; - if (prof_flush(propagate_err)) - goto label_error; - close(prof_dump_fd); - prof_leave(prof_tdata); + if (prof_dump_close(propagate_err)) + goto label_open_close_error; - if (leakcheck && cnt_all.curbytes != 0) { - malloc_printf(": Leak summary: %"PRId64" byte%s, %" - PRId64" object%s, %zu context%s\n", - cnt_all.curbytes, (cnt_all.curbytes != 1) ? "s" : "", - cnt_all.curobjs, (cnt_all.curobjs != 1) ? "s" : "", - leak_nctx, (leak_nctx != 1) ? 
"s" : ""); - malloc_printf( - ": Run pprof on \"%s\" for leak detail\n", - filename); - } + malloc_mutex_unlock(&prof_dump_mtx); + + if (leakcheck) + prof_leakcheck(&cnt_all, leak_nctx, filename); return (false); -label_error: - prof_leave(prof_tdata); +label_write_error: + prof_dump_close(propagate_err); +label_open_close_error: + while ((ctx.p = ql_first(&ctx_ql)) != NULL) + prof_dump_ctx_cleanup(ctx.p, &ctx_ql); + malloc_mutex_unlock(&prof_dump_mtx); return (true); } #define DUMP_FILENAME_BUFSIZE (PATH_MAX + 1) +#define VSEQ_INVALID UINT64_C(0xffffffffffffffff) static void prof_dump_filename(char *filename, char v, int64_t vseq) { cassert(config_prof); - if (vseq != UINT64_C(0xffffffffffffffff)) { + if (vseq != VSEQ_INVALID) { /* "...v.heap" */ malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE, "%s.%d.%"PRIu64".%c%"PRId64".heap", @@ -958,7 +1065,7 @@ prof_fdump(void) if (opt_prof_final && opt_prof_prefix[0] != '\0') { malloc_mutex_lock(&prof_dump_seq_mtx); - prof_dump_filename(filename, 'f', UINT64_C(0xffffffffffffffff)); + prof_dump_filename(filename, 'f', VSEQ_INVALID); malloc_mutex_unlock(&prof_dump_seq_mtx); prof_dump(false, filename, opt_prof_leak); } @@ -1064,14 +1171,6 @@ prof_bt_keycomp(const void *k1, const void *k2) return (memcmp(bt1->vec, bt2->vec, bt1->len * sizeof(void *)) == 0); } -static malloc_mutex_t * -prof_ctx_mutex_choose(void) -{ - unsigned nctxs = atomic_add_u(&cum_ctxs, 1); - - return (&ctx_locks[(nctxs - 1) % PROF_NCTX_LOCKS]); -} - prof_tdata_t * prof_tdata_init(void) { @@ -1216,6 +1315,8 @@ prof_boot2(void) if (malloc_mutex_init(&prof_dump_seq_mtx)) return (true); + if (malloc_mutex_init(&prof_dump_mtx)) + return (true); if (atexit(prof_fdump) != 0) { malloc_write(": Error in atexit()\n"); From f234dc51b9740242d8ba69307db7c5a1312f5a03 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 16 Jan 2014 17:38:01 -0800 Subject: [PATCH 0377/3378] Fix name mangling for stress tests. 
Fix stress tests such that testlib code uses the jet_ allocator, but test code uses libjemalloc. Generate jemalloc_{rename,mangle}.h, the former because it's needed for the stress test name mangling fix, and the latter for consistency. As an artifact of this change, some (but not all) definitions related to the experimental API are absent from the headers unless the feature is enabled at configure time. --- .gitignore | 5 +- Makefile.in | 2 +- configure.ac | 100 ++++++++++++++---- .../jemalloc/internal/jemalloc_internal.h.in | 1 + include/jemalloc/internal/public_namespace.sh | 5 +- include/jemalloc/internal/public_symbols.txt | 27 ----- .../jemalloc/internal/public_unnamespace.sh | 5 +- include/jemalloc/jemalloc.sh | 4 +- include/jemalloc/jemalloc_defs.h.in | 34 ------ include/jemalloc/jemalloc_mangle.h.in | 78 -------------- include/jemalloc/jemalloc_mangle.sh | 45 ++++++++ include/jemalloc/jemalloc_rename.sh | 22 ++++ test/include/test/jemalloc_test.h.in | 10 +- 13 files changed, 164 insertions(+), 174 deletions(-) delete mode 100644 include/jemalloc/internal/public_symbols.txt delete mode 100644 include/jemalloc/jemalloc_mangle.h.in create mode 100755 include/jemalloc/jemalloc_mangle.sh create mode 100755 include/jemalloc/jemalloc_rename.sh diff --git a/.gitignore b/.gitignore index 1d593e76..4c408ec2 100644 --- a/.gitignore +++ b/.gitignore @@ -24,14 +24,17 @@ /include/jemalloc/internal/private_namespace.h /include/jemalloc/internal/private_unnamespace.h /include/jemalloc/internal/public_namespace.h +/include/jemalloc/internal/public_symbols.txt /include/jemalloc/internal/public_unnamespace.h /include/jemalloc/internal/size_classes.h /include/jemalloc/jemalloc.h /include/jemalloc/jemalloc_defs.h /include/jemalloc/jemalloc_macros.h +/include/jemalloc/jemalloc_mangle.h +/include/jemalloc/jemalloc_mangle_jet.h /include/jemalloc/jemalloc_protos.h /include/jemalloc/jemalloc_protos_jet.h -/include/jemalloc/jemalloc_mangle.h +/include/jemalloc/jemalloc_rename.h 
/src/*.[od] /src/*.gcda diff --git a/Makefile.in b/Makefile.in index 89d82555..38ffbf4c 100644 --- a/Makefile.in +++ b/Makefile.in @@ -191,7 +191,7 @@ $(C_TESTLIB_INTEGRATION_OBJS): $(objroot)test/src/%.integration.$(O): $(srcroot) $(C_TESTLIB_INTEGRATION_OBJS): CPPFLAGS += -DJEMALLOC_INTEGRATION_TEST $(C_UTIL_INTEGRATION_OBJS): $(objroot)src/%.integration.$(O): $(srcroot)src/%.c $(C_TESTLIB_STRESS_OBJS): $(objroot)test/src/%.stress.$(O): $(srcroot)test/src/%.c -$(C_TESTLIB_STRESS_OBJS): CPPFLAGS += -DJEMALLOC_STRESS_TEST +$(C_TESTLIB_STRESS_OBJS): CPPFLAGS += -DJEMALLOC_STRESS_TEST -DJEMALLOC_STRESS_TESTLIB $(C_TESTLIB_OBJS): CPPFLAGS += -I$(srcroot)test/include -I$(objroot)test/include $(TESTS_UNIT_OBJS): CPPFLAGS += -DJEMALLOC_UNIT_TEST $(TESTS_INTEGRATION_OBJS): CPPFLAGS += -DJEMALLOC_INTEGRATION_TEST diff --git a/configure.ac b/configure.ac index 724bc1a2..61fd868b 100644 --- a/configure.ac +++ b/configure.ac @@ -474,14 +474,6 @@ dnl Perform no name mangling by default. AC_ARG_WITH([mangling], [AS_HELP_STRING([--with-mangling=], [Mangle symbols in ])], [mangling_map="$with_mangling"], [mangling_map=""]) -for nm in `echo ${mangling_map} |tr ',' ' '` ; do - k="`echo ${nm} |tr ':' ' ' |awk '{print $1}'`" - n="je_${k}" - m=`echo ${nm} |tr ':' ' ' |awk '{print $2}'` - AC_DEFINE_UNQUOTED([${n}], [${m}]) - dnl Remove key from public_syms so that it isn't redefined later. - public_syms=`for sym in ${public_syms}; do echo "${sym}"; done |grep -v "^${k}\$" |tr '\n' ' '` -done dnl Do not prefix public APIs by default. AC_ARG_WITH([jemalloc_prefix], @@ -498,14 +490,6 @@ if test "x$JEMALLOC_PREFIX" != "x" ; then AC_DEFINE_UNQUOTED([JEMALLOC_PREFIX], ["$JEMALLOC_PREFIX"]) AC_DEFINE_UNQUOTED([JEMALLOC_CPREFIX], ["$JEMALLOC_CPREFIX"]) fi -dnl Generate macros to rename public symbols. All public symbols are prefixed -dnl with je_ in the source code, so these macro definitions are needed even if -dnl --with-jemalloc-prefix wasn't specified. 
-for stem in ${public_syms}; do - n="je_${stem}" - m="${JEMALLOC_PREFIX}${stem}" - AC_DEFINE_UNQUOTED([${n}], [${m}]) -done AC_ARG_WITH([export], [AS_HELP_STRING([--without-export], [disable exporting jemalloc public APIs])], @@ -544,7 +528,6 @@ cfgoutputs_in="${cfgoutputs_in} ${srcroot}doc/manpages.xsl.in" cfgoutputs_in="${cfgoutputs_in} ${srcroot}doc/jemalloc.xml.in" cfgoutputs_in="${cfgoutputs_in} ${srcroot}include/jemalloc/jemalloc_macros.h.in" cfgoutputs_in="${cfgoutputs_in} ${srcroot}include/jemalloc/jemalloc_protos.h.in" -cfgoutputs_in="${cfgoutputs_in} ${srcroot}include/jemalloc/jemalloc_mangle.h.in" cfgoutputs_in="${cfgoutputs_in} ${srcroot}include/jemalloc/internal/jemalloc_internal.h.in" cfgoutputs_in="${cfgoutputs_in} ${srcroot}test/test.sh.in" cfgoutputs_in="${cfgoutputs_in} ${srcroot}test/include/test/jemalloc_test.h.in" @@ -555,7 +538,6 @@ cfgoutputs_out="${cfgoutputs_out} doc/manpages.xsl" cfgoutputs_out="${cfgoutputs_out} doc/jemalloc.xml" cfgoutputs_out="${cfgoutputs_out} include/jemalloc/jemalloc_macros.h" cfgoutputs_out="${cfgoutputs_out} include/jemalloc/jemalloc_protos.h" -cfgoutputs_out="${cfgoutputs_out} include/jemalloc/jemalloc_mangle.h" cfgoutputs_out="${cfgoutputs_out} include/jemalloc/internal/jemalloc_internal.h" cfgoutputs_out="${cfgoutputs_out} test/test.sh" cfgoutputs_out="${cfgoutputs_out} test/include/test/jemalloc_test.h" @@ -566,7 +548,6 @@ cfgoutputs_tup="${cfgoutputs_tup} doc/manpages.xsl:doc/manpages.xsl.in" cfgoutputs_tup="${cfgoutputs_tup} doc/jemalloc.xml:doc/jemalloc.xml.in" cfgoutputs_tup="${cfgoutputs_tup} include/jemalloc/jemalloc_macros.h:include/jemalloc/jemalloc_macros.h.in" cfgoutputs_tup="${cfgoutputs_tup} include/jemalloc/jemalloc_protos.h:include/jemalloc/jemalloc_protos.h.in" -cfgoutputs_tup="${cfgoutputs_tup} include/jemalloc/jemalloc_mangle.h:include/jemalloc/jemalloc_mangle.h.in" cfgoutputs_tup="${cfgoutputs_tup} include/jemalloc/internal/jemalloc_internal.h" cfgoutputs_tup="${cfgoutputs_tup} 
test/test.sh:test/test.sh.in" cfgoutputs_tup="${cfgoutputs_tup} test/include/test/jemalloc_test.h:test/include/test/jemalloc_test.h.in" @@ -575,21 +556,28 @@ cfghdrs_in="${srcroot}include/jemalloc/jemalloc_defs.h.in" cfghdrs_in="${cfghdrs_in} ${srcroot}include/jemalloc/internal/jemalloc_internal_defs.h.in" cfghdrs_in="${cfghdrs_in} ${srcroot}include/jemalloc/internal/private_namespace.sh" cfghdrs_in="${cfghdrs_in} ${srcroot}include/jemalloc/internal/private_unnamespace.sh" +cfghdrs_in="${cfghdrs_in} ${srcroot}include/jemalloc/internal/private_symbols.txt" cfghdrs_in="${cfghdrs_in} ${srcroot}include/jemalloc/internal/public_namespace.sh" cfghdrs_in="${cfghdrs_in} ${srcroot}include/jemalloc/internal/public_unnamespace.sh" cfghdrs_in="${cfghdrs_in} ${srcroot}include/jemalloc/internal/size_classes.sh" +cfghdrs_in="${cfghdrs_in} ${srcroot}include/jemalloc/jemalloc_rename.sh" +cfghdrs_in="${cfghdrs_in} ${srcroot}include/jemalloc/jemalloc_mangle.sh" cfghdrs_in="${cfghdrs_in} ${srcroot}include/jemalloc/jemalloc.sh" cfghdrs_in="${cfghdrs_in} ${srcroot}test/include/test/jemalloc_test_defs.h.in" cfghdrs_out="include/jemalloc/jemalloc_defs.h" cfghdrs_out="${cfghdrs_out} include/jemalloc/jemalloc${install_suffix}.h" -cfghdrs_out="${cfghdrs_out} include/jemalloc/jemalloc_protos_jet.h" -cfghdrs_out="${cfghdrs_out} include/jemalloc/internal/jemalloc_internal_defs.h" cfghdrs_out="${cfghdrs_out} include/jemalloc/internal/private_namespace.h" cfghdrs_out="${cfghdrs_out} include/jemalloc/internal/private_unnamespace.h" +cfghdrs_out="${cfghdrs_out} include/jemalloc/internal/public_symbols.txt" cfghdrs_out="${cfghdrs_out} include/jemalloc/internal/public_namespace.h" cfghdrs_out="${cfghdrs_out} include/jemalloc/internal/public_unnamespace.h" cfghdrs_out="${cfghdrs_out} include/jemalloc/internal/size_classes.h" +cfghdrs_out="${cfghdrs_out} include/jemalloc/jemalloc_protos_jet.h" +cfghdrs_out="${cfghdrs_out} include/jemalloc/jemalloc_rename.h" +cfghdrs_out="${cfghdrs_out} 
include/jemalloc/jemalloc_mangle.h" +cfghdrs_out="${cfghdrs_out} include/jemalloc/jemalloc_mangle_jet.h" +cfghdrs_out="${cfghdrs_out} include/jemalloc/internal/jemalloc_internal_defs.h" cfghdrs_out="${cfghdrs_out} test/include/test/jemalloc_test_defs.h" cfghdrs_tup="include/jemalloc/jemalloc_defs.h:include/jemalloc/jemalloc_defs.h.in" @@ -1342,30 +1330,96 @@ dnl Define commands that generate output files. AC_CONFIG_COMMANDS([include/jemalloc/internal/private_namespace.h], [ mkdir -p "${objroot}include/jemalloc/internal" "${srcdir}/include/jemalloc/internal/private_namespace.sh" "${srcdir}/include/jemalloc/internal/private_symbols.txt" > "${objroot}include/jemalloc/internal/private_namespace.h" +], [ + srcdir="${srcdir}" + objroot="${objroot}" ]) AC_CONFIG_COMMANDS([include/jemalloc/internal/private_unnamespace.h], [ mkdir -p "${objroot}include/jemalloc/internal" "${srcdir}/include/jemalloc/internal/private_unnamespace.sh" "${srcdir}/include/jemalloc/internal/private_symbols.txt" > "${objroot}include/jemalloc/internal/private_unnamespace.h" +], [ + srcdir="${srcdir}" + objroot="${objroot}" +]) +AC_CONFIG_COMMANDS([include/jemalloc/internal/public_symbols.txt], [ + f="${objroot}include/jemalloc/internal/public_symbols.txt" + mkdir -p "${objroot}include/jemalloc/internal" + cp /dev/null "${f}" + for nm in `echo ${mangling_map} |tr ',' ' '` ; do + n=`echo ${nm} |tr ':' ' ' |awk '{print $[]1}'` + m=`echo ${nm} |tr ':' ' ' |awk '{print $[]2}'` + echo "${n}:${m}" >> "${f}" + dnl Remove name from public_syms so that it isn't redefined later. 
+ public_syms=`for sym in ${public_syms}; do echo "${sym}"; done |grep -v "^${n}\$" |tr '\n' ' '` + done + for sym in ${public_syms} ; do + n="${sym}" + m="${JEMALLOC_PREFIX}${sym}" + echo "${n}:${m}" >> "${f}" + done +], [ + srcdir="${srcdir}" + objroot="${objroot}" + mangling_map="${mangling_map}" + public_syms="${public_syms}" + JEMALLOC_PREFIX="${JEMALLOC_PREFIX}" ]) AC_CONFIG_COMMANDS([include/jemalloc/internal/public_namespace.h], [ mkdir -p "${objroot}include/jemalloc/internal" - "${srcdir}/include/jemalloc/internal/public_namespace.sh" "${srcdir}/include/jemalloc/internal/public_symbols.txt" > "${objroot}include/jemalloc/internal/public_namespace.h" + "${srcdir}/include/jemalloc/internal/public_namespace.sh" "${objroot}include/jemalloc/internal/public_symbols.txt" > "${objroot}include/jemalloc/internal/public_namespace.h" +], [ + srcdir="${srcdir}" + objroot="${objroot}" ]) AC_CONFIG_COMMANDS([include/jemalloc/internal/public_unnamespace.h], [ mkdir -p "${objroot}include/jemalloc/internal" - "${srcdir}/include/jemalloc/internal/public_unnamespace.sh" "${srcdir}/include/jemalloc/internal/public_symbols.txt" > "${objroot}include/jemalloc/internal/public_unnamespace.h" + "${srcdir}/include/jemalloc/internal/public_unnamespace.sh" "${objroot}include/jemalloc/internal/public_symbols.txt" > "${objroot}include/jemalloc/internal/public_unnamespace.h" +], [ + srcdir="${srcdir}" + objroot="${objroot}" ]) AC_CONFIG_COMMANDS([include/jemalloc/internal/size_classes.h], [ mkdir -p "${objroot}include/jemalloc/internal" "${srcdir}/include/jemalloc/internal/size_classes.sh" > "${objroot}include/jemalloc/internal/size_classes.h" +], [ + srcdir="${srcdir}" + objroot="${objroot}" ]) AC_CONFIG_COMMANDS([include/jemalloc/jemalloc_protos_jet.h], [ mkdir -p "${objroot}include/jemalloc" cat "${srcdir}/include/jemalloc/jemalloc_protos.h.in" | sed -e 's/@je_@/jet_/g' > "${objroot}include/jemalloc/jemalloc_protos_jet.h" +], [ + srcdir="${srcdir}" + objroot="${objroot}" +]) 
+AC_CONFIG_COMMANDS([include/jemalloc/jemalloc_rename.h], [ + mkdir -p "${objroot}include/jemalloc" + "${srcdir}/include/jemalloc/jemalloc_rename.sh" "${objroot}include/jemalloc/internal/public_symbols.txt" > "${objroot}include/jemalloc/jemalloc_rename.h" +], [ + srcdir="${srcdir}" + objroot="${objroot}" +]) +AC_CONFIG_COMMANDS([include/jemalloc/jemalloc_mangle.h], [ + mkdir -p "${objroot}include/jemalloc" + "${srcdir}/include/jemalloc/jemalloc_mangle.sh" "${objroot}include/jemalloc/internal/public_symbols.txt" je_ > "${objroot}include/jemalloc/jemalloc_mangle.h" +], [ + srcdir="${srcdir}" + objroot="${objroot}" +]) +AC_CONFIG_COMMANDS([include/jemalloc/jemalloc_mangle_jet.h], [ + mkdir -p "${objroot}include/jemalloc" + "${srcdir}/include/jemalloc/jemalloc_mangle.sh" "${objroot}include/jemalloc/internal/public_symbols.txt" jet_ > "${objroot}include/jemalloc/jemalloc_mangle_jet.h" +], [ + srcdir="${srcdir}" + objroot="${objroot}" ]) AC_CONFIG_COMMANDS([include/jemalloc/jemalloc.h], [ mkdir -p "${objroot}include/jemalloc" "${srcdir}/include/jemalloc/jemalloc.sh" "${objroot}" > "${objroot}include/jemalloc/jemalloc${install_suffix}.h" +], [ + srcdir="${srcdir}" + objroot="${objroot}" + install_suffix="${install_suffix}" ]) dnl Process .in files. 
diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 7c4397fb..d24a1fe6 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -71,6 +71,7 @@ typedef intptr_t ssize_t; # include "jemalloc/internal/public_namespace.h" # define JEMALLOC_NO_RENAME # include "../jemalloc@install_suffix@.h" +# undef JEMALLOC_NO_RENAME #else # define JEMALLOC_N(n) @private_namespace@##n # include "../jemalloc@install_suffix@.h" diff --git a/include/jemalloc/internal/public_namespace.sh b/include/jemalloc/internal/public_namespace.sh index 6d6de619..362109f7 100755 --- a/include/jemalloc/internal/public_namespace.sh +++ b/include/jemalloc/internal/public_namespace.sh @@ -1,5 +1,6 @@ #!/bin/sh -for symbol in `cat $1` ; do - echo "#define je_${symbol} JEMALLOC_N(${symbol})" +for nm in `cat $1` ; do + n=`echo ${nm} |tr ':' ' ' |awk '{print $1}'` + echo "#define je_${n} JEMALLOC_N(${n})" done diff --git a/include/jemalloc/internal/public_symbols.txt b/include/jemalloc/internal/public_symbols.txt deleted file mode 100644 index e27c0e5b..00000000 --- a/include/jemalloc/internal/public_symbols.txt +++ /dev/null @@ -1,27 +0,0 @@ -malloc_conf -malloc_message -malloc -calloc -posix_memalign -aligned_alloc -realloc -free -mallocx -rallocx -xallocx -sallocx -dallocx -nallocx -mallctl -mallctlnametomib -mallctlbymib -malloc_stats_print -malloc_usable_size -memalign -valloc -allocm -rallocm -sallocm -dallocm -nallocm - diff --git a/include/jemalloc/internal/public_unnamespace.sh b/include/jemalloc/internal/public_unnamespace.sh index f42ff6e2..4239d177 100755 --- a/include/jemalloc/internal/public_unnamespace.sh +++ b/include/jemalloc/internal/public_unnamespace.sh @@ -1,5 +1,6 @@ #!/bin/sh -for symbol in `cat $1` ; do - echo "#undef je_${symbol}" +for nm in `cat $1` ; do + n=`echo ${nm} |tr ':' ' ' |awk '{print $1}'` + echo "#undef je_${n}" done diff --git 
a/include/jemalloc/jemalloc.sh b/include/jemalloc/jemalloc.sh index ad8376e7..e4738eba 100755 --- a/include/jemalloc/jemalloc.sh +++ b/include/jemalloc/jemalloc.sh @@ -11,8 +11,8 @@ extern "C" { EOF -for hdr in jemalloc_defs.h jemalloc_macros.h jemalloc_protos.h \ - jemalloc_mangle.h ; do +for hdr in jemalloc_defs.h jemalloc_rename.h jemalloc_macros.h \ + jemalloc_protos.h jemalloc_mangle.h ; do cat "${objroot}include/jemalloc/${hdr}" \ | grep -v 'Generated from .* by configure\.' \ | sed -e 's/^#define /#define /g' \ diff --git a/include/jemalloc/jemalloc_defs.h.in b/include/jemalloc/jemalloc_defs.h.in index 71e23af7..eb38d710 100644 --- a/include/jemalloc/jemalloc_defs.h.in +++ b/include/jemalloc/jemalloc_defs.h.in @@ -22,37 +22,3 @@ /* sizeof(void *) == 2^LG_SIZEOF_PTR. */ #undef LG_SIZEOF_PTR - -/* - * Name mangling for public symbols is controlled by --with-mangling and - * --with-jemalloc-prefix. With default settings the je_ prefix is stripped by - * these macro definitions. - */ -#ifndef JEMALLOC_NO_RENAME -# undef je_malloc_conf -# undef je_malloc_message -# undef je_malloc -# undef je_calloc -# undef je_posix_memalign -# undef je_aligned_alloc -# undef je_realloc -# undef je_free -# undef je_mallocx -# undef je_rallocx -# undef je_xallocx -# undef je_sallocx -# undef je_dallocx -# undef je_nallocx -# undef je_mallctl -# undef je_mallctlnametomib -# undef je_mallctlbymib -# undef je_malloc_stats_print -# undef je_malloc_usable_size -# undef je_memalign -# undef je_valloc -# undef je_allocm -# undef je_rallocm -# undef je_sallocm -# undef je_dallocm -# undef je_nallocm -#endif diff --git a/include/jemalloc/jemalloc_mangle.h.in b/include/jemalloc/jemalloc_mangle.h.in deleted file mode 100644 index 7018a752..00000000 --- a/include/jemalloc/jemalloc_mangle.h.in +++ /dev/null @@ -1,78 +0,0 @@ -/* - * By default application code must explicitly refer to mangled symbol names, - * so that it is possible to use jemalloc in conjunction with another allocator - * in 
the same application. Define JEMALLOC_MANGLE in order to cause automatic - * name mangling that matches the API prefixing that happened as a result of - * --with-mangling and/or --with-jemalloc-prefix configuration settings. - */ -#ifdef JEMALLOC_MANGLE -# ifndef JEMALLOC_NO_DEMANGLE -# define JEMALLOC_NO_DEMANGLE -# endif -# define malloc_conf je_malloc_conf -# define malloc_message je_malloc_message -# define malloc je_malloc -# define calloc je_calloc -# define posix_memalign je_posix_memalign -# define aligned_alloc je_aligned_alloc -# define realloc je_realloc -# define free je_free -# define mallocx je_mallocx -# define rallocx je_rallocx -# define xallocx je_xallocx -# define sallocx je_sallocx -# define dallocx je_dallocx -# define nallocx je_nallocx -# define mallctl je_mallctl -# define mallctlnametomib je_mallctlnametomib -# define mallctlbymib je_mallctlbymib -# define malloc_stats_print je_malloc_stats_print -# define malloc_usable_size je_malloc_usable_size -# define memalign je_memalign -# define valloc je_valloc -# ifdef JEMALLOC_EXPERIMENTAL -# define allocm je_allocm -# define rallocm je_rallocm -# define sallocm je_sallocm -# define dallocm je_dallocm -# define nallocm je_nallocm -# endif -#endif - -/* - * The je_* macros can be used as stable alternative names for the public - * jemalloc API if JEMALLOC_NO_DEMANGLE is defined. This is primarily meant - * for use in jemalloc itself, but it can be used by application code to - * provide isolation from the name mangling specified via --with-mangling - * and/or --with-jemalloc-prefix. 
- */ -#ifndef JEMALLOC_NO_DEMANGLE -# undef je_malloc_conf -# undef je_malloc_message -# undef je_malloc -# undef je_calloc -# undef je_posix_memalign -# undef je_aligned_alloc -# undef je_realloc -# undef je_free -# undef je_malloc_usable_size -# undef je_malloc_stats_print -# undef je_mallctl -# undef je_mallctlnametomib -# undef je_mallctlbymib -# undef je_memalign -# undef je_valloc -# undef je_mallocx -# undef je_rallocx -# undef je_xallocx -# undef je_sallocx -# undef je_dallocx -# undef je_nallocx -# ifdef JEMALLOC_EXPERIMENTAL -# undef je_allocm -# undef je_rallocm -# undef je_sallocm -# undef je_dallocm -# undef je_nallocm -# endif -#endif diff --git a/include/jemalloc/jemalloc_mangle.sh b/include/jemalloc/jemalloc_mangle.sh new file mode 100755 index 00000000..df328b78 --- /dev/null +++ b/include/jemalloc/jemalloc_mangle.sh @@ -0,0 +1,45 @@ +#!/bin/sh + +public_symbols_txt=$1 +symbol_prefix=$2 + +cat < Date: Thu, 16 Jan 2014 18:04:30 -0800 Subject: [PATCH 0378/3378] Fix a variable prototype/definition mismatch. --- include/jemalloc/internal/prof.h | 7 ++++++- src/prof.c | 3 +-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index 4a8073fc..566739bb 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -200,7 +200,12 @@ extern bool opt_prof_gdump; /* High-water memory dumping. */ extern bool opt_prof_final; /* Final profile dumping. */ extern bool opt_prof_leak; /* Dump leak summary at exit. */ extern bool opt_prof_accum; /* Report cumulative bytes. */ -extern char opt_prof_prefix[PATH_MAX + 1]; +extern char opt_prof_prefix[ + /* Minimize memory bloat for non-prof builds. */ +#ifdef JEMALLOC_PROF + PATH_MAX + +#endif + 1]; /* * Profile dump interval, measured in bytes allocated. 
Each arena triggers a diff --git a/src/prof.c b/src/prof.c index 45cb67fa..0d652124 100644 --- a/src/prof.c +++ b/src/prof.c @@ -29,8 +29,7 @@ char opt_prof_prefix[ #ifdef JEMALLOC_PROF PATH_MAX + #endif - 1 -]; + 1]; uint64_t prof_interval = 0; bool prof_promote; From 772163b4f3d8e9a12343e9215f6b070068507604 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 17 Jan 2014 15:40:52 -0800 Subject: [PATCH 0379/3378] Add heap profiling tests. Fix a regression in prof_dump_ctx() due to an uninitized variable. This was caused by revision 4f37ef693e3d5903ce07dc0b61c0da320b35e3d9, so no releases are affected. --- Makefile.in | 30 +++-- include/jemalloc/internal/private_symbols.txt | 2 + include/jemalloc/internal/prof.h | 5 + src/prof.c | 52 ++++++-- test/unit/prof_accum.c | 116 ++++++++++++++++++ test/unit/prof_gdump.c | 56 +++++++++ test/unit/prof_idump.c | 51 ++++++++ 7 files changed, 290 insertions(+), 22 deletions(-) create mode 100644 test/unit/prof_accum.c create mode 100644 test/unit/prof_gdump.c create mode 100644 test/unit/prof_idump.c diff --git a/Makefile.in b/Makefile.in index 38ffbf4c..67c4d5d8 100644 --- a/Makefile.in +++ b/Makefile.in @@ -107,15 +107,27 @@ C_TESTLIB_SRCS := $(srcroot)test/src/math.c $(srcroot)test/src/mtx.c \ $(srcroot)test/src/SFMT.c $(srcroot)test/src/test.c \ $(srcroot)test/src/thd.c C_UTIL_INTEGRATION_SRCS := $(srcroot)src/util.c -TESTS_UNIT := $(srcroot)test/unit/bitmap.c $(srcroot)test/unit/ckh.c \ - $(srcroot)test/unit/hash.c $(srcroot)test/unit/junk.c \ - $(srcroot)test/unit/mallctl.c $(srcroot)test/unit/math.c \ - $(srcroot)test/unit/mq.c $(srcroot)test/unit/mtx.c \ - $(srcroot)test/unit/ql.c $(srcroot)test/unit/qr.c \ - $(srcroot)test/unit/quarantine.c $(srcroot)test/unit/rb.c \ - $(srcroot)test/unit/rtree.c $(srcroot)test/unit/SFMT.c \ - $(srcroot)test/unit/stats.c $(srcroot)test/unit/tsd.c \ - $(srcroot)test/unit/util.c $(srcroot)test/unit/zero.c +TESTS_UNIT := $(srcroot)test/unit/bitmap.c \ + $(srcroot)test/unit/ckh.c \ + 
$(srcroot)test/unit/hash.c \ + $(srcroot)test/unit/junk.c \ + $(srcroot)test/unit/mallctl.c \ + $(srcroot)test/unit/math.c \ + $(srcroot)test/unit/mq.c \ + $(srcroot)test/unit/mtx.c \ + $(srcroot)test/unit/prof_accum.c \ + $(srcroot)test/unit/prof_gdump.c \ + $(srcroot)test/unit/prof_idump.c \ + $(srcroot)test/unit/ql.c \ + $(srcroot)test/unit/qr.c \ + $(srcroot)test/unit/quarantine.c \ + $(srcroot)test/unit/rb.c \ + $(srcroot)test/unit/rtree.c \ + $(srcroot)test/unit/SFMT.c \ + $(srcroot)test/unit/stats.c \ + $(srcroot)test/unit/tsd.c \ + $(srcroot)test/unit/util.c \ + $(srcroot)test/unit/zero.c TESTS_INTEGRATION := $(srcroot)test/integration/aligned_alloc.c \ $(srcroot)test/integration/allocated.c \ $(srcroot)test/integration/mallocx.c \ diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 6cc811dd..1e64ed57 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -288,8 +288,10 @@ prof_backtrace prof_boot0 prof_boot1 prof_boot2 +prof_bt_count prof_ctx_get prof_ctx_set +prof_dump_open prof_free prof_gdump prof_idump diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index 566739bb..db056fc4 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -225,6 +225,11 @@ extern bool prof_promote; void bt_init(prof_bt_t *bt, void **vec); void prof_backtrace(prof_bt_t *bt, unsigned nignore); prof_thr_cnt_t *prof_lookup(prof_bt_t *bt); +#ifdef JEMALLOC_JET +size_t prof_bt_count(void); +typedef int (prof_dump_open_t)(bool, const char *); +extern prof_dump_open_t *prof_dump_open; +#endif void prof_idump(void); bool prof_mdump(const char *filename); void prof_gdump(void); diff --git a/src/prof.c b/src/prof.c index 0d652124..1d8ccbd6 100644 --- a/src/prof.c +++ b/src/prof.c @@ -646,24 +646,49 @@ prof_lookup(prof_bt_t *bt) return (ret.p); } -static bool +#ifdef JEMALLOC_JET +size_t +prof_bt_count(void) 
+{ + size_t bt_count; + prof_tdata_t *prof_tdata; + + prof_tdata = prof_tdata_get(false); + if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) + return (0); + + prof_enter(prof_tdata); + bt_count = ckh_count(&bt2ctx); + prof_leave(prof_tdata); + + return (bt_count); +} +#endif + +#ifdef JEMALLOC_JET +#undef prof_dump_open +#define prof_dump_open JEMALLOC_N(prof_dump_open_impl) +#endif +static int prof_dump_open(bool propagate_err, const char *filename) { + int fd; - prof_dump_fd = creat(filename, 0644); - if (prof_dump_fd == -1) { - if (propagate_err == false) { - malloc_printf( - ": creat(\"%s\"), 0644) failed\n", - filename); - if (opt_abort) - abort(); - } - return (true); + fd = creat(filename, 0644); + if (fd == -1 && propagate_err == false) { + malloc_printf(": creat(\"%s\"), 0644) failed\n", + filename); + if (opt_abort) + abort(); } - return (false); + return (fd); } +#ifdef JEMALLOC_JET +#undef prof_dump_open +#define prof_dump_open JEMALLOC_N(prof_dump_open) +prof_dump_open_t *prof_dump_open = JEMALLOC_N(prof_dump_open_impl); +#endif static bool prof_dump_flush(bool propagate_err) @@ -895,6 +920,7 @@ prof_dump_ctx(bool propagate_err, prof_ctx_t *ctx, const prof_bt_t *bt, goto label_return; } + ret = false; label_return: prof_dump_ctx_cleanup_locked(ctx, ctx_ql); malloc_mutex_unlock(ctx->lock); @@ -995,7 +1021,7 @@ prof_dump(bool propagate_err, const char *filename, bool leakcheck) prof_leave(prof_tdata); /* Create dump file. */ - if (prof_dump_open(propagate_err, filename)) + if ((prof_dump_fd = prof_dump_open(propagate_err, filename)) == -1) goto label_open_close_error; /* Dump profile header. 
*/ diff --git a/test/unit/prof_accum.c b/test/unit/prof_accum.c new file mode 100644 index 00000000..b5f1c8d6 --- /dev/null +++ b/test/unit/prof_accum.c @@ -0,0 +1,116 @@ +#include "test/jemalloc_test.h" + +#define NTHREADS 4 +#define NALLOCS_PER_THREAD 50 +#define DUMP_INTERVAL 1 +#define BT_COUNT_CHECK_INTERVAL 5 + +#ifdef JEMALLOC_PROF +const char *malloc_conf = + "prof:true,prof_accum:true,prof_active:false,lg_prof_sample:0"; +#endif + +static int +prof_dump_open_intercept(bool propagate_err, const char *filename) +{ + int fd; + + fd = open("/dev/null", O_WRONLY); + assert_d_ne(fd, -1, "Unexpected open() failure"); + + return (fd); +} + +#define alloc_n_proto(n) \ +static void *alloc_##n(unsigned bits); + +#define alloc_n_gen(n) \ +static void * \ +alloc_##n(unsigned bits) \ +{ \ + \ + if (bits == 0) { \ + void *p = mallocx(1, 0); \ + assert_ptr_not_null(p, "Unexpected mallocx() failure"); \ + return (p); \ + } \ + \ + switch (bits & 0x1U) { \ + case 0: return (alloc_0(bits >> 1)); \ + case 1: return (alloc_1(bits >> 1)); \ + default: not_reached(); \ + } \ +} +alloc_n_proto(0) +alloc_n_proto(1) +alloc_n_gen(0) +alloc_n_gen(1) + +static void * +alloc_from_permuted_backtrace(unsigned thd_ind, unsigned iteration) +{ + + return (alloc_0(thd_ind*NALLOCS_PER_THREAD + iteration)); +} + +static void * +thd_start(void *varg) +{ + unsigned thd_ind = *(unsigned *)varg; + size_t bt_count_prev, bt_count; + unsigned i_prev, i; + + i_prev = 0; + bt_count_prev = 0; + for (i = 0; i < NALLOCS_PER_THREAD; i++) { + void *p = alloc_from_permuted_backtrace(thd_ind, i); + dallocx(p, 0); + if (i % DUMP_INTERVAL == 0) { + assert_d_eq(mallctl("prof.dump", NULL, NULL, NULL, 0), + 0, "Unexpected error while dumping heap profile"); + } + + if (i % BT_COUNT_CHECK_INTERVAL == 0 || + i+1 == NALLOCS_PER_THREAD) { + bt_count = prof_bt_count(); + assert_zu_le(bt_count_prev+(i-i_prev), bt_count, + "Expected larger bactrace count increase"); + i_prev = i; + bt_count_prev = bt_count; + } + } + + 
return (NULL); +} + +TEST_BEGIN(test_idump) +{ + bool active; + thd_t thds[NTHREADS]; + unsigned thd_args[NTHREADS]; + unsigned i; + + test_skip_if(!config_prof); + + active = true; + assert_d_eq(mallctl("prof.active", NULL, NULL, &active, sizeof(active)), + 0, "Unexpected mallctl failure while activating profiling"); + + prof_dump_open = prof_dump_open_intercept; + + for (i = 0; i < NTHREADS; i++) { + thd_args[i] = i; + thd_create(&thds[i], thd_start, (void *)&thd_args[i]); + } + for (i = 0; i < NTHREADS; i++) + thd_join(thds[i], NULL); +} +TEST_END + +int +main(void) +{ + + return (test( + test_idump)); +} diff --git a/test/unit/prof_gdump.c b/test/unit/prof_gdump.c new file mode 100644 index 00000000..a00b1054 --- /dev/null +++ b/test/unit/prof_gdump.c @@ -0,0 +1,56 @@ +#include "test/jemalloc_test.h" + +#ifdef JEMALLOC_PROF +const char *malloc_conf = "prof:true,prof_active:false,prof_gdump:true"; +#endif + +static bool did_prof_dump_open; + +static int +prof_dump_open_intercept(bool propagate_err, const char *filename) +{ + int fd; + + did_prof_dump_open = true; + + fd = open("/dev/null", O_WRONLY); + assert_d_ne(fd, -1, "Unexpected open() failure"); + + return (fd); +} + +TEST_BEGIN(test_gdump) +{ + bool active; + void *p, *q; + + test_skip_if(!config_prof); + + active = true; + assert_d_eq(mallctl("prof.active", NULL, NULL, &active, sizeof(active)), + 0, "Unexpected mallctl failure while activating profiling"); + + prof_dump_open = prof_dump_open_intercept; + + did_prof_dump_open = false; + p = mallocx(chunksize, 0); + assert_ptr_not_null(p, "Unexpected mallocx() failure"); + assert_true(did_prof_dump_open, "Expected a profile dump"); + + did_prof_dump_open = false; + q = mallocx(chunksize, 0); + assert_ptr_not_null(q, "Unexpected mallocx() failure"); + assert_true(did_prof_dump_open, "Expected a profile dump"); + + dallocx(p, 0); + dallocx(q, 0); +} +TEST_END + +int +main(void) +{ + + return (test( + test_gdump)); +} diff --git a/test/unit/prof_idump.c 
b/test/unit/prof_idump.c new file mode 100644 index 00000000..bdea53ec --- /dev/null +++ b/test/unit/prof_idump.c @@ -0,0 +1,51 @@ +#include "test/jemalloc_test.h" + +#ifdef JEMALLOC_PROF +const char *malloc_conf = + "prof:true,prof_accum:true,prof_active:false,lg_prof_sample:0," + "lg_prof_interval:0"; +#endif + +static bool did_prof_dump_open; + +static int +prof_dump_open_intercept(bool propagate_err, const char *filename) +{ + int fd; + + did_prof_dump_open = true; + + fd = open("/dev/null", O_WRONLY); + assert_d_ne(fd, -1, "Unexpected open() failure"); + + return (fd); +} + +TEST_BEGIN(test_idump) +{ + bool active; + void *p; + + test_skip_if(!config_prof); + + active = true; + assert_d_eq(mallctl("prof.active", NULL, NULL, &active, sizeof(active)), + 0, "Unexpected mallctl failure while activating profiling"); + + prof_dump_open = prof_dump_open_intercept; + + did_prof_dump_open = false; + p = mallocx(1, 0); + assert_ptr_not_null(p, "Unexpected mallocx() failure"); + dallocx(p, 0); + assert_true(did_prof_dump_open, "Expected a profile dump"); +} +TEST_END + +int +main(void) +{ + + return (test( + test_idump)); +} From 264dfd35d05fd7304cd3f023aa7497cf4efc0b32 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 17 Jan 2014 17:01:23 -0800 Subject: [PATCH 0380/3378] Update ChangeLog. --- ChangeLog | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/ChangeLog b/ChangeLog index 34d017e9..3f7ae632 100644 --- a/ChangeLog +++ b/ChangeLog @@ -8,6 +8,18 @@ found in the git revision history: * 3.5.0 (XXX) + This version focuses on refactoring and automated testing, though it also + includes some non-trivial heap profiling optimizations not mentioned below. + + New features: + - Add the *allocx() API, which is a successor to the experimental *allocm() + API. 
The *allocx() functions are slightly simpler to use because they have + fewer parameters, they directly return the results of primary interest, and + mallocx()/rallocx() avoid the strict aliasing pitfall that + allocm()/rallocx() share with posix_memalign(). Note that *allocm() is + slated for removal in the next non-bugfix release. + - Add support for LinuxThreads. + Bug fixes: - Unless heap profiling is enabled, disable floating point code and don't link with libm. This, in combination with e.g. EXTRA_CFLAGS=-mno-sse on x64 @@ -16,10 +28,36 @@ found in the git revision history: point registers during dynamic lazy symbol loading, and the symbol loading code uses whatever malloc the application happens to have linked/loaded with, the result being potential floating point register corruption. + - Report ENOMEM rather than EINVAL if an OOM occurs during heap profiling + backtrace creation in imemalign(). This bug impacted posix_memalign() and + aligned_alloc(). + - Fix a file descriptor leak in a prof_dump_maps() error path. + - Fix prof_dump() to close the dump file descriptor for all relevant error + paths. + - Fix rallocm() to use the arena specified by the ALLOCM_ARENA(s) flag for + allocation, not just deallocation. + - Fix a data race for large allocation stats counters. + - Fix a potential infinite loop during thread exit. This bug occurred on + Solaris, and could affect other platforms with similar pthreads TSD + implementations. + - Don't junk-fill reallocations unless usable size changes. This fixes a + violation of the *allocx()/*allocm() semantics. + - Fix growing large reallocation to junk fill new space. + - Fix huge deallocation to junk fill when munmap is disabled. - Change the default private namespace prefix from empty to je_, and change --with-private-namespace-prefix so that it prepends an additional prefix rather than replacing je_. This reduces the likelihood of applications which statically link jemalloc experiencing symbol name collisions. 
+ - Add missing private namespace mangling (relevant when + --with-private-namespace is specified). + - Add and use JEMALLOC_INLINE_C so that static inline functions are marked as + static even for debug builds. + - Add a missing mutex unlock in a malloc_init_hard() error path. In practice + this error path is never executed. + - Fix numerous bugs in malloc_strotumax() error handling/reporting. These + bugs had no impact except for malformed inputs. + - Fix numerous bugs in malloc_snprintf(). These bugs were not exercised by + existing calls, so they had no impact. * 3.4.1 (October 20, 2013) From 7d8fea987182fe2ee17eb5c1c6f3ae620f82beb0 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 21 Jan 2014 14:20:29 -0800 Subject: [PATCH 0381/3378] Avoid lazy-lock in a tcache-dependent test. --- test/unit/stats.c | 48 +++++++++++++++++++++++++++++++++-------------- 1 file changed, 34 insertions(+), 14 deletions(-) diff --git a/test/unit/stats.c b/test/unit/stats.c index 6ebe369f..6cd97730 100644 --- a/test/unit/stats.c +++ b/test/unit/stats.c @@ -123,7 +123,8 @@ TEST_BEGIN(test_stats_arenas_summary) expected, "Unexepected mallctl() result"); if (config_stats) { - assert_u64_gt(npurge, 0, "At least one purge occurred"); + assert_u64_gt(npurge, 0, + "At least one purge should have occurred"); assert_u64_le(nmadvise, purged, "nmadvise should be no greater than purged"); } @@ -133,6 +134,22 @@ TEST_BEGIN(test_stats_arenas_summary) } TEST_END +void * +thd_start(void *arg) +{ + + return (NULL); +} + +static void +no_lazy_lock(void) +{ + thd_t thd; + + thd_create(&thd, thd_start, NULL); + thd_join(thd, NULL); +} + TEST_BEGIN(test_stats_arenas_small) { unsigned arena; @@ -141,6 +158,8 @@ TEST_BEGIN(test_stats_arenas_small) uint64_t epoch, nmalloc, ndalloc, nrequests; int expected = config_stats ? 0 : ENOENT; + no_lazy_lock(); /* Lazy locking would dodge tcache testing. 
*/ + arena = 0; assert_d_eq(mallctl("thread.arena", NULL, NULL, &arena, sizeof(arena)), 0, "Unexpected mallctl() failure"); @@ -211,13 +230,13 @@ TEST_BEGIN(test_stats_arenas_large) if (config_stats) { assert_zu_gt(allocated, 0, - "allocated should be greated than zero"); + "allocated should be greater than zero"); assert_zu_gt(nmalloc, 0, - "nmalloc should be no greater than zero"); + "nmalloc should be greater than zero"); assert_zu_ge(nmalloc, ndalloc, "nmalloc should be at least as large as ndalloc"); assert_zu_gt(nrequests, 0, - "nrequests should be no greater than zero"); + "nrequests should be greater than zero"); } dallocx(p, 0); @@ -273,22 +292,23 @@ TEST_BEGIN(test_stats_arenas_bins) if (config_stats) { assert_zu_gt(allocated, 0, - "allocated should be greated than zero"); + "allocated should be greater than zero"); assert_u64_gt(nmalloc, 0, - "nmalloc should be no greater than zero"); + "nmalloc should be greater than zero"); assert_u64_ge(nmalloc, ndalloc, "nmalloc should be at least as large as ndalloc"); assert_u64_gt(nrequests, 0, - "nrequests should be no greater than zero"); + "nrequests should be greater than zero"); if (config_tcache) { assert_u64_gt(nfills, 0, - "At least one fill has occurred"); + "At least one fill should have occurred"); assert_u64_gt(nflushes, 0, - "At least one flush has occurred"); + "At least one flush should have occurred"); } - assert_u64_gt(nruns, 0, "At least one run has been allocated"); + assert_u64_gt(nruns, 0, + "At least one run should have been allocated"); assert_u64_gt(curruns, 0, - "At least one run is currently allocated"); + "At least one run should be currently allocated"); } dallocx(p, 0); @@ -324,13 +344,13 @@ TEST_BEGIN(test_stats_arenas_lruns) if (config_stats) { assert_u64_gt(nmalloc, 0, - "nmalloc should be no greater than zero"); + "nmalloc should be greater than zero"); assert_u64_ge(nmalloc, ndalloc, "nmalloc should be at least as large as ndalloc"); assert_u64_gt(nrequests, 0, - "nrequests should 
be no greater than zero"); + "nrequests should be greater than zero"); assert_u64_gt(curruns, 0, - "At least one run is currently allocated"); + "At least one run should be currently allocated"); } dallocx(p, 0); From e2206edebcce662e3979c68ba182d0bcf38141ba Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 21 Jan 2014 14:59:13 -0800 Subject: [PATCH 0382/3378] Fix unused variable warnings. --- src/jemalloc.c | 6 ++---- test/unit/rb.c | 4 +--- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 9fc9b8d1..563d99f8 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1696,11 +1696,9 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) assert(malloc_initialized || IS_INITIALIZER); malloc_thread_init(); - if (arena_ind != UINT_MAX) { - arena_chunk_t *chunk; - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + if (arena_ind != UINT_MAX) arena = arenas[arena_ind]; - } else + else arena = NULL; old_usize = isalloc(ptr, config_prof); diff --git a/test/unit/rb.c b/test/unit/rb.c index d79ec527..b737485a 100644 --- a/test/unit/rb.c +++ b/test/unit/rb.c @@ -79,11 +79,9 @@ tree_recurse(node_t *node, unsigned black_height, unsigned black_depth, /* Red nodes must be interleaved with black nodes. */ if (rbtn_red_get(node_t, link, node)) { - node_t *t_node = rbtn_left_get(node_t, link, left_node); assert_false(rbtn_red_get(node_t, link, left_node), "Node should be black"); - t_node = rbtn_right_get(node_t, link, left_node); - assert_false(rbtn_red_get(node_t, link, left_node), + assert_false(rbtn_red_get(node_t, link, right_node), "Node should be black"); } From 898960247a8b2e6534738b7a3a244855f379faf9 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 21 Jan 2014 14:59:40 -0800 Subject: [PATCH 0383/3378] Subvert tail call optimization in backtrace test. 
Re-structure alloc_[01](), which are mutually tail-recursive functions, to do (unnecessary) work post-recursion so that the compiler cannot perform tail call optimization, thus preserving intentionally unique call paths in captured backtraces. --- test/unit/prof_accum.c | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/test/unit/prof_accum.c b/test/unit/prof_accum.c index b5f1c8d6..cf3f287d 100644 --- a/test/unit/prof_accum.c +++ b/test/unit/prof_accum.c @@ -28,18 +28,24 @@ static void *alloc_##n(unsigned bits); static void * \ alloc_##n(unsigned bits) \ { \ + void *p; \ \ - if (bits == 0) { \ - void *p = mallocx(1, 0); \ - assert_ptr_not_null(p, "Unexpected mallocx() failure"); \ - return (p); \ - } \ - \ - switch (bits & 0x1U) { \ - case 0: return (alloc_0(bits >> 1)); \ - case 1: return (alloc_1(bits >> 1)); \ - default: not_reached(); \ + if (bits == 0) \ + p = mallocx(1, 0); \ + else { \ + switch (bits & 0x1U) { \ + case 0: \ + p = alloc_0(bits >> 1); \ + break; \ + case 1: \ + p = alloc_1(bits >> 1); \ + break; \ + default: not_reached(); \ + } \ } \ + /* Intentionally sabotage tail call optimization. */ \ + assert_ptr_not_null(p, "Unexpected mallocx() failure"); \ + return (p); \ } alloc_n_proto(0) alloc_n_proto(1) @@ -74,7 +80,7 @@ thd_start(void *varg) i+1 == NALLOCS_PER_THREAD) { bt_count = prof_bt_count(); assert_zu_le(bt_count_prev+(i-i_prev), bt_count, - "Expected larger bactrace count increase"); + "Expected larger backtrace count increase"); i_prev = i; bt_count_prev = bt_count; } From 0dec3507c62efac909e204217bf9b6c8c59d10ac Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 21 Jan 2014 20:48:56 -0800 Subject: [PATCH 0384/3378] Remove __FBSDID from rb.h. 
--- include/jemalloc/internal/rb.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/include/jemalloc/internal/rb.h b/include/jemalloc/internal/rb.h index 7b675f09..423802eb 100644 --- a/include/jemalloc/internal/rb.h +++ b/include/jemalloc/internal/rb.h @@ -22,10 +22,6 @@ #ifndef RB_H_ #define RB_H_ -#if 0 -__FBSDID("$FreeBSD: head/lib/libc/stdlib/rb.h 204493 2010-02-28 22:57:13Z jasone $"); -#endif - #ifdef RB_COMPACT /* Node structure. */ #define rb_node(a_type) \ From 0c4e743eaf2ab6bcfd56e002fb82f95801b9292e Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 22 Jan 2014 09:00:27 -0800 Subject: [PATCH 0385/3378] Test and fix malloc_printf("%%"). --- src/util.c | 13 ++++++------- test/unit/util.c | 2 ++ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/util.c b/src/util.c index 20062553..93a19fd1 100644 --- a/src/util.c +++ b/src/util.c @@ -400,11 +400,6 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) unsigned char len = '?'; f++; - if (*f == '%') { - /* %% */ - APPEND_C(*f); - break; - } /* Flags. 
*/ while (true) { switch (*f) { @@ -495,6 +490,11 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) switch (*f) { char *s; size_t slen; + case '%': + /* %% */ + APPEND_C(*f); + f++; + break; case 'd': case 'i': { intmax_t val JEMALLOC_CC_SILENCE_INIT(0); char buf[D2S_BUFSIZE]; @@ -561,8 +561,7 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) APPEND_PADDED_S(s, slen, width, left_justify); f++; break; - } - default: not_reached(); + } default: not_reached(); } break; } default: { diff --git a/test/unit/util.c b/test/unit/util.c index 4b4d19da..dc3cfe8a 100644 --- a/test/unit/util.c +++ b/test/unit/util.c @@ -181,6 +181,8 @@ TEST_BEGIN(test_malloc_snprintf) TEST("hello", "hello"); + TEST("50%, 100%", "50%%, %d%%", 100); + TEST("a0123b", "a%sb", "0123"); TEST("a 0123b", "a%5sb", "0123"); From 56455f7ebc72454e9e37a9fdba46a264711c08aa Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 22 Jan 2014 11:09:04 -0800 Subject: [PATCH 0386/3378] Update copyrights. --- COPYING | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/COPYING b/COPYING index 019e8132..bdda0feb 100644 --- a/COPYING +++ b/COPYING @@ -1,10 +1,10 @@ Unless otherwise specified, files in the jemalloc source distribution are subject to the following license: -------------------------------------------------------------------------------- -Copyright (C) 2002-2013 Jason Evans . +Copyright (C) 2002-2014 Jason Evans . All rights reserved. Copyright (C) 2007-2012 Mozilla Foundation. All rights reserved. -Copyright (C) 2009-2013 Facebook, Inc. All rights reserved. +Copyright (C) 2009-2014 Facebook, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: From 798a48103014aabf8afb3d7efff90399a466dd8c Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 22 Jan 2014 11:09:50 -0800 Subject: [PATCH 0387/3378] Update ChangeLog for 3.5.0. 
--- ChangeLog | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 3f7ae632..c5e4198d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -6,7 +6,7 @@ found in the git revision history: http://www.canonware.com/cgi-bin/gitweb.cgi?p=jemalloc.git git://canonware.com/jemalloc.git -* 3.5.0 (XXX) +* 3.5.0 (January 22, 2014) This version focuses on refactoring and automated testing, though it also includes some non-trivial heap profiling optimizations not mentioned below. From 9c8baec0a310f7bbd00c7dc76cfbf663dd9c14ce Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 22 Jan 2014 13:08:47 -0800 Subject: [PATCH 0388/3378] Fix a typo. --- ChangeLog | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index c5e4198d..5c2b1eec 100644 --- a/ChangeLog +++ b/ChangeLog @@ -16,7 +16,7 @@ found in the git revision history: API. The *allocx() functions are slightly simpler to use because they have fewer parameters, they directly return the results of primary interest, and mallocx()/rallocx() avoid the strict aliasing pitfall that - allocm()/rallocx() share with posix_memalign(). Note that *allocm() is + allocm()/rallocm() share with posix_memalign(). Note that *allocm() is slated for removal in the next non-bugfix release. - Add support for LinuxThreads. From 2b51a3e9e9bfebf081d25dfa92f3cd89e4a8ed73 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 28 Jan 2014 17:22:06 -0800 Subject: [PATCH 0389/3378] Fix mallctl argument size mismatches (size_t vs. uint64_t). MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reported by İsmail Dönmez. 
--- test/unit/stats.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/test/unit/stats.c b/test/unit/stats.c index 6cd97730..03a55c7f 100644 --- a/test/unit/stats.c +++ b/test/unit/stats.c @@ -31,21 +31,25 @@ TEST_END TEST_BEGIN(test_stats_chunks) { - size_t current, total, high; - size_t sz = sizeof(size_t); + size_t current, high; + uint64_t total; + size_t sz; int expected = config_stats ? 0 : ENOENT; + sz = sizeof(size_t); assert_d_eq(mallctl("stats.chunks.current", ¤t, &sz, NULL, 0), expected, "Unexpected mallctl() result"); + sz = sizeof(uint64_t); assert_d_eq(mallctl("stats.chunks.total", &total, &sz, NULL, 0), expected, "Unexpected mallctl() result"); + sz = sizeof(size_t); assert_d_eq(mallctl("stats.chunks.high", &high, &sz, NULL, 0), expected, "Unexpected mallctl() result"); if (config_stats) { assert_zu_le(current, high, "current should be no larger than high"); - assert_zu_le(high, total, + assert_u64_le((uint64_t)high, total, "high should be no larger than total"); } } @@ -247,9 +251,9 @@ TEST_BEGIN(test_stats_arenas_bins) { unsigned arena; void *p; - size_t sz, allocated; + size_t sz, allocated, curruns; uint64_t epoch, nmalloc, ndalloc, nrequests, nfills, nflushes; - uint64_t nruns, nreruns, curruns; + uint64_t nruns, nreruns; int expected = config_stats ? 
0 : ENOENT; arena = 0; @@ -287,6 +291,7 @@ TEST_BEGIN(test_stats_arenas_bins) NULL, 0), expected, "Unexpected mallctl() result"); assert_d_eq(mallctl("stats.arenas.0.bins.0.nreruns", &nreruns, &sz, NULL, 0), expected, "Unexpected mallctl() result"); + sz = sizeof(size_t); assert_d_eq(mallctl("stats.arenas.0.bins.0.curruns", &curruns, &sz, NULL, 0), expected, "Unexpected mallctl() result"); @@ -307,7 +312,7 @@ TEST_BEGIN(test_stats_arenas_bins) } assert_u64_gt(nruns, 0, "At least one run should have been allocated"); - assert_u64_gt(curruns, 0, + assert_zu_gt(curruns, 0, "At least one run should be currently allocated"); } @@ -319,8 +324,8 @@ TEST_BEGIN(test_stats_arenas_lruns) { unsigned arena; void *p; - uint64_t epoch, nmalloc, ndalloc, nrequests, curruns; - size_t sz = sizeof(uint64_t); + uint64_t epoch, nmalloc, ndalloc, nrequests; + size_t curruns, sz; int expected = config_stats ? 0 : ENOENT; arena = 0; @@ -333,12 +338,14 @@ TEST_BEGIN(test_stats_arenas_lruns) assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch)), 0, "Unexpected mallctl() failure"); + sz = sizeof(uint64_t); assert_d_eq(mallctl("stats.arenas.0.lruns.0.nmalloc", &nmalloc, &sz, NULL, 0), expected, "Unexpected mallctl() result"); assert_d_eq(mallctl("stats.arenas.0.lruns.0.ndalloc", &ndalloc, &sz, NULL, 0), expected, "Unexpected mallctl() result"); assert_d_eq(mallctl("stats.arenas.0.lruns.0.nrequests", &nrequests, &sz, NULL, 0), expected, "Unexpected mallctl() result"); + sz = sizeof(size_t); assert_d_eq(mallctl("stats.arenas.0.lruns.0.curruns", &curruns, &sz, NULL, 0), expected, "Unexpected mallctl() result"); From a184d3fcdecfaaf694029fb375d023882aea444e Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 28 Jan 2014 18:02:07 -0800 Subject: [PATCH 0390/3378] Fix/remove flawed alignment-related overflow tests. Fix/remove three related flawed tests that attempted to cause OOM due to large request size and alignment constraint. 
Although these tests "passed" on 64-bit systems due to the virtual memory hole, they could pass on some 32-bit systems. --- test/integration/aligned_alloc.c | 4 ++-- test/integration/mallocx.c | 21 --------------------- test/integration/posix_memalign.c | 4 ++-- 3 files changed, 4 insertions(+), 25 deletions(-) diff --git a/test/integration/aligned_alloc.c b/test/integration/aligned_alloc.c index 17c2151c..60900148 100644 --- a/test/integration/aligned_alloc.c +++ b/test/integration/aligned_alloc.c @@ -47,10 +47,10 @@ TEST_BEGIN(test_oom_errors) #if LG_SIZEOF_PTR == 3 alignment = UINT64_C(0x4000000000000000); - size = UINT64_C(0x8400000000000001); + size = UINT64_C(0xc000000000000001); #else alignment = 0x40000000LU; - size = 0x84000001LU; + size = 0xc0000001LU; #endif set_errno(0); p = aligned_alloc(alignment, size); diff --git a/test/integration/mallocx.c b/test/integration/mallocx.c index c26f6c56..f37a74bc 100644 --- a/test/integration/mallocx.c +++ b/test/integration/mallocx.c @@ -34,26 +34,6 @@ TEST_BEGIN(test_basic) } TEST_END -TEST_BEGIN(test_alignment_errors) -{ - void *p; - size_t nsz, sz, alignment; - -#if LG_SIZEOF_PTR == 3 - alignment = UINT64_C(0x4000000000000000); - sz = UINT64_C(0x8400000000000001); -#else - alignment = 0x40000000LU; - sz = 0x84000001LU; -#endif - nsz = nallocx(sz, MALLOCX_ALIGN(alignment)); - assert_zu_ne(nsz, 0, "Unexpected nallocx() error"); - p = mallocx(sz, MALLOCX_ALIGN(alignment)); - assert_ptr_null(p, "Expected error for mallocx(%zu, %#x)", sz, - MALLOCX_ALIGN(alignment)); -} -TEST_END - TEST_BEGIN(test_alignment_and_size) { size_t nsz, rsz, sz, alignment, total; @@ -114,6 +94,5 @@ main(void) return (test( test_basic, - test_alignment_errors, test_alignment_and_size)); } diff --git a/test/integration/posix_memalign.c b/test/integration/posix_memalign.c index c88a4dcb..19741c6c 100644 --- a/test/integration/posix_memalign.c +++ b/test/integration/posix_memalign.c @@ -43,10 +43,10 @@ TEST_BEGIN(test_oom_errors) #if 
LG_SIZEOF_PTR == 3 alignment = UINT64_C(0x4000000000000000); - size = UINT64_C(0x8400000000000001); + size = UINT64_C(0xc000000000000001); #else alignment = 0x40000000LU; - size = 0x84000001LU; + size = 0xc0000001LU; #endif assert_d_ne(posix_memalign(&p, alignment, size), 0, "Expected error for posix_memalign(&p, %zu, %zu)", From 5f60afa01eb2cf7d44024d162a1ecc6cceedcca1 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 28 Jan 2014 23:04:02 -0800 Subject: [PATCH 0391/3378] Avoid a compiler warning. Avoid copying "jeprof" to a 1-byte buffer within prof_boot0() when heap profiling is disabled. Although this is dead code under such conditions, the compiler doesn't figure that part out. Reported by Eduardo Silva. --- include/jemalloc/internal/prof.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index db056fc4..6f162d21 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -8,7 +8,11 @@ typedef struct prof_ctx_s prof_ctx_t; typedef struct prof_tdata_s prof_tdata_t; /* Option defaults. */ -#define PROF_PREFIX_DEFAULT "jeprof" +#ifdef JEMALLOC_PROF +# define PROF_PREFIX_DEFAULT "jeprof" +#else +# define PROF_PREFIX_DEFAULT "" +#endif #define LG_PROF_SAMPLE_DEFAULT 19 #define LG_PROF_INTERVAL_DEFAULT -1 From 2850e90d0d42d0e2b54864949bfa41c59c3a8dc9 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 29 Jan 2014 10:54:18 -0800 Subject: [PATCH 0392/3378] Remove flawed alignment-related overflow test. Remove the allocm() test equivalent to the mallocx() test removed in the previous commit. The flawed test attempted to cause OOM due to large request size and alignment constraint. Although this test "passed" on 64-bit systems due to the virtual memory hole, it could pass on some 32-bit systems. 
--- test/integration/allocm.c | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/test/integration/allocm.c b/test/integration/allocm.c index bd7a3ca5..66ecf869 100644 --- a/test/integration/allocm.c +++ b/test/integration/allocm.c @@ -39,28 +39,6 @@ TEST_BEGIN(test_basic) } TEST_END -TEST_BEGIN(test_alignment_errors) -{ - void *p; - size_t nsz, rsz, sz, alignment; - -#if LG_SIZEOF_PTR == 3 - alignment = UINT64_C(0x4000000000000000); - sz = UINT64_C(0x8400000000000001); -#else - alignment = 0x40000000LU; - sz = 0x84000001LU; -#endif - nsz = 0; - assert_d_eq(nallocm(&nsz, sz, ALLOCM_ALIGN(alignment)), ALLOCM_SUCCESS, - "Unexpected nallocm() error"); - rsz = 0; - assert_d_ne(allocm(&p, &rsz, sz, ALLOCM_ALIGN(alignment)), - ALLOCM_SUCCESS, "Expected error for allocm(&p, %zu, %#x)", - sz, ALLOCM_ALIGN(alignment)); -} -TEST_END - TEST_BEGIN(test_alignment_and_size) { int r; @@ -126,6 +104,5 @@ main(void) return (test( test_basic, - test_alignment_errors, test_alignment_and_size)); } From 526e4a59a2fe39e4f8bdf1ec0c0d2a5a557c3f62 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 29 Jan 2014 10:56:51 -0800 Subject: [PATCH 0393/3378] Prevent inlining of backtraced test functions. Inlining of alloc_0() and alloc_1() would prevent generation of unique backtraces, upon which the test code relies. 
--- test/unit/prof_accum.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/unit/prof_accum.c b/test/unit/prof_accum.c index cf3f287d..08be4194 100644 --- a/test/unit/prof_accum.c +++ b/test/unit/prof_accum.c @@ -22,10 +22,10 @@ prof_dump_open_intercept(bool propagate_err, const char *filename) } #define alloc_n_proto(n) \ -static void *alloc_##n(unsigned bits); +JEMALLOC_NOINLINE static void *alloc_##n(unsigned bits); #define alloc_n_gen(n) \ -static void * \ +JEMALLOC_NOINLINE static void * \ alloc_##n(unsigned bits) \ { \ void *p; \ From ddd6bd4e99e62feb3e377624b93e8cf88080fca4 Mon Sep 17 00:00:00 2001 From: George Kola Date: Wed, 12 Feb 2014 23:05:45 +0000 Subject: [PATCH 0394/3378] Using MADV_FREE on Solaris/Illumos --- configure.ac | 1 + 1 file changed, 1 insertion(+) diff --git a/configure.ac b/configure.ac index 61fd868b..a80c05d1 100644 --- a/configure.ac +++ b/configure.ac @@ -296,6 +296,7 @@ case "${host}" in *-*-solaris2*) CFLAGS="$CFLAGS" abi="elf" + AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) RPATH='-Wl,-R,$(1)' dnl Solaris needs this for sigwait(). 
CPPFLAGS="$CPPFLAGS -D_POSIX_PTHREAD_SEMANTICS" From 69e9fbb9c143e0d60670c68e29076a5c5c76ca3c Mon Sep 17 00:00:00 2001 From: Erwan Legrand Date: Fri, 14 Feb 2014 12:48:58 +0100 Subject: [PATCH 0395/3378] Fix typo --- src/arena.c | 1 - 1 file changed, 1 deletion(-) diff --git a/src/arena.c b/src/arena.c index 4da6d50c..390ab0f8 100644 --- a/src/arena.c +++ b/src/arena.c @@ -2476,7 +2476,6 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size) bin_info->reg_interval) - pad_size; } while (try_hdr_size > try_redzone0_offset); } while (try_run_size <= arena_maxclass - && try_run_size <= arena_maxclass && RUN_MAX_OVRHD * (bin_info->reg_interval << 3) > RUN_MAX_OVRHD_RELAX && (try_redzone0_offset << RUN_BFP) > RUN_MAX_OVRHD * try_run_size From 99b0fbbe6975bf2af1387f75d961ad84fb108276 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 24 Feb 2014 16:08:38 -0800 Subject: [PATCH 0396/3378] Add workaround for missing 'restrict' keyword. Add a cpp #define that removes 'restrict' keyword usage unless the compiler definitely supports C99. As written, 'restrict' is only enabled if the compiler supports the -std=gnu99 option (e.g. gcc and llvm). Reported by Tobias Hieta. 
--- configure.ac | 5 +++++ include/jemalloc/internal/jemalloc_internal_defs.h.in | 3 +++ include/jemalloc/internal/jemalloc_internal_macros.h | 4 ++++ 3 files changed, 12 insertions(+) diff --git a/configure.ac b/configure.ac index a80c05d1..938c0191 100644 --- a/configure.ac +++ b/configure.ac @@ -19,7 +19,9 @@ AC_COMPILE_IFELSE([AC_LANG_PROGRAM( ]], [[ return 0; ]])], + [je_cv_cflags_appended=$1] AC_MSG_RESULT([yes]), + [je_cv_cflags_appended=] AC_MSG_RESULT([no]) [CFLAGS="${TCFLAGS}"] ) @@ -128,6 +130,9 @@ if test "x$CFLAGS" = "x" ; then no_CFLAGS="yes" if test "x$GCC" = "xyes" ; then JE_CFLAGS_APPEND([-std=gnu99]) + if test "x$je_cv_cflags_appended" = "x-std=gnu99" ; then + AC_DEFINE_UNQUOTED([JEMALLOC_HAS_RESTRICT]) + fi JE_CFLAGS_APPEND([-Wall]) JE_CFLAGS_APPEND([-pipe]) JE_CFLAGS_APPEND([-g3]) diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 752bb103..e3758e47 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -187,6 +187,9 @@ */ #undef JEMALLOC_HAS_ALLOCA_H +/* C99 restrict keyword supported. */ +#undef JEMALLOC_HAS_RESTRICT + /* sizeof(int) == 2^LG_SIZEOF_INT. */ #undef LG_SIZEOF_INT diff --git a/include/jemalloc/internal/jemalloc_internal_macros.h b/include/jemalloc/internal/jemalloc_internal_macros.h index 70602ee8..4e239230 100644 --- a/include/jemalloc/internal/jemalloc_internal_macros.h +++ b/include/jemalloc/internal/jemalloc_internal_macros.h @@ -45,3 +45,7 @@ #ifndef __DECONST # define __DECONST(type, var) ((type)(uintptr_t)(const void *)(var)) #endif + +#ifndef JEMALLOC_HAS_RESTRICT +# define restrict +#endif From ad47e8996e649ff8b4c920abb937bbacb8b9625e Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 24 Feb 2014 22:00:10 -0800 Subject: [PATCH 0397/3378] Break prof_accum into multiple compilation units. 
Break prof_accum into multiple compilation units, in order to thwart compiler optimizations such as inlining and tail call optimization that would alter backtraces. --- Makefile.in | 15 ++++++++++++--- test/unit/prof_accum.c | 38 +------------------------------------- test/unit/prof_accum.h | 29 +++++++++++++++++++++++++++++ test/unit/prof_accum_a.c | 3 +++ test/unit/prof_accum_b.c | 3 +++ 5 files changed, 48 insertions(+), 40 deletions(-) create mode 100644 test/unit/prof_accum.h create mode 100644 test/unit/prof_accum_a.c create mode 100644 test/unit/prof_accum_b.c diff --git a/Makefile.in b/Makefile.in index 67c4d5d8..7399f277 100644 --- a/Makefile.in +++ b/Makefile.in @@ -128,6 +128,8 @@ TESTS_UNIT := $(srcroot)test/unit/bitmap.c \ $(srcroot)test/unit/tsd.c \ $(srcroot)test/unit/util.c \ $(srcroot)test/unit/zero.c +TESTS_UNIT_AUX := $(srcroot)test/unit/prof_accum_a.c \ + $(srcroot)test/unit/prof_accum_b.c TESTS_INTEGRATION := $(srcroot)test/integration/aligned_alloc.c \ $(srcroot)test/integration/allocated.c \ $(srcroot)test/integration/mallocx.c \ @@ -155,9 +157,10 @@ C_TESTLIB_STRESS_OBJS := $(C_TESTLIB_SRCS:$(srcroot)%.c=$(objroot)%.stress.$(O)) C_TESTLIB_OBJS := $(C_TESTLIB_UNIT_OBJS) $(C_TESTLIB_INTEGRATION_OBJS) $(C_UTIL_INTEGRATION_OBJS) $(C_TESTLIB_STRESS_OBJS) TESTS_UNIT_OBJS := $(TESTS_UNIT:$(srcroot)%.c=$(objroot)%.$(O)) +TESTS_UNIT_AUX_OBJS := $(TESTS_UNIT_AUX:$(srcroot)%.c=$(objroot)%.$(O)) TESTS_INTEGRATION_OBJS := $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%.$(O)) TESTS_STRESS_OBJS := $(TESTS_STRESS:$(srcroot)%.c=$(objroot)%.$(O)) -TESTS_OBJS := $(TESTS_UNIT_OBJS) $(TESTS_INTEGRATION_OBJS) $(TESTS_STRESS_OBJS) +TESTS_OBJS := $(TESTS_UNIT_OBJS) $(TESTS_UNIT_AUX_OBJS) $(TESTS_INTEGRATION_OBJS) $(TESTS_STRESS_OBJS) .PHONY: all dist build_doc_html build_doc_man build_doc .PHONY: install_bin install_include install_lib @@ -206,6 +209,12 @@ $(C_TESTLIB_STRESS_OBJS): $(objroot)test/src/%.stress.$(O): $(srcroot)test/src/% $(C_TESTLIB_STRESS_OBJS): 
CPPFLAGS += -DJEMALLOC_STRESS_TEST -DJEMALLOC_STRESS_TESTLIB $(C_TESTLIB_OBJS): CPPFLAGS += -I$(srcroot)test/include -I$(objroot)test/include $(TESTS_UNIT_OBJS): CPPFLAGS += -DJEMALLOC_UNIT_TEST +$(TESTS_UNIT_AUX_OBJS): CPPFLAGS += -DJEMALLOC_UNIT_TEST +define make-unit-link-dep +$(1): TESTS_UNIT_LINK_OBJS += $(2) +$(1): $(2) +endef +$(foreach test, $(TESTS_UNIT:$(srcroot)test/unit/%.c=$(objroot)test/unit/%$(EXE)), $(eval $(call make-unit-link-dep,$(test),$(filter $(test:%=%_a.$(O)) $(test:%=%_b.$(O)),$(TESTS_UNIT_AUX_OBJS))))) $(TESTS_INTEGRATION_OBJS): CPPFLAGS += -DJEMALLOC_INTEGRATION_TEST $(TESTS_STRESS_OBJS): CPPFLAGS += -DJEMALLOC_STRESS_TEST $(TESTS_OBJS): $(objroot)test/%.$(O): $(srcroot)test/%.c @@ -248,7 +257,7 @@ $(STATIC_LIBS): @mkdir -p $(@D) $(AR) $(ARFLAGS)@AROUT@ $+ -$(objroot)test/unit/%$(EXE): $(objroot)test/unit/%.$(O) $(C_JET_OBJS) $(C_TESTLIB_UNIT_OBJS) +$(objroot)test/unit/%$(EXE): $(objroot)test/unit/%.$(O) $(TESTS_UNIT_LINK_OBJS) $(C_JET_OBJS) $(C_TESTLIB_UNIT_OBJS) @mkdir -p $(@D) $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(LDFLAGS) $(filter-out -lm,$(LIBS)) -lm $(EXTRA_LDFLAGS) @@ -359,7 +368,7 @@ coverage: check $(SHELL) $(srcroot)coverage.sh $(srcroot)test/src unit $(C_TESTLIB_UNIT_OBJS) $(SHELL) $(srcroot)coverage.sh $(srcroot)test/src integration $(C_TESTLIB_INTEGRATION_OBJS) $(SHELL) $(srcroot)coverage.sh $(srcroot)test/src stress $(C_TESTLIB_STRESS_OBJS) - $(SHELL) $(srcroot)coverage.sh $(srcroot)test/unit unit $(TESTS_UNIT_OBJS) + $(SHELL) $(srcroot)coverage.sh $(srcroot)test/unit unit $(TESTS_UNIT_OBJS) $(TESTS_UNIT_AUX_OBJS) $(SHELL) $(srcroot)coverage.sh $(srcroot)test/integration integration $(TESTS_INTEGRATION_OBJS) $(SHELL) $(srcroot)coverage.sh $(srcroot)test/stress integration $(TESTS_STRESS_OBJS) endif diff --git a/test/unit/prof_accum.c b/test/unit/prof_accum.c index 08be4194..050a8a7e 100644 --- a/test/unit/prof_accum.c +++ b/test/unit/prof_accum.c @@ -1,9 +1,4 @@ -#include "test/jemalloc_test.h" 
- -#define NTHREADS 4 -#define NALLOCS_PER_THREAD 50 -#define DUMP_INTERVAL 1 -#define BT_COUNT_CHECK_INTERVAL 5 +#include "prof_accum.h" #ifdef JEMALLOC_PROF const char *malloc_conf = @@ -21,37 +16,6 @@ prof_dump_open_intercept(bool propagate_err, const char *filename) return (fd); } -#define alloc_n_proto(n) \ -JEMALLOC_NOINLINE static void *alloc_##n(unsigned bits); - -#define alloc_n_gen(n) \ -JEMALLOC_NOINLINE static void * \ -alloc_##n(unsigned bits) \ -{ \ - void *p; \ - \ - if (bits == 0) \ - p = mallocx(1, 0); \ - else { \ - switch (bits & 0x1U) { \ - case 0: \ - p = alloc_0(bits >> 1); \ - break; \ - case 1: \ - p = alloc_1(bits >> 1); \ - break; \ - default: not_reached(); \ - } \ - } \ - /* Intentionally sabotage tail call optimization. */ \ - assert_ptr_not_null(p, "Unexpected mallocx() failure"); \ - return (p); \ -} -alloc_n_proto(0) -alloc_n_proto(1) -alloc_n_gen(0) -alloc_n_gen(1) - static void * alloc_from_permuted_backtrace(unsigned thd_ind, unsigned iteration) { diff --git a/test/unit/prof_accum.h b/test/unit/prof_accum.h new file mode 100644 index 00000000..de9cfead --- /dev/null +++ b/test/unit/prof_accum.h @@ -0,0 +1,29 @@ +#include "test/jemalloc_test.h" + +#define NTHREADS 4 +#define NALLOCS_PER_THREAD 50 +#define DUMP_INTERVAL 1 +#define BT_COUNT_CHECK_INTERVAL 5 + +#define alloc_n_proto(n) \ +void *alloc_##n(unsigned bits); +alloc_n_proto(0) +alloc_n_proto(1) + +#define alloc_n_gen(n) \ +void * \ +alloc_##n(unsigned bits) \ +{ \ + \ + if (bits == 0) { \ + void *p = mallocx(1, 0); \ + assert_ptr_not_null(p, "Unexpected mallocx() failure"); \ + return (p); \ + } else { \ + switch (bits & 0x1U) { \ + case 0: return (alloc_0(bits >> 1)); \ + case 1: return (alloc_1(bits >> 1)); \ + default: not_reached(); \ + } \ + } \ +} diff --git a/test/unit/prof_accum_a.c b/test/unit/prof_accum_a.c new file mode 100644 index 00000000..42ad521d --- /dev/null +++ b/test/unit/prof_accum_a.c @@ -0,0 +1,3 @@ +#include "prof_accum.h" + +alloc_n_gen(0) diff 
--git a/test/unit/prof_accum_b.c b/test/unit/prof_accum_b.c new file mode 100644 index 00000000..60d9dab6 --- /dev/null +++ b/test/unit/prof_accum_b.c @@ -0,0 +1,3 @@ +#include "prof_accum.h" + +alloc_n_gen(1) From cb657e3170349a27e753cdf6316513f56550205e Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 25 Feb 2014 11:21:41 -0800 Subject: [PATCH 0398/3378] Add configure test to verify SSE2 code compiles. Make sure that emmintrin.h can be #include'd without causing a compilation error, rather than blindly defining HAVE_SSE2 based on architecture. Attempts to force SSE2 compilation on a 32-bit Ubuntu 13.10 system running as a VMware guest resulted in a no-win choice without any obvious explanation besides toolchain misconfiguration/bug: - Suffer compilation failure due to __MMX__, __SSE__, and __SSE2__ not being defined, even if -mmmx, -msse, and -msse2 are manually specified (note that they appear to be enabled by default). - Manually define __MMX__, __SSE__, and __SSE2__, and suffer compiler warnings that they are already automatically defined. This results in successful compilation and execution, but the noise is intolerable. 
--- configure.ac | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/configure.ac b/configure.ac index 938c0191..3837a786 100644 --- a/configure.ac +++ b/configure.ac @@ -198,21 +198,21 @@ CPU_SPINWAIT="" case "${host_cpu}" in i[[345]]86) ;; - i686) - JE_COMPILABLE([__asm__], [], [[__asm__ volatile("pause"); return 0;]], - [je_cv_asm]) - if test "x${je_cv_asm}" = "xyes" ; then + i686|x86_64) + JE_COMPILABLE([pause instruction], [], + [[__asm__ volatile("pause"); return 0;]], + [je_cv_pause]) + if test "x${je_cv_pause}" = "xyes" ; then CPU_SPINWAIT='__asm__ volatile("pause")' fi - AC_DEFINE_UNQUOTED([HAVE_SSE2], [ ]) - ;; - x86_64) - JE_COMPILABLE([__asm__ syntax], [], - [[__asm__ volatile("pause"); return 0;]], [je_cv_asm]) - if test "x${je_cv_asm}" = "xyes" ; then - CPU_SPINWAIT='__asm__ volatile("pause")' + dnl emmintrin.h fails to compile unless MMX, SSE, and SSE2 are + dnl supported. + JE_COMPILABLE([SSE2 intrinsics], [ +#include +], [], [je_cv_sse2]) + if test "x${je_cv_sse2}" = "xyes" ; then + AC_DEFINE_UNQUOTED([HAVE_SSE2], [ ]) fi - AC_DEFINE_UNQUOTED([HAVE_SSE2], [ ]) ;; powerpc) AC_DEFINE_UNQUOTED([HAVE_ALTIVEC], [ ]) From 940fdfd5eef45f5425f9124e250fddde5c5c48bf Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 25 Feb 2014 11:58:50 -0800 Subject: [PATCH 0399/3378] Fix junk filling for mremap(2)-based huge reallocation. If mremap(2) is used for huge reallocation, physical pages are mapped to new virtual addresses rather than data being copied to new pages. This bypasses the normal junk filling that would happen during allocation, so add junk filling that is specific to this case. 
--- src/huge.c | 10 ++++++++++ test/unit/junk.c | 9 ++++++--- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/src/huge.c b/src/huge.c index cecaf2df..6d86aed8 100644 --- a/src/huge.c +++ b/src/huge.c @@ -171,6 +171,16 @@ huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, abort(); memcpy(ret, ptr, copysize); chunk_dealloc_mmap(ptr, oldsize); + } else if (config_fill && zero == false && opt_junk && oldsize + < newsize) { + /* + * mremap(2) clobbers the original mapping, so + * junk/zero filling is not preserved. There is no + * need to zero fill here, since any trailing + * uninititialized memory is demand-zeroed by the + * kernel, but junk filling must be redone. + */ + memset(ret + oldsize, 0xa5, newsize - oldsize); } } else #endif diff --git a/test/unit/junk.c b/test/unit/junk.c index e27db2fe..ef8f9c16 100644 --- a/test/unit/junk.c +++ b/test/unit/junk.c @@ -92,9 +92,12 @@ test_junk(size_t sz_min, size_t sz_max) s = (char *)rallocx(s, sz+1, 0); assert_ptr_not_null((void *)s, "Unexpected rallocx() failure"); - assert_ptr_eq(most_recently_junked, junked, - "Expected region of size %zu to be junk-filled", - sz); + if (!config_mremap || sz+1 <= arena_maxclass) { + assert_ptr_eq(most_recently_junked, junked, + "Expected region of size %zu to be " + "junk-filled", + sz); + } } } From b037a55f365471002bac024ffa1a8392ddcd578f Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 25 Feb 2014 16:11:15 -0800 Subject: [PATCH 0400/3378] Restore tail call optimization subversion. Restore the essence of 898960247a8b2e6534738b7a3a244855f379faf9, which sabotages tail call optimization. This is necessary even when the mutually recursive functions are in separate compilation units. 
--- test/unit/prof_accum.h | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/test/unit/prof_accum.h b/test/unit/prof_accum.h index de9cfead..109d86b5 100644 --- a/test/unit/prof_accum.h +++ b/test/unit/prof_accum.h @@ -14,16 +14,22 @@ alloc_n_proto(1) void * \ alloc_##n(unsigned bits) \ { \ + void *p; \ \ - if (bits == 0) { \ - void *p = mallocx(1, 0); \ - assert_ptr_not_null(p, "Unexpected mallocx() failure"); \ - return (p); \ - } else { \ + if (bits == 0) \ + p = mallocx(1, 0); \ + else { \ switch (bits & 0x1U) { \ - case 0: return (alloc_0(bits >> 1)); \ - case 1: return (alloc_1(bits >> 1)); \ + case 0: \ + p = (alloc_0(bits >> 1)); \ + break; \ + case 1: \ + p = (alloc_1(bits >> 1)); \ + break; \ default: not_reached(); \ } \ } \ + /* Intentionally sabotage tail call optimization. */ \ + assert_ptr_not_null(p, "Unexpected mallocx() failure"); \ + return (p); \ } From b9ec5c9a004a9c2f63951c476a6ff9f4c27f7b6f Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 25 Feb 2014 16:43:51 -0800 Subject: [PATCH 0401/3378] Update ChangeLog for 3.5.1. --- ChangeLog | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index 5c2b1eec..c0ca338b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -3,8 +3,26 @@ bug fixes are all mentioned, but internal enhancements are omitted here for brevity (even though they are more fun to write about). Much more detail can be found in the git revision history: - http://www.canonware.com/cgi-bin/gitweb.cgi?p=jemalloc.git - git://canonware.com/jemalloc.git + https://github.com/jemalloc/jemalloc + +* 3.5.1 (February 25, 2014) + + This version primarily addresses minor bugs in test code. + + Bug fixes: + - Configure Solaris/Illumos to use MADV_FREE. + - Fix junk filling for mremap(2)-based huge reallocation. This is only + relevant if configuring with the --enable-mremap option specified. 
+ - Avoid compilation failure if 'restrict' C99 keyword is not supported by the + compiler. + - Add a configure test for SSE2 rather than assuming it is usable on i686 + systems. This fixes test compilation errors, especially on 32-bit Linux + systems. + - Fix mallctl argument size mismatches (size_t vs. uint64_t) in the stats unit + test. + - Fix/remove flawed alignment-related overflow tests. + - Prevent compiler optimizations that could change backtraces in the + prof_accum unit test. * 3.5.0 (January 22, 2014) From 9e20df163c0c608026498b8fb5beab35e8a049c6 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 26 Feb 2014 10:19:18 -0800 Subject: [PATCH 0402/3378] Remove duplicate 'static' keyword. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reported by İsmail Dönmez. --- test/include/test/SFMT-alti.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/include/test/SFMT-alti.h b/test/include/test/SFMT-alti.h index 2f86f67d..0005df6b 100644 --- a/test/include/test/SFMT-alti.h +++ b/test/include/test/SFMT-alti.h @@ -61,7 +61,7 @@ * @return output */ JEMALLOC_ALWAYS_INLINE -static vector unsigned int vec_recursion(vector unsigned int a, +vector unsigned int vec_recursion(vector unsigned int a, vector unsigned int b, vector unsigned int c, vector unsigned int d) { From bf543df20ccd9e2c422751908cabf073bc7f5d4b Mon Sep 17 00:00:00 2001 From: Harald Weppner Date: Mon, 17 Mar 2014 23:53:00 -0700 Subject: [PATCH 0403/3378] Enable profiling / leak detection in FreeBSD * Assumes procfs is mounted at /proc, cf. 
--- bin/pprof | 22 ++++++++++++++++++++-- src/prof.c | 8 +++++++- 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/bin/pprof b/bin/pprof index 727eb437..08c9ea31 100755 --- a/bin/pprof +++ b/bin/pprof @@ -4197,8 +4197,12 @@ sub FindLibrary { # For libc libraries, the copy in /usr/lib/debug contains debugging symbols sub DebuggingLibrary { my $file = shift; - if ($file =~ m|^/| && -f "/usr/lib/debug$file") { - return "/usr/lib/debug$file"; + if ($file =~ m|^/|) { + if (-f "/usr/lib/debug$file") { + return "/usr/lib/debug$file"; + } elsif (-f "/usr/lib/debug$file.debug") { + return "/usr/lib/debug$file.debug"; + } } return undef; } @@ -4360,6 +4364,19 @@ sub ParseLibraries { $finish = HexExtend($2); $offset = $zero_offset; $lib = $3; + } + # FreeBSD 10.0 virtual memory map /proc/curproc/map as defined in + # function procfs_doprocmap (sys/fs/procfs/procfs_map.c) + # + # Example: + # 0x800600000 0x80061a000 26 0 0xfffff800035a0000 r-x 75 33 0x1004 COW NC vnode /libexec/ld-elf.s + # o.1 NCH -1 + elsif ($l =~ /^(0x$h)\s(0x$h)\s\d+\s\d+\s0x$h\sr-x\s\d+\s\d+\s0x\d+\s(COW|NCO)\s(NC|NNC)\svnode\s(\S+\.so(\.\d+)*)/) { + $start = HexExtend($1); + $finish = HexExtend($2); + $offset = $zero_offset; + $lib = FindLibrary($5); + } else { next; } @@ -4382,6 +4399,7 @@ sub ParseLibraries { } } + if($main::opt_debug) { printf STDERR "$start:$finish ($offset) $lib\n"; } push(@{$result}, [$lib, $start, $finish, $offset]); } diff --git a/src/prof.c b/src/prof.c index 1d8ccbd6..ede89a7a 100644 --- a/src/prof.c +++ b/src/prof.c @@ -11,6 +11,12 @@ #include #endif +#ifdef __FreeBSD__ +#define PROCESS_VMEM_MAP "/proc/curproc/map" +#else +#define PROCESS_VMEM_MAP "/proc/%d/maps" +#endif + /******************************************************************************/ /* Data. 
*/ @@ -936,7 +942,7 @@ prof_dump_maps(bool propagate_err) cassert(config_prof); - malloc_snprintf(filename, sizeof(filename), "/proc/%d/maps", + malloc_snprintf(filename, sizeof(filename), PROCESS_VMEM_MAP, (int)getpid()); mfd = open(filename, O_RDONLY); if (mfd != -1) { From 4bbf8181f384d6bd8a634b22543f83e5b949b609 Mon Sep 17 00:00:00 2001 From: Harald Weppner Date: Tue, 18 Mar 2014 00:00:14 -0700 Subject: [PATCH 0404/3378] Consistently use debug lib(s) if present Fixes a situation where nm uses the debug lib but addr2line does not, which completely messes up the symbol lookup. --- bin/pprof | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/bin/pprof b/bin/pprof index 08c9ea31..a309943c 100755 --- a/bin/pprof +++ b/bin/pprof @@ -4607,6 +4607,12 @@ sub ExtractSymbols { my $finish = $lib->[2]; my $offset = $lib->[3]; + # Use debug library if it exists + my $debug_libname = DebuggingLibrary($libname); + if ($debug_libname) { + $libname = $debug_libname; + } + # Get list of pcs that belong in this library. my $contained = []; my ($start_pc_index, $finish_pc_index); @@ -5037,7 +5043,7 @@ sub GetProcedureBoundariesViaNm { # Tag this routine with the starting address in case the image # has multiple occurrences of this routine. We use a syntax - # that resembles template paramters that are automatically + # that resembles template parameters that are automatically # stripped out by ShortFunctionName() $this_routine .= "<$start_val>"; From 20a8c78bfe3310e0f0f72b596d4e10ca7336063b Mon Sep 17 00:00:00 2001 From: Chris Pride Date: Tue, 25 Mar 2014 22:36:05 -0700 Subject: [PATCH 0405/3378] Fix a crashing case where arena_chunk_init_hard returns NULL. This happens when it fails to allocate a new chunk. Which arena_chunk_alloc then passes into arena_avail_insert without any checks. This then causes a crash when arena_avail_insert tries to check chunk->ndirty. 
This was introduced by the refactoring of arena_chunk_alloc which previously would have returned NULL immediately after calling chunk_alloc. This is now the return from arena_chunk_init_hard so we need to check that return, and not continue if it was NULL. --- src/arena.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/arena.c b/src/arena.c index 390ab0f8..dad707b6 100644 --- a/src/arena.c +++ b/src/arena.c @@ -614,8 +614,11 @@ arena_chunk_alloc(arena_t *arena) if (arena->spare != NULL) chunk = arena_chunk_init_spare(arena); - else + else { chunk = arena_chunk_init_hard(arena); + if (chunk == NULL) + return (NULL); + } /* Insert the run into the runs_avail tree. */ arena_avail_insert(arena, chunk, map_bias, chunk_npages-map_bias, From c2dcfd8ded8162cddb143836c12d003840fdeaeb Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 28 Mar 2014 10:40:03 -0700 Subject: [PATCH 0406/3378] Convert ALLOCM_ARENA() test to MALLOCX_ARENA() test. --- Makefile.in | 2 +- test/integration/{ALLOCM_ARENA.c => MALLOCX_ARENA.c} | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) rename test/integration/{ALLOCM_ARENA.c => MALLOCX_ARENA.c} (88%) diff --git a/Makefile.in b/Makefile.in index 7399f277..d6b7d6ea 100644 --- a/Makefile.in +++ b/Makefile.in @@ -141,7 +141,7 @@ TESTS_INTEGRATION := $(srcroot)test/integration/aligned_alloc.c \ $(srcroot)test/integration/xallocx.c ifeq ($(enable_experimental), 1) TESTS_INTEGRATION += $(srcroot)test/integration/allocm.c \ - $(srcroot)test/integration/ALLOCM_ARENA.c \ + $(srcroot)test/integration/MALLOCX_ARENA.c \ $(srcroot)test/integration/rallocm.c endif TESTS_STRESS := diff --git a/test/integration/ALLOCM_ARENA.c b/test/integration/MALLOCX_ARENA.c similarity index 88% rename from test/integration/ALLOCM_ARENA.c rename to test/integration/MALLOCX_ARENA.c index 5bf3c4ab..71cf6f25 100644 --- a/test/integration/ALLOCM_ARENA.c +++ b/test/integration/MALLOCX_ARENA.c @@ -8,7 +8,7 @@ thd_start(void *arg) unsigned 
thread_ind = (unsigned)(uintptr_t)arg; unsigned arena_ind; void *p; - size_t rsz, sz; + size_t sz; sz = sizeof(arena_ind); assert_d_eq(mallctl("arenas.extend", &arena_ind, &sz, NULL, 0), 0, @@ -27,9 +27,9 @@ thd_start(void *arg) sizeof(const char *)), 0, "Error in mallctlbymib()"); } - assert_d_eq(allocm(&p, &rsz, 1, ALLOCM_ARENA(arena_ind)), - ALLOCM_SUCCESS, "Unexpected allocm() error"); - dallocm(p, 0); + p = mallocx(1, MALLOCX_ARENA(arena_ind)); + assert_ptr_not_null(p, "Unexpected mallocx() error"); + dallocx(p, 0); return (NULL); } From fbb31029a5c2f556f39e04a8781340d4ee4cf16c Mon Sep 17 00:00:00 2001 From: Max Wang Date: Thu, 27 Mar 2014 14:46:00 -0700 Subject: [PATCH 0407/3378] Use arena dss prec instead of default for huge allocs. Pass a dss_prec_t parameter to huge_{m,p,r}alloc instead of defaulting to the chunk dss prec. --- include/jemalloc/internal/huge.h | 8 +++--- .../jemalloc/internal/jemalloc_internal.h.in | 10 +++---- src/huge.c | 26 ++++++++++++------- src/jemalloc.c | 2 +- 4 files changed, 27 insertions(+), 19 deletions(-) diff --git a/include/jemalloc/internal/huge.h b/include/jemalloc/internal/huge.h index ddf13138..a2b9c779 100644 --- a/include/jemalloc/internal/huge.h +++ b/include/jemalloc/internal/huge.h @@ -17,18 +17,20 @@ extern size_t huge_allocated; /* Protects chunk-related data structures. 
*/ extern malloc_mutex_t huge_mtx; -void *huge_malloc(size_t size, bool zero); -void *huge_palloc(size_t size, size_t alignment, bool zero); +void *huge_malloc(size_t size, bool zero, dss_prec_t dss_prec); +void *huge_palloc(size_t size, size_t alignment, bool zero, + dss_prec_t dss_prec); bool huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra); void *huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, - size_t alignment, bool zero, bool try_tcache_dalloc); + size_t alignment, bool zero, bool try_tcache_dalloc, dss_prec_t dss_prec); #ifdef JEMALLOC_JET typedef void (huge_dalloc_junk_t)(void *, size_t); extern huge_dalloc_junk_t *huge_dalloc_junk; #endif void huge_dalloc(void *ptr, bool unmap); size_t huge_salloc(const void *ptr); +dss_prec_t huge_dss_prec_get(arena_t *arena); prof_ctx_t *huge_prof_ctx_get(const void *ptr); void huge_prof_ctx_set(const void *ptr, prof_ctx_t *ctx); bool huge_boot(void); diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index d24a1fe6..574bbb14 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -770,7 +770,7 @@ imalloct(size_t size, bool try_tcache, arena_t *arena) if (size <= arena_maxclass) return (arena_malloc(arena, size, false, try_tcache)); else - return (huge_malloc(size, false)); + return (huge_malloc(size, false, huge_dss_prec_get(arena))); } JEMALLOC_ALWAYS_INLINE void * @@ -787,7 +787,7 @@ icalloct(size_t size, bool try_tcache, arena_t *arena) if (size <= arena_maxclass) return (arena_malloc(arena, size, true, try_tcache)); else - return (huge_malloc(size, true)); + return (huge_malloc(size, true, huge_dss_prec_get(arena))); } JEMALLOC_ALWAYS_INLINE void * @@ -813,9 +813,9 @@ ipalloct(size_t usize, size_t alignment, bool zero, bool try_tcache, ret = arena_palloc(choose_arena(arena), usize, alignment, zero); } else if (alignment <= chunksize) - ret = 
huge_malloc(usize, zero); + ret = huge_malloc(usize, zero, huge_dss_prec_get(arena)); else - ret = huge_palloc(usize, alignment, zero); + ret = huge_palloc(usize, alignment, zero, huge_dss_prec_get(arena)); } assert(ALIGNMENT_ADDR2BASE(ret, alignment) == ret); @@ -984,7 +984,7 @@ iralloct(void *ptr, size_t size, size_t extra, size_t alignment, bool zero, try_tcache_dalloc)); } else { return (huge_ralloc(ptr, oldsize, size, extra, - alignment, zero, try_tcache_dalloc)); + alignment, zero, try_tcache_dalloc, huge_dss_prec_get(arena))); } } diff --git a/src/huge.c b/src/huge.c index 6d86aed8..d72f2135 100644 --- a/src/huge.c +++ b/src/huge.c @@ -16,14 +16,14 @@ malloc_mutex_t huge_mtx; static extent_tree_t huge; void * -huge_malloc(size_t size, bool zero) +huge_malloc(size_t size, bool zero, dss_prec_t dss_prec) { - return (huge_palloc(size, chunksize, zero)); + return (huge_palloc(size, chunksize, zero, dss_prec)); } void * -huge_palloc(size_t size, size_t alignment, bool zero) +huge_palloc(size_t size, size_t alignment, bool zero, dss_prec_t dss_prec) { void *ret; size_t csize; @@ -48,8 +48,7 @@ huge_palloc(size_t size, size_t alignment, bool zero) * it is possible to make correct junk/zero fill decisions below. */ is_zeroed = zero; - ret = chunk_alloc(csize, alignment, false, &is_zeroed, - chunk_dss_prec_get()); + ret = chunk_alloc(csize, alignment, false, &is_zeroed, dss_prec); if (ret == NULL) { base_node_dealloc(node); return (NULL); @@ -98,7 +97,7 @@ huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra) void * huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, - size_t alignment, bool zero, bool try_tcache_dalloc) + size_t alignment, bool zero, bool try_tcache_dalloc, dss_prec_t dss_prec) { void *ret; size_t copysize; @@ -113,18 +112,18 @@ huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, * space and copying. 
*/ if (alignment > chunksize) - ret = huge_palloc(size + extra, alignment, zero); + ret = huge_palloc(size + extra, alignment, zero, dss_prec); else - ret = huge_malloc(size + extra, zero); + ret = huge_malloc(size + extra, zero, dss_prec); if (ret == NULL) { if (extra == 0) return (NULL); /* Try again, this time without extra. */ if (alignment > chunksize) - ret = huge_palloc(size, alignment, zero); + ret = huge_palloc(size, alignment, zero, dss_prec); else - ret = huge_malloc(size, zero); + ret = huge_malloc(size, zero, dss_prec); if (ret == NULL) return (NULL); @@ -264,6 +263,13 @@ huge_salloc(const void *ptr) return (ret); } +dss_prec_t +huge_dss_prec_get(arena_t *arena) +{ + + return (arena_dss_prec_get(choose_arena(arena))); +} + prof_ctx_t * huge_prof_ctx_get(const void *ptr) { diff --git a/src/jemalloc.c b/src/jemalloc.c index 563d99f8..204778bc 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2076,7 +2076,7 @@ a0alloc(size_t size, bool zero) if (size <= arena_maxclass) return (arena_malloc(arenas[0], size, zero, false)); else - return (huge_malloc(size, zero)); + return (huge_malloc(size, zero, huge_dss_prec_get(arenas[0]))); } void * From c2da2591befa5574cf8c930a5a2cd7f56138658e Mon Sep 17 00:00:00 2001 From: Harald Weppner Date: Tue, 18 Mar 2014 00:00:14 -0700 Subject: [PATCH 0408/3378] Consistently use debug lib(s) if present Fixes a situation where nm uses the debug lib but addr2line does not, which completely messes up the symbol lookup. --- src/prof.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/src/prof.c b/src/prof.c index ede89a7a..7722b7b4 100644 --- a/src/prof.c +++ b/src/prof.c @@ -11,12 +11,6 @@ #include #endif -#ifdef __FreeBSD__ -#define PROCESS_VMEM_MAP "/proc/curproc/map" -#else -#define PROCESS_VMEM_MAP "/proc/%d/maps" -#endif - /******************************************************************************/ /* Data. 
*/ @@ -941,9 +935,12 @@ prof_dump_maps(bool propagate_err) char filename[PATH_MAX + 1]; cassert(config_prof); - - malloc_snprintf(filename, sizeof(filename), PROCESS_VMEM_MAP, +#ifdef __FreeBSD__ + malloc_snprintf(filename, sizeof(filename), "/proc/curproc/map"); +#else + malloc_snprintf(filename, sizeof(filename), "/proc/%d/maps", (int)getpid()); +#endif mfd = open(filename, O_RDONLY); if (mfd != -1) { ssize_t nread; From e3f27cfced57ac9c3b5306947d37411479a68c2e Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 29 Mar 2014 23:14:32 -0700 Subject: [PATCH 0409/3378] Fix p_test_fail()'s va_list abuse. p_test_fail() was passing a va_list to two separate functions with the expectation that no reset would occur. Refactor p_test_fail()'s callers to instead format two strings and pass them to p_test_fail(). Add a missing parameter to an assert_u64_eq() call, which the compiler warned about after the assertion macro refactoring. --- test/include/test/test.h | 53 ++++++++++++++++++++++++++++++---------- test/src/test.c | 10 ++------ test/unit/SFMT.c | 2 +- 3 files changed, 43 insertions(+), 22 deletions(-) diff --git a/test/include/test/test.h b/test/include/test/test.h index 8cc97af5..a32ec07c 100644 --- a/test/include/test/test.h +++ b/test/include/test/test.h @@ -1,13 +1,19 @@ +#define ASSERT_BUFSIZE 256 + #define assert_cmp(t, a, b, cmp, neg_cmp, pri, fmt...) 
do { \ t a_ = (a); \ t b_ = (b); \ if (!(a_ cmp b_)) { \ - p_test_fail( \ + char prefix[ASSERT_BUFSIZE]; \ + char message[ASSERT_BUFSIZE]; \ + malloc_snprintf(prefix, sizeof(prefix), \ "%s:%s:%d: Failed assertion: " \ "(%s) "#cmp" (%s) --> " \ "%"pri" "#neg_cmp" %"pri": ", \ __func__, __FILE__, __LINE__, \ - #a, #b, a_, b_, fmt); \ + #a, #b, a_, b_); \ + malloc_snprintf(message, sizeof(message), fmt); \ + p_test_fail(prefix, message); \ } \ } while (0) @@ -208,24 +214,32 @@ bool a_ = (a); \ bool b_ = (b); \ if (!(a_ == b_)) { \ - p_test_fail( \ + char prefix[ASSERT_BUFSIZE]; \ + char message[ASSERT_BUFSIZE]; \ + malloc_snprintf(prefix, sizeof(prefix), \ "%s:%s:%d: Failed assertion: " \ "(%s) == (%s) --> %s != %s: ", \ __func__, __FILE__, __LINE__, \ #a, #b, a_ ? "true" : "false", \ - b_ ? "true" : "false", fmt); \ + b_ ? "true" : "false"); \ + malloc_snprintf(message, sizeof(message), fmt); \ + p_test_fail(prefix, message); \ } \ } while (0) #define assert_b_ne(a, b, fmt...) do { \ bool a_ = (a); \ bool b_ = (b); \ if (!(a_ != b_)) { \ - p_test_fail( \ + char prefix[ASSERT_BUFSIZE]; \ + char message[ASSERT_BUFSIZE]; \ + malloc_snprintf(prefix, sizeof(prefix), \ "%s:%s:%d: Failed assertion: " \ "(%s) != (%s) --> %s == %s: ", \ __func__, __FILE__, __LINE__, \ #a, #b, a_ ? "true" : "false", \ - b_ ? "true" : "false", fmt); \ + b_ ? "true" : "false"); \ + malloc_snprintf(message, sizeof(message), fmt); \ + p_test_fail(prefix, message); \ } \ } while (0) #define assert_true(a, fmt...) assert_b_eq(a, true, fmt) @@ -233,26 +247,39 @@ #define assert_str_eq(a, b, fmt...) 
do { \ if (strcmp((a), (b))) { \ - p_test_fail( \ + char prefix[ASSERT_BUFSIZE]; \ + char message[ASSERT_BUFSIZE]; \ + malloc_snprintf(prefix, sizeof(prefix), \ "%s:%s:%d: Failed assertion: " \ "(%s) same as (%s) --> " \ "\"%s\" differs from \"%s\": ", \ - __func__, __FILE__, __LINE__, #a, #b, a, b, fmt); \ + __func__, __FILE__, __LINE__, #a, #b, a, b); \ + malloc_snprintf(message, sizeof(message), fmt); \ + p_test_fail(prefix, message); \ } \ } while (0) #define assert_str_ne(a, b, fmt...) do { \ if (!strcmp((a), (b))) { \ - p_test_fail( \ + char prefix[ASSERT_BUFSIZE]; \ + char message[ASSERT_BUFSIZE]; \ + malloc_snprintf(prefix, sizeof(prefix), \ "%s:%s:%d: Failed assertion: " \ "(%s) differs from (%s) --> " \ "\"%s\" same as \"%s\": ", \ - __func__, __FILE__, __LINE__, #a, #b, a, b, fmt); \ + __func__, __FILE__, __LINE__, #a, #b, a, b); \ + malloc_snprintf(message, sizeof(message), fmt); \ + p_test_fail(prefix, message); \ } \ } while (0) #define assert_not_reached(fmt...) do { \ - p_test_fail("%s:%s:%d: Unreachable code reached: ", \ - __func__, __FILE__, __LINE__, fmt); \ + char prefix[ASSERT_BUFSIZE]; \ + char message[ASSERT_BUFSIZE]; \ + malloc_snprintf(prefix, sizeof(prefix), \ + "%s:%s:%d: Unreachable code reached: ", \ + __func__, __FILE__, __LINE__); \ + malloc_snprintf(message, sizeof(message), fmt); \ + p_test_fail(prefix, message); \ } while (0) /* @@ -299,4 +326,4 @@ void test_fail(const char *format, ...) JEMALLOC_ATTR(format(printf, 1, 2)); test_status_t p_test(test_t* t, ...); void p_test_init(const char *name); void p_test_fini(void); -void p_test_fail(const char *format, ...); +void p_test_fail(const char *prefix, const char *message); diff --git a/test/src/test.c b/test/src/test.c index 6552e377..528d8583 100644 --- a/test/src/test.c +++ b/test/src/test.c @@ -86,15 +86,9 @@ p_test(test_t* t, ...) } void -p_test_fail(const char *format, ...) 
+p_test_fail(const char *prefix, const char *message) { - va_list ap; - va_start(ap, format); - malloc_vcprintf(NULL, NULL, format, ap); - format = va_arg(ap, const char *); - malloc_vcprintf(NULL, NULL, format, ap); - va_end(ap); - malloc_printf("\n"); + malloc_cprintf(NULL, NULL, "%s%s\n", prefix, message); test_status = test_status_fail; } diff --git a/test/unit/SFMT.c b/test/unit/SFMT.c index 4805f8e4..c57bd68d 100644 --- a/test/unit/SFMT.c +++ b/test/unit/SFMT.c @@ -1576,7 +1576,7 @@ TEST_BEGIN(test_by_array_64) for (i = 0; i < BLOCK_SIZE64; i++) { if (i < COUNT_1) { assert_u64_eq(array64[i], init_by_array_64_expected[i], - "Output mismatch for i=%d"); + "Output mismatch for i=%d", i); } r = gen_rand64(ctx); assert_u64_eq(r, array64[i], From ab8c79fdafd6d1ee722c1277ef32c14c6e0c9dd3 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 30 Mar 2014 11:21:09 -0700 Subject: [PATCH 0410/3378] Fix message formatting errors uncovered by p_test_fail() refactoring. --- test/unit/ckh.c | 12 ++++++------ test/unit/junk.c | 2 +- test/unit/quarantine.c | 2 +- test/unit/rtree.c | 5 +++-- test/unit/zero.c | 2 +- 5 files changed, 12 insertions(+), 11 deletions(-) diff --git a/test/unit/ckh.c b/test/unit/ckh.c index 69fd7f52..b214c279 100644 --- a/test/unit/ckh.c +++ b/test/unit/ckh.c @@ -29,7 +29,7 @@ TEST_BEGIN(test_count_insert_search_remove) assert_false(ckh_new(&ckh, 2, ckh_string_hash, ckh_string_keycomp), "Unexpected ckh_new() error"); assert_zu_eq(ckh_count(&ckh), 0, - "ckh_count() should return %zu, but it returned %zu", 0, + "ckh_count() should return %zu, but it returned %zu", ZU(0), ckh_count(&ckh)); /* Insert. 
*/ @@ -101,11 +101,11 @@ TEST_END TEST_BEGIN(test_insert_iter_remove) { -#define NITEMS 1000 +#define NITEMS ZU(1000) ckh_t ckh; void **p[NITEMS]; void *q, *r; - unsigned i; + size_t i; assert_false(ckh_new(&ckh, 2, ckh_pointer_hash, ckh_pointer_keycomp), "Unexpected ckh_new() error"); @@ -116,7 +116,7 @@ TEST_BEGIN(test_insert_iter_remove) } for (i = 0; i < NITEMS; i++) { - unsigned j; + size_t j; for (j = i; j < NITEMS; j++) { assert_false(ckh_insert(&ckh, p[j], p[j]), @@ -152,7 +152,7 @@ TEST_BEGIN(test_insert_iter_remove) for (tabind = 0; ckh_iter(&ckh, &tabind, &q, &r) == false;) { - unsigned k; + size_t k; assert_ptr_eq(q, r, "Key and val not equal"); @@ -188,7 +188,7 @@ TEST_BEGIN(test_insert_iter_remove) } assert_zu_eq(ckh_count(&ckh), 0, - "ckh_count() should return %zu, but it returned %zu", 0, + "ckh_count() should return %zu, but it returned %zu", ZU(0), ckh_count(&ckh)); ckh_delete(&ckh); #undef NITEMS diff --git a/test/unit/junk.c b/test/unit/junk.c index ef8f9c16..85bbf9e2 100644 --- a/test/unit/junk.c +++ b/test/unit/junk.c @@ -73,7 +73,7 @@ test_junk(size_t sz_min, size_t sz_max) if (sz_prev > 0) { assert_c_eq(s[0], 'a', "Previously allocated byte %zu/%zu is corrupted", - 0, sz_prev); + ZU(0), sz_prev); assert_c_eq(s[sz_prev-1], 'a', "Previously allocated byte %zu/%zu is corrupted", sz_prev-1, sz_prev); diff --git a/test/unit/quarantine.c b/test/unit/quarantine.c index 45349237..bbd48a51 100644 --- a/test/unit/quarantine.c +++ b/test/unit/quarantine.c @@ -21,7 +21,7 @@ quarantine_clear(void) TEST_BEGIN(test_quarantine) { -#define SZ 256 +#define SZ ZU(256) #define NQUARANTINED (QUARANTINE_SIZE/SZ) void *quarantined[NQUARANTINED+1]; size_t i, j; diff --git a/test/unit/rtree.c b/test/unit/rtree.c index 5e7a4113..5463055f 100644 --- a/test/unit/rtree.c +++ b/test/unit/rtree.c @@ -48,8 +48,9 @@ TEST_BEGIN(test_rtree_bits) assert_u_eq(rtree_get(rtree, keys[k]), 1, "rtree_get() should return previously set " "value and ignore insignificant key bits; " - 
"i=%u, j=%u, k=%u, set key=%#x, " - "get key=%#x", i, j, k, keys[j], keys[k]); + "i=%u, j=%u, k=%u, set key=%#"PRIxPTR", " + "get key=%#"PRIxPTR, i, j, k, keys[j], + keys[k]); } assert_u_eq(rtree_get(rtree, (((uintptr_t)1) << (sizeof(uintptr_t)*8-i))), 0, diff --git a/test/unit/zero.c b/test/unit/zero.c index 2fdae2fd..65a8f0c9 100644 --- a/test/unit/zero.c +++ b/test/unit/zero.c @@ -20,7 +20,7 @@ test_zero(size_t sz_min, size_t sz_max) if (sz_prev > 0) { assert_c_eq(s[0], 'a', "Previously allocated byte %zu/%zu is corrupted", - 0, sz_prev); + ZU(0), sz_prev); assert_c_eq(s[sz_prev-1], 'a', "Previously allocated byte %zu/%zu is corrupted", sz_prev-1, sz_prev); From ada8447cf6fb2c1f976b6311dade2e91026b3d83 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 30 Mar 2014 11:22:23 -0700 Subject: [PATCH 0411/3378] Reduce maximum tested alignment. Reduce maximum tested alignment from 2^29 to 2^25. Some systems may not have enough contiguous virtual memory to satisfy the larger alignment, but the smaller alignment is still adequate to test multi-chunk alignment. 
--- test/integration/allocm.c | 3 +-- test/integration/mallocx.c | 3 +-- test/integration/rallocx.c | 4 ++-- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/test/integration/allocm.c b/test/integration/allocm.c index 66ecf869..7b4ea0c2 100644 --- a/test/integration/allocm.c +++ b/test/integration/allocm.c @@ -1,8 +1,7 @@ #include "test/jemalloc_test.h" #define CHUNK 0x400000 -/* #define MAXALIGN ((size_t)UINT64_C(0x80000000000)) */ -#define MAXALIGN ((size_t)0x2000000LU) +#define MAXALIGN (((size_t)1) << 25) #define NITER 4 TEST_BEGIN(test_basic) diff --git a/test/integration/mallocx.c b/test/integration/mallocx.c index f37a74bc..123e041f 100644 --- a/test/integration/mallocx.c +++ b/test/integration/mallocx.c @@ -1,8 +1,7 @@ #include "test/jemalloc_test.h" #define CHUNK 0x400000 -/* #define MAXALIGN ((size_t)UINT64_C(0x80000000000)) */ -#define MAXALIGN ((size_t)0x2000000LU) +#define MAXALIGN (((size_t)1) << 25) #define NITER 4 TEST_BEGIN(test_basic) diff --git a/test/integration/rallocx.c b/test/integration/rallocx.c index b4b67802..ee21aedf 100644 --- a/test/integration/rallocx.c +++ b/test/integration/rallocx.c @@ -112,7 +112,7 @@ TEST_BEGIN(test_align) { void *p, *q; size_t align; -#define MAX_ALIGN (ZU(1) << 29) +#define MAX_ALIGN (ZU(1) << 25) align = ZU(1); p = mallocx(1, MALLOCX_ALIGN(align)); @@ -137,7 +137,7 @@ TEST_BEGIN(test_lg_align_and_zero) { void *p, *q; size_t lg_align, sz; -#define MAX_LG_ALIGN 29 +#define MAX_LG_ALIGN 25 #define MAX_VALIDATE (ZU(1) << 22) lg_align = ZU(0); From df3f27024f193b7baeedcd9f3799b4774dd20bbf Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 30 Mar 2014 16:27:08 -0700 Subject: [PATCH 0412/3378] Adapt hash tests to big-endian systems. The hash code, which has MurmurHash3 at its core, generates different output depending on system endianness, so adapt the expected output on big-endian systems. 
MurmurHash3 code also makes the assumption that unaligned access is okay (not true on all systems), but jemalloc only hashes data structures that have sufficient alignment to dodge this limitation. --- configure.ac | 5 +++++ include/jemalloc/internal/hash.h | 2 +- include/jemalloc/internal/jemalloc_internal_defs.h.in | 3 +++ test/unit/hash.c | 6 ++++++ 4 files changed, 15 insertions(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 3837a786..d5c663eb 100644 --- a/configure.ac +++ b/configure.ac @@ -150,6 +150,11 @@ if test "x$EXTRA_CFLAGS" != "x" ; then fi AC_PROG_CPP +AC_C_BIGENDIAN([ac_cv_big_endian=1], [ac_cv_big_endian=0]) +if test "x${ac_cv_big_endian}" = "x1" ; then + AC_DEFINE_UNQUOTED([JEMALLOC_BIG_ENDIAN], [ ]) +fi + AC_CHECK_SIZEOF([void *]) if test "x${ac_cv_sizeof_void_p}" = "x8" ; then LG_SIZEOF_PTR=3 diff --git a/include/jemalloc/internal/hash.h b/include/jemalloc/internal/hash.h index 09b69df5..c7183ede 100644 --- a/include/jemalloc/internal/hash.h +++ b/include/jemalloc/internal/hash.h @@ -320,7 +320,7 @@ hash_x64_128(const void *key, const int len, const uint32_t seed, JEMALLOC_INLINE void hash(const void *key, size_t len, const uint32_t seed, size_t r_hash[2]) { -#if (LG_SIZEOF_PTR == 3) +#if (LG_SIZEOF_PTR == 3 && !defined(JEMALLOC_BIG_ENDIAN)) hash_x64_128(key, len, seed, (uint64_t *)r_hash); #else uint64_t hashes[2]; diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index e3758e47..c166fbd9 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -190,6 +190,9 @@ /* C99 restrict keyword supported. */ #undef JEMALLOC_HAS_RESTRICT +/* For use by hash code. */ +#undef JEMALLOC_BIG_ENDIAN + /* sizeof(int) == 2^LG_SIZEOF_INT. 
*/ #undef LG_SIZEOF_INT diff --git a/test/unit/hash.c b/test/unit/hash.c index 0446e524..abb394ac 100644 --- a/test/unit/hash.c +++ b/test/unit/hash.c @@ -122,9 +122,15 @@ hash_variant_verify(hash_variant_t variant) (final[3] << 24); switch (variant) { +#ifdef JEMALLOC_BIG_ENDIAN + case hash_variant_x86_32: expected = 0x6213303eU; break; + case hash_variant_x86_128: expected = 0x266820caU; break; + case hash_variant_x64_128: expected = 0xcc622b6fU; break; +#else case hash_variant_x86_32: expected = 0xb0f57ee3U; break; case hash_variant_x86_128: expected = 0xb3ece62aU; break; case hash_variant_x64_128: expected = 0x6384ba69U; break; +#endif default: not_reached(); } From e64b1b7be9319b187360306ceff17ce6cb2d530c Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 30 Mar 2014 17:24:24 -0700 Subject: [PATCH 0413/3378] Enable big-endian mode for SFMT. Add cpp logic to enable big-endian mode in SFMT. This should fix SFMT tests on e.g. MIPS and SPARC. --- test/src/SFMT.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/src/SFMT.c b/test/src/SFMT.c index 433d7f6e..e6f8deec 100644 --- a/test/src/SFMT.c +++ b/test/src/SFMT.c @@ -49,6 +49,9 @@ #include "test/jemalloc_test.h" #include "test/SFMT-params.h" +#if defined(JEMALLOC_BIG_ENDIAN) && !defined(BIG_ENDIAN64) +#define BIG_ENDIAN64 1 +#endif #if defined(__BIG_ENDIAN__) && !defined(__amd64) && !defined(BIG_ENDIAN64) #define BIG_ENDIAN64 1 #endif From e181f5aa76d3a9d59a4e0ce46867349334f286d1 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 30 Mar 2014 18:58:32 -0700 Subject: [PATCH 0414/3378] Keep frame pointers if using gcc frame intrinsics. Specify -fno-omit-frame-pointer when using __builtin_frame_address() and __builtin_return_address() for backtracing. This fixes backtracing failures on e.g. i686 for optimized builds. 
--- configure.ac | 1 + 1 file changed, 1 insertion(+) diff --git a/configure.ac b/configure.ac index d5c663eb..3521a704 100644 --- a/configure.ac +++ b/configure.ac @@ -784,6 +784,7 @@ fi ) if test "x$backtrace_method" = "x" -a "x$enable_prof_gcc" = "x1" \ -a "x$GCC" = "xyes" ; then + JE_CFLAGS_APPEND([-fno-omit-frame-pointer]) backtrace_method="gcc intrinsics" AC_DEFINE([JEMALLOC_PROF_GCC], [ ]) else From 82abf6fe6913a0f8bccc5ad8aeab081a8d9a5ed8 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 30 Mar 2014 20:35:50 -0700 Subject: [PATCH 0415/3378] Allow libgcc-based backtracing on x86. Remove autoconf code that explicitly disabled libgcc-based backtracing on i[3456]86. There is no mention of which platforms/compilers exhibited problems when this code was added, and chances are good that any gcc toolchain issues have long since been fixed. --- configure.ac | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/configure.ac b/configure.ac index 3521a704..4de81dc1 100644 --- a/configure.ac +++ b/configure.ac @@ -747,22 +747,6 @@ if test "x$backtrace_method" = "x" -a "x$enable_prof_libgcc" = "x1" \ -a "x$GCC" = "xyes" ; then AC_CHECK_HEADERS([unwind.h], , [enable_prof_libgcc="0"]) AC_CHECK_LIB([gcc], [_Unwind_Backtrace], [LIBS="$LIBS -lgcc"], [enable_prof_libgcc="0"]) - dnl The following is conservative, in that it only has entries for CPUs on - dnl which jemalloc has been tested. 
- AC_MSG_CHECKING([libgcc-based backtracing reliability on ${host_cpu}]) - case "${host_cpu}" in - i[[3456]]86) - AC_MSG_RESULT([unreliable]) - enable_prof_libgcc="0"; - ;; - x86_64) - AC_MSG_RESULT([reliable]) - ;; - *) - AC_MSG_RESULT([unreliable]) - enable_prof_libgcc="0"; - ;; - esac if test "x${enable_prof_libgcc}" = "x1" ; then backtrace_method="libgcc" AC_DEFINE([JEMALLOC_PROF_LIBGCC], [ ]) From 9c62ed44b04610bd5465c8aaeec5623280015d41 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 31 Mar 2014 09:16:59 -0700 Subject: [PATCH 0416/3378] Document how dss precedence affects huge allocation. --- doc/jemalloc.xml.in | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index c7e2e872..d8e2e711 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1439,8 +1439,12 @@ malloc_conf = "xmalloc:true";]]> Set the precedence of dss allocation as related to mmap allocation for arena <i>, or for all arenas if <i> equals arenas.narenas. See - opt.dss for supported + linkend="arenas.narenas">arenas.narenas. Note + that even during huge allocation this setting is read from the arena + that would be chosen for small or large allocation so that applications + can depend on consistent dss versus mmap allocation regardless of + allocation size. See opt.dss for supported settings. From ff53631535707c79bbd7e0fd255f954aaf2b5268 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 31 Mar 2014 09:23:10 -0700 Subject: [PATCH 0417/3378] Update ChangeLog for 3.6.0. --- ChangeLog | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/ChangeLog b/ChangeLog index c0ca338b..d56ee999 100644 --- a/ChangeLog +++ b/ChangeLog @@ -5,6 +5,30 @@ found in the git revision history: https://github.com/jemalloc/jemalloc +* 3.6.0 (March 31, 2014) + + This version contains a critical bug fix for a regression present in 3.5.0 and + 3.5.1. 
+ + Bug fixes: + - Fix a regression in arena_chunk_alloc() that caused crashes during + small/large allocation if chunk allocation failed. In the absence of this + bug, chunk allocation failure would result in allocation failure, e.g. NULL + return from malloc(). This regression was introduced in 3.5.0. + - Fix backtracing for gcc intrinsics-based backtracing by specifying + -fno-omit-frame-pointer to gcc. Note that the application (and all the + libraries it links to) must also be compiled with this option for + backtracing to be reliable. + - Use dss allocation precedence for huge allocations as well as small/large + allocations. + - Fix test assertion failure message formatting. This bug did not manifect on + x86_64 systems because of implementation subtleties in va_list. + - Fix inconsequential test failures for hash and SFMT code. + + New features: + - Support heap profiling on FreeBSD. This feature depends on the proc + filesystem being mounted during heap profile dumping. + * 3.5.1 (February 25, 2014) This version primarily addresses minor bugs in test code. From 8a26eaca7f4c95771ecbf096caeeba14fbe1122f Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 31 Mar 2014 09:31:38 -0700 Subject: [PATCH 0418/3378] Add private namespace mangling for huge_dss_prec_get(). 
--- include/jemalloc/internal/private_symbols.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 1e64ed57..93516d24 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -197,6 +197,7 @@ huge_allocated huge_boot huge_dalloc huge_dalloc_junk +huge_dss_prec_get huge_malloc huge_mtx huge_ndalloc From be8e59f5a64ef775c9694aee0d6a87d92336d303 Mon Sep 17 00:00:00 2001 From: Ben Maurer Date: Sat, 5 Apr 2014 15:59:08 -0700 Subject: [PATCH 0419/3378] Don't dereference chunk->arena in free() hot path When you call free() we load chunk->arena even though that data isn't used on the tcache hot path. In profiling some FB applications, I found that ~30% of the dTLB misses in the free() function come from this line. With 4 MB chunks, the arena_chunk_t->map is ~ 32 KB (1024 pages in the chunk, 4 8 byte pointers in arena_chunk_map_t). This means there's only a 1/8 chance of the page containing chunk->arena also containing the map bits.
--- include/jemalloc/internal/arena.h | 11 ++++------- include/jemalloc/internal/jemalloc_internal.h.in | 2 +- src/jemalloc.c | 2 +- 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 9d000c03..b899888d 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -495,8 +495,7 @@ prof_ctx_t *arena_prof_ctx_get(const void *ptr); void arena_prof_ctx_set(const void *ptr, size_t usize, prof_ctx_t *ctx); void *arena_malloc(arena_t *arena, size_t size, bool zero, bool try_tcache); size_t arena_salloc(const void *ptr, bool demote); -void arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr, - bool try_tcache); +void arena_dalloc(arena_chunk_t *chunk, void *ptr, bool try_tcache); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_)) @@ -1022,13 +1021,11 @@ arena_salloc(const void *ptr, bool demote) } JEMALLOC_ALWAYS_INLINE void -arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr, bool try_tcache) +arena_dalloc(arena_chunk_t *chunk, void *ptr, bool try_tcache) { size_t pageind, mapbits; tcache_t *tcache; - assert(arena != NULL); - assert(chunk->arena == arena); assert(ptr != NULL); assert(CHUNK_ADDR2BASE(ptr) != ptr); @@ -1043,7 +1040,7 @@ arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr, bool try_tcache) binind = arena_ptr_small_binind_get(ptr, mapbits); tcache_dalloc_small(tcache, ptr, binind); } else - arena_dalloc_small(arena, chunk, ptr, pageind); + arena_dalloc_small(chunk->arena, chunk, ptr, pageind); } else { size_t size = arena_mapbits_large_size_get(chunk, pageind); @@ -1053,7 +1050,7 @@ arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr, bool try_tcache) tcache_get(false)) != NULL) { tcache_dalloc_large(tcache, ptr, size); } else - arena_dalloc_large(arena, chunk, ptr); + arena_dalloc_large(chunk->arena, chunk, ptr); } } # endif /* JEMALLOC_ARENA_INLINE_B */ diff --git 
a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 574bbb14..9c79ae00 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -895,7 +895,7 @@ idalloct(void *ptr, bool try_tcache) chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (chunk != ptr) - arena_dalloc(chunk->arena, chunk, ptr, try_tcache); + arena_dalloc(chunk, ptr, try_tcache); else huge_dalloc(ptr, true); } diff --git a/src/jemalloc.c b/src/jemalloc.c index 204778bc..558dbb20 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2103,7 +2103,7 @@ a0free(void *ptr) chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (chunk != ptr) - arena_dalloc(chunk->arena, chunk, ptr, false); + arena_dalloc(chunk, ptr, false); else huge_dalloc(ptr, true); } From f9ff60346d7c25ad653ea062e496a5d0864233b2 Mon Sep 17 00:00:00 2001 From: Ben Maurer Date: Sun, 6 Apr 2014 13:24:16 -0700 Subject: [PATCH 0420/3378] refactoring for bits splitting --- src/arena.c | 76 ++++++++++++++++++++++++++++------------------------- 1 file changed, 40 insertions(+), 36 deletions(-) diff --git a/src/arena.c b/src/arena.c index dad707b6..3cb62601 100644 --- a/src/arena.c +++ b/src/arena.c @@ -53,6 +53,22 @@ static void arena_bin_lower_run(arena_t *arena, arena_chunk_t *chunk, /******************************************************************************/ +JEMALLOC_INLINE_C size_t +arena_mapelm_to_pageind(arena_chunk_map_t *mapelm) +{ + uintptr_t map_offset = + CHUNK_ADDR2OFFSET(mapelm) - offsetof(arena_chunk_t, map); + + return ((map_offset / sizeof(arena_chunk_map_t)) + map_bias); +} + +JEMALLOC_INLINE_C size_t +arena_mapelm_to_bits(arena_chunk_map_t *mapelm) +{ + + return (mapelm->bits); +} + static inline int arena_run_comp(arena_chunk_map_t *a, arena_chunk_map_t *b) { @@ -73,26 +89,19 @@ static inline int arena_avail_comp(arena_chunk_map_t *a, arena_chunk_map_t *b) { int ret; - size_t a_size = a->bits & ~PAGE_MASK; - size_t 
b_size = b->bits & ~PAGE_MASK; + size_t a_size; + size_t b_size = arena_mapelm_to_bits(b) & ~PAGE_MASK; + uintptr_t a_mapelm = (uintptr_t)a; + uintptr_t b_mapelm = (uintptr_t)b; + + if (a_mapelm & CHUNK_MAP_KEY) + a_size = a_mapelm & ~PAGE_MASK; + else + a_size = arena_mapelm_to_bits(a) & ~PAGE_MASK; ret = (a_size > b_size) - (a_size < b_size); - if (ret == 0) { - uintptr_t a_mapelm, b_mapelm; - - if ((a->bits & CHUNK_MAP_KEY) != CHUNK_MAP_KEY) - a_mapelm = (uintptr_t)a; - else { - /* - * Treat keys as though they are lower than anything - * else. - */ - a_mapelm = 0; - } - b_mapelm = (uintptr_t)b; - + if (ret == 0 && (!(a_mapelm & CHUNK_MAP_KEY))) ret = (a_mapelm > b_mapelm) - (a_mapelm < b_mapelm); - } return (ret); } @@ -663,15 +672,14 @@ static arena_run_t * arena_run_alloc_large_helper(arena_t *arena, size_t size, bool zero) { arena_run_t *run; - arena_chunk_map_t *mapelm, key; + arena_chunk_map_t *mapelm; + arena_chunk_map_t *key; - key.bits = size | CHUNK_MAP_KEY; - mapelm = arena_avail_tree_nsearch(&arena->runs_avail, &key); + key = (arena_chunk_map_t *)(size | CHUNK_MAP_KEY); + mapelm = arena_avail_tree_nsearch(&arena->runs_avail, key); if (mapelm != NULL) { arena_chunk_t *run_chunk = CHUNK_ADDR2BASE(mapelm); - size_t pageind = (((uintptr_t)mapelm - - (uintptr_t)run_chunk->map) / sizeof(arena_chunk_map_t)) - + map_bias; + size_t pageind = arena_mapelm_to_pageind(mapelm); run = (arena_run_t *)((uintptr_t)run_chunk + (pageind << LG_PAGE)); @@ -718,15 +726,14 @@ static arena_run_t * arena_run_alloc_small_helper(arena_t *arena, size_t size, size_t binind) { arena_run_t *run; - arena_chunk_map_t *mapelm, key; + arena_chunk_map_t *mapelm; + arena_chunk_map_t *key; - key.bits = size | CHUNK_MAP_KEY; - mapelm = arena_avail_tree_nsearch(&arena->runs_avail, &key); + key = (arena_chunk_map_t *)(size | CHUNK_MAP_KEY); + mapelm = arena_avail_tree_nsearch(&arena->runs_avail, key); if (mapelm != NULL) { arena_chunk_t *run_chunk = CHUNK_ADDR2BASE(mapelm); - size_t pageind 
= (((uintptr_t)mapelm - - (uintptr_t)run_chunk->map) / sizeof(arena_chunk_map_t)) - + map_bias; + size_t pageind = arena_mapelm_to_pageind(mapelm); run = (arena_run_t *)((uintptr_t)run_chunk + (pageind << LG_PAGE)); @@ -897,8 +904,7 @@ arena_chunk_purge_stashed(arena_t *arena, arena_chunk_t *chunk, bool unzeroed; size_t flag_unzeroed, i; - pageind = (((uintptr_t)mapelm - (uintptr_t)chunk->map) / - sizeof(arena_chunk_map_t)) + map_bias; + pageind = arena_mapelm_to_pageind(mapelm); npages = arena_mapbits_large_size_get(chunk, pageind) >> LG_PAGE; assert(pageind + npages <= chunk_npages); @@ -942,8 +948,7 @@ arena_chunk_unstash_purged(arena_t *arena, arena_chunk_t *chunk, mapelm = ql_first(mapelms)) { arena_run_t *run; - pageind = (((uintptr_t)mapelm - (uintptr_t)chunk->map) / - sizeof(arena_chunk_map_t)) + map_bias; + pageind = arena_mapelm_to_pageind(mapelm); run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)(pageind << LG_PAGE)); ql_remove(mapelms, mapelm, u.ql_link); @@ -1307,8 +1312,7 @@ arena_bin_runs_first(arena_bin_t *bin) arena_run_t *run; chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(mapelm); - pageind = ((((uintptr_t)mapelm - (uintptr_t)chunk->map) / - sizeof(arena_chunk_map_t))) + map_bias; + pageind = arena_mapelm_to_pageind(mapelm); run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - arena_mapbits_small_runind_get(chunk, pageind)) << LG_PAGE)); @@ -1882,7 +1886,7 @@ arena_dalloc_bin_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr, run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - arena_mapbits_small_runind_get(chunk, pageind)) << LG_PAGE)); bin = run->bin; - binind = arena_ptr_small_binind_get(ptr, mapelm->bits); + binind = arena_ptr_small_binind_get(ptr, arena_mapbits_get(chunk, pageind)); bin_info = &arena_bin_info[binind]; if (config_fill || config_stats) size = bin_info->reg_size; From 9b0cbf0850b130a9b0a8c58bd10b2926b2083510 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 11 Apr 2014 14:24:51 -0700 Subject: [PATCH 
0421/3378] Remove support for non-prof-promote heap profiling metadata. Make promotion of sampled small objects to large objects mandatory, so that profiling metadata can always be stored in the chunk map, rather than requiring one pointer per small region in each small-region page run. In practice the non-prof-promote code was only useful when using jemalloc to track all objects and report them as leaks at program exit. However, Valgrind is at least as good a tool for this particular use case. Furthermore, the non-prof-promote code is getting in the way of some optimizations that will make heap profiling much cheaper for the predominant use case (sampling a small representative proportion of all allocations). --- include/jemalloc/internal/arena.h | 66 +++---------------- include/jemalloc/internal/private_symbols.txt | 1 - include/jemalloc/internal/prof.h | 20 ++---- include/jemalloc/internal/size_classes.sh | 5 +- include/jemalloc/internal/tcache.h | 2 +- src/arena.c | 21 ------ src/jemalloc.c | 16 ++--- src/prof.c | 7 +- 8 files changed, 28 insertions(+), 110 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index b899888d..0e14c2c4 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -110,7 +110,6 @@ struct arena_chunk_map_s { * p : run page offset * s : run size * n : binind for size class; large objects set these to BININD_INVALID - * except for promoted allocations (see prof_promote) * x : don't care * - : 0 * + : 1 @@ -216,8 +215,6 @@ struct arena_run_s { * | ... | * bitmap_offset | bitmap | * | ... | - * ctx0_offset | ctx map | - * | ... | * |--------------------| * | redzone | * reg0_offset | region 0 | @@ -270,12 +267,6 @@ struct arena_bin_info_s { */ bitmap_info_t bitmap_info; - /* - * Offset of first (prof_ctx_t *) in a run header for this bin's size - * class, or 0 if (config_prof == false || opt_prof == false). 
- */ - uint32_t ctx0_offset; - /* Offset of first region in a run for this bin's size class. */ uint32_t reg0_offset; }; @@ -492,7 +483,7 @@ size_t arena_bin_index(arena_t *arena, arena_bin_t *bin); unsigned arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr); prof_ctx_t *arena_prof_ctx_get(const void *ptr); -void arena_prof_ctx_set(const void *ptr, size_t usize, prof_ctx_t *ctx); +void arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx); void *arena_malloc(arena_t *arena, size_t size, bool zero, bool try_tcache); size_t arena_salloc(const void *ptr, bool demote); void arena_dalloc(arena_chunk_t *chunk, void *ptr, bool try_tcache); @@ -879,31 +870,16 @@ arena_prof_ctx_get(const void *ptr) pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; mapbits = arena_mapbits_get(chunk, pageind); assert((mapbits & CHUNK_MAP_ALLOCATED) != 0); - if ((mapbits & CHUNK_MAP_LARGE) == 0) { - if (prof_promote) - ret = (prof_ctx_t *)(uintptr_t)1U; - else { - arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + - (uintptr_t)((pageind - (mapbits >> LG_PAGE)) << - LG_PAGE)); - size_t binind = arena_ptr_small_binind_get(ptr, - mapbits); - arena_bin_info_t *bin_info = &arena_bin_info[binind]; - unsigned regind; - - regind = arena_run_regind(run, bin_info, ptr); - ret = *(prof_ctx_t **)((uintptr_t)run + - bin_info->ctx0_offset + (regind * - sizeof(prof_ctx_t *))); - } - } else + if ((mapbits & CHUNK_MAP_LARGE) == 0) + ret = (prof_ctx_t *)(uintptr_t)1U; + else ret = arena_mapp_get(chunk, pageind)->prof_ctx; return (ret); } JEMALLOC_INLINE void -arena_prof_ctx_set(const void *ptr, size_t usize, prof_ctx_t *ctx) +arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx) { arena_chunk_t *chunk; size_t pageind; @@ -916,31 +892,8 @@ arena_prof_ctx_set(const void *ptr, size_t usize, prof_ctx_t *ctx) pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; assert(arena_mapbits_allocated_get(chunk, pageind) != 0); - if (usize > SMALL_MAXCLASS || (prof_promote && - 
((uintptr_t)ctx != (uintptr_t)1U || arena_mapbits_large_get(chunk, - pageind) != 0))) { - assert(arena_mapbits_large_get(chunk, pageind) != 0); + if (arena_mapbits_large_get(chunk, pageind) != 0) arena_mapp_get(chunk, pageind)->prof_ctx = ctx; - } else { - assert(arena_mapbits_large_get(chunk, pageind) == 0); - if (prof_promote == false) { - size_t mapbits = arena_mapbits_get(chunk, pageind); - arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + - (uintptr_t)((pageind - (mapbits >> LG_PAGE)) << - LG_PAGE)); - size_t binind; - arena_bin_info_t *bin_info; - unsigned regind; - - binind = arena_ptr_small_binind_get(ptr, mapbits); - bin_info = &arena_bin_info[binind]; - regind = arena_run_regind(run, bin_info, ptr); - - *((prof_ctx_t **)((uintptr_t)run + - bin_info->ctx0_offset + (regind * sizeof(prof_ctx_t - *)))) = ctx; - } - } } JEMALLOC_ALWAYS_INLINE void * @@ -989,7 +942,7 @@ arena_salloc(const void *ptr, bool demote) assert(arena_mapbits_allocated_get(chunk, pageind) != 0); binind = arena_mapbits_binind_get(chunk, pageind); if (binind == BININD_INVALID || (config_prof && demote == false && - prof_promote && arena_mapbits_large_get(chunk, pageind) != 0)) { + arena_mapbits_large_get(chunk, pageind) != 0)) { /* * Large allocation. In the common case (demote == true), and * as this is an inline function, most callers will only end up @@ -1007,10 +960,7 @@ arena_salloc(const void *ptr, bool demote) assert(arena_mapbits_dirty_get(chunk, pageind) == arena_mapbits_dirty_get(chunk, pageind+(ret>>LG_PAGE)-1)); } else { - /* - * Small allocation (possibly promoted to a large object due to - * prof_promote). - */ + /* Small allocation (possibly promoted to a large object). 
*/ assert(arena_mapbits_large_get(chunk, pageind) != 0 || arena_ptr_small_binind_get(ptr, arena_mapbits_get(chunk, pageind)) == binind); diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 93516d24..f52d49f9 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -303,7 +303,6 @@ prof_mdump prof_postfork_child prof_postfork_parent prof_prefork -prof_promote prof_realloc prof_sample_accum_update prof_sample_threshold_update diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index 6f162d21..56014f18 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -220,12 +220,6 @@ extern char opt_prof_prefix[ */ extern uint64_t prof_interval; -/* - * If true, promote small sampled objects to large objects, since small run - * headers do not have embedded profile context pointers. - */ -extern bool prof_promote; - void bt_init(prof_bt_t *bt, void **vec); void prof_backtrace(prof_bt_t *bt, unsigned nignore); prof_thr_cnt_t *prof_lookup(prof_bt_t *bt); @@ -308,7 +302,7 @@ malloc_tsd_protos(JEMALLOC_ATTR(unused), prof_tdata, prof_tdata_t *) prof_tdata_t *prof_tdata_get(bool create); void prof_sample_threshold_update(prof_tdata_t *prof_tdata); prof_ctx_t *prof_ctx_get(const void *ptr); -void prof_ctx_set(const void *ptr, size_t usize, prof_ctx_t *ctx); +void prof_ctx_set(const void *ptr, prof_ctx_t *ctx); bool prof_sample_accum_update(size_t size); void prof_malloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt); void prof_realloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt, @@ -405,7 +399,7 @@ prof_ctx_get(const void *ptr) } JEMALLOC_INLINE void -prof_ctx_set(const void *ptr, size_t usize, prof_ctx_t *ctx) +prof_ctx_set(const void *ptr, prof_ctx_t *ctx) { arena_chunk_t *chunk; @@ -415,7 +409,7 @@ prof_ctx_set(const void *ptr, size_t usize, prof_ctx_t *ctx) chunk = (arena_chunk_t 
*)CHUNK_ADDR2BASE(ptr); if (chunk != ptr) { /* Region. */ - arena_prof_ctx_set(ptr, usize, ctx); + arena_prof_ctx_set(ptr, ctx); } else huge_prof_ctx_set(ptr, ctx); } @@ -471,7 +465,7 @@ prof_malloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt) } if ((uintptr_t)cnt > (uintptr_t)1U) { - prof_ctx_set(ptr, usize, cnt->ctx); + prof_ctx_set(ptr, cnt->ctx); cnt->epoch++; /*********/ @@ -491,7 +485,7 @@ prof_malloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt) mb_write(); /*********/ } else - prof_ctx_set(ptr, usize, (prof_ctx_t *)(uintptr_t)1U); + prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U); } JEMALLOC_INLINE void @@ -539,10 +533,10 @@ prof_realloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt, if ((uintptr_t)told_cnt > (uintptr_t)1U) told_cnt->epoch++; if ((uintptr_t)cnt > (uintptr_t)1U) { - prof_ctx_set(ptr, usize, cnt->ctx); + prof_ctx_set(ptr, cnt->ctx); cnt->epoch++; } else if (ptr != NULL) - prof_ctx_set(ptr, usize, (prof_ctx_t *)(uintptr_t)1U); + prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U); /*********/ mb_write(); /*********/ diff --git a/include/jemalloc/internal/size_classes.sh b/include/jemalloc/internal/size_classes.sh index 29c80c1f..960674aa 100755 --- a/include/jemalloc/internal/size_classes.sh +++ b/include/jemalloc/internal/size_classes.sh @@ -94,9 +94,8 @@ cat < 255) # error "Too many small size classes" diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index c3d4b58d..51974136 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -354,7 +354,7 @@ tcache_alloc_large(tcache_t *tcache, size_t size, bool zero) if (ret == NULL) return (NULL); } else { - if (config_prof && prof_promote && size == PAGE) { + if (config_prof && size == PAGE) { arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ret); size_t pageind = (((uintptr_t)ret - (uintptr_t)chunk) >> diff --git a/src/arena.c b/src/arena.c index 3cb62601..d5741000 100644 --- a/src/arena.c +++ b/src/arena.c @@ 
-2373,7 +2373,6 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size) uint32_t try_nregs, good_nregs; uint32_t try_hdr_size, good_hdr_size; uint32_t try_bitmap_offset, good_bitmap_offset; - uint32_t try_ctx0_offset, good_ctx0_offset; uint32_t try_redzone0_offset, good_redzone0_offset; assert(min_run_size >= PAGE); @@ -2428,14 +2427,6 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size) try_bitmap_offset = try_hdr_size; /* Add space for bitmap. */ try_hdr_size += bitmap_size(try_nregs); - if (config_prof && opt_prof && prof_promote == false) { - /* Pad to a quantum boundary. */ - try_hdr_size = QUANTUM_CEILING(try_hdr_size); - try_ctx0_offset = try_hdr_size; - /* Add space for one (prof_ctx_t *) per region. */ - try_hdr_size += try_nregs * sizeof(prof_ctx_t *); - } else - try_ctx0_offset = 0; try_redzone0_offset = try_run_size - (try_nregs * bin_info->reg_interval) - pad_size; } while (try_hdr_size > try_redzone0_offset); @@ -2449,7 +2440,6 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size) good_nregs = try_nregs; good_hdr_size = try_hdr_size; good_bitmap_offset = try_bitmap_offset; - good_ctx0_offset = try_ctx0_offset; good_redzone0_offset = try_redzone0_offset; /* Try more aggressive settings. */ @@ -2469,16 +2459,6 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size) try_bitmap_offset = try_hdr_size; /* Add space for bitmap. */ try_hdr_size += bitmap_size(try_nregs); - if (config_prof && opt_prof && prof_promote == false) { - /* Pad to a quantum boundary. */ - try_hdr_size = QUANTUM_CEILING(try_hdr_size); - try_ctx0_offset = try_hdr_size; - /* - * Add space for one (prof_ctx_t *) per region. 
- */ - try_hdr_size += try_nregs * - sizeof(prof_ctx_t *); - } try_redzone0_offset = try_run_size - (try_nregs * bin_info->reg_interval) - pad_size; } while (try_hdr_size > try_redzone0_offset); @@ -2494,7 +2474,6 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size) bin_info->run_size = good_run_size; bin_info->nregs = good_nregs; bin_info->bitmap_offset = good_bitmap_offset; - bin_info->ctx0_offset = good_ctx0_offset; bin_info->reg0_offset = good_redzone0_offset + bin_info->redzone_size; assert(bin_info->reg0_offset - bin_info->redzone_size + (bin_info->nregs diff --git a/src/jemalloc.c b/src/jemalloc.c index 558dbb20..816a12e0 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -853,7 +853,7 @@ imalloc_prof_sample(size_t usize, prof_thr_cnt_t *cnt) if (cnt == NULL) return (NULL); - if (prof_promote && usize <= SMALL_MAXCLASS) { + if (usize <= SMALL_MAXCLASS) { p = imalloc(SMALL_MAXCLASS+1); if (p == NULL) return (NULL); @@ -952,7 +952,7 @@ imemalign_prof_sample(size_t alignment, size_t usize, prof_thr_cnt_t *cnt) if (cnt == NULL) return (NULL); - if (prof_promote && usize <= SMALL_MAXCLASS) { + if (usize <= SMALL_MAXCLASS) { assert(sa2u(SMALL_MAXCLASS+1, alignment) != 0); p = ipalloc(sa2u(SMALL_MAXCLASS+1, alignment), alignment, false); @@ -1086,7 +1086,7 @@ icalloc_prof_sample(size_t usize, prof_thr_cnt_t *cnt) if (cnt == NULL) return (NULL); - if (prof_promote && usize <= SMALL_MAXCLASS) { + if (usize <= SMALL_MAXCLASS) { p = icalloc(SMALL_MAXCLASS+1); if (p == NULL) return (NULL); @@ -1183,7 +1183,7 @@ irealloc_prof_sample(void *oldptr, size_t usize, prof_thr_cnt_t *cnt) if (cnt == NULL) return (NULL); - if (prof_promote && usize <= SMALL_MAXCLASS) { + if (usize <= SMALL_MAXCLASS) { p = iralloc(oldptr, SMALL_MAXCLASS+1, 0, 0, false); if (p == NULL) return (NULL); @@ -1395,7 +1395,7 @@ imallocx_prof_sample(size_t usize, size_t alignment, bool zero, bool try_tcache, if (cnt == NULL) return (NULL); - if (prof_promote && usize <= 
SMALL_MAXCLASS) { + if (usize <= SMALL_MAXCLASS) { size_t usize_promoted = (alignment == 0) ? s2u(SMALL_MAXCLASS+1) : sa2u(SMALL_MAXCLASS+1, alignment); assert(usize_promoted != 0); @@ -1492,7 +1492,7 @@ irallocx_prof_sample(void *oldptr, size_t size, size_t alignment, size_t usize, if (cnt == NULL) return (NULL); - if (prof_promote && usize <= SMALL_MAXCLASS) { + if (usize <= SMALL_MAXCLASS) { p = iralloct(oldptr, SMALL_MAXCLASS+1, (SMALL_MAXCLASS+1 >= size) ? 0 : size - (SMALL_MAXCLASS+1), alignment, zero, try_tcache_alloc, try_tcache_dalloc, arena); @@ -1639,8 +1639,8 @@ ixallocx_prof_sample(void *ptr, size_t old_usize, size_t size, size_t extra, if (cnt == NULL) return (old_usize); /* Use minimum usize to determine whether promotion may happen. */ - if (prof_promote && ((alignment == 0) ? s2u(size) : sa2u(size, - alignment)) <= SMALL_MAXCLASS) { + if (((alignment == 0) ? s2u(size) : sa2u(size, alignment)) <= + SMALL_MAXCLASS) { if (ixalloc(ptr, SMALL_MAXCLASS+1, (SMALL_MAXCLASS+1 >= size+extra) ? 0 : size+extra - (SMALL_MAXCLASS+1), alignment, zero)) diff --git a/src/prof.c b/src/prof.c index 7722b7b4..1b1f7a84 100644 --- a/src/prof.c +++ b/src/prof.c @@ -32,7 +32,6 @@ char opt_prof_prefix[ 1]; uint64_t prof_interval = 0; -bool prof_promote; /* * Table of mutexes that are shared among ctx's. These are leaf locks, so @@ -1300,8 +1299,8 @@ prof_boot1(void) cassert(config_prof); /* - * opt_prof and prof_promote must be in their final state before any - * arenas are initialized, so this function must be executed early. + * opt_prof must be in its final state before any arenas are + * initialized, so this function must be executed early. 
*/ if (opt_prof_leak && opt_prof == false) { @@ -1317,8 +1316,6 @@ prof_boot1(void) opt_lg_prof_interval); } } - - prof_promote = (opt_prof && opt_lg_prof_sample > LG_PAGE); } bool From 9790b9667fd975b1f9a4f108f9d0a20ab265c6b6 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 14 Apr 2014 22:32:31 -0700 Subject: [PATCH 0422/3378] Remove the *allocm() API, which is superceded by the *allocx() API. --- INSTALL | 3 - Makefile.in | 7 +- configure.ac | 21 +- doc/jemalloc.xml.in | 191 +----------------- .../jemalloc/internal/jemalloc_internal.h.in | 1 - include/jemalloc/internal/private_symbols.txt | 1 - include/jemalloc/jemalloc_defs.h.in | 3 - include/jemalloc/jemalloc_macros.h.in | 17 -- include/jemalloc/jemalloc_protos.h.in | 12 -- src/jemalloc.c | 85 -------- test/integration/MALLOCX_ARENA.c | 4 +- test/integration/allocm.c | 107 ---------- test/integration/rallocm.c | 111 ---------- test/unit/mq.c | 2 +- 14 files changed, 7 insertions(+), 558 deletions(-) delete mode 100644 test/integration/allocm.c delete mode 100644 test/integration/rallocm.c diff --git a/INSTALL b/INSTALL index 841704d2..8530643d 100644 --- a/INSTALL +++ b/INSTALL @@ -157,9 +157,6 @@ any of the following arguments (not a definitive list) to 'configure': --disable-valgrind Disable support for Valgrind. ---disable-experimental - Disable support for the experimental API (*allocm()). - --disable-zone-allocator Disable zone allocator for Darwin. This means jemalloc won't be hooked as the default allocator on OSX/iOS. 
diff --git a/Makefile.in b/Makefile.in index d6b7d6ea..f7aa7d87 100644 --- a/Makefile.in +++ b/Makefile.in @@ -48,7 +48,6 @@ cfgoutputs_in := @cfgoutputs_in@ cfgoutputs_out := @cfgoutputs_out@ enable_autogen := @enable_autogen@ enable_code_coverage := @enable_code_coverage@ -enable_experimental := @enable_experimental@ enable_zone_allocator := @enable_zone_allocator@ DSO_LDFLAGS = @DSO_LDFLAGS@ SOREV = @SOREV@ @@ -133,17 +132,13 @@ TESTS_UNIT_AUX := $(srcroot)test/unit/prof_accum_a.c \ TESTS_INTEGRATION := $(srcroot)test/integration/aligned_alloc.c \ $(srcroot)test/integration/allocated.c \ $(srcroot)test/integration/mallocx.c \ + $(srcroot)test/integration/MALLOCX_ARENA.c \ $(srcroot)test/integration/mremap.c \ $(srcroot)test/integration/posix_memalign.c \ $(srcroot)test/integration/rallocx.c \ $(srcroot)test/integration/thread_arena.c \ $(srcroot)test/integration/thread_tcache_enabled.c \ $(srcroot)test/integration/xallocx.c -ifeq ($(enable_experimental), 1) -TESTS_INTEGRATION += $(srcroot)test/integration/allocm.c \ - $(srcroot)test/integration/MALLOCX_ARENA.c \ - $(srcroot)test/integration/rallocm.c -endif TESTS_STRESS := TESTS := $(TESTS_UNIT) $(TESTS_INTEGRATION) $(TESTS_STRESS) diff --git a/configure.ac b/configure.ac index 4de81dc1..04cefe95 100644 --- a/configure.ac +++ b/configure.ac @@ -44,7 +44,7 @@ AC_CACHE_CHECK([whether $1 is compilable], dnl ============================================================================ dnl Library revision. -rev=1 +rev=2 AC_SUBST([rev]) srcroot=$srcdir @@ -438,24 +438,6 @@ AC_CHECK_FUNC([valloc], [AC_DEFINE([JEMALLOC_OVERRIDE_VALLOC], [ ]) public_syms="${public_syms} valloc"]) -dnl Support the experimental API by default. 
-AC_ARG_ENABLE([experimental], - [AS_HELP_STRING([--disable-experimental], - [Disable support for the experimental API])], -[if test "x$enable_experimental" = "xno" ; then - enable_experimental="0" -else - enable_experimental="1" -fi -], -[enable_experimental="1"] -) -if test "x$enable_experimental" = "x1" ; then - AC_DEFINE([JEMALLOC_EXPERIMENTAL], [ ]) - public_syms="${public_syms} allocm dallocm nallocm rallocm sallocm" -fi -AC_SUBST([enable_experimental]) - dnl Do not compute test code coverage by default. GCOV_FLAGS= AC_ARG_ENABLE([code-coverage], @@ -1465,7 +1447,6 @@ AC_MSG_RESULT([JEMALLOC_PRIVATE_NAMESPACE]) AC_MSG_RESULT([ : ${JEMALLOC_PRIVATE_NAMESPACE}]) AC_MSG_RESULT([install_suffix : ${install_suffix}]) AC_MSG_RESULT([autogen : ${enable_autogen}]) -AC_MSG_RESULT([experimental : ${enable_experimental}]) AC_MSG_RESULT([cc-silence : ${enable_cc_silence}]) AC_MSG_RESULT([debug : ${enable_debug}]) AC_MSG_RESULT([code-coverage : ${enable_code_coverage}]) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index d8e2e711..a4232409 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -44,11 +44,6 @@ mallctlbymib malloc_stats_print malloc_usable_size - allocm - rallocm - sallocm - dallocm - nallocm --> general purpose memory allocation functions @@ -172,41 +167,6 @@ const char *malloc_conf; - - Experimental API - - int allocm - void **ptr - size_t *rsize - size_t size - int flags - - - int rallocm - void **ptr - size_t *rsize - size_t size - size_t extra - int flags - - - int sallocm - const void *ptr - size_t *rsize - int flags - - - int dallocm - void *ptr - int flags - - - int nallocm - size_t *rsize - size_t size - int flags - - @@ -449,116 +409,6 @@ for (i = 0; i < nbins; i++) { depended on, since such behavior is entirely implementation-dependent. - - Experimental API - The experimental API is subject to change or removal without regard - for backward compatibility. If - is specified during configuration, the experimental API is - omitted. 
- - The allocm, - rallocm, - sallocm, - dallocm, and - nallocm functions all have a - flags argument that can be used to specify - options. The functions only check the options that are contextually - relevant. Use bitwise or (|) operations to - specify one or more of the following: - - - ALLOCM_LG_ALIGN(la) - - - Align the memory allocation to start at an address - that is a multiple of (1 << - la). This macro does not validate - that la is within the valid - range. - - - ALLOCM_ALIGN(a) - - - Align the memory allocation to start at an address - that is a multiple of a, where - a is a power of two. This macro does not - validate that a is a power of 2. - - - - ALLOCM_ZERO - - Initialize newly allocated memory to contain zero - bytes. In the growing reallocation case, the real size prior to - reallocation defines the boundary between untouched bytes and those - that are initialized to contain zero bytes. If this macro is - absent, newly allocated memory is uninitialized. - - - ALLOCM_NO_MOVE - - For reallocation, fail rather than moving the - object. This constraint can apply to both growth and - shrinkage. - - - ALLOCM_ARENA(a) - - - Use the arena specified by the index - a (and by necessity bypass the thread - cache). This macro has no effect for huge regions, nor for regions - that were allocated via an arena other than the one specified. - This macro does not validate that a - specifies an arena index in the valid range. - - - - - The allocm function allocates at - least size bytes of memory, sets - *ptr to the base address of the allocation, and - sets *rsize to the real size of the allocation if - rsize is not NULL. Behavior - is undefined if size is 0, or - if request size overflows due to size class and/or alignment - constraints. - - The rallocm function resizes the - allocation at *ptr to be at least - size bytes, sets *ptr to - the base address of the allocation if it moved, and sets - *rsize to the real size of the allocation if - rsize is not NULL. 
If - extra is non-zero, an attempt is made to resize - the allocation to be at least (size + - extra) bytes, though inability to allocate - the extra byte(s) will not by itself result in failure. Behavior is - undefined if size is 0, if - request size overflows due to size class and/or alignment constraints, or - if (size + - extra > - SIZE_T_MAX). - - The sallocm function sets - *rsize to the real size of the allocation. - - The dallocm function causes the - memory referenced by ptr to be made available for - future allocations. - - The nallocm function allocates no - memory, but it performs the same size computation as the - allocm function, and if - rsize is not NULL it sets - *rsize to the real size of the allocation that - would result from the equivalent allocm - function call. Behavior is undefined if size is - 0, or if request size overflows due to size class - and/or alignment constraints. - TUNING @@ -1076,9 +926,8 @@ for (i = 0; i < nbins; i++) { Zero filling enabled/disabled. If enabled, each byte of uninitialized allocated memory will be initialized to 0. Note that this initialization only happens once for each byte, so - realloc, - rallocx and - rallocm calls do not zero memory that + realloc and + rallocx calls do not zero memory that was previously allocated. This is intended for debugging and will impact performance negatively. This option is disabled by default. @@ -2253,42 +2102,6 @@ malloc_conf = "xmalloc:true";]]> returns the usable size of the allocation pointed to by ptr. - - Experimental API - The allocm, - rallocm, - sallocm, - dallocm, and - nallocm functions return - ALLOCM_SUCCESS on success; otherwise they return an - error value. The allocm, - rallocm, and - nallocm functions will fail if: - - - ALLOCM_ERR_OOM - - Out of memory. Insufficient contiguous memory was - available to service the allocation request. The - allocm function additionally sets - *ptr to NULL, whereas - the rallocm function leaves - *ptr unmodified. 
- - - The rallocm function will also - fail if: - - - ALLOCM_ERR_NOT_MOVED - - ALLOCM_NO_MOVE was specified, - but the reallocation request could not be serviced without moving - the object. - - - - ENVIRONMENT diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 9c79ae00..a374e2a3 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -230,7 +230,6 @@ static const bool config_ivsalloc = #include "jemalloc/internal/jemalloc_internal_macros.h" #define MALLOCX_LG_ALIGN_MASK ((int)0x3f) -#define ALLOCM_LG_ALIGN_MASK ((int)0x3f) /* Smallest size class to support. */ #define LG_TINY_MIN 3 diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index f52d49f9..e1cb28f8 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -211,7 +211,6 @@ huge_prof_ctx_set huge_ralloc huge_ralloc_no_move huge_salloc -iallocm icalloc icalloct idalloc diff --git a/include/jemalloc/jemalloc_defs.h.in b/include/jemalloc/jemalloc_defs.h.in index eb38d710..ce6c6987 100644 --- a/include/jemalloc/jemalloc_defs.h.in +++ b/include/jemalloc/jemalloc_defs.h.in @@ -1,9 +1,6 @@ /* Defined if __attribute__((...)) syntax is supported. */ #undef JEMALLOC_HAVE_ATTR -/* Support the experimental API. */ -#undef JEMALLOC_EXPERIMENTAL - /* * Define overrides for non-standard allocator-related functions if they are * present on the system. diff --git a/include/jemalloc/jemalloc_macros.h.in b/include/jemalloc/jemalloc_macros.h.in index 13dbdd91..1530f9ca 100644 --- a/include/jemalloc/jemalloc_macros.h.in +++ b/include/jemalloc/jemalloc_macros.h.in @@ -19,23 +19,6 @@ /* Bias arena index bits so that 0 encodes "MALLOCX_ARENA() unspecified". 
*/ # define MALLOCX_ARENA(a) ((int)(((a)+1) << 8)) -#ifdef JEMALLOC_EXPERIMENTAL -# define ALLOCM_LG_ALIGN(la) (la) -# if LG_SIZEOF_PTR == 2 -# define ALLOCM_ALIGN(a) (ffs(a)-1) -# else -# define ALLOCM_ALIGN(a) \ - ((a < (size_t)INT_MAX) ? ffs(a)-1 : ffs(a>>32)+31) -# endif -# define ALLOCM_ZERO ((int)0x40) -# define ALLOCM_NO_MOVE ((int)0x80) -/* Bias arena index bits so that 0 encodes "ALLOCM_ARENA() unspecified". */ -# define ALLOCM_ARENA(a) ((int)(((a)+1) << 8)) -# define ALLOCM_SUCCESS 0 -# define ALLOCM_ERR_OOM 1 -# define ALLOCM_ERR_NOT_MOVED 2 -#endif - #ifdef JEMALLOC_HAVE_ATTR # define JEMALLOC_ATTR(s) __attribute__((s)) # define JEMALLOC_EXPORT JEMALLOC_ATTR(visibility("default")) diff --git a/include/jemalloc/jemalloc_protos.h.in b/include/jemalloc/jemalloc_protos.h.in index 25446de3..59aeee11 100644 --- a/include/jemalloc/jemalloc_protos.h.in +++ b/include/jemalloc/jemalloc_protos.h.in @@ -44,15 +44,3 @@ JEMALLOC_EXPORT void * @je_@memalign(size_t alignment, size_t size) #ifdef JEMALLOC_OVERRIDE_VALLOC JEMALLOC_EXPORT void * @je_@valloc(size_t size) JEMALLOC_ATTR(malloc); #endif - -#ifdef JEMALLOC_EXPERIMENTAL -JEMALLOC_EXPORT int @je_@allocm(void **ptr, size_t *rsize, size_t size, - int flags) JEMALLOC_ATTR(nonnull(1)); -JEMALLOC_EXPORT int @je_@rallocm(void **ptr, size_t *rsize, size_t size, - size_t extra, int flags) JEMALLOC_ATTR(nonnull(1)); -JEMALLOC_EXPORT int @je_@sallocm(const void *ptr, size_t *rsize, int flags) - JEMALLOC_ATTR(nonnull(1)); -JEMALLOC_EXPORT int @je_@dallocm(void *ptr, int flags) - JEMALLOC_ATTR(nonnull(1)); -JEMALLOC_EXPORT int @je_@nallocm(size_t *rsize, size_t size, int flags); -#endif diff --git a/src/jemalloc.c b/src/jemalloc.c index 816a12e0..0de59408 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1867,91 +1867,6 @@ je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr) * End non-standard functions. 
*/ /******************************************************************************/ -/* - * Begin experimental functions. - */ -#ifdef JEMALLOC_EXPERIMENTAL - -int -je_allocm(void **ptr, size_t *rsize, size_t size, int flags) -{ - void *p; - - assert(ptr != NULL); - - p = je_mallocx(size, flags); - if (p == NULL) - return (ALLOCM_ERR_OOM); - if (rsize != NULL) - *rsize = isalloc(p, config_prof); - *ptr = p; - return (ALLOCM_SUCCESS); -} - -int -je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, int flags) -{ - int ret; - bool no_move = flags & ALLOCM_NO_MOVE; - - assert(ptr != NULL); - assert(*ptr != NULL); - assert(size != 0); - assert(SIZE_T_MAX - size >= extra); - - if (no_move) { - size_t usize = je_xallocx(*ptr, size, extra, flags); - ret = (usize >= size) ? ALLOCM_SUCCESS : ALLOCM_ERR_NOT_MOVED; - if (rsize != NULL) - *rsize = usize; - } else { - void *p = je_rallocx(*ptr, size+extra, flags); - if (p != NULL) { - *ptr = p; - ret = ALLOCM_SUCCESS; - } else - ret = ALLOCM_ERR_OOM; - if (rsize != NULL) - *rsize = isalloc(*ptr, config_prof); - } - return (ret); -} - -int -je_sallocm(const void *ptr, size_t *rsize, int flags) -{ - - assert(rsize != NULL); - *rsize = je_sallocx(ptr, flags); - return (ALLOCM_SUCCESS); -} - -int -je_dallocm(void *ptr, int flags) -{ - - je_dallocx(ptr, flags); - return (ALLOCM_SUCCESS); -} - -int -je_nallocm(size_t *rsize, size_t size, int flags) -{ - size_t usize; - - usize = je_nallocx(size, flags); - if (usize == 0) - return (ALLOCM_ERR_OOM); - if (rsize != NULL) - *rsize = usize; - return (ALLOCM_SUCCESS); -} - -#endif -/* - * End experimental functions. - */ -/******************************************************************************/ /* * The following functions are used by threading libraries for protection of * malloc during fork(). 
diff --git a/test/integration/MALLOCX_ARENA.c b/test/integration/MALLOCX_ARENA.c index 71cf6f25..695a5b66 100644 --- a/test/integration/MALLOCX_ARENA.c +++ b/test/integration/MALLOCX_ARENA.c @@ -34,7 +34,7 @@ thd_start(void *arg) return (NULL); } -TEST_BEGIN(test_ALLOCM_ARENA) +TEST_BEGIN(test_MALLOCX_ARENA) { thd_t thds[NTHREADS]; unsigned i; @@ -54,5 +54,5 @@ main(void) { return (test( - test_ALLOCM_ARENA)); + test_MALLOCX_ARENA)); } diff --git a/test/integration/allocm.c b/test/integration/allocm.c deleted file mode 100644 index 7b4ea0c2..00000000 --- a/test/integration/allocm.c +++ /dev/null @@ -1,107 +0,0 @@ -#include "test/jemalloc_test.h" - -#define CHUNK 0x400000 -#define MAXALIGN (((size_t)1) << 25) -#define NITER 4 - -TEST_BEGIN(test_basic) -{ - size_t nsz, rsz, sz; - void *p; - - sz = 42; - nsz = 0; - assert_d_eq(nallocm(&nsz, sz, 0), ALLOCM_SUCCESS, - "Unexpected nallocm() error"); - rsz = 0; - assert_d_eq(allocm(&p, &rsz, sz, 0), ALLOCM_SUCCESS, - "Unexpected allocm() error"); - assert_zu_ge(rsz, sz, "Real size smaller than expected"); - assert_zu_eq(nsz, rsz, "nallocm()/allocm() rsize mismatch"); - assert_d_eq(dallocm(p, 0), ALLOCM_SUCCESS, - "Unexpected dallocm() error"); - - assert_d_eq(allocm(&p, NULL, sz, 0), ALLOCM_SUCCESS, - "Unexpected allocm() error"); - assert_d_eq(dallocm(p, 0), ALLOCM_SUCCESS, - "Unexpected dallocm() error"); - - nsz = 0; - assert_d_eq(nallocm(&nsz, sz, ALLOCM_ZERO), ALLOCM_SUCCESS, - "Unexpected nallocm() error"); - rsz = 0; - assert_d_eq(allocm(&p, &rsz, sz, ALLOCM_ZERO), ALLOCM_SUCCESS, - "Unexpected allocm() error"); - assert_zu_eq(nsz, rsz, "nallocm()/allocm() rsize mismatch"); - assert_d_eq(dallocm(p, 0), ALLOCM_SUCCESS, - "Unexpected dallocm() error"); -} -TEST_END - -TEST_BEGIN(test_alignment_and_size) -{ - int r; - size_t nsz, rsz, sz, alignment, total; - unsigned i; - void *ps[NITER]; - - for (i = 0; i < NITER; i++) - ps[i] = NULL; - - for (alignment = 8; - alignment <= MAXALIGN; - alignment <<= 1) { - total = 0; 
- for (sz = 1; - sz < 3 * alignment && sz < (1U << 31); - sz += (alignment >> (LG_SIZEOF_PTR-1)) - 1) { - for (i = 0; i < NITER; i++) { - nsz = 0; - r = nallocm(&nsz, sz, ALLOCM_ALIGN(alignment) | - ALLOCM_ZERO); - assert_d_eq(r, ALLOCM_SUCCESS, - "nallocm() error for alignment=%zu, " - "size=%zu (%#zx): %d", - alignment, sz, sz, r); - rsz = 0; - r = allocm(&ps[i], &rsz, sz, - ALLOCM_ALIGN(alignment) | ALLOCM_ZERO); - assert_d_eq(r, ALLOCM_SUCCESS, - "allocm() error for alignment=%zu, " - "size=%zu (%#zx): %d", - alignment, sz, sz, r); - assert_zu_ge(rsz, sz, - "Real size smaller than expected for " - "alignment=%zu, size=%zu", alignment, sz); - assert_zu_eq(nsz, rsz, - "nallocm()/allocm() rsize mismatch for " - "alignment=%zu, size=%zu", alignment, sz); - assert_ptr_null( - (void *)((uintptr_t)ps[i] & (alignment-1)), - "%p inadequately aligned for" - " alignment=%zu, size=%zu", ps[i], - alignment, sz); - sallocm(ps[i], &rsz, 0); - total += rsz; - if (total >= (MAXALIGN << 1)) - break; - } - for (i = 0; i < NITER; i++) { - if (ps[i] != NULL) { - dallocm(ps[i], 0); - ps[i] = NULL; - } - } - } - } -} -TEST_END - -int -main(void) -{ - - return (test( - test_basic, - test_alignment_and_size)); -} diff --git a/test/integration/rallocm.c b/test/integration/rallocm.c deleted file mode 100644 index 33c11bb7..00000000 --- a/test/integration/rallocm.c +++ /dev/null @@ -1,111 +0,0 @@ -#include "test/jemalloc_test.h" - -TEST_BEGIN(test_same_size) -{ - void *p, *q; - size_t sz, tsz; - - assert_d_eq(allocm(&p, &sz, 42, 0), ALLOCM_SUCCESS, - "Unexpected allocm() error"); - - q = p; - assert_d_eq(rallocm(&q, &tsz, sz, 0, ALLOCM_NO_MOVE), ALLOCM_SUCCESS, - "Unexpected rallocm() error"); - assert_ptr_eq(q, p, "Unexpected object move"); - assert_zu_eq(tsz, sz, "Unexpected size change: %zu --> %zu", sz, tsz); - - assert_d_eq(dallocm(p, 0), ALLOCM_SUCCESS, - "Unexpected dallocm() error"); -} -TEST_END - -TEST_BEGIN(test_extra_no_move) -{ - void *p, *q; - size_t sz, tsz; - - 
assert_d_eq(allocm(&p, &sz, 42, 0), ALLOCM_SUCCESS, - "Unexpected allocm() error"); - - q = p; - assert_d_eq(rallocm(&q, &tsz, sz, sz-42, ALLOCM_NO_MOVE), - ALLOCM_SUCCESS, "Unexpected rallocm() error"); - assert_ptr_eq(q, p, "Unexpected object move"); - assert_zu_eq(tsz, sz, "Unexpected size change: %zu --> %zu", sz, tsz); - - assert_d_eq(dallocm(p, 0), ALLOCM_SUCCESS, - "Unexpected dallocm() error"); -} -TEST_END - -TEST_BEGIN(test_no_move_fail) -{ - void *p, *q; - size_t sz, tsz; - - assert_d_eq(allocm(&p, &sz, 42, 0), ALLOCM_SUCCESS, - "Unexpected allocm() error"); - - q = p; - assert_d_eq(rallocm(&q, &tsz, sz + 5, 0, ALLOCM_NO_MOVE), - ALLOCM_ERR_NOT_MOVED, "Unexpected rallocm() result"); - assert_ptr_eq(q, p, "Unexpected object move"); - assert_zu_eq(tsz, sz, "Unexpected size change: %zu --> %zu", sz, tsz); - - assert_d_eq(dallocm(p, 0), ALLOCM_SUCCESS, - "Unexpected dallocm() error"); -} -TEST_END - -TEST_BEGIN(test_grow_and_shrink) -{ - void *p, *q; - size_t tsz; -#define NCYCLES 3 - unsigned i, j; -#define NSZS 2500 - size_t szs[NSZS]; -#define MAXSZ ZU(12 * 1024 * 1024) - - assert_d_eq(allocm(&p, &szs[0], 1, 0), ALLOCM_SUCCESS, - "Unexpected allocm() error"); - - for (i = 0; i < NCYCLES; i++) { - for (j = 1; j < NSZS && szs[j-1] < MAXSZ; j++) { - q = p; - assert_d_eq(rallocm(&q, &szs[j], szs[j-1]+1, 0, 0), - ALLOCM_SUCCESS, - "Unexpected rallocm() error for size=%zu-->%zu", - szs[j-1], szs[j-1]+1); - assert_zu_ne(szs[j], szs[j-1]+1, - "Expected size to at least: %zu", szs[j-1]+1); - p = q; - } - - for (j--; j > 0; j--) { - q = p; - assert_d_eq(rallocm(&q, &tsz, szs[j-1], 0, 0), - ALLOCM_SUCCESS, - "Unexpected rallocm() error for size=%zu-->%zu", - szs[j], szs[j-1]); - assert_zu_eq(tsz, szs[j-1], - "Expected size=%zu, got size=%zu", szs[j-1], tsz); - p = q; - } - } - - assert_d_eq(dallocm(p, 0), ALLOCM_SUCCESS, - "Unexpected dallocm() error"); -} -TEST_END - -int -main(void) -{ - - return (test( - test_same_size, - test_extra_no_move, - test_no_move_fail, 
- test_grow_and_shrink)); -} diff --git a/test/unit/mq.c b/test/unit/mq.c index f57e96af..bd289c54 100644 --- a/test/unit/mq.c +++ b/test/unit/mq.c @@ -54,7 +54,7 @@ thd_sender_start(void *arg) mq_msg_t *msg; void *p; p = mallocx(sizeof(mq_msg_t), 0); - assert_ptr_not_null(p, "Unexpected allocm() failure"); + assert_ptr_not_null(p, "Unexpected mallocx() failure"); msg = (mq_msg_t *)p; mq_put(mq, msg); } From 24a4ba77e1e4c73488c6ac87db6db972232b392d Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 14 Apr 2014 22:38:59 -0700 Subject: [PATCH 0423/3378] Update MALLOCX_ARENA() documentation. Update MALLOCX_ARENA() documentation to no longer claim that it has no effect for huge region allocations. --- doc/jemalloc.xml.in | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index a4232409..4acb07f3 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -270,10 +270,10 @@ Use the arena specified by the index a (and by necessity bypass the thread - cache). This macro has no effect for huge regions, nor for regions - that were allocated via an arena other than the one specified. - This macro does not validate that a - specifies an arena index in the valid range. + cache). This macro has no effect for regions that were allocated + via an arena other than the one specified. This macro does not + validate that a specifies an arena index in + the valid range. From 644d414bc9ab52efbbf7ebeb350170106ec1f937 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 14 Apr 2014 22:49:23 -0700 Subject: [PATCH 0424/3378] Reverse the cc-silence default. Replace --enable-cc-silence with --disable-cc-silence, so that by default people won't see spurious warnings when building jemalloc. 
--- INSTALL | 8 ++++---- configure.ac | 9 ++++----- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/INSTALL b/INSTALL index 8530643d..55c919ae 100644 --- a/INSTALL +++ b/INSTALL @@ -71,10 +71,10 @@ any of the following arguments (not a definitive list) to 'configure': versions of jemalloc can coexist in the same installation directory. For example, libjemalloc.so.0 becomes libjemalloc.so.0. ---enable-cc-silence - Enable code that silences non-useful compiler warnings. This is helpful - when trying to tell serious warnings from those due to compiler - limitations, but it potentially incurs a performance penalty. +--disable-cc-silence + Disable code that silences non-useful compiler warnings. This is mainly + useful during development when auditing the set of warnings that are being + silenced. --enable-debug Enable assertions and validation code. This incurs a substantial diff --git a/configure.ac b/configure.ac index 04cefe95..b47d5723 100644 --- a/configure.ac +++ b/configure.ac @@ -577,18 +577,17 @@ cfghdrs_tup="include/jemalloc/jemalloc_defs.h:include/jemalloc/jemalloc_defs.h.i cfghdrs_tup="${cfghdrs_tup} include/jemalloc/internal/jemalloc_internal_defs.h:${srcroot}include/jemalloc/internal/jemalloc_internal_defs.h.in" cfghdrs_tup="${cfghdrs_tup} test/include/test/jemalloc_test_defs.h:${srcroot}test/include/test/jemalloc_test_defs.h.in" -dnl Do not silence irrelevant compiler warnings by default, since enabling this -dnl option incurs a performance penalty. +dnl Silence irrelevant compiler warnings by default. 
AC_ARG_ENABLE([cc-silence], - [AS_HELP_STRING([--enable-cc-silence], - [Silence irrelevant compiler warnings])], + [AS_HELP_STRING([--disable-cc-silence], + [Do not silence irrelevant compiler warnings])], [if test "x$enable_cc_silence" = "xno" ; then enable_cc_silence="0" else enable_cc_silence="1" fi ], -[enable_cc_silence="0"] +[enable_cc_silence="1"] ) if test "x$enable_cc_silence" = "x1" ; then AC_DEFINE([JEMALLOC_CC_SILENCE], [ ]) From 4d434adb146375ad17f0d5e994ed5728d2942e3f Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 15 Apr 2014 12:09:48 -0700 Subject: [PATCH 0425/3378] Make dss non-optional, and fix an "arena..dss" mallctl bug. Make dss non-optional on all platforms which support sbrk(2). Fix the "arena..dss" mallctl to return an error if "primary" or "secondary" precedence is specified, but sbrk(2) is not supported. --- INSTALL | 4 --- configure.ac | 23 ++++----------- doc/jemalloc.xml.in | 29 +++++++++---------- include/jemalloc/internal/arena.h | 2 +- .../jemalloc/internal/jemalloc_internal.h.in | 2 +- .../internal/jemalloc_internal_defs.h.in | 3 -- src/arena.c | 5 +++- src/chunk.c | 8 ++--- src/chunk_dss.c | 20 ++++++------- src/ctl.c | 6 +--- src/huge.c | 6 ++-- test/integration/MALLOCX_ARENA.c | 17 +++++++++-- test/unit/mallctl.c | 20 ++++++++++--- 13 files changed, 72 insertions(+), 73 deletions(-) diff --git a/INSTALL b/INSTALL index 55c919ae..07f51d1e 100644 --- a/INSTALL +++ b/INSTALL @@ -145,10 +145,6 @@ any of the following arguments (not a definitive list) to 'configure': memory allocation algorithm that causes semi-permanent VM map holes under normal jemalloc operation. ---enable-dss - Enable support for page allocation/deallocation via sbrk(2), in addition to - mmap(2). - --disable-fill Disable support for junk/zero filling of memory, quarantine, and redzones. 
See the "opt.junk", "opt.zero", "opt.quarantine", and "opt.redzone" option diff --git a/configure.ac b/configure.ac index b47d5723..dc817e1c 100644 --- a/configure.ac +++ b/configure.ac @@ -836,34 +836,22 @@ if test "x$enable_munmap" = "x1" ; then fi AC_SUBST([enable_munmap]) -dnl Do not enable allocation from DSS by default. -AC_ARG_ENABLE([dss], - [AS_HELP_STRING([--enable-dss], [Enable allocation from DSS])], -[if test "x$enable_dss" = "xno" ; then - enable_dss="0" -else - enable_dss="1" -fi -], -[enable_dss="0"] -) +dnl Enable allocation from DSS if supported by the OS. +have_dss="1" dnl Check whether the BSD/SUSv1 sbrk() exists. If not, disable DSS support. AC_CHECK_FUNC([sbrk], [have_sbrk="1"], [have_sbrk="0"]) if test "x$have_sbrk" = "x1" ; then if test "x$sbrk_deprecated" == "x1" ; then AC_MSG_RESULT([Disabling dss allocation because sbrk is deprecated]) - enable_dss="0" - else - AC_DEFINE([JEMALLOC_HAVE_SBRK], [ ]) + have_dss="0" fi else - enable_dss="0" + have_dss="0" fi -if test "x$enable_dss" = "x1" ; then +if test "x$have_dss" = "x1" ; then AC_DEFINE([JEMALLOC_DSS], [ ]) fi -AC_SUBST([enable_dss]) dnl Support the junk/zero filling option by default. AC_ARG_ENABLE([fill], @@ -1461,7 +1449,6 @@ AC_MSG_RESULT([valgrind : ${enable_valgrind}]) AC_MSG_RESULT([xmalloc : ${enable_xmalloc}]) AC_MSG_RESULT([mremap : ${enable_mremap}]) AC_MSG_RESULT([munmap : ${enable_munmap}]) -AC_MSG_RESULT([dss : ${enable_dss}]) AC_MSG_RESULT([lazy_lock : ${enable_lazy_lock}]) AC_MSG_RESULT([tls : ${enable_tls}]) AC_MSG_RESULT([===============================================================================]) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 4acb07f3..16dd0bbe 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -448,8 +448,10 @@ for (i = 0; i < nbins; i++) { 2 to obtain memory, which is suboptimal for several reasons, including race conditions, increased fragmentation, and artificial limitations on maximum usable memory. 
If - is specified during configuration, this - allocator uses both mmap + sbrk + 2 is supported by the operating + system, this allocator uses both + mmap 2 and sbrk 2, in that order of preference; @@ -625,16 +627,6 @@ for (i = 0; i < nbins; i++) { build configuration. - - - config.dss - (bool) - r- - - was specified during - build configuration. - - config.fill @@ -790,10 +782,15 @@ for (i = 0; i < nbins; i++) { 2) allocation precedence as related to mmap 2 allocation. The following - settings are supported: “disabled”, “primary”, - and “secondary”. The default is “secondary” if - config.dss is - true, “disabled” otherwise. + settings are supported if + sbrk + 2 is supported by the operating + system: “disabled”, “primary”, and + “secondary”; otherwise only “disabled” is + supported. The default is “secondary” if + sbrk + 2 is supported by the operating + system; “disabled” otherwise. diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 0e14c2c4..6de312eb 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -434,7 +434,7 @@ void *arena_ralloc(arena_t *arena, void *ptr, size_t oldsize, size_t size, size_t extra, size_t alignment, bool zero, bool try_tcache_alloc, bool try_tcache_dalloc); dss_prec_t arena_dss_prec_get(arena_t *arena); -void arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec); +bool arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec); void arena_stats_merge(arena_t *arena, const char **dss, size_t *nactive, size_t *ndirty, arena_stats_t *astats, malloc_bin_stats_t *bstats, malloc_large_stats_t *lstats); diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index a374e2a3..4821b9bc 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -85,7 +85,7 @@ static const bool config_debug = false #endif ; -static const bool config_dss = +static const bool 
have_dss = #ifdef JEMALLOC_DSS true #else diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index c166fbd9..fc959671 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -76,9 +76,6 @@ */ #undef JEMALLOC_MUTEX_INIT_CB -/* Defined if sbrk() is supported. */ -#undef JEMALLOC_HAVE_SBRK - /* Non-empty if the tls_model attribute is supported. */ #undef JEMALLOC_TLS_MODEL diff --git a/src/arena.c b/src/arena.c index d5741000..8aa36fdf 100644 --- a/src/arena.c +++ b/src/arena.c @@ -2243,13 +2243,16 @@ arena_dss_prec_get(arena_t *arena) return (ret); } -void +bool arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec) { + if (have_dss == false) + return (dss_prec != dss_prec_disabled); malloc_mutex_lock(&arena->lock); arena->dss_prec = dss_prec; malloc_mutex_unlock(&arena->lock); + return (false); } void diff --git a/src/chunk.c b/src/chunk.c index 90ab116a..fdd693e0 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -153,7 +153,7 @@ chunk_alloc(size_t size, size_t alignment, bool base, bool *zero, assert((alignment & chunksize_mask) == 0); /* "primary" dss. */ - if (config_dss && dss_prec == dss_prec_primary) { + if (have_dss && dss_prec == dss_prec_primary) { if ((ret = chunk_recycle(&chunks_szad_dss, &chunks_ad_dss, size, alignment, base, zero)) != NULL) goto label_return; @@ -167,7 +167,7 @@ chunk_alloc(size_t size, size_t alignment, bool base, bool *zero, if ((ret = chunk_alloc_mmap(size, alignment, zero)) != NULL) goto label_return; /* "secondary" dss. 
*/ - if (config_dss && dss_prec == dss_prec_secondary) { + if (have_dss && dss_prec == dss_prec_secondary) { if ((ret = chunk_recycle(&chunks_szad_dss, &chunks_ad_dss, size, alignment, base, zero)) != NULL) goto label_return; @@ -305,7 +305,7 @@ chunk_unmap(void *chunk, size_t size) assert(size != 0); assert((size & chunksize_mask) == 0); - if (config_dss && chunk_in_dss(chunk)) + if (have_dss && chunk_in_dss(chunk)) chunk_record(&chunks_szad_dss, &chunks_ad_dss, chunk, size); else if (chunk_dealloc_mmap(chunk, size)) chunk_record(&chunks_szad_mmap, &chunks_ad_mmap, chunk, size); @@ -348,7 +348,7 @@ chunk_boot(void) return (true); memset(&stats_chunks, 0, sizeof(chunk_stats_t)); } - if (config_dss && chunk_dss_boot()) + if (have_dss && chunk_dss_boot()) return (true); extent_tree_szad_new(&chunks_szad_mmap); extent_tree_ad_new(&chunks_ad_mmap); diff --git a/src/chunk_dss.c b/src/chunk_dss.c index 510bb8be..36133f14 100644 --- a/src/chunk_dss.c +++ b/src/chunk_dss.c @@ -32,7 +32,7 @@ static void * chunk_dss_sbrk(intptr_t increment) { -#ifdef JEMALLOC_HAVE_SBRK +#ifdef JEMALLOC_DSS return (sbrk(increment)); #else not_implemented(); @@ -45,7 +45,7 @@ chunk_dss_prec_get(void) { dss_prec_t ret; - if (config_dss == false) + if (have_dss == false) return (dss_prec_disabled); malloc_mutex_lock(&dss_mtx); ret = dss_prec_default; @@ -57,8 +57,8 @@ bool chunk_dss_prec_set(dss_prec_t dss_prec) { - if (config_dss == false) - return (true); + if (have_dss == false) + return (dss_prec != dss_prec_disabled); malloc_mutex_lock(&dss_mtx); dss_prec_default = dss_prec; malloc_mutex_unlock(&dss_mtx); @@ -70,7 +70,7 @@ chunk_alloc_dss(size_t size, size_t alignment, bool *zero) { void *ret; - cassert(config_dss); + cassert(have_dss); assert(size > 0 && (size & chunksize_mask) == 0); assert(alignment > 0 && (alignment & chunksize_mask) == 0); @@ -143,7 +143,7 @@ chunk_in_dss(void *chunk) { bool ret; - cassert(config_dss); + cassert(have_dss); malloc_mutex_lock(&dss_mtx); if 
((uintptr_t)chunk >= (uintptr_t)dss_base @@ -160,7 +160,7 @@ bool chunk_dss_boot(void) { - cassert(config_dss); + cassert(have_dss); if (malloc_mutex_init(&dss_mtx)) return (true); @@ -175,7 +175,7 @@ void chunk_dss_prefork(void) { - if (config_dss) + if (have_dss) malloc_mutex_prefork(&dss_mtx); } @@ -183,7 +183,7 @@ void chunk_dss_postfork_parent(void) { - if (config_dss) + if (have_dss) malloc_mutex_postfork_parent(&dss_mtx); } @@ -191,7 +191,7 @@ void chunk_dss_postfork_child(void) { - if (config_dss) + if (have_dss) malloc_mutex_postfork_child(&dss_mtx); } diff --git a/src/ctl.c b/src/ctl.c index cc2c5aef..0340a274 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -74,7 +74,6 @@ CTL_PROTO(thread_allocatedp) CTL_PROTO(thread_deallocated) CTL_PROTO(thread_deallocatedp) CTL_PROTO(config_debug) -CTL_PROTO(config_dss) CTL_PROTO(config_fill) CTL_PROTO(config_lazy_lock) CTL_PROTO(config_mremap) @@ -213,7 +212,6 @@ static const ctl_named_node_t thread_node[] = { static const ctl_named_node_t config_node[] = { {NAME("debug"), CTL(config_debug)}, - {NAME("dss"), CTL(config_dss)}, {NAME("fill"), CTL(config_fill)}, {NAME("lazy_lock"), CTL(config_lazy_lock)}, {NAME("mremap"), CTL(config_mremap)}, @@ -1136,7 +1134,6 @@ label_return: /******************************************************************************/ CTL_RO_BOOL_CONFIG_GEN(config_debug) -CTL_RO_BOOL_CONFIG_GEN(config_dss) CTL_RO_BOOL_CONFIG_GEN(config_fill) CTL_RO_BOOL_CONFIG_GEN(config_lazy_lock) CTL_RO_BOOL_CONFIG_GEN(config_mremap) @@ -1356,8 +1353,7 @@ arena_i_dss_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, arena_t *arena = arenas[arena_ind]; if (arena != NULL) { dss_prec_old = arena_dss_prec_get(arena); - arena_dss_prec_set(arena, dss_prec); - err = false; + err = arena_dss_prec_set(arena, dss_prec); } else err = true; } else { diff --git a/src/huge.c b/src/huge.c index d72f2135..e725fd90 100644 --- a/src/huge.c +++ b/src/huge.c @@ -140,7 +140,7 @@ huge_ralloc(void *ptr, size_t oldsize, 
size_t size, size_t extra, * Use mremap(2) if this is a huge-->huge reallocation, and neither the * source nor the destination are in dss. */ - if (oldsize >= chunksize && (config_dss == false || (chunk_in_dss(ptr) + if (oldsize >= chunksize && (have_dss == false || (chunk_in_dss(ptr) == false && chunk_in_dss(ret) == false))) { size_t newsize = huge_salloc(ret); @@ -198,12 +198,12 @@ static void huge_dalloc_junk(void *ptr, size_t usize) { - if (config_fill && config_dss && opt_junk) { + if (config_fill && have_dss && opt_junk) { /* * Only bother junk filling if the chunk isn't about to be * unmapped. */ - if (config_munmap == false || (config_dss && chunk_in_dss(ptr))) + if (config_munmap == false || (have_dss && chunk_in_dss(ptr))) memset(ptr, 0x5a, usize); } } diff --git a/test/integration/MALLOCX_ARENA.c b/test/integration/MALLOCX_ARENA.c index 695a5b66..30c203ae 100644 --- a/test/integration/MALLOCX_ARENA.c +++ b/test/integration/MALLOCX_ARENA.c @@ -2,6 +2,14 @@ #define NTHREADS 10 +static bool have_dss = +#ifdef JEMALLOC_DSS + true +#else + false +#endif + ; + void * thd_start(void *arg) { @@ -18,13 +26,16 @@ thd_start(void *arg) size_t mib[3]; size_t miblen = sizeof(mib) / sizeof(size_t); const char *dss_precs[] = {"disabled", "primary", "secondary"}; - const char *dss = dss_precs[thread_ind % - (sizeof(dss_precs)/sizeof(char*))]; + unsigned prec_ind = thread_ind % + (sizeof(dss_precs)/sizeof(char*)); + const char *dss = dss_precs[prec_ind]; + int expected_err = (have_dss || prec_ind == 0) ? 
0 : EFAULT; assert_d_eq(mallctlnametomib("arena.0.dss", mib, &miblen), 0, "Error in mallctlnametomib()"); mib[1] = arena_ind; assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, (void *)&dss, - sizeof(const char *)), 0, "Error in mallctlbymib()"); + sizeof(const char *)), expected_err, + "Error in mallctlbymib()"); } p = mallocx(1, MALLOCX_ARENA(arena_ind)); diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 31fb8105..caf20f86 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -127,7 +127,6 @@ TEST_BEGIN(test_mallctl_config) } while (0) TEST_MALLCTL_CONFIG(debug); - TEST_MALLCTL_CONFIG(dss); TEST_MALLCTL_CONFIG(fill); TEST_MALLCTL_CONFIG(lazy_lock); TEST_MALLCTL_CONFIG(mremap); @@ -255,15 +254,28 @@ TEST_BEGIN(test_arena_i_dss) { const char *dss_prec_old, *dss_prec_new; size_t sz = sizeof(dss_prec_old); + size_t mib[3]; + size_t miblen; - dss_prec_new = "primary"; - assert_d_eq(mallctl("arena.0.dss", &dss_prec_old, &sz, &dss_prec_new, + miblen = sizeof(mib)/sizeof(size_t); + assert_d_eq(mallctlnametomib("arena.0.dss", mib, &miblen), 0, + "Unexpected mallctlnametomib() error"); + + dss_prec_new = "disabled"; + assert_d_eq(mallctlbymib(mib, miblen, &dss_prec_old, &sz, &dss_prec_new, sizeof(dss_prec_new)), 0, "Unexpected mallctl() failure"); assert_str_ne(dss_prec_old, "primary", "Unexpected default for dss precedence"); - assert_d_eq(mallctl("arena.0.dss", &dss_prec_new, &sz, &dss_prec_old, + assert_d_eq(mallctlbymib(mib, miblen, &dss_prec_new, &sz, &dss_prec_old, sizeof(dss_prec_old)), 0, "Unexpected mallctl() failure"); + + mib[1] = narenas_total_get(); + dss_prec_new = "disabled"; + assert_d_eq(mallctlbymib(mib, miblen, &dss_prec_old, &sz, &dss_prec_new, + sizeof(dss_prec_new)), 0, "Unexpected mallctl() failure"); + assert_str_ne(dss_prec_old, "primary", + "Unexpected default for dss precedence"); } TEST_END From a2c719b37445ce9083b6fc5084436dc37ceb7f75 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 15 Apr 2014 12:46:28 -0700 Subject: 
[PATCH 0426/3378] Remove the "arenas.purge" mallctl. Remove the "arenas.purge" mallctl, which was obsoleted by the "arena..purge" mallctl in 3.1.0. --- doc/jemalloc.xml.in | 12 +----------- src/ctl.c | 27 --------------------------- test/unit/mallctl.c | 13 ------------- 3 files changed, 1 insertion(+), 51 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 16dd0bbe..b0c77c20 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1267,7 +1267,7 @@ malloc_conf = "xmalloc:true";]]> arena.<i>.purge - (unsigned) + (void) -- Purge unused dirty pages for arena <i>, or for @@ -1410,16 +1410,6 @@ malloc_conf = "xmalloc:true";]]> class. - - - arenas.purge - (unsigned) - -w - - Purge unused dirty pages for the specified arena, or - for all arenas if none is specified. - - arenas.extend diff --git a/src/ctl.c b/src/ctl.c index 0340a274..3d44a951 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -129,7 +129,6 @@ CTL_PROTO(arenas_tcache_max) CTL_PROTO(arenas_nbins) CTL_PROTO(arenas_nhbins) CTL_PROTO(arenas_nlruns) -CTL_PROTO(arenas_purge) CTL_PROTO(arenas_extend) CTL_PROTO(prof_active) CTL_PROTO(prof_dump) @@ -301,7 +300,6 @@ static const ctl_named_node_t arenas_node[] = { {NAME("bin"), CHILD(indexed, arenas_bin)}, {NAME("nlruns"), CTL(arenas_nlruns)}, {NAME("lrun"), CHILD(indexed, arenas_lrun)}, - {NAME("purge"), CTL(arenas_purge)}, {NAME("extend"), CTL(arenas_extend)} }; @@ -1468,31 +1466,6 @@ arenas_lrun_i_index(const size_t *mib, size_t miblen, size_t i) return (super_arenas_lrun_i_node); } -static int -arenas_purge_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, - void *newp, size_t newlen) -{ - int ret; - unsigned arena_ind; - - malloc_mutex_lock(&ctl_mtx); - WRITEONLY(); - arena_ind = UINT_MAX; - WRITE(arena_ind, unsigned); - if (newp != NULL && arena_ind >= ctl_stats.narenas) - ret = EFAULT; - else { - if (arena_ind == UINT_MAX) - arena_ind = ctl_stats.narenas; - arena_purge(arena_ind); - ret = 0; - } - -label_return: - 
malloc_mutex_unlock(&ctl_mtx); - return (ret); -} - static int arenas_extend_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index caf20f86..73f42ddd 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -279,18 +279,6 @@ TEST_BEGIN(test_arena_i_dss) } TEST_END -TEST_BEGIN(test_arenas_purge) -{ - unsigned arena = 0; - - assert_d_eq(mallctl("arenas.purge", NULL, NULL, &arena, sizeof(arena)), - 0, "Unexpected mallctl() failure"); - - assert_d_eq(mallctl("arenas.purge", NULL, NULL, NULL, 0), 0, - "Unexpected mallctl() failure"); -} -TEST_END - TEST_BEGIN(test_arenas_initialized) { unsigned narenas; @@ -417,7 +405,6 @@ main(void) test_thread_arena, test_arena_i_purge, test_arena_i_dss, - test_arenas_purge, test_arenas_initialized, test_arenas_constants, test_arenas_bin_constants, From ecd3e59ca351d7111ec72a327fe0c009f2aa69a0 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 15 Apr 2014 14:33:50 -0700 Subject: [PATCH 0427/3378] Remove the "opt.valgrind" mallctl. Remove the "opt.valgrind" mallctl because it is unnecessary -- jemalloc automatically detects whether it is running inside valgrind. --- doc/jemalloc.xml.in | 13 ------- .../jemalloc/internal/jemalloc_internal.h.in | 9 ++--- include/jemalloc/internal/private_symbols.txt | 2 +- src/ctl.c | 3 -- src/jemalloc.c | 35 +++++++++---------- src/quarantine.c | 2 +- test/unit/mallctl.c | 1 - 7 files changed, 24 insertions(+), 41 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index b0c77c20..3e0b806f 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -943,19 +943,6 @@ for (i = 0; i < nbins; i++) { is disabled by default. - - - opt.valgrind - (bool) - r- - [] - - Valgrind - support enabled/disabled. This option is vestigal because jemalloc - auto-detects whether it is running inside Valgrind. This option is - disabled by default, unless running inside Valgrind. 
- - opt.xmalloc diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 4821b9bc..9b1a6c85 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -377,12 +377,12 @@ static const bool config_ivsalloc = * usable space. */ #define JEMALLOC_VALGRIND_MALLOC(cond, ptr, usize, zero) do { \ - if (config_valgrind && opt_valgrind && cond) \ + if (config_valgrind && in_valgrind && cond) \ VALGRIND_MALLOCLIKE_BLOCK(ptr, usize, p2rz(ptr), zero); \ } while (0) #define JEMALLOC_VALGRIND_REALLOC(ptr, usize, old_ptr, old_usize, \ old_rzsize, zero) do { \ - if (config_valgrind && opt_valgrind) { \ + if (config_valgrind && in_valgrind) { \ size_t rzsize = p2rz(ptr); \ \ if (ptr == old_ptr) { \ @@ -418,7 +418,7 @@ static const bool config_ivsalloc = } \ } while (0) #define JEMALLOC_VALGRIND_FREE(ptr, rzsize) do { \ - if (config_valgrind && opt_valgrind) \ + if (config_valgrind && in_valgrind) \ VALGRIND_FREELIKE_BLOCK(ptr, rzsize); \ } while (0) #else @@ -504,11 +504,12 @@ extern bool opt_junk; extern size_t opt_quarantine; extern bool opt_redzone; extern bool opt_utrace; -extern bool opt_valgrind; extern bool opt_xmalloc; extern bool opt_zero; extern size_t opt_narenas; +extern bool in_valgrind; + /* Number of CPUs. 
*/ extern unsigned ncpus; diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index e1cb28f8..c1403779 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -217,6 +217,7 @@ idalloc idalloct imalloc imalloct +in_valgrind ipalloc ipalloct iqalloc @@ -278,7 +279,6 @@ opt_redzone opt_stats_print opt_tcache opt_utrace -opt_valgrind opt_xmalloc opt_zero p2rz diff --git a/src/ctl.c b/src/ctl.c index 3d44a951..9ee5de9f 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -98,7 +98,6 @@ CTL_PROTO(opt_zero) CTL_PROTO(opt_quarantine) CTL_PROTO(opt_redzone) CTL_PROTO(opt_utrace) -CTL_PROTO(opt_valgrind) CTL_PROTO(opt_xmalloc) CTL_PROTO(opt_tcache) CTL_PROTO(opt_lg_tcache_max) @@ -238,7 +237,6 @@ static const ctl_named_node_t opt_node[] = { {NAME("quarantine"), CTL(opt_quarantine)}, {NAME("redzone"), CTL(opt_redzone)}, {NAME("utrace"), CTL(opt_utrace)}, - {NAME("valgrind"), CTL(opt_valgrind)}, {NAME("xmalloc"), CTL(opt_xmalloc)}, {NAME("tcache"), CTL(opt_tcache)}, {NAME("lg_tcache_max"), CTL(opt_lg_tcache_max)}, @@ -1159,7 +1157,6 @@ CTL_RO_NL_CGEN(config_fill, opt_quarantine, opt_quarantine, size_t) CTL_RO_NL_CGEN(config_fill, opt_redzone, opt_redzone, bool) CTL_RO_NL_CGEN(config_fill, opt_zero, opt_zero, bool) CTL_RO_NL_CGEN(config_utrace, opt_utrace, opt_utrace, bool) -CTL_RO_NL_CGEN(config_valgrind, opt_valgrind, opt_valgrind, bool) CTL_RO_NL_CGEN(config_xmalloc, opt_xmalloc, opt_xmalloc, bool) CTL_RO_NL_CGEN(config_tcache, opt_tcache, opt_tcache, bool) CTL_RO_NL_CGEN(config_tcache, opt_lg_tcache_max, opt_lg_tcache_max, ssize_t) diff --git a/src/jemalloc.c b/src/jemalloc.c index 0de59408..11f1c450 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -27,11 +27,13 @@ bool opt_junk = size_t opt_quarantine = ZU(0); bool opt_redzone = false; bool opt_utrace = false; -bool opt_valgrind = false; bool opt_xmalloc = false; bool opt_zero = false; size_t opt_narenas = 0; +/* 
Initialized to true if the process is running inside Valgrind. */ +bool in_valgrind; + unsigned ncpus; malloc_mutex_t arenas_lock; @@ -394,14 +396,14 @@ malloc_conf_init(void) * valgrind option remains in jemalloc 3.x for compatibility reasons. */ if (config_valgrind) { - opt_valgrind = (RUNNING_ON_VALGRIND != 0) ? true : false; - if (config_fill && opt_valgrind) { + in_valgrind = (RUNNING_ON_VALGRIND != 0) ? true : false; + if (config_fill && in_valgrind) { opt_junk = false; assert(opt_zero == false); opt_quarantine = JEMALLOC_VALGRIND_QUARANTINE_DEFAULT; opt_redzone = true; } - if (config_tcache && opt_valgrind) + if (config_tcache && in_valgrind) opt_tcache = false; } @@ -608,9 +610,6 @@ malloc_conf_init(void) if (config_utrace) { CONF_HANDLE_BOOL(opt_utrace, "utrace") } - if (config_valgrind) { - CONF_HANDLE_BOOL(opt_valgrind, "valgrind") - } if (config_xmalloc) { CONF_HANDLE_BOOL(opt_xmalloc, "xmalloc") } @@ -910,7 +909,7 @@ imalloc_prof(size_t usize, prof_thr_cnt_t *cnt) ret = imalloc_prof(usize, cnt); \ } else { \ if (config_stats || (config_valgrind && \ - opt_valgrind)) \ + in_valgrind)) \ usize = s2u(size); \ ret = imalloc(size); \ } \ @@ -1153,7 +1152,7 @@ je_calloc(size_t num, size_t size) PROF_ALLOC_PREP(1, usize, cnt); ret = icalloc_prof(usize, cnt); } else { - if (config_stats || (config_valgrind && opt_valgrind)) + if (config_stats || (config_valgrind && in_valgrind)) usize = s2u(num_size); ret = icalloc(num_size); } @@ -1228,7 +1227,7 @@ ifree(void *ptr) usize = isalloc(ptr, config_prof); if (config_stats) thread_allocated_tsd_get()->deallocated += usize; - if (config_valgrind && opt_valgrind) + if (config_valgrind && in_valgrind) rzsize = p2rz(ptr); iqalloc(ptr); JEMALLOC_VALGRIND_FREE(ptr, rzsize); @@ -1257,9 +1256,9 @@ je_realloc(void *ptr, size_t size) malloc_thread_init(); if ((config_prof && opt_prof) || config_stats || - (config_valgrind && opt_valgrind)) + (config_valgrind && in_valgrind)) old_usize = isalloc(ptr, config_prof); - if 
(config_valgrind && opt_valgrind) + if (config_valgrind && in_valgrind) old_rzsize = config_prof ? p2rz(ptr) : u2rz(old_usize); if (config_prof && opt_prof) { @@ -1269,7 +1268,7 @@ je_realloc(void *ptr, size_t size) PROF_ALLOC_PREP(1, usize, cnt); ret = irealloc_prof(ptr, old_usize, usize, cnt); } else { - if (config_stats || (config_valgrind && opt_valgrind)) + if (config_stats || (config_valgrind && in_valgrind)) usize = s2u(size); ret = iralloc(ptr, size, 0, 0, false); } @@ -1574,9 +1573,9 @@ je_rallocx(void *ptr, size_t size, int flags) } if ((config_prof && opt_prof) || config_stats || - (config_valgrind && opt_valgrind)) + (config_valgrind && in_valgrind)) old_usize = isalloc(ptr, config_prof); - if (config_valgrind && opt_valgrind) + if (config_valgrind && in_valgrind) old_rzsize = u2rz(old_usize); if (config_prof && opt_prof) { @@ -1594,7 +1593,7 @@ je_rallocx(void *ptr, size_t size, int flags) try_tcache_dalloc, arena); if (p == NULL) goto label_oom; - if (config_stats || (config_valgrind && opt_valgrind)) + if (config_stats || (config_valgrind && in_valgrind)) usize = isalloc(p, config_prof); } @@ -1702,7 +1701,7 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) arena = NULL; old_usize = isalloc(ptr, config_prof); - if (config_valgrind && opt_valgrind) + if (config_valgrind && in_valgrind) old_rzsize = u2rz(old_usize); if (config_prof && opt_prof) { @@ -1784,7 +1783,7 @@ je_dallocx(void *ptr, int flags) } if (config_stats) thread_allocated_tsd_get()->deallocated += usize; - if (config_valgrind && opt_valgrind) + if (config_valgrind && in_valgrind) rzsize = p2rz(ptr); iqalloct(ptr, try_tcache); JEMALLOC_VALGRIND_FREE(ptr, rzsize); diff --git a/src/quarantine.c b/src/quarantine.c index 54315116..3b874422 100644 --- a/src/quarantine.c +++ b/src/quarantine.c @@ -146,7 +146,7 @@ quarantine(void *ptr) * Only do redzone validation if Valgrind isn't in * operation. 
*/ - if ((config_valgrind == false || opt_valgrind == false) + if ((config_valgrind == false || in_valgrind == false) && usize <= SMALL_MAXCLASS) arena_quarantine_junk_small(ptr, usize); else diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 73f42ddd..754834c1 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -170,7 +170,6 @@ TEST_BEGIN(test_mallctl_opt) TEST_MALLCTL_OPT(bool, redzone, fill); TEST_MALLCTL_OPT(bool, zero, fill); TEST_MALLCTL_OPT(bool, utrace, utrace); - TEST_MALLCTL_OPT(bool, valgrind, valgrind); TEST_MALLCTL_OPT(bool, xmalloc, xmalloc); TEST_MALLCTL_OPT(bool, tcache, tcache); TEST_MALLCTL_OPT(size_t, lg_tcache_max, tcache); From bd87b01999416ec7418ff8bdb504d9b6c009ff68 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 15 Apr 2014 16:35:08 -0700 Subject: [PATCH 0428/3378] Optimize Valgrind integration. Forcefully disable tcache if running inside Valgrind, and remove Valgrind calls in tcache-specific code. Restructure Valgrind-related code to move most Valgrind calls out of the fast path functions. Take advantage of static knowledge to elide some branches in JEMALLOC_VALGRIND_REALLOC(). 
--- Makefile.in | 4 + doc/jemalloc.xml.in | 3 +- .../jemalloc/internal/jemalloc_internal.h.in | 84 +------------ include/jemalloc/internal/private_symbols.txt | 4 + include/jemalloc/internal/tcache.h | 7 +- include/jemalloc/internal/valgrind.h | 112 ++++++++++++++++++ src/arena.c | 28 ++--- src/base.c | 7 +- src/chunk.c | 6 +- src/chunk_dss.c | 3 +- src/jemalloc.c | 75 +++++++----- src/valgrind.c | 34 ++++++ 12 files changed, 231 insertions(+), 136 deletions(-) create mode 100644 include/jemalloc/internal/valgrind.h create mode 100644 src/valgrind.c diff --git a/Makefile.in b/Makefile.in index f7aa7d87..e411804a 100644 --- a/Makefile.in +++ b/Makefile.in @@ -48,6 +48,7 @@ cfgoutputs_in := @cfgoutputs_in@ cfgoutputs_out := @cfgoutputs_out@ enable_autogen := @enable_autogen@ enable_code_coverage := @enable_code_coverage@ +enable_valgrind := @enable_valgrind@ enable_zone_allocator := @enable_zone_allocator@ DSO_LDFLAGS = @DSO_LDFLAGS@ SOREV = @SOREV@ @@ -82,6 +83,9 @@ C_SRCS := $(srcroot)src/jemalloc.c $(srcroot)src/arena.c \ $(srcroot)src/mb.c $(srcroot)src/mutex.c $(srcroot)src/prof.c \ $(srcroot)src/quarantine.c $(srcroot)src/rtree.c $(srcroot)src/stats.c \ $(srcroot)src/tcache.c $(srcroot)src/util.c $(srcroot)src/tsd.c +ifeq ($(enable_valgrind), 1) +C_SRCS += $(srcroot)src/valgrind.c +endif ifeq ($(enable_zone_allocator), 1) C_SRCS += $(srcroot)src/zone.c endif diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 3e0b806f..78e9b3c6 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -979,7 +979,8 @@ malloc_conf = "xmalloc:true";]]> linkend="opt.lg_tcache_max">opt.lg_tcache_max option for related tuning information. This option is enabled by default unless running inside Valgrind. + url="http://valgrind.org/">Valgrind, in which case it is + forcefully disabled. 
diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 9b1a6c85..50d44cc2 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -60,11 +60,6 @@ typedef intptr_t ssize_t; #include #endif -#ifdef JEMALLOC_VALGRIND -#include -#include -#endif - #define JEMALLOC_NO_DEMANGLE #ifdef JEMALLOC_JET # define JEMALLOC_N(n) jet_##n @@ -362,81 +357,7 @@ static const bool config_ivsalloc = # define VARIABLE_ARRAY(type, name, count) type name[count] #endif -#ifdef JEMALLOC_VALGRIND -/* - * The JEMALLOC_VALGRIND_*() macros must be macros rather than functions - * so that when Valgrind reports errors, there are no extra stack frames - * in the backtraces. - * - * The size that is reported to valgrind must be consistent through a chain of - * malloc..realloc..realloc calls. Request size isn't recorded anywhere in - * jemalloc, so it is critical that all callers of these macros provide usize - * rather than request size. As a result, buffer overflow detection is - * technically weakened for the standard API, though it is generally accepted - * practice to consider any extra bytes reported by malloc_usable_size() as - * usable space. 
- */ -#define JEMALLOC_VALGRIND_MALLOC(cond, ptr, usize, zero) do { \ - if (config_valgrind && in_valgrind && cond) \ - VALGRIND_MALLOCLIKE_BLOCK(ptr, usize, p2rz(ptr), zero); \ -} while (0) -#define JEMALLOC_VALGRIND_REALLOC(ptr, usize, old_ptr, old_usize, \ - old_rzsize, zero) do { \ - if (config_valgrind && in_valgrind) { \ - size_t rzsize = p2rz(ptr); \ - \ - if (ptr == old_ptr) { \ - VALGRIND_RESIZEINPLACE_BLOCK(ptr, old_usize, \ - usize, rzsize); \ - if (zero && old_usize < usize) { \ - VALGRIND_MAKE_MEM_DEFINED( \ - (void *)((uintptr_t)ptr + \ - old_usize), usize - old_usize); \ - } \ - } else { \ - if (old_ptr != NULL) { \ - VALGRIND_FREELIKE_BLOCK(old_ptr, \ - old_rzsize); \ - } \ - if (ptr != NULL) { \ - size_t copy_size = (old_usize < usize) \ - ? old_usize : usize; \ - size_t tail_size = usize - copy_size; \ - VALGRIND_MALLOCLIKE_BLOCK(ptr, usize, \ - rzsize, false); \ - if (copy_size > 0) { \ - VALGRIND_MAKE_MEM_DEFINED(ptr, \ - copy_size); \ - } \ - if (zero && tail_size > 0) { \ - VALGRIND_MAKE_MEM_DEFINED( \ - (void *)((uintptr_t)ptr + \ - copy_size), tail_size); \ - } \ - } \ - } \ - } \ -} while (0) -#define JEMALLOC_VALGRIND_FREE(ptr, rzsize) do { \ - if (config_valgrind && in_valgrind) \ - VALGRIND_FREELIKE_BLOCK(ptr, rzsize); \ -} while (0) -#else -#define RUNNING_ON_VALGRIND ((unsigned)0) -#define VALGRIND_MALLOCLIKE_BLOCK(addr, sizeB, rzB, is_zeroed) \ - do {} while (0) -#define VALGRIND_RESIZEINPLACE_BLOCK(addr, oldSizeB, newSizeB, rzB) \ - do {} while (0) -#define VALGRIND_FREELIKE_BLOCK(addr, rzB) do {} while (0) -#define VALGRIND_MAKE_MEM_NOACCESS(_qzz_addr, _qzz_len) do {} while (0) -#define VALGRIND_MAKE_MEM_UNDEFINED(_qzz_addr, _qzz_len) do {} while (0) -#define VALGRIND_MAKE_MEM_DEFINED(_qzz_addr, _qzz_len) do {} while (0) -#define JEMALLOC_VALGRIND_MALLOC(cond, ptr, usize, zero) do {} while (0) -#define JEMALLOC_VALGRIND_REALLOC(ptr, usize, old_ptr, old_usize, \ - old_rzsize, zero) do {} while (0) -#define JEMALLOC_VALGRIND_FREE(ptr, 
rzsize) do {} while (0) -#endif - +#include "jemalloc/internal/valgrind.h" #include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/prng.h" @@ -463,6 +384,7 @@ static const bool config_ivsalloc = /******************************************************************************/ #define JEMALLOC_H_STRUCTS +#include "jemalloc/internal/valgrind.h" #include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/prng.h" @@ -534,6 +456,7 @@ void jemalloc_prefork(void); void jemalloc_postfork_parent(void); void jemalloc_postfork_child(void); +#include "jemalloc/internal/valgrind.h" #include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/prng.h" @@ -560,6 +483,7 @@ void jemalloc_postfork_child(void); /******************************************************************************/ #define JEMALLOC_H_INLINES +#include "jemalloc/internal/valgrind.h" #include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/prng.h" diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index c1403779..9d77cbaa 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -409,3 +409,7 @@ thread_allocated_tsd_set tsd_init_check_recursion tsd_init_finish u2rz +valgrind_freelike_block +valgrind_make_mem_defined +valgrind_make_mem_noaccess +valgrind_make_mem_undefined diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index 51974136..af248451 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -314,13 +314,11 @@ tcache_alloc_small(tcache_t *tcache, size_t size, bool zero) } else if (opt_zero) memset(ret, 0, size); } - VALGRIND_MAKE_MEM_UNDEFINED(ret, size); } else { if (config_fill && opt_junk) { arena_alloc_junk_small(ret, &arena_bin_info[binind], true); 
} - VALGRIND_MAKE_MEM_UNDEFINED(ret, size); memset(ret, 0, size); } @@ -369,11 +367,8 @@ tcache_alloc_large(tcache_t *tcache, size_t size, bool zero) else if (opt_zero) memset(ret, 0, size); } - VALGRIND_MAKE_MEM_UNDEFINED(ret, size); - } else { - VALGRIND_MAKE_MEM_UNDEFINED(ret, size); + } else memset(ret, 0, size); - } if (config_stats) tbin->tstats.nrequests++; diff --git a/include/jemalloc/internal/valgrind.h b/include/jemalloc/internal/valgrind.h new file mode 100644 index 00000000..52c93f29 --- /dev/null +++ b/include/jemalloc/internal/valgrind.h @@ -0,0 +1,112 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +#ifdef JEMALLOC_VALGRIND +#include + +/* + * The size that is reported to Valgrind must be consistent through a chain of + * malloc..realloc..realloc calls. Request size isn't recorded anywhere in + * jemalloc, so it is critical that all callers of these macros provide usize + * rather than request size. As a result, buffer overflow detection is + * technically weakened for the standard API, though it is generally accepted + * practice to consider any extra bytes reported by malloc_usable_size() as + * usable space. + */ +#define JEMALLOC_VALGRIND_MAKE_MEM_NOACCESS(ptr, usize) do { \ + if (in_valgrind) \ + valgrind_make_mem_noaccess(ptr, usize); \ +} while (0) +#define JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ptr, usize) do { \ + if (in_valgrind) \ + valgrind_make_mem_undefined(ptr, usize); \ +} while (0) +#define JEMALLOC_VALGRIND_MAKE_MEM_DEFINED(ptr, usize) do { \ + if (in_valgrind) \ + valgrind_make_mem_defined(ptr, usize); \ +} while (0) +/* + * The VALGRIND_MALLOCLIKE_BLOCK() and VALGRIND_RESIZEINPLACE_BLOCK() macro + * calls must be embedded in macros rather than in functions so that when + * Valgrind reports errors, there are no extra stack frames in the backtraces. 
+ */ +#define JEMALLOC_VALGRIND_MALLOC(cond, ptr, usize, zero) do { \ + if (in_valgrind && cond) \ + VALGRIND_MALLOCLIKE_BLOCK(ptr, usize, p2rz(ptr), zero); \ +} while (0) +#define JEMALLOC_VALGRIND_REALLOC(maybe_moved, ptr, usize, \ + ptr_maybe_null, old_ptr, old_usize, old_rzsize, old_ptr_maybe_null, \ + zero) do { \ + if (in_valgrind) { \ + size_t rzsize = p2rz(ptr); \ + \ + if (!maybe_moved || ptr == old_ptr) { \ + VALGRIND_RESIZEINPLACE_BLOCK(ptr, old_usize, \ + usize, rzsize); \ + if (zero && old_usize < usize) { \ + valgrind_make_mem_defined( \ + (void *)((uintptr_t)ptr + \ + old_usize), usize - old_usize); \ + } \ + } else { \ + if (!old_ptr_maybe_null || old_ptr != NULL) { \ + valgrind_freelike_block(old_ptr, \ + old_rzsize); \ + } \ + if (!ptr_maybe_null || ptr != NULL) { \ + size_t copy_size = (old_usize < usize) \ + ? old_usize : usize; \ + size_t tail_size = usize - copy_size; \ + VALGRIND_MALLOCLIKE_BLOCK(ptr, usize, \ + rzsize, false); \ + if (copy_size > 0) { \ + valgrind_make_mem_defined(ptr, \ + copy_size); \ + } \ + if (zero && tail_size > 0) { \ + valgrind_make_mem_defined( \ + (void *)((uintptr_t)ptr + \ + copy_size), tail_size); \ + } \ + } \ + } \ + } \ +} while (0) +#define JEMALLOC_VALGRIND_FREE(ptr, rzsize) do { \ + if (in_valgrind) \ + valgrind_freelike_block(ptr, rzsize); \ +} while (0) +#else +#define RUNNING_ON_VALGRIND ((unsigned)0) +#define JEMALLOC_VALGRIND_MAKE_MEM_NOACCESS(ptr, usize) do {} while (0) +#define JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ptr, usize) do {} while (0) +#define JEMALLOC_VALGRIND_MAKE_MEM_DEFINED(ptr, usize) do {} while (0) +#define JEMALLOC_VALGRIND_MALLOC(cond, ptr, usize, zero) do {} while (0) +#define JEMALLOC_VALGRIND_REALLOC(maybe_moved, ptr, usize, \ + ptr_maybe_null, old_ptr, old_usize, old_rzsize, old_ptr_maybe_null, \ + zero) do {} while (0) +#define JEMALLOC_VALGRIND_FREE(ptr, rzsize) do {} while (0) +#endif + +#endif /* JEMALLOC_H_TYPES */ 
+/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +#ifdef JEMALLOC_VALGRIND +void valgrind_make_mem_noaccess(void *ptr, size_t usize); +void valgrind_make_mem_undefined(void *ptr, size_t usize); +void valgrind_make_mem_defined(void *ptr, size_t usize); +void valgrind_freelike_block(void *ptr, size_t usize); +#endif + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ + diff --git a/src/arena.c b/src/arena.c index 8aa36fdf..3952e70d 100644 --- a/src/arena.c +++ b/src/arena.c @@ -337,8 +337,8 @@ static inline void arena_run_zero(arena_chunk_t *chunk, size_t run_ind, size_t npages) { - VALGRIND_MAKE_MEM_UNDEFINED((void *)((uintptr_t)chunk + (run_ind << - LG_PAGE)), (npages << LG_PAGE)); + JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED((void *)((uintptr_t)chunk + + (run_ind << LG_PAGE)), (npages << LG_PAGE)); memset((void *)((uintptr_t)chunk + (run_ind << LG_PAGE)), 0, (npages << LG_PAGE)); } @@ -347,8 +347,8 @@ static inline void arena_run_page_mark_zeroed(arena_chunk_t *chunk, size_t run_ind) { - VALGRIND_MAKE_MEM_DEFINED((void *)((uintptr_t)chunk + (run_ind << - LG_PAGE)), PAGE); + JEMALLOC_VALGRIND_MAKE_MEM_DEFINED((void *)((uintptr_t)chunk + (run_ind + << LG_PAGE)), PAGE); } static inline void @@ -457,7 +457,7 @@ arena_run_split_large_helper(arena_t *arena, arena_run_t *run, size_t size, arena_run_zero(chunk, run_ind, need_pages); } } else { - VALGRIND_MAKE_MEM_UNDEFINED((void *)((uintptr_t)chunk + + JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED((void *)((uintptr_t)chunk + (run_ind << LG_PAGE)), (need_pages << LG_PAGE)); } @@ -525,7 +525,7 @@ arena_run_split_small(arena_t 
*arena, arena_run_t *run, size_t size, if (config_debug && flag_dirty == 0 && arena_mapbits_unzeroed_get(chunk, run_ind+need_pages-1) == 0) arena_run_page_validate_zeroed(chunk, run_ind+need_pages-1); - VALGRIND_MAKE_MEM_UNDEFINED((void *)((uintptr_t)chunk + + JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED((void *)((uintptr_t)chunk + (run_ind << LG_PAGE)), (need_pages << LG_PAGE)); } @@ -592,14 +592,14 @@ arena_chunk_init_hard(arena_t *arena) * the chunk is not zeroed. */ if (zero == false) { - VALGRIND_MAKE_MEM_UNDEFINED((void *)arena_mapp_get(chunk, - map_bias+1), (size_t)((uintptr_t) arena_mapp_get(chunk, - chunk_npages-1) - (uintptr_t)arena_mapp_get(chunk, - map_bias+1))); + JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED( + (void *)arena_mapp_get(chunk, map_bias+1), + (size_t)((uintptr_t) arena_mapp_get(chunk, chunk_npages-1) - + (uintptr_t)arena_mapp_get(chunk, map_bias+1))); for (i = map_bias+1; i < chunk_npages-1; i++) arena_mapbits_unzeroed_set(chunk, i, unzeroed); } else { - VALGRIND_MAKE_MEM_DEFINED((void *)arena_mapp_get(chunk, + JEMALLOC_VALGRIND_MAKE_MEM_DEFINED((void *)arena_mapp_get(chunk, map_bias+1), (size_t)((uintptr_t) arena_mapp_get(chunk, chunk_npages-1) - (uintptr_t)arena_mapp_get(chunk, map_bias+1))); @@ -1645,13 +1645,13 @@ arena_malloc_small(arena_t *arena, size_t size, bool zero) } else if (opt_zero) memset(ret, 0, size); } - VALGRIND_MAKE_MEM_UNDEFINED(ret, size); + JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, size); } else { if (config_fill && opt_junk) { arena_alloc_junk_small(ret, &arena_bin_info[binind], true); } - VALGRIND_MAKE_MEM_UNDEFINED(ret, size); + JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, size); memset(ret, 0, size); } @@ -2226,7 +2226,7 @@ arena_ralloc(arena_t *arena, void *ptr, size_t oldsize, size_t size, * expectation that the extra bytes will be reliably preserved. */ copysize = (size < oldsize) ? 
size : oldsize; - VALGRIND_MAKE_MEM_UNDEFINED(ret, copysize); + JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, copysize); memcpy(ret, ptr, copysize); iqalloct(ptr, try_tcache_dalloc); return (ret); diff --git a/src/base.c b/src/base.c index 4e62e8fa..03dcf8f4 100644 --- a/src/base.c +++ b/src/base.c @@ -63,7 +63,7 @@ base_alloc(size_t size) ret = base_next_addr; base_next_addr = (void *)((uintptr_t)base_next_addr + csize); malloc_mutex_unlock(&base_mtx); - VALGRIND_MAKE_MEM_UNDEFINED(ret, csize); + JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, csize); return (ret); } @@ -89,7 +89,8 @@ base_node_alloc(void) ret = base_nodes; base_nodes = *(extent_node_t **)ret; malloc_mutex_unlock(&base_mtx); - VALGRIND_MAKE_MEM_UNDEFINED(ret, sizeof(extent_node_t)); + JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, + sizeof(extent_node_t)); } else { malloc_mutex_unlock(&base_mtx); ret = (extent_node_t *)base_alloc(sizeof(extent_node_t)); @@ -102,7 +103,7 @@ void base_node_dealloc(extent_node_t *node) { - VALGRIND_MAKE_MEM_UNDEFINED(node, sizeof(extent_node_t)); + JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(node, sizeof(extent_node_t)); malloc_mutex_lock(&base_mtx); *(extent_node_t **)node = base_nodes; base_nodes = node; diff --git a/src/chunk.c b/src/chunk.c index fdd693e0..246324a2 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -127,7 +127,7 @@ chunk_recycle(extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, size_t size, size_t i; size_t *p = (size_t *)(uintptr_t)ret; - VALGRIND_MAKE_MEM_DEFINED(ret, size); + JEMALLOC_VALGRIND_MAKE_MEM_DEFINED(ret, size); for (i = 0; i < size / sizeof(size_t); i++) assert(p[i] == 0); } @@ -203,7 +203,7 @@ label_return: prof_gdump(); } if (config_valgrind) - VALGRIND_MAKE_MEM_UNDEFINED(ret, size); + JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, size); } assert(CHUNK_ADDR2BASE(ret) == ret); return (ret); @@ -217,7 +217,7 @@ chunk_record(extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, void *chunk, extent_node_t *xnode, *node, *prev, *xprev, key; unzeroed = 
pages_purge(chunk, size); - VALGRIND_MAKE_MEM_NOACCESS(chunk, size); + JEMALLOC_VALGRIND_MAKE_MEM_NOACCESS(chunk, size); /* * Allocate a node before acquiring chunks_mtx even though it might not diff --git a/src/chunk_dss.c b/src/chunk_dss.c index 36133f14..82faf918 100644 --- a/src/chunk_dss.c +++ b/src/chunk_dss.c @@ -126,7 +126,8 @@ chunk_alloc_dss(size_t size, size_t alignment, bool *zero) if (cpad_size != 0) chunk_unmap(cpad, cpad_size); if (*zero) { - VALGRIND_MAKE_MEM_UNDEFINED(ret, size); + JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED( + ret, size); memset(ret, 0, size); } return (ret); diff --git a/src/jemalloc.c b/src/jemalloc.c index 11f1c450..36eae722 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -479,9 +479,10 @@ malloc_conf_init(void) while (*opts != '\0' && malloc_conf_next(&opts, &k, &klen, &v, &vlen) == false) { -#define CONF_HANDLE_BOOL(o, n) \ - if (sizeof(n)-1 == klen && strncmp(n, k, \ - klen) == 0) { \ +#define CONF_MATCH(n) \ + (sizeof(n)-1 == klen && strncmp(n, k, klen) == 0) +#define CONF_HANDLE_BOOL(o, n, cont) \ + if (CONF_MATCH(n)) { \ if (strncmp("true", v, vlen) == 0 && \ vlen == sizeof("true")-1) \ o = true; \ @@ -493,11 +494,11 @@ malloc_conf_init(void) "Invalid conf value", \ k, klen, v, vlen); \ } \ - continue; \ + if (cont) \ + continue; \ } #define CONF_HANDLE_SIZE_T(o, n, min, max, clip) \ - if (sizeof(n)-1 == klen && strncmp(n, k, \ - klen) == 0) { \ + if (CONF_MATCH(n)) { \ uintmax_t um; \ char *end; \ \ @@ -528,8 +529,7 @@ malloc_conf_init(void) continue; \ } #define CONF_HANDLE_SSIZE_T(o, n, min, max) \ - if (sizeof(n)-1 == klen && strncmp(n, k, \ - klen) == 0) { \ + if (CONF_MATCH(n)) { \ long l; \ char *end; \ \ @@ -550,8 +550,7 @@ malloc_conf_init(void) continue; \ } #define CONF_HANDLE_CHAR_P(o, n, d) \ - if (sizeof(n)-1 == klen && strncmp(n, k, \ - klen) == 0) { \ + if (CONF_MATCH(n)) { \ size_t cpylen = (vlen <= \ sizeof(o)-1) ? 
vlen : \ sizeof(o)-1; \ @@ -560,7 +559,7 @@ malloc_conf_init(void) continue; \ } - CONF_HANDLE_BOOL(opt_abort, "abort") + CONF_HANDLE_BOOL(opt_abort, "abort", true) /* * Chunks always require at least one header page, plus * one data page in the absence of redzones, or three @@ -599,44 +598,62 @@ malloc_conf_init(void) SIZE_T_MAX, false) CONF_HANDLE_SSIZE_T(opt_lg_dirty_mult, "lg_dirty_mult", -1, (sizeof(size_t) << 3) - 1) - CONF_HANDLE_BOOL(opt_stats_print, "stats_print") + CONF_HANDLE_BOOL(opt_stats_print, "stats_print", true) if (config_fill) { - CONF_HANDLE_BOOL(opt_junk, "junk") + CONF_HANDLE_BOOL(opt_junk, "junk", true) CONF_HANDLE_SIZE_T(opt_quarantine, "quarantine", 0, SIZE_T_MAX, false) - CONF_HANDLE_BOOL(opt_redzone, "redzone") - CONF_HANDLE_BOOL(opt_zero, "zero") + CONF_HANDLE_BOOL(opt_redzone, "redzone", true) + CONF_HANDLE_BOOL(opt_zero, "zero", true) } if (config_utrace) { - CONF_HANDLE_BOOL(opt_utrace, "utrace") + CONF_HANDLE_BOOL(opt_utrace, "utrace", true) } if (config_xmalloc) { - CONF_HANDLE_BOOL(opt_xmalloc, "xmalloc") + CONF_HANDLE_BOOL(opt_xmalloc, "xmalloc", true) } if (config_tcache) { - CONF_HANDLE_BOOL(opt_tcache, "tcache") + CONF_HANDLE_BOOL(opt_tcache, "tcache", + !config_valgrind || !in_valgrind) + if (CONF_MATCH("tcache")) { + assert(config_valgrind && in_valgrind); + if (opt_tcache) { + opt_tcache = false; + malloc_conf_error( + "tcache cannot be enabled " + "while running inside Valgrind", + k, klen, v, vlen); + } + continue; + } CONF_HANDLE_SSIZE_T(opt_lg_tcache_max, "lg_tcache_max", -1, (sizeof(size_t) << 3) - 1) } if (config_prof) { - CONF_HANDLE_BOOL(opt_prof, "prof") + CONF_HANDLE_BOOL(opt_prof, "prof", true) CONF_HANDLE_CHAR_P(opt_prof_prefix, "prof_prefix", "jeprof") - CONF_HANDLE_BOOL(opt_prof_active, "prof_active") + CONF_HANDLE_BOOL(opt_prof_active, "prof_active", + true) CONF_HANDLE_SSIZE_T(opt_lg_prof_sample, "lg_prof_sample", 0, (sizeof(uint64_t) << 3) - 1) - CONF_HANDLE_BOOL(opt_prof_accum, "prof_accum") + 
CONF_HANDLE_BOOL(opt_prof_accum, "prof_accum", + true) CONF_HANDLE_SSIZE_T(opt_lg_prof_interval, "lg_prof_interval", -1, (sizeof(uint64_t) << 3) - 1) - CONF_HANDLE_BOOL(opt_prof_gdump, "prof_gdump") - CONF_HANDLE_BOOL(opt_prof_final, "prof_final") - CONF_HANDLE_BOOL(opt_prof_leak, "prof_leak") + CONF_HANDLE_BOOL(opt_prof_gdump, "prof_gdump", + true) + CONF_HANDLE_BOOL(opt_prof_final, "prof_final", + true) + CONF_HANDLE_BOOL(opt_prof_leak, "prof_leak", + true) } malloc_conf_error("Invalid conf pair", k, klen, v, vlen); +#undef CONF_MATCH #undef CONF_HANDLE_BOOL #undef CONF_HANDLE_SIZE_T #undef CONF_HANDLE_SSIZE_T @@ -1293,8 +1310,8 @@ je_realloc(void *ptr, size_t size) ta->deallocated += old_usize; } UTRACE(ptr, size, ret); - JEMALLOC_VALGRIND_REALLOC(ret, usize, ptr, old_usize, old_rzsize, - false); + JEMALLOC_VALGRIND_REALLOC(true, ret, usize, true, ptr, old_usize, + old_rzsize, true, false); return (ret); } @@ -1604,7 +1621,8 @@ je_rallocx(void *ptr, size_t size, int flags) ta->deallocated += old_usize; } UTRACE(ptr, size, p); - JEMALLOC_VALGRIND_REALLOC(p, usize, ptr, old_usize, old_rzsize, zero); + JEMALLOC_VALGRIND_REALLOC(true, p, usize, false, ptr, old_usize, + old_rzsize, false, zero); return (p); label_oom: if (config_xmalloc && opt_xmalloc) { @@ -1731,7 +1749,8 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) ta->allocated += usize; ta->deallocated += old_usize; } - JEMALLOC_VALGRIND_REALLOC(ptr, usize, ptr, old_usize, old_rzsize, zero); + JEMALLOC_VALGRIND_REALLOC(false, ptr, usize, false, ptr, old_usize, + old_rzsize, false, zero); label_not_resized: UTRACE(ptr, size, ptr); return (usize); diff --git a/src/valgrind.c b/src/valgrind.c new file mode 100644 index 00000000..8e7ef3a2 --- /dev/null +++ b/src/valgrind.c @@ -0,0 +1,34 @@ +#include "jemalloc/internal/jemalloc_internal.h" +#ifndef JEMALLOC_VALGRIND +# error "This source file is for Valgrind integration." 
+#endif + +#include + +void +valgrind_make_mem_noaccess(void *ptr, size_t usize) +{ + + VALGRIND_MAKE_MEM_NOACCESS(ptr, usize); +} + +void +valgrind_make_mem_undefined(void *ptr, size_t usize) +{ + + VALGRIND_MAKE_MEM_UNDEFINED(ptr, usize); +} + +void +valgrind_make_mem_defined(void *ptr, size_t usize) +{ + + VALGRIND_MAKE_MEM_DEFINED(ptr, usize); +} + +void +valgrind_freelike_block(void *ptr, size_t usize) +{ + + VALGRIND_FREELIKE_BLOCK(ptr, usize); +} From a7619b7fa56f98d1ca99a23b458696dd37c12b77 Mon Sep 17 00:00:00 2001 From: Ben Maurer Date: Tue, 15 Apr 2014 13:28:37 -0700 Subject: [PATCH 0429/3378] outline rare tcache_get codepaths --- include/jemalloc/internal/private_symbols.txt | 1 + include/jemalloc/internal/tcache.h | 35 +--------------- src/tcache.c | 40 +++++++++++++++++++ 3 files changed, 43 insertions(+), 33 deletions(-) diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index f52d49f9..376f95d1 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -384,6 +384,7 @@ tcache_event tcache_event_hard tcache_flush tcache_get +tcache_get_hard tcache_initialized tcache_maxclass tcache_salloc diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index 51974136..96447f42 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -110,6 +110,7 @@ void tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem, tcache_t *tcache); void tcache_arena_associate(tcache_t *tcache, arena_t *arena); void tcache_arena_dissociate(tcache_t *tcache); +tcache_t *tcache_get_hard(tcache_t *tcache, bool create); tcache_t *tcache_create(arena_t *arena); void tcache_destroy(tcache_t *tcache); void tcache_thread_cleanup(void *arg); @@ -220,39 +221,7 @@ tcache_get(bool create) if ((uintptr_t)tcache <= (uintptr_t)TCACHE_STATE_MAX) { if (tcache == TCACHE_STATE_DISABLED) return (NULL); - if 
(tcache == NULL) { - if (create == false) { - /* - * Creating a tcache here would cause - * allocation as a side effect of free(). - * Ordinarily that would be okay since - * tcache_create() failure is a soft failure - * that doesn't propagate. However, if TLS - * data are freed via free() as in glibc, - * subtle corruption could result from setting - * a TLS variable after its backing memory is - * freed. - */ - return (NULL); - } - if (tcache_enabled_get() == false) { - tcache_enabled_set(false); /* Memoize. */ - return (NULL); - } - return (tcache_create(choose_arena(NULL))); - } - if (tcache == TCACHE_STATE_PURGATORY) { - /* - * Make a note that an allocator function was called - * after tcache_thread_cleanup() was called. - */ - tcache = TCACHE_STATE_REINCARNATED; - tcache_tsd_set(&tcache); - return (NULL); - } - if (tcache == TCACHE_STATE_REINCARNATED) - return (NULL); - not_reached(); + tcache = tcache_get_hard(tcache, create); } return (tcache); diff --git a/src/tcache.c b/src/tcache.c index 6de92960..868f2d77 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -265,6 +265,46 @@ tcache_arena_dissociate(tcache_t *tcache) } } +tcache_t * +tcache_get_hard(tcache_t *tcache, bool create) +{ + + if (tcache == NULL) { + if (create == false) { + /* + * Creating a tcache here would cause + * allocation as a side effect of free(). + * Ordinarily that would be okay since + * tcache_create() failure is a soft failure + * that doesn't propagate. However, if TLS + * data are freed via free() as in glibc, + * subtle corruption could result from setting + * a TLS variable after its backing memory is + * freed. + */ + return (NULL); + } + if (tcache_enabled_get() == false) { + tcache_enabled_set(false); /* Memoize. */ + return (NULL); + } + return (tcache_create(choose_arena(NULL))); + } + if (tcache == TCACHE_STATE_PURGATORY) { + /* + * Make a note that an allocator function was called + * after tcache_thread_cleanup() was called. 
+ */ + tcache = TCACHE_STATE_REINCARNATED; + tcache_tsd_set(&tcache); + return (NULL); + } + if (tcache == TCACHE_STATE_REINCARNATED) + return (NULL); + not_reached(); + return (NULL); +} + tcache_t * tcache_create(arena_t *arena) { From 6c39f9e059d0825f4c29d8cec9f318b798912c3c Mon Sep 17 00:00:00 2001 From: Ben Maurer Date: Tue, 15 Apr 2014 13:47:13 -0700 Subject: [PATCH 0430/3378] refactor profiling. only use a bytes till next sample variable. --- include/jemalloc/internal/private_symbols.txt | 1 + include/jemalloc/internal/prof.h | 222 ++++++------------ src/prof.c | 65 ++++- 3 files changed, 134 insertions(+), 154 deletions(-) diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 376f95d1..032bed4f 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -299,6 +299,7 @@ prof_idump prof_interval prof_lookup prof_malloc +prof_malloc_record_object prof_mdump prof_postfork_child prof_postfork_parent diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index 56014f18..27be10c1 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -177,8 +177,7 @@ struct prof_tdata_s { /* Sampling state. */ uint64_t prng_state; - uint64_t threshold; - uint64_t accum; + uint64_t bytes_until_sample; /* State used to avoid dumping while operating on prof internals. 
*/ bool enq; @@ -239,6 +238,7 @@ bool prof_boot2(void); void prof_prefork(void); void prof_postfork_parent(void); void prof_postfork_child(void); +void prof_sample_threshold_update(prof_tdata_t *prof_tdata); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ @@ -250,49 +250,13 @@ void prof_postfork_child(void); \ assert(size == s2u(size)); \ \ - prof_tdata = prof_tdata_get(true); \ - if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) { \ - if (prof_tdata != NULL) \ - ret = (prof_thr_cnt_t *)(uintptr_t)1U; \ - else \ - ret = NULL; \ - break; \ - } \ - \ - if (opt_prof_active == false) { \ - /* Sampling is currently inactive, so avoid sampling. */\ + if (!opt_prof_active || \ + prof_sample_accum_update(size, false, &prof_tdata)) { \ ret = (prof_thr_cnt_t *)(uintptr_t)1U; \ - } else if (opt_lg_prof_sample == 0) { \ - /* Don't bother with sampling logic, since sampling */\ - /* interval is 1. */\ + } else { \ bt_init(&bt, prof_tdata->vec); \ prof_backtrace(&bt, nignore); \ ret = prof_lookup(&bt); \ - } else { \ - if (prof_tdata->threshold == 0) { \ - /* Initialize. Seed the prng differently for */\ - /* each thread. */\ - prof_tdata->prng_state = \ - (uint64_t)(uintptr_t)&size; \ - prof_sample_threshold_update(prof_tdata); \ - } \ - \ - /* Determine whether to capture a backtrace based on */\ - /* whether size is enough for prof_accum to reach */\ - /* prof_tdata->threshold. However, delay updating */\ - /* these variables until prof_{m,re}alloc(), because */\ - /* we don't know for sure that the allocation will */\ - /* succeed. */\ - /* */\ - /* Use subtraction rather than addition to avoid */\ - /* potential integer overflow. 
*/\ - if (size >= prof_tdata->threshold - \ - prof_tdata->accum) { \ - bt_init(&bt, prof_tdata->vec); \ - prof_backtrace(&bt, nignore); \ - ret = prof_lookup(&bt); \ - } else \ - ret = (prof_thr_cnt_t *)(uintptr_t)1U; \ } \ } while (0) @@ -300,10 +264,13 @@ void prof_postfork_child(void); malloc_tsd_protos(JEMALLOC_ATTR(unused), prof_tdata, prof_tdata_t *) prof_tdata_t *prof_tdata_get(bool create); -void prof_sample_threshold_update(prof_tdata_t *prof_tdata); +void prof_sample_accum_update(size_t size, bool commit, + prof_tdata_t **prof_tdata_out); prof_ctx_t *prof_ctx_get(const void *ptr); void prof_ctx_set(const void *ptr, prof_ctx_t *ctx); bool prof_sample_accum_update(size_t size); +void prof_malloc_record_object(const void *ptr, size_t usize, + prof_thr_cnt_t *cnt) void prof_malloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt); void prof_realloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt, size_t old_usize, prof_ctx_t *old_ctx); @@ -330,55 +297,6 @@ prof_tdata_get(bool create) return (prof_tdata); } -JEMALLOC_INLINE void -prof_sample_threshold_update(prof_tdata_t *prof_tdata) -{ - /* - * The body of this function is compiled out unless heap profiling is - * enabled, so that it is possible to compile jemalloc with floating - * point support completely disabled. Avoiding floating point code is - * important on memory-constrained systems, but it also enables a - * workaround for versions of glibc that don't properly save/restore - * floating point registers during dynamic lazy symbol loading (which - * internally calls into whatever malloc implementation happens to be - * integrated into the application). Note that some compilers (e.g. - * gcc 4.8) may use floating point registers for fast memory moves, so - * jemalloc must be compiled with such optimizations disabled (e.g. - * -mno-sse) in order for the workaround to be complete. 
- */ -#ifdef JEMALLOC_PROF - uint64_t r; - double u; - - cassert(config_prof); - - /* - * Compute sample threshold as a geometrically distributed random - * variable with mean (2^opt_lg_prof_sample). - * - * __ __ - * | log(u) | 1 - * prof_tdata->threshold = | -------- |, where p = ------------------- - * | log(1-p) | opt_lg_prof_sample - * 2 - * - * For more information on the math, see: - * - * Non-Uniform Random Variate Generation - * Luc Devroye - * Springer-Verlag, New York, 1986 - * pp 500 - * (http://luc.devroye.org/rnbookindex.html) - */ - prng64(r, 53, prof_tdata->prng_state, - UINT64_C(6364136223846793005), UINT64_C(1442695040888963407)); - u = (double)r * (1.0/9007199254740992.0L); - prof_tdata->threshold = (uint64_t)(log(u) / - log(1.0 - (1.0 / (double)((uint64_t)1U << opt_lg_prof_sample)))) - + (uint64_t)1U; -#endif -} - JEMALLOC_INLINE prof_ctx_t * prof_ctx_get(const void *ptr) { @@ -415,34 +333,58 @@ prof_ctx_set(const void *ptr, prof_ctx_t *ctx) } JEMALLOC_INLINE bool -prof_sample_accum_update(size_t size) +prof_sample_accum_update(size_t size, bool commit, + prof_tdata_t **prof_tdata_out) { prof_tdata_t *prof_tdata; cassert(config_prof); - /* Sampling logic is unnecessary if the interval is 1. */ - assert(opt_lg_prof_sample != 0); - prof_tdata = prof_tdata_get(false); + prof_tdata = prof_tdata_get(true); if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) + prof_tdata = NULL; + + if (prof_tdata_out != NULL) + *prof_tdata_out = prof_tdata; + + if (prof_tdata == NULL) return (true); - /* Take care to avoid integer overflow. */ - if (size >= prof_tdata->threshold - prof_tdata->accum) { - prof_tdata->accum -= (prof_tdata->threshold - size); - /* Compute new sample threshold. 
*/ - prof_sample_threshold_update(prof_tdata); - while (prof_tdata->accum >= prof_tdata->threshold) { - prof_tdata->accum -= prof_tdata->threshold; - prof_sample_threshold_update(prof_tdata); - } - return (false); - } else { - prof_tdata->accum += size; + if (prof_tdata->bytes_until_sample >= size) { + if (commit) + prof_tdata->bytes_until_sample -= size; return (true); + } else { + /* Compute new sample threshold. */ + if (commit) + prof_sample_threshold_update(prof_tdata); + return (false); } } +JEMALLOC_INLINE void +prof_malloc_record_object(const void *ptr, size_t usize, prof_thr_cnt_t *cnt) { + prof_ctx_set(ptr, cnt->ctx); + + cnt->epoch++; + /*********/ + mb_write(); + /*********/ + cnt->cnts.curobjs++; + cnt->cnts.curbytes += usize; + if (opt_prof_accum) { + cnt->cnts.accumobjs++; + cnt->cnts.accumbytes += usize; + } + /*********/ + mb_write(); + /*********/ + cnt->epoch++; + /*********/ + mb_write(); + /*********/ +} + JEMALLOC_INLINE void prof_malloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt) { @@ -451,40 +393,20 @@ prof_malloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt) assert(ptr != NULL); assert(usize == isalloc(ptr, true)); - if (opt_lg_prof_sample != 0) { - if (prof_sample_accum_update(usize)) { - /* - * Don't sample. For malloc()-like allocation, it is - * always possible to tell in advance how large an - * object's usable size will be, so there should never - * be a difference between the usize passed to - * PROF_ALLOC_PREP() and prof_malloc(). - */ - assert((uintptr_t)cnt == (uintptr_t)1U); - } + if (prof_sample_accum_update(usize, true, NULL)) { + /* + * Don't sample. For malloc()-like allocation, it is + * always possible to tell in advance how large an + * object's usable size will be, so there should never + * be a difference between the usize passed to + * PROF_ALLOC_PREP() and prof_malloc(). 
+ */ + assert((uintptr_t)cnt == (uintptr_t)1U); } - if ((uintptr_t)cnt > (uintptr_t)1U) { - prof_ctx_set(ptr, cnt->ctx); - - cnt->epoch++; - /*********/ - mb_write(); - /*********/ - cnt->cnts.curobjs++; - cnt->cnts.curbytes += usize; - if (opt_prof_accum) { - cnt->cnts.accumobjs++; - cnt->cnts.accumbytes += usize; - } - /*********/ - mb_write(); - /*********/ - cnt->epoch++; - /*********/ - mb_write(); - /*********/ - } else + if ((uintptr_t)cnt > (uintptr_t)1U) + prof_malloc_record_object(ptr, usize, cnt); + else prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U); } @@ -499,18 +421,16 @@ prof_realloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt, if (ptr != NULL) { assert(usize == isalloc(ptr, true)); - if (opt_lg_prof_sample != 0) { - if (prof_sample_accum_update(usize)) { - /* - * Don't sample. The usize passed to - * PROF_ALLOC_PREP() was larger than what - * actually got allocated, so a backtrace was - * captured for this allocation, even though - * its actual usize was insufficient to cross - * the sample threshold. - */ - cnt = (prof_thr_cnt_t *)(uintptr_t)1U; - } + if (prof_sample_accum_update(usize, true, NULL)) { + /* + * Don't sample. The usize passed to + * PROF_ALLOC_PREP() was larger than what + * actually got allocated, so a backtrace was + * captured for this allocation, even though + * its actual usize was insufficient to cross + * the sample threshold. + */ + cnt = (prof_thr_cnt_t *)(uintptr_t)1U; } } diff --git a/src/prof.c b/src/prof.c index 1b1f7a84..82c7f703 100644 --- a/src/prof.c +++ b/src/prof.c @@ -645,6 +645,66 @@ prof_lookup(prof_bt_t *bt) return (ret.p); } + +void +prof_sample_threshold_update(prof_tdata_t *prof_tdata) +{ + /* + * The body of this function is compiled out unless heap profiling is + * enabled, so that it is possible to compile jemalloc with floating + * point support completely disabled. 
Avoiding floating point code is + * important on memory-constrained systems, but it also enables a + * workaround for versions of glibc that don't properly save/restore + * floating point registers during dynamic lazy symbol loading (which + * internally calls into whatever malloc implementation happens to be + * integrated into the application). Note that some compilers (e.g. + * gcc 4.8) may use floating point registers for fast memory moves, so + * jemalloc must be compiled with such optimizations disabled (e.g. + * -mno-sse) in order for the workaround to be complete. + */ +#ifdef JEMALLOC_PROF + uint64_t r; + double u; + + if (!config_prof) + return; + + if (prof_tdata == NULL) + prof_tdata = prof_tdata_get(false); + + if (opt_lg_prof_sample == 0) { + prof_tdata->bytes_until_sample = 0; + return; + } + + /* + * Compute sample threshold as a geometrically distributed random + * variable with mean (2^opt_lg_prof_sample). + * + * __ __ + * | log(u) | 1 + * prof_tdata->threshold = | -------- |, where p = ------------------- + * | log(1-p) | opt_lg_prof_sample + * 2 + * + * For more information on the math, see: + * + * Non-Uniform Random Variate Generation + * Luc Devroye + * Springer-Verlag, New York, 1986 + * pp 500 + * (http://luc.devroye.org/rnbookindex.html) + */ + prng64(r, 53, prof_tdata->prng_state, + UINT64_C(6364136223846793005), UINT64_C(1442695040888963407)); + u = (double)r * (1.0/9007199254740992.0L); + prof_tdata->bytes_until_sample = (uint64_t)(log(u) / + log(1.0 - (1.0 / (double)((uint64_t)1U << opt_lg_prof_sample)))) + + (uint64_t)1U; +#endif +} + + #ifdef JEMALLOC_JET size_t prof_bt_count(void) @@ -1224,9 +1284,8 @@ prof_tdata_init(void) return (NULL); } - prof_tdata->prng_state = 0; - prof_tdata->threshold = 0; - prof_tdata->accum = 0; + prof_tdata->prng_state = (uint64_t)(uintptr_t)prof_tdata; + prof_sample_threshold_update(prof_tdata); prof_tdata->enq = false; prof_tdata->enq_idump = false; From 021136ce4db79f50031a1fd5dd751891888fbc7b Mon 
Sep 17 00:00:00 2001 From: Ben Maurer Date: Wed, 16 Apr 2014 14:31:24 -0700 Subject: [PATCH 0431/3378] Create a const array with only a small bin to size map --- include/jemalloc/internal/arena.h | 3 ++- include/jemalloc/internal/jemalloc_internal.h.in | 4 ++-- include/jemalloc/internal/private_symbols.txt | 1 + include/jemalloc/internal/tcache.h | 6 +++--- src/arena.c | 10 +++++++++- 5 files changed, 17 insertions(+), 7 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 0e14c2c4..b435d0b0 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -385,6 +385,7 @@ extern ssize_t opt_lg_dirty_mult; * and all accesses are via the SMALL_SIZE2BIN macro. */ extern uint8_t const small_size2bin[]; +extern uint32_t const small_bin2size[]; #define SMALL_SIZE2BIN(s) (small_size2bin[(s-1) >> LG_TINY_MIN]) extern arena_bin_info_t arena_bin_info[NBINS]; @@ -964,7 +965,7 @@ arena_salloc(const void *ptr, bool demote) assert(arena_mapbits_large_get(chunk, pageind) != 0 || arena_ptr_small_binind_get(ptr, arena_mapbits_get(chunk, pageind)) == binind); - ret = arena_bin_info[binind].reg_size; + ret = small_bin2size[binind]; } return (ret); diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 9c79ae00..17d77623 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -602,7 +602,7 @@ s2u(size_t size) { if (size <= SMALL_MAXCLASS) - return (arena_bin_info[SMALL_SIZE2BIN(size)].reg_size); + return (small_bin2size[SMALL_SIZE2BIN(size)]); if (size <= arena_maxclass) return (PAGE_CEILING(size)); return (CHUNK_CEILING(size)); @@ -645,7 +645,7 @@ sa2u(size_t size, size_t alignment) if (usize <= arena_maxclass && alignment <= PAGE) { if (usize <= SMALL_MAXCLASS) - return (arena_bin_info[SMALL_SIZE2BIN(usize)].reg_size); + return (small_bin2size[SMALL_SIZE2BIN(usize)]); return 
(PAGE_CEILING(usize)); } else { size_t run_size; diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 032bed4f..12d64dc4 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -346,6 +346,7 @@ rtree_set s2u sa2u set_errno +small_bin2size small_size2bin stats_cactive stats_cactive_add diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index 96447f42..098d19ae 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -266,14 +266,14 @@ tcache_alloc_small(tcache_t *tcache, size_t size, bool zero) binind = SMALL_SIZE2BIN(size); assert(binind < NBINS); tbin = &tcache->tbins[binind]; - size = arena_bin_info[binind].reg_size; + size = small_bin2size[binind]; ret = tcache_alloc_easy(tbin); if (ret == NULL) { ret = tcache_alloc_small_hard(tcache, tbin, binind); if (ret == NULL) return (NULL); } - assert(tcache_salloc(ret) == arena_bin_info[binind].reg_size); + assert(tcache_salloc(ret) == size); if (zero == false) { if (config_fill) { @@ -296,7 +296,7 @@ tcache_alloc_small(tcache_t *tcache, size_t size, bool zero) if (config_stats) tbin->tstats.nrequests++; if (config_prof) - tcache->prof_accumbytes += arena_bin_info[binind].reg_size; + tcache->prof_accumbytes += size; tcache_event(tcache); return (ret); } diff --git a/src/arena.c b/src/arena.c index d5741000..37487ffa 100644 --- a/src/arena.c +++ b/src/arena.c @@ -7,6 +7,14 @@ ssize_t opt_lg_dirty_mult = LG_DIRTY_MULT_DEFAULT; arena_bin_info_t arena_bin_info[NBINS]; +JEMALLOC_ALIGNED(CACHELINE) +const uint32_t small_bin2size[NBINS] = { +#define SIZE_CLASS(bin, delta, size) \ + size, + SIZE_CLASSES +#undef SIZE_CLASS +}; + JEMALLOC_ALIGNED(CACHELINE) const uint8_t small_size2bin[] = { #define S2B_8(i) i, @@ -1615,7 +1623,7 @@ arena_malloc_small(arena_t *arena, size_t size, bool zero) binind = SMALL_SIZE2BIN(size); assert(binind < NBINS); 
bin = &arena->bins[binind]; - size = arena_bin_info[binind].reg_size; + size = small_bin2size[binind]; malloc_mutex_lock(&bin->lock); if ((run = bin->runcur) != NULL && run->nfree > 0) From 0b49403958b68294eee0eca8a0b5195e761cf316 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 16 Apr 2014 16:38:22 -0700 Subject: [PATCH 0432/3378] Fix debug-only compilation failures. Fix debug-only compilation failures introduced by changes to prof_sample_accum_update() in: 6c39f9e059d0825f4c29d8cec9f318b798912c3c refactor profiling. only use a bytes till next sample variable. --- include/jemalloc/internal/prof.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index 27be10c1..d7422538 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -264,13 +264,12 @@ void prof_sample_threshold_update(prof_tdata_t *prof_tdata); malloc_tsd_protos(JEMALLOC_ATTR(unused), prof_tdata, prof_tdata_t *) prof_tdata_t *prof_tdata_get(bool create); -void prof_sample_accum_update(size_t size, bool commit, +bool prof_sample_accum_update(size_t size, bool commit, prof_tdata_t **prof_tdata_out); prof_ctx_t *prof_ctx_get(const void *ptr); void prof_ctx_set(const void *ptr, prof_ctx_t *ctx); -bool prof_sample_accum_update(size_t size); void prof_malloc_record_object(const void *ptr, size_t usize, - prof_thr_cnt_t *cnt) + prof_thr_cnt_t *cnt); void prof_malloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt); void prof_realloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt, size_t old_usize, prof_ctx_t *old_ctx); From 3541a904d6fb949f3f0aea05418ccce7cbd4b705 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 16 Apr 2014 17:14:33 -0700 Subject: [PATCH 0433/3378] Refactor small_size2bin and small_bin2size. Refactor small_size2bin and small_bin2size to be inline functions rather than directly accessed arrays. 
--- include/jemalloc/internal/arena.h | 40 ++++++++++++++----- .../jemalloc/internal/jemalloc_internal.h.in | 26 +++++++----- include/jemalloc/internal/private_symbols.txt | 2 + include/jemalloc/internal/tcache.h | 4 +- src/arena.c | 18 ++++----- 5 files changed, 61 insertions(+), 29 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index fbbbb911..605a87e5 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -380,13 +380,17 @@ struct arena_s { extern ssize_t opt_lg_dirty_mult; /* - * small_size2bin is a compact lookup table that rounds request sizes up to + * small_size2bin_tab is a compact lookup table that rounds request sizes up to * size classes. In order to reduce cache footprint, the table is compressed, - * and all accesses are via the SMALL_SIZE2BIN macro. + * and all accesses are via small_size2bin(). */ -extern uint8_t const small_size2bin[]; -extern uint32_t const small_bin2size[]; -#define SMALL_SIZE2BIN(s) (small_size2bin[(s-1) >> LG_TINY_MIN]) +extern uint8_t const small_size2bin_tab[]; +/* + * small_bin2size_tab duplicates information in arena_bin_info, but in a const + * array, for which it is easier for the compiler to optimize repeated + * dereferences. 
+ */ +extern uint32_t const small_bin2size_tab[NBINS]; extern arena_bin_info_t arena_bin_info[NBINS]; @@ -450,6 +454,8 @@ void arena_postfork_child(arena_t *arena); #ifdef JEMALLOC_H_INLINES #ifndef JEMALLOC_ENABLE_INLINE +size_t small_size2bin(size_t size); +size_t small_bin2size(size_t binind); arena_chunk_map_t *arena_mapp_get(arena_chunk_t *chunk, size_t pageind); size_t *arena_mapbitsp_get(arena_chunk_t *chunk, size_t pageind); size_t arena_mapbitsp_read(size_t *mapbitsp); @@ -492,6 +498,22 @@ void arena_dalloc(arena_chunk_t *chunk, void *ptr, bool try_tcache); #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_)) # ifdef JEMALLOC_ARENA_INLINE_A +JEMALLOC_ALWAYS_INLINE size_t +small_size2bin(size_t size) +{ + + return ((size_t)(small_size2bin_tab[(size-1) >> LG_TINY_MIN])); +} + +JEMALLOC_ALWAYS_INLINE size_t +small_bin2size(size_t binind) +{ + + return ((size_t)(small_bin2size_tab[binind])); +} +# endif /* JEMALLOC_ARENA_INLINE_A */ + +# ifdef JEMALLOC_ARENA_INLINE_B JEMALLOC_ALWAYS_INLINE arena_chunk_map_t * arena_mapp_get(arena_chunk_t *chunk, size_t pageind) { @@ -773,9 +795,9 @@ arena_ptr_small_binind_get(const void *ptr, size_t mapbits) return (binind); } -# endif /* JEMALLOC_ARENA_INLINE_A */ +# endif /* JEMALLOC_ARENA_INLINE_B */ -# ifdef JEMALLOC_ARENA_INLINE_B +# ifdef JEMALLOC_ARENA_INLINE_C JEMALLOC_INLINE size_t arena_bin_index(arena_t *arena, arena_bin_t *bin) { @@ -965,7 +987,7 @@ arena_salloc(const void *ptr, bool demote) assert(arena_mapbits_large_get(chunk, pageind) != 0 || arena_ptr_small_binind_get(ptr, arena_mapbits_get(chunk, pageind)) == binind); - ret = small_bin2size[binind]; + ret = small_bin2size(binind); } return (ret); @@ -1004,7 +1026,7 @@ arena_dalloc(arena_chunk_t *chunk, void *ptr, bool try_tcache) arena_dalloc_large(chunk->arena, chunk, ptr); } } -# endif /* JEMALLOC_ARENA_INLINE_B */ +# endif /* JEMALLOC_ARENA_INLINE_C */ #endif #endif /* JEMALLOC_H_INLINES */ diff --git 
a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 6f11d4b4..d530c3b8 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -499,6 +499,14 @@ void jemalloc_postfork_child(void); #include "jemalloc/internal/chunk.h" #include "jemalloc/internal/huge.h" +/* + * Include arena.h the first time in order to provide inline functions for this + * header's inlines. + */ +#define JEMALLOC_ARENA_INLINE_A +#include "jemalloc/internal/arena.h" +#undef JEMALLOC_ARENA_INLINE_A + #ifndef JEMALLOC_ENABLE_INLINE malloc_tsd_protos(JEMALLOC_ATTR(unused), arenas, arena_t *) @@ -526,7 +534,7 @@ s2u(size_t size) { if (size <= SMALL_MAXCLASS) - return (small_bin2size[SMALL_SIZE2BIN(size)]); + return (small_bin2size(small_size2bin(size))); if (size <= arena_maxclass) return (PAGE_CEILING(size)); return (CHUNK_CEILING(size)); @@ -569,7 +577,7 @@ sa2u(size_t size, size_t alignment) if (usize <= arena_maxclass && alignment <= PAGE) { if (usize <= SMALL_MAXCLASS) - return (small_bin2size[SMALL_SIZE2BIN(usize)]); + return (small_bin2size(small_size2bin(usize))); return (PAGE_CEILING(usize)); } else { size_t run_size; @@ -643,16 +651,16 @@ choose_arena(arena_t *arena) #include "jemalloc/internal/bitmap.h" #include "jemalloc/internal/rtree.h" /* - * Include arena.h twice in order to resolve circular dependencies with - * tcache.h. + * Include arena.h the second and third times in order to resolve circular + * dependencies with tcache.h. 
*/ -#define JEMALLOC_ARENA_INLINE_A -#include "jemalloc/internal/arena.h" -#undef JEMALLOC_ARENA_INLINE_A -#include "jemalloc/internal/tcache.h" #define JEMALLOC_ARENA_INLINE_B #include "jemalloc/internal/arena.h" #undef JEMALLOC_ARENA_INLINE_B +#include "jemalloc/internal/tcache.h" +#define JEMALLOC_ARENA_INLINE_C +#include "jemalloc/internal/arena.h" +#undef JEMALLOC_ARENA_INLINE_C #include "jemalloc/internal/hash.h" #include "jemalloc/internal/quarantine.h" @@ -794,7 +802,7 @@ u2rz(size_t usize) size_t ret; if (usize <= SMALL_MAXCLASS) { - size_t binind = SMALL_SIZE2BIN(usize); + size_t binind = small_size2bin(usize); ret = arena_bin_info[binind].redzone_size; } else ret = 0; diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index ff9ed476..ccbb3a90 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -346,7 +346,9 @@ s2u sa2u set_errno small_bin2size +small_bin2size_tab small_size2bin +small_size2bin_tab stats_cactive stats_cactive_add stats_cactive_get diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index 06c7c8fc..c0d48b93 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -263,10 +263,10 @@ tcache_alloc_small(tcache_t *tcache, size_t size, bool zero) size_t binind; tcache_bin_t *tbin; - binind = SMALL_SIZE2BIN(size); + binind = small_size2bin(size); assert(binind < NBINS); tbin = &tcache->tbins[binind]; - size = small_bin2size[binind]; + size = small_bin2size(binind); ret = tcache_alloc_easy(tbin); if (ret == NULL) { ret = tcache_alloc_small_hard(tcache, tbin, binind); diff --git a/src/arena.c b/src/arena.c index 4256344c..d956be3e 100644 --- a/src/arena.c +++ b/src/arena.c @@ -8,7 +8,7 @@ ssize_t opt_lg_dirty_mult = LG_DIRTY_MULT_DEFAULT; arena_bin_info_t arena_bin_info[NBINS]; JEMALLOC_ALIGNED(CACHELINE) -const uint32_t small_bin2size[NBINS] = { +const uint32_t 
small_bin2size_tab[NBINS] = { #define SIZE_CLASS(bin, delta, size) \ size, SIZE_CLASSES @@ -16,7 +16,7 @@ const uint32_t small_bin2size[NBINS] = { }; JEMALLOC_ALIGNED(CACHELINE) -const uint8_t small_size2bin[] = { +const uint8_t small_size2bin_tab[] = { #define S2B_8(i) i, #define S2B_16(i) S2B_8(i) S2B_8(i) #define S2B_32(i) S2B_16(i) S2B_16(i) @@ -1607,7 +1607,7 @@ arena_quarantine_junk_small(void *ptr, size_t usize) assert(opt_quarantine); assert(usize <= SMALL_MAXCLASS); - binind = SMALL_SIZE2BIN(usize); + binind = small_size2bin(usize); bin_info = &arena_bin_info[binind]; arena_redzones_validate(ptr, bin_info, true); } @@ -1620,10 +1620,10 @@ arena_malloc_small(arena_t *arena, size_t size, bool zero) arena_run_t *run; size_t binind; - binind = SMALL_SIZE2BIN(size); + binind = small_size2bin(size); assert(binind < NBINS); bin = &arena->bins[binind]; - size = small_bin2size[binind]; + size = small_bin2size(binind); malloc_mutex_lock(&bin->lock); if ((run = bin->runcur) != NULL && run->nfree > 0) @@ -1777,7 +1777,7 @@ arena_prof_promoted(const void *ptr, size_t size) chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - binind = SMALL_SIZE2BIN(size); + binind = small_size2bin(size); assert(binind < NBINS); arena_mapbits_large_binind_set(chunk, pageind, binind); @@ -2164,11 +2164,11 @@ arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, */ if (oldsize <= arena_maxclass) { if (oldsize <= SMALL_MAXCLASS) { - assert(arena_bin_info[SMALL_SIZE2BIN(oldsize)].reg_size + assert(arena_bin_info[small_size2bin(oldsize)].reg_size == oldsize); if ((size + extra <= SMALL_MAXCLASS && - SMALL_SIZE2BIN(size + extra) == - SMALL_SIZE2BIN(oldsize)) || (size <= oldsize && + small_size2bin(size + extra) == + small_size2bin(oldsize)) || (size <= oldsize && size + extra >= oldsize)) return (false); } else { From 9d4e13f45a281a2eabe4d3528ab26e5f3903d5a5 Mon Sep 17 00:00:00 2001 From: Lucian Adrian Grijincu Date: 
Mon, 21 Apr 2014 20:52:35 -0700 Subject: [PATCH 0434/3378] prof_backtrace: use unw_backtrace unw_backtrace: - does internal per-thread caching - doesn't acquire an internal lock --- .../jemalloc/internal/jemalloc_internal.h.in | 4 +-- src/prof.c | 33 +++++-------------- 2 files changed, 11 insertions(+), 26 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index d530c3b8..dc77b5a1 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -352,9 +352,9 @@ static const bool config_ivsalloc = # endif # endif # define VARIABLE_ARRAY(type, name, count) \ - type *name = alloca(sizeof(type) * count) + type *name = alloca(sizeof(type) * (count)) #else -# define VARIABLE_ARRAY(type, name, count) type name[count] +# define VARIABLE_ARRAY(type, name, count) type name[(count)] #endif #include "jemalloc/internal/valgrind.h" diff --git a/src/prof.c b/src/prof.c index 82c7f703..11f1267f 100644 --- a/src/prof.c +++ b/src/prof.c @@ -160,36 +160,21 @@ prof_leave(prof_tdata_t *prof_tdata) void prof_backtrace(prof_bt_t *bt, unsigned nignore) { - unw_context_t uc; - unw_cursor_t cursor; - unsigned i; - int err; - cassert(config_prof); assert(bt->len == 0); assert(bt->vec != NULL); - unw_getcontext(&uc); - unw_init_local(&cursor, &uc); + VARIABLE_ARRAY(void *, frames, nignore + PROF_BT_MAX); + int n = unw_backtrace(frames, nignore + PROF_BT_MAX); + if (n <= 0) + return; /* Throw away (nignore+1) stack frames, if that many exist. */ - for (i = 0; i < nignore + 1; i++) { - err = unw_step(&cursor); - if (err <= 0) - return; - } - - /* - * Iterate over stack frames until there are no more, or until no space - * remains in bt. 
- */ - for (i = 0; i < PROF_BT_MAX; i++) { - unw_get_reg(&cursor, UNW_REG_IP, (unw_word_t *)&bt->vec[i]); - bt->len++; - err = unw_step(&cursor); - if (err <= 0) - break; - } + nignore++; + if (nignore >= n) + return; + memcpy(bt->vec, &frames[nignore], sizeof(frames[0]) * (n - nignore)); + bt->len = n - nignore; } #elif (defined(JEMALLOC_PROF_LIBGCC)) static _Unwind_Reason_Code From 05125b83778a5695c29777acdc662d999d016d32 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 22 Apr 2014 20:48:07 -0700 Subject: [PATCH 0435/3378] Update libunwind configuration check to look for unw_backtrace(). Update libunwind configuration check to look for unw_backtrace(), which is a newer API not available in older versions of libunwind. --- configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index dc817e1c..eb9ca45c 100644 --- a/configure.ac +++ b/configure.ac @@ -702,7 +702,7 @@ fi, if test "x$backtrace_method" = "x" -a "x$enable_prof_libunwind" = "x1" ; then AC_CHECK_HEADERS([libunwind.h], , [enable_prof_libunwind="0"]) if test "x$LUNWIND" = "x-lunwind" ; then - AC_CHECK_LIB([unwind], [backtrace], [LIBS="$LIBS $LUNWIND"], + AC_CHECK_LIB([unwind], [unw_backtrace], [LIBS="$LIBS $LUNWIND"], [enable_prof_libunwind="0"]) else LIBS="$LIBS $LUNWIND" From 6f001059aa33d77a3cb7799002044faf8dd08fc0 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 22 Apr 2014 18:41:15 -0700 Subject: [PATCH 0436/3378] Simplify backtracing. Simplify backtracing to not ignore any frames, and compensate for this in pprof in order to increase flexibility with respect to function-based refactoring even in the presence of non-deterministic inlining. Modify pprof to blacklist all jemalloc allocation entry points including non-standard ones like mallocx(), and ignore all allocator-internal frames. Prior to this change, pprof excluded the specifically blacklisted functions from backtraces, but it left allocator-internal frames intact. 
--- bin/pprof | 9 ++++ include/jemalloc/internal/prof.h | 7 ++- src/jemalloc.c | 80 +++++++++++--------------------- src/prof.c | 55 +++++++++------------- 4 files changed, 60 insertions(+), 91 deletions(-) diff --git a/bin/pprof b/bin/pprof index a309943c..328138cd 100755 --- a/bin/pprof +++ b/bin/pprof @@ -2811,9 +2811,14 @@ sub RemoveUninterestingFrames { 'free', 'memalign', 'posix_memalign', + 'aligned_alloc', 'pvalloc', 'valloc', 'realloc', + 'mallocx', # jemalloc + 'rallocx', # jemalloc + 'xallocx', # jemalloc + 'dallocx', # jemalloc 'tc_calloc', 'tc_cfree', 'tc_malloc', @@ -2923,6 +2928,10 @@ sub RemoveUninterestingFrames { if (exists($symbols->{$a})) { my $func = $symbols->{$a}->[0]; if ($skip{$func} || ($func =~ m/$skip_regexp/)) { + # Throw away the portion of the backtrace seen so far, under the + # assumption that previous frames were for functions internal to the + # allocator. + @path = (); next; } } diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index d7422538..d82fbc4f 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -63,7 +63,6 @@ struct prof_bt_s { /* Data structure passed to libgcc _Unwind_Backtrace() callback functions. 
*/ typedef struct { prof_bt_t *bt; - unsigned nignore; unsigned max; } prof_unwind_data_t; #endif @@ -220,7 +219,7 @@ extern char opt_prof_prefix[ extern uint64_t prof_interval; void bt_init(prof_bt_t *bt, void **vec); -void prof_backtrace(prof_bt_t *bt, unsigned nignore); +void prof_backtrace(prof_bt_t *bt); prof_thr_cnt_t *prof_lookup(prof_bt_t *bt); #ifdef JEMALLOC_JET size_t prof_bt_count(void); @@ -244,7 +243,7 @@ void prof_sample_threshold_update(prof_tdata_t *prof_tdata); /******************************************************************************/ #ifdef JEMALLOC_H_INLINES -#define PROF_ALLOC_PREP(nignore, size, ret) do { \ +#define PROF_ALLOC_PREP(size, ret) do { \ prof_tdata_t *prof_tdata; \ prof_bt_t bt; \ \ @@ -255,7 +254,7 @@ void prof_sample_threshold_update(prof_tdata_t *prof_tdata); ret = (prof_thr_cnt_t *)(uintptr_t)1U; \ } else { \ bt_init(&bt, prof_tdata->vec); \ - prof_backtrace(&bt, nignore); \ + prof_backtrace(&bt); \ ret = prof_lookup(&bt); \ } \ } while (0) diff --git a/src/jemalloc.c b/src/jemalloc.c index 36eae722..f1dda758 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -881,10 +881,12 @@ imalloc_prof_sample(size_t usize, prof_thr_cnt_t *cnt) } JEMALLOC_ALWAYS_INLINE_C void * -imalloc_prof(size_t usize, prof_thr_cnt_t *cnt) +imalloc_prof(size_t usize) { void *p; + prof_thr_cnt_t *cnt; + PROF_ALLOC_PREP(usize, cnt); if ((uintptr_t)cnt != (uintptr_t)1U) p = imalloc_prof_sample(usize, cnt); else @@ -896,42 +898,22 @@ imalloc_prof(size_t usize, prof_thr_cnt_t *cnt) return (p); } -/* - * MALLOC_BODY() is a macro rather than a function because its contents are in - * the fast path, but inlining would cause reliability issues when determining - * how many frames to discard from heap profiling backtraces. 
- */ -#define MALLOC_BODY(ret, size, usize) do { \ - if (malloc_init()) \ - ret = NULL; \ - else { \ - if (config_prof && opt_prof) { \ - prof_thr_cnt_t *cnt; \ - \ - usize = s2u(size); \ - /* \ - * Call PROF_ALLOC_PREP() here rather than in \ - * imalloc_prof() so that imalloc_prof() can be \ - * inlined without introducing uncertainty \ - * about the number of backtrace frames to \ - * ignore. imalloc_prof() is in the fast path \ - * when heap profiling is enabled, so inlining \ - * is critical to performance. (For \ - * consistency all callers of PROF_ALLOC_PREP() \ - * are structured similarly, even though e.g. \ - * realloc() isn't called enough for inlining \ - * to be critical.) \ - */ \ - PROF_ALLOC_PREP(1, usize, cnt); \ - ret = imalloc_prof(usize, cnt); \ - } else { \ - if (config_stats || (config_valgrind && \ - in_valgrind)) \ - usize = s2u(size); \ - ret = imalloc(size); \ - } \ - } \ -} while (0) +JEMALLOC_ALWAYS_INLINE_C void * +imalloc_body(size_t size, size_t *usize) +{ + + if (malloc_init()) + return (NULL); + + if (config_prof && opt_prof) { + *usize = s2u(size); + return (imalloc_prof(*usize)); + } + + if (config_stats || (config_valgrind && in_valgrind)) + *usize = s2u(size); + return (imalloc(size)); +} void * je_malloc(size_t size) @@ -942,8 +924,7 @@ je_malloc(size_t size) if (size == 0) size = 1; - MALLOC_BODY(ret, size, usize); - + ret = imalloc_body(size, &usize); if (ret == NULL) { if (config_xmalloc && opt_xmalloc) { malloc_write(": Error in malloc(): " @@ -998,13 +979,6 @@ imemalign_prof(size_t alignment, size_t usize, prof_thr_cnt_t *cnt) } JEMALLOC_ATTR(nonnull(1)) -#ifdef JEMALLOC_PROF -/* - * Avoid any uncertainty as to how many backtrace frames to ignore in - * PROF_ALLOC_PREP(). 
- */ -JEMALLOC_NOINLINE -#endif static int imemalign(void **memptr, size_t alignment, size_t size, size_t min_alignment) { @@ -1043,7 +1017,7 @@ imemalign(void **memptr, size_t alignment, size_t size, size_t min_alignment) if (config_prof && opt_prof) { prof_thr_cnt_t *cnt; - PROF_ALLOC_PREP(2, usize, cnt); + PROF_ALLOC_PREP(usize, cnt); result = imemalign_prof(alignment, usize, cnt); } else result = ipalloc(usize, alignment, false); @@ -1166,7 +1140,7 @@ je_calloc(size_t num, size_t size) prof_thr_cnt_t *cnt; usize = s2u(num_size); - PROF_ALLOC_PREP(1, usize, cnt); + PROF_ALLOC_PREP(usize, cnt); ret = icalloc_prof(usize, cnt); } else { if (config_stats || (config_valgrind && in_valgrind)) @@ -1282,7 +1256,7 @@ je_realloc(void *ptr, size_t size) prof_thr_cnt_t *cnt; usize = s2u(size); - PROF_ALLOC_PREP(1, usize, cnt); + PROF_ALLOC_PREP(usize, cnt); ret = irealloc_prof(ptr, old_usize, usize, cnt); } else { if (config_stats || (config_valgrind && in_valgrind)) @@ -1291,7 +1265,7 @@ je_realloc(void *ptr, size_t size) } } else { /* realloc(NULL, size) is equivalent to malloc(size). */ - MALLOC_BODY(ret, size, usize); + ret = imalloc_body(size, &usize); } if (ret == NULL) { @@ -1475,7 +1449,7 @@ je_mallocx(size_t size, int flags) if (config_prof && opt_prof) { prof_thr_cnt_t *cnt; - PROF_ALLOC_PREP(1, usize, cnt); + PROF_ALLOC_PREP(usize, cnt); p = imallocx_prof(usize, alignment, zero, try_tcache, arena, cnt); } else @@ -1600,7 +1574,7 @@ je_rallocx(void *ptr, size_t size, int flags) usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment); assert(usize != 0); - PROF_ALLOC_PREP(1, usize, cnt); + PROF_ALLOC_PREP(usize, cnt); p = irallocx_prof(ptr, old_usize, size, alignment, &usize, zero, try_tcache_alloc, try_tcache_dalloc, arena, cnt); if (p == NULL) @@ -1733,7 +1707,7 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) */ size_t max_usize = (alignment == 0) ? 
s2u(size+extra) : sa2u(size+extra, alignment); - PROF_ALLOC_PREP(1, max_usize, cnt); + PROF_ALLOC_PREP(max_usize, cnt); usize = ixallocx_prof(ptr, old_usize, size, extra, alignment, max_usize, zero, arena, cnt); } else { diff --git a/src/prof.c b/src/prof.c index 11f1267f..b64386e3 100644 --- a/src/prof.c +++ b/src/prof.c @@ -158,23 +158,18 @@ prof_leave(prof_tdata_t *prof_tdata) #ifdef JEMALLOC_PROF_LIBUNWIND void -prof_backtrace(prof_bt_t *bt, unsigned nignore) +prof_backtrace(prof_bt_t *bt) { + int nframes; + cassert(config_prof); assert(bt->len == 0); assert(bt->vec != NULL); - VARIABLE_ARRAY(void *, frames, nignore + PROF_BT_MAX); - int n = unw_backtrace(frames, nignore + PROF_BT_MAX); - if (n <= 0) + nframes = unw_backtrace(bt->vec, PROF_BT_MAX); + if (nframes <= 0) return; - - /* Throw away (nignore+1) stack frames, if that many exist. */ - nignore++; - if (nignore >= n) - return; - memcpy(bt->vec, &frames[nignore], sizeof(frames[0]) * (n - nignore)); - bt->len = n - nignore; + bt->len = nframes; } #elif (defined(JEMALLOC_PROF_LIBGCC)) static _Unwind_Reason_Code @@ -190,25 +185,25 @@ static _Unwind_Reason_Code prof_unwind_callback(struct _Unwind_Context *context, void *arg) { prof_unwind_data_t *data = (prof_unwind_data_t *)arg; + void *ip; cassert(config_prof); - if (data->nignore > 0) - data->nignore--; - else { - data->bt->vec[data->bt->len] = (void *)_Unwind_GetIP(context); - data->bt->len++; - if (data->bt->len == data->max) - return (_URC_END_OF_STACK); - } + ip = (void *)_Unwind_GetIP(context); + if (ip == NULL) + return (_URC_END_OF_STACK); + data->bt->vec[data->bt->len] = ip; + data->bt->len++; + if (data->bt->len == data->max) + return (_URC_END_OF_STACK); return (_URC_NO_REASON); } void -prof_backtrace(prof_bt_t *bt, unsigned nignore) +prof_backtrace(prof_bt_t *bt) { - prof_unwind_data_t data = {bt, nignore, PROF_BT_MAX}; + prof_unwind_data_t data = {bt, PROF_BT_MAX}; cassert(config_prof); @@ -216,25 +211,22 @@ prof_backtrace(prof_bt_t *bt, 
unsigned nignore) } #elif (defined(JEMALLOC_PROF_GCC)) void -prof_backtrace(prof_bt_t *bt, unsigned nignore) +prof_backtrace(prof_bt_t *bt) { #define BT_FRAME(i) \ - if ((i) < nignore + PROF_BT_MAX) { \ + if ((i) < PROF_BT_MAX) { \ void *p; \ if (__builtin_frame_address(i) == 0) \ return; \ p = __builtin_return_address(i); \ if (p == NULL) \ return; \ - if (i >= nignore) { \ - bt->vec[(i) - nignore] = p; \ - bt->len = (i) - nignore + 1; \ - } \ + bt->vec[(i)] = p; \ + bt->len = (i) + 1; \ } else \ return; cassert(config_prof); - assert(nignore <= 3); BT_FRAME(0) BT_FRAME(1) @@ -376,16 +368,11 @@ prof_backtrace(prof_bt_t *bt, unsigned nignore) BT_FRAME(125) BT_FRAME(126) BT_FRAME(127) - - /* Extras to compensate for nignore. */ - BT_FRAME(128) - BT_FRAME(129) - BT_FRAME(130) #undef BT_FRAME } #else void -prof_backtrace(prof_bt_t *bt, unsigned nignore) +prof_backtrace(prof_bt_t *bt) { cassert(config_prof); From a344dd01c74a7e385087819046105f689931905d Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 1 May 2014 15:51:30 -0700 Subject: [PATCH 0437/3378] Fix coding sytle nits. --- src/jemalloc.c | 8 ++++---- test/include/test/test.h | 2 +- test/src/test.c | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index f1dda758..289d7f74 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1346,10 +1346,10 @@ je_valloc(size_t size) * passed an extra argument for the caller return address, which will be * ignored. 
*/ -JEMALLOC_EXPORT void (* __free_hook)(void *ptr) = je_free; -JEMALLOC_EXPORT void *(* __malloc_hook)(size_t size) = je_malloc; -JEMALLOC_EXPORT void *(* __realloc_hook)(void *ptr, size_t size) = je_realloc; -JEMALLOC_EXPORT void *(* __memalign_hook)(size_t alignment, size_t size) = +JEMALLOC_EXPORT void (*__free_hook)(void *ptr) = je_free; +JEMALLOC_EXPORT void *(*__malloc_hook)(size_t size) = je_malloc; +JEMALLOC_EXPORT void *(*__realloc_hook)(void *ptr, size_t size) = je_realloc; +JEMALLOC_EXPORT void *(*__memalign_hook)(size_t alignment, size_t size) = je_memalign; #endif diff --git a/test/include/test/test.h b/test/include/test/test.h index a32ec07c..161fafdf 100644 --- a/test/include/test/test.h +++ b/test/include/test/test.h @@ -323,7 +323,7 @@ void test_skip(const char *format, ...) JEMALLOC_ATTR(format(printf, 1, 2)); void test_fail(const char *format, ...) JEMALLOC_ATTR(format(printf, 1, 2)); /* For private use by macros. */ -test_status_t p_test(test_t* t, ...); +test_status_t p_test(test_t *t, ...); void p_test_init(const char *name); void p_test_fini(void); void p_test_fail(const char *prefix, const char *message); diff --git a/test/src/test.c b/test/src/test.c index 528d8583..3acf8454 100644 --- a/test/src/test.c +++ b/test/src/test.c @@ -61,13 +61,13 @@ p_test_fini(void) } test_status_t -p_test(test_t* t, ...) +p_test(test_t *t, ...) { test_status_t ret = test_status_pass; va_list ap; va_start(ap, t); - for (; t != NULL; t = va_arg(ap, test_t*)) { + for (; t != NULL; t = va_arg(ap, test_t *)) { t(); if (test_status > ret) ret = test_status; From 74b1ea5ce09c8455f35da0fbbd41f678708151d8 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Wed, 7 May 2014 17:58:54 -0400 Subject: [PATCH 0438/3378] fix git handling of newlines on windows By default, git will coerce LF to CRLF when files are checked out on Windows. This causes hard to diagnose errors when compiling with mingw-w64 from Windows rather than cross-compiling. 
--- .gitattributes | 1 + 1 file changed, 1 insertion(+) create mode 100644 .gitattributes diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 00000000..6313b56c --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +* text=auto eol=lf From fb7fe50a88ca9bde74e9a401ae17ad3b15bbae28 Mon Sep 17 00:00:00 2001 From: aravind Date: Mon, 5 May 2014 15:16:56 -0700 Subject: [PATCH 0439/3378] Add support for user-specified chunk allocators/deallocators. Add new mallctl endpoints "arena.chunk.alloc" and "arena.chunk.dealloc" to allow userspace to configure jemalloc's chunk allocator and deallocator on a per-arena basis. --- Makefile.in | 3 +- doc/jemalloc.xml.in | 63 +++++++++++++++++++ include/jemalloc/internal/arena.h | 6 ++ include/jemalloc/internal/chunk.h | 8 ++- include/jemalloc/internal/extent.h | 3 + include/jemalloc/internal/huge.h | 10 +-- .../jemalloc/internal/jemalloc_internal.h.in | 14 +++-- include/jemalloc/internal/private_symbols.txt | 1 + include/jemalloc/jemalloc_protos.h.in | 3 + src/arena.c | 8 ++- src/base.c | 2 +- src/chunk.c | 58 ++++++++++++----- src/ctl.c | 61 +++++++++++++++++- src/huge.c | 25 ++++---- src/jemalloc.c | 2 +- test/integration/chunk.c | 61 ++++++++++++++++++ 16 files changed, 283 insertions(+), 45 deletions(-) create mode 100644 test/integration/chunk.c diff --git a/Makefile.in b/Makefile.in index e411804a..800dd08d 100644 --- a/Makefile.in +++ b/Makefile.in @@ -142,7 +142,8 @@ TESTS_INTEGRATION := $(srcroot)test/integration/aligned_alloc.c \ $(srcroot)test/integration/rallocx.c \ $(srcroot)test/integration/thread_arena.c \ $(srcroot)test/integration/thread_tcache_enabled.c \ - $(srcroot)test/integration/xallocx.c + $(srcroot)test/integration/xallocx.c \ + $(srcroot)test/integration/chunk.c TESTS_STRESS := TESTS := $(TESTS_UNIT) $(TESTS_INTEGRATION) $(TESTS_STRESS) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 78e9b3c6..a7c38b55 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1283,6 +1283,69 @@ 
malloc_conf = "xmalloc:true";]]> + + + arena.<i>.chunk.alloc + (chunk_alloc_t *) + rw + + Get or set the chunk allocation function for arena + <i>. If setting, the chunk deallocation function should + also be set via + arena.<i>.chunk.dealloc to a companion + function that knows how to deallocate the chunks. + + typedef void *(chunk_alloc_t) + size_t size + size_t alignment + bool *zero + unsigned arena_ind + + A chunk allocation function conforms to the chunk_alloc_t + type and upon success returns a pointer to size + bytes of memory on behalf of arena arena_ind such + that the chunk's base address is a multiple of + alignment, as well as setting + *zero to indicate whether the chunk is zeroed. + Upon error the function returns NULL and leaves + *zero unmodified. The + size parameter is always a multiple of the chunk + size. The alignment parameter is always a power + of two at least as large as the chunk size. Zeroing is mandatory if + *zero is true upon function + entry. + + + + + arena.<i>.chunk.dealloc + (chunk_dealloc_t *) + rw + + Get or set the chunk deallocation function for arena + <i>. If setting, the chunk deallocation function must + be capable of deallocating all extant chunks associated with arena + <i>, usually by passing unknown chunks to the deallocation + function that was replaced. In practice, it is feasible to control + allocation for arenas created via arenas.extend such + that all chunks originate from an application-supplied chunk allocator + (by setting custom chunk allocation/deallocation functions just after + arena creation), but the automatically created arenas may have already + created chunks prior to the application having an opportunity to take + over chunk allocation. + + typedef void (chunk_dealloc_t) + void *chunk + size_t size + unsigned arena_ind + + A chunk deallocation function conforms to the + chunk_dealloc_t type and deallocates a + chunk of given size on + behalf of arena arena_ind. 
+ + arenas.narenas diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 605a87e5..d50159b3 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -370,6 +370,12 @@ struct arena_s { */ arena_avail_tree_t runs_avail; + /* + * user-configureable chunk allocation and deallocation functions. + */ + chunk_alloc_t *chunk_alloc; + chunk_dealloc_t *chunk_dealloc; + /* bins is used to store trees of free regions. */ arena_bin_t bins[NBINS]; }; diff --git a/include/jemalloc/internal/chunk.h b/include/jemalloc/internal/chunk.h index 87d8700d..cea0e8ae 100644 --- a/include/jemalloc/internal/chunk.h +++ b/include/jemalloc/internal/chunk.h @@ -43,10 +43,12 @@ extern size_t chunk_npages; extern size_t map_bias; /* Number of arena chunk header pages. */ extern size_t arena_maxclass; /* Max size class for arenas. */ -void *chunk_alloc(size_t size, size_t alignment, bool base, bool *zero, - dss_prec_t dss_prec); +void *chunk_alloc(arena_t *arena, size_t size, size_t alignment, bool base, + bool *zero, dss_prec_t dss_prec); +void *chunk_alloc_default(size_t size, size_t alignment, bool *zero, + unsigned arena_ind); void chunk_unmap(void *chunk, size_t size); -void chunk_dealloc(void *chunk, size_t size, bool unmap); +void chunk_dealloc(arena_t *arena, void *chunk, size_t size, bool unmap); bool chunk_boot(void); void chunk_prefork(void); void chunk_postfork_parent(void); diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index ba95ca81..000ef6d5 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -24,6 +24,9 @@ struct extent_node_s { /* Total region size. */ size_t size; + /* Arena from which this extent came, if any */ + arena_t *arena; + /* True if zero-filled; used by chunk recycling code. 
*/ bool zeroed; }; diff --git a/include/jemalloc/internal/huge.h b/include/jemalloc/internal/huge.h index a2b9c779..ab8d44a2 100644 --- a/include/jemalloc/internal/huge.h +++ b/include/jemalloc/internal/huge.h @@ -17,13 +17,15 @@ extern size_t huge_allocated; /* Protects chunk-related data structures. */ extern malloc_mutex_t huge_mtx; -void *huge_malloc(size_t size, bool zero, dss_prec_t dss_prec); -void *huge_palloc(size_t size, size_t alignment, bool zero, +void *huge_malloc(arena_t *arena, size_t size, bool zero, + dss_prec_t dss_prec); +void *huge_palloc(arena_t *arena, size_t size, size_t alignment, bool zero, dss_prec_t dss_prec); bool huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra); -void *huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, - size_t alignment, bool zero, bool try_tcache_dalloc, dss_prec_t dss_prec); +void *huge_ralloc(arena_t *arena, void *ptr, size_t oldsize, size_t size, + size_t extra, size_t alignment, bool zero, bool try_tcache_dalloc, + dss_prec_t dss_prec); #ifdef JEMALLOC_JET typedef void (huge_dalloc_junk_t)(void *, size_t); extern huge_dalloc_junk_t *huge_dalloc_junk; diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index dc77b5a1..9e779c65 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -702,7 +702,8 @@ imalloct(size_t size, bool try_tcache, arena_t *arena) if (size <= arena_maxclass) return (arena_malloc(arena, size, false, try_tcache)); else - return (huge_malloc(size, false, huge_dss_prec_get(arena))); + return (huge_malloc(arena, size, false, + huge_dss_prec_get(arena))); } JEMALLOC_ALWAYS_INLINE void * @@ -719,7 +720,8 @@ icalloct(size_t size, bool try_tcache, arena_t *arena) if (size <= arena_maxclass) return (arena_malloc(arena, size, true, try_tcache)); else - return (huge_malloc(size, true, huge_dss_prec_get(arena))); + return (huge_malloc(arena, 
size, true, + huge_dss_prec_get(arena))); } JEMALLOC_ALWAYS_INLINE void * @@ -745,9 +747,11 @@ ipalloct(size_t usize, size_t alignment, bool zero, bool try_tcache, ret = arena_palloc(choose_arena(arena), usize, alignment, zero); } else if (alignment <= chunksize) - ret = huge_malloc(usize, zero, huge_dss_prec_get(arena)); + ret = huge_malloc(arena, usize, zero, + huge_dss_prec_get(arena)); else - ret = huge_palloc(usize, alignment, zero, huge_dss_prec_get(arena)); + ret = huge_palloc(arena, usize, alignment, zero, + huge_dss_prec_get(arena)); } assert(ALIGNMENT_ADDR2BASE(ret, alignment) == ret); @@ -915,7 +919,7 @@ iralloct(void *ptr, size_t size, size_t extra, size_t alignment, bool zero, alignment, zero, try_tcache_alloc, try_tcache_dalloc)); } else { - return (huge_ralloc(ptr, oldsize, size, extra, + return (huge_ralloc(arena, ptr, oldsize, size, extra, alignment, zero, try_tcache_dalloc, huge_dss_prec_get(arena))); } } diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index ccbb3a90..589b56a1 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -104,6 +104,7 @@ buferror choose_arena choose_arena_hard chunk_alloc +chunk_alloc_default chunk_alloc_dss chunk_alloc_mmap chunk_boot diff --git a/include/jemalloc/jemalloc_protos.h.in b/include/jemalloc/jemalloc_protos.h.in index 59aeee11..8e945fa5 100644 --- a/include/jemalloc/jemalloc_protos.h.in +++ b/include/jemalloc/jemalloc_protos.h.in @@ -44,3 +44,6 @@ JEMALLOC_EXPORT void * @je_@memalign(size_t alignment, size_t size) #ifdef JEMALLOC_OVERRIDE_VALLOC JEMALLOC_EXPORT void * @je_@valloc(size_t size) JEMALLOC_ATTR(malloc); #endif + +typedef void *(chunk_alloc_t)(size_t, size_t, bool *, unsigned); +typedef bool (chunk_dealloc_t)(void *, size_t, unsigned); diff --git a/src/arena.c b/src/arena.c index d956be3e..6db2b630 100644 --- a/src/arena.c +++ b/src/arena.c @@ -570,8 +570,8 @@ 
arena_chunk_init_hard(arena_t *arena) zero = false; malloc_mutex_unlock(&arena->lock); - chunk = (arena_chunk_t *)chunk_alloc(chunksize, chunksize, false, - &zero, arena->dss_prec); + chunk = (arena_chunk_t *)chunk_alloc(arena, chunksize, chunksize, + false, &zero, arena->dss_prec); malloc_mutex_lock(&arena->lock); if (chunk == NULL) return (NULL); @@ -668,7 +668,7 @@ arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk) arena->spare = chunk; malloc_mutex_unlock(&arena->lock); - chunk_dealloc((void *)spare, chunksize, true); + chunk_dealloc(arena, (void *)spare, chunksize, true); malloc_mutex_lock(&arena->lock); if (config_stats) arena->stats.mapped -= chunksize; @@ -2319,6 +2319,8 @@ arena_new(arena_t *arena, unsigned ind) arena->ind = ind; arena->nthreads = 0; + arena->chunk_alloc = chunk_alloc_default; + arena->chunk_dealloc = (chunk_dealloc_t *)chunk_unmap; if (malloc_mutex_init(&arena->lock)) return (true); diff --git a/src/base.c b/src/base.c index 03dcf8f4..e8b312ef 100644 --- a/src/base.c +++ b/src/base.c @@ -32,7 +32,7 @@ base_pages_alloc(size_t minsize) assert(minsize != 0); csize = CHUNK_CEILING(minsize); zero = false; - base_pages = chunk_alloc(csize, chunksize, true, &zero, + base_pages = chunk_alloc(NULL, csize, chunksize, true, &zero, chunk_dss_prec_get()); if (base_pages == NULL) return (true); diff --git a/src/chunk.c b/src/chunk.c index 246324a2..8bb07229 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -104,7 +104,7 @@ chunk_recycle(extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, size_t size, malloc_mutex_unlock(&chunks_mtx); node = base_node_alloc(); if (node == NULL) { - chunk_dealloc(ret, size, true); + chunk_dealloc(NULL, ret, size, true); return (NULL); } malloc_mutex_lock(&chunks_mtx); @@ -141,8 +141,8 @@ chunk_recycle(extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, size_t size, * takes advantage of this to avoid demanding zeroed chunks, but taking * advantage of them if they are returned. 
*/ -void * -chunk_alloc(size_t size, size_t alignment, bool base, bool *zero, +static void * +chunk_alloc_core(size_t size, size_t alignment, bool base, bool *zero, dss_prec_t dss_prec) { void *ret; @@ -156,32 +156,56 @@ chunk_alloc(size_t size, size_t alignment, bool base, bool *zero, if (have_dss && dss_prec == dss_prec_primary) { if ((ret = chunk_recycle(&chunks_szad_dss, &chunks_ad_dss, size, alignment, base, zero)) != NULL) - goto label_return; + return (ret); if ((ret = chunk_alloc_dss(size, alignment, zero)) != NULL) - goto label_return; + return (ret); } /* mmap. */ if ((ret = chunk_recycle(&chunks_szad_mmap, &chunks_ad_mmap, size, alignment, base, zero)) != NULL) - goto label_return; + return (ret); if ((ret = chunk_alloc_mmap(size, alignment, zero)) != NULL) - goto label_return; + return (ret); /* "secondary" dss. */ if (have_dss && dss_prec == dss_prec_secondary) { if ((ret = chunk_recycle(&chunks_szad_dss, &chunks_ad_dss, size, alignment, base, zero)) != NULL) - goto label_return; + return (ret); if ((ret = chunk_alloc_dss(size, alignment, zero)) != NULL) - goto label_return; + return (ret); } /* All strategies for allocation failed. */ - ret = NULL; -label_return: + return (NULL); +} + +/* + * Default arena chunk allocation routine in the absence of user-override. 
+ */ +void * +chunk_alloc_default(size_t size, size_t alignment, bool *zero, + unsigned arena_ind) +{ + + return (chunk_alloc_core(size, alignment, false, zero, + arenas[arena_ind]->dss_prec)); +} + +void * +chunk_alloc(arena_t *arena, size_t size, size_t alignment, bool base, + bool *zero, dss_prec_t dss_prec) +{ + void *ret; + + if (arena) + ret = arena->chunk_alloc(size, alignment, zero, arena->ind); + else + ret = chunk_alloc_core(size, alignment, base, zero, dss_prec); + if (ret != NULL) { if (config_ivsalloc && base == false) { if (rtree_set(chunks_rtree, (uintptr_t)ret, 1)) { - chunk_dealloc(ret, size, true); + chunk_dealloc(arena, ret, size, true); return (NULL); } } @@ -312,7 +336,7 @@ chunk_unmap(void *chunk, size_t size) } void -chunk_dealloc(void *chunk, size_t size, bool unmap) +chunk_dealloc(arena_t *arena, void *chunk, size_t size, bool unmap) { assert(chunk != NULL); @@ -329,8 +353,12 @@ chunk_dealloc(void *chunk, size_t size, bool unmap) malloc_mutex_unlock(&chunks_mtx); } - if (unmap) - chunk_unmap(chunk, size); + if (unmap) { + if (arena) + arena->chunk_dealloc(chunk, size, arena->ind); + else + chunk_unmap(chunk, size); + } } bool diff --git a/src/ctl.c b/src/ctl.c index 9ee5de9f..395c32a1 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -113,6 +113,8 @@ CTL_PROTO(opt_prof_accum) CTL_PROTO(arena_i_purge) static void arena_purge(unsigned arena_ind); CTL_PROTO(arena_i_dss) +CTL_PROTO(arena_i_chunk_alloc) +CTL_PROTO(arena_i_chunk_dealloc) INDEX_PROTO(arena_i) CTL_PROTO(arenas_bin_i_size) CTL_PROTO(arenas_bin_i_nregs) @@ -251,9 +253,15 @@ static const ctl_named_node_t opt_node[] = { {NAME("prof_accum"), CTL(opt_prof_accum)} }; +static const ctl_named_node_t chunk_node[] = { + {NAME("alloc"), CTL(arena_i_chunk_alloc)}, + {NAME("dealloc"), CTL(arena_i_chunk_dealloc)} +}; + static const ctl_named_node_t arena_i_node[] = { {NAME("purge"), CTL(arena_i_purge)}, - {NAME("dss"), CTL(arena_i_dss)} + {NAME("dss"), CTL(arena_i_dss)}, + {NAME("chunk"), CHILD(named, 
chunk)}, }; static const ctl_named_node_t super_arena_i_node[] = { {NAME(""), CHILD(named, arena_i)} @@ -1368,6 +1376,57 @@ label_return: return (ret); } +static int +arena_i_chunk_alloc_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, + void *newp, size_t newlen) +{ + int ret; + unsigned arena_ind = mib[1]; + arena_t *arena; + + malloc_mutex_lock(&ctl_mtx); + if (arena_ind < narenas_total && (arena = arenas[arena_ind]) != NULL) { + malloc_mutex_lock(&arena->lock); + READ(arena->chunk_alloc, chunk_alloc_t *); + WRITE(arena->chunk_alloc, chunk_alloc_t *); + } else { + ret = EFAULT; + goto label_outer_return; + } + ret = 0; +label_return: + malloc_mutex_unlock(&arena->lock); +label_outer_return: + malloc_mutex_unlock(&ctl_mtx); + return (ret); +} + +static int +arena_i_chunk_dealloc_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, + void *newp, size_t newlen) +{ + + int ret; + unsigned arena_ind = mib[1]; + arena_t *arena; + + malloc_mutex_lock(&ctl_mtx); + if (arena_ind < narenas_total && (arena = arenas[arena_ind]) != NULL) { + malloc_mutex_lock(&arena->lock); + READ(arena->chunk_dealloc, chunk_dealloc_t *); + WRITE(arena->chunk_dealloc, chunk_dealloc_t *); + } else { + ret = EFAULT; + goto label_outer_return; + } + ret = 0; +label_return: + malloc_mutex_unlock(&arena->lock); +label_outer_return: + malloc_mutex_unlock(&ctl_mtx); + return (ret); +} + static const ctl_named_node_t * arena_i_index(const size_t *mib, size_t miblen, size_t i) { diff --git a/src/huge.c b/src/huge.c index e725fd90..ab05c905 100644 --- a/src/huge.c +++ b/src/huge.c @@ -16,14 +16,15 @@ malloc_mutex_t huge_mtx; static extent_tree_t huge; void * -huge_malloc(size_t size, bool zero, dss_prec_t dss_prec) +huge_malloc(arena_t *arena, size_t size, bool zero, dss_prec_t dss_prec) { - return (huge_palloc(size, chunksize, zero, dss_prec)); + return (huge_palloc(arena, size, chunksize, zero, dss_prec)); } void * -huge_palloc(size_t size, size_t alignment, bool 
zero, dss_prec_t dss_prec) +huge_palloc(arena_t *arena, size_t size, size_t alignment, bool zero, + dss_prec_t dss_prec) { void *ret; size_t csize; @@ -48,7 +49,7 @@ huge_palloc(size_t size, size_t alignment, bool zero, dss_prec_t dss_prec) * it is possible to make correct junk/zero fill decisions below. */ is_zeroed = zero; - ret = chunk_alloc(csize, alignment, false, &is_zeroed, dss_prec); + ret = chunk_alloc(arena, csize, alignment, false, &is_zeroed, dss_prec); if (ret == NULL) { base_node_dealloc(node); return (NULL); @@ -57,6 +58,7 @@ huge_palloc(size_t size, size_t alignment, bool zero, dss_prec_t dss_prec) /* Insert node into huge. */ node->addr = ret; node->size = csize; + node->arena = arena; malloc_mutex_lock(&huge_mtx); extent_tree_ad_insert(&huge, node); @@ -96,8 +98,9 @@ huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra) } void * -huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, - size_t alignment, bool zero, bool try_tcache_dalloc, dss_prec_t dss_prec) +huge_ralloc(arena_t *arena, void *ptr, size_t oldsize, size_t size, + size_t extra, size_t alignment, bool zero, bool try_tcache_dalloc, + dss_prec_t dss_prec) { void *ret; size_t copysize; @@ -112,18 +115,18 @@ huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, * space and copying. */ if (alignment > chunksize) - ret = huge_palloc(size + extra, alignment, zero, dss_prec); + ret = huge_palloc(arena, size + extra, alignment, zero, dss_prec); else - ret = huge_malloc(size + extra, zero, dss_prec); + ret = huge_malloc(arena, size + extra, zero, dss_prec); if (ret == NULL) { if (extra == 0) return (NULL); /* Try again, this time without extra. 
*/ if (alignment > chunksize) - ret = huge_palloc(size, alignment, zero, dss_prec); + ret = huge_palloc(arena, size, alignment, zero, dss_prec); else - ret = huge_malloc(size, zero, dss_prec); + ret = huge_malloc(arena, size, zero, dss_prec); if (ret == NULL) return (NULL); @@ -238,7 +241,7 @@ huge_dalloc(void *ptr, bool unmap) if (unmap) huge_dalloc_junk(node->addr, node->size); - chunk_dealloc(node->addr, node->size, unmap); + chunk_dealloc(node->arena, node->addr, node->size, unmap); base_node_dealloc(node); } diff --git a/src/jemalloc.c b/src/jemalloc.c index 289d7f74..e0f9275f 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1983,7 +1983,7 @@ a0alloc(size_t size, bool zero) if (size <= arena_maxclass) return (arena_malloc(arenas[0], size, zero, false)); else - return (huge_malloc(size, zero, huge_dss_prec_get(arenas[0]))); + return (huge_malloc(NULL, size, zero, huge_dss_prec_get(arenas[0]))); } void * diff --git a/test/integration/chunk.c b/test/integration/chunk.c new file mode 100644 index 00000000..13659894 --- /dev/null +++ b/test/integration/chunk.c @@ -0,0 +1,61 @@ +#include "test/jemalloc_test.h" + +chunk_alloc_t *old_alloc; +chunk_dealloc_t *old_dealloc; + +bool +chunk_dealloc(void *chunk, size_t size, unsigned arena_ind) +{ + + return (old_dealloc(chunk, size, arena_ind)); +} + +void * +chunk_alloc(size_t size, size_t alignment, bool *zero, unsigned arena_ind) +{ + + return (old_alloc(size, alignment, zero, arena_ind)); +} + +TEST_BEGIN(test_chunk) +{ + void *p; + chunk_alloc_t *new_alloc; + chunk_dealloc_t *new_dealloc; + size_t old_size, new_size; + + new_alloc = chunk_alloc; + new_dealloc = chunk_dealloc; + old_size = sizeof(chunk_alloc_t *); + new_size = sizeof(chunk_alloc_t *); + + assert_d_eq(mallctl("arena.0.chunk.alloc", &old_alloc, + &old_size, &new_alloc, new_size), 0, + "Unexpected alloc error"); + assert_ptr_ne(old_alloc, new_alloc, + "Unexpected alloc error"); + assert_d_eq(mallctl("arena.0.chunk.dealloc", &old_dealloc, + &old_size, 
&new_dealloc, new_size), 0, + "Unexpected dealloc error"); + assert_ptr_ne(old_dealloc, new_dealloc, + "Unexpected dealloc error"); + + p = mallocx(42, 0); + assert_ptr_ne(p, NULL, "Unexpected alloc error"); + free(p); + + assert_d_eq(mallctl("arena.0.chunk.alloc", NULL, + NULL, &old_alloc, old_size), 0, + "Unexpected alloc error"); + assert_d_eq(mallctl("arena.0.chunk.dealloc", NULL, + NULL, &old_dealloc, old_size), 0, + "Unexpected dealloc error"); +} +TEST_END + +int +main(void) +{ + + return (test(test_chunk)); +} From e2deab7a751c8080c2b2cdcfd7b11887332be1bb Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 15 May 2014 22:22:27 -0700 Subject: [PATCH 0440/3378] Refactor huge allocation to be managed by arenas. Refactor huge allocation to be managed by arenas (though the global red-black tree of huge allocations remains for lookup during deallocation). This is the logical conclusion of recent changes that 1) made per arena dss precedence apply to huge allocation, and 2) made it possible to replace the per arena chunk allocation/deallocation functions. Remove the top level huge stats, and replace them with per arena huge stats. Normalize function names and types to *dalloc* (some were *dealloc*). Remove the --enable-mremap option. As jemalloc currently operates, this is a performace regression for some applications, but planned work to logarithmically space huge size classes should provide similar amortized performance. The motivation for this change was that mremap-based huge reallocation forced leaky abstractions that prevented refactoring. 
--- INSTALL | 6 - Makefile.in | 1 - configure.ac | 28 ---- doc/jemalloc.xml.in | 128 +++++++-------- include/jemalloc/internal/arena.h | 7 +- include/jemalloc/internal/base.h | 2 +- include/jemalloc/internal/chunk.h | 8 +- include/jemalloc/internal/chunk_mmap.h | 2 +- include/jemalloc/internal/ctl.h | 5 - include/jemalloc/internal/huge.h | 20 +-- .../jemalloc/internal/jemalloc_internal.h.in | 23 +-- .../internal/jemalloc_internal_defs.h.in | 7 - include/jemalloc/internal/private_symbols.txt | 13 +- include/jemalloc/internal/stats.h | 5 + include/jemalloc/jemalloc_protos.h.in | 2 +- src/arena.c | 113 +++++++++++-- src/base.c | 12 +- src/chunk.c | 153 ++++++++++-------- src/chunk_mmap.c | 2 +- src/ctl.c | 68 ++++---- src/huge.c | 120 +++----------- src/jemalloc.c | 4 +- src/stats.c | 29 ++-- test/integration/chunk.c | 23 ++- test/integration/mremap.c | 45 ------ test/unit/junk.c | 9 +- test/unit/mallctl.c | 1 - test/unit/stats.c | 18 ++- 28 files changed, 384 insertions(+), 470 deletions(-) delete mode 100644 test/integration/mremap.c diff --git a/INSTALL b/INSTALL index 07f51d1e..2df667ca 100644 --- a/INSTALL +++ b/INSTALL @@ -132,12 +132,6 @@ any of the following arguments (not a definitive list) to 'configure': released in bulk, thus reducing the total number of mutex operations. See the "opt.tcache" option for usage details. ---enable-mremap - Enable huge realloc() via mremap(2). mremap() is disabled by default - because the flavor used is specific to Linux, which has a quirk in its - virtual memory allocation algorithm that causes semi-permanent VM map holes - under normal jemalloc operation. - --disable-munmap Disable virtual memory deallocation via munmap(2); instead keep track of the virtual memory for later use. munmap() is disabled by default (i.e. 
diff --git a/Makefile.in b/Makefile.in index 800dd08d..90869eb8 100644 --- a/Makefile.in +++ b/Makefile.in @@ -137,7 +137,6 @@ TESTS_INTEGRATION := $(srcroot)test/integration/aligned_alloc.c \ $(srcroot)test/integration/allocated.c \ $(srcroot)test/integration/mallocx.c \ $(srcroot)test/integration/MALLOCX_ARENA.c \ - $(srcroot)test/integration/mremap.c \ $(srcroot)test/integration/posix_memalign.c \ $(srcroot)test/integration/rallocx.c \ $(srcroot)test/integration/thread_arena.c \ diff --git a/configure.ac b/configure.ac index eb9ca45c..57015d1d 100644 --- a/configure.ac +++ b/configure.ac @@ -793,33 +793,6 @@ if test "x$enable_tcache" = "x1" ; then fi AC_SUBST([enable_tcache]) -dnl Disable mremap() for huge realloc() by default. -AC_ARG_ENABLE([mremap], - [AS_HELP_STRING([--enable-mremap], [Enable mremap(2) for huge realloc()])], -[if test "x$enable_mremap" = "xno" ; then - enable_mremap="0" -else - enable_mremap="1" -fi -], -[enable_mremap="0"] -) -if test "x$enable_mremap" = "x1" ; then - JE_COMPILABLE([mremap(...MREMAP_FIXED...)], [ -#define _GNU_SOURCE -#include -], [ -void *p = mremap((void *)0, 0, 0, MREMAP_MAYMOVE|MREMAP_FIXED, (void *)0); -], [je_cv_mremap_fixed]) - if test "x${je_cv_mremap_fixed}" = "xno" ; then - enable_mremap="0" - fi -fi -if test "x$enable_mremap" = "x1" ; then - AC_DEFINE([JEMALLOC_MREMAP], [ ]) -fi -AC_SUBST([enable_mremap]) - dnl Enable VM deallocation via munmap() by default. 
AC_ARG_ENABLE([munmap], [AS_HELP_STRING([--disable-munmap], [Disable VM deallocation via munmap(2)])], @@ -1447,7 +1420,6 @@ AC_MSG_RESULT([fill : ${enable_fill}]) AC_MSG_RESULT([utrace : ${enable_utrace}]) AC_MSG_RESULT([valgrind : ${enable_valgrind}]) AC_MSG_RESULT([xmalloc : ${enable_xmalloc}]) -AC_MSG_RESULT([mremap : ${enable_mremap}]) AC_MSG_RESULT([munmap : ${enable_munmap}]) AC_MSG_RESULT([lazy_lock : ${enable_lazy_lock}]) AC_MSG_RESULT([tls : ${enable_tls}]) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index a7c38b55..46e505fc 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -486,10 +486,11 @@ for (i = 0; i < nbins; i++) { User objects are broken into three categories according to size: small, large, and huge. Small objects are smaller than one page. Large objects are smaller than the chunk size. Huge objects are a multiple of - the chunk size. Small and large objects are managed by arenas; huge - objects are managed separately in a single data structure that is shared by - all threads. Huge objects are used by applications infrequently enough - that this single data structure is not a scalability issue. + the chunk size. Small and large objects are managed entirely by arenas; + huge objects are additionally aggregated in a single data structure that is + shared by all threads. Huge objects are typically used by applications + infrequently enough that this single data structure is not a scalability + issue. Each chunk that is managed by an arena tracks its contents as runs of contiguous pages (unused, backing a set of small objects, or backing one @@ -647,16 +648,6 @@ for (i = 0; i < nbins; i++) { during build configuration. - - - config.mremap - (bool) - r- - - was specified during - build configuration. - - config.munmap @@ -1273,14 +1264,9 @@ malloc_conf = "xmalloc:true";]]> Set the precedence of dss allocation as related to mmap allocation for arena <i>, or for all arenas if <i> equals arenas.narenas. 
Note - that even during huge allocation this setting is read from the arena - that would be chosen for small or large allocation so that applications - can depend on consistent dss versus mmap allocation regardless of - allocation size. See opt.dss for supported - settings. - + linkend="arenas.narenas">arenas.narenas. See + opt.dss for supported + settings. @@ -1291,8 +1277,8 @@ malloc_conf = "xmalloc:true";]]> Get or set the chunk allocation function for arena <i>. If setting, the chunk deallocation function should - also be set via - arena.<i>.chunk.dealloc to a companion + also be set via + arena.<i>.chunk.dalloc to a companion function that knows how to deallocate the chunks. typedef void *(chunk_alloc_t) @@ -1313,13 +1299,18 @@ malloc_conf = "xmalloc:true";]]> size. The alignment parameter is always a power of two at least as large as the chunk size. Zeroing is mandatory if *zero is true upon function - entry. + entry. + + Note that replacing the default chunk allocation function makes + the arena's arena.<i>.dss + setting irrelevant. - + - arena.<i>.chunk.dealloc - (chunk_dealloc_t *) + arena.<i>.chunk.dalloc + (chunk_dalloc_t *) rw Get or set the chunk deallocation function for arena @@ -1335,13 +1326,13 @@ malloc_conf = "xmalloc:true";]]> created chunks prior to the application having an opportunity to take over chunk allocation. - typedef void (chunk_dealloc_t) + typedef void (chunk_dalloc_t) void *chunk size_t size unsigned arena_ind A chunk deallocation function conforms to the - chunk_dealloc_t type and deallocates a + chunk_dalloc_t type and deallocates a chunk of given size on behalf of arena arena_ind. @@ -1608,39 +1599,6 @@ malloc_conf = "xmalloc:true";]]> - - - stats.huge.allocated - (size_t) - r- - [] - - Number of bytes currently allocated by huge objects. - - - - - - stats.huge.nmalloc - (uint64_t) - r- - [] - - Cumulative number of huge allocation requests. 
- - - - - - stats.huge.ndalloc - (uint64_t) - r- - [] - - Cumulative number of huge deallocation requests. - - - stats.arenas.<i>.dss @@ -1817,6 +1775,50 @@ malloc_conf = "xmalloc:true";]]> + + + stats.arenas.<i>.huge.allocated + (size_t) + r- + [] + + Number of bytes currently allocated by huge objects. + + + + + + stats.arenas.<i>.huge.nmalloc + (uint64_t) + r- + [] + + Cumulative number of huge allocation requests served + directly by the arena. + + + + + stats.arenas.<i>.huge.ndalloc + (uint64_t) + r- + [] + + Cumulative number of huge deallocation requests served + directly by the arena. + + + + + stats.arenas.<i>.huge.nrequests + (uint64_t) + r- + [] + + Cumulative number of huge allocation requests. + + + stats.arenas.<i>.bins.<j>.allocated diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index d50159b3..598a89b0 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -345,7 +345,7 @@ struct arena_s { */ arena_chunk_t *spare; - /* Number of pages in active runs. */ + /* Number of pages in active runs and huge regions. */ size_t nactive; /* @@ -374,7 +374,7 @@ struct arena_s { * user-configureable chunk allocation and deallocation functions. */ chunk_alloc_t *chunk_alloc; - chunk_dealloc_t *chunk_dealloc; + chunk_dalloc_t *chunk_dalloc; /* bins is used to store trees of free regions. */ arena_bin_t bins[NBINS]; @@ -403,6 +403,9 @@ extern arena_bin_info_t arena_bin_info[NBINS]; /* Number of large size classes. 
*/ #define nlclasses (chunk_npages - map_bias) +void *arena_chunk_alloc_huge(arena_t *arena, size_t size, size_t alignment, + bool *zero); +void arena_chunk_dalloc_huge(arena_t *arena, void *chunk, size_t size); void arena_purge_all(arena_t *arena); void arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, size_t binind, uint64_t prof_accumbytes); diff --git a/include/jemalloc/internal/base.h b/include/jemalloc/internal/base.h index 9cf75ffb..3fb80b92 100644 --- a/include/jemalloc/internal/base.h +++ b/include/jemalloc/internal/base.h @@ -12,7 +12,7 @@ void *base_alloc(size_t size); void *base_calloc(size_t number, size_t size); extent_node_t *base_node_alloc(void); -void base_node_dealloc(extent_node_t *node); +void base_node_dalloc(extent_node_t *node); bool base_boot(void); void base_prefork(void); void base_postfork_parent(void); diff --git a/include/jemalloc/internal/chunk.h b/include/jemalloc/internal/chunk.h index cea0e8ae..f3bfbe08 100644 --- a/include/jemalloc/internal/chunk.h +++ b/include/jemalloc/internal/chunk.h @@ -43,12 +43,14 @@ extern size_t chunk_npages; extern size_t map_bias; /* Number of arena chunk header pages. */ extern size_t arena_maxclass; /* Max size class for arenas. 
*/ -void *chunk_alloc(arena_t *arena, size_t size, size_t alignment, bool base, - bool *zero, dss_prec_t dss_prec); +void *chunk_alloc_base(size_t size); +void *chunk_alloc_arena(chunk_alloc_t *chunk_alloc, + chunk_dalloc_t *chunk_dalloc, unsigned arena_ind, size_t size, + size_t alignment, bool *zero); void *chunk_alloc_default(size_t size, size_t alignment, bool *zero, unsigned arena_ind); void chunk_unmap(void *chunk, size_t size); -void chunk_dealloc(arena_t *arena, void *chunk, size_t size, bool unmap); +bool chunk_dalloc_default(void *chunk, size_t size, unsigned arena_ind); bool chunk_boot(void); void chunk_prefork(void); void chunk_postfork_parent(void); diff --git a/include/jemalloc/internal/chunk_mmap.h b/include/jemalloc/internal/chunk_mmap.h index f24abac7..c5d5c6c0 100644 --- a/include/jemalloc/internal/chunk_mmap.h +++ b/include/jemalloc/internal/chunk_mmap.h @@ -12,7 +12,7 @@ bool pages_purge(void *addr, size_t length); void *chunk_alloc_mmap(size_t size, size_t alignment, bool *zero); -bool chunk_dealloc_mmap(void *chunk, size_t size); +bool chunk_dalloc_mmap(void *chunk, size_t size); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index 0ffecc5f..2d301bf1 100644 --- a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -57,11 +57,6 @@ struct ctl_stats_s { uint64_t total; /* stats_chunks.nchunks */ size_t high; /* stats_chunks.highchunks */ } chunks; - struct { - size_t allocated; /* huge_allocated */ - uint64_t nmalloc; /* huge_nmalloc */ - uint64_t ndalloc; /* huge_ndalloc */ - } huge; unsigned narenas; ctl_arena_stats_t *arenas; /* (narenas + 1) elements. 
*/ }; diff --git a/include/jemalloc/internal/huge.h b/include/jemalloc/internal/huge.h index ab8d44a2..1e545367 100644 --- a/include/jemalloc/internal/huge.h +++ b/include/jemalloc/internal/huge.h @@ -9,30 +9,18 @@ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -/* Huge allocation statistics. */ -extern uint64_t huge_nmalloc; -extern uint64_t huge_ndalloc; -extern size_t huge_allocated; - -/* Protects chunk-related data structures. */ -extern malloc_mutex_t huge_mtx; - -void *huge_malloc(arena_t *arena, size_t size, bool zero, - dss_prec_t dss_prec); -void *huge_palloc(arena_t *arena, size_t size, size_t alignment, bool zero, - dss_prec_t dss_prec); +void *huge_malloc(arena_t *arena, size_t size, bool zero); +void *huge_palloc(arena_t *arena, size_t size, size_t alignment, bool zero); bool huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra); void *huge_ralloc(arena_t *arena, void *ptr, size_t oldsize, size_t size, - size_t extra, size_t alignment, bool zero, bool try_tcache_dalloc, - dss_prec_t dss_prec); + size_t extra, size_t alignment, bool zero, bool try_tcache_dalloc); #ifdef JEMALLOC_JET typedef void (huge_dalloc_junk_t)(void *, size_t); extern huge_dalloc_junk_t *huge_dalloc_junk; #endif -void huge_dalloc(void *ptr, bool unmap); +void huge_dalloc(void *ptr); size_t huge_salloc(const void *ptr); -dss_prec_t huge_dss_prec_get(arena_t *arena); prof_ctx_t *huge_prof_ctx_get(const void *ptr); void huge_prof_ctx_set(const void *ptr, prof_ctx_t *ctx); bool huge_boot(void); diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 9e779c65..c9462e52 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -122,13 +122,6 @@ static const bool config_prof_libunwind = false #endif ; -static const bool config_mremap = -#ifdef JEMALLOC_MREMAP - true -#else - false 
-#endif - ; static const bool config_munmap = #ifdef JEMALLOC_MUNMAP true @@ -702,8 +695,7 @@ imalloct(size_t size, bool try_tcache, arena_t *arena) if (size <= arena_maxclass) return (arena_malloc(arena, size, false, try_tcache)); else - return (huge_malloc(arena, size, false, - huge_dss_prec_get(arena))); + return (huge_malloc(arena, size, false)); } JEMALLOC_ALWAYS_INLINE void * @@ -720,8 +712,7 @@ icalloct(size_t size, bool try_tcache, arena_t *arena) if (size <= arena_maxclass) return (arena_malloc(arena, size, true, try_tcache)); else - return (huge_malloc(arena, size, true, - huge_dss_prec_get(arena))); + return (huge_malloc(arena, size, true)); } JEMALLOC_ALWAYS_INLINE void * @@ -747,11 +738,9 @@ ipalloct(size_t usize, size_t alignment, bool zero, bool try_tcache, ret = arena_palloc(choose_arena(arena), usize, alignment, zero); } else if (alignment <= chunksize) - ret = huge_malloc(arena, usize, zero, - huge_dss_prec_get(arena)); + ret = huge_malloc(arena, usize, zero); else - ret = huge_palloc(arena, usize, alignment, zero, - huge_dss_prec_get(arena)); + ret = huge_palloc(arena, usize, alignment, zero); } assert(ALIGNMENT_ADDR2BASE(ret, alignment) == ret); @@ -833,7 +822,7 @@ idalloct(void *ptr, bool try_tcache) if (chunk != ptr) arena_dalloc(chunk, ptr, try_tcache); else - huge_dalloc(ptr, true); + huge_dalloc(ptr); } JEMALLOC_ALWAYS_INLINE void @@ -920,7 +909,7 @@ iralloct(void *ptr, size_t size, size_t extra, size_t alignment, bool zero, try_tcache_dalloc)); } else { return (huge_ralloc(arena, ptr, oldsize, size, extra, - alignment, zero, try_tcache_dalloc, huge_dss_prec_get(arena))); + alignment, zero, try_tcache_dalloc)); } } diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index fc959671..09ddd4f3 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -144,13 +144,6 @@ */ #undef JEMALLOC_MUNMAP -/* - * 
If defined, use mremap(...MREMAP_FIXED...) for huge realloc(). This is - * disabled by default because it is Linux-specific and it will cause virtual - * memory map holes, much like munmap(2) does. - */ -#undef JEMALLOC_MREMAP - /* TLS is used to map arenas and magazine caches to threads. */ #undef JEMALLOC_TLS diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 589b56a1..f6c4fbcc 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -5,6 +5,8 @@ arena_alloc_junk_small arena_bin_index arena_bin_info arena_boot +arena_chunk_alloc_huge +arena_chunk_dalloc_huge arena_dalloc arena_dalloc_bin arena_dalloc_bin_locked @@ -86,7 +88,7 @@ base_alloc base_boot base_calloc base_node_alloc -base_node_dealloc +base_node_dalloc base_postfork_child base_postfork_parent base_prefork @@ -103,13 +105,14 @@ bt_init buferror choose_arena choose_arena_hard -chunk_alloc +chunk_alloc_arena +chunk_alloc_base chunk_alloc_default chunk_alloc_dss chunk_alloc_mmap chunk_boot -chunk_dealloc -chunk_dealloc_mmap +chunk_dalloc_default +chunk_dalloc_mmap chunk_dss_boot chunk_dss_postfork_child chunk_dss_postfork_parent @@ -198,9 +201,7 @@ huge_allocated huge_boot huge_dalloc huge_dalloc_junk -huge_dss_prec_get huge_malloc -huge_mtx huge_ndalloc huge_nmalloc huge_palloc diff --git a/include/jemalloc/internal/stats.h b/include/jemalloc/internal/stats.h index 27f68e36..ce96476a 100644 --- a/include/jemalloc/internal/stats.h +++ b/include/jemalloc/internal/stats.h @@ -101,6 +101,11 @@ struct arena_stats_s { uint64_t ndalloc_large; uint64_t nrequests_large; + size_t allocated_huge; + uint64_t nmalloc_huge; + uint64_t ndalloc_huge; + uint64_t nrequests_huge; + /* * One element for each possible size class, including sizes that * overlap with bin size classes. 
This is necessary because ipalloc() diff --git a/include/jemalloc/jemalloc_protos.h.in b/include/jemalloc/jemalloc_protos.h.in index 8e945fa5..67268c47 100644 --- a/include/jemalloc/jemalloc_protos.h.in +++ b/include/jemalloc/jemalloc_protos.h.in @@ -46,4 +46,4 @@ JEMALLOC_EXPORT void * @je_@valloc(size_t size) JEMALLOC_ATTR(malloc); #endif typedef void *(chunk_alloc_t)(size_t, size_t, bool *, unsigned); -typedef bool (chunk_dealloc_t)(void *, size_t, unsigned); +typedef bool (chunk_dalloc_t)(void *, size_t, unsigned); diff --git a/src/arena.c b/src/arena.c index 6db2b630..f5d7d062 100644 --- a/src/arena.c +++ b/src/arena.c @@ -559,6 +559,65 @@ arena_chunk_init_spare(arena_t *arena) return (chunk); } +static arena_chunk_t * +arena_chunk_alloc_internal(arena_t *arena, size_t size, size_t alignment, + bool *zero) +{ + arena_chunk_t *chunk; + chunk_alloc_t *chunk_alloc; + chunk_dalloc_t *chunk_dalloc; + + chunk_alloc = arena->chunk_alloc; + chunk_dalloc = arena->chunk_dalloc; + malloc_mutex_unlock(&arena->lock); + chunk = (arena_chunk_t *)chunk_alloc_arena(chunk_alloc, chunk_dalloc, + arena->ind, size, alignment, zero); + malloc_mutex_lock(&arena->lock); + if (config_stats && chunk != NULL) + arena->stats.mapped += chunksize; + + return (chunk); +} + +void * +arena_chunk_alloc_huge(arena_t *arena, size_t size, size_t alignment, + bool *zero) +{ + void *ret; + chunk_alloc_t *chunk_alloc; + chunk_dalloc_t *chunk_dalloc; + + malloc_mutex_lock(&arena->lock); + chunk_alloc = arena->chunk_alloc; + chunk_dalloc = arena->chunk_dalloc; + if (config_stats) { + /* Optimistically update stats prior to unlocking. 
*/ + arena->stats.mapped += size; + arena->stats.allocated_huge += size; + arena->stats.nmalloc_huge++; + arena->stats.nrequests_huge++; + } + arena->nactive += (size >> LG_PAGE); + malloc_mutex_unlock(&arena->lock); + + ret = chunk_alloc_arena(chunk_alloc, chunk_dalloc, arena->ind, + size, alignment, zero); + if (config_stats) { + if (ret != NULL) + stats_cactive_add(size); + else { + /* Revert optimistic stats updates. */ + malloc_mutex_lock(&arena->lock); + arena->stats.mapped -= size; + arena->stats.allocated_huge -= size; + arena->stats.nmalloc_huge--; + malloc_mutex_unlock(&arena->lock); + } + } + + return (ret); +} + static arena_chunk_t * arena_chunk_init_hard(arena_t *arena) { @@ -569,14 +628,9 @@ arena_chunk_init_hard(arena_t *arena) assert(arena->spare == NULL); zero = false; - malloc_mutex_unlock(&arena->lock); - chunk = (arena_chunk_t *)chunk_alloc(arena, chunksize, chunksize, - false, &zero, arena->dss_prec); - malloc_mutex_lock(&arena->lock); + chunk = arena_chunk_alloc_internal(arena, chunksize, chunksize, &zero); if (chunk == NULL) return (NULL); - if (config_stats) - arena->stats.mapped += chunksize; chunk->arena = arena; @@ -645,7 +699,38 @@ arena_chunk_alloc(arena_t *arena) } static void -arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk) +arena_chunk_dalloc_internal(arena_t *arena, arena_chunk_t *chunk) +{ + chunk_dalloc_t *chunk_dalloc; + + chunk_dalloc = arena->chunk_dalloc; + malloc_mutex_unlock(&arena->lock); + chunk_dalloc((void *)chunk, chunksize, arena->ind); + malloc_mutex_lock(&arena->lock); + if (config_stats) + arena->stats.mapped -= chunksize; +} + +void +arena_chunk_dalloc_huge(arena_t *arena, void *chunk, size_t size) +{ + chunk_dalloc_t *chunk_dalloc; + + malloc_mutex_lock(&arena->lock); + chunk_dalloc = arena->chunk_dalloc; + if (config_stats) { + arena->stats.mapped -= size; + arena->stats.allocated_huge -= size; + arena->stats.ndalloc_huge++; + stats_cactive_sub(size); + } + arena->nactive -= (size >> LG_PAGE); + 
malloc_mutex_unlock(&arena->lock); + chunk_dalloc(chunk, size, arena->ind); +} + +static void +arena_chunk_dalloc(arena_t *arena, arena_chunk_t *chunk) { assert(arena_mapbits_allocated_get(chunk, map_bias) == 0); assert(arena_mapbits_allocated_get(chunk, chunk_npages-1) == 0); @@ -667,11 +752,7 @@ arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk) arena_chunk_t *spare = arena->spare; arena->spare = chunk; - malloc_mutex_unlock(&arena->lock); - chunk_dealloc(arena, (void *)spare, chunksize, true); - malloc_mutex_lock(&arena->lock); - if (config_stats) - arena->stats.mapped -= chunksize; + arena_chunk_dalloc_internal(arena, spare); } else arena->spare = chunk; } @@ -1231,7 +1312,7 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, bool cleaned) if (size == arena_maxclass) { assert(run_ind == map_bias); assert(run_pages == (arena_maxclass >> LG_PAGE)); - arena_chunk_dealloc(arena, chunk); + arena_chunk_dalloc(arena, chunk); } /* @@ -2283,6 +2364,10 @@ arena_stats_merge(arena_t *arena, const char **dss, size_t *nactive, astats->nmalloc_large += arena->stats.nmalloc_large; astats->ndalloc_large += arena->stats.ndalloc_large; astats->nrequests_large += arena->stats.nrequests_large; + astats->allocated_huge += arena->stats.allocated_huge; + astats->nmalloc_huge += arena->stats.nmalloc_huge; + astats->ndalloc_huge += arena->stats.ndalloc_huge; + astats->nrequests_huge += arena->stats.nrequests_huge; for (i = 0; i < nlclasses; i++) { lstats[i].nmalloc += arena->stats.lstats[i].nmalloc; @@ -2320,7 +2405,7 @@ arena_new(arena_t *arena, unsigned ind) arena->ind = ind; arena->nthreads = 0; arena->chunk_alloc = chunk_alloc_default; - arena->chunk_dealloc = (chunk_dealloc_t *)chunk_unmap; + arena->chunk_dalloc = chunk_dalloc_default; if (malloc_mutex_init(&arena->lock)) return (true); diff --git a/src/base.c b/src/base.c index e8b312ef..409c7bb7 100644 --- a/src/base.c +++ b/src/base.c @@ -16,24 +16,16 @@ static void *base_next_addr; static void 
*base_past_addr; /* Addr immediately past base_pages. */ static extent_node_t *base_nodes; -/******************************************************************************/ -/* Function prototypes for non-inline static functions. */ - -static bool base_pages_alloc(size_t minsize); - /******************************************************************************/ static bool base_pages_alloc(size_t minsize) { size_t csize; - bool zero; assert(minsize != 0); csize = CHUNK_CEILING(minsize); - zero = false; - base_pages = chunk_alloc(NULL, csize, chunksize, true, &zero, - chunk_dss_prec_get()); + base_pages = chunk_alloc_base(csize); if (base_pages == NULL) return (true); base_next_addr = base_pages; @@ -100,7 +92,7 @@ base_node_alloc(void) } void -base_node_dealloc(extent_node_t *node) +base_node_dalloc(extent_node_t *node) { JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(node, sizeof(extent_node_t)); diff --git a/src/chunk.c b/src/chunk.c index 8bb07229..38d02868 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -31,13 +31,12 @@ size_t map_bias; size_t arena_maxclass; /* Max size class for arenas. */ /******************************************************************************/ -/* Function prototypes for non-inline static functions. */ +/* + * Function prototypes for static functions that are referenced prior to + * definition. 
+ */ -static void *chunk_recycle(extent_tree_t *chunks_szad, - extent_tree_t *chunks_ad, size_t size, size_t alignment, bool base, - bool *zero); -static void chunk_record(extent_tree_t *chunks_szad, - extent_tree_t *chunks_ad, void *chunk, size_t size); +static void chunk_dalloc_core(void *chunk, size_t size); /******************************************************************************/ @@ -104,7 +103,7 @@ chunk_recycle(extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, size_t size, malloc_mutex_unlock(&chunks_mtx); node = base_node_alloc(); if (node == NULL) { - chunk_dealloc(NULL, ret, size, true); + chunk_dalloc_core(ret, size); return (NULL); } malloc_mutex_lock(&chunks_mtx); @@ -119,7 +118,7 @@ chunk_recycle(extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, size_t size, malloc_mutex_unlock(&chunks_mtx); if (node != NULL) - base_node_dealloc(node); + base_node_dalloc(node); if (*zero) { if (zeroed == false) memset(ret, 0, size); @@ -179,9 +178,73 @@ chunk_alloc_core(size_t size, size_t alignment, bool base, bool *zero, return (NULL); } -/* - * Default arena chunk allocation routine in the absence of user-override. 
- */ +static bool +chunk_register(void *chunk, size_t size, bool base) +{ + + assert(chunk != NULL); + assert(CHUNK_ADDR2BASE(chunk) == chunk); + + if (config_ivsalloc && base == false) { + if (rtree_set(chunks_rtree, (uintptr_t)chunk, 1)) + return (true); + } + if (config_stats || config_prof) { + bool gdump; + malloc_mutex_lock(&chunks_mtx); + if (config_stats) + stats_chunks.nchunks += (size / chunksize); + stats_chunks.curchunks += (size / chunksize); + if (stats_chunks.curchunks > stats_chunks.highchunks) { + stats_chunks.highchunks = + stats_chunks.curchunks; + if (config_prof) + gdump = true; + } else if (config_prof) + gdump = false; + malloc_mutex_unlock(&chunks_mtx); + if (config_prof && opt_prof && opt_prof_gdump && gdump) + prof_gdump(); + } + if (config_valgrind) + JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(chunk, size); + return (false); +} + +void * +chunk_alloc_base(size_t size) +{ + void *ret; + bool zero; + + zero = false; + ret = chunk_alloc_core(size, chunksize, true, &zero, + chunk_dss_prec_get()); + if (ret == NULL) + return (NULL); + if (chunk_register(ret, size, true)) { + chunk_dalloc_core(ret, size); + return (NULL); + } + return (ret); +} + +void * +chunk_alloc_arena(chunk_alloc_t *chunk_alloc, chunk_dalloc_t *chunk_dalloc, + unsigned arena_ind, size_t size, size_t alignment, bool *zero) +{ + void *ret; + + ret = chunk_alloc(size, alignment, zero, arena_ind); + if (ret != NULL && chunk_register(ret, size, false)) { + chunk_dalloc(ret, size, arena_ind); + ret = NULL; + } + + return (ret); +} + +/* Default arena chunk allocation routine in the absence of user override. 
*/ void * chunk_alloc_default(size_t size, size_t alignment, bool *zero, unsigned arena_ind) @@ -191,48 +254,6 @@ chunk_alloc_default(size_t size, size_t alignment, bool *zero, arenas[arena_ind]->dss_prec)); } -void * -chunk_alloc(arena_t *arena, size_t size, size_t alignment, bool base, - bool *zero, dss_prec_t dss_prec) -{ - void *ret; - - if (arena) - ret = arena->chunk_alloc(size, alignment, zero, arena->ind); - else - ret = chunk_alloc_core(size, alignment, base, zero, dss_prec); - - if (ret != NULL) { - if (config_ivsalloc && base == false) { - if (rtree_set(chunks_rtree, (uintptr_t)ret, 1)) { - chunk_dealloc(arena, ret, size, true); - return (NULL); - } - } - if (config_stats || config_prof) { - bool gdump; - malloc_mutex_lock(&chunks_mtx); - if (config_stats) - stats_chunks.nchunks += (size / chunksize); - stats_chunks.curchunks += (size / chunksize); - if (stats_chunks.curchunks > stats_chunks.highchunks) { - stats_chunks.highchunks = - stats_chunks.curchunks; - if (config_prof) - gdump = true; - } else if (config_prof) - gdump = false; - malloc_mutex_unlock(&chunks_mtx); - if (config_prof && opt_prof && opt_prof_gdump && gdump) - prof_gdump(); - } - if (config_valgrind) - JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, size); - } - assert(CHUNK_ADDR2BASE(ret) == ret); - return (ret); -} - static void chunk_record(extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, void *chunk, size_t size) @@ -316,9 +337,9 @@ label_return: * avoid potential deadlock. 
*/ if (xnode != NULL) - base_node_dealloc(xnode); + base_node_dalloc(xnode); if (xprev != NULL) - base_node_dealloc(xprev); + base_node_dalloc(xprev); } void @@ -331,12 +352,12 @@ chunk_unmap(void *chunk, size_t size) if (have_dss && chunk_in_dss(chunk)) chunk_record(&chunks_szad_dss, &chunks_ad_dss, chunk, size); - else if (chunk_dealloc_mmap(chunk, size)) + else if (chunk_dalloc_mmap(chunk, size)) chunk_record(&chunks_szad_mmap, &chunks_ad_mmap, chunk, size); } -void -chunk_dealloc(arena_t *arena, void *chunk, size_t size, bool unmap) +static void +chunk_dalloc_core(void *chunk, size_t size) { assert(chunk != NULL); @@ -353,12 +374,16 @@ chunk_dealloc(arena_t *arena, void *chunk, size_t size, bool unmap) malloc_mutex_unlock(&chunks_mtx); } - if (unmap) { - if (arena) - arena->chunk_dealloc(chunk, size, arena->ind); - else - chunk_unmap(chunk, size); - } + chunk_unmap(chunk, size); +} + +/* Default arena chunk deallocation routine in the absence of user override. */ +bool +chunk_dalloc_default(void *chunk, size_t size, unsigned arena_ind) +{ + + chunk_dalloc_core(chunk, size); + return (false); } bool diff --git a/src/chunk_mmap.c b/src/chunk_mmap.c index 2056d793..f960e068 100644 --- a/src/chunk_mmap.c +++ b/src/chunk_mmap.c @@ -200,7 +200,7 @@ chunk_alloc_mmap(size_t size, size_t alignment, bool *zero) } bool -chunk_dealloc_mmap(void *chunk, size_t size) +chunk_dalloc_mmap(void *chunk, size_t size) { if (config_munmap) diff --git a/src/ctl.c b/src/ctl.c index 395c32a1..a193605d 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -76,7 +76,6 @@ CTL_PROTO(thread_deallocatedp) CTL_PROTO(config_debug) CTL_PROTO(config_fill) CTL_PROTO(config_lazy_lock) -CTL_PROTO(config_mremap) CTL_PROTO(config_munmap) CTL_PROTO(config_prof) CTL_PROTO(config_prof_libgcc) @@ -114,7 +113,7 @@ CTL_PROTO(arena_i_purge) static void arena_purge(unsigned arena_ind); CTL_PROTO(arena_i_dss) CTL_PROTO(arena_i_chunk_alloc) -CTL_PROTO(arena_i_chunk_dealloc) +CTL_PROTO(arena_i_chunk_dalloc) 
INDEX_PROTO(arena_i) CTL_PROTO(arenas_bin_i_size) CTL_PROTO(arenas_bin_i_nregs) @@ -137,9 +136,6 @@ CTL_PROTO(prof_interval) CTL_PROTO(stats_chunks_current) CTL_PROTO(stats_chunks_total) CTL_PROTO(stats_chunks_high) -CTL_PROTO(stats_huge_allocated) -CTL_PROTO(stats_huge_nmalloc) -CTL_PROTO(stats_huge_ndalloc) CTL_PROTO(stats_arenas_i_small_allocated) CTL_PROTO(stats_arenas_i_small_nmalloc) CTL_PROTO(stats_arenas_i_small_ndalloc) @@ -148,6 +144,10 @@ CTL_PROTO(stats_arenas_i_large_allocated) CTL_PROTO(stats_arenas_i_large_nmalloc) CTL_PROTO(stats_arenas_i_large_ndalloc) CTL_PROTO(stats_arenas_i_large_nrequests) +CTL_PROTO(stats_arenas_i_huge_allocated) +CTL_PROTO(stats_arenas_i_huge_nmalloc) +CTL_PROTO(stats_arenas_i_huge_ndalloc) +CTL_PROTO(stats_arenas_i_huge_nrequests) CTL_PROTO(stats_arenas_i_bins_j_allocated) CTL_PROTO(stats_arenas_i_bins_j_nmalloc) CTL_PROTO(stats_arenas_i_bins_j_ndalloc) @@ -214,7 +214,6 @@ static const ctl_named_node_t config_node[] = { {NAME("debug"), CTL(config_debug)}, {NAME("fill"), CTL(config_fill)}, {NAME("lazy_lock"), CTL(config_lazy_lock)}, - {NAME("mremap"), CTL(config_mremap)}, {NAME("munmap"), CTL(config_munmap)}, {NAME("prof"), CTL(config_prof)}, {NAME("prof_libgcc"), CTL(config_prof_libgcc)}, @@ -255,7 +254,7 @@ static const ctl_named_node_t opt_node[] = { static const ctl_named_node_t chunk_node[] = { {NAME("alloc"), CTL(arena_i_chunk_alloc)}, - {NAME("dealloc"), CTL(arena_i_chunk_dealloc)} + {NAME("dalloc"), CTL(arena_i_chunk_dalloc)} }; static const ctl_named_node_t arena_i_node[] = { @@ -321,12 +320,6 @@ static const ctl_named_node_t stats_chunks_node[] = { {NAME("high"), CTL(stats_chunks_high)} }; -static const ctl_named_node_t stats_huge_node[] = { - {NAME("allocated"), CTL(stats_huge_allocated)}, - {NAME("nmalloc"), CTL(stats_huge_nmalloc)}, - {NAME("ndalloc"), CTL(stats_huge_ndalloc)} -}; - static const ctl_named_node_t stats_arenas_i_small_node[] = { {NAME("allocated"), CTL(stats_arenas_i_small_allocated)}, 
{NAME("nmalloc"), CTL(stats_arenas_i_small_nmalloc)}, @@ -341,6 +334,13 @@ static const ctl_named_node_t stats_arenas_i_large_node[] = { {NAME("nrequests"), CTL(stats_arenas_i_large_nrequests)} }; +static const ctl_named_node_t stats_arenas_i_huge_node[] = { + {NAME("allocated"), CTL(stats_arenas_i_huge_allocated)}, + {NAME("nmalloc"), CTL(stats_arenas_i_huge_nmalloc)}, + {NAME("ndalloc"), CTL(stats_arenas_i_huge_ndalloc)}, + {NAME("nrequests"), CTL(stats_arenas_i_huge_nrequests)}, +}; + static const ctl_named_node_t stats_arenas_i_bins_j_node[] = { {NAME("allocated"), CTL(stats_arenas_i_bins_j_allocated)}, {NAME("nmalloc"), CTL(stats_arenas_i_bins_j_nmalloc)}, @@ -385,6 +385,7 @@ static const ctl_named_node_t stats_arenas_i_node[] = { {NAME("purged"), CTL(stats_arenas_i_purged)}, {NAME("small"), CHILD(named, stats_arenas_i_small)}, {NAME("large"), CHILD(named, stats_arenas_i_large)}, + {NAME("huge"), CHILD(named, stats_arenas_i_huge)}, {NAME("bins"), CHILD(indexed, stats_arenas_i_bins)}, {NAME("lruns"), CHILD(indexed, stats_arenas_i_lruns)} }; @@ -402,7 +403,6 @@ static const ctl_named_node_t stats_node[] = { {NAME("active"), CTL(stats_active)}, {NAME("mapped"), CTL(stats_mapped)}, {NAME("chunks"), CHILD(named, stats_chunks)}, - {NAME("huge"), CHILD(named, stats_huge)}, {NAME("arenas"), CHILD(indexed, stats_arenas)} }; @@ -500,6 +500,11 @@ ctl_arena_stats_smerge(ctl_arena_stats_t *sstats, ctl_arena_stats_t *astats) sstats->astats.ndalloc_large += astats->astats.ndalloc_large; sstats->astats.nrequests_large += astats->astats.nrequests_large; + sstats->astats.allocated_huge += astats->astats.allocated_huge; + sstats->astats.nmalloc_huge += astats->astats.nmalloc_huge; + sstats->astats.ndalloc_huge += astats->astats.ndalloc_huge; + sstats->astats.nrequests_huge += astats->astats.nrequests_huge; + for (i = 0; i < nlclasses; i++) { sstats->lstats[i].nmalloc += astats->lstats[i].nmalloc; sstats->lstats[i].ndalloc += astats->lstats[i].ndalloc; @@ -626,12 +631,6 @@ 
ctl_refresh(void) ctl_stats.chunks.total = stats_chunks.nchunks; ctl_stats.chunks.high = stats_chunks.highchunks; malloc_mutex_unlock(&chunks_mtx); - - malloc_mutex_lock(&huge_mtx); - ctl_stats.huge.allocated = huge_allocated; - ctl_stats.huge.nmalloc = huge_nmalloc; - ctl_stats.huge.ndalloc = huge_ndalloc; - malloc_mutex_unlock(&huge_mtx); } /* @@ -662,10 +661,9 @@ ctl_refresh(void) ctl_stats.allocated = ctl_stats.arenas[ctl_stats.narenas].allocated_small + ctl_stats.arenas[ctl_stats.narenas].astats.allocated_large - + ctl_stats.huge.allocated; + + ctl_stats.arenas[ctl_stats.narenas].astats.allocated_huge; ctl_stats.active = - (ctl_stats.arenas[ctl_stats.narenas].pactive << LG_PAGE) - + ctl_stats.huge.allocated; + (ctl_stats.arenas[ctl_stats.narenas].pactive << LG_PAGE); ctl_stats.mapped = (ctl_stats.chunks.current << opt_lg_chunk); } @@ -1140,7 +1138,6 @@ label_return: CTL_RO_BOOL_CONFIG_GEN(config_debug) CTL_RO_BOOL_CONFIG_GEN(config_fill) CTL_RO_BOOL_CONFIG_GEN(config_lazy_lock) -CTL_RO_BOOL_CONFIG_GEN(config_mremap) CTL_RO_BOOL_CONFIG_GEN(config_munmap) CTL_RO_BOOL_CONFIG_GEN(config_prof) CTL_RO_BOOL_CONFIG_GEN(config_prof_libgcc) @@ -1377,8 +1374,8 @@ label_return: } static int -arena_i_chunk_alloc_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, - void *newp, size_t newlen) +arena_i_chunk_alloc_ctl(const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { int ret; unsigned arena_ind = mib[1]; @@ -1402,8 +1399,8 @@ label_outer_return: } static int -arena_i_chunk_dealloc_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, - void *newp, size_t newlen) +arena_i_chunk_dalloc_ctl(const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { int ret; @@ -1413,8 +1410,8 @@ arena_i_chunk_dealloc_ctl(const size_t *mib, size_t miblen, void *oldp, size_t * malloc_mutex_lock(&ctl_mtx); if (arena_ind < narenas_total && (arena = arenas[arena_ind]) != NULL) { 
malloc_mutex_lock(&arena->lock); - READ(arena->chunk_dealloc, chunk_dealloc_t *); - WRITE(arena->chunk_dealloc, chunk_dealloc_t *); + READ(arena->chunk_dalloc, chunk_dalloc_t *); + WRITE(arena->chunk_dalloc, chunk_dalloc_t *); } else { ret = EFAULT; goto label_outer_return; @@ -1611,9 +1608,6 @@ CTL_RO_CGEN(config_stats, stats_chunks_current, ctl_stats.chunks.current, size_t) CTL_RO_CGEN(config_stats, stats_chunks_total, ctl_stats.chunks.total, uint64_t) CTL_RO_CGEN(config_stats, stats_chunks_high, ctl_stats.chunks.high, size_t) -CTL_RO_CGEN(config_stats, stats_huge_allocated, huge_allocated, size_t) -CTL_RO_CGEN(config_stats, stats_huge_nmalloc, huge_nmalloc, uint64_t) -CTL_RO_CGEN(config_stats, stats_huge_ndalloc, huge_ndalloc, uint64_t) CTL_RO_GEN(stats_arenas_i_dss, ctl_stats.arenas[mib[2]].dss, const char *) CTL_RO_GEN(stats_arenas_i_nthreads, ctl_stats.arenas[mib[2]].nthreads, unsigned) @@ -1644,6 +1638,14 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_large_ndalloc, ctl_stats.arenas[mib[2]].astats.ndalloc_large, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_large_nrequests, ctl_stats.arenas[mib[2]].astats.nrequests_large, uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_huge_allocated, + ctl_stats.arenas[mib[2]].astats.allocated_huge, size_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_huge_nmalloc, + ctl_stats.arenas[mib[2]].astats.nmalloc_huge, uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_huge_ndalloc, + ctl_stats.arenas[mib[2]].astats.ndalloc_huge, uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_huge_nrequests, + ctl_stats.arenas[mib[2]].astats.nrequests_huge, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_allocated, ctl_stats.arenas[mib[2]].bstats[mib[4]].allocated, size_t) diff --git a/src/huge.c b/src/huge.c index ab05c905..d08ed4a9 100644 --- a/src/huge.c +++ b/src/huge.c @@ -4,11 +4,8 @@ /******************************************************************************/ /* Data. 
*/ -uint64_t huge_nmalloc; -uint64_t huge_ndalloc; -size_t huge_allocated; - -malloc_mutex_t huge_mtx; +/* Protects chunk-related data structures. */ +static malloc_mutex_t huge_mtx; /******************************************************************************/ @@ -16,15 +13,14 @@ malloc_mutex_t huge_mtx; static extent_tree_t huge; void * -huge_malloc(arena_t *arena, size_t size, bool zero, dss_prec_t dss_prec) +huge_malloc(arena_t *arena, size_t size, bool zero) { - return (huge_palloc(arena, size, chunksize, zero, dss_prec)); + return (huge_palloc(arena, size, chunksize, zero)); } void * -huge_palloc(arena_t *arena, size_t size, size_t alignment, bool zero, - dss_prec_t dss_prec) +huge_palloc(arena_t *arena, size_t size, size_t alignment, bool zero) { void *ret; size_t csize; @@ -49,9 +45,10 @@ huge_palloc(arena_t *arena, size_t size, size_t alignment, bool zero, * it is possible to make correct junk/zero fill decisions below. */ is_zeroed = zero; - ret = chunk_alloc(arena, csize, alignment, false, &is_zeroed, dss_prec); + arena = choose_arena(arena); + ret = arena_chunk_alloc_huge(arena, csize, alignment, &is_zeroed); if (ret == NULL) { - base_node_dealloc(node); + base_node_dalloc(node); return (NULL); } @@ -62,11 +59,6 @@ huge_palloc(arena_t *arena, size_t size, size_t alignment, bool zero, malloc_mutex_lock(&huge_mtx); extent_tree_ad_insert(&huge, node); - if (config_stats) { - stats_cactive_add(csize); - huge_nmalloc++; - huge_allocated += csize; - } malloc_mutex_unlock(&huge_mtx); if (config_fill && zero == false) { @@ -99,8 +91,7 @@ huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra) void * huge_ralloc(arena_t *arena, void *ptr, size_t oldsize, size_t size, - size_t extra, size_t alignment, bool zero, bool try_tcache_dalloc, - dss_prec_t dss_prec) + size_t extra, size_t alignment, bool zero, bool try_tcache_dalloc) { void *ret; size_t copysize; @@ -115,18 +106,18 @@ huge_ralloc(arena_t *arena, void *ptr, size_t oldsize, size_t size, 
* space and copying. */ if (alignment > chunksize) - ret = huge_palloc(arena, size + extra, alignment, zero, dss_prec); + ret = huge_palloc(arena, size + extra, alignment, zero); else - ret = huge_malloc(arena, size + extra, zero, dss_prec); + ret = huge_malloc(arena, size + extra, zero); if (ret == NULL) { if (extra == 0) return (NULL); /* Try again, this time without extra. */ if (alignment > chunksize) - ret = huge_palloc(arena, size, alignment, zero, dss_prec); + ret = huge_palloc(arena, size, alignment, zero); else - ret = huge_malloc(arena, size, zero, dss_prec); + ret = huge_malloc(arena, size, zero); if (ret == NULL) return (NULL); @@ -137,59 +128,8 @@ huge_ralloc(arena_t *arena, void *ptr, size_t oldsize, size_t size, * expectation that the extra bytes will be reliably preserved. */ copysize = (size < oldsize) ? size : oldsize; - -#ifdef JEMALLOC_MREMAP - /* - * Use mremap(2) if this is a huge-->huge reallocation, and neither the - * source nor the destination are in dss. - */ - if (oldsize >= chunksize && (have_dss == false || (chunk_in_dss(ptr) - == false && chunk_in_dss(ret) == false))) { - size_t newsize = huge_salloc(ret); - - /* - * Remove ptr from the tree of huge allocations before - * performing the remap operation, in order to avoid the - * possibility of another thread acquiring that mapping before - * this one removes it from the tree. - */ - huge_dalloc(ptr, false); - if (mremap(ptr, oldsize, newsize, MREMAP_MAYMOVE|MREMAP_FIXED, - ret) == MAP_FAILED) { - /* - * Assuming no chunk management bugs in the allocator, - * the only documented way an error can occur here is - * if the application changed the map type for a - * portion of the old allocation. This is firmly in - * undefined behavior territory, so write a diagnostic - * message, and optionally abort. 
- */ - char buf[BUFERROR_BUF]; - - buferror(get_errno(), buf, sizeof(buf)); - malloc_printf(": Error in mremap(): %s\n", - buf); - if (opt_abort) - abort(); - memcpy(ret, ptr, copysize); - chunk_dealloc_mmap(ptr, oldsize); - } else if (config_fill && zero == false && opt_junk && oldsize - < newsize) { - /* - * mremap(2) clobbers the original mapping, so - * junk/zero filling is not preserved. There is no - * need to zero fill here, since any trailing - * uninititialized memory is demand-zeroed by the - * kernel, but junk filling must be redone. - */ - memset(ret + oldsize, 0xa5, newsize - oldsize); - } - } else -#endif - { - memcpy(ret, ptr, copysize); - iqalloct(ptr, try_tcache_dalloc); - } + memcpy(ret, ptr, copysize); + iqalloct(ptr, try_tcache_dalloc); return (ret); } @@ -217,7 +157,7 @@ huge_dalloc_junk_t *huge_dalloc_junk = JEMALLOC_N(huge_dalloc_junk_impl); #endif void -huge_dalloc(void *ptr, bool unmap) +huge_dalloc(void *ptr) { extent_node_t *node, key; @@ -230,20 +170,11 @@ huge_dalloc(void *ptr, bool unmap) assert(node->addr == ptr); extent_tree_ad_remove(&huge, node); - if (config_stats) { - stats_cactive_sub(node->size); - huge_ndalloc++; - huge_allocated -= node->size; - } - malloc_mutex_unlock(&huge_mtx); - if (unmap) - huge_dalloc_junk(node->addr, node->size); - - chunk_dealloc(node->arena, node->addr, node->size, unmap); - - base_node_dealloc(node); + huge_dalloc_junk(node->addr, node->size); + arena_chunk_dalloc_huge(node->arena, node->addr, node->size); + base_node_dalloc(node); } size_t @@ -266,13 +197,6 @@ huge_salloc(const void *ptr) return (ret); } -dss_prec_t -huge_dss_prec_get(arena_t *arena) -{ - - return (arena_dss_prec_get(choose_arena(arena))); -} - prof_ctx_t * huge_prof_ctx_get(const void *ptr) { @@ -319,12 +243,6 @@ huge_boot(void) return (true); extent_tree_ad_new(&huge); - if (config_stats) { - huge_nmalloc = 0; - huge_ndalloc = 0; - huge_allocated = 0; - } - return (false); } diff --git a/src/jemalloc.c b/src/jemalloc.c index 
e0f9275f..43a494e4 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1983,7 +1983,7 @@ a0alloc(size_t size, bool zero) if (size <= arena_maxclass) return (arena_malloc(arenas[0], size, zero, false)); else - return (huge_malloc(NULL, size, zero, huge_dss_prec_get(arenas[0]))); + return (huge_malloc(NULL, size, zero)); } void * @@ -2012,7 +2012,7 @@ a0free(void *ptr) if (chunk != ptr) arena_dalloc(chunk, ptr, false); else - huge_dalloc(ptr, true); + huge_dalloc(ptr); } /******************************************************************************/ diff --git a/src/stats.c b/src/stats.c index bef2ab33..a0eb2971 100644 --- a/src/stats.c +++ b/src/stats.c @@ -213,6 +213,8 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, uint64_t small_nmalloc, small_ndalloc, small_nrequests; size_t large_allocated; uint64_t large_nmalloc, large_ndalloc, large_nrequests; + size_t huge_allocated; + uint64_t huge_nmalloc, huge_ndalloc, huge_nrequests; CTL_GET("arenas.page", &page, size_t); @@ -249,12 +251,19 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, malloc_cprintf(write_cb, cbopaque, "large: %12zu %12"PRIu64" %12"PRIu64" %12"PRIu64"\n", large_allocated, large_nmalloc, large_ndalloc, large_nrequests); + CTL_I_GET("stats.arenas.0.huge.allocated", &huge_allocated, size_t); + CTL_I_GET("stats.arenas.0.huge.nmalloc", &huge_nmalloc, uint64_t); + CTL_I_GET("stats.arenas.0.huge.ndalloc", &huge_ndalloc, uint64_t); + CTL_I_GET("stats.arenas.0.huge.nrequests", &huge_nrequests, uint64_t); + malloc_cprintf(write_cb, cbopaque, + "huge: %12zu %12"PRIu64" %12"PRIu64" %12"PRIu64"\n", + huge_allocated, huge_nmalloc, huge_ndalloc, huge_nrequests); malloc_cprintf(write_cb, cbopaque, "total: %12zu %12"PRIu64" %12"PRIu64" %12"PRIu64"\n", - small_allocated + large_allocated, - small_nmalloc + large_nmalloc, - small_ndalloc + large_ndalloc, - small_nrequests + large_nrequests); + small_allocated + large_allocated + huge_allocated, + small_nmalloc 
+ large_nmalloc + huge_nmalloc, + small_ndalloc + large_ndalloc + huge_ndalloc, + small_nrequests + large_nrequests + huge_nrequests); malloc_cprintf(write_cb, cbopaque, "active: %12zu\n", pactive * page); CTL_I_GET("stats.arenas.0.mapped", &mapped, size_t); malloc_cprintf(write_cb, cbopaque, "mapped: %12zu\n", mapped); @@ -458,8 +467,6 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, size_t allocated, active, mapped; size_t chunks_current, chunks_high; uint64_t chunks_total; - size_t huge_allocated; - uint64_t huge_nmalloc, huge_ndalloc; CTL_GET("stats.cactive", &cactive, size_t *); CTL_GET("stats.allocated", &allocated, size_t); @@ -481,16 +488,6 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, " %13"PRIu64" %12zu %12zu\n", chunks_total, chunks_high, chunks_current); - /* Print huge stats. */ - CTL_GET("stats.huge.nmalloc", &huge_nmalloc, uint64_t); - CTL_GET("stats.huge.ndalloc", &huge_ndalloc, uint64_t); - CTL_GET("stats.huge.allocated", &huge_allocated, size_t); - malloc_cprintf(write_cb, cbopaque, - "huge: nmalloc ndalloc allocated\n"); - malloc_cprintf(write_cb, cbopaque, - " %12"PRIu64" %12"PRIu64" %12zu\n", - huge_nmalloc, huge_ndalloc, huge_allocated); - if (merged) { unsigned narenas; diff --git a/test/integration/chunk.c b/test/integration/chunk.c index 13659894..28537098 100644 --- a/test/integration/chunk.c +++ b/test/integration/chunk.c @@ -1,13 +1,13 @@ #include "test/jemalloc_test.h" chunk_alloc_t *old_alloc; -chunk_dealloc_t *old_dealloc; +chunk_dalloc_t *old_dalloc; bool -chunk_dealloc(void *chunk, size_t size, unsigned arena_ind) +chunk_dalloc(void *chunk, size_t size, unsigned arena_ind) { - return (old_dealloc(chunk, size, arena_ind)); + return (old_dalloc(chunk, size, arena_ind)); } void * @@ -21,11 +21,11 @@ TEST_BEGIN(test_chunk) { void *p; chunk_alloc_t *new_alloc; - chunk_dealloc_t *new_dealloc; + chunk_dalloc_t *new_dalloc; size_t old_size, new_size; new_alloc = chunk_alloc; - new_dealloc = 
chunk_dealloc; + new_dalloc = chunk_dalloc; old_size = sizeof(chunk_alloc_t *); new_size = sizeof(chunk_alloc_t *); @@ -34,11 +34,9 @@ TEST_BEGIN(test_chunk) "Unexpected alloc error"); assert_ptr_ne(old_alloc, new_alloc, "Unexpected alloc error"); - assert_d_eq(mallctl("arena.0.chunk.dealloc", &old_dealloc, - &old_size, &new_dealloc, new_size), 0, - "Unexpected dealloc error"); - assert_ptr_ne(old_dealloc, new_dealloc, - "Unexpected dealloc error"); + assert_d_eq(mallctl("arena.0.chunk.dalloc", &old_dalloc, &old_size, + &new_dalloc, new_size), 0, "Unexpected dalloc error"); + assert_ptr_ne(old_dalloc, new_dalloc, "Unexpected dalloc error"); p = mallocx(42, 0); assert_ptr_ne(p, NULL, "Unexpected alloc error"); @@ -47,9 +45,8 @@ TEST_BEGIN(test_chunk) assert_d_eq(mallctl("arena.0.chunk.alloc", NULL, NULL, &old_alloc, old_size), 0, "Unexpected alloc error"); - assert_d_eq(mallctl("arena.0.chunk.dealloc", NULL, - NULL, &old_dealloc, old_size), 0, - "Unexpected dealloc error"); + assert_d_eq(mallctl("arena.0.chunk.dalloc", NULL, NULL, &old_dalloc, + old_size), 0, "Unexpected dalloc error"); } TEST_END diff --git a/test/integration/mremap.c b/test/integration/mremap.c deleted file mode 100644 index a7fb7ef0..00000000 --- a/test/integration/mremap.c +++ /dev/null @@ -1,45 +0,0 @@ -#include "test/jemalloc_test.h" - -TEST_BEGIN(test_mremap) -{ - int err; - size_t sz, lg_chunk, chunksize, i; - char *p, *q; - - sz = sizeof(lg_chunk); - err = mallctl("opt.lg_chunk", &lg_chunk, &sz, NULL, 0); - assert_d_eq(err, 0, "Error in mallctl(): %s", strerror(err)); - chunksize = ((size_t)1U) << lg_chunk; - - p = (char *)malloc(chunksize); - assert_ptr_not_null(p, "malloc(%zu) --> %p", chunksize, p); - memset(p, 'a', chunksize); - - q = (char *)realloc(p, chunksize * 2); - assert_ptr_not_null(q, "realloc(%p, %zu) --> %p", p, chunksize * 2, - q); - for (i = 0; i < chunksize; i++) { - assert_c_eq(q[i], 'a', - "realloc() should preserve existing bytes across copies"); - } - - p = q; - - q = 
(char *)realloc(p, chunksize); - assert_ptr_not_null(q, "realloc(%p, %zu) --> %p", p, chunksize, q); - for (i = 0; i < chunksize; i++) { - assert_c_eq(q[i], 'a', - "realloc() should preserve existing bytes across copies"); - } - - free(q); -} -TEST_END - -int -main(void) -{ - - return (test( - test_mremap)); -} diff --git a/test/unit/junk.c b/test/unit/junk.c index 85bbf9e2..301428f2 100644 --- a/test/unit/junk.c +++ b/test/unit/junk.c @@ -92,12 +92,9 @@ test_junk(size_t sz_min, size_t sz_max) s = (char *)rallocx(s, sz+1, 0); assert_ptr_not_null((void *)s, "Unexpected rallocx() failure"); - if (!config_mremap || sz+1 <= arena_maxclass) { - assert_ptr_eq(most_recently_junked, junked, - "Expected region of size %zu to be " - "junk-filled", - sz); - } + assert_ptr_eq(most_recently_junked, junked, + "Expected region of size %zu to be junk-filled", + sz); } } diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 754834c1..cb120497 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -129,7 +129,6 @@ TEST_BEGIN(test_mallctl_config) TEST_MALLCTL_CONFIG(debug); TEST_MALLCTL_CONFIG(fill); TEST_MALLCTL_CONFIG(lazy_lock); - TEST_MALLCTL_CONFIG(mremap); TEST_MALLCTL_CONFIG(munmap); TEST_MALLCTL_CONFIG(prof); TEST_MALLCTL_CONFIG(prof_libgcc); diff --git a/test/unit/stats.c b/test/unit/stats.c index 03a55c7f..ab87b29b 100644 --- a/test/unit/stats.c +++ b/test/unit/stats.c @@ -60,7 +60,7 @@ TEST_BEGIN(test_stats_huge) void *p; uint64_t epoch; size_t allocated; - uint64_t nmalloc, ndalloc; + uint64_t nmalloc, ndalloc, nrequests; size_t sz; int expected = config_stats ? 
0 : ENOENT; @@ -71,19 +71,23 @@ TEST_BEGIN(test_stats_huge) "Unexpected mallctl() failure"); sz = sizeof(size_t); - assert_d_eq(mallctl("stats.huge.allocated", &allocated, &sz, NULL, 0), - expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.huge.allocated", &allocated, &sz, + NULL, 0), expected, "Unexpected mallctl() result"); sz = sizeof(uint64_t); - assert_d_eq(mallctl("stats.huge.nmalloc", &nmalloc, &sz, NULL, 0), - expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.huge.ndalloc", &ndalloc, &sz, NULL, 0), - expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.huge.nmalloc", &nmalloc, &sz, NULL, + 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.huge.ndalloc", &ndalloc, &sz, NULL, + 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.huge.nrequests", &nrequests, &sz, + NULL, 0), expected, "Unexpected mallctl() result"); if (config_stats) { assert_zu_gt(allocated, 0, "allocated should be greater than zero"); assert_u64_ge(nmalloc, ndalloc, "nmalloc should be at least as large as ndalloc"); + assert_u64_le(nmalloc, nrequests, + "nmalloc should no larger than nrequests"); } dallocx(p, 0); From b4d62cd61b46130b7947c3a427a2b007e7fa0eb8 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 15 May 2014 22:46:24 -0700 Subject: [PATCH 0441/3378] Minor doc edit. --- doc/jemalloc.xml.in | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 46e505fc..308d0c65 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -189,15 +189,15 @@ The posix_memalign function allocates size bytes of memory such that the - allocation's base address is an even multiple of + allocation's base address is a multiple of alignment, and returns the allocation in the value pointed to by ptr. The requested - alignment must be a power of 2 at least as large - as sizeof(void *). 
+ alignment must be a power of 2 at least as large as + sizeof(void *). The aligned_alloc function allocates size bytes of memory such that the - allocation's base address is an even multiple of + allocation's base address is a multiple of alignment. The requested alignment must be a power of 2. Behavior is undefined if size is not an integral multiple of From ed0b0ec935a6df9ef429e56a08c0c9b63c3ba358 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Wed, 21 May 2014 16:38:24 +0900 Subject: [PATCH 0442/3378] Fix manual dependency on jemalloc_test.h --- Makefile.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile.in b/Makefile.in index 90869eb8..4cb1a65a 100644 --- a/Makefile.in +++ b/Makefile.in @@ -228,7 +228,7 @@ HEADER_DIRS = $(srcroot)include/jemalloc/internal \ $(objroot)include/jemalloc $(objroot)include/jemalloc/internal HEADERS = $(wildcard $(foreach dir,$(HEADER_DIRS),$(dir)/*.h)) $(C_OBJS) $(C_PIC_OBJS) $(C_JET_OBJS) $(C_TESTLIB_OBJS) $(TESTS_OBJS): $(HEADERS) -$(TESTS_OBJS): $(objroot)test/unit/jemalloc_test.h +$(TESTS_OBJS): $(objroot)test/include/test/jemalloc_test.h endif $(C_OBJS) $(C_PIC_OBJS) $(C_JET_OBJS) $(C_TESTLIB_OBJS) $(TESTS_OBJS): %.$(O): From 47d58a01ff9d894f854412f3f6d3ba97a7aa2929 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Wed, 21 May 2014 16:59:50 +0900 Subject: [PATCH 0443/3378] Define _CRT_SPINCOUNT in test/src/mtx.c like in src/mutex.c --- test/src/mtx.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/test/src/mtx.c b/test/src/mtx.c index 41b95d59..73bd02f6 100644 --- a/test/src/mtx.c +++ b/test/src/mtx.c @@ -1,5 +1,9 @@ #include "test/jemalloc_test.h" +#ifndef _CRT_SPINCOUNT +#define _CRT_SPINCOUNT 4000 +#endif + bool mtx_init(mtx_t *mtx) { From d6fd11413e1fe33a9bc947d794e880d7d10f7786 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Wed, 21 May 2014 17:04:24 +0900 Subject: [PATCH 0444/3378] Define DLLEXPORT when building .jet objects --- Makefile.in | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/Makefile.in b/Makefile.in index 4cb1a65a..65d73db6 100644 --- a/Makefile.in +++ b/Makefile.in @@ -219,7 +219,7 @@ $(TESTS_STRESS_OBJS): CPPFLAGS += -DJEMALLOC_STRESS_TEST $(TESTS_OBJS): $(objroot)test/%.$(O): $(srcroot)test/%.c $(TESTS_OBJS): CPPFLAGS += -I$(srcroot)test/include -I$(objroot)test/include ifneq ($(IMPORTLIB),$(SO)) -$(C_OBJS): CPPFLAGS += -DDLLEXPORT +$(C_OBJS) $(C_JET_OBJS): CPPFLAGS += -DDLLEXPORT endif ifndef CC_MM From f41f14366877538b03109ecf346dbff2e21bbb16 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Wed, 21 May 2014 17:24:08 +0900 Subject: [PATCH 0445/3378] Replace variable arrays in tests with VARIABLE_ARRAY --- test/unit/hash.c | 4 ++-- test/unit/mallctl.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/test/unit/hash.c b/test/unit/hash.c index abb394ac..77a8cede 100644 --- a/test/unit/hash.c +++ b/test/unit/hash.c @@ -64,8 +64,8 @@ hash_variant_verify(hash_variant_t variant) { const size_t hashbytes = hash_variant_bits(variant) / 8; uint8_t key[256]; - uint8_t hashes[hashbytes * 256]; - uint8_t final[hashbytes]; + VARIABLE_ARRAY(uint8_t, hashes, hashbytes * 256); + VARIABLE_ARRAY(uint8_t, final, hashbytes); unsigned i; uint32_t computed, expected; diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index cb120497..7a8b55f5 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -285,7 +285,7 @@ TEST_BEGIN(test_arenas_initialized) assert_d_eq(mallctl("arenas.narenas", &narenas, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); { - bool initialized[narenas]; + VARIABLE_ARRAY(bool, initialized, narenas); sz = narenas * sizeof(bool); assert_d_eq(mallctl("arenas.initialized", initialized, &sz, From 1ad4a6e9f9ba55c874d0ad63041e09b96b459b1f Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Wed, 21 May 2014 17:44:42 +0900 Subject: [PATCH 0446/3378] Add missing $(EXE) to filter TESTS_UNIT_AUX_OBJS --- Makefile.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile.in 
b/Makefile.in index 65d73db6..839bb08f 100644 --- a/Makefile.in +++ b/Makefile.in @@ -213,7 +213,7 @@ define make-unit-link-dep $(1): TESTS_UNIT_LINK_OBJS += $(2) $(1): $(2) endef -$(foreach test, $(TESTS_UNIT:$(srcroot)test/unit/%.c=$(objroot)test/unit/%$(EXE)), $(eval $(call make-unit-link-dep,$(test),$(filter $(test:%=%_a.$(O)) $(test:%=%_b.$(O)),$(TESTS_UNIT_AUX_OBJS))))) +$(foreach test, $(TESTS_UNIT:$(srcroot)test/unit/%.c=$(objroot)test/unit/%$(EXE)), $(eval $(call make-unit-link-dep,$(test),$(filter $(test:%$(EXE)=%_a.$(O)) $(test:%$(EXE)=%_b.$(O)),$(TESTS_UNIT_AUX_OBJS))))) $(TESTS_INTEGRATION_OBJS): CPPFLAGS += -DJEMALLOC_INTEGRATION_TEST $(TESTS_STRESS_OBJS): CPPFLAGS += -DJEMALLOC_STRESS_TEST $(TESTS_OBJS): $(objroot)test/%.$(O): $(srcroot)test/%.c From 7330c3770af0e5328d749635217387efbbe0ae3c Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Wed, 21 May 2014 18:00:15 +0900 Subject: [PATCH 0447/3378] Use C99 varadic macros instead of GCC ones --- test/include/test/test.h | 384 +++++++++++++++++++-------------------- test/unit/util.c | 8 +- 2 files changed, 196 insertions(+), 196 deletions(-) diff --git a/test/include/test/test.h b/test/include/test/test.h index 161fafdf..f55bafce 100644 --- a/test/include/test/test.h +++ b/test/include/test/test.h @@ -1,6 +1,6 @@ #define ASSERT_BUFSIZE 256 -#define assert_cmp(t, a, b, cmp, neg_cmp, pri, fmt...) do { \ +#define assert_cmp(t, a, b, cmp, neg_cmp, pri, ...) do { \ t a_ = (a); \ t b_ = (b); \ if (!(a_ cmp b_)) { \ @@ -12,205 +12,205 @@ "%"pri" "#neg_cmp" %"pri": ", \ __func__, __FILE__, __LINE__, \ #a, #b, a_, b_); \ - malloc_snprintf(message, sizeof(message), fmt); \ + malloc_snprintf(message, sizeof(message), __VA_ARGS__); \ p_test_fail(prefix, message); \ } \ } while (0) -#define assert_ptr_eq(a, b, fmt...) assert_cmp(void *, a, b, ==, \ - !=, "p", fmt) -#define assert_ptr_ne(a, b, fmt...) assert_cmp(void *, a, b, !=, \ - ==, "p", fmt) -#define assert_ptr_null(a, fmt...) 
assert_cmp(void *, a, NULL, ==, \ - !=, "p", fmt) -#define assert_ptr_not_null(a, fmt...) assert_cmp(void *, a, NULL, !=, \ - ==, "p", fmt) +#define assert_ptr_eq(a, b, ...) assert_cmp(void *, a, b, ==, \ + !=, "p", __VA_ARGS__) +#define assert_ptr_ne(a, b, ...) assert_cmp(void *, a, b, !=, \ + ==, "p", __VA_ARGS__) +#define assert_ptr_null(a, ...) assert_cmp(void *, a, NULL, ==, \ + !=, "p", __VA_ARGS__) +#define assert_ptr_not_null(a, ...) assert_cmp(void *, a, NULL, !=, \ + ==, "p", __VA_ARGS__) -#define assert_c_eq(a, b, fmt...) assert_cmp(char, a, b, ==, !=, "c", fmt) -#define assert_c_ne(a, b, fmt...) assert_cmp(char, a, b, !=, ==, "c", fmt) -#define assert_c_lt(a, b, fmt...) assert_cmp(char, a, b, <, >=, "c", fmt) -#define assert_c_le(a, b, fmt...) assert_cmp(char, a, b, <=, >, "c", fmt) -#define assert_c_ge(a, b, fmt...) assert_cmp(char, a, b, >=, <, "c", fmt) -#define assert_c_gt(a, b, fmt...) assert_cmp(char, a, b, >, <=, "c", fmt) +#define assert_c_eq(a, b, ...) assert_cmp(char, a, b, ==, !=, "c", __VA_ARGS__) +#define assert_c_ne(a, b, ...) assert_cmp(char, a, b, !=, ==, "c", __VA_ARGS__) +#define assert_c_lt(a, b, ...) assert_cmp(char, a, b, <, >=, "c", __VA_ARGS__) +#define assert_c_le(a, b, ...) assert_cmp(char, a, b, <=, >, "c", __VA_ARGS__) +#define assert_c_ge(a, b, ...) assert_cmp(char, a, b, >=, <, "c", __VA_ARGS__) +#define assert_c_gt(a, b, ...) assert_cmp(char, a, b, >, <=, "c", __VA_ARGS__) -#define assert_x_eq(a, b, fmt...) assert_cmp(int, a, b, ==, !=, "#x", fmt) -#define assert_x_ne(a, b, fmt...) assert_cmp(int, a, b, !=, ==, "#x", fmt) -#define assert_x_lt(a, b, fmt...) assert_cmp(int, a, b, <, >=, "#x", fmt) -#define assert_x_le(a, b, fmt...) assert_cmp(int, a, b, <=, >, "#x", fmt) -#define assert_x_ge(a, b, fmt...) assert_cmp(int, a, b, >=, <, "#x", fmt) -#define assert_x_gt(a, b, fmt...) assert_cmp(int, a, b, >, <=, "#x", fmt) +#define assert_x_eq(a, b, ...) 
assert_cmp(int, a, b, ==, !=, "#x", __VA_ARGS__) +#define assert_x_ne(a, b, ...) assert_cmp(int, a, b, !=, ==, "#x", __VA_ARGS__) +#define assert_x_lt(a, b, ...) assert_cmp(int, a, b, <, >=, "#x", __VA_ARGS__) +#define assert_x_le(a, b, ...) assert_cmp(int, a, b, <=, >, "#x", __VA_ARGS__) +#define assert_x_ge(a, b, ...) assert_cmp(int, a, b, >=, <, "#x", __VA_ARGS__) +#define assert_x_gt(a, b, ...) assert_cmp(int, a, b, >, <=, "#x", __VA_ARGS__) -#define assert_d_eq(a, b, fmt...) assert_cmp(int, a, b, ==, !=, "d", fmt) -#define assert_d_ne(a, b, fmt...) assert_cmp(int, a, b, !=, ==, "d", fmt) -#define assert_d_lt(a, b, fmt...) assert_cmp(int, a, b, <, >=, "d", fmt) -#define assert_d_le(a, b, fmt...) assert_cmp(int, a, b, <=, >, "d", fmt) -#define assert_d_ge(a, b, fmt...) assert_cmp(int, a, b, >=, <, "d", fmt) -#define assert_d_gt(a, b, fmt...) assert_cmp(int, a, b, >, <=, "d", fmt) +#define assert_d_eq(a, b, ...) assert_cmp(int, a, b, ==, !=, "d", __VA_ARGS__) +#define assert_d_ne(a, b, ...) assert_cmp(int, a, b, !=, ==, "d", __VA_ARGS__) +#define assert_d_lt(a, b, ...) assert_cmp(int, a, b, <, >=, "d", __VA_ARGS__) +#define assert_d_le(a, b, ...) assert_cmp(int, a, b, <=, >, "d", __VA_ARGS__) +#define assert_d_ge(a, b, ...) assert_cmp(int, a, b, >=, <, "d", __VA_ARGS__) +#define assert_d_gt(a, b, ...) assert_cmp(int, a, b, >, <=, "d", __VA_ARGS__) -#define assert_u_eq(a, b, fmt...) assert_cmp(int, a, b, ==, !=, "u", fmt) -#define assert_u_ne(a, b, fmt...) assert_cmp(int, a, b, !=, ==, "u", fmt) -#define assert_u_lt(a, b, fmt...) assert_cmp(int, a, b, <, >=, "u", fmt) -#define assert_u_le(a, b, fmt...) assert_cmp(int, a, b, <=, >, "u", fmt) -#define assert_u_ge(a, b, fmt...) assert_cmp(int, a, b, >=, <, "u", fmt) -#define assert_u_gt(a, b, fmt...) assert_cmp(int, a, b, >, <=, "u", fmt) +#define assert_u_eq(a, b, ...) assert_cmp(int, a, b, ==, !=, "u", __VA_ARGS__) +#define assert_u_ne(a, b, ...) 
assert_cmp(int, a, b, !=, ==, "u", __VA_ARGS__) +#define assert_u_lt(a, b, ...) assert_cmp(int, a, b, <, >=, "u", __VA_ARGS__) +#define assert_u_le(a, b, ...) assert_cmp(int, a, b, <=, >, "u", __VA_ARGS__) +#define assert_u_ge(a, b, ...) assert_cmp(int, a, b, >=, <, "u", __VA_ARGS__) +#define assert_u_gt(a, b, ...) assert_cmp(int, a, b, >, <=, "u", __VA_ARGS__) -#define assert_ld_eq(a, b, fmt...) assert_cmp(long, a, b, ==, \ - !=, "ld", fmt) -#define assert_ld_ne(a, b, fmt...) assert_cmp(long, a, b, !=, \ - ==, "ld", fmt) -#define assert_ld_lt(a, b, fmt...) assert_cmp(long, a, b, <, \ - >=, "ld", fmt) -#define assert_ld_le(a, b, fmt...) assert_cmp(long, a, b, <=, \ - >, "ld", fmt) -#define assert_ld_ge(a, b, fmt...) assert_cmp(long, a, b, >=, \ - <, "ld", fmt) -#define assert_ld_gt(a, b, fmt...) assert_cmp(long, a, b, >, \ - <=, "ld", fmt) +#define assert_ld_eq(a, b, ...) assert_cmp(long, a, b, ==, \ + !=, "ld", __VA_ARGS__) +#define assert_ld_ne(a, b, ...) assert_cmp(long, a, b, !=, \ + ==, "ld", __VA_ARGS__) +#define assert_ld_lt(a, b, ...) assert_cmp(long, a, b, <, \ + >=, "ld", __VA_ARGS__) +#define assert_ld_le(a, b, ...) assert_cmp(long, a, b, <=, \ + >, "ld", __VA_ARGS__) +#define assert_ld_ge(a, b, ...) assert_cmp(long, a, b, >=, \ + <, "ld", __VA_ARGS__) +#define assert_ld_gt(a, b, ...) assert_cmp(long, a, b, >, \ + <=, "ld", __VA_ARGS__) -#define assert_lu_eq(a, b, fmt...) assert_cmp(unsigned long, \ - a, b, ==, !=, "lu", fmt) -#define assert_lu_ne(a, b, fmt...) assert_cmp(unsigned long, \ - a, b, !=, ==, "lu", fmt) -#define assert_lu_lt(a, b, fmt...) assert_cmp(unsigned long, \ - a, b, <, >=, "lu", fmt) -#define assert_lu_le(a, b, fmt...) assert_cmp(unsigned long, \ - a, b, <=, >, "lu", fmt) -#define assert_lu_ge(a, b, fmt...) assert_cmp(unsigned long, \ - a, b, >=, <, "lu", fmt) -#define assert_lu_gt(a, b, fmt...) assert_cmp(unsigned long, \ - a, b, >, <=, "lu", fmt) +#define assert_lu_eq(a, b, ...) 
assert_cmp(unsigned long, \ + a, b, ==, !=, "lu", __VA_ARGS__) +#define assert_lu_ne(a, b, ...) assert_cmp(unsigned long, \ + a, b, !=, ==, "lu", __VA_ARGS__) +#define assert_lu_lt(a, b, ...) assert_cmp(unsigned long, \ + a, b, <, >=, "lu", __VA_ARGS__) +#define assert_lu_le(a, b, ...) assert_cmp(unsigned long, \ + a, b, <=, >, "lu", __VA_ARGS__) +#define assert_lu_ge(a, b, ...) assert_cmp(unsigned long, \ + a, b, >=, <, "lu", __VA_ARGS__) +#define assert_lu_gt(a, b, ...) assert_cmp(unsigned long, \ + a, b, >, <=, "lu", __VA_ARGS__) -#define assert_qd_eq(a, b, fmt...) assert_cmp(long long, a, b, ==, \ - !=, "qd", fmt) -#define assert_qd_ne(a, b, fmt...) assert_cmp(long long, a, b, !=, \ - ==, "qd", fmt) -#define assert_qd_lt(a, b, fmt...) assert_cmp(long long, a, b, <, \ - >=, "qd", fmt) -#define assert_qd_le(a, b, fmt...) assert_cmp(long long, a, b, <=, \ - >, "qd", fmt) -#define assert_qd_ge(a, b, fmt...) assert_cmp(long long, a, b, >=, \ - <, "qd", fmt) -#define assert_qd_gt(a, b, fmt...) assert_cmp(long long, a, b, >, \ - <=, "qd", fmt) +#define assert_qd_eq(a, b, ...) assert_cmp(long long, a, b, ==, \ + !=, "qd", __VA_ARGS__) +#define assert_qd_ne(a, b, ...) assert_cmp(long long, a, b, !=, \ + ==, "qd", __VA_ARGS__) +#define assert_qd_lt(a, b, ...) assert_cmp(long long, a, b, <, \ + >=, "qd", __VA_ARGS__) +#define assert_qd_le(a, b, ...) assert_cmp(long long, a, b, <=, \ + >, "qd", __VA_ARGS__) +#define assert_qd_ge(a, b, ...) assert_cmp(long long, a, b, >=, \ + <, "qd", __VA_ARGS__) +#define assert_qd_gt(a, b, ...) assert_cmp(long long, a, b, >, \ + <=, "qd", __VA_ARGS__) -#define assert_qu_eq(a, b, fmt...) assert_cmp(unsigned long long, \ - a, b, ==, !=, "qu", fmt) -#define assert_qu_ne(a, b, fmt...) assert_cmp(unsigned long long, \ - a, b, !=, ==, "qu", fmt) -#define assert_qu_lt(a, b, fmt...) assert_cmp(unsigned long long, \ - a, b, <, >=, "qu", fmt) -#define assert_qu_le(a, b, fmt...) 
assert_cmp(unsigned long long, \ - a, b, <=, >, "qu", fmt) -#define assert_qu_ge(a, b, fmt...) assert_cmp(unsigned long long, \ - a, b, >=, <, "qu", fmt) -#define assert_qu_gt(a, b, fmt...) assert_cmp(unsigned long long, \ - a, b, >, <=, "qu", fmt) +#define assert_qu_eq(a, b, ...) assert_cmp(unsigned long long, \ + a, b, ==, !=, "qu", __VA_ARGS__) +#define assert_qu_ne(a, b, ...) assert_cmp(unsigned long long, \ + a, b, !=, ==, "qu", __VA_ARGS__) +#define assert_qu_lt(a, b, ...) assert_cmp(unsigned long long, \ + a, b, <, >=, "qu", __VA_ARGS__) +#define assert_qu_le(a, b, ...) assert_cmp(unsigned long long, \ + a, b, <=, >, "qu", __VA_ARGS__) +#define assert_qu_ge(a, b, ...) assert_cmp(unsigned long long, \ + a, b, >=, <, "qu", __VA_ARGS__) +#define assert_qu_gt(a, b, ...) assert_cmp(unsigned long long, \ + a, b, >, <=, "qu", __VA_ARGS__) -#define assert_jd_eq(a, b, fmt...) assert_cmp(intmax_t, a, b, ==, \ - !=, "jd", fmt) -#define assert_jd_ne(a, b, fmt...) assert_cmp(intmax_t, a, b, !=, \ - ==, "jd", fmt) -#define assert_jd_lt(a, b, fmt...) assert_cmp(intmax_t, a, b, <, \ - >=, "jd", fmt) -#define assert_jd_le(a, b, fmt...) assert_cmp(intmax_t, a, b, <=, \ - >, "jd", fmt) -#define assert_jd_ge(a, b, fmt...) assert_cmp(intmax_t, a, b, >=, \ - <, "jd", fmt) -#define assert_jd_gt(a, b, fmt...) assert_cmp(intmax_t, a, b, >, \ - <=, "jd", fmt) +#define assert_jd_eq(a, b, ...) assert_cmp(intmax_t, a, b, ==, \ + !=, "jd", __VA_ARGS__) +#define assert_jd_ne(a, b, ...) assert_cmp(intmax_t, a, b, !=, \ + ==, "jd", __VA_ARGS__) +#define assert_jd_lt(a, b, ...) assert_cmp(intmax_t, a, b, <, \ + >=, "jd", __VA_ARGS__) +#define assert_jd_le(a, b, ...) assert_cmp(intmax_t, a, b, <=, \ + >, "jd", __VA_ARGS__) +#define assert_jd_ge(a, b, ...) assert_cmp(intmax_t, a, b, >=, \ + <, "jd", __VA_ARGS__) +#define assert_jd_gt(a, b, ...) assert_cmp(intmax_t, a, b, >, \ + <=, "jd", __VA_ARGS__) -#define assert_ju_eq(a, b, fmt...) 
assert_cmp(uintmax_t, a, b, ==, \ - !=, "ju", fmt) -#define assert_ju_ne(a, b, fmt...) assert_cmp(uintmax_t, a, b, !=, \ - ==, "ju", fmt) -#define assert_ju_lt(a, b, fmt...) assert_cmp(uintmax_t, a, b, <, \ - >=, "ju", fmt) -#define assert_ju_le(a, b, fmt...) assert_cmp(uintmax_t, a, b, <=, \ - >, "ju", fmt) -#define assert_ju_ge(a, b, fmt...) assert_cmp(uintmax_t, a, b, >=, \ - <, "ju", fmt) -#define assert_ju_gt(a, b, fmt...) assert_cmp(uintmax_t, a, b, >, \ - <=, "ju", fmt) +#define assert_ju_eq(a, b, ...) assert_cmp(uintmax_t, a, b, ==, \ + !=, "ju", __VA_ARGS__) +#define assert_ju_ne(a, b, ...) assert_cmp(uintmax_t, a, b, !=, \ + ==, "ju", __VA_ARGS__) +#define assert_ju_lt(a, b, ...) assert_cmp(uintmax_t, a, b, <, \ + >=, "ju", __VA_ARGS__) +#define assert_ju_le(a, b, ...) assert_cmp(uintmax_t, a, b, <=, \ + >, "ju", __VA_ARGS__) +#define assert_ju_ge(a, b, ...) assert_cmp(uintmax_t, a, b, >=, \ + <, "ju", __VA_ARGS__) +#define assert_ju_gt(a, b, ...) assert_cmp(uintmax_t, a, b, >, \ + <=, "ju", __VA_ARGS__) -#define assert_zd_eq(a, b, fmt...) assert_cmp(ssize_t, a, b, ==, \ - !=, "zd", fmt) -#define assert_zd_ne(a, b, fmt...) assert_cmp(ssize_t, a, b, !=, \ - ==, "zd", fmt) -#define assert_zd_lt(a, b, fmt...) assert_cmp(ssize_t, a, b, <, \ - >=, "zd", fmt) -#define assert_zd_le(a, b, fmt...) assert_cmp(ssize_t, a, b, <=, \ - >, "zd", fmt) -#define assert_zd_ge(a, b, fmt...) assert_cmp(ssize_t, a, b, >=, \ - <, "zd", fmt) -#define assert_zd_gt(a, b, fmt...) assert_cmp(ssize_t, a, b, >, \ - <=, "zd", fmt) +#define assert_zd_eq(a, b, ...) assert_cmp(ssize_t, a, b, ==, \ + !=, "zd", __VA_ARGS__) +#define assert_zd_ne(a, b, ...) assert_cmp(ssize_t, a, b, !=, \ + ==, "zd", __VA_ARGS__) +#define assert_zd_lt(a, b, ...) assert_cmp(ssize_t, a, b, <, \ + >=, "zd", __VA_ARGS__) +#define assert_zd_le(a, b, ...) assert_cmp(ssize_t, a, b, <=, \ + >, "zd", __VA_ARGS__) +#define assert_zd_ge(a, b, ...) 
assert_cmp(ssize_t, a, b, >=, \ + <, "zd", __VA_ARGS__) +#define assert_zd_gt(a, b, ...) assert_cmp(ssize_t, a, b, >, \ + <=, "zd", __VA_ARGS__) -#define assert_zu_eq(a, b, fmt...) assert_cmp(size_t, a, b, ==, \ - !=, "zu", fmt) -#define assert_zu_ne(a, b, fmt...) assert_cmp(size_t, a, b, !=, \ - ==, "zu", fmt) -#define assert_zu_lt(a, b, fmt...) assert_cmp(size_t, a, b, <, \ - >=, "zu", fmt) -#define assert_zu_le(a, b, fmt...) assert_cmp(size_t, a, b, <=, \ - >, "zu", fmt) -#define assert_zu_ge(a, b, fmt...) assert_cmp(size_t, a, b, >=, \ - <, "zu", fmt) -#define assert_zu_gt(a, b, fmt...) assert_cmp(size_t, a, b, >, \ - <=, "zu", fmt) +#define assert_zu_eq(a, b, ...) assert_cmp(size_t, a, b, ==, \ + !=, "zu", __VA_ARGS__) +#define assert_zu_ne(a, b, ...) assert_cmp(size_t, a, b, !=, \ + ==, "zu", __VA_ARGS__) +#define assert_zu_lt(a, b, ...) assert_cmp(size_t, a, b, <, \ + >=, "zu", __VA_ARGS__) +#define assert_zu_le(a, b, ...) assert_cmp(size_t, a, b, <=, \ + >, "zu", __VA_ARGS__) +#define assert_zu_ge(a, b, ...) assert_cmp(size_t, a, b, >=, \ + <, "zu", __VA_ARGS__) +#define assert_zu_gt(a, b, ...) assert_cmp(size_t, a, b, >, \ + <=, "zu", __VA_ARGS__) -#define assert_d32_eq(a, b, fmt...) assert_cmp(int32_t, a, b, ==, \ - !=, PRId32, fmt) -#define assert_d32_ne(a, b, fmt...) assert_cmp(int32_t, a, b, !=, \ - ==, PRId32, fmt) -#define assert_d32_lt(a, b, fmt...) assert_cmp(int32_t, a, b, <, \ - >=, PRId32, fmt) -#define assert_d32_le(a, b, fmt...) assert_cmp(int32_t, a, b, <=, \ - >, PRId32, fmt) -#define assert_d32_ge(a, b, fmt...) assert_cmp(int32_t, a, b, >=, \ - <, PRId32, fmt) -#define assert_d32_gt(a, b, fmt...) assert_cmp(int32_t, a, b, >, \ - <=, PRId32, fmt) +#define assert_d32_eq(a, b, ...) assert_cmp(int32_t, a, b, ==, \ + !=, PRId32, __VA_ARGS__) +#define assert_d32_ne(a, b, ...) assert_cmp(int32_t, a, b, !=, \ + ==, PRId32, __VA_ARGS__) +#define assert_d32_lt(a, b, ...) 
assert_cmp(int32_t, a, b, <, \ + >=, PRId32, __VA_ARGS__) +#define assert_d32_le(a, b, ...) assert_cmp(int32_t, a, b, <=, \ + >, PRId32, __VA_ARGS__) +#define assert_d32_ge(a, b, ...) assert_cmp(int32_t, a, b, >=, \ + <, PRId32, __VA_ARGS__) +#define assert_d32_gt(a, b, ...) assert_cmp(int32_t, a, b, >, \ + <=, PRId32, __VA_ARGS__) -#define assert_u32_eq(a, b, fmt...) assert_cmp(uint32_t, a, b, ==, \ - !=, PRIu32, fmt) -#define assert_u32_ne(a, b, fmt...) assert_cmp(uint32_t, a, b, !=, \ - ==, PRIu32, fmt) -#define assert_u32_lt(a, b, fmt...) assert_cmp(uint32_t, a, b, <, \ - >=, PRIu32, fmt) -#define assert_u32_le(a, b, fmt...) assert_cmp(uint32_t, a, b, <=, \ - >, PRIu32, fmt) -#define assert_u32_ge(a, b, fmt...) assert_cmp(uint32_t, a, b, >=, \ - <, PRIu32, fmt) -#define assert_u32_gt(a, b, fmt...) assert_cmp(uint32_t, a, b, >, \ - <=, PRIu32, fmt) +#define assert_u32_eq(a, b, ...) assert_cmp(uint32_t, a, b, ==, \ + !=, PRIu32, __VA_ARGS__) +#define assert_u32_ne(a, b, ...) assert_cmp(uint32_t, a, b, !=, \ + ==, PRIu32, __VA_ARGS__) +#define assert_u32_lt(a, b, ...) assert_cmp(uint32_t, a, b, <, \ + >=, PRIu32, __VA_ARGS__) +#define assert_u32_le(a, b, ...) assert_cmp(uint32_t, a, b, <=, \ + >, PRIu32, __VA_ARGS__) +#define assert_u32_ge(a, b, ...) assert_cmp(uint32_t, a, b, >=, \ + <, PRIu32, __VA_ARGS__) +#define assert_u32_gt(a, b, ...) assert_cmp(uint32_t, a, b, >, \ + <=, PRIu32, __VA_ARGS__) -#define assert_d64_eq(a, b, fmt...) assert_cmp(int64_t, a, b, ==, \ - !=, PRId64, fmt) -#define assert_d64_ne(a, b, fmt...) assert_cmp(int64_t, a, b, !=, \ - ==, PRId64, fmt) -#define assert_d64_lt(a, b, fmt...) assert_cmp(int64_t, a, b, <, \ - >=, PRId64, fmt) -#define assert_d64_le(a, b, fmt...) assert_cmp(int64_t, a, b, <=, \ - >, PRId64, fmt) -#define assert_d64_ge(a, b, fmt...) assert_cmp(int64_t, a, b, >=, \ - <, PRId64, fmt) -#define assert_d64_gt(a, b, fmt...) assert_cmp(int64_t, a, b, >, \ - <=, PRId64, fmt) +#define assert_d64_eq(a, b, ...) 
assert_cmp(int64_t, a, b, ==, \ + !=, PRId64, __VA_ARGS__) +#define assert_d64_ne(a, b, ...) assert_cmp(int64_t, a, b, !=, \ + ==, PRId64, __VA_ARGS__) +#define assert_d64_lt(a, b, ...) assert_cmp(int64_t, a, b, <, \ + >=, PRId64, __VA_ARGS__) +#define assert_d64_le(a, b, ...) assert_cmp(int64_t, a, b, <=, \ + >, PRId64, __VA_ARGS__) +#define assert_d64_ge(a, b, ...) assert_cmp(int64_t, a, b, >=, \ + <, PRId64, __VA_ARGS__) +#define assert_d64_gt(a, b, ...) assert_cmp(int64_t, a, b, >, \ + <=, PRId64, __VA_ARGS__) -#define assert_u64_eq(a, b, fmt...) assert_cmp(uint64_t, a, b, ==, \ - !=, PRIu64, fmt) -#define assert_u64_ne(a, b, fmt...) assert_cmp(uint64_t, a, b, !=, \ - ==, PRIu64, fmt) -#define assert_u64_lt(a, b, fmt...) assert_cmp(uint64_t, a, b, <, \ - >=, PRIu64, fmt) -#define assert_u64_le(a, b, fmt...) assert_cmp(uint64_t, a, b, <=, \ - >, PRIu64, fmt) -#define assert_u64_ge(a, b, fmt...) assert_cmp(uint64_t, a, b, >=, \ - <, PRIu64, fmt) -#define assert_u64_gt(a, b, fmt...) assert_cmp(uint64_t, a, b, >, \ - <=, PRIu64, fmt) +#define assert_u64_eq(a, b, ...) assert_cmp(uint64_t, a, b, ==, \ + !=, PRIu64, __VA_ARGS__) +#define assert_u64_ne(a, b, ...) assert_cmp(uint64_t, a, b, !=, \ + ==, PRIu64, __VA_ARGS__) +#define assert_u64_lt(a, b, ...) assert_cmp(uint64_t, a, b, <, \ + >=, PRIu64, __VA_ARGS__) +#define assert_u64_le(a, b, ...) assert_cmp(uint64_t, a, b, <=, \ + >, PRIu64, __VA_ARGS__) +#define assert_u64_ge(a, b, ...) assert_cmp(uint64_t, a, b, >=, \ + <, PRIu64, __VA_ARGS__) +#define assert_u64_gt(a, b, ...) assert_cmp(uint64_t, a, b, >, \ + <=, PRIu64, __VA_ARGS__) -#define assert_b_eq(a, b, fmt...) do { \ +#define assert_b_eq(a, b, ...) do { \ bool a_ = (a); \ bool b_ = (b); \ if (!(a_ == b_)) { \ @@ -222,11 +222,11 @@ __func__, __FILE__, __LINE__, \ #a, #b, a_ ? "true" : "false", \ b_ ? 
"true" : "false"); \ - malloc_snprintf(message, sizeof(message), fmt); \ + malloc_snprintf(message, sizeof(message), __VA_ARGS__); \ p_test_fail(prefix, message); \ } \ } while (0) -#define assert_b_ne(a, b, fmt...) do { \ +#define assert_b_ne(a, b, ...) do { \ bool a_ = (a); \ bool b_ = (b); \ if (!(a_ != b_)) { \ @@ -238,14 +238,14 @@ __func__, __FILE__, __LINE__, \ #a, #b, a_ ? "true" : "false", \ b_ ? "true" : "false"); \ - malloc_snprintf(message, sizeof(message), fmt); \ + malloc_snprintf(message, sizeof(message), __VA_ARGS__); \ p_test_fail(prefix, message); \ } \ } while (0) -#define assert_true(a, fmt...) assert_b_eq(a, true, fmt) -#define assert_false(a, fmt...) assert_b_eq(a, false, fmt) +#define assert_true(a, ...) assert_b_eq(a, true, __VA_ARGS__) +#define assert_false(a, ...) assert_b_eq(a, false, __VA_ARGS__) -#define assert_str_eq(a, b, fmt...) do { \ +#define assert_str_eq(a, b, ...) do { \ if (strcmp((a), (b))) { \ char prefix[ASSERT_BUFSIZE]; \ char message[ASSERT_BUFSIZE]; \ @@ -254,11 +254,11 @@ "(%s) same as (%s) --> " \ "\"%s\" differs from \"%s\": ", \ __func__, __FILE__, __LINE__, #a, #b, a, b); \ - malloc_snprintf(message, sizeof(message), fmt); \ + malloc_snprintf(message, sizeof(message), __VA_ARGS__); \ p_test_fail(prefix, message); \ } \ } while (0) -#define assert_str_ne(a, b, fmt...) do { \ +#define assert_str_ne(a, b, ...) do { \ if (!strcmp((a), (b))) { \ char prefix[ASSERT_BUFSIZE]; \ char message[ASSERT_BUFSIZE]; \ @@ -267,18 +267,18 @@ "(%s) differs from (%s) --> " \ "\"%s\" same as \"%s\": ", \ __func__, __FILE__, __LINE__, #a, #b, a, b); \ - malloc_snprintf(message, sizeof(message), fmt); \ + malloc_snprintf(message, sizeof(message), __VA_ARGS__); \ p_test_fail(prefix, message); \ } \ } while (0) -#define assert_not_reached(fmt...) do { \ +#define assert_not_reached(...) 
do { \ char prefix[ASSERT_BUFSIZE]; \ char message[ASSERT_BUFSIZE]; \ malloc_snprintf(prefix, sizeof(prefix), \ "%s:%s:%d: Unreachable code reached: ", \ __func__, __FILE__, __LINE__); \ - malloc_snprintf(message, sizeof(message), fmt); \ + malloc_snprintf(message, sizeof(message), __VA_ARGS__); \ p_test_fail(prefix, message); \ } while (0) @@ -308,8 +308,8 @@ label_test_end: \ p_test_fini(); \ } -#define test(tests...) \ - p_test(tests, NULL) +#define test(...) \ + p_test(__VA_ARGS__, NULL) #define test_skip_if(e) do { \ if (e) { \ diff --git a/test/unit/util.c b/test/unit/util.c index dc3cfe8a..c11d5984 100644 --- a/test/unit/util.c +++ b/test/unit/util.c @@ -141,8 +141,8 @@ TEST_BEGIN(test_malloc_snprintf_truncated) char buf[BUFLEN]; int result; size_t len; -#define TEST(expected_str_untruncated, fmt...) do { \ - result = malloc_snprintf(buf, len, fmt); \ +#define TEST(expected_str_untruncated, ...) do { \ + result = malloc_snprintf(buf, len, __VA_ARGS__); \ assert_d_eq(strncmp(buf, expected_str_untruncated, len-1), 0, \ "Unexpected string inequality (\"%s\" vs \"%s\")", \ buf, expected_str_untruncated); \ @@ -173,8 +173,8 @@ TEST_BEGIN(test_malloc_snprintf) #define BUFLEN 128 char buf[BUFLEN]; int result; -#define TEST(expected_str, fmt...) do { \ - result = malloc_snprintf(buf, sizeof(buf), fmt); \ +#define TEST(expected_str, ...) 
do { \ + result = malloc_snprintf(buf, sizeof(buf), __VA_ARGS__); \ assert_str_eq(buf, expected_str, "Unexpected output"); \ assert_d_eq(result, strlen(expected_str), "Unexpected result"); \ } while (0) From 86e2e703ffb3cc17e05af816df8895db62a9272e Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Wed, 21 May 2014 18:01:21 +0900 Subject: [PATCH 0448/3378] Rename "small" local variable, because windows headers #define it --- test/unit/stats.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/unit/stats.c b/test/unit/stats.c index ab87b29b..78c78cd5 100644 --- a/test/unit/stats.c +++ b/test/unit/stats.c @@ -97,7 +97,7 @@ TEST_END TEST_BEGIN(test_stats_arenas_summary) { unsigned arena; - void *small, *large; + void *little, *large; uint64_t epoch; size_t sz; int expected = config_stats ? 0 : ENOENT; @@ -108,8 +108,8 @@ TEST_BEGIN(test_stats_arenas_summary) assert_d_eq(mallctl("thread.arena", NULL, NULL, &arena, sizeof(arena)), 0, "Unexpected mallctl() failure"); - small = mallocx(SMALL_MAXCLASS, 0); - assert_ptr_not_null(small, "Unexpected mallocx() failure"); + little = mallocx(SMALL_MAXCLASS, 0); + assert_ptr_not_null(little, "Unexpected mallocx() failure"); large = mallocx(arena_maxclass, 0); assert_ptr_not_null(large, "Unexpected mallocx() failure"); @@ -137,7 +137,7 @@ TEST_BEGIN(test_stats_arenas_summary) "nmadvise should be no greater than purged"); } - dallocx(small, 0); + dallocx(little, 0); dallocx(large, 0); } TEST_END From 3a730dfd5062ecd6fc46b68f28342e14b461f560 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Wed, 21 May 2014 18:13:21 +0900 Subject: [PATCH 0449/3378] Avoid pointer arithmetic on void* in test/integration/rallocx.c --- test/integration/rallocx.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/test/integration/rallocx.c b/test/integration/rallocx.c index ee21aedf..e78e02f3 100644 --- a/test/integration/rallocx.c +++ b/test/integration/rallocx.c @@ -95,7 +95,8 @@ TEST_BEGIN(test_zero) 
"Expected zeroed memory"); } if (psz != qsz) { - memset(q+psz, FILL_BYTE, qsz-psz); + memset((void *)(uintptr_t)q+psz, FILL_BYTE, + qsz-psz); psz = qsz; } p = q; @@ -159,8 +160,9 @@ TEST_BEGIN(test_lg_align_and_zero) } else { assert_false(validate_fill(q, 0, 0, MAX_VALIDATE), "Expected zeroed memory"); - assert_false(validate_fill(q+sz-MAX_VALIDATE, 0, 0, - MAX_VALIDATE), "Expected zeroed memory"); + assert_false(validate_fill( + (void *)(uintptr_t)q+sz-MAX_VALIDATE, + 0, 0, MAX_VALIDATE), "Expected zeroed memory"); } p = q; } From a9df1ae622d0eb91a26208c03c51d0c518cce146 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Wed, 21 May 2014 16:34:02 +0900 Subject: [PATCH 0450/3378] Use ULL prefix instead of LLU for unsigned long longs MSVC only supports the former. --- include/jemalloc/internal/hash.h | 8 +- test/src/SFMT.c | 2 +- test/unit/SFMT.c | 2000 +++++++++++++++--------------- 3 files changed, 1005 insertions(+), 1005 deletions(-) diff --git a/include/jemalloc/internal/hash.h b/include/jemalloc/internal/hash.h index c7183ede..f2b3a16c 100644 --- a/include/jemalloc/internal/hash.h +++ b/include/jemalloc/internal/hash.h @@ -76,9 +76,9 @@ hash_fmix_64(uint64_t k) { k ^= k >> 33; - k *= QU(0xff51afd7ed558ccdLLU); + k *= QU(0xff51afd7ed558ccdULL); k ^= k >> 33; - k *= QU(0xc4ceb9fe1a85ec53LLU); + k *= QU(0xc4ceb9fe1a85ec53ULL); k ^= k >> 33; return (k); @@ -247,8 +247,8 @@ hash_x64_128(const void *key, const int len, const uint32_t seed, uint64_t h1 = seed; uint64_t h2 = seed; - const uint64_t c1 = QU(0x87c37b91114253d5LLU); - const uint64_t c2 = QU(0x4cf5ad432745937fLLU); + const uint64_t c1 = QU(0x87c37b91114253d5ULL); + const uint64_t c2 = QU(0x4cf5ad432745937fULL); /* body */ { diff --git a/test/src/SFMT.c b/test/src/SFMT.c index e6f8deec..d2cc9d1c 100644 --- a/test/src/SFMT.c +++ b/test/src/SFMT.c @@ -511,7 +511,7 @@ uint64_t gen_rand64(sfmt_t *ctx) { uint64_t gen_rand64_range(sfmt_t *ctx, uint64_t limit) { uint64_t ret, above; - above = 0xffffffffffffffffLLU - 
(0xffffffffffffffffLLU % limit); + above = 0xffffffffffffffffULL - (0xffffffffffffffffULL % limit); while (1) { ret = gen_rand64(ctx); if (ret < above) { diff --git a/test/unit/SFMT.c b/test/unit/SFMT.c index c57bd68d..0ad9c233 100644 --- a/test/unit/SFMT.c +++ b/test/unit/SFMT.c @@ -445,1008 +445,1008 @@ static const uint32_t init_by_array_32_expected[] = { 2750138839U, 3518055702U, 733072558U, 4169325400U, 788493625U }; static const uint64_t init_gen_rand_64_expected[] = { - QU(16924766246869039260LLU), QU( 8201438687333352714LLU), - QU( 2265290287015001750LLU), QU(18397264611805473832LLU), - QU( 3375255223302384358LLU), QU( 6345559975416828796LLU), - QU(18229739242790328073LLU), QU( 7596792742098800905LLU), - QU( 255338647169685981LLU), QU( 2052747240048610300LLU), - QU(18328151576097299343LLU), QU(12472905421133796567LLU), - QU(11315245349717600863LLU), QU(16594110197775871209LLU), - QU(15708751964632456450LLU), QU(10452031272054632535LLU), - QU(11097646720811454386LLU), QU( 4556090668445745441LLU), - QU(17116187693090663106LLU), QU(14931526836144510645LLU), - QU( 9190752218020552591LLU), QU( 9625800285771901401LLU), - QU(13995141077659972832LLU), QU( 5194209094927829625LLU), - QU( 4156788379151063303LLU), QU( 8523452593770139494LLU), - QU(14082382103049296727LLU), QU( 2462601863986088483LLU), - QU( 3030583461592840678LLU), QU( 5221622077872827681LLU), - QU( 3084210671228981236LLU), QU(13956758381389953823LLU), - QU(13503889856213423831LLU), QU(15696904024189836170LLU), - QU( 4612584152877036206LLU), QU( 6231135538447867881LLU), - QU(10172457294158869468LLU), QU( 6452258628466708150LLU), - QU(14044432824917330221LLU), QU( 370168364480044279LLU), - QU(10102144686427193359LLU), QU( 667870489994776076LLU), - QU( 2732271956925885858LLU), QU(18027788905977284151LLU), - QU(15009842788582923859LLU), QU( 7136357960180199542LLU), - QU(15901736243475578127LLU), QU(16951293785352615701LLU), - QU(10551492125243691632LLU), QU(17668869969146434804LLU), - 
QU(13646002971174390445LLU), QU( 9804471050759613248LLU), - QU( 5511670439655935493LLU), QU(18103342091070400926LLU), - QU(17224512747665137533LLU), QU(15534627482992618168LLU), - QU( 1423813266186582647LLU), QU(15821176807932930024LLU), - QU( 30323369733607156LLU), QU(11599382494723479403LLU), - QU( 653856076586810062LLU), QU( 3176437395144899659LLU), - QU(14028076268147963917LLU), QU(16156398271809666195LLU), - QU( 3166955484848201676LLU), QU( 5746805620136919390LLU), - QU(17297845208891256593LLU), QU(11691653183226428483LLU), - QU(17900026146506981577LLU), QU(15387382115755971042LLU), - QU(16923567681040845943LLU), QU( 8039057517199388606LLU), - QU(11748409241468629263LLU), QU( 794358245539076095LLU), - QU(13438501964693401242LLU), QU(14036803236515618962LLU), - QU( 5252311215205424721LLU), QU(17806589612915509081LLU), - QU( 6802767092397596006LLU), QU(14212120431184557140LLU), - QU( 1072951366761385712LLU), QU(13098491780722836296LLU), - QU( 9466676828710797353LLU), QU(12673056849042830081LLU), - QU(12763726623645357580LLU), QU(16468961652999309493LLU), - QU(15305979875636438926LLU), QU(17444713151223449734LLU), - QU( 5692214267627883674LLU), QU(13049589139196151505LLU), - QU( 880115207831670745LLU), QU( 1776529075789695498LLU), - QU(16695225897801466485LLU), QU(10666901778795346845LLU), - QU( 6164389346722833869LLU), QU( 2863817793264300475LLU), - QU( 9464049921886304754LLU), QU( 3993566636740015468LLU), - QU( 9983749692528514136LLU), QU(16375286075057755211LLU), - QU(16042643417005440820LLU), QU(11445419662923489877LLU), - QU( 7999038846885158836LLU), QU( 6721913661721511535LLU), - QU( 5363052654139357320LLU), QU( 1817788761173584205LLU), - QU(13290974386445856444LLU), QU( 4650350818937984680LLU), - QU( 8219183528102484836LLU), QU( 1569862923500819899LLU), - QU( 4189359732136641860LLU), QU(14202822961683148583LLU), - QU( 4457498315309429058LLU), QU(13089067387019074834LLU), - QU(11075517153328927293LLU), QU(10277016248336668389LLU), - QU( 
7070509725324401122LLU), QU(17808892017780289380LLU), - QU(13143367339909287349LLU), QU( 1377743745360085151LLU), - QU( 5749341807421286485LLU), QU(14832814616770931325LLU), - QU( 7688820635324359492LLU), QU(10960474011539770045LLU), - QU( 81970066653179790LLU), QU(12619476072607878022LLU), - QU( 4419566616271201744LLU), QU(15147917311750568503LLU), - QU( 5549739182852706345LLU), QU( 7308198397975204770LLU), - QU(13580425496671289278LLU), QU(17070764785210130301LLU), - QU( 8202832846285604405LLU), QU( 6873046287640887249LLU), - QU( 6927424434308206114LLU), QU( 6139014645937224874LLU), - QU(10290373645978487639LLU), QU(15904261291701523804LLU), - QU( 9628743442057826883LLU), QU(18383429096255546714LLU), - QU( 4977413265753686967LLU), QU( 7714317492425012869LLU), - QU( 9025232586309926193LLU), QU(14627338359776709107LLU), - QU(14759849896467790763LLU), QU(10931129435864423252LLU), - QU( 4588456988775014359LLU), QU(10699388531797056724LLU), - QU( 468652268869238792LLU), QU( 5755943035328078086LLU), - QU( 2102437379988580216LLU), QU( 9986312786506674028LLU), - QU( 2654207180040945604LLU), QU( 8726634790559960062LLU), - QU( 100497234871808137LLU), QU( 2800137176951425819LLU), - QU( 6076627612918553487LLU), QU( 5780186919186152796LLU), - QU( 8179183595769929098LLU), QU( 6009426283716221169LLU), - QU( 2796662551397449358LLU), QU( 1756961367041986764LLU), - QU( 6972897917355606205LLU), QU(14524774345368968243LLU), - QU( 2773529684745706940LLU), QU( 4853632376213075959LLU), - QU( 4198177923731358102LLU), QU( 8271224913084139776LLU), - QU( 2741753121611092226LLU), QU(16782366145996731181LLU), - QU(15426125238972640790LLU), QU(13595497100671260342LLU), - QU( 3173531022836259898LLU), QU( 6573264560319511662LLU), - QU(18041111951511157441LLU), QU( 2351433581833135952LLU), - QU( 3113255578908173487LLU), QU( 1739371330877858784LLU), - QU(16046126562789165480LLU), QU( 8072101652214192925LLU), - QU(15267091584090664910LLU), QU( 9309579200403648940LLU), - QU( 
5218892439752408722LLU), QU(14492477246004337115LLU), - QU(17431037586679770619LLU), QU( 7385248135963250480LLU), - QU( 9580144956565560660LLU), QU( 4919546228040008720LLU), - QU(15261542469145035584LLU), QU(18233297270822253102LLU), - QU( 5453248417992302857LLU), QU( 9309519155931460285LLU), - QU(10342813012345291756LLU), QU(15676085186784762381LLU), - QU(15912092950691300645LLU), QU( 9371053121499003195LLU), - QU( 9897186478226866746LLU), QU(14061858287188196327LLU), - QU( 122575971620788119LLU), QU(12146750969116317754LLU), - QU( 4438317272813245201LLU), QU( 8332576791009527119LLU), - QU(13907785691786542057LLU), QU(10374194887283287467LLU), - QU( 2098798755649059566LLU), QU( 3416235197748288894LLU), - QU( 8688269957320773484LLU), QU( 7503964602397371571LLU), - QU(16724977015147478236LLU), QU( 9461512855439858184LLU), - QU(13259049744534534727LLU), QU( 3583094952542899294LLU), - QU( 8764245731305528292LLU), QU(13240823595462088985LLU), - QU(13716141617617910448LLU), QU(18114969519935960955LLU), - QU( 2297553615798302206LLU), QU( 4585521442944663362LLU), - QU(17776858680630198686LLU), QU( 4685873229192163363LLU), - QU( 152558080671135627LLU), QU(15424900540842670088LLU), - QU(13229630297130024108LLU), QU(17530268788245718717LLU), - QU(16675633913065714144LLU), QU( 3158912717897568068LLU), - QU(15399132185380087288LLU), QU( 7401418744515677872LLU), - QU(13135412922344398535LLU), QU( 6385314346100509511LLU), - QU(13962867001134161139LLU), QU(10272780155442671999LLU), - QU(12894856086597769142LLU), QU(13340877795287554994LLU), - QU(12913630602094607396LLU), QU(12543167911119793857LLU), - QU(17343570372251873096LLU), QU(10959487764494150545LLU), - QU( 6966737953093821128LLU), QU(13780699135496988601LLU), - QU( 4405070719380142046LLU), QU(14923788365607284982LLU), - QU( 2869487678905148380LLU), QU( 6416272754197188403LLU), - QU(15017380475943612591LLU), QU( 1995636220918429487LLU), - QU( 3402016804620122716LLU), QU(15800188663407057080LLU), - 
QU(11362369990390932882LLU), QU(15262183501637986147LLU), - QU(10239175385387371494LLU), QU( 9352042420365748334LLU), - QU( 1682457034285119875LLU), QU( 1724710651376289644LLU), - QU( 2038157098893817966LLU), QU( 9897825558324608773LLU), - QU( 1477666236519164736LLU), QU(16835397314511233640LLU), - QU(10370866327005346508LLU), QU(10157504370660621982LLU), - QU(12113904045335882069LLU), QU(13326444439742783008LLU), - QU(11302769043000765804LLU), QU(13594979923955228484LLU), - QU(11779351762613475968LLU), QU( 3786101619539298383LLU), - QU( 8021122969180846063LLU), QU(15745904401162500495LLU), - QU(10762168465993897267LLU), QU(13552058957896319026LLU), - QU(11200228655252462013LLU), QU( 5035370357337441226LLU), - QU( 7593918984545500013LLU), QU( 5418554918361528700LLU), - QU( 4858270799405446371LLU), QU( 9974659566876282544LLU), - QU(18227595922273957859LLU), QU( 2772778443635656220LLU), - QU(14285143053182085385LLU), QU( 9939700992429600469LLU), - QU(12756185904545598068LLU), QU( 2020783375367345262LLU), - QU( 57026775058331227LLU), QU( 950827867930065454LLU), - QU( 6602279670145371217LLU), QU( 2291171535443566929LLU), - QU( 5832380724425010313LLU), QU( 1220343904715982285LLU), - QU(17045542598598037633LLU), QU(15460481779702820971LLU), - QU(13948388779949365130LLU), QU(13975040175430829518LLU), - QU(17477538238425541763LLU), QU(11104663041851745725LLU), - QU(15860992957141157587LLU), QU(14529434633012950138LLU), - QU( 2504838019075394203LLU), QU( 7512113882611121886LLU), - QU( 4859973559980886617LLU), QU( 1258601555703250219LLU), - QU(15594548157514316394LLU), QU( 4516730171963773048LLU), - QU(11380103193905031983LLU), QU( 6809282239982353344LLU), - QU(18045256930420065002LLU), QU( 2453702683108791859LLU), - QU( 977214582986981460LLU), QU( 2006410402232713466LLU), - QU( 6192236267216378358LLU), QU( 3429468402195675253LLU), - QU(18146933153017348921LLU), QU(17369978576367231139LLU), - QU( 1246940717230386603LLU), QU(11335758870083327110LLU), - 
QU(14166488801730353682LLU), QU( 9008573127269635732LLU), - QU(10776025389820643815LLU), QU(15087605441903942962LLU), - QU( 1359542462712147922LLU), QU(13898874411226454206LLU), - QU(17911176066536804411LLU), QU( 9435590428600085274LLU), - QU( 294488509967864007LLU), QU( 8890111397567922046LLU), - QU( 7987823476034328778LLU), QU(13263827582440967651LLU), - QU( 7503774813106751573LLU), QU(14974747296185646837LLU), - QU( 8504765037032103375LLU), QU(17340303357444536213LLU), - QU( 7704610912964485743LLU), QU( 8107533670327205061LLU), - QU( 9062969835083315985LLU), QU(16968963142126734184LLU), - QU(12958041214190810180LLU), QU( 2720170147759570200LLU), - QU( 2986358963942189566LLU), QU(14884226322219356580LLU), - QU( 286224325144368520LLU), QU(11313800433154279797LLU), - QU(18366849528439673248LLU), QU(17899725929482368789LLU), - QU( 3730004284609106799LLU), QU( 1654474302052767205LLU), - QU( 5006698007047077032LLU), QU( 8196893913601182838LLU), - QU(15214541774425211640LLU), QU(17391346045606626073LLU), - QU( 8369003584076969089LLU), QU( 3939046733368550293LLU), - QU(10178639720308707785LLU), QU( 2180248669304388697LLU), - QU( 62894391300126322LLU), QU( 9205708961736223191LLU), - QU( 6837431058165360438LLU), QU( 3150743890848308214LLU), - QU(17849330658111464583LLU), QU(12214815643135450865LLU), - QU(13410713840519603402LLU), QU( 3200778126692046802LLU), - QU(13354780043041779313LLU), QU( 800850022756886036LLU), - QU(15660052933953067433LLU), QU( 6572823544154375676LLU), - QU(11030281857015819266LLU), QU(12682241941471433835LLU), - QU(11654136407300274693LLU), QU( 4517795492388641109LLU), - QU( 9757017371504524244LLU), QU(17833043400781889277LLU), - QU(12685085201747792227LLU), QU(10408057728835019573LLU), - QU( 98370418513455221LLU), QU( 6732663555696848598LLU), - QU(13248530959948529780LLU), QU( 3530441401230622826LLU), - QU(18188251992895660615LLU), QU( 1847918354186383756LLU), - QU( 1127392190402660921LLU), QU(11293734643143819463LLU), - QU( 
3015506344578682982LLU), QU(13852645444071153329LLU), - QU( 2121359659091349142LLU), QU( 1294604376116677694LLU), - QU( 5616576231286352318LLU), QU( 7112502442954235625LLU), - QU(11676228199551561689LLU), QU(12925182803007305359LLU), - QU( 7852375518160493082LLU), QU( 1136513130539296154LLU), - QU( 5636923900916593195LLU), QU( 3221077517612607747LLU), - QU(17784790465798152513LLU), QU( 3554210049056995938LLU), - QU(17476839685878225874LLU), QU( 3206836372585575732LLU), - QU( 2765333945644823430LLU), QU(10080070903718799528LLU), - QU( 5412370818878286353LLU), QU( 9689685887726257728LLU), - QU( 8236117509123533998LLU), QU( 1951139137165040214LLU), - QU( 4492205209227980349LLU), QU(16541291230861602967LLU), - QU( 1424371548301437940LLU), QU( 9117562079669206794LLU), - QU(14374681563251691625LLU), QU(13873164030199921303LLU), - QU( 6680317946770936731LLU), QU(15586334026918276214LLU), - QU(10896213950976109802LLU), QU( 9506261949596413689LLU), - QU( 9903949574308040616LLU), QU( 6038397344557204470LLU), - QU( 174601465422373648LLU), QU(15946141191338238030LLU), - QU(17142225620992044937LLU), QU( 7552030283784477064LLU), - QU( 2947372384532947997LLU), QU( 510797021688197711LLU), - QU( 4962499439249363461LLU), QU( 23770320158385357LLU), - QU( 959774499105138124LLU), QU( 1468396011518788276LLU), - QU( 2015698006852312308LLU), QU( 4149400718489980136LLU), - QU( 5992916099522371188LLU), QU(10819182935265531076LLU), - QU(16189787999192351131LLU), QU( 342833961790261950LLU), - QU(12470830319550495336LLU), QU(18128495041912812501LLU), - QU( 1193600899723524337LLU), QU( 9056793666590079770LLU), - QU( 2154021227041669041LLU), QU( 4963570213951235735LLU), - QU( 4865075960209211409LLU), QU( 2097724599039942963LLU), - QU( 2024080278583179845LLU), QU(11527054549196576736LLU), - QU(10650256084182390252LLU), QU( 4808408648695766755LLU), - QU( 1642839215013788844LLU), QU(10607187948250398390LLU), - QU( 7076868166085913508LLU), QU( 730522571106887032LLU), - QU(12500579240208524895LLU), 
QU( 4484390097311355324LLU), - QU(15145801330700623870LLU), QU( 8055827661392944028LLU), - QU( 5865092976832712268LLU), QU(15159212508053625143LLU), - QU( 3560964582876483341LLU), QU( 4070052741344438280LLU), - QU( 6032585709886855634LLU), QU(15643262320904604873LLU), - QU( 2565119772293371111LLU), QU( 318314293065348260LLU), - QU(15047458749141511872LLU), QU( 7772788389811528730LLU), - QU( 7081187494343801976LLU), QU( 6465136009467253947LLU), - QU(10425940692543362069LLU), QU( 554608190318339115LLU), - QU(14796699860302125214LLU), QU( 1638153134431111443LLU), - QU(10336967447052276248LLU), QU( 8412308070396592958LLU), - QU( 4004557277152051226LLU), QU( 8143598997278774834LLU), - QU(16413323996508783221LLU), QU(13139418758033994949LLU), - QU( 9772709138335006667LLU), QU( 2818167159287157659LLU), - QU(17091740573832523669LLU), QU(14629199013130751608LLU), - QU(18268322711500338185LLU), QU( 8290963415675493063LLU), - QU( 8830864907452542588LLU), QU( 1614839084637494849LLU), - QU(14855358500870422231LLU), QU( 3472996748392519937LLU), - QU(15317151166268877716LLU), QU( 5825895018698400362LLU), - QU(16730208429367544129LLU), QU(10481156578141202800LLU), - QU( 4746166512382823750LLU), QU(12720876014472464998LLU), - QU( 8825177124486735972LLU), QU(13733447296837467838LLU), - QU( 6412293741681359625LLU), QU( 8313213138756135033LLU), - QU(11421481194803712517LLU), QU( 7997007691544174032LLU), - QU( 6812963847917605930LLU), QU( 9683091901227558641LLU), - QU(14703594165860324713LLU), QU( 1775476144519618309LLU), - QU( 2724283288516469519LLU), QU( 717642555185856868LLU), - QU( 8736402192215092346LLU), QU(11878800336431381021LLU), - QU( 4348816066017061293LLU), QU( 6115112756583631307LLU), - QU( 9176597239667142976LLU), QU(12615622714894259204LLU), - QU(10283406711301385987LLU), QU( 5111762509485379420LLU), - QU( 3118290051198688449LLU), QU( 7345123071632232145LLU), - QU( 9176423451688682359LLU), QU( 4843865456157868971LLU), - QU(12008036363752566088LLU), 
QU(12058837181919397720LLU), - QU( 2145073958457347366LLU), QU( 1526504881672818067LLU), - QU( 3488830105567134848LLU), QU(13208362960674805143LLU), - QU( 4077549672899572192LLU), QU( 7770995684693818365LLU), - QU( 1398532341546313593LLU), QU(12711859908703927840LLU), - QU( 1417561172594446813LLU), QU(17045191024194170604LLU), - QU( 4101933177604931713LLU), QU(14708428834203480320LLU), - QU(17447509264469407724LLU), QU(14314821973983434255LLU), - QU(17990472271061617265LLU), QU( 5087756685841673942LLU), - QU(12797820586893859939LLU), QU( 1778128952671092879LLU), - QU( 3535918530508665898LLU), QU( 9035729701042481301LLU), - QU(14808661568277079962LLU), QU(14587345077537747914LLU), - QU(11920080002323122708LLU), QU( 6426515805197278753LLU), - QU( 3295612216725984831LLU), QU(11040722532100876120LLU), - QU(12305952936387598754LLU), QU(16097391899742004253LLU), - QU( 4908537335606182208LLU), QU(12446674552196795504LLU), - QU(16010497855816895177LLU), QU( 9194378874788615551LLU), - QU( 3382957529567613384LLU), QU( 5154647600754974077LLU), - QU( 9801822865328396141LLU), QU( 9023662173919288143LLU), - QU(17623115353825147868LLU), QU( 8238115767443015816LLU), - QU(15811444159859002560LLU), QU( 9085612528904059661LLU), - QU( 6888601089398614254LLU), QU( 258252992894160189LLU), - QU( 6704363880792428622LLU), QU( 6114966032147235763LLU), - QU(11075393882690261875LLU), QU( 8797664238933620407LLU), - QU( 5901892006476726920LLU), QU( 5309780159285518958LLU), - QU(14940808387240817367LLU), QU(14642032021449656698LLU), - QU( 9808256672068504139LLU), QU( 3670135111380607658LLU), - QU(11211211097845960152LLU), QU( 1474304506716695808LLU), - QU(15843166204506876239LLU), QU( 7661051252471780561LLU), - QU(10170905502249418476LLU), QU( 7801416045582028589LLU), - QU( 2763981484737053050LLU), QU( 9491377905499253054LLU), - QU(16201395896336915095LLU), QU( 9256513756442782198LLU), - QU( 5411283157972456034LLU), QU( 5059433122288321676LLU), - QU( 4327408006721123357LLU), QU( 
9278544078834433377LLU), - QU( 7601527110882281612LLU), QU(11848295896975505251LLU), - QU(12096998801094735560LLU), QU(14773480339823506413LLU), - QU(15586227433895802149LLU), QU(12786541257830242872LLU), - QU( 6904692985140503067LLU), QU( 5309011515263103959LLU), - QU(12105257191179371066LLU), QU(14654380212442225037LLU), - QU( 2556774974190695009LLU), QU( 4461297399927600261LLU), - QU(14888225660915118646LLU), QU(14915459341148291824LLU), - QU( 2738802166252327631LLU), QU( 6047155789239131512LLU), - QU(12920545353217010338LLU), QU(10697617257007840205LLU), - QU( 2751585253158203504LLU), QU(13252729159780047496LLU), - QU(14700326134672815469LLU), QU(14082527904374600529LLU), - QU(16852962273496542070LLU), QU(17446675504235853907LLU), - QU(15019600398527572311LLU), QU(12312781346344081551LLU), - QU(14524667935039810450LLU), QU( 5634005663377195738LLU), - QU(11375574739525000569LLU), QU( 2423665396433260040LLU), - QU( 5222836914796015410LLU), QU( 4397666386492647387LLU), - QU( 4619294441691707638LLU), QU( 665088602354770716LLU), - QU(13246495665281593610LLU), QU( 6564144270549729409LLU), - QU(10223216188145661688LLU), QU( 3961556907299230585LLU), - QU(11543262515492439914LLU), QU(16118031437285993790LLU), - QU( 7143417964520166465LLU), QU(13295053515909486772LLU), - QU( 40434666004899675LLU), QU(17127804194038347164LLU), - QU( 8599165966560586269LLU), QU( 8214016749011284903LLU), - QU(13725130352140465239LLU), QU( 5467254474431726291LLU), - QU( 7748584297438219877LLU), QU(16933551114829772472LLU), - QU( 2169618439506799400LLU), QU( 2169787627665113463LLU), - QU(17314493571267943764LLU), QU(18053575102911354912LLU), - QU(11928303275378476973LLU), QU(11593850925061715550LLU), - QU(17782269923473589362LLU), QU( 3280235307704747039LLU), - QU( 6145343578598685149LLU), QU(17080117031114086090LLU), - QU(18066839902983594755LLU), QU( 6517508430331020706LLU), - QU( 8092908893950411541LLU), QU(12558378233386153732LLU), - QU( 4476532167973132976LLU), 
QU(16081642430367025016LLU), - QU( 4233154094369139361LLU), QU( 8693630486693161027LLU), - QU(11244959343027742285LLU), QU(12273503967768513508LLU), - QU(14108978636385284876LLU), QU( 7242414665378826984LLU), - QU( 6561316938846562432LLU), QU( 8601038474994665795LLU), - QU(17532942353612365904LLU), QU(17940076637020912186LLU), - QU( 7340260368823171304LLU), QU( 7061807613916067905LLU), - QU(10561734935039519326LLU), QU(17990796503724650862LLU), - QU( 6208732943911827159LLU), QU( 359077562804090617LLU), - QU(14177751537784403113LLU), QU(10659599444915362902LLU), - QU(15081727220615085833LLU), QU(13417573895659757486LLU), - QU(15513842342017811524LLU), QU(11814141516204288231LLU), - QU( 1827312513875101814LLU), QU( 2804611699894603103LLU), - QU(17116500469975602763LLU), QU(12270191815211952087LLU), - QU(12256358467786024988LLU), QU(18435021722453971267LLU), - QU( 671330264390865618LLU), QU( 476504300460286050LLU), - QU(16465470901027093441LLU), QU( 4047724406247136402LLU), - QU( 1322305451411883346LLU), QU( 1388308688834322280LLU), - QU( 7303989085269758176LLU), QU( 9323792664765233642LLU), - QU( 4542762575316368936LLU), QU(17342696132794337618LLU), - QU( 4588025054768498379LLU), QU(13415475057390330804LLU), - QU(17880279491733405570LLU), QU(10610553400618620353LLU), - QU( 3180842072658960139LLU), QU(13002966655454270120LLU), - QU( 1665301181064982826LLU), QU( 7083673946791258979LLU), - QU( 190522247122496820LLU), QU(17388280237250677740LLU), - QU( 8430770379923642945LLU), QU(12987180971921668584LLU), - QU( 2311086108365390642LLU), QU( 2870984383579822345LLU), - QU(14014682609164653318LLU), QU(14467187293062251484LLU), - QU( 192186361147413298LLU), QU(15171951713531796524LLU), - QU( 9900305495015948728LLU), QU(17958004775615466344LLU), - QU(14346380954498606514LLU), QU(18040047357617407096LLU), - QU( 5035237584833424532LLU), QU(15089555460613972287LLU), - QU( 4131411873749729831LLU), QU( 1329013581168250330LLU), - QU(10095353333051193949LLU), 
QU(10749518561022462716LLU), - QU( 9050611429810755847LLU), QU(15022028840236655649LLU), - QU( 8775554279239748298LLU), QU(13105754025489230502LLU), - QU(15471300118574167585LLU), QU( 89864764002355628LLU), - QU( 8776416323420466637LLU), QU( 5280258630612040891LLU), - QU( 2719174488591862912LLU), QU( 7599309137399661994LLU), - QU(15012887256778039979LLU), QU(14062981725630928925LLU), - QU(12038536286991689603LLU), QU( 7089756544681775245LLU), - QU(10376661532744718039LLU), QU( 1265198725901533130LLU), - QU(13807996727081142408LLU), QU( 2935019626765036403LLU), - QU( 7651672460680700141LLU), QU( 3644093016200370795LLU), - QU( 2840982578090080674LLU), QU(17956262740157449201LLU), - QU(18267979450492880548LLU), QU(11799503659796848070LLU), - QU( 9942537025669672388LLU), QU(11886606816406990297LLU), - QU( 5488594946437447576LLU), QU( 7226714353282744302LLU), - QU( 3784851653123877043LLU), QU( 878018453244803041LLU), - QU(12110022586268616085LLU), QU( 734072179404675123LLU), - QU(11869573627998248542LLU), QU( 469150421297783998LLU), - QU( 260151124912803804LLU), QU(11639179410120968649LLU), - QU( 9318165193840846253LLU), QU(12795671722734758075LLU), - QU(15318410297267253933LLU), QU( 691524703570062620LLU), - QU( 5837129010576994601LLU), QU(15045963859726941052LLU), - QU( 5850056944932238169LLU), QU(12017434144750943807LLU), - QU( 7447139064928956574LLU), QU( 3101711812658245019LLU), - QU(16052940704474982954LLU), QU(18195745945986994042LLU), - QU( 8932252132785575659LLU), QU(13390817488106794834LLU), - QU(11582771836502517453LLU), QU( 4964411326683611686LLU), - QU( 2195093981702694011LLU), QU(14145229538389675669LLU), - QU(16459605532062271798LLU), QU( 866316924816482864LLU), - QU( 4593041209937286377LLU), QU( 8415491391910972138LLU), - QU( 4171236715600528969LLU), QU(16637569303336782889LLU), - QU( 2002011073439212680LLU), QU(17695124661097601411LLU), - QU( 4627687053598611702LLU), QU( 7895831936020190403LLU), - QU( 8455951300917267802LLU), QU( 
2923861649108534854LLU), - QU( 8344557563927786255LLU), QU( 6408671940373352556LLU), - QU(12210227354536675772LLU), QU(14294804157294222295LLU), - QU(10103022425071085127LLU), QU(10092959489504123771LLU), - QU( 6554774405376736268LLU), QU(12629917718410641774LLU), - QU( 6260933257596067126LLU), QU( 2460827021439369673LLU), - QU( 2541962996717103668LLU), QU( 597377203127351475LLU), - QU( 5316984203117315309LLU), QU( 4811211393563241961LLU), - QU(13119698597255811641LLU), QU( 8048691512862388981LLU), - QU(10216818971194073842LLU), QU( 4612229970165291764LLU), - QU(10000980798419974770LLU), QU( 6877640812402540687LLU), - QU( 1488727563290436992LLU), QU( 2227774069895697318LLU), - QU(11237754507523316593LLU), QU(13478948605382290972LLU), - QU( 1963583846976858124LLU), QU( 5512309205269276457LLU), - QU( 3972770164717652347LLU), QU( 3841751276198975037LLU), - QU(10283343042181903117LLU), QU( 8564001259792872199LLU), - QU(16472187244722489221LLU), QU( 8953493499268945921LLU), - QU( 3518747340357279580LLU), QU( 4003157546223963073LLU), - QU( 3270305958289814590LLU), QU( 3966704458129482496LLU), - QU( 8122141865926661939LLU), QU(14627734748099506653LLU), - QU(13064426990862560568LLU), QU( 2414079187889870829LLU), - QU( 5378461209354225306LLU), QU(10841985740128255566LLU), - QU( 538582442885401738LLU), QU( 7535089183482905946LLU), - QU(16117559957598879095LLU), QU( 8477890721414539741LLU), - QU( 1459127491209533386LLU), QU(17035126360733620462LLU), - QU( 8517668552872379126LLU), QU(10292151468337355014LLU), - QU(17081267732745344157LLU), QU(13751455337946087178LLU), - QU(14026945459523832966LLU), QU( 6653278775061723516LLU), - QU(10619085543856390441LLU), QU( 2196343631481122885LLU), - QU(10045966074702826136LLU), QU(10082317330452718282LLU), - QU( 5920859259504831242LLU), QU( 9951879073426540617LLU), - QU( 7074696649151414158LLU), QU(15808193543879464318LLU), - QU( 7385247772746953374LLU), QU( 3192003544283864292LLU), - QU(18153684490917593847LLU), 
QU(12423498260668568905LLU), - QU(10957758099756378169LLU), QU(11488762179911016040LLU), - QU( 2099931186465333782LLU), QU(11180979581250294432LLU), - QU( 8098916250668367933LLU), QU( 3529200436790763465LLU), - QU(12988418908674681745LLU), QU( 6147567275954808580LLU), - QU( 3207503344604030989LLU), QU(10761592604898615360LLU), - QU( 229854861031893504LLU), QU( 8809853962667144291LLU), - QU(13957364469005693860LLU), QU( 7634287665224495886LLU), - QU(12353487366976556874LLU), QU( 1134423796317152034LLU), - QU( 2088992471334107068LLU), QU( 7393372127190799698LLU), - QU( 1845367839871058391LLU), QU( 207922563987322884LLU), - QU(11960870813159944976LLU), QU(12182120053317317363LLU), - QU(17307358132571709283LLU), QU(13871081155552824936LLU), - QU(18304446751741566262LLU), QU( 7178705220184302849LLU), - QU(10929605677758824425LLU), QU(16446976977835806844LLU), - QU(13723874412159769044LLU), QU( 6942854352100915216LLU), - QU( 1726308474365729390LLU), QU( 2150078766445323155LLU), - QU(15345558947919656626LLU), QU(12145453828874527201LLU), - QU( 2054448620739726849LLU), QU( 2740102003352628137LLU), - QU(11294462163577610655LLU), QU( 756164283387413743LLU), - QU(17841144758438810880LLU), QU(10802406021185415861LLU), - QU( 8716455530476737846LLU), QU( 6321788834517649606LLU), - QU(14681322910577468426LLU), QU(17330043563884336387LLU), - QU(12701802180050071614LLU), QU(14695105111079727151LLU), - QU( 5112098511654172830LLU), QU( 4957505496794139973LLU), - QU( 8270979451952045982LLU), QU(12307685939199120969LLU), - QU(12425799408953443032LLU), QU( 8376410143634796588LLU), - QU(16621778679680060464LLU), QU( 3580497854566660073LLU), - QU( 1122515747803382416LLU), QU( 857664980960597599LLU), - QU( 6343640119895925918LLU), QU(12878473260854462891LLU), - QU(10036813920765722626LLU), QU(14451335468363173812LLU), - QU( 5476809692401102807LLU), QU(16442255173514366342LLU), - QU(13060203194757167104LLU), QU(14354124071243177715LLU), - QU(15961249405696125227LLU), 
QU(13703893649690872584LLU), - QU( 363907326340340064LLU), QU( 6247455540491754842LLU), - QU(12242249332757832361LLU), QU( 156065475679796717LLU), - QU( 9351116235749732355LLU), QU( 4590350628677701405LLU), - QU( 1671195940982350389LLU), QU(13501398458898451905LLU), - QU( 6526341991225002255LLU), QU( 1689782913778157592LLU), - QU( 7439222350869010334LLU), QU(13975150263226478308LLU), - QU(11411961169932682710LLU), QU(17204271834833847277LLU), - QU( 541534742544435367LLU), QU( 6591191931218949684LLU), - QU( 2645454775478232486LLU), QU( 4322857481256485321LLU), - QU( 8477416487553065110LLU), QU(12902505428548435048LLU), - QU( 971445777981341415LLU), QU(14995104682744976712LLU), - QU( 4243341648807158063LLU), QU( 8695061252721927661LLU), - QU( 5028202003270177222LLU), QU( 2289257340915567840LLU), - QU(13870416345121866007LLU), QU(13994481698072092233LLU), - QU( 6912785400753196481LLU), QU( 2278309315841980139LLU), - QU( 4329765449648304839LLU), QU( 5963108095785485298LLU), - QU( 4880024847478722478LLU), QU(16015608779890240947LLU), - QU( 1866679034261393544LLU), QU( 914821179919731519LLU), - QU( 9643404035648760131LLU), QU( 2418114953615593915LLU), - QU( 944756836073702374LLU), QU(15186388048737296834LLU), - QU( 7723355336128442206LLU), QU( 7500747479679599691LLU), - QU(18013961306453293634LLU), QU( 2315274808095756456LLU), - QU(13655308255424029566LLU), QU(17203800273561677098LLU), - QU( 1382158694422087756LLU), QU( 5090390250309588976LLU), - QU( 517170818384213989LLU), QU( 1612709252627729621LLU), - QU( 1330118955572449606LLU), QU( 300922478056709885LLU), - QU(18115693291289091987LLU), QU(13491407109725238321LLU), - QU(15293714633593827320LLU), QU( 5151539373053314504LLU), - QU( 5951523243743139207LLU), QU(14459112015249527975LLU), - QU( 5456113959000700739LLU), QU( 3877918438464873016LLU), - QU(12534071654260163555LLU), QU(15871678376893555041LLU), - QU(11005484805712025549LLU), QU(16353066973143374252LLU), - QU( 4358331472063256685LLU), QU( 
8268349332210859288LLU), - QU(12485161590939658075LLU), QU(13955993592854471343LLU), - QU( 5911446886848367039LLU), QU(14925834086813706974LLU), - QU( 6590362597857994805LLU), QU( 1280544923533661875LLU), - QU( 1637756018947988164LLU), QU( 4734090064512686329LLU), - QU(16693705263131485912LLU), QU( 6834882340494360958LLU), - QU( 8120732176159658505LLU), QU( 2244371958905329346LLU), - QU(10447499707729734021LLU), QU( 7318742361446942194LLU), - QU( 8032857516355555296LLU), QU(14023605983059313116LLU), - QU( 1032336061815461376LLU), QU( 9840995337876562612LLU), - QU( 9869256223029203587LLU), QU(12227975697177267636LLU), - QU(12728115115844186033LLU), QU( 7752058479783205470LLU), - QU( 729733219713393087LLU), QU(12954017801239007622LLU) + QU(16924766246869039260ULL), QU( 8201438687333352714ULL), + QU( 2265290287015001750ULL), QU(18397264611805473832ULL), + QU( 3375255223302384358ULL), QU( 6345559975416828796ULL), + QU(18229739242790328073ULL), QU( 7596792742098800905ULL), + QU( 255338647169685981ULL), QU( 2052747240048610300ULL), + QU(18328151576097299343ULL), QU(12472905421133796567ULL), + QU(11315245349717600863ULL), QU(16594110197775871209ULL), + QU(15708751964632456450ULL), QU(10452031272054632535ULL), + QU(11097646720811454386ULL), QU( 4556090668445745441ULL), + QU(17116187693090663106ULL), QU(14931526836144510645ULL), + QU( 9190752218020552591ULL), QU( 9625800285771901401ULL), + QU(13995141077659972832ULL), QU( 5194209094927829625ULL), + QU( 4156788379151063303ULL), QU( 8523452593770139494ULL), + QU(14082382103049296727ULL), QU( 2462601863986088483ULL), + QU( 3030583461592840678ULL), QU( 5221622077872827681ULL), + QU( 3084210671228981236ULL), QU(13956758381389953823ULL), + QU(13503889856213423831ULL), QU(15696904024189836170ULL), + QU( 4612584152877036206ULL), QU( 6231135538447867881ULL), + QU(10172457294158869468ULL), QU( 6452258628466708150ULL), + QU(14044432824917330221ULL), QU( 370168364480044279ULL), + QU(10102144686427193359ULL), QU( 667870489994776076ULL), 
+ QU( 2732271956925885858ULL), QU(18027788905977284151ULL), + QU(15009842788582923859ULL), QU( 7136357960180199542ULL), + QU(15901736243475578127ULL), QU(16951293785352615701ULL), + QU(10551492125243691632ULL), QU(17668869969146434804ULL), + QU(13646002971174390445ULL), QU( 9804471050759613248ULL), + QU( 5511670439655935493ULL), QU(18103342091070400926ULL), + QU(17224512747665137533ULL), QU(15534627482992618168ULL), + QU( 1423813266186582647ULL), QU(15821176807932930024ULL), + QU( 30323369733607156ULL), QU(11599382494723479403ULL), + QU( 653856076586810062ULL), QU( 3176437395144899659ULL), + QU(14028076268147963917ULL), QU(16156398271809666195ULL), + QU( 3166955484848201676ULL), QU( 5746805620136919390ULL), + QU(17297845208891256593ULL), QU(11691653183226428483ULL), + QU(17900026146506981577ULL), QU(15387382115755971042ULL), + QU(16923567681040845943ULL), QU( 8039057517199388606ULL), + QU(11748409241468629263ULL), QU( 794358245539076095ULL), + QU(13438501964693401242ULL), QU(14036803236515618962ULL), + QU( 5252311215205424721ULL), QU(17806589612915509081ULL), + QU( 6802767092397596006ULL), QU(14212120431184557140ULL), + QU( 1072951366761385712ULL), QU(13098491780722836296ULL), + QU( 9466676828710797353ULL), QU(12673056849042830081ULL), + QU(12763726623645357580ULL), QU(16468961652999309493ULL), + QU(15305979875636438926ULL), QU(17444713151223449734ULL), + QU( 5692214267627883674ULL), QU(13049589139196151505ULL), + QU( 880115207831670745ULL), QU( 1776529075789695498ULL), + QU(16695225897801466485ULL), QU(10666901778795346845ULL), + QU( 6164389346722833869ULL), QU( 2863817793264300475ULL), + QU( 9464049921886304754ULL), QU( 3993566636740015468ULL), + QU( 9983749692528514136ULL), QU(16375286075057755211ULL), + QU(16042643417005440820ULL), QU(11445419662923489877ULL), + QU( 7999038846885158836ULL), QU( 6721913661721511535ULL), + QU( 5363052654139357320ULL), QU( 1817788761173584205ULL), + QU(13290974386445856444ULL), QU( 4650350818937984680ULL), + QU( 
8219183528102484836ULL), QU( 1569862923500819899ULL), + QU( 4189359732136641860ULL), QU(14202822961683148583ULL), + QU( 4457498315309429058ULL), QU(13089067387019074834ULL), + QU(11075517153328927293ULL), QU(10277016248336668389ULL), + QU( 7070509725324401122ULL), QU(17808892017780289380ULL), + QU(13143367339909287349ULL), QU( 1377743745360085151ULL), + QU( 5749341807421286485ULL), QU(14832814616770931325ULL), + QU( 7688820635324359492ULL), QU(10960474011539770045ULL), + QU( 81970066653179790ULL), QU(12619476072607878022ULL), + QU( 4419566616271201744ULL), QU(15147917311750568503ULL), + QU( 5549739182852706345ULL), QU( 7308198397975204770ULL), + QU(13580425496671289278ULL), QU(17070764785210130301ULL), + QU( 8202832846285604405ULL), QU( 6873046287640887249ULL), + QU( 6927424434308206114ULL), QU( 6139014645937224874ULL), + QU(10290373645978487639ULL), QU(15904261291701523804ULL), + QU( 9628743442057826883ULL), QU(18383429096255546714ULL), + QU( 4977413265753686967ULL), QU( 7714317492425012869ULL), + QU( 9025232586309926193ULL), QU(14627338359776709107ULL), + QU(14759849896467790763ULL), QU(10931129435864423252ULL), + QU( 4588456988775014359ULL), QU(10699388531797056724ULL), + QU( 468652268869238792ULL), QU( 5755943035328078086ULL), + QU( 2102437379988580216ULL), QU( 9986312786506674028ULL), + QU( 2654207180040945604ULL), QU( 8726634790559960062ULL), + QU( 100497234871808137ULL), QU( 2800137176951425819ULL), + QU( 6076627612918553487ULL), QU( 5780186919186152796ULL), + QU( 8179183595769929098ULL), QU( 6009426283716221169ULL), + QU( 2796662551397449358ULL), QU( 1756961367041986764ULL), + QU( 6972897917355606205ULL), QU(14524774345368968243ULL), + QU( 2773529684745706940ULL), QU( 4853632376213075959ULL), + QU( 4198177923731358102ULL), QU( 8271224913084139776ULL), + QU( 2741753121611092226ULL), QU(16782366145996731181ULL), + QU(15426125238972640790ULL), QU(13595497100671260342ULL), + QU( 3173531022836259898ULL), QU( 6573264560319511662ULL), + 
QU(18041111951511157441ULL), QU( 2351433581833135952ULL), + QU( 3113255578908173487ULL), QU( 1739371330877858784ULL), + QU(16046126562789165480ULL), QU( 8072101652214192925ULL), + QU(15267091584090664910ULL), QU( 9309579200403648940ULL), + QU( 5218892439752408722ULL), QU(14492477246004337115ULL), + QU(17431037586679770619ULL), QU( 7385248135963250480ULL), + QU( 9580144956565560660ULL), QU( 4919546228040008720ULL), + QU(15261542469145035584ULL), QU(18233297270822253102ULL), + QU( 5453248417992302857ULL), QU( 9309519155931460285ULL), + QU(10342813012345291756ULL), QU(15676085186784762381ULL), + QU(15912092950691300645ULL), QU( 9371053121499003195ULL), + QU( 9897186478226866746ULL), QU(14061858287188196327ULL), + QU( 122575971620788119ULL), QU(12146750969116317754ULL), + QU( 4438317272813245201ULL), QU( 8332576791009527119ULL), + QU(13907785691786542057ULL), QU(10374194887283287467ULL), + QU( 2098798755649059566ULL), QU( 3416235197748288894ULL), + QU( 8688269957320773484ULL), QU( 7503964602397371571ULL), + QU(16724977015147478236ULL), QU( 9461512855439858184ULL), + QU(13259049744534534727ULL), QU( 3583094952542899294ULL), + QU( 8764245731305528292ULL), QU(13240823595462088985ULL), + QU(13716141617617910448ULL), QU(18114969519935960955ULL), + QU( 2297553615798302206ULL), QU( 4585521442944663362ULL), + QU(17776858680630198686ULL), QU( 4685873229192163363ULL), + QU( 152558080671135627ULL), QU(15424900540842670088ULL), + QU(13229630297130024108ULL), QU(17530268788245718717ULL), + QU(16675633913065714144ULL), QU( 3158912717897568068ULL), + QU(15399132185380087288ULL), QU( 7401418744515677872ULL), + QU(13135412922344398535ULL), QU( 6385314346100509511ULL), + QU(13962867001134161139ULL), QU(10272780155442671999ULL), + QU(12894856086597769142ULL), QU(13340877795287554994ULL), + QU(12913630602094607396ULL), QU(12543167911119793857ULL), + QU(17343570372251873096ULL), QU(10959487764494150545ULL), + QU( 6966737953093821128ULL), QU(13780699135496988601ULL), + QU( 
4405070719380142046ULL), QU(14923788365607284982ULL), + QU( 2869487678905148380ULL), QU( 6416272754197188403ULL), + QU(15017380475943612591ULL), QU( 1995636220918429487ULL), + QU( 3402016804620122716ULL), QU(15800188663407057080ULL), + QU(11362369990390932882ULL), QU(15262183501637986147ULL), + QU(10239175385387371494ULL), QU( 9352042420365748334ULL), + QU( 1682457034285119875ULL), QU( 1724710651376289644ULL), + QU( 2038157098893817966ULL), QU( 9897825558324608773ULL), + QU( 1477666236519164736ULL), QU(16835397314511233640ULL), + QU(10370866327005346508ULL), QU(10157504370660621982ULL), + QU(12113904045335882069ULL), QU(13326444439742783008ULL), + QU(11302769043000765804ULL), QU(13594979923955228484ULL), + QU(11779351762613475968ULL), QU( 3786101619539298383ULL), + QU( 8021122969180846063ULL), QU(15745904401162500495ULL), + QU(10762168465993897267ULL), QU(13552058957896319026ULL), + QU(11200228655252462013ULL), QU( 5035370357337441226ULL), + QU( 7593918984545500013ULL), QU( 5418554918361528700ULL), + QU( 4858270799405446371ULL), QU( 9974659566876282544ULL), + QU(18227595922273957859ULL), QU( 2772778443635656220ULL), + QU(14285143053182085385ULL), QU( 9939700992429600469ULL), + QU(12756185904545598068ULL), QU( 2020783375367345262ULL), + QU( 57026775058331227ULL), QU( 950827867930065454ULL), + QU( 6602279670145371217ULL), QU( 2291171535443566929ULL), + QU( 5832380724425010313ULL), QU( 1220343904715982285ULL), + QU(17045542598598037633ULL), QU(15460481779702820971ULL), + QU(13948388779949365130ULL), QU(13975040175430829518ULL), + QU(17477538238425541763ULL), QU(11104663041851745725ULL), + QU(15860992957141157587ULL), QU(14529434633012950138ULL), + QU( 2504838019075394203ULL), QU( 7512113882611121886ULL), + QU( 4859973559980886617ULL), QU( 1258601555703250219ULL), + QU(15594548157514316394ULL), QU( 4516730171963773048ULL), + QU(11380103193905031983ULL), QU( 6809282239982353344ULL), + QU(18045256930420065002ULL), QU( 2453702683108791859ULL), + QU( 
977214582986981460ULL), QU( 2006410402232713466ULL), + QU( 6192236267216378358ULL), QU( 3429468402195675253ULL), + QU(18146933153017348921ULL), QU(17369978576367231139ULL), + QU( 1246940717230386603ULL), QU(11335758870083327110ULL), + QU(14166488801730353682ULL), QU( 9008573127269635732ULL), + QU(10776025389820643815ULL), QU(15087605441903942962ULL), + QU( 1359542462712147922ULL), QU(13898874411226454206ULL), + QU(17911176066536804411ULL), QU( 9435590428600085274ULL), + QU( 294488509967864007ULL), QU( 8890111397567922046ULL), + QU( 7987823476034328778ULL), QU(13263827582440967651ULL), + QU( 7503774813106751573ULL), QU(14974747296185646837ULL), + QU( 8504765037032103375ULL), QU(17340303357444536213ULL), + QU( 7704610912964485743ULL), QU( 8107533670327205061ULL), + QU( 9062969835083315985ULL), QU(16968963142126734184ULL), + QU(12958041214190810180ULL), QU( 2720170147759570200ULL), + QU( 2986358963942189566ULL), QU(14884226322219356580ULL), + QU( 286224325144368520ULL), QU(11313800433154279797ULL), + QU(18366849528439673248ULL), QU(17899725929482368789ULL), + QU( 3730004284609106799ULL), QU( 1654474302052767205ULL), + QU( 5006698007047077032ULL), QU( 8196893913601182838ULL), + QU(15214541774425211640ULL), QU(17391346045606626073ULL), + QU( 8369003584076969089ULL), QU( 3939046733368550293ULL), + QU(10178639720308707785ULL), QU( 2180248669304388697ULL), + QU( 62894391300126322ULL), QU( 9205708961736223191ULL), + QU( 6837431058165360438ULL), QU( 3150743890848308214ULL), + QU(17849330658111464583ULL), QU(12214815643135450865ULL), + QU(13410713840519603402ULL), QU( 3200778126692046802ULL), + QU(13354780043041779313ULL), QU( 800850022756886036ULL), + QU(15660052933953067433ULL), QU( 6572823544154375676ULL), + QU(11030281857015819266ULL), QU(12682241941471433835ULL), + QU(11654136407300274693ULL), QU( 4517795492388641109ULL), + QU( 9757017371504524244ULL), QU(17833043400781889277ULL), + QU(12685085201747792227ULL), QU(10408057728835019573ULL), + QU( 98370418513455221ULL), 
QU( 6732663555696848598ULL), + QU(13248530959948529780ULL), QU( 3530441401230622826ULL), + QU(18188251992895660615ULL), QU( 1847918354186383756ULL), + QU( 1127392190402660921ULL), QU(11293734643143819463ULL), + QU( 3015506344578682982ULL), QU(13852645444071153329ULL), + QU( 2121359659091349142ULL), QU( 1294604376116677694ULL), + QU( 5616576231286352318ULL), QU( 7112502442954235625ULL), + QU(11676228199551561689ULL), QU(12925182803007305359ULL), + QU( 7852375518160493082ULL), QU( 1136513130539296154ULL), + QU( 5636923900916593195ULL), QU( 3221077517612607747ULL), + QU(17784790465798152513ULL), QU( 3554210049056995938ULL), + QU(17476839685878225874ULL), QU( 3206836372585575732ULL), + QU( 2765333945644823430ULL), QU(10080070903718799528ULL), + QU( 5412370818878286353ULL), QU( 9689685887726257728ULL), + QU( 8236117509123533998ULL), QU( 1951139137165040214ULL), + QU( 4492205209227980349ULL), QU(16541291230861602967ULL), + QU( 1424371548301437940ULL), QU( 9117562079669206794ULL), + QU(14374681563251691625ULL), QU(13873164030199921303ULL), + QU( 6680317946770936731ULL), QU(15586334026918276214ULL), + QU(10896213950976109802ULL), QU( 9506261949596413689ULL), + QU( 9903949574308040616ULL), QU( 6038397344557204470ULL), + QU( 174601465422373648ULL), QU(15946141191338238030ULL), + QU(17142225620992044937ULL), QU( 7552030283784477064ULL), + QU( 2947372384532947997ULL), QU( 510797021688197711ULL), + QU( 4962499439249363461ULL), QU( 23770320158385357ULL), + QU( 959774499105138124ULL), QU( 1468396011518788276ULL), + QU( 2015698006852312308ULL), QU( 4149400718489980136ULL), + QU( 5992916099522371188ULL), QU(10819182935265531076ULL), + QU(16189787999192351131ULL), QU( 342833961790261950ULL), + QU(12470830319550495336ULL), QU(18128495041912812501ULL), + QU( 1193600899723524337ULL), QU( 9056793666590079770ULL), + QU( 2154021227041669041ULL), QU( 4963570213951235735ULL), + QU( 4865075960209211409ULL), QU( 2097724599039942963ULL), + QU( 2024080278583179845ULL), 
QU(11527054549196576736ULL), + QU(10650256084182390252ULL), QU( 4808408648695766755ULL), + QU( 1642839215013788844ULL), QU(10607187948250398390ULL), + QU( 7076868166085913508ULL), QU( 730522571106887032ULL), + QU(12500579240208524895ULL), QU( 4484390097311355324ULL), + QU(15145801330700623870ULL), QU( 8055827661392944028ULL), + QU( 5865092976832712268ULL), QU(15159212508053625143ULL), + QU( 3560964582876483341ULL), QU( 4070052741344438280ULL), + QU( 6032585709886855634ULL), QU(15643262320904604873ULL), + QU( 2565119772293371111ULL), QU( 318314293065348260ULL), + QU(15047458749141511872ULL), QU( 7772788389811528730ULL), + QU( 7081187494343801976ULL), QU( 6465136009467253947ULL), + QU(10425940692543362069ULL), QU( 554608190318339115ULL), + QU(14796699860302125214ULL), QU( 1638153134431111443ULL), + QU(10336967447052276248ULL), QU( 8412308070396592958ULL), + QU( 4004557277152051226ULL), QU( 8143598997278774834ULL), + QU(16413323996508783221ULL), QU(13139418758033994949ULL), + QU( 9772709138335006667ULL), QU( 2818167159287157659ULL), + QU(17091740573832523669ULL), QU(14629199013130751608ULL), + QU(18268322711500338185ULL), QU( 8290963415675493063ULL), + QU( 8830864907452542588ULL), QU( 1614839084637494849ULL), + QU(14855358500870422231ULL), QU( 3472996748392519937ULL), + QU(15317151166268877716ULL), QU( 5825895018698400362ULL), + QU(16730208429367544129ULL), QU(10481156578141202800ULL), + QU( 4746166512382823750ULL), QU(12720876014472464998ULL), + QU( 8825177124486735972ULL), QU(13733447296837467838ULL), + QU( 6412293741681359625ULL), QU( 8313213138756135033ULL), + QU(11421481194803712517ULL), QU( 7997007691544174032ULL), + QU( 6812963847917605930ULL), QU( 9683091901227558641ULL), + QU(14703594165860324713ULL), QU( 1775476144519618309ULL), + QU( 2724283288516469519ULL), QU( 717642555185856868ULL), + QU( 8736402192215092346ULL), QU(11878800336431381021ULL), + QU( 4348816066017061293ULL), QU( 6115112756583631307ULL), + QU( 9176597239667142976ULL), 
QU(12615622714894259204ULL), + QU(10283406711301385987ULL), QU( 5111762509485379420ULL), + QU( 3118290051198688449ULL), QU( 7345123071632232145ULL), + QU( 9176423451688682359ULL), QU( 4843865456157868971ULL), + QU(12008036363752566088ULL), QU(12058837181919397720ULL), + QU( 2145073958457347366ULL), QU( 1526504881672818067ULL), + QU( 3488830105567134848ULL), QU(13208362960674805143ULL), + QU( 4077549672899572192ULL), QU( 7770995684693818365ULL), + QU( 1398532341546313593ULL), QU(12711859908703927840ULL), + QU( 1417561172594446813ULL), QU(17045191024194170604ULL), + QU( 4101933177604931713ULL), QU(14708428834203480320ULL), + QU(17447509264469407724ULL), QU(14314821973983434255ULL), + QU(17990472271061617265ULL), QU( 5087756685841673942ULL), + QU(12797820586893859939ULL), QU( 1778128952671092879ULL), + QU( 3535918530508665898ULL), QU( 9035729701042481301ULL), + QU(14808661568277079962ULL), QU(14587345077537747914ULL), + QU(11920080002323122708ULL), QU( 6426515805197278753ULL), + QU( 3295612216725984831ULL), QU(11040722532100876120ULL), + QU(12305952936387598754ULL), QU(16097391899742004253ULL), + QU( 4908537335606182208ULL), QU(12446674552196795504ULL), + QU(16010497855816895177ULL), QU( 9194378874788615551ULL), + QU( 3382957529567613384ULL), QU( 5154647600754974077ULL), + QU( 9801822865328396141ULL), QU( 9023662173919288143ULL), + QU(17623115353825147868ULL), QU( 8238115767443015816ULL), + QU(15811444159859002560ULL), QU( 9085612528904059661ULL), + QU( 6888601089398614254ULL), QU( 258252992894160189ULL), + QU( 6704363880792428622ULL), QU( 6114966032147235763ULL), + QU(11075393882690261875ULL), QU( 8797664238933620407ULL), + QU( 5901892006476726920ULL), QU( 5309780159285518958ULL), + QU(14940808387240817367ULL), QU(14642032021449656698ULL), + QU( 9808256672068504139ULL), QU( 3670135111380607658ULL), + QU(11211211097845960152ULL), QU( 1474304506716695808ULL), + QU(15843166204506876239ULL), QU( 7661051252471780561ULL), + QU(10170905502249418476ULL), QU( 
7801416045582028589ULL), + QU( 2763981484737053050ULL), QU( 9491377905499253054ULL), + QU(16201395896336915095ULL), QU( 9256513756442782198ULL), + QU( 5411283157972456034ULL), QU( 5059433122288321676ULL), + QU( 4327408006721123357ULL), QU( 9278544078834433377ULL), + QU( 7601527110882281612ULL), QU(11848295896975505251ULL), + QU(12096998801094735560ULL), QU(14773480339823506413ULL), + QU(15586227433895802149ULL), QU(12786541257830242872ULL), + QU( 6904692985140503067ULL), QU( 5309011515263103959ULL), + QU(12105257191179371066ULL), QU(14654380212442225037ULL), + QU( 2556774974190695009ULL), QU( 4461297399927600261ULL), + QU(14888225660915118646ULL), QU(14915459341148291824ULL), + QU( 2738802166252327631ULL), QU( 6047155789239131512ULL), + QU(12920545353217010338ULL), QU(10697617257007840205ULL), + QU( 2751585253158203504ULL), QU(13252729159780047496ULL), + QU(14700326134672815469ULL), QU(14082527904374600529ULL), + QU(16852962273496542070ULL), QU(17446675504235853907ULL), + QU(15019600398527572311ULL), QU(12312781346344081551ULL), + QU(14524667935039810450ULL), QU( 5634005663377195738ULL), + QU(11375574739525000569ULL), QU( 2423665396433260040ULL), + QU( 5222836914796015410ULL), QU( 4397666386492647387ULL), + QU( 4619294441691707638ULL), QU( 665088602354770716ULL), + QU(13246495665281593610ULL), QU( 6564144270549729409ULL), + QU(10223216188145661688ULL), QU( 3961556907299230585ULL), + QU(11543262515492439914ULL), QU(16118031437285993790ULL), + QU( 7143417964520166465ULL), QU(13295053515909486772ULL), + QU( 40434666004899675ULL), QU(17127804194038347164ULL), + QU( 8599165966560586269ULL), QU( 8214016749011284903ULL), + QU(13725130352140465239ULL), QU( 5467254474431726291ULL), + QU( 7748584297438219877ULL), QU(16933551114829772472ULL), + QU( 2169618439506799400ULL), QU( 2169787627665113463ULL), + QU(17314493571267943764ULL), QU(18053575102911354912ULL), + QU(11928303275378476973ULL), QU(11593850925061715550ULL), + QU(17782269923473589362ULL), QU( 
3280235307704747039ULL), + QU( 6145343578598685149ULL), QU(17080117031114086090ULL), + QU(18066839902983594755ULL), QU( 6517508430331020706ULL), + QU( 8092908893950411541ULL), QU(12558378233386153732ULL), + QU( 4476532167973132976ULL), QU(16081642430367025016ULL), + QU( 4233154094369139361ULL), QU( 8693630486693161027ULL), + QU(11244959343027742285ULL), QU(12273503967768513508ULL), + QU(14108978636385284876ULL), QU( 7242414665378826984ULL), + QU( 6561316938846562432ULL), QU( 8601038474994665795ULL), + QU(17532942353612365904ULL), QU(17940076637020912186ULL), + QU( 7340260368823171304ULL), QU( 7061807613916067905ULL), + QU(10561734935039519326ULL), QU(17990796503724650862ULL), + QU( 6208732943911827159ULL), QU( 359077562804090617ULL), + QU(14177751537784403113ULL), QU(10659599444915362902ULL), + QU(15081727220615085833ULL), QU(13417573895659757486ULL), + QU(15513842342017811524ULL), QU(11814141516204288231ULL), + QU( 1827312513875101814ULL), QU( 2804611699894603103ULL), + QU(17116500469975602763ULL), QU(12270191815211952087ULL), + QU(12256358467786024988ULL), QU(18435021722453971267ULL), + QU( 671330264390865618ULL), QU( 476504300460286050ULL), + QU(16465470901027093441ULL), QU( 4047724406247136402ULL), + QU( 1322305451411883346ULL), QU( 1388308688834322280ULL), + QU( 7303989085269758176ULL), QU( 9323792664765233642ULL), + QU( 4542762575316368936ULL), QU(17342696132794337618ULL), + QU( 4588025054768498379ULL), QU(13415475057390330804ULL), + QU(17880279491733405570ULL), QU(10610553400618620353ULL), + QU( 3180842072658960139ULL), QU(13002966655454270120ULL), + QU( 1665301181064982826ULL), QU( 7083673946791258979ULL), + QU( 190522247122496820ULL), QU(17388280237250677740ULL), + QU( 8430770379923642945ULL), QU(12987180971921668584ULL), + QU( 2311086108365390642ULL), QU( 2870984383579822345ULL), + QU(14014682609164653318ULL), QU(14467187293062251484ULL), + QU( 192186361147413298ULL), QU(15171951713531796524ULL), + QU( 9900305495015948728ULL), QU(17958004775615466344ULL), 
+ QU(14346380954498606514ULL), QU(18040047357617407096ULL), + QU( 5035237584833424532ULL), QU(15089555460613972287ULL), + QU( 4131411873749729831ULL), QU( 1329013581168250330ULL), + QU(10095353333051193949ULL), QU(10749518561022462716ULL), + QU( 9050611429810755847ULL), QU(15022028840236655649ULL), + QU( 8775554279239748298ULL), QU(13105754025489230502ULL), + QU(15471300118574167585ULL), QU( 89864764002355628ULL), + QU( 8776416323420466637ULL), QU( 5280258630612040891ULL), + QU( 2719174488591862912ULL), QU( 7599309137399661994ULL), + QU(15012887256778039979ULL), QU(14062981725630928925ULL), + QU(12038536286991689603ULL), QU( 7089756544681775245ULL), + QU(10376661532744718039ULL), QU( 1265198725901533130ULL), + QU(13807996727081142408ULL), QU( 2935019626765036403ULL), + QU( 7651672460680700141ULL), QU( 3644093016200370795ULL), + QU( 2840982578090080674ULL), QU(17956262740157449201ULL), + QU(18267979450492880548ULL), QU(11799503659796848070ULL), + QU( 9942537025669672388ULL), QU(11886606816406990297ULL), + QU( 5488594946437447576ULL), QU( 7226714353282744302ULL), + QU( 3784851653123877043ULL), QU( 878018453244803041ULL), + QU(12110022586268616085ULL), QU( 734072179404675123ULL), + QU(11869573627998248542ULL), QU( 469150421297783998ULL), + QU( 260151124912803804ULL), QU(11639179410120968649ULL), + QU( 9318165193840846253ULL), QU(12795671722734758075ULL), + QU(15318410297267253933ULL), QU( 691524703570062620ULL), + QU( 5837129010576994601ULL), QU(15045963859726941052ULL), + QU( 5850056944932238169ULL), QU(12017434144750943807ULL), + QU( 7447139064928956574ULL), QU( 3101711812658245019ULL), + QU(16052940704474982954ULL), QU(18195745945986994042ULL), + QU( 8932252132785575659ULL), QU(13390817488106794834ULL), + QU(11582771836502517453ULL), QU( 4964411326683611686ULL), + QU( 2195093981702694011ULL), QU(14145229538389675669ULL), + QU(16459605532062271798ULL), QU( 866316924816482864ULL), + QU( 4593041209937286377ULL), QU( 8415491391910972138ULL), + QU( 
4171236715600528969ULL), QU(16637569303336782889ULL), + QU( 2002011073439212680ULL), QU(17695124661097601411ULL), + QU( 4627687053598611702ULL), QU( 7895831936020190403ULL), + QU( 8455951300917267802ULL), QU( 2923861649108534854ULL), + QU( 8344557563927786255ULL), QU( 6408671940373352556ULL), + QU(12210227354536675772ULL), QU(14294804157294222295ULL), + QU(10103022425071085127ULL), QU(10092959489504123771ULL), + QU( 6554774405376736268ULL), QU(12629917718410641774ULL), + QU( 6260933257596067126ULL), QU( 2460827021439369673ULL), + QU( 2541962996717103668ULL), QU( 597377203127351475ULL), + QU( 5316984203117315309ULL), QU( 4811211393563241961ULL), + QU(13119698597255811641ULL), QU( 8048691512862388981ULL), + QU(10216818971194073842ULL), QU( 4612229970165291764ULL), + QU(10000980798419974770ULL), QU( 6877640812402540687ULL), + QU( 1488727563290436992ULL), QU( 2227774069895697318ULL), + QU(11237754507523316593ULL), QU(13478948605382290972ULL), + QU( 1963583846976858124ULL), QU( 5512309205269276457ULL), + QU( 3972770164717652347ULL), QU( 3841751276198975037ULL), + QU(10283343042181903117ULL), QU( 8564001259792872199ULL), + QU(16472187244722489221ULL), QU( 8953493499268945921ULL), + QU( 3518747340357279580ULL), QU( 4003157546223963073ULL), + QU( 3270305958289814590ULL), QU( 3966704458129482496ULL), + QU( 8122141865926661939ULL), QU(14627734748099506653ULL), + QU(13064426990862560568ULL), QU( 2414079187889870829ULL), + QU( 5378461209354225306ULL), QU(10841985740128255566ULL), + QU( 538582442885401738ULL), QU( 7535089183482905946ULL), + QU(16117559957598879095ULL), QU( 8477890721414539741ULL), + QU( 1459127491209533386ULL), QU(17035126360733620462ULL), + QU( 8517668552872379126ULL), QU(10292151468337355014ULL), + QU(17081267732745344157ULL), QU(13751455337946087178ULL), + QU(14026945459523832966ULL), QU( 6653278775061723516ULL), + QU(10619085543856390441ULL), QU( 2196343631481122885ULL), + QU(10045966074702826136ULL), QU(10082317330452718282ULL), + QU( 
5920859259504831242ULL), QU( 9951879073426540617ULL), + QU( 7074696649151414158ULL), QU(15808193543879464318ULL), + QU( 7385247772746953374ULL), QU( 3192003544283864292ULL), + QU(18153684490917593847ULL), QU(12423498260668568905ULL), + QU(10957758099756378169ULL), QU(11488762179911016040ULL), + QU( 2099931186465333782ULL), QU(11180979581250294432ULL), + QU( 8098916250668367933ULL), QU( 3529200436790763465ULL), + QU(12988418908674681745ULL), QU( 6147567275954808580ULL), + QU( 3207503344604030989ULL), QU(10761592604898615360ULL), + QU( 229854861031893504ULL), QU( 8809853962667144291ULL), + QU(13957364469005693860ULL), QU( 7634287665224495886ULL), + QU(12353487366976556874ULL), QU( 1134423796317152034ULL), + QU( 2088992471334107068ULL), QU( 7393372127190799698ULL), + QU( 1845367839871058391ULL), QU( 207922563987322884ULL), + QU(11960870813159944976ULL), QU(12182120053317317363ULL), + QU(17307358132571709283ULL), QU(13871081155552824936ULL), + QU(18304446751741566262ULL), QU( 7178705220184302849ULL), + QU(10929605677758824425ULL), QU(16446976977835806844ULL), + QU(13723874412159769044ULL), QU( 6942854352100915216ULL), + QU( 1726308474365729390ULL), QU( 2150078766445323155ULL), + QU(15345558947919656626ULL), QU(12145453828874527201ULL), + QU( 2054448620739726849ULL), QU( 2740102003352628137ULL), + QU(11294462163577610655ULL), QU( 756164283387413743ULL), + QU(17841144758438810880ULL), QU(10802406021185415861ULL), + QU( 8716455530476737846ULL), QU( 6321788834517649606ULL), + QU(14681322910577468426ULL), QU(17330043563884336387ULL), + QU(12701802180050071614ULL), QU(14695105111079727151ULL), + QU( 5112098511654172830ULL), QU( 4957505496794139973ULL), + QU( 8270979451952045982ULL), QU(12307685939199120969ULL), + QU(12425799408953443032ULL), QU( 8376410143634796588ULL), + QU(16621778679680060464ULL), QU( 3580497854566660073ULL), + QU( 1122515747803382416ULL), QU( 857664980960597599ULL), + QU( 6343640119895925918ULL), QU(12878473260854462891ULL), + 
QU(10036813920765722626ULL), QU(14451335468363173812ULL), + QU( 5476809692401102807ULL), QU(16442255173514366342ULL), + QU(13060203194757167104ULL), QU(14354124071243177715ULL), + QU(15961249405696125227ULL), QU(13703893649690872584ULL), + QU( 363907326340340064ULL), QU( 6247455540491754842ULL), + QU(12242249332757832361ULL), QU( 156065475679796717ULL), + QU( 9351116235749732355ULL), QU( 4590350628677701405ULL), + QU( 1671195940982350389ULL), QU(13501398458898451905ULL), + QU( 6526341991225002255ULL), QU( 1689782913778157592ULL), + QU( 7439222350869010334ULL), QU(13975150263226478308ULL), + QU(11411961169932682710ULL), QU(17204271834833847277ULL), + QU( 541534742544435367ULL), QU( 6591191931218949684ULL), + QU( 2645454775478232486ULL), QU( 4322857481256485321ULL), + QU( 8477416487553065110ULL), QU(12902505428548435048ULL), + QU( 971445777981341415ULL), QU(14995104682744976712ULL), + QU( 4243341648807158063ULL), QU( 8695061252721927661ULL), + QU( 5028202003270177222ULL), QU( 2289257340915567840ULL), + QU(13870416345121866007ULL), QU(13994481698072092233ULL), + QU( 6912785400753196481ULL), QU( 2278309315841980139ULL), + QU( 4329765449648304839ULL), QU( 5963108095785485298ULL), + QU( 4880024847478722478ULL), QU(16015608779890240947ULL), + QU( 1866679034261393544ULL), QU( 914821179919731519ULL), + QU( 9643404035648760131ULL), QU( 2418114953615593915ULL), + QU( 944756836073702374ULL), QU(15186388048737296834ULL), + QU( 7723355336128442206ULL), QU( 7500747479679599691ULL), + QU(18013961306453293634ULL), QU( 2315274808095756456ULL), + QU(13655308255424029566ULL), QU(17203800273561677098ULL), + QU( 1382158694422087756ULL), QU( 5090390250309588976ULL), + QU( 517170818384213989ULL), QU( 1612709252627729621ULL), + QU( 1330118955572449606ULL), QU( 300922478056709885ULL), + QU(18115693291289091987ULL), QU(13491407109725238321ULL), + QU(15293714633593827320ULL), QU( 5151539373053314504ULL), + QU( 5951523243743139207ULL), QU(14459112015249527975ULL), + QU( 
5456113959000700739ULL), QU( 3877918438464873016ULL), + QU(12534071654260163555ULL), QU(15871678376893555041ULL), + QU(11005484805712025549ULL), QU(16353066973143374252ULL), + QU( 4358331472063256685ULL), QU( 8268349332210859288ULL), + QU(12485161590939658075ULL), QU(13955993592854471343ULL), + QU( 5911446886848367039ULL), QU(14925834086813706974ULL), + QU( 6590362597857994805ULL), QU( 1280544923533661875ULL), + QU( 1637756018947988164ULL), QU( 4734090064512686329ULL), + QU(16693705263131485912ULL), QU( 6834882340494360958ULL), + QU( 8120732176159658505ULL), QU( 2244371958905329346ULL), + QU(10447499707729734021ULL), QU( 7318742361446942194ULL), + QU( 8032857516355555296ULL), QU(14023605983059313116ULL), + QU( 1032336061815461376ULL), QU( 9840995337876562612ULL), + QU( 9869256223029203587ULL), QU(12227975697177267636ULL), + QU(12728115115844186033ULL), QU( 7752058479783205470ULL), + QU( 729733219713393087ULL), QU(12954017801239007622ULL) }; static const uint64_t init_by_array_64_expected[] = { - QU( 2100341266307895239LLU), QU( 8344256300489757943LLU), - QU(15687933285484243894LLU), QU( 8268620370277076319LLU), - QU(12371852309826545459LLU), QU( 8800491541730110238LLU), - QU(18113268950100835773LLU), QU( 2886823658884438119LLU), - QU( 3293667307248180724LLU), QU( 9307928143300172731LLU), - QU( 7688082017574293629LLU), QU( 900986224735166665LLU), - QU( 9977972710722265039LLU), QU( 6008205004994830552LLU), - QU( 546909104521689292LLU), QU( 7428471521869107594LLU), - QU(14777563419314721179LLU), QU(16116143076567350053LLU), - QU( 5322685342003142329LLU), QU( 4200427048445863473LLU), - QU( 4693092150132559146LLU), QU(13671425863759338582LLU), - QU( 6747117460737639916LLU), QU( 4732666080236551150LLU), - QU( 5912839950611941263LLU), QU( 3903717554504704909LLU), - QU( 2615667650256786818LLU), QU(10844129913887006352LLU), - QU(13786467861810997820LLU), QU(14267853002994021570LLU), - QU(13767807302847237439LLU), QU(16407963253707224617LLU), - QU( 4802498363698583497LLU), 
QU( 2523802839317209764LLU), - QU( 3822579397797475589LLU), QU( 8950320572212130610LLU), - QU( 3745623504978342534LLU), QU(16092609066068482806LLU), - QU( 9817016950274642398LLU), QU(10591660660323829098LLU), - QU(11751606650792815920LLU), QU( 5122873818577122211LLU), - QU(17209553764913936624LLU), QU( 6249057709284380343LLU), - QU(15088791264695071830LLU), QU(15344673071709851930LLU), - QU( 4345751415293646084LLU), QU( 2542865750703067928LLU), - QU(13520525127852368784LLU), QU(18294188662880997241LLU), - QU( 3871781938044881523LLU), QU( 2873487268122812184LLU), - QU(15099676759482679005LLU), QU(15442599127239350490LLU), - QU( 6311893274367710888LLU), QU( 3286118760484672933LLU), - QU( 4146067961333542189LLU), QU(13303942567897208770LLU), - QU( 8196013722255630418LLU), QU( 4437815439340979989LLU), - QU(15433791533450605135LLU), QU( 4254828956815687049LLU), - QU( 1310903207708286015LLU), QU(10529182764462398549LLU), - QU(14900231311660638810LLU), QU( 9727017277104609793LLU), - QU( 1821308310948199033LLU), QU(11628861435066772084LLU), - QU( 9469019138491546924LLU), QU( 3145812670532604988LLU), - QU( 9938468915045491919LLU), QU( 1562447430672662142LLU), - QU(13963995266697989134LLU), QU( 3356884357625028695LLU), - QU( 4499850304584309747LLU), QU( 8456825817023658122LLU), - QU(10859039922814285279LLU), QU( 8099512337972526555LLU), - QU( 348006375109672149LLU), QU(11919893998241688603LLU), - QU( 1104199577402948826LLU), QU(16689191854356060289LLU), - QU(10992552041730168078LLU), QU( 7243733172705465836LLU), - QU( 5668075606180319560LLU), QU(18182847037333286970LLU), - QU( 4290215357664631322LLU), QU( 4061414220791828613LLU), - QU(13006291061652989604LLU), QU( 7140491178917128798LLU), - QU(12703446217663283481LLU), QU( 5500220597564558267LLU), - QU(10330551509971296358LLU), QU(15958554768648714492LLU), - QU( 5174555954515360045LLU), QU( 1731318837687577735LLU), - QU( 3557700801048354857LLU), QU(13764012341928616198LLU), - QU(13115166194379119043LLU), QU( 
7989321021560255519LLU), - QU( 2103584280905877040LLU), QU( 9230788662155228488LLU), - QU(16396629323325547654LLU), QU( 657926409811318051LLU), - QU(15046700264391400727LLU), QU( 5120132858771880830LLU), - QU( 7934160097989028561LLU), QU( 6963121488531976245LLU), - QU(17412329602621742089LLU), QU(15144843053931774092LLU), - QU(17204176651763054532LLU), QU(13166595387554065870LLU), - QU( 8590377810513960213LLU), QU( 5834365135373991938LLU), - QU( 7640913007182226243LLU), QU( 3479394703859418425LLU), - QU(16402784452644521040LLU), QU( 4993979809687083980LLU), - QU(13254522168097688865LLU), QU(15643659095244365219LLU), - QU( 5881437660538424982LLU), QU(11174892200618987379LLU), - QU( 254409966159711077LLU), QU(17158413043140549909LLU), - QU( 3638048789290376272LLU), QU( 1376816930299489190LLU), - QU( 4622462095217761923LLU), QU(15086407973010263515LLU), - QU(13253971772784692238LLU), QU( 5270549043541649236LLU), - QU(11182714186805411604LLU), QU(12283846437495577140LLU), - QU( 5297647149908953219LLU), QU(10047451738316836654LLU), - QU( 4938228100367874746LLU), QU(12328523025304077923LLU), - QU( 3601049438595312361LLU), QU( 9313624118352733770LLU), - QU(13322966086117661798LLU), QU(16660005705644029394LLU), - QU(11337677526988872373LLU), QU(13869299102574417795LLU), - QU(15642043183045645437LLU), QU( 3021755569085880019LLU), - QU( 4979741767761188161LLU), QU(13679979092079279587LLU), - QU( 3344685842861071743LLU), QU(13947960059899588104LLU), - QU( 305806934293368007LLU), QU( 5749173929201650029LLU), - QU(11123724852118844098LLU), QU(15128987688788879802LLU), - QU(15251651211024665009LLU), QU( 7689925933816577776LLU), - QU(16732804392695859449LLU), QU(17087345401014078468LLU), - QU(14315108589159048871LLU), QU( 4820700266619778917LLU), - QU(16709637539357958441LLU), QU( 4936227875177351374LLU), - QU( 2137907697912987247LLU), QU(11628565601408395420LLU), - QU( 2333250549241556786LLU), QU( 5711200379577778637LLU), - QU( 5170680131529031729LLU), 
QU(12620392043061335164LLU), - QU( 95363390101096078LLU), QU( 5487981914081709462LLU), - QU( 1763109823981838620LLU), QU( 3395861271473224396LLU), - QU( 1300496844282213595LLU), QU( 6894316212820232902LLU), - QU(10673859651135576674LLU), QU( 5911839658857903252LLU), - QU(17407110743387299102LLU), QU( 8257427154623140385LLU), - QU(11389003026741800267LLU), QU( 4070043211095013717LLU), - QU(11663806997145259025LLU), QU(15265598950648798210LLU), - QU( 630585789434030934LLU), QU( 3524446529213587334LLU), - QU( 7186424168495184211LLU), QU(10806585451386379021LLU), - QU(11120017753500499273LLU), QU( 1586837651387701301LLU), - QU(17530454400954415544LLU), QU( 9991670045077880430LLU), - QU( 7550997268990730180LLU), QU( 8640249196597379304LLU), - QU( 3522203892786893823LLU), QU(10401116549878854788LLU), - QU(13690285544733124852LLU), QU( 8295785675455774586LLU), - QU(15535716172155117603LLU), QU( 3112108583723722511LLU), - QU(17633179955339271113LLU), QU(18154208056063759375LLU), - QU( 1866409236285815666LLU), QU(13326075895396412882LLU), - QU( 8756261842948020025LLU), QU( 6281852999868439131LLU), - QU(15087653361275292858LLU), QU(10333923911152949397LLU), - QU( 5265567645757408500LLU), QU(12728041843210352184LLU), - QU( 6347959327507828759LLU), QU( 154112802625564758LLU), - QU(18235228308679780218LLU), QU( 3253805274673352418LLU), - QU( 4849171610689031197LLU), QU(17948529398340432518LLU), - QU(13803510475637409167LLU), QU(13506570190409883095LLU), - QU(15870801273282960805LLU), QU( 8451286481299170773LLU), - QU( 9562190620034457541LLU), QU( 8518905387449138364LLU), - QU(12681306401363385655LLU), QU( 3788073690559762558LLU), - QU( 5256820289573487769LLU), QU( 2752021372314875467LLU), - QU( 6354035166862520716LLU), QU( 4328956378309739069LLU), - QU( 449087441228269600LLU), QU( 5533508742653090868LLU), - QU( 1260389420404746988LLU), QU(18175394473289055097LLU), - QU( 1535467109660399420LLU), QU( 8818894282874061442LLU), - QU(12140873243824811213LLU), 
QU(15031386653823014946LLU), - QU( 1286028221456149232LLU), QU( 6329608889367858784LLU), - QU( 9419654354945132725LLU), QU( 6094576547061672379LLU), - QU(17706217251847450255LLU), QU( 1733495073065878126LLU), - QU(16918923754607552663LLU), QU( 8881949849954945044LLU), - QU(12938977706896313891LLU), QU(14043628638299793407LLU), - QU(18393874581723718233LLU), QU( 6886318534846892044LLU), - QU(14577870878038334081LLU), QU(13541558383439414119LLU), - QU(13570472158807588273LLU), QU(18300760537910283361LLU), - QU( 818368572800609205LLU), QU( 1417000585112573219LLU), - QU(12337533143867683655LLU), QU(12433180994702314480LLU), - QU( 778190005829189083LLU), QU(13667356216206524711LLU), - QU( 9866149895295225230LLU), QU(11043240490417111999LLU), - QU( 1123933826541378598LLU), QU( 6469631933605123610LLU), - QU(14508554074431980040LLU), QU(13918931242962026714LLU), - QU( 2870785929342348285LLU), QU(14786362626740736974LLU), - QU(13176680060902695786LLU), QU( 9591778613541679456LLU), - QU( 9097662885117436706LLU), QU( 749262234240924947LLU), - QU( 1944844067793307093LLU), QU( 4339214904577487742LLU), - QU( 8009584152961946551LLU), QU(16073159501225501777LLU), - QU( 3335870590499306217LLU), QU(17088312653151202847LLU), - QU( 3108893142681931848LLU), QU(16636841767202792021LLU), - QU(10423316431118400637LLU), QU( 8008357368674443506LLU), - QU(11340015231914677875LLU), QU(17687896501594936090LLU), - QU(15173627921763199958LLU), QU( 542569482243721959LLU), - QU(15071714982769812975LLU), QU( 4466624872151386956LLU), - QU( 1901780715602332461LLU), QU( 9822227742154351098LLU), - QU( 1479332892928648780LLU), QU( 6981611948382474400LLU), - QU( 7620824924456077376LLU), QU(14095973329429406782LLU), - QU( 7902744005696185404LLU), QU(15830577219375036920LLU), - QU(10287076667317764416LLU), QU(12334872764071724025LLU), - QU( 4419302088133544331LLU), QU(14455842851266090520LLU), - QU(12488077416504654222LLU), QU( 7953892017701886766LLU), - QU( 6331484925529519007LLU), QU( 
4902145853785030022LLU), - QU(17010159216096443073LLU), QU(11945354668653886087LLU), - QU(15112022728645230829LLU), QU(17363484484522986742LLU), - QU( 4423497825896692887LLU), QU( 8155489510809067471LLU), - QU( 258966605622576285LLU), QU( 5462958075742020534LLU), - QU( 6763710214913276228LLU), QU( 2368935183451109054LLU), - QU(14209506165246453811LLU), QU( 2646257040978514881LLU), - QU( 3776001911922207672LLU), QU( 1419304601390147631LLU), - QU(14987366598022458284LLU), QU( 3977770701065815721LLU), - QU( 730820417451838898LLU), QU( 3982991703612885327LLU), - QU( 2803544519671388477LLU), QU(17067667221114424649LLU), - QU( 2922555119737867166LLU), QU( 1989477584121460932LLU), - QU(15020387605892337354LLU), QU( 9293277796427533547LLU), - QU(10722181424063557247LLU), QU(16704542332047511651LLU), - QU( 5008286236142089514LLU), QU(16174732308747382540LLU), - QU(17597019485798338402LLU), QU(13081745199110622093LLU), - QU( 8850305883842258115LLU), QU(12723629125624589005LLU), - QU( 8140566453402805978LLU), QU(15356684607680935061LLU), - QU(14222190387342648650LLU), QU(11134610460665975178LLU), - QU( 1259799058620984266LLU), QU(13281656268025610041LLU), - QU( 298262561068153992LLU), QU(12277871700239212922LLU), - QU(13911297774719779438LLU), QU(16556727962761474934LLU), - QU(17903010316654728010LLU), QU( 9682617699648434744LLU), - QU(14757681836838592850LLU), QU( 1327242446558524473LLU), - QU(11126645098780572792LLU), QU( 1883602329313221774LLU), - QU( 2543897783922776873LLU), QU(15029168513767772842LLU), - QU(12710270651039129878LLU), QU(16118202956069604504LLU), - QU(15010759372168680524LLU), QU( 2296827082251923948LLU), - QU(10793729742623518101LLU), QU(13829764151845413046LLU), - QU(17769301223184451213LLU), QU( 3118268169210783372LLU), - QU(17626204544105123127LLU), QU( 7416718488974352644LLU), - QU(10450751996212925994LLU), QU( 9352529519128770586LLU), - QU( 259347569641110140LLU), QU( 8048588892269692697LLU), - QU( 1774414152306494058LLU), 
QU(10669548347214355622LLU), - QU(13061992253816795081LLU), QU(18432677803063861659LLU), - QU( 8879191055593984333LLU), QU(12433753195199268041LLU), - QU(14919392415439730602LLU), QU( 6612848378595332963LLU), - QU( 6320986812036143628LLU), QU(10465592420226092859LLU), - QU( 4196009278962570808LLU), QU( 3747816564473572224LLU), - QU(17941203486133732898LLU), QU( 2350310037040505198LLU), - QU( 5811779859134370113LLU), QU(10492109599506195126LLU), - QU( 7699650690179541274LLU), QU( 1954338494306022961LLU), - QU(14095816969027231152LLU), QU( 5841346919964852061LLU), - QU(14945969510148214735LLU), QU( 3680200305887550992LLU), - QU( 6218047466131695792LLU), QU( 8242165745175775096LLU), - QU(11021371934053307357LLU), QU( 1265099502753169797LLU), - QU( 4644347436111321718LLU), QU( 3609296916782832859LLU), - QU( 8109807992218521571LLU), QU(18387884215648662020LLU), - QU(14656324896296392902LLU), QU(17386819091238216751LLU), - QU(17788300878582317152LLU), QU( 7919446259742399591LLU), - QU( 4466613134576358004LLU), QU(12928181023667938509LLU), - QU(13147446154454932030LLU), QU(16552129038252734620LLU), - QU( 8395299403738822450LLU), QU(11313817655275361164LLU), - QU( 434258809499511718LLU), QU( 2074882104954788676LLU), - QU( 7929892178759395518LLU), QU( 9006461629105745388LLU), - QU( 5176475650000323086LLU), QU(11128357033468341069LLU), - QU(12026158851559118955LLU), QU(14699716249471156500LLU), - QU( 448982497120206757LLU), QU( 4156475356685519900LLU), - QU( 6063816103417215727LLU), QU(10073289387954971479LLU), - QU( 8174466846138590962LLU), QU( 2675777452363449006LLU), - QU( 9090685420572474281LLU), QU( 6659652652765562060LLU), - QU(12923120304018106621LLU), QU(11117480560334526775LLU), - QU( 937910473424587511LLU), QU( 1838692113502346645LLU), - QU(11133914074648726180LLU), QU( 7922600945143884053LLU), - QU(13435287702700959550LLU), QU( 5287964921251123332LLU), - QU(11354875374575318947LLU), QU(17955724760748238133LLU), - QU(13728617396297106512LLU), QU( 
4107449660118101255LLU), - QU( 1210269794886589623LLU), QU(11408687205733456282LLU), - QU( 4538354710392677887LLU), QU(13566803319341319267LLU), - QU(17870798107734050771LLU), QU( 3354318982568089135LLU), - QU( 9034450839405133651LLU), QU(13087431795753424314LLU), - QU( 950333102820688239LLU), QU( 1968360654535604116LLU), - QU(16840551645563314995LLU), QU( 8867501803892924995LLU), - QU(11395388644490626845LLU), QU( 1529815836300732204LLU), - QU(13330848522996608842LLU), QU( 1813432878817504265LLU), - QU( 2336867432693429560LLU), QU(15192805445973385902LLU), - QU( 2528593071076407877LLU), QU( 128459777936689248LLU), - QU( 9976345382867214866LLU), QU( 6208885766767996043LLU), - QU(14982349522273141706LLU), QU( 3099654362410737822LLU), - QU(13776700761947297661LLU), QU( 8806185470684925550LLU), - QU( 8151717890410585321LLU), QU( 640860591588072925LLU), - QU(14592096303937307465LLU), QU( 9056472419613564846LLU), - QU(14861544647742266352LLU), QU(12703771500398470216LLU), - QU( 3142372800384138465LLU), QU( 6201105606917248196LLU), - QU(18337516409359270184LLU), QU(15042268695665115339LLU), - QU(15188246541383283846LLU), QU(12800028693090114519LLU), - QU( 5992859621101493472LLU), QU(18278043971816803521LLU), - QU( 9002773075219424560LLU), QU( 7325707116943598353LLU), - QU( 7930571931248040822LLU), QU( 5645275869617023448LLU), - QU( 7266107455295958487LLU), QU( 4363664528273524411LLU), - QU(14313875763787479809LLU), QU(17059695613553486802LLU), - QU( 9247761425889940932LLU), QU(13704726459237593128LLU), - QU( 2701312427328909832LLU), QU(17235532008287243115LLU), - QU(14093147761491729538LLU), QU( 6247352273768386516LLU), - QU( 8268710048153268415LLU), QU( 7985295214477182083LLU), - QU(15624495190888896807LLU), QU( 3772753430045262788LLU), - QU( 9133991620474991698LLU), QU( 5665791943316256028LLU), - QU( 7551996832462193473LLU), QU(13163729206798953877LLU), - QU( 9263532074153846374LLU), QU( 1015460703698618353LLU), - QU(17929874696989519390LLU), 
QU(18257884721466153847LLU), - QU(16271867543011222991LLU), QU( 3905971519021791941LLU), - QU(16814488397137052085LLU), QU( 1321197685504621613LLU), - QU( 2870359191894002181LLU), QU(14317282970323395450LLU), - QU(13663920845511074366LLU), QU( 2052463995796539594LLU), - QU(14126345686431444337LLU), QU( 1727572121947022534LLU), - QU(17793552254485594241LLU), QU( 6738857418849205750LLU), - QU( 1282987123157442952LLU), QU(16655480021581159251LLU), - QU( 6784587032080183866LLU), QU(14726758805359965162LLU), - QU( 7577995933961987349LLU), QU(12539609320311114036LLU), - QU(10789773033385439494LLU), QU( 8517001497411158227LLU), - QU(10075543932136339710LLU), QU(14838152340938811081LLU), - QU( 9560840631794044194LLU), QU(17445736541454117475LLU), - QU(10633026464336393186LLU), QU(15705729708242246293LLU), - QU( 1117517596891411098LLU), QU( 4305657943415886942LLU), - QU( 4948856840533979263LLU), QU(16071681989041789593LLU), - QU(13723031429272486527LLU), QU( 7639567622306509462LLU), - QU(12670424537483090390LLU), QU( 9715223453097197134LLU), - QU( 5457173389992686394LLU), QU( 289857129276135145LLU), - QU(17048610270521972512LLU), QU( 692768013309835485LLU), - QU(14823232360546632057LLU), QU(18218002361317895936LLU), - QU( 3281724260212650204LLU), QU(16453957266549513795LLU), - QU( 8592711109774511881LLU), QU( 929825123473369579LLU), - QU(15966784769764367791LLU), QU( 9627344291450607588LLU), - QU(10849555504977813287LLU), QU( 9234566913936339275LLU), - QU( 6413807690366911210LLU), QU(10862389016184219267LLU), - QU(13842504799335374048LLU), QU( 1531994113376881174LLU), - QU( 2081314867544364459LLU), QU(16430628791616959932LLU), - QU( 8314714038654394368LLU), QU( 9155473892098431813LLU), - QU(12577843786670475704LLU), QU( 4399161106452401017LLU), - QU( 1668083091682623186LLU), QU( 1741383777203714216LLU), - QU( 2162597285417794374LLU), QU(15841980159165218736LLU), - QU( 1971354603551467079LLU), QU( 1206714764913205968LLU), - QU( 4790860439591272330LLU), 
QU(14699375615594055799LLU), - QU( 8374423871657449988LLU), QU(10950685736472937738LLU), - QU( 697344331343267176LLU), QU(10084998763118059810LLU), - QU(12897369539795983124LLU), QU(12351260292144383605LLU), - QU( 1268810970176811234LLU), QU( 7406287800414582768LLU), - QU( 516169557043807831LLU), QU( 5077568278710520380LLU), - QU( 3828791738309039304LLU), QU( 7721974069946943610LLU), - QU( 3534670260981096460LLU), QU( 4865792189600584891LLU), - QU(16892578493734337298LLU), QU( 9161499464278042590LLU), - QU(11976149624067055931LLU), QU(13219479887277343990LLU), - QU(14161556738111500680LLU), QU(14670715255011223056LLU), - QU( 4671205678403576558LLU), QU(12633022931454259781LLU), - QU(14821376219869187646LLU), QU( 751181776484317028LLU), - QU( 2192211308839047070LLU), QU(11787306362361245189LLU), - QU(10672375120744095707LLU), QU( 4601972328345244467LLU), - QU(15457217788831125879LLU), QU( 8464345256775460809LLU), - QU(10191938789487159478LLU), QU( 6184348739615197613LLU), - QU(11425436778806882100LLU), QU( 2739227089124319793LLU), - QU( 461464518456000551LLU), QU( 4689850170029177442LLU), - QU( 6120307814374078625LLU), QU(11153579230681708671LLU), - QU( 7891721473905347926LLU), QU(10281646937824872400LLU), - QU( 3026099648191332248LLU), QU( 8666750296953273818LLU), - QU(14978499698844363232LLU), QU(13303395102890132065LLU), - QU( 8182358205292864080LLU), QU(10560547713972971291LLU), - QU(11981635489418959093LLU), QU( 3134621354935288409LLU), - QU(11580681977404383968LLU), QU(14205530317404088650LLU), - QU( 5997789011854923157LLU), QU(13659151593432238041LLU), - QU(11664332114338865086LLU), QU( 7490351383220929386LLU), - QU( 7189290499881530378LLU), QU(15039262734271020220LLU), - QU( 2057217285976980055LLU), QU( 555570804905355739LLU), - QU(11235311968348555110LLU), QU(13824557146269603217LLU), - QU(16906788840653099693LLU), QU( 7222878245455661677LLU), - QU( 5245139444332423756LLU), QU( 4723748462805674292LLU), - QU(12216509815698568612LLU), 
QU(17402362976648951187LLU), - QU(17389614836810366768LLU), QU( 4880936484146667711LLU), - QU( 9085007839292639880LLU), QU(13837353458498535449LLU), - QU(11914419854360366677LLU), QU(16595890135313864103LLU), - QU( 6313969847197627222LLU), QU(18296909792163910431LLU), - QU(10041780113382084042LLU), QU( 2499478551172884794LLU), - QU(11057894246241189489LLU), QU( 9742243032389068555LLU), - QU(12838934582673196228LLU), QU(13437023235248490367LLU), - QU(13372420669446163240LLU), QU( 6752564244716909224LLU), - QU( 7157333073400313737LLU), QU(12230281516370654308LLU), - QU( 1182884552219419117LLU), QU( 2955125381312499218LLU), - QU(10308827097079443249LLU), QU( 1337648572986534958LLU), - QU(16378788590020343939LLU), QU( 108619126514420935LLU), - QU( 3990981009621629188LLU), QU( 5460953070230946410LLU), - QU( 9703328329366531883LLU), QU(13166631489188077236LLU), - QU( 1104768831213675170LLU), QU( 3447930458553877908LLU), - QU( 8067172487769945676LLU), QU( 5445802098190775347LLU), - QU( 3244840981648973873LLU), QU(17314668322981950060LLU), - QU( 5006812527827763807LLU), QU(18158695070225526260LLU), - QU( 2824536478852417853LLU), QU(13974775809127519886LLU), - QU( 9814362769074067392LLU), QU(17276205156374862128LLU), - QU(11361680725379306967LLU), QU( 3422581970382012542LLU), - QU(11003189603753241266LLU), QU(11194292945277862261LLU), - QU( 6839623313908521348LLU), QU(11935326462707324634LLU), - QU( 1611456788685878444LLU), QU(13112620989475558907LLU), - QU( 517659108904450427LLU), QU(13558114318574407624LLU), - QU(15699089742731633077LLU), QU( 4988979278862685458LLU), - QU( 8111373583056521297LLU), QU( 3891258746615399627LLU), - QU( 8137298251469718086LLU), QU(12748663295624701649LLU), - QU( 4389835683495292062LLU), QU( 5775217872128831729LLU), - QU( 9462091896405534927LLU), QU( 8498124108820263989LLU), - QU( 8059131278842839525LLU), QU(10503167994254090892LLU), - QU(11613153541070396656LLU), QU(18069248738504647790LLU), - QU( 570657419109768508LLU), QU( 
3950574167771159665LLU), - QU( 5514655599604313077LLU), QU( 2908460854428484165LLU), - QU(10777722615935663114LLU), QU(12007363304839279486LLU), - QU( 9800646187569484767LLU), QU( 8795423564889864287LLU), - QU(14257396680131028419LLU), QU( 6405465117315096498LLU), - QU( 7939411072208774878LLU), QU(17577572378528990006LLU), - QU(14785873806715994850LLU), QU(16770572680854747390LLU), - QU(18127549474419396481LLU), QU(11637013449455757750LLU), - QU(14371851933996761086LLU), QU( 3601181063650110280LLU), - QU( 4126442845019316144LLU), QU(10198287239244320669LLU), - QU(18000169628555379659LLU), QU(18392482400739978269LLU), - QU( 6219919037686919957LLU), QU( 3610085377719446052LLU), - QU( 2513925039981776336LLU), QU(16679413537926716955LLU), - QU(12903302131714909434LLU), QU( 5581145789762985009LLU), - QU(12325955044293303233LLU), QU(17216111180742141204LLU), - QU( 6321919595276545740LLU), QU( 3507521147216174501LLU), - QU( 9659194593319481840LLU), QU(11473976005975358326LLU), - QU(14742730101435987026LLU), QU( 492845897709954780LLU), - QU(16976371186162599676LLU), QU(17712703422837648655LLU), - QU( 9881254778587061697LLU), QU( 8413223156302299551LLU), - QU( 1563841828254089168LLU), QU( 9996032758786671975LLU), - QU( 138877700583772667LLU), QU(13003043368574995989LLU), - QU( 4390573668650456587LLU), QU( 8610287390568126755LLU), - QU(15126904974266642199LLU), QU( 6703637238986057662LLU), - QU( 2873075592956810157LLU), QU( 6035080933946049418LLU), - QU(13382846581202353014LLU), QU( 7303971031814642463LLU), - QU(18418024405307444267LLU), QU( 5847096731675404647LLU), - QU( 4035880699639842500LLU), QU(11525348625112218478LLU), - QU( 3041162365459574102LLU), QU( 2604734487727986558LLU), - QU(15526341771636983145LLU), QU(14556052310697370254LLU), - QU(12997787077930808155LLU), QU( 9601806501755554499LLU), - QU(11349677952521423389LLU), QU(14956777807644899350LLU), - QU(16559736957742852721LLU), QU(12360828274778140726LLU), - QU( 6685373272009662513LLU), 
QU(16932258748055324130LLU), - QU(15918051131954158508LLU), QU( 1692312913140790144LLU), - QU( 546653826801637367LLU), QU( 5341587076045986652LLU), - QU(14975057236342585662LLU), QU(12374976357340622412LLU), - QU(10328833995181940552LLU), QU(12831807101710443149LLU), - QU(10548514914382545716LLU), QU( 2217806727199715993LLU), - QU(12627067369242845138LLU), QU( 4598965364035438158LLU), - QU( 150923352751318171LLU), QU(14274109544442257283LLU), - QU( 4696661475093863031LLU), QU( 1505764114384654516LLU), - QU(10699185831891495147LLU), QU( 2392353847713620519LLU), - QU( 3652870166711788383LLU), QU( 8640653276221911108LLU), - QU( 3894077592275889704LLU), QU( 4918592872135964845LLU), - QU(16379121273281400789LLU), QU(12058465483591683656LLU), - QU(11250106829302924945LLU), QU( 1147537556296983005LLU), - QU( 6376342756004613268LLU), QU(14967128191709280506LLU), - QU(18007449949790627628LLU), QU( 9497178279316537841LLU), - QU( 7920174844809394893LLU), QU(10037752595255719907LLU), - QU(15875342784985217697LLU), QU(15311615921712850696LLU), - QU( 9552902652110992950LLU), QU(14054979450099721140LLU), - QU( 5998709773566417349LLU), QU(18027910339276320187LLU), - QU( 8223099053868585554LLU), QU( 7842270354824999767LLU), - QU( 4896315688770080292LLU), QU(12969320296569787895LLU), - QU( 2674321489185759961LLU), QU( 4053615936864718439LLU), - QU(11349775270588617578LLU), QU( 4743019256284553975LLU), - QU( 5602100217469723769LLU), QU(14398995691411527813LLU), - QU( 7412170493796825470LLU), QU( 836262406131744846LLU), - QU( 8231086633845153022LLU), QU( 5161377920438552287LLU), - QU( 8828731196169924949LLU), QU(16211142246465502680LLU), - QU( 3307990879253687818LLU), QU( 5193405406899782022LLU), - QU( 8510842117467566693LLU), QU( 6070955181022405365LLU), - QU(14482950231361409799LLU), QU(12585159371331138077LLU), - QU( 3511537678933588148LLU), QU( 2041849474531116417LLU), - QU(10944936685095345792LLU), QU(18303116923079107729LLU), - QU( 2720566371239725320LLU), QU( 
4958672473562397622LLU), - QU( 3032326668253243412LLU), QU(13689418691726908338LLU), - QU( 1895205511728843996LLU), QU( 8146303515271990527LLU), - QU(16507343500056113480LLU), QU( 473996939105902919LLU), - QU( 9897686885246881481LLU), QU(14606433762712790575LLU), - QU( 6732796251605566368LLU), QU( 1399778120855368916LLU), - QU( 935023885182833777LLU), QU(16066282816186753477LLU), - QU( 7291270991820612055LLU), QU(17530230393129853844LLU), - QU(10223493623477451366LLU), QU(15841725630495676683LLU), - QU(17379567246435515824LLU), QU( 8588251429375561971LLU), - QU(18339511210887206423LLU), QU(17349587430725976100LLU), - QU(12244876521394838088LLU), QU( 6382187714147161259LLU), - QU(12335807181848950831LLU), QU(16948885622305460665LLU), - QU(13755097796371520506LLU), QU(14806740373324947801LLU), - QU( 4828699633859287703LLU), QU( 8209879281452301604LLU), - QU(12435716669553736437LLU), QU(13970976859588452131LLU), - QU( 6233960842566773148LLU), QU(12507096267900505759LLU), - QU( 1198713114381279421LLU), QU(14989862731124149015LLU), - QU(15932189508707978949LLU), QU( 2526406641432708722LLU), - QU( 29187427817271982LLU), QU( 1499802773054556353LLU), - QU(10816638187021897173LLU), QU( 5436139270839738132LLU), - QU( 6659882287036010082LLU), QU( 2154048955317173697LLU), - QU(10887317019333757642LLU), QU(16281091802634424955LLU), - QU(10754549879915384901LLU), QU(10760611745769249815LLU), - QU( 2161505946972504002LLU), QU( 5243132808986265107LLU), - QU(10129852179873415416LLU), QU( 710339480008649081LLU), - QU( 7802129453068808528LLU), QU(17967213567178907213LLU), - QU(15730859124668605599LLU), QU(13058356168962376502LLU), - QU( 3701224985413645909LLU), QU(14464065869149109264LLU), - QU( 9959272418844311646LLU), QU(10157426099515958752LLU), - QU(14013736814538268528LLU), QU(17797456992065653951LLU), - QU(17418878140257344806LLU), QU(15457429073540561521LLU), - QU( 2184426881360949378LLU), QU( 2062193041154712416LLU), - QU( 8553463347406931661LLU), QU( 4913057625202871854LLU), 
- QU( 2668943682126618425LLU), QU(17064444737891172288LLU), - QU( 4997115903913298637LLU), QU(12019402608892327416LLU), - QU(17603584559765897352LLU), QU(11367529582073647975LLU), - QU( 8211476043518436050LLU), QU( 8676849804070323674LLU), - QU(18431829230394475730LLU), QU(10490177861361247904LLU), - QU( 9508720602025651349LLU), QU( 7409627448555722700LLU), - QU( 5804047018862729008LLU), QU(11943858176893142594LLU), - QU(11908095418933847092LLU), QU( 5415449345715887652LLU), - QU( 1554022699166156407LLU), QU( 9073322106406017161LLU), - QU( 7080630967969047082LLU), QU(18049736940860732943LLU), - QU(12748714242594196794LLU), QU( 1226992415735156741LLU), - QU(17900981019609531193LLU), QU(11720739744008710999LLU), - QU( 3006400683394775434LLU), QU(11347974011751996028LLU), - QU( 3316999628257954608LLU), QU( 8384484563557639101LLU), - QU(18117794685961729767LLU), QU( 1900145025596618194LLU), - QU(17459527840632892676LLU), QU( 5634784101865710994LLU), - QU( 7918619300292897158LLU), QU( 3146577625026301350LLU), - QU( 9955212856499068767LLU), QU( 1873995843681746975LLU), - QU( 1561487759967972194LLU), QU( 8322718804375878474LLU), - QU(11300284215327028366LLU), QU( 4667391032508998982LLU), - QU( 9820104494306625580LLU), QU(17922397968599970610LLU), - QU( 1784690461886786712LLU), QU(14940365084341346821LLU), - QU( 5348719575594186181LLU), QU(10720419084507855261LLU), - QU(14210394354145143274LLU), QU( 2426468692164000131LLU), - QU(16271062114607059202LLU), QU(14851904092357070247LLU), - QU( 6524493015693121897LLU), QU( 9825473835127138531LLU), - QU(14222500616268569578LLU), QU(15521484052007487468LLU), - QU(14462579404124614699LLU), QU(11012375590820665520LLU), - QU(11625327350536084927LLU), QU(14452017765243785417LLU), - QU( 9989342263518766305LLU), QU( 3640105471101803790LLU), - QU( 4749866455897513242LLU), QU(13963064946736312044LLU), - QU(10007416591973223791LLU), QU(18314132234717431115LLU), - QU( 3286596588617483450LLU), QU( 7726163455370818765LLU), - QU( 
7575454721115379328LLU), QU( 5308331576437663422LLU), - QU(18288821894903530934LLU), QU( 8028405805410554106LLU), - QU(15744019832103296628LLU), QU( 149765559630932100LLU), - QU( 6137705557200071977LLU), QU(14513416315434803615LLU), - QU(11665702820128984473LLU), QU( 218926670505601386LLU), - QU( 6868675028717769519LLU), QU(15282016569441512302LLU), - QU( 5707000497782960236LLU), QU( 6671120586555079567LLU), - QU( 2194098052618985448LLU), QU(16849577895477330978LLU), - QU(12957148471017466283LLU), QU( 1997805535404859393LLU), - QU( 1180721060263860490LLU), QU(13206391310193756958LLU), - QU(12980208674461861797LLU), QU( 3825967775058875366LLU), - QU(17543433670782042631LLU), QU( 1518339070120322730LLU), - QU(16344584340890991669LLU), QU( 2611327165318529819LLU), - QU(11265022723283422529LLU), QU( 4001552800373196817LLU), - QU(14509595890079346161LLU), QU( 3528717165416234562LLU), - QU(18153222571501914072LLU), QU( 9387182977209744425LLU), - QU(10064342315985580021LLU), QU(11373678413215253977LLU), - QU( 2308457853228798099LLU), QU( 9729042942839545302LLU), - QU( 7833785471140127746LLU), QU( 6351049900319844436LLU), - QU(14454610627133496067LLU), QU(12533175683634819111LLU), - QU(15570163926716513029LLU), QU(13356980519185762498LLU) + QU( 2100341266307895239ULL), QU( 8344256300489757943ULL), + QU(15687933285484243894ULL), QU( 8268620370277076319ULL), + QU(12371852309826545459ULL), QU( 8800491541730110238ULL), + QU(18113268950100835773ULL), QU( 2886823658884438119ULL), + QU( 3293667307248180724ULL), QU( 9307928143300172731ULL), + QU( 7688082017574293629ULL), QU( 900986224735166665ULL), + QU( 9977972710722265039ULL), QU( 6008205004994830552ULL), + QU( 546909104521689292ULL), QU( 7428471521869107594ULL), + QU(14777563419314721179ULL), QU(16116143076567350053ULL), + QU( 5322685342003142329ULL), QU( 4200427048445863473ULL), + QU( 4693092150132559146ULL), QU(13671425863759338582ULL), + QU( 6747117460737639916ULL), QU( 4732666080236551150ULL), + QU( 5912839950611941263ULL), 
QU( 3903717554504704909ULL), + QU( 2615667650256786818ULL), QU(10844129913887006352ULL), + QU(13786467861810997820ULL), QU(14267853002994021570ULL), + QU(13767807302847237439ULL), QU(16407963253707224617ULL), + QU( 4802498363698583497ULL), QU( 2523802839317209764ULL), + QU( 3822579397797475589ULL), QU( 8950320572212130610ULL), + QU( 3745623504978342534ULL), QU(16092609066068482806ULL), + QU( 9817016950274642398ULL), QU(10591660660323829098ULL), + QU(11751606650792815920ULL), QU( 5122873818577122211ULL), + QU(17209553764913936624ULL), QU( 6249057709284380343ULL), + QU(15088791264695071830ULL), QU(15344673071709851930ULL), + QU( 4345751415293646084ULL), QU( 2542865750703067928ULL), + QU(13520525127852368784ULL), QU(18294188662880997241ULL), + QU( 3871781938044881523ULL), QU( 2873487268122812184ULL), + QU(15099676759482679005ULL), QU(15442599127239350490ULL), + QU( 6311893274367710888ULL), QU( 3286118760484672933ULL), + QU( 4146067961333542189ULL), QU(13303942567897208770ULL), + QU( 8196013722255630418ULL), QU( 4437815439340979989ULL), + QU(15433791533450605135ULL), QU( 4254828956815687049ULL), + QU( 1310903207708286015ULL), QU(10529182764462398549ULL), + QU(14900231311660638810ULL), QU( 9727017277104609793ULL), + QU( 1821308310948199033ULL), QU(11628861435066772084ULL), + QU( 9469019138491546924ULL), QU( 3145812670532604988ULL), + QU( 9938468915045491919ULL), QU( 1562447430672662142ULL), + QU(13963995266697989134ULL), QU( 3356884357625028695ULL), + QU( 4499850304584309747ULL), QU( 8456825817023658122ULL), + QU(10859039922814285279ULL), QU( 8099512337972526555ULL), + QU( 348006375109672149ULL), QU(11919893998241688603ULL), + QU( 1104199577402948826ULL), QU(16689191854356060289ULL), + QU(10992552041730168078ULL), QU( 7243733172705465836ULL), + QU( 5668075606180319560ULL), QU(18182847037333286970ULL), + QU( 4290215357664631322ULL), QU( 4061414220791828613ULL), + QU(13006291061652989604ULL), QU( 7140491178917128798ULL), + QU(12703446217663283481ULL), QU( 
5500220597564558267ULL), + QU(10330551509971296358ULL), QU(15958554768648714492ULL), + QU( 5174555954515360045ULL), QU( 1731318837687577735ULL), + QU( 3557700801048354857ULL), QU(13764012341928616198ULL), + QU(13115166194379119043ULL), QU( 7989321021560255519ULL), + QU( 2103584280905877040ULL), QU( 9230788662155228488ULL), + QU(16396629323325547654ULL), QU( 657926409811318051ULL), + QU(15046700264391400727ULL), QU( 5120132858771880830ULL), + QU( 7934160097989028561ULL), QU( 6963121488531976245ULL), + QU(17412329602621742089ULL), QU(15144843053931774092ULL), + QU(17204176651763054532ULL), QU(13166595387554065870ULL), + QU( 8590377810513960213ULL), QU( 5834365135373991938ULL), + QU( 7640913007182226243ULL), QU( 3479394703859418425ULL), + QU(16402784452644521040ULL), QU( 4993979809687083980ULL), + QU(13254522168097688865ULL), QU(15643659095244365219ULL), + QU( 5881437660538424982ULL), QU(11174892200618987379ULL), + QU( 254409966159711077ULL), QU(17158413043140549909ULL), + QU( 3638048789290376272ULL), QU( 1376816930299489190ULL), + QU( 4622462095217761923ULL), QU(15086407973010263515ULL), + QU(13253971772784692238ULL), QU( 5270549043541649236ULL), + QU(11182714186805411604ULL), QU(12283846437495577140ULL), + QU( 5297647149908953219ULL), QU(10047451738316836654ULL), + QU( 4938228100367874746ULL), QU(12328523025304077923ULL), + QU( 3601049438595312361ULL), QU( 9313624118352733770ULL), + QU(13322966086117661798ULL), QU(16660005705644029394ULL), + QU(11337677526988872373ULL), QU(13869299102574417795ULL), + QU(15642043183045645437ULL), QU( 3021755569085880019ULL), + QU( 4979741767761188161ULL), QU(13679979092079279587ULL), + QU( 3344685842861071743ULL), QU(13947960059899588104ULL), + QU( 305806934293368007ULL), QU( 5749173929201650029ULL), + QU(11123724852118844098ULL), QU(15128987688788879802ULL), + QU(15251651211024665009ULL), QU( 7689925933816577776ULL), + QU(16732804392695859449ULL), QU(17087345401014078468ULL), + QU(14315108589159048871ULL), QU( 
4820700266619778917ULL), + QU(16709637539357958441ULL), QU( 4936227875177351374ULL), + QU( 2137907697912987247ULL), QU(11628565601408395420ULL), + QU( 2333250549241556786ULL), QU( 5711200379577778637ULL), + QU( 5170680131529031729ULL), QU(12620392043061335164ULL), + QU( 95363390101096078ULL), QU( 5487981914081709462ULL), + QU( 1763109823981838620ULL), QU( 3395861271473224396ULL), + QU( 1300496844282213595ULL), QU( 6894316212820232902ULL), + QU(10673859651135576674ULL), QU( 5911839658857903252ULL), + QU(17407110743387299102ULL), QU( 8257427154623140385ULL), + QU(11389003026741800267ULL), QU( 4070043211095013717ULL), + QU(11663806997145259025ULL), QU(15265598950648798210ULL), + QU( 630585789434030934ULL), QU( 3524446529213587334ULL), + QU( 7186424168495184211ULL), QU(10806585451386379021ULL), + QU(11120017753500499273ULL), QU( 1586837651387701301ULL), + QU(17530454400954415544ULL), QU( 9991670045077880430ULL), + QU( 7550997268990730180ULL), QU( 8640249196597379304ULL), + QU( 3522203892786893823ULL), QU(10401116549878854788ULL), + QU(13690285544733124852ULL), QU( 8295785675455774586ULL), + QU(15535716172155117603ULL), QU( 3112108583723722511ULL), + QU(17633179955339271113ULL), QU(18154208056063759375ULL), + QU( 1866409236285815666ULL), QU(13326075895396412882ULL), + QU( 8756261842948020025ULL), QU( 6281852999868439131ULL), + QU(15087653361275292858ULL), QU(10333923911152949397ULL), + QU( 5265567645757408500ULL), QU(12728041843210352184ULL), + QU( 6347959327507828759ULL), QU( 154112802625564758ULL), + QU(18235228308679780218ULL), QU( 3253805274673352418ULL), + QU( 4849171610689031197ULL), QU(17948529398340432518ULL), + QU(13803510475637409167ULL), QU(13506570190409883095ULL), + QU(15870801273282960805ULL), QU( 8451286481299170773ULL), + QU( 9562190620034457541ULL), QU( 8518905387449138364ULL), + QU(12681306401363385655ULL), QU( 3788073690559762558ULL), + QU( 5256820289573487769ULL), QU( 2752021372314875467ULL), + QU( 6354035166862520716ULL), QU( 
4328956378309739069ULL), + QU( 449087441228269600ULL), QU( 5533508742653090868ULL), + QU( 1260389420404746988ULL), QU(18175394473289055097ULL), + QU( 1535467109660399420ULL), QU( 8818894282874061442ULL), + QU(12140873243824811213ULL), QU(15031386653823014946ULL), + QU( 1286028221456149232ULL), QU( 6329608889367858784ULL), + QU( 9419654354945132725ULL), QU( 6094576547061672379ULL), + QU(17706217251847450255ULL), QU( 1733495073065878126ULL), + QU(16918923754607552663ULL), QU( 8881949849954945044ULL), + QU(12938977706896313891ULL), QU(14043628638299793407ULL), + QU(18393874581723718233ULL), QU( 6886318534846892044ULL), + QU(14577870878038334081ULL), QU(13541558383439414119ULL), + QU(13570472158807588273ULL), QU(18300760537910283361ULL), + QU( 818368572800609205ULL), QU( 1417000585112573219ULL), + QU(12337533143867683655ULL), QU(12433180994702314480ULL), + QU( 778190005829189083ULL), QU(13667356216206524711ULL), + QU( 9866149895295225230ULL), QU(11043240490417111999ULL), + QU( 1123933826541378598ULL), QU( 6469631933605123610ULL), + QU(14508554074431980040ULL), QU(13918931242962026714ULL), + QU( 2870785929342348285ULL), QU(14786362626740736974ULL), + QU(13176680060902695786ULL), QU( 9591778613541679456ULL), + QU( 9097662885117436706ULL), QU( 749262234240924947ULL), + QU( 1944844067793307093ULL), QU( 4339214904577487742ULL), + QU( 8009584152961946551ULL), QU(16073159501225501777ULL), + QU( 3335870590499306217ULL), QU(17088312653151202847ULL), + QU( 3108893142681931848ULL), QU(16636841767202792021ULL), + QU(10423316431118400637ULL), QU( 8008357368674443506ULL), + QU(11340015231914677875ULL), QU(17687896501594936090ULL), + QU(15173627921763199958ULL), QU( 542569482243721959ULL), + QU(15071714982769812975ULL), QU( 4466624872151386956ULL), + QU( 1901780715602332461ULL), QU( 9822227742154351098ULL), + QU( 1479332892928648780ULL), QU( 6981611948382474400ULL), + QU( 7620824924456077376ULL), QU(14095973329429406782ULL), + QU( 7902744005696185404ULL), QU(15830577219375036920ULL), 
+ QU(10287076667317764416ULL), QU(12334872764071724025ULL), + QU( 4419302088133544331ULL), QU(14455842851266090520ULL), + QU(12488077416504654222ULL), QU( 7953892017701886766ULL), + QU( 6331484925529519007ULL), QU( 4902145853785030022ULL), + QU(17010159216096443073ULL), QU(11945354668653886087ULL), + QU(15112022728645230829ULL), QU(17363484484522986742ULL), + QU( 4423497825896692887ULL), QU( 8155489510809067471ULL), + QU( 258966605622576285ULL), QU( 5462958075742020534ULL), + QU( 6763710214913276228ULL), QU( 2368935183451109054ULL), + QU(14209506165246453811ULL), QU( 2646257040978514881ULL), + QU( 3776001911922207672ULL), QU( 1419304601390147631ULL), + QU(14987366598022458284ULL), QU( 3977770701065815721ULL), + QU( 730820417451838898ULL), QU( 3982991703612885327ULL), + QU( 2803544519671388477ULL), QU(17067667221114424649ULL), + QU( 2922555119737867166ULL), QU( 1989477584121460932ULL), + QU(15020387605892337354ULL), QU( 9293277796427533547ULL), + QU(10722181424063557247ULL), QU(16704542332047511651ULL), + QU( 5008286236142089514ULL), QU(16174732308747382540ULL), + QU(17597019485798338402ULL), QU(13081745199110622093ULL), + QU( 8850305883842258115ULL), QU(12723629125624589005ULL), + QU( 8140566453402805978ULL), QU(15356684607680935061ULL), + QU(14222190387342648650ULL), QU(11134610460665975178ULL), + QU( 1259799058620984266ULL), QU(13281656268025610041ULL), + QU( 298262561068153992ULL), QU(12277871700239212922ULL), + QU(13911297774719779438ULL), QU(16556727962761474934ULL), + QU(17903010316654728010ULL), QU( 9682617699648434744ULL), + QU(14757681836838592850ULL), QU( 1327242446558524473ULL), + QU(11126645098780572792ULL), QU( 1883602329313221774ULL), + QU( 2543897783922776873ULL), QU(15029168513767772842ULL), + QU(12710270651039129878ULL), QU(16118202956069604504ULL), + QU(15010759372168680524ULL), QU( 2296827082251923948ULL), + QU(10793729742623518101ULL), QU(13829764151845413046ULL), + QU(17769301223184451213ULL), QU( 3118268169210783372ULL), + 
QU(17626204544105123127ULL), QU( 7416718488974352644ULL), + QU(10450751996212925994ULL), QU( 9352529519128770586ULL), + QU( 259347569641110140ULL), QU( 8048588892269692697ULL), + QU( 1774414152306494058ULL), QU(10669548347214355622ULL), + QU(13061992253816795081ULL), QU(18432677803063861659ULL), + QU( 8879191055593984333ULL), QU(12433753195199268041ULL), + QU(14919392415439730602ULL), QU( 6612848378595332963ULL), + QU( 6320986812036143628ULL), QU(10465592420226092859ULL), + QU( 4196009278962570808ULL), QU( 3747816564473572224ULL), + QU(17941203486133732898ULL), QU( 2350310037040505198ULL), + QU( 5811779859134370113ULL), QU(10492109599506195126ULL), + QU( 7699650690179541274ULL), QU( 1954338494306022961ULL), + QU(14095816969027231152ULL), QU( 5841346919964852061ULL), + QU(14945969510148214735ULL), QU( 3680200305887550992ULL), + QU( 6218047466131695792ULL), QU( 8242165745175775096ULL), + QU(11021371934053307357ULL), QU( 1265099502753169797ULL), + QU( 4644347436111321718ULL), QU( 3609296916782832859ULL), + QU( 8109807992218521571ULL), QU(18387884215648662020ULL), + QU(14656324896296392902ULL), QU(17386819091238216751ULL), + QU(17788300878582317152ULL), QU( 7919446259742399591ULL), + QU( 4466613134576358004ULL), QU(12928181023667938509ULL), + QU(13147446154454932030ULL), QU(16552129038252734620ULL), + QU( 8395299403738822450ULL), QU(11313817655275361164ULL), + QU( 434258809499511718ULL), QU( 2074882104954788676ULL), + QU( 7929892178759395518ULL), QU( 9006461629105745388ULL), + QU( 5176475650000323086ULL), QU(11128357033468341069ULL), + QU(12026158851559118955ULL), QU(14699716249471156500ULL), + QU( 448982497120206757ULL), QU( 4156475356685519900ULL), + QU( 6063816103417215727ULL), QU(10073289387954971479ULL), + QU( 8174466846138590962ULL), QU( 2675777452363449006ULL), + QU( 9090685420572474281ULL), QU( 6659652652765562060ULL), + QU(12923120304018106621ULL), QU(11117480560334526775ULL), + QU( 937910473424587511ULL), QU( 1838692113502346645ULL), + 
QU(11133914074648726180ULL), QU( 7922600945143884053ULL), + QU(13435287702700959550ULL), QU( 5287964921251123332ULL), + QU(11354875374575318947ULL), QU(17955724760748238133ULL), + QU(13728617396297106512ULL), QU( 4107449660118101255ULL), + QU( 1210269794886589623ULL), QU(11408687205733456282ULL), + QU( 4538354710392677887ULL), QU(13566803319341319267ULL), + QU(17870798107734050771ULL), QU( 3354318982568089135ULL), + QU( 9034450839405133651ULL), QU(13087431795753424314ULL), + QU( 950333102820688239ULL), QU( 1968360654535604116ULL), + QU(16840551645563314995ULL), QU( 8867501803892924995ULL), + QU(11395388644490626845ULL), QU( 1529815836300732204ULL), + QU(13330848522996608842ULL), QU( 1813432878817504265ULL), + QU( 2336867432693429560ULL), QU(15192805445973385902ULL), + QU( 2528593071076407877ULL), QU( 128459777936689248ULL), + QU( 9976345382867214866ULL), QU( 6208885766767996043ULL), + QU(14982349522273141706ULL), QU( 3099654362410737822ULL), + QU(13776700761947297661ULL), QU( 8806185470684925550ULL), + QU( 8151717890410585321ULL), QU( 640860591588072925ULL), + QU(14592096303937307465ULL), QU( 9056472419613564846ULL), + QU(14861544647742266352ULL), QU(12703771500398470216ULL), + QU( 3142372800384138465ULL), QU( 6201105606917248196ULL), + QU(18337516409359270184ULL), QU(15042268695665115339ULL), + QU(15188246541383283846ULL), QU(12800028693090114519ULL), + QU( 5992859621101493472ULL), QU(18278043971816803521ULL), + QU( 9002773075219424560ULL), QU( 7325707116943598353ULL), + QU( 7930571931248040822ULL), QU( 5645275869617023448ULL), + QU( 7266107455295958487ULL), QU( 4363664528273524411ULL), + QU(14313875763787479809ULL), QU(17059695613553486802ULL), + QU( 9247761425889940932ULL), QU(13704726459237593128ULL), + QU( 2701312427328909832ULL), QU(17235532008287243115ULL), + QU(14093147761491729538ULL), QU( 6247352273768386516ULL), + QU( 8268710048153268415ULL), QU( 7985295214477182083ULL), + QU(15624495190888896807ULL), QU( 3772753430045262788ULL), + QU( 
9133991620474991698ULL), QU( 5665791943316256028ULL), + QU( 7551996832462193473ULL), QU(13163729206798953877ULL), + QU( 9263532074153846374ULL), QU( 1015460703698618353ULL), + QU(17929874696989519390ULL), QU(18257884721466153847ULL), + QU(16271867543011222991ULL), QU( 3905971519021791941ULL), + QU(16814488397137052085ULL), QU( 1321197685504621613ULL), + QU( 2870359191894002181ULL), QU(14317282970323395450ULL), + QU(13663920845511074366ULL), QU( 2052463995796539594ULL), + QU(14126345686431444337ULL), QU( 1727572121947022534ULL), + QU(17793552254485594241ULL), QU( 6738857418849205750ULL), + QU( 1282987123157442952ULL), QU(16655480021581159251ULL), + QU( 6784587032080183866ULL), QU(14726758805359965162ULL), + QU( 7577995933961987349ULL), QU(12539609320311114036ULL), + QU(10789773033385439494ULL), QU( 8517001497411158227ULL), + QU(10075543932136339710ULL), QU(14838152340938811081ULL), + QU( 9560840631794044194ULL), QU(17445736541454117475ULL), + QU(10633026464336393186ULL), QU(15705729708242246293ULL), + QU( 1117517596891411098ULL), QU( 4305657943415886942ULL), + QU( 4948856840533979263ULL), QU(16071681989041789593ULL), + QU(13723031429272486527ULL), QU( 7639567622306509462ULL), + QU(12670424537483090390ULL), QU( 9715223453097197134ULL), + QU( 5457173389992686394ULL), QU( 289857129276135145ULL), + QU(17048610270521972512ULL), QU( 692768013309835485ULL), + QU(14823232360546632057ULL), QU(18218002361317895936ULL), + QU( 3281724260212650204ULL), QU(16453957266549513795ULL), + QU( 8592711109774511881ULL), QU( 929825123473369579ULL), + QU(15966784769764367791ULL), QU( 9627344291450607588ULL), + QU(10849555504977813287ULL), QU( 9234566913936339275ULL), + QU( 6413807690366911210ULL), QU(10862389016184219267ULL), + QU(13842504799335374048ULL), QU( 1531994113376881174ULL), + QU( 2081314867544364459ULL), QU(16430628791616959932ULL), + QU( 8314714038654394368ULL), QU( 9155473892098431813ULL), + QU(12577843786670475704ULL), QU( 4399161106452401017ULL), + QU( 
1668083091682623186ULL), QU( 1741383777203714216ULL), + QU( 2162597285417794374ULL), QU(15841980159165218736ULL), + QU( 1971354603551467079ULL), QU( 1206714764913205968ULL), + QU( 4790860439591272330ULL), QU(14699375615594055799ULL), + QU( 8374423871657449988ULL), QU(10950685736472937738ULL), + QU( 697344331343267176ULL), QU(10084998763118059810ULL), + QU(12897369539795983124ULL), QU(12351260292144383605ULL), + QU( 1268810970176811234ULL), QU( 7406287800414582768ULL), + QU( 516169557043807831ULL), QU( 5077568278710520380ULL), + QU( 3828791738309039304ULL), QU( 7721974069946943610ULL), + QU( 3534670260981096460ULL), QU( 4865792189600584891ULL), + QU(16892578493734337298ULL), QU( 9161499464278042590ULL), + QU(11976149624067055931ULL), QU(13219479887277343990ULL), + QU(14161556738111500680ULL), QU(14670715255011223056ULL), + QU( 4671205678403576558ULL), QU(12633022931454259781ULL), + QU(14821376219869187646ULL), QU( 751181776484317028ULL), + QU( 2192211308839047070ULL), QU(11787306362361245189ULL), + QU(10672375120744095707ULL), QU( 4601972328345244467ULL), + QU(15457217788831125879ULL), QU( 8464345256775460809ULL), + QU(10191938789487159478ULL), QU( 6184348739615197613ULL), + QU(11425436778806882100ULL), QU( 2739227089124319793ULL), + QU( 461464518456000551ULL), QU( 4689850170029177442ULL), + QU( 6120307814374078625ULL), QU(11153579230681708671ULL), + QU( 7891721473905347926ULL), QU(10281646937824872400ULL), + QU( 3026099648191332248ULL), QU( 8666750296953273818ULL), + QU(14978499698844363232ULL), QU(13303395102890132065ULL), + QU( 8182358205292864080ULL), QU(10560547713972971291ULL), + QU(11981635489418959093ULL), QU( 3134621354935288409ULL), + QU(11580681977404383968ULL), QU(14205530317404088650ULL), + QU( 5997789011854923157ULL), QU(13659151593432238041ULL), + QU(11664332114338865086ULL), QU( 7490351383220929386ULL), + QU( 7189290499881530378ULL), QU(15039262734271020220ULL), + QU( 2057217285976980055ULL), QU( 555570804905355739ULL), + QU(11235311968348555110ULL), 
QU(13824557146269603217ULL), + QU(16906788840653099693ULL), QU( 7222878245455661677ULL), + QU( 5245139444332423756ULL), QU( 4723748462805674292ULL), + QU(12216509815698568612ULL), QU(17402362976648951187ULL), + QU(17389614836810366768ULL), QU( 4880936484146667711ULL), + QU( 9085007839292639880ULL), QU(13837353458498535449ULL), + QU(11914419854360366677ULL), QU(16595890135313864103ULL), + QU( 6313969847197627222ULL), QU(18296909792163910431ULL), + QU(10041780113382084042ULL), QU( 2499478551172884794ULL), + QU(11057894246241189489ULL), QU( 9742243032389068555ULL), + QU(12838934582673196228ULL), QU(13437023235248490367ULL), + QU(13372420669446163240ULL), QU( 6752564244716909224ULL), + QU( 7157333073400313737ULL), QU(12230281516370654308ULL), + QU( 1182884552219419117ULL), QU( 2955125381312499218ULL), + QU(10308827097079443249ULL), QU( 1337648572986534958ULL), + QU(16378788590020343939ULL), QU( 108619126514420935ULL), + QU( 3990981009621629188ULL), QU( 5460953070230946410ULL), + QU( 9703328329366531883ULL), QU(13166631489188077236ULL), + QU( 1104768831213675170ULL), QU( 3447930458553877908ULL), + QU( 8067172487769945676ULL), QU( 5445802098190775347ULL), + QU( 3244840981648973873ULL), QU(17314668322981950060ULL), + QU( 5006812527827763807ULL), QU(18158695070225526260ULL), + QU( 2824536478852417853ULL), QU(13974775809127519886ULL), + QU( 9814362769074067392ULL), QU(17276205156374862128ULL), + QU(11361680725379306967ULL), QU( 3422581970382012542ULL), + QU(11003189603753241266ULL), QU(11194292945277862261ULL), + QU( 6839623313908521348ULL), QU(11935326462707324634ULL), + QU( 1611456788685878444ULL), QU(13112620989475558907ULL), + QU( 517659108904450427ULL), QU(13558114318574407624ULL), + QU(15699089742731633077ULL), QU( 4988979278862685458ULL), + QU( 8111373583056521297ULL), QU( 3891258746615399627ULL), + QU( 8137298251469718086ULL), QU(12748663295624701649ULL), + QU( 4389835683495292062ULL), QU( 5775217872128831729ULL), + QU( 9462091896405534927ULL), QU( 
8498124108820263989ULL), + QU( 8059131278842839525ULL), QU(10503167994254090892ULL), + QU(11613153541070396656ULL), QU(18069248738504647790ULL), + QU( 570657419109768508ULL), QU( 3950574167771159665ULL), + QU( 5514655599604313077ULL), QU( 2908460854428484165ULL), + QU(10777722615935663114ULL), QU(12007363304839279486ULL), + QU( 9800646187569484767ULL), QU( 8795423564889864287ULL), + QU(14257396680131028419ULL), QU( 6405465117315096498ULL), + QU( 7939411072208774878ULL), QU(17577572378528990006ULL), + QU(14785873806715994850ULL), QU(16770572680854747390ULL), + QU(18127549474419396481ULL), QU(11637013449455757750ULL), + QU(14371851933996761086ULL), QU( 3601181063650110280ULL), + QU( 4126442845019316144ULL), QU(10198287239244320669ULL), + QU(18000169628555379659ULL), QU(18392482400739978269ULL), + QU( 6219919037686919957ULL), QU( 3610085377719446052ULL), + QU( 2513925039981776336ULL), QU(16679413537926716955ULL), + QU(12903302131714909434ULL), QU( 5581145789762985009ULL), + QU(12325955044293303233ULL), QU(17216111180742141204ULL), + QU( 6321919595276545740ULL), QU( 3507521147216174501ULL), + QU( 9659194593319481840ULL), QU(11473976005975358326ULL), + QU(14742730101435987026ULL), QU( 492845897709954780ULL), + QU(16976371186162599676ULL), QU(17712703422837648655ULL), + QU( 9881254778587061697ULL), QU( 8413223156302299551ULL), + QU( 1563841828254089168ULL), QU( 9996032758786671975ULL), + QU( 138877700583772667ULL), QU(13003043368574995989ULL), + QU( 4390573668650456587ULL), QU( 8610287390568126755ULL), + QU(15126904974266642199ULL), QU( 6703637238986057662ULL), + QU( 2873075592956810157ULL), QU( 6035080933946049418ULL), + QU(13382846581202353014ULL), QU( 7303971031814642463ULL), + QU(18418024405307444267ULL), QU( 5847096731675404647ULL), + QU( 4035880699639842500ULL), QU(11525348625112218478ULL), + QU( 3041162365459574102ULL), QU( 2604734487727986558ULL), + QU(15526341771636983145ULL), QU(14556052310697370254ULL), + QU(12997787077930808155ULL), QU( 
9601806501755554499ULL), + QU(11349677952521423389ULL), QU(14956777807644899350ULL), + QU(16559736957742852721ULL), QU(12360828274778140726ULL), + QU( 6685373272009662513ULL), QU(16932258748055324130ULL), + QU(15918051131954158508ULL), QU( 1692312913140790144ULL), + QU( 546653826801637367ULL), QU( 5341587076045986652ULL), + QU(14975057236342585662ULL), QU(12374976357340622412ULL), + QU(10328833995181940552ULL), QU(12831807101710443149ULL), + QU(10548514914382545716ULL), QU( 2217806727199715993ULL), + QU(12627067369242845138ULL), QU( 4598965364035438158ULL), + QU( 150923352751318171ULL), QU(14274109544442257283ULL), + QU( 4696661475093863031ULL), QU( 1505764114384654516ULL), + QU(10699185831891495147ULL), QU( 2392353847713620519ULL), + QU( 3652870166711788383ULL), QU( 8640653276221911108ULL), + QU( 3894077592275889704ULL), QU( 4918592872135964845ULL), + QU(16379121273281400789ULL), QU(12058465483591683656ULL), + QU(11250106829302924945ULL), QU( 1147537556296983005ULL), + QU( 6376342756004613268ULL), QU(14967128191709280506ULL), + QU(18007449949790627628ULL), QU( 9497178279316537841ULL), + QU( 7920174844809394893ULL), QU(10037752595255719907ULL), + QU(15875342784985217697ULL), QU(15311615921712850696ULL), + QU( 9552902652110992950ULL), QU(14054979450099721140ULL), + QU( 5998709773566417349ULL), QU(18027910339276320187ULL), + QU( 8223099053868585554ULL), QU( 7842270354824999767ULL), + QU( 4896315688770080292ULL), QU(12969320296569787895ULL), + QU( 2674321489185759961ULL), QU( 4053615936864718439ULL), + QU(11349775270588617578ULL), QU( 4743019256284553975ULL), + QU( 5602100217469723769ULL), QU(14398995691411527813ULL), + QU( 7412170493796825470ULL), QU( 836262406131744846ULL), + QU( 8231086633845153022ULL), QU( 5161377920438552287ULL), + QU( 8828731196169924949ULL), QU(16211142246465502680ULL), + QU( 3307990879253687818ULL), QU( 5193405406899782022ULL), + QU( 8510842117467566693ULL), QU( 6070955181022405365ULL), + QU(14482950231361409799ULL), 
QU(12585159371331138077ULL), + QU( 3511537678933588148ULL), QU( 2041849474531116417ULL), + QU(10944936685095345792ULL), QU(18303116923079107729ULL), + QU( 2720566371239725320ULL), QU( 4958672473562397622ULL), + QU( 3032326668253243412ULL), QU(13689418691726908338ULL), + QU( 1895205511728843996ULL), QU( 8146303515271990527ULL), + QU(16507343500056113480ULL), QU( 473996939105902919ULL), + QU( 9897686885246881481ULL), QU(14606433762712790575ULL), + QU( 6732796251605566368ULL), QU( 1399778120855368916ULL), + QU( 935023885182833777ULL), QU(16066282816186753477ULL), + QU( 7291270991820612055ULL), QU(17530230393129853844ULL), + QU(10223493623477451366ULL), QU(15841725630495676683ULL), + QU(17379567246435515824ULL), QU( 8588251429375561971ULL), + QU(18339511210887206423ULL), QU(17349587430725976100ULL), + QU(12244876521394838088ULL), QU( 6382187714147161259ULL), + QU(12335807181848950831ULL), QU(16948885622305460665ULL), + QU(13755097796371520506ULL), QU(14806740373324947801ULL), + QU( 4828699633859287703ULL), QU( 8209879281452301604ULL), + QU(12435716669553736437ULL), QU(13970976859588452131ULL), + QU( 6233960842566773148ULL), QU(12507096267900505759ULL), + QU( 1198713114381279421ULL), QU(14989862731124149015ULL), + QU(15932189508707978949ULL), QU( 2526406641432708722ULL), + QU( 29187427817271982ULL), QU( 1499802773054556353ULL), + QU(10816638187021897173ULL), QU( 5436139270839738132ULL), + QU( 6659882287036010082ULL), QU( 2154048955317173697ULL), + QU(10887317019333757642ULL), QU(16281091802634424955ULL), + QU(10754549879915384901ULL), QU(10760611745769249815ULL), + QU( 2161505946972504002ULL), QU( 5243132808986265107ULL), + QU(10129852179873415416ULL), QU( 710339480008649081ULL), + QU( 7802129453068808528ULL), QU(17967213567178907213ULL), + QU(15730859124668605599ULL), QU(13058356168962376502ULL), + QU( 3701224985413645909ULL), QU(14464065869149109264ULL), + QU( 9959272418844311646ULL), QU(10157426099515958752ULL), + QU(14013736814538268528ULL), 
QU(17797456992065653951ULL), + QU(17418878140257344806ULL), QU(15457429073540561521ULL), + QU( 2184426881360949378ULL), QU( 2062193041154712416ULL), + QU( 8553463347406931661ULL), QU( 4913057625202871854ULL), + QU( 2668943682126618425ULL), QU(17064444737891172288ULL), + QU( 4997115903913298637ULL), QU(12019402608892327416ULL), + QU(17603584559765897352ULL), QU(11367529582073647975ULL), + QU( 8211476043518436050ULL), QU( 8676849804070323674ULL), + QU(18431829230394475730ULL), QU(10490177861361247904ULL), + QU( 9508720602025651349ULL), QU( 7409627448555722700ULL), + QU( 5804047018862729008ULL), QU(11943858176893142594ULL), + QU(11908095418933847092ULL), QU( 5415449345715887652ULL), + QU( 1554022699166156407ULL), QU( 9073322106406017161ULL), + QU( 7080630967969047082ULL), QU(18049736940860732943ULL), + QU(12748714242594196794ULL), QU( 1226992415735156741ULL), + QU(17900981019609531193ULL), QU(11720739744008710999ULL), + QU( 3006400683394775434ULL), QU(11347974011751996028ULL), + QU( 3316999628257954608ULL), QU( 8384484563557639101ULL), + QU(18117794685961729767ULL), QU( 1900145025596618194ULL), + QU(17459527840632892676ULL), QU( 5634784101865710994ULL), + QU( 7918619300292897158ULL), QU( 3146577625026301350ULL), + QU( 9955212856499068767ULL), QU( 1873995843681746975ULL), + QU( 1561487759967972194ULL), QU( 8322718804375878474ULL), + QU(11300284215327028366ULL), QU( 4667391032508998982ULL), + QU( 9820104494306625580ULL), QU(17922397968599970610ULL), + QU( 1784690461886786712ULL), QU(14940365084341346821ULL), + QU( 5348719575594186181ULL), QU(10720419084507855261ULL), + QU(14210394354145143274ULL), QU( 2426468692164000131ULL), + QU(16271062114607059202ULL), QU(14851904092357070247ULL), + QU( 6524493015693121897ULL), QU( 9825473835127138531ULL), + QU(14222500616268569578ULL), QU(15521484052007487468ULL), + QU(14462579404124614699ULL), QU(11012375590820665520ULL), + QU(11625327350536084927ULL), QU(14452017765243785417ULL), + QU( 9989342263518766305ULL), QU( 
3640105471101803790ULL), + QU( 4749866455897513242ULL), QU(13963064946736312044ULL), + QU(10007416591973223791ULL), QU(18314132234717431115ULL), + QU( 3286596588617483450ULL), QU( 7726163455370818765ULL), + QU( 7575454721115379328ULL), QU( 5308331576437663422ULL), + QU(18288821894903530934ULL), QU( 8028405805410554106ULL), + QU(15744019832103296628ULL), QU( 149765559630932100ULL), + QU( 6137705557200071977ULL), QU(14513416315434803615ULL), + QU(11665702820128984473ULL), QU( 218926670505601386ULL), + QU( 6868675028717769519ULL), QU(15282016569441512302ULL), + QU( 5707000497782960236ULL), QU( 6671120586555079567ULL), + QU( 2194098052618985448ULL), QU(16849577895477330978ULL), + QU(12957148471017466283ULL), QU( 1997805535404859393ULL), + QU( 1180721060263860490ULL), QU(13206391310193756958ULL), + QU(12980208674461861797ULL), QU( 3825967775058875366ULL), + QU(17543433670782042631ULL), QU( 1518339070120322730ULL), + QU(16344584340890991669ULL), QU( 2611327165318529819ULL), + QU(11265022723283422529ULL), QU( 4001552800373196817ULL), + QU(14509595890079346161ULL), QU( 3528717165416234562ULL), + QU(18153222571501914072ULL), QU( 9387182977209744425ULL), + QU(10064342315985580021ULL), QU(11373678413215253977ULL), + QU( 2308457853228798099ULL), QU( 9729042942839545302ULL), + QU( 7833785471140127746ULL), QU( 6351049900319844436ULL), + QU(14454610627133496067ULL), QU(12533175683634819111ULL), + QU(15570163926716513029ULL), QU(13356980519185762498ULL) }; TEST_BEGIN(test_gen_rand_32) From 22bc570fba00c4dd04cb4962e219d4230f137a4c Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Wed, 21 May 2014 18:06:14 +0900 Subject: [PATCH 0451/3378] Move __func__ to jemalloc_internal_macros.h test/integration/aligned_alloc.c needs it. 
--- include/jemalloc/internal/jemalloc_internal.h.in | 1 - include/jemalloc/internal/jemalloc_internal_macros.h | 4 ++++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index c9462e52..d9bfadf0 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -46,7 +46,6 @@ typedef intptr_t ssize_t; # define PATH_MAX 1024 # define STDERR_FILENO 2 -# define __func__ __FUNCTION__ /* Disable warnings about deprecated system functions */ # pragma warning(disable: 4996) #else diff --git a/include/jemalloc/internal/jemalloc_internal_macros.h b/include/jemalloc/internal/jemalloc_internal_macros.h index 4e239230..bb81e990 100644 --- a/include/jemalloc/internal/jemalloc_internal_macros.h +++ b/include/jemalloc/internal/jemalloc_internal_macros.h @@ -49,3 +49,7 @@ #ifndef JEMALLOC_HAS_RESTRICT # define restrict #endif + +#ifdef _MSC_VER +# define __func__ __FUNCTION__ +#endif From affe009e3765384805a23d804152fbf04151b117 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Wed, 28 May 2014 08:10:12 +0900 Subject: [PATCH 0452/3378] Use a configure test to detect the form of malloc_usable_size in malloc.h --- configure.ac | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/configure.ac b/configure.ac index 57015d1d..04bb2947 100644 --- a/configure.ac +++ b/configure.ac @@ -258,7 +258,6 @@ dnl Define cpp macros in CPPFLAGS, rather than doing AC_DEFINE(macro), since the dnl definitions need to be seen before any headers are included, which is a pain dnl to make happen otherwise. 
default_munmap="1" -JEMALLOC_USABLE_SIZE_CONST="const" case "${host}" in *-*-darwin*) CFLAGS="$CFLAGS" @@ -286,7 +285,6 @@ case "${host}" in AC_DEFINE([JEMALLOC_HAS_ALLOCA_H]) AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED], [ ]) AC_DEFINE([JEMALLOC_THREADED_INIT], [ ]) - JEMALLOC_USABLE_SIZE_CONST="" default_munmap="0" ;; *-*-netbsd*) @@ -351,6 +349,22 @@ case "${host}" in abi="elf" ;; esac + +JEMALLOC_USABLE_SIZE_CONST=const +AC_CHECK_HEADERS([malloc.h], [ + AC_MSG_CHECKING([whether malloc_usable_size definition can use const argument]) + AC_COMPILE_IFELSE([AC_LANG_PROGRAM( + [#include + #include + size_t malloc_usable_size(const void *ptr); + ], + [])],[ + AC_MSG_RESULT([yes]) + ],[ + JEMALLOC_USABLE_SIZE_CONST= + AC_MSG_RESULT([no]) + ]) +]) AC_DEFINE_UNQUOTED([JEMALLOC_USABLE_SIZE_CONST], [$JEMALLOC_USABLE_SIZE_CONST]) AC_SUBST([abi]) AC_SUBST([RPATH]) From 12f74e680c1d53c8fe5323a4ff66877534dcadd3 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Wed, 28 May 2014 12:39:13 +0900 Subject: [PATCH 0453/3378] Move platform headers and tricks from jemalloc_internal.h.in to a new jemalloc_internal_decls.h header --- .../jemalloc/internal/jemalloc_internal.h.in | 53 +---------------- .../internal/jemalloc_internal_decls.h | 58 +++++++++++++++++++ .../internal/jemalloc_internal_macros.h | 4 -- test/include/test/jemalloc_test_defs.h.in | 1 + 4 files changed, 60 insertions(+), 56 deletions(-) create mode 100644 include/jemalloc/internal/jemalloc_internal_decls.h diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index d9bfadf0..cf20f1f9 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -1,59 +1,8 @@ #ifndef JEMALLOC_INTERNAL_H #define JEMALLOC_INTERNAL_H -#include -#ifdef _WIN32 -# include -# define ENOENT ERROR_PATH_NOT_FOUND -# define EINVAL ERROR_BAD_ARGUMENTS -# define EAGAIN ERROR_OUTOFMEMORY -# define EPERM ERROR_WRITE_FAULT -# define 
EFAULT ERROR_INVALID_ADDRESS -# define ENOMEM ERROR_NOT_ENOUGH_MEMORY -# undef ERANGE -# define ERANGE ERROR_INVALID_DATA -#else -# include -# include -# include -# if !defined(SYS_write) && defined(__NR_write) -# define SYS_write __NR_write -# endif -# include -# include -# include -#endif -#include - -#include -#ifndef SIZE_T_MAX -# define SIZE_T_MAX SIZE_MAX -#endif -#include -#include -#include -#include -#include -#include -#ifndef offsetof -# define offsetof(type, member) ((size_t)&(((type *)NULL)->member)) -#endif -#include -#include -#include -#include -#ifdef _MSC_VER -# include -typedef intptr_t ssize_t; -# define PATH_MAX 1024 -# define STDERR_FILENO 2 -/* Disable warnings about deprecated system functions */ -# pragma warning(disable: 4996) -#else -# include -#endif -#include #include "jemalloc_internal_defs.h" +#include "jemalloc/internal/jemalloc_internal_decls.h" #ifdef JEMALLOC_UTRACE #include diff --git a/include/jemalloc/internal/jemalloc_internal_decls.h b/include/jemalloc/internal/jemalloc_internal_decls.h new file mode 100644 index 00000000..7775ab38 --- /dev/null +++ b/include/jemalloc/internal/jemalloc_internal_decls.h @@ -0,0 +1,58 @@ +#ifndef JEMALLOC_INTERNAL_DECLS_H +#define JEMALLOC_INTERNAL_DECLS_H + +#include +#ifdef _WIN32 +# include +# define ENOENT ERROR_PATH_NOT_FOUND +# define EINVAL ERROR_BAD_ARGUMENTS +# define EAGAIN ERROR_OUTOFMEMORY +# define EPERM ERROR_WRITE_FAULT +# define EFAULT ERROR_INVALID_ADDRESS +# define ENOMEM ERROR_NOT_ENOUGH_MEMORY +# undef ERANGE +# define ERANGE ERROR_INVALID_DATA +#else +# include +# include +# include +# if !defined(SYS_write) && defined(__NR_write) +# define SYS_write __NR_write +# endif +# include +# include +# include +#endif +#include + +#include +#ifndef SIZE_T_MAX +# define SIZE_T_MAX SIZE_MAX +#endif +#include +#include +#include +#include +#include +#include +#ifndef offsetof +# define offsetof(type, member) ((size_t)&(((type *)NULL)->member)) +#endif +#include +#include +#include 
+#include +#ifdef _MSC_VER +# include +typedef intptr_t ssize_t; +# define PATH_MAX 1024 +# define STDERR_FILENO 2 +# define __func__ __FUNCTION__ +/* Disable warnings about deprecated system functions */ +# pragma warning(disable: 4996) +#else +# include +#endif +#include + +#endif /* JEMALLOC_INTERNAL_H */ diff --git a/include/jemalloc/internal/jemalloc_internal_macros.h b/include/jemalloc/internal/jemalloc_internal_macros.h index bb81e990..4e239230 100644 --- a/include/jemalloc/internal/jemalloc_internal_macros.h +++ b/include/jemalloc/internal/jemalloc_internal_macros.h @@ -49,7 +49,3 @@ #ifndef JEMALLOC_HAS_RESTRICT # define restrict #endif - -#ifdef _MSC_VER -# define __func__ __FUNCTION__ -#endif diff --git a/test/include/test/jemalloc_test_defs.h.in b/test/include/test/jemalloc_test_defs.h.in index 18a9773d..aaaaec14 100644 --- a/test/include/test/jemalloc_test_defs.h.in +++ b/test/include/test/jemalloc_test_defs.h.in @@ -1,4 +1,5 @@ #include "jemalloc/internal/jemalloc_internal_defs.h" +#include "jemalloc/internal/jemalloc_internal_decls.h" /* For use by SFMT. 
*/ #undef HAVE_SSE2 From 26246af977250a520194a1ced89cbc73ce218ca7 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Wed, 28 May 2014 13:14:46 +0900 Subject: [PATCH 0454/3378] Define INFINITY when it's not defined --- test/unit/math.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/test/unit/math.c b/test/unit/math.c index a1b288ea..ebec77a6 100644 --- a/test/unit/math.c +++ b/test/unit/math.c @@ -3,6 +3,12 @@ #define MAX_REL_ERR 1.0e-9 #define MAX_ABS_ERR 1.0e-9 +#include + +#ifndef INFINITY +#define INFINITY (DBL_MAX + DBL_MAX) +#endif + static bool double_eq_rel(double a, double b, double max_rel_err, double max_abs_err) { From 17767b5f2b195076a8b57f8489addabb1ee68009 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Wed, 28 May 2014 14:06:30 +0900 Subject: [PATCH 0455/3378] Correctly return exit code from thd_join on Windows --- test/src/thd.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/src/thd.c b/test/src/thd.c index 233242a1..7e53625f 100644 --- a/test/src/thd.c +++ b/test/src/thd.c @@ -14,7 +14,8 @@ void thd_join(thd_t thd, void **ret) { - WaitForSingleObject(thd, INFINITE); + if (WaitForSingleObject(thd, INFINITE) == WAIT_OBJECT_0 && ret) + GetExitCodeThread(thd, (LPDWORD) ret); } #else From b54aef1d8cc16f7b3f295cf857842aa6d5844d46 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Wed, 28 May 2014 14:17:01 +0900 Subject: [PATCH 0456/3378] Fixup after 3a730df (Avoid pointer arithmetic on void*[...]) --- test/integration/rallocx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/integration/rallocx.c b/test/integration/rallocx.c index e78e02f3..b6980729 100644 --- a/test/integration/rallocx.c +++ b/test/integration/rallocx.c @@ -95,7 +95,7 @@ TEST_BEGIN(test_zero) "Expected zeroed memory"); } if (psz != qsz) { - memset((void *)(uintptr_t)q+psz, FILL_BYTE, + memset((void *)((uintptr_t)q+psz), FILL_BYTE, qsz-psz); psz = qsz; } @@ -161,7 +161,7 @@ TEST_BEGIN(test_lg_align_and_zero) 
assert_false(validate_fill(q, 0, 0, MAX_VALIDATE), "Expected zeroed memory"); assert_false(validate_fill( - (void *)(uintptr_t)q+sz-MAX_VALIDATE, + (void *)((uintptr_t)q+sz-MAX_VALIDATE), 0, 0, MAX_VALIDATE), "Expected zeroed memory"); } p = q; From ccf046659a7c83e4e1573a1df30415144b4efdb6 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Wed, 7 May 2014 01:17:05 -0400 Subject: [PATCH 0457/3378] STATIC_PAGE_SHIFT for cross-compiling jemalloc Sets `STATIC_PAGE_SHIFT` for cross-compiling jemalloc to 12. A shift of 12 represents a page size of 4k for practically all platforms. --- configure.ac | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 04bb2947..58f6289d 100644 --- a/configure.ac +++ b/configure.ac @@ -968,7 +968,8 @@ AC_CACHE_CHECK([STATIC_PAGE_SHIFT], return 0; ]])], [je_cv_static_page_shift=`cat conftest.out`], - [je_cv_static_page_shift=undefined])) + [je_cv_static_page_shift=undefined], + [je_cv_static_page_shift=12])) if test "x$je_cv_static_page_shift" != "xundefined"; then AC_DEFINE_UNQUOTED([STATIC_PAGE_SHIFT], [$je_cv_static_page_shift]) From 26f44df742893306a53a90328e15a62ed11b9e57 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 28 May 2014 11:08:17 -0700 Subject: [PATCH 0458/3378] Make sure initialization occurs prior to running tests. --- test/src/test.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/test/src/test.c b/test/src/test.c index 3acf8454..17728ca8 100644 --- a/test/src/test.c +++ b/test/src/test.c @@ -63,9 +63,22 @@ p_test_fini(void) test_status_t p_test(test_t *t, ...) { - test_status_t ret = test_status_pass; + test_status_t ret; va_list ap; + /* + * Make sure initialization occurs prior to running tests. Tests are + * special because they may use internal facilities prior to triggering + * initialization as a side effect of calling into the public API. 
This + * is a final safety that works even if jemalloc_constructor() doesn't + * run, as for MSVC builds. + */ + if (mallctl("version", NULL, NULL, NULL, 0) != 0) { + malloc_printf("Initialization error"); + return (test_status_fail); + } + + ret = test_status_pass; va_start(ap, t); for (; t != NULL; t = va_arg(ap, test_t *)) { t(); From 99118622ff5204feaabd2ee4109a7847ab388282 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 28 May 2014 11:23:01 -0700 Subject: [PATCH 0459/3378] Use nallocx() rather than mallctl() to trigger initialization. Use nallocx() rather than mallctl() to trigger initialization, because nallocx() has no side effects other than initialization, whereas mallctl() does a bunch of internal memory allocation. --- test/src/test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/src/test.c b/test/src/test.c index 17728ca8..0f8bd494 100644 --- a/test/src/test.c +++ b/test/src/test.c @@ -73,7 +73,7 @@ p_test(test_t *t, ...) * is a final safety that works even if jemalloc_constructor() doesn't * run, as for MSVC builds. */ - if (mallctl("version", NULL, NULL, NULL, 0) != 0) { + if (nallocx(1, 0) == 0) { malloc_printf("Initialization error"); return (test_status_fail); } From d04047cc29bbc9d1f87a9346d1601e3dd87b6ca0 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 28 May 2014 16:11:55 -0700 Subject: [PATCH 0460/3378] Add size class computation capability. Add size class computation capability, currently used only as validation of the size class lookup tables. Generalize the size class spacing used for bins, for eventual use throughout the full range of allocation sizes. 
--- configure.ac | 23 ++ include/jemalloc/internal/arena.h | 137 ++++++++- .../jemalloc/internal/jemalloc_internal.h.in | 4 +- .../internal/jemalloc_internal_defs.h.in | 5 + .../internal/jemalloc_internal_macros.h | 6 + include/jemalloc/internal/private_symbols.txt | 8 + include/jemalloc/internal/size_classes.sh | 261 ++++++++++++++---- include/jemalloc/internal/util.h | 47 ++++ src/arena.c | 62 +++-- 9 files changed, 462 insertions(+), 91 deletions(-) diff --git a/configure.ac b/configure.ac index 58f6289d..58522499 100644 --- a/configure.ac +++ b/configure.ac @@ -1200,6 +1200,29 @@ if test "x${je_cv_atomic9}" != "xyes" -a "x${je_cv_osatomic}" != "xyes" ; then JE_SYNC_COMPARE_AND_SWAP_CHECK(64, 8) fi +dnl ============================================================================ +dnl Check for __builtin_clz() and __builtin_clzl(). + +AC_CACHE_CHECK([for __builtin_clz], + [je_cv_builtin_clz], + [AC_LINK_IFELSE([AC_LANG_PROGRAM([], + [ + { + unsigned x = 0; + int y = __builtin_clz(x); + } + { + unsigned long x = 0; + int y = __builtin_clzl(x); + } + ])], + [je_cv_builtin_clz=yes], + [je_cv_builtin_clz=no])]) + +if test "x${je_cv_builtin_clz}" = "xyes" ; then + AC_DEFINE([JEMALLOC_HAVE_BUILTIN_CLZ], [ ]) +fi + dnl ============================================================================ dnl Check for spinlock(3) operations as provided on Darwin. 
diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 598a89b0..2dc9501d 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -463,8 +463,15 @@ void arena_postfork_child(arena_t *arena); #ifdef JEMALLOC_H_INLINES #ifndef JEMALLOC_ENABLE_INLINE +size_t small_size2bin_compute(size_t size); +size_t small_size2bin_lookup(size_t size); size_t small_size2bin(size_t size); +size_t small_bin2size_compute(size_t binind); +size_t small_bin2size_lookup(size_t binind); size_t small_bin2size(size_t binind); +size_t small_s2u_compute(size_t size); +size_t small_s2u_lookup(size_t size); +size_t small_s2u(size_t size); arena_chunk_map_t *arena_mapp_get(arena_chunk_t *chunk, size_t pageind); size_t *arena_mapbitsp_get(arena_chunk_t *chunk, size_t pageind); size_t arena_mapbitsp_read(size_t *mapbitsp); @@ -507,18 +514,144 @@ void arena_dalloc(arena_chunk_t *chunk, void *ptr, bool try_tcache); #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_)) # ifdef JEMALLOC_ARENA_INLINE_A +JEMALLOC_INLINE size_t +small_size2bin_compute(size_t size) +{ +#if (NTBINS != 0) + if (size <= (ZU(1) << LG_TINY_MAXCLASS)) { + size_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1; + size_t lg_ceil = lg_floor(pow2_ceil(size)); + return (lg_ceil < lg_tmin ? 0 : lg_ceil - lg_tmin); + } else +#endif + { + size_t x = lg_floor((size<<1)-1); + size_t shift = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM) ? 0 : + x - (LG_SIZE_CLASS_GROUP + LG_QUANTUM); + size_t grp = shift << LG_SIZE_CLASS_GROUP; + + size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1) + ? 
LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1; + + size_t delta_inverse_mask = ZI(-1) << lg_delta; + size_t mod = ((((size-1) & delta_inverse_mask) >> lg_delta)) & + ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1); + + size_t bin = NTBINS + grp + mod; + return (bin); + } +} + +JEMALLOC_ALWAYS_INLINE size_t +small_size2bin_lookup(size_t size) +{ + + assert(size <= LOOKUP_MAXCLASS); + { + size_t ret = ((size_t)(small_size2bin_tab[(size-1) >> + LG_TINY_MIN])); + assert(ret == small_size2bin_compute(size)); + return (ret); + } +} + JEMALLOC_ALWAYS_INLINE size_t small_size2bin(size_t size) { - return ((size_t)(small_size2bin_tab[(size-1) >> LG_TINY_MIN])); + assert(size > 0); + if (size <= LOOKUP_MAXCLASS) + return (small_size2bin_lookup(size)); + else + return (small_size2bin_compute(size)); +} + +JEMALLOC_INLINE size_t +small_bin2size_compute(size_t binind) +{ +#if (NTBINS > 0) + if (binind < NTBINS) + return (ZU(1) << (LG_TINY_MAXCLASS - NTBINS + 1 + binind)); + else +#endif + { + size_t reduced_binind = binind - NTBINS; + size_t grp = reduced_binind >> LG_SIZE_CLASS_GROUP; + size_t mod = reduced_binind & ((ZU(1) << LG_SIZE_CLASS_GROUP) - + 1); + + size_t grp_size_mask = ~((!!grp)-1); + size_t grp_size = ((ZU(1) << (LG_QUANTUM + + (LG_SIZE_CLASS_GROUP-1))) << grp) & grp_size_mask; + + size_t shift = (grp == 0) ? 
1 : grp; + size_t lg_delta = shift + (LG_QUANTUM-1); + size_t mod_size = (mod+1) << lg_delta; + + size_t usize = grp_size + mod_size; + return (usize); + } +} + +JEMALLOC_ALWAYS_INLINE size_t +small_bin2size_lookup(size_t binind) +{ + + assert(binind < NBINS); + { + size_t ret = ((size_t)(small_bin2size_tab[binind])); + assert(ret == small_bin2size_compute(binind)); + return (ret); + } } JEMALLOC_ALWAYS_INLINE size_t small_bin2size(size_t binind) { - return ((size_t)(small_bin2size_tab[binind])); + return (small_bin2size_lookup(binind)); +} + +JEMALLOC_ALWAYS_INLINE size_t +small_s2u_compute(size_t size) +{ +#if (NTBINS > 0) + if (size <= (ZU(1) << LG_TINY_MAXCLASS)) { + size_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1; + size_t lg_ceil = lg_floor(pow2_ceil(size)); + return (lg_ceil < lg_tmin ? (ZU(1) << lg_tmin) : + (ZU(1) << lg_ceil)); + } else +#endif + { + size_t x = lg_floor((size<<1)-1); + size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1) + ? LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1; + size_t delta = ZU(1) << lg_delta; + size_t delta_mask = delta - 1; + size_t usize = (size + delta_mask) & ~delta_mask; + return (usize); + } +} + +JEMALLOC_ALWAYS_INLINE size_t +small_s2u_lookup(size_t size) +{ + size_t ret = (small_bin2size(small_size2bin(size))); + + assert(ret == small_s2u_compute(size)); + return (ret); +} + +JEMALLOC_ALWAYS_INLINE size_t +small_s2u(size_t size) +{ + + assert(size > 0); + if (size <= LOOKUP_MAXCLASS) + return (small_s2u_lookup(size)); + else + return (small_s2u_compute(size)); } # endif /* JEMALLOC_ARENA_INLINE_A */ diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index cf20f1f9..491345c9 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -475,7 +475,7 @@ s2u(size_t size) { if (size <= SMALL_MAXCLASS) - return (small_bin2size(small_size2bin(size))); + return (small_s2u(size)); if (size <= 
arena_maxclass) return (PAGE_CEILING(size)); return (CHUNK_CEILING(size)); @@ -518,7 +518,7 @@ sa2u(size_t size, size_t alignment) if (usize <= arena_maxclass && alignment <= PAGE) { if (usize <= SMALL_MAXCLASS) - return (small_bin2size(small_size2bin(usize))); + return (small_s2u(usize)); return (PAGE_CEILING(usize)); } else { size_t run_size; diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 09ddd4f3..a9a50f14 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -47,6 +47,11 @@ */ #undef JE_FORCE_SYNC_COMPARE_AND_SWAP_8 +/* + * Defined if __builtin_clz() and __builtin_clzl() are available. + */ +#undef JEMALLOC_HAVE_BUILTIN_CLZ + /* * Defined if OSSpin*() functions are available, as provided by Darwin, and * documented in the spinlock(3) manual page. diff --git a/include/jemalloc/internal/jemalloc_internal_macros.h b/include/jemalloc/internal/jemalloc_internal_macros.h index 4e239230..38e28861 100644 --- a/include/jemalloc/internal/jemalloc_internal_macros.h +++ b/include/jemalloc/internal/jemalloc_internal_macros.h @@ -39,9 +39,15 @@ #endif #define ZU(z) ((size_t)z) +#define ZI(z) ((ssize_t)z) #define QU(q) ((uint64_t)q) #define QI(q) ((int64_t)q) +#define KZU(z) ZU(z##ULL) +#define KZI(z) ZI(z##ULL) +#define KQU(q) QU(q##ULL) +#define KQI(q) QI(q##ULL) + #ifndef __DECONST # define __DECONST(type, var) ((type)(uintptr_t)(const void *)(var)) #endif diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index f6c4fbcc..3401301c 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -234,6 +234,7 @@ ixalloc jemalloc_postfork_child jemalloc_postfork_parent jemalloc_prefork +lg_floor malloc_cprintf malloc_mutex_init malloc_mutex_lock @@ -348,8 +349,15 @@ s2u sa2u set_errno small_bin2size 
+small_bin2size_compute +small_bin2size_lookup small_bin2size_tab +small_s2u +small_s2u_compute +small_s2u_lookup small_size2bin +small_size2bin_compute +small_size2bin_lookup small_size2bin_tab stats_cactive stats_cactive_add diff --git a/include/jemalloc/internal/size_classes.sh b/include/jemalloc/internal/size_classes.sh index 960674aa..3edebf23 100755 --- a/include/jemalloc/internal/size_classes.sh +++ b/include/jemalloc/internal/size_classes.sh @@ -2,16 +2,23 @@ # The following limits are chosen such that they cover all supported platforms. -# Range of quanta. -lg_qmin=3 -lg_qmax=4 +# Pointer sizes. +lg_zarr="2 3" + +# Quanta. +lg_qarr="3 4" # The range of tiny size classes is [2^lg_tmin..2^(lg_q-1)]. lg_tmin=3 -# Range of page sizes. -lg_pmin=12 -lg_pmax=16 +# Maximum lookup size. +lg_kmax=12 + +# Page sizes. +lg_parr="12 13 16" + +# Size class group size (number of size classes for each size doubling). +lg_g=2 pow2() { e=$1 @@ -22,68 +29,206 @@ pow2() { done } +lg() { + x=$1 + lg_result=0 + while [ ${x} -gt 1 ] ; do + lg_result=$((${lg_result} + 1)) + x=$((${x} / 2)) + done +} + +size_class() { + index=$1 + lg_grp=$2 + lg_delta=$3 + ndelta=$4 + lg_p=$5 + lg_kmax=$6 + + lg ${ndelta}; lg_ndelta=${lg_result}; pow2 ${lg_ndelta} + if [ ${pow2_result} -lt ${ndelta} ] ; then + rem="yes" + else + rem="no" + fi + + lg_size=${lg_grp} + if [ $((${lg_delta} + ${lg_ndelta})) -eq ${lg_grp} ] ; then + lg_size=$((${lg_grp} + 1)) + else + lg_size=${lg_grp} + rem="yes" + fi + + if [ ${lg_size} -lt ${lg_p} ] ; then + bin="yes" + else + bin="no" + fi + if [ ${lg_size} -lt ${lg_kmax} \ + -o ${lg_size} -eq ${lg_kmax} -a ${rem} = "no" ] ; then + lg_delta_lookup=${lg_delta} + else + lg_delta_lookup="no" + fi + printf ' SC(%3d, %6d, %8d, %6d, %3s, %2s) \\\n' ${index} ${lg_grp} ${lg_delta} ${ndelta} ${bin} ${lg_delta_lookup} + # Defined upon return: + # - lg_delta_lookup (${lg_delta} or "no") + # - bin ("yes" or "no") +} + +sep_line() { + echo " \\" +} + +size_classes() { + lg_z=$1 + 
lg_q=$2 + lg_t=$3 + lg_p=$4 + lg_g=$5 + + pow2 $((${lg_z} + 3)); ptr_bits=${pow2_result} + pow2 ${lg_g}; g=${pow2_result} + + echo "#define SIZE_CLASSES \\" + echo " /* index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup */ \\" + + ntbins=0 + nlbins=0 + lg_tiny_maxclass='"NA"' + nbins=0 + + # Tiny size classes. + ndelta=0 + index=0 + lg_grp=${lg_t} + lg_delta=${lg_grp} + while [ ${lg_grp} -lt ${lg_q} ] ; do + size_class ${index} ${lg_grp} ${lg_delta} ${ndelta} ${lg_p} ${lg_kmax} + if [ ${lg_delta_lookup} != "no" ] ; then + nlbins=$((${index} + 1)) + fi + if [ ${bin} != "no" ] ; then + nbins=$((${index} + 1)) + fi + ntbins=$((${ntbins} + 1)) + lg_tiny_maxclass=${lg_grp} # Final written value is correct. + index=$((${index} + 1)) + lg_delta=${lg_grp} + lg_grp=$((${lg_grp} + 1)) + done + + # First non-tiny group. + if [ ${ntbins} -gt 0 ] ; then + sep_line + # The first size class has an unusual encoding, because the size has to be + # split between grp and delta*ndelta. + lg_grp=$((${lg_grp} - 1)) + ndelta=1 + size_class ${index} ${lg_grp} ${lg_delta} ${ndelta} ${lg_p} ${lg_kmax} + index=$((${index} + 1)) + lg_grp=$((${lg_grp} + 1)) + lg_delta=$((${lg_delta} + 1)) + fi + while [ ${ndelta} -lt ${g} ] ; do + size_class ${index} ${lg_grp} ${lg_delta} ${ndelta} ${lg_p} ${lg_kmax} + index=$((${index} + 1)) + ndelta=$((${ndelta} + 1)) + done + + # All remaining groups. 
+ lg_grp=$((${lg_grp} + ${lg_g})) + while [ ${lg_grp} -lt ${ptr_bits} ] ; do + sep_line + ndelta=1 + if [ ${lg_grp} -eq $((${ptr_bits} - 1)) ] ; then + ndelta_limit=$((${g} - 1)) + else + ndelta_limit=${g} + fi + while [ ${ndelta} -le ${ndelta_limit} ] ; do + size_class ${index} ${lg_grp} ${lg_delta} ${ndelta} ${lg_p} ${lg_kmax} + if [ ${lg_delta_lookup} != "no" ] ; then + nlbins=$((${index} + 1)) + # Final written value is correct: + lookup_maxclass="((((size_t)1) << ${lg_grp}) + (((size_t)${ndelta}) << ${lg_delta}))" + fi + if [ ${bin} != "no" ] ; then + nbins=$((${index} + 1)) + # Final written value is correct: + small_maxclass="((((size_t)1) << ${lg_grp}) + (((size_t)${ndelta}) << ${lg_delta}))" + fi + index=$((${index} + 1)) + ndelta=$((${ndelta} + 1)) + done + lg_grp=$((${lg_grp} + 1)) + lg_delta=$((${lg_delta} + 1)) + done + echo + + # Defined upon completion: + # - ntbins + # - nlbins + # - nbins + # - lg_tiny_maxclass + # - lookup_maxclass + # - small_maxclass +} + cat <> 1); + x |= (x >> 2); + x |= (x >> 4); + x |= (x >> 8); + x |= (x >> 16); +#if (LG_SIZEOF_PTR == 3 && LG_SIZEOF_PTR == LG_SIZEOF_LONG) + x |= (x >> 32); + return (65 - ffsl(~x)); +#elif (LG_SIZEOF_PTR == 2) + return (33 - ffs(~x)); +#else +# error "Unsupported type sizes for lg_floor()" +#endif +} +#endif + /* Sets error code */ JEMALLOC_INLINE void set_errno(int errnum) diff --git a/src/arena.c b/src/arena.c index f5d7d062..c392419e 100644 --- a/src/arena.c +++ b/src/arena.c @@ -9,40 +9,39 @@ arena_bin_info_t arena_bin_info[NBINS]; JEMALLOC_ALIGNED(CACHELINE) const uint32_t small_bin2size_tab[NBINS] = { -#define SIZE_CLASS(bin, delta, size) \ +#define B2S_bin_yes(size) \ size, +#define B2S_bin_no(size) +#define SC(index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup) \ + B2S_bin_##bin((ZU(1)<reg_size = size; \ prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size);\ bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs); +#define BIN_INFO_INIT_bin_no(index, size) +#define 
SC(index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup) \ + BIN_INFO_INIT_bin_##bin(index, (ZU(1)< Date: Wed, 28 May 2014 21:14:16 -0700 Subject: [PATCH 0461/3378] Use KQU() rather than QU() where applicable. Fix KZI() and KQI() to append LL rather than ULL. --- include/jemalloc/internal/hash.h | 8 +- .../internal/jemalloc_internal_macros.h | 4 +- test/src/SFMT.c | 2 +- test/unit/SFMT.c | 2000 ++++++++--------- test/unit/util.c | 78 +- 5 files changed, 1046 insertions(+), 1046 deletions(-) diff --git a/include/jemalloc/internal/hash.h b/include/jemalloc/internal/hash.h index f2b3a16c..a43bbbec 100644 --- a/include/jemalloc/internal/hash.h +++ b/include/jemalloc/internal/hash.h @@ -76,9 +76,9 @@ hash_fmix_64(uint64_t k) { k ^= k >> 33; - k *= QU(0xff51afd7ed558ccdULL); + k *= KQU(0xff51afd7ed558ccd); k ^= k >> 33; - k *= QU(0xc4ceb9fe1a85ec53ULL); + k *= KQU(0xc4ceb9fe1a85ec53); k ^= k >> 33; return (k); @@ -247,8 +247,8 @@ hash_x64_128(const void *key, const int len, const uint32_t seed, uint64_t h1 = seed; uint64_t h2 = seed; - const uint64_t c1 = QU(0x87c37b91114253d5ULL); - const uint64_t c2 = QU(0x4cf5ad432745937fULL); + const uint64_t c1 = KQU(0x87c37b91114253d5); + const uint64_t c2 = KQU(0x4cf5ad432745937f); /* body */ { diff --git a/include/jemalloc/internal/jemalloc_internal_macros.h b/include/jemalloc/internal/jemalloc_internal_macros.h index 38e28861..a08ba772 100644 --- a/include/jemalloc/internal/jemalloc_internal_macros.h +++ b/include/jemalloc/internal/jemalloc_internal_macros.h @@ -44,9 +44,9 @@ #define QI(q) ((int64_t)q) #define KZU(z) ZU(z##ULL) -#define KZI(z) ZI(z##ULL) +#define KZI(z) ZI(z##LL) #define KQU(q) QU(q##ULL) -#define KQI(q) QI(q##ULL) +#define KQI(q) QI(q##LL) #ifndef __DECONST # define __DECONST(type, var) ((type)(uintptr_t)(const void *)(var)) diff --git a/test/src/SFMT.c b/test/src/SFMT.c index d2cc9d1c..22a5ac55 100644 --- a/test/src/SFMT.c +++ b/test/src/SFMT.c @@ -511,7 +511,7 @@ uint64_t gen_rand64(sfmt_t *ctx) { uint64_t 
gen_rand64_range(sfmt_t *ctx, uint64_t limit) { uint64_t ret, above; - above = 0xffffffffffffffffULL - (0xffffffffffffffffULL % limit); + above = KQU(0xffffffffffffffff) - (KQU(0xffffffffffffffff) % limit); while (1) { ret = gen_rand64(ctx); if (ret < above) { diff --git a/test/unit/SFMT.c b/test/unit/SFMT.c index 0ad9c233..88b31f6e 100644 --- a/test/unit/SFMT.c +++ b/test/unit/SFMT.c @@ -445,1008 +445,1008 @@ static const uint32_t init_by_array_32_expected[] = { 2750138839U, 3518055702U, 733072558U, 4169325400U, 788493625U }; static const uint64_t init_gen_rand_64_expected[] = { - QU(16924766246869039260ULL), QU( 8201438687333352714ULL), - QU( 2265290287015001750ULL), QU(18397264611805473832ULL), - QU( 3375255223302384358ULL), QU( 6345559975416828796ULL), - QU(18229739242790328073ULL), QU( 7596792742098800905ULL), - QU( 255338647169685981ULL), QU( 2052747240048610300ULL), - QU(18328151576097299343ULL), QU(12472905421133796567ULL), - QU(11315245349717600863ULL), QU(16594110197775871209ULL), - QU(15708751964632456450ULL), QU(10452031272054632535ULL), - QU(11097646720811454386ULL), QU( 4556090668445745441ULL), - QU(17116187693090663106ULL), QU(14931526836144510645ULL), - QU( 9190752218020552591ULL), QU( 9625800285771901401ULL), - QU(13995141077659972832ULL), QU( 5194209094927829625ULL), - QU( 4156788379151063303ULL), QU( 8523452593770139494ULL), - QU(14082382103049296727ULL), QU( 2462601863986088483ULL), - QU( 3030583461592840678ULL), QU( 5221622077872827681ULL), - QU( 3084210671228981236ULL), QU(13956758381389953823ULL), - QU(13503889856213423831ULL), QU(15696904024189836170ULL), - QU( 4612584152877036206ULL), QU( 6231135538447867881ULL), - QU(10172457294158869468ULL), QU( 6452258628466708150ULL), - QU(14044432824917330221ULL), QU( 370168364480044279ULL), - QU(10102144686427193359ULL), QU( 667870489994776076ULL), - QU( 2732271956925885858ULL), QU(18027788905977284151ULL), - QU(15009842788582923859ULL), QU( 7136357960180199542ULL), - QU(15901736243475578127ULL), 
QU(16951293785352615701ULL), - QU(10551492125243691632ULL), QU(17668869969146434804ULL), - QU(13646002971174390445ULL), QU( 9804471050759613248ULL), - QU( 5511670439655935493ULL), QU(18103342091070400926ULL), - QU(17224512747665137533ULL), QU(15534627482992618168ULL), - QU( 1423813266186582647ULL), QU(15821176807932930024ULL), - QU( 30323369733607156ULL), QU(11599382494723479403ULL), - QU( 653856076586810062ULL), QU( 3176437395144899659ULL), - QU(14028076268147963917ULL), QU(16156398271809666195ULL), - QU( 3166955484848201676ULL), QU( 5746805620136919390ULL), - QU(17297845208891256593ULL), QU(11691653183226428483ULL), - QU(17900026146506981577ULL), QU(15387382115755971042ULL), - QU(16923567681040845943ULL), QU( 8039057517199388606ULL), - QU(11748409241468629263ULL), QU( 794358245539076095ULL), - QU(13438501964693401242ULL), QU(14036803236515618962ULL), - QU( 5252311215205424721ULL), QU(17806589612915509081ULL), - QU( 6802767092397596006ULL), QU(14212120431184557140ULL), - QU( 1072951366761385712ULL), QU(13098491780722836296ULL), - QU( 9466676828710797353ULL), QU(12673056849042830081ULL), - QU(12763726623645357580ULL), QU(16468961652999309493ULL), - QU(15305979875636438926ULL), QU(17444713151223449734ULL), - QU( 5692214267627883674ULL), QU(13049589139196151505ULL), - QU( 880115207831670745ULL), QU( 1776529075789695498ULL), - QU(16695225897801466485ULL), QU(10666901778795346845ULL), - QU( 6164389346722833869ULL), QU( 2863817793264300475ULL), - QU( 9464049921886304754ULL), QU( 3993566636740015468ULL), - QU( 9983749692528514136ULL), QU(16375286075057755211ULL), - QU(16042643417005440820ULL), QU(11445419662923489877ULL), - QU( 7999038846885158836ULL), QU( 6721913661721511535ULL), - QU( 5363052654139357320ULL), QU( 1817788761173584205ULL), - QU(13290974386445856444ULL), QU( 4650350818937984680ULL), - QU( 8219183528102484836ULL), QU( 1569862923500819899ULL), - QU( 4189359732136641860ULL), QU(14202822961683148583ULL), - QU( 4457498315309429058ULL), 
QU(13089067387019074834ULL), - QU(11075517153328927293ULL), QU(10277016248336668389ULL), - QU( 7070509725324401122ULL), QU(17808892017780289380ULL), - QU(13143367339909287349ULL), QU( 1377743745360085151ULL), - QU( 5749341807421286485ULL), QU(14832814616770931325ULL), - QU( 7688820635324359492ULL), QU(10960474011539770045ULL), - QU( 81970066653179790ULL), QU(12619476072607878022ULL), - QU( 4419566616271201744ULL), QU(15147917311750568503ULL), - QU( 5549739182852706345ULL), QU( 7308198397975204770ULL), - QU(13580425496671289278ULL), QU(17070764785210130301ULL), - QU( 8202832846285604405ULL), QU( 6873046287640887249ULL), - QU( 6927424434308206114ULL), QU( 6139014645937224874ULL), - QU(10290373645978487639ULL), QU(15904261291701523804ULL), - QU( 9628743442057826883ULL), QU(18383429096255546714ULL), - QU( 4977413265753686967ULL), QU( 7714317492425012869ULL), - QU( 9025232586309926193ULL), QU(14627338359776709107ULL), - QU(14759849896467790763ULL), QU(10931129435864423252ULL), - QU( 4588456988775014359ULL), QU(10699388531797056724ULL), - QU( 468652268869238792ULL), QU( 5755943035328078086ULL), - QU( 2102437379988580216ULL), QU( 9986312786506674028ULL), - QU( 2654207180040945604ULL), QU( 8726634790559960062ULL), - QU( 100497234871808137ULL), QU( 2800137176951425819ULL), - QU( 6076627612918553487ULL), QU( 5780186919186152796ULL), - QU( 8179183595769929098ULL), QU( 6009426283716221169ULL), - QU( 2796662551397449358ULL), QU( 1756961367041986764ULL), - QU( 6972897917355606205ULL), QU(14524774345368968243ULL), - QU( 2773529684745706940ULL), QU( 4853632376213075959ULL), - QU( 4198177923731358102ULL), QU( 8271224913084139776ULL), - QU( 2741753121611092226ULL), QU(16782366145996731181ULL), - QU(15426125238972640790ULL), QU(13595497100671260342ULL), - QU( 3173531022836259898ULL), QU( 6573264560319511662ULL), - QU(18041111951511157441ULL), QU( 2351433581833135952ULL), - QU( 3113255578908173487ULL), QU( 1739371330877858784ULL), - QU(16046126562789165480ULL), QU( 
8072101652214192925ULL), - QU(15267091584090664910ULL), QU( 9309579200403648940ULL), - QU( 5218892439752408722ULL), QU(14492477246004337115ULL), - QU(17431037586679770619ULL), QU( 7385248135963250480ULL), - QU( 9580144956565560660ULL), QU( 4919546228040008720ULL), - QU(15261542469145035584ULL), QU(18233297270822253102ULL), - QU( 5453248417992302857ULL), QU( 9309519155931460285ULL), - QU(10342813012345291756ULL), QU(15676085186784762381ULL), - QU(15912092950691300645ULL), QU( 9371053121499003195ULL), - QU( 9897186478226866746ULL), QU(14061858287188196327ULL), - QU( 122575971620788119ULL), QU(12146750969116317754ULL), - QU( 4438317272813245201ULL), QU( 8332576791009527119ULL), - QU(13907785691786542057ULL), QU(10374194887283287467ULL), - QU( 2098798755649059566ULL), QU( 3416235197748288894ULL), - QU( 8688269957320773484ULL), QU( 7503964602397371571ULL), - QU(16724977015147478236ULL), QU( 9461512855439858184ULL), - QU(13259049744534534727ULL), QU( 3583094952542899294ULL), - QU( 8764245731305528292ULL), QU(13240823595462088985ULL), - QU(13716141617617910448ULL), QU(18114969519935960955ULL), - QU( 2297553615798302206ULL), QU( 4585521442944663362ULL), - QU(17776858680630198686ULL), QU( 4685873229192163363ULL), - QU( 152558080671135627ULL), QU(15424900540842670088ULL), - QU(13229630297130024108ULL), QU(17530268788245718717ULL), - QU(16675633913065714144ULL), QU( 3158912717897568068ULL), - QU(15399132185380087288ULL), QU( 7401418744515677872ULL), - QU(13135412922344398535ULL), QU( 6385314346100509511ULL), - QU(13962867001134161139ULL), QU(10272780155442671999ULL), - QU(12894856086597769142ULL), QU(13340877795287554994ULL), - QU(12913630602094607396ULL), QU(12543167911119793857ULL), - QU(17343570372251873096ULL), QU(10959487764494150545ULL), - QU( 6966737953093821128ULL), QU(13780699135496988601ULL), - QU( 4405070719380142046ULL), QU(14923788365607284982ULL), - QU( 2869487678905148380ULL), QU( 6416272754197188403ULL), - QU(15017380475943612591ULL), QU( 
1995636220918429487ULL), - QU( 3402016804620122716ULL), QU(15800188663407057080ULL), - QU(11362369990390932882ULL), QU(15262183501637986147ULL), - QU(10239175385387371494ULL), QU( 9352042420365748334ULL), - QU( 1682457034285119875ULL), QU( 1724710651376289644ULL), - QU( 2038157098893817966ULL), QU( 9897825558324608773ULL), - QU( 1477666236519164736ULL), QU(16835397314511233640ULL), - QU(10370866327005346508ULL), QU(10157504370660621982ULL), - QU(12113904045335882069ULL), QU(13326444439742783008ULL), - QU(11302769043000765804ULL), QU(13594979923955228484ULL), - QU(11779351762613475968ULL), QU( 3786101619539298383ULL), - QU( 8021122969180846063ULL), QU(15745904401162500495ULL), - QU(10762168465993897267ULL), QU(13552058957896319026ULL), - QU(11200228655252462013ULL), QU( 5035370357337441226ULL), - QU( 7593918984545500013ULL), QU( 5418554918361528700ULL), - QU( 4858270799405446371ULL), QU( 9974659566876282544ULL), - QU(18227595922273957859ULL), QU( 2772778443635656220ULL), - QU(14285143053182085385ULL), QU( 9939700992429600469ULL), - QU(12756185904545598068ULL), QU( 2020783375367345262ULL), - QU( 57026775058331227ULL), QU( 950827867930065454ULL), - QU( 6602279670145371217ULL), QU( 2291171535443566929ULL), - QU( 5832380724425010313ULL), QU( 1220343904715982285ULL), - QU(17045542598598037633ULL), QU(15460481779702820971ULL), - QU(13948388779949365130ULL), QU(13975040175430829518ULL), - QU(17477538238425541763ULL), QU(11104663041851745725ULL), - QU(15860992957141157587ULL), QU(14529434633012950138ULL), - QU( 2504838019075394203ULL), QU( 7512113882611121886ULL), - QU( 4859973559980886617ULL), QU( 1258601555703250219ULL), - QU(15594548157514316394ULL), QU( 4516730171963773048ULL), - QU(11380103193905031983ULL), QU( 6809282239982353344ULL), - QU(18045256930420065002ULL), QU( 2453702683108791859ULL), - QU( 977214582986981460ULL), QU( 2006410402232713466ULL), - QU( 6192236267216378358ULL), QU( 3429468402195675253ULL), - QU(18146933153017348921ULL), 
QU(17369978576367231139ULL), - QU( 1246940717230386603ULL), QU(11335758870083327110ULL), - QU(14166488801730353682ULL), QU( 9008573127269635732ULL), - QU(10776025389820643815ULL), QU(15087605441903942962ULL), - QU( 1359542462712147922ULL), QU(13898874411226454206ULL), - QU(17911176066536804411ULL), QU( 9435590428600085274ULL), - QU( 294488509967864007ULL), QU( 8890111397567922046ULL), - QU( 7987823476034328778ULL), QU(13263827582440967651ULL), - QU( 7503774813106751573ULL), QU(14974747296185646837ULL), - QU( 8504765037032103375ULL), QU(17340303357444536213ULL), - QU( 7704610912964485743ULL), QU( 8107533670327205061ULL), - QU( 9062969835083315985ULL), QU(16968963142126734184ULL), - QU(12958041214190810180ULL), QU( 2720170147759570200ULL), - QU( 2986358963942189566ULL), QU(14884226322219356580ULL), - QU( 286224325144368520ULL), QU(11313800433154279797ULL), - QU(18366849528439673248ULL), QU(17899725929482368789ULL), - QU( 3730004284609106799ULL), QU( 1654474302052767205ULL), - QU( 5006698007047077032ULL), QU( 8196893913601182838ULL), - QU(15214541774425211640ULL), QU(17391346045606626073ULL), - QU( 8369003584076969089ULL), QU( 3939046733368550293ULL), - QU(10178639720308707785ULL), QU( 2180248669304388697ULL), - QU( 62894391300126322ULL), QU( 9205708961736223191ULL), - QU( 6837431058165360438ULL), QU( 3150743890848308214ULL), - QU(17849330658111464583ULL), QU(12214815643135450865ULL), - QU(13410713840519603402ULL), QU( 3200778126692046802ULL), - QU(13354780043041779313ULL), QU( 800850022756886036ULL), - QU(15660052933953067433ULL), QU( 6572823544154375676ULL), - QU(11030281857015819266ULL), QU(12682241941471433835ULL), - QU(11654136407300274693ULL), QU( 4517795492388641109ULL), - QU( 9757017371504524244ULL), QU(17833043400781889277ULL), - QU(12685085201747792227ULL), QU(10408057728835019573ULL), - QU( 98370418513455221ULL), QU( 6732663555696848598ULL), - QU(13248530959948529780ULL), QU( 3530441401230622826ULL), - QU(18188251992895660615ULL), QU( 
1847918354186383756ULL), - QU( 1127392190402660921ULL), QU(11293734643143819463ULL), - QU( 3015506344578682982ULL), QU(13852645444071153329ULL), - QU( 2121359659091349142ULL), QU( 1294604376116677694ULL), - QU( 5616576231286352318ULL), QU( 7112502442954235625ULL), - QU(11676228199551561689ULL), QU(12925182803007305359ULL), - QU( 7852375518160493082ULL), QU( 1136513130539296154ULL), - QU( 5636923900916593195ULL), QU( 3221077517612607747ULL), - QU(17784790465798152513ULL), QU( 3554210049056995938ULL), - QU(17476839685878225874ULL), QU( 3206836372585575732ULL), - QU( 2765333945644823430ULL), QU(10080070903718799528ULL), - QU( 5412370818878286353ULL), QU( 9689685887726257728ULL), - QU( 8236117509123533998ULL), QU( 1951139137165040214ULL), - QU( 4492205209227980349ULL), QU(16541291230861602967ULL), - QU( 1424371548301437940ULL), QU( 9117562079669206794ULL), - QU(14374681563251691625ULL), QU(13873164030199921303ULL), - QU( 6680317946770936731ULL), QU(15586334026918276214ULL), - QU(10896213950976109802ULL), QU( 9506261949596413689ULL), - QU( 9903949574308040616ULL), QU( 6038397344557204470ULL), - QU( 174601465422373648ULL), QU(15946141191338238030ULL), - QU(17142225620992044937ULL), QU( 7552030283784477064ULL), - QU( 2947372384532947997ULL), QU( 510797021688197711ULL), - QU( 4962499439249363461ULL), QU( 23770320158385357ULL), - QU( 959774499105138124ULL), QU( 1468396011518788276ULL), - QU( 2015698006852312308ULL), QU( 4149400718489980136ULL), - QU( 5992916099522371188ULL), QU(10819182935265531076ULL), - QU(16189787999192351131ULL), QU( 342833961790261950ULL), - QU(12470830319550495336ULL), QU(18128495041912812501ULL), - QU( 1193600899723524337ULL), QU( 9056793666590079770ULL), - QU( 2154021227041669041ULL), QU( 4963570213951235735ULL), - QU( 4865075960209211409ULL), QU( 2097724599039942963ULL), - QU( 2024080278583179845ULL), QU(11527054549196576736ULL), - QU(10650256084182390252ULL), QU( 4808408648695766755ULL), - QU( 1642839215013788844ULL), QU(10607187948250398390ULL), 
- QU( 7076868166085913508ULL), QU( 730522571106887032ULL), - QU(12500579240208524895ULL), QU( 4484390097311355324ULL), - QU(15145801330700623870ULL), QU( 8055827661392944028ULL), - QU( 5865092976832712268ULL), QU(15159212508053625143ULL), - QU( 3560964582876483341ULL), QU( 4070052741344438280ULL), - QU( 6032585709886855634ULL), QU(15643262320904604873ULL), - QU( 2565119772293371111ULL), QU( 318314293065348260ULL), - QU(15047458749141511872ULL), QU( 7772788389811528730ULL), - QU( 7081187494343801976ULL), QU( 6465136009467253947ULL), - QU(10425940692543362069ULL), QU( 554608190318339115ULL), - QU(14796699860302125214ULL), QU( 1638153134431111443ULL), - QU(10336967447052276248ULL), QU( 8412308070396592958ULL), - QU( 4004557277152051226ULL), QU( 8143598997278774834ULL), - QU(16413323996508783221ULL), QU(13139418758033994949ULL), - QU( 9772709138335006667ULL), QU( 2818167159287157659ULL), - QU(17091740573832523669ULL), QU(14629199013130751608ULL), - QU(18268322711500338185ULL), QU( 8290963415675493063ULL), - QU( 8830864907452542588ULL), QU( 1614839084637494849ULL), - QU(14855358500870422231ULL), QU( 3472996748392519937ULL), - QU(15317151166268877716ULL), QU( 5825895018698400362ULL), - QU(16730208429367544129ULL), QU(10481156578141202800ULL), - QU( 4746166512382823750ULL), QU(12720876014472464998ULL), - QU( 8825177124486735972ULL), QU(13733447296837467838ULL), - QU( 6412293741681359625ULL), QU( 8313213138756135033ULL), - QU(11421481194803712517ULL), QU( 7997007691544174032ULL), - QU( 6812963847917605930ULL), QU( 9683091901227558641ULL), - QU(14703594165860324713ULL), QU( 1775476144519618309ULL), - QU( 2724283288516469519ULL), QU( 717642555185856868ULL), - QU( 8736402192215092346ULL), QU(11878800336431381021ULL), - QU( 4348816066017061293ULL), QU( 6115112756583631307ULL), - QU( 9176597239667142976ULL), QU(12615622714894259204ULL), - QU(10283406711301385987ULL), QU( 5111762509485379420ULL), - QU( 3118290051198688449ULL), QU( 7345123071632232145ULL), - QU( 
9176423451688682359ULL), QU( 4843865456157868971ULL), - QU(12008036363752566088ULL), QU(12058837181919397720ULL), - QU( 2145073958457347366ULL), QU( 1526504881672818067ULL), - QU( 3488830105567134848ULL), QU(13208362960674805143ULL), - QU( 4077549672899572192ULL), QU( 7770995684693818365ULL), - QU( 1398532341546313593ULL), QU(12711859908703927840ULL), - QU( 1417561172594446813ULL), QU(17045191024194170604ULL), - QU( 4101933177604931713ULL), QU(14708428834203480320ULL), - QU(17447509264469407724ULL), QU(14314821973983434255ULL), - QU(17990472271061617265ULL), QU( 5087756685841673942ULL), - QU(12797820586893859939ULL), QU( 1778128952671092879ULL), - QU( 3535918530508665898ULL), QU( 9035729701042481301ULL), - QU(14808661568277079962ULL), QU(14587345077537747914ULL), - QU(11920080002323122708ULL), QU( 6426515805197278753ULL), - QU( 3295612216725984831ULL), QU(11040722532100876120ULL), - QU(12305952936387598754ULL), QU(16097391899742004253ULL), - QU( 4908537335606182208ULL), QU(12446674552196795504ULL), - QU(16010497855816895177ULL), QU( 9194378874788615551ULL), - QU( 3382957529567613384ULL), QU( 5154647600754974077ULL), - QU( 9801822865328396141ULL), QU( 9023662173919288143ULL), - QU(17623115353825147868ULL), QU( 8238115767443015816ULL), - QU(15811444159859002560ULL), QU( 9085612528904059661ULL), - QU( 6888601089398614254ULL), QU( 258252992894160189ULL), - QU( 6704363880792428622ULL), QU( 6114966032147235763ULL), - QU(11075393882690261875ULL), QU( 8797664238933620407ULL), - QU( 5901892006476726920ULL), QU( 5309780159285518958ULL), - QU(14940808387240817367ULL), QU(14642032021449656698ULL), - QU( 9808256672068504139ULL), QU( 3670135111380607658ULL), - QU(11211211097845960152ULL), QU( 1474304506716695808ULL), - QU(15843166204506876239ULL), QU( 7661051252471780561ULL), - QU(10170905502249418476ULL), QU( 7801416045582028589ULL), - QU( 2763981484737053050ULL), QU( 9491377905499253054ULL), - QU(16201395896336915095ULL), QU( 9256513756442782198ULL), - QU( 
5411283157972456034ULL), QU( 5059433122288321676ULL), - QU( 4327408006721123357ULL), QU( 9278544078834433377ULL), - QU( 7601527110882281612ULL), QU(11848295896975505251ULL), - QU(12096998801094735560ULL), QU(14773480339823506413ULL), - QU(15586227433895802149ULL), QU(12786541257830242872ULL), - QU( 6904692985140503067ULL), QU( 5309011515263103959ULL), - QU(12105257191179371066ULL), QU(14654380212442225037ULL), - QU( 2556774974190695009ULL), QU( 4461297399927600261ULL), - QU(14888225660915118646ULL), QU(14915459341148291824ULL), - QU( 2738802166252327631ULL), QU( 6047155789239131512ULL), - QU(12920545353217010338ULL), QU(10697617257007840205ULL), - QU( 2751585253158203504ULL), QU(13252729159780047496ULL), - QU(14700326134672815469ULL), QU(14082527904374600529ULL), - QU(16852962273496542070ULL), QU(17446675504235853907ULL), - QU(15019600398527572311ULL), QU(12312781346344081551ULL), - QU(14524667935039810450ULL), QU( 5634005663377195738ULL), - QU(11375574739525000569ULL), QU( 2423665396433260040ULL), - QU( 5222836914796015410ULL), QU( 4397666386492647387ULL), - QU( 4619294441691707638ULL), QU( 665088602354770716ULL), - QU(13246495665281593610ULL), QU( 6564144270549729409ULL), - QU(10223216188145661688ULL), QU( 3961556907299230585ULL), - QU(11543262515492439914ULL), QU(16118031437285993790ULL), - QU( 7143417964520166465ULL), QU(13295053515909486772ULL), - QU( 40434666004899675ULL), QU(17127804194038347164ULL), - QU( 8599165966560586269ULL), QU( 8214016749011284903ULL), - QU(13725130352140465239ULL), QU( 5467254474431726291ULL), - QU( 7748584297438219877ULL), QU(16933551114829772472ULL), - QU( 2169618439506799400ULL), QU( 2169787627665113463ULL), - QU(17314493571267943764ULL), QU(18053575102911354912ULL), - QU(11928303275378476973ULL), QU(11593850925061715550ULL), - QU(17782269923473589362ULL), QU( 3280235307704747039ULL), - QU( 6145343578598685149ULL), QU(17080117031114086090ULL), - QU(18066839902983594755ULL), QU( 6517508430331020706ULL), - QU( 
8092908893950411541ULL), QU(12558378233386153732ULL), - QU( 4476532167973132976ULL), QU(16081642430367025016ULL), - QU( 4233154094369139361ULL), QU( 8693630486693161027ULL), - QU(11244959343027742285ULL), QU(12273503967768513508ULL), - QU(14108978636385284876ULL), QU( 7242414665378826984ULL), - QU( 6561316938846562432ULL), QU( 8601038474994665795ULL), - QU(17532942353612365904ULL), QU(17940076637020912186ULL), - QU( 7340260368823171304ULL), QU( 7061807613916067905ULL), - QU(10561734935039519326ULL), QU(17990796503724650862ULL), - QU( 6208732943911827159ULL), QU( 359077562804090617ULL), - QU(14177751537784403113ULL), QU(10659599444915362902ULL), - QU(15081727220615085833ULL), QU(13417573895659757486ULL), - QU(15513842342017811524ULL), QU(11814141516204288231ULL), - QU( 1827312513875101814ULL), QU( 2804611699894603103ULL), - QU(17116500469975602763ULL), QU(12270191815211952087ULL), - QU(12256358467786024988ULL), QU(18435021722453971267ULL), - QU( 671330264390865618ULL), QU( 476504300460286050ULL), - QU(16465470901027093441ULL), QU( 4047724406247136402ULL), - QU( 1322305451411883346ULL), QU( 1388308688834322280ULL), - QU( 7303989085269758176ULL), QU( 9323792664765233642ULL), - QU( 4542762575316368936ULL), QU(17342696132794337618ULL), - QU( 4588025054768498379ULL), QU(13415475057390330804ULL), - QU(17880279491733405570ULL), QU(10610553400618620353ULL), - QU( 3180842072658960139ULL), QU(13002966655454270120ULL), - QU( 1665301181064982826ULL), QU( 7083673946791258979ULL), - QU( 190522247122496820ULL), QU(17388280237250677740ULL), - QU( 8430770379923642945ULL), QU(12987180971921668584ULL), - QU( 2311086108365390642ULL), QU( 2870984383579822345ULL), - QU(14014682609164653318ULL), QU(14467187293062251484ULL), - QU( 192186361147413298ULL), QU(15171951713531796524ULL), - QU( 9900305495015948728ULL), QU(17958004775615466344ULL), - QU(14346380954498606514ULL), QU(18040047357617407096ULL), - QU( 5035237584833424532ULL), QU(15089555460613972287ULL), - QU( 4131411873749729831ULL), 
QU( 1329013581168250330ULL), - QU(10095353333051193949ULL), QU(10749518561022462716ULL), - QU( 9050611429810755847ULL), QU(15022028840236655649ULL), - QU( 8775554279239748298ULL), QU(13105754025489230502ULL), - QU(15471300118574167585ULL), QU( 89864764002355628ULL), - QU( 8776416323420466637ULL), QU( 5280258630612040891ULL), - QU( 2719174488591862912ULL), QU( 7599309137399661994ULL), - QU(15012887256778039979ULL), QU(14062981725630928925ULL), - QU(12038536286991689603ULL), QU( 7089756544681775245ULL), - QU(10376661532744718039ULL), QU( 1265198725901533130ULL), - QU(13807996727081142408ULL), QU( 2935019626765036403ULL), - QU( 7651672460680700141ULL), QU( 3644093016200370795ULL), - QU( 2840982578090080674ULL), QU(17956262740157449201ULL), - QU(18267979450492880548ULL), QU(11799503659796848070ULL), - QU( 9942537025669672388ULL), QU(11886606816406990297ULL), - QU( 5488594946437447576ULL), QU( 7226714353282744302ULL), - QU( 3784851653123877043ULL), QU( 878018453244803041ULL), - QU(12110022586268616085ULL), QU( 734072179404675123ULL), - QU(11869573627998248542ULL), QU( 469150421297783998ULL), - QU( 260151124912803804ULL), QU(11639179410120968649ULL), - QU( 9318165193840846253ULL), QU(12795671722734758075ULL), - QU(15318410297267253933ULL), QU( 691524703570062620ULL), - QU( 5837129010576994601ULL), QU(15045963859726941052ULL), - QU( 5850056944932238169ULL), QU(12017434144750943807ULL), - QU( 7447139064928956574ULL), QU( 3101711812658245019ULL), - QU(16052940704474982954ULL), QU(18195745945986994042ULL), - QU( 8932252132785575659ULL), QU(13390817488106794834ULL), - QU(11582771836502517453ULL), QU( 4964411326683611686ULL), - QU( 2195093981702694011ULL), QU(14145229538389675669ULL), - QU(16459605532062271798ULL), QU( 866316924816482864ULL), - QU( 4593041209937286377ULL), QU( 8415491391910972138ULL), - QU( 4171236715600528969ULL), QU(16637569303336782889ULL), - QU( 2002011073439212680ULL), QU(17695124661097601411ULL), - QU( 4627687053598611702ULL), QU( 
7895831936020190403ULL), - QU( 8455951300917267802ULL), QU( 2923861649108534854ULL), - QU( 8344557563927786255ULL), QU( 6408671940373352556ULL), - QU(12210227354536675772ULL), QU(14294804157294222295ULL), - QU(10103022425071085127ULL), QU(10092959489504123771ULL), - QU( 6554774405376736268ULL), QU(12629917718410641774ULL), - QU( 6260933257596067126ULL), QU( 2460827021439369673ULL), - QU( 2541962996717103668ULL), QU( 597377203127351475ULL), - QU( 5316984203117315309ULL), QU( 4811211393563241961ULL), - QU(13119698597255811641ULL), QU( 8048691512862388981ULL), - QU(10216818971194073842ULL), QU( 4612229970165291764ULL), - QU(10000980798419974770ULL), QU( 6877640812402540687ULL), - QU( 1488727563290436992ULL), QU( 2227774069895697318ULL), - QU(11237754507523316593ULL), QU(13478948605382290972ULL), - QU( 1963583846976858124ULL), QU( 5512309205269276457ULL), - QU( 3972770164717652347ULL), QU( 3841751276198975037ULL), - QU(10283343042181903117ULL), QU( 8564001259792872199ULL), - QU(16472187244722489221ULL), QU( 8953493499268945921ULL), - QU( 3518747340357279580ULL), QU( 4003157546223963073ULL), - QU( 3270305958289814590ULL), QU( 3966704458129482496ULL), - QU( 8122141865926661939ULL), QU(14627734748099506653ULL), - QU(13064426990862560568ULL), QU( 2414079187889870829ULL), - QU( 5378461209354225306ULL), QU(10841985740128255566ULL), - QU( 538582442885401738ULL), QU( 7535089183482905946ULL), - QU(16117559957598879095ULL), QU( 8477890721414539741ULL), - QU( 1459127491209533386ULL), QU(17035126360733620462ULL), - QU( 8517668552872379126ULL), QU(10292151468337355014ULL), - QU(17081267732745344157ULL), QU(13751455337946087178ULL), - QU(14026945459523832966ULL), QU( 6653278775061723516ULL), - QU(10619085543856390441ULL), QU( 2196343631481122885ULL), - QU(10045966074702826136ULL), QU(10082317330452718282ULL), - QU( 5920859259504831242ULL), QU( 9951879073426540617ULL), - QU( 7074696649151414158ULL), QU(15808193543879464318ULL), - QU( 7385247772746953374ULL), QU( 
3192003544283864292ULL), - QU(18153684490917593847ULL), QU(12423498260668568905ULL), - QU(10957758099756378169ULL), QU(11488762179911016040ULL), - QU( 2099931186465333782ULL), QU(11180979581250294432ULL), - QU( 8098916250668367933ULL), QU( 3529200436790763465ULL), - QU(12988418908674681745ULL), QU( 6147567275954808580ULL), - QU( 3207503344604030989ULL), QU(10761592604898615360ULL), - QU( 229854861031893504ULL), QU( 8809853962667144291ULL), - QU(13957364469005693860ULL), QU( 7634287665224495886ULL), - QU(12353487366976556874ULL), QU( 1134423796317152034ULL), - QU( 2088992471334107068ULL), QU( 7393372127190799698ULL), - QU( 1845367839871058391ULL), QU( 207922563987322884ULL), - QU(11960870813159944976ULL), QU(12182120053317317363ULL), - QU(17307358132571709283ULL), QU(13871081155552824936ULL), - QU(18304446751741566262ULL), QU( 7178705220184302849ULL), - QU(10929605677758824425ULL), QU(16446976977835806844ULL), - QU(13723874412159769044ULL), QU( 6942854352100915216ULL), - QU( 1726308474365729390ULL), QU( 2150078766445323155ULL), - QU(15345558947919656626ULL), QU(12145453828874527201ULL), - QU( 2054448620739726849ULL), QU( 2740102003352628137ULL), - QU(11294462163577610655ULL), QU( 756164283387413743ULL), - QU(17841144758438810880ULL), QU(10802406021185415861ULL), - QU( 8716455530476737846ULL), QU( 6321788834517649606ULL), - QU(14681322910577468426ULL), QU(17330043563884336387ULL), - QU(12701802180050071614ULL), QU(14695105111079727151ULL), - QU( 5112098511654172830ULL), QU( 4957505496794139973ULL), - QU( 8270979451952045982ULL), QU(12307685939199120969ULL), - QU(12425799408953443032ULL), QU( 8376410143634796588ULL), - QU(16621778679680060464ULL), QU( 3580497854566660073ULL), - QU( 1122515747803382416ULL), QU( 857664980960597599ULL), - QU( 6343640119895925918ULL), QU(12878473260854462891ULL), - QU(10036813920765722626ULL), QU(14451335468363173812ULL), - QU( 5476809692401102807ULL), QU(16442255173514366342ULL), - QU(13060203194757167104ULL), 
QU(14354124071243177715ULL), - QU(15961249405696125227ULL), QU(13703893649690872584ULL), - QU( 363907326340340064ULL), QU( 6247455540491754842ULL), - QU(12242249332757832361ULL), QU( 156065475679796717ULL), - QU( 9351116235749732355ULL), QU( 4590350628677701405ULL), - QU( 1671195940982350389ULL), QU(13501398458898451905ULL), - QU( 6526341991225002255ULL), QU( 1689782913778157592ULL), - QU( 7439222350869010334ULL), QU(13975150263226478308ULL), - QU(11411961169932682710ULL), QU(17204271834833847277ULL), - QU( 541534742544435367ULL), QU( 6591191931218949684ULL), - QU( 2645454775478232486ULL), QU( 4322857481256485321ULL), - QU( 8477416487553065110ULL), QU(12902505428548435048ULL), - QU( 971445777981341415ULL), QU(14995104682744976712ULL), - QU( 4243341648807158063ULL), QU( 8695061252721927661ULL), - QU( 5028202003270177222ULL), QU( 2289257340915567840ULL), - QU(13870416345121866007ULL), QU(13994481698072092233ULL), - QU( 6912785400753196481ULL), QU( 2278309315841980139ULL), - QU( 4329765449648304839ULL), QU( 5963108095785485298ULL), - QU( 4880024847478722478ULL), QU(16015608779890240947ULL), - QU( 1866679034261393544ULL), QU( 914821179919731519ULL), - QU( 9643404035648760131ULL), QU( 2418114953615593915ULL), - QU( 944756836073702374ULL), QU(15186388048737296834ULL), - QU( 7723355336128442206ULL), QU( 7500747479679599691ULL), - QU(18013961306453293634ULL), QU( 2315274808095756456ULL), - QU(13655308255424029566ULL), QU(17203800273561677098ULL), - QU( 1382158694422087756ULL), QU( 5090390250309588976ULL), - QU( 517170818384213989ULL), QU( 1612709252627729621ULL), - QU( 1330118955572449606ULL), QU( 300922478056709885ULL), - QU(18115693291289091987ULL), QU(13491407109725238321ULL), - QU(15293714633593827320ULL), QU( 5151539373053314504ULL), - QU( 5951523243743139207ULL), QU(14459112015249527975ULL), - QU( 5456113959000700739ULL), QU( 3877918438464873016ULL), - QU(12534071654260163555ULL), QU(15871678376893555041ULL), - QU(11005484805712025549ULL), 
QU(16353066973143374252ULL), - QU( 4358331472063256685ULL), QU( 8268349332210859288ULL), - QU(12485161590939658075ULL), QU(13955993592854471343ULL), - QU( 5911446886848367039ULL), QU(14925834086813706974ULL), - QU( 6590362597857994805ULL), QU( 1280544923533661875ULL), - QU( 1637756018947988164ULL), QU( 4734090064512686329ULL), - QU(16693705263131485912ULL), QU( 6834882340494360958ULL), - QU( 8120732176159658505ULL), QU( 2244371958905329346ULL), - QU(10447499707729734021ULL), QU( 7318742361446942194ULL), - QU( 8032857516355555296ULL), QU(14023605983059313116ULL), - QU( 1032336061815461376ULL), QU( 9840995337876562612ULL), - QU( 9869256223029203587ULL), QU(12227975697177267636ULL), - QU(12728115115844186033ULL), QU( 7752058479783205470ULL), - QU( 729733219713393087ULL), QU(12954017801239007622ULL) + KQU(16924766246869039260), KQU( 8201438687333352714), + KQU( 2265290287015001750), KQU(18397264611805473832), + KQU( 3375255223302384358), KQU( 6345559975416828796), + KQU(18229739242790328073), KQU( 7596792742098800905), + KQU( 255338647169685981), KQU( 2052747240048610300), + KQU(18328151576097299343), KQU(12472905421133796567), + KQU(11315245349717600863), KQU(16594110197775871209), + KQU(15708751964632456450), KQU(10452031272054632535), + KQU(11097646720811454386), KQU( 4556090668445745441), + KQU(17116187693090663106), KQU(14931526836144510645), + KQU( 9190752218020552591), KQU( 9625800285771901401), + KQU(13995141077659972832), KQU( 5194209094927829625), + KQU( 4156788379151063303), KQU( 8523452593770139494), + KQU(14082382103049296727), KQU( 2462601863986088483), + KQU( 3030583461592840678), KQU( 5221622077872827681), + KQU( 3084210671228981236), KQU(13956758381389953823), + KQU(13503889856213423831), KQU(15696904024189836170), + KQU( 4612584152877036206), KQU( 6231135538447867881), + KQU(10172457294158869468), KQU( 6452258628466708150), + KQU(14044432824917330221), KQU( 370168364480044279), + KQU(10102144686427193359), KQU( 667870489994776076), + KQU( 
2732271956925885858), KQU(18027788905977284151), + KQU(15009842788582923859), KQU( 7136357960180199542), + KQU(15901736243475578127), KQU(16951293785352615701), + KQU(10551492125243691632), KQU(17668869969146434804), + KQU(13646002971174390445), KQU( 9804471050759613248), + KQU( 5511670439655935493), KQU(18103342091070400926), + KQU(17224512747665137533), KQU(15534627482992618168), + KQU( 1423813266186582647), KQU(15821176807932930024), + KQU( 30323369733607156), KQU(11599382494723479403), + KQU( 653856076586810062), KQU( 3176437395144899659), + KQU(14028076268147963917), KQU(16156398271809666195), + KQU( 3166955484848201676), KQU( 5746805620136919390), + KQU(17297845208891256593), KQU(11691653183226428483), + KQU(17900026146506981577), KQU(15387382115755971042), + KQU(16923567681040845943), KQU( 8039057517199388606), + KQU(11748409241468629263), KQU( 794358245539076095), + KQU(13438501964693401242), KQU(14036803236515618962), + KQU( 5252311215205424721), KQU(17806589612915509081), + KQU( 6802767092397596006), KQU(14212120431184557140), + KQU( 1072951366761385712), KQU(13098491780722836296), + KQU( 9466676828710797353), KQU(12673056849042830081), + KQU(12763726623645357580), KQU(16468961652999309493), + KQU(15305979875636438926), KQU(17444713151223449734), + KQU( 5692214267627883674), KQU(13049589139196151505), + KQU( 880115207831670745), KQU( 1776529075789695498), + KQU(16695225897801466485), KQU(10666901778795346845), + KQU( 6164389346722833869), KQU( 2863817793264300475), + KQU( 9464049921886304754), KQU( 3993566636740015468), + KQU( 9983749692528514136), KQU(16375286075057755211), + KQU(16042643417005440820), KQU(11445419662923489877), + KQU( 7999038846885158836), KQU( 6721913661721511535), + KQU( 5363052654139357320), KQU( 1817788761173584205), + KQU(13290974386445856444), KQU( 4650350818937984680), + KQU( 8219183528102484836), KQU( 1569862923500819899), + KQU( 4189359732136641860), KQU(14202822961683148583), + KQU( 4457498315309429058), 
KQU(13089067387019074834), + KQU(11075517153328927293), KQU(10277016248336668389), + KQU( 7070509725324401122), KQU(17808892017780289380), + KQU(13143367339909287349), KQU( 1377743745360085151), + KQU( 5749341807421286485), KQU(14832814616770931325), + KQU( 7688820635324359492), KQU(10960474011539770045), + KQU( 81970066653179790), KQU(12619476072607878022), + KQU( 4419566616271201744), KQU(15147917311750568503), + KQU( 5549739182852706345), KQU( 7308198397975204770), + KQU(13580425496671289278), KQU(17070764785210130301), + KQU( 8202832846285604405), KQU( 6873046287640887249), + KQU( 6927424434308206114), KQU( 6139014645937224874), + KQU(10290373645978487639), KQU(15904261291701523804), + KQU( 9628743442057826883), KQU(18383429096255546714), + KQU( 4977413265753686967), KQU( 7714317492425012869), + KQU( 9025232586309926193), KQU(14627338359776709107), + KQU(14759849896467790763), KQU(10931129435864423252), + KQU( 4588456988775014359), KQU(10699388531797056724), + KQU( 468652268869238792), KQU( 5755943035328078086), + KQU( 2102437379988580216), KQU( 9986312786506674028), + KQU( 2654207180040945604), KQU( 8726634790559960062), + KQU( 100497234871808137), KQU( 2800137176951425819), + KQU( 6076627612918553487), KQU( 5780186919186152796), + KQU( 8179183595769929098), KQU( 6009426283716221169), + KQU( 2796662551397449358), KQU( 1756961367041986764), + KQU( 6972897917355606205), KQU(14524774345368968243), + KQU( 2773529684745706940), KQU( 4853632376213075959), + KQU( 4198177923731358102), KQU( 8271224913084139776), + KQU( 2741753121611092226), KQU(16782366145996731181), + KQU(15426125238972640790), KQU(13595497100671260342), + KQU( 3173531022836259898), KQU( 6573264560319511662), + KQU(18041111951511157441), KQU( 2351433581833135952), + KQU( 3113255578908173487), KQU( 1739371330877858784), + KQU(16046126562789165480), KQU( 8072101652214192925), + KQU(15267091584090664910), KQU( 9309579200403648940), + KQU( 5218892439752408722), KQU(14492477246004337115), + 
KQU(17431037586679770619), KQU( 7385248135963250480), + KQU( 9580144956565560660), KQU( 4919546228040008720), + KQU(15261542469145035584), KQU(18233297270822253102), + KQU( 5453248417992302857), KQU( 9309519155931460285), + KQU(10342813012345291756), KQU(15676085186784762381), + KQU(15912092950691300645), KQU( 9371053121499003195), + KQU( 9897186478226866746), KQU(14061858287188196327), + KQU( 122575971620788119), KQU(12146750969116317754), + KQU( 4438317272813245201), KQU( 8332576791009527119), + KQU(13907785691786542057), KQU(10374194887283287467), + KQU( 2098798755649059566), KQU( 3416235197748288894), + KQU( 8688269957320773484), KQU( 7503964602397371571), + KQU(16724977015147478236), KQU( 9461512855439858184), + KQU(13259049744534534727), KQU( 3583094952542899294), + KQU( 8764245731305528292), KQU(13240823595462088985), + KQU(13716141617617910448), KQU(18114969519935960955), + KQU( 2297553615798302206), KQU( 4585521442944663362), + KQU(17776858680630198686), KQU( 4685873229192163363), + KQU( 152558080671135627), KQU(15424900540842670088), + KQU(13229630297130024108), KQU(17530268788245718717), + KQU(16675633913065714144), KQU( 3158912717897568068), + KQU(15399132185380087288), KQU( 7401418744515677872), + KQU(13135412922344398535), KQU( 6385314346100509511), + KQU(13962867001134161139), KQU(10272780155442671999), + KQU(12894856086597769142), KQU(13340877795287554994), + KQU(12913630602094607396), KQU(12543167911119793857), + KQU(17343570372251873096), KQU(10959487764494150545), + KQU( 6966737953093821128), KQU(13780699135496988601), + KQU( 4405070719380142046), KQU(14923788365607284982), + KQU( 2869487678905148380), KQU( 6416272754197188403), + KQU(15017380475943612591), KQU( 1995636220918429487), + KQU( 3402016804620122716), KQU(15800188663407057080), + KQU(11362369990390932882), KQU(15262183501637986147), + KQU(10239175385387371494), KQU( 9352042420365748334), + KQU( 1682457034285119875), KQU( 1724710651376289644), + KQU( 2038157098893817966), KQU( 
9897825558324608773), + KQU( 1477666236519164736), KQU(16835397314511233640), + KQU(10370866327005346508), KQU(10157504370660621982), + KQU(12113904045335882069), KQU(13326444439742783008), + KQU(11302769043000765804), KQU(13594979923955228484), + KQU(11779351762613475968), KQU( 3786101619539298383), + KQU( 8021122969180846063), KQU(15745904401162500495), + KQU(10762168465993897267), KQU(13552058957896319026), + KQU(11200228655252462013), KQU( 5035370357337441226), + KQU( 7593918984545500013), KQU( 5418554918361528700), + KQU( 4858270799405446371), KQU( 9974659566876282544), + KQU(18227595922273957859), KQU( 2772778443635656220), + KQU(14285143053182085385), KQU( 9939700992429600469), + KQU(12756185904545598068), KQU( 2020783375367345262), + KQU( 57026775058331227), KQU( 950827867930065454), + KQU( 6602279670145371217), KQU( 2291171535443566929), + KQU( 5832380724425010313), KQU( 1220343904715982285), + KQU(17045542598598037633), KQU(15460481779702820971), + KQU(13948388779949365130), KQU(13975040175430829518), + KQU(17477538238425541763), KQU(11104663041851745725), + KQU(15860992957141157587), KQU(14529434633012950138), + KQU( 2504838019075394203), KQU( 7512113882611121886), + KQU( 4859973559980886617), KQU( 1258601555703250219), + KQU(15594548157514316394), KQU( 4516730171963773048), + KQU(11380103193905031983), KQU( 6809282239982353344), + KQU(18045256930420065002), KQU( 2453702683108791859), + KQU( 977214582986981460), KQU( 2006410402232713466), + KQU( 6192236267216378358), KQU( 3429468402195675253), + KQU(18146933153017348921), KQU(17369978576367231139), + KQU( 1246940717230386603), KQU(11335758870083327110), + KQU(14166488801730353682), KQU( 9008573127269635732), + KQU(10776025389820643815), KQU(15087605441903942962), + KQU( 1359542462712147922), KQU(13898874411226454206), + KQU(17911176066536804411), KQU( 9435590428600085274), + KQU( 294488509967864007), KQU( 8890111397567922046), + KQU( 7987823476034328778), KQU(13263827582440967651), + KQU( 
7503774813106751573), KQU(14974747296185646837), + KQU( 8504765037032103375), KQU(17340303357444536213), + KQU( 7704610912964485743), KQU( 8107533670327205061), + KQU( 9062969835083315985), KQU(16968963142126734184), + KQU(12958041214190810180), KQU( 2720170147759570200), + KQU( 2986358963942189566), KQU(14884226322219356580), + KQU( 286224325144368520), KQU(11313800433154279797), + KQU(18366849528439673248), KQU(17899725929482368789), + KQU( 3730004284609106799), KQU( 1654474302052767205), + KQU( 5006698007047077032), KQU( 8196893913601182838), + KQU(15214541774425211640), KQU(17391346045606626073), + KQU( 8369003584076969089), KQU( 3939046733368550293), + KQU(10178639720308707785), KQU( 2180248669304388697), + KQU( 62894391300126322), KQU( 9205708961736223191), + KQU( 6837431058165360438), KQU( 3150743890848308214), + KQU(17849330658111464583), KQU(12214815643135450865), + KQU(13410713840519603402), KQU( 3200778126692046802), + KQU(13354780043041779313), KQU( 800850022756886036), + KQU(15660052933953067433), KQU( 6572823544154375676), + KQU(11030281857015819266), KQU(12682241941471433835), + KQU(11654136407300274693), KQU( 4517795492388641109), + KQU( 9757017371504524244), KQU(17833043400781889277), + KQU(12685085201747792227), KQU(10408057728835019573), + KQU( 98370418513455221), KQU( 6732663555696848598), + KQU(13248530959948529780), KQU( 3530441401230622826), + KQU(18188251992895660615), KQU( 1847918354186383756), + KQU( 1127392190402660921), KQU(11293734643143819463), + KQU( 3015506344578682982), KQU(13852645444071153329), + KQU( 2121359659091349142), KQU( 1294604376116677694), + KQU( 5616576231286352318), KQU( 7112502442954235625), + KQU(11676228199551561689), KQU(12925182803007305359), + KQU( 7852375518160493082), KQU( 1136513130539296154), + KQU( 5636923900916593195), KQU( 3221077517612607747), + KQU(17784790465798152513), KQU( 3554210049056995938), + KQU(17476839685878225874), KQU( 3206836372585575732), + KQU( 2765333945644823430), 
KQU(10080070903718799528), + KQU( 5412370818878286353), KQU( 9689685887726257728), + KQU( 8236117509123533998), KQU( 1951139137165040214), + KQU( 4492205209227980349), KQU(16541291230861602967), + KQU( 1424371548301437940), KQU( 9117562079669206794), + KQU(14374681563251691625), KQU(13873164030199921303), + KQU( 6680317946770936731), KQU(15586334026918276214), + KQU(10896213950976109802), KQU( 9506261949596413689), + KQU( 9903949574308040616), KQU( 6038397344557204470), + KQU( 174601465422373648), KQU(15946141191338238030), + KQU(17142225620992044937), KQU( 7552030283784477064), + KQU( 2947372384532947997), KQU( 510797021688197711), + KQU( 4962499439249363461), KQU( 23770320158385357), + KQU( 959774499105138124), KQU( 1468396011518788276), + KQU( 2015698006852312308), KQU( 4149400718489980136), + KQU( 5992916099522371188), KQU(10819182935265531076), + KQU(16189787999192351131), KQU( 342833961790261950), + KQU(12470830319550495336), KQU(18128495041912812501), + KQU( 1193600899723524337), KQU( 9056793666590079770), + KQU( 2154021227041669041), KQU( 4963570213951235735), + KQU( 4865075960209211409), KQU( 2097724599039942963), + KQU( 2024080278583179845), KQU(11527054549196576736), + KQU(10650256084182390252), KQU( 4808408648695766755), + KQU( 1642839215013788844), KQU(10607187948250398390), + KQU( 7076868166085913508), KQU( 730522571106887032), + KQU(12500579240208524895), KQU( 4484390097311355324), + KQU(15145801330700623870), KQU( 8055827661392944028), + KQU( 5865092976832712268), KQU(15159212508053625143), + KQU( 3560964582876483341), KQU( 4070052741344438280), + KQU( 6032585709886855634), KQU(15643262320904604873), + KQU( 2565119772293371111), KQU( 318314293065348260), + KQU(15047458749141511872), KQU( 7772788389811528730), + KQU( 7081187494343801976), KQU( 6465136009467253947), + KQU(10425940692543362069), KQU( 554608190318339115), + KQU(14796699860302125214), KQU( 1638153134431111443), + KQU(10336967447052276248), KQU( 8412308070396592958), + KQU( 
4004557277152051226), KQU( 8143598997278774834), + KQU(16413323996508783221), KQU(13139418758033994949), + KQU( 9772709138335006667), KQU( 2818167159287157659), + KQU(17091740573832523669), KQU(14629199013130751608), + KQU(18268322711500338185), KQU( 8290963415675493063), + KQU( 8830864907452542588), KQU( 1614839084637494849), + KQU(14855358500870422231), KQU( 3472996748392519937), + KQU(15317151166268877716), KQU( 5825895018698400362), + KQU(16730208429367544129), KQU(10481156578141202800), + KQU( 4746166512382823750), KQU(12720876014472464998), + KQU( 8825177124486735972), KQU(13733447296837467838), + KQU( 6412293741681359625), KQU( 8313213138756135033), + KQU(11421481194803712517), KQU( 7997007691544174032), + KQU( 6812963847917605930), KQU( 9683091901227558641), + KQU(14703594165860324713), KQU( 1775476144519618309), + KQU( 2724283288516469519), KQU( 717642555185856868), + KQU( 8736402192215092346), KQU(11878800336431381021), + KQU( 4348816066017061293), KQU( 6115112756583631307), + KQU( 9176597239667142976), KQU(12615622714894259204), + KQU(10283406711301385987), KQU( 5111762509485379420), + KQU( 3118290051198688449), KQU( 7345123071632232145), + KQU( 9176423451688682359), KQU( 4843865456157868971), + KQU(12008036363752566088), KQU(12058837181919397720), + KQU( 2145073958457347366), KQU( 1526504881672818067), + KQU( 3488830105567134848), KQU(13208362960674805143), + KQU( 4077549672899572192), KQU( 7770995684693818365), + KQU( 1398532341546313593), KQU(12711859908703927840), + KQU( 1417561172594446813), KQU(17045191024194170604), + KQU( 4101933177604931713), KQU(14708428834203480320), + KQU(17447509264469407724), KQU(14314821973983434255), + KQU(17990472271061617265), KQU( 5087756685841673942), + KQU(12797820586893859939), KQU( 1778128952671092879), + KQU( 3535918530508665898), KQU( 9035729701042481301), + KQU(14808661568277079962), KQU(14587345077537747914), + KQU(11920080002323122708), KQU( 6426515805197278753), + KQU( 3295612216725984831), 
KQU(11040722532100876120), + KQU(12305952936387598754), KQU(16097391899742004253), + KQU( 4908537335606182208), KQU(12446674552196795504), + KQU(16010497855816895177), KQU( 9194378874788615551), + KQU( 3382957529567613384), KQU( 5154647600754974077), + KQU( 9801822865328396141), KQU( 9023662173919288143), + KQU(17623115353825147868), KQU( 8238115767443015816), + KQU(15811444159859002560), KQU( 9085612528904059661), + KQU( 6888601089398614254), KQU( 258252992894160189), + KQU( 6704363880792428622), KQU( 6114966032147235763), + KQU(11075393882690261875), KQU( 8797664238933620407), + KQU( 5901892006476726920), KQU( 5309780159285518958), + KQU(14940808387240817367), KQU(14642032021449656698), + KQU( 9808256672068504139), KQU( 3670135111380607658), + KQU(11211211097845960152), KQU( 1474304506716695808), + KQU(15843166204506876239), KQU( 7661051252471780561), + KQU(10170905502249418476), KQU( 7801416045582028589), + KQU( 2763981484737053050), KQU( 9491377905499253054), + KQU(16201395896336915095), KQU( 9256513756442782198), + KQU( 5411283157972456034), KQU( 5059433122288321676), + KQU( 4327408006721123357), KQU( 9278544078834433377), + KQU( 7601527110882281612), KQU(11848295896975505251), + KQU(12096998801094735560), KQU(14773480339823506413), + KQU(15586227433895802149), KQU(12786541257830242872), + KQU( 6904692985140503067), KQU( 5309011515263103959), + KQU(12105257191179371066), KQU(14654380212442225037), + KQU( 2556774974190695009), KQU( 4461297399927600261), + KQU(14888225660915118646), KQU(14915459341148291824), + KQU( 2738802166252327631), KQU( 6047155789239131512), + KQU(12920545353217010338), KQU(10697617257007840205), + KQU( 2751585253158203504), KQU(13252729159780047496), + KQU(14700326134672815469), KQU(14082527904374600529), + KQU(16852962273496542070), KQU(17446675504235853907), + KQU(15019600398527572311), KQU(12312781346344081551), + KQU(14524667935039810450), KQU( 5634005663377195738), + KQU(11375574739525000569), KQU( 2423665396433260040), + KQU( 
5222836914796015410), KQU( 4397666386492647387), + KQU( 4619294441691707638), KQU( 665088602354770716), + KQU(13246495665281593610), KQU( 6564144270549729409), + KQU(10223216188145661688), KQU( 3961556907299230585), + KQU(11543262515492439914), KQU(16118031437285993790), + KQU( 7143417964520166465), KQU(13295053515909486772), + KQU( 40434666004899675), KQU(17127804194038347164), + KQU( 8599165966560586269), KQU( 8214016749011284903), + KQU(13725130352140465239), KQU( 5467254474431726291), + KQU( 7748584297438219877), KQU(16933551114829772472), + KQU( 2169618439506799400), KQU( 2169787627665113463), + KQU(17314493571267943764), KQU(18053575102911354912), + KQU(11928303275378476973), KQU(11593850925061715550), + KQU(17782269923473589362), KQU( 3280235307704747039), + KQU( 6145343578598685149), KQU(17080117031114086090), + KQU(18066839902983594755), KQU( 6517508430331020706), + KQU( 8092908893950411541), KQU(12558378233386153732), + KQU( 4476532167973132976), KQU(16081642430367025016), + KQU( 4233154094369139361), KQU( 8693630486693161027), + KQU(11244959343027742285), KQU(12273503967768513508), + KQU(14108978636385284876), KQU( 7242414665378826984), + KQU( 6561316938846562432), KQU( 8601038474994665795), + KQU(17532942353612365904), KQU(17940076637020912186), + KQU( 7340260368823171304), KQU( 7061807613916067905), + KQU(10561734935039519326), KQU(17990796503724650862), + KQU( 6208732943911827159), KQU( 359077562804090617), + KQU(14177751537784403113), KQU(10659599444915362902), + KQU(15081727220615085833), KQU(13417573895659757486), + KQU(15513842342017811524), KQU(11814141516204288231), + KQU( 1827312513875101814), KQU( 2804611699894603103), + KQU(17116500469975602763), KQU(12270191815211952087), + KQU(12256358467786024988), KQU(18435021722453971267), + KQU( 671330264390865618), KQU( 476504300460286050), + KQU(16465470901027093441), KQU( 4047724406247136402), + KQU( 1322305451411883346), KQU( 1388308688834322280), + KQU( 7303989085269758176), KQU( 
9323792664765233642), + KQU( 4542762575316368936), KQU(17342696132794337618), + KQU( 4588025054768498379), KQU(13415475057390330804), + KQU(17880279491733405570), KQU(10610553400618620353), + KQU( 3180842072658960139), KQU(13002966655454270120), + KQU( 1665301181064982826), KQU( 7083673946791258979), + KQU( 190522247122496820), KQU(17388280237250677740), + KQU( 8430770379923642945), KQU(12987180971921668584), + KQU( 2311086108365390642), KQU( 2870984383579822345), + KQU(14014682609164653318), KQU(14467187293062251484), + KQU( 192186361147413298), KQU(15171951713531796524), + KQU( 9900305495015948728), KQU(17958004775615466344), + KQU(14346380954498606514), KQU(18040047357617407096), + KQU( 5035237584833424532), KQU(15089555460613972287), + KQU( 4131411873749729831), KQU( 1329013581168250330), + KQU(10095353333051193949), KQU(10749518561022462716), + KQU( 9050611429810755847), KQU(15022028840236655649), + KQU( 8775554279239748298), KQU(13105754025489230502), + KQU(15471300118574167585), KQU( 89864764002355628), + KQU( 8776416323420466637), KQU( 5280258630612040891), + KQU( 2719174488591862912), KQU( 7599309137399661994), + KQU(15012887256778039979), KQU(14062981725630928925), + KQU(12038536286991689603), KQU( 7089756544681775245), + KQU(10376661532744718039), KQU( 1265198725901533130), + KQU(13807996727081142408), KQU( 2935019626765036403), + KQU( 7651672460680700141), KQU( 3644093016200370795), + KQU( 2840982578090080674), KQU(17956262740157449201), + KQU(18267979450492880548), KQU(11799503659796848070), + KQU( 9942537025669672388), KQU(11886606816406990297), + KQU( 5488594946437447576), KQU( 7226714353282744302), + KQU( 3784851653123877043), KQU( 878018453244803041), + KQU(12110022586268616085), KQU( 734072179404675123), + KQU(11869573627998248542), KQU( 469150421297783998), + KQU( 260151124912803804), KQU(11639179410120968649), + KQU( 9318165193840846253), KQU(12795671722734758075), + KQU(15318410297267253933), KQU( 691524703570062620), + KQU( 
5837129010576994601), KQU(15045963859726941052), + KQU( 5850056944932238169), KQU(12017434144750943807), + KQU( 7447139064928956574), KQU( 3101711812658245019), + KQU(16052940704474982954), KQU(18195745945986994042), + KQU( 8932252132785575659), KQU(13390817488106794834), + KQU(11582771836502517453), KQU( 4964411326683611686), + KQU( 2195093981702694011), KQU(14145229538389675669), + KQU(16459605532062271798), KQU( 866316924816482864), + KQU( 4593041209937286377), KQU( 8415491391910972138), + KQU( 4171236715600528969), KQU(16637569303336782889), + KQU( 2002011073439212680), KQU(17695124661097601411), + KQU( 4627687053598611702), KQU( 7895831936020190403), + KQU( 8455951300917267802), KQU( 2923861649108534854), + KQU( 8344557563927786255), KQU( 6408671940373352556), + KQU(12210227354536675772), KQU(14294804157294222295), + KQU(10103022425071085127), KQU(10092959489504123771), + KQU( 6554774405376736268), KQU(12629917718410641774), + KQU( 6260933257596067126), KQU( 2460827021439369673), + KQU( 2541962996717103668), KQU( 597377203127351475), + KQU( 5316984203117315309), KQU( 4811211393563241961), + KQU(13119698597255811641), KQU( 8048691512862388981), + KQU(10216818971194073842), KQU( 4612229970165291764), + KQU(10000980798419974770), KQU( 6877640812402540687), + KQU( 1488727563290436992), KQU( 2227774069895697318), + KQU(11237754507523316593), KQU(13478948605382290972), + KQU( 1963583846976858124), KQU( 5512309205269276457), + KQU( 3972770164717652347), KQU( 3841751276198975037), + KQU(10283343042181903117), KQU( 8564001259792872199), + KQU(16472187244722489221), KQU( 8953493499268945921), + KQU( 3518747340357279580), KQU( 4003157546223963073), + KQU( 3270305958289814590), KQU( 3966704458129482496), + KQU( 8122141865926661939), KQU(14627734748099506653), + KQU(13064426990862560568), KQU( 2414079187889870829), + KQU( 5378461209354225306), KQU(10841985740128255566), + KQU( 538582442885401738), KQU( 7535089183482905946), + KQU(16117559957598879095), KQU( 
8477890721414539741), + KQU( 1459127491209533386), KQU(17035126360733620462), + KQU( 8517668552872379126), KQU(10292151468337355014), + KQU(17081267732745344157), KQU(13751455337946087178), + KQU(14026945459523832966), KQU( 6653278775061723516), + KQU(10619085543856390441), KQU( 2196343631481122885), + KQU(10045966074702826136), KQU(10082317330452718282), + KQU( 5920859259504831242), KQU( 9951879073426540617), + KQU( 7074696649151414158), KQU(15808193543879464318), + KQU( 7385247772746953374), KQU( 3192003544283864292), + KQU(18153684490917593847), KQU(12423498260668568905), + KQU(10957758099756378169), KQU(11488762179911016040), + KQU( 2099931186465333782), KQU(11180979581250294432), + KQU( 8098916250668367933), KQU( 3529200436790763465), + KQU(12988418908674681745), KQU( 6147567275954808580), + KQU( 3207503344604030989), KQU(10761592604898615360), + KQU( 229854861031893504), KQU( 8809853962667144291), + KQU(13957364469005693860), KQU( 7634287665224495886), + KQU(12353487366976556874), KQU( 1134423796317152034), + KQU( 2088992471334107068), KQU( 7393372127190799698), + KQU( 1845367839871058391), KQU( 207922563987322884), + KQU(11960870813159944976), KQU(12182120053317317363), + KQU(17307358132571709283), KQU(13871081155552824936), + KQU(18304446751741566262), KQU( 7178705220184302849), + KQU(10929605677758824425), KQU(16446976977835806844), + KQU(13723874412159769044), KQU( 6942854352100915216), + KQU( 1726308474365729390), KQU( 2150078766445323155), + KQU(15345558947919656626), KQU(12145453828874527201), + KQU( 2054448620739726849), KQU( 2740102003352628137), + KQU(11294462163577610655), KQU( 756164283387413743), + KQU(17841144758438810880), KQU(10802406021185415861), + KQU( 8716455530476737846), KQU( 6321788834517649606), + KQU(14681322910577468426), KQU(17330043563884336387), + KQU(12701802180050071614), KQU(14695105111079727151), + KQU( 5112098511654172830), KQU( 4957505496794139973), + KQU( 8270979451952045982), KQU(12307685939199120969), + 
KQU(12425799408953443032), KQU( 8376410143634796588), + KQU(16621778679680060464), KQU( 3580497854566660073), + KQU( 1122515747803382416), KQU( 857664980960597599), + KQU( 6343640119895925918), KQU(12878473260854462891), + KQU(10036813920765722626), KQU(14451335468363173812), + KQU( 5476809692401102807), KQU(16442255173514366342), + KQU(13060203194757167104), KQU(14354124071243177715), + KQU(15961249405696125227), KQU(13703893649690872584), + KQU( 363907326340340064), KQU( 6247455540491754842), + KQU(12242249332757832361), KQU( 156065475679796717), + KQU( 9351116235749732355), KQU( 4590350628677701405), + KQU( 1671195940982350389), KQU(13501398458898451905), + KQU( 6526341991225002255), KQU( 1689782913778157592), + KQU( 7439222350869010334), KQU(13975150263226478308), + KQU(11411961169932682710), KQU(17204271834833847277), + KQU( 541534742544435367), KQU( 6591191931218949684), + KQU( 2645454775478232486), KQU( 4322857481256485321), + KQU( 8477416487553065110), KQU(12902505428548435048), + KQU( 971445777981341415), KQU(14995104682744976712), + KQU( 4243341648807158063), KQU( 8695061252721927661), + KQU( 5028202003270177222), KQU( 2289257340915567840), + KQU(13870416345121866007), KQU(13994481698072092233), + KQU( 6912785400753196481), KQU( 2278309315841980139), + KQU( 4329765449648304839), KQU( 5963108095785485298), + KQU( 4880024847478722478), KQU(16015608779890240947), + KQU( 1866679034261393544), KQU( 914821179919731519), + KQU( 9643404035648760131), KQU( 2418114953615593915), + KQU( 944756836073702374), KQU(15186388048737296834), + KQU( 7723355336128442206), KQU( 7500747479679599691), + KQU(18013961306453293634), KQU( 2315274808095756456), + KQU(13655308255424029566), KQU(17203800273561677098), + KQU( 1382158694422087756), KQU( 5090390250309588976), + KQU( 517170818384213989), KQU( 1612709252627729621), + KQU( 1330118955572449606), KQU( 300922478056709885), + KQU(18115693291289091987), KQU(13491407109725238321), + KQU(15293714633593827320), KQU( 
5151539373053314504), + KQU( 5951523243743139207), KQU(14459112015249527975), + KQU( 5456113959000700739), KQU( 3877918438464873016), + KQU(12534071654260163555), KQU(15871678376893555041), + KQU(11005484805712025549), KQU(16353066973143374252), + KQU( 4358331472063256685), KQU( 8268349332210859288), + KQU(12485161590939658075), KQU(13955993592854471343), + KQU( 5911446886848367039), KQU(14925834086813706974), + KQU( 6590362597857994805), KQU( 1280544923533661875), + KQU( 1637756018947988164), KQU( 4734090064512686329), + KQU(16693705263131485912), KQU( 6834882340494360958), + KQU( 8120732176159658505), KQU( 2244371958905329346), + KQU(10447499707729734021), KQU( 7318742361446942194), + KQU( 8032857516355555296), KQU(14023605983059313116), + KQU( 1032336061815461376), KQU( 9840995337876562612), + KQU( 9869256223029203587), KQU(12227975697177267636), + KQU(12728115115844186033), KQU( 7752058479783205470), + KQU( 729733219713393087), KQU(12954017801239007622) }; static const uint64_t init_by_array_64_expected[] = { - QU( 2100341266307895239ULL), QU( 8344256300489757943ULL), - QU(15687933285484243894ULL), QU( 8268620370277076319ULL), - QU(12371852309826545459ULL), QU( 8800491541730110238ULL), - QU(18113268950100835773ULL), QU( 2886823658884438119ULL), - QU( 3293667307248180724ULL), QU( 9307928143300172731ULL), - QU( 7688082017574293629ULL), QU( 900986224735166665ULL), - QU( 9977972710722265039ULL), QU( 6008205004994830552ULL), - QU( 546909104521689292ULL), QU( 7428471521869107594ULL), - QU(14777563419314721179ULL), QU(16116143076567350053ULL), - QU( 5322685342003142329ULL), QU( 4200427048445863473ULL), - QU( 4693092150132559146ULL), QU(13671425863759338582ULL), - QU( 6747117460737639916ULL), QU( 4732666080236551150ULL), - QU( 5912839950611941263ULL), QU( 3903717554504704909ULL), - QU( 2615667650256786818ULL), QU(10844129913887006352ULL), - QU(13786467861810997820ULL), QU(14267853002994021570ULL), - QU(13767807302847237439ULL), QU(16407963253707224617ULL), - QU( 
4802498363698583497ULL), QU( 2523802839317209764ULL), - QU( 3822579397797475589ULL), QU( 8950320572212130610ULL), - QU( 3745623504978342534ULL), QU(16092609066068482806ULL), - QU( 9817016950274642398ULL), QU(10591660660323829098ULL), - QU(11751606650792815920ULL), QU( 5122873818577122211ULL), - QU(17209553764913936624ULL), QU( 6249057709284380343ULL), - QU(15088791264695071830ULL), QU(15344673071709851930ULL), - QU( 4345751415293646084ULL), QU( 2542865750703067928ULL), - QU(13520525127852368784ULL), QU(18294188662880997241ULL), - QU( 3871781938044881523ULL), QU( 2873487268122812184ULL), - QU(15099676759482679005ULL), QU(15442599127239350490ULL), - QU( 6311893274367710888ULL), QU( 3286118760484672933ULL), - QU( 4146067961333542189ULL), QU(13303942567897208770ULL), - QU( 8196013722255630418ULL), QU( 4437815439340979989ULL), - QU(15433791533450605135ULL), QU( 4254828956815687049ULL), - QU( 1310903207708286015ULL), QU(10529182764462398549ULL), - QU(14900231311660638810ULL), QU( 9727017277104609793ULL), - QU( 1821308310948199033ULL), QU(11628861435066772084ULL), - QU( 9469019138491546924ULL), QU( 3145812670532604988ULL), - QU( 9938468915045491919ULL), QU( 1562447430672662142ULL), - QU(13963995266697989134ULL), QU( 3356884357625028695ULL), - QU( 4499850304584309747ULL), QU( 8456825817023658122ULL), - QU(10859039922814285279ULL), QU( 8099512337972526555ULL), - QU( 348006375109672149ULL), QU(11919893998241688603ULL), - QU( 1104199577402948826ULL), QU(16689191854356060289ULL), - QU(10992552041730168078ULL), QU( 7243733172705465836ULL), - QU( 5668075606180319560ULL), QU(18182847037333286970ULL), - QU( 4290215357664631322ULL), QU( 4061414220791828613ULL), - QU(13006291061652989604ULL), QU( 7140491178917128798ULL), - QU(12703446217663283481ULL), QU( 5500220597564558267ULL), - QU(10330551509971296358ULL), QU(15958554768648714492ULL), - QU( 5174555954515360045ULL), QU( 1731318837687577735ULL), - QU( 3557700801048354857ULL), QU(13764012341928616198ULL), - 
QU(13115166194379119043ULL), QU( 7989321021560255519ULL), - QU( 2103584280905877040ULL), QU( 9230788662155228488ULL), - QU(16396629323325547654ULL), QU( 657926409811318051ULL), - QU(15046700264391400727ULL), QU( 5120132858771880830ULL), - QU( 7934160097989028561ULL), QU( 6963121488531976245ULL), - QU(17412329602621742089ULL), QU(15144843053931774092ULL), - QU(17204176651763054532ULL), QU(13166595387554065870ULL), - QU( 8590377810513960213ULL), QU( 5834365135373991938ULL), - QU( 7640913007182226243ULL), QU( 3479394703859418425ULL), - QU(16402784452644521040ULL), QU( 4993979809687083980ULL), - QU(13254522168097688865ULL), QU(15643659095244365219ULL), - QU( 5881437660538424982ULL), QU(11174892200618987379ULL), - QU( 254409966159711077ULL), QU(17158413043140549909ULL), - QU( 3638048789290376272ULL), QU( 1376816930299489190ULL), - QU( 4622462095217761923ULL), QU(15086407973010263515ULL), - QU(13253971772784692238ULL), QU( 5270549043541649236ULL), - QU(11182714186805411604ULL), QU(12283846437495577140ULL), - QU( 5297647149908953219ULL), QU(10047451738316836654ULL), - QU( 4938228100367874746ULL), QU(12328523025304077923ULL), - QU( 3601049438595312361ULL), QU( 9313624118352733770ULL), - QU(13322966086117661798ULL), QU(16660005705644029394ULL), - QU(11337677526988872373ULL), QU(13869299102574417795ULL), - QU(15642043183045645437ULL), QU( 3021755569085880019ULL), - QU( 4979741767761188161ULL), QU(13679979092079279587ULL), - QU( 3344685842861071743ULL), QU(13947960059899588104ULL), - QU( 305806934293368007ULL), QU( 5749173929201650029ULL), - QU(11123724852118844098ULL), QU(15128987688788879802ULL), - QU(15251651211024665009ULL), QU( 7689925933816577776ULL), - QU(16732804392695859449ULL), QU(17087345401014078468ULL), - QU(14315108589159048871ULL), QU( 4820700266619778917ULL), - QU(16709637539357958441ULL), QU( 4936227875177351374ULL), - QU( 2137907697912987247ULL), QU(11628565601408395420ULL), - QU( 2333250549241556786ULL), QU( 5711200379577778637ULL), - QU( 
5170680131529031729ULL), QU(12620392043061335164ULL), - QU( 95363390101096078ULL), QU( 5487981914081709462ULL), - QU( 1763109823981838620ULL), QU( 3395861271473224396ULL), - QU( 1300496844282213595ULL), QU( 6894316212820232902ULL), - QU(10673859651135576674ULL), QU( 5911839658857903252ULL), - QU(17407110743387299102ULL), QU( 8257427154623140385ULL), - QU(11389003026741800267ULL), QU( 4070043211095013717ULL), - QU(11663806997145259025ULL), QU(15265598950648798210ULL), - QU( 630585789434030934ULL), QU( 3524446529213587334ULL), - QU( 7186424168495184211ULL), QU(10806585451386379021ULL), - QU(11120017753500499273ULL), QU( 1586837651387701301ULL), - QU(17530454400954415544ULL), QU( 9991670045077880430ULL), - QU( 7550997268990730180ULL), QU( 8640249196597379304ULL), - QU( 3522203892786893823ULL), QU(10401116549878854788ULL), - QU(13690285544733124852ULL), QU( 8295785675455774586ULL), - QU(15535716172155117603ULL), QU( 3112108583723722511ULL), - QU(17633179955339271113ULL), QU(18154208056063759375ULL), - QU( 1866409236285815666ULL), QU(13326075895396412882ULL), - QU( 8756261842948020025ULL), QU( 6281852999868439131ULL), - QU(15087653361275292858ULL), QU(10333923911152949397ULL), - QU( 5265567645757408500ULL), QU(12728041843210352184ULL), - QU( 6347959327507828759ULL), QU( 154112802625564758ULL), - QU(18235228308679780218ULL), QU( 3253805274673352418ULL), - QU( 4849171610689031197ULL), QU(17948529398340432518ULL), - QU(13803510475637409167ULL), QU(13506570190409883095ULL), - QU(15870801273282960805ULL), QU( 8451286481299170773ULL), - QU( 9562190620034457541ULL), QU( 8518905387449138364ULL), - QU(12681306401363385655ULL), QU( 3788073690559762558ULL), - QU( 5256820289573487769ULL), QU( 2752021372314875467ULL), - QU( 6354035166862520716ULL), QU( 4328956378309739069ULL), - QU( 449087441228269600ULL), QU( 5533508742653090868ULL), - QU( 1260389420404746988ULL), QU(18175394473289055097ULL), - QU( 1535467109660399420ULL), QU( 8818894282874061442ULL), - QU(12140873243824811213ULL), 
QU(15031386653823014946ULL), - QU( 1286028221456149232ULL), QU( 6329608889367858784ULL), - QU( 9419654354945132725ULL), QU( 6094576547061672379ULL), - QU(17706217251847450255ULL), QU( 1733495073065878126ULL), - QU(16918923754607552663ULL), QU( 8881949849954945044ULL), - QU(12938977706896313891ULL), QU(14043628638299793407ULL), - QU(18393874581723718233ULL), QU( 6886318534846892044ULL), - QU(14577870878038334081ULL), QU(13541558383439414119ULL), - QU(13570472158807588273ULL), QU(18300760537910283361ULL), - QU( 818368572800609205ULL), QU( 1417000585112573219ULL), - QU(12337533143867683655ULL), QU(12433180994702314480ULL), - QU( 778190005829189083ULL), QU(13667356216206524711ULL), - QU( 9866149895295225230ULL), QU(11043240490417111999ULL), - QU( 1123933826541378598ULL), QU( 6469631933605123610ULL), - QU(14508554074431980040ULL), QU(13918931242962026714ULL), - QU( 2870785929342348285ULL), QU(14786362626740736974ULL), - QU(13176680060902695786ULL), QU( 9591778613541679456ULL), - QU( 9097662885117436706ULL), QU( 749262234240924947ULL), - QU( 1944844067793307093ULL), QU( 4339214904577487742ULL), - QU( 8009584152961946551ULL), QU(16073159501225501777ULL), - QU( 3335870590499306217ULL), QU(17088312653151202847ULL), - QU( 3108893142681931848ULL), QU(16636841767202792021ULL), - QU(10423316431118400637ULL), QU( 8008357368674443506ULL), - QU(11340015231914677875ULL), QU(17687896501594936090ULL), - QU(15173627921763199958ULL), QU( 542569482243721959ULL), - QU(15071714982769812975ULL), QU( 4466624872151386956ULL), - QU( 1901780715602332461ULL), QU( 9822227742154351098ULL), - QU( 1479332892928648780ULL), QU( 6981611948382474400ULL), - QU( 7620824924456077376ULL), QU(14095973329429406782ULL), - QU( 7902744005696185404ULL), QU(15830577219375036920ULL), - QU(10287076667317764416ULL), QU(12334872764071724025ULL), - QU( 4419302088133544331ULL), QU(14455842851266090520ULL), - QU(12488077416504654222ULL), QU( 7953892017701886766ULL), - QU( 6331484925529519007ULL), QU( 
4902145853785030022ULL), - QU(17010159216096443073ULL), QU(11945354668653886087ULL), - QU(15112022728645230829ULL), QU(17363484484522986742ULL), - QU( 4423497825896692887ULL), QU( 8155489510809067471ULL), - QU( 258966605622576285ULL), QU( 5462958075742020534ULL), - QU( 6763710214913276228ULL), QU( 2368935183451109054ULL), - QU(14209506165246453811ULL), QU( 2646257040978514881ULL), - QU( 3776001911922207672ULL), QU( 1419304601390147631ULL), - QU(14987366598022458284ULL), QU( 3977770701065815721ULL), - QU( 730820417451838898ULL), QU( 3982991703612885327ULL), - QU( 2803544519671388477ULL), QU(17067667221114424649ULL), - QU( 2922555119737867166ULL), QU( 1989477584121460932ULL), - QU(15020387605892337354ULL), QU( 9293277796427533547ULL), - QU(10722181424063557247ULL), QU(16704542332047511651ULL), - QU( 5008286236142089514ULL), QU(16174732308747382540ULL), - QU(17597019485798338402ULL), QU(13081745199110622093ULL), - QU( 8850305883842258115ULL), QU(12723629125624589005ULL), - QU( 8140566453402805978ULL), QU(15356684607680935061ULL), - QU(14222190387342648650ULL), QU(11134610460665975178ULL), - QU( 1259799058620984266ULL), QU(13281656268025610041ULL), - QU( 298262561068153992ULL), QU(12277871700239212922ULL), - QU(13911297774719779438ULL), QU(16556727962761474934ULL), - QU(17903010316654728010ULL), QU( 9682617699648434744ULL), - QU(14757681836838592850ULL), QU( 1327242446558524473ULL), - QU(11126645098780572792ULL), QU( 1883602329313221774ULL), - QU( 2543897783922776873ULL), QU(15029168513767772842ULL), - QU(12710270651039129878ULL), QU(16118202956069604504ULL), - QU(15010759372168680524ULL), QU( 2296827082251923948ULL), - QU(10793729742623518101ULL), QU(13829764151845413046ULL), - QU(17769301223184451213ULL), QU( 3118268169210783372ULL), - QU(17626204544105123127ULL), QU( 7416718488974352644ULL), - QU(10450751996212925994ULL), QU( 9352529519128770586ULL), - QU( 259347569641110140ULL), QU( 8048588892269692697ULL), - QU( 1774414152306494058ULL), 
QU(10669548347214355622ULL), - QU(13061992253816795081ULL), QU(18432677803063861659ULL), - QU( 8879191055593984333ULL), QU(12433753195199268041ULL), - QU(14919392415439730602ULL), QU( 6612848378595332963ULL), - QU( 6320986812036143628ULL), QU(10465592420226092859ULL), - QU( 4196009278962570808ULL), QU( 3747816564473572224ULL), - QU(17941203486133732898ULL), QU( 2350310037040505198ULL), - QU( 5811779859134370113ULL), QU(10492109599506195126ULL), - QU( 7699650690179541274ULL), QU( 1954338494306022961ULL), - QU(14095816969027231152ULL), QU( 5841346919964852061ULL), - QU(14945969510148214735ULL), QU( 3680200305887550992ULL), - QU( 6218047466131695792ULL), QU( 8242165745175775096ULL), - QU(11021371934053307357ULL), QU( 1265099502753169797ULL), - QU( 4644347436111321718ULL), QU( 3609296916782832859ULL), - QU( 8109807992218521571ULL), QU(18387884215648662020ULL), - QU(14656324896296392902ULL), QU(17386819091238216751ULL), - QU(17788300878582317152ULL), QU( 7919446259742399591ULL), - QU( 4466613134576358004ULL), QU(12928181023667938509ULL), - QU(13147446154454932030ULL), QU(16552129038252734620ULL), - QU( 8395299403738822450ULL), QU(11313817655275361164ULL), - QU( 434258809499511718ULL), QU( 2074882104954788676ULL), - QU( 7929892178759395518ULL), QU( 9006461629105745388ULL), - QU( 5176475650000323086ULL), QU(11128357033468341069ULL), - QU(12026158851559118955ULL), QU(14699716249471156500ULL), - QU( 448982497120206757ULL), QU( 4156475356685519900ULL), - QU( 6063816103417215727ULL), QU(10073289387954971479ULL), - QU( 8174466846138590962ULL), QU( 2675777452363449006ULL), - QU( 9090685420572474281ULL), QU( 6659652652765562060ULL), - QU(12923120304018106621ULL), QU(11117480560334526775ULL), - QU( 937910473424587511ULL), QU( 1838692113502346645ULL), - QU(11133914074648726180ULL), QU( 7922600945143884053ULL), - QU(13435287702700959550ULL), QU( 5287964921251123332ULL), - QU(11354875374575318947ULL), QU(17955724760748238133ULL), - QU(13728617396297106512ULL), QU( 
4107449660118101255ULL), - QU( 1210269794886589623ULL), QU(11408687205733456282ULL), - QU( 4538354710392677887ULL), QU(13566803319341319267ULL), - QU(17870798107734050771ULL), QU( 3354318982568089135ULL), - QU( 9034450839405133651ULL), QU(13087431795753424314ULL), - QU( 950333102820688239ULL), QU( 1968360654535604116ULL), - QU(16840551645563314995ULL), QU( 8867501803892924995ULL), - QU(11395388644490626845ULL), QU( 1529815836300732204ULL), - QU(13330848522996608842ULL), QU( 1813432878817504265ULL), - QU( 2336867432693429560ULL), QU(15192805445973385902ULL), - QU( 2528593071076407877ULL), QU( 128459777936689248ULL), - QU( 9976345382867214866ULL), QU( 6208885766767996043ULL), - QU(14982349522273141706ULL), QU( 3099654362410737822ULL), - QU(13776700761947297661ULL), QU( 8806185470684925550ULL), - QU( 8151717890410585321ULL), QU( 640860591588072925ULL), - QU(14592096303937307465ULL), QU( 9056472419613564846ULL), - QU(14861544647742266352ULL), QU(12703771500398470216ULL), - QU( 3142372800384138465ULL), QU( 6201105606917248196ULL), - QU(18337516409359270184ULL), QU(15042268695665115339ULL), - QU(15188246541383283846ULL), QU(12800028693090114519ULL), - QU( 5992859621101493472ULL), QU(18278043971816803521ULL), - QU( 9002773075219424560ULL), QU( 7325707116943598353ULL), - QU( 7930571931248040822ULL), QU( 5645275869617023448ULL), - QU( 7266107455295958487ULL), QU( 4363664528273524411ULL), - QU(14313875763787479809ULL), QU(17059695613553486802ULL), - QU( 9247761425889940932ULL), QU(13704726459237593128ULL), - QU( 2701312427328909832ULL), QU(17235532008287243115ULL), - QU(14093147761491729538ULL), QU( 6247352273768386516ULL), - QU( 8268710048153268415ULL), QU( 7985295214477182083ULL), - QU(15624495190888896807ULL), QU( 3772753430045262788ULL), - QU( 9133991620474991698ULL), QU( 5665791943316256028ULL), - QU( 7551996832462193473ULL), QU(13163729206798953877ULL), - QU( 9263532074153846374ULL), QU( 1015460703698618353ULL), - QU(17929874696989519390ULL), 
QU(18257884721466153847ULL), - QU(16271867543011222991ULL), QU( 3905971519021791941ULL), - QU(16814488397137052085ULL), QU( 1321197685504621613ULL), - QU( 2870359191894002181ULL), QU(14317282970323395450ULL), - QU(13663920845511074366ULL), QU( 2052463995796539594ULL), - QU(14126345686431444337ULL), QU( 1727572121947022534ULL), - QU(17793552254485594241ULL), QU( 6738857418849205750ULL), - QU( 1282987123157442952ULL), QU(16655480021581159251ULL), - QU( 6784587032080183866ULL), QU(14726758805359965162ULL), - QU( 7577995933961987349ULL), QU(12539609320311114036ULL), - QU(10789773033385439494ULL), QU( 8517001497411158227ULL), - QU(10075543932136339710ULL), QU(14838152340938811081ULL), - QU( 9560840631794044194ULL), QU(17445736541454117475ULL), - QU(10633026464336393186ULL), QU(15705729708242246293ULL), - QU( 1117517596891411098ULL), QU( 4305657943415886942ULL), - QU( 4948856840533979263ULL), QU(16071681989041789593ULL), - QU(13723031429272486527ULL), QU( 7639567622306509462ULL), - QU(12670424537483090390ULL), QU( 9715223453097197134ULL), - QU( 5457173389992686394ULL), QU( 289857129276135145ULL), - QU(17048610270521972512ULL), QU( 692768013309835485ULL), - QU(14823232360546632057ULL), QU(18218002361317895936ULL), - QU( 3281724260212650204ULL), QU(16453957266549513795ULL), - QU( 8592711109774511881ULL), QU( 929825123473369579ULL), - QU(15966784769764367791ULL), QU( 9627344291450607588ULL), - QU(10849555504977813287ULL), QU( 9234566913936339275ULL), - QU( 6413807690366911210ULL), QU(10862389016184219267ULL), - QU(13842504799335374048ULL), QU( 1531994113376881174ULL), - QU( 2081314867544364459ULL), QU(16430628791616959932ULL), - QU( 8314714038654394368ULL), QU( 9155473892098431813ULL), - QU(12577843786670475704ULL), QU( 4399161106452401017ULL), - QU( 1668083091682623186ULL), QU( 1741383777203714216ULL), - QU( 2162597285417794374ULL), QU(15841980159165218736ULL), - QU( 1971354603551467079ULL), QU( 1206714764913205968ULL), - QU( 4790860439591272330ULL), 
QU(14699375615594055799ULL), - QU( 8374423871657449988ULL), QU(10950685736472937738ULL), - QU( 697344331343267176ULL), QU(10084998763118059810ULL), - QU(12897369539795983124ULL), QU(12351260292144383605ULL), - QU( 1268810970176811234ULL), QU( 7406287800414582768ULL), - QU( 516169557043807831ULL), QU( 5077568278710520380ULL), - QU( 3828791738309039304ULL), QU( 7721974069946943610ULL), - QU( 3534670260981096460ULL), QU( 4865792189600584891ULL), - QU(16892578493734337298ULL), QU( 9161499464278042590ULL), - QU(11976149624067055931ULL), QU(13219479887277343990ULL), - QU(14161556738111500680ULL), QU(14670715255011223056ULL), - QU( 4671205678403576558ULL), QU(12633022931454259781ULL), - QU(14821376219869187646ULL), QU( 751181776484317028ULL), - QU( 2192211308839047070ULL), QU(11787306362361245189ULL), - QU(10672375120744095707ULL), QU( 4601972328345244467ULL), - QU(15457217788831125879ULL), QU( 8464345256775460809ULL), - QU(10191938789487159478ULL), QU( 6184348739615197613ULL), - QU(11425436778806882100ULL), QU( 2739227089124319793ULL), - QU( 461464518456000551ULL), QU( 4689850170029177442ULL), - QU( 6120307814374078625ULL), QU(11153579230681708671ULL), - QU( 7891721473905347926ULL), QU(10281646937824872400ULL), - QU( 3026099648191332248ULL), QU( 8666750296953273818ULL), - QU(14978499698844363232ULL), QU(13303395102890132065ULL), - QU( 8182358205292864080ULL), QU(10560547713972971291ULL), - QU(11981635489418959093ULL), QU( 3134621354935288409ULL), - QU(11580681977404383968ULL), QU(14205530317404088650ULL), - QU( 5997789011854923157ULL), QU(13659151593432238041ULL), - QU(11664332114338865086ULL), QU( 7490351383220929386ULL), - QU( 7189290499881530378ULL), QU(15039262734271020220ULL), - QU( 2057217285976980055ULL), QU( 555570804905355739ULL), - QU(11235311968348555110ULL), QU(13824557146269603217ULL), - QU(16906788840653099693ULL), QU( 7222878245455661677ULL), - QU( 5245139444332423756ULL), QU( 4723748462805674292ULL), - QU(12216509815698568612ULL), 
QU(17402362976648951187ULL), - QU(17389614836810366768ULL), QU( 4880936484146667711ULL), - QU( 9085007839292639880ULL), QU(13837353458498535449ULL), - QU(11914419854360366677ULL), QU(16595890135313864103ULL), - QU( 6313969847197627222ULL), QU(18296909792163910431ULL), - QU(10041780113382084042ULL), QU( 2499478551172884794ULL), - QU(11057894246241189489ULL), QU( 9742243032389068555ULL), - QU(12838934582673196228ULL), QU(13437023235248490367ULL), - QU(13372420669446163240ULL), QU( 6752564244716909224ULL), - QU( 7157333073400313737ULL), QU(12230281516370654308ULL), - QU( 1182884552219419117ULL), QU( 2955125381312499218ULL), - QU(10308827097079443249ULL), QU( 1337648572986534958ULL), - QU(16378788590020343939ULL), QU( 108619126514420935ULL), - QU( 3990981009621629188ULL), QU( 5460953070230946410ULL), - QU( 9703328329366531883ULL), QU(13166631489188077236ULL), - QU( 1104768831213675170ULL), QU( 3447930458553877908ULL), - QU( 8067172487769945676ULL), QU( 5445802098190775347ULL), - QU( 3244840981648973873ULL), QU(17314668322981950060ULL), - QU( 5006812527827763807ULL), QU(18158695070225526260ULL), - QU( 2824536478852417853ULL), QU(13974775809127519886ULL), - QU( 9814362769074067392ULL), QU(17276205156374862128ULL), - QU(11361680725379306967ULL), QU( 3422581970382012542ULL), - QU(11003189603753241266ULL), QU(11194292945277862261ULL), - QU( 6839623313908521348ULL), QU(11935326462707324634ULL), - QU( 1611456788685878444ULL), QU(13112620989475558907ULL), - QU( 517659108904450427ULL), QU(13558114318574407624ULL), - QU(15699089742731633077ULL), QU( 4988979278862685458ULL), - QU( 8111373583056521297ULL), QU( 3891258746615399627ULL), - QU( 8137298251469718086ULL), QU(12748663295624701649ULL), - QU( 4389835683495292062ULL), QU( 5775217872128831729ULL), - QU( 9462091896405534927ULL), QU( 8498124108820263989ULL), - QU( 8059131278842839525ULL), QU(10503167994254090892ULL), - QU(11613153541070396656ULL), QU(18069248738504647790ULL), - QU( 570657419109768508ULL), QU( 
3950574167771159665ULL), - QU( 5514655599604313077ULL), QU( 2908460854428484165ULL), - QU(10777722615935663114ULL), QU(12007363304839279486ULL), - QU( 9800646187569484767ULL), QU( 8795423564889864287ULL), - QU(14257396680131028419ULL), QU( 6405465117315096498ULL), - QU( 7939411072208774878ULL), QU(17577572378528990006ULL), - QU(14785873806715994850ULL), QU(16770572680854747390ULL), - QU(18127549474419396481ULL), QU(11637013449455757750ULL), - QU(14371851933996761086ULL), QU( 3601181063650110280ULL), - QU( 4126442845019316144ULL), QU(10198287239244320669ULL), - QU(18000169628555379659ULL), QU(18392482400739978269ULL), - QU( 6219919037686919957ULL), QU( 3610085377719446052ULL), - QU( 2513925039981776336ULL), QU(16679413537926716955ULL), - QU(12903302131714909434ULL), QU( 5581145789762985009ULL), - QU(12325955044293303233ULL), QU(17216111180742141204ULL), - QU( 6321919595276545740ULL), QU( 3507521147216174501ULL), - QU( 9659194593319481840ULL), QU(11473976005975358326ULL), - QU(14742730101435987026ULL), QU( 492845897709954780ULL), - QU(16976371186162599676ULL), QU(17712703422837648655ULL), - QU( 9881254778587061697ULL), QU( 8413223156302299551ULL), - QU( 1563841828254089168ULL), QU( 9996032758786671975ULL), - QU( 138877700583772667ULL), QU(13003043368574995989ULL), - QU( 4390573668650456587ULL), QU( 8610287390568126755ULL), - QU(15126904974266642199ULL), QU( 6703637238986057662ULL), - QU( 2873075592956810157ULL), QU( 6035080933946049418ULL), - QU(13382846581202353014ULL), QU( 7303971031814642463ULL), - QU(18418024405307444267ULL), QU( 5847096731675404647ULL), - QU( 4035880699639842500ULL), QU(11525348625112218478ULL), - QU( 3041162365459574102ULL), QU( 2604734487727986558ULL), - QU(15526341771636983145ULL), QU(14556052310697370254ULL), - QU(12997787077930808155ULL), QU( 9601806501755554499ULL), - QU(11349677952521423389ULL), QU(14956777807644899350ULL), - QU(16559736957742852721ULL), QU(12360828274778140726ULL), - QU( 6685373272009662513ULL), 
QU(16932258748055324130ULL), - QU(15918051131954158508ULL), QU( 1692312913140790144ULL), - QU( 546653826801637367ULL), QU( 5341587076045986652ULL), - QU(14975057236342585662ULL), QU(12374976357340622412ULL), - QU(10328833995181940552ULL), QU(12831807101710443149ULL), - QU(10548514914382545716ULL), QU( 2217806727199715993ULL), - QU(12627067369242845138ULL), QU( 4598965364035438158ULL), - QU( 150923352751318171ULL), QU(14274109544442257283ULL), - QU( 4696661475093863031ULL), QU( 1505764114384654516ULL), - QU(10699185831891495147ULL), QU( 2392353847713620519ULL), - QU( 3652870166711788383ULL), QU( 8640653276221911108ULL), - QU( 3894077592275889704ULL), QU( 4918592872135964845ULL), - QU(16379121273281400789ULL), QU(12058465483591683656ULL), - QU(11250106829302924945ULL), QU( 1147537556296983005ULL), - QU( 6376342756004613268ULL), QU(14967128191709280506ULL), - QU(18007449949790627628ULL), QU( 9497178279316537841ULL), - QU( 7920174844809394893ULL), QU(10037752595255719907ULL), - QU(15875342784985217697ULL), QU(15311615921712850696ULL), - QU( 9552902652110992950ULL), QU(14054979450099721140ULL), - QU( 5998709773566417349ULL), QU(18027910339276320187ULL), - QU( 8223099053868585554ULL), QU( 7842270354824999767ULL), - QU( 4896315688770080292ULL), QU(12969320296569787895ULL), - QU( 2674321489185759961ULL), QU( 4053615936864718439ULL), - QU(11349775270588617578ULL), QU( 4743019256284553975ULL), - QU( 5602100217469723769ULL), QU(14398995691411527813ULL), - QU( 7412170493796825470ULL), QU( 836262406131744846ULL), - QU( 8231086633845153022ULL), QU( 5161377920438552287ULL), - QU( 8828731196169924949ULL), QU(16211142246465502680ULL), - QU( 3307990879253687818ULL), QU( 5193405406899782022ULL), - QU( 8510842117467566693ULL), QU( 6070955181022405365ULL), - QU(14482950231361409799ULL), QU(12585159371331138077ULL), - QU( 3511537678933588148ULL), QU( 2041849474531116417ULL), - QU(10944936685095345792ULL), QU(18303116923079107729ULL), - QU( 2720566371239725320ULL), QU( 
4958672473562397622ULL), - QU( 3032326668253243412ULL), QU(13689418691726908338ULL), - QU( 1895205511728843996ULL), QU( 8146303515271990527ULL), - QU(16507343500056113480ULL), QU( 473996939105902919ULL), - QU( 9897686885246881481ULL), QU(14606433762712790575ULL), - QU( 6732796251605566368ULL), QU( 1399778120855368916ULL), - QU( 935023885182833777ULL), QU(16066282816186753477ULL), - QU( 7291270991820612055ULL), QU(17530230393129853844ULL), - QU(10223493623477451366ULL), QU(15841725630495676683ULL), - QU(17379567246435515824ULL), QU( 8588251429375561971ULL), - QU(18339511210887206423ULL), QU(17349587430725976100ULL), - QU(12244876521394838088ULL), QU( 6382187714147161259ULL), - QU(12335807181848950831ULL), QU(16948885622305460665ULL), - QU(13755097796371520506ULL), QU(14806740373324947801ULL), - QU( 4828699633859287703ULL), QU( 8209879281452301604ULL), - QU(12435716669553736437ULL), QU(13970976859588452131ULL), - QU( 6233960842566773148ULL), QU(12507096267900505759ULL), - QU( 1198713114381279421ULL), QU(14989862731124149015ULL), - QU(15932189508707978949ULL), QU( 2526406641432708722ULL), - QU( 29187427817271982ULL), QU( 1499802773054556353ULL), - QU(10816638187021897173ULL), QU( 5436139270839738132ULL), - QU( 6659882287036010082ULL), QU( 2154048955317173697ULL), - QU(10887317019333757642ULL), QU(16281091802634424955ULL), - QU(10754549879915384901ULL), QU(10760611745769249815ULL), - QU( 2161505946972504002ULL), QU( 5243132808986265107ULL), - QU(10129852179873415416ULL), QU( 710339480008649081ULL), - QU( 7802129453068808528ULL), QU(17967213567178907213ULL), - QU(15730859124668605599ULL), QU(13058356168962376502ULL), - QU( 3701224985413645909ULL), QU(14464065869149109264ULL), - QU( 9959272418844311646ULL), QU(10157426099515958752ULL), - QU(14013736814538268528ULL), QU(17797456992065653951ULL), - QU(17418878140257344806ULL), QU(15457429073540561521ULL), - QU( 2184426881360949378ULL), QU( 2062193041154712416ULL), - QU( 8553463347406931661ULL), QU( 4913057625202871854ULL), 
- QU( 2668943682126618425ULL), QU(17064444737891172288ULL), - QU( 4997115903913298637ULL), QU(12019402608892327416ULL), - QU(17603584559765897352ULL), QU(11367529582073647975ULL), - QU( 8211476043518436050ULL), QU( 8676849804070323674ULL), - QU(18431829230394475730ULL), QU(10490177861361247904ULL), - QU( 9508720602025651349ULL), QU( 7409627448555722700ULL), - QU( 5804047018862729008ULL), QU(11943858176893142594ULL), - QU(11908095418933847092ULL), QU( 5415449345715887652ULL), - QU( 1554022699166156407ULL), QU( 9073322106406017161ULL), - QU( 7080630967969047082ULL), QU(18049736940860732943ULL), - QU(12748714242594196794ULL), QU( 1226992415735156741ULL), - QU(17900981019609531193ULL), QU(11720739744008710999ULL), - QU( 3006400683394775434ULL), QU(11347974011751996028ULL), - QU( 3316999628257954608ULL), QU( 8384484563557639101ULL), - QU(18117794685961729767ULL), QU( 1900145025596618194ULL), - QU(17459527840632892676ULL), QU( 5634784101865710994ULL), - QU( 7918619300292897158ULL), QU( 3146577625026301350ULL), - QU( 9955212856499068767ULL), QU( 1873995843681746975ULL), - QU( 1561487759967972194ULL), QU( 8322718804375878474ULL), - QU(11300284215327028366ULL), QU( 4667391032508998982ULL), - QU( 9820104494306625580ULL), QU(17922397968599970610ULL), - QU( 1784690461886786712ULL), QU(14940365084341346821ULL), - QU( 5348719575594186181ULL), QU(10720419084507855261ULL), - QU(14210394354145143274ULL), QU( 2426468692164000131ULL), - QU(16271062114607059202ULL), QU(14851904092357070247ULL), - QU( 6524493015693121897ULL), QU( 9825473835127138531ULL), - QU(14222500616268569578ULL), QU(15521484052007487468ULL), - QU(14462579404124614699ULL), QU(11012375590820665520ULL), - QU(11625327350536084927ULL), QU(14452017765243785417ULL), - QU( 9989342263518766305ULL), QU( 3640105471101803790ULL), - QU( 4749866455897513242ULL), QU(13963064946736312044ULL), - QU(10007416591973223791ULL), QU(18314132234717431115ULL), - QU( 3286596588617483450ULL), QU( 7726163455370818765ULL), - QU( 
7575454721115379328ULL), QU( 5308331576437663422ULL), - QU(18288821894903530934ULL), QU( 8028405805410554106ULL), - QU(15744019832103296628ULL), QU( 149765559630932100ULL), - QU( 6137705557200071977ULL), QU(14513416315434803615ULL), - QU(11665702820128984473ULL), QU( 218926670505601386ULL), - QU( 6868675028717769519ULL), QU(15282016569441512302ULL), - QU( 5707000497782960236ULL), QU( 6671120586555079567ULL), - QU( 2194098052618985448ULL), QU(16849577895477330978ULL), - QU(12957148471017466283ULL), QU( 1997805535404859393ULL), - QU( 1180721060263860490ULL), QU(13206391310193756958ULL), - QU(12980208674461861797ULL), QU( 3825967775058875366ULL), - QU(17543433670782042631ULL), QU( 1518339070120322730ULL), - QU(16344584340890991669ULL), QU( 2611327165318529819ULL), - QU(11265022723283422529ULL), QU( 4001552800373196817ULL), - QU(14509595890079346161ULL), QU( 3528717165416234562ULL), - QU(18153222571501914072ULL), QU( 9387182977209744425ULL), - QU(10064342315985580021ULL), QU(11373678413215253977ULL), - QU( 2308457853228798099ULL), QU( 9729042942839545302ULL), - QU( 7833785471140127746ULL), QU( 6351049900319844436ULL), - QU(14454610627133496067ULL), QU(12533175683634819111ULL), - QU(15570163926716513029ULL), QU(13356980519185762498ULL) + KQU( 2100341266307895239), KQU( 8344256300489757943), + KQU(15687933285484243894), KQU( 8268620370277076319), + KQU(12371852309826545459), KQU( 8800491541730110238), + KQU(18113268950100835773), KQU( 2886823658884438119), + KQU( 3293667307248180724), KQU( 9307928143300172731), + KQU( 7688082017574293629), KQU( 900986224735166665), + KQU( 9977972710722265039), KQU( 6008205004994830552), + KQU( 546909104521689292), KQU( 7428471521869107594), + KQU(14777563419314721179), KQU(16116143076567350053), + KQU( 5322685342003142329), KQU( 4200427048445863473), + KQU( 4693092150132559146), KQU(13671425863759338582), + KQU( 6747117460737639916), KQU( 4732666080236551150), + KQU( 5912839950611941263), KQU( 3903717554504704909), + KQU( 
2615667650256786818), KQU(10844129913887006352), + KQU(13786467861810997820), KQU(14267853002994021570), + KQU(13767807302847237439), KQU(16407963253707224617), + KQU( 4802498363698583497), KQU( 2523802839317209764), + KQU( 3822579397797475589), KQU( 8950320572212130610), + KQU( 3745623504978342534), KQU(16092609066068482806), + KQU( 9817016950274642398), KQU(10591660660323829098), + KQU(11751606650792815920), KQU( 5122873818577122211), + KQU(17209553764913936624), KQU( 6249057709284380343), + KQU(15088791264695071830), KQU(15344673071709851930), + KQU( 4345751415293646084), KQU( 2542865750703067928), + KQU(13520525127852368784), KQU(18294188662880997241), + KQU( 3871781938044881523), KQU( 2873487268122812184), + KQU(15099676759482679005), KQU(15442599127239350490), + KQU( 6311893274367710888), KQU( 3286118760484672933), + KQU( 4146067961333542189), KQU(13303942567897208770), + KQU( 8196013722255630418), KQU( 4437815439340979989), + KQU(15433791533450605135), KQU( 4254828956815687049), + KQU( 1310903207708286015), KQU(10529182764462398549), + KQU(14900231311660638810), KQU( 9727017277104609793), + KQU( 1821308310948199033), KQU(11628861435066772084), + KQU( 9469019138491546924), KQU( 3145812670532604988), + KQU( 9938468915045491919), KQU( 1562447430672662142), + KQU(13963995266697989134), KQU( 3356884357625028695), + KQU( 4499850304584309747), KQU( 8456825817023658122), + KQU(10859039922814285279), KQU( 8099512337972526555), + KQU( 348006375109672149), KQU(11919893998241688603), + KQU( 1104199577402948826), KQU(16689191854356060289), + KQU(10992552041730168078), KQU( 7243733172705465836), + KQU( 5668075606180319560), KQU(18182847037333286970), + KQU( 4290215357664631322), KQU( 4061414220791828613), + KQU(13006291061652989604), KQU( 7140491178917128798), + KQU(12703446217663283481), KQU( 5500220597564558267), + KQU(10330551509971296358), KQU(15958554768648714492), + KQU( 5174555954515360045), KQU( 1731318837687577735), + KQU( 3557700801048354857), 
KQU(13764012341928616198), + KQU(13115166194379119043), KQU( 7989321021560255519), + KQU( 2103584280905877040), KQU( 9230788662155228488), + KQU(16396629323325547654), KQU( 657926409811318051), + KQU(15046700264391400727), KQU( 5120132858771880830), + KQU( 7934160097989028561), KQU( 6963121488531976245), + KQU(17412329602621742089), KQU(15144843053931774092), + KQU(17204176651763054532), KQU(13166595387554065870), + KQU( 8590377810513960213), KQU( 5834365135373991938), + KQU( 7640913007182226243), KQU( 3479394703859418425), + KQU(16402784452644521040), KQU( 4993979809687083980), + KQU(13254522168097688865), KQU(15643659095244365219), + KQU( 5881437660538424982), KQU(11174892200618987379), + KQU( 254409966159711077), KQU(17158413043140549909), + KQU( 3638048789290376272), KQU( 1376816930299489190), + KQU( 4622462095217761923), KQU(15086407973010263515), + KQU(13253971772784692238), KQU( 5270549043541649236), + KQU(11182714186805411604), KQU(12283846437495577140), + KQU( 5297647149908953219), KQU(10047451738316836654), + KQU( 4938228100367874746), KQU(12328523025304077923), + KQU( 3601049438595312361), KQU( 9313624118352733770), + KQU(13322966086117661798), KQU(16660005705644029394), + KQU(11337677526988872373), KQU(13869299102574417795), + KQU(15642043183045645437), KQU( 3021755569085880019), + KQU( 4979741767761188161), KQU(13679979092079279587), + KQU( 3344685842861071743), KQU(13947960059899588104), + KQU( 305806934293368007), KQU( 5749173929201650029), + KQU(11123724852118844098), KQU(15128987688788879802), + KQU(15251651211024665009), KQU( 7689925933816577776), + KQU(16732804392695859449), KQU(17087345401014078468), + KQU(14315108589159048871), KQU( 4820700266619778917), + KQU(16709637539357958441), KQU( 4936227875177351374), + KQU( 2137907697912987247), KQU(11628565601408395420), + KQU( 2333250549241556786), KQU( 5711200379577778637), + KQU( 5170680131529031729), KQU(12620392043061335164), + KQU( 95363390101096078), KQU( 5487981914081709462), + KQU( 
1763109823981838620), KQU( 3395861271473224396), + KQU( 1300496844282213595), KQU( 6894316212820232902), + KQU(10673859651135576674), KQU( 5911839658857903252), + KQU(17407110743387299102), KQU( 8257427154623140385), + KQU(11389003026741800267), KQU( 4070043211095013717), + KQU(11663806997145259025), KQU(15265598950648798210), + KQU( 630585789434030934), KQU( 3524446529213587334), + KQU( 7186424168495184211), KQU(10806585451386379021), + KQU(11120017753500499273), KQU( 1586837651387701301), + KQU(17530454400954415544), KQU( 9991670045077880430), + KQU( 7550997268990730180), KQU( 8640249196597379304), + KQU( 3522203892786893823), KQU(10401116549878854788), + KQU(13690285544733124852), KQU( 8295785675455774586), + KQU(15535716172155117603), KQU( 3112108583723722511), + KQU(17633179955339271113), KQU(18154208056063759375), + KQU( 1866409236285815666), KQU(13326075895396412882), + KQU( 8756261842948020025), KQU( 6281852999868439131), + KQU(15087653361275292858), KQU(10333923911152949397), + KQU( 5265567645757408500), KQU(12728041843210352184), + KQU( 6347959327507828759), KQU( 154112802625564758), + KQU(18235228308679780218), KQU( 3253805274673352418), + KQU( 4849171610689031197), KQU(17948529398340432518), + KQU(13803510475637409167), KQU(13506570190409883095), + KQU(15870801273282960805), KQU( 8451286481299170773), + KQU( 9562190620034457541), KQU( 8518905387449138364), + KQU(12681306401363385655), KQU( 3788073690559762558), + KQU( 5256820289573487769), KQU( 2752021372314875467), + KQU( 6354035166862520716), KQU( 4328956378309739069), + KQU( 449087441228269600), KQU( 5533508742653090868), + KQU( 1260389420404746988), KQU(18175394473289055097), + KQU( 1535467109660399420), KQU( 8818894282874061442), + KQU(12140873243824811213), KQU(15031386653823014946), + KQU( 1286028221456149232), KQU( 6329608889367858784), + KQU( 9419654354945132725), KQU( 6094576547061672379), + KQU(17706217251847450255), KQU( 1733495073065878126), + KQU(16918923754607552663), KQU( 
8881949849954945044), + KQU(12938977706896313891), KQU(14043628638299793407), + KQU(18393874581723718233), KQU( 6886318534846892044), + KQU(14577870878038334081), KQU(13541558383439414119), + KQU(13570472158807588273), KQU(18300760537910283361), + KQU( 818368572800609205), KQU( 1417000585112573219), + KQU(12337533143867683655), KQU(12433180994702314480), + KQU( 778190005829189083), KQU(13667356216206524711), + KQU( 9866149895295225230), KQU(11043240490417111999), + KQU( 1123933826541378598), KQU( 6469631933605123610), + KQU(14508554074431980040), KQU(13918931242962026714), + KQU( 2870785929342348285), KQU(14786362626740736974), + KQU(13176680060902695786), KQU( 9591778613541679456), + KQU( 9097662885117436706), KQU( 749262234240924947), + KQU( 1944844067793307093), KQU( 4339214904577487742), + KQU( 8009584152961946551), KQU(16073159501225501777), + KQU( 3335870590499306217), KQU(17088312653151202847), + KQU( 3108893142681931848), KQU(16636841767202792021), + KQU(10423316431118400637), KQU( 8008357368674443506), + KQU(11340015231914677875), KQU(17687896501594936090), + KQU(15173627921763199958), KQU( 542569482243721959), + KQU(15071714982769812975), KQU( 4466624872151386956), + KQU( 1901780715602332461), KQU( 9822227742154351098), + KQU( 1479332892928648780), KQU( 6981611948382474400), + KQU( 7620824924456077376), KQU(14095973329429406782), + KQU( 7902744005696185404), KQU(15830577219375036920), + KQU(10287076667317764416), KQU(12334872764071724025), + KQU( 4419302088133544331), KQU(14455842851266090520), + KQU(12488077416504654222), KQU( 7953892017701886766), + KQU( 6331484925529519007), KQU( 4902145853785030022), + KQU(17010159216096443073), KQU(11945354668653886087), + KQU(15112022728645230829), KQU(17363484484522986742), + KQU( 4423497825896692887), KQU( 8155489510809067471), + KQU( 258966605622576285), KQU( 5462958075742020534), + KQU( 6763710214913276228), KQU( 2368935183451109054), + KQU(14209506165246453811), KQU( 2646257040978514881), + KQU( 
3776001911922207672), KQU( 1419304601390147631), + KQU(14987366598022458284), KQU( 3977770701065815721), + KQU( 730820417451838898), KQU( 3982991703612885327), + KQU( 2803544519671388477), KQU(17067667221114424649), + KQU( 2922555119737867166), KQU( 1989477584121460932), + KQU(15020387605892337354), KQU( 9293277796427533547), + KQU(10722181424063557247), KQU(16704542332047511651), + KQU( 5008286236142089514), KQU(16174732308747382540), + KQU(17597019485798338402), KQU(13081745199110622093), + KQU( 8850305883842258115), KQU(12723629125624589005), + KQU( 8140566453402805978), KQU(15356684607680935061), + KQU(14222190387342648650), KQU(11134610460665975178), + KQU( 1259799058620984266), KQU(13281656268025610041), + KQU( 298262561068153992), KQU(12277871700239212922), + KQU(13911297774719779438), KQU(16556727962761474934), + KQU(17903010316654728010), KQU( 9682617699648434744), + KQU(14757681836838592850), KQU( 1327242446558524473), + KQU(11126645098780572792), KQU( 1883602329313221774), + KQU( 2543897783922776873), KQU(15029168513767772842), + KQU(12710270651039129878), KQU(16118202956069604504), + KQU(15010759372168680524), KQU( 2296827082251923948), + KQU(10793729742623518101), KQU(13829764151845413046), + KQU(17769301223184451213), KQU( 3118268169210783372), + KQU(17626204544105123127), KQU( 7416718488974352644), + KQU(10450751996212925994), KQU( 9352529519128770586), + KQU( 259347569641110140), KQU( 8048588892269692697), + KQU( 1774414152306494058), KQU(10669548347214355622), + KQU(13061992253816795081), KQU(18432677803063861659), + KQU( 8879191055593984333), KQU(12433753195199268041), + KQU(14919392415439730602), KQU( 6612848378595332963), + KQU( 6320986812036143628), KQU(10465592420226092859), + KQU( 4196009278962570808), KQU( 3747816564473572224), + KQU(17941203486133732898), KQU( 2350310037040505198), + KQU( 5811779859134370113), KQU(10492109599506195126), + KQU( 7699650690179541274), KQU( 1954338494306022961), + KQU(14095816969027231152), KQU( 
5841346919964852061), + KQU(14945969510148214735), KQU( 3680200305887550992), + KQU( 6218047466131695792), KQU( 8242165745175775096), + KQU(11021371934053307357), KQU( 1265099502753169797), + KQU( 4644347436111321718), KQU( 3609296916782832859), + KQU( 8109807992218521571), KQU(18387884215648662020), + KQU(14656324896296392902), KQU(17386819091238216751), + KQU(17788300878582317152), KQU( 7919446259742399591), + KQU( 4466613134576358004), KQU(12928181023667938509), + KQU(13147446154454932030), KQU(16552129038252734620), + KQU( 8395299403738822450), KQU(11313817655275361164), + KQU( 434258809499511718), KQU( 2074882104954788676), + KQU( 7929892178759395518), KQU( 9006461629105745388), + KQU( 5176475650000323086), KQU(11128357033468341069), + KQU(12026158851559118955), KQU(14699716249471156500), + KQU( 448982497120206757), KQU( 4156475356685519900), + KQU( 6063816103417215727), KQU(10073289387954971479), + KQU( 8174466846138590962), KQU( 2675777452363449006), + KQU( 9090685420572474281), KQU( 6659652652765562060), + KQU(12923120304018106621), KQU(11117480560334526775), + KQU( 937910473424587511), KQU( 1838692113502346645), + KQU(11133914074648726180), KQU( 7922600945143884053), + KQU(13435287702700959550), KQU( 5287964921251123332), + KQU(11354875374575318947), KQU(17955724760748238133), + KQU(13728617396297106512), KQU( 4107449660118101255), + KQU( 1210269794886589623), KQU(11408687205733456282), + KQU( 4538354710392677887), KQU(13566803319341319267), + KQU(17870798107734050771), KQU( 3354318982568089135), + KQU( 9034450839405133651), KQU(13087431795753424314), + KQU( 950333102820688239), KQU( 1968360654535604116), + KQU(16840551645563314995), KQU( 8867501803892924995), + KQU(11395388644490626845), KQU( 1529815836300732204), + KQU(13330848522996608842), KQU( 1813432878817504265), + KQU( 2336867432693429560), KQU(15192805445973385902), + KQU( 2528593071076407877), KQU( 128459777936689248), + KQU( 9976345382867214866), KQU( 6208885766767996043), + 
KQU(14982349522273141706), KQU( 3099654362410737822), + KQU(13776700761947297661), KQU( 8806185470684925550), + KQU( 8151717890410585321), KQU( 640860591588072925), + KQU(14592096303937307465), KQU( 9056472419613564846), + KQU(14861544647742266352), KQU(12703771500398470216), + KQU( 3142372800384138465), KQU( 6201105606917248196), + KQU(18337516409359270184), KQU(15042268695665115339), + KQU(15188246541383283846), KQU(12800028693090114519), + KQU( 5992859621101493472), KQU(18278043971816803521), + KQU( 9002773075219424560), KQU( 7325707116943598353), + KQU( 7930571931248040822), KQU( 5645275869617023448), + KQU( 7266107455295958487), KQU( 4363664528273524411), + KQU(14313875763787479809), KQU(17059695613553486802), + KQU( 9247761425889940932), KQU(13704726459237593128), + KQU( 2701312427328909832), KQU(17235532008287243115), + KQU(14093147761491729538), KQU( 6247352273768386516), + KQU( 8268710048153268415), KQU( 7985295214477182083), + KQU(15624495190888896807), KQU( 3772753430045262788), + KQU( 9133991620474991698), KQU( 5665791943316256028), + KQU( 7551996832462193473), KQU(13163729206798953877), + KQU( 9263532074153846374), KQU( 1015460703698618353), + KQU(17929874696989519390), KQU(18257884721466153847), + KQU(16271867543011222991), KQU( 3905971519021791941), + KQU(16814488397137052085), KQU( 1321197685504621613), + KQU( 2870359191894002181), KQU(14317282970323395450), + KQU(13663920845511074366), KQU( 2052463995796539594), + KQU(14126345686431444337), KQU( 1727572121947022534), + KQU(17793552254485594241), KQU( 6738857418849205750), + KQU( 1282987123157442952), KQU(16655480021581159251), + KQU( 6784587032080183866), KQU(14726758805359965162), + KQU( 7577995933961987349), KQU(12539609320311114036), + KQU(10789773033385439494), KQU( 8517001497411158227), + KQU(10075543932136339710), KQU(14838152340938811081), + KQU( 9560840631794044194), KQU(17445736541454117475), + KQU(10633026464336393186), KQU(15705729708242246293), + KQU( 1117517596891411098), KQU( 
4305657943415886942), + KQU( 4948856840533979263), KQU(16071681989041789593), + KQU(13723031429272486527), KQU( 7639567622306509462), + KQU(12670424537483090390), KQU( 9715223453097197134), + KQU( 5457173389992686394), KQU( 289857129276135145), + KQU(17048610270521972512), KQU( 692768013309835485), + KQU(14823232360546632057), KQU(18218002361317895936), + KQU( 3281724260212650204), KQU(16453957266549513795), + KQU( 8592711109774511881), KQU( 929825123473369579), + KQU(15966784769764367791), KQU( 9627344291450607588), + KQU(10849555504977813287), KQU( 9234566913936339275), + KQU( 6413807690366911210), KQU(10862389016184219267), + KQU(13842504799335374048), KQU( 1531994113376881174), + KQU( 2081314867544364459), KQU(16430628791616959932), + KQU( 8314714038654394368), KQU( 9155473892098431813), + KQU(12577843786670475704), KQU( 4399161106452401017), + KQU( 1668083091682623186), KQU( 1741383777203714216), + KQU( 2162597285417794374), KQU(15841980159165218736), + KQU( 1971354603551467079), KQU( 1206714764913205968), + KQU( 4790860439591272330), KQU(14699375615594055799), + KQU( 8374423871657449988), KQU(10950685736472937738), + KQU( 697344331343267176), KQU(10084998763118059810), + KQU(12897369539795983124), KQU(12351260292144383605), + KQU( 1268810970176811234), KQU( 7406287800414582768), + KQU( 516169557043807831), KQU( 5077568278710520380), + KQU( 3828791738309039304), KQU( 7721974069946943610), + KQU( 3534670260981096460), KQU( 4865792189600584891), + KQU(16892578493734337298), KQU( 9161499464278042590), + KQU(11976149624067055931), KQU(13219479887277343990), + KQU(14161556738111500680), KQU(14670715255011223056), + KQU( 4671205678403576558), KQU(12633022931454259781), + KQU(14821376219869187646), KQU( 751181776484317028), + KQU( 2192211308839047070), KQU(11787306362361245189), + KQU(10672375120744095707), KQU( 4601972328345244467), + KQU(15457217788831125879), KQU( 8464345256775460809), + KQU(10191938789487159478), KQU( 6184348739615197613), + 
KQU(11425436778806882100), KQU( 2739227089124319793), + KQU( 461464518456000551), KQU( 4689850170029177442), + KQU( 6120307814374078625), KQU(11153579230681708671), + KQU( 7891721473905347926), KQU(10281646937824872400), + KQU( 3026099648191332248), KQU( 8666750296953273818), + KQU(14978499698844363232), KQU(13303395102890132065), + KQU( 8182358205292864080), KQU(10560547713972971291), + KQU(11981635489418959093), KQU( 3134621354935288409), + KQU(11580681977404383968), KQU(14205530317404088650), + KQU( 5997789011854923157), KQU(13659151593432238041), + KQU(11664332114338865086), KQU( 7490351383220929386), + KQU( 7189290499881530378), KQU(15039262734271020220), + KQU( 2057217285976980055), KQU( 555570804905355739), + KQU(11235311968348555110), KQU(13824557146269603217), + KQU(16906788840653099693), KQU( 7222878245455661677), + KQU( 5245139444332423756), KQU( 4723748462805674292), + KQU(12216509815698568612), KQU(17402362976648951187), + KQU(17389614836810366768), KQU( 4880936484146667711), + KQU( 9085007839292639880), KQU(13837353458498535449), + KQU(11914419854360366677), KQU(16595890135313864103), + KQU( 6313969847197627222), KQU(18296909792163910431), + KQU(10041780113382084042), KQU( 2499478551172884794), + KQU(11057894246241189489), KQU( 9742243032389068555), + KQU(12838934582673196228), KQU(13437023235248490367), + KQU(13372420669446163240), KQU( 6752564244716909224), + KQU( 7157333073400313737), KQU(12230281516370654308), + KQU( 1182884552219419117), KQU( 2955125381312499218), + KQU(10308827097079443249), KQU( 1337648572986534958), + KQU(16378788590020343939), KQU( 108619126514420935), + KQU( 3990981009621629188), KQU( 5460953070230946410), + KQU( 9703328329366531883), KQU(13166631489188077236), + KQU( 1104768831213675170), KQU( 3447930458553877908), + KQU( 8067172487769945676), KQU( 5445802098190775347), + KQU( 3244840981648973873), KQU(17314668322981950060), + KQU( 5006812527827763807), KQU(18158695070225526260), + KQU( 2824536478852417853), 
KQU(13974775809127519886), + KQU( 9814362769074067392), KQU(17276205156374862128), + KQU(11361680725379306967), KQU( 3422581970382012542), + KQU(11003189603753241266), KQU(11194292945277862261), + KQU( 6839623313908521348), KQU(11935326462707324634), + KQU( 1611456788685878444), KQU(13112620989475558907), + KQU( 517659108904450427), KQU(13558114318574407624), + KQU(15699089742731633077), KQU( 4988979278862685458), + KQU( 8111373583056521297), KQU( 3891258746615399627), + KQU( 8137298251469718086), KQU(12748663295624701649), + KQU( 4389835683495292062), KQU( 5775217872128831729), + KQU( 9462091896405534927), KQU( 8498124108820263989), + KQU( 8059131278842839525), KQU(10503167994254090892), + KQU(11613153541070396656), KQU(18069248738504647790), + KQU( 570657419109768508), KQU( 3950574167771159665), + KQU( 5514655599604313077), KQU( 2908460854428484165), + KQU(10777722615935663114), KQU(12007363304839279486), + KQU( 9800646187569484767), KQU( 8795423564889864287), + KQU(14257396680131028419), KQU( 6405465117315096498), + KQU( 7939411072208774878), KQU(17577572378528990006), + KQU(14785873806715994850), KQU(16770572680854747390), + KQU(18127549474419396481), KQU(11637013449455757750), + KQU(14371851933996761086), KQU( 3601181063650110280), + KQU( 4126442845019316144), KQU(10198287239244320669), + KQU(18000169628555379659), KQU(18392482400739978269), + KQU( 6219919037686919957), KQU( 3610085377719446052), + KQU( 2513925039981776336), KQU(16679413537926716955), + KQU(12903302131714909434), KQU( 5581145789762985009), + KQU(12325955044293303233), KQU(17216111180742141204), + KQU( 6321919595276545740), KQU( 3507521147216174501), + KQU( 9659194593319481840), KQU(11473976005975358326), + KQU(14742730101435987026), KQU( 492845897709954780), + KQU(16976371186162599676), KQU(17712703422837648655), + KQU( 9881254778587061697), KQU( 8413223156302299551), + KQU( 1563841828254089168), KQU( 9996032758786671975), + KQU( 138877700583772667), KQU(13003043368574995989), + KQU( 
4390573668650456587), KQU( 8610287390568126755), + KQU(15126904974266642199), KQU( 6703637238986057662), + KQU( 2873075592956810157), KQU( 6035080933946049418), + KQU(13382846581202353014), KQU( 7303971031814642463), + KQU(18418024405307444267), KQU( 5847096731675404647), + KQU( 4035880699639842500), KQU(11525348625112218478), + KQU( 3041162365459574102), KQU( 2604734487727986558), + KQU(15526341771636983145), KQU(14556052310697370254), + KQU(12997787077930808155), KQU( 9601806501755554499), + KQU(11349677952521423389), KQU(14956777807644899350), + KQU(16559736957742852721), KQU(12360828274778140726), + KQU( 6685373272009662513), KQU(16932258748055324130), + KQU(15918051131954158508), KQU( 1692312913140790144), + KQU( 546653826801637367), KQU( 5341587076045986652), + KQU(14975057236342585662), KQU(12374976357340622412), + KQU(10328833995181940552), KQU(12831807101710443149), + KQU(10548514914382545716), KQU( 2217806727199715993), + KQU(12627067369242845138), KQU( 4598965364035438158), + KQU( 150923352751318171), KQU(14274109544442257283), + KQU( 4696661475093863031), KQU( 1505764114384654516), + KQU(10699185831891495147), KQU( 2392353847713620519), + KQU( 3652870166711788383), KQU( 8640653276221911108), + KQU( 3894077592275889704), KQU( 4918592872135964845), + KQU(16379121273281400789), KQU(12058465483591683656), + KQU(11250106829302924945), KQU( 1147537556296983005), + KQU( 6376342756004613268), KQU(14967128191709280506), + KQU(18007449949790627628), KQU( 9497178279316537841), + KQU( 7920174844809394893), KQU(10037752595255719907), + KQU(15875342784985217697), KQU(15311615921712850696), + KQU( 9552902652110992950), KQU(14054979450099721140), + KQU( 5998709773566417349), KQU(18027910339276320187), + KQU( 8223099053868585554), KQU( 7842270354824999767), + KQU( 4896315688770080292), KQU(12969320296569787895), + KQU( 2674321489185759961), KQU( 4053615936864718439), + KQU(11349775270588617578), KQU( 4743019256284553975), + KQU( 5602100217469723769), 
KQU(14398995691411527813), + KQU( 7412170493796825470), KQU( 836262406131744846), + KQU( 8231086633845153022), KQU( 5161377920438552287), + KQU( 8828731196169924949), KQU(16211142246465502680), + KQU( 3307990879253687818), KQU( 5193405406899782022), + KQU( 8510842117467566693), KQU( 6070955181022405365), + KQU(14482950231361409799), KQU(12585159371331138077), + KQU( 3511537678933588148), KQU( 2041849474531116417), + KQU(10944936685095345792), KQU(18303116923079107729), + KQU( 2720566371239725320), KQU( 4958672473562397622), + KQU( 3032326668253243412), KQU(13689418691726908338), + KQU( 1895205511728843996), KQU( 8146303515271990527), + KQU(16507343500056113480), KQU( 473996939105902919), + KQU( 9897686885246881481), KQU(14606433762712790575), + KQU( 6732796251605566368), KQU( 1399778120855368916), + KQU( 935023885182833777), KQU(16066282816186753477), + KQU( 7291270991820612055), KQU(17530230393129853844), + KQU(10223493623477451366), KQU(15841725630495676683), + KQU(17379567246435515824), KQU( 8588251429375561971), + KQU(18339511210887206423), KQU(17349587430725976100), + KQU(12244876521394838088), KQU( 6382187714147161259), + KQU(12335807181848950831), KQU(16948885622305460665), + KQU(13755097796371520506), KQU(14806740373324947801), + KQU( 4828699633859287703), KQU( 8209879281452301604), + KQU(12435716669553736437), KQU(13970976859588452131), + KQU( 6233960842566773148), KQU(12507096267900505759), + KQU( 1198713114381279421), KQU(14989862731124149015), + KQU(15932189508707978949), KQU( 2526406641432708722), + KQU( 29187427817271982), KQU( 1499802773054556353), + KQU(10816638187021897173), KQU( 5436139270839738132), + KQU( 6659882287036010082), KQU( 2154048955317173697), + KQU(10887317019333757642), KQU(16281091802634424955), + KQU(10754549879915384901), KQU(10760611745769249815), + KQU( 2161505946972504002), KQU( 5243132808986265107), + KQU(10129852179873415416), KQU( 710339480008649081), + KQU( 7802129453068808528), KQU(17967213567178907213), + 
KQU(15730859124668605599), KQU(13058356168962376502), + KQU( 3701224985413645909), KQU(14464065869149109264), + KQU( 9959272418844311646), KQU(10157426099515958752), + KQU(14013736814538268528), KQU(17797456992065653951), + KQU(17418878140257344806), KQU(15457429073540561521), + KQU( 2184426881360949378), KQU( 2062193041154712416), + KQU( 8553463347406931661), KQU( 4913057625202871854), + KQU( 2668943682126618425), KQU(17064444737891172288), + KQU( 4997115903913298637), KQU(12019402608892327416), + KQU(17603584559765897352), KQU(11367529582073647975), + KQU( 8211476043518436050), KQU( 8676849804070323674), + KQU(18431829230394475730), KQU(10490177861361247904), + KQU( 9508720602025651349), KQU( 7409627448555722700), + KQU( 5804047018862729008), KQU(11943858176893142594), + KQU(11908095418933847092), KQU( 5415449345715887652), + KQU( 1554022699166156407), KQU( 9073322106406017161), + KQU( 7080630967969047082), KQU(18049736940860732943), + KQU(12748714242594196794), KQU( 1226992415735156741), + KQU(17900981019609531193), KQU(11720739744008710999), + KQU( 3006400683394775434), KQU(11347974011751996028), + KQU( 3316999628257954608), KQU( 8384484563557639101), + KQU(18117794685961729767), KQU( 1900145025596618194), + KQU(17459527840632892676), KQU( 5634784101865710994), + KQU( 7918619300292897158), KQU( 3146577625026301350), + KQU( 9955212856499068767), KQU( 1873995843681746975), + KQU( 1561487759967972194), KQU( 8322718804375878474), + KQU(11300284215327028366), KQU( 4667391032508998982), + KQU( 9820104494306625580), KQU(17922397968599970610), + KQU( 1784690461886786712), KQU(14940365084341346821), + KQU( 5348719575594186181), KQU(10720419084507855261), + KQU(14210394354145143274), KQU( 2426468692164000131), + KQU(16271062114607059202), KQU(14851904092357070247), + KQU( 6524493015693121897), KQU( 9825473835127138531), + KQU(14222500616268569578), KQU(15521484052007487468), + KQU(14462579404124614699), KQU(11012375590820665520), + KQU(11625327350536084927), 
KQU(14452017765243785417), + KQU( 9989342263518766305), KQU( 3640105471101803790), + KQU( 4749866455897513242), KQU(13963064946736312044), + KQU(10007416591973223791), KQU(18314132234717431115), + KQU( 3286596588617483450), KQU( 7726163455370818765), + KQU( 7575454721115379328), KQU( 5308331576437663422), + KQU(18288821894903530934), KQU( 8028405805410554106), + KQU(15744019832103296628), KQU( 149765559630932100), + KQU( 6137705557200071977), KQU(14513416315434803615), + KQU(11665702820128984473), KQU( 218926670505601386), + KQU( 6868675028717769519), KQU(15282016569441512302), + KQU( 5707000497782960236), KQU( 6671120586555079567), + KQU( 2194098052618985448), KQU(16849577895477330978), + KQU(12957148471017466283), KQU( 1997805535404859393), + KQU( 1180721060263860490), KQU(13206391310193756958), + KQU(12980208674461861797), KQU( 3825967775058875366), + KQU(17543433670782042631), KQU( 1518339070120322730), + KQU(16344584340890991669), KQU( 2611327165318529819), + KQU(11265022723283422529), KQU( 4001552800373196817), + KQU(14509595890079346161), KQU( 3528717165416234562), + KQU(18153222571501914072), KQU( 9387182977209744425), + KQU(10064342315985580021), KQU(11373678413215253977), + KQU( 2308457853228798099), KQU( 9729042942839545302), + KQU( 7833785471140127746), KQU( 6351049900319844436), + KQU(14454610627133496067), KQU(12533175683634819111), + KQU(15570163926716513029), KQU(13356980519185762498) }; TEST_BEGIN(test_gen_rand_32) diff --git a/test/unit/util.c b/test/unit/util.c index c11d5984..8ab39a45 100644 --- a/test/unit/util.c +++ b/test/unit/util.c @@ -52,8 +52,8 @@ TEST_BEGIN(test_malloc_strtoumax) const char *expected_errno_name; uintmax_t expected_x; }; -#define ERR(e) e, #e -#define UMAX(x) ((uintmax_t)x##ULL) +#define ERR(e) e, #e +#define KUMAX(x) ((uintmax_t)x##ULL) struct test_s tests[] = { {"0", "0", -1, ERR(EINVAL), UINTMAX_MAX}, {"0", "0", 1, ERR(EINVAL), UINTMAX_MAX}, @@ -64,51 +64,51 @@ TEST_BEGIN(test_malloc_strtoumax) {"++3", "++3", 0, 
ERR(EINVAL), UINTMAX_MAX}, {"-", "-", 0, ERR(EINVAL), UINTMAX_MAX}, - {"42", "", 0, ERR(0), UMAX(42)}, - {"+42", "", 0, ERR(0), UMAX(42)}, - {"-42", "", 0, ERR(0), UMAX(-42)}, - {"042", "", 0, ERR(0), UMAX(042)}, - {"+042", "", 0, ERR(0), UMAX(042)}, - {"-042", "", 0, ERR(0), UMAX(-042)}, - {"0x42", "", 0, ERR(0), UMAX(0x42)}, - {"+0x42", "", 0, ERR(0), UMAX(0x42)}, - {"-0x42", "", 0, ERR(0), UMAX(-0x42)}, + {"42", "", 0, ERR(0), KUMAX(42)}, + {"+42", "", 0, ERR(0), KUMAX(42)}, + {"-42", "", 0, ERR(0), KUMAX(-42)}, + {"042", "", 0, ERR(0), KUMAX(042)}, + {"+042", "", 0, ERR(0), KUMAX(042)}, + {"-042", "", 0, ERR(0), KUMAX(-042)}, + {"0x42", "", 0, ERR(0), KUMAX(0x42)}, + {"+0x42", "", 0, ERR(0), KUMAX(0x42)}, + {"-0x42", "", 0, ERR(0), KUMAX(-0x42)}, - {"0", "", 0, ERR(0), UMAX(0)}, - {"1", "", 0, ERR(0), UMAX(1)}, + {"0", "", 0, ERR(0), KUMAX(0)}, + {"1", "", 0, ERR(0), KUMAX(1)}, - {"42", "", 0, ERR(0), UMAX(42)}, - {" 42", "", 0, ERR(0), UMAX(42)}, - {"42 ", " ", 0, ERR(0), UMAX(42)}, - {"0x", "x", 0, ERR(0), UMAX(0)}, - {"42x", "x", 0, ERR(0), UMAX(42)}, + {"42", "", 0, ERR(0), KUMAX(42)}, + {" 42", "", 0, ERR(0), KUMAX(42)}, + {"42 ", " ", 0, ERR(0), KUMAX(42)}, + {"0x", "x", 0, ERR(0), KUMAX(0)}, + {"42x", "x", 0, ERR(0), KUMAX(42)}, - {"07", "", 0, ERR(0), UMAX(7)}, - {"010", "", 0, ERR(0), UMAX(8)}, - {"08", "8", 0, ERR(0), UMAX(0)}, - {"0_", "_", 0, ERR(0), UMAX(0)}, + {"07", "", 0, ERR(0), KUMAX(7)}, + {"010", "", 0, ERR(0), KUMAX(8)}, + {"08", "8", 0, ERR(0), KUMAX(0)}, + {"0_", "_", 0, ERR(0), KUMAX(0)}, - {"0x", "x", 0, ERR(0), UMAX(0)}, - {"0X", "X", 0, ERR(0), UMAX(0)}, - {"0xg", "xg", 0, ERR(0), UMAX(0)}, - {"0XA", "", 0, ERR(0), UMAX(10)}, + {"0x", "x", 0, ERR(0), KUMAX(0)}, + {"0X", "X", 0, ERR(0), KUMAX(0)}, + {"0xg", "xg", 0, ERR(0), KUMAX(0)}, + {"0XA", "", 0, ERR(0), KUMAX(10)}, - {"010", "", 10, ERR(0), UMAX(10)}, - {"0x3", "x3", 10, ERR(0), UMAX(0)}, + {"010", "", 10, ERR(0), KUMAX(10)}, + {"0x3", "x3", 10, ERR(0), KUMAX(0)}, - {"12", "2", 
2, ERR(0), UMAX(1)}, - {"78", "8", 8, ERR(0), UMAX(7)}, - {"9a", "a", 10, ERR(0), UMAX(9)}, - {"9A", "A", 10, ERR(0), UMAX(9)}, - {"fg", "g", 16, ERR(0), UMAX(15)}, - {"FG", "G", 16, ERR(0), UMAX(15)}, - {"0xfg", "g", 16, ERR(0), UMAX(15)}, - {"0XFG", "G", 16, ERR(0), UMAX(15)}, - {"z_", "_", 36, ERR(0), UMAX(35)}, - {"Z_", "_", 36, ERR(0), UMAX(35)} + {"12", "2", 2, ERR(0), KUMAX(1)}, + {"78", "8", 8, ERR(0), KUMAX(7)}, + {"9a", "a", 10, ERR(0), KUMAX(9)}, + {"9A", "A", 10, ERR(0), KUMAX(9)}, + {"fg", "g", 16, ERR(0), KUMAX(15)}, + {"FG", "G", 16, ERR(0), KUMAX(15)}, + {"0xfg", "g", 16, ERR(0), KUMAX(15)}, + {"0XFG", "G", 16, ERR(0), KUMAX(15)}, + {"z_", "_", 36, ERR(0), KUMAX(35)}, + {"Z_", "_", 36, ERR(0), KUMAX(35)} }; #undef ERR -#undef UMAX +#undef KUMAX unsigned i; for (i = 0; i < sizeof(tests)/sizeof(struct test_s); i++) { From 999e1b5cc74e299a25cc718ddf9fae370cf45264 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Thu, 29 May 2014 09:03:00 +0900 Subject: [PATCH 0462/3378] Fix thd_join on win64 --- test/src/thd.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/test/src/thd.c b/test/src/thd.c index 7e53625f..c9d00658 100644 --- a/test/src/thd.c +++ b/test/src/thd.c @@ -14,8 +14,11 @@ void thd_join(thd_t thd, void **ret) { - if (WaitForSingleObject(thd, INFINITE) == WAIT_OBJECT_0 && ret) - GetExitCodeThread(thd, (LPDWORD) ret); + if (WaitForSingleObject(thd, INFINITE) == WAIT_OBJECT_0 && ret) { + DWORD exit_code; + GetExitCodeThread(thd, (LPDWORD) &exit_code); + *ret = (void *)(uintptr_t)exit_code; + } } #else From ff2e999667cbd06e5e80c243277c1f3c72d6d263 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Thu, 29 May 2014 16:33:02 +0900 Subject: [PATCH 0463/3378] Don't use msvc_compat's C99 headers with MSVC versions that have (some) C99 support --- configure.ac | 4 ++++ include/msvc_compat/{ => C99}/inttypes.h | 0 include/msvc_compat/{ => C99}/stdbool.h | 0 include/msvc_compat/{ => C99}/stdint.h | 0 4 files changed, 4 insertions(+) 
rename include/msvc_compat/{ => C99}/inttypes.h (100%) rename include/msvc_compat/{ => C99}/stdbool.h (100%) rename include/msvc_compat/{ => C99}/stdint.h (100%) diff --git a/configure.ac b/configure.ac index 58522499..5aeaa088 100644 --- a/configure.ac +++ b/configure.ac @@ -155,6 +155,10 @@ if test "x${ac_cv_big_endian}" = "x1" ; then AC_DEFINE_UNQUOTED([JEMALLOC_BIG_ENDIAN], [ ]) fi +if test "x${je_cv_msvc}" = "xyes" -a "x${ac_cv_header_inttypes_h}" = "xno"; then + CPPFLAGS="$CPPFLAGS -I${srcroot}/include/msvc_compat/C99" +fi + AC_CHECK_SIZEOF([void *]) if test "x${ac_cv_sizeof_void_p}" = "x8" ; then LG_SIZEOF_PTR=3 diff --git a/include/msvc_compat/inttypes.h b/include/msvc_compat/C99/inttypes.h similarity index 100% rename from include/msvc_compat/inttypes.h rename to include/msvc_compat/C99/inttypes.h diff --git a/include/msvc_compat/stdbool.h b/include/msvc_compat/C99/stdbool.h similarity index 100% rename from include/msvc_compat/stdbool.h rename to include/msvc_compat/C99/stdbool.h diff --git a/include/msvc_compat/stdint.h b/include/msvc_compat/C99/stdint.h similarity index 100% rename from include/msvc_compat/stdint.h rename to include/msvc_compat/C99/stdint.h From 8c6157558aca6cb764b4f312c3d4f285664ef3e7 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Thu, 29 May 2014 16:58:21 +0900 Subject: [PATCH 0464/3378] Add -FS flag to support parallel builds with MSVC 2013 --- configure.ac | 1 + 1 file changed, 1 insertion(+) diff --git a/configure.ac b/configure.ac index 5aeaa088..045f62e2 100644 --- a/configure.ac +++ b/configure.ac @@ -141,6 +141,7 @@ if test "x$CFLAGS" = "x" ; then JE_CFLAGS_APPEND([-Zi]) JE_CFLAGS_APPEND([-MT]) JE_CFLAGS_APPEND([-W3]) + JE_CFLAGS_APPEND([-FS]) CPPFLAGS="$CPPFLAGS -I${srcroot}/include/msvc_compat" fi fi From 6f6704c35b28e919552a50e9e1d89a75a8b7c962 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Thu, 29 May 2014 17:01:10 +0900 Subject: [PATCH 0465/3378] Make in-tree MSVC builds work --- configure.ac | 4 ++-- 1 file changed, 2 
insertions(+), 2 deletions(-) diff --git a/configure.ac b/configure.ac index 045f62e2..4944c44a 100644 --- a/configure.ac +++ b/configure.ac @@ -142,7 +142,7 @@ if test "x$CFLAGS" = "x" ; then JE_CFLAGS_APPEND([-MT]) JE_CFLAGS_APPEND([-W3]) JE_CFLAGS_APPEND([-FS]) - CPPFLAGS="$CPPFLAGS -I${srcroot}/include/msvc_compat" + CPPFLAGS="$CPPFLAGS -I${srcdir}/include/msvc_compat" fi fi dnl Append EXTRA_CFLAGS to CFLAGS, if defined. @@ -157,7 +157,7 @@ if test "x${ac_cv_big_endian}" = "x1" ; then fi if test "x${je_cv_msvc}" = "xyes" -a "x${ac_cv_header_inttypes_h}" = "xno"; then - CPPFLAGS="$CPPFLAGS -I${srcroot}/include/msvc_compat/C99" + CPPFLAGS="$CPPFLAGS -I${srcdir}/include/msvc_compat/C99" fi AC_CHECK_SIZEOF([void *]) From 0b5c92213fbafc52c5b5a5dc84e91eacc812ae0b Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 1 Jun 2014 22:05:08 -0700 Subject: [PATCH 0466/3378] Fix fallback lg_floor() implementations. --- include/jemalloc/internal/util.h | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h index 78648238..54aed8ec 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util.h @@ -152,9 +152,9 @@ lg_floor(size_t x) { #if (LG_SIZEOF_PTR == LG_SIZEOF_INT) - return ((8 << LG_SIZEOF_PTR - 1) - __builtin_clz(x)); + return (((8 << LG_SIZEOF_PTR) - 1) - __builtin_clz(x)); #elif (LG_SIZEOF_PTR == LG_SIZEOF_LONG) - return ((8 << LG_SIZEOF_PTR - 1) - __builtin_clzl(x)); + return (((8 << LG_SIZEOF_PTR) - 1) - __builtin_clzl(x)); #else # error "Unsupported type sizes for lg_floor()" #endif @@ -164,16 +164,22 @@ JEMALLOC_INLINE size_t lg_floor(size_t x) { - x |= (x >> 1); - x |= (x >> 2); - x |= (x >> 4); - x |= (x >> 8); - x |= (x >> 16); + x |= (x >> 1); + x |= (x >> 2); + x |= (x >> 4); + x |= (x >> 8); + x |= (x >> 16); #if (LG_SIZEOF_PTR == 3 && LG_SIZEOF_PTR == LG_SIZEOF_LONG) - x |= (x >> 32); - return (65 - ffsl(~x)); + x |= (x >> 32); 
+ if (x == KZU(0xffffffffffffffff)) + return (63); + x++; + return (ffsl(x) - 2); #elif (LG_SIZEOF_PTR == 2) - return (33 - ffs(~x)); + if (x == KZU(0xffffffff)) + return (31); + x++; + return (ffs(x) - 2); #else # error "Unsupported type sizes for lg_floor()" #endif From 9c3a10fdf6baa5ddb042b6adbef1ff1b3c613ce3 Mon Sep 17 00:00:00 2001 From: Richard Diamond Date: Wed, 28 May 2014 21:37:02 -0500 Subject: [PATCH 0467/3378] Try to use __builtin_ffsl if ffsl is unavailable. Some platforms (like those using Newlib) don't have ffs/ffsl. This commit adds a check to configure.ac for __builtin_ffsl if ffsl isn't found. __builtin_ffsl performs the same function as ffsl, and has the added benefit of being available on any platform utilizing Gcc-compatible compiler. This change does not address the used of ffs in the MALLOCX_ARENA() macro. --- configure.ac | 30 +++++++++++++++---- include/jemalloc/internal/arena.h | 2 +- include/jemalloc/internal/bitmap.h | 4 +-- .../jemalloc/internal/jemalloc_internal.h.in | 3 ++ .../internal/jemalloc_internal_decls.h | 10 ++++--- .../internal/jemalloc_internal_defs.h.in | 7 +++++ include/jemalloc/internal/util.h | 26 ++++++++++++++-- src/arena.c | 2 +- src/rtree.c | 4 +-- 9 files changed, 71 insertions(+), 17 deletions(-) diff --git a/configure.ac b/configure.ac index 4944c44a..3d36b5f8 100644 --- a/configure.ac +++ b/configure.ac @@ -1109,9 +1109,11 @@ elif test "x${force_tls}" = "x1" ; then fi dnl ============================================================================ -dnl Check for ffsl(3), and fail if not found. This function exists on all -dnl platforms that jemalloc currently has a chance of functioning on without -dnl modification. +dnl Check for ffsl(3), then __builtin_ffsl(), and fail if neither are found. +dnl One of those two functions should (theoretically) exist on all platforms +dnl that jemalloc currently has a chance of functioning on without modification. 
+dnl We additionally assume ffs() or __builtin_ffs() are defined if +dnl ffsl() or __builtin_ffsl() are defined, respectively. JE_COMPILABLE([a program using ffsl], [ #include #include @@ -1122,8 +1124,26 @@ JE_COMPILABLE([a program using ffsl], [ printf("%d\n", rv); } ], [je_cv_function_ffsl]) -if test "x${je_cv_function_ffsl}" != "xyes" ; then - AC_MSG_ERROR([Cannot build without ffsl(3)]) +if test "x${je_cv_function_ffsl}" == "xyes" ; then + AC_DEFINE([JEMALLOC_INTERNAL_FFSL], [ffsl]) + AC_DEFINE([JEMALLOC_INTERNAL_FFS], [ffs]) +else + JE_COMPILABLE([a program using __builtin_ffsl], [ + #include + #include + #include + ], [ + { + int rv = __builtin_ffsl(0x08); + printf("%d\n", rv); + } + ], [je_cv_gcc_builtin_ffsl]) + if test "x${je_cv_gcc_builtin_ffsl}" == "xyes" ; then + AC_DEFINE([JEMALLOC_INTERNAL_FFSL], [__builtin_ffsl]) + AC_DEFINE([JEMALLOC_INTERNAL_FFS], [__builtin_ffs]) + else + AC_MSG_ERROR([Cannot build without ffsl(3) or __builtin_ffsl()]) + fi fi dnl ============================================================================ diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 2dc9501d..cb73283b 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -970,7 +970,7 @@ arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr) /* Rescale (factor powers of 2 out of the numerator and denominator). 
*/ interval = bin_info->reg_interval; - shift = ffs(interval) - 1; + shift = jemalloc_ffs(interval) - 1; diff >>= shift; interval >>= shift; diff --git a/include/jemalloc/internal/bitmap.h b/include/jemalloc/internal/bitmap.h index 605ebac5..6db4ab70 100644 --- a/include/jemalloc/internal/bitmap.h +++ b/include/jemalloc/internal/bitmap.h @@ -130,11 +130,11 @@ bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo) i = binfo->nlevels - 1; g = bitmap[binfo->levels[i].group_offset]; - bit = ffsl(g) - 1; + bit = jemalloc_ffsl(g) - 1; while (i > 0) { i--; g = bitmap[binfo->levels[i].group_offset + bit]; - bit = (bit << LG_BITMAP_GROUP_NBITS) + (ffsl(g) - 1); + bit = (bit << LG_BITMAP_GROUP_NBITS) + (jemalloc_ffsl(g) - 1); } bitmap_set(bitmap, binfo, bit); diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 491345c9..f2cd743f 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -215,6 +215,9 @@ static const bool config_ivsalloc = # ifdef __tile__ # define LG_QUANTUM 4 # endif +# ifdef __le32__ +# define LG_QUANTUM 4 +# endif # ifndef LG_QUANTUM # error "No LG_QUANTUM definition for architecture; specify via CPPFLAGS" # endif diff --git a/include/jemalloc/internal/jemalloc_internal_decls.h b/include/jemalloc/internal/jemalloc_internal_decls.h index 7775ab38..fa590404 100644 --- a/include/jemalloc/internal/jemalloc_internal_decls.h +++ b/include/jemalloc/internal/jemalloc_internal_decls.h @@ -15,11 +15,13 @@ #else # include # include -# include -# if !defined(SYS_write) && defined(__NR_write) -# define SYS_write __NR_write +# if !defined(__pnacl__) && !defined(__native_client__) +# include +# if !defined(SYS_write) && defined(__NR_write) +# define SYS_write __NR_write +# endif +# include # endif -# include # include # include #endif diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in 
b/include/jemalloc/internal/jemalloc_internal_defs.h.in index a9a50f14..65ac76c0 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -152,6 +152,13 @@ /* TLS is used to map arenas and magazine caches to threads. */ #undef JEMALLOC_TLS +/* + * ffs()/ffsl() functions to use for bitmapping. Don't use these directly; + * instead, use jemalloc_ffs() or jemalloc_ffsl() from util.h. + */ +#undef JEMALLOC_INTERNAL_FFSL +#undef JEMALLOC_INTERNAL_FFS + /* * JEMALLOC_IVSALLOC enables ivsalloc(), which verifies that pointers reside * within jemalloc-owned chunks before dereferencing them. diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h index 54aed8ec..d2b7a967 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util.h @@ -109,6 +109,8 @@ void malloc_printf(const char *format, ...) #ifdef JEMALLOC_H_INLINES #ifndef JEMALLOC_ENABLE_INLINE +int jemalloc_ffsl(long bitmap); +int jemalloc_ffs(int bitmap); size_t pow2_ceil(size_t x); size_t lg_floor(size_t x); void set_errno(int errnum); @@ -116,6 +118,26 @@ int get_errno(void); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_UTIL_C_)) + +/* Sanity check: */ +#if !defined(JEMALLOC_INTERNAL_FFSL) || !defined(JEMALLOC_INTERNAL_FFS) +# error Both JEMALLOC_INTERNAL_FFSL && JEMALLOC_INTERNAL_FFS should have been defined by configure +#endif + +JEMALLOC_ALWAYS_INLINE int +jemalloc_ffsl(long bitmap) +{ + + return (JEMALLOC_INTERNAL_FFSL(bitmap)); +} + +JEMALLOC_ALWAYS_INLINE int +jemalloc_ffs(int bitmap) +{ + + return (JEMALLOC_INTERNAL_FFS(bitmap)); +} + /* Compute the smallest power of 2 that is >= x. 
*/ JEMALLOC_INLINE size_t pow2_ceil(size_t x) @@ -174,12 +196,12 @@ lg_floor(size_t x) if (x == KZU(0xffffffffffffffff)) return (63); x++; - return (ffsl(x) - 2); + return (jemalloc_ffsl(x) - 2); #elif (LG_SIZEOF_PTR == 2) if (x == KZU(0xffffffff)) return (31); x++; - return (ffs(x) - 2); + return (jemalloc_ffs(x) - 2); #else # error "Unsupported type sizes for lg_floor()" #endif diff --git a/src/arena.c b/src/arena.c index c392419e..d3fe0fba 100644 --- a/src/arena.c +++ b/src/arena.c @@ -2483,7 +2483,7 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size) * be twice as large in order to maintain alignment. */ if (config_fill && opt_redzone) { - size_t align_min = ZU(1) << (ffs(bin_info->reg_size) - 1); + size_t align_min = ZU(1) << (jemalloc_ffs(bin_info->reg_size) - 1); if (align_min <= REDZONE_MINSIZE) { bin_info->redzone_size = REDZONE_MINSIZE; pad_size = 0; diff --git a/src/rtree.c b/src/rtree.c index 205957ac..87b0b154 100644 --- a/src/rtree.c +++ b/src/rtree.c @@ -9,8 +9,8 @@ rtree_new(unsigned bits, rtree_alloc_t *alloc, rtree_dalloc_t *dalloc) assert(bits > 0 && bits <= (sizeof(uintptr_t) << 3)); - bits_per_level = ffs(pow2_ceil((RTREE_NODESIZE / sizeof(void *)))) - 1; - bits_in_leaf = ffs(pow2_ceil((RTREE_NODESIZE / sizeof(uint8_t)))) - 1; + bits_per_level = jemalloc_ffs(pow2_ceil((RTREE_NODESIZE / sizeof(void *)))) - 1; + bits_in_leaf = jemalloc_ffs(pow2_ceil((RTREE_NODESIZE / sizeof(uint8_t)))) - 1; if (bits > bits_in_leaf) { height = 1 + (bits - bits_in_leaf) / bits_per_level; if ((height-1) * bits_per_level + bits_in_leaf != bits) From 94ed6812bc04a6171d1a801f2740355f458d5c9c Mon Sep 17 00:00:00 2001 From: Richard Diamond Date: Wed, 28 May 2014 21:47:15 -0500 Subject: [PATCH 0468/3378] Don't catch fork()ing events for Native Client. Native Client doesn't allow forking, thus there is no need to catch fork()ing events for Native Client. 
Additionally, without this commit, jemalloc will introduce an unresolved pthread_atfork() in PNaCl Rust bins. --- src/jemalloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 43a494e4..0983c00d 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -792,7 +792,7 @@ malloc_init_hard(void) ncpus = malloc_ncpus(); #if (!defined(JEMALLOC_MUTEX_INIT_CB) && !defined(JEMALLOC_ZONE) \ - && !defined(_WIN32)) + && !defined(_WIN32) && !defined(__native_client__)) /* LinuxThreads's pthread_atfork() allocates. */ if (pthread_atfork(jemalloc_prefork, jemalloc_postfork_parent, jemalloc_postfork_child) != 0) { From 3e310b34eb53eb331981ecda2ea5f10cf6956747 Mon Sep 17 00:00:00 2001 From: Chris Peterson Date: Wed, 28 May 2014 19:04:06 -0700 Subject: [PATCH 0469/3378] Fix -Wsign-compare warnings --- src/prof.c | 4 ++-- src/util.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/prof.c b/src/prof.c index b64386e3..0eb7dbdb 100644 --- a/src/prof.c +++ b/src/prof.c @@ -1093,7 +1093,7 @@ label_open_close_error: #define DUMP_FILENAME_BUFSIZE (PATH_MAX + 1) #define VSEQ_INVALID UINT64_C(0xffffffffffffffff) static void -prof_dump_filename(char *filename, char v, int64_t vseq) +prof_dump_filename(char *filename, char v, uint64_t vseq) { cassert(config_prof); @@ -1101,7 +1101,7 @@ prof_dump_filename(char *filename, char v, int64_t vseq) if (vseq != VSEQ_INVALID) { /* "...v.heap" */ malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE, - "%s.%d.%"PRIu64".%c%"PRId64".heap", + "%s.%d.%"PRIu64".%c%"PRIu64".heap", opt_prof_prefix, (int)getpid(), prof_dump_seq, v, vseq); } else { /* "....heap" */ diff --git a/src/util.c b/src/util.c index 93a19fd1..9076be94 100644 --- a/src/util.c +++ b/src/util.c @@ -100,7 +100,7 @@ uintmax_t malloc_strtoumax(const char *restrict nptr, char **restrict endptr, int base) { uintmax_t ret, digit; - int b; + unsigned b; bool neg; const char *p, *ns; @@ -548,7 +548,7 @@ malloc_vsnprintf(char 
*str, size_t size, const char *format, va_list ap) assert(len == '?' || len == 'l'); assert_not_implemented(len != 'l'); s = va_arg(ap, char *); - slen = (prec < 0) ? strlen(s) : prec; + slen = (prec < 0) ? strlen(s) : (size_t)prec; APPEND_PADDED_S(s, slen, width, left_justify); f++; break; From 70807bc54b06bb259b6607541af44bc73a890bf6 Mon Sep 17 00:00:00 2001 From: Chris Peterson Date: Wed, 28 May 2014 19:04:33 -0700 Subject: [PATCH 0470/3378] Fix -Wsometimes-uninitialized warnings --- src/util.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/util.c b/src/util.c index 9076be94..1717f08e 100644 --- a/src/util.c +++ b/src/util.c @@ -381,7 +381,9 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) case 'p': /* Synthetic; used for %p. */ \ val = va_arg(ap, uintptr_t); \ break; \ - default: not_reached(); \ + default: \ + not_reached(); \ + val = 0; \ } \ } while (0) From 994fad9bdaaa18273f2089856c2637cfb0c307bd Mon Sep 17 00:00:00 2001 From: Richard Diamond Date: Tue, 3 Jun 2014 02:39:18 -0500 Subject: [PATCH 0471/3378] Add check for madvise(2) to configure.ac. Some platforms, such as Google's Portable Native Client, use Newlib and thus lack access to madvise(2). In those instances, pages_purge() is transformed into a no-op. --- configure.ac | 14 ++++++++++++++ .../jemalloc/internal/jemalloc_internal_defs.h.in | 5 +++++ src/chunk_mmap.c | 7 +++++-- 3 files changed, 24 insertions(+), 2 deletions(-) diff --git a/configure.ac b/configure.ac index 3d36b5f8..29edcb6a 100644 --- a/configure.ac +++ b/configure.ac @@ -1191,6 +1191,20 @@ if test "x${je_cv_osatomic}" = "xyes" ; then AC_DEFINE([JEMALLOC_OSATOMIC], [ ]) fi +dnl ============================================================================ +dnl Check for madvise(2). 
+ +JE_COMPILABLE([madvise(2)], [ +#include +], [ + { + madvise((void *)0, 0, 0); + } +], [je_cv_madvise]) +if test "x${je_cv_madvise}" = "xyes" ; then + AC_DEFINE([JEMALLOC_HAVE_MADVISE], [ ]) +fi + dnl ============================================================================ dnl Check whether __sync_{add,sub}_and_fetch() are available despite dnl __GCC_HAVE_SYNC_COMPARE_AND_SWAP_n macros being undefined. diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 65ac76c0..93716b0a 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -52,6 +52,11 @@ */ #undef JEMALLOC_HAVE_BUILTIN_CLZ +/* + * Defined if madvise(2) is available. + */ +#undef JEMALLOC_HAVE_MADVISE + /* * Defined if OSSpin*() functions are available, as provided by Darwin, and * documented in the spinlock(3) manual page. diff --git a/src/chunk_mmap.c b/src/chunk_mmap.c index f960e068..65137b41 100644 --- a/src/chunk_mmap.c +++ b/src/chunk_mmap.c @@ -121,7 +121,7 @@ pages_purge(void *addr, size_t length) #ifdef _WIN32 VirtualAlloc(addr, length, MEM_RESET, PAGE_READWRITE); unzeroed = true; -#else +#elif defined(JEMALLOC_HAVE_MADVISE) # ifdef JEMALLOC_PURGE_MADVISE_DONTNEED # define JEMALLOC_MADV_PURGE MADV_DONTNEED # define JEMALLOC_MADV_ZEROS true @@ -129,12 +129,15 @@ pages_purge(void *addr, size_t length) # define JEMALLOC_MADV_PURGE MADV_FREE # define JEMALLOC_MADV_ZEROS false # else -# error "No method defined for purging unused dirty pages." +# error "No madvise(2) flag defined for purging unused dirty pages." # endif int err = madvise(addr, length, JEMALLOC_MADV_PURGE); unzeroed = (JEMALLOC_MADV_ZEROS == false || err != 0); # undef JEMALLOC_MADV_PURGE # undef JEMALLOC_MADV_ZEROS +#else + /* Last resort no-op. 
*/ + unzeroed = true; #endif return (unzeroed); } From 1a3eafd1b045163f27e4a5acf01280edfe28c309 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Wed, 4 Jun 2014 12:09:08 +0900 Subject: [PATCH 0472/3378] Check for __builtin_ffsl before ffsl. When building with -O0, GCC doesn't use builtins for ffs and ffsl calls, and uses library function calls instead. But the Android NDK doesn't have those functions exported from any library, leading to build failure. However, using __builtin_ffs* uses the builtin inlines. --- configure.ac | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/configure.ac b/configure.ac index 29edcb6a..f456bd21 100644 --- a/configure.ac +++ b/configure.ac @@ -1109,43 +1109,44 @@ elif test "x${force_tls}" = "x1" ; then fi dnl ============================================================================ -dnl Check for ffsl(3), then __builtin_ffsl(), and fail if neither are found. +dnl Check for __builtin_ffsl(), then ffsl(3), and fail if neither are found. dnl One of those two functions should (theoretically) exist on all platforms dnl that jemalloc currently has a chance of functioning on without modification. dnl We additionally assume ffs() or __builtin_ffs() are defined if dnl ffsl() or __builtin_ffsl() are defined, respectively. 
-JE_COMPILABLE([a program using ffsl], [ +JE_COMPILABLE([a program using __builtin_ffsl], [ #include #include #include ], [ { - int rv = ffsl(0x08); + int rv = __builtin_ffsl(0x08); printf("%d\n", rv); } -], [je_cv_function_ffsl]) -if test "x${je_cv_function_ffsl}" == "xyes" ; then - AC_DEFINE([JEMALLOC_INTERNAL_FFSL], [ffsl]) - AC_DEFINE([JEMALLOC_INTERNAL_FFS], [ffs]) +], [je_cv_gcc_builtin_ffsl]) +if test "x${je_cv_gcc_builtin_ffsl}" == "xyes" ; then + AC_DEFINE([JEMALLOC_INTERNAL_FFSL], [__builtin_ffsl]) + AC_DEFINE([JEMALLOC_INTERNAL_FFS], [__builtin_ffs]) else - JE_COMPILABLE([a program using __builtin_ffsl], [ + JE_COMPILABLE([a program using ffsl], [ #include #include #include ], [ { - int rv = __builtin_ffsl(0x08); + int rv = ffsl(0x08); printf("%d\n", rv); } - ], [je_cv_gcc_builtin_ffsl]) - if test "x${je_cv_gcc_builtin_ffsl}" == "xyes" ; then - AC_DEFINE([JEMALLOC_INTERNAL_FFSL], [__builtin_ffsl]) - AC_DEFINE([JEMALLOC_INTERNAL_FFS], [__builtin_ffs]) + ], [je_cv_function_ffsl]) + if test "x${je_cv_function_ffsl}" == "xyes" ; then + AC_DEFINE([JEMALLOC_INTERNAL_FFSL], [ffsl]) + AC_DEFINE([JEMALLOC_INTERNAL_FFS], [ffs]) else AC_MSG_ERROR([Cannot build without ffsl(3) or __builtin_ffsl()]) fi fi + dnl ============================================================================ dnl Check for atomic(9) operations as provided on FreeBSD. 
From 8f50ec8eda262e87ad547ec50b6ca928ea3e31c4 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Wed, 4 Jun 2014 12:12:55 +0900 Subject: [PATCH 0473/3378] Use JEMALLOC_INTERNAL_FFSL in STATIC_PAGE_SHIFT test --- configure.ac | 79 ++++++++++++++++++++++++++-------------------------- 1 file changed, 39 insertions(+), 40 deletions(-) diff --git a/configure.ac b/configure.ac index f456bd21..e9775342 100644 --- a/configure.ac +++ b/configure.ac @@ -935,6 +935,44 @@ if test "x$enable_xmalloc" = "x1" ; then fi AC_SUBST([enable_xmalloc]) +dnl ============================================================================ +dnl Check for __builtin_ffsl(), then ffsl(3), and fail if neither are found. +dnl One of those two functions should (theoretically) exist on all platforms +dnl that jemalloc currently has a chance of functioning on without modification. +dnl We additionally assume ffs() or __builtin_ffs() are defined if +dnl ffsl() or __builtin_ffsl() are defined, respectively. +JE_COMPILABLE([a program using __builtin_ffsl], [ +#include +#include +#include +], [ + { + int rv = __builtin_ffsl(0x08); + printf("%d\n", rv); + } +], [je_cv_gcc_builtin_ffsl]) +if test "x${je_cv_gcc_builtin_ffsl}" == "xyes" ; then + AC_DEFINE([JEMALLOC_INTERNAL_FFSL], [__builtin_ffsl]) + AC_DEFINE([JEMALLOC_INTERNAL_FFS], [__builtin_ffs]) +else + JE_COMPILABLE([a program using ffsl], [ + #include + #include + #include + ], [ + { + int rv = ffsl(0x08); + printf("%d\n", rv); + } + ], [je_cv_function_ffsl]) + if test "x${je_cv_function_ffsl}" == "xyes" ; then + AC_DEFINE([JEMALLOC_INTERNAL_FFSL], [ffsl]) + AC_DEFINE([JEMALLOC_INTERNAL_FFS], [ffs]) + else + AC_MSG_ERROR([Cannot build without ffsl(3) or __builtin_ffsl()]) + fi +fi + AC_CACHE_CHECK([STATIC_PAGE_SHIFT], [je_cv_static_page_shift], AC_RUN_IFELSE([AC_LANG_PROGRAM( @@ -961,7 +999,7 @@ AC_CACHE_CHECK([STATIC_PAGE_SHIFT], if (result == -1) { return 1; } - result = ffsl(result) - 1; + result = JEMALLOC_INTERNAL_FFSL(result) - 1; f = 
fopen("conftest.out", "w"); if (f == NULL) { @@ -1108,45 +1146,6 @@ elif test "x${force_tls}" = "x1" ; then AC_MSG_ERROR([Failed to configure TLS, which is mandatory for correct function]) fi -dnl ============================================================================ -dnl Check for __builtin_ffsl(), then ffsl(3), and fail if neither are found. -dnl One of those two functions should (theoretically) exist on all platforms -dnl that jemalloc currently has a chance of functioning on without modification. -dnl We additionally assume ffs() or __builtin_ffs() are defined if -dnl ffsl() or __builtin_ffsl() are defined, respectively. -JE_COMPILABLE([a program using __builtin_ffsl], [ -#include -#include -#include -], [ - { - int rv = __builtin_ffsl(0x08); - printf("%d\n", rv); - } -], [je_cv_gcc_builtin_ffsl]) -if test "x${je_cv_gcc_builtin_ffsl}" == "xyes" ; then - AC_DEFINE([JEMALLOC_INTERNAL_FFSL], [__builtin_ffsl]) - AC_DEFINE([JEMALLOC_INTERNAL_FFS], [__builtin_ffs]) -else - JE_COMPILABLE([a program using ffsl], [ - #include - #include - #include - ], [ - { - int rv = ffsl(0x08); - printf("%d\n", rv); - } - ], [je_cv_function_ffsl]) - if test "x${je_cv_function_ffsl}" == "xyes" ; then - AC_DEFINE([JEMALLOC_INTERNAL_FFSL], [ffsl]) - AC_DEFINE([JEMALLOC_INTERNAL_FFS], [ffs]) - else - AC_MSG_ERROR([Cannot build without ffsl(3) or __builtin_ffsl()]) - fi -fi - - dnl ============================================================================ dnl Check for atomic(9) operations as provided on FreeBSD. 
From 5921ba7b0c3b3278c54d569dee37deab2768b70b Mon Sep 17 00:00:00 2001 From: Valerii Hiora Date: Fri, 16 May 2014 16:28:20 +0300 Subject: [PATCH 0474/3378] Support for iOS compilation --- config.sub | 2 ++ configure.ac | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/config.sub b/config.sub index 61cb4bc2..c4cc9836 100755 --- a/config.sub +++ b/config.sub @@ -1400,6 +1400,8 @@ case $os in -mac*) os=`echo $os | sed -e 's|mac|macos|'` ;; + -ios*) + ;; -linux-dietlibc) os=-linux-dietlibc ;; diff --git a/configure.ac b/configure.ac index e9775342..48863a59 100644 --- a/configure.ac +++ b/configure.ac @@ -264,7 +264,7 @@ dnl definitions need to be seen before any headers are included, which is a pain dnl to make happen otherwise. default_munmap="1" case "${host}" in - *-*-darwin*) + *-*-darwin* | *-*-ios*) CFLAGS="$CFLAGS" abi="macho" AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) From 6f533c1903a1d067dacfca2f06c6cc9754fdf67e Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Tue, 10 Jun 2014 18:18:22 +0900 Subject: [PATCH 0475/3378] Ensure the default purgeable zone is after the default zone on OS X --- src/zone.c | 34 +++++++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/src/zone.c b/src/zone.c index e0302ef4..a722287b 100644 --- a/src/zone.c +++ b/src/zone.c @@ -176,6 +176,7 @@ register_zone(void) * register jemalloc's. */ malloc_zone_t *default_zone = malloc_default_zone(); + malloc_zone_t *purgeable_zone = NULL; if (!default_zone->zone_name || strcmp(default_zone->zone_name, "DefaultMallocZone") != 0) { return; @@ -237,22 +238,37 @@ register_zone(void) * run time. */ if (malloc_default_purgeable_zone != NULL) - malloc_default_purgeable_zone(); + purgeable_zone = malloc_default_purgeable_zone(); /* Register the custom zone. At this point it won't be the default. */ malloc_zone_register(&zone); - /* - * Unregister and reregister the default zone. 
On OSX >= 10.6, - * unregistering takes the last registered zone and places it at the - * location of the specified zone. Unregistering the default zone thus - * makes the last registered one the default. On OSX < 10.6, - * unregistering shifts all registered zones. The first registered zone - * then becomes the default. - */ do { default_zone = malloc_default_zone(); + /* + * Unregister and reregister the default zone. On OSX >= 10.6, + * unregistering takes the last registered zone and places it + * at the location of the specified zone. Unregistering the + * default zone thus makes the last registered one the default. + * On OSX < 10.6, unregistering shifts all registered zones. + * The first registered zone then becomes the default. + */ malloc_zone_unregister(default_zone); malloc_zone_register(default_zone); + /* + * On OSX 10.6, having the default purgeable zone appear before + * the default zone makes some things crash because it thinks it + * owns the default zone allocated pointers. We thus unregister/ + * re-register it in order to ensure it's always after the + * default zone. On OSX < 10.6, there is no purgeable zone, so + * this does nothing. On OSX >= 10.6, unregistering replaces the + * purgeable zone with the last registered zone above, i.e the + * default zone. Registering it again then puts it at the end, + * obviously after the default zone. 
+ */ + if (purgeable_zone) { + malloc_zone_unregister(purgeable_zone); + malloc_zone_register(purgeable_zone); + } } while (malloc_default_zone() != &zone); } From c521df5dcf7410898cabdcb556f919535cf16d19 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Thu, 12 Jun 2014 13:07:31 +0900 Subject: [PATCH 0476/3378] Allow to build with clang-cl --- include/msvc_compat/C99/stdbool.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/msvc_compat/C99/stdbool.h b/include/msvc_compat/C99/stdbool.h index da9ee8b8..d92160eb 100644 --- a/include/msvc_compat/C99/stdbool.h +++ b/include/msvc_compat/C99/stdbool.h @@ -5,7 +5,11 @@ /* MSVC doesn't define _Bool or bool in C, but does have BOOL */ /* Note this doesn't pass autoconf's test because (bool) 0.5 != true */ +/* Clang-cl uses MSVC headers, so needs msvc_compat, but has _Bool as + * a built-in type. */ +#ifndef __clang__ typedef BOOL _Bool; +#endif #define bool _Bool #define true 1 From 79230fef31428a133683c236bedcc1560f8fcfd8 Mon Sep 17 00:00:00 2001 From: Steven Stewart-Gallus Date: Thu, 19 Jun 2014 16:11:43 -0700 Subject: [PATCH 0477/3378] Fix unportable == operator in configure scripts Now this code is more portable and now people can use faster shells than Bash such as Dash. To use a faster shell with autoconf set the CONFIG_SHELL environment variable to the shell and run the configure script with the shell. --- configure.ac | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/configure.ac b/configure.ac index 48863a59..645bd466 100644 --- a/configure.ac +++ b/configure.ac @@ -833,7 +833,7 @@ have_dss="1" dnl Check whether the BSD/SUSv1 sbrk() exists. If not, disable DSS support. 
AC_CHECK_FUNC([sbrk], [have_sbrk="1"], [have_sbrk="0"]) if test "x$have_sbrk" = "x1" ; then - if test "x$sbrk_deprecated" == "x1" ; then + if test "x$sbrk_deprecated" = "x1" ; then AC_MSG_RESULT([Disabling dss allocation because sbrk is deprecated]) have_dss="0" fi @@ -951,7 +951,7 @@ JE_COMPILABLE([a program using __builtin_ffsl], [ printf("%d\n", rv); } ], [je_cv_gcc_builtin_ffsl]) -if test "x${je_cv_gcc_builtin_ffsl}" == "xyes" ; then +if test "x${je_cv_gcc_builtin_ffsl}" = "xyes" ; then AC_DEFINE([JEMALLOC_INTERNAL_FFSL], [__builtin_ffsl]) AC_DEFINE([JEMALLOC_INTERNAL_FFS], [__builtin_ffs]) else @@ -965,7 +965,7 @@ else printf("%d\n", rv); } ], [je_cv_function_ffsl]) - if test "x${je_cv_function_ffsl}" == "xyes" ; then + if test "x${je_cv_function_ffsl}" = "xyes" ; then AC_DEFINE([JEMALLOC_INTERNAL_FFSL], [ffsl]) AC_DEFINE([JEMALLOC_INTERNAL_FFS], [ffs]) else From ffa259841c6a4b6dae4ed74f02bb38703e190065 Mon Sep 17 00:00:00 2001 From: "Manuel A. Fernandez Montecelo" Date: Tue, 29 Jul 2014 23:11:26 +0100 Subject: [PATCH 0478/3378] Add OpenRISC/or1k LG_QUANTUM size definition --- include/jemalloc/internal/jemalloc_internal.h.in | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index f2cd743f..1c2f3d44 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -203,6 +203,9 @@ static const bool config_ivsalloc = # ifdef __mips__ # define LG_QUANTUM 3 # endif +# ifdef __or1k__ +# define LG_QUANTUM 3 +# endif # ifdef __powerpc__ # define LG_QUANTUM 4 # endif From b433d7a87b27ff1e4ccea5103bc0a95afbf58ea4 Mon Sep 17 00:00:00 2001 From: "Manuel A. 
Fernandez Montecelo" Date: Tue, 29 Jul 2014 23:15:26 +0100 Subject: [PATCH 0479/3378] Update config.{guess,sub} to more recent versions, to add better support to OpenRISC/or1k (among others) --- config.guess | 192 ++++++++------------------------------------------- config.sub | 21 +++--- 2 files changed, 37 insertions(+), 176 deletions(-) diff --git a/config.guess b/config.guess index b79252d6..1f5c50c0 100755 --- a/config.guess +++ b/config.guess @@ -1,8 +1,8 @@ #! /bin/sh # Attempt to guess a canonical system name. -# Copyright 1992-2013 Free Software Foundation, Inc. +# Copyright 1992-2014 Free Software Foundation, Inc. -timestamp='2013-06-10' +timestamp='2014-03-23' # This file is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by @@ -50,7 +50,7 @@ version="\ GNU config.guess ($timestamp) Originally written by Per Bothner. -Copyright 1992-2013 Free Software Foundation, Inc. +Copyright 1992-2014 Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." 
@@ -149,7 +149,7 @@ Linux|GNU|GNU/*) LIBC=gnu #endif EOF - eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^LIBC'` + eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^LIBC' | sed 's, ,,g'` ;; esac @@ -826,7 +826,7 @@ EOF *:MINGW*:*) echo ${UNAME_MACHINE}-pc-mingw32 exit ;; - i*:MSYS*:*) + *:MSYS*:*) echo ${UNAME_MACHINE}-pc-msys exit ;; i*:windows32*:*) @@ -969,10 +969,10 @@ EOF eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^CPU'` test x"${CPU}" != x && { echo "${CPU}-unknown-linux-${LIBC}"; exit; } ;; - or1k:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + openrisc*:Linux:*:*) + echo or1k-unknown-linux-${LIBC} exit ;; - or32:Linux:*:*) + or32:Linux:*:* | or1k*:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-${LIBC} exit ;; padre:Linux:*:*) @@ -1260,16 +1260,26 @@ EOF if test "$UNAME_PROCESSOR" = unknown ; then UNAME_PROCESSOR=powerpc fi - if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then - if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \ - (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \ - grep IS_64BIT_ARCH >/dev/null - then - case $UNAME_PROCESSOR in - i386) UNAME_PROCESSOR=x86_64 ;; - powerpc) UNAME_PROCESSOR=powerpc64 ;; - esac + if test `echo "$UNAME_RELEASE" | sed -e 's/\..*//'` -le 10 ; then + if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then + if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \ + (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \ + grep IS_64BIT_ARCH >/dev/null + then + case $UNAME_PROCESSOR in + i386) UNAME_PROCESSOR=x86_64 ;; + powerpc) UNAME_PROCESSOR=powerpc64 ;; + esac + fi fi + elif test "$UNAME_PROCESSOR" = i386 ; then + # Avoid executing cc on OS X 10.9, as it ships with a stub + # that puts up a graphical alert prompting to install + # developer tools. Any system running Mac OS X 10.7 or + # later (Darwin 11 and later) is required to have a 64-bit + # processor. This is not true of the ARM version of Darwin + # that Apple uses in portable devices. 
+ UNAME_PROCESSOR=x86_64 fi echo ${UNAME_PROCESSOR}-apple-darwin${UNAME_RELEASE} exit ;; @@ -1361,154 +1371,6 @@ EOF exit ;; esac -eval $set_cc_for_build -cat >$dummy.c < -# include -#endif -main () -{ -#if defined (sony) -#if defined (MIPSEB) - /* BFD wants "bsd" instead of "newsos". Perhaps BFD should be changed, - I don't know.... */ - printf ("mips-sony-bsd\n"); exit (0); -#else -#include - printf ("m68k-sony-newsos%s\n", -#ifdef NEWSOS4 - "4" -#else - "" -#endif - ); exit (0); -#endif -#endif - -#if defined (__arm) && defined (__acorn) && defined (__unix) - printf ("arm-acorn-riscix\n"); exit (0); -#endif - -#if defined (hp300) && !defined (hpux) - printf ("m68k-hp-bsd\n"); exit (0); -#endif - -#if defined (NeXT) -#if !defined (__ARCHITECTURE__) -#define __ARCHITECTURE__ "m68k" -#endif - int version; - version=`(hostinfo | sed -n 's/.*NeXT Mach \([0-9]*\).*/\1/p') 2>/dev/null`; - if (version < 4) - printf ("%s-next-nextstep%d\n", __ARCHITECTURE__, version); - else - printf ("%s-next-openstep%d\n", __ARCHITECTURE__, version); - exit (0); -#endif - -#if defined (MULTIMAX) || defined (n16) -#if defined (UMAXV) - printf ("ns32k-encore-sysv\n"); exit (0); -#else -#if defined (CMU) - printf ("ns32k-encore-mach\n"); exit (0); -#else - printf ("ns32k-encore-bsd\n"); exit (0); -#endif -#endif -#endif - -#if defined (__386BSD__) - printf ("i386-pc-bsd\n"); exit (0); -#endif - -#if defined (sequent) -#if defined (i386) - printf ("i386-sequent-dynix\n"); exit (0); -#endif -#if defined (ns32000) - printf ("ns32k-sequent-dynix\n"); exit (0); -#endif -#endif - -#if defined (_SEQUENT_) - struct utsname un; - - uname(&un); - - if (strncmp(un.version, "V2", 2) == 0) { - printf ("i386-sequent-ptx2\n"); exit (0); - } - if (strncmp(un.version, "V1", 2) == 0) { /* XXX is V1 correct? 
*/ - printf ("i386-sequent-ptx1\n"); exit (0); - } - printf ("i386-sequent-ptx\n"); exit (0); - -#endif - -#if defined (vax) -# if !defined (ultrix) -# include -# if defined (BSD) -# if BSD == 43 - printf ("vax-dec-bsd4.3\n"); exit (0); -# else -# if BSD == 199006 - printf ("vax-dec-bsd4.3reno\n"); exit (0); -# else - printf ("vax-dec-bsd\n"); exit (0); -# endif -# endif -# else - printf ("vax-dec-bsd\n"); exit (0); -# endif -# else - printf ("vax-dec-ultrix\n"); exit (0); -# endif -#endif - -#if defined (alliant) && defined (i860) - printf ("i860-alliant-bsd\n"); exit (0); -#endif - - exit (1); -} -EOF - -$CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null && SYSTEM_NAME=`$dummy` && - { echo "$SYSTEM_NAME"; exit; } - -# Apollos put the system type in the environment. - -test -d /usr/apollo && { echo ${ISP}-apollo-${SYSTYPE}; exit; } - -# Convex versions that predate uname can use getsysinfo(1) - -if [ -x /usr/convex/getsysinfo ] -then - case `getsysinfo -f cpu_type` in - c1*) - echo c1-convex-bsd - exit ;; - c2*) - if getsysinfo -f scalar_acc - then echo c32-convex-bsd - else echo c2-convex-bsd - fi - exit ;; - c34*) - echo c34-convex-bsd - exit ;; - c38*) - echo c38-convex-bsd - exit ;; - c4*) - echo c4-convex-bsd - exit ;; - esac -fi - cat >&2 <." version="\ GNU config.sub ($timestamp) -Copyright 1992-2013 Free Software Foundation, Inc. +Copyright 1992-2014 Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." 
@@ -283,8 +283,10 @@ case $basic_machine in | mips64vr5900 | mips64vr5900el \ | mipsisa32 | mipsisa32el \ | mipsisa32r2 | mipsisa32r2el \ + | mipsisa32r6 | mipsisa32r6el \ | mipsisa64 | mipsisa64el \ | mipsisa64r2 | mipsisa64r2el \ + | mipsisa64r6 | mipsisa64r6el \ | mipsisa64sb1 | mipsisa64sb1el \ | mipsisa64sr71k | mipsisa64sr71kel \ | mipsr5900 | mipsr5900el \ @@ -296,8 +298,7 @@ case $basic_machine in | nds32 | nds32le | nds32be \ | nios | nios2 | nios2eb | nios2el \ | ns16k | ns32k \ - | open8 \ - | or1k | or32 \ + | open8 | or1k | or1knd | or32 \ | pdp10 | pdp11 | pj | pjl \ | powerpc | powerpc64 | powerpc64le | powerpcle \ | pyramid \ @@ -402,8 +403,10 @@ case $basic_machine in | mips64vr5900-* | mips64vr5900el-* \ | mipsisa32-* | mipsisa32el-* \ | mipsisa32r2-* | mipsisa32r2el-* \ + | mipsisa32r6-* | mipsisa32r6el-* \ | mipsisa64-* | mipsisa64el-* \ | mipsisa64r2-* | mipsisa64r2el-* \ + | mipsisa64r6-* | mipsisa64r6el-* \ | mipsisa64sb1-* | mipsisa64sb1el-* \ | mipsisa64sr71k-* | mipsisa64sr71kel-* \ | mipsr5900-* | mipsr5900el-* \ @@ -415,6 +418,7 @@ case $basic_machine in | nios-* | nios2-* | nios2eb-* | nios2el-* \ | none-* | np1-* | ns16k-* | ns32k-* \ | open8-* \ + | or1k*-* \ | orion-* \ | pdp10-* | pdp11-* | pj-* | pjl-* | pn-* | power-* \ | powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* \ @@ -1376,7 +1380,7 @@ case $os in | -os2* | -vos* | -palmos* | -uclinux* | -nucleus* \ | -morphos* | -superux* | -rtmk* | -rtmk-nova* | -windiss* \ | -powermax* | -dnix* | -nx6 | -nx7 | -sei* | -dragonfly* \ - | -skyos* | -haiku* | -rdos* | -toppers* | -drops* | -es*) + | -skyos* | -haiku* | -rdos* | -toppers* | -drops* | -es* | -tirtos*) # Remember, each alternative MUST END IN *, to match a version number. 
;; -qnx*) @@ -1400,8 +1404,6 @@ case $os in -mac*) os=`echo $os | sed -e 's|mac|macos|'` ;; - -ios*) - ;; -linux-dietlibc) os=-linux-dietlibc ;; @@ -1596,9 +1598,6 @@ case $basic_machine in mips*-*) os=-elf ;; - or1k-*) - os=-elf - ;; or32-*) os=-coff ;; From 1aa25a3ca28d8da347dc115636073493db791183 Mon Sep 17 00:00:00 2001 From: Michael Neumann Date: Tue, 5 Aug 2014 03:06:02 +0200 Subject: [PATCH 0480/3378] Support DragonFlyBSD Note that in contrast to FreeBSD, DragonFly does not work with force_lazy_lock enabled. --- configure.ac | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/configure.ac b/configure.ac index 645bd466..83c60ec5 100644 --- a/configure.ac +++ b/configure.ac @@ -283,6 +283,11 @@ case "${host}" in AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) force_lazy_lock="1" ;; + *-*-dragonfly*) + CFLAGS="$CFLAGS" + abi="elf" + AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) + ;; *-*-linux*) CFLAGS="$CFLAGS" CPPFLAGS="$CPPFLAGS -D_GNU_SOURCE" From cf6032d0efbc2e3e9f736a8cd69846cf7427640b Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Wed, 30 Jul 2014 18:16:13 +0900 Subject: [PATCH 0481/3378] Remove ${srcroot} from cfghdrs_in, cfgoutputs_in and cfghdrs_tup in configure On Windows, srcroot may start with "drive:", which confuses autoconf's AC_CONFIG_* macros. The macros works equally well without ${srcroot}, provided some adjustment to Makefile.in. 
--- Makefile.in | 4 ++-- configure.ac | 46 +++++++++++++++++++++++----------------------- 2 files changed, 25 insertions(+), 25 deletions(-) diff --git a/Makefile.in b/Makefile.in index 839bb08f..a21acd45 100644 --- a/Makefile.in +++ b/Makefile.in @@ -42,9 +42,9 @@ XSLTPROC := @XSLTPROC@ AUTOCONF := @AUTOCONF@ _RPATH = @RPATH@ RPATH = $(if $(1),$(call _RPATH,$(1))) -cfghdrs_in := @cfghdrs_in@ +cfghdrs_in := $(addprefix $(srcroot),@cfghdrs_in@) cfghdrs_out := @cfghdrs_out@ -cfgoutputs_in := @cfgoutputs_in@ +cfgoutputs_in := $(addprefix $(srcroot),@cfgoutputs_in@) cfgoutputs_out := @cfgoutputs_out@ enable_autogen := @enable_autogen@ enable_code_coverage := @enable_code_coverage@ diff --git a/configure.ac b/configure.ac index 645bd466..bc3464fd 100644 --- a/configure.ac +++ b/configure.ac @@ -534,15 +534,15 @@ dnl jemalloc_protos_jet.h easy. je_="je_" AC_SUBST([je_]) -cfgoutputs_in="${srcroot}Makefile.in" -cfgoutputs_in="${cfgoutputs_in} ${srcroot}doc/html.xsl.in" -cfgoutputs_in="${cfgoutputs_in} ${srcroot}doc/manpages.xsl.in" -cfgoutputs_in="${cfgoutputs_in} ${srcroot}doc/jemalloc.xml.in" -cfgoutputs_in="${cfgoutputs_in} ${srcroot}include/jemalloc/jemalloc_macros.h.in" -cfgoutputs_in="${cfgoutputs_in} ${srcroot}include/jemalloc/jemalloc_protos.h.in" -cfgoutputs_in="${cfgoutputs_in} ${srcroot}include/jemalloc/internal/jemalloc_internal.h.in" -cfgoutputs_in="${cfgoutputs_in} ${srcroot}test/test.sh.in" -cfgoutputs_in="${cfgoutputs_in} ${srcroot}test/include/test/jemalloc_test.h.in" +cfgoutputs_in="Makefile.in" +cfgoutputs_in="${cfgoutputs_in} doc/html.xsl.in" +cfgoutputs_in="${cfgoutputs_in} doc/manpages.xsl.in" +cfgoutputs_in="${cfgoutputs_in} doc/jemalloc.xml.in" +cfgoutputs_in="${cfgoutputs_in} include/jemalloc/jemalloc_macros.h.in" +cfgoutputs_in="${cfgoutputs_in} include/jemalloc/jemalloc_protos.h.in" +cfgoutputs_in="${cfgoutputs_in} include/jemalloc/internal/jemalloc_internal.h.in" +cfgoutputs_in="${cfgoutputs_in} test/test.sh.in" +cfgoutputs_in="${cfgoutputs_in} 
test/include/test/jemalloc_test.h.in" cfgoutputs_out="Makefile" cfgoutputs_out="${cfgoutputs_out} doc/html.xsl" @@ -564,18 +564,18 @@ cfgoutputs_tup="${cfgoutputs_tup} include/jemalloc/internal/jemalloc_internal.h" cfgoutputs_tup="${cfgoutputs_tup} test/test.sh:test/test.sh.in" cfgoutputs_tup="${cfgoutputs_tup} test/include/test/jemalloc_test.h:test/include/test/jemalloc_test.h.in" -cfghdrs_in="${srcroot}include/jemalloc/jemalloc_defs.h.in" -cfghdrs_in="${cfghdrs_in} ${srcroot}include/jemalloc/internal/jemalloc_internal_defs.h.in" -cfghdrs_in="${cfghdrs_in} ${srcroot}include/jemalloc/internal/private_namespace.sh" -cfghdrs_in="${cfghdrs_in} ${srcroot}include/jemalloc/internal/private_unnamespace.sh" -cfghdrs_in="${cfghdrs_in} ${srcroot}include/jemalloc/internal/private_symbols.txt" -cfghdrs_in="${cfghdrs_in} ${srcroot}include/jemalloc/internal/public_namespace.sh" -cfghdrs_in="${cfghdrs_in} ${srcroot}include/jemalloc/internal/public_unnamespace.sh" -cfghdrs_in="${cfghdrs_in} ${srcroot}include/jemalloc/internal/size_classes.sh" -cfghdrs_in="${cfghdrs_in} ${srcroot}include/jemalloc/jemalloc_rename.sh" -cfghdrs_in="${cfghdrs_in} ${srcroot}include/jemalloc/jemalloc_mangle.sh" -cfghdrs_in="${cfghdrs_in} ${srcroot}include/jemalloc/jemalloc.sh" -cfghdrs_in="${cfghdrs_in} ${srcroot}test/include/test/jemalloc_test_defs.h.in" +cfghdrs_in="include/jemalloc/jemalloc_defs.h.in" +cfghdrs_in="${cfghdrs_in} include/jemalloc/internal/jemalloc_internal_defs.h.in" +cfghdrs_in="${cfghdrs_in} include/jemalloc/internal/private_namespace.sh" +cfghdrs_in="${cfghdrs_in} include/jemalloc/internal/private_unnamespace.sh" +cfghdrs_in="${cfghdrs_in} include/jemalloc/internal/private_symbols.txt" +cfghdrs_in="${cfghdrs_in} include/jemalloc/internal/public_namespace.sh" +cfghdrs_in="${cfghdrs_in} include/jemalloc/internal/public_unnamespace.sh" +cfghdrs_in="${cfghdrs_in} include/jemalloc/internal/size_classes.sh" +cfghdrs_in="${cfghdrs_in} include/jemalloc/jemalloc_rename.sh" 
+cfghdrs_in="${cfghdrs_in} include/jemalloc/jemalloc_mangle.sh" +cfghdrs_in="${cfghdrs_in} include/jemalloc/jemalloc.sh" +cfghdrs_in="${cfghdrs_in} test/include/test/jemalloc_test_defs.h.in" cfghdrs_out="include/jemalloc/jemalloc_defs.h" cfghdrs_out="${cfghdrs_out} include/jemalloc/jemalloc${install_suffix}.h" @@ -593,8 +593,8 @@ cfghdrs_out="${cfghdrs_out} include/jemalloc/internal/jemalloc_internal_defs.h" cfghdrs_out="${cfghdrs_out} test/include/test/jemalloc_test_defs.h" cfghdrs_tup="include/jemalloc/jemalloc_defs.h:include/jemalloc/jemalloc_defs.h.in" -cfghdrs_tup="${cfghdrs_tup} include/jemalloc/internal/jemalloc_internal_defs.h:${srcroot}include/jemalloc/internal/jemalloc_internal_defs.h.in" -cfghdrs_tup="${cfghdrs_tup} test/include/test/jemalloc_test_defs.h:${srcroot}test/include/test/jemalloc_test_defs.h.in" +cfghdrs_tup="${cfghdrs_tup} include/jemalloc/internal/jemalloc_internal_defs.h:include/jemalloc/internal/jemalloc_internal_defs.h.in" +cfghdrs_tup="${cfghdrs_tup} test/include/test/jemalloc_test_defs.h:test/include/test/jemalloc_test_defs.h.in" dnl Silence irrelevant compiler warnings by default. AC_ARG_ENABLE([cc-silence], From 55c9aa10386b21af92f323d04bddc15691d48756 Mon Sep 17 00:00:00 2001 From: Qinfan Wu Date: Wed, 6 Aug 2014 16:10:08 -0700 Subject: [PATCH 0482/3378] Fix the bug that causes not allocating free run with lowest address. 
--- src/arena.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/arena.c b/src/arena.c index d3fe0fba..db699161 100644 --- a/src/arena.c +++ b/src/arena.c @@ -101,14 +101,18 @@ arena_avail_comp(arena_chunk_map_t *a, arena_chunk_map_t *b) uintptr_t a_mapelm = (uintptr_t)a; uintptr_t b_mapelm = (uintptr_t)b; - if (a_mapelm & CHUNK_MAP_KEY) + if (a_mapelm & CHUNK_MAP_KEY) a_size = a_mapelm & ~PAGE_MASK; else a_size = arena_mapelm_to_bits(a) & ~PAGE_MASK; ret = (a_size > b_size) - (a_size < b_size); - if (ret == 0 && (!(a_mapelm & CHUNK_MAP_KEY))) - ret = (a_mapelm > b_mapelm) - (a_mapelm < b_mapelm); + if (ret == 0) { + if (!(a_mapelm & CHUNK_MAP_KEY)) + ret = (a_mapelm > b_mapelm) - (a_mapelm < b_mapelm); + else + ret = -1; + } return (ret); } From ea73eb8f3e029f0a5697e78c6771b49063cf4138 Mon Sep 17 00:00:00 2001 From: Qinfan Wu Date: Wed, 6 Aug 2014 16:43:01 -0700 Subject: [PATCH 0483/3378] Reintroduce the comment that was removed in f9ff603. --- src/arena.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/arena.c b/src/arena.c index db699161..118700b9 100644 --- a/src/arena.c +++ b/src/arena.c @@ -110,8 +110,12 @@ arena_avail_comp(arena_chunk_map_t *a, arena_chunk_map_t *b) if (ret == 0) { if (!(a_mapelm & CHUNK_MAP_KEY)) ret = (a_mapelm > b_mapelm) - (a_mapelm < b_mapelm); - else + else { + /* + * Treat keys as if they are lower than anything else. + */ ret = -1; + } } return (ret); From a2ea54c98640eafc5bb256fa4369d5553499ac81 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 6 Aug 2014 23:36:19 -0700 Subject: [PATCH 0484/3378] Add atomic operations tests and fix latent bugs. 
--- Makefile.in | 3 +- include/jemalloc/internal/atomic.h | 41 +++++++++---- test/unit/atomic.c | 97 ++++++++++++++++++++++++++++++ 3 files changed, 128 insertions(+), 13 deletions(-) create mode 100644 test/unit/atomic.c diff --git a/Makefile.in b/Makefile.in index a21acd45..dfafe455 100644 --- a/Makefile.in +++ b/Makefile.in @@ -110,7 +110,8 @@ C_TESTLIB_SRCS := $(srcroot)test/src/math.c $(srcroot)test/src/mtx.c \ $(srcroot)test/src/SFMT.c $(srcroot)test/src/test.c \ $(srcroot)test/src/thd.c C_UTIL_INTEGRATION_SRCS := $(srcroot)src/util.c -TESTS_UNIT := $(srcroot)test/unit/bitmap.c \ +TESTS_UNIT := $(srcroot)test/unit/atomic.c \ + $(srcroot)test/unit/bitmap.c \ $(srcroot)test/unit/ckh.c \ $(srcroot)test/unit/hash.c \ $(srcroot)test/unit/junk.c \ diff --git a/include/jemalloc/internal/atomic.h b/include/jemalloc/internal/atomic.h index 11a7b47f..a0488157 100644 --- a/include/jemalloc/internal/atomic.h +++ b/include/jemalloc/internal/atomic.h @@ -18,6 +18,17 @@ /******************************************************************************/ #ifdef JEMALLOC_H_INLINES +/* + * All functions return the arithmetic result of the atomic operation. Some + * atomic operation APIs return the value prior to mutation, in which case the + * following functions must redundantly compute the result so that it can be + * returned. These functions are normally inlined, so the extra operations can + * be optimized away if the return values aren't used by the callers. 
+ * + * atomic_add_( *p, x) { return (*p + x); } + * atomic_sub_( *p, x) { return (*p - x); } + */ + #ifndef JEMALLOC_ENABLE_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x); uint64_t atomic_sub_uint64(uint64_t *p, uint64_t x); @@ -52,14 +63,14 @@ JEMALLOC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x) { - return (InterlockedExchangeAdd64(p, x)); + return (InterlockedExchangeAdd64(p, x) + x); } JEMALLOC_INLINE uint64_t atomic_sub_uint64(uint64_t *p, uint64_t x) { - return (InterlockedExchangeAdd64(p, -((int64_t)x))); + return (InterlockedExchangeAdd64(p, -((int64_t)x)) - x); } #elif (defined(JEMALLOC_OSATOMIC)) JEMALLOC_INLINE uint64_t @@ -79,28 +90,31 @@ atomic_sub_uint64(uint64_t *p, uint64_t x) JEMALLOC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x) { + uint64_t t = x; asm volatile ( "lock; xaddq %0, %1;" - : "+r" (x), "=m" (*p) /* Outputs. */ + : "+r" (t), "=m" (*p) /* Outputs. */ : "m" (*p) /* Inputs. */ ); - return (x); + return (t + x); } JEMALLOC_INLINE uint64_t atomic_sub_uint64(uint64_t *p, uint64_t x) { + uint64_t t; x = (uint64_t)(-(int64_t)x); + t = x; asm volatile ( "lock; xaddq %0, %1;" - : "+r" (x), "=m" (*p) /* Outputs. */ + : "+r" (t), "=m" (*p) /* Outputs. */ : "m" (*p) /* Inputs. 
*/ ); - return (x); + return (t + x); } # elif (defined(JEMALLOC_ATOMIC9)) JEMALLOC_INLINE uint64_t @@ -164,14 +178,14 @@ JEMALLOC_INLINE uint32_t atomic_add_uint32(uint32_t *p, uint32_t x) { - return (InterlockedExchangeAdd(p, x)); + return (InterlockedExchangeAdd(p, x) + x); } JEMALLOC_INLINE uint32_t atomic_sub_uint32(uint32_t *p, uint32_t x) { - return (InterlockedExchangeAdd(p, -((int32_t)x))); + return (InterlockedExchangeAdd(p, -((int32_t)x)) - x); } #elif (defined(JEMALLOC_OSATOMIC)) JEMALLOC_INLINE uint32_t @@ -191,28 +205,31 @@ atomic_sub_uint32(uint32_t *p, uint32_t x) JEMALLOC_INLINE uint32_t atomic_add_uint32(uint32_t *p, uint32_t x) { + uint32_t t = x; asm volatile ( "lock; xaddl %0, %1;" - : "+r" (x), "=m" (*p) /* Outputs. */ + : "+r" (t), "=m" (*p) /* Outputs. */ : "m" (*p) /* Inputs. */ ); - return (x); + return (t + x); } JEMALLOC_INLINE uint32_t atomic_sub_uint32(uint32_t *p, uint32_t x) { + uint32_t t; x = (uint32_t)(-(int32_t)x); + t = x; asm volatile ( "lock; xaddl %0, %1;" - : "+r" (x), "=m" (*p) /* Outputs. */ + : "+r" (t), "=m" (*p) /* Outputs. */ : "m" (*p) /* Inputs. 
*/ ); - return (x); + return (t + x); } #elif (defined(JEMALLOC_ATOMIC9)) JEMALLOC_INLINE uint32_t diff --git a/test/unit/atomic.c b/test/unit/atomic.c new file mode 100644 index 00000000..eb6136c7 --- /dev/null +++ b/test/unit/atomic.c @@ -0,0 +1,97 @@ +#include "test/jemalloc_test.h" + +#define TEST_STRUCT(p, t) \ +struct p##_test_s { \ + t accum0; \ + t x; \ +}; \ +typedef struct p##_test_s p##_test_t; + +#define TEST_BODY(p, t, PRI) do { \ + const p##_test_t tests[] = { \ + {-1, -1}, \ + {-1, 0}, \ + {-1, 1}, \ + \ + { 0, -1}, \ + { 0, 0}, \ + { 0, 1}, \ + \ + { 1, -1}, \ + { 1, 0}, \ + { 1, 1}, \ + \ + {0, -(1 << 22)}, \ + {0, (1 << 22)}, \ + {(1 << 22), -(1 << 22)}, \ + {(1 << 22), (1 << 22)} \ + }; \ + unsigned i; \ + \ + for (i = 0; i < sizeof(tests)/sizeof(p##_test_t); i++) { \ + t accum = tests[i].accum0; \ + assert_u64_eq(atomic_read_##p(&accum), tests[i].accum0, \ + "i=%u", i); \ + assert_u64_eq(atomic_add_##p(&accum, tests[i].x), \ + tests[i].accum0 + tests[i].x, \ + "i=%u, accum=%#"PRI", x=%#"PRI, \ + i, tests[i].accum0, tests[i].x); \ + assert_u64_eq(atomic_read_##p(&accum), accum, \ + "i=%u", i); \ + \ + accum = tests[i].accum0; \ + assert_u64_eq(atomic_sub_##p(&accum, tests[i].x), \ + tests[i].accum0 - tests[i].x, \ + "i=%u, accum=%#"PRI", x=%#"PRI, \ + i, tests[i].accum0, tests[i].x); \ + assert_u64_eq(atomic_read_##p(&accum), accum, \ + "i=%u", i); \ + } \ +} while (0) + +TEST_STRUCT(uint64, uint64_t) +TEST_BEGIN(test_atomic_uint64) +{ + +#if !(LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3) + test_skip("64-bit atomic operations not supported"); +#else + TEST_BODY(uint64, uint64_t, PRIx64); +#endif +} +TEST_END + +TEST_STRUCT(uint32, uint32_t) +TEST_BEGIN(test_atomic_uint32) +{ + + TEST_BODY(uint32, uint32_t, PRIx32); +} +TEST_END + +TEST_STRUCT(z, size_t) +TEST_BEGIN(test_atomic_z) +{ + + TEST_BODY(z, size_t, "zx"); +} +TEST_END + +TEST_STRUCT(u, unsigned) +TEST_BEGIN(test_atomic_u) +{ + + TEST_BODY(u, unsigned, "x"); +} +TEST_END + +int +main(void) 
+{ + + return (test( + test_atomic_uint64, + test_atomic_uint32, + test_atomic_z, + test_atomic_u)); +} From 1522937e9cbcfa24c881dc439cc454f9a34a7e88 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 6 Aug 2014 23:38:39 -0700 Subject: [PATCH 0485/3378] Fix the cactive statistic. Fix the cactive statistic to decrease (rather than increase) when active memory decreases. This regression was introduced by aa5113b1fdafd1129c22512837c6c3d66c295fc8 (Refactor overly large/complex functions) and first released in 3.5.0. --- src/arena.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/arena.c b/src/arena.c index 118700b9..c0ec98a8 100644 --- a/src/arena.c +++ b/src/arena.c @@ -382,9 +382,9 @@ arena_cactive_update(arena_t *arena, size_t add_pages, size_t sub_pages) { if (config_stats) { - ssize_t cactive_diff = CHUNK_CEILING((arena->nactive + - add_pages) << LG_PAGE) - CHUNK_CEILING((arena->nactive - - sub_pages) << LG_PAGE); + ssize_t cactive_diff = CHUNK_CEILING((arena->nactive + add_pages + - sub_pages) << LG_PAGE) - CHUNK_CEILING(arena->nactive << + LG_PAGE); if (cactive_diff != 0) stats_cactive_add(cactive_diff); } From 011dde96c52e37e897526e242e9e3018caafb751 Mon Sep 17 00:00:00 2001 From: Psi Mankoski Date: Mon, 11 Aug 2014 17:08:25 -0700 Subject: [PATCH 0486/3378] Set VERSION also when the source directory is a git submodule using a ".git" file pointing to the repo. directory. --- configure.ac | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/configure.ac b/configure.ac index 9c365eda..ede5f70f 100644 --- a/configure.ac +++ b/configure.ac @@ -1029,8 +1029,8 @@ dnl ============================================================================ dnl jemalloc configuration. dnl -dnl Set VERSION if source directory has an embedded git repository. -if test -d "${srcroot}.git" ; then +dnl Set VERSION if source directory has an embedded git repository or is a git submodule. 
+if test -e "${srcroot}.git" ; then git describe --long --abbrev=40 > ${srcroot}VERSION fi jemalloc_version=`cat ${srcroot}VERSION` From 04d60a132beed9e8c33f73b94fb9251b919073c8 Mon Sep 17 00:00:00 2001 From: Qinfan Wu Date: Fri, 18 Jul 2014 14:21:17 -0700 Subject: [PATCH 0487/3378] Maintain all the dirty runs in a linked list for each arena --- include/jemalloc/internal/arena.h | 6 ++++ src/arena.c | 47 +++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index cb73283b..3422f362 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -89,6 +89,9 @@ struct arena_chunk_map_s { }; /* union { ... }; */ #endif + /* Linkage for list of dirty runs. */ + ql_elm(arena_chunk_map_t) dr_link; + /* * Run address (or size) and various flags are stored together. The bit * layout looks like (assuming 32-bit system): @@ -333,6 +336,9 @@ struct arena_s { /* Tree of dirty-page-containing chunks this arena manages. */ arena_chunk_tree_t chunks_dirty; + /* List of dirty runs this arena manages. */ + arena_chunk_mapelms_t runs_dirty; + /* * In order to avoid rapid chunk allocation/deallocation when an arena * oscillates right on the cusp of needing a new chunk, cache the most diff --git a/src/arena.c b/src/arena.c index c0ec98a8..33977311 100644 --- a/src/arena.c +++ b/src/arena.c @@ -394,6 +394,7 @@ static void arena_run_split_remove(arena_t *arena, arena_chunk_t *chunk, size_t run_ind, size_t flag_dirty, size_t need_pages) { + arena_chunk_map_t *mapelm; size_t total_pages, rem_pages; total_pages = arena_mapbits_unallocated_size_get(chunk, run_ind) >> @@ -404,6 +405,11 @@ arena_run_split_remove(arena_t *arena, arena_chunk_t *chunk, size_t run_ind, rem_pages = total_pages - need_pages; arena_avail_remove(arena, chunk, run_ind, total_pages, true, true); + if (flag_dirty != 0) { + /* If the run is dirty, it must be in the dirty list. 
*/ + mapelm = arena_mapp_get(chunk, run_ind); + ql_remove(&arena->runs_dirty, mapelm, dr_link); + } arena_cactive_update(arena, need_pages, 0); arena->nactive += need_pages; @@ -416,6 +422,14 @@ arena_run_split_remove(arena_t *arena, arena_chunk_t *chunk, size_t run_ind, arena_mapbits_unallocated_set(chunk, run_ind+total_pages-1, (rem_pages << LG_PAGE), flag_dirty); + mapelm = arena_mapp_get(chunk, run_ind+need_pages); + /* + * Append the trailing run at the end of the dirty list. + * We could also insert the run at the original place. + * Let us consider this later. + */ + ql_elm_new(mapelm, dr_link); + ql_tail_insert(&arena->runs_dirty, mapelm, dr_link); } else { arena_mapbits_unallocated_set(chunk, run_ind+need_pages, (rem_pages << LG_PAGE), @@ -701,6 +715,11 @@ arena_chunk_alloc(arena_t *arena) /* Insert the run into the runs_avail tree. */ arena_avail_insert(arena, chunk, map_bias, chunk_npages-map_bias, false, false); + if (arena_mapbits_dirty_get(chunk, map_bias) != 0) { + arena_chunk_map_t *mapelm = arena_mapp_get(chunk, map_bias); + ql_elm_new(mapelm, dr_link); + ql_tail_insert(&arena->runs_dirty, mapelm, dr_link); + } return (chunk); } @@ -739,6 +758,7 @@ arena_chunk_dalloc_huge(arena_t *arena, void *chunk, size_t size) static void arena_chunk_dalloc(arena_t *arena, arena_chunk_t *chunk) { + assert(arena_mapbits_allocated_get(chunk, map_bias) == 0); assert(arena_mapbits_allocated_get(chunk, chunk_npages-1) == 0); assert(arena_mapbits_unallocated_size_get(chunk, map_bias) == @@ -754,6 +774,10 @@ arena_chunk_dalloc(arena_t *arena, arena_chunk_t *chunk) */ arena_avail_remove(arena, chunk, map_bias, chunk_npages-map_bias, false, false); + if (arena_mapbits_dirty_get(chunk, map_bias) != 0) { + arena_chunk_map_t *mapelm = arena_mapp_get(chunk, map_bias); + ql_remove(&arena->runs_dirty, mapelm, dr_link); + } if (arena->spare != NULL) { arena_chunk_t *spare = arena->spare; @@ -1216,6 +1240,13 @@ arena_run_coalesce(arena_t *arena, arena_chunk_t *chunk, size_t 
*p_size, arena_avail_remove(arena, chunk, run_ind+run_pages, nrun_pages, false, true); + /* If the successor is dirty, remove it from runs_dirty. */ + if (flag_dirty != 0) { + arena_chunk_map_t *mapelm = arena_mapp_get(chunk, + run_ind+run_pages); + ql_remove(&arena->runs_dirty, mapelm, dr_link); + } + size += nrun_size; run_pages += nrun_pages; @@ -1244,6 +1275,13 @@ arena_run_coalesce(arena_t *arena, arena_chunk_t *chunk, size_t *p_size, arena_avail_remove(arena, chunk, run_ind, prun_pages, true, false); + /* If the predecessor is dirty, remove it from runs_dirty. */ + if (flag_dirty != 0) { + arena_chunk_map_t *mapelm = arena_mapp_get(chunk, + run_ind); + ql_remove(&arena->runs_dirty, mapelm, dr_link); + } + size += prun_size; run_pages += prun_pages; @@ -1261,6 +1299,7 @@ static void arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, bool cleaned) { arena_chunk_t *chunk; + arena_chunk_map_t *mapelm; size_t size, run_ind, run_pages, flag_dirty; chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); @@ -1315,6 +1354,13 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, bool cleaned) arena_mapbits_dirty_get(chunk, run_ind+run_pages-1)); arena_avail_insert(arena, chunk, run_ind, run_pages, true, true); + if (dirty) { + /* Insert into runs_dirty list. */ + mapelm = arena_mapp_get(chunk, run_ind); + ql_elm_new(mapelm, dr_link); + ql_tail_insert(&arena->runs_dirty, mapelm, dr_link); + } + /* Deallocate chunk if it is now completely unused. */ if (size == arena_maxclass) { assert(run_ind == map_bias); @@ -2437,6 +2483,7 @@ arena_new(arena_t *arena, unsigned ind) /* Initialize chunks. 
*/ arena_chunk_dirty_new(&arena->chunks_dirty); + ql_new(&arena->runs_dirty); arena->spare = NULL; arena->nactive = 0; From a244e5078e8505978b5f63cfe6dcb3c9d63d2cb5 Mon Sep 17 00:00:00 2001 From: Qinfan Wu Date: Mon, 21 Jul 2014 10:23:36 -0700 Subject: [PATCH 0488/3378] Add dirty page counting for debug --- src/arena.c | 33 +++++++++++++++++++++++++++++---- 1 file changed, 29 insertions(+), 4 deletions(-) diff --git a/src/arena.c b/src/arena.c index 33977311..3cf1abf6 100644 --- a/src/arena.c +++ b/src/arena.c @@ -923,11 +923,33 @@ arena_maybe_purge(arena_t *arena) static arena_chunk_t * chunks_dirty_iter_cb(arena_chunk_tree_t *tree, arena_chunk_t *chunk, void *arg) { - size_t *ndirty = (size_t *)arg; + size_t *ndirty = (size_t *)arg; - assert(chunk->ndirty != 0); - *ndirty += chunk->ndirty; - return (NULL); + assert(chunk->ndirty != 0); + *ndirty += chunk->ndirty; + return (NULL); +} + +static size_t +arena_dirty_count(arena_t *arena) +{ + size_t ndirty = 0; + arena_chunk_map_t *mapelm; + arena_chunk_t *chunk; + size_t pageind, npages; + + ql_foreach(mapelm, &arena->runs_dirty, dr_link) { + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(mapelm); + pageind = arena_mapelm_to_pageind(mapelm); + assert(arena_mapbits_allocated_get(chunk, pageind) == 0); + assert(arena_mapbits_large_get(chunk, pageind) == 0); + assert(arena_mapbits_dirty_get(chunk, pageind) != 0); + npages = arena_mapbits_unallocated_size_get(chunk, pageind) >> + LG_PAGE; + ndirty += npages; + } + + return (ndirty); } static size_t @@ -1134,6 +1156,9 @@ arena_purge(arena_t *arena, bool all) arena_chunk_dirty_iter(&arena->chunks_dirty, NULL, chunks_dirty_iter_cb, (void *)&ndirty); assert(ndirty == arena->ndirty); + + ndirty = arena_dirty_count(arena); + assert(ndirty == arena->ndirty); } assert(arena->ndirty > arena->npurgatory || all); assert((arena->nactive >> opt_lg_dirty_mult) < (arena->ndirty - From e970800c780df918b80f8b914eeac475dd5f1ec4 Mon Sep 17 00:00:00 2001 From: Qinfan Wu Date: Mon, 21 Jul 2014 
18:09:04 -0700 Subject: [PATCH 0489/3378] Purge dirty pages from the beginning of the dirty list. --- src/arena.c | 227 +++++++++++++++------------------------------------- 1 file changed, 66 insertions(+), 161 deletions(-) diff --git a/src/arena.c b/src/arena.c index 3cf1abf6..a78a66f6 100644 --- a/src/arena.c +++ b/src/arena.c @@ -973,86 +973,73 @@ arena_compute_npurgatory(arena_t *arena, bool all) return (npurgatory); } -static void -arena_chunk_stash_dirty(arena_t *arena, arena_chunk_t *chunk, bool all, +static size_t +arena_stash_dirty(arena_t *arena, bool all, size_t npurgatory, arena_chunk_mapelms_t *mapelms) { - size_t pageind, npages; + arena_chunk_map_t *mapelm; + size_t nstashed = 0; + arena_chunk_t *chunk; + size_t pageind, npages, run_size; + arena_run_t *run; - /* - * Temporarily allocate free dirty runs within chunk. If all is false, - * only operate on dirty runs that are fragments; otherwise operate on - * all dirty runs. - */ - for (pageind = map_bias; pageind < chunk_npages; pageind += npages) { - arena_chunk_map_t *mapelm = arena_mapp_get(chunk, pageind); - if (arena_mapbits_allocated_get(chunk, pageind) == 0) { - size_t run_size = - arena_mapbits_unallocated_size_get(chunk, pageind); + /* Add at least npurgatory pages to purge_list. 
*/ + for (mapelm = ql_first(&arena->runs_dirty); mapelm != NULL; + mapelm = ql_first(&arena->runs_dirty)) { + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(mapelm); + pageind = arena_mapelm_to_pageind(mapelm); + run_size = arena_mapbits_unallocated_size_get(chunk, pageind); + npages = run_size >> LG_PAGE; + run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)(pageind << + LG_PAGE)); - npages = run_size >> LG_PAGE; - assert(pageind + npages <= chunk_npages); - assert(arena_mapbits_dirty_get(chunk, pageind) == - arena_mapbits_dirty_get(chunk, pageind+npages-1)); + assert(pageind + npages <= chunk_npages); + assert(arena_mapbits_dirty_get(chunk, pageind) == + arena_mapbits_dirty_get(chunk, pageind+npages-1)); - if (arena_mapbits_dirty_get(chunk, pageind) != 0 && - (all || arena_avail_adjac(chunk, pageind, - npages))) { - arena_run_t *run = (arena_run_t *)((uintptr_t) - chunk + (uintptr_t)(pageind << LG_PAGE)); + /* Temporarily allocate the free dirty run. */ + arena_run_split_large(arena, run, run_size, false); + /* Append to purge_list for later processing. */ + ql_elm_new(mapelm, dr_link); + ql_tail_insert(mapelms, mapelm, dr_link); - arena_run_split_large(arena, run, run_size, - false); - /* Append to list for later processing. */ - ql_elm_new(mapelm, u.ql_link); - ql_tail_insert(mapelms, mapelm, u.ql_link); - } - } else { - /* Skip run. 
*/ - if (arena_mapbits_large_get(chunk, pageind) != 0) { - npages = arena_mapbits_large_size_get(chunk, - pageind) >> LG_PAGE; - } else { - size_t binind; - arena_bin_info_t *bin_info; - arena_run_t *run = (arena_run_t *)((uintptr_t) - chunk + (uintptr_t)(pageind << LG_PAGE)); + nstashed += npages; - assert(arena_mapbits_small_runind_get(chunk, - pageind) == 0); - binind = arena_bin_index(arena, run->bin); - bin_info = &arena_bin_info[binind]; - npages = bin_info->run_size >> LG_PAGE; - } - } + if (all == false && nstashed >= npurgatory) + break; } - assert(pageind == chunk_npages); - assert(chunk->ndirty == 0 || all == false); - assert(chunk->nruns_adjac == 0); + + return (nstashed); } static size_t -arena_chunk_purge_stashed(arena_t *arena, arena_chunk_t *chunk, - arena_chunk_mapelms_t *mapelms) +arena_purge_stashed(arena_t *arena, arena_chunk_mapelms_t *mapelms) { - size_t npurged, pageind, npages, nmadvise; + size_t npurged, nmadvise; arena_chunk_map_t *mapelm; + arena_chunk_t *chunk; + size_t pageind, npages, run_size; - malloc_mutex_unlock(&arena->lock); if (config_stats) nmadvise = 0; npurged = 0; - ql_foreach(mapelm, mapelms, u.ql_link) { + + malloc_mutex_unlock(&arena->lock); + + ql_foreach(mapelm, mapelms, dr_link) { bool unzeroed; size_t flag_unzeroed, i; + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(mapelm); pageind = arena_mapelm_to_pageind(mapelm); - npages = arena_mapbits_large_size_get(chunk, pageind) >> - LG_PAGE; + run_size = arena_mapbits_large_size_get(chunk, pageind); + npages = run_size >> LG_PAGE; + assert(pageind + npages <= chunk_npages); unzeroed = pages_purge((void *)((uintptr_t)chunk + (pageind << - LG_PAGE)), (npages << LG_PAGE)); + LG_PAGE)), run_size); flag_unzeroed = unzeroed ? 
CHUNK_MAP_UNZEROED : 0; + /* * Set the unzeroed flag for all pages, now that pages_purge() * has returned whether the pages were zeroed as a side effect @@ -1067,89 +1054,48 @@ arena_chunk_purge_stashed(arena_t *arena, arena_chunk_t *chunk, arena_mapbits_unzeroed_set(chunk, pageind+i, flag_unzeroed); } + npurged += npages; if (config_stats) nmadvise++; } + malloc_mutex_lock(&arena->lock); - if (config_stats) + + if (config_stats) { arena->stats.nmadvise += nmadvise; + arena->stats.purged += npurged; + } return (npurged); } static void -arena_chunk_unstash_purged(arena_t *arena, arena_chunk_t *chunk, - arena_chunk_mapelms_t *mapelms) +arena_unstash_purged(arena_t *arena, arena_chunk_mapelms_t *mapelms) { arena_chunk_map_t *mapelm; + arena_chunk_t *chunk; + arena_run_t *run; size_t pageind; /* Deallocate runs. */ for (mapelm = ql_first(mapelms); mapelm != NULL; mapelm = ql_first(mapelms)) { - arena_run_t *run; - + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(mapelm); pageind = arena_mapelm_to_pageind(mapelm); run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)(pageind << LG_PAGE)); - ql_remove(mapelms, mapelm, u.ql_link); + ql_remove(mapelms, mapelm, dr_link); arena_run_dalloc(arena, run, false, true); } } -static inline size_t -arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk, bool all) -{ - size_t npurged; - arena_chunk_mapelms_t mapelms; - - ql_new(&mapelms); - - /* - * If chunk is the spare, temporarily re-allocate it, 1) so that its - * run is reinserted into runs_avail, and 2) so that it cannot be - * completely discarded by another thread while arena->lock is dropped - * by this thread. Note that the arena_run_dalloc() call will - * implicitly deallocate the chunk, so no explicit action is required - * in this function to deallocate the chunk. - * - * Note that once a chunk contains dirty pages, it cannot again contain - * a single run unless 1) it is a dirty run, or 2) this function purges - * dirty pages and causes the transition to a single clean run. 
Thus - * (chunk == arena->spare) is possible, but it is not possible for - * this function to be called on the spare unless it contains a dirty - * run. - */ - if (chunk == arena->spare) { - assert(arena_mapbits_dirty_get(chunk, map_bias) != 0); - assert(arena_mapbits_dirty_get(chunk, chunk_npages-1) != 0); - - arena_chunk_alloc(arena); - } - - if (config_stats) - arena->stats.purged += chunk->ndirty; - - /* - * Operate on all dirty runs if there is no clean/dirty run - * fragmentation. - */ - if (chunk->nruns_adjac == 0) - all = true; - - arena_chunk_stash_dirty(arena, chunk, all, &mapelms); - npurged = arena_chunk_purge_stashed(arena, chunk, &mapelms); - arena_chunk_unstash_purged(arena, chunk, &mapelms); - - return (npurged); -} - -static void +void arena_purge(arena_t *arena, bool all) { - arena_chunk_t *chunk; - size_t npurgatory; + size_t npurgatory, npurgeable, npurged; + arena_chunk_mapelms_t purge_list; + if (config_debug) { size_t ndirty = 0; @@ -1175,58 +1121,17 @@ arena_purge(arena_t *arena, bool all) npurgatory = arena_compute_npurgatory(arena, all); arena->npurgatory += npurgatory; - while (npurgatory > 0) { - size_t npurgeable, npurged, nunpurged; + ql_new(&purge_list); - /* Get next chunk with dirty pages. */ - chunk = arena_chunk_dirty_first(&arena->chunks_dirty); - if (chunk == NULL) { - /* - * This thread was unable to purge as many pages as - * originally intended, due to races with other threads - * that either did some of the purging work, or re-used - * dirty pages. - */ - arena->npurgatory -= npurgatory; - return; - } - npurgeable = chunk->ndirty; - assert(npurgeable != 0); + npurgeable = arena_stash_dirty(arena, all, npurgatory, &purge_list); + assert(npurgeable >= npurgatory); + /* Actually we no longer need arena->npurgatory. 
*/ + arena->npurgatory -= npurgatory; - if (npurgeable > npurgatory && chunk->nruns_adjac == 0) { - /* - * This thread will purge all the dirty pages in chunk, - * so set npurgatory to reflect this thread's intent to - * purge the pages. This tends to reduce the chances - * of the following scenario: - * - * 1) This thread sets arena->npurgatory such that - * (arena->ndirty - arena->npurgatory) is at the - * threshold. - * 2) This thread drops arena->lock. - * 3) Another thread causes one or more pages to be - * dirtied, and immediately determines that it must - * purge dirty pages. - * - * If this scenario *does* play out, that's okay, - * because all of the purging work being done really - * needs to happen. - */ - arena->npurgatory += npurgeable - npurgatory; - npurgatory = npurgeable; - } + npurged = arena_purge_stashed(arena, &purge_list); + assert(npurged == npurgeable); - /* - * Keep track of how many pages are purgeable, versus how many - * actually get purged, and adjust counters accordingly. - */ - arena->npurgatory -= npurgeable; - npurgatory -= npurgeable; - npurged = arena_chunk_purge(arena, chunk, all); - nunpurged = npurgeable - npurged; - arena->npurgatory += nunpurged; - npurgatory += nunpurged; - } + arena_unstash_purged(arena, &purge_list); } void From 90737fcda150a5da3f4db1c3144ea24eed8de55b Mon Sep 17 00:00:00 2001 From: Qinfan Wu Date: Mon, 21 Jul 2014 19:39:20 -0700 Subject: [PATCH 0490/3378] Remove chunks_dirty tree, nruns_avail and nruns_adjac since we no longer need to maintain the tree for dirty page purging. --- include/jemalloc/internal/arena.h | 19 --- src/arena.c | 187 ++---------------------------- 2 files changed, 10 insertions(+), 196 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 3422f362..f87dfe4d 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -167,24 +167,9 @@ struct arena_chunk_s { /* Arena that owns the chunk. 
*/ arena_t *arena; - /* Linkage for tree of arena chunks that contain dirty runs. */ - rb_node(arena_chunk_t) dirty_link; - /* Number of dirty pages. */ size_t ndirty; - /* Number of available runs. */ - size_t nruns_avail; - - /* - * Number of available run adjacencies that purging could coalesce. - * Clean and dirty available runs are not coalesced, which causes - * virtual memory fragmentation. The ratio of - * (nruns_avail-nruns_adjac):nruns_adjac is used for tracking this - * fragmentation. - */ - size_t nruns_adjac; - /* * Map of pages within chunk that keeps track of free/large/small. The * first map_bias entries are omitted, since the chunk header does not @@ -193,7 +178,6 @@ struct arena_chunk_s { */ arena_chunk_map_t map[1]; /* Dynamically sized. */ }; -typedef rb_tree(arena_chunk_t) arena_chunk_tree_t; struct arena_run_s { /* Bin this run is associated with. */ @@ -333,9 +317,6 @@ struct arena_s { dss_prec_t dss_prec; - /* Tree of dirty-page-containing chunks this arena manages. */ - arena_chunk_tree_t chunks_dirty; - /* List of dirty runs this arena manages. */ arena_chunk_mapelms_t runs_dirty; diff --git a/src/arena.c b/src/arena.c index a78a66f6..24ed2ba2 100644 --- a/src/arena.c +++ b/src/arena.c @@ -125,143 +125,18 @@ arena_avail_comp(arena_chunk_map_t *a, arena_chunk_map_t *b) rb_gen(static UNUSED, arena_avail_tree_, arena_avail_tree_t, arena_chunk_map_t, u.rb_link, arena_avail_comp) -static inline int -arena_chunk_dirty_comp(arena_chunk_t *a, arena_chunk_t *b) -{ - - assert(a != NULL); - assert(b != NULL); - - /* - * Short-circuit for self comparison. The following comparison code - * would come to the same result, but at the cost of executing the slow - * path. - */ - if (a == b) - return (0); - - /* - * Order such that chunks with higher fragmentation are "less than" - * those with lower fragmentation -- purging order is from "least" to - * "greatest". 
Fragmentation is measured as: - * - * mean current avail run size - * -------------------------------- - * mean defragmented avail run size - * - * navail - * ----------- - * nruns_avail nruns_avail-nruns_adjac - * = ========================= = ----------------------- - * navail nruns_avail - * ----------------------- - * nruns_avail-nruns_adjac - * - * The following code multiplies away the denominator prior to - * comparison, in order to avoid division. - * - */ - { - size_t a_val = (a->nruns_avail - a->nruns_adjac) * - b->nruns_avail; - size_t b_val = (b->nruns_avail - b->nruns_adjac) * - a->nruns_avail; - - if (a_val < b_val) - return (1); - if (a_val > b_val) - return (-1); - } - /* - * Break ties by chunk address. For fragmented chunks, report lower - * addresses as "lower", so that fragmentation reduction happens first - * at lower addresses. However, use the opposite ordering for - * unfragmented chunks, in order to increase the chances of - * re-allocating dirty runs. - */ - { - uintptr_t a_chunk = (uintptr_t)a; - uintptr_t b_chunk = (uintptr_t)b; - int ret = ((a_chunk > b_chunk) - (a_chunk < b_chunk)); - if (a->nruns_adjac == 0) { - assert(b->nruns_adjac == 0); - ret = -ret; - } - return (ret); - } -} - -/* Generate red-black tree functions. 
*/ -rb_gen(static UNUSED, arena_chunk_dirty_, arena_chunk_tree_t, arena_chunk_t, - dirty_link, arena_chunk_dirty_comp) - -static inline bool -arena_avail_adjac_pred(arena_chunk_t *chunk, size_t pageind) -{ - bool ret; - - if (pageind-1 < map_bias) - ret = false; - else { - ret = (arena_mapbits_allocated_get(chunk, pageind-1) == 0); - assert(ret == false || arena_mapbits_dirty_get(chunk, - pageind-1) != arena_mapbits_dirty_get(chunk, pageind)); - } - return (ret); -} - -static inline bool -arena_avail_adjac_succ(arena_chunk_t *chunk, size_t pageind, size_t npages) -{ - bool ret; - - if (pageind+npages == chunk_npages) - ret = false; - else { - assert(pageind+npages < chunk_npages); - ret = (arena_mapbits_allocated_get(chunk, pageind+npages) == 0); - assert(ret == false || arena_mapbits_dirty_get(chunk, pageind) - != arena_mapbits_dirty_get(chunk, pageind+npages)); - } - return (ret); -} - -static inline bool -arena_avail_adjac(arena_chunk_t *chunk, size_t pageind, size_t npages) -{ - - return (arena_avail_adjac_pred(chunk, pageind) || - arena_avail_adjac_succ(chunk, pageind, npages)); -} - static void arena_avail_insert(arena_t *arena, arena_chunk_t *chunk, size_t pageind, - size_t npages, bool maybe_adjac_pred, bool maybe_adjac_succ) + size_t npages) { assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >> LG_PAGE)); - /* - * chunks_dirty is keyed by nruns_{avail,adjac}, so the chunk must be - * removed and reinserted even if the run to be inserted is clean. 
- */ - if (chunk->ndirty != 0) - arena_chunk_dirty_remove(&arena->chunks_dirty, chunk); - - if (maybe_adjac_pred && arena_avail_adjac_pred(chunk, pageind)) - chunk->nruns_adjac++; - if (maybe_adjac_succ && arena_avail_adjac_succ(chunk, pageind, npages)) - chunk->nruns_adjac++; - chunk->nruns_avail++; - assert(chunk->nruns_avail > chunk->nruns_adjac); - if (arena_mapbits_dirty_get(chunk, pageind) != 0) { arena->ndirty += npages; chunk->ndirty += npages; } - if (chunk->ndirty != 0) - arena_chunk_dirty_insert(&arena->chunks_dirty, chunk); arena_avail_tree_insert(&arena->runs_avail, arena_mapp_get(chunk, pageind)); @@ -269,33 +144,16 @@ arena_avail_insert(arena_t *arena, arena_chunk_t *chunk, size_t pageind, static void arena_avail_remove(arena_t *arena, arena_chunk_t *chunk, size_t pageind, - size_t npages, bool maybe_adjac_pred, bool maybe_adjac_succ) + size_t npages) { assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >> LG_PAGE)); - /* - * chunks_dirty is keyed by nruns_{avail,adjac}, so the chunk must be - * removed and reinserted even if the run to be removed is clean. 
- */ - if (chunk->ndirty != 0) - arena_chunk_dirty_remove(&arena->chunks_dirty, chunk); - - if (maybe_adjac_pred && arena_avail_adjac_pred(chunk, pageind)) - chunk->nruns_adjac--; - if (maybe_adjac_succ && arena_avail_adjac_succ(chunk, pageind, npages)) - chunk->nruns_adjac--; - chunk->nruns_avail--; - assert(chunk->nruns_avail > chunk->nruns_adjac || (chunk->nruns_avail - == 0 && chunk->nruns_adjac == 0)); - if (arena_mapbits_dirty_get(chunk, pageind) != 0) { arena->ndirty -= npages; chunk->ndirty -= npages; } - if (chunk->ndirty != 0) - arena_chunk_dirty_insert(&arena->chunks_dirty, chunk); arena_avail_tree_remove(&arena->runs_avail, arena_mapp_get(chunk, pageind)); @@ -404,7 +262,7 @@ arena_run_split_remove(arena_t *arena, arena_chunk_t *chunk, size_t run_ind, assert(need_pages <= total_pages); rem_pages = total_pages - need_pages; - arena_avail_remove(arena, chunk, run_ind, total_pages, true, true); + arena_avail_remove(arena, chunk, run_ind, total_pages); if (flag_dirty != 0) { /* If the run is dirty, it must be in the dirty list. */ mapelm = arena_mapp_get(chunk, run_ind); @@ -440,8 +298,7 @@ arena_run_split_remove(arena_t *arena, arena_chunk_t *chunk, size_t run_ind, arena_mapbits_unzeroed_get(chunk, run_ind+total_pages-1)); } - arena_avail_insert(arena, chunk, run_ind+need_pages, rem_pages, - false, true); + arena_avail_insert(arena, chunk, run_ind+need_pages, rem_pages); } } @@ -660,9 +517,6 @@ arena_chunk_init_hard(arena_t *arena) */ chunk->ndirty = 0; - chunk->nruns_avail = 0; - chunk->nruns_adjac = 0; - /* * Initialize the map to contain one maximal free untouched run. Mark * the pages as zeroed iff chunk_alloc() returned a zeroed chunk. @@ -713,8 +567,7 @@ arena_chunk_alloc(arena_t *arena) } /* Insert the run into the runs_avail tree. 
*/ - arena_avail_insert(arena, chunk, map_bias, chunk_npages-map_bias, - false, false); + arena_avail_insert(arena, chunk, map_bias, chunk_npages-map_bias); if (arena_mapbits_dirty_get(chunk, map_bias) != 0) { arena_chunk_map_t *mapelm = arena_mapp_get(chunk, map_bias); ql_elm_new(mapelm, dr_link); @@ -772,8 +625,7 @@ arena_chunk_dalloc(arena_t *arena, arena_chunk_t *chunk) * Remove run from the runs_avail tree, so that the arena does not use * it. */ - arena_avail_remove(arena, chunk, map_bias, chunk_npages-map_bias, - false, false); + arena_avail_remove(arena, chunk, map_bias, chunk_npages-map_bias); if (arena_mapbits_dirty_get(chunk, map_bias) != 0) { arena_chunk_map_t *mapelm = arena_mapp_get(chunk, map_bias); ql_remove(&arena->runs_dirty, mapelm, dr_link); @@ -920,16 +772,6 @@ arena_maybe_purge(arena_t *arena) arena_purge(arena, false); } -static arena_chunk_t * -chunks_dirty_iter_cb(arena_chunk_tree_t *tree, arena_chunk_t *chunk, void *arg) -{ - size_t *ndirty = (size_t *)arg; - - assert(chunk->ndirty != 0); - *ndirty += chunk->ndirty; - return (NULL); -} - static size_t arena_dirty_count(arena_t *arena) { @@ -1097,13 +939,7 @@ arena_purge(arena_t *arena, bool all) arena_chunk_mapelms_t purge_list; if (config_debug) { - size_t ndirty = 0; - - arena_chunk_dirty_iter(&arena->chunks_dirty, NULL, - chunks_dirty_iter_cb, (void *)&ndirty); - assert(ndirty == arena->ndirty); - - ndirty = arena_dirty_count(arena); + size_t ndirty = arena_dirty_count(arena); assert(ndirty == arena->ndirty); } assert(arena->ndirty > arena->npurgatory || all); @@ -1167,8 +1003,7 @@ arena_run_coalesce(arena_t *arena, arena_chunk_t *chunk, size_t *p_size, run_ind+run_pages+nrun_pages-1) == nrun_size); assert(arena_mapbits_dirty_get(chunk, run_ind+run_pages+nrun_pages-1) == flag_dirty); - arena_avail_remove(arena, chunk, run_ind+run_pages, nrun_pages, - false, true); + arena_avail_remove(arena, chunk, run_ind+run_pages, nrun_pages); /* If the successor is dirty, remove it from runs_dirty. 
*/ if (flag_dirty != 0) { @@ -1202,8 +1037,7 @@ arena_run_coalesce(arena_t *arena, arena_chunk_t *chunk, size_t *p_size, assert(arena_mapbits_unallocated_size_get(chunk, run_ind) == prun_size); assert(arena_mapbits_dirty_get(chunk, run_ind) == flag_dirty); - arena_avail_remove(arena, chunk, run_ind, prun_pages, true, - false); + arena_avail_remove(arena, chunk, run_ind, prun_pages); /* If the predecessor is dirty, remove it from runs_dirty. */ if (flag_dirty != 0) { @@ -1282,7 +1116,7 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, bool cleaned) arena_mapbits_unallocated_size_get(chunk, run_ind+run_pages-1)); assert(arena_mapbits_dirty_get(chunk, run_ind) == arena_mapbits_dirty_get(chunk, run_ind+run_pages-1)); - arena_avail_insert(arena, chunk, run_ind, run_pages, true, true); + arena_avail_insert(arena, chunk, run_ind, run_pages); if (dirty) { /* Insert into runs_dirty list. */ @@ -2412,7 +2246,6 @@ arena_new(arena_t *arena, unsigned ind) arena->dss_prec = chunk_dss_prec_get(); /* Initialize chunks. */ - arena_chunk_dirty_new(&arena->chunks_dirty); ql_new(&arena->runs_dirty); arena->spare = NULL; From e8a2fd83a2ddc082fcd4e49373ea05bd79213c71 Mon Sep 17 00:00:00 2001 From: Qinfan Wu Date: Mon, 21 Jul 2014 20:00:14 -0700 Subject: [PATCH 0491/3378] arena->npurgatory is no longer needed since we drop arena's lock after stashing all the purgeable runs. --- include/jemalloc/internal/arena.h | 8 -------- src/arena.c | 15 +++------------ 2 files changed, 3 insertions(+), 20 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index f87dfe4d..1e2e9876 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -343,14 +343,6 @@ struct arena_s { */ size_t ndirty; - /* - * Approximate number of pages being purged. 
It is possible for - * multiple threads to purge dirty pages concurrently, and they use - * npurgatory to indicate the total number of pages all threads are - * attempting to purge. - */ - size_t npurgatory; - /* * Size/address-ordered trees of this arena's available runs. The trees * are used for first-best-fit run allocation. diff --git a/src/arena.c b/src/arena.c index 24ed2ba2..68b156bf 100644 --- a/src/arena.c +++ b/src/arena.c @@ -757,10 +757,7 @@ arena_maybe_purge(arena_t *arena) /* Don't purge if the option is disabled. */ if (opt_lg_dirty_mult < 0) return; - /* Don't purge if all dirty pages are already being purged. */ - if (arena->ndirty <= arena->npurgatory) - return; - npurgeable = arena->ndirty - arena->npurgatory; + npurgeable = arena->ndirty; threshold = (arena->nactive >> opt_lg_dirty_mult); /* * Don't purge unless the number of purgeable pages exceeds the @@ -803,7 +800,7 @@ arena_compute_npurgatory(arena_t *arena, bool all) * Compute the minimum number of pages that this thread should try to * purge. */ - npurgeable = arena->ndirty - arena->npurgatory; + npurgeable = arena->ndirty; if (all == false) { size_t threshold = (arena->nactive >> opt_lg_dirty_mult); @@ -942,9 +939,7 @@ arena_purge(arena_t *arena, bool all) size_t ndirty = arena_dirty_count(arena); assert(ndirty == arena->ndirty); } - assert(arena->ndirty > arena->npurgatory || all); - assert((arena->nactive >> opt_lg_dirty_mult) < (arena->ndirty - - arena->npurgatory) || all); + assert((arena->nactive >> opt_lg_dirty_mult) < arena->ndirty || all); if (config_stats) arena->stats.npurge++; @@ -955,14 +950,11 @@ arena_purge(arena_t *arena, bool all) * reduce ndirty below the threshold. */ npurgatory = arena_compute_npurgatory(arena, all); - arena->npurgatory += npurgatory; ql_new(&purge_list); npurgeable = arena_stash_dirty(arena, all, npurgatory, &purge_list); assert(npurgeable >= npurgatory); - /* Actually we no longer need arena->npurgatory. 
*/ - arena->npurgatory -= npurgatory; npurged = arena_purge_stashed(arena, &purge_list); assert(npurged == npurgeable); @@ -2251,7 +2243,6 @@ arena_new(arena_t *arena, unsigned ind) arena->nactive = 0; arena->ndirty = 0; - arena->npurgatory = 0; arena_avail_tree_new(&arena->runs_avail); From 070b3c3fbd90296610005c111ec6060e8bb23d31 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 14 Aug 2014 14:45:58 -0700 Subject: [PATCH 0492/3378] Fix and refactor runs_dirty-based purging. Fix runs_dirty-based purging to also purge dirty pages in the spare chunk. Refactor runs_dirty manipulation into arena_dirty_{insert,remove}(), and move the arena->ndirty accounting into those functions. Remove the u.ql_link field from arena_chunk_map_t, and get rid of the enclosing union for u.rb_link, since only rb_link remains. Remove the ndirty field from arena_chunk_t. --- include/jemalloc/internal/arena.h | 34 ++---- src/arena.c | 184 +++++++++++++----------------- 2 files changed, 91 insertions(+), 127 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 1e2e9876..9351e3b0 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -65,23 +65,14 @@ struct arena_chunk_map_s { */ union { #endif - union { - /* - * Linkage for run trees. There are two disjoint uses: - * - * 1) arena_t's runs_avail tree. - * 2) arena_run_t conceptually uses this linkage for in-use - * non-full runs, rather than directly embedding linkage. - */ - rb_node(arena_chunk_map_t) rb_link; - /* - * List of runs currently in purgatory. arena_chunk_purge() - * temporarily allocates runs that contain dirty pages while - * purging, so that other threads cannot use the runs while the - * purging thread is operating without the arena lock held. - */ - ql_elm(arena_chunk_map_t) ql_link; - } u; + /* + * Linkage for run trees. There are two disjoint uses: + * + * 1) arena_t's runs_avail tree. 
+ * 2) arena_run_t conceptually uses this linkage for in-use non-full + * runs, rather than directly embedding linkage. + */ + rb_node(arena_chunk_map_t) rb_link; /* Profile counters, used for large object runs. */ prof_ctx_t *prof_ctx; @@ -167,9 +158,6 @@ struct arena_chunk_s { /* Arena that owns the chunk. */ arena_t *arena; - /* Number of dirty pages. */ - size_t ndirty; - /* * Map of pages within chunk that keeps track of free/large/small. The * first map_bias entries are omitted, since the chunk header does not @@ -317,9 +305,6 @@ struct arena_s { dss_prec_t dss_prec; - /* List of dirty runs this arena manages. */ - arena_chunk_mapelms_t runs_dirty; - /* * In order to avoid rapid chunk allocation/deallocation when an arena * oscillates right on the cusp of needing a new chunk, cache the most @@ -349,6 +334,9 @@ struct arena_s { */ arena_avail_tree_t runs_avail; + /* List of dirty runs this arena manages. */ + arena_chunk_mapelms_t runs_dirty; + /* * user-configureable chunk allocation and deallocation functions. */ diff --git a/src/arena.c b/src/arena.c index 68b156bf..1263269e 100644 --- a/src/arena.c +++ b/src/arena.c @@ -90,7 +90,7 @@ arena_run_comp(arena_chunk_map_t *a, arena_chunk_map_t *b) /* Generate red-black tree functions. */ rb_gen(static UNUSED, arena_run_tree_, arena_run_tree_t, arena_chunk_map_t, - u.rb_link, arena_run_comp) + rb_link, arena_run_comp) static inline int arena_avail_comp(arena_chunk_map_t *a, arena_chunk_map_t *b) @@ -123,7 +123,7 @@ arena_avail_comp(arena_chunk_map_t *a, arena_chunk_map_t *b) /* Generate red-black tree functions. 
*/ rb_gen(static UNUSED, arena_avail_tree_, arena_avail_tree_t, arena_chunk_map_t, - u.rb_link, arena_avail_comp) + rb_link, arena_avail_comp) static void arena_avail_insert(arena_t *arena, arena_chunk_t *chunk, size_t pageind, @@ -132,12 +132,6 @@ arena_avail_insert(arena_t *arena, arena_chunk_t *chunk, size_t pageind, assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >> LG_PAGE)); - - if (arena_mapbits_dirty_get(chunk, pageind) != 0) { - arena->ndirty += npages; - chunk->ndirty += npages; - } - arena_avail_tree_insert(&arena->runs_avail, arena_mapp_get(chunk, pageind)); } @@ -149,16 +143,39 @@ arena_avail_remove(arena_t *arena, arena_chunk_t *chunk, size_t pageind, assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >> LG_PAGE)); - - if (arena_mapbits_dirty_get(chunk, pageind) != 0) { - arena->ndirty -= npages; - chunk->ndirty -= npages; - } - arena_avail_tree_remove(&arena->runs_avail, arena_mapp_get(chunk, pageind)); } +static void +arena_dirty_insert(arena_t *arena, arena_chunk_t *chunk, size_t pageind, + size_t npages) +{ + arena_chunk_map_t *mapelm = arena_mapp_get(chunk, pageind); + assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >> + LG_PAGE)); + assert(arena_mapbits_dirty_get(chunk, pageind) == CHUNK_MAP_DIRTY); + assert(arena_mapbits_dirty_get(chunk, pageind+npages-1) == + CHUNK_MAP_DIRTY); + ql_elm_new(mapelm, dr_link); + ql_tail_insert(&arena->runs_dirty, mapelm, dr_link); + arena->ndirty += npages; +} + +static void +arena_dirty_remove(arena_t *arena, arena_chunk_t *chunk, size_t pageind, + size_t npages) +{ + arena_chunk_map_t *mapelm = arena_mapp_get(chunk, pageind); + assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >> + LG_PAGE)); + assert(arena_mapbits_dirty_get(chunk, pageind) == CHUNK_MAP_DIRTY); + assert(arena_mapbits_dirty_get(chunk, pageind+npages-1) == + CHUNK_MAP_DIRTY); + ql_remove(&arena->runs_dirty, mapelm, dr_link); + arena->ndirty -= npages; +} + static 
inline void * arena_run_reg_alloc(arena_run_t *run, arena_bin_info_t *bin_info) { @@ -252,7 +269,6 @@ static void arena_run_split_remove(arena_t *arena, arena_chunk_t *chunk, size_t run_ind, size_t flag_dirty, size_t need_pages) { - arena_chunk_map_t *mapelm; size_t total_pages, rem_pages; total_pages = arena_mapbits_unallocated_size_get(chunk, run_ind) >> @@ -263,11 +279,8 @@ arena_run_split_remove(arena_t *arena, arena_chunk_t *chunk, size_t run_ind, rem_pages = total_pages - need_pages; arena_avail_remove(arena, chunk, run_ind, total_pages); - if (flag_dirty != 0) { - /* If the run is dirty, it must be in the dirty list. */ - mapelm = arena_mapp_get(chunk, run_ind); - ql_remove(&arena->runs_dirty, mapelm, dr_link); - } + if (flag_dirty != 0) + arena_dirty_remove(arena, chunk, run_ind, total_pages); arena_cactive_update(arena, need_pages, 0); arena->nactive += need_pages; @@ -280,14 +293,8 @@ arena_run_split_remove(arena_t *arena, arena_chunk_t *chunk, size_t run_ind, arena_mapbits_unallocated_set(chunk, run_ind+total_pages-1, (rem_pages << LG_PAGE), flag_dirty); - mapelm = arena_mapp_get(chunk, run_ind+need_pages); - /* - * Append the trailing run at the end of the dirty list. - * We could also insert the run at the original place. - * Let us consider this later. - */ - ql_elm_new(mapelm, dr_link); - ql_tail_insert(&arena->runs_dirty, mapelm, dr_link); + arena_dirty_insert(arena, chunk, run_ind+need_pages, + rem_pages); } else { arena_mapbits_unallocated_set(chunk, run_ind+need_pages, (rem_pages << LG_PAGE), @@ -512,11 +519,6 @@ arena_chunk_init_hard(arena_t *arena) chunk->arena = arena; - /* - * Claim that no pages are in use, since the header is merely overhead. - */ - chunk->ndirty = 0; - /* * Initialize the map to contain one maximal free untouched run. Mark * the pages as zeroed iff chunk_alloc() returned a zeroed chunk. @@ -568,11 +570,6 @@ arena_chunk_alloc(arena_t *arena) /* Insert the run into the runs_avail tree. 
*/ arena_avail_insert(arena, chunk, map_bias, chunk_npages-map_bias); - if (arena_mapbits_dirty_get(chunk, map_bias) != 0) { - arena_chunk_map_t *mapelm = arena_mapp_get(chunk, map_bias); - ql_elm_new(mapelm, dr_link); - ql_tail_insert(&arena->runs_dirty, mapelm, dr_link); - } return (chunk); } @@ -626,15 +623,15 @@ arena_chunk_dalloc(arena_t *arena, arena_chunk_t *chunk) * it. */ arena_avail_remove(arena, chunk, map_bias, chunk_npages-map_bias); - if (arena_mapbits_dirty_get(chunk, map_bias) != 0) { - arena_chunk_map_t *mapelm = arena_mapp_get(chunk, map_bias); - ql_remove(&arena->runs_dirty, mapelm, dr_link); - } if (arena->spare != NULL) { arena_chunk_t *spare = arena->spare; arena->spare = chunk; + if (arena_mapbits_dirty_get(spare, map_bias) != 0) { + arena_dirty_remove(arena, spare, map_bias, + chunk_npages-map_bias); + } arena_chunk_dalloc_internal(arena, spare); } else arena->spare = chunk; @@ -752,18 +749,17 @@ arena_run_alloc_small(arena_t *arena, size_t size, size_t binind) static inline void arena_maybe_purge(arena_t *arena) { - size_t npurgeable, threshold; + size_t threshold; /* Don't purge if the option is disabled. */ if (opt_lg_dirty_mult < 0) return; - npurgeable = arena->ndirty; threshold = (arena->nactive >> opt_lg_dirty_mult); /* * Don't purge unless the number of purgeable pages exceeds the * threshold. */ - if (npurgeable <= threshold) + if (arena->ndirty <= threshold) return; arena_purge(arena, false); @@ -792,50 +788,53 @@ arena_dirty_count(arena_t *arena) } static size_t -arena_compute_npurgatory(arena_t *arena, bool all) +arena_compute_npurge(arena_t *arena, bool all) { - size_t npurgatory, npurgeable; + size_t npurge; /* * Compute the minimum number of pages that this thread should try to * purge. 
*/ - npurgeable = arena->ndirty; - if (all == false) { size_t threshold = (arena->nactive >> opt_lg_dirty_mult); - npurgatory = npurgeable - threshold; + npurge = arena->ndirty - threshold; } else - npurgatory = npurgeable; + npurge = arena->ndirty; - return (npurgatory); + return (npurge); } static size_t -arena_stash_dirty(arena_t *arena, bool all, size_t npurgatory, +arena_stash_dirty(arena_t *arena, bool all, size_t npurge, arena_chunk_mapelms_t *mapelms) { arena_chunk_map_t *mapelm; size_t nstashed = 0; - arena_chunk_t *chunk; - size_t pageind, npages, run_size; - arena_run_t *run; - /* Add at least npurgatory pages to purge_list. */ + /* Add at least npurge pages to purge_list. */ for (mapelm = ql_first(&arena->runs_dirty); mapelm != NULL; mapelm = ql_first(&arena->runs_dirty)) { - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(mapelm); - pageind = arena_mapelm_to_pageind(mapelm); - run_size = arena_mapbits_unallocated_size_get(chunk, pageind); - npages = run_size >> LG_PAGE; - run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)(pageind << - LG_PAGE)); + arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(mapelm); + size_t pageind = arena_mapelm_to_pageind(mapelm); + size_t run_size = arena_mapbits_unallocated_size_get(chunk, + pageind); + size_t npages = run_size >> LG_PAGE; + arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + + (uintptr_t)(pageind << LG_PAGE)); assert(pageind + npages <= chunk_npages); assert(arena_mapbits_dirty_get(chunk, pageind) == arena_mapbits_dirty_get(chunk, pageind+npages-1)); + /* + * If purging the spare chunk's run, make it available prior to + * allocation. + */ + if (chunk == arena->spare) + arena_chunk_alloc(arena); + /* Temporarily allocate the free dirty run. */ arena_run_split_large(arena, run, run_size, false); /* Append to purge_list for later processing. 
*/ @@ -844,7 +843,7 @@ arena_stash_dirty(arena_t *arena, bool all, size_t npurgatory, nstashed += npages; - if (all == false && nstashed >= npurgatory) + if (all == false && nstashed >= npurge) break; } @@ -856,8 +855,6 @@ arena_purge_stashed(arena_t *arena, arena_chunk_mapelms_t *mapelms) { size_t npurged, nmadvise; arena_chunk_map_t *mapelm; - arena_chunk_t *chunk; - size_t pageind, npages, run_size; if (config_stats) nmadvise = 0; @@ -866,8 +863,9 @@ arena_purge_stashed(arena_t *arena, arena_chunk_mapelms_t *mapelms) malloc_mutex_unlock(&arena->lock); ql_foreach(mapelm, mapelms, dr_link) { + arena_chunk_t *chunk; + size_t pageind, run_size, npages, flag_unzeroed, i; bool unzeroed; - size_t flag_unzeroed, i; chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(mapelm); pageind = arena_mapelm_to_pageind(mapelm); @@ -913,17 +911,14 @@ static void arena_unstash_purged(arena_t *arena, arena_chunk_mapelms_t *mapelms) { arena_chunk_map_t *mapelm; - arena_chunk_t *chunk; - arena_run_t *run; - size_t pageind; /* Deallocate runs. 
*/ for (mapelm = ql_first(mapelms); mapelm != NULL; mapelm = ql_first(mapelms)) { - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(mapelm); - pageind = arena_mapelm_to_pageind(mapelm); - run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)(pageind << - LG_PAGE)); + arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(mapelm); + size_t pageind = arena_mapelm_to_pageind(mapelm); + arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + + (uintptr_t)(pageind << LG_PAGE)); ql_remove(mapelms, mapelm, dr_link); arena_run_dalloc(arena, run, false, true); } @@ -932,7 +927,7 @@ arena_unstash_purged(arena_t *arena, arena_chunk_mapelms_t *mapelms) void arena_purge(arena_t *arena, bool all) { - size_t npurgatory, npurgeable, npurged; + size_t npurge, npurgeable, npurged; arena_chunk_mapelms_t purge_list; if (config_debug) { @@ -944,21 +939,12 @@ arena_purge(arena_t *arena, bool all) if (config_stats) arena->stats.npurge++; - /* - * Add the minimum number of pages this thread should try to purge to - * arena->npurgatory. This will keep multiple threads from racing to - * reduce ndirty below the threshold. - */ - npurgatory = arena_compute_npurgatory(arena, all); - + npurge = arena_compute_npurge(arena, all); ql_new(&purge_list); - - npurgeable = arena_stash_dirty(arena, all, npurgatory, &purge_list); - assert(npurgeable >= npurgatory); - + npurgeable = arena_stash_dirty(arena, all, npurge, &purge_list); + assert(npurgeable >= npurge); npurged = arena_purge_stashed(arena, &purge_list); assert(npurged == npurgeable); - arena_unstash_purged(arena, &purge_list); } @@ -999,9 +985,8 @@ arena_run_coalesce(arena_t *arena, arena_chunk_t *chunk, size_t *p_size, /* If the successor is dirty, remove it from runs_dirty. 
*/ if (flag_dirty != 0) { - arena_chunk_map_t *mapelm = arena_mapp_get(chunk, - run_ind+run_pages); - ql_remove(&arena->runs_dirty, mapelm, dr_link); + arena_dirty_remove(arena, chunk, run_ind+run_pages, + nrun_pages); } size += nrun_size; @@ -1032,11 +1017,8 @@ arena_run_coalesce(arena_t *arena, arena_chunk_t *chunk, size_t *p_size, arena_avail_remove(arena, chunk, run_ind, prun_pages); /* If the predecessor is dirty, remove it from runs_dirty. */ - if (flag_dirty != 0) { - arena_chunk_map_t *mapelm = arena_mapp_get(chunk, - run_ind); - ql_remove(&arena->runs_dirty, mapelm, dr_link); - } + if (flag_dirty != 0) + arena_dirty_remove(arena, chunk, run_ind, prun_pages); size += prun_size; run_pages += prun_pages; @@ -1055,7 +1037,6 @@ static void arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, bool cleaned) { arena_chunk_t *chunk; - arena_chunk_map_t *mapelm; size_t size, run_ind, run_pages, flag_dirty; chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); @@ -1110,12 +1091,8 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, bool cleaned) arena_mapbits_dirty_get(chunk, run_ind+run_pages-1)); arena_avail_insert(arena, chunk, run_ind, run_pages); - if (dirty) { - /* Insert into runs_dirty list. */ - mapelm = arena_mapp_get(chunk, run_ind); - ql_elm_new(mapelm, dr_link); - ql_tail_insert(&arena->runs_dirty, mapelm, dr_link); - } + if (dirty) + arena_dirty_insert(arena, chunk, run_ind, run_pages); /* Deallocate chunk if it is now completely unused. */ if (size == arena_maxclass) { @@ -2237,14 +2214,13 @@ arena_new(arena_t *arena, unsigned ind) arena->dss_prec = chunk_dss_prec_get(); - /* Initialize chunks. */ - ql_new(&arena->runs_dirty); arena->spare = NULL; arena->nactive = 0; arena->ndirty = 0; arena_avail_tree_new(&arena->runs_avail); + ql_new(&arena->runs_dirty); /* Initialize bins. 
*/ for (i = 0; i < NBINS; i++) { From 586c8ede42d7d0545d36d9cbb0235fb39221ef3e Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 15 Aug 2014 12:20:20 -0700 Subject: [PATCH 0493/3378] Fix arena..dss mallctl to handle read-only calls. --- src/ctl.c | 52 +++++++++++++++++++++++++-------------------- test/unit/mallctl.c | 13 ++++++++++++ 2 files changed, 42 insertions(+), 23 deletions(-) diff --git a/src/ctl.c b/src/ctl.c index a193605d..fa52a6cc 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1327,45 +1327,51 @@ static int arena_i_dss_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - int ret, i; - bool match, err; - const char *dss; + int ret; + const char *dss = NULL; unsigned arena_ind = mib[1]; dss_prec_t dss_prec_old = dss_prec_limit; dss_prec_t dss_prec = dss_prec_limit; malloc_mutex_lock(&ctl_mtx); WRITE(dss, const char *); - match = false; - for (i = 0; i < dss_prec_limit; i++) { - if (strcmp(dss_prec_names[i], dss) == 0) { - dss_prec = i; - match = true; - break; + if (dss != NULL) { + int i; + bool match = false; + + for (i = 0; i < dss_prec_limit; i++) { + if (strcmp(dss_prec_names[i], dss) == 0) { + dss_prec = i; + match = true; + break; + } + } + + if (match == false) { + ret = EINVAL; + goto label_return; } - } - if (match == false) { - ret = EINVAL; - goto label_return; } if (arena_ind < ctl_stats.narenas) { arena_t *arena = arenas[arena_ind]; - if (arena != NULL) { - dss_prec_old = arena_dss_prec_get(arena); - err = arena_dss_prec_set(arena, dss_prec); - } else - err = true; + if (arena == NULL || (dss_prec != dss_prec_limit && + arena_dss_prec_set(arena, dss_prec))) { + ret = EFAULT; + goto label_return; + } + dss_prec_old = arena_dss_prec_get(arena); } else { + if (dss_prec != dss_prec_limit && + chunk_dss_prec_set(dss_prec)) { + ret = EFAULT; + goto label_return; + } dss_prec_old = chunk_dss_prec_get(); - err = chunk_dss_prec_set(dss_prec); } + dss = dss_prec_names[dss_prec_old]; READ(dss, const char *); 
- if (err) { - ret = EFAULT; - goto label_return; - } ret = 0; label_return: diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 7a8b55f5..c70473cc 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -268,12 +268,25 @@ TEST_BEGIN(test_arena_i_dss) assert_d_eq(mallctlbymib(mib, miblen, &dss_prec_new, &sz, &dss_prec_old, sizeof(dss_prec_old)), 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctlbymib(mib, miblen, &dss_prec_old, &sz, NULL, 0), 0, + "Unexpected mallctl() failure"); + assert_str_ne(dss_prec_old, "primary", + "Unexpected value for dss precedence"); + mib[1] = narenas_total_get(); dss_prec_new = "disabled"; assert_d_eq(mallctlbymib(mib, miblen, &dss_prec_old, &sz, &dss_prec_new, sizeof(dss_prec_new)), 0, "Unexpected mallctl() failure"); assert_str_ne(dss_prec_old, "primary", "Unexpected default for dss precedence"); + + assert_d_eq(mallctlbymib(mib, miblen, &dss_prec_new, &sz, &dss_prec_old, + sizeof(dss_prec_new)), 0, "Unexpected mallctl() failure"); + + assert_d_eq(mallctlbymib(mib, miblen, &dss_prec_old, &sz, NULL, 0), 0, + "Unexpected mallctl() failure"); + assert_str_ne(dss_prec_old, "primary", + "Unexpected value for dss precedence"); } TEST_END From b41ccdb125b312d4522da1a80091a0137773c964 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 15 Aug 2014 15:01:15 -0700 Subject: [PATCH 0494/3378] Convert prof_tdata_t's bt2cnt to a comprehensive map. Treat prof_tdata_t's bt2cnt as a comprehensive map of the thread's extant allocation samples (do not limit the total number of entries). This helps prepare the way for per thread heap profiling. 
--- include/jemalloc/internal/prof.h | 24 ++++-------- src/prof.c | 67 ++++++++------------------------ 2 files changed, 25 insertions(+), 66 deletions(-) diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index d82fbc4f..96db4c3e 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -23,9 +23,6 @@ typedef struct prof_tdata_s prof_tdata_t; */ #define PROF_BT_MAX 128 -/* Maximum number of backtraces to store in each per thread LRU cache. */ -#define PROF_TCMAX 1024 - /* Initial hash table size. */ #define PROF_CKH_MINITEMS 64 @@ -87,9 +84,6 @@ struct prof_thr_cnt_s { /* Linkage into prof_ctx_t's cnts_ql. */ ql_elm(prof_thr_cnt_t) cnts_link; - /* Linkage into thread's LRU. */ - ql_elm(prof_thr_cnt_t) lru_link; - /* * Associated context. If a thread frees an object that it did not * allocate, it is possible that the context is not cached in the @@ -157,10 +151,11 @@ typedef ql_head(prof_ctx_t) prof_ctx_list_t; struct prof_tdata_s { /* - * Hash of (prof_bt_t *)-->(prof_thr_cnt_t *). Each thread keeps a - * cache of backtraces, with associated thread-specific prof_thr_cnt_t - * objects. Other threads may read the prof_thr_cnt_t contents, but no - * others will ever write them. + * Hash of (prof_bt_t *)-->(prof_thr_cnt_t *). Each thread tracks + * backtraces for which it has non-zero allocation/deallocation counters + * associated with thread-specific prof_thr_cnt_t objects. Other + * threads may read the prof_thr_cnt_t contents, but no others will ever + * write them. * * Upon thread exit, the thread must merge all the prof_thr_cnt_t * counter data into the associated prof_ctx_t objects, and unlink/free @@ -168,12 +163,6 @@ struct prof_tdata_s { */ ckh_t bt2cnt; - /* LRU for contents of bt2cnt. */ - ql_head(prof_thr_cnt_t) lru_ql; - - /* Backtrace vector, used for calls to prof_backtrace(). */ - void **vec; - /* Sampling state. 
*/ uint64_t prng_state; uint64_t bytes_until_sample; @@ -182,6 +171,9 @@ struct prof_tdata_s { bool enq; bool enq_idump; bool enq_gdump; + + /* Backtrace vector, used for calls to prof_backtrace(). */ + void *vec[PROF_BT_MAX]; }; #endif /* JEMALLOC_H_STRUCTS */ diff --git a/src/prof.c b/src/prof.c index 0eb7dbdb..4f95fdb9 100644 --- a/src/prof.c +++ b/src/prof.c @@ -567,33 +567,13 @@ prof_lookup(prof_bt_t *bt) return (NULL); /* Link a prof_thd_cnt_t into ctx for this thread. */ - if (ckh_count(&prof_tdata->bt2cnt) == PROF_TCMAX) { - assert(ckh_count(&prof_tdata->bt2cnt) > 0); - /* - * Flush the least recently used cnt in order to keep - * bt2cnt from becoming too large. - */ - ret.p = ql_last(&prof_tdata->lru_ql, lru_link); - assert(ret.v != NULL); - if (ckh_remove(&prof_tdata->bt2cnt, ret.p->ctx->bt, - NULL, NULL)) - not_reached(); - ql_remove(&prof_tdata->lru_ql, ret.p, lru_link); - prof_ctx_merge(ret.p->ctx, ret.p); - /* ret can now be re-used. */ - } else { - assert(ckh_count(&prof_tdata->bt2cnt) < PROF_TCMAX); - /* Allocate and partially initialize a new cnt. */ - ret.v = imalloc(sizeof(prof_thr_cnt_t)); - if (ret.p == NULL) { - if (new_ctx) - prof_ctx_destroy(ctx); - return (NULL); - } - ql_elm_new(ret.p, cnts_link); - ql_elm_new(ret.p, lru_link); + ret.v = imalloc(sizeof(prof_thr_cnt_t)); + if (ret.p == NULL) { + if (new_ctx) + prof_ctx_destroy(ctx); + return (NULL); } - /* Finish initializing ret. */ + ql_elm_new(ret.p, cnts_link); ret.p->ctx = ctx; ret.p->epoch = 0; memset(&ret.p->cnts, 0, sizeof(prof_cnt_t)); @@ -603,15 +583,10 @@ prof_lookup(prof_bt_t *bt) idalloc(ret.v); return (NULL); } - ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link); malloc_mutex_lock(ctx->lock); ql_tail_insert(&ctx->cnts_ql, ret.p, cnts_link); ctx->nlimbo--; malloc_mutex_unlock(ctx->lock); - } else { - /* Move ret to the front of the LRU. 
*/ - ql_remove(&prof_tdata->lru_ql, ret.p, lru_link); - ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link); } return (ret.p); @@ -1247,14 +1222,6 @@ prof_tdata_init(void) idalloc(prof_tdata); return (NULL); } - ql_new(&prof_tdata->lru_ql); - - prof_tdata->vec = imalloc(sizeof(void *) * PROF_BT_MAX); - if (prof_tdata->vec == NULL) { - ckh_delete(&prof_tdata->bt2cnt); - idalloc(prof_tdata); - return (NULL); - } prof_tdata->prng_state = (uint64_t)(uintptr_t)prof_tdata; prof_sample_threshold_update(prof_tdata); @@ -1271,7 +1238,6 @@ prof_tdata_init(void) void prof_tdata_cleanup(void *arg) { - prof_thr_cnt_t *cnt; prof_tdata_t *prof_tdata = *(prof_tdata_t **)arg; cassert(config_prof); @@ -1292,21 +1258,22 @@ prof_tdata_cleanup(void *arg) * nothing, so that the destructor will not be called again. */ } else if (prof_tdata != NULL) { - /* - * Delete the hash table. All of its contents can still be - * iterated over via the LRU. - */ - ckh_delete(&prof_tdata->bt2cnt); + union { + prof_thr_cnt_t *p; + void *v; + } cnt; + size_t tabind; + /* * Iteratively merge cnt's into the global stats and delete * them. */ - while ((cnt = ql_last(&prof_tdata->lru_ql, lru_link)) != NULL) { - ql_remove(&prof_tdata->lru_ql, cnt, lru_link); - prof_ctx_merge(cnt->ctx, cnt); - idalloc(cnt); + for (tabind = 0; ckh_iter(&prof_tdata->bt2cnt, &tabind, NULL, + &cnt.v);) { + prof_ctx_merge(cnt.p->ctx, cnt.p); + idalloc(cnt.v); } - idalloc(prof_tdata->vec); + ckh_delete(&prof_tdata->bt2cnt); idalloc(prof_tdata); prof_tdata = PROF_TDATA_STATE_PURGATORY; prof_tdata_tsd_set(&prof_tdata); From ab532e97991d190e9368781cf308c60c2319b933 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 15 Aug 2014 15:05:12 -0700 Subject: [PATCH 0495/3378] Directly embed prof_ctx_t's bt. 
--- include/jemalloc/internal/prof.h | 13 +++--- src/prof.c | 69 +++++++++----------------------- 2 files changed, 26 insertions(+), 56 deletions(-) diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index 96db4c3e..9be908d6 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -115,9 +115,6 @@ struct prof_thr_cnt_s { }; struct prof_ctx_s { - /* Associated backtrace. */ - prof_bt_t *bt; - /* Protects nlimbo, cnt_merged, and cnts_ql. */ malloc_mutex_t *lock; @@ -146,6 +143,12 @@ struct prof_ctx_s { /* Linkage for list of contexts to be dumped. */ ql_elm(prof_ctx_t) dump_link; + + /* Associated backtrace. */ + prof_bt_t bt; + + /* Backtrace vector, variable size, referred to by bt. */ + void *vec[1]; }; typedef ql_head(prof_ctx_t) prof_ctx_list_t; @@ -425,7 +428,7 @@ prof_realloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt, } if ((uintptr_t)old_ctx > (uintptr_t)1U) { - told_cnt = prof_lookup(old_ctx->bt); + told_cnt = prof_lookup(&old_ctx->bt); if (told_cnt == NULL) { /* * It's too late to propagate OOM for this realloc(), @@ -483,7 +486,7 @@ prof_free(const void *ptr, size_t size) if ((uintptr_t)ctx > (uintptr_t)1) { prof_thr_cnt_t *tcnt; assert(size == isalloc(ptr, true)); - tcnt = prof_lookup(ctx->bt); + tcnt = prof_lookup(&ctx->bt); if (tcnt != NULL) { tcnt->epoch++; diff --git a/src/prof.c b/src/prof.c index 4f95fdb9..1b396afe 100644 --- a/src/prof.c +++ b/src/prof.c @@ -87,41 +87,6 @@ bt_init(prof_bt_t *bt, void **vec) bt->len = 0; } -static void -bt_destroy(prof_bt_t *bt) -{ - - cassert(config_prof); - - idalloc(bt); -} - -static prof_bt_t * -bt_dup(prof_bt_t *bt) -{ - prof_bt_t *ret; - - cassert(config_prof); - - /* - * Create a single allocation that has space for vec immediately - * following the prof_bt_t structure. The backtraces that get - * stored in the backtrace caches are copied from stack-allocated - * temporary variables, so size is known at creation time. 
Making this - * a contiguous object improves cache locality. - */ - ret = (prof_bt_t *)imalloc(QUANTUM_CEILING(sizeof(prof_bt_t)) + - (bt->len * sizeof(void *))); - if (ret == NULL) - return (NULL); - ret->vec = (void **)((uintptr_t)ret + - QUANTUM_CEILING(sizeof(prof_bt_t))); - memcpy(ret->vec, bt->vec, bt->len * sizeof(void *)); - ret->len = bt->len; - - return (ret); -} - static inline void prof_enter(prof_tdata_t *prof_tdata) { @@ -388,11 +353,16 @@ prof_ctx_mutex_choose(void) return (&ctx_locks[(nctxs - 1) % PROF_NCTX_LOCKS]); } -static void -prof_ctx_init(prof_ctx_t *ctx, prof_bt_t *bt) +static prof_ctx_t * +prof_ctx_create(prof_bt_t *bt) { - - ctx->bt = bt; + /* + * Create a single allocation that has space for vec of length bt->len. + */ + prof_ctx_t *ctx = (prof_ctx_t *)imalloc(offsetof(prof_ctx_t, vec) + + (bt->len * sizeof(void *))); + if (ctx == NULL) + return (NULL); ctx->lock = prof_ctx_mutex_choose(); /* * Set nlimbo to 1, in order to avoid a race condition with @@ -402,6 +372,11 @@ prof_ctx_init(prof_ctx_t *ctx, prof_bt_t *bt) ql_elm_new(ctx, dump_link); memset(&ctx->cnt_merged, 0, sizeof(prof_cnt_t)); ql_new(&ctx->cnts_ql); + /* Duplicate bt. */ + memcpy(ctx->vec, bt->vec, bt->len * sizeof(void *)); + ctx->bt.vec = ctx->vec; + ctx->bt.len = bt->len; + return (ctx); } static void @@ -428,12 +403,11 @@ prof_ctx_destroy(prof_ctx_t *ctx) assert(ctx->cnt_merged.accumobjs == 0); assert(ctx->cnt_merged.accumbytes == 0); /* Remove ctx from bt2ctx. */ - if (ckh_remove(&bt2ctx, ctx->bt, NULL, NULL)) + if (ckh_remove(&bt2ctx, &ctx->bt, NULL, NULL)) not_reached(); prof_leave(prof_tdata); /* Destroy ctx. */ malloc_mutex_unlock(ctx->lock); - bt_destroy(ctx->bt); idalloc(ctx); } else { /* @@ -501,22 +475,15 @@ prof_lookup_global(prof_bt_t *bt, prof_tdata_t *prof_tdata, void **p_btkey, prof_enter(prof_tdata); if (ckh_search(&bt2ctx, bt, &btkey.v, &ctx.v)) { /* bt has never been seen before. Insert it. 
*/ - ctx.v = imalloc(sizeof(prof_ctx_t)); + ctx.p = prof_ctx_create(bt); if (ctx.v == NULL) { prof_leave(prof_tdata); return (true); } - btkey.p = bt_dup(bt); - if (btkey.v == NULL) { - prof_leave(prof_tdata); - idalloc(ctx.v); - return (true); - } - prof_ctx_init(ctx.p, btkey.p); + btkey.p = &ctx.p->bt; if (ckh_insert(&bt2ctx, btkey.v, ctx.v)) { /* OOM. */ prof_leave(prof_tdata); - idalloc(btkey.v); idalloc(ctx.v); return (true); } @@ -1039,7 +1006,7 @@ prof_dump(bool propagate_err, const char *filename, bool leakcheck) /* Dump per ctx profile stats. */ while ((ctx.p = ql_first(&ctx_ql)) != NULL) { - if (prof_dump_ctx(propagate_err, ctx.p, ctx.p->bt, &ctx_ql)) + if (prof_dump_ctx(propagate_err, ctx.p, &ctx.p->bt, &ctx_ql)) goto label_write_error; } From 3a81cbd2d4f2d8c052f11f4b0b73ee5c84a33d4f Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 16 Aug 2014 12:58:55 -0700 Subject: [PATCH 0496/3378] Dump heap profile backtraces in a stable order. Also iterate over per thread stats in a stable order, which prepares the way for stable ordering of per thread heap profile dumps. --- include/jemalloc/internal/prof.h | 24 +++-- src/prof.c | 157 +++++++++++++++++++++---------- 2 files changed, 119 insertions(+), 62 deletions(-) diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index 9be908d6..9398ad91 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -1,6 +1,7 @@ /******************************************************************************/ #ifdef JEMALLOC_H_TYPES +typedef uint64_t prof_thr_uid_t; typedef struct prof_bt_s prof_bt_t; typedef struct prof_cnt_s prof_cnt_t; typedef struct prof_thr_cnt_s prof_thr_cnt_t; @@ -81,15 +82,17 @@ struct prof_cnt_s { }; struct prof_thr_cnt_s { - /* Linkage into prof_ctx_t's cnts_ql. */ - ql_elm(prof_thr_cnt_t) cnts_link; + prof_thr_uid_t thr_uid; + + /* Linkage into prof_ctx_t's thr_cnts. */ + rb_node(prof_thr_cnt_t) thr_cnt_link; /* * Associated context. 
If a thread frees an object that it did not - * allocate, it is possible that the context is not cached in the + * allocate, it is possible that the context is not present in the * thread's hash table, in which case it must be able to look up the * context, insert a new prof_thr_cnt_t into the thread's hash table, - * and link it into the prof_ctx_t's cnts_ql. + * and link it into the prof_ctx_t's thr_cnts. */ prof_ctx_t *ctx; @@ -113,9 +116,10 @@ struct prof_thr_cnt_s { /* Profiling counters. */ prof_cnt_t cnts; }; +typedef rb_tree(prof_thr_cnt_t) prof_thr_cnt_tree_t; struct prof_ctx_s { - /* Protects nlimbo, cnt_merged, and cnts_ql. */ + /* Protects nlimbo, cnt_merged, and thr_cnts. */ malloc_mutex_t *lock; /* @@ -136,13 +140,13 @@ struct prof_ctx_s { prof_cnt_t cnt_merged; /* - * List of profile counters, one for each thread that has allocated in + * Tree of profile counters, one for each thread that has allocated in * this context. */ - ql_head(prof_thr_cnt_t) cnts_ql; + prof_thr_cnt_tree_t thr_cnts; - /* Linkage for list of contexts to be dumped. */ - ql_elm(prof_ctx_t) dump_link; + /* Linkage for tree of contexts to be dumped. */ + rb_node(prof_ctx_t) dump_link; /* Associated backtrace. */ prof_bt_t bt; @@ -150,7 +154,7 @@ struct prof_ctx_s { /* Backtrace vector, variable size, referred to by bt. 
*/ void *vec[1]; }; -typedef ql_head(prof_ctx_t) prof_ctx_list_t; +typedef rb_tree(prof_ctx_t) prof_ctx_tree_t; struct prof_tdata_s { /* diff --git a/src/prof.c b/src/prof.c index 1b396afe..497ccf42 100644 --- a/src/prof.c +++ b/src/prof.c @@ -77,6 +77,33 @@ static bool prof_booted = false; /******************************************************************************/ +JEMALLOC_INLINE_C int +prof_thr_cnt_comp(const prof_thr_cnt_t *a, const prof_thr_cnt_t *b) +{ + prof_thr_uid_t a_uid = a->thr_uid; + prof_thr_uid_t b_uid = b->thr_uid; + + return ((a_uid > b_uid) - (a_uid < b_uid)); +} + +rb_gen(static UNUSED, thr_cnt_tree_, prof_thr_cnt_tree_t, prof_thr_cnt_t, + thr_cnt_link, prof_thr_cnt_comp) + +JEMALLOC_INLINE_C int +prof_ctx_comp(const prof_ctx_t *a, const prof_ctx_t *b) +{ + unsigned a_len = a->bt.len; + unsigned b_len = b->bt.len; + unsigned comp_len = (a_len < b_len) ? a_len : b_len; + int ret = memcmp(a->bt.vec, b->bt.vec, comp_len * sizeof(void *)); + if (ret == 0) + ret = (a_len > b_len) - (a_len < b_len); + return (ret); +} + +rb_gen(static UNUSED, ctx_tree_, prof_ctx_tree_t, prof_ctx_t, dump_link, + prof_ctx_comp) + void bt_init(prof_bt_t *bt, void **vec) { @@ -369,9 +396,8 @@ prof_ctx_create(prof_bt_t *bt) * prof_ctx_merge()/prof_ctx_destroy(). */ ctx->nlimbo = 1; - ql_elm_new(ctx, dump_link); memset(&ctx->cnt_merged, 0, sizeof(prof_cnt_t)); - ql_new(&ctx->cnts_ql); + thr_cnt_tree_new(&ctx->thr_cnts); /* Duplicate bt. 
*/ memcpy(ctx->vec, bt->vec, bt->len * sizeof(void *)); ctx->bt.vec = ctx->vec; @@ -397,8 +423,8 @@ prof_ctx_destroy(prof_ctx_t *ctx) assert((uintptr_t)prof_tdata > (uintptr_t)PROF_TDATA_STATE_MAX); prof_enter(prof_tdata); malloc_mutex_lock(ctx->lock); - if (ql_first(&ctx->cnts_ql) == NULL && ctx->cnt_merged.curobjs == 0 && - ctx->nlimbo == 1) { + if (thr_cnt_tree_first(&ctx->thr_cnts) == NULL && + ctx->cnt_merged.curobjs == 0 && ctx->nlimbo == 1) { assert(ctx->cnt_merged.curbytes == 0); assert(ctx->cnt_merged.accumobjs == 0); assert(ctx->cnt_merged.accumbytes == 0); @@ -433,9 +459,9 @@ prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt) ctx->cnt_merged.curbytes += cnt->cnts.curbytes; ctx->cnt_merged.accumobjs += cnt->cnts.accumobjs; ctx->cnt_merged.accumbytes += cnt->cnts.accumbytes; - ql_remove(&ctx->cnts_ql, cnt, cnts_link); - if (opt_prof_accum == false && ql_first(&ctx->cnts_ql) == NULL && - ctx->cnt_merged.curobjs == 0 && ctx->nlimbo == 0) { + thr_cnt_tree_remove(&ctx->thr_cnts, cnt); + if (opt_prof_accum == false && thr_cnt_tree_first(&ctx->thr_cnts) == + NULL && ctx->cnt_merged.curobjs == 0 && ctx->nlimbo == 0) { /* * Increment ctx->nlimbo in order to keep another thread from * winning the race to destroy ctx while this one has ctx->lock @@ -540,7 +566,6 @@ prof_lookup(prof_bt_t *bt) prof_ctx_destroy(ctx); return (NULL); } - ql_elm_new(ret.p, cnts_link); ret.p->ctx = ctx; ret.p->epoch = 0; memset(&ret.p->cnts, 0, sizeof(prof_cnt_t)); @@ -551,7 +576,7 @@ prof_lookup(prof_bt_t *bt) return (NULL); } malloc_mutex_lock(ctx->lock); - ql_tail_insert(&ctx->cnts_ql, ret.p, cnts_link); + thr_cnt_tree_insert(&ctx->thr_cnts, ret.p); ctx->nlimbo--; malloc_mutex_unlock(ctx->lock); } @@ -745,12 +770,41 @@ prof_dump_printf(bool propagate_err, const char *format, ...) 
return (ret); } +static prof_thr_cnt_t * +ctx_sum_iter(prof_thr_cnt_tree_t *thr_cnts, prof_thr_cnt_t *thr_cnt, void *arg) +{ + prof_ctx_t *ctx = (prof_ctx_t *)arg; + volatile unsigned *epoch = &thr_cnt->epoch; + prof_cnt_t tcnt; + + while (true) { + unsigned epoch0 = *epoch; + + /* Make sure epoch is even. */ + if (epoch0 & 1U) + continue; + + memcpy(&tcnt, &thr_cnt->cnts, sizeof(prof_cnt_t)); + + /* Terminate if epoch didn't change while reading. */ + if (*epoch == epoch0) + break; + } + + ctx->cnt_summed.curobjs += tcnt.curobjs; + ctx->cnt_summed.curbytes += tcnt.curbytes; + if (opt_prof_accum) { + ctx->cnt_summed.accumobjs += tcnt.accumobjs; + ctx->cnt_summed.accumbytes += tcnt.accumbytes; + } + + return (NULL); +} + static void prof_dump_ctx_prep(prof_ctx_t *ctx, prof_cnt_t *cnt_all, size_t *leak_nctx, - prof_ctx_list_t *ctx_ql) + prof_ctx_tree_t *ctxs) { - prof_thr_cnt_t *thr_cnt; - prof_cnt_t tcnt; cassert(config_prof); @@ -762,33 +816,10 @@ prof_dump_ctx_prep(prof_ctx_t *ctx, prof_cnt_t *cnt_all, size_t *leak_nctx, * prof_dump()'s second pass. */ ctx->nlimbo++; - ql_tail_insert(ctx_ql, ctx, dump_link); + ctx_tree_insert(ctxs, ctx); memcpy(&ctx->cnt_summed, &ctx->cnt_merged, sizeof(prof_cnt_t)); - ql_foreach(thr_cnt, &ctx->cnts_ql, cnts_link) { - volatile unsigned *epoch = &thr_cnt->epoch; - - while (true) { - unsigned epoch0 = *epoch; - - /* Make sure epoch is even. */ - if (epoch0 & 1U) - continue; - - memcpy(&tcnt, &thr_cnt->cnts, sizeof(prof_cnt_t)); - - /* Terminate if epoch didn't change while reading. 
*/ - if (*epoch == epoch0) - break; - } - - ctx->cnt_summed.curobjs += tcnt.curobjs; - ctx->cnt_summed.curbytes += tcnt.curbytes; - if (opt_prof_accum) { - ctx->cnt_summed.accumobjs += tcnt.accumobjs; - ctx->cnt_summed.accumbytes += tcnt.accumbytes; - } - } + thr_cnt_tree_iter(&ctx->thr_cnts, NULL, ctx_sum_iter, (void *)ctx); if (ctx->cnt_summed.curobjs != 0) (*leak_nctx)++; @@ -829,25 +860,24 @@ prof_dump_header(bool propagate_err, const prof_cnt_t *cnt_all) } static void -prof_dump_ctx_cleanup_locked(prof_ctx_t *ctx, prof_ctx_list_t *ctx_ql) +prof_dump_ctx_cleanup_locked(prof_ctx_t *ctx, prof_ctx_tree_t *ctxs) { ctx->nlimbo--; - ql_remove(ctx_ql, ctx, dump_link); } static void -prof_dump_ctx_cleanup(prof_ctx_t *ctx, prof_ctx_list_t *ctx_ql) +prof_dump_ctx_cleanup(prof_ctx_t *ctx, prof_ctx_tree_t *ctxs) { malloc_mutex_lock(ctx->lock); - prof_dump_ctx_cleanup_locked(ctx, ctx_ql); + prof_dump_ctx_cleanup_locked(ctx, ctxs); malloc_mutex_unlock(ctx->lock); } static bool prof_dump_ctx(bool propagate_err, prof_ctx_t *ctx, const prof_bt_t *bt, - prof_ctx_list_t *ctx_ql) + prof_ctx_tree_t *ctxs) { bool ret; unsigned i; @@ -895,7 +925,7 @@ prof_dump_ctx(bool propagate_err, prof_ctx_t *ctx, const prof_bt_t *bt, ret = false; label_return: - prof_dump_ctx_cleanup_locked(ctx, ctx_ql); + prof_dump_ctx_cleanup_locked(ctx, ctxs); malloc_mutex_unlock(ctx->lock); return (ret); } @@ -966,6 +996,26 @@ prof_leakcheck(const prof_cnt_t *cnt_all, size_t leak_nctx, } } +static prof_ctx_t * +prof_ctx_dump_iter(prof_ctx_tree_t *ctxs, prof_ctx_t *ctx, void *arg) +{ + bool propagate_err = *(bool *)arg; + + if (prof_dump_ctx(propagate_err, ctx, &ctx->bt, ctxs)) + return (ctx_tree_next(ctxs, ctx)); + + return (NULL); +} + +static prof_ctx_t * +prof_ctx_cleanup_iter(prof_ctx_tree_t *ctxs, prof_ctx_t *ctx, void *arg) +{ + + prof_dump_ctx_cleanup(ctx, ctxs); + + return (NULL); +} + static bool prof_dump(bool propagate_err, const char *filename, bool leakcheck) { @@ -977,7 +1027,8 @@ prof_dump(bool 
propagate_err, const char *filename, bool leakcheck) void *v; } ctx; size_t leak_nctx; - prof_ctx_list_t ctx_ql; + prof_ctx_tree_t ctxs; + prof_ctx_t *cleanup_start = NULL; cassert(config_prof); @@ -990,10 +1041,10 @@ prof_dump(bool propagate_err, const char *filename, bool leakcheck) /* Merge per thread profile stats, and sum them in cnt_all. */ memset(&cnt_all, 0, sizeof(prof_cnt_t)); leak_nctx = 0; - ql_new(&ctx_ql); + ctx_tree_new(&ctxs); prof_enter(prof_tdata); for (tabind = 0; ckh_iter(&bt2ctx, &tabind, NULL, &ctx.v) == false;) - prof_dump_ctx_prep(ctx.p, &cnt_all, &leak_nctx, &ctx_ql); + prof_dump_ctx_prep(ctx.p, &cnt_all, &leak_nctx, &ctxs); prof_leave(prof_tdata); /* Create dump file. */ @@ -1005,10 +1056,10 @@ prof_dump(bool propagate_err, const char *filename, bool leakcheck) goto label_write_error; /* Dump per ctx profile stats. */ - while ((ctx.p = ql_first(&ctx_ql)) != NULL) { - if (prof_dump_ctx(propagate_err, ctx.p, &ctx.p->bt, &ctx_ql)) - goto label_write_error; - } + cleanup_start = ctx_tree_iter(&ctxs, NULL, prof_ctx_dump_iter, + (void *)&propagate_err); + if (cleanup_start != NULL) + goto label_write_error; /* Dump /proc//maps if possible. */ if (prof_dump_maps(propagate_err)) @@ -1026,8 +1077,10 @@ prof_dump(bool propagate_err, const char *filename, bool leakcheck) label_write_error: prof_dump_close(propagate_err); label_open_close_error: - while ((ctx.p = ql_first(&ctx_ql)) != NULL) - prof_dump_ctx_cleanup(ctx.p, &ctx_ql); + if (cleanup_start != NULL) { + ctx_tree_iter(&ctxs, cleanup_start, prof_ctx_cleanup_iter, + NULL); + } malloc_mutex_unlock(&prof_dump_mtx); return (true); } From 1628e8615ed6c82ded14d6013ac775274eb426e6 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 19 Aug 2014 01:28:49 -0700 Subject: [PATCH 0497/3378] Add rb_empty(). 
--- include/jemalloc/internal/rb.h | 13 +++++++++++++ test/unit/rb.c | 3 +++ 2 files changed, 16 insertions(+) diff --git a/include/jemalloc/internal/rb.h b/include/jemalloc/internal/rb.h index 423802eb..ffe3bb0d 100644 --- a/include/jemalloc/internal/rb.h +++ b/include/jemalloc/internal/rb.h @@ -158,6 +158,8 @@ struct { \ #define rb_proto(a_attr, a_prefix, a_rbt_type, a_type) \ a_attr void \ a_prefix##new(a_rbt_type *rbtree); \ +a_attr bool \ +a_prefix##empty(a_rbt_type *rbtree); \ a_attr a_type * \ a_prefix##first(a_rbt_type *rbtree); \ a_attr a_type * \ @@ -224,6 +226,13 @@ a_prefix##reverse_iter(a_rbt_type *rbtree, a_type *start, \ * Args: * tree: Pointer to an uninitialized red-black tree object. * + * static bool + * ex_empty(ex_t *tree); + * Description: Determine whether tree is empty. + * Args: + * tree: Pointer to an initialized red-black tree object. + * Ret: True if tree is empty, false otherwise. + * * static ex_node_t * * ex_first(ex_t *tree); * static ex_node_t * @@ -309,6 +318,10 @@ a_attr void \ a_prefix##new(a_rbt_type *rbtree) { \ rb_new(a_type, a_field, rbtree); \ } \ +a_attr bool \ +a_prefix##empty(a_rbt_type *rbtree) { \ + return (rbtree->rbt_root == &rbtree->rbt_nil); \ +} \ a_attr a_type * \ a_prefix##first(a_rbt_type *rbtree) { \ a_type *ret; \ diff --git a/test/unit/rb.c b/test/unit/rb.c index b737485a..e43907f1 100644 --- a/test/unit/rb.c +++ b/test/unit/rb.c @@ -49,6 +49,7 @@ TEST_BEGIN(test_rb_empty) tree_new(&tree); + assert_true(tree_empty(&tree), "Tree should be empty"); assert_ptr_null(tree_first(&tree), "Unexpected node"); assert_ptr_null(tree_last(&tree), "Unexpected node"); @@ -265,6 +266,8 @@ TEST_BEGIN(test_rb_random) assert_u_eq(tree_iterate_reverse(&tree), k+1, "Unexpected node iteration count"); + assert_false(tree_empty(&tree), + "Tree should not be empty"); assert_ptr_not_null(tree_first(&tree), "Tree should not be empty"); assert_ptr_not_null(tree_last(&tree), From 602c8e0971160e4b85b08b16cf8a2375aa24bc04 Mon Sep 17 
00:00:00 2001 From: Jason Evans Date: Mon, 18 Aug 2014 16:22:13 -0700 Subject: [PATCH 0498/3378] Implement per thread heap profiling. Rename data structures (prof_thr_cnt_t-->prof_tctx_t, prof_ctx_t-->prof_gctx_t), and convert to storing a prof_tctx_t for sampled objects. Convert PROF_ALLOC_PREP() to prof_alloc_prep(), since precise backtrace depth within jemalloc functions is no longer an issue (pprof prunes irrelevant frames). Implement mallctl's: - prof.reset implements full sample data reset, and optional change of sample interval. - prof.lg_sample reads the current sample interval (opt.lg_prof_sample was the permanent source of truth prior to prof.reset). - thread.prof.name provides naming capability for threads within heap profile dumps. - thread.prof.active makes it possible to activate/deactivate heap profiling for individual threads. Modify the heap dump files to contain per thread heap profile data. This change is incompatible with the existing pprof, which will require enhancements to read and process the enriched data. --- doc/jemalloc.xml.in | 56 +- include/jemalloc/internal/arena.h | 22 +- include/jemalloc/internal/extent.h | 2 +- include/jemalloc/internal/huge.h | 4 +- include/jemalloc/internal/private_symbols.txt | 21 +- include/jemalloc/internal/prof.h | 444 +++---- src/ctl.c | 97 +- src/huge.c | 12 +- src/jemalloc.c | 140 +- src/prof.c | 1159 +++++++++++------ src/stats.c | 2 +- 11 files changed, 1235 insertions(+), 724 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 308d0c65..8f4327f3 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1047,7 +1047,7 @@ malloc_conf = "xmalloc:true";]]> opt.lg_prof_sample - (ssize_t) + (size_t) r- [] @@ -1243,6 +1243,35 @@ malloc_conf = "xmalloc:true";]]> the developer may find manual flushing useful. + + + thread.prof.name + (const char *) + rw + [] + + Get/set the descriptive name associated with the calling + thread in memory profile dumps. 
An internal copy of the name string is + created, so the input string need not be maintained after this interface + completes execution. The output string of this interface should be + copied for non-ephemeral uses, because multiple implementation details + can cause asynchronous string deallocation. + + + + + thread.prof.active + (bool) + rw + [] + + Control whether sampling is currently active for the + calling thread. This is a deactivation mechanism in addition to prof.active; both must + be active for the calling thread to sample. This flag is enabled by + default. + + arena.<i>.purge @@ -1492,6 +1521,31 @@ malloc_conf = "xmalloc:true";]]> option. + + + prof.reset + (size_t) + -w + [] + + Reset all memory profile statistics, and optionally + update the sample rate (see opt.lg_prof_sample). + + + + + + prof.lg_sample + (size_t) + r- + [] + + Get the sample rate (see opt.lg_prof_sample). + + + prof.interval diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 9351e3b0..f3f6426c 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -58,7 +58,7 @@ typedef struct arena_s arena_t; struct arena_chunk_map_s { #ifndef JEMALLOC_PROF /* - * Overlay prof_ctx in order to allow it to be referenced by dead code. + * Overlay prof_tctx in order to allow it to be referenced by dead code. * Such antics aren't warranted for per arena data structures, but * chunk map overhead accounts for a percentage of memory, rather than * being just a fixed cost. @@ -75,7 +75,7 @@ struct arena_chunk_map_s { rb_node(arena_chunk_map_t) rb_link; /* Profile counters, used for large object runs. */ - prof_ctx_t *prof_ctx; + prof_tctx_t *prof_tctx; #ifndef JEMALLOC_PROF }; /* union { ... 
}; */ #endif @@ -472,8 +472,8 @@ size_t arena_ptr_small_binind_get(const void *ptr, size_t mapbits); size_t arena_bin_index(arena_t *arena, arena_bin_t *bin); unsigned arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr); -prof_ctx_t *arena_prof_ctx_get(const void *ptr); -void arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx); +prof_tctx_t *arena_prof_tctx_get(const void *ptr); +void arena_prof_tctx_set(const void *ptr, prof_tctx_t *tctx); void *arena_malloc(arena_t *arena, size_t size, bool zero, bool try_tcache); size_t arena_salloc(const void *ptr, bool demote); void arena_dalloc(arena_chunk_t *chunk, void *ptr, bool try_tcache); @@ -987,10 +987,10 @@ arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr) return (regind); } -JEMALLOC_INLINE prof_ctx_t * -arena_prof_ctx_get(const void *ptr) +JEMALLOC_INLINE prof_tctx_t * +arena_prof_tctx_get(const void *ptr) { - prof_ctx_t *ret; + prof_tctx_t *ret; arena_chunk_t *chunk; size_t pageind, mapbits; @@ -1003,15 +1003,15 @@ arena_prof_ctx_get(const void *ptr) mapbits = arena_mapbits_get(chunk, pageind); assert((mapbits & CHUNK_MAP_ALLOCATED) != 0); if ((mapbits & CHUNK_MAP_LARGE) == 0) - ret = (prof_ctx_t *)(uintptr_t)1U; + ret = (prof_tctx_t *)(uintptr_t)1U; else - ret = arena_mapp_get(chunk, pageind)->prof_ctx; + ret = arena_mapp_get(chunk, pageind)->prof_tctx; return (ret); } JEMALLOC_INLINE void -arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx) +arena_prof_tctx_set(const void *ptr, prof_tctx_t *tctx) { arena_chunk_t *chunk; size_t pageind; @@ -1025,7 +1025,7 @@ arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx) assert(arena_mapbits_allocated_get(chunk, pageind) != 0); if (arena_mapbits_large_get(chunk, pageind) != 0) - arena_mapp_get(chunk, pageind)->prof_ctx = ctx; + arena_mapp_get(chunk, pageind)->prof_tctx = tctx; } JEMALLOC_ALWAYS_INLINE void * diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index 
000ef6d5..5b00076f 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -16,7 +16,7 @@ struct extent_node_s { rb_node(extent_node_t) link_ad; /* Profile counters, used for huge objects. */ - prof_ctx_t *prof_ctx; + prof_tctx_t *prof_tctx; /* Pointer to the extent that this tree node is responsible for. */ void *addr; diff --git a/include/jemalloc/internal/huge.h b/include/jemalloc/internal/huge.h index 1e545367..2ec77520 100644 --- a/include/jemalloc/internal/huge.h +++ b/include/jemalloc/internal/huge.h @@ -21,8 +21,8 @@ extern huge_dalloc_junk_t *huge_dalloc_junk; #endif void huge_dalloc(void *ptr); size_t huge_salloc(const void *ptr); -prof_ctx_t *huge_prof_ctx_get(const void *ptr); -void huge_prof_ctx_set(const void *ptr, prof_ctx_t *ctx); +prof_tctx_t *huge_prof_tctx_get(const void *ptr); +void huge_prof_tctx_set(const void *ptr, prof_tctx_t *tctx); bool huge_boot(void); void huge_prefork(void); void huge_postfork_parent(void); diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 3401301c..13505455 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -48,9 +48,9 @@ arena_prefork arena_prof_accum arena_prof_accum_impl arena_prof_accum_locked -arena_prof_ctx_get -arena_prof_ctx_set arena_prof_promoted +arena_prof_tctx_get +arena_prof_tctx_set arena_ptr_small_binind_get arena_purge_all arena_quarantine_junk_small @@ -208,8 +208,8 @@ huge_palloc huge_postfork_child huge_postfork_parent huge_prefork -huge_prof_ctx_get -huge_prof_ctx_set +huge_prof_tctx_get +huge_prof_tctx_set huge_ralloc huge_ralloc_no_move huge_salloc @@ -287,28 +287,31 @@ opt_zero p2rz pages_purge pow2_ceil +prof_alloc_prep prof_backtrace prof_boot0 prof_boot1 prof_boot2 prof_bt_count -prof_ctx_get -prof_ctx_set prof_dump_open prof_free +prof_free_sampled_object prof_gdump prof_idump prof_interval prof_lookup prof_malloc 
-prof_malloc_record_object +prof_malloc_sample_object prof_mdump prof_postfork_child prof_postfork_parent prof_prefork prof_realloc +prof_reset prof_sample_accum_update prof_sample_threshold_update +prof_tctx_get +prof_tctx_set prof_tdata_booted prof_tdata_cleanup prof_tdata_get @@ -322,6 +325,10 @@ prof_tdata_tsd_get prof_tdata_tsd_get_wrapper prof_tdata_tsd_init_head prof_tdata_tsd_set +prof_thread_active_get +prof_thread_active_set +prof_thread_name_get +prof_thread_name_set quarantine quarantine_alloc_hook quarantine_boot diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index 9398ad91..104bfade 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -1,11 +1,10 @@ /******************************************************************************/ #ifdef JEMALLOC_H_TYPES -typedef uint64_t prof_thr_uid_t; typedef struct prof_bt_s prof_bt_t; typedef struct prof_cnt_s prof_cnt_t; -typedef struct prof_thr_cnt_s prof_thr_cnt_t; -typedef struct prof_ctx_s prof_ctx_t; +typedef struct prof_tctx_s prof_tctx_t; +typedef struct prof_gctx_s prof_gctx_t; typedef struct prof_tdata_s prof_tdata_t; /* Option defaults. */ @@ -34,11 +33,17 @@ typedef struct prof_tdata_s prof_tdata_t; #define PROF_PRINTF_BUFSIZE 128 /* - * Number of mutexes shared among all ctx's. No space is allocated for these + * Number of mutexes shared among all gctx's. No space is allocated for these * unless profiling is enabled, so it's okay to over-provision. */ #define PROF_NCTX_LOCKS 1024 +/* + * Number of mutexes shared among all tdata's. No space is allocated for these + * unless profiling is enabled, so it's okay to over-provision. + */ +#define PROF_NTDATA_LOCKS 256 + /* * prof_tdata pointers close to NULL are used to encode state information that * is used for cleaning up during thread shutdown. @@ -66,87 +71,70 @@ typedef struct { #endif struct prof_cnt_s { - /* - * Profiling counters. 
An allocation/deallocation pair can operate on - * different prof_thr_cnt_t objects that are linked into the same - * prof_ctx_t cnts_ql, so it is possible for the cur* counters to go - * negative. In principle it is possible for the *bytes counters to - * overflow/underflow, but a general solution would require something - * like 128-bit counters; this implementation doesn't bother to solve - * that problem. - */ - int64_t curobjs; - int64_t curbytes; + /* Profiling counters. */ + uint64_t curobjs; + uint64_t curbytes; uint64_t accumobjs; uint64_t accumbytes; }; -struct prof_thr_cnt_s { - prof_thr_uid_t thr_uid; +typedef enum { + prof_tctx_state_nominal, + prof_tctx_state_dumping, + prof_tctx_state_purgatory /* Dumper must finish destroying. */ +} prof_tctx_state_t; - /* Linkage into prof_ctx_t's thr_cnts. */ - rb_node(prof_thr_cnt_t) thr_cnt_link; +struct prof_tctx_s { + /* Thread data for thread that performed the allocation. */ + prof_tdata_t *tdata; - /* - * Associated context. If a thread frees an object that it did not - * allocate, it is possible that the context is not present in the - * thread's hash table, in which case it must be able to look up the - * context, insert a new prof_thr_cnt_t into the thread's hash table, - * and link it into the prof_ctx_t's thr_cnts. - */ - prof_ctx_t *ctx; - - /* - * Threads use memory barriers to update the counters. Since there is - * only ever one writer, the only challenge is for the reader to get a - * consistent read of the counters. - * - * The writer uses this series of operations: - * - * 1) Increment epoch to an odd number. - * 2) Update counters. - * 3) Increment epoch to an even number. - * - * The reader must assure 1) that the epoch is even while it reads the - * counters, and 2) that the epoch doesn't change between the time it - * starts and finishes reading the counters. - */ - unsigned epoch; - - /* Profiling counters. */ + /* Profiling counters, protected by tdata->lock. 
*/ prof_cnt_t cnts; -}; -typedef rb_tree(prof_thr_cnt_t) prof_thr_cnt_tree_t; -struct prof_ctx_s { - /* Protects nlimbo, cnt_merged, and thr_cnts. */ + /* Associated global context. */ + prof_gctx_t *gctx; + + /* Linkage into gctx's tctxs. */ + rb_node(prof_tctx_t) tctx_link; + + /* Current dump-related state, protected by gctx->lock. */ + prof_tctx_state_t state; + + /* + * Copy of cnts snapshotted during early dump phase, protected by + * dump_mtx. + */ + prof_cnt_t dump_cnts; +}; +typedef rb_tree(prof_tctx_t) prof_tctx_tree_t; + +struct prof_gctx_s { + /* Protects nlimbo, cnt_summed, and tctxs. */ malloc_mutex_t *lock; /* - * Number of threads that currently cause this ctx to be in a state of + * Number of threads that currently cause this gctx to be in a state of * limbo due to one of: - * - Initializing per thread counters associated with this ctx. - * - Preparing to destroy this ctx. - * - Dumping a heap profile that includes this ctx. + * - Initializing this gctx. + * - Initializing per thread counters associated with this gctx. + * - Preparing to destroy this gctx. + * - Dumping a heap profile that includes this gctx. * nlimbo must be 1 (single destroyer) in order to safely destroy the - * ctx. + * gctx. */ unsigned nlimbo; - /* Temporary storage for summation during dump. */ - prof_cnt_t cnt_summed; - - /* When threads exit, they merge their stats into cnt_merged. */ - prof_cnt_t cnt_merged; - /* * Tree of profile counters, one for each thread that has allocated in * this context. */ - prof_thr_cnt_tree_t thr_cnts; + prof_tctx_tree_t tctxs; /* Linkage for tree of contexts to be dumped. */ - rb_node(prof_ctx_t) dump_link; + rb_node(prof_gctx_t) dump_link; + + /* Temporary storage for summation during dump. */ + prof_cnt_t cnt_summed; /* Associated backtrace. */ prof_bt_t bt; @@ -154,21 +142,34 @@ struct prof_ctx_s { /* Backtrace vector, variable size, referred to by bt. 
*/ void *vec[1]; }; -typedef rb_tree(prof_ctx_t) prof_ctx_tree_t; +typedef rb_tree(prof_gctx_t) prof_gctx_tree_t; + +typedef enum { + prof_tdata_state_attached, /* Active thread attached, data valid. */ + prof_tdata_state_detached, /* Defunct thread, data remain valid. */ + prof_tdata_state_expired /* Predates reset, omit data from dump. */ +} prof_tdata_state_t; struct prof_tdata_s { + malloc_mutex_t *lock; + + /* Monotonically increasing unique thread identifier. */ + uint64_t thr_uid; + + /* Included in heap profile dumps if non-NULL. */ + char *thread_name; + + prof_tdata_state_t state; + + rb_node(prof_tdata_t) tdata_link; + /* - * Hash of (prof_bt_t *)-->(prof_thr_cnt_t *). Each thread tracks + * Hash of (prof_bt_t *)-->(prof_tctx_t *). Each thread tracks * backtraces for which it has non-zero allocation/deallocation counters - * associated with thread-specific prof_thr_cnt_t objects. Other - * threads may read the prof_thr_cnt_t contents, but no others will ever - * write them. - * - * Upon thread exit, the thread must merge all the prof_thr_cnt_t - * counter data into the associated prof_ctx_t objects, and unlink/free - * the prof_thr_cnt_t objects. + * associated with thread-specific prof_tctx_t objects. Other threads + * may write to prof_tctx_t contents when freeing associated objects. */ - ckh_t bt2cnt; + ckh_t bt2tctx; /* Sampling state. */ uint64_t prng_state; @@ -179,9 +180,27 @@ struct prof_tdata_s { bool enq_idump; bool enq_gdump; + /* + * Set to true during an early dump phase for tdata's which are + * currently being dumped. New threads' tdata's have this initialized + * to false so that they aren't accidentally included in later dump + * phases. + */ + bool dumping; + + /* + * True if profiling is active for this tdata's thread + * (thread.prof.active mallctl). + */ + bool active; + + /* Temporary storage for summation during dump. */ + prof_cnt_t cnt_summed; + /* Backtrace vector, used for calls to prof_backtrace(). 
*/ void *vec[PROF_BT_MAX]; }; +typedef rb_tree(prof_tdata_t) prof_tdata_tree_t; #endif /* JEMALLOC_H_STRUCTS */ /******************************************************************************/ @@ -217,9 +236,18 @@ extern char opt_prof_prefix[ */ extern uint64_t prof_interval; +/* + * Initialized as opt_lg_prof_sample, and potentially modified during profiling + * resets. + */ +extern size_t lg_prof_sample; + +void prof_malloc_sample_object(const void *ptr, size_t usize, + prof_tctx_t *tctx); +void prof_free_sampled_object(size_t usize, prof_tctx_t *tctx); void bt_init(prof_bt_t *bt, void **vec); void prof_backtrace(prof_bt_t *bt); -prof_thr_cnt_t *prof_lookup(prof_bt_t *bt); +prof_tctx_t *prof_lookup(prof_bt_t *bt); #ifdef JEMALLOC_JET size_t prof_bt_count(void); typedef int (prof_dump_open_t)(bool, const char *); @@ -229,53 +257,44 @@ void prof_idump(void); bool prof_mdump(const char *filename); void prof_gdump(void); prof_tdata_t *prof_tdata_init(void); +prof_tdata_t *prof_tdata_reinit(prof_tdata_t *tdata); +void prof_reset(size_t lg_sample); void prof_tdata_cleanup(void *arg); +const char *prof_thread_name_get(void); +bool prof_thread_name_set(const char *thread_name); +bool prof_thread_active_get(void); +bool prof_thread_active_set(bool active); void prof_boot0(void); void prof_boot1(void); bool prof_boot2(void); void prof_prefork(void); void prof_postfork_parent(void); void prof_postfork_child(void); -void prof_sample_threshold_update(prof_tdata_t *prof_tdata); +void prof_sample_threshold_update(prof_tdata_t *tdata); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ #ifdef JEMALLOC_H_INLINES -#define PROF_ALLOC_PREP(size, ret) do { \ - prof_tdata_t *prof_tdata; \ - prof_bt_t bt; \ - \ - assert(size == s2u(size)); \ - \ - if (!opt_prof_active || \ - prof_sample_accum_update(size, false, &prof_tdata)) { \ - ret = (prof_thr_cnt_t *)(uintptr_t)1U; \ - } else { \ - bt_init(&bt, prof_tdata->vec); \ - 
prof_backtrace(&bt); \ - ret = prof_lookup(&bt); \ - } \ -} while (0) - #ifndef JEMALLOC_ENABLE_INLINE malloc_tsd_protos(JEMALLOC_ATTR(unused), prof_tdata, prof_tdata_t *) prof_tdata_t *prof_tdata_get(bool create); -bool prof_sample_accum_update(size_t size, bool commit, - prof_tdata_t **prof_tdata_out); -prof_ctx_t *prof_ctx_get(const void *ptr); -void prof_ctx_set(const void *ptr, prof_ctx_t *ctx); -void prof_malloc_record_object(const void *ptr, size_t usize, - prof_thr_cnt_t *cnt); -void prof_malloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt); -void prof_realloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt, - size_t old_usize, prof_ctx_t *old_ctx); -void prof_free(const void *ptr, size_t size); +bool prof_sample_accum_update(size_t usize, bool commit, + prof_tdata_t **tdata_out); +prof_tctx_t *prof_alloc_prep(size_t usize); +prof_tctx_t *prof_tctx_get(const void *ptr); +void prof_tctx_set(const void *ptr, prof_tctx_t *tctx); +void prof_malloc_sample_object(const void *ptr, size_t usize, + prof_tctx_t *tctx); +void prof_malloc(const void *ptr, size_t usize, prof_tctx_t *tctx); +void prof_realloc(const void *ptr, size_t usize, prof_tctx_t *tctx, + size_t old_usize, prof_tctx_t *old_tctx); +void prof_free(const void *ptr, size_t usize); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PROF_C_)) -/* Thread-specific backtrace cache, used to reduce bt2ctx contention. */ +/* Thread-specific backtrace cache, used to reduce bt2gctx contention. 
*/ malloc_tsd_externs(prof_tdata, prof_tdata_t *) malloc_tsd_funcs(JEMALLOC_INLINE, prof_tdata, prof_tdata_t *, NULL, prof_tdata_cleanup) @@ -283,21 +302,27 @@ malloc_tsd_funcs(JEMALLOC_INLINE, prof_tdata, prof_tdata_t *, NULL, JEMALLOC_INLINE prof_tdata_t * prof_tdata_get(bool create) { - prof_tdata_t *prof_tdata; + prof_tdata_t *tdata; cassert(config_prof); - prof_tdata = *prof_tdata_tsd_get(); - if (create && prof_tdata == NULL) - prof_tdata = prof_tdata_init(); + tdata = *prof_tdata_tsd_get(); + if (create) { + if (tdata == NULL) + tdata = prof_tdata_init(); + else if (tdata->state == prof_tdata_state_expired) + tdata = prof_tdata_reinit(tdata); + assert(tdata == NULL || tdata->state == + prof_tdata_state_attached); + } - return (prof_tdata); + return (tdata); } -JEMALLOC_INLINE prof_ctx_t * -prof_ctx_get(const void *ptr) +JEMALLOC_INLINE prof_tctx_t * +prof_tctx_get(const void *ptr) { - prof_ctx_t *ret; + prof_tctx_t *ret; arena_chunk_t *chunk; cassert(config_prof); @@ -306,15 +331,15 @@ prof_ctx_get(const void *ptr) chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (chunk != ptr) { /* Region. */ - ret = arena_prof_ctx_get(ptr); + ret = arena_prof_tctx_get(ptr); } else - ret = huge_prof_ctx_get(ptr); + ret = huge_prof_tctx_get(ptr); return (ret); } JEMALLOC_INLINE void -prof_ctx_set(const void *ptr, prof_ctx_t *ctx) +prof_tctx_set(const void *ptr, prof_tctx_t *tctx) { arena_chunk_t *chunk; @@ -324,66 +349,62 @@ prof_ctx_set(const void *ptr, prof_ctx_t *ctx) chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (chunk != ptr) { /* Region. 
*/ - arena_prof_ctx_set(ptr, ctx); + arena_prof_tctx_set(ptr, tctx); } else - huge_prof_ctx_set(ptr, ctx); + huge_prof_tctx_set(ptr, tctx); } JEMALLOC_INLINE bool -prof_sample_accum_update(size_t size, bool commit, - prof_tdata_t **prof_tdata_out) +prof_sample_accum_update(size_t usize, bool commit, prof_tdata_t **tdata_out) { - prof_tdata_t *prof_tdata; + prof_tdata_t *tdata; cassert(config_prof); - prof_tdata = prof_tdata_get(true); - if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) - prof_tdata = NULL; + tdata = prof_tdata_get(true); + if ((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) + tdata = NULL; - if (prof_tdata_out != NULL) - *prof_tdata_out = prof_tdata; + if (tdata_out != NULL) + *tdata_out = tdata; - if (prof_tdata == NULL) + if (tdata == NULL) return (true); - if (prof_tdata->bytes_until_sample >= size) { + if (tdata->bytes_until_sample >= usize) { if (commit) - prof_tdata->bytes_until_sample -= size; + tdata->bytes_until_sample -= usize; return (true); } else { /* Compute new sample threshold. 
*/ if (commit) - prof_sample_threshold_update(prof_tdata); - return (false); + prof_sample_threshold_update(tdata); + return (tdata->active == false); } } -JEMALLOC_INLINE void -prof_malloc_record_object(const void *ptr, size_t usize, prof_thr_cnt_t *cnt) { - prof_ctx_set(ptr, cnt->ctx); +JEMALLOC_INLINE prof_tctx_t * +prof_alloc_prep(size_t usize) +{ + prof_tctx_t *ret; + prof_tdata_t *tdata; + prof_bt_t bt; - cnt->epoch++; - /*********/ - mb_write(); - /*********/ - cnt->cnts.curobjs++; - cnt->cnts.curbytes += usize; - if (opt_prof_accum) { - cnt->cnts.accumobjs++; - cnt->cnts.accumbytes += usize; + assert(usize == s2u(usize)); + + if (!opt_prof_active || prof_sample_accum_update(usize, false, &tdata)) + ret = (prof_tctx_t *)(uintptr_t)1U; + else { + bt_init(&bt, tdata->vec); + prof_backtrace(&bt); + ret = prof_lookup(&bt); } - /*********/ - mb_write(); - /*********/ - cnt->epoch++; - /*********/ - mb_write(); - /*********/ + + return (ret); } JEMALLOC_INLINE void -prof_malloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt) +prof_malloc(const void *ptr, size_t usize, prof_tctx_t *tctx) { cassert(config_prof); @@ -392,131 +413,60 @@ prof_malloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt) if (prof_sample_accum_update(usize, true, NULL)) { /* - * Don't sample. For malloc()-like allocation, it is - * always possible to tell in advance how large an - * object's usable size will be, so there should never - * be a difference between the usize passed to - * PROF_ALLOC_PREP() and prof_malloc(). + * Don't sample. For malloc()-like allocation, it is always + * possible to tell in advance how large an object's usable size + * will be, so there should never be a difference between the + * usize passed to PROF_ALLOC_PREP() and prof_malloc(). 
*/ - assert((uintptr_t)cnt == (uintptr_t)1U); + assert((uintptr_t)tctx == (uintptr_t)1U); } - if ((uintptr_t)cnt > (uintptr_t)1U) - prof_malloc_record_object(ptr, usize, cnt); + if ((uintptr_t)tctx > (uintptr_t)1U) + prof_malloc_sample_object(ptr, usize, tctx); else - prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U); + prof_tctx_set(ptr, (prof_tctx_t *)(uintptr_t)1U); } JEMALLOC_INLINE void -prof_realloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt, - size_t old_usize, prof_ctx_t *old_ctx) +prof_realloc(const void *ptr, size_t usize, prof_tctx_t *tctx, size_t old_usize, + prof_tctx_t *old_tctx) { - prof_thr_cnt_t *told_cnt; cassert(config_prof); - assert(ptr != NULL || (uintptr_t)cnt <= (uintptr_t)1U); + assert(ptr != NULL || (uintptr_t)tctx <= (uintptr_t)1U); if (ptr != NULL) { assert(usize == isalloc(ptr, true)); if (prof_sample_accum_update(usize, true, NULL)) { /* - * Don't sample. The usize passed to - * PROF_ALLOC_PREP() was larger than what - * actually got allocated, so a backtrace was - * captured for this allocation, even though - * its actual usize was insufficient to cross - * the sample threshold. + * Don't sample. The usize passed to PROF_ALLOC_PREP() + * was larger than what actually got allocated, so a + * backtrace was captured for this allocation, even + * though its actual usize was insufficient to cross the + * sample threshold. */ - cnt = (prof_thr_cnt_t *)(uintptr_t)1U; + tctx = (prof_tctx_t *)(uintptr_t)1U; } } - if ((uintptr_t)old_ctx > (uintptr_t)1U) { - told_cnt = prof_lookup(&old_ctx->bt); - if (told_cnt == NULL) { - /* - * It's too late to propagate OOM for this realloc(), - * so operate directly on old_cnt->ctx->cnt_merged. 
- */ - malloc_mutex_lock(old_ctx->lock); - old_ctx->cnt_merged.curobjs--; - old_ctx->cnt_merged.curbytes -= old_usize; - malloc_mutex_unlock(old_ctx->lock); - told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U; - } - } else - told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U; - - if ((uintptr_t)told_cnt > (uintptr_t)1U) - told_cnt->epoch++; - if ((uintptr_t)cnt > (uintptr_t)1U) { - prof_ctx_set(ptr, cnt->ctx); - cnt->epoch++; - } else if (ptr != NULL) - prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U); - /*********/ - mb_write(); - /*********/ - if ((uintptr_t)told_cnt > (uintptr_t)1U) { - told_cnt->cnts.curobjs--; - told_cnt->cnts.curbytes -= old_usize; - } - if ((uintptr_t)cnt > (uintptr_t)1U) { - cnt->cnts.curobjs++; - cnt->cnts.curbytes += usize; - if (opt_prof_accum) { - cnt->cnts.accumobjs++; - cnt->cnts.accumbytes += usize; - } - } - /*********/ - mb_write(); - /*********/ - if ((uintptr_t)told_cnt > (uintptr_t)1U) - told_cnt->epoch++; - if ((uintptr_t)cnt > (uintptr_t)1U) - cnt->epoch++; - /*********/ - mb_write(); /* Not strictly necessary. 
*/ + if ((uintptr_t)old_tctx > (uintptr_t)1U) + prof_free_sampled_object(old_usize, old_tctx); + if ((uintptr_t)tctx > (uintptr_t)1U) + prof_malloc_sample_object(ptr, usize, tctx); + else + prof_tctx_set(ptr, (prof_tctx_t *)(uintptr_t)1U); } JEMALLOC_INLINE void -prof_free(const void *ptr, size_t size) +prof_free(const void *ptr, size_t usize) { - prof_ctx_t *ctx = prof_ctx_get(ptr); + prof_tctx_t *tctx = prof_tctx_get(ptr); cassert(config_prof); + assert(usize == isalloc(ptr, true)); - if ((uintptr_t)ctx > (uintptr_t)1) { - prof_thr_cnt_t *tcnt; - assert(size == isalloc(ptr, true)); - tcnt = prof_lookup(&ctx->bt); - - if (tcnt != NULL) { - tcnt->epoch++; - /*********/ - mb_write(); - /*********/ - tcnt->cnts.curobjs--; - tcnt->cnts.curbytes -= size; - /*********/ - mb_write(); - /*********/ - tcnt->epoch++; - /*********/ - mb_write(); - /*********/ - } else { - /* - * OOM during free() cannot be propagated, so operate - * directly on cnt->ctx->cnt_merged. - */ - malloc_mutex_lock(ctx->lock); - ctx->cnt_merged.curobjs--; - ctx->cnt_merged.curbytes -= size; - malloc_mutex_unlock(ctx->lock); - } - } + if ((uintptr_t)tctx > (uintptr_t)1U) + prof_free_sampled_object(usize, tctx); } #endif diff --git a/src/ctl.c b/src/ctl.c index fa52a6cc..b816c845 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -68,6 +68,8 @@ CTL_PROTO(version) CTL_PROTO(epoch) CTL_PROTO(thread_tcache_enabled) CTL_PROTO(thread_tcache_flush) +CTL_PROTO(thread_prof_name) +CTL_PROTO(thread_prof_active) CTL_PROTO(thread_arena) CTL_PROTO(thread_allocated) CTL_PROTO(thread_allocatedp) @@ -132,7 +134,9 @@ CTL_PROTO(arenas_nlruns) CTL_PROTO(arenas_extend) CTL_PROTO(prof_active) CTL_PROTO(prof_dump) +CTL_PROTO(prof_reset) CTL_PROTO(prof_interval) +CTL_PROTO(lg_prof_sample) CTL_PROTO(stats_chunks_current) CTL_PROTO(stats_chunks_total) CTL_PROTO(stats_chunks_high) @@ -196,18 +200,24 @@ CTL_PROTO(stats_mapped) */ #define INDEX(i) {false}, i##_index -static const ctl_named_node_t tcache_node[] = { +static const 
ctl_named_node_t thread_tcache_node[] = { {NAME("enabled"), CTL(thread_tcache_enabled)}, {NAME("flush"), CTL(thread_tcache_flush)} }; +static const ctl_named_node_t thread_prof_node[] = { + {NAME("name"), CTL(thread_prof_name)}, + {NAME("active"), CTL(thread_prof_active)} +}; + static const ctl_named_node_t thread_node[] = { {NAME("arena"), CTL(thread_arena)}, {NAME("allocated"), CTL(thread_allocated)}, {NAME("allocatedp"), CTL(thread_allocatedp)}, {NAME("deallocated"), CTL(thread_deallocated)}, {NAME("deallocatedp"), CTL(thread_deallocatedp)}, - {NAME("tcache"), CHILD(named, tcache)} + {NAME("tcache"), CHILD(named, thread_tcache)}, + {NAME("prof"), CHILD(named, thread_prof)} }; static const ctl_named_node_t config_node[] = { @@ -311,7 +321,9 @@ static const ctl_named_node_t arenas_node[] = { static const ctl_named_node_t prof_node[] = { {NAME("active"), CTL(prof_active)}, {NAME("dump"), CTL(prof_dump)}, - {NAME("interval"), CTL(prof_interval)} + {NAME("reset"), CTL(prof_reset)}, + {NAME("interval"), CTL(prof_interval)}, + {NAME("lg_sample"), CTL(lg_prof_sample)} }; static const ctl_named_node_t stats_chunks_node[] = { @@ -1281,6 +1293,62 @@ label_return: return (ret); } +static int +thread_prof_name_ctl(const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) +{ + int ret; + const char *oldname; + + if (config_prof == false) + return (ENOENT); + + oldname = prof_thread_name_get(); + if (newp != NULL) { + if (newlen != sizeof(const char *)) { + ret = EINVAL; + goto label_return; + } + if (prof_thread_name_set(*(const char **)newp)) { + ret = EAGAIN; + goto label_return; + } + } + READ(oldname, const char *); + + ret = 0; +label_return: + return (ret); +} + +static int +thread_prof_active_ctl(const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) +{ + int ret; + bool oldval; + + if (config_prof == false) + return (ENOENT); + + oldval = prof_thread_active_get(); + if (newp != NULL) { + if (newlen 
!= sizeof(bool)) { + ret = EINVAL; + goto label_return; + } + if (prof_thread_active_set(*(bool *)newp)) { + ret = EAGAIN; + goto label_return; + } + } + READ(oldval, bool); + + ret = 0; +label_return: + return (ret); +} + /******************************************************************************/ /* ctl_mutex must be held during execution of this function. */ @@ -1601,7 +1669,30 @@ label_return: return (ret); } +static int +prof_reset_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, + void *newp, size_t newlen) +{ + int ret; + size_t lg_sample = lg_prof_sample; + + if (config_prof == false) + return (ENOENT); + + WRITEONLY(); + WRITE(lg_sample, size_t); + if (lg_sample >= (sizeof(uint64_t) << 3)) + lg_sample = (sizeof(uint64_t) << 3) - 1; + + prof_reset(lg_sample); + + ret = 0; +label_return: + return (ret); +} + CTL_RO_NL_CGEN(config_prof, prof_interval, prof_interval, uint64_t) +CTL_RO_NL_CGEN(config_prof, lg_prof_sample, lg_prof_sample, size_t) /******************************************************************************/ diff --git a/src/huge.c b/src/huge.c index d08ed4a9..5f0c6980 100644 --- a/src/huge.c +++ b/src/huge.c @@ -197,10 +197,10 @@ huge_salloc(const void *ptr) return (ret); } -prof_ctx_t * -huge_prof_ctx_get(const void *ptr) +prof_tctx_t * +huge_prof_tctx_get(const void *ptr) { - prof_ctx_t *ret; + prof_tctx_t *ret; extent_node_t *node, key; malloc_mutex_lock(&huge_mtx); @@ -210,7 +210,7 @@ huge_prof_ctx_get(const void *ptr) node = extent_tree_ad_search(&huge, &key); assert(node != NULL); - ret = node->prof_ctx; + ret = node->prof_tctx; malloc_mutex_unlock(&huge_mtx); @@ -218,7 +218,7 @@ huge_prof_ctx_get(const void *ptr) } void -huge_prof_ctx_set(const void *ptr, prof_ctx_t *ctx) +huge_prof_tctx_set(const void *ptr, prof_tctx_t *tctx) { extent_node_t *node, key; @@ -229,7 +229,7 @@ huge_prof_ctx_set(const void *ptr, prof_ctx_t *ctx) node = extent_tree_ad_search(&huge, &key); assert(node != NULL); - node->prof_ctx = ctx; + 
node->prof_tctx = tctx; malloc_mutex_unlock(&huge_mtx); } diff --git a/src/jemalloc.c b/src/jemalloc.c index 0983c00d..2d01272e 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -636,9 +636,9 @@ malloc_conf_init(void) "prof_prefix", "jeprof") CONF_HANDLE_BOOL(opt_prof_active, "prof_active", true) - CONF_HANDLE_SSIZE_T(opt_lg_prof_sample, + CONF_HANDLE_SIZE_T(opt_lg_prof_sample, "lg_prof_sample", 0, - (sizeof(uint64_t) << 3) - 1) + (sizeof(uint64_t) << 3) - 1, true) CONF_HANDLE_BOOL(opt_prof_accum, "prof_accum", true) CONF_HANDLE_SSIZE_T(opt_lg_prof_interval, @@ -863,11 +863,11 @@ malloc_init_hard(void) */ static void * -imalloc_prof_sample(size_t usize, prof_thr_cnt_t *cnt) +imalloc_prof_sample(size_t usize, prof_tctx_t *tctx) { void *p; - if (cnt == NULL) + if (tctx == NULL) return (NULL); if (usize <= SMALL_MAXCLASS) { p = imalloc(SMALL_MAXCLASS+1); @@ -884,16 +884,16 @@ JEMALLOC_ALWAYS_INLINE_C void * imalloc_prof(size_t usize) { void *p; - prof_thr_cnt_t *cnt; + prof_tctx_t *tctx; - PROF_ALLOC_PREP(usize, cnt); - if ((uintptr_t)cnt != (uintptr_t)1U) - p = imalloc_prof_sample(usize, cnt); + tctx = prof_alloc_prep(usize); + if ((uintptr_t)tctx != (uintptr_t)1U) + p = imalloc_prof_sample(usize, tctx); else p = imalloc(usize); if (p == NULL) return (NULL); - prof_malloc(p, usize, cnt); + prof_malloc(p, usize, tctx); return (p); } @@ -943,11 +943,11 @@ je_malloc(size_t size) } static void * -imemalign_prof_sample(size_t alignment, size_t usize, prof_thr_cnt_t *cnt) +imemalign_prof_sample(size_t alignment, size_t usize, prof_tctx_t *tctx) { void *p; - if (cnt == NULL) + if (tctx == NULL) return (NULL); if (usize <= SMALL_MAXCLASS) { assert(sa2u(SMALL_MAXCLASS+1, alignment) != 0); @@ -963,17 +963,17 @@ imemalign_prof_sample(size_t alignment, size_t usize, prof_thr_cnt_t *cnt) } JEMALLOC_ALWAYS_INLINE_C void * -imemalign_prof(size_t alignment, size_t usize, prof_thr_cnt_t *cnt) +imemalign_prof(size_t alignment, size_t usize, prof_tctx_t *tctx) { void *p; - if 
((uintptr_t)cnt != (uintptr_t)1U) - p = imemalign_prof_sample(alignment, usize, cnt); + if ((uintptr_t)tctx != (uintptr_t)1U) + p = imemalign_prof_sample(alignment, usize, tctx); else p = ipalloc(usize, alignment, false); if (p == NULL) return (NULL); - prof_malloc(p, usize, cnt); + prof_malloc(p, usize, tctx); return (p); } @@ -1015,10 +1015,10 @@ imemalign(void **memptr, size_t alignment, size_t size, size_t min_alignment) } if (config_prof && opt_prof) { - prof_thr_cnt_t *cnt; + prof_tctx_t *tctx; - PROF_ALLOC_PREP(usize, cnt); - result = imemalign_prof(alignment, usize, cnt); + tctx = prof_alloc_prep(usize); + result = imemalign_prof(alignment, usize, tctx); } else result = ipalloc(usize, alignment, false); if (result == NULL) @@ -1070,11 +1070,11 @@ je_aligned_alloc(size_t alignment, size_t size) } static void * -icalloc_prof_sample(size_t usize, prof_thr_cnt_t *cnt) +icalloc_prof_sample(size_t usize, prof_tctx_t *tctx) { void *p; - if (cnt == NULL) + if (tctx == NULL) return (NULL); if (usize <= SMALL_MAXCLASS) { p = icalloc(SMALL_MAXCLASS+1); @@ -1088,17 +1088,17 @@ icalloc_prof_sample(size_t usize, prof_thr_cnt_t *cnt) } JEMALLOC_ALWAYS_INLINE_C void * -icalloc_prof(size_t usize, prof_thr_cnt_t *cnt) +icalloc_prof(size_t usize, prof_tctx_t *tctx) { void *p; - if ((uintptr_t)cnt != (uintptr_t)1U) - p = icalloc_prof_sample(usize, cnt); + if ((uintptr_t)tctx != (uintptr_t)1U) + p = icalloc_prof_sample(usize, tctx); else p = icalloc(usize); if (p == NULL) return (NULL); - prof_malloc(p, usize, cnt); + prof_malloc(p, usize, tctx); return (p); } @@ -1137,11 +1137,11 @@ je_calloc(size_t num, size_t size) } if (config_prof && opt_prof) { - prof_thr_cnt_t *cnt; + prof_tctx_t *tctx; usize = s2u(num_size); - PROF_ALLOC_PREP(usize, cnt); - ret = icalloc_prof(usize, cnt); + tctx = prof_alloc_prep(usize); + ret = icalloc_prof(usize, tctx); } else { if (config_stats || (config_valgrind && in_valgrind)) usize = s2u(num_size); @@ -1167,11 +1167,11 @@ label_return: } static 
void * -irealloc_prof_sample(void *oldptr, size_t usize, prof_thr_cnt_t *cnt) +irealloc_prof_sample(void *oldptr, size_t usize, prof_tctx_t *tctx) { void *p; - if (cnt == NULL) + if (tctx == NULL) return (NULL); if (usize <= SMALL_MAXCLASS) { p = iralloc(oldptr, SMALL_MAXCLASS+1, 0, 0, false); @@ -1185,19 +1185,19 @@ irealloc_prof_sample(void *oldptr, size_t usize, prof_thr_cnt_t *cnt) } JEMALLOC_ALWAYS_INLINE_C void * -irealloc_prof(void *oldptr, size_t old_usize, size_t usize, prof_thr_cnt_t *cnt) +irealloc_prof(void *oldptr, size_t old_usize, size_t usize, prof_tctx_t *tctx) { void *p; - prof_ctx_t *old_ctx; + prof_tctx_t *old_tctx; - old_ctx = prof_ctx_get(oldptr); - if ((uintptr_t)cnt != (uintptr_t)1U) - p = irealloc_prof_sample(oldptr, usize, cnt); + old_tctx = prof_tctx_get(oldptr); + if ((uintptr_t)tctx != (uintptr_t)1U) + p = irealloc_prof_sample(oldptr, usize, tctx); else p = iralloc(oldptr, usize, 0, 0, false); if (p == NULL) return (NULL); - prof_realloc(p, usize, cnt, old_usize, old_ctx); + prof_realloc(p, usize, tctx, old_usize, old_tctx); return (p); } @@ -1253,11 +1253,11 @@ je_realloc(void *ptr, size_t size) old_rzsize = config_prof ? p2rz(ptr) : u2rz(old_usize); if (config_prof && opt_prof) { - prof_thr_cnt_t *cnt; + prof_tctx_t *tctx; usize = s2u(size); - PROF_ALLOC_PREP(usize, cnt); - ret = irealloc_prof(ptr, old_usize, usize, cnt); + tctx = prof_alloc_prep(usize); + ret = irealloc_prof(ptr, old_usize, usize, tctx); } else { if (config_stats || (config_valgrind && in_valgrind)) usize = s2u(size); @@ -1379,11 +1379,11 @@ imallocx(size_t usize, size_t alignment, bool zero, bool try_tcache, static void * imallocx_prof_sample(size_t usize, size_t alignment, bool zero, bool try_tcache, - arena_t *arena, prof_thr_cnt_t *cnt) + arena_t *arena, prof_tctx_t *tctx) { void *p; - if (cnt == NULL) + if (tctx == NULL) return (NULL); if (usize <= SMALL_MAXCLASS) { size_t usize_promoted = (alignment == 0) ? 
@@ -1402,18 +1402,18 @@ imallocx_prof_sample(size_t usize, size_t alignment, bool zero, bool try_tcache, JEMALLOC_ALWAYS_INLINE_C void * imallocx_prof(size_t usize, size_t alignment, bool zero, bool try_tcache, - arena_t *arena, prof_thr_cnt_t *cnt) + arena_t *arena, prof_tctx_t *tctx) { void *p; - if ((uintptr_t)cnt != (uintptr_t)1U) { + if ((uintptr_t)tctx != (uintptr_t)1U) { p = imallocx_prof_sample(usize, alignment, zero, try_tcache, - arena, cnt); + arena, tctx); } else p = imallocx(usize, alignment, zero, try_tcache, arena); if (p == NULL) return (NULL); - prof_malloc(p, usize, cnt); + prof_malloc(p, usize, tctx); return (p); } @@ -1447,11 +1447,11 @@ je_mallocx(size_t size, int flags) assert(usize != 0); if (config_prof && opt_prof) { - prof_thr_cnt_t *cnt; + prof_tctx_t *tctx; - PROF_ALLOC_PREP(usize, cnt); + tctx = prof_alloc_prep(usize); p = imallocx_prof(usize, alignment, zero, try_tcache, arena, - cnt); + tctx); } else p = imallocx(usize, alignment, zero, try_tcache, arena); if (p == NULL) @@ -1476,11 +1476,11 @@ label_oom: static void * irallocx_prof_sample(void *oldptr, size_t size, size_t alignment, size_t usize, bool zero, bool try_tcache_alloc, bool try_tcache_dalloc, arena_t *arena, - prof_thr_cnt_t *cnt) + prof_tctx_t *tctx) { void *p; - if (cnt == NULL) + if (tctx == NULL) return (NULL); if (usize <= SMALL_MAXCLASS) { p = iralloct(oldptr, SMALL_MAXCLASS+1, (SMALL_MAXCLASS+1 >= @@ -1500,15 +1500,15 @@ irallocx_prof_sample(void *oldptr, size_t size, size_t alignment, size_t usize, JEMALLOC_ALWAYS_INLINE_C void * irallocx_prof(void *oldptr, size_t old_usize, size_t size, size_t alignment, size_t *usize, bool zero, bool try_tcache_alloc, bool try_tcache_dalloc, - arena_t *arena, prof_thr_cnt_t *cnt) + arena_t *arena, prof_tctx_t *tctx) { void *p; - prof_ctx_t *old_ctx; + prof_tctx_t *old_tctx; - old_ctx = prof_ctx_get(oldptr); - if ((uintptr_t)cnt != (uintptr_t)1U) + old_tctx = prof_tctx_get(oldptr); + if ((uintptr_t)tctx != (uintptr_t)1U) p = 
irallocx_prof_sample(oldptr, size, alignment, *usize, zero, - try_tcache_alloc, try_tcache_dalloc, arena, cnt); + try_tcache_alloc, try_tcache_dalloc, arena, tctx); else { p = iralloct(oldptr, size, 0, alignment, zero, try_tcache_alloc, try_tcache_dalloc, arena); @@ -1527,7 +1527,7 @@ irallocx_prof(void *oldptr, size_t old_usize, size_t size, size_t alignment, */ *usize = isalloc(p, config_prof); } - prof_realloc(p, *usize, cnt, old_usize, old_ctx); + prof_realloc(p, *usize, tctx, old_usize, old_tctx); return (p); } @@ -1570,13 +1570,13 @@ je_rallocx(void *ptr, size_t size, int flags) old_rzsize = u2rz(old_usize); if (config_prof && opt_prof) { - prof_thr_cnt_t *cnt; + prof_tctx_t *tctx; usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment); assert(usize != 0); - PROF_ALLOC_PREP(usize, cnt); + tctx = prof_alloc_prep(usize); p = irallocx_prof(ptr, old_usize, size, alignment, &usize, zero, - try_tcache_alloc, try_tcache_dalloc, arena, cnt); + try_tcache_alloc, try_tcache_dalloc, arena, tctx); if (p == NULL) goto label_oom; } else { @@ -1623,11 +1623,11 @@ ixallocx_helper(void *ptr, size_t old_usize, size_t size, size_t extra, static size_t ixallocx_prof_sample(void *ptr, size_t old_usize, size_t size, size_t extra, size_t alignment, size_t max_usize, bool zero, arena_t *arena, - prof_thr_cnt_t *cnt) + prof_tctx_t *tctx) { size_t usize; - if (cnt == NULL) + if (tctx == NULL) return (old_usize); /* Use minimum usize to determine whether promotion may happen. */ if (((alignment == 0) ? 
s2u(size) : sa2u(size, alignment)) <= @@ -1650,22 +1650,22 @@ ixallocx_prof_sample(void *ptr, size_t old_usize, size_t size, size_t extra, JEMALLOC_ALWAYS_INLINE_C size_t ixallocx_prof(void *ptr, size_t old_usize, size_t size, size_t extra, size_t alignment, size_t max_usize, bool zero, arena_t *arena, - prof_thr_cnt_t *cnt) + prof_tctx_t *tctx) { size_t usize; - prof_ctx_t *old_ctx; + prof_tctx_t *old_tctx; - old_ctx = prof_ctx_get(ptr); - if ((uintptr_t)cnt != (uintptr_t)1U) { + old_tctx = prof_tctx_get(ptr); + if ((uintptr_t)tctx != (uintptr_t)1U) { usize = ixallocx_prof_sample(ptr, old_usize, size, extra, - alignment, zero, max_usize, arena, cnt); + alignment, zero, max_usize, arena, tctx); } else { usize = ixallocx_helper(ptr, old_usize, size, extra, alignment, zero, arena); } if (usize == old_usize) return (usize); - prof_realloc(ptr, usize, cnt, old_usize, old_ctx); + prof_realloc(ptr, usize, tctx, old_usize, old_tctx); return (usize); } @@ -1697,19 +1697,19 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) old_rzsize = u2rz(old_usize); if (config_prof && opt_prof) { - prof_thr_cnt_t *cnt; + prof_tctx_t *tctx; /* * usize isn't knowable before ixalloc() returns when extra is * non-zero. Therefore, compute its maximum possible value and - * use that in PROF_ALLOC_PREP() to decide whether to capture a + * use that in prof_alloc_prep() to decide whether to capture a * backtrace. prof_realloc() will use the actual usize to * decide whether to sample. */ size_t max_usize = (alignment == 0) ? 
s2u(size+extra) : sa2u(size+extra, alignment); - PROF_ALLOC_PREP(max_usize, cnt); + tctx = prof_alloc_prep(max_usize); usize = ixallocx_prof(ptr, old_usize, size, extra, alignment, - max_usize, zero, arena, cnt); + max_usize, zero, arena, tctx); } else { usize = ixallocx_helper(ptr, old_usize, size, extra, alignment, zero, arena); diff --git a/src/prof.c b/src/prof.c index 497ccf42..044acd8b 100644 --- a/src/prof.c +++ b/src/prof.c @@ -33,22 +33,41 @@ char opt_prof_prefix[ uint64_t prof_interval = 0; +size_t lg_prof_sample; + /* - * Table of mutexes that are shared among ctx's. These are leaf locks, so - * there is no problem with using them for more than one ctx at the same time. - * The primary motivation for this sharing though is that ctx's are ephemeral, + * Table of mutexes that are shared among gctx's. These are leaf locks, so + * there is no problem with using them for more than one gctx at the same time. + * The primary motivation for this sharing though is that gctx's are ephemeral, * and destroying mutexes causes complications for systems that allocate when * creating/destroying mutexes. */ -static malloc_mutex_t *ctx_locks; -static unsigned cum_ctxs; /* Atomic counter. */ +static malloc_mutex_t *gctx_locks; +static unsigned cum_gctxs; /* Atomic counter. */ /* - * Global hash of (prof_bt_t *)-->(prof_ctx_t *). This is the master data + * Table of mutexes that are shared among tdata's. No operations require + * holding multiple tdata locks, so there is no problem with using them for more + * than one tdata at the same time, even though a gctx lock may be acquired + * while holding a tdata lock. + */ +static malloc_mutex_t *tdata_locks; + +/* + * Global hash of (prof_bt_t *)-->(prof_gctx_t *). This is the master data * structure that knows about all backtraces currently captured. 
*/ -static ckh_t bt2ctx; -static malloc_mutex_t bt2ctx_mtx; +static ckh_t bt2gctx; +static malloc_mutex_t bt2gctx_mtx; + +/* + * Tree of all extant prof_tdata_t structures, regardless of state, + * {attached,detached,expired}. + */ +static prof_tdata_tree_t tdatas; +static malloc_mutex_t tdatas_mtx; + +static uint64_t next_thr_uid; static malloc_mutex_t prof_dump_seq_mtx; static uint64_t prof_dump_seq; @@ -76,21 +95,33 @@ static int prof_dump_fd; static bool prof_booted = false; /******************************************************************************/ +/* + * Function prototypes for static functions that are referenced prior to + * definition. + */ + +static bool prof_tctx_should_destroy(prof_tctx_t *tctx); +static void prof_tctx_destroy(prof_tctx_t *tctx); +static bool prof_tdata_should_destroy(prof_tdata_t *tdata); +static void prof_tdata_destroy(prof_tdata_t *tdata); + +/******************************************************************************/ +/* Red-black trees. */ JEMALLOC_INLINE_C int -prof_thr_cnt_comp(const prof_thr_cnt_t *a, const prof_thr_cnt_t *b) +prof_tctx_comp(const prof_tctx_t *a, const prof_tctx_t *b) { - prof_thr_uid_t a_uid = a->thr_uid; - prof_thr_uid_t b_uid = b->thr_uid; + uint64_t a_uid = a->tdata->thr_uid; + uint64_t b_uid = b->tdata->thr_uid; return ((a_uid > b_uid) - (a_uid < b_uid)); } -rb_gen(static UNUSED, thr_cnt_tree_, prof_thr_cnt_tree_t, prof_thr_cnt_t, - thr_cnt_link, prof_thr_cnt_comp) +rb_gen(static UNUSED, tctx_tree_, prof_tctx_tree_t, prof_tctx_t, + tctx_link, prof_tctx_comp) JEMALLOC_INLINE_C int -prof_ctx_comp(const prof_ctx_t *a, const prof_ctx_t *b) +prof_gctx_comp(const prof_gctx_t *a, const prof_gctx_t *b) { unsigned a_len = a->bt.len; unsigned b_len = b->bt.len; @@ -101,8 +132,52 @@ prof_ctx_comp(const prof_ctx_t *a, const prof_ctx_t *b) return (ret); } -rb_gen(static UNUSED, ctx_tree_, prof_ctx_tree_t, prof_ctx_t, dump_link, - prof_ctx_comp) +rb_gen(static UNUSED, gctx_tree_, prof_gctx_tree_t, prof_gctx_t, 
dump_link, + prof_gctx_comp) + +JEMALLOC_INLINE_C int +prof_tdata_comp(const prof_tdata_t *a, const prof_tdata_t *b) +{ + uint64_t a_uid = a->thr_uid; + uint64_t b_uid = b->thr_uid; + + return ((a_uid > b_uid) - (a_uid < b_uid)); +} + +rb_gen(static UNUSED, tdata_tree_, prof_tdata_tree_t, prof_tdata_t, tdata_link, + prof_tdata_comp) + +/******************************************************************************/ + +void +prof_malloc_sample_object(const void *ptr, size_t usize, prof_tctx_t *tctx) { + prof_tctx_set(ptr, tctx); + + malloc_mutex_lock(tctx->tdata->lock); + tctx->cnts.curobjs++; + tctx->cnts.curbytes += usize; + if (opt_prof_accum) { + tctx->cnts.accumobjs++; + tctx->cnts.accumbytes += usize; + } + malloc_mutex_unlock(tctx->tdata->lock); +} + +void +prof_free_sampled_object(size_t usize, prof_tctx_t *tctx) +{ + + malloc_mutex_lock(tctx->tdata->lock); + assert(tctx->cnts.curobjs > 0); + assert(tctx->cnts.curbytes >= usize); + tctx->cnts.curobjs--; + tctx->cnts.curbytes -= usize; + + if (prof_tctx_should_destroy(tctx)) + prof_tctx_destroy(tctx); + else + malloc_mutex_unlock(tctx->tdata->lock); +} void bt_init(prof_bt_t *bt, void **vec) @@ -115,32 +190,32 @@ bt_init(prof_bt_t *bt, void **vec) } static inline void -prof_enter(prof_tdata_t *prof_tdata) +prof_enter(prof_tdata_t *tdata) { cassert(config_prof); - assert(prof_tdata->enq == false); - prof_tdata->enq = true; + assert(tdata->enq == false); + tdata->enq = true; - malloc_mutex_lock(&bt2ctx_mtx); + malloc_mutex_lock(&bt2gctx_mtx); } static inline void -prof_leave(prof_tdata_t *prof_tdata) +prof_leave(prof_tdata_t *tdata) { bool idump, gdump; cassert(config_prof); - malloc_mutex_unlock(&bt2ctx_mtx); + malloc_mutex_unlock(&bt2gctx_mtx); - assert(prof_tdata->enq); - prof_tdata->enq = false; - idump = prof_tdata->enq_idump; - prof_tdata->enq_idump = false; - gdump = prof_tdata->enq_gdump; - prof_tdata->enq_gdump = false; + assert(tdata->enq); + tdata->enq = false; + idump = tdata->enq_idump; + 
tdata->enq_idump = false; + gdump = tdata->enq_gdump; + tdata->enq_gdump = false; if (idump) prof_idump(); @@ -373,220 +448,268 @@ prof_backtrace(prof_bt_t *bt) #endif static malloc_mutex_t * -prof_ctx_mutex_choose(void) +prof_gctx_mutex_choose(void) { - unsigned nctxs = atomic_add_u(&cum_ctxs, 1); + unsigned ngctxs = atomic_add_u(&cum_gctxs, 1); - return (&ctx_locks[(nctxs - 1) % PROF_NCTX_LOCKS]); + return (&gctx_locks[(ngctxs - 1) % PROF_NCTX_LOCKS]); } -static prof_ctx_t * -prof_ctx_create(prof_bt_t *bt) +static malloc_mutex_t * +prof_tdata_mutex_choose(uint64_t thr_uid) +{ + + return (&tdata_locks[thr_uid % PROF_NTDATA_LOCKS]); +} + +static prof_gctx_t * +prof_gctx_create(prof_bt_t *bt) { /* * Create a single allocation that has space for vec of length bt->len. */ - prof_ctx_t *ctx = (prof_ctx_t *)imalloc(offsetof(prof_ctx_t, vec) + + prof_gctx_t *gctx = (prof_gctx_t *)imalloc(offsetof(prof_gctx_t, vec) + (bt->len * sizeof(void *))); - if (ctx == NULL) + if (gctx == NULL) return (NULL); - ctx->lock = prof_ctx_mutex_choose(); + gctx->lock = prof_gctx_mutex_choose(); /* * Set nlimbo to 1, in order to avoid a race condition with - * prof_ctx_merge()/prof_ctx_destroy(). + * prof_tctx_destroy()/prof_gctx_maybe_destroy(). */ - ctx->nlimbo = 1; - memset(&ctx->cnt_merged, 0, sizeof(prof_cnt_t)); - thr_cnt_tree_new(&ctx->thr_cnts); + gctx->nlimbo = 1; + tctx_tree_new(&gctx->tctxs); /* Duplicate bt. */ - memcpy(ctx->vec, bt->vec, bt->len * sizeof(void *)); - ctx->bt.vec = ctx->vec; - ctx->bt.len = bt->len; - return (ctx); + memcpy(gctx->vec, bt->vec, bt->len * sizeof(void *)); + gctx->bt.vec = gctx->vec; + gctx->bt.len = bt->len; + return (gctx); } static void -prof_ctx_destroy(prof_ctx_t *ctx) +prof_gctx_maybe_destroy(prof_gctx_t *gctx) { - prof_tdata_t *prof_tdata; + prof_tdata_t *tdata; cassert(config_prof); /* - * Check that ctx is still unused by any thread cache before destroying - * it. 
prof_lookup() increments ctx->nlimbo in order to avoid a race - * condition with this function, as does prof_ctx_merge() in order to - * avoid a race between the main body of prof_ctx_merge() and entry + * Check that gctx is still unused by any thread cache before destroying + * it. prof_lookup() increments gctx->nlimbo in order to avoid a race + * condition with this function, as does prof_tctx_destroy() in order to + * avoid a race between the main body of prof_tctx_destroy() and entry * into this function. */ - prof_tdata = prof_tdata_get(false); - assert((uintptr_t)prof_tdata > (uintptr_t)PROF_TDATA_STATE_MAX); - prof_enter(prof_tdata); - malloc_mutex_lock(ctx->lock); - if (thr_cnt_tree_first(&ctx->thr_cnts) == NULL && - ctx->cnt_merged.curobjs == 0 && ctx->nlimbo == 1) { - assert(ctx->cnt_merged.curbytes == 0); - assert(ctx->cnt_merged.accumobjs == 0); - assert(ctx->cnt_merged.accumbytes == 0); - /* Remove ctx from bt2ctx. */ - if (ckh_remove(&bt2ctx, &ctx->bt, NULL, NULL)) + tdata = prof_tdata_get(false); + assert((uintptr_t)tdata > (uintptr_t)PROF_TDATA_STATE_MAX); + prof_enter(tdata); + malloc_mutex_lock(gctx->lock); + if (tctx_tree_empty(&gctx->tctxs) && gctx->nlimbo == 1) { + /* Remove gctx from bt2gctx. */ + if (ckh_remove(&bt2gctx, &gctx->bt, NULL, NULL)) not_reached(); - prof_leave(prof_tdata); - /* Destroy ctx. */ - malloc_mutex_unlock(ctx->lock); - idalloc(ctx); + prof_leave(tdata); + /* Destroy gctx. */ + malloc_mutex_unlock(gctx->lock); + idalloc(gctx); } else { /* - * Compensate for increment in prof_ctx_merge() or + * Compensate for increment in prof_tctx_destroy() or * prof_lookup(). */ - ctx->nlimbo--; - malloc_mutex_unlock(ctx->lock); - prof_leave(prof_tdata); + gctx->nlimbo--; + malloc_mutex_unlock(gctx->lock); + prof_leave(tdata); } } -static void -prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt) +/* tctx->tdata->lock must be held. 
*/ +static bool +prof_tctx_should_destroy(prof_tctx_t *tctx) { - bool destroy; - cassert(config_prof); - - /* Merge cnt stats and detach from ctx. */ - malloc_mutex_lock(ctx->lock); - ctx->cnt_merged.curobjs += cnt->cnts.curobjs; - ctx->cnt_merged.curbytes += cnt->cnts.curbytes; - ctx->cnt_merged.accumobjs += cnt->cnts.accumobjs; - ctx->cnt_merged.accumbytes += cnt->cnts.accumbytes; - thr_cnt_tree_remove(&ctx->thr_cnts, cnt); - if (opt_prof_accum == false && thr_cnt_tree_first(&ctx->thr_cnts) == - NULL && ctx->cnt_merged.curobjs == 0 && ctx->nlimbo == 0) { - /* - * Increment ctx->nlimbo in order to keep another thread from - * winning the race to destroy ctx while this one has ctx->lock - * dropped. Without this, it would be possible for another - * thread to: - * - * 1) Sample an allocation associated with ctx. - * 2) Deallocate the sampled object. - * 3) Successfully prof_ctx_destroy(ctx). - * - * The result would be that ctx no longer exists by the time - * this thread accesses it in prof_ctx_destroy(). - */ - ctx->nlimbo++; - destroy = true; - } else - destroy = false; - malloc_mutex_unlock(ctx->lock); - if (destroy) - prof_ctx_destroy(ctx); + if (opt_prof_accum) + return (false); + if (tctx->cnts.curobjs != 0) + return (false); + return (true); } static bool -prof_lookup_global(prof_bt_t *bt, prof_tdata_t *prof_tdata, void **p_btkey, - prof_ctx_t **p_ctx, bool *p_new_ctx) +prof_gctx_should_destroy(prof_gctx_t *gctx) +{ + + if (opt_prof_accum) + return (false); + if (tctx_tree_empty(&gctx->tctxs) == false) + return (false); + if (gctx->nlimbo != 0) + return (false); + return (true); +} + +/* tctx->tdata->lock is held upon entry, and released before return. 
*/ +static void +prof_tctx_destroy(prof_tctx_t *tctx) +{ + prof_gctx_t *gctx = tctx->gctx; + bool destroy_gctx; + + assert(tctx->cnts.curobjs == 0); + assert(tctx->cnts.curbytes == 0); + assert(opt_prof_accum == false); + assert(tctx->cnts.accumobjs == 0); + assert(tctx->cnts.accumbytes == 0); + + { + prof_tdata_t *tdata = tctx->tdata; + bool tdata_destroy; + + ckh_remove(&tdata->bt2tctx, &gctx->bt, NULL, NULL); + tdata_destroy = prof_tdata_should_destroy(tdata); + malloc_mutex_unlock(tdata->lock); + if (tdata_destroy) + prof_tdata_destroy(tdata); + } + + malloc_mutex_lock(gctx->lock); + tctx_tree_remove(&gctx->tctxs, tctx); + if (prof_gctx_should_destroy(gctx)) { + /* + * Increment gctx->nlimbo in order to keep another thread from + * winning the race to destroy gctx while this one has + * gctx->lock dropped. Without this, it would be possible for + * another thread to: + * + * 1) Sample an allocation associated with gctx. + * 2) Deallocate the sampled object. + * 3) Successfully prof_gctx_maybe_destroy(gctx). + * + * The result would be that gctx no longer exists by the time + * this thread accesses it in prof_gctx_maybe_destroy(). + */ + gctx->nlimbo++; + destroy_gctx = true; + } else + destroy_gctx = false; + malloc_mutex_unlock(gctx->lock); + if (destroy_gctx) + prof_gctx_maybe_destroy(gctx); + + idalloc(tctx); +} + +static bool +prof_lookup_global(prof_bt_t *bt, prof_tdata_t *tdata, void **p_btkey, + prof_gctx_t **p_gctx, bool *p_new_gctx) { union { - prof_ctx_t *p; + prof_gctx_t *p; void *v; - } ctx; + } gctx; union { prof_bt_t *p; void *v; } btkey; - bool new_ctx; + bool new_gctx; - prof_enter(prof_tdata); - if (ckh_search(&bt2ctx, bt, &btkey.v, &ctx.v)) { + prof_enter(tdata); + if (ckh_search(&bt2gctx, bt, &btkey.v, &gctx.v)) { /* bt has never been seen before. Insert it. 
*/ - ctx.p = prof_ctx_create(bt); - if (ctx.v == NULL) { - prof_leave(prof_tdata); + gctx.p = prof_gctx_create(bt); + if (gctx.v == NULL) { + prof_leave(tdata); return (true); } - btkey.p = &ctx.p->bt; - if (ckh_insert(&bt2ctx, btkey.v, ctx.v)) { + btkey.p = &gctx.p->bt; + if (ckh_insert(&bt2gctx, btkey.v, gctx.v)) { /* OOM. */ - prof_leave(prof_tdata); - idalloc(ctx.v); + prof_leave(tdata); + idalloc(gctx.v); return (true); } - new_ctx = true; + new_gctx = true; } else { /* * Increment nlimbo, in order to avoid a race condition with - * prof_ctx_merge()/prof_ctx_destroy(). + * prof_tctx_destroy()/prof_gctx_maybe_destroy(). */ - malloc_mutex_lock(ctx.p->lock); - ctx.p->nlimbo++; - malloc_mutex_unlock(ctx.p->lock); - new_ctx = false; + malloc_mutex_lock(gctx.p->lock); + gctx.p->nlimbo++; + malloc_mutex_unlock(gctx.p->lock); + new_gctx = false; } - prof_leave(prof_tdata); + prof_leave(tdata); *p_btkey = btkey.v; - *p_ctx = ctx.p; - *p_new_ctx = new_ctx; + *p_gctx = gctx.p; + *p_new_gctx = new_gctx; return (false); } -prof_thr_cnt_t * +prof_tctx_t * prof_lookup(prof_bt_t *bt) { union { - prof_thr_cnt_t *p; + prof_tctx_t *p; void *v; } ret; - prof_tdata_t *prof_tdata; + prof_tdata_t *tdata; + bool not_found; cassert(config_prof); - prof_tdata = prof_tdata_get(false); - if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) + tdata = prof_tdata_get(false); + if ((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) return (NULL); - if (ckh_search(&prof_tdata->bt2cnt, bt, NULL, &ret.v)) { + malloc_mutex_lock(tdata->lock); + not_found = ckh_search(&tdata->bt2tctx, bt, NULL, &ret.v); + malloc_mutex_unlock(tdata->lock); + if (not_found) { void *btkey; - prof_ctx_t *ctx; - bool new_ctx; + prof_gctx_t *gctx; + bool new_gctx, error; /* * This thread's cache lacks bt. Look for it in the global * cache. 
*/ - if (prof_lookup_global(bt, prof_tdata, &btkey, &ctx, &new_ctx)) + if (prof_lookup_global(bt, tdata, &btkey, &gctx, + &new_gctx)) return (NULL); - /* Link a prof_thd_cnt_t into ctx for this thread. */ - ret.v = imalloc(sizeof(prof_thr_cnt_t)); + /* Link a prof_tctx_t into gctx for this thread. */ + ret.v = imalloc(sizeof(prof_tctx_t)); if (ret.p == NULL) { - if (new_ctx) - prof_ctx_destroy(ctx); + if (new_gctx) + prof_gctx_maybe_destroy(gctx); return (NULL); } - ret.p->ctx = ctx; - ret.p->epoch = 0; + ret.p->tdata = tdata; memset(&ret.p->cnts, 0, sizeof(prof_cnt_t)); - if (ckh_insert(&prof_tdata->bt2cnt, btkey, ret.v)) { - if (new_ctx) - prof_ctx_destroy(ctx); + ret.p->gctx = gctx; + ret.p->state = prof_tctx_state_nominal; + malloc_mutex_lock(tdata->lock); + error = ckh_insert(&tdata->bt2tctx, btkey, ret.v); + malloc_mutex_unlock(tdata->lock); + if (error) { + if (new_gctx) + prof_gctx_maybe_destroy(gctx); idalloc(ret.v); return (NULL); } - malloc_mutex_lock(ctx->lock); - thr_cnt_tree_insert(&ctx->thr_cnts, ret.p); - ctx->nlimbo--; - malloc_mutex_unlock(ctx->lock); + malloc_mutex_lock(gctx->lock); + tctx_tree_insert(&gctx->tctxs, ret.p); + gctx->nlimbo--; + malloc_mutex_unlock(gctx->lock); } return (ret.p); } - void -prof_sample_threshold_update(prof_tdata_t *prof_tdata) +prof_sample_threshold_update(prof_tdata_t *tdata) { /* * The body of this function is compiled out unless heap profiling is @@ -608,23 +731,20 @@ prof_sample_threshold_update(prof_tdata_t *prof_tdata) if (!config_prof) return; - if (prof_tdata == NULL) - prof_tdata = prof_tdata_get(false); - - if (opt_lg_prof_sample == 0) { - prof_tdata->bytes_until_sample = 0; + if (lg_prof_sample == 0) { + tdata->bytes_until_sample = 0; return; } /* - * Compute sample threshold as a geometrically distributed random - * variable with mean (2^opt_lg_prof_sample). + * Compute sample interval as a geometrically distributed random + * variable with mean (2^lg_prof_sample). 
* - * __ __ - * | log(u) | 1 - * prof_tdata->threshold = | -------- |, where p = ------------------- - * | log(1-p) | opt_lg_prof_sample - * 2 + * __ __ + * | log(u) | 1 + * tdata->bytes_until_sample = | -------- |, where p = --------------- + * | log(1-p) | lg_prof_sample + * 2 * * For more information on the math, see: * @@ -634,30 +754,29 @@ prof_sample_threshold_update(prof_tdata_t *prof_tdata) * pp 500 * (http://luc.devroye.org/rnbookindex.html) */ - prng64(r, 53, prof_tdata->prng_state, - UINT64_C(6364136223846793005), UINT64_C(1442695040888963407)); + prng64(r, 53, tdata->prng_state, UINT64_C(6364136223846793005), + UINT64_C(1442695040888963407)); u = (double)r * (1.0/9007199254740992.0L); - prof_tdata->bytes_until_sample = (uint64_t)(log(u) / - log(1.0 - (1.0 / (double)((uint64_t)1U << opt_lg_prof_sample)))) + tdata->bytes_until_sample = (uint64_t)(log(u) / + log(1.0 - (1.0 / (double)((uint64_t)1U << lg_prof_sample)))) + (uint64_t)1U; #endif } - #ifdef JEMALLOC_JET size_t prof_bt_count(void) { size_t bt_count; - prof_tdata_t *prof_tdata; + prof_tdata_t *tdata; - prof_tdata = prof_tdata_get(false); - if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) + tdata = prof_tdata_get(false); + if ((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) return (0); - prof_enter(prof_tdata); - bt_count = ckh_count(&bt2ctx); - prof_leave(prof_tdata); + prof_enter(tdata); + bt_count = ckh_count(&bt2gctx); + prof_leave(tdata); return (bt_count); } @@ -770,146 +889,249 @@ prof_dump_printf(bool propagate_err, const char *format, ...) return (ret); } -static prof_thr_cnt_t * -ctx_sum_iter(prof_thr_cnt_tree_t *thr_cnts, prof_thr_cnt_t *thr_cnt, void *arg) +/* tctx->tdata->lock is held. 
*/ +static void +prof_tctx_merge_tdata(prof_tctx_t *tctx, prof_tdata_t *tdata) { - prof_ctx_t *ctx = (prof_ctx_t *)arg; - volatile unsigned *epoch = &thr_cnt->epoch; - prof_cnt_t tcnt; - while (true) { - unsigned epoch0 = *epoch; + assert(tctx->state == prof_tctx_state_nominal); + tctx->state = prof_tctx_state_dumping; + memcpy(&tctx->dump_cnts, &tctx->cnts, sizeof(prof_cnt_t)); - /* Make sure epoch is even. */ - if (epoch0 & 1U) - continue; - - memcpy(&tcnt, &thr_cnt->cnts, sizeof(prof_cnt_t)); - - /* Terminate if epoch didn't change while reading. */ - if (*epoch == epoch0) - break; - } - - ctx->cnt_summed.curobjs += tcnt.curobjs; - ctx->cnt_summed.curbytes += tcnt.curbytes; + tdata->cnt_summed.curobjs += tctx->dump_cnts.curobjs; + tdata->cnt_summed.curbytes += tctx->dump_cnts.curbytes; if (opt_prof_accum) { - ctx->cnt_summed.accumobjs += tcnt.accumobjs; - ctx->cnt_summed.accumbytes += tcnt.accumbytes; + tdata->cnt_summed.accumobjs += tctx->dump_cnts.accumobjs; + tdata->cnt_summed.accumbytes += tctx->dump_cnts.accumbytes; + } +} + +/* gctx->lock is held. */ +static void +prof_tctx_merge_gctx(prof_tctx_t *tctx, prof_gctx_t *gctx) +{ + + gctx->cnt_summed.curobjs += tctx->dump_cnts.curobjs; + gctx->cnt_summed.curbytes += tctx->dump_cnts.curbytes; + if (opt_prof_accum) { + gctx->cnt_summed.accumobjs += tctx->dump_cnts.accumobjs; + gctx->cnt_summed.accumbytes += tctx->dump_cnts.accumbytes; + } +} + +/* tctx->gctx is held. */ +static prof_tctx_t * +prof_tctx_merge_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) +{ + + switch (tctx->state) { + case prof_tctx_state_nominal: + /* New since dumping started; ignore. */ + break; + case prof_tctx_state_dumping: + case prof_tctx_state_purgatory: + prof_tctx_merge_gctx(tctx, tctx->gctx); + break; + default: + not_reached(); } return (NULL); } +/* gctx->lock is held. 
*/ +static prof_tctx_t * +prof_tctx_dump_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) +{ + bool propagate_err = *(bool *)arg; + + if (prof_dump_printf(propagate_err, + " t%"PRIu64": %"PRIu64": %"PRIu64" [%"PRIu64": %"PRIu64"]\n", + tctx->tdata->thr_uid, tctx->dump_cnts.curobjs, + tctx->dump_cnts.curbytes, tctx->dump_cnts.accumobjs, + tctx->dump_cnts.accumbytes)) + return (tctx); + return (NULL); +} + +/* tctx->gctx is held. */ +static prof_tctx_t * +prof_tctx_finish_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) +{ + prof_tctx_t *ret; + + switch (tctx->state) { + case prof_tctx_state_nominal: + /* New since dumping started; ignore. */ + break; + case prof_tctx_state_dumping: + tctx->state = prof_tctx_state_nominal; + break; + case prof_tctx_state_purgatory: + ret = tctx_tree_next(tctxs, tctx); + tctx_tree_remove(tctxs, tctx); + idalloc(tctx); + goto label_return; + default: + not_reached(); + } + + ret = NULL; +label_return: + return (ret); +} + static void -prof_dump_ctx_prep(prof_ctx_t *ctx, prof_cnt_t *cnt_all, size_t *leak_nctx, - prof_ctx_tree_t *ctxs) +prof_dump_gctx_prep(prof_gctx_t *gctx, prof_gctx_tree_t *gctxs) { cassert(config_prof); - malloc_mutex_lock(ctx->lock); + malloc_mutex_lock(gctx->lock); /* - * Increment nlimbo so that ctx won't go away before dump. - * Additionally, link ctx into the dump list so that it is included in + * Increment nlimbo so that gctx won't go away before dump. + * Additionally, link gctx into the dump list so that it is included in * prof_dump()'s second pass. */ - ctx->nlimbo++; - ctx_tree_insert(ctxs, ctx); + gctx->nlimbo++; + gctx_tree_insert(gctxs, gctx); - memcpy(&ctx->cnt_summed, &ctx->cnt_merged, sizeof(prof_cnt_t)); - thr_cnt_tree_iter(&ctx->thr_cnts, NULL, ctx_sum_iter, (void *)ctx); + memset(&gctx->cnt_summed, 0, sizeof(prof_cnt_t)); - if (ctx->cnt_summed.curobjs != 0) - (*leak_nctx)++; + malloc_mutex_unlock(gctx->lock); +} - /* Add to cnt_all. 
*/ - cnt_all->curobjs += ctx->cnt_summed.curobjs; - cnt_all->curbytes += ctx->cnt_summed.curbytes; - if (opt_prof_accum) { - cnt_all->accumobjs += ctx->cnt_summed.accumobjs; - cnt_all->accumbytes += ctx->cnt_summed.accumbytes; - } +static prof_gctx_t * +prof_gctx_merge_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *arg) +{ + size_t *leak_ngctx = (size_t *)arg; - malloc_mutex_unlock(ctx->lock); + malloc_mutex_lock(gctx->lock); + tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_merge_iter, NULL); + if (gctx->cnt_summed.curobjs != 0) + (*leak_ngctx)++; + malloc_mutex_unlock(gctx->lock); + + return (NULL); +} + +static prof_gctx_t * +prof_gctx_finish_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *arg) +{ + prof_tctx_t *next; + bool destroy_gctx; + + malloc_mutex_lock(gctx->lock); + next = NULL; + do { + next = tctx_tree_iter(&gctx->tctxs, next, prof_tctx_finish_iter, + NULL); + } while (next != NULL); + gctx->nlimbo--; + destroy_gctx = prof_gctx_should_destroy(gctx); + malloc_mutex_unlock(gctx->lock); + if (destroy_gctx) + prof_gctx_maybe_destroy(gctx); + + return (NULL); +} + +static prof_tdata_t * +prof_tdata_merge_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, void *arg) +{ + prof_cnt_t *cnt_all = (prof_cnt_t *)arg; + + malloc_mutex_lock(tdata->lock); + if (tdata->state != prof_tdata_state_expired) { + size_t tabind; + union { + prof_tctx_t *p; + void *v; + } tctx; + + tdata->dumping = true; + memset(&tdata->cnt_summed, 0, sizeof(prof_cnt_t)); + for (tabind = 0; ckh_iter(&tdata->bt2tctx, &tabind, NULL, + &tctx.v) == false;) + prof_tctx_merge_tdata(tctx.p, tdata); + + cnt_all->curobjs += tdata->cnt_summed.curobjs; + cnt_all->curbytes += tdata->cnt_summed.curbytes; + if (opt_prof_accum) { + cnt_all->accumobjs += tdata->cnt_summed.accumobjs; + cnt_all->accumbytes += tdata->cnt_summed.accumbytes; + } + } else + tdata->dumping = false; + malloc_mutex_unlock(tdata->lock); + + return (NULL); +} + +static prof_tdata_t * 
+prof_tdata_dump_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, void *arg) +{ + bool propagate_err = *(bool *)arg; + + if (tdata->dumping == false) + return (NULL); + + if (prof_dump_printf(propagate_err, + " t%"PRIu64": %"PRIu64": %"PRIu64" [%"PRIu64": %"PRIu64"]%s%s\n", + tdata->thr_uid, tdata->cnt_summed.curobjs, + tdata->cnt_summed.curbytes, tdata->cnt_summed.accumobjs, + tdata->cnt_summed.accumbytes, + (tdata->thread_name != NULL) ? " " : "", + (tdata->thread_name != NULL) ? tdata->thread_name : "")) + return (tdata); + return (NULL); } static bool prof_dump_header(bool propagate_err, const prof_cnt_t *cnt_all) { + bool ret; - if (opt_lg_prof_sample == 0) { - if (prof_dump_printf(propagate_err, - "heap profile: %"PRId64": %"PRId64 - " [%"PRIu64": %"PRIu64"] @ heapprofile\n", - cnt_all->curobjs, cnt_all->curbytes, - cnt_all->accumobjs, cnt_all->accumbytes)) - return (true); - } else { - if (prof_dump_printf(propagate_err, - "heap profile: %"PRId64": %"PRId64 - " [%"PRIu64": %"PRIu64"] @ heap_v2/%"PRIu64"\n", - cnt_all->curobjs, cnt_all->curbytes, - cnt_all->accumobjs, cnt_all->accumbytes, - ((uint64_t)1U << opt_lg_prof_sample))) - return (true); - } + if (prof_dump_printf(propagate_err, + "heap_v2/%"PRIu64"\n" + " t*: %"PRIu64": %"PRIu64" [%"PRIu64": %"PRIu64"]\n", + ((uint64_t)1U << lg_prof_sample), cnt_all->curobjs, + cnt_all->curbytes, cnt_all->accumobjs, cnt_all->accumbytes)) + return (true); - return (false); -} - -static void -prof_dump_ctx_cleanup_locked(prof_ctx_t *ctx, prof_ctx_tree_t *ctxs) -{ - - ctx->nlimbo--; -} - -static void -prof_dump_ctx_cleanup(prof_ctx_t *ctx, prof_ctx_tree_t *ctxs) -{ - - malloc_mutex_lock(ctx->lock); - prof_dump_ctx_cleanup_locked(ctx, ctxs); - malloc_mutex_unlock(ctx->lock); + malloc_mutex_lock(&tdatas_mtx); + ret = (tdata_tree_iter(&tdatas, NULL, prof_tdata_dump_iter, + (void *)&propagate_err) != NULL); + malloc_mutex_unlock(&tdatas_mtx); + return (ret); } +/* gctx->lock is held. 
*/ static bool -prof_dump_ctx(bool propagate_err, prof_ctx_t *ctx, const prof_bt_t *bt, - prof_ctx_tree_t *ctxs) +prof_dump_gctx(bool propagate_err, prof_gctx_t *gctx, const prof_bt_t *bt, + prof_gctx_tree_t *gctxs) { bool ret; unsigned i; cassert(config_prof); - /* - * Current statistics can sum to 0 as a result of unmerged per thread - * statistics. Additionally, interval- and growth-triggered dumps can - * occur between the time a ctx is created and when its statistics are - * filled in. Avoid dumping any ctx that is an artifact of either - * implementation detail. - */ - malloc_mutex_lock(ctx->lock); - if ((opt_prof_accum == false && ctx->cnt_summed.curobjs == 0) || - (opt_prof_accum && ctx->cnt_summed.accumobjs == 0)) { - assert(ctx->cnt_summed.curobjs == 0); - assert(ctx->cnt_summed.curbytes == 0); - assert(ctx->cnt_summed.accumobjs == 0); - assert(ctx->cnt_summed.accumbytes == 0); + /* Avoid dumping such gctx's that have no useful data. */ + if ((opt_prof_accum == false && gctx->cnt_summed.curobjs == 0) || + (opt_prof_accum && gctx->cnt_summed.accumobjs == 0)) { + assert(gctx->cnt_summed.curobjs == 0); + assert(gctx->cnt_summed.curbytes == 0); + assert(gctx->cnt_summed.accumobjs == 0); + assert(gctx->cnt_summed.accumbytes == 0); ret = false; goto label_return; } - if (prof_dump_printf(propagate_err, "%"PRId64": %"PRId64 - " [%"PRIu64": %"PRIu64"] @", - ctx->cnt_summed.curobjs, ctx->cnt_summed.curbytes, - ctx->cnt_summed.accumobjs, ctx->cnt_summed.accumbytes)) { + if (prof_dump_printf(propagate_err, "@")) { ret = true; goto label_return; } - for (i = 0; i < bt->len; i++) { if (prof_dump_printf(propagate_err, " %#"PRIxPTR, (uintptr_t)bt->vec[i])) { @@ -918,15 +1140,23 @@ prof_dump_ctx(bool propagate_err, prof_ctx_t *ctx, const prof_bt_t *bt, } } - if (prof_dump_write(propagate_err, "\n")) { + if (prof_dump_printf(propagate_err, + "\n" + " t*: %"PRIu64": %"PRIu64" [%"PRIu64": %"PRIu64"]\n", + gctx->cnt_summed.curobjs, gctx->cnt_summed.curbytes, + 
gctx->cnt_summed.accumobjs, gctx->cnt_summed.accumbytes)) { + ret = true; + goto label_return; + } + + if (tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_dump_iter, + (void *)&propagate_err) != NULL) { ret = true; goto label_return; } ret = false; label_return: - prof_dump_ctx_cleanup_locked(ctx, ctxs); - malloc_mutex_unlock(ctx->lock); return (ret); } @@ -980,72 +1210,85 @@ label_return: } static void -prof_leakcheck(const prof_cnt_t *cnt_all, size_t leak_nctx, +prof_leakcheck(const prof_cnt_t *cnt_all, size_t leak_ngctx, const char *filename) { if (cnt_all->curbytes != 0) { - malloc_printf(": Leak summary: %"PRId64" byte%s, %" - PRId64" object%s, %zu context%s\n", + malloc_printf(": Leak summary: %"PRIu64" byte%s, %" + PRIu64" object%s, %zu context%s\n", cnt_all->curbytes, (cnt_all->curbytes != 1) ? "s" : "", cnt_all->curobjs, (cnt_all->curobjs != 1) ? "s" : "", - leak_nctx, (leak_nctx != 1) ? "s" : ""); + leak_ngctx, (leak_ngctx != 1) ? "s" : ""); malloc_printf( ": Run pprof on \"%s\" for leak detail\n", filename); } } -static prof_ctx_t * -prof_ctx_dump_iter(prof_ctx_tree_t *ctxs, prof_ctx_t *ctx, void *arg) +static prof_gctx_t * +prof_gctx_dump_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *arg) { + prof_gctx_t *ret; bool propagate_err = *(bool *)arg; - if (prof_dump_ctx(propagate_err, ctx, &ctx->bt, ctxs)) - return (ctx_tree_next(ctxs, ctx)); + malloc_mutex_lock(gctx->lock); - return (NULL); -} + if (prof_dump_gctx(propagate_err, gctx, &gctx->bt, gctxs)) { + ret = gctx_tree_next(gctxs, gctx); + goto label_return; + } -static prof_ctx_t * -prof_ctx_cleanup_iter(prof_ctx_tree_t *ctxs, prof_ctx_t *ctx, void *arg) -{ - - prof_dump_ctx_cleanup(ctx, ctxs); - - return (NULL); + ret = NULL; +label_return: + malloc_mutex_unlock(gctx->lock); + return (ret); } static bool prof_dump(bool propagate_err, const char *filename, bool leakcheck) { - prof_tdata_t *prof_tdata; + prof_tdata_t *tdata; prof_cnt_t cnt_all; size_t tabind; union { - prof_ctx_t *p; + prof_gctx_t 
*p; void *v; - } ctx; - size_t leak_nctx; - prof_ctx_tree_t ctxs; - prof_ctx_t *cleanup_start = NULL; + } gctx; + size_t leak_ngctx; + prof_gctx_tree_t gctxs; cassert(config_prof); - prof_tdata = prof_tdata_get(false); - if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) + tdata = prof_tdata_get(false); + if ((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) return (true); malloc_mutex_lock(&prof_dump_mtx); + prof_enter(tdata); - /* Merge per thread profile stats, and sum them in cnt_all. */ + /* + * Put gctx's in limbo and clear their counters in preparation for + * summing. + */ + gctx_tree_new(&gctxs); + for (tabind = 0; ckh_iter(&bt2gctx, &tabind, NULL, &gctx.v) == false;) + prof_dump_gctx_prep(gctx.p, &gctxs); + + /* + * Iterate over tdatas, and for the non-expired ones snapshot their tctx + * stats and merge them into the associated gctx's. + */ memset(&cnt_all, 0, sizeof(prof_cnt_t)); - leak_nctx = 0; - ctx_tree_new(&ctxs); - prof_enter(prof_tdata); - for (tabind = 0; ckh_iter(&bt2ctx, &tabind, NULL, &ctx.v) == false;) - prof_dump_ctx_prep(ctx.p, &cnt_all, &leak_nctx, &ctxs); - prof_leave(prof_tdata); + malloc_mutex_lock(&tdatas_mtx); + tdata_tree_iter(&tdatas, NULL, prof_tdata_merge_iter, (void *)&cnt_all); + malloc_mutex_unlock(&tdatas_mtx); + + /* Merge tctx stats into gctx's. */ + leak_ngctx = 0; + gctx_tree_iter(&gctxs, NULL, prof_gctx_merge_iter, (void *)&leak_ngctx); + + prof_leave(tdata); /* Create dump file. */ if ((prof_dump_fd = prof_dump_open(propagate_err, filename)) == -1) @@ -1055,10 +1298,9 @@ prof_dump(bool propagate_err, const char *filename, bool leakcheck) if (prof_dump_header(propagate_err, &cnt_all)) goto label_write_error; - /* Dump per ctx profile stats. */ - cleanup_start = ctx_tree_iter(&ctxs, NULL, prof_ctx_dump_iter, - (void *)&propagate_err); - if (cleanup_start != NULL) + /* Dump per gctx profile stats. 
*/ + if (gctx_tree_iter(&gctxs, NULL, prof_gctx_dump_iter, + (void *)&propagate_err) != NULL) goto label_write_error; /* Dump /proc//maps if possible. */ @@ -1068,19 +1310,17 @@ prof_dump(bool propagate_err, const char *filename, bool leakcheck) if (prof_dump_close(propagate_err)) goto label_open_close_error; + gctx_tree_iter(&gctxs, NULL, prof_gctx_finish_iter, NULL); malloc_mutex_unlock(&prof_dump_mtx); if (leakcheck) - prof_leakcheck(&cnt_all, leak_nctx, filename); + prof_leakcheck(&cnt_all, leak_ngctx, filename); return (false); label_write_error: prof_dump_close(propagate_err); label_open_close_error: - if (cleanup_start != NULL) { - ctx_tree_iter(&ctxs, cleanup_start, prof_ctx_cleanup_iter, - NULL); - } + gctx_tree_iter(&gctxs, NULL, prof_gctx_finish_iter, NULL); malloc_mutex_unlock(&prof_dump_mtx); return (true); } @@ -1128,18 +1368,18 @@ prof_fdump(void) void prof_idump(void) { - prof_tdata_t *prof_tdata; + prof_tdata_t *tdata; char filename[PATH_MAX + 1]; cassert(config_prof); if (prof_booted == false) return; - prof_tdata = prof_tdata_get(false); - if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) + tdata = prof_tdata_get(false); + if ((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) return; - if (prof_tdata->enq) { - prof_tdata->enq_idump = true; + if (tdata->enq) { + tdata->enq_idump = true; return; } @@ -1178,18 +1418,18 @@ prof_mdump(const char *filename) void prof_gdump(void) { - prof_tdata_t *prof_tdata; + prof_tdata_t *tdata; char filename[DUMP_FILENAME_BUFSIZE]; cassert(config_prof); if (prof_booted == false) return; - prof_tdata = prof_tdata_get(false); - if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) + tdata = prof_tdata_get(false); + if ((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) return; - if (prof_tdata->enq) { - prof_tdata->enq_gdump = true; + if (tdata->enq) { + tdata->enq_gdump = true; return; } @@ -1225,81 +1465,233 @@ prof_bt_keycomp(const void *k1, const void *k2) return (memcmp(bt1->vec, 
bt2->vec, bt1->len * sizeof(void *)) == 0); } -prof_tdata_t * -prof_tdata_init(void) +JEMALLOC_INLINE_C uint64_t +prof_thr_uid_alloc(void) { - prof_tdata_t *prof_tdata; + + return (atomic_add_uint64(&next_thr_uid, 1) - 1); +} + +static prof_tdata_t * +prof_tdata_init_impl(uint64_t thr_uid) +{ + prof_tdata_t *tdata; cassert(config_prof); /* Initialize an empty cache for this thread. */ - prof_tdata = (prof_tdata_t *)imalloc(sizeof(prof_tdata_t)); - if (prof_tdata == NULL) + tdata = (prof_tdata_t *)imalloc(sizeof(prof_tdata_t)); + if (tdata == NULL) return (NULL); - if (ckh_new(&prof_tdata->bt2cnt, PROF_CKH_MINITEMS, + tdata->lock = prof_tdata_mutex_choose(thr_uid); + tdata->thr_uid = thr_uid; + tdata->thread_name = NULL; + tdata->state = prof_tdata_state_attached; + + if (ckh_new(&tdata->bt2tctx, PROF_CKH_MINITEMS, prof_bt_hash, prof_bt_keycomp)) { - idalloc(prof_tdata); + idalloc(tdata); return (NULL); } - prof_tdata->prng_state = (uint64_t)(uintptr_t)prof_tdata; - prof_sample_threshold_update(prof_tdata); + tdata->prng_state = (uint64_t)(uintptr_t)tdata; + prof_sample_threshold_update(tdata); - prof_tdata->enq = false; - prof_tdata->enq_idump = false; - prof_tdata->enq_gdump = false; + tdata->enq = false; + tdata->enq_idump = false; + tdata->enq_gdump = false; - prof_tdata_tsd_set(&prof_tdata); + tdata->dumping = false; + tdata->active = true; - return (prof_tdata); + prof_tdata_tsd_set(&tdata); + + malloc_mutex_lock(&tdatas_mtx); + tdata_tree_insert(&tdatas, tdata); + malloc_mutex_unlock(&tdatas_mtx); + + return (tdata); +} + +prof_tdata_t * +prof_tdata_init(void) +{ + + return (prof_tdata_init_impl(prof_thr_uid_alloc())); +} + +prof_tdata_t * +prof_tdata_reinit(prof_tdata_t *tdata) +{ + + return (prof_tdata_init_impl(tdata->thr_uid)); +} + +/* tdata->lock must be held. 
*/ +static bool +prof_tdata_should_destroy(prof_tdata_t *tdata) +{ + + if (tdata->state == prof_tdata_state_attached) + return (false); + if (ckh_count(&tdata->bt2tctx) != 0) + return (false); + return (true); +} + +static void +prof_tdata_destroy(prof_tdata_t *tdata) +{ + + assert(prof_tdata_should_destroy(tdata)); + + malloc_mutex_lock(&tdatas_mtx); + tdata_tree_remove(&tdatas, tdata); + malloc_mutex_unlock(&tdatas_mtx); + + if (tdata->thread_name != NULL) + idalloc(tdata->thread_name); + ckh_delete(&tdata->bt2tctx); + idalloc(tdata); +} + +static void +prof_tdata_state_transition(prof_tdata_t *tdata, prof_tdata_state_t state) +{ + bool destroy_tdata; + + malloc_mutex_lock(tdata->lock); + if (tdata->state != state) { + tdata->state = state; + destroy_tdata = prof_tdata_should_destroy(tdata); + } else + destroy_tdata = false; + malloc_mutex_unlock(tdata->lock); + if (destroy_tdata) + prof_tdata_destroy(tdata); +} + +static void +prof_tdata_detach(prof_tdata_t *tdata) +{ + + prof_tdata_state_transition(tdata, prof_tdata_state_detached); +} + +static void +prof_tdata_expire(prof_tdata_t *tdata) +{ + + prof_tdata_state_transition(tdata, prof_tdata_state_expired); +} + +static prof_tdata_t * +prof_tdata_reset_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, void *arg) +{ + + prof_tdata_expire(tdata); + return (NULL); +} + +void +prof_reset(size_t lg_sample) +{ + + assert(lg_sample < (sizeof(uint64_t) << 3)); + + malloc_mutex_lock(&prof_dump_mtx); + malloc_mutex_lock(&tdatas_mtx); + + lg_prof_sample = lg_sample; + tdata_tree_iter(&tdatas, NULL, prof_tdata_reset_iter, NULL); + + malloc_mutex_unlock(&tdatas_mtx); + malloc_mutex_unlock(&prof_dump_mtx); } void prof_tdata_cleanup(void *arg) { - prof_tdata_t *prof_tdata = *(prof_tdata_t **)arg; + prof_tdata_t *tdata = *(prof_tdata_t **)arg; cassert(config_prof); - if (prof_tdata == PROF_TDATA_STATE_REINCARNATED) { + if (tdata == PROF_TDATA_STATE_REINCARNATED) { /* * Another destructor deallocated memory after this 
destructor - * was called. Reset prof_tdata to PROF_TDATA_STATE_PURGATORY - * in order to receive another callback. + * was called. Reset tdata to PROF_TDATA_STATE_PURGATORY in + * order to receive another callback. */ - prof_tdata = PROF_TDATA_STATE_PURGATORY; - prof_tdata_tsd_set(&prof_tdata); - } else if (prof_tdata == PROF_TDATA_STATE_PURGATORY) { + tdata = PROF_TDATA_STATE_PURGATORY; + prof_tdata_tsd_set(&tdata); + } else if (tdata == PROF_TDATA_STATE_PURGATORY) { /* * The previous time this destructor was called, we set the key * to PROF_TDATA_STATE_PURGATORY so that other destructors - * wouldn't cause re-creation of the prof_tdata. This time, do + * wouldn't cause re-creation of the tdata. This time, do * nothing, so that the destructor will not be called again. */ - } else if (prof_tdata != NULL) { - union { - prof_thr_cnt_t *p; - void *v; - } cnt; - size_t tabind; - - /* - * Iteratively merge cnt's into the global stats and delete - * them. - */ - for (tabind = 0; ckh_iter(&prof_tdata->bt2cnt, &tabind, NULL, - &cnt.v);) { - prof_ctx_merge(cnt.p->ctx, cnt.p); - idalloc(cnt.v); - } - ckh_delete(&prof_tdata->bt2cnt); - idalloc(prof_tdata); - prof_tdata = PROF_TDATA_STATE_PURGATORY; - prof_tdata_tsd_set(&prof_tdata); + } else if (tdata != NULL) { + prof_tdata_detach(tdata); + tdata = PROF_TDATA_STATE_PURGATORY; + prof_tdata_tsd_set(&tdata); } } +const char * +prof_thread_name_get(void) +{ + prof_tdata_t *tdata = prof_tdata_get(true); + if (tdata == NULL) + return (NULL); + return (tdata->thread_name); +} + +bool +prof_thread_name_set(const char *thread_name) +{ + prof_tdata_t *tdata; + size_t size; + char *s; + + tdata = prof_tdata_get(true); + if (tdata == NULL) + return (true); + + size = strlen(thread_name) + 1; + s = imalloc(size); + if (s == NULL) + return (true); + + memcpy(s, thread_name, size); + if (tdata->thread_name != NULL) + idalloc(tdata->thread_name); + tdata->thread_name = s; + return (false); +} + +bool +prof_thread_active_get(void) +{ + 
prof_tdata_t *tdata = prof_tdata_get(true); + if (tdata == NULL) + return (false); + return (tdata->active); +} + +bool +prof_thread_active_set(bool active) +{ + prof_tdata_t *tdata; + + tdata = prof_tdata_get(true); + if (tdata == NULL) + return (true); + tdata->active = active; + return (false); +} + void prof_boot0(void) { @@ -1345,10 +1737,12 @@ prof_boot2(void) if (opt_prof) { unsigned i; - if (ckh_new(&bt2ctx, PROF_CKH_MINITEMS, prof_bt_hash, + lg_prof_sample = opt_lg_prof_sample; + + if (ckh_new(&bt2gctx, PROF_CKH_MINITEMS, prof_bt_hash, prof_bt_keycomp)) return (true); - if (malloc_mutex_init(&bt2ctx_mtx)) + if (malloc_mutex_init(&bt2gctx_mtx)) return (true); if (prof_tdata_tsd_boot()) { malloc_write( @@ -1356,6 +1750,12 @@ prof_boot2(void) abort(); } + tdata_tree_new(&tdatas); + if (malloc_mutex_init(&tdatas_mtx)) + return (true); + + next_thr_uid = 0; + if (malloc_mutex_init(&prof_dump_seq_mtx)) return (true); if (malloc_mutex_init(&prof_dump_mtx)) @@ -1367,12 +1767,21 @@ prof_boot2(void) abort(); } - ctx_locks = (malloc_mutex_t *)base_alloc(PROF_NCTX_LOCKS * + gctx_locks = (malloc_mutex_t *)base_alloc(PROF_NCTX_LOCKS * sizeof(malloc_mutex_t)); - if (ctx_locks == NULL) + if (gctx_locks == NULL) return (true); for (i = 0; i < PROF_NCTX_LOCKS; i++) { - if (malloc_mutex_init(&ctx_locks[i])) + if (malloc_mutex_init(&gctx_locks[i])) + return (true); + } + + tdata_locks = (malloc_mutex_t *)base_alloc(PROF_NTDATA_LOCKS * + sizeof(malloc_mutex_t)); + if (tdata_locks == NULL) + return (true); + for (i = 0; i < PROF_NTDATA_LOCKS; i++) { + if (malloc_mutex_init(&tdata_locks[i])) return (true); } } @@ -1397,10 +1806,10 @@ prof_prefork(void) if (opt_prof) { unsigned i; - malloc_mutex_prefork(&bt2ctx_mtx); + malloc_mutex_prefork(&bt2gctx_mtx); malloc_mutex_prefork(&prof_dump_seq_mtx); for (i = 0; i < PROF_NCTX_LOCKS; i++) - malloc_mutex_prefork(&ctx_locks[i]); + malloc_mutex_prefork(&gctx_locks[i]); } } @@ -1412,9 +1821,9 @@ prof_postfork_parent(void) unsigned i; for 
(i = 0; i < PROF_NCTX_LOCKS; i++) - malloc_mutex_postfork_parent(&ctx_locks[i]); + malloc_mutex_postfork_parent(&gctx_locks[i]); malloc_mutex_postfork_parent(&prof_dump_seq_mtx); - malloc_mutex_postfork_parent(&bt2ctx_mtx); + malloc_mutex_postfork_parent(&bt2gctx_mtx); } } @@ -1426,9 +1835,9 @@ prof_postfork_child(void) unsigned i; for (i = 0; i < PROF_NCTX_LOCKS; i++) - malloc_mutex_postfork_child(&ctx_locks[i]); + malloc_mutex_postfork_child(&gctx_locks[i]); malloc_mutex_postfork_child(&prof_dump_seq_mtx); - malloc_mutex_postfork_child(&bt2ctx_mtx); + malloc_mutex_postfork_child(&bt2gctx_mtx); } } diff --git a/src/stats.c b/src/stats.c index a0eb2971..db34275e 100644 --- a/src/stats.c +++ b/src/stats.c @@ -441,7 +441,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, } if ((err = je_mallctl("opt.prof", &bv, &bsz, NULL, 0)) == 0 && bv) { - CTL_GET("opt.lg_prof_sample", &sv, size_t); + CTL_GET("prof.lg_sample", &sv, size_t); malloc_cprintf(write_cb, cbopaque, "Average profile sample interval: %"PRIu64 " (2^%zu)\n", (((uint64_t)1U) << sv), sv); From 3e24afa28e01b743a9f7fa1d42acb67e079d8187 Mon Sep 17 00:00:00 2001 From: Sara Golemon Date: Mon, 18 Aug 2014 13:06:39 -0700 Subject: [PATCH 0499/3378] Test for availability of malloc hooks via autoconf __*_hook() is glibc, but on at least one glibc platform (homebrew), the __GLIBC__ define isn't set correctly and we miss being able to use these hooks. Do a feature test for it during configuration so that we enable it anywhere the hooks are actually available. 
--- configure.ac | 31 +++++++++++++++++++ .../internal/jemalloc_internal_defs.h.in | 6 ++++ src/jemalloc.c | 4 ++- 3 files changed, 40 insertions(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index ede5f70f..6f8fd3fd 100644 --- a/configure.ac +++ b/configure.ac @@ -1340,6 +1340,37 @@ if test "x${enable_zone_allocator}" = "x1" ; then AC_DEFINE_UNQUOTED(JEMALLOC_ZONE_VERSION, [$JEMALLOC_ZONE_VERSION]) fi +dnl ============================================================================ +dnl Check for glibc malloc hooks + +JE_COMPILABLE([glibc malloc hook], [ +#include + +extern void (* __free_hook)(void *ptr); +extern void *(* __malloc_hook)(size_t size); +extern void *(* __realloc_hook)(void *ptr, size_t size); +], [ + void *ptr = 0L; + if (__malloc_hook) ptr = __malloc_hook(1); + if (__realloc_hook) ptr = __realloc_hook(ptr, 2); + if (__free_hook && ptr) __free_hook(ptr); +], [je_cv_glibc_malloc_hook]) +if test "x${je_cv_glibc_malloc_hook}" = "xyes" ; then + AC_DEFINE([JEMALLOC_GLIBC_MALLOC_HOOK], [ ]) +fi + +JE_COMPILABLE([glibc memalign hook], [ +#include + +extern void *(* __memalign_hook)(size_t alignment, size_t size); +], [ + void *ptr = 0L; + if (__memalign_hook) ptr = __memalign_hook(16, 7); +], [je_cv_glibc_memalign_hook]) +if test "x${je_cv_glibc_memalign_hook}" = "xyes" ; then + AC_DEFINE([JEMALLOC_GLIBC_MEMALIGN_HOOK], [ ]) +fi + dnl ============================================================================ dnl Check for typedefs, structures, and compiler characteristics. AC_HEADER_STDBOOL diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 93716b0a..955582ee 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -209,4 +209,10 @@ /* sizeof(intmax_t) == 2^LG_SIZEOF_INTMAX_T. 
*/ #undef LG_SIZEOF_INTMAX_T +/* glibc malloc hooks (__malloc_hook, __realloc_hook, __free_hook) */ +#undef JEMALLOC_GLIBC_MALLOC_HOOK + +/* glibc memalign hook */ +#undef JEMALLOC_GLIBC_MEMALIGN_HOOK + #endif /* JEMALLOC_INTERNAL_DEFS_H_ */ diff --git a/src/jemalloc.c b/src/jemalloc.c index 2d01272e..9df70018 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1336,7 +1336,7 @@ je_valloc(size_t size) #define is_malloc_(a) malloc_is_ ## a #define is_malloc(a) is_malloc_(a) -#if ((is_malloc(je_malloc) == 1) && defined(__GLIBC__) && !defined(__UCLIBC__)) +#if ((is_malloc(je_malloc) == 1) && defined(JEMALLOC_GLIBC_MALLOC_HOOK)) /* * glibc provides the RTLD_DEEPBIND flag for dlopen which can make it possible * to inconsistently reference libc's malloc(3)-compatible functions @@ -1349,8 +1349,10 @@ je_valloc(size_t size) JEMALLOC_EXPORT void (*__free_hook)(void *ptr) = je_free; JEMALLOC_EXPORT void *(*__malloc_hook)(size_t size) = je_malloc; JEMALLOC_EXPORT void *(*__realloc_hook)(void *ptr, size_t size) = je_realloc; +# ifdef JEMALLOC_GLIBC_MEMALIGN_HOOK JEMALLOC_EXPORT void *(*__memalign_hook)(size_t alignment, size_t size) = je_memalign; +# endif #endif /* From 58799f6d1c1f58053f4aac1b100ce9049c868039 Mon Sep 17 00:00:00 2001 From: Qinfan Wu Date: Tue, 26 Aug 2014 21:28:31 -0700 Subject: [PATCH 0500/3378] Remove junk filling in tcache_bin_flush_small(). Junk filling is done in arena_dalloc_bin_locked(), so arena_alloc_junk_small() is redundant. Also, we should use arena_dalloc_junk_small() instead of arena_alloc_junk_small(). 
--- src/tcache.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/tcache.c b/src/tcache.c index 868f2d77..4fbc94cc 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -120,10 +120,6 @@ tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem, (uintptr_t)chunk) >> LG_PAGE; arena_chunk_map_t *mapelm = arena_mapp_get(chunk, pageind); - if (config_fill && opt_junk) { - arena_alloc_junk_small(ptr, - &arena_bin_info[binind], true); - } arena_dalloc_bin_locked(arena, chunk, ptr, mapelm); } else { From a5a658ab48f7dfa7fd134e505ef23304eaa0ce54 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 2 Sep 2014 15:07:07 -0700 Subject: [PATCH 0501/3378] Make VERSION generation more robust. Relax the "are we in a git repo?" check to succeed even if the top level jemalloc directory is not at the top level of the git repo. Add git tag filtering so that only version triplets match when generating VERSION. Add fallback bogus VERSION creation, so that in the worst case, rather than generating empty values for e.g. JEMALLOC_VERSION_MAJOR, configuration ends up generating useless constants. --- configure.ac | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/configure.ac b/configure.ac index 6f8fd3fd..3b658852 100644 --- a/configure.ac +++ b/configure.ac @@ -1029,11 +1029,33 @@ dnl ============================================================================ dnl jemalloc configuration. dnl -dnl Set VERSION if source directory has an embedded git repository or is a git submodule. -if test -e "${srcroot}.git" ; then - git describe --long --abbrev=40 > ${srcroot}VERSION +dnl Set VERSION if source directory is inside a git repository. +if test "x`git rev-parse --is-inside-work-tree 2>/dev/null`" = "xtrue" ; then + dnl Pattern globs aren't powerful enough to match both single- and + dnl double-digit version numbers, so iterate over patterns to support up to + dnl version 99.99.99 without any accidental matches. 
+ rm -f "${srcroot}VERSION" + for pattern in ['[0-9].[0-9].[0-9]' '[0-9].[0-9].[0-9][0-9]' \ + '[0-9].[0-9][0-9].[0-9]' '[0-9].[0-9][0-9].[0-9][0-9]' \ + '[0-9][0-9].[0-9].[0-9]' '[0-9][0-9].[0-9].[0-9][0-9]' \ + '[0-9][0-9].[0-9][0-9].[0-9]' \ + '[0-9][0-9].[0-9][0-9].[0-9][0-9]']; do + if test ! -e "${srcroot}VERSION" ; then + git describe --long --abbrev=40 --match="${pattern}" > "${srcroot}VERSION.tmp" 2>/dev/null + if test $? -eq 0 ; then + mv "${srcroot}VERSION.tmp" "${srcroot}VERSION" + break + fi + fi + done fi -jemalloc_version=`cat ${srcroot}VERSION` +rm -f "${srcroot}VERSION.tmp" +if test ! -e "${srcroot}VERSION" ; then + AC_MSG_RESULT( + [Missing VERSION file, and unable to generate it; creating bogus VERSION]) + echo "0.0.0-0-g0000000000000000000000000000000000000000" > "${srcroot}VERSION" +fi +jemalloc_version=`cat "${srcroot}VERSION"` jemalloc_version_major=`echo ${jemalloc_version} | tr ".g-" " " | awk '{print [$]1}'` jemalloc_version_minor=`echo ${jemalloc_version} | tr ".g-" " " | awk '{print [$]2}'` jemalloc_version_bugfix=`echo ${jemalloc_version} | tr ".g-" " " | awk '{print [$]3}'` From f34f6037e8d9836f7cddc02ad349dc72964bbcc7 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 2 Sep 2014 17:49:29 -0700 Subject: [PATCH 0502/3378] Disable autom4te cache. 
--- .autom4te.cfg | 3 +++ .gitignore | 2 -- Makefile.in | 1 - 3 files changed, 3 insertions(+), 3 deletions(-) create mode 100644 .autom4te.cfg diff --git a/.autom4te.cfg b/.autom4te.cfg new file mode 100644 index 00000000..fe2424db --- /dev/null +++ b/.autom4te.cfg @@ -0,0 +1,3 @@ +begin-language: "Autoconf-without-aclocal-m4" +args: --no-cache +end-language: "Autoconf-without-aclocal-m4" diff --git a/.gitignore b/.gitignore index 4c408ec2..ec9c0b92 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,5 @@ /*.gcov.* -/autom4te.cache/ - /bin/jemalloc.sh /config.stamp diff --git a/Makefile.in b/Makefile.in index dfafe455..b5f0ee90 100644 --- a/Makefile.in +++ b/Makefile.in @@ -400,7 +400,6 @@ clean: rm -f $(objroot)*.gcov.* distclean: clean - rm -rf $(objroot)autom4te.cache rm -f $(objroot)bin/jemalloc.sh rm -f $(objroot)config.log rm -f $(objroot)config.status From ff6a31d3b92b7c63446ce645341d2bbd77b67dc6 Mon Sep 17 00:00:00 2001 From: Qinfan Wu Date: Fri, 29 Aug 2014 13:34:40 -0700 Subject: [PATCH 0503/3378] Refactor chunk map. Break the chunk map into two separate arrays, in order to improve cache locality. This is related to issue #23. 
--- include/jemalloc/internal/arena.h | 108 +++++---- include/jemalloc/internal/chunk.h | 1 + include/jemalloc/internal/private_symbols.txt | 4 +- include/jemalloc/internal/size_classes.sh | 2 +- src/arena.c | 208 +++++++++--------- src/chunk.c | 1 + src/tcache.c | 11 +- 7 files changed, 186 insertions(+), 149 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index f3f6426c..986bea92 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -43,7 +43,8 @@ */ #define LG_DIRTY_MULT_DEFAULT 3 -typedef struct arena_chunk_map_s arena_chunk_map_t; +typedef struct arena_chunk_map_bits_s arena_chunk_map_bits_t; +typedef struct arena_chunk_map_misc_s arena_chunk_map_misc_t; typedef struct arena_chunk_s arena_chunk_t; typedef struct arena_run_s arena_run_t; typedef struct arena_bin_info_s arena_bin_info_t; @@ -55,34 +56,7 @@ typedef struct arena_s arena_t; #ifdef JEMALLOC_H_STRUCTS /* Each element of the chunk map corresponds to one page within the chunk. */ -struct arena_chunk_map_s { -#ifndef JEMALLOC_PROF - /* - * Overlay prof_tctx in order to allow it to be referenced by dead code. - * Such antics aren't warranted for per arena data structures, but - * chunk map overhead accounts for a percentage of memory, rather than - * being just a fixed cost. - */ - union { -#endif - /* - * Linkage for run trees. There are two disjoint uses: - * - * 1) arena_t's runs_avail tree. - * 2) arena_run_t conceptually uses this linkage for in-use non-full - * runs, rather than directly embedding linkage. - */ - rb_node(arena_chunk_map_t) rb_link; - - /* Profile counters, used for large object runs. */ - prof_tctx_t *prof_tctx; -#ifndef JEMALLOC_PROF - }; /* union { ... }; */ -#endif - - /* Linkage for list of dirty runs. */ - ql_elm(arena_chunk_map_t) dr_link; - +struct arena_chunk_map_bits_s { /* * Run address (or size) and various flags are stored together. 
The bit * layout looks like (assuming 32-bit system): @@ -149,9 +123,43 @@ struct arena_chunk_map_s { #define CHUNK_MAP_ALLOCATED ((size_t)0x1U) #define CHUNK_MAP_KEY CHUNK_MAP_ALLOCATED }; -typedef rb_tree(arena_chunk_map_t) arena_avail_tree_t; -typedef rb_tree(arena_chunk_map_t) arena_run_tree_t; -typedef ql_head(arena_chunk_map_t) arena_chunk_mapelms_t; + +/* + * Each arena_chunk_map_misc_t corresponds to one page within the chunk, just + * like arena_chunk_map_bits_t. Two separate arrays are stored within each + * chunk header in order to improve cache locality. + */ +struct arena_chunk_map_misc_s { +#ifndef JEMALLOC_PROF + /* + * Overlay prof_tctx in order to allow it to be referenced by dead code. + * Such antics aren't warranted for per arena data structures, but + * chunk map overhead accounts for a percentage of memory, rather than + * being just a fixed cost. + */ + union { +#endif + /* + * Linkage for run trees. There are two disjoint uses: + * + * 1) arena_t's runs_avail tree. + * 2) arena_run_t conceptually uses this linkage for in-use non-full + * runs, rather than directly embedding linkage. + */ + rb_node(arena_chunk_map_misc_t) rb_link; + + /* Profile counters, used for large object runs. */ + prof_tctx_t *prof_tctx; +#ifndef JEMALLOC_PROF + }; /* union { ... }; */ +#endif + + /* Linkage for list of dirty runs. */ + ql_elm(arena_chunk_map_misc_t) dr_link; +}; +typedef rb_tree(arena_chunk_map_misc_t) arena_avail_tree_t; +typedef rb_tree(arena_chunk_map_misc_t) arena_run_tree_t; +typedef ql_head(arena_chunk_map_misc_t) arena_chunk_miscelms_t; /* Arena chunk header. */ struct arena_chunk_s { @@ -164,7 +172,7 @@ struct arena_chunk_s { * need to be tracked in the map. This omission saves a header page * for common chunk sizes (e.g. 4 MiB). */ - arena_chunk_map_t map[1]; /* Dynamically sized. */ + arena_chunk_map_bits_t map_bits[1]; /* Dynamically sized. 
*/ }; struct arena_run_s { @@ -335,7 +343,7 @@ struct arena_s { arena_avail_tree_t runs_avail; /* List of dirty runs this arena manages. */ - arena_chunk_mapelms_t runs_dirty; + arena_chunk_miscelms_t runs_dirty; /* * user-configureable chunk allocation and deallocation functions. @@ -393,9 +401,9 @@ void *arena_malloc_large(arena_t *arena, size_t size, bool zero); void *arena_palloc(arena_t *arena, size_t size, size_t alignment, bool zero); void arena_prof_promoted(const void *ptr, size_t size); void arena_dalloc_bin_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr, - arena_chunk_map_t *mapelm); + arena_chunk_map_bits_t *bitselm); void arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, - size_t pageind, arena_chunk_map_t *mapelm); + size_t pageind, arena_chunk_map_bits_t *bitselm); void arena_dalloc_small(arena_t *arena, arena_chunk_t *chunk, void *ptr, size_t pageind); #ifdef JEMALLOC_JET @@ -439,7 +447,10 @@ size_t small_bin2size(size_t binind); size_t small_s2u_compute(size_t size); size_t small_s2u_lookup(size_t size); size_t small_s2u(size_t size); -arena_chunk_map_t *arena_mapp_get(arena_chunk_t *chunk, size_t pageind); +arena_chunk_map_bits_t *arena_bitselm_get(arena_chunk_t *chunk, + size_t pageind); +arena_chunk_map_misc_t *arena_miscelm_get(arena_chunk_t *chunk, + size_t pageind); size_t *arena_mapbitsp_get(arena_chunk_t *chunk, size_t pageind); size_t arena_mapbitsp_read(size_t *mapbitsp); size_t arena_mapbits_get(arena_chunk_t *chunk, size_t pageind); @@ -623,21 +634,32 @@ small_s2u(size_t size) # endif /* JEMALLOC_ARENA_INLINE_A */ # ifdef JEMALLOC_ARENA_INLINE_B -JEMALLOC_ALWAYS_INLINE arena_chunk_map_t * -arena_mapp_get(arena_chunk_t *chunk, size_t pageind) +JEMALLOC_ALWAYS_INLINE arena_chunk_map_bits_t * +arena_bitselm_get(arena_chunk_t *chunk, size_t pageind) { assert(pageind >= map_bias); assert(pageind < chunk_npages); - return (&chunk->map[pageind-map_bias]); + return (&chunk->map_bits[pageind-map_bias]); +} + 
+JEMALLOC_ALWAYS_INLINE arena_chunk_map_misc_t * +arena_miscelm_get(arena_chunk_t *chunk, size_t pageind) +{ + + assert(pageind >= map_bias); + assert(pageind < chunk_npages); + + return ((arena_chunk_map_misc_t *)((uintptr_t)chunk + + (uintptr_t)map_misc_offset) + pageind-map_bias); } JEMALLOC_ALWAYS_INLINE size_t * arena_mapbitsp_get(arena_chunk_t *chunk, size_t pageind) { - return (&arena_mapp_get(chunk, pageind)->bits); + return (&arena_bitselm_get(chunk, pageind)->bits); } JEMALLOC_ALWAYS_INLINE size_t @@ -1005,7 +1027,7 @@ arena_prof_tctx_get(const void *ptr) if ((mapbits & CHUNK_MAP_LARGE) == 0) ret = (prof_tctx_t *)(uintptr_t)1U; else - ret = arena_mapp_get(chunk, pageind)->prof_tctx; + ret = arena_miscelm_get(chunk, pageind)->prof_tctx; return (ret); } @@ -1025,7 +1047,7 @@ arena_prof_tctx_set(const void *ptr, prof_tctx_t *tctx) assert(arena_mapbits_allocated_get(chunk, pageind) != 0); if (arena_mapbits_large_get(chunk, pageind) != 0) - arena_mapp_get(chunk, pageind)->prof_tctx = tctx; + arena_miscelm_get(chunk, pageind)->prof_tctx = tctx; } JEMALLOC_ALWAYS_INLINE void * diff --git a/include/jemalloc/internal/chunk.h b/include/jemalloc/internal/chunk.h index f3bfbe08..27aa0adf 100644 --- a/include/jemalloc/internal/chunk.h +++ b/include/jemalloc/internal/chunk.h @@ -41,6 +41,7 @@ extern size_t chunksize; extern size_t chunksize_mask; /* (chunksize - 1). */ extern size_t chunk_npages; extern size_t map_bias; /* Number of arena chunk header pages. */ +extern size_t map_misc_offset; extern size_t arena_maxclass; /* Max size class for arenas. 
*/ void *chunk_alloc_base(size_t size); diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 13505455..9ca139ab 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -4,6 +4,7 @@ a0malloc arena_alloc_junk_small arena_bin_index arena_bin_info +arena_bitselm_get arena_boot arena_chunk_alloc_huge arena_chunk_dalloc_huge @@ -38,8 +39,8 @@ arena_mapbits_unzeroed_set arena_mapbitsp_get arena_mapbitsp_read arena_mapbitsp_write -arena_mapp_get arena_maxclass +arena_miscelm_get arena_new arena_palloc arena_postfork_child @@ -254,6 +255,7 @@ malloc_vcprintf malloc_vsnprintf malloc_write map_bias +map_misc_offset mb_write mutex_boot narenas_auto diff --git a/include/jemalloc/internal/size_classes.sh b/include/jemalloc/internal/size_classes.sh index 3edebf23..379d36c2 100755 --- a/include/jemalloc/internal/size_classes.sh +++ b/include/jemalloc/internal/size_classes.sh @@ -240,7 +240,7 @@ cat < 255) # error "Too many small size classes" diff --git a/src/arena.c b/src/arena.c index 1263269e..d9dda832 100644 --- a/src/arena.c +++ b/src/arena.c @@ -61,55 +61,57 @@ static void arena_bin_lower_run(arena_t *arena, arena_chunk_t *chunk, /******************************************************************************/ JEMALLOC_INLINE_C size_t -arena_mapelm_to_pageind(arena_chunk_map_t *mapelm) +arena_miscelm_to_pageind(arena_chunk_map_misc_t *miscelm) { - uintptr_t map_offset = - CHUNK_ADDR2OFFSET(mapelm) - offsetof(arena_chunk_t, map); + size_t offset = CHUNK_ADDR2OFFSET(miscelm); - return ((map_offset / sizeof(arena_chunk_map_t)) + map_bias); + return ((offset - map_misc_offset) / sizeof(arena_chunk_map_misc_t) + + map_bias); } JEMALLOC_INLINE_C size_t -arena_mapelm_to_bits(arena_chunk_map_t *mapelm) +arena_miscelm_to_bits(arena_chunk_map_misc_t *miscelm) { + arena_chunk_t *chunk = CHUNK_ADDR2BASE(miscelm); + size_t pageind = arena_miscelm_to_pageind(miscelm); - 
return (mapelm->bits); + return arena_mapbits_get(chunk, pageind); } static inline int -arena_run_comp(arena_chunk_map_t *a, arena_chunk_map_t *b) +arena_run_comp(arena_chunk_map_misc_t *a, arena_chunk_map_misc_t *b) { - uintptr_t a_mapelm = (uintptr_t)a; - uintptr_t b_mapelm = (uintptr_t)b; + uintptr_t a_miscelm = (uintptr_t)a; + uintptr_t b_miscelm = (uintptr_t)b; assert(a != NULL); assert(b != NULL); - return ((a_mapelm > b_mapelm) - (a_mapelm < b_mapelm)); + return ((a_miscelm > b_miscelm) - (a_miscelm < b_miscelm)); } /* Generate red-black tree functions. */ -rb_gen(static UNUSED, arena_run_tree_, arena_run_tree_t, arena_chunk_map_t, +rb_gen(static UNUSED, arena_run_tree_, arena_run_tree_t, arena_chunk_map_misc_t, rb_link, arena_run_comp) static inline int -arena_avail_comp(arena_chunk_map_t *a, arena_chunk_map_t *b) +arena_avail_comp(arena_chunk_map_misc_t *a, arena_chunk_map_misc_t *b) { int ret; size_t a_size; - size_t b_size = arena_mapelm_to_bits(b) & ~PAGE_MASK; - uintptr_t a_mapelm = (uintptr_t)a; - uintptr_t b_mapelm = (uintptr_t)b; + size_t b_size = arena_miscelm_to_bits(b) & ~PAGE_MASK; + uintptr_t a_miscelm = (uintptr_t)a; + uintptr_t b_miscelm = (uintptr_t)b; - if (a_mapelm & CHUNK_MAP_KEY) - a_size = a_mapelm & ~PAGE_MASK; - else - a_size = arena_mapelm_to_bits(a) & ~PAGE_MASK; + if (a_miscelm & CHUNK_MAP_KEY) + a_size = a_miscelm & ~PAGE_MASK; + else + a_size = arena_miscelm_to_bits(a) & ~PAGE_MASK; ret = (a_size > b_size) - (a_size < b_size); if (ret == 0) { - if (!(a_mapelm & CHUNK_MAP_KEY)) - ret = (a_mapelm > b_mapelm) - (a_mapelm < b_mapelm); + if (!(a_miscelm & CHUNK_MAP_KEY)) + ret = (a_miscelm > b_miscelm) - (a_miscelm < b_miscelm); else { /* * Treat keys as if they are lower than anything else. @@ -122,8 +124,8 @@ arena_avail_comp(arena_chunk_map_t *a, arena_chunk_map_t *b) } /* Generate red-black tree functions. 
*/ -rb_gen(static UNUSED, arena_avail_tree_, arena_avail_tree_t, arena_chunk_map_t, - rb_link, arena_avail_comp) +rb_gen(static UNUSED, arena_avail_tree_, arena_avail_tree_t, + arena_chunk_map_misc_t, rb_link, arena_avail_comp) static void arena_avail_insert(arena_t *arena, arena_chunk_t *chunk, size_t pageind, @@ -132,7 +134,7 @@ arena_avail_insert(arena_t *arena, arena_chunk_t *chunk, size_t pageind, assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >> LG_PAGE)); - arena_avail_tree_insert(&arena->runs_avail, arena_mapp_get(chunk, + arena_avail_tree_insert(&arena->runs_avail, arena_miscelm_get(chunk, pageind)); } @@ -143,7 +145,7 @@ arena_avail_remove(arena_t *arena, arena_chunk_t *chunk, size_t pageind, assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >> LG_PAGE)); - arena_avail_tree_remove(&arena->runs_avail, arena_mapp_get(chunk, + arena_avail_tree_remove(&arena->runs_avail, arena_miscelm_get(chunk, pageind)); } @@ -151,14 +153,14 @@ static void arena_dirty_insert(arena_t *arena, arena_chunk_t *chunk, size_t pageind, size_t npages) { - arena_chunk_map_t *mapelm = arena_mapp_get(chunk, pageind); + arena_chunk_map_misc_t *miscelm = arena_miscelm_get(chunk, pageind); assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >> LG_PAGE)); assert(arena_mapbits_dirty_get(chunk, pageind) == CHUNK_MAP_DIRTY); assert(arena_mapbits_dirty_get(chunk, pageind+npages-1) == CHUNK_MAP_DIRTY); - ql_elm_new(mapelm, dr_link); - ql_tail_insert(&arena->runs_dirty, mapelm, dr_link); + ql_elm_new(miscelm, dr_link); + ql_tail_insert(&arena->runs_dirty, miscelm, dr_link); arena->ndirty += npages; } @@ -166,13 +168,13 @@ static void arena_dirty_remove(arena_t *arena, arena_chunk_t *chunk, size_t pageind, size_t npages) { - arena_chunk_map_t *mapelm = arena_mapp_get(chunk, pageind); + arena_chunk_map_misc_t *miscelm = arena_miscelm_get(chunk, pageind); assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >> LG_PAGE)); 
assert(arena_mapbits_dirty_get(chunk, pageind) == CHUNK_MAP_DIRTY); assert(arena_mapbits_dirty_get(chunk, pageind+npages-1) == CHUNK_MAP_DIRTY); - ql_remove(&arena->runs_dirty, mapelm, dr_link); + ql_remove(&arena->runs_dirty, miscelm, dr_link); arena->ndirty -= npages; } @@ -532,16 +534,17 @@ arena_chunk_init_hard(arena_t *arena) */ if (zero == false) { JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED( - (void *)arena_mapp_get(chunk, map_bias+1), - (size_t)((uintptr_t) arena_mapp_get(chunk, chunk_npages-1) - - (uintptr_t)arena_mapp_get(chunk, map_bias+1))); + (void *)arena_bitselm_get(chunk, map_bias+1), + (size_t)((uintptr_t) arena_bitselm_get(chunk, + chunk_npages-1) - (uintptr_t)arena_bitselm_get(chunk, + map_bias+1))); for (i = map_bias+1; i < chunk_npages-1; i++) arena_mapbits_unzeroed_set(chunk, i, unzeroed); } else { - JEMALLOC_VALGRIND_MAKE_MEM_DEFINED((void *)arena_mapp_get(chunk, - map_bias+1), (size_t)((uintptr_t) arena_mapp_get(chunk, - chunk_npages-1) - (uintptr_t)arena_mapp_get(chunk, - map_bias+1))); + JEMALLOC_VALGRIND_MAKE_MEM_DEFINED((void + *)arena_bitselm_get(chunk, map_bias+1), (size_t)((uintptr_t) + arena_bitselm_get(chunk, chunk_npages-1) - + (uintptr_t)arena_bitselm_get(chunk, map_bias+1))); if (config_debug) { for (i = map_bias+1; i < chunk_npages-1; i++) { assert(arena_mapbits_unzeroed_get(chunk, i) == @@ -641,14 +644,14 @@ static arena_run_t * arena_run_alloc_large_helper(arena_t *arena, size_t size, bool zero) { arena_run_t *run; - arena_chunk_map_t *mapelm; - arena_chunk_map_t *key; + arena_chunk_map_misc_t *miscelm; + arena_chunk_map_misc_t *key; - key = (arena_chunk_map_t *)(size | CHUNK_MAP_KEY); - mapelm = arena_avail_tree_nsearch(&arena->runs_avail, key); - if (mapelm != NULL) { - arena_chunk_t *run_chunk = CHUNK_ADDR2BASE(mapelm); - size_t pageind = arena_mapelm_to_pageind(mapelm); + key = (arena_chunk_map_misc_t *)(size | CHUNK_MAP_KEY); + miscelm = arena_avail_tree_nsearch(&arena->runs_avail, key); + if (miscelm != NULL) { + arena_chunk_t 
*run_chunk = CHUNK_ADDR2BASE(miscelm); + size_t pageind = arena_miscelm_to_pageind(miscelm); run = (arena_run_t *)((uintptr_t)run_chunk + (pageind << LG_PAGE)); @@ -695,14 +698,14 @@ static arena_run_t * arena_run_alloc_small_helper(arena_t *arena, size_t size, size_t binind) { arena_run_t *run; - arena_chunk_map_t *mapelm; - arena_chunk_map_t *key; + arena_chunk_map_misc_t *miscelm; + arena_chunk_map_misc_t *key; - key = (arena_chunk_map_t *)(size | CHUNK_MAP_KEY); - mapelm = arena_avail_tree_nsearch(&arena->runs_avail, key); - if (mapelm != NULL) { - arena_chunk_t *run_chunk = CHUNK_ADDR2BASE(mapelm); - size_t pageind = arena_mapelm_to_pageind(mapelm); + key = (arena_chunk_map_misc_t *)(size | CHUNK_MAP_KEY); + miscelm = arena_avail_tree_nsearch(&arena->runs_avail, key); + if (miscelm != NULL) { + arena_chunk_t *run_chunk = CHUNK_ADDR2BASE(miscelm); + size_t pageind = arena_miscelm_to_pageind(miscelm); run = (arena_run_t *)((uintptr_t)run_chunk + (pageind << LG_PAGE)); @@ -769,13 +772,13 @@ static size_t arena_dirty_count(arena_t *arena) { size_t ndirty = 0; - arena_chunk_map_t *mapelm; + arena_chunk_map_misc_t *miscelm; arena_chunk_t *chunk; size_t pageind, npages; - ql_foreach(mapelm, &arena->runs_dirty, dr_link) { - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(mapelm); - pageind = arena_mapelm_to_pageind(mapelm); + ql_foreach(miscelm, &arena->runs_dirty, dr_link) { + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(miscelm); + pageind = arena_miscelm_to_pageind(miscelm); assert(arena_mapbits_allocated_get(chunk, pageind) == 0); assert(arena_mapbits_large_get(chunk, pageind) == 0); assert(arena_mapbits_dirty_get(chunk, pageind) != 0); @@ -808,16 +811,17 @@ arena_compute_npurge(arena_t *arena, bool all) static size_t arena_stash_dirty(arena_t *arena, bool all, size_t npurge, - arena_chunk_mapelms_t *mapelms) + arena_chunk_miscelms_t *miscelms) { - arena_chunk_map_t *mapelm; + arena_chunk_map_misc_t *miscelm; size_t nstashed = 0; /* Add at least npurge pages to purge_list. 
*/ - for (mapelm = ql_first(&arena->runs_dirty); mapelm != NULL; - mapelm = ql_first(&arena->runs_dirty)) { - arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(mapelm); - size_t pageind = arena_mapelm_to_pageind(mapelm); + for (miscelm = ql_first(&arena->runs_dirty); miscelm != NULL; + miscelm = ql_first(&arena->runs_dirty)) { + arena_chunk_t *chunk = + (arena_chunk_t *)CHUNK_ADDR2BASE(miscelm); + size_t pageind = arena_miscelm_to_pageind(miscelm); size_t run_size = arena_mapbits_unallocated_size_get(chunk, pageind); size_t npages = run_size >> LG_PAGE; @@ -838,8 +842,8 @@ arena_stash_dirty(arena_t *arena, bool all, size_t npurge, /* Temporarily allocate the free dirty run. */ arena_run_split_large(arena, run, run_size, false); /* Append to purge_list for later processing. */ - ql_elm_new(mapelm, dr_link); - ql_tail_insert(mapelms, mapelm, dr_link); + ql_elm_new(miscelm, dr_link); + ql_tail_insert(miscelms, miscelm, dr_link); nstashed += npages; @@ -851,10 +855,10 @@ arena_stash_dirty(arena_t *arena, bool all, size_t npurge, } static size_t -arena_purge_stashed(arena_t *arena, arena_chunk_mapelms_t *mapelms) +arena_purge_stashed(arena_t *arena, arena_chunk_miscelms_t *miscelms) { size_t npurged, nmadvise; - arena_chunk_map_t *mapelm; + arena_chunk_map_misc_t *miscelm; if (config_stats) nmadvise = 0; @@ -862,13 +866,13 @@ arena_purge_stashed(arena_t *arena, arena_chunk_mapelms_t *mapelms) malloc_mutex_unlock(&arena->lock); - ql_foreach(mapelm, mapelms, dr_link) { + ql_foreach(miscelm, miscelms, dr_link) { arena_chunk_t *chunk; size_t pageind, run_size, npages, flag_unzeroed, i; bool unzeroed; - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(mapelm); - pageind = arena_mapelm_to_pageind(mapelm); + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(miscelm); + pageind = arena_miscelm_to_pageind(miscelm); run_size = arena_mapbits_large_size_get(chunk, pageind); npages = run_size >> LG_PAGE; @@ -908,18 +912,19 @@ arena_purge_stashed(arena_t *arena, arena_chunk_mapelms_t *mapelms) 
} static void -arena_unstash_purged(arena_t *arena, arena_chunk_mapelms_t *mapelms) +arena_unstash_purged(arena_t *arena, arena_chunk_miscelms_t *miscelms) { - arena_chunk_map_t *mapelm; + arena_chunk_map_misc_t *miscelm; /* Deallocate runs. */ - for (mapelm = ql_first(mapelms); mapelm != NULL; - mapelm = ql_first(mapelms)) { - arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(mapelm); - size_t pageind = arena_mapelm_to_pageind(mapelm); + for (miscelm = ql_first(miscelms); miscelm != NULL; + miscelm = ql_first(miscelms)) { + arena_chunk_t *chunk = + (arena_chunk_t *)CHUNK_ADDR2BASE(miscelm); + size_t pageind = arena_miscelm_to_pageind(miscelm); arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)(pageind << LG_PAGE)); - ql_remove(mapelms, mapelm, dr_link); + ql_remove(miscelms, miscelm, dr_link); arena_run_dalloc(arena, run, false, true); } } @@ -928,7 +933,7 @@ void arena_purge(arena_t *arena, bool all) { size_t npurge, npurgeable, npurged; - arena_chunk_mapelms_t purge_list; + arena_chunk_miscelms_t purge_list; if (config_debug) { size_t ndirty = arena_dirty_count(arena); @@ -1180,14 +1185,14 @@ arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, static arena_run_t * arena_bin_runs_first(arena_bin_t *bin) { - arena_chunk_map_t *mapelm = arena_run_tree_first(&bin->runs); - if (mapelm != NULL) { + arena_chunk_map_misc_t *miscelm = arena_run_tree_first(&bin->runs); + if (miscelm != NULL) { arena_chunk_t *chunk; size_t pageind; arena_run_t *run; - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(mapelm); - pageind = arena_mapelm_to_pageind(mapelm); + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(miscelm); + pageind = arena_miscelm_to_pageind(miscelm); run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - arena_mapbits_small_runind_get(chunk, pageind)) << LG_PAGE)); @@ -1202,11 +1207,11 @@ arena_bin_runs_insert(arena_bin_t *bin, arena_run_t *run) { arena_chunk_t *chunk = CHUNK_ADDR2BASE(run); size_t pageind = ((uintptr_t)run - 
(uintptr_t)chunk) >> LG_PAGE; - arena_chunk_map_t *mapelm = arena_mapp_get(chunk, pageind); + arena_chunk_map_misc_t *miscelm = arena_miscelm_get(chunk, pageind); - assert(arena_run_tree_search(&bin->runs, mapelm) == NULL); + assert(arena_run_tree_search(&bin->runs, miscelm) == NULL); - arena_run_tree_insert(&bin->runs, mapelm); + arena_run_tree_insert(&bin->runs, miscelm); } static void @@ -1214,11 +1219,11 @@ arena_bin_runs_remove(arena_bin_t *bin, arena_run_t *run) { arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE; - arena_chunk_map_t *mapelm = arena_mapp_get(chunk, pageind); + arena_chunk_map_misc_t *miscelm = arena_miscelm_get(chunk, pageind); - assert(arena_run_tree_search(&bin->runs, mapelm) != NULL); + assert(arena_run_tree_search(&bin->runs, miscelm) != NULL); - arena_run_tree_remove(&bin->runs, mapelm); + arena_run_tree_remove(&bin->runs, miscelm); } static arena_run_t * @@ -1684,9 +1689,8 @@ arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, size_t npages, run_ind, past; assert(run != bin->runcur); - assert(arena_run_tree_search(&bin->runs, - arena_mapp_get(chunk, ((uintptr_t)run-(uintptr_t)chunk)>>LG_PAGE)) - == NULL); + assert(arena_run_tree_search(&bin->runs, arena_miscelm_get(chunk, + ((uintptr_t)run-(uintptr_t)chunk)>>LG_PAGE)) == NULL); binind = arena_bin_index(chunk->arena, run->bin); bin_info = &arena_bin_info[binind]; @@ -1749,7 +1753,7 @@ arena_bin_lower_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, void arena_dalloc_bin_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr, - arena_chunk_map_t *mapelm) + arena_chunk_map_bits_t *bitselm) { size_t pageind; arena_run_t *run; @@ -1761,7 +1765,8 @@ arena_dalloc_bin_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr, run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - arena_mapbits_small_runind_get(chunk, pageind)) << LG_PAGE)); bin = run->bin; - binind = 
arena_ptr_small_binind_get(ptr, arena_mapbits_get(chunk, pageind)); + binind = arena_ptr_small_binind_get(ptr, arena_mapbits_get(chunk, + pageind)); bin_info = &arena_bin_info[binind]; if (config_fill || config_stats) size = bin_info->reg_size; @@ -1784,7 +1789,7 @@ arena_dalloc_bin_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr, void arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, - size_t pageind, arena_chunk_map_t *mapelm) + size_t pageind, arena_chunk_map_bits_t *bitselm) { arena_run_t *run; arena_bin_t *bin; @@ -1793,7 +1798,7 @@ arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, arena_mapbits_small_runind_get(chunk, pageind)) << LG_PAGE)); bin = run->bin; malloc_mutex_lock(&bin->lock); - arena_dalloc_bin_locked(arena, chunk, ptr, mapelm); + arena_dalloc_bin_locked(arena, chunk, ptr, bitselm); malloc_mutex_unlock(&bin->lock); } @@ -1801,15 +1806,15 @@ void arena_dalloc_small(arena_t *arena, arena_chunk_t *chunk, void *ptr, size_t pageind) { - arena_chunk_map_t *mapelm; + arena_chunk_map_bits_t *bitselm; if (config_debug) { /* arena_ptr_small_binind_get() does extra sanity checking. */ assert(arena_ptr_small_binind_get(ptr, arena_mapbits_get(chunk, pageind)) != BININD_INVALID); } - mapelm = arena_mapp_get(chunk, pageind); - arena_dalloc_bin(arena, chunk, ptr, pageind, mapelm); + bitselm = arena_bitselm_get(chunk, pageind); + arena_dalloc_bin(arena, chunk, ptr, pageind, bitselm); } #ifdef JEMALLOC_JET @@ -2268,7 +2273,8 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size) * be twice as large in order to maintain alignment. 
*/ if (config_fill && opt_redzone) { - size_t align_min = ZU(1) << (jemalloc_ffs(bin_info->reg_size) - 1); + size_t align_min = ZU(1) << (jemalloc_ffs(bin_info->reg_size) - + 1); if (align_min <= REDZONE_MINSIZE) { bin_info->redzone_size = REDZONE_MINSIZE; pad_size = 0; @@ -2404,13 +2410,17 @@ arena_boot(void) */ map_bias = 0; for (i = 0; i < 3; i++) { - header_size = offsetof(arena_chunk_t, map) + - (sizeof(arena_chunk_map_t) * (chunk_npages-map_bias)); + header_size = offsetof(arena_chunk_t, map_bits) + + ((sizeof(arena_chunk_map_bits_t) + + sizeof(arena_chunk_map_misc_t)) * (chunk_npages-map_bias)); map_bias = (header_size >> LG_PAGE) + ((header_size & PAGE_MASK) != 0); } assert(map_bias > 0); + map_misc_offset = offsetof(arena_chunk_t, map_bits) + + sizeof(arena_chunk_map_bits_t) * (chunk_npages-map_bias); + arena_maxclass = chunksize - (map_bias << LG_PAGE); bin_info_init(); diff --git a/src/chunk.c b/src/chunk.c index 38d02868..874002cf 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -28,6 +28,7 @@ size_t chunksize; size_t chunksize_mask; /* (chunksize - 1). */ size_t chunk_npages; size_t map_bias; +size_t map_misc_offset; size_t arena_maxclass; /* Max size class for arenas. 
*/ /******************************************************************************/ diff --git a/src/tcache.c b/src/tcache.c index 4fbc94cc..f86a46e6 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -118,10 +118,10 @@ tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem, if (chunk->arena == arena) { size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - arena_chunk_map_t *mapelm = - arena_mapp_get(chunk, pageind); + arena_chunk_map_bits_t *bitselm = + arena_bitselm_get(chunk, pageind); arena_dalloc_bin_locked(arena, chunk, ptr, - mapelm); + bitselm); } else { /* * This object was allocated via a different @@ -393,9 +393,10 @@ tcache_destroy(tcache_t *tcache) arena_t *arena = chunk->arena; size_t pageind = ((uintptr_t)tcache - (uintptr_t)chunk) >> LG_PAGE; - arena_chunk_map_t *mapelm = arena_mapp_get(chunk, pageind); + arena_chunk_map_bits_t *bitselm = arena_bitselm_get(chunk, + pageind); - arena_dalloc_bin(arena, chunk, tcache, pageind, mapelm); + arena_dalloc_bin(arena, chunk, tcache, pageind, bitselm); } else if (tcache_size <= tcache_maxclass) { arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache); arena_t *arena = chunk->arena; From c21b05ea09874222266b3e36ceb18765fcb4a00b Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 4 Sep 2014 22:27:26 -0700 Subject: [PATCH 0504/3378] Whitespace cleanups. --- INSTALL | 6 +++--- include/jemalloc/internal/prng.h | 2 +- src/zone.c | 14 +++++++------- test/src/SFMT.c | 20 ++++++++++---------- 4 files changed, 21 insertions(+), 21 deletions(-) diff --git a/INSTALL b/INSTALL index 2df667ca..6c46100e 100644 --- a/INSTALL +++ b/INSTALL @@ -56,7 +56,7 @@ any of the following arguments (not a definitive list) to 'configure': replace the "malloc", "calloc", etc. symbols. --without-export - Don't export public APIs. This can be useful when building jemalloc as a + Don't export public APIs. 
This can be useful when building jemalloc as a static library, or to avoid exporting public APIs when using the zone allocator on OSX. @@ -96,7 +96,7 @@ any of the following arguments (not a definitive list) to 'configure': --enable-ivsalloc Enable validation code, which verifies that pointers reside within - jemalloc-owned chunks before dereferencing them. This incurs a substantial + jemalloc-owned chunks before dereferencing them. This incurs a substantial performance hit. --disable-stats @@ -148,7 +148,7 @@ any of the following arguments (not a definitive list) to 'configure': Disable support for Valgrind. --disable-zone-allocator - Disable zone allocator for Darwin. This means jemalloc won't be hooked as + Disable zone allocator for Darwin. This means jemalloc won't be hooked as the default allocator on OSX/iOS. --enable-utrace diff --git a/include/jemalloc/internal/prng.h b/include/jemalloc/internal/prng.h index 7b2b0651..c6b17972 100644 --- a/include/jemalloc/internal/prng.h +++ b/include/jemalloc/internal/prng.h @@ -15,7 +15,7 @@ * See Knuth's TAOCP 3rd Ed., Vol. 2, pg. 17 for details on these constraints. * * This choice of m has the disadvantage that the quality of the bits is - * proportional to bit position. For example. the lowest bit has a cycle of 2, + * proportional to bit position. For example, the lowest bit has a cycle of 2, * the next has a cycle of 4, etc. For this reason, we prefer to use the upper * bits. * diff --git a/src/zone.c b/src/zone.c index a722287b..c6bd533f 100644 --- a/src/zone.c +++ b/src/zone.c @@ -258,13 +258,13 @@ register_zone(void) /* * On OSX 10.6, having the default purgeable zone appear before * the default zone makes some things crash because it thinks it - * owns the default zone allocated pointers. We thus unregister/ - * re-register it in order to ensure it's always after the - * default zone. On OSX < 10.6, there is no purgeable zone, so - * this does nothing. 
On OSX >= 10.6, unregistering replaces the - * purgeable zone with the last registered zone above, i.e the - * default zone. Registering it again then puts it at the end, - * obviously after the default zone. + * owns the default zone allocated pointers. We thus + * unregister/re-register it in order to ensure it's always + * after the default zone. On OSX < 10.6, there is no purgeable + * zone, so this does nothing. On OSX >= 10.6, unregistering + * replaces the purgeable zone with the last registered zone + * above, i.e the default zone. Registering it again then puts + * it at the end, obviously after the default zone. */ if (purgeable_zone) { malloc_zone_unregister(purgeable_zone); diff --git a/test/src/SFMT.c b/test/src/SFMT.c index 22a5ac55..80cabe05 100644 --- a/test/src/SFMT.c +++ b/test/src/SFMT.c @@ -463,11 +463,11 @@ uint32_t gen_rand32_range(sfmt_t *ctx, uint32_t limit) { above = 0xffffffffU - (0xffffffffU % limit); while (1) { - ret = gen_rand32(ctx); - if (ret < above) { - ret %= limit; - break; - } + ret = gen_rand32(ctx); + if (ret < above) { + ret %= limit; + break; + } } return ret; } @@ -513,11 +513,11 @@ uint64_t gen_rand64_range(sfmt_t *ctx, uint64_t limit) { above = KQU(0xffffffffffffffff) - (KQU(0xffffffffffffffff) % limit); while (1) { - ret = gen_rand64(ctx); - if (ret < above) { - ret %= limit; - break; - } + ret = gen_rand64(ctx); + if (ret < above) { + ret %= limit; + break; + } } return ret; } From b718cf77e9917f6ae1995c2e2b219ff4219c9f46 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 7 Sep 2014 14:40:19 -0700 Subject: [PATCH 0505/3378] Optimize [nmd]alloc() fast paths. Optimize [nmd]alloc() fast paths such that the (flags == 0) case is streamlined, flags decoding only happens to the minimum degree necessary, and no conditionals are repeated. 
--- include/jemalloc/internal/arena.h | 4 +- .../jemalloc/internal/jemalloc_internal.h.in | 52 ++-- include/jemalloc/internal/private_symbols.txt | 1 - include/jemalloc/internal/size_classes.sh | 3 + src/arena.c | 2 +- src/huge.c | 2 +- src/jemalloc.c | 239 ++++++++++-------- 7 files changed, 172 insertions(+), 131 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 986bea92..166d0523 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -577,7 +577,7 @@ small_bin2size_lookup(size_t binind) assert(binind < NBINS); { - size_t ret = ((size_t)(small_bin2size_tab[binind])); + size_t ret = (size_t)small_bin2size_tab[binind]; assert(ret == small_bin2size_compute(binind)); return (ret); } @@ -615,7 +615,7 @@ small_s2u_compute(size_t size) JEMALLOC_ALWAYS_INLINE size_t small_s2u_lookup(size_t size) { - size_t ret = (small_bin2size(small_size2bin(size))); + size_t ret = small_bin2size(small_size2bin(size)); assert(ret == small_s2u_compute(size)); return (ret); diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 1c2f3d44..59ae8d55 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -165,7 +165,17 @@ static const bool config_ivsalloc = #include "jemalloc/internal/jemalloc_internal_macros.h" +#define MALLOCX_ARENA_MASK ((int)~0xff) #define MALLOCX_LG_ALIGN_MASK ((int)0x3f) +/* Use MALLOCX_ALIGN_GET() if alignment may not be specified in flags. */ +#define MALLOCX_ALIGN_GET_SPECIFIED(flags) \ + (ZU(1) << (flags & MALLOCX_LG_ALIGN_MASK)) +#define MALLOCX_ALIGN_GET(flags) \ + (MALLOCX_ALIGN_GET_SPECIFIED(flags) & (SIZE_T_MAX-1)) +#define MALLOCX_ZERO_GET(flags) \ + ((bool)(flags & MALLOCX_ZERO)) +#define MALLOCX_ARENA_GET(flags) \ + (((unsigned)(flags >> 8)) - 1) /* Smallest size class to support. 
*/ #define LG_TINY_MIN 3 @@ -625,15 +635,13 @@ size_t u2rz(size_t usize); size_t p2rz(const void *ptr); void idalloct(void *ptr, bool try_tcache); void idalloc(void *ptr); -void iqalloct(void *ptr, bool try_tcache); -void iqalloc(void *ptr); +void iqalloc(void *ptr, bool try_tcache); void *iralloct_realign(void *ptr, size_t oldsize, size_t size, size_t extra, size_t alignment, bool zero, bool try_tcache_alloc, bool try_tcache_dalloc, arena_t *arena); -void *iralloct(void *ptr, size_t size, size_t extra, size_t alignment, - bool zero, bool try_tcache_alloc, bool try_tcache_dalloc, arena_t *arena); -void *iralloc(void *ptr, size_t size, size_t extra, size_t alignment, - bool zero); +void *iralloct(void *ptr, size_t size, size_t alignment, bool zero, + bool try_tcache_alloc, bool try_tcache_dalloc, arena_t *arena); +void *iralloc(void *ptr, size_t size, size_t alignment, bool zero); bool ixalloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero); malloc_tsd_protos(JEMALLOC_ATTR(unused), thread_allocated, thread_allocated_t) @@ -787,7 +795,7 @@ idalloc(void *ptr) } JEMALLOC_ALWAYS_INLINE void -iqalloct(void *ptr, bool try_tcache) +iqalloc(void *ptr, bool try_tcache) { if (config_fill && opt_quarantine) @@ -796,13 +804,6 @@ iqalloct(void *ptr, bool try_tcache) idalloct(ptr, try_tcache); } -JEMALLOC_ALWAYS_INLINE void -iqalloc(void *ptr) -{ - - iqalloct(ptr, true); -} - JEMALLOC_ALWAYS_INLINE void * iralloct_realign(void *ptr, size_t oldsize, size_t size, size_t extra, size_t alignment, bool zero, bool try_tcache_alloc, bool try_tcache_dalloc, @@ -832,12 +833,12 @@ iralloct_realign(void *ptr, size_t oldsize, size_t size, size_t extra, */ copysize = (size < oldsize) ? 
size : oldsize; memcpy(p, ptr, copysize); - iqalloct(ptr, try_tcache_dalloc); + iqalloc(ptr, try_tcache_dalloc); return (p); } JEMALLOC_ALWAYS_INLINE void * -iralloct(void *ptr, size_t size, size_t extra, size_t alignment, bool zero, +iralloct(void *ptr, size_t size, size_t alignment, bool zero, bool try_tcache_alloc, bool try_tcache_dalloc, arena_t *arena) { size_t oldsize; @@ -853,25 +854,24 @@ iralloct(void *ptr, size_t size, size_t extra, size_t alignment, bool zero, * Existing object alignment is inadequate; allocate new space * and copy. */ - return (iralloct_realign(ptr, oldsize, size, extra, alignment, - zero, try_tcache_alloc, try_tcache_dalloc, arena)); + return (iralloct_realign(ptr, oldsize, size, 0, alignment, zero, + try_tcache_alloc, try_tcache_dalloc, arena)); } - if (size + extra <= arena_maxclass) { - return (arena_ralloc(arena, ptr, oldsize, size, extra, - alignment, zero, try_tcache_alloc, - try_tcache_dalloc)); + if (size <= arena_maxclass) { + return (arena_ralloc(arena, ptr, oldsize, size, 0, alignment, + zero, try_tcache_alloc, try_tcache_dalloc)); } else { - return (huge_ralloc(arena, ptr, oldsize, size, extra, - alignment, zero, try_tcache_dalloc)); + return (huge_ralloc(arena, ptr, oldsize, size, 0, alignment, + zero, try_tcache_dalloc)); } } JEMALLOC_ALWAYS_INLINE void * -iralloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero) +iralloc(void *ptr, size_t size, size_t alignment, bool zero) { - return (iralloct(ptr, size, extra, alignment, zero, true, true, NULL)); + return (iralloct(ptr, size, alignment, zero, true, true, NULL)); } JEMALLOC_ALWAYS_INLINE bool diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 9ca139ab..84f05910 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -224,7 +224,6 @@ in_valgrind ipalloc ipalloct iqalloc -iqalloct iralloc iralloct iralloct_realign diff --git 
a/include/jemalloc/internal/size_classes.sh b/include/jemalloc/internal/size_classes.sh index 379d36c2..0cfac72d 100755 --- a/include/jemalloc/internal/size_classes.sh +++ b/include/jemalloc/internal/size_classes.sh @@ -202,6 +202,7 @@ cat <deallocated += usize; if (config_valgrind && in_valgrind) rzsize = p2rz(ptr); - iqalloc(ptr); + iqalloc(ptr, try_tcache); JEMALLOC_VALGRIND_FREE(ptr, rzsize); } @@ -1236,7 +1235,7 @@ je_realloc(void *ptr, size_t size) if (ptr != NULL) { /* realloc(ptr, 0) is equivalent to free(ptr). */ UTRACE(ptr, 0, 0); - ifree(ptr); + ifree(ptr, true); return (NULL); } size = 1; @@ -1261,7 +1260,7 @@ je_realloc(void *ptr, size_t size) } else { if (config_stats || (config_valgrind && in_valgrind)) usize = s2u(size); - ret = iralloc(ptr, size, 0, 0, false); + ret = iralloc(ptr, size, 0, false); } } else { /* realloc(NULL, size) is equivalent to malloc(size). */ @@ -1295,7 +1294,7 @@ je_free(void *ptr) UTRACE(ptr, 0, 0); if (ptr != NULL) - ifree(ptr); + ifree(ptr, true); } /* @@ -1363,99 +1362,153 @@ JEMALLOC_EXPORT void *(*__memalign_hook)(size_t alignment, size_t size) = * Begin non-standard functions. 
*/ +JEMALLOC_ALWAYS_INLINE_C void +imallocx_flags_decode_hard(size_t size, int flags, size_t *usize, + size_t *alignment, bool *zero, bool *try_tcache, arena_t **arena) +{ + + if ((flags & MALLOCX_LG_ALIGN_MASK) == 0) { + *alignment = 0; + *usize = s2u(size); + } else { + *alignment = MALLOCX_ALIGN_GET_SPECIFIED(flags); + *usize = sa2u(size, *alignment); + } + *zero = MALLOCX_ZERO_GET(flags); + if ((flags & MALLOCX_ARENA_MASK) != 0) { + unsigned arena_ind = MALLOCX_ARENA_GET(flags); + *try_tcache = false; + *arena = arenas[arena_ind]; + } else { + *try_tcache = true; + *arena = NULL; + } +} + +JEMALLOC_ALWAYS_INLINE_C void +imallocx_flags_decode(size_t size, int flags, size_t *usize, size_t *alignment, + bool *zero, bool *try_tcache, arena_t **arena) +{ + + if (flags == 0) { + *usize = s2u(size); + assert(usize != 0); + *alignment = 0; + *zero = false; + *try_tcache = true; + *arena = NULL; + } else { + imallocx_flags_decode_hard(size, flags, usize, alignment, zero, + try_tcache, arena); + } +} + JEMALLOC_ALWAYS_INLINE_C void * -imallocx(size_t usize, size_t alignment, bool zero, bool try_tcache, +imallocx_flags(size_t usize, size_t alignment, bool zero, bool try_tcache, arena_t *arena) { - assert(usize == ((alignment == 0) ? 
s2u(usize) : sa2u(usize, - alignment))); - if (alignment != 0) return (ipalloct(usize, alignment, zero, try_tcache, arena)); - else if (zero) + if (zero) return (icalloct(usize, try_tcache, arena)); - else - return (imalloct(usize, try_tcache, arena)); + return (imalloct(usize, try_tcache, arena)); +} + + +JEMALLOC_ALWAYS_INLINE_C void * +imallocx_maybe_flags(size_t size, int flags, size_t usize, size_t alignment, + bool zero, bool try_tcache, arena_t *arena) +{ + + if (flags == 0) + return (imalloc(size)); + return (imallocx_flags(usize, alignment, zero, try_tcache, arena)); } static void * -imallocx_prof_sample(size_t usize, size_t alignment, bool zero, bool try_tcache, - arena_t *arena, prof_tctx_t *tctx) +imallocx_prof_sample(size_t size, int flags, size_t usize, size_t alignment, + bool zero, bool try_tcache, arena_t *arena) { void *p; - if (tctx == NULL) - return (NULL); if (usize <= SMALL_MAXCLASS) { - size_t usize_promoted = (alignment == 0) ? - s2u(SMALL_MAXCLASS+1) : sa2u(SMALL_MAXCLASS+1, alignment); - assert(usize_promoted != 0); - p = imallocx(usize_promoted, alignment, zero, try_tcache, - arena); + assert(((alignment == 0) ? 
s2u(LARGE_MINCLASS) : + sa2u(LARGE_MINCLASS, alignment)) == LARGE_MINCLASS); + p = imalloc(LARGE_MINCLASS); if (p == NULL) return (NULL); arena_prof_promoted(p, usize); - } else - p = imallocx(usize, alignment, zero, try_tcache, arena); + } else { + p = imallocx_maybe_flags(size, flags, usize, alignment, zero, + try_tcache, arena); + } return (p); } JEMALLOC_ALWAYS_INLINE_C void * -imallocx_prof(size_t usize, size_t alignment, bool zero, bool try_tcache, - arena_t *arena, prof_tctx_t *tctx) +imallocx_prof(size_t size, int flags, size_t *usize) { void *p; + size_t alignment; + bool zero; + bool try_tcache; + arena_t *arena; + prof_tctx_t *tctx; - if ((uintptr_t)tctx != (uintptr_t)1U) { - p = imallocx_prof_sample(usize, alignment, zero, try_tcache, - arena, tctx); + imallocx_flags_decode(size, flags, usize, &alignment, &zero, + &try_tcache, &arena); + tctx = prof_alloc_prep(*usize); + if ((uintptr_t)tctx == (uintptr_t)1U) { + p = imallocx_maybe_flags(size, flags, *usize, alignment, zero, + try_tcache, arena); + } else if ((uintptr_t)tctx > (uintptr_t)1U) { + p = imallocx_prof_sample(size, flags, *usize, alignment, zero, + try_tcache, arena); } else - p = imallocx(usize, alignment, zero, try_tcache, arena); + p = NULL; if (p == NULL) return (NULL); - prof_malloc(p, usize, tctx); + prof_malloc(p, *usize, tctx); return (p); } +JEMALLOC_ALWAYS_INLINE_C void * +imallocx_no_prof(size_t size, int flags, size_t *usize) +{ + size_t alignment; + bool zero; + bool try_tcache; + arena_t *arena; + + if (flags == 0) { + if (config_stats || (config_valgrind && in_valgrind)) + *usize = s2u(size); + return (imalloc(size)); + } + + imallocx_flags_decode_hard(size, flags, usize, &alignment, &zero, + &try_tcache, &arena); + return (imallocx_flags(*usize, alignment, zero, try_tcache, arena)); +} + void * je_mallocx(size_t size, int flags) { void *p; size_t usize; - size_t alignment = (ZU(1) << (flags & MALLOCX_LG_ALIGN_MASK) - & (SIZE_T_MAX-1)); - bool zero = flags & MALLOCX_ZERO; - 
unsigned arena_ind = ((unsigned)(flags >> 8)) - 1; - arena_t *arena; - bool try_tcache; assert(size != 0); if (malloc_init()) goto label_oom; - if (arena_ind != UINT_MAX) { - arena = arenas[arena_ind]; - try_tcache = false; - } else { - arena = NULL; - try_tcache = true; - } - - usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment); - assert(usize != 0); - - if (config_prof && opt_prof) { - prof_tctx_t *tctx; - - tctx = prof_alloc_prep(usize); - p = imallocx_prof(usize, alignment, zero, try_tcache, arena, - tctx); - } else - p = imallocx(usize, alignment, zero, try_tcache, arena); + if (config_prof && opt_prof) + p = imallocx_prof(size, flags, &usize); + else + p = imallocx_no_prof(size, flags, &usize); if (p == NULL) goto label_oom; @@ -1464,7 +1517,7 @@ je_mallocx(size_t size, int flags) thread_allocated_tsd_get()->allocated += usize; } UTRACE(0, size, p); - JEMALLOC_VALGRIND_MALLOC(true, p, usize, zero); + JEMALLOC_VALGRIND_MALLOC(true, p, usize, MALLOCX_ZERO_GET(flags)); return (p); label_oom: if (config_xmalloc && opt_xmalloc) { @@ -1485,15 +1538,14 @@ irallocx_prof_sample(void *oldptr, size_t size, size_t alignment, size_t usize, if (tctx == NULL) return (NULL); if (usize <= SMALL_MAXCLASS) { - p = iralloct(oldptr, SMALL_MAXCLASS+1, (SMALL_MAXCLASS+1 >= - size) ? 
0 : size - (SMALL_MAXCLASS+1), alignment, zero, + p = iralloct(oldptr, LARGE_MINCLASS, alignment, zero, try_tcache_alloc, try_tcache_dalloc, arena); if (p == NULL) return (NULL); arena_prof_promoted(p, usize); } else { - p = iralloct(oldptr, size, 0, alignment, zero, - try_tcache_alloc, try_tcache_dalloc, arena); + p = iralloct(oldptr, size, alignment, zero, try_tcache_alloc, + try_tcache_dalloc, arena); } return (p); @@ -1512,8 +1564,8 @@ irallocx_prof(void *oldptr, size_t old_usize, size_t size, size_t alignment, p = irallocx_prof_sample(oldptr, size, alignment, *usize, zero, try_tcache_alloc, try_tcache_dalloc, arena, tctx); else { - p = iralloct(oldptr, size, 0, alignment, zero, - try_tcache_alloc, try_tcache_dalloc, arena); + p = iralloct(oldptr, size, alignment, zero, try_tcache_alloc, + try_tcache_dalloc, arena); } if (p == NULL) return (NULL); @@ -1540,10 +1592,8 @@ je_rallocx(void *ptr, size_t size, int flags) void *p; size_t usize, old_usize; UNUSED size_t old_rzsize JEMALLOC_CC_SILENCE_INIT(0); - size_t alignment = (ZU(1) << (flags & MALLOCX_LG_ALIGN_MASK) - & (SIZE_T_MAX-1)); + size_t alignment = MALLOCX_ALIGN_GET(flags); bool zero = flags & MALLOCX_ZERO; - unsigned arena_ind = ((unsigned)(flags >> 8)) - 1; bool try_tcache_alloc, try_tcache_dalloc; arena_t *arena; @@ -1552,7 +1602,8 @@ je_rallocx(void *ptr, size_t size, int flags) assert(malloc_initialized || IS_INITIALIZER); malloc_thread_init(); - if (arena_ind != UINT_MAX) { + if ((flags & MALLOCX_ARENA_MASK) != 0) { + unsigned arena_ind = MALLOCX_ARENA_GET(flags); arena_chunk_t *chunk; try_tcache_alloc = false; chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); @@ -1582,7 +1633,7 @@ je_rallocx(void *ptr, size_t size, int flags) if (p == NULL) goto label_oom; } else { - p = iralloct(ptr, size, 0, alignment, zero, try_tcache_alloc, + p = iralloct(ptr, size, alignment, zero, try_tcache_alloc, try_tcache_dalloc, arena); if (p == NULL) goto label_oom; @@ -1677,10 +1728,8 @@ je_xallocx(void *ptr, size_t size, 
size_t extra, int flags) { size_t usize, old_usize; UNUSED size_t old_rzsize JEMALLOC_CC_SILENCE_INIT(0); - size_t alignment = (ZU(1) << (flags & MALLOCX_LG_ALIGN_MASK) - & (SIZE_T_MAX-1)); + size_t alignment = MALLOCX_ALIGN_GET(flags); bool zero = flags & MALLOCX_ZERO; - unsigned arena_ind = ((unsigned)(flags >> 8)) - 1; arena_t *arena; assert(ptr != NULL); @@ -1689,9 +1738,10 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) assert(malloc_initialized || IS_INITIALIZER); malloc_thread_init(); - if (arena_ind != UINT_MAX) + if ((flags & MALLOCX_ARENA_MASK) != 0) { + unsigned arena_ind = MALLOCX_ARENA_GET(flags); arena = arenas[arena_ind]; - else + } else arena = NULL; old_usize = isalloc(ptr, config_prof); @@ -1753,15 +1803,13 @@ je_sallocx(const void *ptr, int flags) void je_dallocx(void *ptr, int flags) { - size_t usize; - UNUSED size_t rzsize JEMALLOC_CC_SILENCE_INIT(0); - unsigned arena_ind = ((unsigned)(flags >> 8)) - 1; bool try_tcache; assert(ptr != NULL); assert(malloc_initialized || IS_INITIALIZER); - if (arena_ind != UINT_MAX) { + if ((flags & MALLOCX_ARENA_MASK) != 0) { + unsigned arena_ind = MALLOCX_ARENA_GET(flags); arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); try_tcache = (chunk == ptr || chunk->arena != arenas[arena_ind]); @@ -1769,34 +1817,25 @@ je_dallocx(void *ptr, int flags) try_tcache = true; UTRACE(ptr, 0, 0); - if (config_stats || config_valgrind) - usize = isalloc(ptr, config_prof); - if (config_prof && opt_prof) { - if (config_stats == false && config_valgrind == false) - usize = isalloc(ptr, config_prof); - prof_free(ptr, usize); - } - if (config_stats) - thread_allocated_tsd_get()->deallocated += usize; - if (config_valgrind && in_valgrind) - rzsize = p2rz(ptr); - iqalloct(ptr, try_tcache); - JEMALLOC_VALGRIND_FREE(ptr, rzsize); + ifree(ptr, try_tcache); } size_t je_nallocx(size_t size, int flags) { size_t usize; - size_t alignment = (ZU(1) << (flags & MALLOCX_LG_ALIGN_MASK) - & (SIZE_T_MAX-1)); assert(size != 
0); if (malloc_init()) return (0); - usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment); + if ((flags & MALLOCX_LG_ALIGN_MASK) == 0) + usize = s2u(size); + else { + size_t alignment = MALLOCX_ALIGN_GET_SPECIFIED(flags); + usize = sa2u(size, alignment); + } assert(usize != 0); return (usize); } From 82e88d1ecfe3d7bf700355cb5023ab61559f9578 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 7 Sep 2014 19:55:03 -0700 Subject: [PATCH 0506/3378] Move typedefs from jemalloc_protos.h.in to jemalloc_typedefs.h.in. Move typedefs from jemalloc_protos.h.in to jemalloc_typedefs.h.in, so that typedefs aren't redefined when compiling stress tests. --- .gitignore | 1 + configure.ac | 3 +++ include/jemalloc/jemalloc.sh | 2 +- include/jemalloc/jemalloc_protos.h.in | 3 --- include/jemalloc/jemalloc_typedefs.h.in | 2 ++ 5 files changed, 7 insertions(+), 4 deletions(-) create mode 100644 include/jemalloc/jemalloc_typedefs.h.in diff --git a/.gitignore b/.gitignore index ec9c0b92..79d454f2 100644 --- a/.gitignore +++ b/.gitignore @@ -33,6 +33,7 @@ /include/jemalloc/jemalloc_protos.h /include/jemalloc/jemalloc_protos_jet.h /include/jemalloc/jemalloc_rename.h +/include/jemalloc/jemalloc_typedefs.h /src/*.[od] /src/*.gcda diff --git a/configure.ac b/configure.ac index 3b658852..ce4af213 100644 --- a/configure.ac +++ b/configure.ac @@ -545,6 +545,7 @@ cfgoutputs_in="${cfgoutputs_in} doc/manpages.xsl.in" cfgoutputs_in="${cfgoutputs_in} doc/jemalloc.xml.in" cfgoutputs_in="${cfgoutputs_in} include/jemalloc/jemalloc_macros.h.in" cfgoutputs_in="${cfgoutputs_in} include/jemalloc/jemalloc_protos.h.in" +cfgoutputs_in="${cfgoutputs_in} include/jemalloc/jemalloc_typedefs.h.in" cfgoutputs_in="${cfgoutputs_in} include/jemalloc/internal/jemalloc_internal.h.in" cfgoutputs_in="${cfgoutputs_in} test/test.sh.in" cfgoutputs_in="${cfgoutputs_in} test/include/test/jemalloc_test.h.in" @@ -555,6 +556,7 @@ cfgoutputs_out="${cfgoutputs_out} doc/manpages.xsl" cfgoutputs_out="${cfgoutputs_out} 
doc/jemalloc.xml" cfgoutputs_out="${cfgoutputs_out} include/jemalloc/jemalloc_macros.h" cfgoutputs_out="${cfgoutputs_out} include/jemalloc/jemalloc_protos.h" +cfgoutputs_out="${cfgoutputs_out} include/jemalloc/jemalloc_typedefs.h" cfgoutputs_out="${cfgoutputs_out} include/jemalloc/internal/jemalloc_internal.h" cfgoutputs_out="${cfgoutputs_out} test/test.sh" cfgoutputs_out="${cfgoutputs_out} test/include/test/jemalloc_test.h" @@ -565,6 +567,7 @@ cfgoutputs_tup="${cfgoutputs_tup} doc/manpages.xsl:doc/manpages.xsl.in" cfgoutputs_tup="${cfgoutputs_tup} doc/jemalloc.xml:doc/jemalloc.xml.in" cfgoutputs_tup="${cfgoutputs_tup} include/jemalloc/jemalloc_macros.h:include/jemalloc/jemalloc_macros.h.in" cfgoutputs_tup="${cfgoutputs_tup} include/jemalloc/jemalloc_protos.h:include/jemalloc/jemalloc_protos.h.in" +cfgoutputs_tup="${cfgoutputs_tup} include/jemalloc/jemalloc_typedefs.h:include/jemalloc/jemalloc_typedefs.h.in" cfgoutputs_tup="${cfgoutputs_tup} include/jemalloc/internal/jemalloc_internal.h" cfgoutputs_tup="${cfgoutputs_tup} test/test.sh:test/test.sh.in" cfgoutputs_tup="${cfgoutputs_tup} test/include/test/jemalloc_test.h:test/include/test/jemalloc_test.h.in" diff --git a/include/jemalloc/jemalloc.sh b/include/jemalloc/jemalloc.sh index e4738eba..7e1c8be1 100755 --- a/include/jemalloc/jemalloc.sh +++ b/include/jemalloc/jemalloc.sh @@ -12,7 +12,7 @@ extern "C" { EOF for hdr in jemalloc_defs.h jemalloc_rename.h jemalloc_macros.h \ - jemalloc_protos.h jemalloc_mangle.h ; do + jemalloc_protos.h jemalloc_typedefs.h jemalloc_mangle.h ; do cat "${objroot}include/jemalloc/${hdr}" \ | grep -v 'Generated from .* by configure\.' 
\ | sed -e 's/^#define /#define /g' \ diff --git a/include/jemalloc/jemalloc_protos.h.in b/include/jemalloc/jemalloc_protos.h.in index 67268c47..59aeee11 100644 --- a/include/jemalloc/jemalloc_protos.h.in +++ b/include/jemalloc/jemalloc_protos.h.in @@ -44,6 +44,3 @@ JEMALLOC_EXPORT void * @je_@memalign(size_t alignment, size_t size) #ifdef JEMALLOC_OVERRIDE_VALLOC JEMALLOC_EXPORT void * @je_@valloc(size_t size) JEMALLOC_ATTR(malloc); #endif - -typedef void *(chunk_alloc_t)(size_t, size_t, bool *, unsigned); -typedef bool (chunk_dalloc_t)(void *, size_t, unsigned); diff --git a/include/jemalloc/jemalloc_typedefs.h.in b/include/jemalloc/jemalloc_typedefs.h.in new file mode 100644 index 00000000..47e57ca7 --- /dev/null +++ b/include/jemalloc/jemalloc_typedefs.h.in @@ -0,0 +1,2 @@ +typedef void *(chunk_alloc_t)(size_t, size_t, bool *, unsigned); +typedef bool (chunk_dalloc_t)(void *, size_t, unsigned); From b67ec3c4973e8f7ca272c13472aa98c8a3ba4de4 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 7 Sep 2014 19:57:24 -0700 Subject: [PATCH 0507/3378] Add a simple timer implementation for use in benchmarking. 
--- Makefile.in | 2 +- test/include/test/jemalloc_test.h.in | 2 + test/include/test/timer.h | 15 ++++++++ test/src/timer.c | 57 ++++++++++++++++++++++++++++ 4 files changed, 75 insertions(+), 1 deletion(-) create mode 100644 test/include/test/timer.h create mode 100644 test/src/timer.c diff --git a/Makefile.in b/Makefile.in index b5f0ee90..d3e91b56 100644 --- a/Makefile.in +++ b/Makefile.in @@ -108,7 +108,7 @@ DOCS_MAN3 := $(DOCS_XML:$(objroot)%.xml=$(srcroot)%.3) DOCS := $(DOCS_HTML) $(DOCS_MAN3) C_TESTLIB_SRCS := $(srcroot)test/src/math.c $(srcroot)test/src/mtx.c \ $(srcroot)test/src/SFMT.c $(srcroot)test/src/test.c \ - $(srcroot)test/src/thd.c + $(srcroot)test/src/thd.c $(srcroot)test/src/timer.c C_UTIL_INTEGRATION_SRCS := $(srcroot)src/util.c TESTS_UNIT := $(srcroot)test/unit/atomic.c \ $(srcroot)test/unit/bitmap.c \ diff --git a/test/include/test/jemalloc_test.h.in b/test/include/test/jemalloc_test.h.in index 730a55db..a93c4f67 100644 --- a/test/include/test/jemalloc_test.h.in +++ b/test/include/test/jemalloc_test.h.in @@ -5,6 +5,7 @@ #include #include #include +#include #ifdef _WIN32 # include @@ -136,6 +137,7 @@ #include "test/mtx.h" #include "test/mq.h" #include "test/test.h" +#include "test/timer.h" #include "test/thd.h" #define MEXP 19937 #include "test/SFMT.h" diff --git a/test/include/test/timer.h b/test/include/test/timer.h new file mode 100644 index 00000000..f21ccf10 --- /dev/null +++ b/test/include/test/timer.h @@ -0,0 +1,15 @@ +/* + * Simple timer, for use in benchmark reporting. 
+ */ + +#include + +typedef struct { + struct timeval tv0; + struct timeval tv1; +} timer_t; + +void timer_start(timer_t *timer); +void timer_stop(timer_t *timer); +uint64_t timer_usec(const timer_t *timer); +void timer_ratio(timer_t *a, timer_t *b, char *buf, size_t buflen); diff --git a/test/src/timer.c b/test/src/timer.c new file mode 100644 index 00000000..17ead172 --- /dev/null +++ b/test/src/timer.c @@ -0,0 +1,57 @@ +#include "test/jemalloc_test.h" + +void +timer_start(timer_t *timer) +{ + + gettimeofday(&timer->tv0, NULL); +} + +void +timer_stop(timer_t *timer) +{ + + gettimeofday(&timer->tv1, NULL); +} + +uint64_t +timer_usec(const timer_t *timer) +{ + + return (((timer->tv1.tv_sec - timer->tv0.tv_sec) * 1000000) + + timer->tv1.tv_usec - timer->tv0.tv_usec); +} + +void +timer_ratio(timer_t *a, timer_t *b, char *buf, size_t buflen) +{ + uint64_t t0 = timer_usec(a); + uint64_t t1 = timer_usec(b); + uint64_t mult; + unsigned i = 0; + unsigned j; + int n; + + /* Whole. */ + n = malloc_snprintf(&buf[i], buflen-i, "%"PRIu64, t0 / t1); + i += n; + if (i >= buflen) + return; + mult = 1; + for (j = 0; j < n; j++) + mult *= 10; + + /* Decimal. */ + n = malloc_snprintf(&buf[i], buflen-i, "."); + i += n; + + /* Fraction. */ + while (i < buflen-1) { + uint64_t round = (i+1 == buflen-1 && ((t0 * mult * 10 / t1) % 10 + >= 5)) ? 1 : 0; + n = malloc_snprintf(&buf[i], buflen-i, + "%"PRIu64, (t0 * mult / t1) % 10 + round); + i += n; + mult *= 10; + } +} From 423d78a21bc6c9a038bdf436ad2cee194560d488 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 7 Sep 2014 19:58:04 -0700 Subject: [PATCH 0508/3378] Add microbench tests. 
--- Makefile.in | 2 +- test/stress/microbench.c | 142 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 143 insertions(+), 1 deletion(-) create mode 100644 test/stress/microbench.c diff --git a/Makefile.in b/Makefile.in index d3e91b56..1446dbe6 100644 --- a/Makefile.in +++ b/Makefile.in @@ -144,7 +144,7 @@ TESTS_INTEGRATION := $(srcroot)test/integration/aligned_alloc.c \ $(srcroot)test/integration/thread_tcache_enabled.c \ $(srcroot)test/integration/xallocx.c \ $(srcroot)test/integration/chunk.c -TESTS_STRESS := +TESTS_STRESS := $(srcroot)test/stress/microbench.c TESTS := $(TESTS_UNIT) $(TESTS_INTEGRATION) $(TESTS_STRESS) C_OBJS := $(C_SRCS:$(srcroot)%.c=$(objroot)%.$(O)) diff --git a/test/stress/microbench.c b/test/stress/microbench.c new file mode 100644 index 00000000..8c252153 --- /dev/null +++ b/test/stress/microbench.c @@ -0,0 +1,142 @@ +#include "test/jemalloc_test.h" + +JEMALLOC_INLINE_C void +time_func(timer_t *timer, uint64_t nwarmup, uint64_t niter, void (*func)(void)) +{ + uint64_t i; + + for (i = 0; i < nwarmup; i++) + func(); + timer_start(timer); + for (i = 0; i < niter; i++) + func(); + timer_stop(timer); +} + +void +compare_funcs(uint64_t nwarmup, uint64_t niter, const char *name_a, + void (*func_a), const char *name_b, void (*func_b)) +{ + timer_t timer_a, timer_b; + char ratio_buf[6]; + + time_func(&timer_a, nwarmup, niter, func_a); + time_func(&timer_b, nwarmup, niter, func_b); + + timer_ratio(&timer_a, &timer_b, ratio_buf, sizeof(ratio_buf)); + malloc_printf("%"PRIu64" iterations, %s=%"PRIu64"us, " + "%s=%"PRIu64"us, ratio=1:%s\n", + niter, name_a, timer_usec(&timer_a), name_b, timer_usec(&timer_b), + ratio_buf); +} + +static void +malloc_vs_mallocx_malloc(void) +{ + + free(malloc(1)); +} + +static void +malloc_vs_mallocx_mallocx(void) +{ + + free(mallocx(1, 0)); +} + +TEST_BEGIN(test_malloc_vs_mallocx) +{ + + compare_funcs(10*1000*1000, 100*1000*1000, "malloc", + malloc_vs_mallocx_malloc, "mallocx", malloc_vs_mallocx_mallocx); +} 
+TEST_END + +static void +free_vs_dallocx_free(void) +{ + + free(malloc(1)); +} + +static void +free_vs_dallocx_dallocx(void) +{ + + dallocx(malloc(1), 0); +} + +TEST_BEGIN(test_free_vs_dallocx) +{ + + compare_funcs(10*1000*1000, 100*1000*1000, "free", free_vs_dallocx_free, + "dallocx", free_vs_dallocx_dallocx); +} +TEST_END + +static void +mus_vs_sallocx_mus(void) +{ + void *p; + + p = malloc(1); + malloc_usable_size(p); + free(p); +} + +static void +mus_vs_sallocx_sallocx(void) +{ + void *p; + + p = malloc(1); + sallocx(p, 0); + free(p); +} + +TEST_BEGIN(test_mus_vs_sallocx) +{ + + compare_funcs(10*1000*1000, 100*1000*1000, "malloc_usable_size", + mus_vs_sallocx_mus, "sallocx", mus_vs_sallocx_sallocx); +} +TEST_END + +static void +sallocx_vs_nallocx_sallocx(void) +{ + void *p; + + p = malloc(1); + sallocx(p, 0); + free(p); +} + +static void +sallocx_vs_nallocx_nallocx(void) +{ + void *p; + + p = malloc(1); + nallocx(1, 0); + free(p); +} + +TEST_BEGIN(test_sallocx_vs_nallocx) +{ + + compare_funcs(10*1000*1000, 100*1000*1000, "sallocx", + sallocx_vs_nallocx_sallocx, "nallocx", sallocx_vs_nallocx_nallocx); +} +TEST_END + +int +main(void) +{ + + return (test( + test_malloc_vs_mallocx, + test_free_vs_dallocx, + test_mus_vs_sallocx, + test_sallocx_vs_nallocx)); +} From c3bfe9569a9927dc881b6d8ac025c423d66a541f Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Mon, 8 Sep 2014 00:46:12 -0400 Subject: [PATCH 0509/3378] avoid conflict with the POSIX timer_t type It hits a compilation error with glibc 2.19 without a rename. 
--- test/include/test/timer.h | 10 +++++----- test/src/timer.c | 8 ++++---- test/stress/microbench.c | 4 ++-- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/test/include/test/timer.h b/test/include/test/timer.h index f21ccf10..6877e4ac 100644 --- a/test/include/test/timer.h +++ b/test/include/test/timer.h @@ -7,9 +7,9 @@ typedef struct { struct timeval tv0; struct timeval tv1; -} timer_t; +} timedelta_t; -void timer_start(timer_t *timer); -void timer_stop(timer_t *timer); -uint64_t timer_usec(const timer_t *timer); -void timer_ratio(timer_t *a, timer_t *b, char *buf, size_t buflen); +void timer_start(timedelta_t *timer); +void timer_stop(timedelta_t *timer); +uint64_t timer_usec(const timedelta_t *timer); +void timer_ratio(timedelta_t *a, timedelta_t *b, char *buf, size_t buflen); diff --git a/test/src/timer.c b/test/src/timer.c index 17ead172..36fbedd4 100644 --- a/test/src/timer.c +++ b/test/src/timer.c @@ -1,21 +1,21 @@ #include "test/jemalloc_test.h" void -timer_start(timer_t *timer) +timer_start(timedelta_t *timer) { gettimeofday(&timer->tv0, NULL); } void -timer_stop(timer_t *timer) +timer_stop(timedelta_t *timer) { gettimeofday(&timer->tv1, NULL); } uint64_t -timer_usec(const timer_t *timer) +timer_usec(const timedelta_t *timer) { return (((timer->tv1.tv_sec - timer->tv0.tv_sec) * 1000000) + @@ -23,7 +23,7 @@ timer_usec(const timer_t *timer) } void -timer_ratio(timer_t *a, timer_t *b, char *buf, size_t buflen) +timer_ratio(timedelta_t *a, timedelta_t *b, char *buf, size_t buflen) { uint64_t t0 = timer_usec(a); uint64_t t1 = timer_usec(b); diff --git a/test/stress/microbench.c b/test/stress/microbench.c index 8c252153..616f361c 100644 --- a/test/stress/microbench.c +++ b/test/stress/microbench.c @@ -1,7 +1,7 @@ #include "test/jemalloc_test.h" JEMALLOC_INLINE_C void -time_func(timer_t *timer, uint64_t nwarmup, uint64_t niter, void (*func)(void)) +time_func(timedelta_t *timer, uint64_t nwarmup, uint64_t niter, void (*func)(void)) { uint64_t i; 
@@ -17,7 +17,7 @@ void compare_funcs(uint64_t nwarmup, uint64_t niter, const char *name_a, void (*func_a), const char *name_b, void (*func_b)) { - timer_t timer_a, timer_b; + timedelta_t timer_a, timer_b; char ratio_buf[6]; time_func(&timer_a, nwarmup, niter, func_a); From a1f3929ffd1bd958734a2747cf2000a9b2a5db0b Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 8 Sep 2014 16:23:48 -0700 Subject: [PATCH 0510/3378] Thwart optimization of free(malloc(1)) in microbench. --- test/stress/microbench.c | 44 +++++++++++++++++++++++----------------- 1 file changed, 25 insertions(+), 19 deletions(-) diff --git a/test/stress/microbench.c b/test/stress/microbench.c index 616f361c..8e1017cc 100644 --- a/test/stress/microbench.c +++ b/test/stress/microbench.c @@ -31,46 +31,52 @@ compare_funcs(uint64_t nwarmup, uint64_t niter, const char *name_a, } static void -malloc_vs_mallocx_malloc(void) +malloc_free(void) { - - free(malloc(1)); + /* The compiler can optimize away free(malloc(1))! */ + void *p = malloc(1); + if (p == NULL) { + test_fail("Unexpected malloc() failure"); + return; + } + free(p); } static void -malloc_vs_mallocx_mallocx(void) +mallocx_free(void) { - - free(mallocx(1, 0)); + void *p = mallocx(1, 0); + if (p == NULL) { + test_fail("Unexpected mallocx() failure"); + return; + } + free(p); } TEST_BEGIN(test_malloc_vs_mallocx) { compare_funcs(10*1000*1000, 100*1000*1000, "malloc", - malloc_vs_mallocx_malloc, "mallocx", malloc_vs_mallocx_mallocx); + malloc_free, "mallocx", mallocx_free); } TEST_END static void -free_vs_dallocx_free(void) +malloc_dallocx(void) { - - free(malloc(1)); -} - -static void -free_vs_dallocx_dallocx(void) -{ - - dallocx(malloc(1), 0); + void *p = malloc(1); + if (p == NULL) { + test_fail("Unexpected malloc() failure"); + return; + } + dallocx(p, 0); } TEST_BEGIN(test_free_vs_dallocx) { - compare_funcs(10*1000*1000, 100*1000*1000, "free", free_vs_dallocx_free, - "dallocx", free_vs_dallocx_dallocx); + compare_funcs(10*1000*1000, 
100*1000*1000, "free", malloc_free, + "dallocx", malloc_dallocx); } TEST_END From c3f865074923bf388742da3ec52dca857a0960a2 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 8 Sep 2014 16:47:51 -0700 Subject: [PATCH 0511/3378] Add relevant function attributes to [msn]allocx(). --- include/jemalloc/jemalloc_protos.h.in | 9 ++++++--- test/stress/microbench.c | 26 +++++++++----------------- 2 files changed, 15 insertions(+), 20 deletions(-) diff --git a/include/jemalloc/jemalloc_protos.h.in b/include/jemalloc/jemalloc_protos.h.in index 59aeee11..b365eb4a 100644 --- a/include/jemalloc/jemalloc_protos.h.in +++ b/include/jemalloc/jemalloc_protos.h.in @@ -17,13 +17,16 @@ JEMALLOC_EXPORT void *@je_@aligned_alloc(size_t alignment, size_t size) JEMALLOC_EXPORT void *@je_@realloc(void *ptr, size_t size); JEMALLOC_EXPORT void @je_@free(void *ptr); -JEMALLOC_EXPORT void *@je_@mallocx(size_t size, int flags); +JEMALLOC_EXPORT void *@je_@mallocx(size_t size, int flags) + JEMALLOC_ATTR(malloc); JEMALLOC_EXPORT void *@je_@rallocx(void *ptr, size_t size, int flags); JEMALLOC_EXPORT size_t @je_@xallocx(void *ptr, size_t size, size_t extra, int flags); -JEMALLOC_EXPORT size_t @je_@sallocx(const void *ptr, int flags); +JEMALLOC_EXPORT size_t @je_@sallocx(const void *ptr, int flags) + JEMALLOC_ATTR(pure); JEMALLOC_EXPORT void @je_@dallocx(void *ptr, int flags); -JEMALLOC_EXPORT size_t @je_@nallocx(size_t size, int flags); +JEMALLOC_EXPORT size_t @je_@nallocx(size_t size, int flags) + JEMALLOC_ATTR(pure); JEMALLOC_EXPORT int @je_@mallctl(const char *name, void *oldp, size_t *oldlenp, void *newp, size_t newlen); diff --git a/test/stress/microbench.c b/test/stress/microbench.c index 8e1017cc..60c02db3 100644 --- a/test/stress/microbench.c +++ b/test/stress/microbench.c @@ -81,7 +81,7 @@ TEST_BEGIN(test_free_vs_dallocx) TEST_END static void -mus_vs_sallocx_mus(void) +malloc_mus_free(void) { void *p; @@ -91,12 +91,13 @@ mus_vs_sallocx_mus(void) } static void -mus_vs_sallocx_sallocx(void) 
+malloc_sallocx_free(void) { void *p; p = malloc(1); - sallocx(p, 0); + if (sallocx(p, 0) < 1) + test_fail("Unexpected sallocx() failure"); free(p); } @@ -104,27 +105,18 @@ TEST_BEGIN(test_mus_vs_sallocx) { compare_funcs(10*1000*1000, 100*1000*1000, "malloc_usable_size", - mus_vs_sallocx_mus, "sallocx", mus_vs_sallocx_sallocx); + malloc_mus_free, "sallocx", malloc_sallocx_free); } TEST_END static void -sallocx_vs_nallocx_sallocx(void) +malloc_nallocx_free(void) { void *p; p = malloc(1); - sallocx(p, 0); - free(p); -} - -static void -sallocx_vs_nallocx_nallocx(void) -{ - void *p; - - p = malloc(1); - nallocx(1, 0); + if (nallocx(1, 0) < 1) + test_fail("Unexpected nallocx() failure"); free(p); } @@ -132,7 +124,7 @@ TEST_BEGIN(test_sallocx_vs_nallocx) { compare_funcs(10*1000*1000, 100*1000*1000, "sallocx", - sallocx_vs_nallocx_sallocx, "nallocx", sallocx_vs_nallocx_nallocx); + malloc_sallocx_free, "nallocx", malloc_nallocx_free); } TEST_END From 4cfe55166e0173be745c53adb0fecf50d11d1227 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Thu, 28 Aug 2014 15:41:48 -0400 Subject: [PATCH 0512/3378] Add support for sized deallocation. This adds a new `sdallocx` function to the external API, allowing the size to be passed by the caller. It avoids some extra reads in the thread cache fast path. In the case where stats are enabled, this avoids the work of calculating the size from the pointer. An assertion validates the size that's passed in, so enabling debugging will allow users of the API to debug cases where an incorrect size is passed in. The performance win for a contrived microbenchmark doing an allocation and immediately freeing it is ~10%. It may have a different impact on a real workload. 
Closes #28 --- Makefile.in | 1 + configure.ac | 2 +- doc/jemalloc.xml.in | 19 ++++++- include/jemalloc/internal/arena.h | 33 ++++++++++- .../jemalloc/internal/jemalloc_internal.h.in | 26 +++++++++ include/jemalloc/internal/private_symbols.txt | 3 + include/jemalloc/jemalloc_protos.h.in | 1 + src/jemalloc.c | 44 ++++++++++++++ test/integration/sdallocx.c | 57 +++++++++++++++++++ test/stress/microbench.c | 20 +++++++ 10 files changed, 201 insertions(+), 5 deletions(-) create mode 100644 test/integration/sdallocx.c diff --git a/Makefile.in b/Makefile.in index 1446dbe6..ac56d8fa 100644 --- a/Makefile.in +++ b/Makefile.in @@ -136,6 +136,7 @@ TESTS_UNIT_AUX := $(srcroot)test/unit/prof_accum_a.c \ $(srcroot)test/unit/prof_accum_b.c TESTS_INTEGRATION := $(srcroot)test/integration/aligned_alloc.c \ $(srcroot)test/integration/allocated.c \ + $(srcroot)test/integration/sdallocx.c \ $(srcroot)test/integration/mallocx.c \ $(srcroot)test/integration/MALLOCX_ARENA.c \ $(srcroot)test/integration/posix_memalign.c \ diff --git a/configure.ac b/configure.ac index ce4af213..d221876c 100644 --- a/configure.ac +++ b/configure.ac @@ -452,7 +452,7 @@ AC_PROG_RANLIB AC_PATH_PROG([LD], [ld], [false], [$PATH]) AC_PATH_PROG([AUTOCONF], [autoconf], [false], [$PATH]) -public_syms="malloc_conf malloc_message malloc calloc posix_memalign aligned_alloc realloc free mallocx rallocx xallocx sallocx dallocx nallocx mallctl mallctlnametomib mallctlbymib malloc_stats_print malloc_usable_size" +public_syms="malloc_conf malloc_message malloc calloc posix_memalign aligned_alloc realloc free mallocx rallocx xallocx sallocx dallocx sdallocx nallocx mallctl mallctlnametomib mallctlbymib malloc_stats_print malloc_usable_size" dnl Check for allocator-related functions that should be wrapped. 
AC_CHECK_FUNC([memalign], diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 8f4327f3..e5c229fe 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -38,6 +38,7 @@ xallocx sallocx dallocx + sdallocx nallocx mallctl mallctlnametomib @@ -120,6 +121,12 @@ void *ptr int flags + + void sdallocx + void *ptr + size_t size + int flags + size_t nallocx size_t size @@ -228,7 +235,8 @@ rallocx, xallocx, sallocx, - dallocx, and + dallocx, + sdallocx, and nallocx functions all have a flags argument that can be used to specify options. The functions only check the options that are contextually @@ -312,6 +320,15 @@ memory referenced by ptr to be made available for future allocations. + The sdallocx function is an + extension of dallocx with a + size parameter to allow the caller to pass in the + allocation size as an optimization. The minimum valid input size is the + original requested size of the allocation, and the maximum valid input + size is the corresponding value returned by + nallocx or + sallocx. + The nallocx function allocates no memory, but it performs the same size computation as the mallocx function, and returns the real diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 166d0523..6ab0ae71 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -488,6 +488,7 @@ void arena_prof_tctx_set(const void *ptr, prof_tctx_t *tctx); void *arena_malloc(arena_t *arena, size_t size, bool zero, bool try_tcache); size_t arena_salloc(const void *ptr, bool demote); void arena_dalloc(arena_chunk_t *chunk, void *ptr, bool try_tcache); +void arena_sdalloc(arena_chunk_t *chunk, void *ptr, size_t size, bool try_tcache); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_)) @@ -1139,9 +1140,7 @@ arena_dalloc(arena_chunk_t *chunk, void *ptr, bool try_tcache) if ((mapbits & CHUNK_MAP_LARGE) == 0) { /* Small allocation. 
*/ if (try_tcache && (tcache = tcache_get(false)) != NULL) { - size_t binind; - - binind = arena_ptr_small_binind_get(ptr, mapbits); + size_t binind = arena_ptr_small_binind_get(ptr, mapbits); tcache_dalloc_small(tcache, ptr, binind); } else arena_dalloc_small(chunk->arena, chunk, ptr, pageind); @@ -1157,6 +1156,34 @@ arena_dalloc(arena_chunk_t *chunk, void *ptr, bool try_tcache) arena_dalloc_large(chunk->arena, chunk, ptr); } } + +JEMALLOC_ALWAYS_INLINE void +arena_sdalloc(arena_chunk_t *chunk, void *ptr, size_t size, bool try_tcache) +{ + tcache_t *tcache; + + assert(ptr != NULL); + assert(CHUNK_ADDR2BASE(ptr) != ptr); + + if (size < PAGE) { + /* Small allocation. */ + if (try_tcache && (tcache = tcache_get(false)) != NULL) { + size_t binind = small_size2bin(size); + tcache_dalloc_small(tcache, ptr, binind); + } else { + size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; + arena_dalloc_small(chunk->arena, chunk, ptr, pageind); + } + } else { + assert(((uintptr_t)ptr & PAGE_MASK) == 0); + + if (try_tcache && size <= tcache_maxclass && (tcache = + tcache_get(false)) != NULL) { + tcache_dalloc_large(tcache, ptr, size); + } else + arena_dalloc_large(chunk->arena, chunk, ptr); + } +} # endif /* JEMALLOC_ARENA_INLINE_C */ #endif diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 59ae8d55..c0e326d4 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -634,8 +634,10 @@ size_t ivsalloc(const void *ptr, bool demote); size_t u2rz(size_t usize); size_t p2rz(const void *ptr); void idalloct(void *ptr, bool try_tcache); +void isdalloct(void *ptr, size_t size, bool try_tcache); void idalloc(void *ptr); void iqalloc(void *ptr, bool try_tcache); +void isqalloc(void *ptr, size_t size, bool try_tcache); void *iralloct_realign(void *ptr, size_t oldsize, size_t size, size_t extra, size_t alignment, bool zero, bool try_tcache_alloc, bool 
try_tcache_dalloc, arena_t *arena); @@ -787,6 +789,20 @@ idalloct(void *ptr, bool try_tcache) huge_dalloc(ptr); } +JEMALLOC_ALWAYS_INLINE void +isdalloct(void *ptr, size_t size, bool try_tcache) +{ + arena_chunk_t *chunk; + + assert(ptr != NULL); + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + if (chunk != ptr) + arena_sdalloc(chunk, ptr, size, try_tcache); + else + huge_dalloc(ptr); +} + JEMALLOC_ALWAYS_INLINE void idalloc(void *ptr) { @@ -804,6 +820,16 @@ iqalloc(void *ptr, bool try_tcache) idalloct(ptr, try_tcache); } +JEMALLOC_ALWAYS_INLINE void +isqalloc(void *ptr, size_t size, bool try_tcache) +{ + + if (config_fill && opt_quarantine) + quarantine(ptr); + else + idalloct(ptr, try_tcache); +} + JEMALLOC_ALWAYS_INLINE void * iralloct_realign(void *ptr, size_t oldsize, size_t size, size_t extra, size_t alignment, bool zero, bool try_tcache_alloc, bool try_tcache_dalloc, diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 84f05910..3b990b0e 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -61,6 +61,7 @@ arena_ralloc_no_move arena_redzone_corruption arena_run_regind arena_salloc +arena_sdalloc arena_stats_merge arena_tcache_fill_small arenas @@ -228,7 +229,9 @@ iralloc iralloct iralloct_realign isalloc +isdalloct isthreaded +isqalloc ivsalloc ixalloc jemalloc_postfork_child diff --git a/include/jemalloc/jemalloc_protos.h.in b/include/jemalloc/jemalloc_protos.h.in index b365eb4a..f81adc14 100644 --- a/include/jemalloc/jemalloc_protos.h.in +++ b/include/jemalloc/jemalloc_protos.h.in @@ -25,6 +25,7 @@ JEMALLOC_EXPORT size_t @je_@xallocx(void *ptr, size_t size, size_t extra, JEMALLOC_EXPORT size_t @je_@sallocx(const void *ptr, int flags) JEMALLOC_ATTR(pure); JEMALLOC_EXPORT void @je_@dallocx(void *ptr, int flags); +JEMALLOC_EXPORT void @je_@sdallocx(void *ptr, size_t size, int flags); JEMALLOC_EXPORT size_t @je_@nallocx(size_t size, int 
flags) JEMALLOC_ATTR(pure); diff --git a/src/jemalloc.c b/src/jemalloc.c index 71e921b5..527782e8 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1223,6 +1223,24 @@ ifree(void *ptr, bool try_tcache) JEMALLOC_VALGRIND_FREE(ptr, rzsize); } +JEMALLOC_INLINE_C void +isfree(void *ptr, size_t usize, bool try_tcache) +{ + UNUSED size_t rzsize JEMALLOC_CC_SILENCE_INIT(0); + + assert(ptr != NULL); + assert(malloc_initialized || IS_INITIALIZER); + + if (config_prof && opt_prof) + prof_free(ptr, usize); + if (config_stats) + thread_allocated_tsd_get()->deallocated += usize; + if (config_valgrind && in_valgrind) + rzsize = p2rz(ptr); + isqalloc(ptr, usize, try_tcache); + JEMALLOC_VALGRIND_FREE(ptr, rzsize); +} + void * je_realloc(void *ptr, size_t size) { @@ -1820,6 +1838,32 @@ je_dallocx(void *ptr, int flags) ifree(ptr, try_tcache); } +void +je_sdallocx(void *ptr, size_t size, int flags) +{ + bool try_tcache; + + assert(ptr != NULL); + assert(malloc_initialized || IS_INITIALIZER); + assert(size == isalloc(ptr, config_prof)); + + if ((flags & MALLOCX_LG_ALIGN_MASK) == 0) + size = s2u(size); + else + size = sa2u(size, MALLOCX_ALIGN_GET_SPECIFIED(flags)); + + if ((flags & MALLOCX_ARENA_MASK) != 0) { + unsigned arena_ind = MALLOCX_ARENA_GET(flags); + arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + try_tcache = (chunk == ptr || chunk->arena != + arenas[arena_ind]); + } else + try_tcache = true; + + UTRACE(ptr, 0, 0); + isfree(ptr, size, try_tcache); +} + size_t je_nallocx(size_t size, int flags) { diff --git a/test/integration/sdallocx.c b/test/integration/sdallocx.c new file mode 100644 index 00000000..b84817d7 --- /dev/null +++ b/test/integration/sdallocx.c @@ -0,0 +1,57 @@ +#include "test/jemalloc_test.h" + +#define MAXALIGN (((size_t)1) << 25) +#define NITER 4 + +TEST_BEGIN(test_basic) +{ + void *ptr = mallocx(64, 0); + sdallocx(ptr, 64, 0); +} +TEST_END + +TEST_BEGIN(test_alignment_and_size) +{ + size_t nsz, sz, alignment, total; + unsigned i; + void 
*ps[NITER]; + + for (i = 0; i < NITER; i++) + ps[i] = NULL; + + for (alignment = 8; + alignment <= MAXALIGN; + alignment <<= 1) { + total = 0; + for (sz = 1; + sz < 3 * alignment && sz < (1U << 31); + sz += (alignment >> (LG_SIZEOF_PTR-1)) - 1) { + for (i = 0; i < NITER; i++) { + nsz = nallocx(sz, MALLOCX_ALIGN(alignment) | + MALLOCX_ZERO); + ps[i] = mallocx(sz, MALLOCX_ALIGN(alignment) | + MALLOCX_ZERO); + total += nsz; + if (total >= (MAXALIGN << 1)) + break; + } + for (i = 0; i < NITER; i++) { + if (ps[i] != NULL) { + sdallocx(ps[i], sz, + MALLOCX_ALIGN(alignment)); + ps[i] = NULL; + } + } + } + } +} +TEST_END + +int +main(void) +{ + + return (test( + test_basic, + test_alignment_and_size)); +} diff --git a/test/stress/microbench.c b/test/stress/microbench.c index 60c02db3..a8267c39 100644 --- a/test/stress/microbench.c +++ b/test/stress/microbench.c @@ -72,6 +72,17 @@ malloc_dallocx(void) dallocx(p, 0); } +static void +malloc_sdallocx(void) +{ + void *p = malloc(1); + if (p == NULL) { + test_fail("Unexpected malloc() failure"); + return; + } + sdallocx(p, 1, 0); +} + TEST_BEGIN(test_free_vs_dallocx) { @@ -80,6 +91,14 @@ TEST_BEGIN(test_free_vs_dallocx) } TEST_END +TEST_BEGIN(test_dallocx_vs_sdallocx) +{ + + compare_funcs(10*1000*1000, 100*1000*1000, "dallocx", malloc_dallocx, + "sdallocx", malloc_sdallocx); +} +TEST_END + static void malloc_mus_free(void) { @@ -135,6 +154,7 @@ main(void) return (test( test_malloc_vs_mallocx, test_free_vs_dallocx, + test_dallocx_vs_sdallocx, test_mus_vs_sallocx, test_sallocx_vs_nallocx)); } From a62812eacca8ac3ce81f27c9480b44b2a97ff66c Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Mon, 8 Sep 2014 21:43:21 -0400 Subject: [PATCH 0513/3378] fix isqalloct (should call isdalloct) --- include/jemalloc/internal/jemalloc_internal.h.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index c0e326d4..81d46fc3 100644 --- 
a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -827,7 +827,7 @@ isqalloc(void *ptr, size_t size, bool try_tcache) if (config_fill && opt_quarantine) quarantine(ptr); else - idalloct(ptr, try_tcache); + isdalloct(ptr, size, try_tcache); } JEMALLOC_ALWAYS_INLINE void * From d95e704feadd44cc6d9eb8695b9cff7ac6d4c88f Mon Sep 17 00:00:00 2001 From: Bert Maher Date: Fri, 5 Sep 2014 14:10:37 -0700 Subject: [PATCH 0514/3378] Support threaded heap profiles in pprof - Add a --thread N option to select profile for thread N (otherwise, all threads will be printed) - The $profile map now has a {threads} element that is a map from thread id to a profile that has the same format as the {profile} element - Refactor ReadHeapProfile into smaller components and use them to implement ReadThreadedHeapProfile --- bin/pprof | 405 +++++++++++++++++++++++++++++++++++------------------- 1 file changed, 265 insertions(+), 140 deletions(-) diff --git a/bin/pprof b/bin/pprof index 328138cd..52da6004 100755 --- a/bin/pprof +++ b/bin/pprof @@ -2,11 +2,11 @@ # Copyright (c) 1998-2007, Google Inc. # All rights reserved. -# +# # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: -# +# # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above @@ -16,7 +16,7 @@ # * Neither the name of Google Inc. nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. 
-# +# # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR @@ -223,6 +223,7 @@ Call-graph Options: --edgefraction= Hide edges below *total [default=.001] --maxdegree= Max incoming/outgoing edges per node [default=8] --focus= Focus on nodes matching + --thread= Show profile for thread --ignore= Ignore nodes matching --scale= Set GV scaling [default=0] --heapcheck Make nodes with non-0 object counts @@ -332,6 +333,7 @@ sub Init() { $main::opt_edgefraction = 0.001; $main::opt_maxdegree = 8; $main::opt_focus = ''; + $main::opt_thread = undef; $main::opt_ignore = ''; $main::opt_scale = 0; $main::opt_heapcheck = 0; @@ -402,6 +404,7 @@ sub Init() { "edgefraction=f" => \$main::opt_edgefraction, "maxdegree=i" => \$main::opt_maxdegree, "focus=s" => \$main::opt_focus, + "thread=i" => \$main::opt_thread, "ignore=s" => \$main::opt_ignore, "scale=i" => \$main::opt_scale, "heapcheck" => \$main::opt_heapcheck, @@ -562,6 +565,86 @@ sub Init() { } } +sub FilterAndPrint { + my ($profile, $symbols, $libs, $thread) = @_; + + # Get total data in profile + my $total = TotalProfile($profile); + + # Remove uniniteresting stack items + $profile = RemoveUninterestingFrames($symbols, $profile); + + # Focus? + if ($main::opt_focus ne '') { + $profile = FocusProfile($symbols, $profile, $main::opt_focus); + } + + # Ignore? + if ($main::opt_ignore ne '') { + $profile = IgnoreProfile($symbols, $profile, $main::opt_ignore); + } + + my $calls = ExtractCalls($symbols, $profile); + + # Reduce profiles to required output granularity, and also clean + # each stack trace so a given entry exists at most once. 
+ my $reduced = ReduceProfile($symbols, $profile); + + # Get derived profiles + my $flat = FlatProfile($reduced); + my $cumulative = CumulativeProfile($reduced); + + # Print + if (!$main::opt_interactive) { + if ($main::opt_disasm) { + PrintDisassembly($libs, $flat, $cumulative, $main::opt_disasm); + } elsif ($main::opt_list) { + PrintListing($total, $libs, $flat, $cumulative, $main::opt_list, 0); + } elsif ($main::opt_text) { + # Make sure the output is empty when have nothing to report + # (only matters when --heapcheck is given but we must be + # compatible with old branches that did not pass --heapcheck always): + if ($total != 0) { + printf("Total%s: %s %s\n", + (defined($thread) ? " (t$thread)" : ""), + Unparse($total), Units()); + } + PrintText($symbols, $flat, $cumulative, -1); + } elsif ($main::opt_raw) { + PrintSymbolizedProfile($symbols, $profile, $main::prog); + } elsif ($main::opt_callgrind) { + PrintCallgrind($calls); + } else { + if (PrintDot($main::prog, $symbols, $profile, $flat, $cumulative, $total)) { + if ($main::opt_gv) { + RunGV(TempName($main::next_tmpfile, "ps"), ""); + } elsif ($main::opt_evince) { + RunEvince(TempName($main::next_tmpfile, "pdf"), ""); + } elsif ($main::opt_web) { + my $tmp = TempName($main::next_tmpfile, "svg"); + RunWeb($tmp); + # The command we run might hand the file name off + # to an already running browser instance and then exit. + # Normally, we'd remove $tmp on exit (right now), + # but fork a child to remove $tmp a little later, so that the + # browser has time to load it first. 
+ delete $main::tempnames{$tmp}; + if (fork() == 0) { + sleep 5; + unlink($tmp); + exit(0); + } + } + } else { + cleanup(); + exit(1); + } + } + } else { + InteractiveMode($profile, $symbols, $libs, $total); + } +} + sub Main() { Init(); $main::collected_profile = undef; @@ -605,9 +688,6 @@ sub Main() { $symbol_map = MergeSymbols($symbol_map, $base->{symbols}); } - # Get total data in profile - my $total = TotalProfile($profile); - # Collect symbols my $symbols; if ($main::use_symbolized_profile) { @@ -622,75 +702,16 @@ sub Main() { $symbols = ExtractSymbols($libs, $pcs); } - # Remove uniniteresting stack items - $profile = RemoveUninterestingFrames($symbols, $profile); - - # Focus? - if ($main::opt_focus ne '') { - $profile = FocusProfile($symbols, $profile, $main::opt_focus); + if (!defined($main::opt_thread)) { + FilterAndPrint($profile, $symbols, $libs); } - - # Ignore? - if ($main::opt_ignore ne '') { - $profile = IgnoreProfile($symbols, $profile, $main::opt_ignore); - } - - my $calls = ExtractCalls($symbols, $profile); - - # Reduce profiles to required output granularity, and also clean - # each stack trace so a given entry exists at most once. 
- my $reduced = ReduceProfile($symbols, $profile); - - # Get derived profiles - my $flat = FlatProfile($reduced); - my $cumulative = CumulativeProfile($reduced); - - # Print - if (!$main::opt_interactive) { - if ($main::opt_disasm) { - PrintDisassembly($libs, $flat, $cumulative, $main::opt_disasm); - } elsif ($main::opt_list) { - PrintListing($total, $libs, $flat, $cumulative, $main::opt_list, 0); - } elsif ($main::opt_text) { - # Make sure the output is empty when have nothing to report - # (only matters when --heapcheck is given but we must be - # compatible with old branches that did not pass --heapcheck always): - if ($total != 0) { - printf("Total: %s %s\n", Unparse($total), Units()); - } - PrintText($symbols, $flat, $cumulative, -1); - } elsif ($main::opt_raw) { - PrintSymbolizedProfile($symbols, $profile, $main::prog); - } elsif ($main::opt_callgrind) { - PrintCallgrind($calls); - } else { - if (PrintDot($main::prog, $symbols, $profile, $flat, $cumulative, $total)) { - if ($main::opt_gv) { - RunGV(TempName($main::next_tmpfile, "ps"), ""); - } elsif ($main::opt_evince) { - RunEvince(TempName($main::next_tmpfile, "pdf"), ""); - } elsif ($main::opt_web) { - my $tmp = TempName($main::next_tmpfile, "svg"); - RunWeb($tmp); - # The command we run might hand the file name off - # to an already running browser instance and then exit. - # Normally, we'd remove $tmp on exit (right now), - # but fork a child to remove $tmp a little later, so that the - # browser has time to load it first. 
- delete $main::tempnames{$tmp}; - if (fork() == 0) { - sleep 5; - unlink($tmp); - exit(0); - } - } - } else { - cleanup(); - exit(1); + if (defined($data->{threads})) { + foreach my $thread (sort { $a <=> $b } keys(%{$data->{threads}})) { + if (!defined($main::opt_thread) || $main::opt_thread == $thread) { + my $thread_profile = $data->{threads}{$thread}; + FilterAndPrint($thread_profile, $symbols, $libs, $thread); } } - } else { - InteractiveMode($profile, $symbols, $libs, $total); } cleanup(); @@ -1683,23 +1704,23 @@ sub PrintSource { HtmlPrintNumber($c2), UnparseAddress($offset, $e->[0]), CleanDisassembly($e->[3])); - + # Append the most specific source line associated with this instruction if (length($dis) < 80) { $dis .= (' ' x (80 - length($dis))) }; $dis = HtmlEscape($dis); my $f = $e->[5]; my $l = $e->[6]; if ($f ne $last_dis_filename) { - $dis .= sprintf("%s:%d", + $dis .= sprintf("%s:%d", HtmlEscape(CleanFileName($f)), $l); } elsif ($l ne $last_dis_linenum) { # De-emphasize the unchanged file name portion $dis .= sprintf("%s" . - ":%d", + ":%d", HtmlEscape(CleanFileName($f)), $l); } else { # De-emphasize the entire location - $dis .= sprintf("%s:%d", + $dis .= sprintf("%s:%d", HtmlEscape(CleanFileName($f)), $l); } $last_dis_filename = $f; @@ -1788,8 +1809,8 @@ sub PrintSource { if (defined($dis) && $dis ne '') { $asm = "" . $dis . ""; } - my $source_class = (($n1 + $n2 > 0) - ? "livesrc" + my $source_class = (($n1 + $n2 > 0) + ? "livesrc" : (($asm ne "") ? "deadsrc" : "nop")); printf $output ( "%5d " . 
@@ -3689,6 +3710,7 @@ sub IsSymbolizedProfileFile { # $result->{version} Version number of profile file # $result->{period} Sampling period (in microseconds) # $result->{profile} Profile object +# $result->{threads} Map of thread IDs to profile objects # $result->{map} Memory map info from profile # $result->{pcs} Hash of all PC values seen, key is hex address sub ReadProfile { @@ -3737,6 +3759,9 @@ sub ReadProfile { } elsif ($header =~ m/^heap profile:/) { $main::profile_type = 'heap'; $result = ReadHeapProfile($prog, *PROFILE, $header); + } elsif ($header =~ m/^heap/) { + $main::profile_type = 'heap'; + $result = ReadThreadedHeapProfile($prog, $fname, $header); } elsif ($header =~ m/^--- *$contention_marker/o) { $main::profile_type = 'contention'; $result = ReadSynchProfile($prog, *PROFILE); @@ -3879,11 +3904,7 @@ sub ReadCPUProfile { return $r; } -sub ReadHeapProfile { - my $prog = shift; - local *PROFILE = shift; - my $header = shift; - +sub HeapProfileIndex { my $index = 1; if ($main::opt_inuse_space) { $index = 1; @@ -3894,6 +3915,84 @@ sub ReadHeapProfile { } elsif ($main::opt_alloc_objects) { $index = 2; } + return $index; +} + +sub ReadMappedLibraries { + my $fh = shift; + my $map = ""; + # Read the /proc/self/maps data + while (<$fh>) { + s/\r//g; # turn windows-looking lines into unix-looking lines + $map .= $_; + } + return $map; +} + +sub ReadMemoryMap { + my $fh = shift; + my $map = ""; + # Read /proc/self/maps data as formatted by DumpAddressMap() + my $buildvar = ""; + while () { + s/\r//g; # turn windows-looking lines into unix-looking lines + # Parse "build=" specification if supplied + if (m/^\s*build=(.*)\n/) { + $buildvar = $1; + } + + # Expand "$build" variable if available + $_ =~ s/\$build\b/$buildvar/g; + + $map .= $_; + } + return $map; +} + +sub AdjustSamples { + my ($sample_adjustment, $sampling_algorithm, $n1, $s1, $n2, $s2) = @_; + if ($sample_adjustment) { + if ($sampling_algorithm == 2) { + # Remote-heap version 2 + # The sampling 
frequency is the rate of a Poisson process. + # This means that the probability of sampling an allocation of + # size X with sampling rate Y is 1 - exp(-X/Y) + if ($n1 != 0) { + my $ratio = (($s1*1.0)/$n1)/($sample_adjustment); + my $scale_factor = 1/(1 - exp(-$ratio)); + $n1 *= $scale_factor; + $s1 *= $scale_factor; + } + if ($n2 != 0) { + my $ratio = (($s2*1.0)/$n2)/($sample_adjustment); + my $scale_factor = 1/(1 - exp(-$ratio)); + $n2 *= $scale_factor; + $s2 *= $scale_factor; + } + } else { + # Remote-heap version 1 + my $ratio; + $ratio = (($s1*1.0)/$n1)/($sample_adjustment); + if ($ratio < 1) { + $n1 /= $ratio; + $s1 /= $ratio; + } + $ratio = (($s2*1.0)/$n2)/($sample_adjustment); + if ($ratio < 1) { + $n2 /= $ratio; + $s2 /= $ratio; + } + } + } + return ($n1, $s1, $n2, $s2); +} + +sub ReadHeapProfile { + my $prog = shift; + local *PROFILE = shift; + my $header = shift; + + my $index = HeapProfileIndex(); # Find the type of this profile. The header line looks like: # heap profile: 1246: 8800744 [ 1246: 8800744] @ /266053 @@ -3983,29 +4082,12 @@ sub ReadHeapProfile { while () { s/\r//g; # turn windows-looking lines into unix-looking lines if (/^MAPPED_LIBRARIES:/) { - # Read the /proc/self/maps data - while () { - s/\r//g; # turn windows-looking lines into unix-looking lines - $map .= $_; - } + $map .= ReadMappedLibraries(*PROFILE); last; } if (/^--- Memory map:/) { - # Read /proc/self/maps data as formatted by DumpAddressMap() - my $buildvar = ""; - while () { - s/\r//g; # turn windows-looking lines into unix-looking lines - # Parse "build=" specification if supplied - if (m/^\s*build=(.*)\n/) { - $buildvar = $1; - } - - # Expand "$build" variable if available - $_ =~ s/\$build\b/$buildvar/g; - - $map .= $_; - } + $map .= ReadMemoryMap(*PROFILE); last; } @@ -4016,42 +4098,8 @@ sub ReadHeapProfile { if (m/^\s*(\d+):\s+(\d+)\s+\[\s*(\d+):\s+(\d+)\]\s+@\s+(.*)$/) { my $stack = $5; my ($n1, $s1, $n2, $s2) = ($1, $2, $3, $4); - - if ($sample_adjustment) { - if 
($sampling_algorithm == 2) { - # Remote-heap version 2 - # The sampling frequency is the rate of a Poisson process. - # This means that the probability of sampling an allocation of - # size X with sampling rate Y is 1 - exp(-X/Y) - if ($n1 != 0) { - my $ratio = (($s1*1.0)/$n1)/($sample_adjustment); - my $scale_factor = 1/(1 - exp(-$ratio)); - $n1 *= $scale_factor; - $s1 *= $scale_factor; - } - if ($n2 != 0) { - my $ratio = (($s2*1.0)/$n2)/($sample_adjustment); - my $scale_factor = 1/(1 - exp(-$ratio)); - $n2 *= $scale_factor; - $s2 *= $scale_factor; - } - } else { - # Remote-heap version 1 - my $ratio; - $ratio = (($s1*1.0)/$n1)/($sample_adjustment); - if ($ratio < 1) { - $n1 /= $ratio; - $s1 /= $ratio; - } - $ratio = (($s2*1.0)/$n2)/($sample_adjustment); - if ($ratio < 1) { - $n2 /= $ratio; - $s2 /= $ratio; - } - } - } - - my @counts = ($n1, $s1, $n2, $s2); + my @counts = AdjustSamples($sample_adjustment, $sampling_algorithm, + $n1, $s1, $n2, $s2); AddEntries($profile, $pcs, FixCallerAddresses($stack), $counts[$index]); } } @@ -4065,6 +4113,83 @@ sub ReadHeapProfile { return $r; } +sub ReadThreadedHeapProfile { + my ($prog, $fname, $header) = @_; + + my $index = HeapProfileIndex(); + my $sampling_algorithm = 0; + my $sample_adjustment = 0; + chomp($header); + my $type = "unknown"; + # Assuming a very specific type of header for now. + if ($header =~ m"^heap_v2/(\d+)") { + $type = "_v2"; + $sampling_algorithm = 2; + $sample_adjustment = int($1); + } + if ($type ne "_v2" || !defined($sample_adjustment)) { + die "Threaded heap profiles require v2 sampling with a sample rate\n"; + } + + my $profile = {}; + my $thread_profiles = {}; + my $pcs = {}; + my $map = ""; + my $stack = ""; + + while () { + s/\r//g; + if (/^MAPPED_LIBRARIES:/) { + $map .= ReadMappedLibraries(*PROFILE); + last; + } + + if (/^--- Memory map:/) { + $map .= ReadMemoryMap(*PROFILE); + last; + } + + # Read entry of the form: + # @ a1 a2 ... an + # t*: : [: ] + # t1: : [: ] + # ... 
+ # tn: : [: ] + s/^\s*//; + s/\s*$//; + if (m/^@\s+(.*)$/) { + $stack = $1; + } elsif (m/^\s*(t(\*|\d+)):\s+(\d+):\s+(\d+)\s+\[\s*(\d+):\s+(\d+)\]$/) { + if ($stack eq "") { + # Still in the header, so this is just a per-thread summary. + next; + } + my $thread = $2; + my ($n1, $s1, $n2, $s2) = ($3, $4, $5, $6); + my @counts = AdjustSamples($sample_adjustment, $sampling_algorithm, + $n1, $s2, $n2, $s2); + if ($thread eq "*") { + AddEntries($profile, $pcs, FixCallerAddresses($stack), $counts[$index]); + } else { + if (!exists($thread_profiles->{$thread})) { + $thread_profiles->{$thread} = {}; + } + AddEntries($thread_profiles->{$thread}, $pcs, + FixCallerAddresses($stack), $counts[$index]); + } + } + } + + my $r = {}; + $r->{version} = "heap"; + $r->{period} = 1; + $r->{profile} = $profile; + $r->{threads} = $thread_profiles; + $r->{libs} = ParseLibraries($prog, $map, $pcs); + $r->{pcs} = $pcs; + return $r; +} + sub ReadSynchProfile { my $prog = shift; local *PROFILE = shift; @@ -4756,7 +4881,7 @@ sub MapToSymbols { } } } - + # Prepend to accumulated symbols for pcstr # (so that caller comes before callee) my $sym = $symbols->{$pcstr}; @@ -4950,7 +5075,7 @@ sub ConfigureTool { my $dirname = $`; # this is everything up to and including the last slash if (-x "$dirname$tool") { $path = "$dirname$tool"; - } else { + } else { $path = $tool; } } From a2260c95cd717c06c28b61d40b2157254d594219 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 9 Sep 2014 10:29:26 -0700 Subject: [PATCH 0515/3378] Fix sdallocx() assertion. Refactor sdallocx() and nallocx() to share inallocx(), and fix an sdallocx() assertion to check usize rather than size. 
--- src/jemalloc.c | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 527782e8..3f29a857 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1838,19 +1838,29 @@ je_dallocx(void *ptr, int flags) ifree(ptr, try_tcache); } +JEMALLOC_ALWAYS_INLINE_C size_t +inallocx(size_t size, int flags) +{ + size_t usize; + + if ((flags & MALLOCX_LG_ALIGN_MASK) == 0) + usize = s2u(size); + else + usize = sa2u(size, MALLOCX_ALIGN_GET_SPECIFIED(flags)); + assert(usize != 0); + return (usize); +} + void je_sdallocx(void *ptr, size_t size, int flags) { bool try_tcache; + size_t usize; assert(ptr != NULL); assert(malloc_initialized || IS_INITIALIZER); - assert(size == isalloc(ptr, config_prof)); - - if ((flags & MALLOCX_LG_ALIGN_MASK) == 0) - size = s2u(size); - else - size = sa2u(size, MALLOCX_ALIGN_GET_SPECIFIED(flags)); + usize = inallocx(size, flags); + assert(usize == isalloc(ptr, config_prof)); if ((flags & MALLOCX_ARENA_MASK) != 0) { unsigned arena_ind = MALLOCX_ARENA_GET(flags); @@ -1861,27 +1871,19 @@ je_sdallocx(void *ptr, size_t size, int flags) try_tcache = true; UTRACE(ptr, 0, 0); - isfree(ptr, size, try_tcache); + isfree(ptr, usize, try_tcache); } size_t je_nallocx(size_t size, int flags) { - size_t usize; assert(size != 0); if (malloc_init()) return (0); - if ((flags & MALLOCX_LG_ALIGN_MASK) == 0) - usize = s2u(size); - else { - size_t alignment = MALLOCX_ALIGN_GET_SPECIFIED(flags); - usize = sa2u(size, alignment); - } - assert(usize != 0); - return (usize); + return (inallocx(size, flags)); } int From 7c17e1670d7294db4b3c483ad7173dd056b42268 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 9 Sep 2014 15:27:52 -0700 Subject: [PATCH 0516/3378] Fix threaded heap profile bug in pprof. Fix ReadThreadedHeapProfile to pass the correct parameters to AdjustSamples. 
--- bin/pprof | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/pprof b/bin/pprof index 52da6004..87313f43 100755 --- a/bin/pprof +++ b/bin/pprof @@ -4167,7 +4167,7 @@ sub ReadThreadedHeapProfile { my $thread = $2; my ($n1, $s1, $n2, $s2) = ($3, $4, $5, $6); my @counts = AdjustSamples($sample_adjustment, $sampling_algorithm, - $n1, $s2, $n2, $s2); + $n1, $s1, $n2, $s2); if ($thread eq "*") { AddEntries($profile, $pcs, FixCallerAddresses($stack), $counts[$index]); } else { From 6fd53da030b5e9161a49d6010a8b38499ca2a124 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 9 Sep 2014 12:45:53 -0700 Subject: [PATCH 0517/3378] Fix prof_tdata_get()-related regressions. Fix prof_tdata_get() to avoid dereferencing an invalid tdata pointer (when it's PROF_TDATA_STATE_{REINCARNATED,PURGATORY}). Fix prof_tdata_get() callers to check for invalid results besides NULL (PROF_TDATA_STATE_{REINCARNATED,PURGATORY}). These regressions were caused by 602c8e0971160e4b85b08b16cf8a2375aa24bc04 (Implement per thread heap profiling.), which did not make it into any releases prior to these fixes. 
--- include/jemalloc/internal/prof.h | 11 ++++---- src/prof.c | 45 ++++++++++++++------------------ 2 files changed, 26 insertions(+), 30 deletions(-) diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index 104bfade..a9903280 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -308,12 +308,13 @@ prof_tdata_get(bool create) tdata = *prof_tdata_tsd_get(); if (create) { - if (tdata == NULL) - tdata = prof_tdata_init(); - else if (tdata->state == prof_tdata_state_expired) + if ((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) { + if (tdata == NULL) + tdata = prof_tdata_init(); + } else if (tdata->state == prof_tdata_state_expired) tdata = prof_tdata_reinit(tdata); - assert(tdata == NULL || tdata->state == - prof_tdata_state_attached); + assert((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX || + tdata->state == prof_tdata_state_attached); } return (tdata); diff --git a/src/prof.c b/src/prof.c index 044acd8b..941e53be 100644 --- a/src/prof.c +++ b/src/prof.c @@ -487,9 +487,8 @@ prof_gctx_create(prof_bt_t *bt) } static void -prof_gctx_maybe_destroy(prof_gctx_t *gctx) +prof_gctx_maybe_destroy(prof_gctx_t *gctx, prof_tdata_t *tdata) { - prof_tdata_t *tdata; cassert(config_prof); @@ -500,8 +499,6 @@ prof_gctx_maybe_destroy(prof_gctx_t *gctx) * avoid a race between the main body of prof_tctx_destroy() and entry * into this function. 
*/ - tdata = prof_tdata_get(false); - assert((uintptr_t)tdata > (uintptr_t)PROF_TDATA_STATE_MAX); prof_enter(tdata); malloc_mutex_lock(gctx->lock); if (tctx_tree_empty(&gctx->tctxs) && gctx->nlimbo == 1) { @@ -552,8 +549,9 @@ prof_gctx_should_destroy(prof_gctx_t *gctx) static void prof_tctx_destroy(prof_tctx_t *tctx) { + prof_tdata_t *tdata = tctx->tdata; prof_gctx_t *gctx = tctx->gctx; - bool destroy_gctx; + bool destroy_tdata, destroy_gctx; assert(tctx->cnts.curobjs == 0); assert(tctx->cnts.curbytes == 0); @@ -561,16 +559,9 @@ prof_tctx_destroy(prof_tctx_t *tctx) assert(tctx->cnts.accumobjs == 0); assert(tctx->cnts.accumbytes == 0); - { - prof_tdata_t *tdata = tctx->tdata; - bool tdata_destroy; - - ckh_remove(&tdata->bt2tctx, &gctx->bt, NULL, NULL); - tdata_destroy = prof_tdata_should_destroy(tdata); - malloc_mutex_unlock(tdata->lock); - if (tdata_destroy) - prof_tdata_destroy(tdata); - } + ckh_remove(&tdata->bt2tctx, &gctx->bt, NULL, NULL); + destroy_tdata = prof_tdata_should_destroy(tdata); + malloc_mutex_unlock(tdata->lock); malloc_mutex_lock(gctx->lock); tctx_tree_remove(&gctx->tctxs, tctx); @@ -594,7 +585,10 @@ prof_tctx_destroy(prof_tctx_t *tctx) destroy_gctx = false; malloc_mutex_unlock(gctx->lock); if (destroy_gctx) - prof_gctx_maybe_destroy(gctx); + prof_gctx_maybe_destroy(gctx, tdata); + + if (destroy_tdata) + prof_tdata_destroy(tdata); idalloc(tctx); } @@ -683,7 +677,7 @@ prof_lookup(prof_bt_t *bt) ret.v = imalloc(sizeof(prof_tctx_t)); if (ret.p == NULL) { if (new_gctx) - prof_gctx_maybe_destroy(gctx); + prof_gctx_maybe_destroy(gctx, tdata); return (NULL); } ret.p->tdata = tdata; @@ -695,7 +689,7 @@ prof_lookup(prof_bt_t *bt) malloc_mutex_unlock(tdata->lock); if (error) { if (new_gctx) - prof_gctx_maybe_destroy(gctx); + prof_gctx_maybe_destroy(gctx, tdata); idalloc(ret.v); return (NULL); } @@ -1019,6 +1013,7 @@ prof_gctx_merge_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *arg) static prof_gctx_t * prof_gctx_finish_iter(prof_gctx_tree_t *gctxs, 
prof_gctx_t *gctx, void *arg) { + prof_tdata_t *tdata = (prof_tdata_t *)arg; prof_tctx_t *next; bool destroy_gctx; @@ -1032,7 +1027,7 @@ prof_gctx_finish_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *arg) destroy_gctx = prof_gctx_should_destroy(gctx); malloc_mutex_unlock(gctx->lock); if (destroy_gctx) - prof_gctx_maybe_destroy(gctx); + prof_gctx_maybe_destroy(gctx, tdata); return (NULL); } @@ -1310,7 +1305,7 @@ prof_dump(bool propagate_err, const char *filename, bool leakcheck) if (prof_dump_close(propagate_err)) goto label_open_close_error; - gctx_tree_iter(&gctxs, NULL, prof_gctx_finish_iter, NULL); + gctx_tree_iter(&gctxs, NULL, prof_gctx_finish_iter, tdata); malloc_mutex_unlock(&prof_dump_mtx); if (leakcheck) @@ -1320,7 +1315,7 @@ prof_dump(bool propagate_err, const char *filename, bool leakcheck) label_write_error: prof_dump_close(propagate_err); label_open_close_error: - gctx_tree_iter(&gctxs, NULL, prof_gctx_finish_iter, NULL); + gctx_tree_iter(&gctxs, NULL, prof_gctx_finish_iter, tdata); malloc_mutex_unlock(&prof_dump_mtx); return (true); } @@ -1643,7 +1638,7 @@ const char * prof_thread_name_get(void) { prof_tdata_t *tdata = prof_tdata_get(true); - if (tdata == NULL) + if ((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) return (NULL); return (tdata->thread_name); } @@ -1656,7 +1651,7 @@ prof_thread_name_set(const char *thread_name) char *s; tdata = prof_tdata_get(true); - if (tdata == NULL) + if ((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) return (true); size = strlen(thread_name) + 1; @@ -1675,7 +1670,7 @@ bool prof_thread_active_get(void) { prof_tdata_t *tdata = prof_tdata_get(true); - if (tdata == NULL) + if ((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) return (false); return (tdata->active); } @@ -1686,7 +1681,7 @@ prof_thread_active_set(bool active) prof_tdata_t *tdata; tdata = prof_tdata_get(true); - if (tdata == NULL) + if ((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) return (true); tdata->active = active; 
return (false); From 6e73dc194ee9682d3eacaf725a989f04629718f7 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 9 Sep 2014 19:37:26 -0700 Subject: [PATCH 0518/3378] Fix a profile sampling race. Fix a profile sampling race that was due to preparing to sample, yet doing nothing to assure that the context remains valid until the stats are updated. These regressions were caused by 602c8e0971160e4b85b08b16cf8a2375aa24bc04 (Implement per thread heap profiling.), which did not make it into any releases prior to these fixes. --- include/jemalloc/internal/private_symbols.txt | 1 + include/jemalloc/internal/prof.h | 37 +++--- src/jemalloc.c | 109 +++++++++--------- src/prof.c | 35 ++++++ 4 files changed, 109 insertions(+), 73 deletions(-) diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 3b990b0e..b8990177 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -292,6 +292,7 @@ p2rz pages_purge pow2_ceil prof_alloc_prep +prof_alloc_rollback prof_backtrace prof_boot0 prof_boot1 diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index a9903280..920ec63f 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -97,6 +97,12 @@ struct prof_tctx_s { /* Linkage into gctx's tctxs. */ rb_node(prof_tctx_t) tctx_link; + /* + * True during prof_alloc_prep()..prof_malloc_sample_object(), prevents + * sample vs destroy race. + */ + bool prepared; + /* Current dump-related state, protected by gctx->lock. 
*/ prof_tctx_state_t state; @@ -242,6 +248,7 @@ extern uint64_t prof_interval; */ extern size_t lg_prof_sample; +void prof_alloc_rollback(prof_tctx_t *tctx, bool updated); void prof_malloc_sample_object(const void *ptr, size_t usize, prof_tctx_t *tctx); void prof_free_sampled_object(size_t usize, prof_tctx_t *tctx); @@ -282,14 +289,14 @@ malloc_tsd_protos(JEMALLOC_ATTR(unused), prof_tdata, prof_tdata_t *) prof_tdata_t *prof_tdata_get(bool create); bool prof_sample_accum_update(size_t usize, bool commit, prof_tdata_t **tdata_out); -prof_tctx_t *prof_alloc_prep(size_t usize); +prof_tctx_t *prof_alloc_prep(size_t usize, bool update); prof_tctx_t *prof_tctx_get(const void *ptr); void prof_tctx_set(const void *ptr, prof_tctx_t *tctx); void prof_malloc_sample_object(const void *ptr, size_t usize, prof_tctx_t *tctx); void prof_malloc(const void *ptr, size_t usize, prof_tctx_t *tctx); void prof_realloc(const void *ptr, size_t usize, prof_tctx_t *tctx, - size_t old_usize, prof_tctx_t *old_tctx); + bool updated, size_t old_usize, prof_tctx_t *old_tctx); void prof_free(const void *ptr, size_t usize); #endif @@ -356,7 +363,7 @@ prof_tctx_set(const void *ptr, prof_tctx_t *tctx) } JEMALLOC_INLINE bool -prof_sample_accum_update(size_t usize, bool commit, prof_tdata_t **tdata_out) +prof_sample_accum_update(size_t usize, bool update, prof_tdata_t **tdata_out) { prof_tdata_t *tdata; @@ -373,19 +380,19 @@ prof_sample_accum_update(size_t usize, bool commit, prof_tdata_t **tdata_out) return (true); if (tdata->bytes_until_sample >= usize) { - if (commit) + if (update) tdata->bytes_until_sample -= usize; return (true); } else { /* Compute new sample threshold. 
*/ - if (commit) + if (update) prof_sample_threshold_update(tdata); return (tdata->active == false); } } JEMALLOC_INLINE prof_tctx_t * -prof_alloc_prep(size_t usize) +prof_alloc_prep(size_t usize, bool update) { prof_tctx_t *ret; prof_tdata_t *tdata; @@ -393,7 +400,7 @@ prof_alloc_prep(size_t usize) assert(usize == s2u(usize)); - if (!opt_prof_active || prof_sample_accum_update(usize, false, &tdata)) + if (!opt_prof_active || prof_sample_accum_update(usize, update, &tdata)) ret = (prof_tctx_t *)(uintptr_t)1U; else { bt_init(&bt, tdata->vec); @@ -412,16 +419,6 @@ prof_malloc(const void *ptr, size_t usize, prof_tctx_t *tctx) assert(ptr != NULL); assert(usize == isalloc(ptr, true)); - if (prof_sample_accum_update(usize, true, NULL)) { - /* - * Don't sample. For malloc()-like allocation, it is always - * possible to tell in advance how large an object's usable size - * will be, so there should never be a difference between the - * usize passed to PROF_ALLOC_PREP() and prof_malloc(). - */ - assert((uintptr_t)tctx == (uintptr_t)1U); - } - if ((uintptr_t)tctx > (uintptr_t)1U) prof_malloc_sample_object(ptr, usize, tctx); else @@ -429,14 +426,14 @@ prof_malloc(const void *ptr, size_t usize, prof_tctx_t *tctx) } JEMALLOC_INLINE void -prof_realloc(const void *ptr, size_t usize, prof_tctx_t *tctx, size_t old_usize, - prof_tctx_t *old_tctx) +prof_realloc(const void *ptr, size_t usize, prof_tctx_t *tctx, bool updated, + size_t old_usize, prof_tctx_t *old_tctx) { cassert(config_prof); assert(ptr != NULL || (uintptr_t)tctx <= (uintptr_t)1U); - if (ptr != NULL) { + if (!updated && ptr != NULL) { assert(usize == isalloc(ptr, true)); if (prof_sample_accum_update(usize, true, NULL)) { /* diff --git a/src/jemalloc.c b/src/jemalloc.c index 3f29a857..1d4d1a8a 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -886,13 +886,15 @@ imalloc_prof(size_t usize) void *p; prof_tctx_t *tctx; - tctx = prof_alloc_prep(usize); + tctx = prof_alloc_prep(usize, true); if ((uintptr_t)tctx != 
(uintptr_t)1U) p = imalloc_prof_sample(usize, tctx); else p = imalloc(usize); - if (p == NULL) + if (p == NULL) { + prof_alloc_rollback(tctx, true); return (NULL); + } prof_malloc(p, usize, tctx); return (p); @@ -962,16 +964,20 @@ imemalign_prof_sample(size_t alignment, size_t usize, prof_tctx_t *tctx) } JEMALLOC_ALWAYS_INLINE_C void * -imemalign_prof(size_t alignment, size_t usize, prof_tctx_t *tctx) +imemalign_prof(size_t alignment, size_t usize) { void *p; + prof_tctx_t *tctx; + tctx = prof_alloc_prep(usize, true); if ((uintptr_t)tctx != (uintptr_t)1U) p = imemalign_prof_sample(alignment, usize, tctx); else p = ipalloc(usize, alignment, false); - if (p == NULL) + if (p == NULL) { + prof_alloc_rollback(tctx, true); return (NULL); + } prof_malloc(p, usize, tctx); return (p); @@ -1013,12 +1019,9 @@ imemalign(void **memptr, size_t alignment, size_t size, size_t min_alignment) goto label_oom; } - if (config_prof && opt_prof) { - prof_tctx_t *tctx; - - tctx = prof_alloc_prep(usize); - result = imemalign_prof(alignment, usize, tctx); - } else + if (config_prof && opt_prof) + result = imemalign_prof(alignment, usize); + else result = ipalloc(usize, alignment, false); if (result == NULL) goto label_oom; @@ -1087,16 +1090,20 @@ icalloc_prof_sample(size_t usize, prof_tctx_t *tctx) } JEMALLOC_ALWAYS_INLINE_C void * -icalloc_prof(size_t usize, prof_tctx_t *tctx) +icalloc_prof(size_t usize) { void *p; + prof_tctx_t *tctx; + tctx = prof_alloc_prep(usize, true); if ((uintptr_t)tctx != (uintptr_t)1U) p = icalloc_prof_sample(usize, tctx); else p = icalloc(usize); - if (p == NULL) + if (p == NULL) { + prof_alloc_rollback(tctx, true); return (NULL); + } prof_malloc(p, usize, tctx); return (p); @@ -1136,11 +1143,8 @@ je_calloc(size_t num, size_t size) } if (config_prof && opt_prof) { - prof_tctx_t *tctx; - usize = s2u(num_size); - tctx = prof_alloc_prep(usize); - ret = icalloc_prof(usize, tctx); + ret = icalloc_prof(usize); } else { if (config_stats || (config_valgrind && 
in_valgrind)) usize = s2u(num_size); @@ -1184,19 +1188,20 @@ irealloc_prof_sample(void *oldptr, size_t usize, prof_tctx_t *tctx) } JEMALLOC_ALWAYS_INLINE_C void * -irealloc_prof(void *oldptr, size_t old_usize, size_t usize, prof_tctx_t *tctx) +irealloc_prof(void *oldptr, size_t old_usize, size_t usize) { void *p; - prof_tctx_t *old_tctx; + prof_tctx_t *old_tctx, *tctx; old_tctx = prof_tctx_get(oldptr); + tctx = prof_alloc_prep(usize, true); if ((uintptr_t)tctx != (uintptr_t)1U) p = irealloc_prof_sample(oldptr, usize, tctx); else p = iralloc(oldptr, usize, 0, false); if (p == NULL) return (NULL); - prof_realloc(p, usize, tctx, old_usize, old_tctx); + prof_realloc(p, usize, tctx, true, old_usize, old_tctx); return (p); } @@ -1270,11 +1275,8 @@ je_realloc(void *ptr, size_t size) old_rzsize = config_prof ? p2rz(ptr) : u2rz(old_usize); if (config_prof && opt_prof) { - prof_tctx_t *tctx; - usize = s2u(size); - tctx = prof_alloc_prep(usize); - ret = irealloc_prof(ptr, old_usize, usize, tctx); + ret = irealloc_prof(ptr, old_usize, usize); } else { if (config_stats || (config_valgrind && in_valgrind)) usize = s2u(size); @@ -1477,7 +1479,7 @@ imallocx_prof(size_t size, int flags, size_t *usize) imallocx_flags_decode(size, flags, usize, &alignment, &zero, &try_tcache, &arena); - tctx = prof_alloc_prep(*usize); + tctx = prof_alloc_prep(*usize, true); if ((uintptr_t)tctx == (uintptr_t)1U) { p = imallocx_maybe_flags(size, flags, *usize, alignment, zero, try_tcache, arena); @@ -1486,8 +1488,10 @@ imallocx_prof(size_t size, int flags, size_t *usize) try_tcache, arena); } else p = NULL; - if (p == NULL) + if (p == NULL) { + prof_alloc_rollback(tctx, true); return (NULL); + } prof_malloc(p, *usize, tctx); return (p); @@ -1572,21 +1576,24 @@ irallocx_prof_sample(void *oldptr, size_t size, size_t alignment, size_t usize, JEMALLOC_ALWAYS_INLINE_C void * irallocx_prof(void *oldptr, size_t old_usize, size_t size, size_t alignment, size_t *usize, bool zero, bool try_tcache_alloc, bool 
try_tcache_dalloc, - arena_t *arena, prof_tctx_t *tctx) + arena_t *arena) { void *p; - prof_tctx_t *old_tctx; + prof_tctx_t *old_tctx, *tctx; old_tctx = prof_tctx_get(oldptr); - if ((uintptr_t)tctx != (uintptr_t)1U) + tctx = prof_alloc_prep(*usize, true); + if ((uintptr_t)tctx != (uintptr_t)1U) { p = irallocx_prof_sample(oldptr, size, alignment, *usize, zero, try_tcache_alloc, try_tcache_dalloc, arena, tctx); - else { + } else { p = iralloct(oldptr, size, alignment, zero, try_tcache_alloc, try_tcache_dalloc, arena); } - if (p == NULL) + if (p == NULL) { + prof_alloc_rollback(tctx, true); return (NULL); + } if (p == oldptr && alignment != 0) { /* @@ -1599,7 +1606,7 @@ irallocx_prof(void *oldptr, size_t old_usize, size_t size, size_t alignment, */ *usize = isalloc(p, config_prof); } - prof_realloc(p, *usize, tctx, old_usize, old_tctx); + prof_realloc(p, *usize, tctx, true, old_usize, old_tctx); return (p); } @@ -1641,13 +1648,10 @@ je_rallocx(void *ptr, size_t size, int flags) old_rzsize = u2rz(old_usize); if (config_prof && opt_prof) { - prof_tctx_t *tctx; - usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment); assert(usize != 0); - tctx = prof_alloc_prep(usize); p = irallocx_prof(ptr, old_usize, size, alignment, &usize, zero, - try_tcache_alloc, try_tcache_dalloc, arena, tctx); + try_tcache_alloc, try_tcache_dalloc, arena); if (p == NULL) goto label_oom; } else { @@ -1720,13 +1724,21 @@ ixallocx_prof_sample(void *ptr, size_t old_usize, size_t size, size_t extra, JEMALLOC_ALWAYS_INLINE_C size_t ixallocx_prof(void *ptr, size_t old_usize, size_t size, size_t extra, - size_t alignment, size_t max_usize, bool zero, arena_t *arena, - prof_tctx_t *tctx) + size_t alignment, bool zero, arena_t *arena) { - size_t usize; - prof_tctx_t *old_tctx; + size_t max_usize, usize; + prof_tctx_t *old_tctx, *tctx; old_tctx = prof_tctx_get(ptr); + /* + * usize isn't knowable before ixalloc() returns when extra is non-zero. 
+ * Therefore, compute its maximum possible value and use that in + * prof_alloc_prep() to decide whether to capture a backtrace. + * prof_realloc() will use the actual usize to decide whether to sample. + */ + max_usize = (alignment == 0) ? s2u(size+extra) : sa2u(size+extra, + alignment); + tctx = prof_alloc_prep(max_usize, false); if ((uintptr_t)tctx != (uintptr_t)1U) { usize = ixallocx_prof_sample(ptr, old_usize, size, extra, alignment, zero, max_usize, arena, tctx); @@ -1734,9 +1746,11 @@ ixallocx_prof(void *ptr, size_t old_usize, size_t size, size_t extra, usize = ixallocx_helper(ptr, old_usize, size, extra, alignment, zero, arena); } - if (usize == old_usize) + if (usize == old_usize) { + prof_alloc_rollback(tctx, false); return (usize); - prof_realloc(ptr, usize, tctx, old_usize, old_tctx); + } + prof_realloc(ptr, usize, tctx, false, old_usize, old_tctx); return (usize); } @@ -1767,19 +1781,8 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) old_rzsize = u2rz(old_usize); if (config_prof && opt_prof) { - prof_tctx_t *tctx; - /* - * usize isn't knowable before ixalloc() returns when extra is - * non-zero. Therefore, compute its maximum possible value and - * use that in prof_alloc_prep() to decide whether to capture a - * backtrace. prof_realloc() will use the actual usize to - * decide whether to sample. - */ - size_t max_usize = (alignment == 0) ? 
s2u(size+extra) : - sa2u(size+extra, alignment); - tctx = prof_alloc_prep(max_usize); usize = ixallocx_prof(ptr, old_usize, size, extra, alignment, - max_usize, zero, arena, tctx); + zero, arena); } else { usize = ixallocx_helper(ptr, old_usize, size, extra, alignment, zero, arena); diff --git a/src/prof.c b/src/prof.c index 941e53be..9495afc4 100644 --- a/src/prof.c +++ b/src/prof.c @@ -149,6 +149,35 @@ rb_gen(static UNUSED, tdata_tree_, prof_tdata_tree_t, prof_tdata_t, tdata_link, /******************************************************************************/ +void +prof_alloc_rollback(prof_tctx_t *tctx, bool updated) +{ + prof_tdata_t *tdata; + + cassert(config_prof); + + if (updated) { + /* + * Compute a new sample threshold. This isn't very important in + * practice, because this function is rarely executed, so the + * potential for sample bias is minimal except in contrived + * programs. + */ + tdata = prof_tdata_get(true); + if ((uintptr_t)tdata > (uintptr_t)PROF_TDATA_STATE_MAX) + prof_sample_threshold_update(tctx->tdata); + } + + if ((uintptr_t)tctx > (uintptr_t)1U) { + malloc_mutex_lock(tctx->tdata->lock); + tctx->prepared = false; + if (prof_tctx_should_destroy(tctx)) + prof_tctx_destroy(tctx); + else + malloc_mutex_unlock(tctx->tdata->lock); + } +} + void prof_malloc_sample_object(const void *ptr, size_t usize, prof_tctx_t *tctx) { prof_tctx_set(ptr, tctx); @@ -160,6 +189,7 @@ prof_malloc_sample_object(const void *ptr, size_t usize, prof_tctx_t *tctx) { tctx->cnts.accumobjs++; tctx->cnts.accumbytes += usize; } + tctx->prepared = false; malloc_mutex_unlock(tctx->tdata->lock); } @@ -529,6 +559,8 @@ prof_tctx_should_destroy(prof_tctx_t *tctx) return (false); if (tctx->cnts.curobjs != 0) return (false); + if (tctx->prepared) + return (false); return (true); } @@ -659,6 +691,8 @@ prof_lookup(prof_bt_t *bt) malloc_mutex_lock(tdata->lock); not_found = ckh_search(&tdata->bt2tctx, bt, NULL, &ret.v); + if (!not_found) /* Note double negative! 
*/ + ret.p->prepared = true; malloc_mutex_unlock(tdata->lock); if (not_found) { void *btkey; @@ -683,6 +717,7 @@ prof_lookup(prof_bt_t *bt) ret.p->tdata = tdata; memset(&ret.p->cnts, 0, sizeof(prof_cnt_t)); ret.p->gctx = gctx; + ret.p->prepared = true; ret.p->state = prof_tctx_state_nominal; malloc_mutex_lock(tdata->lock); error = ckh_insert(&tdata->bt2tctx, btkey, ret.v); From 61beeb9f69f2f1fd5669b2411245cc7197b5d66a Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 10 Sep 2014 08:49:29 -0700 Subject: [PATCH 0519/3378] Add sdallocx() to list of functions to prune in pprof. --- bin/pprof | 1 + 1 file changed, 1 insertion(+) diff --git a/bin/pprof b/bin/pprof index 87313f43..5a4c6cd7 100755 --- a/bin/pprof +++ b/bin/pprof @@ -2840,6 +2840,7 @@ sub RemoveUninterestingFrames { 'rallocx', # jemalloc 'xallocx', # jemalloc 'dallocx', # jemalloc + 'sdallocx', # jemalloc 'tc_calloc', 'tc_cfree', 'tc_malloc', From 6b5609d23bf49423fdc6506281e0deac7c3a524e Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Mon, 8 Sep 2014 22:18:49 -0400 Subject: [PATCH 0520/3378] add likely / unlikely macros --- include/jemalloc/internal/util.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h index d2b7a967..82a453d4 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util.h @@ -27,6 +27,14 @@ # define JEMALLOC_CC_SILENCE_INIT(v) #endif +#ifdef __GNUC__ +#define likely(x) __builtin_expect(!!(x), 1) +#define unlikely(x) __builtin_expect(!!(x), 0) +#else +#define likely(x) !!(x) +#define unlikely(x) !!(x) +#endif + /* * Define a custom assert() in order to reduce the chances of deadlock during * assertion failure. 
From 23fdf8b359a690f457c5300338f4994d06402b95 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Tue, 9 Sep 2014 15:26:05 -0400 Subject: [PATCH 0521/3378] mark some conditions as unlikely * assertion failure * malloc_init failure * malloc not already initialized (in malloc_init) * running in valgrind * thread cache disabled at runtime Clang and GCC already consider a comparison with NULL or -1 to be cold, so many branches (out-of-memory) are already correctly considered as cold and marking them is not important. --- include/jemalloc/internal/tcache.h | 2 +- include/jemalloc/internal/util.h | 6 ++-- include/jemalloc/internal/valgrind.h | 12 ++++---- src/jemalloc.c | 42 ++++++++++++++-------------- 4 files changed, 31 insertions(+), 31 deletions(-) diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index c0d48b93..292ce461 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -218,7 +218,7 @@ tcache_get(bool create) return (NULL); tcache = *tcache_tsd_get(); - if ((uintptr_t)tcache <= (uintptr_t)TCACHE_STATE_MAX) { + if (unlikely((uintptr_t)tcache <= (uintptr_t)TCACHE_STATE_MAX)) { if (tcache == TCACHE_STATE_DISABLED) return (NULL); tcache = tcache_get_hard(tcache, create); diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h index 82a453d4..cc7806d0 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util.h @@ -41,7 +41,7 @@ */ #ifndef assert #define assert(e) do { \ - if (config_debug && !(e)) { \ + if (unlikely(config_debug && !(e))) { \ malloc_printf( \ ": %s:%d: Failed assertion: \"%s\"\n", \ __FILE__, __LINE__, #e); \ @@ -73,14 +73,14 @@ #ifndef assert_not_implemented #define assert_not_implemented(e) do { \ - if (config_debug && !(e)) \ + if (unlikely(config_debug && !(e))) \ not_implemented(); \ } while (0) #endif /* Use to assert a particular configuration, e.g., cassert(config_debug). 
*/ #define cassert(c) do { \ - if ((c) == false) \ + if (unlikely(!(c))) \ not_reached(); \ } while (0) diff --git a/include/jemalloc/internal/valgrind.h b/include/jemalloc/internal/valgrind.h index 52c93f29..a3380df9 100644 --- a/include/jemalloc/internal/valgrind.h +++ b/include/jemalloc/internal/valgrind.h @@ -14,15 +14,15 @@ * usable space. */ #define JEMALLOC_VALGRIND_MAKE_MEM_NOACCESS(ptr, usize) do { \ - if (in_valgrind) \ + if (unlikely(in_valgrind)) \ valgrind_make_mem_noaccess(ptr, usize); \ } while (0) #define JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ptr, usize) do { \ - if (in_valgrind) \ + if (unlikely(in_valgrind)) \ valgrind_make_mem_undefined(ptr, usize); \ } while (0) #define JEMALLOC_VALGRIND_MAKE_MEM_DEFINED(ptr, usize) do { \ - if (in_valgrind) \ + if (unlikely(in_valgrind)) \ valgrind_make_mem_defined(ptr, usize); \ } while (0) /* @@ -31,13 +31,13 @@ * Valgrind reports errors, there are no extra stack frames in the backtraces. */ #define JEMALLOC_VALGRIND_MALLOC(cond, ptr, usize, zero) do { \ - if (in_valgrind && cond) \ + if (unlikely(in_valgrind && cond)) \ VALGRIND_MALLOCLIKE_BLOCK(ptr, usize, p2rz(ptr), zero); \ } while (0) #define JEMALLOC_VALGRIND_REALLOC(maybe_moved, ptr, usize, \ ptr_maybe_null, old_ptr, old_usize, old_rzsize, old_ptr_maybe_null, \ zero) do { \ - if (in_valgrind) { \ + if (unlikely(in_valgrind)) { \ size_t rzsize = p2rz(ptr); \ \ if (!maybe_moved || ptr == old_ptr) { \ @@ -73,7 +73,7 @@ } \ } while (0) #define JEMALLOC_VALGRIND_FREE(ptr, rzsize) do { \ - if (in_valgrind) \ + if (unlikely(in_valgrind)) \ valgrind_freelike_block(ptr, rzsize); \ } while (0) #else diff --git a/src/jemalloc.c b/src/jemalloc.c index 1d4d1a8a..9874361e 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -291,7 +291,7 @@ JEMALLOC_ALWAYS_INLINE_C bool malloc_init(void) { - if (malloc_initialized == false && malloc_init_hard()) + if (unlikely(!malloc_initialized) && malloc_init_hard()) return (true); malloc_thread_init(); @@ -904,7 +904,7 @@ 
JEMALLOC_ALWAYS_INLINE_C void * imalloc_body(size_t size, size_t *usize) { - if (malloc_init()) + if (unlikely(malloc_init())) return (NULL); if (config_prof && opt_prof) { @@ -912,7 +912,7 @@ imalloc_body(size_t size, size_t *usize) return (imalloc_prof(*usize)); } - if (config_stats || (config_valgrind && in_valgrind)) + if (config_stats || (unlikely(config_valgrind && in_valgrind))) *usize = s2u(size); return (imalloc(size)); } @@ -993,7 +993,7 @@ imemalign(void **memptr, size_t alignment, size_t size, size_t min_alignment) assert(min_alignment != 0); - if (malloc_init()) { + if (unlikely(malloc_init())) { result = NULL; goto label_oom; } else { @@ -1116,7 +1116,7 @@ je_calloc(size_t num, size_t size) size_t num_size; size_t usize JEMALLOC_CC_SILENCE_INIT(0); - if (malloc_init()) { + if (unlikely(malloc_init())) { num_size = 0; ret = NULL; goto label_return; @@ -1146,7 +1146,7 @@ je_calloc(size_t num, size_t size) usize = s2u(num_size); ret = icalloc_prof(usize); } else { - if (config_stats || (config_valgrind && in_valgrind)) + if (config_stats || unlikely(config_valgrind && in_valgrind)) usize = s2u(num_size); ret = icalloc(num_size); } @@ -1222,7 +1222,7 @@ ifree(void *ptr, bool try_tcache) usize = isalloc(ptr, config_prof); if (config_stats) thread_allocated_tsd_get()->deallocated += usize; - if (config_valgrind && in_valgrind) + if (unlikely(config_valgrind && in_valgrind)) rzsize = p2rz(ptr); iqalloc(ptr, try_tcache); JEMALLOC_VALGRIND_FREE(ptr, rzsize); @@ -1240,7 +1240,7 @@ isfree(void *ptr, size_t usize, bool try_tcache) prof_free(ptr, usize); if (config_stats) thread_allocated_tsd_get()->deallocated += usize; - if (config_valgrind && in_valgrind) + if (unlikely(config_valgrind && in_valgrind)) rzsize = p2rz(ptr); isqalloc(ptr, usize, try_tcache); JEMALLOC_VALGRIND_FREE(ptr, rzsize); @@ -1269,16 +1269,16 @@ je_realloc(void *ptr, size_t size) malloc_thread_init(); if ((config_prof && opt_prof) || config_stats || - (config_valgrind && in_valgrind)) + 
unlikely(config_valgrind && in_valgrind)) old_usize = isalloc(ptr, config_prof); - if (config_valgrind && in_valgrind) + if (unlikely(config_valgrind && in_valgrind)) old_rzsize = config_prof ? p2rz(ptr) : u2rz(old_usize); if (config_prof && opt_prof) { usize = s2u(size); ret = irealloc_prof(ptr, old_usize, usize); } else { - if (config_stats || (config_valgrind && in_valgrind)) + if (config_stats || unlikely(config_valgrind && in_valgrind)) usize = s2u(size); ret = iralloc(ptr, size, 0, false); } @@ -1506,7 +1506,7 @@ imallocx_no_prof(size_t size, int flags, size_t *usize) arena_t *arena; if (flags == 0) { - if (config_stats || (config_valgrind && in_valgrind)) + if (config_stats || unlikely(config_valgrind && in_valgrind)) *usize = s2u(size); return (imalloc(size)); } @@ -1524,7 +1524,7 @@ je_mallocx(size_t size, int flags) assert(size != 0); - if (malloc_init()) + if (unlikely(malloc_init())) goto label_oom; if (config_prof && opt_prof) @@ -1642,9 +1642,9 @@ je_rallocx(void *ptr, size_t size, int flags) } if ((config_prof && opt_prof) || config_stats || - (config_valgrind && in_valgrind)) + (unlikely(config_valgrind && in_valgrind))) old_usize = isalloc(ptr, config_prof); - if (config_valgrind && in_valgrind) + if (unlikely(config_valgrind && in_valgrind)) old_rzsize = u2rz(old_usize); if (config_prof && opt_prof) { @@ -1777,7 +1777,7 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) arena = NULL; old_usize = isalloc(ptr, config_prof); - if (config_valgrind && in_valgrind) + if (unlikely(config_valgrind && in_valgrind)) old_rzsize = u2rz(old_usize); if (config_prof && opt_prof) { @@ -1883,7 +1883,7 @@ je_nallocx(size_t size, int flags) assert(size != 0); - if (malloc_init()) + if (unlikely(malloc_init())) return (0); return (inallocx(size, flags)); @@ -1894,7 +1894,7 @@ je_mallctl(const char *name, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - if (malloc_init()) + if (unlikely(malloc_init())) return (EAGAIN); return (ctl_byname(name, 
oldp, oldlenp, newp, newlen)); @@ -1904,7 +1904,7 @@ int je_mallctlnametomib(const char *name, size_t *mibp, size_t *miblenp) { - if (malloc_init()) + if (unlikely(malloc_init())) return (EAGAIN); return (ctl_nametomib(name, mibp, miblenp)); @@ -1915,7 +1915,7 @@ je_mallctlbymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - if (malloc_init()) + if (unlikely(malloc_init())) return (EAGAIN); return (ctl_bymib(mib, miblen, oldp, oldlenp, newp, newlen)); @@ -2064,7 +2064,7 @@ static void * a0alloc(size_t size, bool zero) { - if (malloc_init()) + if (unlikely(malloc_init())) return (NULL); if (size == 0) From 91566fc079cfaeaf2b424b7f40d6b9d8669d0470 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 11 Sep 2014 13:15:33 -0700 Subject: [PATCH 0522/3378] Fix mallocx() to always honor MALLOCX_ARENA() when profiling. --- src/jemalloc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 9874361e..f6be7514 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1435,7 +1435,6 @@ imallocx_flags(size_t usize, size_t alignment, bool zero, bool try_tcache, return (imalloct(usize, try_tcache, arena)); } - JEMALLOC_ALWAYS_INLINE_C void * imallocx_maybe_flags(size_t size, int flags, size_t usize, size_t alignment, bool zero, bool try_tcache, arena_t *arena) @@ -1455,7 +1454,7 @@ imallocx_prof_sample(size_t size, int flags, size_t usize, size_t alignment, if (usize <= SMALL_MAXCLASS) { assert(((alignment == 0) ? s2u(LARGE_MINCLASS) : sa2u(LARGE_MINCLASS, alignment)) == LARGE_MINCLASS); - p = imalloc(LARGE_MINCLASS); + p = imalloct(LARGE_MINCLASS, try_tcache, arena); if (p == NULL) return (NULL); arena_prof_promoted(p, usize); From 9c640bfdd4e2f25180a32ed3704ce8e4c4cc21f1 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 11 Sep 2014 16:20:44 -0700 Subject: [PATCH 0523/3378] Apply likely()/unlikely() to allocation/deallocation fast paths. 
--- include/jemalloc/internal/arena.h | 52 +++---- .../jemalloc/internal/jemalloc_internal.h.in | 4 +- include/jemalloc/internal/prof.h | 11 +- include/jemalloc/internal/tcache.h | 32 ++--- src/arena.c | 28 ++-- src/huge.c | 6 +- src/jemalloc.c | 130 +++++++++--------- src/quarantine.c | 4 +- 8 files changed, 138 insertions(+), 129 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 6ab0ae71..bfb0b3cf 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -488,7 +488,8 @@ void arena_prof_tctx_set(const void *ptr, prof_tctx_t *tctx); void *arena_malloc(arena_t *arena, size_t size, bool zero, bool try_tcache); size_t arena_salloc(const void *ptr, bool demote); void arena_dalloc(arena_chunk_t *chunk, void *ptr, bool try_tcache); -void arena_sdalloc(arena_chunk_t *chunk, void *ptr, size_t size, bool try_tcache); +void arena_sdalloc(arena_chunk_t *chunk, void *ptr, size_t size, + bool try_tcache); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_)) @@ -539,7 +540,7 @@ small_size2bin(size_t size) { assert(size > 0); - if (size <= LOOKUP_MAXCLASS) + if (likely(size <= LOOKUP_MAXCLASS)) return (small_size2bin_lookup(size)); else return (small_size2bin_compute(size)); @@ -627,7 +628,7 @@ small_s2u(size_t size) { assert(size > 0); - if (size <= LOOKUP_MAXCLASS) + if (likely(size <= LOOKUP_MAXCLASS)) return (small_s2u_lookup(size)); else return (small_s2u_compute(size)); @@ -864,7 +865,7 @@ arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes) cassert(config_prof); - if (prof_interval == 0) + if (likely(prof_interval == 0)) return (false); return (arena_prof_accum_impl(arena, accumbytes)); } @@ -875,7 +876,7 @@ arena_prof_accum(arena_t *arena, uint64_t accumbytes) cassert(config_prof); - if (prof_interval == 0) + if (likely(prof_interval == 0)) return (false); { @@ -995,8 +996,8 @@ arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr) 
SIZE_INV(28), SIZE_INV(29), SIZE_INV(30), SIZE_INV(31) }; - if (interval <= ((sizeof(interval_invs) / sizeof(unsigned)) + - 2)) { + if (likely(interval <= ((sizeof(interval_invs) / + sizeof(unsigned)) + 2))) { regind = (diff * interval_invs[interval - 3]) >> SIZE_INV_SHIFT; } else @@ -1025,7 +1026,7 @@ arena_prof_tctx_get(const void *ptr) pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; mapbits = arena_mapbits_get(chunk, pageind); assert((mapbits & CHUNK_MAP_ALLOCATED) != 0); - if ((mapbits & CHUNK_MAP_LARGE) == 0) + if (likely((mapbits & CHUNK_MAP_LARGE) == 0)) ret = (prof_tctx_t *)(uintptr_t)1U; else ret = arena_miscelm_get(chunk, pageind)->prof_tctx; @@ -1047,7 +1048,7 @@ arena_prof_tctx_set(const void *ptr, prof_tctx_t *tctx) pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; assert(arena_mapbits_allocated_get(chunk, pageind) != 0); - if (arena_mapbits_large_get(chunk, pageind) != 0) + if (unlikely(arena_mapbits_large_get(chunk, pageind) != 0)) arena_miscelm_get(chunk, pageind)->prof_tctx = tctx; } @@ -1059,8 +1060,9 @@ arena_malloc(arena_t *arena, size_t size, bool zero, bool try_tcache) assert(size != 0); assert(size <= arena_maxclass); - if (size <= SMALL_MAXCLASS) { - if (try_tcache && (tcache = tcache_get(true)) != NULL) + if (likely(size <= SMALL_MAXCLASS)) { + if (likely(try_tcache) && likely((tcache = tcache_get(true)) != + NULL)) return (tcache_alloc_small(tcache, size, zero)); else { return (arena_malloc_small(choose_arena(arena), size, @@ -1071,8 +1073,8 @@ arena_malloc(arena_t *arena, size_t size, bool zero, bool try_tcache) * Initialize tcache after checking size in order to avoid * infinite recursion during tcache initialization. 
*/ - if (try_tcache && size <= tcache_maxclass && (tcache = - tcache_get(true)) != NULL) + if (try_tcache && size <= tcache_maxclass && likely((tcache = + tcache_get(true)) != NULL)) return (tcache_alloc_large(tcache, size, zero)); else { return (arena_malloc_large(choose_arena(arena), size, @@ -1096,8 +1098,8 @@ arena_salloc(const void *ptr, bool demote) pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; assert(arena_mapbits_allocated_get(chunk, pageind) != 0); binind = arena_mapbits_binind_get(chunk, pageind); - if (binind == BININD_INVALID || (config_prof && demote == false && - arena_mapbits_large_get(chunk, pageind) != 0)) { + if (unlikely(binind == BININD_INVALID || (config_prof && demote == false + && arena_mapbits_large_get(chunk, pageind) != 0))) { /* * Large allocation. In the common case (demote == true), and * as this is an inline function, most callers will only end up @@ -1137,10 +1139,12 @@ arena_dalloc(arena_chunk_t *chunk, void *ptr, bool try_tcache) pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; mapbits = arena_mapbits_get(chunk, pageind); assert(arena_mapbits_allocated_get(chunk, pageind) != 0); - if ((mapbits & CHUNK_MAP_LARGE) == 0) { + if (likely((mapbits & CHUNK_MAP_LARGE) == 0)) { /* Small allocation. 
*/ - if (try_tcache && (tcache = tcache_get(false)) != NULL) { - size_t binind = arena_ptr_small_binind_get(ptr, mapbits); + if (likely(try_tcache) && likely((tcache = tcache_get(false)) != + NULL)) { + size_t binind = arena_ptr_small_binind_get(ptr, + mapbits); tcache_dalloc_small(tcache, ptr, binind); } else arena_dalloc_small(chunk->arena, chunk, ptr, pageind); @@ -1149,8 +1153,8 @@ arena_dalloc(arena_chunk_t *chunk, void *ptr, bool try_tcache) assert(((uintptr_t)ptr & PAGE_MASK) == 0); - if (try_tcache && size <= tcache_maxclass && (tcache = - tcache_get(false)) != NULL) { + if (try_tcache && size <= tcache_maxclass && likely((tcache = + tcache_get(false)) != NULL)) { tcache_dalloc_large(tcache, ptr, size); } else arena_dalloc_large(chunk->arena, chunk, ptr); @@ -1165,13 +1169,15 @@ arena_sdalloc(arena_chunk_t *chunk, void *ptr, size_t size, bool try_tcache) assert(ptr != NULL); assert(CHUNK_ADDR2BASE(ptr) != ptr); - if (size < PAGE) { + if (likely(size <= SMALL_MAXCLASS)) { /* Small allocation. 
*/ - if (try_tcache && (tcache = tcache_get(false)) != NULL) { + if (likely(try_tcache) && likely((tcache = tcache_get(false)) != + NULL)) { size_t binind = small_size2bin(size); tcache_dalloc_small(tcache, ptr, binind); } else { - size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; + size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> + LG_PAGE; arena_dalloc_small(chunk->arena, chunk, ptr, pageind); } } else { diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 81d46fc3..a380a414 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -814,7 +814,7 @@ JEMALLOC_ALWAYS_INLINE void iqalloc(void *ptr, bool try_tcache) { - if (config_fill && opt_quarantine) + if (config_fill && unlikely(opt_quarantine)) quarantine(ptr); else idalloct(ptr, try_tcache); @@ -824,7 +824,7 @@ JEMALLOC_ALWAYS_INLINE void isqalloc(void *ptr, size_t size, bool try_tcache) { - if (config_fill && opt_quarantine) + if (config_fill && unlikely(opt_quarantine)) quarantine(ptr); else isdalloct(ptr, size, try_tcache); diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index 920ec63f..a1e7ac5e 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -400,7 +400,8 @@ prof_alloc_prep(size_t usize, bool update) assert(usize == s2u(usize)); - if (!opt_prof_active || prof_sample_accum_update(usize, update, &tdata)) + if (!opt_prof_active || likely(prof_sample_accum_update(usize, update, + &tdata))) ret = (prof_tctx_t *)(uintptr_t)1U; else { bt_init(&bt, tdata->vec); @@ -419,7 +420,7 @@ prof_malloc(const void *ptr, size_t usize, prof_tctx_t *tctx) assert(ptr != NULL); assert(usize == isalloc(ptr, true)); - if ((uintptr_t)tctx > (uintptr_t)1U) + if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) prof_malloc_sample_object(ptr, usize, tctx); else prof_tctx_set(ptr, (prof_tctx_t *)(uintptr_t)1U); @@ -447,9 
+448,9 @@ prof_realloc(const void *ptr, size_t usize, prof_tctx_t *tctx, bool updated, } } - if ((uintptr_t)old_tctx > (uintptr_t)1U) + if (unlikely((uintptr_t)old_tctx > (uintptr_t)1U)) prof_free_sampled_object(old_usize, old_tctx); - if ((uintptr_t)tctx > (uintptr_t)1U) + if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) prof_malloc_sample_object(ptr, usize, tctx); else prof_tctx_set(ptr, (prof_tctx_t *)(uintptr_t)1U); @@ -463,7 +464,7 @@ prof_free(const void *ptr, size_t usize) cassert(config_prof); assert(usize == isalloc(ptr, true)); - if ((uintptr_t)tctx > (uintptr_t)1U) + if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) prof_free_sampled_object(usize, tctx); } #endif diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index 292ce461..c9d723aa 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -236,7 +236,7 @@ tcache_event(tcache_t *tcache) tcache->ev_cnt++; assert(tcache->ev_cnt <= TCACHE_GC_INCR); - if (tcache->ev_cnt == TCACHE_GC_INCR) + if (unlikely(tcache->ev_cnt == TCACHE_GC_INCR)) tcache_event_hard(tcache); } @@ -245,12 +245,12 @@ tcache_alloc_easy(tcache_bin_t *tbin) { void *ret; - if (tbin->ncached == 0) { + if (unlikely(tbin->ncached == 0)) { tbin->low_water = -1; return (NULL); } tbin->ncached--; - if ((int)tbin->ncached < tbin->low_water) + if (unlikely((int)tbin->ncached < tbin->low_water)) tbin->low_water = tbin->ncached; ret = tbin->avail[tbin->ncached]; return (ret); @@ -268,23 +268,23 @@ tcache_alloc_small(tcache_t *tcache, size_t size, bool zero) tbin = &tcache->tbins[binind]; size = small_bin2size(binind); ret = tcache_alloc_easy(tbin); - if (ret == NULL) { + if (unlikely(ret == NULL)) { ret = tcache_alloc_small_hard(tcache, tbin, binind); if (ret == NULL) return (NULL); } assert(tcache_salloc(ret) == size); - if (zero == false) { + if (likely(zero == false)) { if (config_fill) { - if (opt_junk) { + if (unlikely(opt_junk)) { arena_alloc_junk_small(ret, 
&arena_bin_info[binind], false); - } else if (opt_zero) + } else if (unlikely(opt_zero)) memset(ret, 0, size); } } else { - if (config_fill && opt_junk) { + if (config_fill && unlikely(opt_junk)) { arena_alloc_junk_small(ret, &arena_bin_info[binind], true); } @@ -312,7 +312,7 @@ tcache_alloc_large(tcache_t *tcache, size_t size, bool zero) assert(binind < nhbins); tbin = &tcache->tbins[binind]; ret = tcache_alloc_easy(tbin); - if (ret == NULL) { + if (unlikely(ret == NULL)) { /* * Only allocate one large object at a time, because it's quite * expensive to create one and not use it. @@ -329,11 +329,11 @@ tcache_alloc_large(tcache_t *tcache, size_t size, bool zero) arena_mapbits_large_binind_set(chunk, pageind, BININD_INVALID); } - if (zero == false) { + if (likely(zero == false)) { if (config_fill) { - if (opt_junk) + if (unlikely(opt_junk)) memset(ret, 0xa5, size); - else if (opt_zero) + else if (unlikely(opt_zero)) memset(ret, 0, size); } } else @@ -357,12 +357,12 @@ tcache_dalloc_small(tcache_t *tcache, void *ptr, size_t binind) assert(tcache_salloc(ptr) <= SMALL_MAXCLASS); - if (config_fill && opt_junk) + if (config_fill && unlikely(opt_junk)) arena_dalloc_junk_small(ptr, &arena_bin_info[binind]); tbin = &tcache->tbins[binind]; tbin_info = &tcache_bin_info[binind]; - if (tbin->ncached == tbin_info->ncached_max) { + if (unlikely(tbin->ncached == tbin_info->ncached_max)) { tcache_bin_flush_small(tbin, binind, (tbin_info->ncached_max >> 1), tcache); } @@ -386,12 +386,12 @@ tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size) binind = NBINS + (size >> LG_PAGE) - 1; - if (config_fill && opt_junk) + if (config_fill && unlikely(opt_junk)) memset(ptr, 0x5a, size); tbin = &tcache->tbins[binind]; tbin_info = &tcache_bin_info[binind]; - if (tbin->ncached == tbin_info->ncached_max) { + if (unlikely(tbin->ncached == tbin_info->ncached_max)) { tcache_bin_flush_large(tbin, binind, (tbin_info->ncached_max >> 1), tcache); } diff --git a/src/arena.c b/src/arena.c index 
8d34cf60..35d792a2 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1365,7 +1365,7 @@ arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, size_t binind, ptr = arena_bin_malloc_hard(arena, bin); if (ptr == NULL) break; - if (config_fill && opt_junk) { + if (config_fill && unlikely(opt_junk)) { arena_alloc_junk_small(ptr, &arena_bin_info[binind], true); } @@ -1519,15 +1519,15 @@ arena_malloc_small(arena_t *arena, size_t size, bool zero) if (zero == false) { if (config_fill) { - if (opt_junk) { + if (unlikely(opt_junk)) { arena_alloc_junk_small(ret, &arena_bin_info[binind], false); - } else if (opt_zero) + } else if (unlikely(opt_zero)) memset(ret, 0, size); } JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, size); } else { - if (config_fill && opt_junk) { + if (config_fill && unlikely(opt_junk)) { arena_alloc_junk_small(ret, &arena_bin_info[binind], true); } @@ -1568,9 +1568,9 @@ arena_malloc_large(arena_t *arena, size_t size, bool zero) if (zero == false) { if (config_fill) { - if (opt_junk) + if (unlikely(opt_junk)) memset(ret, 0xa5, size); - else if (opt_zero) + else if (unlikely(opt_zero)) memset(ret, 0, size); } } @@ -1626,9 +1626,9 @@ arena_palloc(arena_t *arena, size_t size, size_t alignment, bool zero) malloc_mutex_unlock(&arena->lock); if (config_fill && zero == false) { - if (opt_junk) + if (unlikely(opt_junk)) memset(ret, 0xa5, size); - else if (opt_zero) + else if (unlikely(opt_zero)) memset(ret, 0, size); } return (ret); @@ -1771,7 +1771,7 @@ arena_dalloc_bin_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr, if (config_fill || config_stats) size = bin_info->reg_size; - if (config_fill && opt_junk) + if (config_fill && unlikely(opt_junk)) arena_dalloc_junk_small(ptr, bin_info); arena_run_reg_dalloc(run, ptr); @@ -1825,7 +1825,7 @@ static void arena_dalloc_junk_large(void *ptr, size_t usize) { - if (config_fill && opt_junk) + if (config_fill && unlikely(opt_junk)) memset(ptr, 0x5a, usize); } #ifdef JEMALLOC_JET @@ -1967,7 +1967,7 @@ static void 
arena_ralloc_junk_large(void *ptr, size_t old_usize, size_t usize) { - if (config_fill && opt_junk) { + if (config_fill && unlikely(opt_junk)) { memset((void *)((uintptr_t)ptr + usize), 0x5a, old_usize - usize); } @@ -2011,11 +2011,11 @@ arena_ralloc_large(void *ptr, size_t oldsize, size_t size, size_t extra, oldsize, PAGE_CEILING(size), psize - PAGE_CEILING(size), zero); if (config_fill && ret == false && zero == false) { - if (opt_junk) { + if (unlikely(opt_junk)) { memset((void *)((uintptr_t)ptr + oldsize), 0xa5, isalloc(ptr, config_prof) - oldsize); - } else if (opt_zero) { + } else if (unlikely(opt_zero)) { memset((void *)((uintptr_t)ptr + oldsize), 0, isalloc(ptr, config_prof) - oldsize); @@ -2272,7 +2272,7 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size) * minimum alignment; without the padding, each redzone would have to * be twice as large in order to maintain alignment. */ - if (config_fill && opt_redzone) { + if (config_fill && unlikely(opt_redzone)) { size_t align_min = ZU(1) << (jemalloc_ffs(bin_info->reg_size) - 1); if (align_min <= REDZONE_MINSIZE) { diff --git a/src/huge.c b/src/huge.c index e7733093..0b7db7fc 100644 --- a/src/huge.c +++ b/src/huge.c @@ -62,9 +62,9 @@ huge_palloc(arena_t *arena, size_t size, size_t alignment, bool zero) malloc_mutex_unlock(&huge_mtx); if (config_fill && zero == false) { - if (opt_junk) + if (unlikely(opt_junk)) memset(ret, 0xa5, csize); - else if (opt_zero && is_zeroed == false) + else if (unlikely(opt_zero) && is_zeroed == false) memset(ret, 0, csize); } @@ -141,7 +141,7 @@ static void huge_dalloc_junk(void *ptr, size_t usize) { - if (config_fill && have_dss && opt_junk) { + if (config_fill && have_dss && unlikely(opt_junk)) { /* * Only bother junk filling if the chunk isn't about to be * unmapped. 
diff --git a/src/jemalloc.c b/src/jemalloc.c index f6be7514..dfb12666 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -87,7 +87,7 @@ typedef struct { #ifdef JEMALLOC_UTRACE # define UTRACE(a, b, c) do { \ - if (opt_utrace) { \ + if (unlikely(opt_utrace)) { \ int utrace_serrno = errno; \ malloc_utrace_t ut; \ ut.p = (a); \ @@ -283,7 +283,7 @@ malloc_thread_init(void) * a best effort attempt at initializing its TSD by hooking all * allocation events. */ - if (config_fill && opt_quarantine) + if (config_fill && unlikely(opt_quarantine)) quarantine_alloc_hook(); } @@ -397,13 +397,13 @@ malloc_conf_init(void) */ if (config_valgrind) { in_valgrind = (RUNNING_ON_VALGRIND != 0) ? true : false; - if (config_fill && in_valgrind) { + if (config_fill && unlikely(in_valgrind)) { opt_junk = false; assert(opt_zero == false); opt_quarantine = JEMALLOC_VALGRIND_QUARANTINE_DEFAULT; opt_redzone = true; } - if (config_tcache && in_valgrind) + if (config_tcache && unlikely(in_valgrind)) opt_tcache = false; } @@ -887,7 +887,7 @@ imalloc_prof(size_t usize) prof_tctx_t *tctx; tctx = prof_alloc_prep(usize, true); - if ((uintptr_t)tctx != (uintptr_t)1U) + if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) p = imalloc_prof_sample(usize, tctx); else p = imalloc(usize); @@ -912,7 +912,7 @@ imalloc_body(size_t size, size_t *usize) return (imalloc_prof(*usize)); } - if (config_stats || (unlikely(config_valgrind && in_valgrind))) + if (config_stats || (config_valgrind && unlikely(in_valgrind))) *usize = s2u(size); return (imalloc(size)); } @@ -927,15 +927,15 @@ je_malloc(size_t size) size = 1; ret = imalloc_body(size, &usize); - if (ret == NULL) { - if (config_xmalloc && opt_xmalloc) { + if (unlikely(ret == NULL)) { + if (config_xmalloc && unlikely(opt_xmalloc)) { malloc_write(": Error in malloc(): " "out of memory\n"); abort(); } set_errno(ENOMEM); } - if (config_stats && ret != NULL) { + if (config_stats && likely(ret != NULL)) { assert(usize == isalloc(ret, config_prof)); 
thread_allocated_tsd_get()->allocated += usize; } @@ -970,7 +970,7 @@ imemalign_prof(size_t alignment, size_t usize) prof_tctx_t *tctx; tctx = prof_alloc_prep(usize, true); - if ((uintptr_t)tctx != (uintptr_t)1U) + if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) p = imemalign_prof_sample(alignment, usize, tctx); else p = ipalloc(usize, alignment, false); @@ -1001,9 +1001,9 @@ imemalign(void **memptr, size_t alignment, size_t size, size_t min_alignment) size = 1; /* Make sure that alignment is a large enough power of 2. */ - if (((alignment - 1) & alignment) != 0 - || (alignment < min_alignment)) { - if (config_xmalloc && opt_xmalloc) { + if (unlikely(((alignment - 1) & alignment) != 0 + || (alignment < min_alignment))) { + if (config_xmalloc && unlikely(opt_xmalloc)) { malloc_write(": Error allocating " "aligned memory: invalid alignment\n"); abort(); @@ -1014,7 +1014,7 @@ imemalign(void **memptr, size_t alignment, size_t size, size_t min_alignment) } usize = sa2u(size, alignment); - if (usize == 0) { + if (unlikely(usize == 0)) { result = NULL; goto label_oom; } @@ -1023,14 +1023,14 @@ imemalign(void **memptr, size_t alignment, size_t size, size_t min_alignment) result = imemalign_prof(alignment, usize); else result = ipalloc(usize, alignment, false); - if (result == NULL) + if (unlikely(result == NULL)) goto label_oom; } *memptr = result; ret = 0; label_return: - if (config_stats && result != NULL) { + if (config_stats && likely(result != NULL)) { assert(usize == isalloc(result, config_prof)); thread_allocated_tsd_get()->allocated += usize; } @@ -1038,7 +1038,7 @@ label_return: return (ret); label_oom: assert(result == NULL); - if (config_xmalloc && opt_xmalloc) { + if (config_xmalloc && unlikely(opt_xmalloc)) { malloc_write(": Error allocating aligned memory: " "out of memory\n"); abort(); @@ -1062,7 +1062,7 @@ je_aligned_alloc(size_t alignment, size_t size) void *ret; int err; - if ((err = imemalign(&ret, alignment, size, 1)) != 0) { + if (unlikely((err = 
imemalign(&ret, alignment, size, 1)) != 0)) { ret = NULL; set_errno(err); } @@ -1096,7 +1096,7 @@ icalloc_prof(size_t usize) prof_tctx_t *tctx; tctx = prof_alloc_prep(usize, true); - if ((uintptr_t)tctx != (uintptr_t)1U) + if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) p = icalloc_prof_sample(usize, tctx); else p = icalloc(usize); @@ -1123,7 +1123,7 @@ je_calloc(size_t num, size_t size) } num_size = num * size; - if (num_size == 0) { + if (unlikely(num_size == 0)) { if (num == 0 || size == 0) num_size = 1; else { @@ -1135,8 +1135,8 @@ je_calloc(size_t num, size_t size) * overflow during multiplication if neither operand uses any of the * most significant half of the bits in a size_t. */ - } else if (((num | size) & (SIZE_T_MAX << (sizeof(size_t) << 2))) - && (num_size / size != num)) { + } else if (unlikely(((num | size) & (SIZE_T_MAX << (sizeof(size_t) << + 2))) && (num_size / size != num))) { /* size_t overflow. */ ret = NULL; goto label_return; @@ -1146,21 +1146,21 @@ je_calloc(size_t num, size_t size) usize = s2u(num_size); ret = icalloc_prof(usize); } else { - if (config_stats || unlikely(config_valgrind && in_valgrind)) + if (config_stats || (config_valgrind && unlikely(in_valgrind))) usize = s2u(num_size); ret = icalloc(num_size); } label_return: - if (ret == NULL) { - if (config_xmalloc && opt_xmalloc) { + if (unlikely(ret == NULL)) { + if (config_xmalloc && unlikely(opt_xmalloc)) { malloc_write(": Error in calloc(): out of " "memory\n"); abort(); } set_errno(ENOMEM); } - if (config_stats && ret != NULL) { + if (config_stats && likely(ret != NULL)) { assert(usize == isalloc(ret, config_prof)); thread_allocated_tsd_get()->allocated += usize; } @@ -1195,7 +1195,7 @@ irealloc_prof(void *oldptr, size_t old_usize, size_t usize) old_tctx = prof_tctx_get(oldptr); tctx = prof_alloc_prep(usize, true); - if ((uintptr_t)tctx != (uintptr_t)1U) + if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) p = irealloc_prof_sample(oldptr, usize, tctx); else p = iralloc(oldptr, 
usize, 0, false); @@ -1222,7 +1222,7 @@ ifree(void *ptr, bool try_tcache) usize = isalloc(ptr, config_prof); if (config_stats) thread_allocated_tsd_get()->deallocated += usize; - if (unlikely(config_valgrind && in_valgrind)) + if (config_valgrind && unlikely(in_valgrind)) rzsize = p2rz(ptr); iqalloc(ptr, try_tcache); JEMALLOC_VALGRIND_FREE(ptr, rzsize); @@ -1240,7 +1240,7 @@ isfree(void *ptr, size_t usize, bool try_tcache) prof_free(ptr, usize); if (config_stats) thread_allocated_tsd_get()->deallocated += usize; - if (unlikely(config_valgrind && in_valgrind)) + if (config_valgrind && unlikely(in_valgrind)) rzsize = p2rz(ptr); isqalloc(ptr, usize, try_tcache); JEMALLOC_VALGRIND_FREE(ptr, rzsize); @@ -1254,7 +1254,7 @@ je_realloc(void *ptr, size_t size) size_t old_usize = 0; UNUSED size_t old_rzsize JEMALLOC_CC_SILENCE_INIT(0); - if (size == 0) { + if (unlikely(size == 0)) { if (ptr != NULL) { /* realloc(ptr, 0) is equivalent to free(ptr). */ UTRACE(ptr, 0, 0); @@ -1264,21 +1264,22 @@ je_realloc(void *ptr, size_t size) size = 1; } - if (ptr != NULL) { + if (likely(ptr != NULL)) { assert(malloc_initialized || IS_INITIALIZER); malloc_thread_init(); if ((config_prof && opt_prof) || config_stats || - unlikely(config_valgrind && in_valgrind)) + (config_valgrind && unlikely(in_valgrind))) old_usize = isalloc(ptr, config_prof); - if (unlikely(config_valgrind && in_valgrind)) + if (config_valgrind && unlikely(in_valgrind)) old_rzsize = config_prof ? 
p2rz(ptr) : u2rz(old_usize); if (config_prof && opt_prof) { usize = s2u(size); ret = irealloc_prof(ptr, old_usize, usize); } else { - if (config_stats || unlikely(config_valgrind && in_valgrind)) + if (config_stats || (config_valgrind && + unlikely(in_valgrind))) usize = s2u(size); ret = iralloc(ptr, size, 0, false); } @@ -1287,15 +1288,15 @@ je_realloc(void *ptr, size_t size) ret = imalloc_body(size, &usize); } - if (ret == NULL) { - if (config_xmalloc && opt_xmalloc) { + if (unlikely(ret == NULL)) { + if (config_xmalloc && unlikely(opt_xmalloc)) { malloc_write(": Error in realloc(): " "out of memory\n"); abort(); } set_errno(ENOMEM); } - if (config_stats && ret != NULL) { + if (config_stats && likely(ret != NULL)) { thread_allocated_t *ta; assert(usize == isalloc(ret, config_prof)); ta = thread_allocated_tsd_get(); @@ -1313,7 +1314,7 @@ je_free(void *ptr) { UTRACE(ptr, 0, 0); - if (ptr != NULL) + if (likely(ptr != NULL)) ifree(ptr, true); } @@ -1410,7 +1411,7 @@ imallocx_flags_decode(size_t size, int flags, size_t *usize, size_t *alignment, bool *zero, bool *try_tcache, arena_t **arena) { - if (flags == 0) { + if (likely(flags == 0)) { *usize = s2u(size); assert(usize != 0); *alignment = 0; @@ -1440,7 +1441,7 @@ imallocx_maybe_flags(size_t size, int flags, size_t usize, size_t alignment, bool zero, bool try_tcache, arena_t *arena) { - if (flags == 0) + if (likely(flags == 0)) return (imalloc(size)); return (imallocx_flags(usize, alignment, zero, try_tcache, arena)); } @@ -1479,7 +1480,7 @@ imallocx_prof(size_t size, int flags, size_t *usize) imallocx_flags_decode(size, flags, usize, &alignment, &zero, &try_tcache, &arena); tctx = prof_alloc_prep(*usize, true); - if ((uintptr_t)tctx == (uintptr_t)1U) { + if (likely((uintptr_t)tctx == (uintptr_t)1U)) { p = imallocx_maybe_flags(size, flags, *usize, alignment, zero, try_tcache, arena); } else if ((uintptr_t)tctx > (uintptr_t)1U) { @@ -1487,7 +1488,7 @@ imallocx_prof(size_t size, int flags, size_t *usize) try_tcache, 
arena); } else p = NULL; - if (p == NULL) { + if (unlikely(p == NULL)) { prof_alloc_rollback(tctx, true); return (NULL); } @@ -1504,8 +1505,8 @@ imallocx_no_prof(size_t size, int flags, size_t *usize) bool try_tcache; arena_t *arena; - if (flags == 0) { - if (config_stats || unlikely(config_valgrind && in_valgrind)) + if (likely(flags == 0)) { + if (config_stats || (config_valgrind && unlikely(in_valgrind))) *usize = s2u(size); return (imalloc(size)); } @@ -1530,7 +1531,7 @@ je_mallocx(size_t size, int flags) p = imallocx_prof(size, flags, &usize); else p = imallocx_no_prof(size, flags, &usize); - if (p == NULL) + if (unlikely(p == NULL)) goto label_oom; if (config_stats) { @@ -1541,7 +1542,7 @@ je_mallocx(size_t size, int flags) JEMALLOC_VALGRIND_MALLOC(true, p, usize, MALLOCX_ZERO_GET(flags)); return (p); label_oom: - if (config_xmalloc && opt_xmalloc) { + if (config_xmalloc && unlikely(opt_xmalloc)) { malloc_write(": Error in mallocx(): out of memory\n"); abort(); } @@ -1582,14 +1583,14 @@ irallocx_prof(void *oldptr, size_t old_usize, size_t size, size_t alignment, old_tctx = prof_tctx_get(oldptr); tctx = prof_alloc_prep(*usize, true); - if ((uintptr_t)tctx != (uintptr_t)1U) { + if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) { p = irallocx_prof_sample(oldptr, size, alignment, *usize, zero, try_tcache_alloc, try_tcache_dalloc, arena, tctx); } else { p = iralloct(oldptr, size, alignment, zero, try_tcache_alloc, try_tcache_dalloc, arena); } - if (p == NULL) { + if (unlikely(p == NULL)) { prof_alloc_rollback(tctx, true); return (NULL); } @@ -1614,7 +1615,8 @@ void * je_rallocx(void *ptr, size_t size, int flags) { void *p; - size_t usize, old_usize; + size_t usize; + UNUSED size_t old_usize JEMALLOC_CC_SILENCE_INIT(0); UNUSED size_t old_rzsize JEMALLOC_CC_SILENCE_INIT(0); size_t alignment = MALLOCX_ALIGN_GET(flags); bool zero = flags & MALLOCX_ZERO; @@ -1626,7 +1628,7 @@ je_rallocx(void *ptr, size_t size, int flags) assert(malloc_initialized || IS_INITIALIZER); 
malloc_thread_init(); - if ((flags & MALLOCX_ARENA_MASK) != 0) { + if (unlikely((flags & MALLOCX_ARENA_MASK) != 0)) { unsigned arena_ind = MALLOCX_ARENA_GET(flags); arena_chunk_t *chunk; try_tcache_alloc = false; @@ -1641,9 +1643,9 @@ je_rallocx(void *ptr, size_t size, int flags) } if ((config_prof && opt_prof) || config_stats || - (unlikely(config_valgrind && in_valgrind))) + ((config_valgrind && unlikely(in_valgrind)))) old_usize = isalloc(ptr, config_prof); - if (unlikely(config_valgrind && in_valgrind)) + if (config_valgrind && unlikely(in_valgrind)) old_rzsize = u2rz(old_usize); if (config_prof && opt_prof) { @@ -1651,14 +1653,14 @@ je_rallocx(void *ptr, size_t size, int flags) assert(usize != 0); p = irallocx_prof(ptr, old_usize, size, alignment, &usize, zero, try_tcache_alloc, try_tcache_dalloc, arena); - if (p == NULL) + if (unlikely(p == NULL)) goto label_oom; } else { p = iralloct(ptr, size, alignment, zero, try_tcache_alloc, try_tcache_dalloc, arena); - if (p == NULL) + if (unlikely(p == NULL)) goto label_oom; - if (config_stats || (config_valgrind && in_valgrind)) + if (config_stats || (config_valgrind && unlikely(in_valgrind))) usize = isalloc(p, config_prof); } @@ -1673,7 +1675,7 @@ je_rallocx(void *ptr, size_t size, int flags) old_rzsize, false, zero); return (p); label_oom: - if (config_xmalloc && opt_xmalloc) { + if (config_xmalloc && unlikely(opt_xmalloc)) { malloc_write(": Error in rallocx(): out of memory\n"); abort(); } @@ -1738,14 +1740,14 @@ ixallocx_prof(void *ptr, size_t old_usize, size_t size, size_t extra, max_usize = (alignment == 0) ? 
s2u(size+extra) : sa2u(size+extra, alignment); tctx = prof_alloc_prep(max_usize, false); - if ((uintptr_t)tctx != (uintptr_t)1U) { + if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) { usize = ixallocx_prof_sample(ptr, old_usize, size, extra, alignment, zero, max_usize, arena, tctx); } else { usize = ixallocx_helper(ptr, old_usize, size, extra, alignment, zero, arena); } - if (usize == old_usize) { + if (unlikely(usize == old_usize)) { prof_alloc_rollback(tctx, false); return (usize); } @@ -1769,14 +1771,14 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) assert(malloc_initialized || IS_INITIALIZER); malloc_thread_init(); - if ((flags & MALLOCX_ARENA_MASK) != 0) { + if (unlikely((flags & MALLOCX_ARENA_MASK) != 0)) { unsigned arena_ind = MALLOCX_ARENA_GET(flags); arena = arenas[arena_ind]; } else arena = NULL; old_usize = isalloc(ptr, config_prof); - if (unlikely(config_valgrind && in_valgrind)) + if (config_valgrind && unlikely(in_valgrind)) old_rzsize = u2rz(old_usize); if (config_prof && opt_prof) { @@ -1786,7 +1788,7 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) usize = ixallocx_helper(ptr, old_usize, size, extra, alignment, zero, arena); } - if (usize == old_usize) + if (unlikely(usize == old_usize)) goto label_not_resized; if (config_stats) { @@ -1828,7 +1830,7 @@ je_dallocx(void *ptr, int flags) assert(ptr != NULL); assert(malloc_initialized || IS_INITIALIZER); - if ((flags & MALLOCX_ARENA_MASK) != 0) { + if (unlikely((flags & MALLOCX_ARENA_MASK) != 0)) { unsigned arena_ind = MALLOCX_ARENA_GET(flags); arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); try_tcache = (chunk == ptr || chunk->arena != @@ -1845,7 +1847,7 @@ inallocx(size_t size, int flags) { size_t usize; - if ((flags & MALLOCX_LG_ALIGN_MASK) == 0) + if (likely((flags & MALLOCX_LG_ALIGN_MASK) == 0)) usize = s2u(size); else usize = sa2u(size, MALLOCX_ALIGN_GET_SPECIFIED(flags)); @@ -1864,7 +1866,7 @@ je_sdallocx(void *ptr, size_t size, int flags) usize = 
inallocx(size, flags); assert(usize == isalloc(ptr, config_prof)); - if ((flags & MALLOCX_ARENA_MASK) != 0) { + if (unlikely((flags & MALLOCX_ARENA_MASK) != 0)) { unsigned arena_ind = MALLOCX_ARENA_GET(flags); arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); try_tcache = (chunk == ptr || chunk->arena != diff --git a/src/quarantine.c b/src/quarantine.c index 3b874422..efddeae7 100644 --- a/src/quarantine.c +++ b/src/quarantine.c @@ -141,12 +141,12 @@ quarantine(void *ptr) obj->usize = usize; quarantine->curbytes += usize; quarantine->curobjs++; - if (config_fill && opt_junk) { + if (config_fill && unlikely(opt_junk)) { /* * Only do redzone validation if Valgrind isn't in * operation. */ - if ((config_valgrind == false || in_valgrind == false) + if ((!config_valgrind || likely(!in_valgrind)) && usize <= SMALL_MAXCLASS) arena_quarantine_junk_small(ptr, usize); else From c3e9e7b0412e97e4976507f914fd39901b023537 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 11 Sep 2014 17:04:03 -0700 Subject: [PATCH 0524/3378] Fix irallocx_prof() sample logic. Fix irallocx_prof() sample logic to only update the threshold counter after it knows what size the allocation ended up being. This regression was caused by 6e73dc194ee9682d3eacaf725a989f04629718f7 (Fix a profile sampling race.), which did not make it into any releases prior to this fix. 
--- src/jemalloc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index dfb12666..c5b8f520 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1582,7 +1582,7 @@ irallocx_prof(void *oldptr, size_t old_usize, size_t size, size_t alignment, prof_tctx_t *old_tctx, *tctx; old_tctx = prof_tctx_get(oldptr); - tctx = prof_alloc_prep(*usize, true); + tctx = prof_alloc_prep(*usize, false); if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) { p = irallocx_prof_sample(oldptr, size, alignment, *usize, zero, try_tcache_alloc, try_tcache_dalloc, arena, tctx); @@ -1591,7 +1591,7 @@ irallocx_prof(void *oldptr, size_t old_usize, size_t size, size_t alignment, try_tcache_dalloc, arena); } if (unlikely(p == NULL)) { - prof_alloc_rollback(tctx, true); + prof_alloc_rollback(tctx, false); return (NULL); } @@ -1606,7 +1606,7 @@ irallocx_prof(void *oldptr, size_t old_usize, size_t size, size_t alignment, */ *usize = isalloc(p, config_prof); } - prof_realloc(p, *usize, tctx, true, old_usize, old_tctx); + prof_realloc(p, *usize, tctx, false, old_usize, old_tctx); return (p); } From 9d8f3d203327a7ee9ba92814e1fd8a7d1b9c421b Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 11 Sep 2014 18:06:30 -0700 Subject: [PATCH 0525/3378] Fix prof regressions. Don't use atomic_add_uint64(), because it isn't available on 32-bit platforms. Fix forking support functions to manage all prof-related mutexes. These regressions were introduced by 602c8e0971160e4b85b08b16cf8a2375aa24bc04 (Implement per thread heap profiling.), which did not make it into any releases prior to these fixes. 
--- src/prof.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/src/prof.c b/src/prof.c index 9495afc4..a773e224 100644 --- a/src/prof.c +++ b/src/prof.c @@ -68,6 +68,7 @@ static prof_tdata_tree_t tdatas; static malloc_mutex_t tdatas_mtx; static uint64_t next_thr_uid; +static malloc_mutex_t next_thr_uid_mtx; static malloc_mutex_t prof_dump_seq_mtx; static uint64_t prof_dump_seq; @@ -1498,8 +1499,14 @@ prof_bt_keycomp(const void *k1, const void *k2) JEMALLOC_INLINE_C uint64_t prof_thr_uid_alloc(void) { + uint64_t thr_uid; - return (atomic_add_uint64(&next_thr_uid, 1) - 1); + malloc_mutex_lock(&next_thr_uid_mtx); + thr_uid = next_thr_uid; + next_thr_uid++; + malloc_mutex_unlock(&next_thr_uid_mtx); + + return (thr_uid); } static prof_tdata_t * @@ -1785,6 +1792,8 @@ prof_boot2(void) return (true); next_thr_uid = 0; + if (malloc_mutex_init(&next_thr_uid_mtx)) + return (true); if (malloc_mutex_init(&prof_dump_seq_mtx)) return (true); @@ -1836,10 +1845,14 @@ prof_prefork(void) if (opt_prof) { unsigned i; + malloc_mutex_prefork(&tdatas_mtx); malloc_mutex_prefork(&bt2gctx_mtx); + malloc_mutex_prefork(&next_thr_uid_mtx); malloc_mutex_prefork(&prof_dump_seq_mtx); for (i = 0; i < PROF_NCTX_LOCKS; i++) malloc_mutex_prefork(&gctx_locks[i]); + for (i = 0; i < PROF_NTDATA_LOCKS; i++) + malloc_mutex_prefork(&tdata_locks[i]); } } @@ -1850,10 +1863,14 @@ prof_postfork_parent(void) if (opt_prof) { unsigned i; + for (i = 0; i < PROF_NTDATA_LOCKS; i++) + malloc_mutex_postfork_parent(&tdata_locks[i]); for (i = 0; i < PROF_NCTX_LOCKS; i++) malloc_mutex_postfork_parent(&gctx_locks[i]); malloc_mutex_postfork_parent(&prof_dump_seq_mtx); + malloc_mutex_postfork_parent(&next_thr_uid_mtx); malloc_mutex_postfork_parent(&bt2gctx_mtx); + malloc_mutex_postfork_parent(&tdatas_mtx); } } @@ -1864,10 +1881,14 @@ prof_postfork_child(void) if (opt_prof) { unsigned i; + for (i = 0; i < PROF_NTDATA_LOCKS; i++) + malloc_mutex_postfork_child(&tdata_locks[i]); for (i = 
0; i < PROF_NCTX_LOCKS; i++) malloc_mutex_postfork_child(&gctx_locks[i]); malloc_mutex_postfork_child(&prof_dump_seq_mtx); + malloc_mutex_postfork_child(&next_thr_uid_mtx); malloc_mutex_postfork_child(&bt2gctx_mtx); + malloc_mutex_postfork_child(&tdatas_mtx); } } From ebca69c9fb07dd7b0be7aa008215389581b193a0 Mon Sep 17 00:00:00 2001 From: Valerii Hiora Date: Fri, 12 Sep 2014 07:24:28 +0300 Subject: [PATCH 0526/3378] Fixed iOS build after OR1 changes --- config.sub | 3 +++ 1 file changed, 3 insertions(+) diff --git a/config.sub b/config.sub index d654d03c..0ccff770 100755 --- a/config.sub +++ b/config.sub @@ -1404,6 +1404,9 @@ case $os in -mac*) os=`echo $os | sed -e 's|mac|macos|'` ;; + # Apple iOS + -ios*) + ;; -linux-dietlibc) os=-linux-dietlibc ;; From f1cf3ea4753260d37c9a43463bae2140e00e16f0 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Tue, 16 Sep 2014 04:42:33 -0400 Subject: [PATCH 0527/3378] fix tls_model autoconf test It has an unused variable, so it was always failing (at least with gcc 4.9.1). Alternatively, the `-Werror` flag could be removed if it isn't strictly necessary. 
--- configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index d221876c..1d2e8902 100644 --- a/configure.ac +++ b/configure.ac @@ -411,7 +411,7 @@ SAVED_CFLAGS="${CFLAGS}" JE_CFLAGS_APPEND([-Werror]) JE_COMPILABLE([tls_model attribute], [], [static __thread int - __attribute__((tls_model("initial-exec"))) foo; + __attribute__((tls_model("initial-exec"), unused)) foo; foo = 0;], [je_cv_tls_model]) CFLAGS="${SAVED_CFLAGS}" From 913e9a8a853a693c5b5d6c13ab86f1b46a3404f7 Mon Sep 17 00:00:00 2001 From: Nick White Date: Fri, 19 Sep 2014 22:01:23 +0100 Subject: [PATCH 0528/3378] Generate a pkg-config file --- Makefile.in | 10 +++++++++- configure.ac | 3 +++ jemalloc.pc.in | 11 +++++++++++ 3 files changed, 23 insertions(+), 1 deletion(-) create mode 100644 jemalloc.pc.in diff --git a/Makefile.in b/Makefile.in index ac56d8fa..41328b95 100644 --- a/Makefile.in +++ b/Makefile.in @@ -101,6 +101,7 @@ DSOS := $(objroot)lib/$(LIBJEMALLOC).$(SOREV) ifneq ($(SOREV),$(SO)) DSOS += $(objroot)lib/$(LIBJEMALLOC).$(SO) endif +PC := $(srcroot)jemalloc.pc MAN3 := $(objroot)doc/jemalloc$(install_suffix).3 DOCS_XML := $(objroot)doc/jemalloc$(install_suffix).xml DOCS_HTML := $(DOCS_XML:$(objroot)%.xml=$(srcroot)%.html) @@ -302,7 +303,14 @@ install_lib_static: $(STATIC_LIBS) install -m 755 $$l $(LIBDIR); \ done -install_lib: install_lib_shared install_lib_static +install_lib_pc: $(PC) + install -d $(LIBDIR)/pkgconfig + @for l in $(PC); do \ + echo "install -m 644 $$l $(LIBDIR)/pkgconfig"; \ + install -m 644 $$l $(LIBDIR)/pkgconfig; \ +done + +install_lib: install_lib_shared install_lib_static install_lib_pc install_doc_html: install -d $(DATADIR)/doc/jemalloc$(install_suffix) diff --git a/configure.ac b/configure.ac index 1d2e8902..2d5b56a4 100644 --- a/configure.ac +++ b/configure.ac @@ -540,6 +540,7 @@ je_="je_" AC_SUBST([je_]) cfgoutputs_in="Makefile.in" +cfgoutputs_in="${cfgoutputs_in} jemalloc.pc.in" cfgoutputs_in="${cfgoutputs_in} 
doc/html.xsl.in" cfgoutputs_in="${cfgoutputs_in} doc/manpages.xsl.in" cfgoutputs_in="${cfgoutputs_in} doc/jemalloc.xml.in" @@ -551,6 +552,7 @@ cfgoutputs_in="${cfgoutputs_in} test/test.sh.in" cfgoutputs_in="${cfgoutputs_in} test/include/test/jemalloc_test.h.in" cfgoutputs_out="Makefile" +cfgoutputs_out="${cfgoutputs_out} jemalloc.pc" cfgoutputs_out="${cfgoutputs_out} doc/html.xsl" cfgoutputs_out="${cfgoutputs_out} doc/manpages.xsl" cfgoutputs_out="${cfgoutputs_out} doc/jemalloc.xml" @@ -562,6 +564,7 @@ cfgoutputs_out="${cfgoutputs_out} test/test.sh" cfgoutputs_out="${cfgoutputs_out} test/include/test/jemalloc_test.h" cfgoutputs_tup="Makefile" +cfgoutputs_tup="${cfgoutputs_tup} jemalloc.pc:jemalloc.pc.in" cfgoutputs_tup="${cfgoutputs_tup} doc/html.xsl:doc/html.xsl.in" cfgoutputs_tup="${cfgoutputs_tup} doc/manpages.xsl:doc/manpages.xsl.in" cfgoutputs_tup="${cfgoutputs_tup} doc/jemalloc.xml:doc/jemalloc.xml.in" diff --git a/jemalloc.pc.in b/jemalloc.pc.in new file mode 100644 index 00000000..af3f945d --- /dev/null +++ b/jemalloc.pc.in @@ -0,0 +1,11 @@ +prefix=@prefix@ +exec_prefix=@exec_prefix@ +libdir=@libdir@ +includedir=@includedir@ + +Name: jemalloc +Description: A general purpose malloc(3) implementation that emphasizes fragmentation avoidance and scalable concurrency support. +URL: http://www.canonware.com/jemalloc +Version: @jemalloc_version@ +Cflags: -I${includedir} +Libs: -L${libdir} -ljemalloc From 42f59559384ddb1af22607ddb3fe766b7b6ab0b7 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 21 Sep 2014 21:40:38 -0700 Subject: [PATCH 0529/3378] Ignore jemalloc.pc . 
--- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index 79d454f2..fd68315d 100644 --- a/.gitignore +++ b/.gitignore @@ -13,6 +13,8 @@ /doc/jemalloc.html /doc/jemalloc.3 +/jemalloc.pc + /lib/ /Makefile From 5460aa6f6676c7f253bfcb75c028dfd38cae8aaf Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 22 Sep 2014 21:09:23 -0700 Subject: [PATCH 0530/3378] Convert all tsd variables to reside in a single tsd structure. --- include/jemalloc/internal/arena.h | 48 +-- include/jemalloc/internal/ckh.h | 8 +- include/jemalloc/internal/huge.h | 10 +- .../jemalloc/internal/jemalloc_internal.h.in | 168 ++++---- include/jemalloc/internal/private_symbols.txt | 96 ++--- include/jemalloc/internal/prof.h | 78 ++-- include/jemalloc/internal/quarantine.h | 21 +- include/jemalloc/internal/tcache.h | 88 ++--- include/jemalloc/internal/tsd.h | 341 ++++++++++------ src/arena.c | 23 +- src/ckh.c | 38 +- src/ctl.c | 93 ++++- src/huge.c | 21 +- src/jemalloc.c | 366 ++++++++++-------- src/prof.c | 244 ++++++------ src/quarantine.c | 99 ++--- src/rtree.c | 6 +- src/tcache.c | 101 ++--- src/tsd.c | 51 ++- test/unit/ckh.c | 46 ++- test/unit/rtree.c | 8 +- test/unit/tsd.c | 8 +- 22 files changed, 1027 insertions(+), 935 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index bfb0b3cf..f1a12057 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -419,9 +419,9 @@ extern arena_ralloc_junk_large_t *arena_ralloc_junk_large; #endif bool arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, bool zero); -void *arena_ralloc(arena_t *arena, void *ptr, size_t oldsize, size_t size, - size_t extra, size_t alignment, bool zero, bool try_tcache_alloc, - bool try_tcache_dalloc); +void *arena_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, + size_t size, size_t extra, size_t alignment, bool zero, + bool try_tcache_alloc, bool try_tcache_dalloc); dss_prec_t 
arena_dss_prec_get(arena_t *arena); bool arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec); void arena_stats_merge(arena_t *arena, const char **dss, size_t *nactive, @@ -485,10 +485,12 @@ unsigned arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr); prof_tctx_t *arena_prof_tctx_get(const void *ptr); void arena_prof_tctx_set(const void *ptr, prof_tctx_t *tctx); -void *arena_malloc(arena_t *arena, size_t size, bool zero, bool try_tcache); +void *arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero, + bool try_tcache); size_t arena_salloc(const void *ptr, bool demote); -void arena_dalloc(arena_chunk_t *chunk, void *ptr, bool try_tcache); -void arena_sdalloc(arena_chunk_t *chunk, void *ptr, size_t size, +void arena_dalloc(tsd_t *tsd, arena_chunk_t *chunk, void *ptr, + bool try_tcache); +void arena_sdalloc(tsd_t *tsd, arena_chunk_t *chunk, void *ptr, size_t size, bool try_tcache); #endif @@ -1053,7 +1055,8 @@ arena_prof_tctx_set(const void *ptr, prof_tctx_t *tctx) } JEMALLOC_ALWAYS_INLINE void * -arena_malloc(arena_t *arena, size_t size, bool zero, bool try_tcache) +arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero, + bool try_tcache) { tcache_t *tcache; @@ -1061,12 +1064,12 @@ arena_malloc(arena_t *arena, size_t size, bool zero, bool try_tcache) assert(size <= arena_maxclass); if (likely(size <= SMALL_MAXCLASS)) { - if (likely(try_tcache) && likely((tcache = tcache_get(true)) != - NULL)) + if (likely(try_tcache) && likely((tcache = tcache_get(tsd, + true)) != NULL)) return (tcache_alloc_small(tcache, size, zero)); else { - return (arena_malloc_small(choose_arena(arena), size, - zero)); + return (arena_malloc_small(choose_arena(tsd, arena), + size, zero)); } } else { /* @@ -1074,11 +1077,11 @@ arena_malloc(arena_t *arena, size_t size, bool zero, bool try_tcache) * infinite recursion during tcache initialization. 
*/ if (try_tcache && size <= tcache_maxclass && likely((tcache = - tcache_get(true)) != NULL)) + tcache_get(tsd, true)) != NULL)) return (tcache_alloc_large(tcache, size, zero)); else { - return (arena_malloc_large(choose_arena(arena), size, - zero)); + return (arena_malloc_large(choose_arena(tsd, arena), + size, zero)); } } } @@ -1128,7 +1131,7 @@ arena_salloc(const void *ptr, bool demote) } JEMALLOC_ALWAYS_INLINE void -arena_dalloc(arena_chunk_t *chunk, void *ptr, bool try_tcache) +arena_dalloc(tsd_t *tsd, arena_chunk_t *chunk, void *ptr, bool try_tcache) { size_t pageind, mapbits; tcache_t *tcache; @@ -1141,8 +1144,8 @@ arena_dalloc(arena_chunk_t *chunk, void *ptr, bool try_tcache) assert(arena_mapbits_allocated_get(chunk, pageind) != 0); if (likely((mapbits & CHUNK_MAP_LARGE) == 0)) { /* Small allocation. */ - if (likely(try_tcache) && likely((tcache = tcache_get(false)) != - NULL)) { + if (likely(try_tcache) && likely((tcache = tcache_get(tsd, + false)) != NULL)) { size_t binind = arena_ptr_small_binind_get(ptr, mapbits); tcache_dalloc_small(tcache, ptr, binind); @@ -1154,7 +1157,7 @@ arena_dalloc(arena_chunk_t *chunk, void *ptr, bool try_tcache) assert(((uintptr_t)ptr & PAGE_MASK) == 0); if (try_tcache && size <= tcache_maxclass && likely((tcache = - tcache_get(false)) != NULL)) { + tcache_get(tsd, false)) != NULL)) { tcache_dalloc_large(tcache, ptr, size); } else arena_dalloc_large(chunk->arena, chunk, ptr); @@ -1162,7 +1165,8 @@ arena_dalloc(arena_chunk_t *chunk, void *ptr, bool try_tcache) } JEMALLOC_ALWAYS_INLINE void -arena_sdalloc(arena_chunk_t *chunk, void *ptr, size_t size, bool try_tcache) +arena_sdalloc(tsd_t *tsd, arena_chunk_t *chunk, void *ptr, size_t size, + bool try_tcache) { tcache_t *tcache; @@ -1171,8 +1175,8 @@ arena_sdalloc(arena_chunk_t *chunk, void *ptr, size_t size, bool try_tcache) if (likely(size <= SMALL_MAXCLASS)) { /* Small allocation. 
*/ - if (likely(try_tcache) && likely((tcache = tcache_get(false)) != - NULL)) { + if (likely(try_tcache) && likely((tcache = tcache_get(tsd, + false)) != NULL)) { size_t binind = small_size2bin(size); tcache_dalloc_small(tcache, ptr, binind); } else { @@ -1184,7 +1188,7 @@ arena_sdalloc(arena_chunk_t *chunk, void *ptr, size_t size, bool try_tcache) assert(((uintptr_t)ptr & PAGE_MASK) == 0); if (try_tcache && size <= tcache_maxclass && (tcache = - tcache_get(false)) != NULL) { + tcache_get(tsd, false)) != NULL) { tcache_dalloc_large(tcache, ptr, size); } else arena_dalloc_large(chunk->arena, chunk, ptr); diff --git a/include/jemalloc/internal/ckh.h b/include/jemalloc/internal/ckh.h index 58712a6a..75c1c979 100644 --- a/include/jemalloc/internal/ckh.h +++ b/include/jemalloc/internal/ckh.h @@ -66,13 +66,13 @@ struct ckh_s { /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -bool ckh_new(ckh_t *ckh, size_t minitems, ckh_hash_t *hash, +bool ckh_new(tsd_t *tsd, ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh_keycomp_t *keycomp); -void ckh_delete(ckh_t *ckh); +void ckh_delete(tsd_t *tsd, ckh_t *ckh); size_t ckh_count(ckh_t *ckh); bool ckh_iter(ckh_t *ckh, size_t *tabind, void **key, void **data); -bool ckh_insert(ckh_t *ckh, const void *key, const void *data); -bool ckh_remove(ckh_t *ckh, const void *searchkey, void **key, +bool ckh_insert(tsd_t *tsd, ckh_t *ckh, const void *key, const void *data); +bool ckh_remove(tsd_t *tsd, ckh_t *ckh, const void *searchkey, void **key, void **data); bool ckh_search(ckh_t *ckh, const void *seachkey, void **key, void **data); void ckh_string_hash(const void *key, size_t r_hash[2]); diff --git a/include/jemalloc/internal/huge.h b/include/jemalloc/internal/huge.h index 2ec77520..b061e15b 100644 --- a/include/jemalloc/internal/huge.h +++ b/include/jemalloc/internal/huge.h @@ -9,12 +9,14 @@ /******************************************************************************/ #ifdef 
JEMALLOC_H_EXTERNS -void *huge_malloc(arena_t *arena, size_t size, bool zero); -void *huge_palloc(arena_t *arena, size_t size, size_t alignment, bool zero); +void *huge_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero); +void *huge_palloc(tsd_t *tsd, arena_t *arena, size_t size, size_t alignment, + bool zero); bool huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra); -void *huge_ralloc(arena_t *arena, void *ptr, size_t oldsize, size_t size, - size_t extra, size_t alignment, bool zero, bool try_tcache_dalloc); +void *huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, + size_t size, size_t extra, size_t alignment, bool zero, + bool try_tcache_dalloc); #ifdef JEMALLOC_JET typedef void (huge_dalloc_junk_t)(void *, size_t); extern huge_dalloc_junk_t *huge_dalloc_junk; diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index a380a414..bff2bd27 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -350,7 +350,6 @@ static const bool config_ivsalloc = #include "jemalloc/internal/stats.h" #include "jemalloc/internal/ctl.h" #include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/tsd.h" #include "jemalloc/internal/mb.h" #include "jemalloc/internal/bitmap.h" #include "jemalloc/internal/extent.h" @@ -364,15 +363,7 @@ static const bool config_ivsalloc = #include "jemalloc/internal/quarantine.h" #include "jemalloc/internal/prof.h" -typedef struct { - uint64_t allocated; - uint64_t deallocated; -} thread_allocated_t; -/* - * The JEMALLOC_ARG_CONCAT() wrapper is necessary to pass {0, 0} via a cpp macro - * argument. 
- */ -#define THREAD_ALLOCATED_INITIALIZER JEMALLOC_ARG_CONCAT({0, 0}) +#include "jemalloc/internal/tsd.h" #undef JEMALLOC_H_STRUCTS /******************************************************************************/ @@ -407,8 +398,10 @@ extern unsigned narenas_total; extern unsigned narenas_auto; /* Read-only after initialization. */ arena_t *arenas_extend(unsigned ind); -void arenas_cleanup(void *arg); -arena_t *choose_arena_hard(void); +arena_t *choose_arena_hard(tsd_t *tsd); +void thread_allocated_cleanup(tsd_t *tsd); +void thread_deallocated_cleanup(tsd_t *tsd); +void arena_cleanup(tsd_t *tsd); void jemalloc_prefork(void); void jemalloc_postfork_parent(void); void jemalloc_postfork_child(void); @@ -422,7 +415,6 @@ void jemalloc_postfork_child(void); #include "jemalloc/internal/stats.h" #include "jemalloc/internal/ctl.h" #include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/tsd.h" #include "jemalloc/internal/mb.h" #include "jemalloc/internal/bitmap.h" #include "jemalloc/internal/extent.h" @@ -435,6 +427,7 @@ void jemalloc_postfork_child(void); #include "jemalloc/internal/hash.h" #include "jemalloc/internal/quarantine.h" #include "jemalloc/internal/prof.h" +#include "jemalloc/internal/tsd.h" #undef JEMALLOC_H_EXTERNS /******************************************************************************/ @@ -465,23 +458,13 @@ void jemalloc_postfork_child(void); #undef JEMALLOC_ARENA_INLINE_A #ifndef JEMALLOC_ENABLE_INLINE -malloc_tsd_protos(JEMALLOC_ATTR(unused), arenas, arena_t *) - size_t s2u(size_t size); size_t sa2u(size_t size, size_t alignment); unsigned narenas_total_get(void); -arena_t *choose_arena(arena_t *arena); +arena_t *choose_arena(tsd_t *tsd, arena_t *arena); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) -/* - * Map of pthread_self() --> arenas[???], used for selecting an arena to use - * for allocations. 
- */ -malloc_tsd_externs(arenas, arena_t *) -malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, arenas, arena_t *, NULL, - arenas_cleanup) - /* * Compute usable size that would result from allocating an object with the * specified size. @@ -589,15 +572,15 @@ narenas_total_get(void) /* Choose an arena based on a per-thread value. */ JEMALLOC_INLINE arena_t * -choose_arena(arena_t *arena) +choose_arena(tsd_t *tsd, arena_t *arena) { arena_t *ret; if (arena != NULL) return (arena); - if ((ret = *arenas_tsd_get()) == NULL) { - ret = choose_arena_hard(); + if (unlikely((ret = tsd_arena_get(tsd)) == NULL)) { + ret = choose_arena_hard(tsd); assert(ret != NULL); } @@ -622,72 +605,72 @@ choose_arena(arena_t *arena) #include "jemalloc/internal/quarantine.h" #ifndef JEMALLOC_ENABLE_INLINE -void *imalloct(size_t size, bool try_tcache, arena_t *arena); -void *imalloc(size_t size); -void *icalloct(size_t size, bool try_tcache, arena_t *arena); -void *icalloc(size_t size); -void *ipalloct(size_t usize, size_t alignment, bool zero, bool try_tcache, - arena_t *arena); -void *ipalloc(size_t usize, size_t alignment, bool zero); +void *imalloct(tsd_t *tsd, size_t size, bool try_tcache, arena_t *arena); +void *imalloc(tsd_t *tsd, size_t size); +void *icalloct(tsd_t *tsd, size_t size, bool try_tcache, arena_t *arena); +void *icalloc(tsd_t *tsd, size_t size); +void *ipalloct(tsd_t *tsd, size_t usize, size_t alignment, bool zero, + bool try_tcache, arena_t *arena); +void *ipalloc(tsd_t *tsd, size_t usize, size_t alignment, bool zero); size_t isalloc(const void *ptr, bool demote); size_t ivsalloc(const void *ptr, bool demote); size_t u2rz(size_t usize); size_t p2rz(const void *ptr); -void idalloct(void *ptr, bool try_tcache); -void isdalloct(void *ptr, size_t size, bool try_tcache); -void idalloc(void *ptr); -void iqalloc(void *ptr, bool try_tcache); -void isqalloc(void *ptr, size_t size, bool try_tcache); -void *iralloct_realign(void *ptr, size_t oldsize, size_t size, size_t extra, - size_t 
alignment, bool zero, bool try_tcache_alloc, bool try_tcache_dalloc, - arena_t *arena); -void *iralloct(void *ptr, size_t size, size_t alignment, bool zero, - bool try_tcache_alloc, bool try_tcache_dalloc, arena_t *arena); -void *iralloc(void *ptr, size_t size, size_t alignment, bool zero); +void idalloct(tsd_t *tsd, void *ptr, bool try_tcache); +void isdalloct(tsd_t *tsd, void *ptr, size_t size, bool try_tcache); +void idalloc(tsd_t *tsd, void *ptr); +void iqalloc(tsd_t *tsd, void *ptr, bool try_tcache); +void isqalloc(tsd_t *tsd, void *ptr, size_t size, bool try_tcache); +void *iralloct_realign(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, + size_t extra, size_t alignment, bool zero, bool try_tcache_alloc, + bool try_tcache_dalloc, arena_t *arena); +void *iralloct(tsd_t *tsd, void *ptr, size_t size, size_t alignment, + bool zero, bool try_tcache_alloc, bool try_tcache_dalloc, arena_t *arena); +void *iralloc(tsd_t *tsd, void *ptr, size_t size, size_t alignment, + bool zero); bool ixalloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero); -malloc_tsd_protos(JEMALLOC_ATTR(unused), thread_allocated, thread_allocated_t) #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) JEMALLOC_ALWAYS_INLINE void * -imalloct(size_t size, bool try_tcache, arena_t *arena) +imalloct(tsd_t *tsd, size_t size, bool try_tcache, arena_t *arena) { assert(size != 0); if (size <= arena_maxclass) - return (arena_malloc(arena, size, false, try_tcache)); + return (arena_malloc(tsd, arena, size, false, try_tcache)); else - return (huge_malloc(arena, size, false)); + return (huge_malloc(tsd, arena, size, false)); } JEMALLOC_ALWAYS_INLINE void * -imalloc(size_t size) +imalloc(tsd_t *tsd, size_t size) { - return (imalloct(size, true, NULL)); + return (imalloct(tsd, size, true, NULL)); } JEMALLOC_ALWAYS_INLINE void * -icalloct(size_t size, bool try_tcache, arena_t *arena) +icalloct(tsd_t *tsd, size_t size, bool try_tcache, arena_t *arena) { if (size <= 
arena_maxclass) - return (arena_malloc(arena, size, true, try_tcache)); + return (arena_malloc(tsd, arena, size, true, try_tcache)); else - return (huge_malloc(arena, size, true)); + return (huge_malloc(tsd, arena, size, true)); } JEMALLOC_ALWAYS_INLINE void * -icalloc(size_t size) +icalloc(tsd_t *tsd, size_t size) { - return (icalloct(size, true, NULL)); + return (icalloct(tsd, size, true, NULL)); } JEMALLOC_ALWAYS_INLINE void * -ipalloct(size_t usize, size_t alignment, bool zero, bool try_tcache, +ipalloct(tsd_t *tsd, size_t usize, size_t alignment, bool zero, bool try_tcache, arena_t *arena) { void *ret; @@ -696,15 +679,15 @@ ipalloct(size_t usize, size_t alignment, bool zero, bool try_tcache, assert(usize == sa2u(usize, alignment)); if (usize <= arena_maxclass && alignment <= PAGE) - ret = arena_malloc(arena, usize, zero, try_tcache); + ret = arena_malloc(tsd, arena, usize, zero, try_tcache); else { if (usize <= arena_maxclass) { - ret = arena_palloc(choose_arena(arena), usize, + ret = arena_palloc(choose_arena(tsd, arena), usize, alignment, zero); } else if (alignment <= chunksize) - ret = huge_malloc(arena, usize, zero); + ret = huge_malloc(tsd, arena, usize, zero); else - ret = huge_palloc(arena, usize, alignment, zero); + ret = huge_palloc(tsd, arena, usize, alignment, zero); } assert(ALIGNMENT_ADDR2BASE(ret, alignment) == ret); @@ -712,10 +695,10 @@ ipalloct(size_t usize, size_t alignment, bool zero, bool try_tcache, } JEMALLOC_ALWAYS_INLINE void * -ipalloc(size_t usize, size_t alignment, bool zero) +ipalloc(tsd_t *tsd, size_t usize, size_t alignment, bool zero) { - return (ipalloct(usize, alignment, zero, true, NULL)); + return (ipalloct(tsd, usize, alignment, zero, true, NULL)); } /* @@ -776,7 +759,7 @@ p2rz(const void *ptr) } JEMALLOC_ALWAYS_INLINE void -idalloct(void *ptr, bool try_tcache) +idalloct(tsd_t *tsd, void *ptr, bool try_tcache) { arena_chunk_t *chunk; @@ -784,13 +767,13 @@ idalloct(void *ptr, bool try_tcache) chunk = (arena_chunk_t 
*)CHUNK_ADDR2BASE(ptr); if (chunk != ptr) - arena_dalloc(chunk, ptr, try_tcache); + arena_dalloc(tsd, chunk, ptr, try_tcache); else huge_dalloc(ptr); } JEMALLOC_ALWAYS_INLINE void -isdalloct(void *ptr, size_t size, bool try_tcache) +isdalloct(tsd_t *tsd, void *ptr, size_t size, bool try_tcache) { arena_chunk_t *chunk; @@ -798,42 +781,42 @@ isdalloct(void *ptr, size_t size, bool try_tcache) chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (chunk != ptr) - arena_sdalloc(chunk, ptr, size, try_tcache); + arena_sdalloc(tsd, chunk, ptr, size, try_tcache); else huge_dalloc(ptr); } JEMALLOC_ALWAYS_INLINE void -idalloc(void *ptr) +idalloc(tsd_t *tsd, void *ptr) { - idalloct(ptr, true); + idalloct(tsd, ptr, true); } JEMALLOC_ALWAYS_INLINE void -iqalloc(void *ptr, bool try_tcache) +iqalloc(tsd_t *tsd, void *ptr, bool try_tcache) { if (config_fill && unlikely(opt_quarantine)) - quarantine(ptr); + quarantine(tsd, ptr); else - idalloct(ptr, try_tcache); + idalloct(tsd, ptr, try_tcache); } JEMALLOC_ALWAYS_INLINE void -isqalloc(void *ptr, size_t size, bool try_tcache) +isqalloc(tsd_t *tsd, void *ptr, size_t size, bool try_tcache) { if (config_fill && unlikely(opt_quarantine)) - quarantine(ptr); + quarantine(tsd, ptr); else - isdalloct(ptr, size, try_tcache); + isdalloct(tsd, ptr, size, try_tcache); } JEMALLOC_ALWAYS_INLINE void * -iralloct_realign(void *ptr, size_t oldsize, size_t size, size_t extra, - size_t alignment, bool zero, bool try_tcache_alloc, bool try_tcache_dalloc, - arena_t *arena) +iralloct_realign(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, + size_t extra, size_t alignment, bool zero, bool try_tcache_alloc, + bool try_tcache_dalloc, arena_t *arena) { void *p; size_t usize, copysize; @@ -841,7 +824,7 @@ iralloct_realign(void *ptr, size_t oldsize, size_t size, size_t extra, usize = sa2u(size + extra, alignment); if (usize == 0) return (NULL); - p = ipalloct(usize, alignment, zero, try_tcache_alloc, arena); + p = ipalloct(tsd, usize, alignment, zero, 
try_tcache_alloc, arena); if (p == NULL) { if (extra == 0) return (NULL); @@ -849,7 +832,8 @@ iralloct_realign(void *ptr, size_t oldsize, size_t size, size_t extra, usize = sa2u(size, alignment); if (usize == 0) return (NULL); - p = ipalloct(usize, alignment, zero, try_tcache_alloc, arena); + p = ipalloct(tsd, usize, alignment, zero, try_tcache_alloc, + arena); if (p == NULL) return (NULL); } @@ -859,12 +843,12 @@ iralloct_realign(void *ptr, size_t oldsize, size_t size, size_t extra, */ copysize = (size < oldsize) ? size : oldsize; memcpy(p, ptr, copysize); - iqalloc(ptr, try_tcache_dalloc); + iqalloc(tsd, ptr, try_tcache_dalloc); return (p); } JEMALLOC_ALWAYS_INLINE void * -iralloct(void *ptr, size_t size, size_t alignment, bool zero, +iralloct(tsd_t *tsd, void *ptr, size_t size, size_t alignment, bool zero, bool try_tcache_alloc, bool try_tcache_dalloc, arena_t *arena) { size_t oldsize; @@ -880,24 +864,24 @@ iralloct(void *ptr, size_t size, size_t alignment, bool zero, * Existing object alignment is inadequate; allocate new space * and copy. 
*/ - return (iralloct_realign(ptr, oldsize, size, 0, alignment, zero, - try_tcache_alloc, try_tcache_dalloc, arena)); + return (iralloct_realign(tsd, ptr, oldsize, size, 0, alignment, + zero, try_tcache_alloc, try_tcache_dalloc, arena)); } if (size <= arena_maxclass) { - return (arena_ralloc(arena, ptr, oldsize, size, 0, alignment, - zero, try_tcache_alloc, try_tcache_dalloc)); + return (arena_ralloc(tsd, arena, ptr, oldsize, size, 0, + alignment, zero, try_tcache_alloc, try_tcache_dalloc)); } else { - return (huge_ralloc(arena, ptr, oldsize, size, 0, alignment, - zero, try_tcache_dalloc)); + return (huge_ralloc(tsd, arena, ptr, oldsize, size, 0, + alignment, zero, try_tcache_dalloc)); } } JEMALLOC_ALWAYS_INLINE void * -iralloc(void *ptr, size_t size, size_t alignment, bool zero) +iralloc(tsd_t *tsd, void *ptr, size_t size, size_t alignment, bool zero) { - return (iralloct(ptr, size, alignment, zero, true, true, NULL)); + return (iralloct(tsd, ptr, size, alignment, zero, true, true, NULL)); } JEMALLOC_ALWAYS_INLINE bool @@ -920,10 +904,6 @@ ixalloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero) else return (huge_ralloc_no_move(ptr, oldsize, size, extra)); } - -malloc_tsd_externs(thread_allocated, thread_allocated_t) -malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, thread_allocated, thread_allocated_t, - THREAD_ALLOCATED_INITIALIZER, malloc_tsd_no_cleanup) #endif #include "jemalloc/internal/prof.h" diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index b8990177..84d48d19 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -8,6 +8,7 @@ arena_bitselm_get arena_boot arena_chunk_alloc_huge arena_chunk_dalloc_huge +arena_cleanup arena_dalloc arena_dalloc_bin arena_dalloc_bin_locked @@ -65,19 +66,9 @@ arena_sdalloc arena_stats_merge arena_tcache_fill_small arenas -arenas_booted arenas_cleanup arenas_extend -arenas_initialized arenas_lock 
-arenas_tls -arenas_tsd -arenas_tsd_boot -arenas_tsd_cleanup_wrapper -arenas_tsd_get -arenas_tsd_get_wrapper -arenas_tsd_init_head -arenas_tsd_set atomic_add_u atomic_add_uint32 atomic_add_uint64 @@ -317,37 +308,17 @@ prof_sample_accum_update prof_sample_threshold_update prof_tctx_get prof_tctx_set -prof_tdata_booted prof_tdata_cleanup prof_tdata_get prof_tdata_init -prof_tdata_initialized -prof_tdata_tls -prof_tdata_tsd -prof_tdata_tsd_boot -prof_tdata_tsd_cleanup_wrapper -prof_tdata_tsd_get -prof_tdata_tsd_get_wrapper -prof_tdata_tsd_init_head -prof_tdata_tsd_set prof_thread_active_get prof_thread_active_set prof_thread_name_get prof_thread_name_set quarantine quarantine_alloc_hook -quarantine_boot -quarantine_booted quarantine_cleanup quarantine_init -quarantine_tls -quarantine_tsd -quarantine_tsd_boot -quarantine_tsd_cleanup_wrapper -quarantine_tsd_get -quarantine_tsd_get_wrapper -quarantine_tsd_init_head -quarantine_tsd_set register_zone rtree_delete rtree_get @@ -386,55 +357,52 @@ tcache_arena_dissociate tcache_bin_flush_large tcache_bin_flush_small tcache_bin_info -tcache_boot0 -tcache_boot1 -tcache_booted +tcache_boot +tcache_cleanup tcache_create tcache_dalloc_large tcache_dalloc_small -tcache_destroy -tcache_enabled_booted +tcache_enabled_cleanup tcache_enabled_get -tcache_enabled_initialized tcache_enabled_set -tcache_enabled_tls -tcache_enabled_tsd -tcache_enabled_tsd_boot -tcache_enabled_tsd_cleanup_wrapper -tcache_enabled_tsd_get -tcache_enabled_tsd_get_wrapper -tcache_enabled_tsd_init_head -tcache_enabled_tsd_set tcache_event tcache_event_hard tcache_flush tcache_get tcache_get_hard -tcache_initialized tcache_maxclass tcache_salloc tcache_stats_merge -tcache_thread_cleanup -tcache_tls -tcache_tsd -tcache_tsd_boot -tcache_tsd_cleanup_wrapper -tcache_tsd_get -tcache_tsd_get_wrapper -tcache_tsd_init_head -tcache_tsd_set -thread_allocated_booted -thread_allocated_initialized -thread_allocated_tls -thread_allocated_tsd -thread_allocated_tsd_boot 
-thread_allocated_tsd_cleanup_wrapper -thread_allocated_tsd_get -thread_allocated_tsd_get_wrapper -thread_allocated_tsd_init_head -thread_allocated_tsd_set +thread_allocated_cleanup +thread_deallocated_cleanup +tsd_booted +tsd_arena_get +tsd_arena_set +tsd_boot +tsd_cleanup +tsd_cleanup_wrapper +tsd_get +tsd_get_wrapper +tsd_initialized tsd_init_check_recursion tsd_init_finish +tsd_init_head +tsd_quarantine_get +tsd_quarantine_set +tsd_set +tsd_tcache_enabled_get +tsd_tcache_enabled_set +tsd_tcache_get +tsd_tcache_set +tsd_tls +tsd_tsd +tsd_prof_tdata_get +tsd_prof_tdata_set +tsd_thread_allocated_get +tsd_thread_allocated_set +tsd_thread_deallocated_get +tsd_thread_deallocated_set +tsd_tryget u2rz valgrind_freelike_block valgrind_make_mem_defined diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index a1e7ac5e..b8a8b419 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -248,13 +248,13 @@ extern uint64_t prof_interval; */ extern size_t lg_prof_sample; -void prof_alloc_rollback(prof_tctx_t *tctx, bool updated); +void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated); void prof_malloc_sample_object(const void *ptr, size_t usize, prof_tctx_t *tctx); -void prof_free_sampled_object(size_t usize, prof_tctx_t *tctx); +void prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_tctx_t *tctx); void bt_init(prof_bt_t *bt, void **vec); void prof_backtrace(prof_bt_t *bt); -prof_tctx_t *prof_lookup(prof_bt_t *bt); +prof_tctx_t *prof_lookup(tsd_t *tsd, prof_bt_t *bt); #ifdef JEMALLOC_JET size_t prof_bt_count(void); typedef int (prof_dump_open_t)(bool, const char *); @@ -263,12 +263,12 @@ extern prof_dump_open_t *prof_dump_open; void prof_idump(void); bool prof_mdump(const char *filename); void prof_gdump(void); -prof_tdata_t *prof_tdata_init(void); -prof_tdata_t *prof_tdata_reinit(prof_tdata_t *tdata); -void prof_reset(size_t lg_sample); -void prof_tdata_cleanup(void *arg); +prof_tdata_t 
*prof_tdata_init(tsd_t *tsd); +prof_tdata_t *prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata); +void prof_reset(tsd_t *tsd, size_t lg_sample); +void prof_tdata_cleanup(tsd_t *tsd); const char *prof_thread_name_get(void); -bool prof_thread_name_set(const char *thread_name); +bool prof_thread_name_set(tsd_t *tsd, const char *thread_name); bool prof_thread_active_get(void); bool prof_thread_active_set(bool active); void prof_boot0(void); @@ -284,43 +284,38 @@ void prof_sample_threshold_update(prof_tdata_t *tdata); #ifdef JEMALLOC_H_INLINES #ifndef JEMALLOC_ENABLE_INLINE -malloc_tsd_protos(JEMALLOC_ATTR(unused), prof_tdata, prof_tdata_t *) - -prof_tdata_t *prof_tdata_get(bool create); -bool prof_sample_accum_update(size_t usize, bool commit, +prof_tdata_t *prof_tdata_get(tsd_t *tsd, bool create); +bool prof_sample_accum_update(tsd_t *tsd, size_t usize, bool commit, prof_tdata_t **tdata_out); -prof_tctx_t *prof_alloc_prep(size_t usize, bool update); +prof_tctx_t *prof_alloc_prep(tsd_t *tsd, size_t usize, bool update); prof_tctx_t *prof_tctx_get(const void *ptr); void prof_tctx_set(const void *ptr, prof_tctx_t *tctx); void prof_malloc_sample_object(const void *ptr, size_t usize, prof_tctx_t *tctx); void prof_malloc(const void *ptr, size_t usize, prof_tctx_t *tctx); -void prof_realloc(const void *ptr, size_t usize, prof_tctx_t *tctx, - bool updated, size_t old_usize, prof_tctx_t *old_tctx); -void prof_free(const void *ptr, size_t usize); +void prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, + prof_tctx_t *tctx, bool updated, size_t old_usize, prof_tctx_t *old_tctx); +void prof_free(tsd_t *tsd, const void *ptr, size_t usize); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PROF_C_)) -/* Thread-specific backtrace cache, used to reduce bt2gctx contention. 
*/ -malloc_tsd_externs(prof_tdata, prof_tdata_t *) -malloc_tsd_funcs(JEMALLOC_INLINE, prof_tdata, prof_tdata_t *, NULL, - prof_tdata_cleanup) - JEMALLOC_INLINE prof_tdata_t * -prof_tdata_get(bool create) +prof_tdata_get(tsd_t *tsd, bool create) { prof_tdata_t *tdata; cassert(config_prof); - tdata = *prof_tdata_tsd_get(); + tdata = tsd_prof_tdata_get(tsd); if (create) { - if ((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) { - if (tdata == NULL) - tdata = prof_tdata_init(); - } else if (tdata->state == prof_tdata_state_expired) - tdata = prof_tdata_reinit(tdata); - assert((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX || + if (unlikely(tdata == NULL)) { + tdata = prof_tdata_init(tsd); + tsd_prof_tdata_set(tsd, tdata); + } else if (unlikely(tdata->state == prof_tdata_state_expired)) { + tdata = prof_tdata_reinit(tsd, tdata); + tsd_prof_tdata_set(tsd, tdata); + } + assert(tdata == NULL || tdata->state == prof_tdata_state_attached); } @@ -363,13 +358,14 @@ prof_tctx_set(const void *ptr, prof_tctx_t *tctx) } JEMALLOC_INLINE bool -prof_sample_accum_update(size_t usize, bool update, prof_tdata_t **tdata_out) +prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update, + prof_tdata_t **tdata_out) { prof_tdata_t *tdata; cassert(config_prof); - tdata = prof_tdata_get(true); + tdata = prof_tdata_get(tsd, true); if ((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) tdata = NULL; @@ -392,7 +388,7 @@ prof_sample_accum_update(size_t usize, bool update, prof_tdata_t **tdata_out) } JEMALLOC_INLINE prof_tctx_t * -prof_alloc_prep(size_t usize, bool update) +prof_alloc_prep(tsd_t *tsd, size_t usize, bool update) { prof_tctx_t *ret; prof_tdata_t *tdata; @@ -400,13 +396,13 @@ prof_alloc_prep(size_t usize, bool update) assert(usize == s2u(usize)); - if (!opt_prof_active || likely(prof_sample_accum_update(usize, update, - &tdata))) + if (!opt_prof_active || likely(prof_sample_accum_update(tsd, usize, + update, &tdata))) ret = (prof_tctx_t *)(uintptr_t)1U; else { 
bt_init(&bt, tdata->vec); prof_backtrace(&bt); - ret = prof_lookup(&bt); + ret = prof_lookup(tsd, &bt); } return (ret); @@ -427,8 +423,8 @@ prof_malloc(const void *ptr, size_t usize, prof_tctx_t *tctx) } JEMALLOC_INLINE void -prof_realloc(const void *ptr, size_t usize, prof_tctx_t *tctx, bool updated, - size_t old_usize, prof_tctx_t *old_tctx) +prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, + bool updated, size_t old_usize, prof_tctx_t *old_tctx) { cassert(config_prof); @@ -436,7 +432,7 @@ prof_realloc(const void *ptr, size_t usize, prof_tctx_t *tctx, bool updated, if (!updated && ptr != NULL) { assert(usize == isalloc(ptr, true)); - if (prof_sample_accum_update(usize, true, NULL)) { + if (prof_sample_accum_update(tsd, usize, true, NULL)) { /* * Don't sample. The usize passed to PROF_ALLOC_PREP() * was larger than what actually got allocated, so a @@ -449,7 +445,7 @@ prof_realloc(const void *ptr, size_t usize, prof_tctx_t *tctx, bool updated, } if (unlikely((uintptr_t)old_tctx > (uintptr_t)1U)) - prof_free_sampled_object(old_usize, old_tctx); + prof_free_sampled_object(tsd, old_usize, old_tctx); if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) prof_malloc_sample_object(ptr, usize, tctx); else @@ -457,7 +453,7 @@ prof_realloc(const void *ptr, size_t usize, prof_tctx_t *tctx, bool updated, } JEMALLOC_INLINE void -prof_free(const void *ptr, size_t usize) +prof_free(tsd_t *tsd, const void *ptr, size_t usize) { prof_tctx_t *tctx = prof_tctx_get(ptr); @@ -465,7 +461,7 @@ prof_free(const void *ptr, size_t usize) assert(usize == isalloc(ptr, true)); if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) - prof_free_sampled_object(usize, tctx); + prof_free_sampled_object(tsd, usize, tctx); } #endif diff --git a/include/jemalloc/internal/quarantine.h b/include/jemalloc/internal/quarantine.h index 16f677f7..3a755985 100644 --- a/include/jemalloc/internal/quarantine.h +++ b/include/jemalloc/internal/quarantine.h @@ -29,36 +29,29 @@ struct quarantine_s { 
/******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -quarantine_t *quarantine_init(size_t lg_maxobjs); -void quarantine(void *ptr); -void quarantine_cleanup(void *arg); -bool quarantine_boot(void); +quarantine_t *quarantine_init(tsd_t *tsd, size_t lg_maxobjs); +void quarantine(tsd_t *tsd, void *ptr); +void quarantine_cleanup(tsd_t *tsd); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ #ifdef JEMALLOC_H_INLINES #ifndef JEMALLOC_ENABLE_INLINE -malloc_tsd_protos(JEMALLOC_ATTR(unused), quarantine, quarantine_t *) - void quarantine_alloc_hook(void); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_QUARANTINE_C_)) -malloc_tsd_externs(quarantine, quarantine_t *) -malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, quarantine, quarantine_t *, NULL, - quarantine_cleanup) - JEMALLOC_ALWAYS_INLINE void quarantine_alloc_hook(void) { - quarantine_t *quarantine; + tsd_t *tsd; assert(config_fill && opt_quarantine); - quarantine = *quarantine_tsd_get(); - if (quarantine == NULL) - quarantine_init(LG_MAXOBJS_INIT); + tsd = tsd_tryget(); + if (tsd != NULL && tsd_quarantine_get(tsd) == NULL) + tsd_quarantine_set(tsd, quarantine_init(tsd, LG_MAXOBJS_INIT)); } #endif diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index c9d723aa..6804668f 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -110,26 +110,22 @@ void tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem, tcache_t *tcache); void tcache_arena_associate(tcache_t *tcache, arena_t *arena); void tcache_arena_dissociate(tcache_t *tcache); -tcache_t *tcache_get_hard(tcache_t *tcache, bool create); +tcache_t *tcache_get_hard(tsd_t *tsd); tcache_t *tcache_create(arena_t *arena); -void tcache_destroy(tcache_t *tcache); -void tcache_thread_cleanup(void *arg); +void tcache_cleanup(tsd_t *tsd); +void 
tcache_enabled_cleanup(tsd_t *tsd); void tcache_stats_merge(tcache_t *tcache, arena_t *arena); -bool tcache_boot0(void); -bool tcache_boot1(void); +bool tcache_boot(void); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ #ifdef JEMALLOC_H_INLINES #ifndef JEMALLOC_ENABLE_INLINE -malloc_tsd_protos(JEMALLOC_ATTR(unused), tcache, tcache_t *) -malloc_tsd_protos(JEMALLOC_ATTR(unused), tcache_enabled, tcache_enabled_t) - void tcache_event(tcache_t *tcache); void tcache_flush(void); bool tcache_enabled_get(void); -tcache_t *tcache_get(bool create); +tcache_t *tcache_get(tsd_t *tsd, bool create); void tcache_enabled_set(bool enabled); void *tcache_alloc_easy(tcache_bin_t *tbin); void *tcache_alloc_small(tcache_t *tcache, size_t size, bool zero); @@ -139,41 +135,33 @@ void tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_TCACHE_C_)) -/* Map of thread-specific caches. */ -malloc_tsd_externs(tcache, tcache_t *) -malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, tcache, tcache_t *, NULL, - tcache_thread_cleanup) -/* Per thread flag that allows thread caches to be disabled. 
*/ -malloc_tsd_externs(tcache_enabled, tcache_enabled_t) -malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, tcache_enabled, tcache_enabled_t, - tcache_enabled_default, malloc_tsd_no_cleanup) - JEMALLOC_INLINE void tcache_flush(void) { - tcache_t *tcache; + tsd_t *tsd; cassert(config_tcache); - tcache = *tcache_tsd_get(); - if ((uintptr_t)tcache <= (uintptr_t)TCACHE_STATE_MAX) - return; - tcache_destroy(tcache); - tcache = NULL; - tcache_tsd_set(&tcache); + tsd = tsd_tryget(); + if (tsd != NULL) + tcache_cleanup(tsd); } JEMALLOC_INLINE bool tcache_enabled_get(void) { + tsd_t *tsd; tcache_enabled_t tcache_enabled; cassert(config_tcache); - tcache_enabled = *tcache_enabled_tsd_get(); + tsd = tsd_tryget(); + if (tsd == NULL) + return (false); + tcache_enabled = tsd_tcache_enabled_get(tsd); if (tcache_enabled == tcache_enabled_default) { tcache_enabled = (tcache_enabled_t)opt_tcache; - tcache_enabled_tsd_set(&tcache_enabled); + tsd_tcache_enabled_set(tsd, tcache_enabled); } return ((bool)tcache_enabled); @@ -182,33 +170,24 @@ tcache_enabled_get(void) JEMALLOC_INLINE void tcache_enabled_set(bool enabled) { + tsd_t *tsd; tcache_enabled_t tcache_enabled; - tcache_t *tcache; cassert(config_tcache); + tsd = tsd_tryget(); + if (tsd == NULL) + return; + tcache_enabled = (tcache_enabled_t)enabled; - tcache_enabled_tsd_set(&tcache_enabled); - tcache = *tcache_tsd_get(); - if (enabled) { - if (tcache == TCACHE_STATE_DISABLED) { - tcache = NULL; - tcache_tsd_set(&tcache); - } - } else /* disabled */ { - if (tcache > TCACHE_STATE_MAX) { - tcache_destroy(tcache); - tcache = NULL; - } - if (tcache == NULL) { - tcache = TCACHE_STATE_DISABLED; - tcache_tsd_set(&tcache); - } - } + tsd_tcache_enabled_set(tsd, tcache_enabled); + + if (!enabled) + tcache_cleanup(tsd); } JEMALLOC_ALWAYS_INLINE tcache_t * -tcache_get(bool create) +tcache_get(tsd_t *tsd, bool create) { tcache_t *tcache; @@ -216,12 +195,19 @@ tcache_get(bool create) return (NULL); if (config_lazy_lock && isthreaded == false) return 
(NULL); + /* + * If create is true, the caller has already assured that tsd is + * non-NULL. + */ + if (!create && unlikely(tsd == NULL)) + return (NULL); - tcache = *tcache_tsd_get(); - if (unlikely((uintptr_t)tcache <= (uintptr_t)TCACHE_STATE_MAX)) { - if (tcache == TCACHE_STATE_DISABLED) - return (NULL); - tcache = tcache_get_hard(tcache, create); + tcache = tsd_tcache_get(tsd); + if (!create) + return (tcache); + if (unlikely(tcache == NULL)) { + tcache = tcache_get_hard(tsd); + tsd_tcache_set(tsd, tcache); } return (tcache); diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 9fb4a23e..44952eed 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -12,6 +12,15 @@ typedef struct tsd_init_block_s tsd_init_block_t; typedef struct tsd_init_head_s tsd_init_head_t; #endif +typedef struct tsd_s tsd_t; + +typedef enum { + tsd_state_uninitialized, + tsd_state_nominal, + tsd_state_purgatory, + tsd_state_reincarnated +} tsd_state_t; + /* * TLS/TSD-agnostic macro-based implementation of thread-specific data. There * are four macros that support (at least) three use cases: file-private, @@ -24,11 +33,11 @@ typedef struct tsd_init_head_s tsd_init_head_t; * int y; * } example_t; * #define EX_INITIALIZER JEMALLOC_CONCAT({0, 0}) - * malloc_tsd_protos(, example, example_t *) - * malloc_tsd_externs(example, example_t *) + * malloc_tsd_protos(, example_, example_t *) + * malloc_tsd_externs(example_, example_t *) * In example.c: - * malloc_tsd_data(, example, example_t *, EX_INITIALIZER) - * malloc_tsd_funcs(, example, example_t *, EX_INITIALIZER, + * malloc_tsd_data(, example_, example_t *, EX_INITIALIZER) + * malloc_tsd_funcs(, example_, example_t *, EX_INITIALIZER, * example_tsd_cleanup) * * The result is a set of generated functions, e.g.: @@ -43,15 +52,13 @@ typedef struct tsd_init_head_s tsd_init_head_t; * cast to (void *). 
This means that the cleanup function needs to cast *and* * dereference the function argument, e.g.: * - * void + * bool * example_tsd_cleanup(void *arg) * { * example_t *example = *(example_t **)arg; * * [...] - * if ([want the cleanup function to be called again]) { - * example_tsd_set(&example); - * } + * return ([want the cleanup function to be called again]); * } * * If example_tsd_set() is called within example_tsd_cleanup(), it will be @@ -63,60 +70,60 @@ typedef struct tsd_init_head_s tsd_init_head_t; /* malloc_tsd_protos(). */ #define malloc_tsd_protos(a_attr, a_name, a_type) \ a_attr bool \ -a_name##_tsd_boot(void); \ +a_name##tsd_boot(void); \ a_attr a_type * \ -a_name##_tsd_get(void); \ +a_name##tsd_get(void); \ a_attr void \ -a_name##_tsd_set(a_type *val); +a_name##tsd_set(a_type *val); /* malloc_tsd_externs(). */ #ifdef JEMALLOC_MALLOC_THREAD_CLEANUP #define malloc_tsd_externs(a_name, a_type) \ -extern __thread a_type a_name##_tls; \ -extern __thread bool a_name##_initialized; \ -extern bool a_name##_booted; +extern __thread a_type a_name##tsd_tls; \ +extern __thread bool a_name##tsd_initialized; \ +extern bool a_name##tsd_booted; #elif (defined(JEMALLOC_TLS)) #define malloc_tsd_externs(a_name, a_type) \ -extern __thread a_type a_name##_tls; \ -extern pthread_key_t a_name##_tsd; \ -extern bool a_name##_booted; +extern __thread a_type a_name##tsd_tls; \ +extern pthread_key_t a_name##tsd_tsd; \ +extern bool a_name##tsd_booted; #elif (defined(_WIN32)) #define malloc_tsd_externs(a_name, a_type) \ -extern DWORD a_name##_tsd; \ -extern bool a_name##_booted; +extern DWORD a_name##tsd_tsd; \ +extern bool a_name##tsd_booted; #else #define malloc_tsd_externs(a_name, a_type) \ -extern pthread_key_t a_name##_tsd; \ -extern tsd_init_head_t a_name##_tsd_init_head; \ -extern bool a_name##_booted; +extern pthread_key_t a_name##tsd_tsd; \ +extern tsd_init_head_t a_name##tsd_init_head; \ +extern bool a_name##tsd_booted; #endif /* malloc_tsd_data(). 
*/ #ifdef JEMALLOC_MALLOC_THREAD_CLEANUP #define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ a_attr __thread a_type JEMALLOC_TLS_MODEL \ - a_name##_tls = a_initializer; \ + a_name##tsd_tls = a_initializer; \ a_attr __thread bool JEMALLOC_TLS_MODEL \ - a_name##_initialized = false; \ -a_attr bool a_name##_booted = false; + a_name##tsd_initialized = false; \ +a_attr bool a_name##tsd_booted = false; #elif (defined(JEMALLOC_TLS)) #define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ a_attr __thread a_type JEMALLOC_TLS_MODEL \ - a_name##_tls = a_initializer; \ -a_attr pthread_key_t a_name##_tsd; \ -a_attr bool a_name##_booted = false; + a_name##tsd_tls = a_initializer; \ +a_attr pthread_key_t a_name##tsd_tsd; \ +a_attr bool a_name##tsd_booted = false; #elif (defined(_WIN32)) #define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ -a_attr DWORD a_name##_tsd; \ -a_attr bool a_name##_booted = false; +a_attr DWORD a_name##tsd_tsd; \ +a_attr bool a_name##tsd_booted = false; #else #define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ -a_attr pthread_key_t a_name##_tsd; \ -a_attr tsd_init_head_t a_name##_tsd_init_head = { \ +a_attr pthread_key_t a_name##tsd_tsd; \ +a_attr tsd_init_head_t a_name##tsd_init_head = { \ ql_head_initializer(blocks), \ MALLOC_MUTEX_INITIALIZER \ }; \ -a_attr bool a_name##_booted = false; +a_attr bool a_name##tsd_booted = false; #endif /* malloc_tsd_funcs(). */ @@ -125,75 +132,76 @@ a_attr bool a_name##_booted = false; a_cleanup) \ /* Initialization/cleanup. 
*/ \ a_attr bool \ -a_name##_tsd_cleanup_wrapper(void) \ +a_name##tsd_cleanup_wrapper(void) \ { \ \ - if (a_name##_initialized) { \ - a_name##_initialized = false; \ - a_cleanup(&a_name##_tls); \ + if (a_name##tsd_initialized) { \ + a_name##tsd_initialized = false; \ + a_cleanup(&a_name##tsd_tls); \ } \ - return (a_name##_initialized); \ + return (a_name##tsd_initialized); \ } \ a_attr bool \ -a_name##_tsd_boot(void) \ +a_name##tsd_boot(void) \ { \ \ if (a_cleanup != malloc_tsd_no_cleanup) { \ malloc_tsd_cleanup_register( \ - &a_name##_tsd_cleanup_wrapper); \ + &a_name##tsd_cleanup_wrapper); \ } \ - a_name##_booted = true; \ + a_name##tsd_booted = true; \ return (false); \ } \ /* Get/set. */ \ a_attr a_type * \ -a_name##_tsd_get(void) \ +a_name##tsd_get(void) \ { \ \ - assert(a_name##_booted); \ - return (&a_name##_tls); \ + assert(a_name##tsd_booted); \ + return (&a_name##tsd_tls); \ } \ a_attr void \ -a_name##_tsd_set(a_type *val) \ +a_name##tsd_set(a_type *val) \ { \ \ - assert(a_name##_booted); \ - a_name##_tls = (*val); \ + assert(a_name##tsd_booted); \ + a_name##tsd_tls = (*val); \ if (a_cleanup != malloc_tsd_no_cleanup) \ - a_name##_initialized = true; \ + a_name##tsd_initialized = true; \ } #elif (defined(JEMALLOC_TLS)) #define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \ a_cleanup) \ /* Initialization/cleanup. */ \ a_attr bool \ -a_name##_tsd_boot(void) \ +a_name##tsd_boot(void) \ { \ \ if (a_cleanup != malloc_tsd_no_cleanup) { \ - if (pthread_key_create(&a_name##_tsd, a_cleanup) != 0) \ + if (pthread_key_create(&a_name##tsd_tsd, a_cleanup) != \ + 0) \ return (true); \ } \ - a_name##_booted = true; \ + a_name##tsd_booted = true; \ return (false); \ } \ /* Get/set. 
*/ \ a_attr a_type * \ -a_name##_tsd_get(void) \ +a_name##tsd_get(void) \ { \ \ - assert(a_name##_booted); \ - return (&a_name##_tls); \ + assert(a_name##tsd_booted); \ + return (&a_name##tsd_tls); \ } \ a_attr void \ -a_name##_tsd_set(a_type *val) \ +a_name##tsd_set(a_type *val) \ { \ \ - assert(a_name##_booted); \ - a_name##_tls = (*val); \ + assert(a_name##tsd_booted); \ + a_name##tsd_tls = (*val); \ if (a_cleanup != malloc_tsd_no_cleanup) { \ - if (pthread_setspecific(a_name##_tsd, \ - (void *)(&a_name##_tls))) { \ + if (pthread_setspecific(a_name##tsd_tsd, \ + (void *)(&a_name##tsd_tls))) { \ malloc_write(": Error" \ " setting TSD for "#a_name"\n"); \ if (opt_abort) \ @@ -208,23 +216,20 @@ a_name##_tsd_set(a_type *val) \ typedef struct { \ bool initialized; \ a_type val; \ -} a_name##_tsd_wrapper_t; \ +} a_name##tsd_wrapper_t; \ /* Initialization/cleanup. */ \ a_attr bool \ -a_name##_tsd_cleanup_wrapper(void) \ +a_name##tsd_cleanup_wrapper(void) \ { \ - a_name##_tsd_wrapper_t *wrapper; \ + a_name##tsd_wrapper_t *wrapper; \ \ - wrapper = (a_name##_tsd_wrapper_t *) TlsGetValue(a_name##_tsd); \ + wrapper = (a_name##tsd_wrapper_t *)TlsGetValue(a_name##tsd_tsd);\ if (wrapper == NULL) \ return (false); \ if (a_cleanup != malloc_tsd_no_cleanup && \ wrapper->initialized) { \ - a_type val = wrapper->val; \ - a_type tsd_static_data = a_initializer; \ wrapper->initialized = false; \ - wrapper->val = tsd_static_data; \ - a_cleanup(&val); \ + a_cleanup(&wrapper->val); \ if (wrapper->initialized) { \ /* Trigger another cleanup round. 
*/ \ return (true); \ @@ -234,39 +239,38 @@ a_name##_tsd_cleanup_wrapper(void) \ return (false); \ } \ a_attr bool \ -a_name##_tsd_boot(void) \ +a_name##tsd_boot(void) \ { \ \ - a_name##_tsd = TlsAlloc(); \ - if (a_name##_tsd == TLS_OUT_OF_INDEXES) \ + a_name##tsd_tsd = TlsAlloc(); \ + if (a_name##tsd_tsd == TLS_OUT_OF_INDEXES) \ return (true); \ if (a_cleanup != malloc_tsd_no_cleanup) { \ malloc_tsd_cleanup_register( \ - &a_name##_tsd_cleanup_wrapper); \ + &a_name##tsd_cleanup_wrapper); \ } \ - a_name##_booted = true; \ + a_name##tsd_booted = true; \ return (false); \ } \ /* Get/set. */ \ -a_attr a_name##_tsd_wrapper_t * \ -a_name##_tsd_get_wrapper(void) \ +a_attr a_name##tsd_wrapper_t * \ +a_name##tsd_get_wrapper(void) \ { \ - a_name##_tsd_wrapper_t *wrapper = (a_name##_tsd_wrapper_t *) \ - TlsGetValue(a_name##_tsd); \ + a_name##tsd_wrapper_t *wrapper = (a_name##tsd_wrapper_t *) \ + TlsGetValue(a_name##tsd_tsd); \ \ - if (wrapper == NULL) { \ - wrapper = (a_name##_tsd_wrapper_t *) \ - malloc_tsd_malloc(sizeof(a_name##_tsd_wrapper_t)); \ + if (unlikely(wrapper == NULL)) { \ + wrapper = (a_name##tsd_wrapper_t *) \ + malloc_tsd_malloc(sizeof(a_name##tsd_wrapper_t)); \ if (wrapper == NULL) { \ malloc_write(": Error allocating" \ " TSD for "#a_name"\n"); \ abort(); \ } else { \ - static a_type tsd_static_data = a_initializer; \ wrapper->initialized = false; \ - wrapper->val = tsd_static_data; \ + wrapper->val = a_initializer; \ } \ - if (!TlsSetValue(a_name##_tsd, (void *)wrapper)) { \ + if (!TlsSetValue(a_name##tsd_tsd, (void *)wrapper)) { \ malloc_write(": Error setting" \ " TSD for "#a_name"\n"); \ abort(); \ @@ -275,21 +279,21 @@ a_name##_tsd_get_wrapper(void) \ return (wrapper); \ } \ a_attr a_type * \ -a_name##_tsd_get(void) \ +a_name##tsd_get(void) \ { \ - a_name##_tsd_wrapper_t *wrapper; \ + a_name##tsd_wrapper_t *wrapper; \ \ - assert(a_name##_booted); \ - wrapper = a_name##_tsd_get_wrapper(); \ + assert(a_name##tsd_booted); \ + wrapper = 
a_name##tsd_get_wrapper(); \ return (&wrapper->val); \ } \ a_attr void \ -a_name##_tsd_set(a_type *val) \ +a_name##tsd_set(a_type *val) \ { \ - a_name##_tsd_wrapper_t *wrapper; \ + a_name##tsd_wrapper_t *wrapper; \ \ - assert(a_name##_booted); \ - wrapper = a_name##_tsd_get_wrapper(); \ + assert(a_name##tsd_booted); \ + wrapper = a_name##tsd_get_wrapper(); \ wrapper->val = *(val); \ if (a_cleanup != malloc_tsd_no_cleanup) \ wrapper->initialized = true; \ @@ -301,12 +305,12 @@ a_name##_tsd_set(a_type *val) \ typedef struct { \ bool initialized; \ a_type val; \ -} a_name##_tsd_wrapper_t; \ +} a_name##tsd_wrapper_t; \ /* Initialization/cleanup. */ \ a_attr void \ -a_name##_tsd_cleanup_wrapper(void *arg) \ +a_name##tsd_cleanup_wrapper(void *arg) \ { \ - a_name##_tsd_wrapper_t *wrapper = (a_name##_tsd_wrapper_t *)arg;\ + a_name##tsd_wrapper_t *wrapper = (a_name##tsd_wrapper_t *)arg; \ \ if (a_cleanup != malloc_tsd_no_cleanup && \ wrapper->initialized) { \ @@ -314,7 +318,7 @@ a_name##_tsd_cleanup_wrapper(void *arg) \ a_cleanup(&wrapper->val); \ if (wrapper->initialized) { \ /* Trigger another cleanup round. */ \ - if (pthread_setspecific(a_name##_tsd, \ + if (pthread_setspecific(a_name##tsd_tsd, \ (void *)wrapper)) { \ malloc_write(": Error" \ " setting TSD for "#a_name"\n"); \ @@ -327,66 +331,65 @@ a_name##_tsd_cleanup_wrapper(void *arg) \ malloc_tsd_dalloc(wrapper); \ } \ a_attr bool \ -a_name##_tsd_boot(void) \ +a_name##tsd_boot(void) \ { \ \ - if (pthread_key_create(&a_name##_tsd, \ - a_name##_tsd_cleanup_wrapper) != 0) \ + if (pthread_key_create(&a_name##tsd_tsd, \ + a_name##tsd_cleanup_wrapper) != 0) \ return (true); \ - a_name##_booted = true; \ + a_name##tsd_booted = true; \ return (false); \ } \ /* Get/set. 
*/ \ -a_attr a_name##_tsd_wrapper_t * \ -a_name##_tsd_get_wrapper(void) \ +a_attr a_name##tsd_wrapper_t * \ +a_name##tsd_get_wrapper(void) \ { \ - a_name##_tsd_wrapper_t *wrapper = (a_name##_tsd_wrapper_t *) \ - pthread_getspecific(a_name##_tsd); \ + a_name##tsd_wrapper_t *wrapper = (a_name##tsd_wrapper_t *) \ + pthread_getspecific(a_name##tsd_tsd); \ \ - if (wrapper == NULL) { \ + if (unlikely(wrapper == NULL)) { \ tsd_init_block_t block; \ wrapper = tsd_init_check_recursion( \ - &a_name##_tsd_init_head, &block); \ + &a_name##tsd_init_head, &block); \ if (wrapper) \ return (wrapper); \ - wrapper = (a_name##_tsd_wrapper_t *) \ - malloc_tsd_malloc(sizeof(a_name##_tsd_wrapper_t)); \ + wrapper = (a_name##tsd_wrapper_t *) \ + malloc_tsd_malloc(sizeof(a_name##tsd_wrapper_t)); \ block.data = wrapper; \ if (wrapper == NULL) { \ malloc_write(": Error allocating" \ " TSD for "#a_name"\n"); \ abort(); \ } else { \ - static a_type tsd_static_data = a_initializer; \ wrapper->initialized = false; \ - wrapper->val = tsd_static_data; \ + wrapper->val = a_initializer; \ } \ - if (pthread_setspecific(a_name##_tsd, \ + if (pthread_setspecific(a_name##tsd_tsd, \ (void *)wrapper)) { \ malloc_write(": Error setting" \ " TSD for "#a_name"\n"); \ abort(); \ } \ - tsd_init_finish(&a_name##_tsd_init_head, &block); \ + tsd_init_finish(&a_name##tsd_init_head, &block); \ } \ return (wrapper); \ } \ a_attr a_type * \ -a_name##_tsd_get(void) \ +a_name##tsd_get(void) \ { \ - a_name##_tsd_wrapper_t *wrapper; \ + a_name##tsd_wrapper_t *wrapper; \ \ - assert(a_name##_booted); \ - wrapper = a_name##_tsd_get_wrapper(); \ + assert(a_name##tsd_booted); \ + wrapper = a_name##tsd_get_wrapper(); \ return (&wrapper->val); \ } \ a_attr void \ -a_name##_tsd_set(a_type *val) \ +a_name##tsd_set(a_type *val) \ { \ - a_name##_tsd_wrapper_t *wrapper; \ + a_name##tsd_wrapper_t *wrapper; \ \ - assert(a_name##_booted); \ - wrapper = a_name##_tsd_get_wrapper(); \ + assert(a_name##tsd_booted); \ + wrapper = 
a_name##tsd_get_wrapper(); \ wrapper->val = *(val); \ if (a_cleanup != malloc_tsd_no_cleanup) \ wrapper->initialized = true; \ @@ -410,25 +413,123 @@ struct tsd_init_head_s { }; #endif +#define MALLOC_TSD \ +/* O(name, type) */ \ + O(tcache, tcache_t *) \ + O(thread_allocated, uint64_t) \ + O(thread_deallocated, uint64_t) \ + O(prof_tdata, prof_tdata_t *) \ + O(arena, arena_t *) \ + O(tcache_enabled, tcache_enabled_t) \ + O(quarantine, quarantine_t *) \ + +#define TSD_INITIALIZER { \ + tsd_state_uninitialized, \ + NULL, \ + 0, \ + 0, \ + NULL, \ + NULL, \ + tcache_enabled_default, \ + NULL \ +} + +struct tsd_s { + tsd_state_t state; +#define O(n, t) \ + t n; +MALLOC_TSD +#undef O +}; + +static const tsd_t tsd_initializer = TSD_INITIALIZER; + #endif /* JEMALLOC_H_STRUCTS */ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS void *malloc_tsd_malloc(size_t size); void malloc_tsd_dalloc(void *wrapper); -void malloc_tsd_no_cleanup(void *); +void malloc_tsd_no_cleanup(void *arg); void malloc_tsd_cleanup_register(bool (*f)(void)); -void malloc_tsd_boot(void); +bool malloc_tsd_boot(void); #if (!defined(JEMALLOC_MALLOC_THREAD_CLEANUP) && !defined(JEMALLOC_TLS) && \ !defined(_WIN32)) void *tsd_init_check_recursion(tsd_init_head_t *head, tsd_init_block_t *block); void tsd_init_finish(tsd_init_head_t *head, tsd_init_block_t *block); #endif +void tsd_cleanup(void *arg); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ #ifdef JEMALLOC_H_INLINES +#ifndef JEMALLOC_ENABLE_INLINE +malloc_tsd_protos(JEMALLOC_ATTR(unused), , tsd_t) + +tsd_t *tsd_tryget(void); +#define O(n, t) \ +t *tsd_##n##p_get(tsd_t *tsd); \ +t tsd_##n##_get(tsd_t *tsd); \ +void tsd_##n##_set(tsd_t *tsd, t n); +MALLOC_TSD +#undef O +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_TSD_C_)) +malloc_tsd_externs(, tsd_t) +malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, , tsd_t, 
tsd_initializer, tsd_cleanup) + +JEMALLOC_INLINE tsd_t * +tsd_tryget(void) +{ + tsd_t *tsd; + + tsd = tsd_get(); + if (unlikely(tsd == NULL)) + return (NULL); + + if (likely(tsd->state == tsd_state_nominal)) + return (tsd); + else if (tsd->state == tsd_state_uninitialized) { + tsd->state = tsd_state_nominal; + tsd_set(tsd); + return (tsd); + } else if (tsd->state == tsd_state_purgatory) { + tsd->state = tsd_state_reincarnated; + tsd_set(tsd); + return (NULL); + } else { + assert(tsd->state == tsd_state_reincarnated); + return (NULL); + } +} + +#define O(n, t) \ +JEMALLOC_INLINE t * \ +tsd_##n##p_get(tsd_t *tsd) \ +{ \ + \ + return (&tsd->n); \ +} \ + \ +JEMALLOC_INLINE t \ +tsd_##n##_get(tsd_t *tsd) \ +{ \ + \ + return (*tsd_##n##p_get(tsd)); \ +} \ + \ +JEMALLOC_INLINE void \ +tsd_##n##_set(tsd_t *tsd, t n) \ +{ \ + \ + tsd->n = n; \ +} +MALLOC_TSD +#undef O +#endif + #endif /* JEMALLOC_H_INLINES */ /******************************************************************************/ diff --git a/src/arena.c b/src/arena.c index 35d792a2..40da9f47 100644 --- a/src/arena.c +++ b/src/arena.c @@ -2058,7 +2058,7 @@ arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, } void * -arena_ralloc(arena_t *arena, void *ptr, size_t oldsize, size_t size, +arena_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size, size_t extra, size_t alignment, bool zero, bool try_tcache_alloc, bool try_tcache_dalloc) { @@ -2078,9 +2078,12 @@ arena_ralloc(arena_t *arena, void *ptr, size_t oldsize, size_t size, size_t usize = sa2u(size + extra, alignment); if (usize == 0) return (NULL); - ret = ipalloct(usize, alignment, zero, try_tcache_alloc, arena); - } else - ret = arena_malloc(arena, size + extra, zero, try_tcache_alloc); + ret = ipalloct(tsd, usize, alignment, zero, try_tcache_alloc, + arena); + } else { + ret = arena_malloc(tsd, arena, size + extra, zero, + try_tcache_alloc); + } if (ret == NULL) { if (extra == 0) @@ -2090,10 +2093,12 @@ 
arena_ralloc(arena_t *arena, void *ptr, size_t oldsize, size_t size, size_t usize = sa2u(size, alignment); if (usize == 0) return (NULL); - ret = ipalloct(usize, alignment, zero, try_tcache_alloc, - arena); - } else - ret = arena_malloc(arena, size, zero, try_tcache_alloc); + ret = ipalloct(tsd, usize, alignment, zero, + try_tcache_alloc, arena); + } else { + ret = arena_malloc(tsd, arena, size, zero, + try_tcache_alloc); + } if (ret == NULL) return (NULL); @@ -2108,7 +2113,7 @@ arena_ralloc(arena_t *arena, void *ptr, size_t oldsize, size_t size, copysize = (size < oldsize) ? size : oldsize; JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, copysize); memcpy(ret, ptr, copysize); - iqalloc(ptr, try_tcache_dalloc); + iqalloc(tsd, ptr, try_tcache_dalloc); return (ret); } diff --git a/src/ckh.c b/src/ckh.c index 04c52966..7c7cc098 100644 --- a/src/ckh.c +++ b/src/ckh.c @@ -40,8 +40,8 @@ /******************************************************************************/ /* Function prototypes for non-inline static functions. */ -static bool ckh_grow(ckh_t *ckh); -static void ckh_shrink(ckh_t *ckh); +static bool ckh_grow(tsd_t *tsd, ckh_t *ckh); +static void ckh_shrink(tsd_t *tsd, ckh_t *ckh); /******************************************************************************/ @@ -243,7 +243,7 @@ ckh_rebuild(ckh_t *ckh, ckhc_t *aTab) } static bool -ckh_grow(ckh_t *ckh) +ckh_grow(tsd_t *tsd, ckh_t *ckh) { bool ret; ckhc_t *tab, *ttab; @@ -270,7 +270,7 @@ ckh_grow(ckh_t *ckh) ret = true; goto label_return; } - tab = (ckhc_t *)ipalloc(usize, CACHELINE, true); + tab = (ckhc_t *)ipalloc(tsd, usize, CACHELINE, true); if (tab == NULL) { ret = true; goto label_return; @@ -282,12 +282,12 @@ ckh_grow(ckh_t *ckh) ckh->lg_curbuckets = lg_curcells - LG_CKH_BUCKET_CELLS; if (ckh_rebuild(ckh, tab) == false) { - idalloc(tab); + idalloc(tsd, tab); break; } /* Rebuilding failed, so back out partially rebuilt table. 
*/ - idalloc(ckh->tab); + idalloc(tsd, ckh->tab); ckh->tab = tab; ckh->lg_curbuckets = lg_prevbuckets; } @@ -298,7 +298,7 @@ label_return: } static void -ckh_shrink(ckh_t *ckh) +ckh_shrink(tsd_t *tsd, ckh_t *ckh) { ckhc_t *tab, *ttab; size_t lg_curcells, usize; @@ -313,7 +313,7 @@ ckh_shrink(ckh_t *ckh) usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE); if (usize == 0) return; - tab = (ckhc_t *)ipalloc(usize, CACHELINE, true); + tab = (ckhc_t *)ipalloc(tsd, usize, CACHELINE, true); if (tab == NULL) { /* * An OOM error isn't worth propagating, since it doesn't @@ -328,7 +328,7 @@ ckh_shrink(ckh_t *ckh) ckh->lg_curbuckets = lg_curcells - LG_CKH_BUCKET_CELLS; if (ckh_rebuild(ckh, tab) == false) { - idalloc(tab); + idalloc(tsd, tab); #ifdef CKH_COUNT ckh->nshrinks++; #endif @@ -336,7 +336,7 @@ ckh_shrink(ckh_t *ckh) } /* Rebuilding failed, so back out partially rebuilt table. */ - idalloc(ckh->tab); + idalloc(tsd, ckh->tab); ckh->tab = tab; ckh->lg_curbuckets = lg_prevbuckets; #ifdef CKH_COUNT @@ -345,7 +345,8 @@ ckh_shrink(ckh_t *ckh) } bool -ckh_new(ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh_keycomp_t *keycomp) +ckh_new(tsd_t *tsd, ckh_t *ckh, size_t minitems, ckh_hash_t *hash, + ckh_keycomp_t *keycomp) { bool ret; size_t mincells, usize; @@ -388,7 +389,7 @@ ckh_new(ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh_keycomp_t *keycomp) ret = true; goto label_return; } - ckh->tab = (ckhc_t *)ipalloc(usize, CACHELINE, true); + ckh->tab = (ckhc_t *)ipalloc(tsd, usize, CACHELINE, true); if (ckh->tab == NULL) { ret = true; goto label_return; @@ -400,7 +401,7 @@ label_return: } void -ckh_delete(ckh_t *ckh) +ckh_delete(tsd_t *tsd, ckh_t *ckh) { assert(ckh != NULL); @@ -417,7 +418,7 @@ ckh_delete(ckh_t *ckh) (unsigned long long)ckh->nrelocs); #endif - idalloc(ckh->tab); + idalloc(tsd, ckh->tab); if (config_debug) memset(ckh, 0x5a, sizeof(ckh_t)); } @@ -452,7 +453,7 @@ ckh_iter(ckh_t *ckh, size_t *tabind, void **key, void **data) } bool -ckh_insert(ckh_t *ckh, 
const void *key, const void *data) +ckh_insert(tsd_t *tsd, ckh_t *ckh, const void *key, const void *data) { bool ret; @@ -464,7 +465,7 @@ ckh_insert(ckh_t *ckh, const void *key, const void *data) #endif while (ckh_try_insert(ckh, &key, &data)) { - if (ckh_grow(ckh)) { + if (ckh_grow(tsd, ckh)) { ret = true; goto label_return; } @@ -476,7 +477,8 @@ label_return: } bool -ckh_remove(ckh_t *ckh, const void *searchkey, void **key, void **data) +ckh_remove(tsd_t *tsd, ckh_t *ckh, const void *searchkey, void **key, + void **data) { size_t cell; @@ -497,7 +499,7 @@ ckh_remove(ckh_t *ckh, const void *searchkey, void **key, void **data) + LG_CKH_BUCKET_CELLS - 2)) && ckh->lg_curbuckets > ckh->lg_minbuckets) { /* Ignore error due to OOM. */ - ckh_shrink(ckh); + ckh_shrink(tsd, ckh); } return (false); diff --git a/src/ctl.c b/src/ctl.c index b816c845..c55f6e44 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -565,18 +565,23 @@ ctl_arena_refresh(arena_t *arena, unsigned i) static bool ctl_grow(void) { + tsd_t *tsd; ctl_arena_stats_t *astats; arena_t **tarenas; + tsd = tsd_tryget(); + if (tsd == NULL) + return (true); + /* Allocate extended arena stats and arenas arrays. */ - astats = (ctl_arena_stats_t *)imalloc((ctl_stats.narenas + 2) * + astats = (ctl_arena_stats_t *)imalloc(tsd, (ctl_stats.narenas + 2) * sizeof(ctl_arena_stats_t)); if (astats == NULL) return (true); - tarenas = (arena_t **)imalloc((ctl_stats.narenas + 1) * + tarenas = (arena_t **)imalloc(tsd, (ctl_stats.narenas + 1) * sizeof(arena_t *)); if (tarenas == NULL) { - idalloc(astats); + idalloc(tsd, astats); return (true); } @@ -585,8 +590,8 @@ ctl_grow(void) sizeof(ctl_arena_stats_t)); memset(&astats[ctl_stats.narenas + 1], 0, sizeof(ctl_arena_stats_t)); if (ctl_arena_init(&astats[ctl_stats.narenas + 1])) { - idalloc(tarenas); - idalloc(astats); + idalloc(tsd, tarenas); + idalloc(tsd, astats); return (true); } /* Swap merged stats to their new location. */ @@ -623,7 +628,7 @@ ctl_grow(void) * base_alloc()). 
*/ if (ctl_stats.narenas != narenas_auto) - idalloc(arenas_old); + idalloc(tsd, arenas_old); } ctl_stats.arenas = astats; ctl_stats.narenas++; @@ -1105,6 +1110,31 @@ label_return: \ return (ret); \ } +#define CTL_TSD_RO_NL_CGEN(c, n, m, t) \ +static int \ +n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ + void *newp, size_t newlen) \ +{ \ + int ret; \ + t oldval; \ + tsd_t *tsd; \ + \ + if ((c) == false) \ + return (ENOENT); \ + READONLY(); \ + tsd = tsd_tryget(); \ + if (tsd == NULL) { \ + ret = EAGAIN; \ + goto label_return; \ + } \ + oldval = (m(tsd)); \ + READ(oldval, t); \ + \ + ret = 0; \ +label_return: \ + return (ret); \ +} + #define CTL_RO_BOOL_CONFIG_GEN(n) \ static int \ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ @@ -1194,10 +1224,15 @@ thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; + tsd_t *tsd; unsigned newind, oldind; + tsd = tsd_tryget(); + if (tsd == NULL) + return (EAGAIN); + malloc_mutex_lock(&ctl_mtx); - newind = oldind = choose_arena(NULL)->ind; + newind = oldind = choose_arena(tsd, NULL)->ind; WRITE(newind, unsigned); READ(oldind, unsigned); if (newind != oldind) { @@ -1224,14 +1259,14 @@ thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, /* Set new arena association. 
*/ if (config_tcache) { - tcache_t *tcache; - if ((uintptr_t)(tcache = *tcache_tsd_get()) > - (uintptr_t)TCACHE_STATE_MAX) { + tcache_t *tcache = tsd_tcache_get(tsd); + if (tcache != NULL) { tcache_arena_dissociate(tcache); tcache_arena_associate(tcache, arena); } } - arenas_tsd_set(&arena); + + tsd_arena_set(tsd, arena); } ret = 0; @@ -1240,14 +1275,14 @@ label_return: return (ret); } -CTL_RO_NL_CGEN(config_stats, thread_allocated, - thread_allocated_tsd_get()->allocated, uint64_t) -CTL_RO_NL_CGEN(config_stats, thread_allocatedp, - &thread_allocated_tsd_get()->allocated, uint64_t *) -CTL_RO_NL_CGEN(config_stats, thread_deallocated, - thread_allocated_tsd_get()->deallocated, uint64_t) -CTL_RO_NL_CGEN(config_stats, thread_deallocatedp, - &thread_allocated_tsd_get()->deallocated, uint64_t *) +CTL_TSD_RO_NL_CGEN(config_stats, thread_allocated, tsd_thread_allocated_get, + uint64_t) +CTL_TSD_RO_NL_CGEN(config_stats, thread_allocatedp, tsd_thread_allocatedp_get, + uint64_t *) +CTL_TSD_RO_NL_CGEN(config_stats, thread_deallocated, tsd_thread_deallocated_get, + uint64_t) +CTL_TSD_RO_NL_CGEN(config_stats, thread_deallocatedp, + tsd_thread_deallocatedp_get, uint64_t *) static int thread_tcache_enabled_ctl(const size_t *mib, size_t miblen, void *oldp, @@ -1305,11 +1340,20 @@ thread_prof_name_ctl(const size_t *mib, size_t miblen, void *oldp, oldname = prof_thread_name_get(); if (newp != NULL) { + tsd_t *tsd; + if (newlen != sizeof(const char *)) { ret = EINVAL; goto label_return; } - if (prof_thread_name_set(*(const char **)newp)) { + + tsd = tsd_tryget(); + if (tsd == NULL) { + ret = EAGAIN; + goto label_return; + } + + if (prof_thread_name_set(tsd, *(const char **)newp)) { ret = EAGAIN; goto label_return; } @@ -1675,6 +1719,7 @@ prof_reset_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, { int ret; size_t lg_sample = lg_prof_sample; + tsd_t *tsd; if (config_prof == false) return (ENOENT); @@ -1684,7 +1729,13 @@ prof_reset_ctl(const size_t *mib, size_t 
miblen, void *oldp, size_t *oldlenp, if (lg_sample >= (sizeof(uint64_t) << 3)) lg_sample = (sizeof(uint64_t) << 3) - 1; - prof_reset(lg_sample); + tsd = tsd_tryget(); + if (tsd == NULL) { + ret = EAGAIN; + goto label_return; + } + + prof_reset(tsd, lg_sample); ret = 0; label_return: diff --git a/src/huge.c b/src/huge.c index 0b7db7fc..2e30ccfd 100644 --- a/src/huge.c +++ b/src/huge.c @@ -13,14 +13,15 @@ static malloc_mutex_t huge_mtx; static extent_tree_t huge; void * -huge_malloc(arena_t *arena, size_t size, bool zero) +huge_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero) { - return (huge_palloc(arena, size, chunksize, zero)); + return (huge_palloc(tsd, arena, size, chunksize, zero)); } void * -huge_palloc(arena_t *arena, size_t size, size_t alignment, bool zero) +huge_palloc(tsd_t *tsd, arena_t *arena, size_t size, size_t alignment, + bool zero) { void *ret; size_t csize; @@ -45,7 +46,7 @@ huge_palloc(arena_t *arena, size_t size, size_t alignment, bool zero) * it is possible to make correct junk/zero fill decisions below. */ is_zeroed = zero; - arena = choose_arena(arena); + arena = choose_arena(tsd, arena); ret = arena_chunk_alloc_huge(arena, csize, alignment, &is_zeroed); if (ret == NULL) { base_node_dalloc(node); @@ -90,7 +91,7 @@ huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra) } void * -huge_ralloc(arena_t *arena, void *ptr, size_t oldsize, size_t size, +huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size, size_t extra, size_t alignment, bool zero, bool try_tcache_dalloc) { void *ret; @@ -106,18 +107,18 @@ huge_ralloc(arena_t *arena, void *ptr, size_t oldsize, size_t size, * space and copying. 
*/ if (alignment > chunksize) - ret = huge_palloc(arena, size + extra, alignment, zero); + ret = huge_palloc(tsd, arena, size + extra, alignment, zero); else - ret = huge_malloc(arena, size + extra, zero); + ret = huge_malloc(tsd, arena, size + extra, zero); if (ret == NULL) { if (extra == 0) return (NULL); /* Try again, this time without extra. */ if (alignment > chunksize) - ret = huge_palloc(arena, size, alignment, zero); + ret = huge_palloc(tsd, arena, size, alignment, zero); else - ret = huge_malloc(arena, size, zero); + ret = huge_malloc(tsd, arena, size, zero); if (ret == NULL) return (NULL); @@ -129,7 +130,7 @@ huge_ralloc(arena_t *arena, void *ptr, size_t oldsize, size_t size, */ copysize = (size < oldsize) ? size : oldsize; memcpy(ret, ptr, copysize); - iqalloc(ptr, try_tcache_dalloc); + iqalloc(tsd, ptr, try_tcache_dalloc); return (ret); } diff --git a/src/jemalloc.c b/src/jemalloc.c index c5b8f520..4d3b22e5 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -5,8 +5,6 @@ /* Data. */ malloc_tsd_data(, arenas, arena_t *, NULL) -malloc_tsd_data(, thread_allocated, thread_allocated_t, - THREAD_ALLOCATED_INITIALIZER) /* Runtime configuration options. */ const char *je_malloc_conf; @@ -142,7 +140,7 @@ arenas_extend(unsigned ind) /* Slow path, called only by choose_arena(). */ arena_t * -choose_arena_hard(void) +choose_arena_hard(tsd_t *tsd) { arena_t *ret; @@ -196,11 +194,32 @@ choose_arena_hard(void) malloc_mutex_unlock(&arenas_lock); } - arenas_tsd_set(&ret); + tsd_arena_set(tsd, ret); return (ret); } +void +thread_allocated_cleanup(tsd_t *tsd) +{ + + /* Do nothing. */ +} + +void +thread_deallocated_cleanup(tsd_t *tsd) +{ + + /* Do nothing. */ +} + +void +arena_cleanup(tsd_t *tsd) +{ + + /* Do nothing. 
*/ +} + static void stats_print_atexit(void) { @@ -691,7 +710,11 @@ malloc_init_hard(void) #endif malloc_initializer = INITIALIZER; - malloc_tsd_boot(); + if (malloc_tsd_boot()) { + malloc_mutex_unlock(&init_lock); + return (true); + } + if (config_prof) prof_boot0(); @@ -726,7 +749,7 @@ malloc_init_hard(void) arena_boot(); - if (config_tcache && tcache_boot0()) { + if (config_tcache && tcache_boot()) { malloc_mutex_unlock(&init_lock); return (true); } @@ -759,27 +782,6 @@ malloc_init_hard(void) return (true); } - /* Initialize allocation counters before any allocations can occur. */ - if (config_stats && thread_allocated_tsd_boot()) { - malloc_mutex_unlock(&init_lock); - return (true); - } - - if (arenas_tsd_boot()) { - malloc_mutex_unlock(&init_lock); - return (true); - } - - if (config_tcache && tcache_boot1()) { - malloc_mutex_unlock(&init_lock); - return (true); - } - - if (config_fill && quarantine_boot()) { - malloc_mutex_unlock(&init_lock); - return (true); - } - if (config_prof && prof_boot2()) { malloc_mutex_unlock(&init_lock); return (true); @@ -863,36 +865,36 @@ malloc_init_hard(void) */ static void * -imalloc_prof_sample(size_t usize, prof_tctx_t *tctx) +imalloc_prof_sample(tsd_t *tsd, size_t usize, prof_tctx_t *tctx) { void *p; if (tctx == NULL) return (NULL); if (usize <= SMALL_MAXCLASS) { - p = imalloc(LARGE_MINCLASS); + p = imalloc(tsd, LARGE_MINCLASS); if (p == NULL) return (NULL); arena_prof_promoted(p, usize); } else - p = imalloc(usize); + p = imalloc(tsd, usize); return (p); } JEMALLOC_ALWAYS_INLINE_C void * -imalloc_prof(size_t usize) +imalloc_prof(tsd_t *tsd, size_t usize) { void *p; prof_tctx_t *tctx; - tctx = prof_alloc_prep(usize, true); + tctx = prof_alloc_prep(tsd, usize, true); if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) - p = imalloc_prof_sample(usize, tctx); + p = imalloc_prof_sample(tsd, usize, tctx); else - p = imalloc(usize); + p = imalloc(tsd, usize); if (p == NULL) { - prof_alloc_rollback(tctx, true); + 
prof_alloc_rollback(tsd, tctx, true); return (NULL); } prof_malloc(p, usize, tctx); @@ -901,32 +903,33 @@ imalloc_prof(size_t usize) } JEMALLOC_ALWAYS_INLINE_C void * -imalloc_body(size_t size, size_t *usize) +imalloc_body(size_t size, tsd_t **tsd, size_t *usize) { - if (unlikely(malloc_init())) + if (unlikely(malloc_init()) || unlikely((*tsd = tsd_tryget()) == NULL)) return (NULL); if (config_prof && opt_prof) { *usize = s2u(size); - return (imalloc_prof(*usize)); + return (imalloc_prof(*tsd, *usize)); } if (config_stats || (config_valgrind && unlikely(in_valgrind))) *usize = s2u(size); - return (imalloc(size)); + return (imalloc(*tsd, size)); } void * je_malloc(size_t size) { void *ret; + tsd_t *tsd; size_t usize JEMALLOC_CC_SILENCE_INIT(0); if (size == 0) size = 1; - ret = imalloc_body(size, &usize); + ret = imalloc_body(size, &tsd, &usize); if (unlikely(ret == NULL)) { if (config_xmalloc && unlikely(opt_xmalloc)) { malloc_write(": Error in malloc(): " @@ -937,7 +940,7 @@ je_malloc(size_t size) } if (config_stats && likely(ret != NULL)) { assert(usize == isalloc(ret, config_prof)); - thread_allocated_tsd_get()->allocated += usize; + *tsd_thread_allocatedp_get(tsd) += usize; } UTRACE(0, size, ret); JEMALLOC_VALGRIND_MALLOC(ret != NULL, ret, usize, false); @@ -945,7 +948,8 @@ je_malloc(size_t size) } static void * -imemalign_prof_sample(size_t alignment, size_t usize, prof_tctx_t *tctx) +imemalign_prof_sample(tsd_t *tsd, size_t alignment, size_t usize, + prof_tctx_t *tctx) { void *p; @@ -953,29 +957,29 @@ imemalign_prof_sample(size_t alignment, size_t usize, prof_tctx_t *tctx) return (NULL); if (usize <= SMALL_MAXCLASS) { assert(sa2u(LARGE_MINCLASS, alignment) == LARGE_MINCLASS); - p = imalloc(LARGE_MINCLASS); + p = imalloc(tsd, LARGE_MINCLASS); if (p == NULL) return (NULL); arena_prof_promoted(p, usize); } else - p = ipalloc(usize, alignment, false); + p = ipalloc(tsd, usize, alignment, false); return (p); } JEMALLOC_ALWAYS_INLINE_C void * -imemalign_prof(size_t 
alignment, size_t usize) +imemalign_prof(tsd_t *tsd, size_t alignment, size_t usize) { void *p; prof_tctx_t *tctx; - tctx = prof_alloc_prep(usize, true); + tctx = prof_alloc_prep(tsd, usize, true); if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) - p = imemalign_prof_sample(alignment, usize, tctx); + p = imemalign_prof_sample(tsd, alignment, usize, tctx); else - p = ipalloc(usize, alignment, false); + p = ipalloc(tsd, usize, alignment, false); if (p == NULL) { - prof_alloc_rollback(tctx, true); + prof_alloc_rollback(tsd, tctx, true); return (NULL); } prof_malloc(p, usize, tctx); @@ -988,12 +992,13 @@ static int imemalign(void **memptr, size_t alignment, size_t size, size_t min_alignment) { int ret; + tsd_t *tsd; size_t usize; void *result; assert(min_alignment != 0); - if (unlikely(malloc_init())) { + if (unlikely(malloc_init()) || unlikely((tsd = tsd_tryget()) == NULL)) { result = NULL; goto label_oom; } else { @@ -1020,9 +1025,9 @@ imemalign(void **memptr, size_t alignment, size_t size, size_t min_alignment) } if (config_prof && opt_prof) - result = imemalign_prof(alignment, usize); + result = imemalign_prof(tsd, alignment, usize); else - result = ipalloc(usize, alignment, false); + result = ipalloc(tsd, usize, alignment, false); if (unlikely(result == NULL)) goto label_oom; } @@ -1032,7 +1037,7 @@ imemalign(void **memptr, size_t alignment, size_t size, size_t min_alignment) label_return: if (config_stats && likely(result != NULL)) { assert(usize == isalloc(result, config_prof)); - thread_allocated_tsd_get()->allocated += usize; + *tsd_thread_allocatedp_get(tsd) += usize; } UTRACE(0, size, result); return (ret); @@ -1072,36 +1077,36 @@ je_aligned_alloc(size_t alignment, size_t size) } static void * -icalloc_prof_sample(size_t usize, prof_tctx_t *tctx) +icalloc_prof_sample(tsd_t *tsd, size_t usize, prof_tctx_t *tctx) { void *p; if (tctx == NULL) return (NULL); if (usize <= SMALL_MAXCLASS) { - p = icalloc(LARGE_MINCLASS); + p = icalloc(tsd, LARGE_MINCLASS); if (p == 
NULL) return (NULL); arena_prof_promoted(p, usize); } else - p = icalloc(usize); + p = icalloc(tsd, usize); return (p); } JEMALLOC_ALWAYS_INLINE_C void * -icalloc_prof(size_t usize) +icalloc_prof(tsd_t *tsd, size_t usize) { void *p; prof_tctx_t *tctx; - tctx = prof_alloc_prep(usize, true); + tctx = prof_alloc_prep(tsd, usize, true); if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) - p = icalloc_prof_sample(usize, tctx); + p = icalloc_prof_sample(tsd, usize, tctx); else - p = icalloc(usize); + p = icalloc(tsd, usize); if (p == NULL) { - prof_alloc_rollback(tctx, true); + prof_alloc_rollback(tsd, tctx, true); return (NULL); } prof_malloc(p, usize, tctx); @@ -1113,10 +1118,11 @@ void * je_calloc(size_t num, size_t size) { void *ret; + tsd_t *tsd; size_t num_size; size_t usize JEMALLOC_CC_SILENCE_INIT(0); - if (unlikely(malloc_init())) { + if (unlikely(malloc_init()) || unlikely((tsd = tsd_tryget()) == NULL)) { num_size = 0; ret = NULL; goto label_return; @@ -1144,11 +1150,11 @@ je_calloc(size_t num, size_t size) if (config_prof && opt_prof) { usize = s2u(num_size); - ret = icalloc_prof(usize); + ret = icalloc_prof(tsd, usize); } else { if (config_stats || (config_valgrind && unlikely(in_valgrind))) usize = s2u(num_size); - ret = icalloc(num_size); + ret = icalloc(tsd, num_size); } label_return: @@ -1162,7 +1168,7 @@ label_return: } if (config_stats && likely(ret != NULL)) { assert(usize == isalloc(ret, config_prof)); - thread_allocated_tsd_get()->allocated += usize; + *tsd_thread_allocatedp_get(tsd) += usize; } UTRACE(0, num_size, ret); JEMALLOC_VALGRIND_MALLOC(ret != NULL, ret, usize, true); @@ -1170,44 +1176,44 @@ label_return: } static void * -irealloc_prof_sample(void *oldptr, size_t usize, prof_tctx_t *tctx) +irealloc_prof_sample(tsd_t *tsd, void *oldptr, size_t usize, prof_tctx_t *tctx) { void *p; if (tctx == NULL) return (NULL); if (usize <= SMALL_MAXCLASS) { - p = iralloc(oldptr, LARGE_MINCLASS, 0, false); + p = iralloc(tsd, oldptr, LARGE_MINCLASS, 0, false); 
if (p == NULL) return (NULL); arena_prof_promoted(p, usize); } else - p = iralloc(oldptr, usize, 0, false); + p = iralloc(tsd, oldptr, usize, 0, false); return (p); } JEMALLOC_ALWAYS_INLINE_C void * -irealloc_prof(void *oldptr, size_t old_usize, size_t usize) +irealloc_prof(tsd_t *tsd, void *oldptr, size_t old_usize, size_t usize) { void *p; prof_tctx_t *old_tctx, *tctx; old_tctx = prof_tctx_get(oldptr); - tctx = prof_alloc_prep(usize, true); + tctx = prof_alloc_prep(tsd, usize, true); if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) - p = irealloc_prof_sample(oldptr, usize, tctx); + p = irealloc_prof_sample(tsd, oldptr, usize, tctx); else - p = iralloc(oldptr, usize, 0, false); + p = iralloc(tsd, oldptr, usize, 0, false); if (p == NULL) return (NULL); - prof_realloc(p, usize, tctx, true, old_usize, old_tctx); + prof_realloc(tsd, p, usize, tctx, true, old_usize, old_tctx); return (p); } JEMALLOC_INLINE_C void -ifree(void *ptr, bool try_tcache) +ifree(tsd_t *tsd, void *ptr, bool try_tcache) { size_t usize; UNUSED size_t rzsize JEMALLOC_CC_SILENCE_INIT(0); @@ -1217,19 +1223,19 @@ ifree(void *ptr, bool try_tcache) if (config_prof && opt_prof) { usize = isalloc(ptr, config_prof); - prof_free(ptr, usize); + prof_free(tsd, ptr, usize); } else if (config_stats || config_valgrind) usize = isalloc(ptr, config_prof); - if (config_stats) - thread_allocated_tsd_get()->deallocated += usize; + if (config_stats && likely(tsd != NULL)) + *tsd_thread_deallocatedp_get(tsd) += usize; if (config_valgrind && unlikely(in_valgrind)) rzsize = p2rz(ptr); - iqalloc(ptr, try_tcache); + iqalloc(tsd, ptr, try_tcache); JEMALLOC_VALGRIND_FREE(ptr, rzsize); } JEMALLOC_INLINE_C void -isfree(void *ptr, size_t usize, bool try_tcache) +isfree(tsd_t *tsd, void *ptr, size_t usize, bool try_tcache) { UNUSED size_t rzsize JEMALLOC_CC_SILENCE_INIT(0); @@ -1237,12 +1243,12 @@ isfree(void *ptr, size_t usize, bool try_tcache) assert(malloc_initialized || IS_INITIALIZER); if (config_prof && opt_prof) - 
prof_free(ptr, usize); - if (config_stats) - thread_allocated_tsd_get()->deallocated += usize; + prof_free(tsd, ptr, usize); + if (config_stats && likely(tsd != NULL)) + *tsd_thread_deallocatedp_get(tsd) += usize; if (config_valgrind && unlikely(in_valgrind)) rzsize = p2rz(ptr); - isqalloc(ptr, usize, try_tcache); + isqalloc(tsd, ptr, usize, try_tcache); JEMALLOC_VALGRIND_FREE(ptr, rzsize); } @@ -1250,6 +1256,7 @@ void * je_realloc(void *ptr, size_t size) { void *ret; + tsd_t *tsd; size_t usize JEMALLOC_CC_SILENCE_INIT(0); size_t old_usize = 0; UNUSED size_t old_rzsize JEMALLOC_CC_SILENCE_INIT(0); @@ -1258,7 +1265,8 @@ je_realloc(void *ptr, size_t size) if (ptr != NULL) { /* realloc(ptr, 0) is equivalent to free(ptr). */ UTRACE(ptr, 0, 0); - ifree(ptr, true); + tsd = tsd_tryget(); + ifree(tsd, ptr, true); return (NULL); } size = 1; @@ -1268,24 +1276,29 @@ je_realloc(void *ptr, size_t size) assert(malloc_initialized || IS_INITIALIZER); malloc_thread_init(); - if ((config_prof && opt_prof) || config_stats || - (config_valgrind && unlikely(in_valgrind))) - old_usize = isalloc(ptr, config_prof); - if (config_valgrind && unlikely(in_valgrind)) - old_rzsize = config_prof ? p2rz(ptr) : u2rz(old_usize); + if ((tsd = tsd_tryget()) != NULL) { + if ((config_prof && opt_prof) || config_stats || + (config_valgrind && unlikely(in_valgrind))) + old_usize = isalloc(ptr, config_prof); + if (config_valgrind && unlikely(in_valgrind)) { + old_rzsize = config_prof ? 
p2rz(ptr) : + u2rz(old_usize); + } - if (config_prof && opt_prof) { - usize = s2u(size); - ret = irealloc_prof(ptr, old_usize, usize); - } else { - if (config_stats || (config_valgrind && - unlikely(in_valgrind))) + if (config_prof && opt_prof) { usize = s2u(size); - ret = iralloc(ptr, size, 0, false); - } + ret = irealloc_prof(tsd, ptr, old_usize, usize); + } else { + if (config_stats || (config_valgrind && + unlikely(in_valgrind))) + usize = s2u(size); + ret = iralloc(tsd, ptr, size, 0, false); + } + } else + ret = NULL; } else { /* realloc(NULL, size) is equivalent to malloc(size). */ - ret = imalloc_body(size, &usize); + ret = imalloc_body(size, &tsd, &usize); } if (unlikely(ret == NULL)) { @@ -1297,11 +1310,11 @@ je_realloc(void *ptr, size_t size) set_errno(ENOMEM); } if (config_stats && likely(ret != NULL)) { - thread_allocated_t *ta; assert(usize == isalloc(ret, config_prof)); - ta = thread_allocated_tsd_get(); - ta->allocated += usize; - ta->deallocated += old_usize; + if (tsd != NULL) { + *tsd_thread_allocatedp_get(tsd) += usize; + *tsd_thread_deallocatedp_get(tsd) += old_usize; + } } UTRACE(ptr, size, ret); JEMALLOC_VALGRIND_REALLOC(true, ret, usize, true, ptr, old_usize, @@ -1315,7 +1328,7 @@ je_free(void *ptr) UTRACE(ptr, 0, 0); if (likely(ptr != NULL)) - ifree(ptr, true); + ifree(tsd_tryget(), ptr, true); } /* @@ -1425,50 +1438,52 @@ imallocx_flags_decode(size_t size, int flags, size_t *usize, size_t *alignment, } JEMALLOC_ALWAYS_INLINE_C void * -imallocx_flags(size_t usize, size_t alignment, bool zero, bool try_tcache, - arena_t *arena) +imallocx_flags(tsd_t *tsd, size_t usize, size_t alignment, bool zero, + bool try_tcache, arena_t *arena) { - if (alignment != 0) - return (ipalloct(usize, alignment, zero, try_tcache, arena)); + if (alignment != 0) { + return (ipalloct(tsd, usize, alignment, zero, try_tcache, + arena)); + } if (zero) - return (icalloct(usize, try_tcache, arena)); - return (imalloct(usize, try_tcache, arena)); + return (icalloct(tsd, 
usize, try_tcache, arena)); + return (imalloct(tsd, usize, try_tcache, arena)); } JEMALLOC_ALWAYS_INLINE_C void * -imallocx_maybe_flags(size_t size, int flags, size_t usize, size_t alignment, - bool zero, bool try_tcache, arena_t *arena) +imallocx_maybe_flags(tsd_t *tsd, size_t size, int flags, size_t usize, + size_t alignment, bool zero, bool try_tcache, arena_t *arena) { if (likely(flags == 0)) - return (imalloc(size)); - return (imallocx_flags(usize, alignment, zero, try_tcache, arena)); + return (imalloc(tsd, size)); + return (imallocx_flags(tsd, usize, alignment, zero, try_tcache, arena)); } static void * -imallocx_prof_sample(size_t size, int flags, size_t usize, size_t alignment, - bool zero, bool try_tcache, arena_t *arena) +imallocx_prof_sample(tsd_t *tsd, size_t size, int flags, size_t usize, + size_t alignment, bool zero, bool try_tcache, arena_t *arena) { void *p; if (usize <= SMALL_MAXCLASS) { assert(((alignment == 0) ? s2u(LARGE_MINCLASS) : sa2u(LARGE_MINCLASS, alignment)) == LARGE_MINCLASS); - p = imalloct(LARGE_MINCLASS, try_tcache, arena); + p = imalloct(tsd, LARGE_MINCLASS, try_tcache, arena); if (p == NULL) return (NULL); arena_prof_promoted(p, usize); } else { - p = imallocx_maybe_flags(size, flags, usize, alignment, zero, - try_tcache, arena); + p = imallocx_maybe_flags(tsd, size, flags, usize, alignment, + zero, try_tcache, arena); } return (p); } JEMALLOC_ALWAYS_INLINE_C void * -imallocx_prof(size_t size, int flags, size_t *usize) +imallocx_prof(tsd_t *tsd, size_t size, int flags, size_t *usize) { void *p; size_t alignment; @@ -1479,17 +1494,17 @@ imallocx_prof(size_t size, int flags, size_t *usize) imallocx_flags_decode(size, flags, usize, &alignment, &zero, &try_tcache, &arena); - tctx = prof_alloc_prep(*usize, true); + tctx = prof_alloc_prep(tsd, *usize, true); if (likely((uintptr_t)tctx == (uintptr_t)1U)) { - p = imallocx_maybe_flags(size, flags, *usize, alignment, zero, - try_tcache, arena); + p = imallocx_maybe_flags(tsd, size, flags, 
*usize, alignment, + zero, try_tcache, arena); } else if ((uintptr_t)tctx > (uintptr_t)1U) { - p = imallocx_prof_sample(size, flags, *usize, alignment, zero, - try_tcache, arena); + p = imallocx_prof_sample(tsd, size, flags, *usize, alignment, + zero, try_tcache, arena); } else p = NULL; if (unlikely(p == NULL)) { - prof_alloc_rollback(tctx, true); + prof_alloc_rollback(tsd, tctx, true); return (NULL); } prof_malloc(p, *usize, tctx); @@ -1498,7 +1513,7 @@ imallocx_prof(size_t size, int flags, size_t *usize) } JEMALLOC_ALWAYS_INLINE_C void * -imallocx_no_prof(size_t size, int flags, size_t *usize) +imallocx_no_prof(tsd_t *tsd, size_t size, int flags, size_t *usize) { size_t alignment; bool zero; @@ -1508,35 +1523,39 @@ imallocx_no_prof(size_t size, int flags, size_t *usize) if (likely(flags == 0)) { if (config_stats || (config_valgrind && unlikely(in_valgrind))) *usize = s2u(size); - return (imalloc(size)); + return (imalloc(tsd, size)); } imallocx_flags_decode_hard(size, flags, usize, &alignment, &zero, &try_tcache, &arena); - return (imallocx_flags(*usize, alignment, zero, try_tcache, arena)); + return (imallocx_flags(tsd, *usize, alignment, zero, try_tcache, + arena)); } void * je_mallocx(size_t size, int flags) { + tsd_t *tsd; void *p; size_t usize; assert(size != 0); - if (unlikely(malloc_init())) + if (unlikely(malloc_init()) || unlikely((tsd = tsd_tryget()) == NULL)) goto label_oom; if (config_prof && opt_prof) - p = imallocx_prof(size, flags, &usize); + p = imallocx_prof(tsd, size, flags, &usize); else - p = imallocx_no_prof(size, flags, &usize); + p = imallocx_no_prof(tsd, size, flags, &usize); if (unlikely(p == NULL)) goto label_oom; if (config_stats) { + tsd_t *tsd = tsd_tryget(); assert(usize == isalloc(p, config_prof)); - thread_allocated_tsd_get()->allocated += usize; + if (tsd != NULL) + *tsd_thread_allocatedp_get(tsd) += usize; } UTRACE(0, size, p); JEMALLOC_VALGRIND_MALLOC(true, p, usize, MALLOCX_ZERO_GET(flags)); @@ -1551,47 +1570,47 @@ label_oom: 
} static void * -irallocx_prof_sample(void *oldptr, size_t size, size_t alignment, size_t usize, - bool zero, bool try_tcache_alloc, bool try_tcache_dalloc, arena_t *arena, - prof_tctx_t *tctx) +irallocx_prof_sample(tsd_t *tsd, void *oldptr, size_t size, size_t alignment, + size_t usize, bool zero, bool try_tcache_alloc, bool try_tcache_dalloc, + arena_t *arena, prof_tctx_t *tctx) { void *p; if (tctx == NULL) return (NULL); if (usize <= SMALL_MAXCLASS) { - p = iralloct(oldptr, LARGE_MINCLASS, alignment, zero, + p = iralloct(tsd, oldptr, LARGE_MINCLASS, alignment, zero, try_tcache_alloc, try_tcache_dalloc, arena); if (p == NULL) return (NULL); arena_prof_promoted(p, usize); } else { - p = iralloct(oldptr, size, alignment, zero, try_tcache_alloc, - try_tcache_dalloc, arena); + p = iralloct(tsd, oldptr, size, alignment, zero, + try_tcache_alloc, try_tcache_dalloc, arena); } return (p); } JEMALLOC_ALWAYS_INLINE_C void * -irallocx_prof(void *oldptr, size_t old_usize, size_t size, size_t alignment, - size_t *usize, bool zero, bool try_tcache_alloc, bool try_tcache_dalloc, - arena_t *arena) +irallocx_prof(tsd_t *tsd, void *oldptr, size_t old_usize, size_t size, + size_t alignment, size_t *usize, bool zero, bool try_tcache_alloc, + bool try_tcache_dalloc, arena_t *arena) { void *p; prof_tctx_t *old_tctx, *tctx; old_tctx = prof_tctx_get(oldptr); - tctx = prof_alloc_prep(*usize, false); + tctx = prof_alloc_prep(tsd, *usize, false); if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) { - p = irallocx_prof_sample(oldptr, size, alignment, *usize, zero, - try_tcache_alloc, try_tcache_dalloc, arena, tctx); + p = irallocx_prof_sample(tsd, oldptr, size, alignment, *usize, + zero, try_tcache_alloc, try_tcache_dalloc, arena, tctx); } else { - p = iralloct(oldptr, size, alignment, zero, try_tcache_alloc, - try_tcache_dalloc, arena); + p = iralloct(tsd, oldptr, size, alignment, zero, + try_tcache_alloc, try_tcache_dalloc, arena); } if (unlikely(p == NULL)) { - prof_alloc_rollback(tctx, 
false); + prof_alloc_rollback(tsd, tctx, false); return (NULL); } @@ -1606,7 +1625,7 @@ irallocx_prof(void *oldptr, size_t old_usize, size_t size, size_t alignment, */ *usize = isalloc(p, config_prof); } - prof_realloc(p, *usize, tctx, false, old_usize, old_tctx); + prof_realloc(tsd, p, *usize, tctx, false, old_usize, old_tctx); return (p); } @@ -1615,6 +1634,7 @@ void * je_rallocx(void *ptr, size_t size, int flags) { void *p; + tsd_t *tsd; size_t usize; UNUSED size_t old_usize JEMALLOC_CC_SILENCE_INIT(0); UNUSED size_t old_rzsize JEMALLOC_CC_SILENCE_INIT(0); @@ -1628,6 +1648,9 @@ je_rallocx(void *ptr, size_t size, int flags) assert(malloc_initialized || IS_INITIALIZER); malloc_thread_init(); + if (unlikely((tsd = tsd_tryget()) == NULL)) + goto label_oom; + if (unlikely((flags & MALLOCX_ARENA_MASK) != 0)) { unsigned arena_ind = MALLOCX_ARENA_GET(flags); arena_chunk_t *chunk; @@ -1651,12 +1674,12 @@ je_rallocx(void *ptr, size_t size, int flags) if (config_prof && opt_prof) { usize = (alignment == 0) ? 
s2u(size) : sa2u(size, alignment); assert(usize != 0); - p = irallocx_prof(ptr, old_usize, size, alignment, &usize, zero, - try_tcache_alloc, try_tcache_dalloc, arena); + p = irallocx_prof(tsd, ptr, old_usize, size, alignment, &usize, + zero, try_tcache_alloc, try_tcache_dalloc, arena); if (unlikely(p == NULL)) goto label_oom; } else { - p = iralloct(ptr, size, alignment, zero, try_tcache_alloc, + p = iralloct(tsd, ptr, size, alignment, zero, try_tcache_alloc, try_tcache_dalloc, arena); if (unlikely(p == NULL)) goto label_oom; @@ -1665,10 +1688,8 @@ je_rallocx(void *ptr, size_t size, int flags) } if (config_stats) { - thread_allocated_t *ta; - ta = thread_allocated_tsd_get(); - ta->allocated += usize; - ta->deallocated += old_usize; + *tsd_thread_allocatedp_get(tsd) += usize; + *tsd_thread_deallocatedp_get(tsd) += old_usize; } UTRACE(ptr, size, p); JEMALLOC_VALGRIND_REALLOC(true, p, usize, false, ptr, old_usize, @@ -1724,8 +1745,8 @@ ixallocx_prof_sample(void *ptr, size_t old_usize, size_t size, size_t extra, } JEMALLOC_ALWAYS_INLINE_C size_t -ixallocx_prof(void *ptr, size_t old_usize, size_t size, size_t extra, - size_t alignment, bool zero, arena_t *arena) +ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, + size_t extra, size_t alignment, bool zero, arena_t *arena) { size_t max_usize, usize; prof_tctx_t *old_tctx, *tctx; @@ -1739,7 +1760,7 @@ ixallocx_prof(void *ptr, size_t old_usize, size_t size, size_t extra, */ max_usize = (alignment == 0) ? 
s2u(size+extra) : sa2u(size+extra, alignment); - tctx = prof_alloc_prep(max_usize, false); + tctx = prof_alloc_prep(tsd, max_usize, false); if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) { usize = ixallocx_prof_sample(ptr, old_usize, size, extra, alignment, zero, max_usize, arena, tctx); @@ -1748,10 +1769,10 @@ ixallocx_prof(void *ptr, size_t old_usize, size_t size, size_t extra, zero, arena); } if (unlikely(usize == old_usize)) { - prof_alloc_rollback(tctx, false); + prof_alloc_rollback(tsd, tctx, false); return (usize); } - prof_realloc(ptr, usize, tctx, false, old_usize, old_tctx); + prof_realloc(tsd, ptr, usize, tctx, false, old_usize, old_tctx); return (usize); } @@ -1759,6 +1780,7 @@ ixallocx_prof(void *ptr, size_t old_usize, size_t size, size_t extra, size_t je_xallocx(void *ptr, size_t size, size_t extra, int flags) { + tsd_t *tsd; size_t usize, old_usize; UNUSED size_t old_rzsize JEMALLOC_CC_SILENCE_INIT(0); size_t alignment = MALLOCX_ALIGN_GET(flags); @@ -1778,12 +1800,16 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) arena = NULL; old_usize = isalloc(ptr, config_prof); + if (unlikely((tsd = tsd_tryget()) == NULL)) { + usize = old_usize; + goto label_not_resized; + } if (config_valgrind && unlikely(in_valgrind)) old_rzsize = u2rz(old_usize); if (config_prof && opt_prof) { - usize = ixallocx_prof(ptr, old_usize, size, extra, alignment, - zero, arena); + usize = ixallocx_prof(tsd, ptr, old_usize, size, extra, + alignment, zero, arena); } else { usize = ixallocx_helper(ptr, old_usize, size, extra, alignment, zero, arena); @@ -1792,10 +1818,8 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) goto label_not_resized; if (config_stats) { - thread_allocated_t *ta; - ta = thread_allocated_tsd_get(); - ta->allocated += usize; - ta->deallocated += old_usize; + *tsd_thread_allocatedp_get(tsd) += usize; + *tsd_thread_deallocatedp_get(tsd) += old_usize; } JEMALLOC_VALGRIND_REALLOC(false, ptr, usize, false, ptr, old_usize, old_rzsize, 
false, zero); @@ -1839,7 +1863,7 @@ je_dallocx(void *ptr, int flags) try_tcache = true; UTRACE(ptr, 0, 0); - ifree(ptr, try_tcache); + ifree(tsd_tryget(), ptr, try_tcache); } JEMALLOC_ALWAYS_INLINE_C size_t @@ -1875,7 +1899,7 @@ je_sdallocx(void *ptr, size_t size, int flags) try_tcache = true; UTRACE(ptr, 0, 0); - isfree(ptr, usize, try_tcache); + isfree(tsd_tryget(), ptr, usize, try_tcache); } size_t @@ -2072,9 +2096,9 @@ a0alloc(size_t size, bool zero) size = 1; if (size <= arena_maxclass) - return (arena_malloc(arenas[0], size, zero, false)); + return (arena_malloc(NULL, arenas[0], size, zero, false)); else - return (huge_malloc(NULL, size, zero)); + return (huge_malloc(NULL, arenas[0], size, zero)); } void * @@ -2101,7 +2125,7 @@ a0free(void *ptr) chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (chunk != ptr) - arena_dalloc(chunk, ptr, false); + arena_dalloc(NULL, chunk, ptr, false); else huge_dalloc(ptr); } diff --git a/src/prof.c b/src/prof.c index a773e224..dd84f533 100644 --- a/src/prof.c +++ b/src/prof.c @@ -14,8 +14,6 @@ /******************************************************************************/ /* Data. */ -malloc_tsd_data(, prof_tdata, prof_tdata_t *, NULL) - bool opt_prof = false; bool opt_prof_active = true; size_t opt_lg_prof_sample = LG_PROF_SAMPLE_DEFAULT; @@ -102,9 +100,9 @@ static bool prof_booted = false; */ static bool prof_tctx_should_destroy(prof_tctx_t *tctx); -static void prof_tctx_destroy(prof_tctx_t *tctx); +static void prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx); static bool prof_tdata_should_destroy(prof_tdata_t *tdata); -static void prof_tdata_destroy(prof_tdata_t *tdata); +static void prof_tdata_destroy(tsd_t *tsd, prof_tdata_t *tdata); /******************************************************************************/ /* Red-black trees. 
*/ @@ -151,7 +149,7 @@ rb_gen(static UNUSED, tdata_tree_, prof_tdata_tree_t, prof_tdata_t, tdata_link, /******************************************************************************/ void -prof_alloc_rollback(prof_tctx_t *tctx, bool updated) +prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated) { prof_tdata_t *tdata; @@ -164,8 +162,8 @@ prof_alloc_rollback(prof_tctx_t *tctx, bool updated) * potential for sample bias is minimal except in contrived * programs. */ - tdata = prof_tdata_get(true); - if ((uintptr_t)tdata > (uintptr_t)PROF_TDATA_STATE_MAX) + tdata = prof_tdata_get(tsd, true); + if (tdata != NULL) prof_sample_threshold_update(tctx->tdata); } @@ -173,7 +171,7 @@ prof_alloc_rollback(prof_tctx_t *tctx, bool updated) malloc_mutex_lock(tctx->tdata->lock); tctx->prepared = false; if (prof_tctx_should_destroy(tctx)) - prof_tctx_destroy(tctx); + prof_tctx_destroy(tsd, tctx); else malloc_mutex_unlock(tctx->tdata->lock); } @@ -195,7 +193,7 @@ prof_malloc_sample_object(const void *ptr, size_t usize, prof_tctx_t *tctx) { } void -prof_free_sampled_object(size_t usize, prof_tctx_t *tctx) +prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_tctx_t *tctx) { malloc_mutex_lock(tctx->tdata->lock); @@ -205,7 +203,7 @@ prof_free_sampled_object(size_t usize, prof_tctx_t *tctx) tctx->cnts.curbytes -= usize; if (prof_tctx_should_destroy(tctx)) - prof_tctx_destroy(tctx); + prof_tctx_destroy(tsd, tctx); else malloc_mutex_unlock(tctx->tdata->lock); } @@ -494,13 +492,13 @@ prof_tdata_mutex_choose(uint64_t thr_uid) } static prof_gctx_t * -prof_gctx_create(prof_bt_t *bt) +prof_gctx_create(tsd_t *tsd, prof_bt_t *bt) { /* * Create a single allocation that has space for vec of length bt->len. 
*/ - prof_gctx_t *gctx = (prof_gctx_t *)imalloc(offsetof(prof_gctx_t, vec) + - (bt->len * sizeof(void *))); + prof_gctx_t *gctx = (prof_gctx_t *)imalloc(tsd, offsetof(prof_gctx_t, + vec) + (bt->len * sizeof(void *))); if (gctx == NULL) return (NULL); gctx->lock = prof_gctx_mutex_choose(); @@ -518,7 +516,7 @@ prof_gctx_create(prof_bt_t *bt) } static void -prof_gctx_maybe_destroy(prof_gctx_t *gctx, prof_tdata_t *tdata) +prof_gctx_maybe_destroy(tsd_t *tsd, prof_gctx_t *gctx, prof_tdata_t *tdata) { cassert(config_prof); @@ -534,12 +532,12 @@ prof_gctx_maybe_destroy(prof_gctx_t *gctx, prof_tdata_t *tdata) malloc_mutex_lock(gctx->lock); if (tctx_tree_empty(&gctx->tctxs) && gctx->nlimbo == 1) { /* Remove gctx from bt2gctx. */ - if (ckh_remove(&bt2gctx, &gctx->bt, NULL, NULL)) + if (ckh_remove(tsd, &bt2gctx, &gctx->bt, NULL, NULL)) not_reached(); prof_leave(tdata); /* Destroy gctx. */ malloc_mutex_unlock(gctx->lock); - idalloc(gctx); + idalloc(tsd, gctx); } else { /* * Compensate for increment in prof_tctx_destroy() or @@ -580,7 +578,7 @@ prof_gctx_should_destroy(prof_gctx_t *gctx) /* tctx->tdata->lock is held upon entry, and released before return. 
*/ static void -prof_tctx_destroy(prof_tctx_t *tctx) +prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) { prof_tdata_t *tdata = tctx->tdata; prof_gctx_t *gctx = tctx->gctx; @@ -592,7 +590,7 @@ prof_tctx_destroy(prof_tctx_t *tctx) assert(tctx->cnts.accumobjs == 0); assert(tctx->cnts.accumbytes == 0); - ckh_remove(&tdata->bt2tctx, &gctx->bt, NULL, NULL); + ckh_remove(tsd, &tdata->bt2tctx, &gctx->bt, NULL, NULL); destroy_tdata = prof_tdata_should_destroy(tdata); malloc_mutex_unlock(tdata->lock); @@ -618,17 +616,17 @@ prof_tctx_destroy(prof_tctx_t *tctx) destroy_gctx = false; malloc_mutex_unlock(gctx->lock); if (destroy_gctx) - prof_gctx_maybe_destroy(gctx, tdata); + prof_gctx_maybe_destroy(tsd, gctx, tdata); if (destroy_tdata) - prof_tdata_destroy(tdata); + prof_tdata_destroy(tsd, tdata); - idalloc(tctx); + idalloc(tsd, tctx); } static bool -prof_lookup_global(prof_bt_t *bt, prof_tdata_t *tdata, void **p_btkey, - prof_gctx_t **p_gctx, bool *p_new_gctx) +prof_lookup_global(tsd_t *tsd, prof_bt_t *bt, prof_tdata_t *tdata, + void **p_btkey, prof_gctx_t **p_gctx, bool *p_new_gctx) { union { prof_gctx_t *p; @@ -643,16 +641,16 @@ prof_lookup_global(prof_bt_t *bt, prof_tdata_t *tdata, void **p_btkey, prof_enter(tdata); if (ckh_search(&bt2gctx, bt, &btkey.v, &gctx.v)) { /* bt has never been seen before. Insert it. */ - gctx.p = prof_gctx_create(bt); + gctx.p = prof_gctx_create(tsd, bt); if (gctx.v == NULL) { prof_leave(tdata); return (true); } btkey.p = &gctx.p->bt; - if (ckh_insert(&bt2gctx, btkey.v, gctx.v)) { + if (ckh_insert(tsd, &bt2gctx, btkey.v, gctx.v)) { /* OOM. 
*/ prof_leave(tdata); - idalloc(gctx.v); + idalloc(tsd, gctx.v); return (true); } new_gctx = true; @@ -675,7 +673,7 @@ prof_lookup_global(prof_bt_t *bt, prof_tdata_t *tdata, void **p_btkey, } prof_tctx_t * -prof_lookup(prof_bt_t *bt) +prof_lookup(tsd_t *tsd, prof_bt_t *bt) { union { prof_tctx_t *p; @@ -686,8 +684,8 @@ prof_lookup(prof_bt_t *bt) cassert(config_prof); - tdata = prof_tdata_get(false); - if ((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) + tdata = prof_tdata_get(tsd, false); + if (tdata == NULL) return (NULL); malloc_mutex_lock(tdata->lock); @@ -704,15 +702,15 @@ prof_lookup(prof_bt_t *bt) * This thread's cache lacks bt. Look for it in the global * cache. */ - if (prof_lookup_global(bt, tdata, &btkey, &gctx, + if (prof_lookup_global(tsd, bt, tdata, &btkey, &gctx, &new_gctx)) return (NULL); /* Link a prof_tctx_t into gctx for this thread. */ - ret.v = imalloc(sizeof(prof_tctx_t)); + ret.v = imalloc(tsd, sizeof(prof_tctx_t)); if (ret.p == NULL) { if (new_gctx) - prof_gctx_maybe_destroy(gctx, tdata); + prof_gctx_maybe_destroy(tsd, gctx, tdata); return (NULL); } ret.p->tdata = tdata; @@ -721,12 +719,12 @@ prof_lookup(prof_bt_t *bt) ret.p->prepared = true; ret.p->state = prof_tctx_state_nominal; malloc_mutex_lock(tdata->lock); - error = ckh_insert(&tdata->bt2tctx, btkey, ret.v); + error = ckh_insert(tsd, &tdata->bt2tctx, btkey, ret.v); malloc_mutex_unlock(tdata->lock); if (error) { if (new_gctx) - prof_gctx_maybe_destroy(gctx, tdata); - idalloc(ret.v); + prof_gctx_maybe_destroy(tsd, gctx, tdata); + idalloc(tsd, ret.v); return (NULL); } malloc_mutex_lock(gctx->lock); @@ -798,10 +796,13 @@ size_t prof_bt_count(void) { size_t bt_count; + tsd_t *tsd; prof_tdata_t *tdata; - tdata = prof_tdata_get(false); - if ((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) + if ((tsd = tsd_tryget()) == NULL) + return (0); + tdata = prof_tdata_get(tsd, false); + if (tdata == NULL) return (0); prof_enter(tdata); @@ -989,6 +990,7 @@ static prof_tctx_t * 
prof_tctx_finish_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) { prof_tctx_t *ret; + tsd_t *tsd = (tsd_t *)arg; switch (tctx->state) { case prof_tctx_state_nominal: @@ -1000,7 +1002,7 @@ prof_tctx_finish_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) case prof_tctx_state_purgatory: ret = tctx_tree_next(tctxs, tctx); tctx_tree_remove(tctxs, tctx); - idalloc(tctx); + idalloc(tsd, tctx); goto label_return; default: not_reached(); @@ -1049,7 +1051,8 @@ prof_gctx_merge_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *arg) static prof_gctx_t * prof_gctx_finish_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *arg) { - prof_tdata_t *tdata = (prof_tdata_t *)arg; + tsd_t *tsd = (tsd_t *)arg; + prof_tdata_t *tdata = prof_tdata_get(tsd, false); prof_tctx_t *next; bool destroy_gctx; @@ -1057,13 +1060,13 @@ prof_gctx_finish_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *arg) next = NULL; do { next = tctx_tree_iter(&gctx->tctxs, next, prof_tctx_finish_iter, - NULL); + tsd); } while (next != NULL); gctx->nlimbo--; destroy_gctx = prof_gctx_should_destroy(gctx); malloc_mutex_unlock(gctx->lock); if (destroy_gctx) - prof_gctx_maybe_destroy(gctx, tdata); + prof_gctx_maybe_destroy(tsd, gctx, tdata); return (NULL); } @@ -1277,7 +1280,7 @@ label_return: } static bool -prof_dump(bool propagate_err, const char *filename, bool leakcheck) +prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, bool leakcheck) { prof_tdata_t *tdata; prof_cnt_t cnt_all; @@ -1291,8 +1294,8 @@ prof_dump(bool propagate_err, const char *filename, bool leakcheck) cassert(config_prof); - tdata = prof_tdata_get(false); - if ((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) + tdata = prof_tdata_get(tsd, false); + if (tdata == NULL) return (true); malloc_mutex_lock(&prof_dump_mtx); @@ -1341,7 +1344,7 @@ prof_dump(bool propagate_err, const char *filename, bool leakcheck) if (prof_dump_close(propagate_err)) goto label_open_close_error; - gctx_tree_iter(&gctxs, 
NULL, prof_gctx_finish_iter, tdata); + gctx_tree_iter(&gctxs, NULL, prof_gctx_finish_iter, tsd); malloc_mutex_unlock(&prof_dump_mtx); if (leakcheck) @@ -1351,7 +1354,7 @@ prof_dump(bool propagate_err, const char *filename, bool leakcheck) label_write_error: prof_dump_close(propagate_err); label_open_close_error: - gctx_tree_iter(&gctxs, NULL, prof_gctx_finish_iter, tdata); + gctx_tree_iter(&gctxs, NULL, prof_gctx_finish_iter, tsd); malloc_mutex_unlock(&prof_dump_mtx); return (true); } @@ -1381,24 +1384,28 @@ prof_dump_filename(char *filename, char v, uint64_t vseq) static void prof_fdump(void) { + tsd_t *tsd; char filename[DUMP_FILENAME_BUFSIZE]; cassert(config_prof); if (prof_booted == false) return; + if ((tsd = tsd_tryget()) == NULL) + return; if (opt_prof_final && opt_prof_prefix[0] != '\0') { malloc_mutex_lock(&prof_dump_seq_mtx); prof_dump_filename(filename, 'f', VSEQ_INVALID); malloc_mutex_unlock(&prof_dump_seq_mtx); - prof_dump(false, filename, opt_prof_leak); + prof_dump(tsd, false, filename, opt_prof_leak); } } void prof_idump(void) { + tsd_t *tsd; prof_tdata_t *tdata; char filename[PATH_MAX + 1]; @@ -1406,8 +1413,10 @@ prof_idump(void) if (prof_booted == false) return; - tdata = prof_tdata_get(false); - if ((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) + if ((tsd = tsd_tryget()) == NULL) + return; + tdata = prof_tdata_get(tsd, false); + if (tdata == NULL) return; if (tdata->enq) { tdata->enq_idump = true; @@ -1419,19 +1428,22 @@ prof_idump(void) prof_dump_filename(filename, 'i', prof_dump_iseq); prof_dump_iseq++; malloc_mutex_unlock(&prof_dump_seq_mtx); - prof_dump(false, filename, false); + prof_dump(tsd, false, filename, false); } } bool prof_mdump(const char *filename) { + tsd_t *tsd; char filename_buf[DUMP_FILENAME_BUFSIZE]; cassert(config_prof); if (opt_prof == false || prof_booted == false) return (true); + if ((tsd = tsd_tryget()) == NULL) + return (true); if (filename == NULL) { /* No filename specified, so automatically generate one. 
*/ @@ -1443,12 +1455,13 @@ prof_mdump(const char *filename) malloc_mutex_unlock(&prof_dump_seq_mtx); filename = filename_buf; } - return (prof_dump(true, filename, false)); + return (prof_dump(tsd, true, filename, false)); } void prof_gdump(void) { + tsd_t *tsd; prof_tdata_t *tdata; char filename[DUMP_FILENAME_BUFSIZE]; @@ -1456,8 +1469,10 @@ prof_gdump(void) if (prof_booted == false) return; - tdata = prof_tdata_get(false); - if ((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) + if ((tsd = tsd_tryget()) == NULL) + return; + tdata = prof_tdata_get(tsd, false); + if (tdata == NULL) return; if (tdata->enq) { tdata->enq_gdump = true; @@ -1469,7 +1484,7 @@ prof_gdump(void) prof_dump_filename(filename, 'u', prof_dump_useq); prof_dump_useq++; malloc_mutex_unlock(&prof_dump_seq_mtx); - prof_dump(false, filename, false); + prof_dump(tsd, false, filename, false); } } @@ -1510,14 +1525,14 @@ prof_thr_uid_alloc(void) } static prof_tdata_t * -prof_tdata_init_impl(uint64_t thr_uid) +prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid) { prof_tdata_t *tdata; cassert(config_prof); /* Initialize an empty cache for this thread. 
*/ - tdata = (prof_tdata_t *)imalloc(sizeof(prof_tdata_t)); + tdata = (prof_tdata_t *)imalloc(tsd, sizeof(prof_tdata_t)); if (tdata == NULL) return (NULL); @@ -1526,9 +1541,9 @@ prof_tdata_init_impl(uint64_t thr_uid) tdata->thread_name = NULL; tdata->state = prof_tdata_state_attached; - if (ckh_new(&tdata->bt2tctx, PROF_CKH_MINITEMS, + if (ckh_new(tsd, &tdata->bt2tctx, PROF_CKH_MINITEMS, prof_bt_hash, prof_bt_keycomp)) { - idalloc(tdata); + idalloc(tsd, tdata); return (NULL); } @@ -1542,8 +1557,6 @@ prof_tdata_init_impl(uint64_t thr_uid) tdata->dumping = false; tdata->active = true; - prof_tdata_tsd_set(&tdata); - malloc_mutex_lock(&tdatas_mtx); tdata_tree_insert(&tdatas, tdata); malloc_mutex_unlock(&tdatas_mtx); @@ -1552,17 +1565,17 @@ prof_tdata_init_impl(uint64_t thr_uid) } prof_tdata_t * -prof_tdata_init(void) +prof_tdata_init(tsd_t *tsd) { - return (prof_tdata_init_impl(prof_thr_uid_alloc())); + return (prof_tdata_init_impl(tsd, prof_thr_uid_alloc())); } prof_tdata_t * -prof_tdata_reinit(prof_tdata_t *tdata) +prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata) { - return (prof_tdata_init_impl(tdata->thr_uid)); + return (prof_tdata_init_impl(tsd, tdata->thr_uid)); } /* tdata->lock must be held. 
*/ @@ -1578,7 +1591,7 @@ prof_tdata_should_destroy(prof_tdata_t *tdata) } static void -prof_tdata_destroy(prof_tdata_t *tdata) +prof_tdata_destroy(tsd_t *tsd, prof_tdata_t *tdata) { assert(prof_tdata_should_destroy(tdata)); @@ -1588,13 +1601,14 @@ prof_tdata_destroy(prof_tdata_t *tdata) malloc_mutex_unlock(&tdatas_mtx); if (tdata->thread_name != NULL) - idalloc(tdata->thread_name); - ckh_delete(&tdata->bt2tctx); - idalloc(tdata); + idalloc(tsd, tdata->thread_name); + ckh_delete(tsd, &tdata->bt2tctx); + idalloc(tsd, tdata); } static void -prof_tdata_state_transition(prof_tdata_t *tdata, prof_tdata_state_t state) +prof_tdata_state_transition(tsd_t *tsd, prof_tdata_t *tdata, + prof_tdata_state_t state) { bool destroy_tdata; @@ -1606,33 +1620,34 @@ prof_tdata_state_transition(prof_tdata_t *tdata, prof_tdata_state_t state) destroy_tdata = false; malloc_mutex_unlock(tdata->lock); if (destroy_tdata) - prof_tdata_destroy(tdata); + prof_tdata_destroy(tsd, tdata); } static void -prof_tdata_detach(prof_tdata_t *tdata) +prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata) { - prof_tdata_state_transition(tdata, prof_tdata_state_detached); + prof_tdata_state_transition(tsd, tdata, prof_tdata_state_detached); } static void -prof_tdata_expire(prof_tdata_t *tdata) +prof_tdata_expire(tsd_t *tsd, prof_tdata_t *tdata) { - prof_tdata_state_transition(tdata, prof_tdata_state_expired); + prof_tdata_state_transition(tsd, tdata, prof_tdata_state_expired); } static prof_tdata_t * prof_tdata_reset_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, void *arg) { + tsd_t *tsd = (tsd_t *)arg; - prof_tdata_expire(tdata); + prof_tdata_expire(tsd, tdata); return (NULL); } void -prof_reset(size_t lg_sample) +prof_reset(tsd_t *tsd, size_t lg_sample) { assert(lg_sample < (sizeof(uint64_t) << 3)); @@ -1641,69 +1656,58 @@ prof_reset(size_t lg_sample) malloc_mutex_lock(&tdatas_mtx); lg_prof_sample = lg_sample; - tdata_tree_iter(&tdatas, NULL, prof_tdata_reset_iter, NULL); + tdata_tree_iter(&tdatas, NULL, 
prof_tdata_reset_iter, tsd); malloc_mutex_unlock(&tdatas_mtx); malloc_mutex_unlock(&prof_dump_mtx); } void -prof_tdata_cleanup(void *arg) +prof_tdata_cleanup(tsd_t *tsd) { - prof_tdata_t *tdata = *(prof_tdata_t **)arg; + prof_tdata_t *tdata; - cassert(config_prof); + if (!config_prof) + return; - if (tdata == PROF_TDATA_STATE_REINCARNATED) { - /* - * Another destructor deallocated memory after this destructor - * was called. Reset tdata to PROF_TDATA_STATE_PURGATORY in - * order to receive another callback. - */ - tdata = PROF_TDATA_STATE_PURGATORY; - prof_tdata_tsd_set(&tdata); - } else if (tdata == PROF_TDATA_STATE_PURGATORY) { - /* - * The previous time this destructor was called, we set the key - * to PROF_TDATA_STATE_PURGATORY so that other destructors - * wouldn't cause re-creation of the tdata. This time, do - * nothing, so that the destructor will not be called again. - */ - } else if (tdata != NULL) { - prof_tdata_detach(tdata); - tdata = PROF_TDATA_STATE_PURGATORY; - prof_tdata_tsd_set(&tdata); - } + tdata = tsd_prof_tdata_get(tsd); + if (tdata != NULL) + prof_tdata_detach(tsd, tdata); } const char * prof_thread_name_get(void) { - prof_tdata_t *tdata = prof_tdata_get(true); - if ((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) + tsd_t *tsd; + prof_tdata_t *tdata; + + if ((tsd = tsd_tryget()) == NULL) + return (NULL); + tdata = prof_tdata_get(tsd, true); + if (tdata == NULL) return (NULL); return (tdata->thread_name); } bool -prof_thread_name_set(const char *thread_name) +prof_thread_name_set(tsd_t *tsd, const char *thread_name) { prof_tdata_t *tdata; size_t size; char *s; - tdata = prof_tdata_get(true); - if ((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) + tdata = prof_tdata_get(tsd, true); + if (tdata == NULL) return (true); size = strlen(thread_name) + 1; - s = imalloc(size); + s = imalloc(tsd, size); if (s == NULL) return (true); memcpy(s, thread_name, size); if (tdata->thread_name != NULL) - idalloc(tdata->thread_name); + idalloc(tsd, 
tdata->thread_name); tdata->thread_name = s; return (false); } @@ -1711,8 +1715,13 @@ prof_thread_name_set(const char *thread_name) bool prof_thread_active_get(void) { - prof_tdata_t *tdata = prof_tdata_get(true); - if ((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) + tsd_t *tsd; + prof_tdata_t *tdata; + + if ((tsd = tsd_tryget()) == NULL) + return (false); + tdata = prof_tdata_get(tsd, true); + if (tdata == NULL) return (false); return (tdata->active); } @@ -1720,10 +1729,13 @@ prof_thread_active_get(void) bool prof_thread_active_set(bool active) { + tsd_t *tsd; prof_tdata_t *tdata; - tdata = prof_tdata_get(true); - if ((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) + if ((tsd = tsd_tryget()) == NULL) + return (true); + tdata = prof_tdata_get(tsd, true); + if (tdata == NULL) return (true); tdata->active = active; return (false); @@ -1772,20 +1784,18 @@ prof_boot2(void) cassert(config_prof); if (opt_prof) { + tsd_t *tsd; unsigned i; lg_prof_sample = opt_lg_prof_sample; - if (ckh_new(&bt2gctx, PROF_CKH_MINITEMS, prof_bt_hash, + if ((tsd = tsd_tryget()) == NULL) + return (true); + if (ckh_new(tsd, &bt2gctx, PROF_CKH_MINITEMS, prof_bt_hash, prof_bt_keycomp)) return (true); if (malloc_mutex_init(&bt2gctx_mtx)) return (true); - if (prof_tdata_tsd_boot()) { - malloc_write( - ": Error in pthread_key_create()\n"); - abort(); - } tdata_tree_new(&tdatas); if (malloc_mutex_init(&tdatas_mtx)) diff --git a/src/quarantine.c b/src/quarantine.c index efddeae7..1301b479 100644 --- a/src/quarantine.c +++ b/src/quarantine.c @@ -9,26 +9,22 @@ #define QUARANTINE_STATE_PURGATORY ((quarantine_t *)(uintptr_t)2) #define QUARANTINE_STATE_MAX QUARANTINE_STATE_PURGATORY -/******************************************************************************/ -/* Data. */ - -malloc_tsd_data(, quarantine, quarantine_t *, NULL) - /******************************************************************************/ /* Function prototypes for non-inline static functions. 
*/ -static quarantine_t *quarantine_grow(quarantine_t *quarantine); -static void quarantine_drain_one(quarantine_t *quarantine); -static void quarantine_drain(quarantine_t *quarantine, size_t upper_bound); +static quarantine_t *quarantine_grow(tsd_t *tsd, quarantine_t *quarantine); +static void quarantine_drain_one(tsd_t *tsd, quarantine_t *quarantine); +static void quarantine_drain(tsd_t *tsd, quarantine_t *quarantine, + size_t upper_bound); /******************************************************************************/ quarantine_t * -quarantine_init(size_t lg_maxobjs) +quarantine_init(tsd_t *tsd, size_t lg_maxobjs) { quarantine_t *quarantine; - quarantine = (quarantine_t *)imalloc(offsetof(quarantine_t, objs) + + quarantine = (quarantine_t *)imalloc(tsd, offsetof(quarantine_t, objs) + ((ZU(1) << lg_maxobjs) * sizeof(quarantine_obj_t))); if (quarantine == NULL) return (NULL); @@ -37,19 +33,17 @@ quarantine_init(size_t lg_maxobjs) quarantine->first = 0; quarantine->lg_maxobjs = lg_maxobjs; - quarantine_tsd_set(&quarantine); - return (quarantine); } static quarantine_t * -quarantine_grow(quarantine_t *quarantine) +quarantine_grow(tsd_t *tsd, quarantine_t *quarantine) { quarantine_t *ret; - ret = quarantine_init(quarantine->lg_maxobjs + 1); + ret = quarantine_init(tsd, quarantine->lg_maxobjs + 1); if (ret == NULL) { - quarantine_drain_one(quarantine); + quarantine_drain_one(tsd, quarantine); return (quarantine); } @@ -71,17 +65,17 @@ quarantine_grow(quarantine_t *quarantine) memcpy(&ret->objs[ncopy_a], quarantine->objs, ncopy_b * sizeof(quarantine_obj_t)); } - idalloc(quarantine); + idalloc(tsd, quarantine); return (ret); } static void -quarantine_drain_one(quarantine_t *quarantine) +quarantine_drain_one(tsd_t *tsd, quarantine_t *quarantine) { quarantine_obj_t *obj = &quarantine->objs[quarantine->first]; assert(obj->usize == isalloc(obj->ptr, config_prof)); - idalloc(obj->ptr); + idalloc(tsd, obj->ptr); quarantine->curbytes -= obj->usize; quarantine->curobjs--; 
quarantine->first = (quarantine->first + 1) & ((ZU(1) << @@ -89,15 +83,15 @@ quarantine_drain_one(quarantine_t *quarantine) } static void -quarantine_drain(quarantine_t *quarantine, size_t upper_bound) +quarantine_drain(tsd_t *tsd, quarantine_t *quarantine, size_t upper_bound) { while (quarantine->curbytes > upper_bound && quarantine->curobjs > 0) - quarantine_drain_one(quarantine); + quarantine_drain_one(tsd, quarantine); } void -quarantine(void *ptr) +quarantine(tsd_t *tsd, void *ptr) { quarantine_t *quarantine; size_t usize = isalloc(ptr, config_prof); @@ -105,17 +99,8 @@ quarantine(void *ptr) cassert(config_fill); assert(opt_quarantine); - quarantine = *quarantine_tsd_get(); - if ((uintptr_t)quarantine <= (uintptr_t)QUARANTINE_STATE_MAX) { - if (quarantine == QUARANTINE_STATE_PURGATORY) { - /* - * Make a note that quarantine() was called after - * quarantine_cleanup() was called. - */ - quarantine = QUARANTINE_STATE_REINCARNATED; - quarantine_tsd_set(&quarantine); - } - idalloc(ptr); + if ((quarantine = tsd_quarantine_get(tsd)) == NULL) { + idalloc(tsd, ptr); return; } /* @@ -125,11 +110,11 @@ quarantine(void *ptr) if (quarantine->curbytes + usize > opt_quarantine) { size_t upper_bound = (opt_quarantine >= usize) ? opt_quarantine - usize : 0; - quarantine_drain(quarantine, upper_bound); + quarantine_drain(tsd, quarantine, upper_bound); } /* Grow the quarantine ring buffer if it's full. */ if (quarantine->curobjs == (ZU(1) << quarantine->lg_maxobjs)) - quarantine = quarantine_grow(quarantine); + quarantine = quarantine_grow(tsd, quarantine); /* quarantine_grow() must free a slot if it fails to grow. */ assert(quarantine->curobjs < (ZU(1) << quarantine->lg_maxobjs)); /* Append ptr if its size doesn't exceed the quarantine size. 
*/ @@ -154,46 +139,22 @@ quarantine(void *ptr) } } else { assert(quarantine->curbytes == 0); - idalloc(ptr); + idalloc(tsd, ptr); } } void -quarantine_cleanup(void *arg) +quarantine_cleanup(tsd_t *tsd) { - quarantine_t *quarantine = *(quarantine_t **)arg; + quarantine_t *quarantine; - if (quarantine == QUARANTINE_STATE_REINCARNATED) { - /* - * Another destructor deallocated memory after this destructor - * was called. Reset quarantine to QUARANTINE_STATE_PURGATORY - * in order to receive another callback. - */ - quarantine = QUARANTINE_STATE_PURGATORY; - quarantine_tsd_set(&quarantine); - } else if (quarantine == QUARANTINE_STATE_PURGATORY) { - /* - * The previous time this destructor was called, we set the key - * to QUARANTINE_STATE_PURGATORY so that other destructors - * wouldn't cause re-creation of the quarantine. This time, do - * nothing, so that the destructor will not be called again. - */ - } else if (quarantine != NULL) { - quarantine_drain(quarantine, 0); - idalloc(quarantine); - quarantine = QUARANTINE_STATE_PURGATORY; - quarantine_tsd_set(&quarantine); + if (!config_fill) + return; + + quarantine = tsd_quarantine_get(tsd); + if (quarantine != NULL) { + quarantine_drain(tsd, quarantine, 0); + idalloc(tsd, quarantine); + tsd_quarantine_set(tsd, NULL); } } - -bool -quarantine_boot(void) -{ - - cassert(config_fill); - - if (quarantine_tsd_boot()) - return (true); - - return (false); -} diff --git a/src/rtree.c b/src/rtree.c index 87b0b154..2ff93dbe 100644 --- a/src/rtree.c +++ b/src/rtree.c @@ -9,8 +9,10 @@ rtree_new(unsigned bits, rtree_alloc_t *alloc, rtree_dalloc_t *dalloc) assert(bits > 0 && bits <= (sizeof(uintptr_t) << 3)); - bits_per_level = jemalloc_ffs(pow2_ceil((RTREE_NODESIZE / sizeof(void *)))) - 1; - bits_in_leaf = jemalloc_ffs(pow2_ceil((RTREE_NODESIZE / sizeof(uint8_t)))) - 1; + bits_per_level = jemalloc_ffs(pow2_ceil((RTREE_NODESIZE / sizeof(void + *)))) - 1; + bits_in_leaf = jemalloc_ffs(pow2_ceil((RTREE_NODESIZE / + sizeof(uint8_t)))) - 
1; if (bits > bits_in_leaf) { height = 1 + (bits - bits_in_leaf) / bits_per_level; if ((height-1) * bits_per_level + bits_in_leaf != bits) diff --git a/src/tcache.c b/src/tcache.c index f86a46e6..bb4c3cc0 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -4,9 +4,6 @@ /******************************************************************************/ /* Data. */ -malloc_tsd_data(, tcache, tcache_t *, NULL) -malloc_tsd_data(, tcache_enabled, tcache_enabled_t, tcache_enabled_default) - bool opt_tcache = true; ssize_t opt_lg_tcache_max = LG_TCACHE_MAXCLASS_DEFAULT; @@ -262,43 +259,14 @@ tcache_arena_dissociate(tcache_t *tcache) } tcache_t * -tcache_get_hard(tcache_t *tcache, bool create) +tcache_get_hard(tsd_t *tsd) { - if (tcache == NULL) { - if (create == false) { - /* - * Creating a tcache here would cause - * allocation as a side effect of free(). - * Ordinarily that would be okay since - * tcache_create() failure is a soft failure - * that doesn't propagate. However, if TLS - * data are freed via free() as in glibc, - * subtle corruption could result from setting - * a TLS variable after its backing memory is - * freed. - */ - return (NULL); - } - if (tcache_enabled_get() == false) { - tcache_enabled_set(false); /* Memoize. */ - return (NULL); - } - return (tcache_create(choose_arena(NULL))); - } - if (tcache == TCACHE_STATE_PURGATORY) { - /* - * Make a note that an allocator function was called - * after tcache_thread_cleanup() was called. - */ - tcache = TCACHE_STATE_REINCARNATED; - tcache_tsd_set(&tcache); + if (tcache_enabled_get() == false) { + tcache_enabled_set(false); /* Memoize. 
*/ return (NULL); } - if (tcache == TCACHE_STATE_REINCARNATED) - return (NULL); - not_reached(); - return (NULL); + return (tcache_create(choose_arena(tsd, NULL))); } tcache_t * @@ -328,7 +296,7 @@ tcache_create(arena_t *arena) else if (size <= tcache_maxclass) tcache = (tcache_t *)arena_malloc_large(arena, size, true); else - tcache = (tcache_t *)icalloct(size, false, arena); + tcache = (tcache_t *)icalloct(NULL, size, false, arena); if (tcache == NULL) return (NULL); @@ -343,13 +311,11 @@ tcache_create(arena_t *arena) stack_offset += tcache_bin_info[i].ncached_max * sizeof(void *); } - tcache_tsd_set(&tcache); - return (tcache); } -void -tcache_destroy(tcache_t *tcache) +static void +tcache_destroy(tsd_t *tsd, tcache_t *tcache) { unsigned i; size_t tcache_size; @@ -403,39 +369,30 @@ tcache_destroy(tcache_t *tcache) arena_dalloc_large(arena, chunk, tcache); } else - idalloct(tcache, false); + idalloct(tsd, tcache, false); } void -tcache_thread_cleanup(void *arg) +tcache_cleanup(tsd_t *tsd) { - tcache_t *tcache = *(tcache_t **)arg; + tcache_t *tcache; - if (tcache == TCACHE_STATE_DISABLED) { - /* Do nothing. */ - } else if (tcache == TCACHE_STATE_REINCARNATED) { - /* - * Another destructor called an allocator function after this - * destructor was called. Reset tcache to - * TCACHE_STATE_PURGATORY in order to receive another callback. - */ - tcache = TCACHE_STATE_PURGATORY; - tcache_tsd_set(&tcache); - } else if (tcache == TCACHE_STATE_PURGATORY) { - /* - * The previous time this destructor was called, we set the key - * to TCACHE_STATE_PURGATORY so that other destructors wouldn't - * cause re-creation of the tcache. This time, do nothing, so - * that the destructor will not be called again. 
- */ - } else if (tcache != NULL) { - assert(tcache != TCACHE_STATE_PURGATORY); - tcache_destroy(tcache); - tcache = TCACHE_STATE_PURGATORY; - tcache_tsd_set(&tcache); + if (!config_tcache) + return; + + if ((tcache = tsd_tcache_get(tsd)) != NULL) { + tcache_destroy(tsd, tcache); + tsd_tcache_set(tsd, NULL); } } +void +tcache_enabled_cleanup(tsd_t *tsd) +{ + + /* Do nothing. */ +} + /* Caller must own arena->lock. */ void tcache_stats_merge(tcache_t *tcache, arena_t *arena) @@ -464,7 +421,7 @@ tcache_stats_merge(tcache_t *tcache, arena_t *arena) } bool -tcache_boot0(void) +tcache_boot(void) { unsigned i; @@ -504,13 +461,3 @@ tcache_boot0(void) return (false); } - -bool -tcache_boot1(void) -{ - - if (tcache_tsd_boot() || tcache_enabled_tsd_boot()) - return (true); - - return (false); -} diff --git a/src/tsd.c b/src/tsd.c index 700caabf..27a70ee8 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -7,6 +7,8 @@ static unsigned ncleanups; static malloc_tsd_cleanup_t cleanups[MALLOC_TSD_CLEANUPS_MAX]; +malloc_tsd_data(, , tsd_t, TSD_INITIALIZER) + /******************************************************************************/ void * @@ -14,14 +16,15 @@ malloc_tsd_malloc(size_t size) { /* Avoid choose_arena() in order to dodge bootstrapping issues. */ - return (arena_malloc(arenas[0], size, false, false)); + return (arena_malloc(NULL, arenas[0], CACHELINE_CEILING(size), false, + false)); } void malloc_tsd_dalloc(void *wrapper) { - idalloct(wrapper, false); + idalloct(NULL, wrapper, false); } void @@ -67,10 +70,54 @@ malloc_tsd_cleanup_register(bool (*f)(void)) } void +tsd_cleanup(void *arg) +{ + tsd_t *tsd = (tsd_t *)arg; + + if (tsd == NULL) { + /* OOM during re-initialization. 
*/ + return; + } + + switch (tsd->state) { + case tsd_state_nominal: +#define O(n, t) \ + n##_cleanup(tsd); +MALLOC_TSD +#undef O + tsd->state = tsd_state_purgatory; + tsd_set(tsd); + break; + case tsd_state_purgatory: + /* + * The previous time this destructor was called, we set the + * state to tsd_state_purgatory so that other destructors + * wouldn't cause re-creation of the tsd. This time, do + * nothing, and do not request another callback. + */ + break; + case tsd_state_reincarnated: + /* + * Another destructor deallocated memory after this destructor + * was called. Reset state to tsd_state_purgatory and request + * another callback. + */ + tsd->state = tsd_state_purgatory; + tsd_set(tsd); + break; + default: + not_reached(); + } +} + +bool malloc_tsd_boot(void) { ncleanups = 0; + if (tsd_boot()) + return (true); + return (false); } #ifdef _WIN32 diff --git a/test/unit/ckh.c b/test/unit/ckh.c index b214c279..148b81e7 100644 --- a/test/unit/ckh.c +++ b/test/unit/ckh.c @@ -2,20 +2,25 @@ TEST_BEGIN(test_new_delete) { + tsd_t *tsd; ckh_t ckh; - assert_false(ckh_new(&ckh, 2, ckh_string_hash, ckh_string_keycomp), - "Unexpected ckh_new() error"); - ckh_delete(&ckh); + tsd = tsd_tryget(); + assert_ptr_not_null(tsd, "Unexpected tsd failure"); - assert_false(ckh_new(&ckh, 3, ckh_pointer_hash, ckh_pointer_keycomp), + assert_false(ckh_new(tsd, &ckh, 2, ckh_string_hash, ckh_string_keycomp), "Unexpected ckh_new() error"); - ckh_delete(&ckh); + ckh_delete(tsd, &ckh); + + assert_false(ckh_new(tsd, &ckh, 3, ckh_pointer_hash, + ckh_pointer_keycomp), "Unexpected ckh_new() error"); + ckh_delete(tsd, &ckh); } TEST_END TEST_BEGIN(test_count_insert_search_remove) { + tsd_t *tsd; ckh_t ckh; const char *strs[] = { "a string", @@ -26,7 +31,10 @@ TEST_BEGIN(test_count_insert_search_remove) const char *missing = "A string not in the hash table."; size_t i; - assert_false(ckh_new(&ckh, 2, ckh_string_hash, ckh_string_keycomp), + tsd = tsd_tryget(); + assert_ptr_not_null(tsd, "Unexpected 
tsd failure"); + + assert_false(ckh_new(tsd, &ckh, 2, ckh_string_hash, ckh_string_keycomp), "Unexpected ckh_new() error"); assert_zu_eq(ckh_count(&ckh), 0, "ckh_count() should return %zu, but it returned %zu", ZU(0), @@ -34,7 +42,7 @@ TEST_BEGIN(test_count_insert_search_remove) /* Insert. */ for (i = 0; i < sizeof(strs)/sizeof(const char *); i++) { - ckh_insert(&ckh, strs[i], strs[i]); + ckh_insert(tsd, &ckh, strs[i], strs[i]); assert_zu_eq(ckh_count(&ckh), i+1, "ckh_count() should return %zu, but it returned %zu", i+1, ckh_count(&ckh)); @@ -79,7 +87,7 @@ TEST_BEGIN(test_count_insert_search_remove) vp = (i & 2) ? &v.p : NULL; k.p = NULL; v.p = NULL; - assert_false(ckh_remove(&ckh, strs[i], kp, vp), + assert_false(ckh_remove(tsd, &ckh, strs[i], kp, vp), "Unexpected ckh_remove() error"); ks = (i & 1) ? strs[i] : (const char *)NULL; @@ -95,20 +103,24 @@ TEST_BEGIN(test_count_insert_search_remove) ckh_count(&ckh)); } - ckh_delete(&ckh); + ckh_delete(tsd, &ckh); } TEST_END TEST_BEGIN(test_insert_iter_remove) { #define NITEMS ZU(1000) + tsd_t *tsd; ckh_t ckh; void **p[NITEMS]; void *q, *r; size_t i; - assert_false(ckh_new(&ckh, 2, ckh_pointer_hash, ckh_pointer_keycomp), - "Unexpected ckh_new() error"); + tsd = tsd_tryget(); + assert_ptr_not_null(tsd, "Unexpected tsd failure"); + + assert_false(ckh_new(tsd, &ckh, 2, ckh_pointer_hash, + ckh_pointer_keycomp), "Unexpected ckh_new() error"); for (i = 0; i < NITEMS; i++) { p[i] = mallocx(i+1, 0); @@ -119,7 +131,7 @@ TEST_BEGIN(test_insert_iter_remove) size_t j; for (j = i; j < NITEMS; j++) { - assert_false(ckh_insert(&ckh, p[j], p[j]), + assert_false(ckh_insert(tsd, &ckh, p[j], p[j]), "Unexpected ckh_insert() failure"); assert_false(ckh_search(&ckh, p[j], &q, &r), "Unexpected ckh_search() failure"); @@ -134,13 +146,13 @@ TEST_BEGIN(test_insert_iter_remove) for (j = i + 1; j < NITEMS; j++) { assert_false(ckh_search(&ckh, p[j], NULL, NULL), "Unexpected ckh_search() failure"); - assert_false(ckh_remove(&ckh, p[j], &q, &r), + 
assert_false(ckh_remove(tsd, &ckh, p[j], &q, &r), "Unexpected ckh_remove() failure"); assert_ptr_eq(p[j], q, "Key pointer mismatch"); assert_ptr_eq(p[j], r, "Value pointer mismatch"); assert_true(ckh_search(&ckh, p[j], NULL, NULL), "Unexpected ckh_search() success"); - assert_true(ckh_remove(&ckh, p[j], &q, &r), + assert_true(ckh_remove(tsd, &ckh, p[j], &q, &r), "Unexpected ckh_remove() success"); } @@ -176,13 +188,13 @@ TEST_BEGIN(test_insert_iter_remove) for (i = 0; i < NITEMS; i++) { assert_false(ckh_search(&ckh, p[i], NULL, NULL), "Unexpected ckh_search() failure"); - assert_false(ckh_remove(&ckh, p[i], &q, &r), + assert_false(ckh_remove(tsd, &ckh, p[i], &q, &r), "Unexpected ckh_remove() failure"); assert_ptr_eq(p[i], q, "Key pointer mismatch"); assert_ptr_eq(p[i], r, "Value pointer mismatch"); assert_true(ckh_search(&ckh, p[i], NULL, NULL), "Unexpected ckh_search() success"); - assert_true(ckh_remove(&ckh, p[i], &q, &r), + assert_true(ckh_remove(tsd, &ckh, p[i], &q, &r), "Unexpected ckh_remove() success"); dallocx(p[i], 0); } @@ -190,7 +202,7 @@ TEST_BEGIN(test_insert_iter_remove) assert_zu_eq(ckh_count(&ckh), 0, "ckh_count() should return %zu, but it returned %zu", ZU(0), ckh_count(&ckh)); - ckh_delete(&ckh); + ckh_delete(tsd, &ckh); #undef NITEMS } TEST_END diff --git a/test/unit/rtree.c b/test/unit/rtree.c index 5463055f..77a947d6 100644 --- a/test/unit/rtree.c +++ b/test/unit/rtree.c @@ -5,7 +5,7 @@ TEST_BEGIN(test_rtree_get_empty) unsigned i; for (i = 1; i <= (sizeof(uintptr_t) << 3); i++) { - rtree_t *rtree = rtree_new(i, imalloc, idalloc); + rtree_t *rtree = rtree_new(i, malloc, free); assert_u_eq(rtree_get(rtree, 0), 0, "rtree_get() should return NULL for empty tree"); rtree_delete(rtree); @@ -18,7 +18,7 @@ TEST_BEGIN(test_rtree_extrema) unsigned i; for (i = 1; i <= (sizeof(uintptr_t) << 3); i++) { - rtree_t *rtree = rtree_new(i, imalloc, idalloc); + rtree_t *rtree = rtree_new(i, malloc, free); rtree_set(rtree, 0, 1); assert_u_eq(rtree_get(rtree, 0), 
1, @@ -40,7 +40,7 @@ TEST_BEGIN(test_rtree_bits) for (i = 1; i < (sizeof(uintptr_t) << 3); i++) { uintptr_t keys[] = {0, 1, (((uintptr_t)1) << (sizeof(uintptr_t)*8-i)) - 1}; - rtree_t *rtree = rtree_new(i, imalloc, idalloc); + rtree_t *rtree = rtree_new(i, malloc, free); for (j = 0; j < sizeof(keys)/sizeof(uintptr_t); j++) { rtree_set(rtree, keys[j], 1); @@ -73,7 +73,7 @@ TEST_BEGIN(test_rtree_random) sfmt = init_gen_rand(SEED); for (i = 1; i <= (sizeof(uintptr_t) << 3); i++) { - rtree_t *rtree = rtree_new(i, imalloc, idalloc); + rtree_t *rtree = rtree_new(i, malloc, free); uintptr_t keys[NSET]; unsigned j; diff --git a/test/unit/tsd.c b/test/unit/tsd.c index f421c1a3..391a7807 100644 --- a/test/unit/tsd.c +++ b/test/unit/tsd.c @@ -16,11 +16,11 @@ data_cleanup(void *arg) data_cleanup_executed = true; } -malloc_tsd_protos(, data, data_t) -malloc_tsd_externs(data, data_t) +malloc_tsd_protos(, data_, data_t) +malloc_tsd_externs(data_, data_t) #define DATA_INIT 0x12345678 -malloc_tsd_data(, data, data_t, DATA_INIT) -malloc_tsd_funcs(, data, data_t, DATA_INIT, data_cleanup) +malloc_tsd_data(, data_, data_t, DATA_INIT) +malloc_tsd_funcs(, data_, data_t, DATA_INIT, data_cleanup) static void * thd_start(void *arg) From eb5376ab9e61d96daa0d1f03b4474baf5232478f Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 23 Sep 2014 09:21:49 -0700 Subject: [PATCH 0531/3378] Add instructions for installing from non-packaged sources. 
--- INSTALL | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/INSTALL b/INSTALL index 6c46100e..9af23369 100644 --- a/INSTALL +++ b/INSTALL @@ -1,10 +1,23 @@ -Building and installing jemalloc can be as simple as typing the following while -in the root directory of the source tree: +Building and installing a packaged release of jemalloc can be as simple as +typing the following while in the root directory of the source tree: ./configure make make install +If building from unpackaged developer sources, the simplest command sequence +that might work is: + + ./autogen.sh + make dist + make + make install + +Note that documentation is not built by the default target because doing so +would create a dependency on xsltproc in packaged releases, hence the +requirement to either run 'make dist' or avoid installing docs via the various +install_* targets documented below. + === Advanced configuration ===================================================== The 'configure' script supports numerous options that allow control of which From 70bdee07d9e3942580e576b94010108c342d609d Mon Sep 17 00:00:00 2001 From: Dave Rigby Date: Mon, 22 Sep 2014 15:53:16 +0100 Subject: [PATCH 0532/3378] autoconf: Support cygwin in addition to mingw --- configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 2d5b56a4..ab4bcc39 100644 --- a/configure.ac +++ b/configure.ac @@ -330,7 +330,7 @@ case "${host}" in fi abi="xcoff" ;; - *-*-mingw*) + *-*-mingw* | *-*-cygwin*) abi="pecoff" force_tls="0" RPATH="" From 112704cfbfacfc9cecdfb732741df47eb4133902 Mon Sep 17 00:00:00 2001 From: Dave Rigby Date: Mon, 22 Sep 2014 15:54:33 +0100 Subject: [PATCH 0533/3378] Use MSVC intrinsics for lg_floor When using MSVC make use of its intrinsic functions (supported on x86, amd64 & ARM) for lg_floor. 
--- include/jemalloc/internal/util.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h index cc7806d0..5af68329 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util.h @@ -176,6 +176,21 @@ lg_floor(size_t x) ); return (ret); } +#elif (defined(_MSC_VER)) +JEMALLOC_INLINE size_t +lg_floor(size_t x) +{ + unsigned long ret; + +#if (LG_SIZEOF_PTR == 3) + _BitScanReverse64(&ret, x); +#elif (LG_SIZEOF_PTR == 2) + _BitScanReverse(&ret, x); +#else +# error "Unsupported type sizes for lg_floor()" +#endif + return (ret); +} #elif (defined(JEMALLOC_HAVE_BUILTIN_CLZ)) JEMALLOC_INLINE size_t lg_floor(size_t x) From 6ef80d68f092caf3b3802a73b8d716057b41864c Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 24 Sep 2014 22:14:21 -0700 Subject: [PATCH 0534/3378] Fix profile dumping race. Fix a race that caused a non-critical assertion failure. To trigger the race, a thread had to be part way through initializing a new sample, such that it was discoverable by the dumping thread, but not yet linked into its gctx by the time a later dump phase would normally have reset its state to 'nominal'. Additionally, lock access to the state field during modification to transition to the dumping state. It's not apparent that this oversight could have caused an actual problem due to outer locking that protects the dumping machinery, but the added locking pedantically follows the stated locking protocol for the state field. 
--- include/jemalloc/internal/prof.h | 1 + src/prof.c | 10 +++++++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index b8a8b419..3872c7ae 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -79,6 +79,7 @@ struct prof_cnt_s { }; typedef enum { + prof_tctx_state_initializing, prof_tctx_state_nominal, prof_tctx_state_dumping, prof_tctx_state_purgatory /* Dumper must finish destroying. */ diff --git a/src/prof.c b/src/prof.c index dd84f533..9f10b533 100644 --- a/src/prof.c +++ b/src/prof.c @@ -717,7 +717,7 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) memset(&ret.p->cnts, 0, sizeof(prof_cnt_t)); ret.p->gctx = gctx; ret.p->prepared = true; - ret.p->state = prof_tctx_state_nominal; + ret.p->state = prof_tctx_state_initializing; malloc_mutex_lock(tdata->lock); error = ckh_insert(tsd, &tdata->bt2tctx, btkey, ret.v); malloc_mutex_unlock(tdata->lock); @@ -728,6 +728,7 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) return (NULL); } malloc_mutex_lock(gctx->lock); + ret.p->state = prof_tctx_state_nominal; tctx_tree_insert(&gctx->tctxs, ret.p); gctx->nlimbo--; malloc_mutex_unlock(gctx->lock); @@ -925,8 +926,15 @@ static void prof_tctx_merge_tdata(prof_tctx_t *tctx, prof_tdata_t *tdata) { + malloc_mutex_lock(tctx->gctx->lock); + if (tctx->state == prof_tctx_state_initializing) { + malloc_mutex_unlock(tctx->gctx->lock); + return; + } assert(tctx->state == prof_tctx_state_nominal); tctx->state = prof_tctx_state_dumping; + malloc_mutex_unlock(tctx->gctx->lock); + memcpy(&tctx->dump_cnts, &tctx->cnts, sizeof(prof_cnt_t)); tdata->cnt_summed.curobjs += tctx->dump_cnts.curobjs; From f97e5ac4ec8a5ae7ed74829e6c1bf6ce814947f5 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 28 Sep 2014 14:43:11 -0700 Subject: [PATCH 0535/3378] Implement compile-time bitmap size computation. 
--- include/jemalloc/internal/bitmap.h | 46 ++++++++++++++++++++++++++++++ src/bitmap.c | 18 ++---------- test/unit/bitmap.c | 16 ++++------- 3 files changed, 54 insertions(+), 26 deletions(-) diff --git a/include/jemalloc/internal/bitmap.h b/include/jemalloc/internal/bitmap.h index 6db4ab70..4ca40ffd 100644 --- a/include/jemalloc/internal/bitmap.h +++ b/include/jemalloc/internal/bitmap.h @@ -3,6 +3,7 @@ /* Maximum bitmap bit count is 2^LG_BITMAP_MAXBITS. */ #define LG_BITMAP_MAXBITS LG_RUN_MAXREGS +#define BITMAP_MAXBITS (ZU(1) << LG_BITMAP_MAXBITS) typedef struct bitmap_level_s bitmap_level_t; typedef struct bitmap_info_s bitmap_info_t; @@ -14,6 +15,51 @@ typedef unsigned long bitmap_t; #define BITMAP_GROUP_NBITS (ZU(1) << LG_BITMAP_GROUP_NBITS) #define BITMAP_GROUP_NBITS_MASK (BITMAP_GROUP_NBITS-1) +/* Number of groups required to store a given number of bits. */ +#define BITMAP_BITS2GROUPS(nbits) \ + ((nbits + BITMAP_GROUP_NBITS_MASK) >> LG_BITMAP_GROUP_NBITS) + +/* + * Number of groups required at a particular level for a given number of bits. + */ +#define BITMAP_GROUPS_L0(nbits) \ + BITMAP_BITS2GROUPS(nbits) +#define BITMAP_GROUPS_L1(nbits) \ + BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(nbits)) +#define BITMAP_GROUPS_L2(nbits) \ + BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS((nbits)))) +#define BITMAP_GROUPS_L3(nbits) \ + BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS( \ + BITMAP_BITS2GROUPS((nbits))))) + +/* + * Assuming the number of levels, number of groups required for a given number + * of bits. + */ +#define BITMAP_GROUPS_1_LEVEL(nbits) \ + BITMAP_GROUPS_L0(nbits) +#define BITMAP_GROUPS_2_LEVEL(nbits) \ + (BITMAP_GROUPS_1_LEVEL(nbits) + BITMAP_GROUPS_L1(nbits)) +#define BITMAP_GROUPS_3_LEVEL(nbits) \ + (BITMAP_GROUPS_2_LEVEL(nbits) + BITMAP_GROUPS_L2(nbits)) +#define BITMAP_GROUPS_4_LEVEL(nbits) \ + (BITMAP_GROUPS_3_LEVEL(nbits) + BITMAP_GROUPS_L3(nbits)) + +/* + * Maximum number of groups required to support LG_BITMAP_MAXBITS. 
+ */ +#if LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS +# define BITMAP_GROUPS_MAX BITMAP_GROUPS_1_LEVEL(BITMAP_MAXBITS) +#elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 2 +# define BITMAP_GROUPS_MAX BITMAP_GROUPS_2_LEVEL(BITMAP_MAXBITS) +#elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 3 +# define BITMAP_GROUPS_MAX BITMAP_GROUPS_3_LEVEL(BITMAP_MAXBITS) +#elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 4 +# define BITMAP_GROUPS_MAX BITMAP_GROUPS_4_LEVEL(BITMAP_MAXBITS) +#else +# error "Unsupported bitmap size" +#endif + /* Maximum number of levels possible. */ #define BITMAP_MAX_LEVELS \ (LG_BITMAP_MAXBITS / LG_SIZEOF_BITMAP) \ diff --git a/src/bitmap.c b/src/bitmap.c index e2bd907d..c733372b 100644 --- a/src/bitmap.c +++ b/src/bitmap.c @@ -2,19 +2,6 @@ #include "jemalloc/internal/jemalloc_internal.h" /******************************************************************************/ -/* Function prototypes for non-inline static functions. */ - -static size_t bits2groups(size_t nbits); - -/******************************************************************************/ - -static size_t -bits2groups(size_t nbits) -{ - - return ((nbits >> LG_BITMAP_GROUP_NBITS) + - !!(nbits & BITMAP_GROUP_NBITS_MASK)); -} void bitmap_info_init(bitmap_info_t *binfo, size_t nbits) @@ -31,15 +18,16 @@ bitmap_info_init(bitmap_info_t *binfo, size_t nbits) * that requires only one group. 
*/ binfo->levels[0].group_offset = 0; - group_count = bits2groups(nbits); + group_count = BITMAP_BITS2GROUPS(nbits); for (i = 1; group_count > 1; i++) { assert(i < BITMAP_MAX_LEVELS); binfo->levels[i].group_offset = binfo->levels[i-1].group_offset + group_count; - group_count = bits2groups(group_count); + group_count = BITMAP_BITS2GROUPS(group_count); } binfo->levels[i].group_offset = binfo->levels[i-1].group_offset + group_count; + assert(binfo->levels[i].group_offset <= BITMAP_GROUPS_MAX); binfo->nlevels = i; binfo->nbits = nbits; } diff --git a/test/unit/bitmap.c b/test/unit/bitmap.c index 8086b888..4ea94f85 100644 --- a/test/unit/bitmap.c +++ b/test/unit/bitmap.c @@ -1,17 +1,11 @@ #include "test/jemalloc_test.h" -#if (LG_BITMAP_MAXBITS > 12) -# define MAXBITS 4500 -#else -# define MAXBITS (1U << LG_BITMAP_MAXBITS) -#endif - TEST_BEGIN(test_bitmap_size) { size_t i, prev_size; prev_size = 0; - for (i = 1; i <= MAXBITS; i++) { + for (i = 1; i <= BITMAP_MAXBITS; i++) { size_t size = bitmap_size(i); assert_true(size >= prev_size, "Bitmap size is smaller than expected"); @@ -24,7 +18,7 @@ TEST_BEGIN(test_bitmap_init) { size_t i; - for (i = 1; i <= MAXBITS; i++) { + for (i = 1; i <= BITMAP_MAXBITS; i++) { bitmap_info_t binfo; bitmap_info_init(&binfo, i); { @@ -47,7 +41,7 @@ TEST_BEGIN(test_bitmap_set) { size_t i; - for (i = 1; i <= MAXBITS; i++) { + for (i = 1; i <= BITMAP_MAXBITS; i++) { bitmap_info_t binfo; bitmap_info_init(&binfo, i); { @@ -70,7 +64,7 @@ TEST_BEGIN(test_bitmap_unset) { size_t i; - for (i = 1; i <= MAXBITS; i++) { + for (i = 1; i <= BITMAP_MAXBITS; i++) { bitmap_info_t binfo; bitmap_info_init(&binfo, i); { @@ -99,7 +93,7 @@ TEST_BEGIN(test_bitmap_sfu) { size_t i; - for (i = 1; i <= MAXBITS; i++) { + for (i = 1; i <= BITMAP_MAXBITS; i++) { bitmap_info_t binfo; bitmap_info_init(&binfo, i); { From 0c5dd03e889d0269170b5db9fa872738d906eb78 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 29 Sep 2014 01:31:39 -0700 Subject: [PATCH 0536/3378] Move 
small run metadata into the arena chunk header. Move small run metadata into the arena chunk header, with multiple expected benefits: - Lower run fragmentation due to reduced run sizes; runs are more likely to completely drain when there are fewer total regions. - Improved cache behavior. Prior to this change, run headers were always page-aligned, which put extra pressure on some CPU cache sets. The degree to which this was a problem was hardware dependent, but it likely hurt some even for the most advanced modern hardware. - Buffer overruns/underruns are less likely to corrupt allocator metadata. - Size classes between 4 KiB and 16 KiB become reasonable to support without any special handling, and the runs are small enough that dirty unused pages aren't a significant concern. --- include/jemalloc/internal/arena.h | 144 ++++---- include/jemalloc/internal/private_symbols.txt | 3 + src/arena.c | 345 ++++++++---------- 3 files changed, 232 insertions(+), 260 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index f1a12057..48fd2055 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -1,30 +1,8 @@ /******************************************************************************/ #ifdef JEMALLOC_H_TYPES -/* - * RUN_MAX_OVRHD indicates maximum desired run header overhead. Runs are sized - * as small as possible such that this setting is still honored, without - * violating other constraints. The goal is to make runs as small as possible - * without exceeding a per run external fragmentation threshold. - * - * We use binary fixed point math for overhead computations, where the binary - * point is implicitly RUN_BFP bits to the left. - * - * Note that it is possible to set RUN_MAX_OVRHD low enough that it cannot be - * honored for some/all object sizes, since when heap profiling is enabled - * there is one pointer of header overhead per object (plus a constant). 
This - * constraint is relaxed (ignored) for runs that are so small that the - * per-region overhead is greater than: - * - * (RUN_MAX_OVRHD / (reg_interval << (3+RUN_BFP)) - */ -#define RUN_BFP 12 -/* \/ Implicit binary fixed point. */ -#define RUN_MAX_OVRHD 0x0000003dU -#define RUN_MAX_OVRHD_RELAX 0x00001800U - /* Maximum number of regions in one run. */ -#define LG_RUN_MAXREGS 11 +#define LG_RUN_MAXREGS (LG_PAGE - LG_TINY_MIN) #define RUN_MAXREGS (1U << LG_RUN_MAXREGS) /* @@ -43,10 +21,10 @@ */ #define LG_DIRTY_MULT_DEFAULT 3 +typedef struct arena_run_s arena_run_t; typedef struct arena_chunk_map_bits_s arena_chunk_map_bits_t; typedef struct arena_chunk_map_misc_s arena_chunk_map_misc_t; typedef struct arena_chunk_s arena_chunk_t; -typedef struct arena_run_s arena_run_t; typedef struct arena_bin_info_s arena_bin_info_t; typedef struct arena_bin_s arena_bin_t; typedef struct arena_s arena_t; @@ -55,6 +33,20 @@ typedef struct arena_s arena_t; /******************************************************************************/ #ifdef JEMALLOC_H_STRUCTS +struct arena_run_s { + /* Bin this run is associated with. */ + arena_bin_t *bin; + + /* Index of next region that has never been allocated, or nregs. */ + uint32_t nextind; + + /* Number of free regions in run. */ + unsigned nfree; + + /* Per region allocated/deallocated bitmap. */ + bitmap_t bitmap[BITMAP_GROUPS_MAX]; +}; + /* Each element of the chunk map corresponds to one page within the chunk. */ struct arena_chunk_map_bits_s { /* @@ -130,15 +122,6 @@ struct arena_chunk_map_bits_s { * chunk header in order to improve cache locality. */ struct arena_chunk_map_misc_s { -#ifndef JEMALLOC_PROF - /* - * Overlay prof_tctx in order to allow it to be referenced by dead code. - * Such antics aren't warranted for per arena data structures, but - * chunk map overhead accounts for a percentage of memory, rather than - * being just a fixed cost. - */ - union { -#endif /* * Linkage for run trees. 
There are two disjoint uses: * @@ -146,16 +129,18 @@ struct arena_chunk_map_misc_s { * 2) arena_run_t conceptually uses this linkage for in-use non-full * runs, rather than directly embedding linkage. */ - rb_node(arena_chunk_map_misc_t) rb_link; + rb_node(arena_chunk_map_misc_t) rb_link; - /* Profile counters, used for large object runs. */ - prof_tctx_t *prof_tctx; -#ifndef JEMALLOC_PROF - }; /* union { ... }; */ -#endif + union { + /* Linkage for list of dirty runs. */ + ql_elm(arena_chunk_map_misc_t) dr_link; - /* Linkage for list of dirty runs. */ - ql_elm(arena_chunk_map_misc_t) dr_link; + /* Profile counters, used for large object runs. */ + prof_tctx_t *prof_tctx; + + /* Small region run metadata. */ + arena_run_t run; + }; }; typedef rb_tree(arena_chunk_map_misc_t) arena_avail_tree_t; typedef rb_tree(arena_chunk_map_misc_t) arena_run_tree_t; @@ -175,17 +160,6 @@ struct arena_chunk_s { arena_chunk_map_bits_t map_bits[1]; /* Dynamically sized. */ }; -struct arena_run_s { - /* Bin this run is associated with. */ - arena_bin_t *bin; - - /* Index of next region that has never been allocated, or nregs. */ - uint32_t nextind; - - /* Number of free regions in run. */ - unsigned nfree; -}; - /* * Read-only information associated with each element of arena_t's bins array * is stored separately, partly to reduce memory usage (only one copy, rather @@ -194,10 +168,7 @@ struct arena_run_s { * Each run has the following layout: * * /--------------------\ - * | arena_run_t header | - * | ... | - * bitmap_offset | bitmap | - * | ... | + * | pad? | * |--------------------| * | redzone | * reg0_offset | region 0 | @@ -238,12 +209,6 @@ struct arena_bin_info_s { /* Total number of regions in a run for this bin's size class. */ uint32_t nregs; - /* - * Offset of first bitmap_t element in a run header for this bin's size - * class. - */ - uint32_t bitmap_offset; - /* * Metadata used to manipulate bitmaps for runs associated with this * bin. 
@@ -451,6 +416,9 @@ arena_chunk_map_bits_t *arena_bitselm_get(arena_chunk_t *chunk, size_t pageind); arena_chunk_map_misc_t *arena_miscelm_get(arena_chunk_t *chunk, size_t pageind); +size_t arena_miscelm_to_pageind(arena_chunk_map_misc_t *miscelm); +void *arena_miscelm_to_rpages(arena_chunk_map_misc_t *miscelm); +arena_chunk_map_misc_t *arena_run_to_miscelm(arena_run_t *run); size_t *arena_mapbitsp_get(arena_chunk_t *chunk, size_t pageind); size_t arena_mapbitsp_read(size_t *mapbitsp); size_t arena_mapbits_get(arena_chunk_t *chunk, size_t pageind); @@ -659,6 +627,40 @@ arena_miscelm_get(arena_chunk_t *chunk, size_t pageind) (uintptr_t)map_misc_offset) + pageind-map_bias); } +JEMALLOC_ALWAYS_INLINE size_t +arena_miscelm_to_pageind(arena_chunk_map_misc_t *miscelm) +{ + arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(miscelm); + size_t pageind = ((uintptr_t)miscelm - ((uintptr_t)chunk + + map_misc_offset)) / sizeof(arena_chunk_map_misc_t) + map_bias; + + assert(pageind >= map_bias); + assert(pageind < chunk_npages); + + return (pageind); +} + +JEMALLOC_ALWAYS_INLINE void * +arena_miscelm_to_rpages(arena_chunk_map_misc_t *miscelm) +{ + arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(miscelm); + size_t pageind = arena_miscelm_to_pageind(miscelm); + + return ((void *)((uintptr_t)chunk + (pageind << LG_PAGE))); +} + +JEMALLOC_ALWAYS_INLINE arena_chunk_map_misc_t * +arena_run_to_miscelm(arena_run_t *run) +{ + arena_chunk_map_misc_t *miscelm = (arena_chunk_map_misc_t + *)((uintptr_t)run - offsetof(arena_chunk_map_misc_t, run)); + + assert(arena_miscelm_to_pageind(miscelm) >= map_bias); + assert(arena_miscelm_to_pageind(miscelm) < chunk_npages); + + return (miscelm); +} + JEMALLOC_ALWAYS_INLINE size_t * arena_mapbitsp_get(arena_chunk_t *chunk, size_t pageind) { @@ -903,10 +905,13 @@ arena_ptr_small_binind_get(const void *ptr, size_t mapbits) arena_t *arena; size_t pageind; size_t actual_mapbits; + size_t rpages_ind; arena_run_t *run; arena_bin_t *bin; size_t 
actual_binind; arena_bin_info_t *bin_info; + arena_chunk_map_misc_t *miscelm; + void *rpages; assert(binind != BININD_INVALID); assert(binind < NBINS); @@ -917,13 +922,16 @@ arena_ptr_small_binind_get(const void *ptr, size_t mapbits) assert(mapbits == actual_mapbits); assert(arena_mapbits_large_get(chunk, pageind) == 0); assert(arena_mapbits_allocated_get(chunk, pageind) != 0); - run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - - (actual_mapbits >> LG_PAGE)) << LG_PAGE)); + rpages_ind = pageind - arena_mapbits_small_runind_get(chunk, + pageind); + miscelm = arena_miscelm_get(chunk, rpages_ind); + run = &miscelm->run; bin = run->bin; actual_binind = bin - arena->bins; assert(binind == actual_binind); bin_info = &arena_bin_info[actual_binind]; - assert(((uintptr_t)ptr - ((uintptr_t)run + + rpages = arena_miscelm_to_rpages(miscelm); + assert(((uintptr_t)ptr - ((uintptr_t)rpages + (uintptr_t)bin_info->reg0_offset)) % bin_info->reg_interval == 0); } @@ -946,19 +954,21 @@ arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr) { unsigned shift, diff, regind; size_t interval; + arena_chunk_map_misc_t *miscelm = arena_run_to_miscelm(run); + void *rpages = arena_miscelm_to_rpages(miscelm); /* * Freeing a pointer lower than region zero can cause assertion * failure. */ - assert((uintptr_t)ptr >= (uintptr_t)run + + assert((uintptr_t)ptr >= (uintptr_t)rpages + (uintptr_t)bin_info->reg0_offset); /* * Avoid doing division with a variable divisor if possible. Using * actual division here can reduce allocator throughput by over 20%! */ - diff = (unsigned)((uintptr_t)ptr - (uintptr_t)run - + diff = (unsigned)((uintptr_t)ptr - (uintptr_t)rpages - bin_info->reg0_offset); /* Rescale (factor powers of 2 out of the numerator and denominator). 
*/ diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 84d48d19..5ac82f59 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -42,6 +42,8 @@ arena_mapbitsp_read arena_mapbitsp_write arena_maxclass arena_miscelm_get +arena_miscelm_to_pageind +arena_miscelm_to_rpages arena_new arena_palloc arena_postfork_child @@ -61,6 +63,7 @@ arena_ralloc_junk_large arena_ralloc_no_move arena_redzone_corruption arena_run_regind +arena_run_to_miscelm arena_salloc arena_sdalloc arena_stats_merge diff --git a/src/arena.c b/src/arena.c index 40da9f47..ef391b16 100644 --- a/src/arena.c +++ b/src/arena.c @@ -60,15 +60,6 @@ static void arena_bin_lower_run(arena_t *arena, arena_chunk_t *chunk, /******************************************************************************/ -JEMALLOC_INLINE_C size_t -arena_miscelm_to_pageind(arena_chunk_map_misc_t *miscelm) -{ - size_t offset = CHUNK_ADDR2OFFSET(miscelm); - - return ((offset - map_misc_offset) / sizeof(arena_chunk_map_misc_t) + - map_bias); -} - JEMALLOC_INLINE_C size_t arena_miscelm_to_bits(arena_chunk_map_misc_t *miscelm) { @@ -183,14 +174,16 @@ arena_run_reg_alloc(arena_run_t *run, arena_bin_info_t *bin_info) { void *ret; unsigned regind; - bitmap_t *bitmap = (bitmap_t *)((uintptr_t)run + - (uintptr_t)bin_info->bitmap_offset); + arena_chunk_map_misc_t *miscelm; + void *rpages; assert(run->nfree > 0); - assert(bitmap_full(bitmap, &bin_info->bitmap_info) == false); + assert(bitmap_full(run->bitmap, &bin_info->bitmap_info) == false); - regind = bitmap_sfu(bitmap, &bin_info->bitmap_info); - ret = (void *)((uintptr_t)run + (uintptr_t)bin_info->reg0_offset + + regind = bitmap_sfu(run->bitmap, &bin_info->bitmap_info); + miscelm = arena_run_to_miscelm(run); + rpages = arena_miscelm_to_rpages(miscelm); + ret = (void *)((uintptr_t)rpages + (uintptr_t)bin_info->reg0_offset + (uintptr_t)(bin_info->reg_interval * regind)); 
run->nfree--; if (regind == run->nextind) @@ -208,20 +201,20 @@ arena_run_reg_dalloc(arena_run_t *run, void *ptr) size_t binind = arena_ptr_small_binind_get(ptr, mapbits); arena_bin_info_t *bin_info = &arena_bin_info[binind]; unsigned regind = arena_run_regind(run, bin_info, ptr); - bitmap_t *bitmap = (bitmap_t *)((uintptr_t)run + - (uintptr_t)bin_info->bitmap_offset); assert(run->nfree < bin_info->nregs); /* Freeing an interior pointer can cause assertion failure. */ - assert(((uintptr_t)ptr - ((uintptr_t)run + + assert(((uintptr_t)ptr - + ((uintptr_t)arena_miscelm_to_rpages(arena_run_to_miscelm(run)) + (uintptr_t)bin_info->reg0_offset)) % (uintptr_t)bin_info->reg_interval == 0); - assert((uintptr_t)ptr >= (uintptr_t)run + + assert((uintptr_t)ptr >= + (uintptr_t)arena_miscelm_to_rpages(arena_run_to_miscelm(run)) + (uintptr_t)bin_info->reg0_offset); /* Freeing an unallocated pointer can cause assertion failure. */ - assert(bitmap_get(bitmap, &bin_info->bitmap_info, regind)); + assert(bitmap_get(run->bitmap, &bin_info->bitmap_info, regind)); - bitmap_unset(bitmap, &bin_info->bitmap_info, regind); + bitmap_unset(run->bitmap, &bin_info->bitmap_info, regind); run->nfree++; } @@ -316,10 +309,12 @@ arena_run_split_large_helper(arena_t *arena, arena_run_t *run, size_t size, bool remove, bool zero) { arena_chunk_t *chunk; + arena_chunk_map_misc_t *miscelm; size_t flag_dirty, run_ind, need_pages, i; chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); - run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE); + miscelm = arena_run_to_miscelm(run); + run_ind = arena_miscelm_to_pageind(miscelm); flag_dirty = arena_mapbits_dirty_get(chunk, run_ind); need_pages = (size >> LG_PAGE); assert(need_pages > 0); @@ -383,12 +378,14 @@ arena_run_split_small(arena_t *arena, arena_run_t *run, size_t size, size_t binind) { arena_chunk_t *chunk; + arena_chunk_map_misc_t *miscelm; size_t flag_dirty, run_ind, need_pages, i; assert(binind != BININD_INVALID); chunk = (arena_chunk_t 
*)CHUNK_ADDR2BASE(run); - run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE); + miscelm = arena_run_to_miscelm(run); + run_ind = arena_miscelm_to_pageind(miscelm); flag_dirty = arena_mapbits_dirty_get(chunk, run_ind); need_pages = (size >> LG_PAGE); assert(need_pages > 0); @@ -401,11 +398,6 @@ arena_run_split_small(arena_t *arena, arena_run_t *run, size_t size, * clean pages. */ arena_mapbits_small_set(chunk, run_ind, 0, binind, flag_dirty); - /* - * The first page will always be dirtied during small run - * initialization, so a validation failure here would not actually - * cause an observable failure. - */ if (config_debug && flag_dirty == 0 && arena_mapbits_unzeroed_get(chunk, run_ind) == 0) arena_run_page_validate_zeroed(chunk, run_ind); @@ -643,19 +635,14 @@ arena_chunk_dalloc(arena_t *arena, arena_chunk_t *chunk) static arena_run_t * arena_run_alloc_large_helper(arena_t *arena, size_t size, bool zero) { - arena_run_t *run; arena_chunk_map_misc_t *miscelm; arena_chunk_map_misc_t *key; key = (arena_chunk_map_misc_t *)(size | CHUNK_MAP_KEY); miscelm = arena_avail_tree_nsearch(&arena->runs_avail, key); if (miscelm != NULL) { - arena_chunk_t *run_chunk = CHUNK_ADDR2BASE(miscelm); - size_t pageind = arena_miscelm_to_pageind(miscelm); - - run = (arena_run_t *)((uintptr_t)run_chunk + (pageind << - LG_PAGE)); - arena_run_split_large(arena, run, size, zero); + arena_run_t *run = &miscelm->run; + arena_run_split_large(arena, &miscelm->run, size, zero); return (run); } @@ -681,7 +668,7 @@ arena_run_alloc_large(arena_t *arena, size_t size, bool zero) */ chunk = arena_chunk_alloc(arena); if (chunk != NULL) { - run = (arena_run_t *)((uintptr_t)chunk + (map_bias << LG_PAGE)); + run = &arena_miscelm_get(chunk, map_bias)->run; arena_run_split_large(arena, run, size, zero); return (run); } @@ -704,11 +691,7 @@ arena_run_alloc_small_helper(arena_t *arena, size_t size, size_t binind) key = (arena_chunk_map_misc_t *)(size | CHUNK_MAP_KEY); miscelm = 
arena_avail_tree_nsearch(&arena->runs_avail, key); if (miscelm != NULL) { - arena_chunk_t *run_chunk = CHUNK_ADDR2BASE(miscelm); - size_t pageind = arena_miscelm_to_pageind(miscelm); - - run = (arena_run_t *)((uintptr_t)run_chunk + (pageind << - LG_PAGE)); + run = &miscelm->run; arena_run_split_small(arena, run, size, binind); return (run); } @@ -736,7 +719,7 @@ arena_run_alloc_small(arena_t *arena, size_t size, size_t binind) */ chunk = arena_chunk_alloc(arena); if (chunk != NULL) { - run = (arena_run_t *)((uintptr_t)chunk + (map_bias << LG_PAGE)); + run = &arena_miscelm_get(chunk, map_bias)->run; arena_run_split_small(arena, run, size, binind); return (run); } @@ -825,8 +808,7 @@ arena_stash_dirty(arena_t *arena, bool all, size_t npurge, size_t run_size = arena_mapbits_unallocated_size_get(chunk, pageind); size_t npages = run_size >> LG_PAGE; - arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + - (uintptr_t)(pageind << LG_PAGE)); + arena_run_t *run = &miscelm->run; assert(pageind + npages <= chunk_npages); assert(arena_mapbits_dirty_get(chunk, pageind) == @@ -919,11 +901,7 @@ arena_unstash_purged(arena_t *arena, arena_chunk_miscelms_t *miscelms) /* Deallocate runs. 
*/ for (miscelm = ql_first(miscelms); miscelm != NULL; miscelm = ql_first(miscelms)) { - arena_chunk_t *chunk = - (arena_chunk_t *)CHUNK_ADDR2BASE(miscelm); - size_t pageind = arena_miscelm_to_pageind(miscelm); - arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + - (uintptr_t)(pageind << LG_PAGE)); + arena_run_t *run = &miscelm->run; ql_remove(miscelms, miscelm, dr_link); arena_run_dalloc(arena, run, false, true); } @@ -1042,10 +1020,12 @@ static void arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, bool cleaned) { arena_chunk_t *chunk; + arena_chunk_map_misc_t *miscelm; size_t size, run_ind, run_pages, flag_dirty; chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); - run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE); + miscelm = arena_run_to_miscelm(run); + run_ind = arena_miscelm_to_pageind(miscelm); assert(run_ind >= map_bias); assert(run_ind < chunk_npages); if (arena_mapbits_large_get(chunk, run_ind) != 0) { @@ -1086,8 +1066,7 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, bool cleaned) arena_mapbits_unzeroed_get(chunk, run_ind+run_pages-1)); } - arena_run_coalesce(arena, chunk, &size, &run_ind, &run_pages, - flag_dirty); + arena_run_coalesce(arena, chunk, &size, &run_ind, &run_pages, flag_dirty); /* Insert into runs_avail, now that coalescing is complete. 
*/ assert(arena_mapbits_unallocated_size_get(chunk, run_ind) == @@ -1121,7 +1100,8 @@ static void arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, size_t oldsize, size_t newsize) { - size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE; + arena_chunk_map_misc_t *miscelm = arena_run_to_miscelm(run); + size_t pageind = arena_miscelm_to_pageind(miscelm); size_t head_npages = (oldsize - newsize) >> LG_PAGE; size_t flag_dirty = arena_mapbits_dirty_get(chunk, pageind); @@ -1153,9 +1133,12 @@ static void arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, size_t oldsize, size_t newsize, bool dirty) { - size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE; + arena_chunk_map_misc_t *miscelm = arena_run_to_miscelm(run); + size_t pageind = arena_miscelm_to_pageind(miscelm); size_t head_npages = newsize >> LG_PAGE; size_t flag_dirty = arena_mapbits_dirty_get(chunk, pageind); + arena_chunk_map_misc_t *tail_miscelm; + arena_run_t *tail_run; assert(oldsize > newsize); @@ -1178,26 +1161,17 @@ arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, arena_mapbits_large_set(chunk, pageind+head_npages, oldsize-newsize, flag_dirty); - arena_run_dalloc(arena, (arena_run_t *)((uintptr_t)run + newsize), - dirty, false); + tail_miscelm = arena_miscelm_get(chunk, pageind + head_npages); + tail_run = &tail_miscelm->run; + arena_run_dalloc(arena, tail_run, dirty, false); } static arena_run_t * arena_bin_runs_first(arena_bin_t *bin) { arena_chunk_map_misc_t *miscelm = arena_run_tree_first(&bin->runs); - if (miscelm != NULL) { - arena_chunk_t *chunk; - size_t pageind; - arena_run_t *run; - - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(miscelm); - pageind = arena_miscelm_to_pageind(miscelm); - run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - - arena_mapbits_small_runind_get(chunk, pageind)) << - LG_PAGE)); - return (run); - } + if (miscelm != NULL) + return (&miscelm->run); return (NULL); } 
@@ -1205,9 +1179,7 @@ arena_bin_runs_first(arena_bin_t *bin) static void arena_bin_runs_insert(arena_bin_t *bin, arena_run_t *run) { - arena_chunk_t *chunk = CHUNK_ADDR2BASE(run); - size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE; - arena_chunk_map_misc_t *miscelm = arena_miscelm_get(chunk, pageind); + arena_chunk_map_misc_t *miscelm = arena_run_to_miscelm(run); assert(arena_run_tree_search(&bin->runs, miscelm) == NULL); @@ -1217,9 +1189,7 @@ arena_bin_runs_insert(arena_bin_t *bin, arena_run_t *run) static void arena_bin_runs_remove(arena_bin_t *bin, arena_run_t *run) { - arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); - size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE; - arena_chunk_map_misc_t *miscelm = arena_miscelm_get(chunk, pageind); + arena_chunk_map_misc_t *miscelm = arena_run_to_miscelm(run); assert(arena_run_tree_search(&bin->runs, miscelm) != NULL); @@ -1260,14 +1230,11 @@ arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin) malloc_mutex_lock(&arena->lock); run = arena_run_alloc_small(arena, bin_info->run_size, binind); if (run != NULL) { - bitmap_t *bitmap = (bitmap_t *)((uintptr_t)run + - (uintptr_t)bin_info->bitmap_offset); - /* Initialize run internals. */ run->bin = bin; run->nextind = 0; run->nfree = bin_info->nregs; - bitmap_init(bitmap, &bin_info->bitmap_info); + bitmap_init(run->bitmap, &bin_info->bitmap_info); } malloc_mutex_unlock(&arena->lock); /********************************/ @@ -1542,16 +1509,20 @@ void * arena_malloc_large(arena_t *arena, size_t size, bool zero) { void *ret; + arena_run_t *run; + arena_chunk_map_misc_t *miscelm; UNUSED bool idump; /* Large allocation. 
*/ size = PAGE_CEILING(size); malloc_mutex_lock(&arena->lock); - ret = (void *)arena_run_alloc_large(arena, size, zero); - if (ret == NULL) { + run = arena_run_alloc_large(arena, size, zero); + if (run == NULL) { malloc_mutex_unlock(&arena->lock); return (NULL); } + miscelm = arena_run_to_miscelm(run); + ret = arena_miscelm_to_rpages(miscelm); if (config_stats) { arena->stats.nmalloc_large++; arena->stats.nrequests_large++; @@ -1586,6 +1557,8 @@ arena_palloc(arena_t *arena, size_t size, size_t alignment, bool zero) size_t alloc_size, leadsize, trailsize; arena_run_t *run; arena_chunk_t *chunk; + arena_chunk_map_misc_t *miscelm; + void *rpages; assert((size & PAGE_MASK) == 0); @@ -1599,21 +1572,31 @@ arena_palloc(arena_t *arena, size_t size, size_t alignment, bool zero) return (NULL); } chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); + miscelm = arena_run_to_miscelm(run); + rpages = arena_miscelm_to_rpages(miscelm); - leadsize = ALIGNMENT_CEILING((uintptr_t)run, alignment) - - (uintptr_t)run; + leadsize = ALIGNMENT_CEILING((uintptr_t)rpages, alignment) - + (uintptr_t)rpages; assert(alloc_size >= leadsize + size); trailsize = alloc_size - leadsize - size; - ret = (void *)((uintptr_t)run + leadsize); if (leadsize != 0) { - arena_run_trim_head(arena, chunk, run, alloc_size, alloc_size - - leadsize); + arena_chunk_map_misc_t *head_miscelm = miscelm; + arena_run_t *head_run = run; + + miscelm = arena_miscelm_get(chunk, + arena_miscelm_to_pageind(head_miscelm) + (leadsize >> + LG_PAGE)); + run = &miscelm->run; + + arena_run_trim_head(arena, chunk, head_run, alloc_size, + alloc_size - leadsize); } if (trailsize != 0) { - arena_run_trim_tail(arena, chunk, ret, size + trailsize, size, + arena_run_trim_tail(arena, chunk, run, size + trailsize, size, false); } - arena_run_init_large(arena, (arena_run_t *)ret, size, zero); + arena_run_init_large(arena, run, size, zero); + ret = arena_miscelm_to_rpages(miscelm); if (config_stats) { arena->stats.nmalloc_large++; @@ -1687,10 
+1670,12 @@ arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, size_t binind; arena_bin_info_t *bin_info; size_t npages, run_ind, past; + arena_chunk_map_misc_t *miscelm; + void *rpages; assert(run != bin->runcur); - assert(arena_run_tree_search(&bin->runs, arena_miscelm_get(chunk, - ((uintptr_t)run-(uintptr_t)chunk)>>LG_PAGE)) == NULL); + assert(arena_run_tree_search(&bin->runs, arena_run_to_miscelm(run)) == + NULL); binind = arena_bin_index(chunk->arena, run->bin); bin_info = &arena_bin_info[binind]; @@ -1698,8 +1683,10 @@ arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, malloc_mutex_unlock(&bin->lock); /******************************/ npages = bin_info->run_size >> LG_PAGE; - run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE); - past = (size_t)(PAGE_CEILING((uintptr_t)run + + miscelm = arena_run_to_miscelm(run); + run_ind = arena_miscelm_to_pageind(miscelm); + rpages = arena_miscelm_to_rpages(miscelm); + past = (size_t)(PAGE_CEILING((uintptr_t)rpages + (uintptr_t)bin_info->reg0_offset + (uintptr_t)(run->nextind * bin_info->reg_interval - bin_info->redzone_size) - (uintptr_t)chunk) >> LG_PAGE); @@ -1716,13 +1703,18 @@ arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, npages) { /* Trim clean pages. Convert to large run beforehand. 
*/ assert(npages > 0); - arena_mapbits_large_set(chunk, run_ind, bin_info->run_size, 0); - arena_mapbits_large_set(chunk, run_ind+npages-1, 0, 0); - arena_run_trim_tail(arena, chunk, run, (npages << LG_PAGE), - ((past - run_ind) << LG_PAGE), false); + if (past > run_ind) { + arena_mapbits_large_set(chunk, run_ind, + bin_info->run_size, 0); + arena_mapbits_large_set(chunk, run_ind+npages-1, 0, 0); + arena_run_trim_tail(arena, chunk, run, (npages << + LG_PAGE), ((past - run_ind) << LG_PAGE), false); + arena_run_dalloc(arena, run, true, false); + } else + arena_run_dalloc(arena, run, false, false); /* npages = past - run_ind; */ - } - arena_run_dalloc(arena, run, true, false); + } else + arena_run_dalloc(arena, run, true, false); malloc_mutex_unlock(&arena->lock); /****************************/ malloc_mutex_lock(&bin->lock); @@ -1755,15 +1747,15 @@ void arena_dalloc_bin_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr, arena_chunk_map_bits_t *bitselm) { - size_t pageind; + size_t pageind, rpages_ind; arena_run_t *run; arena_bin_t *bin; arena_bin_info_t *bin_info; size_t size, binind; pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - - arena_mapbits_small_runind_get(chunk, pageind)) << LG_PAGE)); + rpages_ind = pageind - arena_mapbits_small_runind_get(chunk, pageind); + run = &arena_miscelm_get(chunk, rpages_ind)->run; bin = run->bin; binind = arena_ptr_small_binind_get(ptr, arena_mapbits_get(chunk, pageind)); @@ -1793,9 +1785,10 @@ arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, { arena_run_t *run; arena_bin_t *bin; + size_t rpages_ind; - run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - - arena_mapbits_small_runind_get(chunk, pageind)) << LG_PAGE)); + rpages_ind = pageind - arena_mapbits_small_runind_get(chunk, pageind); + run = &arena_miscelm_get(chunk, rpages_ind)->run; bin = run->bin; malloc_mutex_lock(&bin->lock); arena_dalloc_bin_locked(arena, chunk, 
ptr, bitselm); @@ -1838,9 +1831,11 @@ arena_dalloc_junk_large_t *arena_dalloc_junk_large = void arena_dalloc_large_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr) { + size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; + arena_chunk_map_misc_t *miscelm = arena_miscelm_get(chunk, pageind); + arena_run_t *run = &miscelm->run; if (config_fill || config_stats) { - size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; size_t usize = arena_mapbits_large_size_get(chunk, pageind); arena_dalloc_junk_large(ptr, usize); @@ -1852,7 +1847,7 @@ arena_dalloc_large_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr) } } - arena_run_dalloc(arena, (arena_run_t *)ptr, true, false); + arena_run_dalloc(arena, run, true, false); } void @@ -1868,6 +1863,9 @@ static void arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk, void *ptr, size_t oldsize, size_t size) { + size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; + arena_chunk_map_misc_t *miscelm = arena_miscelm_get(chunk, pageind); + arena_run_t *run = &miscelm->run; assert(size < oldsize); @@ -1876,8 +1874,7 @@ arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk, void *ptr, * allocations. */ malloc_mutex_lock(&arena->lock); - arena_run_trim_tail(arena, chunk, (arena_run_t *)ptr, oldsize, size, - true); + arena_run_trim_tail(arena, chunk, run, oldsize, size, true); if (config_stats) { arena->stats.ndalloc_large++; arena->stats.allocated_large -= oldsize; @@ -1919,8 +1916,9 @@ arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr, size_t flag_dirty; size_t splitsize = (oldsize + followsize <= size + extra) ? 
followsize : size + extra - oldsize; - arena_run_split_large(arena, (arena_run_t *)((uintptr_t)chunk + - ((pageind+npages) << LG_PAGE)), splitsize, zero); + arena_run_t *run = &arena_miscelm_get(chunk, + pageind+npages)->run; + arena_run_split_large(arena, run, splitsize, zero); size = oldsize + splitsize; npages = size >> LG_PAGE; @@ -2249,26 +2247,18 @@ arena_new(arena_t *arena, unsigned ind) /* * Calculate bin_info->run_size such that it meets the following constraints: * - * *) bin_info->run_size >= min_run_size * *) bin_info->run_size <= arena_maxclass - * *) run header overhead <= RUN_MAX_OVRHD (or header overhead relaxed). * *) bin_info->nregs <= RUN_MAXREGS * - * bin_info->nregs, bin_info->bitmap_offset, and bin_info->reg0_offset are also - * calculated here, since these settings are all interdependent. + * bin_info->nregs and bin_info->reg0_offset are also calculated here, since + * these settings are all interdependent. */ -static size_t -bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size) +static void +bin_info_run_size_calc(arena_bin_info_t *bin_info) { size_t pad_size; - size_t try_run_size, good_run_size; - uint32_t try_nregs, good_nregs; - uint32_t try_hdr_size, good_hdr_size; - uint32_t try_bitmap_offset, good_bitmap_offset; - uint32_t try_redzone0_offset, good_redzone0_offset; - - assert(min_run_size >= PAGE); - assert(min_run_size <= arena_maxclass); + size_t try_run_size, perfect_run_size, actual_run_size; + uint32_t try_nregs, perfect_nregs, actual_nregs; /* * Determine redzone size based on minimum alignment and minimum @@ -2295,96 +2285,66 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size) (bin_info->redzone_size << 1); /* - * Calculate known-valid settings before entering the run_size - * expansion loop, so that the first part of the loop always copies - * valid settings. - * - * The do..while loop iteratively reduces the number of regions until - * the run header and the regions no longer overlap. 
A closed formula - * would be quite messy, since there is an interdependency between the - * header's mask length and the number of regions. + * Compute run size under ideal conditions (no redzones, no limit on run + * size). */ - try_run_size = min_run_size; - try_nregs = ((try_run_size - sizeof(arena_run_t)) / - bin_info->reg_interval) - + 1; /* Counter-act try_nregs-- in loop. */ - if (try_nregs > RUN_MAXREGS) { - try_nregs = RUN_MAXREGS - + 1; /* Counter-act try_nregs-- in loop. */ - } + try_run_size = PAGE; + try_nregs = try_run_size / bin_info->reg_size; do { - try_nregs--; - try_hdr_size = sizeof(arena_run_t); - /* Pad to a long boundary. */ - try_hdr_size = LONG_CEILING(try_hdr_size); - try_bitmap_offset = try_hdr_size; - /* Add space for bitmap. */ - try_hdr_size += bitmap_size(try_nregs); - try_redzone0_offset = try_run_size - (try_nregs * - bin_info->reg_interval) - pad_size; - } while (try_hdr_size > try_redzone0_offset); + perfect_run_size = try_run_size; + perfect_nregs = try_nregs; - /* run_size expansion loop. */ - do { - /* - * Copy valid settings before trying more aggressive settings. - */ - good_run_size = try_run_size; - good_nregs = try_nregs; - good_hdr_size = try_hdr_size; - good_bitmap_offset = try_bitmap_offset; - good_redzone0_offset = try_redzone0_offset; - - /* Try more aggressive settings. */ try_run_size += PAGE; - try_nregs = ((try_run_size - sizeof(arena_run_t) - pad_size) / - bin_info->reg_interval) - + 1; /* Counter-act try_nregs-- in loop. */ - if (try_nregs > RUN_MAXREGS) { - try_nregs = RUN_MAXREGS - + 1; /* Counter-act try_nregs-- in loop. */ - } - do { - try_nregs--; - try_hdr_size = sizeof(arena_run_t); - /* Pad to a long boundary. */ - try_hdr_size = LONG_CEILING(try_hdr_size); - try_bitmap_offset = try_hdr_size; - /* Add space for bitmap. 
*/ - try_hdr_size += bitmap_size(try_nregs); - try_redzone0_offset = try_run_size - (try_nregs * - bin_info->reg_interval) - pad_size; - } while (try_hdr_size > try_redzone0_offset); - } while (try_run_size <= arena_maxclass - && RUN_MAX_OVRHD * (bin_info->reg_interval << 3) > - RUN_MAX_OVRHD_RELAX - && (try_redzone0_offset << RUN_BFP) > RUN_MAX_OVRHD * try_run_size - && try_nregs < RUN_MAXREGS); + try_nregs = try_run_size / bin_info->reg_size; + } while (perfect_run_size != perfect_nregs * bin_info->reg_size); + assert(perfect_nregs <= RUN_MAXREGS); - assert(good_hdr_size <= good_redzone0_offset); + actual_run_size = perfect_run_size; + actual_nregs = (actual_run_size - pad_size) / bin_info->reg_interval; + + /* + * Redzones can require enough padding that not even a single region can + * fit within the number of pages that would normally be dedicated to a + * run for this size class. Increase the run size until at least one + * region fits. + */ + while (actual_nregs == 0) { + assert(config_fill && unlikely(opt_redzone)); + + actual_run_size += PAGE; + actual_nregs = (actual_run_size - pad_size) / + bin_info->reg_interval; + } + + /* + * Make sure that the run will fit within an arena chunk. + */ + while (actual_run_size > arena_maxclass) { + actual_run_size -= PAGE; + actual_nregs = (actual_run_size - pad_size) / + bin_info->reg_interval; + } + assert(actual_nregs > 0); /* Copy final settings. 
*/ - bin_info->run_size = good_run_size; - bin_info->nregs = good_nregs; - bin_info->bitmap_offset = good_bitmap_offset; - bin_info->reg0_offset = good_redzone0_offset + bin_info->redzone_size; + bin_info->run_size = actual_run_size; + bin_info->nregs = actual_nregs; + bin_info->reg0_offset = actual_run_size - (actual_nregs * + bin_info->reg_interval) - pad_size + bin_info->redzone_size; assert(bin_info->reg0_offset - bin_info->redzone_size + (bin_info->nregs * bin_info->reg_interval) + pad_size == bin_info->run_size); - - return (good_run_size); } static void bin_info_init(void) { arena_bin_info_t *bin_info; - size_t prev_run_size = PAGE; #define BIN_INFO_INIT_bin_yes(index, size) \ bin_info = &arena_bin_info[index]; \ bin_info->reg_size = size; \ - prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size);\ + bin_info_run_size_calc(bin_info); \ bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs); #define BIN_INFO_INIT_bin_no(index, size) #define SC(index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup) \ @@ -2418,8 +2378,7 @@ arena_boot(void) header_size = offsetof(arena_chunk_t, map_bits) + ((sizeof(arena_chunk_map_bits_t) + sizeof(arena_chunk_map_misc_t)) * (chunk_npages-map_bias)); - map_bias = (header_size >> LG_PAGE) + ((header_size & PAGE_MASK) - != 0); + map_bias = (header_size + PAGE_MASK) >> LG_PAGE; } assert(map_bias > 0); From e3a16fce5eb0c62a49e751f156d040c9f77fbc23 Mon Sep 17 00:00:00 2001 From: Dave Rigby Date: Wed, 24 Sep 2014 14:19:28 +0100 Subject: [PATCH 0537/3378] Mark malloc_conf as a weak symbol This fixes issue #113 - je_malloc_conf is not respected on OS X --- src/jemalloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 4d3b22e5..3012f558 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -7,7 +7,7 @@ malloc_tsd_data(, arenas, arena_t *, NULL) /* Runtime configuration options. 
*/ -const char *je_malloc_conf; +const char *je_malloc_conf JEMALLOC_ATTR(weak); bool opt_abort = #ifdef JEMALLOC_DEBUG true From 4dcf04bfc03b9e9eb50015a8fc8735de28c23090 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sun, 31 Aug 2014 03:57:06 +0000 Subject: [PATCH 0538/3378] correctly detect adaptive mutexes in pthreads PTHREAD_MUTEX_ADAPTIVE_NP is an enum on glibc and not a macro, we must test for their existence by attempting compilation. --- configure.ac | 12 ++++++++++++ .../jemalloc/internal/jemalloc_internal_defs.h.in | 3 +++ include/jemalloc/internal/mutex.h | 2 +- 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index ab4bcc39..1ee2ed8e 100644 --- a/configure.ac +++ b/configure.ac @@ -1399,6 +1399,18 @@ if test "x${je_cv_glibc_memalign_hook}" = "xyes" ; then AC_DEFINE([JEMALLOC_GLIBC_MEMALIGN_HOOK], [ ]) fi +JE_COMPILABLE([pthreads adaptive mutexes], [ +#include +], [ + pthread_mutexattr_t attr; + pthread_mutexattr_init(&attr); + pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ADAPTIVE_NP); + pthread_mutexattr_destroy(&attr); +], [je_cv_pthread_mutex_adaptive_np]) +if test "x${je_cv_pthread_mutex_adaptive_np}" = "xyes" ; then + AC_DEFINE([JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP], [ ]) +fi + dnl ============================================================================ dnl Check for typedefs, structures, and compiler characteristics. 
AC_HEADER_STDBOOL diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 955582ee..fd85e5cf 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -215,4 +215,7 @@ /* glibc memalign hook */ #undef JEMALLOC_GLIBC_MEMALIGN_HOOK +/* adaptive mutex support in pthreads */ +#undef JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP + #endif /* JEMALLOC_INTERNAL_DEFS_H_ */ diff --git a/include/jemalloc/internal/mutex.h b/include/jemalloc/internal/mutex.h index de44e143..8a03d825 100644 --- a/include/jemalloc/internal/mutex.h +++ b/include/jemalloc/internal/mutex.h @@ -10,7 +10,7 @@ typedef struct malloc_mutex_s malloc_mutex_t; #elif (defined(JEMALLOC_MUTEX_INIT_CB)) # define MALLOC_MUTEX_INITIALIZER {PTHREAD_MUTEX_INITIALIZER, NULL} #else -# if (defined(PTHREAD_MUTEX_ADAPTIVE_NP) && \ +# if (defined(JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP) && \ defined(PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP)) # define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_ADAPTIVE_NP # define MALLOC_MUTEX_INITIALIZER {PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP} From f8034540a16a6f4fc7948e4783747ca1e9055823 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Tue, 30 Sep 2014 10:33:46 -0400 Subject: [PATCH 0539/3378] Implement in-place huge allocation shrinking. 
Trivial example: #include int main(void) { void *ptr = malloc(1024 * 1024 * 8); if (!ptr) return 1; ptr = realloc(ptr, 1024 * 1024 * 4); if (!ptr) return 1; } Before: mmap(NULL, 8388608, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fcfff000000 mmap(NULL, 4194304, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fcffec00000 madvise(0x7fcfff000000, 8388608, MADV_DONTNEED) = 0 After: mmap(NULL, 8388608, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f1934800000 madvise(0x7f1934c00000, 4194304, MADV_DONTNEED) = 0 Closes #134 --- src/huge.c | 89 +++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 62 insertions(+), 27 deletions(-) diff --git a/src/huge.c b/src/huge.c index 2e30ccfd..40d1362d 100644 --- a/src/huge.c +++ b/src/huge.c @@ -72,21 +72,79 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t size, size_t alignment, return (ret); } +#ifdef JEMALLOC_JET +#undef huge_dalloc_junk +#define huge_dalloc_junk JEMALLOC_N(huge_dalloc_junk_impl) +#endif +static void +huge_dalloc_junk(void *ptr, size_t usize) +{ + + if (config_fill && have_dss && unlikely(opt_junk)) { + /* + * Only bother junk filling if the chunk isn't about to be + * unmapped. + */ + if (config_munmap == false || (have_dss && chunk_in_dss(ptr))) + memset(ptr, 0x5a, usize); + } +} +#ifdef JEMALLOC_JET +#undef huge_dalloc_junk +#define huge_dalloc_junk JEMALLOC_N(huge_dalloc_junk) +huge_dalloc_junk_t *huge_dalloc_junk = JEMALLOC_N(huge_dalloc_junk_impl); +#endif + bool huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra) { + /* Both allocations must be huge to avoid a move. */ + if (oldsize <= arena_maxclass) + return (true); + + assert(CHUNK_CEILING(oldsize) == oldsize); + /* * Avoid moving the allocation if the size class can be left the same. 
*/ - if (oldsize > arena_maxclass - && CHUNK_CEILING(oldsize) >= CHUNK_CEILING(size) + if (CHUNK_CEILING(oldsize) >= CHUNK_CEILING(size) && CHUNK_CEILING(oldsize) <= CHUNK_CEILING(size+extra)) { - assert(CHUNK_CEILING(oldsize) == oldsize); return (false); } - /* Reallocation would require a move. */ + /* Overflow. */ + if (CHUNK_CEILING(size) == 0) + return (true); + + /* Shrink the allocation in-place. */ + if (CHUNK_CEILING(oldsize) > CHUNK_CEILING(size)) { + extent_node_t *node, key; + void *excess_addr; + size_t excess_size; + + malloc_mutex_lock(&huge_mtx); + + key.addr = ptr; + node = extent_tree_ad_search(&huge, &key); + assert(node != NULL); + assert(node->addr == ptr); + + /* Update the size of the huge allocation. */ + node->size = CHUNK_CEILING(size); + + malloc_mutex_unlock(&huge_mtx); + + excess_addr = node->addr + CHUNK_CEILING(size); + excess_size = CHUNK_CEILING(oldsize) - CHUNK_CEILING(size); + + /* Zap the excess chunks. */ + huge_dalloc_junk(excess_addr, excess_size); + arena_chunk_dalloc_huge(node->arena, excess_addr, excess_size); + + return (false); + } + return (true); } @@ -134,29 +192,6 @@ huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size, return (ret); } -#ifdef JEMALLOC_JET -#undef huge_dalloc_junk -#define huge_dalloc_junk JEMALLOC_N(huge_dalloc_junk_impl) -#endif -static void -huge_dalloc_junk(void *ptr, size_t usize) -{ - - if (config_fill && have_dss && unlikely(opt_junk)) { - /* - * Only bother junk filling if the chunk isn't about to be - * unmapped. 
- */ - if (config_munmap == false || (have_dss && chunk_in_dss(ptr))) - memset(ptr, 0x5a, usize); - } -} -#ifdef JEMALLOC_JET -#undef huge_dalloc_junk -#define huge_dalloc_junk JEMALLOC_N(huge_dalloc_junk) -huge_dalloc_junk_t *huge_dalloc_junk = JEMALLOC_N(huge_dalloc_junk_impl); -#endif - void huge_dalloc(void *ptr) { From cc9e626ea97eb294f337c674685b8b5c9d5524b7 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 1 Oct 2014 17:51:52 -0700 Subject: [PATCH 0540/3378] Refactor permuted backtrace test allocation. Refactor permuted backtrace test allocation that was originally used only by the prof_accum test, so that it can be used by other heap profiling test binaries. --- Makefile.in | 20 ++++++---------- test/include/test/btalloc.h | 31 ++++++++++++++++++++++++ test/include/test/jemalloc_test.h.in | 1 + test/src/btalloc.c | 8 +++++++ test/src/btalloc_0.c | 3 +++ test/src/btalloc_1.c | 3 +++ test/unit/prof_accum.c | 9 +++++-- test/unit/prof_accum.h | 35 ---------------------------- test/unit/prof_accum_a.c | 3 --- test/unit/prof_accum_b.c | 3 --- 10 files changed, 60 insertions(+), 56 deletions(-) create mode 100644 test/include/test/btalloc.h create mode 100644 test/src/btalloc.c create mode 100644 test/src/btalloc_0.c create mode 100644 test/src/btalloc_1.c delete mode 100644 test/unit/prof_accum.h delete mode 100644 test/unit/prof_accum_a.c delete mode 100644 test/unit/prof_accum_b.c diff --git a/Makefile.in b/Makefile.in index 41328b95..5267bea4 100644 --- a/Makefile.in +++ b/Makefile.in @@ -107,9 +107,11 @@ DOCS_XML := $(objroot)doc/jemalloc$(install_suffix).xml DOCS_HTML := $(DOCS_XML:$(objroot)%.xml=$(srcroot)%.html) DOCS_MAN3 := $(DOCS_XML:$(objroot)%.xml=$(srcroot)%.3) DOCS := $(DOCS_HTML) $(DOCS_MAN3) -C_TESTLIB_SRCS := $(srcroot)test/src/math.c $(srcroot)test/src/mtx.c \ - $(srcroot)test/src/SFMT.c $(srcroot)test/src/test.c \ - $(srcroot)test/src/thd.c $(srcroot)test/src/timer.c +C_TESTLIB_SRCS := $(srcroot)test/src/btalloc.c 
$(srcroot)test/src/btalloc_0.c \ + $(srcroot)test/src/btalloc_1.c $(srcroot)test/src/math.c \ + $(srcroot)test/src/mtx.c $(srcroot)test/src/SFMT.c \ + $(srcroot)test/src/test.c $(srcroot)test/src/thd.c \ + $(srcroot)test/src/timer.c C_UTIL_INTEGRATION_SRCS := $(srcroot)src/util.c TESTS_UNIT := $(srcroot)test/unit/atomic.c \ $(srcroot)test/unit/bitmap.c \ @@ -123,6 +125,7 @@ TESTS_UNIT := $(srcroot)test/unit/atomic.c \ $(srcroot)test/unit/prof_accum.c \ $(srcroot)test/unit/prof_gdump.c \ $(srcroot)test/unit/prof_idump.c \ + $(srcroot)test/unit/prof_reset.c \ $(srcroot)test/unit/ql.c \ $(srcroot)test/unit/qr.c \ $(srcroot)test/unit/quarantine.c \ @@ -133,8 +136,6 @@ TESTS_UNIT := $(srcroot)test/unit/atomic.c \ $(srcroot)test/unit/tsd.c \ $(srcroot)test/unit/util.c \ $(srcroot)test/unit/zero.c -TESTS_UNIT_AUX := $(srcroot)test/unit/prof_accum_a.c \ - $(srcroot)test/unit/prof_accum_b.c TESTS_INTEGRATION := $(srcroot)test/integration/aligned_alloc.c \ $(srcroot)test/integration/allocated.c \ $(srcroot)test/integration/sdallocx.c \ @@ -159,10 +160,9 @@ C_TESTLIB_STRESS_OBJS := $(C_TESTLIB_SRCS:$(srcroot)%.c=$(objroot)%.stress.$(O)) C_TESTLIB_OBJS := $(C_TESTLIB_UNIT_OBJS) $(C_TESTLIB_INTEGRATION_OBJS) $(C_UTIL_INTEGRATION_OBJS) $(C_TESTLIB_STRESS_OBJS) TESTS_UNIT_OBJS := $(TESTS_UNIT:$(srcroot)%.c=$(objroot)%.$(O)) -TESTS_UNIT_AUX_OBJS := $(TESTS_UNIT_AUX:$(srcroot)%.c=$(objroot)%.$(O)) TESTS_INTEGRATION_OBJS := $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%.$(O)) TESTS_STRESS_OBJS := $(TESTS_STRESS:$(srcroot)%.c=$(objroot)%.$(O)) -TESTS_OBJS := $(TESTS_UNIT_OBJS) $(TESTS_UNIT_AUX_OBJS) $(TESTS_INTEGRATION_OBJS) $(TESTS_STRESS_OBJS) +TESTS_OBJS := $(TESTS_UNIT_OBJS) $(TESTS_INTEGRATION_OBJS) $(TESTS_STRESS_OBJS) .PHONY: all dist build_doc_html build_doc_man build_doc .PHONY: install_bin install_include install_lib @@ -211,12 +211,6 @@ $(C_TESTLIB_STRESS_OBJS): $(objroot)test/src/%.stress.$(O): $(srcroot)test/src/% $(C_TESTLIB_STRESS_OBJS): CPPFLAGS += 
-DJEMALLOC_STRESS_TEST -DJEMALLOC_STRESS_TESTLIB $(C_TESTLIB_OBJS): CPPFLAGS += -I$(srcroot)test/include -I$(objroot)test/include $(TESTS_UNIT_OBJS): CPPFLAGS += -DJEMALLOC_UNIT_TEST -$(TESTS_UNIT_AUX_OBJS): CPPFLAGS += -DJEMALLOC_UNIT_TEST -define make-unit-link-dep -$(1): TESTS_UNIT_LINK_OBJS += $(2) -$(1): $(2) -endef -$(foreach test, $(TESTS_UNIT:$(srcroot)test/unit/%.c=$(objroot)test/unit/%$(EXE)), $(eval $(call make-unit-link-dep,$(test),$(filter $(test:%$(EXE)=%_a.$(O)) $(test:%$(EXE)=%_b.$(O)),$(TESTS_UNIT_AUX_OBJS))))) $(TESTS_INTEGRATION_OBJS): CPPFLAGS += -DJEMALLOC_INTEGRATION_TEST $(TESTS_STRESS_OBJS): CPPFLAGS += -DJEMALLOC_STRESS_TEST $(TESTS_OBJS): $(objroot)test/%.$(O): $(srcroot)test/%.c diff --git a/test/include/test/btalloc.h b/test/include/test/btalloc.h new file mode 100644 index 00000000..c3f9d4df --- /dev/null +++ b/test/include/test/btalloc.h @@ -0,0 +1,31 @@ +/* btalloc() provides a mechanism for allocating via permuted backtraces. */ +void *btalloc(size_t size, unsigned bits); + +#define btalloc_n_proto(n) \ +void *btalloc_##n(size_t size, unsigned bits); +btalloc_n_proto(0) +btalloc_n_proto(1) + +#define btalloc_n_gen(n) \ +void * \ +btalloc_##n(size_t size, unsigned bits) \ +{ \ + void *p; \ + \ + if (bits == 0) \ + p = mallocx(size, 0); \ + else { \ + switch (bits & 0x1U) { \ + case 0: \ + p = (btalloc_0(size, bits >> 1)); \ + break; \ + case 1: \ + p = (btalloc_1(size, bits >> 1)); \ + break; \ + default: not_reached(); \ + } \ + } \ + /* Intentionally sabotage tail call optimization. */ \ + assert_ptr_not_null(p, "Unexpected mallocx() failure"); \ + return (p); \ +} diff --git a/test/include/test/jemalloc_test.h.in b/test/include/test/jemalloc_test.h.in index a93c4f67..6018e58a 100644 --- a/test/include/test/jemalloc_test.h.in +++ b/test/include/test/jemalloc_test.h.in @@ -133,6 +133,7 @@ /* * Common test utilities. 
*/ +#include "test/btalloc.h" #include "test/math.h" #include "test/mtx.h" #include "test/mq.h" diff --git a/test/src/btalloc.c b/test/src/btalloc.c new file mode 100644 index 00000000..9a253d97 --- /dev/null +++ b/test/src/btalloc.c @@ -0,0 +1,8 @@ +#include "test/jemalloc_test.h" + +void * +btalloc(size_t size, unsigned bits) +{ + + return (btalloc_0(size, bits)); +} diff --git a/test/src/btalloc_0.c b/test/src/btalloc_0.c new file mode 100644 index 00000000..77d8904e --- /dev/null +++ b/test/src/btalloc_0.c @@ -0,0 +1,3 @@ +#include "test/jemalloc_test.h" + +btalloc_n_gen(0) diff --git a/test/src/btalloc_1.c b/test/src/btalloc_1.c new file mode 100644 index 00000000..4c126c30 --- /dev/null +++ b/test/src/btalloc_1.c @@ -0,0 +1,3 @@ +#include "test/jemalloc_test.h" + +btalloc_n_gen(1) diff --git a/test/unit/prof_accum.c b/test/unit/prof_accum.c index 050a8a7e..fd229e0f 100644 --- a/test/unit/prof_accum.c +++ b/test/unit/prof_accum.c @@ -1,4 +1,9 @@ -#include "prof_accum.h" +#include "test/jemalloc_test.h" + +#define NTHREADS 4 +#define NALLOCS_PER_THREAD 50 +#define DUMP_INTERVAL 1 +#define BT_COUNT_CHECK_INTERVAL 5 #ifdef JEMALLOC_PROF const char *malloc_conf = @@ -20,7 +25,7 @@ static void * alloc_from_permuted_backtrace(unsigned thd_ind, unsigned iteration) { - return (alloc_0(thd_ind*NALLOCS_PER_THREAD + iteration)); + return (btalloc(1, thd_ind*NALLOCS_PER_THREAD + iteration)); } static void * diff --git a/test/unit/prof_accum.h b/test/unit/prof_accum.h deleted file mode 100644 index 109d86b5..00000000 --- a/test/unit/prof_accum.h +++ /dev/null @@ -1,35 +0,0 @@ -#include "test/jemalloc_test.h" - -#define NTHREADS 4 -#define NALLOCS_PER_THREAD 50 -#define DUMP_INTERVAL 1 -#define BT_COUNT_CHECK_INTERVAL 5 - -#define alloc_n_proto(n) \ -void *alloc_##n(unsigned bits); -alloc_n_proto(0) -alloc_n_proto(1) - -#define alloc_n_gen(n) \ -void * \ -alloc_##n(unsigned bits) \ -{ \ - void *p; \ - \ - if (bits == 0) \ - p = mallocx(1, 0); \ - else { \ - switch (bits & 
0x1U) { \ - case 0: \ - p = (alloc_0(bits >> 1)); \ - break; \ - case 1: \ - p = (alloc_1(bits >> 1)); \ - break; \ - default: not_reached(); \ - } \ - } \ - /* Intentionally sabotage tail call optimization. */ \ - assert_ptr_not_null(p, "Unexpected mallocx() failure"); \ - return (p); \ -} diff --git a/test/unit/prof_accum_a.c b/test/unit/prof_accum_a.c deleted file mode 100644 index 42ad521d..00000000 --- a/test/unit/prof_accum_a.c +++ /dev/null @@ -1,3 +0,0 @@ -#include "prof_accum.h" - -alloc_n_gen(0) diff --git a/test/unit/prof_accum_b.c b/test/unit/prof_accum_b.c deleted file mode 100644 index 60d9dab6..00000000 --- a/test/unit/prof_accum_b.c +++ /dev/null @@ -1,3 +0,0 @@ -#include "prof_accum.h" - -alloc_n_gen(1) From 20c31deaae38ed9aa4fe169ed65e0c45cd542955 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 2 Oct 2014 23:01:10 -0700 Subject: [PATCH 0541/3378] Test prof.reset mallctl and fix numerous discovered bugs. --- doc/jemalloc.xml.in | 5 +- include/jemalloc/internal/private_symbols.txt | 1 + include/jemalloc/internal/prof.h | 24 +- src/prof.c | 211 +++++++++++----- test/unit/prof_reset.c | 238 ++++++++++++++++++ 5 files changed, 404 insertions(+), 75 deletions(-) create mode 100644 test/unit/prof_reset.c diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index e5c229fe..b586e690 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1547,7 +1547,8 @@ malloc_conf = "xmalloc:true";]]> Reset all memory profile statistics, and optionally update the sample rate (see opt.lg_prof_sample). + linkend="opt.lg_prof_sample">opt.lg_prof_sample + and prof.lg_sample). @@ -1558,7 +1559,7 @@ malloc_conf = "xmalloc:true";]]> r- [] - Get the sample rate (see Get the current sample rate (see opt.lg_prof_sample). 
diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 5ac82f59..33f8ce01 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -292,6 +292,7 @@ prof_boot0 prof_boot1 prof_boot2 prof_bt_count +prof_dump_header prof_dump_open prof_free prof_free_sampled_object diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index 3872c7ae..91c871de 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -151,22 +151,23 @@ struct prof_gctx_s { }; typedef rb_tree(prof_gctx_t) prof_gctx_tree_t; -typedef enum { - prof_tdata_state_attached, /* Active thread attached, data valid. */ - prof_tdata_state_detached, /* Defunct thread, data remain valid. */ - prof_tdata_state_expired /* Predates reset, omit data from dump. */ -} prof_tdata_state_t; - struct prof_tdata_s { malloc_mutex_t *lock; /* Monotonically increasing unique thread identifier. */ uint64_t thr_uid; + /* + * Monotonically increasing discriminator among tdata structures + * associated with the same thr_uid. + */ + uint64_t thr_discrim; + /* Included in heap profile dumps if non-NULL. 
*/ char *thread_name; - prof_tdata_state_t state; + bool attached; + bool expired; rb_node(prof_tdata_t) tdata_link; @@ -257,9 +258,13 @@ void bt_init(prof_bt_t *bt, void **vec); void prof_backtrace(prof_bt_t *bt); prof_tctx_t *prof_lookup(tsd_t *tsd, prof_bt_t *bt); #ifdef JEMALLOC_JET +size_t prof_tdata_count(void); size_t prof_bt_count(void); +const prof_cnt_t *prof_cnt_all(void); typedef int (prof_dump_open_t)(bool, const char *); extern prof_dump_open_t *prof_dump_open; +typedef bool (prof_dump_header_t)(bool, const prof_cnt_t *); +extern prof_dump_header_t *prof_dump_header; #endif void prof_idump(void); bool prof_mdump(const char *filename); @@ -312,12 +317,11 @@ prof_tdata_get(tsd_t *tsd, bool create) if (unlikely(tdata == NULL)) { tdata = prof_tdata_init(tsd); tsd_prof_tdata_set(tsd, tdata); - } else if (unlikely(tdata->state == prof_tdata_state_expired)) { + } else if (unlikely(tdata->expired)) { tdata = prof_tdata_reinit(tsd, tdata); tsd_prof_tdata_set(tsd, tdata); } - assert(tdata == NULL || - tdata->state == prof_tdata_state_attached); + assert(tdata == NULL || tdata->attached); } return (tdata); diff --git a/src/prof.c b/src/prof.c index 9f10b533..0a96d85f 100644 --- a/src/prof.c +++ b/src/prof.c @@ -137,10 +137,18 @@ rb_gen(static UNUSED, gctx_tree_, prof_gctx_tree_t, prof_gctx_t, dump_link, JEMALLOC_INLINE_C int prof_tdata_comp(const prof_tdata_t *a, const prof_tdata_t *b) { + int ret; uint64_t a_uid = a->thr_uid; uint64_t b_uid = b->thr_uid; - return ((a_uid > b_uid) - (a_uid < b_uid)); + ret = ((a_uid > b_uid) - (a_uid < b_uid)); + if (ret == 0) { + uint64_t a_discrim = a->thr_discrim; + uint64_t b_discrim = b->thr_discrim; + + ret = ((a_discrim > b_discrim) - (a_discrim < b_discrim)); + } + return (ret); } rb_gen(static UNUSED, tdata_tree_, prof_tdata_tree_t, prof_tdata_t, tdata_link, @@ -504,7 +512,7 @@ prof_gctx_create(tsd_t *tsd, prof_bt_t *bt) gctx->lock = prof_gctx_mutex_choose(); /* * Set nlimbo to 1, in order to avoid a race condition with 
- * prof_tctx_destroy()/prof_gctx_maybe_destroy(). + * prof_tctx_destroy()/prof_gctx_try_destroy(). */ gctx->nlimbo = 1; tctx_tree_new(&gctx->tctxs); @@ -516,7 +524,7 @@ prof_gctx_create(tsd_t *tsd, prof_bt_t *bt) } static void -prof_gctx_maybe_destroy(tsd_t *tsd, prof_gctx_t *gctx, prof_tdata_t *tdata) +prof_gctx_try_destroy(tsd_t *tsd, prof_gctx_t *gctx, prof_tdata_t *tdata) { cassert(config_prof); @@ -530,6 +538,7 @@ prof_gctx_maybe_destroy(tsd_t *tsd, prof_gctx_t *gctx, prof_tdata_t *tdata) */ prof_enter(tdata); malloc_mutex_lock(gctx->lock); + assert(gctx->nlimbo != 0); if (tctx_tree_empty(&gctx->tctxs) && gctx->nlimbo == 1) { /* Remove gctx from bt2gctx. */ if (ckh_remove(tsd, &bt2gctx, &gctx->bt, NULL, NULL)) @@ -605,10 +614,10 @@ prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) * * 1) Sample an allocation associated with gctx. * 2) Deallocate the sampled object. - * 3) Successfully prof_gctx_maybe_destroy(gctx). + * 3) Successfully prof_gctx_try_destroy(gctx). * * The result would be that gctx no longer exists by the time - * this thread accesses it in prof_gctx_maybe_destroy(). + * this thread accesses it in prof_gctx_try_destroy(). */ gctx->nlimbo++; destroy_gctx = true; @@ -616,7 +625,7 @@ prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) destroy_gctx = false; malloc_mutex_unlock(gctx->lock); if (destroy_gctx) - prof_gctx_maybe_destroy(tsd, gctx, tdata); + prof_gctx_try_destroy(tsd, gctx, tdata); if (destroy_tdata) prof_tdata_destroy(tsd, tdata); @@ -657,7 +666,7 @@ prof_lookup_global(tsd_t *tsd, prof_bt_t *bt, prof_tdata_t *tdata, } else { /* * Increment nlimbo, in order to avoid a race condition with - * prof_tctx_destroy()/prof_gctx_maybe_destroy(). + * prof_tctx_destroy()/prof_gctx_try_destroy(). 
*/ malloc_mutex_lock(gctx.p->lock); gctx.p->nlimbo++; @@ -710,7 +719,7 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) ret.v = imalloc(tsd, sizeof(prof_tctx_t)); if (ret.p == NULL) { if (new_gctx) - prof_gctx_maybe_destroy(tsd, gctx, tdata); + prof_gctx_try_destroy(tsd, gctx, tdata); return (NULL); } ret.p->tdata = tdata; @@ -723,7 +732,7 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) malloc_mutex_unlock(tdata->lock); if (error) { if (new_gctx) - prof_gctx_maybe_destroy(tsd, gctx, tdata); + prof_gctx_try_destroy(tsd, gctx, tdata); idalloc(tsd, ret.v); return (NULL); } @@ -792,6 +801,31 @@ prof_sample_threshold_update(prof_tdata_t *tdata) #endif } +#ifdef JEMALLOC_JET +static prof_tdata_t * +prof_tdata_count_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, void *arg) +{ + size_t *tdata_count = (size_t *)arg; + + (*tdata_count)++; + + return (NULL); +} + +size_t +prof_tdata_count(void) +{ + size_t tdata_count = 0; + + malloc_mutex_lock(&tdatas_mtx); + tdata_tree_iter(&tdatas, NULL, prof_tdata_count_iter, + (void *)&tdata_count); + malloc_mutex_unlock(&tdatas_mtx); + + return (tdata_count); +} +#endif + #ifdef JEMALLOC_JET size_t prof_bt_count(void) @@ -998,7 +1032,6 @@ static prof_tctx_t * prof_tctx_finish_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) { prof_tctx_t *ret; - tsd_t *tsd = (tsd_t *)arg; switch (tctx->state) { case prof_tctx_state_nominal: @@ -1008,9 +1041,7 @@ prof_tctx_finish_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) tctx->state = prof_tctx_state_nominal; break; case prof_tctx_state_purgatory: - ret = tctx_tree_next(tctxs, tctx); - tctx_tree_remove(tctxs, tctx); - idalloc(tsd, tctx); + ret = tctx; goto label_return; default: not_reached(); @@ -1056,27 +1087,47 @@ prof_gctx_merge_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *arg) return (NULL); } -static prof_gctx_t * -prof_gctx_finish_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *arg) +static void +prof_gctx_finish(tsd_t *tsd, prof_gctx_tree_t *gctxs) { - tsd_t 
*tsd = (tsd_t *)arg; prof_tdata_t *tdata = prof_tdata_get(tsd, false); - prof_tctx_t *next; - bool destroy_gctx; + prof_gctx_t *gctx; - malloc_mutex_lock(gctx->lock); - next = NULL; - do { - next = tctx_tree_iter(&gctx->tctxs, next, prof_tctx_finish_iter, - tsd); - } while (next != NULL); - gctx->nlimbo--; - destroy_gctx = prof_gctx_should_destroy(gctx); - malloc_mutex_unlock(gctx->lock); - if (destroy_gctx) - prof_gctx_maybe_destroy(tsd, gctx, tdata); + /* + * Standard tree iteration won't work here, because as soon as we + * decrement gctx->nlimbo and unlock gctx, another thread can + * concurrently destroy it, which will corrupt the tree. Therefore, + * tear down the tree one node at a time during iteration. + */ + while ((gctx = gctx_tree_first(gctxs)) != NULL) { + gctx_tree_remove(gctxs, gctx); + malloc_mutex_lock(gctx->lock); + { + prof_tctx_t *next; - return (NULL); + next = NULL; + do { + prof_tctx_t *to_destroy = + tctx_tree_iter(&gctx->tctxs, next, + prof_tctx_finish_iter, NULL); + if (to_destroy != NULL) { + next = tctx_tree_next(&gctx->tctxs, + to_destroy); + tctx_tree_remove(&gctx->tctxs, + to_destroy); + idalloc(tsd, to_destroy); + } else + next = NULL; + } while (next != NULL); + } + gctx->nlimbo--; + if (prof_gctx_should_destroy(gctx)) { + gctx->nlimbo++; + malloc_mutex_unlock(gctx->lock); + prof_gctx_try_destroy(tsd, gctx, tdata); + } else + malloc_mutex_unlock(gctx->lock); + } } static prof_tdata_t * @@ -1085,7 +1136,7 @@ prof_tdata_merge_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, void *arg) prof_cnt_t *cnt_all = (prof_cnt_t *)arg; malloc_mutex_lock(tdata->lock); - if (tdata->state != prof_tdata_state_expired) { + if (!tdata->expired) { size_t tabind; union { prof_tctx_t *p; @@ -1130,6 +1181,10 @@ prof_tdata_dump_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, void *arg) return (NULL); } +#ifdef JEMALLOC_JET +#undef prof_dump_header +#define prof_dump_header JEMALLOC_N(prof_dump_header_impl) +#endif static bool prof_dump_header(bool 
propagate_err, const prof_cnt_t *cnt_all) { @@ -1148,6 +1203,11 @@ prof_dump_header(bool propagate_err, const prof_cnt_t *cnt_all) malloc_mutex_unlock(&tdatas_mtx); return (ret); } +#ifdef JEMALLOC_JET +#undef prof_dump_header +#define prof_dump_header JEMALLOC_N(prof_dump_header) +prof_dump_header_t *prof_dump_header = JEMALLOC_N(prof_dump_header_impl); +#endif /* gctx->lock is held. */ static bool @@ -1277,7 +1337,7 @@ prof_gctx_dump_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *arg) malloc_mutex_lock(gctx->lock); if (prof_dump_gctx(propagate_err, gctx, &gctx->bt, gctxs)) { - ret = gctx_tree_next(gctxs, gctx); + ret = gctx; goto label_return; } @@ -1302,7 +1362,7 @@ prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, bool leakcheck) cassert(config_prof); - tdata = prof_tdata_get(tsd, false); + tdata = prof_tdata_get(tsd, true); if (tdata == NULL) return (true); @@ -1352,7 +1412,7 @@ prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, bool leakcheck) if (prof_dump_close(propagate_err)) goto label_open_close_error; - gctx_tree_iter(&gctxs, NULL, prof_gctx_finish_iter, tsd); + prof_gctx_finish(tsd, &gctxs); malloc_mutex_unlock(&prof_dump_mtx); if (leakcheck) @@ -1362,7 +1422,7 @@ prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, bool leakcheck) label_write_error: prof_dump_close(propagate_err); label_open_close_error: - gctx_tree_iter(&gctxs, NULL, prof_gctx_finish_iter, tsd); + prof_gctx_finish(tsd, &gctxs); malloc_mutex_unlock(&prof_dump_mtx); return (true); } @@ -1533,7 +1593,7 @@ prof_thr_uid_alloc(void) } static prof_tdata_t * -prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid) +prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim) { prof_tdata_t *tdata; @@ -1546,8 +1606,10 @@ prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid) tdata->lock = prof_tdata_mutex_choose(thr_uid); tdata->thr_uid = thr_uid; + tdata->thr_discrim = thr_discrim; tdata->thread_name = NULL; - tdata->state = 
prof_tdata_state_attached; + tdata->attached = true; + tdata->expired = false; if (ckh_new(tsd, &tdata->bt2tctx, PROF_CKH_MINITEMS, prof_bt_hash, prof_bt_keycomp)) { @@ -1576,14 +1638,7 @@ prof_tdata_t * prof_tdata_init(tsd_t *tsd) { - return (prof_tdata_init_impl(tsd, prof_thr_uid_alloc())); -} - -prof_tdata_t * -prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata) -{ - - return (prof_tdata_init_impl(tsd, tdata->thr_uid)); + return (prof_tdata_init_impl(tsd, prof_thr_uid_alloc(), 0)); } /* tdata->lock must be held. */ @@ -1591,22 +1646,21 @@ static bool prof_tdata_should_destroy(prof_tdata_t *tdata) { - if (tdata->state == prof_tdata_state_attached) + if (tdata->attached) return (false); if (ckh_count(&tdata->bt2tctx) != 0) return (false); return (true); } +/* tdatas_mtx must be held. */ static void -prof_tdata_destroy(tsd_t *tsd, prof_tdata_t *tdata) +prof_tdata_destroy_locked(tsd_t *tsd, prof_tdata_t *tdata) { assert(prof_tdata_should_destroy(tdata)); - malloc_mutex_lock(&tdatas_mtx); tdata_tree_remove(&tdatas, tdata); - malloc_mutex_unlock(&tdatas_mtx); if (tdata->thread_name != NULL) idalloc(tsd, tdata->thread_name); @@ -1615,14 +1669,22 @@ prof_tdata_destroy(tsd_t *tsd, prof_tdata_t *tdata) } static void -prof_tdata_state_transition(tsd_t *tsd, prof_tdata_t *tdata, - prof_tdata_state_t state) +prof_tdata_destroy(tsd_t *tsd, prof_tdata_t *tdata) +{ + + malloc_mutex_lock(&tdatas_mtx); + prof_tdata_destroy_locked(tsd, tdata); + malloc_mutex_unlock(&tdatas_mtx); +} + +static void +prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata) { bool destroy_tdata; malloc_mutex_lock(tdata->lock); - if (tdata->state != state) { - tdata->state = state; + if (tdata->attached) { + tdata->attached = false; destroy_tdata = prof_tdata_should_destroy(tdata); } else destroy_tdata = false; @@ -1631,32 +1693,44 @@ prof_tdata_state_transition(tsd_t *tsd, prof_tdata_t *tdata, prof_tdata_destroy(tsd, tdata); } -static void -prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata) +prof_tdata_t * 
+prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata) { + uint64_t thr_uid = tdata->thr_uid; + uint64_t thr_discrim = tdata->thr_discrim + 1; - prof_tdata_state_transition(tsd, tdata, prof_tdata_state_detached); + prof_tdata_detach(tsd, tdata); + return (prof_tdata_init_impl(tsd, thr_uid, thr_discrim)); } -static void -prof_tdata_expire(tsd_t *tsd, prof_tdata_t *tdata) +static bool +prof_tdata_expire(prof_tdata_t *tdata) { + bool destroy_tdata; - prof_tdata_state_transition(tsd, tdata, prof_tdata_state_expired); + malloc_mutex_lock(tdata->lock); + if (!tdata->expired) { + tdata->expired = true; + destroy_tdata = tdata->attached ? false : + prof_tdata_should_destroy(tdata); + } else + destroy_tdata = false; + malloc_mutex_unlock(tdata->lock); + + return (destroy_tdata); } static prof_tdata_t * prof_tdata_reset_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, void *arg) { - tsd_t *tsd = (tsd_t *)arg; - prof_tdata_expire(tsd, tdata); - return (NULL); + return (prof_tdata_expire(tdata) ? tdata : NULL); } void prof_reset(tsd_t *tsd, size_t lg_sample) { + prof_tdata_t *next; assert(lg_sample < (sizeof(uint64_t) << 3)); @@ -1664,7 +1738,18 @@ prof_reset(tsd_t *tsd, size_t lg_sample) malloc_mutex_lock(&tdatas_mtx); lg_prof_sample = lg_sample; - tdata_tree_iter(&tdatas, NULL, prof_tdata_reset_iter, tsd); + + next = NULL; + do { + prof_tdata_t *to_destroy = tdata_tree_iter(&tdatas, next, + prof_tdata_reset_iter, NULL); + if (to_destroy != NULL) { + next = tdata_tree_next(&tdatas, to_destroy); + tdata_tree_remove(&tdatas, to_destroy); + prof_tdata_destroy(tsd, to_destroy); + } else + next = NULL; + } while (next != NULL); malloc_mutex_unlock(&tdatas_mtx); malloc_mutex_unlock(&prof_dump_mtx); diff --git a/test/unit/prof_reset.c b/test/unit/prof_reset.c new file mode 100644 index 00000000..73fda419 --- /dev/null +++ b/test/unit/prof_reset.c @@ -0,0 +1,238 @@ +#include "test/jemalloc_test.h" + +#ifdef JEMALLOC_PROF +const char *malloc_conf = + 
"prof:true,prof_active:false,lg_prof_sample:0"; +#endif + +static int +prof_dump_open_intercept(bool propagate_err, const char *filename) +{ + int fd; + + fd = open("/dev/null", O_WRONLY); + assert_d_ne(fd, -1, "Unexpected open() failure"); + + return (fd); +} + +TEST_BEGIN(test_prof_reset_basic) +{ + size_t lg_prof_sample_orig, lg_prof_sample, lg_prof_sample_next; + size_t sz; + unsigned i; + + sz = sizeof(size_t); + assert_d_eq(mallctl("opt.lg_prof_sample", &lg_prof_sample_orig, &sz, + NULL, 0), 0, + "Unexpected mallctl failure while reading profiling sample rate"); + assert_zu_eq(lg_prof_sample_orig, 0, + "Unexpected profiling sample rate"); + sz = sizeof(size_t); + assert_d_eq(mallctl("prof.lg_sample", &lg_prof_sample, &sz, NULL, 0), 0, + "Unexpected mallctl failure while reading profiling sample rate"); + assert_zu_eq(lg_prof_sample_orig, lg_prof_sample, + "Unexpected disagreement between \"opt.lg_prof_sample\" and " + "\"prof.lg_sample\""); + + /* Test simple resets. */ + for (i = 0; i < 2; i++) { + assert_d_eq(mallctl("prof.reset", NULL, NULL, NULL, 0), 0, + "Unexpected mallctl failure while resetting profile data"); + sz = sizeof(size_t); + assert_d_eq(mallctl("prof.lg_sample", &lg_prof_sample, &sz, + NULL, 0), 0, "Unexpected mallctl failure while reading " + "profiling sample rate"); + assert_zu_eq(lg_prof_sample_orig, lg_prof_sample, + "Unexpected profile sample rate change"); + } + + /* Test resets with prof.lg_sample changes. 
*/ + lg_prof_sample_next = 1; + for (i = 0; i < 2; i++) { + assert_d_eq(mallctl("prof.reset", NULL, NULL, + &lg_prof_sample_next, sizeof(size_t)), 0, + "Unexpected mallctl failure while resetting profile data"); + sz = sizeof(size_t); + assert_d_eq(mallctl("prof.lg_sample", &lg_prof_sample, &sz, + NULL, 0), 0, "Unexpected mallctl failure while reading " + "profiling sample rate"); + assert_zu_eq(lg_prof_sample, lg_prof_sample_next, + "Expected profile sample rate change"); + lg_prof_sample_next = lg_prof_sample_orig; + } + + /* Make sure the test code restored prof.lg_sample. */ + sz = sizeof(size_t); + assert_d_eq(mallctl("prof.lg_sample", &lg_prof_sample, &sz, NULL, 0), 0, + "Unexpected mallctl failure while reading profiling sample rate"); + assert_zu_eq(lg_prof_sample_orig, lg_prof_sample, + "Unexpected disagreement between \"opt.lg_prof_sample\" and " + "\"prof.lg_sample\""); +} +TEST_END + +bool prof_dump_header_intercepted = false; +prof_cnt_t cnt_all_copy = {0, 0, 0, 0}; +static bool +prof_dump_header_intercept(bool propagate_err, const prof_cnt_t *cnt_all) +{ + + prof_dump_header_intercepted = true; + memcpy(&cnt_all_copy, cnt_all, sizeof(prof_cnt_t)); + + return (false); +} + +TEST_BEGIN(test_prof_reset_cleanup) +{ + bool active; + void *p; + prof_dump_header_t *prof_dump_header_orig; + + active = true; + assert_d_eq(mallctl("prof.active", NULL, NULL, &active, sizeof(active)), + 0, "Unexpected mallctl failure while activating profiling"); + + // XXX Verify that reset actually drops backtrace count to 0. Alloc an + // object, reset, check bt count, free. prof_bt_count() doesn't do the + // right thing; we need to iterate during dump and count backtraces. + // Or, just intercept prof_dump_header(), which has enough information + // for these purposes. 
+ + assert_zu_eq(prof_bt_count(), 0, "Expected 0 backtraces"); + p = mallocx(1, 0); + assert_ptr_not_null(p, "Unexpected mallocx() failure"); + assert_zu_eq(prof_bt_count(), 1, "Expected 1 backtrace"); + + prof_dump_header_orig = prof_dump_header; + prof_dump_header = prof_dump_header_intercept; + assert_false(prof_dump_header_intercepted, "Unexpected intercept"); + + assert_d_eq(mallctl("prof.dump", NULL, NULL, NULL, 0), + 0, "Unexpected error while dumping heap profile"); + assert_true(prof_dump_header_intercepted, "Expected intercept"); + assert_u64_eq(cnt_all_copy.curobjs, 1, "Expected 1 allocation"); + + assert_d_eq(mallctl("prof.reset", NULL, NULL, NULL, 0), 0, + "Unexpected error while resetting heap profile data"); + assert_d_eq(mallctl("prof.dump", NULL, NULL, NULL, 0), + 0, "Unexpected error while dumping heap profile"); + assert_u64_eq(cnt_all_copy.curobjs, 0, "Expected 0 allocations"); + assert_zu_eq(prof_bt_count(), 1, "Expected 1 backtrace"); + + prof_dump_header = prof_dump_header_orig; + + dallocx(p, 0); + assert_zu_eq(prof_bt_count(), 0, "Expected 0 backtraces"); + + active = false; + assert_d_eq(mallctl("prof.active", NULL, NULL, &active, sizeof(active)), + 0, "Unexpected mallctl failure while deactivating profiling"); +} +TEST_END + +#define NTHREADS 4 +#define NALLOCS_PER_THREAD (1U << 13) +#define OBJ_RING_BUF_COUNT 1531 +#define RESET_INTERVAL (1U << 10) +#define DUMP_INTERVAL 3677 +static void * +thd_start(void *varg) +{ + unsigned thd_ind = *(unsigned *)varg; + unsigned i; + void *objs[OBJ_RING_BUF_COUNT]; + + memset(objs, 0, sizeof(objs)); + + for (i = 0; i < NALLOCS_PER_THREAD; i++) { + if (i % RESET_INTERVAL == 0) { + assert_d_eq(mallctl("prof.reset", NULL, NULL, NULL, 0), + 0, "Unexpected error while resetting heap profile " + "data"); + } + + if (i % DUMP_INTERVAL == 0) { + assert_d_eq(mallctl("prof.dump", NULL, NULL, NULL, 0), + 0, "Unexpected error while dumping heap profile"); + } + + { + void **pp = &objs[i % OBJ_RING_BUF_COUNT]; + 
if (*pp != NULL) { + dallocx(*pp, 0); + *pp = NULL; + } + *pp = btalloc(1, thd_ind*NALLOCS_PER_THREAD + i); + assert_ptr_not_null(*pp, + "Unexpected btalloc() failure"); + } + } + + /* Clean up any remaining objects. */ + for (i = 0; i < OBJ_RING_BUF_COUNT; i++) { + void **pp = &objs[i % OBJ_RING_BUF_COUNT]; + if (*pp != NULL) { + dallocx(*pp, 0); + *pp = NULL; + } + } + + return (NULL); +} + +TEST_BEGIN(test_prof_reset) +{ + bool active; + thd_t thds[NTHREADS]; + unsigned thd_args[NTHREADS]; + unsigned i; + size_t bt_count, tdata_count; + + test_skip_if(!config_prof); + + bt_count = prof_bt_count(); + assert_zu_eq(bt_count, 0, + "Unexpected pre-existing tdata structures"); + tdata_count = prof_tdata_count(); + + active = true; + assert_d_eq(mallctl("prof.active", NULL, NULL, &active, sizeof(active)), + 0, "Unexpected mallctl failure while activating profiling"); + + for (i = 0; i < NTHREADS; i++) { + thd_args[i] = i; + thd_create(&thds[i], thd_start, (void *)&thd_args[i]); + } + for (i = 0; i < NTHREADS; i++) + thd_join(thds[i], NULL); + + assert_zu_eq(prof_bt_count(), bt_count, + "Unexpected bactrace count change"); + assert_zu_eq(prof_tdata_count(), tdata_count, + "Unexpected remaining tdata structures"); + + active = false; + assert_d_eq(mallctl("prof.active", NULL, NULL, &active, sizeof(active)), + 0, "Unexpected mallctl failure while deactivating profiling"); +} +TEST_END +#undef NTHREADS +#undef NALLOCS_PER_THREAD +#undef OBJ_RING_BUF_COUNT +#undef RESET_INTERVAL +#undef DUMP_INTERVAL + +int +main(void) +{ + + /* Intercept dumping prior to running any tests. */ + prof_dump_open = prof_dump_open_intercept; + + return (test( + test_prof_reset_basic, + test_prof_reset_cleanup, + test_prof_reset)); +} From ebbd0c91f0935421c04d05c8bdc6e38762a1e561 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 2 Oct 2014 23:05:23 -0700 Subject: [PATCH 0542/3378] Remove obsolete comment. 
--- test/unit/prof_reset.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/test/unit/prof_reset.c b/test/unit/prof_reset.c index 73fda419..62a4d5af 100644 --- a/test/unit/prof_reset.c +++ b/test/unit/prof_reset.c @@ -94,12 +94,6 @@ TEST_BEGIN(test_prof_reset_cleanup) assert_d_eq(mallctl("prof.active", NULL, NULL, &active, sizeof(active)), 0, "Unexpected mallctl failure while activating profiling"); - // XXX Verify that reset actually drops backtrace count to 0. Alloc an - // object, reset, check bt count, free. prof_bt_count() doesn't do the - // right thing; we need to iterate during dump and count backtraces. - // Or, just intercept prof_dump_header(), which has enough information - // for these purposes. - assert_zu_eq(prof_bt_count(), 0, "Expected 0 backtraces"); p = mallocx(1, 0); assert_ptr_not_null(p, "Unexpected mallocx() failure"); From 551ebc43647521bdd0bc78558b106762b3388928 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 3 Oct 2014 10:16:09 -0700 Subject: [PATCH 0543/3378] Convert to uniform style: cond == false --> !cond --- include/jemalloc/internal/arena.h | 11 +++--- include/jemalloc/internal/bitmap.h | 8 ++-- .../jemalloc/internal/jemalloc_internal.h.in | 2 +- include/jemalloc/internal/prof.h | 2 +- include/jemalloc/internal/rb.h | 7 ++-- include/jemalloc/internal/tcache.h | 8 ++-- src/arena.c | 28 +++++++------- src/chunk.c | 16 ++++---- src/chunk_dss.c | 4 +- src/chunk_mmap.c | 4 +- src/ckh.c | 10 ++--- src/ctl.c | 37 +++++++++---------- src/huge.c | 8 ++-- src/jemalloc.c | 22 +++++------ src/prof.c | 30 +++++++-------- src/stats.c | 2 +- src/tcache.c | 8 ++-- src/util.c | 12 +++--- test/unit/ckh.c | 3 +- test/unit/rb.c | 4 +- 20 files changed, 111 insertions(+), 115 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 48fd2055..2e9920ce 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -1111,13 +1111,12 @@ arena_salloc(const void *ptr, bool 
demote) pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; assert(arena_mapbits_allocated_get(chunk, pageind) != 0); binind = arena_mapbits_binind_get(chunk, pageind); - if (unlikely(binind == BININD_INVALID || (config_prof && demote == false - && arena_mapbits_large_get(chunk, pageind) != 0))) { + if (unlikely(binind == BININD_INVALID || (config_prof && !demote && + arena_mapbits_large_get(chunk, pageind) != 0))) { /* - * Large allocation. In the common case (demote == true), and - * as this is an inline function, most callers will only end up - * looking at binind to determine that ptr is a small - * allocation. + * Large allocation. In the common case (demote), and as this + * is an inline function, most callers will only end up looking + * at binind to determine that ptr is a small allocation. */ assert(((uintptr_t)ptr & PAGE_MASK) == 0); ret = arena_mapbits_large_size_get(chunk, pageind); diff --git a/include/jemalloc/internal/bitmap.h b/include/jemalloc/internal/bitmap.h index 4ca40ffd..fcc6005c 100644 --- a/include/jemalloc/internal/bitmap.h +++ b/include/jemalloc/internal/bitmap.h @@ -139,7 +139,7 @@ bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) bitmap_t g; assert(bit < binfo->nbits); - assert(bitmap_get(bitmap, binfo, bit) == false); + assert(!bitmap_get(bitmap, binfo, bit)); goff = bit >> LG_BITMAP_GROUP_NBITS; gp = &bitmap[goff]; g = *gp; @@ -172,7 +172,7 @@ bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo) bitmap_t g; unsigned i; - assert(bitmap_full(bitmap, binfo) == false); + assert(!bitmap_full(bitmap, binfo)); i = binfo->nlevels - 1; g = bitmap[binfo->levels[i].group_offset]; @@ -204,7 +204,7 @@ bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) assert((g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK))) == 0); g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK); *gp = g; - assert(bitmap_get(bitmap, binfo, bit) == false); + assert(!bitmap_get(bitmap, binfo, bit)); /* Propagate group state transitions up the 
tree. */ if (propagate) { unsigned i; @@ -218,7 +218,7 @@ bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) == 0); g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK); *gp = g; - if (propagate == false) + if (!propagate) break; } } diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index bff2bd27..ed25172f 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -714,7 +714,7 @@ isalloc(const void *ptr, bool demote) assert(ptr != NULL); /* Demotion only makes sense if config_prof is true. */ - assert(config_prof || demote == false); + assert(config_prof || !demote); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (chunk != ptr) diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index 91c871de..ea52a63b 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -388,7 +388,7 @@ prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update, /* Compute new sample threshold. */ if (update) prof_sample_threshold_update(tdata); - return (tdata->active == false); + return (!tdata->active); } } diff --git a/include/jemalloc/internal/rb.h b/include/jemalloc/internal/rb.h index ffe3bb0d..64fab89c 100644 --- a/include/jemalloc/internal/rb.h +++ b/include/jemalloc/internal/rb.h @@ -593,7 +593,7 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ if (left != &rbtree->rbt_nil) { \ /* node has no successor, but it has a left child. */\ /* Splice node out, without losing the left child. 
*/\ - assert(rbtn_red_get(a_type, a_field, node) == false); \ + assert(!rbtn_red_get(a_type, a_field, node)); \ assert(rbtn_red_get(a_type, a_field, left)); \ rbtn_black_set(a_type, a_field, left); \ if (pathp == path) { \ @@ -629,8 +629,7 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ if (pathp->cmp < 0) { \ rbtn_left_set(a_type, a_field, pathp->node, \ pathp[1].node); \ - assert(rbtn_red_get(a_type, a_field, pathp[1].node) \ - == false); \ + assert(!rbtn_red_get(a_type, a_field, pathp[1].node)); \ if (rbtn_red_get(a_type, a_field, pathp->node)) { \ a_type *right = rbtn_right_get(a_type, a_field, \ pathp->node); \ @@ -862,7 +861,7 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ } \ /* Set root. */ \ rbtree->rbt_root = path->node; \ - assert(rbtn_red_get(a_type, a_field, rbtree->rbt_root) == false); \ + assert(!rbtn_red_get(a_type, a_field, rbtree->rbt_root)); \ } \ a_attr a_type * \ a_prefix##iter_recurse(a_rbt_type *rbtree, a_type *node, \ diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index 6804668f..bc0b41c7 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -191,9 +191,9 @@ tcache_get(tsd_t *tsd, bool create) { tcache_t *tcache; - if (config_tcache == false) + if (!config_tcache) return (NULL); - if (config_lazy_lock && isthreaded == false) + if (config_lazy_lock && !isthreaded) return (NULL); /* * If create is true, the caller has already assured that tsd is @@ -261,7 +261,7 @@ tcache_alloc_small(tcache_t *tcache, size_t size, bool zero) } assert(tcache_salloc(ret) == size); - if (likely(zero == false)) { + if (likely(!zero)) { if (config_fill) { if (unlikely(opt_junk)) { arena_alloc_junk_small(ret, @@ -315,7 +315,7 @@ tcache_alloc_large(tcache_t *tcache, size_t size, bool zero) arena_mapbits_large_binind_set(chunk, pageind, BININD_INVALID); } - if (likely(zero == false)) { + if (likely(!zero)) { if (config_fill) { if (unlikely(opt_junk)) memset(ret, 0xa5, 
size); diff --git a/src/arena.c b/src/arena.c index ef391b16..79fea728 100644 --- a/src/arena.c +++ b/src/arena.c @@ -178,7 +178,7 @@ arena_run_reg_alloc(arena_run_t *run, arena_bin_info_t *bin_info) void *rpages; assert(run->nfree > 0); - assert(bitmap_full(run->bitmap, &bin_info->bitmap_info) == false); + assert(!bitmap_full(run->bitmap, &bin_info->bitmap_info)); regind = bitmap_sfu(run->bitmap, &bin_info->bitmap_info); miscelm = arena_run_to_miscelm(run); @@ -524,7 +524,7 @@ arena_chunk_init_hard(arena_t *arena) * There is no need to initialize the internal page map entries unless * the chunk is not zeroed. */ - if (zero == false) { + if (!zero) { JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED( (void *)arena_bitselm_get(chunk, map_bias+1), (size_t)((uintptr_t) arena_bitselm_get(chunk, @@ -782,7 +782,7 @@ arena_compute_npurge(arena_t *arena, bool all) * Compute the minimum number of pages that this thread should try to * purge. */ - if (all == false) { + if (!all) { size_t threshold = (arena->nactive >> opt_lg_dirty_mult); npurge = arena->ndirty - threshold; @@ -829,7 +829,7 @@ arena_stash_dirty(arena_t *arena, bool all, size_t npurge, nstashed += npages; - if (all == false && nstashed >= npurge) + if (!all && nstashed >= npurge) break; } @@ -1049,7 +1049,7 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, bool cleaned) */ assert(arena_mapbits_dirty_get(chunk, run_ind) == arena_mapbits_dirty_get(chunk, run_ind+run_pages-1)); - if (cleaned == false && arena_mapbits_dirty_get(chunk, run_ind) != 0) + if (!cleaned && arena_mapbits_dirty_get(chunk, run_ind) != 0) dirty = true; flag_dirty = dirty ? 
CHUNK_MAP_DIRTY : 0; @@ -1481,10 +1481,10 @@ arena_malloc_small(arena_t *arena, size_t size, bool zero) bin->stats.nrequests++; } malloc_mutex_unlock(&bin->lock); - if (config_prof && isthreaded == false && arena_prof_accum(arena, size)) + if (config_prof && !isthreaded && arena_prof_accum(arena, size)) prof_idump(); - if (zero == false) { + if (!zero) { if (config_fill) { if (unlikely(opt_junk)) { arena_alloc_junk_small(ret, @@ -1537,7 +1537,7 @@ arena_malloc_large(arena_t *arena, size_t size, bool zero) if (config_prof && idump) prof_idump(); - if (zero == false) { + if (!zero) { if (config_fill) { if (unlikely(opt_junk)) memset(ret, 0xa5, size); @@ -1608,7 +1608,7 @@ arena_palloc(arena_t *arena, size_t size, size_t alignment, bool zero) } malloc_mutex_unlock(&arena->lock); - if (config_fill && zero == false) { + if (config_fill && !zero) { if (unlikely(opt_junk)) memset(ret, 0xa5, size); else if (unlikely(opt_zero)) @@ -2008,7 +2008,7 @@ arena_ralloc_large(void *ptr, size_t oldsize, size_t size, size_t extra, bool ret = arena_ralloc_large_grow(arena, chunk, ptr, oldsize, PAGE_CEILING(size), psize - PAGE_CEILING(size), zero); - if (config_fill && ret == false && zero == false) { + if (config_fill && !ret && !zero) { if (unlikely(opt_junk)) { memset((void *)((uintptr_t)ptr + oldsize), 0xa5, isalloc(ptr, @@ -2044,8 +2044,8 @@ arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, } else { assert(size <= arena_maxclass); if (size + extra > SMALL_MAXCLASS) { - if (arena_ralloc_large(ptr, oldsize, size, - extra, zero) == false) + if (!arena_ralloc_large(ptr, oldsize, size, + extra, zero)) return (false); } } @@ -2064,7 +2064,7 @@ arena_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size, size_t copysize; /* Try to avoid moving the allocation. 
*/ - if (arena_ralloc_no_move(ptr, oldsize, size, extra, zero) == false) + if (!arena_ralloc_no_move(ptr, oldsize, size, extra, zero)) return (ptr); /* @@ -2130,7 +2130,7 @@ bool arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec) { - if (have_dss == false) + if (!have_dss) return (dss_prec != dss_prec_disabled); malloc_mutex_lock(&arena->lock); arena->dss_prec = dss_prec; diff --git a/src/chunk.c b/src/chunk.c index 874002cf..cde8606e 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -121,7 +121,7 @@ chunk_recycle(extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, size_t size, if (node != NULL) base_node_dalloc(node); if (*zero) { - if (zeroed == false) + if (!zeroed) memset(ret, 0, size); else if (config_debug) { size_t i; @@ -136,10 +136,10 @@ chunk_recycle(extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, size_t size, } /* - * If the caller specifies (*zero == false), it is still possible to receive - * zeroed memory, in which case *zero is toggled to true. arena_chunk_alloc() - * takes advantage of this to avoid demanding zeroed chunks, but taking - * advantage of them if they are returned. + * If the caller specifies (!*zero), it is still possible to receive zeroed + * memory, in which case *zero is toggled to true. arena_chunk_alloc() takes + * advantage of this to avoid demanding zeroed chunks, but taking advantage of + * them if they are returned. 
*/ static void * chunk_alloc_core(size_t size, size_t alignment, bool base, bool *zero, @@ -186,7 +186,7 @@ chunk_register(void *chunk, size_t size, bool base) assert(chunk != NULL); assert(CHUNK_ADDR2BASE(chunk) == chunk); - if (config_ivsalloc && base == false) { + if (config_ivsalloc && !base) { if (rtree_set(chunks_rtree, (uintptr_t)chunk, 1)) return (true); } @@ -288,7 +288,7 @@ chunk_record(extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, void *chunk, extent_tree_szad_remove(chunks_szad, node); node->addr = chunk; node->size += size; - node->zeroed = (node->zeroed && (unzeroed == false)); + node->zeroed = (node->zeroed && !unzeroed); extent_tree_szad_insert(chunks_szad, node); } else { /* Coalescing forward failed, so insert a new node. */ @@ -305,7 +305,7 @@ chunk_record(extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, void *chunk, xnode = NULL; /* Prevent deallocation below. */ node->addr = chunk; node->size = size; - node->zeroed = (unzeroed == false); + node->zeroed = !unzeroed; extent_tree_ad_insert(chunks_ad, node); extent_tree_szad_insert(chunks_szad, node); } diff --git a/src/chunk_dss.c b/src/chunk_dss.c index 82faf918..cce71041 100644 --- a/src/chunk_dss.c +++ b/src/chunk_dss.c @@ -45,7 +45,7 @@ chunk_dss_prec_get(void) { dss_prec_t ret; - if (have_dss == false) + if (!have_dss) return (dss_prec_disabled); malloc_mutex_lock(&dss_mtx); ret = dss_prec_default; @@ -57,7 +57,7 @@ bool chunk_dss_prec_set(dss_prec_t dss_prec) { - if (have_dss == false) + if (!have_dss) return (dss_prec != dss_prec_disabled); malloc_mutex_lock(&dss_mtx); dss_prec_default = dss_prec; diff --git a/src/chunk_mmap.c b/src/chunk_mmap.c index 65137b41..7e02c102 100644 --- a/src/chunk_mmap.c +++ b/src/chunk_mmap.c @@ -132,7 +132,7 @@ pages_purge(void *addr, size_t length) # error "No madvise(2) flag defined for purging unused dirty pages." 
# endif int err = madvise(addr, length, JEMALLOC_MADV_PURGE); - unzeroed = (JEMALLOC_MADV_ZEROS == false || err != 0); + unzeroed = (!JEMALLOC_MADV_ZEROS || err != 0); # undef JEMALLOC_MADV_PURGE # undef JEMALLOC_MADV_ZEROS #else @@ -209,5 +209,5 @@ chunk_dalloc_mmap(void *chunk, size_t size) if (config_munmap) pages_unmap(chunk, size); - return (config_munmap == false); + return (!config_munmap); } diff --git a/src/ckh.c b/src/ckh.c index 7c7cc098..3a545966 100644 --- a/src/ckh.c +++ b/src/ckh.c @@ -185,7 +185,7 @@ ckh_evict_reloc_insert(ckh_t *ckh, size_t argbucket, void const **argkey, } bucket = tbucket; - if (ckh_try_bucket_insert(ckh, bucket, key, data) == false) + if (!ckh_try_bucket_insert(ckh, bucket, key, data)) return (false); } } @@ -201,12 +201,12 @@ ckh_try_insert(ckh_t *ckh, void const**argkey, void const**argdata) /* Try to insert in primary bucket. */ bucket = hashes[0] & ((ZU(1) << ckh->lg_curbuckets) - 1); - if (ckh_try_bucket_insert(ckh, bucket, key, data) == false) + if (!ckh_try_bucket_insert(ckh, bucket, key, data)) return (false); /* Try to insert in secondary bucket. 
*/ bucket = hashes[1] & ((ZU(1) << ckh->lg_curbuckets) - 1); - if (ckh_try_bucket_insert(ckh, bucket, key, data) == false) + if (!ckh_try_bucket_insert(ckh, bucket, key, data)) return (false); /* @@ -281,7 +281,7 @@ ckh_grow(tsd_t *tsd, ckh_t *ckh) tab = ttab; ckh->lg_curbuckets = lg_curcells - LG_CKH_BUCKET_CELLS; - if (ckh_rebuild(ckh, tab) == false) { + if (!ckh_rebuild(ckh, tab)) { idalloc(tsd, tab); break; } @@ -327,7 +327,7 @@ ckh_shrink(tsd_t *tsd, ckh_t *ckh) tab = ttab; ckh->lg_curbuckets = lg_curcells - LG_CKH_BUCKET_CELLS; - if (ckh_rebuild(ckh, tab) == false) { + if (!ckh_rebuild(ckh, tab)) { idalloc(tsd, tab); #ifdef CKH_COUNT ckh->nshrinks++; diff --git a/src/ctl.c b/src/ctl.c index c55f6e44..b85710c0 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -36,8 +36,7 @@ static inline const ctl_indexed_node_t * ctl_indexed_node(const ctl_node_t *node) { - return ((node->named == false) ? (const ctl_indexed_node_t *)node : - NULL); + return (!node->named ? (const ctl_indexed_node_t *)node : NULL); } /******************************************************************************/ @@ -693,7 +692,7 @@ ctl_init(void) bool ret; malloc_mutex_lock(&ctl_mtx); - if (ctl_initialized == false) { + if (!ctl_initialized) { /* * Allocate space for one extra arena stats element, which * contains summed stats across all arenas. 
@@ -843,7 +842,7 @@ ctl_byname(const char *name, void *oldp, size_t *oldlenp, void *newp, size_t mib[CTL_MAX_DEPTH]; const ctl_named_node_t *node; - if (ctl_initialized == false && ctl_init()) { + if (!ctl_initialized && ctl_init()) { ret = EAGAIN; goto label_return; } @@ -870,7 +869,7 @@ ctl_nametomib(const char *name, size_t *mibp, size_t *miblenp) { int ret; - if (ctl_initialized == false && ctl_init()) { + if (!ctl_initialized && ctl_init()) { ret = EAGAIN; goto label_return; } @@ -888,7 +887,7 @@ ctl_bymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, const ctl_named_node_t *node; size_t i; - if (ctl_initialized == false && ctl_init()) { + if (!ctl_initialized && ctl_init()) { ret = EAGAIN; goto label_return; } @@ -1015,7 +1014,7 @@ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ int ret; \ t oldval; \ \ - if ((c) == false) \ + if (!(c)) \ return (ENOENT); \ if (l) \ malloc_mutex_lock(&ctl_mtx); \ @@ -1038,7 +1037,7 @@ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ int ret; \ t oldval; \ \ - if ((c) == false) \ + if (!(c)) \ return (ENOENT); \ malloc_mutex_lock(&ctl_mtx); \ READONLY(); \ @@ -1082,7 +1081,7 @@ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ int ret; \ t oldval; \ \ - if ((c) == false) \ + if (!(c)) \ return (ENOENT); \ READONLY(); \ oldval = (v); \ @@ -1119,7 +1118,7 @@ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ t oldval; \ tsd_t *tsd; \ \ - if ((c) == false) \ + if (!(c)) \ return (ENOENT); \ READONLY(); \ tsd = tsd_tryget(); \ @@ -1291,7 +1290,7 @@ thread_tcache_enabled_ctl(const size_t *mib, size_t miblen, void *oldp, int ret; bool oldval; - if (config_tcache == false) + if (!config_tcache) return (ENOENT); oldval = tcache_enabled_get(); @@ -1315,7 +1314,7 @@ thread_tcache_flush_ctl(const size_t *mib, size_t miblen, void *oldp, { int ret; - if (config_tcache == false) + if (!config_tcache) return (ENOENT); READONLY(); @@ 
-1335,7 +1334,7 @@ thread_prof_name_ctl(const size_t *mib, size_t miblen, void *oldp, int ret; const char *oldname; - if (config_prof == false) + if (!config_prof) return (ENOENT); oldname = prof_thread_name_get(); @@ -1372,7 +1371,7 @@ thread_prof_active_ctl(const size_t *mib, size_t miblen, void *oldp, int ret; bool oldval; - if (config_prof == false) + if (!config_prof) return (ENOENT); oldval = prof_thread_active_get(); @@ -1459,7 +1458,7 @@ arena_i_dss_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, } } - if (match == false) { + if (!match) { ret = EINVAL; goto label_return; } @@ -1668,7 +1667,7 @@ prof_active_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, int ret; bool oldval; - if (config_prof == false) + if (!config_prof) return (ENOENT); malloc_mutex_lock(&ctl_mtx); /* Protect opt_prof_active. */ @@ -1697,7 +1696,7 @@ prof_dump_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, int ret; const char *filename = NULL; - if (config_prof == false) + if (!config_prof) return (ENOENT); WRITEONLY(); @@ -1721,7 +1720,7 @@ prof_reset_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, size_t lg_sample = lg_prof_sample; tsd_t *tsd; - if (config_prof == false) + if (!config_prof) return (ENOENT); WRITEONLY(); @@ -1847,7 +1846,7 @@ stats_arenas_i_index(const size_t *mib, size_t miblen, size_t i) const ctl_named_node_t * ret; malloc_mutex_lock(&ctl_mtx); - if (i > ctl_stats.narenas || ctl_stats.arenas[i].initialized == false) { + if (i > ctl_stats.narenas || !ctl_stats.arenas[i].initialized) { ret = NULL; goto label_return; } diff --git a/src/huge.c b/src/huge.c index 40d1362d..2f059b4d 100644 --- a/src/huge.c +++ b/src/huge.c @@ -62,10 +62,10 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t size, size_t alignment, extent_tree_ad_insert(&huge, node); malloc_mutex_unlock(&huge_mtx); - if (config_fill && zero == false) { + if (config_fill && !zero) { if (unlikely(opt_junk)) memset(ret, 0xa5, csize); - 
else if (unlikely(opt_zero) && is_zeroed == false) + else if (unlikely(opt_zero) && !is_zeroed) memset(ret, 0, csize); } @@ -85,7 +85,7 @@ huge_dalloc_junk(void *ptr, size_t usize) * Only bother junk filling if the chunk isn't about to be * unmapped. */ - if (config_munmap == false || (have_dss && chunk_in_dss(ptr))) + if (!config_munmap || (have_dss && chunk_in_dss(ptr))) memset(ptr, 0x5a, usize); } } @@ -156,7 +156,7 @@ huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size, size_t copysize; /* Try to avoid moving the allocation. */ - if (huge_ralloc_no_move(ptr, oldsize, size, extra) == false) + if (!huge_ralloc_no_move(ptr, oldsize, size, extra)) return (ptr); /* diff --git a/src/jemalloc.c b/src/jemalloc.c index 3012f558..0d041313 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -119,7 +119,7 @@ arenas_extend(unsigned ind) arena_t *ret; ret = (arena_t *)base_alloc(sizeof(arena_t)); - if (ret != NULL && arena_new(ret, ind) == false) { + if (ret != NULL && !arena_new(ret, ind)) { arenas[ind] = ret; return (ret); } @@ -326,7 +326,7 @@ malloc_conf_next(char const **opts_p, char const **k_p, size_t *klen_p, *k_p = opts; - for (accept = false; accept == false;) { + for (accept = false; !accept;) { switch (*opts) { case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': @@ -361,7 +361,7 @@ malloc_conf_next(char const **opts_p, char const **k_p, size_t *klen_p, } } - for (accept = false; accept == false;) { + for (accept = false; !accept;) { switch (*opts) { case ',': opts++; @@ -418,7 +418,7 @@ malloc_conf_init(void) in_valgrind = (RUNNING_ON_VALGRIND != 0) ? 
true : false; if (config_fill && unlikely(in_valgrind)) { opt_junk = false; - assert(opt_zero == false); + assert(!opt_zero); opt_quarantine = JEMALLOC_VALGRIND_QUARANTINE_DEFAULT; opt_redzone = true; } @@ -496,8 +496,8 @@ malloc_conf_init(void) opts = buf; } - while (*opts != '\0' && malloc_conf_next(&opts, &k, &klen, &v, - &vlen) == false) { + while (*opts != '\0' && !malloc_conf_next(&opts, &k, &klen, &v, + &vlen)) { #define CONF_MATCH(n) \ (sizeof(n)-1 == klen && strncmp(n, k, klen) == 0) #define CONF_HANDLE_BOOL(o, n, cont) \ @@ -607,7 +607,7 @@ malloc_conf_init(void) } } } - if (match == false) { + if (!match) { malloc_conf_error("Invalid conf value", k, klen, v, vlen); } @@ -697,13 +697,13 @@ malloc_init_hard(void) return (false); } #ifdef JEMALLOC_THREADED_INIT - if (malloc_initializer != NO_INITIALIZER && IS_INITIALIZER == false) { + if (malloc_initializer != NO_INITIALIZER && !IS_INITIALIZER) { /* Busy-wait until the initializing thread completes. */ do { malloc_mutex_unlock(&init_lock); CPU_SPINWAIT; malloc_mutex_lock(&init_lock); - } while (malloc_initialized == false); + } while (!malloc_initialized); malloc_mutex_unlock(&init_lock); return (false); } @@ -2011,7 +2011,7 @@ _malloc_prefork(void) unsigned i; #ifdef JEMALLOC_MUTEX_INIT_CB - if (malloc_initialized == false) + if (!malloc_initialized) return; #endif assert(malloc_initialized); @@ -2040,7 +2040,7 @@ _malloc_postfork(void) unsigned i; #ifdef JEMALLOC_MUTEX_INIT_CB - if (malloc_initialized == false) + if (!malloc_initialized) return; #endif assert(malloc_initialized); diff --git a/src/prof.c b/src/prof.c index 0a96d85f..29b4baaa 100644 --- a/src/prof.c +++ b/src/prof.c @@ -232,7 +232,7 @@ prof_enter(prof_tdata_t *tdata) cassert(config_prof); - assert(tdata->enq == false); + assert(!tdata->enq); tdata->enq = true; malloc_mutex_lock(&bt2gctx_mtx); @@ -578,7 +578,7 @@ prof_gctx_should_destroy(prof_gctx_t *gctx) if (opt_prof_accum) return (false); - if (tctx_tree_empty(&gctx->tctxs) == false) + if 
(!tctx_tree_empty(&gctx->tctxs)) return (false); if (gctx->nlimbo != 0) return (false); @@ -595,7 +595,7 @@ prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) assert(tctx->cnts.curobjs == 0); assert(tctx->cnts.curbytes == 0); - assert(opt_prof_accum == false); + assert(!opt_prof_accum); assert(tctx->cnts.accumobjs == 0); assert(tctx->cnts.accumbytes == 0); @@ -858,7 +858,7 @@ prof_dump_open(bool propagate_err, const char *filename) int fd; fd = creat(filename, 0644); - if (fd == -1 && propagate_err == false) { + if (fd == -1 && !propagate_err) { malloc_printf(": creat(\"%s\"), 0644) failed\n", filename); if (opt_abort) @@ -883,7 +883,7 @@ prof_dump_flush(bool propagate_err) err = write(prof_dump_fd, prof_dump_buf, prof_dump_buf_end); if (err == -1) { - if (propagate_err == false) { + if (!propagate_err) { malloc_write(": write() failed during heap " "profile flush\n"); if (opt_abort) @@ -1145,8 +1145,8 @@ prof_tdata_merge_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, void *arg) tdata->dumping = true; memset(&tdata->cnt_summed, 0, sizeof(prof_cnt_t)); - for (tabind = 0; ckh_iter(&tdata->bt2tctx, &tabind, NULL, - &tctx.v) == false;) + for (tabind = 0; !ckh_iter(&tdata->bt2tctx, &tabind, NULL, + &tctx.v);) prof_tctx_merge_tdata(tctx.p, tdata); cnt_all->curobjs += tdata->cnt_summed.curobjs; @@ -1167,7 +1167,7 @@ prof_tdata_dump_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, void *arg) { bool propagate_err = *(bool *)arg; - if (tdata->dumping == false) + if (!tdata->dumping) return (NULL); if (prof_dump_printf(propagate_err, @@ -1220,7 +1220,7 @@ prof_dump_gctx(bool propagate_err, prof_gctx_t *gctx, const prof_bt_t *bt, cassert(config_prof); /* Avoid dumping such gctx's that have no useful data. 
*/ - if ((opt_prof_accum == false && gctx->cnt_summed.curobjs == 0) || + if ((!opt_prof_accum && gctx->cnt_summed.curobjs == 0) || (opt_prof_accum && gctx->cnt_summed.accumobjs == 0)) { assert(gctx->cnt_summed.curobjs == 0); assert(gctx->cnt_summed.curbytes == 0); @@ -1374,7 +1374,7 @@ prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, bool leakcheck) * summing. */ gctx_tree_new(&gctxs); - for (tabind = 0; ckh_iter(&bt2gctx, &tabind, NULL, &gctx.v) == false;) + for (tabind = 0; !ckh_iter(&bt2gctx, &tabind, NULL, &gctx.v);) prof_dump_gctx_prep(gctx.p, &gctxs); /* @@ -1457,7 +1457,7 @@ prof_fdump(void) cassert(config_prof); - if (prof_booted == false) + if (!prof_booted) return; if ((tsd = tsd_tryget()) == NULL) return; @@ -1479,7 +1479,7 @@ prof_idump(void) cassert(config_prof); - if (prof_booted == false) + if (!prof_booted) return; if ((tsd = tsd_tryget()) == NULL) return; @@ -1508,7 +1508,7 @@ prof_mdump(const char *filename) cassert(config_prof); - if (opt_prof == false || prof_booted == false) + if (!opt_prof || !prof_booted) return (true); if ((tsd = tsd_tryget()) == NULL) return (true); @@ -1535,7 +1535,7 @@ prof_gdump(void) cassert(config_prof); - if (prof_booted == false) + if (!prof_booted) return; if ((tsd = tsd_tryget()) == NULL) return; @@ -1855,7 +1855,7 @@ prof_boot1(void) * initialized, so this function must be executed early. */ - if (opt_prof_leak && opt_prof == false) { + if (opt_prof_leak && !opt_prof) { /* * Enable opt_prof, but in such a way that profiles are never * automatically dumped. diff --git a/src/stats.c b/src/stats.c index db34275e..aa095500 100644 --- a/src/stats.c +++ b/src/stats.c @@ -505,7 +505,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, ninitialized++; } - if (ninitialized > 1 || unmerged == false) { + if (ninitialized > 1 || !unmerged) { /* Print merged arena stats. 
*/ malloc_cprintf(write_cb, cbopaque, "\nMerged arenas stats:\n"); diff --git a/src/tcache.c b/src/tcache.c index bb4c3cc0..6f3408cd 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -101,7 +101,7 @@ tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem, malloc_mutex_lock(&bin->lock); if (config_stats && arena == tcache->arena) { - assert(merged_stats == false); + assert(!merged_stats); merged_stats = true; bin->stats.nflushes++; bin->stats.nrequests += tbin->tstats.nrequests; @@ -132,7 +132,7 @@ tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem, } malloc_mutex_unlock(&bin->lock); } - if (config_stats && merged_stats == false) { + if (config_stats && !merged_stats) { /* * The flush loop didn't happen to flush to this thread's * arena, so the stats didn't get merged. Manually do so now. @@ -210,7 +210,7 @@ tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem, if (config_prof && idump) prof_idump(); } - if (config_stats && merged_stats == false) { + if (config_stats && !merged_stats) { /* * The flush loop didn't happen to flush to this thread's * arena, so the stats didn't get merged. Manually do so now. @@ -262,7 +262,7 @@ tcache_t * tcache_get_hard(tsd_t *tsd) { - if (tcache_enabled_get() == false) { + if (!tcache_enabled_get()) { tcache_enabled_set(false); /* Memoize. */ return (NULL); } diff --git a/src/util.c b/src/util.c index 1717f08e..bfd86af8 100644 --- a/src/util.c +++ b/src/util.c @@ -266,7 +266,7 @@ d2s(intmax_t x, char sign, char *s, size_t *slen_p) sign = '-'; switch (sign) { case '-': - if (neg == false) + if (!neg) break; /* Fall through. */ case ' ': @@ -329,7 +329,7 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) /* Left padding. */ \ size_t pad_len = (width == -1) ? 0 : ((slen < (size_t)width) ? 
\ (size_t)width - slen : 0); \ - if (left_justify == false && pad_len != 0) { \ + if (!left_justify && pad_len != 0) { \ size_t j; \ for (j = 0; j < pad_len; j++) \ APPEND_C(' '); \ @@ -406,19 +406,19 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) while (true) { switch (*f) { case '#': - assert(alt_form == false); + assert(!alt_form); alt_form = true; break; case '-': - assert(left_justify == false); + assert(!left_justify); left_justify = true; break; case ' ': - assert(plus_space == false); + assert(!plus_space); plus_space = true; break; case '+': - assert(plus_plus == false); + assert(!plus_plus); plus_plus = true; break; default: goto label_width; diff --git a/test/unit/ckh.c b/test/unit/ckh.c index 148b81e7..03b4f716 100644 --- a/test/unit/ckh.c +++ b/test/unit/ckh.c @@ -162,8 +162,7 @@ TEST_BEGIN(test_insert_iter_remove) memset(seen, 0, sizeof(seen)); - for (tabind = 0; ckh_iter(&ckh, &tabind, &q, &r) == - false;) { + for (tabind = 0; !ckh_iter(&ckh, &tabind, &q, &r);) { size_t k; assert_ptr_eq(q, r, "Key and val not equal"); diff --git a/test/unit/rb.c b/test/unit/rb.c index e43907f1..b38eb0e3 100644 --- a/test/unit/rb.c +++ b/test/unit/rb.c @@ -5,7 +5,7 @@ for (rbp_bh_t = (a_rbt)->rbt_root, (r_height) = 0; \ rbp_bh_t != &(a_rbt)->rbt_nil; \ rbp_bh_t = rbtn_left_get(a_type, a_field, rbp_bh_t)) { \ - if (rbtn_red_get(a_type, a_field, rbp_bh_t) == false) { \ + if (!rbtn_red_get(a_type, a_field, rbp_bh_t)) { \ (r_height)++; \ } \ } \ @@ -75,7 +75,7 @@ tree_recurse(node_t *node, unsigned black_height, unsigned black_depth, node_t *left_node = rbtn_left_get(node_t, link, node); node_t *right_node = rbtn_right_get(node_t, link, node); - if (rbtn_red_get(node_t, link, node) == false) + if (!rbtn_red_get(node_t, link, node)) black_depth++; /* Red nodes must be interleaved with black nodes. 
*/ From fc12c0b8bc1160530d1e3e641b76d2a4f793136f Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 3 Oct 2014 23:25:30 -0700 Subject: [PATCH 0544/3378] Implement/test/fix prof-related mallctl's. Implement/test/fix the opt.prof_thread_active_init, prof.thread_active_init, and thread.prof.active mallctl's. Test/fix the thread.prof.name mallctl. Refactor opt_prof_active to be read-only and move mutable state into the prof_active variable. Stop leaning on ctl-related locking for protection. --- Makefile.in | 2 + doc/jemalloc.xml.in | 52 ++++++- include/jemalloc/internal/private_symbols.txt | 5 + include/jemalloc/internal/prof.h | 34 +++-- src/ctl.c | 73 ++++++--- src/jemalloc.c | 2 + src/prof.c | 142 +++++++++++++++--- src/stats.c | 33 ++-- test/unit/prof_active.c | 136 +++++++++++++++++ test/unit/prof_reset.c | 4 + test/unit/prof_thread_name.c | 128 ++++++++++++++++ 11 files changed, 545 insertions(+), 66 deletions(-) create mode 100644 test/unit/prof_active.c create mode 100644 test/unit/prof_thread_name.c diff --git a/Makefile.in b/Makefile.in index 5267bea4..52f5a9d2 100644 --- a/Makefile.in +++ b/Makefile.in @@ -123,9 +123,11 @@ TESTS_UNIT := $(srcroot)test/unit/atomic.c \ $(srcroot)test/unit/mq.c \ $(srcroot)test/unit/mtx.c \ $(srcroot)test/unit/prof_accum.c \ + $(srcroot)test/unit/prof_active.c \ $(srcroot)test/unit/prof_gdump.c \ $(srcroot)test/unit/prof_idump.c \ $(srcroot)test/unit/prof_reset.c \ + $(srcroot)test/unit/prof_thread_name.c \ $(srcroot)test/unit/ql.c \ $(srcroot)test/unit/qr.c \ $(srcroot)test/unit/quarantine.c \ diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index b586e690..6abb50bc 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1061,6 +1061,21 @@ malloc_conf = "xmalloc:true";]]> This option is enabled by default. + + + opt.prof_thread_active_init + (bool) + r- + [] + + Initial setting for thread.prof.active + in newly created threads. 
The initial setting for newly created threads + can also be changed during execution via the prof.thread_active_init + mallctl. This option is enabled by default. + + opt.lg_prof_sample @@ -1264,7 +1279,8 @@ malloc_conf = "xmalloc:true";]]> thread.prof.name (const char *) - rw + r- or + -w [] Get/set the descriptive name associated with the calling @@ -1272,7 +1288,15 @@ malloc_conf = "xmalloc:true";]]> created, so the input string need not be maintained after this interface completes execution. The output string of this interface should be copied for non-ephemeral uses, because multiple implementation details - can cause asynchronous string deallocation. + can cause asynchronous string deallocation. Furthermore, each + invocation of this interface can only read or write; simultaneous + read/write is not supported due to string lifetime limitations. The + name string must nil-terminated and comprised only of characters in the + sets recognized + by isgraph + 3 and + isblank + 3. @@ -1283,7 +1307,7 @@ malloc_conf = "xmalloc:true";]]> [] Control whether sampling is currently active for the - calling thread. This is a deactivation mechanism in addition to prof.active; both must be active for the calling thread to sample. This flag is enabled by default. @@ -1508,6 +1532,20 @@ malloc_conf = "xmalloc:true";]]> and returning the new arena index. + + + prof.thread_active_init + (bool) + rw + [] + + Control the initial setting for thread.prof.active + in newly created threads. See the opt.prof_thread_active_init + option for additional information. + + prof.active @@ -1518,8 +1556,9 @@ malloc_conf = "xmalloc:true";]]> Control whether sampling is currently active. See the opt.prof_active - option for additional information. - + option for additional information, as well as the interrelated thread.prof.active + mallctl. 
@@ -1548,7 +1587,8 @@ malloc_conf = "xmalloc:true";]]> Reset all memory profile statistics, and optionally update the sample rate (see opt.lg_prof_sample - and prof.lg_sample). + and prof.lg_sample). diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 33f8ce01..63657833 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -285,6 +285,9 @@ opt_zero p2rz pages_purge pow2_ceil +prof_active_get +prof_active_get_unlocked +prof_active_set prof_alloc_prep prof_alloc_rollback prof_backtrace @@ -316,6 +319,8 @@ prof_tdata_cleanup prof_tdata_get prof_tdata_init prof_thread_active_get +prof_thread_active_init_get +prof_thread_active_init_set prof_thread_active_set prof_thread_name_get prof_thread_name_set diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index ea52a63b..3d3f8f4e 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -215,13 +215,8 @@ typedef rb_tree(prof_tdata_t) prof_tdata_tree_t; #ifdef JEMALLOC_H_EXTERNS extern bool opt_prof; -/* - * Even if opt_prof is true, sampling can be temporarily disabled by setting - * opt_prof_active to false. No locking is used when updating opt_prof_active, - * so there are no guarantees regarding how long it will take for all threads - * to notice state changes. - */ extern bool opt_prof_active; +extern bool opt_prof_thread_active_init; extern size_t opt_lg_prof_sample; /* Mean bytes between samples. */ extern ssize_t opt_lg_prof_interval; /* lg(prof_interval). */ extern bool opt_prof_gdump; /* High-water memory dumping. */ @@ -235,6 +230,9 @@ extern char opt_prof_prefix[ #endif 1]; +/* Accessed via prof_active_[gs]et{_unlocked,}(). */ +extern bool prof_active; + /* * Profile dump interval, measured in bytes allocated. Each arena triggers a * profile dump when it reaches this threshold. 
The effect is that the @@ -274,9 +272,13 @@ prof_tdata_t *prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata); void prof_reset(tsd_t *tsd, size_t lg_sample); void prof_tdata_cleanup(tsd_t *tsd); const char *prof_thread_name_get(void); -bool prof_thread_name_set(tsd_t *tsd, const char *thread_name); +bool prof_active_get(void); +bool prof_active_set(bool active); +int prof_thread_name_set(tsd_t *tsd, const char *thread_name); bool prof_thread_active_get(void); bool prof_thread_active_set(bool active); +bool prof_thread_active_init_get(void); +bool prof_thread_active_init_set(bool active_init); void prof_boot0(void); void prof_boot1(void); bool prof_boot2(void); @@ -290,6 +292,7 @@ void prof_sample_threshold_update(prof_tdata_t *tdata); #ifdef JEMALLOC_H_INLINES #ifndef JEMALLOC_ENABLE_INLINE +bool prof_active_get_unlocked(void); prof_tdata_t *prof_tdata_get(tsd_t *tsd, bool create); bool prof_sample_accum_update(tsd_t *tsd, size_t usize, bool commit, prof_tdata_t **tdata_out); @@ -305,6 +308,19 @@ void prof_free(tsd_t *tsd, const void *ptr, size_t usize); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PROF_C_)) +JEMALLOC_INLINE bool +prof_active_get_unlocked(void) +{ + + /* + * Even if opt_prof is true, sampling can be temporarily disabled by + * setting prof_active to false. No locking is used when reading + * prof_active in the fast path, so there are no guarantees regarding + * how long it will take for all threads to notice state changes. 
+ */ + return (prof_active); +} + JEMALLOC_INLINE prof_tdata_t * prof_tdata_get(tsd_t *tsd, bool create) { @@ -401,8 +417,8 @@ prof_alloc_prep(tsd_t *tsd, size_t usize, bool update) assert(usize == s2u(usize)); - if (!opt_prof_active || likely(prof_sample_accum_update(tsd, usize, - update, &tdata))) + if (!prof_active_get_unlocked() || likely(prof_sample_accum_update(tsd, + usize, update, &tdata))) ret = (prof_tctx_t *)(uintptr_t)1U; else { bt_init(&bt, tdata->vec); diff --git a/src/ctl.c b/src/ctl.c index b85710c0..8f9faa56 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -7,7 +7,6 @@ /* * ctl_mtx protects the following: * - ctl_stats.* - * - opt_prof_active */ static malloc_mutex_t ctl_mtx; static bool ctl_initialized; @@ -104,6 +103,7 @@ CTL_PROTO(opt_lg_tcache_max) CTL_PROTO(opt_prof) CTL_PROTO(opt_prof_prefix) CTL_PROTO(opt_prof_active) +CTL_PROTO(opt_prof_thread_active_init) CTL_PROTO(opt_lg_prof_sample) CTL_PROTO(opt_lg_prof_interval) CTL_PROTO(opt_prof_gdump) @@ -131,6 +131,7 @@ CTL_PROTO(arenas_nbins) CTL_PROTO(arenas_nhbins) CTL_PROTO(arenas_nlruns) CTL_PROTO(arenas_extend) +CTL_PROTO(prof_thread_active_init) CTL_PROTO(prof_active) CTL_PROTO(prof_dump) CTL_PROTO(prof_reset) @@ -253,6 +254,7 @@ static const ctl_named_node_t opt_node[] = { {NAME("prof"), CTL(opt_prof)}, {NAME("prof_prefix"), CTL(opt_prof_prefix)}, {NAME("prof_active"), CTL(opt_prof_active)}, + {NAME("prof_thread_active_init"), CTL(opt_prof_thread_active_init)}, {NAME("lg_prof_sample"), CTL(opt_lg_prof_sample)}, {NAME("lg_prof_interval"), CTL(opt_lg_prof_interval)}, {NAME("prof_gdump"), CTL(opt_prof_gdump)}, @@ -318,6 +320,7 @@ static const ctl_named_node_t arenas_node[] = { }; static const ctl_named_node_t prof_node[] = { + {NAME("thread_active_init"), CTL(prof_thread_active_init)}, {NAME("active"), CTL(prof_active)}, {NAME("dump"), CTL(prof_dump)}, {NAME("reset"), CTL(prof_reset)}, @@ -979,6 +982,14 @@ ctl_postfork_child(void) } \ } while (0) +#define READ_XOR_WRITE() do { \ + if ((oldp != NULL 
&& oldlenp != NULL) && (newp != NULL || \ + newlen != 0)) { \ + ret = EPERM; \ + goto label_return; \ + } \ +} while (0) + #define READ(v, t) do { \ if (oldp != NULL && oldlenp != NULL) { \ if (*oldlenp != sizeof(t)) { \ @@ -1208,7 +1219,9 @@ CTL_RO_NL_CGEN(config_tcache, opt_tcache, opt_tcache, bool) CTL_RO_NL_CGEN(config_tcache, opt_lg_tcache_max, opt_lg_tcache_max, ssize_t) CTL_RO_NL_CGEN(config_prof, opt_prof, opt_prof, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_prefix, opt_prof_prefix, const char *) -CTL_RO_CGEN(config_prof, opt_prof_active, opt_prof_active, bool) /* Mutable. */ +CTL_RO_NL_CGEN(config_prof, opt_prof_active, opt_prof_active, bool) +CTL_RO_NL_CGEN(config_prof, opt_prof_thread_active_init, + opt_prof_thread_active_init, bool) CTL_RO_NL_CGEN(config_prof, opt_lg_prof_sample, opt_lg_prof_sample, size_t) CTL_RO_NL_CGEN(config_prof, opt_prof_accum, opt_prof_accum, bool) CTL_RO_NL_CGEN(config_prof, opt_lg_prof_interval, opt_lg_prof_interval, ssize_t) @@ -1332,12 +1345,12 @@ thread_prof_name_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; - const char *oldname; if (!config_prof) return (ENOENT); - oldname = prof_thread_name_get(); + READ_XOR_WRITE(); + if (newp != NULL) { tsd_t *tsd; @@ -1352,12 +1365,13 @@ thread_prof_name_ctl(const size_t *mib, size_t miblen, void *oldp, goto label_return; } - if (prof_thread_name_set(tsd, *(const char **)newp)) { - ret = EAGAIN; + if ((ret = prof_thread_name_set(tsd, *(const char **)newp)) != + 0) goto label_return; - } + } else { + const char *oldname = prof_thread_name_get(); + READ(oldname, const char *); } - READ(oldname, const char *); ret = 0; label_return: @@ -1660,6 +1674,31 @@ label_return: /******************************************************************************/ +static int +prof_thread_active_init_ctl(const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) +{ + int ret; + bool oldval; + + if (!config_prof) + 
return (ENOENT); + + if (newp != NULL) { + if (newlen != sizeof(bool)) { + ret = EINVAL; + goto label_return; + } + oldval = prof_thread_active_init_set(*(bool *)newp); + } else + oldval = prof_thread_active_init_get(); + READ(oldval, bool); + + ret = 0; +label_return: + return (ret); +} + static int prof_active_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) @@ -1670,22 +1709,18 @@ prof_active_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, if (!config_prof) return (ENOENT); - malloc_mutex_lock(&ctl_mtx); /* Protect opt_prof_active. */ - oldval = opt_prof_active; if (newp != NULL) { - /* - * The memory barriers will tend to make opt_prof_active - * propagate faster on systems with weak memory ordering. - */ - mb_write(); - WRITE(opt_prof_active, bool); - mb_write(); - } + if (newlen != sizeof(bool)) { + ret = EINVAL; + goto label_return; + } + oldval = prof_active_set(*(bool *)newp); + } else + oldval = prof_active_get(); READ(oldval, bool); ret = 0; label_return: - malloc_mutex_unlock(&ctl_mtx); return (ret); } diff --git a/src/jemalloc.c b/src/jemalloc.c index 0d041313..2e96705d 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -655,6 +655,8 @@ malloc_conf_init(void) "prof_prefix", "jeprof") CONF_HANDLE_BOOL(opt_prof_active, "prof_active", true) + CONF_HANDLE_BOOL(opt_prof_thread_active_init, + "prof_thread_active_init", true) CONF_HANDLE_SIZE_T(opt_lg_prof_sample, "lg_prof_sample", 0, (sizeof(uint64_t) << 3) - 1, true) diff --git a/src/prof.c b/src/prof.c index 29b4baaa..5b979989 100644 --- a/src/prof.c +++ b/src/prof.c @@ -16,6 +16,7 @@ bool opt_prof = false; bool opt_prof_active = true; +bool opt_prof_thread_active_init = true; size_t opt_lg_prof_sample = LG_PROF_SAMPLE_DEFAULT; ssize_t opt_lg_prof_interval = LG_PROF_INTERVAL_DEFAULT; bool opt_prof_gdump = false; @@ -29,6 +30,20 @@ char opt_prof_prefix[ #endif 1]; +/* + * Initialized as opt_prof_active, and accessed via + * 
prof_active_[gs]et{_unlocked,}(). + */ +bool prof_active; +static malloc_mutex_t prof_active_mtx; + +/* + * Initialized as opt_prof_thread_active_init, and accessed via + * prof_thread_active_init_[gs]et(). + */ +static bool prof_thread_active_init; +static malloc_mutex_t prof_thread_active_init_mtx; + uint64_t prof_interval = 0; size_t lg_prof_sample; @@ -103,6 +118,7 @@ static bool prof_tctx_should_destroy(prof_tctx_t *tctx); static void prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx); static bool prof_tdata_should_destroy(prof_tdata_t *tdata); static void prof_tdata_destroy(tsd_t *tsd, prof_tdata_t *tdata); +static char *prof_thread_name_alloc(tsd_t *tsd, const char *thread_name); /******************************************************************************/ /* Red-black trees. */ @@ -1593,7 +1609,8 @@ prof_thr_uid_alloc(void) } static prof_tdata_t * -prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim) +prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, + char *thread_name, bool active) { prof_tdata_t *tdata; @@ -1607,7 +1624,7 @@ prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim) tdata->lock = prof_tdata_mutex_choose(thr_uid); tdata->thr_uid = thr_uid; tdata->thr_discrim = thr_discrim; - tdata->thread_name = NULL; + tdata->thread_name = thread_name; tdata->attached = true; tdata->expired = false; @@ -1625,7 +1642,7 @@ prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim) tdata->enq_gdump = false; tdata->dumping = false; - tdata->active = true; + tdata->active = active; malloc_mutex_lock(&tdatas_mtx); tdata_tree_insert(&tdatas, tdata); @@ -1638,7 +1655,8 @@ prof_tdata_t * prof_tdata_init(tsd_t *tsd) { - return (prof_tdata_init_impl(tsd, prof_thr_uid_alloc(), 0)); + return (prof_tdata_init_impl(tsd, prof_thr_uid_alloc(), 0, NULL, + prof_thread_active_init_get())); } /* tdata->lock must be held. 
*/ @@ -1698,9 +1716,13 @@ prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata) { uint64_t thr_uid = tdata->thr_uid; uint64_t thr_discrim = tdata->thr_discrim + 1; + char *thread_name = (tdata->thread_name != NULL) ? + prof_thread_name_alloc(tsd, tdata->thread_name) : NULL; + bool active = tdata->active; prof_tdata_detach(tsd, tdata); - return (prof_tdata_init_impl(tsd, thr_uid, thr_discrim)); + return (prof_tdata_init_impl(tsd, thr_uid, thr_discrim, thread_name, + active)); } static bool @@ -1768,6 +1790,29 @@ prof_tdata_cleanup(tsd_t *tsd) prof_tdata_detach(tsd, tdata); } +bool +prof_active_get(void) +{ + bool prof_active_current; + + malloc_mutex_lock(&prof_active_mtx); + prof_active_current = prof_active; + malloc_mutex_unlock(&prof_active_mtx); + return (prof_active_current); +} + +bool +prof_active_set(bool active) +{ + bool prof_active_old; + + malloc_mutex_lock(&prof_active_mtx); + prof_active_old = prof_active; + prof_active = active; + malloc_mutex_unlock(&prof_active_mtx); + return (prof_active_old); +} + const char * prof_thread_name_get(void) { @@ -1775,34 +1820,64 @@ prof_thread_name_get(void) prof_tdata_t *tdata; if ((tsd = tsd_tryget()) == NULL) - return (NULL); + return (""); tdata = prof_tdata_get(tsd, true); if (tdata == NULL) - return (NULL); - return (tdata->thread_name); + return (""); + return (tdata->thread_name != NULL ? 
tdata->thread_name : ""); } -bool +static char * +prof_thread_name_alloc(tsd_t *tsd, const char *thread_name) +{ + char *ret; + size_t size; + + if (thread_name == NULL) + return (NULL); + + size = strlen(thread_name) + 1; + if (size == 1) + return (""); + + ret = imalloc(tsd, size); + if (ret == NULL) + return (NULL); + memcpy(ret, thread_name, size); + return (ret); +} + +int prof_thread_name_set(tsd_t *tsd, const char *thread_name) { prof_tdata_t *tdata; - size_t size; + unsigned i; char *s; tdata = prof_tdata_get(tsd, true); if (tdata == NULL) - return (true); + return (EAGAIN); - size = strlen(thread_name) + 1; - s = imalloc(tsd, size); + /* Validate input. */ + if (thread_name == NULL) + return (EFAULT); + for (i = 0; thread_name[i] != '\0'; i++) { + char c = thread_name[i]; + if (!isgraph(c) && !isblank(c)) + return (EFAULT); + } + + s = prof_thread_name_alloc(tsd, thread_name); if (s == NULL) - return (true); + return (EAGAIN); - memcpy(s, thread_name, size); - if (tdata->thread_name != NULL) + if (tdata->thread_name != NULL) { idalloc(tsd, tdata->thread_name); - tdata->thread_name = s; - return (false); + tdata->thread_name = NULL; + } + if (strlen(s) > 0) + tdata->thread_name = s; + return (0); } bool @@ -1834,6 +1909,29 @@ prof_thread_active_set(bool active) return (false); } +bool +prof_thread_active_init_get(void) +{ + bool active_init; + + malloc_mutex_lock(&prof_thread_active_init_mtx); + active_init = prof_thread_active_init; + malloc_mutex_unlock(&prof_thread_active_init_mtx); + return (active_init); +} + +bool +prof_thread_active_init_set(bool active_init) +{ + bool active_init_old; + + malloc_mutex_lock(&prof_thread_active_init_mtx); + active_init_old = prof_thread_active_init; + prof_thread_active_init = active_init; + malloc_mutex_unlock(&prof_thread_active_init_mtx); + return (active_init_old); +} + void prof_boot0(void) { @@ -1882,6 +1980,14 @@ prof_boot2(void) lg_prof_sample = opt_lg_prof_sample; + prof_active = opt_prof_active; + if 
(malloc_mutex_init(&prof_active_mtx)) + return (true); + + prof_thread_active_init = opt_prof_thread_active_init; + if (malloc_mutex_init(&prof_thread_active_init_mtx)) + return (true); + if ((tsd = tsd_tryget()) == NULL) return (true); if (ckh_new(tsd, &bt2gctx, PROF_CKH_MINITEMS, prof_bt_hash, diff --git a/src/stats.c b/src/stats.c index aa095500..5c3d7017 100644 --- a/src/stats.c +++ b/src/stats.c @@ -336,7 +336,6 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, malloc_cprintf(write_cb, cbopaque, "___ Begin jemalloc statistics ___\n"); if (general) { - int err; const char *cpv; bool bv; unsigned uv; @@ -355,26 +354,31 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, bv ? "enabled" : "disabled"); #define OPT_WRITE_BOOL(n) \ - if ((err = je_mallctl("opt."#n, &bv, &bsz, NULL, 0)) \ - == 0) { \ + if (je_mallctl("opt."#n, &bv, &bsz, NULL, 0) == 0) { \ malloc_cprintf(write_cb, cbopaque, \ " opt."#n": %s\n", bv ? "true" : "false"); \ } +#define OPT_WRITE_BOOL_MUTABLE(n, m) { \ + bool bv2; \ + if (je_mallctl("opt."#n, &bv, &bsz, NULL, 0) == 0 && \ + je_mallctl(#m, &bv2, &bsz, NULL, 0) == 0) { \ + malloc_cprintf(write_cb, cbopaque, \ + " opt."#n": %s ("#m": %s)\n", bv ? "true" \ + : "false", bv2 ? 
"true" : "false"); \ + } \ +} #define OPT_WRITE_SIZE_T(n) \ - if ((err = je_mallctl("opt."#n, &sv, &ssz, NULL, 0)) \ - == 0) { \ + if (je_mallctl("opt."#n, &sv, &ssz, NULL, 0) == 0) { \ malloc_cprintf(write_cb, cbopaque, \ " opt."#n": %zu\n", sv); \ } #define OPT_WRITE_SSIZE_T(n) \ - if ((err = je_mallctl("opt."#n, &ssv, &sssz, NULL, 0)) \ - == 0) { \ + if (je_mallctl("opt."#n, &ssv, &sssz, NULL, 0) == 0) { \ malloc_cprintf(write_cb, cbopaque, \ " opt."#n": %zd\n", ssv); \ } #define OPT_WRITE_CHAR_P(n) \ - if ((err = je_mallctl("opt."#n, &cpv, &cpsz, NULL, 0)) \ - == 0) { \ + if (je_mallctl("opt."#n, &cpv, &cpsz, NULL, 0) == 0) { \ malloc_cprintf(write_cb, cbopaque, \ " opt."#n": \"%s\"\n", cpv); \ } @@ -398,7 +402,9 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, OPT_WRITE_SSIZE_T(lg_tcache_max) OPT_WRITE_BOOL(prof) OPT_WRITE_CHAR_P(prof_prefix) - OPT_WRITE_BOOL(prof_active) + OPT_WRITE_BOOL_MUTABLE(prof_active, prof.active) + OPT_WRITE_BOOL_MUTABLE(prof_thread_active_init, + prof.thread_active_init) OPT_WRITE_SSIZE_T(lg_prof_sample) OPT_WRITE_BOOL(prof_accum) OPT_WRITE_SSIZE_T(lg_prof_interval) @@ -407,6 +413,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, OPT_WRITE_BOOL(prof_leak) #undef OPT_WRITE_BOOL +#undef OPT_WRITE_BOOL_MUTABLE #undef OPT_WRITE_SIZE_T #undef OPT_WRITE_SSIZE_T #undef OPT_WRITE_CHAR_P @@ -434,13 +441,11 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, malloc_cprintf(write_cb, cbopaque, "Min active:dirty page ratio per arena: N/A\n"); } - if ((err = je_mallctl("arenas.tcache_max", &sv, &ssz, NULL, 0)) - == 0) { + if (je_mallctl("arenas.tcache_max", &sv, &ssz, NULL, 0) == 0) { malloc_cprintf(write_cb, cbopaque, "Maximum thread-cached size class: %zu\n", sv); } - if ((err = je_mallctl("opt.prof", &bv, &bsz, NULL, 0)) == 0 && - bv) { + if (je_mallctl("opt.prof", &bv, &bsz, NULL, 0) == 0 && bv) { CTL_GET("prof.lg_sample", &sv, size_t); malloc_cprintf(write_cb, cbopaque, "Average 
profile sample interval: %"PRIu64 diff --git a/test/unit/prof_active.c b/test/unit/prof_active.c new file mode 100644 index 00000000..d4bab8d0 --- /dev/null +++ b/test/unit/prof_active.c @@ -0,0 +1,136 @@ +#include "test/jemalloc_test.h" + +#ifdef JEMALLOC_PROF +const char *malloc_conf = + "prof:true,prof_thread_active_init:false,lg_prof_sample:0,prof_final:false"; +#endif + +static void +mallctl_bool_get(const char *name, bool expected, const char *func, int line) +{ + bool old; + size_t sz; + + sz = sizeof(old); + assert_d_eq(mallctl(name, &old, &sz, NULL, 0), 0, + "%s():%d: Unexpected mallctl failure reading %s", func, line, name); + assert_b_eq(old, expected, "%s():%d: Unexpected %s value", func, line, + name); +} + +static void +mallctl_bool_set(const char *name, bool old_expected, bool val_new, + const char *func, int line) +{ + bool old; + size_t sz; + + sz = sizeof(old); + assert_d_eq(mallctl(name, &old, &sz, &val_new, sizeof(val_new)), 0, + "%s():%d: Unexpected mallctl failure reading/writing %s", func, + line, name); + assert_b_eq(old, old_expected, "%s():%d: Unexpected %s value", func, + line, name); +} + +static void +mallctl_prof_active_get_impl(bool prof_active_old_expected, const char *func, + int line) +{ + + mallctl_bool_get("prof.active", prof_active_old_expected, func, line); +} +#define mallctl_prof_active_get(a) \ + mallctl_prof_active_get_impl(a, __func__, __LINE__) + +static void +mallctl_prof_active_set_impl(bool prof_active_old_expected, + bool prof_active_new, const char *func, int line) +{ + + mallctl_bool_set("prof.active", prof_active_old_expected, + prof_active_new, func, line); +} +#define mallctl_prof_active_set(a, b) \ + mallctl_prof_active_set_impl(a, b, __func__, __LINE__) + +static void +mallctl_thread_prof_active_get_impl(bool thread_prof_active_old_expected, + const char *func, int line) +{ + + mallctl_bool_get("thread.prof.active", thread_prof_active_old_expected, + func, line); +} +#define mallctl_thread_prof_active_get(a) \ 
+ mallctl_thread_prof_active_get_impl(a, __func__, __LINE__) + +static void +mallctl_thread_prof_active_set_impl(bool thread_prof_active_old_expected, + bool thread_prof_active_new, const char *func, int line) +{ + + mallctl_bool_set("thread.prof.active", thread_prof_active_old_expected, + thread_prof_active_new, func, line); +} +#define mallctl_thread_prof_active_set(a, b) \ + mallctl_thread_prof_active_set_impl(a, b, __func__, __LINE__) + +static void +prof_sampling_probe_impl(bool expect_sample, const char *func, int line) +{ + void *p; + size_t expected_backtraces = expect_sample ? 1 : 0; + + assert_zu_eq(prof_bt_count(), 0, "%s():%d: Expected 0 backtraces", func, + line); + p = mallocx(1, 0); + assert_ptr_not_null(p, "Unexpected mallocx() failure"); + assert_zu_eq(prof_bt_count(), expected_backtraces, + "%s():%d: Unexpected backtrace count", func, line); + dallocx(p, 0); +} +#define prof_sampling_probe(a) \ + prof_sampling_probe_impl(a, __func__, __LINE__) + +TEST_BEGIN(test_prof_active) +{ + + test_skip_if(!config_prof); + + mallctl_prof_active_get(true); + mallctl_thread_prof_active_get(false); + + mallctl_prof_active_set(true, true); + mallctl_thread_prof_active_set(false, false); + /* prof.active, !thread.prof.active. */ + prof_sampling_probe(false); + + mallctl_prof_active_set(true, false); + mallctl_thread_prof_active_set(false, false); + /* !prof.active, !thread.prof.active. */ + prof_sampling_probe(false); + + mallctl_prof_active_set(false, false); + mallctl_thread_prof_active_set(false, true); + /* !prof.active, thread.prof.active. */ + prof_sampling_probe(false); + + mallctl_prof_active_set(false, true); + mallctl_thread_prof_active_set(true, true); + /* prof.active, thread.prof.active. */ + prof_sampling_probe(true); + + /* Restore settings. 
*/ + mallctl_prof_active_set(true, true); + mallctl_thread_prof_active_set(true, false); +} +TEST_END + +int +main(void) +{ + + return (test( + test_prof_active)); +} diff --git a/test/unit/prof_reset.c b/test/unit/prof_reset.c index 62a4d5af..3af19642 100644 --- a/test/unit/prof_reset.c +++ b/test/unit/prof_reset.c @@ -22,6 +22,8 @@ TEST_BEGIN(test_prof_reset_basic) size_t sz; unsigned i; + test_skip_if(!config_prof); + sz = sizeof(size_t); assert_d_eq(mallctl("opt.lg_prof_sample", &lg_prof_sample_orig, &sz, NULL, 0), 0, @@ -90,6 +92,8 @@ TEST_BEGIN(test_prof_reset_cleanup) void *p; prof_dump_header_t *prof_dump_header_orig; + test_skip_if(!config_prof); + active = true; assert_d_eq(mallctl("prof.active", NULL, NULL, &active, sizeof(active)), 0, "Unexpected mallctl failure while activating profiling"); diff --git a/test/unit/prof_thread_name.c b/test/unit/prof_thread_name.c new file mode 100644 index 00000000..7fb8038a --- /dev/null +++ b/test/unit/prof_thread_name.c @@ -0,0 +1,128 @@ +#include "test/jemalloc_test.h" + +#ifdef JEMALLOC_PROF +const char *malloc_conf = + "prof:true,prof_active:false,prof_final:false"; +#endif + +static void +mallctl_thread_name_get_impl(const char *thread_name_expected, const char *func, + int line) +{ + const char *thread_name_old; + size_t sz; + + sz = sizeof(thread_name_old); + assert_d_eq(mallctl("thread.prof.name", &thread_name_old, &sz, NULL, 0), + 0, "%s():%d: Unexpected mallctl failure reading thread.prof.name", + func, line); + assert_str_eq(thread_name_old, thread_name_expected, + "%s():%d: Unexpected thread.prof.name value", func, line); +} +#define mallctl_thread_name_get(a) \ + mallctl_thread_name_get_impl(a, __func__, __LINE__) + +static void +mallctl_thread_name_set_impl(const char *thread_name, const char *func, + int line) +{ + + assert_d_eq(mallctl("thread.prof.name", NULL, NULL, &thread_name, + sizeof(thread_name)), 0, + "%s():%d: Unexpected mallctl failure reading thread.prof.name", + func, line); + 
mallctl_thread_name_get_impl(thread_name, func, line); +} +#define mallctl_thread_name_set(a) \ + mallctl_thread_name_set_impl(a, __func__, __LINE__) + +TEST_BEGIN(test_prof_thread_name_validation) +{ + const char *thread_name; + + mallctl_thread_name_get(""); + mallctl_thread_name_set("hi there"); + + /* NULL input shouldn't be allowed. */ + thread_name = NULL; + assert_d_eq(mallctl("thread.prof.name", NULL, NULL, &thread_name, + sizeof(thread_name)), EFAULT, + "Unexpected mallctl result writing \"%s\" to thread.prof.name", + thread_name); + + /* '\n' shouldn't be allowed. */ + thread_name = "hi\nthere"; + assert_d_eq(mallctl("thread.prof.name", NULL, NULL, &thread_name, + sizeof(thread_name)), EFAULT, + "Unexpected mallctl result writing \"%s\" to thread.prof.name", + thread_name); + + /* Simultaneous read/write shouldn't be allowed. */ + { + const char *thread_name_old; + size_t sz; + + sz = sizeof(thread_name_old); + assert_d_eq(mallctl("thread.prof.name", &thread_name_old, &sz, + &thread_name, sizeof(thread_name)), EPERM, + "Unexpected mallctl result writing \"%s\" to " + "thread.prof.name", thread_name); + } + + mallctl_thread_name_set(""); +} +TEST_END + +#define NTHREADS 4 +#define NRESET 25 +static void * +thd_start(void *varg) +{ + unsigned thd_ind = *(unsigned *)varg; + char thread_name[16] = ""; + unsigned i; + + malloc_snprintf(thread_name, sizeof(thread_name), "thread %u", thd_ind); + + mallctl_thread_name_get(""); + mallctl_thread_name_set(thread_name); + + for (i = 0; i < NRESET; i++) { + assert_d_eq(mallctl("prof.reset", NULL, NULL, NULL, 0), 0, + "Unexpected error while resetting heap profile data"); + mallctl_thread_name_get(thread_name); + } + + mallctl_thread_name_set(thread_name); + mallctl_thread_name_set(""); + + return (NULL); +} + +TEST_BEGIN(test_prof_thread_name_threaded) +{ + thd_t thds[NTHREADS]; + unsigned thd_args[NTHREADS]; + unsigned i; + + test_skip_if(!config_prof); + + for (i = 0; i < NTHREADS; i++) { + thd_args[i] = i; + 
thd_create(&thds[i], thd_start, (void *)&thd_args[i]); + } + for (i = 0; i < NTHREADS; i++) + thd_join(thds[i], NULL); +} +TEST_END +#undef NTHREADS +#undef NRESET + +int +main(void) +{ + + return (test( + test_prof_thread_name_validation, + test_prof_thread_name_threaded)); +} From b72d4abc5fb1185e4017c014d521693a99f9175b Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 3 Oct 2014 23:41:53 -0700 Subject: [PATCH 0545/3378] Skip test_prof_thread_name_validation if !config_prof. --- test/unit/prof_thread_name.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/unit/prof_thread_name.c b/test/unit/prof_thread_name.c index 7fb8038a..6066dba7 100644 --- a/test/unit/prof_thread_name.c +++ b/test/unit/prof_thread_name.c @@ -40,6 +40,8 @@ TEST_BEGIN(test_prof_thread_name_validation) { const char *thread_name; + test_skip_if(!config_prof); + mallctl_thread_name_get(""); mallctl_thread_name_set("hi there"); From a4a972d9a163a57183f851535104f4e8ac78f511 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 4 Oct 2014 00:35:07 -0700 Subject: [PATCH 0546/3378] Fix install_lib target (incorrect jemalloc.pc path). --- Makefile.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile.in b/Makefile.in index 52f5a9d2..50f6596a 100644 --- a/Makefile.in +++ b/Makefile.in @@ -101,7 +101,7 @@ DSOS := $(objroot)lib/$(LIBJEMALLOC).$(SOREV) ifneq ($(SOREV),$(SO)) DSOS += $(objroot)lib/$(LIBJEMALLOC).$(SO) endif -PC := $(srcroot)jemalloc.pc +PC := $(objroot)jemalloc.pc MAN3 := $(objroot)doc/jemalloc$(install_suffix).3 DOCS_XML := $(objroot)doc/jemalloc$(install_suffix).xml DOCS_HTML := $(DOCS_XML:$(objroot)%.xml=$(srcroot)%.html) From 029d44cf8b22aa7b749747bfd585887fb59e0030 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 4 Oct 2014 11:12:53 -0700 Subject: [PATCH 0547/3378] Fix tsd cleanup regressions. 
Fix tsd cleanup regressions that were introduced in 5460aa6f6676c7f253bfcb75c028dfd38cae8aaf (Convert all tsd variables to reside in a single tsd structure.). These regressions were twofold: 1) tsd_tryget() should never (and need never) return NULL. Rename it to tsd_fetch() and simplify all callers. 2) tsd_*_set() must only be called when tsd is in the nominal state, because cleanup happens during the nominal-->purgatory transition, and re-initialization must not happen while in the purgatory state. Add tsd_nominal() and use it as needed. Note that tsd_*{p,}_get() can still be used as long as no re-initialization that would require cleanup occurs. This means that e.g. the thread_allocated counter can be updated unconditionally. --- include/jemalloc/internal/private_symbols.txt | 3 +- include/jemalloc/internal/prof.h | 6 +- include/jemalloc/internal/quarantine.h | 4 +- include/jemalloc/internal/tcache.h | 21 ++--- include/jemalloc/internal/tsd.h | 65 +++++++++------- src/ctl.c | 26 ++----- src/jemalloc.c | 78 +++++++++---------- src/prof.c | 29 +++---- src/tcache.c | 3 +- src/tsd.c | 5 -- test/unit/ckh.c | 9 +-- test/unit/tsd.c | 35 ++++++++- 12 files changed, 137 insertions(+), 147 deletions(-) diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 63657833..4ea9a953 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -390,12 +390,14 @@ tsd_arena_set tsd_boot tsd_cleanup tsd_cleanup_wrapper +tsd_fetch tsd_get tsd_get_wrapper tsd_initialized tsd_init_check_recursion tsd_init_finish tsd_init_head +tsd_nominal tsd_quarantine_get tsd_quarantine_set tsd_set @@ -411,7 +413,6 @@ tsd_thread_allocated_get tsd_thread_allocated_set tsd_thread_deallocated_get tsd_thread_deallocated_set -tsd_tryget u2rz valgrind_freelike_block valgrind_make_mem_defined diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index 3d3f8f4e..0ec7c18a 
100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -331,8 +331,10 @@ prof_tdata_get(tsd_t *tsd, bool create) tdata = tsd_prof_tdata_get(tsd); if (create) { if (unlikely(tdata == NULL)) { - tdata = prof_tdata_init(tsd); - tsd_prof_tdata_set(tsd, tdata); + if (tsd_nominal(tsd)) { + tdata = prof_tdata_init(tsd); + tsd_prof_tdata_set(tsd, tdata); + } } else if (unlikely(tdata->expired)) { tdata = prof_tdata_reinit(tsd, tdata); tsd_prof_tdata_set(tsd, tdata); diff --git a/include/jemalloc/internal/quarantine.h b/include/jemalloc/internal/quarantine.h index 3a755985..4e9c710a 100644 --- a/include/jemalloc/internal/quarantine.h +++ b/include/jemalloc/internal/quarantine.h @@ -49,8 +49,8 @@ quarantine_alloc_hook(void) assert(config_fill && opt_quarantine); - tsd = tsd_tryget(); - if (tsd != NULL && tsd_quarantine_get(tsd) == NULL) + tsd = tsd_fetch(); + if (tsd_quarantine_get(tsd) == NULL && tsd_nominal(tsd)) tsd_quarantine_set(tsd, quarantine_init(tsd, LG_MAXOBJS_INIT)); } #endif diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index bc0b41c7..1a70972c 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -142,9 +142,8 @@ tcache_flush(void) cassert(config_tcache); - tsd = tsd_tryget(); - if (tsd != NULL) - tcache_cleanup(tsd); + tsd = tsd_fetch(); + tcache_cleanup(tsd); } JEMALLOC_INLINE bool @@ -155,9 +154,7 @@ tcache_enabled_get(void) cassert(config_tcache); - tsd = tsd_tryget(); - if (tsd == NULL) - return (false); + tsd = tsd_fetch(); tcache_enabled = tsd_tcache_enabled_get(tsd); if (tcache_enabled == tcache_enabled_default) { tcache_enabled = (tcache_enabled_t)opt_tcache; @@ -175,9 +172,7 @@ tcache_enabled_set(bool enabled) cassert(config_tcache); - tsd = tsd_tryget(); - if (tsd == NULL) - return; + tsd = tsd_fetch(); tcache_enabled = (tcache_enabled_t)enabled; tsd_tcache_enabled_set(tsd, tcache_enabled); @@ -195,17 +190,11 @@ tcache_get(tsd_t *tsd, bool 
create) return (NULL); if (config_lazy_lock && !isthreaded) return (NULL); - /* - * If create is true, the caller has already assured that tsd is - * non-NULL. - */ - if (!create && unlikely(tsd == NULL)) - return (NULL); tcache = tsd_tcache_get(tsd); if (!create) return (tcache); - if (unlikely(tcache == NULL)) { + if (unlikely(tcache == NULL) && tsd_nominal(tsd)) { tcache = tcache_get_hard(tsd); tsd_tcache_set(tsd, tcache); } diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 44952eed..25450391 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -49,16 +49,19 @@ typedef enum { * Note that all of the functions deal in terms of (a_type *) rather than * (a_type) so that it is possible to support non-pointer types (unlike * pthreads TSD). example_tsd_cleanup() is passed an (a_type *) pointer that is - * cast to (void *). This means that the cleanup function needs to cast *and* - * dereference the function argument, e.g.: + * cast to (void *). This means that the cleanup function needs to cast the + * function argument to (a_type *), then dereference the resulting pointer to + * access fields, e.g. * - * bool + * void * example_tsd_cleanup(void *arg) * { - * example_t *example = *(example_t **)arg; + * example_t *example = (example_t *)arg; * + * example->x = 42; * [...] 
- * return ([want the cleanup function to be called again]); + * if ([want the cleanup function to be called again]) + * example_tsd_set(example); * } * * If example_tsd_set() is called within example_tsd_cleanup(), it will be @@ -468,7 +471,8 @@ void tsd_cleanup(void *arg); #ifndef JEMALLOC_ENABLE_INLINE malloc_tsd_protos(JEMALLOC_ATTR(unused), , tsd_t) -tsd_t *tsd_tryget(void); +tsd_t *tsd_fetch(void); +bool tsd_nominal(tsd_t *tsd); #define O(n, t) \ t *tsd_##n##p_get(tsd_t *tsd); \ t tsd_##n##_get(tsd_t *tsd); \ @@ -481,50 +485,53 @@ MALLOC_TSD malloc_tsd_externs(, tsd_t) malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, , tsd_t, tsd_initializer, tsd_cleanup) -JEMALLOC_INLINE tsd_t * -tsd_tryget(void) +JEMALLOC_ALWAYS_INLINE tsd_t * +tsd_fetch(void) { - tsd_t *tsd; + tsd_t *tsd = tsd_get(); - tsd = tsd_get(); - if (unlikely(tsd == NULL)) - return (NULL); - - if (likely(tsd->state == tsd_state_nominal)) - return (tsd); - else if (tsd->state == tsd_state_uninitialized) { - tsd->state = tsd_state_nominal; - tsd_set(tsd); - return (tsd); - } else if (tsd->state == tsd_state_purgatory) { - tsd->state = tsd_state_reincarnated; - tsd_set(tsd); - return (NULL); - } else { - assert(tsd->state == tsd_state_reincarnated); - return (NULL); + if (unlikely(tsd->state != tsd_state_nominal)) { + if (tsd->state == tsd_state_uninitialized) { + tsd->state = tsd_state_nominal; + /* Trigger cleanup handler registration. 
*/ + tsd_set(tsd); + } else if (tsd->state == tsd_state_purgatory) { + tsd->state = tsd_state_reincarnated; + tsd_set(tsd); + } else + assert(tsd->state == tsd_state_reincarnated); } + + return (tsd); +} + +JEMALLOC_INLINE bool +tsd_nominal(tsd_t *tsd) +{ + + return (tsd->state == tsd_state_nominal); } #define O(n, t) \ -JEMALLOC_INLINE t * \ +JEMALLOC_ALWAYS_INLINE t * \ tsd_##n##p_get(tsd_t *tsd) \ { \ \ return (&tsd->n); \ } \ \ -JEMALLOC_INLINE t \ +JEMALLOC_ALWAYS_INLINE t \ tsd_##n##_get(tsd_t *tsd) \ { \ \ return (*tsd_##n##p_get(tsd)); \ } \ \ -JEMALLOC_INLINE void \ +JEMALLOC_ALWAYS_INLINE void \ tsd_##n##_set(tsd_t *tsd, t n) \ { \ \ + assert(tsd->state == tsd_state_nominal); \ tsd->n = n; \ } MALLOC_TSD diff --git a/src/ctl.c b/src/ctl.c index 8f9faa56..309f1f65 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -571,9 +571,7 @@ ctl_grow(void) ctl_arena_stats_t *astats; arena_t **tarenas; - tsd = tsd_tryget(); - if (tsd == NULL) - return (true); + tsd = tsd_fetch(); /* Allocate extended arena stats and arenas arrays. 
*/ astats = (ctl_arena_stats_t *)imalloc(tsd, (ctl_stats.narenas + 2) * @@ -1132,11 +1130,7 @@ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ if (!(c)) \ return (ENOENT); \ READONLY(); \ - tsd = tsd_tryget(); \ - if (tsd == NULL) { \ - ret = EAGAIN; \ - goto label_return; \ - } \ + tsd = tsd_fetch(); \ oldval = (m(tsd)); \ READ(oldval, t); \ \ @@ -1239,9 +1233,7 @@ thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, tsd_t *tsd; unsigned newind, oldind; - tsd = tsd_tryget(); - if (tsd == NULL) - return (EAGAIN); + tsd = tsd_fetch(); malloc_mutex_lock(&ctl_mtx); newind = oldind = choose_arena(tsd, NULL)->ind; @@ -1359,11 +1351,7 @@ thread_prof_name_ctl(const size_t *mib, size_t miblen, void *oldp, goto label_return; } - tsd = tsd_tryget(); - if (tsd == NULL) { - ret = EAGAIN; - goto label_return; - } + tsd = tsd_fetch(); if ((ret = prof_thread_name_set(tsd, *(const char **)newp)) != 0) @@ -1763,11 +1751,7 @@ prof_reset_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, if (lg_sample >= (sizeof(uint64_t) << 3)) lg_sample = (sizeof(uint64_t) << 3) - 1; - tsd = tsd_tryget(); - if (tsd == NULL) { - ret = EAGAIN; - goto label_return; - } + tsd = tsd_fetch(); prof_reset(tsd, lg_sample); diff --git a/src/jemalloc.c b/src/jemalloc.c index 2e96705d..4a3d968b 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -194,7 +194,8 @@ choose_arena_hard(tsd_t *tsd) malloc_mutex_unlock(&arenas_lock); } - tsd_arena_set(tsd, ret); + if (tsd_nominal(tsd)) + tsd_arena_set(tsd, ret); return (ret); } @@ -908,8 +909,9 @@ JEMALLOC_ALWAYS_INLINE_C void * imalloc_body(size_t size, tsd_t **tsd, size_t *usize) { - if (unlikely(malloc_init()) || unlikely((*tsd = tsd_tryget()) == NULL)) + if (unlikely(malloc_init())) return (NULL); + *tsd = tsd_fetch(); if (config_prof && opt_prof) { *usize = s2u(size); @@ -1000,10 +1002,11 @@ imemalign(void **memptr, size_t alignment, size_t size, size_t min_alignment) assert(min_alignment != 0); 
- if (unlikely(malloc_init()) || unlikely((tsd = tsd_tryget()) == NULL)) { + if (unlikely(malloc_init())) { result = NULL; goto label_oom; } else { + tsd = tsd_fetch(); if (size == 0) size = 1; @@ -1124,11 +1127,12 @@ je_calloc(size_t num, size_t size) size_t num_size; size_t usize JEMALLOC_CC_SILENCE_INIT(0); - if (unlikely(malloc_init()) || unlikely((tsd = tsd_tryget()) == NULL)) { + if (unlikely(malloc_init())) { num_size = 0; ret = NULL; goto label_return; } + tsd = tsd_fetch(); num_size = num * size; if (unlikely(num_size == 0)) { @@ -1228,7 +1232,7 @@ ifree(tsd_t *tsd, void *ptr, bool try_tcache) prof_free(tsd, ptr, usize); } else if (config_stats || config_valgrind) usize = isalloc(ptr, config_prof); - if (config_stats && likely(tsd != NULL)) + if (config_stats) *tsd_thread_deallocatedp_get(tsd) += usize; if (config_valgrind && unlikely(in_valgrind)) rzsize = p2rz(ptr); @@ -1246,7 +1250,7 @@ isfree(tsd_t *tsd, void *ptr, size_t usize, bool try_tcache) if (config_prof && opt_prof) prof_free(tsd, ptr, usize); - if (config_stats && likely(tsd != NULL)) + if (config_stats) *tsd_thread_deallocatedp_get(tsd) += usize; if (config_valgrind && unlikely(in_valgrind)) rzsize = p2rz(ptr); @@ -1267,7 +1271,7 @@ je_realloc(void *ptr, size_t size) if (ptr != NULL) { /* realloc(ptr, 0) is equivalent to free(ptr). */ UTRACE(ptr, 0, 0); - tsd = tsd_tryget(); + tsd = tsd_fetch(); ifree(tsd, ptr, true); return (NULL); } @@ -1277,27 +1281,23 @@ je_realloc(void *ptr, size_t size) if (likely(ptr != NULL)) { assert(malloc_initialized || IS_INITIALIZER); malloc_thread_init(); + tsd = tsd_fetch(); - if ((tsd = tsd_tryget()) != NULL) { - if ((config_prof && opt_prof) || config_stats || - (config_valgrind && unlikely(in_valgrind))) - old_usize = isalloc(ptr, config_prof); - if (config_valgrind && unlikely(in_valgrind)) { - old_rzsize = config_prof ? 
p2rz(ptr) : - u2rz(old_usize); - } + if ((config_prof && opt_prof) || config_stats || + (config_valgrind && unlikely(in_valgrind))) + old_usize = isalloc(ptr, config_prof); + if (config_valgrind && unlikely(in_valgrind)) + old_rzsize = config_prof ? p2rz(ptr) : u2rz(old_usize); - if (config_prof && opt_prof) { + if (config_prof && opt_prof) { + usize = s2u(size); + ret = irealloc_prof(tsd, ptr, old_usize, usize); + } else { + if (config_stats || (config_valgrind && + unlikely(in_valgrind))) usize = s2u(size); - ret = irealloc_prof(tsd, ptr, old_usize, usize); - } else { - if (config_stats || (config_valgrind && - unlikely(in_valgrind))) - usize = s2u(size); - ret = iralloc(tsd, ptr, size, 0, false); - } - } else - ret = NULL; + ret = iralloc(tsd, ptr, size, 0, false); + } } else { /* realloc(NULL, size) is equivalent to malloc(size). */ ret = imalloc_body(size, &tsd, &usize); @@ -1313,10 +1313,8 @@ je_realloc(void *ptr, size_t size) } if (config_stats && likely(ret != NULL)) { assert(usize == isalloc(ret, config_prof)); - if (tsd != NULL) { - *tsd_thread_allocatedp_get(tsd) += usize; - *tsd_thread_deallocatedp_get(tsd) += old_usize; - } + *tsd_thread_allocatedp_get(tsd) += usize; + *tsd_thread_deallocatedp_get(tsd) += old_usize; } UTRACE(ptr, size, ret); JEMALLOC_VALGRIND_REALLOC(true, ret, usize, true, ptr, old_usize, @@ -1330,7 +1328,7 @@ je_free(void *ptr) UTRACE(ptr, 0, 0); if (likely(ptr != NULL)) - ifree(tsd_tryget(), ptr, true); + ifree(tsd_fetch(), ptr, true); } /* @@ -1543,8 +1541,9 @@ je_mallocx(size_t size, int flags) assert(size != 0); - if (unlikely(malloc_init()) || unlikely((tsd = tsd_tryget()) == NULL)) + if (unlikely(malloc_init())) goto label_oom; + tsd = tsd_fetch(); if (config_prof && opt_prof) p = imallocx_prof(tsd, size, flags, &usize); @@ -1554,10 +1553,8 @@ je_mallocx(size_t size, int flags) goto label_oom; if (config_stats) { - tsd_t *tsd = tsd_tryget(); assert(usize == isalloc(p, config_prof)); - if (tsd != NULL) - 
*tsd_thread_allocatedp_get(tsd) += usize; + *tsd_thread_allocatedp_get(tsd) += usize; } UTRACE(0, size, p); JEMALLOC_VALGRIND_MALLOC(true, p, usize, MALLOCX_ZERO_GET(flags)); @@ -1649,9 +1646,7 @@ je_rallocx(void *ptr, size_t size, int flags) assert(size != 0); assert(malloc_initialized || IS_INITIALIZER); malloc_thread_init(); - - if (unlikely((tsd = tsd_tryget()) == NULL)) - goto label_oom; + tsd = tsd_fetch(); if (unlikely((flags & MALLOCX_ARENA_MASK) != 0)) { unsigned arena_ind = MALLOCX_ARENA_GET(flags); @@ -1794,6 +1789,7 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) assert(SIZE_T_MAX - size >= extra); assert(malloc_initialized || IS_INITIALIZER); malloc_thread_init(); + tsd = tsd_fetch(); if (unlikely((flags & MALLOCX_ARENA_MASK) != 0)) { unsigned arena_ind = MALLOCX_ARENA_GET(flags); @@ -1802,10 +1798,6 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) arena = NULL; old_usize = isalloc(ptr, config_prof); - if (unlikely((tsd = tsd_tryget()) == NULL)) { - usize = old_usize; - goto label_not_resized; - } if (config_valgrind && unlikely(in_valgrind)) old_rzsize = u2rz(old_usize); @@ -1865,7 +1857,7 @@ je_dallocx(void *ptr, int flags) try_tcache = true; UTRACE(ptr, 0, 0); - ifree(tsd_tryget(), ptr, try_tcache); + ifree(tsd_fetch(), ptr, try_tcache); } JEMALLOC_ALWAYS_INLINE_C size_t @@ -1901,7 +1893,7 @@ je_sdallocx(void *ptr, size_t size, int flags) try_tcache = true; UTRACE(ptr, 0, 0); - isfree(tsd_tryget(), ptr, usize, try_tcache); + isfree(tsd_fetch(), ptr, usize, try_tcache); } size_t diff --git a/src/prof.c b/src/prof.c index 5b979989..262f0baa 100644 --- a/src/prof.c +++ b/src/prof.c @@ -850,8 +850,7 @@ prof_bt_count(void) tsd_t *tsd; prof_tdata_t *tdata; - if ((tsd = tsd_tryget()) == NULL) - return (0); + tsd = tsd_fetch(); tdata = prof_tdata_get(tsd, false); if (tdata == NULL) return (0); @@ -1475,8 +1474,7 @@ prof_fdump(void) if (!prof_booted) return; - if ((tsd = tsd_tryget()) == NULL) - return; + tsd = tsd_fetch(); if 
(opt_prof_final && opt_prof_prefix[0] != '\0') { malloc_mutex_lock(&prof_dump_seq_mtx); @@ -1497,8 +1495,7 @@ prof_idump(void) if (!prof_booted) return; - if ((tsd = tsd_tryget()) == NULL) - return; + tsd = tsd_fetch(); tdata = prof_tdata_get(tsd, false); if (tdata == NULL) return; @@ -1526,8 +1523,7 @@ prof_mdump(const char *filename) if (!opt_prof || !prof_booted) return (true); - if ((tsd = tsd_tryget()) == NULL) - return (true); + tsd = tsd_fetch(); if (filename == NULL) { /* No filename specified, so automatically generate one. */ @@ -1553,8 +1549,7 @@ prof_gdump(void) if (!prof_booted) return; - if ((tsd = tsd_tryget()) == NULL) - return; + tsd = tsd_fetch(); tdata = prof_tdata_get(tsd, false); if (tdata == NULL) return; @@ -1677,6 +1672,7 @@ prof_tdata_destroy_locked(tsd_t *tsd, prof_tdata_t *tdata) { assert(prof_tdata_should_destroy(tdata)); + assert(tsd_prof_tdata_get(tsd) != tdata); tdata_tree_remove(&tdatas, tdata); @@ -1704,6 +1700,7 @@ prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata) if (tdata->attached) { tdata->attached = false; destroy_tdata = prof_tdata_should_destroy(tdata); + tsd_prof_tdata_set(tsd, NULL); } else destroy_tdata = false; malloc_mutex_unlock(tdata->lock); @@ -1819,8 +1816,7 @@ prof_thread_name_get(void) tsd_t *tsd; prof_tdata_t *tdata; - if ((tsd = tsd_tryget()) == NULL) - return (""); + tsd = tsd_fetch(); tdata = prof_tdata_get(tsd, true); if (tdata == NULL) return (""); @@ -1886,8 +1882,7 @@ prof_thread_active_get(void) tsd_t *tsd; prof_tdata_t *tdata; - if ((tsd = tsd_tryget()) == NULL) - return (false); + tsd = tsd_fetch(); tdata = prof_tdata_get(tsd, true); if (tdata == NULL) return (false); @@ -1900,8 +1895,7 @@ prof_thread_active_set(bool active) tsd_t *tsd; prof_tdata_t *tdata; - if ((tsd = tsd_tryget()) == NULL) - return (true); + tsd = tsd_fetch(); tdata = prof_tdata_get(tsd, true); if (tdata == NULL) return (true); @@ -1988,8 +1982,7 @@ prof_boot2(void) if (malloc_mutex_init(&prof_thread_active_init_mtx)) return (true); 
- if ((tsd = tsd_tryget()) == NULL) - return (true); + tsd = tsd_fetch(); if (ckh_new(tsd, &bt2gctx, PROF_CKH_MINITEMS, prof_bt_hash, prof_bt_keycomp)) return (true); diff --git a/src/tcache.c b/src/tcache.c index 6f3408cd..07167b6d 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -263,7 +263,8 @@ tcache_get_hard(tsd_t *tsd) { if (!tcache_enabled_get()) { - tcache_enabled_set(false); /* Memoize. */ + if (tsd_nominal(tsd)) + tcache_enabled_set(false); /* Memoize. */ return (NULL); } return (tcache_create(choose_arena(tsd, NULL))); diff --git a/src/tsd.c b/src/tsd.c index 27a70ee8..cbc64e44 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -74,11 +74,6 @@ tsd_cleanup(void *arg) { tsd_t *tsd = (tsd_t *)arg; - if (tsd == NULL) { - /* OOM during re-initialization. */ - return; - } - switch (tsd->state) { case tsd_state_nominal: #define O(n, t) \ diff --git a/test/unit/ckh.c b/test/unit/ckh.c index 03b4f716..c2126487 100644 --- a/test/unit/ckh.c +++ b/test/unit/ckh.c @@ -5,8 +5,7 @@ TEST_BEGIN(test_new_delete) tsd_t *tsd; ckh_t ckh; - tsd = tsd_tryget(); - assert_ptr_not_null(tsd, "Unexpected tsd failure"); + tsd = tsd_fetch(); assert_false(ckh_new(tsd, &ckh, 2, ckh_string_hash, ckh_string_keycomp), "Unexpected ckh_new() error"); @@ -31,8 +30,7 @@ TEST_BEGIN(test_count_insert_search_remove) const char *missing = "A string not in the hash table."; size_t i; - tsd = tsd_tryget(); - assert_ptr_not_null(tsd, "Unexpected tsd failure"); + tsd = tsd_fetch(); assert_false(ckh_new(tsd, &ckh, 2, ckh_string_hash, ckh_string_keycomp), "Unexpected ckh_new() error"); @@ -116,8 +114,7 @@ TEST_BEGIN(test_insert_iter_remove) void *q, *r; size_t i; - tsd = tsd_tryget(); - assert_ptr_not_null(tsd, "Unexpected tsd failure"); + tsd = tsd_fetch(); assert_false(ckh_new(tsd, &ckh, 2, ckh_pointer_hash, ckh_pointer_keycomp), "Unexpected ckh_new() error"); diff --git a/test/unit/tsd.c b/test/unit/tsd.c index 391a7807..eb1c5976 100644 --- a/test/unit/tsd.c +++ b/test/unit/tsd.c @@ -6,17 +6,46 @@ typedef 
unsigned int data_t; static bool data_cleanup_executed; +malloc_tsd_protos(, data_, data_t) + void data_cleanup(void *arg) { data_t *data = (data_t *)arg; - assert_x_eq(*data, THREAD_DATA, - "Argument passed into cleanup function should match tsd value"); + if (!data_cleanup_executed) { + assert_x_eq(*data, THREAD_DATA, + "Argument passed into cleanup function should match tsd " + "value"); + } data_cleanup_executed = true; + + /* + * Allocate during cleanup for two rounds, in order to assure that + * jemalloc's internal tsd reinitialization happens. + */ + switch (*data) { + case THREAD_DATA: + *data = 1; + data_tsd_set(data); + break; + case 1: + *data = 2; + data_tsd_set(data); + break; + case 2: + return; + default: + not_reached(); + } + + { + void *p = mallocx(1, 0); + assert_ptr_not_null(p, "Unexpeced mallocx() failure"); + dallocx(p, 0); + } } -malloc_tsd_protos(, data_, data_t) malloc_tsd_externs(data_, data_t) #define DATA_INIT 0x12345678 malloc_tsd_data(, data_, data_t, DATA_INIT) From 29146e9d15250be0b05cb92550a61a6511e58f79 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 4 Oct 2014 11:23:13 -0700 Subject: [PATCH 0548/3378] Don't force TLS on behalf of heap profiling. Revert 6716aa83526b3f866d73a033970cc920bc61c13f (Force use of TLS if heap profiling is enabled.). No existing tests indicate that this is necessary, nor does code inspection uncover any potential issues. Most likely the original commit covered up a bug related to tsd-internal allocation that has since been fixed. 
--- configure.ac | 5 ----- 1 file changed, 5 deletions(-) diff --git a/configure.ac b/configure.ac index 1ee2ed8e..e4afe889 100644 --- a/configure.ac +++ b/configure.ac @@ -793,11 +793,6 @@ fi AC_MSG_CHECKING([configured backtracing method]) AC_MSG_RESULT([$backtrace_method]) if test "x$enable_prof" = "x1" ; then - if test "x${force_tls}" = "x0" ; then - AC_MSG_ERROR([Heap profiling requires TLS]); - fi - force_tls="1" - if test "x$abi" != "xpecoff"; then dnl Heap profiling uses the log(3) function. LIBS="$LIBS -lm" From 34e85b4182d5ae029b558aae3da25fff7c3efe12 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 4 Oct 2014 11:26:05 -0700 Subject: [PATCH 0549/3378] Make prof-related inline functions always-inline. --- include/jemalloc/internal/prof.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index 0ec7c18a..c8014717 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -308,7 +308,7 @@ void prof_free(tsd_t *tsd, const void *ptr, size_t usize); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PROF_C_)) -JEMALLOC_INLINE bool +JEMALLOC_ALWAYS_INLINE bool prof_active_get_unlocked(void) { @@ -321,7 +321,7 @@ prof_active_get_unlocked(void) return (prof_active); } -JEMALLOC_INLINE prof_tdata_t * +JEMALLOC_ALWAYS_INLINE prof_tdata_t * prof_tdata_get(tsd_t *tsd, bool create) { prof_tdata_t *tdata; @@ -345,7 +345,7 @@ prof_tdata_get(tsd_t *tsd, bool create) return (tdata); } -JEMALLOC_INLINE prof_tctx_t * +JEMALLOC_ALWAYS_INLINE prof_tctx_t * prof_tctx_get(const void *ptr) { prof_tctx_t *ret; @@ -364,7 +364,7 @@ prof_tctx_get(const void *ptr) return (ret); } -JEMALLOC_INLINE void +JEMALLOC_ALWAYS_INLINE void prof_tctx_set(const void *ptr, prof_tctx_t *tctx) { arena_chunk_t *chunk; @@ -380,7 +380,7 @@ prof_tctx_set(const void *ptr, prof_tctx_t *tctx) huge_prof_tctx_set(ptr, tctx); } -JEMALLOC_INLINE bool 
+JEMALLOC_ALWAYS_INLINE bool prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update, prof_tdata_t **tdata_out) { @@ -410,7 +410,7 @@ prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update, } } -JEMALLOC_INLINE prof_tctx_t * +JEMALLOC_ALWAYS_INLINE prof_tctx_t * prof_alloc_prep(tsd_t *tsd, size_t usize, bool update) { prof_tctx_t *ret; @@ -431,7 +431,7 @@ prof_alloc_prep(tsd_t *tsd, size_t usize, bool update) return (ret); } -JEMALLOC_INLINE void +JEMALLOC_ALWAYS_INLINE void prof_malloc(const void *ptr, size_t usize, prof_tctx_t *tctx) { @@ -445,7 +445,7 @@ prof_malloc(const void *ptr, size_t usize, prof_tctx_t *tctx) prof_tctx_set(ptr, (prof_tctx_t *)(uintptr_t)1U); } -JEMALLOC_INLINE void +JEMALLOC_ALWAYS_INLINE void prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, bool updated, size_t old_usize, prof_tctx_t *old_tctx) { @@ -475,7 +475,7 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, prof_tctx_set(ptr, (prof_tctx_t *)(uintptr_t)1U); } -JEMALLOC_INLINE void +JEMALLOC_ALWAYS_INLINE void prof_free(tsd_t *tsd, const void *ptr, size_t usize) { prof_tctx_t *tctx = prof_tctx_get(ptr); From 0800afd03f6f4bc2d722bffedb3398d8ac762c5f Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 4 Oct 2014 14:59:17 -0700 Subject: [PATCH 0550/3378] Silence a compiler warning. 
--- src/jemalloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 4a3d968b..3490ecdf 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1262,7 +1262,7 @@ void * je_realloc(void *ptr, size_t size) { void *ret; - tsd_t *tsd; + tsd_t *tsd JEMALLOC_CC_SILENCE_INIT(NULL); size_t usize JEMALLOC_CC_SILENCE_INIT(0); size_t old_usize = 0; UNUSED size_t old_rzsize JEMALLOC_CC_SILENCE_INIT(0); From 47395a1b4c0793f676b89a763daaed1cbb510529 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 4 Oct 2014 14:59:38 -0700 Subject: [PATCH 0551/3378] Avoid purging in microbench when lazy-lock is enabled. --- test/stress/microbench.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/test/stress/microbench.c b/test/stress/microbench.c index a8267c39..980eca41 100644 --- a/test/stress/microbench.c +++ b/test/stress/microbench.c @@ -19,6 +19,13 @@ compare_funcs(uint64_t nwarmup, uint64_t niter, const char *name_a, { timedelta_t timer_a, timer_b; char ratio_buf[6]; + void *p; + + p = mallocx(1, 0); + if (p == NULL) { + test_fail("Unexpected mallocx() failure"); + return; + } time_func(&timer_a, nwarmup, niter, func_a); time_func(&timer_b, nwarmup, niter, func_b); @@ -28,6 +35,8 @@ compare_funcs(uint64_t nwarmup, uint64_t niter, const char *name_a, "%s=%"PRIu64"us, ratio=1:%s\n", niter, name_a, timer_usec(&timer_a), name_b, timer_usec(&timer_b), ratio_buf); + + dallocx(p, 0); } static void From 16854ebeb77c9403ebd1b85fdd46ee80bb3f3e9d Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 4 Oct 2014 15:00:51 -0700 Subject: [PATCH 0552/3378] Don't disable tcache for lazy-lock. Don't disable tcache when lazy-lock is configured. There already exists a mechanism to disable tcache, but doing so automatically due to lazy-lock causes surprising performance behavior. 
--- include/jemalloc/internal/tcache.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index 1a70972c..1b1d8d98 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -188,8 +188,6 @@ tcache_get(tsd_t *tsd, bool create) if (!config_tcache) return (NULL); - if (config_lazy_lock && !isthreaded) - return (NULL); tcache = tsd_tcache_get(tsd); if (!create) From f04a0bef99e67e11b687a661d6f04e1d7e3bde1f Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 4 Oct 2014 15:03:49 -0700 Subject: [PATCH 0553/3378] Fix prof regressions. Fix prof regressions related to tdata (main per thread profiling data structure) destruction: - Deadlock. The fix for this was intended to be part of 20c31deaae38ed9aa4fe169ed65e0c45cd542955 (Test prof.reset mallctl and fix numerous discovered bugs.) but the fix was left incomplete. - Destruction race. Detaching tdata just prior to destruction without holding the tdatas lock made it possible for another thread to destroy the tdata out from under the thread that was on its way to doing so. 
--- src/prof.c | 39 +++++++++++++++++++++++---------------- 1 file changed, 23 insertions(+), 16 deletions(-) diff --git a/src/prof.c b/src/prof.c index 262f0baa..a6cea92f 100644 --- a/src/prof.c +++ b/src/prof.c @@ -116,8 +116,10 @@ static bool prof_booted = false; static bool prof_tctx_should_destroy(prof_tctx_t *tctx); static void prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx); -static bool prof_tdata_should_destroy(prof_tdata_t *tdata); -static void prof_tdata_destroy(tsd_t *tsd, prof_tdata_t *tdata); +static bool prof_tdata_should_destroy(prof_tdata_t *tdata, + bool even_if_attached); +static void prof_tdata_destroy(tsd_t *tsd, prof_tdata_t *tdata, + bool even_if_attached); static char *prof_thread_name_alloc(tsd_t *tsd, const char *thread_name); /******************************************************************************/ @@ -616,7 +618,7 @@ prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) assert(tctx->cnts.accumbytes == 0); ckh_remove(tsd, &tdata->bt2tctx, &gctx->bt, NULL, NULL); - destroy_tdata = prof_tdata_should_destroy(tdata); + destroy_tdata = prof_tdata_should_destroy(tdata, false); malloc_mutex_unlock(tdata->lock); malloc_mutex_lock(gctx->lock); @@ -644,7 +646,7 @@ prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) prof_gctx_try_destroy(tsd, gctx, tdata); if (destroy_tdata) - prof_tdata_destroy(tsd, tdata); + prof_tdata_destroy(tsd, tdata, false); idalloc(tsd, tctx); } @@ -1656,10 +1658,10 @@ prof_tdata_init(tsd_t *tsd) /* tdata->lock must be held. */ static bool -prof_tdata_should_destroy(prof_tdata_t *tdata) +prof_tdata_should_destroy(prof_tdata_t *tdata, bool even_if_attached) { - if (tdata->attached) + if (tdata->attached && !even_if_attached) return (false); if (ckh_count(&tdata->bt2tctx) != 0) return (false); @@ -1668,10 +1670,11 @@ prof_tdata_should_destroy(prof_tdata_t *tdata) /* tdatas_mtx must be held. 
*/ static void -prof_tdata_destroy_locked(tsd_t *tsd, prof_tdata_t *tdata) +prof_tdata_destroy_locked(tsd_t *tsd, prof_tdata_t *tdata, + bool even_if_attached) { - assert(prof_tdata_should_destroy(tdata)); + assert(prof_tdata_should_destroy(tdata, even_if_attached)); assert(tsd_prof_tdata_get(tsd) != tdata); tdata_tree_remove(&tdatas, tdata); @@ -1683,11 +1686,11 @@ prof_tdata_destroy_locked(tsd_t *tsd, prof_tdata_t *tdata) } static void -prof_tdata_destroy(tsd_t *tsd, prof_tdata_t *tdata) +prof_tdata_destroy(tsd_t *tsd, prof_tdata_t *tdata, bool even_if_attached) { malloc_mutex_lock(&tdatas_mtx); - prof_tdata_destroy_locked(tsd, tdata); + prof_tdata_destroy_locked(tsd, tdata, even_if_attached); malloc_mutex_unlock(&tdatas_mtx); } @@ -1698,14 +1701,19 @@ prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata) malloc_mutex_lock(tdata->lock); if (tdata->attached) { - tdata->attached = false; - destroy_tdata = prof_tdata_should_destroy(tdata); + destroy_tdata = prof_tdata_should_destroy(tdata, true); + /* + * Only detach if !destroy_tdata, because detaching would allow + * another thread to win the race to destroy tdata. + */ + if (!destroy_tdata) + tdata->attached = false; tsd_prof_tdata_set(tsd, NULL); } else destroy_tdata = false; malloc_mutex_unlock(tdata->lock); if (destroy_tdata) - prof_tdata_destroy(tsd, tdata); + prof_tdata_destroy(tsd, tdata, true); } prof_tdata_t * @@ -1731,7 +1739,7 @@ prof_tdata_expire(prof_tdata_t *tdata) if (!tdata->expired) { tdata->expired = true; destroy_tdata = tdata->attached ? 
false : - prof_tdata_should_destroy(tdata); + prof_tdata_should_destroy(tdata, false); } else destroy_tdata = false; malloc_mutex_unlock(tdata->lock); @@ -1764,8 +1772,7 @@ prof_reset(tsd_t *tsd, size_t lg_sample) prof_tdata_reset_iter, NULL); if (to_destroy != NULL) { next = tdata_tree_next(&tdatas, to_destroy); - tdata_tree_remove(&tdatas, to_destroy); - prof_tdata_destroy(tsd, to_destroy); + prof_tdata_destroy_locked(tsd, to_destroy, false); } else next = NULL; } while (next != NULL); From e9a3fa2e091a48df272e6a7d5d3e92b1a12c489b Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 5 Oct 2014 12:05:37 -0700 Subject: [PATCH 0554/3378] Add missing header includes in jemalloc/jemalloc.h . Add stdlib.h, stdbool.h, and stdint.h to jemalloc/jemalloc.h so that applications only have to #include . This resolves #132. --- doc/jemalloc.xml.in | 3 +-- include/jemalloc/jemalloc_macros.h.in | 3 +++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 6abb50bc..fcbb4722 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -57,8 +57,7 @@ SYNOPSIS - #include <stdlib.h> -#include <jemalloc/jemalloc.h> + #include <jemalloc/jemalloc.h> Standard API diff --git a/include/jemalloc/jemalloc_macros.h.in b/include/jemalloc/jemalloc_macros.h.in index 1530f9ca..99f12611 100644 --- a/include/jemalloc/jemalloc_macros.h.in +++ b/include/jemalloc/jemalloc_macros.h.in @@ -1,3 +1,6 @@ +#include +#include +#include #include #include From f11a6776c78a09059f8418b718c996a065b33fca Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 5 Oct 2014 13:05:10 -0700 Subject: [PATCH 0555/3378] Fix OOM-related regression in arena_tcache_fill_small(). 
Fix an OOM-related regression in arena_tcache_fill_small() that caused cache corruption that would almost certainly expose the application to undefined behavior, usually in the form of an allocation request returning an already-allocated region, or somewhat less likely, a freed region that had already been returned to the arena, thus making it available to the arena for any purpose. This regression was introduced by 9c43c13a35220c10d97a886616899189daceb359 (Reverse tcache fill order.), and was present in all releases from 2.2.0 through 3.6.0. This resolves #98. --- src/arena.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/arena.c b/src/arena.c index 79fea728..c223946a 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1330,8 +1330,19 @@ arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, size_t binind, ptr = arena_run_reg_alloc(run, &arena_bin_info[binind]); else ptr = arena_bin_malloc_hard(arena, bin); - if (ptr == NULL) + if (ptr == NULL) { + /* + * OOM. tbin->avail isn't yet filled down to its first + * element, so the successful allocations (if any) must + * be moved to the base of tbin->avail before bailing + * out. + */ + if (i > 0) { + memmove(tbin->avail, &tbin->avail[nfill - i], + i * sizeof(void *)); + } break; + } if (config_fill && unlikely(opt_junk)) { arena_alloc_junk_small(ptr, &arena_bin_info[binind], true); From a95018ee819abf897562d9d1f3bc31d4dd725a8d Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Sat, 4 Oct 2014 01:39:32 -0400 Subject: [PATCH 0556/3378] Attempt to expand huge allocations in-place. This adds support for expanding huge allocations in-place by requesting memory at a specific address from the chunk allocator. It's currently only implemented for the chunk recycling path, although in theory it could also be done by optimistically allocating new chunks. On Linux, it could attempt an in-place mremap. 
However, that won't work in practice since the heap is grown downwards and memory is not unmapped (in a normal build, at least). Repeated vector reallocation micro-benchmark: #include #include int main(void) { for (size_t i = 0; i < 100; i++) { void *ptr = NULL; size_t old_size = 0; for (size_t size = 4; size < (1 << 30); size *= 2) { ptr = realloc(ptr, size); if (!ptr) return 1; memset(ptr + old_size, 0xff, size - old_size); old_size = size; } free(ptr); } } The glibc allocator fails to do any in-place reallocations on this benchmark once it passes the M_MMAP_THRESHOLD (default 128k) but it elides the cost of copies via mremap, which is currently not something that jemalloc can use. With this improvement, jemalloc still fails to do any in-place huge reallocations for the first outer loop, but then succeeds 100% of the time for the remaining 99 iterations. The time spent doing allocations and copies drops down to under 5%, with nearly all of it spent doing purging + faulting (when huge pages are disabled) and the array memset. An improved mremap API (MREMAP_RETAIN - #138) would be far more general but this is a portable optimization and would still be useful on Linux for xallocx. 
Numbers with transparent huge pages enabled: glibc (copies elided via MREMAP_MAYMOVE): 8.471s jemalloc: 17.816s jemalloc + no-op madvise: 13.236s jemalloc + this commit: 6.787s jemalloc + this commit + no-op madvise: 6.144s Numbers with transparent huge pages disabled: glibc (copies elided via MREMAP_MAYMOVE): 15.403s jemalloc: 39.456s jemalloc + no-op madvise: 12.768s jemalloc + this commit: 15.534s jemalloc + this commit + no-op madvise: 6.354s Closes #137 --- doc/jemalloc.xml.in | 7 +- include/jemalloc/internal/arena.h | 4 +- include/jemalloc/internal/chunk.h | 8 +- include/jemalloc/internal/huge.h | 2 +- .../jemalloc/internal/jemalloc_internal.h.in | 2 +- include/jemalloc/jemalloc_typedefs.h.in | 2 +- src/arena.c | 8 +- src/chunk.c | 47 +++++++----- src/huge.c | 74 ++++++++++++++++++- test/integration/chunk.c | 5 +- 10 files changed, 118 insertions(+), 41 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index fcbb4722..f9d464ce 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1351,6 +1351,7 @@ malloc_conf = "xmalloc:true";]]> function that knows how to deallocate the chunks. typedef void *(chunk_alloc_t) + void *chunk size_t size size_t alignment bool *zero @@ -1367,8 +1368,10 @@ malloc_conf = "xmalloc:true";]]> size parameter is always a multiple of the chunk size. The alignment parameter is always a power of two at least as large as the chunk size. Zeroing is mandatory if - *zero is true upon function - entry. + *zero is true upon function entry. If + chunk is not NULL, the + returned pointer must be chunk or + NULL if it could not be allocated. 
Note that replacing the default chunk allocation function makes the arena's chunk_dalloc; malloc_mutex_unlock(&arena->lock); chunk = (arena_chunk_t *)chunk_alloc_arena(chunk_alloc, chunk_dalloc, - arena->ind, size, alignment, zero); + arena->ind, NULL, size, alignment, zero); malloc_mutex_lock(&arena->lock); if (config_stats && chunk != NULL) arena->stats.mapped += chunksize; @@ -459,8 +459,8 @@ arena_chunk_alloc_internal(arena_t *arena, size_t size, size_t alignment, } void * -arena_chunk_alloc_huge(arena_t *arena, size_t size, size_t alignment, - bool *zero) +arena_chunk_alloc_huge(arena_t *arena, void *new_addr, size_t size, + size_t alignment, bool *zero) { void *ret; chunk_alloc_t *chunk_alloc; @@ -480,7 +480,7 @@ arena_chunk_alloc_huge(arena_t *arena, size_t size, size_t alignment, malloc_mutex_unlock(&arena->lock); ret = chunk_alloc_arena(chunk_alloc, chunk_dalloc, arena->ind, - size, alignment, zero); + new_addr, size, alignment, zero); if (config_stats) { if (ret != NULL) stats_cactive_add(size); diff --git a/src/chunk.c b/src/chunk.c index cde8606e..32b8b3a6 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -42,8 +42,8 @@ static void chunk_dalloc_core(void *chunk, size_t size); /******************************************************************************/ static void * -chunk_recycle(extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, size_t size, - size_t alignment, bool base, bool *zero) +chunk_recycle(extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, + void *new_addr, size_t size, size_t alignment, bool base, bool *zero) { void *ret; extent_node_t *node; @@ -65,11 +65,11 @@ chunk_recycle(extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, size_t size, /* Beware size_t wrap-around. 
*/ if (alloc_size < size) return (NULL); - key.addr = NULL; + key.addr = new_addr; key.size = alloc_size; malloc_mutex_lock(&chunks_mtx); node = extent_tree_szad_nsearch(chunks_szad, &key); - if (node == NULL) { + if (node == NULL || (new_addr && node->addr != new_addr)) { malloc_mutex_unlock(&chunks_mtx); return (NULL); } @@ -142,8 +142,8 @@ chunk_recycle(extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, size_t size, * them if they are returned. */ static void * -chunk_alloc_core(size_t size, size_t alignment, bool base, bool *zero, - dss_prec_t dss_prec) +chunk_alloc_core(void *new_addr, size_t size, size_t alignment, bool base, + bool *zero, dss_prec_t dss_prec) { void *ret; @@ -154,24 +154,30 @@ chunk_alloc_core(size_t size, size_t alignment, bool base, bool *zero, /* "primary" dss. */ if (have_dss && dss_prec == dss_prec_primary) { - if ((ret = chunk_recycle(&chunks_szad_dss, &chunks_ad_dss, size, - alignment, base, zero)) != NULL) + if ((ret = chunk_recycle(&chunks_szad_dss, &chunks_ad_dss, + new_addr, size, alignment, base, zero)) != NULL) return (ret); - if ((ret = chunk_alloc_dss(size, alignment, zero)) != NULL) + /* requesting an address only implemented for recycle */ + if (new_addr == NULL + && (ret = chunk_alloc_dss(size, alignment, zero)) != NULL) return (ret); } /* mmap. */ - if ((ret = chunk_recycle(&chunks_szad_mmap, &chunks_ad_mmap, size, - alignment, base, zero)) != NULL) + if ((ret = chunk_recycle(&chunks_szad_mmap, &chunks_ad_mmap, new_addr, + size, alignment, base, zero)) != NULL) return (ret); - if ((ret = chunk_alloc_mmap(size, alignment, zero)) != NULL) + /* requesting an address only implemented for recycle */ + if (new_addr == NULL && + (ret = chunk_alloc_mmap(size, alignment, zero)) != NULL) return (ret); /* "secondary" dss. 
*/ if (have_dss && dss_prec == dss_prec_secondary) { - if ((ret = chunk_recycle(&chunks_szad_dss, &chunks_ad_dss, size, - alignment, base, zero)) != NULL) + if ((ret = chunk_recycle(&chunks_szad_dss, &chunks_ad_dss, + new_addr, size, alignment, base, zero)) != NULL) return (ret); - if ((ret = chunk_alloc_dss(size, alignment, zero)) != NULL) + /* requesting an address only implemented for recycle */ + if (new_addr == NULL && + (ret = chunk_alloc_dss(size, alignment, zero)) != NULL) return (ret); } @@ -219,7 +225,7 @@ chunk_alloc_base(size_t size) bool zero; zero = false; - ret = chunk_alloc_core(size, chunksize, true, &zero, + ret = chunk_alloc_core(NULL, size, chunksize, true, &zero, chunk_dss_prec_get()); if (ret == NULL) return (NULL); @@ -232,11 +238,12 @@ chunk_alloc_base(size_t size) void * chunk_alloc_arena(chunk_alloc_t *chunk_alloc, chunk_dalloc_t *chunk_dalloc, - unsigned arena_ind, size_t size, size_t alignment, bool *zero) + unsigned arena_ind, void *new_addr, size_t size, size_t alignment, + bool *zero) { void *ret; - ret = chunk_alloc(size, alignment, zero, arena_ind); + ret = chunk_alloc(new_addr, size, alignment, zero, arena_ind); if (ret != NULL && chunk_register(ret, size, false)) { chunk_dalloc(ret, size, arena_ind); ret = NULL; @@ -247,11 +254,11 @@ chunk_alloc_arena(chunk_alloc_t *chunk_alloc, chunk_dalloc_t *chunk_dalloc, /* Default arena chunk allocation routine in the absence of user override. 
*/ void * -chunk_alloc_default(size_t size, size_t alignment, bool *zero, +chunk_alloc_default(void *new_addr, size_t size, size_t alignment, bool *zero, unsigned arena_ind) { - return (chunk_alloc_core(size, alignment, false, zero, + return (chunk_alloc_core(new_addr, size, alignment, false, zero, arenas[arena_ind]->dss_prec)); } diff --git a/src/huge.c b/src/huge.c index 2f059b4d..6bdc0767 100644 --- a/src/huge.c +++ b/src/huge.c @@ -47,7 +47,7 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t size, size_t alignment, */ is_zeroed = zero; arena = choose_arena(tsd, arena); - ret = arena_chunk_alloc_huge(arena, csize, alignment, &is_zeroed); + ret = arena_chunk_alloc_huge(arena, NULL, csize, alignment, &is_zeroed); if (ret == NULL) { base_node_dalloc(node); return (NULL); @@ -95,8 +95,66 @@ huge_dalloc_junk(void *ptr, size_t usize) huge_dalloc_junk_t *huge_dalloc_junk = JEMALLOC_N(huge_dalloc_junk_impl); #endif +static bool +huge_ralloc_no_move_expand(void *ptr, size_t oldsize, size_t size, bool zero) { + size_t csize; + void *expand_addr; + size_t expand_size; + extent_node_t *node, key; + arena_t *arena; + bool is_zeroed; + void *ret; + + csize = CHUNK_CEILING(size); + if (csize == 0) { + /* size is large enough to cause size_t wrap-around. */ + return (true); + } + + expand_addr = ptr + oldsize; + expand_size = csize - oldsize; + + malloc_mutex_lock(&huge_mtx); + + key.addr = ptr; + node = extent_tree_ad_search(&huge, &key); + assert(node != NULL); + assert(node->addr == ptr); + + /* Find the current arena. */ + arena = node->arena; + + malloc_mutex_unlock(&huge_mtx); + + /* + * Copy zero into is_zeroed and pass the copy to chunk_alloc(), so that + * it is possible to make correct junk/zero fill decisions below. 
+ */ + is_zeroed = zero; + ret = arena_chunk_alloc_huge(arena, expand_addr, expand_size, chunksize, + &is_zeroed); + if (ret == NULL) + return (true); + + assert(ret == expand_addr); + + malloc_mutex_lock(&huge_mtx); + /* Update the size of the huge allocation. */ + node->size = csize; + malloc_mutex_unlock(&huge_mtx); + + if (config_fill && !zero) { + if (unlikely(opt_junk)) + memset(expand_addr, 0xa5, expand_size); + else if (unlikely(opt_zero) && !is_zeroed) + memset(expand_addr, 0, expand_size); + } + return (false); +} + bool -huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra) +huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, + bool zero) { /* Both allocations must be huge to avoid a move. */ @@ -145,7 +203,15 @@ huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra) return (false); } - return (true); + /* Attempt to expand the allocation in-place. */ + if (huge_ralloc_no_move_expand(ptr, oldsize, size + extra, zero)) { + if (extra == 0) + return (true); + + /* Try again, this time without extra. */ + return (huge_ralloc_no_move_expand(ptr, oldsize, size, zero)); + } + return (false); } void * @@ -156,7 +222,7 @@ huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size, size_t copysize; /* Try to avoid moving the allocation. 
*/ - if (!huge_ralloc_no_move(ptr, oldsize, size, extra)) + if (!huge_ralloc_no_move(ptr, oldsize, size, extra, zero)) return (ptr); /* diff --git a/test/integration/chunk.c b/test/integration/chunk.c index 28537098..89938504 100644 --- a/test/integration/chunk.c +++ b/test/integration/chunk.c @@ -11,10 +11,11 @@ chunk_dalloc(void *chunk, size_t size, unsigned arena_ind) } void * -chunk_alloc(size_t size, size_t alignment, bool *zero, unsigned arena_ind) +chunk_alloc(void *new_addr, size_t size, size_t alignment, bool *zero, + unsigned arena_ind) { - return (old_alloc(size, alignment, zero, arena_ind)); + return (old_alloc(new_addr, size, alignment, zero, arena_ind)); } TEST_BEGIN(test_chunk) From 3c3b3b1a94705c8019b973fb679dd99bd19305af Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 5 Oct 2014 14:48:44 -0700 Subject: [PATCH 0557/3378] Fix a docbook element nesting nit. According to the docbook documentation for , its parent must be ; fix accordingly. Nonetheless, the man page processor fails badly when this construct is embedded in a (which is documented to be legal), although the html processor does fine. --- doc/jemalloc.xml.in | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index f9d464ce..1f692f78 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1349,14 +1349,14 @@ malloc_conf = "xmalloc:true";]]> also be set via arena.<i>.chunk.dalloc to a companion function that knows how to deallocate the chunks. 
- + typedef void *(chunk_alloc_t) void *chunk size_t size size_t alignment bool *zero unsigned arena_ind - + A chunk allocation function conforms to the chunk_alloc_t type and upon success returns a pointer to size bytes of memory on behalf of arena arena_ind such -@@ -1397,12 +1397,12 @@ malloc_conf = "xmalloc:true";]]> arena creation), but the automatically created arenas may have already created chunks prior to the application having an opportunity to take over chunk allocation. - + typedef void (chunk_dalloc_t) void *chunk size_t size unsigned arena_ind - + A chunk deallocation function conforms to the chunk_dalloc_t type and deallocates a chunk of given size on From 155bfa7da18cab0d21d87aa2dce4554166836f5d Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 5 Oct 2014 17:54:10 -0700 Subject: [PATCH 0558/3378] Normalize size classes. Normalize size classes to use the same number of size classes per size doubling (currently hard coded to 4), across the entire range of size classes. Small size classes already used this spacing, but in order to support this change, additional small size classes now fill [4 KiB .. 16 KiB). Large size classes range from [16 KiB .. 4 MiB). Huge size classes now support non-multiples of the chunk size in order to fill (4 MiB .. 16 MiB). 
--- include/jemalloc/internal/arena.h | 231 +++----------- include/jemalloc/internal/chunk.h | 3 - include/jemalloc/internal/huge.h | 2 +- .../jemalloc/internal/jemalloc_internal.h.in | 299 +++++++++++++----- include/jemalloc/internal/private_symbols.txt | 22 +- include/jemalloc/internal/size_classes.sh | 15 +- include/jemalloc/internal/stats.h | 7 +- include/jemalloc/internal/tcache.h | 52 +-- src/arena.c | 223 ++++++------- src/chunk.c | 3 - src/ctl.c | 2 +- src/huge.c | 113 +++++-- src/jemalloc.c | 34 +- src/tcache.c | 8 +- test/unit/junk.c | 17 +- test/unit/mallctl.c | 2 +- 16 files changed, 558 insertions(+), 475 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 1f985723..681b5802 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -1,6 +1,8 @@ /******************************************************************************/ #ifdef JEMALLOC_H_TYPES +#define LARGE_MINCLASS (ZU(1) << LG_LARGE_MINCLASS) + /* Maximum number of regions in one run. */ #define LG_RUN_MAXREGS (LG_PAGE - LG_TINY_MIN) #define RUN_MAXREGS (1U << LG_RUN_MAXREGS) @@ -96,11 +98,15 @@ struct arena_chunk_map_bits_s { * xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx * -------- -------- ----++++ ++++D-LA * - * Large (sampled, size <= PAGE): + * Large (sampled, size <= LARGE_MINCLASS): * ssssssss ssssssss ssssnnnn nnnnD-LA + * xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx + * -------- -------- ----++++ ++++D-LA * - * Large (not sampled, size == PAGE): + * Large (not sampled, size == LARGE_MINCLASS): * ssssssss ssssssss ssss++++ ++++D-LA + * xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx + * -------- -------- ----++++ ++++D-LA */ size_t bits; #define CHUNK_MAP_BININD_SHIFT 4 @@ -325,30 +331,21 @@ struct arena_s { #ifdef JEMALLOC_H_EXTERNS extern ssize_t opt_lg_dirty_mult; -/* - * small_size2bin_tab is a compact lookup table that rounds request sizes up to - * size classes. 
In order to reduce cache footprint, the table is compressed, - * and all accesses are via small_size2bin(). - */ -extern uint8_t const small_size2bin_tab[]; -/* - * small_bin2size_tab duplicates information in arena_bin_info, but in a const - * array, for which it is easier for the compiler to optimize repeated - * dereferences. - */ -extern uint32_t const small_bin2size_tab[NBINS]; extern arena_bin_info_t arena_bin_info[NBINS]; -/* Number of large size classes. */ -#define nlclasses (chunk_npages - map_bias) +extern size_t map_bias; /* Number of arena chunk header pages. */ +extern size_t map_misc_offset; +extern size_t arena_maxrun; /* Max run size for arenas. */ +extern size_t arena_maxclass; /* Max size class for arenas. */ +extern size_t nlclasses; /* Number of large size classes. */ void *arena_chunk_alloc_huge(arena_t *arena, void *new_addr, size_t size, size_t alignment, bool *zero); void arena_chunk_dalloc_huge(arena_t *arena, void *chunk, size_t size); void arena_purge_all(arena_t *arena); void arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, - size_t binind, uint64_t prof_accumbytes); + index_t binind, uint64_t prof_accumbytes); void arena_alloc_junk_small(void *ptr, arena_bin_info_t *bin_info, bool zero); #ifdef JEMALLOC_JET @@ -403,15 +400,6 @@ void arena_postfork_child(arena_t *arena); #ifdef JEMALLOC_H_INLINES #ifndef JEMALLOC_ENABLE_INLINE -size_t small_size2bin_compute(size_t size); -size_t small_size2bin_lookup(size_t size); -size_t small_size2bin(size_t size); -size_t small_bin2size_compute(size_t binind); -size_t small_bin2size_lookup(size_t binind); -size_t small_bin2size(size_t binind); -size_t small_s2u_compute(size_t size); -size_t small_s2u_lookup(size_t size); -size_t small_s2u(size_t size); arena_chunk_map_bits_t *arena_bitselm_get(arena_chunk_t *chunk, size_t pageind); arena_chunk_map_misc_t *arena_miscelm_get(arena_chunk_t *chunk, @@ -426,7 +414,7 @@ size_t arena_mapbits_unallocated_size_get(arena_chunk_t *chunk, size_t 
pageind); size_t arena_mapbits_large_size_get(arena_chunk_t *chunk, size_t pageind); size_t arena_mapbits_small_runind_get(arena_chunk_t *chunk, size_t pageind); -size_t arena_mapbits_binind_get(arena_chunk_t *chunk, size_t pageind); +index_t arena_mapbits_binind_get(arena_chunk_t *chunk, size_t pageind); size_t arena_mapbits_dirty_get(arena_chunk_t *chunk, size_t pageind); size_t arena_mapbits_unzeroed_get(arena_chunk_t *chunk, size_t pageind); size_t arena_mapbits_large_get(arena_chunk_t *chunk, size_t pageind); @@ -439,16 +427,16 @@ void arena_mapbits_unallocated_size_set(arena_chunk_t *chunk, size_t pageind, void arena_mapbits_large_set(arena_chunk_t *chunk, size_t pageind, size_t size, size_t flags); void arena_mapbits_large_binind_set(arena_chunk_t *chunk, size_t pageind, - size_t binind); + index_t binind); void arena_mapbits_small_set(arena_chunk_t *chunk, size_t pageind, - size_t runind, size_t binind, size_t flags); + size_t runind, index_t binind, size_t flags); void arena_mapbits_unzeroed_set(arena_chunk_t *chunk, size_t pageind, size_t unzeroed); bool arena_prof_accum_impl(arena_t *arena, uint64_t accumbytes); bool arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes); bool arena_prof_accum(arena_t *arena, uint64_t accumbytes); -size_t arena_ptr_small_binind_get(const void *ptr, size_t mapbits); -size_t arena_bin_index(arena_t *arena, arena_bin_t *bin); +index_t arena_ptr_small_binind_get(const void *ptr, size_t mapbits); +index_t arena_bin_index(arena_t *arena, arena_bin_t *bin); unsigned arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr); prof_tctx_t *arena_prof_tctx_get(const void *ptr); @@ -464,148 +452,6 @@ void arena_sdalloc(tsd_t *tsd, arena_chunk_t *chunk, void *ptr, size_t size, #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_)) # ifdef JEMALLOC_ARENA_INLINE_A -JEMALLOC_INLINE size_t -small_size2bin_compute(size_t size) -{ -#if (NTBINS != 0) - if (size <= (ZU(1) << LG_TINY_MAXCLASS)) { - 
size_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1; - size_t lg_ceil = lg_floor(pow2_ceil(size)); - return (lg_ceil < lg_tmin ? 0 : lg_ceil - lg_tmin); - } else -#endif - { - size_t x = lg_floor((size<<1)-1); - size_t shift = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM) ? 0 : - x - (LG_SIZE_CLASS_GROUP + LG_QUANTUM); - size_t grp = shift << LG_SIZE_CLASS_GROUP; - - size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1) - ? LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1; - - size_t delta_inverse_mask = ZI(-1) << lg_delta; - size_t mod = ((((size-1) & delta_inverse_mask) >> lg_delta)) & - ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1); - - size_t bin = NTBINS + grp + mod; - return (bin); - } -} - -JEMALLOC_ALWAYS_INLINE size_t -small_size2bin_lookup(size_t size) -{ - - assert(size <= LOOKUP_MAXCLASS); - { - size_t ret = ((size_t)(small_size2bin_tab[(size-1) >> - LG_TINY_MIN])); - assert(ret == small_size2bin_compute(size)); - return (ret); - } -} - -JEMALLOC_ALWAYS_INLINE size_t -small_size2bin(size_t size) -{ - - assert(size > 0); - if (likely(size <= LOOKUP_MAXCLASS)) - return (small_size2bin_lookup(size)); - else - return (small_size2bin_compute(size)); -} - -JEMALLOC_INLINE size_t -small_bin2size_compute(size_t binind) -{ -#if (NTBINS > 0) - if (binind < NTBINS) - return (ZU(1) << (LG_TINY_MAXCLASS - NTBINS + 1 + binind)); - else -#endif - { - size_t reduced_binind = binind - NTBINS; - size_t grp = reduced_binind >> LG_SIZE_CLASS_GROUP; - size_t mod = reduced_binind & ((ZU(1) << LG_SIZE_CLASS_GROUP) - - 1); - - size_t grp_size_mask = ~((!!grp)-1); - size_t grp_size = ((ZU(1) << (LG_QUANTUM + - (LG_SIZE_CLASS_GROUP-1))) << grp) & grp_size_mask; - - size_t shift = (grp == 0) ? 
1 : grp; - size_t lg_delta = shift + (LG_QUANTUM-1); - size_t mod_size = (mod+1) << lg_delta; - - size_t usize = grp_size + mod_size; - return (usize); - } -} - -JEMALLOC_ALWAYS_INLINE size_t -small_bin2size_lookup(size_t binind) -{ - - assert(binind < NBINS); - { - size_t ret = (size_t)small_bin2size_tab[binind]; - assert(ret == small_bin2size_compute(binind)); - return (ret); - } -} - -JEMALLOC_ALWAYS_INLINE size_t -small_bin2size(size_t binind) -{ - - return (small_bin2size_lookup(binind)); -} - -JEMALLOC_ALWAYS_INLINE size_t -small_s2u_compute(size_t size) -{ -#if (NTBINS > 0) - if (size <= (ZU(1) << LG_TINY_MAXCLASS)) { - size_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1; - size_t lg_ceil = lg_floor(pow2_ceil(size)); - return (lg_ceil < lg_tmin ? (ZU(1) << lg_tmin) : - (ZU(1) << lg_ceil)); - } else -#endif - { - size_t x = lg_floor((size<<1)-1); - size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1) - ? LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1; - size_t delta = ZU(1) << lg_delta; - size_t delta_mask = delta - 1; - size_t usize = (size + delta_mask) & ~delta_mask; - return (usize); - } -} - -JEMALLOC_ALWAYS_INLINE size_t -small_s2u_lookup(size_t size) -{ - size_t ret = small_bin2size(small_size2bin(size)); - - assert(ret == small_s2u_compute(size)); - return (ret); -} - -JEMALLOC_ALWAYS_INLINE size_t -small_s2u(size_t size) -{ - - assert(size > 0); - if (likely(size <= LOOKUP_MAXCLASS)) - return (small_s2u_lookup(size)); - else - return (small_s2u_compute(size)); -} -# endif /* JEMALLOC_ARENA_INLINE_A */ - -# ifdef JEMALLOC_ARENA_INLINE_B JEMALLOC_ALWAYS_INLINE arena_chunk_map_bits_t * arena_bitselm_get(arena_chunk_t *chunk, size_t pageind) { @@ -714,11 +560,11 @@ arena_mapbits_small_runind_get(arena_chunk_t *chunk, size_t pageind) return (mapbits >> LG_PAGE); } -JEMALLOC_ALWAYS_INLINE size_t +JEMALLOC_ALWAYS_INLINE index_t arena_mapbits_binind_get(arena_chunk_t *chunk, size_t pageind) { size_t mapbits; - size_t binind; + index_t binind; mapbits = 
arena_mapbits_get(chunk, pageind); binind = (mapbits & CHUNK_MAP_BININD_MASK) >> CHUNK_MAP_BININD_SHIFT; @@ -810,20 +656,20 @@ arena_mapbits_large_set(arena_chunk_t *chunk, size_t pageind, size_t size, JEMALLOC_ALWAYS_INLINE void arena_mapbits_large_binind_set(arena_chunk_t *chunk, size_t pageind, - size_t binind) + index_t binind) { size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind); size_t mapbits = arena_mapbitsp_read(mapbitsp); assert(binind <= BININD_INVALID); - assert(arena_mapbits_large_size_get(chunk, pageind) == PAGE); + assert(arena_mapbits_large_size_get(chunk, pageind) == LARGE_MINCLASS); arena_mapbitsp_write(mapbitsp, (mapbits & ~CHUNK_MAP_BININD_MASK) | (binind << CHUNK_MAP_BININD_SHIFT)); } JEMALLOC_ALWAYS_INLINE void arena_mapbits_small_set(arena_chunk_t *chunk, size_t pageind, size_t runind, - size_t binind, size_t flags) + index_t binind, size_t flags) { size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind); size_t mapbits = arena_mapbitsp_read(mapbitsp); @@ -893,10 +739,10 @@ arena_prof_accum(arena_t *arena, uint64_t accumbytes) } } -JEMALLOC_ALWAYS_INLINE size_t +JEMALLOC_ALWAYS_INLINE index_t arena_ptr_small_binind_get(const void *ptr, size_t mapbits) { - size_t binind; + index_t binind; binind = (mapbits & CHUNK_MAP_BININD_MASK) >> CHUNK_MAP_BININD_SHIFT; @@ -908,7 +754,7 @@ arena_ptr_small_binind_get(const void *ptr, size_t mapbits) size_t rpages_ind; arena_run_t *run; arena_bin_t *bin; - size_t actual_binind; + index_t actual_binind; arena_bin_info_t *bin_info; arena_chunk_map_misc_t *miscelm; void *rpages; @@ -938,13 +784,13 @@ arena_ptr_small_binind_get(const void *ptr, size_t mapbits) return (binind); } -# endif /* JEMALLOC_ARENA_INLINE_B */ +# endif /* JEMALLOC_ARENA_INLINE_A */ -# ifdef JEMALLOC_ARENA_INLINE_C -JEMALLOC_INLINE size_t +# ifdef JEMALLOC_ARENA_INLINE_B +JEMALLOC_INLINE index_t arena_bin_index(arena_t *arena, arena_bin_t *bin) { - size_t binind = bin - arena->bins; + index_t binind = bin - arena->bins; assert(binind < 
NBINS); return (binind); } @@ -1102,7 +948,8 @@ arena_salloc(const void *ptr, bool demote) { size_t ret; arena_chunk_t *chunk; - size_t pageind, binind; + size_t pageind; + index_t binind; assert(ptr != NULL); assert(CHUNK_ADDR2BASE(ptr) != ptr); @@ -1122,10 +969,6 @@ arena_salloc(const void *ptr, bool demote) ret = arena_mapbits_large_size_get(chunk, pageind); assert(ret != 0); assert(pageind + (ret>>LG_PAGE) <= chunk_npages); - assert(ret == PAGE || arena_mapbits_large_size_get(chunk, - pageind+(ret>>LG_PAGE)-1) == 0); - assert(binind == arena_mapbits_binind_get(chunk, - pageind+(ret>>LG_PAGE)-1)); assert(arena_mapbits_dirty_get(chunk, pageind) == arena_mapbits_dirty_get(chunk, pageind+(ret>>LG_PAGE)-1)); } else { @@ -1133,7 +976,7 @@ arena_salloc(const void *ptr, bool demote) assert(arena_mapbits_large_get(chunk, pageind) != 0 || arena_ptr_small_binind_get(ptr, arena_mapbits_get(chunk, pageind)) == binind); - ret = small_bin2size(binind); + ret = index2size(binind); } return (ret); @@ -1155,7 +998,7 @@ arena_dalloc(tsd_t *tsd, arena_chunk_t *chunk, void *ptr, bool try_tcache) /* Small allocation. */ if (likely(try_tcache) && likely((tcache = tcache_get(tsd, false)) != NULL)) { - size_t binind = arena_ptr_small_binind_get(ptr, + index_t binind = arena_ptr_small_binind_get(ptr, mapbits); tcache_dalloc_small(tcache, ptr, binind); } else @@ -1186,7 +1029,7 @@ arena_sdalloc(tsd_t *tsd, arena_chunk_t *chunk, void *ptr, size_t size, /* Small allocation. 
*/ if (likely(try_tcache) && likely((tcache = tcache_get(tsd, false)) != NULL)) { - size_t binind = small_size2bin(size); + index_t binind = size2index(size); tcache_dalloc_small(tcache, ptr, binind); } else { size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> @@ -1203,7 +1046,7 @@ arena_sdalloc(tsd_t *tsd, arena_chunk_t *chunk, void *ptr, size_t size, arena_dalloc_large(chunk->arena, chunk, ptr); } } -# endif /* JEMALLOC_ARENA_INLINE_C */ +# endif /* JEMALLOC_ARENA_INLINE_B */ #endif #endif /* JEMALLOC_H_INLINES */ diff --git a/include/jemalloc/internal/chunk.h b/include/jemalloc/internal/chunk.h index 2e68a020..764b7aca 100644 --- a/include/jemalloc/internal/chunk.h +++ b/include/jemalloc/internal/chunk.h @@ -40,9 +40,6 @@ extern rtree_t *chunks_rtree; extern size_t chunksize; extern size_t chunksize_mask; /* (chunksize - 1). */ extern size_t chunk_npages; -extern size_t map_bias; /* Number of arena chunk header pages. */ -extern size_t map_misc_offset; -extern size_t arena_maxclass; /* Max size class for arenas. 
*/ void *chunk_alloc_base(size_t size); void *chunk_alloc_arena(chunk_alloc_t *chunk_alloc, diff --git a/include/jemalloc/internal/huge.h b/include/jemalloc/internal/huge.h index 00d8c09d..939993f2 100644 --- a/include/jemalloc/internal/huge.h +++ b/include/jemalloc/internal/huge.h @@ -10,7 +10,7 @@ #ifdef JEMALLOC_H_EXTERNS void *huge_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero); -void *huge_palloc(tsd_t *tsd, arena_t *arena, size_t size, size_t alignment, +void *huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, bool zero); bool huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, bool zero); diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index a169221b..8f0beb9e 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -165,6 +165,9 @@ static const bool config_ivsalloc = #include "jemalloc/internal/jemalloc_internal_macros.h" +/* Size class index type. */ +typedef unsigned index_t; + #define MALLOCX_ARENA_MASK ((int)~0xff) #define MALLOCX_LG_ALIGN_MASK ((int)0x3f) /* Use MALLOCX_ALIGN_GET() if alignment may not be specified in flags. */ @@ -397,6 +400,18 @@ extern arena_t **arenas; extern unsigned narenas_total; extern unsigned narenas_auto; /* Read-only after initialization. */ +/* + * index2size_tab encodes the same information as could be computed (at + * unacceptable cost in some code paths) by index2size_compute(). + */ +extern size_t const index2size_tab[NSIZES]; +/* + * size2index_tab is a compact lookup table that rounds request sizes up to + * size classes. In order to reduce cache footprint, the table is compressed, + * and all accesses are via size2index(). 
+ */ +extern uint8_t const size2index_tab[]; + arena_t *arenas_extend(unsigned ind); arena_t *choose_arena_hard(tsd_t *tsd); void thread_allocated_cleanup(tsd_t *tsd); @@ -449,15 +464,15 @@ void jemalloc_postfork_child(void); #include "jemalloc/internal/chunk.h" #include "jemalloc/internal/huge.h" -/* - * Include arena.h the first time in order to provide inline functions for this - * header's inlines. - */ -#define JEMALLOC_ARENA_INLINE_A -#include "jemalloc/internal/arena.h" -#undef JEMALLOC_ARENA_INLINE_A - #ifndef JEMALLOC_ENABLE_INLINE +index_t size2index_compute(size_t size); +index_t size2index_lookup(size_t size); +index_t size2index(size_t size); +size_t index2size_compute(index_t index); +size_t index2size_lookup(index_t index); +size_t index2size(index_t index); +size_t s2u_compute(size_t size); +size_t s2u_lookup(size_t size); size_t s2u(size_t size); size_t sa2u(size_t size, size_t alignment); unsigned narenas_total_get(void); @@ -465,6 +480,135 @@ arena_t *choose_arena(tsd_t *tsd, arena_t *arena); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) +JEMALLOC_INLINE index_t +size2index_compute(size_t size) +{ + +#if (NTBINS != 0) + if (size <= (ZU(1) << LG_TINY_MAXCLASS)) { + size_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1; + size_t lg_ceil = lg_floor(pow2_ceil(size)); + return (lg_ceil < lg_tmin ? 0 : lg_ceil - lg_tmin); + } else +#endif + { + size_t x = lg_floor((size<<1)-1); + size_t shift = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM) ? 0 : + x - (LG_SIZE_CLASS_GROUP + LG_QUANTUM); + size_t grp = shift << LG_SIZE_CLASS_GROUP; + + size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1) + ? 
LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1; + + size_t delta_inverse_mask = ZI(-1) << lg_delta; + size_t mod = ((((size-1) & delta_inverse_mask) >> lg_delta)) & + ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1); + + size_t index = NTBINS + grp + mod; + return (index); + } +} + +JEMALLOC_ALWAYS_INLINE index_t +size2index_lookup(size_t size) +{ + + assert(size <= LOOKUP_MAXCLASS); + { + size_t ret = ((size_t)(size2index_tab[(size-1) >> + LG_TINY_MIN])); + assert(ret == size2index_compute(size)); + return (ret); + } +} + +JEMALLOC_ALWAYS_INLINE index_t +size2index(size_t size) +{ + + assert(size > 0); + if (likely(size <= LOOKUP_MAXCLASS)) + return (size2index_lookup(size)); + else + return (size2index_compute(size)); +} + +JEMALLOC_INLINE size_t +index2size_compute(index_t index) +{ + +#if (NTBINS > 0) + if (index < NTBINS) + return (ZU(1) << (LG_TINY_MAXCLASS - NTBINS + 1 + index)); + else +#endif + { + size_t reduced_index = index - NTBINS; + size_t grp = reduced_index >> LG_SIZE_CLASS_GROUP; + size_t mod = reduced_index & ((ZU(1) << LG_SIZE_CLASS_GROUP) - + 1); + + size_t grp_size_mask = ~((!!grp)-1); + size_t grp_size = ((ZU(1) << (LG_QUANTUM + + (LG_SIZE_CLASS_GROUP-1))) << grp) & grp_size_mask; + + size_t shift = (grp == 0) ? 1 : grp; + size_t lg_delta = shift + (LG_QUANTUM-1); + size_t mod_size = (mod+1) << lg_delta; + + size_t usize = grp_size + mod_size; + return (usize); + } +} + +JEMALLOC_ALWAYS_INLINE size_t +index2size_lookup(index_t index) +{ + size_t ret = (size_t)index2size_tab[index]; + assert(ret == index2size_compute(index)); + return (ret); +} + +JEMALLOC_ALWAYS_INLINE size_t +index2size(index_t index) +{ + + assert(index < NSIZES); + return (index2size_lookup(index)); +} + +JEMALLOC_ALWAYS_INLINE size_t +s2u_compute(size_t size) +{ + +#if (NTBINS > 0) + if (size <= (ZU(1) << LG_TINY_MAXCLASS)) { + size_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1; + size_t lg_ceil = lg_floor(pow2_ceil(size)); + return (lg_ceil < lg_tmin ? 
(ZU(1) << lg_tmin) : + (ZU(1) << lg_ceil)); + } else +#endif + { + size_t x = lg_floor((size<<1)-1); + size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1) + ? LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1; + size_t delta = ZU(1) << lg_delta; + size_t delta_mask = delta - 1; + size_t usize = (size + delta_mask) & ~delta_mask; + return (usize); + } +} + +JEMALLOC_ALWAYS_INLINE size_t +s2u_lookup(size_t size) +{ + size_t ret = index2size_lookup(size2index_lookup(size)); + + assert(ret == s2u_compute(size)); + return (ret); +} + /* * Compute usable size that would result from allocating an object with the * specified size. @@ -473,11 +617,11 @@ JEMALLOC_ALWAYS_INLINE size_t s2u(size_t size) { - if (size <= SMALL_MAXCLASS) - return (small_s2u(size)); - if (size <= arena_maxclass) - return (PAGE_CEILING(size)); - return (CHUNK_CEILING(size)); + assert(size > 0); + if (likely(size <= LOOKUP_MAXCLASS)) + return (s2u_lookup(size)); + else + return (s2u_compute(size)); } /* @@ -491,71 +635,78 @@ sa2u(size_t size, size_t alignment) assert(alignment != 0 && ((alignment - 1) & alignment) == 0); - /* - * Round size up to the nearest multiple of alignment. - * - * This done, we can take advantage of the fact that for each small - * size class, every object is aligned at the smallest power of two - * that is non-zero in the base two representation of the size. For - * example: - * - * Size | Base 2 | Minimum alignment - * -----+----------+------------------ - * 96 | 1100000 | 32 - * 144 | 10100000 | 32 - * 192 | 11000000 | 64 - */ - usize = ALIGNMENT_CEILING(size, alignment); - /* - * (usize < size) protects against the combination of maximal - * alignment and size greater than maximal alignment. - */ - if (usize < size) { - /* size_t overflow. */ - return (0); + /* Try for a small size class. */ + if (size <= SMALL_MAXCLASS && alignment < PAGE) { + /* + * Round size up to the nearest multiple of alignment. 
+ * + * This done, we can take advantage of the fact that for each + * small size class, every object is aligned at the smallest + * power of two that is non-zero in the base two representation + * of the size. For example: + * + * Size | Base 2 | Minimum alignment + * -----+----------+------------------ + * 96 | 1100000 | 32 + * 144 | 10100000 | 32 + * 192 | 11000000 | 64 + */ + usize = s2u(ALIGNMENT_CEILING(size, alignment)); + if (usize < LARGE_MINCLASS) + return (usize); } - if (usize <= arena_maxclass && alignment <= PAGE) { - if (usize <= SMALL_MAXCLASS) - return (small_s2u(usize)); - return (PAGE_CEILING(usize)); - } else { - size_t run_size; - + /* Try for a large size class. */ + if (size <= arena_maxclass && alignment < chunksize) { /* * We can't achieve subpage alignment, so round up alignment - * permanently; it makes later calculations simpler. + * to the minimum that can actually be supported. */ alignment = PAGE_CEILING(alignment); - usize = PAGE_CEILING(size); - /* - * (usize < size) protects against very large sizes within - * PAGE of SIZE_T_MAX. - * - * (usize + alignment < usize) protects against the - * combination of maximal alignment and usize large enough - * to cause overflow. This is similar to the first overflow - * check above, but it needs to be repeated due to the new - * usize value, which may now be *equal* to maximal - * alignment, whereas before we only detected overflow if the - * original size was *greater* than maximal alignment. - */ - if (usize < size || usize + alignment < usize) { - /* size_t overflow. */ - return (0); - } + + /* Make sure result is a large size class. */ + usize = (size <= LARGE_MINCLASS) ? LARGE_MINCLASS : s2u(size); /* * Calculate the size of the over-size run that arena_palloc() * would need to allocate in order to guarantee the alignment. - * If the run wouldn't fit within a chunk, round up to a huge - * allocation size. 
*/ - run_size = usize + alignment - PAGE; - if (run_size <= arena_maxclass) - return (PAGE_CEILING(usize)); - return (CHUNK_CEILING(usize)); + if (usize + alignment - PAGE <= arena_maxrun) + return (usize); } + + /* Huge size class. Beware of size_t overflow. */ + + /* + * We can't achieve subchunk alignment, so round up alignment to the + * minimum that can actually be supported. + */ + alignment = CHUNK_CEILING(alignment); + if (alignment == 0) { + /* size_t overflow. */ + return (0); + } + + /* Make sure result is a huge size class. */ + if (size <= chunksize) + usize = chunksize; + else { + usize = s2u(size); + if (usize < size) { + /* size_t overflow. */ + return (0); + } + } + + /* + * Calculate the multi-chunk mapping that huge_palloc() would need in + * order to guarantee the alignment. + */ + if (usize + alignment - PAGE < usize) { + /* size_t overflow. */ + return (0); + } + return (usize); } JEMALLOC_INLINE unsigned @@ -591,16 +742,16 @@ choose_arena(tsd_t *tsd, arena_t *arena) #include "jemalloc/internal/bitmap.h" #include "jemalloc/internal/rtree.h" /* - * Include arena.h the second and third times in order to resolve circular - * dependencies with tcache.h. + * Include portions of arena.h interleaved with tcache.h in order to resolve + * circular dependencies. 
*/ +#define JEMALLOC_ARENA_INLINE_A +#include "jemalloc/internal/arena.h" +#undef JEMALLOC_ARENA_INLINE_A +#include "jemalloc/internal/tcache.h" #define JEMALLOC_ARENA_INLINE_B #include "jemalloc/internal/arena.h" #undef JEMALLOC_ARENA_INLINE_B -#include "jemalloc/internal/tcache.h" -#define JEMALLOC_ARENA_INLINE_C -#include "jemalloc/internal/arena.h" -#undef JEMALLOC_ARENA_INLINE_C #include "jemalloc/internal/hash.h" #include "jemalloc/internal/quarantine.h" @@ -678,7 +829,7 @@ ipalloct(tsd_t *tsd, size_t usize, size_t alignment, bool zero, bool try_tcache, assert(usize != 0); assert(usize == sa2u(usize, alignment)); - if (usize <= arena_maxclass && alignment <= PAGE) + if (usize <= SMALL_MAXCLASS && alignment < PAGE) ret = arena_malloc(tsd, arena, usize, zero, try_tcache); else { if (usize <= arena_maxclass) { @@ -742,7 +893,7 @@ u2rz(size_t usize) size_t ret; if (usize <= SMALL_MAXCLASS) { - size_t binind = small_size2bin(usize); + index_t binind = size2index(usize); ret = arena_bin_info[binind].redzone_size; } else ret = 0; diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 4ea9a953..1a7fde4b 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -41,6 +41,7 @@ arena_mapbitsp_get arena_mapbitsp_read arena_mapbitsp_write arena_maxclass +arena_maxrun arena_miscelm_get arena_miscelm_to_pageind arena_miscelm_to_rpages @@ -216,6 +217,10 @@ idalloct imalloc imalloct in_valgrind +index2size +index2size_compute +index2size_lookup +index2size_tab ipalloc ipalloct iqalloc @@ -338,19 +343,14 @@ rtree_postfork_parent rtree_prefork rtree_set s2u +s2u_compute +s2u_lookup sa2u set_errno -small_bin2size -small_bin2size_compute -small_bin2size_lookup -small_bin2size_tab -small_s2u -small_s2u_compute -small_s2u_lookup -small_size2bin -small_size2bin_compute -small_size2bin_lookup -small_size2bin_tab +size2index +size2index_compute +size2index_lookup 
+size2index_tab stats_cactive stats_cactive_add stats_cactive_get diff --git a/include/jemalloc/internal/size_classes.sh b/include/jemalloc/internal/size_classes.sh index 0cfac72d..897570cc 100755 --- a/include/jemalloc/internal/size_classes.sh +++ b/include/jemalloc/internal/size_classes.sh @@ -61,7 +61,7 @@ size_class() { rem="yes" fi - if [ ${lg_size} -lt ${lg_p} ] ; then + if [ ${lg_size} -lt $((${lg_p} + ${lg_g})) ] ; then bin="yes" else bin="no" @@ -159,6 +159,7 @@ size_classes() { nbins=$((${index} + 1)) # Final written value is correct: small_maxclass="((((size_t)1) << ${lg_grp}) + (((size_t)${ndelta}) << ${lg_delta}))" + lg_large_minclass=$((${lg_grp} + 1)) fi index=$((${index} + 1)) ndelta=$((${ndelta} + 1)) @@ -167,14 +168,17 @@ size_classes() { lg_delta=$((${lg_delta} + 1)) done echo + nsizes=${index} # Defined upon completion: # - ntbins # - nlbins # - nbins + # - nsizes # - lg_tiny_maxclass # - lookup_maxclass # - small_maxclass + # - lg_large_minclass } cat <tbins[binind]; - size = small_bin2size(binind); + usize = index2size(binind); ret = tcache_alloc_easy(tbin); if (unlikely(ret == NULL)) { ret = tcache_alloc_small_hard(tcache, tbin, binind); if (ret == NULL) return (NULL); } - assert(tcache_salloc(ret) == size); + assert(tcache_salloc(ret) == usize); if (likely(!zero)) { if (config_fill) { @@ -254,20 +255,20 @@ tcache_alloc_small(tcache_t *tcache, size_t size, bool zero) arena_alloc_junk_small(ret, &arena_bin_info[binind], false); } else if (unlikely(opt_zero)) - memset(ret, 0, size); + memset(ret, 0, usize); } } else { if (config_fill && unlikely(opt_junk)) { arena_alloc_junk_small(ret, &arena_bin_info[binind], true); } - memset(ret, 0, size); + memset(ret, 0, usize); } if (config_stats) tbin->tstats.nrequests++; if (config_prof) - tcache->prof_accumbytes += size; + tcache->prof_accumbytes += usize; tcache_event(tcache); return (ret); } @@ -276,12 +277,13 @@ JEMALLOC_ALWAYS_INLINE void * tcache_alloc_large(tcache_t *tcache, size_t size, bool 
zero) { void *ret; - size_t binind; + index_t binind; + size_t usize; tcache_bin_t *tbin; - size = PAGE_CEILING(size); - assert(size <= tcache_maxclass); - binind = NBINS + (size >> LG_PAGE) - 1; + binind = size2index(size); + usize = index2size(binind); + assert(usize <= tcache_maxclass); assert(binind < nhbins); tbin = &tcache->tbins[binind]; ret = tcache_alloc_easy(tbin); @@ -290,11 +292,11 @@ tcache_alloc_large(tcache_t *tcache, size_t size, bool zero) * Only allocate one large object at a time, because it's quite * expensive to create one and not use it. */ - ret = arena_malloc_large(tcache->arena, size, zero); + ret = arena_malloc_large(tcache->arena, usize, zero); if (ret == NULL) return (NULL); } else { - if (config_prof && size == PAGE) { + if (config_prof && usize == LARGE_MINCLASS) { arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ret); size_t pageind = (((uintptr_t)ret - (uintptr_t)chunk) >> @@ -305,17 +307,17 @@ tcache_alloc_large(tcache_t *tcache, size_t size, bool zero) if (likely(!zero)) { if (config_fill) { if (unlikely(opt_junk)) - memset(ret, 0xa5, size); + memset(ret, 0xa5, usize); else if (unlikely(opt_zero)) - memset(ret, 0, size); + memset(ret, 0, usize); } } else - memset(ret, 0, size); + memset(ret, 0, usize); if (config_stats) tbin->tstats.nrequests++; if (config_prof) - tcache->prof_accumbytes += size; + tcache->prof_accumbytes += usize; } tcache_event(tcache); @@ -323,7 +325,7 @@ tcache_alloc_large(tcache_t *tcache, size_t size, bool zero) } JEMALLOC_ALWAYS_INLINE void -tcache_dalloc_small(tcache_t *tcache, void *ptr, size_t binind) +tcache_dalloc_small(tcache_t *tcache, void *ptr, index_t binind) { tcache_bin_t *tbin; tcache_bin_info_t *tbin_info; @@ -349,7 +351,7 @@ tcache_dalloc_small(tcache_t *tcache, void *ptr, size_t binind) JEMALLOC_ALWAYS_INLINE void tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size) { - size_t binind; + index_t binind; tcache_bin_t *tbin; tcache_bin_info_t *tbin_info; @@ -357,7 +359,7 @@ 
tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size) assert(tcache_salloc(ptr) > SMALL_MAXCLASS); assert(tcache_salloc(ptr) <= tcache_maxclass); - binind = NBINS + (size >> LG_PAGE) - 1; + binind = size2index(size); if (config_fill && unlikely(opt_junk)) memset(ptr, 0x5a, size); diff --git a/src/arena.c b/src/arena.c index b7300a92..49a30572 100644 --- a/src/arena.c +++ b/src/arena.c @@ -7,42 +7,11 @@ ssize_t opt_lg_dirty_mult = LG_DIRTY_MULT_DEFAULT; arena_bin_info_t arena_bin_info[NBINS]; -JEMALLOC_ALIGNED(CACHELINE) -const uint32_t small_bin2size_tab[NBINS] = { -#define B2S_bin_yes(size) \ - size, -#define B2S_bin_no(size) -#define SC(index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup) \ - B2S_bin_##bin((ZU(1)<> LG_PAGE; size_t mapbits = arena_mapbits_get(chunk, pageind); - size_t binind = arena_ptr_small_binind_get(ptr, mapbits); + index_t binind = arena_ptr_small_binind_get(ptr, mapbits); arena_bin_info_t *bin_info = &arena_bin_info[binind]; unsigned regind = arena_run_regind(run, bin_info, ptr); @@ -375,7 +344,7 @@ arena_run_init_large(arena_t *arena, arena_run_t *run, size_t size, bool zero) static void arena_run_split_small(arena_t *arena, arena_run_t *run, size_t size, - size_t binind) + index_t binind) { arena_chunk_t *chunk; arena_chunk_map_misc_t *miscelm; @@ -429,9 +398,9 @@ arena_chunk_init_spare(arena_t *arena) assert(arena_mapbits_allocated_get(chunk, map_bias) == 0); assert(arena_mapbits_allocated_get(chunk, chunk_npages-1) == 0); assert(arena_mapbits_unallocated_size_get(chunk, map_bias) == - arena_maxclass); + arena_maxrun); assert(arena_mapbits_unallocated_size_get(chunk, chunk_npages-1) == - arena_maxclass); + arena_maxrun); assert(arena_mapbits_dirty_get(chunk, map_bias) == arena_mapbits_dirty_get(chunk, chunk_npages-1)); @@ -518,8 +487,7 @@ arena_chunk_init_hard(arena_t *arena) * the pages as zeroed iff chunk_alloc() returned a zeroed chunk. */ unzeroed = zero ? 
0 : CHUNK_MAP_UNZEROED; - arena_mapbits_unallocated_set(chunk, map_bias, arena_maxclass, - unzeroed); + arena_mapbits_unallocated_set(chunk, map_bias, arena_maxrun, unzeroed); /* * There is no need to initialize the internal page map entries unless * the chunk is not zeroed. @@ -544,7 +512,7 @@ arena_chunk_init_hard(arena_t *arena) } } } - arena_mapbits_unallocated_set(chunk, chunk_npages-1, arena_maxclass, + arena_mapbits_unallocated_set(chunk, chunk_npages-1, arena_maxrun, unzeroed); return (chunk); @@ -607,9 +575,9 @@ arena_chunk_dalloc(arena_t *arena, arena_chunk_t *chunk) assert(arena_mapbits_allocated_get(chunk, map_bias) == 0); assert(arena_mapbits_allocated_get(chunk, chunk_npages-1) == 0); assert(arena_mapbits_unallocated_size_get(chunk, map_bias) == - arena_maxclass); + arena_maxrun); assert(arena_mapbits_unallocated_size_get(chunk, chunk_npages-1) == - arena_maxclass); + arena_maxrun); assert(arena_mapbits_dirty_get(chunk, map_bias) == arena_mapbits_dirty_get(chunk, chunk_npages-1)); @@ -682,7 +650,7 @@ arena_run_alloc_large(arena_t *arena, size_t size, bool zero) } static arena_run_t * -arena_run_alloc_small_helper(arena_t *arena, size_t size, size_t binind) +arena_run_alloc_small_helper(arena_t *arena, size_t size, index_t binind) { arena_run_t *run; arena_chunk_map_misc_t *miscelm; @@ -700,7 +668,7 @@ arena_run_alloc_small_helper(arena_t *arena, size_t size, size_t binind) } static arena_run_t * -arena_run_alloc_small(arena_t *arena, size_t size, size_t binind) +arena_run_alloc_small(arena_t *arena, size_t size, index_t binind) { arena_chunk_t *chunk; arena_run_t *run; @@ -1034,7 +1002,7 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, bool cleaned) arena_mapbits_large_size_get(chunk, run_ind+(size>>LG_PAGE)-1) == 0); } else { - size_t binind = arena_bin_index(arena, run->bin); + index_t binind = arena_bin_index(arena, run->bin); arena_bin_info_t *bin_info = &arena_bin_info[binind]; size = bin_info->run_size; } @@ -1079,9 +1047,9 @@ 
arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, bool cleaned) arena_dirty_insert(arena, chunk, run_ind, run_pages); /* Deallocate chunk if it is now completely unused. */ - if (size == arena_maxclass) { + if (size == arena_maxrun) { assert(run_ind == map_bias); - assert(run_pages == (arena_maxclass >> LG_PAGE)); + assert(run_pages == (arena_maxrun >> LG_PAGE)); arena_chunk_dalloc(arena, chunk); } @@ -1212,7 +1180,7 @@ static arena_run_t * arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin) { arena_run_t *run; - size_t binind; + index_t binind; arena_bin_info_t *bin_info; /* Look for a usable run. */ @@ -1264,7 +1232,7 @@ static void * arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin) { void *ret; - size_t binind; + index_t binind; arena_bin_info_t *bin_info; arena_run_t *run; @@ -1310,7 +1278,7 @@ arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin) } void -arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, size_t binind, +arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, index_t binind, uint64_t prof_accumbytes) { unsigned i, nfill; @@ -1450,14 +1418,14 @@ arena_dalloc_junk_small_t *arena_dalloc_junk_small = void arena_quarantine_junk_small(void *ptr, size_t usize) { - size_t binind; + index_t binind; arena_bin_info_t *bin_info; cassert(config_fill); assert(opt_junk); assert(opt_quarantine); assert(usize <= SMALL_MAXCLASS); - binind = small_size2bin(usize); + binind = size2index(usize); bin_info = &arena_bin_info[binind]; arena_redzones_validate(ptr, bin_info, true); } @@ -1468,12 +1436,12 @@ arena_malloc_small(arena_t *arena, size_t size, bool zero) void *ret; arena_bin_t *bin; arena_run_t *run; - size_t binind; + index_t binind; - binind = small_size2bin(size); + binind = size2index(size); assert(binind < NBINS); bin = &arena->bins[binind]; - size = small_bin2size(binind); + size = index2size(binind); malloc_mutex_lock(&bin->lock); if ((run = bin->runcur) != NULL && run->nfree > 0) @@ -1520,14 +1488,15 @@ void * 
arena_malloc_large(arena_t *arena, size_t size, bool zero) { void *ret; + size_t usize; arena_run_t *run; arena_chunk_map_misc_t *miscelm; UNUSED bool idump; /* Large allocation. */ - size = PAGE_CEILING(size); + usize = s2u(size); malloc_mutex_lock(&arena->lock); - run = arena_run_alloc_large(arena, size, zero); + run = arena_run_alloc_large(arena, usize, zero); if (run == NULL) { malloc_mutex_unlock(&arena->lock); return (NULL); @@ -1535,15 +1504,17 @@ arena_malloc_large(arena_t *arena, size_t size, bool zero) miscelm = arena_run_to_miscelm(run); ret = arena_miscelm_to_rpages(miscelm); if (config_stats) { + index_t index = size2index(usize) - NBINS; + arena->stats.nmalloc_large++; arena->stats.nrequests_large++; - arena->stats.allocated_large += size; - arena->stats.lstats[(size >> LG_PAGE) - 1].nmalloc++; - arena->stats.lstats[(size >> LG_PAGE) - 1].nrequests++; - arena->stats.lstats[(size >> LG_PAGE) - 1].curruns++; + arena->stats.allocated_large += usize; + arena->stats.lstats[index].nmalloc++; + arena->stats.lstats[index].nrequests++; + arena->stats.lstats[index].curruns++; } if (config_prof) - idump = arena_prof_accum_locked(arena, size); + idump = arena_prof_accum_locked(arena, usize); malloc_mutex_unlock(&arena->lock); if (config_prof && idump) prof_idump(); @@ -1551,9 +1522,9 @@ arena_malloc_large(arena_t *arena, size_t size, bool zero) if (!zero) { if (config_fill) { if (unlikely(opt_junk)) - memset(ret, 0xa5, size); + memset(ret, 0xa5, usize); else if (unlikely(opt_zero)) - memset(ret, 0, size); + memset(ret, 0, usize); } } @@ -1610,12 +1581,14 @@ arena_palloc(arena_t *arena, size_t size, size_t alignment, bool zero) ret = arena_miscelm_to_rpages(miscelm); if (config_stats) { + index_t index = size2index(size) - NBINS; + arena->stats.nmalloc_large++; arena->stats.nrequests_large++; arena->stats.allocated_large += size; - arena->stats.lstats[(size >> LG_PAGE) - 1].nmalloc++; - arena->stats.lstats[(size >> LG_PAGE) - 1].nrequests++; - 
arena->stats.lstats[(size >> LG_PAGE) - 1].curruns++; + arena->stats.lstats[index].nmalloc++; + arena->stats.lstats[index].nrequests++; + arena->stats.lstats[index].curruns++; } malloc_mutex_unlock(&arena->lock); @@ -1632,22 +1605,23 @@ void arena_prof_promoted(const void *ptr, size_t size) { arena_chunk_t *chunk; - size_t pageind, binind; + size_t pageind; + index_t binind; cassert(config_prof); assert(ptr != NULL); assert(CHUNK_ADDR2BASE(ptr) != ptr); - assert(isalloc(ptr, false) == PAGE); - assert(isalloc(ptr, true) == PAGE); + assert(isalloc(ptr, false) == LARGE_MINCLASS); + assert(isalloc(ptr, true) == LARGE_MINCLASS); assert(size <= SMALL_MAXCLASS); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - binind = small_size2bin(size); + binind = size2index(size); assert(binind < NBINS); arena_mapbits_large_binind_set(chunk, pageind, binind); - assert(isalloc(ptr, false) == PAGE); + assert(isalloc(ptr, false) == LARGE_MINCLASS); assert(isalloc(ptr, true) == size); } @@ -1660,7 +1634,7 @@ arena_dissociate_bin_run(arena_chunk_t *chunk, arena_run_t *run, if (run == bin->runcur) bin->runcur = NULL; else { - size_t binind = arena_bin_index(chunk->arena, bin); + index_t binind = arena_bin_index(chunk->arena, bin); arena_bin_info_t *bin_info = &arena_bin_info[binind]; if (bin_info->nregs != 1) { @@ -1678,7 +1652,7 @@ static void arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, arena_bin_t *bin) { - size_t binind; + index_t binind; arena_bin_info_t *bin_info; size_t npages, run_ind, past; arena_chunk_map_misc_t *miscelm; @@ -1762,7 +1736,8 @@ arena_dalloc_bin_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr, arena_run_t *run; arena_bin_t *bin; arena_bin_info_t *bin_info; - size_t size, binind; + size_t size; + index_t binind; pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; rpages_ind = pageind - arena_mapbits_small_runind_get(chunk, pageind); @@ -1851,10 +1826,12 @@ 
arena_dalloc_large_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr) arena_dalloc_junk_large(ptr, usize); if (config_stats) { + index_t index = size2index(usize) - NBINS; + arena->stats.ndalloc_large++; arena->stats.allocated_large -= usize; - arena->stats.lstats[(usize >> LG_PAGE) - 1].ndalloc++; - arena->stats.lstats[(usize >> LG_PAGE) - 1].curruns--; + arena->stats.lstats[index].ndalloc++; + arena->stats.lstats[index].curruns--; } } @@ -1887,17 +1864,20 @@ arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk, void *ptr, malloc_mutex_lock(&arena->lock); arena_run_trim_tail(arena, chunk, run, oldsize, size, true); if (config_stats) { + index_t oldindex = size2index(oldsize) - NBINS; + index_t index = size2index(size) - NBINS; + arena->stats.ndalloc_large++; arena->stats.allocated_large -= oldsize; - arena->stats.lstats[(oldsize >> LG_PAGE) - 1].ndalloc++; - arena->stats.lstats[(oldsize >> LG_PAGE) - 1].curruns--; + arena->stats.lstats[oldindex].ndalloc++; + arena->stats.lstats[oldindex].curruns--; arena->stats.nmalloc_large++; arena->stats.nrequests_large++; arena->stats.allocated_large += size; - arena->stats.lstats[(size >> LG_PAGE) - 1].nmalloc++; - arena->stats.lstats[(size >> LG_PAGE) - 1].nrequests++; - arena->stats.lstats[(size >> LG_PAGE) - 1].curruns++; + arena->stats.lstats[index].nmalloc++; + arena->stats.lstats[index].nrequests++; + arena->stats.lstats[index].curruns++; } malloc_mutex_unlock(&arena->lock); } @@ -1909,24 +1889,30 @@ arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr, size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; size_t npages = oldsize >> LG_PAGE; size_t followsize; + size_t usize_min = s2u(size); assert(oldsize == arena_mapbits_large_size_get(chunk, pageind)); /* Try to extend the run. 
*/ - assert(size + extra > oldsize); + assert(usize_min > oldsize); malloc_mutex_lock(&arena->lock); if (pageind + npages < chunk_npages && arena_mapbits_allocated_get(chunk, pageind+npages) == 0 && (followsize = arena_mapbits_unallocated_size_get(chunk, - pageind+npages)) >= size - oldsize) { + pageind+npages)) >= usize_min - oldsize) { /* * The next run is available and sufficiently large. Split the * following run, then merge the first part with the existing * allocation. */ - size_t flag_dirty; - size_t splitsize = (oldsize + followsize <= size + extra) - ? followsize : size + extra - oldsize; + size_t flag_dirty, splitsize, usize; + + usize = s2u(size + extra); + while (oldsize + followsize < usize) + usize = index2size(size2index(usize)-1); + assert(usize >= usize_min); + splitsize = usize - oldsize; + arena_run_t *run = &arena_miscelm_get(chunk, pageind+npages)->run; arena_run_split_large(arena, run, splitsize, zero); @@ -1948,17 +1934,20 @@ arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr, arena_mapbits_large_set(chunk, pageind+npages-1, 0, flag_dirty); if (config_stats) { + index_t oldindex = size2index(oldsize) - NBINS; + index_t index = size2index(size) - NBINS; + arena->stats.ndalloc_large++; arena->stats.allocated_large -= oldsize; - arena->stats.lstats[(oldsize >> LG_PAGE) - 1].ndalloc++; - arena->stats.lstats[(oldsize >> LG_PAGE) - 1].curruns--; + arena->stats.lstats[oldindex].ndalloc++; + arena->stats.lstats[oldindex].curruns--; arena->stats.nmalloc_large++; arena->stats.nrequests_large++; arena->stats.allocated_large += size; - arena->stats.lstats[(size >> LG_PAGE) - 1].nmalloc++; - arena->stats.lstats[(size >> LG_PAGE) - 1].nrequests++; - arena->stats.lstats[(size >> LG_PAGE) - 1].curruns++; + arena->stats.lstats[index].nmalloc++; + arena->stats.lstats[index].nrequests++; + arena->stats.lstats[index].curruns++; } malloc_mutex_unlock(&arena->lock); return (false); @@ -1996,10 +1985,14 @@ static bool arena_ralloc_large(void 
*ptr, size_t oldsize, size_t size, size_t extra, bool zero) { - size_t psize; + size_t usize; - psize = PAGE_CEILING(size + extra); - if (psize == oldsize) { + /* Make sure extra can't cause size_t overflow. */ + if (extra >= arena_maxclass) + return (true); + + usize = s2u(size + extra); + if (usize == oldsize) { /* Same size class. */ return (false); } else { @@ -2009,16 +2002,15 @@ arena_ralloc_large(void *ptr, size_t oldsize, size_t size, size_t extra, chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); arena = chunk->arena; - if (psize < oldsize) { + if (usize < oldsize) { /* Fill before shrinking in order avoid a race. */ - arena_ralloc_junk_large(ptr, oldsize, psize); + arena_ralloc_junk_large(ptr, oldsize, usize); arena_ralloc_large_shrink(arena, chunk, ptr, oldsize, - psize); + usize); return (false); } else { bool ret = arena_ralloc_large_grow(arena, chunk, ptr, - oldsize, PAGE_CEILING(size), - psize - PAGE_CEILING(size), zero); + oldsize, size, extra, zero); if (config_fill && !ret && !zero) { if (unlikely(opt_junk)) { memset((void *)((uintptr_t)ptr + @@ -2045,12 +2037,11 @@ arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, */ if (oldsize <= arena_maxclass) { if (oldsize <= SMALL_MAXCLASS) { - assert(arena_bin_info[small_size2bin(oldsize)].reg_size + assert(arena_bin_info[size2index(oldsize)].reg_size == oldsize); - if ((size + extra <= SMALL_MAXCLASS && - small_size2bin(size + extra) == - small_size2bin(oldsize)) || (size <= oldsize && - size + extra >= oldsize)) + if ((size + extra <= SMALL_MAXCLASS && size2index(size + + extra) == size2index(oldsize)) || (size <= oldsize + && size + extra >= oldsize)) return (false); } else { assert(size <= arena_maxclass); @@ -2258,7 +2249,7 @@ arena_new(arena_t *arena, unsigned ind) /* * Calculate bin_info->run_size such that it meets the following constraints: * - * *) bin_info->run_size <= arena_maxclass + * *) bin_info->run_size <= arena_maxrun * *) bin_info->nregs <= RUN_MAXREGS * * 
bin_info->nregs and bin_info->reg0_offset are also calculated here, since @@ -2330,7 +2321,7 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info) /* * Make sure that the run will fit within an arena chunk. */ - while (actual_run_size > arena_maxclass) { + while (actual_run_size > arena_maxrun) { actual_run_size -= PAGE; actual_nregs = (actual_run_size - pad_size) / bin_info->reg_interval; @@ -2396,7 +2387,17 @@ arena_boot(void) map_misc_offset = offsetof(arena_chunk_t, map_bits) + sizeof(arena_chunk_map_bits_t) * (chunk_npages-map_bias); - arena_maxclass = chunksize - (map_bias << LG_PAGE); + arena_maxrun = chunksize - (map_bias << LG_PAGE); + arena_maxclass = index2size(size2index(chunksize)-1); + if (arena_maxclass > arena_maxrun) { + /* + * For small chunk sizes it's possible for there to be fewer + * non-header pages available than are necessary to serve the + * size classes just below chunksize. + */ + arena_maxclass = arena_maxrun; + } + nlclasses = size2index(arena_maxclass) - size2index(SMALL_MAXCLASS); bin_info_init(); } diff --git a/src/chunk.c b/src/chunk.c index 32b8b3a6..618aaca0 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -27,9 +27,6 @@ rtree_t *chunks_rtree; size_t chunksize; size_t chunksize_mask; /* (chunksize - 1). */ size_t chunk_npages; -size_t map_bias; -size_t map_misc_offset; -size_t arena_maxclass; /* Max size class for arenas. 
*/ /******************************************************************************/ /* diff --git a/src/ctl.c b/src/ctl.c index 309f1f65..f1f3234b 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1628,7 +1628,7 @@ arenas_bin_i_index(const size_t *mib, size_t miblen, size_t i) } CTL_RO_NL_GEN(arenas_nlruns, nlclasses, size_t) -CTL_RO_NL_GEN(arenas_lrun_i_size, ((mib[2]+1) << LG_PAGE), size_t) +CTL_RO_NL_GEN(arenas_lrun_i_size, index2size(NBINS+mib[2]), size_t) static const ctl_named_node_t * arenas_lrun_i_index(const size_t *mib, size_t miblen, size_t i) { diff --git a/src/huge.c b/src/huge.c index 6bdc0767..ae416253 100644 --- a/src/huge.c +++ b/src/huge.c @@ -15,12 +15,19 @@ static extent_tree_t huge; void * huge_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero) { + size_t usize; - return (huge_palloc(tsd, arena, size, chunksize, zero)); + usize = s2u(size); + if (usize == 0) { + /* size_t overflow. */ + return (NULL); + } + + return (huge_palloc(tsd, arena, usize, chunksize, zero)); } void * -huge_palloc(tsd_t *tsd, arena_t *arena, size_t size, size_t alignment, +huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, bool zero) { void *ret; @@ -30,11 +37,8 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t size, size_t alignment, /* Allocate one or more contiguous chunks for this request. */ - csize = CHUNK_CEILING(size); - if (csize == 0) { - /* size is large enough to cause size_t wrap-around. */ - return (NULL); - } + csize = CHUNK_CEILING(usize); + assert(csize >= usize); /* Allocate an extent node with which to track the chunk. */ node = base_node_alloc(); @@ -55,7 +59,7 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t size, size_t alignment, /* Insert node into huge. 
*/ node->addr = ret; - node->size = csize; + node->size = usize; node->arena = arena; malloc_mutex_lock(&huge_mtx); @@ -64,9 +68,9 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t size, size_t alignment, if (config_fill && !zero) { if (unlikely(opt_junk)) - memset(ret, 0xa5, csize); + memset(ret, 0xa5, usize); else if (unlikely(opt_zero) && !is_zeroed) - memset(ret, 0, csize); + memset(ret, 0, usize); } return (ret); @@ -97,7 +101,7 @@ huge_dalloc_junk_t *huge_dalloc_junk = JEMALLOC_N(huge_dalloc_junk_impl); static bool huge_ralloc_no_move_expand(void *ptr, size_t oldsize, size_t size, bool zero) { - size_t csize; + size_t usize; void *expand_addr; size_t expand_size; extent_node_t *node, key; @@ -105,14 +109,14 @@ huge_ralloc_no_move_expand(void *ptr, size_t oldsize, size_t size, bool zero) { bool is_zeroed; void *ret; - csize = CHUNK_CEILING(size); - if (csize == 0) { - /* size is large enough to cause size_t wrap-around. */ + usize = s2u(size); + if (usize == 0) { + /* size_t overflow. */ return (true); } - expand_addr = ptr + oldsize; - expand_size = csize - oldsize; + expand_addr = ptr + CHUNK_CEILING(oldsize); + expand_size = CHUNK_CEILING(usize) - CHUNK_CEILING(oldsize); malloc_mutex_lock(&huge_mtx); @@ -140,14 +144,14 @@ huge_ralloc_no_move_expand(void *ptr, size_t oldsize, size_t size, bool zero) { malloc_mutex_lock(&huge_mtx); /* Update the size of the huge allocation. */ - node->size = csize; + node->size = usize; malloc_mutex_unlock(&huge_mtx); if (config_fill && !zero) { if (unlikely(opt_junk)) - memset(expand_addr, 0xa5, expand_size); + memset(ptr + oldsize, 0xa5, usize - oldsize); else if (unlikely(opt_zero) && !is_zeroed) - memset(expand_addr, 0, expand_size); + memset(ptr + oldsize, 0, usize - oldsize); } return (false); } @@ -156,27 +160,71 @@ bool huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, bool zero) { + size_t usize; /* Both allocations must be huge to avoid a move. 
*/ - if (oldsize <= arena_maxclass) + if (oldsize < chunksize) return (true); - assert(CHUNK_CEILING(oldsize) == oldsize); + assert(s2u(oldsize) == oldsize); + usize = s2u(size); + if (usize == 0) { + /* size_t overflow. */ + return (true); + } /* - * Avoid moving the allocation if the size class can be left the same. + * Avoid moving the allocation if the existing chunk size accommodates + * the new size. */ + if (CHUNK_CEILING(oldsize) >= CHUNK_CEILING(usize) + && CHUNK_CEILING(oldsize) <= CHUNK_CEILING(size+extra)) { + size_t usize_next; + + /* Increase usize to incorporate extra. */ + while (usize < s2u(size+extra) && (usize_next = s2u(usize+1)) < + oldsize) + usize = usize_next; + + /* Update the size of the huge allocation if it changed. */ + if (oldsize != usize) { + extent_node_t *node, key; + + malloc_mutex_lock(&huge_mtx); + + key.addr = ptr; + node = extent_tree_ad_search(&huge, &key); + assert(node != NULL); + assert(node->addr == ptr); + + assert(node->size != usize); + node->size = usize; + + malloc_mutex_unlock(&huge_mtx); + + if (oldsize < usize) { + if (zero || (config_fill && + unlikely(opt_zero))) { + memset(ptr + oldsize, 0, usize - + oldsize); + } else if (config_fill && unlikely(opt_junk)) { + memset(ptr + oldsize, 0xa5, usize - + oldsize); + } + } else if (config_fill && unlikely(opt_junk) && oldsize + > usize) + memset(ptr + usize, 0x5a, oldsize - usize); + } + return (false); + } + if (CHUNK_CEILING(oldsize) >= CHUNK_CEILING(size) && CHUNK_CEILING(oldsize) <= CHUNK_CEILING(size+extra)) { return (false); } - /* Overflow. */ - if (CHUNK_CEILING(size) == 0) - return (true); - /* Shrink the allocation in-place. 
*/ - if (CHUNK_CEILING(oldsize) > CHUNK_CEILING(size)) { + if (CHUNK_CEILING(oldsize) > CHUNK_CEILING(usize)) { extent_node_t *node, key; void *excess_addr; size_t excess_size; @@ -189,15 +237,15 @@ huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, assert(node->addr == ptr); /* Update the size of the huge allocation. */ - node->size = CHUNK_CEILING(size); + node->size = usize; malloc_mutex_unlock(&huge_mtx); - excess_addr = node->addr + CHUNK_CEILING(size); - excess_size = CHUNK_CEILING(oldsize) - CHUNK_CEILING(size); + excess_addr = node->addr + CHUNK_CEILING(usize); + excess_size = CHUNK_CEILING(oldsize) - CHUNK_CEILING(usize); /* Zap the excess chunks. */ - huge_dalloc_junk(excess_addr, excess_size); + huge_dalloc_junk(ptr + usize, oldsize - usize); arena_chunk_dalloc_huge(node->arena, excess_addr, excess_size); return (false); @@ -275,7 +323,8 @@ huge_dalloc(void *ptr) malloc_mutex_unlock(&huge_mtx); huge_dalloc_junk(node->addr, node->size); - arena_chunk_dalloc_huge(node->arena, node->addr, node->size); + arena_chunk_dalloc_huge(node->arena, node->addr, + CHUNK_CEILING(node->size)); base_node_dalloc(node); } diff --git a/src/jemalloc.c b/src/jemalloc.c index 3490ecdf..f3750b40 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -42,6 +42,38 @@ unsigned narenas_auto; /* Set to true once the allocator has been initialized. 
*/ static bool malloc_initialized = false; +JEMALLOC_ALIGNED(CACHELINE) +const size_t index2size_tab[NSIZES] = { +#define SC(index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup) \ + ((ZU(1)<next_gc_bin; + index_t binind = tcache->next_gc_bin; tcache_bin_t *tbin = &tcache->tbins[binind]; tcache_bin_info_t *tbin_info = &tcache_bin_info[binind]; @@ -62,7 +62,7 @@ tcache_event_hard(tcache_t *tcache) } void * -tcache_alloc_small_hard(tcache_t *tcache, tcache_bin_t *tbin, size_t binind) +tcache_alloc_small_hard(tcache_t *tcache, tcache_bin_t *tbin, index_t binind) { void *ret; @@ -76,7 +76,7 @@ tcache_alloc_small_hard(tcache_t *tcache, tcache_bin_t *tbin, size_t binind) } void -tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem, +tcache_bin_flush_small(tcache_bin_t *tbin, index_t binind, unsigned rem, tcache_t *tcache) { void *ptr; @@ -153,7 +153,7 @@ tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem, } void -tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem, +tcache_bin_flush_large(tcache_bin_t *tbin, index_t binind, unsigned rem, tcache_t *tcache) { void *ptr; diff --git a/test/unit/junk.c b/test/unit/junk.c index 301428f2..5b35a879 100644 --- a/test/unit/junk.c +++ b/test/unit/junk.c @@ -88,7 +88,6 @@ test_junk(size_t sz_min, size_t sz_max) if (xallocx(s, sz+1, 0, 0) == sz) { void *junked = (void *)s; - s = (char *)rallocx(s, sz+1, 0); assert_ptr_not_null((void *)s, "Unexpected rallocx() failure"); @@ -134,13 +133,25 @@ TEST_END arena_ralloc_junk_large_t *arena_ralloc_junk_large_orig; static void *most_recently_trimmed; +static size_t +shrink_size(size_t size) +{ + size_t shrink_size; + + for (shrink_size = size - 1; nallocx(shrink_size, 0) == size; + shrink_size--) + ; /* Do nothing. 
*/ + + return (shrink_size); +} + static void arena_ralloc_junk_large_intercept(void *ptr, size_t old_usize, size_t usize) { arena_ralloc_junk_large_orig(ptr, old_usize, usize); assert_zu_eq(old_usize, arena_maxclass, "Unexpected old_usize"); - assert_zu_eq(usize, arena_maxclass-PAGE, "Unexpected usize"); + assert_zu_eq(usize, shrink_size(arena_maxclass), "Unexpected usize"); most_recently_trimmed = ptr; } @@ -154,7 +165,7 @@ TEST_BEGIN(test_junk_large_ralloc_shrink) arena_ralloc_junk_large_orig = arena_ralloc_junk_large; arena_ralloc_junk_large = arena_ralloc_junk_large_intercept; - p2 = rallocx(p1, arena_maxclass-PAGE, 0); + p2 = rallocx(p1, shrink_size(arena_maxclass), 0); assert_ptr_eq(p1, p2, "Unexpected move during shrink"); arena_ralloc_junk_large = arena_ralloc_junk_large_orig; diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index c70473cc..e62e54f2 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -357,7 +357,7 @@ TEST_BEGIN(test_arenas_lrun_constants) assert_zu_eq(name, expected, "Incorrect "#name" size"); \ } while (0) - TEST_ARENAS_LRUN_CONSTANT(size_t, size, (1 << LG_PAGE)); + TEST_ARENAS_LRUN_CONSTANT(size_t, size, (1 << (LG_PAGE+2))); #undef TEST_ARENAS_LRUN_CONSTANT } From bf40641c5c9496d2912ad9ff2c38ee9ce2bfbde6 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 6 Oct 2014 16:35:11 -0700 Subject: [PATCH 0559/3378] Fix a prof_tctx_t destruction race. 
--- src/prof.c | 50 ++++++++++++++++++++++++++++++++------------------ 1 file changed, 32 insertions(+), 18 deletions(-) diff --git a/src/prof.c b/src/prof.c index a6cea92f..b3150a27 100644 --- a/src/prof.c +++ b/src/prof.c @@ -609,7 +609,7 @@ prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) { prof_tdata_t *tdata = tctx->tdata; prof_gctx_t *gctx = tctx->gctx; - bool destroy_tdata, destroy_gctx; + bool destroy_tdata, destroy_tctx, destroy_gctx; assert(tctx->cnts.curobjs == 0); assert(tctx->cnts.curbytes == 0); @@ -622,25 +622,38 @@ prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) malloc_mutex_unlock(tdata->lock); malloc_mutex_lock(gctx->lock); - tctx_tree_remove(&gctx->tctxs, tctx); - if (prof_gctx_should_destroy(gctx)) { + if (tctx->state != prof_tctx_state_dumping) { + tctx_tree_remove(&gctx->tctxs, tctx); + destroy_tctx = true; + if (prof_gctx_should_destroy(gctx)) { + /* + * Increment gctx->nlimbo in order to keep another + * thread from winning the race to destroy gctx while + * this one has gctx->lock dropped. Without this, it + * would be possible for another thread to: + * + * 1) Sample an allocation associated with gctx. + * 2) Deallocate the sampled object. + * 3) Successfully prof_gctx_try_destroy(gctx). + * + * The result would be that gctx no longer exists by the + * time this thread accesses it in + * prof_gctx_try_destroy(). + */ + gctx->nlimbo++; + destroy_gctx = true; + } else + destroy_gctx = false; + } else { /* - * Increment gctx->nlimbo in order to keep another thread from - * winning the race to destroy gctx while this one has - * gctx->lock dropped. Without this, it would be possible for - * another thread to: - * - * 1) Sample an allocation associated with gctx. - * 2) Deallocate the sampled object. - * 3) Successfully prof_gctx_try_destroy(gctx). - * - * The result would be that gctx no longer exists by the time - * this thread accesses it in prof_gctx_try_destroy(). 
+ * A dumping thread needs tctx to remain valid until dumping + * has finished. Change state such that the dumping thread will + * complete destruction during a late dump iteration phase. */ - gctx->nlimbo++; - destroy_gctx = true; - } else + tctx->state = prof_tctx_state_purgatory; + destroy_tctx = false; destroy_gctx = false; + } malloc_mutex_unlock(gctx->lock); if (destroy_gctx) prof_gctx_try_destroy(tsd, gctx, tdata); @@ -648,7 +661,8 @@ prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) if (destroy_tdata) prof_tdata_destroy(tsd, tdata, false); - idalloc(tsd, tctx); + if (destroy_tctx) + idalloc(tsd, tctx); } static bool From 8bb3198f72fc7587dc93527f9f19fb5be52fa553 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 7 Oct 2014 23:14:57 -0700 Subject: [PATCH 0560/3378] Refactor/fix arenas manipulation. Abstract arenas access to use arena_get() (or a0get() where appropriate) rather than directly reading e.g. arenas[ind]. Prior to the addition of the arenas.extend mallctl, the worst possible outcome of directly accessing arenas was a stale read, but arenas.extend may allocate and assign a new array to arenas. Add a tsd-based arenas_cache, which amortizes arenas reads. This introduces some subtle bootstrapping issues, with tsd_boot() now being split into tsd_boot[01]() to support tsd wrapper allocation bootstrapping, as well as an arenas_cache_bypass tsd variable which dynamically terminates allocation of arenas_cache itself. Promote a0malloc(), a0calloc(), and a0free() to be generally useful for internal allocation, and use them in several places (more may be appropriate). Abstract arena->nthreads management and fix a missing decrement during thread destruction (recent tsd refactoring left arenas_cleanup() unused). Change arena_choose() to propagate OOM, and handle OOM in all callers. This is important for providing consistent allocation behavior when the MALLOCX_ARENA() flag is being used. 
Prior to this fix, it was possible for an OOM to result in allocation silently allocating from a different arena than the one specified. --- include/jemalloc/internal/arena.h | 14 +- .../jemalloc/internal/jemalloc_internal.h.in | 90 +-- include/jemalloc/internal/private_symbols.txt | 28 +- include/jemalloc/internal/tcache.h | 1 + include/jemalloc/internal/tsd.h | 239 ++++++-- src/arena.c | 30 +- src/chunk.c | 10 +- src/ctl.c | 119 ++-- src/huge.c | 6 +- src/jemalloc.c | 526 +++++++++++++----- src/tcache.c | 14 +- src/tsd.c | 19 +- test/unit/tsd.c | 1 + 13 files changed, 745 insertions(+), 352 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 681b5802..894ce9af 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -389,7 +389,7 @@ bool arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec); void arena_stats_merge(arena_t *arena, const char **dss, size_t *nactive, size_t *ndirty, arena_stats_t *astats, malloc_bin_stats_t *bstats, malloc_large_stats_t *lstats); -bool arena_new(arena_t *arena, unsigned ind); +arena_t *arena_new(unsigned ind); void arena_boot(void); void arena_prefork(arena_t *arena); void arena_postfork_parent(arena_t *arena); @@ -924,8 +924,10 @@ arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero, true)) != NULL)) return (tcache_alloc_small(tcache, size, zero)); else { - return (arena_malloc_small(choose_arena(tsd, arena), - size, zero)); + arena = arena_choose(tsd, arena); + if (unlikely(arena == NULL)) + return (NULL); + return (arena_malloc_small(arena, size, zero)); } } else { /* @@ -936,8 +938,10 @@ arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero, tcache_get(tsd, true)) != NULL)) return (tcache_alloc_large(tcache, size, zero)); else { - return (arena_malloc_large(choose_arena(tsd, arena), - size, zero)); + arena = arena_choose(tsd, arena); + if (unlikely(arena == NULL)) + return (NULL); + return (arena_malloc_large(arena, size, 
zero)); } } } diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 8f0beb9e..c7a5fd8a 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -386,20 +386,6 @@ extern bool in_valgrind; /* Number of CPUs. */ extern unsigned ncpus; -/* Protects arenas initialization (arenas, arenas_total). */ -extern malloc_mutex_t arenas_lock; -/* - * Arenas that are used to service external requests. Not all elements of the - * arenas array are necessarily used; arenas are created lazily as needed. - * - * arenas[0..narenas_auto) are used for automatic multiplexing of threads and - * arenas. arenas[narenas_auto..narenas_total) are only used if the application - * takes some action to create them and allocate from them. - */ -extern arena_t **arenas; -extern unsigned narenas_total; -extern unsigned narenas_auto; /* Read-only after initialization. */ - /* * index2size_tab encodes the same information as could be computed (at * unacceptable cost in some code paths) by index2size_compute(). 
@@ -412,11 +398,23 @@ extern size_t const index2size_tab[NSIZES]; */ extern uint8_t const size2index_tab[]; +arena_t *a0get(void); +void *a0malloc(size_t size); +void *a0calloc(size_t num, size_t size); +void a0free(void *ptr); arena_t *arenas_extend(unsigned ind); -arena_t *choose_arena_hard(tsd_t *tsd); +arena_t *arena_init(unsigned ind); +unsigned narenas_total_get(void); +arena_t *arena_get_hard(tsd_t *tsd, unsigned ind, bool init_if_missing); +arena_t *arena_choose_hard(tsd_t *tsd); +void arena_migrate(tsd_t *tsd, unsigned oldind, unsigned newind); +unsigned arena_nbound(unsigned ind); void thread_allocated_cleanup(tsd_t *tsd); void thread_deallocated_cleanup(tsd_t *tsd); void arena_cleanup(tsd_t *tsd); +void arenas_cache_cleanup(tsd_t *tsd); +void narenas_cache_cleanup(tsd_t *tsd); +void arenas_cache_bypass_cleanup(tsd_t *tsd); void jemalloc_prefork(void); void jemalloc_postfork_parent(void); void jemalloc_postfork_child(void); @@ -475,8 +473,9 @@ size_t s2u_compute(size_t size); size_t s2u_lookup(size_t size); size_t s2u(size_t size); size_t sa2u(size_t size, size_t alignment); -unsigned narenas_total_get(void); -arena_t *choose_arena(tsd_t *tsd, arena_t *arena); +arena_t *arena_choose(tsd_t *tsd, arena_t *arena); +arena_t *arena_get(tsd_t *tsd, unsigned ind, bool init_if_missing, + bool refresh_if_missing); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) @@ -709,34 +708,51 @@ sa2u(size_t size, size_t alignment) return (usize); } -JEMALLOC_INLINE unsigned -narenas_total_get(void) -{ - unsigned narenas; - - malloc_mutex_lock(&arenas_lock); - narenas = narenas_total; - malloc_mutex_unlock(&arenas_lock); - - return (narenas); -} - /* Choose an arena based on a per-thread value. 
*/ JEMALLOC_INLINE arena_t * -choose_arena(tsd_t *tsd, arena_t *arena) +arena_choose(tsd_t *tsd, arena_t *arena) { arena_t *ret; if (arena != NULL) return (arena); - if (unlikely((ret = tsd_arena_get(tsd)) == NULL)) { - ret = choose_arena_hard(tsd); - assert(ret != NULL); - } + if (unlikely((ret = tsd_arena_get(tsd)) == NULL)) + ret = arena_choose_hard(tsd); return (ret); } + +JEMALLOC_INLINE arena_t * +arena_get(tsd_t *tsd, unsigned ind, bool init_if_missing, + bool refresh_if_missing) +{ + arena_t *arena; + arena_t **arenas_cache = tsd_arenas_cache_get(tsd); + + /* init_if_missing requires refresh_if_missing. */ + assert(!init_if_missing || refresh_if_missing); + + if (unlikely(arenas_cache == NULL)) { + /* arenas_cache hasn't been initialized yet. */ + return (arena_get_hard(tsd, ind, init_if_missing)); + } + if (unlikely(ind >= tsd_narenas_cache_get(tsd))) { + /* + * ind is invalid, cache is old (too small), or arena to be + * initialized. + */ + return (refresh_if_missing ? arena_get_hard(tsd, ind, + init_if_missing) : NULL); + } + arena = arenas_cache[ind]; + if (likely(arena != NULL) || !refresh_if_missing) + return (arena); + if (init_if_missing) + return (arena_get_hard(tsd, ind, init_if_missing)); + else + return (NULL); +} #endif #include "jemalloc/internal/bitmap.h" @@ -833,8 +849,10 @@ ipalloct(tsd_t *tsd, size_t usize, size_t alignment, bool zero, bool try_tcache, ret = arena_malloc(tsd, arena, usize, zero, try_tcache); else { if (usize <= arena_maxclass) { - ret = arena_palloc(choose_arena(tsd, arena), usize, - alignment, zero); + arena = arena_choose(tsd, arena); + if (unlikely(arena == NULL)) + return (NULL); + ret = arena_palloc(arena, usize, alignment, zero); } else if (alignment <= chunksize) ret = huge_malloc(tsd, arena, usize, zero); else diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 1a7fde4b..d5e6fdcf 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ 
b/include/jemalloc/internal/private_symbols.txt @@ -1,11 +1,16 @@ a0calloc a0free +a0get a0malloc +arena_get +arena_get_hard arena_alloc_junk_small arena_bin_index arena_bin_info arena_bitselm_get arena_boot +arena_choose +arena_choose_hard arena_chunk_alloc_huge arena_chunk_dalloc_huge arena_cleanup @@ -19,6 +24,7 @@ arena_dalloc_large_locked arena_dalloc_small arena_dss_prec_get arena_dss_prec_set +arena_init arena_malloc arena_malloc_large arena_malloc_small @@ -42,9 +48,11 @@ arena_mapbitsp_read arena_mapbitsp_write arena_maxclass arena_maxrun +arena_migrate arena_miscelm_get arena_miscelm_to_pageind arena_miscelm_to_rpages +arena_nbound arena_new arena_palloc arena_postfork_child @@ -69,10 +77,8 @@ arena_salloc arena_sdalloc arena_stats_merge arena_tcache_fill_small -arenas -arenas_cleanup -arenas_extend -arenas_lock +arenas_cache_bypass_cleanup +arenas_cache_cleanup atomic_add_u atomic_add_uint32 atomic_add_uint64 @@ -100,8 +106,6 @@ bitmap_size bitmap_unset bt_init buferror -choose_arena -choose_arena_hard chunk_alloc_arena chunk_alloc_base chunk_alloc_default @@ -247,7 +251,8 @@ malloc_mutex_unlock malloc_printf malloc_snprintf malloc_strtoumax -malloc_tsd_boot +malloc_tsd_boot0 +malloc_tsd_boot1 malloc_tsd_cleanup_register malloc_tsd_dalloc malloc_tsd_malloc @@ -259,8 +264,7 @@ map_bias map_misc_offset mb_write mutex_boot -narenas_auto -narenas_total +narenas_cache_cleanup narenas_total_get ncpus nhbins @@ -363,6 +367,7 @@ tcache_alloc_small tcache_alloc_small_hard tcache_arena_associate tcache_arena_dissociate +tcache_arena_reassociate tcache_bin_flush_large tcache_bin_flush_small tcache_bin_info @@ -388,11 +393,14 @@ tsd_booted tsd_arena_get tsd_arena_set tsd_boot +tsd_boot0 +tsd_boot1 tsd_cleanup tsd_cleanup_wrapper tsd_fetch tsd_get -tsd_get_wrapper +tsd_wrapper_get +tsd_wrapper_set tsd_initialized tsd_init_check_recursion tsd_init_finish diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index da8e4ef4..02eec5db 
100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -109,6 +109,7 @@ void tcache_bin_flush_small(tcache_bin_t *tbin, index_t binind, unsigned rem, void tcache_bin_flush_large(tcache_bin_t *tbin, index_t binind, unsigned rem, tcache_t *tcache); void tcache_arena_associate(tcache_t *tcache, arena_t *arena); +void tcache_arena_reassociate(tcache_t *tcache, arena_t *arena); void tcache_arena_dissociate(tcache_t *tcache); tcache_t *tcache_get_hard(tsd_t *tsd); tcache_t *tcache_create(arena_t *arena); diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 25450391..b5658f8e 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -2,7 +2,7 @@ #ifdef JEMALLOC_H_TYPES /* Maximum number of malloc_tsd users with cleanup functions. */ -#define MALLOC_TSD_CLEANUPS_MAX 8 +#define MALLOC_TSD_CLEANUPS_MAX 2 typedef bool (*malloc_tsd_cleanup_t)(void); @@ -23,7 +23,7 @@ typedef enum { /* * TLS/TSD-agnostic macro-based implementation of thread-specific data. There - * are four macros that support (at least) three use cases: file-private, + * are five macros that support (at least) three use cases: file-private, * library-private, and library-private inlined. 
Following is an example * library-private tsd variable: * @@ -33,18 +33,19 @@ typedef enum { * int y; * } example_t; * #define EX_INITIALIZER JEMALLOC_CONCAT({0, 0}) - * malloc_tsd_protos(, example_, example_t *) - * malloc_tsd_externs(example_, example_t *) + * malloc_tsd_types(example_, example_t) + * malloc_tsd_protos(, example_, example_t) + * malloc_tsd_externs(example_, example_t) * In example.c: - * malloc_tsd_data(, example_, example_t *, EX_INITIALIZER) - * malloc_tsd_funcs(, example_, example_t *, EX_INITIALIZER, + * malloc_tsd_data(, example_, example_t, EX_INITIALIZER) + * malloc_tsd_funcs(, example_, example_t, EX_INITIALIZER, * example_tsd_cleanup) * * The result is a set of generated functions, e.g.: * * bool example_tsd_boot(void) {...} - * example_t **example_tsd_get() {...} - * void example_tsd_set(example_t **val) {...} + * example_t *example_tsd_get() {...} + * void example_tsd_set(example_t *val) {...} * * Note that all of the functions deal in terms of (a_type *) rather than * (a_type) so that it is possible to support non-pointer types (unlike @@ -70,9 +71,32 @@ typedef enum { * non-NULL. */ +/* malloc_tsd_types(). */ +#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP +#define malloc_tsd_types(a_name, a_type) +#elif (defined(JEMALLOC_TLS)) +#define malloc_tsd_types(a_name, a_type) +#elif (defined(_WIN32)) +#define malloc_tsd_types(a_name, a_type) \ +typedef struct { \ + bool initialized; \ + a_type val; \ +} a_name##tsd_wrapper_t; +#else +#define malloc_tsd_types(a_name, a_type) \ +typedef struct { \ + bool initialized; \ + a_type val; \ +} a_name##tsd_wrapper_t; +#endif + /* malloc_tsd_protos(). 
*/ #define malloc_tsd_protos(a_attr, a_name, a_type) \ a_attr bool \ +a_name##tsd_boot0(void); \ +a_attr void \ +a_name##tsd_boot1(void); \ +a_attr bool \ a_name##tsd_boot(void); \ a_attr a_type * \ a_name##tsd_get(void); \ @@ -93,11 +117,13 @@ extern bool a_name##tsd_booted; #elif (defined(_WIN32)) #define malloc_tsd_externs(a_name, a_type) \ extern DWORD a_name##tsd_tsd; \ +extern a_name##tsd_wrapper_t a_name##tsd_boot_wrapper; \ extern bool a_name##tsd_booted; #else #define malloc_tsd_externs(a_name, a_type) \ extern pthread_key_t a_name##tsd_tsd; \ extern tsd_init_head_t a_name##tsd_init_head; \ +extern a_name##tsd_wrapper_t a_name##tsd_boot_wrapper; \ extern bool a_name##tsd_booted; #endif @@ -118,6 +144,10 @@ a_attr bool a_name##tsd_booted = false; #elif (defined(_WIN32)) #define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ a_attr DWORD a_name##tsd_tsd; \ +a_attr a_name##tsd_wrapper_t a_name##tsd_boot_wrapper = { \ + false, \ + a_initializer \ +}; \ a_attr bool a_name##tsd_booted = false; #else #define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ @@ -126,6 +156,10 @@ a_attr tsd_init_head_t a_name##tsd_init_head = { \ ql_head_initializer(blocks), \ MALLOC_MUTEX_INITIALIZER \ }; \ +a_attr a_name##tsd_wrapper_t a_name##tsd_boot_wrapper = { \ + false, \ + a_initializer \ +}; \ a_attr bool a_name##tsd_booted = false; #endif @@ -145,7 +179,7 @@ a_name##tsd_cleanup_wrapper(void) \ return (a_name##tsd_initialized); \ } \ a_attr bool \ -a_name##tsd_boot(void) \ +a_name##tsd_boot0(void) \ { \ \ if (a_cleanup != malloc_tsd_no_cleanup) { \ @@ -155,6 +189,18 @@ a_name##tsd_boot(void) \ a_name##tsd_booted = true; \ return (false); \ } \ +a_attr void \ +a_name##tsd_boot1() \ +{ \ + \ + /* Do nothing. */ \ +} \ +a_attr bool \ +a_name##tsd_boot(void) \ +{ \ + \ + return (a_name##tsd_boot0()); \ +} \ /* Get/set. */ \ a_attr a_type * \ a_name##tsd_get(void) \ @@ -177,7 +223,7 @@ a_name##tsd_set(a_type *val) \ a_cleanup) \ /* Initialization/cleanup. 
*/ \ a_attr bool \ -a_name##tsd_boot(void) \ +a_name##tsd_boot0(void) \ { \ \ if (a_cleanup != malloc_tsd_no_cleanup) { \ @@ -188,6 +234,18 @@ a_name##tsd_boot(void) \ a_name##tsd_booted = true; \ return (false); \ } \ +a_attr void \ +a_name##tsd_boot1() \ +{ \ + \ + /* Do nothing. */ \ +} \ +a_attr bool \ +a_name##tsd_boot(void) \ +{ \ + \ + return (a_name##tsd_boot0()); \ +} \ /* Get/set. */ \ a_attr a_type * \ a_name##tsd_get(void) \ @@ -215,11 +273,6 @@ a_name##tsd_set(a_type *val) \ #elif (defined(_WIN32)) #define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \ a_cleanup) \ -/* Data structure. */ \ -typedef struct { \ - bool initialized; \ - a_type val; \ -} a_name##tsd_wrapper_t; \ /* Initialization/cleanup. */ \ a_attr bool \ a_name##tsd_cleanup_wrapper(void) \ @@ -241,23 +294,18 @@ a_name##tsd_cleanup_wrapper(void) \ malloc_tsd_dalloc(wrapper); \ return (false); \ } \ -a_attr bool \ -a_name##tsd_boot(void) \ +a_attr void \ +a_name##tsd_wrapper_set(a_name##tsd_wrapper_t *wrapper) \ { \ \ - a_name##tsd_tsd = TlsAlloc(); \ - if (a_name##tsd_tsd == TLS_OUT_OF_INDEXES) \ - return (true); \ - if (a_cleanup != malloc_tsd_no_cleanup) { \ - malloc_tsd_cleanup_register( \ - &a_name##tsd_cleanup_wrapper); \ + if (!TlsSetValue(a_name##tsd_tsd, (void *)wrapper)) { \ + malloc_write(": Error setting" \ + " TSD for "#a_name"\n"); \ + abort(); \ } \ - a_name##tsd_booted = true; \ - return (false); \ } \ -/* Get/set. 
*/ \ a_attr a_name##tsd_wrapper_t * \ -a_name##tsd_get_wrapper(void) \ +a_name##tsd_wrapper_get(void) \ { \ a_name##tsd_wrapper_t *wrapper = (a_name##tsd_wrapper_t *) \ TlsGetValue(a_name##tsd_tsd); \ @@ -273,21 +321,63 @@ a_name##tsd_get_wrapper(void) \ wrapper->initialized = false; \ wrapper->val = a_initializer; \ } \ - if (!TlsSetValue(a_name##tsd_tsd, (void *)wrapper)) { \ - malloc_write(": Error setting" \ - " TSD for "#a_name"\n"); \ - abort(); \ - } \ + a_name##tsd_wrapper_set(wrapper); \ } \ return (wrapper); \ } \ +a_attr bool \ +a_name##tsd_boot0(void) \ +{ \ + \ + a_name##tsd_tsd = TlsAlloc(); \ + if (a_name##tsd_tsd == TLS_OUT_OF_INDEXES) \ + return (true); \ + if (a_cleanup != malloc_tsd_no_cleanup) { \ + malloc_tsd_cleanup_register( \ + &a_name##tsd_cleanup_wrapper); \ + } \ + a_name##tsd_wrapper_set(&a_name##tsd_boot_wrapper); \ + a_name##tsd_booted = true; \ + return (false); \ +} \ +a_attr void \ +a_name##tsd_boot1() \ +{ \ + a_name##tsd_wrapper_t *wrapper; \ + wrapper = (a_name##tsd_wrapper_t *) \ + malloc_tsd_malloc(sizeof(a_name##tsd_wrapper_t)); \ + if (wrapper == NULL) { \ + malloc_write(": Error allocating" \ + " TSD for "#a_name"\n"); \ + abort(); \ + } \ + memcpy(wrapper, &a_name##tsd_boot_wrapper, \ + sizeof(a_name##tsd_wrapper_t)); \ + a_name##tsd_wrapper_set(wrapper); \ +} \ +a_attr bool \ +a_name##tsd_boot(void) \ +{ \ + \ + if (a_name##tsd_boot0()) \ + return (true); \ + a_name##tsd_boot1(); \ + return (false); \ +} \ +a_attr bool \ +a_name##tsd_boot(void) \ +{ \ + \ + return (false); \ +} \ +/* Get/set. 
*/ \ a_attr a_type * \ a_name##tsd_get(void) \ { \ a_name##tsd_wrapper_t *wrapper; \ \ assert(a_name##tsd_booted); \ - wrapper = a_name##tsd_get_wrapper(); \ + wrapper = a_name##tsd_wrapper_get(); \ return (&wrapper->val); \ } \ a_attr void \ @@ -296,7 +386,7 @@ a_name##tsd_set(a_type *val) \ a_name##tsd_wrapper_t *wrapper; \ \ assert(a_name##tsd_booted); \ - wrapper = a_name##tsd_get_wrapper(); \ + wrapper = a_name##tsd_wrapper_get(); \ wrapper->val = *(val); \ if (a_cleanup != malloc_tsd_no_cleanup) \ wrapper->initialized = true; \ @@ -304,11 +394,6 @@ a_name##tsd_set(a_type *val) \ #else #define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \ a_cleanup) \ -/* Data structure. */ \ -typedef struct { \ - bool initialized; \ - a_type val; \ -} a_name##tsd_wrapper_t; \ /* Initialization/cleanup. */ \ a_attr void \ a_name##tsd_cleanup_wrapper(void *arg) \ @@ -333,19 +418,19 @@ a_name##tsd_cleanup_wrapper(void *arg) \ } \ malloc_tsd_dalloc(wrapper); \ } \ -a_attr bool \ -a_name##tsd_boot(void) \ +a_attr void \ +a_name##tsd_wrapper_set(a_name##tsd_wrapper_t *wrapper) \ { \ \ - if (pthread_key_create(&a_name##tsd_tsd, \ - a_name##tsd_cleanup_wrapper) != 0) \ - return (true); \ - a_name##tsd_booted = true; \ - return (false); \ + if (pthread_setspecific(a_name##tsd_tsd, \ + (void *)wrapper)) { \ + malloc_write(": Error setting" \ + " TSD for "#a_name"\n"); \ + abort(); \ + } \ } \ -/* Get/set. 
*/ \ a_attr a_name##tsd_wrapper_t * \ -a_name##tsd_get_wrapper(void) \ +a_name##tsd_wrapper_get(void) \ { \ a_name##tsd_wrapper_t *wrapper = (a_name##tsd_wrapper_t *) \ pthread_getspecific(a_name##tsd_tsd); \ @@ -367,23 +452,54 @@ a_name##tsd_get_wrapper(void) \ wrapper->initialized = false; \ wrapper->val = a_initializer; \ } \ - if (pthread_setspecific(a_name##tsd_tsd, \ - (void *)wrapper)) { \ - malloc_write(": Error setting" \ - " TSD for "#a_name"\n"); \ - abort(); \ - } \ + a_name##tsd_wrapper_set(wrapper); \ tsd_init_finish(&a_name##tsd_init_head, &block); \ } \ return (wrapper); \ } \ +a_attr bool \ +a_name##tsd_boot0(void) \ +{ \ + \ + if (pthread_key_create(&a_name##tsd_tsd, \ + a_name##tsd_cleanup_wrapper) != 0) \ + return (true); \ + a_name##tsd_wrapper_set(&a_name##tsd_boot_wrapper); \ + a_name##tsd_booted = true; \ + return (false); \ +} \ +a_attr void \ +a_name##tsd_boot1() \ +{ \ + a_name##tsd_wrapper_t *wrapper; \ + wrapper = (a_name##tsd_wrapper_t *) \ + malloc_tsd_malloc(sizeof(a_name##tsd_wrapper_t)); \ + if (wrapper == NULL) { \ + malloc_write(": Error allocating" \ + " TSD for "#a_name"\n"); \ + abort(); \ + } \ + memcpy(wrapper, &a_name##tsd_boot_wrapper, \ + sizeof(a_name##tsd_wrapper_t)); \ + a_name##tsd_wrapper_set(wrapper); \ +} \ +a_attr bool \ +a_name##tsd_boot(void) \ +{ \ + \ + if (a_name##tsd_boot0()) \ + return (true); \ + a_name##tsd_boot1(); \ + return (false); \ +} \ +/* Get/set. 
*/ \ a_attr a_type * \ a_name##tsd_get(void) \ { \ a_name##tsd_wrapper_t *wrapper; \ \ assert(a_name##tsd_booted); \ - wrapper = a_name##tsd_get_wrapper(); \ + wrapper = a_name##tsd_wrapper_get(); \ return (&wrapper->val); \ } \ a_attr void \ @@ -392,7 +508,7 @@ a_name##tsd_set(a_type *val) \ a_name##tsd_wrapper_t *wrapper; \ \ assert(a_name##tsd_booted); \ - wrapper = a_name##tsd_get_wrapper(); \ + wrapper = a_name##tsd_wrapper_get(); \ wrapper->val = *(val); \ if (a_cleanup != malloc_tsd_no_cleanup) \ wrapper->initialized = true; \ @@ -423,6 +539,9 @@ struct tsd_init_head_s { O(thread_deallocated, uint64_t) \ O(prof_tdata, prof_tdata_t *) \ O(arena, arena_t *) \ + O(arenas_cache, arena_t **) \ + O(narenas_cache, unsigned) \ + O(arenas_cache_bypass, bool) \ O(tcache_enabled, tcache_enabled_t) \ O(quarantine, quarantine_t *) \ @@ -433,6 +552,9 @@ struct tsd_init_head_s { 0, \ NULL, \ NULL, \ + NULL, \ + 0, \ + false, \ tcache_enabled_default, \ NULL \ } @@ -447,6 +569,8 @@ MALLOC_TSD static const tsd_t tsd_initializer = TSD_INITIALIZER; +malloc_tsd_types(, tsd_t) + #endif /* JEMALLOC_H_STRUCTS */ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS @@ -455,7 +579,8 @@ void *malloc_tsd_malloc(size_t size); void malloc_tsd_dalloc(void *wrapper); void malloc_tsd_no_cleanup(void *arg); void malloc_tsd_cleanup_register(bool (*f)(void)); -bool malloc_tsd_boot(void); +bool malloc_tsd_boot0(void); +void malloc_tsd_boot1(void); #if (!defined(JEMALLOC_MALLOC_THREAD_CLEANUP) && !defined(JEMALLOC_TLS) && \ !defined(_WIN32)) void *tsd_init_check_recursion(tsd_init_head_t *head, diff --git a/src/arena.c b/src/arena.c index 49a30572..86e54404 100644 --- a/src/arena.c +++ b/src/arena.c @@ -2192,27 +2192,37 @@ arena_stats_merge(arena_t *arena, const char **dss, size_t *nactive, } } -bool -arena_new(arena_t *arena, unsigned ind) +arena_t * +arena_new(unsigned ind) { + arena_t *arena; unsigned i; arena_bin_t *bin; + /* + * 
Allocate arena and arena->lstats contiguously, mainly because there + * is no way to clean up if base_alloc() OOMs. + */ + if (config_stats) { + arena = (arena_t *)base_alloc(CACHELINE_CEILING(sizeof(arena_t)) + + nlclasses * sizeof(malloc_large_stats_t)); + } else + arena = (arena_t *)base_alloc(sizeof(arena_t)); + if (arena == NULL) + return (NULL); + arena->ind = ind; arena->nthreads = 0; arena->chunk_alloc = chunk_alloc_default; arena->chunk_dalloc = chunk_dalloc_default; if (malloc_mutex_init(&arena->lock)) - return (true); + return (NULL); if (config_stats) { memset(&arena->stats, 0, sizeof(arena_stats_t)); - arena->stats.lstats = - (malloc_large_stats_t *)base_alloc(nlclasses * - sizeof(malloc_large_stats_t)); - if (arena->stats.lstats == NULL) - return (true); + arena->stats.lstats = (malloc_large_stats_t *)(((void *)arena) + + CACHELINE_CEILING(sizeof(arena_t))); memset(arena->stats.lstats, 0, nlclasses * sizeof(malloc_large_stats_t)); if (config_tcache) @@ -2236,14 +2246,14 @@ arena_new(arena_t *arena, unsigned ind) for (i = 0; i < NBINS; i++) { bin = &arena->bins[i]; if (malloc_mutex_init(&bin->lock)) - return (true); + return (NULL); bin->runcur = NULL; arena_run_tree_new(&bin->runs); if (config_stats) memset(&bin->stats, 0, sizeof(malloc_bin_stats_t)); } - return (false); + return (arena); } /* diff --git a/src/chunk.c b/src/chunk.c index 618aaca0..f65b67af 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -254,9 +254,17 @@ void * chunk_alloc_default(void *new_addr, size_t size, size_t alignment, bool *zero, unsigned arena_ind) { + arena_t *arena; + + arena = arena_get(tsd_fetch(), arena_ind, false, true); + /* + * The arena we're allocating on behalf of must have been initialized + * already. 
+ */ + assert(arena != NULL); return (chunk_alloc_core(new_addr, size, alignment, false, zero, - arenas[arena_ind]->dss_prec)); + arena->dss_prec)); } static void diff --git a/src/ctl.c b/src/ctl.c index f1f3234b..37f8f42a 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -447,7 +447,7 @@ ctl_arena_init(ctl_arena_stats_t *astats) { if (astats->lstats == NULL) { - astats->lstats = (malloc_large_stats_t *)base_alloc(nlclasses * + astats->lstats = (malloc_large_stats_t *)a0malloc(nlclasses * sizeof(malloc_large_stats_t)); if (astats->lstats == NULL) return (true); @@ -567,31 +567,24 @@ ctl_arena_refresh(arena_t *arena, unsigned i) static bool ctl_grow(void) { - tsd_t *tsd; ctl_arena_stats_t *astats; - arena_t **tarenas; - tsd = tsd_fetch(); + /* Initialize new arena. */ + if (arena_init(ctl_stats.narenas) == NULL) + return (true); - /* Allocate extended arena stats and arenas arrays. */ - astats = (ctl_arena_stats_t *)imalloc(tsd, (ctl_stats.narenas + 2) * + /* Allocate extended arena stats. */ + astats = (ctl_arena_stats_t *)a0malloc((ctl_stats.narenas + 2) * sizeof(ctl_arena_stats_t)); if (astats == NULL) return (true); - tarenas = (arena_t **)imalloc(tsd, (ctl_stats.narenas + 1) * - sizeof(arena_t *)); - if (tarenas == NULL) { - idalloc(tsd, astats); - return (true); - } /* Initialize the new astats element. */ memcpy(astats, ctl_stats.arenas, (ctl_stats.narenas + 1) * sizeof(ctl_arena_stats_t)); memset(&astats[ctl_stats.narenas + 1], 0, sizeof(ctl_arena_stats_t)); if (ctl_arena_init(&astats[ctl_stats.narenas + 1])) { - idalloc(tsd, tarenas); - idalloc(tsd, astats); + a0free(astats); return (true); } /* Swap merged stats to their new location. */ @@ -604,32 +597,7 @@ ctl_grow(void) memcpy(&astats[ctl_stats.narenas + 1], &tstats, sizeof(ctl_arena_stats_t)); } - /* Initialize the new arenas element. */ - tarenas[ctl_stats.narenas] = NULL; - { - arena_t **arenas_old = arenas; - /* - * Swap extended arenas array into place. 
Although ctl_mtx - * protects this function from other threads extending the - * array, it does not protect from other threads mutating it - * (i.e. initializing arenas and setting array elements to - * point to them). Therefore, array copying must happen under - * the protection of arenas_lock. - */ - malloc_mutex_lock(&arenas_lock); - arenas = tarenas; - memcpy(arenas, arenas_old, ctl_stats.narenas * - sizeof(arena_t *)); - narenas_total++; - arenas_extend(narenas_total - 1); - malloc_mutex_unlock(&arenas_lock); - /* - * Deallocate arenas_old only if it came from imalloc() (not - * base_alloc()). - */ - if (ctl_stats.narenas != narenas_auto) - idalloc(tsd, arenas_old); - } + a0free(ctl_stats.arenas); ctl_stats.arenas = astats; ctl_stats.narenas++; @@ -639,6 +607,7 @@ ctl_grow(void) static void ctl_refresh(void) { + tsd_t *tsd; unsigned i; VARIABLE_ARRAY(arena_t *, tarenas, ctl_stats.narenas); @@ -657,15 +626,17 @@ ctl_refresh(void) ctl_stats.arenas[ctl_stats.narenas].nthreads = 0; ctl_arena_clear(&ctl_stats.arenas[ctl_stats.narenas]); - malloc_mutex_lock(&arenas_lock); - memcpy(tarenas, arenas, sizeof(arena_t *) * ctl_stats.narenas); + tsd = tsd_fetch(); + for (i = 0; i < ctl_stats.narenas; i++) + tarenas[i] = arena_get(tsd, i, false, (i == 0)); + for (i = 0; i < ctl_stats.narenas; i++) { - if (arenas[i] != NULL) - ctl_stats.arenas[i].nthreads = arenas[i]->nthreads; + if (tarenas[i] != NULL) + ctl_stats.arenas[i].nthreads = arena_nbound(i); else ctl_stats.arenas[i].nthreads = 0; } - malloc_mutex_unlock(&arenas_lock); + for (i = 0; i < ctl_stats.narenas; i++) { bool initialized = (tarenas[i] != NULL); @@ -698,9 +669,8 @@ ctl_init(void) * Allocate space for one extra arena stats element, which * contains summed stats across all arenas. 
*/ - assert(narenas_auto == narenas_total_get()); - ctl_stats.narenas = narenas_auto; - ctl_stats.arenas = (ctl_arena_stats_t *)base_alloc( + ctl_stats.narenas = narenas_total_get(); + ctl_stats.arenas = (ctl_arena_stats_t *)a0malloc( (ctl_stats.narenas + 1) * sizeof(ctl_arena_stats_t)); if (ctl_stats.arenas == NULL) { ret = true; @@ -718,6 +688,13 @@ ctl_init(void) unsigned i; for (i = 0; i <= ctl_stats.narenas; i++) { if (ctl_arena_init(&ctl_stats.arenas[i])) { + unsigned j; + for (j = 0; j < i; j++) { + a0free( + ctl_stats.arenas[j].lstats); + } + a0free(ctl_stats.arenas); + ctl_stats.arenas = NULL; ret = true; goto label_return; } @@ -1231,17 +1208,19 @@ thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, { int ret; tsd_t *tsd; + arena_t *arena; unsigned newind, oldind; tsd = tsd_fetch(); + arena = arena_choose(tsd, NULL); + if (arena == NULL) + return (EAGAIN); malloc_mutex_lock(&ctl_mtx); - newind = oldind = choose_arena(tsd, NULL)->ind; + newind = oldind = arena->ind; WRITE(newind, unsigned); READ(oldind, unsigned); if (newind != oldind) { - arena_t *arena; - if (newind >= ctl_stats.narenas) { /* New arena index is out of range. */ ret = EFAULT; @@ -1249,28 +1228,18 @@ thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, } /* Initialize arena if necessary. */ - malloc_mutex_lock(&arenas_lock); - if ((arena = arenas[newind]) == NULL && (arena = - arenas_extend(newind)) == NULL) { - malloc_mutex_unlock(&arenas_lock); + arena = arena_get(tsd, newind, true, true); + if (arena == NULL) { ret = EAGAIN; goto label_return; } - assert(arena == arenas[newind]); - arenas[oldind]->nthreads--; - arenas[newind]->nthreads++; - malloc_mutex_unlock(&arenas_lock); - - /* Set new arena association. */ + /* Set new arena/tcache associations. 
*/ + arena_migrate(tsd, oldind, newind); if (config_tcache) { tcache_t *tcache = tsd_tcache_get(tsd); - if (tcache != NULL) { - tcache_arena_dissociate(tcache); - tcache_arena_associate(tcache, arena); - } + if (tcache != NULL) + tcache_arena_reassociate(tcache, arena); } - - tsd_arena_set(tsd, arena); } ret = 0; @@ -1400,11 +1369,13 @@ label_return: static void arena_purge(unsigned arena_ind) { + tsd_t *tsd; + unsigned i; VARIABLE_ARRAY(arena_t *, tarenas, ctl_stats.narenas); - malloc_mutex_lock(&arenas_lock); - memcpy(tarenas, arenas, sizeof(arena_t *) * ctl_stats.narenas); - malloc_mutex_unlock(&arenas_lock); + tsd = tsd_fetch(); + for (i = 0; i < ctl_stats.narenas; i++) + tarenas[i] = arena_get(tsd, i, false, (i == 0)); if (arena_ind == ctl_stats.narenas) { unsigned i; @@ -1467,7 +1438,7 @@ arena_i_dss_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, } if (arena_ind < ctl_stats.narenas) { - arena_t *arena = arenas[arena_ind]; + arena_t *arena = arena_get(tsd_fetch(), arena_ind, false, true); if (arena == NULL || (dss_prec != dss_prec_limit && arena_dss_prec_set(arena, dss_prec))) { ret = EFAULT; @@ -1501,7 +1472,8 @@ arena_i_chunk_alloc_ctl(const size_t *mib, size_t miblen, void *oldp, arena_t *arena; malloc_mutex_lock(&ctl_mtx); - if (arena_ind < narenas_total && (arena = arenas[arena_ind]) != NULL) { + if (arena_ind < narenas_total_get() && (arena = arena_get(tsd_fetch(), + arena_ind, false, true)) != NULL) { malloc_mutex_lock(&arena->lock); READ(arena->chunk_alloc, chunk_alloc_t *); WRITE(arena->chunk_alloc, chunk_alloc_t *); @@ -1527,7 +1499,8 @@ arena_i_chunk_dalloc_ctl(const size_t *mib, size_t miblen, void *oldp, arena_t *arena; malloc_mutex_lock(&ctl_mtx); - if (arena_ind < narenas_total && (arena = arenas[arena_ind]) != NULL) { + if (arena_ind < narenas_total_get() && (arena = arena_get(tsd_fetch(), + arena_ind, false, true)) != NULL) { malloc_mutex_lock(&arena->lock); READ(arena->chunk_dalloc, chunk_dalloc_t *); 
WRITE(arena->chunk_dalloc, chunk_dalloc_t *); diff --git a/src/huge.c b/src/huge.c index ae416253..1376729a 100644 --- a/src/huge.c +++ b/src/huge.c @@ -50,7 +50,11 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, * it is possible to make correct junk/zero fill decisions below. */ is_zeroed = zero; - arena = choose_arena(tsd, arena); + arena = arena_choose(tsd, arena); + if (unlikely(arena == NULL)) { + base_node_dalloc(node); + return (NULL); + } ret = arena_chunk_alloc_huge(arena, NULL, csize, alignment, &is_zeroed); if (ret == NULL) { base_node_dalloc(node); diff --git a/src/jemalloc.c b/src/jemalloc.c index f3750b40..3c889e8a 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -4,8 +4,6 @@ /******************************************************************************/ /* Data. */ -malloc_tsd_data(, arenas, arena_t *, NULL) - /* Runtime configuration options. */ const char *je_malloc_conf JEMALLOC_ATTR(weak); bool opt_abort = @@ -34,10 +32,20 @@ bool in_valgrind; unsigned ncpus; -malloc_mutex_t arenas_lock; -arena_t **arenas; -unsigned narenas_total; -unsigned narenas_auto; +/* Protects arenas initialization (arenas, narenas_total). */ +static malloc_mutex_t arenas_lock; +/* + * Arenas that are used to service external requests. Not all elements of the + * arenas array are necessarily used; arenas are created lazily as needed. + * + * arenas[0..narenas_auto) are used for automatic multiplexing of threads and + * arenas. arenas[narenas_auto..narenas_total) are only used if the application + * takes some action to create them and allocate from them. + */ +static arena_t **arenas; +static unsigned narenas_total; +static arena_t *a0; /* arenas[0]; read-only after initialization. */ +static unsigned narenas_auto; /* Read-only after initialization. */ /* Set to true once the allocator has been initialized. 
*/ static bool malloc_initialized = false; @@ -144,35 +152,288 @@ static bool malloc_init_hard(void); * Begin miscellaneous support functions. */ -/* Create a new arena and insert it into the arenas array at index ind. */ -arena_t * -arenas_extend(unsigned ind) +JEMALLOC_ALWAYS_INLINE_C void +malloc_thread_init(void) { - arena_t *ret; - - ret = (arena_t *)base_alloc(sizeof(arena_t)); - if (ret != NULL && !arena_new(ret, ind)) { - arenas[ind] = ret; - return (ret); - } - /* Only reached if there is an OOM error. */ /* - * OOM here is quite inconvenient to propagate, since dealing with it - * would require a check for failure in the fast path. Instead, punt - * by using arenas[0]. In practice, this is an extremely unlikely - * failure. + * TSD initialization can't be safely done as a side effect of + * deallocation, because it is possible for a thread to do nothing but + * deallocate its TLS data via free(), in which case writing to TLS + * would cause write-after-free memory corruption. The quarantine + * facility *only* gets used as a side effect of deallocation, so make + * a best effort attempt at initializing its TSD by hooking all + * allocation events. */ - malloc_write(": Error initializing arena\n"); - if (opt_abort) - abort(); - - return (arenas[0]); + if (config_fill && unlikely(opt_quarantine)) + quarantine_alloc_hook(); } -/* Slow path, called only by choose_arena(). */ +JEMALLOC_ALWAYS_INLINE_C bool +malloc_init(void) +{ + + if (unlikely(!malloc_initialized) && malloc_init_hard()) + return (true); + malloc_thread_init(); + + return (false); +} + +/* + * The a0*() functions are used instead of i[mcd]alloc() in bootstrap-sensitive + * situations that cannot tolerate TLS variable access. These functions are + * also exposed for use in static binaries on FreeBSD, hence the old-style + * malloc() API. 
+ */ + arena_t * -choose_arena_hard(tsd_t *tsd) +a0get(void) +{ + + assert(a0 != NULL); + return (a0); +} + +static void * +a0alloc(size_t size, bool zero) +{ + void *ret; + + if (unlikely(malloc_init())) + return (NULL); + + if (size == 0) + size = 1; + + if (size <= arena_maxclass) + ret = arena_malloc(NULL, a0get(), size, zero, false); + else + ret = huge_malloc(NULL, a0get(), size, zero); + + return (ret); +} + +void * +a0malloc(size_t size) +{ + + return (a0alloc(size, false)); +} + +void * +a0calloc(size_t num, size_t size) +{ + + return (a0alloc(num * size, true)); +} + +void +a0free(void *ptr) +{ + arena_chunk_t *chunk; + + if (ptr == NULL) + return; + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + if (chunk != ptr) + arena_dalloc(NULL, chunk, ptr, false); + else + huge_dalloc(ptr); +} + +/* Create a new arena and insert it into the arenas array at index ind. */ +arena_t * +arena_init(unsigned ind) +{ + arena_t *arena; + + malloc_mutex_lock(&arenas_lock); + + /* Expand arenas if necessary. */ + assert(ind <= narenas_total); + if (ind == narenas_total) { + unsigned narenas_new = narenas_total + 1; + arena_t **arenas_new = + (arena_t **)a0malloc(CACHELINE_CEILING(narenas_new * + sizeof(arena_t *))); + if (arenas_new == NULL) { + arena = NULL; + goto label_return; + } + memcpy(arenas_new, arenas, narenas_total * sizeof(arena_t *)); + arenas_new[ind] = NULL; + /* + * Deallocate only if arenas came from a0malloc() (not + * base_alloc()). + */ + if (narenas_total != narenas_auto) + a0free(arenas); + arenas = arenas_new; + narenas_total = narenas_new; + } + + /* + * Another thread may have already initialized arenas[ind] if it's an + * auto arena. + */ + arena = arenas[ind]; + if (arena != NULL) { + assert(ind < narenas_auto); + goto label_return; + } + + /* Actually initialize the arena. 
*/ + arena = arenas[ind] = arena_new(ind); +label_return: + malloc_mutex_unlock(&arenas_lock); + return (arena); +} + +unsigned +narenas_total_get(void) +{ + unsigned narenas; + + malloc_mutex_lock(&arenas_lock); + narenas = narenas_total; + malloc_mutex_unlock(&arenas_lock); + + return (narenas); +} + +static void +arena_bind_locked(tsd_t *tsd, unsigned ind) +{ + arena_t *arena; + + arena = arenas[ind]; + arena->nthreads++; + + if (tsd_nominal(tsd)) + tsd_arena_set(tsd, arena); +} + +static void +arena_bind(tsd_t *tsd, unsigned ind) +{ + + malloc_mutex_lock(&arenas_lock); + arena_bind_locked(tsd, ind); + malloc_mutex_unlock(&arenas_lock); +} + +void +arena_migrate(tsd_t *tsd, unsigned oldind, unsigned newind) +{ + arena_t *oldarena, *newarena; + + malloc_mutex_lock(&arenas_lock); + oldarena = arenas[oldind]; + newarena = arenas[newind]; + oldarena->nthreads--; + newarena->nthreads++; + malloc_mutex_unlock(&arenas_lock); + tsd_arena_set(tsd, newarena); +} + +unsigned +arena_nbound(unsigned ind) +{ + unsigned nthreads; + + malloc_mutex_lock(&arenas_lock); + nthreads = arenas[ind]->nthreads; + malloc_mutex_unlock(&arenas_lock); + return (nthreads); +} + +static void +arena_unbind(tsd_t *tsd, unsigned ind) +{ + arena_t *arena; + + malloc_mutex_lock(&arenas_lock); + arena = arenas[ind]; + arena->nthreads--; + malloc_mutex_unlock(&arenas_lock); + tsd_arena_set(tsd, NULL); +} + +arena_t * +arena_get_hard(tsd_t *tsd, unsigned ind, bool init_if_missing) +{ + arena_t *arena; + arena_t **arenas_cache = tsd_arenas_cache_get(tsd); + unsigned narenas_cache = tsd_narenas_cache_get(tsd); + unsigned narenas_actual = narenas_total_get(); + + /* Deallocate old cache if it's too small. */ + if (arenas_cache != NULL && narenas_cache < narenas_actual) { + a0free(arenas_cache); + arenas_cache = NULL; + narenas_cache = 0; + tsd_arenas_cache_set(tsd, arenas_cache); + tsd_narenas_cache_set(tsd, narenas_cache); + } + + /* Allocate cache if it's missing. 
*/ + if (arenas_cache == NULL) { + bool *arenas_cache_bypassp = tsd_arenas_cache_bypassp_get(tsd); + assert(ind < narenas_actual || !init_if_missing); + narenas_cache = (ind < narenas_actual) ? narenas_actual : ind+1; + + if (!*arenas_cache_bypassp) { + *arenas_cache_bypassp = true; + arenas_cache = (arena_t **)a0malloc(sizeof(arena_t *) * + narenas_cache); + *arenas_cache_bypassp = false; + } else + arenas_cache = NULL; + if (arenas_cache == NULL) { + /* + * This function must always tell the truth, even if + * it's slow, so don't let OOM or recursive allocation + * avoidance (note arenas_cache_bypass check) get in the + * way. + */ + if (ind >= narenas_actual) + return (NULL); + malloc_mutex_lock(&arenas_lock); + arena = arenas[ind]; + malloc_mutex_unlock(&arenas_lock); + return (arena); + } + tsd_arenas_cache_set(tsd, arenas_cache); + tsd_narenas_cache_set(tsd, narenas_cache); + } + + /* + * Copy to cache. It's possible that the actual number of arenas has + * increased since narenas_total_get() was called above, but that causes + * no correctness issues unless two threads concurrently execute the + * arenas.extend mallctl, which we trust mallctl synchronization to + * prevent. + */ + malloc_mutex_lock(&arenas_lock); + memcpy(arenas_cache, arenas, sizeof(arena_t *) * narenas_actual); + malloc_mutex_unlock(&arenas_lock); + if (narenas_cache > narenas_actual) { + memset(&arenas_cache[narenas_actual], 0, sizeof(arena_t *) * + (narenas_cache - narenas_actual)); + } + + /* Read the refreshed cache, and init the arena if necessary. */ + arena = arenas_cache[ind]; + if (init_if_missing && arena == NULL) + arena = arenas_cache[ind] = arena_init(ind); + return (arena); +} + +/* Slow path, called only by arena_choose(). 
*/ +arena_t * +arena_choose_hard(tsd_t *tsd) { arena_t *ret; @@ -182,7 +443,7 @@ choose_arena_hard(tsd_t *tsd) choose = 0; first_null = narenas_auto; malloc_mutex_lock(&arenas_lock); - assert(arenas[0] != NULL); + assert(a0get() != NULL); for (i = 1; i < narenas_auto; i++) { if (arenas[i] != NULL) { /* @@ -215,20 +476,20 @@ choose_arena_hard(tsd_t *tsd) ret = arenas[choose]; } else { /* Initialize a new arena. */ - ret = arenas_extend(first_null); + choose = first_null; + ret = arena_init(choose); + if (ret == NULL) { + malloc_mutex_unlock(&arenas_lock); + return (NULL); + } } - ret->nthreads++; + arena_bind_locked(tsd, choose); malloc_mutex_unlock(&arenas_lock); } else { - ret = arenas[0]; - malloc_mutex_lock(&arenas_lock); - ret->nthreads++; - malloc_mutex_unlock(&arenas_lock); + ret = a0get(); + arena_bind(tsd, 0); } - if (tsd_nominal(tsd)) - tsd_arena_set(tsd, ret); - return (ret); } @@ -248,6 +509,33 @@ thread_deallocated_cleanup(tsd_t *tsd) void arena_cleanup(tsd_t *tsd) +{ + arena_t *arena; + + arena = tsd_arena_get(tsd); + if (arena != NULL) + arena_unbind(tsd, arena->ind); +} + +void +arenas_cache_cleanup(tsd_t *tsd) +{ + arena_t **arenas_cache; + + arenas_cache = tsd_arenas_cache_get(tsd); + if (arenas != NULL) + a0free(arenas_cache); +} + +void +narenas_cache_cleanup(tsd_t *tsd) +{ + + /* Do nothing. */ +} + +void +arenas_cache_bypass_cleanup(tsd_t *tsd) { /* Do nothing. */ @@ -312,44 +600,6 @@ malloc_ncpus(void) return ((result == -1) ? 1 : (unsigned)result); } -void -arenas_cleanup(void *arg) -{ - arena_t *arena = *(arena_t **)arg; - - malloc_mutex_lock(&arenas_lock); - arena->nthreads--; - malloc_mutex_unlock(&arenas_lock); -} - -JEMALLOC_ALWAYS_INLINE_C void -malloc_thread_init(void) -{ - - /* - * TSD initialization can't be safely done as a side effect of - * deallocation, because it is possible for a thread to do nothing but - * deallocate its TLS data via free(), in which case writing to TLS - * would cause write-after-free memory corruption. 
The quarantine - * facility *only* gets used as a side effect of deallocation, so make - * a best effort attempt at initializing its TSD by hooking all - * allocation events. - */ - if (config_fill && unlikely(opt_quarantine)) - quarantine_alloc_hook(); -} - -JEMALLOC_ALWAYS_INLINE_C bool -malloc_init(void) -{ - - if (unlikely(!malloc_initialized) && malloc_init_hard()) - return (true); - malloc_thread_init(); - - return (false); -} - static bool malloc_conf_next(char const **opts_p, char const **k_p, size_t *klen_p, char const **v_p, size_t *vlen_p) @@ -745,7 +995,7 @@ malloc_init_hard(void) #endif malloc_initializer = INITIALIZER; - if (malloc_tsd_boot()) { + if (malloc_tsd_boot0()) { malloc_mutex_unlock(&init_lock); return (true); } @@ -809,10 +1059,10 @@ malloc_init_hard(void) /* * Initialize one arena here. The rest are lazily created in - * choose_arena_hard(). + * arena_choose_hard(). */ - arenas_extend(0); - if (arenas[0] == NULL) { + a0 = arena_init(0); + if (a0 == NULL) { malloc_mutex_unlock(&init_lock); return (true); } @@ -887,6 +1137,7 @@ malloc_init_hard(void) malloc_initialized = true; malloc_mutex_unlock(&init_lock); + malloc_tsd_boot1(); return (false); } @@ -1428,8 +1679,8 @@ JEMALLOC_EXPORT void *(*__memalign_hook)(size_t alignment, size_t size) = * Begin non-standard functions. 
*/ -JEMALLOC_ALWAYS_INLINE_C void -imallocx_flags_decode_hard(size_t size, int flags, size_t *usize, +JEMALLOC_ALWAYS_INLINE_C bool +imallocx_flags_decode_hard(tsd_t *tsd, size_t size, int flags, size_t *usize, size_t *alignment, bool *zero, bool *try_tcache, arena_t **arena) { @@ -1444,16 +1695,19 @@ imallocx_flags_decode_hard(size_t size, int flags, size_t *usize, if ((flags & MALLOCX_ARENA_MASK) != 0) { unsigned arena_ind = MALLOCX_ARENA_GET(flags); *try_tcache = false; - *arena = arenas[arena_ind]; + *arena = arena_get(tsd, arena_ind, true, true); + if (unlikely(*arena == NULL)) + return (true); } else { *try_tcache = true; *arena = NULL; } + return (false); } -JEMALLOC_ALWAYS_INLINE_C void -imallocx_flags_decode(size_t size, int flags, size_t *usize, size_t *alignment, - bool *zero, bool *try_tcache, arena_t **arena) +JEMALLOC_ALWAYS_INLINE_C bool +imallocx_flags_decode(tsd_t *tsd, size_t size, int flags, size_t *usize, + size_t *alignment, bool *zero, bool *try_tcache, arena_t **arena) { if (likely(flags == 0)) { @@ -1463,9 +1717,10 @@ imallocx_flags_decode(size_t size, int flags, size_t *usize, size_t *alignment, *zero = false; *try_tcache = true; *arena = NULL; + return (false); } else { - imallocx_flags_decode_hard(size, flags, usize, alignment, zero, - try_tcache, arena); + return (imallocx_flags_decode_hard(tsd, size, flags, usize, + alignment, zero, try_tcache, arena)); } } @@ -1524,8 +1779,9 @@ imallocx_prof(tsd_t *tsd, size_t size, int flags, size_t *usize) arena_t *arena; prof_tctx_t *tctx; - imallocx_flags_decode(size, flags, usize, &alignment, &zero, - &try_tcache, &arena); + if (unlikely(imallocx_flags_decode(tsd, size, flags, usize, &alignment, + &zero, &try_tcache, &arena))) + return (NULL); tctx = prof_alloc_prep(tsd, *usize, true); if (likely((uintptr_t)tctx == (uintptr_t)1U)) { p = imallocx_maybe_flags(tsd, size, flags, *usize, alignment, @@ -1558,8 +1814,9 @@ imallocx_no_prof(tsd_t *tsd, size_t size, int flags, size_t *usize) return 
(imalloc(tsd, size)); } - imallocx_flags_decode_hard(size, flags, usize, &alignment, &zero, - &try_tcache, &arena); + if (unlikely(imallocx_flags_decode_hard(tsd, size, flags, usize, + &alignment, &zero, &try_tcache, &arena))) + return (NULL); return (imallocx_flags(tsd, *usize, alignment, zero, try_tcache, arena)); } @@ -1685,9 +1942,10 @@ je_rallocx(void *ptr, size_t size, int flags) arena_chunk_t *chunk; try_tcache_alloc = false; chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - try_tcache_dalloc = (chunk == ptr || chunk->arena != - arenas[arena_ind]); - arena = arenas[arena_ind]; + arena = arena_get(tsd, arena_ind, true, true); + if (unlikely(arena == NULL)) + goto label_oom; + try_tcache_dalloc = (chunk == ptr || chunk->arena != arena); } else { try_tcache_alloc = true; try_tcache_dalloc = true; @@ -1825,6 +2083,7 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) if (unlikely((flags & MALLOCX_ARENA_MASK) != 0)) { unsigned arena_ind = MALLOCX_ARENA_GET(flags); + // XX Dangerous arenas read. arena = arenas[arena_ind]; } else arena = NULL; @@ -1875,16 +2134,24 @@ je_sallocx(const void *ptr, int flags) void je_dallocx(void *ptr, int flags) { + tsd_t *tsd; bool try_tcache; assert(ptr != NULL); assert(malloc_initialized || IS_INITIALIZER); + tsd = tsd_fetch(); if (unlikely((flags & MALLOCX_ARENA_MASK) != 0)) { unsigned arena_ind = MALLOCX_ARENA_GET(flags); arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - try_tcache = (chunk == ptr || chunk->arena != - arenas[arena_ind]); + arena_t *arena = arena_get(tsd, arena_ind, true, true); + /* + * If arena is NULL, the application passed an arena that has + * never been used before, which is unsupported during + * deallocation. 
+ */ + assert(arena != NULL); + try_tcache = (chunk == ptr || chunk->arena != arena); } else try_tcache = true; @@ -1908,6 +2175,7 @@ inallocx(size_t size, int flags) void je_sdallocx(void *ptr, size_t size, int flags) { + tsd_t *tsd; bool try_tcache; size_t usize; @@ -1916,16 +2184,22 @@ je_sdallocx(void *ptr, size_t size, int flags) usize = inallocx(size, flags); assert(usize == isalloc(ptr, config_prof)); + tsd = tsd_fetch(); if (unlikely((flags & MALLOCX_ARENA_MASK) != 0)) { unsigned arena_ind = MALLOCX_ARENA_GET(flags); arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - try_tcache = (chunk == ptr || chunk->arena != - arenas[arena_ind]); + arena_t *arena = arena_get(tsd, arena_ind, true, true); + /* + * If arena is NULL, the application passed an arena that has + * never been used before, which is unsupported during + * deallocation. + */ + try_tcache = (chunk == ptr || chunk->arena != arena); } else try_tcache = true; UTRACE(ptr, 0, 0); - isfree(tsd_fetch(), ptr, usize, try_tcache); + isfree(tsd, ptr, usize, try_tcache); } size_t @@ -2105,55 +2379,3 @@ jemalloc_postfork_child(void) } /******************************************************************************/ -/* - * The following functions are used for TLS allocation/deallocation in static - * binaries on FreeBSD. The primary difference between these and i[mcd]alloc() - * is that these avoid accessing TLS variables. 
- */ - -static void * -a0alloc(size_t size, bool zero) -{ - - if (unlikely(malloc_init())) - return (NULL); - - if (size == 0) - size = 1; - - if (size <= arena_maxclass) - return (arena_malloc(NULL, arenas[0], size, zero, false)); - else - return (huge_malloc(NULL, arenas[0], size, zero)); -} - -void * -a0malloc(size_t size) -{ - - return (a0alloc(size, false)); -} - -void * -a0calloc(size_t num, size_t size) -{ - - return (a0alloc(num * size, true)); -} - -void -a0free(void *ptr) -{ - arena_chunk_t *chunk; - - if (ptr == NULL) - return; - - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - if (chunk != ptr) - arena_dalloc(NULL, chunk, ptr, false); - else - huge_dalloc(ptr); -} - -/******************************************************************************/ diff --git a/src/tcache.c b/src/tcache.c index 2c968c68..1bf70269 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -245,6 +245,14 @@ tcache_arena_associate(tcache_t *tcache, arena_t *arena) tcache->arena = arena; } +void +tcache_arena_reassociate(tcache_t *tcache, arena_t *arena) +{ + + tcache_arena_dissociate(tcache); + tcache_arena_associate(tcache, arena); +} + void tcache_arena_dissociate(tcache_t *tcache) { @@ -261,13 +269,17 @@ tcache_arena_dissociate(tcache_t *tcache) tcache_t * tcache_get_hard(tsd_t *tsd) { + arena_t *arena; if (!tcache_enabled_get()) { if (tsd_nominal(tsd)) tcache_enabled_set(false); /* Memoize. */ return (NULL); } - return (tcache_create(choose_arena(tsd, NULL))); + arena = arena_choose(tsd, NULL); + if (unlikely(arena == NULL)) + return (NULL); + return (tcache_create(arena)); } tcache_t * diff --git a/src/tsd.c b/src/tsd.c index cbc64e44..59253fe3 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -15,16 +15,14 @@ void * malloc_tsd_malloc(size_t size) { - /* Avoid choose_arena() in order to dodge bootstrapping issues. 
*/ - return (arena_malloc(NULL, arenas[0], CACHELINE_CEILING(size), false, - false)); + return (a0malloc(CACHELINE_CEILING(size))); } void malloc_tsd_dalloc(void *wrapper) { - idalloct(NULL, wrapper, false); + a0free(wrapper); } void @@ -106,15 +104,24 @@ MALLOC_TSD } bool -malloc_tsd_boot(void) +malloc_tsd_boot0(void) { ncleanups = 0; - if (tsd_boot()) + if (tsd_boot0()) return (true); + *tsd_arenas_cache_bypassp_get(tsd_fetch()) = true; return (false); } +void +malloc_tsd_boot1(void) +{ + + tsd_boot1(); + *tsd_arenas_cache_bypassp_get(tsd_fetch()) = false; +} + #ifdef _WIN32 static BOOL WINAPI _tls_callback(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved) diff --git a/test/unit/tsd.c b/test/unit/tsd.c index eb1c5976..b031c484 100644 --- a/test/unit/tsd.c +++ b/test/unit/tsd.c @@ -6,6 +6,7 @@ typedef unsigned int data_t; static bool data_cleanup_executed; +malloc_tsd_types(data_, data_t) malloc_tsd_protos(, data_, data_t) void From f22214a29ddd3bed005cbcc8f2aff7c61ef4940b Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Mon, 6 Oct 2014 03:42:10 -0400 Subject: [PATCH 0561/3378] Use regular arena allocation for huge tree nodes. This avoids grabbing the base mutex, as a step towards fine-grained locking for huge allocations. The thread cache also provides a tiny (~3%) improvement for serial huge allocations. 
--- include/jemalloc/internal/huge.h | 2 +- .../jemalloc/internal/jemalloc_internal.h.in | 4 +-- src/huge.c | 9 ++++--- src/jemalloc.c | 2 +- test/unit/junk.c | 27 ++++++++++++++----- 5 files changed, 29 insertions(+), 15 deletions(-) diff --git a/include/jemalloc/internal/huge.h b/include/jemalloc/internal/huge.h index 939993f2..5d4d3a16 100644 --- a/include/jemalloc/internal/huge.h +++ b/include/jemalloc/internal/huge.h @@ -21,7 +21,7 @@ void *huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, typedef void (huge_dalloc_junk_t)(void *, size_t); extern huge_dalloc_junk_t *huge_dalloc_junk; #endif -void huge_dalloc(void *ptr); +void huge_dalloc(tsd_t *tsd, void *ptr); size_t huge_salloc(const void *ptr); prof_tctx_t *huge_prof_tctx_get(const void *ptr); void huge_prof_tctx_set(const void *ptr, prof_tctx_t *tctx); diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index c7a5fd8a..f4d5de6a 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -938,7 +938,7 @@ idalloct(tsd_t *tsd, void *ptr, bool try_tcache) if (chunk != ptr) arena_dalloc(tsd, chunk, ptr, try_tcache); else - huge_dalloc(ptr); + huge_dalloc(tsd, ptr); } JEMALLOC_ALWAYS_INLINE void @@ -952,7 +952,7 @@ isdalloct(tsd_t *tsd, void *ptr, size_t size, bool try_tcache) if (chunk != ptr) arena_sdalloc(tsd, chunk, ptr, size, try_tcache); else - huge_dalloc(ptr); + huge_dalloc(tsd, ptr); } JEMALLOC_ALWAYS_INLINE void diff --git a/src/huge.c b/src/huge.c index 1376729a..541df60a 100644 --- a/src/huge.c +++ b/src/huge.c @@ -41,7 +41,8 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, assert(csize >= usize); /* Allocate an extent node with which to track the chunk. 
*/ - node = base_node_alloc(); + node = ipalloct(tsd, CACHELINE_CEILING(sizeof(extent_node_t)), + CACHELINE, false, tsd != NULL, NULL); if (node == NULL) return (NULL); @@ -57,7 +58,7 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, } ret = arena_chunk_alloc_huge(arena, NULL, csize, alignment, &is_zeroed); if (ret == NULL) { - base_node_dalloc(node); + idalloct(tsd, node, tsd != NULL); return (NULL); } @@ -311,7 +312,7 @@ huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size, } void -huge_dalloc(void *ptr) +huge_dalloc(tsd_t *tsd, void *ptr) { extent_node_t *node, key; @@ -329,7 +330,7 @@ huge_dalloc(void *ptr) huge_dalloc_junk(node->addr, node->size); arena_chunk_dalloc_huge(node->arena, node->addr, CHUNK_CEILING(node->size)); - base_node_dalloc(node); + idalloct(tsd, node, tsd != NULL); } size_t diff --git a/src/jemalloc.c b/src/jemalloc.c index 3c889e8a..38b5aaf7 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -240,7 +240,7 @@ a0free(void *ptr) if (chunk != ptr) arena_dalloc(NULL, chunk, ptr, false); else - huge_dalloc(ptr); + huge_dalloc(NULL, ptr); } /* Create a new arena and insert it into the arenas array at index ind. 
*/ diff --git a/test/unit/junk.c b/test/unit/junk.c index 5b35a879..1522a610 100644 --- a/test/unit/junk.c +++ b/test/unit/junk.c @@ -8,7 +8,16 @@ const char *malloc_conf = static arena_dalloc_junk_small_t *arena_dalloc_junk_small_orig; static arena_dalloc_junk_large_t *arena_dalloc_junk_large_orig; static huge_dalloc_junk_t *huge_dalloc_junk_orig; -static void *most_recently_junked; +static void *watch_for_junking; +static bool saw_junking; + +static void +watch_junking(void *p) +{ + + watch_for_junking = p; + saw_junking = false; +} static void arena_dalloc_junk_small_intercept(void *ptr, arena_bin_info_t *bin_info) @@ -21,7 +30,8 @@ arena_dalloc_junk_small_intercept(void *ptr, arena_bin_info_t *bin_info) "Missing junk fill for byte %zu/%zu of deallocated region", i, bin_info->reg_size); } - most_recently_junked = ptr; + if (ptr == watch_for_junking) + saw_junking = true; } static void @@ -35,7 +45,8 @@ arena_dalloc_junk_large_intercept(void *ptr, size_t usize) "Missing junk fill for byte %zu/%zu of deallocated region", i, usize); } - most_recently_junked = ptr; + if (ptr == watch_for_junking) + saw_junking = true; } static void @@ -48,7 +59,8 @@ huge_dalloc_junk_intercept(void *ptr, size_t usize) * enough that it doesn't make sense to duplicate the decision logic in * test code, so don't actually check that the region is junk-filled. 
*/ - most_recently_junked = ptr; + if (ptr == watch_for_junking) + saw_junking = true; } static void @@ -87,18 +99,19 @@ test_junk(size_t sz_min, size_t sz_max) } if (xallocx(s, sz+1, 0, 0) == sz) { - void *junked = (void *)s; + watch_junking(s); s = (char *)rallocx(s, sz+1, 0); assert_ptr_not_null((void *)s, "Unexpected rallocx() failure"); - assert_ptr_eq(most_recently_junked, junked, + assert_true(saw_junking, "Expected region of size %zu to be junk-filled", sz); } } + watch_junking(s); dallocx(s, 0); - assert_ptr_eq(most_recently_junked, (void *)s, + assert_true(saw_junking, "Expected region of size %zu to be junk-filled", sz); arena_dalloc_junk_small = arena_dalloc_junk_small_orig; From 3a8b9b1fd95b1bb9b3dc00f6798eeb40d5100b7b Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 8 Oct 2014 00:54:16 -0700 Subject: [PATCH 0562/3378] Fix a recursive lock acquisition regression. Fix a recursive lock acquisition regression, which was introduced by 8bb3198f72fc7587dc93527f9f19fb5be52fa553 (Refactor/fix arenas manipulation.). --- src/jemalloc.c | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 38b5aaf7..c62d8ce6 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -244,13 +244,11 @@ a0free(void *ptr) } /* Create a new arena and insert it into the arenas array at index ind. */ -arena_t * -arena_init(unsigned ind) +static arena_t * +arena_init_locked(unsigned ind) { arena_t *arena; - malloc_mutex_lock(&arenas_lock); - /* Expand arenas if necessary. 
*/ assert(ind <= narenas_total); if (ind == narenas_total) { @@ -258,10 +256,8 @@ arena_init(unsigned ind) arena_t **arenas_new = (arena_t **)a0malloc(CACHELINE_CEILING(narenas_new * sizeof(arena_t *))); - if (arenas_new == NULL) { - arena = NULL; - goto label_return; - } + if (arenas_new == NULL) + return (NULL); memcpy(arenas_new, arenas, narenas_total * sizeof(arena_t *)); arenas_new[ind] = NULL; /* @@ -281,12 +277,21 @@ arena_init(unsigned ind) arena = arenas[ind]; if (arena != NULL) { assert(ind < narenas_auto); - goto label_return; + return (arena); } /* Actually initialize the arena. */ arena = arenas[ind] = arena_new(ind); -label_return: + return (arena); +} + +arena_t * +arena_init(unsigned ind) +{ + arena_t *arena; + + malloc_mutex_lock(&arenas_lock); + arena = arena_init_locked(ind); malloc_mutex_unlock(&arenas_lock); return (arena); } @@ -477,7 +482,7 @@ arena_choose_hard(tsd_t *tsd) } else { /* Initialize a new arena. */ choose = first_null; - ret = arena_init(choose); + ret = arena_init_locked(choose); if (ret == NULL) { malloc_mutex_unlock(&arenas_lock); return (NULL); From 57efa7bb0e284805c940472190bc9924327635a1 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 8 Oct 2014 17:57:19 -0700 Subject: [PATCH 0563/3378] Avoid atexit(3) when possible, disable prof_final by default. atexit(3) can deadlock internally during its own initialization if jemalloc calls atexit() during jemalloc initialization. Mitigate the impact by restructuring prof initialization to avoid calling atexit() unless the registered function will actually dump a final heap profile. Additionally, disable prof_final by default so that this land mine is opt-in rather than opt-out. This resolves #144. 
--- doc/jemalloc.xml.in | 18 +++++++++++++++--- src/prof.c | 17 +++++++++-------- test/unit/prof_active.c | 2 +- test/unit/prof_thread_name.c | 3 +-- 4 files changed, 26 insertions(+), 14 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 1f692f78..7da1498a 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -857,8 +857,14 @@ for (i = 0; i < nbins; i++) { is specified during configuration, this has the potential to cause deadlock for a multi-threaded process that exits while one or more threads are executing in the memory allocation - functions. Therefore, this option should only be used with care; it is - primarily intended as a performance tuning aid during application + functions. Furthermore, atexit may + allocate memory during application initialization and then deadlock + internally when jemalloc in turn calls + atexit, so this option is not + universally usable (though the application can register its own + atexit function with equivalent + functionality). Therefore, this option should only be used with care; + it is primarily intended as a performance tuning aid during application development. This option is disabled by default. @@ -1155,7 +1161,13 @@ malloc_conf = "xmalloc:true";]]> <prefix>.<pid>.<seq>.f.heap, where <prefix> is controlled by the opt.prof_prefix - option. This option is enabled by default. + option. Note that atexit may allocate + memory during application initialization and then deadlock internally + when jemalloc in turn calls atexit, so + this option is not universally usable (though the application can + register its own atexit function with + equivalent functionality). This option is disabled by + default. 
diff --git a/src/prof.c b/src/prof.c index b3150a27..3e2e4277 100644 --- a/src/prof.c +++ b/src/prof.c @@ -20,7 +20,7 @@ bool opt_prof_thread_active_init = true; size_t opt_lg_prof_sample = LG_PROF_SAMPLE_DEFAULT; ssize_t opt_lg_prof_interval = LG_PROF_INTERVAL_DEFAULT; bool opt_prof_gdump = false; -bool opt_prof_final = true; +bool opt_prof_final = false; bool opt_prof_leak = false; bool opt_prof_accum = false; char opt_prof_prefix[ @@ -1487,17 +1487,17 @@ prof_fdump(void) char filename[DUMP_FILENAME_BUFSIZE]; cassert(config_prof); + assert(opt_prof_final); + assert(opt_prof_prefix[0] != '\0'); if (!prof_booted) return; tsd = tsd_fetch(); - if (opt_prof_final && opt_prof_prefix[0] != '\0') { - malloc_mutex_lock(&prof_dump_seq_mtx); - prof_dump_filename(filename, 'f', VSEQ_INVALID); - malloc_mutex_unlock(&prof_dump_seq_mtx); - prof_dump(tsd, false, filename, opt_prof_leak); - } + malloc_mutex_lock(&prof_dump_seq_mtx); + prof_dump_filename(filename, 'f', VSEQ_INVALID); + malloc_mutex_unlock(&prof_dump_seq_mtx); + prof_dump(tsd, false, filename, opt_prof_leak); } void @@ -2023,7 +2023,8 @@ prof_boot2(void) if (malloc_mutex_init(&prof_dump_mtx)) return (true); - if (atexit(prof_fdump) != 0) { + if (opt_prof_final && opt_prof_prefix[0] != '\0' && + atexit(prof_fdump) != 0) { malloc_write(": Error in atexit()\n"); if (opt_abort) abort(); diff --git a/test/unit/prof_active.c b/test/unit/prof_active.c index d4bab8d0..81490957 100644 --- a/test/unit/prof_active.c +++ b/test/unit/prof_active.c @@ -2,7 +2,7 @@ #ifdef JEMALLOC_PROF const char *malloc_conf = - "prof:true,prof_thread_active_init:false,lg_prof_sample:0,prof_final:false"; + "prof:true,prof_thread_active_init:false,lg_prof_sample:0"; #endif static void diff --git a/test/unit/prof_thread_name.c b/test/unit/prof_thread_name.c index 6066dba7..f501158d 100644 --- a/test/unit/prof_thread_name.c +++ b/test/unit/prof_thread_name.c @@ -1,8 +1,7 @@ #include "test/jemalloc_test.h" #ifdef JEMALLOC_PROF -const char 
*malloc_conf = - "prof:true,prof_active:false,prof_final:false"; +const char *malloc_conf = "prof:true,prof_active:false"; #endif static void From b123ddc760e5b53dde17c6a19a130173067c0e30 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 8 Oct 2014 18:18:03 -0700 Subject: [PATCH 0564/3378] Don't configure HAVE_SSE2. Don't configure HAVE_SSE2 (on behalf of SFMT), because its dependencies are notoriously unportable in practice. This resolves #119. --- configure.ac | 10 ---------- test/include/test/jemalloc_test_defs.h.in | 5 ++++- 2 files changed, 4 insertions(+), 11 deletions(-) diff --git a/configure.ac b/configure.ac index e4afe889..1d79ded6 100644 --- a/configure.ac +++ b/configure.ac @@ -206,8 +206,6 @@ AC_CANONICAL_HOST dnl CPU-specific settings. CPU_SPINWAIT="" case "${host_cpu}" in - i[[345]]86) - ;; i686|x86_64) JE_COMPILABLE([pause instruction], [], [[__asm__ volatile("pause"); return 0;]], @@ -215,14 +213,6 @@ case "${host_cpu}" in if test "x${je_cv_pause}" = "xyes" ; then CPU_SPINWAIT='__asm__ volatile("pause")' fi - dnl emmintrin.h fails to compile unless MMX, SSE, and SSE2 are - dnl supported. - JE_COMPILABLE([SSE2 intrinsics], [ -#include -], [], [je_cv_sse2]) - if test "x${je_cv_sse2}" = "xyes" ; then - AC_DEFINE_UNQUOTED([HAVE_SSE2], [ ]) - fi ;; powerpc) AC_DEFINE_UNQUOTED([HAVE_ALTIVEC], [ ]) diff --git a/test/include/test/jemalloc_test_defs.h.in b/test/include/test/jemalloc_test_defs.h.in index aaaaec14..5cc8532a 100644 --- a/test/include/test/jemalloc_test_defs.h.in +++ b/test/include/test/jemalloc_test_defs.h.in @@ -1,6 +1,9 @@ #include "jemalloc/internal/jemalloc_internal_defs.h" #include "jemalloc/internal/jemalloc_internal_decls.h" -/* For use by SFMT. */ +/* + * For use by SFMT. configure.ac doesn't actually define HAVE_SSE2 because its + * dependencies are notoriously unportable in practice. 
+ */ #undef HAVE_SSE2 #undef HAVE_ALTIVEC From fc0b3b7383373d66cfed2cd4e2faa272a6868d32 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 9 Oct 2014 17:54:06 -0700 Subject: [PATCH 0565/3378] Add configure options. Add: --with-lg-page --with-lg-page-sizes --with-lg-size-class-group --with-lg-quantum Get rid of STATIC_PAGE_SHIFT, in favor of directly setting LG_PAGE. Fix various edge conditions exposed by the configure options. --- INSTALL | 67 +++++++++++++++++++ Makefile.in | 1 + configure.ac | 47 ++++++++++--- include/jemalloc/internal/arena.h | 8 ++- include/jemalloc/internal/huge.h | 9 +-- .../jemalloc/internal/jemalloc_internal.h.in | 28 ++++---- .../internal/jemalloc_internal_defs.h.in | 10 ++- include/jemalloc/internal/private_symbols.txt | 4 +- include/jemalloc/internal/size_classes.sh | 12 +++- include/jemalloc/internal/tcache.h | 4 +- src/arena.c | 42 +++++++++--- src/huge.c | 52 +++++++------- src/jemalloc.c | 50 ++++++++------ src/tcache.c | 53 ++++----------- test/unit/lg_chunk.c | 26 +++++++ test/unit/mallctl.c | 2 +- 16 files changed, 278 insertions(+), 137 deletions(-) create mode 100644 test/unit/lg_chunk.c diff --git a/INSTALL b/INSTALL index 9af23369..73bf7185 100644 --- a/INSTALL +++ b/INSTALL @@ -189,6 +189,73 @@ any of the following arguments (not a definitive list) to 'configure': Specify where to find DocBook XSL stylesheets when building the documentation. +--with-lg-page= + Specify the base 2 log of the system page size. This option is only useful + when cross compiling, since the configure script automatically determines the + host's page size by default. + +--with-lg-page-sizes= + Specify the comma-separated base 2 logs of the page sizes to support. This + option may be useful when cross-compiling in combination with + --with-lg-page, but its primary use case is for integration with FreeBSD's + libc, wherein jemalloc is embedded. 
+ +--with-lg-size-class-group= + Specify the base 2 log of how many size classes to use for each doubling in + size. By default jemalloc uses =2, which results in + e.g. the following size classes: + + [...], 64, + 80, 96, 112, 128, + 160, [...] + + =3 results in e.g. the following size classes: + + [...], 64, + 72, 80, 88, 96, 104, 112, 120, 128, + 144, [...] + + The minimal =0 causes jemalloc to only provide size + classes that are powers of 2: + + [...], + 64, + 128, + 256, + [...] + + An implementation detail currently limits the total number of small size + classes to 255, and a compilation error will result if the + you specify cannot be supported. The limit is + roughly =4, depending on page size. + +--with-lg-quantum= + Specify the base 2 log of the minimum allocation alignment (only + =3 and =4 are supported). jemalloc needs to know + the minimum alignment that meets the following C standard requirement + (quoted from the April 12, 2011 draft of the C11 standard): + + The pointer returned if the allocation succeeds is suitably aligned so + that it may be assigned to a pointer to any type of object with a + fundamental alignment requirement and then used to access such an object + or an array of such objects in the space allocated [...] + + This setting is architecture-specific, and although jemalloc includes known + safe values for the most commonly used modern architectures, there is a + wrinkle related to GNU libc (glibc) that may impact your choice of + . On most modern architectures, this mandates 16-byte alignment + (=4), but the glibc developers chose not to meet this requirement + for performance reasons. An old discussion can be found at + https://sourceware.org/bugzilla/show_bug.cgi?id=206 . 
Unlike glibc, + jemalloc does follow the C standard by default (caveat: jemalloc technically + cheats by only providing 8-byte alignment for 8-byte allocation requests), + but the fact that Linux systems already work around this allocator + noncompliance means that it is generally safe in practice to let jemalloc's + minimum alignment follow glibc's lead. If you specify --with-lg-quantum=3 + during configuration, jemalloc will provide additional size classes that + are not 16-byte-aligned (24, 40, and 56, assuming + --with-lg-size-class-group=2). + The following environment variables (not a definitive list) impact configure's behavior: diff --git a/Makefile.in b/Makefile.in index 50f6596a..40644ce8 100644 --- a/Makefile.in +++ b/Makefile.in @@ -118,6 +118,7 @@ TESTS_UNIT := $(srcroot)test/unit/atomic.c \ $(srcroot)test/unit/ckh.c \ $(srcroot)test/unit/hash.c \ $(srcroot)test/unit/junk.c \ + $(srcroot)test/unit/lg_chunk.c \ $(srcroot)test/unit/mallctl.c \ $(srcroot)test/unit/math.c \ $(srcroot)test/unit/mq.c \ diff --git a/configure.ac b/configure.ac index 1d79ded6..f8c09c46 100644 --- a/configure.ac +++ b/configure.ac @@ -969,8 +969,17 @@ else fi fi -AC_CACHE_CHECK([STATIC_PAGE_SHIFT], - [je_cv_static_page_shift], +AC_ARG_WITH([lg_quantum], + [AS_HELP_STRING([--with-lg-quantum=], + [Base 2 log of minimum allocation alignment])], + [AC_DEFINE_UNQUOTED([LG_QUANTUM], [$with_lg_quantum])]) + +AC_ARG_WITH([lg_page], + [AS_HELP_STRING([--with-lg-page=], [Base 2 log of system page size])], + [LG_PAGE="$with_lg_page"], [LG_PAGE="detect"]) +if test "x$LG_PAGE" == "xdetect"; then + AC_CACHE_CHECK([LG_PAGE], + [je_cv_lg_page], AC_RUN_IFELSE([AC_LANG_PROGRAM( [[ #include @@ -1006,15 +1015,29 @@ AC_CACHE_CHECK([STATIC_PAGE_SHIFT], return 0; ]])], - [je_cv_static_page_shift=`cat conftest.out`], - [je_cv_static_page_shift=undefined], - [je_cv_static_page_shift=12])) - -if test "x$je_cv_static_page_shift" != "xundefined"; then - AC_DEFINE_UNQUOTED([STATIC_PAGE_SHIFT], 
[$je_cv_static_page_shift]) -else - AC_MSG_ERROR([cannot determine value for STATIC_PAGE_SHIFT]) + [je_cv_lg_page=`cat conftest.out`], + [je_cv_lg_page=undefined], + [je_cv_lg_page=12])) fi +if test "x${je_cv_lg_page}" != "x" ; then + LG_PAGE="${je_cv_lg_page}" +fi +if test "x${LG_PAGE}" != "xundefined" ; then + AC_DEFINE_UNQUOTED([LG_PAGE], [$LG_PAGE]) +else + AC_MSG_ERROR([cannot determine value for LG_PAGE]) +fi + +AC_ARG_WITH([lg_page_sizes], + [AS_HELP_STRING([--with-lg-page-sizes=], + [Base 2 logs of system page sizes to support])], + [LG_PAGE_SIZES="$with_lg_page_sizes"], [LG_PAGE_SIZES="$LG_PAGE"]) + +AC_ARG_WITH([lg_size_class_group], + [AS_HELP_STRING([--with-lg-size-class-group=], + [Base 2 log of size classes per doubling])], + [LG_SIZE_CLASS_GROUP="$with_lg_size_class_group"], + [LG_SIZE_CLASS_GROUP="2"]) dnl ============================================================================ dnl jemalloc configuration. @@ -1456,10 +1479,12 @@ AC_CONFIG_COMMANDS([include/jemalloc/internal/public_unnamespace.h], [ ]) AC_CONFIG_COMMANDS([include/jemalloc/internal/size_classes.h], [ mkdir -p "${objroot}include/jemalloc/internal" - "${srcdir}/include/jemalloc/internal/size_classes.sh" > "${objroot}include/jemalloc/internal/size_classes.h" + "${srcdir}/include/jemalloc/internal/size_classes.sh" ${LG_PAGE_SIZES} ${LG_SIZE_CLASS_GROUP} > "${objroot}include/jemalloc/internal/size_classes.h" ], [ srcdir="${srcdir}" objroot="${objroot}" + LG_PAGE_SIZES=${LG_PAGE_SIZES} + LG_SIZE_CLASS_GROUP=${LG_SIZE_CLASS_GROUP} ]) AC_CONFIG_COMMANDS([include/jemalloc/jemalloc_protos_jet.h], [ mkdir -p "${objroot}include/jemalloc" diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 894ce9af..f5b9fc62 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -362,8 +362,8 @@ void *arena_malloc_small(arena_t *arena, size_t size, bool zero); void *arena_malloc_large(arena_t *arena, size_t size, bool zero); void 
*arena_palloc(arena_t *arena, size_t size, size_t alignment, bool zero); void arena_prof_promoted(const void *ptr, size_t size); -void arena_dalloc_bin_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr, - arena_chunk_map_bits_t *bitselm); +void arena_dalloc_bin_junked_locked(arena_t *arena, arena_chunk_t *chunk, + void *ptr, arena_chunk_map_bits_t *bitselm); void arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, size_t pageind, arena_chunk_map_bits_t *bitselm); void arena_dalloc_small(arena_t *arena, arena_chunk_t *chunk, void *ptr, @@ -371,8 +371,10 @@ void arena_dalloc_small(arena_t *arena, arena_chunk_t *chunk, void *ptr, #ifdef JEMALLOC_JET typedef void (arena_dalloc_junk_large_t)(void *, size_t); extern arena_dalloc_junk_large_t *arena_dalloc_junk_large; +#else +void arena_dalloc_junk_large(void *ptr, size_t usize); #endif -void arena_dalloc_large_locked(arena_t *arena, arena_chunk_t *chunk, +void arena_dalloc_large_junked_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr); void arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr); #ifdef JEMALLOC_JET diff --git a/include/jemalloc/internal/huge.h b/include/jemalloc/internal/huge.h index 5d4d3a16..39d8aa50 100644 --- a/include/jemalloc/internal/huge.h +++ b/include/jemalloc/internal/huge.h @@ -9,19 +9,20 @@ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -void *huge_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero); +void *huge_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero, + bool try_tcache); void *huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, - bool zero); + bool zero, bool try_tcache); bool huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, bool zero); void *huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size, size_t extra, size_t alignment, bool zero, - bool try_tcache_dalloc); + bool try_tcache_alloc, bool 
try_tcache_dalloc); #ifdef JEMALLOC_JET typedef void (huge_dalloc_junk_t)(void *, size_t); extern huge_dalloc_junk_t *huge_dalloc_junk; #endif -void huge_dalloc(tsd_t *tsd, void *ptr); +void huge_dalloc(tsd_t *tsd, void *ptr, bool try_tcache); size_t huge_salloc(const void *ptr); prof_tctx_t *huge_prof_tctx_get(const void *ptr); void huge_prof_tctx_set(const void *ptr, prof_tctx_t *tctx); diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index f4d5de6a..3f65fad0 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -185,7 +185,7 @@ typedef unsigned index_t; #define TINY_MIN (1U << LG_TINY_MIN) /* - * Minimum alignment of allocations is 2^LG_QUANTUM bytes (ignoring tiny size + * Minimum allocation alignment is 2^LG_QUANTUM bytes (ignoring tiny size * classes). */ #ifndef LG_QUANTUM @@ -235,7 +235,8 @@ typedef unsigned index_t; # define LG_QUANTUM 4 # endif # ifndef LG_QUANTUM -# error "No LG_QUANTUM definition for architecture; specify via CPPFLAGS" +# error "Unknown minimum alignment for architecture; specify via " + "--with-lg-quantum" # endif #endif @@ -275,12 +276,11 @@ typedef unsigned index_t; #define CACHELINE_CEILING(s) \ (((s) + CACHELINE_MASK) & ~CACHELINE_MASK) -/* Page size. STATIC_PAGE_SHIFT is determined by the configure script. */ +/* Page size. LG_PAGE is determined by the configure script. */ #ifdef PAGE_MASK # undef PAGE_MASK #endif -#define LG_PAGE STATIC_PAGE_SHIFT -#define PAGE ((size_t)(1U << STATIC_PAGE_SHIFT)) +#define PAGE ((size_t)(1U << LG_PAGE)) #define PAGE_MASK ((size_t)(PAGE - 1)) /* Return the smallest pagesize multiple that is >= s. 
*/ @@ -809,7 +809,7 @@ imalloct(tsd_t *tsd, size_t size, bool try_tcache, arena_t *arena) if (size <= arena_maxclass) return (arena_malloc(tsd, arena, size, false, try_tcache)); else - return (huge_malloc(tsd, arena, size, false)); + return (huge_malloc(tsd, arena, size, false, try_tcache)); } JEMALLOC_ALWAYS_INLINE void * @@ -826,7 +826,7 @@ icalloct(tsd_t *tsd, size_t size, bool try_tcache, arena_t *arena) if (size <= arena_maxclass) return (arena_malloc(tsd, arena, size, true, try_tcache)); else - return (huge_malloc(tsd, arena, size, true)); + return (huge_malloc(tsd, arena, size, true, try_tcache)); } JEMALLOC_ALWAYS_INLINE void * @@ -854,9 +854,11 @@ ipalloct(tsd_t *tsd, size_t usize, size_t alignment, bool zero, bool try_tcache, return (NULL); ret = arena_palloc(arena, usize, alignment, zero); } else if (alignment <= chunksize) - ret = huge_malloc(tsd, arena, usize, zero); - else - ret = huge_palloc(tsd, arena, usize, alignment, zero); + ret = huge_malloc(tsd, arena, usize, zero, try_tcache); + else { + ret = huge_palloc(tsd, arena, usize, alignment, zero, + try_tcache); + } } assert(ALIGNMENT_ADDR2BASE(ret, alignment) == ret); @@ -938,7 +940,7 @@ idalloct(tsd_t *tsd, void *ptr, bool try_tcache) if (chunk != ptr) arena_dalloc(tsd, chunk, ptr, try_tcache); else - huge_dalloc(tsd, ptr); + huge_dalloc(tsd, ptr, try_tcache); } JEMALLOC_ALWAYS_INLINE void @@ -952,7 +954,7 @@ isdalloct(tsd_t *tsd, void *ptr, size_t size, bool try_tcache) if (chunk != ptr) arena_sdalloc(tsd, chunk, ptr, size, try_tcache); else - huge_dalloc(tsd, ptr); + huge_dalloc(tsd, ptr, try_tcache); } JEMALLOC_ALWAYS_INLINE void @@ -1042,7 +1044,7 @@ iralloct(tsd_t *tsd, void *ptr, size_t size, size_t alignment, bool zero, alignment, zero, try_tcache_alloc, try_tcache_dalloc)); } else { return (huge_ralloc(tsd, arena, ptr, oldsize, size, 0, - alignment, zero, try_tcache_dalloc)); + alignment, zero, try_tcache_alloc, try_tcache_dalloc)); } } diff --git 
a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index fd85e5cf..0ff939c6 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -144,8 +144,14 @@ /* Support lazy locking (avoid locking unless a second thread is launched). */ #undef JEMALLOC_LAZY_LOCK -/* One page is 2^STATIC_PAGE_SHIFT bytes. */ -#undef STATIC_PAGE_SHIFT +/* + * Minimum allocation alignment is 2^LG_QUANTUM bytes (ignoring tiny size + * classes). + */ +#undef LG_QUANTUM + +/* One page is 2^LG_PAGE bytes. */ +#undef LG_PAGE /* * If defined, use munmap() to unmap freed chunks, rather than storing them for diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index d5e6fdcf..66d48221 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -16,11 +16,11 @@ arena_chunk_dalloc_huge arena_cleanup arena_dalloc arena_dalloc_bin -arena_dalloc_bin_locked +arena_dalloc_bin_junked_locked arena_dalloc_junk_large arena_dalloc_junk_small arena_dalloc_large -arena_dalloc_large_locked +arena_dalloc_large_junked_locked arena_dalloc_small arena_dss_prec_get arena_dss_prec_set diff --git a/include/jemalloc/internal/size_classes.sh b/include/jemalloc/internal/size_classes.sh index 897570cc..733338c5 100755 --- a/include/jemalloc/internal/size_classes.sh +++ b/include/jemalloc/internal/size_classes.sh @@ -1,4 +1,6 @@ #!/bin/sh +# +# Usage: size_classes.sh # The following limits are chosen such that they cover all supported platforms. @@ -15,10 +17,10 @@ lg_tmin=3 lg_kmax=12 # Page sizes. -lg_parr="12 13 16" +lg_parr=`echo $1 | tr ',' ' '` # Size class group size (number of size classes for each size doubling). 
-lg_g=2 +lg_g=$2 pow2() { e=$1 @@ -159,7 +161,11 @@ size_classes() { nbins=$((${index} + 1)) # Final written value is correct: small_maxclass="((((size_t)1) << ${lg_grp}) + (((size_t)${ndelta}) << ${lg_delta}))" - lg_large_minclass=$((${lg_grp} + 1)) + if [ ${lg_g} -gt 0 ] ; then + lg_large_minclass=$((${lg_grp} + 1)) + else + lg_large_minclass=$((${lg_grp} + 2)) + fi fi index=$((${index} + 1)) ndelta=$((${ndelta} + 1)) diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index 02eec5db..fe9c47e8 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -112,7 +112,7 @@ void tcache_arena_associate(tcache_t *tcache, arena_t *arena); void tcache_arena_reassociate(tcache_t *tcache, arena_t *arena); void tcache_arena_dissociate(tcache_t *tcache); tcache_t *tcache_get_hard(tsd_t *tsd); -tcache_t *tcache_create(arena_t *arena); +tcache_t *tcache_create(tsd_t *tsd, arena_t *arena); void tcache_cleanup(tsd_t *tsd); void tcache_enabled_cleanup(tsd_t *tsd); void tcache_stats_merge(tcache_t *tcache, arena_t *arena); @@ -363,7 +363,7 @@ tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size) binind = size2index(size); if (config_fill && unlikely(opt_junk)) - memset(ptr, 0x5a, size); + arena_dalloc_junk_large(ptr, size); tbin = &tcache->tbins[binind]; tbin_info = &tcache_bin_info[binind]; diff --git a/src/arena.c b/src/arena.c index 86e54404..bbe58fa6 100644 --- a/src/arena.c +++ b/src/arena.c @@ -623,7 +623,7 @@ arena_run_alloc_large(arena_t *arena, size_t size, bool zero) arena_chunk_t *chunk; arena_run_t *run; - assert(size <= arena_maxclass); + assert(size <= arena_maxrun); assert((size & PAGE_MASK) == 0); /* Search the arena's chunks for the lowest best fit. 
*/ @@ -673,7 +673,7 @@ arena_run_alloc_small(arena_t *arena, size_t size, index_t binind) arena_chunk_t *chunk; arena_run_t *run; - assert(size <= arena_maxclass); + assert(size <= arena_maxrun); assert((size & PAGE_MASK) == 0); assert(binind != BININD_INVALID); @@ -1728,9 +1728,9 @@ arena_bin_lower_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, arena_bin_runs_insert(bin, run); } -void -arena_dalloc_bin_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr, - arena_chunk_map_bits_t *bitselm) +static void +arena_dalloc_bin_locked_impl(arena_t *arena, arena_chunk_t *chunk, void *ptr, + arena_chunk_map_bits_t *bitselm, bool junked) { size_t pageind, rpages_ind; arena_run_t *run; @@ -1749,7 +1749,7 @@ arena_dalloc_bin_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr, if (config_fill || config_stats) size = bin_info->reg_size; - if (config_fill && unlikely(opt_junk)) + if (!junked && config_fill && unlikely(opt_junk)) arena_dalloc_junk_small(ptr, bin_info); arena_run_reg_dalloc(run, ptr); @@ -1765,6 +1765,14 @@ arena_dalloc_bin_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr, } } +void +arena_dalloc_bin_junked_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr, + arena_chunk_map_bits_t *bitselm) +{ + + arena_dalloc_bin_locked_impl(arena, chunk, ptr, bitselm, true); +} + void arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, size_t pageind, arena_chunk_map_bits_t *bitselm) @@ -1777,7 +1785,7 @@ arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, run = &arena_miscelm_get(chunk, rpages_ind)->run; bin = run->bin; malloc_mutex_lock(&bin->lock); - arena_dalloc_bin_locked(arena, chunk, ptr, bitselm); + arena_dalloc_bin_locked_impl(arena, chunk, ptr, bitselm, false); malloc_mutex_unlock(&bin->lock); } @@ -1800,7 +1808,7 @@ arena_dalloc_small(arena_t *arena, arena_chunk_t *chunk, void *ptr, #undef arena_dalloc_junk_large #define arena_dalloc_junk_large JEMALLOC_N(arena_dalloc_junk_large_impl) #endif -static void +void 
arena_dalloc_junk_large(void *ptr, size_t usize) { @@ -1815,7 +1823,8 @@ arena_dalloc_junk_large_t *arena_dalloc_junk_large = #endif void -arena_dalloc_large_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr) +arena_dalloc_large_locked_impl(arena_t *arena, arena_chunk_t *chunk, + void *ptr, bool junked) { size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; arena_chunk_map_misc_t *miscelm = arena_miscelm_get(chunk, pageind); @@ -1824,7 +1833,8 @@ arena_dalloc_large_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr) if (config_fill || config_stats) { size_t usize = arena_mapbits_large_size_get(chunk, pageind); - arena_dalloc_junk_large(ptr, usize); + if (!junked) + arena_dalloc_junk_large(ptr, usize); if (config_stats) { index_t index = size2index(usize) - NBINS; @@ -1838,12 +1848,20 @@ arena_dalloc_large_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr) arena_run_dalloc(arena, run, true, false); } +void +arena_dalloc_large_junked_locked(arena_t *arena, arena_chunk_t *chunk, + void *ptr) +{ + + arena_dalloc_large_locked_impl(arena, chunk, ptr, true); +} + void arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr) { malloc_mutex_lock(&arena->lock); - arena_dalloc_large_locked(arena, chunk, ptr); + arena_dalloc_large_locked_impl(arena, chunk, ptr, false); malloc_mutex_unlock(&arena->lock); } @@ -2398,6 +2416,7 @@ arena_boot(void) sizeof(arena_chunk_map_bits_t) * (chunk_npages-map_bias); arena_maxrun = chunksize - (map_bias << LG_PAGE); + assert(arena_maxrun > 0); arena_maxclass = index2size(size2index(chunksize)-1); if (arena_maxclass > arena_maxrun) { /* @@ -2407,6 +2426,7 @@ arena_boot(void) */ arena_maxclass = arena_maxrun; } + assert(arena_maxclass > 0); nlclasses = size2index(arena_maxclass) - size2index(SMALL_MAXCLASS); bin_info_init(); diff --git a/src/huge.c b/src/huge.c index 541df60a..6c9b97bb 100644 --- a/src/huge.c +++ b/src/huge.c @@ -13,7 +13,7 @@ static malloc_mutex_t huge_mtx; static extent_tree_t huge; void * 
-huge_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero) +huge_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero, bool try_tcache) { size_t usize; @@ -23,12 +23,12 @@ huge_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero) return (NULL); } - return (huge_palloc(tsd, arena, usize, chunksize, zero)); + return (huge_palloc(tsd, arena, usize, chunksize, zero, try_tcache)); } void * huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, - bool zero) + bool zero, bool try_tcache) { void *ret; size_t csize; @@ -42,7 +42,7 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, /* Allocate an extent node with which to track the chunk. */ node = ipalloct(tsd, CACHELINE_CEILING(sizeof(extent_node_t)), - CACHELINE, false, tsd != NULL, NULL); + CACHELINE, false, try_tcache, NULL); if (node == NULL) return (NULL); @@ -58,7 +58,7 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, } ret = arena_chunk_alloc_huge(arena, NULL, csize, alignment, &is_zeroed); if (ret == NULL) { - idalloct(tsd, node, tsd != NULL); + idalloct(tsd, node, try_tcache); return (NULL); } @@ -122,6 +122,7 @@ huge_ralloc_no_move_expand(void *ptr, size_t oldsize, size_t size, bool zero) { expand_addr = ptr + CHUNK_CEILING(oldsize); expand_size = CHUNK_CEILING(usize) - CHUNK_CEILING(oldsize); + assert(expand_size > 0); malloc_mutex_lock(&huge_mtx); @@ -223,13 +224,8 @@ huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, return (false); } - if (CHUNK_CEILING(oldsize) >= CHUNK_CEILING(size) - && CHUNK_CEILING(oldsize) <= CHUNK_CEILING(size+extra)) { - return (false); - } - /* Shrink the allocation in-place. */ - if (CHUNK_CEILING(oldsize) > CHUNK_CEILING(usize)) { + if (CHUNK_CEILING(oldsize) >= CHUNK_CEILING(usize)) { extent_node_t *node, key; void *excess_addr; size_t excess_size; @@ -251,7 +247,10 @@ huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, /* Zap the excess chunks. 
*/ huge_dalloc_junk(ptr + usize, oldsize - usize); - arena_chunk_dalloc_huge(node->arena, excess_addr, excess_size); + if (excess_size > 0) { + arena_chunk_dalloc_huge(node->arena, excess_addr, + excess_size); + } return (false); } @@ -269,7 +268,8 @@ huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, void * huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size, - size_t extra, size_t alignment, bool zero, bool try_tcache_dalloc) + size_t extra, size_t alignment, bool zero, bool try_tcache_alloc, + bool try_tcache_dalloc) { void *ret; size_t copysize; @@ -283,19 +283,25 @@ huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size, * different size class. In that case, fall back to allocating new * space and copying. */ - if (alignment > chunksize) - ret = huge_palloc(tsd, arena, size + extra, alignment, zero); - else - ret = huge_malloc(tsd, arena, size + extra, zero); + if (alignment > chunksize) { + ret = huge_palloc(tsd, arena, size + extra, alignment, zero, + try_tcache_alloc); + } else { + ret = huge_malloc(tsd, arena, size + extra, zero, + try_tcache_alloc); + } if (ret == NULL) { if (extra == 0) return (NULL); /* Try again, this time without extra. 
*/ - if (alignment > chunksize) - ret = huge_palloc(tsd, arena, size, alignment, zero); - else - ret = huge_malloc(tsd, arena, size, zero); + if (alignment > chunksize) { + ret = huge_palloc(tsd, arena, size, alignment, zero, + try_tcache_alloc); + } else { + ret = huge_malloc(tsd, arena, size, zero, + try_tcache_alloc); + } if (ret == NULL) return (NULL); @@ -312,7 +318,7 @@ huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size, } void -huge_dalloc(tsd_t *tsd, void *ptr) +huge_dalloc(tsd_t *tsd, void *ptr, bool try_tcache) { extent_node_t *node, key; @@ -330,7 +336,7 @@ huge_dalloc(tsd_t *tsd, void *ptr) huge_dalloc_junk(node->addr, node->size); arena_chunk_dalloc_huge(node->arena, node->addr, CHUNK_CEILING(node->size)); - idalloct(tsd, node, tsd != NULL); + idalloct(tsd, node, try_tcache); } size_t diff --git a/src/jemalloc.c b/src/jemalloc.c index c62d8ce6..a862104a 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -67,6 +67,8 @@ const uint8_t size2index_tab[] = { #define S2B_7(i) S2B_6(i) S2B_6(i) #define S2B_8(i) S2B_7(i) S2B_7(i) #define S2B_9(i) S2B_8(i) S2B_8(i) +#define S2B_10(i) S2B_9(i) S2B_9(i) +#define S2B_11(i) S2B_10(i) S2B_10(i) #define S2B_no(i) #define SC(index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup) \ S2B_##lg_delta_lookup(index) @@ -78,6 +80,8 @@ const uint8_t size2index_tab[] = { #undef S2B_7 #undef S2B_8 #undef S2B_9 +#undef S2B_10 +#undef S2B_11 #undef S2B_no #undef SC }; @@ -199,6 +203,7 @@ static void * a0alloc(size_t size, bool zero) { void *ret; + tsd_t *tsd; if (unlikely(malloc_init())) return (NULL); @@ -206,10 +211,11 @@ a0alloc(size_t size, bool zero) if (size == 0) size = 1; + tsd = tsd_fetch(); if (size <= arena_maxclass) - ret = arena_malloc(NULL, a0get(), size, zero, false); + ret = arena_malloc(tsd, a0get(), size, zero, false); else - ret = huge_malloc(NULL, a0get(), size, zero); + ret = huge_malloc(tsd, a0get(), size, zero, false); return (ret); } @@ -231,16 +237,18 @@ a0calloc(size_t num, 
size_t size) void a0free(void *ptr) { + tsd_t *tsd; arena_chunk_t *chunk; if (ptr == NULL) return; + tsd = tsd_fetch(); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (chunk != ptr) - arena_dalloc(NULL, chunk, ptr, false); + arena_dalloc(tsd, chunk, ptr, false); else - huge_dalloc(NULL, ptr); + huge_dalloc(tsd, ptr, false); } /* Create a new arena and insert it into the arenas array at index ind. */ @@ -817,15 +825,15 @@ malloc_conf_init(void) "Invalid conf value", \ k, klen, v, vlen); \ } else if (clip) { \ - if (min != 0 && um < min) \ - o = min; \ - else if (um > max) \ - o = max; \ + if ((min) != 0 && um < (min)) \ + o = (min); \ + else if (um > (max)) \ + o = (max); \ else \ o = um; \ } else { \ - if ((min != 0 && um < min) || \ - um > max) { \ + if (((min) != 0 && um < (min)) \ + || um > (max)) { \ malloc_conf_error( \ "Out-of-range " \ "conf value", \ @@ -847,8 +855,8 @@ malloc_conf_init(void) malloc_conf_error( \ "Invalid conf value", \ k, klen, v, vlen); \ - } else if (l < (ssize_t)min || l > \ - (ssize_t)max) { \ + } else if (l < (ssize_t)(min) || l > \ + (ssize_t)(max)) { \ malloc_conf_error( \ "Out-of-range conf value", \ k, klen, v, vlen); \ @@ -868,15 +876,16 @@ malloc_conf_init(void) CONF_HANDLE_BOOL(opt_abort, "abort", true) /* - * Chunks always require at least one header page, plus - * one data page in the absence of redzones, or three - * pages in the presence of redzones. In order to - * simplify options processing, fix the limit based on - * config_fill. + * Chunks always require at least one header page, + * as many as 2^(LG_SIZE_CLASS_GROUP+1) data pages, and + * possibly an additional page in the presence of + * redzones. In order to simplify options processing, + * use a conservative bound that accommodates all these + * constraints. */ CONF_HANDLE_SIZE_T(opt_lg_chunk, "lg_chunk", LG_PAGE + - (config_fill ? 2 : 1), (sizeof(size_t) << 3) - 1, - true) + LG_SIZE_CLASS_GROUP + (config_fill ? 
2 : 1), + (sizeof(size_t) << 3) - 1, true) if (strncmp("dss", k, klen) == 0) { int i; bool match = false; @@ -2088,8 +2097,7 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) if (unlikely((flags & MALLOCX_ARENA_MASK) != 0)) { unsigned arena_ind = MALLOCX_ARENA_GET(flags); - // XX Dangerous arenas read. - arena = arenas[arena_ind]; + arena = arena_get(tsd, arena_ind, true, true); } else arena = NULL; diff --git a/src/tcache.c b/src/tcache.c index 1bf70269..34224ec4 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -117,8 +117,8 @@ tcache_bin_flush_small(tcache_bin_t *tbin, index_t binind, unsigned rem, (uintptr_t)chunk) >> LG_PAGE; arena_chunk_map_bits_t *bitselm = arena_bitselm_get(chunk, pageind); - arena_dalloc_bin_locked(arena, chunk, ptr, - bitselm); + arena_dalloc_bin_junked_locked(arena, chunk, + ptr, bitselm); } else { /* * This object was allocated via a different @@ -193,9 +193,10 @@ tcache_bin_flush_large(tcache_bin_t *tbin, index_t binind, unsigned rem, ptr = tbin->avail[i]; assert(ptr != NULL); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - if (chunk->arena == arena) - arena_dalloc_large_locked(arena, chunk, ptr); - else { + if (chunk->arena == arena) { + arena_dalloc_large_junked_locked(arena, chunk, + ptr); + } else { /* * This object was allocated via a different * arena than the one that is currently locked. @@ -279,11 +280,11 @@ tcache_get_hard(tsd_t *tsd) arena = arena_choose(tsd, NULL); if (unlikely(arena == NULL)) return (NULL); - return (tcache_create(arena)); + return (tcache_create(tsd, arena)); } tcache_t * -tcache_create(arena_t *arena) +tcache_create(tsd_t *tsd, arena_t *arena) { tcache_t *tcache; size_t size, stack_offset; @@ -294,23 +295,10 @@ tcache_create(arena_t *arena) size = PTR_CEILING(size); stack_offset = size; size += stack_nelms * sizeof(void *); - /* - * Round up to the nearest multiple of the cacheline size, in order to - * avoid the possibility of false cacheline sharing. 
- * - * That this works relies on the same logic as in ipalloc(), but we - * cannot directly call ipalloc() here due to tcache bootstrapping - * issues. - */ - size = (size + CACHELINE_MASK) & (-CACHELINE); - - if (size <= SMALL_MAXCLASS) - tcache = (tcache_t *)arena_malloc_small(arena, size, true); - else if (size <= tcache_maxclass) - tcache = (tcache_t *)arena_malloc_large(arena, size, true); - else - tcache = (tcache_t *)icalloct(NULL, size, false, arena); + /* Avoid false cacheline sharing. */ + size = sa2u(size, CACHELINE); + tcache = ipalloct(tsd, size, CACHELINE, true, false, arena); if (tcache == NULL) return (NULL); @@ -331,7 +319,6 @@ static void tcache_destroy(tsd_t *tsd, tcache_t *tcache) { unsigned i; - size_t tcache_size; tcache_arena_dissociate(tcache); @@ -366,23 +353,7 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache) arena_prof_accum(tcache->arena, tcache->prof_accumbytes)) prof_idump(); - tcache_size = arena_salloc(tcache, false); - if (tcache_size <= SMALL_MAXCLASS) { - arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache); - arena_t *arena = chunk->arena; - size_t pageind = ((uintptr_t)tcache - (uintptr_t)chunk) >> - LG_PAGE; - arena_chunk_map_bits_t *bitselm = arena_bitselm_get(chunk, - pageind); - - arena_dalloc_bin(arena, chunk, tcache, pageind, bitselm); - } else if (tcache_size <= tcache_maxclass) { - arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache); - arena_t *arena = chunk->arena; - - arena_dalloc_large(arena, chunk, tcache); - } else - idalloct(tsd, tcache, false); + idalloct(tsd, tcache, false); } void diff --git a/test/unit/lg_chunk.c b/test/unit/lg_chunk.c new file mode 100644 index 00000000..7f0b31ce --- /dev/null +++ b/test/unit/lg_chunk.c @@ -0,0 +1,26 @@ +#include "test/jemalloc_test.h" + +/* + * Make sure that opt.lg_chunk clamping is sufficient. In practice, this test + * program will fail a debug assertion during initialization and abort (rather + * than the test soft-failing) if clamping is insufficient. 
+ */ +const char *malloc_conf = "lg_chunk:0"; + +TEST_BEGIN(test_lg_chunk_clamp) +{ + void *p; + + p = mallocx(1, 0); + assert_ptr_not_null(p, "Unexpected mallocx() failure"); + dallocx(p, 0); +} +TEST_END + +int +main(void) +{ + + return (test( + test_lg_chunk_clamp)); +} diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index e62e54f2..a8f7aed6 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -357,7 +357,7 @@ TEST_BEGIN(test_arenas_lrun_constants) assert_zu_eq(name, expected, "Incorrect "#name" size"); \ } while (0) - TEST_ARENAS_LRUN_CONSTANT(size_t, size, (1 << (LG_PAGE+2))); + TEST_ARENAS_LRUN_CONSTANT(size_t, size, LARGE_MINCLASS); #undef TEST_ARENAS_LRUN_CONSTANT } From 9b75677e538836b284a0d26a593963187c24a153 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 10 Oct 2014 18:19:20 -0700 Subject: [PATCH 0566/3378] Don't fetch tsd in a0{d,}alloc(). Don't fetch tsd in a0{d,}alloc(), because doing so can cause infinite recursion on systems that require an allocated tsd wrapper. 
--- src/jemalloc.c | 18 +++++++----------- test/unit/mq.c | 1 + 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index a862104a..fc490eba 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -203,7 +203,6 @@ static void * a0alloc(size_t size, bool zero) { void *ret; - tsd_t *tsd; if (unlikely(malloc_init())) return (NULL); @@ -211,11 +210,10 @@ a0alloc(size_t size, bool zero) if (size == 0) size = 1; - tsd = tsd_fetch(); if (size <= arena_maxclass) - ret = arena_malloc(tsd, a0get(), size, zero, false); + ret = arena_malloc(NULL, a0get(), size, zero, false); else - ret = huge_malloc(tsd, a0get(), size, zero, false); + ret = huge_malloc(NULL, a0get(), size, zero, false); return (ret); } @@ -237,18 +235,16 @@ a0calloc(size_t num, size_t size) void a0free(void *ptr) { - tsd_t *tsd; arena_chunk_t *chunk; if (ptr == NULL) return; - tsd = tsd_fetch(); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (chunk != ptr) - arena_dalloc(tsd, chunk, ptr, false); + arena_dalloc(NULL, chunk, ptr, false); else - huge_dalloc(tsd, ptr, false); + huge_dalloc(NULL, ptr, false); } /* Create a new arena and insert it into the arenas array at index ind. */ @@ -2301,9 +2297,9 @@ je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr) * fork/malloc races via the following functions it registers during * initialization using pthread_atfork(), but of course that does no good if * the allocator isn't fully initialized at fork time. The following library - * constructor is a partial solution to this problem. It may still possible to - * trigger the deadlock described above, but doing so would involve forking via - * a library constructor that runs before jemalloc's runs. + * constructor is a partial solution to this problem. It may still be possible + * to trigger the deadlock described above, but doing so would involve forking + * via a library constructor that runs before jemalloc's runs. 
*/ JEMALLOC_ATTR(constructor) static void diff --git a/test/unit/mq.c b/test/unit/mq.c index bd289c54..bde2a480 100644 --- a/test/unit/mq.c +++ b/test/unit/mq.c @@ -85,6 +85,7 @@ TEST_END int main(void) { + return (test( test_mq_basic, test_mq_threaded)); From 2eb941a3d3a69fa8a73902b29564294f854fc3b0 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 10 Oct 2014 20:40:43 -0700 Subject: [PATCH 0567/3378] Add AC_CACHE_CHECK() for pause instruction. This supports cross compilation. --- configure.ac | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/configure.ac b/configure.ac index f8c09c46..cc30da93 100644 --- a/configure.ac +++ b/configure.ac @@ -207,9 +207,10 @@ dnl CPU-specific settings. CPU_SPINWAIT="" case "${host_cpu}" in i686|x86_64) - JE_COMPILABLE([pause instruction], [], - [[__asm__ volatile("pause"); return 0;]], - [je_cv_pause]) + AC_CACHE_CHECK([whether pause instruction is compilable], [je_cv_pause], + [JE_COMPILABLE([pause instruction], [], + [[__asm__ volatile("pause"); return 0;]], + [je_cv_pause])]) if test "x${je_cv_pause}" = "xyes" ; then CPU_SPINWAIT='__asm__ volatile("pause")' fi From 81e547566e9bd55db7c317c5848ab9dc189047cb Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 10 Oct 2014 22:34:25 -0700 Subject: [PATCH 0568/3378] Add --with-lg-tiny-min, generalize --with-lg-quantum. --- INSTALL | 32 ++++++++--- configure.ac | 21 ++++++-- .../jemalloc/internal/jemalloc_internal.h.in | 1 - .../internal/jemalloc_internal_defs.h.in | 3 ++ include/jemalloc/internal/size_classes.sh | 10 ++-- src/jemalloc.c | 54 +++++++++++++++++++ 6 files changed, 105 insertions(+), 16 deletions(-) diff --git a/INSTALL b/INSTALL index 73bf7185..a00960aa 100644 --- a/INSTALL +++ b/INSTALL @@ -230,10 +230,9 @@ any of the following arguments (not a definitive list) to 'configure': roughly =4, depending on page size. --with-lg-quantum= - Specify the base 2 log of the minimum allocation alignment (only - =3 and =4 are supported). 
jemalloc needs to know - the minimum alignment that meets the following C standard requirement - (quoted from the April 12, 2011 draft of the C11 standard): + Specify the base 2 log of the minimum allocation alignment. jemalloc needs + to know the minimum alignment that meets the following C standard + requirement (quoted from the April 12, 2011 draft of the C11 standard): The pointer returned if the allocation succeeds is suitably aligned so that it may be assigned to a pointer to any type of object with a @@ -247,8 +246,8 @@ any of the following arguments (not a definitive list) to 'configure': (=4), but the glibc developers chose not to meet this requirement for performance reasons. An old discussion can be found at https://sourceware.org/bugzilla/show_bug.cgi?id=206 . Unlike glibc, - jemalloc does follow the C standard by default (caveat: jemalloc technically - cheats by only providing 8-byte alignment for 8-byte allocation requests), + jemalloc does follow the C standard by default (caveat: jemalloc + technically cheats if --with-lg-tiny-min is smaller than --with-lg-quantum), but the fact that Linux systems already work around this allocator noncompliance means that it is generally safe in practice to let jemalloc's minimum alignment follow glibc's lead. If you specify --with-lg-quantum=3 @@ -256,6 +255,27 @@ any of the following arguments (not a definitive list) to 'configure': are not 16-byte-aligned (24, 40, and 56, assuming --with-lg-size-class-group=2). +--with-lg-tiny-min= + Specify the base 2 log of the minimum tiny size class to support. Tiny + size classes are powers of 2 less than the quantum, and are only + incorporated if is less than (see + --with-lg-quantum). 
Tiny size classes technically violate the C standard + requirement for minimum alignment, and crashes could conceivably result if + the compiler were to generate instructions that made alignment assumptions, + both because illegal instruction traps could result, and because accesses + could straddle page boundaries and cause segmentation faults due to + accessing unmapped addresses. + + The default of =3 works well in practice even on architectures + that technically require 16-byte alignment, probably for the same reason + --with-lg-quantum=3 works. Smaller tiny size classes can, and will, cause + crashes (see https://bugzilla.mozilla.org/show_bug.cgi?id=691003 for an + example). + + This option is rarely useful, and is mainly provided as documentation of a + subtle implementation detail. If you do use this option, specify a + value in [3, ..., ]. + The following environment variables (not a definitive list) impact configure's behavior: diff --git a/configure.ac b/configure.ac index cc30da93..a7bf1039 100644 --- a/configure.ac +++ b/configure.ac @@ -207,7 +207,7 @@ dnl CPU-specific settings. 
CPU_SPINWAIT="" case "${host_cpu}" in i686|x86_64) - AC_CACHE_CHECK([whether pause instruction is compilable], [je_cv_pause], + AC_CACHE_VAL([je_cv_pause], [JE_COMPILABLE([pause instruction], [], [[__asm__ volatile("pause"); return 0;]], [je_cv_pause])]) @@ -970,10 +970,21 @@ else fi fi +AC_ARG_WITH([lg_tiny_min], + [AS_HELP_STRING([--with-lg-tiny-min=], + [Base 2 log of minimum tiny size class to support])], + [LG_TINY_MIN="$with_lg_tiny_min"], + [LG_TINY_MIN="3"]) +AC_DEFINE_UNQUOTED([LG_TINY_MIN], [$LG_TINY_MIN]) + AC_ARG_WITH([lg_quantum], [AS_HELP_STRING([--with-lg-quantum=], [Base 2 log of minimum allocation alignment])], - [AC_DEFINE_UNQUOTED([LG_QUANTUM], [$with_lg_quantum])]) + [LG_QUANTA="$with_lg_quantum"], + [LG_QUANTA="3 4"]) +if test "x$with_lg_quantum" != "x" ; then + AC_DEFINE_UNQUOTED([LG_QUANTUM], [$with_lg_quantum]) +fi AC_ARG_WITH([lg_page], [AS_HELP_STRING([--with-lg-page=], [Base 2 log of system page size])], @@ -1480,11 +1491,13 @@ AC_CONFIG_COMMANDS([include/jemalloc/internal/public_unnamespace.h], [ ]) AC_CONFIG_COMMANDS([include/jemalloc/internal/size_classes.h], [ mkdir -p "${objroot}include/jemalloc/internal" - "${srcdir}/include/jemalloc/internal/size_classes.sh" ${LG_PAGE_SIZES} ${LG_SIZE_CLASS_GROUP} > "${objroot}include/jemalloc/internal/size_classes.h" + "${srcdir}/include/jemalloc/internal/size_classes.sh" "${LG_QUANTA}" ${LG_TINY_MIN} "${LG_PAGE_SIZES}" ${LG_SIZE_CLASS_GROUP} > "${objroot}include/jemalloc/internal/size_classes.h" ], [ srcdir="${srcdir}" objroot="${objroot}" - LG_PAGE_SIZES=${LG_PAGE_SIZES} + LG_QUANTA="${LG_QUANTA}" + LG_TINY_MIN=${LG_TINY_MIN} + LG_PAGE_SIZES="${LG_PAGE_SIZES}" LG_SIZE_CLASS_GROUP=${LG_SIZE_CLASS_GROUP} ]) AC_CONFIG_COMMANDS([include/jemalloc/jemalloc_protos_jet.h], [ diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 3f65fad0..294e2cc1 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ 
b/include/jemalloc/internal/jemalloc_internal.h.in @@ -181,7 +181,6 @@ typedef unsigned index_t; (((unsigned)(flags >> 8)) - 1) /* Smallest size class to support. */ -#define LG_TINY_MIN 3 #define TINY_MIN (1U << LG_TINY_MIN) /* diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 0ff939c6..dccbb1ed 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -144,6 +144,9 @@ /* Support lazy locking (avoid locking unless a second thread is launched). */ #undef JEMALLOC_LAZY_LOCK +/* Minimum size class to support is 2^LG_TINY_MIN bytes. */ +#undef LG_TINY_MIN + /* * Minimum allocation alignment is 2^LG_QUANTUM bytes (ignoring tiny size * classes). diff --git a/include/jemalloc/internal/size_classes.sh b/include/jemalloc/internal/size_classes.sh index 733338c5..38020dc6 100755 --- a/include/jemalloc/internal/size_classes.sh +++ b/include/jemalloc/internal/size_classes.sh @@ -1,6 +1,6 @@ #!/bin/sh # -# Usage: size_classes.sh +# Usage: size_classes.sh # The following limits are chosen such that they cover all supported platforms. @@ -8,19 +8,19 @@ lg_zarr="2 3" # Quanta. -lg_qarr="3 4" +lg_qarr=$1 # The range of tiny size classes is [2^lg_tmin..2^(lg_q-1)]. -lg_tmin=3 +lg_tmin=$2 # Maximum lookup size. lg_kmax=12 # Page sizes. -lg_parr=`echo $1 | tr ',' ' '` +lg_parr=`echo $3 | tr ',' ' '` # Size class group size (number of size classes for each size doubling). 
-lg_g=$2 +lg_g=$4 pow2() { e=$1 diff --git a/src/jemalloc.c b/src/jemalloc.c index fc490eba..45439595 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -60,15 +60,69 @@ const size_t index2size_tab[NSIZES] = { JEMALLOC_ALIGNED(CACHELINE) const uint8_t size2index_tab[] = { +#if LG_TINY_MIN == 0 +#warning "Dangerous LG_TINY_MIN" +#define S2B_0(i) i, +#elif LG_TINY_MIN == 1 +#warning "Dangerous LG_TINY_MIN" +#define S2B_1(i) i, +#elif LG_TINY_MIN == 2 +#warning "Dangerous LG_TINY_MIN" +#define S2B_2(i) i, +#elif LG_TINY_MIN == 3 #define S2B_3(i) i, +#elif LG_TINY_MIN == 4 +#define S2B_4(i) i, +#elif LG_TINY_MIN == 5 +#define S2B_5(i) i, +#elif LG_TINY_MIN == 6 +#define S2B_6(i) i, +#elif LG_TINY_MIN == 7 +#define S2B_7(i) i, +#elif LG_TINY_MIN == 8 +#define S2B_8(i) i, +#elif LG_TINY_MIN == 9 +#define S2B_9(i) i, +#elif LG_TINY_MIN == 10 +#define S2B_10(i) i, +#elif LG_TINY_MIN == 11 +#define S2B_11(i) i, +#else +#error "Unsupported LG_TINY_MIN" +#endif +#if LG_TINY_MIN < 1 +#define S2B_1(i) S2B_0(i) S2B_0(i) +#endif +#if LG_TINY_MIN < 2 +#define S2B_2(i) S2B_1(i) S2B_1(i) +#endif +#if LG_TINY_MIN < 3 +#define S2B_3(i) S2B_2(i) S2B_2(i) +#endif +#if LG_TINY_MIN < 4 #define S2B_4(i) S2B_3(i) S2B_3(i) +#endif +#if LG_TINY_MIN < 5 #define S2B_5(i) S2B_4(i) S2B_4(i) +#endif +#if LG_TINY_MIN < 6 #define S2B_6(i) S2B_5(i) S2B_5(i) +#endif +#if LG_TINY_MIN < 7 #define S2B_7(i) S2B_6(i) S2B_6(i) +#endif +#if LG_TINY_MIN < 8 #define S2B_8(i) S2B_7(i) S2B_7(i) +#endif +#if LG_TINY_MIN < 9 #define S2B_9(i) S2B_8(i) S2B_8(i) +#endif +#if LG_TINY_MIN < 10 #define S2B_10(i) S2B_9(i) S2B_9(i) +#endif +#if LG_TINY_MIN < 11 #define S2B_11(i) S2B_10(i) S2B_10(i) +#endif #define S2B_no(i) #define SC(index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup) \ S2B_##lg_delta_lookup(index) From 381c23dd9d3bf019cc4c7523a900be1e888802a7 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 10 Oct 2014 23:01:03 -0700 Subject: [PATCH 0569/3378] Remove arena_dalloc_bin_run() clean page 
preservation. Remove code in arena_dalloc_bin_run() that preserved the "clean" state of trailing clean pages by splitting them into a separate run during deallocation. This was a useful mechanism for reducing dirty page churn when bin runs comprised many pages, but bin runs are now quite small. Remove the nextind field from arena_run_t now that it is no longer needed, and change arena_run_t's bin field (arena_bin_t *) to binind (index_t). These two changes remove 8 bytes of chunk header overhead per page, which saves 1/512 of all arena chunk memory. --- include/jemalloc/internal/arena.h | 14 +++--- src/arena.c | 73 +++---------------------------- 2 files changed, 13 insertions(+), 74 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index f5b9fc62..28ff7271 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -36,11 +36,8 @@ typedef struct arena_s arena_t; #ifdef JEMALLOC_H_STRUCTS struct arena_run_s { - /* Bin this run is associated with. */ - arena_bin_t *bin; - - /* Index of next region that has never been allocated, or nregs. */ - uint32_t nextind; + /* Index of bin this run is associated with. */ + index_t binind; /* Number of free regions in run. 
*/ unsigned nfree; @@ -756,7 +753,7 @@ arena_ptr_small_binind_get(const void *ptr, size_t mapbits) size_t rpages_ind; arena_run_t *run; arena_bin_t *bin; - index_t actual_binind; + index_t run_binind, actual_binind; arena_bin_info_t *bin_info; arena_chunk_map_misc_t *miscelm; void *rpages; @@ -774,9 +771,10 @@ arena_ptr_small_binind_get(const void *ptr, size_t mapbits) pageind); miscelm = arena_miscelm_get(chunk, rpages_ind); run = &miscelm->run; - bin = run->bin; + run_binind = run->binind; + bin = &arena->bins[run_binind]; actual_binind = bin - arena->bins; - assert(binind == actual_binind); + assert(run_binind == actual_binind); bin_info = &arena_bin_info[actual_binind]; rpages = arena_miscelm_to_rpages(miscelm); assert(((uintptr_t)ptr - ((uintptr_t)rpages + diff --git a/src/arena.c b/src/arena.c index bbe58fa6..8872331d 100644 --- a/src/arena.c +++ b/src/arena.c @@ -155,9 +155,6 @@ arena_run_reg_alloc(arena_run_t *run, arena_bin_info_t *bin_info) ret = (void *)((uintptr_t)rpages + (uintptr_t)bin_info->reg0_offset + (uintptr_t)(bin_info->reg_interval * regind)); run->nfree--; - if (regind == run->nextind) - run->nextind++; - assert(regind < run->nextind); return (ret); } @@ -361,26 +358,12 @@ arena_run_split_small(arena_t *arena, arena_run_t *run, size_t size, arena_run_split_remove(arena, chunk, run_ind, flag_dirty, need_pages); - /* - * Propagate the dirty and unzeroed flags to the allocated small run, - * so that arena_dalloc_bin_run() has the ability to conditionally trim - * clean pages. 
- */ - arena_mapbits_small_set(chunk, run_ind, 0, binind, flag_dirty); - if (config_debug && flag_dirty == 0 && arena_mapbits_unzeroed_get(chunk, - run_ind) == 0) - arena_run_page_validate_zeroed(chunk, run_ind); - for (i = 1; i < need_pages - 1; i++) { + for (i = 0; i < need_pages; i++) { arena_mapbits_small_set(chunk, run_ind+i, i, binind, 0); if (config_debug && flag_dirty == 0 && arena_mapbits_unzeroed_get(chunk, run_ind+i) == 0) arena_run_page_validate_zeroed(chunk, run_ind+i); } - arena_mapbits_small_set(chunk, run_ind+need_pages-1, need_pages-1, - binind, flag_dirty); - if (config_debug && flag_dirty == 0 && arena_mapbits_unzeroed_get(chunk, - run_ind+need_pages-1) == 0) - arena_run_page_validate_zeroed(chunk, run_ind+need_pages-1); JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED((void *)((uintptr_t)chunk + (run_ind << LG_PAGE)), (need_pages << LG_PAGE)); } @@ -1002,8 +985,7 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, bool cleaned) arena_mapbits_large_size_get(chunk, run_ind+(size>>LG_PAGE)-1) == 0); } else { - index_t binind = arena_bin_index(arena, run->bin); - arena_bin_info_t *bin_info = &arena_bin_info[binind]; + arena_bin_info_t *bin_info = &arena_bin_info[run->binind]; size = bin_info->run_size; } run_pages = (size >> LG_PAGE); @@ -1199,8 +1181,7 @@ arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin) run = arena_run_alloc_small(arena, bin_info->run_size, binind); if (run != NULL) { /* Initialize run internals. 
*/ - run->bin = bin; - run->nextind = 0; + run->binind = binind; run->nfree = bin_info->nregs; bitmap_init(run->bitmap, &bin_info->bitmap_info); } @@ -1652,54 +1633,15 @@ static void arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, arena_bin_t *bin) { - index_t binind; - arena_bin_info_t *bin_info; - size_t npages, run_ind, past; - arena_chunk_map_misc_t *miscelm; - void *rpages; assert(run != bin->runcur); assert(arena_run_tree_search(&bin->runs, arena_run_to_miscelm(run)) == NULL); - binind = arena_bin_index(chunk->arena, run->bin); - bin_info = &arena_bin_info[binind]; - malloc_mutex_unlock(&bin->lock); /******************************/ - npages = bin_info->run_size >> LG_PAGE; - miscelm = arena_run_to_miscelm(run); - run_ind = arena_miscelm_to_pageind(miscelm); - rpages = arena_miscelm_to_rpages(miscelm); - past = (size_t)(PAGE_CEILING((uintptr_t)rpages + - (uintptr_t)bin_info->reg0_offset + (uintptr_t)(run->nextind * - bin_info->reg_interval - bin_info->redzone_size) - - (uintptr_t)chunk) >> LG_PAGE); malloc_mutex_lock(&arena->lock); - - /* - * If the run was originally clean, and some pages were never touched, - * trim the clean pages before deallocating the dirty portion of the - * run. - */ - assert(arena_mapbits_dirty_get(chunk, run_ind) == - arena_mapbits_dirty_get(chunk, run_ind+npages-1)); - if (arena_mapbits_dirty_get(chunk, run_ind) == 0 && past - run_ind < - npages) { - /* Trim clean pages. Convert to large run beforehand. 
*/ - assert(npages > 0); - if (past > run_ind) { - arena_mapbits_large_set(chunk, run_ind, - bin_info->run_size, 0); - arena_mapbits_large_set(chunk, run_ind+npages-1, 0, 0); - arena_run_trim_tail(arena, chunk, run, (npages << - LG_PAGE), ((past - run_ind) << LG_PAGE), false); - arena_run_dalloc(arena, run, true, false); - } else - arena_run_dalloc(arena, run, false, false); - /* npages = past - run_ind; */ - } else - arena_run_dalloc(arena, run, true, false); + arena_run_dalloc(arena, run, true, false); malloc_mutex_unlock(&arena->lock); /****************************/ malloc_mutex_lock(&bin->lock); @@ -1742,9 +1684,8 @@ arena_dalloc_bin_locked_impl(arena_t *arena, arena_chunk_t *chunk, void *ptr, pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; rpages_ind = pageind - arena_mapbits_small_runind_get(chunk, pageind); run = &arena_miscelm_get(chunk, rpages_ind)->run; - bin = run->bin; - binind = arena_ptr_small_binind_get(ptr, arena_mapbits_get(chunk, - pageind)); + binind = run->binind; + bin = &arena->bins[binind]; bin_info = &arena_bin_info[binind]; if (config_fill || config_stats) size = bin_info->reg_size; @@ -1783,7 +1724,7 @@ arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, rpages_ind = pageind - arena_mapbits_small_runind_get(chunk, pageind); run = &arena_miscelm_get(chunk, rpages_ind)->run; - bin = run->bin; + bin = &arena->bins[run->binind]; malloc_mutex_lock(&bin->lock); arena_dalloc_bin_locked_impl(arena, chunk, ptr, bitselm, false); malloc_mutex_unlock(&bin->lock); From 44c97b712ef1669a4c75ea97e8d47c0535e9ec71 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 12 Oct 2014 13:03:20 -0700 Subject: [PATCH 0570/3378] Fix a prof_tctx_t/prof_tdata_t cleanup race. Fix a prof_tctx_t/prof_tdata_t cleanup race by storing a copy of thr_uid in prof_tctx_t, so that the associated tdata need not be present during tctx teardown. 
--- include/jemalloc/internal/prof.h | 6 ++++++ src/prof.c | 10 +++++----- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index c8014717..5103146b 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -89,6 +89,12 @@ struct prof_tctx_s { /* Thread data for thread that performed the allocation. */ prof_tdata_t *tdata; + /* + * Copy of tdata->thr_uid, necessary because tdata may be defunct during + * teardown. + */ + uint64_t thr_uid; + /* Profiling counters, protected by tdata->lock. */ prof_cnt_t cnts; diff --git a/src/prof.c b/src/prof.c index 3e2e4277..40163271 100644 --- a/src/prof.c +++ b/src/prof.c @@ -128,8 +128,8 @@ static char *prof_thread_name_alloc(tsd_t *tsd, const char *thread_name); JEMALLOC_INLINE_C int prof_tctx_comp(const prof_tctx_t *a, const prof_tctx_t *b) { - uint64_t a_uid = a->tdata->thr_uid; - uint64_t b_uid = b->tdata->thr_uid; + uint64_t a_uid = a->thr_uid; + uint64_t b_uid = b->thr_uid; return ((a_uid > b_uid) - (a_uid < b_uid)); } @@ -755,6 +755,7 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) return (NULL); } ret.p->tdata = tdata; + ret.p->thr_uid = tdata->thr_uid; memset(&ret.p->cnts, 0, sizeof(prof_cnt_t)); ret.p->gctx = gctx; ret.p->prepared = true; @@ -1051,9 +1052,8 @@ prof_tctx_dump_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) if (prof_dump_printf(propagate_err, " t%"PRIu64": %"PRIu64": %"PRIu64" [%"PRIu64": %"PRIu64"]\n", - tctx->tdata->thr_uid, tctx->dump_cnts.curobjs, - tctx->dump_cnts.curbytes, tctx->dump_cnts.accumobjs, - tctx->dump_cnts.accumbytes)) + tctx->thr_uid, tctx->dump_cnts.curobjs, tctx->dump_cnts.curbytes, + tctx->dump_cnts.accumobjs, tctx->dump_cnts.accumbytes)) return (tctx); return (NULL); } From 3c4d92e82a31f652a7c77ca937a02d0185085b06 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 12 Oct 2014 22:53:59 -0700 Subject: [PATCH 0571/3378] Add per size class huge allocation 
statistics. Add per size class huge allocation statistics, and normalize various stats: - Change the arenas.nlruns type from size_t to unsigned. - Add the arenas.nhchunks and arenas.hchunks..size mallctl's. - Replace the stats.arenas..bins..allocated mallctl with stats.arenas..bins..curregs . - Add the stats.arenas..hchunks..nmalloc, stats.arenas..hchunks..ndalloc, stats.arenas..hchunks..nrequests, and stats.arenas..hchunks..curhchunks mallctl's. --- doc/jemalloc.xml.in | 98 +++++++-- include/jemalloc/internal/arena.h | 9 +- include/jemalloc/internal/ctl.h | 1 + include/jemalloc/internal/stats.h | 34 ++- src/arena.c | 79 ++++--- src/ctl.c | 334 +++++++++++++++++++----------- src/huge.c | 168 +++++++++------ src/stats.c | 215 +++++++++++-------- test/unit/mallctl.c | 21 +- test/unit/stats.c | 103 ++++++++- 10 files changed, 724 insertions(+), 338 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 7da1498a..8111fc1d 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -406,11 +406,12 @@ for (i = 0; i < nbins; i++) { functions simultaneously. If is specified during configuration, “m” and “a” can be specified to omit merged arena and per arena statistics, respectively; - “b” and “l” can be specified to omit per size - class statistics for bins and large objects, respectively. Unrecognized - characters are silently ignored. Note that thread caching may prevent - some statistics from being completely up to date, since extra locking - would be required to merge counters that track thread cache operations. + “b”, “l”, and “h” can be specified to + omit per size class statistics for bins, large objects, and huge objects, + respectively. Unrecognized characters are silently ignored. Note that + thread caching may prevent some statistics from being completely up to + date, since extra locking would be required to merge counters that track + thread cache operations. 
The malloc_usable_size function @@ -1520,7 +1521,7 @@ malloc_conf = "xmalloc:true";]]> arenas.nlruns - (size_t) + (unsigned) r- Total number of large size classes. @@ -1536,6 +1537,25 @@ malloc_conf = "xmalloc:true";]]> class. + + + arenas.nhchunks + (unsigned) + r- + + Total number of huge size classes. + + + + + arenas.hchunks.<i>.size + (size_t) + r- + + Maximum size supported by this huge size + class. + + arenas.extend @@ -1945,17 +1965,6 @@ malloc_conf = "xmalloc:true";]]> - - - stats.arenas.<i>.bins.<j>.allocated - (size_t) - r- - [] - - Current number of bytes allocated by - bin. - - stats.arenas.<i>.bins.<j>.nmalloc @@ -1989,6 +1998,17 @@ malloc_conf = "xmalloc:true";]]> requests. + + + stats.arenas.<i>.bins.<j>.curregs + (size_t) + r- + [] + + Current number of regions for this size + class. + + stats.arenas.<i>.bins.<j>.nfills @@ -2083,6 +2103,50 @@ malloc_conf = "xmalloc:true";]]> Current number of runs for this size class. + + + + stats.arenas.<i>.hchunks.<j>.nmalloc + (uint64_t) + r- + [] + + Cumulative number of allocation requests for this size + class served directly by the arena. + + + + + stats.arenas.<i>.hchunks.<j>.ndalloc + (uint64_t) + r- + [] + + Cumulative number of deallocation requests for this + size class served directly by the arena. + + + + + stats.arenas.<i>.hchunks.<j>.nrequests + (uint64_t) + r- + [] + + Cumulative number of allocation requests for this size + class. + + + + + stats.arenas.<i>.hchunks.<j>.curhchunks + (size_t) + r- + [] + + Current number of huge allocations for this size class. + + diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 28ff7271..c31c8d7d 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -335,11 +335,12 @@ extern size_t map_bias; /* Number of arena chunk header pages. */ extern size_t map_misc_offset; extern size_t arena_maxrun; /* Max run size for arenas. */ extern size_t arena_maxclass; /* Max size class for arenas. 
*/ -extern size_t nlclasses; /* Number of large size classes. */ +extern unsigned nlclasses; /* Number of large size classes. */ +extern unsigned nhclasses; /* Number of huge size classes. */ -void *arena_chunk_alloc_huge(arena_t *arena, void *new_addr, size_t size, +void *arena_chunk_alloc_huge(arena_t *arena, void *new_addr, size_t usize, size_t alignment, bool *zero); -void arena_chunk_dalloc_huge(arena_t *arena, void *chunk, size_t size); +void arena_chunk_dalloc_huge(arena_t *arena, void *chunk, size_t usize); void arena_purge_all(arena_t *arena); void arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, index_t binind, uint64_t prof_accumbytes); @@ -387,7 +388,7 @@ dss_prec_t arena_dss_prec_get(arena_t *arena); bool arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec); void arena_stats_merge(arena_t *arena, const char **dss, size_t *nactive, size_t *ndirty, arena_stats_t *astats, malloc_bin_stats_t *bstats, - malloc_large_stats_t *lstats); + malloc_large_stats_t *lstats, malloc_huge_stats_t *hstats); arena_t *arena_new(unsigned ind); void arena_boot(void); void arena_prefork(arena_t *arena); diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index 2d301bf1..a3e899ea 100644 --- a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -46,6 +46,7 @@ struct ctl_arena_stats_s { malloc_bin_stats_t bstats[NBINS]; malloc_large_stats_t *lstats; /* nlclasses elements. */ + malloc_huge_stats_t *hstats; /* nhclasses elements. 
*/ }; struct ctl_stats_s { diff --git a/include/jemalloc/internal/stats.h b/include/jemalloc/internal/stats.h index 6104cb3a..d8600ed4 100644 --- a/include/jemalloc/internal/stats.h +++ b/include/jemalloc/internal/stats.h @@ -4,6 +4,7 @@ typedef struct tcache_bin_stats_s tcache_bin_stats_t; typedef struct malloc_bin_stats_s malloc_bin_stats_t; typedef struct malloc_large_stats_s malloc_large_stats_t; +typedef struct malloc_huge_stats_s malloc_huge_stats_t; typedef struct arena_stats_s arena_stats_t; typedef struct chunk_stats_s chunk_stats_t; @@ -20,12 +21,6 @@ struct tcache_bin_stats_s { }; struct malloc_bin_stats_s { - /* - * Current number of bytes allocated, including objects currently - * cached by tcache. - */ - size_t allocated; - /* * Total number of allocation/deallocation requests served directly by * the bin. Note that tcache may allocate an object, then recycle it @@ -42,6 +37,12 @@ struct malloc_bin_stats_s { */ uint64_t nrequests; + /* + * Current number of regions of this size class, including regions + * currently cached by tcache. + */ + size_t curregs; + /* Number of tcache fills from this bin. */ uint64_t nfills; @@ -78,10 +79,25 @@ struct malloc_large_stats_s { */ uint64_t nrequests; - /* Current number of runs of this size class. */ + /* + * Current number of runs of this size class, including runs currently + * cached by tcache. + */ size_t curruns; }; +struct malloc_huge_stats_s { + /* + * Total number of allocation/deallocation requests served directly by + * the arena. + */ + uint64_t nmalloc; + uint64_t ndalloc; + + /* Current number of (multi-)chunk allocations of this size class. */ + size_t curhchunks; +}; + struct arena_stats_s { /* Number of bytes currently mapped. */ size_t mapped; @@ -104,10 +120,12 @@ struct arena_stats_s { size_t allocated_huge; uint64_t nmalloc_huge; uint64_t ndalloc_huge; - uint64_t nrequests_huge; /* One element for each large size class. 
*/ malloc_large_stats_t *lstats; + + /* One element for each huge size class. */ + malloc_huge_stats_t *hstats; }; struct chunk_stats_s { diff --git a/src/arena.c b/src/arena.c index 8872331d..74c36323 100644 --- a/src/arena.c +++ b/src/arena.c @@ -11,7 +11,8 @@ size_t map_bias; size_t map_misc_offset; size_t arena_maxrun; /* Max run size for arenas. */ size_t arena_maxclass; /* Max size class for arenas. */ -size_t nlclasses; /* Number of large size classes. */ +unsigned nlclasses; /* Number of large size classes. */ +unsigned nhclasses; /* Number of huge size classes. */ /******************************************************************************/ /* @@ -411,7 +412,7 @@ arena_chunk_alloc_internal(arena_t *arena, size_t size, size_t alignment, } void * -arena_chunk_alloc_huge(arena_t *arena, void *new_addr, size_t size, +arena_chunk_alloc_huge(arena_t *arena, void *new_addr, size_t usize, size_t alignment, bool *zero) { void *ret; @@ -422,26 +423,33 @@ arena_chunk_alloc_huge(arena_t *arena, void *new_addr, size_t size, chunk_alloc = arena->chunk_alloc; chunk_dalloc = arena->chunk_dalloc; if (config_stats) { + index_t index = size2index(usize) - nlclasses - NBINS; + /* Optimistically update stats prior to unlocking. */ - arena->stats.mapped += size; - arena->stats.allocated_huge += size; + arena->stats.allocated_huge += usize; arena->stats.nmalloc_huge++; - arena->stats.nrequests_huge++; + arena->stats.hstats[index].nmalloc++; + arena->stats.hstats[index].curhchunks++; + arena->stats.mapped += usize; } - arena->nactive += (size >> LG_PAGE); + arena->nactive += (usize >> LG_PAGE); malloc_mutex_unlock(&arena->lock); ret = chunk_alloc_arena(chunk_alloc, chunk_dalloc, arena->ind, - new_addr, size, alignment, zero); + new_addr, usize, alignment, zero); if (config_stats) { if (ret != NULL) - stats_cactive_add(size); + stats_cactive_add(usize); else { - /* Revert optimistic stats updates. 
*/ + index_t index = size2index(usize) - nlclasses - NBINS; + malloc_mutex_lock(&arena->lock); - arena->stats.mapped -= size; - arena->stats.allocated_huge -= size; + /* Revert optimistic stats updates. */ + arena->stats.allocated_huge -= usize; arena->stats.nmalloc_huge--; + arena->stats.hstats[index].nmalloc--; + arena->stats.hstats[index].curhchunks--; + arena->stats.mapped -= usize; malloc_mutex_unlock(&arena->lock); } } @@ -534,21 +542,25 @@ arena_chunk_dalloc_internal(arena_t *arena, arena_chunk_t *chunk) } void -arena_chunk_dalloc_huge(arena_t *arena, void *chunk, size_t size) +arena_chunk_dalloc_huge(arena_t *arena, void *chunk, size_t usize) { chunk_dalloc_t *chunk_dalloc; malloc_mutex_lock(&arena->lock); chunk_dalloc = arena->chunk_dalloc; if (config_stats) { - arena->stats.mapped -= size; - arena->stats.allocated_huge -= size; + index_t index = size2index(usize) - nlclasses - NBINS; + arena->stats.ndalloc_huge++; - stats_cactive_sub(size); + arena->stats.allocated_huge -= usize; + arena->stats.hstats[index].ndalloc++; + arena->stats.hstats[index].curhchunks--; + arena->stats.mapped -= usize; + stats_cactive_sub(usize); } - arena->nactive -= (size >> LG_PAGE); + arena->nactive -= (usize >> LG_PAGE); malloc_mutex_unlock(&arena->lock); - chunk_dalloc(chunk, size, arena->ind); + chunk_dalloc(chunk, usize, arena->ind); } static void @@ -1300,9 +1312,9 @@ arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, index_t binind, tbin->avail[nfill - 1 - i] = ptr; } if (config_stats) { - bin->stats.allocated += i * arena_bin_info[binind].reg_size; bin->stats.nmalloc += i; bin->stats.nrequests += tbin->tstats.nrequests; + bin->stats.curregs += i; bin->stats.nfills++; tbin->tstats.nrequests = 0; } @@ -1436,9 +1448,9 @@ arena_malloc_small(arena_t *arena, size_t size, bool zero) } if (config_stats) { - bin->stats.allocated += size; bin->stats.nmalloc++; bin->stats.nrequests++; + bin->stats.curregs++; } malloc_mutex_unlock(&bin->lock); if (config_prof && !isthreaded 
&& arena_prof_accum(arena, size)) @@ -1678,7 +1690,6 @@ arena_dalloc_bin_locked_impl(arena_t *arena, arena_chunk_t *chunk, void *ptr, arena_run_t *run; arena_bin_t *bin; arena_bin_info_t *bin_info; - size_t size; index_t binind; pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; @@ -1687,8 +1698,6 @@ arena_dalloc_bin_locked_impl(arena_t *arena, arena_chunk_t *chunk, void *ptr, binind = run->binind; bin = &arena->bins[binind]; bin_info = &arena_bin_info[binind]; - if (config_fill || config_stats) - size = bin_info->reg_size; if (!junked && config_fill && unlikely(opt_junk)) arena_dalloc_junk_small(ptr, bin_info); @@ -1701,8 +1710,8 @@ arena_dalloc_bin_locked_impl(arena_t *arena, arena_chunk_t *chunk, void *ptr, arena_bin_lower_run(arena, chunk, run, bin); if (config_stats) { - bin->stats.allocated -= size; bin->stats.ndalloc++; + bin->stats.curregs--; } } @@ -2102,7 +2111,7 @@ arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec) void arena_stats_merge(arena_t *arena, const char **dss, size_t *nactive, size_t *ndirty, arena_stats_t *astats, malloc_bin_stats_t *bstats, - malloc_large_stats_t *lstats) + malloc_large_stats_t *lstats, malloc_huge_stats_t *hstats) { unsigned i; @@ -2122,7 +2131,6 @@ arena_stats_merge(arena_t *arena, const char **dss, size_t *nactive, astats->allocated_huge += arena->stats.allocated_huge; astats->nmalloc_huge += arena->stats.nmalloc_huge; astats->ndalloc_huge += arena->stats.ndalloc_huge; - astats->nrequests_huge += arena->stats.nrequests_huge; for (i = 0; i < nlclasses; i++) { lstats[i].nmalloc += arena->stats.lstats[i].nmalloc; @@ -2130,16 +2138,22 @@ arena_stats_merge(arena_t *arena, const char **dss, size_t *nactive, lstats[i].nrequests += arena->stats.lstats[i].nrequests; lstats[i].curruns += arena->stats.lstats[i].curruns; } + + for (i = 0; i < nhclasses; i++) { + hstats[i].nmalloc += arena->stats.hstats[i].nmalloc; + hstats[i].ndalloc += arena->stats.hstats[i].ndalloc; + hstats[i].curhchunks += 
arena->stats.hstats[i].curhchunks; + } malloc_mutex_unlock(&arena->lock); for (i = 0; i < NBINS; i++) { arena_bin_t *bin = &arena->bins[i]; malloc_mutex_lock(&bin->lock); - bstats[i].allocated += bin->stats.allocated; bstats[i].nmalloc += bin->stats.nmalloc; bstats[i].ndalloc += bin->stats.ndalloc; bstats[i].nrequests += bin->stats.nrequests; + bstats[i].curregs += bin->stats.curregs; if (config_tcache) { bstats[i].nfills += bin->stats.nfills; bstats[i].nflushes += bin->stats.nflushes; @@ -2159,12 +2173,13 @@ arena_new(unsigned ind) arena_bin_t *bin; /* - * Allocate arena and arena->lstats contiguously, mainly because there - * is no way to clean up if base_alloc() OOMs. + * Allocate arena, arena->lstats, and arena->hstats contiguously, mainly + * because there is no way to clean up if base_alloc() OOMs. */ if (config_stats) { arena = (arena_t *)base_alloc(CACHELINE_CEILING(sizeof(arena_t)) - + nlclasses * sizeof(malloc_large_stats_t)); + + QUANTUM_CEILING(nlclasses * sizeof(malloc_large_stats_t) + + nhclasses) * sizeof(malloc_huge_stats_t)); } else arena = (arena_t *)base_alloc(sizeof(arena_t)); if (arena == NULL) @@ -2184,6 +2199,11 @@ arena_new(unsigned ind) CACHELINE_CEILING(sizeof(arena_t))); memset(arena->stats.lstats, 0, nlclasses * sizeof(malloc_large_stats_t)); + arena->stats.hstats = (malloc_huge_stats_t *)(((void *)arena) + + CACHELINE_CEILING(sizeof(arena_t)) + + QUANTUM_CEILING(nlclasses * sizeof(malloc_large_stats_t))); + memset(arena->stats.hstats, 0, nhclasses * + sizeof(malloc_huge_stats_t)); if (config_tcache) ql_new(&arena->tcache_ql); } @@ -2369,6 +2389,7 @@ arena_boot(void) } assert(arena_maxclass > 0); nlclasses = size2index(arena_maxclass) - size2index(SMALL_MAXCLASS); + nhclasses = NSIZES - nlclasses - NBINS; bin_info_init(); } diff --git a/src/ctl.c b/src/ctl.c index 37f8f42a..72598b3d 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -122,6 +122,8 @@ CTL_PROTO(arenas_bin_i_run_size) INDEX_PROTO(arenas_bin_i) CTL_PROTO(arenas_lrun_i_size) 
INDEX_PROTO(arenas_lrun_i) +CTL_PROTO(arenas_hchunk_i_size) +INDEX_PROTO(arenas_hchunk_i) CTL_PROTO(arenas_narenas) CTL_PROTO(arenas_initialized) CTL_PROTO(arenas_quantum) @@ -130,6 +132,7 @@ CTL_PROTO(arenas_tcache_max) CTL_PROTO(arenas_nbins) CTL_PROTO(arenas_nhbins) CTL_PROTO(arenas_nlruns) +CTL_PROTO(arenas_nhchunks) CTL_PROTO(arenas_extend) CTL_PROTO(prof_thread_active_init) CTL_PROTO(prof_active) @@ -152,10 +155,10 @@ CTL_PROTO(stats_arenas_i_huge_allocated) CTL_PROTO(stats_arenas_i_huge_nmalloc) CTL_PROTO(stats_arenas_i_huge_ndalloc) CTL_PROTO(stats_arenas_i_huge_nrequests) -CTL_PROTO(stats_arenas_i_bins_j_allocated) CTL_PROTO(stats_arenas_i_bins_j_nmalloc) CTL_PROTO(stats_arenas_i_bins_j_ndalloc) CTL_PROTO(stats_arenas_i_bins_j_nrequests) +CTL_PROTO(stats_arenas_i_bins_j_curregs) CTL_PROTO(stats_arenas_i_bins_j_nfills) CTL_PROTO(stats_arenas_i_bins_j_nflushes) CTL_PROTO(stats_arenas_i_bins_j_nruns) @@ -167,6 +170,11 @@ CTL_PROTO(stats_arenas_i_lruns_j_ndalloc) CTL_PROTO(stats_arenas_i_lruns_j_nrequests) CTL_PROTO(stats_arenas_i_lruns_j_curruns) INDEX_PROTO(stats_arenas_i_lruns_j) +CTL_PROTO(stats_arenas_i_hchunks_j_nmalloc) +CTL_PROTO(stats_arenas_i_hchunks_j_ndalloc) +CTL_PROTO(stats_arenas_i_hchunks_j_nrequests) +CTL_PROTO(stats_arenas_i_hchunks_j_curhchunks) +INDEX_PROTO(stats_arenas_i_hchunks_j) CTL_PROTO(stats_arenas_i_nthreads) CTL_PROTO(stats_arenas_i_dss) CTL_PROTO(stats_arenas_i_pactive) @@ -221,60 +229,60 @@ static const ctl_named_node_t thread_node[] = { }; static const ctl_named_node_t config_node[] = { - {NAME("debug"), CTL(config_debug)}, - {NAME("fill"), CTL(config_fill)}, - {NAME("lazy_lock"), CTL(config_lazy_lock)}, - {NAME("munmap"), CTL(config_munmap)}, - {NAME("prof"), CTL(config_prof)}, - {NAME("prof_libgcc"), CTL(config_prof_libgcc)}, - {NAME("prof_libunwind"), CTL(config_prof_libunwind)}, - {NAME("stats"), CTL(config_stats)}, - {NAME("tcache"), CTL(config_tcache)}, - {NAME("tls"), CTL(config_tls)}, - {NAME("utrace"), 
CTL(config_utrace)}, - {NAME("valgrind"), CTL(config_valgrind)}, - {NAME("xmalloc"), CTL(config_xmalloc)} + {NAME("debug"), CTL(config_debug)}, + {NAME("fill"), CTL(config_fill)}, + {NAME("lazy_lock"), CTL(config_lazy_lock)}, + {NAME("munmap"), CTL(config_munmap)}, + {NAME("prof"), CTL(config_prof)}, + {NAME("prof_libgcc"), CTL(config_prof_libgcc)}, + {NAME("prof_libunwind"), CTL(config_prof_libunwind)}, + {NAME("stats"), CTL(config_stats)}, + {NAME("tcache"), CTL(config_tcache)}, + {NAME("tls"), CTL(config_tls)}, + {NAME("utrace"), CTL(config_utrace)}, + {NAME("valgrind"), CTL(config_valgrind)}, + {NAME("xmalloc"), CTL(config_xmalloc)} }; static const ctl_named_node_t opt_node[] = { - {NAME("abort"), CTL(opt_abort)}, - {NAME("dss"), CTL(opt_dss)}, - {NAME("lg_chunk"), CTL(opt_lg_chunk)}, - {NAME("narenas"), CTL(opt_narenas)}, - {NAME("lg_dirty_mult"), CTL(opt_lg_dirty_mult)}, - {NAME("stats_print"), CTL(opt_stats_print)}, - {NAME("junk"), CTL(opt_junk)}, - {NAME("zero"), CTL(opt_zero)}, - {NAME("quarantine"), CTL(opt_quarantine)}, - {NAME("redzone"), CTL(opt_redzone)}, - {NAME("utrace"), CTL(opt_utrace)}, - {NAME("xmalloc"), CTL(opt_xmalloc)}, - {NAME("tcache"), CTL(opt_tcache)}, - {NAME("lg_tcache_max"), CTL(opt_lg_tcache_max)}, - {NAME("prof"), CTL(opt_prof)}, - {NAME("prof_prefix"), CTL(opt_prof_prefix)}, - {NAME("prof_active"), CTL(opt_prof_active)}, + {NAME("abort"), CTL(opt_abort)}, + {NAME("dss"), CTL(opt_dss)}, + {NAME("lg_chunk"), CTL(opt_lg_chunk)}, + {NAME("narenas"), CTL(opt_narenas)}, + {NAME("lg_dirty_mult"), CTL(opt_lg_dirty_mult)}, + {NAME("stats_print"), CTL(opt_stats_print)}, + {NAME("junk"), CTL(opt_junk)}, + {NAME("zero"), CTL(opt_zero)}, + {NAME("quarantine"), CTL(opt_quarantine)}, + {NAME("redzone"), CTL(opt_redzone)}, + {NAME("utrace"), CTL(opt_utrace)}, + {NAME("xmalloc"), CTL(opt_xmalloc)}, + {NAME("tcache"), CTL(opt_tcache)}, + {NAME("lg_tcache_max"), CTL(opt_lg_tcache_max)}, + {NAME("prof"), CTL(opt_prof)}, + {NAME("prof_prefix"), 
CTL(opt_prof_prefix)}, + {NAME("prof_active"), CTL(opt_prof_active)}, {NAME("prof_thread_active_init"), CTL(opt_prof_thread_active_init)}, - {NAME("lg_prof_sample"), CTL(opt_lg_prof_sample)}, - {NAME("lg_prof_interval"), CTL(opt_lg_prof_interval)}, - {NAME("prof_gdump"), CTL(opt_prof_gdump)}, - {NAME("prof_final"), CTL(opt_prof_final)}, - {NAME("prof_leak"), CTL(opt_prof_leak)}, - {NAME("prof_accum"), CTL(opt_prof_accum)} + {NAME("lg_prof_sample"), CTL(opt_lg_prof_sample)}, + {NAME("lg_prof_interval"), CTL(opt_lg_prof_interval)}, + {NAME("prof_gdump"), CTL(opt_prof_gdump)}, + {NAME("prof_final"), CTL(opt_prof_final)}, + {NAME("prof_leak"), CTL(opt_prof_leak)}, + {NAME("prof_accum"), CTL(opt_prof_accum)} }; static const ctl_named_node_t chunk_node[] = { - {NAME("alloc"), CTL(arena_i_chunk_alloc)}, - {NAME("dalloc"), CTL(arena_i_chunk_dalloc)} + {NAME("alloc"), CTL(arena_i_chunk_alloc)}, + {NAME("dalloc"), CTL(arena_i_chunk_dalloc)} }; static const ctl_named_node_t arena_i_node[] = { - {NAME("purge"), CTL(arena_i_purge)}, - {NAME("dss"), CTL(arena_i_dss)}, - {NAME("chunk"), CHILD(named, chunk)}, + {NAME("purge"), CTL(arena_i_purge)}, + {NAME("dss"), CTL(arena_i_dss)}, + {NAME("chunk"), CHILD(named, chunk)}, }; static const ctl_named_node_t super_arena_i_node[] = { - {NAME(""), CHILD(named, arena_i)} + {NAME(""), CHILD(named, arena_i)} }; static const ctl_indexed_node_t arena_node[] = { @@ -282,12 +290,12 @@ static const ctl_indexed_node_t arena_node[] = { }; static const ctl_named_node_t arenas_bin_i_node[] = { - {NAME("size"), CTL(arenas_bin_i_size)}, - {NAME("nregs"), CTL(arenas_bin_i_nregs)}, - {NAME("run_size"), CTL(arenas_bin_i_run_size)} + {NAME("size"), CTL(arenas_bin_i_size)}, + {NAME("nregs"), CTL(arenas_bin_i_nregs)}, + {NAME("run_size"), CTL(arenas_bin_i_run_size)} }; static const ctl_named_node_t super_arenas_bin_i_node[] = { - {NAME(""), CHILD(named, arenas_bin_i)} + {NAME(""), CHILD(named, arenas_bin_i)} }; static const ctl_indexed_node_t 
arenas_bin_node[] = { @@ -295,28 +303,41 @@ static const ctl_indexed_node_t arenas_bin_node[] = { }; static const ctl_named_node_t arenas_lrun_i_node[] = { - {NAME("size"), CTL(arenas_lrun_i_size)} + {NAME("size"), CTL(arenas_lrun_i_size)} }; static const ctl_named_node_t super_arenas_lrun_i_node[] = { - {NAME(""), CHILD(named, arenas_lrun_i)} + {NAME(""), CHILD(named, arenas_lrun_i)} }; static const ctl_indexed_node_t arenas_lrun_node[] = { {INDEX(arenas_lrun_i)} }; +static const ctl_named_node_t arenas_hchunk_i_node[] = { + {NAME("size"), CTL(arenas_hchunk_i_size)} +}; +static const ctl_named_node_t super_arenas_hchunk_i_node[] = { + {NAME(""), CHILD(named, arenas_hchunk_i)} +}; + +static const ctl_indexed_node_t arenas_hchunk_node[] = { + {INDEX(arenas_hchunk_i)} +}; + static const ctl_named_node_t arenas_node[] = { - {NAME("narenas"), CTL(arenas_narenas)}, - {NAME("initialized"), CTL(arenas_initialized)}, - {NAME("quantum"), CTL(arenas_quantum)}, - {NAME("page"), CTL(arenas_page)}, - {NAME("tcache_max"), CTL(arenas_tcache_max)}, - {NAME("nbins"), CTL(arenas_nbins)}, - {NAME("nhbins"), CTL(arenas_nhbins)}, - {NAME("bin"), CHILD(indexed, arenas_bin)}, - {NAME("nlruns"), CTL(arenas_nlruns)}, - {NAME("lrun"), CHILD(indexed, arenas_lrun)}, - {NAME("extend"), CTL(arenas_extend)} + {NAME("narenas"), CTL(arenas_narenas)}, + {NAME("initialized"), CTL(arenas_initialized)}, + {NAME("quantum"), CTL(arenas_quantum)}, + {NAME("page"), CTL(arenas_page)}, + {NAME("tcache_max"), CTL(arenas_tcache_max)}, + {NAME("nbins"), CTL(arenas_nbins)}, + {NAME("nhbins"), CTL(arenas_nhbins)}, + {NAME("bin"), CHILD(indexed, arenas_bin)}, + {NAME("nlruns"), CTL(arenas_nlruns)}, + {NAME("lrun"), CHILD(indexed, arenas_lrun)}, + {NAME("nhchunks"), CTL(arenas_nhchunks)}, + {NAME("hchunk"), CHILD(indexed, arenas_hchunk)}, + {NAME("extend"), CTL(arenas_extend)} }; static const ctl_named_node_t prof_node[] = { @@ -329,45 +350,45 @@ static const ctl_named_node_t prof_node[] = { }; static const 
ctl_named_node_t stats_chunks_node[] = { - {NAME("current"), CTL(stats_chunks_current)}, - {NAME("total"), CTL(stats_chunks_total)}, - {NAME("high"), CTL(stats_chunks_high)} + {NAME("current"), CTL(stats_chunks_current)}, + {NAME("total"), CTL(stats_chunks_total)}, + {NAME("high"), CTL(stats_chunks_high)} }; static const ctl_named_node_t stats_arenas_i_small_node[] = { - {NAME("allocated"), CTL(stats_arenas_i_small_allocated)}, - {NAME("nmalloc"), CTL(stats_arenas_i_small_nmalloc)}, - {NAME("ndalloc"), CTL(stats_arenas_i_small_ndalloc)}, - {NAME("nrequests"), CTL(stats_arenas_i_small_nrequests)} + {NAME("allocated"), CTL(stats_arenas_i_small_allocated)}, + {NAME("nmalloc"), CTL(stats_arenas_i_small_nmalloc)}, + {NAME("ndalloc"), CTL(stats_arenas_i_small_ndalloc)}, + {NAME("nrequests"), CTL(stats_arenas_i_small_nrequests)} }; static const ctl_named_node_t stats_arenas_i_large_node[] = { - {NAME("allocated"), CTL(stats_arenas_i_large_allocated)}, - {NAME("nmalloc"), CTL(stats_arenas_i_large_nmalloc)}, - {NAME("ndalloc"), CTL(stats_arenas_i_large_ndalloc)}, - {NAME("nrequests"), CTL(stats_arenas_i_large_nrequests)} + {NAME("allocated"), CTL(stats_arenas_i_large_allocated)}, + {NAME("nmalloc"), CTL(stats_arenas_i_large_nmalloc)}, + {NAME("ndalloc"), CTL(stats_arenas_i_large_ndalloc)}, + {NAME("nrequests"), CTL(stats_arenas_i_large_nrequests)} }; static const ctl_named_node_t stats_arenas_i_huge_node[] = { - {NAME("allocated"), CTL(stats_arenas_i_huge_allocated)}, - {NAME("nmalloc"), CTL(stats_arenas_i_huge_nmalloc)}, - {NAME("ndalloc"), CTL(stats_arenas_i_huge_ndalloc)}, - {NAME("nrequests"), CTL(stats_arenas_i_huge_nrequests)}, + {NAME("allocated"), CTL(stats_arenas_i_huge_allocated)}, + {NAME("nmalloc"), CTL(stats_arenas_i_huge_nmalloc)}, + {NAME("ndalloc"), CTL(stats_arenas_i_huge_ndalloc)}, + {NAME("nrequests"), CTL(stats_arenas_i_huge_nrequests)} }; static const ctl_named_node_t stats_arenas_i_bins_j_node[] = { - {NAME("allocated"), 
CTL(stats_arenas_i_bins_j_allocated)}, - {NAME("nmalloc"), CTL(stats_arenas_i_bins_j_nmalloc)}, - {NAME("ndalloc"), CTL(stats_arenas_i_bins_j_ndalloc)}, - {NAME("nrequests"), CTL(stats_arenas_i_bins_j_nrequests)}, - {NAME("nfills"), CTL(stats_arenas_i_bins_j_nfills)}, - {NAME("nflushes"), CTL(stats_arenas_i_bins_j_nflushes)}, - {NAME("nruns"), CTL(stats_arenas_i_bins_j_nruns)}, - {NAME("nreruns"), CTL(stats_arenas_i_bins_j_nreruns)}, - {NAME("curruns"), CTL(stats_arenas_i_bins_j_curruns)} + {NAME("nmalloc"), CTL(stats_arenas_i_bins_j_nmalloc)}, + {NAME("ndalloc"), CTL(stats_arenas_i_bins_j_ndalloc)}, + {NAME("nrequests"), CTL(stats_arenas_i_bins_j_nrequests)}, + {NAME("curregs"), CTL(stats_arenas_i_bins_j_curregs)}, + {NAME("nfills"), CTL(stats_arenas_i_bins_j_nfills)}, + {NAME("nflushes"), CTL(stats_arenas_i_bins_j_nflushes)}, + {NAME("nruns"), CTL(stats_arenas_i_bins_j_nruns)}, + {NAME("nreruns"), CTL(stats_arenas_i_bins_j_nreruns)}, + {NAME("curruns"), CTL(stats_arenas_i_bins_j_curruns)} }; static const ctl_named_node_t super_stats_arenas_i_bins_j_node[] = { - {NAME(""), CHILD(named, stats_arenas_i_bins_j)} + {NAME(""), CHILD(named, stats_arenas_i_bins_j)} }; static const ctl_indexed_node_t stats_arenas_i_bins_node[] = { @@ -375,36 +396,51 @@ static const ctl_indexed_node_t stats_arenas_i_bins_node[] = { }; static const ctl_named_node_t stats_arenas_i_lruns_j_node[] = { - {NAME("nmalloc"), CTL(stats_arenas_i_lruns_j_nmalloc)}, - {NAME("ndalloc"), CTL(stats_arenas_i_lruns_j_ndalloc)}, - {NAME("nrequests"), CTL(stats_arenas_i_lruns_j_nrequests)}, - {NAME("curruns"), CTL(stats_arenas_i_lruns_j_curruns)} + {NAME("nmalloc"), CTL(stats_arenas_i_lruns_j_nmalloc)}, + {NAME("ndalloc"), CTL(stats_arenas_i_lruns_j_ndalloc)}, + {NAME("nrequests"), CTL(stats_arenas_i_lruns_j_nrequests)}, + {NAME("curruns"), CTL(stats_arenas_i_lruns_j_curruns)} }; static const ctl_named_node_t super_stats_arenas_i_lruns_j_node[] = { - {NAME(""), CHILD(named, stats_arenas_i_lruns_j)} + 
{NAME(""), CHILD(named, stats_arenas_i_lruns_j)} }; static const ctl_indexed_node_t stats_arenas_i_lruns_node[] = { {INDEX(stats_arenas_i_lruns_j)} }; +static const ctl_named_node_t stats_arenas_i_hchunks_j_node[] = { + {NAME("nmalloc"), CTL(stats_arenas_i_hchunks_j_nmalloc)}, + {NAME("ndalloc"), CTL(stats_arenas_i_hchunks_j_ndalloc)}, + {NAME("nrequests"), CTL(stats_arenas_i_hchunks_j_nrequests)}, + {NAME("curhchunks"), CTL(stats_arenas_i_hchunks_j_curhchunks)} +}; +static const ctl_named_node_t super_stats_arenas_i_hchunks_j_node[] = { + {NAME(""), CHILD(named, stats_arenas_i_hchunks_j)} +}; + +static const ctl_indexed_node_t stats_arenas_i_hchunks_node[] = { + {INDEX(stats_arenas_i_hchunks_j)} +}; + static const ctl_named_node_t stats_arenas_i_node[] = { - {NAME("nthreads"), CTL(stats_arenas_i_nthreads)}, - {NAME("dss"), CTL(stats_arenas_i_dss)}, - {NAME("pactive"), CTL(stats_arenas_i_pactive)}, - {NAME("pdirty"), CTL(stats_arenas_i_pdirty)}, - {NAME("mapped"), CTL(stats_arenas_i_mapped)}, - {NAME("npurge"), CTL(stats_arenas_i_npurge)}, - {NAME("nmadvise"), CTL(stats_arenas_i_nmadvise)}, - {NAME("purged"), CTL(stats_arenas_i_purged)}, - {NAME("small"), CHILD(named, stats_arenas_i_small)}, - {NAME("large"), CHILD(named, stats_arenas_i_large)}, - {NAME("huge"), CHILD(named, stats_arenas_i_huge)}, - {NAME("bins"), CHILD(indexed, stats_arenas_i_bins)}, - {NAME("lruns"), CHILD(indexed, stats_arenas_i_lruns)} + {NAME("nthreads"), CTL(stats_arenas_i_nthreads)}, + {NAME("dss"), CTL(stats_arenas_i_dss)}, + {NAME("pactive"), CTL(stats_arenas_i_pactive)}, + {NAME("pdirty"), CTL(stats_arenas_i_pdirty)}, + {NAME("mapped"), CTL(stats_arenas_i_mapped)}, + {NAME("npurge"), CTL(stats_arenas_i_npurge)}, + {NAME("nmadvise"), CTL(stats_arenas_i_nmadvise)}, + {NAME("purged"), CTL(stats_arenas_i_purged)}, + {NAME("small"), CHILD(named, stats_arenas_i_small)}, + {NAME("large"), CHILD(named, stats_arenas_i_large)}, + {NAME("huge"), CHILD(named, stats_arenas_i_huge)}, + {NAME("bins"), 
CHILD(indexed, stats_arenas_i_bins)}, + {NAME("lruns"), CHILD(indexed, stats_arenas_i_lruns)}, + {NAME("hchunks"), CHILD(indexed, stats_arenas_i_hchunks)} }; static const ctl_named_node_t super_stats_arenas_i_node[] = { - {NAME(""), CHILD(named, stats_arenas_i)} + {NAME(""), CHILD(named, stats_arenas_i)} }; static const ctl_indexed_node_t stats_arenas_node[] = { @@ -412,12 +448,12 @@ static const ctl_indexed_node_t stats_arenas_node[] = { }; static const ctl_named_node_t stats_node[] = { - {NAME("cactive"), CTL(stats_cactive)}, - {NAME("allocated"), CTL(stats_allocated)}, - {NAME("active"), CTL(stats_active)}, - {NAME("mapped"), CTL(stats_mapped)}, - {NAME("chunks"), CHILD(named, stats_chunks)}, - {NAME("arenas"), CHILD(indexed, stats_arenas)} + {NAME("cactive"), CTL(stats_cactive)}, + {NAME("allocated"), CTL(stats_allocated)}, + {NAME("active"), CTL(stats_active)}, + {NAME("mapped"), CTL(stats_mapped)}, + {NAME("chunks"), CHILD(named, stats_chunks)}, + {NAME("arenas"), CHILD(indexed, stats_arenas)} }; static const ctl_named_node_t root_node[] = { @@ -453,6 +489,13 @@ ctl_arena_init(ctl_arena_stats_t *astats) return (true); } + if (astats->hstats == NULL) { + astats->hstats = (malloc_huge_stats_t *)a0malloc(nhclasses * + sizeof(malloc_huge_stats_t)); + if (astats->hstats == NULL) + return (true); + } + return (false); } @@ -472,6 +515,8 @@ ctl_arena_clear(ctl_arena_stats_t *astats) memset(astats->bstats, 0, NBINS * sizeof(malloc_bin_stats_t)); memset(astats->lstats, 0, nlclasses * sizeof(malloc_large_stats_t)); + memset(astats->hstats, 0, nhclasses * + sizeof(malloc_huge_stats_t)); } } @@ -481,10 +526,12 @@ ctl_arena_stats_amerge(ctl_arena_stats_t *cstats, arena_t *arena) unsigned i; arena_stats_merge(arena, &cstats->dss, &cstats->pactive, - &cstats->pdirty, &cstats->astats, cstats->bstats, cstats->lstats); + &cstats->pdirty, &cstats->astats, cstats->bstats, cstats->lstats, + cstats->hstats); for (i = 0; i < NBINS; i++) { - cstats->allocated_small += 
cstats->bstats[i].allocated; + cstats->allocated_small += cstats->bstats[i].curregs * + index2size(i); cstats->nmalloc_small += cstats->bstats[i].nmalloc; cstats->ndalloc_small += cstats->bstats[i].ndalloc; cstats->nrequests_small += cstats->bstats[i].nrequests; @@ -517,20 +564,12 @@ ctl_arena_stats_smerge(ctl_arena_stats_t *sstats, ctl_arena_stats_t *astats) sstats->astats.allocated_huge += astats->astats.allocated_huge; sstats->astats.nmalloc_huge += astats->astats.nmalloc_huge; sstats->astats.ndalloc_huge += astats->astats.ndalloc_huge; - sstats->astats.nrequests_huge += astats->astats.nrequests_huge; - - for (i = 0; i < nlclasses; i++) { - sstats->lstats[i].nmalloc += astats->lstats[i].nmalloc; - sstats->lstats[i].ndalloc += astats->lstats[i].ndalloc; - sstats->lstats[i].nrequests += astats->lstats[i].nrequests; - sstats->lstats[i].curruns += astats->lstats[i].curruns; - } for (i = 0; i < NBINS; i++) { - sstats->bstats[i].allocated += astats->bstats[i].allocated; sstats->bstats[i].nmalloc += astats->bstats[i].nmalloc; sstats->bstats[i].ndalloc += astats->bstats[i].ndalloc; sstats->bstats[i].nrequests += astats->bstats[i].nrequests; + sstats->bstats[i].curregs += astats->bstats[i].curregs; if (config_tcache) { sstats->bstats[i].nfills += astats->bstats[i].nfills; sstats->bstats[i].nflushes += @@ -540,6 +579,19 @@ ctl_arena_stats_smerge(ctl_arena_stats_t *sstats, ctl_arena_stats_t *astats) sstats->bstats[i].reruns += astats->bstats[i].reruns; sstats->bstats[i].curruns += astats->bstats[i].curruns; } + + for (i = 0; i < nlclasses; i++) { + sstats->lstats[i].nmalloc += astats->lstats[i].nmalloc; + sstats->lstats[i].ndalloc += astats->lstats[i].ndalloc; + sstats->lstats[i].nrequests += astats->lstats[i].nrequests; + sstats->lstats[i].curruns += astats->lstats[i].curruns; + } + + for (i = 0; i < nhclasses; i++) { + sstats->hstats[i].nmalloc += astats->hstats[i].nmalloc; + sstats->hstats[i].ndalloc += astats->hstats[i].ndalloc; + sstats->hstats[i].curhchunks += 
astats->hstats[i].curhchunks; + } } static void @@ -692,6 +744,8 @@ ctl_init(void) for (j = 0; j < i; j++) { a0free( ctl_stats.arenas[j].lstats); + a0free( + ctl_stats.arenas[j].hstats); } a0free(ctl_stats.arenas); ctl_stats.arenas = NULL; @@ -1600,7 +1654,7 @@ arenas_bin_i_index(const size_t *mib, size_t miblen, size_t i) return (super_arenas_bin_i_node); } -CTL_RO_NL_GEN(arenas_nlruns, nlclasses, size_t) +CTL_RO_NL_GEN(arenas_nlruns, nlclasses, unsigned) CTL_RO_NL_GEN(arenas_lrun_i_size, index2size(NBINS+mib[2]), size_t) static const ctl_named_node_t * arenas_lrun_i_index(const size_t *mib, size_t miblen, size_t i) @@ -1611,6 +1665,17 @@ arenas_lrun_i_index(const size_t *mib, size_t miblen, size_t i) return (super_arenas_lrun_i_node); } +CTL_RO_NL_GEN(arenas_nhchunks, nhclasses, unsigned) +CTL_RO_NL_GEN(arenas_hchunk_i_size, index2size(NBINS+nlclasses+mib[2]), size_t) +static const ctl_named_node_t * +arenas_hchunk_i_index(const size_t *mib, size_t miblen, size_t i) +{ + + if (i > nhclasses) + return (NULL); + return (super_arenas_hchunk_i_node); +} + static int arenas_extend_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) @@ -1784,16 +1849,16 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_huge_nmalloc, CTL_RO_CGEN(config_stats, stats_arenas_i_huge_ndalloc, ctl_stats.arenas[mib[2]].astats.ndalloc_huge, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_huge_nrequests, - ctl_stats.arenas[mib[2]].astats.nrequests_huge, uint64_t) + ctl_stats.arenas[mib[2]].astats.nmalloc_huge, uint64_t) /* Intentional. 
*/ -CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_allocated, - ctl_stats.arenas[mib[2]].bstats[mib[4]].allocated, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nmalloc, ctl_stats.arenas[mib[2]].bstats[mib[4]].nmalloc, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_ndalloc, ctl_stats.arenas[mib[2]].bstats[mib[4]].ndalloc, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nrequests, ctl_stats.arenas[mib[2]].bstats[mib[4]].nrequests, uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_curregs, + ctl_stats.arenas[mib[2]].bstats[mib[4]].curregs, size_t) CTL_RO_CGEN(config_stats && config_tcache, stats_arenas_i_bins_j_nfills, ctl_stats.arenas[mib[2]].bstats[mib[4]].nfills, uint64_t) CTL_RO_CGEN(config_stats && config_tcache, stats_arenas_i_bins_j_nflushes, @@ -1832,6 +1897,25 @@ stats_arenas_i_lruns_j_index(const size_t *mib, size_t miblen, size_t j) return (super_stats_arenas_i_lruns_j_node); } +CTL_RO_CGEN(config_stats, stats_arenas_i_hchunks_j_nmalloc, + ctl_stats.arenas[mib[2]].hstats[mib[4]].nmalloc, uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_hchunks_j_ndalloc, + ctl_stats.arenas[mib[2]].hstats[mib[4]].ndalloc, uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_hchunks_j_nrequests, + ctl_stats.arenas[mib[2]].hstats[mib[4]].nmalloc, /* Intentional. 
*/ + uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_hchunks_j_curhchunks, + ctl_stats.arenas[mib[2]].hstats[mib[4]].curhchunks, size_t) + +static const ctl_named_node_t * +stats_arenas_i_hchunks_j_index(const size_t *mib, size_t miblen, size_t j) +{ + + if (j > nhclasses) + return (NULL); + return (super_stats_arenas_i_hchunks_j_node); +} + static const ctl_named_node_t * stats_arenas_i_index(const size_t *mib, size_t miblen, size_t i) { diff --git a/src/huge.c b/src/huge.c index 6c9b97bb..5f46241d 100644 --- a/src/huge.c +++ b/src/huge.c @@ -104,6 +104,101 @@ huge_dalloc_junk(void *ptr, size_t usize) huge_dalloc_junk_t *huge_dalloc_junk = JEMALLOC_N(huge_dalloc_junk_impl); #endif +static void +huge_ralloc_no_move_stats_update(arena_t *arena, size_t oldsize, size_t usize) +{ + index_t oldindex = size2index(oldsize) - nlclasses - NBINS; + index_t index = size2index(usize) - nlclasses - NBINS; + + cassert(config_stats); + + arena->stats.ndalloc_huge++; + arena->stats.allocated_huge -= oldsize; + arena->stats.hstats[oldindex].ndalloc++; + arena->stats.hstats[oldindex].curhchunks--; + + arena->stats.nmalloc_huge++; + arena->stats.allocated_huge += usize; + arena->stats.hstats[index].nmalloc++; + arena->stats.hstats[index].curhchunks++; +} + +static void +huge_ralloc_no_move_similar(void *ptr, size_t oldsize, size_t usize, + size_t size, size_t extra, bool zero) +{ + size_t usize_next; + extent_node_t *node, key; + arena_t *arena; + + /* Increase usize to incorporate extra. */ + while (usize < s2u(size+extra) && (usize_next = s2u(usize+1)) < oldsize) + usize = usize_next; + + malloc_mutex_lock(&huge_mtx); + + key.addr = ptr; + node = extent_tree_ad_search(&huge, &key); + assert(node != NULL); + assert(node->addr == ptr); + + arena = node->arena; + + /* Update the size of the huge allocation if it changed. */ + if (oldsize != usize) { + assert(node->size != usize); + node->size = usize; + } + + malloc_mutex_unlock(&huge_mtx); + + /* Fill if necessary. 
*/ + if (oldsize < usize) { + if (zero || (config_fill && unlikely(opt_zero))) + memset(ptr + oldsize, 0, usize - oldsize); + else if (config_fill && unlikely(opt_junk)) + memset(ptr + oldsize, 0xa5, usize - oldsize); + } else if (config_fill && unlikely(opt_junk) && oldsize > usize) + memset(ptr + usize, 0x5a, oldsize - usize); + + if (config_stats) + huge_ralloc_no_move_stats_update(arena, oldsize, usize); +} + +static void +huge_ralloc_no_move_shrink(void *ptr, size_t oldsize, size_t usize) +{ + extent_node_t *node, key; + arena_t *arena; + void *excess_addr; + size_t excess_size; + + malloc_mutex_lock(&huge_mtx); + + key.addr = ptr; + node = extent_tree_ad_search(&huge, &key); + assert(node != NULL); + assert(node->addr == ptr); + + arena = node->arena; + + /* Update the size of the huge allocation. */ + node->size = usize; + + malloc_mutex_unlock(&huge_mtx); + + excess_addr = node->addr + CHUNK_CEILING(usize); + excess_size = CHUNK_CEILING(oldsize) - CHUNK_CEILING(usize); + + /* Zap the excess chunks. */ + huge_dalloc_junk(ptr + usize, oldsize - usize); + if (excess_size > 0) + arena_chunk_dalloc_huge(arena, excess_addr, excess_size); + + if (config_stats) + huge_ralloc_no_move_stats_update(arena, oldsize, usize); +} + static bool huge_ralloc_no_move_expand(void *ptr, size_t oldsize, size_t size, bool zero) { size_t usize; @@ -131,7 +226,6 @@ huge_ralloc_no_move_expand(void *ptr, size_t oldsize, size_t size, bool zero) { assert(node != NULL); assert(node->addr == ptr); - /* Find the current arena. 
*/ arena = node->arena; malloc_mutex_unlock(&huge_mtx); @@ -159,6 +253,10 @@ huge_ralloc_no_move_expand(void *ptr, size_t oldsize, size_t size, bool zero) { else if (unlikely(opt_zero) && !is_zeroed) memset(ptr + oldsize, 0, usize - oldsize); } + + if (config_stats) + huge_ralloc_no_move_stats_update(arena, oldsize, usize); + return (false); } @@ -185,78 +283,20 @@ huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, */ if (CHUNK_CEILING(oldsize) >= CHUNK_CEILING(usize) && CHUNK_CEILING(oldsize) <= CHUNK_CEILING(size+extra)) { - size_t usize_next; - - /* Increase usize to incorporate extra. */ - while (usize < s2u(size+extra) && (usize_next = s2u(usize+1)) < - oldsize) - usize = usize_next; - - /* Update the size of the huge allocation if it changed. */ - if (oldsize != usize) { - extent_node_t *node, key; - - malloc_mutex_lock(&huge_mtx); - - key.addr = ptr; - node = extent_tree_ad_search(&huge, &key); - assert(node != NULL); - assert(node->addr == ptr); - - assert(node->size != usize); - node->size = usize; - - malloc_mutex_unlock(&huge_mtx); - - if (oldsize < usize) { - if (zero || (config_fill && - unlikely(opt_zero))) { - memset(ptr + oldsize, 0, usize - - oldsize); - } else if (config_fill && unlikely(opt_junk)) { - memset(ptr + oldsize, 0xa5, usize - - oldsize); - } - } else if (config_fill && unlikely(opt_junk) && oldsize - > usize) - memset(ptr + usize, 0x5a, oldsize - usize); - } + huge_ralloc_no_move_similar(ptr, oldsize, usize, size, extra, + zero); return (false); } /* Shrink the allocation in-place. */ if (CHUNK_CEILING(oldsize) >= CHUNK_CEILING(usize)) { - extent_node_t *node, key; - void *excess_addr; - size_t excess_size; - - malloc_mutex_lock(&huge_mtx); - - key.addr = ptr; - node = extent_tree_ad_search(&huge, &key); - assert(node != NULL); - assert(node->addr == ptr); - - /* Update the size of the huge allocation. 
*/ - node->size = usize; - - malloc_mutex_unlock(&huge_mtx); - - excess_addr = node->addr + CHUNK_CEILING(usize); - excess_size = CHUNK_CEILING(oldsize) - CHUNK_CEILING(usize); - - /* Zap the excess chunks. */ - huge_dalloc_junk(ptr + usize, oldsize - usize); - if (excess_size > 0) { - arena_chunk_dalloc_huge(node->arena, excess_addr, - excess_size); - } - + huge_ralloc_no_move_shrink(ptr, oldsize, usize); return (false); } /* Attempt to expand the allocation in-place. */ - if (huge_ralloc_no_move_expand(ptr, oldsize, size + extra, zero)) { + if (huge_ralloc_no_move_expand(ptr, oldsize, size + extra, + zero)) { if (extra == 0) return (true); diff --git a/src/stats.c b/src/stats.c index 5c3d7017..16a18c50 100644 --- a/src/stats.c +++ b/src/stats.c @@ -48,8 +48,10 @@ static void stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, unsigned i); static void stats_arena_lruns_print(void (*write_cb)(void *, const char *), void *cbopaque, unsigned i); +static void stats_arena_hchunks_print( + void (*write_cb)(void *, const char *), void *cbopaque, unsigned i); static void stats_arena_print(void (*write_cb)(void *, const char *), - void *cbopaque, unsigned i, bool bins, bool large); + void *cbopaque, unsigned i, bool bins, bool large, bool huge); /******************************************************************************/ @@ -58,62 +60,55 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, unsigned i) { size_t page; - bool config_tcache; - unsigned nbins, j, gap_start; + bool config_tcache, in_gap; + unsigned nbins, j; CTL_GET("arenas.page", &page, size_t); CTL_GET("config.tcache", &config_tcache, bool); if (config_tcache) { malloc_cprintf(write_cb, cbopaque, - "bins: bin size regs pgs allocated nmalloc" - " ndalloc nrequests nfills nflushes" - " newruns reruns curruns\n"); + "bins: size ind allocated nmalloc" + " ndalloc nrequests curregs regs pgs" + " nfills nflushes newruns reruns" + " curruns\n"); } else { 
malloc_cprintf(write_cb, cbopaque, - "bins: bin size regs pgs allocated nmalloc" - " ndalloc newruns reruns curruns\n"); + "bins: size ind allocated nmalloc" + " ndalloc nrequests curregs regs pgs" + " newruns reruns curruns\n"); } CTL_GET("arenas.nbins", &nbins, unsigned); - for (j = 0, gap_start = UINT_MAX; j < nbins; j++) { + for (j = 0, in_gap = false; j < nbins; j++) { uint64_t nruns; CTL_IJ_GET("stats.arenas.0.bins.0.nruns", &nruns, uint64_t); - if (nruns == 0) { - if (gap_start == UINT_MAX) - gap_start = j; - } else { - size_t reg_size, run_size, allocated; + if (nruns == 0) + in_gap = true; + else { + size_t reg_size, run_size, curregs; uint32_t nregs; uint64_t nmalloc, ndalloc, nrequests, nfills, nflushes; uint64_t reruns; size_t curruns; - if (gap_start != UINT_MAX) { - if (j > gap_start + 1) { - /* Gap of more than one size class. */ - malloc_cprintf(write_cb, cbopaque, - "[%u..%u]\n", gap_start, - j - 1); - } else { - /* Gap of one size class. */ - malloc_cprintf(write_cb, cbopaque, - "[%u]\n", gap_start); - } - gap_start = UINT_MAX; + if (in_gap) { + malloc_cprintf(write_cb, cbopaque, + " ---\n"); + in_gap = false; } CTL_J_GET("arenas.bin.0.size", ®_size, size_t); CTL_J_GET("arenas.bin.0.nregs", &nregs, uint32_t); CTL_J_GET("arenas.bin.0.run_size", &run_size, size_t); - CTL_IJ_GET("stats.arenas.0.bins.0.allocated", - &allocated, size_t); CTL_IJ_GET("stats.arenas.0.bins.0.nmalloc", &nmalloc, uint64_t); CTL_IJ_GET("stats.arenas.0.bins.0.ndalloc", &ndalloc, uint64_t); + CTL_IJ_GET("stats.arenas.0.bins.0.curregs", + &curregs, size_t); + CTL_IJ_GET("stats.arenas.0.bins.0.nrequests", + &nrequests, uint64_t); if (config_tcache) { - CTL_IJ_GET("stats.arenas.0.bins.0.nrequests", - &nrequests, uint64_t); CTL_IJ_GET("stats.arenas.0.bins.0.nfills", &nfills, uint64_t); CTL_IJ_GET("stats.arenas.0.bins.0.nflushes", @@ -125,33 +120,28 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, size_t); if (config_tcache) { 
malloc_cprintf(write_cb, cbopaque, - "%13u %5zu %4u %3zu %12zu %12"PRIu64 - " %12"PRIu64" %12"PRIu64" %12"PRIu64 + "%20zu %3u %12zu %12"PRIu64" %12"PRIu64 + " %12"PRIu64" %12zu %4u %3zu %12"PRIu64 " %12"PRIu64" %12"PRIu64" %12"PRIu64 " %12zu\n", - j, reg_size, nregs, run_size / page, - allocated, nmalloc, ndalloc, nrequests, - nfills, nflushes, nruns, reruns, curruns); + reg_size, j, curregs * reg_size, nmalloc, + ndalloc, nrequests, curregs, nregs, run_size + / page, nfills, nflushes, nruns, reruns, + curruns); } else { malloc_cprintf(write_cb, cbopaque, - "%13u %5zu %4u %3zu %12zu %12"PRIu64 - " %12"PRIu64" %12"PRIu64" %12"PRIu64 - " %12zu\n", - j, reg_size, nregs, run_size / page, - allocated, nmalloc, ndalloc, nruns, reruns, - curruns); + "%20zu %3u %12zu %12"PRIu64" %12"PRIu64 + " %12"PRIu64" %12zu %4u %3zu %12"PRIu64 + " %12"PRIu64" %12zu\n", + reg_size, j, curregs * reg_size, nmalloc, + ndalloc, nrequests, curregs, nregs, + run_size / page, nruns, reruns, curruns); } } } - if (gap_start != UINT_MAX) { - if (j > gap_start + 1) { - /* Gap of more than one size class. */ - malloc_cprintf(write_cb, cbopaque, "[%u..%u]\n", - gap_start, j - 1); - } else { - /* Gap of one size class. 
*/ - malloc_cprintf(write_cb, cbopaque, "[%u]\n", gap_start); - } + if (in_gap) { + malloc_cprintf(write_cb, cbopaque, + " ---\n"); } } @@ -159,16 +149,15 @@ static void stats_arena_lruns_print(void (*write_cb)(void *, const char *), void *cbopaque, unsigned i) { - size_t page, nlruns, j; - ssize_t gap_start; - - CTL_GET("arenas.page", &page, size_t); + unsigned nbins, nlruns, j; + bool in_gap; malloc_cprintf(write_cb, cbopaque, - "large: size pages nmalloc ndalloc nrequests" - " curruns\n"); - CTL_GET("arenas.nlruns", &nlruns, size_t); - for (j = 0, gap_start = -1; j < nlruns; j++) { + "large: size ind allocated nmalloc ndalloc" + " nrequests curruns\n"); + CTL_GET("arenas.nbins", &nbins, unsigned); + CTL_GET("arenas.nlruns", &nlruns, unsigned); + for (j = 0, in_gap = false; j < nlruns; j++) { uint64_t nmalloc, ndalloc, nrequests; size_t run_size, curruns; @@ -178,32 +167,82 @@ stats_arena_lruns_print(void (*write_cb)(void *, const char *), void *cbopaque, uint64_t); CTL_IJ_GET("stats.arenas.0.lruns.0.nrequests", &nrequests, uint64_t); - if (nrequests == 0) { - if (gap_start == -1) - gap_start = j; - } else { + if (nrequests == 0) + in_gap = true; + else { CTL_J_GET("arenas.lrun.0.size", &run_size, size_t); CTL_IJ_GET("stats.arenas.0.lruns.0.curruns", &curruns, size_t); - if (gap_start != -1) { - malloc_cprintf(write_cb, cbopaque, "[%zu]\n", - j - gap_start); - gap_start = -1; + if (in_gap) { + malloc_cprintf(write_cb, cbopaque, + " ---\n"); + in_gap = false; } malloc_cprintf(write_cb, cbopaque, - "%13zu %5zu %12"PRIu64" %12"PRIu64" %12"PRIu64 + "%20zu %3u %12zu %12"PRIu64" %12"PRIu64" %12"PRIu64 " %12zu\n", - run_size, run_size / page, nmalloc, ndalloc, - nrequests, curruns); + run_size, nbins + j, curruns * run_size, nmalloc, + ndalloc, nrequests, curruns); } } - if (gap_start != -1) - malloc_cprintf(write_cb, cbopaque, "[%zu]\n", j - gap_start); + if (in_gap) { + malloc_cprintf(write_cb, cbopaque, + " ---\n"); + } +} + +static void 
+stats_arena_hchunks_print(void (*write_cb)(void *, const char *), + void *cbopaque, unsigned i) +{ + unsigned nbins, nlruns, nhchunks, j; + bool in_gap; + + malloc_cprintf(write_cb, cbopaque, + "huge: size ind allocated nmalloc ndalloc" + " nrequests curhchunks\n"); + CTL_GET("arenas.nbins", &nbins, unsigned); + CTL_GET("arenas.nlruns", &nlruns, unsigned); + CTL_GET("arenas.nhchunks", &nhchunks, unsigned); + for (j = 0, in_gap = false; j < nhchunks; j++) { + uint64_t nmalloc, ndalloc, nrequests; + size_t hchunk_size, curhchunks; + + CTL_IJ_GET("stats.arenas.0.hchunks.0.nmalloc", &nmalloc, + uint64_t); + CTL_IJ_GET("stats.arenas.0.hchunks.0.ndalloc", &ndalloc, + uint64_t); + CTL_IJ_GET("stats.arenas.0.hchunks.0.nrequests", &nrequests, + uint64_t); + if (nrequests == 0) + in_gap = true; + else { + CTL_J_GET("arenas.hchunk.0.size", &hchunk_size, + size_t); + CTL_IJ_GET("stats.arenas.0.hchunks.0.curhchunks", + &curhchunks, size_t); + if (in_gap) { + malloc_cprintf(write_cb, cbopaque, + " ---\n"); + in_gap = false; + } + malloc_cprintf(write_cb, cbopaque, + "%20zu %3u %12zu %12"PRIu64" %12"PRIu64" %12"PRIu64 + " %12zu\n", + hchunk_size, nbins + nlruns + j, + curhchunks * hchunk_size, nmalloc, ndalloc, + nrequests, curhchunks); + } + } + if (in_gap) { + malloc_cprintf(write_cb, cbopaque, + " ---\n"); + } } static void stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, - unsigned i, bool bins, bool large) + unsigned i, bool bins, bool large, bool huge) { unsigned nthreads; const char *dss; @@ -236,42 +275,51 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, nmadvise, nmadvise == 1 ? 
"" : "s", purged); malloc_cprintf(write_cb, cbopaque, - " allocated nmalloc ndalloc nrequests\n"); + " allocated nmalloc ndalloc" + " nrequests\n"); CTL_I_GET("stats.arenas.0.small.allocated", &small_allocated, size_t); CTL_I_GET("stats.arenas.0.small.nmalloc", &small_nmalloc, uint64_t); CTL_I_GET("stats.arenas.0.small.ndalloc", &small_ndalloc, uint64_t); CTL_I_GET("stats.arenas.0.small.nrequests", &small_nrequests, uint64_t); malloc_cprintf(write_cb, cbopaque, - "small: %12zu %12"PRIu64" %12"PRIu64" %12"PRIu64"\n", + "small: %12zu %12"PRIu64" %12"PRIu64" %12"PRIu64 + "\n", small_allocated, small_nmalloc, small_ndalloc, small_nrequests); CTL_I_GET("stats.arenas.0.large.allocated", &large_allocated, size_t); CTL_I_GET("stats.arenas.0.large.nmalloc", &large_nmalloc, uint64_t); CTL_I_GET("stats.arenas.0.large.ndalloc", &large_ndalloc, uint64_t); CTL_I_GET("stats.arenas.0.large.nrequests", &large_nrequests, uint64_t); malloc_cprintf(write_cb, cbopaque, - "large: %12zu %12"PRIu64" %12"PRIu64" %12"PRIu64"\n", + "large: %12zu %12"PRIu64" %12"PRIu64" %12"PRIu64 + "\n", large_allocated, large_nmalloc, large_ndalloc, large_nrequests); CTL_I_GET("stats.arenas.0.huge.allocated", &huge_allocated, size_t); CTL_I_GET("stats.arenas.0.huge.nmalloc", &huge_nmalloc, uint64_t); CTL_I_GET("stats.arenas.0.huge.ndalloc", &huge_ndalloc, uint64_t); CTL_I_GET("stats.arenas.0.huge.nrequests", &huge_nrequests, uint64_t); malloc_cprintf(write_cb, cbopaque, - "huge: %12zu %12"PRIu64" %12"PRIu64" %12"PRIu64"\n", + "huge: %12zu %12"PRIu64" %12"PRIu64" %12"PRIu64 + "\n", huge_allocated, huge_nmalloc, huge_ndalloc, huge_nrequests); malloc_cprintf(write_cb, cbopaque, - "total: %12zu %12"PRIu64" %12"PRIu64" %12"PRIu64"\n", + "total: %12zu %12"PRIu64" %12"PRIu64" %12"PRIu64 + "\n", small_allocated + large_allocated + huge_allocated, small_nmalloc + large_nmalloc + huge_nmalloc, small_ndalloc + large_ndalloc + huge_ndalloc, small_nrequests + large_nrequests + huge_nrequests); - malloc_cprintf(write_cb, 
cbopaque, "active: %12zu\n", pactive * page); + malloc_cprintf(write_cb, cbopaque, "active: %12zu\n", + pactive * page); CTL_I_GET("stats.arenas.0.mapped", &mapped, size_t); - malloc_cprintf(write_cb, cbopaque, "mapped: %12zu\n", mapped); + malloc_cprintf(write_cb, cbopaque, "mapped: %12zu\n", + mapped); if (bins) stats_arena_bins_print(write_cb, cbopaque, i); if (large) stats_arena_lruns_print(write_cb, cbopaque, i); + if (huge) + stats_arena_hchunks_print(write_cb, cbopaque, i); } void @@ -286,6 +334,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, bool unmerged = true; bool bins = true; bool large = true; + bool huge = true; /* * Refresh stats, in case mallctl() was called by the application. @@ -328,6 +377,9 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, case 'l': large = false; break; + case 'h': + huge = false; + break; default:; } } @@ -515,7 +567,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, malloc_cprintf(write_cb, cbopaque, "\nMerged arenas stats:\n"); stats_arena_print(write_cb, cbopaque, - narenas, bins, large); + narenas, bins, large, huge); } } } @@ -541,7 +593,8 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, cbopaque, "\narenas[%u]:\n", i); stats_arena_print(write_cb, - cbopaque, i, bins, large); + cbopaque, i, bins, large, + huge); } } } diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index a8f7aed6..028a9710 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -321,7 +321,8 @@ TEST_BEGIN(test_arenas_constants) TEST_ARENAS_CONSTANT(size_t, quantum, QUANTUM); TEST_ARENAS_CONSTANT(size_t, page, PAGE); TEST_ARENAS_CONSTANT(unsigned, nbins, NBINS); - TEST_ARENAS_CONSTANT(size_t, nlruns, nlclasses); + TEST_ARENAS_CONSTANT(unsigned, nlruns, nlclasses); + TEST_ARENAS_CONSTANT(unsigned, nhchunks, nhclasses); #undef TEST_ARENAS_CONSTANT } @@ -363,6 +364,23 @@ TEST_BEGIN(test_arenas_lrun_constants) } TEST_END 
+TEST_BEGIN(test_arenas_hchunk_constants) +{ + +#define TEST_ARENAS_HCHUNK_CONSTANT(t, name, expected) do { \ + t name; \ + size_t sz = sizeof(t); \ + assert_d_eq(mallctl("arenas.hchunk.0."#name, &name, &sz, NULL, \ + 0), 0, "Unexpected mallctl() failure"); \ + assert_zu_eq(name, expected, "Incorrect "#name" size"); \ +} while (0) + + TEST_ARENAS_HCHUNK_CONSTANT(size_t, size, chunksize); + +#undef TEST_ARENAS_HCHUNK_CONSTANT +} +TEST_END + TEST_BEGIN(test_arenas_extend) { unsigned narenas_before, arena, narenas_after; @@ -420,6 +438,7 @@ main(void) test_arenas_constants, test_arenas_bin_constants, test_arenas_lrun_constants, + test_arenas_hchunk_constants, test_arenas_extend, test_stats_arenas)); } diff --git a/test/unit/stats.c b/test/unit/stats.c index 78c78cd5..fd92d542 100644 --- a/test/unit/stats.c +++ b/test/unit/stats.c @@ -97,7 +97,7 @@ TEST_END TEST_BEGIN(test_stats_arenas_summary) { unsigned arena; - void *little, *large; + void *little, *large, *huge; uint64_t epoch; size_t sz; int expected = config_stats ? 0 : ENOENT; @@ -112,6 +112,8 @@ TEST_BEGIN(test_stats_arenas_summary) assert_ptr_not_null(little, "Unexpected mallocx() failure"); large = mallocx(arena_maxclass, 0); assert_ptr_not_null(large, "Unexpected mallocx() failure"); + huge = mallocx(chunksize, 0); + assert_ptr_not_null(huge, "Unexpected mallocx() failure"); assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, "Unexpected mallctl() failure"); @@ -139,6 +141,7 @@ TEST_BEGIN(test_stats_arenas_summary) dallocx(little, 0); dallocx(large, 0); + dallocx(huge, 0); } TEST_END @@ -251,11 +254,51 @@ TEST_BEGIN(test_stats_arenas_large) } TEST_END +TEST_BEGIN(test_stats_arenas_huge) +{ + unsigned arena; + void *p; + size_t sz, allocated; + uint64_t epoch, nmalloc, ndalloc; + int expected = config_stats ? 
0 : ENOENT; + + arena = 0; + assert_d_eq(mallctl("thread.arena", NULL, NULL, &arena, sizeof(arena)), + 0, "Unexpected mallctl() failure"); + + p = mallocx(chunksize, 0); + assert_ptr_not_null(p, "Unexpected mallocx() failure"); + + assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch)), 0, + "Unexpected mallctl() failure"); + + sz = sizeof(size_t); + assert_d_eq(mallctl("stats.arenas.0.huge.allocated", &allocated, &sz, + NULL, 0), expected, "Unexpected mallctl() result"); + sz = sizeof(uint64_t); + assert_d_eq(mallctl("stats.arenas.0.huge.nmalloc", &nmalloc, &sz, + NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.huge.ndalloc", &ndalloc, &sz, + NULL, 0), expected, "Unexpected mallctl() result"); + + if (config_stats) { + assert_zu_gt(allocated, 0, + "allocated should be greater than zero"); + assert_zu_gt(nmalloc, 0, + "nmalloc should be greater than zero"); + assert_zu_ge(nmalloc, ndalloc, + "nmalloc should be at least as large as ndalloc"); + } + + dallocx(p, 0); +} +TEST_END + TEST_BEGIN(test_stats_arenas_bins) { unsigned arena; void *p; - size_t sz, allocated, curruns; + size_t sz, curruns, curregs; uint64_t epoch, nmalloc, ndalloc, nrequests, nfills, nflushes; uint64_t nruns, nreruns; int expected = config_stats ? 
0 : ENOENT; @@ -273,9 +316,6 @@ TEST_BEGIN(test_stats_arenas_bins) assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch)), 0, "Unexpected mallctl() failure"); - sz = sizeof(size_t); - assert_d_eq(mallctl("stats.arenas.0.bins.0.allocated", &allocated, &sz, - NULL, 0), expected, "Unexpected mallctl() result"); sz = sizeof(uint64_t); assert_d_eq(mallctl("stats.arenas.0.bins.0.nmalloc", &nmalloc, &sz, NULL, 0), expected, "Unexpected mallctl() result"); @@ -283,6 +323,9 @@ TEST_BEGIN(test_stats_arenas_bins) NULL, 0), expected, "Unexpected mallctl() result"); assert_d_eq(mallctl("stats.arenas.0.bins.0.nrequests", &nrequests, &sz, NULL, 0), expected, "Unexpected mallctl() result"); + sz = sizeof(size_t); + assert_d_eq(mallctl("stats.arenas.0.bins.0.curregs", &curregs, &sz, + NULL, 0), expected, "Unexpected mallctl() result"); assert_d_eq(mallctl("stats.arenas.0.bins.0.nfills", &nfills, &sz, NULL, 0), config_tcache ? expected : ENOENT, @@ -300,14 +343,14 @@ TEST_BEGIN(test_stats_arenas_bins) NULL, 0), expected, "Unexpected mallctl() result"); if (config_stats) { - assert_zu_gt(allocated, 0, - "allocated should be greater than zero"); assert_u64_gt(nmalloc, 0, "nmalloc should be greater than zero"); assert_u64_ge(nmalloc, ndalloc, "nmalloc should be at least as large as ndalloc"); assert_u64_gt(nrequests, 0, "nrequests should be greater than zero"); + assert_zu_gt(curregs, 0, + "allocated should be greater than zero"); if (config_tcache) { assert_u64_gt(nfills, 0, "At least one fill should have occurred"); @@ -336,7 +379,7 @@ TEST_BEGIN(test_stats_arenas_lruns) assert_d_eq(mallctl("thread.arena", NULL, NULL, &arena, sizeof(arena)), 0, "Unexpected mallctl() failure"); - p = mallocx(SMALL_MAXCLASS+1, 0); + p = mallocx(LARGE_MINCLASS, 0); assert_ptr_not_null(p, "Unexpected mallocx() failure"); assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch)), 0, @@ -368,6 +411,46 @@ TEST_BEGIN(test_stats_arenas_lruns) } TEST_END +TEST_BEGIN(test_stats_arenas_hchunks) +{ 
+ unsigned arena; + void *p; + uint64_t epoch, nmalloc, ndalloc; + size_t curhchunks, sz; + int expected = config_stats ? 0 : ENOENT; + + arena = 0; + assert_d_eq(mallctl("thread.arena", NULL, NULL, &arena, sizeof(arena)), + 0, "Unexpected mallctl() failure"); + + p = mallocx(chunksize, 0); + assert_ptr_not_null(p, "Unexpected mallocx() failure"); + + assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch)), 0, + "Unexpected mallctl() failure"); + + sz = sizeof(uint64_t); + assert_d_eq(mallctl("stats.arenas.0.hchunks.0.nmalloc", &nmalloc, &sz, + NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.hchunks.0.ndalloc", &ndalloc, &sz, + NULL, 0), expected, "Unexpected mallctl() result"); + sz = sizeof(size_t); + assert_d_eq(mallctl("stats.arenas.0.hchunks.0.curhchunks", &curhchunks, + &sz, NULL, 0), expected, "Unexpected mallctl() result"); + + if (config_stats) { + assert_u64_gt(nmalloc, 0, + "nmalloc should be greater than zero"); + assert_u64_ge(nmalloc, ndalloc, + "nmalloc should be at least as large as ndalloc"); + assert_u64_gt(curhchunks, 0, + "At least one chunk should be currently allocated"); + } + + dallocx(p, 0); +} +TEST_END + int main(void) { @@ -379,6 +462,8 @@ main(void) test_stats_arenas_summary, test_stats_arenas_small, test_stats_arenas_large, + test_stats_arenas_huge, test_stats_arenas_bins, - test_stats_arenas_lruns)); + test_stats_arenas_lruns, + test_stats_arenas_hchunks)); } From 0cdabd2d489133e3cea8a00bdb9a986b24e57a66 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 14 Oct 2014 22:19:21 -0700 Subject: [PATCH 0572/3378] Update size class documentation. 
--- doc/jemalloc.xml.in | 110 +++++++++++++++++++++++++++++++++----------- 1 file changed, 84 insertions(+), 26 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 8111fc1d..fc01ad1b 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -501,13 +501,11 @@ for (i = 0; i < nbins; i++) { possible to find metadata for user objects very quickly. User objects are broken into three categories according to size: - small, large, and huge. Small objects are smaller than one page. Large - objects are smaller than the chunk size. Huge objects are a multiple of - the chunk size. Small and large objects are managed entirely by arenas; - huge objects are additionally aggregated in a single data structure that is - shared by all threads. Huge objects are typically used by applications - infrequently enough that this single data structure is not a scalability - issue. + small, large, and huge. Small and large objects are managed entirely by + arenas; huge objects are additionally aggregated in a single data structure + that is shared by all threads. Huge objects are typically used by + applications infrequently enough that this single data structure is not a + scalability issue. Each chunk that is managed by an arena tracks its contents as runs of contiguous pages (unused, backing a set of small objects, or backing one @@ -516,18 +514,18 @@ for (i = 0; i < nbins; i++) { allocations in constant time. Small objects are managed in groups by page runs. Each run maintains - a frontier and free list to track which regions are in use. Allocation - requests that are no more than half the quantum (8 or 16, depending on - architecture) are rounded up to the nearest power of two that is at least - sizeof(double). All other small - object size classes are multiples of the quantum, spaced such that internal - fragmentation is limited to approximately 25% for all but the smallest size - classes. 
Allocation requests that are larger than the maximum small size - class, but small enough to fit in an arena-managed chunk (see the opt.lg_chunk option), are - rounded up to the nearest run size. Allocation requests that are too large - to fit in an arena-managed chunk are rounded up to the nearest multiple of - the chunk size. + a bitmap to track which regions are in use. Allocation requests that are no + more than half the quantum (8 or 16, depending on architecture) are rounded + up to the nearest power of two that is at least sizeof(double). All other object size + classes are multiples of the quantum, spaced such that there are four size + classes for each doubling in size, which limits internal fragmentation to + approximately 20% for all but the smallest size classes. Small size classes + are smaller than four times the page size, large size classes are smaller + than the chunk size (see the opt.lg_chunk option), and + huge size classes extend from the chunk size up to one size class less than + the full address space size. Allocations are packed tightly together, which can be an issue for multi-threaded applications. If you need to assure that allocations do not @@ -554,13 +552,13 @@ for (i = 0; i < nbins; i++) {
- Small + Small lg [8] 16 - [16, 32, 48, ..., 128] + [16, 32, 48, 64, 80, 96, 112, 128] 32 @@ -580,17 +578,77 @@ for (i = 0; i < nbins; i++) { 512 - [2560, 3072, 3584] + [2560, 3072, 3584, 4096] + + + 1 KiB + [5 KiB, 6 KiB, 7 KiB, 8 KiB] + + + 2 KiB + [10 KiB, 12 KiB, 14 KiB] + + + Large + 2 KiB + [16 KiB] - Large 4 KiB - [4 KiB, 8 KiB, 12 KiB, ..., 4072 KiB] + [20 KiB, 24 KiB, 28 KiB, 32 KiB] + + + 8 KiB + [40 KiB, 48 KiB, 54 KiB, 64 KiB] + + + 16 KiB + [80 KiB, 96 KiB, 112 KiB, 128 KiB] + + + 32 KiB + [160 KiB, 192 KiB, 224 KiB, 256 KiB] + + + 64 KiB + [320 KiB, 384 KiB, 448 KiB, 512 KiB] + + + 128 KiB + [640 KiB, 768 KiB, 896 KiB, 1024 KiB] + + + 256 KiB + [1280 KiB, 1536 KiB, 1792 KiB, 2048 KiB] + + + 512 KiB + [2560 KiB, 3072 KiB, 3584 KiB] + + + Huge + 512 KiB + [4 MiB] + + + 1 MiB + [5 MiB, 6 MiB, 7 MiB, 8 MiB] + + + 2 MiB + [10 MiB, 12 MiB, 14 MiB, 16 MiB] - Huge 4 MiB - [4 MiB, 8 MiB, 12 MiB, ...] + [20 MiB, 24 MiB, 28 MiB, 32 MiB] + + + 8 MiB + [40 MiB, 48 MiB, 56 MiB, 64 MiB] + + + ... + ... From 9b41ac909facf4f09bb1b637b78ba647348e572e Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 14 Oct 2014 22:20:00 -0700 Subject: [PATCH 0573/3378] Fix huge allocation statistics. --- doc/jemalloc.xml.in | 5 +- include/jemalloc/internal/arena.h | 10 +- include/jemalloc/internal/private_symbols.txt | 3 + src/arena.c | 303 +++++++++++++----- src/huge.c | 95 ++---- 5 files changed, 254 insertions(+), 162 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index fc01ad1b..71b4cd19 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1719,9 +1719,8 @@ malloc_conf = "xmalloc:true";]]> Pointer to a counter that contains an approximate count of the current number of bytes in active pages. The estimate may be - high, but never low, because each arena rounds up to the nearest - multiple of the chunk size when computing its contribution to the - counter. Note that the epoch mallctl has no bearing on this counter. 
Furthermore, counter consistency is maintained via atomic operations, so it is necessary to use an atomic operation in diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index c31c8d7d..16c04d25 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -338,9 +338,15 @@ extern size_t arena_maxclass; /* Max size class for arenas. */ extern unsigned nlclasses; /* Number of large size classes. */ extern unsigned nhclasses; /* Number of huge size classes. */ -void *arena_chunk_alloc_huge(arena_t *arena, void *new_addr, size_t usize, - size_t alignment, bool *zero); +void *arena_chunk_alloc_huge(arena_t *arena, size_t usize, size_t alignment, + bool *zero); void arena_chunk_dalloc_huge(arena_t *arena, void *chunk, size_t usize); +void arena_chunk_ralloc_huge_similar(arena_t *arena, void *chunk, + size_t oldsize, size_t usize); +void arena_chunk_ralloc_huge_shrink(arena_t *arena, void *chunk, + size_t oldsize, size_t usize); +bool arena_chunk_ralloc_huge_expand(arena_t *arena, void *chunk, + size_t oldsize, size_t usize, bool *zero); void arena_purge_all(arena_t *arena); void arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, index_t binind, uint64_t prof_accumbytes); diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 66d48221..8eec874f 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -13,6 +13,9 @@ arena_choose arena_choose_hard arena_chunk_alloc_huge arena_chunk_dalloc_huge +arena_chunk_ralloc_huge_expand +arena_chunk_ralloc_huge_shrink +arena_chunk_ralloc_huge_similar arena_cleanup arena_dalloc arena_dalloc_bin diff --git a/src/arena.c b/src/arena.c index 74c36323..586e3c76 100644 --- a/src/arena.c +++ b/src/arena.c @@ -411,52 +411,6 @@ arena_chunk_alloc_internal(arena_t *arena, size_t size, size_t alignment, return (chunk); } -void * -arena_chunk_alloc_huge(arena_t 
*arena, void *new_addr, size_t usize, - size_t alignment, bool *zero) -{ - void *ret; - chunk_alloc_t *chunk_alloc; - chunk_dalloc_t *chunk_dalloc; - - malloc_mutex_lock(&arena->lock); - chunk_alloc = arena->chunk_alloc; - chunk_dalloc = arena->chunk_dalloc; - if (config_stats) { - index_t index = size2index(usize) - nlclasses - NBINS; - - /* Optimistically update stats prior to unlocking. */ - arena->stats.allocated_huge += usize; - arena->stats.nmalloc_huge++; - arena->stats.hstats[index].nmalloc++; - arena->stats.hstats[index].curhchunks++; - arena->stats.mapped += usize; - } - arena->nactive += (usize >> LG_PAGE); - malloc_mutex_unlock(&arena->lock); - - ret = chunk_alloc_arena(chunk_alloc, chunk_dalloc, arena->ind, - new_addr, usize, alignment, zero); - if (config_stats) { - if (ret != NULL) - stats_cactive_add(usize); - else { - index_t index = size2index(usize) - nlclasses - NBINS; - - malloc_mutex_lock(&arena->lock); - /* Revert optimistic stats updates. */ - arena->stats.allocated_huge -= usize; - arena->stats.nmalloc_huge--; - arena->stats.hstats[index].nmalloc--; - arena->stats.hstats[index].curhchunks--; - arena->stats.mapped -= usize; - malloc_mutex_unlock(&arena->lock); - } - } - - return (ret); -} - static arena_chunk_t * arena_chunk_init_hard(arena_t *arena) { @@ -528,41 +482,6 @@ arena_chunk_alloc(arena_t *arena) return (chunk); } -static void -arena_chunk_dalloc_internal(arena_t *arena, arena_chunk_t *chunk) -{ - chunk_dalloc_t *chunk_dalloc; - - chunk_dalloc = arena->chunk_dalloc; - malloc_mutex_unlock(&arena->lock); - chunk_dalloc((void *)chunk, chunksize, arena->ind); - malloc_mutex_lock(&arena->lock); - if (config_stats) - arena->stats.mapped -= chunksize; -} - -void -arena_chunk_dalloc_huge(arena_t *arena, void *chunk, size_t usize) -{ - chunk_dalloc_t *chunk_dalloc; - - malloc_mutex_lock(&arena->lock); - chunk_dalloc = arena->chunk_dalloc; - if (config_stats) { - index_t index = size2index(usize) - nlclasses - NBINS; - - 
arena->stats.ndalloc_huge++; - arena->stats.allocated_huge -= usize; - arena->stats.hstats[index].ndalloc++; - arena->stats.hstats[index].curhchunks--; - arena->stats.mapped -= usize; - stats_cactive_sub(usize); - } - arena->nactive -= (usize >> LG_PAGE); - malloc_mutex_unlock(&arena->lock); - chunk_dalloc(chunk, usize, arena->ind); -} - static void arena_chunk_dalloc(arena_t *arena, arena_chunk_t *chunk) { @@ -584,17 +503,237 @@ arena_chunk_dalloc(arena_t *arena, arena_chunk_t *chunk) if (arena->spare != NULL) { arena_chunk_t *spare = arena->spare; + chunk_dalloc_t *chunk_dalloc; arena->spare = chunk; if (arena_mapbits_dirty_get(spare, map_bias) != 0) { arena_dirty_remove(arena, spare, map_bias, chunk_npages-map_bias); } - arena_chunk_dalloc_internal(arena, spare); + chunk_dalloc = arena->chunk_dalloc; + malloc_mutex_unlock(&arena->lock); + chunk_dalloc((void *)spare, chunksize, arena->ind); + malloc_mutex_lock(&arena->lock); + if (config_stats) + arena->stats.mapped -= chunksize; } else arena->spare = chunk; } +static void +arena_huge_malloc_stats_update(arena_t *arena, size_t usize) +{ + index_t index = size2index(usize) - nlclasses - NBINS; + + cassert(config_stats); + + arena->stats.nmalloc_huge++; + arena->stats.allocated_huge += usize; + arena->stats.hstats[index].nmalloc++; + arena->stats.hstats[index].curhchunks++; +} + +static void +arena_huge_malloc_stats_update_undo(arena_t *arena, size_t usize) +{ + index_t index = size2index(usize) - nlclasses - NBINS; + + cassert(config_stats); + + arena->stats.nmalloc_huge--; + arena->stats.allocated_huge -= usize; + arena->stats.hstats[index].nmalloc--; + arena->stats.hstats[index].curhchunks--; +} + +static void +arena_huge_dalloc_stats_update(arena_t *arena, size_t usize) +{ + index_t index = size2index(usize) - nlclasses - NBINS; + + cassert(config_stats); + + arena->stats.ndalloc_huge++; + arena->stats.allocated_huge -= usize; + arena->stats.hstats[index].ndalloc++; + arena->stats.hstats[index].curhchunks--; +} 
+ +static void +arena_huge_dalloc_stats_update_undo(arena_t *arena, size_t usize) +{ + index_t index = size2index(usize) - nlclasses - NBINS; + + cassert(config_stats); + + arena->stats.ndalloc_huge--; + arena->stats.allocated_huge += usize; + arena->stats.hstats[index].ndalloc--; + arena->stats.hstats[index].curhchunks++; +} + +static void +arena_huge_ralloc_stats_update(arena_t *arena, size_t oldsize, size_t usize) +{ + + arena_huge_dalloc_stats_update(arena, oldsize); + arena_huge_malloc_stats_update(arena, usize); +} + +static void +arena_huge_ralloc_stats_update_undo(arena_t *arena, size_t oldsize, + size_t usize) +{ + + arena_huge_dalloc_stats_update_undo(arena, oldsize); + arena_huge_malloc_stats_update_undo(arena, usize); +} + +void * +arena_chunk_alloc_huge(arena_t *arena, size_t usize, size_t alignment, + bool *zero) +{ + void *ret; + chunk_alloc_t *chunk_alloc; + chunk_dalloc_t *chunk_dalloc; + size_t csize = CHUNK_CEILING(usize); + + malloc_mutex_lock(&arena->lock); + chunk_alloc = arena->chunk_alloc; + chunk_dalloc = arena->chunk_dalloc; + if (config_stats) { + /* Optimistically update stats prior to unlocking. */ + arena_huge_malloc_stats_update(arena, usize); + arena->stats.mapped += usize; + } + arena->nactive += (usize >> LG_PAGE); + malloc_mutex_unlock(&arena->lock); + + ret = chunk_alloc_arena(chunk_alloc, chunk_dalloc, arena->ind, NULL, + csize, alignment, zero); + if (ret == NULL) { + /* Revert optimistic stats updates. 
*/ + malloc_mutex_lock(&arena->lock); + if (config_stats) { + arena_huge_malloc_stats_update_undo(arena, usize); + arena->stats.mapped -= usize; + } + arena->nactive -= (usize >> LG_PAGE); + malloc_mutex_unlock(&arena->lock); + return (NULL); + } + + if (config_stats) + stats_cactive_add(usize); + + return (ret); +} + +void +arena_chunk_dalloc_huge(arena_t *arena, void *chunk, size_t usize) +{ + chunk_dalloc_t *chunk_dalloc; + + malloc_mutex_lock(&arena->lock); + chunk_dalloc = arena->chunk_dalloc; + if (config_stats) { + arena_huge_dalloc_stats_update(arena, usize); + arena->stats.mapped -= usize; + stats_cactive_sub(usize); + } + arena->nactive -= (usize >> LG_PAGE); + malloc_mutex_unlock(&arena->lock); + chunk_dalloc(chunk, CHUNK_CEILING(usize), arena->ind); +} + +void +arena_chunk_ralloc_huge_similar(arena_t *arena, void *chunk, size_t oldsize, + size_t usize) +{ + + assert(CHUNK_CEILING(oldsize) == CHUNK_CEILING(usize)); + assert(oldsize != usize); + + malloc_mutex_lock(&arena->lock); + if (config_stats) + arena_huge_ralloc_stats_update(arena, oldsize, usize); + if (oldsize < usize) { + size_t udiff = usize - oldsize; + arena->nactive += udiff >> LG_PAGE; + if (config_stats) + stats_cactive_add(udiff); + } else { + size_t udiff = oldsize - usize; + arena->nactive -= udiff >> LG_PAGE; + if (config_stats) + stats_cactive_sub(udiff); + } + malloc_mutex_unlock(&arena->lock); +} + +void +arena_chunk_ralloc_huge_shrink(arena_t *arena, void *chunk, size_t oldsize, + size_t usize) +{ + chunk_dalloc_t *chunk_dalloc; + size_t udiff = oldsize - usize; + size_t cdiff = CHUNK_CEILING(oldsize) - CHUNK_CEILING(usize); + + malloc_mutex_lock(&arena->lock); + chunk_dalloc = arena->chunk_dalloc; + if (config_stats) { + arena_huge_ralloc_stats_update(arena, oldsize, usize); + if (cdiff != 0) { + arena->stats.mapped -= cdiff; + stats_cactive_sub(udiff); + } + } + arena->nactive -= udiff >> LG_PAGE; + malloc_mutex_unlock(&arena->lock); + if (cdiff != 0) + chunk_dalloc(chunk + 
CHUNK_CEILING(usize), cdiff, arena->ind); +} + +bool +arena_chunk_ralloc_huge_expand(arena_t *arena, void *chunk, size_t oldsize, + size_t usize, bool *zero) +{ + chunk_alloc_t *chunk_alloc; + chunk_dalloc_t *chunk_dalloc; + size_t udiff = usize - oldsize; + size_t cdiff = CHUNK_CEILING(usize) - CHUNK_CEILING(oldsize); + + malloc_mutex_lock(&arena->lock); + chunk_alloc = arena->chunk_alloc; + chunk_dalloc = arena->chunk_dalloc; + if (config_stats) { + /* Optimistically update stats prior to unlocking. */ + arena_huge_ralloc_stats_update(arena, oldsize, usize); + arena->stats.mapped += cdiff; + } + arena->nactive += (udiff >> LG_PAGE); + malloc_mutex_unlock(&arena->lock); + + if (chunk_alloc_arena(chunk_alloc, chunk_dalloc, arena->ind, chunk + + CHUNK_CEILING(oldsize), cdiff, chunksize, zero) == NULL) { + /* Revert optimistic stats updates. */ + malloc_mutex_lock(&arena->lock); + if (config_stats) { + arena_huge_ralloc_stats_update_undo(arena, + oldsize, usize); + arena->stats.mapped -= cdiff; + } + arena->nactive -= (udiff >> LG_PAGE); + malloc_mutex_unlock(&arena->lock); + return (true); + } + + if (config_stats) + stats_cactive_add(udiff); + + return (false); +} + static arena_run_t * arena_run_alloc_large_helper(arena_t *arena, size_t size, bool zero) { diff --git a/src/huge.c b/src/huge.c index 5f46241d..740a93fc 100644 --- a/src/huge.c +++ b/src/huge.c @@ -31,15 +31,11 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, bool zero, bool try_tcache) { void *ret; - size_t csize; extent_node_t *node; bool is_zeroed; /* Allocate one or more contiguous chunks for this request. */ - csize = CHUNK_CEILING(usize); - assert(csize >= usize); - /* Allocate an extent node with which to track the chunk. 
*/ node = ipalloct(tsd, CACHELINE_CEILING(sizeof(extent_node_t)), CACHELINE, false, try_tcache, NULL); @@ -56,7 +52,7 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, base_node_dalloc(node); return (NULL); } - ret = arena_chunk_alloc_huge(arena, NULL, csize, alignment, &is_zeroed); + ret = arena_chunk_alloc_huge(arena, usize, alignment, &is_zeroed); if (ret == NULL) { idalloct(tsd, node, try_tcache); return (NULL); @@ -104,25 +100,6 @@ huge_dalloc_junk(void *ptr, size_t usize) huge_dalloc_junk_t *huge_dalloc_junk = JEMALLOC_N(huge_dalloc_junk_impl); #endif -static void -huge_ralloc_no_move_stats_update(arena_t *arena, size_t oldsize, size_t usize) -{ - index_t oldindex = size2index(oldsize) - nlclasses - NBINS; - index_t index = size2index(usize) - nlclasses - NBINS; - - cassert(config_stats); - - arena->stats.ndalloc_huge++; - arena->stats.allocated_huge -= oldsize; - arena->stats.hstats[oldindex].ndalloc++; - arena->stats.hstats[oldindex].curhchunks--; - - arena->stats.nmalloc_huge++; - arena->stats.allocated_huge += usize; - arena->stats.hstats[index].nmalloc++; - arena->stats.hstats[index].curhchunks++; -} - static void huge_ralloc_no_move_similar(void *ptr, size_t oldsize, size_t usize, size_t size, size_t extra, bool zero) @@ -135,34 +112,33 @@ huge_ralloc_no_move_similar(void *ptr, size_t oldsize, size_t usize, while (usize < s2u(size+extra) && (usize_next = s2u(usize+1)) < oldsize) usize = usize_next; - malloc_mutex_lock(&huge_mtx); + if (oldsize == usize) + return; + malloc_mutex_lock(&huge_mtx); key.addr = ptr; node = extent_tree_ad_search(&huge, &key); assert(node != NULL); assert(node->addr == ptr); - arena = node->arena; - - /* Update the size of the huge allocation if it changed. */ - if (oldsize != usize) { - assert(node->size != usize); - node->size = usize; - } - + /* Update the size of the huge allocation. */ + assert(node->size != usize); + node->size = usize; malloc_mutex_unlock(&huge_mtx); - /* Fill if necessary. 
*/ + /* Fill if necessary (shrinking). */ + if (config_fill && unlikely(opt_junk) && oldsize > usize) + memset(ptr + usize, 0x5a, oldsize - usize); + + arena_chunk_ralloc_huge_similar(arena, ptr, oldsize, usize); + + /* Fill if necessary (growing). */ if (oldsize < usize) { if (zero || (config_fill && unlikely(opt_zero))) memset(ptr + oldsize, 0, usize - oldsize); else if (config_fill && unlikely(opt_junk)) memset(ptr + oldsize, 0xa5, usize - oldsize); - } else if (config_fill && unlikely(opt_junk) && oldsize > usize) - memset(ptr + usize, 0x5a, oldsize - usize); - - if (config_stats) - huge_ralloc_no_move_stats_update(arena, oldsize, usize); + } } static void @@ -170,44 +146,28 @@ huge_ralloc_no_move_shrink(void *ptr, size_t oldsize, size_t usize) { extent_node_t *node, key; arena_t *arena; - void *excess_addr; - size_t excess_size; malloc_mutex_lock(&huge_mtx); - key.addr = ptr; node = extent_tree_ad_search(&huge, &key); assert(node != NULL); assert(node->addr == ptr); - arena = node->arena; - /* Update the size of the huge allocation. */ node->size = usize; - malloc_mutex_unlock(&huge_mtx); - excess_addr = node->addr + CHUNK_CEILING(usize); - excess_size = CHUNK_CEILING(oldsize) - CHUNK_CEILING(usize); - /* Zap the excess chunks. 
*/ huge_dalloc_junk(ptr + usize, oldsize - usize); - if (excess_size > 0) - arena_chunk_dalloc_huge(arena, excess_addr, excess_size); - - if (config_stats) - huge_ralloc_no_move_stats_update(arena, oldsize, usize); + arena_chunk_ralloc_huge_shrink(arena, ptr, oldsize, usize); } static bool huge_ralloc_no_move_expand(void *ptr, size_t oldsize, size_t size, bool zero) { size_t usize; - void *expand_addr; - size_t expand_size; extent_node_t *node, key; arena_t *arena; bool is_zeroed; - void *ret; usize = s2u(size); if (usize == 0) { @@ -215,19 +175,12 @@ huge_ralloc_no_move_expand(void *ptr, size_t oldsize, size_t size, bool zero) { return (true); } - expand_addr = ptr + CHUNK_CEILING(oldsize); - expand_size = CHUNK_CEILING(usize) - CHUNK_CEILING(oldsize); - assert(expand_size > 0); - malloc_mutex_lock(&huge_mtx); - key.addr = ptr; node = extent_tree_ad_search(&huge, &key); assert(node != NULL); assert(node->addr == ptr); - arena = node->arena; - malloc_mutex_unlock(&huge_mtx); /* @@ -235,12 +188,10 @@ huge_ralloc_no_move_expand(void *ptr, size_t oldsize, size_t size, bool zero) { * it is possible to make correct junk/zero fill decisions below. */ is_zeroed = zero; - ret = arena_chunk_alloc_huge(arena, expand_addr, expand_size, chunksize, - &is_zeroed); - if (ret == NULL) - return (true); - assert(ret == expand_addr); + if (arena_chunk_ralloc_huge_expand(arena, ptr, oldsize, usize, + &is_zeroed)) + return (true); malloc_mutex_lock(&huge_mtx); /* Update the size of the huge allocation. */ @@ -254,9 +205,6 @@ huge_ralloc_no_move_expand(void *ptr, size_t oldsize, size_t size, bool zero) { memset(ptr + oldsize, 0, usize - oldsize); } - if (config_stats) - huge_ralloc_no_move_stats_update(arena, oldsize, usize); - return (false); } @@ -363,19 +311,16 @@ huge_dalloc(tsd_t *tsd, void *ptr, bool try_tcache) extent_node_t *node, key; malloc_mutex_lock(&huge_mtx); - /* Extract from tree of huge allocations. 
*/ key.addr = ptr; node = extent_tree_ad_search(&huge, &key); assert(node != NULL); assert(node->addr == ptr); extent_tree_ad_remove(&huge, node); - malloc_mutex_unlock(&huge_mtx); huge_dalloc_junk(node->addr, node->size); - arena_chunk_dalloc_huge(node->arena, node->addr, - CHUNK_CEILING(node->size)); + arena_chunk_dalloc_huge(node->arena, node->addr, node->size); idalloct(tsd, node, try_tcache); } From d1f3ab4008f95e8928777d28a40aff9708701503 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 14 Oct 2014 22:31:49 -0700 Subject: [PATCH 0574/3378] Fix line wrapping. --- INSTALL | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/INSTALL b/INSTALL index a00960aa..b8459a81 100644 --- a/INSTALL +++ b/INSTALL @@ -191,8 +191,8 @@ any of the following arguments (not a definitive list) to 'configure': --with-lg-page= Specify the base 2 log of the system page size. This option is only useful - when cross compiling, since the configure script automatically determines the - host's page size by default. + when cross compiling, since the configure script automatically determines + the host's page size by default. --with-lg-page-sizes= Specify the comma-separated base 2 logs of the page sizes to support. This @@ -243,16 +243,16 @@ any of the following arguments (not a definitive list) to 'configure': safe values for the most commonly used modern architectures, there is a wrinkle related to GNU libc (glibc) that may impact your choice of . On most modern architectures, this mandates 16-byte alignment - (=4), but the glibc developers chose not to meet this requirement - for performance reasons. An old discussion can be found at + (=4), but the glibc developers chose not to meet this + requirement for performance reasons. An old discussion can be found at https://sourceware.org/bugzilla/show_bug.cgi?id=206 . 
Unlike glibc, jemalloc does follow the C standard by default (caveat: jemalloc - technically cheats if --with-lg-tiny-min is smaller than --with-lg-quantum), - but the fact that Linux systems already work around this allocator - noncompliance means that it is generally safe in practice to let jemalloc's - minimum alignment follow glibc's lead. If you specify --with-lg-quantum=3 - during configuration, jemalloc will provide additional size classes that - are not 16-byte-aligned (24, 40, and 56, assuming + technically cheats if --with-lg-tiny-min is smaller than + --with-lg-quantum), but the fact that Linux systems already work around + this allocator noncompliance means that it is generally safe in practice to + let jemalloc's minimum alignment follow glibc's lead. If you specify + --with-lg-quantum=3 during configuration, jemalloc will provide additional + size classes that are not 16-byte-aligned (24, 40, and 56, assuming --with-lg-size-class-group=2). --with-lg-tiny-min= From acbcbad1e18d3082ee6ce851994ed03f63ae55bd Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 15 Oct 2014 14:49:14 -0700 Subject: [PATCH 0575/3378] Thwart compiler optimizations. 
--- test/stress/microbench.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/test/stress/microbench.c b/test/stress/microbench.c index 980eca41..aefbe6a7 100644 --- a/test/stress/microbench.c +++ b/test/stress/microbench.c @@ -114,6 +114,10 @@ malloc_mus_free(void) void *p; p = malloc(1); + if (p == NULL) { + test_fail("Unexpected malloc() failure"); + return; + } malloc_usable_size(p); free(p); } @@ -124,6 +128,10 @@ malloc_sallocx_free(void) void *p; p = malloc(1); + if (p == NULL) { + test_fail("Unexpected malloc() failure"); + return; + } if (sallocx(p, 0) < 1) test_fail("Unexpected sallocx() failure"); free(p); @@ -143,6 +151,10 @@ malloc_nallocx_free(void) void *p; p = malloc(1); + if (p == NULL) { + test_fail("Unexpected malloc() failure"); + return; + } if (nallocx(1, 0) < 1) test_fail("Unexpected nallocx() failure"); free(p); From bf8d6a109200bf10f1c942ad914aa8cb5f279e17 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 15 Oct 2014 16:18:42 -0700 Subject: [PATCH 0576/3378] Add small run utilization to stats output. Add the 'util' column, which reports the proportion of available regions that are currently in use for each small size class. Small run utilization is the complement of external fragmentation. For example, utilization of 0.75 indicates that 25% of small run memory is consumed by external fragmentation, in other (more obtuse) words, 33% external fragmentation overhead. This resolves #27. 
--- src/stats.c | 50 ++++++++++++++++++++++++++++++++++---------------- 1 file changed, 34 insertions(+), 16 deletions(-) diff --git a/src/stats.c b/src/stats.c index 16a18c50..054f0332 100644 --- a/src/stats.c +++ b/src/stats.c @@ -69,14 +69,14 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, if (config_tcache) { malloc_cprintf(write_cb, cbopaque, "bins: size ind allocated nmalloc" - " ndalloc nrequests curregs regs pgs" - " nfills nflushes newruns reruns" - " curruns\n"); + " ndalloc nrequests curregs curruns regs" + " pgs util nfills nflushes newruns" + " reruns\n"); } else { malloc_cprintf(write_cb, cbopaque, "bins: size ind allocated nmalloc" - " ndalloc nrequests curregs regs pgs" - " newruns reruns curruns\n"); + " ndalloc nrequests curregs curruns regs" + " pgs util newruns reruns\n"); } CTL_GET("arenas.nbins", &nbins, unsigned); for (j = 0, in_gap = false; j < nbins; j++) { @@ -86,11 +86,12 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, if (nruns == 0) in_gap = true; else { - size_t reg_size, run_size, curregs; + size_t reg_size, run_size, curregs, availregs, milli; + size_t curruns; uint32_t nregs; uint64_t nmalloc, ndalloc, nrequests, nfills, nflushes; uint64_t reruns; - size_t curruns; + char util[6]; /* "x.yyy". */ if (in_gap) { malloc_cprintf(write_cb, cbopaque, @@ -118,24 +119,41 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, uint64_t); CTL_IJ_GET("stats.arenas.0.bins.0.curruns", &curruns, size_t); + + availregs = nregs * curruns; + milli = (availregs != 0) ? 
(1000 * curregs) / availregs + : 1000; + assert(milli <= 1000); + if (milli < 10) { + malloc_snprintf(util, sizeof(util), "0.00%zu", + milli); + } else if (milli < 100) { + malloc_snprintf(util, sizeof(util), "0.0%zu", + milli); + } else if (milli < 1000) { + malloc_snprintf(util, sizeof(util), "0.%zu", + milli); + } else + malloc_snprintf(util, sizeof(util), "1"); + if (config_tcache) { malloc_cprintf(write_cb, cbopaque, "%20zu %3u %12zu %12"PRIu64" %12"PRIu64 - " %12"PRIu64" %12zu %4u %3zu %12"PRIu64 + " %12"PRIu64" %12zu %12zu %4u %3zu %-5s" " %12"PRIu64" %12"PRIu64" %12"PRIu64 - " %12zu\n", + " %12"PRIu64"\n", reg_size, j, curregs * reg_size, nmalloc, - ndalloc, nrequests, curregs, nregs, run_size - / page, nfills, nflushes, nruns, reruns, - curruns); + ndalloc, nrequests, curregs, curruns, nregs, + run_size / page, util, nfills, nflushes, + nruns, reruns); } else { malloc_cprintf(write_cb, cbopaque, "%20zu %3u %12zu %12"PRIu64" %12"PRIu64 - " %12"PRIu64" %12zu %4u %3zu %12"PRIu64 - " %12"PRIu64" %12zu\n", + " %12"PRIu64" %12zu %12zu %4u %3zu %-5s" + " %12"PRIu64" %12"PRIu64"\n", reg_size, j, curregs * reg_size, nmalloc, - ndalloc, nrequests, curregs, nregs, - run_size / page, nruns, reruns, curruns); + ndalloc, nrequests, curregs, curruns, nregs, + run_size / page, util, nruns, reruns); } } } From 9673983443a0782d975fbcb5d8457cfd411b8b56 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 15 Oct 2014 18:02:02 -0700 Subject: [PATCH 0577/3378] Purge/zero sub-chunk huge allocations as necessary. Purge trailing pages during shrinking huge reallocation when resulting size is not a multiple of the chunk size. Similarly, zero pages if necessary during growing huge reallocation when the resulting size is not a multiple of the chunk size. 
--- src/huge.c | 75 +++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 51 insertions(+), 24 deletions(-) diff --git a/src/huge.c b/src/huge.c index 740a93fc..1734ff6e 100644 --- a/src/huge.c +++ b/src/huge.c @@ -61,18 +61,18 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, /* Insert node into huge. */ node->addr = ret; node->size = usize; + node->zeroed = is_zeroed; node->arena = arena; malloc_mutex_lock(&huge_mtx); extent_tree_ad_insert(&huge, node); malloc_mutex_unlock(&huge_mtx); - if (config_fill && !zero) { - if (unlikely(opt_junk)) - memset(ret, 0xa5, usize); - else if (unlikely(opt_zero) && !is_zeroed) + if (zero || (config_fill && unlikely(opt_zero))) { + if (!is_zeroed) memset(ret, 0, usize); - } + } else if (config_fill && unlikely(opt_junk)) + memset(ret, 0xa5, usize); return (ret); } @@ -105,6 +105,7 @@ huge_ralloc_no_move_similar(void *ptr, size_t oldsize, size_t usize, size_t size, size_t extra, bool zero) { size_t usize_next; + bool zeroed; extent_node_t *node, key; arena_t *arena; @@ -115,6 +116,17 @@ huge_ralloc_no_move_similar(void *ptr, size_t oldsize, size_t usize, if (oldsize == usize) return; + /* Fill if necessary (shrinking). */ + if (oldsize > usize) { + size_t sdiff = CHUNK_CEILING(usize) - usize; + zeroed = (sdiff != 0) ? !pages_purge(ptr + usize, sdiff) : true; + if (config_fill && unlikely(opt_junk)) { + memset(ptr + usize, 0x5a, oldsize - usize); + zeroed = false; + } + } else + zeroed = true; + malloc_mutex_lock(&huge_mtx); key.addr = ptr; node = extent_tree_ad_search(&huge, &key); @@ -124,19 +136,18 @@ huge_ralloc_no_move_similar(void *ptr, size_t oldsize, size_t usize, /* Update the size of the huge allocation. */ assert(node->size != usize); node->size = usize; + /* Clear node->zeroed if zeroing failed above. */ + node->zeroed = (node->zeroed && zeroed); malloc_mutex_unlock(&huge_mtx); - /* Fill if necessary (shrinking). 
*/ - if (config_fill && unlikely(opt_junk) && oldsize > usize) - memset(ptr + usize, 0x5a, oldsize - usize); - arena_chunk_ralloc_huge_similar(arena, ptr, oldsize, usize); /* Fill if necessary (growing). */ if (oldsize < usize) { - if (zero || (config_fill && unlikely(opt_zero))) - memset(ptr + oldsize, 0, usize - oldsize); - else if (config_fill && unlikely(opt_junk)) + if (zero || (config_fill && unlikely(opt_zero))) { + if (!zeroed) + memset(ptr + oldsize, 0, usize - oldsize); + } else if (config_fill && unlikely(opt_junk)) memset(ptr + oldsize, 0xa5, usize - oldsize); } } @@ -144,9 +155,18 @@ huge_ralloc_no_move_similar(void *ptr, size_t oldsize, size_t usize, static void huge_ralloc_no_move_shrink(void *ptr, size_t oldsize, size_t usize) { + size_t sdiff; + bool zeroed; extent_node_t *node, key; arena_t *arena; + sdiff = CHUNK_CEILING(usize) - usize; + zeroed = (sdiff != 0) ? !pages_purge(ptr + usize, sdiff) : true; + if (config_fill && unlikely(opt_junk)) { + huge_dalloc_junk(ptr + usize, oldsize - usize); + zeroed = false; + } + malloc_mutex_lock(&huge_mtx); key.addr = ptr; node = extent_tree_ad_search(&huge, &key); @@ -155,10 +175,11 @@ huge_ralloc_no_move_shrink(void *ptr, size_t oldsize, size_t usize) arena = node->arena; /* Update the size of the huge allocation. */ node->size = usize; + /* Clear node->zeroed if zeroing failed above. */ + node->zeroed = (node->zeroed && zeroed); malloc_mutex_unlock(&huge_mtx); /* Zap the excess chunks. 
*/ - huge_dalloc_junk(ptr + usize, oldsize - usize); arena_chunk_ralloc_huge_shrink(arena, ptr, oldsize, usize); } @@ -167,7 +188,7 @@ huge_ralloc_no_move_expand(void *ptr, size_t oldsize, size_t size, bool zero) { size_t usize; extent_node_t *node, key; arena_t *arena; - bool is_zeroed; + bool is_zeroed_subchunk, is_zeroed_chunk; usize = s2u(size); if (usize == 0) { @@ -181,16 +202,17 @@ huge_ralloc_no_move_expand(void *ptr, size_t oldsize, size_t size, bool zero) { assert(node != NULL); assert(node->addr == ptr); arena = node->arena; + is_zeroed_subchunk = node->zeroed; malloc_mutex_unlock(&huge_mtx); /* - * Copy zero into is_zeroed and pass the copy to chunk_alloc(), so that - * it is possible to make correct junk/zero fill decisions below. + * Copy zero into is_zeroed_chunk and pass the copy to chunk_alloc(), so + * that it is possible to make correct junk/zero fill decisions below. */ - is_zeroed = zero; + is_zeroed_chunk = zero; if (arena_chunk_ralloc_huge_expand(arena, ptr, oldsize, usize, - &is_zeroed)) + &is_zeroed_chunk)) return (true); malloc_mutex_lock(&huge_mtx); @@ -198,12 +220,17 @@ huge_ralloc_no_move_expand(void *ptr, size_t oldsize, size_t size, bool zero) { node->size = usize; malloc_mutex_unlock(&huge_mtx); - if (config_fill && !zero) { - if (unlikely(opt_junk)) - memset(ptr + oldsize, 0xa5, usize - oldsize); - else if (unlikely(opt_zero) && !is_zeroed) - memset(ptr + oldsize, 0, usize - oldsize); - } + if (zero || (config_fill && unlikely(opt_zero))) { + if (!is_zeroed_subchunk) { + memset(ptr + oldsize, 0, CHUNK_CEILING(oldsize) - + oldsize); + } + if (!is_zeroed_chunk) { + memset(ptr + CHUNK_CEILING(oldsize), 0, usize - + CHUNK_CEILING(oldsize)); + } + } else if (config_fill && unlikely(opt_junk)) + memset(ptr + oldsize, 0xa5, usize - oldsize); return (false); } From c83bccd27396cbb6e818d83cc360a58aef96558d Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 16 Oct 2014 12:33:18 -0700 Subject: [PATCH 0578/3378] Initialize chunks_mtx for all 
configurations. This resolves #150. --- src/chunk.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/chunk.c b/src/chunk.c index f65b67af..a7761162 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -409,11 +409,10 @@ chunk_boot(void) chunksize_mask = chunksize - 1; chunk_npages = (chunksize >> LG_PAGE); - if (config_stats || config_prof) { - if (malloc_mutex_init(&chunks_mtx)) - return (true); + if (malloc_mutex_init(&chunks_mtx)) + return (true); + if (config_stats || config_prof) memset(&stats_chunks, 0, sizeof(chunk_stats_t)); - } if (have_dss && chunk_dss_boot()) return (true); extent_tree_szad_new(&chunks_szad_mmap); From a9ea10d27c320926cab2e59c66ebcd25c49df24c Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Thu, 16 Oct 2014 15:05:02 -0400 Subject: [PATCH 0579/3378] use sized deallocation internally for ralloc The size of the source allocation is known at this point, so reading the chunk header can be avoided for the small size class fast path. This is not very useful right now, but it provides a significant performance boost with an alternate ralloc entry point taking the old size. --- src/arena.c | 2 +- src/huge.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/arena.c b/src/arena.c index 586e3c76..d7377aec 100644 --- a/src/arena.c +++ b/src/arena.c @@ -2220,7 +2220,7 @@ arena_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size, copysize = (size < oldsize) ? size : oldsize; JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, copysize); memcpy(ret, ptr, copysize); - iqalloc(tsd, ptr, try_tcache_dalloc); + isqalloc(tsd, ptr, oldsize, try_tcache_dalloc); return (ret); } diff --git a/src/huge.c b/src/huge.c index 1734ff6e..826464c2 100644 --- a/src/huge.c +++ b/src/huge.c @@ -328,7 +328,7 @@ huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size, */ copysize = (size < oldsize) ? 
size : oldsize; memcpy(ret, ptr, copysize); - iqalloc(tsd, ptr, try_tcache_dalloc); + isqalloc(tsd, ptr, oldsize, try_tcache_dalloc); return (ret); } From 79725aa6f6823bf0703374cb4b89b64133321138 Mon Sep 17 00:00:00 2001 From: Guilherme Goncalves Date: Mon, 20 Oct 2014 14:08:37 -0200 Subject: [PATCH 0580/3378] Fix variable declaration with no type in the configure script. --- configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index a7bf1039..5c51f27f 100644 --- a/configure.ac +++ b/configure.ac @@ -1363,7 +1363,7 @@ if test "x${enable_zone_allocator}" = "x1" ; then AC_DEFUN([JE_ZONE_PROGRAM], [AC_LANG_PROGRAM( [#include ], - [static foo[[sizeof($1) $2 sizeof(void *) * $3 ? 1 : -1]]] + [static int foo[[sizeof($1) $2 sizeof(void *) * $3 ? 1 : -1]]] )]) AC_COMPILE_IFELSE([JE_ZONE_PROGRAM(malloc_zone_t,==,14)],[JEMALLOC_ZONE_VERSION=3],[ From af1f5927633ee2cb98c095de0fcc67b8aacdc9c0 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 30 Oct 2014 16:38:08 -0700 Subject: [PATCH 0581/3378] Use JEMALLOC_INLINE_C everywhere it's appropriate. 
--- src/arena.c | 16 ++++++++-------- src/ctl.c | 6 +++--- src/extent.c | 4 ++-- src/prof.c | 4 ++-- 4 files changed, 15 insertions(+), 15 deletions(-) diff --git a/src/arena.c b/src/arena.c index d7377aec..795f5302 100644 --- a/src/arena.c +++ b/src/arena.c @@ -39,7 +39,7 @@ arena_miscelm_to_bits(arena_chunk_map_misc_t *miscelm) return arena_mapbits_get(chunk, pageind); } -static inline int +JEMALLOC_INLINE_C int arena_run_comp(arena_chunk_map_misc_t *a, arena_chunk_map_misc_t *b) { uintptr_t a_miscelm = (uintptr_t)a; @@ -55,7 +55,7 @@ arena_run_comp(arena_chunk_map_misc_t *a, arena_chunk_map_misc_t *b) rb_gen(static UNUSED, arena_run_tree_, arena_run_tree_t, arena_chunk_map_misc_t, rb_link, arena_run_comp) -static inline int +JEMALLOC_INLINE_C int arena_avail_comp(arena_chunk_map_misc_t *a, arena_chunk_map_misc_t *b) { int ret; @@ -139,7 +139,7 @@ arena_dirty_remove(arena_t *arena, arena_chunk_t *chunk, size_t pageind, arena->ndirty -= npages; } -static inline void * +JEMALLOC_INLINE_C void * arena_run_reg_alloc(arena_run_t *run, arena_bin_info_t *bin_info) { void *ret; @@ -159,7 +159,7 @@ arena_run_reg_alloc(arena_run_t *run, arena_bin_info_t *bin_info) return (ret); } -static inline void +JEMALLOC_INLINE_C void arena_run_reg_dalloc(arena_run_t *run, void *ptr) { arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); @@ -185,7 +185,7 @@ arena_run_reg_dalloc(arena_run_t *run, void *ptr) run->nfree++; } -static inline void +JEMALLOC_INLINE_C void arena_run_zero(arena_chunk_t *chunk, size_t run_ind, size_t npages) { @@ -195,7 +195,7 @@ arena_run_zero(arena_chunk_t *chunk, size_t run_ind, size_t npages) (npages << LG_PAGE)); } -static inline void +JEMALLOC_INLINE_C void arena_run_page_mark_zeroed(arena_chunk_t *chunk, size_t run_ind) { @@ -203,7 +203,7 @@ arena_run_page_mark_zeroed(arena_chunk_t *chunk, size_t run_ind) << LG_PAGE)), PAGE); } -static inline void +JEMALLOC_INLINE_C void arena_run_page_validate_zeroed(arena_chunk_t *chunk, size_t run_ind) { 
size_t i; @@ -834,7 +834,7 @@ arena_run_alloc_small(arena_t *arena, size_t size, index_t binind) return (arena_run_alloc_small_helper(arena, size, binind)); } -static inline void +JEMALLOC_INLINE_C void arena_maybe_purge(arena_t *arena) { size_t threshold; diff --git a/src/ctl.c b/src/ctl.c index 72598b3d..b367c9f6 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -16,14 +16,14 @@ static ctl_stats_t ctl_stats; /******************************************************************************/ /* Helpers for named and indexed nodes. */ -static inline const ctl_named_node_t * +JEMALLOC_INLINE_C const ctl_named_node_t * ctl_named_node(const ctl_node_t *node) { return ((node->named) ? (const ctl_named_node_t *)node : NULL); } -static inline const ctl_named_node_t * +JEMALLOC_INLINE_C const ctl_named_node_t * ctl_named_children(const ctl_named_node_t *node, int index) { const ctl_named_node_t *children = ctl_named_node(node->children); @@ -31,7 +31,7 @@ ctl_named_children(const ctl_named_node_t *node, int index) return (children ? 
&children[index] : NULL); } -static inline const ctl_indexed_node_t * +JEMALLOC_INLINE_C const ctl_indexed_node_t * ctl_indexed_node(const ctl_node_t *node) { diff --git a/src/extent.c b/src/extent.c index 8c09b486..ca852016 100644 --- a/src/extent.c +++ b/src/extent.c @@ -3,7 +3,7 @@ /******************************************************************************/ -static inline int +JEMALLOC_INLINE_C int extent_szad_comp(extent_node_t *a, extent_node_t *b) { int ret; @@ -25,7 +25,7 @@ extent_szad_comp(extent_node_t *a, extent_node_t *b) rb_gen(, extent_tree_szad_, extent_tree_t, extent_node_t, link_szad, extent_szad_comp) -static inline int +JEMALLOC_INLINE_C int extent_ad_comp(extent_node_t *a, extent_node_t *b) { uintptr_t a_addr = (uintptr_t)a->addr; diff --git a/src/prof.c b/src/prof.c index 40163271..36ee7584 100644 --- a/src/prof.c +++ b/src/prof.c @@ -244,7 +244,7 @@ bt_init(prof_bt_t *bt, void **vec) bt->len = 0; } -static inline void +JEMALLOC_INLINE_C void prof_enter(prof_tdata_t *tdata) { @@ -256,7 +256,7 @@ prof_enter(prof_tdata_t *tdata) malloc_mutex_lock(&bt2gctx_mtx); } -static inline void +JEMALLOC_INLINE_C void prof_leave(prof_tdata_t *tdata) { bool idump, gdump; From c93ed81cd06ae46906ae7a386fd6312caca391fb Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 30 Oct 2014 16:50:33 -0700 Subject: [PATCH 0582/3378] Fix prof_{enter,leave}() calls to pass tdata_self. 
--- src/prof.c | 43 ++++++++++++++++++++++++------------------- 1 file changed, 24 insertions(+), 19 deletions(-) diff --git a/src/prof.c b/src/prof.c index 36ee7584..71b0994a 100644 --- a/src/prof.c +++ b/src/prof.c @@ -245,10 +245,11 @@ bt_init(prof_bt_t *bt, void **vec) } JEMALLOC_INLINE_C void -prof_enter(prof_tdata_t *tdata) +prof_enter(tsd_t *tsd, prof_tdata_t *tdata) { cassert(config_prof); + assert(tdata == prof_tdata_get(tsd, false)); assert(!tdata->enq); tdata->enq = true; @@ -257,11 +258,12 @@ prof_enter(prof_tdata_t *tdata) } JEMALLOC_INLINE_C void -prof_leave(prof_tdata_t *tdata) +prof_leave(tsd_t *tsd, prof_tdata_t *tdata) { bool idump, gdump; cassert(config_prof); + assert(tdata == prof_tdata_get(tsd, false)); malloc_mutex_unlock(&bt2gctx_mtx); @@ -542,7 +544,8 @@ prof_gctx_create(tsd_t *tsd, prof_bt_t *bt) } static void -prof_gctx_try_destroy(tsd_t *tsd, prof_gctx_t *gctx, prof_tdata_t *tdata) +prof_gctx_try_destroy(tsd_t *tsd, prof_tdata_t *tdata_self, prof_gctx_t *gctx, + prof_tdata_t *tdata) { cassert(config_prof); @@ -554,14 +557,14 @@ prof_gctx_try_destroy(tsd_t *tsd, prof_gctx_t *gctx, prof_tdata_t *tdata) * avoid a race between the main body of prof_tctx_destroy() and entry * into this function. */ - prof_enter(tdata); + prof_enter(tsd, tdata_self); malloc_mutex_lock(gctx->lock); assert(gctx->nlimbo != 0); if (tctx_tree_empty(&gctx->tctxs) && gctx->nlimbo == 1) { /* Remove gctx from bt2gctx. */ if (ckh_remove(tsd, &bt2gctx, &gctx->bt, NULL, NULL)) not_reached(); - prof_leave(tdata); + prof_leave(tsd, tdata_self); /* Destroy gctx. 
*/ malloc_mutex_unlock(gctx->lock); idalloc(tsd, gctx); @@ -572,7 +575,7 @@ prof_gctx_try_destroy(tsd_t *tsd, prof_gctx_t *gctx, prof_tdata_t *tdata) */ gctx->nlimbo--; malloc_mutex_unlock(gctx->lock); - prof_leave(tdata); + prof_leave(tsd, tdata_self); } } @@ -655,8 +658,10 @@ prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) destroy_gctx = false; } malloc_mutex_unlock(gctx->lock); - if (destroy_gctx) - prof_gctx_try_destroy(tsd, gctx, tdata); + if (destroy_gctx) { + prof_gctx_try_destroy(tsd, prof_tdata_get(tsd, false), gctx, + tdata); + } if (destroy_tdata) prof_tdata_destroy(tsd, tdata, false); @@ -679,18 +684,18 @@ prof_lookup_global(tsd_t *tsd, prof_bt_t *bt, prof_tdata_t *tdata, } btkey; bool new_gctx; - prof_enter(tdata); + prof_enter(tsd, tdata); if (ckh_search(&bt2gctx, bt, &btkey.v, &gctx.v)) { /* bt has never been seen before. Insert it. */ gctx.p = prof_gctx_create(tsd, bt); if (gctx.v == NULL) { - prof_leave(tdata); + prof_leave(tsd, tdata); return (true); } btkey.p = &gctx.p->bt; if (ckh_insert(tsd, &bt2gctx, btkey.v, gctx.v)) { /* OOM. 
*/ - prof_leave(tdata); + prof_leave(tsd, tdata); idalloc(tsd, gctx.v); return (true); } @@ -705,7 +710,7 @@ prof_lookup_global(tsd_t *tsd, prof_bt_t *bt, prof_tdata_t *tdata, malloc_mutex_unlock(gctx.p->lock); new_gctx = false; } - prof_leave(tdata); + prof_leave(tsd, tdata); *p_btkey = btkey.v; *p_gctx = gctx.p; @@ -751,7 +756,7 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) ret.v = imalloc(tsd, sizeof(prof_tctx_t)); if (ret.p == NULL) { if (new_gctx) - prof_gctx_try_destroy(tsd, gctx, tdata); + prof_gctx_try_destroy(tsd, tdata, gctx, tdata); return (NULL); } ret.p->tdata = tdata; @@ -765,7 +770,7 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) malloc_mutex_unlock(tdata->lock); if (error) { if (new_gctx) - prof_gctx_try_destroy(tsd, gctx, tdata); + prof_gctx_try_destroy(tsd, tdata, gctx, tdata); idalloc(tsd, ret.v); return (NULL); } @@ -872,9 +877,9 @@ prof_bt_count(void) if (tdata == NULL) return (0); - prof_enter(tdata); + malloc_mutex_lock(&bt2gctx_mtx); bt_count = ckh_count(&bt2gctx); - prof_leave(tdata); + malloc_mutex_unlock(&bt2gctx_mtx); return (bt_count); } @@ -1155,7 +1160,7 @@ prof_gctx_finish(tsd_t *tsd, prof_gctx_tree_t *gctxs) if (prof_gctx_should_destroy(gctx)) { gctx->nlimbo++; malloc_mutex_unlock(gctx->lock); - prof_gctx_try_destroy(tsd, gctx, tdata); + prof_gctx_try_destroy(tsd, tdata, gctx, tdata); } else malloc_mutex_unlock(gctx->lock); } @@ -1398,7 +1403,7 @@ prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, bool leakcheck) return (true); malloc_mutex_lock(&prof_dump_mtx); - prof_enter(tdata); + prof_enter(tsd, tdata); /* * Put gctx's in limbo and clear their counters in preparation for @@ -1421,7 +1426,7 @@ prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, bool leakcheck) leak_ngctx = 0; gctx_tree_iter(&gctxs, NULL, prof_gctx_merge_iter, (void *)&leak_ngctx); - prof_leave(tdata); + prof_leave(tsd, tdata); /* Create dump file. 
*/ if ((prof_dump_fd = prof_dump_open(propagate_err, filename)) == -1) From 809b0ac3919da60c20ad59517ef560d0df639f3b Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Thu, 23 Oct 2014 10:30:52 -0400 Subject: [PATCH 0583/3378] mark huge allocations as unlikely This cleans up the fast path a bit more by moving away more code. --- .../jemalloc/internal/jemalloc_internal.h.in | 20 +++++++++---------- include/jemalloc/internal/prof.h | 4 ++-- src/arena.c | 4 ++-- src/jemalloc.c | 4 ++-- 4 files changed, 16 insertions(+), 16 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 294e2cc1..3ce5aba8 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -655,7 +655,7 @@ sa2u(size_t size, size_t alignment) } /* Try for a large size class. */ - if (size <= arena_maxclass && alignment < chunksize) { + if (likely(size <= arena_maxclass) && likely(alignment < chunksize)) { /* * We can't achieve subpage alignment, so round up alignment * to the minimum that can actually be supported. 
@@ -805,7 +805,7 @@ imalloct(tsd_t *tsd, size_t size, bool try_tcache, arena_t *arena) assert(size != 0); - if (size <= arena_maxclass) + if (likely(size <= arena_maxclass)) return (arena_malloc(tsd, arena, size, false, try_tcache)); else return (huge_malloc(tsd, arena, size, false, try_tcache)); @@ -822,7 +822,7 @@ JEMALLOC_ALWAYS_INLINE void * icalloct(tsd_t *tsd, size_t size, bool try_tcache, arena_t *arena) { - if (size <= arena_maxclass) + if (likely(size <= arena_maxclass)) return (arena_malloc(tsd, arena, size, true, try_tcache)); else return (huge_malloc(tsd, arena, size, true, try_tcache)); @@ -847,12 +847,12 @@ ipalloct(tsd_t *tsd, size_t usize, size_t alignment, bool zero, bool try_tcache, if (usize <= SMALL_MAXCLASS && alignment < PAGE) ret = arena_malloc(tsd, arena, usize, zero, try_tcache); else { - if (usize <= arena_maxclass) { + if (likely(usize <= arena_maxclass)) { arena = arena_choose(tsd, arena); if (unlikely(arena == NULL)) return (NULL); ret = arena_palloc(arena, usize, alignment, zero); - } else if (alignment <= chunksize) + } else if (likely(alignment <= chunksize)) ret = huge_malloc(tsd, arena, usize, zero, try_tcache); else { ret = huge_palloc(tsd, arena, usize, alignment, zero, @@ -887,7 +887,7 @@ isalloc(const void *ptr, bool demote) assert(config_prof || !demote); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - if (chunk != ptr) + if (likely(chunk != ptr)) ret = arena_salloc(ptr, demote); else ret = huge_salloc(ptr); @@ -936,7 +936,7 @@ idalloct(tsd_t *tsd, void *ptr, bool try_tcache) assert(ptr != NULL); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - if (chunk != ptr) + if (likely(chunk != ptr)) arena_dalloc(tsd, chunk, ptr, try_tcache); else huge_dalloc(tsd, ptr, try_tcache); @@ -950,7 +950,7 @@ isdalloct(tsd_t *tsd, void *ptr, size_t size, bool try_tcache) assert(ptr != NULL); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - if (chunk != ptr) + if (likely(chunk != ptr)) arena_sdalloc(tsd, chunk, ptr, size, try_tcache); else 
huge_dalloc(tsd, ptr, try_tcache); @@ -1038,7 +1038,7 @@ iralloct(tsd_t *tsd, void *ptr, size_t size, size_t alignment, bool zero, zero, try_tcache_alloc, try_tcache_dalloc, arena)); } - if (size <= arena_maxclass) { + if (likely(size <= arena_maxclass)) { return (arena_ralloc(tsd, arena, ptr, oldsize, size, 0, alignment, zero, try_tcache_alloc, try_tcache_dalloc)); } else { @@ -1069,7 +1069,7 @@ ixalloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero) return (true); } - if (size <= arena_maxclass) + if (likely(size <= arena_maxclass)) return (arena_ralloc_no_move(ptr, oldsize, size, extra, zero)); else return (huge_ralloc_no_move(ptr, oldsize, size, extra, zero)); diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index 5103146b..e0d5f104 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -361,7 +361,7 @@ prof_tctx_get(const void *ptr) assert(ptr != NULL); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - if (chunk != ptr) { + if (likely(chunk != ptr)) { /* Region. */ ret = arena_prof_tctx_get(ptr); } else @@ -379,7 +379,7 @@ prof_tctx_set(const void *ptr, prof_tctx_t *tctx) assert(ptr != NULL); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - if (chunk != ptr) { + if (likely(chunk != ptr)) { /* Region. */ arena_prof_tctx_set(ptr, tctx); } else diff --git a/src/arena.c b/src/arena.c index 795f5302..347d58e4 100644 --- a/src/arena.c +++ b/src/arena.c @@ -2095,7 +2095,7 @@ arena_ralloc_large(void *ptr, size_t oldsize, size_t size, size_t extra, size_t usize; /* Make sure extra can't cause size_t overflow. */ - if (extra >= arena_maxclass) + if (unlikely(extra >= arena_maxclass)) return (true); usize = s2u(size + extra); @@ -2142,7 +2142,7 @@ arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, /* * Avoid moving the allocation if the size class can be left the same. 
*/ - if (oldsize <= arena_maxclass) { + if (likely(oldsize <= arena_maxclass)) { if (oldsize <= SMALL_MAXCLASS) { assert(arena_bin_info[size2index(oldsize)].reg_size == oldsize); diff --git a/src/jemalloc.c b/src/jemalloc.c index 45439595..f130e999 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -264,7 +264,7 @@ a0alloc(size_t size, bool zero) if (size == 0) size = 1; - if (size <= arena_maxclass) + if (likely(size <= arena_maxclass)) ret = arena_malloc(NULL, a0get(), size, zero, false); else ret = huge_malloc(NULL, a0get(), size, zero, false); @@ -295,7 +295,7 @@ a0free(void *ptr) return; chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - if (chunk != ptr) + if (likely(chunk != ptr)) arena_dalloc(NULL, chunk, ptr, false); else huge_dalloc(NULL, ptr, false); From d33f834591a2459f22da7a165c524340b5fc3a0c Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Fri, 24 Oct 2014 13:18:57 -0400 Subject: [PATCH 0584/3378] avoid redundant chunk header reads * use sized deallocation in iralloct_realign * iralloc and ixalloc always need the old size, so pass it in from the caller where it's often already calculated --- .../jemalloc/internal/jemalloc_internal.h.in | 33 ++++++------ src/jemalloc.c | 54 +++++++++---------- 2 files changed, 42 insertions(+), 45 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 3ce5aba8..6f13093f 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -790,12 +790,13 @@ void isqalloc(tsd_t *tsd, void *ptr, size_t size, bool try_tcache); void *iralloct_realign(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t extra, size_t alignment, bool zero, bool try_tcache_alloc, bool try_tcache_dalloc, arena_t *arena); -void *iralloct(tsd_t *tsd, void *ptr, size_t size, size_t alignment, - bool zero, bool try_tcache_alloc, bool try_tcache_dalloc, arena_t *arena); -void *iralloc(tsd_t *tsd, void *ptr, size_t 
size, size_t alignment, - bool zero); -bool ixalloc(void *ptr, size_t size, size_t extra, size_t alignment, - bool zero); +void *iralloct(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, + size_t alignment, bool zero, bool try_tcache_alloc, bool try_tcache_dalloc, + arena_t *arena); +void *iralloc(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, + size_t alignment, bool zero); +bool ixalloc(void *ptr, size_t oldsize, size_t size, size_t extra, + size_t alignment, bool zero); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) @@ -1013,21 +1014,18 @@ iralloct_realign(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, */ copysize = (size < oldsize) ? size : oldsize; memcpy(p, ptr, copysize); - iqalloc(tsd, ptr, try_tcache_dalloc); + isqalloc(tsd, ptr, oldsize, try_tcache_dalloc); return (p); } JEMALLOC_ALWAYS_INLINE void * -iralloct(tsd_t *tsd, void *ptr, size_t size, size_t alignment, bool zero, - bool try_tcache_alloc, bool try_tcache_dalloc, arena_t *arena) +iralloct(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t alignment, + bool zero, bool try_tcache_alloc, bool try_tcache_dalloc, arena_t *arena) { - size_t oldsize; assert(ptr != NULL); assert(size != 0); - oldsize = isalloc(ptr, config_prof); - if (alignment != 0 && ((uintptr_t)ptr & ((uintptr_t)alignment-1)) != 0) { /* @@ -1048,21 +1046,22 @@ iralloct(tsd_t *tsd, void *ptr, size_t size, size_t alignment, bool zero, } JEMALLOC_ALWAYS_INLINE void * -iralloc(tsd_t *tsd, void *ptr, size_t size, size_t alignment, bool zero) +iralloc(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t alignment, + bool zero) { - return (iralloct(tsd, ptr, size, alignment, zero, true, true, NULL)); + return (iralloct(tsd, ptr, oldsize, size, alignment, zero, true, true, + NULL)); } JEMALLOC_ALWAYS_INLINE bool -ixalloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero) +ixalloc(void *ptr, size_t oldsize, size_t size, size_t extra, size_t alignment, + bool zero) { - size_t 
oldsize; assert(ptr != NULL); assert(size != 0); - oldsize = isalloc(ptr, config_prof); if (alignment != 0 && ((uintptr_t)ptr & ((uintptr_t)alignment-1)) != 0) { /* Existing object alignment is inadequate. */ diff --git a/src/jemalloc.c b/src/jemalloc.c index f130e999..7d559ef4 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1529,19 +1529,20 @@ label_return: } static void * -irealloc_prof_sample(tsd_t *tsd, void *oldptr, size_t usize, prof_tctx_t *tctx) +irealloc_prof_sample(tsd_t *tsd, void *oldptr, size_t old_usize, size_t usize, + prof_tctx_t *tctx) { void *p; if (tctx == NULL) return (NULL); if (usize <= SMALL_MAXCLASS) { - p = iralloc(tsd, oldptr, LARGE_MINCLASS, 0, false); + p = iralloc(tsd, oldptr, old_usize, LARGE_MINCLASS, 0, false); if (p == NULL) return (NULL); arena_prof_promoted(p, usize); } else - p = iralloc(tsd, oldptr, usize, 0, false); + p = iralloc(tsd, oldptr, old_usize, usize, 0, false); return (p); } @@ -1555,9 +1556,9 @@ irealloc_prof(tsd_t *tsd, void *oldptr, size_t old_usize, size_t usize) old_tctx = prof_tctx_get(oldptr); tctx = prof_alloc_prep(tsd, usize, true); if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) - p = irealloc_prof_sample(tsd, oldptr, usize, tctx); + p = irealloc_prof_sample(tsd, oldptr, old_usize, usize, tctx); else - p = iralloc(tsd, oldptr, usize, 0, false); + p = iralloc(tsd, oldptr, old_usize, usize, 0, false); if (p == NULL) return (NULL); prof_realloc(tsd, p, usize, tctx, true, old_usize, old_tctx); @@ -1630,9 +1631,7 @@ je_realloc(void *ptr, size_t size) malloc_thread_init(); tsd = tsd_fetch(); - if ((config_prof && opt_prof) || config_stats || - (config_valgrind && unlikely(in_valgrind))) - old_usize = isalloc(ptr, config_prof); + old_usize = isalloc(ptr, config_prof); if (config_valgrind && unlikely(in_valgrind)) old_rzsize = config_prof ? 
p2rz(ptr) : u2rz(old_usize); @@ -1643,7 +1642,7 @@ je_realloc(void *ptr, size_t size) if (config_stats || (config_valgrind && unlikely(in_valgrind))) usize = s2u(size); - ret = iralloc(tsd, ptr, size, 0, false); + ret = iralloc(tsd, ptr, old_usize, size, 0, false); } } else { /* realloc(NULL, size) is equivalent to malloc(size). */ @@ -1922,22 +1921,22 @@ label_oom: } static void * -irallocx_prof_sample(tsd_t *tsd, void *oldptr, size_t size, size_t alignment, - size_t usize, bool zero, bool try_tcache_alloc, bool try_tcache_dalloc, - arena_t *arena, prof_tctx_t *tctx) +irallocx_prof_sample(tsd_t *tsd, void *oldptr, size_t old_usize, size_t size, + size_t alignment, size_t usize, bool zero, bool try_tcache_alloc, + bool try_tcache_dalloc, arena_t *arena, prof_tctx_t *tctx) { void *p; if (tctx == NULL) return (NULL); if (usize <= SMALL_MAXCLASS) { - p = iralloct(tsd, oldptr, LARGE_MINCLASS, alignment, zero, - try_tcache_alloc, try_tcache_dalloc, arena); + p = iralloct(tsd, oldptr, old_usize, LARGE_MINCLASS, alignment, + zero, try_tcache_alloc, try_tcache_dalloc, arena); if (p == NULL) return (NULL); arena_prof_promoted(p, usize); } else { - p = iralloct(tsd, oldptr, size, alignment, zero, + p = iralloct(tsd, oldptr, old_usize, size, alignment, zero, try_tcache_alloc, try_tcache_dalloc, arena); } @@ -1955,10 +1954,11 @@ irallocx_prof(tsd_t *tsd, void *oldptr, size_t old_usize, size_t size, old_tctx = prof_tctx_get(oldptr); tctx = prof_alloc_prep(tsd, *usize, false); if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) { - p = irallocx_prof_sample(tsd, oldptr, size, alignment, *usize, - zero, try_tcache_alloc, try_tcache_dalloc, arena, tctx); + p = irallocx_prof_sample(tsd, oldptr, old_usize, size, + alignment, *usize, zero, try_tcache_alloc, + try_tcache_dalloc, arena, tctx); } else { - p = iralloct(tsd, oldptr, size, alignment, zero, + p = iralloct(tsd, oldptr, old_usize, size, alignment, zero, try_tcache_alloc, try_tcache_dalloc, arena); } if (unlikely(p == NULL)) { @@ 
-1988,7 +1988,7 @@ je_rallocx(void *ptr, size_t size, int flags) void *p; tsd_t *tsd; size_t usize; - UNUSED size_t old_usize JEMALLOC_CC_SILENCE_INIT(0); + size_t old_usize; UNUSED size_t old_rzsize JEMALLOC_CC_SILENCE_INIT(0); size_t alignment = MALLOCX_ALIGN_GET(flags); bool zero = flags & MALLOCX_ZERO; @@ -2016,9 +2016,7 @@ je_rallocx(void *ptr, size_t size, int flags) arena = NULL; } - if ((config_prof && opt_prof) || config_stats || - ((config_valgrind && unlikely(in_valgrind)))) - old_usize = isalloc(ptr, config_prof); + old_usize = isalloc(ptr, config_prof); if (config_valgrind && unlikely(in_valgrind)) old_rzsize = u2rz(old_usize); @@ -2030,8 +2028,8 @@ je_rallocx(void *ptr, size_t size, int flags) if (unlikely(p == NULL)) goto label_oom; } else { - p = iralloct(tsd, ptr, size, alignment, zero, try_tcache_alloc, - try_tcache_dalloc, arena); + p = iralloct(tsd, ptr, old_usize, size, alignment, zero, + try_tcache_alloc, try_tcache_dalloc, arena); if (unlikely(p == NULL)) goto label_oom; if (config_stats || (config_valgrind && unlikely(in_valgrind))) @@ -2061,7 +2059,7 @@ ixallocx_helper(void *ptr, size_t old_usize, size_t size, size_t extra, { size_t usize; - if (ixalloc(ptr, size, extra, alignment, zero)) + if (ixalloc(ptr, old_usize, size, extra, alignment, zero)) return (old_usize); usize = isalloc(ptr, config_prof); @@ -2080,9 +2078,9 @@ ixallocx_prof_sample(void *ptr, size_t old_usize, size_t size, size_t extra, /* Use minimum usize to determine whether promotion may happen. */ if (((alignment == 0) ? s2u(size) : sa2u(size, alignment)) <= SMALL_MAXCLASS) { - if (ixalloc(ptr, SMALL_MAXCLASS+1, (SMALL_MAXCLASS+1 >= - size+extra) ? 0 : size+extra - (SMALL_MAXCLASS+1), - alignment, zero)) + if (ixalloc(ptr, old_usize, SMALL_MAXCLASS+1, + (SMALL_MAXCLASS+1 >= size+extra) ? 
0 : size+extra - + (SMALL_MAXCLASS+1), alignment, zero)) return (old_usize); usize = isalloc(ptr, config_prof); if (max_usize < LARGE_MINCLASS) From cfc5706f6977a48f3b82d69cd68aa1cf8802fb8d Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 30 Oct 2014 23:18:45 -0700 Subject: [PATCH 0585/3378] Miscellaneous cleanups. --- include/jemalloc/internal/prof.h | 10 ++++------ src/jemalloc.c | 6 +++--- src/prof.c | 4 +++- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index e0d5f104..e0818849 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -361,10 +361,9 @@ prof_tctx_get(const void *ptr) assert(ptr != NULL); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - if (likely(chunk != ptr)) { - /* Region. */ + if (likely(chunk != ptr)) ret = arena_prof_tctx_get(ptr); - } else + else ret = huge_prof_tctx_get(ptr); return (ret); @@ -379,10 +378,9 @@ prof_tctx_set(const void *ptr, prof_tctx_t *tctx) assert(ptr != NULL); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - if (likely(chunk != ptr)) { - /* Region. 
*/ + if (likely(chunk != ptr)) arena_prof_tctx_set(ptr, tctx); - } else + else huge_prof_tctx_set(ptr, tctx); } diff --git a/src/jemalloc.c b/src/jemalloc.c index 7d559ef4..23947f42 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1243,7 +1243,7 @@ imalloc_prof(tsd_t *tsd, size_t usize) p = imalloc_prof_sample(tsd, usize, tctx); else p = imalloc(tsd, usize); - if (p == NULL) { + if (unlikely(p == NULL)) { prof_alloc_rollback(tsd, tctx, true); return (NULL); } @@ -1329,7 +1329,7 @@ imemalign_prof(tsd_t *tsd, size_t alignment, size_t usize) p = imemalign_prof_sample(tsd, alignment, usize, tctx); else p = ipalloc(tsd, usize, alignment, false); - if (p == NULL) { + if (unlikely(p == NULL)) { prof_alloc_rollback(tsd, tctx, true); return (NULL); } @@ -1457,7 +1457,7 @@ icalloc_prof(tsd_t *tsd, size_t usize) p = icalloc_prof_sample(tsd, usize, tctx); else p = icalloc(tsd, usize); - if (p == NULL) { + if (unlikely(p == NULL)) { prof_alloc_rollback(tsd, tctx, true); return (NULL); } diff --git a/src/prof.c b/src/prof.c index 71b0994a..4f5d4054 100644 --- a/src/prof.c +++ b/src/prof.c @@ -204,7 +204,9 @@ prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated) } void -prof_malloc_sample_object(const void *ptr, size_t usize, prof_tctx_t *tctx) { +prof_malloc_sample_object(const void *ptr, size_t usize, prof_tctx_t *tctx) +{ + prof_tctx_set(ptr, tctx); malloc_mutex_lock(tctx->tdata->lock); From dc652131110abb480df608d17b20cf5bd4cfe2d4 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Thu, 30 Oct 2014 23:23:16 -0400 Subject: [PATCH 0586/3378] rm unused arena wrangling from xallocx It has no use for the arena_t since unlike rallocx it never makes a new memory allocation. It's just an unused parameter in ixalloc_helper. 
--- src/jemalloc.c | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 23947f42..8b2ab8d7 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2055,7 +2055,7 @@ label_oom: JEMALLOC_ALWAYS_INLINE_C size_t ixallocx_helper(void *ptr, size_t old_usize, size_t size, size_t extra, - size_t alignment, bool zero, arena_t *arena) + size_t alignment, bool zero) { size_t usize; @@ -2068,8 +2068,7 @@ ixallocx_helper(void *ptr, size_t old_usize, size_t size, size_t extra, static size_t ixallocx_prof_sample(void *ptr, size_t old_usize, size_t size, size_t extra, - size_t alignment, size_t max_usize, bool zero, arena_t *arena, - prof_tctx_t *tctx) + size_t alignment, size_t max_usize, bool zero, prof_tctx_t *tctx) { size_t usize; @@ -2087,7 +2086,7 @@ ixallocx_prof_sample(void *ptr, size_t old_usize, size_t size, size_t extra, arena_prof_promoted(ptr, usize); } else { usize = ixallocx_helper(ptr, old_usize, size, extra, alignment, - zero, arena); + zero); } return (usize); @@ -2095,7 +2094,7 @@ ixallocx_prof_sample(void *ptr, size_t old_usize, size_t size, size_t extra, JEMALLOC_ALWAYS_INLINE_C size_t ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, - size_t extra, size_t alignment, bool zero, arena_t *arena) + size_t extra, size_t alignment, bool zero) { size_t max_usize, usize; prof_tctx_t *old_tctx, *tctx; @@ -2112,10 +2111,10 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, tctx = prof_alloc_prep(tsd, max_usize, false); if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) { usize = ixallocx_prof_sample(ptr, old_usize, size, extra, - alignment, zero, max_usize, arena, tctx); + alignment, zero, max_usize, tctx); } else { usize = ixallocx_helper(ptr, old_usize, size, extra, alignment, - zero, arena); + zero); } if (unlikely(usize == old_usize)) { prof_alloc_rollback(tsd, tctx, false); @@ -2134,7 +2133,6 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) 
UNUSED size_t old_rzsize JEMALLOC_CC_SILENCE_INIT(0); size_t alignment = MALLOCX_ALIGN_GET(flags); bool zero = flags & MALLOCX_ZERO; - arena_t *arena; assert(ptr != NULL); assert(size != 0); @@ -2143,22 +2141,16 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) malloc_thread_init(); tsd = tsd_fetch(); - if (unlikely((flags & MALLOCX_ARENA_MASK) != 0)) { - unsigned arena_ind = MALLOCX_ARENA_GET(flags); - arena = arena_get(tsd, arena_ind, true, true); - } else - arena = NULL; - old_usize = isalloc(ptr, config_prof); if (config_valgrind && unlikely(in_valgrind)) old_rzsize = u2rz(old_usize); if (config_prof && opt_prof) { usize = ixallocx_prof(tsd, ptr, old_usize, size, extra, - alignment, zero, arena); + alignment, zero); } else { usize = ixallocx_helper(ptr, old_usize, size, extra, alignment, - zero, arena); + zero); } if (unlikely(usize == old_usize)) goto label_not_resized; From 6da2e9d4f6fdccf5108296c99b2b839a4f474bae Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 31 Oct 2014 17:08:13 -0700 Subject: [PATCH 0587/3378] Fix arena_sdalloc() to use promoted size. 
--- include/jemalloc/internal/arena.h | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 16c04d25..8782b191 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -1020,9 +1020,9 @@ arena_dalloc(tsd_t *tsd, arena_chunk_t *chunk, void *ptr, bool try_tcache) assert(((uintptr_t)ptr & PAGE_MASK) == 0); if (try_tcache && size <= tcache_maxclass && likely((tcache = - tcache_get(tsd, false)) != NULL)) { + tcache_get(tsd, false)) != NULL)) tcache_dalloc_large(tcache, ptr, size); - } else + else arena_dalloc_large(chunk->arena, chunk, ptr); } } @@ -1031,18 +1031,26 @@ JEMALLOC_ALWAYS_INLINE void arena_sdalloc(tsd_t *tsd, arena_chunk_t *chunk, void *ptr, size_t size, bool try_tcache) { + index_t binind; tcache_t *tcache; assert(ptr != NULL); assert(CHUNK_ADDR2BASE(ptr) != ptr); + if (config_prof && opt_prof) { + /* Use promoted size, not request size. */ + size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; + binind = arena_mapbits_binind_get(chunk, pageind); + size = index2size(binind); + } else + binind = size2index(size); + if (likely(size <= SMALL_MAXCLASS)) { /* Small allocation. 
*/ if (likely(try_tcache) && likely((tcache = tcache_get(tsd, - false)) != NULL)) { - index_t binind = size2index(size); + false)) != NULL)) tcache_dalloc_small(tcache, ptr, binind); - } else { + else { size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; arena_dalloc_small(chunk->arena, chunk, ptr, pageind); @@ -1051,9 +1059,9 @@ arena_sdalloc(tsd_t *tsd, arena_chunk_t *chunk, void *ptr, size_t size, assert(((uintptr_t)ptr & PAGE_MASK) == 0); if (try_tcache && size <= tcache_maxclass && (tcache = - tcache_get(tsd, false)) != NULL) { + tcache_get(tsd, false)) != NULL) tcache_dalloc_large(tcache, ptr, size); - } else + else arena_dalloc_large(chunk->arena, chunk, ptr); } } From d7a9bab92db5dd3acc02e4f58e95637c6338c285 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 31 Oct 2014 22:26:24 -0700 Subject: [PATCH 0588/3378] Fix arena_sdalloc() to use promoted size (second attempt). Unlike the preceeding attempted fix, this version avoids the potential for converting an invalid bin index to a size class. --- include/jemalloc/internal/arena.h | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 8782b191..a42522d6 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -1031,26 +1031,29 @@ JEMALLOC_ALWAYS_INLINE void arena_sdalloc(tsd_t *tsd, arena_chunk_t *chunk, void *ptr, size_t size, bool try_tcache) { - index_t binind; tcache_t *tcache; assert(ptr != NULL); assert(CHUNK_ADDR2BASE(ptr) != ptr); if (config_prof && opt_prof) { - /* Use promoted size, not request size. 
*/ size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - binind = arena_mapbits_binind_get(chunk, pageind); - size = index2size(binind); - } else - binind = size2index(size); + assert(arena_mapbits_allocated_get(chunk, pageind) != 0); + if (arena_mapbits_large_get(chunk, pageind) != 0) { + /* Make sure to use promoted size, not request size. */ + assert(((uintptr_t)ptr & PAGE_MASK) == 0); + size = arena_mapbits_large_size_get(chunk, pageind); + } + } + assert(s2u(size) == s2u(arena_salloc(ptr, false))); if (likely(size <= SMALL_MAXCLASS)) { /* Small allocation. */ if (likely(try_tcache) && likely((tcache = tcache_get(tsd, - false)) != NULL)) + false)) != NULL)) { + index_t binind = size2index(size); tcache_dalloc_small(tcache, ptr, binind); - else { + } else { size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; arena_dalloc_small(chunk->arena, chunk, ptr, pageind); From 82cb603ed799f29e387f37fb44cdfbe98fd2e4ee Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 1 Nov 2014 00:20:28 -0700 Subject: [PATCH 0589/3378] Don't dereference NULL tdata in prof_{enter,leave}(). It is possible for the thread's tdata to be NULL late during thread destruction, so take care not to dereference a NULL pointer in such cases. 
--- src/prof.c | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/src/prof.c b/src/prof.c index 4f5d4054..1103cc94 100644 --- a/src/prof.c +++ b/src/prof.c @@ -253,8 +253,10 @@ prof_enter(tsd_t *tsd, prof_tdata_t *tdata) cassert(config_prof); assert(tdata == prof_tdata_get(tsd, false)); - assert(!tdata->enq); - tdata->enq = true; + if (tdata != NULL) { + assert(!tdata->enq); + tdata->enq = true; + } malloc_mutex_lock(&bt2gctx_mtx); } @@ -262,24 +264,27 @@ prof_enter(tsd_t *tsd, prof_tdata_t *tdata) JEMALLOC_INLINE_C void prof_leave(tsd_t *tsd, prof_tdata_t *tdata) { - bool idump, gdump; cassert(config_prof); assert(tdata == prof_tdata_get(tsd, false)); malloc_mutex_unlock(&bt2gctx_mtx); - assert(tdata->enq); - tdata->enq = false; - idump = tdata->enq_idump; - tdata->enq_idump = false; - gdump = tdata->enq_gdump; - tdata->enq_gdump = false; + if (tdata != NULL) { + bool idump, gdump; - if (idump) - prof_idump(); - if (gdump) - prof_gdump(); + assert(tdata->enq); + tdata->enq = false; + idump = tdata->enq_idump; + tdata->enq_idump = false; + gdump = tdata->enq_gdump; + tdata->enq_gdump = false; + + if (idump) + prof_idump(); + if (gdump) + prof_gdump(); + } } #ifdef JEMALLOC_PROF_LIBUNWIND From 2b2f6dc1e45808c31fb2f3ae33306d224ec0b2d2 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 1 Nov 2014 02:29:10 -0700 Subject: [PATCH 0590/3378] Disable arena_dirty_count() validation. 
--- src/arena.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/arena.c b/src/arena.c index 347d58e4..ef42771a 100644 --- a/src/arena.c +++ b/src/arena.c @@ -872,7 +872,7 @@ arena_dirty_count(arena_t *arena) ndirty += npages; } - return (ndirty); + return (ndirty); } static size_t @@ -1015,7 +1015,11 @@ arena_purge(arena_t *arena, bool all) size_t npurge, npurgeable, npurged; arena_chunk_miscelms_t purge_list; - if (config_debug) { + /* + * Calls to arena_dirty_count() are disabled even for debug builds + * because overhead grows nonlinearly as memory usage increases. + */ + if (false && config_debug) { size_t ndirty = arena_dirty_count(arena); assert(ndirty == arena->ndirty); } From c002a5c80058ee27acb234ef34f69b0cf6836836 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 4 Nov 2014 18:03:11 -0800 Subject: [PATCH 0591/3378] Fix two quarantine regressions. Fix quarantine to actually update tsd when expanding, and to avoid double initialization (leaking the first quarantine) due to recursive initialization. This resolves #161. 
--- include/jemalloc/internal/private_symbols.txt | 1 + include/jemalloc/internal/quarantine.h | 5 +++-- src/quarantine.c | 22 +++++++++++++++++++ 3 files changed, 26 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 8eec874f..1988c6ed 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -339,6 +339,7 @@ prof_thread_name_set quarantine quarantine_alloc_hook quarantine_cleanup +quarantine_alloc_hook_work quarantine_init register_zone rtree_delete diff --git a/include/jemalloc/internal/quarantine.h b/include/jemalloc/internal/quarantine.h index 4e9c710a..a399faaa 100644 --- a/include/jemalloc/internal/quarantine.h +++ b/include/jemalloc/internal/quarantine.h @@ -30,6 +30,7 @@ struct quarantine_s { #ifdef JEMALLOC_H_EXTERNS quarantine_t *quarantine_init(tsd_t *tsd, size_t lg_maxobjs); +void quarantine_alloc_hook_work(tsd_t *tsd); void quarantine(tsd_t *tsd, void *ptr); void quarantine_cleanup(tsd_t *tsd); @@ -50,8 +51,8 @@ quarantine_alloc_hook(void) assert(config_fill && opt_quarantine); tsd = tsd_fetch(); - if (tsd_quarantine_get(tsd) == NULL && tsd_nominal(tsd)) - tsd_quarantine_set(tsd, quarantine_init(tsd, LG_MAXOBJS_INIT)); + if (tsd_quarantine_get(tsd) == NULL) + quarantine_alloc_hook_work(tsd); } #endif diff --git a/src/quarantine.c b/src/quarantine.c index 1301b479..aa1c3b04 100644 --- a/src/quarantine.c +++ b/src/quarantine.c @@ -24,6 +24,8 @@ quarantine_init(tsd_t *tsd, size_t lg_maxobjs) { quarantine_t *quarantine; + assert(tsd_nominal(tsd)); + quarantine = (quarantine_t *)imalloc(tsd, offsetof(quarantine_t, objs) + ((ZU(1) << lg_maxobjs) * sizeof(quarantine_obj_t))); if (quarantine == NULL) @@ -36,6 +38,25 @@ quarantine_init(tsd_t *tsd, size_t lg_maxobjs) return (quarantine); } +void +quarantine_alloc_hook_work(tsd_t *tsd) +{ + quarantine_t *quarantine; + + if (!tsd_nominal(tsd)) + return; + + quarantine 
= quarantine_init(tsd, LG_MAXOBJS_INIT); + /* + * Check again whether quarantine has been initialized, because + * qurantine_init() may have triggered recursive initialization. + */ + if (tsd_quarantine_get(tsd) == NULL) + tsd_quarantine_set(tsd, quarantine); + else + idalloc(tsd, quarantine); +} + static quarantine_t * quarantine_grow(tsd_t *tsd, quarantine_t *quarantine) { @@ -67,6 +88,7 @@ quarantine_grow(tsd_t *tsd, quarantine_t *quarantine) } idalloc(tsd, quarantine); + tsd_quarantine_set(tsd, ret); return (ret); } From 9cf2be0a81b77d4586591c19fb469a51fe6684fa Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 7 Nov 2014 14:50:38 -0800 Subject: [PATCH 0592/3378] Make quarantine_init() static. --- include/jemalloc/internal/private_symbols.txt | 3 +-- include/jemalloc/internal/quarantine.h | 1 - src/quarantine.c | 2 +- 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 1988c6ed..ee973c9f 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -338,9 +338,8 @@ prof_thread_name_get prof_thread_name_set quarantine quarantine_alloc_hook -quarantine_cleanup quarantine_alloc_hook_work -quarantine_init +quarantine_cleanup register_zone rtree_delete rtree_get diff --git a/include/jemalloc/internal/quarantine.h b/include/jemalloc/internal/quarantine.h index a399faaa..ae607399 100644 --- a/include/jemalloc/internal/quarantine.h +++ b/include/jemalloc/internal/quarantine.h @@ -29,7 +29,6 @@ struct quarantine_s { /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -quarantine_t *quarantine_init(tsd_t *tsd, size_t lg_maxobjs); void quarantine_alloc_hook_work(tsd_t *tsd); void quarantine(tsd_t *tsd, void *ptr); void quarantine_cleanup(tsd_t *tsd); diff --git a/src/quarantine.c b/src/quarantine.c index aa1c3b04..ddacc6ee 100644 --- a/src/quarantine.c +++ 
b/src/quarantine.c @@ -19,7 +19,7 @@ static void quarantine_drain(tsd_t *tsd, quarantine_t *quarantine, /******************************************************************************/ -quarantine_t * +static quarantine_t * quarantine_init(tsd_t *tsd, size_t lg_maxobjs) { quarantine_t *quarantine; From 2012d5a5601c787ce464fac0cbd2b16e3754cfa2 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 17 Nov 2014 09:54:49 -0800 Subject: [PATCH 0593/3378] Fix pointer arithmetic undefined behavior. Reported by Denis Denisov. --- src/arena.c | 11 +++++++---- src/huge.c | 37 ++++++++++++++++++++++++------------- 2 files changed, 31 insertions(+), 17 deletions(-) diff --git a/src/arena.c b/src/arena.c index ef42771a..1ecc5d0b 100644 --- a/src/arena.c +++ b/src/arena.c @@ -690,8 +690,10 @@ arena_chunk_ralloc_huge_shrink(arena_t *arena, void *chunk, size_t oldsize, } arena->nactive -= udiff >> LG_PAGE; malloc_mutex_unlock(&arena->lock); - if (cdiff != 0) - chunk_dalloc(chunk + CHUNK_CEILING(usize), cdiff, arena->ind); + if (cdiff != 0) { + chunk_dalloc((void *)((uintptr_t)chunk + CHUNK_CEILING(usize)), + cdiff, arena->ind); + } } bool @@ -714,8 +716,9 @@ arena_chunk_ralloc_huge_expand(arena_t *arena, void *chunk, size_t oldsize, arena->nactive += (udiff >> LG_PAGE); malloc_mutex_unlock(&arena->lock); - if (chunk_alloc_arena(chunk_alloc, chunk_dalloc, arena->ind, chunk + - CHUNK_CEILING(oldsize), cdiff, chunksize, zero) == NULL) { + if (chunk_alloc_arena(chunk_alloc, chunk_dalloc, arena->ind, + (void *)((uintptr_t)chunk + CHUNK_CEILING(oldsize)), cdiff, + chunksize, zero) == NULL) { /* Revert optimistic stats updates. */ malloc_mutex_lock(&arena->lock); if (config_stats) { diff --git a/src/huge.c b/src/huge.c index 826464c2..7ad9b662 100644 --- a/src/huge.c +++ b/src/huge.c @@ -119,9 +119,11 @@ huge_ralloc_no_move_similar(void *ptr, size_t oldsize, size_t usize, /* Fill if necessary (shrinking). 
*/ if (oldsize > usize) { size_t sdiff = CHUNK_CEILING(usize) - usize; - zeroed = (sdiff != 0) ? !pages_purge(ptr + usize, sdiff) : true; + zeroed = (sdiff != 0) ? !pages_purge((void *)((uintptr_t)ptr + + usize), sdiff) : true; if (config_fill && unlikely(opt_junk)) { - memset(ptr + usize, 0x5a, oldsize - usize); + memset((void *)((uintptr_t)ptr + usize), 0x5a, oldsize - + usize); zeroed = false; } } else @@ -145,10 +147,14 @@ huge_ralloc_no_move_similar(void *ptr, size_t oldsize, size_t usize, /* Fill if necessary (growing). */ if (oldsize < usize) { if (zero || (config_fill && unlikely(opt_zero))) { - if (!zeroed) - memset(ptr + oldsize, 0, usize - oldsize); - } else if (config_fill && unlikely(opt_junk)) - memset(ptr + oldsize, 0xa5, usize - oldsize); + if (!zeroed) { + memset((void *)((uintptr_t)ptr + oldsize), 0, + usize - oldsize); + } + } else if (config_fill && unlikely(opt_junk)) { + memset((void *)((uintptr_t)ptr + oldsize), 0xa5, usize - + oldsize); + } } } @@ -161,9 +167,11 @@ huge_ralloc_no_move_shrink(void *ptr, size_t oldsize, size_t usize) arena_t *arena; sdiff = CHUNK_CEILING(usize) - usize; - zeroed = (sdiff != 0) ? !pages_purge(ptr + usize, sdiff) : true; + zeroed = (sdiff != 0) ? 
!pages_purge((void *)((uintptr_t)ptr + usize), + sdiff) : true; if (config_fill && unlikely(opt_junk)) { - huge_dalloc_junk(ptr + usize, oldsize - usize); + huge_dalloc_junk((void *)((uintptr_t)ptr + usize), oldsize - + usize); zeroed = false; } @@ -222,15 +230,18 @@ huge_ralloc_no_move_expand(void *ptr, size_t oldsize, size_t size, bool zero) { if (zero || (config_fill && unlikely(opt_zero))) { if (!is_zeroed_subchunk) { - memset(ptr + oldsize, 0, CHUNK_CEILING(oldsize) - - oldsize); + memset((void *)((uintptr_t)ptr + oldsize), 0, + CHUNK_CEILING(oldsize) - oldsize); } if (!is_zeroed_chunk) { - memset(ptr + CHUNK_CEILING(oldsize), 0, usize - + memset((void *)((uintptr_t)ptr + + CHUNK_CEILING(oldsize)), 0, usize - CHUNK_CEILING(oldsize)); } - } else if (config_fill && unlikely(opt_junk)) - memset(ptr + oldsize, 0xa5, usize - oldsize); + } else if (config_fill && unlikely(opt_junk)) { + memset((void *)((uintptr_t)ptr + oldsize), 0xa5, usize - + oldsize); + } return (false); } From d49cb68b9e8b57169240e16686f4f60d6b5a089f Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 17 Nov 2014 10:31:59 -0800 Subject: [PATCH 0594/3378] Fix more pointer arithmetic undefined behavior. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reported by Guilherme Gonçalves. This resolves #166. 
--- src/arena.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/arena.c b/src/arena.c index 1ecc5d0b..f351c090 100644 --- a/src/arena.c +++ b/src/arena.c @@ -2341,12 +2341,12 @@ arena_new(unsigned ind) if (config_stats) { memset(&arena->stats, 0, sizeof(arena_stats_t)); - arena->stats.lstats = (malloc_large_stats_t *)(((void *)arena) + - CACHELINE_CEILING(sizeof(arena_t))); + arena->stats.lstats = (malloc_large_stats_t *)((uintptr_t)arena + + CACHELINE_CEILING(sizeof(arena_t))); memset(arena->stats.lstats, 0, nlclasses * sizeof(malloc_large_stats_t)); - arena->stats.hstats = (malloc_huge_stats_t *)(((void *)arena) + - CACHELINE_CEILING(sizeof(arena_t)) + + arena->stats.hstats = (malloc_huge_stats_t *)((uintptr_t)arena + + CACHELINE_CEILING(sizeof(arena_t)) + QUANTUM_CEILING(nlclasses * sizeof(malloc_large_stats_t))); memset(arena->stats.hstats, 0, nhclasses * sizeof(malloc_huge_stats_t)); From a2136025c4c4861b91f361a90c1dc94214848708 Mon Sep 17 00:00:00 2001 From: Guilherme Goncalves Date: Tue, 18 Nov 2014 18:48:48 -0200 Subject: [PATCH 0595/3378] Remove extra definition of je_tsd_boot on win32. --- include/jemalloc/internal/tsd.h | 6 ------ 1 file changed, 6 deletions(-) diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index b5658f8e..35dd8628 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -364,12 +364,6 @@ a_name##tsd_boot(void) \ a_name##tsd_boot1(); \ return (false); \ } \ -a_attr bool \ -a_name##tsd_boot(void) \ -{ \ - \ - return (false); \ -} \ /* Get/set. */ \ a_attr a_type * \ a_name##tsd_get(void) \ From 879e76a9e57e725e927e77900940967d301a4958 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Mon, 3 Nov 2014 14:02:52 -0500 Subject: [PATCH 0596/3378] teach the dss chunk allocator to handle new_addr This provides in-place expansion of huge allocations when the end of the allocation is at the end of the sbrk heap. 
There's already the ability to extend in-place via recycled chunks but this handles the initial growth of the heap via repeated vector / string reallocations. A possible future extension could allow realloc to go from the following: | huge allocation | recycled chunks | ^ dss_end To a larger allocation built from recycled *and* new chunks: | huge allocation | ^ dss_end Doing that would involve teaching the chunk recycling code to request new chunks to satisfy the request. The chunk_dss code wouldn't require any further changes. #include int main(void) { size_t chunk = 4 * 1024 * 1024; void *ptr = NULL; for (size_t size = chunk; size < chunk * 128; size *= 2) { ptr = realloc(ptr, size); if (!ptr) return 1; } } dss:secondary: 0.083s dss:primary: 0.083s After: dss:secondary: 0.083s dss:primary: 0.003s The dss heap grows in the upwards direction, so the oldest chunks are at the low addresses and they are used first. Linux prefers to grow the mmap heap downwards, so the trick will not work in the *current* mmap chunk allocator as a huge allocation will only be at the top of the heap in a contrived case. 
--- include/jemalloc/internal/chunk_dss.h | 3 ++- src/chunk.c | 12 +++++------- src/chunk_dss.c | 11 ++++++++++- 3 files changed, 17 insertions(+), 9 deletions(-) diff --git a/include/jemalloc/internal/chunk_dss.h b/include/jemalloc/internal/chunk_dss.h index 4535ce09..09896470 100644 --- a/include/jemalloc/internal/chunk_dss.h +++ b/include/jemalloc/internal/chunk_dss.h @@ -23,7 +23,8 @@ extern const char *dss_prec_names[]; dss_prec_t chunk_dss_prec_get(void); bool chunk_dss_prec_set(dss_prec_t dss_prec); -void *chunk_alloc_dss(size_t size, size_t alignment, bool *zero); +void *chunk_alloc_dss(void *new_addr, size_t size, size_t alignment, + bool *zero); bool chunk_in_dss(void *chunk); bool chunk_dss_boot(void); void chunk_dss_prefork(void); diff --git a/src/chunk.c b/src/chunk.c index a7761162..b3737180 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -154,16 +154,15 @@ chunk_alloc_core(void *new_addr, size_t size, size_t alignment, bool base, if ((ret = chunk_recycle(&chunks_szad_dss, &chunks_ad_dss, new_addr, size, alignment, base, zero)) != NULL) return (ret); - /* requesting an address only implemented for recycle */ - if (new_addr == NULL - && (ret = chunk_alloc_dss(size, alignment, zero)) != NULL) + if ((ret = chunk_alloc_dss(new_addr, size, alignment, zero)) + != NULL) return (ret); } /* mmap. 
*/ if ((ret = chunk_recycle(&chunks_szad_mmap, &chunks_ad_mmap, new_addr, size, alignment, base, zero)) != NULL) return (ret); - /* requesting an address only implemented for recycle */ + /* requesting an address not implemented for chunk_alloc_mmap */ if (new_addr == NULL && (ret = chunk_alloc_mmap(size, alignment, zero)) != NULL) return (ret); @@ -172,9 +171,8 @@ chunk_alloc_core(void *new_addr, size_t size, size_t alignment, bool base, if ((ret = chunk_recycle(&chunks_szad_dss, &chunks_ad_dss, new_addr, size, alignment, base, zero)) != NULL) return (ret); - /* requesting an address only implemented for recycle */ - if (new_addr == NULL && - (ret = chunk_alloc_dss(size, alignment, zero)) != NULL) + if ((ret = chunk_alloc_dss(new_addr, size, alignment, zero)) + != NULL) return (ret); } diff --git a/src/chunk_dss.c b/src/chunk_dss.c index cce71041..edba3b23 100644 --- a/src/chunk_dss.c +++ b/src/chunk_dss.c @@ -66,7 +66,7 @@ chunk_dss_prec_set(dss_prec_t dss_prec) } void * -chunk_alloc_dss(size_t size, size_t alignment, bool *zero) +chunk_alloc_dss(void *new_addr, size_t size, size_t alignment, bool *zero) { void *ret; @@ -93,8 +93,17 @@ chunk_alloc_dss(size_t size, size_t alignment, bool *zero) * malloc. */ do { + /* Avoid an unnecessary system call. */ + if (new_addr != NULL && dss_max != new_addr) + break; + /* Get the current end of the DSS. */ dss_max = chunk_dss_sbrk(0); + + /* Make sure the earlier condition still holds. */ + if (new_addr != NULL && dss_max != new_addr) + break; + /* * Calculate how much padding is necessary to * chunk-align the end of the DSS. From f79e01f75b79058c3be0ce6de0d46f8a9a990176 Mon Sep 17 00:00:00 2001 From: Yuriy Kaminskiy Date: Tue, 2 Dec 2014 16:24:11 -0800 Subject: [PATCH 0597/3378] Fix test_stats_arenas_bins for 32-bit builds. 
--- test/unit/stats.c | 1 + 1 file changed, 1 insertion(+) diff --git a/test/unit/stats.c b/test/unit/stats.c index fd92d542..946e7370 100644 --- a/test/unit/stats.c +++ b/test/unit/stats.c @@ -327,6 +327,7 @@ TEST_BEGIN(test_stats_arenas_bins) assert_d_eq(mallctl("stats.arenas.0.bins.0.curregs", &curregs, &sz, NULL, 0), expected, "Unexpected mallctl() result"); + sz = sizeof(uint64_t); assert_d_eq(mallctl("stats.arenas.0.bins.0.nfills", &nfills, &sz, NULL, 0), config_tcache ? expected : ENOENT, "Unexpected mallctl() result"); From 1036ddbf11b7e9ec566b92b3dd50e105fc5f6932 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 4 Dec 2014 16:42:42 -0800 Subject: [PATCH 0598/3378] Fix OOM cleanup in huge_palloc(). Fix OOM cleanup in huge_palloc() to call idalloct() rather than base_node_dalloc(). This bug is a result of incomplete refactoring, and has no impact other than leaking memory during OOM. --- src/huge.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/huge.c b/src/huge.c index 7ad9b662..68839037 100644 --- a/src/huge.c +++ b/src/huge.c @@ -48,12 +48,8 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, */ is_zeroed = zero; arena = arena_choose(tsd, arena); - if (unlikely(arena == NULL)) { - base_node_dalloc(node); - return (NULL); - } - ret = arena_chunk_alloc_huge(arena, usize, alignment, &is_zeroed); - if (ret == NULL) { + if (unlikely(arena == NULL) || (ret = arena_chunk_alloc_huge(arena, + usize, alignment, &is_zeroed)) == NULL) { idalloct(tsd, node, try_tcache); return (NULL); } From a18c2b1f152b4334474ed32fc46d762d4fa54c2b Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 5 Dec 2014 17:49:47 -0800 Subject: [PATCH 0599/3378] Style fixes. 
--- include/jemalloc/internal/atomic.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/atomic.h b/include/jemalloc/internal/atomic.h index a0488157..8b743b88 100644 --- a/include/jemalloc/internal/atomic.h +++ b/include/jemalloc/internal/atomic.h @@ -58,7 +58,7 @@ atomic_sub_uint64(uint64_t *p, uint64_t x) return (__sync_sub_and_fetch(p, x)); } -#elif (defined(_MSC_VER)) +# elif (defined(_MSC_VER)) JEMALLOC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x) { @@ -72,7 +72,7 @@ atomic_sub_uint64(uint64_t *p, uint64_t x) return (InterlockedExchangeAdd64(p, -((int64_t)x)) - x); } -#elif (defined(JEMALLOC_OSATOMIC)) +# elif (defined(JEMALLOC_OSATOMIC)) JEMALLOC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x) { From 59cd80e6c6e36c26a880e86f6cde9f71808b256c Mon Sep 17 00:00:00 2001 From: Chih-hung Hsieh Date: Fri, 5 Dec 2014 17:42:41 -0800 Subject: [PATCH 0600/3378] Add a C11 atomics-based implementation of atomic.h API. --- configure.ac | 21 ++++++++++++++ include/jemalloc/internal/atomic.h | 28 +++++++++++++++++++ .../jemalloc/internal/jemalloc_internal.h.in | 4 +++ .../internal/jemalloc_internal_defs.h.in | 3 ++ 4 files changed, 56 insertions(+) diff --git a/configure.ac b/configure.ac index 5c51f27f..8b1e55e4 100644 --- a/configure.ac +++ b/configure.ac @@ -1199,6 +1199,27 @@ elif test "x${force_tls}" = "x1" ; then AC_MSG_ERROR([Failed to configure TLS, which is mandatory for correct function]) fi +dnl ============================================================================ +dnl Check for C11 atomics. 
+ +JE_COMPILABLE([C11 atomics], [ +#include +#if (__STDC_VERSION__ >= 201112L) && !defined(__STDC_NO_ATOMICS__) +#include +#else +#error Atomics not available +#endif +], [ + uint64_t *p = (uint64_t *)0; + uint64_t x = 1; + volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p; + uint64_t r = atomic_fetch_add(a, x) + x; + return (r == 0); +], [je_cv_c11atomics]) +if test "x${je_cv_c11atomics}" = "xyes" ; then + AC_DEFINE([JEMALLOC_C11ATOMICS]) +fi + dnl ============================================================================ dnl Check for atomic(9) operations as provided on FreeBSD. diff --git a/include/jemalloc/internal/atomic.h b/include/jemalloc/internal/atomic.h index 8b743b88..23ac93ff 100644 --- a/include/jemalloc/internal/atomic.h +++ b/include/jemalloc/internal/atomic.h @@ -72,6 +72,20 @@ atomic_sub_uint64(uint64_t *p, uint64_t x) return (InterlockedExchangeAdd64(p, -((int64_t)x)) - x); } +# elif (defined(JEMALLOC_C11ATOMICS)) +JEMALLOC_INLINE uint64_t +atomic_add_uint64(uint64_t *p, uint64_t x) +{ + volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p; + return (atomic_fetch_add(a, x) + x); +} + +JEMALLOC_INLINE uint64_t +atomic_sub_uint64(uint64_t *p, uint64_t x) +{ + volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p; + return (atomic_fetch_sub(a, x) - x); +} # elif (defined(JEMALLOC_OSATOMIC)) JEMALLOC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x) @@ -187,6 +201,20 @@ atomic_sub_uint32(uint32_t *p, uint32_t x) return (InterlockedExchangeAdd(p, -((int32_t)x)) - x); } +# elif (defined(JEMALLOC_C11ATOMICS)) +JEMALLOC_INLINE uint32_t +atomic_add_uint32(uint32_t *p, uint32_t x) +{ + volatile atomic_uint_least32_t *a = (volatile atomic_uint_least32_t *)p; + return (atomic_fetch_add(a, x) + x); +} + +JEMALLOC_INLINE uint32_t +atomic_sub_uint32(uint32_t *p, uint32_t x) +{ + volatile atomic_uint_least32_t *a = (volatile atomic_uint_least32_t *)p; + return (atomic_fetch_sub(a, x) - x); +} 
#elif (defined(JEMALLOC_OSATOMIC)) JEMALLOC_INLINE uint32_t atomic_add_uint32(uint32_t *p, uint32_t x) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 6f13093f..bf10617e 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -127,6 +127,10 @@ static const bool config_ivsalloc = #endif ; +#ifdef JEMALLOC_C11ATOMICS +#include +#endif + #ifdef JEMALLOC_ATOMIC9 #include #endif diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index dccbb1ed..2923e83f 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -22,6 +22,9 @@ */ #undef CPU_SPINWAIT +/* Defined if C11 atomics are available. */ +#undef JEMALLOC_C11ATOMICS + /* Defined if the equivalent of FreeBSD's atomic(9) functions are available. */ #undef JEMALLOC_ATOMIC9 From e12eaf93dca308a426c182956197b0eeb5f2cff3 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 8 Dec 2014 14:40:14 -0800 Subject: [PATCH 0601/3378] Style and spelling fixes. 
--- include/jemalloc/internal/arena.h | 5 ++--- include/jemalloc/internal/extent.h | 2 +- include/jemalloc/internal/hash.h | 5 +++-- include/jemalloc/internal/jemalloc_internal.h.in | 2 +- .../jemalloc/internal/jemalloc_internal_decls.h | 2 +- .../jemalloc/internal/jemalloc_internal_defs.h.in | 14 ++++++-------- include/jemalloc/internal/ql.h | 4 +--- include/jemalloc/internal/qr.h | 6 ++++-- include/jemalloc/internal/rb.h | 4 ++-- include/jemalloc/internal/tcache.h | 2 +- include/jemalloc/internal/util.h | 6 +++--- src/arena.c | 2 +- src/chunk.c | 2 +- src/ckh.c | 4 ++-- src/jemalloc.c | 2 +- src/quarantine.c | 4 ++-- src/zone.c | 2 +- test/include/test/math.h | 2 +- test/include/test/thd.h | 2 +- test/include/test/timer.h | 4 +--- 20 files changed, 36 insertions(+), 40 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index a42522d6..1e190234 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -263,8 +263,7 @@ struct arena_s { /* * There are three classes of arena operations from a locking * perspective: - * 1) Thread asssignment (modifies nthreads) is protected by - * arenas_lock. + * 1) Thread assignment (modifies nthreads) is protected by arenas_lock. * 2) Bin-related operations are protected by bin locks. * 3) Chunk- and run-related operations are protected by this mutex. */ @@ -314,7 +313,7 @@ struct arena_s { arena_chunk_miscelms_t runs_dirty; /* - * user-configureable chunk allocation and deallocation functions. + * User-configurable chunk allocation and deallocation functions. */ chunk_alloc_t *chunk_alloc; chunk_dalloc_t *chunk_dalloc; diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index 5b00076f..cbfc20a9 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -24,7 +24,7 @@ struct extent_node_s { /* Total region size. 
*/ size_t size; - /* Arena from which this extent came, if any */ + /* Arena from which this extent came, if any. */ arena_t *arena; /* True if zero-filled; used by chunk recycling code. */ diff --git a/include/jemalloc/internal/hash.h b/include/jemalloc/internal/hash.h index a43bbbec..bcead337 100644 --- a/include/jemalloc/internal/hash.h +++ b/include/jemalloc/internal/hash.h @@ -35,13 +35,14 @@ JEMALLOC_INLINE uint32_t hash_rotl_32(uint32_t x, int8_t r) { - return (x << r) | (x >> (32 - r)); + return ((x << r) | (x >> (32 - r))); } JEMALLOC_INLINE uint64_t hash_rotl_64(uint64_t x, int8_t r) { - return (x << r) | (x >> (64 - r)); + + return ((x << r) | (x >> (64 - r))); } JEMALLOC_INLINE uint32_t diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index bf10617e..9bd501c0 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -302,7 +302,7 @@ typedef unsigned index_t; #define ALIGNMENT_CEILING(s, alignment) \ (((s) + (alignment - 1)) & (-(alignment))) -/* Declare a variable length array */ +/* Declare a variable-length array. */ #if __STDC_VERSION__ < 199901L # ifdef _MSC_VER # include diff --git a/include/jemalloc/internal/jemalloc_internal_decls.h b/include/jemalloc/internal/jemalloc_internal_decls.h index fa590404..fb2effbf 100644 --- a/include/jemalloc/internal/jemalloc_internal_decls.h +++ b/include/jemalloc/internal/jemalloc_internal_decls.h @@ -50,7 +50,7 @@ typedef intptr_t ssize_t; # define PATH_MAX 1024 # define STDERR_FILENO 2 # define __func__ __FUNCTION__ -/* Disable warnings about deprecated system functions */ +/* Disable warnings about deprecated system functions. 
*/ # pragma warning(disable: 4996) #else # include diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 2923e83f..e172c661 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -38,7 +38,7 @@ * Defined if __sync_add_and_fetch(uint32_t *, uint32_t) and * __sync_sub_and_fetch(uint32_t *, uint32_t) are available, despite * __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 not being defined (which means the - * functions are defined in libgcc instead of being inlines) + * functions are defined in libgcc instead of being inlines). */ #undef JE_FORCE_SYNC_COMPARE_AND_SWAP_4 @@ -46,7 +46,7 @@ * Defined if __sync_add_and_fetch(uint64_t *, uint64_t) and * __sync_sub_and_fetch(uint64_t *, uint64_t) are available, despite * __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 not being defined (which means the - * functions are defined in libgcc instead of being inlines) + * functions are defined in libgcc instead of being inlines). */ #undef JE_FORCE_SYNC_COMPARE_AND_SWAP_8 @@ -201,9 +201,7 @@ #undef JEMALLOC_PURGE_MADVISE_DONTNEED #undef JEMALLOC_PURGE_MADVISE_FREE -/* - * Define if operating system has alloca.h header. - */ +/* Define if operating system has alloca.h header. */ #undef JEMALLOC_HAS_ALLOCA_H /* C99 restrict keyword supported. */ @@ -221,13 +219,13 @@ /* sizeof(intmax_t) == 2^LG_SIZEOF_INTMAX_T. */ #undef LG_SIZEOF_INTMAX_T -/* glibc malloc hooks (__malloc_hook, __realloc_hook, __free_hook) */ +/* glibc malloc hooks (__malloc_hook, __realloc_hook, __free_hook). */ #undef JEMALLOC_GLIBC_MALLOC_HOOK -/* glibc memalign hook */ +/* glibc memalign hook. */ #undef JEMALLOC_GLIBC_MEMALIGN_HOOK -/* adaptive mutex support in pthreads */ +/* Adaptive mutex support in pthreads. 
*/ #undef JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP #endif /* JEMALLOC_INTERNAL_DEFS_H_ */ diff --git a/include/jemalloc/internal/ql.h b/include/jemalloc/internal/ql.h index f70c5f6f..1834bb85 100644 --- a/include/jemalloc/internal/ql.h +++ b/include/jemalloc/internal/ql.h @@ -1,6 +1,4 @@ -/* - * List definitions. - */ +/* List definitions. */ #define ql_head(a_type) \ struct { \ a_type *qlh_first; \ diff --git a/include/jemalloc/internal/qr.h b/include/jemalloc/internal/qr.h index 602944b9..0fbaec25 100644 --- a/include/jemalloc/internal/qr.h +++ b/include/jemalloc/internal/qr.h @@ -40,8 +40,10 @@ struct { \ (a_qr_b)->a_field.qre_prev = t; \ } while (0) -/* qr_meld() and qr_split() are functionally equivalent, so there's no need to - * have two copies of the code. */ +/* + * qr_meld() and qr_split() are functionally equivalent, so there's no need to + * have two copies of the code. + */ #define qr_split(a_qr_a, a_qr_b, a_field) \ qr_meld((a_qr_a), (a_qr_b), a_field) diff --git a/include/jemalloc/internal/rb.h b/include/jemalloc/internal/rb.h index 64fab89c..2ca8e593 100644 --- a/include/jemalloc/internal/rb.h +++ b/include/jemalloc/internal/rb.h @@ -200,7 +200,7 @@ a_prefix##reverse_iter(a_rbt_type *rbtree, a_type *start, \ * int (a_cmp *)(a_type *a_node, a_type *a_other); * ^^^^^^ * or a_key - * Interpretation of comparision function return values: + * Interpretation of comparison function return values: * -1 : a_node < a_other * 0 : a_node == a_other * 1 : a_node > a_other @@ -693,7 +693,7 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ rbtn_rotate_left(a_type, a_field, pathp->node, \ tnode); \ /* Balance restored, but rotation modified */\ - /* subree root, which may actually be the tree */\ + /* subtree root, which may actually be the tree */\ /* root. */\ if (pathp == path) { \ /* Set root. 
*/ \ diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index fe9c47e8..3a3fd496 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -69,7 +69,7 @@ struct tcache_bin_s { struct tcache_s { ql_elm(tcache_t) link; /* Used for aggregating stats. */ - uint64_t prof_accumbytes;/* Cleared after arena_prof_accum() */ + uint64_t prof_accumbytes;/* Cleared after arena_prof_accum(). */ arena_t *arena; /* This thread's arena. */ unsigned ev_cnt; /* Event count since incremental GC. */ index_t next_gc_bin; /* Next bin to GC. */ diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h index 5af68329..b2b4ab74 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util.h @@ -127,7 +127,7 @@ int get_errno(void); #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_UTIL_C_)) -/* Sanity check: */ +/* Sanity check. */ #if !defined(JEMALLOC_INTERNAL_FFSL) || !defined(JEMALLOC_INTERNAL_FFS) # error Both JEMALLOC_INTERNAL_FFSL && JEMALLOC_INTERNAL_FFS should have been defined by configure #endif @@ -231,7 +231,7 @@ lg_floor(size_t x) } #endif -/* Sets error code */ +/* Set error code. */ JEMALLOC_INLINE void set_errno(int errnum) { @@ -243,7 +243,7 @@ set_errno(int errnum) #endif } -/* Get last error code */ +/* Get last error code. 
*/ JEMALLOC_INLINE int get_errno(void) { diff --git a/src/arena.c b/src/arena.c index f351c090..6f2410ac 100644 --- a/src/arena.c +++ b/src/arena.c @@ -36,7 +36,7 @@ arena_miscelm_to_bits(arena_chunk_map_misc_t *miscelm) arena_chunk_t *chunk = CHUNK_ADDR2BASE(miscelm); size_t pageind = arena_miscelm_to_pageind(miscelm); - return arena_mapbits_get(chunk, pageind); + return (arena_mapbits_get(chunk, pageind)); } JEMALLOC_INLINE_C int diff --git a/src/chunk.c b/src/chunk.c index b3737180..79264527 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -162,7 +162,7 @@ chunk_alloc_core(void *new_addr, size_t size, size_t alignment, bool base, if ((ret = chunk_recycle(&chunks_szad_mmap, &chunks_ad_mmap, new_addr, size, alignment, base, zero)) != NULL) return (ret); - /* requesting an address not implemented for chunk_alloc_mmap */ + /* Requesting an address not implemented for chunk_alloc_mmap(). */ if (new_addr == NULL && (ret = chunk_alloc_mmap(size, alignment, zero)) != NULL) return (ret); diff --git a/src/ckh.c b/src/ckh.c index 3a545966..db2ae392 100644 --- a/src/ckh.c +++ b/src/ckh.c @@ -367,10 +367,10 @@ ckh_new(tsd_t *tsd, ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh->count = 0; /* - * Find the minimum power of 2 that is large enough to fit aBaseCount + * Find the minimum power of 2 that is large enough to fit minitems * entries. We are using (2+,2) cuckoo hashing, which has an expected * maximum load factor of at least ~0.86, so 0.75 is a conservative load - * factor that will typically allow 2^aLgMinItems to fit without ever + * factor that will typically allow mincells items to fit without ever * growing the table. */ assert(LG_CKH_BUCKET_CELLS > 0); diff --git a/src/jemalloc.c b/src/jemalloc.c index 8b2ab8d7..f7cc4575 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -808,7 +808,7 @@ malloc_conf_init(void) if (linklen == -1) { /* No configuration specified. */ linklen = 0; - /* restore errno */ + /* Restore errno. 
*/ set_errno(saved_errno); } #endif diff --git a/src/quarantine.c b/src/quarantine.c index ddacc6ee..c5fa566b 100644 --- a/src/quarantine.c +++ b/src/quarantine.c @@ -2,7 +2,7 @@ #include "jemalloc/internal/jemalloc_internal.h" /* - * quarantine pointers close to NULL are used to encode state information that + * Quarantine pointers close to NULL are used to encode state information that * is used for cleaning up during thread shutdown. */ #define QUARANTINE_STATE_REINCARNATED ((quarantine_t *)(uintptr_t)1) @@ -49,7 +49,7 @@ quarantine_alloc_hook_work(tsd_t *tsd) quarantine = quarantine_init(tsd, LG_MAXOBJS_INIT); /* * Check again whether quarantine has been initialized, because - * qurantine_init() may have triggered recursive initialization. + * quarantine_init() may have triggered recursive initialization. */ if (tsd_quarantine_get(tsd) == NULL) tsd_quarantine_set(tsd, quarantine); diff --git a/src/zone.c b/src/zone.c index c6bd533f..12e1734a 100644 --- a/src/zone.c +++ b/src/zone.c @@ -263,7 +263,7 @@ register_zone(void) * after the default zone. On OSX < 10.6, there is no purgeable * zone, so this does nothing. On OSX >= 10.6, unregistering * replaces the purgeable zone with the last registered zone - * above, i.e the default zone. Registering it again then puts + * above, i.e. the default zone. Registering it again then puts * it at the end, obviously after the default zone. */ if (purgeable_zone) { diff --git a/test/include/test/math.h b/test/include/test/math.h index a862ed7d..b057b29a 100644 --- a/test/include/test/math.h +++ b/test/include/test/math.h @@ -299,7 +299,7 @@ pt_chi2(double p, double df, double ln_gamma_df_2) /* * Given a value p in [0..1] and Gamma distribution shape and scale parameters, - * compute the upper limit on the definite integeral from [0..z] that satisfies + * compute the upper limit on the definite integral from [0..z] that satisfies * p. 
*/ JEMALLOC_INLINE double diff --git a/test/include/test/thd.h b/test/include/test/thd.h index f941d7a7..47a51262 100644 --- a/test/include/test/thd.h +++ b/test/include/test/thd.h @@ -1,4 +1,4 @@ -/* Abstraction layer for threading in tests */ +/* Abstraction layer for threading in tests. */ #ifdef _WIN32 typedef HANDLE thd_t; #else diff --git a/test/include/test/timer.h b/test/include/test/timer.h index 6877e4ac..496072ac 100644 --- a/test/include/test/timer.h +++ b/test/include/test/timer.h @@ -1,6 +1,4 @@ -/* - * Simple timer, for use in benchmark reporting. - */ +/* Simple timer, for use in benchmark reporting. */ #include From b74041fb6e279bd8bbc133250241249f90cd619f Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Tue, 9 Dec 2014 17:41:34 -0500 Subject: [PATCH 0602/3378] Ignore MALLOC_CONF in set{uid,gid,cap} binaries. This eliminates the malloc tunables as tools for an attacker. Closes #173 --- configure.ac | 18 +++++++++++++++ .../internal/jemalloc_internal_defs.h.in | 10 ++++++++ src/jemalloc.c | 23 ++++++++++++++++++- 3 files changed, 50 insertions(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 8b1e55e4..82bdefdd 100644 --- a/configure.ac +++ b/configure.ac @@ -1108,6 +1108,24 @@ fi CPPFLAGS="$CPPFLAGS -D_REENTRANT" +dnl Check if the GNU-specific secure_getenv function exists. +AC_CHECK_FUNC([secure_getenv], + [have_secure_getenv="1"], + [have_secure_getenv="0"] + ) +if test "x$have_secure_getenv" = "x1" ; then + AC_DEFINE([JEMALLOC_HAVE_SECURE_GETENV], [ ]) +fi + +dnl Check if the Solaris/BSD issetugid function exists. +AC_CHECK_FUNC([issetugid], + [have_issetugid="1"], + [have_issetugid="0"] + ) +if test "x$have_issetugid" = "x1" ; then + AC_DEFINE([JEMALLOC_HAVE_ISSETUGID], [ ]) +fi + dnl Check whether the BSD-specific _malloc_thread_cleanup() exists. 
If so, use dnl it rather than pthreads TSD cleanup functions to support cleanup during dnl thread exit, in order to avoid pthreads library recursion during diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index e172c661..c8d7dafb 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -66,6 +66,16 @@ */ #undef JEMALLOC_OSSPIN +/* + * Defined if secure_getenv(3) is available. + */ +#undef JEMALLOC_HAVE_SECURE_GETENV + +/* + * Defined if issetugid(2) is available. + */ +#undef JEMALLOC_HAVE_ISSETUGID + /* * Defined if _malloc_thread_cleanup() exists. At least in the case of * FreeBSD, pthread_key_create() allocates, which if used during malloc diff --git a/src/jemalloc.c b/src/jemalloc.c index f7cc4575..48de0da0 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -648,6 +648,27 @@ stats_print_atexit(void) * Begin initialization functions. */ +#ifndef JEMALLOC_HAVE_SECURE_GETENV +# ifdef JEMALLOC_HAVE_ISSETUGID +static char * +secure_getenv(const char *name) +{ + + if (issetugid() == 0) + return (getenv(name)); + else + return (NULL); +} +# else +static char * +secure_getenv(const char *name) +{ + + return (getenv(name)); +} +# endif +#endif + static unsigned malloc_ncpus(void) { @@ -824,7 +845,7 @@ malloc_conf_init(void) #endif ; - if ((opts = getenv(envname)) != NULL) { + if ((opts = secure_getenv(envname)) != NULL) { /* * Do nothing; opts is already initialized to * the value of the MALLOC_CONF environment From 2c5cb613dfbdf58f88152321b63e60c58cd23972 Mon Sep 17 00:00:00 2001 From: Guilherme Goncalves Date: Mon, 8 Dec 2014 19:12:41 -0200 Subject: [PATCH 0603/3378] Introduce two new modes of junk filling: "alloc" and "free". In addition to true/false, opt.junk can now be either "alloc" or "free", giving applications the possibility of junking memory only on allocation or deallocation. This resolves #172. 
--- Makefile.in | 2 + doc/jemalloc.xml.in | 20 ++++--- .../jemalloc/internal/jemalloc_internal.h.in | 4 +- include/jemalloc/internal/private_symbols.txt | 2 + include/jemalloc/internal/tcache.h | 10 ++-- src/arena.c | 55 ++++++++++--------- src/ctl.c | 2 +- src/huge.c | 12 ++-- src/jemalloc.c | 54 +++++++++++++++--- src/quarantine.c | 2 +- test/unit/junk.c | 41 +++++++++----- test/unit/junk_alloc.c | 3 + test/unit/junk_free.c | 3 + test/unit/mallctl.c | 2 +- 14 files changed, 140 insertions(+), 72 deletions(-) create mode 100644 test/unit/junk_alloc.c create mode 100644 test/unit/junk_free.c diff --git a/Makefile.in b/Makefile.in index 40644ce8..c268d002 100644 --- a/Makefile.in +++ b/Makefile.in @@ -118,6 +118,8 @@ TESTS_UNIT := $(srcroot)test/unit/atomic.c \ $(srcroot)test/unit/ckh.c \ $(srcroot)test/unit/hash.c \ $(srcroot)test/unit/junk.c \ + $(srcroot)test/unit/junk_alloc.c \ + $(srcroot)test/unit/junk_free.c \ $(srcroot)test/unit/lg_chunk.c \ $(srcroot)test/unit/mallctl.c \ $(srcroot)test/unit/math.c \ diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 71b4cd19..0148f038 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -930,18 +930,20 @@ for (i = 0; i < nbins; i++) { opt.junk - (bool) + (const char *) r- [] - Junk filling enabled/disabled. If enabled, each byte - of uninitialized allocated memory will be initialized to - 0xa5. All deallocated memory will be initialized to - 0x5a. This is intended for debugging and will - impact performance negatively. This option is disabled by default - unless is specified during - configuration, in which case it is enabled by default unless running - inside Junk filling. If set to "alloc", each byte of + uninitialized allocated memory will be initialized to + 0xa5. If set to "free", all deallocated memory will + be initialized to 0x5a. If set to "true", both + allocated and deallocated memory will be initialized, and if set to + "false", junk filling be disabled entirely. 
This is intended for + debugging and will impact performance negatively. This option is + "false" by default unless is specified + during configuration, in which case it is "true" by default unless + running inside Valgrind. diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 9bd501c0..b7617dfd 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -376,7 +376,9 @@ typedef unsigned index_t; #define JEMALLOC_H_EXTERNS extern bool opt_abort; -extern bool opt_junk; +extern const char *opt_junk; +extern bool opt_junk_alloc; +extern bool opt_junk_free; extern size_t opt_quarantine; extern bool opt_redzone; extern bool opt_utrace; diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index ee973c9f..7e339152 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -274,6 +274,8 @@ nhbins opt_abort opt_dss opt_junk +opt_junk_alloc +opt_junk_free opt_lg_chunk opt_lg_dirty_mult opt_lg_prof_interval diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index 3a3fd496..6e97b3dd 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -252,14 +252,14 @@ tcache_alloc_small(tcache_t *tcache, size_t size, bool zero) if (likely(!zero)) { if (config_fill) { - if (unlikely(opt_junk)) { + if (unlikely(opt_junk_alloc)) { arena_alloc_junk_small(ret, &arena_bin_info[binind], false); } else if (unlikely(opt_zero)) memset(ret, 0, usize); } } else { - if (config_fill && unlikely(opt_junk)) { + if (config_fill && unlikely(opt_junk_alloc)) { arena_alloc_junk_small(ret, &arena_bin_info[binind], true); } @@ -307,7 +307,7 @@ tcache_alloc_large(tcache_t *tcache, size_t size, bool zero) } if (likely(!zero)) { if (config_fill) { - if (unlikely(opt_junk)) + if (unlikely(opt_junk_alloc)) 
memset(ret, 0xa5, usize); else if (unlikely(opt_zero)) memset(ret, 0, usize); @@ -333,7 +333,7 @@ tcache_dalloc_small(tcache_t *tcache, void *ptr, index_t binind) assert(tcache_salloc(ptr) <= SMALL_MAXCLASS); - if (config_fill && unlikely(opt_junk)) + if (config_fill && unlikely(opt_junk_free)) arena_dalloc_junk_small(ptr, &arena_bin_info[binind]); tbin = &tcache->tbins[binind]; @@ -362,7 +362,7 @@ tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size) binind = size2index(size); - if (config_fill && unlikely(opt_junk)) + if (config_fill && unlikely(opt_junk_free)) arena_dalloc_junk_large(ptr, size); tbin = &tcache->tbins[binind]; diff --git a/src/arena.c b/src/arena.c index 6f2410ac..bf789950 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1450,7 +1450,7 @@ arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, index_t binind, } break; } - if (config_fill && unlikely(opt_junk)) { + if (config_fill && unlikely(opt_junk_alloc)) { arena_alloc_junk_small(ptr, &arena_bin_info[binind], true); } @@ -1512,24 +1512,27 @@ arena_redzones_validate(void *ptr, arena_bin_info_t *bin_info, bool reset) size_t i; bool error = false; - for (i = 1; i <= redzone_size; i++) { - uint8_t *byte = (uint8_t *)((uintptr_t)ptr - i); - if (*byte != 0xa5) { - error = true; - arena_redzone_corruption(ptr, size, false, i, *byte); - if (reset) - *byte = 0xa5; - } - } - for (i = 0; i < redzone_size; i++) { - uint8_t *byte = (uint8_t *)((uintptr_t)ptr + size + i); - if (*byte != 0xa5) { - error = true; - arena_redzone_corruption(ptr, size, true, i, *byte); - if (reset) - *byte = 0xa5; + if (opt_junk_alloc) { + for (i = 1; i <= redzone_size; i++) { + uint8_t *byte = (uint8_t *)((uintptr_t)ptr - i); + if (*byte != 0xa5) { + error = true; + arena_redzone_corruption(ptr, size, false, i, *byte); + if (reset) + *byte = 0xa5; + } + } + for (i = 0; i < redzone_size; i++) { + uint8_t *byte = (uint8_t *)((uintptr_t)ptr + size + i); + if (*byte != 0xa5) { + error = true; + 
arena_redzone_corruption(ptr, size, true, i, *byte); + if (reset) + *byte = 0xa5; + } } } + if (opt_abort && error) abort(); } @@ -1560,7 +1563,7 @@ arena_quarantine_junk_small(void *ptr, size_t usize) index_t binind; arena_bin_info_t *bin_info; cassert(config_fill); - assert(opt_junk); + assert(opt_junk_free); assert(opt_quarantine); assert(usize <= SMALL_MAXCLASS); @@ -1604,7 +1607,7 @@ arena_malloc_small(arena_t *arena, size_t size, bool zero) if (!zero) { if (config_fill) { - if (unlikely(opt_junk)) { + if (unlikely(opt_junk_alloc)) { arena_alloc_junk_small(ret, &arena_bin_info[binind], false); } else if (unlikely(opt_zero)) @@ -1612,7 +1615,7 @@ arena_malloc_small(arena_t *arena, size_t size, bool zero) } JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, size); } else { - if (config_fill && unlikely(opt_junk)) { + if (config_fill && unlikely(opt_junk_alloc)) { arena_alloc_junk_small(ret, &arena_bin_info[binind], true); } @@ -1660,7 +1663,7 @@ arena_malloc_large(arena_t *arena, size_t size, bool zero) if (!zero) { if (config_fill) { - if (unlikely(opt_junk)) + if (unlikely(opt_junk_alloc)) memset(ret, 0xa5, usize); else if (unlikely(opt_zero)) memset(ret, 0, usize); @@ -1732,7 +1735,7 @@ arena_palloc(arena_t *arena, size_t size, size_t alignment, bool zero) malloc_mutex_unlock(&arena->lock); if (config_fill && !zero) { - if (unlikely(opt_junk)) + if (unlikely(opt_junk_alloc)) memset(ret, 0xa5, size); else if (unlikely(opt_zero)) memset(ret, 0, size); @@ -1845,7 +1848,7 @@ arena_dalloc_bin_locked_impl(arena_t *arena, arena_chunk_t *chunk, void *ptr, bin = &arena->bins[binind]; bin_info = &arena_bin_info[binind]; - if (!junked && config_fill && unlikely(opt_junk)) + if (!junked && config_fill && unlikely(opt_junk_free)) arena_dalloc_junk_small(ptr, bin_info); arena_run_reg_dalloc(run, ptr); @@ -1908,7 +1911,7 @@ void arena_dalloc_junk_large(void *ptr, size_t usize) { - if (config_fill && unlikely(opt_junk)) + if (config_fill && unlikely(opt_junk_free)) memset(ptr, 0x5a, 
usize); } #ifdef JEMALLOC_JET @@ -2079,7 +2082,7 @@ static void arena_ralloc_junk_large(void *ptr, size_t old_usize, size_t usize) { - if (config_fill && unlikely(opt_junk)) { + if (config_fill && unlikely(opt_junk_free)) { memset((void *)((uintptr_t)ptr + usize), 0x5a, old_usize - usize); } @@ -2126,7 +2129,7 @@ arena_ralloc_large(void *ptr, size_t oldsize, size_t size, size_t extra, bool ret = arena_ralloc_large_grow(arena, chunk, ptr, oldsize, size, extra, zero); if (config_fill && !ret && !zero) { - if (unlikely(opt_junk)) { + if (unlikely(opt_junk_alloc)) { memset((void *)((uintptr_t)ptr + oldsize), 0xa5, isalloc(ptr, config_prof) - oldsize); diff --git a/src/ctl.c b/src/ctl.c index b367c9f6..90bad7ee 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1234,7 +1234,7 @@ CTL_RO_NL_GEN(opt_lg_chunk, opt_lg_chunk, size_t) CTL_RO_NL_GEN(opt_narenas, opt_narenas, size_t) CTL_RO_NL_GEN(opt_lg_dirty_mult, opt_lg_dirty_mult, ssize_t) CTL_RO_NL_GEN(opt_stats_print, opt_stats_print, bool) -CTL_RO_NL_CGEN(config_fill, opt_junk, opt_junk, bool) +CTL_RO_NL_CGEN(config_fill, opt_junk, opt_junk, const char *) CTL_RO_NL_CGEN(config_fill, opt_quarantine, opt_quarantine, size_t) CTL_RO_NL_CGEN(config_fill, opt_redzone, opt_redzone, bool) CTL_RO_NL_CGEN(config_fill, opt_zero, opt_zero, bool) diff --git a/src/huge.c b/src/huge.c index 68839037..416cb172 100644 --- a/src/huge.c +++ b/src/huge.c @@ -67,7 +67,7 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, if (zero || (config_fill && unlikely(opt_zero))) { if (!is_zeroed) memset(ret, 0, usize); - } else if (config_fill && unlikely(opt_junk)) + } else if (config_fill && unlikely(opt_junk_alloc)) memset(ret, 0xa5, usize); return (ret); @@ -81,7 +81,7 @@ static void huge_dalloc_junk(void *ptr, size_t usize) { - if (config_fill && have_dss && unlikely(opt_junk)) { + if (config_fill && have_dss && unlikely(opt_junk_free)) { /* * Only bother junk filling if the chunk isn't about to be * unmapped. 
@@ -117,7 +117,7 @@ huge_ralloc_no_move_similar(void *ptr, size_t oldsize, size_t usize, size_t sdiff = CHUNK_CEILING(usize) - usize; zeroed = (sdiff != 0) ? !pages_purge((void *)((uintptr_t)ptr + usize), sdiff) : true; - if (config_fill && unlikely(opt_junk)) { + if (config_fill && unlikely(opt_junk_free)) { memset((void *)((uintptr_t)ptr + usize), 0x5a, oldsize - usize); zeroed = false; @@ -147,7 +147,7 @@ huge_ralloc_no_move_similar(void *ptr, size_t oldsize, size_t usize, memset((void *)((uintptr_t)ptr + oldsize), 0, usize - oldsize); } - } else if (config_fill && unlikely(opt_junk)) { + } else if (config_fill && unlikely(opt_junk_alloc)) { memset((void *)((uintptr_t)ptr + oldsize), 0xa5, usize - oldsize); } @@ -165,7 +165,7 @@ huge_ralloc_no_move_shrink(void *ptr, size_t oldsize, size_t usize) sdiff = CHUNK_CEILING(usize) - usize; zeroed = (sdiff != 0) ? !pages_purge((void *)((uintptr_t)ptr + usize), sdiff) : true; - if (config_fill && unlikely(opt_junk)) { + if (config_fill && unlikely(opt_junk_free)) { huge_dalloc_junk((void *)((uintptr_t)ptr + usize), oldsize - usize); zeroed = false; @@ -234,7 +234,7 @@ huge_ralloc_no_move_expand(void *ptr, size_t oldsize, size_t size, bool zero) { CHUNK_CEILING(oldsize)), 0, usize - CHUNK_CEILING(oldsize)); } - } else if (config_fill && unlikely(opt_junk)) { + } else if (config_fill && unlikely(opt_junk_alloc)) { memset((void *)((uintptr_t)ptr + oldsize), 0xa5, usize - oldsize); } diff --git a/src/jemalloc.c b/src/jemalloc.c index 48de0da0..e63dab3e 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -13,13 +13,28 @@ bool opt_abort = false #endif ; -bool opt_junk = +const char *opt_junk = +#if (defined(JEMALLOC_DEBUG) && defined(JEMALLOC_FILL)) + "true" +#else + "false" +#endif + ; +bool opt_junk_alloc = #if (defined(JEMALLOC_DEBUG) && defined(JEMALLOC_FILL)) true #else false #endif ; +bool opt_junk_free = +#if (defined(JEMALLOC_DEBUG) && defined(JEMALLOC_FILL)) + true +#else + false +#endif + ; + size_t opt_quarantine = 
ZU(0); bool opt_redzone = false; bool opt_utrace = false; @@ -784,7 +799,9 @@ malloc_conf_init(void) if (config_valgrind) { in_valgrind = (RUNNING_ON_VALGRIND != 0) ? true : false; if (config_fill && unlikely(in_valgrind)) { - opt_junk = false; + opt_junk = "false"; + opt_junk_alloc = false; + opt_junk_free = false; assert(!opt_zero); opt_quarantine = JEMALLOC_VALGRIND_QUARANTINE_DEFAULT; opt_redzone = true; @@ -867,13 +884,13 @@ malloc_conf_init(void) &vlen)) { #define CONF_MATCH(n) \ (sizeof(n)-1 == klen && strncmp(n, k, klen) == 0) +#define CONF_MATCH_VALUE(n) \ + (sizeof(n)-1 == vlen && strncmp(n, v, vlen) == 0) #define CONF_HANDLE_BOOL(o, n, cont) \ if (CONF_MATCH(n)) { \ - if (strncmp("true", v, vlen) == 0 && \ - vlen == sizeof("true")-1) \ + if (CONF_MATCH_VALUE("true")) \ o = true; \ - else if (strncmp("false", v, vlen) == \ - 0 && vlen == sizeof("false")-1) \ + else if (CONF_MATCH_VALUE("false")) \ o = false; \ else { \ malloc_conf_error( \ @@ -987,7 +1004,30 @@ malloc_conf_init(void) -1, (sizeof(size_t) << 3) - 1) CONF_HANDLE_BOOL(opt_stats_print, "stats_print", true) if (config_fill) { - CONF_HANDLE_BOOL(opt_junk, "junk", true) + if (CONF_MATCH("junk")) { + if (CONF_MATCH_VALUE("true")) { + opt_junk = "true"; + opt_junk_alloc = opt_junk_free = + true; + } else if (CONF_MATCH_VALUE("false")) { + opt_junk = "false"; + opt_junk_alloc = opt_junk_free = + false; + } else if (CONF_MATCH_VALUE("alloc")) { + opt_junk = "alloc"; + opt_junk_alloc = true; + opt_junk_free = false; + } else if (CONF_MATCH_VALUE("free")) { + opt_junk = "free"; + opt_junk_alloc = false; + opt_junk_free = true; + } else { + malloc_conf_error( + "Invalid conf value", k, + klen, v, vlen); + } + continue; + } CONF_HANDLE_SIZE_T(opt_quarantine, "quarantine", 0, SIZE_T_MAX, false) CONF_HANDLE_BOOL(opt_redzone, "redzone", true) diff --git a/src/quarantine.c b/src/quarantine.c index c5fa566b..12c37e0a 100644 --- a/src/quarantine.c +++ b/src/quarantine.c @@ -148,7 +148,7 @@ quarantine(tsd_t 
*tsd, void *ptr) obj->usize = usize; quarantine->curbytes += usize; quarantine->curobjs++; - if (config_fill && unlikely(opt_junk)) { + if (config_fill && unlikely(opt_junk_free)) { /* * Only do redzone validation if Valgrind isn't in * operation. diff --git a/test/unit/junk.c b/test/unit/junk.c index 1522a610..733f661e 100644 --- a/test/unit/junk.c +++ b/test/unit/junk.c @@ -1,8 +1,11 @@ #include "test/jemalloc_test.h" #ifdef JEMALLOC_FILL +# ifndef JEMALLOC_TEST_JUNK_OPT +# define JEMALLOC_TEST_JUNK_OPT "junk:true" +# endif const char *malloc_conf = - "abort:false,junk:true,zero:false,redzone:true,quarantine:0"; + "abort:false,zero:false,redzone:true,quarantine:0," JEMALLOC_TEST_JUNK_OPT; #endif static arena_dalloc_junk_small_t *arena_dalloc_junk_small_orig; @@ -69,12 +72,14 @@ test_junk(size_t sz_min, size_t sz_max) char *s; size_t sz_prev, sz, i; - arena_dalloc_junk_small_orig = arena_dalloc_junk_small; - arena_dalloc_junk_small = arena_dalloc_junk_small_intercept; - arena_dalloc_junk_large_orig = arena_dalloc_junk_large; - arena_dalloc_junk_large = arena_dalloc_junk_large_intercept; - huge_dalloc_junk_orig = huge_dalloc_junk; - huge_dalloc_junk = huge_dalloc_junk_intercept; + if (opt_junk_free) { + arena_dalloc_junk_small_orig = arena_dalloc_junk_small; + arena_dalloc_junk_small = arena_dalloc_junk_small_intercept; + arena_dalloc_junk_large_orig = arena_dalloc_junk_large; + arena_dalloc_junk_large = arena_dalloc_junk_large_intercept; + huge_dalloc_junk_orig = huge_dalloc_junk; + huge_dalloc_junk = huge_dalloc_junk_intercept; + } sz_prev = 0; s = (char *)mallocx(sz_min, 0); @@ -92,9 +97,11 @@ test_junk(size_t sz_min, size_t sz_max) } for (i = sz_prev; i < sz; i++) { - assert_c_eq(s[i], 0xa5, - "Newly allocated byte %zu/%zu isn't junk-filled", - i, sz); + if (opt_junk_alloc) { + assert_c_eq(s[i], 0xa5, + "Newly allocated byte %zu/%zu isn't " + "junk-filled", i, sz); + } s[i] = 'a'; } @@ -103,7 +110,7 @@ test_junk(size_t sz_min, size_t sz_max) s = (char 
*)rallocx(s, sz+1, 0); assert_ptr_not_null((void *)s, "Unexpected rallocx() failure"); - assert_true(saw_junking, + assert_true(!opt_junk_free || saw_junking, "Expected region of size %zu to be junk-filled", sz); } @@ -111,12 +118,14 @@ test_junk(size_t sz_min, size_t sz_max) watch_junking(s); dallocx(s, 0); - assert_true(saw_junking, + assert_true(!opt_junk_free || saw_junking, "Expected region of size %zu to be junk-filled", sz); - arena_dalloc_junk_small = arena_dalloc_junk_small_orig; - arena_dalloc_junk_large = arena_dalloc_junk_large_orig; - huge_dalloc_junk = huge_dalloc_junk_orig; + if (opt_junk_free) { + arena_dalloc_junk_small = arena_dalloc_junk_small_orig; + arena_dalloc_junk_large = arena_dalloc_junk_large_orig; + huge_dalloc_junk = huge_dalloc_junk_orig; + } } TEST_BEGIN(test_junk_small) @@ -204,6 +213,7 @@ TEST_BEGIN(test_junk_redzone) arena_redzone_corruption_t *arena_redzone_corruption_orig; test_skip_if(!config_fill); + test_skip_if(!opt_junk_alloc || !opt_junk_free); arena_redzone_corruption_orig = arena_redzone_corruption; arena_redzone_corruption = arena_redzone_corruption_replacement; @@ -234,6 +244,7 @@ int main(void) { + assert(opt_junk_alloc || opt_junk_free); return (test( test_junk_small, test_junk_large, diff --git a/test/unit/junk_alloc.c b/test/unit/junk_alloc.c new file mode 100644 index 00000000..8db3331d --- /dev/null +++ b/test/unit/junk_alloc.c @@ -0,0 +1,3 @@ +#define JEMALLOC_TEST_JUNK_OPT "junk:alloc" +#include "junk.c" +#undef JEMALLOC_TEST_JUNK_OPT diff --git a/test/unit/junk_free.c b/test/unit/junk_free.c new file mode 100644 index 00000000..482a61d0 --- /dev/null +++ b/test/unit/junk_free.c @@ -0,0 +1,3 @@ +#define JEMALLOC_TEST_JUNK_OPT "junk:free" +#include "junk.c" +#undef JEMALLOC_TEST_JUNK_OPT diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 028a9710..f4b7d1ab 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -164,7 +164,7 @@ TEST_BEGIN(test_mallctl_opt) TEST_MALLCTL_OPT(size_t, narenas, 
always); TEST_MALLCTL_OPT(ssize_t, lg_dirty_mult, always); TEST_MALLCTL_OPT(bool, stats_print, always); - TEST_MALLCTL_OPT(bool, junk, fill); + TEST_MALLCTL_OPT(const char *, junk, fill); TEST_MALLCTL_OPT(size_t, quarantine, fill); TEST_MALLCTL_OPT(bool, redzone, fill); TEST_MALLCTL_OPT(bool, zero, fill); From b4acf7300a4ca3423ca36fe227e9bc2e23f25b9f Mon Sep 17 00:00:00 2001 From: Bert Maher Date: Fri, 24 Oct 2014 14:09:42 -0700 Subject: [PATCH 0604/3378] [pprof] Produce global profile unless thread-local profile requested Currently pprof will print output for all threads if a single thread is not specified, but this doesn't play well with many output formats (e.g., any of the dot-based formats). Instead, default to printing just the overall profile when no specific thread is requested. This resolves #157. --- bin/pprof | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/bin/pprof b/bin/pprof index 5a4c6cd7..df503aea 100755 --- a/bin/pprof +++ b/bin/pprof @@ -404,7 +404,7 @@ sub Init() { "edgefraction=f" => \$main::opt_edgefraction, "maxdegree=i" => \$main::opt_maxdegree, "focus=s" => \$main::opt_focus, - "thread=i" => \$main::opt_thread, + "thread=s" => \$main::opt_thread, "ignore=s" => \$main::opt_ignore, "scale=i" => \$main::opt_scale, "heapcheck" => \$main::opt_heapcheck, @@ -707,7 +707,8 @@ sub Main() { } if (defined($data->{threads})) { foreach my $thread (sort { $a <=> $b } keys(%{$data->{threads}})) { - if (!defined($main::opt_thread) || $main::opt_thread == $thread) { + if (defined($main::opt_thread) && + ($main::opt_thread eq '*' || $main::opt_thread == $thread)) { my $thread_profile = $data->{threads}{$thread}; FilterAndPrint($thread_profile, $symbols, $libs, $thread); } From 9c6a8d3b0cc14fd26b119ad08f190e537771464f Mon Sep 17 00:00:00 2001 From: Guilherme Goncalves Date: Wed, 17 Dec 2014 14:46:35 -0200 Subject: [PATCH 0605/3378] Move variable declaration to the top its block for MSVC compatibility. 
--- src/arena.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/arena.c b/src/arena.c index bf789950..1eb4000a 100644 --- a/src/arena.c +++ b/src/arena.c @@ -2022,6 +2022,7 @@ arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr, * following run, then merge the first part with the existing * allocation. */ + arena_run_t *run; size_t flag_dirty, splitsize, usize; usize = s2u(size + extra); @@ -2030,8 +2031,7 @@ arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr, assert(usize >= usize_min); splitsize = usize - oldsize; - arena_run_t *run = &arena_miscelm_get(chunk, - pageind+npages)->run; + run = &arena_miscelm_get(chunk, pageind+npages)->run; arena_run_split_large(arena, run, splitsize, zero); size = oldsize + splitsize; From b7b44dfad09186cf74080818075eb0bfc0805e3b Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Thu, 18 Dec 2014 15:12:53 +0900 Subject: [PATCH 0606/3378] Make mixed declarations an error It often happens that code changes introduce mixed declarations, that then break building with Visual Studio. Since the code style is to not use mixed declarations anyways, we might as well enforce it with -Werror. 
--- configure.ac | 1 + 1 file changed, 1 insertion(+) diff --git a/configure.ac b/configure.ac index 82bdefdd..95133c49 100644 --- a/configure.ac +++ b/configure.ac @@ -134,6 +134,7 @@ if test "x$CFLAGS" = "x" ; then AC_DEFINE_UNQUOTED([JEMALLOC_HAS_RESTRICT]) fi JE_CFLAGS_APPEND([-Wall]) + JE_CFLAGS_APPEND([-Werror=declaration-after-statement]) JE_CFLAGS_APPEND([-pipe]) JE_CFLAGS_APPEND([-g3]) elif test "x$je_cv_msvc" = "xyes" ; then From 51f86346c000aa2a44abaab08caeedcb151e6556 Mon Sep 17 00:00:00 2001 From: Guilherme Goncalves Date: Thu, 18 Dec 2014 15:01:21 +0900 Subject: [PATCH 0607/3378] Add a isblank definition for MSVC < 2013 --- include/jemalloc/internal/jemalloc_internal_decls.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/include/jemalloc/internal/jemalloc_internal_decls.h b/include/jemalloc/internal/jemalloc_internal_decls.h index fb2effbf..b10561c6 100644 --- a/include/jemalloc/internal/jemalloc_internal_decls.h +++ b/include/jemalloc/internal/jemalloc_internal_decls.h @@ -52,6 +52,14 @@ typedef intptr_t ssize_t; # define __func__ __FUNCTION__ /* Disable warnings about deprecated system functions. */ # pragma warning(disable: 4996) +#if _MSC_VER < 1800 +static int +isblank(int c) +{ + + return (c == '\t' || c == ' '); +} +#endif #else # include #endif From 24057f3da8cd1b23955068a368165eba2eefb5c4 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 14 Jan 2015 16:27:31 -0800 Subject: [PATCH 0608/3378] Fix an infinite recursion bug related to a0/tsd bootstrapping. This resolves #184. --- src/chunk.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/chunk.c b/src/chunk.c index 79264527..b9a24416 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -254,7 +254,9 @@ chunk_alloc_default(void *new_addr, size_t size, size_t alignment, bool *zero, { arena_t *arena; - arena = arena_get(tsd_fetch(), arena_ind, false, true); + /* Dodge tsd for a0 in order to avoid bootstrapping issues. */ + arena = (arena_ind == 0) ? 
a0get() : arena_get(tsd_fetch(), arena_ind, + false, true); /* * The arena we're allocating on behalf of must have been initialized * already. From 44b57b8e8b25797b94c7cccc0e32705f76fcf03b Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 16 Jan 2015 18:04:17 -0800 Subject: [PATCH 0609/3378] Fix OOM handling in memalign() and valloc(). Fix memalign() and valloc() to heed imemalign()'s return value. Reported by Kurt Wampler. --- src/jemalloc.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index e63dab3e..aecdce34 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1751,7 +1751,8 @@ void * je_memalign(size_t alignment, size_t size) { void *ret JEMALLOC_CC_SILENCE_INIT(NULL); - imemalign(&ret, alignment, size, 1); + if (unlikely(imemalign(&ret, alignment, size, 1) != 0)) + ret = NULL; JEMALLOC_VALGRIND_MALLOC(ret != NULL, ret, size, false); return (ret); } @@ -1762,7 +1763,8 @@ void * je_valloc(size_t size) { void *ret JEMALLOC_CC_SILENCE_INIT(NULL); - imemalign(&ret, PAGE, size, 1); + if (unlikely(imemalign(&ret, PAGE, size, 1) != 0)) + ret = NULL; JEMALLOC_VALGRIND_MALLOC(ret != NULL, ret, size, false); return (ret); } From b617df81bbd35b2d7124b16df4024f9541644f6e Mon Sep 17 00:00:00 2001 From: Abhishek Kulkarni Date: Wed, 21 Jan 2015 15:02:42 -0500 Subject: [PATCH 0610/3378] Add missing symbols to private_symbols.txt. This resolves #185. 
--- include/jemalloc/internal/private_symbols.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 7e339152..39132b24 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -158,6 +158,7 @@ ctl_postfork_child ctl_postfork_parent ctl_prefork dss_prec_names +extent_tree_ad_empty extent_tree_ad_first extent_tree_ad_insert extent_tree_ad_iter @@ -174,6 +175,7 @@ extent_tree_ad_reverse_iter extent_tree_ad_reverse_iter_recurse extent_tree_ad_reverse_iter_start extent_tree_ad_search +extent_tree_szad_empty extent_tree_szad_first extent_tree_szad_insert extent_tree_szad_iter @@ -289,6 +291,7 @@ opt_prof_final opt_prof_gdump opt_prof_leak opt_prof_prefix +opt_prof_thread_active_init opt_quarantine opt_redzone opt_stats_print @@ -332,6 +335,7 @@ prof_tctx_set prof_tdata_cleanup prof_tdata_get prof_tdata_init +prof_tdata_reinit prof_thread_active_get prof_thread_active_init_get prof_thread_active_init_set From bc96876f99e89705817630b503ac54a5c48789ab Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 21 Jan 2015 09:01:43 -0800 Subject: [PATCH 0611/3378] Fix arenas_cache_cleanup(). Fix arenas_cache_cleanup() to check whether arenas_cache is NULL before deallocation, rather than checking arenas. --- src/jemalloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index aecdce34..c53129a5 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -601,7 +601,7 @@ arenas_cache_cleanup(tsd_t *tsd) arena_t **arenas_cache; arenas_cache = tsd_arenas_cache_get(tsd); - if (arenas != NULL) + if (arenas_cache != NULL) a0free(arenas_cache); } From 10aff3f3e1b8b3ac0348b259c439c9fe870a6b95 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 20 Jan 2015 15:37:51 -0800 Subject: [PATCH 0612/3378] Refactor bootstrapping to delay tsd initialization. 
Refactor bootstrapping to delay tsd initialization, primarily to support integration with FreeBSD's libc. Refactor a0*() for internal-only use, and add the bootstrap_{malloc,calloc,free}() API for use by FreeBSD's libc. This separation limits use of the a0*() functions to metadata allocation, which doesn't require malloc/calloc/free API compatibility. This resolves #170. --- .../jemalloc/internal/jemalloc_internal.h.in | 5 +- include/jemalloc/internal/private_symbols.txt | 6 +- include/jemalloc/internal/tsd.h | 2 +- src/ctl.c | 18 +- src/jemalloc.c | 307 +++++++++++------- src/tsd.c | 4 +- 6 files changed, 210 insertions(+), 132 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index b7617dfd..41078607 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -404,9 +404,8 @@ extern size_t const index2size_tab[NSIZES]; extern uint8_t const size2index_tab[]; arena_t *a0get(void); -void *a0malloc(size_t size); -void *a0calloc(size_t num, size_t size); -void a0free(void *ptr); +void *a0malloc(size_t size, bool zero); +void a0dalloc(void *ptr); arena_t *arenas_extend(unsigned ind); arena_t *arena_init(unsigned ind); unsigned narenas_total_get(void); diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 39132b24..1aaf80b6 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -1,5 +1,4 @@ -a0calloc -a0free +a0dalloc a0get a0malloc arena_get @@ -107,6 +106,9 @@ bitmap_set bitmap_sfu bitmap_size bitmap_unset +bootstrap_calloc +bootstrap_free +bootstrap_malloc bt_init buferror chunk_alloc_arena diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 35dd8628..dbb91a2e 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -48,7 +48,7 @@ typedef enum { * void 
example_tsd_set(example_t *val) {...} * * Note that all of the functions deal in terms of (a_type *) rather than - * (a_type) so that it is possible to support non-pointer types (unlike + * (a_type) so that it is possible to support non-pointer types (unlike * pthreads TSD). example_tsd_cleanup() is passed an (a_type *) pointer that is * cast to (void *). This means that the cleanup function needs to cast the * function argument to (a_type *), then dereference the resulting pointer to diff --git a/src/ctl.c b/src/ctl.c index 90bad7ee..6b95584b 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -484,14 +484,14 @@ ctl_arena_init(ctl_arena_stats_t *astats) if (astats->lstats == NULL) { astats->lstats = (malloc_large_stats_t *)a0malloc(nlclasses * - sizeof(malloc_large_stats_t)); + sizeof(malloc_large_stats_t), false); if (astats->lstats == NULL) return (true); } if (astats->hstats == NULL) { astats->hstats = (malloc_huge_stats_t *)a0malloc(nhclasses * - sizeof(malloc_huge_stats_t)); + sizeof(malloc_huge_stats_t), false); if (astats->hstats == NULL) return (true); } @@ -627,7 +627,7 @@ ctl_grow(void) /* Allocate extended arena stats. */ astats = (ctl_arena_stats_t *)a0malloc((ctl_stats.narenas + 2) * - sizeof(ctl_arena_stats_t)); + sizeof(ctl_arena_stats_t), false); if (astats == NULL) return (true); @@ -636,7 +636,7 @@ ctl_grow(void) sizeof(ctl_arena_stats_t)); memset(&astats[ctl_stats.narenas + 1], 0, sizeof(ctl_arena_stats_t)); if (ctl_arena_init(&astats[ctl_stats.narenas + 1])) { - a0free(astats); + a0dalloc(astats); return (true); } /* Swap merged stats to their new location. 
*/ @@ -649,7 +649,7 @@ ctl_grow(void) memcpy(&astats[ctl_stats.narenas + 1], &tstats, sizeof(ctl_arena_stats_t)); } - a0free(ctl_stats.arenas); + a0dalloc(ctl_stats.arenas); ctl_stats.arenas = astats; ctl_stats.narenas++; @@ -723,7 +723,7 @@ ctl_init(void) */ ctl_stats.narenas = narenas_total_get(); ctl_stats.arenas = (ctl_arena_stats_t *)a0malloc( - (ctl_stats.narenas + 1) * sizeof(ctl_arena_stats_t)); + (ctl_stats.narenas + 1) * sizeof(ctl_arena_stats_t), false); if (ctl_stats.arenas == NULL) { ret = true; goto label_return; @@ -742,12 +742,12 @@ ctl_init(void) if (ctl_arena_init(&ctl_stats.arenas[i])) { unsigned j; for (j = 0; j < i; j++) { - a0free( + a0dalloc( ctl_stats.arenas[j].lstats); - a0free( + a0dalloc( ctl_stats.arenas[j].hstats); } - a0free(ctl_stats.arenas); + a0dalloc(ctl_stats.arenas); ctl_stats.arenas = NULL; ret = true; goto label_return; diff --git a/src/jemalloc.c b/src/jemalloc.c index c53129a5..632c8d3e 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -62,8 +62,13 @@ static unsigned narenas_total; static arena_t *a0; /* arenas[0]; read-only after initialization. */ static unsigned narenas_auto; /* Read-only after initialization. */ -/* Set to true once the allocator has been initialized. */ -static bool malloc_initialized = false; +typedef enum { + malloc_init_uninitialized = 3, + malloc_init_a0_initialized = 2, + malloc_init_recursible = 1, + malloc_init_initialized = 0 /* Common case --> jnz. */ +} malloc_init_t; +static malloc_init_t malloc_init_state = malloc_init_uninitialized; JEMALLOC_ALIGNED(CACHELINE) const size_t index2size_tab[NSIZES] = { @@ -218,6 +223,7 @@ typedef struct { * definition. */ +static bool malloc_init_hard_a0(void); static bool malloc_init_hard(void); /******************************************************************************/ @@ -225,6 +231,13 @@ static bool malloc_init_hard(void); * Begin miscellaneous support functions. 
*/ +JEMALLOC_ALWAYS_INLINE_C bool +malloc_initialized(void) +{ + + return (malloc_init_state == malloc_init_initialized); +} + JEMALLOC_ALWAYS_INLINE_C void malloc_thread_init(void) { @@ -242,11 +255,20 @@ malloc_thread_init(void) quarantine_alloc_hook(); } +JEMALLOC_ALWAYS_INLINE_C bool +malloc_init_a0(void) +{ + + if (unlikely(malloc_init_state == malloc_init_uninitialized)) + return (malloc_init_hard_a0()); + return (false); +} + JEMALLOC_ALWAYS_INLINE_C bool malloc_init(void) { - if (unlikely(!malloc_initialized) && malloc_init_hard()) + if (unlikely(!malloc_initialized()) && malloc_init_hard()) return (true); malloc_thread_init(); @@ -254,10 +276,8 @@ malloc_init(void) } /* - * The a0*() functions are used instead of i[mcd]alloc() in bootstrap-sensitive - * situations that cannot tolerate TLS variable access. These functions are - * also exposed for use in static binaries on FreeBSD, hence the old-style - * malloc() API. + * The a0*() functions are used instead of i[mcd]alloc() in situations that + * cannot tolerate TLS variable access. 
*/ arena_t * @@ -269,16 +289,13 @@ a0get(void) } static void * -a0alloc(size_t size, bool zero) +a0imalloc(size_t size, bool zero) { void *ret; - if (unlikely(malloc_init())) + if (unlikely(malloc_init_a0())) return (NULL); - if (size == 0) - size = 1; - if (likely(size <= arena_maxclass)) ret = arena_malloc(NULL, a0get(), size, zero, false); else @@ -287,28 +304,11 @@ a0alloc(size_t size, bool zero) return (ret); } -void * -a0malloc(size_t size) -{ - - return (a0alloc(size, false)); -} - -void * -a0calloc(size_t num, size_t size) -{ - - return (a0alloc(num * size, true)); -} - -void -a0free(void *ptr) +static void +a0idalloc(void *ptr) { arena_chunk_t *chunk; - if (ptr == NULL) - return; - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (likely(chunk != ptr)) arena_dalloc(NULL, chunk, ptr, false); @@ -316,6 +316,60 @@ a0free(void *ptr) huge_dalloc(NULL, ptr, false); } +void * +a0malloc(size_t size, bool zero) +{ + + return (a0imalloc(size, zero)); +} + +void +a0dalloc(void *ptr) +{ + + a0idalloc(ptr); +} + +/* + * FreeBSD's libc uses the bootstrap_*() functions in bootstrap-senstive + * situations that cannot tolerate TLS variable access (TLS allocation and very + * early internal data structure initialization). + */ + +void * +bootstrap_malloc(size_t size) +{ + + if (unlikely(size == 0)) + size = 1; + + return (a0imalloc(size, false)); +} + +void * +bootstrap_calloc(size_t num, size_t size) +{ + size_t num_size; + + num_size = num * size; + if (unlikely(num_size == 0)) { + assert(num == 0 || size == 0); + num_size = 1; + } + + return (a0imalloc(num_size, true)); +} + +void +bootstrap_free(void *ptr) +{ + + if (unlikely(ptr == NULL)) + return; + + a0idalloc(ptr); +} + /* Create a new arena and insert it into the arenas array at index ind. 
*/ static arena_t * arena_init_locked(unsigned ind) @@ -328,7 +382,7 @@ arena_init_locked(unsigned ind) unsigned narenas_new = narenas_total + 1; arena_t **arenas_new = (arena_t **)a0malloc(CACHELINE_CEILING(narenas_new * - sizeof(arena_t *))); + sizeof(arena_t *)), false); if (arenas_new == NULL) return (NULL); memcpy(arenas_new, arenas, narenas_total * sizeof(arena_t *)); @@ -338,7 +392,7 @@ arena_init_locked(unsigned ind) * base_alloc()). */ if (narenas_total != narenas_auto) - a0free(arenas); + a0dalloc(arenas); arenas = arenas_new; narenas_total = narenas_new; } @@ -449,7 +503,7 @@ arena_get_hard(tsd_t *tsd, unsigned ind, bool init_if_missing) /* Deallocate old cache if it's too small. */ if (arenas_cache != NULL && narenas_cache < narenas_actual) { - a0free(arenas_cache); + a0dalloc(arenas_cache); arenas_cache = NULL; narenas_cache = 0; tsd_arenas_cache_set(tsd, arenas_cache); @@ -465,7 +519,7 @@ arena_get_hard(tsd_t *tsd, unsigned ind, bool init_if_missing) if (!*arenas_cache_bypassp) { *arenas_cache_bypassp = true; arenas_cache = (arena_t **)a0malloc(sizeof(arena_t *) * - narenas_cache); + narenas_cache, false); *arenas_cache_bypassp = false; } else arenas_cache = NULL; @@ -602,7 +656,7 @@ arenas_cache_cleanup(tsd_t *tsd) arenas_cache = tsd_arenas_cache_get(tsd); if (arenas_cache != NULL) - a0free(arenas_cache); + a0dalloc(arenas_cache); } void @@ -1091,19 +1145,18 @@ malloc_conf_init(void) } } +/* init_lock must be held. */ static bool -malloc_init_hard(void) +malloc_init_hard_needed(void) { - arena_t *init_arenas[1]; - malloc_mutex_lock(&init_lock); - if (malloc_initialized || IS_INITIALIZER) { + if (malloc_initialized() || (IS_INITIALIZER && malloc_init_state == + malloc_init_recursible)) { /* * Another thread initialized the allocator before this one * acquired init_lock, or this thread is the initializing * thread, and it is recursively allocating. 
*/ - malloc_mutex_unlock(&init_lock); return (false); } #ifdef JEMALLOC_THREADED_INIT @@ -1113,23 +1166,23 @@ malloc_init_hard(void) malloc_mutex_unlock(&init_lock); CPU_SPINWAIT; malloc_mutex_lock(&init_lock); - } while (!malloc_initialized); - malloc_mutex_unlock(&init_lock); + } while (!malloc_initialized()); return (false); } #endif - malloc_initializer = INITIALIZER; + return (true); +} - if (malloc_tsd_boot0()) { - malloc_mutex_unlock(&init_lock); - return (true); - } +/* init_lock must be held. */ +static bool +malloc_init_hard_a0_locked(void) +{ + + malloc_initializer = INITIALIZER; if (config_prof) prof_boot0(); - malloc_conf_init(); - if (opt_stats_print) { /* Print statistics at exit. */ if (atexit(stats_print_atexit) != 0) { @@ -1138,68 +1191,60 @@ malloc_init_hard(void) abort(); } } - - if (base_boot()) { - malloc_mutex_unlock(&init_lock); + if (base_boot()) return (true); - } - - if (chunk_boot()) { - malloc_mutex_unlock(&init_lock); + if (chunk_boot()) return (true); - } - - if (ctl_boot()) { - malloc_mutex_unlock(&init_lock); + if (ctl_boot()) return (true); - } - if (config_prof) prof_boot1(); - arena_boot(); - - if (config_tcache && tcache_boot()) { - malloc_mutex_unlock(&init_lock); + if (config_tcache && tcache_boot()) return (true); - } - - if (huge_boot()) { - malloc_mutex_unlock(&init_lock); + if (huge_boot()) return (true); - } - - if (malloc_mutex_init(&arenas_lock)) { - malloc_mutex_unlock(&init_lock); + if (malloc_mutex_init(&arenas_lock)) return (true); - } - /* * Create enough scaffolding to allow recursive allocation in * malloc_ncpus(). */ narenas_total = narenas_auto = 1; - arenas = init_arenas; + arenas = &a0; memset(arenas, 0, sizeof(arena_t *) * narenas_auto); - /* * Initialize one arena here. The rest are lazily created in * arena_choose_hard(). 
*/ - a0 = arena_init(0); - if (a0 == NULL) { - malloc_mutex_unlock(&init_lock); + if (arena_init(0) == NULL) return (true); - } + malloc_init_state = malloc_init_a0_initialized; + return (false); +} - if (config_prof && prof_boot2()) { - malloc_mutex_unlock(&init_lock); - return (true); - } +static bool +malloc_init_hard_a0(void) +{ + bool ret; + malloc_mutex_lock(&init_lock); + ret = malloc_init_hard_a0_locked(); + malloc_mutex_unlock(&init_lock); + return (ret); +} + +/* + * Initialize data structures which may trigger recursive allocation. + * + * init_lock must be held. + */ +static void +malloc_init_hard_recursible(void) +{ + + malloc_init_state = malloc_init_recursible; malloc_mutex_unlock(&init_lock); - /**********************************************************************/ - /* Recursive allocation may follow. */ ncpus = malloc_ncpus(); @@ -1213,15 +1258,16 @@ malloc_init_hard(void) abort(); } #endif - - /* Done recursively allocating. */ - /**********************************************************************/ malloc_mutex_lock(&init_lock); +} - if (mutex_boot()) { - malloc_mutex_unlock(&init_lock); +/* init_lock must be held. */ +static bool +malloc_init_hard_finish(void) +{ + + if (mutex_boot()) return (true); - } if (opt_narenas == 0) { /* @@ -1248,22 +1294,53 @@ malloc_init_hard(void) /* Allocate and initialize arenas. */ arenas = (arena_t **)base_alloc(sizeof(arena_t *) * narenas_total); - if (arenas == NULL) { - malloc_mutex_unlock(&init_lock); + if (arenas == NULL) return (true); - } /* * Zero the array. In practice, this should always be pre-zeroed, * since it was just mmap()ed, but let's be sure. */ memset(arenas, 0, sizeof(arena_t *) * narenas_total); /* Copy the pointer to the one arena that was already initialized. 
*/ - arenas[0] = init_arenas[0]; + arenas[0] = a0; + + malloc_init_state = malloc_init_initialized; + return (false); +} + +static bool +malloc_init_hard(void) +{ + + malloc_mutex_lock(&init_lock); + if (!malloc_init_hard_needed()) { + malloc_mutex_unlock(&init_lock); + return (false); + } + + if (malloc_init_state != malloc_init_a0_initialized && + malloc_init_hard_a0_locked()) { + malloc_mutex_unlock(&init_lock); + return (true); + } + if (malloc_tsd_boot0()) { + malloc_mutex_unlock(&init_lock); + return (true); + } + if (config_prof && prof_boot2()) { + malloc_mutex_unlock(&init_lock); + return (true); + } + + malloc_init_hard_recursible(); + + if (malloc_init_hard_finish()) { + malloc_mutex_unlock(&init_lock); + return (true); + } - malloc_initialized = true; malloc_mutex_unlock(&init_lock); malloc_tsd_boot1(); - return (false); } @@ -1634,7 +1711,7 @@ ifree(tsd_t *tsd, void *ptr, bool try_tcache) UNUSED size_t rzsize JEMALLOC_CC_SILENCE_INIT(0); assert(ptr != NULL); - assert(malloc_initialized || IS_INITIALIZER); + assert(malloc_initialized() || IS_INITIALIZER); if (config_prof && opt_prof) { usize = isalloc(ptr, config_prof); @@ -1655,7 +1732,7 @@ isfree(tsd_t *tsd, void *ptr, size_t usize, bool try_tcache) UNUSED size_t rzsize JEMALLOC_CC_SILENCE_INIT(0); assert(ptr != NULL); - assert(malloc_initialized || IS_INITIALIZER); + assert(malloc_initialized() || IS_INITIALIZER); if (config_prof && opt_prof) prof_free(tsd, ptr, usize); @@ -1688,7 +1765,7 @@ je_realloc(void *ptr, size_t size) } if (likely(ptr != NULL)) { - assert(malloc_initialized || IS_INITIALIZER); + assert(malloc_initialized() || IS_INITIALIZER); malloc_thread_init(); tsd = tsd_fetch(); @@ -2060,7 +2137,7 @@ je_rallocx(void *ptr, size_t size, int flags) assert(ptr != NULL); assert(size != 0); - assert(malloc_initialized || IS_INITIALIZER); + assert(malloc_initialized() || IS_INITIALIZER); malloc_thread_init(); tsd = tsd_fetch(); @@ -2200,7 +2277,7 @@ je_xallocx(void *ptr, size_t size, size_t 
extra, int flags) assert(ptr != NULL); assert(size != 0); assert(SIZE_T_MAX - size >= extra); - assert(malloc_initialized || IS_INITIALIZER); + assert(malloc_initialized() || IS_INITIALIZER); malloc_thread_init(); tsd = tsd_fetch(); @@ -2234,7 +2311,7 @@ je_sallocx(const void *ptr, int flags) { size_t usize; - assert(malloc_initialized || IS_INITIALIZER); + assert(malloc_initialized() || IS_INITIALIZER); malloc_thread_init(); if (config_ivsalloc) @@ -2254,7 +2331,7 @@ je_dallocx(void *ptr, int flags) bool try_tcache; assert(ptr != NULL); - assert(malloc_initialized || IS_INITIALIZER); + assert(malloc_initialized() || IS_INITIALIZER); tsd = tsd_fetch(); if (unlikely((flags & MALLOCX_ARENA_MASK) != 0)) { @@ -2296,7 +2373,7 @@ je_sdallocx(void *ptr, size_t size, int flags) size_t usize; assert(ptr != NULL); - assert(malloc_initialized || IS_INITIALIZER); + assert(malloc_initialized() || IS_INITIALIZER); usize = inallocx(size, flags); assert(usize == isalloc(ptr, config_prof)); @@ -2375,7 +2452,7 @@ je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr) { size_t ret; - assert(malloc_initialized || IS_INITIALIZER); + assert(malloc_initialized() || IS_INITIALIZER); malloc_thread_init(); if (config_ivsalloc) @@ -2427,10 +2504,10 @@ _malloc_prefork(void) unsigned i; #ifdef JEMALLOC_MUTEX_INIT_CB - if (!malloc_initialized) + if (!malloc_initialized()) return; #endif - assert(malloc_initialized); + assert(malloc_initialized()); /* Acquire all mutexes in a safe order. */ ctl_prefork(); @@ -2456,10 +2533,10 @@ _malloc_postfork(void) unsigned i; #ifdef JEMALLOC_MUTEX_INIT_CB - if (!malloc_initialized) + if (!malloc_initialized()) return; #endif - assert(malloc_initialized); + assert(malloc_initialized()); /* Release all mutexes, now that fork() has completed. */ huge_postfork_parent(); @@ -2479,7 +2556,7 @@ jemalloc_postfork_child(void) { unsigned i; - assert(malloc_initialized); + assert(malloc_initialized()); /* Release all mutexes, now that fork() has completed. 
*/ huge_postfork_child(); diff --git a/src/tsd.c b/src/tsd.c index 59253fe3..00d8f95f 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -15,14 +15,14 @@ void * malloc_tsd_malloc(size_t size) { - return (a0malloc(CACHELINE_CEILING(size))); + return (a0malloc(CACHELINE_CEILING(size), false)); } void malloc_tsd_dalloc(void *wrapper) { - a0free(wrapper); + a0dalloc(wrapper); } void From 228b2e92421d8cc7990e931e3144b6f1c3398501 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 22 Jan 2015 15:28:25 -0800 Subject: [PATCH 0613/3378] Document under what circumstances in-place resizing succeeds. This resolves #100. --- doc/jemalloc.xml.in | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 0148f038..858572d8 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -533,6 +533,22 @@ for (i = 0; i < nbins; i++) { nearest multiple of the cacheline size, or specify cacheline alignment when allocating. + The realloc, + rallocx, and + xallocx functions may resize allocations + without moving them under limited circumstances. Unlike the + *allocx API, the standard API does not + officially round up the usable size of an allocation to the nearest size + class, so technically it is necessary to call + realloc to grow e.g. a 9-byte allocation to + 16 bytes, or shrink a 16-byte allocation to 9 bytes. Growth and shrinkage + trivially succeeds in place as long as the pre-size and post-size both round + up to the same size class. No other API guarantees are made regarding + in-place resizing, but the current implementation also tries to resize large + and huge allocations in place, as long as the pre-size and post-size are + both large or both huge. In such cases shrinkage always succeeds, but + growth only succeeds if the trailing memory is currently available. + Assuming 4 MiB chunks, 4 KiB pages, and a 16-byte quantum on a 64-bit system, the size classes in each category are as shown in . 
From 8afcaa9d8133f0a147820799222492d1c251d285 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 22 Jan 2015 16:03:00 -0800 Subject: [PATCH 0614/3378] Update copyright dates for 2015. --- COPYING | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/COPYING b/COPYING index bdda0feb..611968cd 100644 --- a/COPYING +++ b/COPYING @@ -1,10 +1,10 @@ Unless otherwise specified, files in the jemalloc source distribution are subject to the following license: -------------------------------------------------------------------------------- -Copyright (C) 2002-2014 Jason Evans . +Copyright (C) 2002-2015 Jason Evans . All rights reserved. Copyright (C) 2007-2012 Mozilla Foundation. All rights reserved. -Copyright (C) 2009-2014 Facebook, Inc. All rights reserved. +Copyright (C) 2009-2015 Facebook, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: From bec6a8da39e8cb7e59550541d429cff5e3dfb6d8 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 22 Jan 2015 17:55:58 -0800 Subject: [PATCH 0615/3378] Implement the jemalloc-config script. This resolves #133. --- .gitignore | 1 + Makefile.in | 3 +- bin/jemalloc-config.in | 79 ++++++++++++++++++++++++++++++++++++++++++ configure.ac | 10 ++++-- 4 files changed, 89 insertions(+), 4 deletions(-) create mode 100644 bin/jemalloc-config.in diff --git a/.gitignore b/.gitignore index fd68315d..5cd3e922 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ /*.gcov.* +/bin/jemalloc-config /bin/jemalloc.sh /config.stamp diff --git a/Makefile.in b/Makefile.in index c268d002..da397c38 100644 --- a/Makefile.in +++ b/Makefile.in @@ -73,7 +73,7 @@ endif LIBJEMALLOC := $(LIBPREFIX)jemalloc$(install_suffix) # Lists of files. 
-BINS := $(srcroot)bin/pprof $(objroot)bin/jemalloc.sh +BINS := $(srcroot)bin/pprof $(objroot)bin/jemalloc-config $(objroot)bin/jemalloc.sh C_HDRS := $(objroot)include/jemalloc/jemalloc$(install_suffix).h C_SRCS := $(srcroot)src/jemalloc.c $(srcroot)src/arena.c \ $(srcroot)src/atomic.c $(srcroot)src/base.c $(srcroot)src/bitmap.c \ @@ -408,6 +408,7 @@ clean: rm -f $(objroot)*.gcov.* distclean: clean + rm -f $(objroot)bin/jemalloc-config rm -f $(objroot)bin/jemalloc.sh rm -f $(objroot)config.log rm -f $(objroot)config.status diff --git a/bin/jemalloc-config.in b/bin/jemalloc-config.in new file mode 100644 index 00000000..b016c8d3 --- /dev/null +++ b/bin/jemalloc-config.in @@ -0,0 +1,79 @@ +#!/bin/sh + +usage() { + cat < +Options: + --help | -h : Print usage. + --version : Print jemalloc version. + --revision : Print shared library revision number. + --config : Print configure options used to build jemalloc. + --prefix : Print installation directory prefix. + --bindir : Print binary installation directory. + --datadir : Print data installation directory. + --includedir : Print include installation directory. + --libdir : Print library installation directory. + --mandir : Print manual page installation directory. + --cc : Print compiler used to build jemalloc. + --cflags : Print compiler flags used to build jemalloc. + --cppflags : Print preprocessor flags used to build jemalloc. + --ldflags : Print library flags used to build jemalloc. + --libs : Print libraries jemalloc was linked against. 
+EOF +} + +prefix="@prefix@" +exec_prefix="@exec_prefix@" + +case "$1" in +--help | -h) + usage + exit 0 + ;; +--version) + echo "@jemalloc_version@" + ;; +--revision) + echo "@rev@" + ;; +--config) + echo "@CONFIG@" + ;; +--prefix) + echo "@PREFIX@" + ;; +--bindir) + echo "@BINDIR@" + ;; +--datadir) + echo "@DATADIR@" + ;; +--includedir) + echo "@INCLUDEDIR@" + ;; +--libdir) + echo "@LIBDIR@" + ;; +--mandir) + echo "@MANDIR@" + ;; +--cc) + echo "@CC@" + ;; +--cflags) + echo "@CFLAGS@" + ;; +--cppflags) + echo "@CPPFLAGS@" + ;; +--ldflags) + echo "@LDFLAGS@ @EXTRA_LDFLAGS@" + ;; +--libs) + echo "@LIBS@" + ;; +*) + usage + exit 1 +esac diff --git a/configure.ac b/configure.ac index 95133c49..0a4f01e8 100644 --- a/configure.ac +++ b/configure.ac @@ -43,6 +43,9 @@ AC_CACHE_CHECK([whether $1 is compilable], dnl ============================================================================ +CONFIG=`echo ${ac_configure_args} | sed -e "s#\'\([^ ]*\)\'#\1#g"` +AC_SUBST([CONFIG]) + dnl Library revision. rev=2 AC_SUBST([rev]) @@ -1585,7 +1588,7 @@ AC_CONFIG_HEADERS([$cfghdrs_tup]) dnl ============================================================================ dnl Generate outputs. 
-AC_CONFIG_FILES([$cfgoutputs_tup config.stamp bin/jemalloc.sh]) +AC_CONFIG_FILES([$cfgoutputs_tup config.stamp bin/jemalloc-config bin/jemalloc.sh]) AC_SUBST([cfgoutputs_in]) AC_SUBST([cfgoutputs_out]) AC_OUTPUT @@ -1596,9 +1599,10 @@ AC_MSG_RESULT([================================================================= AC_MSG_RESULT([jemalloc version : ${jemalloc_version}]) AC_MSG_RESULT([library revision : ${rev}]) AC_MSG_RESULT([]) +AC_MSG_RESULT([CONFIG : ${CONFIG}]) AC_MSG_RESULT([CC : ${CC}]) -AC_MSG_RESULT([CPPFLAGS : ${CPPFLAGS}]) AC_MSG_RESULT([CFLAGS : ${CFLAGS}]) +AC_MSG_RESULT([CPPFLAGS : ${CPPFLAGS}]) AC_MSG_RESULT([LDFLAGS : ${LDFLAGS}]) AC_MSG_RESULT([EXTRA_LDFLAGS : ${EXTRA_LDFLAGS}]) AC_MSG_RESULT([LIBS : ${LIBS}]) @@ -1609,9 +1613,9 @@ AC_MSG_RESULT([XSLROOT : ${XSLROOT}]) AC_MSG_RESULT([]) AC_MSG_RESULT([PREFIX : ${PREFIX}]) AC_MSG_RESULT([BINDIR : ${BINDIR}]) +AC_MSG_RESULT([DATADIR : ${DATADIR}]) AC_MSG_RESULT([INCLUDEDIR : ${INCLUDEDIR}]) AC_MSG_RESULT([LIBDIR : ${LIBDIR}]) -AC_MSG_RESULT([DATADIR : ${DATADIR}]) AC_MSG_RESULT([MANDIR : ${MANDIR}]) AC_MSG_RESULT([]) AC_MSG_RESULT([srcroot : ${srcroot}]) From ec98a44662a82aff30a54ed86bd9b24f36cfe67e Mon Sep 17 00:00:00 2001 From: Guilherme Goncalves Date: Fri, 23 Jan 2015 10:52:13 -0200 Subject: [PATCH 0616/3378] Use the correct type for opt.junk when printing stats. 
--- src/stats.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/stats.c b/src/stats.c index 054f0332..2b3da645 100644 --- a/src/stats.c +++ b/src/stats.c @@ -461,7 +461,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, OPT_WRITE_SIZE_T(narenas) OPT_WRITE_SSIZE_T(lg_dirty_mult) OPT_WRITE_BOOL(stats_print) - OPT_WRITE_BOOL(junk) + OPT_WRITE_CHAR_P(junk) OPT_WRITE_SIZE_T(quarantine) OPT_WRITE_BOOL(redzone) OPT_WRITE_BOOL(zero) From 4581b97809e7e545c38b996870a4e7284a620bc5 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 27 Nov 2014 17:22:36 -0200 Subject: [PATCH 0617/3378] Implement metadata statistics. There are three categories of metadata: - Base allocations are used for bootstrap-sensitive internal allocator data structures. - Arena chunk headers comprise pages which track the states of the non-metadata pages. - Internal allocations differ from application-originated allocations in that they are for internal use, and that they are omitted from heap profiles. The metadata statistics comprise the metadata categories as follows: - stats.metadata: All metadata -- base + arena chunk headers + internal allocations. - stats.arenas..metadata.mapped: Arena chunk headers. - stats.arenas..metadata.allocated: Internal allocations. This is reported separately from the other metadata statistics because it overlaps with the allocated and active statistics, whereas the other metadata statistics do not. Base allocations are not reported separately, though their magnitude can be computed by subtracting the arena-specific metadata. This resolves #163. 
--- doc/jemalloc.xml.in | 47 ++++ include/jemalloc/internal/arena.h | 34 +++ include/jemalloc/internal/base.h | 1 + include/jemalloc/internal/ctl.h | 1 + include/jemalloc/internal/huge.h | 1 + .../jemalloc/internal/jemalloc_internal.h.in | 230 +++++++++++------- include/jemalloc/internal/private_symbols.txt | 10 + include/jemalloc/internal/stats.h | 7 + src/arena.c | 10 +- src/base.c | 15 ++ src/ctl.c | 30 ++- src/huge.c | 113 ++++----- src/jemalloc.c | 37 ++- src/prof.c | 30 +-- src/quarantine.c | 11 +- src/stats.c | 14 +- src/tcache.c | 4 +- src/tsd.c | 2 +- 18 files changed, 393 insertions(+), 204 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 858572d8..08fd4eb3 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1773,6 +1773,21 @@ malloc_conf = "xmalloc:true";]]> entirely devoted to allocator metadata. + + + stats.metadata + (size_t) + r- + [] + + Total number of bytes dedicated to metadata, which + comprise base allocations used for bootstrap-sensitive internal + allocator data structures, arena chunk headers (see stats.arenas.<i>.metadata.mapped), + and internal allocations (see stats.arenas.<i>.metadata.allocated). + + stats.mapped @@ -1875,6 +1890,38 @@ malloc_conf = "xmalloc:true";]]> Number of mapped bytes. + + + stats.arenas.<i>.metadata.mapped + (size_t) + r- + [] + + Number of mapped bytes in arena chunk headers, which + track the states of the non-metadata pages. + + + + + stats.arenas.<i>.metadata.allocated + (size_t) + r- + [] + + Number of bytes dedicated to internal allocations. + Internal allocations differ from application-originated allocations in + that they are for internal use, and that they are omitted from heap + profiles. This statistic is reported separately from stats.metadata and + stats.arenas.<i>.metadata.mapped + because it overlaps with e.g. the stats.allocated and + stats.active + statistics, whereas the other metadata statistics do + not. 
+ + stats.arenas.<i>.npurge diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 1e190234..46367f68 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -437,6 +437,9 @@ void arena_mapbits_small_set(arena_chunk_t *chunk, size_t pageind, size_t runind, index_t binind, size_t flags); void arena_mapbits_unzeroed_set(arena_chunk_t *chunk, size_t pageind, size_t unzeroed); +void arena_metadata_allocated_add(arena_t *arena, size_t size); +void arena_metadata_allocated_sub(arena_t *arena, size_t size); +size_t arena_metadata_allocated_get(arena_t *arena); bool arena_prof_accum_impl(arena_t *arena, uint64_t accumbytes); bool arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes); bool arena_prof_accum(arena_t *arena, uint64_t accumbytes); @@ -448,6 +451,7 @@ prof_tctx_t *arena_prof_tctx_get(const void *ptr); void arena_prof_tctx_set(const void *ptr, prof_tctx_t *tctx); void *arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero, bool try_tcache); +arena_t *arena_aalloc(const void *ptr); size_t arena_salloc(const void *ptr, bool demote); void arena_dalloc(tsd_t *tsd, arena_chunk_t *chunk, void *ptr, bool try_tcache); @@ -699,6 +703,27 @@ arena_mapbits_unzeroed_set(arena_chunk_t *chunk, size_t pageind, unzeroed); } +JEMALLOC_INLINE void +arena_metadata_allocated_add(arena_t *arena, size_t size) +{ + + atomic_add_z(&arena->stats.metadata_allocated, size); +} + +JEMALLOC_INLINE void +arena_metadata_allocated_sub(arena_t *arena, size_t size) +{ + + atomic_sub_z(&arena->stats.metadata_allocated, size); +} + +JEMALLOC_INLINE size_t +arena_metadata_allocated_get(arena_t *arena) +{ + + return (atomic_read_z(&arena->stats.metadata_allocated)); +} + JEMALLOC_INLINE bool arena_prof_accum_impl(arena_t *arena, uint64_t accumbytes) { @@ -952,6 +977,15 @@ arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero, } } +JEMALLOC_ALWAYS_INLINE arena_t * +arena_aalloc(const void *ptr) +{ + 
arena_chunk_t *chunk; + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + return (chunk->arena); +} + /* Return the size of the allocation pointed to by ptr. */ JEMALLOC_ALWAYS_INLINE size_t arena_salloc(const void *ptr, bool demote) diff --git a/include/jemalloc/internal/base.h b/include/jemalloc/internal/base.h index 3fb80b92..18b7a72d 100644 --- a/include/jemalloc/internal/base.h +++ b/include/jemalloc/internal/base.h @@ -13,6 +13,7 @@ void *base_alloc(size_t size); void *base_calloc(size_t number, size_t size); extent_node_t *base_node_alloc(void); void base_node_dalloc(extent_node_t *node); +size_t base_allocated_get(void); bool base_boot(void); void base_prefork(void); void base_postfork_parent(void); diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index a3e899ea..65617bc9 100644 --- a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -52,6 +52,7 @@ struct ctl_arena_stats_s { struct ctl_stats_s { size_t allocated; size_t active; + size_t metadata; size_t mapped; struct { size_t current; /* stats_chunks.curchunks */ diff --git a/include/jemalloc/internal/huge.h b/include/jemalloc/internal/huge.h index 39d8aa50..decb0249 100644 --- a/include/jemalloc/internal/huge.h +++ b/include/jemalloc/internal/huge.h @@ -23,6 +23,7 @@ typedef void (huge_dalloc_junk_t)(void *, size_t); extern huge_dalloc_junk_t *huge_dalloc_junk; #endif void huge_dalloc(tsd_t *tsd, void *ptr, bool try_tcache); +arena_t *huge_aalloc(const void *ptr); size_t huge_salloc(const void *ptr); prof_tctx_t *huge_prof_tctx_get(const void *ptr); void huge_prof_tctx_set(const void *ptr, prof_tctx_t *tctx); diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 41078607..a4778550 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -404,8 +404,9 @@ extern size_t const index2size_tab[NSIZES]; extern uint8_t const 
size2index_tab[]; arena_t *a0get(void); -void *a0malloc(size_t size, bool zero); +void *a0malloc(size_t size); void a0dalloc(void *ptr); +size_t a0allocated(void); arena_t *arenas_extend(unsigned ind); arena_t *arena_init(unsigned ind); unsigned narenas_total_get(void); @@ -776,21 +777,27 @@ arena_get(tsd_t *tsd, unsigned ind, bool init_if_missing, #include "jemalloc/internal/quarantine.h" #ifndef JEMALLOC_ENABLE_INLINE +arena_t *iaalloc(const void *ptr); +size_t isalloc(const void *ptr, bool demote); +void *iallocztm(tsd_t *tsd, size_t size, bool zero, bool try_tcache, + bool is_metadata, arena_t *arena); void *imalloct(tsd_t *tsd, size_t size, bool try_tcache, arena_t *arena); void *imalloc(tsd_t *tsd, size_t size); void *icalloct(tsd_t *tsd, size_t size, bool try_tcache, arena_t *arena); void *icalloc(tsd_t *tsd, size_t size); +void *ipallocztm(tsd_t *tsd, size_t usize, size_t alignment, bool zero, + bool try_tcache, bool is_metadata, arena_t *arena); void *ipalloct(tsd_t *tsd, size_t usize, size_t alignment, bool zero, bool try_tcache, arena_t *arena); void *ipalloc(tsd_t *tsd, size_t usize, size_t alignment, bool zero); -size_t isalloc(const void *ptr, bool demote); size_t ivsalloc(const void *ptr, bool demote); size_t u2rz(size_t usize); size_t p2rz(const void *ptr); +void idalloctm(tsd_t *tsd, void *ptr, bool try_tcache, bool is_metadata); void idalloct(tsd_t *tsd, void *ptr, bool try_tcache); -void isdalloct(tsd_t *tsd, void *ptr, size_t size, bool try_tcache); void idalloc(tsd_t *tsd, void *ptr); void iqalloc(tsd_t *tsd, void *ptr, bool try_tcache); +void isdalloct(tsd_t *tsd, void *ptr, size_t size, bool try_tcache); void isqalloc(tsd_t *tsd, void *ptr, size_t size, bool try_tcache); void *iralloct_realign(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t extra, size_t alignment, bool zero, bool try_tcache_alloc, @@ -805,76 +812,21 @@ bool ixalloc(void *ptr, size_t oldsize, size_t size, size_t extra, #endif #if (defined(JEMALLOC_ENABLE_INLINE) || 
defined(JEMALLOC_C_)) -JEMALLOC_ALWAYS_INLINE void * -imalloct(tsd_t *tsd, size_t size, bool try_tcache, arena_t *arena) +JEMALLOC_ALWAYS_INLINE arena_t * +iaalloc(const void *ptr) { + arena_t *arena; + arena_chunk_t *chunk; - assert(size != 0); + assert(ptr != NULL); - if (likely(size <= arena_maxclass)) - return (arena_malloc(tsd, arena, size, false, try_tcache)); + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + if (likely(chunk != ptr)) + arena = arena_aalloc(ptr); else - return (huge_malloc(tsd, arena, size, false, try_tcache)); -} + arena = huge_aalloc(ptr); -JEMALLOC_ALWAYS_INLINE void * -imalloc(tsd_t *tsd, size_t size) -{ - - return (imalloct(tsd, size, true, NULL)); -} - -JEMALLOC_ALWAYS_INLINE void * -icalloct(tsd_t *tsd, size_t size, bool try_tcache, arena_t *arena) -{ - - if (likely(size <= arena_maxclass)) - return (arena_malloc(tsd, arena, size, true, try_tcache)); - else - return (huge_malloc(tsd, arena, size, true, try_tcache)); -} - -JEMALLOC_ALWAYS_INLINE void * -icalloc(tsd_t *tsd, size_t size) -{ - - return (icalloct(tsd, size, true, NULL)); -} - -JEMALLOC_ALWAYS_INLINE void * -ipalloct(tsd_t *tsd, size_t usize, size_t alignment, bool zero, bool try_tcache, - arena_t *arena) -{ - void *ret; - - assert(usize != 0); - assert(usize == sa2u(usize, alignment)); - - if (usize <= SMALL_MAXCLASS && alignment < PAGE) - ret = arena_malloc(tsd, arena, usize, zero, try_tcache); - else { - if (likely(usize <= arena_maxclass)) { - arena = arena_choose(tsd, arena); - if (unlikely(arena == NULL)) - return (NULL); - ret = arena_palloc(arena, usize, alignment, zero); - } else if (likely(alignment <= chunksize)) - ret = huge_malloc(tsd, arena, usize, zero, try_tcache); - else { - ret = huge_palloc(tsd, arena, usize, alignment, zero, - try_tcache); - } - } - - assert(ALIGNMENT_ADDR2BASE(ret, alignment) == ret); - return (ret); -} - -JEMALLOC_ALWAYS_INLINE void * -ipalloc(tsd_t *tsd, size_t usize, size_t alignment, bool zero) -{ - - return (ipalloct(tsd, usize, 
alignment, zero, true, NULL)); + return (arena); } /* @@ -901,6 +853,101 @@ isalloc(const void *ptr, bool demote) return (ret); } +JEMALLOC_ALWAYS_INLINE void * +iallocztm(tsd_t *tsd, size_t size, bool zero, bool try_tcache, bool is_metadata, + arena_t *arena) +{ + void *ret; + + assert(size != 0); + + if (likely(size <= arena_maxclass)) + ret = arena_malloc(tsd, arena, size, zero, try_tcache); + else + ret = huge_malloc(tsd, arena, size, zero, try_tcache); + if (config_stats && is_metadata && likely(ret != NULL)) { + arena_metadata_allocated_add(iaalloc(ret), isalloc(ret, + config_prof)); + } + return (ret); +} + +JEMALLOC_ALWAYS_INLINE void * +imalloct(tsd_t *tsd, size_t size, bool try_tcache, arena_t *arena) +{ + + return (iallocztm(tsd, size, false, try_tcache, false, arena)); +} + +JEMALLOC_ALWAYS_INLINE void * +imalloc(tsd_t *tsd, size_t size) +{ + + return (iallocztm(tsd, size, false, true, false, NULL)); +} + +JEMALLOC_ALWAYS_INLINE void * +icalloct(tsd_t *tsd, size_t size, bool try_tcache, arena_t *arena) +{ + + return (iallocztm(tsd, size, true, try_tcache, false, arena)); +} + +JEMALLOC_ALWAYS_INLINE void * +icalloc(tsd_t *tsd, size_t size) +{ + + return (iallocztm(tsd, size, true, true, false, NULL)); +} + +JEMALLOC_ALWAYS_INLINE void * +ipallocztm(tsd_t *tsd, size_t usize, size_t alignment, bool zero, + bool try_tcache, bool is_metadata, arena_t *arena) +{ + void *ret; + + assert(usize != 0); + assert(usize == sa2u(usize, alignment)); + + if (usize <= SMALL_MAXCLASS && alignment < PAGE) + ret = arena_malloc(tsd, arena, usize, zero, try_tcache); + else { + if (likely(usize <= arena_maxclass)) { + arena = arena_choose(tsd, arena); + if (unlikely(arena == NULL)) + return (NULL); + ret = arena_palloc(arena, usize, alignment, zero); + } else if (likely(alignment <= chunksize)) + ret = huge_malloc(tsd, arena, usize, zero, try_tcache); + else { + ret = huge_palloc(tsd, arena, usize, alignment, zero, + try_tcache); + } + } + assert(ALIGNMENT_ADDR2BASE(ret, 
alignment) == ret); + if (config_stats && is_metadata && likely(ret != NULL)) { + arena_metadata_allocated_add(iaalloc(ret), isalloc(ret, + config_prof)); + } + return (ret); +} + +JEMALLOC_ALWAYS_INLINE void * +ipalloct(tsd_t *tsd, size_t usize, size_t alignment, bool zero, bool try_tcache, + arena_t *arena) +{ + + return (ipallocztm(tsd, usize, alignment, zero, try_tcache, false, + arena)); +} + +JEMALLOC_ALWAYS_INLINE void * +ipalloc(tsd_t *tsd, size_t usize, size_t alignment, bool zero) +{ + + return (ipallocztm(tsd, usize, alignment, zero, true, false, NULL)); +} + JEMALLOC_ALWAYS_INLINE size_t ivsalloc(const void *ptr, bool demote) { @@ -935,11 +982,15 @@ p2rz(const void *ptr) } JEMALLOC_ALWAYS_INLINE void -idalloct(tsd_t *tsd, void *ptr, bool try_tcache) +idalloctm(tsd_t *tsd, void *ptr, bool try_tcache, bool is_metadata) { arena_chunk_t *chunk; assert(ptr != NULL); + if (config_stats && is_metadata) { + arena_metadata_allocated_sub(iaalloc(ptr), isalloc(ptr, + config_prof)); + } chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (likely(chunk != ptr)) @@ -948,6 +999,30 @@ idalloct(tsd_t *tsd, void *ptr, bool try_tcache) huge_dalloc(tsd, ptr, try_tcache); } +JEMALLOC_ALWAYS_INLINE void +idalloct(tsd_t *tsd, void *ptr, bool try_tcache) +{ + + idalloctm(tsd, ptr, try_tcache, false); +} + +JEMALLOC_ALWAYS_INLINE void +idalloc(tsd_t *tsd, void *ptr) +{ + + idalloctm(tsd, ptr, true, false); +} + +JEMALLOC_ALWAYS_INLINE void +iqalloc(tsd_t *tsd, void *ptr, bool try_tcache) +{ + + if (config_fill && unlikely(opt_quarantine)) + quarantine(tsd, ptr); + else + idalloctm(tsd, ptr, try_tcache, false); +} + JEMALLOC_ALWAYS_INLINE void isdalloct(tsd_t *tsd, void *ptr, size_t size, bool try_tcache) { @@ -962,23 +1037,6 @@ isdalloct(tsd_t *tsd, void *ptr, size_t size, bool try_tcache) huge_dalloc(tsd, ptr, try_tcache); } -JEMALLOC_ALWAYS_INLINE void -idalloc(tsd_t *tsd, void *ptr) -{ - - idalloct(tsd, ptr, true); -} - -JEMALLOC_ALWAYS_INLINE void -iqalloc(tsd_t *tsd, void 
*ptr, bool try_tcache) -{ - - if (config_fill && unlikely(opt_quarantine)) - quarantine(tsd, ptr); - else - idalloct(tsd, ptr, try_tcache); -} - JEMALLOC_ALWAYS_INLINE void isqalloc(tsd_t *tsd, void *ptr, size_t size, bool try_tcache) { diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 1aaf80b6..dfa87551 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -1,6 +1,7 @@ a0dalloc a0get a0malloc +arena_aalloc arena_get arena_get_hard arena_alloc_junk_small @@ -50,6 +51,9 @@ arena_mapbitsp_read arena_mapbitsp_write arena_maxclass arena_maxrun +arena_metadata_allocated_add +arena_metadata_allocated_get +arena_metadata_allocated_sub arena_migrate arena_miscelm_get arena_miscelm_to_pageind @@ -90,6 +94,7 @@ atomic_sub_uint32 atomic_sub_uint64 atomic_sub_z base_alloc +base_allocated_get base_boot base_calloc base_node_alloc @@ -205,6 +210,7 @@ hash_rotl_64 hash_x64_128 hash_x86_128 hash_x86_32 +huge_aalloc huge_allocated huge_boot huge_dalloc @@ -221,10 +227,13 @@ huge_prof_tctx_set huge_ralloc huge_ralloc_no_move huge_salloc +iaalloc +iallocztm icalloc icalloct idalloc idalloct +idalloctm imalloc imalloct in_valgrind @@ -234,6 +243,7 @@ index2size_lookup index2size_tab ipalloc ipalloct +ipallocztm iqalloc iralloc iralloct diff --git a/include/jemalloc/internal/stats.h b/include/jemalloc/internal/stats.h index d8600ed4..7cba77b9 100644 --- a/include/jemalloc/internal/stats.h +++ b/include/jemalloc/internal/stats.h @@ -111,6 +111,13 @@ struct arena_stats_s { uint64_t nmadvise; uint64_t purged; + /* + * Number of bytes currently mapped purely for metadata purposes, and + * number of bytes currently allocated for internal metadata. + */ + size_t metadata_mapped; + size_t metadata_allocated; /* Protected via atomic_*_z(). */ + /* Per-size-category statistics. 
*/ size_t allocated_large; uint64_t nmalloc_large; diff --git a/src/arena.c b/src/arena.c index 1eb4000a..984b8ad2 100644 --- a/src/arena.c +++ b/src/arena.c @@ -405,8 +405,10 @@ arena_chunk_alloc_internal(arena_t *arena, size_t size, size_t alignment, chunk = (arena_chunk_t *)chunk_alloc_arena(chunk_alloc, chunk_dalloc, arena->ind, NULL, size, alignment, zero); malloc_mutex_lock(&arena->lock); - if (config_stats && chunk != NULL) + if (config_stats && chunk != NULL) { arena->stats.mapped += chunksize; + arena->stats.metadata_mapped += (map_bias << LG_PAGE); + } return (chunk); } @@ -514,8 +516,10 @@ arena_chunk_dalloc(arena_t *arena, arena_chunk_t *chunk) malloc_mutex_unlock(&arena->lock); chunk_dalloc((void *)spare, chunksize, arena->ind); malloc_mutex_lock(&arena->lock); - if (config_stats) + if (config_stats) { arena->stats.mapped -= chunksize; + arena->stats.metadata_mapped -= (map_bias << LG_PAGE); + } } else arena->spare = chunk; } @@ -2273,6 +2277,8 @@ arena_stats_merge(arena_t *arena, const char **dss, size_t *nactive, astats->npurge += arena->stats.npurge; astats->nmadvise += arena->stats.nmadvise; astats->purged += arena->stats.purged; + astats->metadata_mapped += arena->stats.metadata_mapped; + astats->metadata_allocated += arena_metadata_allocated_get(arena); astats->allocated_large += arena->stats.allocated_large; astats->nmalloc_large += arena->stats.nmalloc_large; astats->ndalloc_large += arena->stats.ndalloc_large; diff --git a/src/base.c b/src/base.c index 409c7bb7..22f36139 100644 --- a/src/base.c +++ b/src/base.c @@ -16,6 +16,8 @@ static void *base_next_addr; static void *base_past_addr; /* Addr immediately past base_pages. */ static extent_node_t *base_nodes; +static size_t base_allocated; + /******************************************************************************/ static bool @@ -54,6 +56,8 @@ base_alloc(size_t size) /* Allocate. 
*/ ret = base_next_addr; base_next_addr = (void *)((uintptr_t)base_next_addr + csize); + if (config_stats) + base_allocated += csize; malloc_mutex_unlock(&base_mtx); JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, csize); @@ -102,6 +106,17 @@ base_node_dalloc(extent_node_t *node) malloc_mutex_unlock(&base_mtx); } +size_t +base_allocated_get(void) +{ + size_t ret; + + malloc_mutex_lock(&base_mtx); + ret = base_allocated; + malloc_mutex_unlock(&base_mtx); + return (ret); +} + bool base_boot(void) { diff --git a/src/ctl.c b/src/ctl.c index 6b95584b..b65af520 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -183,10 +183,13 @@ CTL_PROTO(stats_arenas_i_mapped) CTL_PROTO(stats_arenas_i_npurge) CTL_PROTO(stats_arenas_i_nmadvise) CTL_PROTO(stats_arenas_i_purged) +CTL_PROTO(stats_arenas_i_metadata_mapped) +CTL_PROTO(stats_arenas_i_metadata_allocated) INDEX_PROTO(stats_arenas_i) CTL_PROTO(stats_cactive) CTL_PROTO(stats_allocated) CTL_PROTO(stats_active) +CTL_PROTO(stats_metadata) CTL_PROTO(stats_mapped) /******************************************************************************/ @@ -355,6 +358,11 @@ static const ctl_named_node_t stats_chunks_node[] = { {NAME("high"), CTL(stats_chunks_high)} }; +static const ctl_named_node_t stats_arenas_i_metadata_node[] = { + {NAME("mapped"), CTL(stats_arenas_i_metadata_mapped)}, + {NAME("allocated"), CTL(stats_arenas_i_metadata_allocated)} +}; + static const ctl_named_node_t stats_arenas_i_small_node[] = { {NAME("allocated"), CTL(stats_arenas_i_small_allocated)}, {NAME("nmalloc"), CTL(stats_arenas_i_small_nmalloc)}, @@ -432,6 +440,7 @@ static const ctl_named_node_t stats_arenas_i_node[] = { {NAME("npurge"), CTL(stats_arenas_i_npurge)}, {NAME("nmadvise"), CTL(stats_arenas_i_nmadvise)}, {NAME("purged"), CTL(stats_arenas_i_purged)}, + {NAME("metadata"), CHILD(named, stats_arenas_i_metadata)}, {NAME("small"), CHILD(named, stats_arenas_i_small)}, {NAME("large"), CHILD(named, stats_arenas_i_large)}, {NAME("huge"), CHILD(named, stats_arenas_i_huge)}, @@ 
-451,6 +460,7 @@ static const ctl_named_node_t stats_node[] = { {NAME("cactive"), CTL(stats_cactive)}, {NAME("allocated"), CTL(stats_allocated)}, {NAME("active"), CTL(stats_active)}, + {NAME("metadata"), CTL(stats_metadata)}, {NAME("mapped"), CTL(stats_mapped)}, {NAME("chunks"), CHILD(named, stats_chunks)}, {NAME("arenas"), CHILD(indexed, stats_arenas)} @@ -484,14 +494,14 @@ ctl_arena_init(ctl_arena_stats_t *astats) if (astats->lstats == NULL) { astats->lstats = (malloc_large_stats_t *)a0malloc(nlclasses * - sizeof(malloc_large_stats_t), false); + sizeof(malloc_large_stats_t)); if (astats->lstats == NULL) return (true); } if (astats->hstats == NULL) { astats->hstats = (malloc_huge_stats_t *)a0malloc(nhclasses * - sizeof(malloc_huge_stats_t), false); + sizeof(malloc_huge_stats_t)); if (astats->hstats == NULL) return (true); } @@ -551,6 +561,9 @@ ctl_arena_stats_smerge(ctl_arena_stats_t *sstats, ctl_arena_stats_t *astats) sstats->astats.nmadvise += astats->astats.nmadvise; sstats->astats.purged += astats->astats.purged; + sstats->astats.metadata_mapped += astats->astats.metadata_mapped; + sstats->astats.metadata_allocated += astats->astats.metadata_allocated; + sstats->allocated_small += astats->allocated_small; sstats->nmalloc_small += astats->nmalloc_small; sstats->ndalloc_small += astats->ndalloc_small; @@ -627,7 +640,7 @@ ctl_grow(void) /* Allocate extended arena stats. 
*/ astats = (ctl_arena_stats_t *)a0malloc((ctl_stats.narenas + 2) * - sizeof(ctl_arena_stats_t), false); + sizeof(ctl_arena_stats_t)); if (astats == NULL) return (true); @@ -704,6 +717,10 @@ ctl_refresh(void) + ctl_stats.arenas[ctl_stats.narenas].astats.allocated_huge; ctl_stats.active = (ctl_stats.arenas[ctl_stats.narenas].pactive << LG_PAGE); + ctl_stats.metadata = base_allocated_get() + + ctl_stats.arenas[ctl_stats.narenas].astats.metadata_mapped + + ctl_stats.arenas[ctl_stats.narenas].astats + .metadata_allocated; ctl_stats.mapped = (ctl_stats.chunks.current << opt_lg_chunk); } @@ -723,7 +740,7 @@ ctl_init(void) */ ctl_stats.narenas = narenas_total_get(); ctl_stats.arenas = (ctl_arena_stats_t *)a0malloc( - (ctl_stats.narenas + 1) * sizeof(ctl_arena_stats_t), false); + (ctl_stats.narenas + 1) * sizeof(ctl_arena_stats_t)); if (ctl_stats.arenas == NULL) { ret = true; goto label_return; @@ -1806,6 +1823,7 @@ CTL_RO_NL_CGEN(config_prof, lg_prof_sample, lg_prof_sample, size_t) CTL_RO_CGEN(config_stats, stats_cactive, &stats_cactive, size_t *) CTL_RO_CGEN(config_stats, stats_allocated, ctl_stats.allocated, size_t) CTL_RO_CGEN(config_stats, stats_active, ctl_stats.active, size_t) +CTL_RO_CGEN(config_stats, stats_metadata, ctl_stats.metadata, size_t) CTL_RO_CGEN(config_stats, stats_mapped, ctl_stats.mapped, size_t) CTL_RO_CGEN(config_stats, stats_chunks_current, ctl_stats.chunks.current, @@ -1825,6 +1843,10 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_nmadvise, ctl_stats.arenas[mib[2]].astats.nmadvise, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_purged, ctl_stats.arenas[mib[2]].astats.purged, uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_metadata_mapped, + ctl_stats.arenas[mib[2]].astats.metadata_mapped, size_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_metadata_allocated, + ctl_stats.arenas[mib[2]].astats.metadata_allocated, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_small_allocated, ctl_stats.arenas[mib[2]].allocated_small, size_t) diff --git 
a/src/huge.c b/src/huge.c index 416cb172..c4d1ebc6 100644 --- a/src/huge.c +++ b/src/huge.c @@ -37,8 +37,8 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, /* Allocate one or more contiguous chunks for this request. */ /* Allocate an extent node with which to track the chunk. */ - node = ipalloct(tsd, CACHELINE_CEILING(sizeof(extent_node_t)), - CACHELINE, false, try_tcache, NULL); + node = ipallocztm(tsd, CACHELINE_CEILING(sizeof(extent_node_t)), + CACHELINE, false, try_tcache, true, arena); if (node == NULL) return (NULL); @@ -50,7 +50,7 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, arena = arena_choose(tsd, arena); if (unlikely(arena == NULL) || (ret = arena_chunk_alloc_huge(arena, usize, alignment, &is_zeroed)) == NULL) { - idalloct(tsd, node, try_tcache); + idalloctm(tsd, node, try_tcache, true); return (NULL); } @@ -73,6 +73,33 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, return (ret); } +static extent_node_t * +huge_node_locked(const void *ptr) +{ + extent_node_t *node, key; + + /* Extract from tree of huge allocations. */ + key.addr = __DECONST(void *, ptr); + node = extent_tree_ad_search(&huge, &key); + assert(node != NULL); + assert(node->addr == ptr); + malloc_mutex_unlock(&huge_mtx); + + return (node); +} + +static extent_node_t * +huge_node(const void *ptr) +{ + extent_node_t *node; + + malloc_mutex_lock(&huge_mtx); + node = huge_node_locked(ptr); + malloc_mutex_unlock(&huge_mtx); + + return (node); +} + #ifdef JEMALLOC_JET #undef huge_dalloc_junk #define huge_dalloc_junk JEMALLOC_N(huge_dalloc_junk_impl) @@ -102,7 +129,7 @@ huge_ralloc_no_move_similar(void *ptr, size_t oldsize, size_t usize, { size_t usize_next; bool zeroed; - extent_node_t *node, key; + extent_node_t *node; arena_t *arena; /* Increase usize to incorporate extra. 
*/ @@ -126,10 +153,7 @@ huge_ralloc_no_move_similar(void *ptr, size_t oldsize, size_t usize, zeroed = true; malloc_mutex_lock(&huge_mtx); - key.addr = ptr; - node = extent_tree_ad_search(&huge, &key); - assert(node != NULL); - assert(node->addr == ptr); + node = huge_node_locked(ptr); arena = node->arena; /* Update the size of the huge allocation. */ assert(node->size != usize); @@ -159,7 +183,7 @@ huge_ralloc_no_move_shrink(void *ptr, size_t oldsize, size_t usize) { size_t sdiff; bool zeroed; - extent_node_t *node, key; + extent_node_t *node; arena_t *arena; sdiff = CHUNK_CEILING(usize) - usize; @@ -172,10 +196,7 @@ huge_ralloc_no_move_shrink(void *ptr, size_t oldsize, size_t usize) } malloc_mutex_lock(&huge_mtx); - key.addr = ptr; - node = extent_tree_ad_search(&huge, &key); - assert(node != NULL); - assert(node->addr == ptr); + node = huge_node_locked(ptr); arena = node->arena; /* Update the size of the huge allocation. */ node->size = usize; @@ -190,7 +211,7 @@ huge_ralloc_no_move_shrink(void *ptr, size_t oldsize, size_t usize) static bool huge_ralloc_no_move_expand(void *ptr, size_t oldsize, size_t size, bool zero) { size_t usize; - extent_node_t *node, key; + extent_node_t *node; arena_t *arena; bool is_zeroed_subchunk, is_zeroed_chunk; @@ -201,10 +222,7 @@ huge_ralloc_no_move_expand(void *ptr, size_t oldsize, size_t size, bool zero) { } malloc_mutex_lock(&huge_mtx); - key.addr = ptr; - node = extent_tree_ad_search(&huge, &key); - assert(node != NULL); - assert(node->addr == ptr); + node = huge_node_locked(ptr); arena = node->arena; is_zeroed_subchunk = node->zeroed; malloc_mutex_unlock(&huge_mtx); @@ -342,77 +360,44 @@ huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size, void huge_dalloc(tsd_t *tsd, void *ptr, bool try_tcache) { - extent_node_t *node, key; + extent_node_t *node; malloc_mutex_lock(&huge_mtx); - /* Extract from tree of huge allocations. 
*/ - key.addr = ptr; - node = extent_tree_ad_search(&huge, &key); - assert(node != NULL); - assert(node->addr == ptr); + node = huge_node_locked(ptr); extent_tree_ad_remove(&huge, node); malloc_mutex_unlock(&huge_mtx); huge_dalloc_junk(node->addr, node->size); arena_chunk_dalloc_huge(node->arena, node->addr, node->size); - idalloct(tsd, node, try_tcache); + idalloctm(tsd, node, try_tcache, true); +} + +arena_t * +huge_aalloc(const void *ptr) +{ + + return (huge_node(ptr)->arena); } size_t huge_salloc(const void *ptr) { - size_t ret; - extent_node_t *node, key; - malloc_mutex_lock(&huge_mtx); - - /* Extract from tree of huge allocations. */ - key.addr = __DECONST(void *, ptr); - node = extent_tree_ad_search(&huge, &key); - assert(node != NULL); - - ret = node->size; - - malloc_mutex_unlock(&huge_mtx); - - return (ret); + return (huge_node(ptr)->size); } prof_tctx_t * huge_prof_tctx_get(const void *ptr) { - prof_tctx_t *ret; - extent_node_t *node, key; - malloc_mutex_lock(&huge_mtx); - - /* Extract from tree of huge allocations. */ - key.addr = __DECONST(void *, ptr); - node = extent_tree_ad_search(&huge, &key); - assert(node != NULL); - - ret = node->prof_tctx; - - malloc_mutex_unlock(&huge_mtx); - - return (ret); + return (huge_node(ptr)->prof_tctx); } void huge_prof_tctx_set(const void *ptr, prof_tctx_t *tctx) { - extent_node_t *node, key; - malloc_mutex_lock(&huge_mtx); - - /* Extract from tree of huge allocations. 
*/ - key.addr = __DECONST(void *, ptr); - node = extent_tree_ad_search(&huge, &key); - assert(node != NULL); - - node->prof_tctx = tctx; - - malloc_mutex_unlock(&huge_mtx); + huge_node(ptr)->prof_tctx = tctx; } bool diff --git a/src/jemalloc.c b/src/jemalloc.c index 632c8d3e..d1fa674c 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -289,45 +289,34 @@ a0get(void) } static void * -a0imalloc(size_t size, bool zero) +a0ialloc(size_t size, bool zero, bool is_metadata) { - void *ret; if (unlikely(malloc_init_a0())) return (NULL); - if (likely(size <= arena_maxclass)) - ret = arena_malloc(NULL, a0get(), size, zero, false); - else - ret = huge_malloc(NULL, a0get(), size, zero, false); - - return (ret); + return (iallocztm(NULL, size, zero, false, is_metadata, a0get())); } static void -a0idalloc(void *ptr) +a0idalloc(void *ptr, bool is_metadata) { - arena_chunk_t *chunk; - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - if (likely(chunk != ptr)) - arena_dalloc(NULL, chunk, ptr, false); - else - huge_dalloc(NULL, ptr, false); + idalloctm(NULL, ptr, false, is_metadata); } void * -a0malloc(size_t size, bool zero) +a0malloc(size_t size) { - return (a0imalloc(size, zero)); + return (a0ialloc(size, false, true)); } void a0dalloc(void *ptr) { - a0idalloc(ptr); + a0idalloc(ptr, true); } /* @@ -343,7 +332,7 @@ bootstrap_malloc(size_t size) if (unlikely(size == 0)) size = 1; - return (a0imalloc(size, false)); + return (a0ialloc(size, false, false)); } void * @@ -357,7 +346,7 @@ bootstrap_calloc(size_t num, size_t size) num_size = 1; } - return (a0imalloc(num_size, true)); + return (a0ialloc(num_size, true, false)); } void @@ -367,7 +356,7 @@ bootstrap_free(void *ptr) if (unlikely(ptr == NULL)) return; - a0idalloc(ptr); + a0idalloc(ptr, false); } /* Create a new arena and insert it into the arenas array at index ind. 
*/ @@ -382,7 +371,7 @@ arena_init_locked(unsigned ind) unsigned narenas_new = narenas_total + 1; arena_t **arenas_new = (arena_t **)a0malloc(CACHELINE_CEILING(narenas_new * - sizeof(arena_t *)), false); + sizeof(arena_t *))); if (arenas_new == NULL) return (NULL); memcpy(arenas_new, arenas, narenas_total * sizeof(arena_t *)); @@ -519,7 +508,7 @@ arena_get_hard(tsd_t *tsd, unsigned ind, bool init_if_missing) if (!*arenas_cache_bypassp) { *arenas_cache_bypassp = true; arenas_cache = (arena_t **)a0malloc(sizeof(arena_t *) * - narenas_cache, false); + narenas_cache); *arenas_cache_bypassp = false; } else arenas_cache = NULL; @@ -1202,6 +1191,8 @@ malloc_init_hard_a0_locked(void) arena_boot(); if (config_tcache && tcache_boot()) return (true); + if (config_tcache && tcache_boot()) + malloc_mutex_unlock(&init_lock); if (huge_boot()) return (true); if (malloc_mutex_init(&arenas_lock)) diff --git a/src/prof.c b/src/prof.c index 1103cc94..06f5499f 100644 --- a/src/prof.c +++ b/src/prof.c @@ -532,8 +532,8 @@ prof_gctx_create(tsd_t *tsd, prof_bt_t *bt) /* * Create a single allocation that has space for vec of length bt->len. */ - prof_gctx_t *gctx = (prof_gctx_t *)imalloc(tsd, offsetof(prof_gctx_t, - vec) + (bt->len * sizeof(void *))); + prof_gctx_t *gctx = (prof_gctx_t *)iallocztm(tsd, offsetof(prof_gctx_t, + vec) + (bt->len * sizeof(void *)), false, true, true, NULL); if (gctx == NULL) return (NULL); gctx->lock = prof_gctx_mutex_choose(); @@ -574,7 +574,7 @@ prof_gctx_try_destroy(tsd_t *tsd, prof_tdata_t *tdata_self, prof_gctx_t *gctx, prof_leave(tsd, tdata_self); /* Destroy gctx. 
*/ malloc_mutex_unlock(gctx->lock); - idalloc(tsd, gctx); + idalloctm(tsd, gctx, true, true); } else { /* * Compensate for increment in prof_tctx_destroy() or @@ -674,7 +674,7 @@ prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) prof_tdata_destroy(tsd, tdata, false); if (destroy_tctx) - idalloc(tsd, tctx); + idalloctm(tsd, tctx, true, true); } static bool @@ -703,7 +703,7 @@ prof_lookup_global(tsd_t *tsd, prof_bt_t *bt, prof_tdata_t *tdata, if (ckh_insert(tsd, &bt2gctx, btkey.v, gctx.v)) { /* OOM. */ prof_leave(tsd, tdata); - idalloc(tsd, gctx.v); + idalloctm(tsd, gctx.v, true, true); return (true); } new_gctx = true; @@ -760,7 +760,8 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) return (NULL); /* Link a prof_tctx_t into gctx for this thread. */ - ret.v = imalloc(tsd, sizeof(prof_tctx_t)); + ret.v = iallocztm(tsd, sizeof(prof_tctx_t), false, true, true, + NULL); if (ret.p == NULL) { if (new_gctx) prof_gctx_try_destroy(tsd, tdata, gctx, tdata); @@ -778,7 +779,7 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) if (error) { if (new_gctx) prof_gctx_try_destroy(tsd, tdata, gctx, tdata); - idalloc(tsd, ret.v); + idalloctm(tsd, ret.v, true, true); return (NULL); } malloc_mutex_lock(gctx->lock); @@ -1158,7 +1159,7 @@ prof_gctx_finish(tsd_t *tsd, prof_gctx_tree_t *gctxs) to_destroy); tctx_tree_remove(&gctx->tctxs, to_destroy); - idalloc(tsd, to_destroy); + idalloctm(tsd, to_destroy, true, true); } else next = NULL; } while (next != NULL); @@ -1640,7 +1641,8 @@ prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, cassert(config_prof); /* Initialize an empty cache for this thread. 
*/ - tdata = (prof_tdata_t *)imalloc(tsd, sizeof(prof_tdata_t)); + tdata = (prof_tdata_t *)iallocztm(tsd, sizeof(prof_tdata_t), false, + true, true, NULL); if (tdata == NULL) return (NULL); @@ -1653,7 +1655,7 @@ prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, if (ckh_new(tsd, &tdata->bt2tctx, PROF_CKH_MINITEMS, prof_bt_hash, prof_bt_keycomp)) { - idalloc(tsd, tdata); + idalloctm(tsd, tdata, true, true); return (NULL); } @@ -1706,9 +1708,9 @@ prof_tdata_destroy_locked(tsd_t *tsd, prof_tdata_t *tdata, tdata_tree_remove(&tdatas, tdata); if (tdata->thread_name != NULL) - idalloc(tsd, tdata->thread_name); + idalloctm(tsd, tdata->thread_name, true, true); ckh_delete(tsd, &tdata->bt2tctx); - idalloc(tsd, tdata); + idalloctm(tsd, tdata, true, true); } static void @@ -1869,7 +1871,7 @@ prof_thread_name_alloc(tsd_t *tsd, const char *thread_name) if (size == 1) return (""); - ret = imalloc(tsd, size); + ret = iallocztm(tsd, size, false, true, true, NULL); if (ret == NULL) return (NULL); memcpy(ret, thread_name, size); @@ -1901,7 +1903,7 @@ prof_thread_name_set(tsd_t *tsd, const char *thread_name) return (EAGAIN); if (tdata->thread_name != NULL) { - idalloc(tsd, tdata->thread_name); + idalloctm(tsd, tdata->thread_name, true, true); tdata->thread_name = NULL; } if (strlen(s) > 0) diff --git a/src/quarantine.c b/src/quarantine.c index 12c37e0a..094b44d3 100644 --- a/src/quarantine.c +++ b/src/quarantine.c @@ -26,8 +26,9 @@ quarantine_init(tsd_t *tsd, size_t lg_maxobjs) assert(tsd_nominal(tsd)); - quarantine = (quarantine_t *)imalloc(tsd, offsetof(quarantine_t, objs) + - ((ZU(1) << lg_maxobjs) * sizeof(quarantine_obj_t))); + quarantine = (quarantine_t *)iallocztm(tsd, offsetof(quarantine_t, objs) + + ((ZU(1) << lg_maxobjs) * sizeof(quarantine_obj_t)), false, true, + true, NULL); if (quarantine == NULL) return (NULL); quarantine->curbytes = 0; @@ -54,7 +55,7 @@ quarantine_alloc_hook_work(tsd_t *tsd) if (tsd_quarantine_get(tsd) == NULL) 
tsd_quarantine_set(tsd, quarantine); else - idalloc(tsd, quarantine); + idalloctm(tsd, quarantine, true, true); } static quarantine_t * @@ -86,7 +87,7 @@ quarantine_grow(tsd_t *tsd, quarantine_t *quarantine) memcpy(&ret->objs[ncopy_a], quarantine->objs, ncopy_b * sizeof(quarantine_obj_t)); } - idalloc(tsd, quarantine); + idalloctm(tsd, quarantine, true, true); tsd_quarantine_set(tsd, ret); return (ret); @@ -176,7 +177,7 @@ quarantine_cleanup(tsd_t *tsd) quarantine = tsd_quarantine_get(tsd); if (quarantine != NULL) { quarantine_drain(tsd, quarantine, 0); - idalloc(tsd, quarantine); + idalloctm(tsd, quarantine, true, true); tsd_quarantine_set(tsd, NULL); } } diff --git a/src/stats.c b/src/stats.c index 2b3da645..865f7757 100644 --- a/src/stats.c +++ b/src/stats.c @@ -265,6 +265,7 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, unsigned nthreads; const char *dss; size_t page, pactive, pdirty, mapped; + size_t metadata_mapped, metadata_allocated; uint64_t npurge, nmadvise, purged; size_t small_allocated; uint64_t small_nmalloc, small_ndalloc, small_nrequests; @@ -331,6 +332,12 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, CTL_I_GET("stats.arenas.0.mapped", &mapped, size_t); malloc_cprintf(write_cb, cbopaque, "mapped: %12zu\n", mapped); + CTL_I_GET("stats.arenas.0.metadata.mapped", &metadata_mapped, size_t); + CTL_I_GET("stats.arenas.0.metadata.allocated", &metadata_allocated, + size_t); + malloc_cprintf(write_cb, cbopaque, + "metadata: mapped: %zu, allocated: %zu\n", metadata_mapped, + metadata_allocated); if (bins) stats_arena_bins_print(write_cb, cbopaque, i); @@ -539,17 +546,18 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, if (config_stats) { size_t *cactive; - size_t allocated, active, mapped; + size_t allocated, active, metadata, mapped; size_t chunks_current, chunks_high; uint64_t chunks_total; CTL_GET("stats.cactive", &cactive, size_t *); CTL_GET("stats.allocated", &allocated, 
size_t); CTL_GET("stats.active", &active, size_t); + CTL_GET("stats.metadata", &metadata, size_t); CTL_GET("stats.mapped", &mapped, size_t); malloc_cprintf(write_cb, cbopaque, - "Allocated: %zu, active: %zu, mapped: %zu\n", - allocated, active, mapped); + "Allocated: %zu, active: %zu, metadata: %zu, mapped: %zu\n", + allocated, active, metadata, mapped); malloc_cprintf(write_cb, cbopaque, "Current active ceiling: %zu\n", atomic_read_z(cactive)); diff --git a/src/tcache.c b/src/tcache.c index 34224ec4..d638015f 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -298,7 +298,7 @@ tcache_create(tsd_t *tsd, arena_t *arena) /* Avoid false cacheline sharing. */ size = sa2u(size, CACHELINE); - tcache = ipalloct(tsd, size, CACHELINE, true, false, arena); + tcache = ipallocztm(tsd, size, CACHELINE, true, false, true, arena); if (tcache == NULL) return (NULL); @@ -353,7 +353,7 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache) arena_prof_accum(tcache->arena, tcache->prof_accumbytes)) prof_idump(); - idalloct(tsd, tcache, false); + idalloctm(tsd, tcache, false, true); } void diff --git a/src/tsd.c b/src/tsd.c index 00d8f95f..3b59acff 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -15,7 +15,7 @@ void * malloc_tsd_malloc(size_t size) { - return (a0malloc(CACHELINE_CEILING(size), false)); + return (a0malloc(CACHELINE_CEILING(size))); } void From eee27b2a38d6bb741d9de5e028d5b23e2f4ec4cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Marie?= Date: Sun, 25 Jan 2015 15:12:28 +0100 Subject: [PATCH 0618/3378] huge_node_locked don't have to unlock huge_mtx in src/huge.c, after each call of huge_node_locked(), huge_mtx is already unlocked. don't unlock it twice (it is a undefined behaviour). 
--- src/huge.c | 1 - 1 file changed, 1 deletion(-) diff --git a/src/huge.c b/src/huge.c index c4d1ebc6..84a1ab23 100644 --- a/src/huge.c +++ b/src/huge.c @@ -83,7 +83,6 @@ huge_node_locked(const void *ptr) node = extent_tree_ad_search(&huge, &key); assert(node != NULL); assert(node->addr == ptr); - malloc_mutex_unlock(&huge_mtx); return (node); } From 77d597ebb23aa47a4a0112c294ad6a68857f450c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Marie?= Date: Sun, 25 Jan 2015 10:18:32 +0100 Subject: [PATCH 0619/3378] add openbsd support --- configure.ac | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/configure.ac b/configure.ac index 0a4f01e8..5e93a5d5 100644 --- a/configure.ac +++ b/configure.ac @@ -283,6 +283,11 @@ case "${host}" in abi="elf" AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) ;; + *-*-openbsd*) + CFLAGS="$CFLAGS" + abi="elf" + AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) + ;; *-*-linux*) CFLAGS="$CFLAGS" CPPFLAGS="$CPPFLAGS -D_GNU_SOURCE" From 0fd663e9c5336089a98e8a2a0cf5419b534f045f Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 25 Jan 2015 17:31:24 -0800 Subject: [PATCH 0620/3378] Avoid pointless chunk_recycle() call. Avoid calling chunk_recycle() for mmap()ed chunks if config_munmap is disabled, in which case there are never any recyclable chunks. This resolves #164. 
--- src/chunk.c | 52 ++++++++++++++++++++++++++++++---------------------- 1 file changed, 30 insertions(+), 22 deletions(-) diff --git a/src/chunk.c b/src/chunk.c index b9a24416..6d5f84f5 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -132,6 +132,19 @@ chunk_recycle(extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, return (ret); } +static void * +chunk_alloc_core_dss(void *new_addr, size_t size, size_t alignment, bool base, + bool *zero) +{ + void *ret; + + if ((ret = chunk_recycle(&chunks_szad_dss, &chunks_ad_dss, + new_addr, size, alignment, base, zero)) != NULL) + return (ret); + ret = chunk_alloc_dss(new_addr, size, alignment, zero); + return (ret); +} + /* * If the caller specifies (!*zero), it is still possible to receive zeroed * memory, in which case *zero is toggled to true. arena_chunk_alloc() takes @@ -150,31 +163,26 @@ chunk_alloc_core(void *new_addr, size_t size, size_t alignment, bool base, assert((alignment & chunksize_mask) == 0); /* "primary" dss. */ - if (have_dss && dss_prec == dss_prec_primary) { - if ((ret = chunk_recycle(&chunks_szad_dss, &chunks_ad_dss, - new_addr, size, alignment, base, zero)) != NULL) - return (ret); - if ((ret = chunk_alloc_dss(new_addr, size, alignment, zero)) - != NULL) - return (ret); - } - /* mmap. */ - if ((ret = chunk_recycle(&chunks_szad_mmap, &chunks_ad_mmap, new_addr, - size, alignment, base, zero)) != NULL) + if (have_dss && dss_prec == dss_prec_primary && (ret = + chunk_alloc_core_dss(new_addr, size, alignment, base, zero)) != + NULL) return (ret); - /* Requesting an address not implemented for chunk_alloc_mmap(). */ - if (new_addr == NULL && - (ret = chunk_alloc_mmap(size, alignment, zero)) != NULL) + /* mmap. */ + if (!config_munmap && (ret = chunk_recycle(&chunks_szad_mmap, + &chunks_ad_mmap, new_addr, size, alignment, base, zero)) != NULL) + return (ret); + /* + * Requesting an address is not implemented for chunk_alloc_mmap(), so + * only call it if (new_addr == NULL). 
+ */ + if (new_addr == NULL && (ret = chunk_alloc_mmap(size, alignment, zero)) + != NULL) return (ret); /* "secondary" dss. */ - if (have_dss && dss_prec == dss_prec_secondary) { - if ((ret = chunk_recycle(&chunks_szad_dss, &chunks_ad_dss, - new_addr, size, alignment, base, zero)) != NULL) - return (ret); - if ((ret = chunk_alloc_dss(new_addr, size, alignment, zero)) - != NULL) - return (ret); - } + if (have_dss && dss_prec == dss_prec_secondary && (ret = + chunk_alloc_core_dss(new_addr, size, alignment, base, zero)) != + NULL) + return (ret); /* All strategies for allocation failed. */ return (NULL); From 41f2e692f664da683ae694b17630f5e186aa454c Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 25 Jan 2015 20:15:13 -0800 Subject: [PATCH 0621/3378] Fix quoting for CONFIG-related sed expression. --- configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 5e93a5d5..4c202591 100644 --- a/configure.ac +++ b/configure.ac @@ -43,7 +43,7 @@ AC_CACHE_CHECK([whether $1 is compilable], dnl ============================================================================ -CONFIG=`echo ${ac_configure_args} | sed -e "s#\'\([^ ]*\)\'#\1#g"` +CONFIG=`echo ${ac_configure_args} | sed -e 's#'"'"'\([^ ]*\)'"'"'#\1#g'` AC_SUBST([CONFIG]) dnl Library revision. From 5b8ed5b7c91939f64f14fc48be84ed20e3f023f4 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 25 Jan 2015 21:16:57 -0800 Subject: [PATCH 0622/3378] Implement the prof.gdump mallctl. This feature makes it possible to toggle the gdump feature on/off during program execution, whereas the the opt.prof_dump mallctl value can only be set during program startup. This resolves #72. 
--- doc/jemalloc.xml.in | 28 +++++++++++---- include/jemalloc/internal/private_symbols.txt | 4 +++ include/jemalloc/internal/prof.h | 18 ++++++++++ src/chunk.c | 3 +- src/ctl.c | 27 +++++++++++++++ src/prof.c | 34 +++++++++++++++++++ test/unit/prof_gdump.c | 29 ++++++++++++++-- 7 files changed, 133 insertions(+), 10 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 08fd4eb3..739b33ac 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1215,13 +1215,11 @@ malloc_conf = "xmalloc:true";]]> r- [] - Trigger a memory profile dump every time the total - virtual memory exceeds the previous maximum. Profiles are dumped to - files named according to the pattern - <prefix>.<pid>.<seq>.u<useq>.heap, - where <prefix> is controlled by the opt.prof_prefix - option. This option is disabled by default. + Set the initial state of prof.gdump, which when + enabled triggers a memory profile dump every time the total virtual + memory exceeds the previous maximum. This option is disabled by + default. @@ -1687,6 +1685,22 @@ malloc_conf = "xmalloc:true";]]> option. + + + prof.gdump + (bool) + rw + [] + + When enabled, trigger a memory profile dump every time + the total virtual memory exceeds the previous maximum. Profiles are + dumped to files named according to the pattern + <prefix>.<pid>.<seq>.u<useq>.heap, + where <prefix> is controlled by the opt.prof_prefix + option. 
+ + prof.reset diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index dfa87551..f3fd8262 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -329,6 +329,10 @@ prof_dump_open prof_free prof_free_sampled_object prof_gdump +prof_gdump_get +prof_gdump_get_unlocked +prof_gdump_set +prof_gdump_val prof_idump prof_interval prof_lookup diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index e0818849..b2db6859 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -239,6 +239,9 @@ extern char opt_prof_prefix[ /* Accessed via prof_active_[gs]et{_unlocked,}(). */ extern bool prof_active; +/* Accessed via prof_gdump_[gs]et{_unlocked,}(). */ +extern bool prof_gdump_val; + /* * Profile dump interval, measured in bytes allocated. Each arena triggers a * profile dump when it reaches this threshold. The effect is that the @@ -285,6 +288,8 @@ bool prof_thread_active_get(void); bool prof_thread_active_set(bool active); bool prof_thread_active_init_get(void); bool prof_thread_active_init_set(bool active_init); +bool prof_gdump_get(void); +bool prof_gdump_set(bool active); void prof_boot0(void); void prof_boot1(void); bool prof_boot2(void); @@ -299,6 +304,7 @@ void prof_sample_threshold_update(prof_tdata_t *tdata); #ifndef JEMALLOC_ENABLE_INLINE bool prof_active_get_unlocked(void); +bool prof_gdump_get_unlocked(void); prof_tdata_t *prof_tdata_get(tsd_t *tsd, bool create); bool prof_sample_accum_update(tsd_t *tsd, size_t usize, bool commit, prof_tdata_t **tdata_out); @@ -327,6 +333,18 @@ prof_active_get_unlocked(void) return (prof_active); } +JEMALLOC_ALWAYS_INLINE bool +prof_gdump_get_unlocked(void) +{ + + /* + * No locking is used when reading prof_gdump_val in the fast path, so + * there are no guarantees regarding how long it will take for all + * threads to notice state changes. 
+ */ + return (prof_gdump_val); +} + JEMALLOC_ALWAYS_INLINE prof_tdata_t * prof_tdata_get(tsd_t *tsd, bool create) { diff --git a/src/chunk.c b/src/chunk.c index 6d5f84f5..7bfcdb87 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -213,7 +213,8 @@ chunk_register(void *chunk, size_t size, bool base) } else if (config_prof) gdump = false; malloc_mutex_unlock(&chunks_mtx); - if (config_prof && opt_prof && opt_prof_gdump && gdump) + if (config_prof && opt_prof && prof_gdump_get_unlocked() && + gdump) prof_gdump(); } if (config_valgrind) diff --git a/src/ctl.c b/src/ctl.c index b65af520..63a689a3 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -137,6 +137,7 @@ CTL_PROTO(arenas_extend) CTL_PROTO(prof_thread_active_init) CTL_PROTO(prof_active) CTL_PROTO(prof_dump) +CTL_PROTO(prof_gdump) CTL_PROTO(prof_reset) CTL_PROTO(prof_interval) CTL_PROTO(lg_prof_sample) @@ -347,6 +348,7 @@ static const ctl_named_node_t prof_node[] = { {NAME("thread_active_init"), CTL(prof_thread_active_init)}, {NAME("active"), CTL(prof_active)}, {NAME("dump"), CTL(prof_dump)}, + {NAME("gdump"), CTL(prof_gdump)}, {NAME("reset"), CTL(prof_reset)}, {NAME("interval"), CTL(prof_interval)}, {NAME("lg_sample"), CTL(lg_prof_sample)} @@ -1790,6 +1792,31 @@ label_return: return (ret); } +static int +prof_gdump_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, + void *newp, size_t newlen) +{ + int ret; + bool oldval; + + if (!config_prof) + return (ENOENT); + + if (newp != NULL) { + if (newlen != sizeof(bool)) { + ret = EINVAL; + goto label_return; + } + oldval = prof_gdump_set(*(bool *)newp); + } else + oldval = prof_gdump_get(); + READ(oldval, bool); + + ret = 0; +label_return: + return (ret); +} + static int prof_reset_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) diff --git a/src/prof.c b/src/prof.c index 06f5499f..04b2591c 100644 --- a/src/prof.c +++ b/src/prof.c @@ -44,6 +44,13 @@ static malloc_mutex_t prof_active_mtx; static bool 
prof_thread_active_init; static malloc_mutex_t prof_thread_active_init_mtx; +/* + * Initialized as opt_prof_gdump, and accessed via + * prof_gdump_[gs]et{_unlocked,}(). + */ +bool prof_gdump_val; +static malloc_mutex_t prof_gdump_mtx; + uint64_t prof_interval = 0; size_t lg_prof_sample; @@ -1961,6 +1968,29 @@ prof_thread_active_init_set(bool active_init) return (active_init_old); } +bool +prof_gdump_get(void) +{ + bool prof_gdump_current; + + malloc_mutex_lock(&prof_gdump_mtx); + prof_gdump_current = prof_gdump_val; + malloc_mutex_unlock(&prof_gdump_mtx); + return (prof_gdump_current); +} + +bool +prof_gdump_set(bool gdump) +{ + bool prof_gdump_old; + + malloc_mutex_lock(&prof_gdump_mtx); + prof_gdump_old = prof_gdump_val; + prof_gdump_val = gdump; + malloc_mutex_unlock(&prof_gdump_mtx); + return (prof_gdump_old); +} + void prof_boot0(void) { @@ -2013,6 +2043,10 @@ prof_boot2(void) if (malloc_mutex_init(&prof_active_mtx)) return (true); + prof_gdump_val = opt_prof_gdump; + if (malloc_mutex_init(&prof_gdump_mtx)) + return (true); + prof_thread_active_init = opt_prof_thread_active_init; if (malloc_mutex_init(&prof_thread_active_init_mtx)) return (true); diff --git a/test/unit/prof_gdump.c b/test/unit/prof_gdump.c index a00b1054..a0e6ee92 100644 --- a/test/unit/prof_gdump.c +++ b/test/unit/prof_gdump.c @@ -21,8 +21,9 @@ prof_dump_open_intercept(bool propagate_err, const char *filename) TEST_BEGIN(test_gdump) { - bool active; - void *p, *q; + bool active, gdump, gdump_old; + void *p, *q, *r, *s; + size_t sz; test_skip_if(!config_prof); @@ -42,8 +43,32 @@ TEST_BEGIN(test_gdump) assert_ptr_not_null(q, "Unexpected mallocx() failure"); assert_true(did_prof_dump_open, "Expected a profile dump"); + gdump = false; + sz = sizeof(gdump_old); + assert_d_eq(mallctl("prof.gdump", &gdump_old, &sz, &gdump, + sizeof(gdump)), 0, + "Unexpected mallctl failure while disabling prof.gdump"); + assert(gdump_old); + did_prof_dump_open = false; + r = mallocx(chunksize, 0); + 
assert_ptr_not_null(q, "Unexpected mallocx() failure"); + assert_false(did_prof_dump_open, "Unexpected profile dump"); + + gdump = true; + sz = sizeof(gdump_old); + assert_d_eq(mallctl("prof.gdump", &gdump_old, &sz, &gdump, + sizeof(gdump)), 0, + "Unexpected mallctl failure while enabling prof.gdump"); + assert(!gdump_old); + did_prof_dump_open = false; + s = mallocx(chunksize, 0); + assert_ptr_not_null(q, "Unexpected mallocx() failure"); + assert_true(did_prof_dump_open, "Expected a profile dump"); + dallocx(p, 0); dallocx(q, 0); + dallocx(r, 0); + dallocx(s, 0); } TEST_END From 008267b9f6a0e4d92a78f0e8c0697248020fc8d3 Mon Sep 17 00:00:00 2001 From: Felix Janda Date: Tue, 3 Feb 2015 18:58:02 +0100 Subject: [PATCH 0623/3378] util.c: strerror_r returns char* only on glibc --- src/util.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/util.c b/src/util.c index bfd86af8..a964d700 100644 --- a/src/util.c +++ b/src/util.c @@ -84,7 +84,7 @@ buferror(int err, char *buf, size_t buflen) FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM, NULL, GetLastError(), 0, (LPSTR)buf, buflen, NULL); return (0); -#elif defined(_GNU_SOURCE) +#elif defined(__GLIBC__) && defined(_GNU_SOURCE) char *b = strerror_r(err, buf, buflen); if (b != buf) { strncpy(buf, b, buflen); From 6505733012458d8fcd0ae8e1f1acdc9ffe33ff35 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Wed, 4 Feb 2015 07:16:55 +0900 Subject: [PATCH 0624/3378] Make opt.lg_dirty_mult work as documented The documentation for opt.lg_dirty_mult says: Per-arena minimum ratio (log base 2) of active to dirty pages. Some dirty unused pages may be allowed to accumulate, within the limit set by the ratio (or one chunk worth of dirty pages, whichever is greater) (...) The restriction in parentheses currently doesn't happen. This makes jemalloc aggressively madvise(), which in turns increases the amount of page faults significantly. For instance, this resulted in several(!) hundred(!) 
milliseconds startup regression on Firefox for Android. This may require further tweaking, but starting with actually doing what the documentation says is a good start. --- src/arena.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/arena.c b/src/arena.c index 984b8ad2..a5033bf8 100644 --- a/src/arena.c +++ b/src/arena.c @@ -850,6 +850,7 @@ arena_maybe_purge(arena_t *arena) if (opt_lg_dirty_mult < 0) return; threshold = (arena->nactive >> opt_lg_dirty_mult); + threshold = threshold < chunk_npages ? chunk_npages : threshold; /* * Don't purge unless the number of purgeable pages exceeds the * threshold. @@ -893,6 +894,7 @@ arena_compute_npurge(arena_t *arena, bool all) */ if (!all) { size_t threshold = (arena->nactive >> opt_lg_dirty_mult); + threshold = threshold < chunk_npages ? chunk_npages : threshold; npurge = arena->ndirty - threshold; } else From b0808d5f635592cf7b9c487efbf26f13dc60b223 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 3 Feb 2015 12:39:31 -0800 Subject: [PATCH 0625/3378] Fix shell test to use = instead of ==. --- configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 4c202591..dc8aa02c 100644 --- a/configure.ac +++ b/configure.ac @@ -998,7 +998,7 @@ fi AC_ARG_WITH([lg_page], [AS_HELP_STRING([--with-lg-page=], [Base 2 log of system page size])], [LG_PAGE="$with_lg_page"], [LG_PAGE="detect"]) -if test "x$LG_PAGE" == "xdetect"; then +if test "x$LG_PAGE" = "xdetect"; then AC_CACHE_CHECK([LG_PAGE], [je_cv_lg_page], AC_RUN_IFELSE([AC_LANG_PROGRAM( From f8723572d8b3418f145fc1d5466cca6b8e2530ef Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 3 Feb 2015 12:39:55 -0800 Subject: [PATCH 0626/3378] Add missing prototypes for bootstrap_{malloc,calloc,free}(). 
--- include/jemalloc/internal/jemalloc_internal.h.in | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index a4778550..79a23e5e 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -406,7 +406,9 @@ extern uint8_t const size2index_tab[]; arena_t *a0get(void); void *a0malloc(size_t size); void a0dalloc(void *ptr); -size_t a0allocated(void); +void *bootstrap_malloc(size_t size); +void *bootstrap_calloc(size_t num, size_t size); +void bootstrap_free(void *ptr); arena_t *arenas_extend(unsigned ind); arena_t *arena_init(unsigned ind); unsigned narenas_total_get(void); From 8ddc93293cd8370870f221225ef1e013fbff6d65 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 30 Jan 2015 21:22:54 -0800 Subject: [PATCH 0627/3378] Fix chunk_recycle()'s new_addr functionality. Fix chunk_recycle()'s new_addr functionality to search by address rather than just size if new_addr is specified. The functionality added by a95018ee819abf897562d9d1f3bc31d4dd725a8d (Attempt to expand huge allocations in-place.) only worked if the two search orders happened to return the same results (e.g. in simple test cases). 
--- src/chunk.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/chunk.c b/src/chunk.c index 7bfcdb87..a3ae548a 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -48,6 +48,8 @@ chunk_recycle(extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, size_t alloc_size, leadsize, trailsize; bool zeroed; + assert(new_addr == NULL || alignment == chunksize); + if (base) { /* * This function may need to call base_node_{,de}alloc(), but @@ -65,13 +67,15 @@ chunk_recycle(extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, key.addr = new_addr; key.size = alloc_size; malloc_mutex_lock(&chunks_mtx); - node = extent_tree_szad_nsearch(chunks_szad, &key); - if (node == NULL || (new_addr && node->addr != new_addr)) { + node = (new_addr != NULL) ? extent_tree_ad_search(chunks_ad, &key) : + extent_tree_szad_nsearch(chunks_szad, &key); + if (node == NULL) { malloc_mutex_unlock(&chunks_mtx); return (NULL); } leadsize = ALIGNMENT_CEILING((uintptr_t)node->addr, alignment) - (uintptr_t)node->addr; + assert(new_addr == NULL || leadsize == 0); assert(node->size >= leadsize + size); trailsize = node->size - leadsize - size; ret = (void *)((uintptr_t)node->addr + leadsize); From a55dfa4b0af68f372782e130031483ad73cf7eec Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 2 Feb 2015 13:49:08 -0800 Subject: [PATCH 0628/3378] Implement more atomic operations. - atomic_*_p(). - atomic_cas_*(). - atomic_write_*(). 
--- include/jemalloc/internal/atomic.h | 544 ++++++++++++++---- include/jemalloc/internal/private_symbols.txt | 7 + test/unit/atomic.c | 85 ++- 3 files changed, 485 insertions(+), 151 deletions(-) diff --git a/include/jemalloc/internal/atomic.h b/include/jemalloc/internal/atomic.h index 23ac93ff..f8bd62ec 100644 --- a/include/jemalloc/internal/atomic.h +++ b/include/jemalloc/internal/atomic.h @@ -11,6 +11,7 @@ #define atomic_read_uint64(p) atomic_add_uint64(p, 0) #define atomic_read_uint32(p) atomic_add_uint32(p, 0) +#define atomic_read_p(p) atomic_add_p(p, NULL) #define atomic_read_z(p) atomic_add_z(p, 0) #define atomic_read_u(p) atomic_add_u(p, 0) @@ -19,88 +20,54 @@ #ifdef JEMALLOC_H_INLINES /* - * All functions return the arithmetic result of the atomic operation. Some - * atomic operation APIs return the value prior to mutation, in which case the - * following functions must redundantly compute the result so that it can be - * returned. These functions are normally inlined, so the extra operations can - * be optimized away if the return values aren't used by the callers. + * All arithmetic functions return the arithmetic result of the atomic + * operation. Some atomic operation APIs return the value prior to mutation, in + * which case the following functions must redundantly compute the result so + * that it can be returned. These functions are normally inlined, so the extra + * operations can be optimized away if the return values aren't used by the + * callers. 
* + * atomic_read_( *p) { return (*p); } * atomic_add_( *p, x) { return (*p + x); } * atomic_sub_( *p, x) { return (*p - x); } + * bool atomic_cas_( *p, c, s) + * { + * if (*p != c) + * return (true); + * *p = s; + * return (false); + * } + * void atomic_write_( *p, x) { *p = x; } */ #ifndef JEMALLOC_ENABLE_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x); uint64_t atomic_sub_uint64(uint64_t *p, uint64_t x); +bool atomic_cas_uint64(uint64_t *p, uint64_t c, uint64_t s); +void atomic_write_uint64(uint64_t *p, uint64_t x); uint32_t atomic_add_uint32(uint32_t *p, uint32_t x); uint32_t atomic_sub_uint32(uint32_t *p, uint32_t x); +bool atomic_cas_uint32(uint32_t *p, uint32_t c, uint32_t s); +void atomic_write_uint32(uint32_t *p, uint32_t x); +void *atomic_add_p(void **p, void *x); +void *atomic_sub_p(void **p, void *x); +bool atomic_cas_p(void **p, void *c, void *s); +void atomic_write_p(void **p, void *x); size_t atomic_add_z(size_t *p, size_t x); size_t atomic_sub_z(size_t *p, size_t x); +bool atomic_cas_z(size_t *p, size_t c, size_t s); +void atomic_write_z(size_t *p, size_t x); unsigned atomic_add_u(unsigned *p, unsigned x); unsigned atomic_sub_u(unsigned *p, unsigned x); +bool atomic_cas_u(unsigned *p, unsigned c, unsigned s); +void atomic_write_u(unsigned *p, unsigned x); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ATOMIC_C_)) /******************************************************************************/ /* 64-bit operations. 
*/ #if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3) -# ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 -JEMALLOC_INLINE uint64_t -atomic_add_uint64(uint64_t *p, uint64_t x) -{ - - return (__sync_add_and_fetch(p, x)); -} - -JEMALLOC_INLINE uint64_t -atomic_sub_uint64(uint64_t *p, uint64_t x) -{ - - return (__sync_sub_and_fetch(p, x)); -} -# elif (defined(_MSC_VER)) -JEMALLOC_INLINE uint64_t -atomic_add_uint64(uint64_t *p, uint64_t x) -{ - - return (InterlockedExchangeAdd64(p, x) + x); -} - -JEMALLOC_INLINE uint64_t -atomic_sub_uint64(uint64_t *p, uint64_t x) -{ - - return (InterlockedExchangeAdd64(p, -((int64_t)x)) - x); -} -# elif (defined(JEMALLOC_C11ATOMICS)) -JEMALLOC_INLINE uint64_t -atomic_add_uint64(uint64_t *p, uint64_t x) -{ - volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p; - return (atomic_fetch_add(a, x) + x); -} - -JEMALLOC_INLINE uint64_t -atomic_sub_uint64(uint64_t *p, uint64_t x) -{ - volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p; - return (atomic_fetch_sub(a, x) - x); -} -# elif (defined(JEMALLOC_OSATOMIC)) -JEMALLOC_INLINE uint64_t -atomic_add_uint64(uint64_t *p, uint64_t x) -{ - - return (OSAtomicAdd64((int64_t)x, (int64_t *)p)); -} - -JEMALLOC_INLINE uint64_t -atomic_sub_uint64(uint64_t *p, uint64_t x) -{ - - return (OSAtomicAdd64(-((int64_t)x), (int64_t *)p)); -} -# elif (defined(__amd64__) || defined(__x86_64__)) +# if (defined(__amd64__) || defined(__x86_64__)) JEMALLOC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x) { @@ -130,6 +97,62 @@ atomic_sub_uint64(uint64_t *p, uint64_t x) return (t + x); } + +JEMALLOC_INLINE bool +atomic_cas_uint64(uint64_t *p, uint64_t c, uint64_t s) +{ + uint8_t success; + + asm volatile ( + "lock; cmpxchgq %4, %0;" + "sete %1;" + : "=m" (*p), "=a" (success) /* Outputs. */ + : "m" (*p), "a" (c), "r" (s) /* Inputs. */ + : "memory" /* Clobbers. 
*/ + ); + + return (!(bool)success); +} + +JEMALLOC_INLINE void +atomic_write_uint64(uint64_t *p, uint64_t x) +{ + + asm volatile ( + "lock; xchgq %1, %0;" + : "=m" (*p), "+r" (x) /* Outputs. */ + : "m" (*p) /* Inputs. */ + : "memory" /* Clobbers. */ + ); +} +# elif (defined(JEMALLOC_C11ATOMICS)) +JEMALLOC_INLINE uint64_t +atomic_add_uint64(uint64_t *p, uint64_t x) +{ + volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p; + return (atomic_fetch_add(a, x) + x); +} + +JEMALLOC_INLINE uint64_t +atomic_sub_uint64(uint64_t *p, uint64_t x) +{ + volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p; + return (atomic_fetch_sub(a, x) - x); +} + +JEMALLOC_INLINE bool +atomic_cas_uint64(uint64_t *p, uint64_t c, uint64_t s) +{ + + return (!atomic_compare_exchange_strong(p, &c, s)); +} + +JEMALLOC_INLINE void +atomic_write_uint64(uint64_t *p, uint64_t x) +{ + + atomic_store(p, x); +} # elif (defined(JEMALLOC_ATOMIC9)) JEMALLOC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x) @@ -152,7 +175,88 @@ atomic_sub_uint64(uint64_t *p, uint64_t x) return (atomic_fetchadd_long(p, (unsigned long)(-(long)x)) - x); } -# elif (defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_8)) + +JEMALLOC_INLINE bool +atomic_cas_uint64(uint64_t *p, uint64_t c, uint64_t s) +{ + + assert(sizeof(uint64_t) == sizeof(unsigned long)); + + return (!atomic_cmpset_long(p, (unsigned long)c, (unsigned long)s)); +} + +JEMALLOC_INLINE void +atomic_write_uint64(uint64_t *p, uint64_t x) +{ + + assert(sizeof(uint64_t) == sizeof(unsigned long)); + + atomic_store_rel_long(p, x); +} +# elif (defined(JEMALLOC_OSATOMIC)) +JEMALLOC_INLINE uint64_t +atomic_add_uint64(uint64_t *p, uint64_t x) +{ + + return (OSAtomicAdd64((int64_t)x, (int64_t *)p)); +} + +JEMALLOC_INLINE uint64_t +atomic_sub_uint64(uint64_t *p, uint64_t x) +{ + + return (OSAtomicAdd64(-((int64_t)x), (int64_t *)p)); +} + +JEMALLOC_INLINE bool +atomic_cas_uint64(uint64_t *p, uint64_t c, uint64_t s) +{ + + return 
(!OSAtomicCompareAndSwap64(c, s, (int64_t *)p)); +} + +JEMALLOC_INLINE void +atomic_write_uint64(uint64_t *p, uint64_t x) +{ + uint64_t o; + + /*The documented OSAtomic*() API does not expose an atomic exchange. */ + do { + o = atomic_read_uint64(p); + } while (atomic_cas_uint64(p, o, x)); +} +# elif (defined(_MSC_VER)) +JEMALLOC_INLINE uint64_t +atomic_add_uint64(uint64_t *p, uint64_t x) +{ + + return (InterlockedExchangeAdd64(p, x) + x); +} + +JEMALLOC_INLINE uint64_t +atomic_sub_uint64(uint64_t *p, uint64_t x) +{ + + return (InterlockedExchangeAdd64(p, -((int64_t)x)) - x); +} + +JEMALLOC_INLINE bool +atomic_cas_uint64(uint64_t *p, uint64_t c, uint64_t s) +{ + uint64_t o; + + o = InterlockedCompareExchange64(p, s, c); + return (o != c); +} + +JEMALLOC_INLINE void +atomic_write_uint64(uint64_t *p, uint64_t x) +{ + + InterlockedExchange64(p, x); +} +# elif (defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8) || \ + defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_8)) JEMALLOC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x) { @@ -166,6 +270,20 @@ atomic_sub_uint64(uint64_t *p, uint64_t x) return (__sync_sub_and_fetch(p, x)); } + +JEMALLOC_INLINE bool +atomic_cas_uint64(uint64_t *p, uint64_t c, uint64_t s) +{ + + return (!__sync_bool_compare_and_swap(p, c, s)); +} + +JEMALLOC_INLINE void +atomic_write_uint64(uint64_t *p, uint64_t x) +{ + + __sync_lock_test_and_set(p, x); +} # else # error "Missing implementation for 64-bit atomic operations" # endif @@ -173,63 +291,7 @@ atomic_sub_uint64(uint64_t *p, uint64_t x) /******************************************************************************/ /* 32-bit operations. 
*/ -#ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 -JEMALLOC_INLINE uint32_t -atomic_add_uint32(uint32_t *p, uint32_t x) -{ - - return (__sync_add_and_fetch(p, x)); -} - -JEMALLOC_INLINE uint32_t -atomic_sub_uint32(uint32_t *p, uint32_t x) -{ - - return (__sync_sub_and_fetch(p, x)); -} -#elif (defined(_MSC_VER)) -JEMALLOC_INLINE uint32_t -atomic_add_uint32(uint32_t *p, uint32_t x) -{ - - return (InterlockedExchangeAdd(p, x) + x); -} - -JEMALLOC_INLINE uint32_t -atomic_sub_uint32(uint32_t *p, uint32_t x) -{ - - return (InterlockedExchangeAdd(p, -((int32_t)x)) - x); -} -# elif (defined(JEMALLOC_C11ATOMICS)) -JEMALLOC_INLINE uint32_t -atomic_add_uint32(uint32_t *p, uint32_t x) -{ - volatile atomic_uint_least32_t *a = (volatile atomic_uint_least32_t *)p; - return (atomic_fetch_add(a, x) + x); -} - -JEMALLOC_INLINE uint32_t -atomic_sub_uint32(uint32_t *p, uint32_t x) -{ - volatile atomic_uint_least32_t *a = (volatile atomic_uint_least32_t *)p; - return (atomic_fetch_sub(a, x) - x); -} -#elif (defined(JEMALLOC_OSATOMIC)) -JEMALLOC_INLINE uint32_t -atomic_add_uint32(uint32_t *p, uint32_t x) -{ - - return (OSAtomicAdd32((int32_t)x, (int32_t *)p)); -} - -JEMALLOC_INLINE uint32_t -atomic_sub_uint32(uint32_t *p, uint32_t x) -{ - - return (OSAtomicAdd32(-((int32_t)x), (int32_t *)p)); -} -#elif (defined(__i386__) || defined(__amd64__) || defined(__x86_64__)) +#if (defined(__i386__) || defined(__amd64__) || defined(__x86_64__)) JEMALLOC_INLINE uint32_t atomic_add_uint32(uint32_t *p, uint32_t x) { @@ -259,6 +321,62 @@ atomic_sub_uint32(uint32_t *p, uint32_t x) return (t + x); } + +JEMALLOC_INLINE bool +atomic_cas_uint32(uint32_t *p, uint32_t c, uint32_t s) +{ + uint8_t success; + + asm volatile ( + "lock; cmpxchgl %4, %0;" + "sete %1;" + : "=m" (*p), "=a" (success) /* Outputs. */ + : "m" (*p), "a" (c), "r" (s) /* Inputs. 
*/ + : "memory" + ); + + return (!(bool)success); +} + +JEMALLOC_INLINE void +atomic_write_uint32(uint32_t *p, uint32_t x) +{ + + asm volatile ( + "lock; xchgl %1, %0;" + : "=m" (*p), "+r" (x) /* Outputs. */ + : "m" (*p) /* Inputs. */ + : "memory" /* Clobbers. */ + ); +} +# elif (defined(JEMALLOC_C11ATOMICS)) +JEMALLOC_INLINE uint32_t +atomic_add_uint32(uint32_t *p, uint32_t x) +{ + volatile atomic_uint_least32_t *a = (volatile atomic_uint_least32_t *)p; + return (atomic_fetch_add(a, x) + x); +} + +JEMALLOC_INLINE uint32_t +atomic_sub_uint32(uint32_t *p, uint32_t x) +{ + volatile atomic_uint_least32_t *a = (volatile atomic_uint_least32_t *)p; + return (atomic_fetch_sub(a, x) - x); +} + +JEMALLOC_INLINE bool +atomic_cas_uint32(uint32_t *p, uint32_t c, uint32_t s) +{ + + return (!atomic_compare_exchange_strong(p, &c, s)); +} + +JEMALLOC_INLINE void +atomic_write_uint32(uint32_t *p, uint32_t x) +{ + + atomic_store(p, x); +} #elif (defined(JEMALLOC_ATOMIC9)) JEMALLOC_INLINE uint32_t atomic_add_uint32(uint32_t *p, uint32_t x) @@ -273,7 +391,84 @@ atomic_sub_uint32(uint32_t *p, uint32_t x) return (atomic_fetchadd_32(p, (uint32_t)(-(int32_t)x)) - x); } -#elif (defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_4)) + +JEMALLOC_INLINE bool +atomic_cas_uint32(uint32_t *p, uint32_t c, uint32_t s) +{ + + return (!atomic_cmpset_32(p, c, s)); +} + +JEMALLOC_INLINE void +atomic_write_uint32(uint32_t *p, uint32_t x) +{ + + atomic_store_rel_32(p, x); +} +#elif (defined(JEMALLOC_OSATOMIC)) +JEMALLOC_INLINE uint32_t +atomic_add_uint32(uint32_t *p, uint32_t x) +{ + + return (OSAtomicAdd32((int32_t)x, (int32_t *)p)); +} + +JEMALLOC_INLINE uint32_t +atomic_sub_uint32(uint32_t *p, uint32_t x) +{ + + return (OSAtomicAdd32(-((int32_t)x), (int32_t *)p)); +} + +JEMALLOC_INLINE bool +atomic_cas_uint32(uint32_t *p, uint32_t c, uint32_t s) +{ + + return (!OSAtomicCompareAndSwap32(c, s, (int32_t *)p)); +} + +JEMALLOC_INLINE void +atomic_write_uint32(uint32_t *p, uint32_t x) +{ + uint32_t o; + + /*The 
documented OSAtomic*() API does not expose an atomic exchange. */ + do { + o = atomic_read_uint32(p); + } while (atomic_cas_uint32(p, o, x)); +} +#elif (defined(_MSC_VER)) +JEMALLOC_INLINE uint32_t +atomic_add_uint32(uint32_t *p, uint32_t x) +{ + + return (InterlockedExchangeAdd(p, x) + x); +} + +JEMALLOC_INLINE uint32_t +atomic_sub_uint32(uint32_t *p, uint32_t x) +{ + + return (InterlockedExchangeAdd(p, -((int32_t)x)) - x); +} + +JEMALLOC_INLINE bool +atomic_cas_uint32(uint32_t *p, uint32_t c, uint32_t s) +{ + uint32_t o; + + o = InterlockedCompareExchange32(p, s, c); + return (o != c); +} + +JEMALLOC_INLINE void +atomic_write_uint32(uint32_t *p, uint32_t x) +{ + + InterlockedExchange(p, x); +} +#elif (defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4) || \ + defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_4)) JEMALLOC_INLINE uint32_t atomic_add_uint32(uint32_t *p, uint32_t x) { @@ -287,10 +482,72 @@ atomic_sub_uint32(uint32_t *p, uint32_t x) return (__sync_sub_and_fetch(p, x)); } + +JEMALLOC_INLINE bool +atomic_cas_uint32(uint32_t *p, uint32_t c, uint32_t s) +{ + + return (!__sync_bool_compare_and_swap(p, c, s)); +} + +JEMALLOC_INLINE void +atomic_write_uint32(uint32_t *p, uint32_t x) +{ + + __sync_lock_test_and_set(p, x); +} #else # error "Missing implementation for 32-bit atomic operations" #endif +/******************************************************************************/ +/* Pointer operations. 
*/ +JEMALLOC_INLINE void * +atomic_add_p(void **p, void *x) +{ + +#if (LG_SIZEOF_PTR == 3) + return ((void *)atomic_add_uint64((uint64_t *)p, (uint64_t)x)); +#elif (LG_SIZEOF_PTR == 2) + return ((void *)atomic_add_uint32((uint32_t *)p, (uint32_t)x)); +#endif +} + +JEMALLOC_INLINE void * +atomic_sub_p(void **p, void *x) +{ + +#if (LG_SIZEOF_PTR == 3) + return ((void *)atomic_add_uint64((uint64_t *)p, + (uint64_t)-((int64_t)x))); +#elif (LG_SIZEOF_PTR == 2) + return ((void *)atomic_add_uint32((uint32_t *)p, + (uint32_t)-((int32_t)x))); +#endif +} + +JEMALLOC_INLINE bool +atomic_cas_p(void **p, void *c, void *s) +{ + +#if (LG_SIZEOF_PTR == 3) + return (atomic_cas_uint64((uint64_t *)p, (uint64_t)c, (uint64_t)s)); +#elif (LG_SIZEOF_PTR == 2) + return (atomic_cas_uint32((uint32_t *)p, (uint32_t)c, (uint32_t)s)); +#endif +} + +JEMALLOC_INLINE void +atomic_write_p(void **p, void *x) +{ + +#if (LG_SIZEOF_PTR == 3) + atomic_write_uint64((uint64_t *)p, (uint64_t)x); +#elif (LG_SIZEOF_PTR == 2) + atomic_write_uint32((uint32_t *)p, (uint32_t)x); +#endif +} + /******************************************************************************/ /* size_t operations. */ JEMALLOC_INLINE size_t @@ -317,6 +574,28 @@ atomic_sub_z(size_t *p, size_t x) #endif } +JEMALLOC_INLINE bool +atomic_cas_z(size_t *p, size_t c, size_t s) +{ + +#if (LG_SIZEOF_PTR == 3) + return (atomic_cas_uint64((uint64_t *)p, (uint64_t)c, (uint64_t)s)); +#elif (LG_SIZEOF_PTR == 2) + return (atomic_cas_uint32((uint32_t *)p, (uint32_t)c, (uint32_t)s)); +#endif +} + +JEMALLOC_INLINE void +atomic_write_z(size_t *p, size_t x) +{ + +#if (LG_SIZEOF_PTR == 3) + atomic_write_uint64((uint64_t *)p, (uint64_t)x); +#elif (LG_SIZEOF_PTR == 2) + atomic_write_uint32((uint32_t *)p, (uint32_t)x); +#endif +} + /******************************************************************************/ /* unsigned operations. 
*/ JEMALLOC_INLINE unsigned @@ -342,6 +621,29 @@ atomic_sub_u(unsigned *p, unsigned x) (uint32_t)-((int32_t)x))); #endif } + +JEMALLOC_INLINE bool +atomic_cas_u(unsigned *p, unsigned c, unsigned s) +{ + +#if (LG_SIZEOF_INT == 3) + return (atomic_cas_uint64((uint64_t *)p, (uint64_t)c, (uint64_t)s)); +#elif (LG_SIZEOF_INT == 2) + return (atomic_cas_uint32((uint32_t *)p, (uint32_t)c, (uint32_t)s)); +#endif +} + +JEMALLOC_INLINE void +atomic_write_u(unsigned *p, unsigned x) +{ + +#if (LG_SIZEOF_INT == 3) + atomic_write_uint64((uint64_t *)p, (uint64_t)x); +#elif (LG_SIZEOF_INT == 2) + atomic_write_uint32((uint32_t *)p, (uint32_t)x); +#endif +} + /******************************************************************************/ #endif diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index f3fd8262..ba7ab382 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -85,10 +85,17 @@ arena_stats_merge arena_tcache_fill_small arenas_cache_bypass_cleanup arenas_cache_cleanup +atomic_add_p atomic_add_u atomic_add_uint32 atomic_add_uint64 atomic_add_z +atomic_cas_p +atomic_cas_u +atomic_cas_uint32 +atomic_cas_uint64 +atomic_cas_z +atomic_sub_p atomic_sub_u atomic_sub_uint32 atomic_sub_uint64 diff --git a/test/unit/atomic.c b/test/unit/atomic.c index eb6136c7..a774836a 100644 --- a/test/unit/atomic.c +++ b/test/unit/atomic.c @@ -4,48 +4,64 @@ struct p##_test_s { \ t accum0; \ t x; \ + t s; \ }; \ typedef struct p##_test_s p##_test_t; -#define TEST_BODY(p, t, PRI) do { \ +#define TEST_BODY(p, t, tc, ta, PRI) do { \ const p##_test_t tests[] = { \ - {-1, -1}, \ - {-1, 0}, \ - {-1, 1}, \ + {(t)-1, (t)-1, (t)-2}, \ + {(t)-1, (t) 0, (t)-2}, \ + {(t)-1, (t) 1, (t)-2}, \ \ - { 0, -1}, \ - { 0, 0}, \ - { 0, 1}, \ + {(t) 0, (t)-1, (t)-2}, \ + {(t) 0, (t) 0, (t)-2}, \ + {(t) 0, (t) 1, (t)-2}, \ \ - { 1, -1}, \ - { 1, 0}, \ - { 1, 1}, \ + {(t) 1, (t)-1, (t)-2}, \ + {(t) 1, (t) 0, 
(t)-2}, \ + {(t) 1, (t) 1, (t)-2}, \ \ - {0, -(1 << 22)}, \ - {0, (1 << 22)}, \ - {(1 << 22), -(1 << 22)}, \ - {(1 << 22), (1 << 22)} \ + {(t)0, (t)-(1 << 22), (t)-2}, \ + {(t)0, (t)(1 << 22), (t)-2}, \ + {(t)(1 << 22), (t)-(1 << 22), (t)-2}, \ + {(t)(1 << 22), (t)(1 << 22), (t)-2} \ }; \ unsigned i; \ \ for (i = 0; i < sizeof(tests)/sizeof(p##_test_t); i++) { \ + bool err; \ t accum = tests[i].accum0; \ - assert_u64_eq(atomic_read_##p(&accum), tests[i].accum0, \ - "i=%u", i); \ - assert_u64_eq(atomic_add_##p(&accum, tests[i].x), \ - tests[i].accum0 + tests[i].x, \ - "i=%u, accum=%#"PRI", x=%#"PRI, \ + assert_##ta##_eq(atomic_read_##p(&accum), \ + tests[i].accum0, \ + "Erroneous read, i=%u", i); \ + \ + assert_##ta##_eq(atomic_add_##p(&accum, tests[i].x), \ + (t)((tc)tests[i].accum0 + (tc)tests[i].x), \ + "i=%u, accum=%"PRI", x=%"PRI, \ i, tests[i].accum0, tests[i].x); \ - assert_u64_eq(atomic_read_##p(&accum), accum, \ - "i=%u", i); \ + assert_##ta##_eq(atomic_read_##p(&accum), accum, \ + "Erroneous add, i=%u", i); \ \ accum = tests[i].accum0; \ - assert_u64_eq(atomic_sub_##p(&accum, tests[i].x), \ - tests[i].accum0 - tests[i].x, \ - "i=%u, accum=%#"PRI", x=%#"PRI, \ + assert_##ta##_eq(atomic_sub_##p(&accum, tests[i].x), \ + (t)((tc)tests[i].accum0 - (tc)tests[i].x), \ + "i=%u, accum=%"PRI", x=%"PRI, \ i, tests[i].accum0, tests[i].x); \ - assert_u64_eq(atomic_read_##p(&accum), accum, \ - "i=%u", i); \ + assert_##ta##_eq(atomic_read_##p(&accum), accum, \ + "Erroneous sub, i=%u", i); \ + \ + accum = tests[i].accum0; \ + err = atomic_cas_##p(&accum, tests[i].x, tests[i].s); \ + assert_b_eq(err, tests[i].accum0 != tests[i].x, \ + "Erroneous cas success/failure result"); \ + assert_##ta##_eq(accum, err ? 
tests[i].accum0 : \ + tests[i].s, "Erroneous cas effect, i=%u", i); \ + \ + accum = tests[i].accum0; \ + atomic_write_##p(&accum, tests[i].s); \ + assert_##ta##_eq(accum, tests[i].s, \ + "Erroneous write, i=%u", i); \ } \ } while (0) @@ -56,7 +72,7 @@ TEST_BEGIN(test_atomic_uint64) #if !(LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3) test_skip("64-bit atomic operations not supported"); #else - TEST_BODY(uint64, uint64_t, PRIx64); + TEST_BODY(uint64, uint64_t, uint64_t, u64, PRIx64); #endif } TEST_END @@ -65,7 +81,15 @@ TEST_STRUCT(uint32, uint32_t) TEST_BEGIN(test_atomic_uint32) { - TEST_BODY(uint32, uint32_t, PRIx32); + TEST_BODY(uint32, uint32_t, uint32_t, u32, "#"PRIx32); +} +TEST_END + +TEST_STRUCT(p, void *) +TEST_BEGIN(test_atomic_p) +{ + + TEST_BODY(p, void *, uintptr_t, ptr, "p"); } TEST_END @@ -73,7 +97,7 @@ TEST_STRUCT(z, size_t) TEST_BEGIN(test_atomic_z) { - TEST_BODY(z, size_t, "zx"); + TEST_BODY(z, size_t, size_t, zu, "#zx"); } TEST_END @@ -81,7 +105,7 @@ TEST_STRUCT(u, unsigned) TEST_BEGIN(test_atomic_u) { - TEST_BODY(u, unsigned, "x"); + TEST_BODY(u, unsigned, unsigned, u, "#x"); } TEST_END @@ -92,6 +116,7 @@ main(void) return (test( test_atomic_uint64, test_atomic_uint32, + test_atomic_p, test_atomic_z, test_atomic_u)); } From 918a1a5b3f09cb456c25be9a2555a8fea6a9bb94 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 30 Jan 2015 21:21:16 -0800 Subject: [PATCH 0629/3378] Reduce extent_node_t size to fit in one cache line. --- include/jemalloc/internal/extent.h | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index cbfc20a9..f45940c1 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -15,9 +15,6 @@ struct extent_node_s { /* Linkage for the address-ordered tree. */ rb_node(extent_node_t) link_ad; - /* Profile counters, used for huge objects. 
*/ - prof_tctx_t *prof_tctx; - /* Pointer to the extent that this tree node is responsible for. */ void *addr; @@ -27,8 +24,17 @@ struct extent_node_s { /* Arena from which this extent came, if any. */ arena_t *arena; - /* True if zero-filled; used by chunk recycling code. */ - bool zeroed; + /* + * 'prof_tctx' and 'zeroed' are never needed at the same time, so + * overlay them in order to fit extent_node_t in one cache line. + */ + union { + /* Profile counters, used for huge objects. */ + prof_tctx_t *prof_tctx; + + /* True if zero-filled; used by chunk recycling code. */ + bool zeroed; + }; }; typedef rb_tree(extent_node_t) extent_tree_t; From f500a10b2e94852b867334703ad77467dcfd2ddd Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 30 Jan 2015 21:49:19 -0800 Subject: [PATCH 0630/3378] Refactor base_alloc() to guarantee demand-zeroed memory. Refactor base_alloc() to guarantee that allocations are carved from demand-zeroed virtual memory. This supports sparse data structures such as multi-page radix tree nodes. Enhance base_alloc() to keep track of fragments which were too small to support previous allocation requests, and try to consume them during subsequent requests. This becomes important when request sizes commonly approach or exceed the chunk size (as could radix tree node allocations). 
--- include/jemalloc/internal/base.h | 1 - include/jemalloc/internal/private_symbols.txt | 1 - src/base.c | 167 +++++++++++------- src/chunk.c | 17 +- src/mutex.c | 6 +- 5 files changed, 114 insertions(+), 78 deletions(-) diff --git a/include/jemalloc/internal/base.h b/include/jemalloc/internal/base.h index 18b7a72d..a0798ee2 100644 --- a/include/jemalloc/internal/base.h +++ b/include/jemalloc/internal/base.h @@ -10,7 +10,6 @@ #ifdef JEMALLOC_H_EXTERNS void *base_alloc(size_t size); -void *base_calloc(size_t number, size_t size); extent_node_t *base_node_alloc(void); void base_node_dalloc(extent_node_t *node); size_t base_allocated_get(void); diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index ba7ab382..105e6646 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -103,7 +103,6 @@ atomic_sub_z base_alloc base_allocated_get base_boot -base_calloc base_node_alloc base_node_dalloc base_postfork_child diff --git a/src/base.c b/src/base.c index 22f36139..0d1de7fc 100644 --- a/src/base.c +++ b/src/base.c @@ -5,73 +5,117 @@ /* Data. */ static malloc_mutex_t base_mtx; - -/* - * Current pages that are being used for internal memory allocations. These - * pages are carved up in cacheline-size quanta, so that there is no chance of - * false cache line sharing. - */ -static void *base_pages; -static void *base_next_addr; -static void *base_past_addr; /* Addr immediately past base_pages. 
*/ +static extent_tree_t base_avail_szad; static extent_node_t *base_nodes; - static size_t base_allocated; /******************************************************************************/ -static bool -base_pages_alloc(size_t minsize) +static extent_node_t * +base_node_try_alloc_locked(void) { - size_t csize; + extent_node_t *node; - assert(minsize != 0); - csize = CHUNK_CEILING(minsize); - base_pages = chunk_alloc_base(csize); - if (base_pages == NULL) - return (true); - base_next_addr = base_pages; - base_past_addr = (void *)((uintptr_t)base_pages + csize); - - return (false); + if (base_nodes == NULL) + return (NULL); + node = base_nodes; + base_nodes = *(extent_node_t **)node; + JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(node, sizeof(extent_node_t)); + return (node); } +static void +base_node_dalloc_locked(extent_node_t *node) +{ + + JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(node, sizeof(extent_node_t)); + *(extent_node_t **)node = base_nodes; + base_nodes = node; +} + +/* base_mtx must be held. */ +static extent_node_t * +base_chunk_alloc(size_t minsize) +{ + extent_node_t *node; + size_t csize, nsize; + void *addr; + + assert(minsize != 0); + node = base_node_try_alloc_locked(); + /* Allocate enough space to also carve a node out if necessary. */ + nsize = (node == NULL) ? CACHELINE_CEILING(sizeof(extent_node_t)) : 0; + csize = CHUNK_CEILING(minsize + nsize); + addr = chunk_alloc_base(csize); + if (addr == NULL) { + if (node != NULL) + base_node_dalloc_locked(node); + return (NULL); + } + if (node == NULL) { + csize -= nsize; + node = (extent_node_t *)((uintptr_t)addr + csize); + if (config_stats) + base_allocated += nsize; + } + node->addr = addr; + node->size = csize; + return (node); +} + +static void * +base_alloc_locked(size_t size) +{ + void *ret; + size_t csize; + extent_node_t *node; + extent_node_t key; + + /* + * Round size up to nearest multiple of the cacheline size, so that + * there is no chance of false cache line sharing. 
+ */ + csize = CACHELINE_CEILING(size); + + key.addr = NULL; + key.size = csize; + node = extent_tree_szad_nsearch(&base_avail_szad, &key); + if (node != NULL) { + /* Use existing space. */ + extent_tree_szad_remove(&base_avail_szad, node); + } else { + /* Try to allocate more space. */ + node = base_chunk_alloc(csize); + } + if (node == NULL) + return (NULL); + + ret = node->addr; + if (node->size > csize) { + node->addr = (void *)((uintptr_t)ret + csize); + node->size -= csize; + extent_tree_szad_insert(&base_avail_szad, node); + } else + base_node_dalloc_locked(node); + if (config_stats) + base_allocated += csize; + JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, csize); + return (ret); +} + +/* + * base_alloc() guarantees demand-zeroed memory, in order to make multi-page + * sparse data structures such as radix tree nodes efficient with respect to + * physical memory usage. + */ void * base_alloc(size_t size) { void *ret; - size_t csize; - - /* Round size up to nearest multiple of the cacheline size. */ - csize = CACHELINE_CEILING(size); malloc_mutex_lock(&base_mtx); - /* Make sure there's enough space for the allocation. */ - if ((uintptr_t)base_next_addr + csize > (uintptr_t)base_past_addr) { - if (base_pages_alloc(csize)) { - malloc_mutex_unlock(&base_mtx); - return (NULL); - } - } - /* Allocate. 
*/ - ret = base_next_addr; - base_next_addr = (void *)((uintptr_t)base_next_addr + csize); - if (config_stats) - base_allocated += csize; + ret = base_alloc_locked(size); malloc_mutex_unlock(&base_mtx); - JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, csize); - - return (ret); -} - -void * -base_calloc(size_t number, size_t size) -{ - void *ret = base_alloc(number * size); - - if (ret != NULL) - memset(ret, 0, number * size); - return (ret); } @@ -81,17 +125,9 @@ base_node_alloc(void) extent_node_t *ret; malloc_mutex_lock(&base_mtx); - if (base_nodes != NULL) { - ret = base_nodes; - base_nodes = *(extent_node_t **)ret; - malloc_mutex_unlock(&base_mtx); - JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, - sizeof(extent_node_t)); - } else { - malloc_mutex_unlock(&base_mtx); - ret = (extent_node_t *)base_alloc(sizeof(extent_node_t)); - } - + if ((ret = base_node_try_alloc_locked()) == NULL) + ret = (extent_node_t *)base_alloc_locked(sizeof(extent_node_t)); + malloc_mutex_unlock(&base_mtx); return (ret); } @@ -99,10 +135,8 @@ void base_node_dalloc(extent_node_t *node) { - JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(node, sizeof(extent_node_t)); malloc_mutex_lock(&base_mtx); - *(extent_node_t **)node = base_nodes; - base_nodes = node; + base_node_dalloc_locked(node); malloc_mutex_unlock(&base_mtx); } @@ -121,9 +155,10 @@ bool base_boot(void) { - base_nodes = NULL; if (malloc_mutex_init(&base_mtx)) return (true); + extent_tree_szad_new(&base_avail_szad); + base_nodes = NULL; return (false); } diff --git a/src/chunk.c b/src/chunk.c index a3ae548a..01180a71 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -232,15 +232,18 @@ chunk_alloc_base(size_t size) void *ret; bool zero; - zero = false; - ret = chunk_alloc_core(NULL, size, chunksize, true, &zero, - chunk_dss_prec_get()); - if (ret == NULL) - return (NULL); - if (chunk_register(ret, size, true)) { + /* + * Directly call chunk_alloc_mmap() rather than chunk_alloc_core() + * because it's critical that chunk_alloc_base() return untouched + * 
demand-zeroed virtual memory. + */ + zero = true; + ret = chunk_alloc_mmap(size, chunksize, &zero); + if (ret != NULL && chunk_register(ret, size, true)) { chunk_dalloc_core(ret, size); - return (NULL); + ret = NULL; } + return (ret); } diff --git a/src/mutex.c b/src/mutex.c index 788eca38..d86887ee 100644 --- a/src/mutex.c +++ b/src/mutex.c @@ -83,8 +83,8 @@ malloc_mutex_init(malloc_mutex_t *mutex) mutex->postponed_next = postponed_mutexes; postponed_mutexes = mutex; } else { - if (_pthread_mutex_init_calloc_cb(&mutex->lock, base_calloc) != - 0) + if (_pthread_mutex_init_calloc_cb(&mutex->lock, + bootstrap_calloc) != 0) return (true); } #else @@ -140,7 +140,7 @@ mutex_boot(void) postpone_init = false; while (postponed_mutexes != NULL) { if (_pthread_mutex_init_calloc_cb(&postponed_mutexes->lock, - base_calloc) != 0) + bootstrap_calloc) != 0) return (true); postponed_mutexes = postponed_mutexes->postponed_next; } From c810fcea1fa7983ef5bcabe6556cdc19dde6dd8d Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 4 Feb 2015 16:41:55 -0800 Subject: [PATCH 0631/3378] Add (x != 0) assertion to lg_floor(x). lg_floor(0) is undefined, but depending on compiler options may not cause a crash. This assertion makes it harder to accidentally abuse lg_floor(). --- include/jemalloc/internal/util.h | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h index b2b4ab74..5ad4933d 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util.h @@ -136,14 +136,14 @@ JEMALLOC_ALWAYS_INLINE int jemalloc_ffsl(long bitmap) { - return (JEMALLOC_INTERNAL_FFSL(bitmap)); + return (JEMALLOC_INTERNAL_FFSL(bitmap)); } JEMALLOC_ALWAYS_INLINE int jemalloc_ffs(int bitmap) { - return (JEMALLOC_INTERNAL_FFS(bitmap)); + return (JEMALLOC_INTERNAL_FFS(bitmap)); } /* Compute the smallest power of 2 that is >= x. 
*/ @@ -170,6 +170,8 @@ lg_floor(size_t x) { size_t ret; + assert(x != 0); + asm ("bsr %1, %0" : "=r"(ret) // Outputs. : "r"(x) // Inputs. @@ -180,22 +182,26 @@ lg_floor(size_t x) JEMALLOC_INLINE size_t lg_floor(size_t x) { - unsigned long ret; + unsigned long ret; + + assert(x != 0); #if (LG_SIZEOF_PTR == 3) - _BitScanReverse64(&ret, x); + _BitScanReverse64(&ret, x); #elif (LG_SIZEOF_PTR == 2) - _BitScanReverse(&ret, x); + _BitScanReverse(&ret, x); #else # error "Unsupported type sizes for lg_floor()" #endif - return (ret); + return (ret); } #elif (defined(JEMALLOC_HAVE_BUILTIN_CLZ)) JEMALLOC_INLINE size_t lg_floor(size_t x) { + assert(x != 0); + #if (LG_SIZEOF_PTR == LG_SIZEOF_INT) return (((8 << LG_SIZEOF_PTR) - 1) - __builtin_clz(x)); #elif (LG_SIZEOF_PTR == LG_SIZEOF_LONG) @@ -209,6 +215,8 @@ JEMALLOC_INLINE size_t lg_floor(size_t x) { + assert(x != 0); + x |= (x >> 1); x |= (x >> 2); x |= (x >> 4); From 8d0e04d42f4750970ac3052a6c76379b60aba5dc Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 30 Jan 2015 22:54:08 -0800 Subject: [PATCH 0632/3378] Refactor rtree to be lock-free. Recent huge allocation refactoring associates huge allocations with arenas, but it remains necessary to quickly look up huge allocation metadata during reallocation/deallocation. A global radix tree remains a good solution to this problem, but locking would have become the primary bottleneck after (upcoming) migration of chunk management from global to per arena data structures. This lock-free implementation uses double-checked reads to traverse the tree, so that in the steady state, each read or write requires only a single atomic operation. This implementation also assures that no more than two tree levels actually exist, through a combination of careful virtual memory allocation which makes large sparse nodes cheap, and skipping the root node on x64 (possible because the top 16 bits are all 0 in practice). 
--- include/jemalloc/internal/chunk.h | 2 +- .../jemalloc/internal/jemalloc_internal.h.in | 2 +- include/jemalloc/internal/private_symbols.txt | 15 +- include/jemalloc/internal/rtree.h | 342 +++++++++++------- src/chunk.c | 25 +- src/rtree.c | 150 ++++---- test/unit/rtree.c | 77 ++-- 7 files changed, 384 insertions(+), 229 deletions(-) diff --git a/include/jemalloc/internal/chunk.h b/include/jemalloc/internal/chunk.h index 764b7aca..62ac3e73 100644 --- a/include/jemalloc/internal/chunk.h +++ b/include/jemalloc/internal/chunk.h @@ -35,7 +35,7 @@ extern malloc_mutex_t chunks_mtx; /* Chunk statistics. */ extern chunk_stats_t stats_chunks; -extern rtree_t *chunks_rtree; +extern rtree_t chunks_rtree; extern size_t chunksize; extern size_t chunksize_mask; /* (chunksize - 1). */ diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 79a23e5e..280501df 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -955,7 +955,7 @@ ivsalloc(const void *ptr, bool demote) { /* Return 0 if ptr is not within a chunk managed by jemalloc. 
*/ - if (rtree_get(chunks_rtree, (uintptr_t)CHUNK_ADDR2BASE(ptr)) == 0) + if (rtree_get(&chunks_rtree, (uintptr_t)CHUNK_ADDR2BASE(ptr)) == 0) return (0); return (isalloc(ptr, demote)); diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 105e6646..7a78f580 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -369,14 +369,21 @@ quarantine_alloc_hook quarantine_alloc_hook_work quarantine_cleanup register_zone +rtree_child_read +rtree_child_read_hard +rtree_child_tryread rtree_delete rtree_get -rtree_get_locked rtree_new -rtree_postfork_child -rtree_postfork_parent -rtree_prefork +rtree_node_valid rtree_set +rtree_start_level +rtree_subkey +rtree_subtree_read +rtree_subtree_read_hard +rtree_subtree_tryread +rtree_val_read +rtree_val_write s2u s2u_compute s2u_lookup diff --git a/include/jemalloc/internal/rtree.h b/include/jemalloc/internal/rtree.h index bc74769f..e86e17c4 100644 --- a/include/jemalloc/internal/rtree.h +++ b/include/jemalloc/internal/rtree.h @@ -1,170 +1,270 @@ /* * This radix tree implementation is tailored to the singular purpose of - * tracking which chunks are currently owned by jemalloc. This functionality - * is mandatory for OS X, where jemalloc must be able to respond to object - * ownership queries. + * associating metadata with chunks that are currently owned by jemalloc. * ******************************************************************************* */ #ifdef JEMALLOC_H_TYPES +typedef struct rtree_node_elm_s rtree_node_elm_t; +typedef struct rtree_level_s rtree_level_t; typedef struct rtree_s rtree_t; /* - * Size of each radix tree node (must be a power of 2). This impacts tree - * depth. + * RTREE_BITS_PER_LEVEL must be a power of two that is no larger than the + * machine address width. 
*/ -#define RTREE_NODESIZE (1U << 16) +#define LG_RTREE_BITS_PER_LEVEL 4 +#define RTREE_BITS_PER_LEVEL (ZU(1) << LG_RTREE_BITS_PER_LEVEL) +#define RTREE_HEIGHT_MAX \ + ((ZU(1) << (LG_SIZEOF_PTR+3)) / RTREE_BITS_PER_LEVEL) -typedef void *(rtree_alloc_t)(size_t); -typedef void (rtree_dalloc_t)(void *); +/* Used for two-stage lock-free node initialization. */ +#define RTREE_NODE_INITIALIZING ((rtree_node_elm_t *)0x1) + +/* + * The node allocation callback function's argument is the number of contiguous + * rtree_node_elm_t structures to allocate, and the resulting memory must be + * zeroed. + */ +typedef rtree_node_elm_t *(rtree_node_alloc_t)(size_t); +typedef void (rtree_node_dalloc_t)(rtree_node_elm_t *); #endif /* JEMALLOC_H_TYPES */ /******************************************************************************/ #ifdef JEMALLOC_H_STRUCTS +struct rtree_node_elm_s { + union { + rtree_node_elm_t *child; + void *val; + }; +}; + +struct rtree_level_s { + /* + * A non-NULL subtree points to a subtree rooted along the hypothetical + * path to the leaf node corresponding to key 0. Depending on what keys + * have been used to store to the tree, an arbitrary combination of + * subtree pointers may remain NULL. + * + * Suppose keys comprise 48 bits, and LG_RTREE_BITS_PER_LEVEL is 4. + * This results in a 3-level tree, and the leftmost leaf can be directly + * accessed via subtrees[2], the subtree prefixed by 0x0000 (excluding + * 0x00000000) can be accessed via subtrees[1], and the remainder of the + * tree can be accessed via subtrees[0]. + * + * levels[0] : [ | 0x0001******** | 0x0002******** | ...] + * + * levels[1] : [ | 0x00000001**** | 0x00000002**** | ... ] + * + * levels[2] : [val(0x000000000000) | val(0x000000000001) | ...] + * + * This has practical implications on x64, which currently uses only the + * lower 47 bits of virtual address space in userland, thus leaving + * subtrees[0] unused and avoiding a level of tree traversal. 
+ */ + rtree_node_elm_t *subtree; + /* Number of key bits distinguished by this level. */ + unsigned bits; + /* + * Cumulative number of key bits distinguished by traversing to + * corresponding tree level. + */ + unsigned cumbits; +}; + struct rtree_s { - rtree_alloc_t *alloc; - rtree_dalloc_t *dalloc; - malloc_mutex_t mutex; - void **root; - unsigned height; - unsigned level2bits[1]; /* Dynamically sized. */ + rtree_node_alloc_t *alloc; + rtree_node_dalloc_t *dalloc; + unsigned height; + /* + * Precomputed table used to convert from the number of leading 0 key + * bits to which subtree level to start at. + */ + unsigned start_level[RTREE_HEIGHT_MAX]; + rtree_level_t levels[RTREE_HEIGHT_MAX]; }; #endif /* JEMALLOC_H_STRUCTS */ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -rtree_t *rtree_new(unsigned bits, rtree_alloc_t *alloc, rtree_dalloc_t *dalloc); +bool rtree_new(rtree_t *rtree, unsigned bits, rtree_node_alloc_t *alloc, + rtree_node_dalloc_t *dalloc); void rtree_delete(rtree_t *rtree); -void rtree_prefork(rtree_t *rtree); -void rtree_postfork_parent(rtree_t *rtree); -void rtree_postfork_child(rtree_t *rtree); +rtree_node_elm_t *rtree_subtree_read_hard(rtree_t *rtree, + unsigned level); +rtree_node_elm_t *rtree_child_read_hard(rtree_t *rtree, + rtree_node_elm_t *elm, unsigned level); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ #ifdef JEMALLOC_H_INLINES #ifndef JEMALLOC_ENABLE_INLINE -#ifdef JEMALLOC_DEBUG -uint8_t rtree_get_locked(rtree_t *rtree, uintptr_t key); -#endif -uint8_t rtree_get(rtree_t *rtree, uintptr_t key); -bool rtree_set(rtree_t *rtree, uintptr_t key, uint8_t val); +unsigned rtree_start_level(rtree_t *rtree, uintptr_t key); +uintptr_t rtree_subkey(rtree_t *rtree, uintptr_t key, unsigned level); + +bool rtree_node_valid(rtree_node_elm_t *node); +rtree_node_elm_t *rtree_child_tryread(rtree_node_elm_t *elm); 
+rtree_node_elm_t *rtree_child_read(rtree_t *rtree, rtree_node_elm_t *elm, + unsigned level); +void *rtree_val_read(rtree_t *rtree, rtree_node_elm_t *elm); +void rtree_val_write(rtree_t *rtree, rtree_node_elm_t *elm, void *val); +rtree_node_elm_t *rtree_subtree_tryread(rtree_t *rtree, unsigned level); +rtree_node_elm_t *rtree_subtree_read(rtree_t *rtree, unsigned level); + +void *rtree_get(rtree_t *rtree, uintptr_t key); +bool rtree_set(rtree_t *rtree, uintptr_t key, void *val); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_RTREE_C_)) -#define RTREE_GET_GENERATE(f) \ -/* The least significant bits of the key are ignored. */ \ -JEMALLOC_INLINE uint8_t \ -f(rtree_t *rtree, uintptr_t key) \ -{ \ - uint8_t ret; \ - uintptr_t subkey; \ - unsigned i, lshift, height, bits; \ - void **node, **child; \ - \ - RTREE_LOCK(&rtree->mutex); \ - for (i = lshift = 0, height = rtree->height, node = rtree->root;\ - i < height - 1; \ - i++, lshift += bits, node = child) { \ - bits = rtree->level2bits[i]; \ - subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR + \ - 3)) - bits); \ - child = (void**)node[subkey]; \ - if (child == NULL) { \ - RTREE_UNLOCK(&rtree->mutex); \ - return (0); \ - } \ - } \ - \ - /* \ - * node is a leaf, so it contains values rather than node \ - * pointers. 
\ - */ \ - bits = rtree->level2bits[i]; \ - subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR+3)) - \ - bits); \ - { \ - uint8_t *leaf = (uint8_t *)node; \ - ret = leaf[subkey]; \ - } \ - RTREE_UNLOCK(&rtree->mutex); \ - \ - RTREE_GET_VALIDATE \ - return (ret); \ +JEMALLOC_INLINE unsigned +rtree_start_level(rtree_t *rtree, uintptr_t key) +{ + unsigned start_level; + + if (unlikely(key == 0)) + return (rtree->height - 1); + + start_level = rtree->start_level[lg_floor(key) >> + LG_RTREE_BITS_PER_LEVEL]; + assert(start_level < rtree->height); + return (start_level); } -#ifdef JEMALLOC_DEBUG -# define RTREE_LOCK(l) malloc_mutex_lock(l) -# define RTREE_UNLOCK(l) malloc_mutex_unlock(l) -# define RTREE_GET_VALIDATE -RTREE_GET_GENERATE(rtree_get_locked) -# undef RTREE_LOCK -# undef RTREE_UNLOCK -# undef RTREE_GET_VALIDATE -#endif +JEMALLOC_INLINE uintptr_t +rtree_subkey(rtree_t *rtree, uintptr_t key, unsigned level) +{ -#define RTREE_LOCK(l) -#define RTREE_UNLOCK(l) -#ifdef JEMALLOC_DEBUG - /* - * Suppose that it were possible for a jemalloc-allocated chunk to be - * munmap()ped, followed by a different allocator in another thread re-using - * overlapping virtual memory, all without invalidating the cached rtree - * value. The result would be a false positive (the rtree would claim that - * jemalloc owns memory that it had actually discarded). This scenario - * seems impossible, but the following assertion is a prudent sanity check. 
- */ -# define RTREE_GET_VALIDATE \ - assert(rtree_get_locked(rtree, key) == ret); -#else -# define RTREE_GET_VALIDATE -#endif -RTREE_GET_GENERATE(rtree_get) -#undef RTREE_LOCK -#undef RTREE_UNLOCK -#undef RTREE_GET_VALIDATE + return ((key >> ((ZU(1) << (LG_SIZEOF_PTR+3)) - + rtree->levels[level].cumbits)) & ((ZU(1) << + rtree->levels[level].bits) - 1)); +} JEMALLOC_INLINE bool -rtree_set(rtree_t *rtree, uintptr_t key, uint8_t val) +rtree_node_valid(rtree_node_elm_t *node) +{ + + return ((uintptr_t)node > (uintptr_t)RTREE_NODE_INITIALIZING); +} + +JEMALLOC_INLINE rtree_node_elm_t * +rtree_child_tryread(rtree_node_elm_t *elm) +{ + rtree_node_elm_t *child; + + /* Double-checked read (first read may be stale. */ + child = elm->child; + if (!rtree_node_valid(child)) + child = atomic_read_p((void **)&elm->child); + return (child); +} + +JEMALLOC_INLINE rtree_node_elm_t * +rtree_child_read(rtree_t *rtree, rtree_node_elm_t *elm, unsigned level) +{ + rtree_node_elm_t *child; + + child = rtree_child_tryread(elm); + if (unlikely(!rtree_node_valid(child))) + child = rtree_child_read_hard(rtree, elm, level); + return (child); +} + +JEMALLOC_INLINE void * +rtree_val_read(rtree_t *rtree, rtree_node_elm_t *elm) +{ + + return (atomic_read_p(&elm->val)); +} + +JEMALLOC_INLINE void +rtree_val_write(rtree_t *rtree, rtree_node_elm_t *elm, void *val) +{ + + atomic_write_p(&elm->val, val); +} + +JEMALLOC_INLINE rtree_node_elm_t * +rtree_subtree_tryread(rtree_t *rtree, unsigned level) +{ + rtree_node_elm_t *subtree; + + /* Double-checked read (first read may be stale. 
*/ + subtree = rtree->levels[level].subtree; + if (!rtree_node_valid(subtree)) + subtree = atomic_read_p((void **)&rtree->levels[level].subtree); + return (subtree); +} + +JEMALLOC_INLINE rtree_node_elm_t * +rtree_subtree_read(rtree_t *rtree, unsigned level) +{ + rtree_node_elm_t *subtree; + + subtree = rtree_subtree_tryread(rtree, level); + if (unlikely(!rtree_node_valid(subtree))) + subtree = rtree_subtree_read_hard(rtree, level); + return (subtree); +} + +JEMALLOC_INLINE void * +rtree_get(rtree_t *rtree, uintptr_t key) { uintptr_t subkey; - unsigned i, lshift, height, bits; - void **node, **child; + unsigned i, start_level; + rtree_node_elm_t *node, *child; - malloc_mutex_lock(&rtree->mutex); - for (i = lshift = 0, height = rtree->height, node = rtree->root; - i < height - 1; - i++, lshift += bits, node = child) { - bits = rtree->level2bits[i]; - subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR+3)) - - bits); - child = (void**)node[subkey]; - if (child == NULL) { - size_t size = ((i + 1 < height - 1) ? sizeof(void *) - : (sizeof(uint8_t))) << rtree->level2bits[i+1]; - child = (void**)rtree->alloc(size); - if (child == NULL) { - malloc_mutex_unlock(&rtree->mutex); - return (true); - } - memset(child, 0, size); - node[subkey] = child; + start_level = rtree_start_level(rtree, key); + + for (i = start_level, node = rtree_subtree_tryread(rtree, start_level); + /**/; i++, node = child) { + if (unlikely(!rtree_node_valid(node))) + return (NULL); + subkey = rtree_subkey(rtree, key, i); + if (i == rtree->height - 1) { + /* + * node is a leaf, so it contains values rather than + * child pointers. + */ + return (rtree_val_read(rtree, &node[subkey])); } + assert(i < rtree->height - 1); + child = rtree_child_tryread(&node[subkey]); } + not_reached(); +} - /* node is a leaf, so it contains values rather than node pointers. 
*/ - bits = rtree->level2bits[i]; - subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR+3)) - bits); - { - uint8_t *leaf = (uint8_t *)node; - leaf[subkey] = val; +JEMALLOC_INLINE bool +rtree_set(rtree_t *rtree, uintptr_t key, void *val) +{ + uintptr_t subkey; + unsigned i, start_level; + rtree_node_elm_t *node, *child; + + start_level = rtree_start_level(rtree, key); + + node = rtree_subtree_read(rtree, start_level); + if (node == NULL) + return (true); + for (i = start_level; /**/; i++, node = child) { + subkey = rtree_subkey(rtree, key, i); + if (i == rtree->height - 1) { + /* + * node is a leaf, so it contains values rather than + * child pointers. + */ + rtree_val_write(rtree, &node[subkey], val); + return (false); + } + assert(i < rtree->height - 1); + child = rtree_child_read(rtree, &node[subkey], i); + if (child == NULL) + return (true); } - malloc_mutex_unlock(&rtree->mutex); - - return (false); + not_reached(); } #endif diff --git a/src/chunk.c b/src/chunk.c index 01180a71..9ba0b0cf 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -21,7 +21,7 @@ static extent_tree_t chunks_ad_mmap; static extent_tree_t chunks_szad_dss; static extent_tree_t chunks_ad_dss; -rtree_t *chunks_rtree; +rtree_t chunks_rtree; /* Various chunk-related settings. 
*/ size_t chunksize; @@ -200,7 +200,7 @@ chunk_register(void *chunk, size_t size, bool base) assert(CHUNK_ADDR2BASE(chunk) == chunk); if (config_ivsalloc && !base) { - if (rtree_set(chunks_rtree, (uintptr_t)chunk, 1)) + if (rtree_set(&chunks_rtree, (uintptr_t)chunk, chunk)) return (true); } if (config_stats || config_prof) { @@ -395,7 +395,7 @@ chunk_dalloc_core(void *chunk, size_t size) assert((size & chunksize_mask) == 0); if (config_ivsalloc) - rtree_set(chunks_rtree, (uintptr_t)chunk, 0); + rtree_set(&chunks_rtree, (uintptr_t)chunk, NULL); if (config_stats || config_prof) { malloc_mutex_lock(&chunks_mtx); assert(stats_chunks.curchunks >= (size / chunksize)); @@ -415,6 +415,14 @@ chunk_dalloc_default(void *chunk, size_t size, unsigned arena_ind) return (false); } +static rtree_node_elm_t * +chunks_rtree_node_alloc(size_t nelms) +{ + + return ((rtree_node_elm_t *)base_alloc(nelms * + sizeof(rtree_node_elm_t))); +} + bool chunk_boot(void) { @@ -436,9 +444,8 @@ chunk_boot(void) extent_tree_szad_new(&chunks_szad_dss); extent_tree_ad_new(&chunks_ad_dss); if (config_ivsalloc) { - chunks_rtree = rtree_new((ZU(1) << (LG_SIZEOF_PTR+3)) - - opt_lg_chunk, base_alloc, NULL); - if (chunks_rtree == NULL) + if (rtree_new(&chunks_rtree, (ZU(1) << (LG_SIZEOF_PTR+3)) - + opt_lg_chunk, chunks_rtree_node_alloc, NULL)) return (true); } @@ -450,8 +457,6 @@ chunk_prefork(void) { malloc_mutex_prefork(&chunks_mtx); - if (config_ivsalloc) - rtree_prefork(chunks_rtree); chunk_dss_prefork(); } @@ -460,8 +465,6 @@ chunk_postfork_parent(void) { chunk_dss_postfork_parent(); - if (config_ivsalloc) - rtree_postfork_parent(chunks_rtree); malloc_mutex_postfork_parent(&chunks_mtx); } @@ -470,7 +473,5 @@ chunk_postfork_child(void) { chunk_dss_postfork_child(); - if (config_ivsalloc) - rtree_postfork_child(chunks_rtree); malloc_mutex_postfork_child(&chunks_mtx); } diff --git a/src/rtree.c b/src/rtree.c index 2ff93dbe..47d9084e 100644 --- a/src/rtree.c +++ b/src/rtree.c @@ -1,75 +1,74 @@ #define 
JEMALLOC_RTREE_C_ #include "jemalloc/internal/jemalloc_internal.h" -rtree_t * -rtree_new(unsigned bits, rtree_alloc_t *alloc, rtree_dalloc_t *dalloc) +static unsigned +hmin(unsigned ha, unsigned hb) { - rtree_t *ret; - unsigned bits_per_level, bits_in_leaf, height, i; + + return (ha < hb ? ha : hb); +} + +/* Only the most significant bits of keys passed to rtree_[gs]et() are used. */ +bool +rtree_new(rtree_t *rtree, unsigned bits, rtree_node_alloc_t *alloc, + rtree_node_dalloc_t *dalloc) +{ + unsigned bits_in_leaf, height, i; assert(bits > 0 && bits <= (sizeof(uintptr_t) << 3)); - bits_per_level = jemalloc_ffs(pow2_ceil((RTREE_NODESIZE / sizeof(void - *)))) - 1; - bits_in_leaf = jemalloc_ffs(pow2_ceil((RTREE_NODESIZE / - sizeof(uint8_t)))) - 1; + bits_in_leaf = (bits % RTREE_BITS_PER_LEVEL) == 0 ? RTREE_BITS_PER_LEVEL + : (bits % RTREE_BITS_PER_LEVEL); if (bits > bits_in_leaf) { - height = 1 + (bits - bits_in_leaf) / bits_per_level; - if ((height-1) * bits_per_level + bits_in_leaf != bits) + height = 1 + (bits - bits_in_leaf) / RTREE_BITS_PER_LEVEL; + if ((height-1) * RTREE_BITS_PER_LEVEL + bits_in_leaf != bits) height++; - } else { - height = 1; - } - assert((height-1) * bits_per_level + bits_in_leaf >= bits); - - ret = (rtree_t*)alloc(offsetof(rtree_t, level2bits) + - (sizeof(unsigned) * height)); - if (ret == NULL) - return (NULL); - memset(ret, 0, offsetof(rtree_t, level2bits) + (sizeof(unsigned) * - height)); - - ret->alloc = alloc; - ret->dalloc = dalloc; - if (malloc_mutex_init(&ret->mutex)) { - if (dalloc != NULL) - dalloc(ret); - return (NULL); - } - ret->height = height; - if (height > 1) { - if ((height-1) * bits_per_level + bits_in_leaf > bits) { - ret->level2bits[0] = (bits - bits_in_leaf) % - bits_per_level; - } else - ret->level2bits[0] = bits_per_level; - for (i = 1; i < height-1; i++) - ret->level2bits[i] = bits_per_level; - ret->level2bits[height-1] = bits_in_leaf; } else - ret->level2bits[0] = bits; + height = 1; + assert((height-1) * 
RTREE_BITS_PER_LEVEL + bits_in_leaf == bits); - ret->root = (void**)alloc(sizeof(void *) << ret->level2bits[0]); - if (ret->root == NULL) { - if (dalloc != NULL) - dalloc(ret); - return (NULL); + rtree->alloc = alloc; + rtree->dalloc = dalloc; + rtree->height = height; + + /* Root level. */ + rtree->levels[0].subtree = NULL; + rtree->levels[0].bits = (height > 1) ? RTREE_BITS_PER_LEVEL : + bits_in_leaf; + rtree->levels[0].cumbits = rtree->levels[0].bits; + /* Interior levels. */ + for (i = 1; i < height-1; i++) { + rtree->levels[i].subtree = NULL; + rtree->levels[i].bits = RTREE_BITS_PER_LEVEL; + rtree->levels[i].cumbits = rtree->levels[i-1].cumbits + + RTREE_BITS_PER_LEVEL; + } + /* Leaf level. */ + if (height > 1) { + rtree->levels[height-1].subtree = NULL; + rtree->levels[height-1].bits = bits_in_leaf; + rtree->levels[height-1].cumbits = bits; } - memset(ret->root, 0, sizeof(void *) << ret->level2bits[0]); - return (ret); + /* Compute lookup table to be used by rtree_start_level(). */ + for (i = 0; i < RTREE_HEIGHT_MAX; i++) { + rtree->start_level[i] = hmin(RTREE_HEIGHT_MAX - 1 - i, height - + 1); + } + + return (false); } static void -rtree_delete_subtree(rtree_t *rtree, void **node, unsigned level) +rtree_delete_subtree(rtree_t *rtree, rtree_node_elm_t *node, unsigned level) { if (level < rtree->height - 1) { size_t nchildren, i; - nchildren = ZU(1) << rtree->level2bits[level]; + nchildren = ZU(1) << rtree->levels[level].bits; for (i = 0; i < nchildren; i++) { - void **child = (void **)node[i]; + rtree_node_elm_t *child = node[i].child; if (child != NULL) rtree_delete_subtree(rtree, child, level + 1); } @@ -80,28 +79,49 @@ rtree_delete_subtree(rtree_t *rtree, void **node, unsigned level) void rtree_delete(rtree_t *rtree) { + unsigned i; - rtree_delete_subtree(rtree, rtree->root, 0); - rtree->dalloc(rtree); + for (i = 0; i < rtree->height; i++) { + rtree_node_elm_t *subtree = rtree->levels[i].subtree; + if (subtree != NULL) + rtree_delete_subtree(rtree, 
subtree, i); + } } -void -rtree_prefork(rtree_t *rtree) +static rtree_node_elm_t * +rtree_node_init(rtree_t *rtree, unsigned level, rtree_node_elm_t **elmp) +{ + rtree_node_elm_t *node; + + if (atomic_cas_p((void **)elmp, NULL, RTREE_NODE_INITIALIZING)) { + /* + * Another thread is already in the process of initializing. + * Spin-wait until initialization is complete. + */ + do { + CPU_SPINWAIT; + node = atomic_read_p((void **)elmp); + } while (node == RTREE_NODE_INITIALIZING); + } else { + node = rtree->alloc(ZU(1) << rtree->levels[level].bits); + if (node == NULL) + return (NULL); + atomic_write_p((void **)elmp, node); + } + + return (node); +} + +rtree_node_elm_t * +rtree_subtree_read_hard(rtree_t *rtree, unsigned level) { - malloc_mutex_prefork(&rtree->mutex); + return (rtree_node_init(rtree, level, &rtree->levels[level].subtree)); } -void -rtree_postfork_parent(rtree_t *rtree) +rtree_node_elm_t * +rtree_child_read_hard(rtree_t *rtree, rtree_node_elm_t *elm, unsigned level) { - malloc_mutex_postfork_parent(&rtree->mutex); -} - -void -rtree_postfork_child(rtree_t *rtree) -{ - - malloc_mutex_postfork_child(&rtree->mutex); + return (rtree_node_init(rtree, level, &elm->child)); } diff --git a/test/unit/rtree.c b/test/unit/rtree.c index 77a947d6..556c4a87 100644 --- a/test/unit/rtree.c +++ b/test/unit/rtree.c @@ -1,14 +1,30 @@ #include "test/jemalloc_test.h" +static rtree_node_elm_t * +node_alloc(size_t nelms) +{ + + return (calloc(nelms, sizeof(rtree_node_elm_t))); +} + +static void +node_dalloc(rtree_node_elm_t *node) +{ + + free(node); +} + TEST_BEGIN(test_rtree_get_empty) { unsigned i; for (i = 1; i <= (sizeof(uintptr_t) << 3); i++) { - rtree_t *rtree = rtree_new(i, malloc, free); - assert_u_eq(rtree_get(rtree, 0), 0, + rtree_t rtree; + assert_false(rtree_new(&rtree, i, node_alloc, node_dalloc), + "Unexpected rtree_new() failure"); + assert_ptr_eq(rtree_get(&rtree, 0), NULL, "rtree_get() should return NULL for empty tree"); - rtree_delete(rtree); + 
rtree_delete(&rtree); } } TEST_END @@ -16,19 +32,22 @@ TEST_END TEST_BEGIN(test_rtree_extrema) { unsigned i; + extent_node_t node_a, node_b; for (i = 1; i <= (sizeof(uintptr_t) << 3); i++) { - rtree_t *rtree = rtree_new(i, malloc, free); + rtree_t rtree; + assert_false(rtree_new(&rtree, i, node_alloc, node_dalloc), + "Unexpected rtree_new() failure"); - rtree_set(rtree, 0, 1); - assert_u_eq(rtree_get(rtree, 0), 1, + rtree_set(&rtree, 0, &node_a); + assert_ptr_eq(rtree_get(&rtree, 0), &node_a, "rtree_get() should return previously set value"); - rtree_set(rtree, ~((uintptr_t)0), 1); - assert_u_eq(rtree_get(rtree, ~((uintptr_t)0)), 1, + rtree_set(&rtree, ~((uintptr_t)0), &node_b); + assert_ptr_eq(rtree_get(&rtree, ~((uintptr_t)0)), &node_b, "rtree_get() should return previously set value"); - rtree_delete(rtree); + rtree_delete(&rtree); } } TEST_END @@ -40,26 +59,30 @@ TEST_BEGIN(test_rtree_bits) for (i = 1; i < (sizeof(uintptr_t) << 3); i++) { uintptr_t keys[] = {0, 1, (((uintptr_t)1) << (sizeof(uintptr_t)*8-i)) - 1}; - rtree_t *rtree = rtree_new(i, malloc, free); + extent_node_t node; + rtree_t rtree; + + assert_false(rtree_new(&rtree, i, node_alloc, node_dalloc), + "Unexpected rtree_new() failure"); for (j = 0; j < sizeof(keys)/sizeof(uintptr_t); j++) { - rtree_set(rtree, keys[j], 1); + rtree_set(&rtree, keys[j], &node); for (k = 0; k < sizeof(keys)/sizeof(uintptr_t); k++) { - assert_u_eq(rtree_get(rtree, keys[k]), 1, + assert_ptr_eq(rtree_get(&rtree, keys[k]), &node, "rtree_get() should return previously set " "value and ignore insignificant key bits; " "i=%u, j=%u, k=%u, set key=%#"PRIxPTR", " "get key=%#"PRIxPTR, i, j, k, keys[j], keys[k]); } - assert_u_eq(rtree_get(rtree, - (((uintptr_t)1) << (sizeof(uintptr_t)*8-i))), 0, + assert_ptr_eq(rtree_get(&rtree, + (((uintptr_t)1) << (sizeof(uintptr_t)*8-i))), NULL, "Only leftmost rtree leaf should be set; " "i=%u, j=%u", i, j); - rtree_set(rtree, keys[j], 0); + rtree_set(&rtree, keys[j], NULL); } - 
rtree_delete(rtree); + rtree_delete(&rtree); } } TEST_END @@ -68,37 +91,41 @@ TEST_BEGIN(test_rtree_random) { unsigned i; sfmt_t *sfmt; -#define NSET 100 +#define NSET 16 #define SEED 42 sfmt = init_gen_rand(SEED); for (i = 1; i <= (sizeof(uintptr_t) << 3); i++) { - rtree_t *rtree = rtree_new(i, malloc, free); uintptr_t keys[NSET]; + extent_node_t node; unsigned j; + rtree_t rtree; + + assert_false(rtree_new(&rtree, i, node_alloc, node_dalloc), + "Unexpected rtree_new() failure"); for (j = 0; j < NSET; j++) { keys[j] = (uintptr_t)gen_rand64(sfmt); - rtree_set(rtree, keys[j], 1); - assert_u_eq(rtree_get(rtree, keys[j]), 1, + rtree_set(&rtree, keys[j], &node); + assert_ptr_eq(rtree_get(&rtree, keys[j]), &node, "rtree_get() should return previously set value"); } for (j = 0; j < NSET; j++) { - assert_u_eq(rtree_get(rtree, keys[j]), 1, + assert_ptr_eq(rtree_get(&rtree, keys[j]), &node, "rtree_get() should return previously set value"); } for (j = 0; j < NSET; j++) { - rtree_set(rtree, keys[j], 0); - assert_u_eq(rtree_get(rtree, keys[j]), 0, + rtree_set(&rtree, keys[j], NULL); + assert_ptr_eq(rtree_get(&rtree, keys[j]), NULL, "rtree_get() should return previously set value"); } for (j = 0; j < NSET; j++) { - assert_u_eq(rtree_get(rtree, keys[j]), 0, + assert_ptr_eq(rtree_get(&rtree, keys[j]), NULL, "rtree_get() should return previously set value"); } - rtree_delete(rtree); + rtree_delete(&rtree); } fini_gen_rand(sfmt); #undef NSET From 23694b07457f3aaf9605a4ff6b386f3c897eb624 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 9 Feb 2015 16:19:27 -0800 Subject: [PATCH 0633/3378] Fix arena_get() for (!init_if_missing && refresh_if_missing) case. Fix arena_get() to refresh the cache as needed in the (!init_if_missing && refresh_if_missing) case. This flaw was introduced by the initial arena_get() implementation, which was part of 8bb3198f72fc7587dc93527f9f19fb5be52fa553 (Refactor/fix arenas manipulation.). 
--- include/jemalloc/internal/jemalloc_internal.h.in | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 280501df..2b167420 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -755,10 +755,7 @@ arena_get(tsd_t *tsd, unsigned ind, bool init_if_missing, arena = arenas_cache[ind]; if (likely(arena != NULL) || !refresh_if_missing) return (arena); - if (init_if_missing) - return (arena_get_hard(tsd, ind, init_if_missing)); - else - return (NULL); + return (arena_get_hard(tsd, ind, init_if_missing)); } #endif From 1cb181ed632e7573fb4eab194e4d216867222d27 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 29 Jan 2015 15:30:47 -0800 Subject: [PATCH 0634/3378] Implement explicit tcache support. Add the MALLOCX_TCACHE() and MALLOCX_TCACHE_NONE macros, which can be used in conjunction with the *allocx() API. Add the tcache.create, tcache.flush, and tcache.destroy mallctls. This resolves #145. --- doc/jemalloc.xml.in | 106 ++++++++--- include/jemalloc/internal/arena.h | 51 +++--- include/jemalloc/internal/huge.h | 8 +- .../jemalloc/internal/jemalloc_internal.h.in | 134 +++++++------- include/jemalloc/internal/private_symbols.txt | 5 + include/jemalloc/internal/tcache.h | 102 +++++++---- include/jemalloc/jemalloc_macros.h.in | 12 +- src/arena.c | 24 +-- src/ckh.c | 7 +- src/ctl.c | 113 +++++++++++- src/huge.c | 36 ++-- src/jemalloc.c | 158 ++++++++--------- src/prof.c | 35 ++-- src/quarantine.c | 10 +- src/tcache.c | 166 +++++++++++++----- test/unit/mallctl.c | 110 ++++++++++++ 16 files changed, 740 insertions(+), 337 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 739b33ac..da800ded 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -242,7 +242,7 @@ relevant. 
Use bitwise or (|) operations to specify one or more of the following: - + MALLOCX_LG_ALIGN(la) @@ -252,7 +252,7 @@ that la is within the valid range. - + MALLOCX_ALIGN(a) @@ -262,7 +262,7 @@ validate that a is a power of 2. - + MALLOCX_ZERO Initialize newly allocated memory to contain zero @@ -271,16 +271,38 @@ that are initialized to contain zero bytes. If this macro is absent, newly allocated memory is uninitialized. - + + MALLOCX_TCACHE(tc) + + + Use the thread-specific cache (tcache) specified by + the identifier tc, which must have been + acquired via the tcache.create + mallctl. This macro does not validate that + tc specifies a valid + identifier. + + + MALLOCX_TCACHE_NONE + + Do not use a thread-specific cache (tcache). Unless + MALLOCX_TCACHE(tc) or + MALLOCX_TCACHE_NONE is specified, an + automatically managed tcache will be used under many circumstances. + This macro cannot be used in the same flags + argument as + MALLOCX_TCACHE(tc). + + MALLOCX_ARENA(a) Use the arena specified by the index - a (and by necessity bypass the thread - cache). This macro has no effect for regions that were allocated - via an arena other than the one specified. This macro does not - validate that a specifies an arena index in - the valid range. + a. This macro has no effect for regions that + were allocated via an arena other than the one specified. This + macro does not validate that a specifies an + arena index in the valid range. @@ -1060,12 +1082,11 @@ malloc_conf = "xmalloc:true";]]> r- [] - Thread-specific caching enabled/disabled. When there - are multiple threads, each thread uses a thread-specific cache for - objects up to a certain size. Thread-specific caching allows many - allocations to be satisfied without performing any thread - synchronization, at the cost of increased memory use. See the - Thread-specific caching (tcache) enabled/disabled. When + there are multiple threads, each thread uses a tcache for objects up to + a certain size. 
Thread-specific caching allows many allocations to be + satisfied without performing any thread synchronization, at the cost of + increased memory use. See the opt.lg_tcache_max option for related tuning information. This option is enabled by default unless running inside [] Maximum size class (log base 2) to cache in the - thread-specific cache. At a minimum, all small size classes are - cached, and at a maximum all large size classes are cached. The + thread-specific cache (tcache). At a minimum, all small size classes + are cached, and at a maximum all large size classes are cached. The default maximum is 32 KiB (2^15). @@ -1339,7 +1360,7 @@ malloc_conf = "xmalloc:true";]]> Enable/disable calling thread's tcache. The tcache is implicitly flushed as a side effect of becoming disabled (see thread.tcache.flush). + linkend="thread.tcache.flush">thread.tcache.flush). @@ -1350,9 +1371,9 @@ malloc_conf = "xmalloc:true";]]> -- [] - Flush calling thread's tcache. This interface releases - all cached objects and internal data structures associated with the - calling thread's thread-specific cache. Ordinarily, this interface + Flush calling thread's thread-specific cache (tcache). + This interface releases all cached objects and internal data structures + associated with the calling thread's tcache. Ordinarily, this interface need not be called, since automatic periodic incremental garbage collection occurs, and the thread cache is automatically discarded when a thread exits. However, garbage collection is triggered by allocation @@ -1399,6 +1420,49 @@ malloc_conf = "xmalloc:true";]]> default. + + + tcache.create + (unsigned) + r- + [] + + Create an explicit thread-specific cache (tcache) and + return an identifier that can be passed to the MALLOCX_TCACHE(tc) + macro to explicitly use the specified cache rather than the + automatically managed one that is used by default. 
Each explicit cache + can be used by only one thread at a time; the application must assure + that this constraint holds. + + + + + + tcache.flush + (unsigned) + -w + [] + + Flush the specified thread-specific cache (tcache). The + same considerations apply to this interface as to thread.tcache.flush, + except that the tcache will never be automatically be discarded. + + + + + + tcache.destroy + (unsigned) + -w + [] + + Flush the specified thread-specific cache (tcache) and + make the identifier available for use during a future tcache creation. + + + arena.<i>.purge diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 46367f68..5476899d 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -272,7 +272,8 @@ struct arena_s { arena_stats_t stats; /* * List of tcaches for extant threads associated with this arena. - * Stats from these are merged incrementally, and at exit. + * Stats from these are merged incrementally, and at exit if + * opt_stats_print is enabled. 
*/ ql_head(tcache_t) tcache_ql; @@ -387,8 +388,7 @@ extern arena_ralloc_junk_large_t *arena_ralloc_junk_large; bool arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, bool zero); void *arena_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, - size_t size, size_t extra, size_t alignment, bool zero, - bool try_tcache_alloc, bool try_tcache_dalloc); + size_t size, size_t extra, size_t alignment, bool zero, tcache_t *tcache); dss_prec_t arena_dss_prec_get(arena_t *arena); bool arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec); void arena_stats_merge(arena_t *arena, const char **dss, size_t *nactive, @@ -450,13 +450,13 @@ unsigned arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, prof_tctx_t *arena_prof_tctx_get(const void *ptr); void arena_prof_tctx_set(const void *ptr, prof_tctx_t *tctx); void *arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero, - bool try_tcache); + tcache_t *tcache); arena_t *arena_aalloc(const void *ptr); size_t arena_salloc(const void *ptr, bool demote); void arena_dalloc(tsd_t *tsd, arena_chunk_t *chunk, void *ptr, - bool try_tcache); + tcache_t *tcache); void arena_sdalloc(tsd_t *tsd, arena_chunk_t *chunk, void *ptr, size_t size, - bool try_tcache); + tcache_t *tcache); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_)) @@ -943,17 +943,15 @@ arena_prof_tctx_set(const void *ptr, prof_tctx_t *tctx) JEMALLOC_ALWAYS_INLINE void * arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero, - bool try_tcache) + tcache_t *tcache) { - tcache_t *tcache; assert(size != 0); assert(size <= arena_maxclass); if (likely(size <= SMALL_MAXCLASS)) { - if (likely(try_tcache) && likely((tcache = tcache_get(tsd, - true)) != NULL)) - return (tcache_alloc_small(tcache, size, zero)); + if (likely(tcache != NULL)) + return (tcache_alloc_small(tsd, tcache, size, zero)); else { arena = arena_choose(tsd, arena); if (unlikely(arena == NULL)) @@ -965,9 +963,8 @@ 
arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero, * Initialize tcache after checking size in order to avoid * infinite recursion during tcache initialization. */ - if (try_tcache && size <= tcache_maxclass && likely((tcache = - tcache_get(tsd, true)) != NULL)) - return (tcache_alloc_large(tcache, size, zero)); + if (likely(tcache != NULL) && size <= tcache_maxclass) + return (tcache_alloc_large(tsd, tcache, size, zero)); else { arena = arena_choose(tsd, arena); if (unlikely(arena == NULL)) @@ -1027,10 +1024,9 @@ arena_salloc(const void *ptr, bool demote) } JEMALLOC_ALWAYS_INLINE void -arena_dalloc(tsd_t *tsd, arena_chunk_t *chunk, void *ptr, bool try_tcache) +arena_dalloc(tsd_t *tsd, arena_chunk_t *chunk, void *ptr, tcache_t *tcache) { size_t pageind, mapbits; - tcache_t *tcache; assert(ptr != NULL); assert(CHUNK_ADDR2BASE(ptr) != ptr); @@ -1040,11 +1036,10 @@ arena_dalloc(tsd_t *tsd, arena_chunk_t *chunk, void *ptr, bool try_tcache) assert(arena_mapbits_allocated_get(chunk, pageind) != 0); if (likely((mapbits & CHUNK_MAP_LARGE) == 0)) { /* Small allocation. 
*/ - if (likely(try_tcache) && likely((tcache = tcache_get(tsd, - false)) != NULL)) { + if (likely(tcache != NULL)) { index_t binind = arena_ptr_small_binind_get(ptr, mapbits); - tcache_dalloc_small(tcache, ptr, binind); + tcache_dalloc_small(tsd, tcache, ptr, binind); } else arena_dalloc_small(chunk->arena, chunk, ptr, pageind); } else { @@ -1052,9 +1047,8 @@ arena_dalloc(tsd_t *tsd, arena_chunk_t *chunk, void *ptr, bool try_tcache) assert(((uintptr_t)ptr & PAGE_MASK) == 0); - if (try_tcache && size <= tcache_maxclass && likely((tcache = - tcache_get(tsd, false)) != NULL)) - tcache_dalloc_large(tcache, ptr, size); + if (likely(tcache != NULL) && size <= tcache_maxclass) + tcache_dalloc_large(tsd, tcache, ptr, size); else arena_dalloc_large(chunk->arena, chunk, ptr); } @@ -1062,9 +1056,8 @@ arena_dalloc(tsd_t *tsd, arena_chunk_t *chunk, void *ptr, bool try_tcache) JEMALLOC_ALWAYS_INLINE void arena_sdalloc(tsd_t *tsd, arena_chunk_t *chunk, void *ptr, size_t size, - bool try_tcache) + tcache_t *tcache) { - tcache_t *tcache; assert(ptr != NULL); assert(CHUNK_ADDR2BASE(ptr) != ptr); @@ -1082,10 +1075,9 @@ arena_sdalloc(tsd_t *tsd, arena_chunk_t *chunk, void *ptr, size_t size, if (likely(size <= SMALL_MAXCLASS)) { /* Small allocation. 
*/ - if (likely(try_tcache) && likely((tcache = tcache_get(tsd, - false)) != NULL)) { + if (likely(tcache != NULL)) { index_t binind = size2index(size); - tcache_dalloc_small(tcache, ptr, binind); + tcache_dalloc_small(tsd, tcache, ptr, binind); } else { size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; @@ -1094,9 +1086,8 @@ arena_sdalloc(tsd_t *tsd, arena_chunk_t *chunk, void *ptr, size_t size, } else { assert(((uintptr_t)ptr & PAGE_MASK) == 0); - if (try_tcache && size <= tcache_maxclass && (tcache = - tcache_get(tsd, false)) != NULL) - tcache_dalloc_large(tcache, ptr, size); + if (likely(tcache != NULL) && size <= tcache_maxclass) + tcache_dalloc_large(tsd, tcache, ptr, size); else arena_dalloc_large(chunk->arena, chunk, ptr); } diff --git a/include/jemalloc/internal/huge.h b/include/jemalloc/internal/huge.h index decb0249..231cc368 100644 --- a/include/jemalloc/internal/huge.h +++ b/include/jemalloc/internal/huge.h @@ -10,19 +10,19 @@ #ifdef JEMALLOC_H_EXTERNS void *huge_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero, - bool try_tcache); + tcache_t *tcache); void *huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, - bool zero, bool try_tcache); + bool zero, tcache_t *tcache); bool huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, bool zero); void *huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size, size_t extra, size_t alignment, bool zero, - bool try_tcache_alloc, bool try_tcache_dalloc); + tcache_t *tcache); #ifdef JEMALLOC_JET typedef void (huge_dalloc_junk_t)(void *, size_t); extern huge_dalloc_junk_t *huge_dalloc_junk; #endif -void huge_dalloc(tsd_t *tsd, void *ptr, bool try_tcache); +void huge_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache); arena_t *huge_aalloc(const void *ptr); size_t huge_salloc(const void *ptr); prof_tctx_t *huge_prof_tctx_get(const void *ptr); diff --git a/include/jemalloc/internal/jemalloc_internal.h.in 
b/include/jemalloc/internal/jemalloc_internal.h.in index 2b167420..b8c994cb 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -172,7 +172,21 @@ static const bool config_ivsalloc = /* Size class index type. */ typedef unsigned index_t; -#define MALLOCX_ARENA_MASK ((int)~0xff) +/* + * Flags bits: + * + * a: arena + * t: tcache + * 0: unused + * z: zero + * n: alignment + * + * aaaaaaaa aaaatttt tttttttt 0znnnnnn + */ +#define MALLOCX_ARENA_MASK ((int)~0xfffff) +#define MALLOCX_ARENA_MAX 0xffe +#define MALLOCX_TCACHE_MASK ((int)~0xfff000ffU) +#define MALLOCX_TCACHE_MAX 0xffd #define MALLOCX_LG_ALIGN_MASK ((int)0x3f) /* Use MALLOCX_ALIGN_GET() if alignment may not be specified in flags. */ #define MALLOCX_ALIGN_GET_SPECIFIED(flags) \ @@ -181,8 +195,11 @@ typedef unsigned index_t; (MALLOCX_ALIGN_GET_SPECIFIED(flags) & (SIZE_T_MAX-1)) #define MALLOCX_ZERO_GET(flags) \ ((bool)(flags & MALLOCX_ZERO)) + +#define MALLOCX_TCACHE_GET(flags) \ + (((unsigned)((flags & MALLOCX_TCACHE_MASK) >> 8)) - 2) #define MALLOCX_ARENA_GET(flags) \ - (((unsigned)(flags >> 8)) - 1) + (((unsigned)(((unsigned)flags) >> 20)) - 1) /* Smallest size class to support. */ #define TINY_MIN (1U << LG_TINY_MIN) @@ -749,7 +766,7 @@ arena_get(tsd_t *tsd, unsigned ind, bool init_if_missing, * ind is invalid, cache is old (too small), or arena to be * initialized. */ - return (refresh_if_missing ? arena_get_hard(tsd, ind, + return (refresh_if_missing ? 
arena_get_hard(tsd, ind, init_if_missing) : NULL); } arena = arenas_cache[ind]; @@ -778,32 +795,31 @@ arena_get(tsd_t *tsd, unsigned ind, bool init_if_missing, #ifndef JEMALLOC_ENABLE_INLINE arena_t *iaalloc(const void *ptr); size_t isalloc(const void *ptr, bool demote); -void *iallocztm(tsd_t *tsd, size_t size, bool zero, bool try_tcache, +void *iallocztm(tsd_t *tsd, size_t size, bool zero, tcache_t *tcache, bool is_metadata, arena_t *arena); -void *imalloct(tsd_t *tsd, size_t size, bool try_tcache, arena_t *arena); +void *imalloct(tsd_t *tsd, size_t size, tcache_t *tcache, arena_t *arena); void *imalloc(tsd_t *tsd, size_t size); -void *icalloct(tsd_t *tsd, size_t size, bool try_tcache, arena_t *arena); +void *icalloct(tsd_t *tsd, size_t size, tcache_t *tcache, arena_t *arena); void *icalloc(tsd_t *tsd, size_t size); void *ipallocztm(tsd_t *tsd, size_t usize, size_t alignment, bool zero, - bool try_tcache, bool is_metadata, arena_t *arena); + tcache_t *tcache, bool is_metadata, arena_t *arena); void *ipalloct(tsd_t *tsd, size_t usize, size_t alignment, bool zero, - bool try_tcache, arena_t *arena); + tcache_t *tcache, arena_t *arena); void *ipalloc(tsd_t *tsd, size_t usize, size_t alignment, bool zero); size_t ivsalloc(const void *ptr, bool demote); size_t u2rz(size_t usize); size_t p2rz(const void *ptr); -void idalloctm(tsd_t *tsd, void *ptr, bool try_tcache, bool is_metadata); -void idalloct(tsd_t *tsd, void *ptr, bool try_tcache); +void idalloctm(tsd_t *tsd, void *ptr, tcache_t *tcache, bool is_metadata); +void idalloct(tsd_t *tsd, void *ptr, tcache_t *tcache); void idalloc(tsd_t *tsd, void *ptr); -void iqalloc(tsd_t *tsd, void *ptr, bool try_tcache); -void isdalloct(tsd_t *tsd, void *ptr, size_t size, bool try_tcache); -void isqalloc(tsd_t *tsd, void *ptr, size_t size, bool try_tcache); +void iqalloc(tsd_t *tsd, void *ptr, tcache_t *tcache); +void isdalloct(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache); +void isqalloc(tsd_t *tsd, void *ptr, size_t 
size, tcache_t *tcache); void *iralloct_realign(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, - size_t extra, size_t alignment, bool zero, bool try_tcache_alloc, - bool try_tcache_dalloc, arena_t *arena); -void *iralloct(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, - size_t alignment, bool zero, bool try_tcache_alloc, bool try_tcache_dalloc, + size_t extra, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena); +void *iralloct(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, + size_t alignment, bool zero, tcache_t *tcache, arena_t *arena); void *iralloc(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t alignment, bool zero); bool ixalloc(void *ptr, size_t oldsize, size_t size, size_t extra, @@ -853,7 +869,7 @@ isalloc(const void *ptr, bool demote) } JEMALLOC_ALWAYS_INLINE void * -iallocztm(tsd_t *tsd, size_t size, bool zero, bool try_tcache, bool is_metadata, +iallocztm(tsd_t *tsd, size_t size, bool zero, tcache_t *tcache, bool is_metadata, arena_t *arena) { void *ret; @@ -861,9 +877,9 @@ iallocztm(tsd_t *tsd, size_t size, bool zero, bool try_tcache, bool is_metadata, assert(size != 0); if (likely(size <= arena_maxclass)) - ret = arena_malloc(tsd, arena, size, zero, try_tcache); + ret = arena_malloc(tsd, arena, size, zero, tcache); else - ret = huge_malloc(tsd, arena, size, zero, try_tcache); + ret = huge_malloc(tsd, arena, size, zero, tcache); if (config_stats && is_metadata && likely(ret != NULL)) { arena_metadata_allocated_add(iaalloc(ret), isalloc(ret, config_prof)); @@ -872,36 +888,36 @@ iallocztm(tsd_t *tsd, size_t size, bool zero, bool try_tcache, bool is_metadata, } JEMALLOC_ALWAYS_INLINE void * -imalloct(tsd_t *tsd, size_t size, bool try_tcache, arena_t *arena) +imalloct(tsd_t *tsd, size_t size, tcache_t *tcache, arena_t *arena) { - return (iallocztm(tsd, size, false, try_tcache, false, arena)); + return (iallocztm(tsd, size, false, tcache, false, arena)); } JEMALLOC_ALWAYS_INLINE void * imalloc(tsd_t *tsd, size_t size) { 
- return (iallocztm(tsd, size, false, true, false, NULL)); + return (iallocztm(tsd, size, false, tcache_get(tsd, true), false, NULL)); } JEMALLOC_ALWAYS_INLINE void * -icalloct(tsd_t *tsd, size_t size, bool try_tcache, arena_t *arena) +icalloct(tsd_t *tsd, size_t size, tcache_t *tcache, arena_t *arena) { - return (iallocztm(tsd, size, true, try_tcache, false, arena)); + return (iallocztm(tsd, size, true, tcache, false, arena)); } JEMALLOC_ALWAYS_INLINE void * icalloc(tsd_t *tsd, size_t size) { - return (iallocztm(tsd, size, true, true, false, NULL)); + return (iallocztm(tsd, size, true, tcache_get(tsd, true), false, NULL)); } JEMALLOC_ALWAYS_INLINE void * ipallocztm(tsd_t *tsd, size_t usize, size_t alignment, bool zero, - bool try_tcache, bool is_metadata, arena_t *arena) + tcache_t *tcache, bool is_metadata, arena_t *arena) { void *ret; @@ -909,7 +925,7 @@ ipallocztm(tsd_t *tsd, size_t usize, size_t alignment, bool zero, assert(usize == sa2u(usize, alignment)); if (usize <= SMALL_MAXCLASS && alignment < PAGE) - ret = arena_malloc(tsd, arena, usize, zero, try_tcache); + ret = arena_malloc(tsd, arena, usize, zero, tcache); else { if (likely(usize <= arena_maxclass)) { arena = arena_choose(tsd, arena); @@ -917,10 +933,10 @@ ipallocztm(tsd_t *tsd, size_t usize, size_t alignment, bool zero, return (NULL); ret = arena_palloc(arena, usize, alignment, zero); } else if (likely(alignment <= chunksize)) - ret = huge_malloc(tsd, arena, usize, zero, try_tcache); + ret = huge_malloc(tsd, arena, usize, zero, tcache); else { ret = huge_palloc(tsd, arena, usize, alignment, zero, - try_tcache); + tcache); } } assert(ALIGNMENT_ADDR2BASE(ret, alignment) == ret); @@ -932,19 +948,19 @@ ipallocztm(tsd_t *tsd, size_t usize, size_t alignment, bool zero, } JEMALLOC_ALWAYS_INLINE void * -ipalloct(tsd_t *tsd, size_t usize, size_t alignment, bool zero, bool try_tcache, - arena_t *arena) +ipalloct(tsd_t *tsd, size_t usize, size_t alignment, bool zero, + tcache_t *tcache, arena_t *arena) { - 
return (ipallocztm(tsd, usize, alignment, zero, try_tcache, false, - arena)); + return (ipallocztm(tsd, usize, alignment, zero, tcache, false, arena)); } JEMALLOC_ALWAYS_INLINE void * ipalloc(tsd_t *tsd, size_t usize, size_t alignment, bool zero) { - return (ipallocztm(tsd, usize, alignment, zero, true, false, NULL)); + return (ipallocztm(tsd, usize, alignment, zero, tcache_get(tsd, + NULL), false, NULL)); } JEMALLOC_ALWAYS_INLINE size_t @@ -981,7 +997,7 @@ p2rz(const void *ptr) } JEMALLOC_ALWAYS_INLINE void -idalloctm(tsd_t *tsd, void *ptr, bool try_tcache, bool is_metadata) +idalloctm(tsd_t *tsd, void *ptr, tcache_t *tcache, bool is_metadata) { arena_chunk_t *chunk; @@ -993,37 +1009,37 @@ idalloctm(tsd_t *tsd, void *ptr, bool try_tcache, bool is_metadata) chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (likely(chunk != ptr)) - arena_dalloc(tsd, chunk, ptr, try_tcache); + arena_dalloc(tsd, chunk, ptr, tcache); else - huge_dalloc(tsd, ptr, try_tcache); + huge_dalloc(tsd, ptr, tcache); } JEMALLOC_ALWAYS_INLINE void -idalloct(tsd_t *tsd, void *ptr, bool try_tcache) +idalloct(tsd_t *tsd, void *ptr, tcache_t *tcache) { - idalloctm(tsd, ptr, try_tcache, false); + idalloctm(tsd, ptr, tcache, false); } JEMALLOC_ALWAYS_INLINE void idalloc(tsd_t *tsd, void *ptr) { - idalloctm(tsd, ptr, true, false); + idalloctm(tsd, ptr, tcache_get(tsd, false), false); } JEMALLOC_ALWAYS_INLINE void -iqalloc(tsd_t *tsd, void *ptr, bool try_tcache) +iqalloc(tsd_t *tsd, void *ptr, tcache_t *tcache) { if (config_fill && unlikely(opt_quarantine)) quarantine(tsd, ptr); else - idalloctm(tsd, ptr, try_tcache, false); + idalloctm(tsd, ptr, tcache, false); } JEMALLOC_ALWAYS_INLINE void -isdalloct(tsd_t *tsd, void *ptr, size_t size, bool try_tcache) +isdalloct(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache) { arena_chunk_t *chunk; @@ -1031,25 +1047,24 @@ isdalloct(tsd_t *tsd, void *ptr, size_t size, bool try_tcache) chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (likely(chunk != ptr)) - 
arena_sdalloc(tsd, chunk, ptr, size, try_tcache); + arena_sdalloc(tsd, chunk, ptr, size, tcache); else - huge_dalloc(tsd, ptr, try_tcache); + huge_dalloc(tsd, ptr, tcache); } JEMALLOC_ALWAYS_INLINE void -isqalloc(tsd_t *tsd, void *ptr, size_t size, bool try_tcache) +isqalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache) { if (config_fill && unlikely(opt_quarantine)) quarantine(tsd, ptr); else - isdalloct(tsd, ptr, size, try_tcache); + isdalloct(tsd, ptr, size, tcache); } JEMALLOC_ALWAYS_INLINE void * iralloct_realign(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, - size_t extra, size_t alignment, bool zero, bool try_tcache_alloc, - bool try_tcache_dalloc, arena_t *arena) + size_t extra, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena) { void *p; size_t usize, copysize; @@ -1057,7 +1072,7 @@ iralloct_realign(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, usize = sa2u(size + extra, alignment); if (usize == 0) return (NULL); - p = ipalloct(tsd, usize, alignment, zero, try_tcache_alloc, arena); + p = ipalloct(tsd, usize, alignment, zero, tcache, arena); if (p == NULL) { if (extra == 0) return (NULL); @@ -1065,8 +1080,7 @@ iralloct_realign(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, usize = sa2u(size, alignment); if (usize == 0) return (NULL); - p = ipalloct(tsd, usize, alignment, zero, try_tcache_alloc, - arena); + p = ipalloct(tsd, usize, alignment, zero, tcache, arena); if (p == NULL) return (NULL); } @@ -1076,13 +1090,13 @@ iralloct_realign(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, */ copysize = (size < oldsize) ? 
size : oldsize; memcpy(p, ptr, copysize); - isqalloc(tsd, ptr, oldsize, try_tcache_dalloc); + isqalloc(tsd, ptr, oldsize, tcache); return (p); } JEMALLOC_ALWAYS_INLINE void * iralloct(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t alignment, - bool zero, bool try_tcache_alloc, bool try_tcache_dalloc, arena_t *arena) + bool zero, tcache_t *tcache, arena_t *arena) { assert(ptr != NULL); @@ -1095,15 +1109,15 @@ iralloct(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t alignment, * and copy. */ return (iralloct_realign(tsd, ptr, oldsize, size, 0, alignment, - zero, try_tcache_alloc, try_tcache_dalloc, arena)); + zero, tcache, arena)); } if (likely(size <= arena_maxclass)) { return (arena_ralloc(tsd, arena, ptr, oldsize, size, 0, - alignment, zero, try_tcache_alloc, try_tcache_dalloc)); + alignment, zero, tcache)); } else { return (huge_ralloc(tsd, arena, ptr, oldsize, size, 0, - alignment, zero, try_tcache_alloc, try_tcache_dalloc)); + alignment, zero, tcache)); } } @@ -1112,8 +1126,8 @@ iralloc(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t alignment, bool zero) { - return (iralloct(tsd, ptr, oldsize, size, alignment, zero, true, true, - NULL)); + return (iralloct(tsd, ptr, oldsize, size, alignment, zero, + tcache_get(tsd, true), NULL)); } JEMALLOC_ALWAYS_INLINE bool diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 7a78f580..cf42bead 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -425,6 +425,11 @@ tcache_get_hard tcache_maxclass tcache_salloc tcache_stats_merge +tcaches +tcaches_create +tcaches_destroy +tcaches_flush +tcaches_get thread_allocated_cleanup thread_deallocated_cleanup tsd_booted diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index 6e97b3dd..2a3952be 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -4,6 +4,7 @@ 
typedef struct tcache_bin_info_s tcache_bin_info_t; typedef struct tcache_bin_s tcache_bin_t; typedef struct tcache_s tcache_t; +typedef struct tcaches_s tcaches_t; /* * tcache pointers close to NULL are used to encode state information that is @@ -70,7 +71,6 @@ struct tcache_bin_s { struct tcache_s { ql_elm(tcache_t) link; /* Used for aggregating stats. */ uint64_t prof_accumbytes;/* Cleared after arena_prof_accum(). */ - arena_t *arena; /* This thread's arena. */ unsigned ev_cnt; /* Event count since incremental GC. */ index_t next_gc_bin; /* Next bin to GC. */ tcache_bin_t tbins[1]; /* Dynamically sized. */ @@ -82,6 +82,14 @@ struct tcache_s { */ }; +/* Linkage for list of available (previously used) explicit tcache IDs. */ +struct tcaches_s { + union { + tcache_t *tcache; + tcaches_t *next; + }; +}; + #endif /* JEMALLOC_H_STRUCTS */ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS @@ -95,27 +103,41 @@ extern tcache_bin_info_t *tcache_bin_info; * Number of tcache bins. There are NBINS small-object bins, plus 0 or more * large-object bins. */ -extern size_t nhbins; +extern size_t nhbins; /* Maximum cached size class. */ -extern size_t tcache_maxclass; +extern size_t tcache_maxclass; + +/* + * Explicit tcaches, managed via the tcache.{create,flush,destroy} mallctls and + * usable via the MALLOCX_TCACHE() flag. The automatic per thread tcaches are + * completely disjoint from this data structure. tcaches starts off as a sparse + * array, so it has no physical memory footprint until individual pages are + * touched. This allows the entire array to be allocated the first time an + * explicit tcache is created without a disproportionate impact on memory usage. 
+ */ +extern tcaches_t *tcaches; size_t tcache_salloc(const void *ptr); -void tcache_event_hard(tcache_t *tcache); -void *tcache_alloc_small_hard(tcache_t *tcache, tcache_bin_t *tbin, - index_t binind); -void tcache_bin_flush_small(tcache_bin_t *tbin, index_t binind, unsigned rem, - tcache_t *tcache); -void tcache_bin_flush_large(tcache_bin_t *tbin, index_t binind, unsigned rem, - tcache_t *tcache); +void tcache_event_hard(tsd_t *tsd, tcache_t *tcache); +void *tcache_alloc_small_hard(tsd_t *tsd, tcache_t *tcache, + tcache_bin_t *tbin, index_t binind); +void tcache_bin_flush_small(tsd_t *tsd, tcache_bin_t *tbin, index_t binind, + unsigned rem, tcache_t *tcache); +void tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, index_t binind, + unsigned rem, tcache_t *tcache); void tcache_arena_associate(tcache_t *tcache, arena_t *arena); -void tcache_arena_reassociate(tcache_t *tcache, arena_t *arena); -void tcache_arena_dissociate(tcache_t *tcache); +void tcache_arena_reassociate(tcache_t *tcache, arena_t *oldarena, + arena_t *newarena); +void tcache_arena_dissociate(tcache_t *tcache, arena_t *arena); tcache_t *tcache_get_hard(tsd_t *tsd); tcache_t *tcache_create(tsd_t *tsd, arena_t *arena); void tcache_cleanup(tsd_t *tsd); void tcache_enabled_cleanup(tsd_t *tsd); void tcache_stats_merge(tcache_t *tcache, arena_t *arena); +bool tcaches_create(tsd_t *tsd, unsigned *r_ind); +void tcaches_flush(tsd_t *tsd, unsigned ind); +void tcaches_destroy(tsd_t *tsd, unsigned ind); bool tcache_boot(void); #endif /* JEMALLOC_H_EXTERNS */ @@ -123,16 +145,21 @@ bool tcache_boot(void); #ifdef JEMALLOC_H_INLINES #ifndef JEMALLOC_ENABLE_INLINE -void tcache_event(tcache_t *tcache); +void tcache_event(tsd_t *tsd, tcache_t *tcache); void tcache_flush(void); bool tcache_enabled_get(void); tcache_t *tcache_get(tsd_t *tsd, bool create); void tcache_enabled_set(bool enabled); void *tcache_alloc_easy(tcache_bin_t *tbin); -void *tcache_alloc_small(tcache_t *tcache, size_t size, bool zero); -void 
*tcache_alloc_large(tcache_t *tcache, size_t size, bool zero); -void tcache_dalloc_small(tcache_t *tcache, void *ptr, index_t binind); -void tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size); +void *tcache_alloc_small(tsd_t *tsd, tcache_t *tcache, size_t size, + bool zero); +void *tcache_alloc_large(tsd_t *tsd, tcache_t *tcache, size_t size, + bool zero); +void tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, + index_t binind); +void tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, + size_t size); +tcache_t *tcaches_get(tsd_t *tsd, unsigned ind); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_TCACHE_C_)) @@ -202,7 +229,7 @@ tcache_get(tsd_t *tsd, bool create) } JEMALLOC_ALWAYS_INLINE void -tcache_event(tcache_t *tcache) +tcache_event(tsd_t *tsd, tcache_t *tcache) { if (TCACHE_GC_INCR == 0) @@ -211,7 +238,7 @@ tcache_event(tcache_t *tcache) tcache->ev_cnt++; assert(tcache->ev_cnt <= TCACHE_GC_INCR); if (unlikely(tcache->ev_cnt == TCACHE_GC_INCR)) - tcache_event_hard(tcache); + tcache_event_hard(tsd, tcache); } JEMALLOC_ALWAYS_INLINE void * @@ -231,7 +258,7 @@ tcache_alloc_easy(tcache_bin_t *tbin) } JEMALLOC_ALWAYS_INLINE void * -tcache_alloc_small(tcache_t *tcache, size_t size, bool zero) +tcache_alloc_small(tsd_t *tsd, tcache_t *tcache, size_t size, bool zero) { void *ret; index_t binind; @@ -244,7 +271,7 @@ tcache_alloc_small(tcache_t *tcache, size_t size, bool zero) usize = index2size(binind); ret = tcache_alloc_easy(tbin); if (unlikely(ret == NULL)) { - ret = tcache_alloc_small_hard(tcache, tbin, binind); + ret = tcache_alloc_small_hard(tsd, tcache, tbin, binind); if (ret == NULL) return (NULL); } @@ -270,12 +297,12 @@ tcache_alloc_small(tcache_t *tcache, size_t size, bool zero) tbin->tstats.nrequests++; if (config_prof) tcache->prof_accumbytes += usize; - tcache_event(tcache); + tcache_event(tsd, tcache); return (ret); } JEMALLOC_ALWAYS_INLINE void * -tcache_alloc_large(tcache_t *tcache, size_t size, bool 
zero) +tcache_alloc_large(tsd_t *tsd, tcache_t *tcache, size_t size, bool zero) { void *ret; index_t binind; @@ -293,7 +320,7 @@ tcache_alloc_large(tcache_t *tcache, size_t size, bool zero) * Only allocate one large object at a time, because it's quite * expensive to create one and not use it. */ - ret = arena_malloc_large(tcache->arena, usize, zero); + ret = arena_malloc_large(arena_choose(tsd, NULL), usize, zero); if (ret == NULL) return (NULL); } else { @@ -321,12 +348,12 @@ tcache_alloc_large(tcache_t *tcache, size_t size, bool zero) tcache->prof_accumbytes += usize; } - tcache_event(tcache); + tcache_event(tsd, tcache); return (ret); } JEMALLOC_ALWAYS_INLINE void -tcache_dalloc_small(tcache_t *tcache, void *ptr, index_t binind) +tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, index_t binind) { tcache_bin_t *tbin; tcache_bin_info_t *tbin_info; @@ -339,18 +366,18 @@ tcache_dalloc_small(tcache_t *tcache, void *ptr, index_t binind) tbin = &tcache->tbins[binind]; tbin_info = &tcache_bin_info[binind]; if (unlikely(tbin->ncached == tbin_info->ncached_max)) { - tcache_bin_flush_small(tbin, binind, (tbin_info->ncached_max >> - 1), tcache); + tcache_bin_flush_small(tsd, tbin, binind, + (tbin_info->ncached_max >> 1), tcache); } assert(tbin->ncached < tbin_info->ncached_max); tbin->avail[tbin->ncached] = ptr; tbin->ncached++; - tcache_event(tcache); + tcache_event(tsd, tcache); } JEMALLOC_ALWAYS_INLINE void -tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size) +tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, size_t size) { index_t binind; tcache_bin_t *tbin; @@ -368,14 +395,23 @@ tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size) tbin = &tcache->tbins[binind]; tbin_info = &tcache_bin_info[binind]; if (unlikely(tbin->ncached == tbin_info->ncached_max)) { - tcache_bin_flush_large(tbin, binind, (tbin_info->ncached_max >> - 1), tcache); + tcache_bin_flush_large(tsd, tbin, binind, + (tbin_info->ncached_max >> 1), tcache); } 
assert(tbin->ncached < tbin_info->ncached_max); tbin->avail[tbin->ncached] = ptr; tbin->ncached++; - tcache_event(tcache); + tcache_event(tsd, tcache); +} + +JEMALLOC_ALWAYS_INLINE tcache_t * +tcaches_get(tsd_t *tsd, unsigned ind) +{ + tcaches_t *elm = &tcaches[ind]; + if (unlikely(elm->tcache == NULL)) + elm->tcache = tcache_create(tsd, arena_choose(tsd, NULL)); + return (elm->tcache); } #endif diff --git a/include/jemalloc/jemalloc_macros.h.in b/include/jemalloc/jemalloc_macros.h.in index 99f12611..7d1dcf4a 100644 --- a/include/jemalloc/jemalloc_macros.h.in +++ b/include/jemalloc/jemalloc_macros.h.in @@ -19,8 +19,16 @@ ((a < (size_t)INT_MAX) ? ffs(a)-1 : ffs(a>>32)+31) # endif # define MALLOCX_ZERO ((int)0x40) -/* Bias arena index bits so that 0 encodes "MALLOCX_ARENA() unspecified". */ -# define MALLOCX_ARENA(a) ((int)(((a)+1) << 8)) +/* + * Bias tcache index bits so that 0 encodes "automatic tcache management", and 1 + * encodes MALLOCX_TCACHE_NONE. + */ +# define MALLOCX_TCACHE(tc) ((int)(((tc)+2) << 8)) +# define MALLOCX_TCACHE_NONE MALLOCX_TCACHE(-1) +/* + * Bias arena index bits so that 0 encodes "use an automatically chosen arena". 
+ */ +# define MALLOCX_ARENA(a) ((int)(((a)+1) << 20)) #ifdef JEMALLOC_HAVE_ATTR # define JEMALLOC_ATTR(s) __attribute__((s)) diff --git a/src/arena.c b/src/arena.c index a5033bf8..907fbd7f 100644 --- a/src/arena.c +++ b/src/arena.c @@ -2182,8 +2182,7 @@ arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, void * arena_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size, - size_t extra, size_t alignment, bool zero, bool try_tcache_alloc, - bool try_tcache_dalloc) + size_t extra, size_t alignment, bool zero, tcache_t *tcache) { void *ret; size_t copysize; @@ -2201,12 +2200,9 @@ arena_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size, size_t usize = sa2u(size + extra, alignment); if (usize == 0) return (NULL); - ret = ipalloct(tsd, usize, alignment, zero, try_tcache_alloc, - arena); - } else { - ret = arena_malloc(tsd, arena, size + extra, zero, - try_tcache_alloc); - } + ret = ipalloct(tsd, usize, alignment, zero, tcache, arena); + } else + ret = arena_malloc(tsd, arena, size + extra, zero, tcache); if (ret == NULL) { if (extra == 0) @@ -2216,12 +2212,10 @@ arena_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size, size_t usize = sa2u(size, alignment); if (usize == 0) return (NULL); - ret = ipalloct(tsd, usize, alignment, zero, - try_tcache_alloc, arena); - } else { - ret = arena_malloc(tsd, arena, size, zero, - try_tcache_alloc); - } + ret = ipalloct(tsd, usize, alignment, zero, tcache, + arena); + } else + ret = arena_malloc(tsd, arena, size, zero, tcache); if (ret == NULL) return (NULL); @@ -2236,7 +2230,7 @@ arena_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size, copysize = (size < oldsize) ? 
size : oldsize; JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, copysize); memcpy(ret, ptr, copysize); - isqalloc(tsd, ptr, oldsize, try_tcache_dalloc); + isqalloc(tsd, ptr, oldsize, tcache); return (ret); } diff --git a/src/ckh.c b/src/ckh.c index db2ae392..ad075d60 100644 --- a/src/ckh.c +++ b/src/ckh.c @@ -270,7 +270,8 @@ ckh_grow(tsd_t *tsd, ckh_t *ckh) ret = true; goto label_return; } - tab = (ckhc_t *)ipalloc(tsd, usize, CACHELINE, true); + tab = (ckhc_t *)ipalloct(tsd, usize, CACHELINE, true, NULL, + NULL); if (tab == NULL) { ret = true; goto label_return; @@ -313,7 +314,7 @@ ckh_shrink(tsd_t *tsd, ckh_t *ckh) usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE); if (usize == 0) return; - tab = (ckhc_t *)ipalloc(tsd, usize, CACHELINE, true); + tab = (ckhc_t *)ipalloct(tsd, usize, CACHELINE, true, NULL, NULL); if (tab == NULL) { /* * An OOM error isn't worth propagating, since it doesn't @@ -389,7 +390,7 @@ ckh_new(tsd_t *tsd, ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ret = true; goto label_return; } - ckh->tab = (ckhc_t *)ipalloc(tsd, usize, CACHELINE, true); + ckh->tab = (ckhc_t *)ipalloct(tsd, usize, CACHELINE, true, NULL, NULL); if (ckh->tab == NULL) { ret = true; goto label_return; diff --git a/src/ctl.c b/src/ctl.c index 63a689a3..a2838032 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -110,6 +110,9 @@ CTL_PROTO(opt_prof_gdump) CTL_PROTO(opt_prof_final) CTL_PROTO(opt_prof_leak) CTL_PROTO(opt_prof_accum) +CTL_PROTO(tcache_create) +CTL_PROTO(tcache_flush) +CTL_PROTO(tcache_destroy) CTL_PROTO(arena_i_purge) static void arena_purge(unsigned arena_ind); CTL_PROTO(arena_i_dss) @@ -275,6 +278,12 @@ static const ctl_named_node_t opt_node[] = { {NAME("prof_accum"), CTL(opt_prof_accum)} }; +static const ctl_named_node_t tcache_node[] = { + {NAME("create"), CTL(tcache_create)}, + {NAME("flush"), CTL(tcache_flush)}, + {NAME("destroy"), CTL(tcache_destroy)} +}; + static const ctl_named_node_t chunk_node[] = { {NAME("alloc"), CTL(arena_i_chunk_alloc)}, {NAME("dalloc"), 
CTL(arena_i_chunk_dalloc)} @@ -474,6 +483,7 @@ static const ctl_named_node_t root_node[] = { {NAME("thread"), CHILD(named, thread)}, {NAME("config"), CHILD(named, config)}, {NAME("opt"), CHILD(named, opt)}, + {NAME("tcache"), CHILD(named, tcache)}, {NAME("arena"), CHILD(indexed, arena)}, {NAME("arenas"), CHILD(named, arenas)}, {NAME("prof"), CHILD(named, prof)}, @@ -1281,19 +1291,21 @@ thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, { int ret; tsd_t *tsd; - arena_t *arena; + arena_t *oldarena; unsigned newind, oldind; tsd = tsd_fetch(); - arena = arena_choose(tsd, NULL); - if (arena == NULL) + oldarena = arena_choose(tsd, NULL); + if (oldarena == NULL) return (EAGAIN); malloc_mutex_lock(&ctl_mtx); - newind = oldind = arena->ind; + newind = oldind = oldarena->ind; WRITE(newind, unsigned); READ(oldind, unsigned); if (newind != oldind) { + arena_t *newarena; + if (newind >= ctl_stats.narenas) { /* New arena index is out of range. */ ret = EFAULT; @@ -1301,8 +1313,8 @@ thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, } /* Initialize arena if necessary. 
*/ - arena = arena_get(tsd, newind, true, true); - if (arena == NULL) { + newarena = arena_get(tsd, newind, true, true); + if (newarena == NULL) { ret = EAGAIN; goto label_return; } @@ -1310,8 +1322,10 @@ thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, arena_migrate(tsd, oldind, newind); if (config_tcache) { tcache_t *tcache = tsd_tcache_get(tsd); - if (tcache != NULL) - tcache_arena_reassociate(tcache, arena); + if (tcache != NULL) { + tcache_arena_reassociate(tcache, oldarena, + newarena); + } } } @@ -1438,6 +1452,89 @@ label_return: /******************************************************************************/ +static int +tcache_create_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, + void *newp, size_t newlen) +{ + int ret; + tsd_t *tsd; + unsigned tcache_ind; + + if (!config_tcache) + return (ENOENT); + + tsd = tsd_fetch(); + + malloc_mutex_lock(&ctl_mtx); + READONLY(); + if (tcaches_create(tsd, &tcache_ind)) { + ret = EFAULT; + goto label_return; + } + READ(tcache_ind, unsigned); + + ret = 0; +label_return: + malloc_mutex_unlock(&ctl_mtx); + return (ret); +} + +static int +tcache_flush_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, + void *newp, size_t newlen) +{ + int ret; + tsd_t *tsd; + unsigned tcache_ind; + + if (!config_tcache) + return (ENOENT); + + tsd = tsd_fetch(); + + WRITEONLY(); + tcache_ind = UINT_MAX; + WRITE(tcache_ind, unsigned); + if (tcache_ind == UINT_MAX) { + ret = EFAULT; + goto label_return; + } + tcaches_flush(tsd, tcache_ind); + + ret = 0; +label_return: + return (ret); +} + +static int +tcache_destroy_ctl(const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) +{ + int ret; + tsd_t *tsd; + unsigned tcache_ind; + + if (!config_tcache) + return (ENOENT); + + tsd = tsd_fetch(); + + WRITEONLY(); + tcache_ind = UINT_MAX; + WRITE(tcache_ind, unsigned); + if (tcache_ind == UINT_MAX) { + ret = EFAULT; + goto label_return; + } + 
tcaches_destroy(tsd, tcache_ind); + + ret = 0; +label_return: + return (ret); +} + +/******************************************************************************/ + /* ctl_mutex must be held during execution of this function. */ static void arena_purge(unsigned arena_ind) diff --git a/src/huge.c b/src/huge.c index 84a1ab23..db0ecd51 100644 --- a/src/huge.c +++ b/src/huge.c @@ -13,7 +13,8 @@ static malloc_mutex_t huge_mtx; static extent_tree_t huge; void * -huge_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero, bool try_tcache) +huge_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero, + tcache_t *tcache) { size_t usize; @@ -23,12 +24,12 @@ huge_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero, bool try_tcache) return (NULL); } - return (huge_palloc(tsd, arena, usize, chunksize, zero, try_tcache)); + return (huge_palloc(tsd, arena, usize, chunksize, zero, tcache)); } void * huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, - bool zero, bool try_tcache) + bool zero, tcache_t *tcache) { void *ret; extent_node_t *node; @@ -38,7 +39,7 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, /* Allocate an extent node with which to track the chunk. 
*/ node = ipallocztm(tsd, CACHELINE_CEILING(sizeof(extent_node_t)), - CACHELINE, false, try_tcache, true, arena); + CACHELINE, false, tcache, true, arena); if (node == NULL) return (NULL); @@ -50,7 +51,7 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, arena = arena_choose(tsd, arena); if (unlikely(arena == NULL) || (ret = arena_chunk_alloc_huge(arena, usize, alignment, &is_zeroed)) == NULL) { - idalloctm(tsd, node, try_tcache, true); + idalloctm(tsd, node, tcache, true); return (NULL); } @@ -307,8 +308,7 @@ huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, void * huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size, - size_t extra, size_t alignment, bool zero, bool try_tcache_alloc, - bool try_tcache_dalloc) + size_t extra, size_t alignment, bool zero, tcache_t *tcache) { void *ret; size_t copysize; @@ -324,11 +324,9 @@ huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size, */ if (alignment > chunksize) { ret = huge_palloc(tsd, arena, size + extra, alignment, zero, - try_tcache_alloc); - } else { - ret = huge_malloc(tsd, arena, size + extra, zero, - try_tcache_alloc); - } + tcache); + } else + ret = huge_malloc(tsd, arena, size + extra, zero, tcache); if (ret == NULL) { if (extra == 0) @@ -336,11 +334,9 @@ huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size, /* Try again, this time without extra. */ if (alignment > chunksize) { ret = huge_palloc(tsd, arena, size, alignment, zero, - try_tcache_alloc); - } else { - ret = huge_malloc(tsd, arena, size, zero, - try_tcache_alloc); - } + tcache); + } else + ret = huge_malloc(tsd, arena, size, zero, tcache); if (ret == NULL) return (NULL); @@ -352,12 +348,12 @@ huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size, */ copysize = (size < oldsize) ? 
size : oldsize; memcpy(ret, ptr, copysize); - isqalloc(tsd, ptr, oldsize, try_tcache_dalloc); + isqalloc(tsd, ptr, oldsize, tcache); return (ret); } void -huge_dalloc(tsd_t *tsd, void *ptr, bool try_tcache) +huge_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache) { extent_node_t *node; @@ -368,7 +364,7 @@ huge_dalloc(tsd_t *tsd, void *ptr, bool try_tcache) huge_dalloc_junk(node->addr, node->size); arena_chunk_dalloc_huge(node->arena, node->addr, node->size); - idalloctm(tsd, node, try_tcache, true); + idalloctm(tsd, node, tcache, true); } arena_t * diff --git a/src/jemalloc.c b/src/jemalloc.c index d1fa674c..94477914 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -367,6 +367,8 @@ arena_init_locked(unsigned ind) /* Expand arenas if necessary. */ assert(ind <= narenas_total); + if (ind > MALLOCX_ARENA_MAX) + return (NULL); if (ind == narenas_total) { unsigned narenas_new = narenas_total + 1; arena_t **arenas_new = @@ -1696,7 +1698,7 @@ irealloc_prof(tsd_t *tsd, void *oldptr, size_t old_usize, size_t usize) } JEMALLOC_INLINE_C void -ifree(tsd_t *tsd, void *ptr, bool try_tcache) +ifree(tsd_t *tsd, void *ptr, tcache_t *tcache) { size_t usize; UNUSED size_t rzsize JEMALLOC_CC_SILENCE_INIT(0); @@ -1713,12 +1715,12 @@ ifree(tsd_t *tsd, void *ptr, bool try_tcache) *tsd_thread_deallocatedp_get(tsd) += usize; if (config_valgrind && unlikely(in_valgrind)) rzsize = p2rz(ptr); - iqalloc(tsd, ptr, try_tcache); + iqalloc(tsd, ptr, tcache); JEMALLOC_VALGRIND_FREE(ptr, rzsize); } JEMALLOC_INLINE_C void -isfree(tsd_t *tsd, void *ptr, size_t usize, bool try_tcache) +isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache) { UNUSED size_t rzsize JEMALLOC_CC_SILENCE_INIT(0); @@ -1731,7 +1733,7 @@ isfree(tsd_t *tsd, void *ptr, size_t usize, bool try_tcache) *tsd_thread_deallocatedp_get(tsd) += usize; if (config_valgrind && unlikely(in_valgrind)) rzsize = p2rz(ptr); - isqalloc(tsd, ptr, usize, try_tcache); + isqalloc(tsd, ptr, usize, tcache); JEMALLOC_VALGRIND_FREE(ptr, rzsize); } 
@@ -1749,7 +1751,7 @@ je_realloc(void *ptr, size_t size) /* realloc(ptr, 0) is equivalent to free(ptr). */ UTRACE(ptr, 0, 0); tsd = tsd_fetch(); - ifree(tsd, ptr, true); + ifree(tsd, ptr, tcache_get(tsd, false)); return (NULL); } size = 1; @@ -1802,8 +1804,10 @@ je_free(void *ptr) { UTRACE(ptr, 0, 0); - if (likely(ptr != NULL)) - ifree(tsd_fetch(), ptr, true); + if (likely(ptr != NULL)) { + tsd_t *tsd = tsd_fetch(); + ifree(tsd, ptr, tcache_get(tsd, false)); + } } /* @@ -1875,7 +1879,7 @@ JEMALLOC_EXPORT void *(*__memalign_hook)(size_t alignment, size_t size) = JEMALLOC_ALWAYS_INLINE_C bool imallocx_flags_decode_hard(tsd_t *tsd, size_t size, int flags, size_t *usize, - size_t *alignment, bool *zero, bool *try_tcache, arena_t **arena) + size_t *alignment, bool *zero, tcache_t **tcache, arena_t **arena) { if ((flags & MALLOCX_LG_ALIGN_MASK) == 0) { @@ -1886,22 +1890,26 @@ imallocx_flags_decode_hard(tsd_t *tsd, size_t size, int flags, size_t *usize, *usize = sa2u(size, *alignment); } *zero = MALLOCX_ZERO_GET(flags); + if ((flags & MALLOCX_TCACHE_MASK) != 0) { + if ((flags & MALLOCX_TCACHE_MASK) == MALLOCX_TCACHE_NONE) + *tcache = NULL; + else + *tcache = tcaches_get(tsd, MALLOCX_TCACHE_GET(flags)); + } else + *tcache = tcache_get(tsd, true); if ((flags & MALLOCX_ARENA_MASK) != 0) { unsigned arena_ind = MALLOCX_ARENA_GET(flags); - *try_tcache = false; *arena = arena_get(tsd, arena_ind, true, true); if (unlikely(*arena == NULL)) return (true); - } else { - *try_tcache = true; + } else *arena = NULL; - } return (false); } JEMALLOC_ALWAYS_INLINE_C bool imallocx_flags_decode(tsd_t *tsd, size_t size, int flags, size_t *usize, - size_t *alignment, bool *zero, bool *try_tcache, arena_t **arena) + size_t *alignment, bool *zero, tcache_t **tcache, arena_t **arena) { if (likely(flags == 0)) { @@ -1909,55 +1917,53 @@ imallocx_flags_decode(tsd_t *tsd, size_t size, int flags, size_t *usize, assert(usize != 0); *alignment = 0; *zero = false; - *try_tcache = true; + *tcache = 
tcache_get(tsd, true); *arena = NULL; return (false); } else { return (imallocx_flags_decode_hard(tsd, size, flags, usize, - alignment, zero, try_tcache, arena)); + alignment, zero, tcache, arena)); } } JEMALLOC_ALWAYS_INLINE_C void * imallocx_flags(tsd_t *tsd, size_t usize, size_t alignment, bool zero, - bool try_tcache, arena_t *arena) + tcache_t *tcache, arena_t *arena) { - if (alignment != 0) { - return (ipalloct(tsd, usize, alignment, zero, try_tcache, - arena)); - } + if (alignment != 0) + return (ipalloct(tsd, usize, alignment, zero, tcache, arena)); if (zero) - return (icalloct(tsd, usize, try_tcache, arena)); - return (imalloct(tsd, usize, try_tcache, arena)); + return (icalloct(tsd, usize, tcache, arena)); + return (imalloct(tsd, usize, tcache, arena)); } JEMALLOC_ALWAYS_INLINE_C void * imallocx_maybe_flags(tsd_t *tsd, size_t size, int flags, size_t usize, - size_t alignment, bool zero, bool try_tcache, arena_t *arena) + size_t alignment, bool zero, tcache_t *tcache, arena_t *arena) { if (likely(flags == 0)) return (imalloc(tsd, size)); - return (imallocx_flags(tsd, usize, alignment, zero, try_tcache, arena)); + return (imallocx_flags(tsd, usize, alignment, zero, tcache, arena)); } static void * imallocx_prof_sample(tsd_t *tsd, size_t size, int flags, size_t usize, - size_t alignment, bool zero, bool try_tcache, arena_t *arena) + size_t alignment, bool zero, tcache_t *tcache, arena_t *arena) { void *p; if (usize <= SMALL_MAXCLASS) { assert(((alignment == 0) ? 
s2u(LARGE_MINCLASS) : sa2u(LARGE_MINCLASS, alignment)) == LARGE_MINCLASS); - p = imalloct(tsd, LARGE_MINCLASS, try_tcache, arena); + p = imalloct(tsd, LARGE_MINCLASS, tcache, arena); if (p == NULL) return (NULL); arena_prof_promoted(p, usize); } else { p = imallocx_maybe_flags(tsd, size, flags, usize, alignment, - zero, try_tcache, arena); + zero, tcache, arena); } return (p); @@ -1969,20 +1975,20 @@ imallocx_prof(tsd_t *tsd, size_t size, int flags, size_t *usize) void *p; size_t alignment; bool zero; - bool try_tcache; + tcache_t *tcache; arena_t *arena; prof_tctx_t *tctx; if (unlikely(imallocx_flags_decode(tsd, size, flags, usize, &alignment, - &zero, &try_tcache, &arena))) + &zero, &tcache, &arena))) return (NULL); tctx = prof_alloc_prep(tsd, *usize, true); if (likely((uintptr_t)tctx == (uintptr_t)1U)) { p = imallocx_maybe_flags(tsd, size, flags, *usize, alignment, - zero, try_tcache, arena); + zero, tcache, arena); } else if ((uintptr_t)tctx > (uintptr_t)1U) { p = imallocx_prof_sample(tsd, size, flags, *usize, alignment, - zero, try_tcache, arena); + zero, tcache, arena); } else p = NULL; if (unlikely(p == NULL)) { @@ -1999,7 +2005,7 @@ imallocx_no_prof(tsd_t *tsd, size_t size, int flags, size_t *usize) { size_t alignment; bool zero; - bool try_tcache; + tcache_t *tcache; arena_t *arena; if (likely(flags == 0)) { @@ -2009,10 +2015,9 @@ imallocx_no_prof(tsd_t *tsd, size_t size, int flags, size_t *usize) } if (unlikely(imallocx_flags_decode_hard(tsd, size, flags, usize, - &alignment, &zero, &try_tcache, &arena))) + &alignment, &zero, &tcache, &arena))) return (NULL); - return (imallocx_flags(tsd, *usize, alignment, zero, try_tcache, - arena)); + return (imallocx_flags(tsd, *usize, alignment, zero, tcache, arena)); } void * @@ -2053,8 +2058,8 @@ label_oom: static void * irallocx_prof_sample(tsd_t *tsd, void *oldptr, size_t old_usize, size_t size, - size_t alignment, size_t usize, bool zero, bool try_tcache_alloc, - bool try_tcache_dalloc, arena_t *arena, 
prof_tctx_t *tctx) + size_t alignment, size_t usize, bool zero, tcache_t *tcache, arena_t *arena, + prof_tctx_t *tctx) { void *p; @@ -2062,13 +2067,13 @@ irallocx_prof_sample(tsd_t *tsd, void *oldptr, size_t old_usize, size_t size, return (NULL); if (usize <= SMALL_MAXCLASS) { p = iralloct(tsd, oldptr, old_usize, LARGE_MINCLASS, alignment, - zero, try_tcache_alloc, try_tcache_dalloc, arena); + zero, tcache, arena); if (p == NULL) return (NULL); arena_prof_promoted(p, usize); } else { p = iralloct(tsd, oldptr, old_usize, size, alignment, zero, - try_tcache_alloc, try_tcache_dalloc, arena); + tcache, arena); } return (p); @@ -2076,8 +2081,8 @@ irallocx_prof_sample(tsd_t *tsd, void *oldptr, size_t old_usize, size_t size, JEMALLOC_ALWAYS_INLINE_C void * irallocx_prof(tsd_t *tsd, void *oldptr, size_t old_usize, size_t size, - size_t alignment, size_t *usize, bool zero, bool try_tcache_alloc, - bool try_tcache_dalloc, arena_t *arena) + size_t alignment, size_t *usize, bool zero, tcache_t *tcache, + arena_t *arena) { void *p; prof_tctx_t *old_tctx, *tctx; @@ -2086,11 +2091,10 @@ irallocx_prof(tsd_t *tsd, void *oldptr, size_t old_usize, size_t size, tctx = prof_alloc_prep(tsd, *usize, false); if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) { p = irallocx_prof_sample(tsd, oldptr, old_usize, size, - alignment, *usize, zero, try_tcache_alloc, - try_tcache_dalloc, arena, tctx); + alignment, *usize, zero, tcache, arena, tctx); } else { p = iralloct(tsd, oldptr, old_usize, size, alignment, zero, - try_tcache_alloc, try_tcache_dalloc, arena); + tcache, arena); } if (unlikely(p == NULL)) { prof_alloc_rollback(tsd, tctx, false); @@ -2123,8 +2127,8 @@ je_rallocx(void *ptr, size_t size, int flags) UNUSED size_t old_rzsize JEMALLOC_CC_SILENCE_INIT(0); size_t alignment = MALLOCX_ALIGN_GET(flags); bool zero = flags & MALLOCX_ZERO; - bool try_tcache_alloc, try_tcache_dalloc; arena_t *arena; + tcache_t *tcache; assert(ptr != NULL); assert(size != 0); @@ -2134,18 +2138,19 @@ 
je_rallocx(void *ptr, size_t size, int flags) if (unlikely((flags & MALLOCX_ARENA_MASK) != 0)) { unsigned arena_ind = MALLOCX_ARENA_GET(flags); - arena_chunk_t *chunk; - try_tcache_alloc = false; - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); arena = arena_get(tsd, arena_ind, true, true); if (unlikely(arena == NULL)) goto label_oom; - try_tcache_dalloc = (chunk == ptr || chunk->arena != arena); - } else { - try_tcache_alloc = true; - try_tcache_dalloc = true; + } else arena = NULL; - } + + if (unlikely((flags & MALLOCX_TCACHE_MASK) != 0)) { + if ((flags & MALLOCX_TCACHE_MASK) == MALLOCX_TCACHE_NONE) + tcache = NULL; + else + tcache = tcaches_get(tsd, MALLOCX_TCACHE_GET(flags)); + } else + tcache = tcache_get(tsd, true); old_usize = isalloc(ptr, config_prof); if (config_valgrind && unlikely(in_valgrind)) @@ -2155,12 +2160,12 @@ je_rallocx(void *ptr, size_t size, int flags) usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment); assert(usize != 0); p = irallocx_prof(tsd, ptr, old_usize, size, alignment, &usize, - zero, try_tcache_alloc, try_tcache_dalloc, arena); + zero, tcache, arena); if (unlikely(p == NULL)) goto label_oom; } else { p = iralloct(tsd, ptr, old_usize, size, alignment, zero, - try_tcache_alloc, try_tcache_dalloc, arena); + tcache, arena); if (unlikely(p == NULL)) goto label_oom; if (config_stats || (config_valgrind && unlikely(in_valgrind))) @@ -2319,28 +2324,22 @@ void je_dallocx(void *ptr, int flags) { tsd_t *tsd; - bool try_tcache; + tcache_t *tcache; assert(ptr != NULL); assert(malloc_initialized() || IS_INITIALIZER); tsd = tsd_fetch(); - if (unlikely((flags & MALLOCX_ARENA_MASK) != 0)) { - unsigned arena_ind = MALLOCX_ARENA_GET(flags); - arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - arena_t *arena = arena_get(tsd, arena_ind, true, true); - /* - * If arena is NULL, the application passed an arena that has - * never been used before, which is unsupported during - * deallocation. 
- */ - assert(arena != NULL); - try_tcache = (chunk == ptr || chunk->arena != arena); + if (unlikely((flags & MALLOCX_TCACHE_MASK) != 0)) { + if ((flags & MALLOCX_TCACHE_MASK) == MALLOCX_TCACHE_NONE) + tcache = NULL; + else + tcache = tcaches_get(tsd, MALLOCX_TCACHE_GET(flags)); } else - try_tcache = true; + tcache = tcache_get(tsd, false); UTRACE(ptr, 0, 0); - ifree(tsd_fetch(), ptr, try_tcache); + ifree(tsd_fetch(), ptr, tcache); } JEMALLOC_ALWAYS_INLINE_C size_t @@ -2360,7 +2359,7 @@ void je_sdallocx(void *ptr, size_t size, int flags) { tsd_t *tsd; - bool try_tcache; + tcache_t *tcache; size_t usize; assert(ptr != NULL); @@ -2369,21 +2368,16 @@ je_sdallocx(void *ptr, size_t size, int flags) assert(usize == isalloc(ptr, config_prof)); tsd = tsd_fetch(); - if (unlikely((flags & MALLOCX_ARENA_MASK) != 0)) { - unsigned arena_ind = MALLOCX_ARENA_GET(flags); - arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - arena_t *arena = arena_get(tsd, arena_ind, true, true); - /* - * If arena is NULL, the application passed an arena that has - * never been used before, which is unsupported during - * deallocation. - */ - try_tcache = (chunk == ptr || chunk->arena != arena); + if (unlikely((flags & MALLOCX_TCACHE_MASK) != 0)) { + if ((flags & MALLOCX_TCACHE_MASK) == MALLOCX_TCACHE_NONE) + tcache = NULL; + else + tcache = tcaches_get(tsd, MALLOCX_TCACHE_GET(flags)); } else - try_tcache = true; + tcache = tcache_get(tsd, false); UTRACE(ptr, 0, 0); - isfree(tsd, ptr, usize, try_tcache); + isfree(tsd, ptr, usize, tcache); } size_t diff --git a/src/prof.c b/src/prof.c index 04b2591c..4f1580b0 100644 --- a/src/prof.c +++ b/src/prof.c @@ -540,7 +540,8 @@ prof_gctx_create(tsd_t *tsd, prof_bt_t *bt) * Create a single allocation that has space for vec of length bt->len. 
*/ prof_gctx_t *gctx = (prof_gctx_t *)iallocztm(tsd, offsetof(prof_gctx_t, - vec) + (bt->len * sizeof(void *)), false, true, true, NULL); + vec) + (bt->len * sizeof(void *)), false, tcache_get(tsd, true), + true, NULL); if (gctx == NULL) return (NULL); gctx->lock = prof_gctx_mutex_choose(); @@ -581,7 +582,7 @@ prof_gctx_try_destroy(tsd_t *tsd, prof_tdata_t *tdata_self, prof_gctx_t *gctx, prof_leave(tsd, tdata_self); /* Destroy gctx. */ malloc_mutex_unlock(gctx->lock); - idalloctm(tsd, gctx, true, true); + idalloctm(tsd, gctx, tcache_get(tsd, false), true); } else { /* * Compensate for increment in prof_tctx_destroy() or @@ -681,7 +682,7 @@ prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) prof_tdata_destroy(tsd, tdata, false); if (destroy_tctx) - idalloctm(tsd, tctx, true, true); + idalloctm(tsd, tctx, tcache_get(tsd, false), true); } static bool @@ -710,7 +711,7 @@ prof_lookup_global(tsd_t *tsd, prof_bt_t *bt, prof_tdata_t *tdata, if (ckh_insert(tsd, &bt2gctx, btkey.v, gctx.v)) { /* OOM. */ prof_leave(tsd, tdata); - idalloctm(tsd, gctx.v, true, true); + idalloctm(tsd, gctx.v, tcache_get(tsd, false), true); return (true); } new_gctx = true; @@ -754,6 +755,7 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) ret.p->prepared = true; malloc_mutex_unlock(tdata->lock); if (not_found) { + tcache_t *tcache; void *btkey; prof_gctx_t *gctx; bool new_gctx, error; @@ -767,7 +769,8 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) return (NULL); /* Link a prof_tctx_t into gctx for this thread. 
*/ - ret.v = iallocztm(tsd, sizeof(prof_tctx_t), false, true, true, + tcache = tcache_get(tsd, true); + ret.v = iallocztm(tsd, sizeof(prof_tctx_t), false, tcache, true, NULL); if (ret.p == NULL) { if (new_gctx) @@ -786,7 +789,7 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) if (error) { if (new_gctx) prof_gctx_try_destroy(tsd, tdata, gctx, tdata); - idalloctm(tsd, ret.v, true, true); + idalloctm(tsd, ret.v, tcache, true); return (NULL); } malloc_mutex_lock(gctx->lock); @@ -1166,7 +1169,8 @@ prof_gctx_finish(tsd_t *tsd, prof_gctx_tree_t *gctxs) to_destroy); tctx_tree_remove(&gctx->tctxs, to_destroy); - idalloctm(tsd, to_destroy, true, true); + idalloctm(tsd, to_destroy, + tcache_get(tsd, false), true); } else next = NULL; } while (next != NULL); @@ -1644,12 +1648,14 @@ prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, char *thread_name, bool active) { prof_tdata_t *tdata; + tcache_t *tcache; cassert(config_prof); /* Initialize an empty cache for this thread. */ + tcache = tcache_get(tsd, true); tdata = (prof_tdata_t *)iallocztm(tsd, sizeof(prof_tdata_t), false, - true, true, NULL); + tcache, true, NULL); if (tdata == NULL) return (NULL); @@ -1662,7 +1668,7 @@ prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, if (ckh_new(tsd, &tdata->bt2tctx, PROF_CKH_MINITEMS, prof_bt_hash, prof_bt_keycomp)) { - idalloctm(tsd, tdata, true, true); + idalloctm(tsd, tdata, tcache, true); return (NULL); } @@ -1708,16 +1714,18 @@ static void prof_tdata_destroy_locked(tsd_t *tsd, prof_tdata_t *tdata, bool even_if_attached) { + tcache_t *tcache; assert(prof_tdata_should_destroy(tdata, even_if_attached)); assert(tsd_prof_tdata_get(tsd) != tdata); tdata_tree_remove(&tdatas, tdata); + tcache = tcache_get(tsd, false); if (tdata->thread_name != NULL) - idalloctm(tsd, tdata->thread_name, true, true); + idalloctm(tsd, tdata->thread_name, tcache, true); ckh_delete(tsd, &tdata->bt2tctx); - idalloctm(tsd, tdata, true, true); + idalloctm(tsd, tdata, tcache, 
true); } static void @@ -1878,7 +1886,7 @@ prof_thread_name_alloc(tsd_t *tsd, const char *thread_name) if (size == 1) return (""); - ret = iallocztm(tsd, size, false, true, true, NULL); + ret = iallocztm(tsd, size, false, tcache_get(tsd, true), true, NULL); if (ret == NULL) return (NULL); memcpy(ret, thread_name, size); @@ -1910,7 +1918,8 @@ prof_thread_name_set(tsd_t *tsd, const char *thread_name) return (EAGAIN); if (tdata->thread_name != NULL) { - idalloctm(tsd, tdata->thread_name, true, true); + idalloctm(tsd, tdata->thread_name, tcache_get(tsd, false), + true); tdata->thread_name = NULL; } if (strlen(s) > 0) diff --git a/src/quarantine.c b/src/quarantine.c index 094b44d3..adc7305d 100644 --- a/src/quarantine.c +++ b/src/quarantine.c @@ -27,8 +27,8 @@ quarantine_init(tsd_t *tsd, size_t lg_maxobjs) assert(tsd_nominal(tsd)); quarantine = (quarantine_t *)iallocztm(tsd, offsetof(quarantine_t, objs) - + ((ZU(1) << lg_maxobjs) * sizeof(quarantine_obj_t)), false, true, - true, NULL); + + ((ZU(1) << lg_maxobjs) * sizeof(quarantine_obj_t)), false, + tcache_get(tsd, true), true, NULL); if (quarantine == NULL) return (NULL); quarantine->curbytes = 0; @@ -55,7 +55,7 @@ quarantine_alloc_hook_work(tsd_t *tsd) if (tsd_quarantine_get(tsd) == NULL) tsd_quarantine_set(tsd, quarantine); else - idalloctm(tsd, quarantine, true, true); + idalloctm(tsd, quarantine, tcache_get(tsd, false), true); } static quarantine_t * @@ -87,7 +87,7 @@ quarantine_grow(tsd_t *tsd, quarantine_t *quarantine) memcpy(&ret->objs[ncopy_a], quarantine->objs, ncopy_b * sizeof(quarantine_obj_t)); } - idalloctm(tsd, quarantine, true, true); + idalloctm(tsd, quarantine, tcache_get(tsd, false), true); tsd_quarantine_set(tsd, ret); return (ret); @@ -177,7 +177,7 @@ quarantine_cleanup(tsd_t *tsd) quarantine = tsd_quarantine_get(tsd); if (quarantine != NULL) { quarantine_drain(tsd, quarantine, 0); - idalloctm(tsd, quarantine, true, true); + idalloctm(tsd, quarantine, tcache_get(tsd, false), true); 
tsd_quarantine_set(tsd, NULL); } } diff --git a/src/tcache.c b/src/tcache.c index d638015f..c7d4f784 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -13,6 +13,14 @@ static unsigned stack_nelms; /* Total stack elms per tcache. */ size_t nhbins; size_t tcache_maxclass; +tcaches_t *tcaches; + +/* Index of first element within tcaches that has never been used. */ +static unsigned tcaches_past; + +/* Head of singly linked list tracking available tcaches elements. */ +static tcaches_t *tcaches_avail; + /******************************************************************************/ size_t tcache_salloc(const void *ptr) @@ -22,7 +30,7 @@ size_t tcache_salloc(const void *ptr) } void -tcache_event_hard(tcache_t *tcache) +tcache_event_hard(tsd_t *tsd, tcache_t *tcache) { index_t binind = tcache->next_gc_bin; tcache_bin_t *tbin = &tcache->tbins[binind]; @@ -33,11 +41,11 @@ tcache_event_hard(tcache_t *tcache) * Flush (ceiling) 3/4 of the objects below the low water mark. */ if (binind < NBINS) { - tcache_bin_flush_small(tbin, binind, tbin->ncached - - tbin->low_water + (tbin->low_water >> 2), tcache); + tcache_bin_flush_small(tsd, tbin, binind, tbin->ncached + - tbin->low_water + (tbin->low_water >> 2), tcache); } else { - tcache_bin_flush_large(tbin, binind, tbin->ncached - - tbin->low_water + (tbin->low_water >> 2), tcache); + tcache_bin_flush_large(tsd, tbin, binind, tbin->ncached + - tbin->low_water + (tbin->low_water >> 2), tcache); } /* * Reduce fill count by 2X. Limit lg_fill_div such that the @@ -62,11 +70,12 @@ tcache_event_hard(tcache_t *tcache) } void * -tcache_alloc_small_hard(tcache_t *tcache, tcache_bin_t *tbin, index_t binind) +tcache_alloc_small_hard(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, + index_t binind) { void *ret; - arena_tcache_fill_small(tcache->arena, tbin, binind, + arena_tcache_fill_small(arena_choose(tsd, NULL), tbin, binind, config_prof ? 
tcache->prof_accumbytes : 0); if (config_prof) tcache->prof_accumbytes = 0; @@ -76,9 +85,10 @@ tcache_alloc_small_hard(tcache_t *tcache, tcache_bin_t *tbin, index_t binind) } void -tcache_bin_flush_small(tcache_bin_t *tbin, index_t binind, unsigned rem, - tcache_t *tcache) +tcache_bin_flush_small(tsd_t *tsd, tcache_bin_t *tbin, index_t binind, + unsigned rem, tcache_t *tcache) { + arena_t *arena; void *ptr; unsigned i, nflush, ndeferred; bool merged_stats = false; @@ -86,21 +96,23 @@ tcache_bin_flush_small(tcache_bin_t *tbin, index_t binind, unsigned rem, assert(binind < NBINS); assert(rem <= tbin->ncached); + arena = arena_choose(tsd, NULL); + assert(arena != NULL); for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) { /* Lock the arena bin associated with the first object. */ arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE( tbin->avail[0]); - arena_t *arena = chunk->arena; + arena_t *bin_arena = chunk->arena; arena_bin_t *bin = &arena->bins[binind]; - if (config_prof && arena == tcache->arena) { + if (config_prof && bin_arena == arena) { if (arena_prof_accum(arena, tcache->prof_accumbytes)) prof_idump(); tcache->prof_accumbytes = 0; } malloc_mutex_lock(&bin->lock); - if (config_stats && arena == tcache->arena) { + if (config_stats && bin_arena == arena) { assert(!merged_stats); merged_stats = true; bin->stats.nflushes++; @@ -112,12 +124,12 @@ tcache_bin_flush_small(tcache_bin_t *tbin, index_t binind, unsigned rem, ptr = tbin->avail[i]; assert(ptr != NULL); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - if (chunk->arena == arena) { + if (chunk->arena == bin_arena) { size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; arena_chunk_map_bits_t *bitselm = arena_bitselm_get(chunk, pageind); - arena_dalloc_bin_junked_locked(arena, chunk, + arena_dalloc_bin_junked_locked(bin_arena, chunk, ptr, bitselm); } else { /* @@ -137,7 +149,7 @@ tcache_bin_flush_small(tcache_bin_t *tbin, index_t binind, unsigned rem, * The flush loop didn't 
happen to flush to this thread's * arena, so the stats didn't get merged. Manually do so now. */ - arena_bin_t *bin = &tcache->arena->bins[binind]; + arena_bin_t *bin = &arena->bins[binind]; malloc_mutex_lock(&bin->lock); bin->stats.nflushes++; bin->stats.nrequests += tbin->tstats.nrequests; @@ -153,9 +165,10 @@ tcache_bin_flush_small(tcache_bin_t *tbin, index_t binind, unsigned rem, } void -tcache_bin_flush_large(tcache_bin_t *tbin, index_t binind, unsigned rem, - tcache_t *tcache) +tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, index_t binind, + unsigned rem, tcache_t *tcache) { + arena_t *arena; void *ptr; unsigned i, nflush, ndeferred; bool merged_stats = false; @@ -163,17 +176,19 @@ tcache_bin_flush_large(tcache_bin_t *tbin, index_t binind, unsigned rem, assert(binind < nhbins); assert(rem <= tbin->ncached); + arena = arena_choose(tsd, NULL); + assert(arena != NULL); for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) { /* Lock the arena associated with the first object. 
*/ arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE( tbin->avail[0]); - arena_t *arena = chunk->arena; + arena_t *locked_arena = chunk->arena; UNUSED bool idump; if (config_prof) idump = false; - malloc_mutex_lock(&arena->lock); - if ((config_prof || config_stats) && arena == tcache->arena) { + malloc_mutex_lock(&locked_arena->lock); + if ((config_prof || config_stats) && locked_arena == arena) { if (config_prof) { idump = arena_prof_accum_locked(arena, tcache->prof_accumbytes); @@ -193,9 +208,9 @@ tcache_bin_flush_large(tcache_bin_t *tbin, index_t binind, unsigned rem, ptr = tbin->avail[i]; assert(ptr != NULL); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - if (chunk->arena == arena) { - arena_dalloc_large_junked_locked(arena, chunk, - ptr); + if (chunk->arena == locked_arena) { + arena_dalloc_large_junked_locked(locked_arena, + chunk, ptr); } else { /* * This object was allocated via a different @@ -207,7 +222,7 @@ tcache_bin_flush_large(tcache_bin_t *tbin, index_t binind, unsigned rem, ndeferred++; } } - malloc_mutex_unlock(&arena->lock); + malloc_mutex_unlock(&locked_arena->lock); if (config_prof && idump) prof_idump(); } @@ -216,7 +231,6 @@ tcache_bin_flush_large(tcache_bin_t *tbin, index_t binind, unsigned rem, * The flush loop didn't happen to flush to this thread's * arena, so the stats didn't get merged. Manually do so now. 
*/ - arena_t *arena = tcache->arena; malloc_mutex_lock(&arena->lock); arena->stats.nrequests_large += tbin->tstats.nrequests; arena->stats.lstats[binind - NBINS].nrequests += @@ -243,27 +257,37 @@ tcache_arena_associate(tcache_t *tcache, arena_t *arena) ql_tail_insert(&arena->tcache_ql, tcache, link); malloc_mutex_unlock(&arena->lock); } - tcache->arena = arena; } void -tcache_arena_reassociate(tcache_t *tcache, arena_t *arena) +tcache_arena_reassociate(tcache_t *tcache, arena_t *oldarena, arena_t *newarena) { - tcache_arena_dissociate(tcache); - tcache_arena_associate(tcache, arena); + tcache_arena_dissociate(tcache, oldarena); + tcache_arena_associate(tcache, newarena); } void -tcache_arena_dissociate(tcache_t *tcache) +tcache_arena_dissociate(tcache_t *tcache, arena_t *arena) { if (config_stats) { /* Unlink from list of extant tcaches. */ - malloc_mutex_lock(&tcache->arena->lock); - ql_remove(&tcache->arena->tcache_ql, tcache, link); - tcache_stats_merge(tcache, tcache->arena); - malloc_mutex_unlock(&tcache->arena->lock); + malloc_mutex_lock(&arena->lock); + if (config_debug) { + bool in_ql = false; + tcache_t *iter; + ql_foreach(iter, &arena->tcache_ql, link) { + if (iter == tcache) { + in_ql = true; + break; + } + } + assert(in_ql); + } + ql_remove(&arena->tcache_ql, tcache, link); + tcache_stats_merge(tcache, arena); + malloc_mutex_unlock(&arena->lock); } } @@ -298,7 +322,7 @@ tcache_create(tsd_t *tsd, arena_t *arena) /* Avoid false cacheline sharing. 
*/ size = sa2u(size, CACHELINE); - tcache = ipallocztm(tsd, size, CACHELINE, true, false, true, arena); + tcache = ipallocztm(tsd, size, CACHELINE, true, false, true, a0get()); if (tcache == NULL) return (NULL); @@ -318,16 +342,17 @@ tcache_create(tsd_t *tsd, arena_t *arena) static void tcache_destroy(tsd_t *tsd, tcache_t *tcache) { + arena_t *arena; unsigned i; - tcache_arena_dissociate(tcache); + arena = arena_choose(tsd, NULL); + tcache_arena_dissociate(tcache, arena); for (i = 0; i < NBINS; i++) { tcache_bin_t *tbin = &tcache->tbins[i]; - tcache_bin_flush_small(tbin, i, 0, tcache); + tcache_bin_flush_small(tsd, tbin, i, 0, tcache); if (config_stats && tbin->tstats.nrequests != 0) { - arena_t *arena = tcache->arena; arena_bin_t *bin = &arena->bins[i]; malloc_mutex_lock(&bin->lock); bin->stats.nrequests += tbin->tstats.nrequests; @@ -337,10 +362,9 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache) for (; i < nhbins; i++) { tcache_bin_t *tbin = &tcache->tbins[i]; - tcache_bin_flush_large(tbin, i, 0, tcache); + tcache_bin_flush_large(tsd, tbin, i, 0, tcache); if (config_stats && tbin->tstats.nrequests != 0) { - arena_t *arena = tcache->arena; malloc_mutex_lock(&arena->lock); arena->stats.nrequests_large += tbin->tstats.nrequests; arena->stats.lstats[i - NBINS].nrequests += @@ -350,7 +374,7 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache) } if (config_prof && tcache->prof_accumbytes > 0 && - arena_prof_accum(tcache->arena, tcache->prof_accumbytes)) + arena_prof_accum(arena, tcache->prof_accumbytes)) prof_idump(); idalloctm(tsd, tcache, false, true); @@ -404,6 +428,66 @@ tcache_stats_merge(tcache_t *tcache, arena_t *arena) } } +bool +tcaches_create(tsd_t *tsd, unsigned *r_ind) +{ + tcache_t *tcache; + tcaches_t *elm; + + if (tcaches == NULL) { + tcaches = base_alloc(sizeof(tcache_t *) * + (MALLOCX_TCACHE_MAX+1)); + if (tcaches == NULL) + return (true); + } + + if (tcaches_avail == NULL && tcaches_past > MALLOCX_TCACHE_MAX) + return (true); + tcache = tcache_create(tsd, 
a0get()); + if (tcache == NULL) + return (true); + + if (tcaches_avail != NULL) { + elm = tcaches_avail; + tcaches_avail = tcaches_avail->next; + elm->tcache = tcache; + *r_ind = (elm - tcaches) / sizeof(tcaches_t); + } else { + elm = &tcaches[tcaches_past]; + elm->tcache = tcache; + *r_ind = tcaches_past; + tcaches_past++; + } + + return (false); +} + +static void +tcaches_elm_flush(tsd_t *tsd, tcaches_t *elm) +{ + + if (elm->tcache == NULL) + return; + tcache_destroy(tsd, elm->tcache); + elm->tcache = NULL; +} + +void +tcaches_flush(tsd_t *tsd, unsigned ind) +{ + + tcaches_elm_flush(tsd, &tcaches[ind]); +} + +void +tcaches_destroy(tsd_t *tsd, unsigned ind) +{ + tcaches_t *elm = &tcaches[ind]; + tcaches_elm_flush(tsd, elm); + elm->next = tcaches_avail; + tcaches_avail = elm; +} + bool tcache_boot(void) { diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index f4b7d1ab..10a6fcd6 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -211,6 +211,114 @@ TEST_BEGIN(test_manpage_example) } TEST_END +TEST_BEGIN(test_tcache_none) +{ + void *p0, *q, *p1; + + test_skip_if(!config_tcache); + + /* Allocate p and q. */ + p0 = mallocx(42, 0); + assert_ptr_not_null(p0, "Unexpected mallocx() failure"); + q = mallocx(42, 0); + assert_ptr_not_null(q, "Unexpected mallocx() failure"); + + /* Deallocate p and q, but bypass the tcache for q. */ + dallocx(p0, 0); + dallocx(q, MALLOCX_TCACHE_NONE); + + /* Make sure that tcache-based allocation returns p, not q. */ + p1 = mallocx(42, 0); + assert_ptr_not_null(p1, "Unexpected mallocx() failure"); + assert_ptr_eq(p0, p1, "Expected tcache to allocate cached region"); + + /* Clean up. */ + dallocx(p1, MALLOCX_TCACHE_NONE); +} +TEST_END + +TEST_BEGIN(test_tcache) +{ +#define NTCACHES 10 + unsigned tis[NTCACHES]; + void *ps[NTCACHES]; + void *qs[NTCACHES]; + unsigned i; + size_t sz, psz, qsz; + + test_skip_if(!config_tcache); + + psz = 42; + qsz = nallocx(psz, 0) + 1; + + /* Create tcaches. 
*/ + for (i = 0; i < NTCACHES; i++) { + sz = sizeof(unsigned); + assert_d_eq(mallctl("tcache.create", &tis[i], &sz, NULL, 0), 0, + "Unexpected mallctl() failure, i=%u", i); + } + + /* Flush empty tcaches. */ + for (i = 0; i < NTCACHES; i++) { + assert_d_eq(mallctl("tcache.flush", NULL, NULL, &tis[i], + sizeof(unsigned)), 0, "Unexpected mallctl() failure, i=%u", + i); + } + + /* Cache some allocations. */ + for (i = 0; i < NTCACHES; i++) { + ps[i] = mallocx(psz, MALLOCX_TCACHE(tis[i])); + assert_ptr_not_null(ps[i], "Unexpected mallocx() failure, i=%u", + i); + dallocx(ps[i], MALLOCX_TCACHE(tis[i])); + + qs[i] = mallocx(qsz, MALLOCX_TCACHE(tis[i])); + assert_ptr_not_null(qs[i], "Unexpected mallocx() failure, i=%u", + i); + dallocx(qs[i], MALLOCX_TCACHE(tis[i])); + } + + /* Verify that tcaches allocate cached regions. */ + for (i = 0; i < NTCACHES; i++) { + void *p0 = ps[i]; + ps[i] = mallocx(psz, MALLOCX_TCACHE(tis[i])); + assert_ptr_not_null(ps[i], "Unexpected mallocx() failure, i=%u", + i); + assert_ptr_eq(ps[i], p0, + "Expected mallocx() to allocate cached region, i=%u", i); + } + + /* Verify that reallocation uses cached regions. */ + for (i = 0; i < NTCACHES; i++) { + void *q0 = qs[i]; + qs[i] = rallocx(ps[i], qsz, MALLOCX_TCACHE(tis[i])); + assert_ptr_not_null(qs[i], "Unexpected rallocx() failure, i=%u", + i); + assert_ptr_eq(qs[i], q0, + "Expected rallocx() to allocate cached region, i=%u", i); + /* Avoid undefined behavior in case of test failure. */ + if (qs[i] == NULL) + qs[i] = ps[i]; + } + for (i = 0; i < NTCACHES; i++) + dallocx(qs[i], MALLOCX_TCACHE(tis[i])); + + /* Flush some non-empty tcaches. */ + for (i = 0; i < NTCACHES/2; i++) { + assert_d_eq(mallctl("tcache.flush", NULL, NULL, &tis[i], + sizeof(unsigned)), 0, "Unexpected mallctl() failure, i=%u", + i); + } + + /* Destroy tcaches. 
*/ + for (i = 0; i < NTCACHES; i++) { + assert_d_eq(mallctl("tcache.destroy", NULL, NULL, &tis[i], + sizeof(unsigned)), 0, "Unexpected mallctl() failure, i=%u", + i); + } +} +TEST_END + TEST_BEGIN(test_thread_arena) { unsigned arena_old, arena_new, narenas; @@ -431,6 +539,8 @@ main(void) test_mallctl_config, test_mallctl_opt, test_manpage_example, + test_tcache_none, + test_tcache, test_thread_arena, test_arena_i_purge, test_arena_i_dss, From 9e561e8d3f3c625b98b57df069eeac0fa2f522fb Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 10 Feb 2015 09:03:48 -0800 Subject: [PATCH 0635/3378] Test and fix tcache ID recycling. --- src/tcache.c | 2 +- test/unit/mallctl.c | 12 ++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/src/tcache.c b/src/tcache.c index c7d4f784..9fe78c39 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -451,7 +451,7 @@ tcaches_create(tsd_t *tsd, unsigned *r_ind) elm = tcaches_avail; tcaches_avail = tcaches_avail->next; elm->tcache = tcache; - *r_ind = (elm - tcaches) / sizeof(tcaches_t); + *r_ind = elm - tcaches; } else { elm = &tcaches[tcaches_past]; elm->tcache = tcache; diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 10a6fcd6..5960496f 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -258,6 +258,18 @@ TEST_BEGIN(test_tcache) "Unexpected mallctl() failure, i=%u", i); } + /* Exercise tcache ID recycling. */ + for (i = 0; i < NTCACHES; i++) { + assert_d_eq(mallctl("tcache.destroy", NULL, NULL, &tis[i], + sizeof(unsigned)), 0, "Unexpected mallctl() failure, i=%u", + i); + } + for (i = 0; i < NTCACHES; i++) { + sz = sizeof(unsigned); + assert_d_eq(mallctl("tcache.create", &tis[i], &sz, NULL, 0), 0, + "Unexpected mallctl() failure, i=%u", i); + } + /* Flush empty tcaches. 
*/ for (i = 0; i < NTCACHES; i++) { assert_d_eq(mallctl("tcache.flush", NULL, NULL, &tis[i], From 051eae8cc591dfa2955cbfa73aae79ab53620c08 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 10 Feb 2015 16:05:52 -0800 Subject: [PATCH 0636/3378] Remove unnecessary xchg* lock prefixes. --- include/jemalloc/internal/atomic.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/atomic.h b/include/jemalloc/internal/atomic.h index f8bd62ec..af2c6874 100644 --- a/include/jemalloc/internal/atomic.h +++ b/include/jemalloc/internal/atomic.h @@ -119,7 +119,7 @@ atomic_write_uint64(uint64_t *p, uint64_t x) { asm volatile ( - "lock; xchgq %1, %0;" + "xchgq %1, %0;" /* Lock is implied by xchgq. */ : "=m" (*p), "+r" (x) /* Outputs. */ : "m" (*p) /* Inputs. */ : "memory" /* Clobbers. */ @@ -343,7 +343,7 @@ atomic_write_uint32(uint32_t *p, uint32_t x) { asm volatile ( - "lock; xchgl %1, %0;" + "xchgl %1, %0;" /* Lock is implied by xchgl. */ : "=m" (*p), "+r" (x) /* Outputs. */ : "m" (*p) /* Inputs. */ : "memory" /* Clobbers. */ From 064dbfbaf76617643bbbe66cbcc880e7ee9ec00f Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 12 Feb 2015 00:09:37 -0800 Subject: [PATCH 0637/3378] Fix a regression in tcache_bin_flush_small(). Fix a serious regression in tcache_bin_flush_small() that was introduced by 1cb181ed632e7573fb4eab194e4d216867222d27 (Implement explicit tcache support.). 
--- src/tcache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tcache.c b/src/tcache.c index 9fe78c39..1166d60f 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -103,7 +103,7 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_bin_t *tbin, index_t binind, arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE( tbin->avail[0]); arena_t *bin_arena = chunk->arena; - arena_bin_t *bin = &arena->bins[binind]; + arena_bin_t *bin = &bin_arena->bins[binind]; if (config_prof && bin_arena == arena) { if (arena_prof_accum(arena, tcache->prof_accumbytes)) From f30e261c5b85d2900224f91c6d426a23dce94fe9 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 12 Feb 2015 00:12:44 -0800 Subject: [PATCH 0638/3378] Update ckh to support metadata allocation tracking. --- src/ckh.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/src/ckh.c b/src/ckh.c index ad075d60..da78d1b4 100644 --- a/src/ckh.c +++ b/src/ckh.c @@ -270,8 +270,8 @@ ckh_grow(tsd_t *tsd, ckh_t *ckh) ret = true; goto label_return; } - tab = (ckhc_t *)ipalloct(tsd, usize, CACHELINE, true, NULL, - NULL); + tab = (ckhc_t *)ipallocztm(tsd, usize, CACHELINE, true, NULL, + true, NULL); if (tab == NULL) { ret = true; goto label_return; @@ -283,12 +283,12 @@ ckh_grow(tsd_t *tsd, ckh_t *ckh) ckh->lg_curbuckets = lg_curcells - LG_CKH_BUCKET_CELLS; if (!ckh_rebuild(ckh, tab)) { - idalloc(tsd, tab); + idalloctm(tsd, tab, tcache_get(tsd, false), true); break; } /* Rebuilding failed, so back out partially rebuilt table. 
*/ - idalloc(tsd, ckh->tab); + idalloctm(tsd, ckh->tab, tcache_get(tsd, false), true); ckh->tab = tab; ckh->lg_curbuckets = lg_prevbuckets; } @@ -314,7 +314,8 @@ ckh_shrink(tsd_t *tsd, ckh_t *ckh) usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE); if (usize == 0) return; - tab = (ckhc_t *)ipalloct(tsd, usize, CACHELINE, true, NULL, NULL); + tab = (ckhc_t *)ipallocztm(tsd, usize, CACHELINE, true, NULL, true, + NULL); if (tab == NULL) { /* * An OOM error isn't worth propagating, since it doesn't @@ -329,7 +330,7 @@ ckh_shrink(tsd_t *tsd, ckh_t *ckh) ckh->lg_curbuckets = lg_curcells - LG_CKH_BUCKET_CELLS; if (!ckh_rebuild(ckh, tab)) { - idalloc(tsd, tab); + idalloctm(tsd, tab, tcache_get(tsd, false), true); #ifdef CKH_COUNT ckh->nshrinks++; #endif @@ -337,7 +338,7 @@ ckh_shrink(tsd_t *tsd, ckh_t *ckh) } /* Rebuilding failed, so back out partially rebuilt table. */ - idalloc(tsd, ckh->tab); + idalloctm(tsd, ckh->tab, tcache_get(tsd, false), true); ckh->tab = tab; ckh->lg_curbuckets = lg_prevbuckets; #ifdef CKH_COUNT @@ -390,7 +391,8 @@ ckh_new(tsd_t *tsd, ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ret = true; goto label_return; } - ckh->tab = (ckhc_t *)ipalloct(tsd, usize, CACHELINE, true, NULL, NULL); + ckh->tab = (ckhc_t *)ipallocztm(tsd, usize, CACHELINE, true, NULL, true, + NULL); if (ckh->tab == NULL) { ret = true; goto label_return; @@ -419,7 +421,7 @@ ckh_delete(tsd_t *tsd, ckh_t *ckh) (unsigned long long)ckh->nrelocs); #endif - idalloc(tsd, ckh->tab); + idalloctm(tsd, ckh->tab, tcache_get(tsd, false), true); if (config_debug) memset(ckh, 0x5a, sizeof(ckh_t)); } From cbf3a6d70371d2390b8b0e76814e04cc6088002c Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 11 Feb 2015 12:24:27 -0800 Subject: [PATCH 0639/3378] Move centralized chunk management into arenas. 
Migrate all centralized data structures related to huge allocations and recyclable chunks into arena_t, so that each arena can manage huge allocations and recyclable virtual memory completely independently of other arenas. Add chunk node caching to arenas, in order to avoid contention on the base allocator. Use chunks_rtree to look up huge allocations rather than a red-black tree. Maintain a per arena unsorted list of huge allocations (which will be needed to enumerate huge allocations during arena reset). Remove the --enable-ivsalloc option, make ivsalloc() always available, and use it for size queries if --enable-debug is enabled. The only practical implications to this removal are that 1) ivsalloc() is now always available during live debugging (and the underlying radix tree is available during core-based debugging), and 2) size query validation can no longer be enabled independent of --enable-debug. Remove the stats.chunks.{current,total,high} mallctls, and replace their underlying statistics with simpler atomically updated counters used exclusively for gdump triggering. These statistics are no longer very useful because each arena manages chunks independently, and per arena statistics provide similar information. Simplify chunk synchronization code, now that base chunk allocation cannot cause recursive lock acquisition. 
--- INSTALL | 6 - configure.ac | 22 +- doc/jemalloc.xml.in | 35 +-- include/jemalloc/internal/arena.h | 60 +++- include/jemalloc/internal/atomic.h | 4 +- include/jemalloc/internal/base.h | 2 - include/jemalloc/internal/chunk.h | 22 +- include/jemalloc/internal/chunk_dss.h | 4 +- include/jemalloc/internal/ctl.h | 5 - include/jemalloc/internal/extent.h | 25 +- include/jemalloc/internal/huge.h | 4 - .../jemalloc/internal/jemalloc_internal.h.in | 28 +- .../internal/jemalloc_internal_defs.h.in | 6 - include/jemalloc/internal/private_symbols.txt | 12 +- include/jemalloc/internal/rtree.h | 23 +- include/jemalloc/internal/stats.h | 15 - src/arena.c | 74 ++++- src/base.c | 65 ++--- src/chunk.c | 275 +++++++----------- src/chunk_dss.c | 5 +- src/ctl.c | 26 +- src/huge.c | 169 ++++++----- src/jemalloc.c | 15 +- src/stats.c | 12 - src/tcache.c | 8 +- test/unit/stats.c | 27 -- 26 files changed, 394 insertions(+), 555 deletions(-) diff --git a/INSTALL b/INSTALL index b8459a81..517fe021 100644 --- a/INSTALL +++ b/INSTALL @@ -92,7 +92,6 @@ any of the following arguments (not a definitive list) to 'configure': --enable-debug Enable assertions and validation code. This incurs a substantial performance hit, but is very useful during application development. - Implies --enable-ivsalloc. --enable-code-coverage Enable code coverage support, for use during jemalloc test development. @@ -107,11 +106,6 @@ any of the following arguments (not a definitive list) to 'configure': there are interactions between the various coverage targets, so it is usually advisable to run 'make clean' between repeated code coverage runs. ---enable-ivsalloc - Enable validation code, which verifies that pointers reside within - jemalloc-owned chunks before dereferencing them. This incurs a substantial - performance hit. - --disable-stats Disable statistics gathering functionality. See the "opt.stats_print" option documentation for usage details. 
diff --git a/configure.ac b/configure.ac index dc8aa02c..2922880a 100644 --- a/configure.ac +++ b/configure.ac @@ -625,7 +625,7 @@ fi dnl Do not compile with debugging by default. AC_ARG_ENABLE([debug], - [AS_HELP_STRING([--enable-debug], [Build debugging code (implies --enable-ivsalloc)])], + [AS_HELP_STRING([--enable-debug], [Build debugging code])], [if test "x$enable_debug" = "xno" ; then enable_debug="0" else @@ -634,27 +634,8 @@ fi ], [enable_debug="0"] ) -if test "x$enable_debug" = "x1" ; then - AC_DEFINE([JEMALLOC_DEBUG], [ ]) - enable_ivsalloc="1" -fi AC_SUBST([enable_debug]) -dnl Do not validate pointers by default. -AC_ARG_ENABLE([ivsalloc], - [AS_HELP_STRING([--enable-ivsalloc], [Validate pointers passed through the public API])], -[if test "x$enable_ivsalloc" = "xno" ; then - enable_ivsalloc="0" -else - enable_ivsalloc="1" -fi -], -[enable_ivsalloc="0"] -) -if test "x$enable_ivsalloc" = "x1" ; then - AC_DEFINE([JEMALLOC_IVSALLOC], [ ]) -fi - dnl Only optimize if not debugging. if test "x$enable_debug" = "x0" -a "x$no_CFLAGS" = "xyes" ; then dnl Make sure that an optimization flag was not specified in EXTRA_CFLAGS. @@ -1401,7 +1382,6 @@ if test "x${enable_zone_allocator}" = "x1" ; then if test "x${abi}" != "xmacho"; then AC_MSG_ERROR([--enable-zone-allocator is only supported on Darwin]) fi - AC_DEFINE([JEMALLOC_IVSALLOC], [ ]) AC_DEFINE([JEMALLOC_ZONE], [ ]) dnl The szone version jumped from 3 to 6 between the OS X 10.5.x and 10.6 diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index da800ded..b392fa9e 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1847,7 +1847,7 @@ malloc_conf = "xmalloc:true";]]> equal to stats.allocated. This does not include - stats.arenas.<i>.pdirty and pages + stats.arenas.<i>.pdirty, nor pages entirely devoted to allocator metadata. @@ -1880,39 +1880,6 @@ malloc_conf = "xmalloc:true";]]> does not include inactive chunks. 
- - - stats.chunks.current - (size_t) - r- - [] - - Total number of chunks actively mapped on behalf of the - application. This does not include inactive chunks. - - - - - - stats.chunks.total - (uint64_t) - r- - [] - - Cumulative number of chunks allocated. - - - - - stats.chunks.high - (size_t) - r- - [] - - Maximum number of active chunks at any time thus far. - - - stats.arenas.<i>.dss diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 5476899d..2ae4609e 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -151,8 +151,12 @@ typedef ql_head(arena_chunk_map_misc_t) arena_chunk_miscelms_t; /* Arena chunk header. */ struct arena_chunk_s { - /* Arena that owns the chunk. */ - arena_t *arena; + /* + * The arena that owns the chunk is node.arena. This field as a whole + * is used by chunks_rtree to support both ivsalloc() and core-based + * debugging. + */ + extent_node_t node; /* * Map of pages within chunk that keeps track of free/large/small. The @@ -313,6 +317,27 @@ struct arena_s { /* List of dirty runs this arena manages. */ arena_chunk_miscelms_t runs_dirty; + /* Extant huge allocations. */ + ql_head(extent_node_t) huge; + /* Synchronizes all huge allocation/update/deallocation. */ + malloc_mutex_t huge_mtx; + + /* + * Trees of chunks that were previously allocated (trees differ only in + * node ordering). These are used when allocating chunks, in an attempt + * to re-use address space. Depending on function, different tree + * orderings are needed, which is why there are two trees with the same + * contents. + */ + extent_tree_t chunks_szad_mmap; + extent_tree_t chunks_ad_mmap; + extent_tree_t chunks_szad_dss; + extent_tree_t chunks_ad_dss; + malloc_mutex_t chunks_mtx; + /* Cache of nodes that were allocated via base_alloc(). */ + ql_head(extent_node_t) node_cache; + malloc_mutex_t node_cache_mtx; + /* * User-configurable chunk allocation and deallocation functions. 
*/ @@ -338,6 +363,8 @@ extern size_t arena_maxclass; /* Max size class for arenas. */ extern unsigned nlclasses; /* Number of large size classes. */ extern unsigned nhclasses; /* Number of huge size classes. */ +extent_node_t *arena_node_alloc(arena_t *arena); +void arena_node_dalloc(arena_t *arena, extent_node_t *node); void *arena_chunk_alloc_huge(arena_t *arena, size_t usize, size_t alignment, bool *zero); void arena_chunk_dalloc_huge(arena_t *arena, void *chunk, size_t usize); @@ -453,8 +480,7 @@ void *arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero, tcache_t *tcache); arena_t *arena_aalloc(const void *ptr); size_t arena_salloc(const void *ptr, bool demote); -void arena_dalloc(tsd_t *tsd, arena_chunk_t *chunk, void *ptr, - tcache_t *tcache); +void arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache); void arena_sdalloc(tsd_t *tsd, arena_chunk_t *chunk, void *ptr, size_t size, tcache_t *tcache); #endif @@ -792,7 +818,7 @@ arena_ptr_small_binind_get(const void *ptr, size_t mapbits) assert(binind != BININD_INVALID); assert(binind < NBINS); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - arena = chunk->arena; + arena = chunk->node.arena; pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; actual_mapbits = arena_mapbits_get(chunk, pageind); assert(mapbits == actual_mapbits); @@ -980,7 +1006,7 @@ arena_aalloc(const void *ptr) arena_chunk_t *chunk; chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - return (chunk->arena); + return (chunk->node.arena); } /* Return the size of the allocation pointed to by ptr. 
*/ @@ -1024,11 +1050,18 @@ arena_salloc(const void *ptr, bool demote) } JEMALLOC_ALWAYS_INLINE void -arena_dalloc(tsd_t *tsd, arena_chunk_t *chunk, void *ptr, tcache_t *tcache) +arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache) { + arena_chunk_t *chunk; size_t pageind, mapbits; assert(ptr != NULL); + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + if (unlikely(chunk == ptr)) { + huge_dalloc(tsd, ptr, tcache); + return; + } assert(CHUNK_ADDR2BASE(ptr) != ptr); pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; @@ -1040,8 +1073,10 @@ arena_dalloc(tsd_t *tsd, arena_chunk_t *chunk, void *ptr, tcache_t *tcache) index_t binind = arena_ptr_small_binind_get(ptr, mapbits); tcache_dalloc_small(tsd, tcache, ptr, binind); - } else - arena_dalloc_small(chunk->arena, chunk, ptr, pageind); + } else { + arena_dalloc_small(chunk->node.arena, chunk, ptr, + pageind); + } } else { size_t size = arena_mapbits_large_size_get(chunk, pageind); @@ -1050,7 +1085,7 @@ arena_dalloc(tsd_t *tsd, arena_chunk_t *chunk, void *ptr, tcache_t *tcache) if (likely(tcache != NULL) && size <= tcache_maxclass) tcache_dalloc_large(tsd, tcache, ptr, size); else - arena_dalloc_large(chunk->arena, chunk, ptr); + arena_dalloc_large(chunk->node.arena, chunk, ptr); } } @@ -1081,7 +1116,8 @@ arena_sdalloc(tsd_t *tsd, arena_chunk_t *chunk, void *ptr, size_t size, } else { size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - arena_dalloc_small(chunk->arena, chunk, ptr, pageind); + arena_dalloc_small(chunk->node.arena, chunk, ptr, + pageind); } } else { assert(((uintptr_t)ptr & PAGE_MASK) == 0); @@ -1089,7 +1125,7 @@ arena_sdalloc(tsd_t *tsd, arena_chunk_t *chunk, void *ptr, size_t size, if (likely(tcache != NULL) && size <= tcache_maxclass) tcache_dalloc_large(tsd, tcache, ptr, size); else - arena_dalloc_large(chunk->arena, chunk, ptr); + arena_dalloc_large(chunk->node.arena, chunk, ptr); } } # endif /* JEMALLOC_ARENA_INLINE_B */ diff --git a/include/jemalloc/internal/atomic.h 
b/include/jemalloc/internal/atomic.h index af2c6874..0d33065e 100644 --- a/include/jemalloc/internal/atomic.h +++ b/include/jemalloc/internal/atomic.h @@ -52,7 +52,7 @@ void atomic_write_uint32(uint32_t *p, uint32_t x); void *atomic_add_p(void **p, void *x); void *atomic_sub_p(void **p, void *x); bool atomic_cas_p(void **p, void *c, void *s); -void atomic_write_p(void **p, void *x); +void atomic_write_p(void **p, const void *x); size_t atomic_add_z(size_t *p, size_t x); size_t atomic_sub_z(size_t *p, size_t x); bool atomic_cas_z(size_t *p, size_t c, size_t s); @@ -538,7 +538,7 @@ atomic_cas_p(void **p, void *c, void *s) } JEMALLOC_INLINE void -atomic_write_p(void **p, void *x) +atomic_write_p(void **p, const void *x) { #if (LG_SIZEOF_PTR == 3) diff --git a/include/jemalloc/internal/base.h b/include/jemalloc/internal/base.h index a0798ee2..bec76b32 100644 --- a/include/jemalloc/internal/base.h +++ b/include/jemalloc/internal/base.h @@ -10,8 +10,6 @@ #ifdef JEMALLOC_H_EXTERNS void *base_alloc(size_t size); -extent_node_t *base_node_alloc(void); -void base_node_dalloc(extent_node_t *node); size_t base_allocated_get(void); bool base_boot(void); void base_prefork(void); diff --git a/include/jemalloc/internal/chunk.h b/include/jemalloc/internal/chunk.h index 62ac3e73..5e0fb144 100644 --- a/include/jemalloc/internal/chunk.h +++ b/include/jemalloc/internal/chunk.h @@ -30,24 +30,21 @@ extern size_t opt_lg_chunk; extern const char *opt_dss; -/* Protects stats_chunks; currently not used for any other purpose. */ -extern malloc_mutex_t chunks_mtx; -/* Chunk statistics. */ -extern chunk_stats_t stats_chunks; - extern rtree_t chunks_rtree; extern size_t chunksize; extern size_t chunksize_mask; /* (chunksize - 1). 
*/ extern size_t chunk_npages; +bool chunk_register(const void *chunk, const extent_node_t *node); +void chunk_deregister(const void *chunk, const extent_node_t *node); void *chunk_alloc_base(size_t size); void *chunk_alloc_arena(chunk_alloc_t *chunk_alloc, chunk_dalloc_t *chunk_dalloc, unsigned arena_ind, void *new_addr, size_t size, size_t alignment, bool *zero); void *chunk_alloc_default(void *new_addr, size_t size, size_t alignment, bool *zero, unsigned arena_ind); -void chunk_unmap(void *chunk, size_t size); +void chunk_unmap(arena_t *arena, void *chunk, size_t size); bool chunk_dalloc_default(void *chunk, size_t size, unsigned arena_ind); bool chunk_boot(void); void chunk_prefork(void); @@ -58,6 +55,19 @@ void chunk_postfork_child(void); /******************************************************************************/ #ifdef JEMALLOC_H_INLINES +#ifndef JEMALLOC_ENABLE_INLINE +extent_node_t *chunk_lookup(const void *chunk); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_CHUNK_C_)) +JEMALLOC_INLINE extent_node_t * +chunk_lookup(const void *chunk) +{ + + return (rtree_get(&chunks_rtree, (uintptr_t)chunk)); +} +#endif + #endif /* JEMALLOC_H_INLINES */ /******************************************************************************/ diff --git a/include/jemalloc/internal/chunk_dss.h b/include/jemalloc/internal/chunk_dss.h index 09896470..87366a28 100644 --- a/include/jemalloc/internal/chunk_dss.h +++ b/include/jemalloc/internal/chunk_dss.h @@ -23,8 +23,8 @@ extern const char *dss_prec_names[]; dss_prec_t chunk_dss_prec_get(void); bool chunk_dss_prec_set(dss_prec_t dss_prec); -void *chunk_alloc_dss(void *new_addr, size_t size, size_t alignment, - bool *zero); +void *chunk_alloc_dss(arena_t *arena, void *new_addr, size_t size, + size_t alignment, bool *zero); bool chunk_in_dss(void *chunk); bool chunk_dss_boot(void); void chunk_dss_prefork(void); diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index 
65617bc9..ab9c9862 100644 --- a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -54,11 +54,6 @@ struct ctl_stats_s { size_t active; size_t metadata; size_t mapped; - struct { - size_t current; /* stats_chunks.curchunks */ - uint64_t total; /* stats_chunks.nchunks */ - size_t high; /* stats_chunks.highchunks */ - } chunks; unsigned narenas; ctl_arena_stats_t *arenas; /* (narenas + 1) elements. */ }; diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index f45940c1..fbcdcf99 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -9,21 +9,17 @@ typedef struct extent_node_s extent_node_t; /* Tree of extents. */ struct extent_node_s { - /* Linkage for the size/address-ordered tree. */ - rb_node(extent_node_t) link_szad; - - /* Linkage for the address-ordered tree. */ - rb_node(extent_node_t) link_ad; + /* Arena from which this extent came, if any. */ + arena_t *arena; /* Pointer to the extent that this tree node is responsible for. */ void *addr; - /* Total region size. */ + /* + * Total region size, or 0 if this node corresponds to an arena chunk. + */ size_t size; - /* Arena from which this extent came, if any. */ - arena_t *arena; - /* * 'prof_tctx' and 'zeroed' are never needed at the same time, so * overlay them in order to fit extent_node_t in one cache line. @@ -35,6 +31,17 @@ struct extent_node_s { /* True if zero-filled; used by chunk recycling code. */ bool zeroed; }; + + union { + /* Linkage for the size/address-ordered tree. */ + rb_node(extent_node_t) link_szad; + + /* Linkage for huge allocations and cached chunks nodes. */ + ql_elm(extent_node_t) link_ql; + }; + + /* Linkage for the address-ordered tree. 
*/ + rb_node(extent_node_t) link_ad; }; typedef rb_tree(extent_node_t) extent_tree_t; diff --git a/include/jemalloc/internal/huge.h b/include/jemalloc/internal/huge.h index 231cc368..c478d16a 100644 --- a/include/jemalloc/internal/huge.h +++ b/include/jemalloc/internal/huge.h @@ -27,10 +27,6 @@ arena_t *huge_aalloc(const void *ptr); size_t huge_salloc(const void *ptr); prof_tctx_t *huge_prof_tctx_get(const void *ptr); void huge_prof_tctx_set(const void *ptr, prof_tctx_t *tctx); -bool huge_boot(void); -void huge_prefork(void); -void huge_postfork_parent(void); -void huge_postfork_child(void); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index b8c994cb..ab93aa52 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -119,13 +119,6 @@ static const bool config_xmalloc = false #endif ; -static const bool config_ivsalloc = -#ifdef JEMALLOC_IVSALLOC - true -#else - false -#endif - ; #ifdef JEMALLOC_C11ATOMICS #include @@ -352,9 +345,9 @@ typedef unsigned index_t; #include "jemalloc/internal/arena.h" #include "jemalloc/internal/bitmap.h" #include "jemalloc/internal/base.h" +#include "jemalloc/internal/rtree.h" #include "jemalloc/internal/chunk.h" #include "jemalloc/internal/huge.h" -#include "jemalloc/internal/rtree.h" #include "jemalloc/internal/tcache.h" #include "jemalloc/internal/hash.h" #include "jemalloc/internal/quarantine.h" @@ -378,9 +371,9 @@ typedef unsigned index_t; #include "jemalloc/internal/extent.h" #include "jemalloc/internal/arena.h" #include "jemalloc/internal/base.h" +#include "jemalloc/internal/rtree.h" #include "jemalloc/internal/chunk.h" #include "jemalloc/internal/huge.h" -#include "jemalloc/internal/rtree.h" #include "jemalloc/internal/tcache.h" #include "jemalloc/internal/hash.h" #include 
"jemalloc/internal/quarantine.h" @@ -457,9 +450,9 @@ void jemalloc_postfork_child(void); #include "jemalloc/internal/extent.h" #include "jemalloc/internal/arena.h" #include "jemalloc/internal/base.h" +#include "jemalloc/internal/rtree.h" #include "jemalloc/internal/chunk.h" #include "jemalloc/internal/huge.h" -#include "jemalloc/internal/rtree.h" #include "jemalloc/internal/tcache.h" #include "jemalloc/internal/hash.h" #include "jemalloc/internal/quarantine.h" @@ -483,6 +476,7 @@ void jemalloc_postfork_child(void); #include "jemalloc/internal/mb.h" #include "jemalloc/internal/extent.h" #include "jemalloc/internal/base.h" +#include "jemalloc/internal/rtree.h" #include "jemalloc/internal/chunk.h" #include "jemalloc/internal/huge.h" @@ -777,7 +771,6 @@ arena_get(tsd_t *tsd, unsigned ind, bool init_if_missing, #endif #include "jemalloc/internal/bitmap.h" -#include "jemalloc/internal/rtree.h" /* * Include portions of arena.h interleaved with tcache.h in order to resolve * circular dependencies. @@ -966,10 +959,14 @@ ipalloc(tsd_t *tsd, size_t usize, size_t alignment, bool zero) JEMALLOC_ALWAYS_INLINE size_t ivsalloc(const void *ptr, bool demote) { + extent_node_t *node; /* Return 0 if ptr is not within a chunk managed by jemalloc. */ - if (rtree_get(&chunks_rtree, (uintptr_t)CHUNK_ADDR2BASE(ptr)) == 0) + node = chunk_lookup(CHUNK_ADDR2BASE(ptr)); + if (node == NULL) return (0); + /* Only arena chunks should be looked up via interior pointers. 
*/ + assert(node->addr == ptr || node->size == 0); return (isalloc(ptr, demote)); } @@ -999,7 +996,6 @@ p2rz(const void *ptr) JEMALLOC_ALWAYS_INLINE void idalloctm(tsd_t *tsd, void *ptr, tcache_t *tcache, bool is_metadata) { - arena_chunk_t *chunk; assert(ptr != NULL); if (config_stats && is_metadata) { @@ -1007,11 +1003,7 @@ idalloctm(tsd_t *tsd, void *ptr, tcache_t *tcache, bool is_metadata) config_prof)); } - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - if (likely(chunk != ptr)) - arena_dalloc(tsd, chunk, ptr, tcache); - else - huge_dalloc(tsd, ptr, tcache); + arena_dalloc(tsd, ptr, tcache); } JEMALLOC_ALWAYS_INLINE void diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index c8d7dafb..0f0db8a1 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -186,12 +186,6 @@ #undef JEMALLOC_INTERNAL_FFSL #undef JEMALLOC_INTERNAL_FFS -/* - * JEMALLOC_IVSALLOC enables ivsalloc(), which verifies that pointers reside - * within jemalloc-owned chunks before dereferencing them. - */ -#undef JEMALLOC_IVSALLOC - /* * Darwin (OS X) uses zones to work around Mach-O symbol override shortcomings. 
*/ diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index cf42bead..d5601a68 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -60,6 +60,8 @@ arena_miscelm_to_pageind arena_miscelm_to_rpages arena_nbound arena_new +arena_node_alloc +arena_node_dalloc arena_palloc arena_postfork_child arena_postfork_parent @@ -103,8 +105,6 @@ atomic_sub_z base_alloc base_allocated_get base_boot -base_node_alloc -base_node_dalloc base_postfork_child base_postfork_parent base_prefork @@ -130,6 +130,7 @@ chunk_alloc_mmap chunk_boot chunk_dalloc_default chunk_dalloc_mmap +chunk_deregister chunk_dss_boot chunk_dss_postfork_child chunk_dss_postfork_parent @@ -137,12 +138,13 @@ chunk_dss_prec_get chunk_dss_prec_set chunk_dss_prefork chunk_in_dss +chunk_lookup chunk_npages chunk_postfork_child chunk_postfork_parent chunk_prefork +chunk_register chunk_unmap -chunks_mtx chunks_rtree chunksize chunksize_mask @@ -218,16 +220,12 @@ hash_x86_128 hash_x86_32 huge_aalloc huge_allocated -huge_boot huge_dalloc huge_dalloc_junk huge_malloc huge_ndalloc huge_nmalloc huge_palloc -huge_postfork_child -huge_postfork_parent -huge_prefork huge_prof_tctx_get huge_prof_tctx_set huge_ralloc diff --git a/include/jemalloc/internal/rtree.h b/include/jemalloc/internal/rtree.h index e86e17c4..2eb726d6 100644 --- a/include/jemalloc/internal/rtree.h +++ b/include/jemalloc/internal/rtree.h @@ -37,7 +37,7 @@ typedef void (rtree_node_dalloc_t)(rtree_node_elm_t *); struct rtree_node_elm_s { union { rtree_node_elm_t *child; - void *val; + extent_node_t *val; }; }; @@ -110,13 +110,14 @@ bool rtree_node_valid(rtree_node_elm_t *node); rtree_node_elm_t *rtree_child_tryread(rtree_node_elm_t *elm); rtree_node_elm_t *rtree_child_read(rtree_t *rtree, rtree_node_elm_t *elm, unsigned level); -void *rtree_val_read(rtree_t *rtree, rtree_node_elm_t *elm); -void rtree_val_write(rtree_t *rtree, rtree_node_elm_t *elm, 
void *val); +extent_node_t *rtree_val_read(rtree_t *rtree, rtree_node_elm_t *elm); +void rtree_val_write(rtree_t *rtree, rtree_node_elm_t *elm, + const extent_node_t *val); rtree_node_elm_t *rtree_subtree_tryread(rtree_t *rtree, unsigned level); rtree_node_elm_t *rtree_subtree_read(rtree_t *rtree, unsigned level); -void *rtree_get(rtree_t *rtree, uintptr_t key); -bool rtree_set(rtree_t *rtree, uintptr_t key, void *val); +extent_node_t *rtree_get(rtree_t *rtree, uintptr_t key); +bool rtree_set(rtree_t *rtree, uintptr_t key, const extent_node_t *val); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_RTREE_C_)) @@ -173,18 +174,18 @@ rtree_child_read(rtree_t *rtree, rtree_node_elm_t *elm, unsigned level) return (child); } -JEMALLOC_INLINE void * +JEMALLOC_INLINE extent_node_t * rtree_val_read(rtree_t *rtree, rtree_node_elm_t *elm) { - return (atomic_read_p(&elm->val)); + return (atomic_read_p((void **)&elm->val)); } JEMALLOC_INLINE void -rtree_val_write(rtree_t *rtree, rtree_node_elm_t *elm, void *val) +rtree_val_write(rtree_t *rtree, rtree_node_elm_t *elm, const extent_node_t *val) { - atomic_write_p(&elm->val, val); + atomic_write_p((void **)&elm->val, val); } JEMALLOC_INLINE rtree_node_elm_t * @@ -210,7 +211,7 @@ rtree_subtree_read(rtree_t *rtree, unsigned level) return (subtree); } -JEMALLOC_INLINE void * +JEMALLOC_INLINE extent_node_t * rtree_get(rtree_t *rtree, uintptr_t key) { uintptr_t subkey; @@ -238,7 +239,7 @@ rtree_get(rtree_t *rtree, uintptr_t key) } JEMALLOC_INLINE bool -rtree_set(rtree_t *rtree, uintptr_t key, void *val) +rtree_set(rtree_t *rtree, uintptr_t key, const extent_node_t *val) { uintptr_t subkey; unsigned i, start_level; diff --git a/include/jemalloc/internal/stats.h b/include/jemalloc/internal/stats.h index 7cba77b9..c91dba99 100644 --- a/include/jemalloc/internal/stats.h +++ b/include/jemalloc/internal/stats.h @@ -135,21 +135,6 @@ struct arena_stats_s { malloc_huge_stats_t *hstats; }; -struct chunk_stats_s { - /* Number of 
chunks that were allocated. */ - uint64_t nchunks; - - /* High-water mark for number of chunks allocated. */ - size_t highchunks; - - /* - * Current number of chunks allocated. This value isn't maintained for - * any other purpose, so keep track of it in order to be able to set - * highchunks. - */ - size_t curchunks; -}; - #endif /* JEMALLOC_H_STRUCTS */ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS diff --git a/src/arena.c b/src/arena.c index 907fbd7f..2bd1a2c0 100644 --- a/src/arena.c +++ b/src/arena.c @@ -20,6 +20,7 @@ unsigned nhclasses; /* Number of huge size classes. */ * definition. */ +static void arena_chunk_dalloc(arena_t *arena, arena_chunk_t *chunk); static void arena_purge(arena_t *arena, bool all); static void arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, bool cleaned); @@ -392,8 +393,7 @@ arena_chunk_init_spare(arena_t *arena) } static arena_chunk_t * -arena_chunk_alloc_internal(arena_t *arena, size_t size, size_t alignment, - bool *zero) +arena_chunk_alloc_internal(arena_t *arena, bool *zero) { arena_chunk_t *chunk; chunk_alloc_t *chunk_alloc; @@ -403,7 +403,16 @@ arena_chunk_alloc_internal(arena_t *arena, size_t size, size_t alignment, chunk_dalloc = arena->chunk_dalloc; malloc_mutex_unlock(&arena->lock); chunk = (arena_chunk_t *)chunk_alloc_arena(chunk_alloc, chunk_dalloc, - arena->ind, NULL, size, alignment, zero); + arena->ind, NULL, chunksize, chunksize, zero); + if (chunk != NULL) { + chunk->node.arena = arena; + chunk->node.addr = chunk; + chunk->node.size = 0; /* Indicates this is an arena chunk. 
*/ + if (chunk_register(chunk, &chunk->node)) { + chunk_dalloc((void *)chunk, chunksize, arena->ind); + chunk = NULL; + } + } malloc_mutex_lock(&arena->lock); if (config_stats && chunk != NULL) { arena->stats.mapped += chunksize; @@ -423,12 +432,10 @@ arena_chunk_init_hard(arena_t *arena) assert(arena->spare == NULL); zero = false; - chunk = arena_chunk_alloc_internal(arena, chunksize, chunksize, &zero); + chunk = arena_chunk_alloc_internal(arena, &zero); if (chunk == NULL) return (NULL); - chunk->arena = arena; - /* * Initialize the map to contain one maximal free untouched run. Mark * the pages as zeroed iff chunk_alloc() returned a zeroed chunk. @@ -514,6 +521,7 @@ arena_chunk_dalloc(arena_t *arena, arena_chunk_t *chunk) } chunk_dalloc = arena->chunk_dalloc; malloc_mutex_unlock(&arena->lock); + chunk_deregister(spare, &spare->node); chunk_dalloc((void *)spare, chunksize, arena->ind); malloc_mutex_lock(&arena->lock); if (config_stats) { @@ -593,6 +601,32 @@ arena_huge_ralloc_stats_update_undo(arena_t *arena, size_t oldsize, arena_huge_malloc_stats_update_undo(arena, usize); } +extent_node_t * +arena_node_alloc(arena_t *arena) +{ + extent_node_t *node; + + malloc_mutex_lock(&arena->node_cache_mtx); + node = ql_last(&arena->node_cache, link_ql); + if (node == NULL) { + malloc_mutex_unlock(&arena->node_cache_mtx); + return (base_alloc(sizeof(extent_node_t))); + } + ql_tail_remove(&arena->node_cache, extent_node_t, link_ql); + malloc_mutex_unlock(&arena->node_cache_mtx); + return (node); +} + +void +arena_node_dalloc(arena_t *arena, extent_node_t *node) +{ + + malloc_mutex_lock(&arena->node_cache_mtx); + ql_elm_new(node, link_ql); + ql_tail_insert(&arena->node_cache, node, link_ql); + malloc_mutex_unlock(&arena->node_cache_mtx); +} + void * arena_chunk_alloc_huge(arena_t *arena, size_t usize, size_t alignment, bool *zero) @@ -1782,7 +1816,7 @@ arena_dissociate_bin_run(arena_chunk_t *chunk, arena_run_t *run, if (run == bin->runcur) bin->runcur = NULL; else { - index_t 
binind = arena_bin_index(chunk->arena, bin); + index_t binind = arena_bin_index(chunk->node.arena, bin); arena_bin_info_t *bin_info = &arena_bin_info[binind]; if (bin_info->nregs != 1) { @@ -2123,7 +2157,7 @@ arena_ralloc_large(void *ptr, size_t oldsize, size_t size, size_t extra, arena_t *arena; chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - arena = chunk->arena; + arena = chunk->node.arena; if (usize < oldsize) { /* Fill before shrinking in order avoid a race. */ @@ -2338,10 +2372,21 @@ arena_new(unsigned ind) arena->ind = ind; arena->nthreads = 0; + if (malloc_mutex_init(&arena->lock)) + return (NULL); arena->chunk_alloc = chunk_alloc_default; arena->chunk_dalloc = chunk_dalloc_default; - - if (malloc_mutex_init(&arena->lock)) + ql_new(&arena->huge); + if (malloc_mutex_init(&arena->huge_mtx)) + return (NULL); + extent_tree_szad_new(&arena->chunks_szad_mmap); + extent_tree_ad_new(&arena->chunks_ad_mmap); + extent_tree_szad_new(&arena->chunks_szad_dss); + extent_tree_ad_new(&arena->chunks_ad_dss); + ql_new(&arena->node_cache); + if (malloc_mutex_init(&arena->chunks_mtx)) + return (NULL); + if (malloc_mutex_init(&arena->node_cache_mtx)) return (NULL); if (config_stats) { @@ -2551,6 +2596,9 @@ arena_prefork(arena_t *arena) unsigned i; malloc_mutex_prefork(&arena->lock); + malloc_mutex_prefork(&arena->huge_mtx); + malloc_mutex_prefork(&arena->chunks_mtx); + malloc_mutex_prefork(&arena->node_cache_mtx); for (i = 0; i < NBINS; i++) malloc_mutex_prefork(&arena->bins[i].lock); } @@ -2562,6 +2610,9 @@ arena_postfork_parent(arena_t *arena) for (i = 0; i < NBINS; i++) malloc_mutex_postfork_parent(&arena->bins[i].lock); + malloc_mutex_postfork_parent(&arena->node_cache_mtx); + malloc_mutex_postfork_parent(&arena->chunks_mtx); + malloc_mutex_postfork_parent(&arena->huge_mtx); malloc_mutex_postfork_parent(&arena->lock); } @@ -2572,5 +2623,8 @@ arena_postfork_child(arena_t *arena) for (i = 0; i < NBINS; i++) malloc_mutex_postfork_child(&arena->bins[i].lock); + 
malloc_mutex_postfork_child(&arena->node_cache_mtx); + malloc_mutex_postfork_child(&arena->chunks_mtx); + malloc_mutex_postfork_child(&arena->huge_mtx); malloc_mutex_postfork_child(&arena->lock); } diff --git a/src/base.c b/src/base.c index 0d1de7fc..7b5804ee 100644 --- a/src/base.c +++ b/src/base.c @@ -11,8 +11,9 @@ static size_t base_allocated; /******************************************************************************/ +/* base_mtx must be held. */ static extent_node_t * -base_node_try_alloc_locked(void) +base_node_try_alloc(void) { extent_node_t *node; @@ -24,8 +25,9 @@ base_node_try_alloc_locked(void) return (node); } +/* base_mtx must be held. */ static void -base_node_dalloc_locked(extent_node_t *node) +base_node_dalloc(extent_node_t *node) { JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(node, sizeof(extent_node_t)); @@ -42,14 +44,14 @@ base_chunk_alloc(size_t minsize) void *addr; assert(minsize != 0); - node = base_node_try_alloc_locked(); + node = base_node_try_alloc(); /* Allocate enough space to also carve a node out if necessary. */ nsize = (node == NULL) ? CACHELINE_CEILING(sizeof(extent_node_t)) : 0; csize = CHUNK_CEILING(minsize + nsize); addr = chunk_alloc_base(csize); if (addr == NULL) { if (node != NULL) - base_node_dalloc_locked(node); + base_node_dalloc(node); return (NULL); } if (node == NULL) { @@ -63,8 +65,13 @@ base_chunk_alloc(size_t minsize) return (node); } -static void * -base_alloc_locked(size_t size) +/* + * base_alloc() guarantees demand-zeroed memory, in order to make multi-page + * sparse data structures such as radix tree nodes efficient with respect to + * physical memory usage. + */ +void * +base_alloc(size_t size) { void *ret; size_t csize; @@ -79,6 +86,7 @@ base_alloc_locked(size_t size) key.addr = NULL; key.size = csize; + malloc_mutex_lock(&base_mtx); node = extent_tree_szad_nsearch(&base_avail_szad, &key); if (node != NULL) { /* Use existing space. 
*/ @@ -87,8 +95,10 @@ base_alloc_locked(size_t size) /* Try to allocate more space. */ node = base_chunk_alloc(csize); } - if (node == NULL) - return (NULL); + if (node == NULL) { + ret = NULL; + goto label_return; + } ret = node->addr; if (node->size > csize) { @@ -96,50 +106,15 @@ base_alloc_locked(size_t size) node->size -= csize; extent_tree_szad_insert(&base_avail_szad, node); } else - base_node_dalloc_locked(node); + base_node_dalloc(node); if (config_stats) base_allocated += csize; JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, csize); - return (ret); -} - -/* - * base_alloc() guarantees demand-zeroed memory, in order to make multi-page - * sparse data structures such as radix tree nodes efficient with respect to - * physical memory usage. - */ -void * -base_alloc(size_t size) -{ - void *ret; - - malloc_mutex_lock(&base_mtx); - ret = base_alloc_locked(size); +label_return: malloc_mutex_unlock(&base_mtx); return (ret); } -extent_node_t * -base_node_alloc(void) -{ - extent_node_t *ret; - - malloc_mutex_lock(&base_mtx); - if ((ret = base_node_try_alloc_locked()) == NULL) - ret = (extent_node_t *)base_alloc_locked(sizeof(extent_node_t)); - malloc_mutex_unlock(&base_mtx); - return (ret); -} - -void -base_node_dalloc(extent_node_t *node) -{ - - malloc_mutex_lock(&base_mtx); - base_node_dalloc_locked(node); - malloc_mutex_unlock(&base_mtx); -} - size_t base_allocated_get(void) { diff --git a/src/chunk.c b/src/chunk.c index 9ba0b0cf..6f705ded 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -7,19 +7,9 @@ const char *opt_dss = DSS_DEFAULT; size_t opt_lg_chunk = LG_CHUNK_DEFAULT; -malloc_mutex_t chunks_mtx; -chunk_stats_t stats_chunks; - -/* - * Trees of chunks that were previously allocated (trees differ only in node - * ordering). These are used when allocating chunks, in an attempt to re-use - * address space. Depending on function, different tree orderings are needed, - * which is why there are two trees with the same contents. 
- */ -static extent_tree_t chunks_szad_mmap; -static extent_tree_t chunks_ad_mmap; -static extent_tree_t chunks_szad_dss; -static extent_tree_t chunks_ad_dss; +/* Used exclusively for gdump triggering. */ +static size_t curchunks; +static size_t highchunks; rtree_t chunks_rtree; @@ -29,18 +19,51 @@ size_t chunksize_mask; /* (chunksize - 1). */ size_t chunk_npages; /******************************************************************************/ -/* - * Function prototypes for static functions that are referenced prior to - * definition. - */ -static void chunk_dalloc_core(void *chunk, size_t size); +bool +chunk_register(const void *chunk, const extent_node_t *node) +{ -/******************************************************************************/ + assert(node->addr == chunk); + + if (rtree_set(&chunks_rtree, (uintptr_t)chunk, node)) + return (true); + if (config_prof && opt_prof) { + size_t nadd = (node->size == 0) ? 1 : node->size / chunksize; + size_t cur = atomic_add_z(&curchunks, nadd); + size_t high = atomic_read_z(&highchunks); + while (cur > high && atomic_cas_z(&highchunks, high, cur)) { + /* + * Don't refresh cur, because it may have decreased + * since this thread lost the highchunks update race. + */ + high = atomic_read_z(&highchunks); + } + if (cur > high && prof_gdump_get_unlocked()) + prof_gdump(); + } + + return (false); +} + +void +chunk_deregister(const void *chunk, const extent_node_t *node) +{ + bool err; + + err = rtree_set(&chunks_rtree, (uintptr_t)chunk, NULL); + assert(!err); + if (config_prof && opt_prof) { + size_t nsub = (node->size == 0) ? 
1 : node->size / chunksize; + assert(atomic_read_z(&curchunks) >= nsub); + atomic_sub_z(&curchunks, nsub); + } +} static void * -chunk_recycle(extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, - void *new_addr, size_t size, size_t alignment, bool base, bool *zero) +chunk_recycle(arena_t *arena, extent_tree_t *chunks_szad, + extent_tree_t *chunks_ad, void *new_addr, size_t size, size_t alignment, + bool *zero) { void *ret; extent_node_t *node; @@ -50,27 +73,17 @@ chunk_recycle(extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, assert(new_addr == NULL || alignment == chunksize); - if (base) { - /* - * This function may need to call base_node_{,de}alloc(), but - * the current chunk allocation request is on behalf of the - * base allocator. Avoid deadlock (and if that weren't an - * issue, potential for infinite recursion) by returning NULL. - */ - return (NULL); - } - alloc_size = size + alignment - chunksize; /* Beware size_t wrap-around. */ if (alloc_size < size) return (NULL); key.addr = new_addr; key.size = alloc_size; - malloc_mutex_lock(&chunks_mtx); + malloc_mutex_lock(&arena->chunks_mtx); node = (new_addr != NULL) ? extent_tree_ad_search(chunks_ad, &key) : extent_tree_szad_nsearch(chunks_szad, &key); if (node == NULL) { - malloc_mutex_unlock(&chunks_mtx); + malloc_mutex_unlock(&arena->chunks_mtx); return (NULL); } leadsize = ALIGNMENT_CEILING((uintptr_t)node->addr, alignment) - @@ -95,20 +108,12 @@ chunk_recycle(extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, if (trailsize != 0) { /* Insert the trailing space as a smaller chunk. */ if (node == NULL) { - /* - * An additional node is required, but - * base_node_alloc() can cause a new base chunk to be - * allocated. Drop chunks_mtx in order to avoid - * deadlock, and if node allocation fails, deallocate - * the result before returning an error. 
- */ - malloc_mutex_unlock(&chunks_mtx); - node = base_node_alloc(); + node = arena_node_alloc(arena); if (node == NULL) { - chunk_dalloc_core(ret, size); + malloc_mutex_unlock(&arena->chunks_mtx); + chunk_unmap(arena, ret, size); return (NULL); } - malloc_mutex_lock(&chunks_mtx); } node->addr = (void *)((uintptr_t)(ret) + size); node->size = trailsize; @@ -117,10 +122,10 @@ chunk_recycle(extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, extent_tree_ad_insert(chunks_ad, node); node = NULL; } - malloc_mutex_unlock(&chunks_mtx); + malloc_mutex_unlock(&arena->chunks_mtx); if (node != NULL) - base_node_dalloc(node); + arena_node_dalloc(arena, node); if (*zero) { if (!zeroed) memset(ret, 0, size); @@ -137,15 +142,15 @@ chunk_recycle(extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, } static void * -chunk_alloc_core_dss(void *new_addr, size_t size, size_t alignment, bool base, - bool *zero) +chunk_alloc_core_dss(arena_t *arena, void *new_addr, size_t size, + size_t alignment, bool *zero) { void *ret; - if ((ret = chunk_recycle(&chunks_szad_dss, &chunks_ad_dss, - new_addr, size, alignment, base, zero)) != NULL) + if ((ret = chunk_recycle(arena, &arena->chunks_szad_dss, + &arena->chunks_ad_dss, new_addr, size, alignment, zero)) != NULL) return (ret); - ret = chunk_alloc_dss(new_addr, size, alignment, zero); + ret = chunk_alloc_dss(arena, new_addr, size, alignment, zero); return (ret); } @@ -156,7 +161,7 @@ chunk_alloc_core_dss(void *new_addr, size_t size, size_t alignment, bool base, * them if they are returned. */ static void * -chunk_alloc_core(void *new_addr, size_t size, size_t alignment, bool base, +chunk_alloc_core(arena_t *arena, void *new_addr, size_t size, size_t alignment, bool *zero, dss_prec_t dss_prec) { void *ret; @@ -168,12 +173,13 @@ chunk_alloc_core(void *new_addr, size_t size, size_t alignment, bool base, /* "primary" dss. 
*/ if (have_dss && dss_prec == dss_prec_primary && (ret = - chunk_alloc_core_dss(new_addr, size, alignment, base, zero)) != + chunk_alloc_core_dss(arena, new_addr, size, alignment, zero)) != NULL) return (ret); /* mmap. */ - if (!config_munmap && (ret = chunk_recycle(&chunks_szad_mmap, - &chunks_ad_mmap, new_addr, size, alignment, base, zero)) != NULL) + if (!config_munmap && (ret = chunk_recycle(arena, + &arena->chunks_szad_mmap, &arena->chunks_ad_mmap, new_addr, size, + alignment, zero)) != NULL) return (ret); /* * Requesting an address is not implemented for chunk_alloc_mmap(), so @@ -184,7 +190,7 @@ chunk_alloc_core(void *new_addr, size_t size, size_t alignment, bool base, return (ret); /* "secondary" dss. */ if (have_dss && dss_prec == dss_prec_secondary && (ret = - chunk_alloc_core_dss(new_addr, size, alignment, base, zero)) != + chunk_alloc_core_dss(arena, new_addr, size, alignment, zero)) != NULL) return (ret); @@ -192,40 +198,6 @@ chunk_alloc_core(void *new_addr, size_t size, size_t alignment, bool base, return (NULL); } -static bool -chunk_register(void *chunk, size_t size, bool base) -{ - - assert(chunk != NULL); - assert(CHUNK_ADDR2BASE(chunk) == chunk); - - if (config_ivsalloc && !base) { - if (rtree_set(&chunks_rtree, (uintptr_t)chunk, chunk)) - return (true); - } - if (config_stats || config_prof) { - bool gdump; - malloc_mutex_lock(&chunks_mtx); - if (config_stats) - stats_chunks.nchunks += (size / chunksize); - stats_chunks.curchunks += (size / chunksize); - if (stats_chunks.curchunks > stats_chunks.highchunks) { - stats_chunks.highchunks = - stats_chunks.curchunks; - if (config_prof) - gdump = true; - } else if (config_prof) - gdump = false; - malloc_mutex_unlock(&chunks_mtx); - if (config_prof && opt_prof && prof_gdump_get_unlocked() && - gdump) - prof_gdump(); - } - if (config_valgrind) - JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(chunk, size); - return (false); -} - void * chunk_alloc_base(size_t size) { @@ -239,10 +211,10 @@ chunk_alloc_base(size_t 
size) */ zero = true; ret = chunk_alloc_mmap(size, chunksize, &zero); - if (ret != NULL && chunk_register(ret, size, true)) { - chunk_dalloc_core(ret, size); - ret = NULL; - } + if (ret == NULL) + return (NULL); + if (config_valgrind) + JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, size); return (ret); } @@ -255,18 +227,16 @@ chunk_alloc_arena(chunk_alloc_t *chunk_alloc, chunk_dalloc_t *chunk_dalloc, void *ret; ret = chunk_alloc(new_addr, size, alignment, zero, arena_ind); - if (ret != NULL && chunk_register(ret, size, false)) { - chunk_dalloc(ret, size, arena_ind); - ret = NULL; - } + if (ret == NULL) + return (NULL); + if (config_valgrind) + JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, size); return (ret); } -/* Default arena chunk allocation routine in the absence of user override. */ -void * -chunk_alloc_default(void *new_addr, size_t size, size_t alignment, bool *zero, - unsigned arena_ind) +static arena_t * +chunk_arena_get(unsigned arena_ind) { arena_t *arena; @@ -278,32 +248,32 @@ chunk_alloc_default(void *new_addr, size_t size, size_t alignment, bool *zero, * already. */ assert(arena != NULL); + return (arena); +} - return (chunk_alloc_core(new_addr, size, alignment, false, zero, +/* Default arena chunk allocation routine in the absence of user override. 
*/ +void * +chunk_alloc_default(void *new_addr, size_t size, size_t alignment, bool *zero, + unsigned arena_ind) +{ + arena_t *arena; + + arena = chunk_arena_get(arena_ind); + return (chunk_alloc_core(arena, new_addr, size, alignment, zero, arena->dss_prec)); } static void -chunk_record(extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, void *chunk, - size_t size) +chunk_record(arena_t *arena, extent_tree_t *chunks_szad, + extent_tree_t *chunks_ad, void *chunk, size_t size) { bool unzeroed; - extent_node_t *xnode, *node, *prev, *xprev, key; + extent_node_t *node, *prev, key; unzeroed = pages_purge(chunk, size); JEMALLOC_VALGRIND_MAKE_MEM_NOACCESS(chunk, size); - /* - * Allocate a node before acquiring chunks_mtx even though it might not - * be needed, because base_node_alloc() may cause a new base chunk to - * be allocated, which could cause deadlock if chunks_mtx were already - * held. - */ - xnode = base_node_alloc(); - /* Use xprev to implement conditional deferred deallocation of prev. */ - xprev = NULL; - - malloc_mutex_lock(&chunks_mtx); + malloc_mutex_lock(&arena->chunks_mtx); key.addr = (void *)((uintptr_t)chunk + size); node = extent_tree_ad_nsearch(chunks_ad, &key); /* Try to coalesce forward. */ @@ -320,17 +290,16 @@ chunk_record(extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, void *chunk, extent_tree_szad_insert(chunks_szad, node); } else { /* Coalescing forward failed, so insert a new node. */ - if (xnode == NULL) { + node = arena_node_alloc(arena); + if (node == NULL) { /* - * base_node_alloc() failed, which is an exceedingly + * Node allocation failed, which is an exceedingly * unlikely failure. Leak chunk; its pages have * already been purged, so this is only a virtual * memory leak. */ goto label_return; } - node = xnode; - xnode = NULL; /* Prevent deallocation below. 
*/ node->addr = chunk; node->size = size; node->zeroed = !unzeroed; @@ -356,37 +325,15 @@ chunk_record(extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, void *chunk, node->zeroed = (node->zeroed && prev->zeroed); extent_tree_szad_insert(chunks_szad, node); - xprev = prev; + arena_node_dalloc(arena, prev); } label_return: - malloc_mutex_unlock(&chunks_mtx); - /* - * Deallocate xnode and/or xprev after unlocking chunks_mtx in order to - * avoid potential deadlock. - */ - if (xnode != NULL) - base_node_dalloc(xnode); - if (xprev != NULL) - base_node_dalloc(xprev); + malloc_mutex_unlock(&arena->chunks_mtx); } void -chunk_unmap(void *chunk, size_t size) -{ - assert(chunk != NULL); - assert(CHUNK_ADDR2BASE(chunk) == chunk); - assert(size != 0); - assert((size & chunksize_mask) == 0); - - if (have_dss && chunk_in_dss(chunk)) - chunk_record(&chunks_szad_dss, &chunks_ad_dss, chunk, size); - else if (chunk_dalloc_mmap(chunk, size)) - chunk_record(&chunks_szad_mmap, &chunks_ad_mmap, chunk, size); -} - -static void -chunk_dalloc_core(void *chunk, size_t size) +chunk_unmap(arena_t *arena, void *chunk, size_t size) { assert(chunk != NULL); @@ -394,16 +341,13 @@ chunk_dalloc_core(void *chunk, size_t size) assert(size != 0); assert((size & chunksize_mask) == 0); - if (config_ivsalloc) - rtree_set(&chunks_rtree, (uintptr_t)chunk, NULL); - if (config_stats || config_prof) { - malloc_mutex_lock(&chunks_mtx); - assert(stats_chunks.curchunks >= (size / chunksize)); - stats_chunks.curchunks -= (size / chunksize); - malloc_mutex_unlock(&chunks_mtx); + if (have_dss && chunk_in_dss(chunk)) { + chunk_record(arena, &arena->chunks_szad_dss, + &arena->chunks_ad_dss, chunk, size); + } else if (chunk_dalloc_mmap(chunk, size)) { + chunk_record(arena, &arena->chunks_szad_mmap, + &arena->chunks_ad_mmap, chunk, size); } - - chunk_unmap(chunk, size); } /* Default arena chunk deallocation routine in the absence of user override. 
*/ @@ -411,7 +355,7 @@ bool chunk_dalloc_default(void *chunk, size_t size, unsigned arena_ind) { - chunk_dalloc_core(chunk, size); + chunk_unmap(chunk_arena_get(arena_ind), chunk, size); return (false); } @@ -433,21 +377,11 @@ chunk_boot(void) chunksize_mask = chunksize - 1; chunk_npages = (chunksize >> LG_PAGE); - if (malloc_mutex_init(&chunks_mtx)) - return (true); - if (config_stats || config_prof) - memset(&stats_chunks, 0, sizeof(chunk_stats_t)); if (have_dss && chunk_dss_boot()) return (true); - extent_tree_szad_new(&chunks_szad_mmap); - extent_tree_ad_new(&chunks_ad_mmap); - extent_tree_szad_new(&chunks_szad_dss); - extent_tree_ad_new(&chunks_ad_dss); - if (config_ivsalloc) { - if (rtree_new(&chunks_rtree, (ZU(1) << (LG_SIZEOF_PTR+3)) - - opt_lg_chunk, chunks_rtree_node_alloc, NULL)) - return (true); - } + if (rtree_new(&chunks_rtree, (ZU(1) << (LG_SIZEOF_PTR+3)) - + opt_lg_chunk, chunks_rtree_node_alloc, NULL)) + return (true); return (false); } @@ -456,7 +390,6 @@ void chunk_prefork(void) { - malloc_mutex_prefork(&chunks_mtx); chunk_dss_prefork(); } @@ -465,7 +398,6 @@ chunk_postfork_parent(void) { chunk_dss_postfork_parent(); - malloc_mutex_postfork_parent(&chunks_mtx); } void @@ -473,5 +405,4 @@ chunk_postfork_child(void) { chunk_dss_postfork_child(); - malloc_mutex_postfork_child(&chunks_mtx); } diff --git a/src/chunk_dss.c b/src/chunk_dss.c index edba3b23..9c3eea82 100644 --- a/src/chunk_dss.c +++ b/src/chunk_dss.c @@ -66,7 +66,8 @@ chunk_dss_prec_set(dss_prec_t dss_prec) } void * -chunk_alloc_dss(void *new_addr, size_t size, size_t alignment, bool *zero) +chunk_alloc_dss(arena_t *arena, void *new_addr, size_t size, size_t alignment, + bool *zero) { void *ret; @@ -133,7 +134,7 @@ chunk_alloc_dss(void *new_addr, size_t size, size_t alignment, bool *zero) dss_max = dss_next; malloc_mutex_unlock(&dss_mtx); if (cpad_size != 0) - chunk_unmap(cpad, cpad_size); + chunk_unmap(arena, cpad, cpad_size); if (*zero) { JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED( ret, 
size); diff --git a/src/ctl.c b/src/ctl.c index a2838032..cd7927fc 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -144,9 +144,6 @@ CTL_PROTO(prof_gdump) CTL_PROTO(prof_reset) CTL_PROTO(prof_interval) CTL_PROTO(lg_prof_sample) -CTL_PROTO(stats_chunks_current) -CTL_PROTO(stats_chunks_total) -CTL_PROTO(stats_chunks_high) CTL_PROTO(stats_arenas_i_small_allocated) CTL_PROTO(stats_arenas_i_small_nmalloc) CTL_PROTO(stats_arenas_i_small_ndalloc) @@ -363,12 +360,6 @@ static const ctl_named_node_t prof_node[] = { {NAME("lg_sample"), CTL(lg_prof_sample)} }; -static const ctl_named_node_t stats_chunks_node[] = { - {NAME("current"), CTL(stats_chunks_current)}, - {NAME("total"), CTL(stats_chunks_total)}, - {NAME("high"), CTL(stats_chunks_high)} -}; - static const ctl_named_node_t stats_arenas_i_metadata_node[] = { {NAME("mapped"), CTL(stats_arenas_i_metadata_mapped)}, {NAME("allocated"), CTL(stats_arenas_i_metadata_allocated)} @@ -473,7 +464,6 @@ static const ctl_named_node_t stats_node[] = { {NAME("active"), CTL(stats_active)}, {NAME("metadata"), CTL(stats_metadata)}, {NAME("mapped"), CTL(stats_mapped)}, - {NAME("chunks"), CHILD(named, stats_chunks)}, {NAME("arenas"), CHILD(indexed, stats_arenas)} }; @@ -688,14 +678,6 @@ ctl_refresh(void) unsigned i; VARIABLE_ARRAY(arena_t *, tarenas, ctl_stats.narenas); - if (config_stats) { - malloc_mutex_lock(&chunks_mtx); - ctl_stats.chunks.current = stats_chunks.curchunks; - ctl_stats.chunks.total = stats_chunks.nchunks; - ctl_stats.chunks.high = stats_chunks.highchunks; - malloc_mutex_unlock(&chunks_mtx); - } - /* * Clear sum stats, since they will be merged into by * ctl_arena_refresh(). 
@@ -733,7 +715,8 @@ ctl_refresh(void) + ctl_stats.arenas[ctl_stats.narenas].astats.metadata_mapped + ctl_stats.arenas[ctl_stats.narenas].astats .metadata_allocated; - ctl_stats.mapped = (ctl_stats.chunks.current << opt_lg_chunk); + ctl_stats.mapped = + ctl_stats.arenas[ctl_stats.narenas].astats.mapped; } ctl_epoch++; @@ -1950,11 +1933,6 @@ CTL_RO_CGEN(config_stats, stats_active, ctl_stats.active, size_t) CTL_RO_CGEN(config_stats, stats_metadata, ctl_stats.metadata, size_t) CTL_RO_CGEN(config_stats, stats_mapped, ctl_stats.mapped, size_t) -CTL_RO_CGEN(config_stats, stats_chunks_current, ctl_stats.chunks.current, - size_t) -CTL_RO_CGEN(config_stats, stats_chunks_total, ctl_stats.chunks.total, uint64_t) -CTL_RO_CGEN(config_stats, stats_chunks_high, ctl_stats.chunks.high, size_t) - CTL_RO_GEN(stats_arenas_i_dss, ctl_stats.arenas[mib[2]].dss, const char *) CTL_RO_GEN(stats_arenas_i_nthreads, ctl_stats.arenas[mib[2]].nthreads, unsigned) CTL_RO_GEN(stats_arenas_i_pactive, ctl_stats.arenas[mib[2]].pactive, size_t) diff --git a/src/huge.c b/src/huge.c index db0ecd51..00327277 100644 --- a/src/huge.c +++ b/src/huge.c @@ -2,15 +2,33 @@ #include "jemalloc/internal/jemalloc_internal.h" /******************************************************************************/ -/* Data. */ -/* Protects chunk-related data structures. */ -static malloc_mutex_t huge_mtx; +static extent_node_t * +huge_node_get(const void *ptr) +{ + extent_node_t *node; -/******************************************************************************/ + node = chunk_lookup(ptr); + assert(node->size != 0); -/* Tree of chunks that are stand-alone huge allocations. 
*/ -static extent_tree_t huge; + return (node); +} + +static bool +huge_node_set(const void *ptr, extent_node_t *node) +{ + + assert(node->addr == ptr); + assert(node->size != 0); + return (chunk_register(ptr, node)); +} + +static void +huge_node_unset(const void *ptr, const extent_node_t *node) +{ + + chunk_deregister(ptr, node); +} void * huge_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero, @@ -55,15 +73,22 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, return (NULL); } - /* Insert node into huge. */ node->addr = ret; node->size = usize; node->zeroed = is_zeroed; node->arena = arena; - malloc_mutex_lock(&huge_mtx); - extent_tree_ad_insert(&huge, node); - malloc_mutex_unlock(&huge_mtx); + if (huge_node_set(ret, node)) { + arena_chunk_dalloc_huge(arena, ret, usize); + idalloctm(tsd, node, tcache, true); + return (NULL); + } + + /* Insert node into huge. */ + malloc_mutex_lock(&arena->huge_mtx); + ql_elm_new(node, link_ql); + ql_tail_insert(&arena->huge, node, link_ql); + malloc_mutex_unlock(&arena->huge_mtx); if (zero || (config_fill && unlikely(opt_zero))) { if (!is_zeroed) @@ -74,32 +99,6 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, return (ret); } -static extent_node_t * -huge_node_locked(const void *ptr) -{ - extent_node_t *node, key; - - /* Extract from tree of huge allocations. 
*/ - key.addr = __DECONST(void *, ptr); - node = extent_tree_ad_search(&huge, &key); - assert(node != NULL); - assert(node->addr == ptr); - - return (node); -} - -static extent_node_t * -huge_node(const void *ptr) -{ - extent_node_t *node; - - malloc_mutex_lock(&huge_mtx); - node = huge_node_locked(ptr); - malloc_mutex_unlock(&huge_mtx); - - return (node); -} - #ifdef JEMALLOC_JET #undef huge_dalloc_junk #define huge_dalloc_junk JEMALLOC_N(huge_dalloc_junk_impl) @@ -152,15 +151,15 @@ huge_ralloc_no_move_similar(void *ptr, size_t oldsize, size_t usize, } else zeroed = true; - malloc_mutex_lock(&huge_mtx); - node = huge_node_locked(ptr); + node = huge_node_get(ptr); arena = node->arena; + malloc_mutex_lock(&arena->huge_mtx); /* Update the size of the huge allocation. */ assert(node->size != usize); node->size = usize; /* Clear node->zeroed if zeroing failed above. */ node->zeroed = (node->zeroed && zeroed); - malloc_mutex_unlock(&huge_mtx); + malloc_mutex_unlock(&arena->huge_mtx); arena_chunk_ralloc_huge_similar(arena, ptr, oldsize, usize); @@ -195,14 +194,14 @@ huge_ralloc_no_move_shrink(void *ptr, size_t oldsize, size_t usize) zeroed = false; } - malloc_mutex_lock(&huge_mtx); - node = huge_node_locked(ptr); + node = huge_node_get(ptr); arena = node->arena; + malloc_mutex_lock(&arena->huge_mtx); /* Update the size of the huge allocation. */ node->size = usize; /* Clear node->zeroed if zeroing failed above. */ node->zeroed = (node->zeroed && zeroed); - malloc_mutex_unlock(&huge_mtx); + malloc_mutex_unlock(&arena->huge_mtx); /* Zap the excess chunks. 
*/ arena_chunk_ralloc_huge_shrink(arena, ptr, oldsize, usize); @@ -221,11 +220,11 @@ huge_ralloc_no_move_expand(void *ptr, size_t oldsize, size_t size, bool zero) { return (true); } - malloc_mutex_lock(&huge_mtx); - node = huge_node_locked(ptr); + node = huge_node_get(ptr); arena = node->arena; + malloc_mutex_lock(&arena->huge_mtx); is_zeroed_subchunk = node->zeroed; - malloc_mutex_unlock(&huge_mtx); + malloc_mutex_unlock(&arena->huge_mtx); /* * Copy zero into is_zeroed_chunk and pass the copy to chunk_alloc(), so @@ -237,10 +236,10 @@ huge_ralloc_no_move_expand(void *ptr, size_t oldsize, size_t size, bool zero) { &is_zeroed_chunk)) return (true); - malloc_mutex_lock(&huge_mtx); + malloc_mutex_lock(&arena->huge_mtx); /* Update the size of the huge allocation. */ node->size = usize; - malloc_mutex_unlock(&huge_mtx); + malloc_mutex_unlock(&arena->huge_mtx); if (zero || (config_fill && unlikely(opt_zero))) { if (!is_zeroed_subchunk) { @@ -356,11 +355,14 @@ void huge_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache) { extent_node_t *node; + arena_t *arena; - malloc_mutex_lock(&huge_mtx); - node = huge_node_locked(ptr); - extent_tree_ad_remove(&huge, node); - malloc_mutex_unlock(&huge_mtx); + node = huge_node_get(ptr); + arena = node->arena; + huge_node_unset(ptr, node); + malloc_mutex_lock(&arena->huge_mtx); + ql_remove(&arena->huge, node, link_ql); + malloc_mutex_unlock(&arena->huge_mtx); huge_dalloc_junk(node->addr, node->size); arena_chunk_dalloc_huge(node->arena, node->addr, node->size); @@ -371,59 +373,50 @@ arena_t * huge_aalloc(const void *ptr) { - return (huge_node(ptr)->arena); + return (huge_node_get(ptr)->arena); } size_t huge_salloc(const void *ptr) { + size_t size; + extent_node_t *node; + arena_t *arena; - return (huge_node(ptr)->size); + node = huge_node_get(ptr); + arena = node->arena; + malloc_mutex_lock(&arena->huge_mtx); + size = node->size; + malloc_mutex_unlock(&arena->huge_mtx); + + return (size); } prof_tctx_t * huge_prof_tctx_get(const void *ptr) 
{ + prof_tctx_t *tctx; + extent_node_t *node; + arena_t *arena; - return (huge_node(ptr)->prof_tctx); + node = huge_node_get(ptr); + arena = node->arena; + malloc_mutex_lock(&arena->huge_mtx); + tctx = node->prof_tctx; + malloc_mutex_unlock(&arena->huge_mtx); + + return (tctx); } void huge_prof_tctx_set(const void *ptr, prof_tctx_t *tctx) { + extent_node_t *node; + arena_t *arena; - huge_node(ptr)->prof_tctx = tctx; -} - -bool -huge_boot(void) -{ - - /* Initialize chunks data. */ - if (malloc_mutex_init(&huge_mtx)) - return (true); - extent_tree_ad_new(&huge); - - return (false); -} - -void -huge_prefork(void) -{ - - malloc_mutex_prefork(&huge_mtx); -} - -void -huge_postfork_parent(void) -{ - - malloc_mutex_postfork_parent(&huge_mtx); -} - -void -huge_postfork_child(void) -{ - - malloc_mutex_postfork_child(&huge_mtx); + node = huge_node_get(ptr); + arena = node->arena; + malloc_mutex_lock(&arena->huge_mtx); + node->prof_tctx = tctx; + malloc_mutex_unlock(&arena->huge_mtx); } diff --git a/src/jemalloc.c b/src/jemalloc.c index 94477914..3903209b 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1195,8 +1195,6 @@ malloc_init_hard_a0_locked(void) return (true); if (config_tcache && tcache_boot()) malloc_mutex_unlock(&init_lock); - if (huge_boot()) - return (true); if (malloc_mutex_init(&arenas_lock)) return (true); /* @@ -2310,12 +2308,10 @@ je_sallocx(const void *ptr, int flags) assert(malloc_initialized() || IS_INITIALIZER); malloc_thread_init(); - if (config_ivsalloc) + if (config_debug) usize = ivsalloc(ptr, config_prof); - else { - assert(ptr != NULL); + else usize = isalloc(ptr, config_prof); - } return (usize); } @@ -2440,10 +2436,10 @@ je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr) assert(malloc_initialized() || IS_INITIALIZER); malloc_thread_init(); - if (config_ivsalloc) + if (config_debug) ret = ivsalloc(ptr, config_prof); else - ret = (ptr != NULL) ? isalloc(ptr, config_prof) : 0; + ret = (ptr == NULL) ? 
0 : isalloc(ptr, config_prof); return (ret); } @@ -2504,7 +2500,6 @@ _malloc_prefork(void) } chunk_prefork(); base_prefork(); - huge_prefork(); } #ifndef JEMALLOC_MUTEX_INIT_CB @@ -2524,7 +2519,6 @@ _malloc_postfork(void) assert(malloc_initialized()); /* Release all mutexes, now that fork() has completed. */ - huge_postfork_parent(); base_postfork_parent(); chunk_postfork_parent(); for (i = 0; i < narenas_total; i++) { @@ -2544,7 +2538,6 @@ jemalloc_postfork_child(void) assert(malloc_initialized()); /* Release all mutexes, now that fork() has completed. */ - huge_postfork_child(); base_postfork_child(); chunk_postfork_child(); for (i = 0; i < narenas_total; i++) { diff --git a/src/stats.c b/src/stats.c index 865f7757..e0f71651 100644 --- a/src/stats.c +++ b/src/stats.c @@ -547,8 +547,6 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, if (config_stats) { size_t *cactive; size_t allocated, active, metadata, mapped; - size_t chunks_current, chunks_high; - uint64_t chunks_total; CTL_GET("stats.cactive", &cactive, size_t *); CTL_GET("stats.allocated", &allocated, size_t); @@ -561,16 +559,6 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, malloc_cprintf(write_cb, cbopaque, "Current active ceiling: %zu\n", atomic_read_z(cactive)); - /* Print chunk stats. */ - CTL_GET("stats.chunks.total", &chunks_total, uint64_t); - CTL_GET("stats.chunks.high", &chunks_high, size_t); - CTL_GET("stats.chunks.current", &chunks_current, size_t); - malloc_cprintf(write_cb, cbopaque, "chunks: nchunks " - "highchunks curchunks\n"); - malloc_cprintf(write_cb, cbopaque, - " %13"PRIu64" %12zu %12zu\n", - chunks_total, chunks_high, chunks_current); - if (merged) { unsigned narenas; diff --git a/src/tcache.c b/src/tcache.c index 1166d60f..10c85dd3 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -102,7 +102,7 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_bin_t *tbin, index_t binind, /* Lock the arena bin associated with the first object. 
*/ arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE( tbin->avail[0]); - arena_t *bin_arena = chunk->arena; + arena_t *bin_arena = chunk->node.arena; arena_bin_t *bin = &bin_arena->bins[binind]; if (config_prof && bin_arena == arena) { @@ -124,7 +124,7 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_bin_t *tbin, index_t binind, ptr = tbin->avail[i]; assert(ptr != NULL); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - if (chunk->arena == bin_arena) { + if (chunk->node.arena == bin_arena) { size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; arena_chunk_map_bits_t *bitselm = @@ -182,7 +182,7 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, index_t binind, /* Lock the arena associated with the first object. */ arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE( tbin->avail[0]); - arena_t *locked_arena = chunk->arena; + arena_t *locked_arena = chunk->node.arena; UNUSED bool idump; if (config_prof) @@ -208,7 +208,7 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, index_t binind, ptr = tbin->avail[i]; assert(ptr != NULL); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - if (chunk->arena == locked_arena) { + if (chunk->node.arena == locked_arena) { arena_dalloc_large_junked_locked(locked_arena, chunk, ptr); } else { diff --git a/test/unit/stats.c b/test/unit/stats.c index 946e7370..10999670 100644 --- a/test/unit/stats.c +++ b/test/unit/stats.c @@ -29,32 +29,6 @@ TEST_BEGIN(test_stats_summary) } TEST_END -TEST_BEGIN(test_stats_chunks) -{ - size_t current, high; - uint64_t total; - size_t sz; - int expected = config_stats ? 
0 : ENOENT; - - sz = sizeof(size_t); - assert_d_eq(mallctl("stats.chunks.current", ¤t, &sz, NULL, 0), - expected, "Unexpected mallctl() result"); - sz = sizeof(uint64_t); - assert_d_eq(mallctl("stats.chunks.total", &total, &sz, NULL, 0), - expected, "Unexpected mallctl() result"); - sz = sizeof(size_t); - assert_d_eq(mallctl("stats.chunks.high", &high, &sz, NULL, 0), expected, - "Unexpected mallctl() result"); - - if (config_stats) { - assert_zu_le(current, high, - "current should be no larger than high"); - assert_u64_le((uint64_t)high, total, - "high should be no larger than total"); - } -} -TEST_END - TEST_BEGIN(test_stats_huge) { void *p; @@ -458,7 +432,6 @@ main(void) return (test( test_stats_summary, - test_stats_chunks, test_stats_huge, test_stats_arenas_summary, test_stats_arenas_small, From 1eaf3b6f345e0b5835549f19e844c81314c90435 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Thu, 12 Feb 2015 15:46:30 -0500 Subject: [PATCH 0640/3378] add missing check for new_addr chunk size 8ddc93293cd8370870f221225ef1e013fbff6d65 switched this to over using the address tree in order to avoid false negatives, so it now needs to check that the size of the free extent is large enough to satisfy the request. --- src/chunk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/chunk.c b/src/chunk.c index 6f705ded..b3576195 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -82,7 +82,7 @@ chunk_recycle(arena_t *arena, extent_tree_t *chunks_szad, malloc_mutex_lock(&arena->chunks_mtx); node = (new_addr != NULL) ? extent_tree_ad_search(chunks_ad, &key) : extent_tree_szad_nsearch(chunks_szad, &key); - if (node == NULL) { + if (node == NULL || (new_addr != NULL && node->size < size)) { malloc_mutex_unlock(&arena->chunks_mtx); return (NULL); } From 88fef7ceda6269598cef0cee8b984c8765673c27 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 12 Feb 2015 14:06:37 -0800 Subject: [PATCH 0641/3378] Refactor huge_*() calls into arena internals. 
Make redirects to the huge_*() API the arena code's responsibility, since arenas now take responsibility for all allocation sizes. --- include/jemalloc/internal/arena.h | 222 ++++++++++-------- .../jemalloc/internal/jemalloc_internal.h.in | 64 +---- include/jemalloc/internal/prof.h | 17 +- src/arena.c | 162 ++++++++----- 4 files changed, 238 insertions(+), 227 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 2ae4609e..77a7dcb6 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -391,7 +391,8 @@ void arena_dalloc_junk_small(void *ptr, arena_bin_info_t *bin_info); void arena_quarantine_junk_small(void *ptr, size_t usize); void *arena_malloc_small(arena_t *arena, size_t size, bool zero); void *arena_malloc_large(arena_t *arena, size_t size, bool zero); -void *arena_palloc(arena_t *arena, size_t size, size_t alignment, bool zero); +void *arena_palloc(tsd_t *tsd, arena_t *arena, size_t usize, + size_t alignment, bool zero, tcache_t *tcache); void arena_prof_promoted(const void *ptr, size_t size); void arena_dalloc_bin_junked_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr, arena_chunk_map_bits_t *bitselm); @@ -481,8 +482,7 @@ void *arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero, arena_t *arena_aalloc(const void *ptr); size_t arena_salloc(const void *ptr, bool demote); void arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache); -void arena_sdalloc(tsd_t *tsd, arena_chunk_t *chunk, void *ptr, size_t size, - tcache_t *tcache); +void arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_)) @@ -931,20 +931,22 @@ arena_prof_tctx_get(const void *ptr) { prof_tctx_t *ret; arena_chunk_t *chunk; - size_t pageind, mapbits; cassert(config_prof); assert(ptr != NULL); assert(CHUNK_ADDR2BASE(ptr) != ptr); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - pageind = ((uintptr_t)ptr - 
(uintptr_t)chunk) >> LG_PAGE; - mapbits = arena_mapbits_get(chunk, pageind); - assert((mapbits & CHUNK_MAP_ALLOCATED) != 0); - if (likely((mapbits & CHUNK_MAP_LARGE) == 0)) - ret = (prof_tctx_t *)(uintptr_t)1U; - else - ret = arena_miscelm_get(chunk, pageind)->prof_tctx; + if (likely(chunk != ptr)) { + size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; + size_t mapbits = arena_mapbits_get(chunk, pageind); + assert((mapbits & CHUNK_MAP_ALLOCATED) != 0); + if (likely((mapbits & CHUNK_MAP_LARGE) == 0)) + ret = (prof_tctx_t *)(uintptr_t)1U; + else + ret = arena_miscelm_get(chunk, pageind)->prof_tctx; + } else + ret = huge_prof_tctx_get(ptr); return (ret); } @@ -953,18 +955,20 @@ JEMALLOC_INLINE void arena_prof_tctx_set(const void *ptr, prof_tctx_t *tctx) { arena_chunk_t *chunk; - size_t pageind; cassert(config_prof); assert(ptr != NULL); assert(CHUNK_ADDR2BASE(ptr) != ptr); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - assert(arena_mapbits_allocated_get(chunk, pageind) != 0); + if (likely(chunk != ptr)) { + size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; + assert(arena_mapbits_allocated_get(chunk, pageind) != 0); - if (unlikely(arena_mapbits_large_get(chunk, pageind) != 0)) - arena_miscelm_get(chunk, pageind)->prof_tctx = tctx; + if (unlikely(arena_mapbits_large_get(chunk, pageind) != 0)) + arena_miscelm_get(chunk, pageind)->prof_tctx = tctx; + } else + huge_prof_tctx_set(ptr, tctx); } JEMALLOC_ALWAYS_INLINE void * @@ -984,7 +988,7 @@ arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero, return (NULL); return (arena_malloc_small(arena, size, zero)); } - } else { + } else if (likely(size <= arena_maxclass)) { /* * Initialize tcache after checking size in order to avoid * infinite recursion during tcache initialization. 
@@ -997,7 +1001,8 @@ arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero, return (NULL); return (arena_malloc_large(arena, size, zero)); } - } + } else + return (huge_malloc(tsd, arena, size, zero, tcache)); } JEMALLOC_ALWAYS_INLINE arena_t * @@ -1006,7 +1011,10 @@ arena_aalloc(const void *ptr) arena_chunk_t *chunk; chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - return (chunk->node.arena); + if (likely(chunk != ptr)) + return (chunk->node.arena); + else + return (huge_aalloc(ptr)); } /* Return the size of the allocation pointed to by ptr. */ @@ -1022,29 +1030,37 @@ arena_salloc(const void *ptr, bool demote) assert(CHUNK_ADDR2BASE(ptr) != ptr); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - assert(arena_mapbits_allocated_get(chunk, pageind) != 0); - binind = arena_mapbits_binind_get(chunk, pageind); - if (unlikely(binind == BININD_INVALID || (config_prof && !demote && - arena_mapbits_large_get(chunk, pageind) != 0))) { - /* - * Large allocation. In the common case (demote), and as this - * is an inline function, most callers will only end up looking - * at binind to determine that ptr is a small allocation. - */ - assert(((uintptr_t)ptr & PAGE_MASK) == 0); - ret = arena_mapbits_large_size_get(chunk, pageind); - assert(ret != 0); - assert(pageind + (ret>>LG_PAGE) <= chunk_npages); - assert(arena_mapbits_dirty_get(chunk, pageind) == - arena_mapbits_dirty_get(chunk, pageind+(ret>>LG_PAGE)-1)); - } else { - /* Small allocation (possibly promoted to a large object). 
*/ - assert(arena_mapbits_large_get(chunk, pageind) != 0 || - arena_ptr_small_binind_get(ptr, arena_mapbits_get(chunk, - pageind)) == binind); - ret = index2size(binind); - } + if (likely(chunk != ptr)) { + pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; + assert(arena_mapbits_allocated_get(chunk, pageind) != 0); + binind = arena_mapbits_binind_get(chunk, pageind); + if (unlikely(binind == BININD_INVALID || (config_prof && !demote + && arena_mapbits_large_get(chunk, pageind) != 0))) { + /* + * Large allocation. In the common case (demote), and + * as this is an inline function, most callers will only + * end up looking at binind to determine that ptr is a + * small allocation. + */ + assert(((uintptr_t)ptr & PAGE_MASK) == 0); + ret = arena_mapbits_large_size_get(chunk, pageind); + assert(ret != 0); + assert(pageind + (ret>>LG_PAGE) <= chunk_npages); + assert(arena_mapbits_dirty_get(chunk, pageind) == + arena_mapbits_dirty_get(chunk, + pageind+(ret>>LG_PAGE)-1)); + } else { + /* + * Small allocation (possibly promoted to a large + * object). + */ + assert(arena_mapbits_large_get(chunk, pageind) != 0 || + arena_ptr_small_binind_get(ptr, + arena_mapbits_get(chunk, pageind)) == binind); + ret = index2size(binind); + } + } else + ret = huge_salloc(ptr); return (ret); } @@ -1058,75 +1074,83 @@ arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache) assert(ptr != NULL); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - if (unlikely(chunk == ptr)) { - huge_dalloc(tsd, ptr, tcache); - return; - } - assert(CHUNK_ADDR2BASE(ptr) != ptr); - - pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - mapbits = arena_mapbits_get(chunk, pageind); - assert(arena_mapbits_allocated_get(chunk, pageind) != 0); - if (likely((mapbits & CHUNK_MAP_LARGE) == 0)) { - /* Small allocation. 
*/ - if (likely(tcache != NULL)) { - index_t binind = arena_ptr_small_binind_get(ptr, - mapbits); - tcache_dalloc_small(tsd, tcache, ptr, binind); + if (likely(chunk != ptr)) { + pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; + mapbits = arena_mapbits_get(chunk, pageind); + assert(arena_mapbits_allocated_get(chunk, pageind) != 0); + if (likely((mapbits & CHUNK_MAP_LARGE) == 0)) { + /* Small allocation. */ + if (likely(tcache != NULL)) { + index_t binind = arena_ptr_small_binind_get(ptr, + mapbits); + tcache_dalloc_small(tsd, tcache, ptr, binind); + } else { + arena_dalloc_small(chunk->node.arena, chunk, + ptr, pageind); + } } else { - arena_dalloc_small(chunk->node.arena, chunk, ptr, + size_t size = arena_mapbits_large_size_get(chunk, pageind); + + assert(((uintptr_t)ptr & PAGE_MASK) == 0); + + if (likely(tcache != NULL) && size <= tcache_maxclass) + tcache_dalloc_large(tsd, tcache, ptr, size); + else { + arena_dalloc_large(chunk->node.arena, chunk, + ptr); + } } - } else { - size_t size = arena_mapbits_large_size_get(chunk, pageind); - - assert(((uintptr_t)ptr & PAGE_MASK) == 0); - - if (likely(tcache != NULL) && size <= tcache_maxclass) - tcache_dalloc_large(tsd, tcache, ptr, size); - else - arena_dalloc_large(chunk->node.arena, chunk, ptr); - } + } else + huge_dalloc(tsd, ptr, tcache); } JEMALLOC_ALWAYS_INLINE void -arena_sdalloc(tsd_t *tsd, arena_chunk_t *chunk, void *ptr, size_t size, - tcache_t *tcache) +arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache) { + arena_chunk_t *chunk; - assert(ptr != NULL); - assert(CHUNK_ADDR2BASE(ptr) != ptr); - - if (config_prof && opt_prof) { - size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - assert(arena_mapbits_allocated_get(chunk, pageind) != 0); - if (arena_mapbits_large_get(chunk, pageind) != 0) { - /* Make sure to use promoted size, not request size. 
*/ - assert(((uintptr_t)ptr & PAGE_MASK) == 0); - size = arena_mapbits_large_size_get(chunk, pageind); - } - } - assert(s2u(size) == s2u(arena_salloc(ptr, false))); - - if (likely(size <= SMALL_MAXCLASS)) { - /* Small allocation. */ - if (likely(tcache != NULL)) { - index_t binind = size2index(size); - tcache_dalloc_small(tsd, tcache, ptr, binind); - } else { + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + if (likely(chunk != ptr)) { + if (config_prof && opt_prof) { size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - arena_dalloc_small(chunk->node.arena, chunk, ptr, - pageind); + assert(arena_mapbits_allocated_get(chunk, pageind) != 0); + if (arena_mapbits_large_get(chunk, pageind) != 0) { + /* + * Make sure to use promoted size, not request + * size. + */ + assert(((uintptr_t)ptr & PAGE_MASK) == 0); + size = arena_mapbits_large_size_get(chunk, + pageind); + } } - } else { - assert(((uintptr_t)ptr & PAGE_MASK) == 0); + assert(s2u(size) == s2u(arena_salloc(ptr, false))); - if (likely(tcache != NULL) && size <= tcache_maxclass) - tcache_dalloc_large(tsd, tcache, ptr, size); - else - arena_dalloc_large(chunk->node.arena, chunk, ptr); - } + if (likely(size <= SMALL_MAXCLASS)) { + /* Small allocation. 
*/ + if (likely(tcache != NULL)) { + index_t binind = size2index(size); + tcache_dalloc_small(tsd, tcache, ptr, binind); + } else { + size_t pageind = ((uintptr_t)ptr - + (uintptr_t)chunk) >> LG_PAGE; + arena_dalloc_small(chunk->node.arena, chunk, + ptr, pageind); + } + } else { + assert(((uintptr_t)ptr & PAGE_MASK) == 0); + + if (likely(tcache != NULL) && size <= tcache_maxclass) + tcache_dalloc_large(tsd, tcache, ptr, size); + else { + arena_dalloc_large(chunk->node.arena, chunk, + ptr); + } + } + } else + huge_dalloc(tsd, ptr, tcache); } # endif /* JEMALLOC_ARENA_INLINE_B */ #endif diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index ab93aa52..43276c6c 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -823,18 +823,10 @@ bool ixalloc(void *ptr, size_t oldsize, size_t size, size_t extra, JEMALLOC_ALWAYS_INLINE arena_t * iaalloc(const void *ptr) { - arena_t *arena; - arena_chunk_t *chunk; assert(ptr != NULL); - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - if (likely(chunk != ptr)) - arena = arena_aalloc(ptr); - else - arena = huge_aalloc(ptr); - - return (arena); + return (arena_aalloc(ptr)); } /* @@ -845,20 +837,12 @@ iaalloc(const void *ptr) JEMALLOC_ALWAYS_INLINE size_t isalloc(const void *ptr, bool demote) { - size_t ret; - arena_chunk_t *chunk; assert(ptr != NULL); /* Demotion only makes sense if config_prof is true. 
*/ assert(config_prof || !demote); - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - if (likely(chunk != ptr)) - ret = arena_salloc(ptr, demote); - else - ret = huge_salloc(ptr); - - return (ret); + return (arena_salloc(ptr, demote)); } JEMALLOC_ALWAYS_INLINE void * @@ -869,10 +853,7 @@ iallocztm(tsd_t *tsd, size_t size, bool zero, tcache_t *tcache, bool is_metadata assert(size != 0); - if (likely(size <= arena_maxclass)) - ret = arena_malloc(tsd, arena, size, zero, tcache); - else - ret = huge_malloc(tsd, arena, size, zero, tcache); + ret = arena_malloc(tsd, arena, size, zero, tcache); if (config_stats && is_metadata && likely(ret != NULL)) { arena_metadata_allocated_add(iaalloc(ret), isalloc(ret, config_prof)); @@ -917,21 +898,7 @@ ipallocztm(tsd_t *tsd, size_t usize, size_t alignment, bool zero, assert(usize != 0); assert(usize == sa2u(usize, alignment)); - if (usize <= SMALL_MAXCLASS && alignment < PAGE) - ret = arena_malloc(tsd, arena, usize, zero, tcache); - else { - if (likely(usize <= arena_maxclass)) { - arena = arena_choose(tsd, arena); - if (unlikely(arena == NULL)) - return (NULL); - ret = arena_palloc(arena, usize, alignment, zero); - } else if (likely(alignment <= chunksize)) - ret = huge_malloc(tsd, arena, usize, zero, tcache); - else { - ret = huge_palloc(tsd, arena, usize, alignment, zero, - tcache); - } - } + ret = arena_palloc(tsd, arena, usize, alignment, zero, tcache); assert(ALIGNMENT_ADDR2BASE(ret, alignment) == ret); if (config_stats && is_metadata && likely(ret != NULL)) { arena_metadata_allocated_add(iaalloc(ret), isalloc(ret, @@ -1033,15 +1000,8 @@ iqalloc(tsd_t *tsd, void *ptr, tcache_t *tcache) JEMALLOC_ALWAYS_INLINE void isdalloct(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache) { - arena_chunk_t *chunk; - assert(ptr != NULL); - - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - if (likely(chunk != ptr)) - arena_sdalloc(tsd, chunk, ptr, size, tcache); - else - huge_dalloc(tsd, ptr, tcache); + arena_sdalloc(tsd, ptr, size, 
tcache); } JEMALLOC_ALWAYS_INLINE void @@ -1104,13 +1064,8 @@ iralloct(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t alignment, zero, tcache, arena)); } - if (likely(size <= arena_maxclass)) { - return (arena_ralloc(tsd, arena, ptr, oldsize, size, 0, - alignment, zero, tcache)); - } else { - return (huge_ralloc(tsd, arena, ptr, oldsize, size, 0, - alignment, zero, tcache)); - } + return (arena_ralloc(tsd, arena, ptr, oldsize, size, 0, alignment, zero, + tcache)); } JEMALLOC_ALWAYS_INLINE void * @@ -1136,10 +1091,7 @@ ixalloc(void *ptr, size_t oldsize, size_t size, size_t extra, size_t alignment, return (true); } - if (likely(size <= arena_maxclass)) - return (arena_ralloc_no_move(ptr, oldsize, size, extra, zero)); - else - return (huge_ralloc_no_move(ptr, oldsize, size, extra, zero)); + return (arena_ralloc_no_move(ptr, oldsize, size, extra, zero)); } #endif diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index b2db6859..f5082438 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -372,34 +372,21 @@ prof_tdata_get(tsd_t *tsd, bool create) JEMALLOC_ALWAYS_INLINE prof_tctx_t * prof_tctx_get(const void *ptr) { - prof_tctx_t *ret; - arena_chunk_t *chunk; cassert(config_prof); assert(ptr != NULL); - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - if (likely(chunk != ptr)) - ret = arena_prof_tctx_get(ptr); - else - ret = huge_prof_tctx_get(ptr); - - return (ret); + return (arena_prof_tctx_get(ptr)); } JEMALLOC_ALWAYS_INLINE void prof_tctx_set(const void *ptr, prof_tctx_t *tctx) { - arena_chunk_t *chunk; cassert(config_prof); assert(ptr != NULL); - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - if (likely(chunk != ptr)) - arena_prof_tctx_set(ptr, tctx); - else - huge_prof_tctx_set(ptr, tctx); + arena_prof_tctx_set(ptr, tctx); } JEMALLOC_ALWAYS_INLINE bool diff --git a/src/arena.c b/src/arena.c index 2bd1a2c0..7b441bed 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1714,8 +1714,9 @@ 
arena_malloc_large(arena_t *arena, size_t size, bool zero) } /* Only handles large allocations that require more than page alignment. */ -void * -arena_palloc(arena_t *arena, size_t size, size_t alignment, bool zero) +static void * +arena_palloc_large(tsd_t *tsd, arena_t *arena, size_t size, size_t alignment, + bool zero) { void *ret; size_t alloc_size, leadsize, trailsize; @@ -1726,6 +1727,10 @@ arena_palloc(arena_t *arena, size_t size, size_t alignment, bool zero) assert((size & PAGE_MASK) == 0); + arena = arena_choose(tsd, arena); + if (unlikely(arena == NULL)) + return (NULL); + alignment = PAGE_CEILING(alignment); alloc_size = size + alignment - PAGE; @@ -1783,6 +1788,28 @@ arena_palloc(arena_t *arena, size_t size, size_t alignment, bool zero) return (ret); } +void * +arena_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, + bool zero, tcache_t *tcache) +{ + void *ret; + + if (usize <= SMALL_MAXCLASS && alignment < PAGE) + ret = arena_malloc(tsd, arena, usize, zero, tcache); + else { + if (likely(usize <= arena_maxclass)) { + ret = arena_palloc_large(tsd, arena, usize, alignment, + zero); + } else if (likely(alignment <= chunksize)) + ret = huge_malloc(tsd, arena, usize, zero, tcache); + else { + ret = huge_palloc(tsd, arena, usize, alignment, zero, + tcache); + } + } + return (ret); +} + void arena_prof_promoted(const void *ptr, size_t size) { @@ -2189,29 +2216,35 @@ arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, bool zero) { - /* - * Avoid moving the allocation if the size class can be left the same. 
- */ - if (likely(oldsize <= arena_maxclass)) { - if (oldsize <= SMALL_MAXCLASS) { - assert(arena_bin_info[size2index(oldsize)].reg_size - == oldsize); - if ((size + extra <= SMALL_MAXCLASS && size2index(size + - extra) == size2index(oldsize)) || (size <= oldsize - && size + extra >= oldsize)) - return (false); - } else { - assert(size <= arena_maxclass); - if (size + extra > SMALL_MAXCLASS) { - if (!arena_ralloc_large(ptr, oldsize, size, - extra, zero)) + if (likely(size <= arena_maxclass)) { + /* + * Avoid moving the allocation if the size class can be left the + * same. + */ + if (likely(oldsize <= arena_maxclass)) { + if (oldsize <= SMALL_MAXCLASS) { + assert( + arena_bin_info[size2index(oldsize)].reg_size + == oldsize); + if ((size + extra <= SMALL_MAXCLASS && + size2index(size + extra) == + size2index(oldsize)) || (size <= oldsize && + size + extra >= oldsize)) return (false); + } else { + assert(size <= arena_maxclass); + if (size + extra > SMALL_MAXCLASS) { + if (!arena_ralloc_large(ptr, oldsize, + size, extra, zero)) + return (false); + } } } - } - /* Reallocation would require a move. */ - return (true); + /* Reallocation would require a move. */ + return (true); + } else + return (huge_ralloc_no_move(ptr, oldsize, size, extra, zero)); } void * @@ -2219,52 +2252,67 @@ arena_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size, size_t extra, size_t alignment, bool zero, tcache_t *tcache) { void *ret; - size_t copysize; - /* Try to avoid moving the allocation. */ - if (!arena_ralloc_no_move(ptr, oldsize, size, extra, zero)) - return (ptr); + if (likely(size <= arena_maxclass)) { + size_t copysize; - /* - * size and oldsize are different enough that we need to move the - * object. In that case, fall back to allocating new space and - * copying. 
- */ - if (alignment != 0) { - size_t usize = sa2u(size + extra, alignment); - if (usize == 0) - return (NULL); - ret = ipalloct(tsd, usize, alignment, zero, tcache, arena); - } else - ret = arena_malloc(tsd, arena, size + extra, zero, tcache); + /* Try to avoid moving the allocation. */ + if (!arena_ralloc_no_move(ptr, oldsize, size, extra, zero)) + return (ptr); - if (ret == NULL) { - if (extra == 0) - return (NULL); - /* Try again, this time without extra. */ + /* + * size and oldsize are different enough that we need to move + * the object. In that case, fall back to allocating new space + * and copying. + */ if (alignment != 0) { - size_t usize = sa2u(size, alignment); + size_t usize = sa2u(size + extra, alignment); if (usize == 0) return (NULL); ret = ipalloct(tsd, usize, alignment, zero, tcache, arena); - } else - ret = arena_malloc(tsd, arena, size, zero, tcache); + } else { + ret = arena_malloc(tsd, arena, size + extra, zero, + tcache); + } - if (ret == NULL) - return (NULL); + if (ret == NULL) { + if (extra == 0) + return (NULL); + /* Try again, this time without extra. */ + if (alignment != 0) { + size_t usize = sa2u(size, alignment); + if (usize == 0) + return (NULL); + ret = ipalloct(tsd, usize, alignment, zero, + tcache, arena); + } else { + ret = arena_malloc(tsd, arena, size, zero, + tcache); + } + + if (ret == NULL) + return (NULL); + } + + /* + * Junk/zero-filling were already done by + * ipalloc()/arena_malloc(). + */ + + /* + * Copy at most size bytes (not size+extra), since the caller + * has no expectation that the extra bytes will be reliably + * preserved. + */ + copysize = (size < oldsize) ? size : oldsize; + JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, copysize); + memcpy(ret, ptr, copysize); + isqalloc(tsd, ptr, oldsize, tcache); + } else { + ret = huge_ralloc(tsd, arena, ptr, oldsize, size, extra, + alignment, zero, tcache); } - - /* Junk/zero-filling were already done by ipalloc()/arena_malloc(). 
*/ - - /* - * Copy at most size bytes (not size+extra), since the caller has no - * expectation that the extra bytes will be reliably preserved. - */ - copysize = (size < oldsize) ? size : oldsize; - JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, copysize); - memcpy(ret, ptr, copysize); - isqalloc(tsd, ptr, oldsize, tcache); return (ret); } From 5f7140b045136232b1bbe66fcf2a7f63d08682a1 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 12 Feb 2015 15:54:53 -0800 Subject: [PATCH 0642/3378] Make prof_tctx accesses atomic. Although exceedingly unlikely, it appears that writes to the prof_tctx field of arena_chunk_map_misc_t could be reordered such that a stale value could be read during deallocation, with profiler metadata corruption and invalid pointer dereferences being the most likely effects. --- include/jemalloc/internal/arena.h | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 77a7dcb6..4d88736d 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -943,8 +943,11 @@ arena_prof_tctx_get(const void *ptr) assert((mapbits & CHUNK_MAP_ALLOCATED) != 0); if (likely((mapbits & CHUNK_MAP_LARGE) == 0)) ret = (prof_tctx_t *)(uintptr_t)1U; - else - ret = arena_miscelm_get(chunk, pageind)->prof_tctx; + else { + arena_chunk_map_misc_t *elm = arena_miscelm_get(chunk, + pageind); + ret = atomic_read_p((void **)&elm->prof_tctx); + } } else ret = huge_prof_tctx_get(ptr); @@ -965,8 +968,11 @@ arena_prof_tctx_set(const void *ptr, prof_tctx_t *tctx) size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; assert(arena_mapbits_allocated_get(chunk, pageind) != 0); - if (unlikely(arena_mapbits_large_get(chunk, pageind) != 0)) - arena_miscelm_get(chunk, pageind)->prof_tctx = tctx; + if (unlikely(arena_mapbits_large_get(chunk, pageind) != 0)) { + arena_chunk_map_misc_t *elm = arena_miscelm_get(chunk, + pageind); + atomic_write_p((void 
**)&elm->prof_tctx, tctx); + } } else huge_prof_tctx_set(ptr, tctx); } From ab5e3790f6bc2dc0c4d7c3d537387cf2563456ff Mon Sep 17 00:00:00 2001 From: Dan McGregor Date: Tue, 23 Dec 2014 16:09:32 -0600 Subject: [PATCH 0643/3378] Build docs in object directory --- Makefile.in | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Makefile.in b/Makefile.in index da397c38..b1d88af9 100644 --- a/Makefile.in +++ b/Makefile.in @@ -104,8 +104,8 @@ endif PC := $(objroot)jemalloc.pc MAN3 := $(objroot)doc/jemalloc$(install_suffix).3 DOCS_XML := $(objroot)doc/jemalloc$(install_suffix).xml -DOCS_HTML := $(DOCS_XML:$(objroot)%.xml=$(srcroot)%.html) -DOCS_MAN3 := $(DOCS_XML:$(objroot)%.xml=$(srcroot)%.3) +DOCS_HTML := $(DOCS_XML:$(objroot)%.xml=$(objroot)%.html) +DOCS_MAN3 := $(DOCS_XML:$(objroot)%.xml=$(objroot)%.3) DOCS := $(DOCS_HTML) $(DOCS_MAN3) C_TESTLIB_SRCS := $(srcroot)test/src/btalloc.c $(srcroot)test/src/btalloc_0.c \ $(srcroot)test/src/btalloc_1.c $(srcroot)test/src/math.c \ @@ -181,10 +181,10 @@ all: build_lib dist: build_doc -$(srcroot)doc/%.html : $(objroot)doc/%.xml $(srcroot)doc/stylesheet.xsl $(objroot)doc/html.xsl +$(objroot)doc/%.html : $(objroot)doc/%.xml $(srcroot)doc/stylesheet.xsl $(objroot)doc/html.xsl $(XSLTPROC) -o $@ $(objroot)doc/html.xsl $< -$(srcroot)doc/%.3 : $(objroot)doc/%.xml $(srcroot)doc/stylesheet.xsl $(objroot)doc/manpages.xsl +$(objroot)doc/%.3 : $(objroot)doc/%.xml $(srcroot)doc/stylesheet.xsl $(objroot)doc/manpages.xsl $(XSLTPROC) -o $@ $(objroot)doc/manpages.xsl $< build_doc_html: $(DOCS_HTML) From f8880310ebb0ad5e1acce6e9886395e20041a32f Mon Sep 17 00:00:00 2001 From: Dan McGregor Date: Tue, 23 Dec 2014 16:10:08 -0600 Subject: [PATCH 0644/3378] Put VERSION file in object directory Also allow for the possibility that there exists a VERSION file in the srcroot, in case of building from a release tarball out of tree. 
--- Makefile.in | 2 +- configure.ac | 26 +++++++++++++++----------- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/Makefile.in b/Makefile.in index b1d88af9..a105bb12 100644 --- a/Makefile.in +++ b/Makefile.in @@ -418,7 +418,7 @@ distclean: clean relclean: distclean rm -f $(objroot)configure - rm -f $(srcroot)VERSION + rm -f $(objroot)VERSION rm -f $(DOCS_HTML) rm -f $(DOCS_MAN3) diff --git a/configure.ac b/configure.ac index 2922880a..240d27af 100644 --- a/configure.ac +++ b/configure.ac @@ -1046,32 +1046,36 @@ dnl jemalloc configuration. dnl dnl Set VERSION if source directory is inside a git repository. -if test "x`git rev-parse --is-inside-work-tree 2>/dev/null`" = "xtrue" ; then +if test "x`test ! \"${srcroot}\" && cd \"${srcroot}\"; git rev-parse --is-inside-work-tree 2>/dev/null`" = "xtrue" ; then dnl Pattern globs aren't powerful enough to match both single- and dnl double-digit version numbers, so iterate over patterns to support up to dnl version 99.99.99 without any accidental matches. - rm -f "${srcroot}VERSION" + rm -f "${objroot}VERSION" for pattern in ['[0-9].[0-9].[0-9]' '[0-9].[0-9].[0-9][0-9]' \ '[0-9].[0-9][0-9].[0-9]' '[0-9].[0-9][0-9].[0-9][0-9]' \ '[0-9][0-9].[0-9].[0-9]' '[0-9][0-9].[0-9].[0-9][0-9]' \ '[0-9][0-9].[0-9][0-9].[0-9]' \ '[0-9][0-9].[0-9][0-9].[0-9][0-9]']; do - if test ! -e "${srcroot}VERSION" ; then - git describe --long --abbrev=40 --match="${pattern}" > "${srcroot}VERSION.tmp" 2>/dev/null + if test ! -e "${objroot}VERSION" ; then + (test ! "${srcroot}" && cd "${srcroot}"; git describe --long --abbrev=40 --match="${pattern}") > "${objroot}VERSION.tmp" 2>/dev/null if test $? -eq 0 ; then - mv "${srcroot}VERSION.tmp" "${srcroot}VERSION" + mv "${objroot}VERSION.tmp" "${objroot}VERSION" break fi fi done fi -rm -f "${srcroot}VERSION.tmp" -if test ! 
-e "${srcroot}VERSION" ; then - AC_MSG_RESULT( - [Missing VERSION file, and unable to generate it; creating bogus VERSION]) - echo "0.0.0-0-g0000000000000000000000000000000000000000" > "${srcroot}VERSION" +rm -f "${objroot}VERSION.tmp" +if test ! -e "${objroot}VERSION" ; then + if test ! -e "${srcroot}VERSION" ; then + AC_MSG_RESULT( + [Missing VERSION file, and unable to generate it; creating bogus VERSION]) + echo "0.0.0-0-g0000000000000000000000000000000000000000" > "${objroot}VERSION" + else + cp ${srcroot}VERSION ${objroot}VERSION + fi fi -jemalloc_version=`cat "${srcroot}VERSION"` +jemalloc_version=`cat "${objroot}VERSION"` jemalloc_version_major=`echo ${jemalloc_version} | tr ".g-" " " | awk '{print [$]1}'` jemalloc_version_minor=`echo ${jemalloc_version} | tr ".g-" " " | awk '{print [$]2}'` jemalloc_version_bugfix=`echo ${jemalloc_version} | tr ".g-" " " | awk '{print [$]3}'` From feaaa3df0da9972b9c5016c55b886e54853cc855 Mon Sep 17 00:00:00 2001 From: Abhishek Kulkarni Date: Wed, 11 Feb 2015 14:38:10 -0500 Subject: [PATCH 0645/3378] Take into account the install suffix that jemalloc was built with in the pkg-config file. Signed-off-by: Abhishek Kulkarni --- jemalloc.pc.in | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/jemalloc.pc.in b/jemalloc.pc.in index af3f945d..1a3ad9b3 100644 --- a/jemalloc.pc.in +++ b/jemalloc.pc.in @@ -2,10 +2,11 @@ prefix=@prefix@ exec_prefix=@exec_prefix@ libdir=@libdir@ includedir=@includedir@ +install_suffix=@install_suffix@ Name: jemalloc Description: A general purpose malloc(3) implementation that emphasizes fragmentation avoidance and scalable concurrency support. 
URL: http://www.canonware.com/jemalloc Version: @jemalloc_version@ Cflags: -I${includedir} -Libs: -L${libdir} -ljemalloc +Libs: -L${libdir} -ljemalloc${install_suffix} From 41cfe03f39740fe61cf46d86982f66c24168de32 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 13 Feb 2015 15:28:56 -0800 Subject: [PATCH 0646/3378] If MALLOCX_ARENA(a) is specified, use it during tcache fill. --- include/jemalloc/internal/arena.h | 26 ++++++++++++-------------- include/jemalloc/internal/tcache.h | 28 +++++++++++++++------------- src/tcache.c | 19 ++++++++++--------- 3 files changed, 37 insertions(+), 36 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 4d88736d..b195daf0 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -985,28 +985,26 @@ arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero, assert(size != 0); assert(size <= arena_maxclass); + arena = arena_choose(tsd, arena); + if (unlikely(arena == NULL)) + return (NULL); + if (likely(size <= SMALL_MAXCLASS)) { - if (likely(tcache != NULL)) - return (tcache_alloc_small(tsd, tcache, size, zero)); - else { - arena = arena_choose(tsd, arena); - if (unlikely(arena == NULL)) - return (NULL); + if (likely(tcache != NULL)) { + return (tcache_alloc_small(tsd, arena, tcache, size, + zero)); + } else return (arena_malloc_small(arena, size, zero)); - } } else if (likely(size <= arena_maxclass)) { /* * Initialize tcache after checking size in order to avoid * infinite recursion during tcache initialization. 
*/ - if (likely(tcache != NULL) && size <= tcache_maxclass) - return (tcache_alloc_large(tsd, tcache, size, zero)); - else { - arena = arena_choose(tsd, arena); - if (unlikely(arena == NULL)) - return (NULL); + if (likely(tcache != NULL) && size <= tcache_maxclass) { + return (tcache_alloc_large(tsd, arena, tcache, size, + zero)); + } else return (arena_malloc_large(arena, size, zero)); - } } else return (huge_malloc(tsd, arena, size, zero, tcache)); } diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index 2a3952be..d2443b12 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -120,10 +120,10 @@ extern tcaches_t *tcaches; size_t tcache_salloc(const void *ptr); void tcache_event_hard(tsd_t *tsd, tcache_t *tcache); -void *tcache_alloc_small_hard(tsd_t *tsd, tcache_t *tcache, +void *tcache_alloc_small_hard(tsd_t *tsd, arena_t *arena, tcache_t *tcache, tcache_bin_t *tbin, index_t binind); -void tcache_bin_flush_small(tsd_t *tsd, tcache_bin_t *tbin, index_t binind, - unsigned rem, tcache_t *tcache); +void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, + index_t binind, unsigned rem); void tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, index_t binind, unsigned rem, tcache_t *tcache); void tcache_arena_associate(tcache_t *tcache, arena_t *arena); @@ -151,10 +151,10 @@ bool tcache_enabled_get(void); tcache_t *tcache_get(tsd_t *tsd, bool create); void tcache_enabled_set(bool enabled); void *tcache_alloc_easy(tcache_bin_t *tbin); -void *tcache_alloc_small(tsd_t *tsd, tcache_t *tcache, size_t size, - bool zero); -void *tcache_alloc_large(tsd_t *tsd, tcache_t *tcache, size_t size, - bool zero); +void *tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, + size_t size, bool zero); +void *tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, + size_t size, bool zero); void tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, index_t binind); 
void tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, @@ -258,7 +258,8 @@ tcache_alloc_easy(tcache_bin_t *tbin) } JEMALLOC_ALWAYS_INLINE void * -tcache_alloc_small(tsd_t *tsd, tcache_t *tcache, size_t size, bool zero) +tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, + bool zero) { void *ret; index_t binind; @@ -271,7 +272,7 @@ tcache_alloc_small(tsd_t *tsd, tcache_t *tcache, size_t size, bool zero) usize = index2size(binind); ret = tcache_alloc_easy(tbin); if (unlikely(ret == NULL)) { - ret = tcache_alloc_small_hard(tsd, tcache, tbin, binind); + ret = tcache_alloc_small_hard(tsd, arena, tcache, tbin, binind); if (ret == NULL) return (NULL); } @@ -302,7 +303,8 @@ tcache_alloc_small(tsd_t *tsd, tcache_t *tcache, size_t size, bool zero) } JEMALLOC_ALWAYS_INLINE void * -tcache_alloc_large(tsd_t *tsd, tcache_t *tcache, size_t size, bool zero) +tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, + bool zero) { void *ret; index_t binind; @@ -320,7 +322,7 @@ tcache_alloc_large(tsd_t *tsd, tcache_t *tcache, size_t size, bool zero) * Only allocate one large object at a time, because it's quite * expensive to create one and not use it. 
*/ - ret = arena_malloc_large(arena_choose(tsd, NULL), usize, zero); + ret = arena_malloc_large(arena, usize, zero); if (ret == NULL) return (NULL); } else { @@ -366,8 +368,8 @@ tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, index_t binind) tbin = &tcache->tbins[binind]; tbin_info = &tcache_bin_info[binind]; if (unlikely(tbin->ncached == tbin_info->ncached_max)) { - tcache_bin_flush_small(tsd, tbin, binind, - (tbin_info->ncached_max >> 1), tcache); + tcache_bin_flush_small(tsd, tcache, tbin, binind, + (tbin_info->ncached_max >> 1)); } assert(tbin->ncached < tbin_info->ncached_max); tbin->avail[tbin->ncached] = ptr; diff --git a/src/tcache.c b/src/tcache.c index 10c85dd3..318e0dc8 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -41,8 +41,9 @@ tcache_event_hard(tsd_t *tsd, tcache_t *tcache) * Flush (ceiling) 3/4 of the objects below the low water mark. */ if (binind < NBINS) { - tcache_bin_flush_small(tsd, tbin, binind, tbin->ncached - - tbin->low_water + (tbin->low_water >> 2), tcache); + tcache_bin_flush_small(tsd, tcache, tbin, binind, + tbin->ncached - tbin->low_water + (tbin->low_water + >> 2)); } else { tcache_bin_flush_large(tsd, tbin, binind, tbin->ncached - tbin->low_water + (tbin->low_water >> 2), tcache); @@ -70,13 +71,13 @@ tcache_event_hard(tsd_t *tsd, tcache_t *tcache) } void * -tcache_alloc_small_hard(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, - index_t binind) +tcache_alloc_small_hard(tsd_t *tsd, arena_t *arena, tcache_t *tcache, + tcache_bin_t *tbin, index_t binind) { void *ret; - arena_tcache_fill_small(arena_choose(tsd, NULL), tbin, binind, - config_prof ? tcache->prof_accumbytes : 0); + arena_tcache_fill_small(arena, tbin, binind, config_prof ? 
+ tcache->prof_accumbytes : 0); if (config_prof) tcache->prof_accumbytes = 0; ret = tcache_alloc_easy(tbin); @@ -85,8 +86,8 @@ tcache_alloc_small_hard(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, } void -tcache_bin_flush_small(tsd_t *tsd, tcache_bin_t *tbin, index_t binind, - unsigned rem, tcache_t *tcache) +tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, + index_t binind, unsigned rem) { arena_t *arena; void *ptr; @@ -350,7 +351,7 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache) for (i = 0; i < NBINS; i++) { tcache_bin_t *tbin = &tcache->tbins[i]; - tcache_bin_flush_small(tsd, tbin, i, 0, tcache); + tcache_bin_flush_small(tsd, tcache, tbin, i, 0); if (config_stats && tbin->tstats.nrequests != 0) { arena_bin_t *bin = &arena->bins[i]; From b01186cebd9828e91a488d86980544bacb01e1a6 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 15 Feb 2015 14:04:55 -0800 Subject: [PATCH 0647/3378] Remove redundant tcache_boot() call. --- src/jemalloc.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 3903209b..d5110092 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1193,8 +1193,6 @@ malloc_init_hard_a0_locked(void) arena_boot(); if (config_tcache && tcache_boot()) return (true); - if (config_tcache && tcache_boot()) - malloc_mutex_unlock(&init_lock); if (malloc_mutex_init(&arenas_lock)) return (true); /* From 2195ba4e1f8f262b7e6586106d90f4dc0aea7630 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 15 Feb 2015 16:43:52 -0800 Subject: [PATCH 0648/3378] Normalize *_link and link_* fields to all be *_link. 
--- include/jemalloc/internal/extent.h | 6 +++--- src/arena.c | 8 ++++---- src/extent.c | 5 ++--- src/huge.c | 6 +++--- 4 files changed, 12 insertions(+), 13 deletions(-) diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index fbcdcf99..885f475b 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -34,14 +34,14 @@ struct extent_node_s { union { /* Linkage for the size/address-ordered tree. */ - rb_node(extent_node_t) link_szad; + rb_node(extent_node_t) szad_link; /* Linkage for huge allocations and cached chunks nodes. */ - ql_elm(extent_node_t) link_ql; + ql_elm(extent_node_t) ql_link; }; /* Linkage for the address-ordered tree. */ - rb_node(extent_node_t) link_ad; + rb_node(extent_node_t) ad_link; }; typedef rb_tree(extent_node_t) extent_tree_t; diff --git a/src/arena.c b/src/arena.c index 7b441bed..ce500f41 100644 --- a/src/arena.c +++ b/src/arena.c @@ -607,12 +607,12 @@ arena_node_alloc(arena_t *arena) extent_node_t *node; malloc_mutex_lock(&arena->node_cache_mtx); - node = ql_last(&arena->node_cache, link_ql); + node = ql_last(&arena->node_cache, ql_link); if (node == NULL) { malloc_mutex_unlock(&arena->node_cache_mtx); return (base_alloc(sizeof(extent_node_t))); } - ql_tail_remove(&arena->node_cache, extent_node_t, link_ql); + ql_tail_remove(&arena->node_cache, extent_node_t, ql_link); malloc_mutex_unlock(&arena->node_cache_mtx); return (node); } @@ -622,8 +622,8 @@ arena_node_dalloc(arena_t *arena, extent_node_t *node) { malloc_mutex_lock(&arena->node_cache_mtx); - ql_elm_new(node, link_ql); - ql_tail_insert(&arena->node_cache, node, link_ql); + ql_elm_new(node, ql_link); + ql_tail_insert(&arena->node_cache, node, ql_link); malloc_mutex_unlock(&arena->node_cache_mtx); } diff --git a/src/extent.c b/src/extent.c index ca852016..60e24683 100644 --- a/src/extent.c +++ b/src/extent.c @@ -22,7 +22,7 @@ extent_szad_comp(extent_node_t *a, extent_node_t *b) } /* Generate red-black tree 
functions. */ -rb_gen(, extent_tree_szad_, extent_tree_t, extent_node_t, link_szad, +rb_gen(, extent_tree_szad_, extent_tree_t, extent_node_t, szad_link, extent_szad_comp) JEMALLOC_INLINE_C int @@ -35,5 +35,4 @@ extent_ad_comp(extent_node_t *a, extent_node_t *b) } /* Generate red-black tree functions. */ -rb_gen(, extent_tree_ad_, extent_tree_t, extent_node_t, link_ad, - extent_ad_comp) +rb_gen(, extent_tree_ad_, extent_tree_t, extent_node_t, ad_link, extent_ad_comp) diff --git a/src/huge.c b/src/huge.c index 00327277..bc7d99cb 100644 --- a/src/huge.c +++ b/src/huge.c @@ -86,8 +86,8 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, /* Insert node into huge. */ malloc_mutex_lock(&arena->huge_mtx); - ql_elm_new(node, link_ql); - ql_tail_insert(&arena->huge, node, link_ql); + ql_elm_new(node, ql_link); + ql_tail_insert(&arena->huge, node, ql_link); malloc_mutex_unlock(&arena->huge_mtx); if (zero || (config_fill && unlikely(opt_zero))) { @@ -361,7 +361,7 @@ huge_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache) arena = node->arena; huge_node_unset(ptr, node); malloc_mutex_lock(&arena->huge_mtx); - ql_remove(&arena->huge, node, link_ql); + ql_remove(&arena->huge, node, ql_link); malloc_mutex_unlock(&arena->huge_mtx); huge_dalloc_junk(node->addr, node->size); From 02e5dcf39d4995d2f37d0b18aa8511973938ac51 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 15 Feb 2015 20:12:06 -0800 Subject: [PATCH 0649/3378] Fix --enable-debug regression. Fix --enable-debug to actually enable debug mode. This regression was introduced by cbf3a6d70371d2390b8b0e76814e04cc6088002c (Move centralized chunk management into arenas.). 
--- configure.ac | 3 +++ 1 file changed, 3 insertions(+) diff --git a/configure.ac b/configure.ac index 240d27af..7a694a20 100644 --- a/configure.ac +++ b/configure.ac @@ -634,6 +634,9 @@ fi ], [enable_debug="0"] ) +if test "x$enable_debug" = "x1" ; then + AC_DEFINE([JEMALLOC_DEBUG], [ ]) +fi AC_SUBST([enable_debug]) dnl Only optimize if not debugging. From cb9b44914e7e25c6b08af7124d7f8f976e059555 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 15 Feb 2015 20:13:28 -0800 Subject: [PATCH 0650/3378] Remove obsolete (incorrect) assertions. This regression was introduced by 88fef7ceda6269598cef0cee8b984c8765673c27 (Refactor huge_*() calls into arena internals.), and went undetected because of the --enable-debug regression. --- include/jemalloc/internal/arena.h | 2 -- test/integration/mallocx.c | 45 ++++++++++++++++--------------- 2 files changed, 24 insertions(+), 23 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index b195daf0..232e9a61 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -983,7 +983,6 @@ arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero, { assert(size != 0); - assert(size <= arena_maxclass); arena = arena_choose(tsd, arena); if (unlikely(arena == NULL)) @@ -1031,7 +1030,6 @@ arena_salloc(const void *ptr, bool demote) index_t binind; assert(ptr != NULL); - assert(CHUNK_ADDR2BASE(ptr) != ptr); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (likely(chunk != ptr)) { diff --git a/test/integration/mallocx.c b/test/integration/mallocx.c index 123e041f..23129c20 100644 --- a/test/integration/mallocx.c +++ b/test/integration/mallocx.c @@ -2,34 +2,37 @@ #define CHUNK 0x400000 #define MAXALIGN (((size_t)1) << 25) +#define MAXSZ (((size_t)1) << 26) #define NITER 4 TEST_BEGIN(test_basic) { - size_t nsz, rsz, sz; - void *p; + size_t sz; - sz = 42; - nsz = nallocx(sz, 0); - assert_zu_ne(nsz, 0, "Unexpected nallocx() error"); - p = mallocx(sz, 0); - 
assert_ptr_not_null(p, "Unexpected mallocx() error"); - rsz = sallocx(p, 0); - assert_zu_ge(rsz, sz, "Real size smaller than expected"); - assert_zu_eq(nsz, rsz, "nallocx()/sallocx() size mismatch"); - dallocx(p, 0); + for (sz = 1; sz < MAXSZ; sz = nallocx(sz, 0) + 1) { + size_t nsz, rsz; + void *p; + nsz = nallocx(sz, 0); + assert_zu_ne(nsz, 0, "Unexpected nallocx() error"); + p = mallocx(sz, 0); + assert_ptr_not_null(p, "Unexpected mallocx() error"); + rsz = sallocx(p, 0); + assert_zu_ge(rsz, sz, "Real size smaller than expected"); + assert_zu_eq(nsz, rsz, "nallocx()/sallocx() size mismatch"); + dallocx(p, 0); - p = mallocx(sz, 0); - assert_ptr_not_null(p, "Unexpected mallocx() error"); - dallocx(p, 0); + p = mallocx(sz, 0); + assert_ptr_not_null(p, "Unexpected mallocx() error"); + dallocx(p, 0); - nsz = nallocx(sz, MALLOCX_ZERO); - assert_zu_ne(nsz, 0, "Unexpected nallocx() error"); - p = mallocx(sz, MALLOCX_ZERO); - assert_ptr_not_null(p, "Unexpected mallocx() error"); - rsz = sallocx(p, 0); - assert_zu_eq(nsz, rsz, "nallocx()/sallocx() rsize mismatch"); - dallocx(p, 0); + nsz = nallocx(sz, MALLOCX_ZERO); + assert_zu_ne(nsz, 0, "Unexpected nallocx() error"); + p = mallocx(sz, MALLOCX_ZERO); + assert_ptr_not_null(p, "Unexpected mallocx() error"); + rsz = sallocx(p, 0); + assert_zu_eq(nsz, rsz, "nallocx()/sallocx() rsize mismatch"); + dallocx(p, 0); + } } TEST_END From 40ab8f98e42fda3816e2a993f136ec4770c202c7 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 15 Feb 2015 20:26:45 -0800 Subject: [PATCH 0651/3378] Remove more obsolete (incorrect) assertions. This regression was introduced by 88fef7ceda6269598cef0cee8b984c8765673c27 (Refactor huge_*() calls into arena internals.), and went undetected because of the --enable-debug regression. 
--- include/jemalloc/internal/arena.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 232e9a61..6341a867 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -934,7 +934,6 @@ arena_prof_tctx_get(const void *ptr) cassert(config_prof); assert(ptr != NULL); - assert(CHUNK_ADDR2BASE(ptr) != ptr); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (likely(chunk != ptr)) { @@ -961,7 +960,6 @@ arena_prof_tctx_set(const void *ptr, prof_tctx_t *tctx) cassert(config_prof); assert(ptr != NULL); - assert(CHUNK_ADDR2BASE(ptr) != ptr); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (likely(chunk != ptr)) { From ee41ad409a43d12900a5a3108f6c14f84e4eb0eb Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 15 Feb 2015 18:04:46 -0800 Subject: [PATCH 0652/3378] Integrate whole chunks into unused dirty page purging machinery. Extend per arena unused dirty page purging to manage unused dirty chunks in addition to unused dirty runs. Rather than immediately unmapping deallocated chunks (or purging them in the --disable-munmap case), store them in a separate set of trees, chunks_[sz]ad_dirty. Preferentially allocate dirty chunks. When excessive unused dirty pages accumulate, purge runs and chunks in integrated LRU order (and unmap chunks in the --enable-munmap case). Refactor extent_node_t to provide accessor functions.
--- include/jemalloc/internal/arena.h | 64 ++- include/jemalloc/internal/chunk.h | 4 +- include/jemalloc/internal/extent.h | 135 +++++- .../jemalloc/internal/jemalloc_internal.h.in | 10 +- include/jemalloc/internal/private_symbols.txt | 15 + src/arena.c | 399 +++++++++++++----- src/base.c | 16 +- src/chunk.c | 146 ++++--- src/chunk_dss.c | 8 +- src/extent.c | 12 +- src/huge.c | 61 +-- src/tcache.c | 9 +- 12 files changed, 631 insertions(+), 248 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 6341a867..f967be3a 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -35,6 +35,7 @@ typedef struct arena_s arena_t; /******************************************************************************/ #ifdef JEMALLOC_H_STRUCTS +#ifdef JEMALLOC_ARENA_STRUCTS_A struct arena_run_s { /* Index of bin this run is associated with. */ index_t binind; @@ -136,7 +137,7 @@ struct arena_chunk_map_misc_s { union { /* Linkage for list of dirty runs. */ - ql_elm(arena_chunk_map_misc_t) dr_link; + qr(arena_chunk_map_misc_t) rd_link; /* Profile counters, used for large object runs. */ prof_tctx_t *prof_tctx; @@ -147,14 +148,16 @@ struct arena_chunk_map_misc_s { }; typedef rb_tree(arena_chunk_map_misc_t) arena_avail_tree_t; typedef rb_tree(arena_chunk_map_misc_t) arena_run_tree_t; -typedef ql_head(arena_chunk_map_misc_t) arena_chunk_miscelms_t; +typedef qr(arena_chunk_map_misc_t) arena_chunk_miscelms_t; +#endif /* JEMALLOC_ARENA_STRUCTS_A */ +#ifdef JEMALLOC_ARENA_STRUCTS_B /* Arena chunk header. */ struct arena_chunk_s { /* - * The arena that owns the chunk is node.arena. This field as a whole - * is used by chunks_rtree to support both ivsalloc() and core-based - * debugging. + * A pointer to the arena that owns the chunk is stored within the node. + * This field as a whole is used by chunks_rtree to support both + * ivsalloc() and core-based debugging. 
*/ extent_node_t node; @@ -309,13 +312,29 @@ struct arena_s { size_t ndirty; /* - * Size/address-ordered trees of this arena's available runs. The trees - * are used for first-best-fit run allocation. + * Size/address-ordered tree of this arena's available runs. The tree + * is used for first-best-fit run allocation. */ arena_avail_tree_t runs_avail; - /* List of dirty runs this arena manages. */ - arena_chunk_miscelms_t runs_dirty; + /* + * Unused dirty memory this arena manages. Dirty memory is conceptually + * tracked as an arbitrarily interleaved LRU of runs and chunks, but the + * list linkage is actually semi-duplicated in order to avoid extra + * arena_chunk_map_misc_t space overhead. + * + * LRU-----------------------------------------------------------MRU + * + * ______________ ___ ___ + * ...-->|chunks_dirty|<--------->|c|<-------------------->|c|<--... + * -------------- |h| |h| + * ____________ _____ |u| _____ _____ |u| + * ...-->|runs_dirty|<-->|run|<-->|n|<-->|run|<-->|run|<-->|n|<--... + * ------------ ----- |k| ----- ----- |k| + * --- --- + */ + arena_chunk_map_misc_t runs_dirty; + extent_node_t chunks_dirty; /* Extant huge allocations. */ ql_head(extent_node_t) huge; @@ -329,6 +348,8 @@ struct arena_s { * orderings are needed, which is why there are two trees with the same * contents. */ + extent_tree_t chunks_szad_dirty; + extent_tree_t chunks_ad_dirty; extent_tree_t chunks_szad_mmap; extent_tree_t chunks_ad_mmap; extent_tree_t chunks_szad_dss; @@ -347,6 +368,7 @@ struct arena_s { /* bins is used to store trees of free regions. */ arena_bin_t bins[NBINS]; }; +#endif /* JEMALLOC_ARENA_STRUCTS_B */ #endif /* JEMALLOC_H_STRUCTS */ /******************************************************************************/ @@ -363,6 +385,10 @@ extern size_t arena_maxclass; /* Max size class for arenas. */ extern unsigned nlclasses; /* Number of large size classes. */ extern unsigned nhclasses; /* Number of huge size classes. 
*/ +void arena_chunk_dirty_maybe_insert(arena_t *arena, extent_node_t *node, + bool dirty); +void arena_chunk_dirty_maybe_remove(arena_t *arena, extent_node_t *node, + bool dirty); extent_node_t *arena_node_alloc(arena_t *arena); void arena_node_dalloc(arena_t *arena, extent_node_t *node); void *arena_chunk_alloc_huge(arena_t *arena, size_t usize, size_t alignment, @@ -818,7 +844,7 @@ arena_ptr_small_binind_get(const void *ptr, size_t mapbits) assert(binind != BININD_INVALID); assert(binind < NBINS); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - arena = chunk->node.arena; + arena = extent_node_arena_get(&chunk->node); pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; actual_mapbits = arena_mapbits_get(chunk, pageind); assert(mapbits == actual_mapbits); @@ -1013,7 +1039,7 @@ arena_aalloc(const void *ptr) chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (likely(chunk != ptr)) - return (chunk->node.arena); + return (extent_node_arena_get(&chunk->node)); else return (huge_aalloc(ptr)); } @@ -1085,8 +1111,8 @@ arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache) mapbits); tcache_dalloc_small(tsd, tcache, ptr, binind); } else { - arena_dalloc_small(chunk->node.arena, chunk, - ptr, pageind); + arena_dalloc_small(extent_node_arena_get( + &chunk->node), chunk, ptr, pageind); } } else { size_t size = arena_mapbits_large_size_get(chunk, @@ -1097,8 +1123,8 @@ arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache) if (likely(tcache != NULL) && size <= tcache_maxclass) tcache_dalloc_large(tsd, tcache, ptr, size); else { - arena_dalloc_large(chunk->node.arena, chunk, - ptr); + arena_dalloc_large(extent_node_arena_get( + &chunk->node), chunk, ptr); } } } else @@ -1136,8 +1162,8 @@ arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache) } else { size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - arena_dalloc_small(chunk->node.arena, chunk, - ptr, pageind); + arena_dalloc_small(extent_node_arena_get( + &chunk->node), chunk, ptr, pageind); } } 
else { assert(((uintptr_t)ptr & PAGE_MASK) == 0); @@ -1145,8 +1171,8 @@ arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache) if (likely(tcache != NULL) && size <= tcache_maxclass) tcache_dalloc_large(tsd, tcache, ptr, size); else { - arena_dalloc_large(chunk->node.arena, chunk, - ptr); + arena_dalloc_large(extent_node_arena_get( + &chunk->node), chunk, ptr); } } } else diff --git a/include/jemalloc/internal/chunk.h b/include/jemalloc/internal/chunk.h index 5e0fb144..96b9e159 100644 --- a/include/jemalloc/internal/chunk.h +++ b/include/jemalloc/internal/chunk.h @@ -44,8 +44,10 @@ void *chunk_alloc_arena(chunk_alloc_t *chunk_alloc, size_t size, size_t alignment, bool *zero); void *chunk_alloc_default(void *new_addr, size_t size, size_t alignment, bool *zero, unsigned arena_ind); -void chunk_unmap(arena_t *arena, void *chunk, size_t size); +void chunk_record(arena_t *arena, extent_tree_t *chunks_szad, + extent_tree_t *chunks_ad, bool dirty, void *chunk, size_t size); bool chunk_dalloc_default(void *chunk, size_t size, unsigned arena_ind); +void chunk_unmap(arena_t *arena, void *chunk, size_t size); bool chunk_boot(void); void chunk_prefork(void); void chunk_postfork_parent(void); diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index 885f475b..10607614 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -7,36 +7,48 @@ typedef struct extent_node_s extent_node_t; /******************************************************************************/ #ifdef JEMALLOC_H_STRUCTS -/* Tree of extents. */ +/* Tree of extents. Use accessor functions for en_* fields. */ struct extent_node_s { /* Arena from which this extent came, if any. */ - arena_t *arena; + arena_t *en_arena; /* Pointer to the extent that this tree node is responsible for. */ - void *addr; + void *en_addr; + + /* Total region size. */ + size_t en_size; /* - * Total region size, or 0 if this node corresponds to an arena chunk. 
+ * The zeroed flag is used by chunk recycling code to track whether + * memory is zero-filled. */ - size_t size; + bool en_zeroed; /* - * 'prof_tctx' and 'zeroed' are never needed at the same time, so - * overlay them in order to fit extent_node_t in one cache line. + * The achunk flag is used to validate that huge allocation lookups + * don't return arena chunks. */ + bool en_achunk; + union { /* Profile counters, used for huge objects. */ - prof_tctx_t *prof_tctx; + prof_tctx_t *en_prof_tctx; - /* True if zero-filled; used by chunk recycling code. */ - bool zeroed; + struct { + /* + * Linkage for arena's runs_dirty and chunks_dirty + * rings. + */ + qr(extent_node_t) cd_link; + arena_chunk_map_misc_t runs_dirty; + }; }; union { /* Linkage for the size/address-ordered tree. */ rb_node(extent_node_t) szad_link; - /* Linkage for huge allocations and cached chunks nodes. */ + /* Linkage for arena's huge and node_cache lists. */ ql_elm(extent_node_t) ql_link; }; @@ -57,6 +69,107 @@ rb_proto(, extent_tree_ad_, extent_tree_t, extent_node_t) /******************************************************************************/ #ifdef JEMALLOC_H_INLINES +#ifndef JEMALLOC_ENABLE_INLINE +arena_t *extent_node_arena_get(const extent_node_t *node); +void *extent_node_addr_get(const extent_node_t *node); +size_t extent_node_size_get(const extent_node_t *node); +bool extent_node_zeroed_get(const extent_node_t *node); +bool extent_node_achunk_get(const extent_node_t *node); +prof_tctx_t *extent_node_prof_tctx_get(const extent_node_t *node); +void extent_node_arena_set(extent_node_t *node, arena_t *arena); +void extent_node_addr_set(extent_node_t *node, void *addr); +void extent_node_size_set(extent_node_t *node, size_t size); +void extent_node_zeroed_set(extent_node_t *node, bool zeroed); +void extent_node_achunk_set(extent_node_t *node, bool achunk); +void extent_node_prof_tctx_set(extent_node_t *node, prof_tctx_t *tctx); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || 
defined(JEMALLOC_EXTENT_C_)) +JEMALLOC_INLINE arena_t * +extent_node_arena_get(const extent_node_t *node) +{ + + return (node->en_arena); +} + +JEMALLOC_INLINE void * +extent_node_addr_get(const extent_node_t *node) +{ + + return (node->en_addr); +} + +JEMALLOC_INLINE size_t +extent_node_size_get(const extent_node_t *node) +{ + + return (node->en_size); +} + +JEMALLOC_INLINE bool +extent_node_zeroed_get(const extent_node_t *node) +{ + + return (node->en_zeroed); +} + +JEMALLOC_INLINE bool +extent_node_achunk_get(const extent_node_t *node) +{ + + return (node->en_achunk); +} + +JEMALLOC_INLINE prof_tctx_t * +extent_node_prof_tctx_get(const extent_node_t *node) +{ + + return (node->en_prof_tctx); +} + +JEMALLOC_INLINE void +extent_node_arena_set(extent_node_t *node, arena_t *arena) +{ + + node->en_arena = arena; +} + +JEMALLOC_INLINE void +extent_node_addr_set(extent_node_t *node, void *addr) +{ + + node->en_addr = addr; +} + +JEMALLOC_INLINE void +extent_node_size_set(extent_node_t *node, size_t size) +{ + + node->en_size = size; +} + +JEMALLOC_INLINE void +extent_node_zeroed_set(extent_node_t *node, bool zeroed) +{ + + node->en_zeroed = zeroed; +} + +JEMALLOC_INLINE void +extent_node_achunk_set(extent_node_t *node, bool achunk) +{ + + node->en_achunk = achunk; +} + +JEMALLOC_INLINE void +extent_node_prof_tctx_set(extent_node_t *node, prof_tctx_t *tctx) +{ + + node->en_prof_tctx = tctx; +} +#endif + #endif /* JEMALLOC_H_INLINES */ /******************************************************************************/ diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 43276c6c..8ed69ce2 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -368,8 +368,13 @@ typedef unsigned index_t; #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/mb.h" #include "jemalloc/internal/bitmap.h" -#include "jemalloc/internal/extent.h" +#define 
JEMALLOC_ARENA_STRUCTS_A #include "jemalloc/internal/arena.h" +#undef JEMALLOC_ARENA_STRUCTS_A +#include "jemalloc/internal/extent.h" +#define JEMALLOC_ARENA_STRUCTS_B +#include "jemalloc/internal/arena.h" +#undef JEMALLOC_ARENA_STRUCTS_B #include "jemalloc/internal/base.h" #include "jemalloc/internal/rtree.h" #include "jemalloc/internal/chunk.h" @@ -933,7 +938,8 @@ ivsalloc(const void *ptr, bool demote) if (node == NULL) return (0); /* Only arena chunks should be looked up via interior pointers. */ - assert(node->addr == ptr || node->size == 0); + assert(extent_node_addr_get(node) == ptr || + extent_node_achunk_get(node)); return (isalloc(ptr, demote)); } diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index d5601a68..a1d12cf6 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -13,6 +13,8 @@ arena_choose arena_choose_hard arena_chunk_alloc_huge arena_chunk_dalloc_huge +arena_chunk_dirty_maybe_insert +arena_chunk_dirty_maybe_remove arena_chunk_ralloc_huge_expand arena_chunk_ralloc_huge_shrink arena_chunk_ralloc_huge_similar @@ -143,6 +145,7 @@ chunk_npages chunk_postfork_child chunk_postfork_parent chunk_prefork +chunk_record chunk_register chunk_unmap chunks_rtree @@ -173,6 +176,18 @@ ctl_postfork_child ctl_postfork_parent ctl_prefork dss_prec_names +extent_node_achunk_get +extent_node_achunk_set +extent_node_addr_get +extent_node_addr_set +extent_node_arena_get +extent_node_arena_set +extent_node_prof_tctx_get +extent_node_prof_tctx_set +extent_node_size_get +extent_node_size_set +extent_node_zeroed_get +extent_node_zeroed_set extent_tree_ad_empty extent_tree_ad_first extent_tree_ad_insert diff --git a/src/arena.c b/src/arena.c index ce500f41..a7a98e22 100644 --- a/src/arena.c +++ b/src/arena.c @@ -112,34 +112,94 @@ arena_avail_remove(arena_t *arena, arena_chunk_t *chunk, size_t pageind, } static void -arena_dirty_insert(arena_t *arena, 
arena_chunk_t *chunk, size_t pageind, +arena_run_dirty_insert(arena_t *arena, arena_chunk_t *chunk, size_t pageind, size_t npages) { arena_chunk_map_misc_t *miscelm = arena_miscelm_get(chunk, pageind); + assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >> LG_PAGE)); assert(arena_mapbits_dirty_get(chunk, pageind) == CHUNK_MAP_DIRTY); assert(arena_mapbits_dirty_get(chunk, pageind+npages-1) == CHUNK_MAP_DIRTY); - ql_elm_new(miscelm, dr_link); - ql_tail_insert(&arena->runs_dirty, miscelm, dr_link); + + qr_new(miscelm, rd_link); + qr_meld(&arena->runs_dirty, miscelm, rd_link); arena->ndirty += npages; } static void -arena_dirty_remove(arena_t *arena, arena_chunk_t *chunk, size_t pageind, +arena_run_dirty_remove(arena_t *arena, arena_chunk_t *chunk, size_t pageind, size_t npages) { arena_chunk_map_misc_t *miscelm = arena_miscelm_get(chunk, pageind); + assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >> LG_PAGE)); assert(arena_mapbits_dirty_get(chunk, pageind) == CHUNK_MAP_DIRTY); assert(arena_mapbits_dirty_get(chunk, pageind+npages-1) == CHUNK_MAP_DIRTY); - ql_remove(&arena->runs_dirty, miscelm, dr_link); + + qr_remove(miscelm, rd_link); + assert(arena->ndirty >= npages); arena->ndirty -= npages; } +static size_t +arena_chunk_dirty_npages(const extent_node_t *node) +{ + + return (extent_node_size_get(node) >> LG_PAGE); +} + +static void +arena_chunk_dirty_node_init(extent_node_t *node) +{ + + qr_new(node, cd_link); + qr_new(&node->runs_dirty, rd_link); +} + +static void +arena_chunk_dirty_insert(arena_chunk_map_misc_t *runs_dirty, + extent_node_t *chunks_dirty, extent_node_t *node) +{ + + qr_meld(chunks_dirty, node, cd_link); + qr_meld(runs_dirty, &node->runs_dirty, rd_link); +} + +static void +arena_chunk_dirty_remove(extent_node_t *node) +{ + + qr_remove(node, cd_link); + qr_remove(&node->runs_dirty, rd_link); +} + +void +arena_chunk_dirty_maybe_insert(arena_t *arena, extent_node_t *node, bool dirty) +{ + + 
arena_chunk_dirty_node_init(node); + if (dirty) { + arena_chunk_dirty_insert(&arena->runs_dirty, + &arena->chunks_dirty, node); + arena->ndirty += arena_chunk_dirty_npages(node); + } +} + +void +arena_chunk_dirty_maybe_remove(arena_t *arena, extent_node_t *node, bool dirty) +{ + + if (dirty) { + arena_chunk_dirty_remove(node); + assert(arena->ndirty >= arena_chunk_dirty_npages(node)); + arena->ndirty -= arena_chunk_dirty_npages(node); + } +} + JEMALLOC_INLINE_C void * arena_run_reg_alloc(arena_run_t *run, arena_bin_info_t *bin_info) { @@ -243,7 +303,7 @@ arena_run_split_remove(arena_t *arena, arena_chunk_t *chunk, size_t run_ind, arena_avail_remove(arena, chunk, run_ind, total_pages); if (flag_dirty != 0) - arena_dirty_remove(arena, chunk, run_ind, total_pages); + arena_run_dirty_remove(arena, chunk, run_ind, total_pages); arena_cactive_update(arena, need_pages, 0); arena->nactive += need_pages; @@ -256,7 +316,7 @@ arena_run_split_remove(arena_t *arena, arena_chunk_t *chunk, size_t run_ind, arena_mapbits_unallocated_set(chunk, run_ind+total_pages-1, (rem_pages << LG_PAGE), flag_dirty); - arena_dirty_insert(arena, chunk, run_ind+need_pages, + arena_run_dirty_insert(arena, chunk, run_ind+need_pages, rem_pages); } else { arena_mapbits_unallocated_set(chunk, run_ind+need_pages, @@ -405,9 +465,10 @@ arena_chunk_alloc_internal(arena_t *arena, bool *zero) chunk = (arena_chunk_t *)chunk_alloc_arena(chunk_alloc, chunk_dalloc, arena->ind, NULL, chunksize, chunksize, zero); if (chunk != NULL) { - chunk->node.arena = arena; - chunk->node.addr = chunk; - chunk->node.size = 0; /* Indicates this is an arena chunk. 
*/ + extent_node_arena_set(&chunk->node, arena); + extent_node_addr_set(&chunk->node, chunk); + extent_node_size_set(&chunk->node, chunksize); + extent_node_achunk_set(&chunk->node, true); if (chunk_register(chunk, &chunk->node)) { chunk_dalloc((void *)chunk, chunksize, arena->ind); chunk = NULL; @@ -516,7 +577,7 @@ arena_chunk_dalloc(arena_t *arena, arena_chunk_t *chunk) arena->spare = chunk; if (arena_mapbits_dirty_get(spare, map_bias) != 0) { - arena_dirty_remove(arena, spare, map_bias, + arena_run_dirty_remove(arena, spare, map_bias, chunk_npages-map_bias); } chunk_dalloc = arena->chunk_dalloc; @@ -899,18 +960,29 @@ static size_t arena_dirty_count(arena_t *arena) { size_t ndirty = 0; - arena_chunk_map_misc_t *miscelm; - arena_chunk_t *chunk; - size_t pageind, npages; + arena_chunk_map_misc_t *runselm; + extent_node_t *chunkselm; - ql_foreach(miscelm, &arena->runs_dirty, dr_link) { - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(miscelm); - pageind = arena_miscelm_to_pageind(miscelm); - assert(arena_mapbits_allocated_get(chunk, pageind) == 0); - assert(arena_mapbits_large_get(chunk, pageind) == 0); - assert(arena_mapbits_dirty_get(chunk, pageind) != 0); - npages = arena_mapbits_unallocated_size_get(chunk, pageind) >> - LG_PAGE; + for (runselm = qr_next(&arena->runs_dirty, rd_link), + chunkselm = qr_next(&arena->chunks_dirty, cd_link); + runselm != &arena->runs_dirty; runselm = qr_next(runselm, + rd_link)) { + size_t npages; + + if (runselm == &chunkselm->runs_dirty) { + npages = extent_node_size_get(chunkselm) >> LG_PAGE; + chunkselm = qr_next(chunkselm, cd_link); + } else { + arena_chunk_t *chunk = (arena_chunk_t + *)CHUNK_ADDR2BASE(runselm); + size_t pageind = arena_miscelm_to_pageind(runselm); + assert(arena_mapbits_allocated_get(chunk, pageind) == + 0); + assert(arena_mapbits_large_get(chunk, pageind) == 0); + assert(arena_mapbits_dirty_get(chunk, pageind) != 0); + npages = arena_mapbits_unallocated_size_get(chunk, + pageind) >> LG_PAGE; + } ndirty += npages; } 
@@ -939,41 +1011,94 @@ arena_compute_npurge(arena_t *arena, bool all) static size_t arena_stash_dirty(arena_t *arena, bool all, size_t npurge, - arena_chunk_miscelms_t *miscelms) + arena_chunk_map_misc_t *purge_runs_sentinel, + extent_node_t *purge_chunks_sentinel) { - arena_chunk_map_misc_t *miscelm; + arena_chunk_map_misc_t *runselm, *runselm_next; + extent_node_t *chunkselm; size_t nstashed = 0; - /* Add at least npurge pages to purge_list. */ - for (miscelm = ql_first(&arena->runs_dirty); miscelm != NULL; - miscelm = ql_first(&arena->runs_dirty)) { - arena_chunk_t *chunk = - (arena_chunk_t *)CHUNK_ADDR2BASE(miscelm); - size_t pageind = arena_miscelm_to_pageind(miscelm); - size_t run_size = arena_mapbits_unallocated_size_get(chunk, - pageind); - size_t npages = run_size >> LG_PAGE; - arena_run_t *run = &miscelm->run; + /* Stash at least npurge pages. */ + for (runselm = qr_next(&arena->runs_dirty, rd_link), + chunkselm = qr_next(&arena->chunks_dirty, cd_link); + runselm != &arena->runs_dirty; runselm = runselm_next) { + size_t npages; + runselm_next = qr_next(runselm, rd_link); - assert(pageind + npages <= chunk_npages); - assert(arena_mapbits_dirty_get(chunk, pageind) == - arena_mapbits_dirty_get(chunk, pageind+npages-1)); + if (runselm == &chunkselm->runs_dirty) { + extent_node_t *chunkselm_next, *tnode; + void *addr; + size_t size; + bool zeroed, zero; + UNUSED void *chunk; - /* - * If purging the spare chunk's run, make it available prior to - * allocation. - */ - if (chunk == arena->spare) - arena_chunk_alloc(arena); + chunkselm_next = qr_next(chunkselm, cd_link); + /* + * Cache contents of chunkselm prior to it being + * destroyed as a side effect of allocating the chunk. + */ + addr = extent_node_addr_get(chunkselm); + size = extent_node_size_get(chunkselm); + zeroed = extent_node_zeroed_get(chunkselm); + /* Allocate. 
*/ + zero = false; + chunk = arena->chunk_alloc(addr, size, chunksize, &zero, + arena->ind); + assert(chunk == addr); + /* + * Create a temporary node to link into the ring of + * stashed allocations. + */ + tnode = arena_node_alloc(arena); + /* + * OOM shouldn't be possible because chunk allocation + * just cached a node. + */ + assert(tnode != NULL); + extent_node_arena_set(tnode, arena); + extent_node_addr_set(tnode, addr); + extent_node_size_set(tnode, size); + extent_node_zeroed_set(tnode, zeroed); + arena_chunk_dirty_node_init(tnode); + /* Stash. */ + arena_chunk_dirty_insert(purge_runs_sentinel, + purge_chunks_sentinel, tnode); + npages = size >> LG_PAGE; + chunkselm = chunkselm_next; + } else { + arena_chunk_t *chunk = + (arena_chunk_t *)CHUNK_ADDR2BASE(runselm); + size_t pageind = arena_miscelm_to_pageind(runselm); + arena_run_t *run = &runselm->run; + size_t run_size = + arena_mapbits_unallocated_size_get(chunk, pageind); - /* Temporarily allocate the free dirty run. */ - arena_run_split_large(arena, run, run_size, false); - /* Append to purge_list for later processing. */ - ql_elm_new(miscelm, dr_link); - ql_tail_insert(miscelms, miscelm, dr_link); + npages = run_size >> LG_PAGE; + + assert(pageind + npages <= chunk_npages); + assert(arena_mapbits_dirty_get(chunk, pageind) == + arena_mapbits_dirty_get(chunk, pageind+npages-1)); + + /* + * If purging the spare chunk's run, make it available + * prior to allocation. + */ + if (chunk == arena->spare) + arena_chunk_alloc(arena); + + /* Temporarily allocate the free dirty run. */ + arena_run_split_large(arena, run, run_size, false); + /* Append to purge_runs for later processing. */ + if (false) + qr_new(runselm, rd_link); /* Redundant. 
*/ + else { + assert(qr_next(runselm, rd_link) == runselm); + assert(qr_prev(runselm, rd_link) == runselm); + } + qr_meld(purge_runs_sentinel, runselm, rd_link); + } nstashed += npages; - if (!all && nstashed >= npurge) break; } @@ -982,52 +1107,66 @@ arena_stash_dirty(arena_t *arena, bool all, size_t npurge, } static size_t -arena_purge_stashed(arena_t *arena, arena_chunk_miscelms_t *miscelms) +arena_purge_stashed(arena_t *arena, arena_chunk_map_misc_t *purge_runs_sentinel, + extent_node_t *purge_chunks_sentinel) { size_t npurged, nmadvise; - arena_chunk_map_misc_t *miscelm; + arena_chunk_map_misc_t *runselm; + extent_node_t *chunkselm; if (config_stats) nmadvise = 0; npurged = 0; malloc_mutex_unlock(&arena->lock); + for (runselm = qr_next(purge_runs_sentinel, rd_link), + chunkselm = qr_next(purge_chunks_sentinel, cd_link); + runselm != purge_runs_sentinel; runselm = qr_next(runselm, + rd_link)) { + size_t npages; - ql_foreach(miscelm, miscelms, dr_link) { - arena_chunk_t *chunk; - size_t pageind, run_size, npages, flag_unzeroed, i; - bool unzeroed; + if (runselm == &chunkselm->runs_dirty) { + size_t size = extent_node_size_get(chunkselm); - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(miscelm); - pageind = arena_miscelm_to_pageind(miscelm); - run_size = arena_mapbits_large_size_get(chunk, pageind); - npages = run_size >> LG_PAGE; + pages_purge(extent_node_addr_get(chunkselm), size); + npages = size >> LG_PAGE; + chunkselm = qr_next(chunkselm, cd_link); + } else { + arena_chunk_t *chunk; + size_t pageind, run_size, flag_unzeroed, i; + bool unzeroed; - assert(pageind + npages <= chunk_npages); - unzeroed = pages_purge((void *)((uintptr_t)chunk + (pageind << - LG_PAGE)), run_size); - flag_unzeroed = unzeroed ? 
CHUNK_MAP_UNZEROED : 0; + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(runselm); + pageind = arena_miscelm_to_pageind(runselm); + run_size = arena_mapbits_large_size_get(chunk, pageind); + npages = run_size >> LG_PAGE; - /* - * Set the unzeroed flag for all pages, now that pages_purge() - * has returned whether the pages were zeroed as a side effect - * of purging. This chunk map modification is safe even though - * the arena mutex isn't currently owned by this thread, - * because the run is marked as allocated, thus protecting it - * from being modified by any other thread. As long as these - * writes don't perturb the first and last elements' - * CHUNK_MAP_ALLOCATED bits, behavior is well defined. - */ - for (i = 0; i < npages; i++) { - arena_mapbits_unzeroed_set(chunk, pageind+i, - flag_unzeroed); + assert(pageind + npages <= chunk_npages); + unzeroed = pages_purge((void *)((uintptr_t)chunk + + (pageind << LG_PAGE)), run_size); + flag_unzeroed = unzeroed ? CHUNK_MAP_UNZEROED : 0; + + /* + * Set the unzeroed flag for all pages, now that + * pages_purge() has returned whether the pages were + * zeroed as a side effect of purging. This chunk map + * modification is safe even though the arena mutex + * isn't currently owned by this thread, because the run + * is marked as allocated, thus protecting it from being + * modified by any other thread. As long as these + * writes don't perturb the first and last elements' + * CHUNK_MAP_ALLOCATED bits, behavior is well defined. 
+ */ + for (i = 0; i < npages; i++) { + arena_mapbits_unzeroed_set(chunk, pageind+i, + flag_unzeroed); + } } npurged += npages; if (config_stats) nmadvise++; } - malloc_mutex_lock(&arena->lock); if (config_stats) { @@ -1039,16 +1178,31 @@ arena_purge_stashed(arena_t *arena, arena_chunk_miscelms_t *miscelms) } static void -arena_unstash_purged(arena_t *arena, arena_chunk_miscelms_t *miscelms) +arena_unstash_purged(arena_t *arena, + arena_chunk_map_misc_t *purge_runs_sentinel, + extent_node_t *purge_chunks_sentinel) { - arena_chunk_map_misc_t *miscelm; + arena_chunk_map_misc_t *runselm, *runselm_next; + extent_node_t *chunkselm; /* Deallocate runs. */ - for (miscelm = ql_first(miscelms); miscelm != NULL; - miscelm = ql_first(miscelms)) { - arena_run_t *run = &miscelm->run; - ql_remove(miscelms, miscelm, dr_link); - arena_run_dalloc(arena, run, false, true); + for (runselm = qr_next(purge_runs_sentinel, rd_link), + chunkselm = qr_next(purge_chunks_sentinel, cd_link); + runselm != purge_runs_sentinel; runselm = runselm_next) { + runselm_next = qr_next(runselm, rd_link); + if (runselm == &chunkselm->runs_dirty) { + extent_node_t *chunkselm_next = qr_next(chunkselm, + cd_link); + arena_chunk_dirty_remove(chunkselm); + chunk_unmap(arena, extent_node_addr_get(chunkselm), + extent_node_size_get(chunkselm)); + arena_node_dalloc(arena, chunkselm); + chunkselm = chunkselm_next; + } else { + arena_run_t *run = &runselm->run; + qr_remove(runselm, rd_link); + arena_run_dalloc(arena, run, false, true); + } } } @@ -1056,7 +1210,8 @@ void arena_purge(arena_t *arena, bool all) { size_t npurge, npurgeable, npurged; - arena_chunk_miscelms_t purge_list; + arena_chunk_map_misc_t purge_runs_sentinel; + extent_node_t purge_chunks_sentinel; /* * Calls to arena_dirty_count() are disabled even for debug builds @@ -1072,12 +1227,17 @@ arena_purge(arena_t *arena, bool all) arena->stats.npurge++; npurge = arena_compute_npurge(arena, all); - ql_new(&purge_list); - npurgeable = 
arena_stash_dirty(arena, all, npurge, &purge_list); + qr_new(&purge_runs_sentinel, rd_link); + arena_chunk_dirty_node_init(&purge_chunks_sentinel); + + npurgeable = arena_stash_dirty(arena, all, npurge, &purge_runs_sentinel, + &purge_chunks_sentinel); assert(npurgeable >= npurge); - npurged = arena_purge_stashed(arena, &purge_list); + npurged = arena_purge_stashed(arena, &purge_runs_sentinel, + &purge_chunks_sentinel); assert(npurged == npurgeable); - arena_unstash_purged(arena, &purge_list); + arena_unstash_purged(arena, &purge_runs_sentinel, + &purge_chunks_sentinel); } void @@ -1115,9 +1275,12 @@ arena_run_coalesce(arena_t *arena, arena_chunk_t *chunk, size_t *p_size, run_ind+run_pages+nrun_pages-1) == flag_dirty); arena_avail_remove(arena, chunk, run_ind+run_pages, nrun_pages); - /* If the successor is dirty, remove it from runs_dirty. */ + /* + * If the successor is dirty, remove it from the set of dirty + * pages. + */ if (flag_dirty != 0) { - arena_dirty_remove(arena, chunk, run_ind+run_pages, + arena_run_dirty_remove(arena, chunk, run_ind+run_pages, nrun_pages); } @@ -1148,9 +1311,14 @@ arena_run_coalesce(arena_t *arena, arena_chunk_t *chunk, size_t *p_size, assert(arena_mapbits_dirty_get(chunk, run_ind) == flag_dirty); arena_avail_remove(arena, chunk, run_ind, prun_pages); - /* If the predecessor is dirty, remove it from runs_dirty. */ - if (flag_dirty != 0) - arena_dirty_remove(arena, chunk, run_ind, prun_pages); + /* + * If the predecessor is dirty, remove it from the set of dirty + * pages. 
+ */ + if (flag_dirty != 0) { + arena_run_dirty_remove(arena, chunk, run_ind, + prun_pages); + } size += prun_size; run_pages += prun_pages; @@ -1224,7 +1392,7 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, bool cleaned) arena_avail_insert(arena, chunk, run_ind, run_pages); if (dirty) - arena_dirty_insert(arena, chunk, run_ind, run_pages); + arena_run_dirty_insert(arena, chunk, run_ind, run_pages); /* Deallocate chunk if it is now completely unused. */ if (size == arena_maxrun) { @@ -1843,7 +2011,8 @@ arena_dissociate_bin_run(arena_chunk_t *chunk, arena_run_t *run, if (run == bin->runcur) bin->runcur = NULL; else { - index_t binind = arena_bin_index(chunk->node.arena, bin); + index_t binind = arena_bin_index(extent_node_arena_get( + &chunk->node), bin); arena_bin_info_t *bin_info = &arena_bin_info[binind]; if (bin_info->nregs != 1) { @@ -2184,7 +2353,7 @@ arena_ralloc_large(void *ptr, size_t oldsize, size_t size, size_t extra, arena_t *arena; chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - arena = chunk->node.arena; + arena = extent_node_arena_get(&chunk->node); if (usize < oldsize) { /* Fill before shrinking in order avoid a race. 
*/ @@ -2422,20 +2591,6 @@ arena_new(unsigned ind) arena->nthreads = 0; if (malloc_mutex_init(&arena->lock)) return (NULL); - arena->chunk_alloc = chunk_alloc_default; - arena->chunk_dalloc = chunk_dalloc_default; - ql_new(&arena->huge); - if (malloc_mutex_init(&arena->huge_mtx)) - return (NULL); - extent_tree_szad_new(&arena->chunks_szad_mmap); - extent_tree_ad_new(&arena->chunks_ad_mmap); - extent_tree_szad_new(&arena->chunks_szad_dss); - extent_tree_ad_new(&arena->chunks_ad_dss); - ql_new(&arena->node_cache); - if (malloc_mutex_init(&arena->chunks_mtx)) - return (NULL); - if (malloc_mutex_init(&arena->node_cache_mtx)) - return (NULL); if (config_stats) { memset(&arena->stats, 0, sizeof(arena_stats_t)); @@ -2463,7 +2618,27 @@ arena_new(unsigned ind) arena->ndirty = 0; arena_avail_tree_new(&arena->runs_avail); - ql_new(&arena->runs_dirty); + qr_new(&arena->runs_dirty, rd_link); + qr_new(&arena->chunks_dirty, cd_link); + + ql_new(&arena->huge); + if (malloc_mutex_init(&arena->huge_mtx)) + return (NULL); + + extent_tree_szad_new(&arena->chunks_szad_dirty); + extent_tree_ad_new(&arena->chunks_ad_dirty); + extent_tree_szad_new(&arena->chunks_szad_mmap); + extent_tree_ad_new(&arena->chunks_ad_mmap); + extent_tree_szad_new(&arena->chunks_szad_dss); + extent_tree_ad_new(&arena->chunks_ad_dss); + if (malloc_mutex_init(&arena->chunks_mtx)) + return (NULL); + ql_new(&arena->node_cache); + if (malloc_mutex_init(&arena->node_cache_mtx)) + return (NULL); + + arena->chunk_alloc = chunk_alloc_default; + arena->chunk_dalloc = chunk_dalloc_default; /* Initialize bins. 
*/ for (i = 0; i < NBINS; i++) { diff --git a/src/base.c b/src/base.c index 7b5804ee..819fa025 100644 --- a/src/base.c +++ b/src/base.c @@ -60,8 +60,8 @@ base_chunk_alloc(size_t minsize) if (config_stats) base_allocated += nsize; } - node->addr = addr; - node->size = csize; + extent_node_addr_set(node, addr); + extent_node_size_set(node, csize); return (node); } @@ -84,8 +84,8 @@ base_alloc(size_t size) */ csize = CACHELINE_CEILING(size); - key.addr = NULL; - key.size = csize; + extent_node_addr_set(&key, NULL); + extent_node_size_set(&key, csize); malloc_mutex_lock(&base_mtx); node = extent_tree_szad_nsearch(&base_avail_szad, &key); if (node != NULL) { @@ -100,10 +100,10 @@ base_alloc(size_t size) goto label_return; } - ret = node->addr; - if (node->size > csize) { - node->addr = (void *)((uintptr_t)ret + csize); - node->size -= csize; + ret = extent_node_addr_get(node); + if (extent_node_size_get(node) > csize) { + extent_node_addr_set(node, (void *)((uintptr_t)ret + csize)); + extent_node_size_set(node, extent_node_size_get(node) - csize); extent_tree_szad_insert(&base_avail_szad, node); } else base_node_dalloc(node); diff --git a/src/chunk.c b/src/chunk.c index b3576195..8bc87beb 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -24,12 +24,13 @@ bool chunk_register(const void *chunk, const extent_node_t *node) { - assert(node->addr == chunk); + assert(extent_node_addr_get(node) == chunk); if (rtree_set(&chunks_rtree, (uintptr_t)chunk, node)) return (true); if (config_prof && opt_prof) { - size_t nadd = (node->size == 0) ? 1 : node->size / chunksize; + size_t size = extent_node_size_get(node); + size_t nadd = (size == 0) ? 
1 : size / chunksize; size_t cur = atomic_add_z(&curchunks, nadd); size_t high = atomic_read_z(&highchunks); while (cur > high && atomic_cas_z(&highchunks, high, cur)) { @@ -54,7 +55,8 @@ chunk_deregister(const void *chunk, const extent_node_t *node) err = rtree_set(&chunks_rtree, (uintptr_t)chunk, NULL); assert(!err); if (config_prof && opt_prof) { - size_t nsub = (node->size == 0) ? 1 : node->size / chunksize; + size_t size = extent_node_size_get(node); + size_t nsub = (size == 0) ? 1 : size / chunksize; assert(atomic_read_z(&curchunks) >= nsub); atomic_sub_z(&curchunks, nsub); } @@ -62,8 +64,8 @@ chunk_deregister(const void *chunk, const extent_node_t *node) static void * chunk_recycle(arena_t *arena, extent_tree_t *chunks_szad, - extent_tree_t *chunks_ad, void *new_addr, size_t size, size_t alignment, - bool *zero) + extent_tree_t *chunks_ad, bool dirty, void *new_addr, size_t size, + size_t alignment, bool *zero) { void *ret; extent_node_t *node; @@ -77,32 +79,35 @@ chunk_recycle(arena_t *arena, extent_tree_t *chunks_szad, /* Beware size_t wrap-around. */ if (alloc_size < size) return (NULL); - key.addr = new_addr; - key.size = alloc_size; + extent_node_addr_set(&key, new_addr); + extent_node_size_set(&key, alloc_size); malloc_mutex_lock(&arena->chunks_mtx); node = (new_addr != NULL) ? 
extent_tree_ad_search(chunks_ad, &key) : extent_tree_szad_nsearch(chunks_szad, &key); - if (node == NULL || (new_addr != NULL && node->size < size)) { + if (node == NULL || (new_addr != NULL && extent_node_size_get(node) < + size)) { malloc_mutex_unlock(&arena->chunks_mtx); return (NULL); } - leadsize = ALIGNMENT_CEILING((uintptr_t)node->addr, alignment) - - (uintptr_t)node->addr; + leadsize = ALIGNMENT_CEILING((uintptr_t)extent_node_addr_get(node), + alignment) - (uintptr_t)extent_node_addr_get(node); assert(new_addr == NULL || leadsize == 0); - assert(node->size >= leadsize + size); - trailsize = node->size - leadsize - size; - ret = (void *)((uintptr_t)node->addr + leadsize); - zeroed = node->zeroed; + assert(extent_node_size_get(node) >= leadsize + size); + trailsize = extent_node_size_get(node) - leadsize - size; + ret = (void *)((uintptr_t)extent_node_addr_get(node) + leadsize); + zeroed = extent_node_zeroed_get(node); if (zeroed) *zero = true; /* Remove node from the tree. */ extent_tree_szad_remove(chunks_szad, node); extent_tree_ad_remove(chunks_ad, node); + arena_chunk_dirty_maybe_remove(arena, node, dirty); if (leadsize != 0) { /* Insert the leading space as a smaller chunk. 
*/ - node->size = leadsize; + extent_node_size_set(node, leadsize); extent_tree_szad_insert(chunks_szad, node); extent_tree_ad_insert(chunks_ad, node); + arena_chunk_dirty_maybe_insert(arena, node, dirty); node = NULL; } if (trailsize != 0) { @@ -111,15 +116,17 @@ chunk_recycle(arena_t *arena, extent_tree_t *chunks_szad, node = arena_node_alloc(arena); if (node == NULL) { malloc_mutex_unlock(&arena->chunks_mtx); - chunk_unmap(arena, ret, size); + chunk_record(arena, chunks_szad, chunks_ad, + dirty, ret, size); return (NULL); } } - node->addr = (void *)((uintptr_t)(ret) + size); - node->size = trailsize; - node->zeroed = zeroed; + extent_node_addr_set(node, (void *)((uintptr_t)(ret) + size)); + extent_node_size_set(node, trailsize); + extent_node_zeroed_set(node, zeroed); extent_tree_szad_insert(chunks_szad, node); extent_tree_ad_insert(chunks_ad, node); + arena_chunk_dirty_maybe_insert(arena, node, dirty); node = NULL; } malloc_mutex_unlock(&arena->chunks_mtx); @@ -148,7 +155,8 @@ chunk_alloc_core_dss(arena_t *arena, void *new_addr, size_t size, void *ret; if ((ret = chunk_recycle(arena, &arena->chunks_szad_dss, - &arena->chunks_ad_dss, new_addr, size, alignment, zero)) != NULL) + &arena->chunks_ad_dss, false, new_addr, size, alignment, zero)) != + NULL) return (ret); ret = chunk_alloc_dss(arena, new_addr, size, alignment, zero); return (ret); @@ -171,6 +179,11 @@ chunk_alloc_core(arena_t *arena, void *new_addr, size_t size, size_t alignment, assert(alignment != 0); assert((alignment & chunksize_mask) == 0); + /* dirty. */ + if ((ret = chunk_recycle(arena, &arena->chunks_szad_dirty, + &arena->chunks_ad_dirty, true, new_addr, size, alignment, zero)) != + NULL) + return (ret); /* "primary" dss. */ if (have_dss && dss_prec == dss_prec_primary && (ret = chunk_alloc_core_dss(arena, new_addr, size, alignment, zero)) != @@ -178,8 +191,8 @@ chunk_alloc_core(arena_t *arena, void *new_addr, size_t size, size_t alignment, return (ret); /* mmap. 
*/ if (!config_munmap && (ret = chunk_recycle(arena, - &arena->chunks_szad_mmap, &arena->chunks_ad_mmap, new_addr, size, - alignment, zero)) != NULL) + &arena->chunks_szad_mmap, &arena->chunks_ad_mmap, false, new_addr, + size, alignment, zero)) != NULL) return (ret); /* * Requesting an address is not implemented for chunk_alloc_mmap(), so @@ -263,54 +276,62 @@ chunk_alloc_default(void *new_addr, size_t size, size_t alignment, bool *zero, arena->dss_prec)); } -static void +void chunk_record(arena_t *arena, extent_tree_t *chunks_szad, - extent_tree_t *chunks_ad, void *chunk, size_t size) + extent_tree_t *chunks_ad, bool dirty, void *chunk, size_t size) { bool unzeroed; - extent_node_t *node, *prev, key; + extent_node_t *node, *prev; + extent_node_t key; - unzeroed = pages_purge(chunk, size); + unzeroed = dirty ? true : pages_purge(chunk, size); JEMALLOC_VALGRIND_MAKE_MEM_NOACCESS(chunk, size); malloc_mutex_lock(&arena->chunks_mtx); - key.addr = (void *)((uintptr_t)chunk + size); + extent_node_addr_set(&key, (void *)((uintptr_t)chunk + size)); node = extent_tree_ad_nsearch(chunks_ad, &key); /* Try to coalesce forward. */ - if (node != NULL && node->addr == key.addr) { + if (node != NULL && extent_node_addr_get(node) == + extent_node_addr_get(&key)) { /* * Coalesce chunk with the following address range. This does * not change the position within chunks_ad, so only * remove/insert from/into chunks_szad. */ extent_tree_szad_remove(chunks_szad, node); - node->addr = chunk; - node->size += size; - node->zeroed = (node->zeroed && !unzeroed); + arena_chunk_dirty_maybe_remove(arena, node, dirty); + extent_node_addr_set(node, chunk); + extent_node_size_set(node, extent_node_size_get(node) + size); + extent_node_zeroed_set(node, extent_node_zeroed_get(node) && + !unzeroed); extent_tree_szad_insert(chunks_szad, node); + arena_chunk_dirty_maybe_insert(arena, node, dirty); } else { /* Coalescing forward failed, so insert a new node. 
*/ node = arena_node_alloc(arena); if (node == NULL) { /* * Node allocation failed, which is an exceedingly - * unlikely failure. Leak chunk; its pages have - * already been purged, so this is only a virtual - * memory leak. + * unlikely failure. Leak chunk after making sure its + * pages have already been purged, so that this is only + * a virtual memory leak. */ + if (dirty) + pages_purge(chunk, size); goto label_return; } - node->addr = chunk; - node->size = size; - node->zeroed = !unzeroed; + extent_node_addr_set(node, chunk); + extent_node_size_set(node, size); + extent_node_zeroed_set(node, !unzeroed); extent_tree_ad_insert(chunks_ad, node); extent_tree_szad_insert(chunks_szad, node); + arena_chunk_dirty_maybe_insert(arena, node, dirty); } /* Try to coalesce backward. */ prev = extent_tree_ad_prev(chunks_ad, node); - if (prev != NULL && (void *)((uintptr_t)prev->addr + prev->size) == - chunk) { + if (prev != NULL && (void *)((uintptr_t)extent_node_addr_get(prev) + + extent_node_size_get(prev)) == chunk) { /* * Coalesce chunk with the previous address range. 
This does * not change the position within chunks_ad, so only @@ -318,12 +339,16 @@ chunk_record(arena_t *arena, extent_tree_t *chunks_szad, */ extent_tree_szad_remove(chunks_szad, prev); extent_tree_ad_remove(chunks_ad, prev); - + arena_chunk_dirty_maybe_remove(arena, prev, dirty); extent_tree_szad_remove(chunks_szad, node); - node->addr = prev->addr; - node->size += prev->size; - node->zeroed = (node->zeroed && prev->zeroed); + arena_chunk_dirty_maybe_remove(arena, node, dirty); + extent_node_addr_set(node, extent_node_addr_get(prev)); + extent_node_size_set(node, extent_node_size_get(node) + + extent_node_size_get(prev)); + extent_node_zeroed_set(node, extent_node_zeroed_get(node) && + extent_node_zeroed_get(prev)); extent_tree_szad_insert(chunks_szad, node); + arena_chunk_dirty_maybe_insert(arena, node, dirty); arena_node_dalloc(arena, prev); } @@ -332,6 +357,28 @@ label_return: malloc_mutex_unlock(&arena->chunks_mtx); } +static void +chunk_cache(arena_t *arena, void *chunk, size_t size) +{ + + assert(chunk != NULL); + assert(CHUNK_ADDR2BASE(chunk) == chunk); + assert(size != 0); + assert((size & chunksize_mask) == 0); + + chunk_record(arena, &arena->chunks_szad_dirty, &arena->chunks_ad_dirty, + true, chunk, size); +} + +/* Default arena chunk deallocation routine in the absence of user override. 
*/ +bool +chunk_dalloc_default(void *chunk, size_t size, unsigned arena_ind) +{ + + chunk_cache(chunk_arena_get(arena_ind), chunk, size); + return (false); +} + void chunk_unmap(arena_t *arena, void *chunk, size_t size) { @@ -343,22 +390,13 @@ chunk_unmap(arena_t *arena, void *chunk, size_t size) if (have_dss && chunk_in_dss(chunk)) { chunk_record(arena, &arena->chunks_szad_dss, - &arena->chunks_ad_dss, chunk, size); + &arena->chunks_ad_dss, false, chunk, size); } else if (chunk_dalloc_mmap(chunk, size)) { chunk_record(arena, &arena->chunks_szad_mmap, - &arena->chunks_ad_mmap, chunk, size); + &arena->chunks_ad_mmap, false, chunk, size); } } -/* Default arena chunk deallocation routine in the absence of user override. */ -bool -chunk_dalloc_default(void *chunk, size_t size, unsigned arena_ind) -{ - - chunk_unmap(chunk_arena_get(arena_ind), chunk, size); - return (false); -} - static rtree_node_elm_t * chunks_rtree_node_alloc(size_t nelms) { diff --git a/src/chunk_dss.c b/src/chunk_dss.c index 9c3eea82..c3c48481 100644 --- a/src/chunk_dss.c +++ b/src/chunk_dss.c @@ -133,8 +133,12 @@ chunk_alloc_dss(arena_t *arena, void *new_addr, size_t size, size_t alignment, /* Success. 
*/ dss_max = dss_next; malloc_mutex_unlock(&dss_mtx); - if (cpad_size != 0) - chunk_unmap(arena, cpad, cpad_size); + if (cpad_size != 0) { + chunk_record(arena, + &arena->chunks_szad_dss, + &arena->chunks_ad_dss, false, cpad, + cpad_size); + } if (*zero) { JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED( ret, size); diff --git a/src/extent.c b/src/extent.c index 60e24683..f98e77e2 100644 --- a/src/extent.c +++ b/src/extent.c @@ -7,13 +7,13 @@ JEMALLOC_INLINE_C int extent_szad_comp(extent_node_t *a, extent_node_t *b) { int ret; - size_t a_size = a->size; - size_t b_size = b->size; + size_t a_size = extent_node_size_get(a); + size_t b_size = extent_node_size_get(b); ret = (a_size > b_size) - (a_size < b_size); if (ret == 0) { - uintptr_t a_addr = (uintptr_t)a->addr; - uintptr_t b_addr = (uintptr_t)b->addr; + uintptr_t a_addr = (uintptr_t)extent_node_addr_get(a); + uintptr_t b_addr = (uintptr_t)extent_node_addr_get(b); ret = (a_addr > b_addr) - (a_addr < b_addr); } @@ -28,8 +28,8 @@ rb_gen(, extent_tree_szad_, extent_tree_t, extent_node_t, szad_link, JEMALLOC_INLINE_C int extent_ad_comp(extent_node_t *a, extent_node_t *b) { - uintptr_t a_addr = (uintptr_t)a->addr; - uintptr_t b_addr = (uintptr_t)b->addr; + uintptr_t a_addr = (uintptr_t)extent_node_addr_get(a); + uintptr_t b_addr = (uintptr_t)extent_node_addr_get(b); return ((a_addr > b_addr) - (a_addr < b_addr)); } diff --git a/src/huge.c b/src/huge.c index bc7d99cb..b9cae001 100644 --- a/src/huge.c +++ b/src/huge.c @@ -9,7 +9,7 @@ huge_node_get(const void *ptr) extent_node_t *node; node = chunk_lookup(ptr); - assert(node->size != 0); + assert(!extent_node_achunk_get(node)); return (node); } @@ -18,8 +18,8 @@ static bool huge_node_set(const void *ptr, extent_node_t *node) { - assert(node->addr == ptr); - assert(node->size != 0); + assert(extent_node_addr_get(node) == ptr); + assert(!extent_node_achunk_get(node)); return (chunk_register(ptr, node)); } @@ -73,10 +73,11 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, 
size_t alignment, return (NULL); } - node->addr = ret; - node->size = usize; - node->zeroed = is_zeroed; - node->arena = arena; + extent_node_arena_set(node, arena); + extent_node_addr_set(node, ret); + extent_node_size_set(node, usize); + extent_node_achunk_set(node, false); + extent_node_zeroed_set(node, is_zeroed); if (huge_node_set(ret, node)) { arena_chunk_dalloc_huge(arena, ret, usize); @@ -152,13 +153,13 @@ huge_ralloc_no_move_similar(void *ptr, size_t oldsize, size_t usize, zeroed = true; node = huge_node_get(ptr); - arena = node->arena; + arena = extent_node_arena_get(node); malloc_mutex_lock(&arena->huge_mtx); /* Update the size of the huge allocation. */ - assert(node->size != usize); - node->size = usize; - /* Clear node->zeroed if zeroing failed above. */ - node->zeroed = (node->zeroed && zeroed); + assert(extent_node_size_get(node) != usize); + extent_node_size_set(node, usize); + /* Clear node's zeroed field if zeroing failed above. */ + extent_node_zeroed_set(node, extent_node_zeroed_get(node) && zeroed); malloc_mutex_unlock(&arena->huge_mtx); arena_chunk_ralloc_huge_similar(arena, ptr, oldsize, usize); @@ -195,12 +196,12 @@ huge_ralloc_no_move_shrink(void *ptr, size_t oldsize, size_t usize) } node = huge_node_get(ptr); - arena = node->arena; + arena = extent_node_arena_get(node); malloc_mutex_lock(&arena->huge_mtx); /* Update the size of the huge allocation. */ - node->size = usize; - /* Clear node->zeroed if zeroing failed above. */ - node->zeroed = (node->zeroed && zeroed); + extent_node_size_set(node, usize); + /* Clear node's zeroed field if zeroing failed above. */ + extent_node_zeroed_set(node, extent_node_zeroed_get(node) && zeroed); malloc_mutex_unlock(&arena->huge_mtx); /* Zap the excess chunks. 
*/ @@ -221,9 +222,9 @@ huge_ralloc_no_move_expand(void *ptr, size_t oldsize, size_t size, bool zero) { } node = huge_node_get(ptr); - arena = node->arena; + arena = extent_node_arena_get(node); malloc_mutex_lock(&arena->huge_mtx); - is_zeroed_subchunk = node->zeroed; + is_zeroed_subchunk = extent_node_zeroed_get(node); malloc_mutex_unlock(&arena->huge_mtx); /* @@ -238,7 +239,7 @@ huge_ralloc_no_move_expand(void *ptr, size_t oldsize, size_t size, bool zero) { malloc_mutex_lock(&arena->huge_mtx); /* Update the size of the huge allocation. */ - node->size = usize; + extent_node_size_set(node, usize); malloc_mutex_unlock(&arena->huge_mtx); if (zero || (config_fill && unlikely(opt_zero))) { @@ -358,14 +359,16 @@ huge_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache) arena_t *arena; node = huge_node_get(ptr); - arena = node->arena; + arena = extent_node_arena_get(node); huge_node_unset(ptr, node); malloc_mutex_lock(&arena->huge_mtx); ql_remove(&arena->huge, node, ql_link); malloc_mutex_unlock(&arena->huge_mtx); - huge_dalloc_junk(node->addr, node->size); - arena_chunk_dalloc_huge(node->arena, node->addr, node->size); + huge_dalloc_junk(extent_node_addr_get(node), + extent_node_size_get(node)); + arena_chunk_dalloc_huge(extent_node_arena_get(node), + extent_node_addr_get(node), extent_node_size_get(node)); idalloctm(tsd, node, tcache, true); } @@ -373,7 +376,7 @@ arena_t * huge_aalloc(const void *ptr) { - return (huge_node_get(ptr)->arena); + return (extent_node_arena_get(huge_node_get(ptr))); } size_t @@ -384,9 +387,9 @@ huge_salloc(const void *ptr) arena_t *arena; node = huge_node_get(ptr); - arena = node->arena; + arena = extent_node_arena_get(node); malloc_mutex_lock(&arena->huge_mtx); - size = node->size; + size = extent_node_size_get(node); malloc_mutex_unlock(&arena->huge_mtx); return (size); @@ -400,9 +403,9 @@ huge_prof_tctx_get(const void *ptr) arena_t *arena; node = huge_node_get(ptr); - arena = node->arena; + arena = extent_node_arena_get(node); 
malloc_mutex_lock(&arena->huge_mtx); - tctx = node->prof_tctx; + tctx = extent_node_prof_tctx_get(node); malloc_mutex_unlock(&arena->huge_mtx); return (tctx); @@ -415,8 +418,8 @@ huge_prof_tctx_set(const void *ptr, prof_tctx_t *tctx) arena_t *arena; node = huge_node_get(ptr); - arena = node->arena; + arena = extent_node_arena_get(node); malloc_mutex_lock(&arena->huge_mtx); - node->prof_tctx = tctx; + extent_node_prof_tctx_set(node, tctx); malloc_mutex_unlock(&arena->huge_mtx); } diff --git a/src/tcache.c b/src/tcache.c index 318e0dc8..8d0a6fa8 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -103,7 +103,7 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, /* Lock the arena bin associated with the first object. */ arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE( tbin->avail[0]); - arena_t *bin_arena = chunk->node.arena; + arena_t *bin_arena = extent_node_arena_get(&chunk->node); arena_bin_t *bin = &bin_arena->bins[binind]; if (config_prof && bin_arena == arena) { @@ -125,7 +125,7 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, ptr = tbin->avail[i]; assert(ptr != NULL); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - if (chunk->node.arena == bin_arena) { + if (extent_node_arena_get(&chunk->node) == bin_arena) { size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; arena_chunk_map_bits_t *bitselm = @@ -183,7 +183,7 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, index_t binind, /* Lock the arena associated with the first object. 
*/ arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE( tbin->avail[0]); - arena_t *locked_arena = chunk->node.arena; + arena_t *locked_arena = extent_node_arena_get(&chunk->node); UNUSED bool idump; if (config_prof) @@ -209,7 +209,8 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, index_t binind, ptr = tbin->avail[i]; assert(ptr != NULL); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - if (chunk->node.arena == locked_arena) { + if (extent_node_arena_get(&chunk->node) == + locked_arena) { arena_dalloc_large_junked_locked(locked_arena, chunk, ptr); } else { From a4e1888d1a12d864f42350f2859e33eb3a0033f2 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 17 Feb 2015 15:13:52 -0800 Subject: [PATCH 0653/3378] Simplify extent_node_t and add extent_node_init(). --- include/jemalloc/internal/extent.h | 33 ++++++++++++------- include/jemalloc/internal/private_symbols.txt | 1 + src/arena.c | 9 ++--- src/base.c | 6 ++-- src/chunk.c | 25 +++++++------- src/huge.c | 6 +--- 6 files changed, 38 insertions(+), 42 deletions(-) diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index 10607614..9ee1b445 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -30,19 +30,12 @@ struct extent_node_s { */ bool en_achunk; - union { - /* Profile counters, used for huge objects. */ - prof_tctx_t *en_prof_tctx; + /* Profile counters, used for huge objects. */ + prof_tctx_t *en_prof_tctx; - struct { - /* - * Linkage for arena's runs_dirty and chunks_dirty - * rings. - */ - qr(extent_node_t) cd_link; - arena_chunk_map_misc_t runs_dirty; - }; - }; + /* Linkage for arena's runs_dirty and chunks_dirty rings. */ + qr(extent_node_t) cd_link; + arena_chunk_map_misc_t runs_dirty; union { /* Linkage for the size/address-ordered tree. 
*/ @@ -82,6 +75,8 @@ void extent_node_size_set(extent_node_t *node, size_t size); void extent_node_zeroed_set(extent_node_t *node, bool zeroed); void extent_node_achunk_set(extent_node_t *node, bool achunk); void extent_node_prof_tctx_set(extent_node_t *node, prof_tctx_t *tctx); +void extent_node_init(extent_node_t *node, arena_t *arena, void *addr, + size_t size, bool zeroed); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_EXTENT_C_)) @@ -168,6 +163,20 @@ extent_node_prof_tctx_set(extent_node_t *node, prof_tctx_t *tctx) node->en_prof_tctx = tctx; } + +JEMALLOC_INLINE void +extent_node_init(extent_node_t *node, arena_t *arena, void *addr, size_t size, + bool zeroed) +{ + + extent_node_arena_set(node, arena); + extent_node_addr_set(node, addr); + extent_node_size_set(node, size); + extent_node_zeroed_set(node, zeroed); + extent_node_achunk_set(node, false); + if (config_prof) + extent_node_prof_tctx_set(node, NULL); +} #endif #endif /* JEMALLOC_H_INLINES */ diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index a1d12cf6..8b55954c 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -182,6 +182,7 @@ extent_node_addr_get extent_node_addr_set extent_node_arena_get extent_node_arena_set +extent_node_init extent_node_prof_tctx_get extent_node_prof_tctx_set extent_node_size_get diff --git a/src/arena.c b/src/arena.c index a7a98e22..b068a4d8 100644 --- a/src/arena.c +++ b/src/arena.c @@ -465,9 +465,7 @@ arena_chunk_alloc_internal(arena_t *arena, bool *zero) chunk = (arena_chunk_t *)chunk_alloc_arena(chunk_alloc, chunk_dalloc, arena->ind, NULL, chunksize, chunksize, zero); if (chunk != NULL) { - extent_node_arena_set(&chunk->node, arena); - extent_node_addr_set(&chunk->node, chunk); - extent_node_size_set(&chunk->node, chunksize); + extent_node_init(&chunk->node, arena, chunk, chunksize, *zero); extent_node_achunk_set(&chunk->node, true); 
if (chunk_register(chunk, &chunk->node)) { chunk_dalloc((void *)chunk, chunksize, arena->ind); @@ -1055,10 +1053,7 @@ arena_stash_dirty(arena_t *arena, bool all, size_t npurge, * just cached a node. */ assert(tnode != NULL); - extent_node_arena_set(tnode, arena); - extent_node_addr_set(tnode, addr); - extent_node_size_set(tnode, size); - extent_node_zeroed_set(tnode, zeroed); + extent_node_init(tnode, arena, addr, size, zeroed); arena_chunk_dirty_node_init(tnode); /* Stash. */ arena_chunk_dirty_insert(purge_runs_sentinel, diff --git a/src/base.c b/src/base.c index 819fa025..33e8b6f5 100644 --- a/src/base.c +++ b/src/base.c @@ -60,8 +60,7 @@ base_chunk_alloc(size_t minsize) if (config_stats) base_allocated += nsize; } - extent_node_addr_set(node, addr); - extent_node_size_set(node, csize); + extent_node_init(node, NULL, addr, csize, true); return (node); } @@ -84,8 +83,7 @@ base_alloc(size_t size) */ csize = CACHELINE_CEILING(size); - extent_node_addr_set(&key, NULL); - extent_node_size_set(&key, csize); + extent_node_init(&key, NULL, NULL, csize, false); malloc_mutex_lock(&base_mtx); node = extent_tree_szad_nsearch(&base_avail_szad, &key); if (node != NULL) { diff --git a/src/chunk.c b/src/chunk.c index 8bc87beb..59d72c9b 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -79,8 +79,7 @@ chunk_recycle(arena_t *arena, extent_tree_t *chunks_szad, /* Beware size_t wrap-around. */ if (alloc_size < size) return (NULL); - extent_node_addr_set(&key, new_addr); - extent_node_size_set(&key, alloc_size); + extent_node_init(&key, arena, new_addr, alloc_size, false); malloc_mutex_lock(&arena->chunks_mtx); node = (new_addr != NULL) ? 
extent_tree_ad_search(chunks_ad, &key) : extent_tree_szad_nsearch(chunks_szad, &key); @@ -121,9 +120,8 @@ chunk_recycle(arena_t *arena, extent_tree_t *chunks_szad, return (NULL); } } - extent_node_addr_set(node, (void *)((uintptr_t)(ret) + size)); - extent_node_size_set(node, trailsize); - extent_node_zeroed_set(node, zeroed); + extent_node_init(node, arena, (void *)((uintptr_t)(ret) + size), + trailsize, zeroed); extent_tree_szad_insert(chunks_szad, node); extent_tree_ad_insert(chunks_ad, node); arena_chunk_dirty_maybe_insert(arena, node, dirty); @@ -288,7 +286,8 @@ chunk_record(arena_t *arena, extent_tree_t *chunks_szad, JEMALLOC_VALGRIND_MAKE_MEM_NOACCESS(chunk, size); malloc_mutex_lock(&arena->chunks_mtx); - extent_node_addr_set(&key, (void *)((uintptr_t)chunk + size)); + extent_node_init(&key, arena, (void *)((uintptr_t)chunk + size), 0, + false); node = extent_tree_ad_nsearch(chunks_ad, &key); /* Try to coalesce forward. */ if (node != NULL && extent_node_addr_get(node) == @@ -301,7 +300,7 @@ chunk_record(arena_t *arena, extent_tree_t *chunks_szad, extent_tree_szad_remove(chunks_szad, node); arena_chunk_dirty_maybe_remove(arena, node, dirty); extent_node_addr_set(node, chunk); - extent_node_size_set(node, extent_node_size_get(node) + size); + extent_node_size_set(node, size + extent_node_size_get(node)); extent_node_zeroed_set(node, extent_node_zeroed_get(node) && !unzeroed); extent_tree_szad_insert(chunks_szad, node); @@ -320,9 +319,7 @@ chunk_record(arena_t *arena, extent_tree_t *chunks_szad, pages_purge(chunk, size); goto label_return; } - extent_node_addr_set(node, chunk); - extent_node_size_set(node, size); - extent_node_zeroed_set(node, !unzeroed); + extent_node_init(node, arena, chunk, size, !unzeroed); extent_tree_ad_insert(chunks_ad, node); extent_tree_szad_insert(chunks_szad, node); arena_chunk_dirty_maybe_insert(arena, node, dirty); @@ -343,10 +340,10 @@ chunk_record(arena_t *arena, extent_tree_t *chunks_szad, extent_tree_szad_remove(chunks_szad, 
node); arena_chunk_dirty_maybe_remove(arena, node, dirty); extent_node_addr_set(node, extent_node_addr_get(prev)); - extent_node_size_set(node, extent_node_size_get(node) + - extent_node_size_get(prev)); - extent_node_zeroed_set(node, extent_node_zeroed_get(node) && - extent_node_zeroed_get(prev)); + extent_node_size_set(node, extent_node_size_get(prev) + + extent_node_size_get(node)); + extent_node_zeroed_set(node, extent_node_zeroed_get(prev) && + extent_node_zeroed_get(node)); extent_tree_szad_insert(chunks_szad, node); arena_chunk_dirty_maybe_insert(arena, node, dirty); diff --git a/src/huge.c b/src/huge.c index b9cae001..3092932e 100644 --- a/src/huge.c +++ b/src/huge.c @@ -73,11 +73,7 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, return (NULL); } - extent_node_arena_set(node, arena); - extent_node_addr_set(node, ret); - extent_node_size_set(node, usize); - extent_node_achunk_set(node, false); - extent_node_zeroed_set(node, is_zeroed); + extent_node_init(node, arena, ret, usize, is_zeroed); if (huge_node_set(ret, node)) { arena_chunk_dalloc_huge(arena, ret, usize); From eafebfdfbe48bf8e95902d89cfa1eb3d5cd2fa5c Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 17 Feb 2015 16:12:31 -0800 Subject: [PATCH 0654/3378] Remove obsolete type arena_chunk_miscelms_t. 
--- include/jemalloc/internal/arena.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index f967be3a..0383f0c0 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -148,7 +148,6 @@ struct arena_chunk_map_misc_s { }; typedef rb_tree(arena_chunk_map_misc_t) arena_avail_tree_t; typedef rb_tree(arena_chunk_map_misc_t) arena_run_tree_t; -typedef qr(arena_chunk_map_misc_t) arena_chunk_miscelms_t; #endif /* JEMALLOC_ARENA_STRUCTS_A */ #ifdef JEMALLOC_ARENA_STRUCTS_B From 47701b22ee7c0df5e99efa0fcdcf98b9ff805b59 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 17 Feb 2015 22:23:10 -0800 Subject: [PATCH 0655/3378] arena_chunk_dirty_node_init() --> extent_node_dirty_linkage_init() --- include/jemalloc/internal/extent.h | 9 +++++++++ include/jemalloc/internal/private_symbols.txt | 1 + src/arena.c | 14 +++----------- 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index 9ee1b445..2f99debf 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -77,6 +77,7 @@ void extent_node_achunk_set(extent_node_t *node, bool achunk); void extent_node_prof_tctx_set(extent_node_t *node, prof_tctx_t *tctx); void extent_node_init(extent_node_t *node, arena_t *arena, void *addr, size_t size, bool zeroed); +void extent_node_dirty_linkage_init(extent_node_t *node); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_EXTENT_C_)) @@ -177,6 +178,14 @@ extent_node_init(extent_node_t *node, arena_t *arena, void *addr, size_t size, if (config_prof) extent_node_prof_tctx_set(node, NULL); } + +JEMALLOC_INLINE void +extent_node_dirty_linkage_init(extent_node_t *node) +{ + + qr_new(node, cd_link); + qr_new(&node->runs_dirty, rd_link); +} #endif #endif /* JEMALLOC_H_INLINES */ diff --git a/include/jemalloc/internal/private_symbols.txt 
b/include/jemalloc/internal/private_symbols.txt index 8b55954c..0a8654b9 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -182,6 +182,7 @@ extent_node_addr_get extent_node_addr_set extent_node_arena_get extent_node_arena_set +extent_node_dirty_linkage_init extent_node_init extent_node_prof_tctx_get extent_node_prof_tctx_set diff --git a/src/arena.c b/src/arena.c index b068a4d8..205f598c 100644 --- a/src/arena.c +++ b/src/arena.c @@ -152,14 +152,6 @@ arena_chunk_dirty_npages(const extent_node_t *node) return (extent_node_size_get(node) >> LG_PAGE); } -static void -arena_chunk_dirty_node_init(extent_node_t *node) -{ - - qr_new(node, cd_link); - qr_new(&node->runs_dirty, rd_link); -} - static void arena_chunk_dirty_insert(arena_chunk_map_misc_t *runs_dirty, extent_node_t *chunks_dirty, extent_node_t *node) @@ -181,8 +173,8 @@ void arena_chunk_dirty_maybe_insert(arena_t *arena, extent_node_t *node, bool dirty) { - arena_chunk_dirty_node_init(node); if (dirty) { + extent_node_dirty_linkage_init(node); arena_chunk_dirty_insert(&arena->runs_dirty, &arena->chunks_dirty, node); arena->ndirty += arena_chunk_dirty_npages(node); @@ -1054,7 +1046,7 @@ arena_stash_dirty(arena_t *arena, bool all, size_t npurge, */ assert(tnode != NULL); extent_node_init(tnode, arena, addr, size, zeroed); - arena_chunk_dirty_node_init(tnode); + extent_node_dirty_linkage_init(tnode); /* Stash. 
*/ arena_chunk_dirty_insert(purge_runs_sentinel, purge_chunks_sentinel, tnode); @@ -1223,7 +1215,7 @@ arena_purge(arena_t *arena, bool all) npurge = arena_compute_npurge(arena, all); qr_new(&purge_runs_sentinel, rd_link); - arena_chunk_dirty_node_init(&purge_chunks_sentinel); + extent_node_dirty_linkage_init(&purge_chunks_sentinel); npurgeable = arena_stash_dirty(arena, all, npurge, &purge_runs_sentinel, &purge_chunks_sentinel); From 339c2b23b2d61993ac768afcc72af135662c6771 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 17 Feb 2015 22:25:56 -0800 Subject: [PATCH 0656/3378] Fix chunk_unmap() to propagate dirty state. Fix chunk_unmap() to propagate whether a chunk is dirty, and modify dirty chunk purging to record this information so it can be passed to chunk_unmap(). Since the broken version of chunk_unmap() claimed that all chunks were clean, this resulted in potential memory corruption for purging implementations that do not zero (e.g. MADV_FREE). This regression was introduced by ee41ad409a43d12900a5a3108f6c14f84e4eb0eb (Integrate whole chunks into unused dirty page purging machinery.). 
--- include/jemalloc/internal/chunk.h | 2 +- src/arena.c | 14 ++++++++++---- src/chunk.c | 6 +++--- 3 files changed, 14 insertions(+), 8 deletions(-) diff --git a/include/jemalloc/internal/chunk.h b/include/jemalloc/internal/chunk.h index 96b9e159..8722dd0e 100644 --- a/include/jemalloc/internal/chunk.h +++ b/include/jemalloc/internal/chunk.h @@ -47,7 +47,7 @@ void *chunk_alloc_default(void *new_addr, size_t size, size_t alignment, void chunk_record(arena_t *arena, extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, bool dirty, void *chunk, size_t size); bool chunk_dalloc_default(void *chunk, size_t size, unsigned arena_ind); -void chunk_unmap(arena_t *arena, void *chunk, size_t size); +void chunk_unmap(arena_t *arena, bool dirty, void *chunk, size_t size); bool chunk_boot(void); void chunk_prefork(void); void chunk_postfork_parent(void); diff --git a/src/arena.c b/src/arena.c index 205f598c..3d38386e 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1035,6 +1035,7 @@ arena_stash_dirty(arena_t *arena, bool all, size_t npurge, chunk = arena->chunk_alloc(addr, size, chunksize, &zero, arena->ind); assert(chunk == addr); + assert(zero == zeroed); /* * Create a temporary node to link into the ring of * stashed allocations. @@ -1075,7 +1076,7 @@ arena_stash_dirty(arena_t *arena, bool all, size_t npurge, /* Temporarily allocate the free dirty run. */ arena_run_split_large(arena, run, run_size, false); - /* Append to purge_runs for later processing. */ + /* Stash. */ if (false) qr_new(runselm, rd_link); /* Redundant. 
*/ else { @@ -1114,9 +1115,12 @@ arena_purge_stashed(arena_t *arena, arena_chunk_map_misc_t *purge_runs_sentinel, if (runselm == &chunkselm->runs_dirty) { size_t size = extent_node_size_get(chunkselm); + bool unzeroed; - pages_purge(extent_node_addr_get(chunkselm), size); npages = size >> LG_PAGE; + unzeroed = pages_purge(extent_node_addr_get(chunkselm), + size); + extent_node_zeroed_set(chunkselm, !unzeroed); chunkselm = qr_next(chunkselm, cd_link); } else { arena_chunk_t *chunk; @@ -1180,11 +1184,13 @@ arena_unstash_purged(arena_t *arena, if (runselm == &chunkselm->runs_dirty) { extent_node_t *chunkselm_next = qr_next(chunkselm, cd_link); + bool dirty = !extent_node_zeroed_get(chunkselm); + void *addr = extent_node_addr_get(chunkselm); + size_t size = extent_node_size_get(chunkselm); arena_chunk_dirty_remove(chunkselm); - chunk_unmap(arena, extent_node_addr_get(chunkselm), - extent_node_size_get(chunkselm)); arena_node_dalloc(arena, chunkselm); chunkselm = chunkselm_next; + chunk_unmap(arena, dirty, addr, size); } else { arena_run_t *run = &runselm->run; qr_remove(runselm, rd_link); diff --git a/src/chunk.c b/src/chunk.c index 59d72c9b..774a978a 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -377,7 +377,7 @@ chunk_dalloc_default(void *chunk, size_t size, unsigned arena_ind) } void -chunk_unmap(arena_t *arena, void *chunk, size_t size) +chunk_unmap(arena_t *arena, bool dirty, void *chunk, size_t size) { assert(chunk != NULL); @@ -387,10 +387,10 @@ chunk_unmap(arena_t *arena, void *chunk, size_t size) if (have_dss && chunk_in_dss(chunk)) { chunk_record(arena, &arena->chunks_szad_dss, - &arena->chunks_ad_dss, false, chunk, size); + &arena->chunks_ad_dss, dirty, chunk, size); } else if (chunk_dalloc_mmap(chunk, size)) { chunk_record(arena, &arena->chunks_szad_mmap, - &arena->chunks_ad_mmap, false, chunk, size); + &arena->chunks_ad_mmap, dirty, chunk, size); } } From 738e089a2e707dbfc70286f7deeebc68e03d2347 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 18 Feb 
2015 01:15:50 -0800 Subject: [PATCH 0657/3378] Rename "dirty chunks" to "cached chunks". Rename "dirty chunks" to "cached chunks", in order to avoid overloading the term "dirty". Fix the regression caused by 339c2b23b2d61993ac768afcc72af135662c6771 (Fix chunk_unmap() to propagate dirty state.), and actually address what that change attempted, which is to only purge chunks once, and propagate whether zeroed pages resulted into chunk_record(). --- include/jemalloc/internal/arena.h | 22 +++--- include/jemalloc/internal/chunk.h | 5 +- include/jemalloc/internal/extent.h | 27 ++++++- include/jemalloc/internal/private_symbols.txt | 6 +- src/arena.c | 70 +++++++------------ src/chunk.c | 45 ++++++------ src/chunk_dss.c | 2 +- 7 files changed, 91 insertions(+), 86 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 0383f0c0..3d79c627 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -318,14 +318,14 @@ struct arena_s { /* * Unused dirty memory this arena manages. Dirty memory is conceptually - * tracked as an arbitrarily interleaved LRU of runs and chunks, but the - * list linkage is actually semi-duplicated in order to avoid extra - * arena_chunk_map_misc_t space overhead. + * tracked as an arbitrarily interleaved LRU of dirty runs and cached + * chunks, but the list linkage is actually semi-duplicated in order to + * avoid extra arena_chunk_map_misc_t space overhead. * * LRU-----------------------------------------------------------MRU * * ______________ ___ ___ - * ...-->|chunks_dirty|<--------->|c|<-------------------->|c|<--... + * ...-->|chunks_cache|<--------->|c|<-------------------->|c|<--... * -------------- |h| |h| * ____________ _____ |u| _____ _____ |u| * ...-->|runs_dirty|<-->|run|<-->|n|<-->|run|<-->|run|<-->|n|<--... 
@@ -333,7 +333,7 @@ struct arena_s { * --- --- */ arena_chunk_map_misc_t runs_dirty; - extent_node_t chunks_dirty; + extent_node_t chunks_cache; /* Extant huge allocations. */ ql_head(extent_node_t) huge; @@ -347,8 +347,8 @@ struct arena_s { * orderings are needed, which is why there are two trees with the same * contents. */ - extent_tree_t chunks_szad_dirty; - extent_tree_t chunks_ad_dirty; + extent_tree_t chunks_szad_cache; + extent_tree_t chunks_ad_cache; extent_tree_t chunks_szad_mmap; extent_tree_t chunks_ad_mmap; extent_tree_t chunks_szad_dss; @@ -384,10 +384,10 @@ extern size_t arena_maxclass; /* Max size class for arenas. */ extern unsigned nlclasses; /* Number of large size classes. */ extern unsigned nhclasses; /* Number of huge size classes. */ -void arena_chunk_dirty_maybe_insert(arena_t *arena, extent_node_t *node, - bool dirty); -void arena_chunk_dirty_maybe_remove(arena_t *arena, extent_node_t *node, - bool dirty); +void arena_chunk_cache_maybe_insert(arena_t *arena, extent_node_t *node, + bool cache); +void arena_chunk_cache_maybe_remove(arena_t *arena, extent_node_t *node, + bool cache); extent_node_t *arena_node_alloc(arena_t *arena); void arena_node_dalloc(arena_t *arena, extent_node_t *node); void *arena_chunk_alloc_huge(arena_t *arena, size_t usize, size_t alignment, diff --git a/include/jemalloc/internal/chunk.h b/include/jemalloc/internal/chunk.h index 8722dd0e..bf6acbd1 100644 --- a/include/jemalloc/internal/chunk.h +++ b/include/jemalloc/internal/chunk.h @@ -45,9 +45,10 @@ void *chunk_alloc_arena(chunk_alloc_t *chunk_alloc, void *chunk_alloc_default(void *new_addr, size_t size, size_t alignment, bool *zero, unsigned arena_ind); void chunk_record(arena_t *arena, extent_tree_t *chunks_szad, - extent_tree_t *chunks_ad, bool dirty, void *chunk, size_t size); + extent_tree_t *chunks_ad, bool cache, void *chunk, size_t size, + bool zeroed); bool chunk_dalloc_default(void *chunk, size_t size, unsigned arena_ind); -void chunk_unmap(arena_t *arena, 
bool dirty, void *chunk, size_t size); +void chunk_unmap(arena_t *arena, void *chunk, size_t size, bool zeroed); bool chunk_boot(void); void chunk_prefork(void); void chunk_postfork_parent(void); diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index 2f99debf..81ff40b8 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -33,9 +33,9 @@ struct extent_node_s { /* Profile counters, used for huge objects. */ prof_tctx_t *en_prof_tctx; - /* Linkage for arena's runs_dirty and chunks_dirty rings. */ - qr(extent_node_t) cd_link; + /* Linkage for arena's runs_dirty and chunks_cache rings. */ arena_chunk_map_misc_t runs_dirty; + qr(extent_node_t) cc_link; union { /* Linkage for the size/address-ordered tree. */ @@ -78,6 +78,9 @@ void extent_node_prof_tctx_set(extent_node_t *node, prof_tctx_t *tctx); void extent_node_init(extent_node_t *node, arena_t *arena, void *addr, size_t size, bool zeroed); void extent_node_dirty_linkage_init(extent_node_t *node); +void extent_node_dirty_insert(extent_node_t *node, + arena_chunk_map_misc_t *runs_dirty, extent_node_t *chunks_dirty); +void extent_node_dirty_remove(extent_node_t *node); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_EXTENT_C_)) @@ -183,9 +186,27 @@ JEMALLOC_INLINE void extent_node_dirty_linkage_init(extent_node_t *node) { - qr_new(node, cd_link); qr_new(&node->runs_dirty, rd_link); + qr_new(node, cc_link); } + +JEMALLOC_INLINE void +extent_node_dirty_insert(extent_node_t *node, + arena_chunk_map_misc_t *runs_dirty, extent_node_t *chunks_dirty) +{ + + qr_meld(runs_dirty, &node->runs_dirty, rd_link); + qr_meld(chunks_dirty, node, cc_link); +} + +JEMALLOC_INLINE void +extent_node_dirty_remove(extent_node_t *node) +{ + + qr_remove(&node->runs_dirty, rd_link); + qr_remove(node, cc_link); +} + #endif #endif /* JEMALLOC_H_INLINES */ diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt 
index 0a8654b9..dfe62ce5 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -12,9 +12,9 @@ arena_boot arena_choose arena_choose_hard arena_chunk_alloc_huge +arena_chunk_cache_maybe_insert +arena_chunk_cache_maybe_remove arena_chunk_dalloc_huge -arena_chunk_dirty_maybe_insert -arena_chunk_dirty_maybe_remove arena_chunk_ralloc_huge_expand arena_chunk_ralloc_huge_shrink arena_chunk_ralloc_huge_similar @@ -182,7 +182,9 @@ extent_node_addr_get extent_node_addr_set extent_node_arena_get extent_node_arena_set +extent_node_dirty_insert extent_node_dirty_linkage_init +extent_node_dirty_remove extent_node_init extent_node_prof_tctx_get extent_node_prof_tctx_set diff --git a/src/arena.c b/src/arena.c index 3d38386e..762b8182 100644 --- a/src/arena.c +++ b/src/arena.c @@ -152,41 +152,24 @@ arena_chunk_dirty_npages(const extent_node_t *node) return (extent_node_size_get(node) >> LG_PAGE); } -static void -arena_chunk_dirty_insert(arena_chunk_map_misc_t *runs_dirty, - extent_node_t *chunks_dirty, extent_node_t *node) -{ - - qr_meld(chunks_dirty, node, cd_link); - qr_meld(runs_dirty, &node->runs_dirty, rd_link); -} - -static void -arena_chunk_dirty_remove(extent_node_t *node) -{ - - qr_remove(node, cd_link); - qr_remove(&node->runs_dirty, rd_link); -} - void -arena_chunk_dirty_maybe_insert(arena_t *arena, extent_node_t *node, bool dirty) +arena_chunk_cache_maybe_insert(arena_t *arena, extent_node_t *node, bool cache) { - if (dirty) { + if (cache) { extent_node_dirty_linkage_init(node); - arena_chunk_dirty_insert(&arena->runs_dirty, - &arena->chunks_dirty, node); + extent_node_dirty_insert(node, &arena->runs_dirty, + &arena->chunks_cache); arena->ndirty += arena_chunk_dirty_npages(node); } } void -arena_chunk_dirty_maybe_remove(arena_t *arena, extent_node_t *node, bool dirty) +arena_chunk_cache_maybe_remove(arena_t *arena, extent_node_t *node, bool dirty) { if (dirty) { - arena_chunk_dirty_remove(node); + 
extent_node_dirty_remove(node); assert(arena->ndirty >= arena_chunk_dirty_npages(node)); arena->ndirty -= arena_chunk_dirty_npages(node); } @@ -954,14 +937,14 @@ arena_dirty_count(arena_t *arena) extent_node_t *chunkselm; for (runselm = qr_next(&arena->runs_dirty, rd_link), - chunkselm = qr_next(&arena->chunks_dirty, cd_link); + chunkselm = qr_next(&arena->chunks_cache, cc_link); runselm != &arena->runs_dirty; runselm = qr_next(runselm, rd_link)) { size_t npages; if (runselm == &chunkselm->runs_dirty) { npages = extent_node_size_get(chunkselm) >> LG_PAGE; - chunkselm = qr_next(chunkselm, cd_link); + chunkselm = qr_next(chunkselm, cc_link); } else { arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(runselm); @@ -1010,7 +993,7 @@ arena_stash_dirty(arena_t *arena, bool all, size_t npurge, /* Stash at least npurge pages. */ for (runselm = qr_next(&arena->runs_dirty, rd_link), - chunkselm = qr_next(&arena->chunks_dirty, cd_link); + chunkselm = qr_next(&arena->chunks_cache, cc_link); runselm != &arena->runs_dirty; runselm = runselm_next) { size_t npages; runselm_next = qr_next(runselm, rd_link); @@ -1022,7 +1005,7 @@ arena_stash_dirty(arena_t *arena, bool all, size_t npurge, bool zeroed, zero; UNUSED void *chunk; - chunkselm_next = qr_next(chunkselm, cd_link); + chunkselm_next = qr_next(chunkselm, cc_link); /* * Cache contents of chunkselm prior to it being * destroyed as a side effect of allocating the chunk. @@ -1038,19 +1021,16 @@ arena_stash_dirty(arena_t *arena, bool all, size_t npurge, assert(zero == zeroed); /* * Create a temporary node to link into the ring of - * stashed allocations. + * stashed allocations. OOM shouldn't be possible + * because chunk allocation just cached a node. */ tnode = arena_node_alloc(arena); - /* - * OOM shouldn't be possible because chunk allocation - * just cached a node. - */ assert(tnode != NULL); + /* Stash. */ extent_node_init(tnode, arena, addr, size, zeroed); extent_node_dirty_linkage_init(tnode); - /* Stash. 
*/ - arena_chunk_dirty_insert(purge_runs_sentinel, - purge_chunks_sentinel, tnode); + extent_node_dirty_insert(tnode, purge_runs_sentinel, + purge_chunks_sentinel); npages = size >> LG_PAGE; chunkselm = chunkselm_next; } else { @@ -1108,7 +1088,7 @@ arena_purge_stashed(arena_t *arena, arena_chunk_map_misc_t *purge_runs_sentinel, malloc_mutex_unlock(&arena->lock); for (runselm = qr_next(purge_runs_sentinel, rd_link), - chunkselm = qr_next(purge_chunks_sentinel, cd_link); + chunkselm = qr_next(purge_chunks_sentinel, cc_link); runselm != purge_runs_sentinel; runselm = qr_next(runselm, rd_link)) { size_t npages; @@ -1121,7 +1101,7 @@ arena_purge_stashed(arena_t *arena, arena_chunk_map_misc_t *purge_runs_sentinel, unzeroed = pages_purge(extent_node_addr_get(chunkselm), size); extent_node_zeroed_set(chunkselm, !unzeroed); - chunkselm = qr_next(chunkselm, cd_link); + chunkselm = qr_next(chunkselm, cc_link); } else { arena_chunk_t *chunk; size_t pageind, run_size, flag_unzeroed, i; @@ -1178,19 +1158,19 @@ arena_unstash_purged(arena_t *arena, /* Deallocate runs. 
*/ for (runselm = qr_next(purge_runs_sentinel, rd_link), - chunkselm = qr_next(purge_chunks_sentinel, cd_link); + chunkselm = qr_next(purge_chunks_sentinel, cc_link); runselm != purge_runs_sentinel; runselm = runselm_next) { runselm_next = qr_next(runselm, rd_link); if (runselm == &chunkselm->runs_dirty) { extent_node_t *chunkselm_next = qr_next(chunkselm, - cd_link); - bool dirty = !extent_node_zeroed_get(chunkselm); + cc_link); void *addr = extent_node_addr_get(chunkselm); size_t size = extent_node_size_get(chunkselm); - arena_chunk_dirty_remove(chunkselm); + bool zeroed = extent_node_zeroed_get(chunkselm); + extent_node_dirty_remove(chunkselm); arena_node_dalloc(arena, chunkselm); chunkselm = chunkselm_next; - chunk_unmap(arena, dirty, addr, size); + chunk_unmap(arena, addr, size, zeroed); } else { arena_run_t *run = &runselm->run; qr_remove(runselm, rd_link); @@ -2612,14 +2592,14 @@ arena_new(unsigned ind) arena_avail_tree_new(&arena->runs_avail); qr_new(&arena->runs_dirty, rd_link); - qr_new(&arena->chunks_dirty, cd_link); + qr_new(&arena->chunks_cache, cc_link); ql_new(&arena->huge); if (malloc_mutex_init(&arena->huge_mtx)) return (NULL); - extent_tree_szad_new(&arena->chunks_szad_dirty); - extent_tree_ad_new(&arena->chunks_ad_dirty); + extent_tree_szad_new(&arena->chunks_szad_cache); + extent_tree_ad_new(&arena->chunks_ad_cache); extent_tree_szad_new(&arena->chunks_szad_mmap); extent_tree_ad_new(&arena->chunks_ad_mmap); extent_tree_szad_new(&arena->chunks_szad_dss); diff --git a/src/chunk.c b/src/chunk.c index 774a978a..264e4f27 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -64,7 +64,7 @@ chunk_deregister(const void *chunk, const extent_node_t *node) static void * chunk_recycle(arena_t *arena, extent_tree_t *chunks_szad, - extent_tree_t *chunks_ad, bool dirty, void *new_addr, size_t size, + extent_tree_t *chunks_ad, bool cache, void *new_addr, size_t size, size_t alignment, bool *zero) { void *ret; @@ -100,13 +100,13 @@ chunk_recycle(arena_t *arena, 
extent_tree_t *chunks_szad, /* Remove node from the tree. */ extent_tree_szad_remove(chunks_szad, node); extent_tree_ad_remove(chunks_ad, node); - arena_chunk_dirty_maybe_remove(arena, node, dirty); + arena_chunk_cache_maybe_remove(arena, node, cache); if (leadsize != 0) { /* Insert the leading space as a smaller chunk. */ extent_node_size_set(node, leadsize); extent_tree_szad_insert(chunks_szad, node); extent_tree_ad_insert(chunks_ad, node); - arena_chunk_dirty_maybe_insert(arena, node, dirty); + arena_chunk_cache_maybe_insert(arena, node, cache); node = NULL; } if (trailsize != 0) { @@ -116,7 +116,7 @@ chunk_recycle(arena_t *arena, extent_tree_t *chunks_szad, if (node == NULL) { malloc_mutex_unlock(&arena->chunks_mtx); chunk_record(arena, chunks_szad, chunks_ad, - dirty, ret, size); + cache, ret, size, zeroed); return (NULL); } } @@ -124,7 +124,7 @@ chunk_recycle(arena_t *arena, extent_tree_t *chunks_szad, trailsize, zeroed); extent_tree_szad_insert(chunks_szad, node); extent_tree_ad_insert(chunks_ad, node); - arena_chunk_dirty_maybe_insert(arena, node, dirty); + arena_chunk_cache_maybe_insert(arena, node, cache); node = NULL; } malloc_mutex_unlock(&arena->chunks_mtx); @@ -177,9 +177,9 @@ chunk_alloc_core(arena_t *arena, void *new_addr, size_t size, size_t alignment, assert(alignment != 0); assert((alignment & chunksize_mask) == 0); - /* dirty. */ - if ((ret = chunk_recycle(arena, &arena->chunks_szad_dirty, - &arena->chunks_ad_dirty, true, new_addr, size, alignment, zero)) != + /* cache. */ + if ((ret = chunk_recycle(arena, &arena->chunks_szad_cache, + &arena->chunks_ad_cache, true, new_addr, size, alignment, zero)) != NULL) return (ret); /* "primary" dss. 
*/ @@ -276,13 +276,14 @@ chunk_alloc_default(void *new_addr, size_t size, size_t alignment, bool *zero, void chunk_record(arena_t *arena, extent_tree_t *chunks_szad, - extent_tree_t *chunks_ad, bool dirty, void *chunk, size_t size) + extent_tree_t *chunks_ad, bool cache, void *chunk, size_t size, bool zeroed) { bool unzeroed; extent_node_t *node, *prev; extent_node_t key; - unzeroed = dirty ? true : pages_purge(chunk, size); + assert(!cache || !zeroed); + unzeroed = cache || !zeroed; JEMALLOC_VALGRIND_MAKE_MEM_NOACCESS(chunk, size); malloc_mutex_lock(&arena->chunks_mtx); @@ -298,13 +299,13 @@ chunk_record(arena_t *arena, extent_tree_t *chunks_szad, * remove/insert from/into chunks_szad. */ extent_tree_szad_remove(chunks_szad, node); - arena_chunk_dirty_maybe_remove(arena, node, dirty); + arena_chunk_cache_maybe_remove(arena, node, cache); extent_node_addr_set(node, chunk); extent_node_size_set(node, size + extent_node_size_get(node)); extent_node_zeroed_set(node, extent_node_zeroed_get(node) && !unzeroed); extent_tree_szad_insert(chunks_szad, node); - arena_chunk_dirty_maybe_insert(arena, node, dirty); + arena_chunk_cache_maybe_insert(arena, node, cache); } else { /* Coalescing forward failed, so insert a new node. */ node = arena_node_alloc(arena); @@ -315,14 +316,14 @@ chunk_record(arena_t *arena, extent_tree_t *chunks_szad, * pages have already been purged, so that this is only * a virtual memory leak. */ - if (dirty) + if (cache) pages_purge(chunk, size); goto label_return; } extent_node_init(node, arena, chunk, size, !unzeroed); extent_tree_ad_insert(chunks_ad, node); extent_tree_szad_insert(chunks_szad, node); - arena_chunk_dirty_maybe_insert(arena, node, dirty); + arena_chunk_cache_maybe_insert(arena, node, cache); } /* Try to coalesce backward. 
*/ @@ -336,16 +337,16 @@ chunk_record(arena_t *arena, extent_tree_t *chunks_szad, */ extent_tree_szad_remove(chunks_szad, prev); extent_tree_ad_remove(chunks_ad, prev); - arena_chunk_dirty_maybe_remove(arena, prev, dirty); + arena_chunk_cache_maybe_remove(arena, prev, cache); extent_tree_szad_remove(chunks_szad, node); - arena_chunk_dirty_maybe_remove(arena, node, dirty); + arena_chunk_cache_maybe_remove(arena, node, cache); extent_node_addr_set(node, extent_node_addr_get(prev)); extent_node_size_set(node, extent_node_size_get(prev) + extent_node_size_get(node)); extent_node_zeroed_set(node, extent_node_zeroed_get(prev) && extent_node_zeroed_get(node)); extent_tree_szad_insert(chunks_szad, node); - arena_chunk_dirty_maybe_insert(arena, node, dirty); + arena_chunk_cache_maybe_insert(arena, node, cache); arena_node_dalloc(arena, prev); } @@ -363,8 +364,8 @@ chunk_cache(arena_t *arena, void *chunk, size_t size) assert(size != 0); assert((size & chunksize_mask) == 0); - chunk_record(arena, &arena->chunks_szad_dirty, &arena->chunks_ad_dirty, - true, chunk, size); + chunk_record(arena, &arena->chunks_szad_cache, &arena->chunks_ad_cache, + true, chunk, size, false); } /* Default arena chunk deallocation routine in the absence of user override. 
*/ @@ -377,7 +378,7 @@ chunk_dalloc_default(void *chunk, size_t size, unsigned arena_ind) } void -chunk_unmap(arena_t *arena, bool dirty, void *chunk, size_t size) +chunk_unmap(arena_t *arena, void *chunk, size_t size, bool zeroed) { assert(chunk != NULL); @@ -387,10 +388,10 @@ chunk_unmap(arena_t *arena, bool dirty, void *chunk, size_t size) if (have_dss && chunk_in_dss(chunk)) { chunk_record(arena, &arena->chunks_szad_dss, - &arena->chunks_ad_dss, dirty, chunk, size); + &arena->chunks_ad_dss, false, chunk, size, zeroed); } else if (chunk_dalloc_mmap(chunk, size)) { chunk_record(arena, &arena->chunks_szad_mmap, - &arena->chunks_ad_mmap, dirty, chunk, size); + &arena->chunks_ad_mmap, false, chunk, size, zeroed); } } diff --git a/src/chunk_dss.c b/src/chunk_dss.c index c3c48481..6fbe31bf 100644 --- a/src/chunk_dss.c +++ b/src/chunk_dss.c @@ -137,7 +137,7 @@ chunk_alloc_dss(arena_t *arena, void *new_addr, size_t size, size_t alignment, chunk_record(arena, &arena->chunks_szad_dss, &arena->chunks_ad_dss, false, cpad, - cpad_size); + cpad_size, false); } if (*zero) { JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED( From 99bd94fb65a0b6423c4efcc3e3e501179b92a4db Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 18 Feb 2015 16:40:53 -0800 Subject: [PATCH 0658/3378] Fix chunk cache races. These regressions were introduced by ee41ad409a43d12900a5a3108f6c14f84e4eb0eb (Integrate whole chunks into unused dirty page purging machinery.). 
--- include/jemalloc/internal/arena.h | 1 + include/jemalloc/internal/chunk.h | 13 +- include/jemalloc/internal/private_symbols.txt | 8 +- src/arena.c | 266 +++++++++++------- src/chunk.c | 114 +++++--- 5 files changed, 263 insertions(+), 139 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 3d79c627..42086ca1 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -399,6 +399,7 @@ void arena_chunk_ralloc_huge_shrink(arena_t *arena, void *chunk, size_t oldsize, size_t usize); bool arena_chunk_ralloc_huge_expand(arena_t *arena, void *chunk, size_t oldsize, size_t usize, bool *zero); +void arena_maybe_purge(arena_t *arena); void arena_purge_all(arena_t *arena); void arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, index_t binind, uint64_t prof_accumbytes); diff --git a/include/jemalloc/internal/chunk.h b/include/jemalloc/internal/chunk.h index bf6acbd1..1a968a58 100644 --- a/include/jemalloc/internal/chunk.h +++ b/include/jemalloc/internal/chunk.h @@ -39,16 +39,21 @@ extern size_t chunk_npages; bool chunk_register(const void *chunk, const extent_node_t *node); void chunk_deregister(const void *chunk, const extent_node_t *node); void *chunk_alloc_base(size_t size); -void *chunk_alloc_arena(chunk_alloc_t *chunk_alloc, - chunk_dalloc_t *chunk_dalloc, unsigned arena_ind, void *new_addr, - size_t size, size_t alignment, bool *zero); +void *chunk_alloc_cache(arena_t *arena, void *new_addr, size_t size, + size_t alignment, bool *zero, bool dalloc_node); void *chunk_alloc_default(void *new_addr, size_t size, size_t alignment, bool *zero, unsigned arena_ind); +void *chunk_alloc_wrapper(arena_t *arena, chunk_alloc_t *chunk_alloc, + void *new_addr, size_t size, size_t alignment, bool *zero); void chunk_record(arena_t *arena, extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, bool cache, void *chunk, size_t size, bool zeroed); +void chunk_dalloc_cache(arena_t *arena, void *chunk, 
size_t size); +void chunk_dalloc_arena(arena_t *arena, void *chunk, size_t size, + bool zeroed); bool chunk_dalloc_default(void *chunk, size_t size, unsigned arena_ind); -void chunk_unmap(arena_t *arena, void *chunk, size_t size, bool zeroed); +void chunk_dalloc_wrapper(arena_t *arena, chunk_dalloc_t *chunk_dalloc, + void *chunk, size_t size); bool chunk_boot(void); void chunk_prefork(void); void chunk_postfork_parent(void); diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index dfe62ce5..7c217c74 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -53,6 +53,7 @@ arena_mapbitsp_read arena_mapbitsp_write arena_maxclass arena_maxrun +arena_maybe_purge arena_metadata_allocated_add arena_metadata_allocated_get arena_metadata_allocated_sub @@ -124,14 +125,18 @@ bootstrap_free bootstrap_malloc bt_init buferror -chunk_alloc_arena +chunk_alloc_cache chunk_alloc_base chunk_alloc_default chunk_alloc_dss chunk_alloc_mmap +chunk_alloc_wrapper chunk_boot +chunk_dalloc_arena +chunk_dalloc_cache chunk_dalloc_default chunk_dalloc_mmap +chunk_dalloc_wrapper chunk_deregister chunk_dss_boot chunk_dss_postfork_child @@ -147,7 +152,6 @@ chunk_postfork_parent chunk_prefork chunk_record chunk_register -chunk_unmap chunks_rtree chunksize chunksize_mask diff --git a/src/arena.c b/src/arena.c index 762b8182..78aa1ae0 100644 --- a/src/arena.c +++ b/src/arena.c @@ -20,7 +20,6 @@ unsigned nhclasses; /* Number of huge size classes. */ * definition. 
*/ -static void arena_chunk_dalloc(arena_t *arena, arena_chunk_t *chunk); static void arena_purge(arena_t *arena, bool all); static void arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, bool cleaned); @@ -427,27 +426,53 @@ arena_chunk_init_spare(arena_t *arena) return (chunk); } +static bool +arena_chunk_register(arena_t *arena, arena_chunk_t *chunk, bool zero) +{ + + extent_node_init(&chunk->node, arena, chunk, chunksize, zero); + extent_node_achunk_set(&chunk->node, true); + return (chunk_register(chunk, &chunk->node)); +} + +static arena_chunk_t * +arena_chunk_alloc_internal_hard(arena_t *arena, bool *zero) +{ + arena_chunk_t *chunk; + chunk_alloc_t *chunk_alloc = arena->chunk_alloc; + chunk_dalloc_t *chunk_dalloc = arena->chunk_dalloc; + + malloc_mutex_unlock(&arena->lock); + chunk = (arena_chunk_t *)chunk_alloc_wrapper(arena, chunk_alloc, NULL, + chunksize, chunksize, zero); + if (chunk != NULL && arena_chunk_register(arena, chunk, *zero)) { + chunk_dalloc_wrapper(arena, chunk_dalloc, (void *)chunk, + chunksize); + chunk = NULL; + } + malloc_mutex_lock(&arena->lock); + + return (chunk); +} + static arena_chunk_t * arena_chunk_alloc_internal(arena_t *arena, bool *zero) { arena_chunk_t *chunk; - chunk_alloc_t *chunk_alloc; - chunk_dalloc_t *chunk_dalloc; - chunk_alloc = arena->chunk_alloc; - chunk_dalloc = arena->chunk_dalloc; - malloc_mutex_unlock(&arena->lock); - chunk = (arena_chunk_t *)chunk_alloc_arena(chunk_alloc, chunk_dalloc, - arena->ind, NULL, chunksize, chunksize, zero); - if (chunk != NULL) { - extent_node_init(&chunk->node, arena, chunk, chunksize, *zero); - extent_node_achunk_set(&chunk->node, true); - if (chunk_register(chunk, &chunk->node)) { - chunk_dalloc((void *)chunk, chunksize, arena->ind); - chunk = NULL; + if (likely(arena->chunk_alloc == chunk_alloc_default)) { + chunk = chunk_alloc_cache(arena, NULL, chunksize, chunksize, + zero, true); + if (chunk != NULL && arena_chunk_register(arena, chunk, + *zero)) { + 
chunk_dalloc_cache(arena, chunk, chunksize); + return (NULL); } - } - malloc_mutex_lock(&arena->lock); + } else + chunk = NULL; + if (chunk == NULL) + chunk = arena_chunk_alloc_internal_hard(arena, zero); + if (config_stats && chunk != NULL) { arena->stats.mapped += chunksize; arena->stats.metadata_mapped += (map_bias << LG_PAGE); @@ -553,11 +578,19 @@ arena_chunk_dalloc(arena_t *arena, arena_chunk_t *chunk) arena_run_dirty_remove(arena, spare, map_bias, chunk_npages-map_bias); } - chunk_dalloc = arena->chunk_dalloc; - malloc_mutex_unlock(&arena->lock); + chunk_deregister(spare, &spare->node); - chunk_dalloc((void *)spare, chunksize, arena->ind); - malloc_mutex_lock(&arena->lock); + + chunk_dalloc = arena->chunk_dalloc; + if (likely(chunk_dalloc == chunk_dalloc_default)) + chunk_dalloc_cache(arena, (void *)spare, chunksize); + else { + malloc_mutex_unlock(&arena->lock); + chunk_dalloc_wrapper(arena, chunk_dalloc, (void *)spare, + chunksize); + malloc_mutex_lock(&arena->lock); + } + if (config_stats) { arena->stats.mapped -= chunksize; arena->stats.metadata_mapped -= (map_bias << LG_PAGE); @@ -661,28 +694,14 @@ arena_node_dalloc(arena_t *arena, extent_node_t *node) malloc_mutex_unlock(&arena->node_cache_mtx); } -void * -arena_chunk_alloc_huge(arena_t *arena, size_t usize, size_t alignment, - bool *zero) +static void * +arena_chunk_alloc_huge_hard(arena_t *arena, chunk_alloc_t *chunk_alloc, + size_t usize, size_t alignment, bool *zero, size_t csize) { void *ret; - chunk_alloc_t *chunk_alloc; - chunk_dalloc_t *chunk_dalloc; - size_t csize = CHUNK_CEILING(usize); - malloc_mutex_lock(&arena->lock); - chunk_alloc = arena->chunk_alloc; - chunk_dalloc = arena->chunk_dalloc; - if (config_stats) { - /* Optimistically update stats prior to unlocking. 
*/ - arena_huge_malloc_stats_update(arena, usize); - arena->stats.mapped += usize; - } - arena->nactive += (usize >> LG_PAGE); - malloc_mutex_unlock(&arena->lock); - - ret = chunk_alloc_arena(chunk_alloc, chunk_dalloc, arena->ind, NULL, - csize, alignment, zero); + ret = chunk_alloc_wrapper(arena, chunk_alloc, NULL, csize, alignment, + zero); if (ret == NULL) { /* Revert optimistic stats updates. */ malloc_mutex_lock(&arena->lock); @@ -692,12 +711,42 @@ arena_chunk_alloc_huge(arena_t *arena, size_t usize, size_t alignment, } arena->nactive -= (usize >> LG_PAGE); malloc_mutex_unlock(&arena->lock); - return (NULL); } - if (config_stats) - stats_cactive_add(usize); + return (ret); +} +void * +arena_chunk_alloc_huge(arena_t *arena, size_t usize, size_t alignment, + bool *zero) +{ + void *ret; + chunk_alloc_t *chunk_alloc; + size_t csize = CHUNK_CEILING(usize); + + malloc_mutex_lock(&arena->lock); + + /* Optimistically update stats. */ + if (config_stats) { + arena_huge_malloc_stats_update(arena, usize); + arena->stats.mapped += usize; + } + arena->nactive += (usize >> LG_PAGE); + + chunk_alloc = arena->chunk_alloc; + if (likely(chunk_alloc == chunk_alloc_default)) { + ret = chunk_alloc_cache(arena, NULL, csize, alignment, zero, + true); + } else + ret = NULL; + malloc_mutex_unlock(&arena->lock); + if (ret == NULL) { + ret = arena_chunk_alloc_huge_hard(arena, chunk_alloc, usize, + alignment, zero, csize); + } + + if (config_stats && ret != NULL) + stats_cactive_add(usize); return (ret); } @@ -705,7 +754,9 @@ void arena_chunk_dalloc_huge(arena_t *arena, void *chunk, size_t usize) { chunk_dalloc_t *chunk_dalloc; + size_t csize; + csize = CHUNK_CEILING(usize); malloc_mutex_lock(&arena->lock); chunk_dalloc = arena->chunk_dalloc; if (config_stats) { @@ -714,8 +765,14 @@ arena_chunk_dalloc_huge(arena_t *arena, void *chunk, size_t usize) stats_cactive_sub(usize); } arena->nactive -= (usize >> LG_PAGE); - malloc_mutex_unlock(&arena->lock); - chunk_dalloc(chunk, 
CHUNK_CEILING(usize), arena->ind); + + if (likely(chunk_dalloc == chunk_dalloc_default)) { + chunk_dalloc_cache(arena, chunk, csize); + malloc_mutex_unlock(&arena->lock); + } else { + malloc_mutex_unlock(&arena->lock); + chunk_dalloc_wrapper(arena, chunk_dalloc, chunk, csize); + } } void @@ -747,12 +804,10 @@ void arena_chunk_ralloc_huge_shrink(arena_t *arena, void *chunk, size_t oldsize, size_t usize) { - chunk_dalloc_t *chunk_dalloc; size_t udiff = oldsize - usize; size_t cdiff = CHUNK_CEILING(oldsize) - CHUNK_CEILING(usize); malloc_mutex_lock(&arena->lock); - chunk_dalloc = arena->chunk_dalloc; if (config_stats) { arena_huge_ralloc_stats_update(arena, oldsize, usize); if (cdiff != 0) { @@ -761,52 +816,81 @@ arena_chunk_ralloc_huge_shrink(arena_t *arena, void *chunk, size_t oldsize, } } arena->nactive -= udiff >> LG_PAGE; - malloc_mutex_unlock(&arena->lock); + if (cdiff != 0) { - chunk_dalloc((void *)((uintptr_t)chunk + CHUNK_CEILING(usize)), - cdiff, arena->ind); + chunk_dalloc_t *chunk_dalloc = arena->chunk_dalloc; + void *nchunk = (void *)((uintptr_t)chunk + + CHUNK_CEILING(usize)); + + if (likely(chunk_dalloc == chunk_dalloc_default)) { + chunk_dalloc_cache(arena, nchunk, cdiff); + malloc_mutex_unlock(&arena->lock); + } else { + malloc_mutex_unlock(&arena->lock); + chunk_dalloc_wrapper(arena, chunk_dalloc, nchunk, + cdiff); + } + } else + malloc_mutex_unlock(&arena->lock); +} + +bool +arena_chunk_ralloc_huge_expand_hard(arena_t *arena, chunk_alloc_t *chunk_alloc, + size_t oldsize, size_t usize, bool *zero, void *nchunk, size_t udiff, + size_t cdiff) +{ + bool err; + + err = (chunk_alloc_wrapper(arena, chunk_alloc, nchunk, cdiff, chunksize, + zero) == NULL); + if (err) { + /* Revert optimistic stats updates. 
*/ + malloc_mutex_lock(&arena->lock); + if (config_stats) { + arena_huge_ralloc_stats_update_undo(arena, oldsize, + usize); + arena->stats.mapped -= cdiff; + } + arena->nactive -= (udiff >> LG_PAGE); + malloc_mutex_unlock(&arena->lock); } + return (err); } bool arena_chunk_ralloc_huge_expand(arena_t *arena, void *chunk, size_t oldsize, size_t usize, bool *zero) { + bool err; chunk_alloc_t *chunk_alloc; - chunk_dalloc_t *chunk_dalloc; + void *nchunk = (void *)((uintptr_t)chunk + CHUNK_CEILING(oldsize)); size_t udiff = usize - oldsize; size_t cdiff = CHUNK_CEILING(usize) - CHUNK_CEILING(oldsize); malloc_mutex_lock(&arena->lock); - chunk_alloc = arena->chunk_alloc; - chunk_dalloc = arena->chunk_dalloc; + + /* Optimistically update stats. */ if (config_stats) { - /* Optimistically update stats prior to unlocking. */ arena_huge_ralloc_stats_update(arena, oldsize, usize); arena->stats.mapped += cdiff; } arena->nactive += (udiff >> LG_PAGE); - malloc_mutex_unlock(&arena->lock); - if (chunk_alloc_arena(chunk_alloc, chunk_dalloc, arena->ind, - (void *)((uintptr_t)chunk + CHUNK_CEILING(oldsize)), cdiff, - chunksize, zero) == NULL) { - /* Revert optimistic stats updates. 
*/ - malloc_mutex_lock(&arena->lock); - if (config_stats) { - arena_huge_ralloc_stats_update_undo(arena, - oldsize, usize); - arena->stats.mapped -= cdiff; - } - arena->nactive -= (udiff >> LG_PAGE); - malloc_mutex_unlock(&arena->lock); - return (true); + chunk_alloc = arena->chunk_alloc; + if (likely(chunk_alloc == chunk_alloc_default)) { + err = (chunk_alloc_cache(arena, nchunk, cdiff, chunksize, zero, + true) == NULL); + } else + err = true; + malloc_mutex_unlock(&arena->lock); + if (err) { + err = arena_chunk_ralloc_huge_expand_hard(arena, chunk_alloc, + oldsize, usize, zero, nchunk, udiff, cdiff); } - if (config_stats) + if (config_stats && !err) stats_cactive_add(udiff); - - return (false); + return (err); } static arena_run_t * @@ -909,7 +993,7 @@ arena_run_alloc_small(arena_t *arena, size_t size, index_t binind) return (arena_run_alloc_small_helper(arena, size, binind)); } -JEMALLOC_INLINE_C void +void arena_maybe_purge(arena_t *arena) { size_t threshold; @@ -999,39 +1083,25 @@ arena_stash_dirty(arena_t *arena, bool all, size_t npurge, runselm_next = qr_next(runselm, rd_link); if (runselm == &chunkselm->runs_dirty) { - extent_node_t *chunkselm_next, *tnode; - void *addr; - size_t size; - bool zeroed, zero; + extent_node_t *chunkselm_next; + bool zero; UNUSED void *chunk; chunkselm_next = qr_next(chunkselm, cc_link); /* - * Cache contents of chunkselm prior to it being - * destroyed as a side effect of allocating the chunk. + * Allocate. chunkselm remains valid due to the + * dalloc_node=false argument to chunk_alloc_cache(). */ - addr = extent_node_addr_get(chunkselm); - size = extent_node_size_get(chunkselm); - zeroed = extent_node_zeroed_get(chunkselm); - /* Allocate. */ zero = false; - chunk = arena->chunk_alloc(addr, size, chunksize, &zero, - arena->ind); - assert(chunk == addr); - assert(zero == zeroed); - /* - * Create a temporary node to link into the ring of - * stashed allocations. 
OOM shouldn't be possible - * because chunk allocation just cached a node. - */ - tnode = arena_node_alloc(arena); - assert(tnode != NULL); - /* Stash. */ - extent_node_init(tnode, arena, addr, size, zeroed); - extent_node_dirty_linkage_init(tnode); - extent_node_dirty_insert(tnode, purge_runs_sentinel, + chunk = chunk_alloc_cache(arena, + extent_node_addr_get(chunkselm), + extent_node_size_get(chunkselm), chunksize, &zero, + false); + assert(chunk == extent_node_addr_get(chunkselm)); + assert(zero == extent_node_zeroed_get(chunkselm)); + extent_node_dirty_insert(chunkselm, purge_runs_sentinel, purge_chunks_sentinel); - npages = size >> LG_PAGE; + npages = extent_node_size_get(chunkselm) >> LG_PAGE; chunkselm = chunkselm_next; } else { arena_chunk_t *chunk = @@ -1170,7 +1240,7 @@ arena_unstash_purged(arena_t *arena, extent_node_dirty_remove(chunkselm); arena_node_dalloc(arena, chunkselm); chunkselm = chunkselm_next; - chunk_unmap(arena, addr, size, zeroed); + chunk_dalloc_arena(arena, addr, size, zeroed); } else { arena_run_t *run = &runselm->run; qr_remove(runselm, rd_link); diff --git a/src/chunk.c b/src/chunk.c index 264e4f27..08f21f6a 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -65,7 +65,7 @@ chunk_deregister(const void *chunk, const extent_node_t *node) static void * chunk_recycle(arena_t *arena, extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, bool cache, void *new_addr, size_t size, - size_t alignment, bool *zero) + size_t alignment, bool *zero, bool dalloc_node) { void *ret; extent_node_t *node; @@ -74,6 +74,7 @@ chunk_recycle(arena_t *arena, extent_tree_t *chunks_szad, bool zeroed; assert(new_addr == NULL || alignment == chunksize); + assert(dalloc_node || new_addr != NULL); alloc_size = size + alignment - chunksize; /* Beware size_t wrap-around. 
*/ @@ -129,7 +130,8 @@ chunk_recycle(arena_t *arena, extent_tree_t *chunks_szad, } malloc_mutex_unlock(&arena->chunks_mtx); - if (node != NULL) + assert(!dalloc_node || node != NULL); + if (dalloc_node && node != NULL) arena_node_dalloc(arena, node); if (*zero) { if (!zeroed) @@ -153,8 +155,8 @@ chunk_alloc_core_dss(arena_t *arena, void *new_addr, size_t size, void *ret; if ((ret = chunk_recycle(arena, &arena->chunks_szad_dss, - &arena->chunks_ad_dss, false, new_addr, size, alignment, zero)) != - NULL) + &arena->chunks_ad_dss, false, new_addr, size, alignment, zero, + true)) != NULL) return (ret); ret = chunk_alloc_dss(arena, new_addr, size, alignment, zero); return (ret); @@ -177,11 +179,6 @@ chunk_alloc_core(arena_t *arena, void *new_addr, size_t size, size_t alignment, assert(alignment != 0); assert((alignment & chunksize_mask) == 0); - /* cache. */ - if ((ret = chunk_recycle(arena, &arena->chunks_szad_cache, - &arena->chunks_ad_cache, true, new_addr, size, alignment, zero)) != - NULL) - return (ret); /* "primary" dss. */ if (have_dss && dss_prec == dss_prec_primary && (ret = chunk_alloc_core_dss(arena, new_addr, size, alignment, zero)) != @@ -190,7 +187,7 @@ chunk_alloc_core(arena_t *arena, void *new_addr, size_t size, size_t alignment, /* mmap. 
*/ if (!config_munmap && (ret = chunk_recycle(arena, &arena->chunks_szad_mmap, &arena->chunks_ad_mmap, false, new_addr, - size, alignment, zero)) != NULL) + size, alignment, zero, true)) != NULL) return (ret); /* * Requesting an address is not implemented for chunk_alloc_mmap(), so @@ -231,19 +228,18 @@ chunk_alloc_base(size_t size) } void * -chunk_alloc_arena(chunk_alloc_t *chunk_alloc, chunk_dalloc_t *chunk_dalloc, - unsigned arena_ind, void *new_addr, size_t size, size_t alignment, - bool *zero) +chunk_alloc_cache(arena_t *arena, void *new_addr, size_t size, size_t alignment, + bool *zero, bool dalloc_node) { - void *ret; - ret = chunk_alloc(new_addr, size, alignment, zero, arena_ind); - if (ret == NULL) - return (NULL); - if (config_valgrind) - JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, size); + assert(size != 0); + assert((size & chunksize_mask) == 0); + assert(alignment != 0); + assert((alignment & chunksize_mask) == 0); - return (ret); + return (chunk_recycle(arena, &arena->chunks_szad_cache, + &arena->chunks_ad_cache, true, new_addr, size, alignment, zero, + dalloc_node)); } static arena_t * @@ -262,7 +258,27 @@ chunk_arena_get(unsigned arena_ind) return (arena); } -/* Default arena chunk allocation routine in the absence of user override. */ +static void * +chunk_alloc_arena(arena_t *arena, void *new_addr, size_t size, size_t alignment, + bool *zero) +{ + void *ret; + + ret = chunk_alloc_core(arena, new_addr, size, alignment, zero, + arena->dss_prec); + if (ret == NULL) + return (NULL); + if (config_valgrind) + JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, size); + + return (ret); +} + +/* + * Default arena chunk allocation routine in the absence of user override. This + * function isn't actually used by jemalloc, but it does the right thing if the + * application passes calls through to it during chunk allocation. 
+ */ void * chunk_alloc_default(void *new_addr, size_t size, size_t alignment, bool *zero, unsigned arena_ind) @@ -270,8 +286,21 @@ chunk_alloc_default(void *new_addr, size_t size, size_t alignment, bool *zero, arena_t *arena; arena = chunk_arena_get(arena_ind); - return (chunk_alloc_core(arena, new_addr, size, alignment, zero, - arena->dss_prec)); + return (chunk_alloc_arena(arena, new_addr, size, alignment, zero)); +} + +void * +chunk_alloc_wrapper(arena_t *arena, chunk_alloc_t *chunk_alloc, void *new_addr, + size_t size, size_t alignment, bool *zero) +{ + void *ret; + + ret = chunk_alloc(new_addr, size, alignment, zero, arena->ind); + if (ret == NULL) + return (NULL); + if (config_valgrind && chunk_alloc != chunk_alloc_default) + JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(chunk, chunksize); + return (ret); } void @@ -355,8 +384,8 @@ label_return: malloc_mutex_unlock(&arena->chunks_mtx); } -static void -chunk_cache(arena_t *arena, void *chunk, size_t size) +void +chunk_dalloc_cache(arena_t *arena, void *chunk, size_t size) { assert(chunk != NULL); @@ -366,19 +395,11 @@ chunk_cache(arena_t *arena, void *chunk, size_t size) chunk_record(arena, &arena->chunks_szad_cache, &arena->chunks_ad_cache, true, chunk, size, false); -} - -/* Default arena chunk deallocation routine in the absence of user override. */ -bool -chunk_dalloc_default(void *chunk, size_t size, unsigned arena_ind) -{ - - chunk_cache(chunk_arena_get(arena_ind), chunk, size); - return (false); + arena_maybe_purge(arena); } void -chunk_unmap(arena_t *arena, void *chunk, size_t size, bool zeroed) +chunk_dalloc_arena(arena_t *arena, void *chunk, size_t size, bool zeroed) { assert(chunk != NULL); @@ -395,6 +416,29 @@ chunk_unmap(arena_t *arena, void *chunk, size_t size, bool zeroed) } } +/* + * Default arena chunk deallocation routine in the absence of user override. 
+ * This function isn't actually used by jemalloc, but it does the right thing if + * the application passes calls through to it during chunk deallocation. + */ +bool +chunk_dalloc_default(void *chunk, size_t size, unsigned arena_ind) +{ + + chunk_dalloc_arena(chunk_arena_get(arena_ind), chunk, size, false); + return (false); +} + +void +chunk_dalloc_wrapper(arena_t *arena, chunk_dalloc_t *chunk_dalloc, void *chunk, + size_t size) +{ + + chunk_dalloc(chunk, size, arena->ind); + if (config_valgrind && chunk_dalloc != chunk_dalloc_default) + JEMALLOC_VALGRIND_MAKE_MEM_NOACCESS(chunk, size); +} + static rtree_node_elm_t * chunks_rtree_node_alloc(size_t nelms) { From 35e3fd9a63a9d24276eab24bf84edb3d9e856732 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 18 Feb 2015 16:51:51 -0800 Subject: [PATCH 0659/3378] Fix a compilation error and an incorrect assertion. --- src/chunk.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/chunk.c b/src/chunk.c index 08f21f6a..9474a158 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -130,7 +130,7 @@ chunk_recycle(arena_t *arena, extent_tree_t *chunks_szad, } malloc_mutex_unlock(&arena->chunks_mtx); - assert(!dalloc_node || node != NULL); + assert(dalloc_node || node != NULL); if (dalloc_node && node != NULL) arena_node_dalloc(arena, node); if (*zero) { @@ -299,7 +299,7 @@ chunk_alloc_wrapper(arena_t *arena, chunk_alloc_t *chunk_alloc, void *new_addr, if (ret == NULL) return (NULL); if (config_valgrind && chunk_alloc != chunk_alloc_default) - JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(chunk, chunksize); + JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, chunksize); return (ret); } From 970fcfbca5dffda921394172c7298d274eebfd0e Mon Sep 17 00:00:00 2001 From: Dave Huseby Date: Mon, 9 Feb 2015 21:46:54 -0800 Subject: [PATCH 0660/3378] adding support for bitrig --- configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 7a694a20..4ac7ac82 100644 --- a/configure.ac +++ 
b/configure.ac @@ -283,7 +283,7 @@ case "${host}" in abi="elf" AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) ;; - *-*-openbsd*) + *-*-openbsd*|*-*-bitrig*) CFLAGS="$CFLAGS" abi="elf" AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) From 7c46fd59cce6afb14cdc6c819f662b6e81638f84 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Wed, 4 Mar 2015 21:48:01 +0900 Subject: [PATCH 0661/3378] Make --without-export actually work 9906660 added a --without-export configure option to avoid exporting jemalloc symbols, but the option didn't actually work. --- .../jemalloc/internal/jemalloc_internal_defs.h.in | 6 ++++++ include/jemalloc/jemalloc_macros.h.in | 14 +++++++++----- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 0f0db8a1..191abc52 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -232,4 +232,10 @@ /* Adaptive mutex support in pthreads. */ #undef JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP +/* + * If defined, jemalloc symbols are not exported (doesn't work when + * JEMALLOC_PREFIX is not defined). 
+ */ +#undef JEMALLOC_EXPORT + #endif /* JEMALLOC_INTERNAL_DEFS_H_ */ diff --git a/include/jemalloc/jemalloc_macros.h.in b/include/jemalloc/jemalloc_macros.h.in index 7d1dcf4a..72f2a082 100644 --- a/include/jemalloc/jemalloc_macros.h.in +++ b/include/jemalloc/jemalloc_macros.h.in @@ -32,16 +32,20 @@ #ifdef JEMALLOC_HAVE_ATTR # define JEMALLOC_ATTR(s) __attribute__((s)) -# define JEMALLOC_EXPORT JEMALLOC_ATTR(visibility("default")) +# ifndef JEMALLOC_EXPORT +# define JEMALLOC_EXPORT JEMALLOC_ATTR(visibility("default")) +# endif # define JEMALLOC_ALIGNED(s) JEMALLOC_ATTR(aligned(s)) # define JEMALLOC_SECTION(s) JEMALLOC_ATTR(section(s)) # define JEMALLOC_NOINLINE JEMALLOC_ATTR(noinline) #elif _MSC_VER # define JEMALLOC_ATTR(s) -# ifdef DLLEXPORT -# define JEMALLOC_EXPORT __declspec(dllexport) -# else -# define JEMALLOC_EXPORT __declspec(dllimport) +# ifndef JEMALLOC_EXPORT +# ifdef DLLEXPORT +# define JEMALLOC_EXPORT __declspec(dllexport) +# else +# define JEMALLOC_EXPORT __declspec(dllimport) +# endif # endif # define JEMALLOC_ALIGNED(s) __declspec(align(s)) # define JEMALLOC_SECTION(s) __declspec(allocate(s)) From 4d871f73af6b8310564dfcb63357dbfe8b1a1529 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Wed, 4 Mar 2015 10:54:10 +0900 Subject: [PATCH 0662/3378] Preserve LastError when calling TlsGetValue TlsGetValue has a semantic difference with pthread_getspecific, in that it can return a non-error NULL value, so it always sets the LastError. But allocator callers may not be expecting calling e.g. free() to change the value of the last error, so preserve it. 
--- include/jemalloc/internal/tsd.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index dbb91a2e..62a887e6 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -277,9 +277,11 @@ a_name##tsd_set(a_type *val) \ a_attr bool \ a_name##tsd_cleanup_wrapper(void) \ { \ - a_name##tsd_wrapper_t *wrapper; \ + DWORD error = GetLastError(); \ + a_name##tsd_wrapper_t *wrapper = (a_name##tsd_wrapper_t *) \ + TlsGetValue(a_name##tsd_tsd); \ + SetLastError(error); \ \ - wrapper = (a_name##tsd_wrapper_t *)TlsGetValue(a_name##tsd_tsd);\ if (wrapper == NULL) \ return (false); \ if (a_cleanup != malloc_tsd_no_cleanup && \ @@ -307,8 +309,10 @@ a_name##tsd_wrapper_set(a_name##tsd_wrapper_t *wrapper) \ a_attr a_name##tsd_wrapper_t * \ a_name##tsd_wrapper_get(void) \ { \ + DWORD error = GetLastError(); \ a_name##tsd_wrapper_t *wrapper = (a_name##tsd_wrapper_t *) \ TlsGetValue(a_name##tsd_tsd); \ + SetLastError(error); \ \ if (unlikely(wrapper == NULL)) { \ wrapper = (a_name##tsd_wrapper_t *) \ From f044bb219e9bfcc585f64f097e5ab0b5837c0451 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 6 Mar 2015 20:05:16 -0800 Subject: [PATCH 0663/3378] Change default chunk size from 4 MiB to 256 KiB. Recent changes have improved huge allocation scalability, which removes upward pressure to set the chunk size so large that huge allocations are rare. Smaller chunks are more likely to completely drain, so set the default to the smallest size that doesn't leave excessive unusable trailing space in chunk headers. --- doc/jemalloc.xml.in | 26 +++++++++++++------------- include/jemalloc/internal/chunk.h | 2 +- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index b392fa9e..747e03f4 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -571,8 +571,8 @@ for (i = 0; i < nbins; i++) { both large or both huge. 
In such cases shrinkage always succeeds, but growth only succeeds if the trailing memory is currently available. - Assuming 4 MiB chunks, 4 KiB pages, and a 16-byte quantum on a 64-bit - system, the size classes in each category are as shown in Assuming 256 KiB chunks, 4 KiB pages, and a 16-byte quantum on a + 64-bit system, the size classes in each category are as shown in .
@@ -627,7 +627,7 @@ for (i = 0; i < nbins; i++) { [10 KiB, 12 KiB, 14 KiB] - Large + Large 2 KiB [16 KiB] @@ -645,7 +645,12 @@ for (i = 0; i < nbins; i++) { 32 KiB - [160 KiB, 192 KiB, 224 KiB, 256 KiB] + [160 KiB, 192 KiB, 224 KiB] + + + Huge + 32 KiB + [256 KiB] 64 KiB @@ -653,20 +658,15 @@ for (i = 0; i < nbins; i++) { 128 KiB - [640 KiB, 768 KiB, 896 KiB, 1024 KiB] + [640 KiB, 768 KiB, 896 KiB, 1 MiB] 256 KiB - [1280 KiB, 1536 KiB, 1792 KiB, 2048 KiB] + [1280 KiB, 1536 KiB, 1792 KiB, 2 MiB] 512 KiB - [2560 KiB, 3072 KiB, 3584 KiB] - - - Huge - 512 KiB - [4 MiB] + [2560 KiB, 3 MiB, 3584 KiB, 4 MiB] 1 MiB @@ -907,7 +907,7 @@ for (i = 0; i < nbins; i++) { Virtual memory chunk size (log base 2). If a chunk size outside the supported size range is specified, the size is silently clipped to the minimum/maximum supported size. The default - chunk size is 4 MiB (2^22). + chunk size is 256 KiB (2^18). diff --git a/include/jemalloc/internal/chunk.h b/include/jemalloc/internal/chunk.h index 1a968a58..1af5b24b 100644 --- a/include/jemalloc/internal/chunk.h +++ b/include/jemalloc/internal/chunk.h @@ -5,7 +5,7 @@ * Size and alignment of memory chunks that are allocated by the OS's virtual * memory system. */ -#define LG_CHUNK_DEFAULT 22 +#define LG_CHUNK_DEFAULT 18 /* Return the chunk address for allocation address a. */ #define CHUNK_ADDR2BASE(a) \ From 5707d6f952c71baa2f19102479859012982ac821 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 6 Mar 2015 17:14:05 -0800 Subject: [PATCH 0664/3378] Quantize szad trees by size class. Treat sizes that round down to the same size class as size-equivalent in trees that are used to search for first best fit, so that there are only as many "firsts" as there are size classes. This comes closer to the ideal of first fit. 
--- src/arena.c | 36 +++++++++++++++++++++++++++--------- src/base.c | 5 +++-- src/chunk.c | 2 +- src/extent.c | 8 +++++++- 4 files changed, 38 insertions(+), 13 deletions(-) diff --git a/src/arena.c b/src/arena.c index 78aa1ae0..34329a62 100644 --- a/src/arena.c +++ b/src/arena.c @@ -59,21 +59,35 @@ JEMALLOC_INLINE_C int arena_avail_comp(arena_chunk_map_misc_t *a, arena_chunk_map_misc_t *b) { int ret; + uintptr_t a_miscelm = (uintptr_t)a; size_t a_size; size_t b_size = arena_miscelm_to_bits(b) & ~PAGE_MASK; - uintptr_t a_miscelm = (uintptr_t)a; - uintptr_t b_miscelm = (uintptr_t)b; + index_t a_index, b_index; - if (a_miscelm & CHUNK_MAP_KEY) + if (a_miscelm & CHUNK_MAP_KEY) { a_size = a_miscelm & ~PAGE_MASK; - else + assert(a_size == s2u(a_size)); + } else a_size = arena_miscelm_to_bits(a) & ~PAGE_MASK; - ret = (a_size > b_size) - (a_size < b_size); + /* + * Compute the index of the largest size class that the run can satisfy + * a request for. + */ + a_index = size2index(a_size + 1) - 1; + b_index = size2index(b_size + 1) - 1; + + /* + * Compare based on size class index rather than size, in order to + * sort equally useful runs only by address. + */ + ret = (a_index > b_index) - (a_index < b_index); if (ret == 0) { - if (!(a_miscelm & CHUNK_MAP_KEY)) + if (!(a_miscelm & CHUNK_MAP_KEY)) { + uintptr_t b_miscelm = (uintptr_t)b; + ret = (a_miscelm > b_miscelm) - (a_miscelm < b_miscelm); - else { + } else { /* * Treat keys as if they are lower than anything else. 
*/ @@ -898,8 +912,10 @@ arena_run_alloc_large_helper(arena_t *arena, size_t size, bool zero) { arena_chunk_map_misc_t *miscelm; arena_chunk_map_misc_t *key; + size_t usize; - key = (arena_chunk_map_misc_t *)(size | CHUNK_MAP_KEY); + usize = s2u(size); + key = (arena_chunk_map_misc_t *)(usize | CHUNK_MAP_KEY); miscelm = arena_avail_tree_nsearch(&arena->runs_avail, key); if (miscelm != NULL) { arena_run_t *run = &miscelm->run; @@ -949,7 +965,8 @@ arena_run_alloc_small_helper(arena_t *arena, size_t size, index_t binind) arena_chunk_map_misc_t *miscelm; arena_chunk_map_misc_t *key; - key = (arena_chunk_map_misc_t *)(size | CHUNK_MAP_KEY); + assert(size == s2u(size)); + key = (arena_chunk_map_misc_t *)(PAGE_CEILING(size) | CHUNK_MAP_KEY); miscelm = arena_avail_tree_nsearch(&arena->runs_avail, key); if (miscelm != NULL) { run = &miscelm->run; @@ -2778,6 +2795,7 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info) bin_info->reg_interval; } assert(actual_nregs > 0); + assert(actual_run_size == s2u(actual_run_size)); /* Copy final settings. 
*/ bin_info->run_size = actual_run_size; diff --git a/src/base.c b/src/base.c index 33e8b6f5..01c62df4 100644 --- a/src/base.c +++ b/src/base.c @@ -73,7 +73,7 @@ void * base_alloc(size_t size) { void *ret; - size_t csize; + size_t csize, usize; extent_node_t *node; extent_node_t key; @@ -83,7 +83,8 @@ base_alloc(size_t size) */ csize = CACHELINE_CEILING(size); - extent_node_init(&key, NULL, NULL, csize, false); + usize = s2u(csize); + extent_node_init(&key, NULL, NULL, usize, false); malloc_mutex_lock(&base_mtx); node = extent_tree_szad_nsearch(&base_avail_szad, &key); if (node != NULL) { diff --git a/src/chunk.c b/src/chunk.c index 9474a158..972fecde 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -76,7 +76,7 @@ chunk_recycle(arena_t *arena, extent_tree_t *chunks_szad, assert(new_addr == NULL || alignment == chunksize); assert(dalloc_node || new_addr != NULL); - alloc_size = size + alignment - chunksize; + alloc_size = CHUNK_CEILING(s2u(size + alignment - chunksize)); /* Beware size_t wrap-around. */ if (alloc_size < size) return (NULL); diff --git a/src/extent.c b/src/extent.c index f98e77e2..e16f8f6c 100644 --- a/src/extent.c +++ b/src/extent.c @@ -9,8 +9,14 @@ extent_szad_comp(extent_node_t *a, extent_node_t *b) int ret; size_t a_size = extent_node_size_get(a); size_t b_size = extent_node_size_get(b); + /* + * Compute the index of the largest size class that the chunk can + * satisfy a request for. + */ + size_t a_index = size2index(a_size + 1) - 1; + size_t b_index = size2index(b_size + 1) - 1; - ret = (a_size > b_size) - (a_size < b_size); + ret = (a_index > b_index) - (a_index < b_index); if (ret == 0) { uintptr_t a_addr = (uintptr_t)extent_node_addr_get(a); uintptr_t b_addr = (uintptr_t)extent_node_addr_get(b); From 97c04a93838c4001688fe31bf018972b4696efe2 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 6 Mar 2015 19:57:36 -0800 Subject: [PATCH 0665/3378] Use first-fit rather than first-best-fit run/chunk allocation. 
This tends to more effectively pack active memory toward low addresses. However, additional tree searches are required in many cases, so whether this change stands the test of time will depend on real-world benchmarks. --- include/jemalloc/internal/arena.h | 2 +- src/arena.c | 76 ++++++++++++++++++++----------- src/chunk.c | 43 +++++++++++++++-- 3 files changed, 89 insertions(+), 32 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 42086ca1..50b296e4 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -312,7 +312,7 @@ struct arena_s { /* * Size/address-ordered tree of this arena's available runs. The tree - * is used for first-best-fit run allocation. + * is used for first-fit run allocation. */ arena_avail_tree_t runs_avail; diff --git a/src/arena.c b/src/arena.c index 34329a62..6f4197b2 100644 --- a/src/arena.c +++ b/src/arena.c @@ -907,23 +907,55 @@ arena_chunk_ralloc_huge_expand(arena_t *arena, void *chunk, size_t oldsize, return (err); } +/* Do first-fit run selection. */ +static arena_run_t * +arena_run_first_fit(arena_t *arena, size_t size) +{ + arena_run_t *run; + index_t index, max_index; + + assert(size == s2u(size)); + assert(size == PAGE_CEILING(size)); + + /* + * Iterate over all size classes that are at least large enough to + * satisfy the request, search for the lowest run of each size class, + * and choose the lowest of the runs found. 
+ */ + run = NULL; + for (index = size2index(size), max_index = size2index(arena_maxclass); + index <= max_index;) { + arena_run_t *currun; + arena_chunk_t *currun_chunk; + size_t currun_pageind, currun_size; + size_t usize = PAGE_CEILING(index2size(index)); + arena_chunk_map_misc_t *key = (arena_chunk_map_misc_t *)(usize | + CHUNK_MAP_KEY); + arena_chunk_map_misc_t *miscelm = + arena_avail_tree_nsearch(&arena->runs_avail, key); + if (miscelm == NULL) + break; + currun = &miscelm->run; + if (run == NULL || (uintptr_t)currun < (uintptr_t)run) + run = currun; + currun_chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(currun); + currun_pageind = arena_miscelm_to_pageind(miscelm); + currun_size = arena_mapbits_unallocated_size_get(currun_chunk, + currun_pageind); + assert(size2index(currun_size) + 1 > index); + index = size2index(currun_size) + 1; + } + + return (run); +} + static arena_run_t * arena_run_alloc_large_helper(arena_t *arena, size_t size, bool zero) { - arena_chunk_map_misc_t *miscelm; - arena_chunk_map_misc_t *key; - size_t usize; - - usize = s2u(size); - key = (arena_chunk_map_misc_t *)(usize | CHUNK_MAP_KEY); - miscelm = arena_avail_tree_nsearch(&arena->runs_avail, key); - if (miscelm != NULL) { - arena_run_t *run = &miscelm->run; - arena_run_split_large(arena, &miscelm->run, size, zero); - return (run); - } - - return (NULL); + arena_run_t *run = arena_run_first_fit(arena, s2u(size)); + if (run != NULL) + arena_run_split_large(arena, run, size, zero); + return (run); } static arena_run_t * @@ -961,20 +993,10 @@ arena_run_alloc_large(arena_t *arena, size_t size, bool zero) static arena_run_t * arena_run_alloc_small_helper(arena_t *arena, size_t size, index_t binind) { - arena_run_t *run; - arena_chunk_map_misc_t *miscelm; - arena_chunk_map_misc_t *key; - - assert(size == s2u(size)); - key = (arena_chunk_map_misc_t *)(PAGE_CEILING(size) | CHUNK_MAP_KEY); - miscelm = arena_avail_tree_nsearch(&arena->runs_avail, key); - if (miscelm != NULL) { - run = &miscelm->run; 
+ arena_run_t *run = arena_run_first_fit(arena, PAGE_CEILING(size)); + if (run != NULL) arena_run_split_small(arena, run, size, binind); - return (run); - } - - return (NULL); + return (run); } static arena_run_t * diff --git a/src/chunk.c b/src/chunk.c index 972fecde..875fa4cc 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -62,6 +62,39 @@ chunk_deregister(const void *chunk, const extent_node_t *node) } } +/* Do first-fit chunk selection. */ +static extent_node_t * +chunk_first_fit(arena_t *arena, extent_tree_t *chunks_szad, size_t size) +{ + extent_node_t *node; + index_t index; + + assert(size == CHUNK_CEILING(size)); + + /* + * Iterate over all size classes that are at least large enough to + * satisfy the request, search for the lowest chunk of each size class, + * and choose the lowest of the chunks found. + */ + node = NULL; + for (index = size2index(size); index < NSIZES;) { + extent_node_t *curnode; + extent_node_t key; + extent_node_init(&key, arena, NULL, + CHUNK_CEILING(index2size(index)), false); + curnode = extent_tree_szad_nsearch(chunks_szad, &key); + if (curnode == NULL) + break; + if (node == NULL || (uintptr_t)extent_node_addr_get(curnode) < + (uintptr_t)extent_node_addr_get(node)) + node = curnode; + assert(size2index(extent_node_size_get(curnode)) + 1 > index); + index = size2index(extent_node_size_get(curnode)) + 1; + } + + return (node); +} + static void * chunk_recycle(arena_t *arena, extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, bool cache, void *new_addr, size_t size, @@ -69,7 +102,6 @@ chunk_recycle(arena_t *arena, extent_tree_t *chunks_szad, { void *ret; extent_node_t *node; - extent_node_t key; size_t alloc_size, leadsize, trailsize; bool zeroed; @@ -80,10 +112,13 @@ chunk_recycle(arena_t *arena, extent_tree_t *chunks_szad, /* Beware size_t wrap-around. */ if (alloc_size < size) return (NULL); - extent_node_init(&key, arena, new_addr, alloc_size, false); malloc_mutex_lock(&arena->chunks_mtx); - node = (new_addr != NULL) ? 
extent_tree_ad_search(chunks_ad, &key) : - extent_tree_szad_nsearch(chunks_szad, &key); + if (new_addr != NULL || size == chunksize) { + extent_node_t key; + extent_node_init(&key, arena, new_addr, alloc_size, false); + node = extent_tree_ad_search(chunks_ad, &key); + } else + node = chunk_first_fit(arena, chunks_szad, alloc_size); if (node == NULL || (new_addr != NULL && extent_node_size_get(node) < size)) { malloc_mutex_unlock(&arena->chunks_mtx); From 04ca7580dbc409915de05cb1cee12a369e898590 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 6 Mar 2015 23:25:13 -0800 Subject: [PATCH 0666/3378] Fix a chunk_recycle() regression. This regression was introduced by 97c04a93838c4001688fe31bf018972b4696efe2 (Use first-fit rather than first-best-fit run/chunk allocation.). --- src/chunk.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/src/chunk.c b/src/chunk.c index 875fa4cc..fb8cd413 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -64,13 +64,22 @@ chunk_deregister(const void *chunk, const extent_node_t *node) /* Do first-fit chunk selection. */ static extent_node_t * -chunk_first_fit(arena_t *arena, extent_tree_t *chunks_szad, size_t size) +chunk_first_fit(arena_t *arena, extent_tree_t *chunks_szad, + extent_tree_t *chunks_ad, size_t size) { extent_node_t *node; index_t index; assert(size == CHUNK_CEILING(size)); + if (size == chunksize) { + /* + * Any chunk will suffice, so simply select the one lowest in + * memory. 
+ */ + return (extent_tree_ad_first(chunks_ad)); + } + /* * Iterate over all size classes that are at least large enough to * satisfy the request, search for the lowest chunk of each size class, @@ -113,12 +122,14 @@ chunk_recycle(arena_t *arena, extent_tree_t *chunks_szad, if (alloc_size < size) return (NULL); malloc_mutex_lock(&arena->chunks_mtx); - if (new_addr != NULL || size == chunksize) { + if (new_addr != NULL) { extent_node_t key; extent_node_init(&key, arena, new_addr, alloc_size, false); node = extent_tree_ad_search(chunks_ad, &key); - } else - node = chunk_first_fit(arena, chunks_szad, alloc_size); + } else { + node = chunk_first_fit(arena, chunks_szad, chunks_ad, + alloc_size); + } if (node == NULL || (new_addr != NULL && extent_node_size_get(node) < size)) { malloc_mutex_unlock(&arena->chunks_mtx); From 54673fd8d719e081536fb531417cd9060de895f0 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 23 Feb 2015 22:28:43 -0800 Subject: [PATCH 0667/3378] Update ChangeLog. --- ChangeLog | 151 +++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 150 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index d56ee999..ef7dbfdb 100644 --- a/ChangeLog +++ b/ChangeLog @@ -5,6 +5,155 @@ found in the git revision history: https://github.com/jemalloc/jemalloc +* 4.0.0 (XXX) See https://github.com/jemalloc/jemalloc/milestones/4.0.0 for + remaining work. + + This version contains many speed and space optimizations, both minor and + major. The major themes are generalization, unification, and simplification. + Although many of these optimizations cause no visible behavior change, their + cumulative effect is substantial. + + New features: + - Normalize size class spacing to be consistent across the complete size + range. By default there are four size classes per size doubling, but this + is now configurable via the --with-lg-size-class-group option. 
Also add the + --with-lg-page, --with-lg-page-sizes, --with-lg-quantum, and + --with-lg-tiny-min options, which can be used to tweak page and size class + settings. Impacts: + + Worst case performance for incrementally growing/shrinking reallocation + is improved because there are far fewer size classes, and therefore + copying happens less often. + + Internal fragmentation is limited to 20% for all but the smallest size + classes (those less than four times the quantum). (1B + 4 KiB) + and (1B + 4 MiB) previously suffered nearly 50% internal fragmentation. + + Chunk fragmentation tends to be lower because there are fewer distinct run + sizes to pack. + - Add support for explicit tcaches. The "tcache.create", "tcache.flush", and + "tcache.destroy" mallctls control tcache lifetime and flushing, and the + MALLOCX_TCACHE(tc) and MALLOCX_TCACHE_NONE flags to the *allocx() API + control which tcache is used for each operation. + - Implement per thread heap profiling, as well as the ability to + enable/disable heap profiling on a per thread basis. Add the "prof.reset", + "prof.lg_sample", "thread.prof.name", "thread.prof.active", + "opt.prof_thread_active_init", "prof.thread_active_init", and + "thread.prof.active" mallctls. + - Add support for per arena application-specified chunk allocators, configured + via the "arena.chunk.alloc" and "arena.chunk.dalloc" mallctls. + - Refactor huge allocation to be managed by arenas, so that arenas now + function as general purpose independent allocators. This is important in + the context of user-specified chunk allocators, aside from the scalability + benefits. Related new statistics: + + The "stats.arenas..huge.allocated", "stats.arenas..huge.nmalloc", + "stats.arenas..huge.ndalloc", and "stats.arenas..huge.nrequests" + mallctls provide high level per arena huge allocation statistics. 
+ + The "arenas.nhchunks", "arenas.hchunks..size", + "stats.arenas..hchunks..nmalloc", + "stats.arenas..hchunks..ndalloc", + "stats.arenas..hchunks..nrequests", and + "stats.arenas..hchunks..curhchunks" mallctls provide per size class + statistics. + - Add the 'util' column to malloc_stats_print() output, which reports the + proportion of available regions that are currently in use for each small + size class. + - Add "alloc" and "free" modes for junk filling (see the "opt.junk" + mallctl), so that it is possible to separately enable junk filling for + allocation versus deallocation. + - Add the jemalloc-config script, which provides information about how + jemalloc was configured, and how to integrate it into application builds. + - Add metadata statistics, which are accessible via the "stats.metadata", + "stats.arenas..metadata.mapped", and + "stats.arenas..metadata.allocated" mallctls. + - Add the "prof.gdump" mallctl, which makes it possible to toggle the gdump + feature on/off during program execution. + - Add sdallocx(), which implements sized deallocation. The primary + optimization over dallocx() is the removal of a metadata read, which often + suffers an L1 cache miss. + - Add missing header includes in jemalloc/jemalloc.h, so that applications + only have to #include <jemalloc/jemalloc.h>. + - Add support for additional platforms: + + Bitrig + + Cygwin + + DragonFlyBSD + + iOS + + OpenBSD + + OpenRISC/or1k + + Optimizations: + - Switch run and chunk allocation from first-best-fit (among best-fit + candidates, choose the lowest in memory) to first-fit (among all candidates, + choose the lowest in memory). This tends to reduce chunk and virtual memory + fragmentation, respectively. + - Maintain dirty runs in per arena LRUs rather than in per arena trees of + dirty-run-containing chunks. In practice this change significantly reduces + dirty page purging volume. + - Integrate whole chunks into the unused dirty page purging machinery. 
This + reduces the cost of repeated huge allocation/deallocation, because it + effectively introduces a cache of chunks. + - Split the arena chunk map into two separate arrays, in order to increase + cache locality for the frequently accessed bits. + - Move small run metadata out of runs, into arena chunk headers. This reduces + run fragmentation, smaller runs reduce external fragmentation for small size + classes, and packed (less uniformly aligned) metadata layout improves CPU + cache set distribution. + - Micro-optimize the fast paths for the public API functions. + - Refactor thread-specific data to reside in a single structure. This assures + that only a single TLS read is necessary per call into the public API. + - Implement in-place huge allocation growing and shrinking. + - Refactor rtree (radix tree for chunk lookups) to be lock-free, and make + additional optimizations that reduce maximum lookup depth to one or two + levels. This resolves what was a concurrency bottleneck for per arena huge + allocation, because a global data structure is critical for determining + which arenas own which huge allocations. + + Incompatible changes: + - Replace --enable-cc-silence with --disable-cc-silence to suppress spurious + warnings by default. + - Assure that the constness of malloc_usable_size()'s return type matches that + of the system implementation. + - Change the heap profile dump format to support per thread heap profiling, + and enhance pprof with the --thread= option. As a result, the bundled + pprof must now be used rather than the upstream (gperftools) pprof. + - Disable "opt.prof_final" by default, in order to avoid atexit(3), which can + internally deadlock on some platforms. + - Change the "arenas.nlruns" mallctl type from size_t to unsigned. + - Replace the "stats.arenas..bins..allocated" mallctl with + "stats.arenas..bins..curregs". + - Ignore MALLOC_CONF in set{uid,gid,cap} binaries. 
+ - Ignore MALLOCX_ARENA(a) in dallocx(), in favor of using the + MALLOCX_TCACHE(tc) and MALLOCX_TCACHE_NONE flags to control tcache usage. + + Removed features: + - Remove the *allocm() API, which is superseded by the *allocx() API. + - Remove the --enable-dss options, and make dss non-optional on all platforms + which support sbrk(2). + - Remove the "arenas.purge" mallctl, which was obsoleted by the + "arena..purge" mallctl in 3.1.0. + - Remove the unnecessary "opt.valgrind" mallctl; jemalloc automatically + detects whether it is running inside Valgrind. + - Remove the "stats.huge.allocated", "stats.huge.nmalloc", and + "stats.huge.ndalloc" mallctls. + - Remove the --enable-mremap option. + - Remove the --enable-ivsalloc option, and merge its functionality into + --enable-debug. + - Remove the "stats.chunks.current", "stats.chunks.total", and + "stats.chunks.high" mallctls. + + Bug fixes: + - Fix the cactive statistic to decrease (rather than increase) when active + memory decreases. This regression was first released in 3.5.0. + - Fix OOM handling in memalign() and valloc(). A variant of this bug existed + in all releases since 2.0.0, which introduced these functions. + - Fix the "arena..dss" mallctl to return an error if "primary" or + "secondary" precedence is specified, but sbrk(2) is not supported. + - Fix fallback lg_floor() implementations to handle extremely large inputs. + - Ensure the default purgeable zone is after the default zone on OS X. + - Fix latent bugs in atomic_*(). + - Fix the "arena..dss" mallctl to handle read-only calls. + - Fix tls_model configuration to enable the initial-exec model when possible. + - Mark malloc_conf as a weak symbol so that the application can override it. + - Correctly detect glibc's adaptive pthread mutexes. + - Fix the --without-export configure option. 
+ * 3.6.0 (March 31, 2014) This version contains a critical bug fix for a regression present in 3.5.0 and @@ -21,7 +170,7 @@ found in the git revision history: backtracing to be reliable. - Use dss allocation precedence for huge allocations as well as small/large allocations. - - Fix test assertion failure message formatting. This bug did not manifect on + - Fix test assertion failure message formatting. This bug did not manifest on x86_64 systems because of implementation subtleties in va_list. - Fix inconsequential test failures for hash and SFMT code. From 38e42d311c1844a66e8ced84551621de41e42b85 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 10 Mar 2015 18:15:40 -0700 Subject: [PATCH 0668/3378] Refactor dirty run linkage to reduce sizeof(extent_node_t). --- include/jemalloc/internal/arena.h | 50 ++++++++--- include/jemalloc/internal/extent.h | 12 +-- include/jemalloc/internal/private_symbols.txt | 1 + src/arena.c | 89 ++++++++++--------- 4 files changed, 95 insertions(+), 57 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 50b296e4..de298e55 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -23,6 +23,7 @@ */ #define LG_DIRTY_MULT_DEFAULT 3 +typedef struct arena_runs_dirty_link_s arena_runs_dirty_link_t; typedef struct arena_run_s arena_run_t; typedef struct arena_chunk_map_bits_s arena_chunk_map_bits_t; typedef struct arena_chunk_map_misc_s arena_chunk_map_misc_t; @@ -120,6 +121,10 @@ struct arena_chunk_map_bits_s { #define CHUNK_MAP_KEY CHUNK_MAP_ALLOCATED }; +struct arena_runs_dirty_link_s { + qr(arena_runs_dirty_link_t) rd_link; +}; + /* * Each arena_chunk_map_misc_t corresponds to one page within the chunk, just * like arena_chunk_map_bits_t. Two separate arrays are stored within each @@ -131,13 +136,13 @@ struct arena_chunk_map_misc_s { * * 1) arena_t's runs_avail tree. 
* 2) arena_run_t conceptually uses this linkage for in-use non-full - * runs, rather than directly embedding linkage. + * runs, rather than directly embedding linkage. */ rb_node(arena_chunk_map_misc_t) rb_link; union { /* Linkage for list of dirty runs. */ - qr(arena_chunk_map_misc_t) rd_link; + arena_runs_dirty_link_t rd; /* Profile counters, used for large object runs. */ prof_tctx_t *prof_tctx; @@ -324,15 +329,27 @@ struct arena_s { * * LRU-----------------------------------------------------------MRU * - * ______________ ___ ___ - * ...-->|chunks_cache|<--------->|c|<-------------------->|c|<--... - * -------------- |h| |h| - * ____________ _____ |u| _____ _____ |u| - * ...-->|runs_dirty|<-->|run|<-->|n|<-->|run|<-->|run|<-->|n|<--... - * ------------ ----- |k| ----- ----- |k| - * --- --- + * /------------------\ + * | arena | + * | | + * | /------------\ | /-----------\ + * ...---->|chunks_cache|<---------------------->| chunk |<--... + * | \------------/ | | | + * | | | | + * | | /---\ /---\ | | + * | | |run| |run| | | + * | | | | | | | | + * | /----------\ | |---| |---| | /-----\ | + * ...----->|runs_dirty|<---->|rd |<---->|rd |<---->|rdelm|<-----... + * | \----------/ | |---| |---| | \-----/ | + * | | | | | | | | + * | | | | | | | | + * | | \---/ \---/ | | + * | | | | + * | | | | + * \------------------/ \-----------/ */ - arena_chunk_map_misc_t runs_dirty; + arena_runs_dirty_link_t runs_dirty; extent_node_t chunks_cache; /* Extant huge allocations. 
*/ @@ -465,6 +482,7 @@ arena_chunk_map_misc_t *arena_miscelm_get(arena_chunk_t *chunk, size_t pageind); size_t arena_miscelm_to_pageind(arena_chunk_map_misc_t *miscelm); void *arena_miscelm_to_rpages(arena_chunk_map_misc_t *miscelm); +arena_chunk_map_misc_t *arena_rd_to_miscelm(arena_runs_dirty_link_t *rd); arena_chunk_map_misc_t *arena_run_to_miscelm(arena_run_t *run); size_t *arena_mapbitsp_get(arena_chunk_t *chunk, size_t pageind); size_t arena_mapbitsp_read(size_t *mapbitsp); @@ -556,6 +574,18 @@ arena_miscelm_to_rpages(arena_chunk_map_misc_t *miscelm) return ((void *)((uintptr_t)chunk + (pageind << LG_PAGE))); } +JEMALLOC_ALWAYS_INLINE arena_chunk_map_misc_t * +arena_rd_to_miscelm(arena_runs_dirty_link_t *rd) +{ + arena_chunk_map_misc_t *miscelm = (arena_chunk_map_misc_t + *)((uintptr_t)rd - offsetof(arena_chunk_map_misc_t, rd)); + + assert(arena_miscelm_to_pageind(miscelm) >= map_bias); + assert(arena_miscelm_to_pageind(miscelm) < chunk_npages); + + return (miscelm); +} + JEMALLOC_ALWAYS_INLINE arena_chunk_map_misc_t * arena_run_to_miscelm(arena_run_t *run) { diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index 81ff40b8..5dbc04a5 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -34,7 +34,7 @@ struct extent_node_s { prof_tctx_t *en_prof_tctx; /* Linkage for arena's runs_dirty and chunks_cache rings. 
*/ - arena_chunk_map_misc_t runs_dirty; + arena_runs_dirty_link_t rdelm; qr(extent_node_t) cc_link; union { @@ -79,7 +79,7 @@ void extent_node_init(extent_node_t *node, arena_t *arena, void *addr, size_t size, bool zeroed); void extent_node_dirty_linkage_init(extent_node_t *node); void extent_node_dirty_insert(extent_node_t *node, - arena_chunk_map_misc_t *runs_dirty, extent_node_t *chunks_dirty); + arena_runs_dirty_link_t *runs_dirty, extent_node_t *chunks_dirty); void extent_node_dirty_remove(extent_node_t *node); #endif @@ -186,16 +186,16 @@ JEMALLOC_INLINE void extent_node_dirty_linkage_init(extent_node_t *node) { - qr_new(&node->runs_dirty, rd_link); + qr_new(&node->rdelm, rd_link); qr_new(node, cc_link); } JEMALLOC_INLINE void extent_node_dirty_insert(extent_node_t *node, - arena_chunk_map_misc_t *runs_dirty, extent_node_t *chunks_dirty) + arena_runs_dirty_link_t *runs_dirty, extent_node_t *chunks_dirty) { - qr_meld(runs_dirty, &node->runs_dirty, rd_link); + qr_meld(runs_dirty, &node->rdelm, rd_link); qr_meld(chunks_dirty, node, cc_link); } @@ -203,7 +203,7 @@ JEMALLOC_INLINE void extent_node_dirty_remove(extent_node_t *node) { - qr_remove(&node->runs_dirty, rd_link); + qr_remove(&node->rdelm, rd_link); qr_remove(node, cc_link); } diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 7c217c74..d086db18 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -81,6 +81,7 @@ arena_quarantine_junk_small arena_ralloc arena_ralloc_junk_large arena_ralloc_no_move +arena_rd_to_miscelm arena_redzone_corruption arena_run_regind arena_run_to_miscelm diff --git a/src/arena.c b/src/arena.c index 6f4197b2..5d792f99 100644 --- a/src/arena.c +++ b/src/arena.c @@ -136,8 +136,8 @@ arena_run_dirty_insert(arena_t *arena, arena_chunk_t *chunk, size_t pageind, assert(arena_mapbits_dirty_get(chunk, pageind+npages-1) == CHUNK_MAP_DIRTY); - qr_new(miscelm, rd_link); - 
qr_meld(&arena->runs_dirty, miscelm, rd_link); + qr_new(&miscelm->rd, rd_link); + qr_meld(&arena->runs_dirty, &miscelm->rd, rd_link); arena->ndirty += npages; } @@ -153,7 +153,7 @@ arena_run_dirty_remove(arena_t *arena, arena_chunk_t *chunk, size_t pageind, assert(arena_mapbits_dirty_get(chunk, pageind+npages-1) == CHUNK_MAP_DIRTY); - qr_remove(miscelm, rd_link); + qr_remove(&miscelm->rd, rd_link); assert(arena->ndirty >= npages); arena->ndirty -= npages; } @@ -1056,22 +1056,23 @@ static size_t arena_dirty_count(arena_t *arena) { size_t ndirty = 0; - arena_chunk_map_misc_t *runselm; + arena_runs_dirty_link_t *rdelm; extent_node_t *chunkselm; - for (runselm = qr_next(&arena->runs_dirty, rd_link), + for (rdelm = qr_next(&arena->runs_dirty, rd_link), chunkselm = qr_next(&arena->chunks_cache, cc_link); - runselm != &arena->runs_dirty; runselm = qr_next(runselm, - rd_link)) { + rdelm != &arena->runs_dirty; rdelm = qr_next(rdelm, rd_link)) { size_t npages; - if (runselm == &chunkselm->runs_dirty) { + if (rdelm == &chunkselm->rdelm) { npages = extent_node_size_get(chunkselm) >> LG_PAGE; chunkselm = qr_next(chunkselm, cc_link); } else { - arena_chunk_t *chunk = (arena_chunk_t - *)CHUNK_ADDR2BASE(runselm); - size_t pageind = arena_miscelm_to_pageind(runselm); + arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE( + rdelm); + arena_chunk_map_misc_t *miscelm = + arena_rd_to_miscelm(rdelm); + size_t pageind = arena_miscelm_to_pageind(miscelm); assert(arena_mapbits_allocated_get(chunk, pageind) == 0); assert(arena_mapbits_large_get(chunk, pageind) == 0); @@ -1107,21 +1108,21 @@ arena_compute_npurge(arena_t *arena, bool all) static size_t arena_stash_dirty(arena_t *arena, bool all, size_t npurge, - arena_chunk_map_misc_t *purge_runs_sentinel, + arena_runs_dirty_link_t *purge_runs_sentinel, extent_node_t *purge_chunks_sentinel) { - arena_chunk_map_misc_t *runselm, *runselm_next; + arena_runs_dirty_link_t *rdelm, *rdelm_next; extent_node_t *chunkselm; size_t nstashed = 0; /* 
Stash at least npurge pages. */ - for (runselm = qr_next(&arena->runs_dirty, rd_link), + for (rdelm = qr_next(&arena->runs_dirty, rd_link), chunkselm = qr_next(&arena->chunks_cache, cc_link); - runselm != &arena->runs_dirty; runselm = runselm_next) { + rdelm != &arena->runs_dirty; rdelm = rdelm_next) { size_t npages; - runselm_next = qr_next(runselm, rd_link); + rdelm_next = qr_next(rdelm, rd_link); - if (runselm == &chunkselm->runs_dirty) { + if (rdelm == &chunkselm->rdelm) { extent_node_t *chunkselm_next; bool zero; UNUSED void *chunk; @@ -1144,9 +1145,11 @@ arena_stash_dirty(arena_t *arena, bool all, size_t npurge, chunkselm = chunkselm_next; } else { arena_chunk_t *chunk = - (arena_chunk_t *)CHUNK_ADDR2BASE(runselm); - size_t pageind = arena_miscelm_to_pageind(runselm); - arena_run_t *run = &runselm->run; + (arena_chunk_t *)CHUNK_ADDR2BASE(rdelm); + arena_chunk_map_misc_t *miscelm = + arena_rd_to_miscelm(rdelm); + size_t pageind = arena_miscelm_to_pageind(miscelm); + arena_run_t *run = &miscelm->run; size_t run_size = arena_mapbits_unallocated_size_get(chunk, pageind); @@ -1167,12 +1170,12 @@ arena_stash_dirty(arena_t *arena, bool all, size_t npurge, arena_run_split_large(arena, run, run_size, false); /* Stash. */ if (false) - qr_new(runselm, rd_link); /* Redundant. */ + qr_new(rdelm, rd_link); /* Redundant. 
*/ else { - assert(qr_next(runselm, rd_link) == runselm); - assert(qr_prev(runselm, rd_link) == runselm); + assert(qr_next(rdelm, rd_link) == rdelm); + assert(qr_prev(rdelm, rd_link) == rdelm); } - qr_meld(purge_runs_sentinel, runselm, rd_link); + qr_meld(purge_runs_sentinel, rdelm, rd_link); } nstashed += npages; @@ -1184,11 +1187,12 @@ arena_stash_dirty(arena_t *arena, bool all, size_t npurge, } static size_t -arena_purge_stashed(arena_t *arena, arena_chunk_map_misc_t *purge_runs_sentinel, +arena_purge_stashed(arena_t *arena, + arena_runs_dirty_link_t *purge_runs_sentinel, extent_node_t *purge_chunks_sentinel) { size_t npurged, nmadvise; - arena_chunk_map_misc_t *runselm; + arena_runs_dirty_link_t *rdelm; extent_node_t *chunkselm; if (config_stats) @@ -1196,13 +1200,12 @@ arena_purge_stashed(arena_t *arena, arena_chunk_map_misc_t *purge_runs_sentinel, npurged = 0; malloc_mutex_unlock(&arena->lock); - for (runselm = qr_next(purge_runs_sentinel, rd_link), + for (rdelm = qr_next(purge_runs_sentinel, rd_link), chunkselm = qr_next(purge_chunks_sentinel, cc_link); - runselm != purge_runs_sentinel; runselm = qr_next(runselm, - rd_link)) { + rdelm != purge_runs_sentinel; rdelm = qr_next(rdelm, rd_link)) { size_t npages; - if (runselm == &chunkselm->runs_dirty) { + if (rdelm == &chunkselm->rdelm) { size_t size = extent_node_size_get(chunkselm); bool unzeroed; @@ -1216,8 +1219,10 @@ arena_purge_stashed(arena_t *arena, arena_chunk_map_misc_t *purge_runs_sentinel, size_t pageind, run_size, flag_unzeroed, i; bool unzeroed; - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(runselm); - pageind = arena_miscelm_to_pageind(runselm); + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(rdelm); + arena_chunk_map_misc_t *miscelm = + arena_rd_to_miscelm(rdelm); + pageind = arena_miscelm_to_pageind(miscelm); run_size = arena_mapbits_large_size_get(chunk, pageind); npages = run_size >> LG_PAGE; @@ -1259,18 +1264,18 @@ arena_purge_stashed(arena_t *arena, arena_chunk_map_misc_t *purge_runs_sentinel, 
static void arena_unstash_purged(arena_t *arena, - arena_chunk_map_misc_t *purge_runs_sentinel, + arena_runs_dirty_link_t *purge_runs_sentinel, extent_node_t *purge_chunks_sentinel) { - arena_chunk_map_misc_t *runselm, *runselm_next; + arena_runs_dirty_link_t *rdelm, *rdelm_next; extent_node_t *chunkselm; /* Deallocate runs. */ - for (runselm = qr_next(purge_runs_sentinel, rd_link), + for (rdelm = qr_next(purge_runs_sentinel, rd_link), chunkselm = qr_next(purge_chunks_sentinel, cc_link); - runselm != purge_runs_sentinel; runselm = runselm_next) { - runselm_next = qr_next(runselm, rd_link); - if (runselm == &chunkselm->runs_dirty) { + rdelm != purge_runs_sentinel; rdelm = rdelm_next) { + rdelm_next = qr_next(rdelm, rd_link); + if (rdelm == &chunkselm->rdelm) { extent_node_t *chunkselm_next = qr_next(chunkselm, cc_link); void *addr = extent_node_addr_get(chunkselm); @@ -1281,8 +1286,10 @@ arena_unstash_purged(arena_t *arena, chunkselm = chunkselm_next; chunk_dalloc_arena(arena, addr, size, zeroed); } else { - arena_run_t *run = &runselm->run; - qr_remove(runselm, rd_link); + arena_chunk_map_misc_t *miscelm = + arena_rd_to_miscelm(rdelm); + arena_run_t *run = &miscelm->run; + qr_remove(rdelm, rd_link); arena_run_dalloc(arena, run, false, true); } } @@ -1292,7 +1299,7 @@ void arena_purge(arena_t *arena, bool all) { size_t npurge, npurgeable, npurged; - arena_chunk_map_misc_t purge_runs_sentinel; + arena_runs_dirty_link_t purge_runs_sentinel; extent_node_t purge_chunks_sentinel; /* From f5c8f37259d7697c3f850ac1e5ef63b724cf7689 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 10 Mar 2015 18:29:49 -0700 Subject: [PATCH 0669/3378] Normalize rdelm/rd structure field naming. 
--- include/jemalloc/internal/arena.h | 38 +++++++++++++++--------------- include/jemalloc/internal/extent.h | 8 +++---- src/arena.c | 8 +++---- 3 files changed, 27 insertions(+), 27 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index de298e55..9cbc591a 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -329,25 +329,25 @@ struct arena_s { * * LRU-----------------------------------------------------------MRU * - * /------------------\ - * | arena | - * | | - * | /------------\ | /-----------\ - * ...---->|chunks_cache|<---------------------->| chunk |<--... - * | \------------/ | | | - * | | | | - * | | /---\ /---\ | | - * | | |run| |run| | | - * | | | | | | | | - * | /----------\ | |---| |---| | /-----\ | - * ...----->|runs_dirty|<---->|rd |<---->|rd |<---->|rdelm|<-----... - * | \----------/ | |---| |---| | \-----/ | - * | | | | | | | | - * | | | | | | | | - * | | \---/ \---/ | | - * | | | | - * | | | | - * \------------------/ \-----------/ + * /-- arena ---\ + * | | + * | | + * |------------| /- chunk -\ + * ...->|chunks_cache|<--------------------------->| /----\ |<--... + * |------------| | |node| | + * | | | | | | + * | | /- run -\ /- run -\ | | | | + * | | | | | | | | | | + * | | | | | | | | | | + * |------------| |-------| |-------| | |----| | + * ...->|runs_dirty |<-->|rd |<-->|rd |<---->|rd |<----... 
+ * |------------| |-------| |-------| | |----| | + * | | | | | | | | | | + * | | | | | | | \----/ | + * | | \-------/ \-------/ | | + * | | | | + * | | | | + * \------------/ \---------/ */ arena_runs_dirty_link_t runs_dirty; extent_node_t chunks_cache; diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index 5dbc04a5..3751adc4 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -34,7 +34,7 @@ struct extent_node_s { prof_tctx_t *en_prof_tctx; /* Linkage for arena's runs_dirty and chunks_cache rings. */ - arena_runs_dirty_link_t rdelm; + arena_runs_dirty_link_t rd; qr(extent_node_t) cc_link; union { @@ -186,7 +186,7 @@ JEMALLOC_INLINE void extent_node_dirty_linkage_init(extent_node_t *node) { - qr_new(&node->rdelm, rd_link); + qr_new(&node->rd, rd_link); qr_new(node, cc_link); } @@ -195,7 +195,7 @@ extent_node_dirty_insert(extent_node_t *node, arena_runs_dirty_link_t *runs_dirty, extent_node_t *chunks_dirty) { - qr_meld(runs_dirty, &node->rdelm, rd_link); + qr_meld(runs_dirty, &node->rd, rd_link); qr_meld(chunks_dirty, node, cc_link); } @@ -203,7 +203,7 @@ JEMALLOC_INLINE void extent_node_dirty_remove(extent_node_t *node) { - qr_remove(&node->rdelm, rd_link); + qr_remove(&node->rd, rd_link); qr_remove(node, cc_link); } diff --git a/src/arena.c b/src/arena.c index 5d792f99..8af1a5df 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1064,7 +1064,7 @@ arena_dirty_count(arena_t *arena) rdelm != &arena->runs_dirty; rdelm = qr_next(rdelm, rd_link)) { size_t npages; - if (rdelm == &chunkselm->rdelm) { + if (rdelm == &chunkselm->rd) { npages = extent_node_size_get(chunkselm) >> LG_PAGE; chunkselm = qr_next(chunkselm, cc_link); } else { @@ -1122,7 +1122,7 @@ arena_stash_dirty(arena_t *arena, bool all, size_t npurge, size_t npages; rdelm_next = qr_next(rdelm, rd_link); - if (rdelm == &chunkselm->rdelm) { + if (rdelm == &chunkselm->rd) { extent_node_t *chunkselm_next; bool zero; UNUSED void *chunk; @@ 
-1205,7 +1205,7 @@ arena_purge_stashed(arena_t *arena, rdelm != purge_runs_sentinel; rdelm = qr_next(rdelm, rd_link)) { size_t npages; - if (rdelm == &chunkselm->rdelm) { + if (rdelm == &chunkselm->rd) { size_t size = extent_node_size_get(chunkselm); bool unzeroed; @@ -1275,7 +1275,7 @@ arena_unstash_purged(arena_t *arena, chunkselm = qr_next(purge_chunks_sentinel, cc_link); rdelm != purge_runs_sentinel; rdelm = rdelm_next) { rdelm_next = qr_next(rdelm, rd_link); - if (rdelm == &chunkselm->rdelm) { + if (rdelm == &chunkselm->rd) { extent_node_t *chunkselm_next = qr_next(chunkselm, cc_link); void *addr = extent_node_addr_get(chunkselm); From bc45d41d23bac598dbd38e5aac5a85b43d24bc04 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 11 Mar 2015 16:50:40 -0700 Subject: [PATCH 0670/3378] Fix a declaration-after-statement regression. --- src/arena.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/arena.c b/src/arena.c index 8af1a5df..e36cb502 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1215,11 +1215,10 @@ arena_purge_stashed(arena_t *arena, extent_node_zeroed_set(chunkselm, !unzeroed); chunkselm = qr_next(chunkselm, cc_link); } else { - arena_chunk_t *chunk; size_t pageind, run_size, flag_unzeroed, i; bool unzeroed; - - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(rdelm); + arena_chunk_t *chunk = (arena_chunk_t + *)CHUNK_ADDR2BASE(rdelm); arena_chunk_map_misc_t *miscelm = arena_rd_to_miscelm(rdelm); pageind = arena_miscelm_to_pageind(miscelm); From fbd8d773ad0230ffba4e2c296dac3edcac9ca27e Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 11 Mar 2015 23:14:50 -0700 Subject: [PATCH 0671/3378] Fix unsigned comparison underflow. These bugs only affected tests and debug builds. 
--- include/jemalloc/internal/rtree.h | 2 +- src/rtree.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/rtree.h b/include/jemalloc/internal/rtree.h index 2eb726d6..c1fb90c4 100644 --- a/include/jemalloc/internal/rtree.h +++ b/include/jemalloc/internal/rtree.h @@ -260,7 +260,7 @@ rtree_set(rtree_t *rtree, uintptr_t key, const extent_node_t *val) rtree_val_write(rtree, &node[subkey], val); return (false); } - assert(i < rtree->height - 1); + assert(i + 1 < rtree->height); child = rtree_child_read(rtree, &node[subkey], i); if (child == NULL) return (true); diff --git a/src/rtree.c b/src/rtree.c index 47d9084e..af0d97e7 100644 --- a/src/rtree.c +++ b/src/rtree.c @@ -63,7 +63,7 @@ static void rtree_delete_subtree(rtree_t *rtree, rtree_node_elm_t *node, unsigned level) { - if (level < rtree->height - 1) { + if (level + 1 < rtree->height) { size_t nchildren, i; nchildren = ZU(1) << rtree->levels[level].bits; From d69964bd2d31387f79a5f0494de8fd255b693afb Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 12 Mar 2015 16:25:18 -0700 Subject: [PATCH 0672/3378] Fix a heap profiling regression. Fix prof_tctx_comp() to incorporate tctx state into the comparison. During a dump it is possible for both a purgatory tctx and an otherwise equivalent nominal tctx to reside in the tree at the same time. This regression was introduced by 602c8e0971160e4b85b08b16cf8a2375aa24bc04 (Implement per thread heap profiling.). 
--- src/prof.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/prof.c b/src/prof.c index 4f1580b0..84fa5fda 100644 --- a/src/prof.c +++ b/src/prof.c @@ -137,8 +137,13 @@ prof_tctx_comp(const prof_tctx_t *a, const prof_tctx_t *b) { uint64_t a_uid = a->thr_uid; uint64_t b_uid = b->thr_uid; - - return ((a_uid > b_uid) - (a_uid < b_uid)); + int ret = (a_uid > b_uid) - (a_uid < b_uid); + if (ret == 0) { + prof_tctx_state_t a_state = a->state; + prof_tctx_state_t b_state = b->state; + ret = (a_state > b_state) - (a_state < b_state); + } + return (ret); } rb_gen(static UNUSED, tctx_tree_, prof_tctx_tree_t, prof_tctx_t, From f69e2f6fdab40c7612be5fd69960b8c7d40dba44 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Thu, 12 Mar 2015 08:51:05 +0900 Subject: [PATCH 0673/3378] Use the error code given to buferror on Windows a14bce85 made buferror not take an error code, and make the Windows code path for buferror use GetLastError, while the alternative code paths used errno. Then 2a83ed02 made buferror take an error code again, and while it changed the non-Windows code paths to use that error code, the Windows code path was not changed accordingly. 
--- src/util.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/util.c b/src/util.c index a964d700..a6ef5d50 100644 --- a/src/util.c +++ b/src/util.c @@ -81,7 +81,7 @@ buferror(int err, char *buf, size_t buflen) { #ifdef _WIN32 - FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM, NULL, GetLastError(), 0, + FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM, NULL, err, 0, (LPSTR)buf, buflen, NULL); return (0); #elif defined(__GLIBC__) && defined(_GNU_SOURCE) From d6384b09e137874d7cdf527e5bb50abba0ae5f95 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Thu, 16 Oct 2014 17:02:18 -0400 Subject: [PATCH 0674/3378] use CLOCK_MONOTONIC in the timer if it's available Linux sets _POSIX_MONOTONIC_CLOCK to 0 meaning it *might* be available, so a sysconf check is necessary at runtime with a fallback to the mandatory CLOCK_REALTIME clock. --- test/include/test/timer.h | 10 ++++++++++ test/src/timer.c | 17 +++++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/test/include/test/timer.h b/test/include/test/timer.h index 496072ac..9ffbaef5 100644 --- a/test/include/test/timer.h +++ b/test/include/test/timer.h @@ -1,10 +1,20 @@ /* Simple timer, for use in benchmark reporting. 
*/ +#include #include +#define JEMALLOC_CLOCK_GETTIME defined(_POSIX_MONOTONIC_CLOCK) \ + && _POSIX_MONOTONIC_CLOCK >= 0 + typedef struct { +#if JEMALLOC_CLOCK_GETTIME + struct timespec tv0; + struct timespec tv1; + int clock_id; +#else struct timeval tv0; struct timeval tv1; +#endif } timedelta_t; void timer_start(timedelta_t *timer); diff --git a/test/src/timer.c b/test/src/timer.c index 36fbedd4..338a9eff 100644 --- a/test/src/timer.c +++ b/test/src/timer.c @@ -4,22 +4,39 @@ void timer_start(timedelta_t *timer) { +#if JEMALLOC_CLOCK_GETTIME + if (sysconf(_SC_MONOTONIC_CLOCK) <= 0) + timer->clock_id = CLOCK_REALTIME; + else + timer->clock_id = CLOCK_MONOTONIC; + clock_gettime(timer->clock_id, &timer->tv0); +#else gettimeofday(&timer->tv0, NULL); +#endif } void timer_stop(timedelta_t *timer) { +#if JEMALLOC_CLOCK_GETTIME + clock_gettime(timer->clock_id, &timer->tv1); +#else gettimeofday(&timer->tv1, NULL); +#endif } uint64_t timer_usec(const timedelta_t *timer) { +#if JEMALLOC_CLOCK_GETTIME + return (((timer->tv1.tv_sec - timer->tv0.tv_sec) * 1000000) + + (timer->tv1.tv_nsec - timer->tv0.tv_nsec) / 1000); +#else return (((timer->tv1.tv_sec - timer->tv0.tv_sec) * 1000000) + timer->tv1.tv_usec - timer->tv0.tv_usec); +#endif } void From 764b00023f2bc97f240c3a758ed23ce9c0ad8526 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 14 Mar 2015 14:01:35 -0700 Subject: [PATCH 0675/3378] Fix a heap profiling regression. Add the prof_tctx_state_destroying transitionary state to fix a race between a thread destroying a tctx and another thread creating a new equivalent tctx. This regression was introduced by 602c8e0971160e4b85b08b16cf8a2375aa24bc04 (Implement per thread heap profiling.). 
--- include/jemalloc/internal/prof.h | 1 + src/prof.c | 44 ++++++++++++++++++++++---------- 2 files changed, 32 insertions(+), 13 deletions(-) diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index f5082438..8967333a 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -81,6 +81,7 @@ struct prof_cnt_s { typedef enum { prof_tctx_state_initializing, prof_tctx_state_nominal, + prof_tctx_state_destroying, prof_tctx_state_dumping, prof_tctx_state_purgatory /* Dumper must finish destroying. */ } prof_tctx_state_t; diff --git a/src/prof.c b/src/prof.c index 84fa5fda..e86669c2 100644 --- a/src/prof.c +++ b/src/prof.c @@ -642,10 +642,13 @@ prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) ckh_remove(tsd, &tdata->bt2tctx, &gctx->bt, NULL, NULL); destroy_tdata = prof_tdata_should_destroy(tdata, false); + if (tctx->state == prof_tctx_state_nominal) + tctx->state = prof_tctx_state_destroying; malloc_mutex_unlock(tdata->lock); malloc_mutex_lock(gctx->lock); - if (tctx->state != prof_tctx_state_dumping) { + switch (tctx->state) { + case prof_tctx_state_destroying: tctx_tree_remove(&gctx->tctxs, tctx); destroy_tctx = true; if (prof_gctx_should_destroy(gctx)) { @@ -667,7 +670,8 @@ prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) destroy_gctx = true; } else destroy_gctx = false; - } else { + break; + case prof_tctx_state_dumping: /* * A dumping thread needs tctx to remain valid until dumping * has finished. 
Change state such that the dumping thread will @@ -676,6 +680,9 @@ prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) tctx->state = prof_tctx_state_purgatory; destroy_tctx = false; destroy_gctx = false; + break; + default: + not_reached(); } malloc_mutex_unlock(gctx->lock); if (destroy_gctx) { @@ -1021,21 +1028,30 @@ prof_tctx_merge_tdata(prof_tctx_t *tctx, prof_tdata_t *tdata) { malloc_mutex_lock(tctx->gctx->lock); - if (tctx->state == prof_tctx_state_initializing) { + + switch (tctx->state) { + case prof_tctx_state_initializing: + case prof_tctx_state_destroying: malloc_mutex_unlock(tctx->gctx->lock); return; - } - assert(tctx->state == prof_tctx_state_nominal); - tctx->state = prof_tctx_state_dumping; - malloc_mutex_unlock(tctx->gctx->lock); + case prof_tctx_state_nominal: + tctx->state = prof_tctx_state_dumping; + malloc_mutex_unlock(tctx->gctx->lock); - memcpy(&tctx->dump_cnts, &tctx->cnts, sizeof(prof_cnt_t)); + memcpy(&tctx->dump_cnts, &tctx->cnts, sizeof(prof_cnt_t)); - tdata->cnt_summed.curobjs += tctx->dump_cnts.curobjs; - tdata->cnt_summed.curbytes += tctx->dump_cnts.curbytes; - if (opt_prof_accum) { - tdata->cnt_summed.accumobjs += tctx->dump_cnts.accumobjs; - tdata->cnt_summed.accumbytes += tctx->dump_cnts.accumbytes; + tdata->cnt_summed.curobjs += tctx->dump_cnts.curobjs; + tdata->cnt_summed.curbytes += tctx->dump_cnts.curbytes; + if (opt_prof_accum) { + tdata->cnt_summed.accumobjs += + tctx->dump_cnts.accumobjs; + tdata->cnt_summed.accumbytes += + tctx->dump_cnts.accumbytes; + } + break; + case prof_tctx_state_dumping: + case prof_tctx_state_purgatory: + not_reached(); } } @@ -1059,6 +1075,7 @@ prof_tctx_merge_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) switch (tctx->state) { case prof_tctx_state_nominal: + case prof_tctx_state_destroying: /* New since dumping started; ignore. 
*/ break; case prof_tctx_state_dumping: @@ -1094,6 +1111,7 @@ prof_tctx_finish_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) switch (tctx->state) { case prof_tctx_state_nominal: + case prof_tctx_state_destroying: /* New since dumping started; ignore. */ break; case prof_tctx_state_dumping: From 262146dfc4778f0671ab86458acd4ec531a80a34 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 14 Mar 2015 14:34:16 -0700 Subject: [PATCH 0676/3378] Eliminate innocuous compiler warnings. --- src/prof.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/prof.c b/src/prof.c index e86669c2..e9daa6f8 100644 --- a/src/prof.c +++ b/src/prof.c @@ -683,6 +683,8 @@ prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) break; default: not_reached(); + destroy_tctx = false; + destroy_gctx = false; } malloc_mutex_unlock(gctx->lock); if (destroy_gctx) { From 04211e226628c41da4b3804ba411b5dd4b3a02ab Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 16 Mar 2015 15:11:06 -0700 Subject: [PATCH 0677/3378] Fix heap profiling regressions. Remove the prof_tctx_state_destroying transitory state and instead add the tctx_uid field, so that the tuple uniquely identifies a tctx. This assures that tctx's are well ordered even when more than two with the same thr_uid coexist. A previous attempted fix based on prof_tctx_state_destroying was only sufficient for protecting against two coexisting tctx's, but it also introduced a new dumping race. These regressions were introduced by 602c8e0971160e4b85b08b16cf8a2375aa24bc04 (Implement per thread heap profiling.) and 764b00023f2bc97f240c3a758ed23ce9c0ad8526 (Fix a heap profiling regression.). 
--- include/jemalloc/internal/prof.h | 23 ++++++++++++++++++++++- src/prof.c | 21 +++++++++------------ 2 files changed, 31 insertions(+), 13 deletions(-) diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index 8967333a..2e227116 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -81,7 +81,6 @@ struct prof_cnt_s { typedef enum { prof_tctx_state_initializing, prof_tctx_state_nominal, - prof_tctx_state_destroying, prof_tctx_state_dumping, prof_tctx_state_purgatory /* Dumper must finish destroying. */ } prof_tctx_state_t; @@ -102,6 +101,21 @@ struct prof_tctx_s { /* Associated global context. */ prof_gctx_t *gctx; + /* + * UID that distinguishes multiple tctx's created by the same thread, + * but coexisting in gctx->tctxs. There are two ways that such + * coexistence can occur: + * - A dumper thread can cause a tctx to be retained in the purgatory + * state. + * - Although a single "producer" thread must create all tctx's which + * share the same thr_uid, multiple "consumers" can each concurrently + * execute portions of prof_tctx_destroy(). prof_tctx_destroy() only + * gets called once each time cnts.cur{objs,bytes} drop to 0, but this + * threshold can be hit again before the first consumer finishes + * executing prof_tctx_destroy(). + */ + uint64_t tctx_uid; + /* Linkage into gctx's tctxs. */ rb_node(prof_tctx_t) tctx_link; @@ -178,6 +192,13 @@ struct prof_tdata_s { rb_node(prof_tdata_t) tdata_link; + /* + * Counter used to initialize prof_tctx_t's tctx_uid. No locking is + * necessary when incrementing this field, because only one thread ever + * does so. + */ + uint64_t tctx_uid_next; + /* * Hash of (prof_bt_t *)-->(prof_tctx_t *). 
Each thread tracks * backtraces for which it has non-zero allocation/deallocation counters diff --git a/src/prof.c b/src/prof.c index e9daa6f8..f2a37253 100644 --- a/src/prof.c +++ b/src/prof.c @@ -135,13 +135,13 @@ static char *prof_thread_name_alloc(tsd_t *tsd, const char *thread_name); JEMALLOC_INLINE_C int prof_tctx_comp(const prof_tctx_t *a, const prof_tctx_t *b) { - uint64_t a_uid = a->thr_uid; - uint64_t b_uid = b->thr_uid; - int ret = (a_uid > b_uid) - (a_uid < b_uid); + uint64_t a_thr_uid = a->thr_uid; + uint64_t b_thr_uid = b->thr_uid; + int ret = (a_thr_uid > b_thr_uid) - (a_thr_uid < b_thr_uid); if (ret == 0) { - prof_tctx_state_t a_state = a->state; - prof_tctx_state_t b_state = b->state; - ret = (a_state > b_state) - (a_state < b_state); + uint64_t a_tctx_uid = a->tctx_uid; + uint64_t b_tctx_uid = b->tctx_uid; + ret = (a_tctx_uid > b_tctx_uid) - (a_tctx_uid < b_tctx_uid); } return (ret); } @@ -642,13 +642,11 @@ prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) ckh_remove(tsd, &tdata->bt2tctx, &gctx->bt, NULL, NULL); destroy_tdata = prof_tdata_should_destroy(tdata, false); - if (tctx->state == prof_tctx_state_nominal) - tctx->state = prof_tctx_state_destroying; malloc_mutex_unlock(tdata->lock); malloc_mutex_lock(gctx->lock); switch (tctx->state) { - case prof_tctx_state_destroying: + case prof_tctx_state_nominal: tctx_tree_remove(&gctx->tctxs, tctx); destroy_tctx = true; if (prof_gctx_should_destroy(gctx)) { @@ -795,6 +793,7 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) ret.p->thr_uid = tdata->thr_uid; memset(&ret.p->cnts, 0, sizeof(prof_cnt_t)); ret.p->gctx = gctx; + ret.p->tctx_uid = tdata->tctx_uid_next++; ret.p->prepared = true; ret.p->state = prof_tctx_state_initializing; malloc_mutex_lock(tdata->lock); @@ -1033,7 +1032,6 @@ prof_tctx_merge_tdata(prof_tctx_t *tctx, prof_tdata_t *tdata) switch (tctx->state) { case prof_tctx_state_initializing: - case prof_tctx_state_destroying: malloc_mutex_unlock(tctx->gctx->lock); return; case prof_tctx_state_nominal: 
@@ -1077,7 +1075,6 @@ prof_tctx_merge_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) switch (tctx->state) { case prof_tctx_state_nominal: - case prof_tctx_state_destroying: /* New since dumping started; ignore. */ break; case prof_tctx_state_dumping: @@ -1113,7 +1110,6 @@ prof_tctx_finish_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) switch (tctx->state) { case prof_tctx_state_nominal: - case prof_tctx_state_destroying: /* New since dumping started; ignore. */ break; case prof_tctx_state_dumping: @@ -1690,6 +1686,7 @@ prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, tdata->thread_name = thread_name; tdata->attached = true; tdata->expired = false; + tdata->tctx_uid_next = 0; if (ckh_new(tsd, &tdata->bt2tctx, PROF_CKH_MINITEMS, prof_bt_hash, prof_bt_keycomp)) { From c9db461ffb608ad32aed0e34663ae58a992e1003 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Tue, 17 Mar 2015 12:09:30 +0900 Subject: [PATCH 0678/3378] Use InterlockedCompareExchange instead of non-existing InterlockedCompareExchange32 --- include/jemalloc/internal/atomic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/jemalloc/internal/atomic.h b/include/jemalloc/internal/atomic.h index 0d33065e..522dd2ae 100644 --- a/include/jemalloc/internal/atomic.h +++ b/include/jemalloc/internal/atomic.h @@ -457,7 +457,7 @@ atomic_cas_uint32(uint32_t *p, uint32_t c, uint32_t s) { uint32_t o; - o = InterlockedCompareExchange32(p, s, c); + o = InterlockedCompareExchange(p, s, c); return (o != c); } From 8d6a3e8321a7767cb2ca0930b85d5d488a8cc659 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 18 Mar 2015 18:55:33 -0700 Subject: [PATCH 0679/3378] Implement dynamic per arena control over dirty page purging. Add mallctls: - arenas.lg_dirty_mult is initialized via opt.lg_dirty_mult, and can be modified to change the initial lg_dirty_mult setting for newly created arenas. 
- arena..lg_dirty_mult controls an individual arena's dirty page purging threshold, and synchronously triggers any purging that may be necessary to maintain the constraint. - arena..chunk.purge allows the per arena dirty page purging function to be replaced. This resolves #93. --- doc/jemalloc.xml.in | 88 +++++++++++-- include/jemalloc/internal/arena.h | 16 ++- include/jemalloc/internal/chunk.h | 6 + include/jemalloc/internal/private_symbols.txt | 7 + include/jemalloc/jemalloc_typedefs.h.in | 1 + src/arena.c | 87 +++++++++++-- src/chunk.c | 37 +++++- src/ctl.c | 121 ++++++++++++------ src/huge.c | 38 ++++-- src/stats.c | 10 ++ test/integration/chunk.c | 72 ++++++++--- test/unit/mallctl.c | 66 ++++++++++ test/unit/rtree.c | 10 +- 13 files changed, 460 insertions(+), 99 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 747e03f4..01ac38c3 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -937,7 +937,11 @@ for (i = 0; i < nbins; i++) { provides the kernel with sufficient information to recycle dirty pages if physical memory becomes scarce and the pages remain unused. The default minimum ratio is 8:1 (2^3:1); an option value of -1 will - disable dirty page purging. + disable dirty page purging. See arenas.lg_dirty_mult + and arena.<i>.lg_dirty_mult + for related dynamic control options. @@ -1151,7 +1155,7 @@ malloc_conf = "xmalloc:true";]]> opt.prof_active (bool) - rw + r- [] Profiling activated/deactivated. This is a secondary @@ -1489,6 +1493,20 @@ malloc_conf = "xmalloc:true";]]> settings. + + + arena.<i>.lg_dirty_mult + (ssize_t) + rw + + Current per-arena minimum ratio (log base 2) of active + to dirty pages for arena <i>. Each time this interface is set and + the ratio is increased, pages are synchronously purged as necessary to + impose the new ratio. See opt.lg_dirty_mult + for additional information. 
+ + arena.<i>.chunk.alloc @@ -1544,12 +1562,12 @@ malloc_conf = "xmalloc:true";]]> allocation for arenas created via arenas.extend such that all chunks originate from an application-supplied chunk allocator - (by setting custom chunk allocation/deallocation functions just after - arena creation), but the automatically created arenas may have already - created chunks prior to the application having an opportunity to take - over chunk allocation. + (by setting custom chunk allocation/deallocation/purge functions just + after arena creation), but the automatically created arenas may have + already created chunks prior to the application having an opportunity to + take over chunk allocation. - typedef void (chunk_dalloc_t) + typedef bool (chunk_dalloc_t) void *chunk size_t size unsigned arena_ind @@ -1557,7 +1575,47 @@ malloc_conf = "xmalloc:true";]]> A chunk deallocation function conforms to the chunk_dalloc_t type and deallocates a chunk of given size on - behalf of arena arena_ind. + behalf of arena arena_ind, returning false upon + success. + + + + + arena.<i>.chunk.purge + (chunk_purge_t *) + rw + + Get or set the chunk purge function for arena <i>. + A chunk purge function optionally discards physical pages associated + with pages in the chunk's virtual memory range but leaves the virtual + memory mapping intact, and indicates via its return value whether pages + in the virtual memory range will be zero-filled the next time they are + accessed. If setting, the chunk purge function must be capable of + purging all extant chunks associated with arena <i>, usually by + passing unknown chunks to the purge function that was replaced. 
In + practice, it is feasible to control allocation for arenas created via + arenas.extend + such that all chunks originate from an application-supplied chunk + allocator (by setting custom chunk allocation/deallocation/purge + functions just after arena creation), but the automatically created + arenas may have already created chunks prior to the application having + an opportunity to take over chunk allocation. + + typedef bool (chunk_purge_t) + void *chunk + size_t offset + size_t length + unsigned arena_ind + + A chunk purge function conforms to the chunk_purge_t type + and purges pages within chunk at + offset bytes, extending for + length on behalf of arena + arena_ind, returning false if pages within the + purged virtual memory range will be zero-filled the next time they are + accessed. Note that the memory range being purged may span multiple + contiguous chunks, e.g. when purging memory that backed a huge + allocation. @@ -1581,6 +1639,20 @@ malloc_conf = "xmalloc:true";]]> initialized. + + + arenas.lg_dirty_mult + (ssize_t) + rw + + Current default per-arena minimum ratio (log base 2) of + active to dirty pages, used to initialize arena.<i>.lg_dirty_mult + during arena creation. See opt.lg_dirty_mult + for additional information. + + arenas.quantum diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 9cbc591a..56ee74aa 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -16,10 +16,10 @@ /* * The minimum ratio of active:dirty pages per arena is computed as: * - * (nactive >> opt_lg_dirty_mult) >= ndirty + * (nactive >> lg_dirty_mult) >= ndirty * - * So, supposing that opt_lg_dirty_mult is 3, there can be no less than 8 times - * as many active pages as dirty pages. + * So, supposing that lg_dirty_mult is 3, there can be no less than 8 times as + * many active pages as dirty pages. 
*/ #define LG_DIRTY_MULT_DEFAULT 3 @@ -304,6 +304,9 @@ struct arena_s { */ arena_chunk_t *spare; + /* Minimum ratio (log base 2) of nactive:ndirty. */ + ssize_t lg_dirty_mult; + /* Number of pages in active runs and huge regions. */ size_t nactive; @@ -376,10 +379,11 @@ struct arena_s { malloc_mutex_t node_cache_mtx; /* - * User-configurable chunk allocation and deallocation functions. + * User-configurable chunk allocation/deallocation/purge functions. */ chunk_alloc_t *chunk_alloc; chunk_dalloc_t *chunk_dalloc; + chunk_purge_t *chunk_purge; /* bins is used to store trees of free regions. */ arena_bin_t bins[NBINS]; @@ -416,6 +420,8 @@ void arena_chunk_ralloc_huge_shrink(arena_t *arena, void *chunk, size_t oldsize, size_t usize); bool arena_chunk_ralloc_huge_expand(arena_t *arena, void *chunk, size_t oldsize, size_t usize, bool *zero); +ssize_t arena_lg_dirty_mult_get(arena_t *arena); +bool arena_lg_dirty_mult_set(arena_t *arena, ssize_t lg_dirty_mult); void arena_maybe_purge(arena_t *arena); void arena_purge_all(arena_t *arena); void arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, @@ -462,6 +468,8 @@ void *arena_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size, size_t extra, size_t alignment, bool zero, tcache_t *tcache); dss_prec_t arena_dss_prec_get(arena_t *arena); bool arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec); +ssize_t arena_lg_dirty_mult_default_get(void); +bool arena_lg_dirty_mult_default_set(ssize_t lg_dirty_mult); void arena_stats_merge(arena_t *arena, const char **dss, size_t *nactive, size_t *ndirty, arena_stats_t *astats, malloc_bin_stats_t *bstats, malloc_large_stats_t *lstats, malloc_huge_stats_t *hstats); diff --git a/include/jemalloc/internal/chunk.h b/include/jemalloc/internal/chunk.h index 1af5b24b..80938147 100644 --- a/include/jemalloc/internal/chunk.h +++ b/include/jemalloc/internal/chunk.h @@ -54,6 +54,12 @@ void chunk_dalloc_arena(arena_t *arena, void *chunk, size_t size, bool 
chunk_dalloc_default(void *chunk, size_t size, unsigned arena_ind); void chunk_dalloc_wrapper(arena_t *arena, chunk_dalloc_t *chunk_dalloc, void *chunk, size_t size); +bool chunk_purge_arena(arena_t *arena, void *chunk, size_t offset, + size_t length); +bool chunk_purge_default(void *chunk, size_t offset, size_t length, + unsigned arena_ind); +bool chunk_purge_wrapper(arena_t *arena, chunk_purge_t *chunk_purge, + void *chunk, size_t offset, size_t length); bool chunk_boot(void); void chunk_prefork(void); void chunk_postfork_parent(void); diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index d086db18..bc0f2a6a 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -30,6 +30,10 @@ arena_dalloc_small arena_dss_prec_get arena_dss_prec_set arena_init +arena_lg_dirty_mult_default_get +arena_lg_dirty_mult_default_set +arena_lg_dirty_mult_get +arena_lg_dirty_mult_set arena_malloc arena_malloc_large arena_malloc_small @@ -151,6 +155,9 @@ chunk_npages chunk_postfork_child chunk_postfork_parent chunk_prefork +chunk_purge_arena +chunk_purge_default +chunk_purge_wrapper chunk_record chunk_register chunks_rtree diff --git a/include/jemalloc/jemalloc_typedefs.h.in b/include/jemalloc/jemalloc_typedefs.h.in index 8092f1b1..d4b46908 100644 --- a/include/jemalloc/jemalloc_typedefs.h.in +++ b/include/jemalloc/jemalloc_typedefs.h.in @@ -1,2 +1,3 @@ typedef void *(chunk_alloc_t)(void *, size_t, size_t, bool *, unsigned); typedef bool (chunk_dalloc_t)(void *, size_t, unsigned); +typedef bool (chunk_purge_t)(void *, size_t, size_t, unsigned); diff --git a/src/arena.c b/src/arena.c index e36cb502..7272682d 100644 --- a/src/arena.c +++ b/src/arena.c @@ -5,6 +5,7 @@ /* Data. 
*/ ssize_t opt_lg_dirty_mult = LG_DIRTY_MULT_DEFAULT; +static ssize_t lg_dirty_mult_default; arena_bin_info_t arena_bin_info[NBINS]; size_t map_bias; @@ -1032,15 +1033,49 @@ arena_run_alloc_small(arena_t *arena, size_t size, index_t binind) return (arena_run_alloc_small_helper(arena, size, binind)); } +static bool +arena_lg_dirty_mult_valid(ssize_t lg_dirty_mult) +{ + + return (lg_dirty_mult >= -1 && lg_dirty_mult < (sizeof(size_t) << 3)); +} + +ssize_t +arena_lg_dirty_mult_get(arena_t *arena) +{ + ssize_t lg_dirty_mult; + + malloc_mutex_lock(&arena->lock); + lg_dirty_mult = arena->lg_dirty_mult; + malloc_mutex_unlock(&arena->lock); + + return (lg_dirty_mult); +} + +bool +arena_lg_dirty_mult_set(arena_t *arena, ssize_t lg_dirty_mult) +{ + + if (!arena_lg_dirty_mult_valid(lg_dirty_mult)) + return (true); + + malloc_mutex_lock(&arena->lock); + arena->lg_dirty_mult = lg_dirty_mult; + arena_maybe_purge(arena); + malloc_mutex_unlock(&arena->lock); + + return (false); +} + void arena_maybe_purge(arena_t *arena) { size_t threshold; /* Don't purge if the option is disabled. */ - if (opt_lg_dirty_mult < 0) + if (arena->lg_dirty_mult < 0) return; - threshold = (arena->nactive >> opt_lg_dirty_mult); + threshold = (arena->nactive >> arena->lg_dirty_mult); threshold = threshold < chunk_npages ? chunk_npages : threshold; /* * Don't purge unless the number of purgeable pages exceeds the @@ -1096,7 +1131,7 @@ arena_compute_npurge(arena_t *arena, bool all) * purge. */ if (!all) { - size_t threshold = (arena->nactive >> opt_lg_dirty_mult); + size_t threshold = (arena->nactive >> arena->lg_dirty_mult); threshold = threshold < chunk_npages ? 
chunk_npages : threshold; npurge = arena->ndirty - threshold; @@ -1192,6 +1227,7 @@ arena_purge_stashed(arena_t *arena, extent_node_t *purge_chunks_sentinel) { size_t npurged, nmadvise; + chunk_purge_t *chunk_purge; arena_runs_dirty_link_t *rdelm; extent_node_t *chunkselm; @@ -1199,6 +1235,7 @@ arena_purge_stashed(arena_t *arena, nmadvise = 0; npurged = 0; + chunk_purge = arena->chunk_purge; malloc_mutex_unlock(&arena->lock); for (rdelm = qr_next(purge_runs_sentinel, rd_link), chunkselm = qr_next(purge_chunks_sentinel, cc_link); @@ -1207,11 +1244,16 @@ arena_purge_stashed(arena_t *arena, if (rdelm == &chunkselm->rd) { size_t size = extent_node_size_get(chunkselm); + void *addr, *chunk; + size_t offset; bool unzeroed; npages = size >> LG_PAGE; - unzeroed = pages_purge(extent_node_addr_get(chunkselm), - size); + addr = extent_node_addr_get(chunkselm); + chunk = CHUNK_ADDR2BASE(addr); + offset = CHUNK_ADDR2OFFSET(addr); + unzeroed = chunk_purge_wrapper(arena, chunk_purge, + chunk, offset, size); extent_node_zeroed_set(chunkselm, !unzeroed); chunkselm = qr_next(chunkselm, cc_link); } else { @@ -1226,15 +1268,15 @@ arena_purge_stashed(arena_t *arena, npages = run_size >> LG_PAGE; assert(pageind + npages <= chunk_npages); - unzeroed = pages_purge((void *)((uintptr_t)chunk + - (pageind << LG_PAGE)), run_size); + unzeroed = chunk_purge_wrapper(arena, chunk_purge, + chunk, pageind << LG_PAGE, run_size); flag_unzeroed = unzeroed ? CHUNK_MAP_UNZEROED : 0; /* * Set the unzeroed flag for all pages, now that - * pages_purge() has returned whether the pages were - * zeroed as a side effect of purging. This chunk map - * modification is safe even though the arena mutex + * chunk_purge_wrapper() has returned whether the pages + * were zeroed as a side effect of purging. This chunk + * map modification is safe even though the arena mutex * isn't currently owned by this thread, because the run * is marked as allocated, thus protecting it from being * modified by any other thread. 
As long as these @@ -1294,7 +1336,7 @@ arena_unstash_purged(arena_t *arena, } } -void +static void arena_purge(arena_t *arena, bool all) { size_t npurge, npurgeable, npurged; @@ -1309,7 +1351,7 @@ arena_purge(arena_t *arena, bool all) size_t ndirty = arena_dirty_count(arena); assert(ndirty == arena->ndirty); } - assert((arena->nactive >> opt_lg_dirty_mult) < arena->ndirty || all); + assert((arena->nactive >> arena->lg_dirty_mult) < arena->ndirty || all); if (config_stats) arena->stats.npurge++; @@ -2596,6 +2638,23 @@ arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec) return (false); } +ssize_t +arena_lg_dirty_mult_default_get(void) +{ + + return ((ssize_t)atomic_read_z((size_t *)&lg_dirty_mult_default)); +} + +bool +arena_lg_dirty_mult_default_set(ssize_t lg_dirty_mult) +{ + + if (!arena_lg_dirty_mult_valid(lg_dirty_mult)) + return (true); + atomic_write_z((size_t *)&lg_dirty_mult_default, (size_t)lg_dirty_mult); + return (false); +} + void arena_stats_merge(arena_t *arena, const char **dss, size_t *nactive, size_t *ndirty, arena_stats_t *astats, malloc_bin_stats_t *bstats, @@ -2702,6 +2761,7 @@ arena_new(unsigned ind) arena->spare = NULL; + arena->lg_dirty_mult = arena_lg_dirty_mult_default_get(); arena->nactive = 0; arena->ndirty = 0; @@ -2727,6 +2787,7 @@ arena_new(unsigned ind) arena->chunk_alloc = chunk_alloc_default; arena->chunk_dalloc = chunk_dalloc_default; + arena->chunk_purge = chunk_purge_default; /* Initialize bins. */ for (i = 0; i < NBINS; i++) { @@ -2860,6 +2921,8 @@ arena_boot(void) size_t header_size; unsigned i; + arena_lg_dirty_mult_default_set(opt_lg_dirty_mult); + /* * Compute the header size such that it is large enough to contain the * page map. 
The page map is biased to omit entries for the header diff --git a/src/chunk.c b/src/chunk.c index fb8cd413..70634107 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -391,8 +391,10 @@ chunk_record(arena_t *arena, extent_tree_t *chunks_szad, * pages have already been purged, so that this is only * a virtual memory leak. */ - if (cache) - pages_purge(chunk, size); + if (cache) { + chunk_purge_wrapper(arena, arena->chunk_purge, + chunk, 0, size); + } goto label_return; } extent_node_init(node, arena, chunk, size, !unzeroed); @@ -485,6 +487,37 @@ chunk_dalloc_wrapper(arena_t *arena, chunk_dalloc_t *chunk_dalloc, void *chunk, JEMALLOC_VALGRIND_MAKE_MEM_NOACCESS(chunk, size); } +bool +chunk_purge_arena(arena_t *arena, void *chunk, size_t offset, size_t length) +{ + + assert(chunk != NULL); + assert(CHUNK_ADDR2BASE(chunk) == chunk); + assert((offset & PAGE_MASK) == 0); + assert(length != 0); + assert((length & PAGE_MASK) == 0); + + return (pages_purge((void *)((uintptr_t)chunk + (uintptr_t)offset), + length)); +} + +bool +chunk_purge_default(void *chunk, size_t offset, size_t length, + unsigned arena_ind) +{ + + return (chunk_purge_arena(chunk_arena_get(arena_ind), chunk, offset, + length)); +} + +bool +chunk_purge_wrapper(arena_t *arena, chunk_purge_t *chunk_purge, void *chunk, + size_t offset, size_t length) +{ + + return (chunk_purge(chunk, offset, length, arena->ind)); +} + static rtree_node_elm_t * chunks_rtree_node_alloc(size_t nelms) { diff --git a/src/ctl.c b/src/ctl.c index cd7927fc..447b8776 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -116,8 +116,10 @@ CTL_PROTO(tcache_destroy) CTL_PROTO(arena_i_purge) static void arena_purge(unsigned arena_ind); CTL_PROTO(arena_i_dss) +CTL_PROTO(arena_i_lg_dirty_mult) CTL_PROTO(arena_i_chunk_alloc) CTL_PROTO(arena_i_chunk_dalloc) +CTL_PROTO(arena_i_chunk_purge) INDEX_PROTO(arena_i) CTL_PROTO(arenas_bin_i_size) CTL_PROTO(arenas_bin_i_nregs) @@ -129,6 +131,7 @@ CTL_PROTO(arenas_hchunk_i_size) INDEX_PROTO(arenas_hchunk_i) 
CTL_PROTO(arenas_narenas) CTL_PROTO(arenas_initialized) +CTL_PROTO(arenas_lg_dirty_mult) CTL_PROTO(arenas_quantum) CTL_PROTO(arenas_page) CTL_PROTO(arenas_tcache_max) @@ -283,12 +286,14 @@ static const ctl_named_node_t tcache_node[] = { static const ctl_named_node_t chunk_node[] = { {NAME("alloc"), CTL(arena_i_chunk_alloc)}, - {NAME("dalloc"), CTL(arena_i_chunk_dalloc)} + {NAME("dalloc"), CTL(arena_i_chunk_dalloc)}, + {NAME("purge"), CTL(arena_i_chunk_purge)} }; static const ctl_named_node_t arena_i_node[] = { {NAME("purge"), CTL(arena_i_purge)}, {NAME("dss"), CTL(arena_i_dss)}, + {NAME("lg_dirty_mult"), CTL(arena_i_lg_dirty_mult)}, {NAME("chunk"), CHILD(named, chunk)}, }; static const ctl_named_node_t super_arena_i_node[] = { @@ -337,6 +342,7 @@ static const ctl_indexed_node_t arenas_hchunk_node[] = { static const ctl_named_node_t arenas_node[] = { {NAME("narenas"), CTL(arenas_narenas)}, {NAME("initialized"), CTL(arenas_initialized)}, + {NAME("lg_dirty_mult"), CTL(arenas_lg_dirty_mult)}, {NAME("quantum"), CTL(arenas_quantum)}, {NAME("page"), CTL(arenas_page)}, {NAME("tcache_max"), CTL(arenas_tcache_max)}, @@ -1617,57 +1623,70 @@ label_return: } static int -arena_i_chunk_alloc_ctl(const size_t *mib, size_t miblen, void *oldp, +arena_i_lg_dirty_mult_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; unsigned arena_ind = mib[1]; arena_t *arena; - malloc_mutex_lock(&ctl_mtx); - if (arena_ind < narenas_total_get() && (arena = arena_get(tsd_fetch(), - arena_ind, false, true)) != NULL) { - malloc_mutex_lock(&arena->lock); - READ(arena->chunk_alloc, chunk_alloc_t *); - WRITE(arena->chunk_alloc, chunk_alloc_t *); - } else { + arena = arena_get(tsd_fetch(), arena_ind, false, (arena_ind == 0)); + if (arena == NULL) { ret = EFAULT; - goto label_outer_return; + goto label_return; } + + if (oldp != NULL && oldlenp != NULL) { + size_t oldval = arena_lg_dirty_mult_get(arena); + READ(oldval, ssize_t); + } + if (newp != NULL) { 
+ if (newlen != sizeof(ssize_t)) { + ret = EINVAL; + goto label_return; + } + if (arena_lg_dirty_mult_set(arena, *(ssize_t *)newp)) { + ret = EFAULT; + goto label_return; + } + } + ret = 0; label_return: - malloc_mutex_unlock(&arena->lock); -label_outer_return: - malloc_mutex_unlock(&ctl_mtx); return (ret); } -static int -arena_i_chunk_dalloc_ctl(const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) -{ - - int ret; - unsigned arena_ind = mib[1]; - arena_t *arena; - - malloc_mutex_lock(&ctl_mtx); - if (arena_ind < narenas_total_get() && (arena = arena_get(tsd_fetch(), - arena_ind, false, true)) != NULL) { - malloc_mutex_lock(&arena->lock); - READ(arena->chunk_dalloc, chunk_dalloc_t *); - WRITE(arena->chunk_dalloc, chunk_dalloc_t *); - } else { - ret = EFAULT; - goto label_outer_return; - } - ret = 0; -label_return: - malloc_mutex_unlock(&arena->lock); -label_outer_return: - malloc_mutex_unlock(&ctl_mtx); - return (ret); +#define CHUNK_FUNC(n) \ +static int \ +arena_i_chunk_##n##_ctl(const size_t *mib, size_t miblen, void *oldp, \ + size_t *oldlenp, void *newp, size_t newlen) \ +{ \ + \ + int ret; \ + unsigned arena_ind = mib[1]; \ + arena_t *arena; \ + \ + malloc_mutex_lock(&ctl_mtx); \ + if (arena_ind < narenas_total_get() && (arena = \ + arena_get(tsd_fetch(), arena_ind, false, true)) != NULL) { \ + malloc_mutex_lock(&arena->lock); \ + READ(arena->chunk_##n, chunk_##n##_t *); \ + WRITE(arena->chunk_##n, chunk_##n##_t *); \ + } else { \ + ret = EFAULT; \ + goto label_outer_return; \ + } \ + ret = 0; \ +label_return: \ + malloc_mutex_unlock(&arena->lock); \ +label_outer_return: \ + malloc_mutex_unlock(&ctl_mtx); \ + return (ret); \ } +CHUNK_FUNC(alloc) +CHUNK_FUNC(dalloc) +CHUNK_FUNC(purge) +#undef CHUNK_FUNC static const ctl_named_node_t * arena_i_index(const size_t *mib, size_t miblen, size_t i) @@ -1736,6 +1755,32 @@ label_return: return (ret); } +static int +arenas_lg_dirty_mult_ctl(const size_t *mib, size_t miblen, void 
*oldp, + size_t *oldlenp, void *newp, size_t newlen) +{ + int ret; + + if (oldp != NULL && oldlenp != NULL) { + size_t oldval = arena_lg_dirty_mult_default_get(); + READ(oldval, ssize_t); + } + if (newp != NULL) { + if (newlen != sizeof(ssize_t)) { + ret = EINVAL; + goto label_return; + } + if (arena_lg_dirty_mult_default_set(*(ssize_t *)newp)) { + ret = EFAULT; + goto label_return; + } + } + + ret = 0; +label_return: + return (ret); +} + CTL_RO_NL_GEN(arenas_quantum, QUANTUM, size_t) CTL_RO_NL_GEN(arenas_page, PAGE, size_t) CTL_RO_NL_CGEN(config_tcache, arenas_tcache_max, tcache_maxclass, size_t) diff --git a/src/huge.c b/src/huge.c index 3092932e..aa26f5df 100644 --- a/src/huge.c +++ b/src/huge.c @@ -124,9 +124,10 @@ huge_ralloc_no_move_similar(void *ptr, size_t oldsize, size_t usize, size_t size, size_t extra, bool zero) { size_t usize_next; - bool zeroed; extent_node_t *node; arena_t *arena; + chunk_purge_t *chunk_purge; + bool zeroed; /* Increase usize to incorporate extra. */ while (usize < s2u(size+extra) && (usize_next = s2u(usize+1)) < oldsize) @@ -135,11 +136,18 @@ huge_ralloc_no_move_similar(void *ptr, size_t oldsize, size_t usize, if (oldsize == usize) return; + node = huge_node_get(ptr); + arena = extent_node_arena_get(node); + + malloc_mutex_lock(&arena->lock); + chunk_purge = arena->chunk_purge; + malloc_mutex_unlock(&arena->lock); + /* Fill if necessary (shrinking). */ if (oldsize > usize) { size_t sdiff = CHUNK_CEILING(usize) - usize; - zeroed = (sdiff != 0) ? !pages_purge((void *)((uintptr_t)ptr + - usize), sdiff) : true; + zeroed = (sdiff != 0) ? 
!chunk_purge_wrapper(arena, chunk_purge, + CHUNK_ADDR2BASE(ptr), CHUNK_ADDR2OFFSET(ptr), usize) : true; if (config_fill && unlikely(opt_junk_free)) { memset((void *)((uintptr_t)ptr + usize), 0x5a, oldsize - usize); @@ -148,8 +156,6 @@ huge_ralloc_no_move_similar(void *ptr, size_t oldsize, size_t usize, } else zeroed = true; - node = huge_node_get(ptr); - arena = extent_node_arena_get(node); malloc_mutex_lock(&arena->huge_mtx); /* Update the size of the huge allocation. */ assert(extent_node_size_get(node) != usize); @@ -177,22 +183,29 @@ huge_ralloc_no_move_similar(void *ptr, size_t oldsize, size_t usize, static void huge_ralloc_no_move_shrink(void *ptr, size_t oldsize, size_t usize) { - size_t sdiff; - bool zeroed; extent_node_t *node; arena_t *arena; + chunk_purge_t *chunk_purge; + size_t sdiff; + bool zeroed; + + node = huge_node_get(ptr); + arena = extent_node_arena_get(node); + + malloc_mutex_lock(&arena->lock); + chunk_purge = arena->chunk_purge; + malloc_mutex_unlock(&arena->lock); sdiff = CHUNK_CEILING(usize) - usize; - zeroed = (sdiff != 0) ? !pages_purge((void *)((uintptr_t)ptr + usize), - sdiff) : true; + zeroed = (sdiff != 0) ? !chunk_purge_wrapper(arena, chunk_purge, + CHUNK_ADDR2BASE((uintptr_t)ptr + usize), + CHUNK_ADDR2OFFSET((uintptr_t)ptr + usize), sdiff) : true; if (config_fill && unlikely(opt_junk_free)) { huge_dalloc_junk((void *)((uintptr_t)ptr + usize), oldsize - usize); zeroed = false; } - node = huge_node_get(ptr); - arena = extent_node_arena_get(node); malloc_mutex_lock(&arena->huge_mtx); /* Update the size of the huge allocation. */ extent_node_size_set(node, usize); @@ -291,8 +304,7 @@ huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, } /* Attempt to expand the allocation in-place. 
*/ - if (huge_ralloc_no_move_expand(ptr, oldsize, size + extra, - zero)) { + if (huge_ralloc_no_move_expand(ptr, oldsize, size + extra, zero)) { if (extra == 0) return (true); diff --git a/src/stats.c b/src/stats.c index e0f71651..f246c8bc 100644 --- a/src/stats.c +++ b/src/stats.c @@ -264,6 +264,7 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, { unsigned nthreads; const char *dss; + ssize_t lg_dirty_mult; size_t page, pactive, pdirty, mapped; size_t metadata_mapped, metadata_allocated; uint64_t npurge, nmadvise, purged; @@ -282,6 +283,15 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, CTL_I_GET("stats.arenas.0.dss", &dss, const char *); malloc_cprintf(write_cb, cbopaque, "dss allocation precedence: %s\n", dss); + CTL_I_GET("stats.arenas.0.lg_dirty_mult", &lg_dirty_mult, ssize_t); + if (lg_dirty_mult >= 0) { + malloc_cprintf(write_cb, cbopaque, + "Min active:dirty page ratio: %u:1\n", + (1U << lg_dirty_mult)); + } else { + malloc_cprintf(write_cb, cbopaque, + "Min active:dirty page ratio: N/A\n"); + } CTL_I_GET("stats.arenas.0.pactive", &pactive, size_t); CTL_I_GET("stats.arenas.0.pdirty", &pdirty, size_t); CTL_I_GET("stats.arenas.0.npurge", &npurge, uint64_t); diff --git a/test/integration/chunk.c b/test/integration/chunk.c index 89938504..de45bc51 100644 --- a/test/integration/chunk.c +++ b/test/integration/chunk.c @@ -2,13 +2,8 @@ chunk_alloc_t *old_alloc; chunk_dalloc_t *old_dalloc; - -bool -chunk_dalloc(void *chunk, size_t size, unsigned arena_ind) -{ - - return (old_dalloc(chunk, size, arena_ind)); -} +chunk_purge_t *old_purge; +bool purged; void * chunk_alloc(void *new_addr, size_t size, size_t alignment, bool *zero, @@ -18,36 +13,79 @@ chunk_alloc(void *new_addr, size_t size, size_t alignment, bool *zero, return (old_alloc(new_addr, size, alignment, zero, arena_ind)); } +bool +chunk_dalloc(void *chunk, size_t size, unsigned arena_ind) +{ + + return (old_dalloc(chunk, size, arena_ind)); +} + +bool 
+chunk_purge(void *chunk, size_t offset, size_t length, unsigned arena_ind) +{ + + purged = true; + return (old_purge(chunk, offset, length, arena_ind)); +} + TEST_BEGIN(test_chunk) { void *p; chunk_alloc_t *new_alloc; chunk_dalloc_t *new_dalloc; - size_t old_size, new_size; + chunk_purge_t *new_purge; + size_t old_size, new_size, huge0, huge1, huge2, sz; new_alloc = chunk_alloc; new_dalloc = chunk_dalloc; + new_purge = chunk_purge; old_size = sizeof(chunk_alloc_t *); new_size = sizeof(chunk_alloc_t *); - assert_d_eq(mallctl("arena.0.chunk.alloc", &old_alloc, - &old_size, &new_alloc, new_size), 0, - "Unexpected alloc error"); - assert_ptr_ne(old_alloc, new_alloc, - "Unexpected alloc error"); + assert_d_eq(mallctl("arena.0.chunk.alloc", &old_alloc, &old_size, + &new_alloc, new_size), 0, "Unexpected alloc error"); + assert_ptr_ne(old_alloc, new_alloc, "Unexpected alloc error"); + assert_d_eq(mallctl("arena.0.chunk.dalloc", &old_dalloc, &old_size, &new_dalloc, new_size), 0, "Unexpected dalloc error"); assert_ptr_ne(old_dalloc, new_dalloc, "Unexpected dalloc error"); + assert_d_eq(mallctl("arena.0.chunk.purge", &old_purge, &old_size, + &new_purge, new_size), 0, "Unexpected purge error"); + assert_ptr_ne(old_purge, new_purge, "Unexpected purge error"); + + sz = sizeof(size_t); + assert_d_eq(mallctl("arenas.hchunk.0.size", &huge0, &sz, NULL, 0), 0, + "Unexpected arenas.hchunk.0.size failure"); + assert_d_eq(mallctl("arenas.hchunk.1.size", &huge1, &sz, NULL, 0), 0, + "Unexpected arenas.hchunk.1.size failure"); + assert_d_eq(mallctl("arenas.hchunk.2.size", &huge2, &sz, NULL, 0), 0, + "Unexpected arenas.hchunk.2.size failure"); + if (huge0 * 2 > huge2) { + /* + * There are at least four size classes per doubling, so + * xallocx() from size=huge2 to size=huge1 is guaranteed to + * leave trailing purgeable memory. 
+ */ + p = mallocx(huge2, 0); + assert_ptr_not_null(p, "Unexpected mallocx() error"); + purged = false; + assert_zu_eq(xallocx(p, huge1, 0, 0), huge1, + "Unexpected xallocx() failure"); + assert_true(purged, "Unexpected purge"); + dallocx(p, 0); + } + p = mallocx(42, 0); - assert_ptr_ne(p, NULL, "Unexpected alloc error"); + assert_ptr_not_null(p, "Unexpected mallocx() error"); free(p); - assert_d_eq(mallctl("arena.0.chunk.alloc", NULL, - NULL, &old_alloc, old_size), 0, - "Unexpected alloc error"); + assert_d_eq(mallctl("arena.0.chunk.alloc", NULL, NULL, &old_alloc, + old_size), 0, "Unexpected alloc error"); assert_d_eq(mallctl("arena.0.chunk.dalloc", NULL, NULL, &old_dalloc, old_size), 0, "Unexpected dalloc error"); + assert_d_eq(mallctl("arena.0.chunk.purge", NULL, NULL, &old_purge, + old_size), 0, "Unexpected purge error"); } TEST_END diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 5960496f..31ada191 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -348,6 +348,38 @@ TEST_BEGIN(test_thread_arena) } TEST_END +TEST_BEGIN(test_arena_i_lg_dirty_mult) +{ + ssize_t lg_dirty_mult, orig_lg_dirty_mult, prev_lg_dirty_mult; + size_t sz = sizeof(ssize_t); + + assert_d_eq(mallctl("arena.0.lg_dirty_mult", &orig_lg_dirty_mult, &sz, + NULL, 0), 0, "Unexpected mallctl() failure"); + + lg_dirty_mult = -2; + assert_d_eq(mallctl("arena.0.lg_dirty_mult", NULL, NULL, + &lg_dirty_mult, sizeof(ssize_t)), EFAULT, + "Unexpected mallctl() success"); + + lg_dirty_mult = (sizeof(size_t) << 3); + assert_d_eq(mallctl("arena.0.lg_dirty_mult", NULL, NULL, + &lg_dirty_mult, sizeof(ssize_t)), EFAULT, + "Unexpected mallctl() success"); + + for (prev_lg_dirty_mult = orig_lg_dirty_mult, lg_dirty_mult = -1; + lg_dirty_mult < (sizeof(ssize_t) << 3); prev_lg_dirty_mult = + lg_dirty_mult, lg_dirty_mult++) { + ssize_t old_lg_dirty_mult; + + assert_d_eq(mallctl("arena.0.lg_dirty_mult", &old_lg_dirty_mult, + &sz, &lg_dirty_mult, sizeof(ssize_t)), 0, + "Unexpected mallctl() 
failure"); + assert_zd_eq(old_lg_dirty_mult, prev_lg_dirty_mult, + "Unexpected old arena.0.lg_dirty_mult"); + } +} +TEST_END + TEST_BEGIN(test_arena_i_purge) { unsigned narenas; @@ -427,6 +459,38 @@ TEST_BEGIN(test_arenas_initialized) } TEST_END +TEST_BEGIN(test_arenas_lg_dirty_mult) +{ + ssize_t lg_dirty_mult, orig_lg_dirty_mult, prev_lg_dirty_mult; + size_t sz = sizeof(ssize_t); + + assert_d_eq(mallctl("arenas.lg_dirty_mult", &orig_lg_dirty_mult, &sz, + NULL, 0), 0, "Unexpected mallctl() failure"); + + lg_dirty_mult = -2; + assert_d_eq(mallctl("arenas.lg_dirty_mult", NULL, NULL, + &lg_dirty_mult, sizeof(ssize_t)), EFAULT, + "Unexpected mallctl() success"); + + lg_dirty_mult = (sizeof(size_t) << 3); + assert_d_eq(mallctl("arenas.lg_dirty_mult", NULL, NULL, + &lg_dirty_mult, sizeof(ssize_t)), EFAULT, + "Unexpected mallctl() success"); + + for (prev_lg_dirty_mult = orig_lg_dirty_mult, lg_dirty_mult = -1; + lg_dirty_mult < (sizeof(ssize_t) << 3); prev_lg_dirty_mult = + lg_dirty_mult, lg_dirty_mult++) { + ssize_t old_lg_dirty_mult; + + assert_d_eq(mallctl("arenas.lg_dirty_mult", &old_lg_dirty_mult, + &sz, &lg_dirty_mult, sizeof(ssize_t)), 0, + "Unexpected mallctl() failure"); + assert_zd_eq(old_lg_dirty_mult, prev_lg_dirty_mult, + "Unexpected old arenas.lg_dirty_mult"); + } +} +TEST_END + TEST_BEGIN(test_arenas_constants) { @@ -554,9 +618,11 @@ main(void) test_tcache_none, test_tcache, test_thread_arena, + test_arena_i_lg_dirty_mult, test_arena_i_purge, test_arena_i_dss, test_arenas_initialized, + test_arenas_lg_dirty_mult, test_arenas_constants, test_arenas_bin_constants, test_arenas_lrun_constants, diff --git a/test/unit/rtree.c b/test/unit/rtree.c index 556c4a87..496e03a4 100644 --- a/test/unit/rtree.c +++ b/test/unit/rtree.c @@ -22,7 +22,7 @@ TEST_BEGIN(test_rtree_get_empty) rtree_t rtree; assert_false(rtree_new(&rtree, i, node_alloc, node_dalloc), "Unexpected rtree_new() failure"); - assert_ptr_eq(rtree_get(&rtree, 0), NULL, + assert_ptr_null(rtree_get(&rtree, 
0), "rtree_get() should return NULL for empty tree"); rtree_delete(&rtree); } @@ -75,8 +75,8 @@ TEST_BEGIN(test_rtree_bits) "get key=%#"PRIxPTR, i, j, k, keys[j], keys[k]); } - assert_ptr_eq(rtree_get(&rtree, - (((uintptr_t)1) << (sizeof(uintptr_t)*8-i))), NULL, + assert_ptr_null(rtree_get(&rtree, + (((uintptr_t)1) << (sizeof(uintptr_t)*8-i))), "Only leftmost rtree leaf should be set; " "i=%u, j=%u", i, j); rtree_set(&rtree, keys[j], NULL); @@ -117,11 +117,11 @@ TEST_BEGIN(test_rtree_random) for (j = 0; j < NSET; j++) { rtree_set(&rtree, keys[j], NULL); - assert_ptr_eq(rtree_get(&rtree, keys[j]), NULL, + assert_ptr_null(rtree_get(&rtree, keys[j]), "rtree_get() should return previously set value"); } for (j = 0; j < NSET; j++) { - assert_ptr_eq(rtree_get(&rtree, keys[j]), NULL, + assert_ptr_null(rtree_get(&rtree, keys[j]), "rtree_get() should return previously set value"); } From e0a08a14962c8d6b09fd25ba9f3f6c57d5a4f844 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 18 Mar 2015 21:06:58 -0700 Subject: [PATCH 0680/3378] Restore --enable-ivsalloc. However, unlike before it was removed do not force --enable-ivsalloc when Darwin zone allocator integration is enabled, since the zone allocator code uses ivsalloc() regardless of whether malloc_usable_size() and sallocx() do. This resolves #211. --- ChangeLog | 2 -- INSTALL | 6 +++++ configure.ac | 23 ++++++++++++++++++- .../jemalloc/internal/jemalloc_internal.h.in | 7 ++++++ .../internal/jemalloc_internal_defs.h.in | 6 +++++ src/jemalloc.c | 4 ++-- 6 files changed, 43 insertions(+), 5 deletions(-) diff --git a/ChangeLog b/ChangeLog index ef7dbfdb..a462d025 100644 --- a/ChangeLog +++ b/ChangeLog @@ -133,8 +133,6 @@ found in the git revision history: - Remove the "stats.huge.allocated", "stats.huge.nmalloc", and "stats.huge.ndalloc" mallctls. - Remove the --enable-mremap option. - - Remove the --enable-ivsalloc option, and merge its functionality into - --enable-debug. 
- Remove the "stats.chunks.current", "stats.chunks.total", and "stats.chunks.high" mallctls. diff --git a/INSTALL b/INSTALL index 517fe021..cd760ca2 100644 --- a/INSTALL +++ b/INSTALL @@ -92,6 +92,7 @@ any of the following arguments (not a definitive list) to 'configure': --enable-debug Enable assertions and validation code. This incurs a substantial performance hit, but is very useful during application development. + Implies --enable-ivsalloc. --enable-code-coverage Enable code coverage support, for use during jemalloc test development. @@ -110,6 +111,11 @@ any of the following arguments (not a definitive list) to 'configure': Disable statistics gathering functionality. See the "opt.stats_print" option documentation for usage details. +--enable-ivsalloc + Enable validation code, which verifies that pointers reside within + jemalloc-owned chunks before dereferencing them. This incurs a minor + performance hit. + --enable-prof Enable heap profiling and leak detection functionality. See the "opt.prof" option documentation for usage details. When enabled, there are several diff --git a/configure.ac b/configure.ac index 4ac7ac82..be49743d 100644 --- a/configure.ac +++ b/configure.ac @@ -625,7 +625,8 @@ fi dnl Do not compile with debugging by default. AC_ARG_ENABLE([debug], - [AS_HELP_STRING([--enable-debug], [Build debugging code])], + [AS_HELP_STRING([--enable-debug], + [Build debugging code (implies --enable-ivsalloc)])], [if test "x$enable_debug" = "xno" ; then enable_debug="0" else @@ -637,8 +638,28 @@ fi if test "x$enable_debug" = "x1" ; then AC_DEFINE([JEMALLOC_DEBUG], [ ]) fi +if test "x$enable_debug" = "x1" ; then + AC_DEFINE([JEMALLOC_DEBUG], [ ]) + enable_ivsalloc="1" +fi AC_SUBST([enable_debug]) +dnl Do not validate pointers by default. 
+AC_ARG_ENABLE([ivsalloc], + [AS_HELP_STRING([--enable-ivsalloc], + [Validate pointers passed through the public API])], +[if test "x$enable_ivsalloc" = "xno" ; then + enable_ivsalloc="0" +else + enable_ivsalloc="1" +fi +], +[enable_ivsalloc="0"] +) +if test "x$enable_ivsalloc" = "x1" ; then + AC_DEFINE([JEMALLOC_IVSALLOC], [ ]) +fi + dnl Only optimize if not debugging. if test "x$enable_debug" = "x0" -a "x$no_CFLAGS" = "xyes" ; then dnl Make sure that an optimization flag was not specified in EXTRA_CFLAGS. diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 8ed69ce2..b398f31e 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -119,6 +119,13 @@ static const bool config_xmalloc = false #endif ; +static const bool config_ivsalloc = +#ifdef JEMALLOC_IVSALLOC + true +#else + false +#endif + ; #ifdef JEMALLOC_C11ATOMICS #include diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 191abc52..a943d23c 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -186,6 +186,12 @@ #undef JEMALLOC_INTERNAL_FFSL #undef JEMALLOC_INTERNAL_FFS +/* + * JEMALLOC_IVSALLOC enables ivsalloc(), which verifies that pointers reside + * within jemalloc-owned chunks before dereferencing them. + */ +#undef JEMALLOC_IVSALLOC + /* * Darwin (OS X) uses zones to work around Mach-O symbol override shortcomings. 
*/ diff --git a/src/jemalloc.c b/src/jemalloc.c index d5110092..7e9f4860 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2306,7 +2306,7 @@ je_sallocx(const void *ptr, int flags) assert(malloc_initialized() || IS_INITIALIZER); malloc_thread_init(); - if (config_debug) + if (config_ivsalloc) usize = ivsalloc(ptr, config_prof); else usize = isalloc(ptr, config_prof); @@ -2434,7 +2434,7 @@ je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr) assert(malloc_initialized() || IS_INITIALIZER); malloc_thread_init(); - if (config_debug) + if (config_ivsalloc) ret = ivsalloc(ptr, config_prof); else ret = (ptr == NULL) ? 0 : isalloc(ptr, config_prof); From 7e336e7359ec50f06ec73f29033c7807148bf476 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 20 Mar 2015 18:08:10 -0700 Subject: [PATCH 0681/3378] Fix lg_dirty_mult-related stats printing. This regression was introduced by 8d6a3e8321a7767cb2ca0930b85d5d488a8cc659 (Implement dynamic per arena control over dirty page purging.). This resolves #215. 
--- src/stats.c | 148 +++++++++++++++++++++++++++++----------------------- 1 file changed, 82 insertions(+), 66 deletions(-) diff --git a/src/stats.c b/src/stats.c index f246c8bc..ae74737c 100644 --- a/src/stats.c +++ b/src/stats.c @@ -6,31 +6,31 @@ xmallctl(n, v, &sz, NULL, 0); \ } while (0) -#define CTL_I_GET(n, v, t) do { \ +#define CTL_M1_GET(n, i, v, t) do { \ size_t mib[6]; \ size_t miblen = sizeof(mib) / sizeof(size_t); \ size_t sz = sizeof(t); \ xmallctlnametomib(n, mib, &miblen); \ - mib[2] = i; \ + mib[1] = (i); \ xmallctlbymib(mib, miblen, v, &sz, NULL, 0); \ } while (0) -#define CTL_J_GET(n, v, t) do { \ +#define CTL_M2_GET(n, i, v, t) do { \ size_t mib[6]; \ size_t miblen = sizeof(mib) / sizeof(size_t); \ size_t sz = sizeof(t); \ xmallctlnametomib(n, mib, &miblen); \ - mib[2] = j; \ + mib[2] = (i); \ xmallctlbymib(mib, miblen, v, &sz, NULL, 0); \ } while (0) -#define CTL_IJ_GET(n, v, t) do { \ +#define CTL_M2_M4_GET(n, i, j, v, t) do { \ size_t mib[6]; \ size_t miblen = sizeof(mib) / sizeof(size_t); \ size_t sz = sizeof(t); \ xmallctlnametomib(n, mib, &miblen); \ - mib[2] = i; \ - mib[4] = j; \ + mib[2] = (i); \ + mib[4] = (j); \ xmallctlbymib(mib, miblen, v, &sz, NULL, 0); \ } while (0) @@ -82,7 +82,8 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, for (j = 0, in_gap = false; j < nbins; j++) { uint64_t nruns; - CTL_IJ_GET("stats.arenas.0.bins.0.nruns", &nruns, uint64_t); + CTL_M2_M4_GET("stats.arenas.0.bins.0.nruns", i, j, &nruns, + uint64_t); if (nruns == 0) in_gap = true; else { @@ -98,27 +99,28 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, " ---\n"); in_gap = false; } - CTL_J_GET("arenas.bin.0.size", ®_size, size_t); - CTL_J_GET("arenas.bin.0.nregs", &nregs, uint32_t); - CTL_J_GET("arenas.bin.0.run_size", &run_size, size_t); - CTL_IJ_GET("stats.arenas.0.bins.0.nmalloc", + CTL_M2_GET("arenas.bin.0.size", j, ®_size, size_t); + CTL_M2_GET("arenas.bin.0.nregs", j, &nregs, uint32_t); 
+ CTL_M2_GET("arenas.bin.0.run_size", j, &run_size, + size_t); + CTL_M2_M4_GET("stats.arenas.0.bins.0.nmalloc", i, j, &nmalloc, uint64_t); - CTL_IJ_GET("stats.arenas.0.bins.0.ndalloc", + CTL_M2_M4_GET("stats.arenas.0.bins.0.ndalloc", i, j, &ndalloc, uint64_t); - CTL_IJ_GET("stats.arenas.0.bins.0.curregs", + CTL_M2_M4_GET("stats.arenas.0.bins.0.curregs", i, j, &curregs, size_t); - CTL_IJ_GET("stats.arenas.0.bins.0.nrequests", + CTL_M2_M4_GET("stats.arenas.0.bins.0.nrequests", i, j, &nrequests, uint64_t); if (config_tcache) { - CTL_IJ_GET("stats.arenas.0.bins.0.nfills", - &nfills, uint64_t); - CTL_IJ_GET("stats.arenas.0.bins.0.nflushes", - &nflushes, uint64_t); + CTL_M2_M4_GET("stats.arenas.0.bins.0.nfills", i, + j, &nfills, uint64_t); + CTL_M2_M4_GET("stats.arenas.0.bins.0.nflushes", + i, j, &nflushes, uint64_t); } - CTL_IJ_GET("stats.arenas.0.bins.0.nreruns", &reruns, - uint64_t); - CTL_IJ_GET("stats.arenas.0.bins.0.curruns", &curruns, - size_t); + CTL_M2_M4_GET("stats.arenas.0.bins.0.nreruns", i, j, + &reruns, uint64_t); + CTL_M2_M4_GET("stats.arenas.0.bins.0.curruns", i, j, + &curruns, size_t); availregs = nregs * curruns; milli = (availregs != 0) ? 
(1000 * curregs) / availregs @@ -179,18 +181,18 @@ stats_arena_lruns_print(void (*write_cb)(void *, const char *), void *cbopaque, uint64_t nmalloc, ndalloc, nrequests; size_t run_size, curruns; - CTL_IJ_GET("stats.arenas.0.lruns.0.nmalloc", &nmalloc, + CTL_M2_M4_GET("stats.arenas.0.lruns.0.nmalloc", i, j, &nmalloc, uint64_t); - CTL_IJ_GET("stats.arenas.0.lruns.0.ndalloc", &ndalloc, - uint64_t); - CTL_IJ_GET("stats.arenas.0.lruns.0.nrequests", &nrequests, + CTL_M2_M4_GET("stats.arenas.0.lruns.0.ndalloc", i, j, &ndalloc, uint64_t); + CTL_M2_M4_GET("stats.arenas.0.lruns.0.nrequests", i, j, + &nrequests, uint64_t); if (nrequests == 0) in_gap = true; else { - CTL_J_GET("arenas.lrun.0.size", &run_size, size_t); - CTL_IJ_GET("stats.arenas.0.lruns.0.curruns", &curruns, - size_t); + CTL_M2_GET("arenas.lrun.0.size", j, &run_size, size_t); + CTL_M2_M4_GET("stats.arenas.0.lruns.0.curruns", i, j, + &curruns, size_t); if (in_gap) { malloc_cprintf(write_cb, cbopaque, " ---\n"); @@ -226,19 +228,19 @@ stats_arena_hchunks_print(void (*write_cb)(void *, const char *), uint64_t nmalloc, ndalloc, nrequests; size_t hchunk_size, curhchunks; - CTL_IJ_GET("stats.arenas.0.hchunks.0.nmalloc", &nmalloc, - uint64_t); - CTL_IJ_GET("stats.arenas.0.hchunks.0.ndalloc", &ndalloc, - uint64_t); - CTL_IJ_GET("stats.arenas.0.hchunks.0.nrequests", &nrequests, - uint64_t); + CTL_M2_M4_GET("stats.arenas.0.hchunks.0.nmalloc", i, j, + &nmalloc, uint64_t); + CTL_M2_M4_GET("stats.arenas.0.hchunks.0.ndalloc", i, j, + &ndalloc, uint64_t); + CTL_M2_M4_GET("stats.arenas.0.hchunks.0.nrequests", i, j, + &nrequests, uint64_t); if (nrequests == 0) in_gap = true; else { - CTL_J_GET("arenas.hchunk.0.size", &hchunk_size, + CTL_M2_GET("arenas.hchunk.0.size", j, &hchunk_size, size_t); - CTL_IJ_GET("stats.arenas.0.hchunks.0.curhchunks", - &curhchunks, size_t); + CTL_M2_M4_GET("stats.arenas.0.hchunks.0.curhchunks", i, + j, &curhchunks, size_t); if (in_gap) { malloc_cprintf(write_cb, cbopaque, " ---\n"); @@ -277,26 +279,26 
@@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, CTL_GET("arenas.page", &page, size_t); - CTL_I_GET("stats.arenas.0.nthreads", &nthreads, unsigned); + CTL_M2_GET("stats.arenas.0.nthreads", i, &nthreads, unsigned); malloc_cprintf(write_cb, cbopaque, "assigned threads: %u\n", nthreads); - CTL_I_GET("stats.arenas.0.dss", &dss, const char *); + CTL_M2_GET("stats.arenas.0.dss", i, &dss, const char *); malloc_cprintf(write_cb, cbopaque, "dss allocation precedence: %s\n", dss); - CTL_I_GET("stats.arenas.0.lg_dirty_mult", &lg_dirty_mult, ssize_t); + CTL_M1_GET("arena.0.lg_dirty_mult", i, &lg_dirty_mult, ssize_t); if (lg_dirty_mult >= 0) { malloc_cprintf(write_cb, cbopaque, - "Min active:dirty page ratio: %u:1\n", + "min active:dirty page ratio: %u:1\n", (1U << lg_dirty_mult)); } else { malloc_cprintf(write_cb, cbopaque, - "Min active:dirty page ratio: N/A\n"); + "min active:dirty page ratio: N/A\n"); } - CTL_I_GET("stats.arenas.0.pactive", &pactive, size_t); - CTL_I_GET("stats.arenas.0.pdirty", &pdirty, size_t); - CTL_I_GET("stats.arenas.0.npurge", &npurge, uint64_t); - CTL_I_GET("stats.arenas.0.nmadvise", &nmadvise, uint64_t); - CTL_I_GET("stats.arenas.0.purged", &purged, uint64_t); + CTL_M2_GET("stats.arenas.0.pactive", i, &pactive, size_t); + CTL_M2_GET("stats.arenas.0.pdirty", i, &pdirty, size_t); + CTL_M2_GET("stats.arenas.0.npurge", i, &npurge, uint64_t); + CTL_M2_GET("stats.arenas.0.nmadvise", i, &nmadvise, uint64_t); + CTL_M2_GET("stats.arenas.0.purged", i, &purged, uint64_t); malloc_cprintf(write_cb, cbopaque, "dirty pages: %zu:%zu active:dirty, %"PRIu64" sweep%s," " %"PRIu64" madvise%s, %"PRIu64" purged\n", @@ -306,26 +308,31 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, malloc_cprintf(write_cb, cbopaque, " allocated nmalloc ndalloc" " nrequests\n"); - CTL_I_GET("stats.arenas.0.small.allocated", &small_allocated, size_t); - CTL_I_GET("stats.arenas.0.small.nmalloc", &small_nmalloc, uint64_t); - 
CTL_I_GET("stats.arenas.0.small.ndalloc", &small_ndalloc, uint64_t); - CTL_I_GET("stats.arenas.0.small.nrequests", &small_nrequests, uint64_t); + CTL_M2_GET("stats.arenas.0.small.allocated", i, &small_allocated, + size_t); + CTL_M2_GET("stats.arenas.0.small.nmalloc", i, &small_nmalloc, uint64_t); + CTL_M2_GET("stats.arenas.0.small.ndalloc", i, &small_ndalloc, uint64_t); + CTL_M2_GET("stats.arenas.0.small.nrequests", i, &small_nrequests, + uint64_t); malloc_cprintf(write_cb, cbopaque, "small: %12zu %12"PRIu64" %12"PRIu64" %12"PRIu64 "\n", small_allocated, small_nmalloc, small_ndalloc, small_nrequests); - CTL_I_GET("stats.arenas.0.large.allocated", &large_allocated, size_t); - CTL_I_GET("stats.arenas.0.large.nmalloc", &large_nmalloc, uint64_t); - CTL_I_GET("stats.arenas.0.large.ndalloc", &large_ndalloc, uint64_t); - CTL_I_GET("stats.arenas.0.large.nrequests", &large_nrequests, uint64_t); + CTL_M2_GET("stats.arenas.0.large.allocated", i, &large_allocated, + size_t); + CTL_M2_GET("stats.arenas.0.large.nmalloc", i, &large_nmalloc, uint64_t); + CTL_M2_GET("stats.arenas.0.large.ndalloc", i, &large_ndalloc, uint64_t); + CTL_M2_GET("stats.arenas.0.large.nrequests", i, &large_nrequests, + uint64_t); malloc_cprintf(write_cb, cbopaque, "large: %12zu %12"PRIu64" %12"PRIu64" %12"PRIu64 "\n", large_allocated, large_nmalloc, large_ndalloc, large_nrequests); - CTL_I_GET("stats.arenas.0.huge.allocated", &huge_allocated, size_t); - CTL_I_GET("stats.arenas.0.huge.nmalloc", &huge_nmalloc, uint64_t); - CTL_I_GET("stats.arenas.0.huge.ndalloc", &huge_ndalloc, uint64_t); - CTL_I_GET("stats.arenas.0.huge.nrequests", &huge_nrequests, uint64_t); + CTL_M2_GET("stats.arenas.0.huge.allocated", i, &huge_allocated, size_t); + CTL_M2_GET("stats.arenas.0.huge.nmalloc", i, &huge_nmalloc, uint64_t); + CTL_M2_GET("stats.arenas.0.huge.ndalloc", i, &huge_ndalloc, uint64_t); + CTL_M2_GET("stats.arenas.0.huge.nrequests", i, &huge_nrequests, + uint64_t); malloc_cprintf(write_cb, cbopaque, "huge: %12zu 
%12"PRIu64" %12"PRIu64" %12"PRIu64 "\n", @@ -339,11 +346,12 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, small_nrequests + large_nrequests + huge_nrequests); malloc_cprintf(write_cb, cbopaque, "active: %12zu\n", pactive * page); - CTL_I_GET("stats.arenas.0.mapped", &mapped, size_t); + CTL_M2_GET("stats.arenas.0.mapped", i, &mapped, size_t); malloc_cprintf(write_cb, cbopaque, "mapped: %12zu\n", mapped); - CTL_I_GET("stats.arenas.0.metadata.mapped", &metadata_mapped, size_t); - CTL_I_GET("stats.arenas.0.metadata.allocated", &metadata_allocated, + CTL_M2_GET("stats.arenas.0.metadata.mapped", i, &metadata_mapped, + size_t); + CTL_M2_GET("stats.arenas.0.metadata.allocated", i, &metadata_allocated, size_t); malloc_cprintf(write_cb, cbopaque, "metadata: mapped: %zu, allocated: %zu\n", metadata_mapped, @@ -464,6 +472,14 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, malloc_cprintf(write_cb, cbopaque, \ " opt."#n": %zd\n", ssv); \ } +#define OPT_WRITE_SSIZE_T_MUTABLE(n, m) \ + ssize_t ssv2; \ + if (je_mallctl("opt."#n, &ssv, &sssz, NULL, 0) == 0 && \ + je_mallctl(#m, &ssv2, &sssz, NULL, 0) == 0) { \ + malloc_cprintf(write_cb, cbopaque, \ + " opt."#n": %zd ("#m": %zd)\n", ssv, \ + ssv2); \ + } #define OPT_WRITE_CHAR_P(n) \ if (je_mallctl("opt."#n, &cpv, &cpsz, NULL, 0) == 0) { \ malloc_cprintf(write_cb, cbopaque, \ @@ -476,7 +492,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, OPT_WRITE_SIZE_T(lg_chunk) OPT_WRITE_CHAR_P(dss) OPT_WRITE_SIZE_T(narenas) - OPT_WRITE_SSIZE_T(lg_dirty_mult) + OPT_WRITE_SSIZE_T_MUTABLE(lg_dirty_mult, arenas.lg_dirty_mult) OPT_WRITE_BOOL(stats_print) OPT_WRITE_CHAR_P(junk) OPT_WRITE_SIZE_T(quarantine) @@ -519,7 +535,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, CTL_GET("arenas.page", &sv, size_t); malloc_cprintf(write_cb, cbopaque, "Page size: %zu\n", sv); - CTL_GET("opt.lg_dirty_mult", &ssv, ssize_t); + CTL_GET("arenas.lg_dirty_mult", &ssv, 
ssize_t); if (ssv >= 0) { malloc_cprintf(write_cb, cbopaque, "Min active:dirty page ratio per arena: %u:1\n", From fd5901ce3083cd3277b87aa414884d7628e2d509 Mon Sep 17 00:00:00 2001 From: Qinfan Wu Date: Sat, 21 Mar 2015 10:18:39 -0700 Subject: [PATCH 0682/3378] Fix a compile error caused by mixed declarations and code. --- src/stats.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/stats.c b/src/stats.c index ae74737c..b41b458b 100644 --- a/src/stats.c +++ b/src/stats.c @@ -472,14 +472,15 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, malloc_cprintf(write_cb, cbopaque, \ " opt."#n": %zd\n", ssv); \ } -#define OPT_WRITE_SSIZE_T_MUTABLE(n, m) \ +#define OPT_WRITE_SSIZE_T_MUTABLE(n, m) { \ ssize_t ssv2; \ if (je_mallctl("opt."#n, &ssv, &sssz, NULL, 0) == 0 && \ je_mallctl(#m, &ssv2, &sssz, NULL, 0) == 0) { \ malloc_cprintf(write_cb, cbopaque, \ " opt."#n": %zd ("#m": %zd)\n", ssv, \ ssv2); \ - } + } \ +} #define OPT_WRITE_CHAR_P(n) \ if (je_mallctl("opt."#n, &cpv, &cpsz, NULL, 0) == 0) { \ malloc_cprintf(write_cb, cbopaque, \ From 8ad6bf360f9ca5c6c9a1d8e5825ee473bb4697da Mon Sep 17 00:00:00 2001 From: Igor Podlesny Date: Sun, 22 Mar 2015 01:30:02 +0700 Subject: [PATCH 0683/3378] Fix indentation inconsistencies. --- include/jemalloc/internal/util.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h index 5ad4933d..001cd092 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util.h @@ -22,17 +22,17 @@ * uninitialized. 
*/ #ifdef JEMALLOC_CC_SILENCE -# define JEMALLOC_CC_SILENCE_INIT(v) = v +# define JEMALLOC_CC_SILENCE_INIT(v) = v #else -# define JEMALLOC_CC_SILENCE_INIT(v) +# define JEMALLOC_CC_SILENCE_INIT(v) #endif #ifdef __GNUC__ -#define likely(x) __builtin_expect(!!(x), 1) -#define unlikely(x) __builtin_expect(!!(x), 0) +# define likely(x) __builtin_expect(!!(x), 1) +# define unlikely(x) __builtin_expect(!!(x), 0) #else -#define likely(x) !!(x) -#define unlikely(x) !!(x) +# define likely(x) !!(x) +# define unlikely(x) !!(x) #endif /* From 4acd75a694173186e9e0399d2855f05ce8553008 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 23 Mar 2015 17:25:57 -0700 Subject: [PATCH 0684/3378] Add the "stats.allocated" mallctl. --- ChangeLog | 2 ++ doc/jemalloc.xml.in | 23 +++++++++++++-- include/jemalloc/internal/base.h | 2 +- include/jemalloc/internal/ctl.h | 1 + include/jemalloc/internal/private_symbols.txt | 2 +- src/base.c | 29 ++++++++++++++----- src/ctl.c | 23 ++++++++++----- src/stats.c | 8 +++-- 8 files changed, 67 insertions(+), 23 deletions(-) diff --git a/ChangeLog b/ChangeLog index a462d025..26075766 100644 --- a/ChangeLog +++ b/ChangeLog @@ -63,6 +63,8 @@ found in the git revision history: - Add metadata statistics, which are accessible via the "stats.metadata", "stats.arenas..metadata.mapped", and "stats.arenas..metadata.allocated" mallctls. + - Add the "stats.resident" mallctl, which reports the upper limit of + physically resident memory mapped by the allocator. - Add the "prof.gdump" mallctl, which makes it possible to toggle the gdump feature on/off during program execution. - Add sdallocx(), which implements sized deallocation. The primary diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 01ac38c3..adff6a4d 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1938,6 +1938,23 @@ malloc_conf = "xmalloc:true";]]> linkend="stats.arenas.i.metadata.allocated">stats.arenas.<i>.metadata.allocated). 
+ + + stats.resident + (size_t) + r- + [] + + Maximum number of bytes in physically resident data + pages mapped by the allocator, comprising all pages dedicated to + allocator metadata, pages backing active allocations, and unused dirty + pages. This is a maximum rather than precise because pages may not + actually be physically resident if they correspond to demand-zeroed + virtual memory that has not yet been touched. This is a multiple of the + page size, and is larger than stats.active. + + stats.mapped @@ -1945,10 +1962,10 @@ malloc_conf = "xmalloc:true";]]> r- [] - Total number of bytes in chunks mapped on behalf of the - application. This is a multiple of the chunk size, and is at least as + Total number of bytes in active chunks mapped by the + allocator. This is a multiple of the chunk size, and is at least as large as stats.active. This + linkend="stats.resident">stats.resident. This does not include inactive chunks. diff --git a/include/jemalloc/internal/base.h b/include/jemalloc/internal/base.h index bec76b32..39e46ee4 100644 --- a/include/jemalloc/internal/base.h +++ b/include/jemalloc/internal/base.h @@ -10,7 +10,7 @@ #ifdef JEMALLOC_H_EXTERNS void *base_alloc(size_t size); -size_t base_allocated_get(void); +void base_stats_get(size_t *allocated, size_t *resident, size_t *mapped); bool base_boot(void); void base_prefork(void); void base_postfork_parent(void); diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index ab9c9862..7c2a4bea 100644 --- a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -53,6 +53,7 @@ struct ctl_stats_s { size_t allocated; size_t active; size_t metadata; + size_t resident; size_t mapped; unsigned narenas; ctl_arena_stats_t *arenas; /* (narenas + 1) elements. 
*/ diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index bc0f2a6a..aaf69786 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -111,11 +111,11 @@ atomic_sub_uint32 atomic_sub_uint64 atomic_sub_z base_alloc -base_allocated_get base_boot base_postfork_child base_postfork_parent base_prefork +base_stats_get bitmap_full bitmap_get bitmap_info_init diff --git a/src/base.c b/src/base.c index 01c62df4..1a9b829a 100644 --- a/src/base.c +++ b/src/base.c @@ -8,6 +8,8 @@ static malloc_mutex_t base_mtx; static extent_tree_t base_avail_szad; static extent_node_t *base_nodes; static size_t base_allocated; +static size_t base_resident; +static size_t base_mapped; /******************************************************************************/ @@ -54,11 +56,15 @@ base_chunk_alloc(size_t minsize) base_node_dalloc(node); return (NULL); } + base_mapped += csize; if (node == NULL) { + node = (extent_node_t *)addr; + addr = (void *)((uintptr_t)addr + nsize); csize -= nsize; - node = (extent_node_t *)((uintptr_t)addr + csize); - if (config_stats) + if (config_stats) { base_allocated += nsize; + base_resident += PAGE_CEILING(nsize); + } } extent_node_init(node, NULL, addr, csize, true); return (node); @@ -106,23 +112,30 @@ base_alloc(size_t size) extent_tree_szad_insert(&base_avail_szad, node); } else base_node_dalloc(node); - if (config_stats) + if (config_stats) { base_allocated += csize; + /* + * Add one PAGE to base_resident for every page boundary that is + * crossed by the new allocation. 
+ */ + base_resident += PAGE_CEILING((uintptr_t)ret + csize) - + PAGE_CEILING((uintptr_t)ret); + } JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, csize); label_return: malloc_mutex_unlock(&base_mtx); return (ret); } -size_t -base_allocated_get(void) +void +base_stats_get(size_t *allocated, size_t *resident, size_t *mapped) { - size_t ret; malloc_mutex_lock(&base_mtx); - ret = base_allocated; + *allocated = base_allocated; + *resident = base_resident; + *mapped = base_mapped; malloc_mutex_unlock(&base_mtx); - return (ret); } bool diff --git a/src/ctl.c b/src/ctl.c index 447b8776..0ed8ddd4 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -194,6 +194,7 @@ CTL_PROTO(stats_cactive) CTL_PROTO(stats_allocated) CTL_PROTO(stats_active) CTL_PROTO(stats_metadata) +CTL_PROTO(stats_resident) CTL_PROTO(stats_mapped) /******************************************************************************/ @@ -469,6 +470,7 @@ static const ctl_named_node_t stats_node[] = { {NAME("allocated"), CTL(stats_allocated)}, {NAME("active"), CTL(stats_active)}, {NAME("metadata"), CTL(stats_metadata)}, + {NAME("resident"), CTL(stats_resident)}, {NAME("mapped"), CTL(stats_mapped)}, {NAME("arenas"), CHILD(indexed, stats_arenas)} }; @@ -711,17 +713,23 @@ ctl_refresh(void) } if (config_stats) { + size_t base_allocated, base_resident, base_mapped; + base_stats_get(&base_allocated, &base_resident, &base_mapped); ctl_stats.allocated = - ctl_stats.arenas[ctl_stats.narenas].allocated_small - + ctl_stats.arenas[ctl_stats.narenas].astats.allocated_large - + ctl_stats.arenas[ctl_stats.narenas].astats.allocated_huge; + ctl_stats.arenas[ctl_stats.narenas].allocated_small + + ctl_stats.arenas[ctl_stats.narenas].astats.allocated_large + + ctl_stats.arenas[ctl_stats.narenas].astats.allocated_huge; ctl_stats.active = (ctl_stats.arenas[ctl_stats.narenas].pactive << LG_PAGE); - ctl_stats.metadata = base_allocated_get() - + ctl_stats.arenas[ctl_stats.narenas].astats.metadata_mapped - + ctl_stats.arenas[ctl_stats.narenas].astats + 
ctl_stats.metadata = base_allocated + + ctl_stats.arenas[ctl_stats.narenas].astats.metadata_mapped + + ctl_stats.arenas[ctl_stats.narenas].astats .metadata_allocated; - ctl_stats.mapped = + ctl_stats.resident = base_resident + + ctl_stats.arenas[ctl_stats.narenas].astats.metadata_mapped + + ((ctl_stats.arenas[ctl_stats.narenas].pactive + + ctl_stats.arenas[ctl_stats.narenas].pdirty) << LG_PAGE); + ctl_stats.mapped = base_mapped + ctl_stats.arenas[ctl_stats.narenas].astats.mapped; } @@ -1976,6 +1984,7 @@ CTL_RO_CGEN(config_stats, stats_cactive, &stats_cactive, size_t *) CTL_RO_CGEN(config_stats, stats_allocated, ctl_stats.allocated, size_t) CTL_RO_CGEN(config_stats, stats_active, ctl_stats.active, size_t) CTL_RO_CGEN(config_stats, stats_metadata, ctl_stats.metadata, size_t) +CTL_RO_CGEN(config_stats, stats_resident, ctl_stats.resident, size_t) CTL_RO_CGEN(config_stats, stats_mapped, ctl_stats.mapped, size_t) CTL_RO_GEN(stats_arenas_i_dss, ctl_stats.arenas[mib[2]].dss, const char *) diff --git a/src/stats.c b/src/stats.c index b41b458b..c5cea5e6 100644 --- a/src/stats.c +++ b/src/stats.c @@ -573,16 +573,18 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, if (config_stats) { size_t *cactive; - size_t allocated, active, metadata, mapped; + size_t allocated, active, metadata, resident, mapped; CTL_GET("stats.cactive", &cactive, size_t *); CTL_GET("stats.allocated", &allocated, size_t); CTL_GET("stats.active", &active, size_t); CTL_GET("stats.metadata", &metadata, size_t); + CTL_GET("stats.resident", &resident, size_t); CTL_GET("stats.mapped", &mapped, size_t); malloc_cprintf(write_cb, cbopaque, - "Allocated: %zu, active: %zu, metadata: %zu, mapped: %zu\n", - allocated, active, metadata, mapped); + "Allocated: %zu, active: %zu, metadata: %zu, resident: %zu," + " mapped: %zu\n", allocated, active, metadata, resident, + mapped); malloc_cprintf(write_cb, cbopaque, "Current active ceiling: %zu\n", atomic_read_z(cactive)); From 
ef0a0cc3283ea561a40b33f4325d54bbc351de21 Mon Sep 17 00:00:00 2001 From: Igor Podlesny Date: Sun, 22 Mar 2015 23:49:58 +0700 Subject: [PATCH 0685/3378] We have pages_unmap(ret, size) so we use it. --- src/chunk_mmap.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/src/chunk_mmap.c b/src/chunk_mmap.c index 7e02c102..30ac10be 100644 --- a/src/chunk_mmap.c +++ b/src/chunk_mmap.c @@ -40,15 +40,7 @@ pages_map(void *addr, size_t size) /* * We succeeded in mapping memory, but not in the right place. */ - if (munmap(ret, size) == -1) { - char buf[BUFERROR_BUF]; - - buferror(get_errno(), buf, sizeof(buf)); - malloc_printf(" Date: Tue, 24 Mar 2015 12:33:12 -0700 Subject: [PATCH 0686/3378] Fix arena_get() usage. Fix arena_get() calls that specify refresh_if_missing=false. In ctl_refresh() and ctl.c's arena_purge(), these calls attempted to only refresh once, but did so in an unreliable way. arena_i_lg_dirty_mult_ctl() was simply wrong to pass refresh_if_missing=false. --- src/ctl.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/src/ctl.c b/src/ctl.c index 0ed8ddd4..44935467 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -684,6 +684,7 @@ ctl_refresh(void) { tsd_t *tsd; unsigned i; + bool refreshed; VARIABLE_ARRAY(arena_t *, tarenas, ctl_stats.narenas); /* @@ -694,8 +695,13 @@ ctl_refresh(void) ctl_arena_clear(&ctl_stats.arenas[ctl_stats.narenas]); tsd = tsd_fetch(); - for (i = 0; i < ctl_stats.narenas; i++) - tarenas[i] = arena_get(tsd, i, false, (i == 0)); + for (i = 0, refreshed = false; i < ctl_stats.narenas; i++) { + tarenas[i] = arena_get(tsd, i, false, false); + if (tarenas[i] == NULL && !refreshed) { + tarenas[i] = arena_get(tsd, i, false, true); + refreshed = true; + } + } for (i = 0; i < ctl_stats.narenas; i++) { if (tarenas[i] != NULL) @@ -1538,11 +1544,17 @@ arena_purge(unsigned arena_ind) { tsd_t *tsd; unsigned i; + bool refreshed; VARIABLE_ARRAY(arena_t *, tarenas, ctl_stats.narenas); tsd = 
tsd_fetch(); - for (i = 0; i < ctl_stats.narenas; i++) - tarenas[i] = arena_get(tsd, i, false, (i == 0)); + for (i = 0, refreshed = false; i < ctl_stats.narenas; i++) { + tarenas[i] = arena_get(tsd, i, false, false); + if (tarenas[i] == NULL && !refreshed) { + tarenas[i] = arena_get(tsd, i, false, true); + refreshed = true; + } + } if (arena_ind == ctl_stats.narenas) { unsigned i; @@ -1638,7 +1650,7 @@ arena_i_lg_dirty_mult_ctl(const size_t *mib, size_t miblen, void *oldp, unsigned arena_ind = mib[1]; arena_t *arena; - arena = arena_get(tsd_fetch(), arena_ind, false, (arena_ind == 0)); + arena = arena_get(tsd_fetch(), arena_ind, false, true); if (arena == NULL) { ret = EFAULT; goto label_return; From bd16ea49c3e36706a52ef9c8f560813c167fa085 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 24 Mar 2015 15:59:28 -0700 Subject: [PATCH 0687/3378] Fix signed/unsigned comparison in arena_lg_dirty_mult_valid(). --- src/arena.c | 3 ++- test/unit/mallctl.c | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/arena.c b/src/arena.c index 7272682d..d38ffc6b 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1037,7 +1037,8 @@ static bool arena_lg_dirty_mult_valid(ssize_t lg_dirty_mult) { - return (lg_dirty_mult >= -1 && lg_dirty_mult < (sizeof(size_t) << 3)); + return (lg_dirty_mult >= -1 && lg_dirty_mult < (ssize_t)(sizeof(size_t) + << 3)); } ssize_t diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 31ada191..29823a6c 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -367,8 +367,8 @@ TEST_BEGIN(test_arena_i_lg_dirty_mult) "Unexpected mallctl() success"); for (prev_lg_dirty_mult = orig_lg_dirty_mult, lg_dirty_mult = -1; - lg_dirty_mult < (sizeof(ssize_t) << 3); prev_lg_dirty_mult = - lg_dirty_mult, lg_dirty_mult++) { + lg_dirty_mult < (ssize_t)(sizeof(size_t) << 3); prev_lg_dirty_mult + = lg_dirty_mult, lg_dirty_mult++) { ssize_t old_lg_dirty_mult; assert_d_eq(mallctl("arena.0.lg_dirty_mult", &old_lg_dirty_mult, @@ -478,7 +478,7 @@ 
TEST_BEGIN(test_arenas_lg_dirty_mult) "Unexpected mallctl() success"); for (prev_lg_dirty_mult = orig_lg_dirty_mult, lg_dirty_mult = -1; - lg_dirty_mult < (sizeof(ssize_t) << 3); prev_lg_dirty_mult = + lg_dirty_mult < (ssize_t)(sizeof(size_t) << 3); prev_lg_dirty_mult = lg_dirty_mult, lg_dirty_mult++) { ssize_t old_lg_dirty_mult; From 562d266511053a51406e91c78eba640cb46ad9c8 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 24 Mar 2015 16:36:12 -0700 Subject: [PATCH 0688/3378] Add the "stats.arenas..lg_dirty_mult" mallctl. --- ChangeLog | 6 +++++- doc/jemalloc.xml.in | 12 ++++++++++++ include/jemalloc/internal/arena.h | 5 +++-- include/jemalloc/internal/ctl.h | 1 + src/arena.c | 8 +++++--- src/ctl.c | 11 ++++++++--- src/stats.c | 11 +---------- 7 files changed, 35 insertions(+), 19 deletions(-) diff --git a/ChangeLog b/ChangeLog index 26075766..8cc214ab 100644 --- a/ChangeLog +++ b/ChangeLog @@ -38,7 +38,8 @@ found in the git revision history: "opt.prof_thread_active_init", "prof.thread_active_init", and "thread.prof.active" mallctls. - Add support for per arena application-specified chunk allocators, configured - via the "arena.chunk.alloc" and "arena.chunk.dalloc" mallctls. + via the "arena.chunk.alloc", "arena.chunk.dalloc", and + "arena..chunk.purge" mallctls. - Refactor huge allocation to be managed by arenas, so that arenas now function as general purpose independent allocators. This is important in the context of user-specified chunk allocators, aside from the scalability @@ -65,6 +66,9 @@ found in the git revision history: "stats.arenas..metadata.allocated" mallctls. - Add the "stats.resident" mallctl, which reports the upper limit of physically resident memory mapped by the allocator. + - Add per arena control over unused dirty page purging, via the + "arenas.lg_dirty_mult", "arena..lg_dirty_mult", and + "stats.arenas..lg_dirty_mult" mallctls. 
- Add the "prof.gdump" mallctl, which makes it possible to toggle the gdump feature on/off during program execution. - Add sdallocx(), which implements sized deallocation. The primary diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index adff6a4d..d3f36164 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1983,6 +1983,18 @@ malloc_conf = "xmalloc:true";]]> + + + stats.arenas.<i>.lg_dirty_mult + (ssize_t) + r- + + Minimum ratio (log base 2) of active to dirty pages. + See opt.lg_dirty_mult + for details. + + stats.arenas.<i>.nthreads diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 56ee74aa..dff99fb4 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -470,8 +470,9 @@ dss_prec_t arena_dss_prec_get(arena_t *arena); bool arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec); ssize_t arena_lg_dirty_mult_default_get(void); bool arena_lg_dirty_mult_default_set(ssize_t lg_dirty_mult); -void arena_stats_merge(arena_t *arena, const char **dss, size_t *nactive, - size_t *ndirty, arena_stats_t *astats, malloc_bin_stats_t *bstats, +void arena_stats_merge(arena_t *arena, const char **dss, + ssize_t *lg_dirty_mult, size_t *nactive, size_t *ndirty, + arena_stats_t *astats, malloc_bin_stats_t *bstats, malloc_large_stats_t *lstats, malloc_huge_stats_t *hstats); arena_t *arena_new(unsigned ind); void arena_boot(void); diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index 7c2a4bea..751c14b5 100644 --- a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -34,6 +34,7 @@ struct ctl_arena_stats_s { bool initialized; unsigned nthreads; const char *dss; + ssize_t lg_dirty_mult; size_t pactive; size_t pdirty; arena_stats_t astats; diff --git a/src/arena.c b/src/arena.c index d38ffc6b..bc13d209 100644 --- a/src/arena.c +++ b/src/arena.c @@ -2657,14 +2657,16 @@ arena_lg_dirty_mult_default_set(ssize_t lg_dirty_mult) } void 
-arena_stats_merge(arena_t *arena, const char **dss, size_t *nactive, - size_t *ndirty, arena_stats_t *astats, malloc_bin_stats_t *bstats, - malloc_large_stats_t *lstats, malloc_huge_stats_t *hstats) +arena_stats_merge(arena_t *arena, const char **dss, ssize_t *lg_dirty_mult, + size_t *nactive, size_t *ndirty, arena_stats_t *astats, + malloc_bin_stats_t *bstats, malloc_large_stats_t *lstats, + malloc_huge_stats_t *hstats) { unsigned i; malloc_mutex_lock(&arena->lock); *dss = dss_prec_names[arena->dss_prec]; + *lg_dirty_mult = arena->lg_dirty_mult; *nactive += arena->nactive; *ndirty += arena->ndirty; diff --git a/src/ctl.c b/src/ctl.c index 44935467..d215b19b 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -181,6 +181,7 @@ CTL_PROTO(stats_arenas_i_hchunks_j_curhchunks) INDEX_PROTO(stats_arenas_i_hchunks_j) CTL_PROTO(stats_arenas_i_nthreads) CTL_PROTO(stats_arenas_i_dss) +CTL_PROTO(stats_arenas_i_lg_dirty_mult) CTL_PROTO(stats_arenas_i_pactive) CTL_PROTO(stats_arenas_i_pdirty) CTL_PROTO(stats_arenas_i_mapped) @@ -443,6 +444,7 @@ static const ctl_indexed_node_t stats_arenas_i_hchunks_node[] = { static const ctl_named_node_t stats_arenas_i_node[] = { {NAME("nthreads"), CTL(stats_arenas_i_nthreads)}, {NAME("dss"), CTL(stats_arenas_i_dss)}, + {NAME("lg_dirty_mult"), CTL(stats_arenas_i_lg_dirty_mult)}, {NAME("pactive"), CTL(stats_arenas_i_pactive)}, {NAME("pdirty"), CTL(stats_arenas_i_pdirty)}, {NAME("mapped"), CTL(stats_arenas_i_mapped)}, @@ -524,6 +526,7 @@ ctl_arena_clear(ctl_arena_stats_t *astats) { astats->dss = dss_prec_names[dss_prec_limit]; + astats->lg_dirty_mult = -1; astats->pactive = 0; astats->pdirty = 0; if (config_stats) { @@ -545,9 +548,9 @@ ctl_arena_stats_amerge(ctl_arena_stats_t *cstats, arena_t *arena) { unsigned i; - arena_stats_merge(arena, &cstats->dss, &cstats->pactive, - &cstats->pdirty, &cstats->astats, cstats->bstats, cstats->lstats, - cstats->hstats); + arena_stats_merge(arena, &cstats->dss, &cstats->lg_dirty_mult, + &cstats->pactive, 
&cstats->pdirty, &cstats->astats, cstats->bstats, + cstats->lstats, cstats->hstats); for (i = 0; i < NBINS; i++) { cstats->allocated_small += cstats->bstats[i].curregs * @@ -2000,6 +2003,8 @@ CTL_RO_CGEN(config_stats, stats_resident, ctl_stats.resident, size_t) CTL_RO_CGEN(config_stats, stats_mapped, ctl_stats.mapped, size_t) CTL_RO_GEN(stats_arenas_i_dss, ctl_stats.arenas[mib[2]].dss, const char *) +CTL_RO_GEN(stats_arenas_i_lg_dirty_mult, ctl_stats.arenas[mib[2]].lg_dirty_mult, + ssize_t) CTL_RO_GEN(stats_arenas_i_nthreads, ctl_stats.arenas[mib[2]].nthreads, unsigned) CTL_RO_GEN(stats_arenas_i_pactive, ctl_stats.arenas[mib[2]].pactive, size_t) CTL_RO_GEN(stats_arenas_i_pdirty, ctl_stats.arenas[mib[2]].pdirty, size_t) diff --git a/src/stats.c b/src/stats.c index c5cea5e6..6e1752ef 100644 --- a/src/stats.c +++ b/src/stats.c @@ -6,15 +6,6 @@ xmallctl(n, v, &sz, NULL, 0); \ } while (0) -#define CTL_M1_GET(n, i, v, t) do { \ - size_t mib[6]; \ - size_t miblen = sizeof(mib) / sizeof(size_t); \ - size_t sz = sizeof(t); \ - xmallctlnametomib(n, mib, &miblen); \ - mib[1] = (i); \ - xmallctlbymib(mib, miblen, v, &sz, NULL, 0); \ -} while (0) - #define CTL_M2_GET(n, i, v, t) do { \ size_t mib[6]; \ size_t miblen = sizeof(mib) / sizeof(size_t); \ @@ -285,7 +276,7 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, CTL_M2_GET("stats.arenas.0.dss", i, &dss, const char *); malloc_cprintf(write_cb, cbopaque, "dss allocation precedence: %s\n", dss); - CTL_M1_GET("arena.0.lg_dirty_mult", i, &lg_dirty_mult, ssize_t); + CTL_M2_GET("stats.arenas.0.lg_dirty_mult", i, &lg_dirty_mult, ssize_t); if (lg_dirty_mult >= 0) { malloc_cprintf(write_cb, cbopaque, "min active:dirty page ratio: %u:1\n", From 65db63cf3f0c5dd5126a1b3786756486eaf931ba Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 25 Mar 2015 18:56:55 -0700 Subject: [PATCH 0689/3378] Fix in-place shrinking huge reallocation purging bugs. 
Fix the shrinking case of huge_ralloc_no_move_similar() to purge the correct number of pages, at the correct offset. This regression was introduced by 8d6a3e8321a7767cb2ca0930b85d5d488a8cc659 (Implement dynamic per arena control over dirty page purging.). Fix huge_ralloc_no_move_shrink() to purge the correct number of pages. This bug was introduced by 9673983443a0782d975fbcb5d8457cfd411b8b56 (Purge/zero sub-chunk huge allocations as necessary.). --- src/arena.c | 7 +------ src/huge.c | 31 ++++++++++++++++--------------- 2 files changed, 17 insertions(+), 21 deletions(-) diff --git a/src/arena.c b/src/arena.c index bc13d209..30410683 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1245,16 +1245,11 @@ arena_purge_stashed(arena_t *arena, if (rdelm == &chunkselm->rd) { size_t size = extent_node_size_get(chunkselm); - void *addr, *chunk; - size_t offset; bool unzeroed; npages = size >> LG_PAGE; - addr = extent_node_addr_get(chunkselm); - chunk = CHUNK_ADDR2BASE(addr); - offset = CHUNK_ADDR2OFFSET(addr); unzeroed = chunk_purge_wrapper(arena, chunk_purge, - chunk, offset, size); + extent_node_addr_get(chunkselm), 0, size); extent_node_zeroed_set(chunkselm, !unzeroed); chunkselm = qr_next(chunkselm, cc_link); } else { diff --git a/src/huge.c b/src/huge.c index aa26f5df..32af2058 100644 --- a/src/huge.c +++ b/src/huge.c @@ -145,12 +145,11 @@ huge_ralloc_no_move_similar(void *ptr, size_t oldsize, size_t usize, /* Fill if necessary (shrinking). */ if (oldsize > usize) { - size_t sdiff = CHUNK_CEILING(usize) - usize; - zeroed = (sdiff != 0) ? 
!chunk_purge_wrapper(arena, chunk_purge, - CHUNK_ADDR2BASE(ptr), CHUNK_ADDR2OFFSET(ptr), usize) : true; + size_t sdiff = oldsize - usize; + zeroed = !chunk_purge_wrapper(arena, chunk_purge, ptr, usize, + sdiff); if (config_fill && unlikely(opt_junk_free)) { - memset((void *)((uintptr_t)ptr + usize), 0x5a, oldsize - - usize); + memset((void *)((uintptr_t)ptr + usize), 0x5a, sdiff); zeroed = false; } } else @@ -186,7 +185,6 @@ huge_ralloc_no_move_shrink(void *ptr, size_t oldsize, size_t usize) extent_node_t *node; arena_t *arena; chunk_purge_t *chunk_purge; - size_t sdiff; bool zeroed; node = huge_node_get(ptr); @@ -196,15 +194,18 @@ huge_ralloc_no_move_shrink(void *ptr, size_t oldsize, size_t usize) chunk_purge = arena->chunk_purge; malloc_mutex_unlock(&arena->lock); - sdiff = CHUNK_CEILING(usize) - usize; - zeroed = (sdiff != 0) ? !chunk_purge_wrapper(arena, chunk_purge, - CHUNK_ADDR2BASE((uintptr_t)ptr + usize), - CHUNK_ADDR2OFFSET((uintptr_t)ptr + usize), sdiff) : true; - if (config_fill && unlikely(opt_junk_free)) { - huge_dalloc_junk((void *)((uintptr_t)ptr + usize), oldsize - - usize); - zeroed = false; - } + if (oldsize > usize) { + size_t sdiff = oldsize - usize; + zeroed = !chunk_purge_wrapper(arena, chunk_purge, + CHUNK_ADDR2BASE((uintptr_t)ptr + usize), + CHUNK_ADDR2OFFSET((uintptr_t)ptr + usize), sdiff); + if (config_fill && unlikely(opt_junk_free)) { + huge_dalloc_junk((void *)((uintptr_t)ptr + usize), + sdiff); + zeroed = false; + } + } else + zeroed = true; malloc_mutex_lock(&arena->huge_mtx); /* Update the size of the huge allocation. */ From b80fbcbbdb7ea6ba5918db7c665c836baa8c0b2e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Marie?= Date: Tue, 7 Apr 2015 12:21:19 +0200 Subject: [PATCH 0690/3378] OpenBSD don't support TLS under some compiler (gcc 4.8.4 in particular), the auto-detection of TLS don't work properly. force tls to be disabled. 
the testsuite pass under gcc (4.8.4) and gcc (4.2.1) --- configure.ac | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index be49743d..bf2ac3a8 100644 --- a/configure.ac +++ b/configure.ac @@ -283,7 +283,13 @@ case "${host}" in abi="elf" AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) ;; - *-*-openbsd*|*-*-bitrig*) + *-*-openbsd*) + CFLAGS="$CFLAGS" + abi="elf" + AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) + force_tls="0" + ;; + *-*-bitrig*) CFLAGS="$CFLAGS" abi="elf" AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) From 897503521ddb703a1388899f79112e048c328278 Mon Sep 17 00:00:00 2001 From: Qinfan Wu Date: Tue, 21 Apr 2015 16:57:42 -0700 Subject: [PATCH 0691/3378] Fix mallctl doc: arenas.hchunk..size --- ChangeLog | 2 +- doc/jemalloc.xml.in | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ChangeLog b/ChangeLog index 8cc214ab..6f79cacf 100644 --- a/ChangeLog +++ b/ChangeLog @@ -47,7 +47,7 @@ found in the git revision history: + The "stats.arenas..huge.allocated", "stats.arenas..huge.nmalloc", "stats.arenas..huge.ndalloc", and "stats.arenas..huge.nrequests" mallctls provide high level per arena huge allocation statistics. - + The "arenas.nhchunks", "arenas.hchunks..size", + + The "arenas.nhchunks", "arenas.hchunk..size", "stats.arenas..hchunks..nmalloc", "stats.arenas..hchunks..ndalloc", "stats.arenas..hchunks..nrequests", and diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index d3f36164..c9ee9970 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1756,9 +1756,9 @@ malloc_conf = "xmalloc:true";]]> Total number of huge size classes. - + - arenas.hchunks.<i>.size + arenas.hchunk.<i>.size (size_t) r- From 95e88de0aab257020dfc33248b86331cbfac28b1 Mon Sep 17 00:00:00 2001 From: Igor Podlesny Date: Tue, 24 Mar 2015 12:49:26 +0700 Subject: [PATCH 0692/3378] Concise JEMALLOC_HAVE_ISSETUGID case in secure_getenv(). 
--- src/jemalloc.c | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 7e9f4860..a2d1c5c2 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -709,24 +709,16 @@ stats_print_atexit(void) */ #ifndef JEMALLOC_HAVE_SECURE_GETENV +static char * +secure_getenv(const char *name) +{ + # ifdef JEMALLOC_HAVE_ISSETUGID -static char * -secure_getenv(const char *name) -{ - - if (issetugid() == 0) - return (getenv(name)); - else + if (issetugid() != 0) return (NULL); -} -# else -static char * -secure_getenv(const char *name) -{ - +# endif return (getenv(name)); } -# endif #endif static unsigned From f1f2b4542902c5bc14788f6c2d4190b422e5901f Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 1 May 2015 08:57:41 -0700 Subject: [PATCH 0693/3378] Embed full library install when running ld on OS X. This resolves #228. --- configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index bf2ac3a8..5f9bbd37 100644 --- a/configure.ac +++ b/configure.ac @@ -268,7 +268,7 @@ case "${host}" in so="dylib" importlib="${so}" force_tls="0" - DSO_LDFLAGS='-shared -Wl,-dylib_install_name,$(@F)' + DSO_LDFLAGS='-shared -Wl,-install_name,$(LIBDIR)/$(@F)' SOREV="${rev}.${so}" sbrk_deprecated="1" ;; From 8e33c21d2d03ee7f540e32c3d75b10c128eaea57 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 1 May 2015 09:03:20 -0700 Subject: [PATCH 0694/3378] Prefer /proc//task//maps over /proc//maps on Linux. This resolves #227. --- src/prof.c | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/src/prof.c b/src/prof.c index f2a37253..8453ea87 100644 --- a/src/prof.c +++ b/src/prof.c @@ -1338,21 +1338,40 @@ label_return: return (ret); } +JEMALLOC_ATTR(format(printf, 1, 2)) +static int +prof_open_maps(const char *format, ...) 
+{ + int mfd; + va_list ap; + char filename[PATH_MAX + 1]; + + va_start(ap, format); + malloc_vsnprintf(filename, sizeof(filename), format, ap); + va_end(ap); + mfd = open(filename, O_RDONLY); + + return (mfd); +} + static bool prof_dump_maps(bool propagate_err) { bool ret; int mfd; - char filename[PATH_MAX + 1]; cassert(config_prof); #ifdef __FreeBSD__ - malloc_snprintf(filename, sizeof(filename), "/proc/curproc/map"); + mfd = prof_open_maps("/proc/curproc/map"); #else - malloc_snprintf(filename, sizeof(filename), "/proc/%d/maps", - (int)getpid()); + { + int pid = getpid(); + + mfd = prof_open_maps("/proc/%d/task/%d/maps", pid, pid); + if (mfd == -1) + mfd = prof_open_maps("/proc/%d/maps", pid); + } #endif - mfd = open(filename, O_RDONLY); if (mfd != -1) { ssize_t nread; From 7041720ac208fa2f7f65e40d8133d4b291516847 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 1 May 2015 12:31:12 -0700 Subject: [PATCH 0695/3378] Rename pprof to jeprof. This rename avoids installation collisions with the upstream gperftools. Additionally, jemalloc's per thread heap profile functionality introduced an incompatible file format, so it's now worthwhile to clearly distinguish jemalloc's version of this script from the upstream version. This resolves #229. 
--- .gitignore | 1 + ChangeLog | 5 +- Makefile.in | 2 +- bin/{pprof => jeprof.in} | 114 ++++++++++++++++++++------------------- configure.ac | 2 +- doc/jemalloc.xml.in | 5 +- src/prof.c | 2 +- 7 files changed, 68 insertions(+), 63 deletions(-) rename bin/{pprof => jeprof.in} (98%) mode change 100755 => 100644 diff --git a/.gitignore b/.gitignore index 5cd3e922..d0e39361 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,7 @@ /bin/jemalloc-config /bin/jemalloc.sh +/bin/jeprof /config.stamp /config.log diff --git a/ChangeLog b/ChangeLog index 6f79cacf..33139f9b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -117,8 +117,9 @@ found in the git revision history: - Assure that the constness of malloc_usable_size()'s return type matches that of the system implementation. - Change the heap profile dump format to support per thread heap profiling, - and enhance pprof with the --thread= option. As a result, the bundled - pprof must now be used rather than the upstream (gperftools) pprof. + rename pprof to jeprof, and enhance it with the --thread= option. As a + result, the bundled jeprof must now be used rather than the upstream + (gperftools) pprof. - Disable "opt.prof_final" by default, in order to avoid atexit(3), which can internally deadlock on some platforms. - Change the "arenas.nlruns" mallctl type from size_t to unsigned. diff --git a/Makefile.in b/Makefile.in index a105bb12..f539fad6 100644 --- a/Makefile.in +++ b/Makefile.in @@ -73,7 +73,7 @@ endif LIBJEMALLOC := $(LIBPREFIX)jemalloc$(install_suffix) # Lists of files. 
-BINS := $(srcroot)bin/pprof $(objroot)bin/jemalloc-config $(objroot)bin/jemalloc.sh +BINS := $(objroot)bin/jemalloc-config $(objroot)bin/jemalloc.sh $(objroot)bin/jeprof C_HDRS := $(objroot)include/jemalloc/jemalloc$(install_suffix).h C_SRCS := $(srcroot)src/jemalloc.c $(srcroot)src/arena.c \ $(srcroot)src/atomic.c $(srcroot)src/base.c $(srcroot)src/bitmap.c \ diff --git a/bin/pprof b/bin/jeprof.in old mode 100755 new mode 100644 similarity index 98% rename from bin/pprof rename to bin/jeprof.in index df503aea..e7178078 --- a/bin/pprof +++ b/bin/jeprof.in @@ -40,28 +40,28 @@ # # Examples: # -# % tools/pprof "program" "profile" +# % tools/jeprof "program" "profile" # Enters "interactive" mode # -# % tools/pprof --text "program" "profile" +# % tools/jeprof --text "program" "profile" # Generates one line per procedure # -# % tools/pprof --gv "program" "profile" +# % tools/jeprof --gv "program" "profile" # Generates annotated call-graph and displays via "gv" # -# % tools/pprof --gv --focus=Mutex "program" "profile" +# % tools/jeprof --gv --focus=Mutex "program" "profile" # Restrict to code paths that involve an entry that matches "Mutex" # -# % tools/pprof --gv --focus=Mutex --ignore=string "program" "profile" +# % tools/jeprof --gv --focus=Mutex --ignore=string "program" "profile" # Restrict to code paths that involve an entry that matches "Mutex" # and does not match "string" # -# % tools/pprof --list=IBF_CheckDocid "program" "profile" +# % tools/jeprof --list=IBF_CheckDocid "program" "profile" # Generates disassembly listing of all routines with at least one # sample that match the --list= pattern. The listing is # annotated with the flat and cumulative sample counts at each line. # -# % tools/pprof --disasm=IBF_CheckDocid "program" "profile" +# % tools/jeprof --disasm=IBF_CheckDocid "program" "profile" # Generates disassembly listing of all routines with at least one # sample that match the --disasm= pattern. 
The listing is # annotated with the flat and cumulative sample counts at each PC value. @@ -72,10 +72,11 @@ use strict; use warnings; use Getopt::Long; +my $JEPROF_VERSION = "@jemalloc_version@"; my $PPROF_VERSION = "2.0"; # These are the object tools we use which can come from a -# user-specified location using --tools, from the PPROF_TOOLS +# user-specified location using --tools, from the JEPROF_TOOLS # environment variable, or from the environment. my %obj_tool_map = ( "objdump" => "objdump", @@ -144,13 +145,13 @@ my $sep_address = undef; sub usage_string { return < +jeprof [options] is a space separated list of profile names. -pprof [options] +jeprof [options] is a list of profile files where each file contains the necessary symbol mappings as well as profile data (likely generated with --raw). -pprof [options] +jeprof [options] is a remote form. Symbols are obtained from host:port$SYMBOL_PAGE Each name can be: @@ -161,9 +162,9 @@ pprof [options] $GROWTH_PAGE, $CONTENTION_PAGE, /pprof/wall, $CENSUSPROFILE_PAGE, or /pprof/filteredprofile. For instance: - pprof http://myserver.com:80$HEAP_PAGE + jeprof http://myserver.com:80$HEAP_PAGE If / is omitted, the service defaults to $PROFILE_PAGE (cpu profiling). -pprof --symbols +jeprof --symbols Maps addresses to symbol names. In this mode, stdin should be a list of library mappings, in the same format as is found in the heap- and cpu-profile files (this loosely matches that of /proc/self/maps @@ -202,7 +203,7 @@ Output type: --pdf Generate PDF to stdout --svg Generate SVG to stdout --gif Generate GIF to stdout - --raw Generate symbolized pprof data (useful with remote fetch) + --raw Generate symbolized jeprof data (useful with remote fetch) Heap-Profile Options: --inuse_space Display in-use (mega)bytes [default] @@ -236,34 +237,34 @@ Miscellaneous: --version Version information Environment Variables: - PPROF_TMPDIR Profiles directory. 
Defaults to \$HOME/pprof - PPROF_TOOLS Prefix for object tools pathnames + JEPROF_TMPDIR Profiles directory. Defaults to \$HOME/jeprof + JEPROF_TOOLS Prefix for object tools pathnames Examples: -pprof /bin/ls ls.prof +jeprof /bin/ls ls.prof Enters "interactive" mode -pprof --text /bin/ls ls.prof +jeprof --text /bin/ls ls.prof Outputs one line per procedure -pprof --web /bin/ls ls.prof +jeprof --web /bin/ls ls.prof Displays annotated call-graph in web browser -pprof --gv /bin/ls ls.prof +jeprof --gv /bin/ls ls.prof Displays annotated call-graph via 'gv' -pprof --gv --focus=Mutex /bin/ls ls.prof +jeprof --gv --focus=Mutex /bin/ls ls.prof Restricts to code paths including a .*Mutex.* entry -pprof --gv --focus=Mutex --ignore=string /bin/ls ls.prof +jeprof --gv --focus=Mutex --ignore=string /bin/ls ls.prof Code paths including Mutex but not string -pprof --list=getdir /bin/ls ls.prof +jeprof --list=getdir /bin/ls ls.prof (Per-line) annotated source listing for getdir() -pprof --disasm=getdir /bin/ls ls.prof +jeprof --disasm=getdir /bin/ls ls.prof (Per-PC) annotated disassembly for getdir() -pprof http://localhost:1234/ +jeprof http://localhost:1234/ Enters "interactive" mode -pprof --text localhost:1234 +jeprof --text localhost:1234 Outputs one line per procedure for localhost:1234 -pprof --raw localhost:1234 > ./local.raw -pprof --text ./local.raw +jeprof --raw localhost:1234 > ./local.raw +jeprof --text ./local.raw Fetches a remote profile for later analysis and then analyzes it in text mode. 
EOF @@ -271,7 +272,8 @@ EOF sub version_string { return <readline('(pprof) '))) { + my $term = new Term::ReadLine 'jeprof'; + while ( defined ($_ = $term->readline('(jeprof) '))) { $term->addhistory($_) if /\S/; if (!InteractiveCommand($orig_profile, $symbols, $libs, $total, $_)) { last; # exit when we get an interactive command to quit @@ -817,7 +819,7 @@ sub InteractiveMode { } } else { # don't have readline while (1) { - print STDERR "(pprof) "; + print STDERR "(jeprof) "; $_ = ; last if ! defined $_ ; s/\r//g; # turn windows-looking lines into unix-looking lines @@ -1010,7 +1012,7 @@ sub ProcessProfile { sub InteractiveHelpMessage { print STDERR <