diff --git a/.appveyor.yml b/.appveyor.yml new file mode 100644 index 00000000..ddd5c571 --- /dev/null +++ b/.appveyor.yml @@ -0,0 +1,28 @@ +version: '{build}' + +environment: + matrix: + - MSYSTEM: MINGW64 + CPU: x86_64 + MSVC: amd64 + - MSYSTEM: MINGW32 + CPU: i686 + MSVC: x86 + - MSYSTEM: MINGW64 + CPU: x86_64 + - MSYSTEM: MINGW32 + CPU: i686 + +install: + - set PATH=c:\msys64\%MSYSTEM%\bin;c:\msys64\usr\bin;%PATH% + - if defined MSVC call "c:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" %MSVC% + - if defined MSVC pacman --noconfirm -Rsc mingw-w64-%CPU%-gcc gcc + - pacman --noconfirm -Suy mingw-w64-%CPU%-make + +build_script: + - bash -c "autoconf" + - bash -c "./configure" + - mingw32-make -j3 + - file lib/jemalloc.dll + - mingw32-make -j3 tests + - mingw32-make -k check diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 00000000..1fed4f8e --- /dev/null +++ b/.travis.yml @@ -0,0 +1,29 @@ +language: c + +matrix: + include: + - os: linux + compiler: gcc + - os: linux + compiler: gcc + env: + - EXTRA_FLAGS=-m32 + addons: + apt: + packages: + - gcc-multilib + - os: osx + compiler: clang + - os: osx + compiler: clang + env: + - EXTRA_FLAGS=-m32 + +before_script: + - autoconf + - ./configure${EXTRA_FLAGS:+ CC="$CC $EXTRA_FLAGS"} + - make -j3 + - make -j3 tests + +script: + - make check diff --git a/ChangeLog b/ChangeLog index ed62e0e7..ac2e4d3c 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,6 +4,49 @@ brevity. Much more detail can be found in the git revision history: https://github.com/jemalloc/jemalloc +* 4.3.0 (November 4, 2016) + + This is the first release that passes the test suite for multiple Windows + configurations, thanks in large part to @glandium setting up continuous + integration via AppVeyor (and Travis CI for Linux and OS X). + + New features: + - Add "J" (JSON) support to malloc_stats_print(). (@jasone) + - Add Cray compiler support. (@ronawho) + + Optimizations: + - Add/use adaptive spinning for bootstrapping and radix tree node + initialization. (@jasone) + + Bug fixes: + - Fix large allocation to search starting in the optimal size class heap, + which can substantially reduce virtual memory churn and fragmentation. This + regression was first released in 4.0.0. (@mjp41, @jasone) + - Fix stats.arenas.<i>.nthreads accounting. (@interwq) + - Fix and simplify decay-based purging. (@jasone) + - Make DSS (sbrk(2)-related) operations lockless, which resolves potential + deadlocks during thread exit. (@jasone) + - Fix over-sized allocation of radix tree leaf nodes. (@mjp41, @ogaun, + @jasone) + - Fix EXTRA_CFLAGS to not affect configuration. (@jasone) + - Fix a Valgrind integration bug. (@ronawho) + - Disallow 0x5a junk filling when running in Valgrind. (@jasone) + - Fix a file descriptor leak on Linux. This regression was first released in + 4.2.0. (@vsarunas, @jasone) + - Fix static linking of jemalloc with glibc. (@djwatson) + - Use syscall(2) rather than {open,read,close}(2) during boot on Linux. This + works around other libraries' system call wrappers performing reentrant + allocation. (@kspinka, @Whissi, @jasone) + - Fix OS X default zone replacement to work with OS X 10.12. (@glandium, + @jasone) + - Fix cached memory management to avoid needless commit/decommit operations + during purging, which resolves permanent virtual memory map fragmentation + issues on Windows. (@mjp41, @jasone) + - Fix TSD fetches to avoid (recursive) allocation. This is relevant to + non-TLS and Windows configurations.
(@jasone) + - Fix malloc_conf overriding to work on Windows. (@jasone) + - Forcibly disable lazy-lock on Windows (was forcibly *enabled*). (@jasone) + * 4.2.1 (June 8, 2016) Bug fixes: @@ -19,7 +62,7 @@ brevity. Much more detail can be found in the git revision history: New features: - Add the arena.<i>.reset mallctl, which makes it possible to discard all of - an arena's allocations in a single operation. (@jasone@) + an arena's allocations in a single operation. (@jasone) - Add the stats.retained and stats.arenas.<i>.retained statistics. (@jasone) - Add the --with-version configure option. (@jasone) - Support --with-lg-page values larger than actual page size. (@jasone) diff --git a/Makefile.in b/Makefile.in index 652f01f2..d13c7f10 100644 --- a/Makefile.in +++ b/Makefile.in @@ -24,7 +24,8 @@ abs_objroot := @abs_objroot@ # Build parameters. CPPFLAGS := @CPPFLAGS@ -I$(srcroot)include -I$(objroot)include -CFLAGS := @CFLAGS@ +EXTRA_CFLAGS := @EXTRA_CFLAGS@ +CFLAGS := @CFLAGS@ $(EXTRA_CFLAGS) LDFLAGS := @LDFLAGS@ EXTRA_LDFLAGS := @EXTRA_LDFLAGS@ LIBS := @LIBS@ @@ -52,15 +53,19 @@ enable_prof := @enable_prof@ enable_valgrind := @enable_valgrind@ enable_zone_allocator := @enable_zone_allocator@ MALLOC_CONF := @JEMALLOC_CPREFIX@MALLOC_CONF +link_whole_archive := @link_whole_archive@ DSO_LDFLAGS = @DSO_LDFLAGS@ SOREV = @SOREV@ PIC_CFLAGS = @PIC_CFLAGS@ CTARGET = @CTARGET@ LDTARGET = @LDTARGET@ +TEST_LD_MODE = @TEST_LD_MODE@ MKLIB = @MKLIB@ AR = @AR@ ARFLAGS = @ARFLAGS@ CC_MM = @CC_MM@ +LM := @LM@ +INSTALL = @INSTALL@ ifeq (macho, $(ABI)) TEST_LIBRARY_PATH := DYLD_FALLBACK_LIBRARY_PATH="$(objroot)lib" @@ -99,6 +104,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/quarantine.c \ $(srcroot)src/rtree.c \ $(srcroot)src/stats.c \ + $(srcroot)src/spin.c \ $(srcroot)src/tcache.c \ $(srcroot)src/ticker.c \ $(srcroot)src/tsd.c \ @@ -122,6 +128,11 @@ DSOS := $(objroot)lib/$(LIBJEMALLOC).$(SOREV) ifneq ($(SOREV),$(SO)) DSOS += $(objroot)lib/$(LIBJEMALLOC).$(SO) endif +ifeq (1, $(link_whole_archive)) +LJEMALLOC := -Wl,--whole-archive -L$(objroot)lib -l$(LIBJEMALLOC) -Wl,--no-whole-archive +else +LJEMALLOC := $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) +endif PC := $(objroot)jemalloc.pc MAN3 := $(objroot)doc/jemalloc$(install_suffix).3 DOCS_XML := $(objroot)doc/jemalloc$(install_suffix).xml @@ -133,7 +144,11 @@ C_TESTLIB_SRCS := $(srcroot)test/src/btalloc.c $(srcroot)test/src/btalloc_0.c \ $(srcroot)test/src/mtx.c $(srcroot)test/src/mq.c \ $(srcroot)test/src/SFMT.c $(srcroot)test/src/test.c \ $(srcroot)test/src/thd.c $(srcroot)test/src/timer.c +ifeq (1, $(link_whole_archive)) +C_UTIL_INTEGRATION_SRCS := +else C_UTIL_INTEGRATION_SRCS := $(srcroot)src/nstime.c $(srcroot)src/util.c +endif TESTS_UNIT := \ $(srcroot)test/unit/a0.c \ $(srcroot)test/unit/arena_reset.c \ @@ -295,69 +310,69 @@ $(STATIC_LIBS): $(objroot)test/unit/%$(EXE): $(objroot)test/unit/%.$(O) $(TESTS_UNIT_LINK_OBJS) $(C_JET_OBJS) $(C_TESTLIB_UNIT_OBJS) @mkdir -p $(@D) - $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(LDFLAGS) $(filter-out -lm,$(LIBS)) -lm $(EXTRA_LDFLAGS) + $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(LDFLAGS) $(filter-out -lm,$(LIBS)) $(LM) $(EXTRA_LDFLAGS) $(objroot)test/integration/%$(EXE): $(objroot)test/integration/%.$(O) $(C_TESTLIB_INTEGRATION_OBJS) $(C_UTIL_INTEGRATION_OBJS) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) @mkdir -p $(@D) - $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(filter
-lpthread,$(LIBS))) -lm $(EXTRA_LDFLAGS) + $(CC) $(TEST_LD_MODE) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(LJEMALLOC) $(LDFLAGS) $(filter-out -lm,$(filter -lrt -lpthread,$(LIBS))) $(LM) $(EXTRA_LDFLAGS) $(objroot)test/stress/%$(EXE): $(objroot)test/stress/%.$(O) $(C_JET_OBJS) $(C_TESTLIB_STRESS_OBJS) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) @mkdir -p $(@D) - $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(LIBS)) -lm $(EXTRA_LDFLAGS) + $(CC) $(TEST_LD_MODE) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(LIBS)) $(LM) $(EXTRA_LDFLAGS) build_lib_shared: $(DSOS) build_lib_static: $(STATIC_LIBS) build_lib: build_lib_shared build_lib_static install_bin: - install -d $(BINDIR) + $(INSTALL) -d $(BINDIR) @for b in $(BINS); do \ - echo "install -m 755 $$b $(BINDIR)"; \ - install -m 755 $$b $(BINDIR); \ + echo "$(INSTALL) -m 755 $$b $(BINDIR)"; \ + $(INSTALL) -m 755 $$b $(BINDIR); \ done install_include: - install -d $(INCLUDEDIR)/jemalloc + $(INSTALL) -d $(INCLUDEDIR)/jemalloc @for h in $(C_HDRS); do \ - echo "install -m 644 $$h $(INCLUDEDIR)/jemalloc"; \ - install -m 644 $$h $(INCLUDEDIR)/jemalloc; \ + echo "$(INSTALL) -m 644 $$h $(INCLUDEDIR)/jemalloc"; \ + $(INSTALL) -m 644 $$h $(INCLUDEDIR)/jemalloc; \ done install_lib_shared: $(DSOS) - install -d $(LIBDIR) - install -m 755 $(objroot)lib/$(LIBJEMALLOC).$(SOREV) $(LIBDIR) + $(INSTALL) -d $(LIBDIR) + $(INSTALL) -m 755 $(objroot)lib/$(LIBJEMALLOC).$(SOREV) $(LIBDIR) ifneq ($(SOREV),$(SO)) ln -sf $(LIBJEMALLOC).$(SOREV) $(LIBDIR)/$(LIBJEMALLOC).$(SO) endif install_lib_static: $(STATIC_LIBS) - install -d $(LIBDIR) + $(INSTALL) -d $(LIBDIR) @for l in $(STATIC_LIBS); do \ - echo "install -m 755 $$l $(LIBDIR)"; \ - install -m 755 $$l $(LIBDIR); \ + echo "$(INSTALL) -m 755 $$l $(LIBDIR)"; \ + $(INSTALL) -m 755 $$l $(LIBDIR); \ done install_lib_pc: $(PC) - install -d $(LIBDIR)/pkgconfig + $(INSTALL) -d $(LIBDIR)/pkgconfig @for l in $(PC); do \ - echo "install -m 644 $$l $(LIBDIR)/pkgconfig"; \ - install -m 644 $$l $(LIBDIR)/pkgconfig; \ + echo "$(INSTALL) -m 644 $$l $(LIBDIR)/pkgconfig"; \ + $(INSTALL) -m 644 $$l $(LIBDIR)/pkgconfig; \ done install_lib: install_lib_shared install_lib_static install_lib_pc install_doc_html: - install -d $(DATADIR)/doc/jemalloc$(install_suffix) + $(INSTALL) -d $(DATADIR)/doc/jemalloc$(install_suffix) @for d in $(DOCS_HTML); do \ - echo "install -m 644 $$d $(DATADIR)/doc/jemalloc$(install_suffix)"; \ - install -m 644 $$d $(DATADIR)/doc/jemalloc$(install_suffix); \ + echo "$(INSTALL) -m 644 $$d $(DATADIR)/doc/jemalloc$(install_suffix)"; \ + $(INSTALL) -m 644 $$d $(DATADIR)/doc/jemalloc$(install_suffix); \ done install_doc_man: - install -d $(MANDIR)/man3 + $(INSTALL) -d $(MANDIR)/man3 @for d in $(DOCS_MAN3); do \ - echo "install -m 644 $$d $(MANDIR)/man3"; \ - install -m 644 $$d $(MANDIR)/man3; \ + echo "$(INSTALL) -m 644 $$d $(MANDIR)/man3"; \ + $(INSTALL) -m 644 $$d $(MANDIR)/man3; \ done install_doc: install_doc_html install_doc_man diff --git a/README b/README index 9b268f42..5ff24a9e 100644 --- a/README +++ b/README @@ -17,4 +17,4 @@ jemalloc. The ChangeLog file contains a brief summary of changes for each release. 
-URL: http://www.canonware.com/jemalloc/ +URL: http://jemalloc.net/ diff --git a/configure.ac b/configure.ac index 7f19715d..104fd994 100644 --- a/configure.ac +++ b/configure.ac @@ -118,6 +118,7 @@ dnl If CFLAGS isn't defined, set CFLAGS to something reasonable. Otherwise, dnl just prevent autoconf from molesting CFLAGS. CFLAGS=$CFLAGS AC_PROG_CC + if test "x$GCC" != "xyes" ; then AC_CACHE_CHECK([whether compiler is MSVC], [je_cv_msvc], @@ -131,12 +132,54 @@ if test "x$GCC" != "xyes" ; then [je_cv_msvc=no])]) fi +dnl check if a cray prgenv wrapper compiler is being used +je_cv_cray_prgenv_wrapper="" +if test "x${PE_ENV}" != "x" ; then + case "${CC}" in + CC|cc) + je_cv_cray_prgenv_wrapper="yes" + ;; + *) + ;; + esac +fi + +AC_CACHE_CHECK([whether compiler is cray], + [je_cv_cray], + [AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], + [ +#ifndef _CRAYC + int fail[-1]; +#endif +])], + [je_cv_cray=yes], + [je_cv_cray=no])]) + +if test "x${je_cv_cray}" = "xyes" ; then + AC_CACHE_CHECK([whether cray compiler version is 8.4], + [je_cv_cray_84], + [AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], + [ +#if !(_RELEASE_MAJOR == 8 && _RELEASE_MINOR == 4) + int fail[-1]; +#endif +])], + [je_cv_cray_84=yes], + [je_cv_cray_84=no])]) +fi + if test "x$CFLAGS" = "x" ; then no_CFLAGS="yes" if test "x$GCC" = "xyes" ; then - JE_CFLAGS_APPEND([-std=gnu99]) - if test "x$je_cv_cflags_appended" = "x-std=gnu99" ; then +dnl JE_CFLAGS_APPEND([-std=gnu99]) + JE_CFLAGS_APPEND([-std=gnu11]) + if test "x$je_cv_cflags_appended" = "x-std=gnu11" ; then AC_DEFINE_UNQUOTED([JEMALLOC_HAS_RESTRICT]) + else + JE_CFLAGS_APPEND([-std=gnu99]) + if test "x$je_cv_cflags_appended" = "x-std=gnu99" ; then + AC_DEFINE_UNQUOTED([JEMALLOC_HAS_RESTRICT]) + fi fi JE_CFLAGS_APPEND([-Wall]) JE_CFLAGS_APPEND([-Werror=declaration-after-statement]) @@ -152,11 +195,21 @@ if test "x$CFLAGS" = "x" ; then JE_CFLAGS_APPEND([-FS]) CPPFLAGS="$CPPFLAGS -I${srcdir}/include/msvc_compat" fi + if test "x$je_cv_cray" = "xyes" ; then + dnl cray compiler 8.4 has an inlining bug + if test "x$je_cv_cray_84" = "xyes" ; then + JE_CFLAGS_APPEND([-hipa2]) + JE_CFLAGS_APPEND([-hnognu]) + fi + if test "x$enable_cc_silence" != "xno" ; then + dnl ignore unreachable code warning + JE_CFLAGS_APPEND([-hnomessage=128]) + dnl ignore redefinition of "malloc", "free", etc warning + JE_CFLAGS_APPEND([-hnomessage=1357]) + fi + fi fi -dnl Append EXTRA_CFLAGS to CFLAGS, if defined. -if test "x$EXTRA_CFLAGS" != "x" ; then - JE_CFLAGS_APPEND([$EXTRA_CFLAGS]) -fi +AC_SUBST([EXTRA_CFLAGS]) AC_PROG_CPP AC_C_BIGENDIAN([ac_cv_big_endian=1], [ac_cv_big_endian=0]) @@ -263,17 +316,27 @@ o="$ac_objext" a="a" exe="$ac_exeext" libprefix="lib" +link_whole_archive="0" DSO_LDFLAGS='-shared -Wl,-soname,$(@F)' RPATH='-Wl,-rpath,$(1)' SOREV="${so}.${rev}" PIC_CFLAGS='-fPIC -DPIC' CTARGET='-o $@' LDTARGET='-o $@' +TEST_LD_MODE= EXTRA_LDFLAGS= ARFLAGS='crus' AROUT=' $@' CC_MM=1 +if test "x$je_cv_cray_prgenv_wrapper" = "xyes" ; then + TEST_LD_MODE='-dynamic' +fi + +if test "x${je_cv_cray}" = "xyes" ; then + CC_MM= +fi + AN_MAKEVAR([AR], [AC_PROG_AR]) AN_PROGRAM([ar], [AC_PROG_AR]) AC_DEFUN([AC_PROG_AR], [AC_CHECK_TOOL(AR, ar, :)]) @@ -286,11 +349,11 @@ dnl dnl Define cpp macros in CPPFLAGS, rather than doing AC_DEFINE(macro), since the dnl definitions need to be seen before any headers are included, which is a pain dnl to make happen otherwise. 
+CFLAGS="$CFLAGS" default_munmap="1" maps_coalesce="1" case "${host}" in *-*-darwin* | *-*-ios*) - CFLAGS="$CFLAGS" abi="macho" AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) RPATH="" @@ -303,30 +366,26 @@ case "${host}" in sbrk_deprecated="1" ;; *-*-freebsd*) - CFLAGS="$CFLAGS" abi="elf" AC_DEFINE([JEMALLOC_SYSCTL_VM_OVERCOMMIT], [ ]) AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) force_lazy_lock="1" ;; *-*-dragonfly*) - CFLAGS="$CFLAGS" abi="elf" AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) ;; *-*-openbsd*) - CFLAGS="$CFLAGS" abi="elf" AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) force_tls="0" ;; *-*-bitrig*) - CFLAGS="$CFLAGS" abi="elf" AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) ;; *-*-linux*) - CFLAGS="$CFLAGS" + dnl syscall(2) and secure_getenv(3) are exposed by _GNU_SOURCE. CPPFLAGS="$CPPFLAGS -D_GNU_SOURCE" abi="elf" AC_DEFINE([JEMALLOC_HAS_ALLOCA_H]) @@ -345,13 +404,12 @@ case "${host}" in #error aout #endif ]])], - [CFLAGS="$CFLAGS"; abi="elf"], + [abi="elf"], [abi="aout"]) AC_MSG_RESULT([$abi]) AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) ;; *-*-solaris2*) - CFLAGS="$CFLAGS" abi="elf" AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) RPATH='-Wl,-R,$(1)' @@ -372,7 +430,6 @@ case "${host}" in *-*-mingw* | *-*-cygwin*) abi="pecoff" force_tls="0" - force_lazy_lock="1" maps_coalesce="0" RPATH="" so="dll" @@ -389,6 +446,7 @@ case "${host}" in else importlib="${so}" DSO_LDFLAGS="-shared" + link_whole_archive="1" fi a="lib" libprefix="" @@ -426,17 +484,28 @@ AC_SUBST([o]) AC_SUBST([a]) AC_SUBST([exe]) AC_SUBST([libprefix]) +AC_SUBST([link_whole_archive]) AC_SUBST([DSO_LDFLAGS]) AC_SUBST([EXTRA_LDFLAGS]) AC_SUBST([SOREV]) AC_SUBST([PIC_CFLAGS]) AC_SUBST([CTARGET]) AC_SUBST([LDTARGET]) +AC_SUBST([TEST_LD_MODE]) AC_SUBST([MKLIB]) AC_SUBST([ARFLAGS]) AC_SUBST([AROUT]) AC_SUBST([CC_MM]) +dnl Determine whether libm must be linked to use e.g. log(3). +AC_SEARCH_LIBS([log], [m], , [AC_MSG_ERROR([Missing math functions])]) +if test "x$ac_cv_search_log" != "xnone required" ; then + LM="$ac_cv_search_log" +else + LM= +fi +AC_SUBST(LM) + JE_COMPILABLE([__attribute__ syntax], [static __attribute__((unused)) void foo(void){}], [], @@ -450,6 +519,7 @@ fi dnl Check for tls_model attribute support (clang 3.0 still lacks support). SAVED_CFLAGS="${CFLAGS}" JE_CFLAGS_APPEND([-Werror]) +JE_CFLAGS_APPEND([-herror_on_warning]) JE_COMPILABLE([tls_model attribute], [], [static __thread int __attribute__((tls_model("initial-exec"), unused)) foo; @@ -465,6 +535,7 @@ fi dnl Check for alloc_size attribute support. SAVED_CFLAGS="${CFLAGS}" JE_CFLAGS_APPEND([-Werror]) +JE_CFLAGS_APPEND([-herror_on_warning]) JE_COMPILABLE([alloc_size attribute], [#include ], [void *foo(size_t size) __attribute__((alloc_size(1)));], [je_cv_alloc_size]) @@ -475,6 +546,7 @@ fi dnl Check for format(gnu_printf, ...) attribute support. SAVED_CFLAGS="${CFLAGS}" JE_CFLAGS_APPEND([-Werror]) +JE_CFLAGS_APPEND([-herror_on_warning]) JE_COMPILABLE([format(gnu_printf, ...) attribute], [#include ], [void *foo(const char *format, ...) __attribute__((format(gnu_printf, 1, 2)));], [je_cv_format_gnu_printf]) @@ -485,6 +557,7 @@ fi dnl Check for format(printf, ...) attribute support. SAVED_CFLAGS="${CFLAGS}" JE_CFLAGS_APPEND([-Werror]) +JE_CFLAGS_APPEND([-herror_on_warning]) JE_COMPILABLE([format(printf, ...) attribute], [#include ], [void *foo(const char *format, ...) 
__attribute__((format(printf, 1, 2)));], [je_cv_format_printf]) @@ -879,9 +952,9 @@ fi AC_MSG_CHECKING([configured backtracing method]) AC_MSG_RESULT([$backtrace_method]) if test "x$enable_prof" = "x1" ; then - if test "x$abi" != "xpecoff"; then - dnl Heap profiling uses the log(3) function. - LIBS="$LIBS -lm" + dnl Heap profiling uses the log(3) function. + if test "x$LM" != "x" ; then + LIBS="$LIBS $LM" fi AC_DEFINE([JEMALLOC_PROF], [ ]) @@ -1050,6 +1123,23 @@ if test "x$enable_cache_oblivious" = "x1" ; then fi AC_SUBST([enable_cache_oblivious]) + + +JE_COMPILABLE([a program using __builtin_unreachable], [ +void foo (void) { + __builtin_unreachable(); +} +], [ + { + foo(); + } +], [je_cv_gcc_builtin_unreachable]) +if test "x${je_cv_gcc_builtin_unreachable}" = "xyes" ; then + AC_DEFINE([JEMALLOC_INTERNAL_UNREACHABLE], [__builtin_unreachable]) +else + AC_DEFINE([JEMALLOC_INTERNAL_UNREACHABLE], [abort]) +fi + dnl ============================================================================ dnl Check for __builtin_ffsl(), then ffsl(3), and fail if neither are found. dnl One of those two functions should (theoretically) exist on all platforms @@ -1244,6 +1334,74 @@ CPPFLAGS="$CPPFLAGS -D_REENTRANT" dnl Check whether clock_gettime(2) is in libc or librt. AC_SEARCH_LIBS([clock_gettime], [rt]) +dnl Cray wrapper compiler often adds `-lrt` when using `-static`. Check with +dnl `-dynamic` as well in case a user tries to dynamically link in jemalloc +if test "x$je_cv_cray_prgenv_wrapper" = "xyes" ; then + if test "$ac_cv_search_clock_gettime" != "-lrt"; then + SAVED_CFLAGS="${CFLAGS}" + + unset ac_cv_search_clock_gettime + JE_CFLAGS_APPEND([-dynamic]) + AC_SEARCH_LIBS([clock_gettime], [rt]) + + CFLAGS="${SAVED_CFLAGS}" + fi +fi + +dnl check for CLOCK_MONOTONIC_COARSE (Linux-specific). +JE_COMPILABLE([clock_gettime(CLOCK_MONOTONIC_COARSE, ...)], [ +#include <time.h> +], [ + struct timespec ts; + + clock_gettime(CLOCK_MONOTONIC_COARSE, &ts); +], [je_cv_clock_monotonic_coarse]) +if test "x${je_cv_clock_monotonic_coarse}" = "xyes" ; then + AC_DEFINE([JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE]) +fi + +dnl check for CLOCK_MONOTONIC. +JE_COMPILABLE([clock_gettime(CLOCK_MONOTONIC, ...)], [ +#include <unistd.h> +#include <time.h> +], [ + struct timespec ts; + + clock_gettime(CLOCK_MONOTONIC, &ts); +#if !defined(_POSIX_MONOTONIC_CLOCK) || _POSIX_MONOTONIC_CLOCK < 0 +# error _POSIX_MONOTONIC_CLOCK missing/invalid +#endif +], [je_cv_clock_monotonic]) +if test "x${je_cv_clock_monotonic}" = "xyes" ; then + AC_DEFINE([JEMALLOC_HAVE_CLOCK_MONOTONIC]) +fi + +dnl Check for mach_absolute_time(). +JE_COMPILABLE([mach_absolute_time()], [ +#include <mach/mach_time.h> +], [ + mach_absolute_time(); +], [je_cv_mach_absolute_time]) +if test "x${je_cv_mach_absolute_time}" = "xyes" ; then + AC_DEFINE([JEMALLOC_HAVE_MACH_ABSOLUTE_TIME]) +fi + +dnl Check if syscall(2) is usable. Treat warnings as errors, so that e.g. OS X +dnl 10.12's deprecation warning prevents use. +SAVED_CFLAGS="${CFLAGS}" +JE_CFLAGS_APPEND([-Werror]) +JE_COMPILABLE([syscall(2)], [ +#include <sys/syscall.h> +#include <unistd.h> +], [ + syscall(SYS_write, 2, "hello", 5); +], + [je_cv_syscall]) CFLAGS="${SAVED_CFLAGS}" if test "x$je_cv_syscall" = "xyes" ; then + AC_DEFINE([JEMALLOC_HAVE_SYSCALL], [ ]) +fi + dnl Check if the GNU-specific secure_getenv function exists.
AC_CHECK_FUNC([secure_getenv], [have_secure_getenv="1"], @@ -1298,9 +1456,17 @@ fi ], [enable_lazy_lock=""] ) -if test "x$enable_lazy_lock" = "x" -a "x${force_lazy_lock}" = "x1" ; then - AC_MSG_RESULT([Forcing lazy-lock to avoid allocator/threading bootstrap issues]) - enable_lazy_lock="1" +if test "x${enable_lazy_lock}" = "x" ; then + if test "x${force_lazy_lock}" = "x1" ; then + AC_MSG_RESULT([Forcing lazy-lock to avoid allocator/threading bootstrap issues]) + enable_lazy_lock="1" + else + enable_lazy_lock="0" + fi +fi +if test "x${enable_lazy_lock}" = "x1" -a "x${abi}" = "xpecoff" ; then + AC_MSG_RESULT([Forcing no lazy-lock because thread creation monitoring is unimplemented]) + enable_lazy_lock="0" fi if test "x$enable_lazy_lock" = "x1" ; then if test "x$abi" != "xpecoff" ; then @@ -1311,8 +1477,6 @@ if test "x$enable_lazy_lock" = "x1" ; then ]) fi AC_DEFINE([JEMALLOC_LAZY_LOCK], [ ]) -else - enable_lazy_lock="0" fi AC_SUBST([enable_lazy_lock]) @@ -1500,6 +1664,20 @@ if test "x${je_cv_builtin_clz}" = "xyes" ; then AC_DEFINE([JEMALLOC_HAVE_BUILTIN_CLZ], [ ]) fi +dnl ============================================================================ +dnl Check for os_unfair_lock operations as provided on Darwin. + +JE_COMPILABLE([Darwin os_unfair_lock_*()], [ +#include <os/lock.h> +], [ + os_unfair_lock lock = OS_UNFAIR_LOCK_INIT; + os_unfair_lock_lock(&lock); + os_unfair_lock_unlock(&lock); +], [je_cv_os_unfair_lock]) +if test "x${je_cv_os_unfair_lock}" = "xyes" ; then + AC_DEFINE([JEMALLOC_OS_UNFAIR_LOCK], [ ]) +fi + dnl ============================================================================ dnl Check for spinlock(3) operations as provided on Darwin. @@ -1744,6 +1922,7 @@ AC_MSG_RESULT([]) AC_MSG_RESULT([CONFIG : ${CONFIG}]) AC_MSG_RESULT([CC : ${CC}]) AC_MSG_RESULT([CFLAGS : ${CFLAGS}]) +AC_MSG_RESULT([EXTRA_CFLAGS : ${EXTRA_CFLAGS}]) AC_MSG_RESULT([CPPFLAGS : ${CPPFLAGS}]) AC_MSG_RESULT([LDFLAGS : ${LDFLAGS}]) AC_MSG_RESULT([EXTRA_LDFLAGS : ${EXTRA_LDFLAGS}]) diff --git a/doc/html.xsl.in b/doc/html.xsl.in index a91d9746..ec4fa655 100644 --- a/doc/html.xsl.in +++ b/doc/html.xsl.in @@ -1,4 +1,5 @@ + diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index c4a44e3c..3d2e721d 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -52,7 +52,7 @@ LIBRARY This manual describes jemalloc @jemalloc_version@. More information can be found at the jemalloc website. + url="http://jemalloc.net/">jemalloc website. SYNOPSIS @@ -180,20 +180,20 @@ Standard API - The malloc function allocates + The malloc() function allocates size bytes of uninitialized memory. The allocated space is suitably aligned (after possible pointer coercion) for storage of any type of object. - The calloc function allocates + The calloc() function allocates space for number objects, each size bytes in length. The result is identical to - calling malloc with an argument of + calling malloc() with an argument of number * size, with the exception that the allocated memory is explicitly initialized to zero bytes. - The posix_memalign function + The posix_memalign() function allocates size bytes of memory such that the allocation's base address is a multiple of alignment, and returns the allocation in the value @@ -201,7 +201,7 @@ alignment must be a power of 2 at least as large as sizeof(void *). - The aligned_alloc function + The aligned_alloc() function allocates size bytes of memory such that the allocation's base address is a multiple of alignment.
The requested @@ -209,7 +209,7 @@ undefined if size is not an integral multiple of alignment. - The realloc function changes the + The realloc() function changes the size of the previously allocated memory referenced by ptr to size bytes. The contents of the memory are unchanged up to the lesser of the new and old @@ -217,26 +217,26 @@ portion of the memory are undefined. Upon success, the memory referenced by ptr is freed and a pointer to the newly allocated memory is returned. Note that - realloc may move the memory allocation, + realloc() may move the memory allocation, resulting in a different return value than ptr. If ptr is NULL, the - realloc function behaves identically to - malloc for the specified size. + realloc() function behaves identically to + malloc() for the specified size. - The free function causes the + The free() function causes the allocated memory referenced by ptr to be made available for future allocations. If ptr is NULL, no action occurs. Non-standard API - The mallocx, - rallocx, - xallocx, - sallocx, - dallocx, - sdallocx, and - nallocx functions all have a + The mallocx(), + rallocx(), + xallocx(), + sallocx(), + dallocx(), + sdallocx(), and + nallocx() functions all have a flags argument that can be used to specify options. The functions only check the options that are contextually relevant. Use bitwise or (|) operations to @@ -307,19 +307,19 @@ - The mallocx function allocates at + The mallocx() function allocates at least size bytes of memory, and returns a pointer to the base address of the allocation. Behavior is undefined if size is 0. - The rallocx function resizes the + The rallocx() function resizes the allocation at ptr to be at least size bytes, and returns a pointer to the base address of the resulting allocation, which may or may not have moved from its original location. Behavior is undefined if size is 0. - The xallocx function resizes the + The xallocx() function resizes the allocation at ptr in place to be at least size bytes, and returns the real size of the allocation. If extra is non-zero, an attempt is @@ -332,32 +332,32 @@ language="C">(size + extra > SIZE_T_MAX). - The sallocx function returns the + The sallocx() function returns the real size of the allocation at ptr. - The dallocx function causes the + The dallocx() function causes the memory referenced by ptr to be made available for future allocations. - The sdallocx function is an - extension of dallocx with a + The sdallocx() function is an + extension of dallocx() with a size parameter to allow the caller to pass in the allocation size as an optimization. The minimum valid input size is the original requested size of the allocation, and the maximum valid input size is the corresponding value returned by - nallocx or - sallocx. + nallocx() or + sallocx(). - The nallocx function allocates no + The nallocx() function allocates no memory, but it performs the same size computation as the - mallocx function, and returns the real + mallocx() function, and returns the real size of the allocation that would result from the equivalent - mallocx function call, or + mallocx() function call, or 0 if the inputs exceed the maximum supported size class and/or alignment. Behavior is undefined if size is 0. - The mallctl function provides a + The mallctl() function provides a general interface for introspecting the memory allocator, as well as setting modifiable parameters and triggering actions. The period-separated name argument specifies a @@ -372,12 +372,12 @@ newlen; otherwise pass NULL and 0. 
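As an aside for readers of the mallctl() description above, the following is a minimal sketch of a read/write round trip (illustrative only: error handling is elided, and the "stats.allocated" read requires --enable-stats; both the "epoch" and "stats.allocated" names are documented in the MALLCTL NAMESPACE section below):

    #include <stdint.h>
    #include <stdio.h>
    #include <jemalloc/jemalloc.h>

    static void
    refresh_and_report(void)
    {
        uint64_t epoch = 1;
        size_t allocated, sz = sizeof(allocated);

        /* Write a new value to "epoch" to refresh cached statistics. */
        mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch));

        /* Read the refreshed count of bytes allocated by the application. */
        if (mallctl("stats.allocated", &allocated, &sz, NULL, 0) == 0)
            fprintf(stderr, "allocated: %zu\n", allocated);
    }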
- The mallctlnametomib() function provides a way to avoid repeated name lookups for applications that repeatedly query the same portion of the namespace, by translating a name - to a “Management Information Base” (MIB) that can be passed - repeatedly to mallctlbymib. Upon - successful return from mallctlnametomib, + to a Management Information Base (MIB) that can be passed + repeatedly to mallctlbymib(). Upon + successful return from mallctlnametomib(), mibp contains an array of *miblenp integers, where *miblenp is the lesser of the number of components @@ -410,39 +410,40 @@ for (i = 0; i < nbins; i++) { /* Do something with bin_size... */ }]]> - The malloc_stats_print function - writes human-readable summary statistics via the - write_cb callback function pointer and - cbopaque data passed to - write_cb, or - malloc_message if - write_cb is NULL. This - function can be called repeatedly. General information that never - changes during execution can be omitted by specifying "g" as a character + The malloc_stats_print() function writes + summary statistics via the write_cb callback + function pointer and cbopaque data passed to + write_cb, or malloc_message() + if write_cb is NULL. The + statistics are presented in human-readable form unless J is + specified as a character within the opts string, in + which case the statistics are presented in JSON format. This function can be + called repeatedly. General information that never changes during + execution can be omitted by specifying g as a character within the opts string. Note that - malloc_message uses the - mallctl* functions internally, so - inconsistent statistics can be reported if multiple threads use these - functions simultaneously. If --enable-stats is - specified during configuration, “m” and “a” can - be specified to omit merged arena and per arena statistics, respectively; - “b”, “l”, and “h” can be specified to - omit per size class statistics for bins, large objects, and huge objects, - respectively. Unrecognized characters are silently ignored. Note that - thread caching may prevent some statistics from being completely up to - date, since extra locking would be required to merge counters that track - thread cache operations. - + malloc_message() uses the + mallctl*() functions internally, so inconsistent + statistics can be reported if multiple threads use these functions + simultaneously. If --enable-stats is specified during + configuration, m and a can be specified to + omit merged arena and per arena statistics, respectively; + b, l, and h can be specified + to omit per size class statistics for bins, large objects, and huge + objects, respectively. Unrecognized characters are silently ignored. + Note that thread caching may prevent some statistics from being completely + up to date, since extra locking would be required to merge counters that + track thread cache operations. - The malloc_usable_size function + The malloc_usable_size() function returns the usable size of the allocation pointed to by ptr. The return value may be larger than the size that was requested during allocation. The - malloc_usable_size function is not a - mechanism for in-place realloc; rather + malloc_usable_size() function is not a + mechanism for in-place realloc(); rather it is provided solely as a tool for introspection purposes.
Any discrepancy between the requested allocation size and the size reported - by malloc_usable_size should not be + by malloc_usable_size() should not be depended on, since such behavior is entirely implementation-dependent. @@ -455,12 +456,12 @@ for (i = 0; i < nbins; i++) { The string specified via --with-malloc-conf, the string pointed to by the global variable malloc_conf, the - “name” of the file referenced by the symbolic link named + name of the file referenced by the symbolic link named /etc/malloc.conf, and the value of the environment variable MALLOC_CONF, will be interpreted, in that order, from left to right as options. Note that malloc_conf may be read before - main is entered, so the declaration of + main() is entered, so the declaration of malloc_conf should specify an initializer that contains the final value to be read by jemalloc. --with-malloc-conf and malloc_conf are compile-time mechanisms, whereas @@ -549,14 +550,14 @@ for (i = 0; i < nbins; i++) { nearest multiple of the cacheline size, or specify cacheline alignment when allocating. - The realloc, - rallocx, and - xallocx functions may resize allocations + The realloc(), + rallocx(), and + xallocx() functions may resize allocations without moving them under limited circumstances. Unlike the - *allocx API, the standard API does not + *allocx() API, the standard API does not officially round up the usable size of an allocation to the nearest size class, so technically it is necessary to call - realloc to grow e.g. a 9-byte allocation to + realloc() to grow e.g. a 9-byte allocation to 16 bytes, or shrink a 16-byte allocation to 9 bytes. Growth and shrinkage trivially succeeds in place as long as the pre-size and post-size both round up to the same size class. No other API guarantees are made regarding @@ -702,7 +703,7 @@ for (i = 0; i < nbins; i++) { MALLCTL NAMESPACE The following names are defined in the namespace accessible via the - mallctl* functions. Value types are + mallctl*() functions. Value types are specified in parentheses, their readable/writable statuses are encoded as rw, r-, -w, or --, and required build configuration flags follow, if @@ -733,7 +734,7 @@ for (i = 0; i < nbins; i++) { rw If a value is passed in, refresh the data from which - the mallctl* functions report values, + the mallctl*() functions report values, and increment the epoch. Return the current epoch. This is useful for detecting whether another thread caused a refresh. @@ -917,12 +918,12 @@ for (i = 0; i < nbins; i++) { settings are supported if sbrk(2) is supported by the operating - system: “disabled”, “primary”, and - “secondary”; otherwise only “disabled” is - supported. The default is “secondary” if + system: disabled, primary, and + secondary; otherwise only disabled is + supported. The default is secondary if sbrk(2) is supported by the operating - system; “disabled” otherwise. + system; disabled otherwise. @@ -1013,19 +1014,19 @@ for (i = 0; i < nbins; i++) { r- Enable/disable statistics printing at exit. If - enabled, the malloc_stats_print + enabled, the malloc_stats_print() function is called at program exit via an atexit(3) function. If --enable-stats is specified during configuration, this has the potential to cause deadlock for a multi-threaded process that exits while one or more threads are executing in the memory allocation - functions.
Furthermore, atexit() may + allocate memory during application initialization and then deadlock internally when jemalloc in turn calls - atexit, so this option is not + atexit(), so this option is not universally usable (though the application can register its own - atexit function with equivalent + atexit() function with equivalent functionality). Therefore, this option should only be used with care; it is primarily intended as a performance tuning aid during application development. This option is disabled by default. @@ -1038,15 +1039,16 @@ for (i = 0; i < nbins; i++) { r- [] - Junk filling. If set to "alloc", each byte of - uninitialized allocated memory will be initialized to - 0xa5. If set to "free", all deallocated memory will - be initialized to 0x5a. If set to "true", both - allocated and deallocated memory will be initialized, and if set to - "false", junk filling be disabled entirely. This is intended for - debugging and will impact performance negatively. This option is - "false" by default unless --enable-debug is specified - during configuration, in which case it is "true" by default unless + Junk filling. If set to alloc, each byte + of uninitialized allocated memory will be initialized to + 0xa5. If set to free, all deallocated + memory will be initialized to 0x5a. If set to + true, both allocated and deallocated memory will be + initialized, and if set to false, junk filling will be + disabled entirely. This is intended for debugging and will impact + performance negatively. This option is false by default + unless --enable-debug is specified during + configuration, in which case it is true by default unless running inside Valgrind. @@ -1101,8 +1103,8 @@ for (i = 0; i < nbins; i++) { Zero filling enabled/disabled. If enabled, each byte of uninitialized allocated memory will be initialized to 0. Note that this initialization only happens once for each byte, so - realloc and - rallocx calls do not zero memory that + realloc() and + rallocx() calls do not zero memory that was previously allocated. This is intended for debugging and will impact performance negatively. This option is disabled by default. @@ -1325,11 +1327,11 @@ malloc_conf = "xmalloc:true";]]> <prefix>.<pid>.<seq>.f.heap, where <prefix> is controlled by the opt.prof_prefix - option. Note that atexit may allocate + option. Note that atexit() may allocate memory during application initialization and then deadlock internally - when jemalloc in turn calls atexit, so + when jemalloc in turn calls atexit(), so this option is not universally usable (though the application can - register its own atexit function with + register its own atexit() function with equivalent functionality). This option is disabled by default. @@ -1388,7 +1390,7 @@ malloc_conf = "xmalloc:true";]]> thread.allocated mallctl. This is useful for avoiding the overhead of repeated - mallctl* calls. + mallctl*() calls. @@ -1415,7 +1417,7 @@ malloc_conf = "xmalloc:true";]]> thread.deallocated mallctl. This is useful for avoiding the overhead of repeated - mallctl* calls. + mallctl*() calls. @@ -2734,7 +2736,7 @@ MAPPED_LIBRARIES: of run-time assertions that catch application errors such as double-free, write-after-free, etc. - Programs often accidentally depend on “uninitialized” + Programs often accidentally depend on uninitialized memory actually being filled with zero bytes.
Junk filling (see the opt.junk option) tends to expose such bugs in the form of obviously incorrect @@ -2763,29 +2765,29 @@ MAPPED_LIBRARIES: to override the function which emits the text strings forming the errors and warnings if for some reason the STDERR_FILENO file descriptor is not suitable for this. - malloc_message takes the + malloc_message() takes the cbopaque pointer argument that is NULL unless overridden by the arguments in a call to - malloc_stats_print, followed by a string + malloc_stats_print(), followed by a string pointer. Please note that doing anything which tries to allocate memory in this function is likely to result in a crash or deadlock. All messages are prefixed by - “<jemalloc>: ”. + <jemalloc>: . RETURN VALUES Standard API - The malloc and - calloc functions return a pointer to the + The malloc() and + calloc() functions return a pointer to the allocated memory if successful; otherwise a NULL pointer is returned and errno is set to ENOMEM. - The posix_memalign function + The posix_memalign() function returns the value 0 if successful; otherwise it returns an error value. - The posix_memalign function will fail + The posix_memalign() function will fail if: @@ -2804,11 +2806,11 @@ MAPPED_LIBRARIES: - The aligned_alloc function returns + The aligned_alloc() function returns a pointer to the allocated memory if successful; otherwise a NULL pointer is returned and errno is set. The - aligned_alloc function will fail if: + aligned_alloc() function will fail if: EINVAL @@ -2825,44 +2827,44 @@ MAPPED_LIBRARIES: - The realloc function returns a + The realloc() function returns a pointer, possibly identical to ptr, to the allocated memory if successful; otherwise a NULL pointer is returned, and errno is set to ENOMEM if the error was the result of an - allocation failure. The realloc + allocation failure. The realloc() function always leaves the original buffer intact when an error occurs. - The free function returns no + The free() function returns no value. Non-standard API - The mallocx and - rallocx functions return a pointer to + The mallocx() and + rallocx() functions return a pointer to the allocated memory if successful; otherwise a NULL pointer is returned to indicate insufficient contiguous memory was available to service the allocation request. - The xallocx function returns the + The xallocx() function returns the real size of the resulting resized allocation pointed to by ptr, which is a value less than size if the allocation could not be adequately grown in place. - The sallocx function returns the + The sallocx() function returns the real size of the allocation pointed to by ptr. - The nallocx returns the real size + The nallocx() returns the real size that would result from a successful equivalent - mallocx function call, or zero if + mallocx() function call, or zero if insufficient memory is available to perform the size computation. - The mallctl, - mallctlnametomib, and - mallctlbymib functions return 0 on + The mallctl(), + mallctlnametomib(), and + mallctlbymib() functions return 0 on success; otherwise they return an error value. The functions will fail if: @@ -2898,13 +2900,13 @@ MAPPED_LIBRARIES: EFAULT An interface with side effects failed in some way - not directly related to mallctl* + not directly related to mallctl*() read/write processing. - The malloc_usable_size function + The malloc_usable_size() function returns the usable size of the allocation pointed to by ptr. 
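As a usage note for the JSON output mode introduced in this release (described under malloc_stats_print() above), a minimal sketch:

    #include <jemalloc/jemalloc.h>

    static void
    dump_stats_json(void)
    {
        /*
         * "J" selects JSON format; appending "g" would omit general
         * information that never changes during execution.  Passing
         * NULL for write_cb routes output through malloc_message(),
         * which writes to STDERR_FILENO by default.
         */
        malloc_stats_print(NULL, NULL, "J");
    }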
@@ -2952,13 +2954,13 @@ malloc_conf = "lg_chunk:24";]]> STANDARDS - The malloc, - calloc, - realloc, and - free functions conform to ISO/IEC - 9899:1990 (“ISO C90”). + The malloc(), + calloc(), + realloc(), and + free() functions conform to ISO/IEC + 9899:1990 (ISO C90). - The posix_memalign function conforms - to IEEE Std 1003.1-2001 (“POSIX.1”). + The posix_memalign() function conforms + to IEEE Std 1003.1-2001 (POSIX.1). diff --git a/doc/stylesheet.xsl b/doc/stylesheet.xsl index 4e334a86..619365d8 100644 --- a/doc/stylesheet.xsl +++ b/doc/stylesheet.xsl @@ -1,7 +1,10 @@ ansi - + + + + - "" + diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index b1de2b61..1277d080 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -42,6 +42,7 @@ typedef struct arena_chunk_map_bits_s arena_chunk_map_bits_t; typedef struct arena_chunk_map_misc_s arena_chunk_map_misc_t; typedef struct arena_chunk_s arena_chunk_t; typedef struct arena_bin_info_s arena_bin_info_t; +typedef struct arena_decay_s arena_decay_t; typedef struct arena_bin_s arena_bin_t; typedef struct arena_s arena_t; typedef struct arena_tdata_s arena_tdata_t; @@ -257,6 +258,49 @@ struct arena_bin_info_s { uint32_t reg0_offset; }; +struct arena_decay_s { + /* + * Approximate time in seconds from the creation of a set of unused + * dirty pages until an equivalent set of unused dirty pages is purged + * and/or reused. + */ + ssize_t time; + /* time / SMOOTHSTEP_NSTEPS. */ + nstime_t interval; + /* + * Time at which the current decay interval logically started. We do + * not actually advance to a new epoch until sometime after it starts + * because of scheduling and computation delays, and it is even possible + * to completely skip epochs. In all cases, during epoch advancement we + * merge all relevant activity into the most recently recorded epoch. + */ + nstime_t epoch; + /* Deadline randomness generator. */ + uint64_t jitter_state; + /* + * Deadline for current epoch. This is the sum of interval and per + * epoch jitter which is a uniform random variable in [0..interval). + * Epochs always advance by precise multiples of interval, but we + * randomize the deadline to reduce the likelihood of arenas purging in + * lockstep. + */ + nstime_t deadline; + /* + * Number of dirty pages at beginning of current epoch. During epoch + * advancement we use the delta between arena->decay.ndirty and + * arena->ndirty to determine how many dirty pages, if any, were + * generated. + */ + size_t ndirty; + /* + * Trailing log of how many unused dirty pages were generated during + * each of the past SMOOTHSTEP_NSTEPS decay epochs, where the last + * element is the most recent epoch. Corresponding epoch times are + * relative to epoch. + */ + size_t backlog[SMOOTHSTEP_NSTEPS]; +}; + struct arena_bin_s { /* * All operations on runcur, runs, and stats require that lock be @@ -394,52 +438,8 @@ struct arena_s { arena_runs_dirty_link_t runs_dirty; extent_node_t chunks_cache; - /* - * Approximate time in seconds from the creation of a set of unused - * dirty pages until an equivalent set of unused dirty pages is purged - * and/or reused. - */ - ssize_t decay_time; - /* decay_time / SMOOTHSTEP_NSTEPS. */ - nstime_t decay_interval; - /* - * Time at which the current decay interval logically started. We do - * not actually advance to a new epoch until sometime after it starts - * because of scheduling and computation delays, and it is even possible - * to completely skip epochs. 
In all cases, during epoch advancement we - * merge all relevant activity into the most recently recorded epoch. - */ - nstime_t decay_epoch; - /* decay_deadline randomness generator. */ - uint64_t decay_jitter_state; - /* - * Deadline for current epoch. This is the sum of decay_interval and - * per epoch jitter which is a uniform random variable in - * [0..decay_interval). Epochs always advance by precise multiples of - * decay_interval, but we randomize the deadline to reduce the - * likelihood of arenas purging in lockstep. - */ - nstime_t decay_deadline; - /* - * Number of dirty pages at beginning of current epoch. During epoch - * advancement we use the delta between decay_ndirty and ndirty to - * determine how many dirty pages, if any, were generated, and record - * the result in decay_backlog. - */ - size_t decay_ndirty; - /* - * Memoized result of arena_decay_backlog_npages_limit() corresponding - * to the current contents of decay_backlog, i.e. the limit on how many - * pages are allowed to exist for the decay epochs. - */ - size_t decay_backlog_npages_limit; - /* - * Trailing log of how many unused dirty pages were generated during - * each of the past SMOOTHSTEP_NSTEPS decay epochs, where the last - * element is the most recent epoch. Corresponding epoch times are - * relative to decay_epoch. - */ - size_t decay_backlog[SMOOTHSTEP_NSTEPS]; + /* Decay-based purging state. */ + arena_decay_t decay; /* Extant huge allocations. */ ql_head(extent_node_t) huge; @@ -470,10 +470,12 @@ struct arena_s { arena_bin_t bins[NBINS]; /* - * Quantized address-ordered heaps of this arena's available runs. The - * heaps are used for first-best-fit run allocation. + * Size-segregated address-ordered heaps of this arena's available runs, + * used for first-best-fit run allocation. Runs are quantized, i.e. + * they reside in the last heap which corresponds to a size class less + * than or equal to the run size. */ - arena_run_heap_t runs_avail[1]; /* Dynamically sized. */ + arena_run_heap_t runs_avail[NPSIZES]; }; /* Used in conjunction with tsd for fast arena-related context lookup. */ @@ -505,7 +507,6 @@ extern size_t map_bias; /* Number of arena chunk header pages. */ extern size_t map_misc_offset; extern size_t arena_maxrun; /* Max run size for arenas. */ extern size_t large_maxclass; /* Max large size class. */ -extern size_t run_quantize_max; /* Max run_quantize_*() input. */ extern unsigned nlclasses; /* Number of large size classes. */ extern unsigned nhclasses; /* Number of huge size classes. */ @@ -601,7 +602,7 @@ unsigned arena_nthreads_get(arena_t *arena, bool internal); void arena_nthreads_inc(arena_t *arena, bool internal); void arena_nthreads_dec(arena_t *arena, bool internal); arena_t *arena_new(tsdn_t *tsdn, unsigned ind); -bool arena_boot(void); +void arena_boot(void); void arena_prefork0(tsdn_t *tsdn, arena_t *arena); void arena_prefork1(tsdn_t *tsdn, arena_t *arena); void arena_prefork2(tsdn_t *tsdn, arena_t *arena); diff --git a/include/jemalloc/internal/atomic.h b/include/jemalloc/internal/atomic.h index 3f15ea14..3936f68b 100644 --- a/include/jemalloc/internal/atomic.h +++ b/include/jemalloc/internal/atomic.h @@ -66,8 +66,7 @@ void atomic_write_u(unsigned *p, unsigned x); #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ATOMIC_C_)) /******************************************************************************/ /* 64-bit operations. 
*/ -#if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3) -# if (defined(__amd64__) || defined(__x86_64__)) +#if (defined(__amd64__) || defined(__x86_64__)) JEMALLOC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x) { @@ -125,7 +124,7 @@ atomic_write_uint64(uint64_t *p, uint64_t x) : "memory" /* Clobbers. */ ); } -# elif (defined(JEMALLOC_C11ATOMICS)) +#elif (defined(JEMALLOC_C11ATOMICS)) JEMALLOC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x) { @@ -153,7 +152,7 @@ atomic_write_uint64(uint64_t *p, uint64_t x) volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p; atomic_store(a, x); } -# elif (defined(JEMALLOC_ATOMIC9)) +#elif (defined(JEMALLOC_ATOMIC9)) JEMALLOC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x) { @@ -193,7 +192,7 @@ atomic_write_uint64(uint64_t *p, uint64_t x) atomic_store_rel_long(p, x); } -# elif (defined(JEMALLOC_OSATOMIC)) +#elif (defined(JEMALLOC_OSATOMIC)) JEMALLOC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x) { @@ -225,7 +224,7 @@ atomic_write_uint64(uint64_t *p, uint64_t x) o = atomic_read_uint64(p); } while (atomic_cas_uint64(p, o, x)); } -# elif (defined(_MSC_VER)) +#elif (defined(_MSC_VER)) JEMALLOC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x) { @@ -255,7 +254,7 @@ atomic_write_uint64(uint64_t *p, uint64_t x) InterlockedExchange64(p, x); } -# elif (defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8) || \ +#elif (defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8) || \ defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_8)) JEMALLOC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x) @@ -284,9 +283,8 @@ atomic_write_uint64(uint64_t *p, uint64_t x) __sync_lock_test_and_set(p, x); } -# else -# error "Missing implementation for 64-bit atomic operations" -# endif +#else +# error "Missing implementation for 64-bit atomic operations" #endif /******************************************************************************/ diff --git a/include/jemalloc/internal/chunk.h b/include/jemalloc/internal/chunk.h index c9fd4ecb..38c9a012 100644 --- a/include/jemalloc/internal/chunk.h +++ b/include/jemalloc/internal/chunk.h @@ -58,7 +58,7 @@ void chunk_deregister(const void *chunk, const extent_node_t *node); void *chunk_alloc_base(size_t size); void *chunk_alloc_cache(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, void *new_addr, size_t size, size_t alignment, - bool *zero, bool dalloc_node); + bool *zero, bool *commit, bool dalloc_node); void *chunk_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit); @@ -71,9 +71,6 @@ bool chunk_purge_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk, size_t size, size_t offset, size_t length); bool chunk_boot(void); -void chunk_prefork(tsdn_t *tsdn); -void chunk_postfork_parent(tsdn_t *tsdn); -void chunk_postfork_child(tsdn_t *tsdn); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ diff --git a/include/jemalloc/internal/chunk_dss.h b/include/jemalloc/internal/chunk_dss.h index 724fa579..da8511ba 100644 --- a/include/jemalloc/internal/chunk_dss.h +++ b/include/jemalloc/internal/chunk_dss.h @@ -21,15 +21,13 @@ extern const char *dss_prec_names[]; /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -dss_prec_t chunk_dss_prec_get(tsdn_t *tsdn); -bool chunk_dss_prec_set(tsdn_t *tsdn, dss_prec_t dss_prec); +dss_prec_t chunk_dss_prec_get(void); +bool 
chunk_dss_prec_set(dss_prec_t dss_prec); void *chunk_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit); -bool chunk_in_dss(tsdn_t *tsdn, void *chunk); -bool chunk_dss_boot(void); -void chunk_dss_prefork(tsdn_t *tsdn); -void chunk_dss_postfork_parent(tsdn_t *tsdn); -void chunk_dss_postfork_child(tsdn_t *tsdn); +bool chunk_in_dss(void *chunk); +bool chunk_dss_mergeable(void *chunk_a, void *chunk_b); +void chunk_dss_boot(void); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ diff --git a/include/jemalloc/internal/ckh.h b/include/jemalloc/internal/ckh.h index 46e151cd..f75ad90b 100644 --- a/include/jemalloc/internal/ckh.h +++ b/include/jemalloc/internal/ckh.h @@ -64,13 +64,13 @@ struct ckh_s { /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -bool ckh_new(tsdn_t *tsdn, ckh_t *ckh, size_t minitems, ckh_hash_t *hash, +bool ckh_new(tsd_t *tsd, ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh_keycomp_t *keycomp); -void ckh_delete(tsdn_t *tsdn, ckh_t *ckh); +void ckh_delete(tsd_t *tsd, ckh_t *ckh); size_t ckh_count(ckh_t *ckh); bool ckh_iter(ckh_t *ckh, size_t *tabind, void **key, void **data); -bool ckh_insert(tsdn_t *tsdn, ckh_t *ckh, const void *key, const void *data); -bool ckh_remove(tsdn_t *tsdn, ckh_t *ckh, const void *searchkey, void **key, +bool ckh_insert(tsd_t *tsd, ckh_t *ckh, const void *key, const void *data); +bool ckh_remove(tsd_t *tsd, ckh_t *ckh, const void *searchkey, void **key, void **data); bool ckh_search(ckh_t *ckh, const void *searchkey, void **key, void **data); void ckh_string_hash(const void *key, size_t r_hash[2]); diff --git a/include/jemalloc/internal/huge.h b/include/jemalloc/internal/huge.h index b5fa9e63..22184d9b 100644 --- a/include/jemalloc/internal/huge.h +++ b/include/jemalloc/internal/huge.h @@ -17,7 +17,7 @@ bool huge_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, void *huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t usize, size_t alignment, bool zero, tcache_t *tcache); #ifdef JEMALLOC_JET -typedef void (huge_dalloc_junk_t)(tsdn_t *, void *, size_t); +typedef void (huge_dalloc_junk_t)(void *, size_t); extern huge_dalloc_junk_t *huge_dalloc_junk; #endif void huge_dalloc(tsdn_t *tsdn, void *ptr); diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 8f82edd4..fdc8fef9 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -162,7 +162,9 @@ static const bool config_cache_oblivious = #endif #include "jemalloc/internal/ph.h" +#ifndef __PGI #define RB_COMPACT +#endif #include "jemalloc/internal/rb.h" #include "jemalloc/internal/qr.h" #include "jemalloc/internal/ql.h" @@ -185,6 +187,9 @@ static const bool config_cache_oblivious = #include "jemalloc/internal/jemalloc_internal_macros.h" +/* Page size index type. */ +typedef unsigned pszind_t; + /* Size class index type. 
*/ typedef unsigned szind_t; @@ -234,7 +239,7 @@ typedef unsigned szind_t; # ifdef __alpha__ # define LG_QUANTUM 4 # endif -# if (defined(__sparc64__) || defined(__sparcv9)) +# if (defined(__sparc64__) || defined(__sparcv9) || defined(__sparc_v9__)) # define LG_QUANTUM 4 # endif # if (defined(__amd64__) || defined(__x86_64__) || defined(_M_X64)) @@ -364,6 +369,7 @@ typedef unsigned szind_t; #include "jemalloc/internal/valgrind.h" #include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/spin.h" #include "jemalloc/internal/prng.h" #include "jemalloc/internal/ticker.h" #include "jemalloc/internal/ckh.h" @@ -396,6 +402,7 @@ typedef unsigned szind_t; #include "jemalloc/internal/valgrind.h" #include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/spin.h" #include "jemalloc/internal/prng.h" #include "jemalloc/internal/ticker.h" #include "jemalloc/internal/ckh.h" @@ -455,11 +462,16 @@ extern unsigned narenas_auto; */ extern arena_t **arenas; +/* + * pind2sz_tab encodes the same information as could be computed by + * pind2sz_compute(). + */ +extern size_t const pind2sz_tab[NPSIZES]; /* * index2size_tab encodes the same information as could be computed (at * unacceptable cost in some code paths) by index2size_compute(). */ -extern size_t const index2size_tab[NSIZES+1]; +extern size_t const index2size_tab[NSIZES]; /* * size2index_tab is a compact lookup table that rounds request sizes up to * size classes. In order to reduce cache footprint, the table is compressed, @@ -467,6 +479,7 @@ extern size_t const index2size_tab[NSIZES+1]; */ extern uint8_t const size2index_tab[]; +arena_t *a0get(void); void *a0malloc(size_t size); void a0dalloc(void *ptr); void *bootstrap_malloc(size_t size); @@ -492,6 +505,7 @@ void jemalloc_postfork_child(void); #include "jemalloc/internal/valgrind.h" #include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/spin.h" #include "jemalloc/internal/prng.h" #include "jemalloc/internal/ticker.h" #include "jemalloc/internal/ckh.h" @@ -524,6 +538,7 @@ void jemalloc_postfork_child(void); #include "jemalloc/internal/valgrind.h" #include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/spin.h" #include "jemalloc/internal/prng.h" #include "jemalloc/internal/ticker.h" #include "jemalloc/internal/ckh.h" @@ -543,6 +558,11 @@ void jemalloc_postfork_child(void); #include "jemalloc/internal/huge.h" #ifndef JEMALLOC_ENABLE_INLINE +pszind_t psz2ind(size_t psz); +size_t pind2sz_compute(pszind_t pind); +size_t pind2sz_lookup(pszind_t pind); +size_t pind2sz(pszind_t pind); +size_t psz2u(size_t psz); szind_t size2index_compute(size_t size); szind_t size2index_lookup(size_t size); szind_t size2index(size_t size); @@ -555,7 +575,7 @@ size_t s2u(size_t size); size_t sa2u(size_t size, size_t alignment); arena_t *arena_choose_impl(tsd_t *tsd, arena_t *arena, bool internal); arena_t *arena_choose(tsd_t *tsd, arena_t *arena); -arena_t *arena_ichoose(tsdn_t *tsdn, arena_t *arena); +arena_t *arena_ichoose(tsd_t *tsd, arena_t *arena); arena_tdata_t *arena_tdata_get(tsd_t *tsd, unsigned ind, bool refresh_if_missing); arena_t *arena_get(tsdn_t *tsdn, unsigned ind, bool init_if_missing); @@ -563,10 +583,90 @@ ticker_t *decay_ticker_get(tsd_t *tsd, unsigned ind); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) +JEMALLOC_INLINE pszind_t +psz2ind(size_t psz) +{ + + if (unlikely(psz > HUGE_MAXCLASS)) + return 
(NPSIZES); + { + pszind_t x = lg_floor((psz<<1)-1); + pszind_t shift = (x < LG_SIZE_CLASS_GROUP + LG_PAGE) ? 0 : x - + (LG_SIZE_CLASS_GROUP + LG_PAGE); + pszind_t grp = shift << LG_SIZE_CLASS_GROUP; + + pszind_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_PAGE + 1) ? + LG_PAGE : x - LG_SIZE_CLASS_GROUP - 1; + + size_t delta_inverse_mask = ZI(-1) << lg_delta; + pszind_t mod = ((((psz-1) & delta_inverse_mask) >> lg_delta)) & + ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1); + + pszind_t ind = grp + mod; + return (ind); + } +} + +JEMALLOC_INLINE size_t +pind2sz_compute(pszind_t pind) +{ + + { + size_t grp = pind >> LG_SIZE_CLASS_GROUP; + size_t mod = pind & ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1); + + size_t grp_size_mask = ~((!!grp)-1); + size_t grp_size = ((ZU(1) << (LG_PAGE + + (LG_SIZE_CLASS_GROUP-1))) << grp) & grp_size_mask; + + size_t shift = (grp == 0) ? 1 : grp; + size_t lg_delta = shift + (LG_PAGE-1); + size_t mod_size = (mod+1) << lg_delta; + + size_t sz = grp_size + mod_size; + return (sz); + } +} + +JEMALLOC_INLINE size_t +pind2sz_lookup(pszind_t pind) +{ + size_t ret = (size_t)pind2sz_tab[pind]; + assert(ret == pind2sz_compute(pind)); + return (ret); +} + +JEMALLOC_INLINE size_t +pind2sz(pszind_t pind) +{ + + assert(pind < NPSIZES); + return (pind2sz_lookup(pind)); +} + +JEMALLOC_INLINE size_t +psz2u(size_t psz) +{ + + if (unlikely(psz > HUGE_MAXCLASS)) + return (0); + { + size_t x = lg_floor((psz<<1)-1); + size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_PAGE + 1) ? + LG_PAGE : x - LG_SIZE_CLASS_GROUP - 1; + size_t delta = ZU(1) << lg_delta; + size_t delta_mask = delta - 1; + size_t usize = (psz + delta_mask) & ~delta_mask; + return (usize); + } +} + JEMALLOC_INLINE szind_t size2index_compute(size_t size) { + if (unlikely(size > HUGE_MAXCLASS)) + return (NSIZES); #if (NTBINS != 0) if (size <= (ZU(1) << LG_TINY_MAXCLASS)) { szind_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1; @@ -575,9 +675,7 @@ size2index_compute(size_t size) } #endif { - szind_t x = unlikely(ZI(size) < 0) ? ((size<<1) ? - (ZU(1)<<(LG_SIZEOF_PTR+3)) : ((ZU(1)<<(LG_SIZEOF_PTR+3))-1)) - : lg_floor((size<<1)-1); + szind_t x = lg_floor((size<<1)-1); szind_t shift = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM) ? 0 : x - (LG_SIZE_CLASS_GROUP + LG_QUANTUM); szind_t grp = shift << LG_SIZE_CLASS_GROUP; @@ -663,6 +761,8 @@ JEMALLOC_ALWAYS_INLINE size_t s2u_compute(size_t size) { + if (unlikely(size > HUGE_MAXCLASS)) + return (0); #if (NTBINS > 0) if (size <= (ZU(1) << LG_TINY_MAXCLASS)) { size_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1; @@ -672,9 +772,7 @@ s2u_compute(size_t size) } #endif { - size_t x = unlikely(ZI(size) < 0) ? ((size<<1) ? - (ZU(1)<<(LG_SIZEOF_PTR+3)) : ((ZU(1)<<(LG_SIZEOF_PTR+3))-1)) - : lg_floor((size<<1)-1); + size_t x = lg_floor((size<<1)-1); size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1) ? 
LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1; size_t delta = ZU(1) << lg_delta; @@ -815,14 +913,10 @@ arena_choose(tsd_t *tsd, arena_t *arena) } JEMALLOC_INLINE arena_t * -arena_ichoose(tsdn_t *tsdn, arena_t *arena) +arena_ichoose(tsd_t *tsd, arena_t *arena) { - assert(!tsdn_null(tsdn) || arena != NULL); - - if (!tsdn_null(tsdn)) - return (arena_choose_impl(tsdn_tsd(tsdn), NULL, true)); - return (arena); + return (arena_choose_impl(tsd, arena, true)); } JEMALLOC_INLINE arena_tdata_t * diff --git a/include/jemalloc/internal/jemalloc_internal_decls.h b/include/jemalloc/internal/jemalloc_internal_decls.h index 2b8ca5d0..c907d910 100644 --- a/include/jemalloc/internal/jemalloc_internal_decls.h +++ b/include/jemalloc/internal/jemalloc_internal_decls.h @@ -17,8 +17,18 @@ # include # endif # include +# ifdef JEMALLOC_OS_UNFAIR_LOCK +# include +# endif +# ifdef JEMALLOC_GLIBC_MALLOC_HOOK +# include +# endif # include # include +# include +# ifdef JEMALLOC_HAVE_MACH_ABSOLUTE_TIME +# include +# endif #endif #include diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 7de0cf7c..9b3dca50 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -60,12 +60,20 @@ */ #undef JEMALLOC_HAVE_MADVISE +/* + * Defined if os_unfair_lock_*() functions are available, as provided by Darwin. + */ +#undef JEMALLOC_OS_UNFAIR_LOCK + /* * Defined if OSSpin*() functions are available, as provided by Darwin, and * documented in the spinlock(3) manual page. */ #undef JEMALLOC_OSSPIN +/* Defined if syscall(2) is available. */ +#undef JEMALLOC_HAVE_SYSCALL + /* * Defined if secure_getenv(3) is available. */ @@ -76,6 +84,21 @@ */ #undef JEMALLOC_HAVE_ISSETUGID +/* + * Defined if clock_gettime(CLOCK_MONOTONIC_COARSE, ...) is available. + */ +#undef JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE + +/* + * Defined if clock_gettime(CLOCK_MONOTONIC, ...) is available. + */ +#undef JEMALLOC_HAVE_CLOCK_MONOTONIC + +/* + * Defined if mach_absolute_time() is available. + */ +#undef JEMALLOC_HAVE_MACH_ABSOLUTE_TIME + /* * Defined if _malloc_thread_cleanup() exists. At least in the case of * FreeBSD, pthread_key_create() allocates, which if used during malloc @@ -188,6 +211,12 @@ /* TLS is used to map arenas and magazine caches to threads. */ #undef JEMALLOC_TLS +/* + * Used to mark unreachable code to quiet "end of non-void" compiler warnings. + * Don't use this directly; instead use unreachable() from util.h + */ +#undef JEMALLOC_INTERNAL_UNREACHABLE + /* * ffs*() functions to use for bitmapping. Don't use these directly; instead, * use ffs_*() from util.h. 
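The JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE, JEMALLOC_HAVE_CLOCK_MONOTONIC, and JEMALLOC_HAVE_MACH_ABSOLUTE_TIME defines above let configure record which monotonic time source is available; the nstime_monotonic()/nstime_update() interfaces added later in this diff consume that choice. A minimal sketch of such compile-time selection, assuming only the JEMALLOC_HAVE_* macros; example_clock_ns() and its fallback behavior are hypothetical, not jemalloc's actual nstime.c:

#include <stdint.h>
#include <time.h>
#ifdef JEMALLOC_HAVE_MACH_ABSOLUTE_TIME
#include <mach/mach_time.h>
#endif

static uint64_t
example_clock_ns(void)
{
#if defined(JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE)
	/* Preferred on Linux: monotonic and cheap to read. */
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC_COARSE, &ts);
	return ((uint64_t)ts.tv_sec * 1000000000ULL + (uint64_t)ts.tv_nsec);
#elif defined(JEMALLOC_HAVE_CLOCK_MONOTONIC)
	/* Portable POSIX monotonic clock. */
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return ((uint64_t)ts.tv_sec * 1000000000ULL + (uint64_t)ts.tv_nsec);
#elif defined(JEMALLOC_HAVE_MACH_ABSOLUTE_TIME)
	/* Darwin: convert Mach ticks to nanoseconds via the timebase. */
	mach_timebase_info_data_t tb;

	mach_timebase_info(&tb);
	return (mach_absolute_time() * tb.numer / tb.denom);
#else
	/* No monotonic source detected; nstime_monotonic() would report false. */
	return (0);
#endif
}

Preferring the coarse clock first matches the ordering implied by the defines: decay timing needs monotonicity far more than it needs nanosecond resolution.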
diff --git a/include/jemalloc/internal/mb.h b/include/jemalloc/internal/mb.h index 437c86f7..5384728f 100644 --- a/include/jemalloc/internal/mb.h +++ b/include/jemalloc/internal/mb.h @@ -105,8 +105,8 @@ mb_write(void) malloc_mutex_t mtx; malloc_mutex_init(&mtx, "mb", WITNESS_RANK_OMIT); - malloc_mutex_lock(NULL, &mtx); - malloc_mutex_unlock(NULL, &mtx); + malloc_mutex_lock(TSDN_NULL, &mtx); + malloc_mutex_unlock(TSDN_NULL, &mtx); } #endif #endif diff --git a/include/jemalloc/internal/mutex.h b/include/jemalloc/internal/mutex.h index 52217991..b442d2d4 100644 --- a/include/jemalloc/internal/mutex.h +++ b/include/jemalloc/internal/mutex.h @@ -5,6 +5,9 @@ typedef struct malloc_mutex_s malloc_mutex_t; #ifdef _WIN32 # define MALLOC_MUTEX_INITIALIZER +#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) +# define MALLOC_MUTEX_INITIALIZER \ + {OS_UNFAIR_LOCK_INIT, WITNESS_INITIALIZER(WITNESS_RANK_OMIT)} #elif (defined(JEMALLOC_OSSPIN)) # define MALLOC_MUTEX_INITIALIZER {0, WITNESS_INITIALIZER(WITNESS_RANK_OMIT)} #elif (defined(JEMALLOC_MUTEX_INIT_CB)) @@ -35,6 +38,8 @@ struct malloc_mutex_s { # else CRITICAL_SECTION lock; # endif +#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) + os_unfair_lock lock; #elif (defined(JEMALLOC_OSSPIN)) OSSpinLock lock; #elif (defined(JEMALLOC_MUTEX_INIT_CB)) @@ -88,6 +93,8 @@ malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex) # else EnterCriticalSection(&mutex->lock); # endif +#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) + os_unfair_lock_lock(&mutex->lock); #elif (defined(JEMALLOC_OSSPIN)) OSSpinLockLock(&mutex->lock); #else @@ -109,6 +116,8 @@ malloc_mutex_unlock(tsdn_t *tsdn, malloc_mutex_t *mutex) # else LeaveCriticalSection(&mutex->lock); # endif +#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) + os_unfair_lock_unlock(&mutex->lock); #elif (defined(JEMALLOC_OSSPIN)) OSSpinLockUnlock(&mutex->lock); #else diff --git a/include/jemalloc/internal/nstime.h b/include/jemalloc/internal/nstime.h index dc293b73..93b27dc8 100644 --- a/include/jemalloc/internal/nstime.h +++ b/include/jemalloc/internal/nstime.h @@ -1,9 +1,6 @@ /******************************************************************************/ #ifdef JEMALLOC_H_TYPES -#define JEMALLOC_CLOCK_GETTIME defined(_POSIX_MONOTONIC_CLOCK) \ - && _POSIX_MONOTONIC_CLOCK >= 0 - typedef struct nstime_s nstime_t; /* Maximum supported number of seconds (~584 years). 
*/ @@ -34,9 +31,12 @@ void nstime_imultiply(nstime_t *time, uint64_t multiplier); void nstime_idivide(nstime_t *time, uint64_t divisor); uint64_t nstime_divide(const nstime_t *time, const nstime_t *divisor); #ifdef JEMALLOC_JET +typedef bool (nstime_monotonic_t)(void); +extern nstime_monotonic_t *nstime_monotonic; typedef bool (nstime_update_t)(nstime_t *); extern nstime_update_t *nstime_update; #else +bool nstime_monotonic(void); bool nstime_update(nstime_t *time); #endif diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index f2b6a55d..8972b37b 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -1,4 +1,5 @@ a0dalloc +a0get a0malloc arena_aalloc arena_alloc_junk_small @@ -167,20 +168,15 @@ chunk_dalloc_mmap chunk_dalloc_wrapper chunk_deregister chunk_dss_boot -chunk_dss_postfork_child -chunk_dss_postfork_parent +chunk_dss_mergeable chunk_dss_prec_get chunk_dss_prec_set -chunk_dss_prefork chunk_hooks_default chunk_hooks_get chunk_hooks_set chunk_in_dss chunk_lookup chunk_npages -chunk_postfork_child -chunk_postfork_parent -chunk_prefork chunk_purge_wrapper chunk_register chunks_rtree @@ -360,6 +356,7 @@ nstime_idivide nstime_imultiply nstime_init nstime_init2 +nstime_monotonic nstime_ns nstime_nsec nstime_sec @@ -401,6 +398,10 @@ pages_map pages_purge pages_trim pages_unmap +pind2sz +pind2sz_compute +pind2sz_lookup +pind2sz_tab pow2_ceil_u32 pow2_ceil_u64 pow2_ceil_zu @@ -454,12 +455,13 @@ prof_thread_active_init_set prof_thread_active_set prof_thread_name_get prof_thread_name_set +psz2ind +psz2u purge_mode_names quarantine quarantine_alloc_hook quarantine_alloc_hook_work quarantine_cleanup -register_zone rtree_child_read rtree_child_read_hard rtree_child_tryread @@ -477,7 +479,6 @@ rtree_val_read rtree_val_write run_quantize_ceil run_quantize_floor -run_quantize_max s2u s2u_compute s2u_lookup @@ -487,6 +488,8 @@ size2index size2index_compute size2index_lookup size2index_tab +spin_adaptive +spin_init stats_cactive stats_cactive_add stats_cactive_get @@ -545,7 +548,9 @@ tsd_booted_get tsd_cleanup tsd_cleanup_wrapper tsd_fetch +tsd_fetch_impl tsd_get +tsd_get_allocates tsd_iarena_get tsd_iarena_set tsd_iarenap_get @@ -604,9 +609,11 @@ witness_lock witness_lock_error witness_lockless_error witness_not_owner_error +witness_owner witness_owner_error witness_postfork_child witness_postfork_parent witness_prefork witness_unlock witnesses_cleanup +zone_register diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index 21dff5fb..8293b71e 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -299,9 +299,9 @@ extern prof_dump_header_t *prof_dump_header; void prof_idump(tsdn_t *tsdn); bool prof_mdump(tsd_t *tsd, const char *filename); void prof_gdump(tsdn_t *tsdn); -prof_tdata_t *prof_tdata_init(tsdn_t *tsdn); +prof_tdata_t *prof_tdata_init(tsd_t *tsd); prof_tdata_t *prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata); -void prof_reset(tsdn_t *tsdn, size_t lg_sample); +void prof_reset(tsd_t *tsd, size_t lg_sample); void prof_tdata_cleanup(tsd_t *tsd); bool prof_active_get(tsdn_t *tsdn); bool prof_active_set(tsdn_t *tsdn, bool active); @@ -315,7 +315,7 @@ bool prof_gdump_get(tsdn_t *tsdn); bool prof_gdump_set(tsdn_t *tsdn, bool active); void prof_boot0(void); void prof_boot1(void); -bool prof_boot2(tsdn_t *tsdn); +bool prof_boot2(tsd_t *tsd); void prof_prefork0(tsdn_t *tsdn); void prof_prefork1(tsdn_t *tsdn); void 
prof_postfork_parent(tsdn_t *tsdn); @@ -384,7 +384,7 @@ prof_tdata_get(tsd_t *tsd, bool create) if (create) { if (unlikely(tdata == NULL)) { if (tsd_nominal(tsd)) { - tdata = prof_tdata_init(tsd_tsdn(tsd)); + tdata = prof_tdata_init(tsd); tsd_prof_tdata_set(tsd, tdata); } } else if (unlikely(tdata->expired)) { diff --git a/include/jemalloc/internal/size_classes.sh b/include/jemalloc/internal/size_classes.sh index 2b0ca29a..f6fbce4e 100755 --- a/include/jemalloc/internal/size_classes.sh +++ b/include/jemalloc/internal/size_classes.sh @@ -48,6 +48,21 @@ size_class() { lg_p=$5 lg_kmax=$6 + if [ ${lg_delta} -ge ${lg_p} ] ; then + psz="yes" + else + pow2 ${lg_p}; p=${pow2_result} + pow2 ${lg_grp}; grp=${pow2_result} + pow2 ${lg_delta}; delta=${pow2_result} + sz=$((${grp} + ${delta} * ${ndelta})) + npgs=$((${sz} / ${p})) + if [ ${sz} -eq $((${npgs} * ${p})) ] ; then + psz="yes" + else + psz="no" + fi + fi + lg ${ndelta}; lg_ndelta=${lg_result}; pow2 ${lg_ndelta} if [ ${pow2_result} -lt ${ndelta} ] ; then rem="yes" @@ -74,14 +89,15 @@ size_class() { else lg_delta_lookup="no" fi - printf ' SC(%3d, %6d, %8d, %6d, %3s, %2s) \\\n' ${index} ${lg_grp} ${lg_delta} ${ndelta} ${bin} ${lg_delta_lookup} + printf ' SC(%3d, %6d, %8d, %6d, %3s, %3s, %2s) \\\n' ${index} ${lg_grp} ${lg_delta} ${ndelta} ${psz} ${bin} ${lg_delta_lookup} # Defined upon return: - # - lg_delta_lookup (${lg_delta} or "no") + # - psz ("yes" or "no") # - bin ("yes" or "no") + # - lg_delta_lookup (${lg_delta} or "no") } sep_line() { - echo " \\" + echo " \\" } size_classes() { @@ -95,12 +111,13 @@ size_classes() { pow2 ${lg_g}; g=${pow2_result} echo "#define SIZE_CLASSES \\" - echo " /* index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup */ \\" + echo " /* index, lg_grp, lg_delta, ndelta, psz, bin, lg_delta_lookup */ \\" ntbins=0 nlbins=0 lg_tiny_maxclass='"NA"' nbins=0 + npsizes=0 # Tiny size classes. ndelta=0 @@ -112,6 +129,9 @@ size_classes() { if [ ${lg_delta_lookup} != "no" ] ; then nlbins=$((${index} + 1)) fi + if [ ${psz} = "yes" ] ; then + npsizes=$((${npsizes} + 1)) + fi if [ ${bin} != "no" ] ; then nbins=$((${index} + 1)) fi @@ -133,11 +153,17 @@ size_classes() { index=$((${index} + 1)) lg_grp=$((${lg_grp} + 1)) lg_delta=$((${lg_delta} + 1)) + if [ ${psz} = "yes" ] ; then + npsizes=$((${npsizes} + 1)) + fi fi while [ ${ndelta} -lt ${g} ] ; do size_class ${index} ${lg_grp} ${lg_delta} ${ndelta} ${lg_p} ${lg_kmax} index=$((${index} + 1)) ndelta=$((${ndelta} + 1)) + if [ ${psz} = "yes" ] ; then + npsizes=$((${npsizes} + 1)) + fi done # All remaining groups. 
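The psz logic added to size_class() above marks a size class as a page size class when its size works out to a whole number of pages, and npsizes counts those classes to produce NPSIZES (the bound used by pind2sz_tab and psz2ind() elsewhere in this diff). The same predicate restated as standalone C, with the hypothetical name example_is_psz standing in for the shell arithmetic:

#include <stdbool.h>
#include <stddef.h>

/*
 * True iff the size class (1 << lg_grp) + ndelta * (1 << lg_delta) spans a
 * whole number of pages of size (1 << lg_p).
 */
static bool
example_is_psz(unsigned lg_grp, unsigned lg_delta, size_t ndelta, unsigned lg_p)
{
	size_t p, sz;

	if (lg_delta >= lg_p)
		return (true);	/* Every delta step is already page-aligned. */
	p = (size_t)1 << lg_p;
	sz = ((size_t)1 << lg_grp) + (((size_t)1 << lg_delta) * ndelta);
	return (sz % p == 0);	/* Same test as the npgs arithmetic above. */
}

For example, with lg_p = 12 (4 KiB pages) a 20480-byte class qualifies, while a 2560-byte class (2048 + 512) does not.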
@@ -157,6 +183,9 @@ size_classes() { # Final written value is correct: lookup_maxclass="((((size_t)1) << ${lg_grp}) + (((size_t)${ndelta}) << ${lg_delta}))" fi + if [ ${psz} = "yes" ] ; then + npsizes=$((${npsizes} + 1)) + fi if [ ${bin} != "no" ] ; then nbins=$((${index} + 1)) # Final written value is correct: @@ -183,6 +212,7 @@ size_classes() { # - nlbins # - nbins # - nsizes + # - npsizes # - lg_tiny_maxclass # - lookup_maxclass # - small_maxclass @@ -200,13 +230,13 @@ cat <iteration = 0; +} + +JEMALLOC_INLINE void +spin_adaptive(spin_t *spin) +{ + volatile uint64_t i; + + for (i = 0; i < (KQU(1) << spin->iteration); i++) + CPU_SPINWAIT; + + if (spin->iteration < 63) + spin->iteration++; +} + +#endif + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ + diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index 70883b1a..01ba062d 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -145,7 +145,7 @@ tcache_t *tcache_create(tsdn_t *tsdn, arena_t *arena); void tcache_cleanup(tsd_t *tsd); void tcache_enabled_cleanup(tsd_t *tsd); void tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena); -bool tcaches_create(tsdn_t *tsdn, unsigned *r_ind); +bool tcaches_create(tsd_t *tsd, unsigned *r_ind); void tcaches_flush(tsd_t *tsd, unsigned ind); void tcaches_destroy(tsd_t *tsd, unsigned ind); bool tcache_boot(tsdn_t *tsdn); diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index bf113411..9055acaf 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -48,7 +48,7 @@ typedef enum { * * bool example_tsd_boot(void) {...} * bool example_tsd_booted_get(void) {...} - * example_t *example_tsd_get() {...} + * example_t *example_tsd_get(bool init) {...} * void example_tsd_set(example_t *val) {...} * * Note that all of the functions deal in terms of (a_type *) rather than @@ -105,7 +105,7 @@ a_name##tsd_boot(void); \ a_attr bool \ a_name##tsd_booted_get(void); \ a_attr a_type * \ -a_name##tsd_get(void); \ +a_name##tsd_get(bool init); \ a_attr void \ a_name##tsd_set(a_type *val); @@ -213,9 +213,15 @@ a_name##tsd_booted_get(void) \ \ return (a_name##tsd_booted); \ } \ +a_attr bool \ +a_name##tsd_get_allocates(void) \ +{ \ + \ + return (false); \ +} \ /* Get/set. */ \ a_attr a_type * \ -a_name##tsd_get(void) \ +a_name##tsd_get(bool init) \ { \ \ assert(a_name##tsd_booted); \ @@ -264,9 +270,15 @@ a_name##tsd_booted_get(void) \ \ return (a_name##tsd_booted); \ } \ +a_attr bool \ +a_name##tsd_get_allocates(void) \ +{ \ + \ + return (false); \ +} \ /* Get/set. */ \ a_attr a_type * \ -a_name##tsd_get(void) \ +a_name##tsd_get(bool init) \ { \ \ assert(a_name##tsd_booted); \ @@ -325,14 +337,14 @@ a_name##tsd_wrapper_set(a_name##tsd_wrapper_t *wrapper) \ } \ } \ a_attr a_name##tsd_wrapper_t * \ -a_name##tsd_wrapper_get(void) \ +a_name##tsd_wrapper_get(bool init) \ { \ DWORD error = GetLastError(); \ a_name##tsd_wrapper_t *wrapper = (a_name##tsd_wrapper_t *) \ TlsGetValue(a_name##tsd_tsd); \ SetLastError(error); \ \ - if (unlikely(wrapper == NULL)) { \ + if (init && unlikely(wrapper == NULL)) { \ wrapper = (a_name##tsd_wrapper_t *) \ malloc_tsd_malloc(sizeof(a_name##tsd_wrapper_t)); \ if (wrapper == NULL) { \ @@ -392,14 +404,22 @@ a_name##tsd_booted_get(void) \ \ return (a_name##tsd_booted); \ } \ +a_attr bool \ +a_name##tsd_get_allocates(void) \ +{ \ + \ + return (true); \ +} \ /* Get/set. 
*/ \ a_attr a_type * \ -a_name##tsd_get(void) \ +a_name##tsd_get(bool init) \ { \ a_name##tsd_wrapper_t *wrapper; \ \ assert(a_name##tsd_booted); \ - wrapper = a_name##tsd_wrapper_get(); \ + wrapper = a_name##tsd_wrapper_get(init); \ + if (a_name##tsd_get_allocates() && !init && wrapper == NULL) \ + return (NULL); \ return (&wrapper->val); \ } \ a_attr void \ @@ -408,7 +428,7 @@ a_name##tsd_set(a_type *val) \ a_name##tsd_wrapper_t *wrapper; \ \ assert(a_name##tsd_booted); \ - wrapper = a_name##tsd_wrapper_get(); \ + wrapper = a_name##tsd_wrapper_get(true); \ wrapper->val = *(val); \ if (a_cleanup != malloc_tsd_no_cleanup) \ wrapper->initialized = true; \ @@ -452,12 +472,12 @@ a_name##tsd_wrapper_set(a_name##tsd_wrapper_t *wrapper) \ } \ } \ a_attr a_name##tsd_wrapper_t * \ -a_name##tsd_wrapper_get(void) \ +a_name##tsd_wrapper_get(bool init) \ { \ a_name##tsd_wrapper_t *wrapper = (a_name##tsd_wrapper_t *) \ pthread_getspecific(a_name##tsd_tsd); \ \ - if (unlikely(wrapper == NULL)) { \ + if (init && unlikely(wrapper == NULL)) { \ tsd_init_block_t block; \ wrapper = tsd_init_check_recursion( \ &a_name##tsd_init_head, &block); \ @@ -520,14 +540,22 @@ a_name##tsd_booted_get(void) \ \ return (a_name##tsd_booted); \ } \ +a_attr bool \ +a_name##tsd_get_allocates(void) \ +{ \ + \ + return (true); \ +} \ /* Get/set. */ \ a_attr a_type * \ -a_name##tsd_get(void) \ +a_name##tsd_get(bool init) \ { \ a_name##tsd_wrapper_t *wrapper; \ \ assert(a_name##tsd_booted); \ - wrapper = a_name##tsd_wrapper_get(); \ + wrapper = a_name##tsd_wrapper_get(init); \ + if (a_name##tsd_get_allocates() && !init && wrapper == NULL) \ + return (NULL); \ return (&wrapper->val); \ } \ a_attr void \ @@ -536,7 +564,7 @@ a_name##tsd_set(a_type *val) \ a_name##tsd_wrapper_t *wrapper; \ \ assert(a_name##tsd_booted); \ - wrapper = a_name##tsd_wrapper_get(); \ + wrapper = a_name##tsd_wrapper_get(true); \ wrapper->val = *(val); \ if (a_cleanup != malloc_tsd_no_cleanup) \ wrapper->initialized = true; \ @@ -639,6 +667,7 @@ void tsd_cleanup(void *arg); #ifndef JEMALLOC_ENABLE_INLINE malloc_tsd_protos(JEMALLOC_ATTR(unused), , tsd_t) +tsd_t *tsd_fetch_impl(bool init); tsd_t *tsd_fetch(void); tsdn_t *tsd_tsdn(tsd_t *tsd); bool tsd_nominal(tsd_t *tsd); @@ -658,9 +687,13 @@ malloc_tsd_externs(, tsd_t) malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, , tsd_t, tsd_initializer, tsd_cleanup) JEMALLOC_ALWAYS_INLINE tsd_t * -tsd_fetch(void) +tsd_fetch_impl(bool init) { - tsd_t *tsd = tsd_get(); + tsd_t *tsd = tsd_get(init); + + if (!init && tsd_get_allocates() && tsd == NULL) + return (NULL); + assert(tsd != NULL); if (unlikely(tsd->state != tsd_state_nominal)) { if (tsd->state == tsd_state_uninitialized) { @@ -677,6 +710,13 @@ tsd_fetch(void) return (tsd); } +JEMALLOC_ALWAYS_INLINE tsd_t * +tsd_fetch(void) +{ + + return (tsd_fetch_impl(true)); +} + JEMALLOC_ALWAYS_INLINE tsdn_t * tsd_tsdn(tsd_t *tsd) { @@ -723,7 +763,7 @@ tsdn_fetch(void) if (!tsd_booted_get()) return (NULL); - return (tsd_tsdn(tsd_fetch())); + return (tsd_tsdn(tsd_fetch_impl(false))); } JEMALLOC_ALWAYS_INLINE bool diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h index a0c2203d..aee00d6d 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util.h @@ -61,30 +61,20 @@ # define JEMALLOC_CC_SILENCE_INIT(v) #endif -#define JEMALLOC_GNUC_PREREQ(major, minor) \ - (!defined(__clang__) && \ - (__GNUC__ > (major) || (__GNUC__ == (major) && __GNUC_MINOR__ >= (minor)))) -#ifndef __has_builtin -# define __has_builtin(builtin) (0) -#endif 
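The tsd_get(bool init) signature change above distinguishes backends whose getter may allocate (the TlsGetValue and pthread_getspecific wrapper variants, which now report tsd_get_allocates() as true) from raw TLS backends (which report false). That is what lets tsdn_fetch() call tsd_fetch_impl(false) and treat missing TSD as NULL rather than allocating reentrantly. A toy model of the contract, with hypothetical example_* names standing in for the generated tsd functions:

#include <stdbool.h>
#include <stddef.h>

typedef struct { unsigned depth; } example_state_t;

static example_state_t *example_slot = NULL;	/* Stand-in for TLS storage. */

static bool
example_tsd_get_allocates(void)
{
	return (true);	/* Wrapper-based backends allocate on first get. */
}

static example_state_t *
example_tsd_get(bool init)
{
	static example_state_t wrapper;	/* Stand-in for the malloc'ed wrapper. */

	if (example_slot == NULL && init)
		example_slot = &wrapper;
	return (example_slot);
}

/* Non-allocating read: safe to call from allocation paths themselves. */
static example_state_t *
example_tsd_peek(void)
{
	example_state_t *s = example_tsd_get(false);

	if (example_tsd_get_allocates() && s == NULL)
		return (NULL);	/* No TSD yet; the caller copes, never allocates. */
	return (s);
}

The point of the split is visible in example_tsd_peek(): on a wrapper backend it can run inside the allocator itself, because the init=false path never reaches malloc_tsd_malloc().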
-#define JEMALLOC_CLANG_HAS_BUILTIN(builtin) \ - (defined(__clang__) && __has_builtin(builtin)) - #ifdef __GNUC__ # define likely(x) __builtin_expect(!!(x), 1) # define unlikely(x) __builtin_expect(!!(x), 0) -# if JEMALLOC_GNUC_PREREQ(4, 6) || \ - JEMALLOC_CLANG_HAS_BUILTIN(__builtin_unreachable) -# define unreachable() __builtin_unreachable() -# else -# define unreachable() abort() -# endif #else # define likely(x) !!(x) # define unlikely(x) !!(x) -# define unreachable() abort() #endif +#if !defined(JEMALLOC_INTERNAL_UNREACHABLE) +# error JEMALLOC_INTERNAL_UNREACHABLE should have been defined by configure +#endif + +#define unreachable() JEMALLOC_INTERNAL_UNREACHABLE() + #include "jemalloc/internal/assert.h" /* Use to assert a particular configuration, e.g., cassert(config_debug). */ diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h index d78dca2d..cdf15d79 100644 --- a/include/jemalloc/internal/witness.h +++ b/include/jemalloc/internal/witness.h @@ -108,6 +108,7 @@ void witness_postfork_child(tsd_t *tsd); #ifdef JEMALLOC_H_INLINES #ifndef JEMALLOC_ENABLE_INLINE +bool witness_owner(tsd_t *tsd, const witness_t *witness); void witness_assert_owner(tsdn_t *tsdn, const witness_t *witness); void witness_assert_not_owner(tsdn_t *tsdn, const witness_t *witness); void witness_assert_lockless(tsdn_t *tsdn); @@ -116,12 +117,25 @@ void witness_unlock(tsdn_t *tsdn, witness_t *witness); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_MUTEX_C_)) +JEMALLOC_INLINE bool +witness_owner(tsd_t *tsd, const witness_t *witness) +{ + witness_list_t *witnesses; + witness_t *w; + + witnesses = tsd_witnessesp_get(tsd); + ql_foreach(w, witnesses, link) { + if (w == witness) + return (true); + } + + return (false); +} + JEMALLOC_INLINE void witness_assert_owner(tsdn_t *tsdn, const witness_t *witness) { tsd_t *tsd; - witness_list_t *witnesses; - witness_t *w; if (!config_debug) return; @@ -132,11 +146,8 @@ witness_assert_owner(tsdn_t *tsdn, const witness_t *witness) if (witness->rank == WITNESS_RANK_OMIT) return; - witnesses = tsd_witnessesp_get(tsd); - ql_foreach(w, witnesses, link) { - if (w == witness) - return; - } + if (witness_owner(tsd, witness)) + return; witness_owner_error(witness); } @@ -238,10 +249,16 @@ witness_unlock(tsdn_t *tsdn, witness_t *witness) if (witness->rank == WITNESS_RANK_OMIT) return; - witness_assert_owner(tsdn, witness); - - witnesses = tsd_witnessesp_get(tsd); - ql_remove(witnesses, witness, link); + /* + * Check whether the witness is owned before removal, rather than + * relying on witness_assert_owner() to abort, so that unit tests can + * test this function's failure mode without causing undefined behavior. + */ + if (witness_owner(tsd, witness)) { + witnesses = tsd_witnessesp_get(tsd); + ql_remove(witnesses, witness, link); + } else + witness_assert_owner(tsdn, witness); } #endif diff --git a/jemalloc.pc.in b/jemalloc.pc.in index 1a3ad9b3..a318e8dd 100644 --- a/jemalloc.pc.in +++ b/jemalloc.pc.in @@ -6,7 +6,7 @@ install_suffix=@install_suffix@ Name: jemalloc Description: A general purpose malloc(3) implementation that emphasizes fragmentation avoidance and scalable concurrency support. 
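witness_owner() above factors the ownership scan out of witness_assert_owner() so that witness_unlock() can test ownership without triggering an abort, keeping the failure mode unit-testable. A sketch of that check-then-report split over a toy singly linked list; every example_* name is hypothetical, and abort() merely stands in for witness_owner_error():

#include <stdbool.h>
#include <stddef.h>
#include <stdlib.h>

typedef struct example_witness_s example_witness_t;
struct example_witness_s {
	example_witness_t *next;	/* Per-thread list of held witnesses. */
};

static bool
example_owner(example_witness_t *head, const example_witness_t *w)
{
	example_witness_t *it;

	for (it = head; it != NULL; it = it->next) {
		if (it == w)
			return (true);
	}
	return (false);
}

static void
example_unlock(example_witness_t **head, example_witness_t *w)
{
	if (example_owner(*head, w)) {
		/* Owned: unlink (head removal only, for brevity). */
		if (*head == w)
			*head = w->next;
	} else {
		/* Not owned: report rather than corrupt the list. */
		abort();
	}
}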
-URL: http://www.canonware.com/jemalloc +URL: http://jemalloc.net/ Version: @jemalloc_version@ Cflags: -I${includedir} Libs: -L${libdir} -ljemalloc${install_suffix} diff --git a/msvc/ReadMe.txt b/msvc/ReadMe.txt index 02b97f74..77d567da 100644 --- a/msvc/ReadMe.txt +++ b/msvc/ReadMe.txt @@ -17,7 +17,7 @@ How to build jemalloc for Windows (note: x86/x64 doesn't matter at this point) 5. Generate header files: - sh -c "./autogen.sh CC=cl --enable-lazy-lock=no" + sh -c "CC=cl ./autogen.sh" 6. Now the project can be opened and built in Visual Studio: msvc\jemalloc_vc2015.sln diff --git a/src/arena.c b/src/arena.c index ce62590b..43c3ccf2 100644 --- a/src/arena.c +++ b/src/arena.c @@ -21,15 +21,8 @@ size_t map_bias; size_t map_misc_offset; size_t arena_maxrun; /* Max run size for arenas. */ size_t large_maxclass; /* Max large size class. */ -size_t run_quantize_max; /* Max run_quantize_*() input. */ -static size_t small_maxrun; /* Max run size for small size classes. */ -static bool *small_run_tab; /* Valid small run page multiples. */ -static size_t *run_quantize_floor_tab; /* run_quantize_floor() memoization. */ -static size_t *run_quantize_ceil_tab; /* run_quantize_ceil() memoization. */ unsigned nlclasses; /* Number of large size classes. */ unsigned nhclasses; /* Number of huge size classes. */ -static szind_t runs_avail_bias; /* Size index for first runs_avail tree. */ -static szind_t runs_avail_nclasses; /* Number of runs_avail trees. */ /******************************************************************************/ /* @@ -37,6 +30,8 @@ static szind_t runs_avail_nclasses; /* Number of runs_avail trees. */ * definition. */ +static void arena_chunk_dalloc(tsdn_t *tsdn, arena_t *arena, + arena_chunk_t *chunk); static void arena_purge_to_limit(tsdn_t *tsdn, arena_t *arena, size_t ndirty_limit); static void arena_run_dalloc(tsdn_t *tsdn, arena_t *arena, arena_run_t *run, @@ -77,83 +72,6 @@ arena_run_addr_comp(const arena_chunk_map_misc_t *a, ph_gen(static UNUSED, arena_run_heap_, arena_run_heap_t, arena_chunk_map_misc_t, ph_link, arena_run_addr_comp) -static size_t -run_quantize_floor_compute(size_t size) -{ - size_t qsize; - - assert(size != 0); - assert(size == PAGE_CEILING(size)); - - /* Don't change sizes that are valid small run sizes. */ - if (size <= small_maxrun && small_run_tab[size >> LG_PAGE]) - return (size); - - /* - * Round down to the nearest run size that can actually be requested - * during normal large allocation. Add large_pad so that cache index - * randomization can offset the allocation from the page boundary. - */ - qsize = index2size(size2index(size - large_pad + 1) - 1) + large_pad; - if (qsize <= SMALL_MAXCLASS + large_pad) - return (run_quantize_floor_compute(size - large_pad)); - assert(qsize <= size); - return (qsize); -} - -static size_t -run_quantize_ceil_compute_hard(size_t size) -{ - size_t large_run_size_next; - - assert(size != 0); - assert(size == PAGE_CEILING(size)); - - /* - * Return the next quantized size greater than the input size. - * Quantized sizes comprise the union of run sizes that back small - * region runs, and run sizes that back large regions with no explicit - * alignment constraints. 
- */ - - if (size > SMALL_MAXCLASS) { - large_run_size_next = PAGE_CEILING(index2size(size2index(size - - large_pad) + 1) + large_pad); - } else - large_run_size_next = SIZE_T_MAX; - if (size >= small_maxrun) - return (large_run_size_next); - - while (true) { - size += PAGE; - assert(size <= small_maxrun); - if (small_run_tab[size >> LG_PAGE]) { - if (large_run_size_next < size) - return (large_run_size_next); - return (size); - } - } -} - -static size_t -run_quantize_ceil_compute(size_t size) -{ - size_t qsize = run_quantize_floor_compute(size); - - if (qsize < size) { - /* - * Skip a quantization that may have an adequately large run, - * because under-sized runs may be mixed in. This only happens - * when an unusual size is requested, i.e. for aligned - * allocation, and is just one of several places where linear - * search would potentially find sufficiently aligned available - * memory somewhere lower. - */ - qsize = run_quantize_ceil_compute_hard(qsize); - } - return (qsize); -} - #ifdef JEMALLOC_JET #undef run_quantize_floor #define run_quantize_floor JEMALLOC_N(n_run_quantize_floor) @@ -162,13 +80,27 @@ static size_t run_quantize_floor(size_t size) { size_t ret; + pszind_t pind; assert(size > 0); - assert(size <= run_quantize_max); + assert(size <= HUGE_MAXCLASS); assert((size & PAGE_MASK) == 0); - ret = run_quantize_floor_tab[(size >> LG_PAGE) - 1]; - assert(ret == run_quantize_floor_compute(size)); + assert(size != 0); + assert(size == PAGE_CEILING(size)); + + pind = psz2ind(size - large_pad + 1); + if (pind == 0) { + /* + * Avoid underflow. This short-circuit would also do the right + * thing for all sizes in the range for which there are + * PAGE-spaced size classes, but it's simplest to just handle + * the one case that would cause erroneous results. + */ + return (size); + } + ret = pind2sz(pind - 1) + large_pad; + assert(ret <= size); return (ret); } #ifdef JEMALLOC_JET @@ -187,11 +119,21 @@ run_quantize_ceil(size_t size) size_t ret; assert(size > 0); - assert(size <= run_quantize_max); + assert(size <= HUGE_MAXCLASS); assert((size & PAGE_MASK) == 0); - ret = run_quantize_ceil_tab[(size >> LG_PAGE) - 1]; - assert(ret == run_quantize_ceil_compute(size)); + ret = run_quantize_floor(size); + if (ret < size) { + /* + * Skip a quantization that may have an adequately large run, + * because under-sized runs may be mixed in. This only happens + * when an unusual size is requested, i.e. for aligned + * allocation, and is just one of several places where linear + * search would potentially find sufficiently aligned available + * memory somewhere lower. 
+ */ + ret = pind2sz(psz2ind(ret - large_pad + 1)) + large_pad; + } return (ret); } #ifdef JEMALLOC_JET @@ -200,25 +142,15 @@ run_quantize_ceil(size_t size) run_quantize_t *run_quantize_ceil = JEMALLOC_N(n_run_quantize_ceil); #endif -static arena_run_heap_t * -arena_runs_avail_get(arena_t *arena, szind_t ind) -{ - - assert(ind >= runs_avail_bias); - assert(ind - runs_avail_bias < runs_avail_nclasses); - - return (&arena->runs_avail[ind - runs_avail_bias]); -} - static void arena_avail_insert(arena_t *arena, arena_chunk_t *chunk, size_t pageind, size_t npages) { - szind_t ind = size2index(run_quantize_floor(arena_miscelm_size_get( + pszind_t pind = psz2ind(run_quantize_floor(arena_miscelm_size_get( arena_miscelm_get_const(chunk, pageind)))); assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >> LG_PAGE)); - arena_run_heap_insert(arena_runs_avail_get(arena, ind), + arena_run_heap_insert(&arena->runs_avail[pind], arena_miscelm_get_mutable(chunk, pageind)); } @@ -226,11 +158,11 @@ static void arena_avail_remove(arena_t *arena, arena_chunk_t *chunk, size_t pageind, size_t npages) { - szind_t ind = size2index(run_quantize_floor(arena_miscelm_size_get( + pszind_t pind = psz2ind(run_quantize_floor(arena_miscelm_size_get( arena_miscelm_get_const(chunk, pageind)))); assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >> LG_PAGE)); - arena_run_heap_remove(arena_runs_avail_get(arena, ind), + arena_run_heap_remove(&arena->runs_avail[pind], arena_miscelm_get_mutable(chunk, pageind)); } @@ -649,14 +581,13 @@ arena_chunk_alloc_internal(tsdn_t *tsdn, arena_t *arena, bool *zero, chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; chunk = chunk_alloc_cache(tsdn, arena, &chunk_hooks, NULL, chunksize, - chunksize, zero, true); + chunksize, zero, commit, true); if (chunk != NULL) { if (arena_chunk_register(tsdn, arena, chunk, *zero)) { chunk_dalloc_cache(tsdn, arena, &chunk_hooks, chunk, chunksize, true); return (NULL); } - *commit = true; } if (chunk == NULL) { chunk = arena_chunk_alloc_internal_hard(tsdn, arena, @@ -953,6 +884,7 @@ arena_chunk_alloc_huge(tsdn_t *tsdn, arena_t *arena, size_t usize, void *ret; chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; size_t csize = CHUNK_CEILING(usize); + bool commit = true; malloc_mutex_lock(tsdn, &arena->lock); @@ -964,7 +896,7 @@ arena_chunk_alloc_huge(tsdn_t *tsdn, arena_t *arena, size_t usize, arena_nactive_add(arena, usize >> LG_PAGE); ret = chunk_alloc_cache(tsdn, arena, &chunk_hooks, NULL, csize, - alignment, zero, true); + alignment, zero, &commit, true); malloc_mutex_unlock(tsdn, &arena->lock); if (ret == NULL) { ret = arena_chunk_alloc_huge_hard(tsdn, arena, &chunk_hooks, @@ -1074,6 +1006,7 @@ arena_chunk_ralloc_huge_expand(tsdn_t *tsdn, arena_t *arena, void *chunk, void *nchunk = (void *)((uintptr_t)chunk + CHUNK_CEILING(oldsize)); size_t udiff = usize - oldsize; size_t cdiff = CHUNK_CEILING(usize) - CHUNK_CEILING(oldsize); + bool commit = true; malloc_mutex_lock(tsdn, &arena->lock); @@ -1085,7 +1018,7 @@ arena_chunk_ralloc_huge_expand(tsdn_t *tsdn, arena_t *arena, void *chunk, arena_nactive_add(arena, udiff >> LG_PAGE); err = (chunk_alloc_cache(tsdn, arena, &chunk_hooks, nchunk, cdiff, - chunksize, zero, true) == NULL); + chunksize, zero, &commit, true) == NULL); malloc_mutex_unlock(tsdn, &arena->lock); if (err) { err = arena_chunk_ralloc_huge_expand_hard(tsdn, arena, @@ -1109,12 +1042,13 @@ arena_chunk_ralloc_huge_expand(tsdn_t *tsdn, arena_t *arena, void *chunk, static arena_run_t * arena_run_first_best_fit(arena_t 
*arena, size_t size) { - szind_t ind, i; + pszind_t pind, i; - ind = size2index(run_quantize_ceil(size)); - for (i = ind; i < runs_avail_nclasses + runs_avail_bias; i++) { + pind = psz2ind(run_quantize_ceil(size)); + + for (i = pind; pind2sz(i) <= large_maxclass; i++) { arena_chunk_map_misc_t *miscelm = arena_run_heap_first( - arena_runs_avail_get(arena, i)); + &arena->runs_avail[i]); if (miscelm != NULL) return (&miscelm->run); } @@ -1125,7 +1059,7 @@ arena_run_first_best_fit(arena_t *arena, size_t size) static arena_run_t * arena_run_alloc_large_helper(arena_t *arena, size_t size, bool zero) { - arena_run_t *run = arena_run_first_best_fit(arena, s2u(size)); + arena_run_t *run = arena_run_first_best_fit(arena, size); if (run != NULL) { if (arena_run_split_large(arena, run, size, zero)) run = NULL; @@ -1256,14 +1190,14 @@ arena_decay_deadline_init(arena_t *arena) * Generate a new deadline that is uniformly random within the next * epoch after the current one. */ - nstime_copy(&arena->decay_deadline, &arena->decay_epoch); - nstime_add(&arena->decay_deadline, &arena->decay_interval); - if (arena->decay_time > 0) { + nstime_copy(&arena->decay.deadline, &arena->decay.epoch); + nstime_add(&arena->decay.deadline, &arena->decay.interval); + if (arena->decay.time > 0) { nstime_t jitter; - nstime_init(&jitter, prng_range(&arena->decay_jitter_state, - nstime_ns(&arena->decay_interval))); - nstime_add(&arena->decay_deadline, &jitter); + nstime_init(&jitter, prng_range(&arena->decay.jitter_state, + nstime_ns(&arena->decay.interval))); + nstime_add(&arena->decay.deadline, &jitter); } } @@ -1273,7 +1207,7 @@ arena_decay_deadline_reached(const arena_t *arena, const nstime_t *time) assert(opt_purge == purge_mode_decay); - return (nstime_compare(&arena->decay_deadline, time) <= 0); + return (nstime_compare(&arena->decay.deadline, time) <= 0); } static size_t @@ -1298,92 +1232,103 @@ arena_decay_backlog_npages_limit(const arena_t *arena) */ sum = 0; for (i = 0; i < SMOOTHSTEP_NSTEPS; i++) - sum += arena->decay_backlog[i] * h_steps[i]; + sum += arena->decay.backlog[i] * h_steps[i]; npages_limit_backlog = (size_t)(sum >> SMOOTHSTEP_BFP); return (npages_limit_backlog); } static void -arena_decay_epoch_advance(arena_t *arena, const nstime_t *time) +arena_decay_backlog_update_last(arena_t *arena) { - uint64_t nadvance_u64; - nstime_t delta; - size_t ndirty_delta; + size_t ndirty_delta = (arena->ndirty > arena->decay.ndirty) ? + arena->ndirty - arena->decay.ndirty : 0; + arena->decay.backlog[SMOOTHSTEP_NSTEPS-1] = ndirty_delta; +} - assert(opt_purge == purge_mode_decay); - assert(arena_decay_deadline_reached(arena, time)); +static void +arena_decay_backlog_update(arena_t *arena, uint64_t nadvance_u64) +{ - nstime_copy(&delta, time); - nstime_subtract(&delta, &arena->decay_epoch); - nadvance_u64 = nstime_divide(&delta, &arena->decay_interval); - assert(nadvance_u64 > 0); - - /* Add nadvance_u64 decay intervals to epoch. */ - nstime_copy(&delta, &arena->decay_interval); - nstime_imultiply(&delta, nadvance_u64); - nstime_add(&arena->decay_epoch, &delta); - - /* Set a new deadline. */ - arena_decay_deadline_init(arena); - - /* Update the backlog. 
*/ if (nadvance_u64 >= SMOOTHSTEP_NSTEPS) { - memset(arena->decay_backlog, 0, (SMOOTHSTEP_NSTEPS-1) * + memset(arena->decay.backlog, 0, (SMOOTHSTEP_NSTEPS-1) * sizeof(size_t)); } else { size_t nadvance_z = (size_t)nadvance_u64; assert((uint64_t)nadvance_z == nadvance_u64); - memmove(arena->decay_backlog, &arena->decay_backlog[nadvance_z], + memmove(arena->decay.backlog, &arena->decay.backlog[nadvance_z], (SMOOTHSTEP_NSTEPS - nadvance_z) * sizeof(size_t)); if (nadvance_z > 1) { - memset(&arena->decay_backlog[SMOOTHSTEP_NSTEPS - + memset(&arena->decay.backlog[SMOOTHSTEP_NSTEPS - nadvance_z], 0, (nadvance_z-1) * sizeof(size_t)); } } - ndirty_delta = (arena->ndirty > arena->decay_ndirty) ? arena->ndirty - - arena->decay_ndirty : 0; - arena->decay_ndirty = arena->ndirty; - arena->decay_backlog[SMOOTHSTEP_NSTEPS-1] = ndirty_delta; - arena->decay_backlog_npages_limit = - arena_decay_backlog_npages_limit(arena); + + arena_decay_backlog_update_last(arena); } -static size_t -arena_decay_npages_limit(arena_t *arena) +static void +arena_decay_epoch_advance_helper(arena_t *arena, const nstime_t *time) { - size_t npages_limit; + uint64_t nadvance_u64; + nstime_t delta; assert(opt_purge == purge_mode_decay); + assert(arena_decay_deadline_reached(arena, time)); - npages_limit = arena->decay_backlog_npages_limit; + nstime_copy(&delta, time); + nstime_subtract(&delta, &arena->decay.epoch); + nadvance_u64 = nstime_divide(&delta, &arena->decay.interval); + assert(nadvance_u64 > 0); - /* Add in any dirty pages created during the current epoch. */ - if (arena->ndirty > arena->decay_ndirty) - npages_limit += arena->ndirty - arena->decay_ndirty; + /* Add nadvance_u64 decay intervals to epoch. */ + nstime_copy(&delta, &arena->decay.interval); + nstime_imultiply(&delta, nadvance_u64); + nstime_add(&arena->decay.epoch, &delta); - return (npages_limit); + /* Set a new deadline. */ + arena_decay_deadline_init(arena); + + /* Update the backlog. 
*/ + arena_decay_backlog_update(arena, nadvance_u64); +} + +static void +arena_decay_epoch_advance_purge(tsdn_t *tsdn, arena_t *arena) +{ + size_t ndirty_limit = arena_decay_backlog_npages_limit(arena); + + if (arena->ndirty > ndirty_limit) + arena_purge_to_limit(tsdn, arena, ndirty_limit); + arena->decay.ndirty = arena->ndirty; +} + +static void +arena_decay_epoch_advance(tsdn_t *tsdn, arena_t *arena, const nstime_t *time) +{ + + arena_decay_epoch_advance_helper(arena, time); + arena_decay_epoch_advance_purge(tsdn, arena); } static void arena_decay_init(arena_t *arena, ssize_t decay_time) { - arena->decay_time = decay_time; + arena->decay.time = decay_time; if (decay_time > 0) { - nstime_init2(&arena->decay_interval, decay_time, 0); - nstime_idivide(&arena->decay_interval, SMOOTHSTEP_NSTEPS); + nstime_init2(&arena->decay.interval, decay_time, 0); + nstime_idivide(&arena->decay.interval, SMOOTHSTEP_NSTEPS); } - nstime_init(&arena->decay_epoch, 0); - nstime_update(&arena->decay_epoch); - arena->decay_jitter_state = (uint64_t)(uintptr_t)arena; + nstime_init(&arena->decay.epoch, 0); + nstime_update(&arena->decay.epoch); + arena->decay.jitter_state = (uint64_t)(uintptr_t)arena; arena_decay_deadline_init(arena); - arena->decay_ndirty = arena->ndirty; - arena->decay_backlog_npages_limit = 0; - memset(arena->decay_backlog, 0, SMOOTHSTEP_NSTEPS * sizeof(size_t)); + arena->decay.ndirty = arena->ndirty; + memset(arena->decay.backlog, 0, SMOOTHSTEP_NSTEPS * sizeof(size_t)); } static bool @@ -1403,7 +1348,7 @@ arena_decay_time_get(tsdn_t *tsdn, arena_t *arena) ssize_t decay_time; malloc_mutex_lock(tsdn, &arena->lock); - decay_time = arena->decay_time; + decay_time = arena->decay.time; malloc_mutex_unlock(tsdn, &arena->lock); return (decay_time); @@ -1464,35 +1409,44 @@ static void arena_maybe_purge_decay(tsdn_t *tsdn, arena_t *arena) { nstime_t time; - size_t ndirty_limit; assert(opt_purge == purge_mode_decay); /* Purge all or nothing if the option is disabled. */ - if (arena->decay_time <= 0) { - if (arena->decay_time == 0) + if (arena->decay.time <= 0) { + if (arena->decay.time == 0) arena_purge_to_limit(tsdn, arena, 0); return; } - nstime_copy(&time, &arena->decay_epoch); - if (unlikely(nstime_update(&time))) { - /* Time went backwards. Force an epoch advance. */ - nstime_copy(&time, &arena->decay_deadline); + nstime_init(&time, 0); + nstime_update(&time); + if (unlikely(!nstime_monotonic() && nstime_compare(&arena->decay.epoch, + &time) > 0)) { + /* + * Time went backwards. Move the epoch back in time and + * generate a new deadline, with the expectation that time + * typically flows forward for long enough periods of time that + * epochs complete. Unfortunately, this strategy is susceptible + * to clock jitter triggering premature epoch advances, but + * clock jitter estimation and compensation isn't feasible here + * because calls into this code are event-driven. + */ + nstime_copy(&arena->decay.epoch, &time); + arena_decay_deadline_init(arena); + } else { + /* Verify that time does not go backwards. */ + assert(nstime_compare(&arena->decay.epoch, &time) <= 0); } - if (arena_decay_deadline_reached(arena, &time)) - arena_decay_epoch_advance(arena, &time); - - ndirty_limit = arena_decay_npages_limit(arena); - /* - * Don't try to purge unless the number of purgeable pages exceeds the - * current limit. + * If the deadline has been reached, advance to the current epoch and + * purge to the new limit if necessary. 
Note that dirty pages created + * during the current epoch are not subject to purge until a future + * epoch, so as a result purging only happens during epoch advances. */ - if (arena->ndirty <= ndirty_limit) - return; - arena_purge_to_limit(tsdn, arena, ndirty_limit); + if (arena_decay_deadline_reached(arena, &time)) + arena_decay_epoch_advance(tsdn, arena, &time); } void @@ -1561,7 +1515,7 @@ arena_stash_dirty(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, if (rdelm == &chunkselm->rd) { extent_node_t *chunkselm_next; - bool zero; + bool zero, commit; UNUSED void *chunk; npages = extent_node_size_get(chunkselm) >> LG_PAGE; @@ -1575,10 +1529,11 @@ arena_stash_dirty(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, * dalloc_node=false argument to chunk_alloc_cache(). */ zero = false; + commit = false; chunk = chunk_alloc_cache(tsdn, arena, chunk_hooks, extent_node_addr_get(chunkselm), extent_node_size_get(chunkselm), chunksize, &zero, - false); + &commit, false); assert(chunk == extent_node_addr_get(chunkselm)); assert(zero == extent_node_zeroed_get(chunkselm)); extent_node_dirty_insert(chunkselm, purge_runs_sentinel, @@ -1967,7 +1922,8 @@ arena_reset(tsd_t *tsd, arena_t *arena) assert(!arena->purging); arena->nactive = 0; - for(i = 0; i < runs_avail_nclasses; i++) + for (i = 0; i < sizeof(arena->runs_avail) / sizeof(arena_run_heap_t); + i++) arena_run_heap_new(&arena->runs_avail[i]); malloc_mutex_unlock(tsd_tsdn(tsd), &arena->lock); @@ -3391,7 +3347,7 @@ arena_basic_stats_merge_locked(arena_t *arena, unsigned *nthreads, *nthreads += arena_nthreads_get(arena, false); *dss = dss_prec_names[arena->dss_prec]; *lg_dirty_mult = arena->lg_dirty_mult; - *decay_time = arena->decay_time; + *decay_time = arena->decay.time; *nactive += arena->nactive; *ndirty += arena->ndirty; } @@ -3496,23 +3452,19 @@ arena_t * arena_new(tsdn_t *tsdn, unsigned ind) { arena_t *arena; - size_t arena_size; unsigned i; - /* Compute arena size to incorporate sufficient runs_avail elements. */ - arena_size = offsetof(arena_t, runs_avail) + (sizeof(arena_run_heap_t) * - runs_avail_nclasses); /* * Allocate arena, arena->lstats, and arena->hstats contiguously, mainly * because there is no way to clean up if base_alloc() OOMs. 
*/ if (config_stats) { arena = (arena_t *)base_alloc(tsdn, - CACHELINE_CEILING(arena_size) + QUANTUM_CEILING(nlclasses * - sizeof(malloc_large_stats_t) + nhclasses) * - sizeof(malloc_huge_stats_t)); + CACHELINE_CEILING(sizeof(arena_t)) + + QUANTUM_CEILING((nlclasses * sizeof(malloc_large_stats_t)) + + (nhclasses * sizeof(malloc_huge_stats_t)))); } else - arena = (arena_t *)base_alloc(tsdn, arena_size); + arena = (arena_t *)base_alloc(tsdn, sizeof(arena_t)); if (arena == NULL) return (NULL); @@ -3524,11 +3476,11 @@ arena_new(tsdn_t *tsdn, unsigned ind) if (config_stats) { memset(&arena->stats, 0, sizeof(arena_stats_t)); arena->stats.lstats = (malloc_large_stats_t *)((uintptr_t)arena - + CACHELINE_CEILING(arena_size)); + + CACHELINE_CEILING(sizeof(arena_t))); memset(arena->stats.lstats, 0, nlclasses * sizeof(malloc_large_stats_t)); arena->stats.hstats = (malloc_huge_stats_t *)((uintptr_t)arena - + CACHELINE_CEILING(arena_size) + + + CACHELINE_CEILING(sizeof(arena_t)) + QUANTUM_CEILING(nlclasses * sizeof(malloc_large_stats_t))); memset(arena->stats.hstats, 0, nhclasses * sizeof(malloc_huge_stats_t)); @@ -3551,7 +3503,7 @@ arena_new(tsdn_t *tsdn, unsigned ind) (uint64_t)(uintptr_t)arena; } - arena->dss_prec = chunk_dss_prec_get(tsdn); + arena->dss_prec = chunk_dss_prec_get(); ql_new(&arena->achunks); @@ -3562,8 +3514,10 @@ arena_new(tsdn_t *tsdn, unsigned ind) arena->nactive = 0; arena->ndirty = 0; - for(i = 0; i < runs_avail_nclasses; i++) + for (i = 0; i < sizeof(arena->runs_avail) / sizeof(arena_run_heap_t); + i++) arena_run_heap_new(&arena->runs_avail[i]); + qr_new(&arena->runs_dirty, rd_link); qr_new(&arena->chunks_cache, cc_link); @@ -3693,9 +3647,6 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info) bin_info->reg0_offset = (uint32_t)(actual_run_size - (actual_nregs * bin_info->reg_interval) - pad_size + bin_info->redzone_size); - if (actual_run_size > small_maxrun) - small_maxrun = actual_run_size; - assert(bin_info->reg0_offset - bin_info->redzone_size + (bin_info->nregs * bin_info->reg_interval) + pad_size == bin_info->run_size); } @@ -3711,7 +3662,7 @@ bin_info_init(void) bin_info_run_size_calc(bin_info); \ bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs); #define BIN_INFO_INIT_bin_no(index, size) -#define SC(index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup) \ +#define SC(index, lg_grp, lg_delta, ndelta, psz, bin, lg_delta_lookup) \ BIN_INFO_INIT_bin_##bin(index, (ZU(1)<> - LG_PAGE)); - if (small_run_tab == NULL) - return (true); - -#define TAB_INIT_bin_yes(index, size) { \ - arena_bin_info_t *bin_info = &arena_bin_info[index]; \ - small_run_tab[bin_info->run_size >> LG_PAGE] = true; \ - } -#define TAB_INIT_bin_no(index, size) -#define SC(index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup) \ - TAB_INIT_bin_##bin(index, (ZU(1)<> LG_PAGE)); - if (run_quantize_floor_tab == NULL) - return (true); - - run_quantize_ceil_tab = (size_t *)base_alloc(NULL, sizeof(size_t) * - (run_quantize_max >> LG_PAGE)); - if (run_quantize_ceil_tab == NULL) - return (true); - - for (i = 1; i <= run_quantize_max >> LG_PAGE; i++) { - size_t run_size = i << LG_PAGE; - - run_quantize_floor_tab[i-1] = - run_quantize_floor_compute(run_size); - run_quantize_ceil_tab[i-1] = - run_quantize_ceil_compute(run_size); - } - - return (false); -} - -bool +void arena_boot(void) { unsigned i; @@ -3822,15 +3718,6 @@ arena_boot(void) nhclasses = NSIZES - nlclasses - NBINS; bin_info_init(); - if (small_run_size_init()) - return (true); - if (run_quantize_init()) - return (true); - - runs_avail_bias = 
size2index(PAGE); - runs_avail_nclasses = size2index(run_quantize_max)+1 - runs_avail_bias; - - return (false); } void diff --git a/src/chunk.c b/src/chunk.c index f292c980..07e26f77 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -316,10 +316,11 @@ chunk_recycle(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, size_t i; size_t *p = (size_t *)(uintptr_t)ret; - JEMALLOC_VALGRIND_MAKE_MEM_DEFINED(ret, size); for (i = 0; i < size / sizeof(size_t); i++) assert(p[i] == 0); } + if (config_valgrind) + JEMALLOC_VALGRIND_MAKE_MEM_DEFINED(ret, size); } return (ret); } @@ -384,23 +385,21 @@ chunk_alloc_base(size_t size) void * chunk_alloc_cache(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, - void *new_addr, size_t size, size_t alignment, bool *zero, bool dalloc_node) + void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit, + bool dalloc_node) { void *ret; - bool commit; assert(size != 0); assert((size & chunksize_mask) == 0); assert(alignment != 0); assert((alignment & chunksize_mask) == 0); - commit = true; ret = chunk_recycle(tsdn, arena, chunk_hooks, &arena->chunks_szad_cached, &arena->chunks_ad_cached, true, - new_addr, size, alignment, zero, &commit, dalloc_node); + new_addr, size, alignment, zero, commit, dalloc_node); if (ret == NULL) return (NULL); - assert(commit); if (config_valgrind) JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, size); return (ret); @@ -610,10 +609,10 @@ chunk_dalloc_cache(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, } static bool -chunk_dalloc_default_impl(tsdn_t *tsdn, void *chunk, size_t size) +chunk_dalloc_default_impl(void *chunk, size_t size) { - if (!have_dss || !chunk_in_dss(tsdn, chunk)) + if (!have_dss || !chunk_in_dss(chunk)) return (chunk_dalloc_mmap(chunk, size)); return (true); } @@ -622,11 +621,8 @@ static bool chunk_dalloc_default(void *chunk, size_t size, bool committed, unsigned arena_ind) { - tsdn_t *tsdn; - tsdn = tsdn_fetch(); - - return (chunk_dalloc_default_impl(tsdn, chunk, size)); + return (chunk_dalloc_default_impl(chunk, size)); } void @@ -644,7 +640,7 @@ chunk_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, /* Try to deallocate. */ if (chunk_hooks->dalloc == chunk_dalloc_default) { /* Call directly to propagate tsdn. 
*/ - err = chunk_dalloc_default_impl(tsdn, chunk, size); + err = chunk_dalloc_default_impl(chunk, size); } else err = chunk_hooks->dalloc(chunk, size, committed, arena->ind); @@ -717,13 +713,12 @@ chunk_split_default(void *chunk, size_t size, size_t size_a, size_t size_b, } static bool -chunk_merge_default_impl(tsdn_t *tsdn, void *chunk_a, void *chunk_b) +chunk_merge_default_impl(void *chunk_a, void *chunk_b) { if (!maps_coalesce) return (true); - if (have_dss && chunk_in_dss(tsdn, chunk_a) != chunk_in_dss(tsdn, - chunk_b)) + if (have_dss && !chunk_dss_mergeable(chunk_a, chunk_b)) return (true); return (false); @@ -733,11 +728,8 @@ static bool chunk_merge_default(void *chunk_a, size_t size_a, void *chunk_b, size_t size_b, bool committed, unsigned arena_ind) { - tsdn_t *tsdn; - tsdn = tsdn_fetch(); - - return (chunk_merge_default_impl(tsdn, chunk_a, chunk_b)); + return (chunk_merge_default_impl(chunk_a, chunk_b)); } static rtree_node_elm_t * @@ -781,32 +773,11 @@ chunk_boot(void) chunksize_mask = chunksize - 1; chunk_npages = (chunksize >> LG_PAGE); - if (have_dss && chunk_dss_boot()) - return (true); + if (have_dss) + chunk_dss_boot(); if (rtree_new(&chunks_rtree, (unsigned)((ZU(1) << (LG_SIZEOF_PTR+3)) - opt_lg_chunk), chunks_rtree_node_alloc, NULL)) return (true); return (false); } - -void -chunk_prefork(tsdn_t *tsdn) -{ - - chunk_dss_prefork(tsdn); -} - -void -chunk_postfork_parent(tsdn_t *tsdn) -{ - - chunk_dss_postfork_parent(tsdn); -} - -void -chunk_postfork_child(tsdn_t *tsdn) -{ - - chunk_dss_postfork_child(tsdn); -} diff --git a/src/chunk_dss.c b/src/chunk_dss.c index 0b1f82bd..85a13548 100644 --- a/src/chunk_dss.c +++ b/src/chunk_dss.c @@ -10,20 +10,19 @@ const char *dss_prec_names[] = { "N/A" }; -/* Current dss precedence default, used when creating new arenas. */ -static dss_prec_t dss_prec_default = DSS_PREC_DEFAULT; - /* - * Protects sbrk() calls. This avoids malloc races among threads, though it - * does not protect against races with threads that call sbrk() directly. + * Current dss precedence default, used when creating new arenas. NB: This is + * stored as unsigned rather than dss_prec_t because in principle there's no + * guarantee that sizeof(dss_prec_t) is the same as sizeof(unsigned), and we use + * atomic operations to synchronize the setting. */ -static malloc_mutex_t dss_mtx; +static unsigned dss_prec_default = (unsigned)DSS_PREC_DEFAULT; /* Base address of the DSS. */ static void *dss_base; -/* Current end of the DSS, or ((void *)-1) if the DSS is exhausted. */ -static void *dss_prev; -/* Current upper limit on DSS addresses. */ +/* Atomic boolean indicating whether the DSS is exhausted. */ +static unsigned dss_exhausted; +/* Atomic current upper limit on DSS addresses. 
*/ static void *dss_max; /******************************************************************************/ @@ -41,30 +40,59 @@ chunk_dss_sbrk(intptr_t increment) } dss_prec_t -chunk_dss_prec_get(tsdn_t *tsdn) +chunk_dss_prec_get(void) { dss_prec_t ret; if (!have_dss) return (dss_prec_disabled); - malloc_mutex_lock(tsdn, &dss_mtx); - ret = dss_prec_default; - malloc_mutex_unlock(tsdn, &dss_mtx); + ret = (dss_prec_t)atomic_read_u(&dss_prec_default); return (ret); } bool -chunk_dss_prec_set(tsdn_t *tsdn, dss_prec_t dss_prec) +chunk_dss_prec_set(dss_prec_t dss_prec) { if (!have_dss) return (dss_prec != dss_prec_disabled); - malloc_mutex_lock(tsdn, &dss_mtx); - dss_prec_default = dss_prec; - malloc_mutex_unlock(tsdn, &dss_mtx); + atomic_write_u(&dss_prec_default, (unsigned)dss_prec); return (false); } +static void * +chunk_dss_max_update(void *new_addr) +{ + void *max_cur; + spin_t spinner; + + /* + * Get the current end of the DSS as max_cur and ensure that dss_max is + * up to date. + */ + spin_init(&spinner); + while (true) { + void *max_prev = atomic_read_p(&dss_max); + + max_cur = chunk_dss_sbrk(0); + if ((uintptr_t)max_prev > (uintptr_t)max_cur) { + /* + * Another thread optimistically updated dss_max. Wait + * for it to finish. + */ + spin_adaptive(&spinner); + continue; + } + if (!atomic_cas_p(&dss_max, max_prev, max_cur)) + break; + } + /* A fixed new_addr can only be supported if it is at the edge of the DSS. */ + if (new_addr != NULL && max_cur != new_addr) + return (NULL); + + return (max_cur); +} + void * chunk_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit) @@ -80,28 +108,20 @@ chunk_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, if ((intptr_t)size < 0) return (NULL); - malloc_mutex_lock(tsdn, &dss_mtx); - if (dss_prev != (void *)-1) { - + if (!atomic_read_u(&dss_exhausted)) { /* * The loop is necessary to recover from races with other * threads that are using the DSS for something other than * malloc. */ - do { - void *ret, *cpad, *dss_next; + while (true) { + void *ret, *cpad, *max_cur, *dss_next, *dss_prev; size_t gap_size, cpad_size; intptr_t incr; - /* Avoid an unnecessary system call. */ - if (new_addr != NULL && dss_max != new_addr) - break; - /* Get the current end of the DSS. */ - dss_max = chunk_dss_sbrk(0); - - /* Make sure the earlier condition still holds. */ - if (new_addr != NULL && dss_max != new_addr) - break; + max_cur = chunk_dss_max_update(new_addr); + if (max_cur == NULL) + goto label_oom; /* * Calculate how much padding is necessary to @@ -120,17 +140,23 @@ chunk_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, cpad_size = (uintptr_t)ret - (uintptr_t)cpad; dss_next = (void *)((uintptr_t)ret + size); if ((uintptr_t)ret < (uintptr_t)dss_max || - (uintptr_t)dss_next < (uintptr_t)dss_max) { - /* Wrap-around. */ - malloc_mutex_unlock(tsdn, &dss_mtx); - return (NULL); - } + (uintptr_t)dss_next < (uintptr_t)dss_max) + goto label_oom; /* Wrap-around. */ incr = gap_size + cpad_size + size; + + /* + * Optimistically update dss_max, and roll back below if + * sbrk() fails. No other thread will try to extend the + * DSS while dss_max is greater than the current DSS + * max reported by sbrk(0). + */ + if (atomic_cas_p(&dss_max, max_cur, dss_next)) + continue; + + /* Try to allocate. */ dss_prev = chunk_dss_sbrk(incr); - if (dss_prev == dss_max) { + if (dss_prev == max_cur) { /* Success. 
*/ - dss_max = dss_next; - malloc_mutex_unlock(tsdn, &dss_mtx); if (cpad_size != 0) { chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; @@ -147,68 +173,65 @@ chunk_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, *commit = pages_decommit(ret, size); return (ret); } - } while (dss_prev != (void *)-1); - } - malloc_mutex_unlock(tsdn, &dss_mtx); + /* + * Failure, whether due to OOM or a race with a raw + * sbrk() call from outside the allocator. Try to roll + * back optimistic dss_max update; if rollback fails, + * it's due to another caller of this function having + * succeeded since this invocation started, in which + * case rollback is not necessary. + */ + atomic_cas_p(&dss_max, dss_next, max_cur); + if (dss_prev == (void *)-1) { + /* OOM. */ + atomic_write_u(&dss_exhausted, (unsigned)true); + goto label_oom; + } + } + } +label_oom: return (NULL); } -bool -chunk_in_dss(tsdn_t *tsdn, void *chunk) +static bool +chunk_in_dss_helper(void *chunk, void *max) { - bool ret; - cassert(have_dss); - - malloc_mutex_lock(tsdn, &dss_mtx); - if ((uintptr_t)chunk >= (uintptr_t)dss_base - && (uintptr_t)chunk < (uintptr_t)dss_max) - ret = true; - else - ret = false; - malloc_mutex_unlock(tsdn, &dss_mtx); - - return (ret); + return ((uintptr_t)chunk >= (uintptr_t)dss_base && (uintptr_t)chunk < + (uintptr_t)max); } bool +chunk_in_dss(void *chunk) +{ + + cassert(have_dss); + + return (chunk_in_dss_helper(chunk, atomic_read_p(&dss_max))); +} + +bool +chunk_dss_mergeable(void *chunk_a, void *chunk_b) +{ + void *max; + + cassert(have_dss); + + max = atomic_read_p(&dss_max); + return (chunk_in_dss_helper(chunk_a, max) == + chunk_in_dss_helper(chunk_b, max)); +} + +void chunk_dss_boot(void) { cassert(have_dss); - if (malloc_mutex_init(&dss_mtx, "dss", WITNESS_RANK_DSS)) - return (true); dss_base = chunk_dss_sbrk(0); - dss_prev = dss_base; + dss_exhausted = (unsigned)(dss_base == (void *)-1); dss_max = dss_base; - - return (false); -} - -void -chunk_dss_prefork(tsdn_t *tsdn) -{ - - if (have_dss) - malloc_mutex_prefork(tsdn, &dss_mtx); -} - -void -chunk_dss_postfork_parent(tsdn_t *tsdn) -{ - - if (have_dss) - malloc_mutex_postfork_parent(tsdn, &dss_mtx); -} - -void -chunk_dss_postfork_child(tsdn_t *tsdn) -{ - - if (have_dss) - malloc_mutex_postfork_child(tsdn, &dss_mtx); } /******************************************************************************/ diff --git a/src/ckh.c b/src/ckh.c index 747c1c86..3be671c3 100644 --- a/src/ckh.c +++ b/src/ckh.c @@ -40,8 +40,8 @@ /******************************************************************************/ /* Function prototypes for non-inline static functions. 
*/ -static bool ckh_grow(tsdn_t *tsdn, ckh_t *ckh); -static void ckh_shrink(tsdn_t *tsdn, ckh_t *ckh); +static bool ckh_grow(tsd_t *tsd, ckh_t *ckh); +static void ckh_shrink(tsd_t *tsd, ckh_t *ckh); /******************************************************************************/ @@ -244,7 +244,7 @@ ckh_rebuild(ckh_t *ckh, ckhc_t *aTab) } static bool -ckh_grow(tsdn_t *tsdn, ckh_t *ckh) +ckh_grow(tsd_t *tsd, ckh_t *ckh) { bool ret; ckhc_t *tab, *ttab; @@ -270,8 +270,8 @@ ckh_grow(tsdn_t *tsdn, ckh_t *ckh) ret = true; goto label_return; } - tab = (ckhc_t *)ipallocztm(tsdn, usize, CACHELINE, true, NULL, - true, arena_ichoose(tsdn, NULL)); + tab = (ckhc_t *)ipallocztm(tsd_tsdn(tsd), usize, CACHELINE, + true, NULL, true, arena_ichoose(tsd, NULL)); if (tab == NULL) { ret = true; goto label_return; @@ -283,12 +283,12 @@ ckh_grow(tsdn_t *tsdn, ckh_t *ckh) ckh->lg_curbuckets = lg_curcells - LG_CKH_BUCKET_CELLS; if (!ckh_rebuild(ckh, tab)) { - idalloctm(tsdn, tab, NULL, true, true); + idalloctm(tsd_tsdn(tsd), tab, NULL, true, true); break; } /* Rebuilding failed, so back out partially rebuilt table. */ - idalloctm(tsdn, ckh->tab, NULL, true, true); + idalloctm(tsd_tsdn(tsd), ckh->tab, NULL, true, true); ckh->tab = tab; ckh->lg_curbuckets = lg_prevbuckets; } @@ -299,7 +299,7 @@ label_return: } static void -ckh_shrink(tsdn_t *tsdn, ckh_t *ckh) +ckh_shrink(tsd_t *tsd, ckh_t *ckh) { ckhc_t *tab, *ttab; size_t usize; @@ -314,8 +314,8 @@ ckh_shrink(tsdn_t *tsdn, ckh_t *ckh) usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE); if (unlikely(usize == 0 || usize > HUGE_MAXCLASS)) return; - tab = (ckhc_t *)ipallocztm(tsdn, usize, CACHELINE, true, NULL, true, - arena_ichoose(tsdn, NULL)); + tab = (ckhc_t *)ipallocztm(tsd_tsdn(tsd), usize, CACHELINE, true, NULL, + true, arena_ichoose(tsd, NULL)); if (tab == NULL) { /* * An OOM error isn't worth propagating, since it doesn't @@ -330,7 +330,7 @@ ckh_shrink(tsdn_t *tsdn, ckh_t *ckh) ckh->lg_curbuckets = lg_curcells - LG_CKH_BUCKET_CELLS; if (!ckh_rebuild(ckh, tab)) { - idalloctm(tsdn, tab, NULL, true, true); + idalloctm(tsd_tsdn(tsd), tab, NULL, true, true); #ifdef CKH_COUNT ckh->nshrinks++; #endif @@ -338,7 +338,7 @@ ckh_shrink(tsdn_t *tsdn, ckh_t *ckh) } /* Rebuilding failed, so back out partially rebuilt table. 
*/ - idalloctm(tsdn, ckh->tab, NULL, true, true); + idalloctm(tsd_tsdn(tsd), ckh->tab, NULL, true, true); ckh->tab = tab; ckh->lg_curbuckets = lg_prevbuckets; #ifdef CKH_COUNT @@ -347,7 +347,7 @@ ckh_shrink(tsdn_t *tsdn, ckh_t *ckh) } bool -ckh_new(tsdn_t *tsdn, ckh_t *ckh, size_t minitems, ckh_hash_t *hash, +ckh_new(tsd_t *tsd, ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh_keycomp_t *keycomp) { bool ret; @@ -391,8 +391,8 @@ ckh_new(tsdn_t *tsdn, ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ret = true; goto label_return; } - ckh->tab = (ckhc_t *)ipallocztm(tsdn, usize, CACHELINE, true, NULL, - true, arena_ichoose(tsdn, NULL)); + ckh->tab = (ckhc_t *)ipallocztm(tsd_tsdn(tsd), usize, CACHELINE, true, + NULL, true, arena_ichoose(tsd, NULL)); if (ckh->tab == NULL) { ret = true; goto label_return; @@ -404,7 +404,7 @@ label_return: } void -ckh_delete(tsdn_t *tsdn, ckh_t *ckh) +ckh_delete(tsd_t *tsd, ckh_t *ckh) { assert(ckh != NULL); @@ -421,7 +421,7 @@ ckh_delete(tsdn_t *tsdn, ckh_t *ckh) (unsigned long long)ckh->nrelocs); #endif - idalloctm(tsdn, ckh->tab, NULL, true, true); + idalloctm(tsd_tsdn(tsd), ckh->tab, NULL, true, true); if (config_debug) memset(ckh, JEMALLOC_FREE_JUNK, sizeof(ckh_t)); } @@ -456,7 +456,7 @@ ckh_iter(ckh_t *ckh, size_t *tabind, void **key, void **data) } bool -ckh_insert(tsdn_t *tsdn, ckh_t *ckh, const void *key, const void *data) +ckh_insert(tsd_t *tsd, ckh_t *ckh, const void *key, const void *data) { bool ret; @@ -468,7 +468,7 @@ ckh_insert(tsdn_t *tsdn, ckh_t *ckh, const void *key, const void *data) #endif while (ckh_try_insert(ckh, &key, &data)) { - if (ckh_grow(tsdn, ckh)) { + if (ckh_grow(tsd, ckh)) { ret = true; goto label_return; } @@ -480,7 +480,7 @@ label_return: } bool -ckh_remove(tsdn_t *tsdn, ckh_t *ckh, const void *searchkey, void **key, +ckh_remove(tsd_t *tsd, ckh_t *ckh, const void *searchkey, void **key, void **data) { size_t cell; @@ -502,7 +502,7 @@ ckh_remove(tsdn_t *tsdn, ckh_t *ckh, const void *searchkey, void **key, + LG_CKH_BUCKET_CELLS - 2)) && ckh->lg_curbuckets > ckh->lg_minbuckets) { /* Ignore error due to OOM. 
*/ - ckh_shrink(tsdn, ckh); + ckh_shrink(tsd, ckh); } return (false); diff --git a/src/ctl.c b/src/ctl.c index dad80086..bc78b205 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1478,7 +1478,7 @@ tcache_create_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); READONLY(); - if (tcaches_create(tsd_tsdn(tsd), &tcache_ind)) { + if (tcaches_create(tsd, &tcache_ind)) { ret = EFAULT; goto label_return; } @@ -1685,11 +1685,11 @@ arena_i_dss_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, dss_prec_old = arena_dss_prec_get(tsd_tsdn(tsd), arena); } else { if (dss_prec != dss_prec_limit && - chunk_dss_prec_set(tsd_tsdn(tsd), dss_prec)) { + chunk_dss_prec_set(dss_prec)) { ret = EFAULT; goto label_return; } - dss_prec_old = chunk_dss_prec_get(tsd_tsdn(tsd)); + dss_prec_old = chunk_dss_prec_get(); } dss = dss_prec_names[dss_prec_old]; @@ -2100,7 +2100,7 @@ prof_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, if (lg_sample >= (sizeof(uint64_t) << 3)) lg_sample = (sizeof(uint64_t) << 3) - 1; - prof_reset(tsd_tsdn(tsd), lg_sample); + prof_reset(tsd, lg_sample); ret = 0; label_return: diff --git a/src/huge.c b/src/huge.c index 3a2877ca..62e6932b 100644 --- a/src/huge.c +++ b/src/huge.c @@ -54,6 +54,7 @@ huge_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, { void *ret; size_t ausize; + arena_t *iarena; extent_node_t *node; bool is_zeroed; @@ -67,8 +68,9 @@ huge_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, assert(ausize >= chunksize); /* Allocate an extent node with which to track the chunk. */ + iarena = (!tsdn_null(tsdn)) ? arena_ichoose(tsdn_tsd(tsdn), NULL) : a0get(); node = ipallocztm(tsdn, CACHELINE_CEILING(sizeof(extent_node_t)), - CACHELINE, false, NULL, true, arena_ichoose(tsdn, arena)); + CACHELINE, false, NULL, true, iarena); if (node == NULL) return (NULL); @@ -114,7 +116,7 @@ huge_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, #define huge_dalloc_junk JEMALLOC_N(huge_dalloc_junk_impl) #endif static void -huge_dalloc_junk(tsdn_t *tsdn, void *ptr, size_t usize) +huge_dalloc_junk(void *ptr, size_t usize) { if (config_fill && have_dss && unlikely(opt_junk_free)) { @@ -122,7 +124,7 @@ huge_dalloc_junk(tsdn_t *tsdn, void *ptr, size_t usize) * Only bother junk filling if the chunk isn't about to be * unmapped. */ - if (!config_munmap || (have_dss && chunk_in_dss(tsdn, ptr))) + if (!config_munmap || (have_dss && chunk_in_dss(ptr))) memset(ptr, JEMALLOC_FREE_JUNK, usize); } } @@ -221,7 +223,7 @@ huge_ralloc_no_move_shrink(tsdn_t *tsdn, void *ptr, size_t oldsize, if (oldsize > usize) { size_t sdiff = oldsize - usize; if (config_fill && unlikely(opt_junk_free)) { - huge_dalloc_junk(tsdn, (void *)((uintptr_t)ptr + usize), + huge_dalloc_junk((void *)((uintptr_t)ptr + usize), sdiff); post_zeroed = false; } else { @@ -402,7 +404,7 @@ huge_dalloc(tsdn_t *tsdn, void *ptr) ql_remove(&arena->huge, node, ql_link); malloc_mutex_unlock(tsdn, &arena->huge_mtx); - huge_dalloc_junk(tsdn, extent_node_addr_get(node), + huge_dalloc_junk(extent_node_addr_get(node), extent_node_size_get(node)); arena_chunk_dalloc_huge(tsdn, extent_node_arena_get(node), extent_node_addr_get(node), extent_node_size_get(node)); diff --git a/src/jemalloc.c b/src/jemalloc.c index 5d1f4937..38650ff0 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -5,7 +5,11 @@ /* Data. */ /* Runtime configuration options. 
*/ -const char *je_malloc_conf JEMALLOC_ATTR(weak); +const char *je_malloc_conf +#ifndef _WIN32 + JEMALLOC_ATTR(weak) +#endif + ; bool opt_abort = #ifdef JEMALLOC_DEBUG true @@ -85,14 +89,25 @@ enum { }; static uint8_t malloc_slow_flags; -/* Last entry for overflow detection only. */ JEMALLOC_ALIGNED(CACHELINE) -const size_t index2size_tab[NSIZES+1] = { -#define SC(index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup) \ +const size_t pind2sz_tab[NPSIZES] = { +#define PSZ_yes(lg_grp, ndelta, lg_delta) \ + (((ZU(1)<<lg_grp) + (ZU(ndelta)<<lg_delta))), [...] +#elif defined(JEMALLOC_GLIBC_MALLOC_HOOK) && defined(CPU_COUNT) + /* + * glibc >= 2.6 has the CPU_COUNT macro. + * + * glibc's sysconf() uses isspace(). glibc allocates for the first time + * *before* setting up the isspace tables. Therefore we need a + * different method to get the number of CPUs. + */ + { + cpu_set_t set; + + pthread_getaffinity_np(pthread_self(), sizeof(set), &set); + result = CPU_COUNT(&set); + } #else result = sysconf(_SC_NPROCESSORS_ONLN); #endif @@ -1103,8 +1140,7 @@ malloc_conf_init(void) for (i = 0; i < dss_prec_limit; i++) { if (strncmp(dss_prec_names[i], v, vlen) == 0) { - if (chunk_dss_prec_set(NULL, - i)) { + if (chunk_dss_prec_set(i)) { malloc_conf_error( "Error setting dss", k, klen, v, vlen); @@ -1149,9 +1185,20 @@ malloc_conf_init(void) if (config_fill) { if (CONF_MATCH("junk")) { if (CONF_MATCH_VALUE("true")) { - opt_junk = "true"; - opt_junk_alloc = opt_junk_free = - true; + if (config_valgrind && + unlikely(in_valgrind)) { + malloc_conf_error( + "Deallocation-time " + "junk filling cannot " + "be enabled while " + "running inside " + "Valgrind", k, klen, v, + vlen); + } else { + opt_junk = "true"; + opt_junk_alloc = true; + opt_junk_free = true; + } } else if (CONF_MATCH_VALUE("false")) { opt_junk = "false"; opt_junk_alloc = opt_junk_free = @@ -1161,9 +1208,20 @@ malloc_conf_init(void) opt_junk_alloc = true; opt_junk_free = false; } else if (CONF_MATCH_VALUE("free")) { - opt_junk = "free"; - opt_junk_alloc = false; - opt_junk_free = true; + if (config_valgrind && + unlikely(in_valgrind)) { + malloc_conf_error( + "Deallocation-time " + "junk filling cannot " + "be enabled while " + "running inside " + "Valgrind", k, klen, v, + vlen); + } else { + opt_junk = "free"; + opt_junk_alloc = false; + opt_junk_free = true; + } } else { malloc_conf_error( "Invalid conf value", k, @@ -1249,11 +1307,14 @@ malloc_init_hard_needed(void) } #ifdef JEMALLOC_THREADED_INIT if (malloc_initializer != NO_INITIALIZER && !IS_INITIALIZER) { + spin_t spinner; + /* Busy-wait until the initializing thread completes. 
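The malloc_ncpus() hunk above exists because glibc's sysconf() can itself allocate (it parses with isspace(), whose tables are set up lazily), which is fatal while the allocator is still bootstrapping. A standalone version of the affinity-mask probe, assuming glibc >= 2.6; note that it counts the CPUs in the calling thread's affinity mask, which can be fewer than the CPUs online:

#define _GNU_SOURCE
#include <pthread.h>
#include <sched.h>
#include <stdio.h>

int
main(void)
{
	cpu_set_t set;
	long result = 1;	/* conservative fallback */

	if (pthread_getaffinity_np(pthread_self(), sizeof(set), &set) == 0)
		result = CPU_COUNT(&set);	/* no allocation involved */
	printf("ncpus: %ld\n", result);
	return (0);
}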
*/ + spin_init(&spinner); do { - malloc_mutex_unlock(NULL, &init_lock); - CPU_SPINWAIT; - malloc_mutex_lock(NULL, &init_lock); + malloc_mutex_unlock(TSDN_NULL, &init_lock); + spin_adaptive(&spinner); + malloc_mutex_lock(TSDN_NULL, &init_lock); } while (!malloc_initialized()); return (false); } @@ -1287,8 +1348,7 @@ malloc_init_hard_a0_locked() return (true); if (config_prof) prof_boot1(); - if (arena_boot()) - return (true); + arena_boot(); if (config_tcache && tcache_boot(TSDN_NULL)) return (true); if (malloc_mutex_init(&arenas_lock, "arenas", WITNESS_RANK_ARENAS)) @@ -1419,7 +1479,7 @@ malloc_init_hard(void) return (true); malloc_mutex_lock(tsd_tsdn(tsd), &init_lock); - if (config_prof && prof_boot2(tsd_tsdn(tsd))) { + if (config_prof && prof_boot2(tsd)) { malloc_mutex_unlock(tsd_tsdn(tsd), &init_lock); return (true); } @@ -1994,6 +2054,29 @@ JEMALLOC_EXPORT void *(*__realloc_hook)(void *ptr, size_t size) = je_realloc; JEMALLOC_EXPORT void *(*__memalign_hook)(size_t alignment, size_t size) = je_memalign; # endif + +#ifdef CPU_COUNT +/* + * To enable static linking with glibc, the libc specific malloc interface must + * be implemented also, so none of glibc's malloc.o functions are added to the + * link. + */ +#define ALIAS(je_fn) __attribute__((alias (#je_fn), used)) +/* To force macro expansion of je_ prefix before stringification. */ +#define PREALIAS(je_fn) ALIAS(je_fn) +void *__libc_malloc(size_t size) PREALIAS(je_malloc); +void __libc_free(void* ptr) PREALIAS(je_free); +void *__libc_realloc(void* ptr, size_t size) PREALIAS(je_realloc); +void *__libc_calloc(size_t n, size_t size) PREALIAS(je_calloc); +void *__libc_memalign(size_t align, size_t s) PREALIAS(je_memalign); +void *__libc_valloc(size_t size) PREALIAS(je_valloc); +int __posix_memalign(void** r, size_t a, size_t s) + PREALIAS(je_posix_memalign); +#undef PREALIAS +#undef ALIAS + +#endif + #endif /* @@ -2747,7 +2830,6 @@ _malloc_prefork(void) } } base_prefork(tsd_tsdn(tsd)); - chunk_prefork(tsd_tsdn(tsd)); for (i = 0; i < narenas; i++) { if ((arena = arena_get(tsd_tsdn(tsd), i, false)) != NULL) arena_prefork3(tsd_tsdn(tsd), arena); @@ -2776,7 +2858,6 @@ _malloc_postfork(void) witness_postfork_parent(tsd); /* Release all mutexes, now that fork() has completed. */ - chunk_postfork_parent(tsd_tsdn(tsd)); base_postfork_parent(tsd_tsdn(tsd)); for (i = 0, narenas = narenas_total_get(); i < narenas; i++) { arena_t *arena; @@ -2801,7 +2882,6 @@ jemalloc_postfork_child(void) witness_postfork_child(tsd); /* Release all mutexes, now that fork() has completed. 
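The ALIAS/PREALIAS block above works because __attribute__((alias)) makes the __libc_* names resolve to jemalloc's own definitions at link time, so the linker never needs to pull glibc's malloc.o into a static link. The mechanism in isolation, with a hypothetical my_malloc standing in for the real entry point (the placeholder body is not a working allocator):

#include <stddef.h>

void *
my_malloc(size_t size)
{
	(void)size;
	return (NULL);	/* placeholder; the real alias target is je_malloc */
}

/*
 * Same object code, second linker-visible name; "used" keeps the alias from
 * being discarded at higher optimization levels.
 */
void *__libc_malloc(size_t size) __attribute__((alias ("my_malloc"), used));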
*/ - chunk_postfork_child(tsd_tsdn(tsd)); base_postfork_child(tsd_tsdn(tsd)); for (i = 0, narenas = narenas_total_get(); i < narenas; i++) { arena_t *arena; diff --git a/src/mutex.c b/src/mutex.c index a1fac342..6333e73d 100644 --- a/src/mutex.c +++ b/src/mutex.c @@ -80,6 +80,8 @@ malloc_mutex_init(malloc_mutex_t *mutex, const char *name, witness_rank_t rank) _CRT_SPINCOUNT)) return (true); # endif +#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) + mutex->lock = OS_UNFAIR_LOCK_INIT; #elif (defined(JEMALLOC_OSSPIN)) mutex->lock = 0; #elif (defined(JEMALLOC_MUTEX_INIT_CB)) diff --git a/src/nstime.c b/src/nstime.c index aad2c260..0948e29f 100644 --- a/src/nstime.c +++ b/src/nstime.c @@ -97,6 +97,76 @@ nstime_divide(const nstime_t *time, const nstime_t *divisor) return (time->ns / divisor->ns); } +#ifdef _WIN32 +# define NSTIME_MONOTONIC true +static void +nstime_get(nstime_t *time) +{ + FILETIME ft; + uint64_t ticks_100ns; + + GetSystemTimeAsFileTime(&ft); + ticks_100ns = (((uint64_t)ft.dwHighDateTime) << 32) | ft.dwLowDateTime; + + nstime_init(time, ticks_100ns * 100); +} +#elif JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE +# define NSTIME_MONOTONIC true +static void +nstime_get(nstime_t *time) +{ + struct timespec ts; + + clock_gettime(CLOCK_MONOTONIC_COARSE, &ts); + nstime_init2(time, ts.tv_sec, ts.tv_nsec); +} +#elif JEMALLOC_HAVE_CLOCK_MONOTONIC +# define NSTIME_MONOTONIC true +static void +nstime_get(nstime_t *time) +{ + struct timespec ts; + + clock_gettime(CLOCK_MONOTONIC, &ts); + nstime_init2(time, ts.tv_sec, ts.tv_nsec); +} +#elif JEMALLOC_HAVE_MACH_ABSOLUTE_TIME +# define NSTIME_MONOTONIC true +static void +nstime_get(nstime_t *time) +{ + + nstime_init(time, mach_absolute_time()); +} +#else +# define NSTIME_MONOTONIC false +static void +nstime_get(nstime_t *time) +{ + struct timeval tv; + + gettimeofday(&tv, NULL); + nstime_init2(time, tv.tv_sec, tv.tv_usec * 1000); +} +#endif + +#ifdef JEMALLOC_JET +#undef nstime_monotonic +#define nstime_monotonic JEMALLOC_N(n_nstime_monotonic) +#endif +bool +nstime_monotonic(void) +{ + + return (NSTIME_MONOTONIC); +#undef NSTIME_MONOTONIC +} +#ifdef JEMALLOC_JET +#undef nstime_monotonic +#define nstime_monotonic JEMALLOC_N(nstime_monotonic) +nstime_monotonic_t *nstime_monotonic = JEMALLOC_N(n_nstime_monotonic); +#endif + #ifdef JEMALLOC_JET #undef nstime_update #define nstime_update JEMALLOC_N(n_nstime_update) @@ -107,33 +177,7 @@ nstime_update(nstime_t *time) nstime_t old_time; nstime_copy(&old_time, time); - -#ifdef _WIN32 - { - FILETIME ft; - uint64_t ticks; - GetSystemTimeAsFileTime(&ft); - ticks = (((uint64_t)ft.dwHighDateTime) << 32) | - ft.dwLowDateTime; - time->ns = ticks * 100; - } -#elif JEMALLOC_CLOCK_GETTIME - { - struct timespec ts; - - if (sysconf(_SC_MONOTONIC_CLOCK) > 0) - clock_gettime(CLOCK_MONOTONIC, &ts); - else - clock_gettime(CLOCK_REALTIME, &ts); - time->ns = ts.tv_sec * BILLION + ts.tv_nsec; - } -#else - { - struct timeval tv; - gettimeofday(&tv, NULL); - time->ns = tv.tv_sec * BILLION + tv.tv_usec * 1000; - } -#endif + nstime_get(time); /* Handle non-monotonic clocks. */ if (unlikely(nstime_compare(&old_time, time) > 0)) { diff --git a/src/pages.c b/src/pages.c index 2a9b7e37..647952ac 100644 --- a/src/pages.c +++ b/src/pages.c @@ -207,6 +207,11 @@ os_overcommits_sysctl(void) #endif #ifdef JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY +/* + * Use syscall(2) rather than {open,read,close}(2) when possible to avoid + * reentry during bootstrapping if another library has interposed system call + * wrappers. 
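The new nstime_get() above selects a clock at compile time in decreasing order of preference: coarse monotonic (cheapest), monotonic, mach_absolute_time(), and only then the non-monotonic gettimeofday(2), with nstime_update() still guarding against backward jumps. A Linux-only sketch of the same preference order:

#include <stdint.h>
#include <time.h>

static uint64_t
monotonic_ns(void)
{
	struct timespec ts;

#ifdef CLOCK_MONOTONIC_COARSE
	/* Tick-granularity, but much cheaper to read. */
	if (clock_gettime(CLOCK_MONOTONIC_COARSE, &ts) == 0)
		return ((uint64_t)ts.tv_sec * 1000000000 + ts.tv_nsec);
#endif
	clock_gettime(CLOCK_MONOTONIC, &ts);
	return ((uint64_t)ts.tv_sec * 1000000000 + ts.tv_nsec);
}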
+ */ static bool os_overcommits_proc(void) { @@ -214,11 +219,26 @@ os_overcommits_proc(void) char buf[1]; ssize_t nread; +#if defined(JEMALLOC_HAVE_SYSCALL) && defined(SYS_open) + fd = (int)syscall(SYS_open, "/proc/sys/vm/overcommit_memory", O_RDONLY); +#else fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY); +#endif if (fd == -1) return (false); /* Error. */ +#if defined(JEMALLOC_HAVE_SYSCALL) && defined(SYS_read) + nread = (ssize_t)syscall(SYS_read, fd, &buf, sizeof(buf)); +#else nread = read(fd, &buf, sizeof(buf)); +#endif + +#if defined(JEMALLOC_HAVE_SYSCALL) && defined(SYS_close) + syscall(SYS_close, fd); +#else + close(fd); +#endif + if (nread < 1) return (false); /* Error. */ /* diff --git a/src/prof.c b/src/prof.c index c1f58d46..140d5b22 100644 --- a/src/prof.c +++ b/src/prof.c @@ -125,7 +125,7 @@ static bool prof_tctx_should_destroy(tsdn_t *tsdn, prof_tctx_t *tctx); static void prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx); static bool prof_tdata_should_destroy(tsdn_t *tsdn, prof_tdata_t *tdata, bool even_if_attached); -static void prof_tdata_destroy(tsdn_t *tsdn, prof_tdata_t *tdata, +static void prof_tdata_destroy(tsd_t *tsd, prof_tdata_t *tdata, bool even_if_attached); static char *prof_thread_name_alloc(tsdn_t *tsdn, const char *thread_name); @@ -591,7 +591,7 @@ prof_gctx_try_destroy(tsd_t *tsd, prof_tdata_t *tdata_self, prof_gctx_t *gctx, assert(gctx->nlimbo != 0); if (tctx_tree_empty(&gctx->tctxs) && gctx->nlimbo == 1) { /* Remove gctx from bt2gctx. */ - if (ckh_remove(tsd_tsdn(tsd), &bt2gctx, &gctx->bt, NULL, NULL)) + if (ckh_remove(tsd, &bt2gctx, &gctx->bt, NULL, NULL)) not_reached(); prof_leave(tsd, tdata_self); /* Destroy gctx. */ @@ -651,7 +651,7 @@ prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) assert(tctx->cnts.accumobjs == 0); assert(tctx->cnts.accumbytes == 0); - ckh_remove(tsd_tsdn(tsd), &tdata->bt2tctx, &gctx->bt, NULL, NULL); + ckh_remove(tsd, &tdata->bt2tctx, &gctx->bt, NULL, NULL); destroy_tdata = prof_tdata_should_destroy(tsd_tsdn(tsd), tdata, false); malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock); @@ -704,7 +704,7 @@ prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) malloc_mutex_assert_not_owner(tsd_tsdn(tsd), tctx->tdata->lock); if (destroy_tdata) - prof_tdata_destroy(tsd_tsdn(tsd), tdata, false); + prof_tdata_destroy(tsd, tdata, false); if (destroy_tctx) idalloctm(tsd_tsdn(tsd), tctx, NULL, true, true); @@ -733,7 +733,7 @@ prof_lookup_global(tsd_t *tsd, prof_bt_t *bt, prof_tdata_t *tdata, return (true); } btkey.p = &gctx.p->bt; - if (ckh_insert(tsd_tsdn(tsd), &bt2gctx, btkey.v, gctx.v)) { + if (ckh_insert(tsd, &bt2gctx, btkey.v, gctx.v)) { /* OOM. */ prof_leave(tsd, tdata); idalloctm(tsd_tsdn(tsd), gctx.v, NULL, true, true); @@ -795,7 +795,7 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) /* Link a prof_tctx_t into gctx for this thread. 
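Same point as the comment above, in a self-contained form: syscall(2) enters the kernel directly, so an interposing library's open()/read()/close() wrappers (which may allocate) never run. The helper name is hypothetical; the fallback mirrors the #else branches in the hunk:

#include <fcntl.h>
#include <sys/syscall.h>
#include <unistd.h>

static int
overcommit_mode(void)
{
	long fd, nread;
	char buf[1];

#if defined(SYS_open)
	fd = syscall(SYS_open, "/proc/sys/vm/overcommit_memory", O_RDONLY);
#else
	fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY);
#endif
	if (fd == -1)
		return (-1);
	nread = syscall(SYS_read, (int)fd, buf, sizeof(buf));
	syscall(SYS_close, (int)fd);
	if (nread < 1)
		return (-1);
	return (buf[0] - '0');	/* 0 = heuristic, 1 = always, 2 = never */
}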
*/ ret.v = iallocztm(tsd_tsdn(tsd), sizeof(prof_tctx_t), size2index(sizeof(prof_tctx_t)), false, NULL, true, - arena_ichoose(tsd_tsdn(tsd), NULL), true); + arena_ichoose(tsd, NULL), true); if (ret.p == NULL) { if (new_gctx) prof_gctx_try_destroy(tsd, tdata, gctx, tdata); @@ -810,8 +810,7 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) ret.p->prepared = true; ret.p->state = prof_tctx_state_initializing; malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock); - error = ckh_insert(tsd_tsdn(tsd), &tdata->bt2tctx, btkey, - ret.v); + error = ckh_insert(tsd, &tdata->bt2tctx, btkey, ret.v); malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock); if (error) { if (new_gctx) @@ -1791,7 +1790,7 @@ prof_thr_uid_alloc(tsdn_t *tsdn) } static prof_tdata_t * -prof_tdata_init_impl(tsdn_t *tsdn, uint64_t thr_uid, uint64_t thr_discrim, +prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, char *thread_name, bool active) { prof_tdata_t *tdata; @@ -1799,7 +1798,7 @@ prof_tdata_init_impl(tsdn_t *tsdn, uint64_t thr_uid, uint64_t thr_discrim, cassert(config_prof); /* Initialize an empty cache for this thread. */ - tdata = (prof_tdata_t *)iallocztm(tsdn, sizeof(prof_tdata_t), + tdata = (prof_tdata_t *)iallocztm(tsd_tsdn(tsd), sizeof(prof_tdata_t), size2index(sizeof(prof_tdata_t)), false, NULL, true, arena_get(TSDN_NULL, 0, true), true); if (tdata == NULL) @@ -1813,9 +1812,9 @@ prof_tdata_init_impl(tsdn_t *tsdn, uint64_t thr_uid, uint64_t thr_discrim, tdata->expired = false; tdata->tctx_uid_next = 0; - if (ckh_new(tsdn, &tdata->bt2tctx, PROF_CKH_MINITEMS, - prof_bt_hash, prof_bt_keycomp)) { - idalloctm(tsdn, tdata, NULL, true, true); + if (ckh_new(tsd, &tdata->bt2tctx, PROF_CKH_MINITEMS, prof_bt_hash, + prof_bt_keycomp)) { + idalloctm(tsd_tsdn(tsd), tdata, NULL, true, true); return (NULL); } @@ -1829,19 +1828,19 @@ prof_tdata_init_impl(tsdn_t *tsdn, uint64_t thr_uid, uint64_t thr_discrim, tdata->dumping = false; tdata->active = active; - malloc_mutex_lock(tsdn, &tdatas_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx); tdata_tree_insert(&tdatas, tdata); - malloc_mutex_unlock(tsdn, &tdatas_mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx); return (tdata); } prof_tdata_t * -prof_tdata_init(tsdn_t *tsdn) +prof_tdata_init(tsd_t *tsd) { - return (prof_tdata_init_impl(tsdn, prof_thr_uid_alloc(tsdn), 0, NULL, - prof_thread_active_init_get(tsdn))); + return (prof_tdata_init_impl(tsd, prof_thr_uid_alloc(tsd_tsdn(tsd)), 0, + NULL, prof_thread_active_init_get(tsd_tsdn(tsd)))); } static bool @@ -1866,31 +1865,29 @@ prof_tdata_should_destroy(tsdn_t *tsdn, prof_tdata_t *tdata, } static void -prof_tdata_destroy_locked(tsdn_t *tsdn, prof_tdata_t *tdata, +prof_tdata_destroy_locked(tsd_t *tsd, prof_tdata_t *tdata, bool even_if_attached) { - malloc_mutex_assert_owner(tsdn, &tdatas_mtx); - - assert(tsdn_null(tsdn) || tsd_prof_tdata_get(tsdn_tsd(tsdn)) != tdata); + malloc_mutex_assert_owner(tsd_tsdn(tsd), &tdatas_mtx); tdata_tree_remove(&tdatas, tdata); assert(prof_tdata_should_destroy_unlocked(tdata, even_if_attached)); if (tdata->thread_name != NULL) - idalloctm(tsdn, tdata->thread_name, NULL, true, true); - ckh_delete(tsdn, &tdata->bt2tctx); - idalloctm(tsdn, tdata, NULL, true, true); + idalloctm(tsd_tsdn(tsd), tdata->thread_name, NULL, true, true); + ckh_delete(tsd, &tdata->bt2tctx); + idalloctm(tsd_tsdn(tsd), tdata, NULL, true, true); } static void -prof_tdata_destroy(tsdn_t *tsdn, prof_tdata_t *tdata, bool even_if_attached) +prof_tdata_destroy(tsd_t *tsd, prof_tdata_t *tdata, bool even_if_attached) { - 
malloc_mutex_lock(tsdn, &tdatas_mtx); - prof_tdata_destroy_locked(tsdn, tdata, even_if_attached); - malloc_mutex_unlock(tsdn, &tdatas_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx); + prof_tdata_destroy_locked(tsd, tdata, even_if_attached); + malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx); } static void @@ -1913,7 +1910,7 @@ prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata) destroy_tdata = false; malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock); if (destroy_tdata) - prof_tdata_destroy(tsd_tsdn(tsd), tdata, true); + prof_tdata_destroy(tsd, tdata, true); } prof_tdata_t * @@ -1926,8 +1923,8 @@ prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata) bool active = tdata->active; prof_tdata_detach(tsd, tdata); - return (prof_tdata_init_impl(tsd_tsdn(tsd), thr_uid, thr_discrim, - thread_name, active)); + return (prof_tdata_init_impl(tsd, thr_uid, thr_discrim, thread_name, + active)); } static bool @@ -1956,30 +1953,30 @@ prof_tdata_reset_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, void *arg) } void -prof_reset(tsdn_t *tsdn, size_t lg_sample) +prof_reset(tsd_t *tsd, size_t lg_sample) { prof_tdata_t *next; assert(lg_sample < (sizeof(uint64_t) << 3)); - malloc_mutex_lock(tsdn, &prof_dump_mtx); - malloc_mutex_lock(tsdn, &tdatas_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx); lg_prof_sample = lg_sample; next = NULL; do { prof_tdata_t *to_destroy = tdata_tree_iter(&tdatas, next, - prof_tdata_reset_iter, (void *)tsdn); + prof_tdata_reset_iter, (void *)tsd); if (to_destroy != NULL) { next = tdata_tree_next(&tdatas, to_destroy); - prof_tdata_destroy_locked(tsdn, to_destroy, false); + prof_tdata_destroy_locked(tsd, to_destroy, false); } else next = NULL; } while (next != NULL); - malloc_mutex_unlock(tsdn, &tdatas_mtx); - malloc_mutex_unlock(tsdn, &prof_dump_mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_mtx); } void @@ -2189,7 +2186,7 @@ prof_boot1(void) } bool -prof_boot2(tsdn_t *tsdn) +prof_boot2(tsd_t *tsd) { cassert(config_prof); @@ -2215,7 +2212,7 @@ prof_boot2(tsdn_t *tsdn) WITNESS_RANK_PROF_THREAD_ACTIVE_INIT)) return (true); - if (ckh_new(tsdn, &bt2gctx, PROF_CKH_MINITEMS, prof_bt_hash, + if (ckh_new(tsd, &bt2gctx, PROF_CKH_MINITEMS, prof_bt_hash, prof_bt_keycomp)) return (true); if (malloc_mutex_init(&bt2gctx_mtx, "prof_bt2gctx", @@ -2246,8 +2243,8 @@ prof_boot2(tsdn_t *tsdn) abort(); } - gctx_locks = (malloc_mutex_t *)base_alloc(tsdn, PROF_NCTX_LOCKS - * sizeof(malloc_mutex_t)); + gctx_locks = (malloc_mutex_t *)base_alloc(tsd_tsdn(tsd), + PROF_NCTX_LOCKS * sizeof(malloc_mutex_t)); if (gctx_locks == NULL) return (true); for (i = 0; i < PROF_NCTX_LOCKS; i++) { @@ -2256,7 +2253,7 @@ prof_boot2(tsdn_t *tsdn) return (true); } - tdata_locks = (malloc_mutex_t *)base_alloc(tsdn, + tdata_locks = (malloc_mutex_t *)base_alloc(tsd_tsdn(tsd), PROF_NTDATA_LOCKS * sizeof(malloc_mutex_t)); if (tdata_locks == NULL) return (true); diff --git a/src/rtree.c b/src/rtree.c index 3166b45f..f2e2997d 100644 --- a/src/rtree.c +++ b/src/rtree.c @@ -96,12 +96,15 @@ rtree_node_init(rtree_t *rtree, unsigned level, rtree_node_elm_t **elmp) rtree_node_elm_t *node; if (atomic_cas_p((void **)elmp, NULL, RTREE_NODE_INITIALIZING)) { + spin_t spinner; + /* * Another thread is already in the process of initializing. * Spin-wait until initialization is complete. 
*/ + spin_init(&spinner); do { - CPU_SPINWAIT; + spin_adaptive(&spinner); node = atomic_read_p((void **)elmp); } while (node == RTREE_NODE_INITIALIZING); } else { @@ -125,5 +128,5 @@ rtree_node_elm_t * rtree_child_read_hard(rtree_t *rtree, rtree_node_elm_t *elm, unsigned level) { - return (rtree_node_init(rtree, level, &elm->child)); + return (rtree_node_init(rtree, level+1, &elm->child)); } diff --git a/src/spin.c b/src/spin.c new file mode 100644 index 00000000..5242d95a --- /dev/null +++ b/src/spin.c @@ -0,0 +1,2 @@ +#define JEMALLOC_SPIN_C_ +#include "jemalloc/internal/jemalloc_internal.h" diff --git a/src/stats.c b/src/stats.c index 073be4fe..bd8af399 100644 --- a/src/stats.c +++ b/src/stats.c @@ -32,86 +32,107 @@ bool opt_stats_print = false; size_t stats_cactive = 0; -/******************************************************************************/ -/* Function prototypes for non-inline static functions. */ - -static void stats_arena_bins_print(void (*write_cb)(void *, const char *), - void *cbopaque, unsigned i); -static void stats_arena_lruns_print(void (*write_cb)(void *, const char *), - void *cbopaque, unsigned i); -static void stats_arena_hchunks_print( - void (*write_cb)(void *, const char *), void *cbopaque, unsigned i); -static void stats_arena_print(void (*write_cb)(void *, const char *), - void *cbopaque, unsigned i, bool bins, bool large, bool huge); - /******************************************************************************/ static void stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, - unsigned i) + bool json, bool large, bool huge, unsigned i) { size_t page; - bool config_tcache, in_gap; + bool config_tcache, in_gap, in_gap_prev; unsigned nbins, j; CTL_GET("arenas.page", &page, size_t); - CTL_GET("config.tcache", &config_tcache, bool); - if (config_tcache) { - malloc_cprintf(write_cb, cbopaque, - "bins: size ind allocated nmalloc" - " ndalloc nrequests curregs curruns regs" - " pgs util nfills nflushes newruns" - " reruns\n"); - } else { - malloc_cprintf(write_cb, cbopaque, - "bins: size ind allocated nmalloc" - " ndalloc nrequests curregs curruns regs" - " pgs util newruns reruns\n"); - } CTL_GET("arenas.nbins", &nbins, unsigned); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"bins\": [\n"); + } else { + CTL_GET("config.tcache", &config_tcache, bool); + if (config_tcache) { + malloc_cprintf(write_cb, cbopaque, + "bins: size ind allocated nmalloc" + " ndalloc nrequests curregs" + " curruns regs pgs util nfills" + " nflushes newruns reruns\n"); + } else { + malloc_cprintf(write_cb, cbopaque, + "bins: size ind allocated nmalloc" + " ndalloc nrequests curregs" + " curruns regs pgs util newruns" + " reruns\n"); + } + } for (j = 0, in_gap = false; j < nbins; j++) { uint64_t nruns; + size_t reg_size, run_size, curregs; + size_t curruns; + uint32_t nregs; + uint64_t nmalloc, ndalloc, nrequests, nfills, nflushes; + uint64_t nreruns; CTL_M2_M4_GET("stats.arenas.0.bins.0.nruns", i, j, &nruns, uint64_t); - if (nruns == 0) - in_gap = true; - else { - size_t reg_size, run_size, curregs, availregs, milli; - size_t curruns; - uint32_t nregs; - uint64_t nmalloc, ndalloc, nrequests, nfills, nflushes; - uint64_t reruns; - char util[6]; /* "x.yyy". 
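rtree_node_init() above combines the two new primitives from this release: a CAS-claimed initialization slot (RTREE_NODE_INITIALIZING as a sentinel) and spin_adaptive(), which spins in exponentially longer bursts instead of hammering the slot. A self-contained sketch under those assumptions; BUSY and node_get() are hypothetical names, and a full fence stands in for the CPU_SPINWAIT pause hint:

#include <stdint.h>
#include <stdlib.h>

#define	BUSY	((void *)0x1)	/* analogue of RTREE_NODE_INITIALIZING */

static void *
node_get(void **slotp)
{
	void *node = NULL;

	if (__atomic_compare_exchange_n(slotp, &node, BUSY, false,
	    __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE)) {
		/* This thread won the claim; it alone initializes. */
		node = calloc(1, 64);	/* error handling elided */
		__atomic_store_n(slotp, node, __ATOMIC_RELEASE);
	} else {
		unsigned iteration = 0;

		do {
			/* Adaptive spin: 2^iteration pauses, capped. */
			for (uint64_t i = 0; i < (UINT64_C(1) << iteration);
			    i++)
				__atomic_thread_fence(__ATOMIC_SEQ_CST);
			if (iteration < 6)
				iteration++;
			node = __atomic_load_n(slotp, __ATOMIC_ACQUIRE);
		} while (node == BUSY);
	}
	return (node);
}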
*/ + in_gap_prev = in_gap; + in_gap = (nruns == 0); - if (in_gap) { - malloc_cprintf(write_cb, cbopaque, - " ---\n"); - in_gap = false; - } - CTL_M2_GET("arenas.bin.0.size", j, ®_size, size_t); - CTL_M2_GET("arenas.bin.0.nregs", j, &nregs, uint32_t); - CTL_M2_GET("arenas.bin.0.run_size", j, &run_size, - size_t); - CTL_M2_M4_GET("stats.arenas.0.bins.0.nmalloc", i, j, - &nmalloc, uint64_t); - CTL_M2_M4_GET("stats.arenas.0.bins.0.ndalloc", i, j, - &ndalloc, uint64_t); - CTL_M2_M4_GET("stats.arenas.0.bins.0.curregs", i, j, - &curregs, size_t); - CTL_M2_M4_GET("stats.arenas.0.bins.0.nrequests", i, j, - &nrequests, uint64_t); + if (!json && in_gap_prev && !in_gap) { + malloc_cprintf(write_cb, cbopaque, + " ---\n"); + } + + CTL_M2_GET("arenas.bin.0.size", j, ®_size, size_t); + CTL_M2_GET("arenas.bin.0.nregs", j, &nregs, uint32_t); + CTL_M2_GET("arenas.bin.0.run_size", j, &run_size, size_t); + + CTL_M2_M4_GET("stats.arenas.0.bins.0.nmalloc", i, j, &nmalloc, + uint64_t); + CTL_M2_M4_GET("stats.arenas.0.bins.0.ndalloc", i, j, &ndalloc, + uint64_t); + CTL_M2_M4_GET("stats.arenas.0.bins.0.curregs", i, j, &curregs, + size_t); + CTL_M2_M4_GET("stats.arenas.0.bins.0.nrequests", i, j, + &nrequests, uint64_t); + if (config_tcache) { + CTL_M2_M4_GET("stats.arenas.0.bins.0.nfills", i, j, + &nfills, uint64_t); + CTL_M2_M4_GET("stats.arenas.0.bins.0.nflushes", i, j, + &nflushes, uint64_t); + } + CTL_M2_M4_GET("stats.arenas.0.bins.0.nreruns", i, j, &nreruns, + uint64_t); + CTL_M2_M4_GET("stats.arenas.0.bins.0.curruns", i, j, &curruns, + size_t); + + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t{\n" + "\t\t\t\t\t\t\"nmalloc\": %"FMTu64",\n" + "\t\t\t\t\t\t\"ndalloc\": %"FMTu64",\n" + "\t\t\t\t\t\t\"curregs\": %zu,\n" + "\t\t\t\t\t\t\"nrequests\": %"FMTu64",\n", + nmalloc, + ndalloc, + curregs, + nrequests); if (config_tcache) { - CTL_M2_M4_GET("stats.arenas.0.bins.0.nfills", i, - j, &nfills, uint64_t); - CTL_M2_M4_GET("stats.arenas.0.bins.0.nflushes", - i, j, &nflushes, uint64_t); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\t\"nfills\": %"FMTu64",\n" + "\t\t\t\t\t\t\"nflushes\": %"FMTu64",\n", + nfills, + nflushes); } - CTL_M2_M4_GET("stats.arenas.0.bins.0.nreruns", i, j, - &reruns, uint64_t); - CTL_M2_M4_GET("stats.arenas.0.bins.0.curruns", i, j, - &curruns, size_t); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\t\"nreruns\": %"FMTu64",\n" + "\t\t\t\t\t\t\"curruns\": %zu\n" + "\t\t\t\t\t}%s\n", + nreruns, + curruns, + (j + 1 < nbins) ? "," : ""); + } else if (!in_gap) { + size_t availregs, milli; + char util[6]; /* "x.yyy". */ availregs = nregs * curruns; milli = (availregs != 0) ? (1000 * curregs) / availregs @@ -138,7 +159,7 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, reg_size, j, curregs * reg_size, nmalloc, ndalloc, nrequests, curregs, curruns, nregs, run_size / page, util, nfills, nflushes, - nruns, reruns); + nruns, nreruns); } else { malloc_cprintf(write_cb, cbopaque, "%20zu %3u %12zu %12"FMTu64 @@ -147,28 +168,38 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, " %12"FMTu64"\n", reg_size, j, curregs * reg_size, nmalloc, ndalloc, nrequests, curregs, curruns, nregs, - run_size / page, util, nruns, reruns); + run_size / page, util, nruns, nreruns); } } } - if (in_gap) { + if (json) { malloc_cprintf(write_cb, cbopaque, - " ---\n"); + "\t\t\t\t]%s\n", (large || huge) ? 
"," : ""); + } else { + if (in_gap) { + malloc_cprintf(write_cb, cbopaque, + " ---\n"); + } } } static void stats_arena_lruns_print(void (*write_cb)(void *, const char *), void *cbopaque, - unsigned i) + bool json, bool huge, unsigned i) { unsigned nbins, nlruns, j; - bool in_gap; + bool in_gap, in_gap_prev; - malloc_cprintf(write_cb, cbopaque, - "large: size ind allocated nmalloc ndalloc" - " nrequests curruns\n"); CTL_GET("arenas.nbins", &nbins, unsigned); CTL_GET("arenas.nlruns", &nlruns, unsigned); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"lruns\": [\n"); + } else { + malloc_cprintf(write_cb, cbopaque, + "large: size ind allocated nmalloc" + " ndalloc nrequests curruns\n"); + } for (j = 0, in_gap = false; j < nlruns; j++) { uint64_t nmalloc, ndalloc, nrequests; size_t run_size, curruns; @@ -179,17 +210,25 @@ stats_arena_lruns_print(void (*write_cb)(void *, const char *), void *cbopaque, uint64_t); CTL_M2_M4_GET("stats.arenas.0.lruns.0.nrequests", i, j, &nrequests, uint64_t); - if (nrequests == 0) - in_gap = true; - else { - CTL_M2_GET("arenas.lrun.0.size", j, &run_size, size_t); - CTL_M2_M4_GET("stats.arenas.0.lruns.0.curruns", i, j, - &curruns, size_t); - if (in_gap) { - malloc_cprintf(write_cb, cbopaque, - " ---\n"); - in_gap = false; - } + in_gap_prev = in_gap; + in_gap = (nrequests == 0); + + if (!json && in_gap_prev && !in_gap) { + malloc_cprintf(write_cb, cbopaque, + " ---\n"); + } + + CTL_M2_GET("arenas.lrun.0.size", j, &run_size, size_t); + CTL_M2_M4_GET("stats.arenas.0.lruns.0.curruns", i, j, &curruns, + size_t); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t{\n" + "\t\t\t\t\t\t\"curruns\": %zu\n" + "\t\t\t\t\t}%s\n", + curruns, + (j + 1 < nlruns) ? "," : ""); + } else if (!in_gap) { malloc_cprintf(write_cb, cbopaque, "%20zu %3u %12zu %12"FMTu64" %12"FMTu64 " %12"FMTu64" %12zu\n", @@ -197,25 +236,35 @@ stats_arena_lruns_print(void (*write_cb)(void *, const char *), void *cbopaque, ndalloc, nrequests, curruns); } } - if (in_gap) { + if (json) { malloc_cprintf(write_cb, cbopaque, - " ---\n"); + "\t\t\t\t]%s\n", huge ? 
"," : ""); + } else { + if (in_gap) { + malloc_cprintf(write_cb, cbopaque, + " ---\n"); + } } } static void stats_arena_hchunks_print(void (*write_cb)(void *, const char *), - void *cbopaque, unsigned i) + void *cbopaque, bool json, unsigned i) { unsigned nbins, nlruns, nhchunks, j; - bool in_gap; + bool in_gap, in_gap_prev; - malloc_cprintf(write_cb, cbopaque, - "huge: size ind allocated nmalloc ndalloc" - " nrequests curhchunks\n"); CTL_GET("arenas.nbins", &nbins, unsigned); CTL_GET("arenas.nlruns", &nlruns, unsigned); CTL_GET("arenas.nhchunks", &nhchunks, unsigned); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"hchunks\": [\n"); + } else { + malloc_cprintf(write_cb, cbopaque, + "huge: size ind allocated nmalloc" + " ndalloc nrequests curhchunks\n"); + } for (j = 0, in_gap = false; j < nhchunks; j++) { uint64_t nmalloc, ndalloc, nrequests; size_t hchunk_size, curhchunks; @@ -226,18 +275,25 @@ stats_arena_hchunks_print(void (*write_cb)(void *, const char *), &ndalloc, uint64_t); CTL_M2_M4_GET("stats.arenas.0.hchunks.0.nrequests", i, j, &nrequests, uint64_t); - if (nrequests == 0) - in_gap = true; - else { - CTL_M2_GET("arenas.hchunk.0.size", j, &hchunk_size, - size_t); - CTL_M2_M4_GET("stats.arenas.0.hchunks.0.curhchunks", i, - j, &curhchunks, size_t); - if (in_gap) { - malloc_cprintf(write_cb, cbopaque, - " ---\n"); - in_gap = false; - } + in_gap_prev = in_gap; + in_gap = (nrequests == 0); + + if (!json && in_gap_prev && !in_gap) { + malloc_cprintf(write_cb, cbopaque, + " ---\n"); + } + + CTL_M2_GET("arenas.hchunk.0.size", j, &hchunk_size, size_t); + CTL_M2_M4_GET("stats.arenas.0.hchunks.0.curhchunks", i, j, + &curhchunks, size_t); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t{\n" + "\t\t\t\t\t\t\"curhchunks\": %zu\n" + "\t\t\t\t\t}%s\n", + curhchunks, + (j + 1 < nhchunks) ? 
"," : ""); + } else if (!in_gap) { malloc_cprintf(write_cb, cbopaque, "%20zu %3u %12zu %12"FMTu64" %12"FMTu64 " %12"FMTu64" %12zu\n", @@ -246,15 +302,20 @@ stats_arena_hchunks_print(void (*write_cb)(void *, const char *), nrequests, curhchunks); } } - if (in_gap) { + if (json) { malloc_cprintf(write_cb, cbopaque, - " ---\n"); + "\t\t\t\t]\n"); + } else { + if (in_gap) { + malloc_cprintf(write_cb, cbopaque, + " ---\n"); + } } } static void stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, - unsigned i, bool bins, bool large, bool huge) + bool json, unsigned i, bool bins, bool large, bool huge) { unsigned nthreads; const char *dss; @@ -272,100 +333,731 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, CTL_GET("arenas.page", &page, size_t); CTL_M2_GET("stats.arenas.0.nthreads", i, &nthreads, unsigned); - malloc_cprintf(write_cb, cbopaque, - "assigned threads: %u\n", nthreads); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"nthreads\": %u,\n", nthreads); + } else { + malloc_cprintf(write_cb, cbopaque, + "assigned threads: %u\n", nthreads); + } + CTL_M2_GET("stats.arenas.0.dss", i, &dss, const char *); - malloc_cprintf(write_cb, cbopaque, "dss allocation precedence: %s\n", - dss); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"dss\": \"%s\",\n", dss); + } else { + malloc_cprintf(write_cb, cbopaque, + "dss allocation precedence: %s\n", dss); + } + CTL_M2_GET("stats.arenas.0.lg_dirty_mult", i, &lg_dirty_mult, ssize_t); - if (opt_purge == purge_mode_ratio) { - if (lg_dirty_mult >= 0) { - malloc_cprintf(write_cb, cbopaque, - "min active:dirty page ratio: %u:1\n", - (1U << lg_dirty_mult)); - } else { - malloc_cprintf(write_cb, cbopaque, - "min active:dirty page ratio: N/A\n"); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"lg_dirty_mult\": %zd,\n", lg_dirty_mult); + } else { + if (opt_purge == purge_mode_ratio) { + if (lg_dirty_mult >= 0) { + malloc_cprintf(write_cb, cbopaque, + "min active:dirty page ratio: %u:1\n", + (1U << lg_dirty_mult)); + } else { + malloc_cprintf(write_cb, cbopaque, + "min active:dirty page ratio: N/A\n"); + } } } + CTL_M2_GET("stats.arenas.0.decay_time", i, &decay_time, ssize_t); - if (opt_purge == purge_mode_decay) { - if (decay_time >= 0) { - malloc_cprintf(write_cb, cbopaque, "decay time: %zd\n", - decay_time); - } else - malloc_cprintf(write_cb, cbopaque, "decay time: N/A\n"); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"decay_time\": %zd,\n", decay_time); + } else { + if (opt_purge == purge_mode_decay) { + if (decay_time >= 0) { + malloc_cprintf(write_cb, cbopaque, + "decay time: %zd\n", decay_time); + } else { + malloc_cprintf(write_cb, cbopaque, + "decay time: N/A\n"); + } + } } + CTL_M2_GET("stats.arenas.0.pactive", i, &pactive, size_t); CTL_M2_GET("stats.arenas.0.pdirty", i, &pdirty, size_t); CTL_M2_GET("stats.arenas.0.npurge", i, &npurge, uint64_t); CTL_M2_GET("stats.arenas.0.nmadvise", i, &nmadvise, uint64_t); CTL_M2_GET("stats.arenas.0.purged", i, &purged, uint64_t); - malloc_cprintf(write_cb, cbopaque, - "purging: dirty: %zu, sweeps: %"FMTu64", madvises: %"FMTu64", " - "purged: %"FMTu64"\n", pdirty, npurge, nmadvise, purged); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"pactive\": %zu,\n", pactive); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"pdirty\": %zu,\n", pdirty); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"npurge\": %"FMTu64",\n", npurge); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"nmadvise\": 
%"FMTu64",\n", nmadvise); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"purged\": %"FMTu64",\n", purged); + } else { + malloc_cprintf(write_cb, cbopaque, + "purging: dirty: %zu, sweeps: %"FMTu64", madvises: %"FMTu64 + ", purged: %"FMTu64"\n", pdirty, npurge, nmadvise, purged); + } - malloc_cprintf(write_cb, cbopaque, - " allocated nmalloc ndalloc" - " nrequests\n"); CTL_M2_GET("stats.arenas.0.small.allocated", i, &small_allocated, size_t); CTL_M2_GET("stats.arenas.0.small.nmalloc", i, &small_nmalloc, uint64_t); CTL_M2_GET("stats.arenas.0.small.ndalloc", i, &small_ndalloc, uint64_t); CTL_M2_GET("stats.arenas.0.small.nrequests", i, &small_nrequests, uint64_t); - malloc_cprintf(write_cb, cbopaque, - "small: %12zu %12"FMTu64" %12"FMTu64 - " %12"FMTu64"\n", - small_allocated, small_nmalloc, small_ndalloc, small_nrequests); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"small\": {\n"); + + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"allocated\": %zu,\n", small_allocated); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"nmalloc\": %"FMTu64",\n", small_nmalloc); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"ndalloc\": %"FMTu64",\n", small_ndalloc); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"nrequests\": %"FMTu64"\n", small_nrequests); + + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t},\n"); + } else { + malloc_cprintf(write_cb, cbopaque, + " allocated nmalloc" + " ndalloc nrequests\n"); + malloc_cprintf(write_cb, cbopaque, + "small: %12zu %12"FMTu64" %12"FMTu64 + " %12"FMTu64"\n", + small_allocated, small_nmalloc, small_ndalloc, + small_nrequests); + } + CTL_M2_GET("stats.arenas.0.large.allocated", i, &large_allocated, size_t); CTL_M2_GET("stats.arenas.0.large.nmalloc", i, &large_nmalloc, uint64_t); CTL_M2_GET("stats.arenas.0.large.ndalloc", i, &large_ndalloc, uint64_t); CTL_M2_GET("stats.arenas.0.large.nrequests", i, &large_nrequests, uint64_t); - malloc_cprintf(write_cb, cbopaque, - "large: %12zu %12"FMTu64" %12"FMTu64 - " %12"FMTu64"\n", - large_allocated, large_nmalloc, large_ndalloc, large_nrequests); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"large\": {\n"); + + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"allocated\": %zu,\n", large_allocated); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"nmalloc\": %"FMTu64",\n", large_nmalloc); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"ndalloc\": %"FMTu64",\n", large_ndalloc); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"nrequests\": %"FMTu64"\n", large_nrequests); + + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t},\n"); + } else { + malloc_cprintf(write_cb, cbopaque, + "large: %12zu %12"FMTu64" %12"FMTu64 + " %12"FMTu64"\n", + large_allocated, large_nmalloc, large_ndalloc, + large_nrequests); + } + CTL_M2_GET("stats.arenas.0.huge.allocated", i, &huge_allocated, size_t); CTL_M2_GET("stats.arenas.0.huge.nmalloc", i, &huge_nmalloc, uint64_t); CTL_M2_GET("stats.arenas.0.huge.ndalloc", i, &huge_ndalloc, uint64_t); CTL_M2_GET("stats.arenas.0.huge.nrequests", i, &huge_nrequests, uint64_t); - malloc_cprintf(write_cb, cbopaque, - "huge: %12zu %12"FMTu64" %12"FMTu64 - " %12"FMTu64"\n", - huge_allocated, huge_nmalloc, huge_ndalloc, huge_nrequests); - malloc_cprintf(write_cb, cbopaque, - "total: %12zu %12"FMTu64" %12"FMTu64 - " %12"FMTu64"\n", - small_allocated + large_allocated + huge_allocated, - small_nmalloc + large_nmalloc + huge_nmalloc, - small_ndalloc + large_ndalloc + huge_ndalloc, - small_nrequests + large_nrequests + huge_nrequests); - 
malloc_cprintf(write_cb, cbopaque, - "active: %12zu\n", pactive * page); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"huge\": {\n"); + + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"allocated\": %zu,\n", huge_allocated); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"nmalloc\": %"FMTu64",\n", huge_nmalloc); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"ndalloc\": %"FMTu64",\n", huge_ndalloc); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"nrequests\": %"FMTu64"\n", huge_nrequests); + + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t},\n"); + } else { + malloc_cprintf(write_cb, cbopaque, + "huge: %12zu %12"FMTu64" %12"FMTu64 + " %12"FMTu64"\n", + huge_allocated, huge_nmalloc, huge_ndalloc, huge_nrequests); + malloc_cprintf(write_cb, cbopaque, + "total: %12zu %12"FMTu64" %12"FMTu64 + " %12"FMTu64"\n", + small_allocated + large_allocated + huge_allocated, + small_nmalloc + large_nmalloc + huge_nmalloc, + small_ndalloc + large_ndalloc + huge_ndalloc, + small_nrequests + large_nrequests + huge_nrequests); + } + if (!json) { + malloc_cprintf(write_cb, cbopaque, + "active: %12zu\n", pactive * page); + } + CTL_M2_GET("stats.arenas.0.mapped", i, &mapped, size_t); - malloc_cprintf(write_cb, cbopaque, - "mapped: %12zu\n", mapped); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"mapped\": %zu,\n", mapped); + } else { + malloc_cprintf(write_cb, cbopaque, + "mapped: %12zu\n", mapped); + } + CTL_M2_GET("stats.arenas.0.retained", i, &retained, size_t); - malloc_cprintf(write_cb, cbopaque, - "retained: %12zu\n", retained); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"retained\": %zu,\n", retained); + } else { + malloc_cprintf(write_cb, cbopaque, + "retained: %12zu\n", retained); + } + CTL_M2_GET("stats.arenas.0.metadata.mapped", i, &metadata_mapped, size_t); CTL_M2_GET("stats.arenas.0.metadata.allocated", i, &metadata_allocated, size_t); - malloc_cprintf(write_cb, cbopaque, - "metadata: mapped: %zu, allocated: %zu\n", - metadata_mapped, metadata_allocated); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"metadata\": {\n"); - if (bins) - stats_arena_bins_print(write_cb, cbopaque, i); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"mapped\": %zu,\n", metadata_mapped); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"allocated\": %zu\n", metadata_allocated); + + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t},\n"); + } else { + malloc_cprintf(write_cb, cbopaque, + "metadata: mapped: %zu, allocated: %zu\n", + metadata_mapped, metadata_allocated); + } + + if (bins) { + stats_arena_bins_print(write_cb, cbopaque, json, large, huge, + i); + } if (large) - stats_arena_lruns_print(write_cb, cbopaque, i); + stats_arena_lruns_print(write_cb, cbopaque, json, huge, i); if (huge) - stats_arena_hchunks_print(write_cb, cbopaque, i); + stats_arena_hchunks_print(write_cb, cbopaque, json, i); +} + +static void +stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque, + bool json, bool merged, bool unmerged) +{ + const char *cpv; + bool bv; + unsigned uv; + uint32_t u32v; + uint64_t u64v; + ssize_t ssv; + size_t sv, bsz, usz, ssz, sssz, cpsz; + + bsz = sizeof(bool); + usz = sizeof(unsigned); + ssz = sizeof(size_t); + sssz = sizeof(ssize_t); + cpsz = sizeof(const char *); + + CTL_GET("version", &cpv, const char *); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\"version\": \"%s\",\n", cpv); + } else + malloc_cprintf(write_cb, cbopaque, "Version: %s\n", cpv); + + /* config. 
*/ +#define CONFIG_WRITE_BOOL_JSON(n, c) \ + if (json) { \ + CTL_GET("config."#n, &bv, bool); \ + malloc_cprintf(write_cb, cbopaque, \ + "\t\t\t\""#n"\": %s%s\n", bv ? "true" : "false", \ + (c)); \ + } + + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\"config\": {\n"); + } + + CONFIG_WRITE_BOOL_JSON(cache_oblivious, ",") + + CTL_GET("config.debug", &bv, bool); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"debug\": %s,\n", bv ? "true" : "false"); + } else { + malloc_cprintf(write_cb, cbopaque, "Assertions %s\n", + bv ? "enabled" : "disabled"); + } + + CONFIG_WRITE_BOOL_JSON(fill, ",") + CONFIG_WRITE_BOOL_JSON(lazy_lock, ",") + + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"malloc_conf\": \"%s\",\n", + config_malloc_conf); + } else { + malloc_cprintf(write_cb, cbopaque, + "config.malloc_conf: \"%s\"\n", config_malloc_conf); + } + + CONFIG_WRITE_BOOL_JSON(munmap, ",") + CONFIG_WRITE_BOOL_JSON(prof, ",") + CONFIG_WRITE_BOOL_JSON(prof_libgcc, ",") + CONFIG_WRITE_BOOL_JSON(prof_libunwind, ",") + CONFIG_WRITE_BOOL_JSON(stats, ",") + CONFIG_WRITE_BOOL_JSON(tcache, ",") + CONFIG_WRITE_BOOL_JSON(tls, ",") + CONFIG_WRITE_BOOL_JSON(utrace, ",") + CONFIG_WRITE_BOOL_JSON(valgrind, ",") + CONFIG_WRITE_BOOL_JSON(xmalloc, "") + + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t},\n"); + } +#undef CONFIG_WRITE_BOOL_JSON + + /* opt. */ +#define OPT_WRITE_BOOL(n, c) \ + if (je_mallctl("opt."#n, (void *)&bv, &bsz, NULL, 0) == 0) { \ + if (json) { \ + malloc_cprintf(write_cb, cbopaque, \ + "\t\t\t\""#n"\": %s%s\n", bv ? "true" : \ + "false", (c)); \ + } else { \ + malloc_cprintf(write_cb, cbopaque, \ + " opt."#n": %s\n", bv ? "true" : "false"); \ + } \ + } +#define OPT_WRITE_BOOL_MUTABLE(n, m, c) { \ + bool bv2; \ + if (je_mallctl("opt."#n, (void *)&bv, &bsz, NULL, 0) == 0 && \ + je_mallctl(#m, &bv2, &bsz, NULL, 0) == 0) { \ + if (json) { \ + malloc_cprintf(write_cb, cbopaque, \ + "\t\t\t\""#n"\": %s%s\n", bv ? "true" : \ + "false", (c)); \ + } else { \ + malloc_cprintf(write_cb, cbopaque, \ + " opt."#n": %s ("#m": %s)\n", bv ? "true" \ + : "false", bv2 ? 
"true" : "false"); \ + } \ + } \ +} +#define OPT_WRITE_UNSIGNED(n, c) \ + if (je_mallctl("opt."#n, (void *)&uv, &usz, NULL, 0) == 0) { \ + if (json) { \ + malloc_cprintf(write_cb, cbopaque, \ + "\t\t\t\""#n"\": %u%s\n", uv, (c)); \ + } else { \ + malloc_cprintf(write_cb, cbopaque, \ + " opt."#n": %u\n", uv); \ + } \ + } +#define OPT_WRITE_SIZE_T(n, c) \ + if (je_mallctl("opt."#n, (void *)&sv, &ssz, NULL, 0) == 0) { \ + if (json) { \ + malloc_cprintf(write_cb, cbopaque, \ + "\t\t\t\""#n"\": %zu%s\n", sv, (c)); \ + } else { \ + malloc_cprintf(write_cb, cbopaque, \ + " opt."#n": %zu\n", sv); \ + } \ + } +#define OPT_WRITE_SSIZE_T(n, c) \ + if (je_mallctl("opt."#n, (void *)&ssv, &sssz, NULL, 0) == 0) { \ + if (json) { \ + malloc_cprintf(write_cb, cbopaque, \ + "\t\t\t\""#n"\": %zd%s\n", ssv, (c)); \ + } else { \ + malloc_cprintf(write_cb, cbopaque, \ + " opt."#n": %zd\n", ssv); \ + } \ + } +#define OPT_WRITE_SSIZE_T_MUTABLE(n, m, c) { \ + ssize_t ssv2; \ + if (je_mallctl("opt."#n, (void *)&ssv, &sssz, NULL, 0) == 0 && \ + je_mallctl(#m, &ssv2, &sssz, NULL, 0) == 0) { \ + if (json) { \ + malloc_cprintf(write_cb, cbopaque, \ + "\t\t\t\""#n"\": %zd%s\n", ssv, (c)); \ + } else { \ + malloc_cprintf(write_cb, cbopaque, \ + " opt."#n": %zd ("#m": %zd)\n", \ + ssv, ssv2); \ + } \ + } \ +} +#define OPT_WRITE_CHAR_P(n, c) \ + if (je_mallctl("opt."#n, (void *)&cpv, &cpsz, NULL, 0) == 0) { \ + if (json) { \ + malloc_cprintf(write_cb, cbopaque, \ + "\t\t\t\""#n"\": \"%s\"%s\n", cpv, (c)); \ + } else { \ + malloc_cprintf(write_cb, cbopaque, \ + " opt."#n": \"%s\"\n", cpv); \ + } \ + } + + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\"opt\": {\n"); + } else { + malloc_cprintf(write_cb, cbopaque, + "Run-time option settings:\n"); + } + OPT_WRITE_BOOL(abort, ",") + OPT_WRITE_SIZE_T(lg_chunk, ",") + OPT_WRITE_CHAR_P(dss, ",") + OPT_WRITE_UNSIGNED(narenas, ",") + OPT_WRITE_CHAR_P(purge, ",") + if (json || opt_purge == purge_mode_ratio) { + OPT_WRITE_SSIZE_T_MUTABLE(lg_dirty_mult, + arenas.lg_dirty_mult, ",") + } + if (json || opt_purge == purge_mode_decay) { + OPT_WRITE_SSIZE_T_MUTABLE(decay_time, arenas.decay_time, ",") + } + OPT_WRITE_CHAR_P(junk, ",") + OPT_WRITE_SIZE_T(quarantine, ",") + OPT_WRITE_BOOL(redzone, ",") + OPT_WRITE_BOOL(zero, ",") + OPT_WRITE_BOOL(utrace, ",") + OPT_WRITE_BOOL(xmalloc, ",") + OPT_WRITE_BOOL(tcache, ",") + OPT_WRITE_SSIZE_T(lg_tcache_max, ",") + OPT_WRITE_BOOL(prof, ",") + OPT_WRITE_CHAR_P(prof_prefix, ",") + OPT_WRITE_BOOL_MUTABLE(prof_active, prof.active, ",") + OPT_WRITE_BOOL_MUTABLE(prof_thread_active_init, prof.thread_active_init, + ",") + OPT_WRITE_SSIZE_T_MUTABLE(lg_prof_sample, prof.lg_sample, ",") + OPT_WRITE_BOOL(prof_accum, ",") + OPT_WRITE_SSIZE_T(lg_prof_interval, ",") + OPT_WRITE_BOOL(prof_gdump, ",") + OPT_WRITE_BOOL(prof_final, ",") + OPT_WRITE_BOOL(prof_leak, ",") + /* + * stats_print is always emitted, so as long as stats_print comes last + * it's safe to unconditionally omit the comma here (rather than having + * to conditionally omit it elsewhere depending on configuration). + */ + OPT_WRITE_BOOL(stats_print, "") + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t},\n"); + } + +#undef OPT_WRITE_BOOL +#undef OPT_WRITE_BOOL_MUTABLE +#undef OPT_WRITE_SIZE_T +#undef OPT_WRITE_SSIZE_T +#undef OPT_WRITE_CHAR_P + + /* arenas. 
*/ + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\"arenas\": {\n"); + } + + CTL_GET("arenas.narenas", &uv, unsigned); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"narenas\": %u,\n", uv); + } else + malloc_cprintf(write_cb, cbopaque, "Arenas: %u\n", uv); + + CTL_GET("arenas.lg_dirty_mult", &ssv, ssize_t); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"lg_dirty_mult\": %zd,\n", ssv); + } else if (opt_purge == purge_mode_ratio) { + if (ssv >= 0) { + malloc_cprintf(write_cb, cbopaque, + "Min active:dirty page ratio per arena: " + "%u:1\n", (1U << ssv)); + } else { + malloc_cprintf(write_cb, cbopaque, + "Min active:dirty page ratio per arena: " + "N/A\n"); + } + } + CTL_GET("arenas.decay_time", &ssv, ssize_t); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"decay_time\": %zd,\n", ssv); + } else if (opt_purge == purge_mode_decay) { + malloc_cprintf(write_cb, cbopaque, + "Unused dirty page decay time: %zd%s\n", + ssv, (ssv < 0) ? " (no decay)" : ""); + } + + CTL_GET("arenas.quantum", &sv, size_t); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"quantum\": %zu,\n", sv); + } else + malloc_cprintf(write_cb, cbopaque, "Quantum size: %zu\n", sv); + + CTL_GET("arenas.page", &sv, size_t); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"page\": %zu,\n", sv); + } else + malloc_cprintf(write_cb, cbopaque, "Page size: %zu\n", sv); + + if (je_mallctl("arenas.tcache_max", (void *)&sv, &ssz, NULL, 0) == 0) { + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"tcache_max\": %zu,\n", sv); + } else { + malloc_cprintf(write_cb, cbopaque, + "Maximum thread-cached size class: %zu\n", sv); + } + } + + if (json) { + unsigned nbins, nlruns, nhchunks, i; + + CTL_GET("arenas.nbins", &nbins, unsigned); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"nbins\": %u,\n", nbins); + + CTL_GET("arenas.nhbins", &uv, unsigned); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"nhbins\": %u,\n", uv); + + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"bin\": [\n"); + for (i = 0; i < nbins; i++) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t{\n"); + + CTL_M2_GET("arenas.bin.0.size", i, &sv, size_t); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"size\": %zu,\n", sv); + + CTL_M2_GET("arenas.bin.0.nregs", i, &u32v, uint32_t); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"nregs\": %"FMTu32",\n", u32v); + + CTL_M2_GET("arenas.bin.0.run_size", i, &sv, size_t); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"run_size\": %zu\n", sv); + + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t}%s\n", (i + 1 < nbins) ? "," : ""); + } + malloc_cprintf(write_cb, cbopaque, + "\t\t\t],\n"); + + CTL_GET("arenas.nlruns", &nlruns, unsigned); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"nlruns\": %u,\n", nlruns); + + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"lrun\": [\n"); + for (i = 0; i < nlruns; i++) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t{\n"); + + CTL_M2_GET("arenas.lrun.0.size", i, &sv, size_t); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"size\": %zu\n", sv); + + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t}%s\n", (i + 1 < nlruns) ? 
"," : ""); + } + malloc_cprintf(write_cb, cbopaque, + "\t\t\t],\n"); + + CTL_GET("arenas.nhchunks", &nhchunks, unsigned); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"nhchunks\": %u,\n", nhchunks); + + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"hchunk\": [\n"); + for (i = 0; i < nhchunks; i++) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t{\n"); + + CTL_M2_GET("arenas.hchunk.0.size", i, &sv, size_t); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"size\": %zu\n", sv); + + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t}%s\n", (i + 1 < nhchunks) ? "," : ""); + } + malloc_cprintf(write_cb, cbopaque, + "\t\t\t]\n"); + + malloc_cprintf(write_cb, cbopaque, + "\t\t},\n"); + } + + /* prof. */ + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\"prof\": {\n"); + + CTL_GET("prof.thread_active_init", &bv, bool); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"thread_active_init\": %s,\n", bv ? "true" : + "false"); + + CTL_GET("prof.active", &bv, bool); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"active\": %s,\n", bv ? "true" : "false"); + + CTL_GET("prof.gdump", &bv, bool); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"gdump\": %s,\n", bv ? "true" : "false"); + + CTL_GET("prof.interval", &u64v, uint64_t); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"interval\": %"FMTu64",\n", u64v); + + CTL_GET("prof.lg_sample", &ssv, ssize_t); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"lg_sample\": %zd\n", ssv); + + malloc_cprintf(write_cb, cbopaque, + "\t\t}%s\n", (config_stats || merged || unmerged) ? "," : + ""); + } +} + +static void +stats_print_helper(void (*write_cb)(void *, const char *), void *cbopaque, + bool json, bool merged, bool unmerged, bool bins, bool large, bool huge) +{ + size_t *cactive; + size_t allocated, active, metadata, resident, mapped, retained; + + CTL_GET("stats.cactive", &cactive, size_t *); + CTL_GET("stats.allocated", &allocated, size_t); + CTL_GET("stats.active", &active, size_t); + CTL_GET("stats.metadata", &metadata, size_t); + CTL_GET("stats.resident", &resident, size_t); + CTL_GET("stats.mapped", &mapped, size_t); + CTL_GET("stats.retained", &retained, size_t); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\"stats\": {\n"); + + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"cactive\": %zu,\n", atomic_read_z(cactive)); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"allocated\": %zu,\n", allocated); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"active\": %zu,\n", active); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"metadata\": %zu,\n", metadata); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"resident\": %zu,\n", resident); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"mapped\": %zu,\n", mapped); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"retained\": %zu\n", retained); + + malloc_cprintf(write_cb, cbopaque, + "\t\t}%s\n", (merged || unmerged) ? 
"," : ""); + } else { + malloc_cprintf(write_cb, cbopaque, + "Allocated: %zu, active: %zu, metadata: %zu," + " resident: %zu, mapped: %zu, retained: %zu\n", + allocated, active, metadata, resident, mapped, retained); + malloc_cprintf(write_cb, cbopaque, + "Current active ceiling: %zu\n", + atomic_read_z(cactive)); + } + + if (merged || unmerged) { + unsigned narenas; + + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\"stats.arenas\": {\n"); + } + + CTL_GET("arenas.narenas", &narenas, unsigned); + { + VARIABLE_ARRAY(bool, initialized, narenas); + size_t isz; + unsigned i, j, ninitialized; + + isz = sizeof(bool) * narenas; + xmallctl("arenas.initialized", (void *)initialized, + &isz, NULL, 0); + for (i = ninitialized = 0; i < narenas; i++) { + if (initialized[i]) + ninitialized++; + } + + /* Merged stats. */ + if (merged && (ninitialized > 1 || !unmerged)) { + /* Print merged arena stats. */ + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"merged\": {\n"); + } else { + malloc_cprintf(write_cb, cbopaque, + "\nMerged arenas stats:\n"); + } + stats_arena_print(write_cb, cbopaque, json, + narenas, bins, large, huge); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t}%s\n", (ninitialized > 1) ? + "," : ""); + } + } + + /* Unmerged stats. */ + for (i = j = 0; i < narenas; i++) { + if (initialized[i]) { + if (json) { + j++; + malloc_cprintf(write_cb, + cbopaque, + "\t\t\t\"%u\": {\n", i); + } else { + malloc_cprintf(write_cb, + cbopaque, "\narenas[%u]:\n", + i); + } + stats_arena_print(write_cb, cbopaque, + json, i, bins, large, huge); + if (json) { + malloc_cprintf(write_cb, + cbopaque, + "\t\t\t}%s\n", (j < + ninitialized) ? "," : ""); + } + } + } + } + + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t}\n"); + } + } } void @@ -375,6 +1067,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, int err; uint64_t epoch; size_t u64sz; + bool json = false; bool general = true; bool merged = true; bool unmerged = true; @@ -408,6 +1101,9 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, for (i = 0; opts[i] != '\0'; i++) { switch (opts[i]) { + case 'J': + json = true; + break; case 'g': general = false; break; @@ -431,246 +1127,27 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, } } - malloc_cprintf(write_cb, cbopaque, - "___ Begin jemalloc statistics ___\n"); - if (general) { - const char *cpv; - bool bv; - unsigned uv; - ssize_t ssv; - size_t sv, bsz, usz, ssz, sssz, cpsz; - - bsz = sizeof(bool); - usz = sizeof(unsigned); - ssz = sizeof(size_t); - sssz = sizeof(ssize_t); - cpsz = sizeof(const char *); - - CTL_GET("version", &cpv, const char *); - malloc_cprintf(write_cb, cbopaque, "Version: %s\n", cpv); - CTL_GET("config.debug", &bv, bool); - malloc_cprintf(write_cb, cbopaque, "Assertions %s\n", - bv ? "enabled" : "disabled"); + if (json) { malloc_cprintf(write_cb, cbopaque, - "config.malloc_conf: \"%s\"\n", config_malloc_conf); - -#define OPT_WRITE_BOOL(n) \ - if (je_mallctl("opt."#n, &bv, &bsz, NULL, 0) == 0) { \ - malloc_cprintf(write_cb, cbopaque, \ - " opt."#n": %s\n", bv ? "true" : "false"); \ - } -#define OPT_WRITE_BOOL_MUTABLE(n, m) { \ - bool bv2; \ - if (je_mallctl("opt."#n, &bv, &bsz, NULL, 0) == 0 && \ - je_mallctl(#m, &bv2, &bsz, NULL, 0) == 0) { \ - malloc_cprintf(write_cb, cbopaque, \ - " opt."#n": %s ("#m": %s)\n", bv ? "true" \ - : "false", bv2 ? 
"true" : "false"); \ - } \ -} -#define OPT_WRITE_UNSIGNED(n) \ - if (je_mallctl("opt."#n, &uv, &usz, NULL, 0) == 0) { \ - malloc_cprintf(write_cb, cbopaque, \ - " opt."#n": %u\n", uv); \ - } -#define OPT_WRITE_SIZE_T(n) \ - if (je_mallctl("opt."#n, &sv, &ssz, NULL, 0) == 0) { \ - malloc_cprintf(write_cb, cbopaque, \ - " opt."#n": %zu\n", sv); \ - } -#define OPT_WRITE_SSIZE_T(n) \ - if (je_mallctl("opt."#n, &ssv, &sssz, NULL, 0) == 0) { \ - malloc_cprintf(write_cb, cbopaque, \ - " opt."#n": %zd\n", ssv); \ - } -#define OPT_WRITE_SSIZE_T_MUTABLE(n, m) { \ - ssize_t ssv2; \ - if (je_mallctl("opt."#n, &ssv, &sssz, NULL, 0) == 0 && \ - je_mallctl(#m, &ssv2, &sssz, NULL, 0) == 0) { \ - malloc_cprintf(write_cb, cbopaque, \ - " opt."#n": %zd ("#m": %zd)\n", \ - ssv, ssv2); \ - } \ -} -#define OPT_WRITE_CHAR_P(n) \ - if (je_mallctl("opt."#n, &cpv, &cpsz, NULL, 0) == 0) { \ - malloc_cprintf(write_cb, cbopaque, \ - " opt."#n": \"%s\"\n", cpv); \ - } - + "{\n" + "\t\"jemalloc\": {\n"); + } else { malloc_cprintf(write_cb, cbopaque, - "Run-time option settings:\n"); - OPT_WRITE_BOOL(abort) - OPT_WRITE_SIZE_T(lg_chunk) - OPT_WRITE_CHAR_P(dss) - OPT_WRITE_UNSIGNED(narenas) - OPT_WRITE_CHAR_P(purge) - if (opt_purge == purge_mode_ratio) { - OPT_WRITE_SSIZE_T_MUTABLE(lg_dirty_mult, - arenas.lg_dirty_mult) - } - if (opt_purge == purge_mode_decay) - OPT_WRITE_SSIZE_T_MUTABLE(decay_time, arenas.decay_time) - OPT_WRITE_BOOL(stats_print) - OPT_WRITE_CHAR_P(junk) - OPT_WRITE_SIZE_T(quarantine) - OPT_WRITE_BOOL(redzone) - OPT_WRITE_BOOL(zero) - OPT_WRITE_BOOL(utrace) - OPT_WRITE_BOOL(valgrind) - OPT_WRITE_BOOL(xmalloc) - OPT_WRITE_BOOL(tcache) - OPT_WRITE_SSIZE_T(lg_tcache_max) - OPT_WRITE_BOOL(prof) - OPT_WRITE_CHAR_P(prof_prefix) - OPT_WRITE_BOOL_MUTABLE(prof_active, prof.active) - OPT_WRITE_BOOL_MUTABLE(prof_thread_active_init, - prof.thread_active_init) - OPT_WRITE_SSIZE_T(lg_prof_sample) - OPT_WRITE_BOOL(prof_accum) - OPT_WRITE_SSIZE_T(lg_prof_interval) - OPT_WRITE_BOOL(prof_gdump) - OPT_WRITE_BOOL(prof_final) - OPT_WRITE_BOOL(prof_leak) - -#undef OPT_WRITE_BOOL -#undef OPT_WRITE_BOOL_MUTABLE -#undef OPT_WRITE_SIZE_T -#undef OPT_WRITE_SSIZE_T -#undef OPT_WRITE_CHAR_P - - malloc_cprintf(write_cb, cbopaque, "CPUs: %u\n", ncpus); - - CTL_GET("arenas.narenas", &uv, unsigned); - malloc_cprintf(write_cb, cbopaque, "Arenas: %u\n", uv); - - malloc_cprintf(write_cb, cbopaque, "Pointer size: %zu\n", - sizeof(void *)); - - CTL_GET("arenas.quantum", &sv, size_t); - malloc_cprintf(write_cb, cbopaque, "Quantum size: %zu\n", - sv); - - CTL_GET("arenas.page", &sv, size_t); - malloc_cprintf(write_cb, cbopaque, "Page size: %zu\n", sv); - - CTL_GET("arenas.lg_dirty_mult", &ssv, ssize_t); - if (opt_purge == purge_mode_ratio) { - if (ssv >= 0) { - malloc_cprintf(write_cb, cbopaque, - "Min active:dirty page ratio per arena: " - "%u:1\n", (1U << ssv)); - } else { - malloc_cprintf(write_cb, cbopaque, - "Min active:dirty page ratio per arena: " - "N/A\n"); - } - } - CTL_GET("arenas.decay_time", &ssv, ssize_t); - if (opt_purge == purge_mode_decay) { - malloc_cprintf(write_cb, cbopaque, - "Unused dirty page decay time: %zd%s\n", - ssv, (ssv < 0) ? 
" (no decay)" : ""); - } - if (je_mallctl("arenas.tcache_max", &sv, &ssz, NULL, 0) == 0) { - malloc_cprintf(write_cb, cbopaque, - "Maximum thread-cached size class: %zu\n", sv); - } - if (je_mallctl("opt.prof", &bv, &bsz, NULL, 0) == 0 && bv) { - CTL_GET("prof.lg_sample", &sv, size_t); - malloc_cprintf(write_cb, cbopaque, - "Average profile sample interval: %"FMTu64 - " (2^%zu)\n", (((uint64_t)1U) << sv), sv); - - CTL_GET("opt.lg_prof_interval", &ssv, ssize_t); - if (ssv >= 0) { - malloc_cprintf(write_cb, cbopaque, - "Average profile dump interval: %"FMTu64 - " (2^%zd)\n", - (((uint64_t)1U) << ssv), ssv); - } else { - malloc_cprintf(write_cb, cbopaque, - "Average profile dump interval: N/A\n"); - } - } - CTL_GET("opt.lg_chunk", &sv, size_t); - malloc_cprintf(write_cb, cbopaque, - "Chunk size: %zu (2^%zu)\n", (ZU(1) << sv), sv); + "___ Begin jemalloc statistics ___\n"); } + if (general) + stats_general_print(write_cb, cbopaque, json, merged, unmerged); if (config_stats) { - size_t *cactive; - size_t allocated, active, metadata, resident, mapped, retained; - - CTL_GET("stats.cactive", &cactive, size_t *); - CTL_GET("stats.allocated", &allocated, size_t); - CTL_GET("stats.active", &active, size_t); - CTL_GET("stats.metadata", &metadata, size_t); - CTL_GET("stats.resident", &resident, size_t); - CTL_GET("stats.mapped", &mapped, size_t); - CTL_GET("stats.retained", &retained, size_t); - malloc_cprintf(write_cb, cbopaque, - "Allocated: %zu, active: %zu, metadata: %zu," - " resident: %zu, mapped: %zu, retained: %zu\n", - allocated, active, metadata, resident, mapped, retained); - malloc_cprintf(write_cb, cbopaque, - "Current active ceiling: %zu\n", - atomic_read_z(cactive)); - - if (merged) { - unsigned narenas; - - CTL_GET("arenas.narenas", &narenas, unsigned); - { - VARIABLE_ARRAY(bool, initialized, narenas); - size_t isz; - unsigned i, ninitialized; - - isz = sizeof(bool) * narenas; - xmallctl("arenas.initialized", initialized, - &isz, NULL, 0); - for (i = ninitialized = 0; i < narenas; i++) { - if (initialized[i]) - ninitialized++; - } - - if (ninitialized > 1 || !unmerged) { - /* Print merged arena stats. */ - malloc_cprintf(write_cb, cbopaque, - "\nMerged arenas stats:\n"); - stats_arena_print(write_cb, cbopaque, - narenas, bins, large, huge); - } - } - } - - if (unmerged) { - unsigned narenas; - - /* Print stats for each arena. 
*/ - - CTL_GET("arenas.narenas", &narenas, unsigned); - { - VARIABLE_ARRAY(bool, initialized, narenas); - size_t isz; - unsigned i; - - isz = sizeof(bool) * narenas; - xmallctl("arenas.initialized", initialized, - &isz, NULL, 0); - - for (i = 0; i < narenas; i++) { - if (initialized[i]) { - malloc_cprintf(write_cb, - cbopaque, - "\narenas[%u]:\n", i); - stats_arena_print(write_cb, - cbopaque, i, bins, large, - huge); - } - } - } - } + stats_print_helper(write_cb, cbopaque, json, merged, unmerged, + bins, large, huge); + } + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t}\n" + "}\n"); + } else { + malloc_cprintf(write_cb, cbopaque, + "--- End jemalloc statistics ---\n"); } - malloc_cprintf(write_cb, cbopaque, "--- End jemalloc statistics ---\n"); } diff --git a/src/tcache.c b/src/tcache.c index 175759c7..f97aa420 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -445,14 +445,14 @@ tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) } bool -tcaches_create(tsdn_t *tsdn, unsigned *r_ind) +tcaches_create(tsd_t *tsd, unsigned *r_ind) { arena_t *arena; tcache_t *tcache; tcaches_t *elm; if (tcaches == NULL) { - tcaches = base_alloc(tsdn, sizeof(tcache_t *) * + tcaches = base_alloc(tsd_tsdn(tsd), sizeof(tcache_t *) * (MALLOCX_TCACHE_MAX+1)); if (tcaches == NULL) return (true); @@ -460,10 +460,10 @@ tcaches_create(tsdn_t *tsdn, unsigned *r_ind) if (tcaches_avail == NULL && tcaches_past > MALLOCX_TCACHE_MAX) return (true); - arena = arena_ichoose(tsdn, NULL); + arena = arena_ichoose(tsd, NULL); if (unlikely(arena == NULL)) return (true); - tcache = tcache_create(tsdn, arena); + tcache = tcache_create(tsd_tsdn(tsd), arena); if (tcache == NULL) return (true); diff --git a/src/tsd.c b/src/tsd.c index aeaa5e18..ec69a51c 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -171,10 +171,10 @@ tsd_init_check_recursion(tsd_init_head_t *head, tsd_init_block_t *block) tsd_init_block_t *iter; /* Check whether this thread has already inserted into the list. */ - malloc_mutex_lock(NULL, &head->lock); + malloc_mutex_lock(TSDN_NULL, &head->lock); ql_foreach(iter, &head->blocks, link) { if (iter->thread == self) { - malloc_mutex_unlock(NULL, &head->lock); + malloc_mutex_unlock(TSDN_NULL, &head->lock); return (iter->data); } } @@ -182,7 +182,7 @@ tsd_init_check_recursion(tsd_init_head_t *head, tsd_init_block_t *block) ql_elm_new(block, link); block->thread = self; ql_tail_insert(&head->blocks, block, link); - malloc_mutex_unlock(NULL, &head->lock); + malloc_mutex_unlock(TSDN_NULL, &head->lock); return (NULL); } @@ -190,8 +190,8 @@ void tsd_init_finish(tsd_init_head_t *head, tsd_init_block_t *block) { - malloc_mutex_lock(NULL, &head->lock); + malloc_mutex_lock(TSDN_NULL, &head->lock); ql_remove(&head->blocks, block, link); - malloc_mutex_unlock(NULL, &head->lock); + malloc_mutex_unlock(TSDN_NULL, &head->lock); } #endif diff --git a/src/util.c b/src/util.c index a1c4a2a4..79052674 100644 --- a/src/util.c +++ b/src/util.c @@ -49,7 +49,7 @@ static void wrtmessage(void *cbopaque, const char *s) { -#ifdef SYS_write +#if defined(JEMALLOC_HAVE_SYSCALL) && defined(SYS_write) /* * Use syscall(2) rather than write(2) when possible in order to avoid * the possibility of memory allocation within libc. This is necessary diff --git a/src/zone.c b/src/zone.c index 2c17123a..0571920e 100644 --- a/src/zone.c +++ b/src/zone.c @@ -4,7 +4,7 @@ #endif /* - * The malloc_default_purgeable_zone function is only available on >= 10.6. + * The malloc_default_purgeable_zone() function is only available on >= 10.6. 
* We need to check whether it is present at runtime, thus the weak_import. */ extern malloc_zone_t *malloc_default_purgeable_zone(void) @@ -13,8 +13,9 @@ JEMALLOC_ATTR(weak_import); /******************************************************************************/ /* Data. */ -static malloc_zone_t zone; -static struct malloc_introspection_t zone_introspect; +static malloc_zone_t *default_zone, *purgeable_zone; +static malloc_zone_t jemalloc_zone; +static struct malloc_introspection_t jemalloc_zone_introspect; /******************************************************************************/ /* Function prototypes for non-inline static functions. */ @@ -164,89 +165,103 @@ static void zone_force_unlock(malloc_zone_t *zone) { + /* + * Call jemalloc_postfork_child() rather than + * jemalloc_postfork_parent(), because this function is executed by both + * parent and child. The parent can tolerate having state + * reinitialized, but the child cannot unlock mutexes that were locked + * by the parent. + */ if (isthreaded) - jemalloc_postfork_parent(); + jemalloc_postfork_child(); } -JEMALLOC_ATTR(constructor) -void -register_zone(void) +static void +zone_init(void) { - /* - * If something else replaced the system default zone allocator, don't - * register jemalloc's. - */ - malloc_zone_t *default_zone = malloc_default_zone(); - malloc_zone_t *purgeable_zone = NULL; - if (!default_zone->zone_name || - strcmp(default_zone->zone_name, "DefaultMallocZone") != 0) { - return; - } - - zone.size = (void *)zone_size; - zone.malloc = (void *)zone_malloc; - zone.calloc = (void *)zone_calloc; - zone.valloc = (void *)zone_valloc; - zone.free = (void *)zone_free; - zone.realloc = (void *)zone_realloc; - zone.destroy = (void *)zone_destroy; - zone.zone_name = "jemalloc_zone"; - zone.batch_malloc = NULL; - zone.batch_free = NULL; - zone.introspect = &zone_introspect; - zone.version = JEMALLOC_ZONE_VERSION; + jemalloc_zone.size = (void *)zone_size; + jemalloc_zone.malloc = (void *)zone_malloc; + jemalloc_zone.calloc = (void *)zone_calloc; + jemalloc_zone.valloc = (void *)zone_valloc; + jemalloc_zone.free = (void *)zone_free; + jemalloc_zone.realloc = (void *)zone_realloc; + jemalloc_zone.destroy = (void *)zone_destroy; + jemalloc_zone.zone_name = "jemalloc_zone"; + jemalloc_zone.batch_malloc = NULL; + jemalloc_zone.batch_free = NULL; + jemalloc_zone.introspect = &jemalloc_zone_introspect; + jemalloc_zone.version = JEMALLOC_ZONE_VERSION; #if (JEMALLOC_ZONE_VERSION >= 5) - zone.memalign = zone_memalign; + jemalloc_zone.memalign = zone_memalign; #endif #if (JEMALLOC_ZONE_VERSION >= 6) - zone.free_definite_size = zone_free_definite_size; + jemalloc_zone.free_definite_size = zone_free_definite_size; #endif #if (JEMALLOC_ZONE_VERSION >= 8) - zone.pressure_relief = NULL; + jemalloc_zone.pressure_relief = NULL; #endif - zone_introspect.enumerator = NULL; - zone_introspect.good_size = (void *)zone_good_size; - zone_introspect.check = NULL; - zone_introspect.print = NULL; - zone_introspect.log = NULL; - zone_introspect.force_lock = (void *)zone_force_lock; - zone_introspect.force_unlock = (void *)zone_force_unlock; - zone_introspect.statistics = NULL; + jemalloc_zone_introspect.enumerator = NULL; + jemalloc_zone_introspect.good_size = (void *)zone_good_size; + jemalloc_zone_introspect.check = NULL; + jemalloc_zone_introspect.print = NULL; + jemalloc_zone_introspect.log = NULL; + jemalloc_zone_introspect.force_lock = (void *)zone_force_lock; + jemalloc_zone_introspect.force_unlock = (void *)zone_force_unlock; + 
jemalloc_zone_introspect.statistics = NULL; #if (JEMALLOC_ZONE_VERSION >= 6) - zone_introspect.zone_locked = NULL; + jemalloc_zone_introspect.zone_locked = NULL; #endif #if (JEMALLOC_ZONE_VERSION >= 7) - zone_introspect.enable_discharge_checking = NULL; - zone_introspect.disable_discharge_checking = NULL; - zone_introspect.discharge = NULL; -#ifdef __BLOCKS__ - zone_introspect.enumerate_discharged_pointers = NULL; -#else - zone_introspect.enumerate_unavailable_without_blocks = NULL; -#endif + jemalloc_zone_introspect.enable_discharge_checking = NULL; + jemalloc_zone_introspect.disable_discharge_checking = NULL; + jemalloc_zone_introspect.discharge = NULL; +# ifdef __BLOCKS__ + jemalloc_zone_introspect.enumerate_discharged_pointers = NULL; +# else + jemalloc_zone_introspect.enumerate_unavailable_without_blocks = NULL; +# endif #endif +} + +static malloc_zone_t * +zone_default_get(void) +{ + malloc_zone_t **zones = NULL; + unsigned int num_zones = 0; /* - * The default purgeable zone is created lazily by OSX's libc. It uses - * the default zone when it is created for "small" allocations - * (< 15 KiB), but assumes the default zone is a scalable_zone. This - * obviously fails when the default zone is the jemalloc zone, so - * malloc_default_purgeable_zone is called beforehand so that the - * default purgeable zone is created when the default zone is still - * a scalable_zone. As purgeable zones only exist on >= 10.6, we need - * to check for the existence of malloc_default_purgeable_zone() at - * run time. + * On OSX 10.12, malloc_default_zone returns a special zone that is not + * present in the list of registered zones. That zone uses a "lite zone" + * if one is present (apparently enabled when malloc stack logging is + * enabled), or the first registered zone otherwise. In practice this + * means unless malloc stack logging is enabled, the first registered + * zone is the default. So get the list of zones to get the first one, + * instead of relying on malloc_default_zone. */ - if (malloc_default_purgeable_zone != NULL) - purgeable_zone = malloc_default_purgeable_zone(); + if (KERN_SUCCESS != malloc_get_all_zones(0, NULL, + (vm_address_t**)&zones, &num_zones)) { + /* + * Reset the value in case the failure happened after it was + * set. + */ + num_zones = 0; + } - /* Register the custom zone. At this point it won't be the default. */ - malloc_zone_register(&zone); + if (num_zones) + return (zones[0]); + + return (malloc_default_zone()); +} + +/* As written, this function can only promote jemalloc_zone. */ +static void +zone_promote(void) +{ + malloc_zone_t *zone; do { - default_zone = malloc_default_zone(); /* * Unregister and reregister the default zone. On OSX >= 10.6, * unregistering takes the last registered zone and places it @@ -257,6 +272,7 @@ register_zone(void) */ malloc_zone_unregister(default_zone); malloc_zone_register(default_zone); + /* * On OSX 10.6, having the default purgeable zone appear before * the default zone makes some things crash because it thinks it @@ -268,9 +284,47 @@ register_zone(void) * above, i.e. the default zone. Registering it again then puts * it at the end, obviously after the default zone. 
*/ - if (purgeable_zone) { + if (purgeable_zone != NULL) { malloc_zone_unregister(purgeable_zone); malloc_zone_register(purgeable_zone); } - } while (malloc_default_zone() != &zone); + + zone = zone_default_get(); + } while (zone != &jemalloc_zone); +} + +JEMALLOC_ATTR(constructor) +void +zone_register(void) +{ + + /* + * If something else replaced the system default zone allocator, don't + * register jemalloc's. + */ + default_zone = zone_default_get(); + if (!default_zone->zone_name || strcmp(default_zone->zone_name, + "DefaultMallocZone") != 0) + return; + + /* + * The default purgeable zone is created lazily by OSX's libc. It uses + * the default zone when it is created for "small" allocations + * (< 15 KiB), but assumes the default zone is a scalable_zone. This + * obviously fails when the default zone is the jemalloc zone, so + * malloc_default_purgeable_zone() is called beforehand so that the + * default purgeable zone is created when the default zone is still + * a scalable_zone. As purgeable zones only exist on >= 10.6, we need + * to check for the existence of malloc_default_purgeable_zone() at + * run time. + */ + purgeable_zone = (malloc_default_purgeable_zone == NULL) ? NULL : + malloc_default_purgeable_zone(); + + /* Register the custom zone. At this point it won't be the default. */ + zone_init(); + malloc_zone_register(&jemalloc_zone); + + /* Promote the custom zone to be default. */ + zone_promote(); } diff --git a/test/include/test/mtx.h b/test/include/test/mtx.h index bbe822f5..58afbc3d 100644 --- a/test/include/test/mtx.h +++ b/test/include/test/mtx.h @@ -8,6 +8,8 @@ typedef struct { #ifdef _WIN32 CRITICAL_SECTION lock; +#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) + os_unfair_lock lock; #elif (defined(JEMALLOC_OSSPIN)) OSSpinLock lock; #else diff --git a/test/integration/aligned_alloc.c b/test/integration/aligned_alloc.c index 60900148..58438421 100644 --- a/test/integration/aligned_alloc.c +++ b/test/integration/aligned_alloc.c @@ -1,9 +1,20 @@ #include "test/jemalloc_test.h" #define CHUNK 0x400000 -/* #define MAXALIGN ((size_t)UINT64_C(0x80000000000)) */ -#define MAXALIGN ((size_t)0x2000000LU) -#define NITER 4 +#define MAXALIGN (((size_t)1) << 23) + +/* + * On systems which can't merge extents, tests that call this function generate + * a lot of dirty memory very quickly. Purging between cycles mitigates + * potential OOM on e.g. 32-bit Windows. + */ +static void +purge(void) +{ + + assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, + "Unexpected mallctl error"); +} TEST_BEGIN(test_alignment_errors) { @@ -74,6 +85,7 @@ TEST_END TEST_BEGIN(test_alignment_and_size) { +#define NITER 4 size_t alignment, size, total; unsigned i; void *ps[NITER]; @@ -110,7 +122,9 @@ TEST_BEGIN(test_alignment_and_size) } } } + purge(); } +#undef NITER } TEST_END diff --git a/test/integration/mallocx.c b/test/integration/mallocx.c index 55e1a090..43b76eba 100644 --- a/test/integration/mallocx.c +++ b/test/integration/mallocx.c @@ -50,6 +50,19 @@ get_huge_size(size_t ind) return (get_size_impl("arenas.hchunk.0.size", ind)); } +/* + * On systems which can't merge extents, tests that call this function generate + * a lot of dirty memory very quickly. Purging between cycles mitigates + * potential OOM on e.g. 32-bit Windows. 
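The purge() helper defined for these tests (in aligned_alloc.c above, and again in mallocx.c and posix_memalign.c below) purges only arena 0, which is all these tests allocate from. Application code can apply the same mallctl namespace to every arena; a hedged sketch using only the documented "arenas.narenas" and "arena.<i>.purge" names, with error handling kept to a minimum:

#include <stdio.h>
#include <jemalloc/jemalloc.h>

/* Ask each arena to purge its dirty pages; best effort. */
static void
purge_all_arenas(void)
{
	unsigned narenas, i;
	size_t sz = sizeof(narenas);
	char cmd[64];

	if (mallctl("arenas.narenas", &narenas, &sz, NULL, 0) != 0)
		return;
	for (i = 0; i < narenas; i++) {
		snprintf(cmd, sizeof(cmd), "arena.%u.purge", i);
		mallctl(cmd, NULL, NULL, NULL, 0);
	}
}
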
+ */ +static void +purge(void) +{ + + assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, + "Unexpected mallctl error"); +} + TEST_BEGIN(test_overflow) { size_t hugemax; @@ -96,6 +109,7 @@ TEST_BEGIN(test_oom) if (ptrs[i] != NULL) dallocx(ptrs[i], 0); } + purge(); #if LG_SIZEOF_PTR == 3 assert_ptr_null(mallocx(0x8000000000000000ULL, @@ -113,7 +127,7 @@ TEST_END TEST_BEGIN(test_basic) { -#define MAXSZ (((size_t)1) << 26) +#define MAXSZ (((size_t)1) << 23) size_t sz; for (sz = 1; sz < MAXSZ; sz = nallocx(sz, 0) + 1) { @@ -122,23 +136,28 @@ TEST_BEGIN(test_basic) nsz = nallocx(sz, 0); assert_zu_ne(nsz, 0, "Unexpected nallocx() error"); p = mallocx(sz, 0); - assert_ptr_not_null(p, "Unexpected mallocx() error"); + assert_ptr_not_null(p, + "Unexpected mallocx(size=%zx, flags=0) error", sz); rsz = sallocx(p, 0); assert_zu_ge(rsz, sz, "Real size smaller than expected"); assert_zu_eq(nsz, rsz, "nallocx()/sallocx() size mismatch"); dallocx(p, 0); p = mallocx(sz, 0); - assert_ptr_not_null(p, "Unexpected mallocx() error"); + assert_ptr_not_null(p, + "Unexpected mallocx(size=%zx, flags=0) error", sz); dallocx(p, 0); nsz = nallocx(sz, MALLOCX_ZERO); assert_zu_ne(nsz, 0, "Unexpected nallocx() error"); p = mallocx(sz, MALLOCX_ZERO); - assert_ptr_not_null(p, "Unexpected mallocx() error"); + assert_ptr_not_null(p, + "Unexpected mallocx(size=%zx, flags=MALLOCX_ZERO) error", + nsz); rsz = sallocx(p, 0); assert_zu_eq(nsz, rsz, "nallocx()/sallocx() rsize mismatch"); dallocx(p, 0); + purge(); } #undef MAXSZ } @@ -146,7 +165,7 @@ TEST_END TEST_BEGIN(test_alignment_and_size) { -#define MAXALIGN (((size_t)1) << 25) +#define MAXALIGN (((size_t)1) << 23) #define NITER 4 size_t nsz, rsz, sz, alignment, total; unsigned i; @@ -196,6 +215,7 @@ TEST_BEGIN(test_alignment_and_size) } } } + purge(); } #undef MAXALIGN #undef NITER diff --git a/test/integration/posix_memalign.c b/test/integration/posix_memalign.c index 19741c6c..e22e1020 100644 --- a/test/integration/posix_memalign.c +++ b/test/integration/posix_memalign.c @@ -1,9 +1,20 @@ #include "test/jemalloc_test.h" #define CHUNK 0x400000 -/* #define MAXALIGN ((size_t)UINT64_C(0x80000000000)) */ -#define MAXALIGN ((size_t)0x2000000LU) -#define NITER 4 +#define MAXALIGN (((size_t)1) << 23) + +/* + * On systems which can't merge extents, tests that call this function generate + * a lot of dirty memory very quickly. Purging between cycles mitigates + * potential OOM on e.g. 32-bit Windows. 
+ */ +static void +purge(void) +{ + + assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, + "Unexpected mallctl error"); +} TEST_BEGIN(test_alignment_errors) { @@ -66,6 +77,7 @@ TEST_END TEST_BEGIN(test_alignment_and_size) { +#define NITER 4 size_t alignment, size, total; unsigned i; int err; @@ -104,7 +116,9 @@ TEST_BEGIN(test_alignment_and_size) } } } + purge(); } +#undef NITER } TEST_END diff --git a/test/src/mtx.c b/test/src/mtx.c index 73bd02f6..8a5dfdd9 100644 --- a/test/src/mtx.c +++ b/test/src/mtx.c @@ -11,6 +11,8 @@ mtx_init(mtx_t *mtx) #ifdef _WIN32 if (!InitializeCriticalSectionAndSpinCount(&mtx->lock, _CRT_SPINCOUNT)) return (true); +#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) + mtx->lock = OS_UNFAIR_LOCK_INIT; #elif (defined(JEMALLOC_OSSPIN)) mtx->lock = 0; #else @@ -33,6 +35,7 @@ mtx_fini(mtx_t *mtx) { #ifdef _WIN32 +#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) #elif (defined(JEMALLOC_OSSPIN)) #else pthread_mutex_destroy(&mtx->lock); @@ -45,6 +48,8 @@ mtx_lock(mtx_t *mtx) #ifdef _WIN32 EnterCriticalSection(&mtx->lock); +#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) + os_unfair_lock_lock(&mtx->lock); #elif (defined(JEMALLOC_OSSPIN)) OSSpinLockLock(&mtx->lock); #else @@ -58,6 +63,8 @@ mtx_unlock(mtx_t *mtx) #ifdef _WIN32 LeaveCriticalSection(&mtx->lock); +#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) + os_unfair_lock_unlock(&mtx->lock); #elif (defined(JEMALLOC_OSSPIN)) OSSpinLockUnlock(&mtx->lock); #else diff --git a/test/unit/ckh.c b/test/unit/ckh.c index 961e2acb..2cbc2268 100644 --- a/test/unit/ckh.c +++ b/test/unit/ckh.c @@ -2,24 +2,24 @@ TEST_BEGIN(test_new_delete) { - tsdn_t *tsdn; + tsd_t *tsd; ckh_t ckh; - tsdn = tsdn_fetch(); + tsd = tsd_fetch(); - assert_false(ckh_new(tsdn, &ckh, 2, ckh_string_hash, + assert_false(ckh_new(tsd, &ckh, 2, ckh_string_hash, ckh_string_keycomp), "Unexpected ckh_new() error"); - ckh_delete(tsdn, &ckh); + ckh_delete(tsd, &ckh); - assert_false(ckh_new(tsdn, &ckh, 3, ckh_pointer_hash, + assert_false(ckh_new(tsd, &ckh, 3, ckh_pointer_hash, ckh_pointer_keycomp), "Unexpected ckh_new() error"); - ckh_delete(tsdn, &ckh); + ckh_delete(tsd, &ckh); } TEST_END TEST_BEGIN(test_count_insert_search_remove) { - tsdn_t *tsdn; + tsd_t *tsd; ckh_t ckh; const char *strs[] = { "a string", @@ -30,9 +30,9 @@ TEST_BEGIN(test_count_insert_search_remove) const char *missing = "A string not in the hash table."; size_t i; - tsdn = tsdn_fetch(); + tsd = tsd_fetch(); - assert_false(ckh_new(tsdn, &ckh, 2, ckh_string_hash, + assert_false(ckh_new(tsd, &ckh, 2, ckh_string_hash, ckh_string_keycomp), "Unexpected ckh_new() error"); assert_zu_eq(ckh_count(&ckh), 0, "ckh_count() should return %zu, but it returned %zu", ZU(0), @@ -40,7 +40,7 @@ TEST_BEGIN(test_count_insert_search_remove) /* Insert. */ for (i = 0; i < sizeof(strs)/sizeof(const char *); i++) { - ckh_insert(tsdn, &ckh, strs[i], strs[i]); + ckh_insert(tsd, &ckh, strs[i], strs[i]); assert_zu_eq(ckh_count(&ckh), i+1, "ckh_count() should return %zu, but it returned %zu", i+1, ckh_count(&ckh)); @@ -85,7 +85,7 @@ TEST_BEGIN(test_count_insert_search_remove) vp = (i & 2) ? &v.p : NULL; k.p = NULL; v.p = NULL; - assert_false(ckh_remove(tsdn, &ckh, strs[i], kp, vp), + assert_false(ckh_remove(tsd, &ckh, strs[i], kp, vp), "Unexpected ckh_remove() error"); ks = (i & 1) ? 
strs[i] : (const char *)NULL; @@ -101,22 +101,22 @@ TEST_BEGIN(test_count_insert_search_remove) ckh_count(&ckh)); } - ckh_delete(tsdn, &ckh); + ckh_delete(tsd, &ckh); } TEST_END TEST_BEGIN(test_insert_iter_remove) { #define NITEMS ZU(1000) - tsdn_t *tsdn; + tsd_t *tsd; ckh_t ckh; void **p[NITEMS]; void *q, *r; size_t i; - tsdn = tsdn_fetch(); + tsd = tsd_fetch(); - assert_false(ckh_new(tsdn, &ckh, 2, ckh_pointer_hash, + assert_false(ckh_new(tsd, &ckh, 2, ckh_pointer_hash, ckh_pointer_keycomp), "Unexpected ckh_new() error"); for (i = 0; i < NITEMS; i++) { @@ -128,7 +128,7 @@ TEST_BEGIN(test_insert_iter_remove) size_t j; for (j = i; j < NITEMS; j++) { - assert_false(ckh_insert(tsdn, &ckh, p[j], p[j]), + assert_false(ckh_insert(tsd, &ckh, p[j], p[j]), "Unexpected ckh_insert() failure"); assert_false(ckh_search(&ckh, p[j], &q, &r), "Unexpected ckh_search() failure"); @@ -143,13 +143,13 @@ TEST_BEGIN(test_insert_iter_remove) for (j = i + 1; j < NITEMS; j++) { assert_false(ckh_search(&ckh, p[j], NULL, NULL), "Unexpected ckh_search() failure"); - assert_false(ckh_remove(tsdn, &ckh, p[j], &q, &r), + assert_false(ckh_remove(tsd, &ckh, p[j], &q, &r), "Unexpected ckh_remove() failure"); assert_ptr_eq(p[j], q, "Key pointer mismatch"); assert_ptr_eq(p[j], r, "Value pointer mismatch"); assert_true(ckh_search(&ckh, p[j], NULL, NULL), "Unexpected ckh_search() success"); - assert_true(ckh_remove(tsdn, &ckh, p[j], &q, &r), + assert_true(ckh_remove(tsd, &ckh, p[j], &q, &r), "Unexpected ckh_remove() success"); } @@ -184,13 +184,13 @@ TEST_BEGIN(test_insert_iter_remove) for (i = 0; i < NITEMS; i++) { assert_false(ckh_search(&ckh, p[i], NULL, NULL), "Unexpected ckh_search() failure"); - assert_false(ckh_remove(tsdn, &ckh, p[i], &q, &r), + assert_false(ckh_remove(tsd, &ckh, p[i], &q, &r), "Unexpected ckh_remove() failure"); assert_ptr_eq(p[i], q, "Key pointer mismatch"); assert_ptr_eq(p[i], r, "Value pointer mismatch"); assert_true(ckh_search(&ckh, p[i], NULL, NULL), "Unexpected ckh_search() success"); - assert_true(ckh_remove(tsdn, &ckh, p[i], &q, &r), + assert_true(ckh_remove(tsd, &ckh, p[i], &q, &r), "Unexpected ckh_remove() success"); dallocx(p[i], 0); } @@ -198,7 +198,7 @@ TEST_BEGIN(test_insert_iter_remove) assert_zu_eq(ckh_count(&ckh), 0, "ckh_count() should return %zu, but it returned %zu", ZU(0), ckh_count(&ckh)); - ckh_delete(tsdn, &ckh); + ckh_delete(tsd, &ckh); #undef NITEMS } TEST_END diff --git a/test/unit/decay.c b/test/unit/decay.c index 70a2e67a..e169ae24 100644 --- a/test/unit/decay.c +++ b/test/unit/decay.c @@ -2,20 +2,28 @@ const char *malloc_conf = "purge:decay,decay_time:1"; +static nstime_monotonic_t *nstime_monotonic_orig; static nstime_update_t *nstime_update_orig; static unsigned nupdates_mock; static nstime_t time_mock; -static bool nonmonotonic_mock; +static bool monotonic_mock; + +static bool +nstime_monotonic_mock(void) +{ + + return (monotonic_mock); +} static bool nstime_update_mock(nstime_t *time) { nupdates_mock++; - if (!nonmonotonic_mock) + if (monotonic_mock) nstime_copy(time, &time_mock); - return (nonmonotonic_mock); + return (!monotonic_mock); } TEST_BEGIN(test_decay_ticks) @@ -245,9 +253,11 @@ TEST_BEGIN(test_decay_ticker) nupdates_mock = 0; nstime_init(&time_mock, 0); nstime_update(&time_mock); - nonmonotonic_mock = false; + monotonic_mock = true; + nstime_monotonic_orig = nstime_monotonic; nstime_update_orig = nstime_update; + nstime_monotonic = nstime_monotonic_mock; nstime_update = nstime_update_mock; for (i = 0; i < NPS; i++) { @@ -259,6 +269,7 @@ 
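The decay tests above can swap in a fake clock because nstime_monotonic and nstime_update are exposed to the tests as mutable function pointers (via the JET test machinery): the test saves the originals, installs mocks, and restores them afterwards. A generic sketch of that interception pattern, where time_source, real_now, and fake_now are illustrative names rather than jemalloc symbols:

#include <stdint.h>

/* Library side: an indirection point that tests may override. */
typedef uint64_t (now_t)(void);

static uint64_t
real_now(void)
{

	return (42);	/* Real code would read a clock here. */
}
now_t *time_source = real_now;

/* Test side: a deterministic stand-in. */
static uint64_t fake_ns;

static uint64_t
fake_now(void)
{

	return (fake_ns);
}

static void
test_with_mock_clock(void)
{
	now_t *orig = time_source;

	time_source = fake_now;
	fake_ns = 1000;
	/* ... exercise code that calls time_source() ... */
	time_source = orig;
}
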
TEST_BEGIN(test_decay_ticker) "Expected nstime_update() to be called"); } + nstime_monotonic = nstime_monotonic_orig; nstime_update = nstime_update_orig; nstime_init(&time, 0); @@ -316,9 +327,11 @@ TEST_BEGIN(test_decay_nonmonotonic) nupdates_mock = 0; nstime_init(&time_mock, 0); nstime_update(&time_mock); - nonmonotonic_mock = true; + monotonic_mock = false; + nstime_monotonic_orig = nstime_monotonic; nstime_update_orig = nstime_update; + nstime_monotonic = nstime_monotonic_mock; nstime_update = nstime_update_mock; for (i = 0; i < NPS; i++) { @@ -342,8 +355,9 @@ TEST_BEGIN(test_decay_nonmonotonic) config_stats ? 0 : ENOENT, "Unexpected mallctl result"); if (config_stats) - assert_u64_gt(npurge1, npurge0, "Expected purging to occur"); + assert_u64_eq(npurge0, npurge1, "Unexpected purging occurred"); + nstime_monotonic = nstime_monotonic_orig; nstime_update = nstime_update_orig; #undef NPS } diff --git a/test/unit/fork.c b/test/unit/fork.c index 46c815ef..c530797c 100644 --- a/test/unit/fork.c +++ b/test/unit/fork.c @@ -26,7 +26,7 @@ TEST_BEGIN(test_fork) test_fail("Unexpected fork() failure"); } else if (pid == 0) { /* Child. */ - exit(0); + _exit(0); } else { int status; diff --git a/test/unit/junk.c b/test/unit/junk.c index acddc601..460bd524 100644 --- a/test/unit/junk.c +++ b/test/unit/junk.c @@ -53,10 +53,10 @@ arena_dalloc_junk_large_intercept(void *ptr, size_t usize) } static void -huge_dalloc_junk_intercept(tsdn_t *tsdn, void *ptr, size_t usize) +huge_dalloc_junk_intercept(void *ptr, size_t usize) { - huge_dalloc_junk_orig(tsdn, ptr, usize); + huge_dalloc_junk_orig(ptr, usize); /* * The conditions under which junk filling actually occurs are nuanced * enough that it doesn't make sense to duplicate the decision logic in diff --git a/test/unit/math.c b/test/unit/math.c index ebec77a6..adb72bed 100644 --- a/test/unit/math.c +++ b/test/unit/math.c @@ -5,6 +5,10 @@ #include +#ifdef __PGI +#undef INFINITY +#endif + #ifndef INFINITY #define INFINITY (DBL_MAX + DBL_MAX) #endif diff --git a/test/unit/nstime.c b/test/unit/nstime.c index cd7d9a6d..0368bc26 100644 --- a/test/unit/nstime.c +++ b/test/unit/nstime.c @@ -176,6 +176,13 @@ TEST_BEGIN(test_nstime_divide) } TEST_END +TEST_BEGIN(test_nstime_monotonic) +{ + + nstime_monotonic(); +} +TEST_END + TEST_BEGIN(test_nstime_update) { nstime_t nst; @@ -198,7 +205,6 @@ TEST_BEGIN(test_nstime_update) assert_d_eq(nstime_compare(&nst, &nst0), 0, "Time should not have been modified"); } - } TEST_END @@ -216,5 +222,6 @@ main(void) test_nstime_imultiply, test_nstime_idivide, test_nstime_divide, + test_nstime_monotonic, test_nstime_update)); } diff --git a/test/unit/run_quantize.c b/test/unit/run_quantize.c index f6a2f74f..45f32018 100644 --- a/test/unit/run_quantize.c +++ b/test/unit/run_quantize.c @@ -111,7 +111,7 @@ TEST_BEGIN(test_monotonic) floor_prev = 0; ceil_prev = 0; - for (i = 1; i < run_quantize_max >> LG_PAGE; i++) { + for (i = 1; i <= large_maxclass >> LG_PAGE; i++) { size_t run_size, floor, ceil; run_size = i << LG_PAGE; diff --git a/test/unit/size_classes.c b/test/unit/size_classes.c index 2e2caaf5..4e1e0ce4 100644 --- a/test/unit/size_classes.c +++ b/test/unit/size_classes.c @@ -80,25 +80,96 @@ TEST_BEGIN(test_size_classes) } TEST_END +TEST_BEGIN(test_psize_classes) +{ + size_t size_class, max_size_class; + pszind_t pind, max_pind; + + max_size_class = get_max_size_class(); + max_pind = psz2ind(max_size_class); + + for (pind = 0, size_class = pind2sz(pind); pind < max_pind || + size_class < max_size_class; pind++, size_class = + 
pind2sz(pind)) { + assert_true(pind < max_pind, + "Loop conditionals should be equivalent; pind=%u, " + "size_class=%zu (%#zx)", pind, size_class, size_class); + assert_true(size_class < max_size_class, + "Loop conditionals should be equivalent; pind=%u, " + "size_class=%zu (%#zx)", pind, size_class, size_class); + + assert_u_eq(pind, psz2ind(size_class), + "psz2ind() does not reverse pind2sz(): pind=%u -->" + " size_class=%zu --> pind=%u --> size_class=%zu", pind, + size_class, psz2ind(size_class), + pind2sz(psz2ind(size_class))); + assert_zu_eq(size_class, pind2sz(psz2ind(size_class)), + "pind2sz() does not reverse psz2ind(): pind=%u -->" + " size_class=%zu --> pind=%u --> size_class=%zu", pind, + size_class, psz2ind(size_class), + pind2sz(psz2ind(size_class))); + + assert_u_eq(pind+1, psz2ind(size_class+1), + "Next size_class does not round up properly"); + + assert_zu_eq(size_class, (pind > 0) ? + psz2u(pind2sz(pind-1)+1) : psz2u(1), + "psz2u() does not round up to size class"); + assert_zu_eq(size_class, psz2u(size_class-1), + "psz2u() does not round up to size class"); + assert_zu_eq(size_class, psz2u(size_class), + "psz2u() does not compute same size class"); + assert_zu_eq(psz2u(size_class+1), pind2sz(pind+1), + "psz2u() does not round up to next size class"); + } + + assert_u_eq(pind, psz2ind(pind2sz(pind)), + "psz2ind() does not reverse pind2sz()"); + assert_zu_eq(max_size_class, pind2sz(psz2ind(max_size_class)), + "pind2sz() does not reverse psz2ind()"); + + assert_zu_eq(size_class, psz2u(pind2sz(pind-1)+1), + "psz2u() does not round up to size class"); + assert_zu_eq(size_class, psz2u(size_class-1), + "psz2u() does not round up to size class"); + assert_zu_eq(size_class, psz2u(size_class), + "psz2u() does not compute same size class"); +} +TEST_END + TEST_BEGIN(test_overflow) { size_t max_size_class; max_size_class = get_max_size_class(); - assert_u_ge(size2index(max_size_class+1), NSIZES, - "size2index() should return >= NSIZES on overflow"); - assert_u_ge(size2index(ZU(PTRDIFF_MAX)+1), NSIZES, - "size2index() should return >= NSIZES on overflow"); - assert_u_ge(size2index(SIZE_T_MAX), NSIZES, - "size2index() should return >= NSIZES on overflow"); + assert_u_eq(size2index(max_size_class+1), NSIZES, + "size2index() should return NSIZES on overflow"); + assert_u_eq(size2index(ZU(PTRDIFF_MAX)+1), NSIZES, + "size2index() should return NSIZES on overflow"); + assert_u_eq(size2index(SIZE_T_MAX), NSIZES, + "size2index() should return NSIZES on overflow"); - assert_zu_gt(s2u(max_size_class+1), HUGE_MAXCLASS, - "s2u() should return > HUGE_MAXCLASS for unsupported size"); - assert_zu_gt(s2u(ZU(PTRDIFF_MAX)+1), HUGE_MAXCLASS, - "s2u() should return > HUGE_MAXCLASS for unsupported size"); + assert_zu_eq(s2u(max_size_class+1), 0, + "s2u() should return 0 for unsupported size"); + assert_zu_eq(s2u(ZU(PTRDIFF_MAX)+1), 0, + "s2u() should return 0 for unsupported size"); assert_zu_eq(s2u(SIZE_T_MAX), 0, "s2u() should return 0 on overflow"); + + assert_u_eq(psz2ind(max_size_class+1), NPSIZES, + "psz2ind() should return NPSIZES on overflow"); + assert_u_eq(psz2ind(ZU(PTRDIFF_MAX)+1), NPSIZES, + "psz2ind() should return NPSIZES on overflow"); + assert_u_eq(psz2ind(SIZE_T_MAX), NPSIZES, + "psz2ind() should return NPSIZES on overflow"); + + assert_zu_eq(psz2u(max_size_class+1), 0, + "psz2u() should return 0 for unsupported size"); + assert_zu_eq(psz2u(ZU(PTRDIFF_MAX)+1), 0, + "psz2u() should return 0 for unsupported size"); + assert_zu_eq(psz2u(SIZE_T_MAX), 0, + "psz2u() should return 0 on 
overflow"); } TEST_END @@ -108,5 +179,6 @@ main(void) return (test( test_size_classes, + test_psize_classes, test_overflow)); } diff --git a/test/unit/tsd.c b/test/unit/tsd.c index 7dde4b77..4e2622a3 100644 --- a/test/unit/tsd.c +++ b/test/unit/tsd.c @@ -58,18 +58,18 @@ thd_start(void *arg) data_t d = (data_t)(uintptr_t)arg; void *p; - assert_x_eq(*data_tsd_get(), DATA_INIT, + assert_x_eq(*data_tsd_get(true), DATA_INIT, "Initial tsd get should return initialization value"); p = malloc(1); assert_ptr_not_null(p, "Unexpected malloc() failure"); data_tsd_set(&d); - assert_x_eq(*data_tsd_get(), d, + assert_x_eq(*data_tsd_get(true), d, "After tsd set, tsd get should return value that was set"); d = 0; - assert_x_eq(*data_tsd_get(), (data_t)(uintptr_t)arg, + assert_x_eq(*data_tsd_get(true), (data_t)(uintptr_t)arg, "Resetting local data should have no effect on tsd"); free(p);