diff --git a/.appveyor.yml b/.appveyor.yml new file mode 100644 index 00000000..c74e89db --- /dev/null +++ b/.appveyor.yml @@ -0,0 +1,51 @@ +version: '{build}' + +environment: + matrix: + - MSYSTEM: MINGW64 + CPU: x86_64 + MSVC: amd64 + CONFIG_FLAGS: --enable-debug + - MSYSTEM: MINGW64 + CPU: x86_64 + CONFIG_FLAGS: --enable-debug + EXTRA_CFLAGS: "-fcommon" + - MSYSTEM: MINGW32 + CPU: i686 + MSVC: x86 + CONFIG_FLAGS: --enable-debug + - MSYSTEM: MINGW32 + CPU: i686 + CONFIG_FLAGS: --enable-debug + EXTRA_CFLAGS: "-fcommon" + - MSYSTEM: MINGW64 + CPU: x86_64 + MSVC: amd64 + CONFIG_FLAGS: + - MSYSTEM: MINGW64 + CPU: x86_64 + CONFIG_FLAGS: + EXTRA_CFLAGS: "-fcommon" + - MSYSTEM: MINGW32 + CPU: i686 + MSVC: x86 + CONFIG_FLAGS: + - MSYSTEM: MINGW32 + CPU: i686 + CONFIG_FLAGS: + EXTRA_CFLAGS: "-fcommon" + +install: + - set PATH=c:\msys64\%MSYSTEM%\bin;c:\msys64\usr\bin;%PATH% + - if defined MSVC call "c:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" %MSVC% + - if defined MSVC pacman --noconfirm -Rsc mingw-w64-%CPU%-gcc gcc + - pacman --noconfirm -Syuu + - pacman --noconfirm -S autoconf + +build_script: + - bash -c "autoconf" + - bash -c "./configure $CONFIG_FLAGS" + - mingw32-make + - file lib/jemalloc.dll + - mingw32-make tests + - mingw32-make -k check diff --git a/.autom4te.cfg b/.autom4te.cfg new file mode 100644 index 00000000..fe2424db --- /dev/null +++ b/.autom4te.cfg @@ -0,0 +1,3 @@ +begin-language: "Autoconf-without-aclocal-m4" +args: --no-cache +end-language: "Autoconf-without-aclocal-m4" diff --git a/.clang-format b/.clang-format new file mode 100644 index 00000000..527ec375 --- /dev/null +++ b/.clang-format @@ -0,0 +1,122 @@ +# jemalloc targets clang-format version 8. We include every option it supports +# here, but comment out the ones that aren't relevant for us. 
+--- +# AccessModifierOffset: -2 +AlignAfterOpenBracket: DontAlign +AlignConsecutiveAssignments: false +AlignConsecutiveDeclarations: true +AlignEscapedNewlines: Right +AlignOperands: false +AlignTrailingComments: true +AllowAllParametersOfDeclarationOnNextLine: true +AllowShortBlocksOnASingleLine: false +AllowShortCaseLabelsOnASingleLine: false +AllowShortFunctionsOnASingleLine: Empty +AllowShortIfStatementsOnASingleLine: false +AllowShortLoopsOnASingleLine: false +AlwaysBreakAfterReturnType: AllDefinitions +AlwaysBreakBeforeMultilineStrings: true +# AlwaysBreakTemplateDeclarations: Yes +BinPackArguments: true +BinPackParameters: true +BraceWrapping: + AfterClass: true + AfterControlStatement: true + AfterEnum: true + AfterFunction: true + AfterNamespace: true + AfterObjCDeclaration: true + AfterStruct: true + AfterUnion: true + BeforeCatch: true + BeforeElse: true + IndentBraces: false +# BreakAfterJavaFieldAnnotations: true +BreakBeforeBinaryOperators: NonAssignment +BreakBeforeBraces: Attach +BreakBeforeTernaryOperators: true +# BreakConstructorInitializers: BeforeColon +# BreakInheritanceList: BeforeColon +BreakStringLiterals: false +ColumnLimit: 80 +# CommentPragmas: '' +# CompactNamespaces: true +# ConstructorInitializerAllOnOneLineOrOnePerLine: true +# ConstructorInitializerIndentWidth: 4 +ContinuationIndentWidth: 4 +Cpp11BracedListStyle: true +DerivePointerAlignment: false +DisableFormat: false +ExperimentalAutoDetectBinPacking: false +FixNamespaceComments: true +ForEachMacros: [ ql_foreach, qr_foreach, ] +# IncludeBlocks: Preserve +# IncludeCategories: +# - Regex: '^<.*\.h(pp)?>' +# Priority: 1 +# IncludeIsMainRegex: '' +IndentCaseLabels: false +IndentPPDirectives: AfterHash +IndentWidth: 8 +IndentWrappedFunctionNames: false +# JavaImportGroups: [] +# JavaScriptQuotes: Leave +# JavaScriptWrapImports: True +KeepEmptyLinesAtTheStartOfBlocks: false +Language: Cpp +MacroBlockBegin: '' +MacroBlockEnd: '' +MaxEmptyLinesToKeep: 1 +# NamespaceIndentation: None +# 
ObjCBinPackProtocolList: Auto +# ObjCBlockIndentWidth: 2 +# ObjCSpaceAfterProperty: false +# ObjCSpaceBeforeProtocolList: false + +PenaltyBreakAssignment: 100 +PenaltyBreakBeforeFirstCallParameter: 100 +PenaltyBreakComment: 300 +PenaltyBreakFirstLessLess: 120 +PenaltyBreakString: 1000 +# PenaltyBreakTemplateDeclaration: 10 +PenaltyExcessCharacter: 1000000 +PenaltyReturnTypeOnItsOwnLine: 60 +PointerAlignment: Right +# RawStringFormats: +# - Language: TextProto +# Delimiters: +# - 'pb' +# - 'proto' +# EnclosingFunctions: +# - 'PARSE_TEXT_PROTO' +# BasedOnStyle: google +# - Language: Cpp +# Delimiters: +# - 'cc' +# - 'cpp' +# BasedOnStyle: llvm +# CanonicalDelimiter: 'cc' +ReflowComments: false +SortIncludes: false +SpaceAfterCStyleCast: false +# SpaceAfterTemplateKeyword: true +SpaceBeforeAssignmentOperators: true +# SpaceBeforeCpp11BracedList: false +# SpaceBeforeCtorInitializerColon: true +# SpaceBeforeInheritanceColon: true +SpaceBeforeParens: ControlStatements +# SpaceBeforeRangeBasedForLoopColon: true +SpaceInEmptyParentheses: false +SpacesBeforeTrailingComments: 1 +SpacesInAngles: false +SpacesInCStyleCastParentheses: false +# SpacesInContainerLiterals: false +SpacesInParentheses: false +SpacesInSquareBrackets: false +# Standard: Cpp11 +# This is nominally supported in clang-format version 8, but not in the build +# used by some of the core jemalloc developers. +# StatementMacros: [] +TabWidth: 8 +UseTab: ForIndentation +... 
diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs new file mode 100644 index 00000000..7f5f6975 --- /dev/null +++ b/.git-blame-ignore-revs @@ -0,0 +1,2 @@ +554185356bf990155df8d72060c4efe993642baf +34f359e0ca613b5f9d970e9b2152a5203c9df8d6 diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 00000000..6313b56c --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +* text=auto eol=lf diff --git a/.github/workflows/check_formatting.yaml b/.github/workflows/check_formatting.yaml new file mode 100644 index 00000000..8a10065f --- /dev/null +++ b/.github/workflows/check_formatting.yaml @@ -0,0 +1,10 @@ +name: 'Check Formatting' +on: [pull_request] +jobs: + check-formatting: + runs-on: ubuntu-latest + steps: + - name: Check out repository + uses: actions/checkout@v4 + - name: Check for trailing whitespace + run: scripts/check_trailing_whitespace.sh diff --git a/.github/workflows/freebsd-ci.yml b/.github/workflows/freebsd-ci.yml new file mode 100644 index 00000000..6c702d88 --- /dev/null +++ b/.github/workflows/freebsd-ci.yml @@ -0,0 +1,66 @@ +# This config file is generated by ./scripts/gen_gh_actions.py. +# Do not edit by hand. 
+ +name: FreeBSD CI + +on: + push: + branches: [ dev, ci_travis ] + pull_request: + branches: [ dev ] + +jobs: + test-freebsd: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + debug: ['--enable-debug', '--disable-debug'] + prof: ['--enable-prof', '--disable-prof'] + arch: ['64-bit', '32-bit'] + uncommon: + - '' + - '--with-lg-page=16 --with-malloc-conf=tcache:false' + + name: FreeBSD (${{ matrix.arch }}, debug=${{ matrix.debug }}, prof=${{ matrix.prof }}${{ matrix.uncommon && ', uncommon' || '' }}) + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 1 + + - name: Test on FreeBSD + uses: vmactions/freebsd-vm@v1 + with: + release: '15.0' + usesh: true + prepare: | + pkg install -y autoconf gmake + run: | + # Verify we're running in FreeBSD + echo "==== System Information ====" + uname -a + freebsd-version + echo "============================" + + # Set compiler flags for 32-bit if needed + if [ "${{ matrix.arch }}" = "32-bit" ]; then + export CC="cc -m32" + export CXX="c++ -m32" + fi + + # Generate configure script + autoconf + + # Configure with matrix options + ./configure --with-jemalloc-prefix=ci_ ${{ matrix.debug }} ${{ matrix.prof }} ${{ matrix.uncommon }} + + # Get CPU count for parallel builds + export JFLAG=$(sysctl -n kern.smp.cpus) + + gmake -j${JFLAG} + gmake -j${JFLAG} tests + gmake check + + + diff --git a/.github/workflows/linux-ci.yml b/.github/workflows/linux-ci.yml new file mode 100644 index 00000000..c5e0c9aa --- /dev/null +++ b/.github/workflows/linux-ci.yml @@ -0,0 +1,695 @@ +# This config file is generated by ./scripts/gen_gh_actions.py. +# Do not edit by hand. 
+ +name: Linux CI + +on: + push: + branches: [ dev, ci_travis ] + pull_request: + branches: [ dev ] + +jobs: + test-linux: + runs-on: ubuntu-24.04 + strategy: + fail-fast: false + matrix: + include: + - env: + CC: gcc + CXX: g++ + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: clang + CXX: clang++ + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + - env: + CC: gcc + CXX: g++ + CROSS_COMPILE_32BIT: yes + COMPILER_FLAGS: -m32 + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: --enable-debug + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: --enable-prof + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: --disable-stats + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: --disable-libdl + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: --enable-opt-safety-checks + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: --with-lg-page=16 + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-prof --enable-prof-frameptr" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--with-malloc-conf=tcache:false" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--with-malloc-conf=dss:primary" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--with-malloc-conf=percpu_arena:percpu" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--with-malloc-conf=background_thread:true" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: clang + CXX: clang++ + CROSS_COMPILE_32BIT: yes + COMPILER_FLAGS: -m32 + EXTRA_CFLAGS: "-Werror -Wno-array-bounds 
-Wno-unknown-warning-option -Wno-ignored-attributes" + - env: + CC: clang + CXX: clang++ + CONFIGURE_FLAGS: --enable-debug + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + - env: + CC: clang + CXX: clang++ + CONFIGURE_FLAGS: --enable-prof + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + - env: + CC: clang + CXX: clang++ + CONFIGURE_FLAGS: --disable-stats + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + - env: + CC: clang + CXX: clang++ + CONFIGURE_FLAGS: --disable-libdl + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + - env: + CC: clang + CXX: clang++ + CONFIGURE_FLAGS: --enable-opt-safety-checks + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + - env: + CC: clang + CXX: clang++ + CONFIGURE_FLAGS: --with-lg-page=16 + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + - env: + CC: clang + CXX: clang++ + CONFIGURE_FLAGS: "--enable-prof --enable-prof-frameptr" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + - env: + CC: clang + CXX: clang++ + CONFIGURE_FLAGS: "--with-malloc-conf=tcache:false" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + - env: + CC: clang + CXX: clang++ + CONFIGURE_FLAGS: "--with-malloc-conf=dss:primary" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + - env: + CC: clang + CXX: clang++ + CONFIGURE_FLAGS: "--with-malloc-conf=percpu_arena:percpu" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + - env: + CC: clang + CXX: clang++ + CONFIGURE_FLAGS: "--with-malloc-conf=background_thread:true" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option 
-Wno-ignored-attributes" + - env: + CC: gcc + CXX: g++ + CROSS_COMPILE_32BIT: yes + COMPILER_FLAGS: -m32 + CONFIGURE_FLAGS: --enable-debug + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CROSS_COMPILE_32BIT: yes + COMPILER_FLAGS: -m32 + CONFIGURE_FLAGS: --enable-prof + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CROSS_COMPILE_32BIT: yes + COMPILER_FLAGS: -m32 + CONFIGURE_FLAGS: --disable-stats + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CROSS_COMPILE_32BIT: yes + COMPILER_FLAGS: -m32 + CONFIGURE_FLAGS: --disable-libdl + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CROSS_COMPILE_32BIT: yes + COMPILER_FLAGS: -m32 + CONFIGURE_FLAGS: --enable-opt-safety-checks + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CROSS_COMPILE_32BIT: yes + COMPILER_FLAGS: -m32 + CONFIGURE_FLAGS: --with-lg-page=16 + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CROSS_COMPILE_32BIT: yes + COMPILER_FLAGS: -m32 + CONFIGURE_FLAGS: "--enable-prof --enable-prof-frameptr" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CROSS_COMPILE_32BIT: yes + COMPILER_FLAGS: -m32 + CONFIGURE_FLAGS: "--with-malloc-conf=tcache:false" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CROSS_COMPILE_32BIT: yes + COMPILER_FLAGS: -m32 + CONFIGURE_FLAGS: "--with-malloc-conf=dss:primary" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CROSS_COMPILE_32BIT: yes + COMPILER_FLAGS: -m32 + CONFIGURE_FLAGS: "--with-malloc-conf=percpu_arena:percpu" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CROSS_COMPILE_32BIT: yes + COMPILER_FLAGS: -m32 + CONFIGURE_FLAGS: "--with-malloc-conf=background_thread:true" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-debug --enable-prof" + EXTRA_CFLAGS: "-Werror 
-Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-debug --disable-stats" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-debug --disable-libdl" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-debug --enable-opt-safety-checks" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-debug --with-lg-page=16" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-debug --enable-prof --enable-prof-frameptr" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-debug --with-malloc-conf=tcache:false" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-debug --with-malloc-conf=dss:primary" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-debug --with-malloc-conf=percpu_arena:percpu" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-debug --with-malloc-conf=background_thread:true" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-prof --disable-stats" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-prof --disable-libdl" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-prof --enable-opt-safety-checks" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-prof --with-lg-page=16" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-prof --enable-prof --enable-prof-frameptr" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-prof 
--with-malloc-conf=tcache:false" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-prof --with-malloc-conf=dss:primary" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-prof --with-malloc-conf=percpu_arena:percpu" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-prof --with-malloc-conf=background_thread:true" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--disable-stats --disable-libdl" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--disable-stats --enable-opt-safety-checks" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--disable-stats --with-lg-page=16" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--disable-stats --enable-prof --enable-prof-frameptr" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--disable-stats --with-malloc-conf=tcache:false" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--disable-stats --with-malloc-conf=dss:primary" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--disable-stats --with-malloc-conf=percpu_arena:percpu" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--disable-stats --with-malloc-conf=background_thread:true" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--disable-libdl --enable-opt-safety-checks" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--disable-libdl --with-lg-page=16" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--disable-libdl --enable-prof 
--enable-prof-frameptr" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--disable-libdl --with-malloc-conf=tcache:false" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--disable-libdl --with-malloc-conf=dss:primary" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--disable-libdl --with-malloc-conf=percpu_arena:percpu" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--disable-libdl --with-malloc-conf=background_thread:true" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-opt-safety-checks --with-lg-page=16" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-opt-safety-checks --enable-prof --enable-prof-frameptr" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-opt-safety-checks --with-malloc-conf=tcache:false" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-opt-safety-checks --with-malloc-conf=dss:primary" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-opt-safety-checks --with-malloc-conf=percpu_arena:percpu" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-opt-safety-checks --with-malloc-conf=background_thread:true" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--with-lg-page=16 --enable-prof --enable-prof-frameptr" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--with-lg-page=16 --with-malloc-conf=tcache:false" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--with-lg-page=16 --with-malloc-conf=dss:primary" + EXTRA_CFLAGS: 
"-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--with-lg-page=16 --with-malloc-conf=percpu_arena:percpu" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--with-lg-page=16 --with-malloc-conf=background_thread:true" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-prof --enable-prof-frameptr --with-malloc-conf=tcache:false" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-prof --enable-prof-frameptr --with-malloc-conf=dss:primary" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-prof --enable-prof-frameptr --with-malloc-conf=percpu_arena:percpu" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-prof --enable-prof-frameptr --with-malloc-conf=background_thread:true" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--with-malloc-conf=tcache:false,dss:primary" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--with-malloc-conf=tcache:false,percpu_arena:percpu" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--with-malloc-conf=tcache:false,background_thread:true" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--with-malloc-conf=dss:primary,percpu_arena:percpu" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--with-malloc-conf=dss:primary,background_thread:true" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--with-malloc-conf=percpu_arena:percpu,background_thread:true" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-debug --disable-cache-oblivious 
--enable-stats --enable-log --enable-prof" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-debug --enable-experimental-smallocx --enable-stats --enable-prof" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + + steps: + - uses: actions/checkout@v4 + + - name: Show OS version + run: | + echo "=== System Information ===" + uname -a + echo "" + echo "=== Architecture ===" + uname -m + arch + echo "" + echo "=== OS Release ===" + cat /etc/os-release || true + echo "" + echo "=== CPU Info ===" + lscpu | grep -E "Architecture|CPU op-mode|Byte Order|CPU\(s\):" || true + + - name: Install dependencies (32-bit) + if: matrix.env.CROSS_COMPILE_32BIT == 'yes' + run: | + sudo dpkg --add-architecture i386 + sudo apt-get update + sudo apt-get install -y gcc-multilib g++-multilib libc6-dev-i386 + + - name: Build and test + env: + CC: ${{ matrix.env.CC }} + CXX: ${{ matrix.env.CXX }} + COMPILER_FLAGS: ${{ matrix.env.COMPILER_FLAGS }} + CONFIGURE_FLAGS: ${{ matrix.env.CONFIGURE_FLAGS }} + EXTRA_CFLAGS: ${{ matrix.env.EXTRA_CFLAGS }} + run: | + # Verify the script generates the same output + ./scripts/gen_gh_actions.py > gh_actions_script.yml + + # Run autoconf + autoconf + + # Configure with flags + if [ -n "$COMPILER_FLAGS" ]; then + ./configure CC="${CC} ${COMPILER_FLAGS}" CXX="${CXX} ${COMPILER_FLAGS}" $CONFIGURE_FLAGS + else + ./configure $CONFIGURE_FLAGS + fi + + # Build + make -j3 + make -j3 tests + + # Run tests + make check + + + test-linux-arm64: + runs-on: ubuntu-24.04-arm + strategy: + fail-fast: false + matrix: + include: + - env: + CC: gcc + CXX: g++ + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: clang + CXX: clang++ + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: --enable-debug + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: --enable-prof + EXTRA_CFLAGS: "-Werror 
-Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: --disable-stats + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: --disable-libdl + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: --enable-opt-safety-checks + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: --with-lg-page=16 + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--with-lg-page=16 --with-lg-hugepage=29" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-prof --enable-prof-frameptr" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--with-malloc-conf=tcache:false" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--with-malloc-conf=dss:primary" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--with-malloc-conf=percpu_arena:percpu" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--with-malloc-conf=background_thread:true" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + + steps: + - uses: actions/checkout@v4 + + - name: Show OS version + run: | + echo "=== System Information ===" + uname -a + echo "" + echo "=== Architecture ===" + uname -m + arch + echo "" + echo "=== OS Release ===" + cat /etc/os-release || true + echo "" + echo "=== CPU Info ===" + lscpu | grep -E "Architecture|CPU op-mode|Byte Order|CPU\(s\):" || true + + - name: Install dependencies (32-bit) + if: matrix.env.CROSS_COMPILE_32BIT == 'yes' + run: | + sudo dpkg --add-architecture i386 + sudo apt-get update + sudo apt-get install -y gcc-multilib g++-multilib libc6-dev-i386 + + - name: Build and test + env: + CC: ${{ matrix.env.CC }} + CXX: ${{ matrix.env.CXX }} + COMPILER_FLAGS: ${{ matrix.env.COMPILER_FLAGS }} + 
CONFIGURE_FLAGS: ${{ matrix.env.CONFIGURE_FLAGS }} + EXTRA_CFLAGS: ${{ matrix.env.EXTRA_CFLAGS }} + run: | + # Verify the script generates the same output + ./scripts/gen_gh_actions.py > gh_actions_script.yml + + # Run autoconf + autoconf + + # Configure with flags + if [ -n "$COMPILER_FLAGS" ]; then + ./configure CC="${CC} ${COMPILER_FLAGS}" CXX="${CXX} ${COMPILER_FLAGS}" $CONFIGURE_FLAGS + else + ./configure $CONFIGURE_FLAGS + fi + + # Build + make -j3 + make -j3 tests + + # Run tests + make check + + + diff --git a/.github/workflows/macos-ci.yml b/.github/workflows/macos-ci.yml new file mode 100644 index 00000000..585551d0 --- /dev/null +++ b/.github/workflows/macos-ci.yml @@ -0,0 +1,212 @@ +# This config file is generated by ./scripts/gen_gh_actions.py. +# Do not edit by hand. + +name: macOS CI + +on: + push: + branches: [ dev, ci_travis ] + pull_request: + branches: [ dev ] + +jobs: + test-macos: + runs-on: macos-15-intel + strategy: + fail-fast: false + matrix: + include: + - env: + CC: gcc + CXX: g++ + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + - env: + CC: gcc + CXX: g++ + CROSS_COMPILE_32BIT: yes + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: --enable-debug + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: --disable-stats + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: --disable-libdl + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: --enable-opt-safety-checks + EXTRA_CFLAGS: "-Werror 
-Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: --with-lg-page=16 + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--with-malloc-conf=tcache:false" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--with-malloc-conf=percpu_arena:percpu" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + + steps: + - uses: actions/checkout@v4 + + - name: Show OS version + run: | + echo "=== macOS Version ===" + sw_vers + echo "" + echo "=== Architecture ===" + uname -m + arch + echo "" + echo "=== CPU Info ===" + sysctl -n machdep.cpu.brand_string + sysctl -n hw.machine + + - name: Install dependencies + run: | + brew install autoconf + + - name: Build and test + env: + CC: ${{ matrix.env.CC || 'gcc' }} + CXX: ${{ matrix.env.CXX || 'g++' }} + COMPILER_FLAGS: ${{ matrix.env.COMPILER_FLAGS }} + CONFIGURE_FLAGS: ${{ matrix.env.CONFIGURE_FLAGS }} + EXTRA_CFLAGS: ${{ matrix.env.EXTRA_CFLAGS }} + run: | + # Run autoconf + autoconf + + # Configure with flags + if [ -n "$COMPILER_FLAGS" ]; then + ./configure CC="${CC} ${COMPILER_FLAGS}" CXX="${CXX} ${COMPILER_FLAGS}" $CONFIGURE_FLAGS + else + ./configure $CONFIGURE_FLAGS + fi + + # Build + make -j3 + make -j3 tests + + # Run tests + make check + + + test-macos-arm64: + runs-on: macos-15 + strategy: + fail-fast: false + matrix: + include: + - env: + CC: gcc + CXX: g++ + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + - env: + CC: gcc + CXX: g++ + CROSS_COMPILE_32BIT: yes + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option 
-Wno-ignored-attributes -Wno-deprecated-declarations" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: --enable-debug + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: --disable-stats + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: --disable-libdl + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: --enable-opt-safety-checks + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: --with-lg-page=16 + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--with-lg-page=16 --with-lg-hugepage=29" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--with-malloc-conf=tcache:false" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--with-malloc-conf=percpu_arena:percpu" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + + steps: + - uses: actions/checkout@v4 + + - name: Show OS version + run: | + echo "=== macOS Version ===" + sw_vers + echo "" + echo "=== Architecture ===" + uname -m + arch + echo "" + echo "=== CPU Info ===" + sysctl -n machdep.cpu.brand_string + sysctl -n hw.machine + + - name: Install dependencies + run: | + brew install autoconf + + - 
name: Build and test + env: + CC: ${{ matrix.env.CC || 'gcc' }} + CXX: ${{ matrix.env.CXX || 'g++' }} + COMPILER_FLAGS: ${{ matrix.env.COMPILER_FLAGS }} + CONFIGURE_FLAGS: ${{ matrix.env.CONFIGURE_FLAGS }} + EXTRA_CFLAGS: ${{ matrix.env.EXTRA_CFLAGS }} + run: | + # Run autoconf + autoconf + + # Configure with flags + if [ -n "$COMPILER_FLAGS" ]; then + ./configure CC="${CC} ${COMPILER_FLAGS}" CXX="${CXX} ${COMPILER_FLAGS}" $CONFIGURE_FLAGS + else + ./configure $CONFIGURE_FLAGS + fi + + # Build + make -j3 + make -j3 tests + + # Run tests + make check + + + diff --git a/.github/workflows/static_analysis.yaml b/.github/workflows/static_analysis.yaml new file mode 100644 index 00000000..29e617fc --- /dev/null +++ b/.github/workflows/static_analysis.yaml @@ -0,0 +1,68 @@ +name: 'Static Analysis' +on: [pull_request] +jobs: + static-analysis: + runs-on: ubuntu-latest + steps: + # We build libunwind ourselves because sadly the version + # provided by Ubuntu via apt-get is much too old. + - name: Check out libunwind + uses: actions/checkout@v4 + with: + repository: libunwind/libunwind + path: libunwind + ref: 'v1.6.2' + github-server-url: 'https://github.com' + - name: Install libunwind + run: | + cd libunwind + autoreconf -i + ./configure --prefix=/usr + make -s -j $(nproc) V=0 + sudo make -s install V=0 + cd .. + rm -rf libunwind + - name: Check out repository + uses: actions/checkout@v4 + # We download LLVM directly from the latest stable release + # on GitHub, because this tends to be much newer than the + # version available via apt-get in Ubuntu. 
+ - name: Download LLVM + uses: dsaltares/fetch-gh-release-asset@master + with: + repo: 'llvm/llvm-project' + version: 'tags/llvmorg-16.0.4' + file: 'clang[+]llvm-.*x86_64-linux-gnu.*' + regex: true + target: 'llvm_assets/' + token: ${{ secrets.GITHUB_TOKEN }} + - name: Install prerequisites + id: install_prerequisites + run: | + tar -C llvm_assets -xaf llvm_assets/*.tar* & + sudo apt-get update + sudo apt-get install -y jq bear python3-pip + pip install codechecker + echo "Extracting LLVM from tar" 1>&2 + wait + echo "LLVM_BIN_DIR=$(echo llvm_assets/clang*/bin)" >> "$GITHUB_OUTPUT" + - name: Run static analysis + id: run_static_analysis + run: > + PATH="${{ steps.install_prerequisites.outputs.LLVM_BIN_DIR }}:$PATH" + LDFLAGS='-L/usr/lib' + scripts/run_static_analysis.sh static_analysis_results "$GITHUB_OUTPUT" + - name: Upload static analysis results + if: ${{ steps.run_static_analysis.outputs.HAS_STATIC_ANALYSIS_RESULTS == '1' }} # comparison must live inside the expression; trailing text made this always true + uses: actions/upload-artifact@v4 + with: + name: static_analysis_results + path: static_analysis_results + - name: Check static analysis results + run: | + if [[ "${{ steps.run_static_analysis.outputs.HAS_STATIC_ANALYSIS_RESULTS }}" == '1' ]] + then + echo "::error::Static analysis found issues with your code. Download the 'static_analysis_results' artifact from this workflow and view the 'index.html' file contained within it in a web browser locally for detailed results." + exit 1 + fi + diff --git a/.github/workflows/windows-ci.yml b/.github/workflows/windows-ci.yml new file mode 100644 index 00000000..f40ba086 --- /dev/null +++ b/.github/workflows/windows-ci.yml @@ -0,0 +1,155 @@ +# This config file is generated by ./scripts/gen_gh_actions.py. +# Do not edit by hand. 
+ +name: Windows CI + +on: + push: + branches: [ dev, ci_travis ] + pull_request: + branches: [ dev ] + +jobs: + test-windows: + runs-on: windows-latest + strategy: + fail-fast: false + matrix: + include: + - env: + CC: gcc + CXX: g++ + EXTRA_CFLAGS: -fcommon + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: --enable-debug + EXTRA_CFLAGS: -fcommon + - env: + CC: cl.exe + CXX: cl.exe + - env: + CC: gcc + CXX: g++ + CROSS_COMPILE_32BIT: yes + EXTRA_CFLAGS: -fcommon + - env: + CC: cl.exe + CXX: cl.exe + CONFIGURE_FLAGS: --enable-debug + - env: + CC: gcc + CXX: g++ + CROSS_COMPILE_32BIT: yes + CONFIGURE_FLAGS: --enable-debug + EXTRA_CFLAGS: -fcommon + - env: + CC: cl.exe + CXX: cl.exe + CROSS_COMPILE_32BIT: yes + - env: + CC: cl.exe + CXX: cl.exe + CROSS_COMPILE_32BIT: yes + CONFIGURE_FLAGS: --enable-debug + + steps: + - uses: actions/checkout@v4 + + - name: Show OS version + shell: cmd + run: | + echo === Windows Version === + systeminfo | findstr /B /C:"OS Name" /C:"OS Version" + ver + echo. + echo === Architecture === + echo PROCESSOR_ARCHITECTURE=%PROCESSOR_ARCHITECTURE% + echo. 
+ + - name: Setup MSYS2 + uses: msys2/setup-msys2@v2 + with: + msystem: ${{ matrix.env.CROSS_COMPILE_32BIT == 'yes' && 'MINGW32' || 'MINGW64' }} + update: true + install: >- + autotools + git + pacboy: >- + make:p + gcc:p + binutils:p + + - name: Build and test (MinGW-GCC) + if: matrix.env.CC != 'cl.exe' + shell: msys2 {0} + env: + CC: ${{ matrix.env.CC || 'gcc' }} + CXX: ${{ matrix.env.CXX || 'g++' }} + COMPILER_FLAGS: ${{ matrix.env.COMPILER_FLAGS }} + CONFIGURE_FLAGS: ${{ matrix.env.CONFIGURE_FLAGS }} + EXTRA_CFLAGS: ${{ matrix.env.EXTRA_CFLAGS }} + run: | + # Run autoconf + autoconf + + # Configure with flags + if [ -n "$COMPILER_FLAGS" ]; then + ./configure CC="${CC} ${COMPILER_FLAGS}" CXX="${CXX} ${COMPILER_FLAGS}" $CONFIGURE_FLAGS + else + ./configure $CONFIGURE_FLAGS + fi + + # Build (mingw32-make is the "make" command in MSYS2) + mingw32-make -j3 + mingw32-make tests + + # Run tests + mingw32-make -k check + + - name: Setup MSVC environment + if: matrix.env.CC == 'cl.exe' + uses: ilammy/msvc-dev-cmd@v1 + with: + arch: ${{ matrix.env.CROSS_COMPILE_32BIT == 'yes' && 'x86' || 'x64' }} + + - name: Build and test (MSVC) + if: matrix.env.CC == 'cl.exe' + shell: msys2 {0} + env: + CONFIGURE_FLAGS: ${{ matrix.env.CONFIGURE_FLAGS }} + MSYS2_PATH_TYPE: inherit + run: | + # Export MSVC environment variables for configure + export CC=cl.exe + export CXX=cl.exe + export AR=lib.exe + export NM=dumpbin.exe + export RANLIB=: + + # Verify cl.exe is accessible (should be in PATH via inherit) + if ! which cl.exe > /dev/null 2>&1; then + echo "cl.exe not found, trying to locate MSVC..." 
+ # Find and add MSVC bin directory to PATH + MSVC_BIN=$(cmd.exe /c "echo %VCToolsInstallDir%" | tr -d '\\r' | sed 's/\\\\\\\\/\//g' | sed 's/C:/\\/c/g') + if [ -n "$MSVC_BIN" ]; then + export PATH="$PATH:$MSVC_BIN/bin/Hostx64/x64:$MSVC_BIN/bin/Hostx86/x86" + fi + fi + + # Run autoconf + autoconf + + # Configure with MSVC + ./configure CC=cl.exe CXX=cl.exe AR=lib.exe $CONFIGURE_FLAGS + + # Build (mingw32-make is the "make" command in MSYS2) + mingw32-make -j3 + # Build tests sequentially due to PDB file issues + mingw32-make tests + + # Run tests + mingw32-make -k check + + + diff --git a/.gitignore b/.gitignore index 6607a5fd..95dbaa5f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,25 +1,108 @@ -/autom4te.cache/ +/bin/jemalloc-config +/bin/jemalloc.sh +/bin/jeprof + /config.stamp /config.log /config.status /configure + /doc/html.xsl /doc/manpages.xsl /doc/jemalloc.xml /doc/jemalloc.html /doc/jemalloc.3 + +/doc_internal/PROFILING_INTERNALS.pdf + +/jemalloc.pc + /lib/ + /Makefile -/include/jemalloc/internal/jemalloc_internal\.h -/include/jemalloc/internal/size_classes\.h -/include/jemalloc/jemalloc\.h -/include/jemalloc/jemalloc_defs\.h -/test/jemalloc_test\.h + +/include/jemalloc/internal/jemalloc_preamble.h +/include/jemalloc/internal/jemalloc_internal_defs.h +/include/jemalloc/internal/private_namespace.gen.h +/include/jemalloc/internal/private_namespace.h +/include/jemalloc/internal/private_namespace_jet.gen.h +/include/jemalloc/internal/private_namespace_jet.h +/include/jemalloc/internal/private_symbols.awk +/include/jemalloc/internal/private_symbols_jet.awk +/include/jemalloc/internal/public_namespace.h +/include/jemalloc/internal/public_symbols.txt +/include/jemalloc/internal/public_unnamespace.h +/include/jemalloc/jemalloc.h +/include/jemalloc/jemalloc_defs.h +/include/jemalloc/jemalloc_macros.h +/include/jemalloc/jemalloc_mangle.h +/include/jemalloc/jemalloc_mangle_jet.h +/include/jemalloc/jemalloc_protos.h +/include/jemalloc/jemalloc_protos_jet.h 
+/include/jemalloc/jemalloc_rename.h +/include/jemalloc/jemalloc_typedefs.h + /src/*.[od] -/test/*.[od] -/test/*.out -/test/[a-zA-Z_]* -!test/*.c -!test/*.exp +/src/*.sym + +# These are semantically meaningful for clangd and related tooling. +/build/ +/.cache/ +compile_commands.json +/static_analysis_raw_results +/static_analysis_results + +/run_tests.out/ + +/test/test.sh +test/include/test/jemalloc_test.h +test/include/test/jemalloc_test_defs.h + +/test/integration/[A-Za-z]* +!/test/integration/cpp/ +!/test/integration/[A-Za-z]*.* +/test/integration/*.[od] +/test/integration/*.out + +/test/integration/cpp/[A-Za-z]* +!/test/integration/cpp/[A-Za-z]*.* +/test/integration/cpp/*.[od] +/test/integration/cpp/*.out + +/test/src/*.[od] + +/test/stress/[A-Za-z]* +!/test/stress/[A-Za-z]*.* +!/test/stress/pa/ +/test/stress/*.[od] +/test/stress/*.out + +/test/unit/[A-Za-z]* +!/test/unit/[A-Za-z]*.* +/test/unit/*.[od] +/test/unit/*.out + +/test/analyze/[A-Za-z]* +!/test/analyze/[A-Za-z]*.* +/test/analyze/*.[od] +/test/analyze/*.out + /VERSION -/bin/jemalloc.sh + +*.pdb +*.sdf +*.opendb +*.VC.db +*.opensdf +*.cachefile +*.suo +*.user +*.sln.docstates +*.tmp +.vs/ +/msvc/Win32/ +/msvc/x64/ +/msvc/projects/*/*/Debug*/ +/msvc/projects/*/*/Release*/ +/msvc/projects/*/*/Win32/ +/msvc/projects/*/*/x64/ diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 00000000..643da4f1 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,365 @@ +# This config file is generated by ./scripts/gen_travis.py. +# Do not edit by hand. + +# We use 'minimal', because 'generic' makes Windows VMs hang at startup. Also +# the software provided by 'generic' is simply not needed for our tests. 
+# Differences are explained here: +# https://docs.travis-ci.com/user/languages/minimal-and-generic/ +language: minimal +dist: jammy + +jobs: + include: + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=clang CXX=clang++ EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-prof-frameptr" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror 
-Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=clang CXX=clang++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + - os: linux + arch: amd64 + env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + - os: linux + arch: amd64 + env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + - os: linux + arch: amd64 + env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + - os: linux + arch: amd64 + env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + - os: linux + arch: amd64 + env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + - os: linux + arch: amd64 + env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + - os: linux + arch: amd64 + env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--enable-prof --enable-prof-frameptr" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + - os: linux + arch: amd64 + env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + - os: linux + arch: amd64 + env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + - os: linux + arch: amd64 + env: CC=clang CXX=clang++ 
CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + - os: linux + arch: amd64 + env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-prof --enable-prof-frameptr" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: 
CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --enable-prof --enable-prof-frameptr" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --disable-stats" EXTRA_CFLAGS="-Werror 
-Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-prof --enable-prof-frameptr" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --enable-prof --enable-prof-frameptr" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" 
+ - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --enable-prof --enable-prof-frameptr" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks --with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks --enable-prof --enable-prof-frameptr" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ 
CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --enable-prof --enable-prof-frameptr" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-prof-frameptr --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-prof-frameptr --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-prof-frameptr --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - 
os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-prof-frameptr --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false,dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false,percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false,background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=dss:primary,percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=dss:primary,background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu,background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: arm64 + env: CC=gcc CXX=g++ EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: arm64 + env: CC=clang CXX=clang++ EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + - os: linux + arch: arm64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: arm64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: arm64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: arm64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: arm64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks" 
EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: arm64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: arm64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --with-lg-hugepage=29" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: arm64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-prof-frameptr" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: arm64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: arm64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: arm64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: arm64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + # Development build + - os: linux + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --disable-cache-oblivious --enable-stats --enable-log --enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + # --enable-expermental-smallocx: + - os: linux + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --enable-experimental-smallocx --enable-stats --enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + + +before_install: + - |- + if test -f "./scripts/$TRAVIS_OS_NAME/before_install.sh"; then + source ./scripts/$TRAVIS_OS_NAME/before_install.sh + fi + +before_script: + - |- + if test -f "./scripts/$TRAVIS_OS_NAME/before_script.sh"; then + source ./scripts/$TRAVIS_OS_NAME/before_script.sh + else + scripts/gen_travis.py > travis_script && diff .travis.yml travis_script + autoconf + # If COMPILER_FLAGS are not empty, add them to CC and CXX + ./configure ${COMPILER_FLAGS:+ CC="$CC $COMPILER_FLAGS" CXX="$CXX $COMPILER_FLAGS"} $CONFIGURE_FLAGS + 
make -j3 + make -j3 tests + fi + +script: + - |- + if test -f "./scripts/$TRAVIS_OS_NAME/script.sh"; then + source ./scripts/$TRAVIS_OS_NAME/script.sh + else + make check + fi + diff --git a/COPYING b/COPYING index 019e8132..3b7fd358 100644 --- a/COPYING +++ b/COPYING @@ -1,10 +1,10 @@ Unless otherwise specified, files in the jemalloc source distribution are subject to the following license: -------------------------------------------------------------------------------- -Copyright (C) 2002-2013 Jason Evans . +Copyright (C) 2002-present Jason Evans . All rights reserved. Copyright (C) 2007-2012 Mozilla Foundation. All rights reserved. -Copyright (C) 2009-2013 Facebook, Inc. All rights reserved. +Copyright (C) 2009-present Facebook, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: diff --git a/ChangeLog b/ChangeLog index 65782253..3bc84360 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,10 +1,1379 @@ Following are change highlights associated with official releases. Important -bug fixes are all mentioned, but internal enhancements are omitted here for -brevity (even though they are more fun to write about). Much more detail can be -found in the git revision history: +bug fixes are all mentioned, but some internal enhancements are omitted here for +brevity. Much more detail can be found in the git revision history: - http://www.canonware.com/cgi-bin/gitweb.cgi?p=jemalloc.git - git://canonware.com/jemalloc.git + https://github.com/jemalloc/jemalloc + +* 5.3.1 (Apr 13, 2026) + +This release includes over 390 commits spanning bug fixes, new features, +performance optimizations, and portability improvements. Multiple percent +of system-level metric improvements were measured in tested production +workloads. The release has gone through large-scale production testing +at Meta. + +New features: + - Support pvalloc. 
(@Lapenkov: 5b1f2cc5) + - Add double free detection for the debug build. (@izaitsevfb: + 36366f3c, @guangli-dai: 42daa1ac, @divanorama: 1897f185) + - Add compile-time option `--enable-pageid` to enable memory mapping + annotation. (@devnexen: 4fc5c4fb) + - Add runtime option `prof_bt_max` to control the max stack depth for + profiling. (@guangli-dai: a0734fd6) + - Add compile-time option `--enable-force-getenv` to use `getenv` instead + of `secure_getenv`. (@interwq: 481bbfc9) + - Add compile-time option `--disable-dss` to disable the usage of + `sbrk(2)`. (@Svetlitski: ea5b7bea) + - Add runtime option `tcache_ncached_max` to control the number of items + in each size bin in the thread cache. (@guangli-dai: 8a22d10b) + - Add runtime option `calloc_madvise_threshold` to determine if kernel or + memset is used to zero the allocations for calloc. (@nullptr0-0: + 5081c16b) + - Add compile-time option `--disable-user-config` to disable reading the + runtime configurations from `/etc/malloc.conf` or environment variable + `MALLOC_CONF`. (@roblabla: c17bf8b3) + - Add runtime option `disable_large_size_classes` to guard the new usable + size calculation, which minimizes the memory overhead for large + allocations, i.e., >= 4 * PAGE. (@guangli-dai: c067a55c, 8347f104) + - Enable process_madvise usage, add runtime option + `process_madvise_max_batch` to control the max # of regions in each + madvise batch. (@interwq: 22440a02, @spredolac: 4246475b) + - Add mallctl interfaces: + + `opt.prof_bt_max` (@guangli-dai: a0734fd6) + + `arena.<i>.name` to set and get arena names. (@guangli-dai: ba19d2cb) + + `thread.tcache.max` to set and get the `tcache_max` of the current + thread. (@guangli-dai: a442d9b8) + + `thread.tcache.ncached_max.write` and + `thread.tcache.ncached_max.read_sizeclass` to set and get the + `ncached_max` setup of the current thread. (@guangli-dai: 630f7de9, + 6b197fdd) + + `arenas.hugepage` to return the hugepage size used, also exported to + malloc stats. 
(@ilvokhin: 90c627ed) + + `approximate_stats.active` to return an estimate of the current active + bytes, which should not be compared with other stats retrieved. + (@guangli-dai: 0988583d) + +Bug fixes: + - Prevent potential deadlocks in decaying during reentrancy. (@interwq: + 434a68e2) + - Fix segfault in extent coalescing. (@Svetlitski: 12311fe6) + - Add null pointer detections in mallctl calls. (@Svetlitski: dc0a184f, + 0288126d) + - Make mallctl `arenas.lookup` triable without crashing on invalid + pointers. (@auxten: 019cccc2, 5bac3849) + - Demote sampled allocations for proper deallocations during + `arena_reset`. (@Svetlitski: 62648c88) + - Fix jemalloc's `read(2)` and `write(2)`. (@Svetlitski: d2c9ed3d, @lexprfuncall: + 9fdc1160) + - Fix the pkg-config metadata file. (@BtbN: ed7e6fe7, ce8ce99a) + - Fix the autogen.sh so that it accepts quoted extra options. + (@honggyukim: f6fe6abd) + - Fix `rallocx()` to set errno to ENOMEM upon OOMing. (@arter97: 38056fea, + @interwq: 83b07578) + - Avoid stack overflow for internal variable array usage. (@nullptr0-0: + 47c9bcd4, 48f66cf4, @xinydev: 9169e927) + - Fix background thread initialization race. (@puzpuzpuz: 4d0ffa07) + - Guard os_page_id against a NULL address. (@lexprfuncall: 79cc7dcc) + - Handle tcache init failures gracefully. (@lexprfuncall: a056c20d) + - Fix missing release of acquired neighbor edata in + extent_try_coalesce_impl. (@spredolac: 675ab079) + - Fix memory leak of old curr_reg on san_bump_grow_locked failure. + (@spredolac: 5904a421) + - Fix large alloc nrequests under-counting on cache misses. (@spredolac: + 3cc56d32) + +Portability improvements: + - Fix the build in C99. (@abaelhe: 56ddbea2) + - Add `pthread_setaffinity_np` detection for non Linux/BSD platforms. + (@devnexen: 4c95c953) + - Make `VARIABLE_ARRAY` compatible with compilers not supporting VLA, + i.e., Visual Studio C compiler in C11 or C17 modes. (@madscientist: + be65438f) + - Fix the build on Linux using musl library. 
(@marv: aba1645f, 45249cf5) + - Reduce the memory overhead in small allocation sampling for systems + with larger page sizes, e.g., ARM. (@Svetlitski: 5a858c64) + - Add C23's `free_sized` and `free_aligned_sized`. (@Svetlitski: + cdb2c0e0) + - Enable heap profiling on MacOS. (@nullptr0-0: 4b555c11) + - Fix incorrect printing on 32bit. (@sundb: 630434bb) + - Make `JEMALLOC_CXX_THROW` compatible with C++ versions newer than + C++17. (@r-barnes, @guangli-dai: 21bcc0a8) + - Fix mmap tag conflicts on MacOS. (@kdrag0n: c893fcd1) + - Fix monotonic timer assumption for win32. (@burtonli: 8dc97b11) + - Fix VM over-reservation on systems with larger pages, e.g., aarch64. + (@interwq: cd05b19f) + - Remove `unreachable()` macro conditionally to prevent definition + conflicts for C23+. (@appujee: d8486b26, 4b88bddb) + - Fix dlsym failure observed on FreeBSD. (@rhelmot: 86bbabac) + - Change the default page size to 64KB on aarch64 Linux. (@lexprfuncall: + 9442300c) + - Update config.guess and config.sub to the latest version. + (@lexprfuncall: c51949ea) + - Determine the page size on Android from NDK header files. + (@lexprfuncall: c51abba1) + - Improve the portability of grep patterns in configure.ac. + (@lexprfuncall: 365747bc) + - Add compile-time option `--with-cxx-stdlib` to specify the C++ standard + library. (@yuxuanchen1997: a10ef3e1) + +Optimizations and refactors: + - Enable tcache for deallocation-only threads. (@interwq: 143e9c4a) + - Inline to accelerate operator delete. (@guangli-dai: e8f9f138) + - Optimize pairing heap's performance. (@deadalnix: 5266152d, be6da4f6, + 543e2d61, 10d71315, 92aa52c0, @Svetlitski: 36ca0c1b) + - Inline the storage for thread name in the profiling data. (@interwq: + ce0b7ab6, e62aa478) + - Optimize a hot function `edata_cmp_summary_comp` to accelerate it. + (@Svetlitski: 6841110b, @guangli-dai: 0181aaa4) + - Allocate thread cache using the base allocator, which enables thread + cache to use thp when `metadata_thp` is turned on. 
(@interwq: + 72cfdce7) + - Allow oversize arena not to purge immediately when background threads + are enabled, although the default decay time is 0 to remain backward compatible. + (@interwq: d1313313) + - Optimize thread-local storage implementation on Windows. (@mcfi: + 9e123a83, 3a0d9cda) + - Optimize fast path to allow static size class computation. (@interwq: + 323ed2e3) + - Redesign tcache GC to regulate the frequency and make it + locality-aware. The new design is on by default, guarded by option + `experimental_tcache_gc`. (@nullptr0-0: 0c88be9e, e2c9f3a9, + 14d5dc13, @deadalnix: 5afff2e4) + - Reduce the arena switching overhead by avoiding forced purging when + background thread is enabled. (@interwq: a3910b98) + - Improve the reuse efficiency by limiting the maximum coalesced size for + large extents. (@jiebinn: 3c14707b) + - Refactor thread events to allow registration of users' thread events + and remove prof_threshold as the built-in event. (@spredolac: e6864c60, + 015b0179, 34ace916) + +Documentation: + - Update Windows building instructions. (@Lapenkov: 37139328) + - Add vcpkg installation instructions. (@LilyWangLL: c0c9783e) + - Update profiling internals with an example. (@jordalgo: b04e7666) + +* 5.3.0 (May 6, 2022) + + This release contains many speed and space optimizations, from micro + optimizations on common paths to rework of internal data structures and + locking schemes, and many more too detailed to list below. Multiple percent + of system level metric improvements were measured in tested production + workloads. The release has gone through large-scale production testing. + + New features: + - Add the thread.idle mallctl which hints that the calling thread will be + idle for a nontrivial period of time. (@davidtgoldblatt) + - Allow small size classes to be the maximum size class to cache in the + thread-specific cache, through the opt.[lg_]tcache_max option.
(@interwq, + @jordalgo) + - Make the behavior of realloc(ptr, 0) configurable with opt.zero_realloc. + (@davidtgoldblatt) + - Add 'make uninstall' support. (@sangshuduo, @Lapenkov) + - Support C++17 over-aligned allocation. (@marksantaniello) + - Add the thread.peak mallctl for approximate per-thread peak memory tracking. + (@davidtgoldblatt) + - Add interval-based stats output opt.stats_interval. (@interwq) + - Add prof.prefix to override filename prefixes for dumps. (@zhxchen17) + - Add high resolution timestamp support for profiling. (@tyroguru) + - Add the --collapsed flag to jeprof for flamegraph generation. + (@igorwwwwwwwwwwwwwwwwwwww) + - Add the --debug-syms-by-id option to jeprof for debug symbols discovery. + (@DeannaGelbart) + - Add the opt.prof_leak_error option to exit with an error code when a leak is + detected using opt.prof_final. (@yunxuo) + - Add opt.cache_oblivious as a runtime alternative to config.cache_oblivious. + (@interwq) + - Add mallctl interfaces: + + opt.zero_realloc (@davidtgoldblatt) + + opt.cache_oblivious (@interwq) + + opt.prof_leak_error (@yunxuo) + + opt.stats_interval (@interwq) + + opt.stats_interval_opts (@interwq) + + opt.tcache_max (@interwq) + + opt.trust_madvise (@azat) + + prof.prefix (@zhxchen17) + + stats.zero_reallocs (@davidtgoldblatt) + + thread.idle (@davidtgoldblatt) + + thread.peak.{read,reset} (@davidtgoldblatt) + + Bug fixes: + - Fix the synchronization around explicit tcache creation which could cause + invalid tcache identifiers. This regression was first released in 5.0.0. + (@yoshinorim, @davidtgoldblatt) + - Fix a profiling biasing issue which could cause incorrect heap usage and + object counts. This issue existed in all previous releases with the heap + profiling feature. (@davidtgoldblatt) + - Fix the order of stats counter updating on large realloc which could cause + failed assertions. This regression was first released in 5.0.0.
(@azat) + - Fix the locking on the arena destroy mallctl, which could cause concurrent + arena creations to fail. This functionality was first introduced in 5.0.0. + (@interwq) + + Portability improvements: + - Remove nothrow from system function declarations on macOS and FreeBSD. + (@davidtgoldblatt, @fredemmott, @leres) + - Improve overcommit and page alignment settings on NetBSD. (@zoulasc) + - Improve CPU affinity support on BSD platforms. (@devnexen) + - Improve utrace detection and support. (@devnexen) + - Improve QEMU support with MADV_DONTNEED zeroed pages detection. (@azat) + - Add memcntl support on Solaris / illumos. (@devnexen) + - Improve CPU_SPINWAIT on ARM. (@AWSjswinney) + - Improve TSD cleanup on FreeBSD. (@Lapenkov) + - Disable percpu_arena if the CPU count cannot be reliably detected. (@azat) + - Add malloc_size(3) override support. (@devnexen) + - Add mmap VM_MAKE_TAG support. (@devnexen) + - Add support for MADV_[NO]CORE. (@devnexen) + - Add support for DragonFlyBSD. (@devnexen) + - Fix the QUANTUM setting on MIPS64. (@brooksdavis) + - Add the QUANTUM setting for ARC. (@vineetgarc) + - Add the QUANTUM setting for LoongArch. (@wangjl-uos) + - Add QNX support. (@jqian-aurora) + - Avoid atexit(3) calls unless the relevant profiling features are enabled. + (@BusyJay, @laiwei-rice, @interwq) + - Fix unknown option detection when using Clang. (@Lapenkov) + - Fix symbol conflict with musl libc. (@georgthegreat) + - Add -Wimplicit-fallthrough checks. (@nickdesaulniers) + - Add __forceinline support on MSVC. (@santagada) + - Improve FreeBSD and Windows CI support. (@Lapenkov) + - Add CI support for PPC64LE architecture. (@ezeeyahoo) + + Incompatible changes: + - Maximum size class allowed in tcache (opt.[lg_]tcache_max) now has an upper + bound of 8MiB. (@interwq) + + Optimizations and refactors (@davidtgoldblatt, @Lapenkov, @interwq): + - Optimize the common cases of the thread cache operations. 
+ - Optimize internal data structures, including RB tree and pairing heap. + - Optimize the internal locking on extent management. + - Extract and refactor the internal page allocator and interface modules. + + Documentation: + - Fix doc build with --with-install-suffix. (@lawmurray, @interwq) + - Add PROFILING_INTERNALS.md. (@davidtgoldblatt) + - Ensure the proper order of doc building and installation. (@Mingli-Yu) + +* 5.2.1 (August 5, 2019) + + This release is primarily about Windows. A critical virtual memory leak is + resolved on all Windows platforms. The regression was present in all releases + since 5.0.0. + + Bug fixes: + - Fix a severe virtual memory leak on Windows. This regression was first + released in 5.0.0. (@Ignition, @j0t, @frederik-h, @davidtgoldblatt, + @interwq) + - Fix size 0 handling in posix_memalign(). This regression was first released + in 5.2.0. (@interwq) + - Fix the prof_log unit test which may observe unexpected backtraces from + compiler optimizations. The test was first added in 5.2.0. (@marxin, + @gnzlbg, @interwq) + - Fix the declaration of the extent_avail tree. This regression was first + released in 5.1.0. (@zoulasc) + - Fix an incorrect reference in jeprof. This functionality was first released + in 3.0.0. (@prehistoric-penguin) + - Fix an assertion on the deallocation fast-path. This regression was first + released in 5.2.0. (@yinan1048576) + - Fix the TLS_MODEL attribute in headers. This regression was first released + in 5.0.0. (@zoulasc, @interwq) + + Optimizations and refactors: + - Implement opt.retain on Windows and enable by default on 64-bit. (@interwq, + @davidtgoldblatt) + - Optimize away a branch on the operator delete[] path. (@mgrice) + - Add format annotation to the format generator function. (@zoulasc) + - Refactor and improve the size class header generation. (@yinan1048576) + - Remove best fit. (@djwatson) + - Avoid blocking on background thread locks for stats. 
(@oranagra, @interwq) + +* 5.2.0 (April 2, 2019) + + This release includes a few notable improvements, which are summarized below: + 1) improved fast-path performance from the optimizations by @djwatson; 2) + reduced virtual memory fragmentation and metadata usage; and 3) bug fixes on + setting the number of background threads. In addition, peak / spike memory + usage is improved with certain allocation patterns. As usual, the release and + prior dev versions have gone through large-scale production testing. + + New features: + - Implement oversize_threshold, which uses a dedicated arena for allocations + crossing the specified threshold to reduce fragmentation. (@interwq) + - Add extents usage information to stats. (@tyleretzel) + - Log time information for sampled allocations. (@tyleretzel) + - Support 0 size in sdallocx. (@djwatson) + - Output rate for certain counters in malloc_stats. (@zinoale) + - Add configure option --enable-readlinkat, which allows the use of readlinkat + over readlink. (@davidtgoldblatt) + - Add configure options --{enable,disable}-{static,shared} to allow not + building unwanted libraries. (@Ericson2314) + - Add configure option --disable-libdl to enable fully static builds. + (@interwq) + - Add mallctl interfaces: + + opt.oversize_threshold (@interwq) + + stats.arenas..extent_avail (@tyleretzel) + + stats.arenas..extents..n{dirty,muzzy,retained} (@tyleretzel) + + stats.arenas..extents..{dirty,muzzy,retained}_bytes + (@tyleretzel) + + Portability improvements: + - Update MSVC builds. (@maksqwe, @rustyx) + - Work around a compiler optimizer bug on s390x. (@rkmisra) + - Make use of pthread_set_name_np(3) on FreeBSD. (@trasz) + - Implement malloc_getcpu() to enable percpu_arena on Windows. (@santagada) + - Link against -pthread instead of -lpthread. (@paravoid) + - Make background_thread not dependent on libdl. (@interwq) + - Add stringify to fix a linker directive issue on MSVC.
(@daverigby) + - Detect and fall back when 8-bit atomics are unavailable. (@interwq) + - Fall back to the default pthread_create if dlsym(3) fails. (@interwq) + + Optimizations and refactors: + - Refactor the TSD module. (@davidtgoldblatt) + - Avoid taking extents_muzzy mutex when muzzy is disabled. (@interwq) + - Avoid taking large_mtx for auto arenas on the tcache flush path. (@interwq) + - Optimize ixalloc by avoiding a size lookup. (@interwq) + - Implement opt.oversize_threshold which uses a dedicated arena for requests + crossing the threshold, also eagerly purges the oversize extents. Default + the threshold to 8 MiB. (@interwq) + - Clean compilation with -Wextra. (@gnzlbg, @jasone) + - Refactor the size class module. (@davidtgoldblatt) + - Refactor the stats emitter. (@tyleretzel) + - Optimize pow2_ceil. (@rkmisra) + - Avoid runtime detection of lazy purging on FreeBSD. (@trasz) + - Optimize mmap(2) alignment handling on FreeBSD. (@trasz) + - Improve error handling for THP state initialization. (@jsteemann) + - Rework the malloc() fast path. (@djwatson) + - Rework the free() fast path. (@djwatson) + - Refactor and optimize the tcache fill / flush paths. (@djwatson) + - Optimize sync / lwsync on PowerPC. (@chmeeedalf) + - Bypass extent_dalloc() when retain is enabled. (@interwq) + - Optimize the locking on large deallocation. (@interwq) + - Reduce the number of pages committed from sanity checking in debug build. + (@trasz, @interwq) + - Deprecate OSSpinLock. (@interwq) + - Lower the default number of background threads to 4 (when the feature + is enabled). (@interwq) + - Optimize the trylock spin wait. (@djwatson) + - Use arena index for arena-matching checks. (@interwq) + - Avoid forced decay on thread termination when using background threads. + (@interwq) + - Disable muzzy decay by default. (@djwatson, @interwq) + - Only initialize libgcc unwinder when profiling is enabled. 
(@paravoid, + @interwq) + + Bug fixes (all only relevant to jemalloc 5.x): + - Fix background thread index issues with max_background_threads. (@djwatson, + @interwq) + - Fix stats output for opt.lg_extent_max_active_fit. (@interwq) + - Fix opt.prof_prefix initialization. (@davidtgoldblatt) + - Properly trigger decay on tcache destroy. (@interwq, @amosbird) + - Fix tcache.flush. (@interwq) + - Detect whether explicit extent zero out is necessary with huge pages or + custom extent hooks, which may change the purge semantics. (@interwq) + - Fix a side effect caused by extent_max_active_fit combined with decay-based + purging, where freed extents can accumulate and not be reused for an + extended period of time. (@interwq, @mpghf) + - Fix a missing unlock on extent register error handling. (@zoulasc) + + Testing: + - Simplify the Travis script output. (@gnzlbg) + - Update the test scripts for FreeBSD. (@devnexen) + - Add unit tests for the producer-consumer pattern. (@interwq) + - Add Cirrus-CI config for FreeBSD builds. (@jasone) + - Add size-matching sanity checks on tcache flush. (@davidtgoldblatt, + @interwq) + + Incompatible changes: + - Remove --with-lg-page-sizes. (@davidtgoldblatt) + + Documentation: + - Attempt to build docs by default, however skip doc building when xsltproc + is missing. (@interwq, @cmuellner) + +* 5.1.0 (May 4, 2018) + + This release is primarily about fine-tuning, ranging from several new features + to numerous notable performance and portability enhancements. The release and + prior dev versions have been running in multiple large scale applications for + months, and the cumulative improvements are substantial in many cases. + + Given the long and successful production runs, this release is likely a good + candidate for applications to upgrade, from both jemalloc 5.0 and before. For + performance-critical applications, the newly added TUNING.md provides + guidelines on jemalloc tuning. 
+ + New features: + - Implement transparent huge page support for internal metadata. (@interwq) + - Add opt.thp to allow enabling / disabling transparent huge pages for all + mappings. (@interwq) + - Add maximum background thread count option. (@djwatson) + - Allow prof_active to control opt.lg_prof_interval and prof.gdump. + (@interwq) + - Allow arena index lookup based on allocation addresses via mallctl. + (@lionkov) + - Allow disabling initial-exec TLS model. (@davidtgoldblatt, @KenMacD) + - Add opt.lg_extent_max_active_fit to set the max ratio between the size of + the active extent selected (to split off from) and the size of the requested + allocation. (@interwq, @davidtgoldblatt) + - Add retain_grow_limit to set the max size when growing virtual address + space. (@interwq) + - Add mallctl interfaces: + + arena..retain_grow_limit (@interwq) + + arenas.lookup (@lionkov) + + max_background_threads (@djwatson) + + opt.lg_extent_max_active_fit (@interwq) + + opt.max_background_threads (@djwatson) + + opt.metadata_thp (@interwq) + + opt.thp (@interwq) + + stats.metadata_thp (@interwq) + + Portability improvements: + - Support GNU/kFreeBSD configuration. (@paravoid) + - Support m68k, nios2 and SH3 architectures. (@paravoid) + - Fall back to FD_CLOEXEC when O_CLOEXEC is unavailable. (@zonyitoo) + - Fix symbol listing for cross-compiling. (@tamird) + - Fix high bits computation on ARM. (@davidtgoldblatt, @paravoid) + - Disable the CPU_SPINWAIT macro for Power. (@davidtgoldblatt, @marxin) + - Fix MSVC 2015 & 2017 builds. (@rustyx) + - Improve RISC-V support. (@EdSchouten) + - Set name mangling script in strict mode. (@nicolov) + - Avoid MADV_HUGEPAGE on ARM. (@marxin) + - Modify configure to determine return value of strerror_r. + (@davidtgoldblatt, @cferris1000) + - Make sure CXXFLAGS is tested with CPP compiler. (@nehaljwani) + - Fix 32-bit build on MSVC. (@rustyx) + - Fix external symbol on MSVC. (@maksqwe) + - Avoid a printf format specifier warning. 
(@jasone) + - Add configure option --disable-initial-exec-tls which can allow jemalloc to + be dynamically loaded after program startup. (@davidtgoldblatt, @KenMacD) + - AArch64: Add ILP32 support. (@cmuellner) + - Add --with-lg-vaddr configure option to support cross compiling. + (@cmuellner, @davidtgoldblatt) + + Optimizations and refactors: + - Improve active extent fit with extent_max_active_fit. This considerably + reduces fragmentation over time and improves virtual memory and metadata + usage. (@davidtgoldblatt, @interwq) + - Eagerly coalesce large extents to reduce fragmentation. (@interwq) + - sdallocx: only read size info when page aligned (i.e. possibly sampled), + which speeds up the sized deallocation path significantly. (@interwq) + - Avoid attempting new mappings for in place expansion with retain, since + it rarely succeeds in practice and causes high overhead. (@interwq) + - Refactor OOM handling in newImpl. (@wqfish) + - Add internal fine-grained logging functionality for debugging use. + (@davidtgoldblatt) + - Refactor arena / tcache interactions. (@davidtgoldblatt) + - Refactor extent management with dumpable flag. (@davidtgoldblatt) + - Add runtime detection of lazy purging. (@interwq) + - Use pairing heap instead of red-black tree for extents_avail. (@djwatson) + - Use sysctl on startup in FreeBSD. (@trasz) + - Use thread local prng state instead of atomic. (@djwatson) + - Make decay to always purge one more extent than before, because in + practice large extents are usually the ones that cross the decay threshold. + Purging the additional extent helps save memory as well as reduce VM + fragmentation. (@interwq) + - Fast division by dynamic values. (@davidtgoldblatt) + - Improve the fit for aligned allocation. (@interwq, @edwinsmith) + - Refactor extent_t bitpacking. (@rkmisra) + - Optimize the generated assembly for ticker operations. (@davidtgoldblatt) + - Convert stats printing to use a structured text emitter. 
(@davidtgoldblatt) + - Remove preserve_lru feature for extents management. (@djwatson) + - Consolidate two memory loads into one on the fast deallocation path. + (@davidtgoldblatt, @interwq) + + Bug fixes (most of the issues are only relevant to jemalloc 5.0): + - Fix deadlock with multithreaded fork in OS X. (@davidtgoldblatt) + - Validate returned file descriptor before use. (@zonyitoo) + - Fix a few background thread initialization and shutdown issues. (@interwq) + - Fix an extent coalesce + decay race by taking both coalescing extents off + the LRU list. (@interwq) + - Fix a potentially unbounded increase during decay, caused by one thread + continuing to stash memory to purge while other threads generate new pages. The + number of pages to purge is checked to prevent this. (@interwq) + - Fix a FreeBSD bootstrap assertion. (@strejda, @interwq) + - Handle 32 bit mutex counters. (@rkmisra) + - Fix an indexing bug when creating background threads. (@davidtgoldblatt, + @binliu19) + - Fix arguments passed to extent_init. (@yuleniwo, @interwq) + - Fix addresses used for ordering mutexes. (@rkmisra) + - Fix abort_conf processing during bootstrap. (@interwq) + - Fix include path order for out-of-tree builds. (@cmuellner) + + Incompatible changes: + - Remove --disable-thp. (@interwq) + - Remove mallctl interfaces: + + config.thp (@interwq) + + Documentation: + - Add TUNING.md. (@interwq, @davidtgoldblatt, @djwatson) + +* 5.0.1 (July 1, 2017) + + This bugfix release fixes several issues, most of which are obscure enough + that typical applications are not impacted. + + Bug fixes: + - Update decay->nunpurged before purging, in order to avoid potential update + races and subsequent incorrect purging volume. (@interwq) + - Only abort on dlsym(3) error if the failure impacts an enabled feature (lazy + locking and/or background threads). This mitigates an initialization + failure bug for which we still do not have a clear reproduction test case.
+ (@interwq) + - Modify tsd management so that it neither crashes nor leaks if a thread's + only allocation activity is to call free() after TLS destructors have been + executed. This behavior was observed when operating with GNU libc, and is + unlikely to be an issue with other libc implementations. (@interwq) + - Mask signals during background thread creation. This prevents signals from + being inadvertently delivered to background threads. (@jasone, + @davidtgoldblatt, @interwq) + - Avoid inactivity checks within background threads, in order to prevent + recursive mutex acquisition. (@interwq) + - Fix extent_grow_retained() to use the specified hooks when the + arena..extent_hooks mallctl is used to override the default hooks. + (@interwq) + - Add missing reentrancy support for custom extent hooks which allocate. + (@interwq) + - Post-fork(2), re-initialize the list of tcaches associated with each arena + to contain no tcaches except the forking thread's. (@interwq) + - Add missing post-fork(2) mutex reinitialization for extent_grow_mtx. This + fixes potential deadlocks after fork(2). (@interwq) + - Enforce minimum autoconf version (currently 2.68), since 2.63 is known to + generate corrupt configure scripts. (@jasone) + - Ensure that the configured page size (--with-lg-page) is no larger than the + configured huge page size (--with-lg-hugepage). (@jasone) + +* 5.0.0 (June 13, 2017) + + Unlike all previous jemalloc releases, this release does not use naturally + aligned "chunks" for virtual memory management, and instead uses page-aligned + "extents". This change has few externally visible effects, but the internal + impacts are... extensive. Many other internal changes combine to make this + the most cohesively designed version of jemalloc so far, with ample + opportunity for further enhancements. 
+ + Continuous integration is now an integral aspect of development thanks to the + efforts of @davidtgoldblatt, and the dev branch tends to remain reasonably + stable on the tested platforms (Linux, FreeBSD, macOS, and Windows). As a + side effect the official release frequency may decrease over time. + + New features: + - Implement optional per-CPU arena support; threads choose which arena to use + based on current CPU rather than on fixed thread-->arena associations. + (@interwq) + - Implement two-phase decay of unused dirty pages. Pages transition from + dirty-->muzzy-->clean, where the first phase transition relies on + madvise(... MADV_FREE) semantics, and the second phase transition discards + pages such that they are replaced with demand-zeroed pages on next access. + (@jasone) + - Increase decay time resolution from seconds to milliseconds. (@jasone) + - Implement opt-in per CPU background threads, and use them for asynchronous + decay-driven unused dirty page purging. (@interwq) + - Add mutex profiling, which collects a variety of statistics useful for + diagnosing overhead/contention issues. (@interwq) + - Add C++ new/delete operator bindings. (@djwatson) + - Support manually created arena destruction, such that all data and metadata + are discarded. Add MALLCTL_ARENAS_DESTROYED for accessing merged stats + associated with destroyed arenas. (@jasone) + - Add MALLCTL_ARENAS_ALL as a fixed index for use in accessing + merged/destroyed arena statistics via mallctl. (@jasone) + - Add opt.abort_conf to optionally abort if invalid configuration options are + detected during initialization. (@interwq) + - Add opt.stats_print_opts, so that e.g. JSON output can be selected for the + stats dumped during exit if opt.stats_print is true. (@jasone) + - Add --with-version=VERSION for use when embedding jemalloc into another + project's git repository. (@jasone) + - Add --disable-thp to support cross compiling. 
(@jasone) + - Add --with-lg-hugepage to support cross compiling. (@jasone) + - Add mallctl interfaces (various authors): + + background_thread + + opt.abort_conf + + opt.retain + + opt.percpu_arena + + opt.background_thread + + opt.{dirty,muzzy}_decay_ms + + opt.stats_print_opts + + arena..initialized + + arena..destroy + + arena..{dirty,muzzy}_decay_ms + + arena..extent_hooks + + arenas.{dirty,muzzy}_decay_ms + + arenas.bin..slab_size + + arenas.nlextents + + arenas.lextent..size + + arenas.create + + stats.background_thread.{num_threads,num_runs,run_interval} + + stats.mutexes.{ctl,background_thread,prof,reset}. + {num_ops,num_spin_acq,num_wait,max_wait_time,total_wait_time,max_num_thds, + num_owner_switch} + + stats.arenas..{dirty,muzzy}_decay_ms + + stats.arenas..uptime + + stats.arenas..{pmuzzy,base,internal,resident} + + stats.arenas..{dirty,muzzy}_{npurge,nmadvise,purged} + + stats.arenas..bins..{nslabs,reslabs,curslabs} + + stats.arenas..bins..mutex. + {num_ops,num_spin_acq,num_wait,max_wait_time,total_wait_time,max_num_thds, + num_owner_switch} + + stats.arenas..lextents..{nmalloc,ndalloc,nrequests,curlextents} + + stats.arenas.i.mutexes.{large,extent_avail,extents_dirty,extents_muzzy, + extents_retained,decay_dirty,decay_muzzy,base,tcache_list}. + {num_ops,num_spin_acq,num_wait,max_wait_time,total_wait_time,max_num_thds, + num_owner_switch} + + Portability improvements: + - Improve reentrant allocation support, such that deadlock is less likely if + e.g. a system library call in turn allocates memory. (@davidtgoldblatt, + @interwq) + - Support static linking of jemalloc with glibc. (@djwatson) + + Optimizations and refactors: + - Organize virtual memory as "extents" of virtual memory pages, rather than as + naturally aligned "chunks", and store all metadata in arbitrarily distant + locations. This reduces virtual memory external fragmentation, and will + interact better with huge pages (not yet explicitly supported). 
(@jasone) + - Fold large and huge size classes together; only small and large size classes + remain. (@jasone) + - Unify the allocation paths, and merge most fast-path branching decisions. + (@davidtgoldblatt, @interwq) + - Embed per thread automatic tcache into thread-specific data, which reduces + conditional branches and dereferences. Also reorganize tcache to increase + fast-path data locality. (@interwq) + - Rewrite atomics to closely model the C11 API, convert various + synchronization from mutex-based to atomic, and use the explicit memory + ordering control to resolve various hypothetical races without increasing + synchronization overhead. (@davidtgoldblatt) + - Extensively optimize rtree via various methods: + + Add multiple layers of rtree lookup caching, since rtree lookups are now + part of fast-path deallocation. (@interwq) + + Determine rtree layout at compile time. (@jasone) + + Make the tree shallower for common configurations. (@jasone) + + Embed the root node in the top-level rtree data structure, thus avoiding + one level of indirection. (@jasone) + + Further specialize leaf elements as compared to internal node elements, + and directly embed extent metadata needed for fast-path deallocation. + (@jasone) + + Ignore leading always-zero address bits (architecture-specific). + (@jasone) + - Reorganize headers (ongoing work) to make them hermetic, and disentangle + various module dependencies. (@davidtgoldblatt) + - Convert various internal data structures such as size class metadata from + boot-time-initialized to compile-time-initialized. Propagate resulting data + structure simplifications, such as making arena metadata fixed-size. + (@jasone) + - Simplify size class lookups when constrained to size classes that are + multiples of the page size. This speeds lookups, but the primary benefit is + complexity reduction in code that was the source of numerous regressions. 
+ (@jasone) + - Lock individual extents when possible for localized extent operations, + rather than relying on a top-level arena lock. (@davidtgoldblatt, @jasone) + - Use first fit layout policy instead of best fit, in order to improve + packing. (@jasone) + - If munmap(2) is not in use, use an exponential series to grow each arena's + virtual memory, so that the number of disjoint virtual memory mappings + remains low. (@jasone) + - Implement per arena base allocators, so that arenas never share any virtual + memory pages. (@jasone) + - Automatically generate private symbol name mangling macros. (@jasone) + + Incompatible changes: + - Replace chunk hooks with an expanded/normalized set of extent hooks. + (@jasone) + - Remove ratio-based purging. (@jasone) + - Remove --disable-tcache. (@jasone) + - Remove --disable-tls. (@jasone) + - Remove --enable-ivsalloc. (@jasone) + - Remove --with-lg-size-class-group. (@jasone) + - Remove --with-lg-tiny-min. (@jasone) + - Remove --disable-cc-silence. (@jasone) + - Remove --enable-code-coverage. (@jasone) + - Remove --disable-munmap (replaced by opt.retain). (@jasone) + - Remove Valgrind support. (@jasone) + - Remove quarantine support. (@jasone) + - Remove redzone support. 
(@jasone) + - Remove mallctl interfaces (various authors): + + config.munmap + + config.tcache + + config.tls + + config.valgrind + + opt.lg_chunk + + opt.purge + + opt.lg_dirty_mult + + opt.decay_time + + opt.quarantine + + opt.redzone + + opt.thp + + arena..lg_dirty_mult + + arena..decay_time + + arena..chunk_hooks + + arenas.initialized + + arenas.lg_dirty_mult + + arenas.decay_time + + arenas.bin..run_size + + arenas.nlruns + + arenas.lrun..size + + arenas.nhchunks + + arenas.hchunk..size + + arenas.extend + + stats.cactive + + stats.arenas..lg_dirty_mult + + stats.arenas..decay_time + + stats.arenas..metadata.{mapped,allocated} + + stats.arenas..{npurge,nmadvise,purged} + + stats.arenas..huge.{allocated,nmalloc,ndalloc,nrequests} + + stats.arenas..bins..{nruns,reruns,curruns} + + stats.arenas..lruns..{nmalloc,ndalloc,nrequests,curruns} + + stats.arenas..hchunks..{nmalloc,ndalloc,nrequests,curhchunks} + + Bug fixes: + - Improve interval-based profile dump triggering to dump only one profile when + a single allocation's size exceeds the interval. (@jasone) + - Use prefixed function names (as controlled by --with-jemalloc-prefix) when + pruning backtrace frames in jeprof. (@jasone) + +* 4.5.0 (February 28, 2017) + + This is the first release to benefit from much broader continuous integration + testing, thanks to @davidtgoldblatt. Had we had this testing infrastructure + in place for prior releases, it would have caught all of the most serious + regressions fixed by this release. + + New features: + - Add --disable-thp and the opt.thp mallctl to provide opt-out mechanisms for + transparent huge page integration. (@jasone) + - Update zone allocator integration to work with macOS 10.12. (@glandium) + - Restructure *CFLAGS configuration, so that CFLAGS behaves typically, and + EXTRA_CFLAGS provides a way to specify e.g. -Werror during building, but not + during configuration. (@jasone, @ronawho) + + Bug fixes: + - Fix DSS (sbrk(2)-based) allocation. 
This regression was first released in + 4.3.0. (@jasone) + - Handle race in per size class utilization computation. This functionality + was first released in 4.0.0. (@interwq) + - Fix lock order reversal during gdump. (@jasone) + - Fix/refactor tcache synchronization. This regression was first released in + 4.0.0. (@jasone) + - Fix various JSON-formatted malloc_stats_print() bugs. This functionality + was first released in 4.3.0. (@jasone) + - Fix huge-aligned allocation. This regression was first released in 4.4.0. + (@jasone) + - When transparent huge page integration is enabled, detect what state pages + start in according to the kernel's current operating mode, and only convert + arena chunks to non-huge during purging if that is not their initial state. + This functionality was first released in 4.4.0. (@jasone) + - Fix lg_chunk clamping for the --enable-cache-oblivious --disable-fill case. + This regression was first released in 4.0.0. (@jasone, @428desmo) + - Properly detect sparc64 when building for Linux. (@glaubitz) + +* 4.4.0 (December 3, 2016) + + New features: + - Add configure support for *-*-linux-android. (@cferris1000, @jasone) + - Add the --disable-syscall configure option, for use on systems that place + security-motivated limitations on syscall(2). (@jasone) + - Add support for Debian GNU/kFreeBSD. (@thesam) + + Optimizations: + - Add extent serial numbers and use them where appropriate as a sort key that + is higher priority than address, so that the allocation policy prefers older + extents. This tends to improve locality (decrease fragmentation) when + memory grows downward. (@jasone) + - Refactor madvise(2) configuration so that MADV_FREE is detected and utilized + on Linux 4.5 and newer. (@jasone) + - Mark partially purged arena chunks as non-huge-page. This improves + interaction with Linux's transparent huge page functionality. 
(@jasone) + + Bug fixes: + - Fix size class computations for edge conditions involving extremely large + allocations. This regression was first released in 4.0.0. (@jasone, + @ingvarha) + - Remove overly restrictive assertions related to the cactive statistic. This + regression was first released in 4.1.0. (@jasone) + - Implement a more reliable detection scheme for os_unfair_lock on macOS. + (@jszakmeister) + +* 4.3.1 (November 7, 2016) + + Bug fixes: + - Fix a severe virtual memory leak. This regression was first released in + 4.3.0. (@interwq, @jasone) + - Refactor atomic and prng APIs to restore support for 32-bit platforms that + use pre-C11 toolchains, e.g. FreeBSD's mips. (@jasone) + +* 4.3.0 (November 4, 2016) + + This is the first release that passes the test suite for multiple Windows + configurations, thanks in large part to @glandium setting up continuous + integration via AppVeyor (and Travis CI for Linux and OS X). + + New features: + - Add "J" (JSON) support to malloc_stats_print(). (@jasone) + - Add Cray compiler support. (@ronawho) + + Optimizations: + - Add/use adaptive spinning for bootstrapping and radix tree node + initialization. (@jasone) + + Bug fixes: + - Fix large allocation to search starting in the optimal size class heap, + which can substantially reduce virtual memory churn and fragmentation. This + regression was first released in 4.0.0. (@mjp41, @jasone) + - Fix stats.arenas..nthreads accounting. (@interwq) + - Fix and simplify decay-based purging. (@jasone) + - Make DSS (sbrk(2)-related) operations lockless, which resolves potential + deadlocks during thread exit. (@jasone) + - Fix over-sized allocation of radix tree leaf nodes. (@mjp41, @ogaun, + @jasone) + - Fix over-sized allocation of arena_t (plus associated stats) data + structures. (@jasone, @interwq) + - Fix EXTRA_CFLAGS to not affect configuration. (@jasone) + - Fix a Valgrind integration bug. (@ronawho) + - Disallow 0x5a junk filling when running in Valgrind. 
(@jasone) + - Fix a file descriptor leak on Linux. This regression was first released in + 4.2.0. (@vsarunas, @jasone) + - Fix static linking of jemalloc with glibc. (@djwatson) + - Use syscall(2) rather than {open,read,close}(2) during boot on Linux. This + works around other libraries' system call wrappers performing reentrant + allocation. (@kspinka, @Whissi, @jasone) + - Fix OS X default zone replacement to work with OS X 10.12. (@glandium, + @jasone) + - Fix cached memory management to avoid needless commit/decommit operations + during purging, which resolves permanent virtual memory map fragmentation + issues on Windows. (@mjp41, @jasone) + - Fix TSD fetches to avoid (recursive) allocation. This is relevant to + non-TLS and Windows configurations. (@jasone) + - Fix malloc_conf overriding to work on Windows. (@jasone) + - Forcibly disable lazy-lock on Windows (was forcibly *enabled*). (@jasone) + +* 4.2.1 (June 8, 2016) + + Bug fixes: + - Fix bootstrapping issues for configurations that require allocation during + tsd initialization (e.g. --disable-tls). (@cferris1000, @jasone) + - Fix gettimeofday() version of nstime_update(). (@ronawho) + - Fix Valgrind regressions in calloc() and chunk_alloc_wrapper(). (@ronawho) + - Fix potential VM map fragmentation regression. (@jasone) + - Fix opt_zero-triggered in-place huge reallocation zeroing. (@jasone) + - Fix heap profiling context leaks in reallocation edge cases. (@jasone) + +* 4.2.0 (May 12, 2016) + + New features: + - Add the arena..reset mallctl, which makes it possible to discard all of + an arena's allocations in a single operation. (@jasone) + - Add the stats.retained and stats.arenas..retained statistics. (@jasone) + - Add the --with-version configure option. (@jasone) + - Support --with-lg-page values larger than actual page size. (@jasone) + + Optimizations: + - Use pairing heaps rather than red-black trees for various hot data + structures. 
(@djwatson, @jasone) + - Streamline fast paths of rtree operations. (@jasone) + - Optimize the fast paths of calloc() and [m,d,sd]allocx(). (@jasone) + - Decommit unused virtual memory if the OS does not overcommit. (@jasone) + - Specify MAP_NORESERVE on Linux if [heuristic] overcommit is active, in order + to avoid unfortunate interactions during fork(2). (@jasone) + + Bug fixes: + - Fix chunk accounting related to triggering gdump profiles. (@jasone) + - Link against librt for clock_gettime(2) if glibc < 2.17. (@jasone) + - Scale leak report summary according to sampling probability. (@jasone) + +* 4.1.1 (May 3, 2016) + + This bugfix release resolves a variety of mostly minor issues, though the + bitmap fix is critical for 64-bit Windows. + + Bug fixes: + - Fix the linear scan version of bitmap_sfu() to shift by the proper amount + even when sizeof(long) is not the same as sizeof(void *), as on 64-bit + Windows. (@jasone) + - Fix hashing functions to avoid unaligned memory accesses (and resulting + crashes). This is relevant at least to some ARM-based platforms. + (@rkmisra) + - Fix fork()-related lock rank ordering reversals. These reversals were + unlikely to cause deadlocks in practice except when heap profiling was + enabled and active. (@jasone) + - Fix various chunk leaks in OOM code paths. (@jasone) + - Fix malloc_stats_print() to print opt.narenas correctly. (@jasone) + - Fix MSVC-specific build/test issues. (@rustyx, @yuslepukhin) + - Fix a variety of test failures that were due to test fragility rather than + core bugs. (@jasone) + +* 4.1.0 (February 28, 2016) + + This release is primarily about optimizations, but it also incorporates a lot + of portability-motivated refactoring and enhancements. 
Many people worked on + this release, to an extent that even with the omission here of minor changes + (see git revision history), and of the people who reported and diagnosed + issues, so much of the work was contributed that starting with this release, + changes are annotated with author credits to help reflect the collaborative + effort involved. + + New features: + - Implement decay-based unused dirty page purging, a major optimization with + mallctl API impact. This is an alternative to the existing ratio-based + unused dirty page purging, and is intended to eventually become the sole + purging mechanism. New mallctls: + + opt.purge + + opt.decay_time + + arena..decay + + arena..decay_time + + arenas.decay_time + + stats.arenas..decay_time + (@jasone, @cevans87) + - Add --with-malloc-conf, which makes it possible to embed a default + options string during configuration. This was motivated by the desire to + specify --with-malloc-conf=purge:decay , since the default must remain + purge:ratio until the 5.0.0 release. (@jasone) + - Add MS Visual Studio 2015 support. (@rustyx, @yuslepukhin) + - Make *allocx() size class overflow behavior defined. The maximum + size class is now less than PTRDIFF_MAX to protect applications against + numerical overflow, and all allocation functions are guaranteed to indicate + errors rather than potentially crashing if the request size exceeds the + maximum size class. (@jasone) + - jeprof: + + Add raw heap profile support. (@jasone) + + Add --retain and --exclude for backtrace symbol filtering. (@jasone) + + Optimizations: + - Optimize the fast path to combine various bootstrapping and configuration + checks and execute more streamlined code in the common case. (@interwq) + - Use linear scan for small bitmaps (used for small object tracking). In + addition to speeding up bitmap operations on 64-bit systems, this reduces + allocator metadata overhead by approximately 0.2%. 
(@djwatson) + - Separate arena_avail trees, which substantially speeds up run tree + operations. (@djwatson) + - Use memoization (boot-time-computed table) for run quantization. Separate + arena_avail trees reduced the importance of this optimization. (@jasone) + - Attempt mmap-based in-place huge reallocation. This can dramatically speed + up incremental huge reallocation. (@jasone) + + Incompatible changes: + - Make opt.narenas unsigned rather than size_t. (@jasone) + + Bug fixes: + - Fix stats.cactive accounting regression. (@rustyx, @jasone) + - Handle unaligned keys in hash(). This caused problems for some ARM systems. + (@jasone, @cferris1000) + - Refactor arenas array. In addition to fixing a fork-related deadlock, this + makes arena lookups faster and simpler. (@jasone) + - Move retained memory allocation out of the default chunk allocation + function, to a location that gets executed even if the application installs + a custom chunk allocation function. This resolves a virtual memory leak. + (@buchgr) + - Fix a potential tsd cleanup leak. (@cferris1000, @jasone) + - Fix run quantization. In practice this bug had no impact unless + applications requested memory with alignment exceeding one page. + (@jasone, @djwatson) + - Fix LinuxThreads-specific bootstrapping deadlock. (Cosmin Paraschiv) + - jeprof: + + Don't discard curl options if timeout is not defined. (@djwatson) + + Detect failed profile fetches. (@djwatson) + - Fix stats.arenas..{dss,lg_dirty_mult,decay_time,pactive,pdirty} for + --disable-stats case. (@jasone) + +* 4.0.4 (October 24, 2015) + + This bugfix release fixes another xallocx() regression. No other regressions + have come to light in over a month, so this is likely a good starting point + for people who prefer to wait for "dot one" releases with all the major issues + shaken out. 
+ + Bug fixes: + - Fix xallocx(..., MALLOCX_ZERO) to zero the last full trailing page of large + allocations that have been randomly assigned an offset of 0 when + --enable-cache-oblivious configure option is enabled. + +* 4.0.3 (September 24, 2015) + + This bugfix release continues the trend of xallocx() and heap profiling fixes. + + Bug fixes: + - Fix xallocx(..., MALLOCX_ZERO) to zero all trailing bytes of large + allocations when --enable-cache-oblivious configure option is enabled. + - Fix xallocx(..., MALLOCX_ZERO) to zero trailing bytes of huge allocations + when resizing from/to a size class that is not a multiple of the chunk size. + - Fix prof_tctx_dump_iter() to filter out nodes that were created after heap + profile dumping started. + - Work around a potentially bad thread-specific data initialization + interaction with NPTL (glibc's pthreads implementation). + +* 4.0.2 (September 21, 2015) + + This bugfix release addresses a few bugs specific to heap profiling. + + Bug fixes: + - Fix ixallocx_prof_sample() to never modify nor create sampled small + allocations. xallocx() is in general incapable of moving small allocations, + so this fix removes buggy code without loss of generality. + - Fix irallocx_prof_sample() to always allocate large regions, even when + alignment is non-zero. + - Fix prof_alloc_rollback() to read tdata from thread-specific data rather + than dereferencing a potentially invalid tctx. + +* 4.0.1 (September 15, 2015) + + This is a bugfix release that is somewhat high risk due to the amount of + refactoring required to address deep xallocx() problems. As a side effect of + these fixes, xallocx() now tries harder to partially fulfill requests for + optional extra space. Note that a couple of minor heap profiling + optimizations are included, but these are better thought of as performance + fixes that were integral to discovering most of the other bugs.
+ + Optimizations: + - Avoid a chunk metadata read in arena_prof_tctx_set(), since it is in the + fast path when heap profiling is enabled. Additionally, split a special + case out into arena_prof_tctx_reset(), which also avoids chunk metadata + reads. + - Optimize irallocx_prof() to optimistically update the sampler state. The + prior implementation appears to have been a holdover from when + rallocx()/xallocx() functionality was combined as rallocm(). + + Bug fixes: + - Fix TLS configuration such that it is enabled by default for platforms on + which it works correctly. + - Fix arenas_cache_cleanup() and arena_get_hard() to handle + allocation/deallocation within the application's thread-specific data + cleanup functions even after arenas_cache is torn down. + - Fix xallocx() bugs related to size+extra exceeding HUGE_MAXCLASS. + - Fix chunk purge hook calls for in-place huge shrinking reallocation to + specify the old chunk size rather than the new chunk size. This bug caused + no correctness issues for the default chunk purge function, but was + visible to custom functions set via the "arena..chunk_hooks" mallctl. + - Fix heap profiling bugs: + + Fix heap profiling to distinguish among otherwise identical sample sites + with interposed resets (triggered via the "prof.reset" mallctl). This bug + could cause data structure corruption that would most likely result in a + segfault. + + Fix irealloc_prof() to prof_alloc_rollback() on OOM. + + Make one call to prof_active_get_unlocked() per allocation event, and use + the result throughout the relevant functions that handle an allocation + event. Also add a missing check in prof_realloc(). These fixes protect + allocation events against concurrent prof_active changes. + + Fix ixallocx_prof() to pass usize_max and zero to ixallocx_prof_sample() + in the correct order. + + Fix prof_realloc() to call prof_free_sampled_object() after calling + prof_malloc_sample_object(). 
Prior to this fix, if tctx and old_tctx were + the same, the tctx could have been prematurely destroyed. + - Fix portability bugs: + + Don't bitshift by negative amounts when encoding/decoding run sizes in + chunk header maps. This affected systems with page sizes greater than 8 + KiB. + + Rename index_t to szind_t to avoid an existing type on Solaris. + + Add JEMALLOC_CXX_THROW to the memalign() function prototype, in order to + match glibc and avoid compilation errors when including both + jemalloc/jemalloc.h and malloc.h in C++ code. + + Don't assume that /bin/sh is appropriate when running size_classes.sh + during configuration. + + Consider __sparcv9 a synonym for __sparc64__ when defining LG_QUANTUM. + + Link tests to librt if it contains clock_gettime(2). + +* 4.0.0 (August 17, 2015) + + This version contains many speed and space optimizations, both minor and + major. The major themes are generalization, unification, and simplification. + Although many of these optimizations cause no visible behavior change, their + cumulative effect is substantial. + + New features: + - Normalize size class spacing to be consistent across the complete size + range. By default there are four size classes per size doubling, but this + is now configurable via the --with-lg-size-class-group option. Also add the + --with-lg-page, --with-lg-page-sizes, --with-lg-quantum, and + --with-lg-tiny-min options, which can be used to tweak page and size class + settings. Impacts: + + Worst case performance for incrementally growing/shrinking reallocation + is improved because there are far fewer size classes, and therefore + copying happens less often. + + Internal fragmentation is limited to 20% for all but the smallest size + classes (those less than four times the quantum). (1B + 4 KiB) + and (1B + 4 MiB) previously suffered nearly 50% internal fragmentation. + + Chunk fragmentation tends to be lower because there are fewer distinct run + sizes to pack. 
+ - Add support for explicit tcaches. The "tcache.create", "tcache.flush", and + "tcache.destroy" mallctls control tcache lifetime and flushing, and the + MALLOCX_TCACHE(tc) and MALLOCX_TCACHE_NONE flags to the *allocx() API + control which tcache is used for each operation. + - Implement per thread heap profiling, as well as the ability to + enable/disable heap profiling on a per thread basis. Add the "prof.reset", + "prof.lg_sample", "thread.prof.name", "thread.prof.active", + "opt.prof_thread_active_init", "prof.thread_active_init", and + "thread.prof.active" mallctls. + - Add support for per arena application-specified chunk allocators, configured + via the "arena.<i>.chunk_hooks" mallctl. + - Refactor huge allocation to be managed by arenas, so that arenas now + function as general purpose independent allocators. This is important in + the context of user-specified chunk allocators, aside from the scalability + benefits. Related new statistics: + + The "stats.arenas.<i>.huge.allocated", "stats.arenas.<i>.huge.nmalloc", + "stats.arenas.<i>.huge.ndalloc", and "stats.arenas.<i>.huge.nrequests" + mallctls provide high level per arena huge allocation statistics. + + The "arenas.nhchunks", "arenas.hchunk.<i>.size", + "stats.arenas.<i>.hchunks.<j>.nmalloc", + "stats.arenas.<i>.hchunks.<j>.ndalloc", + "stats.arenas.<i>.hchunks.<j>.nrequests", and + "stats.arenas.<i>.hchunks.<j>.curhchunks" mallctls provide per size class + statistics. + - Add the 'util' column to malloc_stats_print() output, which reports the + proportion of available regions that are currently in use for each small + size class. + - Add "alloc" and "free" modes for junk filling (see the "opt.junk" + mallctl), so that it is possible to separately enable junk filling for + allocation versus deallocation. + - Add the jemalloc-config script, which provides information about how + jemalloc was configured, and how to integrate it into application builds.
+ - Add metadata statistics, which are accessible via the "stats.metadata", + "stats.arenas.<i>.metadata.mapped", and + "stats.arenas.<i>.metadata.allocated" mallctls. + - Add the "stats.resident" mallctl, which reports the upper limit of + physically resident memory mapped by the allocator. + - Add per arena control over unused dirty page purging, via the + "arenas.lg_dirty_mult", "arena.<i>.lg_dirty_mult", and + "stats.arenas.<i>.lg_dirty_mult" mallctls. + - Add the "prof.gdump" mallctl, which makes it possible to toggle the gdump + feature on/off during program execution. + - Add sdallocx(), which implements sized deallocation. The primary + optimization over dallocx() is the removal of a metadata read, which often + suffers an L1 cache miss. + - Add missing header includes in jemalloc/jemalloc.h, so that applications + only have to #include <jemalloc/jemalloc.h>. + - Add support for additional platforms: + + Bitrig + + Cygwin + + DragonFlyBSD + + iOS + + OpenBSD + + OpenRISC/or1k + + Optimizations: + - Maintain dirty runs in per arena LRUs rather than in per arena trees of + dirty-run-containing chunks. In practice this change significantly reduces + dirty page purging volume. + - Integrate whole chunks into the unused dirty page purging machinery. This + reduces the cost of repeated huge allocation/deallocation, because it + effectively introduces a cache of chunks. + - Split the arena chunk map into two separate arrays, in order to increase + cache locality for the frequently accessed bits. + - Move small run metadata out of runs, into arena chunk headers. This reduces + run fragmentation, smaller runs reduce external fragmentation for small size + classes, and packed (less uniformly aligned) metadata layout improves CPU + cache set distribution. + - Randomly distribute large allocation base pointer alignment relative to page + boundaries in order to more uniformly utilize CPU cache sets.
This can be + disabled via the --disable-cache-oblivious configure option, and queried via + the "config.cache_oblivious" mallctl. + - Micro-optimize the fast paths for the public API functions. + - Refactor thread-specific data to reside in a single structure. This assures + that only a single TLS read is necessary per call into the public API. + - Implement in-place huge allocation growing and shrinking. + - Refactor rtree (radix tree for chunk lookups) to be lock-free, and make + additional optimizations that reduce maximum lookup depth to one or two + levels. This resolves what was a concurrency bottleneck for per arena huge + allocation, because a global data structure is critical for determining + which arenas own which huge allocations. + + Incompatible changes: + - Replace --enable-cc-silence with --disable-cc-silence to suppress spurious + warnings by default. + - Assure that the constness of malloc_usable_size()'s return type matches that + of the system implementation. + - Change the heap profile dump format to support per thread heap profiling, + rename pprof to jeprof, and enhance it with the --thread= option. As a + result, the bundled jeprof must now be used rather than the upstream + (gperftools) pprof. + - Disable "opt.prof_final" by default, in order to avoid atexit(3), which can + internally deadlock on some platforms. + - Change the "arenas.nlruns" mallctl type from size_t to unsigned. + - Replace the "stats.arenas..bins..allocated" mallctl with + "stats.arenas..bins..curregs". + - Ignore MALLOC_CONF in set{uid,gid,cap} binaries. + - Ignore MALLOCX_ARENA(a) in dallocx(), in favor of using the + MALLOCX_TCACHE(tc) and MALLOCX_TCACHE_NONE flags to control tcache usage. + + Removed features: + - Remove the *allocm() API, which is superseded by the *allocx() API. + - Remove the --enable-dss options, and make dss non-optional on all platforms + which support sbrk(2). 
+ - Remove the "arenas.purge" mallctl, which was obsoleted by the + "arena..purge" mallctl in 3.1.0. + - Remove the unnecessary "opt.valgrind" mallctl; jemalloc automatically + detects whether it is running inside Valgrind. + - Remove the "stats.huge.allocated", "stats.huge.nmalloc", and + "stats.huge.ndalloc" mallctls. + - Remove the --enable-mremap option. + - Remove the "stats.chunks.current", "stats.chunks.total", and + "stats.chunks.high" mallctls. + + Bug fixes: + - Fix the cactive statistic to decrease (rather than increase) when active + memory decreases. This regression was first released in 3.5.0. + - Fix OOM handling in memalign() and valloc(). A variant of this bug existed + in all releases since 2.0.0, which introduced these functions. + - Fix an OOM-related regression in arena_tcache_fill_small(), which could + cause cache corruption on OOM. This regression was present in all releases + from 2.2.0 through 3.6.0. + - Fix size class overflow handling for malloc(), posix_memalign(), memalign(), + calloc(), and realloc() when profiling is enabled. + - Fix the "arena..dss" mallctl to return an error if "primary" or + "secondary" precedence is specified, but sbrk(2) is not supported. + - Fix fallback lg_floor() implementations to handle extremely large inputs. + - Ensure the default purgeable zone is after the default zone on OS X. + - Fix latent bugs in atomic_*(). + - Fix the "arena..dss" mallctl to handle read-only calls. + - Fix tls_model configuration to enable the initial-exec model when possible. + - Mark malloc_conf as a weak symbol so that the application can override it. + - Correctly detect glibc's adaptive pthread mutexes. + - Fix the --without-export configure option. + +* 3.6.0 (March 31, 2014) + + This version contains a critical bug fix for a regression present in 3.5.0 and + 3.5.1. + + Bug fixes: + - Fix a regression in arena_chunk_alloc() that caused crashes during + small/large allocation if chunk allocation failed. 
In the absence of this + bug, chunk allocation failure would result in allocation failure, e.g. NULL + return from malloc(). This regression was introduced in 3.5.0. + - Fix backtracing for gcc intrinsics-based backtracing by specifying + -fno-omit-frame-pointer to gcc. Note that the application (and all the + libraries it links to) must also be compiled with this option for + backtracing to be reliable. + - Use dss allocation precedence for huge allocations as well as small/large + allocations. + - Fix test assertion failure message formatting. This bug did not manifest on + x86_64 systems because of implementation subtleties in va_list. + - Fix inconsequential test failures for hash and SFMT code. + + New features: + - Support heap profiling on FreeBSD. This feature depends on the proc + filesystem being mounted during heap profile dumping. + +* 3.5.1 (February 25, 2014) + + This version primarily addresses minor bugs in test code. + + Bug fixes: + - Configure Solaris/Illumos to use MADV_FREE. + - Fix junk filling for mremap(2)-based huge reallocation. This is only + relevant if configuring with the --enable-mremap option specified. + - Avoid compilation failure if 'restrict' C99 keyword is not supported by the + compiler. + - Add a configure test for SSE2 rather than assuming it is usable on i686 + systems. This fixes test compilation errors, especially on 32-bit Linux + systems. + - Fix mallctl argument size mismatches (size_t vs. uint64_t) in the stats unit + test. + - Fix/remove flawed alignment-related overflow tests. + - Prevent compiler optimizations that could change backtraces in the + prof_accum unit test. + +* 3.5.0 (January 22, 2014) + + This version focuses on refactoring and automated testing, though it also + includes some non-trivial heap profiling optimizations not mentioned below. + + New features: + - Add the *allocx() API, which is a successor to the experimental *allocm() + API. 
The *allocx() functions are slightly simpler to use because they have + fewer parameters, they directly return the results of primary interest, and + mallocx()/rallocx() avoid the strict aliasing pitfall that + allocm()/rallocm() share with posix_memalign(). Note that *allocm() is + slated for removal in the next non-bugfix release. + - Add support for LinuxThreads. + + Bug fixes: + - Unless heap profiling is enabled, disable floating point code and don't link + with libm. This, in combination with e.g. EXTRA_CFLAGS=-mno-sse on x64 + systems, makes it possible to completely disable floating point register + use. Some versions of glibc neglect to save/restore caller-saved floating + point registers during dynamic lazy symbol loading, and the symbol loading + code uses whatever malloc the application happens to have linked/loaded + with, the result being potential floating point register corruption. + - Report ENOMEM rather than EINVAL if an OOM occurs during heap profiling + backtrace creation in imemalign(). This bug impacted posix_memalign() and + aligned_alloc(). + - Fix a file descriptor leak in a prof_dump_maps() error path. + - Fix prof_dump() to close the dump file descriptor for all relevant error + paths. + - Fix rallocm() to use the arena specified by the ALLOCM_ARENA(s) flag for + allocation, not just deallocation. + - Fix a data race for large allocation stats counters. + - Fix a potential infinite loop during thread exit. This bug occurred on + Solaris, and could affect other platforms with similar pthreads TSD + implementations. + - Don't junk-fill reallocations unless usable size changes. This fixes a + violation of the *allocx()/*allocm() semantics. + - Fix growing large reallocation to junk fill new space. + - Fix huge deallocation to junk fill when munmap is disabled. + - Change the default private namespace prefix from empty to je_, and change + --with-private-namespace-prefix so that it prepends an additional prefix + rather than replacing je_. 
This reduces the likelihood of applications + which statically link jemalloc experiencing symbol name collisions. + - Add missing private namespace mangling (relevant when + --with-private-namespace is specified). + - Add and use JEMALLOC_INLINE_C so that static inline functions are marked as + static even for debug builds. + - Add a missing mutex unlock in a malloc_init_hard() error path. In practice + this error path is never executed. + - Fix numerous bugs in malloc_strtoumax() error handling/reporting. These + bugs had no impact except for malformed inputs. + - Fix numerous bugs in malloc_snprintf(). These bugs were not exercised by + existing calls, so they had no impact. + +* 3.4.1 (October 20, 2013) + + Bug fixes: + - Fix a race in the "arenas.extend" mallctl that could cause memory corruption + of internal data structures and subsequent crashes. + - Fix Valgrind integration flaws that caused Valgrind warnings about reads of + uninitialized memory in: + + arena chunk headers + + internal zero-initialized data structures (relevant to tcache and prof + code) + - Preserve errno during the first allocation. A readlink(2) call during + initialization fails unless /etc/malloc.conf exists, so errno was typically + set during the first allocation prior to this fix. + - Fix compilation warnings reported by gcc 4.8.1. + +* 3.4.0 (June 2, 2013) + + This version is essentially a small bugfix release, but the addition of + aarch64 support requires that the minor version be incremented. + + Bug fixes: + - Fix race-triggered deadlocks in chunk_record(). These deadlocks were + typically triggered by multiple threads concurrently deallocating huge + objects. + + New features: + - Add support for the aarch64 architecture. + +* 3.3.1 (March 6, 2013) + + This version fixes bugs that are typically encountered only when utilizing + custom run-time options. + + Bug fixes: + - Fix a locking order bug that could cause deadlock during fork if heap + profiling were enabled.
+ - Fix a chunk recycling bug that could cause the allocator to lose track of + whether a chunk was zeroed. On FreeBSD, NetBSD, and OS X, it could cause + corruption if allocating via sbrk(2) (unlikely unless running with the + "dss:primary" option specified). This was completely harmless on Linux + unless using mlockall(2) (and unlikely even then, unless the + --disable-munmap configure option or the "dss:primary" option was + specified). This regression was introduced in 3.1.0 by the + mlockall(2)/madvise(2) interaction fix. + - Fix TLS-related memory corruption that could occur during thread exit if the + thread never allocated memory. Only the quarantine and prof facilities were + susceptible. + - Fix two quarantine bugs: + + Internal reallocation of the quarantined object array leaked the old + array. + + Reallocation failure for internal reallocation of the quarantined object + array (very unlikely) resulted in memory corruption. + - Fix Valgrind integration to annotate all internally allocated memory in a + way that keeps Valgrind happy about internal data structure access. + - Fix building for s390 systems. * 3.3.0 (January 23, 2013) @@ -19,7 +1388,7 @@ found in the git revision history: Bug fixes: - Fix "arenas.extend" mallctl to output the number of arenas. - - Fix chunk_recycyle() to unconditionally inform Valgrind that returned memory + - Fix chunk_recycle() to unconditionally inform Valgrind that returned memory is undefined. - Fix build break on FreeBSD related to alloca.h. @@ -365,7 +1734,7 @@ found in the git revision history: - Make it possible for the application to manually flush a thread's cache, via the "tcache.flush" mallctl. - Base maximum dirty page count on proportion of active memory. - - Compute various addtional run-time statistics, including per size class + - Compute various additional run-time statistics, including per size class statistics for large objects. 
- Expose malloc_stats_print(), which can be called repeatedly by the application. diff --git a/INSTALL b/INSTALL deleted file mode 100644 index 6e371ce5..00000000 --- a/INSTALL +++ /dev/null @@ -1,293 +0,0 @@ -Building and installing jemalloc can be as simple as typing the following while -in the root directory of the source tree: - - ./configure - make - make install - -=== Advanced configuration ===================================================== - -The 'configure' script supports numerous options that allow control of which -functionality is enabled, where jemalloc is installed, etc. Optionally, pass -any of the following arguments (not a definitive list) to 'configure': - ---help - Print a definitive list of options. - ---prefix= - Set the base directory in which to install. For example: - - ./configure --prefix=/usr/local - - will cause files to be installed into /usr/local/include, /usr/local/lib, - and /usr/local/man. - ---with-rpath= - Embed one or more library paths, so that libjemalloc can find the libraries - it is linked to. This works only on ELF-based systems. - ---with-mangling= - Mangle public symbols specified in which is a comma-separated list of - name:mangled pairs. - - For example, to use ld's --wrap option as an alternative method for - overriding libc's malloc implementation, specify something like: - - --with-mangling=malloc:__wrap_malloc,free:__wrap_free[...] - - Note that mangling happens prior to application of the prefix specified by - --with-jemalloc-prefix, and mangled symbols are then ignored when applying - the prefix. - ---with-jemalloc-prefix= - Prefix all public APIs with . 
For example, if is - "prefix_", API changes like the following occur: - - malloc() --> prefix_malloc() - malloc_conf --> prefix_malloc_conf - /etc/malloc.conf --> /etc/prefix_malloc.conf - MALLOC_CONF --> PREFIX_MALLOC_CONF - - This makes it possible to use jemalloc at the same time as the system - allocator, or even to use multiple copies of jemalloc simultaneously. - - By default, the prefix is "", except on OS X, where it is "je_". On OS X, - jemalloc overlays the default malloc zone, but makes no attempt to actually - replace the "malloc", "calloc", etc. symbols. - ---without-export - Don't export public APIs. This can be useful when building jemalloc as a - static library, or to avoid exporting public APIs when using the zone - allocator on OSX. - ---with-private-namespace= - Prefix all library-private APIs with . For shared libraries, - symbol visibility mechanisms prevent these symbols from being exported, but - for static libraries, naming collisions are a real possibility. By - default, the prefix is "" (empty string). - ---with-install-suffix= - Append to the base name of all installed files, such that multiple - versions of jemalloc can coexist in the same installation directory. For - example, libjemalloc.so.0 becomes libjemalloc.so.0. - ---enable-cc-silence - Enable code that silences non-useful compiler warnings. This is helpful - when trying to tell serious warnings from those due to compiler - limitations, but it potentially incurs a performance penalty. - ---enable-debug - Enable assertions and validation code. This incurs a substantial - performance hit, but is very useful during application development. - Implies --enable-ivsalloc. - ---enable-ivsalloc - Enable validation code, which verifies that pointers reside within - jemalloc-owned chunks before dereferencing them. This incurs a substantial - performance hit. - ---disable-stats - Disable statistics gathering functionality. See the "opt.stats_print" - option documentation for usage details. 
- ---enable-prof - Enable heap profiling and leak detection functionality. See the "opt.prof" - option documentation for usage details. When enabled, there are several - approaches to backtracing, and the configure script chooses the first one - in the following list that appears to function correctly: - - + libunwind (requires --enable-prof-libunwind) - + libgcc (unless --disable-prof-libgcc) - + gcc intrinsics (unless --disable-prof-gcc) - ---enable-prof-libunwind - Use the libunwind library (http://www.nongnu.org/libunwind/) for stack - backtracing. - ---disable-prof-libgcc - Disable the use of libgcc's backtracing functionality. - ---disable-prof-gcc - Disable the use of gcc intrinsics for backtracing. - ---with-static-libunwind= - Statically link against the specified libunwind.a rather than dynamically - linking with -lunwind. - ---disable-tcache - Disable thread-specific caches for small objects. Objects are cached and - released in bulk, thus reducing the total number of mutex operations. See - the "opt.tcache" option for usage details. - ---enable-mremap - Enable huge realloc() via mremap(2). mremap() is disabled by default - because the flavor used is specific to Linux, which has a quirk in its - virtual memory allocation algorithm that causes semi-permanent VM map holes - under normal jemalloc operation. - ---disable-munmap - Disable virtual memory deallocation via munmap(2); instead keep track of - the virtual memory for later use. munmap() is disabled by default (i.e. - --disable-munmap is implied) on Linux, which has a quirk in its virtual - memory allocation algorithm that causes semi-permanent VM map holes under - normal jemalloc operation. - ---enable-dss - Enable support for page allocation/deallocation via sbrk(2), in addition to - mmap(2). - ---disable-fill - Disable support for junk/zero filling of memory, quarantine, and redzones. - See the "opt.junk", "opt.zero", "opt.quarantine", and "opt.redzone" option - documentation for usage details. 
- ---disable-valgrind - Disable support for Valgrind. - ---disable-experimental - Disable support for the experimental API (*allocm()). - ---disable-zone-allocator - Disable zone allocator for Darwin. This means jemalloc won't be hooked as - the default allocator on OSX/iOS. - ---enable-utrace - Enable utrace(2)-based allocation tracing. This feature is not broadly - portable (FreeBSD has it, but Linux and OS X do not). - ---enable-xmalloc - Enable support for optional immediate termination due to out-of-memory - errors, as is commonly implemented by "xmalloc" wrapper function for malloc. - See the "opt.xmalloc" option documentation for usage details. - ---enable-lazy-lock - Enable code that wraps pthread_create() to detect when an application - switches from single-threaded to multi-threaded mode, so that it can avoid - mutex locking/unlocking operations while in single-threaded mode. In - practice, this feature usually has little impact on performance unless - thread-specific caching is disabled. - ---disable-tls - Disable thread-local storage (TLS), which allows for fast access to - thread-local variables via the __thread keyword. If TLS is available, - jemalloc uses it for several purposes. - ---with-xslroot= - Specify where to find DocBook XSL stylesheets when building the - documentation. - -The following environment variables (not a definitive list) impact configure's -behavior: - -CFLAGS="?" - Pass these flags to the compiler. You probably shouldn't define this unless - you know what you are doing. (Use EXTRA_CFLAGS instead.) - -EXTRA_CFLAGS="?" - Append these flags to CFLAGS. This makes it possible to add flags such as - -Werror, while allowing the configure script to determine what other flags - are appropriate for the specified configuration. - - The configure script specifically checks whether an optimization flag (-O*) - is specified in EXTRA_CFLAGS, and refrains from specifying an optimization - level if it finds that one has already been specified. 
- -CPPFLAGS="?" - Pass these flags to the C preprocessor. Note that CFLAGS is not passed to - 'cpp' when 'configure' is looking for include files, so you must use - CPPFLAGS instead if you need to help 'configure' find header files. - -LD_LIBRARY_PATH="?" - 'ld' uses this colon-separated list to find libraries. - -LDFLAGS="?" - Pass these flags when linking. - -PATH="?" - 'configure' uses this to find programs. - -=== Advanced compilation ======================================================= - -To build only parts of jemalloc, use the following targets: - - build_lib_shared - build_lib_static - build_lib - build_doc_html - build_doc_man - build_doc - -To install only parts of jemalloc, use the following targets: - - install_bin - install_include - install_lib_shared - install_lib_static - install_lib - install_doc_html - install_doc_man - install_doc - -To clean up build results to varying degrees, use the following make targets: - - clean - distclean - relclean - -=== Advanced installation ====================================================== - -Optionally, define make variables when invoking make, including (not -exclusively): - -INCLUDEDIR="?" - Use this as the installation prefix for header files. - -LIBDIR="?" - Use this as the installation prefix for libraries. - -MANDIR="?" - Use this as the installation prefix for man pages. - -DESTDIR="?" - Prepend DESTDIR to INCLUDEDIR, LIBDIR, DATADIR, and MANDIR. This is useful - when installing to a different path than was specified via --prefix. - -CC="?" - Use this to invoke the C compiler. - -CFLAGS="?" - Pass these flags to the compiler. - -CPPFLAGS="?" - Pass these flags to the C preprocessor. - -LDFLAGS="?" - Pass these flags when linking. - -PATH="?" - Use this to search for programs used during configuration and building. 
- -=== Development ================================================================ - -If you intend to make non-trivial changes to jemalloc, use the 'autogen.sh' -script rather than 'configure'. This re-generates 'configure', enables -configuration dependency rules, and enables re-generation of automatically -generated source files. - -The build system supports using an object directory separate from the source -tree. For example, you can create an 'obj' directory, and from within that -directory, issue configuration and build commands: - - autoconf - mkdir obj - cd obj - ../configure --enable-autogen - make - -=== Documentation ============================================================== - -The manual page is generated in both html and roff formats. Any web browser -can be used to view the html manual. The roff manual page can be formatted -prior to installation via the following command: - - nroff -man -t doc/jemalloc.3 diff --git a/INSTALL.md b/INSTALL.md new file mode 100644 index 00000000..2333f13d --- /dev/null +++ b/INSTALL.md @@ -0,0 +1,527 @@ +Building and installing a packaged release of jemalloc can be as simple as +typing the following while in the root directory of the source tree: + + ./configure + make + make install + +If building from unpackaged developer sources, the simplest command sequence +that might work is: + + ./autogen.sh + make + make install + +You can uninstall the installed build artifacts like this: + + make uninstall + +Notes: + - "autoconf" needs to be installed + - Documentation is built by the default target only when xsltproc is +available. Build will warn but not stop if the dependency is missing. + + +## Advanced configuration + +The 'configure' script supports numerous options that allow control of which +functionality is enabled, where jemalloc is installed, etc. Optionally, pass +any of the following arguments (not a definitive list) to 'configure': + +* `--help` + + Print a definitive list of options. 
+ +* `--prefix=<install-root-dir>` + + Set the base directory in which to install. For example: + + ./configure --prefix=/usr/local + + will cause files to be installed into /usr/local/include, /usr/local/lib, + and /usr/local/man. + +* `--with-version=(<major>.<minor>.<bugfix>-<nrev>-g<gid>|VERSION)` + + The VERSION file is mandatory for successful configuration, and the + following steps are taken to assure its presence: + 1) If --with-version=<major>.<minor>.<bugfix>-<nrev>-g<gid> is specified, + generate VERSION using the specified value. + 2) If --with-version is not specified in either form and the source + directory is inside a git repository, try to generate VERSION via 'git + describe' invocations that pattern-match release tags. + 3) If VERSION is missing, generate it with a bogus version: + 0.0.0-0-g0000000000000000000000000000000000000000 + + Note that --with-version=VERSION bypasses (1) and (2), which simplifies + VERSION configuration when embedding a jemalloc release into another + project's git repository. + +* `--with-rpath=<colon-separated-rpath>` + + Embed one or more library paths, so that libjemalloc can find the libraries + it is linked to. This works only on ELF-based systems. + +* `--with-mangling=<map>` + + Mangle public symbols specified in <map> which is a comma-separated list of + name:mangled pairs. + + For example, to use ld's --wrap option as an alternative method for + overriding libc's malloc implementation, specify something like: + + --with-mangling=malloc:__wrap_malloc,free:__wrap_free[...] + + Note that mangling happens prior to application of the prefix specified by + --with-jemalloc-prefix, and mangled symbols are then ignored when applying + the prefix. + +* `--with-jemalloc-prefix=<prefix>` + + Prefix all public APIs with <prefix>. 
For example, if <prefix> is + "prefix_", API changes like the following occur: + + malloc() --> prefix_malloc() + malloc_conf --> prefix_malloc_conf + /etc/malloc.conf --> /etc/prefix_malloc.conf + MALLOC_CONF --> PREFIX_MALLOC_CONF + + This makes it possible to use jemalloc at the same time as the system + allocator, or even to use multiple copies of jemalloc simultaneously. + + By default, the prefix is "", except on OS X, where it is "je_". On OS X, + jemalloc overlays the default malloc zone, but makes no attempt to actually + replace the "malloc", "calloc", etc. symbols. + +* `--without-export` + + Don't export public APIs. This can be useful when building jemalloc as a + static library, or to avoid exporting public APIs when using the zone + allocator on OSX. + +* `--with-private-namespace=<prefix>` + + Prefix all library-private APIs with <prefix>je_. For shared libraries, + symbol visibility mechanisms prevent these symbols from being exported, but + for static libraries, naming collisions are a real possibility. By + default, <prefix> is empty, which results in a symbol prefix of je_. + +* `--with-install-suffix=<suffix>` + + Append <suffix> to the base name of all installed files, such that multiple + versions of jemalloc can coexist in the same installation directory. For + example, libjemalloc.so.0 becomes libjemalloc<suffix>.so.0. + +* `--with-malloc-conf=<malloc_conf>` + + Embed `<malloc_conf>` as a run-time options string that is processed prior to + the malloc_conf global variable, the /etc/malloc.conf symlink, and the + MALLOC_CONF environment variable. For example, to change the default decay + time to 30 seconds: + + --with-malloc-conf=decay_ms:30000 + +* `--enable-debug` + + Enable assertions and validation code. This incurs a substantial + performance hit, but is very useful during application development. + +* `--disable-stats` + + Disable statistics gathering functionality. See the "opt.stats_print" + option documentation for usage details. + +* `--enable-prof` + + Enable heap profiling and leak detection functionality. 
See the "opt.prof" + option documentation for usage details. When enabled, there are several + approaches to backtracing, and the configure script chooses the first one + in the following list that appears to function correctly: + + + libunwind (requires --enable-prof-libunwind) + + frame pointer (requires --enable-prof-frameptr) + + libgcc (unless --disable-prof-libgcc) + + gcc intrinsics (unless --disable-prof-gcc) + +* `--enable-prof-libunwind` + + Use the libunwind library (http://www.nongnu.org/libunwind/) for stack + backtracing. + +* `--enable-prof-frameptr` + + Use the optimized frame pointer unwinder for stack backtracing. Safe + to use in mixed code (with and without frame pointers) - but requires + frame pointers to produce meaningful stacks. Linux only. + +* `--disable-prof-libgcc` + + Disable the use of libgcc's backtracing functionality. + +* `--disable-prof-gcc` + + Disable the use of gcc intrinsics for backtracing. + +* `--with-static-libunwind=<libunwind.a>` + + Statically link against the specified libunwind.a rather than dynamically + linking with -lunwind. + +* `--disable-fill` + + Disable support for junk/zero filling of memory. See the "opt.junk" and + "opt.zero" option documentation for usage details. + +* `--disable-zone-allocator` + + Disable zone allocator for Darwin. This means jemalloc won't be hooked as + the default allocator on OSX/iOS. + +* `--enable-utrace` + + Enable utrace(2)-based allocation tracing. This feature is not broadly + portable (FreeBSD has it, but Linux and OS X do not). + +* `--enable-xmalloc` + + Enable support for optional immediate termination due to out-of-memory + errors, as is commonly implemented by "xmalloc" wrapper function for malloc. + See the "opt.xmalloc" option documentation for usage details. 
+ +* `--enable-lazy-lock` + + Enable code that wraps pthread_create() to detect when an application + switches from single-threaded to multi-threaded mode, so that it can avoid + mutex locking/unlocking operations while in single-threaded mode. In + practice, this feature usually has little impact on performance unless + thread-specific caching is disabled. + +* `--disable-cache-oblivious` + + Disable cache-oblivious large allocation alignment by default, for large + allocation requests with no alignment constraints. If this feature is + disabled, all large allocations are page-aligned as an implementation + artifact, which can severely harm CPU cache utilization. However, the + cache-oblivious layout comes at the cost of one extra page per large + allocation, which in the most extreme case increases physical memory usage + for the 16 KiB size class to 20 KiB. + +* `--disable-syscall` + + Disable use of syscall(2) rather than {open,read,write,close}(2). This is + intended as a workaround for systems that place security limitations on + syscall(2). + +* `--disable-cxx` + + Disable C++ integration. This will cause new and delete operator + implementations to be omitted. + +* `--with-xslroot=<path>` + + Specify where to find DocBook XSL stylesheets when building the + documentation. + +* `--with-lg-page=<lg-page>` + + Specify the base 2 log of the allocator page size, which must in turn be at + least as large as the system page size. By default the configure script + determines the host's page size and sets the allocator page size equal to + the system page size, so this option need not be specified unless the + system page size may change between configuration and execution, e.g. when + cross compiling. + +* `--with-lg-hugepage=<lg-hugepage>` + + Specify the base 2 log of the system huge page size. This option is useful + when cross compiling, or when overriding the default for systems that do + not explicitly support huge pages. 
+ +* `--with-lg-quantum=<lg-quantum>` + + Specify the base 2 log of the minimum allocation alignment. jemalloc needs + to know the minimum alignment that meets the following C standard + requirement (quoted from the April 12, 2011 draft of the C11 standard): + + > The pointer returned if the allocation succeeds is suitably aligned so + that it may be assigned to a pointer to any type of object with a + fundamental alignment requirement and then used to access such an object + or an array of such objects in the space allocated [...] + + This setting is architecture-specific, and although jemalloc includes known + safe values for the most commonly used modern architectures, there is a + wrinkle related to GNU libc (glibc) that may impact your choice of + <lg-quantum>. On most modern architectures, this mandates 16-byte + alignment (<lg-quantum>=4), but the glibc developers chose not to meet this + requirement for performance reasons. An old discussion can be found at + <https://sourceware.org/bugzilla/show_bug.cgi?id=206>. Unlike glibc, + jemalloc does follow the C standard by default (caveat: jemalloc + technically cheats for size classes smaller than the quantum), but the fact + that Linux systems already work around this allocator noncompliance means + that it is generally safe in practice to let jemalloc's minimum alignment + follow glibc's lead. If you specify `--with-lg-quantum=3` during + configuration, jemalloc will provide additional size classes that are not + 16-byte-aligned (24, 40, and 56). + +* `--with-lg-vaddr=<lg-vaddr>` + + Specify the number of significant virtual address bits. By default, the + configure script attempts to detect virtual address size on those platforms + where it knows how, and picks a default otherwise. This option may be + useful when cross-compiling. + +* `--disable-initial-exec-tls` + + Disable the initial-exec TLS model for jemalloc's internal thread-local + storage (on those platforms that support explicit settings). This can allow + jemalloc to be dynamically loaded after program startup (e.g. using dlopen). 
+ Note that in this case, there will be two malloc implementations operating + in the same process, which will almost certainly result in confusing runtime + crashes if pointers leak from one implementation to the other. + +* `--disable-libdl` + + Disable the usage of libdl, namely dlsym(3) which is required by the lazy + lock option. This can allow building static binaries. + +The following environment variables (not a definitive list) impact configure's +behavior: + +* `CFLAGS="?"` +* `CXXFLAGS="?"` + + Pass these flags to the C/C++ compiler. Any flags set by the configure + script are prepended, which means explicitly set flags generally take + precedence. Take care when specifying flags such as -Werror, because + configure tests may be affected in undesirable ways. + +* `EXTRA_CFLAGS="?"` +* `EXTRA_CXXFLAGS="?"` + + Append these flags to CFLAGS/CXXFLAGS, without passing them to the + compiler(s) during configuration. This makes it possible to add flags such + as -Werror, while allowing the configure script to determine what other + flags are appropriate for the specified configuration. + +* `CPPFLAGS="?"` + + Pass these flags to the C preprocessor. Note that CFLAGS is not passed to + 'cpp' when 'configure' is looking for include files, so you must use + CPPFLAGS instead if you need to help 'configure' find header files. + +* `LD_LIBRARY_PATH="?"` + + 'ld' uses this colon-separated list to find libraries. + +* `LDFLAGS="?"` + + Pass these flags when linking. + +* `PATH="?"` + + 'configure' uses this to find programs. + +In some cases it may be necessary to work around configuration results that do +not match reality. For example, Linux 3.4 added support for the MADV_DONTDUMP +flag to madvise(2), which can cause problems if building on a host with +MADV_DONTDUMP support and deploying to a target without. 
To work around this, +use a cache file to override the relevant configuration variable defined in +configure.ac, e.g.: + + echo "je_cv_madv_dontdump=no" > config.cache && ./configure -C + + +## Advanced compilation + +To build only parts of jemalloc, use the following targets: + + build_lib_shared + build_lib_static + build_lib + build_doc_html + build_doc_man + build_doc + +To install only parts of jemalloc, use the following targets: + + install_bin + install_include + install_lib_shared + install_lib_static + install_lib_pc + install_lib + install_doc_html + install_doc_man + install_doc + +To clean up build results to varying degrees, use the following make targets: + + clean + distclean + relclean + + +## Advanced installation + +Optionally, define make variables when invoking make, including (not +exclusively): + +* `INCLUDEDIR="?"` + + Use this as the installation prefix for header files. + +* `LIBDIR="?"` + + Use this as the installation prefix for libraries. + +* `MANDIR="?"` + + Use this as the installation prefix for man pages. + +* `DESTDIR="?"` + + Prepend DESTDIR to INCLUDEDIR, LIBDIR, DATADIR, and MANDIR. This is useful + when installing to a different path than was specified via --prefix. + +* `CC="?"` + + Use this to invoke the C compiler. + +* `CFLAGS="?"` + + Pass these flags to the compiler. + +* `CPPFLAGS="?"` + + Pass these flags to the C preprocessor. + +* `LDFLAGS="?"` + + Pass these flags when linking. + +* `PATH="?"` + + Use this to search for programs used during configuration and building. + +## Building for Windows + +There are at least two ways to build jemalloc's libraries for Windows. They +differ in their ease of use and flexibility. + +### With MSVC solutions +This is the easy, but less flexible approach. It doesn't let you specify +arguments to the `configure` script. + +1. Install Cygwin with at least the following packages: + * autoconf + * autogen + * gawk + * grep + * sed + +2. 
Install Visual Studio 2015 or 2017 with Visual C++ + +3. Add Cygwin\bin to the PATH environment variable + +4. Open "x64 Native Tools Command Prompt for VS 2017" + (note: x86/x64 doesn't matter at this point) + +5. Generate header files: + sh -c "CC=cl ./autogen.sh" + +6. Now the project can be opened and built in Visual Studio: + msvc\jemalloc_vc2017.sln + +### With MSYS +This is a more involved approach that offers the same configuration flexibility +as Linux builds. We use it for our CI workflow to test different jemalloc +configurations on Windows. + +1. Install the prerequisites + 1. MSYS2 + 2. Chocolatey + 3. Visual Studio if you want to compile with MSVC compiler + +2. Run your bash emulation. It could be MSYS2 or Git Bash (this manual was + tested on both) +3. Manually and selectively follow + [before_install.sh](https://github.com/jemalloc/jemalloc/blob/dev/scripts/windows/before_install.sh) + script. + 1. Skip the `TRAVIS_OS_NAME` check, `rm -rf C:/tools/msys64` and `choco + uninstall/upgrade` part. + 2. If using `msys2` shell, add path to `RefreshEnv.cmd` to `PATH`: + `PATH="$PATH:/c/ProgramData/chocolatey/bin"` + 3. Assign `msys_shell_cmd`, `msys2`, `mingw32` and `mingw64` as in the + script. + 4. Pick `CROSS_COMPILE_32BIT` , `CC` and `USE_MSVC` values depending on + your needs. For instance, if you'd like to build for x86_64 Windows + with `gcc`, then `CROSS_COMPILE_32BIT="no"`, `CC="gcc"` and + `USE_MSVC=""`. If you'd like to build for x86 Windows with `cl.exe`, + then `CROSS_COMPILE_32BIT="yes"`, `CC="cl.exe"`, `USE_MSVC="x86"`. + For x86_64 builds with `cl.exe`, assign `USE_MSVC="amd64"` and + `CROSS_COMPILE_32BIT="no"`. + 5. Replace the path to `vcvarsall.bat` with the path on your system. For + instance, on my Windows PC with Visual Studio 17, the path is + `C:\Program Files (x86)\Microsoft Visual + Studio\2017\BuildTools\VC\Auxiliary\Build\vcvarsall.bat`. + 6. Execute the rest of the script. 
It will install the required + dependencies and assign the variable `build_env`, which is a function + that executes the following commands with the correct environment + variables set. +4. Use `$build_env <command>` as you would in a Linux shell: + 1. `$build_env autoconf` + 2. `$build_env ./configure CC="<desired compiler>" <configuration flags>` + 3. `$build_env mingw32-make` + +If you're having any issues with the above, ensure the following: + +5. When you run `cmd //C RefreshEnv.cmd`, you get an output line starting with + `Refreshing`. If it errors saying `RefreshEnv.cmd` is not found, then you + need to add it to your `PATH` as described above in item 3.2. + +6. When you run `cmd //C $vcvarsall`, it prints a bunch of environment + variables. Otherwise, check the path to the `vcvarsall.bat` in `$vcvarsall` + script and fix it. + +### Building from vcpkg + +The jemalloc port in vcpkg is kept up to date by Microsoft team members and +community contributors. The url of vcpkg is: https://github.com/Microsoft/vcpkg +. You can download and install jemalloc using the vcpkg dependency manager: + +```shell +git clone https://github.com/Microsoft/vcpkg.git +cd vcpkg +./bootstrap-vcpkg.sh # ./bootstrap-vcpkg.bat for Windows +./vcpkg integrate install +./vcpkg install jemalloc +``` + +If the version is out of date, please [create an issue or pull +request](https://github.com/Microsoft/vcpkg) on the vcpkg repository. + +## Development + +If you intend to make non-trivial changes to jemalloc, use the 'autogen.sh' +script rather than 'configure'. This re-generates 'configure', enables +configuration dependency rules, and enables re-generation of automatically +generated source files. + +The build system supports using an object directory separate from the source +tree. 
For example, you can create an 'obj' directory, and from within that +directory, issue configuration and build commands: + + autoconf + mkdir obj + cd obj + ../configure --enable-autogen + make + + +## Documentation + +The manual page is generated in both html and roff formats. Any web browser +can be used to view the html manual. The roff manual page can be formatted +prior to installation via the following command: + + nroff -man -t doc/jemalloc.3 diff --git a/Makefile.in b/Makefile.in index 74810472..435fc34d 100644 --- a/Makefile.in +++ b/Makefile.in @@ -9,6 +9,7 @@ vpath % . SHELL := /bin/sh CC := @CC@ +CXX := @CXX@ # Configuration parameters. DESTDIR = @@ -23,8 +24,15 @@ abs_srcroot := @abs_srcroot@ abs_objroot := @abs_objroot@ # Build parameters. -CPPFLAGS := @CPPFLAGS@ -I$(srcroot)include -I$(objroot)include -CFLAGS := @CFLAGS@ +CPPFLAGS := @CPPFLAGS@ -I$(objroot)include -I$(srcroot)include +CONFIGURE_CFLAGS := @CONFIGURE_CFLAGS@ +SPECIFIED_CFLAGS := @SPECIFIED_CFLAGS@ +EXTRA_CFLAGS := @EXTRA_CFLAGS@ +CFLAGS := $(strip $(CONFIGURE_CFLAGS) $(SPECIFIED_CFLAGS) $(EXTRA_CFLAGS)) +CONFIGURE_CXXFLAGS := @CONFIGURE_CXXFLAGS@ +SPECIFIED_CXXFLAGS := @SPECIFIED_CXXFLAGS@ +EXTRA_CXXFLAGS := @EXTRA_CXXFLAGS@ +CXXFLAGS := $(strip $(CONFIGURE_CXXFLAGS) $(SPECIFIED_CXXFLAGS) $(EXTRA_CXXFLAGS)) LDFLAGS := @LDFLAGS@ EXTRA_LDFLAGS := @EXTRA_LDFLAGS@ LIBS := @LIBS@ @@ -39,23 +47,37 @@ REV := @rev@ install_suffix := @install_suffix@ ABI := @abi@ XSLTPROC := @XSLTPROC@ +XSLROOT := @XSLROOT@ AUTOCONF := @AUTOCONF@ _RPATH = @RPATH@ RPATH = $(if $(1),$(call _RPATH,$(1))) -cfghdrs_in := @cfghdrs_in@ +cfghdrs_in := $(addprefix $(srcroot),@cfghdrs_in@) cfghdrs_out := @cfghdrs_out@ -cfgoutputs_in := @cfgoutputs_in@ +cfgoutputs_in := $(addprefix $(srcroot),@cfgoutputs_in@) cfgoutputs_out := @cfgoutputs_out@ enable_autogen := @enable_autogen@ -enable_experimental := @enable_experimental@ +enable_doc := @enable_doc@ +enable_shared := @enable_shared@ +enable_static := @enable_static@ 
+enable_prof := @enable_prof@ enable_zone_allocator := @enable_zone_allocator@ +enable_experimental_smallocx := @enable_experimental_smallocx@ +MALLOC_CONF := @JEMALLOC_CPREFIX@MALLOC_CONF +link_whole_archive := @link_whole_archive@ DSO_LDFLAGS = @DSO_LDFLAGS@ SOREV = @SOREV@ PIC_CFLAGS = @PIC_CFLAGS@ CTARGET = @CTARGET@ LDTARGET = @LDTARGET@ +TEST_LD_MODE = @TEST_LD_MODE@ MKLIB = @MKLIB@ +AR = @AR@ +ARFLAGS = @ARFLAGS@ +DUMP_SYMS = @DUMP_SYMS@ +AWK := @AWK@ CC_MM = @CC_MM@ +LM := @LM@ +INSTALL = @INSTALL@ ifeq (macho, $(ABI)) TEST_LIBRARY_PATH := DYLD_FALLBACK_LIBRARY_PATH="$(objroot)lib" @@ -70,19 +92,77 @@ endif LIBJEMALLOC := $(LIBPREFIX)jemalloc$(install_suffix) # Lists of files. -BINS := $(srcroot)bin/pprof $(objroot)bin/jemalloc.sh -CHDRS := $(objroot)include/jemalloc/jemalloc$(install_suffix).h \ - $(objroot)include/jemalloc/jemalloc_defs$(install_suffix).h -CSRCS := $(srcroot)src/jemalloc.c $(srcroot)src/arena.c $(srcroot)src/atomic.c \ - $(srcroot)src/base.c $(srcroot)src/bitmap.c $(srcroot)src/chunk.c \ - $(srcroot)src/chunk_dss.c $(srcroot)src/chunk_mmap.c \ - $(srcroot)src/ckh.c $(srcroot)src/ctl.c $(srcroot)src/extent.c \ - $(srcroot)src/hash.c $(srcroot)src/huge.c $(srcroot)src/mb.c \ - $(srcroot)src/mutex.c $(srcroot)src/prof.c $(srcroot)src/quarantine.c \ - $(srcroot)src/rtree.c $(srcroot)src/stats.c $(srcroot)src/tcache.c \ - $(srcroot)src/util.c $(srcroot)src/tsd.c +BINS := $(objroot)bin/jemalloc-config $(objroot)bin/jemalloc.sh $(objroot)bin/jeprof +C_HDRS := $(objroot)include/jemalloc/jemalloc$(install_suffix).h +C_SRCS := $(srcroot)src/jemalloc.c \ + $(srcroot)src/arena.c \ + $(srcroot)src/background_thread.c \ + $(srcroot)src/base.c \ + $(srcroot)src/bin.c \ + $(srcroot)src/bin_info.c \ + $(srcroot)src/bitmap.c \ + $(srcroot)src/buf_writer.c \ + $(srcroot)src/cache_bin.c \ + $(srcroot)src/ckh.c \ + $(srcroot)src/counter.c \ + $(srcroot)src/ctl.c \ + $(srcroot)src/decay.c \ + $(srcroot)src/div.c \ + $(srcroot)src/ecache.c \ + 
$(srcroot)src/edata.c \ + $(srcroot)src/edata_cache.c \ + $(srcroot)src/ehooks.c \ + $(srcroot)src/emap.c \ + $(srcroot)src/eset.c \ + $(srcroot)src/exp_grow.c \ + $(srcroot)src/extent.c \ + $(srcroot)src/extent_dss.c \ + $(srcroot)src/extent_mmap.c \ + $(srcroot)src/fxp.c \ + $(srcroot)src/san.c \ + $(srcroot)src/san_bump.c \ + $(srcroot)src/hook.c \ + $(srcroot)src/hpa.c \ + $(srcroot)src/hpa_central.c \ + $(srcroot)src/hpa_hooks.c \ + $(srcroot)src/hpa_utils.c \ + $(srcroot)src/hpdata.c \ + $(srcroot)src/inspect.c \ + $(srcroot)src/large.c \ + $(srcroot)src/log.c \ + $(srcroot)src/malloc_io.c \ + $(srcroot)src/conf.c \ + $(srcroot)src/mutex.c \ + $(srcroot)src/nstime.c \ + $(srcroot)src/pa.c \ + $(srcroot)src/pa_extra.c \ + $(srcroot)src/pac.c \ + $(srcroot)src/pages.c \ + $(srcroot)src/peak_event.c \ + $(srcroot)src/prof.c \ + $(srcroot)src/prof_data.c \ + $(srcroot)src/prof_log.c \ + $(srcroot)src/prof_recent.c \ + $(srcroot)src/prof_stack_range.c \ + $(srcroot)src/prof_stats.c \ + $(srcroot)src/prof_sys.c \ + $(srcroot)src/psset.c \ + $(srcroot)src/rtree.c \ + $(srcroot)src/safety_check.c \ + $(srcroot)src/sc.c \ + $(srcroot)src/sec.c \ + $(srcroot)src/stats.c \ + $(srcroot)src/sz.c \ + $(srcroot)src/tcache.c \ + $(srcroot)src/test_hooks.c \ + $(srcroot)src/thread_event.c \ + $(srcroot)src/thread_event_registry.c \ + $(srcroot)src/ticker.c \ + $(srcroot)src/tsd.c \ + $(srcroot)src/util.c \ + $(srcroot)src/witness.c ifeq ($(enable_zone_allocator), 1) -CSRCS += $(srcroot)src/zone.c +C_SRCS += $(srcroot)src/zone.c endif ifeq ($(IMPORTLIB),$(SO)) STATIC_LIBS := $(objroot)lib/$(LIBJEMALLOC).$(A) @@ -96,40 +176,261 @@ DSOS := $(objroot)lib/$(LIBJEMALLOC).$(SOREV) ifneq ($(SOREV),$(SO)) DSOS += $(objroot)lib/$(LIBJEMALLOC).$(SO) endif -MAN3 := $(objroot)doc/jemalloc$(install_suffix).3 +ifeq (1, $(link_whole_archive)) +LJEMALLOC := -Wl,--whole-archive -L$(objroot)lib -l$(LIBJEMALLOC) -Wl,--no-whole-archive +else +LJEMALLOC := $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) 
+endif +PC := $(objroot)jemalloc.pc DOCS_XML := $(objroot)doc/jemalloc$(install_suffix).xml -DOCS_HTML := $(DOCS_XML:$(objroot)%.xml=$(srcroot)%.html) -DOCS_MAN3 := $(DOCS_XML:$(objroot)%.xml=$(srcroot)%.3) +DOCS_HTML := $(DOCS_XML:$(objroot)%.xml=$(objroot)%.html) +DOCS_MAN3 := $(DOCS_XML:$(objroot)%.xml=$(objroot)%.3) DOCS := $(DOCS_HTML) $(DOCS_MAN3) -CTESTS := $(srcroot)test/aligned_alloc.c $(srcroot)test/allocated.c \ - $(srcroot)test/ALLOCM_ARENA.c $(srcroot)test/bitmap.c \ - $(srcroot)test/mremap.c $(srcroot)test/posix_memalign.c \ - $(srcroot)test/thread_arena.c $(srcroot)test/thread_tcache_enabled.c -ifeq ($(enable_experimental), 1) -CTESTS += $(srcroot)test/allocm.c $(srcroot)test/rallocm.c +C_TESTLIB_SRCS := $(srcroot)test/src/btalloc.c $(srcroot)test/src/btalloc_0.c \ + $(srcroot)test/src/btalloc_1.c $(srcroot)test/src/math.c \ + $(srcroot)test/src/mtx.c $(srcroot)test/src/sleep.c \ + $(srcroot)test/src/SFMT.c $(srcroot)test/src/test.c \ + $(srcroot)test/src/thd.c $(srcroot)test/src/timer.c +ifeq (1, $(link_whole_archive)) +C_UTIL_INTEGRATION_SRCS := +C_UTIL_CPP_SRCS := +else +C_UTIL_INTEGRATION_SRCS := $(srcroot)src/nstime.c $(srcroot)src/malloc_io.c \ + $(srcroot)src/ticker.c +C_UTIL_CPP_SRCS := $(srcroot)src/nstime.c $(srcroot)src/malloc_io.c +endif +TESTS_UNIT := \ + $(srcroot)test/unit/a0.c \ + $(srcroot)test/unit/arena_decay.c \ + $(srcroot)test/unit/arena_reset.c \ + $(srcroot)test/unit/atomic.c \ + $(srcroot)test/unit/background_thread.c \ + $(srcroot)test/unit/background_thread_enable.c \ + $(srcroot)test/unit/background_thread_init.c \ + $(srcroot)test/unit/base.c \ + $(srcroot)test/unit/batch_alloc.c \ + $(srcroot)test/unit/bin.c \ + $(srcroot)test/unit/binshard.c \ + $(srcroot)test/unit/bitmap.c \ + $(srcroot)test/unit/bit_util.c \ + $(srcroot)test/unit/buf_writer.c \ + $(srcroot)test/unit/cache_bin.c \ + $(srcroot)test/unit/ckh.c \ + $(srcroot)test/unit/conf.c \ + $(srcroot)test/unit/conf_init_0.c \ + $(srcroot)test/unit/conf_init_1.c \ + 
$(srcroot)test/unit/conf_init_confirm.c \ + $(srcroot)test/unit/conf_parse.c \ + $(srcroot)test/unit/counter.c \ + $(srcroot)test/unit/decay.c \ + $(srcroot)test/unit/div.c \ + $(srcroot)test/unit/double_free.c \ + $(srcroot)test/unit/edata_cache.c \ + $(srcroot)test/unit/emitter.c \ + $(srcroot)test/unit/extent_quantize.c \ + ${srcroot}test/unit/fb.c \ + $(srcroot)test/unit/fork.c \ + ${srcroot}test/unit/fxp.c \ + ${srcroot}test/unit/san.c \ + ${srcroot}test/unit/san_bump.c \ + $(srcroot)test/unit/hash.c \ + $(srcroot)test/unit/hook.c \ + $(srcroot)test/unit/hpa.c \ + $(srcroot)test/unit/hpa_sec_integration.c \ + $(srcroot)test/unit/hpa_thp_always.c \ + $(srcroot)test/unit/hpa_vectorized_madvise.c \ + $(srcroot)test/unit/hpa_vectorized_madvise_large_batch.c \ + $(srcroot)test/unit/hpa_background_thread.c \ + $(srcroot)test/unit/hpdata.c \ + $(srcroot)test/unit/huge.c \ + $(srcroot)test/unit/inspect.c \ + $(srcroot)test/unit/junk.c \ + $(srcroot)test/unit/junk_alloc.c \ + $(srcroot)test/unit/junk_free.c \ + $(srcroot)test/unit/json_stats.c \ + $(srcroot)test/unit/large_ralloc.c \ + $(srcroot)test/unit/log.c \ + $(srcroot)test/unit/mallctl.c \ + $(srcroot)test/unit/malloc_conf_2.c \ + $(srcroot)test/unit/malloc_io.c \ + $(srcroot)test/unit/math.c \ + $(srcroot)test/unit/mpsc_queue.c \ + $(srcroot)test/unit/mq.c \ + $(srcroot)test/unit/mtx.c \ + $(srcroot)test/unit/nstime.c \ + $(srcroot)test/unit/ncached_max.c \ + $(srcroot)test/unit/oversize_threshold.c \ + $(srcroot)test/unit/pa.c \ + $(srcroot)test/unit/pack.c \ + $(srcroot)test/unit/pages.c \ + $(srcroot)test/unit/peak.c \ + $(srcroot)test/unit/ph.c \ + $(srcroot)test/unit/prng.c \ + $(srcroot)test/unit/prof_accum.c \ + $(srcroot)test/unit/prof_active.c \ + $(srcroot)test/unit/prof_gdump.c \ + $(srcroot)test/unit/prof_hook.c \ + $(srcroot)test/unit/prof_idump.c \ + $(srcroot)test/unit/prof_log.c \ + $(srcroot)test/unit/prof_mdump.c \ + $(srcroot)test/unit/prof_recent.c \ + $(srcroot)test/unit/prof_reset.c \ + 
$(srcroot)test/unit/prof_small.c \ + $(srcroot)test/unit/prof_stats.c \ + $(srcroot)test/unit/prof_tctx.c \ + $(srcroot)test/unit/prof_thread_name.c \ + $(srcroot)test/unit/prof_sys_thread_name.c \ + $(srcroot)test/unit/psset.c \ + $(srcroot)test/unit/ql.c \ + $(srcroot)test/unit/qr.c \ + $(srcroot)test/unit/rb.c \ + $(srcroot)test/unit/retained.c \ + $(srcroot)test/unit/rtree.c \ + $(srcroot)test/unit/safety_check.c \ + $(srcroot)test/unit/sc.c \ + $(srcroot)test/unit/sec.c \ + $(srcroot)test/unit/seq.c \ + $(srcroot)test/unit/SFMT.c \ + $(srcroot)test/unit/size_check.c \ + $(srcroot)test/unit/size_classes.c \ + $(srcroot)test/unit/slab.c \ + $(srcroot)test/unit/smoothstep.c \ + $(srcroot)test/unit/spin.c \ + $(srcroot)test/unit/stats.c \ + $(srcroot)test/unit/stats_print.c \ + $(srcroot)test/unit/sz.c \ + $(srcroot)test/unit/tcache_init.c \ + $(srcroot)test/unit/tcache_max.c \ + $(srcroot)test/unit/test_hooks.c \ + $(srcroot)test/unit/thread_event.c \ + $(srcroot)test/unit/ticker.c \ + $(srcroot)test/unit/tsd.c \ + $(srcroot)test/unit/uaf.c \ + $(srcroot)test/unit/witness.c \ + $(srcroot)test/unit/zero.c \ + $(srcroot)test/unit/zero_realloc_abort.c \ + $(srcroot)test/unit/zero_realloc_free.c \ + $(srcroot)test/unit/zero_realloc_alloc.c \ + $(srcroot)test/unit/zero_reallocs.c +ifeq (@enable_prof@, 1) +TESTS_UNIT += \ + $(srcroot)test/unit/arena_reset_prof.c \ + $(srcroot)test/unit/batch_alloc_prof.c +endif +TESTS_INTEGRATION := $(srcroot)test/integration/aligned_alloc.c \ + $(srcroot)test/integration/allocated.c \ + $(srcroot)test/integration/extent.c \ + $(srcroot)test/integration/malloc.c \ + $(srcroot)test/integration/mallocx.c \ + $(srcroot)test/integration/MALLOCX_ARENA.c \ + $(srcroot)test/integration/overflow.c \ + $(srcroot)test/integration/posix_memalign.c \ + $(srcroot)test/integration/rallocx.c \ + $(srcroot)test/integration/sdallocx.c \ + $(srcroot)test/integration/slab_sizes.c \ + $(srcroot)test/integration/thread_arena.c \ + 
$(srcroot)test/integration/thread_tcache_enabled.c \ + $(srcroot)test/integration/xallocx.c +ifeq (@enable_experimental_smallocx@, 1) +TESTS_INTEGRATION += \ + $(srcroot)test/integration/smallocx.c +endif +ifeq (@enable_cxx@, 1) +CPP_SRCS := $(srcroot)src/jemalloc_cpp.cpp +TESTS_INTEGRATION_CPP := $(srcroot)test/integration/cpp/basic.cpp \ + $(srcroot)test/integration/cpp/infallible_new_true.cpp \ + $(srcroot)test/integration/cpp/infallible_new_false.cpp +else +CPP_SRCS := +TESTS_INTEGRATION_CPP := +endif +TESTS_ANALYZE := $(srcroot)test/analyze/prof_bias.c \ + $(srcroot)test/analyze/rand.c \ + $(srcroot)test/analyze/sizes.c +TESTS_STRESS := $(srcroot)test/stress/batch_alloc.c \ + $(srcroot)test/stress/fill_flush.c \ + $(srcroot)test/stress/hookbench.c \ + $(srcroot)test/stress/large_microbench.c \ + $(srcroot)test/stress/mallctl.c \ + $(srcroot)test/stress/microbench.c +ifeq (@enable_cxx@, 1) +TESTS_STRESS_CPP := $(srcroot)test/stress/cpp/microbench.cpp +else +TESTS_STRESS_CPP := endif -COBJS := $(CSRCS:$(srcroot)%.c=$(objroot)%.$(O)) -CPICOBJS := $(CSRCS:$(srcroot)%.c=$(objroot)%.pic.$(O)) -CTESTOBJS := $(CTESTS:$(srcroot)%.c=$(objroot)%.$(O)) + +TESTS := $(TESTS_UNIT) $(TESTS_INTEGRATION) $(TESTS_INTEGRATION_CPP) \ + $(TESTS_ANALYZE) $(TESTS_STRESS) $(TESTS_STRESS_CPP) + +PRIVATE_NAMESPACE_HDRS := $(objroot)include/jemalloc/internal/private_namespace.h $(objroot)include/jemalloc/internal/private_namespace_jet.h +PRIVATE_NAMESPACE_GEN_HDRS := $(PRIVATE_NAMESPACE_HDRS:%.h=%.gen.h) +C_SYM_OBJS := $(C_SRCS:$(srcroot)%.c=$(objroot)%.sym.$(O)) +C_SYMS := $(C_SRCS:$(srcroot)%.c=$(objroot)%.sym) +C_OBJS := $(C_SRCS:$(srcroot)%.c=$(objroot)%.$(O)) +CPP_OBJS := $(CPP_SRCS:$(srcroot)%.cpp=$(objroot)%.$(O)) +C_PIC_OBJS := $(C_SRCS:$(srcroot)%.c=$(objroot)%.pic.$(O)) +CPP_PIC_OBJS := $(CPP_SRCS:$(srcroot)%.cpp=$(objroot)%.pic.$(O)) +C_JET_SYM_OBJS := $(C_SRCS:$(srcroot)%.c=$(objroot)%.jet.sym.$(O)) +C_JET_SYMS := $(C_SRCS:$(srcroot)%.c=$(objroot)%.jet.sym) +C_JET_OBJS := 
$(C_SRCS:$(srcroot)%.c=$(objroot)%.jet.$(O)) +C_TESTLIB_UNIT_OBJS := $(C_TESTLIB_SRCS:$(srcroot)%.c=$(objroot)%.unit.$(O)) +C_TESTLIB_INTEGRATION_OBJS := $(C_TESTLIB_SRCS:$(srcroot)%.c=$(objroot)%.integration.$(O)) +C_UTIL_INTEGRATION_OBJS := $(C_UTIL_INTEGRATION_SRCS:$(srcroot)%.c=$(objroot)%.integration.$(O)) +C_TESTLIB_ANALYZE_OBJS := $(C_TESTLIB_SRCS:$(srcroot)%.c=$(objroot)%.analyze.$(O)) +C_TESTLIB_STRESS_OBJS := $(C_TESTLIB_SRCS:$(srcroot)%.c=$(objroot)%.stress.$(O)) +C_TESTLIB_OBJS := $(C_TESTLIB_UNIT_OBJS) $(C_TESTLIB_INTEGRATION_OBJS) \ + $(C_UTIL_INTEGRATION_OBJS) $(C_TESTLIB_ANALYZE_OBJS) \ + $(C_TESTLIB_STRESS_OBJS) + +TESTS_UNIT_OBJS := $(TESTS_UNIT:$(srcroot)%.c=$(objroot)%.$(O)) +TESTS_INTEGRATION_OBJS := $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%.$(O)) +TESTS_INTEGRATION_CPP_OBJS := $(TESTS_INTEGRATION_CPP:$(srcroot)%.cpp=$(objroot)%.$(O)) +TESTS_ANALYZE_OBJS := $(TESTS_ANALYZE:$(srcroot)%.c=$(objroot)%.$(O)) +TESTS_STRESS_OBJS := $(TESTS_STRESS:$(srcroot)%.c=$(objroot)%.$(O)) +TESTS_STRESS_CPP_OBJS := $(TESTS_STRESS_CPP:$(srcroot)%.cpp=$(objroot)%.$(O)) +TESTS_OBJS := $(TESTS_UNIT_OBJS) $(TESTS_INTEGRATION_OBJS) $(TESTS_ANALYZE_OBJS) \ + $(TESTS_STRESS_OBJS) +TESTS_CPP_OBJS := $(TESTS_INTEGRATION_CPP_OBJS) $(TESTS_STRESS_CPP_OBJS) .PHONY: all dist build_doc_html build_doc_man build_doc .PHONY: install_bin install_include install_lib .PHONY: install_doc_html install_doc_man install_doc install .PHONY: tests check clean distclean relclean -.SECONDARY : $(CTESTOBJS) +.SECONDARY : $(PRIVATE_NAMESPACE_GEN_HDRS) $(TESTS_OBJS) $(TESTS_CPP_OBJS) # Default target. -all: build +all: build_lib dist: build_doc -$(srcroot)doc/%.html : $(objroot)doc/%.xml $(srcroot)doc/stylesheet.xsl $(objroot)doc/html.xsl +$(objroot)doc/%$(install_suffix).html : $(objroot)doc/%.xml $(srcroot)doc/stylesheet.xsl $(objroot)doc/html.xsl +ifneq ($(XSLROOT),) $(XSLTPROC) -o $@ $(objroot)doc/html.xsl $< +else +ifeq ($(wildcard $(DOCS_HTML)),) + @echo "

Missing xsltproc. Doc not built.

" > $@ +endif + @echo "Missing xsltproc. "$@" not (re)built." +endif -$(srcroot)doc/%.3 : $(objroot)doc/%.xml $(srcroot)doc/stylesheet.xsl $(objroot)doc/manpages.xsl +$(objroot)doc/%$(install_suffix).3 : $(objroot)doc/%.xml $(srcroot)doc/stylesheet.xsl $(objroot)doc/manpages.xsl +ifneq ($(XSLROOT),) $(XSLTPROC) -o $@ $(objroot)doc/manpages.xsl $< +# The -o option (output filename) of xsltproc may not work (it uses the +# in the .xml file). Manually add the suffix if so. + ifneq ($(install_suffix),) + @if [ -f $(objroot)doc/jemalloc.3 ]; then \ + mv $(objroot)doc/jemalloc.3 $(objroot)doc/jemalloc$(install_suffix).3 ; \ + fi + endif +else +ifeq ($(wildcard $(DOCS_MAN3)),) + @echo "Missing xsltproc. Doc not built." > $@ +endif + @echo "Missing xsltproc. "$@" not (re)built." +endif build_doc_html: $(DOCS_HTML) build_doc_man: $(DOCS_MAN3) @@ -139,155 +440,356 @@ build_doc: $(DOCS) # Include generated dependency files. # ifdef CC_MM --include $(COBJS:%.$(O)=%.d) --include $(CPICOBJS:%.$(O)=%.d) --include $(CTESTOBJS:%.$(O)=%.d) +-include $(C_SYM_OBJS:%.$(O)=%.d) +-include $(C_OBJS:%.$(O)=%.d) +-include $(CPP_OBJS:%.$(O)=%.d) +-include $(C_PIC_OBJS:%.$(O)=%.d) +-include $(CPP_PIC_OBJS:%.$(O)=%.d) +-include $(C_JET_SYM_OBJS:%.$(O)=%.d) +-include $(C_JET_OBJS:%.$(O)=%.d) +-include $(C_TESTLIB_OBJS:%.$(O)=%.d) +-include $(TESTS_OBJS:%.$(O)=%.d) +-include $(TESTS_CPP_OBJS:%.$(O)=%.d) endif -$(COBJS): $(objroot)src/%.$(O): $(srcroot)src/%.c -$(CPICOBJS): $(objroot)src/%.pic.$(O): $(srcroot)src/%.c -$(CPICOBJS): CFLAGS += $(PIC_CFLAGS) -$(CTESTOBJS): $(objroot)test/%.$(O): $(srcroot)test/%.c -$(CTESTOBJS): CPPFLAGS += -I$(objroot)test +$(C_SYM_OBJS): $(objroot)src/%.sym.$(O): $(srcroot)src/%.c +$(C_SYM_OBJS): CPPFLAGS += -DJEMALLOC_NO_PRIVATE_NAMESPACE +$(C_SYMS): $(objroot)src/%.sym: $(objroot)src/%.sym.$(O) +$(C_OBJS): $(objroot)src/%.$(O): $(srcroot)src/%.c +$(CPP_OBJS): $(objroot)src/%.$(O): $(srcroot)src/%.cpp +$(C_PIC_OBJS): $(objroot)src/%.pic.$(O): $(srcroot)src/%.c 
+$(C_PIC_OBJS): CFLAGS += $(PIC_CFLAGS) +$(CPP_PIC_OBJS): $(objroot)src/%.pic.$(O): $(srcroot)src/%.cpp +$(CPP_PIC_OBJS): CXXFLAGS += $(PIC_CFLAGS) +$(C_JET_SYM_OBJS): $(objroot)src/%.jet.sym.$(O): $(srcroot)src/%.c +$(C_JET_SYM_OBJS): CPPFLAGS += -DJEMALLOC_JET -DJEMALLOC_NO_PRIVATE_NAMESPACE +$(C_JET_SYMS): $(objroot)src/%.jet.sym: $(objroot)src/%.jet.sym.$(O) +$(C_JET_OBJS): $(objroot)src/%.jet.$(O): $(srcroot)src/%.c +$(C_JET_OBJS): CPPFLAGS += -DJEMALLOC_JET +$(C_TESTLIB_UNIT_OBJS): $(objroot)test/src/%.unit.$(O): $(srcroot)test/src/%.c +$(C_TESTLIB_UNIT_OBJS): CPPFLAGS += -DJEMALLOC_UNIT_TEST +$(C_TESTLIB_INTEGRATION_OBJS): $(objroot)test/src/%.integration.$(O): $(srcroot)test/src/%.c +$(C_TESTLIB_INTEGRATION_OBJS): CPPFLAGS += -DJEMALLOC_INTEGRATION_TEST +$(C_UTIL_INTEGRATION_OBJS): $(objroot)src/%.integration.$(O): $(srcroot)src/%.c +$(C_TESTLIB_ANALYZE_OBJS): $(objroot)test/src/%.analyze.$(O): $(srcroot)test/src/%.c +$(C_TESTLIB_ANALYZE_OBJS): CPPFLAGS += -DJEMALLOC_ANALYZE_TEST +$(C_TESTLIB_STRESS_OBJS): $(objroot)test/src/%.stress.$(O): $(srcroot)test/src/%.c +$(C_TESTLIB_STRESS_OBJS): CPPFLAGS += -DJEMALLOC_STRESS_TEST -DJEMALLOC_STRESS_TESTLIB +$(C_TESTLIB_OBJS): CPPFLAGS += -I$(srcroot)test/include -I$(objroot)test/include +$(TESTS_UNIT_OBJS): CPPFLAGS += -DJEMALLOC_UNIT_TEST +$(TESTS_INTEGRATION_OBJS): CPPFLAGS += -DJEMALLOC_INTEGRATION_TEST +$(TESTS_INTEGRATION_CPP_OBJS): CPPFLAGS += -DJEMALLOC_INTEGRATION_CPP_TEST +$(TESTS_ANALYZE_OBJS): CPPFLAGS += -DJEMALLOC_ANALYZE_TEST +$(TESTS_STRESS_OBJS): CPPFLAGS += -DJEMALLOC_STRESS_TEST +$(TESTS_STRESS_CPP_OBJS): CPPFLAGS += -DJEMALLOC_STRESS_CPP_TEST +$(TESTS_OBJS): $(objroot)test/%.$(O): $(srcroot)test/%.c +$(TESTS_CPP_OBJS): $(objroot)test/%.$(O): $(srcroot)test/%.cpp +$(TESTS_OBJS): CPPFLAGS += -I$(srcroot)test/include -I$(objroot)test/include +$(TESTS_CPP_OBJS): CPPFLAGS += -I$(srcroot)test/include -I$(objroot)test/include +$(TESTS_OBJS): CFLAGS += -fno-builtin +$(TESTS_CPP_OBJS): CPPFLAGS += 
-fno-builtin ifneq ($(IMPORTLIB),$(SO)) -$(COBJS): CPPFLAGS += -DDLLEXPORT +$(CPP_OBJS) $(C_SYM_OBJS) $(C_OBJS) $(C_JET_SYM_OBJS) $(C_JET_OBJS): CPPFLAGS += -DDLLEXPORT endif +# Dependencies. ifndef CC_MM -# Dependencies HEADER_DIRS = $(srcroot)include/jemalloc/internal \ $(objroot)include/jemalloc $(objroot)include/jemalloc/internal -HEADERS = $(wildcard $(foreach dir,$(HEADER_DIRS),$(dir)/*.h)) -$(COBJS) $(CPICOBJS) $(CTESTOBJS): $(HEADERS) -$(CTESTOBJS): $(objroot)test/jemalloc_test.h +HEADERS = $(filter-out $(PRIVATE_NAMESPACE_HDRS),$(wildcard $(foreach dir,$(HEADER_DIRS),$(dir)/*.h))) +$(C_SYM_OBJS) $(C_OBJS) $(CPP_OBJS) $(C_PIC_OBJS) $(CPP_PIC_OBJS) $(C_JET_SYM_OBJS) $(C_JET_OBJS) $(C_TESTLIB_OBJS) $(TESTS_OBJS) $(TESTS_CPP_OBJS): $(HEADERS) +$(TESTS_OBJS) $(TESTS_CPP_OBJS): $(objroot)test/include/test/jemalloc_test.h endif -$(COBJS) $(CPICOBJS) $(CTESTOBJS): %.$(O): +$(C_OBJS) $(CPP_OBJS) $(C_PIC_OBJS) $(CPP_PIC_OBJS) $(C_TESTLIB_INTEGRATION_OBJS) $(C_UTIL_INTEGRATION_OBJS) $(TESTS_INTEGRATION_OBJS) $(TESTS_INTEGRATION_CPP_OBJS): $(objroot)include/jemalloc/internal/private_namespace.h +$(C_JET_OBJS) $(C_TESTLIB_UNIT_OBJS) $(C_TESTLIB_ANALYZE_OBJS) $(C_TESTLIB_STRESS_OBJS) $(TESTS_UNIT_OBJS) $(TESTS_ANALYZE_OBJS) $(TESTS_STRESS_OBJS) $(TESTS_STRESS_CPP_OBJS): $(objroot)include/jemalloc/internal/private_namespace_jet.h + +$(C_SYM_OBJS) $(C_OBJS) $(C_PIC_OBJS) $(C_JET_SYM_OBJS) $(C_JET_OBJS) $(C_TESTLIB_OBJS) $(TESTS_OBJS): %.$(O): @mkdir -p $(@D) $(CC) $(CFLAGS) -c $(CPPFLAGS) $(CTARGET) $< ifdef CC_MM @$(CC) -MM $(CPPFLAGS) -MT $@ -o $(@:%.$(O)=%.d) $< endif +$(C_SYMS): %.sym: + @mkdir -p $(@D) + $(DUMP_SYMS) $< | $(AWK) -f $(objroot)include/jemalloc/internal/private_symbols.awk > $@ + +$(C_JET_SYMS): %.sym: + @mkdir -p $(@D) + $(DUMP_SYMS) $< | $(AWK) -f $(objroot)include/jemalloc/internal/private_symbols_jet.awk > $@ + +$(objroot)include/jemalloc/internal/private_namespace.gen.h: $(C_SYMS) + $(SHELL) $(srcroot)include/jemalloc/internal/private_namespace.sh 
$^ > $@ + +$(objroot)include/jemalloc/internal/private_namespace_jet.gen.h: $(C_JET_SYMS) + $(SHELL) $(srcroot)include/jemalloc/internal/private_namespace.sh $^ > $@ + +%.h: %.gen.h + @if ! `cmp -s $< $@` ; then echo "cp $< $@"; cp $< $@ ; fi + +$(CPP_OBJS) $(CPP_PIC_OBJS) $(TESTS_CPP_OBJS): %.$(O): + @mkdir -p $(@D) + $(CXX) $(CXXFLAGS) -c $(CPPFLAGS) $(CTARGET) $< +ifdef CC_MM + @$(CXX) -MM $(CPPFLAGS) -MT $@ -o $(@:%.$(O)=%.d) $< +endif + ifneq ($(SOREV),$(SO)) %.$(SO) : %.$(SOREV) @mkdir -p $(@D) ln -sf $( $(objroot)$${t}.out 2>&1; \ - if test -e "$(srcroot)$${t}.exp"; then \ - diff -w -u $(srcroot)$${t}.exp \ - $(objroot)$${t}.out >/dev/null 2>&1; \ - fail=$$?; \ - if test "$${fail}" -eq "1" ; then \ - failures=`expr $${failures} + 1`; \ - echo "*** FAIL ***"; \ - else \ - echo "pass"; \ - fi; \ - else \ - echo "*** FAIL *** (.exp file is missing)"; \ - failures=`expr $${failures} + 1`; \ - fi; \ - done; \ - echo "========================================="; \ - echo "Failures: $${failures}/$${total}"' +uninstall_bin: + $(RM) -v $(foreach b,$(notdir $(BINS)),$(BINDIR)/$(b)) + +uninstall_include: + $(RM) -v $(foreach h,$(notdir $(C_HDRS)),$(INCLUDEDIR)/jemalloc/$(h)) + rmdir -v $(INCLUDEDIR)/jemalloc + +uninstall_lib_shared: + $(RM) -v $(LIBDIR)/$(LIBJEMALLOC).$(SOREV) +ifneq ($(SOREV),$(SO)) + $(RM) -v $(LIBDIR)/$(LIBJEMALLOC).$(SO) +endif + +uninstall_lib_static: + $(RM) -v $(foreach l,$(notdir $(STATIC_LIBS)),$(LIBDIR)/$(l)) + +uninstall_lib_pc: + $(RM) -v $(foreach p,$(notdir $(PC)),$(LIBDIR)/pkgconfig/$(p)) + +ifeq ($(enable_shared), 1) +uninstall_lib: uninstall_lib_shared +endif +ifeq ($(enable_static), 1) +uninstall_lib: uninstall_lib_static +endif +uninstall_lib: uninstall_lib_pc + +uninstall_doc_html: + $(RM) -v $(foreach d,$(notdir $(DOCS_HTML)),$(DATADIR)/doc/jemalloc$(install_suffix)/$(d)) + rmdir -v $(DATADIR)/doc/jemalloc$(install_suffix) + +uninstall_doc_man: + $(RM) -v $(foreach d,$(notdir $(DOCS_MAN3)),$(MANDIR)/man3/$(d)) + +uninstall_doc: 
uninstall_doc_html uninstall_doc_man + +uninstall: uninstall_bin uninstall_include uninstall_lib + +ifeq ($(enable_doc), 1) +uninstall: uninstall_doc +endif + +tests_unit: $(TESTS_UNIT:$(srcroot)%.c=$(objroot)%$(EXE)) +tests_integration: $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%$(EXE)) $(TESTS_INTEGRATION_CPP:$(srcroot)%.cpp=$(objroot)%$(EXE)) +tests_analyze: $(TESTS_ANALYZE:$(srcroot)%.c=$(objroot)%$(EXE)) +tests_stress: $(TESTS_STRESS:$(srcroot)%.c=$(objroot)%$(EXE)) $(TESTS_STRESS_CPP:$(srcroot)%.cpp=$(objroot)%$(EXE)) +tests_pa: $(objroot)test/stress/pa/pa_data_preprocessor$(EXE) $(objroot)test/stress/pa/pa_microbench$(EXE) +tests: tests_unit tests_integration tests_analyze tests_stress + +check_unit_dir: + @mkdir -p $(objroot)test/unit +check_integration_dir: + @mkdir -p $(objroot)test/integration +analyze_dir: + @mkdir -p $(objroot)test/analyze +stress_dir: + @mkdir -p $(objroot)test/stress +check_dir: check_unit_dir check_integration_dir + +check_unit: tests_unit check_unit_dir + $(SHELL) $(objroot)test/test.sh $(TESTS_UNIT:$(srcroot)%.c=$(objroot)%) +check_integration_prof: tests_integration check_integration_dir +ifeq ($(enable_prof), 1) + $(MALLOC_CONF)="prof:true" $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) $(TESTS_INTEGRATION_CPP:$(srcroot)%.cpp=$(objroot)%) + $(MALLOC_CONF)="prof:true,prof_active:false" $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) $(TESTS_INTEGRATION_CPP:$(srcroot)%.cpp=$(objroot)%) +endif +check_integration_decay: tests_integration check_integration_dir + $(MALLOC_CONF)="dirty_decay_ms:-1,muzzy_decay_ms:-1" $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) $(TESTS_INTEGRATION_CPP:$(srcroot)%.cpp=$(objroot)%) + $(MALLOC_CONF)="dirty_decay_ms:0,muzzy_decay_ms:0" $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) $(TESTS_INTEGRATION_CPP:$(srcroot)%.cpp=$(objroot)%) +check_integration: tests_integration 
check_integration_dir + $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) $(TESTS_INTEGRATION_CPP:$(srcroot)%.cpp=$(objroot)%) +analyze: tests_analyze analyze_dir +ifeq ($(enable_prof), 1) + $(MALLOC_CONF)="prof:true" $(SHELL) $(objroot)test/test.sh $(TESTS_ANALYZE:$(srcroot)%.c=$(objroot)%) +else + $(SHELL) $(objroot)test/test.sh $(TESTS_ANALYZE:$(srcroot)%.c=$(objroot)%) +endif +stress: tests_stress stress_dir + $(SHELL) $(objroot)test/test.sh $(TESTS_STRESS:$(srcroot)%.c=$(objroot)%) + $(SHELL) $(objroot)test/test.sh $(TESTS_STRESS_CPP:$(srcroot)%.cpp=$(objroot)%) +check: check_unit check_integration check_integration_decay check_integration_prof clean: - rm -f $(COBJS) - rm -f $(CPICOBJS) - rm -f $(COBJS:%.$(O)=%.d) - rm -f $(CPICOBJS:%.$(O)=%.d) - rm -f $(CTESTOBJS:%.$(O)=%$(EXE)) - rm -f $(CTESTOBJS) - rm -f $(CTESTOBJS:%.$(O)=%.d) - rm -f $(CTESTOBJS:%.$(O)=%.out) + rm -f $(PRIVATE_NAMESPACE_HDRS) + rm -f $(PRIVATE_NAMESPACE_GEN_HDRS) + rm -f $(C_SYM_OBJS) + rm -f $(C_SYMS) + rm -f $(C_OBJS) + rm -f $(CPP_OBJS) + rm -f $(C_PIC_OBJS) + rm -f $(CPP_PIC_OBJS) + rm -f $(C_JET_SYM_OBJS) + rm -f $(C_JET_SYMS) + rm -f $(C_JET_OBJS) + rm -f $(C_TESTLIB_OBJS) + rm -f $(C_SYM_OBJS:%.$(O)=%.d) + rm -f $(C_OBJS:%.$(O)=%.d) + rm -f $(CPP_OBJS:%.$(O)=%.d) + rm -f $(C_PIC_OBJS:%.$(O)=%.d) + rm -f $(CPP_PIC_OBJS:%.$(O)=%.d) + rm -f $(C_JET_SYM_OBJS:%.$(O)=%.d) + rm -f $(C_JET_OBJS:%.$(O)=%.d) + rm -f $(C_TESTLIB_OBJS:%.$(O)=%.d) + rm -f $(TESTS_OBJS:%.$(O)=%$(EXE)) + rm -f $(TESTS_OBJS) + rm -f $(TESTS_OBJS:%.$(O)=%.d) + rm -f $(TESTS_OBJS:%.$(O)=%.out) + rm -f $(TESTS_CPP_OBJS:%.$(O)=%$(EXE)) + rm -f $(TESTS_CPP_OBJS) + rm -f $(TESTS_CPP_OBJS:%.$(O)=%.d) + rm -f $(TESTS_CPP_OBJS:%.$(O)=%.out) rm -f $(DSOS) $(STATIC_LIBS) distclean: clean - rm -rf $(objroot)autom4te.cache + rm -f $(objroot)bin/jemalloc-config + rm -f $(objroot)bin/jemalloc.sh + rm -f $(objroot)bin/jeprof rm -f $(objroot)config.log rm -f $(objroot)config.status rm -f 
$(objroot)config.stamp @@ -296,7 +798,7 @@ distclean: clean relclean: distclean rm -f $(objroot)configure - rm -f $(srcroot)VERSION + rm -f $(objroot)VERSION rm -f $(DOCS_HTML) rm -f $(DOCS_MAN3) diff --git a/README b/README index 7661683b..d33a69ce 100644 --- a/README +++ b/README @@ -1,10 +1,14 @@ -jemalloc is a general-purpose scalable concurrent malloc(3) implementation. -This distribution is a "portable" implementation that currently targets -FreeBSD, Linux, Apple OS X, and MinGW. jemalloc is included as the default -allocator in the FreeBSD and NetBSD operating systems, and it is used by the -Mozilla Firefox web browser on Microsoft Windows-related platforms. Depending -on your needs, one of the other divergent versions may suit your needs better -than this distribution. +jemalloc is a general purpose malloc(3) implementation that emphasizes +fragmentation avoidance and scalable concurrency support. jemalloc first came +into use as the FreeBSD libc allocator in 2005, and since then it has found its +way into numerous applications that rely on its predictable behavior. In 2010 +jemalloc development efforts broadened to include developer support features +such as heap profiling and extensive monitoring/tuning hooks. Modern jemalloc +releases continue to be integrated back into FreeBSD, and therefore versatility +remains critical. Ongoing development efforts trend toward making jemalloc +among the best allocators for a broad range of demanding applications, and +eliminating/mitigating weaknesses that have practical repercussions for real +world applications. The COPYING file contains copyright and licensing information. @@ -13,4 +17,4 @@ jemalloc. The ChangeLog file contains a brief summary of changes for each release. 
-URL: http://www.canonware.com/jemalloc/ +URL: https://jemalloc.net/ diff --git a/TUNING.md b/TUNING.md new file mode 100644 index 00000000..1f6bef35 --- /dev/null +++ b/TUNING.md @@ -0,0 +1,129 @@ +This document summarizes the common approaches for performance fine tuning with +jemalloc (as of 5.3.0). The default configuration of jemalloc tends to work +reasonably well in practice, and most applications should not have to tune any +options. However, in order to cover a wide range of applications and avoid +pathological cases, the default setting is sometimes kept conservative and +suboptimal, even for many common workloads. When jemalloc is properly tuned for +a specific application / workload, it is common to improve system level metrics +by a few percent, or make favorable trade-offs. + + +## Notable runtime options for performance tuning + +Runtime options can be set via +[malloc_conf](https://jemalloc.net/jemalloc.3.html#tuning). + +* [background_thread](https://jemalloc.net/jemalloc.3.html#background_thread) + + Enabling jemalloc background threads generally improves the tail latency for + application threads, since unused memory purging is shifted to the dedicated + background threads. In addition, unintended purging delay caused by + application inactivity is avoided with background threads. + + Suggested: `background_thread:true` when jemalloc managed threads can be + allowed. + +* [metadata_thp](https://jemalloc.net/jemalloc.3.html#opt.metadata_thp) + + Allowing jemalloc to utilize transparent huge pages for its internal + metadata usually reduces TLB misses significantly, especially for programs + with large memory footprint and frequent allocation / deallocation + activities. Metadata memory usage may increase due to the use of huge + pages. + + Suggested for allocation intensive programs: `metadata_thp:auto` or + `metadata_thp:always`, which is expected to improve CPU utilization at a + small memory cost. 
+ +* [dirty_decay_ms](https://jemalloc.net/jemalloc.3.html#opt.dirty_decay_ms) and + [muzzy_decay_ms](https://jemalloc.net/jemalloc.3.html#opt.muzzy_decay_ms) + + Decay time determines how fast jemalloc returns unused pages back to the + operating system, and therefore provides a fairly straightforward trade-off + between CPU and memory usage. Shorter decay time purges unused pages faster + to reduce memory usage (usually at the cost of more CPU cycles spent on + purging), and vice versa. + + Suggested: tune the values based on the desired trade-offs. + +* [narenas](https://jemalloc.net/jemalloc.3.html#opt.narenas) + + By default jemalloc uses multiple arenas to reduce internal lock contention. + However high arena count may also increase overall memory fragmentation, + since arenas manage memory independently. When high degree of parallelism + is not expected at the allocator level, lower number of arenas often + improves memory usage. + + Suggested: if low parallelism is expected, try lower arena count while + monitoring CPU and memory usage. + +* [percpu_arena](https://jemalloc.net/jemalloc.3.html#opt.percpu_arena) + + Enable dynamic thread to arena association based on running CPU. This has + the potential to improve locality, e.g. when thread to CPU affinity is + present. + + Suggested: try `percpu_arena:percpu` or `percpu_arena:phycpu` if + thread migration between processors is expected to be infrequent. + +Examples: + +* High resource consumption application, prioritizing CPU utilization: + + `background_thread:true,metadata_thp:auto` combined with relaxed decay time + (increased `dirty_decay_ms` and / or `muzzy_decay_ms`, + e.g. `dirty_decay_ms:30000,muzzy_decay_ms:30000`). + +* High resource consumption application, prioritizing memory usage: + + `background_thread:true,tcache_max:4096` combined with shorter decay time + (decreased `dirty_decay_ms` and / or `muzzy_decay_ms`, + e.g. `dirty_decay_ms:5000,muzzy_decay_ms:5000`), and lower arena count + (e.g.
number of CPUs). + +* Low resource consumption application: + + `narenas:1,tcache_max:1024` combined with shorter decay time (decreased + `dirty_decay_ms` and / or `muzzy_decay_ms`, e.g. + `dirty_decay_ms:1000,muzzy_decay_ms:0`). + +* Extremely conservative -- minimize memory usage at all costs, only suitable when +allocation activity is very rare: + + `narenas:1,tcache:false,dirty_decay_ms:0,muzzy_decay_ms:0` + +Note that it is recommended to combine the options with `abort_conf:true` which +aborts immediately on illegal options. + +## Beyond runtime options + +In addition to the runtime options, there are a number of programmatic ways to +improve application performance with jemalloc. + +* [Explicit arenas](https://jemalloc.net/jemalloc.3.html#arenas.create) + + Manually created arenas can help performance in various ways, e.g. by + managing locality and contention for specific usages. For example, + applications can explicitly allocate frequently accessed objects from a + dedicated arena with + [mallocx()](https://jemalloc.net/jemalloc.3.html#MALLOCX_ARENA) to improve + locality. In addition, explicit arenas often benefit from individually + tuned options, e.g. relaxed [decay + time](https://jemalloc.net/jemalloc.3.html#arena.i.dirty_decay_ms) if + frequent reuse is expected. + +* [Extent hooks](https://jemalloc.net/jemalloc.3.html#arena.i.extent_hooks) + + Extent hooks allow customization for managing underlying memory. One use + case for performance purpose is to utilize huge pages -- for example, + [HHVM](https://github.com/facebook/hhvm/blob/master/hphp/util/alloc.cpp) + uses explicit arenas with customized extent hooks to manage 1GB huge pages + for frequently accessed data, which reduces TLB misses significantly. + +* [Explicit thread-to-arena + binding](https://jemalloc.net/jemalloc.3.html#thread.arena) + + It is common for some threads in an application to have different memory + access / allocation patterns.
Threads with heavy workloads often benefit + from explicit binding, e.g. binding very active threads to dedicated arenas + may reduce contention at the allocator level. diff --git a/autogen.sh b/autogen.sh index 75f32da6..c5325fc9 100755 --- a/autogen.sh +++ b/autogen.sh @@ -9,8 +9,8 @@ for i in autoconf; do fi done -echo "./configure --enable-autogen $@" -./configure --enable-autogen $@ +echo "./configure --enable-autogen \"$@\"" +./configure --enable-autogen "$@" if [ $? -ne 0 ]; then echo "Error $? in ./configure" exit 1 diff --git a/bin/jemalloc-config.in b/bin/jemalloc-config.in new file mode 100644 index 00000000..80eca2e6 --- /dev/null +++ b/bin/jemalloc-config.in @@ -0,0 +1,83 @@ +#!/bin/sh + +usage() { + cat < +Options: + --help | -h : Print usage. + --version : Print jemalloc version. + --revision : Print shared library revision number. + --config : Print configure options used to build jemalloc. + --prefix : Print installation directory prefix. + --bindir : Print binary installation directory. + --datadir : Print data installation directory. + --includedir : Print include installation directory. + --libdir : Print library installation directory. + --mandir : Print manual page installation directory. + --cc : Print compiler used to build jemalloc. + --cflags : Print compiler flags used to build jemalloc. + --cppflags : Print preprocessor flags used to build jemalloc. + --cxxflags : Print C++ compiler flags used to build jemalloc. + --ldflags : Print library flags used to build jemalloc. + --libs : Print libraries jemalloc was linked against. 
+EOF +} + +prefix="@prefix@" +exec_prefix="@exec_prefix@" + +case "$1" in +--help | -h) + usage + exit 0 + ;; +--version) + echo "@jemalloc_version@" + ;; +--revision) + echo "@rev@" + ;; +--config) + echo "@CONFIG@" + ;; +--prefix) + echo "@PREFIX@" + ;; +--bindir) + echo "@BINDIR@" + ;; +--datadir) + echo "@DATADIR@" + ;; +--includedir) + echo "@INCLUDEDIR@" + ;; +--libdir) + echo "@LIBDIR@" + ;; +--mandir) + echo "@MANDIR@" + ;; +--cc) + echo "@CC@" + ;; +--cflags) + echo "@CFLAGS@" + ;; +--cppflags) + echo "@CPPFLAGS@" + ;; +--cxxflags) + echo "@CXXFLAGS@" + ;; +--ldflags) + echo "@LDFLAGS@ @EXTRA_LDFLAGS@" + ;; +--libs) + echo "@LIBS@" + ;; +*) + usage + exit 1 +esac diff --git a/bin/pprof b/bin/jeprof.in old mode 100755 new mode 100644 similarity index 88% rename from bin/pprof rename to bin/jeprof.in index 727eb437..9cae84ed --- a/bin/pprof +++ b/bin/jeprof.in @@ -2,11 +2,11 @@ # Copyright (c) 1998-2007, Google Inc. # All rights reserved. -# +# # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: -# +# # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above @@ -16,7 +16,7 @@ # * Neither the name of Google Inc. nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. 
-# +# # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR @@ -40,28 +40,28 @@ # # Examples: # -# % tools/pprof "program" "profile" +# % tools/jeprof "program" "profile" # Enters "interactive" mode # -# % tools/pprof --text "program" "profile" +# % tools/jeprof --text "program" "profile" # Generates one line per procedure # -# % tools/pprof --gv "program" "profile" +# % tools/jeprof --gv "program" "profile" # Generates annotated call-graph and displays via "gv" # -# % tools/pprof --gv --focus=Mutex "program" "profile" +# % tools/jeprof --gv --focus=Mutex "program" "profile" # Restrict to code paths that involve an entry that matches "Mutex" # -# % tools/pprof --gv --focus=Mutex --ignore=string "program" "profile" +# % tools/jeprof --gv --focus=Mutex --ignore=string "program" "profile" # Restrict to code paths that involve an entry that matches "Mutex" # and does not match "string" # -# % tools/pprof --list=IBF_CheckDocid "program" "profile" +# % tools/jeprof --list=IBF_CheckDocid "program" "profile" # Generates disassembly listing of all routines with at least one # sample that match the --list= pattern. The listing is # annotated with the flat and cumulative sample counts at each line. # -# % tools/pprof --disasm=IBF_CheckDocid "program" "profile" +# % tools/jeprof --disasm=IBF_CheckDocid "program" "profile" # Generates disassembly listing of all routines with at least one # sample that match the --disasm= pattern. The listing is # annotated with the flat and cumulative sample counts at each PC value. 
@@ -71,11 +71,13 @@ use strict; use warnings; use Getopt::Long; +use Cwd; +my $JEPROF_VERSION = "@jemalloc_version@"; my $PPROF_VERSION = "2.0"; # These are the object tools we use which can come from a -# user-specified location using --tools, from the PPROF_TOOLS +# user-specified location using --tools, from the JEPROF_TOOLS # environment variable, or from the environment. my %obj_tool_map = ( "objdump" => "objdump", @@ -86,6 +88,7 @@ my %obj_tool_map = ( #"nm_pdb" => "nm-pdb", # for reading windows (PDB-format) executables #"addr2line_pdb" => "addr2line-pdb", # ditto #"otool" => "otool", # equivalent of objdump on OS X + #"dyld_info" => "dyld_info", # equivalent of otool on OS X for shared cache ); # NOTE: these are lists, so you can put in commandline flags if you want. my @DOT = ("dot"); # leave non-absolute, since it may be in /usr/local @@ -94,7 +97,7 @@ my @EVINCE = ("evince"); # could also be xpdf or perhaps acroread my @KCACHEGRIND = ("kcachegrind"); my @PS2PDF = ("ps2pdf"); # These are used for dynamic profiles -my @URL_FETCHER = ("curl", "-s"); +my @URL_FETCHER = ("curl", "-s", "--fail"); # These are the web pages that servers need to support for dynamic profiles my $HEAP_PAGE = "/pprof/heap"; @@ -144,13 +147,13 @@ my $sep_address = undef; sub usage_string { return < +jeprof [options] is a space separated list of profile names. -pprof [options] +jeprof [options] is a list of profile files where each file contains the necessary symbol mappings as well as profile data (likely generated with --raw). -pprof [options] +jeprof [options] is a remote form. Symbols are obtained from host:port$SYMBOL_PAGE Each name can be: @@ -161,9 +164,9 @@ pprof [options] $GROWTH_PAGE, $CONTENTION_PAGE, /pprof/wall, $CENSUSPROFILE_PAGE, or /pprof/filteredprofile. For instance: - pprof http://myserver.com:80$HEAP_PAGE + jeprof http://myserver.com:80$HEAP_PAGE If / is omitted, the service defaults to $PROFILE_PAGE (cpu profiling). 
-pprof --symbols +jeprof --symbols Maps addresses to symbol names. In this mode, stdin should be a list of library mappings, in the same format as is found in the heap- and cpu-profile files (this loosely matches that of /proc/self/maps @@ -202,7 +205,9 @@ Output type: --pdf Generate PDF to stdout --svg Generate SVG to stdout --gif Generate GIF to stdout - --raw Generate symbolized pprof data (useful with remote fetch) + --raw Generate symbolized jeprof data (useful with remote fetch) + --collapsed Generate collapsed stacks for building flame graphs + (see http://www.brendangregg.com/flamegraphs.html) Heap-Profile Options: --inuse_space Display in-use (mega)bytes [default] @@ -222,47 +227,51 @@ Call-graph Options: --nodefraction= Hide nodes below *total [default=.005] --edgefraction= Hide edges below *total [default=.001] --maxdegree= Max incoming/outgoing edges per node [default=8] - --focus= Focus on nodes matching - --ignore= Ignore nodes matching + --focus= Focus on backtraces with nodes matching + --thread= Show profile for thread + --ignore= Ignore backtraces with nodes matching --scale= Set GV scaling [default=0] --heapcheck Make nodes with non-0 object counts (i.e. direct leak generators) more visible + --retain= Retain only nodes that match + --exclude= Exclude all nodes that match Miscellaneous: --tools=[,...] \$PATH for object tool pathnames --test Run unit tests --help This message --version Version information + --debug-syms-by-id (Linux only) Find debug symbol files by build ID as well as by name Environment Variables: - PPROF_TMPDIR Profiles directory. Defaults to \$HOME/pprof - PPROF_TOOLS Prefix for object tools pathnames + JEPROF_TMPDIR Profiles directory. 
Defaults to \$HOME/jeprof + JEPROF_TOOLS Prefix for object tools pathnames Examples: -pprof /bin/ls ls.prof +jeprof /bin/ls ls.prof Enters "interactive" mode -pprof --text /bin/ls ls.prof +jeprof --text /bin/ls ls.prof Outputs one line per procedure -pprof --web /bin/ls ls.prof +jeprof --web /bin/ls ls.prof Displays annotated call-graph in web browser -pprof --gv /bin/ls ls.prof +jeprof --gv /bin/ls ls.prof Displays annotated call-graph via 'gv' -pprof --gv --focus=Mutex /bin/ls ls.prof +jeprof --gv --focus=Mutex /bin/ls ls.prof Restricts to code paths including a .*Mutex.* entry -pprof --gv --focus=Mutex --ignore=string /bin/ls ls.prof +jeprof --gv --focus=Mutex --ignore=string /bin/ls ls.prof Code paths including Mutex but not string -pprof --list=getdir /bin/ls ls.prof +jeprof --list=getdir /bin/ls ls.prof (Per-line) annotated source listing for getdir() -pprof --disasm=getdir /bin/ls ls.prof +jeprof --disasm=getdir /bin/ls ls.prof (Per-PC) annotated disassembly for getdir() -pprof http://localhost:1234/ +jeprof http://localhost:1234/ Enters "interactive" mode -pprof --text localhost:1234 +jeprof --text localhost:1234 Outputs one line per procedure for localhost:1234 -pprof --raw localhost:1234 > ./local.raw -pprof --text ./local.raw +jeprof --raw localhost:1234 > ./local.raw +jeprof --text ./local.raw Fetches a remote profile for later analysis and then analyzes it in text mode. EOF @@ -270,7 +279,8 @@ EOF sub version_string { return < \$main::opt_svg, "gif!" => \$main::opt_gif, "raw!" => \$main::opt_raw, + "collapsed!" => \$main::opt_collapsed, "interactive!" 
=> \$main::opt_interactive, "nodecount=i" => \$main::opt_nodecount, "nodefraction=f" => \$main::opt_nodefraction, "edgefraction=f" => \$main::opt_edgefraction, "maxdegree=i" => \$main::opt_maxdegree, "focus=s" => \$main::opt_focus, + "thread=s" => \$main::opt_thread, "ignore=s" => \$main::opt_ignore, "scale=i" => \$main::opt_scale, "heapcheck" => \$main::opt_heapcheck, + "retain=s" => \$main::opt_retain, + "exclude=s" => \$main::opt_exclude, "inuse_space!" => \$main::opt_inuse_space, "inuse_objects!" => \$main::opt_inuse_objects, "alloc_space!" => \$main::opt_alloc_space, @@ -417,6 +436,7 @@ sub Init() { "tools=s" => \$main::opt_tools, "test!" => \$main::opt_test, "debug!" => \$main::opt_debug, + "debug-syms-by-id!" => \$main::opt_debug_syms_by_id, # Undocumented flags used only by unittests: "test_stride=i" => \$main::opt_test_stride, ) || usage("Invalid option(s)"); @@ -478,6 +498,7 @@ sub Init() { $main::opt_svg + $main::opt_gif + $main::opt_raw + + $main::opt_collapsed + $main::opt_interactive + 0; if ($modes > 1) { @@ -560,68 +581,19 @@ sub Init() { foreach (@prefix_list) { s|/+$||; } + + # Flag to prevent us from trying over and over to use + # elfutils if it's not installed (used only with + # --debug-syms-by-id option). + $main::gave_up_on_elfutils = 0; } -sub Main() { - Init(); - $main::collected_profile = undef; - @main::profile_files = (); - $main::op_time = time(); - - # Printing symbols is special and requires a lot less info that most. 
- if ($main::opt_symbols) { - PrintSymbols(*STDIN); # Get /proc/maps and symbols output from stdin - return; - } - - # Fetch all profile data - FetchDynamicProfiles(); - - # this will hold symbols that we read from the profile files - my $symbol_map = {}; - - # Read one profile, pick the last item on the list - my $data = ReadProfile($main::prog, pop(@main::profile_files)); - my $profile = $data->{profile}; - my $pcs = $data->{pcs}; - my $libs = $data->{libs}; # Info about main program and shared libraries - $symbol_map = MergeSymbols($symbol_map, $data->{symbols}); - - # Add additional profiles, if available. - if (scalar(@main::profile_files) > 0) { - foreach my $pname (@main::profile_files) { - my $data2 = ReadProfile($main::prog, $pname); - $profile = AddProfile($profile, $data2->{profile}); - $pcs = AddPcs($pcs, $data2->{pcs}); - $symbol_map = MergeSymbols($symbol_map, $data2->{symbols}); - } - } - - # Subtract base from profile, if specified - if ($main::opt_base ne '') { - my $base = ReadProfile($main::prog, $main::opt_base); - $profile = SubtractProfile($profile, $base->{profile}); - $pcs = AddPcs($pcs, $base->{pcs}); - $symbol_map = MergeSymbols($symbol_map, $base->{symbols}); - } +sub FilterAndPrint { + my ($profile, $symbols, $libs, $thread) = @_; # Get total data in profile my $total = TotalProfile($profile); - # Collect symbols - my $symbols; - if ($main::use_symbolized_profile) { - $symbols = FetchSymbols($pcs, $symbol_map); - } elsif ($main::use_symbol_page) { - $symbols = FetchSymbols($pcs); - } else { - # TODO(csilvers): $libs uses the /proc/self/maps data from profile1, - # which may differ from the data from subsequent profiles, especially - # if they were run on different machines. Use appropriate libs for - # each pc somehow. 
- $symbols = ExtractSymbols($libs, $pcs); - } - # Remove uniniteresting stack items $profile = RemoveUninterestingFrames($symbols, $profile); @@ -656,11 +628,15 @@ sub Main() { # (only matters when --heapcheck is given but we must be # compatible with old branches that did not pass --heapcheck always): if ($total != 0) { - printf("Total: %s %s\n", Unparse($total), Units()); + printf("Total%s: %s %s\n", + (defined($thread) ? " (t$thread)" : ""), + Unparse($total), Units()); } PrintText($symbols, $flat, $cumulative, -1); } elsif ($main::opt_raw) { PrintSymbolizedProfile($symbols, $profile, $main::prog); + } elsif ($main::opt_collapsed) { + PrintCollapsedStacks($symbols, $profile); } elsif ($main::opt_callgrind) { PrintCallgrind($calls); } else { @@ -692,6 +668,77 @@ sub Main() { } else { InteractiveMode($profile, $symbols, $libs, $total); } +} + +sub Main() { + Init(); + $main::collected_profile = undef; + @main::profile_files = (); + $main::op_time = time(); + + # Printing symbols is special and requires a lot less info that most. + if ($main::opt_symbols) { + PrintSymbols(*STDIN); # Get /proc/maps and symbols output from stdin + return; + } + + # Fetch all profile data + FetchDynamicProfiles(); + + # this will hold symbols that we read from the profile files + my $symbol_map = {}; + + # Read one profile, pick the last item on the list + my $data = ReadProfile($main::prog, $main::profile_files[0]); + my $profile = $data->{profile}; + my $pcs = $data->{pcs}; + my $libs = $data->{libs}; # Info about main program and shared libraries + $symbol_map = MergeSymbols($symbol_map, $data->{symbols}); + + # Add additional profiles, if available. 
+ if (scalar(@main::profile_files) > 1) { + foreach my $pname (@main::profile_files[1..$#main::profile_files]) { + my $data2 = ReadProfile($main::prog, $pname); + $profile = AddProfile($profile, $data2->{profile}); + $pcs = AddPcs($pcs, $data2->{pcs}); + $symbol_map = MergeSymbols($symbol_map, $data2->{symbols}); + } + } + + # Subtract base from profile, if specified + if ($main::opt_base ne '') { + my $base = ReadProfile($main::prog, $main::opt_base); + $profile = SubtractProfile($profile, $base->{profile}); + $pcs = AddPcs($pcs, $base->{pcs}); + $symbol_map = MergeSymbols($symbol_map, $base->{symbols}); + } + + # Collect symbols + my $symbols; + if ($main::use_symbolized_profile) { + $symbols = FetchSymbols($pcs, $symbol_map); + } elsif ($main::use_symbol_page) { + $symbols = FetchSymbols($pcs); + } else { + # TODO(csilvers): $libs uses the /proc/self/maps data from profile1, + # which may differ from the data from subsequent profiles, especially + # if they were run on different machines. Use appropriate libs for + # each pc somehow. + $symbols = ExtractSymbols($libs, $pcs); + } + + if (!defined($main::opt_thread)) { + FilterAndPrint($profile, $symbols, $libs); + } + if (defined($data->{threads})) { + foreach my $thread (sort { $a <=> $b } keys(%{$data->{threads}})) { + if (defined($main::opt_thread) && + ($main::opt_thread eq '*' || $main::opt_thread == $thread)) { + my $thread_profile = $data->{threads}{$thread}; + FilterAndPrint($thread_profile, $symbols, $libs, $thread); + } + } + } cleanup(); exit(0); @@ -780,14 +827,14 @@ sub InteractiveMode { $| = 1; # Make output unbuffered for interactive mode my ($orig_profile, $symbols, $libs, $total) = @_; - print STDERR "Welcome to pprof! For help, type 'help'.\n"; + print STDERR "Welcome to jeprof! For help, type 'help'.\n"; # Use ReadLine if it's installed and input comes from a console. 
if ( -t STDIN && !ReadlineMightFail() && defined(eval {require Term::ReadLine}) ) { - my $term = new Term::ReadLine 'pprof'; - while ( defined ($_ = $term->readline('(pprof) '))) { + my $term = new Term::ReadLine 'jeprof'; + while ( defined ($_ = $term->readline('(jeprof) '))) { $term->addhistory($_) if /\S/; if (!InteractiveCommand($orig_profile, $symbols, $libs, $total, $_)) { last; # exit when we get an interactive command to quit @@ -795,7 +842,7 @@ sub InteractiveMode { } } else { # don't have readline while (1) { - print STDERR "(pprof) "; + print STDERR "(jeprof) "; $_ = ; last if ! defined $_ ; s/\r//g; # turn windows-looking lines into unix-looking lines @@ -988,7 +1035,7 @@ sub ProcessProfile { sub InteractiveHelpMessage { print STDERR <