dnl ---------------------------------------------------------------------------
dnl configure.ac: --enable-prof-frameptr
dnl
dnl Selects the frame-pointer based unwinder as the profiling backtrace method.
dnl It is chosen only when all of the following hold:
dnl   - the target system is Linux,
dnl   - no earlier backtrace method has been selected (backtrace_method empty),
dnl   - the user passed --enable-prof-frameptr,
dnl   - the compiler is GCC-compatible ($GCC = yes).
dnl Otherwise enable_prof_frameptr is forced to "0" so the summary printed at
dnl the end of configure reflects what is actually built.
dnl
dnl The target is identified through ${host} rather than `uname -s`: uname
dnl describes the machine running configure, which is the wrong answer when
dnl cross-compiling.
dnl NOTE(review): assumes AC_CANONICAL_HOST has already been expanded earlier
dnl in this script so that ${host} is set -- confirm.
dnl ---------------------------------------------------------------------------
case "${host}" in
  *-*-linux*)
    AC_ARG_ENABLE([prof-frameptr],
      [AS_HELP_STRING([--enable-prof-frameptr], [Use optimized frame pointer unwinder for backtracing (Linux only)])],
    [if test "x$enable_prof_frameptr" = "xno" ; then
      enable_prof_frameptr="0"
    else
      enable_prof_frameptr="1"
      if test "x$enable_prof" = "x0" ; then
        AC_MSG_ERROR([--enable-prof-frameptr should only be used with --enable-prof])
      fi
    fi
    ],
    [enable_prof_frameptr="0"]
    )
    if test "x$backtrace_method" = "x" -a "x$enable_prof_frameptr" = "x1" \
         -a "x$GCC" = "xyes" ; then
      dnl The unwinder walks saved frame pointers, so jemalloc itself must
      dnl keep them.
      JE_CFLAGS_ADD([-fno-omit-frame-pointer])
      backtrace_method="frame pointer linux"
      AC_DEFINE([JEMALLOC_PROF_FRAME_POINTER], [ ], [ ])
    else
      enable_prof_frameptr="0"
    fi
    ;;
  *)
    enable_prof_frameptr="0"
    ;;
esac
/*
 * include/jemalloc/internal/jemalloc_preamble.h.in
 *
 * Compile-time flag: true iff JEMALLOC_PROF_FRAME_POINTER is defined.
 */
static const bool config_prof_frameptr =
#ifdef JEMALLOC_PROF_FRAME_POINTER
    true
#else
    false
#endif
    ;

/*
 * include/jemalloc/internal/malloc_io.h
 *
 * Reposition the file offset of `fd`; arguments and return value are those of
 * lseek(2).
 *
 * When JEMALLOC_USE_SYSCALL and SYS_lseek are both defined the raw system call
 * is used, but only if off_t fits in a long: syscall() is variadic and returns
 * long, so a wider off_t would be passed incorrectly and its result truncated.
 * In that case (and when the raw syscall is unavailable) the libc lseek() is
 * used instead.
 */
static inline off_t
malloc_lseek(int fd, off_t offset, int whence) {
#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_lseek)
	if (sizeof(off_t) <= sizeof(long)) {
		return (off_t)syscall(SYS_lseek, fd, (long)offset, whence);
	}
#endif
	return lseek(fd, offset, whence);
}

/*
 * include/jemalloc/internal/prof_sys.h
 *
 * Given an address inside the calling thread's stack, return the upper bound
 * of that stack, or 0 if it cannot be determined.
 */
uintptr_t prof_thread_stack_start(uintptr_t stack_end);
*/ typedef int (prof_sys_thread_name_read_t)(char *buf, size_t limit); diff --git a/src/ctl.c b/src/ctl.c index 2a9e47f2..690bbabc 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -89,6 +89,7 @@ CTL_PROTO(config_opt_safety_checks) CTL_PROTO(config_prof) CTL_PROTO(config_prof_libgcc) CTL_PROTO(config_prof_libunwind) +CTL_PROTO(config_prof_frameptr) CTL_PROTO(config_stats) CTL_PROTO(config_utrace) CTL_PROTO(config_xmalloc) @@ -436,6 +437,7 @@ static const ctl_named_node_t config_node[] = { {NAME("prof"), CTL(config_prof)}, {NAME("prof_libgcc"), CTL(config_prof_libgcc)}, {NAME("prof_libunwind"), CTL(config_prof_libunwind)}, + {NAME("prof_frameptr"), CTL(config_prof_frameptr)}, {NAME("stats"), CTL(config_stats)}, {NAME("utrace"), CTL(config_utrace)}, {NAME("xmalloc"), CTL(config_xmalloc)} @@ -2178,6 +2180,7 @@ CTL_RO_CONFIG_GEN(config_opt_safety_checks, bool) CTL_RO_CONFIG_GEN(config_prof, bool) CTL_RO_CONFIG_GEN(config_prof_libgcc, bool) CTL_RO_CONFIG_GEN(config_prof_libunwind, bool) +CTL_RO_CONFIG_GEN(config_prof_frameptr, bool) CTL_RO_CONFIG_GEN(config_stats, bool) CTL_RO_CONFIG_GEN(config_utrace, bool) CTL_RO_CONFIG_GEN(config_xmalloc, bool) diff --git a/src/prof_stack_range.c b/src/prof_stack_range.c new file mode 100644 index 00000000..c3458044 --- /dev/null +++ b/src/prof_stack_range.c @@ -0,0 +1,161 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/malloc_io.h" +#include "jemalloc/internal/prof_sys.h" + +#if defined (__linux__) + +#include +#include +#include +#include // strtoul +#include +#include + +static int prof_mapping_containing_addr( + uintptr_t addr, + const char* maps_path, + uintptr_t* mm_start, + uintptr_t* mm_end) { + int ret = ENOENT; // not found + *mm_start = *mm_end = 0; + + // Each line of /proc//maps is: + // - + // + // The fields we care about are always within the first 34 characters so + // as long as `buf` contains the start of a mapping line it can 
always be + // parsed. + static const int kMappingFieldsWidth = 34; + + int fd = -1; + char buf[4096]; + ssize_t remaining = 0; // actual number of bytes read to buf + char* line = NULL; + + while (1) { + if (fd < 0) { + // case 0: initial open of maps file + fd = malloc_open(maps_path, O_RDONLY); + if (fd < 0) { + return errno; + } + + remaining = malloc_read_fd(fd, buf, sizeof(buf)); + if (remaining <= 0) { + break; + } + line = buf; + } else if (line == NULL) { + // case 1: no newline found in buf + remaining = malloc_read_fd(fd, buf, sizeof(buf)); + if (remaining <= 0) { + break; + } + line = memchr(buf, '\n', remaining); + if (line != NULL) { + line++; // advance to character after newline + remaining -= (line - buf); + } + } else if (line != NULL && remaining < kMappingFieldsWidth) { + // case 2: found newline but insufficient characters remaining in buf + + // fd currently points to the character immediately after the last + // character in buf. Seek fd to the character after the newline. + if (malloc_lseek(fd, -remaining, SEEK_CUR) == -1) { + ret = errno; + break; + } + + remaining = malloc_read_fd(fd, buf, sizeof(buf)); + if (remaining <= 0) { + break; + } + line = buf; + } else { + // case 3: found newline and sufficient characters to parse + + // parse - + char* tmp = line; + uintptr_t start_addr = strtoul(tmp, &tmp, 16); + if (addr >= start_addr) { + tmp++; // advance to character after '-' + uintptr_t end_addr = strtoul(tmp, &tmp, 16); + if (addr < end_addr) { + *mm_start = start_addr; + *mm_end = end_addr; + ret = 0; + break; + } + } + + // Advance to character after next newline in the current buf. 
+ char* prev_line = line; + line = memchr(line, '\n', remaining); + if (line != NULL) { + line++; // advance to character after newline + remaining -= (line - prev_line); + } + } + } + + malloc_close(fd); + return ret; +} + +static uintptr_t prof_main_thread_stack_start(const char* stat_path) { + uintptr_t stack_start = 0; + + int fd = malloc_open(stat_path, O_RDONLY); + if (fd < 0) { + return 0; + } + + char buf[512]; + ssize_t n = malloc_read_fd(fd, buf, sizeof(buf) - 1); + if (n >= 0) { + buf[n] = '\0'; + if (sscanf( + buf, + "%*d (%*[^)]) %*c %*d %*d %*d %*d %*d %*u %*u %*u %*u %*u %*u %*u %*d %*d %*d %*d %*d %*d %*u %*u %*d %*u %*u %*u %"FMTuPTR, + &stack_start) != 1) { + } + } + malloc_close(fd); + return stack_start; +} + +uintptr_t prof_thread_stack_start(uintptr_t stack_end) { + pid_t pid = getpid(); + pid_t tid = gettid(); + if (pid == tid) { + char stat_path[32]; // "/proc//stat" + malloc_snprintf(stat_path, sizeof(stat_path), "/proc/%d/stat", pid); + return prof_main_thread_stack_start(stat_path); + } else { + // NOTE: Prior to kernel 4.5 an entry for every thread stack was included in + // /proc//maps as [STACK:]. Starting with kernel 4.5 only the main + // thread stack remains as the [stack] mapping. For other thread stacks the + // mapping is still visible in /proc//task//maps (though not + // labeled as [STACK:tid]). 
+ // https://lists.ubuntu.com/archives/kernel-team/2016-March/074681.html + char maps_path[64]; // "/proc//task//maps" + malloc_snprintf(maps_path, sizeof(maps_path), "/proc/%d/task/%d/maps", pid, tid); + + uintptr_t mm_start, mm_end; + if (prof_mapping_containing_addr( + stack_end, maps_path, &mm_start, &mm_end) != 0) { + return 0; + } + return mm_end; + } +} + +#else + +uintptr_t prof_thread_stack_start(UNUSED uintptr_t stack_end) { + return 0; +} + +#endif // __linux__ diff --git a/src/prof_sys.c b/src/prof_sys.c index 8a904040..f0bc8b4b 100644 --- a/src/prof_sys.c +++ b/src/prof_sys.c @@ -3,6 +3,7 @@ #include "jemalloc/internal/buf_writer.h" #include "jemalloc/internal/ctl.h" +#include "jemalloc/internal/malloc_io.h" #include "jemalloc/internal/prof_data.h" #include "jemalloc/internal/prof_sys.h" @@ -98,6 +99,45 @@ prof_backtrace_impl(void **vec, unsigned *len, unsigned max_len) { _Unwind_Backtrace(prof_unwind_callback, &data); } +#elif (defined(JEMALLOC_PROF_FRAME_POINTER)) +JEMALLOC_DIAGNOSTIC_PUSH +JEMALLOC_DIAGNOSTIC_IGNORE_FRAME_ADDRESS +static void +prof_backtrace_impl(void **vec, unsigned *len, unsigned max_len) { + // stack_start - highest possible valid stack address (assumption: stacks grow downward) + // stack_end - current stack frame and lowest possible valid stack address + // (all earlier frames will be at higher addresses than this) + + // always safe to get the current stack frame address + void** stack_end = (void**)__builtin_frame_address(0); + if (stack_end == NULL) { + *len = 0; + return; + } + + static __thread void **stack_start = (void **)0; // thread local + if (stack_start == 0 || stack_end >= stack_start) { + stack_start = (void**)prof_thread_stack_start((uintptr_t)stack_end); + } + + if (stack_start == 0 || stack_end >= stack_start) { + *len = 0; + return; + } + + unsigned ii = 0; + void** fp = (void**)stack_end; + while (fp < stack_start && ii < max_len) { + vec[ii++] = fp[1]; + void** fp_prev = fp; + fp = fp[0]; + if (unlikely(fp 
<= fp_prev)) { // sanity check forward progress + break; + } + } + *len = ii; +} +JEMALLOC_DIAGNOSTIC_POP #elif (defined(JEMALLOC_PROF_GCC)) JEMALLOC_DIAGNOSTIC_PUSH JEMALLOC_DIAGNOSTIC_IGNORE_FRAME_ADDRESS @@ -484,7 +524,7 @@ prof_getpid(void) { #endif } -long +static long prof_get_pid_namespace() { long ret = 0; diff --git a/src/stats.c b/src/stats.c index d5be92d3..1e607d9e 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1467,6 +1467,7 @@ stats_general_print(emitter_t *emitter) { CONFIG_WRITE_BOOL(prof); CONFIG_WRITE_BOOL(prof_libgcc); CONFIG_WRITE_BOOL(prof_libunwind); + CONFIG_WRITE_BOOL(prof_frameptr); CONFIG_WRITE_BOOL(stats); CONFIG_WRITE_BOOL(utrace); CONFIG_WRITE_BOOL(xmalloc); diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index ffe5c411..65e84370 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -255,6 +255,7 @@ TEST_BEGIN(test_mallctl_config) { TEST_MALLCTL_CONFIG(prof, bool); TEST_MALLCTL_CONFIG(prof_libgcc, bool); TEST_MALLCTL_CONFIG(prof_libunwind, bool); + TEST_MALLCTL_CONFIG(prof_frameptr, bool); TEST_MALLCTL_CONFIG(stats, bool); TEST_MALLCTL_CONFIG(utrace, bool); TEST_MALLCTL_CONFIG(xmalloc, bool);