mirror of
https://github.com/jemalloc/jemalloc.git
synced 2026-04-14 14:41:42 +03:00
Fix frame pointer based unwinder to handle changing stack range
This commit is contained in:
parent
ad108d50f1
commit
773b5809f9
3 changed files with 233 additions and 167 deletions
|
|
@ -20,7 +20,7 @@ void prof_fdump_impl(tsd_t *tsd);
|
|||
void prof_idump_impl(tsd_t *tsd);
|
||||
bool prof_mdump_impl(tsd_t *tsd, const char *filename);
|
||||
void prof_gdump_impl(tsd_t *tsd);
|
||||
uintptr_t prof_thread_stack_start(uintptr_t stack_end);
|
||||
int prof_thread_stack_range(uintptr_t fp, uintptr_t *low, uintptr_t *high);
|
||||
|
||||
/* Used in unit tests. */
|
||||
typedef int (prof_sys_thread_name_read_t)(char *buf, size_t limit);
|
||||
|
|
|
|||
|
|
@ -4,158 +4,163 @@
|
|||
#include "jemalloc/internal/malloc_io.h"
|
||||
#include "jemalloc/internal/prof_sys.h"
|
||||
|
||||
#if defined (__linux__) && defined(JEMALLOC_HAVE_GETTID)
|
||||
#if defined(__linux__) && defined(JEMALLOC_HAVE_GETTID)
|
||||
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h> // strtoul
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
# include <errno.h>
|
||||
# include <fcntl.h>
|
||||
# include <stdio.h>
|
||||
# include <stdlib.h> // strtoul
|
||||
# include <string.h>
|
||||
# include <unistd.h>
|
||||
|
||||
static int prof_mapping_containing_addr(
|
||||
uintptr_t addr,
|
||||
const char* maps_path,
|
||||
uintptr_t* mm_start,
|
||||
uintptr_t* mm_end) {
|
||||
int ret = ENOENT; // not found
|
||||
*mm_start = *mm_end = 0;
|
||||
|
||||
// Each line of /proc/<pid>/maps is:
|
||||
// <start>-<end> <perms> <offset> <dev> <inode> <pathname>
|
||||
//
|
||||
// The fields we care about are always within the first 34 characters so
|
||||
// as long as `buf` contains the start of a mapping line it can always be
|
||||
// parsed.
|
||||
static const int kMappingFieldsWidth = 34;
|
||||
|
||||
int fd = -1;
|
||||
char buf[4096];
|
||||
ssize_t remaining = 0; // actual number of bytes read to buf
|
||||
char* line = NULL;
|
||||
|
||||
while (1) {
|
||||
if (fd < 0) {
|
||||
// case 0: initial open of maps file
|
||||
fd = malloc_open(maps_path, O_RDONLY);
|
||||
if (fd < 0) {
|
||||
return errno;
|
||||
}
|
||||
|
||||
remaining = malloc_read_fd(fd, buf, sizeof(buf));
|
||||
if (remaining <= 0) {
|
||||
break;
|
||||
}
|
||||
line = buf;
|
||||
} else if (line == NULL) {
|
||||
// case 1: no newline found in buf
|
||||
remaining = malloc_read_fd(fd, buf, sizeof(buf));
|
||||
if (remaining <= 0) {
|
||||
break;
|
||||
}
|
||||
line = memchr(buf, '\n', remaining);
|
||||
if (line != NULL) {
|
||||
line++; // advance to character after newline
|
||||
remaining -= (line - buf);
|
||||
}
|
||||
} else if (line != NULL && remaining < kMappingFieldsWidth) {
|
||||
// case 2: found newline but insufficient characters remaining in buf
|
||||
|
||||
// fd currently points to the character immediately after the last
|
||||
// character in buf. Seek fd to the character after the newline.
|
||||
if (malloc_lseek(fd, -remaining, SEEK_CUR) == -1) {
|
||||
ret = errno;
|
||||
break;
|
||||
}
|
||||
|
||||
remaining = malloc_read_fd(fd, buf, sizeof(buf));
|
||||
if (remaining <= 0) {
|
||||
break;
|
||||
}
|
||||
line = buf;
|
||||
} else {
|
||||
// case 3: found newline and sufficient characters to parse
|
||||
|
||||
// parse <start>-<end>
|
||||
char* tmp = line;
|
||||
uintptr_t start_addr = strtoul(tmp, &tmp, 16);
|
||||
if (addr >= start_addr) {
|
||||
tmp++; // advance to character after '-'
|
||||
uintptr_t end_addr = strtoul(tmp, &tmp, 16);
|
||||
if (addr < end_addr) {
|
||||
*mm_start = start_addr;
|
||||
*mm_end = end_addr;
|
||||
ret = 0;
|
||||
break;
|
||||
/*
 * Converts the run of hexadecimal digits at the start of a string to an
 * unsigned long long integer.  Intended as a faster stand-in for
 * strtoull(nptr, endptr, 16) when parsing addresses.  Unlike strtoull() it
 * does not skip leading whitespace, does not accept a sign or a "0x" prefix,
 * and never reports overflow: parsing simply stops once as many digits as fit
 * in the result have been consumed.  Both lowercase and uppercase digits are
 * accepted.
 *
 * @param nptr Pointer to the string to be converted.
 * @param endptr Pointer to a pointer to character, which will be set to the
 * character in `nptr` where parsing stopped. Can be NULL.
 * @return The converted unsigned long long integer value (0 when `nptr` does
 * not begin with a hexadecimal digit).
 */
static inline unsigned long long int
strtoull_hex(const char *nptr, char **endptr) {
	/* Each hex digit carries 4 bits, so this many digits cannot overflow. */
	const int max_digits = (int)(2 * sizeof(unsigned long long int));
	unsigned long long int val = 0;
	int ii = 0;

	for (; ii < max_digits; ++ii) {
		char c = nptr[ii];
		unsigned digit;
		if (c >= '0' && c <= '9') {
			digit = (unsigned)(c - '0');
		} else if (c >= 'a' && c <= 'f') {
			digit = (unsigned)(c - 'a') + 10;
		} else if (c >= 'A' && c <= 'F') {
			digit = (unsigned)(c - 'A') + 10;
		} else {
			/* First non-hex character (including '\0') ends the number. */
			break;
		}
		val = (val << 4) | digit;
	}

	if (endptr != NULL) {
		*endptr = (char *)(nptr + ii);
	}
	return val;
}
|
||||
|
||||
static uintptr_t prof_main_thread_stack_start(const char* stat_path) {
|
||||
uintptr_t stack_start = 0;
|
||||
/*
 * Scans a procfs maps file for the mapping that contains `addr`.
 *
 * @param addr Address to look up.
 * @param maps_path Path of the maps file to scan.
 * @param mm_start Out: start of the containing mapping; 0 when none is found.
 * @param mm_end Out: end (exclusive) of the containing mapping; 0 when none
 * is found.
 * @return 0 when a mapping containing `addr` was found, ENOENT when the end
 * of the file was reached without a match, otherwise the errno left by the
 * failed open or read.
 */
static int
prof_mapping_containing_addr(uintptr_t addr, const char *maps_path,
    uintptr_t *mm_start, uintptr_t *mm_end) {
	int ret = ENOENT; /* not found */
	*mm_start = *mm_end = 0;

	/*
	 * Each line of /proc/<pid>/maps is:
	 * <start>-<end> <perms> <offset> <dev> <inode> <pathname>
	 *
	 * The fields we care about are always within the first 34 characters so
	 * as long as `buf` contains the start of a mapping line it can always be
	 * parsed.
	 */
	static const int kMappingFieldsWidth = 34;

	int fd = -1;
	char buf[4096];
	ssize_t remaining = 0; /* unparsed bytes in buf, starting at `line` */
	char *line = NULL;

	while (1) {
		if (fd < 0) {
			/* case 0: initial open of maps file */
			fd = malloc_open(maps_path, O_RDONLY);
			if (fd < 0) {
				return errno;
			}

			remaining = malloc_read_fd(fd, buf, sizeof(buf));
			if (remaining <= 0) {
				/*
				 * Only a failed read leaves a meaningful errno; at
				 * end of file keep the "not found" result.
				 */
				if (remaining < 0) {
					ret = errno;
				}
				break;
			}
			line = buf;
		} else if (line == NULL) {
			/* case 1: no newline found in buf */
			remaining = malloc_read_fd(fd, buf, sizeof(buf));
			if (remaining <= 0) {
				if (remaining < 0) {
					ret = errno;
				}
				break;
			}
			line = memchr(buf, '\n', remaining);
			if (line != NULL) {
				line++; /* advance to character after newline */
				remaining -= (line - buf);
			}
		} else if (remaining < kMappingFieldsWidth) {
			/*
			 * case 2: found newline but insufficient characters
			 * remaining in buf
			 *
			 * Slide the unparsed tail to the start of buf.  Source and
			 * destination are in the same array and may overlap, so
			 * memmove is required.
			 */
			memmove(buf, line, (size_t)remaining);
			line = buf;

			/* Signed, so that a failed read (-1) is detected. */
			ssize_t count = malloc_read_fd(fd, buf + remaining,
			    sizeof(buf) - (size_t)remaining);
			if (count <= 0) {
				if (count < 0) {
					ret = errno;
				}
				break;
			}

			remaining += count; /* unparsed bytes now in buf */
		} else {
			/* case 3: found newline and sufficient characters to parse */

			/* parse <start>-<end> */
			char *tmp = line;
			uintptr_t start_addr = (uintptr_t)strtoull_hex(tmp, &tmp);
			if (addr >= start_addr) {
				tmp++; /* advance to character after '-' */
				uintptr_t end_addr =
				    (uintptr_t)strtoull_hex(tmp, NULL);
				if (addr < end_addr) {
					*mm_start = start_addr;
					*mm_end = end_addr;
					ret = 0;
					break;
				}
			}

			/* Advance to character after next newline in the current buf. */
			char *prev_line = line;
			line = memchr(line, '\n', remaining);
			if (line != NULL) {
				line++; /* advance to character after newline */
				remaining -= (line - prev_line);
			}
		}
	}

	malloc_close(fd);
	return ret;
}
|
||||
|
||||
uintptr_t prof_thread_stack_start(uintptr_t stack_end) {
|
||||
pid_t pid = getpid();
|
||||
pid_t tid = gettid();
|
||||
if (pid == tid) {
|
||||
char stat_path[32]; // "/proc/<pid>/stat"
|
||||
malloc_snprintf(stat_path, sizeof(stat_path), "/proc/%d/stat", pid);
|
||||
return prof_main_thread_stack_start(stat_path);
|
||||
} else {
|
||||
// NOTE: Prior to kernel 4.5 an entry for every thread stack was included in
|
||||
// /proc/<pid>/maps as [STACK:<tid>]. Starting with kernel 4.5 only the main
|
||||
// thread stack remains as the [stack] mapping. For other thread stacks the
|
||||
// mapping is still visible in /proc/<pid>/task/<tid>/maps (though not
|
||||
// labeled as [STACK:tid]).
|
||||
// https://lists.ubuntu.com/archives/kernel-team/2016-March/074681.html
|
||||
char maps_path[64]; // "/proc/<pid>/task/<tid>/maps"
|
||||
malloc_snprintf(maps_path, sizeof(maps_path), "/proc/%d/task/%d/maps", pid, tid);
|
||||
|
||||
uintptr_t mm_start, mm_end;
|
||||
if (prof_mapping_containing_addr(
|
||||
stack_end, maps_path, &mm_start, &mm_end) != 0) {
|
||||
return 0;
|
||||
}
|
||||
return mm_end;
|
||||
}
|
||||
/*
 * Looks up the bounds of the mapping that contains `fp` for the calling
 * thread.
 *
 * fp:   address expected to lie within the calling thread's stack.
 * low:  out; start of the mapping that contains fp.
 * high: out; end of the mapping that contains fp.
 *
 * Returns the result of prof_mapping_containing_addr(): 0 on success,
 * non-zero otherwise.
 */
int
prof_thread_stack_range(uintptr_t fp, uintptr_t *low, uintptr_t *high) {
	/*
	 * The per-thread maps file is used rather than /proc/<pid>/maps.
	 * Before kernel 4.5 every thread stack was listed in /proc/<pid>/maps
	 * as [STACK:<tid>]; from 4.5 on only the main thread's stack is
	 * labelled there (as [stack]).  Other thread stacks still appear in
	 * /proc/<pid>/task/<tid>/maps, although without a [STACK:tid] label.
	 * https://lists.ubuntu.com/archives/kernel-team/2016-March/074681.html
	 */
	const pid_t process_id = getpid();
	const pid_t thread_id = gettid();

	char path[64]; /* "/proc/<pid>/task/<tid>/maps" */
	malloc_snprintf(path, sizeof(path), "/proc/%d/task/%d/maps",
	    process_id, thread_id);

	return prof_mapping_containing_addr(fp, path, low, high);
}
|
||||
|
||||
#else
|
||||
|
||||
uintptr_t prof_thread_stack_start(UNUSED uintptr_t stack_end) {
|
||||
return 0;
|
||||
int
|
||||
prof_thread_stack_range(
|
||||
UNUSED uintptr_t addr, uintptr_t *stack_start, uintptr_t *stack_end) {
|
||||
*stack_start = *stack_end = 0;
|
||||
return ENOENT;
|
||||
}
|
||||
|
||||
#endif // __linux__
|
||||
#endif // __linux__
|
||||
|
|
|
|||
117
src/prof_sys.c
117
src/prof_sys.c
|
|
@ -23,6 +23,11 @@
|
|||
#define _Unwind_Backtrace JEMALLOC_TEST_HOOK(_Unwind_Backtrace, test_hooks_libc_hook)
|
||||
#endif
|
||||
|
||||
#ifdef JEMALLOC_PROF_FRAME_POINTER
|
||||
// execinfo backtrace() as fallback unwinder
|
||||
#include <execinfo.h>
|
||||
#endif
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
malloc_mutex_t prof_dump_filename_mtx;
|
||||
|
|
@ -102,41 +107,97 @@ prof_backtrace_impl(void **vec, unsigned *len, unsigned max_len) {
|
|||
#elif (defined(JEMALLOC_PROF_FRAME_POINTER))
|
||||
JEMALLOC_DIAGNOSTIC_PUSH
|
||||
JEMALLOC_DIAGNOSTIC_IGNORE_FRAME_ADDRESS
|
||||
|
||||
/* Address range [start, end) used to validate frame pointers. */
struct stack_range {
	uintptr_t start;
	uintptr_t end;
};

/* Per-thread state of the frame pointer based unwinder. */
struct thread_unwind_info {
	/* Cached stack bounds; start == end means not looked up yet. */
	struct stack_range stack_range;
	/* When true, the thread unwinds with backtrace() instead. */
	bool fallback;
};

/* Thread local; starts with an empty range and frame pointer unwinding. */
static __thread struct thread_unwind_info unwind_info = {
	{0, 0}, /* stack_range */
	false   /* fallback */
};
|
||||
|
||||
static void
|
||||
prof_backtrace_impl(void **vec, unsigned *len, unsigned max_len) {
|
||||
// stack_start - highest possible valid stack address (assumption: stacks grow downward)
|
||||
// stack_end - current stack frame and lowest possible valid stack address
|
||||
// (all earlier frames will be at higher addresses than this)
|
||||
/* fp: current stack frame pointer
|
||||
*
|
||||
* stack_range: readable stack memory range for the current thread.
|
||||
* Used to validate frame addresses during stack unwinding.
|
||||
* For most threads there is a single valid stack range
|
||||
* that is fixed at thread creation time. This may not be
|
||||
* the case when folly fibers or boost contexts are used.
|
||||
* In those cases fall back to using execinfo backtrace()
|
||||
* (DWARF unwind).
|
||||
*/
|
||||
|
||||
// always safe to get the current stack frame address
|
||||
void** stack_end = (void**)__builtin_frame_address(0);
|
||||
if (stack_end == NULL) {
|
||||
*len = 0;
|
||||
return;
|
||||
}
|
||||
/* always safe to get the current stack frame address */
|
||||
uintptr_t fp = (uintptr_t)__builtin_frame_address(0);
|
||||
|
||||
static __thread void **stack_start = (void **)0; // thread local
|
||||
if (stack_start == 0 || stack_end >= stack_start) {
|
||||
stack_start = (void**)prof_thread_stack_start((uintptr_t)stack_end);
|
||||
}
|
||||
/* new thread - get the stack range */
|
||||
if (!unwind_info.fallback &&
|
||||
unwind_info.stack_range.start == unwind_info.stack_range.end) {
|
||||
if (prof_thread_stack_range(fp, &unwind_info.stack_range.start,
|
||||
&unwind_info.stack_range.end) != 0) {
|
||||
unwind_info.fallback = true;
|
||||
} else {
|
||||
assert(fp >= unwind_info.stack_range.start
|
||||
&& fp < unwind_info.stack_range.end);
|
||||
}
|
||||
}
|
||||
|
||||
if (stack_start == 0 || stack_end >= stack_start) {
|
||||
*len = 0;
|
||||
return;
|
||||
}
|
||||
if (unwind_info.fallback) {
|
||||
goto label_fallback;
|
||||
}
|
||||
|
||||
unsigned ii = 0;
|
||||
void** fp = (void**)stack_end;
|
||||
while (fp < stack_start && ii < max_len) {
|
||||
vec[ii++] = fp[1];
|
||||
void** fp_prev = fp;
|
||||
fp = fp[0];
|
||||
if (unlikely(fp <= fp_prev)) { // sanity check forward progress
|
||||
break;
|
||||
}
|
||||
}
|
||||
*len = ii;
|
||||
unsigned ii = 0;
|
||||
while (ii < max_len && fp != 0) {
|
||||
if (fp < unwind_info.stack_range.start ||
|
||||
fp >= unwind_info.stack_range.end) {
|
||||
/*
|
||||
* Determining the stack range from procfs can be
|
||||
* relatively expensive especially for programs with
|
||||
* many threads / shared libraries. If the stack
|
||||
* range has changed, it is likely to change again
|
||||
* in the future (fibers or some other stack
|
||||
* manipulation). So fall back to backtrace for this
|
||||
* thread.
|
||||
*/
|
||||
unwind_info.fallback = true;
|
||||
goto label_fallback;
|
||||
}
|
||||
void* ip = ((void **)fp)[1];
|
||||
if (ip == 0) {
|
||||
break;
|
||||
}
|
||||
vec[ii++] = ip;
|
||||
fp = ((uintptr_t *)fp)[0];
|
||||
}
|
||||
*len = ii;
|
||||
return;
|
||||
|
||||
label_fallback:
|
||||
/*
|
||||
* Using the backtrace from execinfo.h here. Note that it may get
|
||||
* redirected to libunwind when a libunwind not built with build-time
|
||||
* flag --disable-weak-backtrace is linked.
|
||||
*/
|
||||
assert(unwind_info.fallback);
|
||||
int nframes = backtrace(vec, max_len);
|
||||
if (nframes > 0) {
|
||||
*len = nframes;
|
||||
} else {
|
||||
*len = 0;
|
||||
}
|
||||
}
|
||||
|
||||
JEMALLOC_DIAGNOSTIC_POP
|
||||
#elif (defined(JEMALLOC_PROF_GCC))
|
||||
JEMALLOC_DIAGNOSTIC_PUSH
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue