diff --git a/jemalloc/doc/jemalloc.xml.in b/jemalloc/doc/jemalloc.xml.in
index 97893c16..2bde8904 100644
--- a/jemalloc/doc/jemalloc.xml.in
+++ b/jemalloc/doc/jemalloc.xml.in
@@ -1642,6 +1642,16 @@ malloc_conf = "xmalloc:true";]]>
+
+
+ stats.arenas.<i>.nthreads
+ (unsigned)
+ r-
+
+ Number of threads currently assigned to
+ arena.
+
+
stats.arenas.<i>.pactive
diff --git a/jemalloc/include/jemalloc/internal/arena.h b/jemalloc/include/jemalloc/internal/arena.h
index 1744b45b..94b7f3d9 100644
--- a/jemalloc/include/jemalloc/internal/arena.h
+++ b/jemalloc/include/jemalloc/internal/arena.h
@@ -295,8 +295,18 @@ struct arena_s {
unsigned ind;
/*
- * All non-bin-related operations on this arena require that lock be
- * locked.
+ * Number of threads currently assigned to this arena. This field is
+ * protected by arenas_lock.
+ */
+ unsigned nthreads;
+
+ /*
+ * There are three classes of arena operations from a locking
+ * perspective:
+ * 1) Thread assignment (modifies nthreads) is protected by
+ * arenas_lock.
+ * 2) Bin-related operations are protected by bin locks.
+ * 3) Chunk- and run-related operations are protected by this mutex.
*/
malloc_mutex_t lock;
diff --git a/jemalloc/include/jemalloc/internal/ctl.h b/jemalloc/include/jemalloc/internal/ctl.h
index 8776ad13..f1f5eb70 100644
--- a/jemalloc/include/jemalloc/internal/ctl.h
+++ b/jemalloc/include/jemalloc/internal/ctl.h
@@ -29,6 +29,7 @@ struct ctl_node_s {
struct ctl_arena_stats_s {
bool initialized;
+ unsigned nthreads;
size_t pactive;
size_t pdirty;
#ifdef JEMALLOC_STATS
diff --git a/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in b/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in
index a80fc7cd..a7472c00 100644
--- a/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in
+++ b/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in
@@ -293,6 +293,7 @@ extern size_t lg_pagesize;
extern unsigned ncpus;
extern malloc_mutex_t arenas_lock; /* Protects arenas initialization. */
+extern pthread_key_t arenas_tsd;
#ifndef NO_TLS
/*
* Map of pthread_self() --> arenas[???], used for selecting an arena to use
@@ -302,9 +303,9 @@ extern __thread arena_t *arenas_tls JEMALLOC_ATTR(tls_model("initial-exec"));
# define ARENA_GET() arenas_tls
# define ARENA_SET(v) do { \
arenas_tls = (v); \
+ pthread_setspecific(arenas_tsd, (void *)(v)); \
} while (0)
#else
-extern pthread_key_t arenas_tsd;
# define ARENA_GET() ((arena_t *)pthread_getspecific(arenas_tsd))
# define ARENA_SET(v) do { \
pthread_setspecific(arenas_tsd, (void *)(v)); \
diff --git a/jemalloc/src/arena.c b/jemalloc/src/arena.c
index a1fa2a32..022f9ec3 100644
--- a/jemalloc/src/arena.c
+++ b/jemalloc/src/arena.c
@@ -2175,6 +2175,7 @@ arena_new(arena_t *arena, unsigned ind)
arena_bin_t *bin;
arena->ind = ind;
+ arena->nthreads = 0;
if (malloc_mutex_init(&arena->lock))
return (true);
diff --git a/jemalloc/src/ctl.c b/jemalloc/src/ctl.c
index c32e955e..b4f280dc 100644
--- a/jemalloc/src/ctl.c
+++ b/jemalloc/src/ctl.c
@@ -182,6 +182,7 @@ CTL_PROTO(stats_arenas_i_lruns_j_highruns)
CTL_PROTO(stats_arenas_i_lruns_j_curruns)
INDEX_PROTO(stats_arenas_i_lruns_j)
#endif
+CTL_PROTO(stats_arenas_i_nthreads)
CTL_PROTO(stats_arenas_i_pactive)
CTL_PROTO(stats_arenas_i_pdirty)
#ifdef JEMALLOC_STATS
@@ -434,6 +435,7 @@ static const ctl_node_t stats_arenas_i_lruns_node[] = {
#endif
static const ctl_node_t stats_arenas_i_node[] = {
+ {NAME("nthreads"), CTL(stats_arenas_i_nthreads)},
{NAME("pactive"), CTL(stats_arenas_i_pactive)},
{NAME("pdirty"), CTL(stats_arenas_i_pdirty)}
#ifdef JEMALLOC_STATS
@@ -620,6 +622,7 @@ ctl_arena_refresh(arena_t *arena, unsigned i)
ctl_arena_clear(astats);
+ sstats->nthreads += astats->nthreads;
#ifdef JEMALLOC_STATS
ctl_arena_stats_amerge(astats, arena);
/* Merge into sum stats as well. */
@@ -657,10 +660,17 @@ ctl_refresh(void)
* Clear sum stats, since they will be merged into by
* ctl_arena_refresh().
*/
+ ctl_stats.arenas[narenas].nthreads = 0;
ctl_arena_clear(&ctl_stats.arenas[narenas]);
malloc_mutex_lock(&arenas_lock);
memcpy(tarenas, arenas, sizeof(arena_t *) * narenas);
+ for (i = 0; i < narenas; i++) {
+ if (arenas[i] != NULL)
+ ctl_stats.arenas[i].nthreads = arenas[i]->nthreads;
+ else
+ ctl_stats.arenas[i].nthreads = 0;
+ }
malloc_mutex_unlock(&arenas_lock);
for (i = 0; i < narenas; i++) {
bool initialized = (tarenas[i] != NULL);
@@ -1129,6 +1139,8 @@ thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
malloc_mutex_lock(&arenas_lock);
if ((arena = arenas[newind]) == NULL)
arena = arenas_extend(newind);
+ arenas[oldind]->nthreads--;
+ arenas[newind]->nthreads++;
malloc_mutex_unlock(&arenas_lock);
if (arena == NULL) {
ret = EAGAIN;
@@ -1536,6 +1548,7 @@ stats_arenas_i_lruns_j_index(const size_t *mib, size_t miblen, size_t j)
}
#endif
+CTL_RO_GEN(stats_arenas_i_nthreads, ctl_stats.arenas[mib[2]].nthreads, unsigned)
CTL_RO_GEN(stats_arenas_i_pactive, ctl_stats.arenas[mib[2]].pactive, size_t)
CTL_RO_GEN(stats_arenas_i_pdirty, ctl_stats.arenas[mib[2]].pdirty, size_t)
#ifdef JEMALLOC_STATS
diff --git a/jemalloc/src/jemalloc.c b/jemalloc/src/jemalloc.c
index 9f2fa92e..ecd521c9 100644
--- a/jemalloc/src/jemalloc.c
+++ b/jemalloc/src/jemalloc.c
@@ -7,12 +7,10 @@
malloc_mutex_t arenas_lock;
arena_t **arenas;
unsigned narenas;
-static unsigned next_arena;
+pthread_key_t arenas_tsd;
#ifndef NO_TLS
__thread arena_t *arenas_tls JEMALLOC_ATTR(tls_model("initial-exec"));
-#else
-pthread_key_t arenas_tsd;
#endif
#ifdef JEMALLOC_STATS
@@ -70,6 +68,7 @@ size_t opt_narenas = 0;
static void wrtmessage(void *cbopaque, const char *s);
static void stats_print_atexit(void);
static unsigned malloc_ncpus(void);
+static void arenas_cleanup(void *arg);
#if (defined(JEMALLOC_STATS) && defined(NO_TLS))
static void thread_allocated_cleanup(void *arg);
#endif
@@ -147,13 +146,53 @@ choose_arena_hard(void)
arena_t *ret;
if (narenas > 1) {
+ unsigned i, choose, first_null;
+
+ choose = 0;
+ first_null = narenas;
malloc_mutex_lock(&arenas_lock);
- if ((ret = arenas[next_arena]) == NULL)
- ret = arenas_extend(next_arena);
- next_arena = (next_arena + 1) % narenas;
+ assert(arenas[0] != NULL); /* choose starts at 0; the scan starts at 1. */
+ for (i = 1; i < narenas; i++) {
+ if (arenas[i] != NULL) {
+ /*
+ * Choose the first arena that has the lowest
+ * number of threads assigned to it.
+ */
+ if (arenas[i]->nthreads <
+ arenas[choose]->nthreads)
+ choose = i;
+ } else if (first_null == narenas) {
+ /*
+ * Record the index of the first uninitialized
+ * arena, in case all extant arenas are in use.
+ *
+ * NB: It is possible for there to be
+ * discontinuities in terms of initialized
+ * versus uninitialized arenas, due to the
+ * "thread.arena" mallctl.
+ */
+ first_null = i;
+ }
+ }
+
+ if (arenas[choose]->nthreads == 0 || first_null == narenas) {
+ /*
+ * Use an unloaded arena, or the least loaded arena if
+ * all arenas are already initialized.
+ */
+ ret = arenas[choose];
+ } else {
+ /* Initialize a new arena. */
+ ret = arenas_extend(first_null);
+ }
+ ret->nthreads++;
malloc_mutex_unlock(&arenas_lock);
- } else
+ } else {
ret = arenas[0];
+ malloc_mutex_lock(&arenas_lock);
+ ret->nthreads++;
+ malloc_mutex_unlock(&arenas_lock);
+ }
ARENA_SET(ret);
@@ -259,6 +298,16 @@ malloc_ncpus(void)
return (ret);
}
+static void
+arenas_cleanup(void *arg)
+{
+ arena_t *arena = (arena_t *)arg; /* Value stored in arenas_tsd by ARENA_SET(). */
+ /* Destructor registered for arenas_tsd: drop this thread's arena assignment. */
+ malloc_mutex_lock(&arenas_lock); /* nthreads is protected by arenas_lock. */
+ arena->nthreads--;
+ malloc_mutex_unlock(&arenas_lock);
+}
+
#if (defined(JEMALLOC_STATS) && defined(NO_TLS))
static void
thread_allocated_cleanup(void *arg)
@@ -737,6 +786,7 @@ malloc_init_hard(void)
* threaded mode.
*/
ARENA_SET(arenas[0]);
+ arenas[0]->nthreads++;
if (malloc_mutex_init(&arenas_lock))
return (true);
@@ -779,14 +829,10 @@ malloc_init_hard(void)
malloc_write(")\n");
}
- next_arena = (narenas > 0) ? 1 : 0;
-
-#ifdef NO_TLS
- if (pthread_key_create(&arenas_tsd, NULL) != 0) {
+ if (pthread_key_create(&arenas_tsd, arenas_cleanup) != 0) {
malloc_mutex_unlock(&init_lock);
return (true);
}
-#endif
/* Allocate and initialize arenas. */
arenas = (arena_t **)base_alloc(sizeof(arena_t *) * narenas);
@@ -819,7 +865,6 @@ malloc_init_hard(void)
return (false);
}
-
#ifdef JEMALLOC_ZONE
JEMALLOC_ATTR(constructor)
void
diff --git a/jemalloc/src/stats.c b/jemalloc/src/stats.c
index 3dfe0d23..81105c45 100644
--- a/jemalloc/src/stats.c
+++ b/jemalloc/src/stats.c
@@ -319,6 +319,7 @@ static void
stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque,
unsigned i)
{
+ unsigned nthreads;
size_t pagesize, pactive, pdirty, mapped;
uint64_t npurge, nmadvise, purged;
size_t small_allocated;
@@ -328,6 +329,9 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque,
CTL_GET("arenas.pagesize", &pagesize, size_t);
+ CTL_I_GET("stats.arenas.0.nthreads", &nthreads, unsigned);
+ malloc_cprintf(write_cb, cbopaque,
+ "assigned threads: %u\n", nthreads);
CTL_I_GET("stats.arenas.0.pactive", &pactive, size_t);
CTL_I_GET("stats.arenas.0.pdirty", &pdirty, size_t);
CTL_I_GET("stats.arenas.0.npurge", &npurge, uint64_t);