Fast-path improvement: reduce # of branches and unnecessary operations.

- Combine multiple runtime branches into a single malloc_slow check.
- Avoid calling arena_choose / size2index / index2size on fast path.
- A few micro optimizations.
This commit is contained in:
Qi Wang 2015-10-27 15:12:10 -07:00 committed by Jason Evans
parent 710ca112e3
commit f4a0f32d34
11 changed files with 357 additions and 214 deletions

View file

@ -551,9 +551,9 @@ prof_gctx_create(tsd_t *tsd, prof_bt_t *bt)
/*
* Create a single allocation that has space for vec of length bt->len.
*/
prof_gctx_t *gctx = (prof_gctx_t *)iallocztm(tsd, offsetof(prof_gctx_t,
vec) + (bt->len * sizeof(void *)), false, tcache_get(tsd, true),
true, NULL);
size_t size = offsetof(prof_gctx_t, vec) + (bt->len * sizeof(void *));
prof_gctx_t *gctx = (prof_gctx_t *)iallocztm(tsd, size,
size2index(size), false, tcache_get(tsd, true), true, NULL, true);
if (gctx == NULL)
return (NULL);
gctx->lock = prof_gctx_mutex_choose();
@ -594,7 +594,7 @@ prof_gctx_try_destroy(tsd_t *tsd, prof_tdata_t *tdata_self, prof_gctx_t *gctx,
prof_leave(tsd, tdata_self);
/* Destroy gctx. */
malloc_mutex_unlock(gctx->lock);
idalloctm(tsd, gctx, tcache_get(tsd, false), true);
idalloctm(tsd, gctx, tcache_get(tsd, false), true, true);
} else {
/*
* Compensate for increment in prof_tctx_destroy() or
@ -701,7 +701,7 @@ prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx)
prof_tdata_destroy(tsd, tdata, false);
if (destroy_tctx)
idalloctm(tsd, tctx, tcache_get(tsd, false), true);
idalloctm(tsd, tctx, tcache_get(tsd, false), true, true);
}
static bool
@ -730,7 +730,8 @@ prof_lookup_global(tsd_t *tsd, prof_bt_t *bt, prof_tdata_t *tdata,
if (ckh_insert(tsd, &bt2gctx, btkey.v, gctx.v)) {
/* OOM. */
prof_leave(tsd, tdata);
idalloctm(tsd, gctx.v, tcache_get(tsd, false), true);
idalloctm(tsd, gctx.v, tcache_get(tsd, false), true,
true);
return (true);
}
new_gctx = true;
@ -789,8 +790,9 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt)
/* Link a prof_tctx_t into gctx for this thread. */
tcache = tcache_get(tsd, true);
ret.v = iallocztm(tsd, sizeof(prof_tctx_t), false, tcache, true,
NULL);
ret.v = iallocztm(tsd, sizeof(prof_tctx_t),
size2index(sizeof(prof_tctx_t)), false, tcache, true, NULL,
true);
if (ret.p == NULL) {
if (new_gctx)
prof_gctx_try_destroy(tsd, tdata, gctx, tdata);
@ -810,7 +812,7 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt)
if (error) {
if (new_gctx)
prof_gctx_try_destroy(tsd, tdata, gctx, tdata);
idalloctm(tsd, ret.v, tcache, true);
idalloctm(tsd, ret.v, tcache, true, true);
return (NULL);
}
malloc_mutex_lock(gctx->lock);
@ -1211,7 +1213,7 @@ prof_gctx_finish(tsd_t *tsd, prof_gctx_tree_t *gctxs)
tctx_tree_remove(&gctx->tctxs,
to_destroy);
idalloctm(tsd, to_destroy,
tcache_get(tsd, false), true);
tcache_get(tsd, false), true, true);
} else
next = NULL;
} while (next != NULL);
@ -1714,8 +1716,8 @@ prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim,
/* Initialize an empty cache for this thread. */
tcache = tcache_get(tsd, true);
tdata = (prof_tdata_t *)iallocztm(tsd, sizeof(prof_tdata_t), false,
tcache, true, NULL);
tdata = (prof_tdata_t *)iallocztm(tsd, sizeof(prof_tdata_t),
size2index(sizeof(prof_tdata_t)), false, tcache, true, NULL, true);
if (tdata == NULL)
return (NULL);
@ -1729,7 +1731,7 @@ prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim,
if (ckh_new(tsd, &tdata->bt2tctx, PROF_CKH_MINITEMS,
prof_bt_hash, prof_bt_keycomp)) {
idalloctm(tsd, tdata, tcache, true);
idalloctm(tsd, tdata, tcache, true, true);
return (NULL);
}
@ -1784,9 +1786,9 @@ prof_tdata_destroy_locked(tsd_t *tsd, prof_tdata_t *tdata,
tcache = tcache_get(tsd, false);
if (tdata->thread_name != NULL)
idalloctm(tsd, tdata->thread_name, tcache, true);
idalloctm(tsd, tdata->thread_name, tcache, true, true);
ckh_delete(tsd, &tdata->bt2tctx);
idalloctm(tsd, tdata, tcache, true);
idalloctm(tsd, tdata, tcache, true, true);
}
static void
@ -1947,7 +1949,8 @@ prof_thread_name_alloc(tsd_t *tsd, const char *thread_name)
if (size == 1)
return ("");
ret = iallocztm(tsd, size, false, tcache_get(tsd, true), true, NULL);
ret = iallocztm(tsd, size, size2index(size), false, tcache_get(tsd,
true), true, NULL, true);
if (ret == NULL)
return (NULL);
memcpy(ret, thread_name, size);
@ -1980,7 +1983,7 @@ prof_thread_name_set(tsd_t *tsd, const char *thread_name)
if (tdata->thread_name != NULL) {
idalloctm(tsd, tdata->thread_name, tcache_get(tsd, false),
true);
true, true);
tdata->thread_name = NULL;
}
if (strlen(s) > 0)