mirror of
https://github.com/jemalloc/jemalloc.git
synced 2026-05-14 17:06:22 +03:00
Remove obsolete TSD recompute state
This commit is contained in:
parent
ff2c2548a3
commit
42e671433f
4 changed files with 19 additions and 96 deletions
|
|
@ -37,13 +37,12 @@ TSD_DATA_SLOWER
|
|||
JEMALLOC_ALWAYS_INLINE t * \
|
||||
tsd_##n##p_get(tsd_t *tsd) { \
|
||||
/* \
|
||||
* Because the state might change asynchronously if it's \
|
||||
* nominal, we need to make sure that we only read it once. \
|
||||
* Read the state once, so that a transition between \
|
||||
* nominal states does not confuse this assertion. \
|
||||
*/ \
|
||||
uint8_t state = tsd_state_get(tsd); \
|
||||
assert(state == tsd_state_nominal || \
|
||||
state == tsd_state_nominal_slow || \
|
||||
state == tsd_state_nominal_recompute || \
|
||||
state == tsd_state_reincarnated || \
|
||||
state == tsd_state_minimal_initialized); \
|
||||
return tsd_##n##p_get_unsafe(tsd); \
|
||||
|
|
@ -95,11 +94,6 @@ TSD_DATA_SLOWER
|
|||
|
||||
JEMALLOC_ALWAYS_INLINE void
|
||||
tsd_assert_fast(tsd_t *tsd) {
|
||||
/*
|
||||
* Note that our fastness assertion does *not* include global slowness
|
||||
* counters; it's not in general possible to ensure that they won't
|
||||
* change asynchronously from underneath us.
|
||||
*/
|
||||
assert(!malloc_slow && tsd_tcache_enabled_get(tsd)
|
||||
&& tsd_reentrancy_level_get(tsd) == 0);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -157,39 +157,29 @@ enum {
|
|||
tsd_state_nominal = 0,
|
||||
/* Initialized but on slow path. */
|
||||
tsd_state_nominal_slow = 1,
|
||||
/*
|
||||
* Some thread has changed global state in such a way that all nominal
|
||||
* threads need to recompute their fast / slow status the next time they
|
||||
* get a chance.
|
||||
*
|
||||
* Any thread can change another thread's status *to* recompute, but
|
||||
* threads are the only ones who can change their status *from*
|
||||
* recompute.
|
||||
*/
|
||||
tsd_state_nominal_recompute = 2,
|
||||
/*
|
||||
* The above nominal states should be lower values. We use
|
||||
* tsd_nominal_max to separate nominal states from threads in the
|
||||
* process of being born / dying.
|
||||
*/
|
||||
tsd_state_nominal_max = 2,
|
||||
tsd_state_nominal_max = 1,
|
||||
|
||||
/*
|
||||
* A thread might free() during its death as its only allocator action;
|
||||
* in such scenarios, we need tsd, but set up in such a way that no
|
||||
* cleanup is necessary.
|
||||
*/
|
||||
tsd_state_minimal_initialized = 3,
|
||||
tsd_state_minimal_initialized = 2,
|
||||
/* States during which we know we're in thread death. */
|
||||
tsd_state_purgatory = 4,
|
||||
tsd_state_reincarnated = 5,
|
||||
tsd_state_purgatory = 3,
|
||||
tsd_state_reincarnated = 4,
|
||||
/*
|
||||
* What it says on the tin; tsd that hasn't been initialized. Note
|
||||
* that even when the tsd struct lives in TLS, we need to keep track
|
||||
* of stuff like whether or not our pthread destructors have been
|
||||
* scheduled, so this really truly is different than the nominal state.
|
||||
*/
|
||||
tsd_state_uninitialized = 6
|
||||
tsd_state_uninitialized = 5
|
||||
};
|
||||
|
||||
/*
|
||||
|
|
@ -203,12 +193,10 @@ enum {
|
|||
# define tsd_state_t atomic_u8_t
|
||||
# define tsd_atomic_load atomic_load_u8
|
||||
# define tsd_atomic_store atomic_store_u8
|
||||
# define tsd_atomic_exchange atomic_exchange_u8
|
||||
#else
|
||||
# define tsd_state_t atomic_u32_t
|
||||
# define tsd_atomic_load atomic_load_u32
|
||||
# define tsd_atomic_store atomic_store_u32
|
||||
# define tsd_atomic_exchange atomic_exchange_u32
|
||||
#endif
|
||||
|
||||
/* The actual tsd. */
|
||||
|
|
|
|||
|
|
@ -101,53 +101,6 @@ te_assert_invariants_debug(tsd_t *tsd) {
|
|||
te_assert_invariants_impl(tsd, &ctx);
|
||||
}
|
||||
|
||||
/*
|
||||
* Synchronization around the fast threshold in tsd --
|
||||
* There are two threads to consider in the synchronization here:
|
||||
* - The owner of the tsd being updated by a slow path change
|
||||
* - The remote thread, doing that slow path change.
|
||||
*
|
||||
* As a design constraint, we want to ensure that a slow-path transition cannot
|
||||
* be ignored for arbitrarily long, and that if the remote thread causes a
|
||||
* slow-path transition and then communicates with the owner thread that it has
|
||||
* occurred, then the owner will go down the slow path on the next allocator
|
||||
* operation (i.e., we don't want to just wait until the owner hits its slow
|
||||
* path reset condition on its own).
|
||||
*
|
||||
* Here's our strategy to do that:
|
||||
*
|
||||
* The remote thread will update the slow-path stores to TSD variables, issue a
|
||||
* SEQ_CST fence, and then update the TSD next_event_fast counter. The owner
|
||||
* thread will update next_event_fast, issue an SEQ_CST fence, and then check
|
||||
* its TSD to see if it's on the slow path.
|
||||
|
||||
* This is fairly straightforward when 64-bit atomics are supported. Assume that
|
||||
* the remote fence is sandwiched between two owner fences in the reset pathway.
|
||||
* The case where there is no preceding or trailing owner fence (i.e. because
|
||||
* the owner thread is near the beginning or end of its life) can be analyzed
|
||||
* similarly. The owner store to next_event_fast preceding the earlier owner
|
||||
* fence will be earlier in coherence order than the remote store to it, so that
|
||||
* the owner thread will go down the slow path once the store becomes visible to
|
||||
* it, which is no later than the time of the second fence.
|
||||
|
||||
* The case where we don't support 64-bit atomics is trickier, since word
|
||||
* tearing is possible. We'll repeat the same analysis, and look at the two
|
||||
* owner fences sandwiching the remote fence. The next_event_fast stores done
|
||||
* alongside the earlier owner fence cannot overwrite any of the remote stores
|
||||
* (since they precede the earlier owner fence in sb, which precedes the remote
|
||||
* fence in sc, which precedes the remote stores in sb). After the second owner
|
||||
* fence there will be a re-check of the slow-path variables anyways, so the
|
||||
* "owner will notice that it's on the slow path eventually" guarantee is
|
||||
* satisfied. To make sure that the out-of-band-messaging constraint is as well,
|
||||
* note that either the message passing is sequenced before the second owner
|
||||
* fence (in which case the remote stores happen before the second set of owner
|
||||
* stores, so malloc sees a value of zero for next_event_fast and goes down the
|
||||
* slow path), or it is not (in which case the owner sees the tsd slow-path
|
||||
* writes on its previous update). This leaves open the possibility that the
|
||||
* remote thread will (at some arbitrary point in the future) zero out one half
|
||||
* of the owner thread's next_event_fast, but that's always safe (it just sends
|
||||
* it down the slow path earlier).
|
||||
*/
|
||||
static void
|
||||
te_ctx_next_event_fast_update(te_ctx_t *ctx) {
|
||||
uint64_t next_event = te_ctx_next_event_get(ctx);
|
||||
|
|
@ -160,7 +113,6 @@ te_ctx_next_event_fast_update(te_ctx_t *ctx) {
|
|||
void
|
||||
te_recompute_fast_threshold(tsd_t *tsd) {
|
||||
if (tsd_state_get(tsd) != tsd_state_nominal) {
|
||||
/* Check first because this is also called on purgatory. */
|
||||
te_next_event_fast_set_non_nominal(tsd);
|
||||
return;
|
||||
}
|
||||
|
|
@ -170,11 +122,6 @@ te_recompute_fast_threshold(tsd_t *tsd) {
|
|||
te_ctx_next_event_fast_update(&ctx);
|
||||
te_ctx_get(tsd, &ctx, false);
|
||||
te_ctx_next_event_fast_update(&ctx);
|
||||
|
||||
atomic_fence(ATOMIC_SEQ_CST);
|
||||
if (tsd_state_get(tsd) != tsd_state_nominal) {
|
||||
te_next_event_fast_set_non_nominal(tsd);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void
|
||||
|
|
|
|||
30
src/tsd.c
30
src/tsd.c
|
|
@ -124,20 +124,17 @@ tsd_state_compute(tsd_t *tsd) {
|
|||
|
||||
void
|
||||
tsd_slow_update(tsd_t *tsd) {
|
||||
uint8_t old_state;
|
||||
do {
|
||||
uint8_t new_state = tsd_state_compute(tsd);
|
||||
old_state = tsd_atomic_exchange(
|
||||
&tsd->state, new_state, ATOMIC_ACQUIRE);
|
||||
} while (old_state == tsd_state_nominal_recompute);
|
||||
assert(!tsd_booted_get() || tsd_get(false) == tsd
|
||||
|| (tsd_get_allocates() && tsd_get(false) == NULL));
|
||||
tsd_atomic_store(&tsd->state, tsd_state_compute(tsd), ATOMIC_RELAXED);
|
||||
|
||||
te_recompute_fast_threshold(tsd);
|
||||
}
|
||||
|
||||
void
|
||||
tsd_state_set(tsd_t *tsd, uint8_t new_state) {
|
||||
/* Only the tsd module can change the state *to* recompute. */
|
||||
assert(new_state != tsd_state_nominal_recompute);
|
||||
assert(!tsd_booted_get() || tsd_get(false) == tsd
|
||||
|| (tsd_get_allocates() && tsd_get(false) == NULL));
|
||||
uint8_t old_state = tsd_atomic_load(&tsd->state, ATOMIC_RELAXED);
|
||||
if (old_state > tsd_state_nominal_max) {
|
||||
/*
|
||||
|
|
@ -162,10 +159,10 @@ tsd_state_set(tsd_t *tsd, uint8_t new_state) {
|
|||
&tsd->state, new_state, ATOMIC_RELAXED);
|
||||
} else {
|
||||
/*
|
||||
* This is the tricky case. We're transitioning from
|
||||
* one nominal state to another. The caller can't know
|
||||
* about any races that are occurring at the same time,
|
||||
* so we always have to recompute no matter what.
|
||||
* We're transitioning from one nominal state to
|
||||
* another. Recompute the state from the underlying
|
||||
* slow-path data rather than trusting the caller's
|
||||
* requested nominal state.
|
||||
*/
|
||||
tsd_slow_update(tsd);
|
||||
}
|
||||
|
|
@ -234,13 +231,10 @@ tsd_fetch_slow(tsd_t *tsd, bool minimal) {
|
|||
|
||||
if (tsd_state_get(tsd) == tsd_state_nominal_slow) {
|
||||
/*
|
||||
* On slow path but no work needed. Note that we can't
|
||||
* necessarily *assert* that we're slow, because we might be
|
||||
* slow because of an asynchronous modification to global state,
|
||||
* which might be asynchronously modified *back*.
|
||||
* On slow path but no work needed. Local slow-path state may
|
||||
* have changed since we computed the TSD state, so don't assert
|
||||
* on the underlying cause here.
|
||||
*/
|
||||
} else if (tsd_state_get(tsd) == tsd_state_nominal_recompute) {
|
||||
tsd_slow_update(tsd);
|
||||
} else if (tsd_state_get(tsd) == tsd_state_uninitialized) {
|
||||
if (!minimal) {
|
||||
if (tsd_booted) {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue