Source File
mgclimit.go
Belonging Package
runtime
// Copyright 2022 The Go Authors. All rights reserved.// Use of this source code is governed by a BSD-style// license that can be found in the LICENSE file.package runtimeimport// gcCPULimiter is a mechanism to limit GC CPU utilization in situations// where it might become excessive and inhibit application progress (e.g.// a death spiral).//// The core of the limiter is a leaky bucket mechanism that fills with GC// CPU time and drains with mutator time. Because the bucket fills and// drains with time directly (i.e. without any weighting), this effectively// sets a very conservative limit of 50%. This limit could be enforced directly,// however, but the purpose of the bucket is to accommodate spikes in GC CPU// utilization without hurting throughput.//// Note that the bucket in the leaky bucket mechanism can never go negative,// so the GC never gets credit for a lot of CPU time spent without the GC// running. This is intentional, as an application that stays idle for, say,// an entire day, could build up enough credit to fail to prevent a death// spiral the following day. The bucket's capacity is the GC's only leeway.//// The capacity thus also sets the window the limiter considers. For example,// if the capacity of the bucket is 1 cpu-second, then the limiter will not// kick in until at least 1 full cpu-second in the last 2 cpu-second window// is spent on GC CPU time.var gcCPULimiter gcCPULimiterStatetype gcCPULimiterState struct {lock atomic.Uint32enabled atomic.Boolbucket struct {// Invariants:// - fill >= 0// - capacity >= 0// - fill <= capacityfill, capacity uint64}// overflow is the cumulative amount of GC CPU time that we tried to fill the// bucket with but exceeded its capacity.overflow uint64// gcEnabled is an internal copy of gcBlackenEnabled that determines// whether the limiter tracks total assist time.//// gcBlackenEnabled isn't used directly so as to keep this structure// unit-testable.gcEnabled bool// transitioning is true when the GC is in a STW and transitioning between// the mark and sweep phases.transitioning bool// assistTimePool is the accumulated assist time since the last update.assistTimePool atomic.Int64// idleMarkTimePool is the accumulated idle mark time since the last update.idleMarkTimePool atomic.Int64// idleTimePool is the accumulated time Ps spent on the idle list since the last update.idleTimePool atomic.Int64// lastUpdate is the nanotime timestamp of the last time update was called.//// Updated under lock, but may be read concurrently.lastUpdate atomic.Int64// lastEnabledCycle is the GC cycle that last had the limiter enabled.lastEnabledCycle atomic.Uint32// nprocs is an internal copy of gomaxprocs, used to determine total available// CPU time.//// gomaxprocs isn't used directly so as to keep this structure unit-testable.nprocs int32// test indicates whether this instance of the struct was made for testing purposes.test bool}// limiting returns true if the CPU limiter is currently enabled, meaning the Go GC// should take action to limit CPU utilization.//// It is safe to call concurrently with other operations.func ( *gcCPULimiterState) () bool {return .enabled.Load()}// startGCTransition notifies the limiter of a GC transition.//// This call takes ownership of the limiter and disables all other means of// updating the limiter. Release ownership by calling finishGCTransition.//// It is safe to call concurrently with other operations.func ( *gcCPULimiterState) ( bool, int64) {if !.tryLock() {// This must happen during a STW, so we can't fail to acquire the lock.// If we did, something went wrong. Throw.throw("failed to acquire lock to start a GC transition")}if .gcEnabled == {throw("transitioning GC to the same state as before?")}// Flush whatever was left between the last update and now..updateLocked().gcEnabled =.transitioning = true// N.B. finishGCTransition releases the lock.//// We don't release here to increase the chance that if there's a failure// to finish the transition, that we throw on failing to acquire the lock.}// finishGCTransition notifies the limiter that the GC transition is complete// and releases ownership of it. It also accumulates STW time in the bucket.// now must be the timestamp from the end of the STW pause.func ( *gcCPULimiterState) ( int64) {if !.transitioning {throw("finishGCTransition called without starting one?")}// Count the full nprocs set of CPU time because the world is stopped// between startGCTransition and finishGCTransition. Even though the GC// isn't running on all CPUs, it is preventing user code from doing so,// so it might as well be.if := .lastUpdate.Load(); >= {.accumulate(0, (-)*int64(.nprocs))}.lastUpdate.Store().transitioning = false.unlock()}// gcCPULimiterUpdatePeriod dictates the maximum amount of wall-clock time// we can go before updating the limiter.const gcCPULimiterUpdatePeriod = 10e6 // 10ms// needUpdate returns true if the limiter's maximum update period has been// exceeded, and so would benefit from an update.func ( *gcCPULimiterState) ( int64) bool {return -.lastUpdate.Load() > gcCPULimiterUpdatePeriod}// addAssistTime notifies the limiter of additional assist time. It will be// included in the next update.func ( *gcCPULimiterState) ( int64) {.assistTimePool.Add()}// addIdleTime notifies the limiter of additional time a P spent on the idle list. It will be// subtracted from the total CPU time in the next update.func ( *gcCPULimiterState) ( int64) {.idleTimePool.Add()}// update updates the bucket given runtime-specific information. now is the// current monotonic time in nanoseconds.//// This is safe to call concurrently with other operations, except *GCTransition.func ( *gcCPULimiterState) ( int64) {if !.tryLock() {// We failed to acquire the lock, which means something else is currently// updating. Just drop our update, the next one to update will include// our total assist time.return}if .transitioning {throw("update during transition")}.updateLocked().unlock()}// updateLocked is the implementation of update. l.lock must be held.func ( *gcCPULimiterState) ( int64) {:= .lastUpdate.Load()if < {// Defensively avoid overflow. This isn't even the latest update anyway.return}:= ( - ) * int64(.nprocs).lastUpdate.Store()// Drain the pool of assist time.:= .assistTimePool.Load()if != 0 {.assistTimePool.Add(-)}// Drain the pool of idle time.:= .idleTimePool.Load()if != 0 {.idleTimePool.Add(-)}if !.test {// Consume time from in-flight events. Make sure we're not preemptible so allp can't change.//// The reason we do this instead of just waiting for those events to finish and push updates// is to ensure that all the time we're accounting for happened sometime between lastUpdate// and now. This dramatically simplifies reasoning about the limiter because we're not at// risk of extra time being accounted for in this window than actually happened in this window,// leading to all sorts of weird transient behavior.:= acquirem()for , := range allp {, := .limiterEvent.consume()switch {case limiterEventIdleMarkWork:fallthroughcase limiterEventIdle:+=sched.idleTime.Add()case limiterEventMarkAssist:fallthroughcase limiterEventScavengeAssist:+=case limiterEventNone:breakdefault:throw("invalid limiter event type found")}}releasem()}// Compute total GC time.:=if .gcEnabled {+= int64(float64() * gcBackgroundUtilization)}// Subtract out all idle time from the total time. Do this after computing// GC time, because the background utilization is dependent on the *real*// total time, not the total time after idle time is subtracted.//// Idle time is counted as any time that a P is on the P idle list plus idle mark// time. Idle mark workers soak up time that the application spends idle.//// On a heavily undersubscribed system, any additional idle time can skew GC CPU// utilization, because the GC might be executing continuously and thrashing,// yet the CPU utilization with respect to GOMAXPROCS will be quite low, so// the limiter fails to turn on. By subtracting idle time, we're removing time that// we know the application was idle giving a more accurate picture of whether// the GC is thrashing.//// Note that this can cause the limiter to turn on even if it's not needed. For// instance, on a system with 32 Ps but only 1 running goroutine, each GC will have// 8 dedicated GC workers. Assuming the GC cycle is half mark phase and half sweep// phase, then the GC CPU utilization over that cycle, with idle time removed, will// be 8/(8+2) = 80%. Even though the limiter turns on, though, assist should be// unnecessary, as the GC has way more CPU time to outpace the 1 goroutine that's// running.-=.accumulate(-, )}// accumulate adds time to the bucket and signals whether the limiter is enabled.//// This is an internal function that deals just with the bucket. Prefer update.// l.lock must be held.func ( *gcCPULimiterState) (, int64) {:= .bucket.capacity - .bucket.fill:= == 0// Let's be careful about three things here:// 1. The addition and subtraction, for the invariants.// 2. Overflow.// 3. Excessive mutation of l.enabled, which is accessed// by all assists, potentially more than once.:= -// Handle limiting case.if > 0 && <= uint64() {.overflow += uint64() -.bucket.fill = .bucket.capacityif ! {.enabled.Store(true).lastEnabledCycle.Store(memstats.numgc + 1)}return}// Handle non-limiting cases.if < 0 && .bucket.fill <= uint64(-) {// Bucket emptied..bucket.fill = 0} else {// All other cases..bucket.fill -= uint64(-)}if != 0 && {.enabled.Store(false)}}// tryLock attempts to lock l. Returns true on success.func ( *gcCPULimiterState) () bool {return .lock.CompareAndSwap(0, 1)}// unlock releases the lock on l. Must be called if tryLock returns true.func ( *gcCPULimiterState) () {:= .lock.Swap(0)if != 1 {throw("double unlock")}}// capacityPerProc is the limiter's bucket capacity for each P in GOMAXPROCS.const capacityPerProc = 1e9 // 1 second in nanoseconds// resetCapacity updates the capacity based on GOMAXPROCS. Must not be called// while the GC is enabled.//// It is safe to call concurrently with other operations.func ( *gcCPULimiterState) ( int64, int32) {if !.tryLock() {// This must happen during a STW, so we can't fail to acquire the lock.// If we did, something went wrong. Throw.throw("failed to acquire lock to reset capacity")}// Flush the rest of the time for this period..updateLocked().nprocs =.bucket.capacity = uint64() * capacityPerProcif .bucket.fill > .bucket.capacity {.bucket.fill = .bucket.capacity.enabled.Store(true).lastEnabledCycle.Store(memstats.numgc + 1)} else if .bucket.fill < .bucket.capacity {.enabled.Store(false)}.unlock()}// limiterEventType indicates the type of an event occurring on some P.//// These events represent the full set of events that the GC CPU limiter tracks// to execute its function.//// This type may use no more than limiterEventBits bits of information.type limiterEventType uint8const (limiterEventNone limiterEventType = iota // None of the following events.limiterEventIdleMarkWork // Refers to an idle mark worker (see gcMarkWorkerMode).limiterEventMarkAssist // Refers to mark assist (see gcAssistAlloc).limiterEventScavengeAssist // Refers to a scavenge assist (see allocSpan).limiterEventIdle // Refers to time a P spent on the idle list.limiterEventBits = 3)// limiterEventTypeMask is a mask for the bits in p.limiterEventStart that represent// the event type. The rest of the bits of that field represent a timestamp.const (limiterEventTypeMask = uint64((1<<limiterEventBits)-1) << (64 - limiterEventBits)limiterEventStampNone = limiterEventStamp(0))// limiterEventStamp is a nanotime timestamp packed with a limiterEventType.type limiterEventStamp uint64// makeLimiterEventStamp creates a new stamp from the event type and the current timestamp.func ( limiterEventType, int64) limiterEventStamp {return limiterEventStamp(uint64()<<(64-limiterEventBits) | (uint64() &^ limiterEventTypeMask))}// duration computes the difference between now and the start time stored in the stamp.//// Returns 0 if the difference is negative, which may happen if now is stale or if the// before and after timestamps cross a 2^(64-limiterEventBits) boundary.func ( limiterEventStamp) ( int64) int64 {// The top limiterEventBits bits of the timestamp are derived from the current time// when computing a duration.:= int64((uint64() & limiterEventTypeMask) | (uint64() &^ limiterEventTypeMask))if < {return 0}return -}// type extracts the event type from the stamp.func ( limiterEventStamp) () limiterEventType {return limiterEventType( >> (64 - limiterEventBits))}// limiterEvent represents tracking state for an event tracked by the GC CPU limiter.type limiterEvent struct {stamp atomic.Uint64 // Stores a limiterEventStamp.}// start begins tracking a new limiter event of the current type. If an event// is already in flight, then a new event cannot begin because the current time is// already being attributed to that event. In this case, this function returns false.// Otherwise, it returns true.//// The caller must be non-preemptible until at least stop is called or this function// returns false. Because this is trying to measure "on-CPU" time of some event, getting// scheduled away during it can mean that whatever we're measuring isn't a reflection// of "on-CPU" time. The OS could deschedule us at any time, but we want to maintain as// close of an approximation as we can.func ( *limiterEvent) ( limiterEventType, int64) bool {if limiterEventStamp(.stamp.Load()).typ() != limiterEventNone {return false}.stamp.Store(uint64(makeLimiterEventStamp(, )))return true}// consume acquires the partial event CPU time from any in-flight event.// It achieves this by storing the current time as the new event time.//// Returns the type of the in-flight event, as well as how long it's currently been// executing for. Returns limiterEventNone if no event is active.func ( *limiterEvent) ( int64) ( limiterEventType, int64) {// Read the limiter event timestamp and update it to now.for {:= limiterEventStamp(.stamp.Load())= .typ()if == limiterEventNone {// There's no in-flight event, so just push that up.return}= .duration()if == 0 {// We might have a stale now value, or this crossed the// 2^(64-limiterEventBits) boundary in the clock readings.// Just ignore it.return limiterEventNone, 0}:= makeLimiterEventStamp(, )if .stamp.CompareAndSwap(uint64(), uint64()) {break}}return}// stop stops the active limiter event. Throws if the//// The caller must be non-preemptible across the event. See start as to why.func ( *limiterEvent) ( limiterEventType, int64) {var limiterEventStampfor {= limiterEventStamp(.stamp.Load())if .typ() != {print("runtime: want=", , " got=", .typ(), "\n")throw("limiterEvent.stop: found wrong event in p's limiter event slot")}if .stamp.CompareAndSwap(uint64(), uint64(limiterEventStampNone)) {break}}:= .duration()if == 0 {// It's possible that we're missing time because we crossed a// 2^(64-limiterEventBits) boundary between the start and end.// In this case, we're dropping that information. This is OK because// at worst it'll cause a transient hiccup that will quickly resolve// itself as all new timestamps begin on the other side of the boundary.// Such a hiccup should be incredibly rare.return}// Account for the event.switch {case limiterEventIdleMarkWork:gcCPULimiter.addIdleTime()case limiterEventIdle:gcCPULimiter.addIdleTime()sched.idleTime.Add()case limiterEventMarkAssist:fallthroughcase limiterEventScavengeAssist:gcCPULimiter.addAssistTime()default:throw("limiterEvent.stop: invalid limiter event type found")}}
![]() |
The pages are generated with Golds v0.6.7. (GOOS=linux GOARCH=amd64) Golds is a Go 101 project developed by Tapir Liu. PR and bug reports are welcome and can be submitted to the issue list. Please follow @Go100and1 (reachable from the left QR code) to get the latest news of Golds. |