// Copyright 2009 The Go Authors. All rights reserved.// Use of this source code is governed by a BSD-style// license that can be found in the LICENSE file.// Malloc profiling.// Patterned after tcmalloc's algorithms; shorter code.package runtimeimport ()// NOTE(rsc): Everything here could use cas if contention became an issue.var (// profInsertLock protects changes to the start of all *bucket linked listsprofInsertLockmutex// profBlockLock protects the contents of every blockRecord structprofBlockLockmutex// profMemActiveLock protects the active field of every memRecord structprofMemActiveLockmutex// profMemFutureLock is a set of locks that protect the respective elements // of the future array of every memRecord structprofMemFutureLock [len(memRecord{}.future)]mutex)// All memory allocations are local and do not escape outside of the profiler.// The profiler is forbidden from referring to garbage-collected memory.const (// profile typesmemProfilebucketType = 1 + iotablockProfilemutexProfile// size of bucket hash tablebuckHashSize = 179999// max depth of stack to record in bucketmaxStack = 32)typebucketTypeint// A bucket holds per-call-stack profiling information.// The representation is a bit sleazy, inherited from C.// This struct defines the bucket header. 
It is followed in// memory by the stack words and then the actual record// data, either a memRecord or a blockRecord.//// Per-call-stack profiling information.// Lookup by hashing call stack into a linked-list hash table.//// None of the fields in this bucket header are modified after// creation, including its next and allnext links.//// No heap pointers.typebucketstruct { _ sys.NotInHeapnext *bucketallnext *buckettypbucketType// memBucket or blockBucket (includes mutexProfile)hashuintptrsizeuintptrnstkuintptr}// A memRecord is the bucket data for a bucket of type memProfile,// part of the memory profile.typememRecordstruct {// The following complex 3-stage scheme of stats accumulation // is required to obtain a consistent picture of mallocs and frees // for some point in time. // The problem is that mallocs come in real time, while frees // come only after a GC during concurrent sweeping. So if we would // naively count them, we would get a skew toward mallocs. // // Hence, we delay information to get consistent snapshots as // of mark termination. Allocations count toward the next mark // termination's snapshot, while sweep frees count toward the // previous mark termination's snapshot: // // MT MT MT MT // .·| .·| .·| .·| // .·˙ | .·˙ | .·˙ | .·˙ | // .·˙ | .·˙ | .·˙ | .·˙ | // .·˙ |.·˙ |.·˙ |.·˙ | // // alloc → ▲ ← free // ┠┅┅┅┅┅┅┅┅┅┅┅P // C+2 → C+1 → C // // alloc → ▲ ← free // ┠┅┅┅┅┅┅┅┅┅┅┅P // C+2 → C+1 → C // // Since we can't publish a consistent snapshot until all of // the sweep frees are accounted for, we wait until the next // mark termination ("MT" above) to publish the previous mark // termination's snapshot ("P" above). To do this, allocation // and free events are accounted to *future* heap profile // cycles ("C+n" above) and we only publish a cycle once all // of the events from that cycle must be done. Specifically: // // Mallocs are accounted to cycle C+2. // Explicit frees are accounted to cycle C+2. 
// GC frees (done during sweeping) are accounted to cycle C+1. // // After mark termination, we increment the global heap // profile cycle counter and accumulate the stats from cycle C // into the active profile.// active is the currently published profile. A profiling // cycle can be accumulated into active once its complete.activememRecordCycle// future records the profile events we're counting for cycles // that have not yet been published. This is ring buffer // indexed by the global heap profile cycle C and stores // cycles C, C+1, and C+2. Unlike active, these counts are // only for a single cycle; they are not cumulative across // cycles. // // We store cycle C here because there's a window between when // C becomes the active cycle and when we've flushed it to // active.future [3]memRecordCycle}// memRecordCycletypememRecordCyclestruct {allocs, freesuintptralloc_bytes, free_bytesuintptr}// add accumulates b into a. It does not zero b.func ( *memRecordCycle) ( *memRecordCycle) { .allocs += .allocs .frees += .frees .alloc_bytes += .alloc_bytes .free_bytes += .free_bytes}// A blockRecord is the bucket data for a bucket of type blockProfile,// which is used in blocking and mutex profiles.typeblockRecordstruct {countfloat64cyclesint64}var (mbucketsatomic.UnsafePointer// *bucket, memory profile bucketsbbucketsatomic.UnsafePointer// *bucket, blocking profile bucketsxbucketsatomic.UnsafePointer// *bucket, mutex profile bucketsbuckhashatomic.UnsafePointer// *buckhashArraymProfCyclemProfCycleHolder)typebuckhashArray [buckHashSize]atomic.UnsafePointer// *bucketconstmProfCycleWrap = uint32(len(memRecord{}.future)) * (2 << 24)// mProfCycleHolder holds the global heap profile cycle number (wrapped at// mProfCycleWrap, stored starting at bit 1), and a flag (stored at bit 0) to// indicate whether future[cycle] in all buckets has been queued to flush into// the active profile.typemProfCycleHolderstruct {valueatomic.Uint32}// read returns the current cycle count.func ( 
*mProfCycleHolder) () ( uint32) { := .value.Load() = >> 1return}// setFlushed sets the flushed flag. It returns the current cycle count and the// previous value of the flushed flag.func ( *mProfCycleHolder) () ( uint32, bool) {for { := .value.Load() = >> 1 = ( & 0x1) != 0 := | 0x1if .value.CompareAndSwap(, ) {return , } }}// increment increases the cycle count by one, wrapping the value at// mProfCycleWrap. It clears the flushed flag.func ( *mProfCycleHolder) () {// We explicitly wrap mProfCycle rather than depending on // uint wraparound because the memRecord.future ring does not // itself wrap at a power of two.for { := .value.Load() := >> 1 = ( + 1) % mProfCycleWrap := << 1if .value.CompareAndSwap(, ) {break } }}// newBucket allocates a bucket with the given type and number of stack entries.func ( bucketType, int) *bucket { := unsafe.Sizeof(bucket{}) + uintptr()*unsafe.Sizeof(uintptr(0))switch {default:throw("invalid profile bucket type")casememProfile: += unsafe.Sizeof(memRecord{})caseblockProfile, mutexProfile: += unsafe.Sizeof(blockRecord{}) } := (*bucket)(persistentalloc(, 0, &memstats.buckhash_sys)) .typ = .nstk = uintptr()return}// stk returns the slice in b holding the stack.func ( *bucket) () []uintptr { := (*[maxStack]uintptr)(add(unsafe.Pointer(), unsafe.Sizeof(*)))return [:.nstk:.nstk]}// mp returns the memRecord associated with the memProfile bucket b.func ( *bucket) () *memRecord {if .typ != memProfile {throw("bad use of bucket.mp") } := add(unsafe.Pointer(), unsafe.Sizeof(*)+.nstk*unsafe.Sizeof(uintptr(0)))return (*memRecord)()}// bp returns the blockRecord associated with the blockProfile bucket b.func ( *bucket) () *blockRecord {if .typ != blockProfile && .typ != mutexProfile {throw("bad use of bucket.bp") } := add(unsafe.Pointer(), unsafe.Sizeof(*)+.nstk*unsafe.Sizeof(uintptr(0)))return (*blockRecord)()}// Return the bucket for stk[0:nstk], allocating new bucket if needed.func ( bucketType, uintptr, []uintptr, bool) *bucket { := 
(*buckhashArray)(buckhash.Load())if == nil {lock(&profInsertLock)// check again under the lock = (*buckhashArray)(buckhash.Load())if == nil { = (*buckhashArray)(sysAlloc(unsafe.Sizeof(buckhashArray{}), &memstats.buckhash_sys))if == nil {throw("runtime: cannot allocate memory") }buckhash.StoreNoWB(unsafe.Pointer()) }unlock(&profInsertLock) }// Hash stack.varuintptrfor , := range { += += << 10 ^= >> 6 }// hash in size += += << 10 ^= >> 6// finalize += << 3 ^= >> 11 := int( % buckHashSize)// first check optimistically, without the lockfor := (*bucket)([].Load()); != nil; = .next {if .typ == && .hash == && .size == && eqslice(.stk(), ) {return } }if ! {returnnil }lock(&profInsertLock)// check again under the insertion lockfor := (*bucket)([].Load()); != nil; = .next {if .typ == && .hash == && .size == && eqslice(.stk(), ) {unlock(&profInsertLock)return } }// Create new bucket. := newBucket(, len())copy(.stk(), ) .hash = .size = var *atomic.UnsafePointerif == memProfile { = &mbuckets } elseif == mutexProfile { = &xbuckets } else { = &bbuckets } .next = (*bucket)([].Load()) .allnext = (*bucket)(.Load()) [].StoreNoWB(unsafe.Pointer()) .StoreNoWB(unsafe.Pointer())unlock(&profInsertLock)return}func (, []uintptr) bool {iflen() != len() {returnfalse }for , := range {if != [] {returnfalse } }returntrue}// mProf_NextCycle publishes the next heap profile cycle and creates a// fresh heap profile cycle. This operation is fast and can be done// during STW. The caller must call mProf_Flush before calling// mProf_NextCycle again.//// This is called by mark termination during STW so allocations and// frees after the world is started again count towards a new heap// profiling cycle.func () {mProfCycle.increment()}// mProf_Flush flushes the events from the current heap profiling// cycle into the active profile. After this it is safe to start a new// heap profiling cycle with mProf_NextCycle.//// This is called by GC after mark termination starts the world. 
In// contrast with mProf_NextCycle, this is somewhat expensive, but safe// to do concurrently.func () { , := mProfCycle.setFlushed()if {return } := % uint32(len(memRecord{}.future))lock(&profMemActiveLock)lock(&profMemFutureLock[])mProf_FlushLocked()unlock(&profMemFutureLock[])unlock(&profMemActiveLock)}// mProf_FlushLocked flushes the events from the heap profiling cycle at index// into the active profile. The caller must hold the lock for the active profile// (profMemActiveLock) and for the profiling cycle at index// (profMemFutureLock[index]).func ( uint32) {assertLockHeld(&profMemActiveLock)assertLockHeld(&profMemFutureLock[]) := (*bucket)(mbuckets.Load())for := ; != nil; = .allnext { := .mp()// Flush cycle C into the published profile and clear // it for reuse. := &.future[] .active.add() * = memRecordCycle{} }}// mProf_PostSweep records that all sweep frees for this GC cycle have// completed. This has the effect of publishing the heap profile// snapshot as of the last mark termination without advancing the heap// profile cycle.func () {// Flush cycle C+1 to the active profile so everything as of // the last mark termination becomes visible. *Don't* advance // the cycle, since we're still accumulating allocs in cycle // C+2, which have to become C+1 in the next mark termination // and so on. := mProfCycle.read() + 1 := % uint32(len(memRecord{}.future))lock(&profMemActiveLock)lock(&profMemFutureLock[])mProf_FlushLocked()unlock(&profMemFutureLock[])unlock(&profMemActiveLock)}// Called by malloc to record a profiled block.func ( unsafe.Pointer, uintptr) {var [maxStack]uintptr := callers(4, [:]) := (mProfCycle.read() + 2) % uint32(len(memRecord{}.future)) := stkbucket(memProfile, , [:], true) := .mp() := &.future[]lock(&profMemFutureLock[]) .allocs++ .alloc_bytes += unlock(&profMemFutureLock[])// Setprofilebucket locks a bunch of other mutexes, so we call it outside of // the profiler locks. This reduces potential contention and chances of // deadlocks. 
Since the object must be alive during the call to // mProf_Malloc, it's fine to do this non-atomically.systemstack(func() {setprofilebucket(, ) })}// Called when freeing a profiled block.func ( *bucket, uintptr) { := (mProfCycle.read() + 1) % uint32(len(memRecord{}.future)) := .mp() := &.future[]lock(&profMemFutureLock[]) .frees++ .free_bytes += unlock(&profMemFutureLock[])}varblockprofilerateuint64// in CPU ticks// SetBlockProfileRate controls the fraction of goroutine blocking events// that are reported in the blocking profile. The profiler aims to sample// an average of one blocking event per rate nanoseconds spent blocked.//// To include every blocking event in the profile, pass rate = 1.// To turn off profiling entirely, pass rate <= 0.func ( int) {varint64if <= 0 { = 0// disable profiling } elseif == 1 { = 1// profile everything } else {// convert ns to cycles, use float64 to prevent overflow during multiplication = int64(float64() * float64(tickspersecond()) / (1000 * 1000 * 1000))if == 0 { = 1 } }atomic.Store64(&blockprofilerate, uint64())}func ( int64, int) {if <= 0 { = 1 } := int64(atomic.Load64(&blockprofilerate))ifblocksampled(, ) {saveblockevent(, , +1, blockProfile) }}// blocksampled returns true for all events where cycles >= rate. Shorter// events have a cycles/rate random chance of returning true.func (, int64) bool {if <= 0 || ( > && int64(fastrand())% > ) {returnfalse }returntrue}func (, int64, int, bucketType) { := getg()varintvar [maxStack]uintptrif .m.curg == nil || .m.curg == { = callers(, [:]) } else { = gcallers(.m.curg, , [:]) } := stkbucket(, 0, [:], true) := .bp()lock(&profBlockLock)// We want to up-scale the count and cycles according to the // probability that the event was sampled. For block profile events, // the sample probability is 1 if cycles >= rate, and cycles / rate // otherwise. For mutex profile events, the sample probability is 1 / rate. 
// We scale the events by 1 / (probability the event was sampled).if == blockProfile && < {// Remove sampling bias, see discussion on http://golang.org/cl/299991. .count += float64() / float64() .cycles += } elseif == mutexProfile { .count += float64() .cycles += * } else { .count++ .cycles += }unlock(&profBlockLock)}varmutexprofilerateuint64// fraction sampled// SetMutexProfileFraction controls the fraction of mutex contention events// that are reported in the mutex profile. On average 1/rate events are// reported. The previous rate is returned.//// To turn off profiling entirely, pass rate 0.// To just read the current rate, pass rate < 0.// (For n>1 the details of sampling may change.)func ( int) int {if < 0 {returnint(mutexprofilerate) } := mutexprofilerateatomic.Store64(&mutexprofilerate, uint64())returnint()}//go:linkname mutexevent sync.eventfunc ( int64, int) {if < 0 { = 0 } := int64(atomic.Load64(&mutexprofilerate))// TODO(pjw): measure impact of always calling fastrand vs using something // like malloc.go:nextSample()if > 0 && int64(fastrand())% == 0 {saveblockevent(, , +1, mutexProfile) }}// Go interface to profile data.// A StackRecord describes a single execution stack.typeStackRecordstruct {Stack0 [32]uintptr// stack trace for this record; ends at first 0 entry}// Stack returns the stack trace associated with the record,// a prefix of r.Stack0.func ( *StackRecord) () []uintptr {for , := range .Stack0 {if == 0 {return .Stack0[0:] } }return .Stack0[0:]}// MemProfileRate controls the fraction of memory allocations// that are recorded and reported in the memory profile.// The profiler aims to sample an average of// one allocation per MemProfileRate bytes allocated.//// To include every allocated block in the profile, set MemProfileRate to 1.// To turn off profiling entirely, set MemProfileRate to 0.//// The tools that process the memory profiles assume that the// profile rate is constant across the lifetime of the program// and equal to the current value. 
Programs that change the// memory profiling rate should do so just once, as early as// possible in the execution of the program (for example,// at the beginning of main).varMemProfileRateint = 512 * 1024// disableMemoryProfiling is set by the linker if runtime.MemProfile// is not used and the link type guarantees nobody else could use it// elsewhere.vardisableMemoryProfilingbool// A MemProfileRecord describes the live objects allocated// by a particular call sequence (stack trace).typeMemProfileRecordstruct {AllocBytes, FreeBytesint64// number of bytes allocated, freedAllocObjects, FreeObjectsint64// number of objects allocated, freedStack0 [32]uintptr// stack trace for this record; ends at first 0 entry}// InUseBytes returns the number of bytes in use (AllocBytes - FreeBytes).func ( *MemProfileRecord) () int64 { return .AllocBytes - .FreeBytes }// InUseObjects returns the number of objects in use (AllocObjects - FreeObjects).func ( *MemProfileRecord) () int64 {return .AllocObjects - .FreeObjects}// Stack returns the stack trace associated with the record,// a prefix of r.Stack0.func ( *MemProfileRecord) () []uintptr {for , := range .Stack0 {if == 0 {return .Stack0[0:] } }return .Stack0[0:]}// MemProfile returns a profile of memory allocated and freed per allocation// site.//// MemProfile returns n, the number of records in the current memory profile.// If len(p) >= n, MemProfile copies the profile into p and returns n, true.// If len(p) < n, MemProfile does not change p and returns n, false.//// If inuseZero is true, the profile includes allocation records// where r.AllocBytes > 0 but r.AllocBytes == r.FreeBytes.// These are sites where memory was allocated, but it has all// been released back to the runtime.//// The returned profile may be up to two garbage collection cycles old.// This is to avoid skewing the profile toward allocations; because// allocations happen in real time but frees are delayed until the garbage// collector performs sweeping, the profile 
only accounts for allocations// that have had a chance to be freed by the garbage collector.//// Most clients should use the runtime/pprof package or// the testing package's -test.memprofile flag instead// of calling MemProfile directly.func ( []MemProfileRecord, bool) ( int, bool) { := mProfCycle.read()// If we're between mProf_NextCycle and mProf_Flush, take care // of flushing to the active profile so we only have to look // at the active profile below. := % uint32(len(memRecord{}.future))lock(&profMemActiveLock)lock(&profMemFutureLock[])mProf_FlushLocked()unlock(&profMemFutureLock[]) := true := (*bucket)(mbuckets.Load())for := ; != nil; = .allnext { := .mp()if || .active.alloc_bytes != .active.free_bytes { ++ }if .active.allocs != 0 || .active.frees != 0 { = false } }if {// Absolutely no data, suggesting that a garbage collection // has not yet happened. In order to allow profiling when // garbage collection is disabled from the beginning of execution, // accumulate all of the cycles, and recount buckets. 
= 0for := ; != nil; = .allnext { := .mp()for := range .future {lock(&profMemFutureLock[]) .active.add(&.future[]) .future[] = memRecordCycle{}unlock(&profMemFutureLock[]) }if || .active.alloc_bytes != .active.free_bytes { ++ } } }if <= len() { = true := 0for := ; != nil; = .allnext { := .mp()if || .active.alloc_bytes != .active.free_bytes {record(&[], ) ++ } } }unlock(&profMemActiveLock)return}// Write b's data to r.func ( *MemProfileRecord, *bucket) { := .mp() .AllocBytes = int64(.active.alloc_bytes) .FreeBytes = int64(.active.free_bytes) .AllocObjects = int64(.active.allocs) .FreeObjects = int64(.active.frees)ifraceenabled {racewriterangepc(unsafe.Pointer(&.Stack0[0]), unsafe.Sizeof(.Stack0), getcallerpc(), abi.FuncPCABIInternal(MemProfile)) }ifmsanenabled {msanwrite(unsafe.Pointer(&.Stack0[0]), unsafe.Sizeof(.Stack0)) }ifasanenabled {asanwrite(unsafe.Pointer(&.Stack0[0]), unsafe.Sizeof(.Stack0)) }copy(.Stack0[:], .stk())for := int(.nstk); < len(.Stack0); ++ { .Stack0[] = 0 }}func ( func(*bucket, uintptr, *uintptr, uintptr, uintptr, uintptr)) {lock(&profMemActiveLock) := (*bucket)(mbuckets.Load())for := ; != nil; = .allnext { := .mp() (, .nstk, &.stk()[0], .size, .active.allocs, .active.frees) }unlock(&profMemActiveLock)}// BlockProfileRecord describes blocking events originated// at a particular call sequence (stack trace).typeBlockProfileRecordstruct {Countint64Cyclesint64StackRecord}// BlockProfile returns n, the number of records in the current blocking profile.// If len(p) >= n, BlockProfile copies the profile into p and returns n, true.// If len(p) < n, BlockProfile does not change p and returns n, false.//// Most clients should use the runtime/pprof package or// the testing package's -test.blockprofile flag instead// of calling BlockProfile directly.func ( []BlockProfileRecord) ( int, bool) {lock(&profBlockLock) := (*bucket)(bbuckets.Load())for := ; != nil; = .allnext { ++ }if <= len() { = truefor := ; != nil; = .allnext { := .bp() := &[0] .Count = 
int64(.count)// Prevent callers from having to worry about division by zero errors. // See discussion on http://golang.org/cl/299991.if .Count == 0 { .Count = 1 } .Cycles = .cyclesifraceenabled {racewriterangepc(unsafe.Pointer(&.Stack0[0]), unsafe.Sizeof(.Stack0), getcallerpc(), abi.FuncPCABIInternal()) }ifmsanenabled {msanwrite(unsafe.Pointer(&.Stack0[0]), unsafe.Sizeof(.Stack0)) }ifasanenabled {asanwrite(unsafe.Pointer(&.Stack0[0]), unsafe.Sizeof(.Stack0)) } := copy(.Stack0[:], .stk())for ; < len(.Stack0); ++ { .Stack0[] = 0 } = [1:] } }unlock(&profBlockLock)return}// MutexProfile returns n, the number of records in the current mutex profile.// If len(p) >= n, MutexProfile copies the profile into p and returns n, true.// Otherwise, MutexProfile does not change p, and returns n, false.//// Most clients should use the runtime/pprof package// instead of calling MutexProfile directly.func ( []BlockProfileRecord) ( int, bool) {lock(&profBlockLock) := (*bucket)(xbuckets.Load())for := ; != nil; = .allnext { ++ }if <= len() { = truefor := ; != nil; = .allnext { := .bp() := &[0] .Count = int64(.count) .Cycles = .cycles := copy(.Stack0[:], .stk())for ; < len(.Stack0); ++ { .Stack0[] = 0 } = [1:] } }unlock(&profBlockLock)return}// ThreadCreateProfile returns n, the number of records in the thread creation profile.// If len(p) >= n, ThreadCreateProfile copies the profile into p and returns n, true.// If len(p) < n, ThreadCreateProfile does not change p and returns n, false.//// Most clients should use the runtime/pprof package instead// of calling ThreadCreateProfile directly.func ( []StackRecord) ( int, bool) { := (*m)(atomic.Loadp(unsafe.Pointer(&allm)))for := ; != nil; = .alllink { ++ }if <= len() { = true := 0for := ; != nil; = .alllink { [].Stack0 = .createstack ++ } }return}//go:linkname runtime_goroutineProfileWithLabels runtime/pprof.runtime_goroutineProfileWithLabelsfunc ( []StackRecord, []unsafe.Pointer) ( int, bool) {returngoroutineProfileWithLabels(, )}// labels 
may be nil. If labels is non-nil, it must have the same length as p.func ( []StackRecord, []unsafe.Pointer) ( int, bool) {if != nil && len() != len() { = nil }returngoroutineProfileWithLabelsConcurrent(, )}vargoroutineProfile = struct { sema uint32 active bool offset atomic.Int64 records []StackRecord labels []unsafe.Pointer}{sema: 1,}// goroutineProfileState indicates the status of a goroutine's stack for the// current in-progress goroutine profile. Goroutines' stacks are initially// "Absent" from the profile, and end up "Satisfied" by the time the profile is// complete. While a goroutine's stack is being captured, its// goroutineProfileState will be "InProgress" and it will not be able to run// until the capture completes and the state moves to "Satisfied".//// Some goroutines (the finalizer goroutine, which at various times can be// either a "system" or a "user" goroutine, and the goroutine that is// coordinating the profile, any goroutines created during the profile) move// directly to the "Satisfied" state.typegoroutineProfileStateuint32const (goroutineProfileAbsentgoroutineProfileState = iotagoroutineProfileInProgressgoroutineProfileSatisfied)typegoroutineProfileStateHolderatomic.Uint32func ( *goroutineProfileStateHolder) () goroutineProfileState {returngoroutineProfileState((*atomic.Uint32)().Load())}func ( *goroutineProfileStateHolder) ( goroutineProfileState) { (*atomic.Uint32)().Store(uint32())}func ( *goroutineProfileStateHolder) (, goroutineProfileState) bool {return (*atomic.Uint32)().CompareAndSwap(uint32(), uint32())}func ( []StackRecord, []unsafe.Pointer) ( int, bool) {semacquire(&goroutineProfile.sema) := getg()stopTheWorld(stwGoroutineProfile)// Using gcount while the world is stopped should give us a consistent view // of the number of live goroutines, minus the number of goroutines that are // alive and permanently marked as "system". 
But to make this count agree // with what we'd get from isSystemGoroutine, we need special handling for // goroutines that can vary between user and system to ensure that the count // doesn't change during the collection. So, check the finalizer goroutine // in particular. = int(gcount())iffingStatus.Load()&fingRunningFinalizer != 0 { ++ }if > len() {// There's not enough space in p to store the whole profile, so (per the // contract of runtime.GoroutineProfile) we're not allowed to write to p // at all and must return n, false.startTheWorld()semrelease(&goroutineProfile.sema)return , false }// Save current goroutine. := getcallersp() := getcallerpc()systemstack(func() {saveg(, , , &[0]) }) .goroutineProfiled.Store(goroutineProfileSatisfied)goroutineProfile.offset.Store(1)// Prepare for all other goroutines to enter the profile. Aside from ourg, // every goroutine struct in the allgs list has its goroutineProfiled field // cleared. Any goroutine created from this point on (while // goroutineProfile.active is set) will start with its goroutineProfiled // field set to goroutineProfileSatisfied.goroutineProfile.active = truegoroutineProfile.records = goroutineProfile.labels = // The finalizer goroutine needs special handling because it can vary over // time between being a user goroutine (eligible for this profile) and a // system goroutine (to be excluded). Pick one before restarting the world.iffing != nil {fing.goroutineProfiled.Store(goroutineProfileSatisfied)ifreadgstatus(fing) != _Gdead && !isSystemGoroutine(fing, false) {doRecordGoroutineProfile(fing) } }startTheWorld()// Visit each goroutine that existed as of the startTheWorld call above. // // New goroutines may not be in this list, but we didn't want to know about // them anyway. 
If they do appear in this list (via reusing a dead goroutine // struct, or racing to launch between the world restarting and us getting // the list), they will already have their goroutineProfiled field set to // goroutineProfileSatisfied before their state transitions out of _Gdead. // // Any goroutine that the scheduler tries to execute concurrently with this // call will start by adding itself to the profile (before the act of // executing can cause any changes in its stack).forEachGRace(func( *g) {tryRecordGoroutineProfile(, Gosched) })stopTheWorld(stwGoroutineProfileCleanup) := goroutineProfile.offset.Swap(0)goroutineProfile.active = falsegoroutineProfile.records = nilgoroutineProfile.labels = nilstartTheWorld()// Restore the invariant that every goroutine struct in allgs has its // goroutineProfiled field cleared.forEachGRace(func( *g) { .goroutineProfiled.Store(goroutineProfileAbsent) })ifraceenabled {raceacquire(unsafe.Pointer(&labelSync)) }if != int() {// It's a big surprise that the number of goroutines changed while we // were collecting the profile. But probably better to return a // truncated profile than to crash the whole process. // // For instance, needm moves a goroutine out of the _Gdead state and so // might be able to change the goroutine count without interacting with // the scheduler. For code like that, the race windows are small and the // combination of features is uncommon, so it's hard to be (and remain) // sure we've caught them all. 
}semrelease(&goroutineProfile.sema)return , true}// tryRecordGoroutineProfileWB asserts that write barriers are allowed and calls// tryRecordGoroutineProfile.////go:yeswritebarrierrecfunc ( *g) {ifgetg().m.p.ptr() == nil {throw("no P available, write barriers are forbidden") }tryRecordGoroutineProfile(, osyield)}// tryRecordGoroutineProfile ensures that gp1 has the appropriate representation// in the current goroutine profile: either that it should not be profiled, or// that a snapshot of its call stack and labels are now in the profile.func ( *g, func()) {ifreadgstatus() == _Gdead {// Dead goroutines should not appear in the profile. Goroutines that // start while profile collection is active will get goroutineProfiled // set to goroutineProfileSatisfied before transitioning out of _Gdead, // so here we check _Gdead first.return }ifisSystemGoroutine(, true) {// System goroutines should not appear in the profile. (The finalizer // goroutine is marked as "already profiled".)return }for { := .goroutineProfiled.Load()if == goroutineProfileSatisfied {// This goroutine is already in the profile (or is new since the // start of collection, so shouldn't appear in the profile).break }if == goroutineProfileInProgress {// Something else is adding gp1 to the goroutine profile right now. // Give that a moment to finish. ()continue }// While we have gp1.goroutineProfiled set to // goroutineProfileInProgress, gp1 may appear _Grunnable but will not // actually be able to run. Disable preemption for ourselves, to make // sure we finish profiling gp1 right away instead of leaving it stuck // in this limbo. := acquirem()if .goroutineProfiled.CompareAndSwap(goroutineProfileAbsent, goroutineProfileInProgress) {doRecordGoroutineProfile() .goroutineProfiled.Store(goroutineProfileSatisfied) }releasem() }}// doRecordGoroutineProfile writes gp1's call stack and labels to an in-progress// goroutine profile. 
Preemption is disabled.//// This may be called via tryRecordGoroutineProfile in two ways: by the// goroutine that is coordinating the goroutine profile (running on its own// stack), or from the scheduler in preparation to execute gp1 (running on the// system stack).func ( *g) {ifreadgstatus() == _Grunning {print("doRecordGoroutineProfile gp1=", .goid, "\n")throw("cannot read stack of running goroutine") } := int(goroutineProfile.offset.Add(1)) - 1if >= len(goroutineProfile.records) {// Should be impossible, but better to return a truncated profile than // to crash the entire process at this point. Instead, deal with it in // goroutineProfileWithLabelsConcurrent where we have more context.return }// saveg calls gentraceback, which may call cgo traceback functions. When // called from the scheduler, this is on the system stack already so // traceback.go:cgoContextPCs will avoid calling back into the scheduler. // // When called from the goroutine coordinating the profile, we still have // set gp1.goroutineProfiled to goroutineProfileInProgress and so are still // preventing it from being truly _Grunnable. So we'll use the system stack // to avoid schedule delays.systemstack(func() { saveg(^uintptr(0), ^uintptr(0), , &goroutineProfile.records[]) })ifgoroutineProfile.labels != nil {goroutineProfile.labels[] = .labels }}func ( []StackRecord, []unsafe.Pointer) ( int, bool) { := getg() := func( *g) bool {// Checking isSystemGoroutine here makes GoroutineProfile // consistent with both NumGoroutine and Stack.return != && readgstatus() != _Gdead && !isSystemGoroutine(, false) }stopTheWorld(stwGoroutineProfile)// World is stopped, no locking required. = 1forEachGRace(func( *g) {if () { ++ } })if <= len() { = true , := , // Save current goroutine. 
:= getcallersp() := getcallerpc()systemstack(func() {saveg(, , , &[0]) }) = [1:]// If we have a place to put our goroutine labelmap, insert it there.if != nil { [0] = .labels = [1:] }// Save other goroutines.forEachGRace(func( *g) {if !() {return }iflen() == 0 {// Should be impossible, but better to return a // truncated profile than to crash the entire process.return }// saveg calls gentraceback, which may call cgo traceback functions. // The world is stopped, so it cannot use cgocall (which will be // blocked at exitsyscall). Do it on the system stack so it won't // call into the schedular (see traceback.go:cgoContextPCs).systemstack(func() { saveg(^uintptr(0), ^uintptr(0), , &[0]) })if != nil { [0] = .labels = [1:] } = [1:] }) }ifraceenabled {raceacquire(unsafe.Pointer(&labelSync)) }startTheWorld()return , }// GoroutineProfile returns n, the number of records in the active goroutine stack profile.// If len(p) >= n, GoroutineProfile copies the profile into p and returns n, true.// If len(p) < n, GoroutineProfile does not change p and returns n, false.//// Most clients should use the runtime/pprof package instead// of calling GoroutineProfile directly.func ( []StackRecord) ( int, bool) {returngoroutineProfileWithLabels(, nil)}func (, uintptr, *g, *StackRecord) {varunwinder .initAt(, , 0, , unwindSilentErrors) := tracebackPCs(&, 0, .Stack0[:])if < len(.Stack0) { .Stack0[] = 0 }}// Stack formats a stack trace of the calling goroutine into buf// and returns the number of bytes written to buf.// If all is true, Stack formats stack traces of all other goroutines// into buf after the trace for the current goroutine.func ( []byte, bool) int {if {stopTheWorld(stwAllGoroutinesStack) } := 0iflen() > 0 { := getg() := getcallersp() := getcallerpc()systemstack(func() { := getg()// Force traceback=1 to override GOTRACEBACK setting, // so that Stack's results are consistent. // GOTRACEBACK is only about crash dumps. 
.m.traceback = 1 .writebuf = [0:0:len()]goroutineheader()traceback(, , 0, )if {tracebackothers() } .m.traceback = 0 = len(.writebuf) .writebuf = nil }) }if {startTheWorld() }return}// Tracing of alloc/free/gc.vartracelockmutexfunc ( unsafe.Pointer, uintptr, *_type) {lock(&tracelock) := getg() .m.traceback = 2if == nil {print("tracealloc(", , ", ", hex(), ")\n") } else {print("tracealloc(", , ", ", hex(), ", ", toRType().string(), ")\n") }if .m.curg == nil || == .m.curg {goroutineheader() := getcallerpc() := getcallersp()systemstack(func() {traceback(, , 0, ) }) } else {goroutineheader(.m.curg)traceback(^uintptr(0), ^uintptr(0), 0, .m.curg) }print("\n") .m.traceback = 0unlock(&tracelock)}func ( unsafe.Pointer, uintptr) {lock(&tracelock) := getg() .m.traceback = 2print("tracefree(", , ", ", hex(), ")\n")goroutineheader() := getcallerpc() := getcallersp()systemstack(func() {traceback(, , 0, ) })print("\n") .m.traceback = 0unlock(&tracelock)}func () {lock(&tracelock) := getg() .m.traceback = 2print("tracegc()\n")// running on m->g0 stack; show all non-g0 goroutinestracebackothers()print("end tracegc\n")print("\n") .m.traceback = 0unlock(&tracelock)}
// The pages are generated with Golds v0.6.7. (GOOS=linux GOARCH=amd64)
// Golds is a Go 101 project developed by Tapir Liu.
// PR and bug reports are welcome and can be submitted to the issue list.
// Please follow @Go100and1 (reachable from the left QR code) to get the latest news of Golds.