// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Garbage collector: type and heap bitmaps.
//
// Stack, data, and bss bitmaps
//
// Stack frames and global variables in the data and bss sections are
// described by bitmaps with 1 bit per pointer-sized word. A "1" bit
// means the word is a live pointer to be visited by the GC (referred to
// as "pointer"). A "0" bit means the word should be ignored by GC
// (referred to as "scalar", though it could be a dead pointer value).
//
// Heap bitmap
//
// The heap bitmap comprises 1 bit for each pointer-sized word in the heap,
// recording whether a pointer is stored in that word or not. This bitmap
// is stored in the heapArena metadata backing each heap arena.
// That is, if ha is the heapArena for the arena starting at "start",
// then ha.bitmap[0] holds the 64 bits for the 64 words "start"
// through start+63*ptrSize, ha.bitmap[1] holds the entries for
// start+64*ptrSize through start+127*ptrSize, and so on.
// Bits correspond to words in little-endian order. ha.bitmap[0]&1 represents
// the word at "start", ha.bitmap[0]>>1&1 represents the word at start+8, etc.
// (For 32-bit platforms, s/64/32/.)
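//
// As an illustrative sketch (not a runtime API), the pointer bit for an
// arbitrary heap address can be located like this, where "start" is the
// base of the arena backed by ha:
//
//	word := (addr - start) / goarch.PtrSize // word index within the arena
//	isPtr := ha.bitmap[word/64]>>(word%64)&1 != 0
//
// (with 32 in place of 64 on 32-bit platforms).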
//
// We also keep a noMorePtrs bitmap which allows us to stop scanning
// the heap bitmap early in certain situations. If ha.noMorePtrs[i]>>j&1
// is 1, then the object containing the last word described by ha.bitmap[8*i+j]
// has no more pointers beyond those described by ha.bitmap[8*i+j].
// If ha.noMorePtrs[i]>>j&1 is set, the entries in ha.bitmap[8*i+j+1] and
// beyond must all be zero until the start of the next object.
//
// The bitmap for noscan spans is set to all zero at span allocation time.
//
// The bitmap for unallocated objects in scannable spans is not maintained
// (can be junk).

package runtime

import (
	"internal/goarch"
	"runtime/internal/atomic"
	"runtime/internal/sys"
	"unsafe"
)

// addb returns the byte pointer p+n.
//
//go:nowritebarrier
//go:nosplit
func addb(p *byte, n uintptr) *byte {
	// Note: wrote out full expression instead of calling add(p, n)
	// to reduce the number of temporaries generated by the
	// compiler for this trivial expression during inlining.
	return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + n))
}

// subtractb returns the byte pointer p-n.
//
//go:nowritebarrier
//go:nosplit
func subtractb(p *byte, n uintptr) *byte {
	// Note: wrote out full expression instead of calling add(p, -n)
	// to reduce the number of temporaries generated by the
	// compiler for this trivial expression during inlining.
	return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) - n))
}

// add1 returns the byte pointer p+1.
//
//go:nowritebarrier
//go:nosplit
func add1(p *byte) *byte {
	// Note: wrote out full expression instead of calling addb(p, 1)
	// to reduce the number of temporaries generated by the
	// compiler for this trivial expression during inlining.
	return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + 1))
}

// subtract1 returns the byte pointer p-1.
//
// nosplit because it is used during write barriers and must not be preempted.
//
//go:nowritebarrier
//go:nosplit
func subtract1(p *byte) *byte {
	// Note: wrote out full expression instead of calling subtractb(p, 1)
	// to reduce the number of temporaries generated by the
	// compiler for this trivial expression during inlining.
	return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) - 1))
}

// markBits provides access to the mark bit for an object in the heap.
// bytep points to the byte holding the mark bit.
// mask is a byte with a single bit set that can be &ed with *bytep
// to see if the bit has been set.
// *m.byte&m.mask != 0 indicates the mark bit is set.
// index can be used along with span information to generate
// the address of the object in the heap.
// We maintain one set of mark bits for allocation and one for
// marking purposes.
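//
// For example, given the span s that an index came from, the heap address
// of the object that a markBits value m describes can be recomputed as
// s.base() + m.index*s.elemsize (this is how findObject reports object
// bases below).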
type markBits struct {
	bytep *uint8
	mask  uint8
	index uintptr
}

//go:nosplit
func (s *mspan) allocBitsForIndex(allocBitIndex uintptr) markBits {
	bytep, mask := s.allocBits.bitp(allocBitIndex)
	return markBits{bytep, mask, allocBitIndex}
}

// refillAllocCache takes 8 bytes of s.allocBits starting at whichByte
// and negates them so that ctz (count trailing zeros) instructions
// can be used. It then places these 8 bytes into the cached 64 bit
// s.allocCache.
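//
// For example (hypothetical contents): if the byte at whichByte is
// 0b00001111 (the low four objects allocated) and the next seven bytes are
// zero, then after refillAllocCache the cache holds ^uint64(0x0f), so
// sys.TrailingZeros64(s.allocCache) == 4, i.e. the first free slot in this
// 64-object chunk is index 4.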
func (s *mspan) refillAllocCache(whichByte uintptr) {
	bytes := (*[8]uint8)(unsafe.Pointer(s.allocBits.bytep(whichByte)))
	aCache := uint64(0)
	aCache |= uint64(bytes[0])
	aCache |= uint64(bytes[1]) << (1 * 8)
	aCache |= uint64(bytes[2]) << (2 * 8)
	aCache |= uint64(bytes[3]) << (3 * 8)
	aCache |= uint64(bytes[4]) << (4 * 8)
	aCache |= uint64(bytes[5]) << (5 * 8)
	aCache |= uint64(bytes[6]) << (6 * 8)
	aCache |= uint64(bytes[7]) << (7 * 8)
	s.allocCache = ^aCache
}

// nextFreeIndex returns the index of the next free object in s at
// or after s.freeindex.
// There are hardware instructions that can be used to make this
// faster if profiling warrants it.
func (s *mspan) nextFreeIndex() uintptr {
	sfreeindex := s.freeindex
	snelems := s.nelems
	if sfreeindex == snelems {
		return sfreeindex
	}
	if sfreeindex > snelems {
		throw("s.freeindex > s.nelems")
	}

	aCache := s.allocCache

	bitIndex := sys.TrailingZeros64(aCache)
	for bitIndex == 64 {
		// Move index to start of next cached bits.
		sfreeindex = (sfreeindex + 64) &^ (64 - 1)
		if sfreeindex >= snelems {
			s.freeindex = snelems
			return snelems
		}
		whichByte := sfreeindex / 8
		// Refill s.allocCache with the next 64 alloc bits.
		s.refillAllocCache(whichByte)
		aCache = s.allocCache
		bitIndex = sys.TrailingZeros64(aCache)
		// nothing available in cached bits
		// grab the next 8 bytes and try again.
	}
	result := sfreeindex + uintptr(bitIndex)
	if result >= snelems {
		s.freeindex = snelems
		return snelems
	}

	s.allocCache >>= uint(bitIndex + 1)
	sfreeindex = result + 1

	if sfreeindex%64 == 0 && sfreeindex != snelems {
		// We just incremented s.freeindex so it isn't 0.
		// As each 1 in s.allocCache was encountered and used for allocation
		// it was shifted away. At this point s.allocCache contains all 0s.
		// Refill s.allocCache so that it corresponds
		// to the bits at s.allocBits starting at s.freeindex.
		whichByte := sfreeindex / 8
		s.refillAllocCache(whichByte)
	}
	s.freeindex = sfreeindex
	return result
}

// isFree reports whether the index'th object in s is unallocated.
//
// The caller must ensure s.state is mSpanInUse, and there must have
// been no preemption points since ensuring this (which could allow a
// GC transition, which would allow the state to change).
func (s *mspan) isFree(index uintptr) bool {
	if index < s.freeIndexForScan {
		return false
	}
	bytep, mask := s.allocBits.bitp(index)
	return *bytep&mask == 0
}

// divideByElemSize returns n/s.elemsize.
// n must be within [0, s.npages*_PageSize),
// or may be exactly s.npages*_PageSize
// if s.elemsize is from sizeclasses.go.
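//
// For illustration only (not tied to any particular size class): if
// s.elemsize were 48, divMul is chosen so that
// uintptr((uint64(n)*uint64(s.divMul))>>32) == n/48 for every n in the
// allowed range, turning the division into a multiply and shift.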
//
// nosplit, because it is called by objIndex, which is nosplit
//
//go:nosplit
func (s *mspan) divideByElemSize(n uintptr) uintptr {
	const doubleCheck = false

	// See explanation in mksizeclasses.go's computeDivMagic.
	q := uintptr((uint64(n) * uint64(s.divMul)) >> 32)

	if doubleCheck && q != n/s.elemsize {
		println(n, "/", s.elemsize, "should be", n/s.elemsize, "but got", q)
		throw("bad magic division")
	}
	return q
}

// nosplit, because it is called by other nosplit code like findObject
//
//go:nosplit
func (s *mspan) objIndex(p uintptr) uintptr {
	return s.divideByElemSize(p - s.base())
}

func markBitsForAddr(p uintptr) markBits {
	s := spanOf(p)
	objIndex := s.objIndex(p)
	return s.markBitsForIndex(objIndex)
}

func (s *mspan) markBitsForIndex(objIndex uintptr) markBits {
	bytep, mask := s.gcmarkBits.bitp(objIndex)
	return markBits{bytep, mask, objIndex}
}

func (s *mspan) markBitsForBase() markBits {
	return markBits{&s.gcmarkBits.x, uint8(1), 0}
}

// isMarked reports whether mark bit m is set.
func (m markBits) isMarked() bool {
	return *m.bytep&m.mask != 0
}

// setMarked sets the marked bit in the markbits, atomically.
func (m markBits) setMarked() {
	// Might be racing with other updates, so use atomic update always.
	// We used to be clever here and use a non-atomic update in certain
	// cases, but it's not worth the risk.
	atomic.Or8(m.bytep, m.mask)
}

// setMarkedNonAtomic sets the marked bit in the markbits, non-atomically.
func (m markBits) setMarkedNonAtomic() {
	*m.bytep |= m.mask
}

// clearMarked clears the marked bit in the markbits, atomically.
func (m markBits) clearMarked() {
	// Might be racing with other updates, so use atomic update always.
	// We used to be clever here and use a non-atomic update in certain
	// cases, but it's not worth the risk.
	atomic.And8(m.bytep, ^m.mask)
}

// markBitsForSpan returns the markBits for the span base address base.
func markBitsForSpan(base uintptr) (mbits markBits) {
	mbits = markBitsForAddr(base)
	if mbits.mask != 1 {
		throw("markBitsForSpan: unaligned start")
	}
	return mbits
}

// advance advances the markBits to the next object in the span.
func (m *markBits) advance() {
	if m.mask == 1<<7 {
		m.bytep = (*uint8)(unsafe.Pointer(uintptr(unsafe.Pointer(m.bytep)) + 1))
		m.mask = 1
	} else {
		m.mask = m.mask << 1
	}
	m.index++
}

// clobberdeadPtr is a special value that is used by the compiler to
// clobber dead stack slots, when -clobberdead flag is set.
const clobberdeadPtr = uintptr(0xdeaddead | 0xdeaddead<<((^uintptr(0)>>63)*32))

// badPointer prints a diagnostic for a bad pointer found in the Go heap and throws.
func badPointer(s *mspan, p, refBase, refOff uintptr) {
	// Typically this indicates an incorrect use
	// of unsafe or cgo to store a bad pointer in
	// the Go heap. It may also indicate a runtime
	// bug.
	//
	// TODO(austin): We could be more aggressive
	// and detect pointers to unallocated objects
	// in allocated spans.
	printlock()
	print("runtime: pointer ", hex(p))
	if s != nil {
		state := s.state.get()
		if state != mSpanInUse {
			print(" to unallocated span")
		} else {
			print(" to unused region of span")
		}
		print(" span.base()=", hex(s.base()), " span.limit=", hex(s.limit), " span.state=", state)
	}
	print("\n")
	if refBase != 0 {
		print("runtime: found in object at *(", hex(refBase), "+", hex(refOff), ")\n")
		gcDumpObject("object", refBase, refOff)
	}
	getg().m.traceback = 2
	throw("found bad pointer in Go heap (incorrect use of unsafe or cgo?)")
}

// findObject returns the base address for the heap object containing
// the address p, the object's span, and the index of the object in s.
// If p does not point into a heap object, it returns base == 0.
//
// If p is an invalid heap pointer and debug.invalidptr != 0,
// findObject panics.
//
// refBase and refOff optionally give the base address of the object
// in which the pointer p was found and the byte offset at which it
// was found. These are used for error reporting.
//
// It is nosplit so it is safe for p to be a pointer to the current goroutine's stack.
// Since p is a uintptr, it would not be adjusted if the stack were to move.
//
//go:nosplit
func findObject(p, refBase, refOff uintptr) (base uintptr, s *mspan, objIndex uintptr) {
	s = spanOf(p)
	// If s is nil, the virtual address has never been part of the heap.
	// This pointer may be to some mmap'd region, so we allow it.
	if s == nil {
		if (GOARCH == "amd64" || GOARCH == "arm64") && p == clobberdeadPtr && debug.invalidptr != 0 {
			// Crash if clobberdeadPtr is seen. Only on AMD64 and ARM64 for now,
			// as they are the only platform where compiler's clobberdead mode is
			// implemented. On these platforms clobberdeadPtr cannot be a valid address.
			badPointer(s, p, refBase, refOff)
		}
		return
	}
	// If p is a bad pointer, it may not be in s's bounds.
	//
	// Check s.state to synchronize with span initialization
	// before checking other fields. See also spanOfHeap.
	if state := s.state.get(); state != mSpanInUse || p < s.base() || p >= s.limit {
		// Pointers into stacks are also ok, the runtime manages these explicitly.
		if state == mSpanManual {
			return
		}
		// The following ensures that we are rigorous about what data
		// structures hold valid pointers.
		if debug.invalidptr != 0 {
			badPointer(s, p, refBase, refOff)
		}
		return
	}

	objIndex = s.objIndex(p)
	base = s.base() + objIndex*s.elemsize
	return
}

// reflect_verifyNotInHeapPtr reports whether converting the not-in-heap pointer into an unsafe.Pointer is ok.
//
//go:linkname reflect_verifyNotInHeapPtr reflect.verifyNotInHeapPtr
func reflect_verifyNotInHeapPtr(p uintptr) bool {
	// Conversion to a pointer is ok as long as findObject above does not call badPointer.
	// Since we're already promised that p doesn't point into the heap, just disallow heap
	// pointers and the special clobbered pointer.
	return spanOf(p) == nil && p != clobberdeadPtr
}

const ptrBits = 8 * goarch.PtrSize

// heapBits provides access to the bitmap bits for a single heap word.
// The methods on heapBits take value receivers so that the compiler
// can more easily inline calls to those methods and registerize the
// struct fields independently.
type heapBits struct {
	// heapBits will report on pointers in the range [addr,addr+size).
	// The low bit of mask contains the pointerness of the word at addr
	// (assuming valid>0).
	addr, size uintptr

	// The next few pointer bits representing words starting at addr.
	// Those bits already returned by next() are zeroed.
	mask uintptr
	// Number of bits in mask that are valid. mask is always less than 1<<valid.
	valid uintptr
}

// heapBitsForAddr returns the heapBits for the address addr.
// The caller must ensure [addr,addr+size) is in an allocated span.
// In particular, be careful not to point past the end of an object.
//
// nosplit because it is used during write barriers and must not be preempted.
//
//go:nosplit
func heapBitsForAddr(addr, size uintptr) heapBits {
	// Find arena
	ai := arenaIndex(addr)
	ha := mheap_.arenas[ai.l1()][ai.l2()]

	// Word index in arena.
	word := addr / goarch.PtrSize % heapArenaWords

	// Word index and bit offset in bitmap array.
	idx := word / ptrBits
	off := word % ptrBits

	// Grab relevant bits of bitmap.
	mask := ha.bitmap[idx] >> off
	valid := ptrBits - off

	// Process depending on where the object ends.
	nptr := size / goarch.PtrSize
	if nptr < valid {
		// Bits for this object end before the end of this bitmap word.
		// Squash bits for the following objects.
		mask &= 1<<(nptr&(ptrBits-1)) - 1
		valid = nptr
	} else if nptr == valid {
		// Bits for this object end at exactly the end of this bitmap word.
		// All good.
	} else {
		// Bits for this object extend into the next bitmap word. See if there
		// may be any pointers recorded there.
		if uintptr(ha.noMorePtrs[idx/8])>>(idx%8)&1 != 0 {
			// No more pointers in this object after this bitmap word.
			// Update size so we know not to look there.
			size = valid * goarch.PtrSize
		}
	}

	return heapBits{addr: addr, size: size, mask: mask, valid: valid}
}

// Returns the (absolute) address of the next known pointer and
// a heapBits iterator representing any remaining pointers.
// If there are no more pointers, returns address 0.
// Note that next does not modify h. The caller must record the result.
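//
// The typical iteration over an object's pointer slots, as used by
// bulkBarrierPreWrite and getgcmask below, looks like:
//
//	h := heapBitsForAddr(base, size)
//	for {
//		var addr uintptr
//		if h, addr = h.next(); addr == 0 {
//			break
//		}
//		// ... addr is the address of a pointer slot ...
//	}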
//
// nosplit because it is used during write barriers and must not be preempted.
//
//go:nosplit
func (h heapBits) next() (heapBits, uintptr) {
	for {
		if h.mask != 0 {
			var i int
			if goarch.PtrSize == 8 {
				i = sys.TrailingZeros64(uint64(h.mask))
			} else {
				i = sys.TrailingZeros32(uint32(h.mask))
			}
			h.mask ^= uintptr(1) << (i & (ptrBits - 1))
			return h, h.addr + uintptr(i)*goarch.PtrSize
		}

		// Skip words that we've already processed.
		h.addr += h.valid * goarch.PtrSize
		h.size -= h.valid * goarch.PtrSize
		if h.size == 0 {
			return h, 0 // no more pointers
		}

		// Grab more bits and try again.
		h = heapBitsForAddr(h.addr, h.size)
	}
}

// nextFast is like next, but can return 0 even when there are more pointers
// to be found. Callers should call next if nextFast returns 0 as its second
// return value.
//
//	if addr, h = h.nextFast(); addr == 0 {
//	    if addr, h = h.next(); addr == 0 {
//	        ... no more pointers ...
//	    }
//	}
//	... process pointer at addr ...
//
// nextFast is designed to be inlineable.
//
//go:nosplit
func (h heapBits) nextFast() (heapBits, uintptr) {
	// TESTQ/JEQ
	if h.mask == 0 {
		return h, 0
	}
	// BSFQ
	var i int
	if goarch.PtrSize == 8 {
		i = sys.TrailingZeros64(uint64(h.mask))
	} else {
		i = sys.TrailingZeros32(uint32(h.mask))
	}
	// BTCQ
	h.mask ^= uintptr(1) << (i & (ptrBits - 1))
	// LEAQ (XX)(XX*8)
	return h, h.addr + uintptr(i)*goarch.PtrSize
}

// bulkBarrierPreWrite executes a write barrier
// for every pointer slot in the memory range [src, src+size),
// using pointer/scalar information from [dst, dst+size).
// This executes the write barriers necessary before a memmove.
// src, dst, and size must be pointer-aligned.
// The range [dst, dst+size) must lie within a single object.
// It does not perform the actual writes.
//
// As a special case, src == 0 indicates that this is being used for a
// memclr. bulkBarrierPreWrite will pass 0 for the src of each write
// barrier.
//
// Callers should call bulkBarrierPreWrite immediately before
// calling memmove(dst, src, size). This function is marked nosplit
// to avoid being preempted; the GC must not stop the goroutine
// between the memmove and the execution of the barriers.
// The caller is also responsible for cgo pointer checks if this
// may be writing Go pointers into non-Go memory.
//
// The pointer bitmap is not maintained for allocations containing
// no pointers at all; any caller of bulkBarrierPreWrite must first
// make sure the underlying allocation contains pointers, usually
// by checking typ.PtrBytes.
//
// Callers must perform cgo checks if goexperiment.CgoCheck2.
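//
// A sketch of the intended calling pattern (assuming the caller has already
// verified that the allocation contains pointers):
//
//	bulkBarrierPreWrite(uintptr(dst), uintptr(src), size)
//	memmove(dst, src, size)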
//
//go:nosplit
func bulkBarrierPreWrite(dst, src, size uintptr) {
	if (dst|src|size)&(goarch.PtrSize-1) != 0 {
		throw("bulkBarrierPreWrite: unaligned arguments")
	}
	if !writeBarrier.needed {
		return
	}
	if s := spanOf(dst); s == nil {
		// If dst is a global, use the data or BSS bitmaps to
		// execute write barriers.
		for _, datap := range activeModules() {
			if datap.data <= dst && dst < datap.edata {
				bulkBarrierBitmap(dst, src, size, dst-datap.data, datap.gcdatamask.bytedata)
				return
			}
		}
		for _, datap := range activeModules() {
			if datap.bss <= dst && dst < datap.ebss {
				bulkBarrierBitmap(dst, src, size, dst-datap.bss, datap.gcbssmask.bytedata)
				return
			}
		}
		return
	} else if s.state.get() != mSpanInUse || dst < s.base() || s.limit <= dst {
		// dst was heap memory at some point, but isn't now.
		// It can't be a global. It must be either our stack,
		// or in the case of direct channel sends, it could be
		// another stack. Either way, no need for barriers.
		// This will also catch if dst is in a freed span,
		// though that should never happen.
		return
	}

	buf := &getg().m.p.ptr().wbBuf
	h := heapBitsForAddr(dst, size)
	if src == 0 {
		for {
			var addr uintptr
			if h, addr = h.next(); addr == 0 {
				break
			}
			dstx := (*uintptr)(unsafe.Pointer(addr))
			p := buf.get1()
			p[0] = *dstx
		}
	} else {
		for {
			var addr uintptr
			if h, addr = h.next(); addr == 0 {
				break
			}
			dstx := (*uintptr)(unsafe.Pointer(addr))
			srcx := (*uintptr)(unsafe.Pointer(src + (addr - dst)))
			p := buf.get2()
			p[0] = *dstx
			p[1] = *srcx
		}
	}
}

// bulkBarrierPreWriteSrcOnly is like bulkBarrierPreWrite but
// does not execute write barriers for [dst, dst+size).
//
// In addition to the requirements of bulkBarrierPreWrite
// callers need to ensure [dst, dst+size) is zeroed.
//
// This is used for special cases where e.g. dst was just
// created and zeroed with malloc.
//
//go:nosplit
func bulkBarrierPreWriteSrcOnly(dst, src, size uintptr) {
	if (dst|src|size)&(goarch.PtrSize-1) != 0 {
		throw("bulkBarrierPreWrite: unaligned arguments")
	}
	if !writeBarrier.needed {
		return
	}
	buf := &getg().m.p.ptr().wbBuf
	h := heapBitsForAddr(dst, size)
	for {
		var addr uintptr
		if h, addr = h.next(); addr == 0 {
			break
		}
		srcx := (*uintptr)(unsafe.Pointer(addr - dst + src))
		p := buf.get1()
		p[0] = *srcx
	}
}

// bulkBarrierBitmap executes write barriers for copying from [src,
// src+size) to [dst, dst+size) using a 1-bit pointer bitmap. src is
// assumed to start maskOffset bytes into the data covered by the
// bitmap in bits (which may not be a multiple of 8).
//
// This is used by bulkBarrierPreWrite for writes to data and BSS.
//
//go:nosplit
func bulkBarrierBitmap(dst, src, size, maskOffset uintptr, bits *uint8) {
	word := maskOffset / goarch.PtrSize
	bits = addb(bits, word/8)
	mask := uint8(1) << (word % 8)

	buf := &getg().m.p.ptr().wbBuf
	for i := uintptr(0); i < size; i += goarch.PtrSize {
		if mask == 0 {
			bits = addb(bits, 1)
			if *bits == 0 {
				// Skip 8 words.
				i += 7 * goarch.PtrSize
				continue
			}
			mask = 1
		}
		if *bits&mask != 0 {
			dstx := (*uintptr)(unsafe.Pointer(dst + i))
			if src == 0 {
				p := buf.get1()
				p[0] = *dstx
			} else {
				srcx := (*uintptr)(unsafe.Pointer(src + i))
				p := buf.get2()
				p[0] = *dstx
				p[1] = *srcx
			}
		}
		mask <<= 1
	}
}

// typeBitsBulkBarrier executes a write barrier for every
// pointer that would be copied from [src, src+size) to [dst,
// dst+size) by a memmove using the type bitmap to locate those
// pointer slots.
//
// The type typ must correspond exactly to [src, src+size) and [dst, dst+size).
// dst, src, and size must be pointer-aligned.
// The type typ must have a plain bitmap, not a GC program.
// The only use of this function is in channel sends, and the
// 64 kB channel element limit takes care of this for us.
//
// Must not be preempted because it typically runs right before memmove,
// and the GC must observe them as an atomic action.
//
// Callers must perform cgo checks if goexperiment.CgoCheck2.
//
//go:nosplit
func typeBitsBulkBarrier(typ *_type, dst, src, size uintptr) {
	if typ == nil {
		throw("runtime: typeBitsBulkBarrier without type")
	}
	if typ.Size_ != size {
		println("runtime: typeBitsBulkBarrier with type ", toRType(typ).string(), " of size ", typ.Size_, " but memory size", size)
		throw("runtime: invalid typeBitsBulkBarrier")
	}
	if typ.Kind_&kindGCProg != 0 {
		println("runtime: typeBitsBulkBarrier with type ", toRType(typ).string(), " with GC prog")
		throw("runtime: invalid typeBitsBulkBarrier")
	}
	if !writeBarrier.needed {
		return
	}
	ptrmask := typ.GCData
	buf := &getg().m.p.ptr().wbBuf
	var bits uint32
	for i := uintptr(0); i < typ.PtrBytes; i += goarch.PtrSize {
		if i&(goarch.PtrSize*8-1) == 0 {
			bits = uint32(*ptrmask)
			ptrmask = addb(ptrmask, 1)
		} else {
			bits = bits >> 1
		}
		if bits&1 != 0 {
			dstx := (*uintptr)(unsafe.Pointer(dst + i))
			srcx := (*uintptr)(unsafe.Pointer(src + i))
			p := buf.get2()
			p[0] = *dstx
			p[1] = *srcx
		}
	}
}

// initHeapBits initializes the heap bitmap for a span.
// If this is a span of single pointer allocations, it initializes all
// words to pointer. If force is true, clears all bits.
func (s *mspan) initHeapBits(forceClear bool) {
	if forceClear || s.spanclass.noscan() {
		// Set all the pointer bits to zero. We do this once
		// when the span is allocated so we don't have to do it
		// for each object allocation.
		base := s.base()
		size := s.npages * pageSize
		h := writeHeapBitsForAddr(base)
		h.flush(base, size)
		return
	}
	isPtrs := goarch.PtrSize == 8 && s.elemsize == goarch.PtrSize
	if !isPtrs {
		return // nothing to do
	}
	h := writeHeapBitsForAddr(s.base())
	size := s.npages * pageSize
	nptrs := size / goarch.PtrSize
	for i := uintptr(0); i < nptrs; i += ptrBits {
		h = h.write(^uintptr(0), ptrBits)
	}
	h.flush(s.base(), size)
}

// countAlloc returns the number of objects allocated in span s by
// scanning the mark bitmap.
func (s *mspan) countAlloc() int {
	count := 0
	bytes := divRoundUp(s.nelems, 8)
	// Iterate over each 8-byte chunk and count allocations
	// with an intrinsic. Note that newMarkBits guarantees that
	// gcmarkBits will be 8-byte aligned, so we don't have to
	// worry about edge cases, irrelevant bits will simply be zero.
	for i := uintptr(0); i < bytes; i += 8 {
		// Extract 64 bits from the byte pointer and get an OnesCount.
		// Note that the unsafe cast here doesn't preserve endianness,
		// but that's OK. We only care about how many bits are 1, not
		// about the order we discover them in.
		mrkBits := *(*uint64)(unsafe.Pointer(s.gcmarkBits.bytep(i)))
		count += sys.OnesCount64(mrkBits)
	}
	return count
}

type writeHeapBits struct {
	addr  uintptr // address that the low bit of mask represents the pointer state of.
	mask  uintptr // some pointer bits starting at the address addr.
	valid uintptr // number of bits in buf that are valid (including low)
	low   uintptr // number of low-order bits to not overwrite
}
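
// The typical write sequence, as used by initHeapBits and heapBitsSetType
// below, is:
//
//	h := writeHeapBitsForAddr(x)
//	h = h.write(bits, nbits) // possibly repeated; h.pad(n) adds zero bits
//	h.flush(x, size)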

func writeHeapBitsForAddr(addr uintptr) (h writeHeapBits) {
	// We start writing bits maybe in the middle of a heap bitmap word.
	// Remember how many bits into the word we started, so we can be sure
	// not to overwrite the previous bits.
	h.low = addr / goarch.PtrSize % ptrBits

	// round down to heap word that starts the bitmap word.
	h.addr = addr - h.low*goarch.PtrSize

	// We don't have any bits yet.
	h.mask = 0
	h.valid = h.low

	return
}

// write appends the pointerness of the next valid pointer slots
// using the low valid bits of bits. 1=pointer, 0=scalar.
func (h writeHeapBits) write(bits, valid uintptr) writeHeapBits {
	if h.valid+valid <= ptrBits {
		// Fast path - just accumulate the bits.
		h.mask |= bits << h.valid
		h.valid += valid
		return h
	}
	// Too many bits to fit in this word. Write the current word
	// out and move on to the next word.

	data := h.mask | bits<<h.valid       // mask for this word
	h.mask = bits >> (ptrBits - h.valid) // leftover for next word
	h.valid += valid - ptrBits           // have h.valid+valid bits, writing ptrBits of them

	// Flush mask to the memory bitmap.
	// TODO: figure out how to cache arena lookup.
	ai := arenaIndex(h.addr)
	ha := mheap_.arenas[ai.l1()][ai.l2()]
	idx := h.addr / (ptrBits * goarch.PtrSize) % heapArenaBitmapWords
	m := uintptr(1)<<h.low - 1
	ha.bitmap[idx] = ha.bitmap[idx]&m | data
	// Note: no synchronization required for this write because
	// the allocator has exclusive access to the page, and the bitmap
	// entries are all for a single page. Also, visibility of these
	// writes is guaranteed by the publication barrier in mallocgc.

	// Clear noMorePtrs bit, since we're going to be writing bits
	// into the following word.
	ha.noMorePtrs[idx/8] &^= uint8(1) << (idx % 8)
	// Note: same as above

	// Move to next word of bitmap.
	h.addr += ptrBits * goarch.PtrSize
	h.low = 0
	return h
}

// Add padding of size bytes.
func (h writeHeapBits) pad(size uintptr) writeHeapBits {
	if size == 0 {
		return h
	}
	words := size / goarch.PtrSize
	for words > ptrBits {
		h = h.write(0, ptrBits)
		words -= ptrBits
	}
	return h.write(0, words)
}

// Flush the bits that have been written, and add zeros as needed
// to cover the full object [addr, addr+size).
func (h writeHeapBits) flush(addr, size uintptr) {
	// zeros counts the number of bits needed to represent the object minus the
	// number of bits we've already written. This is the number of 0 bits
	// that need to be added.
	zeros := (addr+size-h.addr)/goarch.PtrSize - h.valid

	// Add zero bits up to the bitmap word boundary
	if zeros > 0 {
		z := ptrBits - h.valid
		if z > zeros {
			z = zeros
		}
		h.valid += z
		zeros -= z
	}

	// Find word in bitmap that we're going to write.
	ai := arenaIndex(h.addr)
	ha := mheap_.arenas[ai.l1()][ai.l2()]
	idx := h.addr / (ptrBits * goarch.PtrSize) % heapArenaBitmapWords

	// Write remaining bits.
	if h.valid != h.low {
		m := uintptr(1)<<h.low - 1      // don't clear existing bits below "low"
		m |= ^(uintptr(1)<<h.valid - 1) // don't clear existing bits above "valid"
		ha.bitmap[idx] = ha.bitmap[idx]&m | h.mask
	}
	if zeros == 0 {
		return
	}

	// Record in the noMorePtrs map that there won't be any more 1 bits,
	// so readers can stop early.
	ha.noMorePtrs[idx/8] |= uint8(1) << (idx % 8)

	// Advance to next bitmap word.
	h.addr += ptrBits * goarch.PtrSize

	// Continue on writing zeros for the rest of the object.
	// For standard use of the ptr bits this is not required, as
	// the bits are read from the beginning of the object. Some uses,
	// like noscan spans, oblets, bulk write barriers, and cgocheck, might
	// start mid-object, so these writes are still required.
	for {
		// Write zero bits.
		ai := arenaIndex(h.addr)
		ha := mheap_.arenas[ai.l1()][ai.l2()]
		idx := h.addr / (ptrBits * goarch.PtrSize) % heapArenaBitmapWords
		if zeros < ptrBits {
			ha.bitmap[idx] &^= uintptr(1)<<zeros - 1
			break
		} else if zeros == ptrBits {
			ha.bitmap[idx] = 0
			break
		} else {
			ha.bitmap[idx] = 0
			zeros -= ptrBits
		}
		ha.noMorePtrs[idx/8] |= uint8(1) << (idx % 8)
		h.addr += ptrBits * goarch.PtrSize
	}
}

// Read the bytes starting at the aligned pointer p into a uintptr.
// Read is little-endian.
func readUintptr(p *byte) uintptr {
	x := *(*uintptr)(unsafe.Pointer(p))
	if goarch.BigEndian {
		if goarch.PtrSize == 8 {
			return uintptr(sys.Bswap64(uint64(x)))
		}
		return uintptr(sys.Bswap32(uint32(x)))
	}
	return x
}

// heapBitsSetType records that the new allocation [x, x+size)
// holds in [x, x+dataSize) one or more values of type typ.
// (The number of values is given by dataSize / typ.Size.)
// If dataSize < size, the fragment [x+dataSize, x+size) is
// recorded as non-pointer data.
// It is known that the type has pointers somewhere;
// malloc does not call heapBitsSetType when there are no pointers,
// because all free objects are marked as noscan during
// heapBitsSweepSpan.
//
// There can only be one allocation from a given span active at a time,
// and the bitmap for a span always falls on word boundaries,
// so there are no write-write races for access to the heap bitmap.
// Hence, heapBitsSetType can access the bitmap without atomics.
//
// There can be read-write races between heapBitsSetType and things
// that read the heap bitmap like scanobject. However, since
// heapBitsSetType is only used for objects that have not yet been
// made reachable, readers will ignore bits being modified by this
// function. This does mean this function cannot transiently modify
// bits that belong to neighboring objects. Also, on weakly-ordered
// machines, callers must execute a store/store (publication) barrier
// between calling this function and making the object reachable.
func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
	const doubleCheck = false // slow but helpful; enable to test modifications to this code

	if doubleCheck && dataSize%typ.Size_ != 0 {
		throw("heapBitsSetType: dataSize not a multiple of typ.Size")
	}

	if goarch.PtrSize == 8 && size == goarch.PtrSize {
		// It's one word and it has pointers, it must be a pointer.
		// Since all allocated one-word objects are pointers
		// (non-pointers are aggregated into tinySize allocations),
		// (*mspan).initHeapBits sets the pointer bits for us.
		// Nothing to do here.
		if doubleCheck {
			h, addr := heapBitsForAddr(x, size).next()
			if addr != x {
				throw("heapBitsSetType: pointer bit missing")
			}
			_, addr = h.next()
			if addr != 0 {
				throw("heapBitsSetType: second pointer bit found")
			}
		}
		return
	}

	h := writeHeapBitsForAddr(x)

	// Handle GC program.
	if typ.Kind_&kindGCProg != 0 {
		// Expand the gc program into the storage we're going to use for the actual object.
		obj := (*uint8)(unsafe.Pointer(x))
		n := runGCProg(addb(typ.GCData, 4), obj)
		// Use the expanded program to set the heap bits.
		for i := uintptr(0); true; i += typ.Size_ {
			// Copy expanded program to heap bitmap.
			p := obj
			j := n
			for j > 8 {
				h = h.write(uintptr(*p), 8)
				p = add1(p)
				j -= 8
			}
			h = h.write(uintptr(*p), j)

			if i+typ.Size_ == dataSize {
				break // no padding after last element
			}

			// Pad with zeros to the start of the next element.
			h = h.pad(typ.Size_ - n*goarch.PtrSize)
		}

		h.flush(x, size)

		// Erase the expanded GC program.
		memclrNoHeapPointers(unsafe.Pointer(obj), (n+7)/8)
		return
	}

	// Note about sizes:
	//
	// typ.Size is the number of words in the object,
	// and typ.PtrBytes is the number of words in the prefix
	// of the object that contains pointers. That is, the final
	// typ.Size - typ.PtrBytes words contain no pointers.
	// This allows optimization of a common pattern where
	// an object has a small header followed by a large scalar
	// buffer. If we know the pointers are over, we don't have
	// to scan the buffer's heap bitmap at all.
	// The 1-bit ptrmasks are sized to contain only bits for
	// the typ.PtrBytes prefix, zero padded out to a full byte
	// of bitmap. If there is more room in the allocated object,
	// that space is pointerless. The noMorePtrs bitmap will prevent
	// scanning large pointerless tails of an object.
	//
	// Replicated copies are not as nice: if there is an array of
	// objects with scalar tails, all but the last tail does have to
	// be initialized, because there is no way to say "skip forward".

	ptrs := typ.PtrBytes / goarch.PtrSize
	if typ.Size_ == dataSize { // Single element
		if ptrs <= ptrBits { // Single small element
			m := readUintptr(typ.GCData)
			h = h.write(m, ptrs)
		} else { // Single large element
			p := typ.GCData
			for {
				h = h.write(readUintptr(p), ptrBits)
				p = addb(p, ptrBits/8)
				ptrs -= ptrBits
				if ptrs <= ptrBits {
					break
				}
			}
			m := readUintptr(p)
			h = h.write(m, ptrs)
		}
	} else { // Repeated element
		words := typ.Size_ / goarch.PtrSize // total words, including scalar tail
		if words <= ptrBits {               // Repeated small element
			n := dataSize / typ.Size_
			m := readUintptr(typ.GCData)
			// Make larger unit to repeat
			for words <= ptrBits/2 {
				if n&1 != 0 {
					h = h.write(m, words)
				}
				n /= 2
				m |= m << words
				ptrs += words
				words *= 2
				if n == 1 {
					break
				}
			}
			for n > 1 {
				h = h.write(m, words)
				n--
			}
			h = h.write(m, ptrs)
		} else { // Repeated large element
			for i := uintptr(0); true; i += typ.Size_ {
				p := typ.GCData
				j := ptrs
				for j > ptrBits {
					h = h.write(readUintptr(p), ptrBits)
					p = addb(p, ptrBits/8)
					j -= ptrBits
				}
				m := readUintptr(p)
				h = h.write(m, j)
				if i+typ.Size_ == dataSize {
					break // don't need the trailing nonptr bits on the last element.
				}
				// Pad with zeros to the start of the next element.
				h = h.pad(typ.Size_ - typ.PtrBytes)
			}
		}
	}
	h.flush(x, size)

	if doubleCheck {
		h := heapBitsForAddr(x, size)
		for i := uintptr(0); i < size; i += goarch.PtrSize {
			// Compute the pointer bit we want at offset i.
			want := false
			if i < dataSize {
				off := i % typ.Size_
				if off < typ.PtrBytes {
					j := off / goarch.PtrSize
					want = *addb(typ.GCData, j/8)>>(j%8)&1 != 0
				}
			}
			if want {
				var addr uintptr
				h, addr = h.next()
				if addr != x+i {
					throw("heapBitsSetType: pointer entry not correct")
				}
			}
		}
		if _, addr := h.next(); addr != 0 {
			throw("heapBitsSetType: extra pointer")
		}
	}
}

var debugPtrmask struct {
	lock mutex
	data *byte
}

// progToPointerMask returns the 1-bit pointer mask output by the GC program prog.
// size is the size of the region described by prog, in bytes.
// The resulting bitvector will have no more than size/goarch.PtrSize bits.
func progToPointerMask(prog *byte, size uintptr) bitvector {
	n := (size/goarch.PtrSize + 7) / 8
	x := (*[1 << 30]byte)(persistentalloc(n+1, 1, &memstats.buckhash_sys))[:n+1]
	x[len(x)-1] = 0xa1 // overflow check sentinel
	n = runGCProg(prog, &x[0])
	if x[len(x)-1] != 0xa1 {
		throw("progToPointerMask: overflow")
	}
	return bitvector{int32(n), &x[0]}
}

// Packed GC pointer bitmaps, aka GC programs.
//
// For large types containing arrays, the type information has a
// natural repetition that can be encoded to save space in the
// binary and in the memory representation of the type information.
//
// The encoding is a simple Lempel-Ziv style bytecode machine
// with the following instructions:
//
//	00000000: stop
//	0nnnnnnn: emit n bits copied from the next (n+7)/8 bytes
//	10000000 n c: repeat the previous n bits c times; n, c are varints
//	1nnnnnnn c: repeat the previous n bits c times; c is a varint
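//
// For example, a hypothetical program describing the 3-bit group 1 0 1
// repeated 11 times (33 words in total) could be encoded as:
//
//	0x03 0x05   // emit 3 literal bits: 1 0 1 (low bit first)
//	0x83 0x0a   // repeat the previous 3 bits 10 more times
//	0x00        // stop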

// runGCProg returns the number of 1-bit entries written to memory.
func runGCProg(prog, dst *byte) uintptr {
	dstStart := dst

	// Bits waiting to be written to memory.
	var bits uintptr
	var nbits uintptr

	p := prog
Run:
	for {
		// Flush accumulated full bytes.
		// The rest of the loop assumes that nbits <= 7.
		for ; nbits >= 8; nbits -= 8 {
			*dst = uint8(bits)
			dst = add1(dst)
			bits >>= 8
		}

		// Process one instruction.
		inst := uintptr(*p)
		p = add1(p)
		n := inst & 0x7F
		if inst&0x80 == 0 {
			// Literal bits; n == 0 means end of program.
			if n == 0 {
				// Program is over.
				break Run
			}
			nbyte := n / 8
			for i := uintptr(0); i < nbyte; i++ {
				bits |= uintptr(*p) << nbits
				p = add1(p)
				*dst = uint8(bits)
				dst = add1(dst)
				bits >>= 8
			}
			if n %= 8; n > 0 {
				bits |= uintptr(*p) << nbits
				p = add1(p)
				nbits += n
			}
			continue Run
		}

		// Repeat. If n == 0, it is encoded in a varint in the next bytes.
		if n == 0 {
			for off := uint(0); ; off += 7 {
				x := uintptr(*p)
				p = add1(p)
				n |= (x & 0x7F) << off
				if x&0x80 == 0 {
					break
				}
			}
		}

		// Count is encoded in a varint in the next bytes.
		c := uintptr(0)
		for off := uint(0); ; off += 7 {
			x := uintptr(*p)
			p = add1(p)
			c |= (x & 0x7F) << off
			if x&0x80 == 0 {
				break
			}
		}
		c *= n // now total number of bits to copy

		// If the number of bits being repeated is small, load them
		// into a register and use that register for the entire loop
		// instead of repeatedly reading from memory.
		// Handling fewer than 8 bits here makes the general loop simpler.
		// The cutoff is goarch.PtrSize*8 - 7 to guarantee that when we add
		// the pattern to a bit buffer holding at most 7 bits (a partial byte)
		// it will not overflow.
		src := dst
		const maxBits = goarch.PtrSize*8 - 7
		if n <= maxBits {
			// Start with bits in output buffer.
			pattern := bits
			npattern := nbits

			// If we need more bits, fetch them from memory.
			src = subtract1(src)
			for npattern < n {
				pattern <<= 8
				pattern |= uintptr(*src)
				src = subtract1(src)
				npattern += 8
			}

			// We started with the whole bit output buffer,
			// and then we loaded bits from whole bytes.
			// Either way, we might now have too many instead of too few.
			// Discard the extra.
			if npattern > n {
				pattern >>= npattern - n
				npattern = n
			}

			// Replicate pattern to at most maxBits.
			if npattern == 1 {
				// One bit being repeated.
				// If the bit is 1, make the pattern all 1s.
				// If the bit is 0, the pattern is already all 0s,
				// but we can claim that the number of bits
				// in the word is equal to the number we need (c),
				// because right shift of bits will zero fill.
				if pattern == 1 {
					pattern = 1<<maxBits - 1
					npattern = maxBits
				} else {
					npattern = c
				}
			} else {
				b := pattern
				nb := npattern
				if nb+nb <= maxBits {
					// Double pattern until the whole uintptr is filled.
					for nb <= goarch.PtrSize*8 {
						b |= b << nb
						nb += nb
					}
					// Trim away incomplete copy of original pattern in high bits.
					// TODO(rsc): Replace with table lookup or loop on systems without divide?
					nb = maxBits / npattern * npattern
					b &= 1<<nb - 1
					pattern = b
					npattern = nb
				}
			}

			// Add pattern to bit buffer and flush bit buffer, c/npattern times.
			// Since pattern contains >8 bits, there will be full bytes to flush
			// on each iteration.
			for ; c >= npattern; c -= npattern {
				bits |= pattern << nbits
				nbits += npattern
				for nbits >= 8 {
					*dst = uint8(bits)
					dst = add1(dst)
					bits >>= 8
					nbits -= 8
				}
			}

			// Add final fragment to bit buffer.
			if c > 0 {
				pattern &= 1<<c - 1
				bits |= pattern << nbits
				nbits += c
			}
			continue Run
		}

		// Repeat; n too large to fit in a register.
		// Since nbits <= 7, we know the first few bytes of repeated data
		// are already written to memory.
		off := n - nbits // n > nbits because n > maxBits and nbits <= 7
		// Leading src fragment.
		src = subtractb(src, (off+7)/8)
		if frag := off & 7; frag != 0 {
			bits |= uintptr(*src) >> (8 - frag) << nbits
			src = add1(src)
			nbits += frag
			c -= frag
		}
		// Main loop: load one byte, write another.
		// The bits are rotating through the bit buffer.
		for i := c / 8; i > 0; i-- {
			bits |= uintptr(*src) << nbits
			src = add1(src)
			*dst = uint8(bits)
			dst = add1(dst)
			bits >>= 8
		}
		// Final src fragment.
		if c %= 8; c > 0 {
			bits |= (uintptr(*src) & (1<<c - 1)) << nbits
			nbits += c
		}
	}

	// Write any final bits out, using full-byte writes, even for the final byte.
	totalBits := (uintptr(unsafe.Pointer(dst))-uintptr(unsafe.Pointer(dstStart)))*8 + nbits
	nbits += -nbits & 7
	for ; nbits > 0; nbits -= 8 {
		*dst = uint8(bits)
		dst = add1(dst)
		bits >>= 8
	}
	return totalBits
}

// materializeGCProg allocates space for the (1-bit) pointer bitmask
// for an object of size ptrdata.  Then it fills that space with the
// pointer bitmask specified by the program prog.
// The bitmask starts at s.startAddr.
// The result must be deallocated with dematerializeGCProg.
func materializeGCProg(ptrdata uintptr, prog *byte) *mspan {
	// Each word of ptrdata needs one bit in the bitmap.
	bitmapBytes := divRoundUp(ptrdata, 8*goarch.PtrSize)
	// Compute the number of pages needed for bitmapBytes.
	pages := divRoundUp(bitmapBytes, pageSize)
	s := mheap_.allocManual(pages, spanAllocPtrScalarBits)
	runGCProg(addb(prog, 4), (*byte)(unsafe.Pointer(s.startAddr)))
	return s
}
func dematerializeGCProg(s *mspan) {
	mheap_.freeManual(s, spanAllocPtrScalarBits)
}

func dumpGCProg(p *byte) {
	nptr := 0
	for {
		x := *p
		p = add1(p)
		if x == 0 {
			print("\t", nptr, " end\n")
			break
		}
		if x&0x80 == 0 {
			print("\t", nptr, " lit ", x, ":")
			n := int(x+7) / 8
			for i := 0; i < n; i++ {
				print(" ", hex(*p))
				p = add1(p)
			}
			print("\n")
			nptr += int(x)
		} else {
			nbit := int(x &^ 0x80)
			if nbit == 0 {
				for nb := uint(0); ; nb += 7 {
					x := *p
					p = add1(p)
					nbit |= int(x&0x7f) << nb
					if x&0x80 == 0 {
						break
					}
				}
			}
			count := 0
			for nb := uint(0); ; nb += 7 {
				x := *p
				p = add1(p)
				count |= int(x&0x7f) << nb
				if x&0x80 == 0 {
					break
				}
			}
			print("\t", nptr, " repeat ", nbit, " × ", count, "\n")
			nptr += nbit * count
		}
	}
}

// Testing.

// reflect_gcbits returns the GC type info for x, for testing.
// The result is the bitmap entries (0 or 1), one entry per byte.
//
//go:linkname reflect_gcbits reflect.gcbits
func reflect_gcbits(x any) []byte {
	return getgcmask(x)
}

// Returns GC type info for the pointer stored in ep for testing.
// If ep points to the stack, only static live information will be returned
// (i.e. not for objects which are only dynamically live stack objects).
func getgcmask(ep any) (mask []byte) {
	e := *efaceOf(&ep)
	p := e.data
	t := e._type
	// data or bss
	for _, datap := range activeModules() {
		// data
		if datap.data <= uintptr(p) && uintptr(p) < datap.edata {
			bitmap := datap.gcdatamask.bytedata
			n := (*ptrtype)(unsafe.Pointer(t)).Elem.Size_
			mask = make([]byte, n/goarch.PtrSize)
			for i := uintptr(0); i < n; i += goarch.PtrSize {
				off := (uintptr(p) + i - datap.data) / goarch.PtrSize
				mask[i/goarch.PtrSize] = (*addb(bitmap, off/8) >> (off % 8)) & 1
			}
			return
		}

		// bss
		if datap.bss <= uintptr(p) && uintptr(p) < datap.ebss {
			bitmap := datap.gcbssmask.bytedata
			n := (*ptrtype)(unsafe.Pointer(t)).Elem.Size_
			mask = make([]byte, n/goarch.PtrSize)
			for i := uintptr(0); i < n; i += goarch.PtrSize {
				off := (uintptr(p) + i - datap.bss) / goarch.PtrSize
				mask[i/goarch.PtrSize] = (*addb(bitmap, off/8) >> (off % 8)) & 1
			}
			return
		}
	}

	// heap
	if base, s, _ := findObject(uintptr(p), 0, 0); base != 0 {
		if s.spanclass.noscan() {
			return nil
		}
		n := s.elemsize
		hbits := heapBitsForAddr(base, n)
		mask = make([]byte, n/goarch.PtrSize)
		for {
			var addr uintptr
			if hbits, addr = hbits.next(); addr == 0 {
				break
			}
			mask[(addr-base)/goarch.PtrSize] = 1
		}
		// Callers expect this mask to end at the last pointer.
		for len(mask) > 0 && mask[len(mask)-1] == 0 {
			mask = mask[:len(mask)-1]
		}
		return
	}

	// stack
	if gp := getg(); gp.m.curg.stack.lo <= uintptr(p) && uintptr(p) < gp.m.curg.stack.hi {
		found := false
		var u unwinder
		for u.initAt(gp.m.curg.sched.pc, gp.m.curg.sched.sp, 0, gp.m.curg, 0); u.valid(); u.next() {
			if u.frame.sp <= uintptr(p) && uintptr(p) < u.frame.varp {
				found = true
				break
			}
		}
		if found {
			locals, _, _ := u.frame.getStackMap(nil, false)
			if locals.n == 0 {
				return
			}
			size := uintptr(locals.n) * goarch.PtrSize
			n := (*ptrtype)(unsafe.Pointer(t)).Elem.Size_
			mask = make([]byte, n/goarch.PtrSize)
			for i := uintptr(0); i < n; i += goarch.PtrSize {
				off := (uintptr(p) + i - u.frame.varp + size) / goarch.PtrSize
				mask[i/goarch.PtrSize] = locals.ptrbit(off)
			}
		}
		return
	}

	// otherwise, not something the GC knows about.
	// possibly read-only data, like malloc(0).
	// must not have pointers
	return
}