package flate

import (
	"io"
	"math"
	"sync"
)

const (
	maxStatelessBlock = math.MaxInt16
	// The dictionary is counted against the maxStatelessBlock budget, so limit its size.
	maxStatelessDict = 8 << 10

	slTableBits  = 13
	slTableSize  = 1 << slTableBits
	slTableShift = 32 - slTableBits
)

type statelessWriter struct {
	dst    io.Writer
	closed bool
}

func (s *statelessWriter) Close() error {
	if s.closed {
		return nil
	}
	s.closed = true
	// Emit EOF block
	return StatelessDeflate(s.dst, nil, true, nil)
}

func (s *statelessWriter) Write(p []byte) (n int, err error) {
	err = StatelessDeflate(s.dst, p, false, nil)
	if err != nil {
		return 0, err
	}
	return len(p), nil
}

func (s *statelessWriter) Reset(w io.Writer) {
	s.dst = w
	s.closed = false
}

// NewStatelessWriter will do compression but without maintaining any state
// between Write calls.
// There will be no memory kept between Write calls,
// but compression ratio and speed will be suboptimal.
// Because of this, the size of individual Write calls will directly affect the output size.
func NewStatelessWriter(dst io.Writer) io.WriteCloser {
	return &statelessWriter{dst: dst}
}
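
// A minimal usage sketch (illustrative only, not part of the package API —
// the wrapper function, payload, and error handling below are assumptions):
// each Write is compressed as an independent block, and Close emits the
// final EOF block.
//
//	func compressOnce(w io.Writer, payload []byte) error {
//		enc := NewStatelessWriter(w)
//		if _, err := enc.Write(payload); err != nil {
//			return err
//		}
//		return enc.Close()
//	}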

// bitWriterPool contains bit writers that can be reused.
var bitWriterPool = sync.Pool{
	New: func() interface{} {
		return newHuffmanBitWriter(nil)
	},
}

// StatelessDeflate allows compressing directly to a Writer without retaining state.
// Everything will be flushed when the function returns.
// Up to 8KB of an optional dictionary can be given which is presumed to precede the block.
// Longer dictionaries will be truncated and will still produce valid output.
// Sending nil dictionary is perfectly fine.
func StatelessDeflate(out io.Writer, in []byte, eof bool, dict []byte) error {
	var dst tokens
	bw := bitWriterPool.Get().(*huffmanBitWriter)
	bw.reset(out)
	defer func() {
		// don't keep a reference to our output
		bw.reset(nil)
		bitWriterPool.Put(bw)
	}()
	if eof && len(in) == 0 {
		// Just write an EOF block.
		// Could be faster...
		bw.writeStoredHeader(0, true)
		bw.flush()
		return bw.err
	}

	// Truncate dict
	if len(dict) > maxStatelessDict {
		dict = dict[len(dict)-maxStatelessDict:]
	}

	// For subsequent loops, keep shallow dict reference to avoid alloc+copy.
	var inDict []byte

	for len(in) > 0 {
		todo := in
		if len(inDict) > 0 {
			if len(todo) > maxStatelessBlock-maxStatelessDict {
				todo = todo[:maxStatelessBlock-maxStatelessDict]
			}
		} else if len(todo) > maxStatelessBlock-len(dict) {
			todo = todo[:maxStatelessBlock-len(dict)]
		}
		inOrg := in
		in = in[len(todo):]
		uncompressed := todo
		if len(dict) > 0 {
			// combine dict and source
			bufLen := len(todo) + len(dict)
			combined := make([]byte, bufLen)
			copy(combined, dict)
			copy(combined[len(dict):], todo)
			todo = combined
		}
		// Compress
		if len(inDict) == 0 {
			statelessEnc(&dst, todo, int16(len(dict)))
		} else {
			statelessEnc(&dst, inDict[:maxStatelessDict+len(todo)], maxStatelessDict)
		}
		isEof := eof && len(in) == 0

		if dst.n == 0 {
			bw.writeStoredHeader(len(uncompressed), isEof)
			if bw.err != nil {
				return bw.err
			}
			bw.writeBytes(uncompressed)
		} else if int(dst.n) > len(uncompressed)-len(uncompressed)>>4 {
			// If we removed less than 1/16th, huffman compress the block.
			bw.writeBlockHuff(isEof, uncompressed, len(in) == 0)
		} else {
			bw.writeBlockDynamic(&dst, isEof, uncompressed, len(in) == 0)
		}
		if len(in) > 0 {
			// Retain a dict if we have more
			inDict = inOrg[len(todo)-maxStatelessDict:]
			dict = nil
			dst.Reset()
		}
		if bw.err != nil {
			return bw.err
		}
	}
	if !eof {
		// Align, only a stored block can do that.
		bw.writeStoredHeader(0, false)
	}
	bw.flush()
	return bw.err
}
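
// A minimal sketch of chunked use of StatelessDeflate with a preceding
// dictionary (the function name, chunking, and error handling are
// illustrative assumptions, not part of this package): each previous chunk
// serves as the dictionary for the next, and eof is set only on the last call.
//
//	func deflateChunks(w io.Writer, chunks [][]byte) error {
//		var dict []byte
//		for i, chunk := range chunks {
//			eof := i == len(chunks)-1
//			if err := StatelessDeflate(w, chunk, eof, dict); err != nil {
//				return err
//			}
//			dict = chunk // only the trailing 8KB is used
//		}
//		return nil
//	}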

// hashSL returns a table index for u, using a multiplicative hash and keeping
// the top slTableBits bits.
func hashSL(u uint32) uint32 {
	return (u * 0x1e35a7bd) >> slTableShift
}

func load3216(b []byte, i int16) uint32 {
	// Help the compiler eliminate bounds checks on the read so it can be done in a single read.
	b = b[i:]
	b = b[:4]
	return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
}

func load6416(b []byte, i int16) uint64 {
	// Help the compiler eliminate bounds checks on the read so it can be done in a single read.
	b = b[i:]
	b = b[:8]
	return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 |
		uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
}

// statelessEnc encodes src into dst using a stack-allocated hash table.
// Encoding starts at startAt; bytes before it are treated as a dictionary and
// are only indexed, not emitted.
func statelessEnc(dst *tokens, src []byte, startAt int16) {
	const (
		inputMargin            = 12 - 1
		minNonLiteralBlockSize = 1 + 1 + inputMargin
	)

	type tableEntry struct {
		offset int16
	}

	var table [slTableSize]tableEntry

	// This check isn't in the Snappy implementation, but there, the caller
	// instead of the callee handles this case.
	if len(src)-int(startAt) < minNonLiteralBlockSize {
		// We do not fill the token table.
		// This will be picked up by caller.
		dst.n = 0
		return
	}
	// Index until startAt
	if startAt > 0 {
		cv := load3232(src, 0)
		for i := int16(0); i < startAt; i++ {
			table[hashSL(cv)] = tableEntry{offset: i}
			cv = (cv >> 8) | (uint32(src[i+4]) << 24)
		}
	}

	s := startAt + 1
	nextEmit := startAt
	// sLimit is when to stop looking for offset/length copies. The inputMargin
	// lets us use a fast path for emitLiteral in the main loop, while we are
	// looking for copies.
	sLimit := int16(len(src) - inputMargin)

	// nextEmit is where in src the next emitLiteral should start from.
	cv := load3216(src, s)

	for {
		const skipLog = 5
		const doEvery = 2

		nextS := s
		var candidate tableEntry
		for {
			nextHash := hashSL(cv)
			candidate = table[nextHash]
			nextS = s + doEvery + (s-nextEmit)>>skipLog
			if nextS > sLimit || nextS <= 0 {
				goto emitRemainder
			}

			now := load6416(src, nextS)
			table[nextHash] = tableEntry{offset: s}
			nextHash = hashSL(uint32(now))

			if cv == load3216(src, candidate.offset) {
				table[nextHash] = tableEntry{offset: nextS}
				break
			}

			// Do one right away...
			cv = uint32(now)
			s = nextS
			nextS++
			candidate = table[nextHash]
			now >>= 8
			table[nextHash] = tableEntry{offset: s}

			if cv == load3216(src, candidate.offset) {
				table[nextHash] = tableEntry{offset: nextS}
				break
			}
			cv = uint32(now)
			s = nextS
		}

		// A 4-byte match has been found. We'll later see if more than 4 bytes
		// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
		// them as literal bytes.
		for {
			// Invariant: we have a 4-byte match at s, and no need to emit any
			// literal bytes prior to s.

			// Extend the 4-byte match as long as possible.
			t := candidate.offset
			l := int16(matchLen(src[s+4:], src[t+4:]) + 4)

			// Extend backwards
			for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
				s--
				t--
				l++
			}
			if nextEmit < s {
				if false {
					emitLiteral(dst, src[nextEmit:s])
				} else {
					for _, v := range src[nextEmit:s] {
						dst.tokens[dst.n] = token(v)
						dst.litHist[v]++
						dst.n++
					}
				}
			}

			// Save the match found
			dst.AddMatchLong(int32(l), uint32(s-t-baseMatchOffset))
			s += l
			nextEmit = s
			if nextS >= s {
				s = nextS + 1
			}
			if s >= sLimit {
				goto emitRemainder
			}

			// We could immediately start working at s now, but to improve
			// compression we first update the hash table at s-2 and at s. If
			// another emitCopy is not our next move, also calculate nextHash
			// at s+1. At least on GOARCH=amd64, these three hash calculations
			// are faster as one load64 call (with some shifts) instead of
			// three load32 calls.
			x := load6416(src, s-2)
			o := s - 2
			prevHash := hashSL(uint32(x))
			table[prevHash] = tableEntry{offset: o}
			x >>= 16
			currHash := hashSL(uint32(x))
			candidate = table[currHash]
			table[currHash] = tableEntry{offset: o + 2}

			if uint32(x) != load3216(src, candidate.offset) {
				cv = uint32(x >> 8)
				s++
				break
			}
		}
	}

emitRemainder:
	if int(nextEmit) < len(src) {
		// If nothing was added, don't encode literals.
		if dst.n == 0 {
			return
		}
		emitLiteral(dst, src[nextEmit:])
	}
}