Code Examples
Example (Dictionary):

package main

import (
	"bytes"
	"compress/flate"
	"fmt"
	"io"
	"log"
	"os"
	"strings"
)

func main() {
	// The dictionary is a string of bytes. When compressing some input data,
	// the compressor will attempt to substitute substrings with matches found
	// in the dictionary. As such, the dictionary should only contain substrings
	// that are expected to be found in the actual data stream.
	const dict = `<?xml version="1.0"?>` + `<book>` + `<data>` + `<meta name="` + `" content="`

	// The data to compress should (but is not required to) contain frequent
	// substrings that match those in the dictionary.
	const data = `<?xml version="1.0"?>
<book>
	<meta name="title" content="The Go Programming Language"/>
	<meta name="authors" content="Alan Donovan and Brian Kernighan"/>
	<meta name="published" content="2015-10-26"/>
	<meta name="isbn" content="978-0134190440"/>
	<data>...</data>
</book>
`

	var b bytes.Buffer

	// Compress the data using the specially crafted dictionary.
	zw, err := flate.NewWriterDict(&b, flate.DefaultCompression, []byte(dict))
	if err != nil {
		log.Fatal(err)
	}
	if _, err := io.Copy(zw, strings.NewReader(data)); err != nil {
		log.Fatal(err)
	}
	if err := zw.Close(); err != nil {
		log.Fatal(err)
	}

	// The decompressor must use the same dictionary as the compressor.
	// Otherwise, the input may appear corrupted.
	fmt.Println("Decompressed output using the dictionary:")
	zr := flate.NewReaderDict(bytes.NewReader(b.Bytes()), []byte(dict))
	if _, err := io.Copy(os.Stdout, zr); err != nil {
		log.Fatal(err)
	}
	if err := zr.Close(); err != nil {
		log.Fatal(err)
	}
	fmt.Println()

	// Substitute all of the bytes in the dictionary with a '#' to visually
	// demonstrate the approximate effectiveness of using a preset dictionary.
	fmt.Println("Substrings matched by the dictionary are marked with #:")
	hashDict := []byte(dict)
	for i := range hashDict {
		hashDict[i] = '#'
	}
	zr = flate.NewReaderDict(&b, hashDict)
	if _, err := io.Copy(os.Stdout, zr); err != nil {
		log.Fatal(err)
	}
	if err := zr.Close(); err != nil {
		log.Fatal(err)
	}
}
Example (Reset):

package main

import (
	"bytes"
	"compress/flate"
	"io"
	"log"
	"os"
	"strings"
)

func main() {
	proverbs := []string{
		"Don't communicate by sharing memory, share memory by communicating.\n",
		"Concurrency is not parallelism.\n",
		"The bigger the interface, the weaker the abstraction.\n",
		"Documentation is for users.\n",
	}

	var r strings.Reader
	var b bytes.Buffer
	buf := make([]byte, 32<<10)

	zw, err := flate.NewWriter(nil, flate.DefaultCompression)
	if err != nil {
		log.Fatal(err)
	}
	zr := flate.NewReader(nil)

	for _, s := range proverbs {
		r.Reset(s)
		b.Reset()

		// Reset the compressor and encode from some input stream.
		zw.Reset(&b)
		if _, err := io.CopyBuffer(zw, &r, buf); err != nil {
			log.Fatal(err)
		}
		if err := zw.Close(); err != nil {
			log.Fatal(err)
		}

		// Reset the decompressor and decode to some output stream.
		if err := zr.(flate.Resetter).Reset(&b, nil); err != nil {
			log.Fatal(err)
		}
		if _, err := io.CopyBuffer(os.Stdout, zr, buf); err != nil {
			log.Fatal(err)
		}
		if err := zr.Close(); err != nil {
			log.Fatal(err)
		}
	}
}
Example (Synchronization):

package main

import (
	"compress/flate"
	"fmt"
	"io"
	"log"
	"strings"
	"sync"
)

func main() {
	var wg sync.WaitGroup
	defer wg.Wait()

	// Use io.Pipe to simulate a network connection.
	// A real network application should take care to properly close the
	// underlying connection.
	rp, wp := io.Pipe()

	// Start a goroutine to act as the transmitter.
	wg.Add(1)
	go func() {
		defer wg.Done()

		zw, err := flate.NewWriter(wp, flate.BestSpeed)
		if err != nil {
			log.Fatal(err)
		}

		b := make([]byte, 256)
		for _, m := range strings.Fields("A long time ago in a galaxy far, far away...") {
			// We use a simple framing format where the first byte is the
			// message length, followed by the message itself.
			b[0] = uint8(copy(b[1:], m))

			if _, err := zw.Write(b[:1+len(m)]); err != nil {
				log.Fatal(err)
			}

			// Flush ensures that the receiver can read all data sent so far.
			if err := zw.Flush(); err != nil {
				log.Fatal(err)
			}
		}

		if err := zw.Close(); err != nil {
			log.Fatal(err)
		}
	}()

	// Start a goroutine to act as the receiver.
	wg.Add(1)
	go func() {
		defer wg.Done()

		zr := flate.NewReader(rp)
		b := make([]byte, 256)
		for {
			// Read the message length.
			// This is guaranteed to return for every corresponding
			// Flush and Close on the transmitter side.
			if _, err := io.ReadFull(zr, b[:1]); err != nil {
				if err == io.EOF {
					break // The transmitter closed the stream
				}
				log.Fatal(err)
			}

			// Read the message content.
			n := int(b[0])
			if _, err := io.ReadFull(zr, b[:n]); err != nil {
				log.Fatal(err)
			}

			fmt.Printf("Received %d bytes: %s\n", n, b[:n])
		}
		fmt.Println()

		if err := zr.Close(); err != nil {
			log.Fatal(err)
		}
	}()
}
Package-Level Type Names (total 23, in which 7 are exported)
type CorruptInputError int64
	A CorruptInputError reports the presence of corrupt input at a given offset.
	Methods:
		(CorruptInputError) Error() string
	Implements:
		CorruptInputError : error
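A minimal sketch (not one of the package's own examples) of detecting this
error with errors.As. Which error a damaged stream produces depends on where
the corruption lands; a CorruptInputError is only one possibility.

package main

import (
	"bytes"
	"compress/flate"
	"errors"
	"fmt"
	"io"
)

func main() {
	// Build a valid DEFLATE stream, then damage one byte in the middle.
	var b bytes.Buffer
	zw, _ := flate.NewWriter(&b, flate.DefaultCompression)
	io.WriteString(zw, "hello, hello, hello, world")
	zw.Close()

	data := b.Bytes()
	data[len(data)/2] ^= 0xff // corrupt the stream

	zr := flate.NewReader(bytes.NewReader(data))
	if _, err := io.Copy(io.Discard, zr); err != nil {
		var cie flate.CorruptInputError
		if errors.As(err, &cie) {
			fmt.Printf("corrupt input at byte offset %d\n", int64(cie))
		} else {
			fmt.Println("read failed:", err) // e.g. io.ErrUnexpectedEOF
		}
	}
	zr.Close()
}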
type InternalError string
	An InternalError reports an error in the flate code itself.
	Methods:
		(InternalError) Error() string
	Implements:
		InternalError : error
type Reader interface
	The actual read interface needed by NewReader.
	If the passed in io.Reader does not also have ReadByte,
	the NewReader will introduce its own buffering.
	Methods:
		(Reader) Read(p []byte) (n int, err error)
		(Reader) ReadByte() (byte, error)
	Implemented by:
		*bufio.Reader
		bufio.ReadWriter
		*bytes.Buffer
		*bytes.Reader
		*strings.Reader
		*encoding/json.encodeState
		math/big.byteReader
		github.com/klauspost/compress/flate.Reader (interface)
	Implements:
		Reader : github.com/klauspost/compress/flate.Reader
		Reader : io.ByteReader
		Reader : io.Reader
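A small sketch of the buffering note above (not one of the package's own
examples). The onlyReader type below is hypothetical and exists only to hide
ReadByte; wrapping such a source in a bufio.Reader satisfies this interface
explicitly instead of letting NewReader buffer internally.

package main

import (
	"bufio"
	"bytes"
	"compress/flate"
	"fmt"
	"io"
	"log"
	"os"
)

// onlyReader hides every method of the wrapped io.Reader except Read,
// simulating a source (such as a network connection) without ReadByte.
type onlyReader struct{ r io.Reader }

func (o onlyReader) Read(p []byte) (int, error) { return o.r.Read(p) }

func main() {
	var b bytes.Buffer
	zw, err := flate.NewWriter(&b, flate.BestSpeed)
	if err != nil {
		log.Fatal(err)
	}
	io.WriteString(zw, "gopher")
	zw.Close()

	// *bufio.Reader implements flate.Reader, so NewReader needs no
	// extra buffering layer of its own here.
	br := bufio.NewReader(onlyReader{&b})
	zr := flate.NewReader(br)
	if _, err := io.Copy(os.Stdout, zr); err != nil {
		log.Fatal(err)
	}
	fmt.Println()
	zr.Close()
}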
type ReadError struct
	A ReadError reports an error encountered while reading input.
	Deprecated: No longer returned.
	Fields:
		Offset int64 // byte offset where error occurred
		Err error    // error returned by underlying Read
	Methods:
		(*ReadError) Error() string
	Implements:
		*ReadError : error
type Resetter interface
	Resetter resets a ReadCloser returned by NewReader or NewReaderDict
	to switch to a new underlying Reader. This permits reusing a ReadCloser
	instead of allocating a new one.
	Methods:
		(Resetter) Reset(r io.Reader, dict []byte) error
			Reset discards any buffered data and resets the Resetter as if it was
			newly initialized with the given reader.
	Implemented by:
		*decompressor
		*github.com/klauspost/compress/flate.decompressor
		github.com/klauspost/compress/flate.Resetter (interface)
	Implements:
		Resetter : github.com/klauspost/compress/flate.Resetter
type WriteError struct
	A WriteError reports an error encountered while writing output.
	Deprecated: No longer returned.
	Fields:
		Offset int64 // byte offset where error occurred
		Err error    // error returned by underlying Write
	Methods:
		(*WriteError) Error() string
	Implements:
		*WriteError : error
type Writer struct
	A Writer takes data written to it and writes the compressed
	form of that data to an underlying writer (see NewWriter).
	Fields:
		d compressor
		dict []byte
	Methods:
		(*Writer) Close() error
			Close flushes and closes the writer.
		(*Writer) Flush() error
			Flush flushes any pending data to the underlying writer.
			It is useful mainly in compressed network protocols, to ensure that
			a remote reader has enough data to reconstruct a packet.
			Flush does not return until the data has been written.
			Calling Flush when there is no pending data still causes the Writer
			to emit a sync marker of at least 4 bytes.
			If the underlying writer returns an error, Flush returns that error.
			In the terminology of the zlib library, Flush is equivalent to Z_SYNC_FLUSH.
		(*Writer) Reset(dst io.Writer)
			Reset discards the writer's state and makes it equivalent to
			the result of NewWriter or NewWriterDict called with dst
			and w's level and dictionary.
		(*Writer) Write(data []byte) (n int, err error)
			Write writes data to w, which will eventually write the
			compressed form of data to its underlying writer.
	Implements:
		*Writer : internal/bisect.Writer
		*Writer : io.Closer
		*Writer : io.WriteCloser
		*Writer : io.Writer
		*Writer : crypto/tls.transcriptHash
	As outputs of:
		func NewWriter(w io.Writer, level int) (*Writer, error)
		func NewWriterDict(w io.Writer, level int, dict []byte) (*Writer, error)
		func nhooyr.io/websocket.getFlateWriter(w io.Writer) *Writer
	As inputs of:
		func nhooyr.io/websocket.putFlateWriter(w *Writer)
type compressor struct
	Deflate state for a single stream.
	Fields (with original comments):
		compressionLevel // embedded; chain, fastSkipHashing, good, lazy, level, nice int
		w *huffmanBitWriter
		bulkHasher func([]byte, []uint32)
		fill func(*compressor, []byte) int // copy data to window
		step func(*compressor)             // process window
		sync bool                          // requesting flush
		bestSpeed *deflateFast             // Encoder for BestSpeed
		hashHead [131072]uint32
		hashPrev [32768]uint32
		hashOffset int
			Input hash chains:
			hashHead[hashValue] contains the largest inputIndex with the specified hash value.
			If hashHead[hashValue] is within the current window, then
			hashPrev[hashHead[hashValue] & windowMask] contains the previous index
			with the same hash value.
		index int
		window []byte // input window: unprocessed data is window[index:windowEnd]
		windowEnd int
		blockStart int     // window index where current tokens start
		byteAvailable bool // if true, still need to process window[index-1].
		tokens []token     // queued output tokens
		length, offset, maxInsertIndex int // deflate state
		err error
		hashMatch [maxMatchLength - 1]uint32 // must be able to contain hashes for the maximum match length
	Methods:
		(*compressor) close() error
		(*compressor) deflate()
		(*compressor) encSpeed()
			encSpeed will compress and store the currently added data,
			if enough has been accumulated or we are at the end of the stream.
			Any error that occurred will be in d.err.
		(*compressor) fillDeflate(b []byte) int
		(*compressor) fillStore(b []byte) int
		(*compressor) fillWindow(b []byte)
			fillWindow will fill the current window with the supplied
			dictionary and calculate all hashes.
			This is much faster than doing a full encode.
			Should only be used after a reset.
		(*compressor) findMatch(pos, prevHead, prevLength, lookahead int) (length, offset int, ok bool)
			Try to find a match starting at index whose length is greater than prevSize.
			We only look at chainCount possibilities before giving up.
		(*compressor) init(w io.Writer, level int) (err error)
		(*compressor) initDeflate()
		(*compressor) reset(w io.Writer)
		(*compressor) store()
		(*compressor) storeHuff()
			storeHuff compresses and stores the currently added data
			when the d.window is full or we are at the end of the stream.
			Any error that occurred will be in d.err.
		(*compressor) syncFlush() error
		(*compressor) write(b []byte) (n int, err error)
		(*compressor) writeBlock(tokens []token, index int) error
		(*compressor) writeStoredBlock(buf []byte) error
type decompressor struct
	Decompress state.
	Fields (with original comments):
		r Reader // input source
		rBuf *bufio.Reader // created if provided io.Reader does not implement io.ByteReader
		roffset int64
		b uint32 // input bits, in top of b
		nb uint
		h1, h2 huffmanDecoder // Huffman decoders for literal/length, distance
		bits *[maxNumLit + maxNumDist]int // length arrays used to define Huffman codes
		codebits *[19]int
		dict dictDecoder // output history, buffer
		buf [4]byte      // temporary buffer (avoids repeated allocation)
		step func(*decompressor) // next step in the decompression, and decompression state
		stepState int
		final bool
		err error
		toRead []byte
		hl, hd *huffmanDecoder // Huffman decoders for literal/length, distance
		copyLen int
		copyDist int
	Methods:
		(*decompressor) Close() error
		(*decompressor) Read(b []byte) (int, error)
		(*decompressor) Reset(r io.Reader, dict []byte) error
		(*decompressor) copyData()
			copyData copies f.copyLen bytes from the underlying reader into f.hist.
			It pauses for reads when f.hist is full.
		(*decompressor) dataBlock()
			Copy a single uncompressed data block from input to output.
		(*decompressor) finishBlock()
		(*decompressor) huffSym(h *huffmanDecoder) (int, error)
			Read the next Huffman-encoded symbol from f according to h.
		(*decompressor) huffmanBlock()
			Decode a single Huffman block from f.
			hl and hd are the Huffman states for the lit/length values
			and the distance values, respectively. If hd == nil, using the
			fixed distance encoding associated with fixed Huffman blocks.
		(*decompressor) makeReader(r io.Reader)
		(*decompressor) moreBits() error
		(*decompressor) nextBlock()
		(*decompressor) readHuffman() error
	Implements:
		*decompressor : Resetter
		*decompressor : github.com/klauspost/compress/flate.Resetter
		*decompressor : io.Closer
		*decompressor : io.ReadCloser
		*decompressor : io.Reader
type deflateFast struct
	deflateFast maintains the table for matches,
	and the previous byte block for cross block matching.
	Fields:
		table [16384]tableEntry
		prev []byte // Previous block, zero length if unknown.
		cur int32   // Current match offset.
	Methods:
		(*deflateFast) encode(dst []token, src []byte) []token
			encode encodes a block given in src and appends tokens
			to dst and returns the result.
		(*deflateFast) matchLen(s, t int32, src []byte) int32
			matchLen returns the match length between src[s:] and src[t:].
			t can be negative to indicate the match is starting in e.prev.
			We assume that src[s-4:s] and src[t-4:t] already match.
		(*deflateFast) reset()
			reset resets the encoding history.
			This ensures that no matches are made to the previous block.
		(*deflateFast) shiftOffsets()
			shiftOffsets will shift down all match offsets.
			This is only called in rare situations to prevent integer overflow.
			See https://golang.org/issue/18636 and https://github.com/golang/go/issues/34121.
	As outputs of:
		func newDeflateFast() *deflateFast
type dictDecoder struct
	dictDecoder implements the LZ77 sliding dictionary as used in decompression.
	LZ77 decompresses data through sequences of two forms of commands:
	  - Literal insertions: Runs of one or more symbols are inserted into the data
	    stream as is. This is accomplished through the writeByte method for a
	    single symbol, or combinations of writeSlice/writeMark for multiple symbols.
	    Any valid stream must start with a literal insertion if no preset dictionary
	    is used.
	  - Backward copies: Runs of one or more symbols are copied from previously
	    emitted data. Backward copies come as the tuple (dist, length) where dist
	    determines how far back in the stream to copy from and length determines how
	    many bytes to copy. Note that it is valid for the length to be greater than
	    the distance. Since LZ77 uses forward copies, that situation is used to
	    perform a form of run-length encoding on repeated runs of symbols.
	    The writeCopy and tryWriteCopy methods are used to implement this command.
	For performance reasons, this implementation performs little to no sanity
	checks about the arguments. As such, the invariants documented for each
	method call must be respected.
	Fields:
		hist []byte // Sliding window history
		wrPos int   // Current output position in buffer
		rdPos int   // Have emitted hist[:rdPos] already
		full bool   // Has a full window length been written yet?
		Invariant: 0 <= rdPos <= wrPos <= len(hist)
	Methods:
		(*dictDecoder) availRead() int
			availRead reports the number of bytes that can be flushed by readFlush.
		(*dictDecoder) availWrite() int
			availWrite reports the available amount of output buffer space.
		(*dictDecoder) histSize() int
			histSize reports the total amount of historical data in the dictionary.
		(*dictDecoder) init(size int, dict []byte)
			init initializes dictDecoder to have a sliding window dictionary of the given
			size. If a preset dict is provided, it will initialize the dictionary with
			the contents of dict.
		(*dictDecoder) readFlush() []byte
			readFlush returns a slice of the historical buffer that is ready to be
			emitted to the user. The data returned by readFlush must be fully consumed
			before calling any other dictDecoder methods.
		(*dictDecoder) tryWriteCopy(dist, length int) int
			tryWriteCopy tries to copy a string at a given (distance, length) to the
			output. This specialized version is optimized for short distances.
			This method is designed to be inlined for performance reasons.
			This invariant must be kept: 0 < dist <= histSize()
		(*dictDecoder) writeByte(c byte)
			writeByte writes a single byte to the dictionary.
			This invariant must be kept: 0 < availWrite()
		(*dictDecoder) writeCopy(dist, length int) int
			writeCopy copies a string at a given (dist, length) to the output.
			This returns the number of bytes copied and may be less than the requested
			length if the available space in the output buffer is too small.
			This invariant must be kept: 0 < dist <= histSize()
		(*dictDecoder) writeMark(cnt int)
			writeMark advances the writer pointer by cnt.
			This invariant must be kept: 0 <= cnt <= availWrite()
		(*dictDecoder) writeSlice() []byte
			writeSlice returns a slice of the available buffer to write data to.
			This invariant will be kept: len(s) <= availWrite()
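A toy illustration of the backward-copy rule above (this is not the package's
dictDecoder): a copy whose length exceeds its distance reads bytes that the
same copy has just written, which amounts to run-length encoding.

package main

import "fmt"

// lz77Copy appends a backward copy of `length` bytes starting `dist`
// bytes before the end of hist, copying forward one byte at a time.
func lz77Copy(hist []byte, dist, length int) []byte {
	start := len(hist) - dist
	for i := 0; i < length; i++ {
		hist = append(hist, hist[start+i])
	}
	return hist
}

func main() {
	// Literal insertion of "ab", then a copy with dist=2, length=6.
	out := lz77Copy([]byte("ab"), 2, 6)
	fmt.Println(string(out)) // abababab
}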
type hcode struct
	hcode is a huffman code with a bit code and bit length.
	Fields:
		code uint16
		len uint16
	Methods:
		(*hcode) set(code uint16, length uint16)
			set sets the code and length of an hcode.
type huffmanBitWriter struct
	Fields (with original comments):
		writer io.Writer
			writer is the underlying writer.
			Do not use it directly; use the write method, which ensures
			that Write errors are sticky.
		bits uint64
		nbits uint
		bytes [248]byte
		nbytes int
			Data waiting to be written is bytes[0:nbytes]
			and then the low nbits of bits. Data is always written
			sequentially into the bytes array.
		codegen []uint8
		codegenEncoding *huffmanEncoder
		codegenFreq [19]int32
		literalEncoding *huffmanEncoder
		literalFreq []int32
		offsetEncoding *huffmanEncoder
		offsetFreq []int32
		err error
	Methods:
		(*huffmanBitWriter) dynamicSize(litEnc, offEnc *huffmanEncoder, extraBits int) (size, numCodegens int)
			dynamicSize returns the size of dynamically encoded data in bits.
		(*huffmanBitWriter) fixedSize(extraBits int) int
			fixedSize returns the size of fixed encoded data in bits.
		(*huffmanBitWriter) flush()
		(*huffmanBitWriter) generateCodegen(numLiterals int, numOffsets int, litEnc, offEnc *huffmanEncoder)
			RFC 1951 3.2.7 specifies a special run-length encoding for specifying
			the literal and offset lengths arrays (which are concatenated into a single
			array). This method generates that run-length encoding.
			The result is written into the codegen array, and the frequencies
			of each code are written into the codegenFreq array.
			Codes 0-15 are single byte codes. Codes 16-18 are followed by additional
			information. Code badCode is an end marker.
				numLiterals    The number of literals in literalEncoding
				numOffsets     The number of offsets in offsetEncoding
				litEnc, offEnc The literal and offset encoder to use
		(*huffmanBitWriter) indexTokens(tokens []token) (numLiterals, numOffsets int)
			indexTokens indexes a slice of tokens, updates
			literalFreq and offsetFreq, and generates literalEncoding
			and offsetEncoding.
			The number of literal and offset tokens is returned.
		(*huffmanBitWriter) reset(writer io.Writer)
		(*huffmanBitWriter) storedSize(in []byte) (int, bool)
			storedSize calculates the stored size, including header.
			The function returns the size in bits and whether the block
			fits inside a single block.
		(*huffmanBitWriter) write(b []byte)
		(*huffmanBitWriter) writeBits(b int32, nb uint)
		(*huffmanBitWriter) writeBlock(tokens []token, eof bool, input []byte)
			writeBlock will write a block of tokens with the smallest encoding.
			The original input can be supplied, and if the huffman encoded data
			is larger than the original bytes, the data will be written as a
			stored block.
			If the input is nil, the tokens will always be Huffman encoded.
		(*huffmanBitWriter) writeBlockDynamic(tokens []token, eof bool, input []byte)
			writeBlockDynamic encodes a block using a dynamic Huffman table.
			This should be used if the symbols used have a disproportionate
			histogram distribution.
			If input is supplied and the compression savings are below 1/16th of the
			input size the block is stored.
		(*huffmanBitWriter) writeBlockHuff(eof bool, input []byte)
			writeBlockHuff encodes a block of bytes as either
			Huffman encoded literals or uncompressed bytes if the
			result gains very little from compression.
		(*huffmanBitWriter) writeBytes(bytes []byte)
		(*huffmanBitWriter) writeCode(c hcode)
		(*huffmanBitWriter) writeDynamicHeader(numLiterals int, numOffsets int, numCodegens int, isEof bool)
			Write the header of a dynamic Huffman block to the output stream.
				numLiterals The number of literals specified in codegen
				numOffsets  The number of offsets specified in codegen
				numCodegens The number of codegens used in codegen
		(*huffmanBitWriter) writeFixedHeader(isEof bool)
		(*huffmanBitWriter) writeStoredHeader(length int, isEof bool)
		(*huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode)
			writeTokens writes a slice of tokens to the output.
			Codes for literal and offset encoding must be supplied.
	As outputs of:
		func newHuffmanBitWriter(w io.Writer) *huffmanBitWriter
type huffmanDecoder struct
	Fields:
		min int            // the minimum code length
		chunks [512]uint32 // chunks as described above
		links [][]uint32   // overflow links
		linkMask uint32    // mask the width of the link table
	Methods:
		(*huffmanDecoder) init(lengths []int) bool
			Initialize Huffman decoding tables from array of code lengths.
			Following this function, h is guaranteed to be initialized into a complete
			tree (i.e., neither over-subscribed nor under-subscribed). The exception is a
			degenerate case where the tree has only a single symbol with length 1. Empty
			trees are permitted.
	As types of:
		var fixedHuffmanDecoder huffmanDecoder
type huffmanEncoder struct
	Fields:
		bitCount [17]int32
		codes []hcode
		freqcache []literalNode
		lns byLiteral // stored to avoid repeated allocation in generate
		lfs byFreq    // stored to avoid repeated allocation in generate
	Methods:
		(*huffmanEncoder) assignEncodingAndSize(bitCount []int32, list []literalNode)
			Look at the leaves and assign them a bit count and an encoding as specified
			in RFC 1951 3.2.2.
		(*huffmanEncoder) bitCounts(list []literalNode, maxBits int32) []int32
			bitCounts computes the number of literals assigned to each bit size in the
			Huffman encoding. It is only called when list.length >= 3.
			The cases of 0, 1, and 2 literals are handled by special case code.
			list is an array of the literals with non-zero frequencies
			and their associated frequencies. The array is in order of increasing
			frequency and has as its last element a special element with frequency
			MaxInt32.
			maxBits is the maximum number of bits that should be used to encode any literal.
			It must be less than 16.
			bitCounts returns an integer slice in which slice[i] indicates the number of
			literals that should be encoded in i bits.
		(*huffmanEncoder) bitLength(freq []int32) int
		(*huffmanEncoder) generate(freq []int32, maxBits int32)
			Update this Huffman Code object to be the minimum code for the specified
			frequency count.
			freq is an array of frequencies, in which freq[i] gives the frequency of literal i.
			maxBits is the maximum number of bits to use for any literal.
	As outputs of:
		func generateFixedLiteralEncoding() *huffmanEncoder
		func generateFixedOffsetEncoding() *huffmanEncoder
		func newHuffmanEncoder(size int) *huffmanEncoder
	As types of:
		var fixedLiteralEncoding *huffmanEncoder
		var fixedOffsetEncoding *huffmanEncoder
		var huffOffset *huffmanEncoder
type levelInfo struct
	A levelInfo describes the state of the constructed tree for a given depth.
	Fields:
		level int32        // Our level. For better printing.
		lastFreq int32     // The frequency of the last node at this level.
		nextCharFreq int32 // The frequency of the next character to add to this level.
		nextPairFreq int32 // The frequency of the next pair (from level below) to add to this level.
		                   // Only valid if the "needed" value of the next lower level is 0.
		needed int32       // The number of chains remaining to generate for this level
		                   // before moving up to the next level.
Package-Level Functions (total 26, in which 4 are exported)
func NewReader(r io.Reader) io.ReadCloser
	NewReader returns a new ReadCloser that can be used
	to read the uncompressed version of r.
	If r does not also implement io.ByteReader,
	the decompressor may read more data than necessary from r.
	The reader returns io.EOF after the final block in the DEFLATE stream has
	been encountered. Any trailing data after the final block is ignored.
	The ReadCloser returned by NewReader also implements Resetter.
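A minimal sketch (not one of the package's own examples) of the trailing-data
behavior described above:

package main

import (
	"bytes"
	"compress/flate"
	"fmt"
	"io"
	"log"
)

func main() {
	var b bytes.Buffer
	zw, err := flate.NewWriter(&b, flate.BestCompression)
	if err != nil {
		log.Fatal(err)
	}
	io.WriteString(zw, "payload")
	zw.Close()

	// Append bytes after the final DEFLATE block.
	b.WriteString("trailing data that is not part of the stream")

	zr := flate.NewReader(&b)
	out, err := io.ReadAll(zr)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("%q\n", out) // "payload"; the trailing bytes are ignored
	zr.Close()
}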
func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser
	NewReaderDict is like NewReader but initializes the reader
	with a preset dictionary. The returned Reader behaves as if
	the uncompressed data stream started with the given dictionary,
	which has already been read. NewReaderDict is typically used
	to read data compressed by NewWriterDict.
	The ReadCloser returned by NewReaderDict also implements Resetter.
func NewWriter(w io.Writer, level int) (*Writer, error)
	NewWriter returns a new Writer compressing data at the given level.
	Following zlib, levels range from 1 (BestSpeed) to 9 (BestCompression);
	higher levels typically run slower but compress more. Level 0
	(NoCompression) does not attempt any compression; it only adds the
	necessary DEFLATE framing.
	Level -1 (DefaultCompression) uses the default compression level.
	Level -2 (HuffmanOnly) will use Huffman compression only, giving
	a very fast compression for all types of input, but sacrificing considerable
	compression efficiency.
	If level is in the range [-2, 9] then the error returned will be nil.
	Otherwise the error returned will be non-nil.
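A minimal sketch (not one of the package's own examples) of the level
validation described above; 10 stands in for any out-of-range value:

package main

import (
	"compress/flate"
	"fmt"
	"io"
)

func main() {
	levels := []int{flate.HuffmanOnly, flate.DefaultCompression,
		flate.NoCompression, flate.BestSpeed, flate.BestCompression, 10}
	for _, level := range levels {
		zw, err := flate.NewWriter(io.Discard, level)
		if err != nil {
			fmt.Printf("level %d: %v\n", level, err)
			continue
		}
		fmt.Printf("level %d: ok\n", level)
		zw.Close()
	}
}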
func NewWriterDict(w io.Writer, level int, dict []byte) (*Writer, error)
	NewWriterDict is like NewWriter but initializes the new
	Writer with a preset dictionary. The returned Writer behaves
	as if the dictionary had been written to it without producing
	any compressed output. The compressed data written to w
	can only be decompressed by a Reader initialized with the
	same dictionary.
func bulkHash4(b []byte, dst []uint32)
	bulkHash4 will compute hashes using the same
	algorithm as hash4.
Package-Level Constants

const HuffmanOnly = -2
	HuffmanOnly disables Lempel-Ziv match searching and only performs Huffman
	entropy encoding. This mode is useful in compressing data that has
	already been compressed with an LZ style algorithm (e.g. Snappy or LZ4)
	that lacks an entropy encoder. Compression gains are achieved when
	certain bytes in the input stream occur more frequently than others.
	Note that HuffmanOnly produces a compressed output that is
	RFC 1951 compliant. That is, any valid DEFLATE decompressor will
	continue to be able to decompress this output.
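A minimal sketch (not one of the package's own examples) of HuffmanOnly on
input skewed toward a few byte values; the sample data is illustrative and
the exact compressed size will vary:

package main

import (
	"bytes"
	"compress/flate"
	"fmt"
	"log"
)

func main() {
	// Highly skewed byte frequencies compress well under pure Huffman
	// coding, even with LZ77 match searching disabled.
	data := bytes.Repeat([]byte("aaaaabbbc"), 1000)

	var b bytes.Buffer
	zw, err := flate.NewWriter(&b, flate.HuffmanOnly)
	if err != nil {
		log.Fatal(err)
	}
	if _, err := zw.Write(data); err != nil {
		log.Fatal(err)
	}
	if err := zw.Close(); err != nil {
		log.Fatal(err)
	}
	fmt.Printf("%d bytes in, %d bytes out\n", len(data), b.Len())
}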
The LZ77 step produces a sequence of literal tokens and <length, offset>
pair tokens. The offset is also known as distance. The underlying wire
format limits the range of lengths and offsets. For example, there are
256 legitimate lengths: those in the range [3, 258]. This package's
compressor uses a higher minimum match length, enabling optimizations
such as finding matches via 32-bit loads and compares.
const bufferFlushSize = 240
	bufferFlushSize indicates the buffer size
	after which bytes are flushed to the writer.
	Should preferably be a multiple of 6, since
	we accumulate 6 bytes between writes to the buffer.
const bufferReset = math.MaxInt32 - maxStoreBlockSize*2
	Reset the buffer offset when reaching this.
	Offsets are stored between blocks as int32 values.
	Since the offset we are checking against is at the beginning
	of the buffer, we need to subtract the current and input
	buffer to not risk overflowing the int32.
const bufferSize = bufferFlushSize + 8
	bufferSize is the actual output byte buffer size.
	It must have additional headroom for a flush
	which can contain up to 8 bytes.
const inputMargin = 16 - 1
	These constants are defined by the Snappy implementation so that its
	assembly implementation can fast-path some 16-bytes-at-a-time copies. They
	aren't necessary in the pure Go implementation, as we don't use those same
	optimizations, but using the same thresholds doesn't really hurt.
The next three numbers come from the RFC section 3.2.7, with the
additional proviso in section 3.2.5 which implies that distance codes
30 and 31 should never occur in compressed data.
const minMatchLength = 4 // The smallest match length that the compressor actually emits
const minNonLiteralBlockSize = 1 + 1 + inputMargin
	These constants are defined by the Snappy implementation so that its
	assembly implementation can fast-path some 16-bytes-at-a-time copies. They
	aren't necessary in the pure Go implementation, as we don't use those same
	optimizations, but using the same thresholds doesn't really hurt.
const numCodes = 19 // number of codes in Huffman meta-code