Source File
transform.go
Belonging Package
vendor/golang.org/x/text/transform
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package transform provides reader and writer wrappers that transform the
// bytes passing through as well as various transformations. Example
// transformations provided by other packages include normalization and
// conversion between character sets.
package transform // import "golang.org/x/text/transform"
import (
)
var (
// ErrShortDst means that the destination buffer was too short to
// receive all of the transformed bytes.
ErrShortDst = errors.New("transform: short destination buffer")
// ErrShortSrc means that the source buffer has insufficient data to
// complete the transformation.
ErrShortSrc = errors.New("transform: short source buffer")
// ErrEndOfSpan means that the input and output (the transformed input)
// are not identical.
ErrEndOfSpan = errors.New("transform: input and output are not identical")
// errInconsistentByteCount means that Transform returned success (nil
// error) but also returned nSrc inconsistent with the src argument.
errInconsistentByteCount = errors.New("transform: inconsistent byte count returned")
// errShortInternal means that an internal buffer is not large enough
// to make progress and the Transform operation must be aborted.
errShortInternal = errors.New("transform: short internal buffer")
)
// Transformer transforms bytes.
type Transformer interface {
// Transform writes to dst the transformed bytes read from src, and
// returns the number of dst bytes written and src bytes read. The
// atEOF argument tells whether src represents the last bytes of the
// input.
//
// Callers should always process the nDst bytes produced and account
// for the nSrc bytes consumed before considering the error err.
//
// A nil error means that all of the transformed bytes (whether freshly
// transformed from src or left over from previous Transform calls)
// were written to dst. A nil error can be returned regardless of
// whether atEOF is true. If err is nil then nSrc must equal len(src);
// the converse is not necessarily true.
//
// ErrShortDst means that dst was too short to receive all of the
// transformed bytes. ErrShortSrc means that src had insufficient data
// to complete the transformation. If both conditions apply, then
// either error may be returned. Other than the error conditions listed
// here, implementations are free to report other errors that arise.
Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error)
// Reset resets the state and allows a Transformer to be reused.
Reset()
}
// SpanningTransformer extends the Transformer interface with a Span method
// that determines how much of the input already conforms to the Transformer.
type SpanningTransformer interface {
Transformer
// Span returns a position in src such that transforming src[:n] results in
// identical output src[:n] for these bytes. It does not necessarily return
// the largest such n. The atEOF argument tells whether src represents the
// last bytes of the input.
//
// Callers should always account for the n bytes consumed before
// considering the error err.
//
// A nil error means that all input bytes are known to be identical to the
// output produced by the Transformer. A nil error can be returned
// regardless of whether atEOF is true. If err is nil, then n must
// equal len(src); the converse is not necessarily true.
//
// ErrEndOfSpan means that the Transformer output may differ from the
// input after n bytes. Note that n may be len(src), meaning that the output
// would contain additional bytes after otherwise identical output.
// ErrShortSrc means that src had insufficient data to determine whether the
// remaining bytes would change. Other than the error conditions listed
// here, implementations are free to report other errors that arise.
//
// Calling Span can modify the Transformer state as a side effect. In
// effect, it does the transformation just as calling Transform would, only
// without copying to a destination buffer and only up to a point it can
// determine the input and output bytes are the same. This is obviously more
// limited than calling Transform, but can be more efficient in terms of
// copying and allocating buffers. Calls to Span and Transform may be
// interleaved.
Span(src []byte, atEOF bool) (n int, err error)
}
// NopResetter can be embedded by implementations of Transformer to add a nop
// Reset method.
type NopResetter struct{}
// Reset implements the Reset method of the Transformer interface.
func (NopResetter) () {}
// Reader wraps another io.Reader by transforming the bytes read.
type Reader struct {
r io.Reader
t Transformer
err error
// dst[dst0:dst1] contains bytes that have been transformed by t but
// not yet copied out via Read.
dst []byte
dst0, dst1 int
// src[src0:src1] contains bytes that have been read from r but not
// yet transformed through t.
src []byte
src0, src1 int
// transformComplete is whether the transformation is complete,
// regardless of whether or not it was successful.
transformComplete bool
}
const defaultBufSize = 4096
// NewReader returns a new Reader that wraps r by transforming the bytes read
// via t. It calls Reset on t.
func ( io.Reader, Transformer) *Reader {
.Reset()
return &Reader{
r: ,
t: ,
dst: make([]byte, defaultBufSize),
src: make([]byte, defaultBufSize),
}
}
// Read implements the io.Reader interface.
func ( *Reader) ( []byte) (int, error) {
, := 0, error(nil)
for {
// Copy out any transformed bytes and return the final error if we are done.
if .dst0 != .dst1 {
= copy(, .dst[.dst0:.dst1])
.dst0 +=
if .dst0 == .dst1 && .transformComplete {
return , .err
}
return , nil
} else if .transformComplete {
return 0, .err
}
// Try to transform some source bytes, or to flush the transformer if we
// are out of source bytes. We do this even if r.r.Read returned an error.
// As the io.Reader documentation says, "process the n > 0 bytes returned
// before considering the error".
if .src0 != .src1 || .err != nil {
.dst0 = 0
.dst1, , = .t.Transform(.dst, .src[.src0:.src1], .err == io.EOF)
.src0 +=
switch {
case == nil:
if .src0 != .src1 {
.err = errInconsistentByteCount
}
// The Transform call was successful; we are complete if we
// cannot read more bytes into src.
.transformComplete = .err != nil
continue
case == ErrShortDst && (.dst1 != 0 || != 0):
// Make room in dst by copying out, and try again.
continue
case == ErrShortSrc && .src1-.src0 != len(.src) && .err == nil:
// Read more bytes into src via the code below, and try again.
default:
.transformComplete = true
// The reader error (r.err) takes precedence over the
// transformer error (err) unless r.err is nil or io.EOF.
if .err == nil || .err == io.EOF {
.err =
}
continue
}
}
// Move any untransformed source bytes to the start of the buffer
// and read more bytes.
if .src0 != 0 {
.src0, .src1 = 0, copy(.src, .src[.src0:.src1])
}
, .err = .r.Read(.src[.src1:])
.src1 +=
}
}
// TODO: implement ReadByte (and ReadRune??).
// Writer wraps another io.Writer by transforming the bytes read.
// The user needs to call Close to flush unwritten bytes that may
// be buffered.
type Writer struct {
w io.Writer
t Transformer
dst []byte
// src[:n] contains bytes that have not yet passed through t.
src []byte
n int
}
// NewWriter returns a new Writer that wraps w by transforming the bytes written
// via t. It calls Reset on t.
func ( io.Writer, Transformer) *Writer {
.Reset()
return &Writer{
w: ,
t: ,
dst: make([]byte, defaultBufSize),
src: make([]byte, defaultBufSize),
}
}
// Write implements the io.Writer interface. If there are not enough
// bytes available to complete a Transform, the bytes will be buffered
// for the next write. Call Close to convert the remaining bytes.
func ( *Writer) ( []byte) ( int, error) {
:=
if .n > 0 {
// Append bytes from data to the last remainder.
// TODO: limit the amount copied on first try.
= copy(.src[.n:], )
.n +=
= .src[:.n]
}
for {
, , := .t.Transform(.dst, , false)
if , := .w.Write(.dst[:]); != nil {
return ,
}
= [:]
if .n == 0 {
+=
} else if len() <= {
// Enough bytes from w.src have been consumed. We make src point
// to data instead to reduce the copying.
.n = 0
-= len()
= [:]
if < len() && ( == nil || == ErrShortSrc) {
continue
}
}
switch {
case ErrShortDst:
// This error is okay as long as we are making progress.
if > 0 || > 0 {
continue
}
case ErrShortSrc:
if len() < len(.src) {
:= copy(.src, )
// If w.n > 0, bytes from data were already copied to w.src and n
// was already set to the number of bytes consumed.
if .n == 0 {
+=
}
.n =
= nil
} else if > 0 || > 0 {
// Not enough buffer to store the remainder. Keep processing as
// long as there is progress. Without this case, transforms that
// require a lookahead larger than the buffer may result in an
// error. This is not something one may expect to be common in
// practice, but it may occur when buffers are set to small
// sizes during testing.
continue
}
case nil:
if .n > 0 {
= errInconsistentByteCount
}
}
return ,
}
}
// Close implements the io.Closer interface.
func ( *Writer) () error {
:= .src[:.n]
for {
, , := .t.Transform(.dst, , true)
if , := .w.Write(.dst[:]); != nil {
return
}
if != ErrShortDst {
return
}
= [:]
}
}
type nop struct{ NopResetter }
func (nop) (, []byte, bool) (, int, error) {
:= copy(, )
if < len() {
= ErrShortDst
}
return , ,
}
func (nop) ( []byte, bool) ( int, error) {
return len(), nil
}
type discard struct{ NopResetter }
func (discard) (, []byte, bool) (, int, error) {
return 0, len(), nil
}
var (
// Discard is a Transformer for which all Transform calls succeed
// by consuming all bytes and writing nothing.
Discard Transformer = discard{}
// Nop is a SpanningTransformer that copies src to dst.
Nop SpanningTransformer = nop{}
)
// chain is a sequence of links. A chain with N Transformers has N+1 links and
// N+1 buffers. Of those N+1 buffers, the first and last are the src and dst
// buffers given to chain.Transform and the middle N-1 buffers are intermediate
// buffers owned by the chain. The i'th link transforms bytes from the i'th
// buffer chain.link[i].b at read offset chain.link[i].p to the i+1'th buffer
// chain.link[i+1].b at write offset chain.link[i+1].n, for i in [0, N).
type chain struct {
link []link
err error
// errStart is the index at which the error occurred plus 1. Processing
// errStart at this level at the next call to Transform. As long as
// errStart > 0, chain will not consume any more source bytes.
errStart int
}
func ( *chain) ( int, error) {
if := + 1; > .errStart {
.errStart =
.err =
}
}
type link struct {
t Transformer
// b[p:n] holds the bytes to be transformed by t.
b []byte
p int
n int
}
func ( *link) () []byte {
return .b[.p:.n]
}
func ( *link) () []byte {
return .b[.n:]
}
// Chain returns a Transformer that applies t in sequence.
func ( ...Transformer) Transformer {
if len() == 0 {
return nop{}
}
:= &chain{link: make([]link, len()+1)}
for , := range {
.link[].t =
}
// Allocate intermediate buffers.
:= make([][defaultBufSize]byte, len()-1)
for := range {
.link[+1].b = [][:]
}
return
}
// Reset resets the state of Chain. It calls Reset on all the Transformers.
func ( *chain) () {
for , := range .link {
if .t != nil {
.t.Reset()
}
.link[].p, .link[].n = 0, 0
}
}
// TODO: make chain use Span (is going to be fun to implement!)
// Transform applies the transformers of c in sequence.
func ( *chain) (, []byte, bool) (, int, error) {
// Set up src and dst in the chain.
:= &.link[0]
:= &.link[len(.link)-1]
.b, .p, .n = , 0, len()
.b, .n = , 0
var , bool // for detecting progress
// i is the index of the next Transformer to apply, for i in [low, high].
// low is the lowest index for which c.link[low] may still produce bytes.
// high is the highest index for which c.link[high] has a Transformer.
// The error returned by Transform determines whether to increase or
// decrease i. We try to completely fill a buffer before converting it.
for , , := .errStart, .errStart, len(.link)-2; <= && <= ; {
, := &.link[], &.link[+1]
, , := .t.Transform(.dst(), .src(), && == )
.n +=
.p +=
if > 0 && .p == .n {
.p, .n = 0, 0
}
, = , false
switch {
case ErrShortDst:
// Process the destination buffer next. Return if we are already
// at the high index.
if == {
return .n, .p, ErrShortDst
}
if .n != 0 {
++
// If the Transformer at the next index is not able to process any
// source bytes there is nothing that can be done to make progress
// and the bytes will remain unprocessed. lastFull is used to
// detect this and break out of the loop with a fatal error.
= true
continue
}
// The destination buffer was too small, but is completely empty.
// Return a fatal error as this transformation can never complete.
.fatalError(, errShortInternal)
case ErrShortSrc:
if == 0 {
// Save ErrShortSrc in err. All other errors take precedence.
= ErrShortSrc
break
}
// Source bytes were depleted before filling up the destination buffer.
// Verify we made some progress, move the remaining bytes to the errStart
// and try to get more source bytes.
if && == 0 || .n-.p == len(.b) {
// There were not enough source bytes to proceed while the source
// buffer cannot hold any more bytes. Return a fatal error as this
// transformation can never complete.
.fatalError(, errShortInternal)
break
}
// in.b is an internal buffer and we can make progress.
.p, .n = 0, copy(.b, .src())
fallthrough
case nil:
// if i == low, we have depleted the bytes at index i or any lower levels.
// In that case we increase low and i. In all other cases we decrease i to
// fetch more bytes before proceeding to the next index.
if > {
--
continue
}
default:
.fatalError(, )
}
// Exhausted level low or fatal error: increase low and continue
// to process the bytes accepted so far.
++
=
}
// If c.errStart > 0, this means we found a fatal error. We will clear
// all upstream buffers. At this point, no more progress can be made
// downstream, as Transform would have bailed while handling ErrShortDst.
if .errStart > 0 {
for := 1; < .errStart; ++ {
.link[].p, .link[].n = 0, 0
}
, .errStart, .err = .err, 0, nil
}
return .n, .p,
}
// Deprecated: Use runes.Remove instead.
func ( func( rune) bool) Transformer {
return removeF()
}
type removeF func(r rune) bool
func (removeF) () {}
// Transform implements the Transformer interface.
func ( removeF) (, []byte, bool) (, int, error) {
for , := rune(0), 0; len() > 0; = [:] {
if = rune([0]); < utf8.RuneSelf {
= 1
} else {
, = utf8.DecodeRune()
if == 1 {
// Invalid rune.
if ! && !utf8.FullRune() {
= ErrShortSrc
break
}
// We replace illegal bytes with RuneError. Not doing so might
// otherwise turn a sequence of invalid UTF-8 into valid UTF-8.
// The resulting byte sequence may subsequently contain runes
// for which t(r) is true that were passed unnoticed.
if !() {
if +3 > len() {
= ErrShortDst
break
}
+= copy([:], "\uFFFD")
}
++
continue
}
}
if !() {
if + > len() {
= ErrShortDst
break
}
+= copy([:], [:])
}
+=
}
return
}
// grow returns a new []byte that is longer than b, and copies the first n bytes
// of b to the start of the new slice.
func ( []byte, int) []byte {
:= len()
if <= 32 {
= 64
} else if <= 256 {
*= 2
} else {
+= >> 1
}
:= make([]byte, )
copy(, [:])
return
}
const initialBufSize = 128
// String returns a string with the result of converting s[:n] using t, where
// n <= len(s). If err == nil, n will be len(s). It calls Reset on t.
func ( Transformer, string) ( string, int, error) {
.Reset()
if == "" {
// Fast path for the common case for empty input. Results in about a
// 86% reduction of running time for BenchmarkStringLowerEmpty.
if , , := .Transform(nil, nil, true); == nil {
return "", 0, nil
}
}
// Allocate only once. Note that both dst and src escape when passed to
// Transform.
:= [2 * initialBufSize]byte{}
:= [:initialBufSize:initialBufSize]
:= [initialBufSize : 2*initialBufSize]
// The input string s is transformed in multiple chunks (starting with a
// chunk size of initialBufSize). nDst and nSrc are per-chunk (or
// per-Transform-call) indexes, pDst and pSrc are overall indexes.
, := 0, 0
, := 0, 0
// pPrefix is the length of a common prefix: the first pPrefix bytes of the
// result will equal the first pPrefix bytes of s. It is not guaranteed to
// be the largest such value, but if pPrefix, len(result) and len(s) are
// all equal after the final transform (i.e. calling Transform with atEOF
// being true returned nil error) then we don't need to allocate a new
// result string.
:= 0
for {
// Invariant: pDst == pPrefix && pSrc == pPrefix.
:= copy(, [:])
, , = .Transform(, [:], + == len())
+=
+=
// TODO: let transformers implement an optional Spanner interface, akin
// to norm's QuickSpan. This would even allow us to avoid any allocation.
if !bytes.Equal([:], [:]) {
break
}
=
if == ErrShortDst {
// A buffer can only be short if a transformer modifies its input.
break
} else if == ErrShortSrc {
if == 0 {
// No progress was made.
break
}
// Equal so far and !atEOF, so continue checking.
} else if != nil || == len() {
return string([:]), ,
}
}
// Post-condition: pDst == pPrefix + nDst && pSrc == pPrefix + nSrc.
// We have transformed the first pSrc bytes of the input s to become pDst
// transformed bytes. Those transformed bytes are discontiguous: the first
// pPrefix of them equal s[:pPrefix] and the last nDst of them equal
// dst[:nDst]. We copy them around, into a new dst buffer if necessary, so
// that they become one contiguous slice: dst[:pDst].
if != 0 {
:=
if > len() {
= make([]byte, len()+-)
}
copy([:], [:])
copy([:], [:])
=
}
// Prevent duplicate Transform calls with atEOF being true at the end of
// the input. Also return if we have an unrecoverable error.
if ( == nil && == len()) ||
( != nil && != ErrShortDst && != ErrShortSrc) {
return string([:]), ,
}
// Transform the remaining input, growing dst and src buffers as necessary.
for {
:= copy(, [:])
:= + == len()
, , := .Transform([:], [:], )
+=
+=
// If we got ErrShortDst or ErrShortSrc, do not grow as long as we can
// make progress. This may avoid excessive allocations.
if == ErrShortDst {
if == 0 {
= grow(, )
}
} else if == ErrShortSrc {
if {
return string([:]), ,
}
if == 0 {
= grow(, 0)
}
} else if != nil || == len() {
return string([:]), ,
}
}
}
// Bytes returns a new byte slice with the result of converting b[:n] using t,
// where n <= len(b). If err == nil, n will be len(b). It calls Reset on t.
func ( Transformer, []byte) ( []byte, int, error) {
return doAppend(, 0, make([]byte, len()), )
}
// Append appends the result of converting src[:n] using t to dst, where
// n <= len(src), If err == nil, n will be len(src). It calls Reset on t.
func ( Transformer, , []byte) ( []byte, int, error) {
if len() == cap() {
:= len() + len() // It is okay for this to be 0.
:= make([]byte, )
= [:copy(, )]
}
return doAppend(, len(), [:cap()], )
}
func ( Transformer, int, , []byte) ( []byte, int, error) {
.Reset()
:= 0
for {
, , := .Transform([:], [:], true)
+=
+=
if != ErrShortDst {
return [:], ,
}
// Grow the destination buffer, but do not grow as long as we can make
// progress. This may avoid excessive allocations.
if == 0 {
= grow(, )
}
}
}
The pages are generated with Golds v0.6.7. (GOOS=linux GOARCH=amd64) Golds is a Go 101 project developed by Tapir Liu. PR and bug reports are welcome and can be submitted to the issue list. Please follow @Go100and1 (reachable from the left QR code) to get the latest news of Golds. |