// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package bidirule implements the Bidi Rule defined by RFC 5893.
//
// This package is under development. The API may change without notice and
// without preserving backward compatibility.
package bidirule

// NOTE(review): the import paths were lost in extraction and have been
// restored from the package qualifiers used below (errors, utf8, transform,
// bidi). Confirm the two non-stdlib paths against the module layout.
import (
	"errors"
	"unicode/utf8"

	"golang.org/x/text/transform"
	"golang.org/x/text/unicode/bidi"
)

// This file contains an implementation of RFC 5893: Right-to-Left Scripts for
// Internationalized Domain Names for Applications (IDNA)
//
// A label is an individual component of a domain name. Labels are usually
// shown separated by dots; for example, the domain name "www.example.com" is
// composed of three labels: "www", "example", and "com".
//
// An RTL label is a label that contains at least one character of class R, AL,
// or AN. An LTR label is any label that is not an RTL label.
//
// A "Bidi domain name" is a domain name that contains at least one RTL label.
//
//  The following guarantees can be made based on the above:
//
//  o  In a domain name consisting of only labels that satisfy the rule,
//     the requirements of Section 3 are satisfied. Note that even LTR
//     labels and pure ASCII labels have to be tested.
//
//  o  In a domain name consisting of only LDH labels (as defined in the
//     Definitions document [RFC5890]) and labels that satisfy the rule,
//     the requirements of Section 3 are satisfied as long as a label
//     that starts with an ASCII digit does not come after a
//     right-to-left label.
//
//  No guarantee is given for other combinations.

// ErrInvalid indicates a label is invalid according to the Bidi Rule.
var ErrInvalid = errors.New("bidirule: failed Bidi Rule")

// ruleState is the state of the label-checking state machine. It is used as
// the index into transitions.
type ruleState uint8

const (
	// ruleInitial is the zero value: no character has been consumed yet.
	ruleInitial ruleState = iota
	// ruleLTR: inside an LTR label, last character may not end the label.
	ruleLTR
	// ruleLTRFinal: inside an LTR label, last character may end the label.
	ruleLTRFinal
	// ruleRTL: inside an RTL label, last character may not end the label.
	ruleRTL
	// ruleRTLFinal: inside an RTL label, last character may end the label.
	ruleRTLFinal
	// ruleInvalid: a character was seen that no transition accepts.
	ruleInvalid
)

// ruleTransition describes one edge of the state machine: if the bit for the
// current character's Bidi class is set in mask, the machine moves to next.
type ruleTransition struct {
	next ruleState
	mask uint16
}

// transitions holds, for every ruleState, the two candidate transitions that
// advance and advanceString try in order. A class matched by neither mask
// sends the machine to ruleInvalid.
var transitions = [...][2]ruleTransition{
	// [2.1] The first character must be a character with Bidi property L, R, or
	// AL. If it has the R or AL property, it is an RTL label; if it has the L
	// property, it is an LTR label.
	ruleInitial: {
		{ruleLTRFinal, 1 << bidi.L},
		{ruleRTLFinal, 1<<bidi.R | 1<<bidi.AL},
	},
	ruleRTL: {
		// [2.3] In an RTL label, the end of the label must be a character with
		// Bidi property R, AL, EN, or AN, followed by zero or more characters
		// with Bidi property NSM.
		{ruleRTLFinal, 1<<bidi.R | 1<<bidi.AL | 1<<bidi.EN | 1<<bidi.AN},

		// [2.2] In an RTL label, only characters with the Bidi properties R,
		// AL, AN, EN, ES, CS, ET, ON, BN, or NSM are allowed.
		// We exclude the entries from [2.3]
		{ruleRTL, 1<<bidi.ES | 1<<bidi.CS | 1<<bidi.ET | 1<<bidi.ON | 1<<bidi.BN | 1<<bidi.NSM},
	},
	ruleRTLFinal: {
		// [2.3] In an RTL label, the end of the label must be a character with
		// Bidi property R, AL, EN, or AN, followed by zero or more characters
		// with Bidi property NSM.
		{ruleRTLFinal, 1<<bidi.R | 1<<bidi.AL | 1<<bidi.EN | 1<<bidi.AN | 1<<bidi.NSM},

		// [2.2] In an RTL label, only characters with the Bidi properties R,
		// AL, AN, EN, ES, CS, ET, ON, BN, or NSM are allowed.
		// We exclude the entries from [2.3] and NSM.
		{ruleRTL, 1<<bidi.ES | 1<<bidi.CS | 1<<bidi.ET | 1<<bidi.ON | 1<<bidi.BN},
	},
	ruleLTR: {
		// [2.6] In an LTR label, the end of the label must be a character with
		// Bidi property L or EN, followed by zero or more characters with Bidi
		// property NSM.
		{ruleLTRFinal, 1<<bidi.L | 1<<bidi.EN},

		// [2.5] In an LTR label, only characters with the Bidi properties L,
		// EN, ES, CS, ET, ON, BN, or NSM are allowed.
		// We exclude the entries from [2.6].
		{ruleLTR, 1<<bidi.ES | 1<<bidi.CS | 1<<bidi.ET | 1<<bidi.ON | 1<<bidi.BN | 1<<bidi.NSM},
	},
	ruleLTRFinal: {
		// [2.6] In an LTR label, the end of the label must be a character with
		// Bidi property L or EN, followed by zero or more characters with Bidi
		// property NSM.
		{ruleLTRFinal, 1<<bidi.L | 1<<bidi.EN | 1<<bidi.NSM},

		// [2.5] In an LTR label, only characters with the Bidi properties L,
		// EN, ES, CS, ET, ON, BN, or NSM are allowed.
		// We exclude the entries from [2.6].
		{ruleLTR, 1<<bidi.ES | 1<<bidi.CS | 1<<bidi.ET | 1<<bidi.ON | 1<<bidi.BN},
	},
	// Both masks are empty, so ruleInvalid can never be left.
	ruleInvalid: {
		{ruleInvalid, 0},
		{ruleInvalid, 0},
	},
}

// [2.4] In an RTL label, if an EN is present, no AN may be present, and
// vice versa.
const exclusiveRTL = uint16(1<<bidi.EN | 1<<bidi.AN)

// From RFC 5893
// An RTL label is a label that contains at least one character of type
// R, AL, or AN.
//
// An LTR label is any label that is not an RTL label.

// Direction reports the direction of the given label as defined by RFC 5893.
// The Bidi Rule does not have to be applied to labels of the category
// LeftToRight.
func Direction(b []byte) bidi.Direction {
	for i := 0; i < len(b); {
		e, sz := bidi.Lookup(b[i:])
		if sz == 0 {
			// No size was reported for this position: step over one byte
			// and retry, exactly as DirectionString does.
			i++
			continue
		}
		c := e.Class()
		if c == bidi.R || c == bidi.AL || c == bidi.AN {
			return bidi.RightToLeft
		}
		i += sz
	}
	return bidi.LeftToRight
}

// DirectionString reports the direction of the given label as defined by RFC
// 5893. The Bidi Rule does not have to be applied to labels of the category
// LeftToRight.
func DirectionString(s string) bidi.Direction {
	for i := 0; i < len(s); {
		e, sz := bidi.LookupString(s[i:])
		if sz == 0 {
			i++
			continue
		}
		c := e.Class()
		if c == bidi.R || c == bidi.AL || c == bidi.AN {
			return bidi.RightToLeft
		}
		i += sz
	}
	return bidi.LeftToRight
}

// Valid reports whether b conforms to the BiDi rule.
//
// NOTE(review): isFinal is not defined in this file; it is presumably
// provided by a sibling file of the package.
func Valid(b []byte) bool {
	var t Transformer
	if n, ok := t.advance(b); !ok || n < len(b) {
		return false
	}
	return t.isFinal()
}

// ValidString reports whether s conforms to the BiDi rule.
func ValidString(s string) bool {
	var t Transformer
	if n, ok := t.advanceString(s); !ok || n < len(s) {
		return false
	}
	return t.isFinal()
}

// New returns a Transformer that verifies that input adheres to the Bidi Rule.
func New() *Transformer {
	return &Transformer{}
}

// Transformer implements transform.Transform.
type Transformer struct {
	// state is the current state of the rule state machine.
	state ruleState
	// NOTE(review): hasRTL is neither read nor written in this file.
	hasRTL bool
	// seen is a bit set, indexed by Bidi class, of all classes consumed so far.
	seen uint16
}

// A rule can only be violated for "Bidi Domain names", meaning if one of the
// following categories has been observed.
func (t *Transformer) isRTL() bool {
	const isRTL = 1<<bidi.R | 1<<bidi.AL | 1<<bidi.AN
	return t.seen&isRTL != 0
}

// Reset implements transform.Transformer.
func (t *Transformer) Reset() { *t = Transformer{} }

// Transform implements transform.Transformer. This Transformer has state and
// needs to be reset between uses.
//
// The input is validated with Span and the accepted prefix is copied to dst
// unchanged, so nDst always equals nSrc.
func (t *Transformer) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
	if len(dst) < len(src) {
		// Only as much as fits in dst can be processed; what remains of src
		// means we are not really at EOF for this call.
		src = src[:len(dst)]
		atEOF = false
		err = transform.ErrShortDst
	}
	n, err1 := t.Span(src, atEOF)
	copy(dst, src[:n])
	// Keep ErrShortDst unless Span reported something more severe than
	// merely needing more source bytes.
	if err == nil || err1 != nil && err1 != transform.ErrShortSrc {
		err = err1
	}
	return n, n, err
}

// Span returns the first n bytes of src that conform to the Bidi rule.
//
// It returns ErrInvalid if the rule is violated, or if atEOF is set and the
// input either could not be fully consumed or does not end in a final state.
// It returns transform.ErrShortSrc if src could not be fully consumed and
// atEOF is false.
func (t *Transformer) Span(src []byte, atEOF bool) (n int, err error) {
	if t.state == ruleInvalid && t.isRTL() {
		return 0, ErrInvalid
	}
	n, ok := t.advance(src)
	switch {
	case !ok:
		err = ErrInvalid
	case n < len(src):
		if !atEOF {
			err = transform.ErrShortSrc
			break
		}
		err = ErrInvalid
	case !t.isFinal():
		err = ErrInvalid
	}
	return n, err
}

// Precomputing the ASCII values decreases running time for the ASCII fast path
// by about 30%.
var asciiTable [128]bidi.Properties

// init fills asciiTable with the Bidi properties of every ASCII code point.
func init() {
	for i := range asciiTable {
		p, _ := bidi.LookupRune(rune(i))
		asciiTable[i] = p
	}
}

// advance runs the state machine over s, updating t.state and t.seen.
//
// It returns the number of bytes consumed and whether the consumed prefix is
// acceptable so far. ok is false on invalid UTF-8, when both EN and AN have
// been seen, or when no transition matches and an RTL class has been seen.
// It returns (n, true) with n < len(s) when s ends in an incomplete UTF-8
// encoding.
func (t *Transformer) advance(s []byte) (n int, ok bool) {
	var e bidi.Properties
	var sz int
	for n < len(s) {
		if s[n] < utf8.RuneSelf {
			e, sz = asciiTable[s[n]], 1
		} else {
			e, sz = bidi.Lookup(s[n:])
			if sz <= 1 {
				if sz == 1 {
					// We always consider invalid UTF-8 to be invalid, even if
					// the string has not yet been determined to be RTL.
					// TODO: is this correct?
					return n, false
				}
				return n, true // incomplete UTF-8 encoding
			}
		}
		// TODO: using CompactClass would result in noticeable speedup.
		// See unicode/bidi/prop.go:Properties.CompactClass.
		c := uint16(1 << e.Class())
		t.seen |= c
		if t.seen&exclusiveRTL == exclusiveRTL {
			t.state = ruleInvalid
			return n, false
		}
		switch tr := transitions[t.state]; {
		case tr[0].mask&c != 0:
			t.state = tr[0].next
		case tr[1].mask&c != 0:
			t.state = tr[1].next
		default:
			t.state = ruleInvalid
			// Without an RTL class seen so far, keep scanning: a later
			// character may still turn this into a Bidi label.
			if t.isRTL() {
				return n, false
			}
		}
		n += sz
	}
	return n, true
}

// advanceString is the string counterpart of advance; see advance for the
// meaning of the results.
func (t *Transformer) advanceString(s string) (n int, ok bool) {
	var e bidi.Properties
	var sz int
	for n < len(s) {
		if s[n] < utf8.RuneSelf {
			e, sz = asciiTable[s[n]], 1
		} else {
			e, sz = bidi.LookupString(s[n:])
			if sz <= 1 {
				if sz == 1 {
					return n, false // invalid UTF-8
				}
				return n, true // incomplete UTF-8 encoding
			}
		}
		// TODO: using CompactClass results in noticeable speedup.
		// See unicode/bidi/prop.go:Properties.CompactClass.
		c := uint16(1 << e.Class())
		t.seen |= c
		if t.seen&exclusiveRTL == exclusiveRTL {
			t.state = ruleInvalid
			return n, false
		}
		switch tr := transitions[t.state]; {
		case tr[0].mask&c != 0:
			t.state = tr[0].next
		case tr[1].mask&c != 0:
			t.state = tr[1].next
		default:
			t.state = ruleInvalid
			if t.isRTL() {
				return n, false
			}
		}
		n += sz
	}
	return n, true
}
The pages are generated with Golds v0.6.7. (GOOS=linux GOARCH=amd64)
Golds is a Go 101 project developed by Tapir Liu.
PR and bug reports are welcome and can be submitted to the issue list.
Please follow @Go100and1 (reachable from the left QR code) to get the latest news of Golds.