Source File
bracket.go
Belonging Package
vendor/golang.org/x/text/unicode/bidi
// Copyright 2015 The Go Authors. All rights reserved.// Use of this source code is governed by a BSD-style// license that can be found in the LICENSE file.package bidiimport ()// This file contains a port of the reference implementation of the// Bidi Parentheses Algorithm:// https://www.unicode.org/Public/PROGRAMS/BidiReferenceJava/BidiPBAReference.java//// The implementation in this file covers definitions BD14-BD16 and rule N0// of UAX#9.//// Some preprocessing is done for each rune before data is passed to this// algorithm:// - opening and closing brackets are identified// - a bracket pair type, like '(' and ')' is assigned a unique identifier that// is identical for the opening and closing bracket. It is left to do these// mappings.// - The BPA algorithm requires that bracket characters that are canonical// equivalents of each other be able to be substituted for each other.// It is the responsibility of the caller to do this canonicalization.//// In implementing BD16, this implementation departs slightly from the "logical"// algorithm defined in UAX#9. In particular, the stack referenced there// supports operations that go beyond a "basic" stack. An equivalent// implementation based on a linked list is used here.// Bidi_Paired_Bracket_Type// BD14. An opening paired bracket is a character whose// Bidi_Paired_Bracket_Type property value is Open.//// BD15. A closing paired bracket is a character whose// Bidi_Paired_Bracket_Type property value is Close.type bracketType byteconst (bpNone bracketType = iotabpOpenbpClose)// bracketPair holds a pair of index values for opening and closing bracket// location of a bracket pair.type bracketPair struct {opener intcloser int}func ( *bracketPair) () string {return fmt.Sprintf("(%v, %v)", .opener, .closer)}// bracketPairs is a slice of bracketPairs with a sort.Interface implementation.type bracketPairs []bracketPairfunc ( bracketPairs) () int { return len() }func ( bracketPairs) (, int) { [], [] = [], [] }func ( bracketPairs) (, int) bool { return [].opener < [].opener }// resolvePairedBrackets runs the paired bracket part of the UBA algorithm.//// For each rune, it takes the indexes into the original string, the class the// bracket type (in pairTypes) and the bracket identifier (pairValues). It also// takes the direction type for the start-of-sentence and the embedding level.//// The identifiers for bracket types are the rune of the canonicalized opening// bracket for brackets (open or close) or 0 for runes that are not brackets.func ( *isolatingRunSequence) {:= bracketPairer{sos: .sos,openers: list.New(),codesIsolatedRun: .types,indexes: .indexes,}:= Lif .level&1 != 0 {= R}.locateBrackets(.p.pairTypes, .p.pairValues).resolveBrackets(, .p.initialTypes)}type bracketPairer struct {sos Class // direction corresponding to start of sequence// The following is a restatement of BD 16 using non-algorithmic language.//// A bracket pair is a pair of characters consisting of an opening// paired bracket and a closing paired bracket such that the// Bidi_Paired_Bracket property value of the former equals the latter,// subject to the following constraints.// - both characters of a pair occur in the same isolating run sequence// - the closing character of a pair follows the opening character// - any bracket character can belong at most to one pair, the earliest possible one// - any bracket character not part of a pair is treated like an ordinary character// - pairs may nest properly, but their spans may not overlap otherwise// Bracket characters with canonical decompositions are supposed to be// treated as if they had been normalized, to allow normalized and non-// normalized text to give the same result. In this implementation that step// is pushed out to the caller. The caller has to ensure that the pairValue// slices contain the rune of the opening bracket after normalization for// any opening or closing bracket.openers *list.List // list of positions for opening brackets// bracket pair positions sorted by location of opening bracketpairPositions bracketPairscodesIsolatedRun []Class // directional bidi codes for an isolated runindexes []int // array of index values into the original string}// matchOpener reports whether characters at given positions form a matching// bracket pair.func ( *bracketPairer) ( []rune, , int) bool {return [.indexes[]] == [.indexes[]]}const maxPairingDepth = 63// locateBrackets locates matching bracket pairs according to BD16.//// This implementation uses a linked list instead of a stack, because, while// elements are added at the front (like a push) they are not generally removed// in atomic 'pop' operations, reducing the benefit of the stack archetype.func ( *bracketPairer) ( []bracketType, []rune) {// traverse the run// do that explicitly (not in a for-each) so we can record positionfor , := range .indexes {// look at the bracket type for each characterif [] == bpNone || .codesIsolatedRun[] != ON {// continue scanningcontinue}switch [] {case bpOpen:// check if maximum pairing depth reachedif .openers.Len() == maxPairingDepth {.openers.Init()return}// remember opener location, most recent first.openers.PushFront()case bpClose:// see if there is a match:= 0for := .openers.Front(); != nil; = .Next() {++:= .Value.(int)if .matchOpener(, , ) {// if the opener matches, add nested pair to the ordered list.pairPositions = append(.pairPositions, bracketPair{, })// remove up to and including matched openerfor ; > 0; -- {.openers.Remove(.openers.Front())}break}}sort.Sort(.pairPositions)// if we get here, the closing bracket matched no openers// and gets ignored}}}// Bracket pairs within an isolating run sequence are processed as units so// that both the opening and the closing paired bracket in a pair resolve to// the same direction.//// N0. Process bracket pairs in an isolating run sequence sequentially in// the logical order of the text positions of the opening paired brackets// using the logic given below. Within this scope, bidirectional types EN// and AN are treated as R.//// Identify the bracket pairs in the current isolating run sequence// according to BD16. For each bracket-pair element in the list of pairs of// text positions://// a Inspect the bidirectional types of the characters enclosed within the// bracket pair.//// b If any strong type (either L or R) matching the embedding direction is// found, set the type for both brackets in the pair to match the embedding// direction.//// o [ e ] o -> o e e e o//// o [ o e ] -> o e o e e//// o [ NI e ] -> o e NI e e//// c Otherwise, if a strong type (opposite the embedding direction) is// found, test for adjacent strong types as follows: 1 First, check// backwards before the opening paired bracket until the first strong type// (L, R, or sos) is found. If that first preceding strong type is opposite// the embedding direction, then set the type for both brackets in the pair// to that type. 2 Otherwise, set the type for both brackets in the pair to// the embedding direction.//// o [ o ] e -> o o o o e//// o [ o NI ] o -> o o o NI o o//// e [ o ] o -> e e o e o//// e [ o ] e -> e e o e e//// e ( o [ o ] NI ) e -> e e o o o o NI e e//// d Otherwise, do not set the type for the current bracket pair. Note that// if the enclosed text contains no strong types the paired brackets will// both resolve to the same level when resolved individually using rules N1// and N2.//// e ( NI ) o -> e ( NI ) o// getStrongTypeN0 maps character's directional code to strong type as required// by rule N0.//// TODO: have separate type for "strong" directionality.func ( *bracketPairer) ( int) Class {switch .codesIsolatedRun[] {// in the scope of N0, number types are treated as Rcase EN, AN, AL, R:return Rcase L:return Ldefault:return ON}}// classifyPairContent reports the strong types contained inside a Bracket Pair,// assuming the given embedding direction.//// It returns ON if no strong type is found. If a single strong type is found,// it returns this type. Otherwise it returns the embedding direction.//// TODO: use separate type for "strong" directionality.func ( *bracketPairer) ( bracketPair, Class) Class {:= ONfor := .opener + 1; < .closer; ++ {:= .getStrongTypeN0()if == ON {continue}if == {return // type matching embedding direction found}=}// return ON if no strong type found, or class opposite to dirEmbedreturn}// classBeforePair determines which strong types are present before a Bracket// Pair. Return R or L if strong type found, otherwise ON.func ( *bracketPairer) ( bracketPair) Class {for := .opener - 1; >= 0; -- {if := .getStrongTypeN0(); != ON {return}}// no strong types found, return sosreturn .sos}// assignBracketType implements rule N0 for a single bracket pair.func ( *bracketPairer) ( bracketPair, Class, []Class) {// rule "N0, a", inspect contents of pair:= .classifyPairContent(, )// dirPair is now L, R, or N (no strong type found)// the following logical tests are performed out of order compared to// the statement of the rules but yield the same resultsif == ON {return // case "d" - nothing to do}if != {// case "c": strong type found, opposite - check before (c.1)= .classBeforePair()if == || == ON {// no strong opposite type found before - use embedding (c.2)=}}// else: case "b", strong type found matching embedding,// no explicit action needed, as dirPair is already set to embedding// direction// set the bracket types to the type found.setBracketsToType(, , )}func ( *bracketPairer) ( bracketPair, Class, []Class) {.codesIsolatedRun[.opener] =.codesIsolatedRun[.closer] =for := .opener + 1; < .closer; ++ {:= .indexes[]if [] != NSM {break}.codesIsolatedRun[] =}for := .closer + 1; < len(.indexes); ++ {:= .indexes[]if [] != NSM {break}.codesIsolatedRun[] =}}// resolveBrackets implements rule N0 for a list of pairs.func ( *bracketPairer) ( Class, []Class) {for , := range .pairPositions {.assignBracketType(, , )}}
![]() |
The pages are generated with Golds v0.6.7. (GOOS=linux GOARCH=amd64) Golds is a Go 101 project developed by Tapir Liu. PR and bug reports are welcome and can be submitted to the issue list. Please follow @Go100and1 (reachable from the left QR code) to get the latest news of Golds. |