package syntax
import (
"strconv"
"strings"
"unicode"
)
// A Regexp is a node in a regular expression syntax tree.
type Regexp struct {
	// Op is the operator that determines how the remaining fields are read.
	Op Op
	// Flags holds per-node flag bits; code in this file tests WasDollar,
	// NonGreedy and FoldCase.
	Flags Flags
	// Sub holds the child nodes. Capture and repetition nodes use Sub[0]
	// as their single operand.
	Sub []*Regexp
	// Sub0 is presumably inline backing storage for a one-element Sub;
	// it is not referenced in the visible part of this file (TODO confirm).
	Sub0 [1]*Regexp
	// Rune holds the runes of an OpLiteral, or the lo/hi range pairs of an
	// OpCharClass.
	Rune []rune
	// Rune0 is presumably inline backing storage for a short Rune;
	// it is not referenced in the visible part of this file (TODO confirm).
	Rune0 [2]rune
	// Min is the lower repeat bound of an OpRepeat node.
	Min int
	// Max is the upper repeat bound of an OpRepeat node. A negative value
	// is printed as an open-ended bound ("{Min,}").
	Max int
	// Cap is the capture index of an OpCapture node.
	Cap int
	// Name is the capture name of an OpCapture node; empty when unnamed.
	Name string
}
// An Op is a single regular expression operator.
type Op uint8

// Operators are numbered consecutively starting at 1, so the zero Op value
// does not name any operator.
const (
	// OpNoMatch matches no strings; printed as [^\x00-\x{10FFFF}].
	OpNoMatch Op = iota + 1
	// OpEmptyMatch matches the empty string; printed as (?:).
	OpEmptyMatch
	// OpLiteral matches the rune sequence stored in Rune.
	OpLiteral
	// OpCharClass matches one rune from the lo/hi range pairs stored in Rune.
	OpCharClass
	// OpAnyCharNotNL is printed as (?-s:.).
	OpAnyCharNotNL
	// OpAnyChar is printed as (?s:.).
	OpAnyChar
	// OpBeginLine is printed as (?m:^).
	OpBeginLine
	// OpEndLine is printed as (?m:$).
	OpEndLine
	// OpBeginText is printed as \A.
	OpBeginText
	// OpEndText is printed as \z, or as (?-m:$) when WasDollar is set.
	OpEndText
	// OpWordBoundary is printed as \b.
	OpWordBoundary
	// OpNoWordBoundary is printed as \B.
	OpNoWordBoundary
	// OpCapture is a capturing group with index Cap, optional Name, and
	// body Sub[0].
	OpCapture
	// OpStar repeats Sub[0] with the * operator.
	OpStar
	// OpPlus repeats Sub[0] with the + operator.
	OpPlus
	// OpQuest repeats Sub[0] with the ? operator.
	OpQuest
	// OpRepeat repeats Sub[0] with a {Min,Max} bound.
	OpRepeat
	// OpConcat is the concatenation of the nodes in Sub.
	OpConcat
	// OpAlternate is the alternation of the nodes in Sub.
	OpAlternate
)

// opPseudo is not used in the visible part of this file; presumably Op
// values at or above it are reserved for internal pseudo-operators
// (TODO confirm against the parser).
const opPseudo = Op(128)
// Equal reports whether x and y have identical structure.
//
// Two nil nodes are equal; a nil node never equals a non-nil one. Nodes with
// different operators are never equal. Beyond that, only the fields that are
// meaningful for the operator are compared; operators not listed in the
// switch carry no extra state and compare equal on Op alone.
func (x *Regexp) Equal(y *Regexp) bool {
	if x == nil || y == nil {
		return x == y
	}
	if x.Op != y.Op {
		return false
	}

	switch x.Op {
	case OpEndText:
		// Only the WasDollar bit distinguishes two end-of-text nodes.
		return x.Flags&WasDollar == y.Flags&WasDollar

	case OpLiteral, OpCharClass:
		if len(x.Rune) != len(y.Rune) {
			return false
		}
		for i := range x.Rune {
			if x.Rune[i] != y.Rune[i] {
				return false
			}
		}
		return true

	case OpAlternate, OpConcat:
		if len(x.Sub) != len(y.Sub) {
			return false
		}
		for i := range x.Sub {
			if !x.Sub[i].Equal(y.Sub[i]) {
				return false
			}
		}
		return true

	case OpStar, OpPlus, OpQuest:
		return x.Flags&NonGreedy == y.Flags&NonGreedy &&
			x.Sub[0].Equal(y.Sub[0])

	case OpRepeat:
		return x.Flags&NonGreedy == y.Flags&NonGreedy &&
			x.Min == y.Min &&
			x.Max == y.Max &&
			x.Sub[0].Equal(y.Sub[0])

	case OpCapture:
		return x.Cap == y.Cap &&
			x.Name == y.Name &&
			x.Sub[0].Equal(y.Sub[0])
	}
	return true
}
// writeRegexp appends the textual syntax for re to b.
//
// Every operator is printed in an explicit form (for example (?s:.) rather
// than a bare dot). Operands are wrapped in a non-capturing group (?:...)
// where the code below decides one is needed: repetition operands whose Op
// sorts after OpCapture or that are multi-rune literals, and alternations
// nested directly inside a concatenation. An operator value outside the
// known set is printed as "<invalid opN>".
func writeRegexp(b *strings.Builder, re *Regexp) {
	switch re.Op {
	case OpNoMatch:
		b.WriteString(`[^\x00-\x{10FFFF}]`)

	case OpEmptyMatch:
		b.WriteString(`(?:)`)

	case OpLiteral:
		// A case-folded literal is wrapped in (?i:...).
		fold := re.Flags&FoldCase != 0
		if fold {
			b.WriteString(`(?i:`)
		}
		for _, r := range re.Rune {
			escape(b, r, false)
		}
		if fold {
			b.WriteString(`)`)
		}

	case OpCharClass:
		n := len(re.Rune)
		if n%2 != 0 {
			// Rune must hold lo/hi pairs; an odd length cannot be printed.
			b.WriteString(`[invalid char class]`)
			break
		}

		// writeRange prints a single rune or a lo-hi range, escaping a
		// literal '-' so it is not read as a range separator.
		writeRange := func(lo, hi rune) {
			escape(b, lo, lo == '-')
			if lo != hi {
				b.WriteByte('-')
				escape(b, hi, hi == '-')
			}
		}

		b.WriteByte('[')
		switch {
		case n == 0:
			b.WriteString(`^\x00-\x{10FFFF}`)
		case n > 2 && re.Rune[0] == 0 && re.Rune[n-1] == unicode.MaxRune:
			// The class spans from 0 to MaxRune with holes in it: print it
			// as a negated class listing the gaps between adjacent ranges.
			b.WriteByte('^')
			for i := 1; i < n-1; i += 2 {
				writeRange(re.Rune[i]+1, re.Rune[i+1]-1)
			}
		default:
			for i := 0; i < n; i += 2 {
				writeRange(re.Rune[i], re.Rune[i+1])
			}
		}
		b.WriteByte(']')

	case OpAnyCharNotNL:
		b.WriteString(`(?-s:.)`)

	case OpAnyChar:
		b.WriteString(`(?s:.)`)

	case OpBeginLine:
		b.WriteString(`(?m:^)`)

	case OpEndLine:
		b.WriteString(`(?m:$)`)

	case OpBeginText:
		b.WriteString(`\A`)

	case OpEndText:
		if re.Flags&WasDollar == 0 {
			b.WriteString(`\z`)
		} else {
			b.WriteString(`(?-m:$)`)
		}

	case OpWordBoundary:
		b.WriteString(`\b`)

	case OpNoWordBoundary:
		b.WriteString(`\B`)

	case OpCapture:
		if re.Name == "" {
			b.WriteByte('(')
		} else {
			b.WriteString(`(?P<`)
			b.WriteString(re.Name)
			b.WriteByte('>')
		}
		// An empty body is printed as nothing, giving "()" rather than
		// "((?:))".
		if body := re.Sub[0]; body.Op != OpEmptyMatch {
			writeRegexp(b, body)
		}
		b.WriteByte(')')

	case OpStar, OpPlus, OpQuest, OpRepeat:
		operand := re.Sub[0]
		// Group the operand when the postfix operator would otherwise bind
		// to only part of it.
		grouped := operand.Op > OpCapture ||
			(operand.Op == OpLiteral && len(operand.Rune) > 1)
		if grouped {
			b.WriteString(`(?:`)
		}
		writeRegexp(b, operand)
		if grouped {
			b.WriteString(`)`)
		}

		switch re.Op {
		case OpStar:
			b.WriteByte('*')
		case OpPlus:
			b.WriteByte('+')
		case OpQuest:
			b.WriteByte('?')
		case OpRepeat:
			b.WriteByte('{')
			b.WriteString(strconv.Itoa(re.Min))
			if re.Max != re.Min {
				b.WriteByte(',')
				// A negative Max leaves the upper bound open: "{Min,}".
				if re.Max >= 0 {
					b.WriteString(strconv.Itoa(re.Max))
				}
			}
			b.WriteByte('}')
		}
		if re.Flags&NonGreedy != 0 {
			b.WriteByte('?')
		}

	case OpConcat:
		for _, part := range re.Sub {
			// '|' binds looser than concatenation, so a nested alternation
			// has to be grouped.
			grouped := part.Op == OpAlternate
			if grouped {
				b.WriteString(`(?:`)
			}
			writeRegexp(b, part)
			if grouped {
				b.WriteString(`)`)
			}
		}

	case OpAlternate:
		for i, branch := range re.Sub {
			if i != 0 {
				b.WriteByte('|')
			}
			writeRegexp(b, branch)
		}

	default:
		b.WriteString("<invalid op" + strconv.Itoa(int(re.Op)) + ">")
	}
}
// String returns the textual syntax for re, as produced by writeRegexp.
func (re *Regexp) String() string {
	out := new(strings.Builder)
	writeRegexp(out, re)
	return out.String()
}
// meta lists the printable runes that escape always prefixes with a
// backslash.
const meta = `\.+*?()|[]{}^$`

// escape appends r to b in regular expression syntax.
//
// A printable rune is written as itself, preceded by a backslash when it is
// listed in meta or when force is true. A non-printable rune is written as
// one of the short escapes \a \f \n \r \t \v when one applies, otherwise as
// \xNN (two hex digits) below 0x100 and as \x{N...} from 0x100 upward.
// force has no effect on non-printable runes.
func escape(b *strings.Builder, r rune, force bool) {
	if unicode.IsPrint(r) {
		if force || strings.ContainsRune(meta, r) {
			b.WriteByte('\\')
		}
		b.WriteRune(r)
		return
	}

	// Control characters that have a dedicated short escape.
	short := ""
	switch r {
	case '\a':
		short = `\a`
	case '\f':
		short = `\f`
	case '\n':
		short = `\n`
	case '\r':
		short = `\r`
	case '\t':
		short = `\t`
	case '\v':
		short = `\v`
	}
	if short != "" {
		b.WriteString(short)
		return
	}

	// Everything else is written in hexadecimal.
	digits := strconv.FormatInt(int64(r), 16)
	if r >= 0x100 {
		b.WriteString(`\x{`)
		b.WriteString(digits)
		b.WriteString(`}`)
		return
	}
	b.WriteString(`\x`)
	if len(digits) == 1 {
		// Pad to two digits so the \xNN form has a fixed width.
		b.WriteByte('0')
	}
	b.WriteString(digits)
}
// MaxCap walks the tree rooted at re and returns the largest capture index
// found on any OpCapture node, or 0 if no such node has a positive index.
func (re *Regexp) MaxCap() int {
	highest := 0
	if re.Op == OpCapture {
		highest = re.Cap
	}
	for i := range re.Sub {
		if c := re.Sub[i].MaxCap(); c > highest {
			highest = c
		}
	}
	return highest
}
// CapNames walks the tree rooted at re and returns the capture group names
// indexed by capture index. The slice has MaxCap()+1 entries; entries for
// indexes with no OpCapture node, and for unnamed groups, are empty strings.
func (re *Regexp) CapNames() []string {
	size := re.MaxCap() + 1
	result := make([]string, size)
	re.capNames(result)
	return result
}
// capNames records the name of every OpCapture node in the tree rooted at
// re into names, at the position given by the node's capture index. names
// must be long enough to be indexed by every Cap in the tree.
func (re *Regexp) capNames(names []string) {
	if re.Op == OpCapture {
		names[re.Cap] = re.Name
	}
	for i := range re.Sub {
		re.Sub[i].capNames(names)
	}
}
The pages are generated with Golds v0.6.7. (GOOS=linux GOARCH=amd64)
Golds is a Go 101 project developed by Tapir Liu.
PRs and bug reports are welcome and can be submitted to the issue list.
Please follow @Go100and1 (reachable from the left QR code) to get the latest news of Golds.