diff options
Diffstat (limited to 'vendor/github.com/gobwas/glob/syntax/lexer/lexer.go')
-rw-r--r-- | vendor/github.com/gobwas/glob/syntax/lexer/lexer.go | 273 |
1 files changed, 273 insertions, 0 deletions
diff --git a/vendor/github.com/gobwas/glob/syntax/lexer/lexer.go b/vendor/github.com/gobwas/glob/syntax/lexer/lexer.go new file mode 100644 index 0000000000..a1c8d1962a --- /dev/null +++ b/vendor/github.com/gobwas/glob/syntax/lexer/lexer.go @@ -0,0 +1,273 @@ +package lexer + +import ( + "bytes" + "fmt" + "github.com/gobwas/glob/util/runes" + "unicode/utf8" +) + +const ( + char_any = '*' + char_comma = ',' + char_single = '?' + char_escape = '\\' + char_range_open = '[' + char_range_close = ']' + char_terms_open = '{' + char_terms_close = '}' + char_range_not = '!' + char_range_between = '-' +) + +var specials = []byte{ + char_any, + char_single, + char_escape, + char_range_open, + char_range_close, + char_terms_open, + char_terms_close, +} + +func Special(c byte) bool { + return bytes.IndexByte(specials, c) != -1 +} + +type tokens []Token + +func (i *tokens) shift() (ret Token) { + ret = (*i)[0] + copy(*i, (*i)[1:]) + *i = (*i)[:len(*i)-1] + return +} + +func (i *tokens) push(v Token) { + *i = append(*i, v) +} + +func (i *tokens) empty() bool { + return len(*i) == 0 +} + +var eof rune = 0 + +type lexer struct { + data string + pos int + err error + + tokens tokens + termsLevel int + + lastRune rune + lastRuneSize int + hasRune bool +} + +func NewLexer(source string) *lexer { + l := &lexer{ + data: source, + tokens: tokens(make([]Token, 0, 4)), + } + return l +} + +func (l *lexer) Next() Token { + if l.err != nil { + return Token{Error, l.err.Error()} + } + if !l.tokens.empty() { + return l.tokens.shift() + } + + l.fetchItem() + return l.Next() +} + +func (l *lexer) peek() (r rune, w int) { + if l.pos == len(l.data) { + return eof, 0 + } + + r, w = utf8.DecodeRuneInString(l.data[l.pos:]) + if r == utf8.RuneError { + l.errorf("could not read rune") + r = eof + w = 0 + } + + return +} + +func (l *lexer) read() rune { + if l.hasRune { + l.hasRune = false + l.seek(l.lastRuneSize) + return l.lastRune + } + + r, s := l.peek() + l.seek(s) + + l.lastRune = r + l.lastRuneSize = s + + return r +} + +func (l *lexer) seek(w int) { + l.pos += w +} + +func (l *lexer) unread() { + if l.hasRune { + l.errorf("could not unread rune") + return + } + l.seek(-l.lastRuneSize) + l.hasRune = true +} + +func (l *lexer) errorf(f string, v ...interface{}) { + l.err = fmt.Errorf(f, v...) +} + +func (l *lexer) inTerms() bool { + return l.termsLevel > 0 +} + +func (l *lexer) termsEnter() { + l.termsLevel++ +} + +func (l *lexer) termsLeave() { + l.termsLevel-- +} + +var inTextBreakers = []rune{char_single, char_any, char_range_open, char_terms_open} +var inTermsBreakers = append(inTextBreakers, char_terms_close, char_comma) + +func (l *lexer) fetchItem() { + r := l.read() + switch { + case r == eof: + l.tokens.push(Token{EOF, ""}) + + case r == char_terms_open: + l.termsEnter() + l.tokens.push(Token{TermsOpen, string(r)}) + + case r == char_comma && l.inTerms(): + l.tokens.push(Token{Separator, string(r)}) + + case r == char_terms_close && l.inTerms(): + l.tokens.push(Token{TermsClose, string(r)}) + l.termsLeave() + + case r == char_range_open: + l.tokens.push(Token{RangeOpen, string(r)}) + l.fetchRange() + + case r == char_single: + l.tokens.push(Token{Single, string(r)}) + + case r == char_any: + if l.read() == char_any { + l.tokens.push(Token{Super, string(r) + string(r)}) + } else { + l.unread() + l.tokens.push(Token{Any, string(r)}) + } + + default: + l.unread() + + var breakers []rune + if l.inTerms() { + breakers = inTermsBreakers + } else { + breakers = inTextBreakers + } + l.fetchText(breakers) + } +} + +func (l *lexer) fetchRange() { + var wantHi bool + var wantClose bool + var seenNot bool + for { + r := l.read() + if r == eof { + l.errorf("unexpected end of input") + return + } + + if wantClose { + if r != char_range_close { + l.errorf("expected close range character") + } else { + l.tokens.push(Token{RangeClose, string(r)}) + } + return + } + + if wantHi { + l.tokens.push(Token{RangeHi, string(r)}) + wantClose = true + continue + } + + if !seenNot && r == char_range_not { + l.tokens.push(Token{Not, string(r)}) + seenNot = true + continue + } + + if n, w := l.peek(); n == char_range_between { + l.seek(w) + l.tokens.push(Token{RangeLo, string(r)}) + l.tokens.push(Token{RangeBetween, string(n)}) + wantHi = true + continue + } + + l.unread() // unread first peek and fetch as text + l.fetchText([]rune{char_range_close}) + wantClose = true + } +} + +func (l *lexer) fetchText(breakers []rune) { + var data []rune + var escaped bool + +reading: + for { + r := l.read() + if r == eof { + break + } + + if !escaped { + if r == char_escape { + escaped = true + continue + } + + if runes.IndexRune(breakers, r) != -1 { + l.unread() + break reading + } + } + + escaped = false + data = append(data, r) + } + + if len(data) > 0 { + l.tokens.push(Token{Text, string(data)}) + } +} |