diff options
Diffstat (limited to 'vendor/github.com/couchbase/vellum/regexp/compile.go')
-rw-r--r-- | vendor/github.com/couchbase/vellum/regexp/compile.go | 79 |
1 files changed, 49 insertions, 30 deletions
diff --git a/vendor/github.com/couchbase/vellum/regexp/compile.go b/vendor/github.com/couchbase/vellum/regexp/compile.go index 6922b749db..55280164c7 100644 --- a/vendor/github.com/couchbase/vellum/regexp/compile.go +++ b/vendor/github.com/couchbase/vellum/regexp/compile.go @@ -18,17 +18,27 @@ import ( "regexp/syntax" "unicode" + unicode_utf8 "unicode/utf8" + "github.com/couchbase/vellum/utf8" ) type compiler struct { sizeLimit uint insts prog + instsPool []inst + + sequences utf8.Sequences + rangeStack utf8.RangeStack + startBytes []byte + endBytes []byte } func newCompiler(sizeLimit uint) *compiler { return &compiler{ - sizeLimit: sizeLimit, + sizeLimit: sizeLimit, + startBytes: make([]byte, unicode_utf8.UTFMax), + endBytes: make([]byte, unicode_utf8.UTFMax), } } @@ -37,13 +47,13 @@ func (c *compiler) compile(ast *syntax.Regexp) (prog, error) { if err != nil { return nil, err } - c.insts = append(c.insts, &inst{ - op: OpMatch, - }) + inst := c.allocInst() + inst.op = OpMatch + c.insts = append(c.insts, inst) return c.insts, nil } -func (c *compiler) c(ast *syntax.Regexp) error { +func (c *compiler) c(ast *syntax.Regexp) (err error) { if ast.Flags&syntax.NonGreedy > 1 { return ErrNoLazy } @@ -67,11 +77,12 @@ func (c *compiler) c(ast *syntax.Regexp) error { next.Rune = next.Rune0[0:2] return c.c(&next) } - seqs, err := utf8.NewSequences(r, r) + c.sequences, c.rangeStack, err = utf8.NewSequencesPrealloc( + r, r, c.sequences, c.rangeStack, c.startBytes, c.endBytes) if err != nil { return err } - for _, seq := range seqs { + for _, seq := range c.sequences { c.compileUtf8Ranges(seq) } } @@ -106,8 +117,7 @@ func (c *compiler) c(ast *syntax.Regexp) error { if len(ast.Sub) == 0 { return nil } - jmpsToEnd := []uint{} - + jmpsToEnd := make([]uint, 0, len(ast.Sub)-1) // does not handle last entry for i := 0; i < len(ast.Sub)-1; i++ { sub := ast.Sub[i] @@ -188,7 +198,8 @@ func (c *compiler) c(ast *syntax.Regexp) error { return err } } - var splits, starts []uint + splits := make([]uint, 0, ast.Max-ast.Min) + starts := make([]uint, 0, ast.Max-ast.Min) for i := ast.Min; i < ast.Max; i++ { splits = append(splits, c.emptySplit()) starts = append(starts, uint(len(c.insts))) @@ -218,8 +229,7 @@ func (c *compiler) compileClass(ast *syntax.Regexp) error { if len(ast.Rune) == 0 { return nil } - var jmps []uint - + jmps := make([]uint, 0, len(ast.Rune)-2) // does not do last pair for i := 0; i < len(ast.Rune)-2; i += 2 { rstart := ast.Rune[i] @@ -249,16 +259,16 @@ func (c *compiler) compileClass(ast *syntax.Regexp) error { return nil } -func (c *compiler) compileClassRange(startR, endR rune) error { - seqs, err := utf8.NewSequences(startR, endR) +func (c *compiler) compileClassRange(startR, endR rune) (err error) { + c.sequences, c.rangeStack, err = utf8.NewSequencesPrealloc( + startR, endR, c.sequences, c.rangeStack, c.startBytes, c.endBytes) if err != nil { return err } - var jmps []uint - + jmps := make([]uint, 0, len(c.sequences)-1) // does not do last entry - for i := 0; i < len(seqs)-1; i++ { - seq := seqs[i] + for i := 0; i < len(c.sequences)-1; i++ { + seq := c.sequences[i] split := c.emptySplit() j1 := c.top() c.compileUtf8Ranges(seq) @@ -267,7 +277,7 @@ func (c *compiler) compileClassRange(startR, endR rune) error { c.setSplit(split, j1, j2) } // handle last entry - c.compileUtf8Ranges(seqs[len(seqs)-1]) + c.compileUtf8Ranges(c.sequences[len(c.sequences)-1]) end := c.top() for _, jmp := range jmps { c.setJump(jmp, end) @@ -278,25 +288,25 @@ func (c *compiler) compileClassRange(startR, endR rune) error { func (c *compiler) compileUtf8Ranges(seq utf8.Sequence) { for _, r := range seq { - c.insts = append(c.insts, &inst{ - op: OpRange, - rangeStart: r.Start, - rangeEnd: r.End, - }) + inst := c.allocInst() + inst.op = OpRange + inst.rangeStart = r.Start + inst.rangeEnd = r.End + c.insts = append(c.insts, inst) } } func (c *compiler) emptySplit() uint { - c.insts = append(c.insts, &inst{ - op: OpSplit, - }) + inst := c.allocInst() + inst.op = OpSplit + c.insts = append(c.insts, inst) return c.top() - 1 } func (c *compiler) emptyJump() uint { - c.insts = append(c.insts, &inst{ - op: OpJmp, - }) + inst := c.allocInst() + inst.op = OpJmp + c.insts = append(c.insts, inst) return c.top() - 1 } @@ -314,3 +324,12 @@ func (c *compiler) setJump(i, pc uint) { func (c *compiler) top() uint { return uint(len(c.insts)) } + +func (c *compiler) allocInst() *inst { + if len(c.instsPool) <= 0 { + c.instsPool = make([]inst, 16) + } + inst := &c.instsPool[0] + c.instsPool = c.instsPool[1:] + return inst +} |