You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

regexp.go 1.7KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263
  1. // Copyright (c) 2020 Couchbase, Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package scorch
  15. import (
  16. "regexp/syntax"
  17. "github.com/blevesearch/vellum/regexp"
  18. )
  19. func parseRegexp(pattern string) (a *regexp.Regexp, prefixBeg, prefixEnd []byte, err error) {
  20. // TODO: potential optimization where syntax.Regexp supports a Simplify() API?
  21. parsed, err := syntax.Parse(pattern, syntax.Perl)
  22. if err != nil {
  23. return nil, nil, nil, err
  24. }
  25. re, err := regexp.NewParsedWithLimit(pattern, parsed, regexp.DefaultLimit)
  26. if err != nil {
  27. return nil, nil, nil, err
  28. }
  29. prefix := literalPrefix(parsed)
  30. if prefix != "" {
  31. prefixBeg := []byte(prefix)
  32. prefixEnd := calculateExclusiveEndFromPrefix(prefixBeg)
  33. return re, prefixBeg, prefixEnd, nil
  34. }
  35. return re, nil, nil, nil
  36. }
  // literalPrefix returns the literal prefix given the parse tree for a
  // regexp, or "" when none can be determined.
  //
  // Only the left-most leaf of the tree is inspected: concatenation nodes are
  // descended through their first operand, and the node reached must be a
  // case-sensitive literal. Any other node kind, a case-folded literal, an
  // empty concatenation, or a nil tree yields "".
  func literalPrefix(s *syntax.Regexp) string {
  	// Traverse the left-most branch in the parse tree as long as the
  	// node represents a concatenation.
  	for s != nil && s.Op == syntax.OpConcat {
  		if len(s.Sub) < 1 {
  			return ""
  		}
  		s = s.Sub[0]
  	}

  	// The loop also terminates when s is nil (nil input or a nil first
  	// operand), so it must be checked before being dereferenced below.
  	if s == nil {
  		return ""
  	}

  	// A literal carrying the FoldCase flag is not returned as a prefix;
  	// only a literal without that flag is.
  	if s.Op == syntax.OpLiteral && s.Flags&syntax.FoldCase == 0 {
  		return string(s.Rune)
  	}

  	return "" // no literal prefix
  }
  51. }