You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

scanner.go 2.3KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117
  1. package revision
  2. import (
  3. "bufio"
  4. "io"
  5. "unicode"
  6. )
// runeCategoryValidator is a predicate over a single rune: it reports
// whether r belongs to the rune category the validator stands for.
// tokenizeExpression uses it to decide whether a rune continues the
// current token; scan supplies unicode.IsLetter and unicode.IsNumber.
type runeCategoryValidator func(r rune) bool
  10. // tokenizeExpression aggregates a series of runes matching check predicate into a single
  11. // string and provides given tokenType as token type
  12. func tokenizeExpression(ch rune, tokenType token, check runeCategoryValidator, r *bufio.Reader) (token, string, error) {
  13. var data []rune
  14. data = append(data, ch)
  15. for {
  16. c, _, err := r.ReadRune()
  17. if c == zeroRune {
  18. break
  19. }
  20. if err != nil {
  21. return tokenError, "", err
  22. }
  23. if check(c) {
  24. data = append(data, c)
  25. } else {
  26. err := r.UnreadRune()
  27. if err != nil {
  28. return tokenError, "", err
  29. }
  30. return tokenType, string(data), nil
  31. }
  32. }
  33. return tokenType, string(data), nil
  34. }
  35. var zeroRune = rune(0)
// scanner represents a lexical scanner. It reads runes from a buffered
// reader and groups them into tokens through its scan method.
type scanner struct {
	// r is the buffered source of runes. scan hands it to tokenizeExpression,
	// which relies on its UnreadRune to push back a rune that does not belong
	// to the current token.
	r *bufio.Reader
}
  40. // newScanner returns a new instance of scanner.
  41. func newScanner(r io.Reader) *scanner {
  42. return &scanner{r: bufio.NewReader(r)}
  43. }
  44. // Scan extracts tokens and their strings counterpart
  45. // from the reader
  46. func (s *scanner) scan() (token, string, error) {
  47. ch, _, err := s.r.ReadRune()
  48. if err != nil && err != io.EOF {
  49. return tokenError, "", err
  50. }
  51. switch ch {
  52. case zeroRune:
  53. return eof, "", nil
  54. case ':':
  55. return colon, string(ch), nil
  56. case '~':
  57. return tilde, string(ch), nil
  58. case '^':
  59. return caret, string(ch), nil
  60. case '.':
  61. return dot, string(ch), nil
  62. case '/':
  63. return slash, string(ch), nil
  64. case '{':
  65. return obrace, string(ch), nil
  66. case '}':
  67. return cbrace, string(ch), nil
  68. case '-':
  69. return minus, string(ch), nil
  70. case '@':
  71. return at, string(ch), nil
  72. case '\\':
  73. return aslash, string(ch), nil
  74. case '?':
  75. return qmark, string(ch), nil
  76. case '*':
  77. return asterisk, string(ch), nil
  78. case '[':
  79. return obracket, string(ch), nil
  80. case '!':
  81. return emark, string(ch), nil
  82. }
  83. if unicode.IsSpace(ch) {
  84. return space, string(ch), nil
  85. }
  86. if unicode.IsControl(ch) {
  87. return control, string(ch), nil
  88. }
  89. if unicode.IsLetter(ch) {
  90. return tokenizeExpression(ch, word, unicode.IsLetter, s.r)
  91. }
  92. if unicode.IsNumber(ch) {
  93. return tokenizeExpression(ch, number, unicode.IsNumber, s.r)
  94. }
  95. return tokenError, string(ch), nil
  96. }