You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163
  1. --[[
  2. Copyright (c) 2022, Vsevolod Stakhov <vsevolod@rspamd.com>
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. ]]--
  13. --[[ Lua LPEG grammar based on https://github.com/xolox/lua-lxsh/ ]]
  14. local lpeg = require "lpeg"
  15. local P = lpeg.P
  16. local R = lpeg.R
  17. local S = lpeg.S
  18. local D = R '09' -- Digits
  19. local I = R('AZ', 'az', '\127\255') + '_' -- Identifiers
  20. local B = -(I + D) -- Word boundary
  21. local EOS = -lpeg.P(1) -- end of string
  22. -- Pattern for long strings and long comments.
  23. local longstring = #(P '[[' + (P '[' * P '=' ^ 0 * '[')) * P(function(input, index)
  24. local level = input:match('^%[(=*)%[', index)
  25. if level then
  26. local _, last = input:find(']' .. level .. ']', index, true)
  27. if last then
  28. return last + 1
  29. end
  30. end
  31. end)
  32. -- String literals.
  33. local singlequoted = P "'" * ((1 - S "'\r\n\f\\") + (P '\\' * 1)) ^ 0 * "'"
  34. local doublequoted = P '"' * ((1 - S '"\r\n\f\\') + (P '\\' * 1)) ^ 0 * '"'
  35. -- Comments.
  36. local eol = P '\r\n' + '\n'
  37. local line = (1 - S '\r\n\f') ^ 0 * eol ^ -1
  38. local singleline = P '--' * line
  39. local multiline = P '--' * longstring
  40. -- Numbers.
  41. local sign = S '+-' ^ -1
  42. local decimal = D ^ 1
  43. local hexadecimal = P '0' * S 'xX' * R('09', 'AF', 'af') ^ 1
  44. local float = D ^ 1 * P '.' * D ^ 0 + P '.' * D ^ 1
  45. local maybeexp = (float + decimal) * (S 'eE' * sign * D ^ 1) ^ -1
  46. local function compile_keywords(keywords)
  47. local list = {}
  48. for word in keywords:gmatch('%S+') do
  49. list[#list + 1] = word
  50. end
  51. -- Sort by length
  52. table.sort(list, function(a, b)
  53. return #a > #b
  54. end)
  55. local pattern
  56. for _, word in ipairs(list) do
  57. local p = lpeg.P(word)
  58. pattern = pattern and (pattern + p) or p
  59. end
  60. local AB = B + EOS -- ending boundary
  61. return pattern * AB
  62. end
  63. -- Identifiers
  64. local ident = I * (I + D) ^ 0
  65. local expr = ('.' * ident) ^ 0
  66. local patterns = {
  67. { 'whitespace', S '\r\n\f\t\v ' ^ 1 },
  68. { 'constant', (P 'true' + 'false' + 'nil') * B },
  69. { 'string', singlequoted + doublequoted + longstring },
  70. { 'comment', multiline + singleline },
  71. { 'number', hexadecimal + maybeexp },
  72. { 'operator', P 'not' + '...' + 'and' + '..' + '~=' + '==' + '>=' + '<='
  73. + 'or' + S ']{=>^[<;)*(%}+-:,/.#' },
  74. { 'keyword', compile_keywords([[
  75. break do else elseif end for function if in local repeat return then until while
  76. ]]) },
  77. { 'identifier', lpeg.Cmt(ident,
  78. function(input, index)
  79. return expr:match(input, index)
  80. end)
  81. },
  82. { 'error', 1 },
  83. }
  84. local compiled
  85. local function compile_patterns()
  86. if not compiled then
  87. local function process(elt)
  88. local n, grammar = elt[1], elt[2]
  89. return lpeg.Cc(n) * lpeg.P(grammar) * lpeg.Cp()
  90. end
  91. local any = process(patterns[1])
  92. for i = 2, #patterns do
  93. any = any + process(patterns[i])
  94. end
  95. compiled = any
  96. end
  97. return compiled
  98. end
  99. local function sync(token, lnum, cnum)
  100. local lastidx
  101. lnum, cnum = lnum or 1, cnum or 1
  102. if token:find '\n' then
  103. for i in token:gmatch '()\n' do
  104. lnum = lnum + 1
  105. lastidx = i
  106. end
  107. cnum = #token - lastidx + 1
  108. else
  109. cnum = cnum + #token
  110. end
  111. return lnum, cnum
  112. end
  113. local exports = {}
  114. exports.gmatch = function(input)
  115. local parser = compile_patterns()
  116. local index, lnum, cnum = 1, 1, 1
  117. return function()
  118. local kind, after = parser:match(input, index)
  119. if kind and after then
  120. local text = input:sub(index, after - 1)
  121. local oldlnum, oldcnum = lnum, cnum
  122. index = after
  123. lnum, cnum = sync(text, lnum, cnum)
  124. return kind, text, oldlnum, oldcnum
  125. end
  126. end
  127. end
  128. exports.lex_to_table = function(input)
  129. local out = {}
  130. for kind, text, lnum, cnum in exports.gmatch(input) do
  131. out[#out + 1] = { kind, text, lnum, cnum }
  132. end
  133. return out
  134. end
  135. return exports