You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

lua_lexer.lua 4.0KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161
  1. --[[
  2. Copyright (c) 2019, Vsevolod Stakhov <vsevolod@highsecure.ru>
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. ]]--
  13. --[[ Lua LPEG grammar based on https://github.com/xolox/lua-lxsh/ ]]
  14. local lpeg = require "lpeg"
  15. local P = lpeg.P
  16. local R = lpeg.R
  17. local S = lpeg.S
  -- Primitive building blocks shared by the token patterns.
  -- D: one decimal digit.
  local D = R('09')
  -- I: one identifier character (ASCII letter, underscore, or any byte
  -- in the range \127-\255).
  local I = P('_') + R('AZ', 'az', '\127\255')
  -- B: zero-width word boundary; succeeds when the next character is
  -- neither an identifier character nor a digit (including at end of input).
  local B = -(D + I)
  -- EOS: zero-width assertion that no input remains.
  local EOS = -P(1)
  -- Long bracket literal (`[[...]]`, `[=[...]=]`, ...), used for both long
  -- strings and long comments.
  -- The lookahead only checks that an opening long bracket is present; the
  -- match-time function then reads the bracket level, searches (plain text)
  -- for the closing bracket of the same level and returns the position just
  -- past it. Returning nil makes the match fail (no opener or no closer).
  local longstring = #(P('[') * P('=')^0 * P('[')) * P(function(subject, pos)
    local equals = subject:match('^%[(=*)%[', pos)
    if not equals then
      return nil
    end
    local closing = ']' .. equals .. ']'
    local _, stop = subject:find(closing, pos, true)
    return stop and stop + 1
  end)
  -- Quoted string literals.
  -- The body is any run of: a character other than the delimiter, CR, LF,
  -- FF or backslash; or a backslash followed by any single character.
  local escaped_char = P('\\') * P(1)
  local singlequoted = P("'") * ((P(1) - S("'\r\n\f\\")) + escaped_char)^0 * P("'")
  local doublequoted = P('"') * ((P(1) - S('"\r\n\f\\')) + escaped_char)^0 * P('"')
  -- Comment patterns.
  -- eol: a CRLF pair or a bare LF.
  local eol = P('\r\n') + P('\n')
  -- line: everything up to (not including) CR, LF or FF, then an optional
  -- end-of-line sequence.
  local line = (P(1) - S('\r\n\f'))^0 * eol^-1
  -- `--` followed by the rest of the line.
  local singleline = P('--') * line
  -- `--` followed by a long bracket literal.
  local multiline = P('--') * longstring
  -- Numeric literal patterns.
  local sign = S('+-')^-1                        -- optional exponent sign
  local decimal = D^1                            -- one or more digits
  local hexdigit = R('09', 'AF', 'af')
  local hexadecimal = P('0') * S('xX') * hexdigit^1
  -- `1.`, `1.5` or `.5`
  local float = (D^1 * P('.') * D^0) + (P('.') * D^1)
  -- `e`/`E`, optional sign, digits
  local exponent = S('eE') * sign * D^1
  -- Float or integer with an optional exponent part.
  local maybeexp = (float + decimal) * exponent^-1
  -- Build a pattern matching any of the whitespace-separated words in
  -- `keywords`, provided the word ends on a word boundary or at end of input.
  -- Longer words are tried first: an ordered choice commits to the first
  -- alternative that matches, so `else` must not be tried before `elseif`.
  local function compile_keywords(keywords)
    local words = {}
    for entry in keywords:gmatch('%S+') do
      words[#words + 1] = entry
    end

    -- Longest words first
    table.sort(words, function(lhs, rhs)
      return #lhs > #rhs
    end)

    local choice
    for i = 1, #words do
      local literal = lpeg.P(words[i])
      if choice then
        choice = choice + literal
      else
        choice = literal
      end
    end

    -- The word must be followed by a boundary or the end of the input
    return choice * (B + EOS)
  end
  -- Identifiers
  -- ident: one identifier character followed by identifier characters/digits.
  local ident = I * (I + D)^0
  -- expr: zero or more `.name` suffixes, so `a.b.c` is lexed as one token.
  local expr = ('.' * ident)^0

  -- Ordered list of {token kind, pattern} pairs. The order is significant:
  -- the compiled lexer tries the patterns top to bottom and takes the first
  -- one that matches at the current position.
  local patterns = {
    {'whitespace', S'\r\n\f\t\v '^1},
    {'constant', (P'true' + 'false' + 'nil') * B},
    {'string', singlequoted + doublequoted + longstring},
    -- Comments are tried before operators because both start with `-`.
    {'comment', multiline + singleline},
    {'number', hexadecimal + maybeexp},
    -- The word operators must end on a word boundary; otherwise, since
    -- operators are tried before keywords and identifiers, names such as
    -- `notify`, `order` or `android` would be split into an operator
    -- followed by an identifier.
    {'operator', (P'not' + 'and' + 'or') * B
        + '...' + '..' + '~=' + '==' + '>=' + '<='
        + S']{=>^[<;)*(%}+-:,/.#'},
    {'keyword', compile_keywords([[
      break do else elseif end for function if in local repeat return then until while
    ]])},
    -- A name with any trailing `.name` parts. Plain sequencing matches the
    -- same text as a match-time capture that re-runs `expr` by hand.
    {'identifier', ident * expr},
    -- Fallback: any single character that nothing else recognised.
    {'error', 1},
  }
  -- Cached result of compile_patterns(); nil until the first call.
  local compiled

  -- Combine every entry of `patterns` into a single ordered choice and cache
  -- it. Each alternative yields two captures: the token kind (constant
  -- capture) and the position right after the matched text.
  local function compile_patterns()
    if compiled then
      return compiled
    end

    local function tagged(entry)
      local kind, grammar = entry[1], entry[2]
      return lpeg.Cc(kind) * lpeg.P(grammar) * lpeg.Cp()
    end

    local union = tagged(patterns[1])
    for idx = 2, #patterns do
      union = union + tagged(patterns[idx])
    end

    compiled = union
    return compiled
  end
  -- Advance a (line, column) position over the text of one token.
  -- `lnum` and `cnum` default to 1 when omitted. Every "\n" in `token` moves
  -- to the next line; if the token contains a newline, the column becomes one
  -- more than the number of characters after the last newline, otherwise the
  -- column grows by the token length.
  -- Returns the new line number and the new column number.
  local function sync(token, lnum, cnum)
    local line = lnum or 1
    local column = cnum or 1

    -- Walk over the newlines, remembering where the last one is
    local last_newline
    local from = 1
    while true do
      local at = token:find('\n', from, true)
      if not at then
        break
      end
      line = line + 1
      last_newline = at
      from = at + 1
    end

    if last_newline then
      column = #token - last_newline + 1
    else
      column = column + #token
    end

    return line, column
  end
  local exports = {}

  -- Return an iterator over the tokens of `input`.
  -- Each call of the iterator yields: token kind, token text, and the line
  -- and column at which the token starts. The iterator yields nothing once
  -- no pattern matches at the current position.
  function exports.gmatch(input)
    local grammar = compile_patterns()
    local pos, line, column = 1, 1, 1

    local function next_token()
      local kind, stop = grammar:match(input, pos)
      if not (kind and stop) then
        return
      end

      local text = input:sub(pos, stop - 1)
      -- Report where the token starts, then move past it
      local start_line, start_column = line, column
      pos = stop
      line, column = sync(text, line, column)
      return kind, text, start_line, start_column
    end

    return next_token
  end
  -- Lex `input` completely and return an array with one entry per token,
  -- each of the form {kind, text, line, column}.
  function exports.lex_to_table(input)
    local tokens = {}
    local count = 0
    for kind, text, lnum, cnum in exports.gmatch(input) do
      count = count + 1
      tokens[count] = {kind, text, lnum, cnum}
    end
    return tokens
  end

  return exports
  133. return exports