123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163 |
- --[[
- Copyright (c) 2022, Vsevolod Stakhov <vsevolod@rspamd.com>
-
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- ]]--
-
- --[[ Lua LPEG grammar based on https://github.com/xolox/lua-lxsh/ ]]
-
-
- local lpeg = require "lpeg"
-
- local P = lpeg.P
- local R = lpeg.R
- local S = lpeg.S
- local D = R '09' -- Digits
- local I = R('AZ', 'az', '\127\255') + '_' -- Identifiers
- local B = -(I + D) -- Word boundary
- local EOS = -lpeg.P(1) -- end of string
-
- -- Pattern for long strings and long comments.
- local longstring = #(P '[[' + (P '[' * P '=' ^ 0 * '[')) * P(function(input, index)
- local level = input:match('^%[(=*)%[', index)
- if level then
- local _, last = input:find(']' .. level .. ']', index, true)
- if last then
- return last + 1
- end
- end
- end)
-
- -- String literals.
- local singlequoted = P "'" * ((1 - S "'\r\n\f\\") + (P '\\' * 1)) ^ 0 * "'"
- local doublequoted = P '"' * ((1 - S '"\r\n\f\\') + (P '\\' * 1)) ^ 0 * '"'
-
- -- Comments.
- local eol = P '\r\n' + '\n'
- local line = (1 - S '\r\n\f') ^ 0 * eol ^ -1
- local singleline = P '--' * line
- local multiline = P '--' * longstring
-
- -- Numbers.
- local sign = S '+-' ^ -1
- local decimal = D ^ 1
- local hexadecimal = P '0' * S 'xX' * R('09', 'AF', 'af') ^ 1
- local float = D ^ 1 * P '.' * D ^ 0 + P '.' * D ^ 1
- local maybeexp = (float + decimal) * (S 'eE' * sign * D ^ 1) ^ -1
-
- local function compile_keywords(keywords)
- local list = {}
- for word in keywords:gmatch('%S+') do
- list[#list + 1] = word
- end
- -- Sort by length
- table.sort(list, function(a, b)
- return #a > #b
- end)
-
- local pattern
- for _, word in ipairs(list) do
- local p = lpeg.P(word)
- pattern = pattern and (pattern + p) or p
- end
-
- local AB = B + EOS -- ending boundary
- return pattern * AB
- end
-
- -- Identifiers
- local ident = I * (I + D) ^ 0
- local expr = ('.' * ident) ^ 0
-
- local patterns = {
- { 'whitespace', S '\r\n\f\t\v ' ^ 1 },
- { 'constant', (P 'true' + 'false' + 'nil') * B },
- { 'string', singlequoted + doublequoted + longstring },
- { 'comment', multiline + singleline },
- { 'number', hexadecimal + maybeexp },
- { 'operator', P 'not' + '...' + 'and' + '..' + '~=' + '==' + '>=' + '<='
- + 'or' + S ']{=>^[<;)*(%}+-:,/.#' },
- { 'keyword', compile_keywords([[
- break do else elseif end for function if in local repeat return then until while
- ]]) },
- { 'identifier', lpeg.Cmt(ident,
- function(input, index)
- return expr:match(input, index)
- end)
- },
- { 'error', 1 },
- }
-
- local compiled
-
- local function compile_patterns()
- if not compiled then
- local function process(elt)
- local n, grammar = elt[1], elt[2]
- return lpeg.Cc(n) * lpeg.P(grammar) * lpeg.Cp()
- end
- local any = process(patterns[1])
- for i = 2, #patterns do
- any = any + process(patterns[i])
- end
- compiled = any
- end
-
- return compiled
- end
-
- local function sync(token, lnum, cnum)
- local lastidx
- lnum, cnum = lnum or 1, cnum or 1
- if token:find '\n' then
- for i in token:gmatch '()\n' do
- lnum = lnum + 1
- lastidx = i
- end
- cnum = #token - lastidx + 1
- else
- cnum = cnum + #token
- end
- return lnum, cnum
- end
-
- local exports = {}
-
- exports.gmatch = function(input)
- local parser = compile_patterns()
- local index, lnum, cnum = 1, 1, 1
-
- return function()
- local kind, after = parser:match(input, index)
- if kind and after then
- local text = input:sub(index, after - 1)
- local oldlnum, oldcnum = lnum, cnum
- index = after
- lnum, cnum = sync(text, lnum, cnum)
- return kind, text, oldlnum, oldcnum
- end
- end
- end
-
- exports.lex_to_table = function(input)
- local out = {}
-
- for kind, text, lnum, cnum in exports.gmatch(input) do
- out[#out + 1] = { kind, text, lnum, cnum }
- end
-
- return out
- end
-
- return exports
|