]> source.dussan.org Git - rspamd.git/commitdiff
[Project] Implement syntax highlighting for Lua
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Wed, 4 Sep 2019 10:47:22 +0000 (11:47 +0100)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Wed, 4 Sep 2019 10:49:00 +0000 (11:49 +0100)
lualib/lua_lexer.lua [new file with mode: 0644]
src/rspamadm/lua_repl.c

diff --git a/lualib/lua_lexer.lua b/lualib/lua_lexer.lua
new file mode 100644 (file)
index 0000000..8c751d8
--- /dev/null
@@ -0,0 +1,162 @@
+--[[
+Copyright (c) 2019, Vsevolod Stakhov <vsevolod@highsecure.ru>
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+]]--
+
+--[[ Lua LPEG grammar based on https://github.com/xolox/lua-lxsh/ ]]
+
+
+local lpeg = require "lpeg"
+
+local P = lpeg.P -- generic pattern constructor (literals, match-time functions)
+local R = lpeg.R -- character ranges
+local S = lpeg.S -- character sets
+local D = R'09' -- Digits
+local I = R('AZ', 'az', '\127\255') + '_' -- Identifier characters: ASCII letters, bytes 127..255 and underscore
+local B = -(I + D) -- Word boundary: succeeds without consuming input when the next character is neither an identifier character nor a digit
+local SOS = lpeg.P(function(s, i) return i == 1 end) -- start of string (matches only at index 1); appears unused in this file
+local EOS = -lpeg.P(1) -- end of string (no character left to match)
+
+-- Long brackets: `[[ ... ]]`, `[=[ ... ]=]`, ... (shared by long strings and long comments).
+local longstring = #(P'[' * P'='^0 * '[') * P(function(subject, pos)
+  local eqs = subject:match('^%[(=*)%[', pos) -- the run of '=' fixes the bracket level
+  if not eqs then return end
+  -- Plain search for the closing bracket of the same level; no match => the pattern fails.
+  local _, close_end = subject:find(']' .. eqs .. ']', pos, true)
+  if close_end then return close_end + 1 end -- resume right after the closing bracket
+end)
+
+-- String literals: quoted, single line, with backslash escapes.
+local singlequoted = P"'" * ((1 - S"'\r\n\f\\") + (P'\\' * 1))^0 * "'" -- body: any char except quote, CR, LF, FF or backslash, or a backslash plus any one char
+local doublequoted = P'"' * ((1 - S'"\r\n\f\\') + (P'\\' * 1))^0 * '"' -- same rule, delimited by double quotes
+
+-- Comments: `-- ...` to the end of the line, or `--` followed by a long bracket.
+local eol = P'\r\n' + '\n' -- CRLF or LF line terminator
+local line = (1 - S'\r\n\f')^0 * eol^-1 -- rest of the current line plus an optional terminator
+local singleline = P'--' * line -- single-line comment
+local multiline = P'--' * longstring -- long comment such as `--[[ ... ]]`
+
+-- Numbers: hexadecimal, integer or float, with an optional exponent.
+local sign = S'+-'^-1 -- optional sign, used inside the exponent only
+local decimal = D^1 -- one or more digits
+local hexadecimal = P'0' * S'xX' * R('09', 'AF', 'af') ^ 1 -- `0x` / `0X` followed by at least one hex digit
+local float = D^1 * P'.' * D^0 + P'.' * D^1 -- `1.`, `1.5` or `.5`
+local maybeexp = (float + decimal) * (S'eE' * sign * D^1)^-1 -- float or integer with an optional `e`/`E` exponent
+
+-- Build one pattern matching any whitespace-separated word of `keywords`, followed by a word boundary.
+local function compile_keywords(keywords)
+  local words = {}
+  for kw in keywords:gmatch('%S+') do table.insert(words, kw) end
+  -- Longest first: the ordered choice must try `elseif` before `else`.
+  local function longer_first(lhs, rhs) return #rhs < #lhs end
+  table.sort(words, longer_first)
+
+  local choice -- stays nil when `keywords` holds no words
+  for idx = 1, #words do
+    local alt = lpeg.P(words[idx])
+    if choice then
+      choice = choice + alt
+    else
+      choice = alt
+    end
+  end
+  return choice * (B + EOS) -- ending boundary
+end
+
+-- Identifiers
+local ident = I * (I + D)^0 -- one identifier character followed by identifier characters or digits
+local expr = ('.' * ident)^0 -- zero or more `.name` suffixes, so `a.b.c` can be consumed as a single token
+
+-- Token rules as {kind, pattern} pairs, tried in order; `error` swallows any single unmatched character.
+local patterns = {
+  {'whitespace',  S'\r\n\f\t\v '^1},
+  {'constant', (P'true' + 'false' + 'nil') * B},
+  {'string', singlequoted + doublequoted + longstring},
+  {'comment', multiline + singleline},
+  {'number', hexadecimal + maybeexp},
+  -- Word operators need the boundary, otherwise `order` lexes as `or` + `der` and `nothing` as `not` + `hing`.
+  {'operator', (P'not' + 'and' + 'or') * B + '...' + '..' + '~=' + '==' + '>=' + '<='
+      + S']{=>^[<;)*(%}+-:,/.#'},
+  {'keyword', compile_keywords([[
+      break do else elseif end for function if in local repeat return then until while
+      ]])},
+  {'identifier', lpeg.Cmt(ident, function(input, index) -- also take any trailing `.name` chain
+    return expr:match(input, index)
+  end)},
+  {'error', 1},
+}
+
+local compiled -- memoised result of compile_patterns()
+
+-- Combine all token rules into one ordered choice; every alternative yields its kind and the end position.
+local function compile_patterns()
+  if compiled then
+    return compiled
+  end
+
+  local choice
+  for idx, rule in ipairs(patterns) do
+    local kind, body = rule[1], rule[2]
+    local tagged = lpeg.Cc(kind) * lpeg.P(body) * lpeg.Cp()
+    choice = (idx == 1) and tagged or (choice + tagged)
+  end
+  compiled = choice
+  return compiled
+end
+
+-- Advance a (line, column) position past `token`; both default to 1 when omitted.
+local function sync(token, lnum, cnum)
+  local line, col = lnum or 1, cnum or 1
+  local last_nl = token:find('\n[^\n]*$') -- index of the final newline, if any
+
+  if not last_nl then
+    -- Token stays on the current line: shift the column by its length.
+    return line, col + #token
+  end
+
+  -- One line per newline; the column restarts at 1 right after the last one.
+  local _, newlines = token:gsub('\n', '\n')
+  return line + newlines, #token - last_nl + 1
+end
+
+local exports = {}
+
+-- Iterator over the tokens of `input`: each step yields kind, text, line and column of one token.
+exports.gmatch = function(input)
+  local lexer = compile_patterns()
+  local pos, line, col = 1, 1, 1
+
+  return function()
+    local kind, stop = lexer:match(input, pos)
+    if not (kind and stop) then return end -- nothing matched: iteration is over
+    local text = input:sub(pos, stop - 1)
+    local tok_line, tok_col = line, col -- position where this token starts
+    pos = stop
+    line, col = sync(text, line, col)
+    return kind, text, tok_line, tok_col
+  end
+end
+
+-- Lex `input` eagerly into an array of {kind, text, line, column} entries.
+exports.lex_to_table = function(input)
+  local tokens = {}
+  for kind, text, line, col in exports.gmatch(input) do
+    table.insert(tokens, {kind, text, line, col})
+  end
+
+  return tokens
+end
+
+return exports
+
index 997fd4e9d19d0d82508af25c55c28fd3df79f22a..54a2ed01e1e286940cf50d6388676cac7545abfe 100644 (file)
@@ -40,7 +40,6 @@ static guint max_history = 2000;
 static gchar *serve = NULL;
 static gchar *exec_line = NULL;
 static gint batch = -1;
-static gboolean per_line = FALSE;
 extern struct rspamd_async_session *rspamadm_session;
 
 static const char *default_history_file = ".rspamd_repl.hist";
@@ -125,8 +124,6 @@ static GOptionEntry entries[] = {
                                "Serve http lua server", NULL},
                {"batch", 'b', 0, G_OPTION_ARG_NONE, &batch,
                                "Batch execution mode", NULL},
-               {"per-line", 'p', 0, G_OPTION_ARG_NONE, &per_line,
-                               "Pass each line of input to the specified lua script", NULL},
                {"exec", 'e', 0, G_OPTION_ARG_STRING, &exec_line,
                                "Execute specified script", NULL},
                {"args", 'a', 0, G_OPTION_ARG_STRING_ARRAY, &lua_args,
@@ -228,15 +225,12 @@ rspamadm_lua_load_script (lua_State *L, const gchar *path)
                return FALSE;
        }
 
-       if (!per_line) {
-
-               if (lua_repl_thread_call (thread, 0, (void *)path, lua_thread_str_error_cb) != 0) {
-                       return FALSE;
-               }
-
-               lua_settop (L, 0);
+       if (lua_repl_thread_call (thread, 0, (void *)path, lua_thread_str_error_cb) != 0) {
+               return FALSE;
        }
 
+       lua_settop (L, 0);
+
        return TRUE;
 }
 
@@ -274,24 +268,22 @@ rspamadm_exec_input (lua_State *L, const gchar *input)
 
        g_string_free (tb, TRUE);
 
-       if (!per_line) {
 
-               top = lua_gettop (L);
+       top = lua_gettop (L);
 
-               if (lua_repl_thread_call (thread, 0, NULL, NULL) == 0) {
-                       /* Print output */
-                       for (i = top; i <= lua_gettop (L); i++) {
-                               if (lua_isfunction (L, i)) {
-                                       lua_pushvalue (L, i);
-                                       cbref = luaL_ref (L, LUA_REGISTRYINDEX);
+       if (lua_repl_thread_call (thread, 0, NULL, NULL) == 0) {
+               /* Print output */
+               for (i = top; i <= lua_gettop (L); i++) {
+                       if (lua_isfunction (L, i)) {
+                               lua_pushvalue (L, i);
+                               cbref = luaL_ref (L, LUA_REGISTRYINDEX);
 
-                                       rspamd_printf ("local function: %d\n", cbref);
-                               } else {
-                                       memset (&tr, 0, sizeof (tr));
-                                       lua_logger_out_type (L, i, outbuf, sizeof (outbuf), &tr,
-                                                       LUA_ESCAPE_UNPRINTABLE);
-                                       rspamd_printf ("%s\n", outbuf);
-                               }
+                               rspamd_printf ("local function: %d\n", cbref);
+                       } else {
+                               memset (&tr, 0, sizeof (tr));
+                               lua_logger_out_type (L, i, outbuf, sizeof (outbuf), &tr,
+                                               LUA_ESCAPE_UNPRINTABLE);
+                               rspamd_printf ("%s\n", outbuf);
                        }
                }
        }
@@ -516,8 +508,106 @@ rspamadm_lua_try_dot_command (lua_State *L, const gchar *input)
        return FALSE;
 }
 
+#ifdef WITH_LUA_REPL
+static gint lex_ref_idx = -1;
+
 static void
-rspamadm_lua_run_repl (lua_State *L)
+lua_syntax_highlighter (const char *str, ReplxxColor *colours, int size, void *ud) /* lexes str with lua_lexer.lex_to_table and writes one colour per token cell into colours */
+{
+       lua_State *L = (lua_State *)ud; /* user data is the Lua state passed at registration */
+
+       if (lex_ref_idx == -1) { /* first call: resolve the lexer function and cache a registry reference */
+               if (!rspamd_lua_require_function (L, "lua_lexer", "lex_to_table")) {
+                       fprintf (stderr, "cannot require lua_lexer!\n");
+
+                       exit (EXIT_FAILURE); /* no lexer available: the whole process is terminated */
+               }
+
+               lex_ref_idx = luaL_ref (L, LUA_REGISTRYINDEX); /* presumably refs the function left on the stack by the require helper - TODO confirm */
+       }
+
+       lua_rawgeti (L, LUA_REGISTRYINDEX, lex_ref_idx); /* push the cached lexer function */
+       lua_pushstring (L, str);
+
+       if (lua_pcall (L, 1, 1, 0) != 0) { /* one argument (str), one result (token table) */
+               fprintf (stderr, "cannot lex a string!\n"); /* the Lua error value is discarded by the final lua_settop */
+       }
+       else {
+               /* Process what we have after lexing */
+               gsize nelts = rspamd_lua_table_size (L, -1);
+
+               for (gsize i = 0; i < nelts; i ++) {
+                       /*
+                        * Indexes in the table:
+                        * 1 - type of element (string)
+                        * 2 - text (string)
+                        * 3 - line num (int), always 1...
+                        * 4 - column num (must be less than size)
+                        */
+                       const gchar *what, *text;
+                       gsize column, tlen, cur_top, elt_pos;
+                       ReplxxColor elt_color = REPLXX_COLOR_DEFAULT; /* used when the token kind matches none of the names below */
+
+                       cur_top = lua_gettop (L); /* remember the stack height to drop this element's temporaries later */
+                       lua_rawgeti (L, -1, i + 1); /* Lua arrays are 1-based */
+                       elt_pos = lua_gettop (L);
+                       lua_rawgeti (L, elt_pos, 1);
+                       what = lua_tostring (L, -1);
+                       lua_rawgeti (L, elt_pos, 2);
+                       text = lua_tolstring (L, -1, &tlen); /* tlen is the token length in bytes */
+                       lua_rawgeti (L, elt_pos, 4); /* index 3 (line number) is not read */
+                       column = lua_tointeger (L, -1);
+
+                       g_assert (column > 0); /* the lexer reports 1-based columns */
+                       column --; /* Start from 0 */
+
+                       if (column + tlen > size) { /* token would run past the colours array */
+                               /* Likely utf8 case, too complicated to match */
+                               lua_settop (L, cur_top);
+                               continue;
+                       }
+
+                       /* Check what and adjust color */
+                       if (strcmp (what, "identifier") == 0) {
+                               elt_color = REPLXX_COLOR_NORMAL;
+                       }
+                       else if (strcmp (what, "number") == 0) {
+                               elt_color = REPLXX_COLOR_BLUE;
+                       }
+                       else if (strcmp (what, "string") == 0) {
+                               elt_color = REPLXX_COLOR_GREEN;
+                       }
+                       else if (strcmp (what, "keyword") == 0) {
+                               elt_color = REPLXX_COLOR_WHITE;
+                       }
+                       else if (strcmp (what, "constant") == 0) {
+                               elt_color = REPLXX_COLOR_WHITE;
+                       }
+                       else if (strcmp (what, "operator") == 0) {
+                               elt_color = REPLXX_COLOR_CYAN;
+                       }
+                       else if (strcmp (what, "comment") == 0) {
+                               elt_color = REPLXX_COLOR_BRIGHTGREEN;
+                       }
+                       else if (strcmp (what, "error") == 0) {
+                               elt_color = REPLXX_COLOR_ERROR;
+                       }
+
+                       for (gsize j = column; j < column + tlen; j ++) { /* paint every cell covered by the token */
+                               colours[j] = elt_color;
+                       }
+
+                       /* Restore stack */
+                       lua_settop (L, cur_top);
+               }
+       }
+
+       lua_settop (L, 0); /* clears the entire stack of L, not only the values pushed here */
+}
+#endif
+
+static void
+rspamadm_lua_run_repl (lua_State *L, bool is_batch)
 {
        gchar *input;
        gboolean is_multiline = FALSE;
@@ -546,6 +636,11 @@ rspamadm_lua_run_repl (lua_State *L)
 
                lua_settop (L, 0);
 #else
+               if (!is_batch) {
+                       replxx_set_highlighter_callback (rx_instance, lua_syntax_highlighter,
+                                       L);
+               }
+
                if (!is_multiline) {
                        input = (gchar *)replxx_input (rx_instance, MAIN_PROMPT);
 
@@ -555,7 +650,9 @@ rspamadm_lua_run_repl (lua_State *L)
 
                        if (input[0] == '.') {
                                if (rspamadm_lua_try_dot_command (L, input)) {
-                                       replxx_history_add (rx_instance, input);
+                                       if (!is_batch) {
+                                               replxx_history_add (rx_instance, input);
+                                       }
                                        continue;
                                }
                        }
@@ -567,7 +664,9 @@ rspamadm_lua_run_repl (lua_State *L)
                        }
 
                        rspamadm_exec_input (L, input);
-                       replxx_history_add (rx_instance, input);
+                       if (!is_batch) {
+                               replxx_history_add (rx_instance, input);
+                       }
                        lua_settop (L, 0);
                }
                else {
@@ -589,7 +688,9 @@ rspamadm_lua_run_repl (lua_State *L)
                                        }
                                }
 
-                               replxx_history_add (rx_instance, tb->str);
+                               if (!is_batch) {
+                                       replxx_history_add (rx_instance, tb->str);
+                               }
                                g_string_free (tb, TRUE);
                        }
                        else {
@@ -899,69 +1000,19 @@ rspamadm_lua (gint argc, gchar **argv, const struct rspamadm_command *cmd)
                g_hash_table_insert (cmds_hash, (gpointer)cmds[i].name, &cmds[i]);
        }
 
-       if (per_line) {
-               GIOChannel *in;
-               GString *buf;
-               gsize end_pos;
-               GIOStatus ret;
-               gint old_top;
-               GError *err = NULL;
-
-               in = g_io_channel_unix_new (STDIN_FILENO);
-               buf = g_string_sized_new (BUFSIZ);
-
-again:
-               while ((ret = g_io_channel_read_line_string (in, buf, &end_pos, &err)) ==
-                               G_IO_STATUS_NORMAL) {
-                       old_top = lua_gettop (L);
-                       lua_pushvalue (L, -1);
-                       lua_pushlstring (L, buf->str, MIN (buf->len, end_pos));
-                       lua_setglobal (L, "input");
-
-                       struct thread_entry *thread = lua_thread_pool_get_for_config (rspamd_main->cfg);
-                       L = thread->lua_state;
 
-                       lua_repl_thread_call (thread, 0, NULL, NULL);
-
-                       lua_settop (L, old_top);
-               }
-
-               if (ret == G_IO_STATUS_AGAIN) {
-                       goto again;
-               }
-
-               g_string_free (buf, TRUE);
-               g_io_channel_shutdown (in, FALSE, NULL);
-
-               if (ret == G_IO_STATUS_EOF) {
-                       if (err) {
-                               g_error_free (err);
-                       }
-               }
-               else {
-                       rspamd_fprintf (stderr, "IO error: %e\n", err);
-
-                       if (err) {
-                               g_error_free (err);
-                       }
-
-                       exit (-errno);
-               }
-       }
-       else {
 #ifdef WITH_LUA_REPL
-               rx_instance = replxx_init ();
+       rx_instance = replxx_init ();
 #endif
-               if (!batch) {
-                       replxx_set_max_history_size (rx_instance, max_history);
-                       replxx_history_load (rx_instance, histfile);
-                       rspamadm_lua_run_repl (L);
-                       replxx_history_save (rx_instance, histfile);
-               } else {
-                       rspamadm_lua_run_repl (L);
-               }
+       if (!batch) {
+               replxx_set_max_history_size (rx_instance, max_history);
+               replxx_history_load (rx_instance, histfile);
+               rspamadm_lua_run_repl (L, false);
+               replxx_history_save (rx_instance, histfile);
+       } else {
+               rspamadm_lua_run_repl (L, true);
+       }
 #ifdef WITH_LUA_REPL
-                replxx_end (rx_instance);
+       replxx_end (rx_instance);
 #endif
-       }
 }