From afa975523e60d36be65d5b79d650a1c98e0a3b0d Mon Sep 17 00:00:00 2001
From: Vsevolod Stakhov
Date: Fri, 18 Jan 2019 16:48:29 +0000
Subject: [PATCH] [Minor] Core: Add lua utility to find some obscured unicode symbols

---
 src/libutil/str_util.h |  3 +++
 src/lua/lua_util.c     | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 56 insertions(+)

diff --git a/src/libutil/str_util.h b/src/libutil/str_util.h
index a2669d1a0..f95932547 100644
--- a/src/libutil/str_util.h
+++ b/src/libutil/str_util.h
@@ -455,5 +455,8 @@ gchar * rspamd_str_make_utf_valid (const gchar *src, gsize slen, gsize *dstlen);
 gsize rspamd_gstring_strip (GString *s, const gchar *strip_chars);
 
 #define IS_ZERO_WIDTH_SPACE(uc) ((uc) == 0x200b || (uc) == 0x200c)
+#define IS_OBSCURED_CHAR(uc) (((uc) >= 0x200B && (uc) <= 0x200F) || \
+	((uc) >= 0x2028 && (uc) <= 0x202F) || \
+	((uc) >= 0x205F && (uc) <= 0x206F))
 
 #endif /* SRC_LIBUTIL_STR_UTIL_H_ */
diff --git a/src/lua/lua_util.c b/src/lua/lua_util.c
index 81b44bd28..94554faa1 100644
--- a/src/lua/lua_util.c
+++ b/src/lua/lua_util.c
@@ -399,6 +399,16 @@ LUA_FUNCTION_DEF (util, is_utf_spoofed);
  */
 LUA_FUNCTION_DEF (util, is_valid_utf8);
 
+/***
+ * @function util.has_obscured_utf(str)
+ * Checks if a string has obscured UTF symbols (zero width spaces, order marks), ignores invalid utf characters
+ * @param {string} str input string
+ * @return {boolean} true if the string has obscured utf characters
+ * @return {number} code point of the first obscured character (only when found)
+ * @return {number} zero-based byte offset of that character in `str` (only when found)
+ */
+LUA_FUNCTION_DEF (util, has_obscured_utf);
+
 /***
  * @function util.readline([prompt])
  * Returns string read from stdin with history and editing support
@@ -609,6 +619,7 @@ static const struct luaL_reg utillib_f[] = {
 	LUA_INTERFACE_DEF (util, caseless_hash_fast),
 	LUA_INTERFACE_DEF (util, is_utf_spoofed),
 	LUA_INTERFACE_DEF (util, is_valid_utf8),
+	LUA_INTERFACE_DEF (util, has_obscured_utf),
 	LUA_INTERFACE_DEF (util, readline),
 	LUA_INTERFACE_DEF (util, readpassphrase),
 	LUA_INTERFACE_DEF (util, file_exists),
@@ -2609,6 +2620,48 @@ lua_util_is_valid_utf8 (lua_State *L)
 	return 1;
 }
 
+/*
+ * Scans the first Lua argument for the first obscured code point and pushes
+ * `true, code point, byte offset` when one is found or a single `false` otherwise
+ */
+static gint
+lua_util_has_obscured_utf (lua_State *L)
+{
+	LUA_TRACE_POINT;
+	const gchar *str;
+	gsize len = 0;
+	gint32 i = 0, start;
+	UChar32 uc;
+
+	str = lua_tolstring (L, 1, &len);
+
+	if (str == NULL) {
+		/* Argument is not a string (or number): there is nothing to scan */
+		lua_pushboolean (L, false);
+
+		return 1;
+	}
+
+	while (i < len) {
+		/* U8_NEXT moves `i` past the decoded character, so remember where it began */
+		start = i;
+		U8_NEXT (str, i, len, uc);
+
+		/* U8_NEXT yields a negative value for an ill-formed sequence; those (and NUL) are ignored */
+		if (uc > 0 && IS_OBSCURED_CHAR (uc)) {
+			lua_pushboolean (L, true);
+			lua_pushnumber (L, uc); /* Character */
+			lua_pushnumber (L, start); /* Offset of the first byte of the character */
+
+			return 3;
+		}
+	}
+
+	lua_pushboolean (L, false);
+
+	return 1;
+}
+
 static gint
 lua_util_readline (lua_State *L)
 {
-- 
2.39.5