diff options
author | Miecio Za <miecio@miecio.net> | 2019-03-18 19:56:57 +0100 |
---|---|---|
committer | Miecio Za <miecio@miecio.net> | 2019-03-21 16:02:57 +0100 |
commit | 7824318dca65d223df397329b3eabde86c3fde02 (patch) | |
tree | db460952d782db42c32dda1d182752e95c62b676 /src/lua/lua_util.c | |
parent | 198e242157ed81b871671f6a77e3d525a57350a5 (diff) | |
download | rspamd-7824318dca65d223df397329b3eabde86c3fde02.tar.gz rspamd-7824318dca65d223df397329b3eabde86c3fde02.zip |
[Minor] Add util.is_utf_mixed_script to lua
Add a new function which implements a PoC for checking
mixed scripts in a UTF string. Behaviour is similar
to the single-string spoof detection in libicu before
version 58
Diffstat (limited to 'src/lua/lua_util.c')
-rw-r--r-- | src/lua/lua_util.c | 51 |
1 files changed, 51 insertions, 0 deletions
diff --git a/src/lua/lua_util.c b/src/lua/lua_util.c index 1a37eaef6..881257ed3 100644 --- a/src/lua/lua_util.c +++ b/src/lua/lua_util.c @@ -29,6 +29,7 @@ #include <zlib.h> #include "unicode/uspoof.h" +#include "unicode/uscript.h" /*** * @module rspamd_util @@ -394,6 +395,14 @@ LUA_FUNCTION_DEF (util, normalize_prob); LUA_FUNCTION_DEF (util, is_utf_spoofed); /** +* @function util.is_utf_mixed_script(str) +* Returns true if a string contains mixed unicode scripts +* @param {string} String to check +* @return {boolean} true if a string contains chars with mixed unicode script +*/ +LUA_FUNCTION_DEF (util, is_utf_mixed_script); + +/** * @function util.is_utf_outside_range(str, range_start, range_end) * Returns true if a string contains chars outside range * @param {string} String to check @@ -633,6 +642,7 @@ static const struct luaL_reg utillib_f[] = { LUA_INTERFACE_DEF (util, caseless_hash), LUA_INTERFACE_DEF (util, caseless_hash_fast), LUA_INTERFACE_DEF (util, is_utf_spoofed), + LUA_INTERFACE_DEF (util, is_utf_mixed_script), LUA_INTERFACE_DEF (util, is_utf_outside_range), LUA_INTERFACE_DEF (util, get_string_stats), LUA_INTERFACE_DEF (util, is_valid_utf8), @@ -2499,6 +2509,47 @@ lua_util_is_utf_spoofed (lua_State *L) } static gint +lua_util_is_utf_mixed_script(lua_State *L) +{ + LUA_TRACE_POINT; + gsize len_of_string; + const gchar *end, *string_to_check = lua_tolstring (L, 1, &len_of_string); + UScriptCode last_script_code = USCRIPT_INVALID_CODE; + UErrorCode uc_err = U_ZERO_ERROR; + + if (string_to_check && g_utf8_validate (string_to_check, len_of_string, &end)) { + len_of_string = g_utf8_strlen (string_to_check, len_of_string); + + for(; *string_to_check; string_to_check = g_utf8_next_char(string_to_check)){ + gunichar char_to_check = g_utf8_get_char(string_to_check); + UScriptCode current_script_code = uscript_getScript(char_to_check, &uc_err); + if (uc_err != U_ZERO_ERROR){ + msg_err ("cannot get unicode script for character, error: %s", u_errorName (uc_err)); + 
lua_pushboolean (L, false); + return 1; + } + if ( current_script_code != USCRIPT_COMMON && current_script_code != USCRIPT_INHERITED ){ + if (last_script_code == USCRIPT_INVALID_CODE ){ + last_script_code = current_script_code; + } else { + if ( last_script_code != current_script_code ){ + lua_pushboolean (L, true); + return 1; + } + } + } + } + } + else { + return luaL_error (L, "invalid arguments"); + } + + lua_pushboolean (L, false); + + return 1; +} + +static gint lua_util_get_string_stats (lua_State *L) { LUA_TRACE_POINT; |