diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2017-02-13 12:17:58 +0000 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2017-02-13 12:19:20 +0000 |
commit | ce8352554afa648b81400e3267587e22705b08cf (patch) | |
tree | 1593c3ca8ddb3a52359f50c4784bf2797a26e13b /src/lua/lua_util.c | |
parent | 3125cf20b87ff211cf2326b9682b39703a996066 (diff) | |
download | rspamd-ce8352554afa648b81400e3267587e22705b08cf.tar.gz rspamd-ce8352554afa648b81400e3267587e22705b08cf.zip |
[Fix] Improve OMOGRAPH_URL rule
- Calculate homographs in each dot-separated component separately
- Normalize homographs
- Count utf8 characters properly
Diffstat (limited to 'src/lua/lua_util.c')
-rw-r--r-- | src/lua/lua_util.c | 7 |
1 file changed, 6 insertions, 1 deletion
diff --git a/src/lua/lua_util.c b/src/lua/lua_util.c index 220467664..669ac5bd9 100644 --- a/src/lua/lua_util.c +++ b/src/lua/lua_util.c @@ -1891,7 +1891,7 @@ lua_util_count_non_ascii (lua_State *L) { gsize len; const gchar *str = lua_tolstring (L, 1, &len); - const gchar *p, *end; + const gchar *p, *end, *np; gint ret = 0, total = 0; if (str != NULL) { @@ -1900,8 +1900,13 @@ lua_util_count_non_ascii (lua_State *L) while (p < end) { if (*p & 0x80) { + np = g_utf8_find_next_char (p, end); ret ++; total ++; + + p = (np != p) ? np : p + 1; + + continue; } else if (g_ascii_isalpha (*p)) { total ++; |