summary | refs | log | tree | commit | diff | stats
path: root/src/lua/lua_util.c
diff options
context:
space:
mode:
author: Vsevolod Stakhov <vsevolod@highsecure.ru> 2017-02-13 12:17:58 +0000
committer: Vsevolod Stakhov <vsevolod@highsecure.ru> 2017-02-13 12:19:20 +0000
commit: ce8352554afa648b81400e3267587e22705b08cf (patch)
tree: 1593c3ca8ddb3a52359f50c4784bf2797a26e13b /src/lua/lua_util.c
parent: 3125cf20b87ff211cf2326b9682b39703a996066 (diff)
download: rspamd-ce8352554afa648b81400e3267587e22705b08cf.tar.gz
download: rspamd-ce8352554afa648b81400e3267587e22705b08cf.zip
[Fix] Improve OMOGRAPH_URL rule
- Calculate omographs in each dot component separately
- Normalize omographs
- Count utf8 characters properly
Diffstat (limited to 'src/lua/lua_util.c')
-rw-r--r-- src/lua/lua_util.c 7
1 files changed, 6 insertions, 1 deletions
diff --git a/src/lua/lua_util.c b/src/lua/lua_util.c
index 220467664..669ac5bd9 100644
--- a/src/lua/lua_util.c
+++ b/src/lua/lua_util.c
@@ -1891,7 +1891,7 @@ lua_util_count_non_ascii (lua_State *L)
{
gsize len;
const gchar *str = lua_tolstring (L, 1, &len);
- const gchar *p, *end;
+ const gchar *p, *end, *np;
gint ret = 0, total = 0;
if (str != NULL) {
@@ -1900,8 +1900,13 @@ lua_util_count_non_ascii (lua_State *L)
while (p < end) {
if (*p & 0x80) {
+ np = g_utf8_find_next_char (p, end);
ret ++;
total ++;
+
+ p = (np != p) ? np : p + 1;
+
+ continue;
}
else if (g_ascii_isalpha (*p)) {
total ++;