about | summary | refs | log | tree | commit | diff | stats
path: root/src/lua
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@highsecure.ru> 2018-09-06 19:49:44 +0100
committer Vsevolod Stakhov <vsevolod@highsecure.ru> 2018-09-06 19:50:18 +0100
commit c31f8bf12bff61c9422de9eeff0292c6ac339c5e (patch)
tree 224c38634f5d6f45218752ca3abb1b39bc7e4093 /src/lua
parent af5f57916e4345d988802794c84460960ee47d0c (diff)
download rspamd-c31f8bf12bff61c9422de9eeff0292c6ac339c5e.tar.gz
rspamd-c31f8bf12bff61c9422de9eeff0292c6ac339c5e.zip
[Feature] Implement new text tokenizer based on libicu
Diffstat (limited to 'src/lua')
-rw-r--r-- src/lua/lua_util.c 11
1 files changed, 10 insertions, 1 deletions
diff --git a/src/lua/lua_util.c b/src/lua/lua_util.c
index 3de68e60a..d6095ab52 100644
--- a/src/lua/lua_util.c
+++ b/src/lua/lua_util.c
@@ -1078,6 +1078,7 @@ lua_util_tokenize_text (lua_State *L)
GList *exceptions = NULL, *cur;
struct rspamd_lua_text *t;
struct rspamd_process_exception *ex;
+ UText utxt = UTEXT_INITIALIZER;
GArray *res;
rspamd_stat_token_t *w;
@@ -1129,7 +1130,15 @@ lua_util_tokenize_text (lua_State *L)
exceptions = g_list_reverse (exceptions);
}
- res = rspamd_tokenize_text ((gchar *)in, len, RSPAMD_TOKENIZE_UTF, NULL,
+ UErrorCode uc_err = U_ZERO_ERROR;
+ utext_openUTF8 (&utxt,
+ in,
+ len,
+ &uc_err);
+
+ res = rspamd_tokenize_text ((gchar *)in, len,
+ &utxt,
+ RSPAMD_TOKENIZE_UTF, NULL,
exceptions,
NULL);