diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2018-09-06 19:49:44 +0100 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2018-09-06 19:50:18 +0100 |
commit | c31f8bf12bff61c9422de9eeff0292c6ac339c5e (patch) | |
tree | 224c38634f5d6f45218752ca3abb1b39bc7e4093 /src/lua | |
parent | af5f57916e4345d988802794c84460960ee47d0c (diff) | |
download | rspamd-c31f8bf12bff61c9422de9eeff0292c6ac339c5e.tar.gz rspamd-c31f8bf12bff61c9422de9eeff0292c6ac339c5e.zip |
[Feature] Implement new text tokenizer based on libicu
Diffstat (limited to 'src/lua')
-rw-r--r-- | src/lua/lua_util.c | 11 |
1 files changed, 10 insertions, 1 deletions
diff --git a/src/lua/lua_util.c b/src/lua/lua_util.c
index 3de68e60a..d6095ab52 100644
--- a/src/lua/lua_util.c
+++ b/src/lua/lua_util.c
@@ -1078,6 +1078,7 @@ lua_util_tokenize_text (lua_State *L)
 	GList *exceptions = NULL, *cur;
 	struct rspamd_lua_text *t;
 	struct rspamd_process_exception *ex;
+	UText utxt = UTEXT_INITIALIZER;
 	GArray *res;
 	rspamd_stat_token_t *w;
 
@@ -1129,7 +1130,15 @@ lua_util_tokenize_text (lua_State *L)
 		exceptions = g_list_reverse (exceptions);
 	}
 
-	res = rspamd_tokenize_text ((gchar *)in, len, RSPAMD_TOKENIZE_UTF, NULL,
+	UErrorCode uc_err = U_ZERO_ERROR;
+	utext_openUTF8 (&utxt,
+			in,
+			len,
+			&uc_err);
+
+	res = rspamd_tokenize_text ((gchar *)in, len,
+			&utxt,
+			RSPAMD_TOKENIZE_UTF, NULL,
 			exceptions, NULL);