aboutsummaryrefslogtreecommitdiffstats
path: root/src/plugins
diff options
context:
space:
mode:
author: Vsevolod Stakhov <vsevolod@highsecure.ru> 2018-09-06 19:49:44 +0100
committer: Vsevolod Stakhov <vsevolod@highsecure.ru> 2018-09-06 19:50:18 +0100
commit: c31f8bf12bff61c9422de9eeff0292c6ac339c5e (patch)
tree: 224c38634f5d6f45218752ca3abb1b39bc7e4093 /src/plugins
parent: af5f57916e4345d988802794c84460960ee47d0c (diff)
download: rspamd-c31f8bf12bff61c9422de9eeff0292c6ac339c5e.tar.gz
download: rspamd-c31f8bf12bff61c9422de9eeff0292c6ac339c5e.zip
[Feature] Implement new text tokenizer based on libicu
Diffstat (limited to 'src/plugins')
-rw-r--r-- src/plugins/chartable.c 12
1 files changed, 11 insertions, 1 deletions
diff --git a/src/plugins/chartable.c b/src/plugins/chartable.c
index 3c7157311..f917c26c8 100644
--- a/src/plugins/chartable.c
+++ b/src/plugins/chartable.c
@@ -619,7 +619,17 @@ chartable_symbol_callback (struct rspamd_task *task, void *unused)
guint i;
gdouble cur_score = 0.0;
- words = rspamd_tokenize_text (task->subject, strlen (task->subject),
+ UText utxt = UTEXT_INITIALIZER;
+ UErrorCode uc_err = U_ZERO_ERROR;
+ gsize slen = strlen (task->subject);
+
+ utext_openUTF8 (&utxt,
+ task->subject,
+ slen,
+ &uc_err);
+
+ words = rspamd_tokenize_text (task->subject, slen,
+ &utxt,
RSPAMD_TOKENIZE_UTF,
NULL,
NULL,