diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2018-09-06 19:49:44 +0100 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2018-09-06 19:50:18 +0100 |
commit | c31f8bf12bff61c9422de9eeff0292c6ac339c5e (patch) | |
tree | 224c38634f5d6f45218752ca3abb1b39bc7e4093 /src/plugins | |
parent | af5f57916e4345d988802794c84460960ee47d0c (diff) | |
download | rspamd-c31f8bf12bff61c9422de9eeff0292c6ac339c5e.tar.gz rspamd-c31f8bf12bff61c9422de9eeff0292c6ac339c5e.zip |
[Feature] Implement new text tokenizer based on libicu
Diffstat (limited to 'src/plugins')
-rw-r--r-- | src/plugins/chartable.c | 12 |
1 files changed, 11 insertions, 1 deletions
```diff
diff --git a/src/plugins/chartable.c b/src/plugins/chartable.c
index 3c7157311..f917c26c8 100644
--- a/src/plugins/chartable.c
+++ b/src/plugins/chartable.c
@@ -619,7 +619,17 @@ chartable_symbol_callback (struct rspamd_task *task, void *unused)
 		guint i;
 		gdouble cur_score = 0.0;
 
-		words = rspamd_tokenize_text (task->subject, strlen (task->subject),
+		UText utxt = UTEXT_INITIALIZER;
+		UErrorCode uc_err = U_ZERO_ERROR;
+		gsize slen = strlen (task->subject);
+
+		utext_openUTF8 (&utxt,
+				task->subject,
+				slen,
+				&uc_err);
+
+		words = rspamd_tokenize_text (task->subject, slen,
+				&utxt,
 				RSPAMD_TOKENIZE_UTF,
 				NULL,
 				NULL,
```