diff options
Diffstat (limited to 'src/libstat')
-rw-r--r-- | src/libstat/stat_process.c | 8 | ||||
-rw-r--r-- | src/libstat/tokenizers/tokenizers.c | 8 | ||||
-rw-r--r-- | src/libstat/tokenizers/tokenizers.h | 2 |
3 files changed, 9 insertions, 9 deletions
diff --git a/src/libstat/stat_process.c b/src/libstat/stat_process.c index 540a9e23f..394173444 100644 --- a/src/libstat/stat_process.c +++ b/src/libstat/stat_process.c @@ -331,8 +331,8 @@ rspamd_stat_process_tokenize (struct rspamd_stat_ctx *st_ctx, for (i = 0; i < task->text_parts->len; i++) { part = g_ptr_array_index (task->text_parts, i); - if (!IS_PART_EMPTY (part) && part->normalized_words != NULL) { - reserved_len += part->normalized_words->len; + if (!IS_PART_EMPTY (part) && part->utf_words != NULL) { + reserved_len += part->utf_words->len; } /* XXX: normal window size */ reserved_len += 5; @@ -346,9 +346,9 @@ rspamd_stat_process_tokenize (struct rspamd_stat_ctx *st_ctx, for (i = 0; i < task->text_parts->len; i ++) { part = g_ptr_array_index (task->text_parts, i); - if (!IS_PART_EMPTY (part) && part->normalized_words != NULL) { + if (!IS_PART_EMPTY (part) && part->utf_words != NULL) { st_ctx->tokenizer->tokenize_func (st_ctx, task->task_pool, - part->normalized_words, IS_PART_UTF (part), + part->utf_words, IS_PART_UTF (part), NULL, task->tokens); } diff --git a/src/libstat/tokenizers/tokenizers.c b/src/libstat/tokenizers/tokenizers.c index fce98c53f..5436430fe 100644 --- a/src/libstat/tokenizers/tokenizers.c +++ b/src/libstat/tokenizers/tokenizers.c @@ -59,7 +59,7 @@ const gchar t_delimiters[255] = { /* Get next word from specified f_str_t buf */ static gboolean -rspamd_tokenizer_get_word_compat (rspamd_stat_token_t * buf, +rspamd_tokenizer_get_word_raw (rspamd_stat_token_t * buf, gchar const **cur, rspamd_stat_token_t * token, GList **exceptions, gsize *rl, gboolean unused) { @@ -149,7 +149,7 @@ rspamd_tokenizer_get_word_compat (rspamd_stat_token_t * buf, } static gboolean -rspamd_tokenizer_get_word (rspamd_stat_token_t * buf, +rspamd_tokenizer_get_word_utf8 (rspamd_stat_token_t * buf, gchar const **cur, rspamd_stat_token_t * token, GList **exceptions, gsize *rl, gboolean check_signature) @@ -355,10 +355,10 @@ rspamd_tokenize_text (const gchar *text, gsize len, switch (how) { case RSPAMD_TOKENIZE_RAW: - func = rspamd_tokenizer_get_word_compat; + func = rspamd_tokenizer_get_word_raw; break; case RSPAMD_TOKENIZE_UTF: - func = rspamd_tokenizer_get_word; + func = rspamd_tokenizer_get_word_utf8; break; default: g_assert_not_reached (); diff --git a/src/libstat/tokenizers/tokenizers.h b/src/libstat/tokenizers/tokenizers.h index 8be5f98a8..16ab142fd 100644 --- a/src/libstat/tokenizers/tokenizers.h +++ b/src/libstat/tokenizers/tokenizers.h @@ -28,7 +28,7 @@ struct rspamd_stat_tokenizer { enum rspamd_tokenize_type { RSPAMD_TOKENIZE_UTF = 0, RSPAMD_TOKENIZE_RAW, - RSPAMD_TOKENIZE_UCS + RSPAMD_TOKENIZE_UNICODE }; /* Compare two token nodes */ |