diff options
-rw-r--r-- | src/libmime/message.c | 17 | ||||
-rw-r--r-- | src/libstat/tokenizers/tokenizers.c | 4 |
2 files changed, 10 insertions, 11 deletions
diff --git a/src/libmime/message.c b/src/libmime/message.c index 2eac86ed2..dfef04ce8 100644 --- a/src/libmime/message.c +++ b/src/libmime/message.c @@ -1180,7 +1180,7 @@ rspamd_normalize_text_part (struct rspamd_task *task, struct sb_stemmer *stem = NULL; rspamd_fstring_t *w, stw; const guchar *r; - guint i; + guint i, nlen; GArray *tmp; if (part->language && part->language[0] != '\0' && part->is_utf) { @@ -1203,13 +1203,11 @@ rspamd_normalize_text_part (struct rspamd_task *task, r = sb_stemmer_stem (stem, w->begin, w->len); } - if (stem == NULL || r == NULL) { - stw.begin = rspamd_mempool_fstrdup (task->task_pool, w); - stw.len = w->len; - } - else { - stw.begin = rspamd_mempool_strdup (task->task_pool, r); - stw.len = strlen (r); + if (stem != NULL && r != NULL) { + nlen = strlen (r); + nlen = MIN (nlen, stw.len); + memcpy (stw.begin, r, nlen); + stw.len = nlen; } if (part->is_utf) { @@ -1218,9 +1216,8 @@ rspamd_normalize_text_part (struct rspamd_task *task, else { rspamd_str_lc (stw.begin, stw.len); } - g_array_append_val (part->normalized_words, stw); } - g_array_free (tmp, TRUE); + part->normalized_words = tmp; } if (stem != NULL) { diff --git a/src/libstat/tokenizers/tokenizers.c b/src/libstat/tokenizers/tokenizers.c index c9b65e343..eebc57c22 100644 --- a/src/libstat/tokenizers/tokenizers.c +++ b/src/libstat/tokenizers/tokenizers.c @@ -110,6 +110,7 @@ rspamd_tokenizer_get_word_compat (rspamd_fstring_t * buf, token->begin = buf->begin; token->len = 0; } + *cur = token->begin; } token->len = 0; @@ -223,6 +224,7 @@ rspamd_tokenizer_get_word (rspamd_fstring_t * buf, token->begin = "exception"; token->len = sizeof ("exception") - 1; state = skip_exception; + continue; } else if (g_unichar_isgraph (uc) && !g_unichar_ispunct (uc)) { state = feed_token; @@ -290,7 +292,7 @@ rspamd_tokenize_text (gchar *text, gsize len, gboolean is_utf, func = rspamd_tokenizer_get_word; } - res = g_array_new (FALSE, FALSE, sizeof (rspamd_fstring_t)); + res = g_array_sized_new (FALSE, FALSE, sizeof (rspamd_fstring_t), 128); while (func (&buf, &pos, &token, &cur, is_utf, &l)) { if (min_len > 0 && l < min_len) { |