From 85eead9d1fbebd84fa25046970f73fcb9a127e50 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Sat, 13 May 2017 12:44:14 +0100 Subject: [PATCH] [Fix] Fix lowercasing of stemmed words --- src/libmime/message.c | 2 +- src/libutil/str_util.c | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/libmime/message.c b/src/libmime/message.c index 9322bbd33..dfcd483a7 100644 --- a/src/libmime/message.c +++ b/src/libmime/message.c @@ -238,6 +238,7 @@ rspamd_extract_words (struct rspamd_task *task, nlen = strlen (r); nlen = MIN (nlen, w->len); temp_word = rspamd_mempool_alloc (task->task_pool, nlen); + memcpy (temp_word, r, nlen); if (IS_PART_UTF (part)) { rspamd_str_lc_utf8 (temp_word, nlen); @@ -246,7 +247,6 @@ rspamd_extract_words (struct rspamd_task *task, rspamd_str_lc (temp_word, nlen); } - memcpy (temp_word, r, nlen); w->begin = temp_word; w->len = nlen; } diff --git a/src/libutil/str_util.c b/src/libutil/str_util.c index e1f0d5369..004204881 100644 --- a/src/libutil/str_util.c +++ b/src/libutil/str_util.c @@ -147,14 +147,15 @@ rspamd_str_lc_utf8 (gchar *str, guint size) gunichar uc; while (remain > 0) { - uc = g_utf8_get_char (s); - uc = g_unichar_tolower (uc); p = g_utf8_next_char (s); if (p - s > remain) { break; } + uc = g_utf8_get_char (s); + uc = g_unichar_tolower (uc); + if (remain >= 6) { r = g_unichar_to_utf8 (uc, d); } -- 2.39.5