source.dussan.org Git - rspamd.git/commitdiff
[Fix] Fix lowercasing of stemmed words
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Sat, 13 May 2017 11:44:14 +0000 (12:44 +0100)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Sat, 13 May 2017 11:44:14 +0000 (12:44 +0100)
src/libmime/message.c
src/libutil/str_util.c

index 9322bbd33f45e2e5959c615c192aafc22e1bf6a4..dfcd483a72a916e1a19604acf576f23c28842cc2 100644 (file)
@@ -238,6 +238,7 @@ rspamd_extract_words (struct rspamd_task *task,
                                        nlen = strlen (r);
                                        nlen = MIN (nlen, w->len);
                                        temp_word = rspamd_mempool_alloc (task->task_pool, nlen);
+                                       memcpy (temp_word, r, nlen);
 
                                        if (IS_PART_UTF (part)) {
                                                rspamd_str_lc_utf8 (temp_word, nlen);
@@ -246,7 +247,6 @@ rspamd_extract_words (struct rspamd_task *task,
                                                rspamd_str_lc (temp_word, nlen);
                                        }
 
-                                       memcpy (temp_word, r, nlen);
                                        w->begin = temp_word;
                                        w->len = nlen;
                                }
index e1f0d53694e651131f846fe466592c3fdc442a0a..004204881e1cd86d8695d8d3f6e0b8e515cbac09 100644 (file)
@@ -147,14 +147,15 @@ rspamd_str_lc_utf8 (gchar *str, guint size)
        gunichar uc;
 
        while (remain > 0) {
-               uc = g_utf8_get_char (s);
-               uc = g_unichar_tolower (uc);
                p = g_utf8_next_char (s);
 
                if (p - s > remain) {
                        break;
                }
 
+               uc = g_utf8_get_char (s);
+               uc = g_unichar_tolower (uc);
+
                if (remain >= 6) {
                        r = g_unichar_to_utf8 (uc, d);
                }