source.dussan.org Git - rspamd.git/commitdiff
[Minor] Do not count empty words
author: Vsevolod Stakhov <vsevolod@highsecure.ru>
Mon, 28 Sep 2020 10:00:29 +0000 (11:00 +0100)
committer: Vsevolod Stakhov <vsevolod@highsecure.ru>
Mon, 28 Sep 2020 10:00:29 +0000 (11:00 +0100)
src/libmime/message.c
src/libmime/message.h

index f2f22f6fe1fbdb3e35dc36ce6aaa43693baab2b7..d6a5fb5ce195ea93cc4649383926ad0598019e50 100644 (file)
@@ -93,10 +93,10 @@ rspamd_mime_part_extract_words (struct rspamd_task *task,
                                if (w->stemmed.len <= 3) {
                                        short_len++;
                                }
-                       }
 
-                       if (w->flags & RSPAMD_STAT_TOKEN_FLAG_TEXT) {
-                               part->nwords ++;
+                               if (w->flags & RSPAMD_STAT_TOKEN_FLAG_TEXT) {
+                                       part->nwords ++;
+                               }
                        }
 
                        if (w->flags & (RSPAMD_STAT_TOKEN_FLAG_BROKEN_UNICODE|
index a921d6f3826477d2a86f925aca9f308d06585a8e..d6f1b76c0b5231047a10664e049461a7976a8657 100644 (file)
@@ -140,8 +140,8 @@ struct rspamd_mime_text_part {
        GByteArray *utf_content; /* utf8 encoded processed content */
        GByteArray *utf_raw_content; /* utf raw content */
        GByteArray *utf_stripped_content; /* utf content with no newlines */
-       GArray *normalized_hashes;
-       GArray *utf_words;
+       GArray *normalized_hashes; /* Array of guint64 */
+       GArray *utf_words; /* Array of rspamd_stat_token_t */
        UText utf_stripped_text; /* Used by libicu to represent the utf8 content */
 
        GPtrArray *newlines;    /**< positions of newlines in text, relative to content*/