From: Vsevolod Stakhov
Date: Mon, 28 Sep 2020 10:00:29 +0000 (+0100)
Subject: [Minor] Do not count empty words
X-Git-Tag: 2.6~16
X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=ffdde528bb3981325cdbc69600ffbaff024a6ad3;p=rspamd.git

[Minor] Do not count empty words
---

diff --git a/src/libmime/message.c b/src/libmime/message.c
index f2f22f6fe..d6a5fb5ce 100644
--- a/src/libmime/message.c
+++ b/src/libmime/message.c
@@ -93,10 +93,10 @@ rspamd_mime_part_extract_words (struct rspamd_task *task,
 				if (w->stemmed.len <= 3) {
 					short_len++;
 				}
-			}
 
-			if (w->flags & RSPAMD_STAT_TOKEN_FLAG_TEXT) {
-				part->nwords ++;
+				if (w->flags & RSPAMD_STAT_TOKEN_FLAG_TEXT) {
+					part->nwords ++;
+				}
 			}
 
 			if (w->flags & (RSPAMD_STAT_TOKEN_FLAG_BROKEN_UNICODE|
diff --git a/src/libmime/message.h b/src/libmime/message.h
index a921d6f38..d6f1b76c0 100644
--- a/src/libmime/message.h
+++ b/src/libmime/message.h
@@ -140,8 +140,8 @@ struct rspamd_mime_text_part {
 	GByteArray *utf_content; /* utf8 encoded processed content */
 	GByteArray *utf_raw_content; /* utf raw content */
 	GByteArray *utf_stripped_content; /* utf content with no newlines */
-	GArray *normalized_hashes;
-	GArray *utf_words;
+	GArray *normalized_hashes; /* Array of guint64 */
+	GArray *utf_words; /* Array of rspamd_stat_token_t */
 	UText utf_stripped_text; /* Used by libicu to represent the utf8 content */
 
 	GPtrArray *newlines; /**< positions of newlines in text, relative to content*/