aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@highsecure.ru> 2020-09-28 11:00:29 +0100
committer Vsevolod Stakhov <vsevolod@highsecure.ru> 2020-09-28 11:00:29 +0100
commit ffdde528bb3981325cdbc69600ffbaff024a6ad3 (patch)
tree bc63626a8e14e386d4bf9a2716927c5bce2ac659
parent 5dd1c05bcd1141456a9e48d24ceec4b3cde0f276 (diff)
download rspamd-ffdde528bb3981325cdbc69600ffbaff024a6ad3.tar.gz
download rspamd-ffdde528bb3981325cdbc69600ffbaff024a6ad3.zip
[Minor] Do not count empty words
-rw-r--r-- src/libmime/message.c 6
-rw-r--r-- src/libmime/message.h 4
2 files changed, 5 insertions, 5 deletions
diff --git a/src/libmime/message.c b/src/libmime/message.c
index f2f22f6fe..d6a5fb5ce 100644
--- a/src/libmime/message.c
+++ b/src/libmime/message.c
@@ -93,10 +93,10 @@ rspamd_mime_part_extract_words (struct rspamd_task *task,
if (w->stemmed.len <= 3) {
short_len++;
}
- }
- if (w->flags & RSPAMD_STAT_TOKEN_FLAG_TEXT) {
- part->nwords ++;
+ if (w->flags & RSPAMD_STAT_TOKEN_FLAG_TEXT) {
+ part->nwords ++;
+ }
}
if (w->flags & (RSPAMD_STAT_TOKEN_FLAG_BROKEN_UNICODE|
diff --git a/src/libmime/message.h b/src/libmime/message.h
index a921d6f38..d6f1b76c0 100644
--- a/src/libmime/message.h
+++ b/src/libmime/message.h
@@ -140,8 +140,8 @@ struct rspamd_mime_text_part {
GByteArray *utf_content; /* utf8 encoded processed content */
GByteArray *utf_raw_content; /* utf raw content */
GByteArray *utf_stripped_content; /* utf content with no newlines */
- GArray *normalized_hashes;
- GArray *utf_words;
+ GArray *normalized_hashes; /* Array of guint64 */
+ GArray *utf_words; /* Array of rspamd_stat_token_t */
UText utf_stripped_text; /* Used by libicu to represent the utf8 content */
GPtrArray *newlines; /**< positions of newlines in text, relative to content*/