diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2014-12-22 17:05:13 +0000 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2014-12-22 17:05:13 +0000 |
commit | 76f23e7218aa45679ad2b8821c3e1c1cd36dd869 (patch) | |
tree | 4bc6df265c9a665ecfe8692ed2af755bbedccb7c | |
parent | 44ac52fb4400994a800ddd1ddcd11c96dc1e7084 (diff) | |
download | rspamd-76f23e7218aa45679ad2b8821c3e1c1cd36dd869.tar.gz rspamd-76f23e7218aa45679ad2b8821c3e1c1cd36dd869.zip |
Skip short words.
-rw-r--r-- | src/libmime/message.c | 16 |
1 files changed, 16 insertions, 0 deletions
diff --git a/src/libmime/message.c b/src/libmime/message.c
index 8ce3e720f..6140f3c24 100644
--- a/src/libmime/message.c
+++ b/src/libmime/message.c
@@ -1036,6 +1036,7 @@ process_text_part (struct rspamd_task *task,
 	struct mime_text_part *text_part;
 	const gchar *cd;
 	gchar *pos;
+	gsize l;
 	rspamd_fstring_t token, buf;
 
 	/* Skip attachements */
@@ -1136,7 +1137,22 @@ process_text_part (struct rspamd_task *task,
 		text_part->words = g_array_new (FALSE, FALSE, sizeof (rspamd_fstring_t));
 		while ((pos = rspamd_tokenizer_get_word (&buf,
 				&token, &text_part->urls_offset)) != NULL) {
+			if (text_part->is_utf) {
+				l = g_utf8_strlen (token.begin, token.len);
+			}
+			else {
+				l = token.len;
+			}
+			/*
+			 * XXX: make this configurable
+			 */
+			if (l < 4) {
+				token.begin = pos;
+				continue;
+			}
 			g_array_append_val (text_part->words, token);
+
+			token.begin = pos;
 		}
 	}