summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@highsecure.ru> 2014-12-22 17:05:13 +0000
committer Vsevolod Stakhov <vsevolod@highsecure.ru> 2014-12-22 17:05:13 +0000
commit 76f23e7218aa45679ad2b8821c3e1c1cd36dd869 (patch)
tree 4bc6df265c9a665ecfe8692ed2af755bbedccb7c
parent 44ac52fb4400994a800ddd1ddcd11c96dc1e7084 (diff)
download rspamd-76f23e7218aa45679ad2b8821c3e1c1cd36dd869.tar.gz
rspamd-76f23e7218aa45679ad2b8821c3e1c1cd36dd869.zip
Skip short words.
-rw-r--r-- src/libmime/message.c 16
1 files changed, 16 insertions, 0 deletions
diff --git a/src/libmime/message.c b/src/libmime/message.c
index 8ce3e720f..6140f3c24 100644
--- a/src/libmime/message.c
+++ b/src/libmime/message.c
@@ -1036,6 +1036,7 @@ process_text_part (struct rspamd_task *task,
struct mime_text_part *text_part;
const gchar *cd;
gchar *pos;
+ gsize l;
rspamd_fstring_t token, buf;
/* Skip attachements */
@@ -1136,7 +1137,22 @@ process_text_part (struct rspamd_task *task,
text_part->words = g_array_new (FALSE, FALSE, sizeof (rspamd_fstring_t));
while ((pos = rspamd_tokenizer_get_word (&buf,
&token, &text_part->urls_offset)) != NULL) {
+ if (text_part->is_utf) {
+ l = g_utf8_strlen (token.begin, token.len);
+ }
+ else {
+ l = token.len;
+ }
+ /*
+ * XXX: make this configurable
+ */
+ if (l < 4) {
+ token.begin = pos;
+ continue;
+ }
g_array_append_val (text_part->words, token);
+
+ token.begin = pos;
}
}