From 76f23e7218aa45679ad2b8821c3e1c1cd36dd869 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Mon, 22 Dec 2014 17:05:13 +0000 Subject: [PATCH] Skip short words. --- src/libmime/message.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/libmime/message.c b/src/libmime/message.c index 8ce3e720f..6140f3c24 100644 --- a/src/libmime/message.c +++ b/src/libmime/message.c @@ -1036,6 +1036,7 @@ process_text_part (struct rspamd_task *task, struct mime_text_part *text_part; const gchar *cd; gchar *pos; + gsize l; rspamd_fstring_t token, buf; /* Skip attachements */ @@ -1136,7 +1137,22 @@ process_text_part (struct rspamd_task *task, text_part->words = g_array_new (FALSE, FALSE, sizeof (rspamd_fstring_t)); while ((pos = rspamd_tokenizer_get_word (&buf, &token, &text_part->urls_offset)) != NULL) { + if (text_part->is_utf) { + l = g_utf8_strlen (token.begin, token.len); + } + else { + l = token.len; + } + /* + * XXX: make this configurable + */ + if (l < 4) { + token.begin = pos; + continue; + } g_array_append_val (text_part->words, token); + + token.begin = pos; } } -- 2.39.5