From e6657e05d2845bba220b93cc764a242cfb2e0aaf Mon Sep 17 00:00:00 2001
From: Vsevolod Stakhov
Date: Thu, 28 Apr 2016 16:58:13 +0100
Subject: [PATCH] [Feature] Store text parts content with newlines stripped

---
 src/libmime/message.c | 33 +++++++++++++++++++++++++++++++--
 src/libmime/message.h |  1 +
 2 files changed, 32 insertions(+), 2 deletions(-)

diff --git a/src/libmime/message.c b/src/libmime/message.c
index 0daacc40b..8b43aec8c 100644
--- a/src/libmime/message.c
+++ b/src/libmime/message.c
@@ -983,7 +983,7 @@ rspamd_normalize_text_part (struct rspamd_task *task,
 	struct sb_stemmer *stem = NULL;
 #endif
 	rspamd_ftok_t *w;
-	const guchar *r;
+	const guchar *r, *p, *c, *end;
 	gchar *temp_word;
 	guint i, nlen;
 
@@ -996,6 +996,35 @@ rspamd_normalize_text_part (struct rspamd_task *task,
 		}
 	}
 #endif
+	/* Strip newlines */
+	part->stripped_content = g_byte_array_sized_new (part->content->len);
+	p = part->content->data;
+	c = p;
+	end = p + part->content->len;
+
+	while (p < end) {
+		if (*p == '\r' || *p == '\n') {
+			if (p > c) {
+				g_byte_array_append (part->stripped_content, c, p - c);
+			}
+
+			while (p < end && (*p == '\r' || *p == '\n')) {
+				p ++;
+			}
+			c = p;
+		}
+		else {
+			p ++;
+		}
+	}
+
+	if (p > c) {
+		g_byte_array_append (part->stripped_content, c, p - c);
+	}
+
+	rspamd_mempool_add_destructor (task->task_pool,
+		(rspamd_mempool_destruct_t) free_byte_array_callback,
+		part->stripped_content);
 
 	/* Ugly workaround */
 	part->normalized_words = rspamd_tokenize_text (part->content->data,
@@ -1124,7 +1153,7 @@ process_text_part (struct rspamd_task *task,
 	const gchar *cd, *p, *c;
 	guint remain;
 
-	/* Skip attachements */
+	/* Skip attachments */
 #ifndef GMIME24
 	cd = g_mime_part_get_content_disposition (GMIME_PART (mime_part->mime));
 	if (cd &&
diff --git a/src/libmime/message.h b/src/libmime/message.h
index 4f8f27b0b..2c2365ef7 100644
--- a/src/libmime/message.h
+++ b/src/libmime/message.h
@@ -43,6 +43,7 @@ struct mime_text_part {
 	const gchar *real_charset;
 	GByteArray *orig;
 	GByteArray *content;
+	GByteArray *stripped_content; /**< no newlines or html tags */
 	struct html_content *html;
 	GList *urls_offset; /**< list of offsets of urls */
 	GMimeObject *parent;
-- 
2.39.5