diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2016-04-28 16:58:13 +0100 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2016-04-28 16:58:13 +0100 |
commit | 2e5fc80627fcc828513aa6ad25726949dd3338fc (patch) | |
tree | f74efbc8041d571a479ba64602d6bcc3967e1a62 /src/libmime | |
parent | 9f218324e0a2430572fa1b4f9ab169980d001eb0 (diff) | |
download | rspamd-2e5fc80627fcc828513aa6ad25726949dd3338fc.tar.gz rspamd-2e5fc80627fcc828513aa6ad25726949dd3338fc.zip |
[Feature] Store text parts content with newlines stripped
Diffstat (limited to 'src/libmime')
-rw-r--r-- | src/libmime/message.c | 33 | ||||
-rw-r--r-- | src/libmime/message.h | 1 |
2 files changed, 32 insertions, 2 deletions
diff --git a/src/libmime/message.c b/src/libmime/message.c
index a52b43f36..be773d480 100644
--- a/src/libmime/message.c
+++ b/src/libmime/message.c
@@ -984,7 +984,7 @@ rspamd_normalize_text_part (struct rspamd_task *task,
 	struct sb_stemmer *stem = NULL;
 #endif
 	rspamd_ftok_t *w;
-	const guchar *r;
+	const guchar *r, *p, *c, *end;
 	gchar *temp_word;
 	guint i, nlen;
 
@@ -997,6 +997,35 @@ rspamd_normalize_text_part (struct rspamd_task *task,
 		}
 	}
 #endif
+	/* Strip newlines */
+	part->stripped_content = g_byte_array_sized_new (part->content->len);
+	p = part->content->data;
+	c = p;
+	end = p + part->content->len;
+
+	while (p < end) {
+		if (*p == '\r' || *p == '\n') {
+			if (p > c) {
+				g_byte_array_append (part->stripped_content, c, p - c);
+			}
+
+			while (p < end && (*p == '\r' || *p == '\n')) {
+				p ++;
+			}
+			c = p;
+		}
+		else {
+			p ++;
+		}
+	}
+
+	if (p > c) {
+		g_byte_array_append (part->stripped_content, c, p - c);
+	}
+
+	rspamd_mempool_add_destructor (task->task_pool,
+		(rspamd_mempool_destruct_t) free_byte_array_callback,
+		part->stripped_content);
 
 	/* Ugly workaround */
 	part->normalized_words = rspamd_tokenize_text (part->content->data,
@@ -1125,7 +1154,7 @@ process_text_part (struct rspamd_task *task,
 	const gchar *cd, *p, *c;
 	guint remain;
 
-	/* Skip attachements */
+	/* Skip attachments */
 #ifndef GMIME24
 	cd = g_mime_part_get_content_disposition (GMIME_PART (mime_part->mime));
 	if (cd &&
diff --git a/src/libmime/message.h b/src/libmime/message.h
index 2f7539825..59fa0b73c 100644
--- a/src/libmime/message.h
+++ b/src/libmime/message.h
@@ -43,6 +43,7 @@ struct mime_text_part {
 	const gchar *real_charset;
 	GByteArray *orig;
 	GByteArray *content;
+	GByteArray *stripped_content; /**< no newlines or html tags */
 	struct html_content *html;
 	GList *urls_offset; /**< list of offsets of urls */
 	GMimeObject *parent;