summary | refs | log | tree | commit | diff | stats
path: root/src/libmime
diff options
context:
space:
mode:
author: Vsevolod Stakhov <vsevolod@highsecure.ru> 2016-04-28 16:58:13 +0100
committer: Vsevolod Stakhov <vsevolod@highsecure.ru> 2016-04-28 16:58:13 +0100
commit: 2e5fc80627fcc828513aa6ad25726949dd3338fc (patch)
tree: f74efbc8041d571a479ba64602d6bcc3967e1a62 /src/libmime
parent: 9f218324e0a2430572fa1b4f9ab169980d001eb0 (diff)
download: rspamd-2e5fc80627fcc828513aa6ad25726949dd3338fc.tar.gz
          rspamd-2e5fc80627fcc828513aa6ad25726949dd3338fc.zip
[Feature] Store text parts content with newlines stripped
Diffstat (limited to 'src/libmime')
-rw-r--r-- src/libmime/message.c 33
-rw-r--r-- src/libmime/message.h 1
2 files changed, 32 insertions, 2 deletions
diff --git a/src/libmime/message.c b/src/libmime/message.c
index a52b43f36..be773d480 100644
--- a/src/libmime/message.c
+++ b/src/libmime/message.c
@@ -984,7 +984,7 @@ rspamd_normalize_text_part (struct rspamd_task *task,
struct sb_stemmer *stem = NULL;
#endif
rspamd_ftok_t *w;
- const guchar *r;
+ const guchar *r, *p, *c, *end;
gchar *temp_word;
guint i, nlen;
@@ -997,6 +997,35 @@ rspamd_normalize_text_part (struct rspamd_task *task,
}
}
#endif
+ /* Strip newlines */
+ part->stripped_content = g_byte_array_sized_new (part->content->len);
+ p = part->content->data;
+ c = p;
+ end = p + part->content->len;
+
+ while (p < end) {
+ if (*p == '\r' || *p == '\n') {
+ if (p > c) {
+ g_byte_array_append (part->stripped_content, c, p - c);
+ }
+
+ while (p < end && (*p == '\r' || *p == '\n')) {
+ p ++;
+ }
+ c = p;
+ }
+ else {
+ p ++;
+ }
+ }
+
+ if (p > c) {
+ g_byte_array_append (part->stripped_content, c, p - c);
+ }
+
+ rspamd_mempool_add_destructor (task->task_pool,
+ (rspamd_mempool_destruct_t) free_byte_array_callback,
+ part->stripped_content);
/* Ugly workaround */
part->normalized_words = rspamd_tokenize_text (part->content->data,
@@ -1125,7 +1154,7 @@ process_text_part (struct rspamd_task *task,
const gchar *cd, *p, *c;
guint remain;
- /* Skip attachements */
+ /* Skip attachments */
#ifndef GMIME24
cd = g_mime_part_get_content_disposition (GMIME_PART (mime_part->mime));
if (cd &&
diff --git a/src/libmime/message.h b/src/libmime/message.h
index 2f7539825..59fa0b73c 100644
--- a/src/libmime/message.h
+++ b/src/libmime/message.h
@@ -43,6 +43,7 @@ struct mime_text_part {
const gchar *real_charset;
GByteArray *orig;
GByteArray *content;
+ GByteArray *stripped_content; /**< no newlines or html tags */
struct html_content *html;
GList *urls_offset; /**< list of offsets of urls */
GMimeObject *parent;