diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2016-07-23 17:13:36 +0100 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2016-07-23 17:14:45 +0100 |
commit | f5dcf4b8a4a6a9881d95e4d4b1edd4c27c077d08 (patch) | |
tree | c18b5037178bc3f9b0de97f2df35d3507497a7b0 /src/libmime | |
parent | c9733c168687f1b0bf843adbfdcd3a1d586f099b (diff) | |
download | rspamd-f5dcf4b8a4a6a9881d95e4d4b1edd4c27c077d08.tar.gz rspamd-f5dcf4b8a4a6a9881d95e4d4b1edd4c27c077d08.zip |
[Feature] Create a dedicated parser to strip newlines
Issue: #744
Diffstat (limited to 'src/libmime')
-rw-r--r-- | src/libmime/message.c | 68 | ||||
-rw-r--r-- | src/libmime/smtp_parsers.h | 4 |
2 files changed, 12 insertions, 60 deletions
diff --git a/src/libmime/message.c b/src/libmime/message.c
index 6e4e69597..89ccff68b 100644
--- a/src/libmime/message.c
+++ b/src/libmime/message.c
@@ -794,68 +794,16 @@ rspamd_normalize_text_part (struct rspamd_task *task,
 	c = p;
 	end = p + part->content->len;
 
-	while (p < end) {
-		p = memchr (c, '\n', end - c);
-
-		if (p) {
-			if (*(p - 1) == '\r') {
-				p --;
-			}
-
-			if (p > c) {
-				g_byte_array_append (part->stripped_content, c, p - c);
-			}
-
-			/*
-			 * Now we need to decide, maybe we have the following cases:
-			 * 1. Multiple newlines must be replaced by one newline
-			 * 2. If a line is finished with punctuation character, then insert
-			 * one newline
-			 * 3. In HTML parts we have to insert newlines as well
-			 */
-
-			if (p > part->content->data &&
-					(IS_PART_HTML (part) ||
-					*(p - 1) == '\n' ||
-					g_ascii_ispunct (*(p - 1))
-					)) {
-				g_byte_array_append (part->stripped_content, "\n", 1);
-			}
-
-			/* As it could cause reallocation, we initially store offsets */
-			g_ptr_array_add (part->newlines,
-					GUINT_TO_POINTER (part->stripped_content->len));
-			ex = rspamd_mempool_alloc (task->task_pool, sizeof (*ex));
-			ex->pos = part->stripped_content->len;
-			ex->len = 0;
-			ex->type = RSPAMD_EXCEPTION_NEWLINE;
-			part->exceptions = g_list_prepend (part->exceptions, ex);
-			part->nlines ++;
-			p ++;
-
-			while (p < end && (*p == '\r' || *p == '\n')) {
-				if (*p == '\n') {
-					part->nlines ++;
-				}
-
-				p ++;
-			}
-			c = p;
-		}
-		else {
-			p = end;
-			break;
-		}
-	}
-
-	if (p > c) {
-		g_byte_array_append (part->stripped_content, c, p - c);
-	}
+	rspamd_strip_newlines_parse (p, end, part->stripped_content,
+			IS_PART_HTML (part), &part->nlines, part->newlines);
 
-	/* Now convert offsets to real pointers for convenience */
 	for (i = 0; i < part->newlines->len; i ++) {
-		guint off = GPOINTER_TO_UINT (g_ptr_array_index (part->newlines, i));
-		g_ptr_array_index (part->newlines, i) = part->stripped_content->data + off;
+		ex = rspamd_mempool_alloc (task->task_pool, sizeof (*ex));
+		p = g_ptr_array_index (part->newlines, i);
+		ex->pos = p - c;
+		ex->len = 0;
+		ex->type = RSPAMD_EXCEPTION_NEWLINE;
+		part->exceptions = g_list_prepend (part->exceptions, ex);
 	}
 
 	rspamd_mempool_add_destructor (task->task_pool,
diff --git a/src/libmime/smtp_parsers.h b/src/libmime/smtp_parsers.h
index 62e7738e3..07bd24688 100644
--- a/src/libmime/smtp_parsers.h
+++ b/src/libmime/smtp_parsers.h
@@ -26,4 +26,8 @@ int rspamd_smtp_recieved_parse (struct rspamd_task *task,
 int rspamd_smtp_addr_parse (const char *data, size_t len,
 		struct rspamd_email_address *addr);
 
+void rspamd_strip_newlines_parse (const gchar *begin, const gchar *pe,
+		GByteArray *data, gboolean is_html, guint *newlines_count,
+		GPtrArray *newlines);
+
 #endif /* SRC_LIBMIME_SMTP_PARSERS_H_ */