aboutsummaryrefslogtreecommitdiffstats
path: root/src/libmime
diff options
context:
space:
mode:
authorVsevolod Stakhov <vsevolod@highsecure.ru>2016-12-13 19:06:29 +0000
committerVsevolod Stakhov <vsevolod@highsecure.ru>2016-12-13 19:06:49 +0000
commit9b2d502e69ab45a32fb89088b4b06961f54a49ce (patch)
treedbf20ff02fe89422d30428d635672593b77e9ff8 /src/libmime
parentc5c33ded60fc05166f2f9cea94290eca3148edd1 (diff)
downloadrspamd-9b2d502e69ab45a32fb89088b4b06961f54a49ce.tar.gz
rspamd-9b2d502e69ab45a32fb89088b4b06961f54a49ce.zip
[Feature] Add preliminary multipart support
Diffstat (limited to 'src/libmime')
-rw-r--r--src/libmime/mime_parser.c334
1 files changed, 326 insertions, 8 deletions
diff --git a/src/libmime/mime_parser.c b/src/libmime/mime_parser.c
index eab42b9b6..27eedb644 100644
--- a/src/libmime/mime_parser.c
+++ b/src/libmime/mime_parser.c
@@ -24,6 +24,12 @@
#include "cryptobox.h"
static struct rspamd_multipattern *mp_boundary = NULL;
+static const guint max_nested = 32;
+
+#define msg_debug_mime(...) rspamd_default_log_function (G_LOG_LEVEL_DEBUG, \
+ "mime", task->task_pool->tag.uid, \
+ G_STRFUNC, \
+ __VA_ARGS__)
struct rspamd_mime_parser_stack {
GPtrArray *stack; /* Stack of parts */
@@ -32,6 +38,23 @@ struct rspamd_mime_parser_stack {
const gchar *end;
};
+static gboolean
+rspamd_mime_parse_multipart_part (struct rspamd_task *task,
+ struct rspamd_mime_part *part,
+ struct rspamd_mime_parser_stack *st,
+ GError **err);
+static gboolean
+rspamd_mime_parse_message (struct rspamd_task *task,
+ struct rspamd_mime_part *part,
+ struct rspamd_mime_parser_stack *st,
+ GError **err);
+static gboolean
+rspamd_mime_parse_normal_part (struct rspamd_task *task,
+ struct rspamd_mime_part *part,
+ struct rspamd_mime_parser_stack *st,
+ GError **err);
+
+
#define RSPAMD_MIME_QUARK (rspamd_mime_parser_quark())
static GQuark
rspamd_mime_parser_quark (void)
@@ -39,6 +62,31 @@ rspamd_mime_parser_quark (void)
return g_quark_from_static_string ("mime-parser");
}
+static const gchar*
+rspamd_cte_to_string (enum rspamd_cte ct)
+{
+ const gchar *ret = "unknown";
+
+ switch (ct) {
+ case RSPAMD_CTE_7BIT:
+ ret = "7bit";
+ break;
+ case RSPAMD_CTE_8BIT:
+ ret = "8bit";
+ break;
+ case RSPAMD_CTE_QP:
+ ret = "quoted-printable";
+ break;
+ case RSPAMD_CTE_B64:
+ ret = "base64";
+ break;
+ default:
+ break;
+ }
+
+ return ret;
+}
+
static void
rspamd_mime_parser_init_mp (void)
{
@@ -46,6 +94,7 @@ rspamd_mime_parser_init_mp (void)
g_assert (mp_boundary != NULL);
rspamd_multipattern_add_pattern (mp_boundary, "\r--", 0);
rspamd_multipattern_add_pattern (mp_boundary, "\n--", 0);
+ g_assert (rspamd_multipattern_compile (mp_boundary, NULL));
}
static enum rspamd_cte
@@ -75,7 +124,8 @@ rspamd_mime_parse_cte (const gchar *in, gsize len)
}
static void
-rspamd_mime_part_get_cte_heuristic (struct rspamd_mime_part *part)
+rspamd_mime_part_get_cte_heuristic (struct rspamd_task *task,
+ struct rspamd_mime_part *part)
{
const guint check_len = 80;
guint real_len, nspaces = 0, neqsign = 0, n8bit = 0;
@@ -119,6 +169,9 @@ rspamd_mime_part_get_cte_heuristic (struct rspamd_mime_part *part)
else {
ret = RSPAMD_CTE_8BIT;
}
+
+ part->cte = ret;
+ msg_debug_mime ("detected cte: %s", rspamd_cte_to_string (ret));
}
static void
@@ -134,7 +187,7 @@ rspamd_mime_part_get_cte (struct rspamd_task *task, struct rspamd_mime_part *par
"Content-Transfer-Encoding", FALSE);
if (hdrs == NULL) {
- rspamd_mime_part_get_cte_heuristic (part);
+ rspamd_mime_part_get_cte_heuristic (task, part);
}
else {
for (i = 0; i < hdrs->len; i ++) {
@@ -151,10 +204,11 @@ rspamd_mime_part_get_cte (struct rspamd_task *task, struct rspamd_mime_part *par
}
if (cte == RSPAMD_CTE_UNKNOWN) {
- rspamd_mime_part_get_cte_heuristic (part);
+ rspamd_mime_part_get_cte_heuristic (task, part);
}
else {
part->cte = cte;
+ msg_debug_mime ("processed cte: %s", rspamd_cte_to_string (cte));
}
}
}
@@ -203,16 +257,260 @@ rspamd_mime_parse_normal_part (struct rspamd_task *task,
g_assert_not_reached ();
}
+ msg_debug_mime ("parsed data part %T/%T of length %z (%z orig), %s cte",
+ &part->ct->type, &part->ct->subtype, part->parsed_data.len,
+ part->raw_data.len, rspamd_cte_to_string (part->cte));
+
return TRUE;
}
+struct rspamd_mime_multipart_cbdata {
+ struct rspamd_task *task;
+ struct rspamd_mime_part *multipart;
+ struct rspamd_mime_parser_stack *st;
+ const gchar *part_start;
+ rspamd_ftok_t *cur_boundary;
+ GError **err;
+};
+
+static gboolean
+rspamd_mime_process_multipart_node (struct rspamd_task *task,
+ struct rspamd_mime_parser_stack *st,
+ struct rspamd_mime_part *multipart,
+ const gchar *start, const gchar *end,
+ GError **err)
+{
+ struct rspamd_content_type *ct, *sel = NULL;
+ struct rspamd_mime_header *hdr;
+ GPtrArray *hdrs = NULL;
+ struct rspamd_mime_part *npart;
+ GString str;
+ goffset hdr_pos, body_pos;
+ guint i;
+ gboolean ret = FALSE;
+
+
+ str.str = (gchar *)start;
+ str.len = end - start;
+
+ hdr_pos = rspamd_string_find_eoh (&str, &body_pos);
+ npart = rspamd_mempool_alloc0 (task->task_pool,
+ sizeof (struct rspamd_mime_part));
+ npart->parent = multipart;
+
+ if (hdr_pos > 0 && hdr_pos < str.len) {
+
+ npart->raw_headers_str = str.str;
+ npart->raw_headers_len = str.len;
+ npart->raw_data.begin = start + body_pos;
+ npart->raw_data.len = (end - start) - body_pos;
+
+ if (task->raw_headers_content.len > 0) {
+ rspamd_mime_headers_process (task, task->raw_headers,
+ npart->raw_headers_str,
+ npart->raw_headers_len,
+ TRUE);
+ }
+
+ hdrs = rspamd_message_get_header_from_hash (st->cur_part->raw_headers,
+ task->task_pool,
+ "Content-Type", FALSE);
+
+ }
+ else {
+ npart->raw_headers_str = 0;
+ npart->raw_headers_len = 0;
+ npart->raw_data.begin = start;
+ npart->raw_data.len = end - start;
+ }
+
+
+ if (hdrs != NULL) {
+
+ for (i = 0; i < hdrs->len; i ++) {
+ hdr = g_ptr_array_index (hdrs, i);
+ ct = rspamd_content_type_parse (hdr->value, strlen (hdr->value),
+ task->task_pool);
+
+ /* Here we prefer multipart content-type or any content-type */
+ if (ct) {
+ if (sel == NULL) {
+ sel = ct;
+ }
+ else if (ct->flags & RSPAMD_CONTENT_TYPE_MULTIPART) {
+ sel = ct;
+ }
+ }
+ }
+ }
+
+ if (sel == NULL) {
+ /* TODO: assume part as octet-stream */
+ return FALSE;
+ }
+
+ npart->ct = sel;
+
+ if (sel->flags & RSPAMD_CONTENT_TYPE_MULTIPART) {
+ st->cur_part = npart;
+ g_ptr_array_add (st->stack, npart);
+ ret = rspamd_mime_parse_multipart_part (task, npart, st, err);
+ }
+ else if (sel->flags & RSPAMD_CONTENT_TYPE_MESSAGE) {
+ st->cur_part = npart;
+ g_ptr_array_add (st->stack, npart);
+ ret = rspamd_mime_parse_message (task, npart, st, err);
+ }
+ else {
+ ret = rspamd_mime_parse_normal_part (task, npart, st, err);
+ }
+
+ return ret;
+}
+
+static gint
+rspamd_mime_parse_multipart_cb (struct rspamd_multipattern *mp,
+ guint strnum,
+ gint match_start,
+ gint match_pos,
+ const gchar *text,
+ gsize len,
+ void *context)
+{
+ struct rspamd_mime_multipart_cbdata *cb = context;
+ struct rspamd_task *task;
+ const gchar *pos = text + match_pos, *end = text + len, *st;
+ gint ret = 0;
+
+ task = cb->task;
+
+ /* Now check boundary */
+ if (!cb->part_start) {
+ if (cb->cur_boundary) {
+ if (match_pos + cb->cur_boundary->len > len) {
+ if (memcmp (pos, cb->cur_boundary->begin, cb->cur_boundary->len) != 0) {
+ msg_debug_mime ("found invalid boundary: %*s, %T expected",
+ (gint)cb->cur_boundary->len, pos, cb->cur_boundary);
+
+ /* Just continue search */
+ return 0;
+ }
+
+ pos += cb->cur_boundary->len;
+
+ while (pos < end && (*pos == '\r' || *pos == '\n')) {
+ pos ++;
+ }
+
+ cb->part_start = pos;
+ }
+ else {
+ msg_debug_mime ("boundary is stripped");
+ return (-1);
+ }
+ }
+ else {
+ /* We see something like boundary: '[\r\n]--xxx */
+ /* TODO: write heuristic */
+ }
+ }
+ else {
+ /* We have seen the start of the boundary */
+ if (cb->part_start < pos) {
+ /* We should have seen some boundary */
+ g_assert (cb->cur_boundary != NULL);
+
+ if (match_pos + cb->cur_boundary->len > len) {
+ if (memcmp (pos, cb->cur_boundary->begin, cb->cur_boundary->len) != 0) {
+ msg_debug_mime ("found invalid boundary: %*s, %T expected",
+ (gint)cb->cur_boundary->len, pos, cb->cur_boundary);
+
+ /* Just continue search */
+ return 0;
+ }
+
+ pos += cb->cur_boundary->len;
+
+ if (pos < end - 1 && pos[0] == '-' && pos[1] == '-') {
+ /* It should be end of multipart, but it is sometimes isn't */
+ /* TODO: deal with such perversions */
+ pos += 2;
+ g_ptr_array_remove_index_fast (cb->st->stack,
+ cb->st->stack->len - 1);
+ ret = 1;
+ }
+
+ if (pos[0] != '\r' && pos[0] != '\n' && pos != end) {
+ /* This is not actually our boundary, but somethig else */
+ return 0;
+ }
+
+ st = match_pos + text;
+ /* Find the start of part */
+ while (st > cb->part_start && (*st == '\n' || *st == '\r')) {
+ st --;
+ }
+
+ if (!rspamd_mime_process_multipart_node (task, cb->st,
+ cb->multipart, cb->part_start, st, cb->err)) {
+ return -1;
+ }
+
+ while (pos < end && (*pos == '\r' || *pos == '\n')) {
+ pos ++;
+ }
+
+ /* Go towards the next part */
+ cb->part_start = pos;
+ }
+ else {
+ msg_debug_mime ("boundary is stripped");
+ return (-1);
+ }
+ }
+ else {
+ /* We have something very bad in fact */
+ g_assert_not_reached ();
+ }
+ }
+
+ return ret;
+}
+
static gboolean
rspamd_mime_parse_multipart_part (struct rspamd_task *task,
struct rspamd_mime_part *part,
struct rspamd_mime_parser_stack *st,
GError **err)
{
- return FALSE;
+ struct rspamd_mime_multipart_cbdata cbdata;
+ gint ret;
+
+ if (st->stack->len > max_nested) {
+ g_set_error (err, RSPAMD_MIME_QUARK, E2BIG, "Nesting level is too high: %d",
+ st->stack->len);
+ return FALSE;
+ }
+
+ cbdata.multipart = part;
+ cbdata.task = task;
+ cbdata.st = st;
+ cbdata.part_start = NULL;
+ cbdata.err = err;
+
+ if (part->ct->boundary.len > 0) {
+ /* We know our boundary */
+ cbdata.cur_boundary = &part->ct->boundary;
+ }
+ else {
+ /* Guess boundary */
+ cbdata.cur_boundary = NULL;
+ }
+
+ ret = rspamd_multipattern_lookup (mp_boundary, part->raw_data.begin,
+ part->raw_data.len, rspamd_mime_parse_multipart_cb, &cbdata, NULL);
+
+ return (ret != -1);
}
static gboolean
@@ -232,6 +530,12 @@ rspamd_mime_parse_message (struct rspamd_task *task,
gboolean ret = FALSE;
GString str;
+ if (st->stack->len > max_nested) {
+ g_set_error (err, RSPAMD_MIME_QUARK, E2BIG, "Nesting level is too high: %d",
+ st->stack->len);
+ return FALSE;
+ }
+
/* Parse headers */
if (st->cur_part == NULL) {
p = task->msg.begin;
@@ -308,16 +612,16 @@ rspamd_mime_parse_message (struct rspamd_task *task,
"Content-Type", FALSE);
}
else {
+ /* Adjust part data */
part->raw_headers_str = str.str;
part->raw_headers_len = str.len;
part->raw_data.begin = p + body_pos;
part->raw_data.len -= body_pos;
- task->raw_headers_content.body_start = p + body_pos;
if (task->raw_headers_content.len > 0) {
rspamd_mime_headers_process (task, task->raw_headers,
- task->raw_headers_content.begin,
- task->raw_headers_content.len,
+ part->raw_headers_str,
+ part->raw_headers_len,
TRUE);
}
@@ -396,9 +700,23 @@ rspamd_mime_parse_message (struct rspamd_task *task,
ret = rspamd_mime_parse_normal_part (task, npart, st, err);
}
+ if (part) {
+ /* Remove message part from the stack */
+ g_ptr_array_remove_index_fast (st->stack, st->stack->len - 1);
+ }
+
return ret;
}
+static void
+rspamd_mime_parse_stack_free (struct rspamd_mime_parser_stack *st)
+{
+ if (st) {
+ g_ptr_array_free (st->stack, TRUE);
+ g_slice_free1 (sizeof (*st), st);
+ }
+}
+
gboolean
rspamd_mime_parse_task (struct rspamd_task *task, GError **err)
{
@@ -419,7 +737,7 @@ rspamd_mime_parse_task (struct rspamd_task *task, GError **err)
}
ret = rspamd_mime_parse_message (task, NULL, st, err);
- //rspamd_mime_parse_stack_free (st);
+ rspamd_mime_parse_stack_free (st);
return ret;
}