summaryrefslogtreecommitdiffstats
path: root/src/libmime
diff options
context:
space:
mode:
authorVsevolod Stakhov <vsevolod@highsecure.ru>2016-12-14 16:07:08 +0000
committerVsevolod Stakhov <vsevolod@highsecure.ru>2016-12-14 16:07:08 +0000
commit3f798771dc245a323c1b476c7be2b4454a4bf391 (patch)
tree9c72ff3a230ac7aff1746cd6e1f5153f345a5a41 /src/libmime
parentd7f3f254ff4219080c67833d1984a641a290ef4c (diff)
downloadrspamd-3f798771dc245a323c1b476c7be2b4454a4bf391.tar.gz
rspamd-3f798771dc245a323c1b476c7be2b4454a4bf391.zip
[Fix] Various fixes to mime parser
Diffstat (limited to 'src/libmime')
-rw-r--r--src/libmime/mime_parser.c238
1 files changed, 149 insertions, 89 deletions
diff --git a/src/libmime/mime_parser.c b/src/libmime/mime_parser.c
index b4ee5fcbb..db43fcfe7 100644
--- a/src/libmime/mime_parser.c
+++ b/src/libmime/mime_parser.c
@@ -317,7 +317,7 @@ rspamd_mime_process_multipart_node (struct rspamd_task *task,
rspamd_mime_headers_process (task, npart->raw_headers,
npart->raw_headers_str,
npart->raw_headers_len,
- TRUE);
+ FALSE);
}
hdrs = rspamd_message_get_header_from_hash (npart->raw_headers,
@@ -368,7 +368,10 @@ rspamd_mime_process_multipart_node (struct rspamd_task *task,
else if (sel->flags & RSPAMD_CONTENT_TYPE_MESSAGE) {
st->cur_part = npart;
g_ptr_array_add (st->stack, npart);
- ret = rspamd_mime_parse_message (task, npart, st, err);
+
+ if ((ret = rspamd_mime_parse_normal_part (task, npart, st, err))) {
+ ret = rspamd_mime_parse_message (task, npart, st, err);
+ }
}
else {
ret = rspamd_mime_parse_normal_part (task, npart, st, err);
@@ -390,6 +393,8 @@ rspamd_mime_parse_multipart_cb (struct rspamd_multipattern *mp,
struct rspamd_task *task;
const gchar *pos = text + match_pos, *end = text + len, *st;
gint ret = 0;
+ guint i, j;
+ struct rspamd_mime_part *par;
task = cb->task;
@@ -441,16 +446,96 @@ rspamd_mime_parse_multipart_cb (struct rspamd_multipattern *mp,
/* We should have seen some boundary */
g_assert (cb->cur_boundary != NULL);
- if (match_pos + cb->cur_boundary->len <= len) {
- if (rspamd_lc_cmp (pos, cb->cur_boundary->begin,
- cb->cur_boundary->len) != 0) {
- msg_debug_mime ("found invalid boundary: %*s, %T expected",
- (gint)cb->cur_boundary->len, pos, cb->cur_boundary);
+ if (pos == end) {
+ return 1;
+ /* We have part without ending line, assume it fine */
+ st = match_pos + text - 2;
- /* Just continue search */
- return 0;
+ if (!rspamd_mime_process_multipart_node (task, cb->st,
+ cb->multipart, cb->part_start, st, cb->err)) {
+ return -1;
}
+ while (pos < end && (*pos == '\r' || *pos == '\n')) {
+ pos ++;
+ }
+
+ /* Go towards the next part */
+ cb->part_start = pos;
+ cb->st->pos = pos;
+ }
+
+ if (match_pos + cb->cur_boundary->len > len ||
+ rspamd_lc_cmp (pos, cb->cur_boundary->begin,
+ cb->cur_boundary->len) != 0) {
+ msg_debug_mime ("found invalid boundary: %*s, %T expected",
+ (gint)cb->cur_boundary->len, pos, cb->cur_boundary);
+
+ /*
+ * We also need to check parent parts:
+ *
+ * --1
+ * --1.1
+ * --1.1 <- this one is closed implicitly by the next
+ * --1--
+ */
+ if (cb->st->stack->len > 0) {
+ for (i = cb->st->stack->len - 1; ; i --) {
+ par = g_ptr_array_index (cb->st->stack, i);
+
+ if (par->ct->flags & RSPAMD_CONTENT_TYPE_MULTIPART) {
+ if (par->ct->boundary.len > 0 &&
+ end - pos >= par->ct->boundary.len &&
+ rspamd_lc_cmp (pos,
+ par->ct->boundary.begin,
+ par->ct->boundary.len) == 0) {
+ pos += par->ct->boundary.len + 1;
+
+ /* Now we need to check for:
+ * 1. --\r|\n
+ * 2. \r|\n
+ * 3. --<EOF>
+ */
+ ret = 0;
+
+ if (pos == end || (*pos == '\r' || *pos == '\n')) {
+ ret = 1;
+ }
+ else if (pos < end - 1) {
+ if (pos[0] == '-' && pos[1] == '-') {
+ pos += 2;
+ if (pos == end ||
+ (*pos == '\r' || *pos == '\n')) {
+ ret = 1;
+ }
+ }
+ }
+
+ if (ret) {
+ /* Unwind parts stack to this part */
+ for (j = cb->st->stack->len - 1; j > i; j ++) {
+ g_ptr_array_remove_index_fast (
+ cb->st->stack, j);
+ }
+
+ g_ptr_array_remove_index_fast (cb->st->stack, i);
+ }
+
+ break;
+ }
+ }
+
+ if (i == 0) {
+ break;
+ }
+ }
+ }
+ if (!ret) {
+ /* Not a valid boundary */
+ return 0;
+ }
+ }
+ else {
pos += cb->cur_boundary->len;
cb->st->pos = pos;
@@ -458,20 +543,14 @@ rspamd_mime_parse_multipart_cb (struct rspamd_multipattern *mp,
/* It should be end of multipart, but it is sometimes isn't */
/* TODO: deal with such perversions */
pos += 2;
- g_ptr_array_remove_index_fast (cb->st->stack,
- cb->st->stack->len - 1);
ret = 1;
}
else if (pos[0] != '\r' && pos[0] != '\n' && pos != end) {
- /* This is not actually our boundary, but somethig else */
+ /* This is not actually our boundary, but something else */
return 0;
}
- st = match_pos + text;
- /* Find the start of part */
- while (st > cb->part_start && (*st == '\n' || *st == '\r')) {
- st --;
- }
+ st = match_pos + text - 2;
if (!rspamd_mime_process_multipart_node (task, cb->st,
cb->multipart, cb->part_start, st, cb->err)) {
@@ -486,14 +565,6 @@ rspamd_mime_parse_multipart_cb (struct rspamd_multipattern *mp,
cb->part_start = pos;
cb->st->pos = pos;
}
- else {
- msg_debug_mime ("boundary is stripped");
- g_set_error (cb->err, RSPAMD_MIME_QUARK, EINVAL,
- "middle boundary is stripped at %d (%zd available)",
- match_pos, len);
-
- return (-1);
- }
}
else {
/* We have something very bad in fact */
@@ -537,8 +608,20 @@ rspamd_mime_parse_multipart_part (struct rspamd_task *task,
cbdata.cur_boundary = NULL;
}
- ret = rspamd_multipattern_lookup (mp_boundary, part->raw_data.begin,
- part->raw_data.len, rspamd_mime_parse_multipart_cb, &cbdata, NULL);
+ ret = rspamd_multipattern_lookup (mp_boundary, part->raw_data.begin - 1,
+ part->raw_data.len + 1,
+ rspamd_mime_parse_multipart_cb, &cbdata, NULL);
+
+ if (st->pos < part->raw_data.begin + part->raw_data.len) {
+ if (!rspamd_mime_process_multipart_node (task, st,
+ part, cbdata.part_start,
+ part->raw_data.begin + part->raw_data.len, err)) {
+ return -1;
+ }
+ }
+
+ /* Cleanup stack */
+ g_ptr_array_remove_index_fast (st->stack, st->stack->len - 1);
return (ret != -1);
}
@@ -566,12 +649,12 @@ rspamd_mime_parse_message (struct rspamd_task *task,
return FALSE;
}
- if (part) {
- g_ptr_array_add (task->parts, part);
- }
+ /* Allocate real part */
+ npart = rspamd_mempool_alloc0 (task->task_pool,
+ sizeof (struct rspamd_mime_part));
- /* Parse headers */
- if (st->cur_part == NULL) {
+ if (part == NULL) {
+ /* Top level message */
p = task->msg.begin;
len = task->msg.len;
/* Skip any space characters to avoid some bad messages to be unparsed */
@@ -610,26 +693,10 @@ rspamd_mime_parse_message (struct rspamd_task *task,
str.str = (gchar *)p;
str.len = len;
- }
- else {
- p = part->raw_data.begin;
- len = part->raw_data.len;
-
- /* Skip any space characters to avoid some bad messages to be unparsed */
- while (len > 0 && g_ascii_isspace (*p)) {
- p ++;
- len --;
- }
-
- str.str = (gchar *)p;
- str.len = len;
- }
- hdr_pos = rspamd_string_find_eoh (&str, &body_pos);
-
- if (hdr_pos > 0 && hdr_pos < str.len) {
+ hdr_pos = rspamd_string_find_eoh (&str, &body_pos);
- if (part == NULL) {
+ if (hdr_pos > 0 && hdr_pos < str.len) {
task->raw_headers_content.begin = (gchar *) (str.str);
task->raw_headers_content.len = hdr_pos;
task->raw_headers_content.body_start = str.str + body_pos;
@@ -645,29 +712,43 @@ rspamd_mime_parse_message (struct rspamd_task *task,
task->task_pool,
"Content-Type", FALSE);
}
- else {
- /* Adjust part data */
- part->raw_headers = g_hash_table_new_full (rspamd_strcase_hash,
- rspamd_strcase_equal, NULL, rspamd_ptr_array_free_hard);
- part->raw_headers_str = str.str;
- part->raw_headers_len = hdr_pos;
- part->raw_data.begin = p + body_pos;
- part->raw_data.len -= body_pos;
-
- if (part->raw_headers_len > 0) {
- rspamd_mime_headers_process (task, part->raw_headers,
- part->raw_headers_str,
- part->raw_headers_len,
- TRUE);
- }
- hdrs = rspamd_message_get_header_from_hash (part->raw_headers,
- task->task_pool,
- "Content-Type", FALSE);
+ pbegin = st->pos;
+ plen = st->end - pbegin;
+ npart->raw_headers = g_hash_table_ref (task->raw_headers);
+ }
+ else {
+ str.str = (gchar *)part->parsed_data.begin;
+ str.len = part->parsed_data.len;
+
+ hdr_pos = rspamd_string_find_eoh (&str, &body_pos);
+ npart->raw_headers = g_hash_table_new_full (rspamd_strcase_hash,
+ rspamd_strcase_equal, NULL, rspamd_ptr_array_free_hard);
+
+ if (hdr_pos > 0 && hdr_pos < str.len) {
+ npart->raw_headers_str = (gchar *) (str.str);
+ npart->raw_headers_len = hdr_pos;
+ npart->raw_data.begin = str.str + body_pos;
+
+ if (npart->raw_headers_len > 0) {
+ rspamd_mime_headers_process (task, npart->raw_headers,
+ npart->raw_headers_str,
+ npart->raw_headers_len,
+ FALSE);
+ }
}
+ pbegin = part->parsed_data.begin;
+ plen = part->parsed_data.len;
+
+ hdrs = rspamd_message_get_header_from_hash (npart->raw_headers,
+ task->task_pool,
+ "Content-Type", FALSE);
}
+ npart->raw_data.begin = pbegin;
+ npart->raw_data.len = plen;
+ npart->parent_part = part;
if (hdrs == NULL) {
g_set_error (err, RSPAMD_MIME_QUARK, EINVAL,
@@ -699,29 +780,8 @@ rspamd_mime_parse_message (struct rspamd_task *task,
return FALSE;
}
- if (part) {
- pbegin = part->raw_data.begin;
- plen = part->raw_data.len;
- }
- else {
- pbegin = st->pos;
- plen = st->end - pbegin;
- }
-
- npart = rspamd_mempool_alloc0 (task->task_pool,
- sizeof (struct rspamd_mime_part));
- npart->raw_data.begin = pbegin;
- npart->raw_data.len = plen;
- npart->parent_part = part;
npart->ct = sel;
- if (part == NULL) {
- npart->raw_headers = g_hash_table_ref (task->raw_headers);
- }
- else {
- npart->raw_headers = g_hash_table_ref (part->raw_headers);
- }
-
if (sel->flags & RSPAMD_CONTENT_TYPE_MULTIPART) {
st->cur_part = npart;
g_ptr_array_add (st->stack, npart);