From 65a811fb43b0d0e3557e53c9cf18131e20ba5456 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Thu, 7 Jul 2016 16:02:01 +0100 Subject: [PATCH] [Fix] More fixes to end of headers detection --- src/client/rspamc.c | 2 +- src/libmime/message.c | 57 ++++++++---------------------------------- src/libserver/dkim.c | 12 +++++++-- src/libserver/task.h | 6 ++++- src/libutil/str_util.c | 19 +++++++++++++- src/libutil/str_util.h | 2 +- 6 files changed, 46 insertions(+), 52 deletions(-) diff --git a/src/client/rspamc.c b/src/client/rspamc.c index 592971b73..b8cec9e67 100644 --- a/src/client/rspamc.c +++ b/src/client/rspamc.c @@ -1045,7 +1045,7 @@ rspamc_mime_output (FILE *out, ucl_object_t *result, GString *input, gboolean is_spam = FALSE; gchar *json_header, *json_header_encoded, *sc; - headers_pos = rspamd_string_find_eoh (input); + headers_pos = rspamd_string_find_eoh (input, NULL); if (headers_pos == -1) { rspamd_fprintf (stderr,"cannot find end of headers position"); diff --git a/src/libmime/message.c b/src/libmime/message.c index de70d76bc..f6c023294 100644 --- a/src/libmime/message.c +++ b/src/libmime/message.c @@ -218,8 +218,14 @@ process_raw_headers (struct rspamd_task *task, GHashTable *target, tmp ++; } + if (p + 1 == end) { + new->raw_len = end - new->raw_value; + } + else { + new->raw_len = p - new->raw_value; + } + new->value = tmp; - new->raw_len = p - new->raw_value; new->decoded = g_mime_utils_header_decode_text (new->value); if (new->decoded != NULL) { @@ -1300,44 +1306,6 @@ rspamd_message_from_data (struct rspamd_task *task, GByteArray *data, } } -static inline const gchar * -rspamd_message_find_body_start (const gchar *headers_end, const gchar *body_end) -{ - const gchar *p = headers_end; - enum { - st_start = 0, - st_cr, - st_lf, - } state = st_start; - - if (headers_end + 1 >= body_end) { - return headers_end; - } - - switch (state) { - case st_start: - if (*p == '\r') { - p ++; - state = st_cr; - } - else if (*p == '\n') { - p ++; - state = st_lf; - } - break; - case st_cr: - if (*p == '\n' && p < body_end) { - /* CRLF */ - p ++; - } - break; - case st_lf: - break; - } - - return p; -} - gboolean rspamd_message_parse (struct rspamd_task *task) { @@ -1354,7 +1322,7 @@ rspamd_message_parse (struct rspamd_task *task) struct received_header *recv, *trecv; const gchar *p; gsize len; - goffset hdr_pos; + goffset hdr_pos, body_pos; gint i; gdouble diff, *pdiff; guint tw, *ptw, dw; @@ -1450,15 +1418,12 @@ rspamd_message_parse (struct rspamd_task *task) str.str = tmp->data; str.len = tmp->len; - hdr_pos = rspamd_string_find_eoh (&str); + hdr_pos = rspamd_string_find_eoh (&str, &body_pos); if (hdr_pos > 0 && hdr_pos < tmp->len) { - static const gchar *body_start; - - body_start = rspamd_message_find_body_start (p + hdr_pos, - p + len); task->raw_headers_content.begin = (gchar *) (p); - task->raw_headers_content.len = body_start - p; + task->raw_headers_content.len = hdr_pos; + task->raw_headers_content.body_start = p + body_pos; if (task->raw_headers_content.len > 0) { process_raw_headers (task, task->raw_headers, diff --git a/src/libserver/dkim.c b/src/libserver/dkim.c index 3fdaae84d..89a8b9175 100644 --- a/src/libserver/dkim.c +++ b/src/libserver/dkim.c @@ -1793,7 +1793,11 @@ rspamd_dkim_check (rspamd_dkim_context_t *ctx, /* First of all find place of body */ p = task->msg.begin; body_end = task->msg.begin + task->msg.len; - body_start = task->msg.begin + task->raw_headers_content.len; + body_start = task->raw_headers_content.body_start; + + if (!body_start) { + return DKIM_RECORD_ERROR; + } /* Start canonization of body part */ if (!rspamd_dkim_canonize_body (&ctx->common, body_start, body_end)) { @@ -2049,12 +2053,16 @@ rspamd_dkim_sign (struct rspamd_task *task, /* First of all find place of body */ p = task->msg.begin; body_end = task->msg.begin + task->msg.len; - body_start = task->msg.begin + task->raw_headers_content.len; + body_start = task->raw_headers_content.body_start; if (len > 0) { ctx->common.len = len; } + if (!body_start) { + return NULL; + } + /* Start canonization of body part */ if (!rspamd_dkim_canonize_body (&ctx->common, body_start, body_end)) { return NULL; diff --git a/src/libserver/task.h b/src/libserver/task.h index f34c2ce10..4fb2ba3e3 100644 --- a/src/libserver/task.h +++ b/src/libserver/task.h @@ -142,7 +142,11 @@ struct rspamd_task { GMimeMessage *message; /**< message, parsed with GMime */ GPtrArray *parts; /**< list of parsed parts */ GPtrArray *text_parts; /**< list of text parts */ - rspamd_ftok_t raw_headers_content; /**< list of raw headers */ + struct { + const gchar *begin; + gsize len; + const gchar *body_start; + } raw_headers_content; /**< list of raw headers */ GPtrArray *received; /**< list of received headers */ GHashTable *urls; /**< list of parsed urls */ GHashTable *emails; /**< list of parsed emails */ diff --git a/src/libutil/str_util.c b/src/libutil/str_util.c index 1ce81bc9e..c39dcb7ee 100644 --- a/src/libutil/str_util.c +++ b/src/libutil/str_util.c @@ -1413,7 +1413,7 @@ rspamd_substring_search_twoway (const gchar *in, gint inlen, goffset -rspamd_string_find_eoh (GString *input) +rspamd_string_find_eoh (GString *input, goffset *body_start) { const gchar *p, *c = NULL, *end; enum { @@ -1459,6 +1459,10 @@ rspamd_string_find_eoh (GString *input) } else { /* We have \r\r[^\n] */ + if (body_start) { + *body_start = p - input->str + 1; + } + return p - input->str; } } @@ -1474,6 +1478,9 @@ rspamd_string_find_eoh (GString *input) case got_lf: if (*p == '\n') { /* We have \n\n, which is obviously end of headers */ + if (body_start) { + *body_start = p - input->str + 1; + } return p - input->str; } else if (*p == '\r') { @@ -1517,11 +1524,21 @@ rspamd_string_find_eoh (GString *input) break; case got_linebreak_lf: g_assert (c != NULL); + if (body_start) { + /* \r\n\r\n */ + *body_start = p - input->str; + } + return c - input->str; } } if (state == got_linebreak_lf) { + if (body_start) { + /* \r\n\r\n */ + *body_start = p - input->str; + } + return c - input->str; } diff --git a/src/libutil/str_util.h b/src/libutil/str_util.h index 695a8d022..1ae09f576 100644 --- a/src/libutil/str_util.h +++ b/src/libutil/str_util.h @@ -269,7 +269,7 @@ goffset rspamd_substring_search_twoway (const gchar *in, gint inlen, * Hence, to obtain the real EOH position, it is also required to skip * space characters */ -goffset rspamd_string_find_eoh (GString *input); +goffset rspamd_string_find_eoh (GString *input, goffset *body_start); #define rspamd_ucl_emit_gstring(o, t, target) \ -- 2.39.5