source.dussan.org Git - rspamd.git/commitdiff
[Fix] More fixes to end of headers detection
author: Vsevolod Stakhov <vsevolod@highsecure.ru>
Thu, 7 Jul 2016 15:02:01 +0000 (16:02 +0100)
committer: Vsevolod Stakhov <vsevolod@highsecure.ru>
Thu, 7 Jul 2016 15:02:01 +0000 (16:02 +0100)
src/client/rspamc.c
src/libmime/message.c
src/libserver/dkim.c
src/libserver/task.h
src/libutil/str_util.c
src/libutil/str_util.h

index 592971b73b87e422fd7c15ea4528b2cddf2dce41..b8cec9e67b636f971593bfa66180707f2ea324ef 100644 (file)
@@ -1045,7 +1045,7 @@ rspamc_mime_output (FILE *out, ucl_object_t *result, GString *input,
        gboolean is_spam = FALSE;
        gchar *json_header, *json_header_encoded, *sc;
 
-       headers_pos = rspamd_string_find_eoh (input);
+       headers_pos = rspamd_string_find_eoh (input, NULL);
 
        if (headers_pos == -1) {
                rspamd_fprintf (stderr,"cannot find end of headers position");
index de70d76bcb2ccd18babe8714d87c16ee1c6b7e86..f6c023294b87e0856834f430ea70c1fe95bfcecd 100644 (file)
@@ -218,8 +218,14 @@ process_raw_headers (struct rspamd_task *task, GHashTable *target,
                                tmp ++;
                        }
 
+                       if (p + 1 == end) {
+                               new->raw_len = end - new->raw_value;
+                       }
+                       else {
+                               new->raw_len = p - new->raw_value;
+                       }
+
                        new->value = tmp;
-                       new->raw_len = p - new->raw_value;
                        new->decoded = g_mime_utils_header_decode_text (new->value);
 
                        if (new->decoded != NULL) {
@@ -1300,44 +1306,6 @@ rspamd_message_from_data (struct rspamd_task *task, GByteArray *data,
        }
 }
 
-static inline const gchar *
-rspamd_message_find_body_start (const gchar *headers_end, const gchar *body_end)
-{
-       const gchar *p = headers_end;
-       enum {
-               st_start = 0,
-               st_cr,
-               st_lf,
-       } state = st_start;
-
-       if (headers_end + 1 >= body_end) {
-               return headers_end;
-       }
-
-       switch (state) {
-       case st_start:
-               if (*p == '\r') {
-                       p ++;
-                       state = st_cr;
-               }
-               else if (*p == '\n') {
-                       p ++;
-                       state = st_lf;
-               }
-               break;
-       case st_cr:
-               if (*p == '\n' && p < body_end) {
-                       /* CRLF */
-                       p ++;
-               }
-               break;
-       case st_lf:
-               break;
-       }
-
-       return p;
-}
-
 gboolean
 rspamd_message_parse (struct rspamd_task *task)
 {
@@ -1354,7 +1322,7 @@ rspamd_message_parse (struct rspamd_task *task)
        struct received_header *recv, *trecv;
        const gchar *p;
        gsize len;
-       goffset hdr_pos;
+       goffset hdr_pos, body_pos;
        gint i;
        gdouble diff, *pdiff;
        guint tw, *ptw, dw;
@@ -1450,15 +1418,12 @@ rspamd_message_parse (struct rspamd_task *task)
                        str.str = tmp->data;
                        str.len = tmp->len;
 
-                       hdr_pos = rspamd_string_find_eoh (&str);
+                       hdr_pos = rspamd_string_find_eoh (&str, &body_pos);
 
                        if (hdr_pos > 0 && hdr_pos < tmp->len) {
-                               static const gchar *body_start;
-
-                               body_start = rspamd_message_find_body_start (p + hdr_pos,
-                                               p + len);
                                task->raw_headers_content.begin = (gchar *) (p);
-                               task->raw_headers_content.len = body_start - p;
+                               task->raw_headers_content.len = hdr_pos;
+                               task->raw_headers_content.body_start = p + body_pos;
 
                                if (task->raw_headers_content.len > 0) {
                                        process_raw_headers (task, task->raw_headers,
index 3fdaae84ddc4db5d284f96f5439f48fbf8fc07d0..89a8b917536b25d0153db0e8152f0c1eaa33571e 100644 (file)
@@ -1793,7 +1793,11 @@ rspamd_dkim_check (rspamd_dkim_context_t *ctx,
        /* First of all find place of body */
        p = task->msg.begin;
        body_end = task->msg.begin + task->msg.len;
-       body_start = task->msg.begin + task->raw_headers_content.len;
+       body_start = task->raw_headers_content.body_start;
+
+       if (!body_start) {
+               return DKIM_RECORD_ERROR;
+       }
 
        /* Start canonization of body part */
        if (!rspamd_dkim_canonize_body (&ctx->common, body_start, body_end)) {
@@ -2049,12 +2053,16 @@ rspamd_dkim_sign (struct rspamd_task *task,
        /* First of all find place of body */
        p = task->msg.begin;
        body_end = task->msg.begin + task->msg.len;
-       body_start = task->msg.begin + task->raw_headers_content.len;
+       body_start = task->raw_headers_content.body_start;
 
        if (len > 0) {
                ctx->common.len = len;
        }
 
+       if (!body_start) {
+               return NULL;
+       }
+
        /* Start canonization of body part */
        if (!rspamd_dkim_canonize_body (&ctx->common, body_start, body_end)) {
                return NULL;
index f34c2ce1028145a22986598be80487b82f2f2831..4fb2ba3e3b1cda6af692670ce80e6bc305cb3875 100644 (file)
@@ -142,7 +142,11 @@ struct rspamd_task {
        GMimeMessage *message;                                                  /**< message, parsed with GMime                                         */
        GPtrArray *parts;                                                               /**< list of parsed parts                                                       */
        GPtrArray *text_parts;                                                  /**< list of text parts                                                         */
-       rspamd_ftok_t raw_headers_content;                              /**< list of raw headers                                                        */
+       struct {
+               const gchar *begin;
+               gsize len;
+               const gchar *body_start;
+       } raw_headers_content;                          /**< list of raw headers                                                        */
        GPtrArray *received;                                                    /**< list of received headers                                           */
        GHashTable *urls;                                                               /**< list of parsed urls                                                        */
        GHashTable *emails;                                                             /**< list of parsed emails                                                      */
index 1ce81bc9e6ac412791e73d003cf56fcc28fba5a0..c39dcb7ee101a463e17d0dab6538b58fbe6af3aa 100644 (file)
@@ -1413,7 +1413,7 @@ rspamd_substring_search_twoway (const gchar *in, gint inlen,
 
 
 goffset
-rspamd_string_find_eoh (GString *input)
+rspamd_string_find_eoh (GString *input, goffset *body_start)
 {
        const gchar *p, *c = NULL, *end;
        enum {
@@ -1459,6 +1459,10 @@ rspamd_string_find_eoh (GString *input)
                                }
                                else {
                                        /* We have \r\r[^\n] */
+                                       if (body_start) {
+                                               *body_start = p - input->str + 1;
+                                       }
+
                                        return p - input->str;
                                }
                        }
@@ -1474,6 +1478,9 @@ rspamd_string_find_eoh (GString *input)
                case got_lf:
                        if (*p == '\n') {
                                /* We have \n\n, which is obviously end of headers */
+                               if (body_start) {
+                                       *body_start = p - input->str + 1;
+                               }
                                return p - input->str;
                        }
                        else if (*p == '\r') {
@@ -1517,11 +1524,21 @@ rspamd_string_find_eoh (GString *input)
                        break;
                case got_linebreak_lf:
                        g_assert (c != NULL);
+                       if (body_start) {
+                               /* \r\n\r\n */
+                               *body_start = p - input->str;
+                       }
+
                        return c - input->str;
                }
        }
 
        if (state == got_linebreak_lf) {
+               if (body_start) {
+                       /* \r\n\r\n */
+                       *body_start = p - input->str;
+               }
+
                return c - input->str;
        }
 
index 695a8d022e7ccfd80d6bb533a9ca4716d7f8e256..1ae09f57604835d3c4aab28486b5401844c8ed36 100644 (file)
@@ -269,7 +269,7 @@ goffset rspamd_substring_search_twoway (const gchar *in, gint inlen,
  * Hence, to obtain the real EOH position, it is also required to skip
  * space characters
  */
-goffset rspamd_string_find_eoh (GString *input);
+goffset rspamd_string_find_eoh (GString *input, goffset *body_start);
 
 
 #define rspamd_ucl_emit_gstring(o, t, target) \