]> source.dussan.org Git - rspamd.git/commitdiff
[Minor] Move headers parsing to a separate unit
authorVsevolod Stakhov <vsevolod@highsecure.ru>
Tue, 13 Dec 2016 10:06:59 +0000 (10:06 +0000)
committerVsevolod Stakhov <vsevolod@highsecure.ru>
Tue, 13 Dec 2016 10:07:22 +0000 (10:07 +0000)
src/libmime/CMakeLists.txt
src/libmime/message.c
src/libmime/message.h
src/libmime/mime_headers.c [new file with mode: 0644]
src/libmime/mime_headers.h [new file with mode: 0644]

index 39bd2d4026c8c6e40e96bbf7643827cf93aac287..3ec13937aebf192bcb37370e9d495c34f227f020 100644 (file)
@@ -6,6 +6,7 @@ SET(LIBRSPAMDMIMESRC
                                ${CMAKE_CURRENT_SOURCE_DIR}/images.c
                                ${CMAKE_CURRENT_SOURCE_DIR}/message.c
                                ${CMAKE_CURRENT_SOURCE_DIR}/archives.c
-                               ${CMAKE_CURRENT_SOURCE_DIR}/content_type.c)
+                               ${CMAKE_CURRENT_SOURCE_DIR}/content_type.c
+                               ${CMAKE_CURRENT_SOURCE_DIR}/mime_headers.c)
 
 SET(RSPAMD_MIME ${LIBRSPAMDMIMESRC} PARENT_SCOPE)
\ No newline at end of file
index 7ed0d2c019bb36e13e7c8165173c524687f0fb78..cb9009d7b0e8bfae24d31b744118778e02cd38f2 100644 (file)
@@ -52,293 +52,6 @@ rspamd_message_quark (void)
        return g_quark_from_static_string ("mime-error");
 }
 
-static void
-append_raw_header (struct rspamd_task *task,
-               GHashTable *target, struct raw_header *rh)
-{
-       GPtrArray *ar;
-
-       if ((ar = g_hash_table_lookup (target, rh->name)) != NULL) {
-               g_ptr_array_add (ar, rh);
-               msg_debug_task ("append raw header %s: %s", rh->name, rh->value);
-       }
-       else {
-               ar = g_ptr_array_sized_new (2);
-               g_ptr_array_add (ar, rh);
-               g_hash_table_insert (target, rh->name, ar);
-               msg_debug_task ("add new raw header %s: %s", rh->name, rh->value);
-       }
-}
-
-/* Convert raw headers to a list of struct raw_header * */
-static void
-process_raw_headers (struct rspamd_task *task, GHashTable *target,
-               const gchar *in, gsize len)
-{
-       struct raw_header *new = NULL;
-       const gchar *p, *c, *end;
-       gchar *tmp, *tp;
-       gint state = 0, l, next_state = 100, err_state = 100, t_state;
-       gboolean valid_folding = FALSE;
-       guint nlines_count[RSPAMD_TASK_NEWLINES_MAX];
-
-       p = in;
-       end = p + len;
-       c = p;
-       memset (nlines_count, 0, sizeof (nlines_count));
-       msg_debug_task ("start processing headers");
-
-       while (p < end) {
-               /* FSM for processing headers */
-               switch (state) {
-               case 0:
-                       /* Begin processing headers */
-                       if (!g_ascii_isalpha (*p)) {
-                               /* We have some garbage at the beginning of headers, skip this line */
-                               state = 100;
-                               next_state = 0;
-                       }
-                       else {
-                               state = 1;
-                               c = p;
-                       }
-                       break;
-               case 1:
-                       /* We got something like header's name */
-                       if (*p == ':') {
-                               new =
-                                       rspamd_mempool_alloc0 (task->task_pool,
-                                               sizeof (struct raw_header));
-                               l = p - c;
-                               tmp = rspamd_mempool_alloc (task->task_pool, l + 1);
-                               rspamd_strlcpy (tmp, c, l + 1);
-                               new->name = tmp;
-                               new->empty_separator = TRUE;
-                               new->raw_value = c;
-                               new->raw_len = p - c; /* Including trailing ':' */
-                               p++;
-                               state = 2;
-                               c = p;
-                       }
-                       else if (g_ascii_isspace (*p)) {
-                               /* Not header but some garbage */
-                               task->flags |= RSPAMD_TASK_FLAG_BROKEN_HEADERS;
-                               state = 100;
-                               next_state = 0;
-                       }
-                       else {
-                               p++;
-                       }
-                       break;
-               case 2:
-                       /* We got header's name, so skip any \t or spaces */
-                       if (*p == '\t') {
-                               new->tab_separated = TRUE;
-                               new->empty_separator = FALSE;
-                               p++;
-                       }
-                       else if (*p == ' ') {
-                               new->empty_separator = FALSE;
-                               p++;
-                       }
-                       else if (*p == '\n' || *p == '\r') {
-
-                               if (*p == '\n') {
-                                       nlines_count[RSPAMD_TASK_NEWLINES_LF] ++;
-                               }
-                               else if (*(p + 1) == '\n') {
-                                       nlines_count[RSPAMD_TASK_NEWLINES_CRLF] ++;
-                               }
-                               else {
-                                       nlines_count[RSPAMD_TASK_NEWLINES_CR] ++;
-                               }
-
-                               /* Process folding */
-                               state = 99;
-                               l = p - c;
-                               if (l > 0) {
-                                       tmp = rspamd_mempool_alloc (task->task_pool, l + 1);
-                                       rspamd_strlcpy (tmp, c, l + 1);
-                                       new->separator = tmp;
-                               }
-                               next_state = 3;
-                               err_state = 5;
-                               c = p;
-                       }
-                       else {
-                               /* Process value */
-                               l = p - c;
-                               if (l >= 0) {
-                                       tmp = rspamd_mempool_alloc (task->task_pool, l + 1);
-                                       rspamd_strlcpy (tmp, c, l + 1);
-                                       new->separator = tmp;
-                               }
-                               c = p;
-                               state = 3;
-                       }
-                       break;
-               case 3:
-                       if (*p == '\r' || *p == '\n') {
-                               /* Hold folding */
-                               if (*p == '\n') {
-                                       nlines_count[RSPAMD_TASK_NEWLINES_LF] ++;
-                               }
-                               else if (*(p + 1) == '\n') {
-                                       nlines_count[RSPAMD_TASK_NEWLINES_CRLF] ++;
-                               }
-                               else {
-                                       nlines_count[RSPAMD_TASK_NEWLINES_CR] ++;
-                               }
-                               state = 99;
-                               next_state = 3;
-                               err_state = 4;
-                       }
-                       else if (p + 1 == end) {
-                               state = 4;
-                       }
-                       else {
-                               p++;
-                       }
-                       break;
-               case 4:
-                       /* Copy header's value */
-                       l = p - c;
-                       tmp = rspamd_mempool_alloc (task->task_pool, l + 1);
-                       tp = tmp;
-                       t_state = 0;
-                       while (l--) {
-                               if (t_state == 0) {
-                                       /* Before folding */
-                                       if (*c == '\n' || *c == '\r') {
-                                               t_state = 1;
-                                               c++;
-                                               *tp++ = ' ';
-                                       }
-                                       else {
-                                               *tp++ = *c++;
-                                       }
-                               }
-                               else if (t_state == 1) {
-                                       /* Inside folding */
-                                       if (g_ascii_isspace (*c)) {
-                                               c++;
-                                       }
-                                       else {
-                                               t_state = 0;
-                                               *tp++ = *c++;
-                                       }
-                               }
-                       }
-                       /* Strip last space that can be added by \r\n parsing */
-                       if (*(tp - 1) == ' ') {
-                               tp--;
-                       }
-
-                       *tp = '\0';
-                       /* Strip the initial spaces that could also be added by folding */
-                       while (*tmp != '\0' && g_ascii_isspace (*tmp)) {
-                               tmp ++;
-                       }
-
-                       if (p + 1 == end) {
-                               new->raw_len = end - new->raw_value;
-                       }
-                       else {
-                               new->raw_len = p - new->raw_value;
-                       }
-
-                       new->value = tmp;
-                       new->decoded = g_mime_utils_header_decode_text (new->value);
-
-                       if (new->decoded != NULL) {
-                               rspamd_mempool_add_destructor (task->task_pool,
-                                               (rspamd_mempool_destruct_t)g_free, new->decoded);
-                       }
-                       else {
-                               new->decoded = "";
-                       }
-
-                       append_raw_header (task, target, new);
-                       state = 0;
-                       break;
-               case 5:
-                       /* Header has only name, no value */
-                       new->value = "";
-                       new->decoded = "";
-                       append_raw_header (task, target, new);
-                       state = 0;
-                       break;
-               case 99:
-                       /* Folding state */
-                       if (p + 1 == end) {
-                               state = err_state;
-                       }
-                       else {
-                               if (*p == '\r' || *p == '\n') {
-                                       p++;
-                                       valid_folding = FALSE;
-                               }
-                               else if (*p == '\t' || *p == ' ') {
-                                       /* Valid folding */
-                                       p++;
-                                       valid_folding = TRUE;
-                               }
-                               else {
-                                       if (valid_folding) {
-                                               debug_task ("go to state: %d->%d", state, next_state);
-                                               state = next_state;
-                                       }
-                                       else {
-                                               /* Fall back */
-                                               debug_task ("go to state: %d->%d", state, err_state);
-                                               state = err_state;
-                                       }
-                               }
-                       }
-                       break;
-               case 100:
-                       /* Fail state, skip line */
-
-                       if (*p == '\r') {
-                               if (*(p + 1) == '\n') {
-                                       nlines_count[RSPAMD_TASK_NEWLINES_CRLF] ++;
-                                       p++;
-                               }
-                               p++;
-                               state = next_state;
-                       }
-                       else if (*p == '\n') {
-                               nlines_count[RSPAMD_TASK_NEWLINES_LF] ++;
-
-                               if (*(p + 1) == '\r') {
-                                       p++;
-                               }
-                               p++;
-                               state = next_state;
-                       }
-                       else if (p + 1 == end) {
-                               state = next_state;
-                               p++;
-                       }
-                       else {
-                               p++;
-                       }
-                       break;
-               }
-       }
-
-       guint max_cnt = 0;
-       gint sel = 0;
-
-       for (gint i = 0; i < RSPAMD_TASK_NEWLINES_MAX; i ++) {
-               if (nlines_count[i] > max_cnt) {
-                       max_cnt = nlines_count[i];
-                       sel = i;
-               }
-       }
-
-       task->nlines_type = sel;
-}
 
 static void
 free_byte_array_callback (void *pointer)
@@ -1198,8 +911,8 @@ mime_foreach_callback (GMimeObject * part, gpointer user_data)
                                rspamd_strcase_equal, NULL, rspamd_ptr_array_free_hard);
 
                if (hdrs != NULL) {
-                       process_raw_headers (task, mime_part->raw_headers,
-                                       hdrs, strlen (hdrs));
+                       rspamd_mime_headers_process (task, mime_part->raw_headers,
+                                       hdrs, strlen (hdrs), FALSE);
                        mime_part->raw_headers_str = hdrs;
                }
 
@@ -1267,8 +980,8 @@ mime_foreach_callback (GMimeObject * part, gpointer user_data)
                                                rspamd_strcase_equal, NULL, rspamd_ptr_array_free_hard);
 
                                if (hdrs != NULL) {
-                                       process_raw_headers (task, mime_part->raw_headers,
-                                                       hdrs, strlen (hdrs));
+                                       rspamd_mime_headers_process (task, mime_part->raw_headers,
+                                                       hdrs, strlen (hdrs), FALSE);
                                        mime_part->raw_headers_str = hdrs;
                                }
 
@@ -1538,9 +1251,10 @@ rspamd_message_parse (struct rspamd_task *task)
                                task->raw_headers_content.body_start = p + body_pos;
 
                                if (task->raw_headers_content.len > 0) {
-                                       process_raw_headers (task, task->raw_headers,
+                                       rspamd_mime_headers_process (task, task->raw_headers,
                                                        task->raw_headers_content.begin,
-                                                       task->raw_headers_content.len);
+                                                       task->raw_headers_content.len,
+                                                       TRUE);
                                }
                        }
                }
index 8003d073c88ed305426f7d156146a074f43af9cb..ff7ebafc9327284a5bfe59fae64a10ccbf0f8c66 100644 (file)
@@ -10,6 +10,7 @@
 #include "email_addr.h"
 #include "addr.h"
 #include "cryptobox.h"
+#include "mime_headers.h"
 #include <gmime/gmime.h>
 
 struct rspamd_task;
@@ -91,17 +92,6 @@ struct received_header {
        enum rspamd_received_type type;
 };
 
-struct raw_header {
-       gchar *name;
-       gchar *value;
-       const gchar *raw_value; /* As it is in the message (unfolded and unparsed) */
-       gsize raw_len;
-       gboolean tab_separated;
-       gboolean empty_separator;
-       gchar *separator;
-       gchar *decoded;
-};
-
 /**
  * Parse and pre-process mime message
  * @param task worker_task object
diff --git a/src/libmime/mime_headers.c b/src/libmime/mime_headers.c
new file mode 100644 (file)
index 0000000..8ba8e6c
--- /dev/null
@@ -0,0 +1,312 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mime_headers.h"
+#include "task.h"
+
+/*
+ * Register a parsed header in `target`.
+ *
+ * `target` maps a header name to a GPtrArray holding every occurrence of
+ * that header, in the order they were added. The array is created on the
+ * first occurrence and keyed by `rh->name`.
+ */
+static void
+rspamd_mime_header_add (struct rspamd_task *task,
+               GHashTable *target, struct raw_header *rh)
+{
+       GPtrArray *occurrences = g_hash_table_lookup (target, rh->name);
+
+       if (occurrences == NULL) {
+               /* First header with this name: start a new list for it */
+               occurrences = g_ptr_array_sized_new (2);
+               g_hash_table_insert (target, rh->name, occurrences);
+               g_ptr_array_add (occurrences, rh);
+               msg_debug_task ("add new raw header %s: %s", rh->name, rh->value);
+
+               return;
+       }
+
+       /* Name already known: just extend the existing list */
+       g_ptr_array_add (occurrences, rh);
+       msg_debug_task ("append raw header %s: %s", rh->name, rh->value);
+}
+
+/**
+ * Convert a block of raw headers to `struct raw_header` objects and store
+ * them in `target` (header name -> GPtrArray of `struct raw_header *`).
+ *
+ * The input is scanned by a state machine:
+ *   0   - start of a line: a header name must begin with an ASCII letter
+ *   1   - inside the header name, waiting for ':'
+ *   2   - separator between ':' and the value (spaces and tabs)
+ *   3   - inside the value
+ *   4   - value is complete: unfold it, decode it and store the header
+ *   5   - header has a name but no value: store it with empty strings
+ *   99  - after a line break: continuation line (folding) or end of header
+ *   100 - skip the rest of the current line
+ *
+ * All strings are allocated from `task->task_pool`. Lines that look like a
+ * header name but contain whitespace before ':' set
+ * RSPAMD_TASK_FLAG_BROKEN_HEADERS in `task->flags`.
+ *
+ * NOTE(review): when the value runs up to the very last input byte with no
+ * trailing line break, state 4 is entered with `p` still on that byte, so the
+ * byte is not copied to `value` (it is accounted for in `raw_len` only).
+ * This is the pre-existing behaviour and is intentionally left unchanged here.
+ *
+ * @param task task that provides the memory pool, flags and `nlines_type`
+ * @param target hash table that receives the parsed headers
+ * @param in start of the headers block
+ * @param len number of bytes available at `in`
+ * @param check_newlines if TRUE, count LF / CRLF / CR line endings and store
+ * the most frequent kind in `task->nlines_type`
+ */
+void
+rspamd_mime_headers_process (struct rspamd_task *task, GHashTable *target,
+               const gchar *in, gsize len, gboolean check_newlines)
+{
+       struct raw_header *new = NULL;
+       const gchar *p, *c, *end;
+       gchar *tmp, *tp;
+       gint state = 0, l, next_state = 100, err_state = 100, t_state;
+       gboolean valid_folding = FALSE;
+       guint nlines_count[RSPAMD_TASK_NEWLINES_MAX];
+
+       p = in;
+       end = p + len;
+       c = p;
+       memset (nlines_count, 0, sizeof (nlines_count));
+       msg_debug_task ("start processing headers");
+
+       while (p < end) {
+               /* FSM for processing headers */
+               switch (state) {
+               case 0:
+                       /* Begin processing headers */
+                       if (!g_ascii_isalpha (*p)) {
+                               /* We have some garbage at the beginning of headers, skip this line */
+                               state = 100;
+                               next_state = 0;
+                       }
+                       else {
+                               state = 1;
+                               c = p;
+                       }
+                       break;
+               case 1:
+                       /* We got something like header's name */
+                       if (*p == ':') {
+                               new =
+                                       rspamd_mempool_alloc0 (task->task_pool,
+                                               sizeof (struct raw_header));
+                               l = p - c;
+                               tmp = rspamd_mempool_alloc (task->task_pool, l + 1);
+                               rspamd_strlcpy (tmp, c, l + 1);
+                               new->name = tmp;
+                               new->empty_separator = TRUE;
+                               new->raw_value = c;
+                               /* Name only (without ':') for now; recomputed in state 4 */
+                               new->raw_len = p - c;
+                               p++;
+                               state = 2;
+                               c = p;
+                       }
+                       else if (g_ascii_isspace (*p)) {
+                               /* Not header but some garbage */
+                               task->flags |= RSPAMD_TASK_FLAG_BROKEN_HEADERS;
+                               state = 100;
+                               next_state = 0;
+                       }
+                       else {
+                               p++;
+                       }
+                       break;
+               case 2:
+                       /* We got header's name, so skip any \t or spaces */
+                       if (*p == '\t') {
+                               new->tab_separated = TRUE;
+                               new->empty_separator = FALSE;
+                               p++;
+                       }
+                       else if (*p == ' ') {
+                               new->empty_separator = FALSE;
+                               p++;
+                       }
+                       else if (*p == '\n' || *p == '\r') {
+
+                               if (check_newlines) {
+                                       if (*p == '\n') {
+                                               nlines_count[RSPAMD_TASK_NEWLINES_LF] ++;
+                                       }
+                                       /* Look ahead only while still inside the buffer */
+                                       else if (p + 1 < end && *(p + 1) == '\n') {
+                                               nlines_count[RSPAMD_TASK_NEWLINES_CRLF] ++;
+                                       }
+                                       else {
+                                               nlines_count[RSPAMD_TASK_NEWLINES_CR] ++;
+                                       }
+                               }
+
+                               /*
+                                * Process folding: a continuation line leads to the value
+                                * (state 3), anything else means the header has no value
+                                * (state 5)
+                                */
+                               state = 99;
+                               l = p - c;
+                               if (l > 0) {
+                                       tmp = rspamd_mempool_alloc (task->task_pool, l + 1);
+                                       rspamd_strlcpy (tmp, c, l + 1);
+                                       new->separator = tmp;
+                               }
+                               next_state = 3;
+                               err_state = 5;
+                               c = p;
+                       }
+                       else {
+                               /* Process value */
+                               l = p - c;
+                               if (l >= 0) {
+                                       tmp = rspamd_mempool_alloc (task->task_pool, l + 1);
+                                       rspamd_strlcpy (tmp, c, l + 1);
+                                       new->separator = tmp;
+                               }
+                               c = p;
+                               state = 3;
+                       }
+                       break;
+               case 3:
+                       if (*p == '\r' || *p == '\n') {
+                               /* Hold folding */
+                               if (check_newlines) {
+                                       if (*p == '\n') {
+                                               nlines_count[RSPAMD_TASK_NEWLINES_LF] ++;
+                                       }
+                                       /* Look ahead only while still inside the buffer */
+                                       else if (p + 1 < end && *(p + 1) == '\n') {
+                                               nlines_count[RSPAMD_TASK_NEWLINES_CRLF] ++;
+                                       }
+                                       else {
+                                               nlines_count[RSPAMD_TASK_NEWLINES_CR] ++;
+                                       }
+                               }
+                               /*
+                                * A continuation line keeps us in the value (state 3),
+                                * anything else finishes the header (state 4)
+                                */
+                               state = 99;
+                               next_state = 3;
+                               err_state = 4;
+                       }
+                       else if (p + 1 == end) {
+                               state = 4;
+                       }
+                       else {
+                               p++;
+                       }
+                       break;
+               case 4:
+                       /* Copy header's value, replacing each folding by a single space */
+                       l = p - c;
+                       tmp = rspamd_mempool_alloc (task->task_pool, l + 1);
+                       tp = tmp;
+                       t_state = 0;
+                       while (l--) {
+                               if (t_state == 0) {
+                                       /* Before folding */
+                                       if (*c == '\n' || *c == '\r') {
+                                               t_state = 1;
+                                               c++;
+                                               *tp++ = ' ';
+                                       }
+                                       else {
+                                               *tp++ = *c++;
+                                       }
+                               }
+                               else if (t_state == 1) {
+                                       /* Inside folding */
+                                       if (g_ascii_isspace (*c)) {
+                                               c++;
+                                       }
+                                       else {
+                                               t_state = 0;
+                                               *tp++ = *c++;
+                                       }
+                               }
+                       }
+                       /*
+                        * Strip last space that can be added by \r\n parsing.
+                        * Nothing may have been copied at all, so never look
+                        * before the start of the buffer.
+                        */
+                       if (tp > tmp && *(tp - 1) == ' ') {
+                               tp--;
+                       }
+
+                       *tp = '\0';
+                       /* Strip the initial spaces that could also be added by folding */
+                       while (*tmp != '\0' && g_ascii_isspace (*tmp)) {
+                               tmp ++;
+                       }
+
+                       if (p + 1 == end) {
+                               new->raw_len = end - new->raw_value;
+                       }
+                       else {
+                               new->raw_len = p - new->raw_value;
+                       }
+
+                       new->value = tmp;
+                       new->decoded = g_mime_utils_header_decode_text (new->value);
+
+                       if (new->decoded != NULL) {
+                               /* Decoded text is not pool memory: free it with the pool */
+                               rspamd_mempool_add_destructor (task->task_pool,
+                                               (rspamd_mempool_destruct_t)g_free, new->decoded);
+                       }
+                       else {
+                               new->decoded = "";
+                       }
+
+                       rspamd_mime_header_add (task, target, new);
+                       state = 0;
+                       break;
+               case 5:
+                       /* Header has only name, no value */
+                       new->value = "";
+                       new->decoded = "";
+                       rspamd_mime_header_add (task, target, new);
+                       state = 0;
+                       break;
+               case 99:
+                       /* Folding state */
+                       if (p + 1 == end) {
+                               state = err_state;
+                       }
+                       else {
+                               if (*p == '\r' || *p == '\n') {
+                                       p++;
+                                       valid_folding = FALSE;
+                               }
+                               else if (*p == '\t' || *p == ' ') {
+                                       /* Valid folding */
+                                       p++;
+                                       valid_folding = TRUE;
+                               }
+                               else {
+                                       if (valid_folding) {
+                                               debug_task ("go to state: %d->%d", state, next_state);
+                                               state = next_state;
+                                       }
+                                       else {
+                                               /* Fall back */
+                                               debug_task ("go to state: %d->%d", state, err_state);
+                                               state = err_state;
+                                       }
+                               }
+                       }
+                       break;
+               case 100:
+                       /* Fail state, skip line */
+
+                       if (*p == '\r') {
+                               /* Look ahead only while still inside the buffer */
+                               if (p + 1 < end && *(p + 1) == '\n') {
+                                       nlines_count[RSPAMD_TASK_NEWLINES_CRLF] ++;
+                                       p++;
+                               }
+                               p++;
+                               state = next_state;
+                       }
+                       else if (*p == '\n') {
+                               nlines_count[RSPAMD_TASK_NEWLINES_LF] ++;
+
+                               /* Look ahead only while still inside the buffer */
+                               if (p + 1 < end && *(p + 1) == '\r') {
+                                       p++;
+                               }
+                               p++;
+                               state = next_state;
+                       }
+                       else if (p + 1 == end) {
+                               state = next_state;
+                               p++;
+                       }
+                       else {
+                               p++;
+                       }
+                       break;
+               }
+       }
+
+       if (check_newlines) {
+               /* Select the most frequent line ending; the first kind wins ties */
+               guint max_cnt = 0;
+               gint sel = 0;
+
+               for (gint i = 0; i < RSPAMD_TASK_NEWLINES_MAX; i ++) {
+                       if (nlines_count[i] > max_cnt) {
+                               max_cnt = nlines_count[i];
+                               sel = i;
+                       }
+               }
+
+               task->nlines_type = sel;
+       }
+}
diff --git a/src/libmime/mime_headers.h b/src/libmime/mime_headers.h
new file mode 100644 (file)
index 0000000..aa76bed
--- /dev/null
@@ -0,0 +1,37 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef SRC_LIBMIME_MIME_HEADERS_H_
+#define SRC_LIBMIME_MIME_HEADERS_H_
+
+#include "config.h"
+
+struct rspamd_task;
+
+/* A single header as produced by rspamd_mime_headers_process () */
+struct raw_header {
+       /* Header name: the text before ':' */
+       gchar *name;
+       /* Value with foldings collapsed and leading whitespace skipped, "" if absent */
+       gchar *value;
+       const gchar *raw_value; /* As it is in the message (unfolded and unparsed) */
+       /* Number of bytes of the header starting at raw_value */
+       gsize raw_len;
+       /* TRUE if a tab was seen between ':' and the value */
+       gboolean tab_separated;
+       /* TRUE if no space or tab was seen between ':' and the value */
+       gboolean empty_separator;
+       /* Copy of the characters between ':' and the value, may be NULL */
+       gchar *separator;
+       /* Result of g_mime_utils_header_decode_text () on value, "" if unavailable */
+       gchar *decoded;
+};
+
+/**
+ * Parse raw headers and store them in a hash table, where each header name
+ * maps to a GPtrArray of `struct raw_header *`
+ * @param task task object (its memory pool is used for allocations)
+ * @param target hash table that receives the parsed headers
+ * @param in start of the raw headers
+ * @param len length of the raw headers in bytes
+ * @param check_newlines if TRUE, detect the prevailing newline type and
+ * store it in the task
+ */
+void rspamd_mime_headers_process (struct rspamd_task *task, GHashTable *target,
+               const gchar *in, gsize len, gboolean check_newlines);
+
+#endif /* SRC_LIBMIME_MIME_HEADERS_H_ */