diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2016-12-13 10:06:59 +0000 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2016-12-13 10:07:22 +0000 |
commit | 1f5d9867a1257fe5633f8a03cd1aa09cc2fd84ee (patch) | |
tree | 87ac5e8b6f3d5724354e94e2f0cc155a9b1493a9 /src/libmime | |
parent | 14d9177ec87e420688db7a98eba6cff953eef54a (diff) | |
download | rspamd-1f5d9867a1257fe5633f8a03cd1aa09cc2fd84ee.tar.gz rspamd-1f5d9867a1257fe5633f8a03cd1aa09cc2fd84ee.zip |
[Minor] Move headers parsing to a separate unit
Diffstat (limited to 'src/libmime')
-rw-r--r-- | src/libmime/CMakeLists.txt | 3 | ||||
-rw-r--r-- | src/libmime/message.c | 300 | ||||
-rw-r--r-- | src/libmime/message.h | 12 | ||||
-rw-r--r-- | src/libmime/mime_headers.c | 312 | ||||
-rw-r--r-- | src/libmime/mime_headers.h | 37 |
5 files changed, 359 insertions, 305 deletions
diff --git a/src/libmime/CMakeLists.txt b/src/libmime/CMakeLists.txt index 39bd2d402..3ec13937a 100644 --- a/src/libmime/CMakeLists.txt +++ b/src/libmime/CMakeLists.txt @@ -6,6 +6,7 @@ SET(LIBRSPAMDMIMESRC ${CMAKE_CURRENT_SOURCE_DIR}/images.c ${CMAKE_CURRENT_SOURCE_DIR}/message.c ${CMAKE_CURRENT_SOURCE_DIR}/archives.c - ${CMAKE_CURRENT_SOURCE_DIR}/content_type.c) + ${CMAKE_CURRENT_SOURCE_DIR}/content_type.c + ${CMAKE_CURRENT_SOURCE_DIR}/mime_headers.c) SET(RSPAMD_MIME ${LIBRSPAMDMIMESRC} PARENT_SCOPE)
\ No newline at end of file diff --git a/src/libmime/message.c b/src/libmime/message.c index 7ed0d2c01..cb9009d7b 100644 --- a/src/libmime/message.c +++ b/src/libmime/message.c @@ -52,293 +52,6 @@ rspamd_message_quark (void) return g_quark_from_static_string ("mime-error"); } -static void -append_raw_header (struct rspamd_task *task, - GHashTable *target, struct raw_header *rh) -{ - GPtrArray *ar; - - if ((ar = g_hash_table_lookup (target, rh->name)) != NULL) { - g_ptr_array_add (ar, rh); - msg_debug_task ("append raw header %s: %s", rh->name, rh->value); - } - else { - ar = g_ptr_array_sized_new (2); - g_ptr_array_add (ar, rh); - g_hash_table_insert (target, rh->name, ar); - msg_debug_task ("add new raw header %s: %s", rh->name, rh->value); - } -} - -/* Convert raw headers to a list of struct raw_header * */ -static void -process_raw_headers (struct rspamd_task *task, GHashTable *target, - const gchar *in, gsize len) -{ - struct raw_header *new = NULL; - const gchar *p, *c, *end; - gchar *tmp, *tp; - gint state = 0, l, next_state = 100, err_state = 100, t_state; - gboolean valid_folding = FALSE; - guint nlines_count[RSPAMD_TASK_NEWLINES_MAX]; - - p = in; - end = p + len; - c = p; - memset (nlines_count, 0, sizeof (nlines_count)); - msg_debug_task ("start processing headers"); - - while (p < end) { - /* FSM for processing headers */ - switch (state) { - case 0: - /* Begin processing headers */ - if (!g_ascii_isalpha (*p)) { - /* We have some garbage at the beginning of headers, skip this line */ - state = 100; - next_state = 0; - } - else { - state = 1; - c = p; - } - break; - case 1: - /* We got something like header's name */ - if (*p == ':') { - new = - rspamd_mempool_alloc0 (task->task_pool, - sizeof (struct raw_header)); - l = p - c; - tmp = rspamd_mempool_alloc (task->task_pool, l + 1); - rspamd_strlcpy (tmp, c, l + 1); - new->name = tmp; - new->empty_separator = TRUE; - new->raw_value = c; - new->raw_len = p - c; /* Including trailing ':' */ - p++; - state = 2; - c = p; - } - else if (g_ascii_isspace (*p)) { - /* Not header but some garbage */ - task->flags |= RSPAMD_TASK_FLAG_BROKEN_HEADERS; - state = 100; - next_state = 0; - } - else { - p++; - } - break; - case 2: - /* We got header's name, so skip any \t or spaces */ - if (*p == '\t') { - new->tab_separated = TRUE; - new->empty_separator = FALSE; - p++; - } - else if (*p == ' ') { - new->empty_separator = FALSE; - p++; - } - else if (*p == '\n' || *p == '\r') { - - if (*p == '\n') { - nlines_count[RSPAMD_TASK_NEWLINES_LF] ++; - } - else if (*(p + 1) == '\n') { - nlines_count[RSPAMD_TASK_NEWLINES_CRLF] ++; - } - else { - nlines_count[RSPAMD_TASK_NEWLINES_CR] ++; - } - - /* Process folding */ - state = 99; - l = p - c; - if (l > 0) { - tmp = rspamd_mempool_alloc (task->task_pool, l + 1); - rspamd_strlcpy (tmp, c, l + 1); - new->separator = tmp; - } - next_state = 3; - err_state = 5; - c = p; - } - else { - /* Process value */ - l = p - c; - if (l >= 0) { - tmp = rspamd_mempool_alloc (task->task_pool, l + 1); - rspamd_strlcpy (tmp, c, l + 1); - new->separator = tmp; - } - c = p; - state = 3; - } - break; - case 3: - if (*p == '\r' || *p == '\n') { - /* Hold folding */ - if (*p == '\n') { - nlines_count[RSPAMD_TASK_NEWLINES_LF] ++; - } - else if (*(p + 1) == '\n') { - nlines_count[RSPAMD_TASK_NEWLINES_CRLF] ++; - } - else { - nlines_count[RSPAMD_TASK_NEWLINES_CR] ++; - } - state = 99; - next_state = 3; - err_state = 4; - } - else if (p + 1 == end) { - state = 4; - } - else { - p++; - } - break; - case 4: - /* Copy header's value */ - l = p - c; - tmp = rspamd_mempool_alloc (task->task_pool, l + 1); - tp = tmp; - t_state = 0; - while (l--) { - if (t_state == 0) { - /* Before folding */ - if (*c == '\n' || *c == '\r') { - t_state = 1; - c++; - *tp++ = ' '; - } - else { - *tp++ = *c++; - } - } - else if (t_state == 1) { - /* Inside folding */ - if (g_ascii_isspace (*c)) { - c++; - } - else { - t_state = 0; - *tp++ = *c++; - } - } - } - /* Strip last space that can be added by \r\n parsing */ - if (*(tp - 1) == ' ') { - tp--; - } - - *tp = '\0'; - /* Strip the initial spaces that could also be added by folding */ - while (*tmp != '\0' && g_ascii_isspace (*tmp)) { - tmp ++; - } - - if (p + 1 == end) { - new->raw_len = end - new->raw_value; - } - else { - new->raw_len = p - new->raw_value; - } - - new->value = tmp; - new->decoded = g_mime_utils_header_decode_text (new->value); - - if (new->decoded != NULL) { - rspamd_mempool_add_destructor (task->task_pool, - (rspamd_mempool_destruct_t)g_free, new->decoded); - } - else { - new->decoded = ""; - } - - append_raw_header (task, target, new); - state = 0; - break; - case 5: - /* Header has only name, no value */ - new->value = ""; - new->decoded = ""; - append_raw_header (task, target, new); - state = 0; - break; - case 99: - /* Folding state */ - if (p + 1 == end) { - state = err_state; - } - else { - if (*p == '\r' || *p == '\n') { - p++; - valid_folding = FALSE; - } - else if (*p == '\t' || *p == ' ') { - /* Valid folding */ - p++; - valid_folding = TRUE; - } - else { - if (valid_folding) { - debug_task ("go to state: %d->%d", state, next_state); - state = next_state; - } - else { - /* Fall back */ - debug_task ("go to state: %d->%d", state, err_state); - state = err_state; - } - } - } - break; - case 100: - /* Fail state, skip line */ - - if (*p == '\r') { - if (*(p + 1) == '\n') { - nlines_count[RSPAMD_TASK_NEWLINES_CRLF] ++; - p++; - } - p++; - state = next_state; - } - else if (*p == '\n') { - nlines_count[RSPAMD_TASK_NEWLINES_LF] ++; - - if (*(p + 1) == '\r') { - p++; - } - p++; - state = next_state; - } - else if (p + 1 == end) { - state = next_state; - p++; - } - else { - p++; - } - break; - } - } - - guint max_cnt = 0; - gint sel = 0; - - for (gint i = 0; i < RSPAMD_TASK_NEWLINES_MAX; i ++) { - if (nlines_count[i] > max_cnt) { - max_cnt = nlines_count[i]; - sel = i; - } - } - - task->nlines_type = sel; -} static void free_byte_array_callback (void *pointer) @@ -1198,8 +911,8 @@ mime_foreach_callback (GMimeObject * part, gpointer user_data) rspamd_strcase_equal, NULL, rspamd_ptr_array_free_hard); if (hdrs != NULL) { - process_raw_headers (task, mime_part->raw_headers, - hdrs, strlen (hdrs)); + rspamd_mime_headers_process (task, mime_part->raw_headers, + hdrs, strlen (hdrs), FALSE); mime_part->raw_headers_str = hdrs; } @@ -1267,8 +980,8 @@ mime_foreach_callback (GMimeObject * part, gpointer user_data) rspamd_strcase_equal, NULL, rspamd_ptr_array_free_hard); if (hdrs != NULL) { - process_raw_headers (task, mime_part->raw_headers, - hdrs, strlen (hdrs)); + rspamd_mime_headers_process (task, mime_part->raw_headers, + hdrs, strlen (hdrs), FALSE); mime_part->raw_headers_str = hdrs; } @@ -1538,9 +1251,10 @@ rspamd_message_parse (struct rspamd_task *task) task->raw_headers_content.body_start = p + body_pos; if (task->raw_headers_content.len > 0) { - process_raw_headers (task, task->raw_headers, + rspamd_mime_headers_process (task, task->raw_headers, task->raw_headers_content.begin, - task->raw_headers_content.len); + task->raw_headers_content.len, + TRUE); } } } diff --git a/src/libmime/message.h b/src/libmime/message.h index 8003d073c..ff7ebafc9 100644 --- a/src/libmime/message.h +++ b/src/libmime/message.h @@ -10,6 +10,7 @@ #include "email_addr.h" #include "addr.h" #include "cryptobox.h" +#include "mime_headers.h" #include <gmime/gmime.h> struct rspamd_task; @@ -91,17 +92,6 @@ struct received_header { enum rspamd_received_type type; }; -struct raw_header { - gchar *name; - gchar *value; - const gchar *raw_value; /* As it is in the message (unfolded and unparsed) */ - gsize raw_len; - gboolean tab_separated; - gboolean empty_separator; - gchar *separator; - gchar *decoded; -}; - /** * Parse and pre-process mime message * @param task worker_task object diff --git a/src/libmime/mime_headers.c b/src/libmime/mime_headers.c new file mode 100644 index 000000000..8ba8e6c60 --- /dev/null +++ b/src/libmime/mime_headers.c @@ -0,0 +1,312 @@ +/*- + * Copyright 2016 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "mime_headers.h" +#include "task.h" + +static void +rspamd_mime_header_add (struct rspamd_task *task, + GHashTable *target, struct raw_header *rh) +{ + GPtrArray *ar; + + if ((ar = g_hash_table_lookup (target, rh->name)) != NULL) { + g_ptr_array_add (ar, rh); + msg_debug_task ("append raw header %s: %s", rh->name, rh->value); + } + else { + ar = g_ptr_array_sized_new (2); + g_ptr_array_add (ar, rh); + g_hash_table_insert (target, rh->name, ar); + msg_debug_task ("add new raw header %s: %s", rh->name, rh->value); + } +} + +/* Convert raw headers to a list of struct raw_header * */ +void +rspamd_mime_headers_process (struct rspamd_task *task, GHashTable *target, + const gchar *in, gsize len, gboolean check_newlines) +{ + struct raw_header *new = NULL; + const gchar *p, *c, *end; + gchar *tmp, *tp; + gint state = 0, l, next_state = 100, err_state = 100, t_state; + gboolean valid_folding = FALSE; + guint nlines_count[RSPAMD_TASK_NEWLINES_MAX]; + + p = in; + end = p + len; + c = p; + memset (nlines_count, 0, sizeof (nlines_count)); + msg_debug_task ("start processing headers"); + + while (p < end) { + /* FSM for processing headers */ + switch (state) { + case 0: + /* Begin processing headers */ + if (!g_ascii_isalpha (*p)) { + /* We have some garbage at the beginning of headers, skip this line */ + state = 100; + next_state = 0; + } + else { + state = 1; + c = p; + } + break; + case 1: + /* We got something like header's name */ + if (*p == ':') { + new = + rspamd_mempool_alloc0 (task->task_pool, + sizeof (struct raw_header)); + l = p - c; + tmp = rspamd_mempool_alloc (task->task_pool, l + 1); + rspamd_strlcpy (tmp, c, l + 1); + new->name = tmp; + new->empty_separator = TRUE; + new->raw_value = c; + new->raw_len = p - c; /* Including trailing ':' */ + p++; + state = 2; + c = p; + } + else if (g_ascii_isspace (*p)) { + /* Not header but some garbage */ + task->flags |= RSPAMD_TASK_FLAG_BROKEN_HEADERS; + state = 100; + next_state = 0; + } + else { + p++; + } + break; + case 2: + /* We got header's name, so skip any \t or spaces */ + if (*p == '\t') { + new->tab_separated = TRUE; + new->empty_separator = FALSE; + p++; + } + else if (*p == ' ') { + new->empty_separator = FALSE; + p++; + } + else if (*p == '\n' || *p == '\r') { + + if (check_newlines) { + if (*p == '\n') { + nlines_count[RSPAMD_TASK_NEWLINES_LF] ++; + } + else if (*(p + 1) == '\n') { + nlines_count[RSPAMD_TASK_NEWLINES_CRLF] ++; + } + else { + nlines_count[RSPAMD_TASK_NEWLINES_CR] ++; + } + } + + /* Process folding */ + state = 99; + l = p - c; + if (l > 0) { + tmp = rspamd_mempool_alloc (task->task_pool, l + 1); + rspamd_strlcpy (tmp, c, l + 1); + new->separator = tmp; + } + next_state = 3; + err_state = 5; + c = p; + } + else { + /* Process value */ + l = p - c; + if (l >= 0) { + tmp = rspamd_mempool_alloc (task->task_pool, l + 1); + rspamd_strlcpy (tmp, c, l + 1); + new->separator = tmp; + } + c = p; + state = 3; + } + break; + case 3: + if (*p == '\r' || *p == '\n') { + /* Hold folding */ + if (check_newlines) { + if (*p == '\n') { + nlines_count[RSPAMD_TASK_NEWLINES_LF] ++; + } + else if (*(p + 1) == '\n') { + nlines_count[RSPAMD_TASK_NEWLINES_CRLF] ++; + } + else { + nlines_count[RSPAMD_TASK_NEWLINES_CR] ++; + } + } + state = 99; + next_state = 3; + err_state = 4; + } + else if (p + 1 == end) { + state = 4; + } + else { + p++; + } + break; + case 4: + /* Copy header's value */ + l = p - c; + tmp = rspamd_mempool_alloc (task->task_pool, l + 1); + tp = tmp; + t_state = 0; + while (l--) { + if (t_state == 0) { + /* Before folding */ + if (*c == '\n' || *c == '\r') { + t_state = 1; + c++; + *tp++ = ' '; + } + else { + *tp++ = *c++; + } + } + else if (t_state == 1) { + /* Inside folding */ + if (g_ascii_isspace (*c)) { + c++; + } + else { + t_state = 0; + *tp++ = *c++; + } + } + } + /* Strip last space that can be added by \r\n parsing */ + if (*(tp - 1) == ' ') { + tp--; + } + + *tp = '\0'; + /* Strip the initial spaces that could also be added by folding */ + while (*tmp != '\0' && g_ascii_isspace (*tmp)) { + tmp ++; + } + + if (p + 1 == end) { + new->raw_len = end - new->raw_value; + } + else { + new->raw_len = p - new->raw_value; + } + + new->value = tmp; + new->decoded = g_mime_utils_header_decode_text (new->value); + + if (new->decoded != NULL) { + rspamd_mempool_add_destructor (task->task_pool, + (rspamd_mempool_destruct_t)g_free, new->decoded); + } + else { + new->decoded = ""; + } + + rspamd_mime_header_add (task, target, new); + state = 0; + break; + case 5: + /* Header has only name, no value */ + new->value = ""; + new->decoded = ""; + rspamd_mime_header_add (task, target, new); + state = 0; + break; + case 99: + /* Folding state */ + if (p + 1 == end) { + state = err_state; + } + else { + if (*p == '\r' || *p == '\n') { + p++; + valid_folding = FALSE; + } + else if (*p == '\t' || *p == ' ') { + /* Valid folding */ + p++; + valid_folding = TRUE; + } + else { + if (valid_folding) { + debug_task ("go to state: %d->%d", state, next_state); + state = next_state; + } + else { + /* Fall back */ + debug_task ("go to state: %d->%d", state, err_state); + state = err_state; + } + } + } + break; + case 100: + /* Fail state, skip line */ + + if (*p == '\r') { + if (*(p + 1) == '\n') { + nlines_count[RSPAMD_TASK_NEWLINES_CRLF] ++; + p++; + } + p++; + state = next_state; + } + else if (*p == '\n') { + nlines_count[RSPAMD_TASK_NEWLINES_LF] ++; + + if (*(p + 1) == '\r') { + p++; + } + p++; + state = next_state; + } + else if (p + 1 == end) { + state = next_state; + p++; + } + else { + p++; + } + break; + } + } + + if (check_newlines) { + guint max_cnt = 0; + gint sel = 0; + + for (gint i = 0; i < RSPAMD_TASK_NEWLINES_MAX; i ++) { + if (nlines_count[i] > max_cnt) { + max_cnt = nlines_count[i]; + sel = i; + } + } + + task->nlines_type = sel; + } +} diff --git a/src/libmime/mime_headers.h b/src/libmime/mime_headers.h new file mode 100644 index 000000000..aa76bed18 --- /dev/null +++ b/src/libmime/mime_headers.h @@ -0,0 +1,37 @@ +/*- + * Copyright 2016 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef SRC_LIBMIME_MIME_HEADERS_H_ +#define SRC_LIBMIME_MIME_HEADERS_H_ + +#include "config.h" + +struct rspamd_task; + +struct raw_header { + gchar *name; + gchar *value; + const gchar *raw_value; /* As it is in the message (unfolded and unparsed) */ + gsize raw_len; + gboolean tab_separated; + gboolean empty_separator; + gchar *separator; + gchar *decoded; +}; + +void rspamd_mime_headers_process (struct rspamd_task *task, GHashTable *target, + const gchar *in, gsize len, gboolean check_newlines); + +#endif /* SRC_LIBMIME_MIME_HEADERS_H_ */ |