about summary refs log tree commit diff stats
path: root/src/libmime
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@highsecure.ru> 2016-12-13 10:06:59 +0000
committer Vsevolod Stakhov <vsevolod@highsecure.ru> 2016-12-13 10:07:22 +0000
commit 1f5d9867a1257fe5633f8a03cd1aa09cc2fd84ee (patch)
tree 87ac5e8b6f3d5724354e94e2f0cc155a9b1493a9 /src/libmime
parent 14d9177ec87e420688db7a98eba6cff953eef54a (diff)
download rspamd-1f5d9867a1257fe5633f8a03cd1aa09cc2fd84ee.tar.gz
rspamd-1f5d9867a1257fe5633f8a03cd1aa09cc2fd84ee.zip
[Minor] Move headers parsing to a separate unit
Diffstat (limited to 'src/libmime')
-rw-r--r-- src/libmime/CMakeLists.txt 3
-rw-r--r-- src/libmime/message.c 300
-rw-r--r-- src/libmime/message.h 12
-rw-r--r-- src/libmime/mime_headers.c 312
-rw-r--r-- src/libmime/mime_headers.h 37
5 files changed, 359 insertions, 305 deletions
diff --git a/src/libmime/CMakeLists.txt b/src/libmime/CMakeLists.txt
index 39bd2d402..3ec13937a 100644
--- a/src/libmime/CMakeLists.txt
+++ b/src/libmime/CMakeLists.txt
@@ -6,6 +6,7 @@ SET(LIBRSPAMDMIMESRC
${CMAKE_CURRENT_SOURCE_DIR}/images.c
${CMAKE_CURRENT_SOURCE_DIR}/message.c
${CMAKE_CURRENT_SOURCE_DIR}/archives.c
- ${CMAKE_CURRENT_SOURCE_DIR}/content_type.c)
+ ${CMAKE_CURRENT_SOURCE_DIR}/content_type.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/mime_headers.c)
SET(RSPAMD_MIME ${LIBRSPAMDMIMESRC} PARENT_SCOPE)
\ No newline at end of file
diff --git a/src/libmime/message.c b/src/libmime/message.c
index 7ed0d2c01..cb9009d7b 100644
--- a/src/libmime/message.c
+++ b/src/libmime/message.c
@@ -52,293 +52,6 @@ rspamd_message_quark (void)
return g_quark_from_static_string ("mime-error");
}
-static void
-append_raw_header (struct rspamd_task *task,
- GHashTable *target, struct raw_header *rh)
-{
- GPtrArray *ar;
-
- if ((ar = g_hash_table_lookup (target, rh->name)) != NULL) {
- g_ptr_array_add (ar, rh);
- msg_debug_task ("append raw header %s: %s", rh->name, rh->value);
- }
- else {
- ar = g_ptr_array_sized_new (2);
- g_ptr_array_add (ar, rh);
- g_hash_table_insert (target, rh->name, ar);
- msg_debug_task ("add new raw header %s: %s", rh->name, rh->value);
- }
-}
-
-/* Convert raw headers to a list of struct raw_header * */
-static void
-process_raw_headers (struct rspamd_task *task, GHashTable *target,
- const gchar *in, gsize len)
-{
- struct raw_header *new = NULL;
- const gchar *p, *c, *end;
- gchar *tmp, *tp;
- gint state = 0, l, next_state = 100, err_state = 100, t_state;
- gboolean valid_folding = FALSE;
- guint nlines_count[RSPAMD_TASK_NEWLINES_MAX];
-
- p = in;
- end = p + len;
- c = p;
- memset (nlines_count, 0, sizeof (nlines_count));
- msg_debug_task ("start processing headers");
-
- while (p < end) {
- /* FSM for processing headers */
- switch (state) {
- case 0:
- /* Begin processing headers */
- if (!g_ascii_isalpha (*p)) {
- /* We have some garbage at the beginning of headers, skip this line */
- state = 100;
- next_state = 0;
- }
- else {
- state = 1;
- c = p;
- }
- break;
- case 1:
- /* We got something like header's name */
- if (*p == ':') {
- new =
- rspamd_mempool_alloc0 (task->task_pool,
- sizeof (struct raw_header));
- l = p - c;
- tmp = rspamd_mempool_alloc (task->task_pool, l + 1);
- rspamd_strlcpy (tmp, c, l + 1);
- new->name = tmp;
- new->empty_separator = TRUE;
- new->raw_value = c;
- new->raw_len = p - c; /* Including trailing ':' */
- p++;
- state = 2;
- c = p;
- }
- else if (g_ascii_isspace (*p)) {
- /* Not header but some garbage */
- task->flags |= RSPAMD_TASK_FLAG_BROKEN_HEADERS;
- state = 100;
- next_state = 0;
- }
- else {
- p++;
- }
- break;
- case 2:
- /* We got header's name, so skip any \t or spaces */
- if (*p == '\t') {
- new->tab_separated = TRUE;
- new->empty_separator = FALSE;
- p++;
- }
- else if (*p == ' ') {
- new->empty_separator = FALSE;
- p++;
- }
- else if (*p == '\n' || *p == '\r') {
-
- if (*p == '\n') {
- nlines_count[RSPAMD_TASK_NEWLINES_LF] ++;
- }
- else if (*(p + 1) == '\n') {
- nlines_count[RSPAMD_TASK_NEWLINES_CRLF] ++;
- }
- else {
- nlines_count[RSPAMD_TASK_NEWLINES_CR] ++;
- }
-
- /* Process folding */
- state = 99;
- l = p - c;
- if (l > 0) {
- tmp = rspamd_mempool_alloc (task->task_pool, l + 1);
- rspamd_strlcpy (tmp, c, l + 1);
- new->separator = tmp;
- }
- next_state = 3;
- err_state = 5;
- c = p;
- }
- else {
- /* Process value */
- l = p - c;
- if (l >= 0) {
- tmp = rspamd_mempool_alloc (task->task_pool, l + 1);
- rspamd_strlcpy (tmp, c, l + 1);
- new->separator = tmp;
- }
- c = p;
- state = 3;
- }
- break;
- case 3:
- if (*p == '\r' || *p == '\n') {
- /* Hold folding */
- if (*p == '\n') {
- nlines_count[RSPAMD_TASK_NEWLINES_LF] ++;
- }
- else if (*(p + 1) == '\n') {
- nlines_count[RSPAMD_TASK_NEWLINES_CRLF] ++;
- }
- else {
- nlines_count[RSPAMD_TASK_NEWLINES_CR] ++;
- }
- state = 99;
- next_state = 3;
- err_state = 4;
- }
- else if (p + 1 == end) {
- state = 4;
- }
- else {
- p++;
- }
- break;
- case 4:
- /* Copy header's value */
- l = p - c;
- tmp = rspamd_mempool_alloc (task->task_pool, l + 1);
- tp = tmp;
- t_state = 0;
- while (l--) {
- if (t_state == 0) {
- /* Before folding */
- if (*c == '\n' || *c == '\r') {
- t_state = 1;
- c++;
- *tp++ = ' ';
- }
- else {
- *tp++ = *c++;
- }
- }
- else if (t_state == 1) {
- /* Inside folding */
- if (g_ascii_isspace (*c)) {
- c++;
- }
- else {
- t_state = 0;
- *tp++ = *c++;
- }
- }
- }
- /* Strip last space that can be added by \r\n parsing */
- if (*(tp - 1) == ' ') {
- tp--;
- }
-
- *tp = '\0';
- /* Strip the initial spaces that could also be added by folding */
- while (*tmp != '\0' && g_ascii_isspace (*tmp)) {
- tmp ++;
- }
-
- if (p + 1 == end) {
- new->raw_len = end - new->raw_value;
- }
- else {
- new->raw_len = p - new->raw_value;
- }
-
- new->value = tmp;
- new->decoded = g_mime_utils_header_decode_text (new->value);
-
- if (new->decoded != NULL) {
- rspamd_mempool_add_destructor (task->task_pool,
- (rspamd_mempool_destruct_t)g_free, new->decoded);
- }
- else {
- new->decoded = "";
- }
-
- append_raw_header (task, target, new);
- state = 0;
- break;
- case 5:
- /* Header has only name, no value */
- new->value = "";
- new->decoded = "";
- append_raw_header (task, target, new);
- state = 0;
- break;
- case 99:
- /* Folding state */
- if (p + 1 == end) {
- state = err_state;
- }
- else {
- if (*p == '\r' || *p == '\n') {
- p++;
- valid_folding = FALSE;
- }
- else if (*p == '\t' || *p == ' ') {
- /* Valid folding */
- p++;
- valid_folding = TRUE;
- }
- else {
- if (valid_folding) {
- debug_task ("go to state: %d->%d", state, next_state);
- state = next_state;
- }
- else {
- /* Fall back */
- debug_task ("go to state: %d->%d", state, err_state);
- state = err_state;
- }
- }
- }
- break;
- case 100:
- /* Fail state, skip line */
-
- if (*p == '\r') {
- if (*(p + 1) == '\n') {
- nlines_count[RSPAMD_TASK_NEWLINES_CRLF] ++;
- p++;
- }
- p++;
- state = next_state;
- }
- else if (*p == '\n') {
- nlines_count[RSPAMD_TASK_NEWLINES_LF] ++;
-
- if (*(p + 1) == '\r') {
- p++;
- }
- p++;
- state = next_state;
- }
- else if (p + 1 == end) {
- state = next_state;
- p++;
- }
- else {
- p++;
- }
- break;
- }
- }
-
- guint max_cnt = 0;
- gint sel = 0;
-
- for (gint i = 0; i < RSPAMD_TASK_NEWLINES_MAX; i ++) {
- if (nlines_count[i] > max_cnt) {
- max_cnt = nlines_count[i];
- sel = i;
- }
- }
-
- task->nlines_type = sel;
-}
static void
free_byte_array_callback (void *pointer)
@@ -1198,8 +911,8 @@ mime_foreach_callback (GMimeObject * part, gpointer user_data)
rspamd_strcase_equal, NULL, rspamd_ptr_array_free_hard);
if (hdrs != NULL) {
- process_raw_headers (task, mime_part->raw_headers,
- hdrs, strlen (hdrs));
+ rspamd_mime_headers_process (task, mime_part->raw_headers,
+ hdrs, strlen (hdrs), FALSE);
mime_part->raw_headers_str = hdrs;
}
@@ -1267,8 +980,8 @@ mime_foreach_callback (GMimeObject * part, gpointer user_data)
rspamd_strcase_equal, NULL, rspamd_ptr_array_free_hard);
if (hdrs != NULL) {
- process_raw_headers (task, mime_part->raw_headers,
- hdrs, strlen (hdrs));
+ rspamd_mime_headers_process (task, mime_part->raw_headers,
+ hdrs, strlen (hdrs), FALSE);
mime_part->raw_headers_str = hdrs;
}
@@ -1538,9 +1251,10 @@ rspamd_message_parse (struct rspamd_task *task)
task->raw_headers_content.body_start = p + body_pos;
if (task->raw_headers_content.len > 0) {
- process_raw_headers (task, task->raw_headers,
+ rspamd_mime_headers_process (task, task->raw_headers,
task->raw_headers_content.begin,
- task->raw_headers_content.len);
+ task->raw_headers_content.len,
+ TRUE);
}
}
}
diff --git a/src/libmime/message.h b/src/libmime/message.h
index 8003d073c..ff7ebafc9 100644
--- a/src/libmime/message.h
+++ b/src/libmime/message.h
@@ -10,6 +10,7 @@
#include "email_addr.h"
#include "addr.h"
#include "cryptobox.h"
+#include "mime_headers.h"
#include <gmime/gmime.h>
struct rspamd_task;
@@ -91,17 +92,6 @@ struct received_header {
enum rspamd_received_type type;
};
-struct raw_header {
- gchar *name;
- gchar *value;
- const gchar *raw_value; /* As it is in the message (unfolded and unparsed) */
- gsize raw_len;
- gboolean tab_separated;
- gboolean empty_separator;
- gchar *separator;
- gchar *decoded;
-};
-
/**
* Parse and pre-process mime message
* @param task worker_task object
diff --git a/src/libmime/mime_headers.c b/src/libmime/mime_headers.c
new file mode 100644
index 000000000..8ba8e6c60
--- /dev/null
+++ b/src/libmime/mime_headers.c
@@ -0,0 +1,312 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mime_headers.h"
+#include "task.h"
+
+/* Append rh to target's array for rh->name, creating that array on first use */
+static void
+rspamd_mime_header_add (struct rspamd_task *task,
+		GHashTable *target, struct raw_header *rh)
+{
+	GPtrArray *same_name = g_hash_table_lookup (target, rh->name);
+
+	if (same_name == NULL) {
+		same_name = g_ptr_array_sized_new (2);
+		g_hash_table_insert (target, rh->name, same_name);
+		msg_debug_task ("add new raw header %s: %s", rh->name, rh->value);
+	}
+	else {
+		msg_debug_task ("append raw header %s: %s", rh->name, rh->value);
+	}
+	g_ptr_array_add (same_name, rh);
+}
+
+/*
+ * Parse `len` bytes of raw headers at `in`. Every header found becomes a
+ * struct raw_header that is appended to the GPtrArray stored in `target`
+ * under the header's name. Names, separators and values are allocated from
+ * task->task_pool. With `check_newlines` set, the most frequent line ending
+ * is saved to task->nlines_type. Lookahead at p + 1 is bounded by in + len.
+ */
+void
+rspamd_mime_headers_process (struct rspamd_task *task, GHashTable *target,
+		const gchar *in, gsize len, gboolean check_newlines)
+{
+	struct raw_header *new = NULL;
+	const gchar *p, *c, *end;
+	gchar *tmp, *tp;
+	gint state = 0, l, next_state = 100, err_state = 100, t_state;
+	gboolean valid_folding = FALSE;
+	guint nlines_count[RSPAMD_TASK_NEWLINES_MAX];
+
+	p = in;
+	end = p + len;
+	c = p;
+	memset (nlines_count, 0, sizeof (nlines_count));
+	msg_debug_task ("start processing headers");
+
+	while (p < end) {
+		/* FSM: p is the cursor, c marks the start of the current token */
+		switch (state) {
+		case 0:
+			/* Begin processing headers */
+			if (!g_ascii_isalpha (*p)) {
+				/* We have some garbage at the beginning of headers, skip this line */
+				state = 100;
+				next_state = 0;
+			}
+			else {
+				state = 1;
+				c = p;
+			}
+			break;
+		case 1:
+			/* We got something like header's name */
+			if (*p == ':') {
+				new = rspamd_mempool_alloc0 (task->task_pool, sizeof (*new));
+				l = p - c;
+				tmp = rspamd_mempool_alloc (task->task_pool, l + 1);
+				rspamd_strlcpy (tmp, c, l + 1);
+				new->name = tmp;
+				new->empty_separator = TRUE;
+				new->raw_value = c;
+				new->raw_len = p - c; /* Name only (':' excluded); state 4 extends it */
+				p++;
+				state = 2;
+				c = p;
+			}
+			else if (g_ascii_isspace (*p)) {
+				/* Not header but some garbage */
+				task->flags |= RSPAMD_TASK_FLAG_BROKEN_HEADERS;
+				state = 100;
+				next_state = 0;
+			}
+			else {
+				p++;
+			}
+			break;
+		case 2:
+			/* We got header's name, so skip any \t or spaces */
+			if (*p == '\t') {
+				new->tab_separated = TRUE;
+				new->empty_separator = FALSE;
+				p++;
+			}
+			else if (*p == ' ') {
+				new->empty_separator = FALSE;
+				p++;
+			}
+			else if (*p == '\n' || *p == '\r') {
+				/* Newline right after the separator: empty or folded value */
+				if (check_newlines) {
+					if (*p == '\n') {
+						nlines_count[RSPAMD_TASK_NEWLINES_LF] ++;
+					}
+					else if (p + 1 < end && *(p + 1) == '\n') {
+						nlines_count[RSPAMD_TASK_NEWLINES_CRLF] ++;
+					}
+					else {
+						nlines_count[RSPAMD_TASK_NEWLINES_CR] ++;
+					}
+				}
+
+				/* Process folding; state 5 (empty value) if it is not one */
+				state = 99;
+				l = p - c;
+				if (l > 0) {
+					tmp = rspamd_mempool_alloc (task->task_pool, l + 1);
+					rspamd_strlcpy (tmp, c, l + 1);
+					new->separator = tmp;
+				}
+				next_state = 3;
+				err_state = 5;
+				c = p;
+			}
+			else {
+				/* Process value; c..p is the separator that preceded it */
+				l = p - c;
+				if (l >= 0) {
+					tmp = rspamd_mempool_alloc (task->task_pool, l + 1);
+					rspamd_strlcpy (tmp, c, l + 1);
+					new->separator = tmp;
+				}
+				c = p;
+				state = 3;
+			}
+			break;
+		case 3:
+			if (*p == '\r' || *p == '\n') {
+				/* Hold folding: state 99 decides whether the value continues */
+				if (check_newlines) {
+					if (*p == '\n') {
+						nlines_count[RSPAMD_TASK_NEWLINES_LF] ++;
+					}
+					else if (p + 1 < end && *(p + 1) == '\n') {
+						nlines_count[RSPAMD_TASK_NEWLINES_CRLF] ++;
+					}
+					else {
+						nlines_count[RSPAMD_TASK_NEWLINES_CR] ++;
+					}
+				}
+				state = 99;
+				next_state = 3;
+				err_state = 4;
+			}
+			else if (p + 1 == end) {
+				state = 4; /* NOTE(review): value then omits final byte *p */
+			}
+			else {
+				p++;
+			}
+			break;
+		case 4:
+			/* Copy header's value, turning each newline + whitespace run into ' ' */
+			l = p - c;
+			tmp = rspamd_mempool_alloc (task->task_pool, l + 1);
+			tp = tmp;
+			t_state = 0;
+			while (l--) {
+				if (t_state == 0) {
+					/* Before folding */
+					if (*c == '\n' || *c == '\r') {
+						t_state = 1;
+						c++;
+						*tp++ = ' ';
+					}
+					else {
+						*tp++ = *c++;
+					}
+				}
+				else if (t_state == 1) {
+					/* Inside folding */
+					if (g_ascii_isspace (*c)) {
+						c++;
+					}
+					else {
+						t_state = 0;
+						*tp++ = *c++;
+					}
+				}
+			}
+			/* Strip last space that can be added by \r\n parsing (tmp may be empty) */
+			if (tp > tmp && *(tp - 1) == ' ') {
+				tp--;
+			}
+
+			*tp = '\0';
+			/* Strip the initial spaces that could also be added by folding */
+			while (*tmp != '\0' && g_ascii_isspace (*tmp)) {
+				tmp ++;
+			}
+
+			if (p + 1 == end) {
+				new->raw_len = end - new->raw_value;
+			}
+			else {
+				new->raw_len = p - new->raw_value;
+			}
+
+			new->value = tmp;
+			new->decoded = g_mime_utils_header_decode_text (new->value);
+
+			if (new->decoded != NULL) {
+				rspamd_mempool_add_destructor (task->task_pool,
+						(rspamd_mempool_destruct_t)g_free, new->decoded);
+			}
+			else {
+				new->decoded = "";
+			}
+
+			rspamd_mime_header_add (task, target, new);
+			state = 0;
+			break;
+		case 5:
+			/* Header has only name, no value */
+			new->value = "";
+			new->decoded = "";
+			rspamd_mime_header_add (task, target, new);
+			state = 0;
+			break;
+		case 99:
+			/* Folding state: skip newlines, then require a space or tab */
+			if (p + 1 == end) {
+				state = err_state;
+			}
+			else if (*p == '\r' || *p == '\n') {
+				p++;
+				valid_folding = FALSE;
+			}
+			else if (*p == '\t' || *p == ' ') {
+				/* Valid folding */
+				p++;
+				valid_folding = TRUE;
+			}
+			else if (valid_folding) {
+				debug_task ("go to state: %d->%d", state, next_state);
+				state = next_state;
+			}
+			else {
+				/* Fall back */
+				debug_task ("go to state: %d->%d", state, err_state);
+				state = err_state;
+			}
+			break;
+		case 100:
+			/* Fail state, skip line */
+
+			if (*p == '\r') {
+				if (p + 1 < end && *(p + 1) == '\n') {
+					nlines_count[RSPAMD_TASK_NEWLINES_CRLF] ++;
+					p++;
+				}
+				p++;
+				state = next_state;
+			}
+			else if (*p == '\n') {
+				nlines_count[RSPAMD_TASK_NEWLINES_LF] ++;
+
+				if (p + 1 < end && *(p + 1) == '\r') {
+					p++;
+				}
+				p++;
+				state = next_state;
+			}
+			else if (p + 1 == end) {
+				state = next_state;
+				p++;
+			}
+			else {
+				p++;
+			}
+			break;
+		}
+	}
+
+	if (check_newlines) {
+		guint max_cnt = 0;
+		gint sel = 0;
+
+		for (gint i = 0; i < RSPAMD_TASK_NEWLINES_MAX; i ++) {
+			if (nlines_count[i] > max_cnt) {
+				max_cnt = nlines_count[i];
+				sel = i;
+			}
+		}
+
+		task->nlines_type = sel;
+	}
+}
diff --git a/src/libmime/mime_headers.h b/src/libmime/mime_headers.h
new file mode 100644
index 000000000..aa76bed18
--- /dev/null
+++ b/src/libmime/mime_headers.h
@@ -0,0 +1,37 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef SRC_LIBMIME_MIME_HEADERS_H_
+#define SRC_LIBMIME_MIME_HEADERS_H_
+
+#include "config.h"
+
+struct rspamd_task;
+
+struct raw_header {
+ gchar *name; /* Header name: copy of the bytes before ':' */
+ gchar *value; /* Unfolded value, leading whitespace skipped; "" if the header has none */
+ const gchar *raw_value; /* Start of the header (its name) inside the parsed input, not a copy */
+ gsize raw_len; /* Bytes of raw header from raw_value (name only if there is no value) */
+ gboolean tab_separated; /* A tab was seen between ':' and the value */
+ gboolean empty_separator; /* TRUE when neither space nor tab follows ':' */
+ gchar *separator; /* Copy of the spaces/tabs between ':' and the value; may be NULL */
+ gchar *decoded; /* g_mime_utils_header_decode_text (value), or "" when unavailable */
+};
+/* Parse len bytes of raw headers at in; each header is appended to target[name] (a GPtrArray) */
+void rspamd_mime_headers_process (struct rspamd_task *task, GHashTable *target,
+ const gchar *in, gsize len, gboolean check_newlines); /* TRUE: also set task->nlines_type */
+
+#endif /* SRC_LIBMIME_MIME_HEADERS_H_ */