From d05af027b89abf021c1f010cfa655bf48cb17e4d Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Mon, 4 Jul 2016 16:15:14 +0100 Subject: [PATCH] [Feature] Support archive files list extraction --- src/libmime/CMakeLists.txt | 3 +- src/libmime/archives.c | 174 +++++++++++++++++++++++++++++++++++++ src/libmime/archives.h | 41 +++++++++ src/libmime/message.c | 2 + 4 files changed, 219 insertions(+), 1 deletion(-) create mode 100644 src/libmime/archives.c create mode 100644 src/libmime/archives.h diff --git a/src/libmime/CMakeLists.txt b/src/libmime/CMakeLists.txt index ac05afd72..a159e126a 100644 --- a/src/libmime/CMakeLists.txt +++ b/src/libmime/CMakeLists.txt @@ -6,6 +6,7 @@ SET(LIBRSPAMDMIMESRC ${CMAKE_CURRENT_SOURCE_DIR}/images.c ${CMAKE_CURRENT_SOURCE_DIR}/message.c ${CMAKE_CURRENT_SOURCE_DIR}/smtp_utils.c - ${CMAKE_CURRENT_SOURCE_DIR}/smtp_proto.c) + ${CMAKE_CURRENT_SOURCE_DIR}/smtp_proto.c + ${CMAKE_CURRENT_SOURCE_DIR}/archives.c) SET(RSPAMD_MIME ${LIBRSPAMDMIMESRC} PARENT_SCOPE) \ No newline at end of file diff --git a/src/libmime/archives.c b/src/libmime/archives.c new file mode 100644 index 000000000..2de9d7176 --- /dev/null +++ b/src/libmime/archives.c @@ -0,0 +1,174 @@ +/*- + * Copyright 2016 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include "config.h"
+#include "message.h"
+#include "task.h"
+#include "archives.h"
+
+/** Mempool destructor: frees every stored file name and the array itself */
+static void
+rspamd_archive_dtor (gpointer p)
+{
+	struct rspamd_archive *arch = p;
+	guint i;
+
+	for (i = 0; i < arch->files->len; i ++) {
+		g_string_free (g_ptr_array_index (arch->files, i), TRUE);
+	}
+
+	g_ptr_array_free (arch->files, TRUE);
+}
+
+/**
+ * Collect file names from the central directory (CD) of a zip archive.
+ * On success the archive is stored in part->specific_data and the part is
+ * flagged as RSPAMD_MIME_PART_ARCHIVE; malformed data is only logged.
+ * @param task task that owns the memory pool used for the archive
+ * @param part mime part with non-empty content to parse as a zip file
+ */
+static void
+rspamd_archive_process_zip (struct rspamd_task *task,
+		struct rspamd_mime_part *part)
+{
+	const guchar *p, *start, *eocd = NULL, *cd;
+	const guint32 eocd_magic = 0x06054b50, cd_basic_len = 46;
+	const guchar cd_magic[] = {0x50, 0x4b, 0x01, 0x02};
+	guint32 cd_offset, cd_size, eocd_offset, rec_len;
+	guint16 extra_len, fname_len, comment_len;
+	struct rspamd_archive *arch;
+	GString *fname;
+
+	/* Zip files have interesting data at the end of archive */
+	start = part->content->data;
+
+	/*
+	 * Search backwards for EOCD, starting at the last position where its
+	 * 22 byte fixed part fits, so that any match lies entirely inside the
+	 * buffer and no separate length check is needed afterwards. Parts
+	 * shorter than that skip the search, so p never leaves the buffer.
+	 */
+	p = part->content->len >= 22 ? start + part->content->len - 22 : start;
+
+	for (; p > start + sizeof (guint32); p --) {
+		guint32 t;
+
+		/* XXX: not an efficient approach */
+		memcpy (&t, p, sizeof (t));
+
+		if (GUINT32_FROM_LE (t) == eocd_magic) {
+			eocd = p;
+			break;
+		}
+	}
+
+	if (eocd == NULL) {
+		/* Not a zip file */
+		msg_debug_task ("zip archive is invalid (no EOCD): %s", part->boundary);
+
+		return;
+	}
+
+	memcpy (&cd_size, eocd + 12, sizeof (cd_size));
+	cd_size = GUINT32_FROM_LE (cd_size);
+	memcpy (&cd_offset, eocd + 16, sizeof (cd_offset));
+	cd_offset = GUINT32_FROM_LE (cd_offset);
+	eocd_offset = (guint32)(eocd - start);
+
+	/*
+	 * CD must end exactly where EOCD starts. Compare by subtraction, as
+	 * cd_offset + cd_size can wrap around for crafted 32 bit values.
+	 */
+	if (cd_offset > eocd_offset || cd_size != eocd_offset - cd_offset) {
+		msg_debug_task ("zip archive is invalid (bad size/offset for CD): %s",
+				part->boundary);
+
+		return;
+	}
+
+	cd = start + cd_offset;
+
+	arch = rspamd_mempool_alloc0 (task->task_pool, sizeof (*arch));
+	arch->files = g_ptr_array_new ();
+	arch->type = RSPAMD_ARCHIVE_ZIP;
+	rspamd_mempool_add_destructor (task->task_pool, rspamd_archive_dtor,
+			arch);
+
+	while (cd < eocd) {
+		/* Read central directory record */
+		if ((guint32)(eocd - cd) < cd_basic_len ||
+				memcmp (cd, cd_magic, sizeof (cd_magic)) != 0) {
+			msg_debug_task ("zip archive is invalid (bad cd record): %s",
+					part->boundary);
+
+			return;
+		}
+
+		memcpy (&fname_len, cd + 28, sizeof (fname_len));
+		fname_len = GUINT16_FROM_LE (fname_len);
+		memcpy (&extra_len, cd + 30, sizeof (extra_len));
+		extra_len = GUINT16_FROM_LE (extra_len);
+		memcpy (&comment_len, cd + 32, sizeof (comment_len));
+		comment_len = GUINT16_FROM_LE (comment_len);
+		rec_len = cd_basic_len + fname_len + extra_len + comment_len;
+
+		/* Compare lengths: a pointer sum could go beyond the buffer */
+		if (rec_len > (guint32)(eocd - cd)) {
+			msg_debug_task ("zip archive is invalid (too large cd record): %s",
+					part->boundary);
+
+			return;
+		}
+
+		fname = g_string_new_len ((const gchar *)cd + cd_basic_len, fname_len);
+		g_ptr_array_add (arch->files, fname);
+		msg_debug_task ("found file in zip archive: %v", fname);
+
+		cd += rec_len;
+	}
+
+	part->flags |= RSPAMD_MIME_PART_ARCHIVE;
+	part->specific_data = arch;
+}
+
+/** Process every non-empty application/zip mime part of a task */
+void
+rspamd_archives_process (struct rspamd_task *task)
+{
+	guint i;
+	struct rspamd_mime_part *part;
+
+	for (i = 0; i < task->parts->len; i ++) {
+		part = g_ptr_array_index (task->parts, i);
+
+		if (g_mime_content_type_is_type (part->type, "application", "zip") &&
+				part->content->len > 0) {
+			rspamd_archive_process_zip (task, part);
+		}
+	}
+}
+
+/** Get the name of an archive type; unhandled values yield "unknown" */
+const gchar *
+rspamd_archive_type_str (enum rspamd_archive_type type)
+{
+	switch (type) {
+	case RSPAMD_ARCHIVE_ZIP:
+		return "zip";
+	}
+
+	return "unknown";
+}
diff --git a/src/libmime/archives.h b/src/libmime/archives.h
new file mode 100644
index 000000000..917a37a70
--- /dev/null
+++ b/src/libmime/archives.h
@@ -0,0 +1,41 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef SRC_LIBMIME_ARCHIVES_H_
+#define SRC_LIBMIME_ARCHIVES_H_
+
+#include "config.h"
+
+struct rspamd_task; /* Forward declaration for the prototype below */
+
+enum rspamd_archive_type {
+	RSPAMD_ARCHIVE_ZIP,
+};
+
+struct rspamd_archive {
+	enum rspamd_archive_type type;
+	GPtrArray *files; /* Array of GStrings */
+};
+
+/**
+ * Process archives from a worker task: zip parts get a file list attached
+ * @param task task whose mime parts are scanned
+ */
+void rspamd_archives_process (struct rspamd_task *task);
+
+/** Get textual representation of an archive's type */
+const gchar * rspamd_archive_type_str (enum rspamd_archive_type type);
+
+#endif /* SRC_LIBMIME_ARCHIVES_H_ */
diff --git a/src/libmime/message.c b/src/libmime/message.c
index de2d337d4..c74ed1301 100644
--- a/src/libmime/message.c
+++ b/src/libmime/message.c
@@ -21,6 +21,7 @@
 #include "libutil/regexp.h"
 #include "html.h"
 #include "images.h"
+#include "archives.h"
 #include "email_addr.h"
 #include "utlist.h"
 #include "tokenizers/tokenizers.h"
@@ -1459,6 +1460,7 @@ rspamd_message_parse (struct rspamd_task *task)
 	}
 
 	rspamd_images_process (task);
+	rspamd_archives_process (task);
 
 	/* Parse received headers */
 	first = rspamd_message_get_header (task, "Received", FALSE);
-- 
2.39.5