aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorVsevolod Stakhov <vsevolod@highsecure.ru>2016-07-04 16:15:14 +0100
committerVsevolod Stakhov <vsevolod@highsecure.ru>2016-07-04 16:15:14 +0100
commitd05af027b89abf021c1f010cfa655bf48cb17e4d (patch)
tree45c57c0d8253d1139bc9f325419cdd61f495a5fb
parent759fa05d26aeb86fa9005ce39d222c65c45ef787 (diff)
downloadrspamd-d05af027b89abf021c1f010cfa655bf48cb17e4d.tar.gz
rspamd-d05af027b89abf021c1f010cfa655bf48cb17e4d.zip
[Feature] Support archive files list extraction
-rw-r--r--src/libmime/CMakeLists.txt3
-rw-r--r--src/libmime/archives.c174
-rw-r--r--src/libmime/archives.h41
-rw-r--r--src/libmime/message.c2
4 files changed, 219 insertions, 1 deletions
diff --git a/src/libmime/CMakeLists.txt b/src/libmime/CMakeLists.txt
index ac05afd72..a159e126a 100644
--- a/src/libmime/CMakeLists.txt
+++ b/src/libmime/CMakeLists.txt
@@ -6,6 +6,7 @@ SET(LIBRSPAMDMIMESRC
${CMAKE_CURRENT_SOURCE_DIR}/images.c
${CMAKE_CURRENT_SOURCE_DIR}/message.c
${CMAKE_CURRENT_SOURCE_DIR}/smtp_utils.c
- ${CMAKE_CURRENT_SOURCE_DIR}/smtp_proto.c)
+ ${CMAKE_CURRENT_SOURCE_DIR}/smtp_proto.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/archives.c)
SET(RSPAMD_MIME ${LIBRSPAMDMIMESRC} PARENT_SCOPE) \ No newline at end of file
diff --git a/src/libmime/archives.c b/src/libmime/archives.c
new file mode 100644
index 000000000..2de9d7176
--- /dev/null
+++ b/src/libmime/archives.c
@@ -0,0 +1,174 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "config.h"
+#include "message.h"
+#include "task.h"
+#include "archives.h"
+
+/*
+ * Memory pool destructor for an archive descriptor: releases every file
+ * name stored in the `files` array and then the array itself.
+ * The rspamd_archive structure is not freed here, it is allocated from
+ * the task memory pool.
+ */
+static void
+rspamd_archive_dtor (gpointer p)
+{
+	struct rspamd_archive *archive = p;
+	guint idx = 0;
+
+	while (idx < archive->files->len) {
+		GString *name = g_ptr_array_index (archive->files, idx);
+
+		g_string_free (name, TRUE);
+		idx ++;
+	}
+
+	g_ptr_array_free (archive->files, TRUE);
+}
+
+/*
+ * Extract the list of file names from a zip attachment.
+ *
+ * The end of central directory record (EOCD) is searched backwards from the
+ * end of the part content, then every central directory record between the
+ * recorded offset and the EOCD is walked and its file name is stored.
+ *
+ * task: provides the memory pool for the archive descriptor and the
+ *       context for debug logging
+ * part: mime part to inspect; on success it gets RSPAMD_MIME_PART_ARCHIVE
+ *       set in `flags` and `specific_data` pointing to the descriptor.
+ *       On any structural error the part is left unmodified.
+ */
+static void
+rspamd_archive_process_zip (struct rspamd_task *task,
+		struct rspamd_mime_part *part)
+{
+	const guchar *p, *start, *end, *eocd = NULL, *cd;
+	const guint32 eocd_magic = 0x06054b50, cd_basic_len = 46,
+			eocd_basic_len = 22;
+	const guchar cd_magic[] = {0x50, 0x4b, 0x01, 0x02};
+	guint32 cd_offset, cd_size;
+	guint16 extra_len, fname_len, comment_len;
+	gsize rec_len;
+	struct rspamd_archive *arch;
+	GString *fname;
+
+	/*
+	 * A part shorter than a bare EOCD cannot be a zip file; bail out early
+	 * so that no pointer before the start of the buffer is ever formed
+	 */
+	if (part->content->len < eocd_basic_len) {
+		msg_debug_task ("zip archive is invalid (no EOCD): %s", part->boundary);
+
+		return;
+	}
+
+	/* Zip files have interesting data at the end of archive */
+	start = part->content->data;
+	/* end points one byte after the last character */
+	end = start + part->content->len;
+
+	/* Search for EOCD:
+	 * 22 bytes is a typical size of eocd without a comment, so this is
+	 * the last position where a complete record can start
+	 */
+	p = end - eocd_basic_len;
+
+	while (p > start + sizeof (guint32)) {
+		guint32 t;
+
+		/* XXX: not an efficient approach */
+		memcpy (&t, p, sizeof (t));
+
+		if (GUINT32_FROM_LE (t) == eocd_magic) {
+			eocd = p;
+			break;
+		}
+
+		p --;
+	}
+
+	if (eocd == NULL) {
+		/* Not a zip file */
+		msg_debug_task ("zip archive is invalid (no EOCD): %s", part->boundary);
+
+		return;
+	}
+
+	/*
+	 * The search starts at end - 22 and only moves backwards, hence the
+	 * fixed part of the EOCD always fits into the buffer at this point
+	 */
+	memcpy (&cd_size, eocd + 12, sizeof (cd_size));
+	cd_size = GUINT32_FROM_LE (cd_size);
+	memcpy (&cd_offset, eocd + 16, sizeof (cd_offset));
+	cd_offset = GUINT32_FROM_LE (cd_offset);
+
+	/*
+	 * We need to check sanity as well: the central directory must end
+	 * exactly where EOCD starts. The sum is done in 64 bits as both values
+	 * come from the message and could otherwise wrap around to a matching
+	 * value, leaving cd_offset far outside of the buffer
+	 */
+	if ((guint64)cd_offset + (guint64)cd_size != (guint64)(eocd - start)) {
+		msg_debug_task ("zip archive is invalid (bad size/offset for CD): %s",
+				part->boundary);
+
+		return;
+	}
+
+	/* cd_offset <= eocd - start is guaranteed by the check above */
+	cd = start + cd_offset;
+
+	arch = rspamd_mempool_alloc0 (task->task_pool, sizeof (*arch));
+	arch->files = g_ptr_array_new ();
+	arch->type = RSPAMD_ARCHIVE_ZIP;
+	/* The destructor releases collected names even if parsing fails below */
+	rspamd_mempool_add_destructor (task->task_pool, rspamd_archive_dtor,
+			arch);
+
+	while (cd < eocd) {
+		/* Read central directory record */
+		if ((gsize)(eocd - cd) < cd_basic_len ||
+				memcmp (cd, cd_magic, sizeof (cd_magic)) != 0) {
+			msg_debug_task ("zip archive is invalid (bad cd record): %s",
+					part->boundary);
+
+			return;
+		}
+
+		memcpy (&fname_len, cd + 28, sizeof (fname_len));
+		fname_len = GUINT16_FROM_LE (fname_len);
+		memcpy (&extra_len, cd + 30, sizeof (extra_len));
+		extra_len = GUINT16_FROM_LE (extra_len);
+		memcpy (&comment_len, cd + 32, sizeof (comment_len));
+		comment_len = GUINT16_FROM_LE (comment_len);
+
+		/*
+		 * Compare lengths instead of pointers to avoid building a pointer
+		 * that lies beyond the end of the buffer
+		 */
+		rec_len = (gsize)cd_basic_len + fname_len + extra_len + comment_len;
+
+		if (rec_len > (gsize)(eocd - cd)) {
+			msg_debug_task ("zip archive is invalid (too large cd record): %s",
+					part->boundary);
+
+			return;
+		}
+
+		/* File name immediately follows the fixed part of the record */
+		fname = g_string_new_len ((const gchar *)(cd + cd_basic_len),
+				fname_len);
+		g_ptr_array_add (arch->files, fname);
+		msg_debug_task ("found file in zip archive: %v", fname);
+
+		cd += rec_len;
+	}
+
+	part->flags |= RSPAMD_MIME_PART_ARCHIVE;
+	part->specific_data = arch;
+}
+
+/*
+ * Walk over all parts of the task and run the zip listing for each part
+ * that has the application/zip content type and non-empty content.
+ */
+void
+rspamd_archives_process (struct rspamd_task *task)
+{
+	struct rspamd_mime_part *cur;
+	guint idx;
+
+	for (idx = 0; idx < task->parts->len; idx ++) {
+		cur = g_ptr_array_index (task->parts, idx);
+
+		if (!g_mime_content_type_is_type (cur->type, "application", "zip")) {
+			/* Not a zip attachment */
+			continue;
+		}
+
+		if (cur->content->len == 0) {
+			/* Nothing to parse */
+			continue;
+		}
+
+		rspamd_archive_process_zip (task, cur);
+	}
+}
+
+
+/*
+ * Map an archive type to a constant human readable string;
+ * values without a dedicated name yield "unknown".
+ */
+const gchar *
+rspamd_archive_type_str (enum rspamd_archive_type type)
+{
+	switch (type) {
+	case RSPAMD_ARCHIVE_ZIP:
+		return "zip";
+	}
+
+	return "unknown";
+}
diff --git a/src/libmime/archives.h b/src/libmime/archives.h
new file mode 100644
index 000000000..917a37a70
--- /dev/null
+++ b/src/libmime/archives.h
@@ -0,0 +1,41 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef SRC_LIBMIME_ARCHIVES_H_
+#define SRC_LIBMIME_ARCHIVES_H_
+
+#include "config.h"
+
+enum rspamd_archive_type { /* Archive formats known to the archives processor */
+ RSPAMD_ARCHIVE_ZIP, /* Set for parts handled as application/zip */
+};
+
+
+struct rspamd_archive { /* Listing extracted from a single archive mime part */
+ enum rspamd_archive_type type; /* Format of the archive the listing comes from */
+ GPtrArray *files; /* Array of GStrings, one per file name found in the archive */
+};
+
+/**
+ * Process archives from a worker task: iterate over the parts of the task
+ * and extract the list of file names from each non-empty application/zip part
+ * @param task task whose parts are inspected
+ */
+void rspamd_archives_process (struct rspamd_task *task);
+
+/**
+ * Get textual representation of an archive's type
+ * @param type archive type
+ * @return constant string: "zip" for RSPAMD_ARCHIVE_ZIP, "unknown" otherwise
+ */
+const gchar * rspamd_archive_type_str (enum rspamd_archive_type type);
+
+#endif /* SRC_LIBMIME_ARCHIVES_H_ */
diff --git a/src/libmime/message.c b/src/libmime/message.c
index de2d337d4..c74ed1301 100644
--- a/src/libmime/message.c
+++ b/src/libmime/message.c
@@ -21,6 +21,7 @@
#include "libutil/regexp.h"
#include "html.h"
#include "images.h"
+#include "archives.h"
#include "email_addr.h"
#include "utlist.h"
#include "tokenizers/tokenizers.h"
@@ -1459,6 +1460,7 @@ rspamd_message_parse (struct rspamd_task *task)
}
rspamd_images_process (task);
+ rspamd_archives_process (task);
/* Parse received headers */
first = rspamd_message_get_header (task, "Received", FALSE);