]> source.dussan.org Git - rspamd.git/commitdiff
[Feature] Support archive files list extraction
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Mon, 4 Jul 2016 15:15:14 +0000 (16:15 +0100)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Mon, 4 Jul 2016 15:15:14 +0000 (16:15 +0100)
src/libmime/CMakeLists.txt
src/libmime/archives.c [new file with mode: 0644]
src/libmime/archives.h [new file with mode: 0644]
src/libmime/message.c

index ac05afd729164797e1fedcab6e2070d725c3021c..a159e126ae8c245745b62cf2f210c3201f90ce94 100644 (file)
@@ -6,6 +6,7 @@ SET(LIBRSPAMDMIMESRC
                                ${CMAKE_CURRENT_SOURCE_DIR}/images.c
                                ${CMAKE_CURRENT_SOURCE_DIR}/message.c
                                ${CMAKE_CURRENT_SOURCE_DIR}/smtp_utils.c
-                               ${CMAKE_CURRENT_SOURCE_DIR}/smtp_proto.c)
+                               ${CMAKE_CURRENT_SOURCE_DIR}/smtp_proto.c
+                               ${CMAKE_CURRENT_SOURCE_DIR}/archives.c)
 
 SET(RSPAMD_MIME ${LIBRSPAMDMIMESRC} PARENT_SCOPE)
\ No newline at end of file
diff --git a/src/libmime/archives.c b/src/libmime/archives.c
new file mode 100644 (file)
index 0000000..2de9d71
--- /dev/null
@@ -0,0 +1,174 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "config.h"
+#include "message.h"
+#include "task.h"
+#include "archives.h"
+
+/*
+ * Destructor callback for a struct rspamd_archive: releases every file name
+ * kept in the archive and then the pointer array that holds them.
+ */
+static void
+rspamd_archive_dtor (gpointer p)
+{
+       struct rspamd_archive *archive = p;
+       guint idx = 0;
+
+       while (idx < archive->files->len) {
+               /* Elements are GStrings, free them together with their buffers */
+               g_string_free (g_ptr_array_index (archive->files, idx), TRUE);
+               idx ++;
+       }
+
+       g_ptr_array_free (archive->files, TRUE);
+}
+
+/*
+ * Extract the list of file names from a zip archive stored in a mime part.
+ *
+ * The end of central directory (EOCD) record is searched backwards from the
+ * end of the part; the central directory it describes is then walked and the
+ * name of every record is stored as a GString in a new struct rspamd_archive.
+ * On success the part is flagged with RSPAMD_MIME_PART_ARCHIVE and the archive
+ * is attached as part->specific_data. On any inconsistency a debug message is
+ * logged and neither the flags nor specific_data of the part are changed.
+ *
+ * @param task task whose memory pool owns the archive structure
+ * @param part mime part whose content is treated as a zip file
+ */
+static void
+rspamd_archive_process_zip (struct rspamd_task *task,
+               struct rspamd_mime_part *part)
+{
+       const guchar *p, *start, *end, *eocd = NULL, *cd;
+       const guint32 eocd_magic = 0x06054b50, cd_basic_len = 46;
+       const guchar cd_magic[] = {0x50, 0x4b, 0x01, 0x02};
+       /* Size of an EOCD record that carries no comment */
+       const gsize eocd_min_len = 22;
+       guint32 cd_offset, cd_size;
+       guint16 extra_len, fname_len, comment_len;
+       gsize eocd_pos, remain, rec_len;
+       struct rspamd_archive *arch;
+       GString *fname;
+
+       if (part->content->len < eocd_min_len) {
+               /*
+                * Too short to hold an EOCD: bail out before computing a pointer
+                * that would lie in front of the buffer
+                */
+               msg_debug_task ("zip archive is invalid (no EOCD): %s", part->boundary);
+
+               return;
+       }
+
+       /* Zip files have interesting data at the end of archive */
+       start = part->content->data;
+       end = start + part->content->len - 1;
+
+       /*
+        * Search for EOCD:
+        * end points at the last byte of the part, so a comment-less EOCD
+        * cannot start later than 21 bytes before it
+        */
+       p = end - (eocd_min_len - 1);
+
+       while (p > start + sizeof (guint32)) {
+               guint32 t;
+
+               /* XXX: not an efficient approach */
+               memcpy (&t, p, sizeof (t));
+
+               if (GUINT32_FROM_LE (t) == eocd_magic) {
+                       eocd = p;
+                       break;
+               }
+
+               p --;
+       }
+
+       if (eocd == NULL) {
+               /* Not a zip file */
+               msg_debug_task ("zip archive is invalid (no EOCD): %s", part->boundary);
+
+               return;
+       }
+
+       /*
+        * Defensive check: the search above starts 21 bytes before end and only
+        * moves backwards, so the fixed part of EOCD always fits
+        */
+       if (end - eocd < 21) {
+               msg_debug_task ("zip archive is invalid (short EOCD): %s", part->boundary);
+
+               return;
+       }
+
+       /* Size and offset of the central directory, little endian in EOCD */
+       memcpy (&cd_size, eocd + 12, sizeof (cd_size));
+       cd_size = GUINT32_FROM_LE (cd_size);
+       memcpy (&cd_offset, eocd + 16, sizeof (cd_offset));
+       cd_offset = GUINT32_FROM_LE (cd_offset);
+
+       /*
+        * We need to check sanity as well: the central directory must end
+        * exactly where EOCD starts. The two 32 bit fields are not added
+        * together, as their sum can wrap around and let a bogus offset pass
+        */
+       eocd_pos = eocd - start;
+
+       if (cd_offset > eocd_pos || cd_size != eocd_pos - cd_offset) {
+               msg_debug_task ("zip archive is invalid (bad size/offset for CD): %s",
+                               part->boundary);
+
+               return;
+       }
+
+       cd = start + cd_offset;
+
+       arch = rspamd_mempool_alloc0 (task->task_pool, sizeof (*arch));
+       arch->files = g_ptr_array_new ();
+       arch->type = RSPAMD_ARCHIVE_ZIP;
+       /* The pool destructor releases the names even if parsing fails below */
+       rspamd_mempool_add_destructor (task->task_pool, rspamd_archive_dtor,
+                       arch);
+
+       while (cd < eocd) {
+               /* Read central directory record */
+               remain = eocd - cd;
+
+               if (remain < cd_basic_len ||
+                               memcmp (cd, cd_magic, sizeof (cd_magic)) != 0) {
+                       msg_debug_task ("zip archive is invalid (bad cd record): %s",
+                                       part->boundary);
+
+                       return;
+               }
+
+               memcpy (&fname_len, cd + 28, sizeof (fname_len));
+               fname_len = GUINT16_FROM_LE (fname_len);
+               memcpy (&extra_len, cd + 30, sizeof (extra_len));
+               extra_len = GUINT16_FROM_LE (extra_len);
+               memcpy (&comment_len, cd + 32, sizeof (comment_len));
+               comment_len = GUINT16_FROM_LE (comment_len);
+
+               /*
+                * Compare lengths instead of building a pointer that may lie
+                * beyond the end of the buffer
+                */
+               rec_len = (gsize)cd_basic_len + fname_len + comment_len + extra_len;
+
+               if (rec_len > remain) {
+                       msg_debug_task ("zip archive is invalid (too large cd record): %s",
+                                       part->boundary);
+
+                       return;
+               }
+
+               /* File name follows the fixed part of the record */
+               fname = g_string_new_len ((const gchar *)(cd + cd_basic_len),
+                               fname_len);
+               g_ptr_array_add (arch->files, fname);
+               msg_debug_task ("found file in zip archive: %v", fname);
+
+               cd += rec_len;
+       }
+
+       part->flags |= RSPAMD_MIME_PART_ARCHIVE;
+       part->specific_data = arch;
+}
+
+/*
+ * Walk over all parts of the task and run the zip parser on every non-empty
+ * part whose content type is application/zip.
+ */
+void
+rspamd_archives_process (struct rspamd_task *task)
+{
+       struct rspamd_mime_part *cur;
+       guint idx = 0;
+
+       while (idx < task->parts->len) {
+               cur = g_ptr_array_index (task->parts, idx);
+
+               /* Content type is tested first, the length only for zip parts */
+               if (g_mime_content_type_is_type (cur->type, "application", "zip")) {
+                       if (cur->content->len > 0) {
+                               rspamd_archive_process_zip (task, cur);
+                       }
+               }
+
+               idx ++;
+       }
+}
+
+
+/*
+ * Map an archive type to its constant textual name; types without a name
+ * of their own are reported as "unknown".
+ */
+const gchar *
+rspamd_archive_type_str (enum rspamd_archive_type type)
+{
+       switch (type) {
+       case RSPAMD_ARCHIVE_ZIP:
+               return "zip";
+       }
+
+       return "unknown";
+}
diff --git a/src/libmime/archives.h b/src/libmime/archives.h
new file mode 100644 (file)
index 0000000..917a37a
--- /dev/null
@@ -0,0 +1,41 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef SRC_LIBMIME_ARCHIVES_H_
+#define SRC_LIBMIME_ARCHIVES_H_
+
+#include "config.h"
+
+/* Archive formats distinguished by the archives code */
+enum rspamd_archive_type {
+       RSPAMD_ARCHIVE_ZIP, /* named "zip" by rspamd_archive_type_str */
+};
+
+
+/* Description of an archive: its format and the names of the files inside */
+struct rspamd_archive {
+       enum rspamd_archive_type type; /* Format of the archive */
+       GPtrArray *files; /* Array of GStrings */
+};
+
+/*
+ * Forward declaration: keeps the prototype below valid when this header is
+ * included before the header that defines the task structure
+ */
+struct rspamd_task;
+
+/**
+ * Process archives from a worker task
+ * @param task task whose parts are examined
+ */
+void rspamd_archives_process (struct rspamd_task *task);
+
+/**
+ * Get textual representation of an archive's type
+ * @param type archive type
+ * @return constant string: "zip" for RSPAMD_ARCHIVE_ZIP, "unknown" otherwise
+ */
+const gchar * rspamd_archive_type_str (enum rspamd_archive_type type);
+
+#endif /* SRC_LIBMIME_ARCHIVES_H_ */
index de2d337d445f25e4273bad2a89489c0b76b6d237..c74ed1301f81ae510010fe35a627d742ce1a5f78 100644 (file)
@@ -21,6 +21,7 @@
 #include "libutil/regexp.h"
 #include "html.h"
 #include "images.h"
+#include "archives.h"
 #include "email_addr.h"
 #include "utlist.h"
 #include "tokenizers/tokenizers.h"
@@ -1459,6 +1460,7 @@ rspamd_message_parse (struct rspamd_task *task)
        }
 
        rspamd_images_process (task);
+       rspamd_archives_process (task);
 
        /* Parse received headers */
        first = rspamd_message_get_header (task, "Received", FALSE);