]> source.dussan.org Git - rspamd.git/commitdiff
[Feature] Store more information about compressed files
authorVsevolod Stakhov <vsevolod@highsecure.ru>
Tue, 5 Jul 2016 09:12:43 +0000 (10:12 +0100)
committerVsevolod Stakhov <vsevolod@highsecure.ru>
Tue, 5 Jul 2016 09:12:43 +0000 (10:12 +0100)
src/libmime/archives.c
src/libmime/archives.h
src/lua/lua_mimepart.c
src/lua/lua_task.c

index 3cd4079fa26dcd45058aca67bb8cfed8c2407982..2a38a3641e22db560c1a15629675f50e07fed97d 100644 (file)
@@ -23,13 +23,16 @@ static void
 rspamd_archive_dtor (gpointer p)
 {
        struct rspamd_archive *arch = p;
-       GString *s;
+       struct rspamd_archive_file *f;
        guint i;
 
        for (i = 0; i < arch->files->len; i ++) {
-               s = g_ptr_array_index (arch->files, i);
+               f = g_ptr_array_index (arch->files, i);
 
-               g_string_free (s, TRUE);
+               if (f->fname) {
+                       g_string_free (f->fname, TRUE);
+               }
+               g_slice_free1 (sizeof (*f), f);
        }
 
        g_ptr_array_free (arch->files, TRUE);
@@ -42,10 +45,10 @@ rspamd_archive_process_zip (struct rspamd_task *task,
        const guchar *p, *start, *end, *eocd = NULL, *cd;
        const guint32 eocd_magic = 0x06054b50, cd_basic_len = 46;
        const guchar cd_magic[] = {0x50, 0x4b, 0x01, 0x02};
-       guint32 cd_offset, cd_size;
+       guint32 cd_offset, cd_size, comp_size, uncomp_size;
        guint16 extra_len, fname_len, comment_len;
        struct rspamd_archive *arch;
-       GString *fname;
+       struct rspamd_archive_file *f;
 
        /* Zip files have interesting data at the end of archive */
        p = part->content->data + part->content->len - 1;
@@ -118,6 +121,10 @@ rspamd_archive_process_zip (struct rspamd_task *task,
                        return;
                }
 
+               memcpy (&comp_size, cd + 20, sizeof (guint32));
+               comp_size = GUINT32_FROM_LE (comp_size);
+               memcpy (&uncomp_size, cd + 24, sizeof (guint32));
+               uncomp_size = GUINT32_FROM_LE (uncomp_size);
                memcpy (&fname_len, cd + 28, sizeof (fname_len));
                fname_len = GUINT16_FROM_LE (fname_len);
                memcpy (&extra_len, cd + 30, sizeof (extra_len));
@@ -132,9 +139,12 @@ rspamd_archive_process_zip (struct rspamd_task *task,
                        return;
                }
 
-               fname = g_string_new_len (cd + cd_basic_len, fname_len);
-               g_ptr_array_add (arch->files, fname);
-               msg_debug_task ("found file in zip archive: %v", fname);
+               f = g_slice_alloc0 (sizeof (*f));
+               f->fname = g_string_new_len (cd + cd_basic_len, fname_len);
+               f->compressed_size = comp_size;
+               f->uncompressed_size = uncomp_size;
+               g_ptr_array_add (arch->files, f);
+               msg_debug_task ("found file in zip archive: %v", f->fname);
 
                cd += fname_len + comment_len + extra_len + cd_basic_len;
        }
@@ -242,8 +252,9 @@ rspamd_archive_process_rar_v4 (struct rspamd_task *task, const guchar *start,
        const guchar *p = start, *start_section;
        guint8 type;
        guint flags;
-       guint64 sz;
+       guint64 sz, comp_sz, uncomp_sz;
        struct rspamd_archive *arch;
+       struct rspamd_archive_file *f;
 
        arch = rspamd_mempool_alloc0 (task->task_pool, sizeof (*arch));
        arch->files = g_ptr_array_new ();
@@ -275,6 +286,8 @@ rspamd_archive_process_rar_v4 (struct rspamd_task *task, const guchar *start,
 
                        RAR_READ_UINT32 (tmp);
                        sz += tmp;
+                       /* This is also used as PACK_SIZE */
+                       comp_sz = tmp;
                }
 
                if (sz == 0) {
@@ -287,11 +300,12 @@ rspamd_archive_process_rar_v4 (struct rspamd_task *task, const guchar *start,
 
                if (type == 0x74) {
                        guint fname_len;
-                       GString *s;
 
                        /* File header */
+                       /* Uncompressed size */
+                       RAR_READ_UINT32 (uncomp_sz);
                        /* Skip to NAME_SIZE element */
-                       RAR_SKIP_BYTES (15);
+                       RAR_SKIP_BYTES (11);
                        RAR_READ_UINT16 (fname_len);
 
                        if (fname_len == 0 || fname_len > (gsize)(end - p)) {
@@ -309,10 +323,14 @@ rspamd_archive_process_rar_v4 (struct rspamd_task *task, const guchar *start,
 
                                RAR_READ_UINT32 (tmp);
                                sz += tmp;
+                               comp_sz += tmp;
                                /* HIGH_UNP_SIZE  */
-                               RAR_SKIP_BYTES (4);
+                               RAR_READ_UINT32 (tmp);
+                               uncomp_sz += tmp;
                        }
 
+                       f = g_slice_alloc0 (sizeof (*f));
+
                        if (flags & 0x200) {
                                /* We have unicode + normal version */
                                guchar *tmp;
@@ -321,18 +339,25 @@ rspamd_archive_process_rar_v4 (struct rspamd_task *task, const guchar *start,
 
                                if (tmp != NULL) {
                                        /* Just use ASCII version */
-                                       s = g_string_new_len (p, tmp - p);
+                                       f->fname = g_string_new_len (p, tmp - p);
                                }
                                else {
                                        /* We have UTF8 filename, use it as is */
-                                       s = g_string_new_len (p, fname_len);
+                                       f->fname = g_string_new_len (p, fname_len);
                                }
                        }
                        else {
-                               s = g_string_new_len (p, fname_len);
+                               f->fname = g_string_new_len (p, fname_len);
+                       }
+
+                       f->compressed_size = comp_sz;
+                       f->uncompressed_size = uncomp_sz;
+
+                       if (flags & 0x4) {
+                               f->flags |= RSPAMD_ARCHIVE_FILE_ENCRYPTED;
                        }
 
-                       g_ptr_array_add (arch->files, s);
+                       g_ptr_array_add (arch->files, f);
                }
 
                p = start_section;
@@ -355,8 +380,9 @@ rspamd_archive_process_rar (struct rspamd_task *task,
                        rar_v4_magic[] = {0x52, 0x61, 0x72, 0x21, 0x1A, 0x07, 0x00};
        const guint rar_encrypted_header = 4, rar_main_header = 1,
                        rar_file_header = 2;
-       guint64 vint, sz;
+       guint64 vint, sz, comp_sz = 0, uncomp_sz = 0;
        struct rspamd_archive *arch;
+       struct rspamd_archive_file *f;
        gint r;
 
        p = part->content->data;
@@ -383,6 +409,7 @@ rspamd_archive_process_rar (struct rspamd_task *task,
                return;
        }
 
+       /* Rar v5 format */
        arch = rspamd_mempool_alloc0 (task->task_pool, sizeof (*arch));
        arch->files = g_ptr_array_new ();
        arch->type = RSPAMD_ARCHIVE_RAR;
@@ -438,7 +465,6 @@ rspamd_archive_process_rar (struct rspamd_task *task,
                        /* We have a file header, go forward */
                        const guchar *section_type_start = p;
                        guint64 flags, fname_len;
-                       GString *s;
 
                        p += r; /* Remain from type */
                        /* Header flags */
@@ -451,8 +477,9 @@ rspamd_archive_process_rar (struct rspamd_task *task,
                                RAR_READ_VINT_SKIP ();
                        }
                        if (flags & 0x2) {
-                               /* Data size */
+                               /* Data size - compressed size */
                                RAR_READ_VINT_SKIP ();
+                               comp_sz = vint;
                        }
 
                        /* File flags */
@@ -462,6 +489,7 @@ rspamd_archive_process_rar (struct rspamd_task *task,
 
                        /* Unpacked size */
                        RAR_READ_VINT_SKIP ();
+                       uncomp_sz = vint;
                        /* Attributes */
                        RAR_READ_VINT_SKIP ();
 
@@ -488,8 +516,11 @@ rspamd_archive_process_rar (struct rspamd_task *task,
                                return;
                        }
 
-                       s = g_string_new_len (p, fname_len);
-                       g_ptr_array_add (arch->files, s);
+                       f = g_slice_alloc0 (sizeof (*f));
+                       f->uncompressed_size = uncomp_sz;
+                       f->compressed_size = comp_sz;
+                       f->fname = g_string_new_len (p, fname_len);
+                       g_ptr_array_add (arch->files, f);
                        /* Restore p to the beginning of the header */
                        p = section_type_start;
                        RAR_SKIP_BYTES (sz);
index 87caeced18f1249297d25567f47e469fc7c5d535..d6d47448677c00f35c8ab4f762856c8bd42faa9a 100644 (file)
@@ -27,12 +27,23 @@ enum rspamd_archive_flags {
        RSPAMD_ARCHIVE_ENCRYPTED = (1 << 0),
 };
 
+enum rspamd_archive_file_flags {
+       RSPAMD_ARCHIVE_FILE_ENCRYPTED = (1 << 0),
+};
+
+struct rspamd_archive_file {
+       GString *fname;
+       gsize compressed_size;
+       gsize uncompressed_size;
+       enum rspamd_archive_file_flags flags;
+};
+
 struct rspamd_archive {
        enum rspamd_archive_type type;
        const gchar *archive_name;
        gsize size;
        enum rspamd_archive_flags flags;
-       GPtrArray *files; /* Array of GStrings */
+       GPtrArray *files; /* Array of struct rspamd_archive_file */
 };
 
 /**
index a9f6164588c1cfa9c3264cbcd987a29a518e3d0c..39ab9c8a30903931a8683ceabb1352b69f0f28bf 100644 (file)
@@ -240,7 +240,9 @@ LUA_FUNCTION_DEF (mimepart, is_archive);
  * the following methods:
  *
  * * `get_files` - return list of strings with filenames inside archive
- * * `get_type` - return string representation of archive's type (e.g. 'zip')
+ * * `get_files_full` - return list of tables with all information about files
+ * * `is_encrypted` - return true if an archive is encrypted
+ * * `get_type` - return string representation of image's type (e.g. 'zip')
  * * `get_filename` - return string with archive's file name
  * * `get_size` - return size in bytes
  * @return {rspamd_archive} archive structure or nil if a part is not an archive
index f933c5a979ab41cb2c57952cf39a5c2470268bbf..29f2f104691551a44ecb50b80e712ff82c7505c5 100644 (file)
@@ -395,6 +395,8 @@ LUA_FUNCTION_DEF (task, get_images);
  * Each archive has the following methods available:
  *
  * * `get_files` - return list of strings with filenames inside archive
+ * * `get_files_full` - return list of tables with all information about files
+ * * `is_encrypted` - return true if an archive is encrypted
  * * `get_type` - return string representation of image's type (e.g. 'zip')
  * * `get_filename` - return string with archive's file name
  * * `get_size` - return size in bytes
@@ -718,12 +720,16 @@ static const struct luaL_reg imagelib_m[] = {
 /* Archive methods */
 LUA_FUNCTION_DEF (archive, get_type);
 LUA_FUNCTION_DEF (archive, get_files);
+LUA_FUNCTION_DEF (archive, get_files_full);
+LUA_FUNCTION_DEF (archive, is_encrypted);
 LUA_FUNCTION_DEF (archive, get_filename);
 LUA_FUNCTION_DEF (archive, get_size);
 
 static const struct luaL_reg archivelib_m[] = {
        LUA_INTERFACE_DEF (archive, get_type),
        LUA_INTERFACE_DEF (archive, get_files),
+       LUA_INTERFACE_DEF (archive, get_files_full),
+       LUA_INTERFACE_DEF (archive, is_encrypted),
        LUA_INTERFACE_DEF (archive, get_filename),
        LUA_INTERFACE_DEF (archive, get_size),
        {"__tostring", rspamd_lua_class_tostring},
@@ -3175,15 +3181,15 @@ lua_archive_get_files (lua_State *L)
 {
        struct rspamd_archive *arch = lua_check_archive (L);
        guint i;
-       GString *s;
+       struct rspamd_archive_file *f;
 
        if (arch != NULL) {
                lua_createtable (L, arch->files->len, 0);
 
                for (i = 0; i < arch->files->len; i ++) {
-                       s = g_ptr_array_index (arch->files, i);
+                       f = g_ptr_array_index (arch->files, i);
 
-                       lua_pushlstring (L, s->str, s->len);
+                       lua_pushlstring (L, f->fname->str, f->fname->len);
                        lua_rawseti (L, -2, i + 1);
                }
        }
@@ -3194,6 +3200,62 @@ lua_archive_get_files (lua_State *L)
        return 1;
 }
 
+static gint
+lua_archive_get_files_full (lua_State *L)
+{
+       struct rspamd_archive *arch = lua_check_archive (L);
+       guint i;
+       struct rspamd_archive_file *f;
+
+       if (arch != NULL) {
+               lua_createtable (L, arch->files->len, 0);
+
+               for (i = 0; i < arch->files->len; i ++) {
+                       f = g_ptr_array_index (arch->files, i);
+
+                       lua_createtable (L, 0, 4);
+
+                       lua_pushstring (L, "name");
+                       lua_pushlstring (L, f->fname->str, f->fname->len);
+                       lua_settable (L, -3);
+
+                       lua_pushstring (L, "compressed_size");
+                       lua_pushnumber (L, f->compressed_size);
+                       lua_settable (L, -3);
+
+                       lua_pushstring (L, "uncompressed_size");
+                       lua_pushnumber (L, f->uncompressed_size);
+                       lua_settable (L, -3);
+
+                       lua_pushstring (L, "encrypted");
+                       lua_pushboolean (L, (f->flags & RSPAMD_ARCHIVE_FILE_ENCRYPTED) ? true : false);
+                       lua_settable (L, -3);
+
+                       lua_rawseti (L, -2, i + 1);
+               }
+       }
+       else {
+               return luaL_error (L, "invalid arguments");
+       }
+
+       return 1;
+}
+
+static gint
+lua_archive_is_encrypted (lua_State *L)
+{
+       struct rspamd_archive *arch = lua_check_archive (L);
+
+       if (arch != NULL) {
+               lua_pushboolean (L, (arch->flags & RSPAMD_ARCHIVE_ENCRYPTED) ? true : false);
+       }
+       else {
+               return luaL_error (L, "invalid arguments");
+       }
+
+       return 1;
+}
+
 static gint
 lua_archive_get_size (lua_State *L)
 {