From ef214b9c5496e897507849bfd3238f3c04481847 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Tue, 5 Jul 2016 10:12:43 +0100 Subject: [PATCH] [Feature] Store more information about compressed files --- src/libmime/archives.c | 73 ++++++++++++++++++++++++++++++------------ src/libmime/archives.h | 13 +++++++- src/lua/lua_mimepart.c | 4 ++- src/lua/lua_task.c | 68 +++++++++++++++++++++++++++++++++++++-- 4 files changed, 132 insertions(+), 26 deletions(-) diff --git a/src/libmime/archives.c b/src/libmime/archives.c index 3cd4079fa..2a38a3641 100644 --- a/src/libmime/archives.c +++ b/src/libmime/archives.c @@ -23,13 +23,16 @@ static void rspamd_archive_dtor (gpointer p) { struct rspamd_archive *arch = p; - GString *s; + struct rspamd_archive_file *f; guint i; for (i = 0; i < arch->files->len; i ++) { - s = g_ptr_array_index (arch->files, i); + f = g_ptr_array_index (arch->files, i); - g_string_free (s, TRUE); + if (f->fname) { + g_string_free (f->fname, TRUE); + } + g_slice_free1 (sizeof (*f), f); } g_ptr_array_free (arch->files, TRUE); @@ -42,10 +45,10 @@ rspamd_archive_process_zip (struct rspamd_task *task, const guchar *p, *start, *end, *eocd = NULL, *cd; const guint32 eocd_magic = 0x06054b50, cd_basic_len = 46; const guchar cd_magic[] = {0x50, 0x4b, 0x01, 0x02}; - guint32 cd_offset, cd_size; + guint32 cd_offset, cd_size, comp_size, uncomp_size; guint16 extra_len, fname_len, comment_len; struct rspamd_archive *arch; - GString *fname; + struct rspamd_archive_file *f; /* Zip files have interesting data at the end of archive */ p = part->content->data + part->content->len - 1; @@ -118,6 +121,10 @@ rspamd_archive_process_zip (struct rspamd_task *task, return; } + memcpy (&comp_size, cd + 20, sizeof (guint32)); + comp_size = GUINT32_FROM_LE (comp_size); + memcpy (&uncomp_size, cd + 24, sizeof (guint32)); + uncomp_size = GUINT32_FROM_LE (uncomp_size); memcpy (&fname_len, cd + 28, sizeof (fname_len)); fname_len = GUINT16_FROM_LE (fname_len); memcpy (&extra_len, cd + 
30, sizeof (extra_len)); @@ -132,9 +139,12 @@ rspamd_archive_process_zip (struct rspamd_task *task, return; } - fname = g_string_new_len (cd + cd_basic_len, fname_len); - g_ptr_array_add (arch->files, fname); - msg_debug_task ("found file in zip archive: %v", fname); + f = g_slice_alloc0 (sizeof (*f)); + f->fname = g_string_new_len (cd + cd_basic_len, fname_len); + f->compressed_size = comp_size; + f->uncompressed_size = uncomp_size; + g_ptr_array_add (arch->files, f); + msg_debug_task ("found file in zip archive: %v", f->fname); cd += fname_len + comment_len + extra_len + cd_basic_len; } @@ -242,8 +252,9 @@ rspamd_archive_process_rar_v4 (struct rspamd_task *task, const guchar *start, const guchar *p = start, *start_section; guint8 type; guint flags; - guint64 sz; + guint64 sz, comp_sz, uncomp_sz; struct rspamd_archive *arch; + struct rspamd_archive_file *f; arch = rspamd_mempool_alloc0 (task->task_pool, sizeof (*arch)); arch->files = g_ptr_array_new (); @@ -275,6 +286,8 @@ rspamd_archive_process_rar_v4 (struct rspamd_task *task, const guchar *start, RAR_READ_UINT32 (tmp); sz += tmp; + /* This is also used as PACK_SIZE */ + comp_sz = tmp; } if (sz == 0) { @@ -287,11 +300,12 @@ rspamd_archive_process_rar_v4 (struct rspamd_task *task, const guchar *start, if (type == 0x74) { guint fname_len; - GString *s; /* File header */ + /* Uncompressed size */ + RAR_READ_UINT32 (uncomp_sz); /* Skip to NAME_SIZE element */ - RAR_SKIP_BYTES (15); + RAR_SKIP_BYTES (11); RAR_READ_UINT16 (fname_len); if (fname_len == 0 || fname_len > (gsize)(end - p)) { @@ -309,10 +323,14 @@ rspamd_archive_process_rar_v4 (struct rspamd_task *task, const guchar *start, RAR_READ_UINT32 (tmp); sz += tmp; + comp_sz += tmp; /* HIGH_UNP_SIZE */ - RAR_SKIP_BYTES (4); + RAR_READ_UINT32 (tmp); + uncomp_sz += tmp; } + f = g_slice_alloc0 (sizeof (*f)); + if (flags & 0x200) { /* We have unicode + normal version */ guchar *tmp; @@ -321,18 +339,25 @@ rspamd_archive_process_rar_v4 (struct rspamd_task *task, const 
guchar *start, if (tmp != NULL) { /* Just use ASCII version */ - s = g_string_new_len (p, tmp - p); + f->fname = g_string_new_len (p, tmp - p); } else { /* We have UTF8 filename, use it as is */ - s = g_string_new_len (p, fname_len); + f->fname = g_string_new_len (p, fname_len); } } else { - s = g_string_new_len (p, fname_len); + f->fname = g_string_new_len (p, fname_len); + } + + f->compressed_size = comp_sz; + f->uncompressed_size = uncomp_sz; + + if (flags & 0x4) { + f->flags |= RSPAMD_ARCHIVE_FILE_ENCRYPTED; } - g_ptr_array_add (arch->files, s); + g_ptr_array_add (arch->files, f); } p = start_section; @@ -355,8 +380,9 @@ rspamd_archive_process_rar (struct rspamd_task *task, rar_v4_magic[] = {0x52, 0x61, 0x72, 0x21, 0x1A, 0x07, 0x00}; const guint rar_encrypted_header = 4, rar_main_header = 1, rar_file_header = 2; - guint64 vint, sz; + guint64 vint, sz, comp_sz = 0, uncomp_sz = 0; struct rspamd_archive *arch; + struct rspamd_archive_file *f; gint r; p = part->content->data; @@ -383,6 +409,7 @@ rspamd_archive_process_rar (struct rspamd_task *task, return; } + /* Rar v5 format */ arch = rspamd_mempool_alloc0 (task->task_pool, sizeof (*arch)); arch->files = g_ptr_array_new (); arch->type = RSPAMD_ARCHIVE_RAR; @@ -438,7 +465,6 @@ rspamd_archive_process_rar (struct rspamd_task *task, /* We have a file header, go forward */ const guchar *section_type_start = p; guint64 flags, fname_len; - GString *s; p += r; /* Remain from type */ /* Header flags */ @@ -451,8 +477,9 @@ rspamd_archive_process_rar (struct rspamd_task *task, RAR_READ_VINT_SKIP (); } if (flags & 0x2) { - /* Data size */ + /* Data size - compressed size */ RAR_READ_VINT_SKIP (); + comp_sz = vint; } /* File flags */ @@ -462,6 +489,7 @@ rspamd_archive_process_rar (struct rspamd_task *task, /* Unpacked size */ RAR_READ_VINT_SKIP (); + uncomp_sz = vint; /* Attributes */ RAR_READ_VINT_SKIP (); @@ -488,8 +516,11 @@ rspamd_archive_process_rar (struct rspamd_task *task, return; } - s = g_string_new_len (p, 
fname_len); - g_ptr_array_add (arch->files, s); + f = g_slice_alloc0 (sizeof (*f)); + f->uncompressed_size = uncomp_sz; + f->compressed_size = comp_sz; + f->fname = g_string_new_len (p, fname_len); + g_ptr_array_add (arch->files, f); /* Restore p to the beginning of the header */ p = section_type_start; RAR_SKIP_BYTES (sz); diff --git a/src/libmime/archives.h b/src/libmime/archives.h index 87caeced1..d6d474486 100644 --- a/src/libmime/archives.h +++ b/src/libmime/archives.h @@ -27,12 +27,23 @@ enum rspamd_archive_flags { RSPAMD_ARCHIVE_ENCRYPTED = (1 << 0), }; +enum rspamd_archive_file_flags { + RSPAMD_ARCHIVE_FILE_ENCRYPTED = (1 << 0), +}; + +struct rspamd_archive_file { + GString *fname; + gsize compressed_size; + gsize uncompressed_size; + enum rspamd_archive_file_flags flags; +}; + struct rspamd_archive { enum rspamd_archive_type type; const gchar *archive_name; gsize size; enum rspamd_archive_flags flags; - GPtrArray *files; /* Array of GStrings */ + GPtrArray *files; /* Array of struct rspamd_archive_file */ }; /** diff --git a/src/lua/lua_mimepart.c b/src/lua/lua_mimepart.c index a9f616458..39ab9c8a3 100644 --- a/src/lua/lua_mimepart.c +++ b/src/lua/lua_mimepart.c @@ -240,7 +240,9 @@ LUA_FUNCTION_DEF (mimepart, is_archive); * the following methods: * * * `get_files` - return list of strings with filenames inside archive - * * `get_type` - return string representation of archive's type (e.g. 'zip') + * * `get_files_full` - return list of tables with all information about files + * * `is_encrypted` - return true if an archive is encrypted + * * `get_type` - return string representation of archive's type (e.g. 
'zip') * * `get_filename` - return string with archive's file name * * `get_size` - return size in bytes * @return {rspamd_archive} archive structure or nil if a part is not an archive diff --git a/src/lua/lua_task.c b/src/lua/lua_task.c index f933c5a97..29f2f1046 100644 --- a/src/lua/lua_task.c +++ b/src/lua/lua_task.c @@ -395,6 +395,8 @@ LUA_FUNCTION_DEF (task, get_images); * Each archive has the following methods available: * * * `get_files` - return list of strings with filenames inside archive + * * `get_files_full` - return list of tables with all information about files + * * `is_encrypted` - return true if an archive is encrypted * * `get_type` - return string representation of image's type (e.g. 'zip') * * `get_filename` - return string with archive's file name * * `get_size` - return size in bytes @@ -718,12 +720,16 @@ static const struct luaL_reg imagelib_m[] = { /* Archive methods */ LUA_FUNCTION_DEF (archive, get_type); LUA_FUNCTION_DEF (archive, get_files); +LUA_FUNCTION_DEF (archive, get_files_full); +LUA_FUNCTION_DEF (archive, is_encrypted); LUA_FUNCTION_DEF (archive, get_filename); LUA_FUNCTION_DEF (archive, get_size); static const struct luaL_reg archivelib_m[] = { LUA_INTERFACE_DEF (archive, get_type), LUA_INTERFACE_DEF (archive, get_files), + LUA_INTERFACE_DEF (archive, get_files_full), + LUA_INTERFACE_DEF (archive, is_encrypted), LUA_INTERFACE_DEF (archive, get_filename), LUA_INTERFACE_DEF (archive, get_size), {"__tostring", rspamd_lua_class_tostring}, @@ -3175,15 +3181,15 @@ lua_archive_get_files (lua_State *L) { struct rspamd_archive *arch = lua_check_archive (L); guint i; - GString *s; + struct rspamd_archive_file *f; if (arch != NULL) { lua_createtable (L, arch->files->len, 0); for (i = 0; i < arch->files->len; i ++) { - s = g_ptr_array_index (arch->files, i); + f = g_ptr_array_index (arch->files, i); - lua_pushlstring (L, s->str, s->len); + lua_pushlstring (L, f->fname->str, f->fname->len); lua_rawseti (L, -2, i + 1); } } @@ -3194,6 
+3200,62 @@ lua_archive_get_files (lua_State *L) return 1; } +static gint +lua_archive_get_files_full (lua_State *L) +{ + struct rspamd_archive *arch = lua_check_archive (L); + guint i; + struct rspamd_archive_file *f; + + if (arch != NULL) { + lua_createtable (L, arch->files->len, 0); + + for (i = 0; i < arch->files->len; i ++) { + f = g_ptr_array_index (arch->files, i); + + lua_createtable (L, 0, 4); + + lua_pushstring (L, "name"); + lua_pushlstring (L, f->fname->str, f->fname->len); + lua_settable (L, -3); + + lua_pushstring (L, "compressed_size"); + lua_pushnumber (L, f->compressed_size); + lua_settable (L, -3); + + lua_pushstring (L, "uncompressed_size"); + lua_pushnumber (L, f->uncompressed_size); + lua_settable (L, -3); + + lua_pushstring (L, "encrypted"); + lua_pushboolean (L, (f->flags & RSPAMD_ARCHIVE_FILE_ENCRYPTED) ? true : false); + lua_settable (L, -3); + + lua_rawseti (L, -2, i + 1); + } + } + else { + return luaL_error (L, "invalid arguments"); + } + + return 1; +} + +static gint +lua_archive_is_encrypted (lua_State *L) +{ + struct rspamd_archive *arch = lua_check_archive (L); + + if (arch != NULL) { + lua_pushboolean (L, (arch->flags & RSPAMD_ARCHIVE_ENCRYPTED) ? true : false); + } + else { + return luaL_error (L, "invalid arguments"); + } + + return 1; +} + static gint lua_archive_get_size (lua_State *L) { -- 2.39.5