/*- * Copyright 2016 Vsevolod Stakhov * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include "config.h" #include "message.h" #include "task.h" #include "archives.h" static void rspamd_archive_dtor (gpointer p) { struct rspamd_archive *arch = p; struct rspamd_archive_file *f; guint i; for (i = 0; i < arch->files->len; i ++) { f = g_ptr_array_index (arch->files, i); if (f->fname) { g_string_free (f->fname, TRUE); } g_slice_free1 (sizeof (*f), f); } g_ptr_array_free (arch->files, TRUE); } static void rspamd_archive_process_zip (struct rspamd_task *task, struct rspamd_mime_part *part) { const guchar *p, *start, *end, *eocd = NULL, *cd; const guint32 eocd_magic = 0x06054b50, cd_basic_len = 46; const guchar cd_magic[] = {0x50, 0x4b, 0x01, 0x02}; guint32 cd_offset, cd_size, comp_size, uncomp_size; guint16 extra_len, fname_len, comment_len; struct rspamd_archive *arch; struct rspamd_archive_file *f; /* Zip files have interesting data at the end of archive */ p = part->content->data + part->content->len - 1; start = part->content->data; end = p; /* Search for EOCD: * 22 bytes is a typical size of eocd without a comment and * end points one byte after the last character */ p -= 21; while (p > start + sizeof (guint32)) { guint32 t; /* XXX: not an efficient approach */ memcpy (&t, p, sizeof (t)); if (GUINT32_FROM_LE (t) == eocd_magic) { eocd = p; break; } p --; } if (eocd == NULL) { /* Not a zip file */ msg_debug_task ("zip archive is invalid (no EOCD): %s", part->filename); return; } if (end - eocd < 21) { msg_debug_task ("zip archive is invalid (short EOCD): %s", part->filename); return; } memcpy (&cd_size, eocd + 12, sizeof (cd_size)); cd_size = GUINT32_FROM_LE (cd_size); memcpy (&cd_offset, eocd + 16, sizeof (cd_offset)); cd_offset = GUINT32_FROM_LE (cd_offset); /* We need to check sanity as well */ if (cd_offset + cd_size != (guint)(eocd - start)) { msg_debug_task ("zip archive is invalid (bad size/offset for CD): %s", part->filename); return; } cd = start + cd_offset; arch = rspamd_mempool_alloc0 (task->task_pool, sizeof (*arch)); arch->files = g_ptr_array_new (); arch->type = RSPAMD_ARCHIVE_ZIP; rspamd_mempool_add_destructor (task->task_pool, rspamd_archive_dtor, arch); while (cd < eocd) { /* Read central directory record */ if (eocd - cd < cd_basic_len || memcmp (cd, cd_magic, sizeof (cd_magic)) != 0) { msg_debug_task ("zip archive is invalid (bad cd record): %s", part->filename); return; } memcpy (&comp_size, cd + 20, sizeof (guint32)); comp_size = GUINT32_FROM_LE (comp_size); memcpy (&uncomp_size, cd + 24, sizeof (guint32)); uncomp_size = GUINT32_FROM_LE (uncomp_size); memcpy (&fname_len, cd + 28, sizeof (fname_len)); fname_len = GUINT16_FROM_LE (fname_len); memcpy (&extra_len, cd + 30, sizeof (extra_len)); extra_len = GUINT16_FROM_LE (extra_len); memcpy (&comment_len, cd + 32, sizeof (comment_len)); comment_len = GUINT16_FROM_LE (comment_len); if (cd + fname_len + comment_len + extra_len + cd_basic_len > eocd) { msg_debug_task ("zip archive is invalid (too large cd record): %s", part->filename); return; } f = g_slice_alloc0 (sizeof (*f)); f->fname = g_string_new_len (cd + cd_basic_len, fname_len); f->compressed_size = comp_size; f->uncompressed_size = uncomp_size; g_ptr_array_add (arch->files, f); msg_debug_task ("found file in zip archive: %v", f->fname); cd += fname_len + comment_len + extra_len + cd_basic_len; } part->flags |= RSPAMD_MIME_PART_ARCHIVE; part->specific_data = arch; arch->archive_name = part->filename; arch->size = part->content->len; } static inline gint rspamd_archive_rar_read_vint (const guchar *start, gsize remain, guint64 *res) { /* * From http://www.rarlab.com/technote.htm: * Variable length integer. Can include one or more bytes, where * lower 7 bits of every byte contain integer data and highest bit * in every byte is the continuation flag. * If highest bit is 0, this is the last byte in sequence. * So first byte contains 7 least significant bits of integer and * continuation flag. Second byte, if present, contains next 7 bits and so on. */ guint64 t = 0; guint shift = 0; const guchar *p = start; while (remain > 0 && shift <= 57) { if (*p & 0x80) { t |= (*p & 0x7f) << shift; } else { t |= (*p & 0x7f) << shift; p ++; break; } shift += 7; p++; remain --; } if (remain == 0 || shift > 64) { return -1; } *res = GUINT64_FROM_LE (t); return p - start; } #define RAR_SKIP_BYTES(n) do { \ if ((n) <= 0) { \ msg_debug_task ("rar archive is invalid (bad skip value): %s", part->filename); \ return; \ } \ if ((gsize)(end - p) < (n)) { \ msg_debug_task ("rar archive is invalid (truncated): %s", part->filename); \ return; \ } \ p += (n); \ } while (0) #define RAR_READ_VINT() do { \ r = rspamd_archive_rar_read_vint (p, end - p, &vint); \ if (r == -1) { \ msg_debug_task ("rar archive is invalid (bad vint): %s", part->filename); \ return; \ } \ else if (r == 0) { \ msg_debug_task ("rar archive is invalid (BAD vint offset): %s", part->filename); \ return; \ }\ } while (0) #define RAR_READ_VINT_SKIP() do { \ r = rspamd_archive_rar_read_vint (p, end - p, &vint); \ if (r == -1) { \ msg_debug_task ("rar archive is invalid (bad vint): %s", part->filename); \ return; \ } \ p += r; \ } while (0) #define RAR_READ_UINT16(n) do { \ if (end - p < (glong)sizeof (guint16)) { \ msg_debug_task ("rar archive is invalid (bad int16): %s", part->filename); \ return; \ } \ n = p[0] + (p[1] << 8); \ p += sizeof (guint16); \ } while (0) #define RAR_READ_UINT32(n) do { \ if (end - p < (glong)sizeof (guint32)) { \ msg_debug_task ("rar archive is invalid (bad int32): %s", part->filename); \ return; \ } \ n = p[0] + (p[1] << 8) + (p[2] << 16) + (p[3] << 24); \ p += sizeof (guint32); \ } while (0) static void rspamd_archive_process_rar_v4 (struct rspamd_task *task, const guchar *start, const guchar *end, struct rspamd_mime_part *part) { const guchar *p = start, *start_section; guint8 type; guint flags; guint64 sz, comp_sz = 0, uncomp_sz = 0; struct rspamd_archive *arch; struct rspamd_archive_file *f; arch = rspamd_mempool_alloc0 (task->task_pool, sizeof (*arch)); arch->files = g_ptr_array_new (); arch->type = RSPAMD_ARCHIVE_RAR; rspamd_mempool_add_destructor (task->task_pool, rspamd_archive_dtor, arch); while (p < end) { /* Crc16 */ start_section = p; RAR_SKIP_BYTES (sizeof (guint16)); type = *p; p ++; RAR_READ_UINT16 (flags); if (type == 0x73) { /* Main header, check for encryption */ if (flags & 0x80) { arch->flags |= RSPAMD_ARCHIVE_ENCRYPTED; goto end; } } RAR_READ_UINT16 (sz); if (flags & 0x8000) { /* We also need to read ADD_SIZE element */ guint32 tmp; RAR_READ_UINT32 (tmp); sz += tmp; /* This is also used as PACK_SIZE */ comp_sz = tmp; } if (sz == 0) { /* Zero sized block - error */ msg_debug_task ("rar archive is invalid (zero size block): %s", part->filename); return; } if (type == 0x74) { guint fname_len; /* File header */ /* Uncompressed size */ RAR_READ_UINT32 (uncomp_sz); /* Skip to NAME_SIZE element */ RAR_SKIP_BYTES (11); RAR_READ_UINT16 (fname_len); if (fname_len == 0 || fname_len > (gsize)(end - p)) { msg_debug_task ("rar archive is invalid (bad fileame size): %s", part->filename); return; } /* Attrs */ RAR_SKIP_BYTES (4); if (flags & 0x100) { /* We also need to read HIGH_PACK_SIZE */ guint32 tmp; RAR_READ_UINT32 (tmp); sz += tmp; comp_sz += tmp; /* HIGH_UNP_SIZE */ RAR_READ_UINT32 (tmp); uncomp_sz += tmp; } f = g_slice_alloc0 (sizeof (*f)); if (flags & 0x200) { /* We have unicode + normal version */ guchar *tmp; tmp = memchr (p, '\0', fname_len); if (tmp != NULL) { /* Just use ASCII version */ f->fname = g_string_new_len (p, tmp - p); } else { /* We have UTF8 filename, use it as is */ f->fname = g_string_new_len (p, fname_len); } } else { f->fname = g_string_new_len (p, fname_len); } f->compressed_size = comp_sz; f->uncompressed_size = uncomp_sz; if (flags & 0x4) { f->flags |= RSPAMD_ARCHIVE_FILE_ENCRYPTED; } g_ptr_array_add (arch->files, f); } p = start_section; RAR_SKIP_BYTES (sz); } end: part->flags |= RSPAMD_MIME_PART_ARCHIVE; part->specific_data = arch; arch->archive_name = part->filename; arch->size = part->content->len; } static void rspamd_archive_process_rar (struct rspamd_task *task, struct rspamd_mime_part *part) { const guchar *p, *end, *section_start; const guchar rar_v5_magic[] = {0x52, 0x61, 0x72, 0x21, 0x1A, 0x07, 0x01, 0x00}, rar_v4_magic[] = {0x52, 0x61, 0x72, 0x21, 0x1A, 0x07, 0x00}; const guint rar_encrypted_header = 4, rar_main_header = 1, rar_file_header = 2; guint64 vint, sz, comp_sz = 0, uncomp_sz = 0, flags = 0, type = 0; struct rspamd_archive *arch; struct rspamd_archive_file *f; gint r; p = part->content->data; end = p + part->content->len; if ((gsize)(end - p) <= sizeof (rar_v5_magic)) { msg_debug_task ("rar archive is invalid (too small): %s", part->filename); return; } if (memcmp (p, rar_v5_magic, sizeof (rar_v5_magic)) == 0) { p += sizeof (rar_v5_magic); } else if (memcmp (p, rar_v4_magic, sizeof (rar_v4_magic)) == 0) { p += sizeof (rar_v4_magic); rspamd_archive_process_rar_v4 (task, p, end, part); return; } else { msg_debug_task ("rar archive is invalid (no rar magic): %s", part->filename); return; } /* Rar v5 format */ arch = rspamd_mempool_alloc0 (task->task_pool, sizeof (*arch)); arch->files = g_ptr_array_new (); arch->type = RSPAMD_ARCHIVE_RAR; rspamd_mempool_add_destructor (task->task_pool, rspamd_archive_dtor, arch); /* Now we can have either encryption header or archive header */ /* Crc 32 */ RAR_SKIP_BYTES (sizeof (guint32)); /* Size */ RAR_READ_VINT_SKIP (); sz = vint; /* Type */ section_start = p; RAR_READ_VINT_SKIP (); type = vint; /* Header flags */ RAR_READ_VINT_SKIP (); flags = vint; if (flags & 0x1) { /* Have extra zone */ RAR_READ_VINT_SKIP (); } if (flags & 0x2) { /* Data zone is presented */ RAR_READ_VINT_SKIP (); sz += vint; } if (type == rar_encrypted_header) { /* We can't read any further information as archive is encrypted */ arch->flags |= RSPAMD_ARCHIVE_ENCRYPTED; goto end; } else if (type != rar_main_header) { msg_debug_task ("rar archive is invalid (bad main header): %s", part->filename); return; } /* Nothing useful in main header */ p = section_start; RAR_SKIP_BYTES (sz); while (p < end) { /* Read the next header */ /* Crc 32 */ RAR_SKIP_BYTES (sizeof (guint32)); /* Size */ RAR_READ_VINT_SKIP (); sz = vint; if (sz == 0) { /* Zero sized block - error */ msg_debug_task ("rar archive is invalid (zero size block): %s", part->filename); return; } section_start = p; /* Type */ RAR_READ_VINT_SKIP (); type = vint; /* Header flags */ RAR_READ_VINT_SKIP (); flags = vint; if (flags & 0x1) { /* Have extra zone */ RAR_READ_VINT_SKIP (); } if (flags & 0x2) { /* Data zone is presented */ RAR_READ_VINT_SKIP (); sz += vint; comp_sz = vint; } if (type != rar_file_header) { p = section_start; RAR_SKIP_BYTES (sz); } else { /* We have a file header, go forward */ guint64 fname_len; /* File header specific flags */ RAR_READ_VINT_SKIP (); flags = vint; /* Unpacked size */ RAR_READ_VINT_SKIP (); uncomp_sz = vint; /* Attributes */ RAR_READ_VINT_SKIP (); if (flags & 0x2) { /* Unix mtime */ RAR_SKIP_BYTES (sizeof (guint32)); } if (flags & 0x4) { /* Crc32 */ RAR_SKIP_BYTES (sizeof (guint32)); } /* Compression */ RAR_READ_VINT_SKIP (); /* Host OS */ RAR_READ_VINT_SKIP (); /* Filename length (finally!) */ RAR_READ_VINT_SKIP (); fname_len = vint; if (fname_len == 0 || fname_len > (gsize)(end - p)) { msg_debug_task ("rar archive is invalid (bad fileame size): %s", part->filename); return; } f = g_slice_alloc0 (sizeof (*f)); f->uncompressed_size = uncomp_sz; f->compressed_size = comp_sz; f->fname = g_string_new_len (p, fname_len); g_ptr_array_add (arch->files, f); /* Restore p to the beginning of the header */ p = section_start; RAR_SKIP_BYTES (sz); } } end: part->flags |= RSPAMD_MIME_PART_ARCHIVE; part->specific_data = arch; arch->archive_name = part->filename; arch->size = part->content->len; } static gboolean rspamd_archive_cheat_detect (struct rspamd_mime_part *part, const gchar *str, const guchar *magic_start, gsize magic_len) { GMimeContentType *ct; const gchar *fname, *p; ct = part->type; if (ct && ct->type && ct->subtype && strcmp (ct->type, "application") == 0) { if (rspamd_substring_search_caseless (ct->subtype, strlen (ct->subtype), str, strlen (str)) != -1) { return TRUE; } } fname = part->filename; if (fname && strlen (fname) > strlen (str)) { p = fname + strlen (fname) - strlen (str); if (rspamd_lc_cmp (p, str, strlen (str)) == 0) { if (*(p - 1) == '.') { return TRUE; } } } if (magic_start != NULL) { if (part->content->len > magic_len && memcmp (part->content->data, magic_start, magic_len) == 0) { return TRUE; } } return FALSE; } void rspamd_archives_process (struct rspamd_task *task) { guint i; struct rspamd_mime_part *part; const guchar rar_magic[] = {0x52, 0x61, 0x72, 0x21, 0x1A, 0x07}; const guchar zip_magic[] = {0x50, 0x4b, 0x03, 0x04}; for (i = 0; i < task->parts->len; i ++) { part = g_ptr_array_index (task->parts, i); if (part->content->len > 0) { if (rspamd_archive_cheat_detect (part, "zip", zip_magic, sizeof (zip_magic))) { rspamd_archive_process_zip (task, part); } else if (rspamd_archive_cheat_detect (part, "rar", rar_magic, sizeof (rar_magic))) { rspamd_archive_process_rar (task, part); } } } } const gchar * rspamd_archive_type_str (enum rspamd_archive_type type) { const gchar *ret = "unknown"; switch (type) { case RSPAMD_ARCHIVE_ZIP: ret = "zip"; break; case RSPAMD_ARCHIVE_RAR: ret = "rar"; break; } return ret; }