/*-
 * Copyright 2016 Vsevolod Stakhov
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "config.h"
#include "message.h"
#include "task.h"
#include "archives.h"
#include <unicode/uchar.h>
#include <unicode/utf8.h>
#include <unicode/utf16.h>

static void
rspamd_archive_dtor (gpointer p)
{
	struct rspamd_archive *arch = p;
	struct rspamd_archive_file *f;
	guint i;

	for (i = 0; i < arch->files->len; i ++) {
		f = g_ptr_array_index (arch->files, i);

		if (f->fname) {
			g_string_free (f->fname, TRUE);
		}

		g_free (f);
	}

	g_ptr_array_free (arch->files, TRUE);
}

static void
rspamd_archive_process_zip (struct rspamd_task *task,
		struct rspamd_mime_part *part)
{
	const guchar *p, *start, *end, *eocd = NULL, *cd;
	const guint32 eocd_magic = 0x06054b50, cd_basic_len = 46;
	const guchar cd_magic[] = {0x50, 0x4b, 0x01, 0x02};
	const guint max_processed = 1024;
	guint32 cd_offset, cd_size, comp_size, uncomp_size, processed = 0;
	guint16 extra_len, fname_len, comment_len;
	struct rspamd_archive *arch;
	struct rspamd_archive_file *f;

	/* Zip files have interesting data at the end of archive */
	p = part->parsed_data.begin + part->parsed_data.len - 1;
	start = part->parsed_data.begin;
	end = p;

	/* Search for EOCD:
	 * 22 bytes is a typical size of eocd without a comment and
	 * end points one byte after the last character
	 */
	p -= 21;

	while (p > start + sizeof (guint32)) {
		guint32 t;

		if (processed > max_processed) {
			break;
		}

		/* XXX: not an efficient approach */
		memcpy (&t, p, sizeof (t));

		if (GUINT32_FROM_LE (t) == eocd_magic) {
			eocd = p;
			break;
		}

		p --;
		processed ++;
	}


	if (eocd == NULL) {
		/* Not a zip file */
		msg_debug_task ("zip archive is invalid (no EOCD)");

		return;
	}

	if (end - eocd < 21) {
		msg_debug_task ("zip archive is invalid (short EOCD)");

		return;
	}


	memcpy (&cd_size, eocd + 12, sizeof (cd_size));
	cd_size = GUINT32_FROM_LE (cd_size);
	memcpy (&cd_offset, eocd + 16, sizeof (cd_offset));
	cd_offset = GUINT32_FROM_LE (cd_offset);

	/* We need to check sanity as well */
	if (cd_offset + cd_size != (guint)(eocd - start)) {
		msg_debug_task ("zip archive is invalid (bad size/offset for CD)");

		return;
	}

	cd = start + cd_offset;

	arch = rspamd_mempool_alloc0 (task->task_pool, sizeof (*arch));
	arch->files = g_ptr_array_new ();
	arch->type = RSPAMD_ARCHIVE_ZIP;
	rspamd_mempool_add_destructor (task->task_pool, rspamd_archive_dtor,
			arch);

	while (cd < eocd) {
		/* Read central directory record */
		if (eocd - cd < cd_basic_len ||
				memcmp (cd, cd_magic, sizeof (cd_magic)) != 0) {
			msg_debug_task ("zip archive is invalid (bad cd record)");

			return;
		}

		memcpy (&comp_size, cd + 20, sizeof (guint32));
		comp_size = GUINT32_FROM_LE (comp_size);
		memcpy (&uncomp_size, cd + 24, sizeof (guint32));
		uncomp_size = GUINT32_FROM_LE (uncomp_size);
		memcpy (&fname_len, cd + 28, sizeof (fname_len));
		fname_len = GUINT16_FROM_LE (fname_len);
		memcpy (&extra_len, cd + 30, sizeof (extra_len));
		extra_len = GUINT16_FROM_LE (extra_len);
		memcpy (&comment_len, cd + 32, sizeof (comment_len));
		comment_len = GUINT16_FROM_LE (comment_len);

		if (cd + fname_len + comment_len + extra_len + cd_basic_len > eocd) {
			msg_debug_task ("zip archive is invalid (too large cd record)");

			return;
		}

		f = g_malloc0 (sizeof (*f));
		f->fname = g_string_new_len (cd + cd_basic_len, fname_len);
		f->compressed_size = comp_size;
		f->uncompressed_size = uncomp_size;
		g_ptr_array_add (arch->files, f);
		msg_debug_task ("found file in zip archive: %v", f->fname);

		cd += fname_len + comment_len + extra_len + cd_basic_len;
	}

	part->flags |= RSPAMD_MIME_PART_ARCHIVE;
	part->specific.arch = arch;

	if (part->cd) {
		arch->archive_name = &part->cd->filename;
	}

	arch->size = part->parsed_data.len;
}

static inline gint
rspamd_archive_rar_read_vint (const guchar *start, gsize remain, guint64 *res)
{
	/*
	 * From http://www.rarlab.com/technote.htm:
	 * Variable length integer. Can include one or more bytes, where
	 * lower 7 bits of every byte contain integer data and highest bit
	 * in every byte is the continuation flag.
	 * If highest bit is 0, this is the last byte in sequence.
	 * So first byte contains 7 least significant bits of integer and
	 * continuation flag. Second byte, if present, contains next 7 bits and so on.
	 */
	guint64 t = 0;
	guint shift = 0;
	const guchar *p = start;

	while (remain > 0 && shift <= 57) {
		if (*p & 0x80) {
			t |= ((guint64)(*p & 0x7f)) << shift;
		}
		else {
			t |= ((guint64)(*p & 0x7f)) << shift;
			p ++;
			break;
		}

		shift += 7;
		p++;
		remain --;
	}

	if (remain == 0 || shift > 64) {
		return -1;
	}

	*res = GUINT64_FROM_LE (t);

	return p - start;
}

#define RAR_SKIP_BYTES(n) do { \
	if ((n) <= 0) { \
		msg_debug_task ("rar archive is invalid (bad skip value)"); \
		return; \
	} \
	if ((gsize)(end - p) < (n)) { \
		msg_debug_task ("rar archive is invalid (truncated)"); \
		return; \
	} \
	p += (n); \
} while (0)

#define RAR_READ_VINT() do { \
	r = rspamd_archive_rar_read_vint (p, end - p, &vint); \
	if (r == -1) { \
		msg_debug_task ("rar archive is invalid (bad vint)"); \
		return; \
	} \
	else if (r == 0) { \
		msg_debug_task ("rar archive is invalid (BAD vint offset)"); \
		return; \
	}\
} while (0)

#define RAR_READ_VINT_SKIP() do { \
	r = rspamd_archive_rar_read_vint (p, end - p, &vint); \
	if (r == -1) { \
		msg_debug_task ("rar archive is invalid (bad vint)"); \
		return; \
	} \
	p += r; \
} while (0)

#define RAR_READ_UINT16(n) do { \
	if (end - p < (glong)sizeof (guint16)) { \
		msg_debug_task ("rar archive is invalid (bad int16)"); \
		return; \
	} \
	n = p[0] + (p[1] << 8); \
	p += sizeof (guint16); \
} while (0)

#define RAR_READ_UINT32(n) do { \
	if (end - p < (glong)sizeof (guint32)) { \
		msg_debug_task ("rar archive is invalid (bad int32)"); \
		return; \
	} \
	n = (guint)p[0] + ((guint)p[1] << 8) + ((guint)p[2] << 16) + ((guint)p[3] << 24); \
	p += sizeof (guint32); \
} while (0)

static void
rspamd_archive_process_rar_v4 (struct rspamd_task *task, const guchar *start,
		const guchar *end, struct rspamd_mime_part *part)
{
	const guchar *p = start, *start_section;
	guint8 type;
	guint flags;
	guint64 sz, comp_sz = 0, uncomp_sz = 0;
	struct rspamd_archive *arch;
	struct rspamd_archive_file *f;

	arch = rspamd_mempool_alloc0 (task->task_pool, sizeof (*arch));
	arch->files = g_ptr_array_new ();
	arch->type = RSPAMD_ARCHIVE_RAR;
	rspamd_mempool_add_destructor (task->task_pool, rspamd_archive_dtor,
			arch);

	while (p < end) {
		/* Crc16 */
		start_section = p;
		RAR_SKIP_BYTES (sizeof (guint16));
		type = *p;
		p ++;
		RAR_READ_UINT16 (flags);

		if (type == 0x73) {
			/* Main header, check for encryption */
			if (flags & 0x80) {
				arch->flags |= RSPAMD_ARCHIVE_ENCRYPTED;
				goto end;
			}
		}

		RAR_READ_UINT16 (sz);

		if (flags & 0x8000) {
			/* We also need to read ADD_SIZE element */
			guint32 tmp;

			RAR_READ_UINT32 (tmp);
			sz += tmp;
			/* This is also used as PACK_SIZE */
			comp_sz = tmp;
		}

		if (sz == 0) {
			/* Zero sized block - error */
			msg_debug_task ("rar archive is invalid (zero size block)");

			return;
		}

		if (type == 0x74) {
			guint fname_len;

			/* File header */
			/* Uncompressed size */
			RAR_READ_UINT32 (uncomp_sz);
			/* Skip to NAME_SIZE element */
			RAR_SKIP_BYTES (11);
			RAR_READ_UINT16 (fname_len);

			if (fname_len == 0 || fname_len > (gsize)(end - p)) {
				msg_debug_task ("rar archive is invalid (bad fileame size)");

				return;
			}

			/* Attrs */
			RAR_SKIP_BYTES (4);

			if (flags & 0x100) {
				/* We also need to read HIGH_PACK_SIZE */
				guint32 tmp;

				RAR_READ_UINT32 (tmp);
				sz += tmp;
				comp_sz += tmp;
				/* HIGH_UNP_SIZE  */
				RAR_READ_UINT32 (tmp);
				uncomp_sz += tmp;
			}

			f = g_malloc0 (sizeof (*f));

			if (flags & 0x200) {
				/* We have unicode + normal version */
				guchar *tmp;

				tmp = memchr (p, '\0', fname_len);

				if (tmp != NULL) {
					/* Just use ASCII version */
					f->fname = g_string_new_len (p, tmp - p);
				}
				else {
					/* We have UTF8 filename, use it as is */
					f->fname = g_string_new_len (p, fname_len);
				}
			}
			else {
				f->fname = g_string_new_len (p, fname_len);
			}

			f->compressed_size = comp_sz;
			f->uncompressed_size = uncomp_sz;

			if (flags & 0x4) {
				f->flags |= RSPAMD_ARCHIVE_FILE_ENCRYPTED;
			}

			g_ptr_array_add (arch->files, f);
		}

		p = start_section;
		RAR_SKIP_BYTES (sz);
	}

end:
	part->flags |= RSPAMD_MIME_PART_ARCHIVE;
	part->specific.arch = arch;
	arch->archive_name = &part->cd->filename;
	arch->size = part->parsed_data.len;
}

static void
rspamd_archive_process_rar (struct rspamd_task *task,
		struct rspamd_mime_part *part)
{
	const guchar *p, *end, *section_start;
	const guchar rar_v5_magic[] = {0x52, 0x61, 0x72, 0x21, 0x1A, 0x07, 0x01, 0x00},
			rar_v4_magic[] = {0x52, 0x61, 0x72, 0x21, 0x1A, 0x07, 0x00};
	const guint rar_encrypted_header = 4, rar_main_header = 1,
			rar_file_header = 2;
	guint64 vint, sz, comp_sz = 0, uncomp_sz = 0, flags = 0, type = 0;
	struct rspamd_archive *arch;
	struct rspamd_archive_file *f;
	gint r;

	p = part->parsed_data.begin;
	end = p + part->parsed_data.len;

	if ((gsize)(end - p) <= sizeof (rar_v5_magic)) {
		msg_debug_task ("rar archive is invalid (too small)");

		return;
	}

	if (memcmp (p, rar_v5_magic, sizeof (rar_v5_magic)) == 0) {
		p += sizeof (rar_v5_magic);
	}
	else if (memcmp (p, rar_v4_magic, sizeof (rar_v4_magic)) == 0) {
		p += sizeof (rar_v4_magic);

		rspamd_archive_process_rar_v4 (task, p, end, part);
		return;
	}
	else {
		msg_debug_task ("rar archive is invalid (no rar magic)");

		return;
	}

	/* Rar v5 format */
	arch = rspamd_mempool_alloc0 (task->task_pool, sizeof (*arch));
	arch->files = g_ptr_array_new ();
	arch->type = RSPAMD_ARCHIVE_RAR;
	rspamd_mempool_add_destructor (task->task_pool, rspamd_archive_dtor,
			arch);

	/* Now we can have either encryption header or archive header */
	/* Crc 32 */
	RAR_SKIP_BYTES (sizeof (guint32));
	/* Size */
	RAR_READ_VINT_SKIP ();
	sz = vint;
	/* Type */
	section_start = p;
	RAR_READ_VINT_SKIP ();
	type = vint;
	/* Header flags */
	RAR_READ_VINT_SKIP ();
	flags = vint;

	if (flags & 0x1) {
		/* Have extra zone */
		RAR_READ_VINT_SKIP ();
	}
	if (flags & 0x2) {
		/* Data zone is presented */
		RAR_READ_VINT_SKIP ();
		sz += vint;
	}

	if (type == rar_encrypted_header) {
		/* We can't read any further information as archive is encrypted */
		arch->flags |= RSPAMD_ARCHIVE_ENCRYPTED;
		goto end;
	}
	else if (type != rar_main_header) {
		msg_debug_task ("rar archive is invalid (bad main header)");

		return;
	}

	/* Nothing useful in main header */
	p = section_start;
	RAR_SKIP_BYTES (sz);

	while (p < end) {
		/* Read the next header */
		/* Crc 32 */
		RAR_SKIP_BYTES (sizeof (guint32));
		/* Size */
		RAR_READ_VINT_SKIP ();

		sz = vint;
		if (sz == 0) {
			/* Zero sized block - error */
			msg_debug_task ("rar archive is invalid (zero size block)");

			return;
		}

		section_start = p;
		/* Type */
		RAR_READ_VINT_SKIP ();
		type = vint;
		/* Header flags */
		RAR_READ_VINT_SKIP ();
		flags = vint;

		if (flags & 0x1) {
			/* Have extra zone */
			RAR_READ_VINT_SKIP ();
		}
		if (flags & 0x2) {
			/* Data zone is presented */
			RAR_READ_VINT_SKIP ();
			sz += vint;
			comp_sz = vint;
		}

		if (type != rar_file_header) {
			p = section_start;
			RAR_SKIP_BYTES (sz);
		}
		else {
			/* We have a file header, go forward */
			guint64 fname_len;

			/* File header specific flags */
			RAR_READ_VINT_SKIP ();
			flags = vint;

			/* Unpacked size */
			RAR_READ_VINT_SKIP ();
			uncomp_sz = vint;
			/* Attributes */
			RAR_READ_VINT_SKIP ();

			if (flags & 0x2) {
				/* Unix mtime */
				RAR_SKIP_BYTES (sizeof (guint32));
			}
			if (flags & 0x4) {
				/* Crc32 */
				RAR_SKIP_BYTES (sizeof (guint32));
			}

			/* Compression */
			RAR_READ_VINT_SKIP ();
			/* Host OS */
			RAR_READ_VINT_SKIP ();
			/* Filename length (finally!) */
			RAR_READ_VINT_SKIP ();
			fname_len = vint;

			if (fname_len == 0 || fname_len > (gsize)(end - p)) {
				msg_debug_task ("rar archive is invalid (bad fileame size)");

				return;
			}

			f = g_malloc0 (sizeof (*f));
			f->uncompressed_size = uncomp_sz;
			f->compressed_size = comp_sz;
			f->fname = g_string_new_len (p, fname_len);
			g_ptr_array_add (arch->files, f);

			/* Restore p to the beginning of the header */
			p = section_start;
			RAR_SKIP_BYTES (sz);
		}
	}

end:
part->flags |= RSPAMD_MIME_PART_ARCHIVE;
	part->specific.arch = arch;
	if (part->cd != NULL) {
		arch->archive_name = &part->cd->filename;
	}
	arch->size = part->parsed_data.len;
}

static inline gint
rspamd_archive_7zip_read_vint (const guchar *start, gsize remain, guint64 *res)
{
	/*
	 * REAL_UINT64 means real UINT64.
	 * UINT64 means real UINT64 encoded with the following scheme:
	 *
	 * Size of encoding sequence depends from first byte:
	 * First_Byte  Extra_Bytes        Value
	 * (binary)
	 * 0xxxxxxx               : ( xxxxxxx           )
	 * 10xxxxxx    BYTE y[1]  : (  xxxxxx << (8 * 1)) + y
	 * 110xxxxx    BYTE y[2]  : (   xxxxx << (8 * 2)) + y
	 * ...
	 * 1111110x    BYTE y[6]  : (       x << (8 * 6)) + y
	 * 11111110    BYTE y[7]  :                         y
	 * 11111111    BYTE y[8]  :                         y
	 */
	guchar t;

	if (remain == 0) {
		return -1;
	}

	t = *start;

	if (!isset (&t, 7)) {
		/* Trivial case */
		*res = t;
		return 1;
	}
	else if (t == 0xFF) {
		if (remain >= sizeof (guint64) + 1) {
			memcpy (res, start + 1, sizeof (guint64));
			*res = GUINT64_FROM_LE (*res);

			return sizeof (guint64) + 1;
		}
	}
	else {
		gint cur_bit = 6, intlen = 1;
		const guchar bmask = 0xFF;
		guint64 tgt;

		while (cur_bit > 0) {
			if (!isset (&t, cur_bit)) {
				if (remain >= intlen + 1) {
					memcpy (&tgt, start + 1, intlen);
					tgt = GUINT64_FROM_LE (tgt);
					/* Shift back */
					tgt >>= sizeof (tgt) - NBBY * intlen;
					/* Add masked value */
					tgt += (guint64)(t & (bmask >> (NBBY - cur_bit)))
							<< (NBBY * intlen);
					*res = tgt;

					return intlen + 1;
				}
			}
			cur_bit --;
			intlen ++;
		}
	}

	return -1;
}

#define SZ_READ_VINT_SKIP() do { \
	r = rspamd_archive_7zip_read_vint (p, end - p, &vint); \
	if (r == -1) { \
		msg_debug_task ("7z archive is invalid (bad vint)"); \
		return; \
	} \
	p += r; \
} while (0)
#define SZ_READ_VINT(var) do { \
	int r; \
	r = rspamd_archive_7zip_read_vint (p, end - p, &(var)); \
	if (r == -1) { \
		msg_debug_task ("7z archive is invalid (bad vint): %s", G_STRLOC); \
		return NULL; \
	} \
	p += r; \
} while (0)

#define SZ_READ_UINT64(n) do { \
	if (end - p < (goffset)sizeof (guint64)) { \
		msg_debug_task ("7zip archive is invalid (bad vint): %s", G_STRLOC); \
		return; \
	} \
	n = *(guint64 *)p; \
	n = GUINT64_FROM_LE(n); \
	p += sizeof (guint64); \
} while (0)
#define SZ_SKIP_BYTES(n) do { \
	if (end - p > (n)) { \
		p += (n); \
	} \
	else { \
		msg_debug_task ("7zip archive is invalid (truncated); wanted to read %d bytes, %d avail: %s", (gint)(n), (gint)(end - p), G_STRLOC); \
		return NULL; \
	} \
} while (0)

enum rspamd_7zip_header_mark {
	kEnd = 0x00,
	kHeader = 0x01,
	kArchiveProperties = 0x02,
	kAdditionalStreamsInfo = 0x03,
	kMainStreamsInfo = 0x04,
	kFilesInfo = 0x05,
	kPackInfo = 0x06,
	kUnPackInfo = 0x07,
	kSubStreamsInfo = 0x08,
	kSize = 0x09,
	kCRC = 0x0A,
	kFolder = 0x0B,
	kCodersUnPackSize = 0x0C,
	kNumUnPackStream = 0x0D,
	kEmptyStream = 0x0E,
	kEmptyFile = 0x0F,
	kAnti = 0x10,
	kName = 0x11,
	kCTime = 0x12,
	kATime = 0x13,
	kMTime = 0x14,
	kWinAttributes = 0x15,
	kComment = 0x16,
	kEncodedHeader = 0x17,
	kStartPos = 0x18,
	kDummy = 0x19,
};

static const guchar *
rspamd_7zip_read_bits (struct rspamd_task *task,
		const guchar *p, const guchar *end,
		struct rspamd_archive *arch, guint nbits,
		guint *pbits_set)
{
	unsigned mask = 0, avail = 0, i;
	gboolean bit_set = 0;

	for (i = 0; i < nbits; i++) {
		if (mask == 0) {
			avail = *p;
			SZ_SKIP_BYTES(1);
			mask = 0x80;
		}

		bit_set = (avail & mask) ? 1 : 0;

		if (bit_set && pbits_set) {
			(*pbits_set) ++;
		}

		mask >>= 1;
	}

	return p;
}

static const guchar *
rspamd_7zip_read_digest (struct rspamd_task *task,
		const guchar *p, const guchar *end,
		struct rspamd_archive *arch,
		guint64 num_streams,
		guint *pdigest_read)
{
	guchar all_defined = *p;
	guint64 i;
	guint num_defined = 0;
	/*
	 * BYTE AllAreDefined
	 *  if (AllAreDefined == 0)
	 *  {
	 *    for(NumStreams)
	 *    BIT Defined
	 *  }
	 *  UINT32 CRCs[NumDefined]
	 */
	SZ_SKIP_BYTES(1);

	if (all_defined) {
		num_defined = num_streams;
	}
	else {
		if (num_streams > 8192) {
			/* Gah */
			return NULL;
		}

		p = rspamd_7zip_read_bits (task, p, end, arch, num_streams, &num_defined);

		if (p == NULL) {
			return NULL;
		}
	}

	for (i = 0; i < num_defined; i ++) {
		SZ_SKIP_BYTES(sizeof(guint32));
	}

	if (pdigest_read) {
		*pdigest_read = num_defined;
	}

	return p;
}

static const guchar *
rspamd_7zip_read_pack_info (struct rspamd_task *task,
		const guchar *p, const guchar *end,
		struct rspamd_archive *arch)
{
	guint64 pack_pos = 0, pack_streams = 0, i, cur_sz;
	guint num_digests = 0;
	guchar t;
	/*
	 *  UINT64 PackPos
	 *  UINT64 NumPackStreams
	 *
	 *  []
	 *  BYTE NID::kSize    (0x09)
	 *  UINT64 PackSizes[NumPackStreams]
	 *  []
	 *
	 *  []
	 *  BYTE NID::kCRC      (0x0A)
	 *  PackStreamDigests[NumPackStreams]
	 *  []
	 *  BYTE NID::kEnd
	 */

	SZ_READ_VINT(pack_pos);
	SZ_READ_VINT(pack_streams);

	while (p != NULL && p < end) {
		t = *p;
		SZ_SKIP_BYTES(1);

		switch (t) {
		case kSize:
			/* We need to skip pack_streams VINTS */
			for (i = 0; i < pack_streams; i++) {
				SZ_READ_VINT(cur_sz);
			}
			break;
		case kCRC:
			/* CRCs are more complicated */
			p = rspamd_7zip_read_digest (task, p, end, arch, pack_streams,
					&num_digests);
			break;
		case kEnd:
			goto end;
			break;
		default:
			p = NULL;
			msg_debug_task ("bad 7zip type: %c; %s", t, G_STRLOC);
			goto end;
			break;
		}
	}

end:

	return p;
}

static const guchar *
rspamd_7zip_read_folder (struct rspamd_task *task,
		const guchar *p, const guchar *end,
		struct rspamd_archive *arch, guint *pnstreams)
{
	guint64 ncoders = 0, i, noutstreams = 0, ninstreams = 0;

	SZ_READ_VINT (ncoders);

	for (i = 0; i < ncoders && p != NULL && p < end; i ++) {
		guint64 sz, tmp;
		guchar t;
		/*
		 * BYTE
		 * {
		 *   0:3 CodecIdSize
		 *   4:  Is Complex Coder
		 *   5:  There Are Attributes
		 *   6:  Reserved
		 *   7:  There are more alternative methods. (Not used anymore, must be 0).
		 * }
		 * BYTE CodecId[CodecIdSize]
		 * if (Is Complex Coder)
		 * {
		 *   UINT64 NumInStreams;
		 *   UINT64 NumOutStreams;
		 * }
		 * if (There Are Attributes)
		 * {
		 *   UINT64 PropertiesSize
		 *   BYTE Properties[PropertiesSize]
		 * }
		 */
		t = *p;
		SZ_SKIP_BYTES(1);
		sz = t & 0x7;
		/* Codec ID */
		SZ_SKIP_BYTES (sz);

		if (t & (1u << 4)) {
			/* Complex */
			SZ_READ_VINT (tmp); /* InStreams */
			ninstreams += tmp;
			SZ_READ_VINT (tmp); /* OutStreams */
			noutstreams += tmp;
		}
		else {
			/* XXX: is it correct ? */
			noutstreams ++;
			ninstreams ++;
		}
		if (t & (1u << 5)) {
			/* Attributes ... */
			SZ_READ_VINT (tmp); /* Size of attrs */
			SZ_SKIP_BYTES (tmp);
		}
	}

	if (noutstreams > 1) {
		/* BindPairs, WTF, huh */
		for (i = 0; i < noutstreams - 1; i ++) {
			guint64 tmp;

			SZ_READ_VINT (tmp);
			SZ_READ_VINT (tmp);
		}
	}

	gint64 npacked = (gint64)ninstreams - (gint64)noutstreams + 1;

	if (npacked > 1) {
		/* Gah... */
		for (i = 0; i < npacked; i ++) {
			guint64 tmp;

			SZ_READ_VINT (tmp);
		}
	}

	*pnstreams = noutstreams;

	return p;
}

static const guchar *
rspamd_7zip_read_coders_info (struct rspamd_task *task,
		const guchar *p, const guchar *end,
		struct rspamd_archive *arch,
		guint *pnum_folders, guint *pnum_nodigest)
{
	guint64 num_folders = 0, i, tmp;
	guchar t;
	guint *folder_nstreams = NULL, num_digests = 0, digests_read = 0;

	while (p != NULL && p < end) {
		/*
		 * BYTE NID::kFolder  (0x0B)
		 *  UINT64 NumFolders
		 *  BYTE External
		 *  switch(External)
		 *  {
		 * 	case 0:
		 * 	  Folders[NumFolders]
		 * 	case 1:
		 * 	  UINT64 DataStreamIndex
		 *   }
		 *   BYTE ID::kCodersUnPackSize  (0x0C)
		 *   for(Folders)
		 * 	for(Folder.NumOutStreams)
		 * 	 UINT64 UnPackSize;
		 *   []
		 *   BYTE NID::kCRC   (0x0A)
		 *   UnPackDigests[NumFolders]
		 *   []
		 *   BYTE NID::kEnd
		 */

		t = *p;
		SZ_SKIP_BYTES(1);

		switch (t) {
		case kFolder:
			SZ_READ_VINT (num_folders);
			if (*p != 0) {
				/* External folders */
				SZ_SKIP_BYTES(1);
				SZ_READ_VINT (tmp);
			}
			else {
				SZ_SKIP_BYTES(1);

				if (num_folders > 8192) {
					/* Gah */
					return NULL;
				}

				folder_nstreams = g_alloca (sizeof (int) * num_folders);

				for (i = 0; i < num_folders && p != NULL && p < end; i++) {
					p = rspamd_7zip_read_folder (task, p, end, arch,
							&folder_nstreams[i]);

					num_digests += folder_nstreams[i];
				}
			}
			break;
		case kCodersUnPackSize:
			for (i = 0; i < num_folders && p != NULL && p < end; i++) {
				if (folder_nstreams) {
					for (guint j = 0; j < folder_nstreams[i]; j++) {
						guint64 tmp;

						SZ_READ_VINT (tmp); /* Unpacked size */
					}
				}
				else {
					msg_err_task ("internal 7zip error");
				}
			}
			break;
		case kCRC:
			/*
			 * Here are dragons. Spec tells that here there could be up
			 * to nfolders digests. However, according to the actual source
			 * code, in case of multiple out streams there should be digests
			 * for all out streams.
			 *
			 * In the real life (tm) it is even more idiotic: all these digests
			 * are in another section! But that section needs number of digests
			 * that are absent here. It is the most stupid thing I've ever seen
			 * in any file format.
			 *
			 * I hope there *WAS* some reason to do such shit...
			 */
			p = rspamd_7zip_read_digest (task, p, end, arch, num_digests,
					&digests_read);
			break;
		case kEnd:
			goto end;
			break;
		default:
			p = NULL;
			msg_debug_task ("bad 7zip type: %c; %s", t, G_STRLOC);
			goto end;
			break;
		}
	}

end:

	if (pnum_nodigest) {
		*pnum_nodigest = num_digests - digests_read;
	}
	if (pnum_folders) {
		*pnum_folders = num_folders;
	}

	return p;
}

static const guchar *
rspamd_7zip_read_substreams_info (struct rspamd_task *task,
		const guchar *p, const guchar *end,
		struct rspamd_archive *arch,
		guint num_folders, guint num_nodigest)
{
	guchar t;
	guint i;
	guint64 *folder_nstreams;

	if (num_folders > 8192) {
		/* Gah */
		return NULL;
	}

	folder_nstreams = g_alloca (sizeof (guint64) * num_folders);

	while (p != NULL && p < end) {
		/*
		 * []
		 *  BYTE NID::kNumUnPackStream; (0x0D)
		 *  UINT64 NumUnPackStreamsInFolders[NumFolders];
		 *  []
		 *
		 *  []
		 *  BYTE NID::kSize  (0x09)
		 *  UINT64 UnPackSizes[??]
		 *  []
		 *
		 *
		 *  []
		 *  BYTE NID::kCRC  (0x0A)
		 *  Digests[Number of streams with unknown CRC]
		 *  []

		 */
		t = *p;
		SZ_SKIP_BYTES(1);

		switch (t) {
		case kNumUnPackStream:
			for (i = 0; i < num_folders; i ++) {
				guint64 tmp;

				SZ_READ_VINT (tmp);
				folder_nstreams[i] = tmp;
			}
			break;
		case kCRC:
			/*
			 * Read the comment in the rspamd_7zip_read_coders_info
			 */
			p = rspamd_7zip_read_digest (task, p, end, arch, num_nodigest,
					NULL);
			break;
		case kSize:
			/*
			 * Another brain damaged logic, but we have to support it
			 * as there are no ways to proceed without it.
			 * In fact, it is just absent in the real life...
			 */
			for (i = 0; i < num_folders; i ++) {
				for (guint j = 0; j < folder_nstreams[i]; j++) {
					guint64 tmp;

					SZ_READ_VINT (tmp); /* Who cares indeed */
				}
			}
			break;
		case kEnd:
			goto end;
			break;
		default:
			p = NULL;
			msg_debug_task ("bad 7zip type: %c; %s", t, G_STRLOC);
			goto end;
			break;
		}
	}

end:
	return p;
}

static const guchar *
rspamd_7zip_read_main_streams_info (struct rspamd_task *task,
		const guchar *p, const guchar *end,
		struct rspamd_archive *arch)
{
	guchar t;
	guint num_folders = 0, unknown_digests = 0;

	while (p != NULL && p < end) {
		t = *p;
		SZ_SKIP_BYTES(1);

		/*
		 *
		 *  []
		 *  PackInfo
		 *  []

		 *  []
		 *  CodersInfo
		 *  []
		 *
		 *  []
		 *  SubStreamsInfo
		 *  []
		 *
		 *  BYTE NID::kEnd
		 */
		switch (t) {
		case kPackInfo:
			p = rspamd_7zip_read_pack_info (task, p, end, arch);
			break;
		case kUnPackInfo:
			p = rspamd_7zip_read_coders_info (task, p, end, arch, &num_folders,
					&unknown_digests);
			break;
		case kSubStreamsInfo:
			p = rspamd_7zip_read_substreams_info (task, p, end, arch, num_folders,
					unknown_digests);
			break;
			break;
		case kEnd:
			goto end;
			break;
		default:
			p = NULL;
			msg_debug_task ("bad 7zip type: %c; %s", t, G_STRLOC);
			goto end;
			break;
		}
	}

end:
	return p;
}

static const guchar *
rspamd_7zip_read_archive_props (struct rspamd_task *task,
		const guchar *p, const guchar *end,
		struct rspamd_archive *arch)
{
	guchar proptype;
	guint64 proplen;

	/*
	 * for (;;)
	 * {
	 *   BYTE PropertyType;
	 *   if (aType == 0)
	 *     break;
	 *   UINT64 PropertySize;
	 *   BYTE PropertyData[PropertySize];
	 * }
	 */

	proptype = *p;
	SZ_SKIP_BYTES(1);

	if (p != NULL) {
		while (proptype != 0) {
			SZ_READ_VINT(proplen);

			if (p + proplen < end) {
				p += proplen;
			}
			else {
				return NULL;
			}

			proptype = *p;
			SZ_SKIP_BYTES(1);
		}
	}

	return p;
}

static GString *
rspamd_7zip_ucs2_to_utf8 (struct rspamd_task *task, const guchar *p,
		const guchar *end)
{
	GString *res;
	goffset dest_pos = 0, src_pos = 0;
	const gsize len = (end - p) / sizeof (guint16);
	guint16 *up;
	UChar32 wc;
	UBool is_error = 0;

	res = g_string_sized_new ((end - p) + sizeof (wc) * 2 + 1);
	up = (guint16 *)p;

	while (src_pos < len) {
		U16_NEXT (up, src_pos, len, wc);
		U8_APPEND (res->str, dest_pos, res->allocated_len, wc, is_error);

		if (is_error) {
			g_string_free (res, TRUE);

			return NULL;
		}
	}

	g_assert (dest_pos < res->allocated_len);

	res->len = dest_pos;
	res->str[dest_pos] = '\0';

	return res;
}

static const guchar *
rspamd_7zip_read_files_info (struct rspamd_task *task,
		const guchar *p, const guchar *end,
		struct rspamd_archive *arch)
{
	guint64 nfiles = 0, sz, i;
	guchar t, b;
	struct rspamd_archive_file *fentry;

	SZ_READ_VINT (nfiles);

	for (;p != NULL && p < end;) {
		t = *p;
		SZ_SKIP_BYTES (1);

		if (t == kEnd) {
			goto end;
		}

		/* This is SO SPECIAL, gah */
		SZ_READ_VINT (sz);

		switch (t) {
		case kEmptyStream:
		case kEmptyFile:
		case kAnti: /* AntiFile, OMFG */
			/* We don't care about these bits */
		case kCTime:
		case kATime:
		case kMTime:
			/* We don't care of these guys, but we still have to parse them, gah */
			if (sz == 0) {
				goto end;
			}
			else {
				SZ_SKIP_BYTES (sz);
			}
			break;
		case kName:
			/* The most useful part in this whole bloody format */
			b = *p; /* External flag */
			SZ_SKIP_BYTES (1);

			if (b) {
				/* TODO: for the god sake, do something about external
				 * filenames...
				 */
				guint64 tmp;

				SZ_READ_VINT (tmp);
			}
			else {
				for (i = 0; i < nfiles; i ++) {
					/* Zero terminated wchar_t: happy converting... */
					/* First, find terminator */
					const guchar *fend = NULL, *tp = p;
					GString *res;

					while (tp < end - 1) {
						if (*tp == 0 && *(tp + 1) == 0) {
							fend = tp;
							break;
						}

						tp += 2;
					}

					if (fend == NULL || fend - p == 0) {
						/* Crap instead of fname */
						msg_debug_task ("bad 7zip name; %s", G_STRLOC);
					}

					res = rspamd_7zip_ucs2_to_utf8 (task, p, fend);

					if (res != NULL) {
						fentry = g_malloc0 (sizeof (fentry));
						fentry->fname = res;
						g_ptr_array_add (arch->files, fentry);
					}
					else {
						msg_debug_task ("bad 7zip name; %s", G_STRLOC);
					}
					/* Skip zero terminating character */
					p = fend + 2;
				}
			}
			break;
		case kDummy:
		case kWinAttributes:
			if (sz == 0) {
				goto end;
			}
			else {
				SZ_SKIP_BYTES (sz);
			}
			break;
		default:
			p = NULL;
			msg_debug_task ("bad 7zip type: %c; %s", t, G_STRLOC);
			goto end;
			break;
		}
	}

end:
	return p;
}

static const guchar *
rspamd_7zip_read_next_section (struct rspamd_task *task,
		const guchar *p, const guchar *end,
		struct rspamd_archive *arch)
{
	guchar t = *p;

	SZ_SKIP_BYTES(1);

	switch (t) {
	case kHeader:
		/* We just skip byte and go further */
		break;
	case kEncodedHeader:
		/*
		 * In fact, headers are just packed, but we assume it as
		 * encrypted to distinguish from the normal archives
		 */
		arch->flags |= RSPAMD_ARCHIVE_ENCRYPTED;
		p = NULL; /* Cannot get anything useful */
		break;
	case kArchiveProperties:
		p = rspamd_7zip_read_archive_props (task, p, end, arch);
		break;
	case kMainStreamsInfo:
		p = rspamd_7zip_read_main_streams_info (task, p, end, arch);
		break;
	case kAdditionalStreamsInfo:
		p = rspamd_7zip_read_main_streams_info (task, p, end, arch);
		break;
	case kFilesInfo:
		p = rspamd_7zip_read_files_info (task, p, end, arch);
		break;
	default:
		p = NULL;
		msg_debug_task ("bad 7zip type: %c; %s", t, G_STRLOC);
		break;
	}

	return p;
}

static void
rspamd_archive_process_7zip (struct rspamd_task *task,
		struct rspamd_mime_part *part)
{
	struct rspamd_archive *arch;
	const guchar *start, *p, *end;
	const guchar sz_magic[] = {'7', 'z', 0xBC, 0xAF, 0x27, 0x1C};
	guint64 section_offset = 0, section_length = 0;

	start = part->parsed_data.begin;
	p = start;
	end = p + part->parsed_data.len;

	if (end - p <= sizeof (guint64) + sizeof (guint32) ||
			memcmp (p, sz_magic, sizeof (sz_magic)) != 0) {
		msg_debug_task ("7z archive is invalid (no 7z magic)");

		return;
	}

	arch = rspamd_mempool_alloc0 (task->task_pool, sizeof (*arch));
	arch->files = g_ptr_array_new ();
	arch->type = RSPAMD_ARCHIVE_7ZIP;
	rspamd_mempool_add_destructor (task->task_pool, rspamd_archive_dtor,
			arch);

	/* Magic (6 bytes) + version (2 bytes) + crc32 (4 bytes) */
	p += sizeof (guint64) + sizeof (guint32);

	SZ_READ_UINT64(section_offset);
	SZ_READ_UINT64(section_length);

	if (end - p > sizeof (guint32)) {
		p += sizeof (guint32);
	}
	else {
		msg_debug_task ("7z archive is invalid (truncated crc)");

		return;
	}

	if (end - p > section_offset) {
		p += section_offset;
	}
	else {
		msg_debug_task ("7z archive is invalid (incorrect section offset)");

		return;
	}

	while ((p = rspamd_7zip_read_next_section (task, p, end, arch)) != NULL);

	part->flags |= RSPAMD_MIME_PART_ARCHIVE;
	part->specific.arch = arch;
	if (part->cd != NULL) {
		arch->archive_name = &part->cd->filename;
	}
	arch->size = part->parsed_data.len;
}

static gboolean
rspamd_archive_cheat_detect (struct rspamd_mime_part *part, const gchar *str,
		const guchar *magic_start, gsize magic_len)
{
	struct rspamd_content_type *ct;
	const gchar *p;
	rspamd_ftok_t srch, *fname;

	ct = part->ct;
	RSPAMD_FTOK_ASSIGN (&srch, "application");

	if (ct && ct->type.len && ct->subtype.len > 0 && rspamd_ftok_cmp (&ct->type,
			&srch) == 0) {
		if (rspamd_substring_search_caseless (ct->subtype.begin, ct->subtype.len,
				str, strlen (str)) != -1) {
			/* We still need to check magic, see #1848 */
			if (magic_start != NULL) {
				if (part->parsed_data.len > magic_len &&
						memcmp (part->parsed_data.begin,
								magic_start, magic_len) == 0) {
					return TRUE;
				}
				/* No magic, refuse this type of archive */
				return FALSE;
			}
			else {
				return TRUE;
			}
		}
	}

	if (part->cd) {
		fname = &part->cd->filename;

		if (fname && fname->len > strlen (str)) {
			p = fname->begin + fname->len - strlen (str);

			if (rspamd_lc_cmp (p, str, strlen (str)) == 0) {
				if (*(p - 1) == '.') {
					if (magic_start != NULL) {
						if (part->parsed_data.len > magic_len &&
								memcmp (part->parsed_data.begin,
										magic_start, magic_len) == 0) {
							return TRUE;
						}
						/* No magic, refuse this type of archive */
						return FALSE;
					}

					return TRUE;
				}
			}
		}

		if (magic_start != NULL) {
			if (part->parsed_data.len > magic_len && memcmp (part->parsed_data.begin,
					magic_start, magic_len) == 0) {
				return TRUE;
			}
		}
	}

	return FALSE;
}

void
rspamd_archives_process (struct rspamd_task *task)
{
	guint i;
	struct rspamd_mime_part *part;
	const guchar rar_magic[] = {0x52, 0x61, 0x72, 0x21, 0x1A, 0x07};
	const guchar zip_magic[] = {0x50, 0x4b, 0x03, 0x04};
	const guchar sz_magic[] = {'7', 'z', 0xBC, 0xAF, 0x27, 0x1C};

	for (i = 0; i < task->parts->len; i ++) {
		part = g_ptr_array_index (task->parts, i);

		if (!(part->flags & (RSPAMD_MIME_PART_TEXT|RSPAMD_MIME_PART_IMAGE))) {
			if (part->parsed_data.len > 0) {
				if (rspamd_archive_cheat_detect (part, "zip",
						zip_magic, sizeof (zip_magic))) {
					rspamd_archive_process_zip (task, part);
				} else if (rspamd_archive_cheat_detect (part, "rar",
						rar_magic, sizeof (rar_magic))) {
					rspamd_archive_process_rar (task, part);
				} else if (rspamd_archive_cheat_detect (part, "7z",
						sz_magic, sizeof (sz_magic))) {
					rspamd_archive_process_7zip (task, part);
				}

				if (IS_CT_TEXT (part->ct) &&
						(part->flags & RSPAMD_MIME_PART_ARCHIVE)) {
					msg_info_task ("found archive with incorrect content-type: %T/%T",
							&part->ct->type, &part->ct->subtype);
					part->ct->flags |= RSPAMD_CONTENT_TYPE_BROKEN;
				}
			}
		}
	}
}


const gchar *
rspamd_archive_type_str (enum rspamd_archive_type type)
{
	const gchar *ret = "unknown";

	switch (type) {
	case RSPAMD_ARCHIVE_ZIP:
		ret = "zip";
		break;
	case RSPAMD_ARCHIVE_RAR:
		ret = "rar";
		break;
	case RSPAMD_ARCHIVE_7ZIP:
		ret = "7z";
		break;
	}

	return ret;
}