12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
7177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032203320342035203620372038203920402041204220432044204520462047204820492050205120522053205420552056205720582059206020612062206320642065206620672068206920702071207220732074207520762077207820792080208120822083208420852086208720882089209020912092209320942095 |
- /*
- * Copyright 2024 Vsevolod Stakhov
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
- #include "config.h"
- #include "message.h"
- #include "task.h"
- #include "archives.h"
- #include "libmime/mime_encoding.h"
- #include <unicode/uchar.h>
- #include <unicode/utf8.h>
- #include <unicode/utf16.h>
- #include <unicode/ucnv.h>
-
- #include <archive.h>
- #include <archive_entry.h>
-
- #define msg_debug_archive(...) rspamd_conditional_debug_fast(NULL, NULL, \
- rspamd_archive_log_id, "archive", task->task_pool->tag.uid, \
- G_STRFUNC, \
- __VA_ARGS__)
-
- INIT_LOG_MODULE(archive)
-
- static void
- rspamd_archive_dtor(gpointer p)
- {
- struct rspamd_archive *arch = p;
- struct rspamd_archive_file *f;
- unsigned int i;
-
- for (i = 0; i < arch->files->len; i++) {
- f = g_ptr_array_index(arch->files, i);
-
- if (f->fname) {
- g_string_free(f->fname, TRUE);
- }
-
- g_free(f);
- }
-
- g_ptr_array_free(arch->files, TRUE);
- }
-
- static bool
- rspamd_archive_file_try_utf(struct rspamd_task *task,
- struct rspamd_archive *arch,
- struct rspamd_archive_file *fentry,
- const char *in, gsize inlen)
- {
- const char *charset = NULL, *p, *end;
- GString *res;
-
- charset = rspamd_mime_charset_find_by_content(in, inlen, TRUE);
-
- if (charset) {
- UChar *tmp;
- UErrorCode uc_err = U_ZERO_ERROR;
- int32_t r, clen, dlen;
- struct rspamd_charset_converter *conv;
- UConverter *utf8_converter;
-
- conv = rspamd_mime_get_converter_cached(charset, task->task_pool,
- TRUE, &uc_err);
- utf8_converter = rspamd_get_utf8_converter();
-
- if (conv == NULL) {
- msg_info_task("cannot open converter for %s: %s",
- charset, u_errorName(uc_err));
- fentry->flags |= RSPAMD_ARCHIVE_FILE_OBFUSCATED;
- fentry->fname = g_string_new_len(in, inlen);
-
- return false;
- }
-
- tmp = g_malloc(sizeof(*tmp) * (inlen + 1));
- r = rspamd_converter_to_uchars(conv, tmp, inlen + 1,
- in, inlen, &uc_err);
- if (!U_SUCCESS(uc_err)) {
- msg_info_task("cannot convert data to unicode from %s: %s",
- charset, u_errorName(uc_err));
- g_free(tmp);
-
- fentry->flags |= RSPAMD_ARCHIVE_FILE_OBFUSCATED;
- fentry->fname = g_string_new_len(in, inlen);
-
- return NULL;
- }
-
- int i = 0;
-
- while (i < r) {
- UChar32 uc;
-
- U16_NEXT(tmp, i, r, uc);
-
- if (IS_ZERO_WIDTH_SPACE(uc) || u_iscntrl(uc)) {
- msg_info_task("control character in archive file name found: 0x%02xd "
- "(filename=%T)",
- uc, arch->archive_name);
- fentry->flags |= RSPAMD_ARCHIVE_FILE_OBFUSCATED;
- break;
- }
- }
-
- clen = ucnv_getMaxCharSize(utf8_converter);
- dlen = UCNV_GET_MAX_BYTES_FOR_STRING(r, clen);
- res = g_string_sized_new(dlen);
- r = ucnv_fromUChars(utf8_converter, res->str, dlen, tmp, r, &uc_err);
-
- if (!U_SUCCESS(uc_err)) {
- msg_info_task("cannot convert data from unicode from %s: %s",
- charset, u_errorName(uc_err));
- g_free(tmp);
- g_string_free(res, TRUE);
- fentry->flags |= RSPAMD_ARCHIVE_FILE_OBFUSCATED;
- fentry->fname = g_string_new_len(in, inlen);
-
- return NULL;
- }
-
- g_free(tmp);
- res->len = r;
-
- msg_debug_archive("converted from %s to UTF-8 inlen: %z, outlen: %d",
- charset, inlen, r);
- fentry->fname = res;
- }
- else {
- /* Convert unsafe characters to '?' */
- res = g_string_sized_new(inlen);
- p = in;
- end = in + inlen;
-
- while (p < end) {
- if (g_ascii_isgraph(*p)) {
- g_string_append_c(res, *p);
- }
- else {
- g_string_append_c(res, '?');
-
- if (*p < 0x7f && (g_ascii_iscntrl(*p) || *p == '\0')) {
- if (!(fentry->flags & RSPAMD_ARCHIVE_FILE_OBFUSCATED)) {
- msg_info_task("suspicious character in archive file name found: 0x%02xd "
- "(filename=%T)",
- (int) *p, arch->archive_name);
- fentry->flags |= RSPAMD_ARCHIVE_FILE_OBFUSCATED;
- }
- }
- }
-
- p++;
- }
- fentry->fname = res;
- }
-
- return true;
- }
-
- static void
- rspamd_archive_process_zip(struct rspamd_task *task,
- struct rspamd_mime_part *part)
- {
- const unsigned char *p, *start, *end, *eocd = NULL, *cd;
- const uint32_t eocd_magic = 0x06054b50, cd_basic_len = 46;
- const unsigned char cd_magic[] = {0x50, 0x4b, 0x01, 0x02};
- const unsigned int max_processed = 1024;
- uint32_t cd_offset, cd_size, comp_size, uncomp_size, processed = 0;
- uint16_t extra_len, fname_len, comment_len;
- struct rspamd_archive *arch;
- struct rspamd_archive_file *f = NULL;
-
- /* Zip files have interesting data at the end of archive */
- p = part->parsed_data.begin + part->parsed_data.len - 1;
- start = part->parsed_data.begin;
- end = p;
-
- /* Search for EOCD:
- * 22 bytes is a typical size of eocd without a comment and
- * end points one byte after the last character
- */
- p -= 21;
-
- while (p > start + sizeof(uint32_t)) {
- uint32_t t;
-
- if (processed > max_processed) {
- break;
- }
-
- /* XXX: not an efficient approach */
- memcpy(&t, p, sizeof(t));
-
- if (GUINT32_FROM_LE(t) == eocd_magic) {
- eocd = p;
- break;
- }
-
- p--;
- processed++;
- }
-
-
- if (eocd == NULL) {
- /* Not a zip file */
- msg_info_task("zip archive is invalid (no EOCD)");
-
- return;
- }
-
- if (end - eocd < 21) {
- msg_info_task("zip archive is invalid (short EOCD)");
-
- return;
- }
-
-
- memcpy(&cd_size, eocd + 12, sizeof(cd_size));
- cd_size = GUINT32_FROM_LE(cd_size);
- memcpy(&cd_offset, eocd + 16, sizeof(cd_offset));
- cd_offset = GUINT32_FROM_LE(cd_offset);
-
- /* We need to check sanity as well */
- if (cd_offset + cd_size > (unsigned int) (eocd - start)) {
- msg_info_task("zip archive is invalid (bad size/offset for CD)");
-
- return;
- }
-
- cd = start + cd_offset;
-
- arch = rspamd_mempool_alloc0(task->task_pool, sizeof(*arch));
- arch->files = g_ptr_array_new();
- arch->type = RSPAMD_ARCHIVE_ZIP;
- if (part->cd) {
- arch->archive_name = &part->cd->filename;
- }
- rspamd_mempool_add_destructor(task->task_pool, rspamd_archive_dtor,
- arch);
-
- while (cd < start + cd_offset + cd_size) {
- uint16_t flags;
-
- /* Read central directory record */
- if (eocd - cd < cd_basic_len ||
- memcmp(cd, cd_magic, sizeof(cd_magic)) != 0) {
- msg_info_task("zip archive is invalid (bad cd record)");
-
- return;
- }
-
- memcpy(&flags, cd + 8, sizeof(uint16_t));
- flags = GUINT16_FROM_LE(flags);
- memcpy(&comp_size, cd + 20, sizeof(uint32_t));
- comp_size = GUINT32_FROM_LE(comp_size);
- memcpy(&uncomp_size, cd + 24, sizeof(uint32_t));
- uncomp_size = GUINT32_FROM_LE(uncomp_size);
- memcpy(&fname_len, cd + 28, sizeof(fname_len));
- fname_len = GUINT16_FROM_LE(fname_len);
- memcpy(&extra_len, cd + 30, sizeof(extra_len));
- extra_len = GUINT16_FROM_LE(extra_len);
- memcpy(&comment_len, cd + 32, sizeof(comment_len));
- comment_len = GUINT16_FROM_LE(comment_len);
-
- if (cd + fname_len + comment_len + extra_len + cd_basic_len > eocd) {
- msg_info_task("zip archive is invalid (too large cd record)");
-
- return;
- }
-
- f = g_malloc0(sizeof(*f));
- rspamd_archive_file_try_utf(task, arch, f, cd + cd_basic_len, fname_len);
-
- f->compressed_size = comp_size;
- f->uncompressed_size = uncomp_size;
-
- if (flags & 0x41u) {
- f->flags |= RSPAMD_ARCHIVE_FILE_ENCRYPTED;
- }
-
- if (f->fname) {
- if (f->flags & RSPAMD_ARCHIVE_FILE_OBFUSCATED) {
- arch->flags |= RSPAMD_ARCHIVE_HAS_OBFUSCATED_FILES;
- }
-
- g_ptr_array_add(arch->files, f);
- msg_debug_archive("found file in zip archive: %v", f->fname);
- }
- else {
- g_free(f);
-
- return;
- }
-
- /* Process extra fields */
- const unsigned char *extra = cd + fname_len + cd_basic_len;
- p = extra;
-
- while (p + sizeof(uint16_t) * 2 < extra + extra_len) {
- uint16_t hid, hlen;
-
- memcpy(&hid, p, sizeof(uint16_t));
- hid = GUINT16_FROM_LE(hid);
- memcpy(&hlen, p + sizeof(uint16_t), sizeof(uint16_t));
- hlen = GUINT16_FROM_LE(hlen);
-
- if (hid == 0x0017) {
- f->flags |= RSPAMD_ARCHIVE_FILE_ENCRYPTED;
- }
-
- p += hlen + sizeof(uint16_t) * 2;
- }
-
- cd += fname_len + comment_len + extra_len + cd_basic_len;
- }
-
- part->part_type = RSPAMD_MIME_PART_ARCHIVE;
- part->specific.arch = arch;
-
- arch->size = part->parsed_data.len;
- }
-
/*
 * Decodes a RAR5 variable length integer.
 *
 * From http://www.rarlab.com/technote.htm:
 * Variable length integer. Can include one or more bytes, where
 * lower 7 bits of every byte contain integer data and highest bit
 * in every byte is the continuation flag.
 * If highest bit is 0, this is the last byte in sequence.
 * So first byte contains 7 least significant bits of integer and
 * continuation flag. Second byte, if present, contains next 7 bits and so on.
 *
 * start:  first byte of the encoded integer
 * remain: number of readable bytes at `start`
 * res:    receives the decoded value (only written on success)
 *
 * Returns the number of bytes consumed, or -1 if the input ends before the
 * terminating byte or the sequence is longer than the 10 bytes a 64 bit value
 * may occupy.
 */
static inline int
rspamd_archive_rar_read_vint(const unsigned char *start, gsize remain, uint64_t *res)
{
	uint64_t t = 0;
	unsigned int shift = 0;
	const unsigned char *p = start;
	bool terminated = false;

	/* shift runs 0, 7, ..., 63: ten bytes at most */
	while (remain > 0 && shift <= 63) {
		t |= ((uint64_t) (*p & 0x7f)) << shift;

		if (!(*p & 0x80)) {
			/* Continuation flag is clear: this was the last byte */
			p++;
			terminated = true;
			break;
		}

		shift += 7;
		p++;
		remain--;
	}

	if (!terminated) {
		/* Either ran out of input or the sequence is overlong */
		return -1;
	}

	/*
	 * The value was assembled arithmetically, so it is already in host byte
	 * order; no endianness conversion must be applied.
	 */
	*res = t;

	return p - start;
}
-
/*
 * Helpers for the RAR parsers. All of them operate on the caller's local
 * variables (`p` is the read cursor, `end` the end of data, `task` is needed
 * for logging) and `return;` from the calling function on malformed input, so
 * they can only be used inside functions returning void.
 */

/* Advance the cursor by n bytes; n must be positive and within the data */
#define RAR_SKIP_BYTES(n) \
	do { \
		if ((n) <= 0) { \
			msg_debug_archive("rar archive is invalid (bad skip value)"); \
			return; \
		} \
		if ((gsize) (end - p) < (n)) { \
			msg_debug_archive("rar archive is invalid (truncated)"); \
			return; \
		} \
		p += (n); \
	} while (0)

/* Decode a vint at the cursor into `vint` (length in `r`) without advancing */
#define RAR_READ_VINT() \
	do { \
		r = rspamd_archive_rar_read_vint(p, end - p, &vint); \
		if (r == -1) { \
			msg_debug_archive("rar archive is invalid (bad vint)"); \
			return; \
		} \
		if (r == 0) { \
			msg_debug_archive("rar archive is invalid (BAD vint offset)"); \
			return; \
		} \
	} while (0)

/* Decode a vint at the cursor into `vint` and move the cursor past it */
#define RAR_READ_VINT_SKIP() \
	do { \
		r = rspamd_archive_rar_read_vint(p, end - p, &vint); \
		if (r == -1) { \
			msg_debug_archive("rar archive is invalid (bad vint)"); \
			return; \
		} \
		p += r; \
	} while (0)

/* Read a little-endian 16 bit value into n and advance the cursor */
#define RAR_READ_UINT16(n) \
	do { \
		if (end - p < (glong) sizeof(uint16_t)) { \
			msg_debug_archive("rar archive is invalid (bad int16)"); \
			return; \
		} \
		n = p[0] + (p[1] << 8); \
		p += sizeof(uint16_t); \
	} while (0)

/* Read a little-endian 32 bit value into n and advance the cursor */
#define RAR_READ_UINT32(n) \
	do { \
		if (end - p < (glong) sizeof(uint32_t)) { \
			msg_debug_archive("rar archive is invalid (bad int32)"); \
			return; \
		} \
		n = (unsigned int) p[0] + ((unsigned int) p[1] << 8) + ((unsigned int) p[2] << 16) + ((unsigned int) p[3] << 24); \
		p += sizeof(uint32_t); \
	} while (0)
-
- static void
- rspamd_archive_process_rar_v4(struct rspamd_task *task, const unsigned char *start,
- const unsigned char *end, struct rspamd_mime_part *part)
- {
- const unsigned char *p = start, *start_section;
- uint8_t type;
- unsigned int flags;
- uint64_t sz, comp_sz = 0, uncomp_sz = 0;
- struct rspamd_archive *arch;
- struct rspamd_archive_file *f;
-
- arch = rspamd_mempool_alloc0(task->task_pool, sizeof(*arch));
- arch->files = g_ptr_array_new();
- arch->type = RSPAMD_ARCHIVE_RAR;
- if (part->cd) {
- arch->archive_name = &part->cd->filename;
- }
- rspamd_mempool_add_destructor(task->task_pool, rspamd_archive_dtor,
- arch);
-
- while (p < end) {
- /* Crc16 */
- start_section = p;
- RAR_SKIP_BYTES(sizeof(uint16_t));
- type = *p;
- p++;
- RAR_READ_UINT16(flags);
-
- if (type == 0x73) {
- /* Main header, check for encryption */
- if (flags & 0x80) {
- arch->flags |= RSPAMD_ARCHIVE_ENCRYPTED;
- goto end;
- }
- }
-
- RAR_READ_UINT16(sz);
-
- if (flags & 0x8000) {
- /* We also need to read ADD_SIZE element */
- uint32_t tmp;
-
- RAR_READ_UINT32(tmp);
- sz += tmp;
- /* This is also used as PACK_SIZE */
- comp_sz = tmp;
- }
-
- if (sz == 0) {
- /* Zero sized block - error */
- msg_debug_archive("rar archive is invalid (zero size block)");
-
- return;
- }
-
- if (type == 0x74) {
- unsigned int fname_len;
-
- /* File header */
- /* Uncompressed size */
- RAR_READ_UINT32(uncomp_sz);
- /* Skip to NAME_SIZE element */
- RAR_SKIP_BYTES(11);
- RAR_READ_UINT16(fname_len);
-
- if (fname_len == 0 || fname_len > (gsize) (end - p)) {
- msg_debug_archive("rar archive is invalid (bad filename size: %d)",
- fname_len);
-
- return;
- }
-
- /* Attrs */
- RAR_SKIP_BYTES(4);
-
- if (flags & 0x100) {
- /* We also need to read HIGH_PACK_SIZE */
- uint32_t tmp;
-
- RAR_READ_UINT32(tmp);
- sz += tmp;
- comp_sz += tmp;
- /* HIGH_UNP_SIZE */
- RAR_READ_UINT32(tmp);
- uncomp_sz += tmp;
- }
-
- f = g_malloc0(sizeof(*f));
-
- if (flags & 0x200) {
- /* We have unicode + normal version */
- unsigned char *tmp;
-
- tmp = memchr(p, '\0', fname_len);
-
- if (tmp != NULL) {
- /* Just use ASCII version */
- rspamd_archive_file_try_utf(task, arch, f, p, tmp - p);
- msg_debug_archive("found ascii filename in rarv4 archive: %v",
- f->fname);
- }
- else {
- /* We have UTF8 filename, use it as is */
- rspamd_archive_file_try_utf(task, arch, f, p, fname_len);
- msg_debug_archive("found utf filename in rarv4 archive: %v",
- f->fname);
- }
- }
- else {
- rspamd_archive_file_try_utf(task, arch, f, p, fname_len);
- msg_debug_archive("found ascii (old) filename in rarv4 archive: %v",
- f->fname);
- }
-
- f->compressed_size = comp_sz;
- f->uncompressed_size = uncomp_sz;
-
- if (flags & 0x4) {
- f->flags |= RSPAMD_ARCHIVE_FILE_ENCRYPTED;
- }
-
- if (f->fname) {
- if (f->flags & RSPAMD_ARCHIVE_FILE_OBFUSCATED) {
- arch->flags |= RSPAMD_ARCHIVE_HAS_OBFUSCATED_FILES;
- }
- g_ptr_array_add(arch->files, f);
- }
- else {
- g_free(f);
- }
- }
-
- p = start_section;
- RAR_SKIP_BYTES(sz);
- }
-
- end:
- part->part_type = RSPAMD_MIME_PART_ARCHIVE;
- part->specific.arch = arch;
- arch->size = part->parsed_data.len;
- }
-
- static void
- rspamd_archive_process_rar(struct rspamd_task *task,
- struct rspamd_mime_part *part)
- {
- const unsigned char *p, *end, *section_start;
- const unsigned char rar_v5_magic[] = {0x52, 0x61, 0x72, 0x21, 0x1A, 0x07, 0x01, 0x00},
- rar_v4_magic[] = {0x52, 0x61, 0x72, 0x21, 0x1A, 0x07, 0x00};
- const unsigned int rar_encrypted_header = 4, rar_main_header = 1,
- rar_file_header = 2;
- uint64_t vint, sz, comp_sz = 0, uncomp_sz = 0, flags = 0, type = 0,
- extra_sz = 0;
- struct rspamd_archive *arch;
- struct rspamd_archive_file *f;
- int r;
-
- p = part->parsed_data.begin;
- end = p + part->parsed_data.len;
-
- if ((gsize) (end - p) <= sizeof(rar_v5_magic)) {
- msg_debug_archive("rar archive is invalid (too small)");
-
- return;
- }
-
- if (memcmp(p, rar_v5_magic, sizeof(rar_v5_magic)) == 0) {
- p += sizeof(rar_v5_magic);
- }
- else if (memcmp(p, rar_v4_magic, sizeof(rar_v4_magic)) == 0) {
- p += sizeof(rar_v4_magic);
-
- rspamd_archive_process_rar_v4(task, p, end, part);
- return;
- }
- else {
- msg_debug_archive("rar archive is invalid (no rar magic)");
-
- return;
- }
-
- /* Rar v5 format */
- arch = rspamd_mempool_alloc0(task->task_pool, sizeof(*arch));
- arch->files = g_ptr_array_new();
- arch->type = RSPAMD_ARCHIVE_RAR;
- if (part->cd) {
- arch->archive_name = &part->cd->filename;
- }
- rspamd_mempool_add_destructor(task->task_pool, rspamd_archive_dtor,
- arch);
-
- /* Now we can have either encryption header or archive header */
- /* Crc 32 */
- RAR_SKIP_BYTES(sizeof(uint32_t));
- /* Size */
- RAR_READ_VINT_SKIP();
- sz = vint;
- /* Type */
- section_start = p;
- RAR_READ_VINT_SKIP();
- type = vint;
- /* Header flags */
- RAR_READ_VINT_SKIP();
- flags = vint;
-
- if (flags & 0x1) {
- /* Have extra zone */
- RAR_READ_VINT_SKIP();
- }
- if (flags & 0x2) {
- /* Data zone is presented */
- RAR_READ_VINT_SKIP();
- sz += vint;
- }
-
- if (type == rar_encrypted_header) {
- /* We can't read any further information as archive is encrypted */
- arch->flags |= RSPAMD_ARCHIVE_ENCRYPTED;
- goto end;
- }
- else if (type != rar_main_header) {
- msg_debug_archive("rar archive is invalid (bad main header)");
-
- return;
- }
-
- /* Nothing useful in main header */
- p = section_start;
- RAR_SKIP_BYTES(sz);
-
- while (p < end) {
- gboolean has_extra = FALSE;
- /* Read the next header */
- /* Crc 32 */
- RAR_SKIP_BYTES(sizeof(uint32_t));
- /* Size */
- RAR_READ_VINT_SKIP();
-
- sz = vint;
- if (sz == 0) {
- /* Zero sized block - error */
- msg_debug_archive("rar archive is invalid (zero size block)");
-
- return;
- }
-
- section_start = p;
- /* Type */
- RAR_READ_VINT_SKIP();
- type = vint;
- /* Header flags */
- RAR_READ_VINT_SKIP();
- flags = vint;
-
- if (flags & 0x1) {
- /* Have extra zone */
- RAR_READ_VINT_SKIP();
- extra_sz = vint;
- has_extra = TRUE;
- }
-
- if (flags & 0x2) {
- /* Data zone is presented */
- RAR_READ_VINT_SKIP();
- sz += vint;
- comp_sz = vint;
- }
-
- if (type != rar_file_header) {
- p = section_start;
- RAR_SKIP_BYTES(sz);
- }
- else {
- /* We have a file header, go forward */
- uint64_t fname_len;
- bool is_directory = false;
-
- /* File header specific flags */
- RAR_READ_VINT_SKIP();
- flags = vint;
-
- /* Unpacked size */
- RAR_READ_VINT_SKIP();
- uncomp_sz = vint;
- /* Attributes */
- RAR_READ_VINT_SKIP();
-
- if (flags & 0x2) {
- /* Unix mtime */
- RAR_SKIP_BYTES(sizeof(uint32_t));
- }
- if (flags & 0x4) {
- /* Crc32 */
- RAR_SKIP_BYTES(sizeof(uint32_t));
- }
- if (flags & 0x1) {
- /* Ignore directories for sanity purposes */
- is_directory = true;
- msg_debug_archive("skip directory record in a rar archive");
- }
-
- if (!is_directory) {
- /* Compression */
- RAR_READ_VINT_SKIP();
- /* Host OS */
- RAR_READ_VINT_SKIP();
- /* Filename length (finally!) */
- RAR_READ_VINT_SKIP();
- fname_len = vint;
-
- if (fname_len == 0 || fname_len > (gsize) (end - p)) {
- msg_debug_archive("rar archive is invalid (bad filename size)");
-
- return;
- }
-
- f = g_malloc0(sizeof(*f));
- f->uncompressed_size = uncomp_sz;
- f->compressed_size = comp_sz;
- rspamd_archive_file_try_utf(task, arch, f, p, fname_len);
-
- if (f->fname) {
- msg_debug_archive("added rarv5 file: %v", f->fname);
- g_ptr_array_add(arch->files, f);
- if (f->flags & RSPAMD_ARCHIVE_FILE_OBFUSCATED) {
- arch->flags |= RSPAMD_ARCHIVE_HAS_OBFUSCATED_FILES;
- }
- }
- else {
- g_free(f);
- f = NULL;
- }
-
- if (f && has_extra && extra_sz > 0 &&
- p + fname_len + extra_sz < end) {
- /* Try to find encryption record in extra field */
- const unsigned char *ex = p + fname_len;
-
- while (ex < p + extra_sz) {
- const unsigned char *t;
- int64_t cur_sz = 0, sec_type = 0;
-
- r = rspamd_archive_rar_read_vint(ex, extra_sz, &cur_sz);
- if (r == -1) {
- msg_debug_archive("rar archive is invalid (bad vint)");
- return;
- }
-
- t = ex + r;
-
- r = rspamd_archive_rar_read_vint(t, extra_sz - r, &sec_type);
- if (r == -1) {
- msg_debug_archive("rar archive is invalid (bad vint)");
- return;
- }
-
- if (sec_type == 0x01) {
- f->flags |= RSPAMD_ARCHIVE_FILE_ENCRYPTED;
- arch->flags |= RSPAMD_ARCHIVE_ENCRYPTED;
- break;
- }
-
- ex += cur_sz;
- }
- }
- }
-
- /* Restore p to the beginning of the header */
- p = section_start;
- RAR_SKIP_BYTES(sz);
- }
- }
-
- end:
- part->part_type = RSPAMD_MIME_PART_ARCHIVE;
- part->specific.arch = arch;
- arch->size = part->parsed_data.len;
- }
-
/*
 * Decodes a 7zip variable length UINT64.
 *
 * REAL_UINT64 means real UINT64.
 * UINT64 means real UINT64 encoded with the following scheme:
 *
 * Size of encoding sequence depends from first byte:
 * First_Byte  Extra_Bytes        Value
 * (binary)
 * 0xxxxxxx               : ( xxxxxxx           )
 * 10xxxxxx    BYTE y[1]  : (  xxxxxx << (8 * 1)) + y
 * 110xxxxx    BYTE y[2]  : (   xxxxx << (8 * 2)) + y
 * ...
 * 1111110x    BYTE y[6]  : (       x << (8 * 6)) + y
 * 11111110    BYTE y[7]  :                         y
 * 11111111    BYTE y[8]  :                         y
 *
 * (y is stored in little-endian byte order)
 *
 * start:  first byte of the encoded integer
 * remain: number of readable bytes at `start`
 * res:    receives the decoded value (only written on success)
 *
 * Returns the number of bytes consumed or -1 if the input is too short.
 */
static inline int
rspamd_archive_7zip_read_vint(const unsigned char *start, gsize remain, uint64_t *res)
{
	unsigned char first;
	unsigned int extra = 0, i;
	uint64_t value = 0;

	if (remain == 0) {
		return -1;
	}

	first = *start;

	/* The number of leading one bits equals the number of extra bytes */
	while (extra < 8 && (first & (0x80u >> extra))) {
		extra++;
	}

	if (remain < (gsize) extra + 1) {
		/* Not enough data for the announced length */
		return -1;
	}

	/* Extra bytes hold the low part of the value, least significant first */
	for (i = 0; i < extra; i++) {
		value |= (uint64_t) start[i + 1] << (8 * i);
	}

	if (extra < 7) {
		/* Bits of the first byte below the length prefix form the high part */
		value |= (uint64_t) (first & (0x7fu >> extra)) << (8 * extra);
	}

	*res = value;

	return (int) extra + 1;
}
-
/*
 * Helpers for the 7zip parsers. They operate on the caller's local variables
 * (`p` is the read cursor, `end` the end of data, `task` is needed for
 * logging) and leave the calling function on malformed input. Note the two
 * flavours: SZ_READ_VINT_SKIP and SZ_READ_UINT64 use a plain `return;`,
 * whereas SZ_READ_VINT and SZ_SKIP_BYTES use `return NULL;`.
 */

/* Decode a vint into the caller's `vint` (length in caller's `r`) and advance */
#define SZ_READ_VINT_SKIP() \
	do { \
		r = rspamd_archive_7zip_read_vint(p, end - p, &vint); \
		if (r == -1) { \
			msg_debug_archive("7z archive is invalid (bad vint)"); \
			return; \
		} \
		p += r; \
	} while (0)

/* Decode a vint into `var` and advance; returns NULL from the caller on error */
#define SZ_READ_VINT(var) \
	do { \
		int r = rspamd_archive_7zip_read_vint(p, end - p, &(var)); \
		if (r == -1) { \
			msg_debug_archive("7z archive is invalid (bad vint): %s", G_STRLOC); \
			return NULL; \
		} \
		p += r; \
	} while (0)

/* Read a fixed size little-endian 64 bit value into n and advance */
#define SZ_READ_UINT64(n) \
	do { \
		if (end - p < (goffset) sizeof(uint64_t)) { \
			msg_debug_archive("7zip archive is invalid (bad uint64): %s", G_STRLOC); \
			return; \
		} \
		memcpy(&(n), p, sizeof(uint64_t)); \
		n = GUINT64_FROM_LE(n); \
		p += sizeof(uint64_t); \
	} while (0)

/* Advance by n bytes; returns NULL from the caller if fewer bytes are left */
#define SZ_SKIP_BYTES(n) \
	do { \
		if (!(end - p >= (n))) { \
			msg_debug_archive("7zip archive is invalid (truncated); wanted to read %d bytes, %d avail: %s", (int) (n), (int) (end - p), G_STRLOC); \
			return NULL; \
		} \
		p += (n); \
	} while (0)
-
/* One-byte markers that tag the sections and properties of a 7zip header */
enum rspamd_7zip_header_mark {
	kEnd                   = 0x00,
	kHeader                = 0x01,
	kArchiveProperties     = 0x02,
	kAdditionalStreamsInfo = 0x03,
	kMainStreamsInfo       = 0x04,
	kFilesInfo             = 0x05,
	kPackInfo              = 0x06,
	kUnPackInfo            = 0x07,
	kSubStreamsInfo        = 0x08,
	kSize                  = 0x09,
	kCRC                   = 0x0A,
	kFolder                = 0x0B,
	kCodersUnPackSize      = 0x0C,
	kNumUnPackStream       = 0x0D,
	kEmptyStream           = 0x0E,
	kEmptyFile             = 0x0F,
	kAnti                  = 0x10,
	kName                  = 0x11,
	kCTime                 = 0x12,
	kATime                 = 0x13,
	kMTime                 = 0x14,
	kWinAttributes         = 0x15,
	kComment               = 0x16,
	kEncodedHeader         = 0x17,
	kStartPos              = 0x18,
	kDummy                 = 0x19,
};
-
-
/* Codec ids that denote an encrypting coder in a 7zip folder description */
#define _7Z_CRYPTO_MAIN_ZIP        0x06F10101 /* Main Zip crypto algo */
#define _7Z_CRYPTO_RAR_29          0x06F10303 /* Rar29 AES-128 + (modified SHA-1) */
#define _7Z_CRYPTO_AES_256_SHA_256 0x06F10701 /* AES-256 + SHA-256 */

/* True if codec_id is one of the crypto codecs above (evaluates its argument up to three times) */
#define IS_SZ_ENCRYPTED(codec_id) \
	(((codec_id) == _7Z_CRYPTO_MAIN_ZIP) || \
	 ((codec_id) == _7Z_CRYPTO_RAR_29) || \
	 ((codec_id) == _7Z_CRYPTO_AES_256_SHA_256))
-
- static const unsigned char *
- rspamd_7zip_read_bits(struct rspamd_task *task,
- const unsigned char *p, const unsigned char *end,
- struct rspamd_archive *arch, unsigned int nbits,
- unsigned int *pbits_set)
- {
- unsigned mask = 0, avail = 0, i;
- gboolean bit_set = 0;
-
- for (i = 0; i < nbits; i++) {
- if (mask == 0) {
- avail = *p;
- SZ_SKIP_BYTES(1);
- mask = 0x80;
- }
-
- bit_set = (avail & mask) ? 1 : 0;
-
- if (bit_set && pbits_set) {
- (*pbits_set)++;
- }
-
- mask >>= 1;
- }
-
- return p;
- }
-
- static const unsigned char *
- rspamd_7zip_read_digest(struct rspamd_task *task,
- const unsigned char *p, const unsigned char *end,
- struct rspamd_archive *arch,
- uint64_t num_streams,
- unsigned int *pdigest_read)
- {
- unsigned char all_defined = *p;
- uint64_t i;
- unsigned int num_defined = 0;
- /*
- * BYTE AllAreDefined
- * if (AllAreDefined == 0)
- * {
- * for(NumStreams)
- * BIT Defined
- * }
- * UINT32 CRCs[NumDefined]
- */
- SZ_SKIP_BYTES(1);
-
- if (all_defined) {
- num_defined = num_streams;
- }
- else {
- if (num_streams > 8192) {
- /* Gah */
- return NULL;
- }
-
- p = rspamd_7zip_read_bits(task, p, end, arch, num_streams, &num_defined);
-
- if (p == NULL) {
- return NULL;
- }
- }
-
- for (i = 0; i < num_defined; i++) {
- SZ_SKIP_BYTES(sizeof(uint32_t));
- }
-
- if (pdigest_read) {
- *pdigest_read = num_defined;
- }
-
- return p;
- }
-
- static const unsigned char *
- rspamd_7zip_read_pack_info(struct rspamd_task *task,
- const unsigned char *p, const unsigned char *end,
- struct rspamd_archive *arch)
- {
- uint64_t pack_pos = 0, pack_streams = 0, i, cur_sz;
- unsigned int num_digests = 0;
- unsigned char t;
- /*
- * UINT64 PackPos
- * UINT64 NumPackStreams
- *
- * []
- * BYTE NID::kSize (0x09)
- * UINT64 PackSizes[NumPackStreams]
- * []
- *
- * []
- * BYTE NID::kCRC (0x0A)
- * PackStreamDigests[NumPackStreams]
- * []
- * BYTE NID::kEnd
- */
-
- SZ_READ_VINT(pack_pos);
- SZ_READ_VINT(pack_streams);
-
- while (p != NULL && p < end) {
- t = *p;
- SZ_SKIP_BYTES(1);
- msg_debug_archive("7zip: read pack info %xc", t);
-
- switch (t) {
- case kSize:
- /* We need to skip pack_streams VINTS */
- for (i = 0; i < pack_streams; i++) {
- SZ_READ_VINT(cur_sz);
- }
- break;
- case kCRC:
- /* CRCs are more complicated */
- p = rspamd_7zip_read_digest(task, p, end, arch, pack_streams,
- &num_digests);
- break;
- case kEnd:
- goto end;
- break;
- default:
- p = NULL;
- msg_debug_archive("bad 7zip type: %xc; %s", t, G_STRLOC);
- goto end;
- break;
- }
- }
-
- end:
-
- return p;
- }
-
- static const unsigned char *
- rspamd_7zip_read_folder(struct rspamd_task *task,
- const unsigned char *p, const unsigned char *end,
- struct rspamd_archive *arch, unsigned int *pnstreams, unsigned int *ndigests)
- {
- uint64_t ncoders = 0, i, j, noutstreams = 0, ninstreams = 0;
-
- SZ_READ_VINT(ncoders);
-
- for (i = 0; i < ncoders && p != NULL && p < end; i++) {
- uint64_t sz, tmp;
- unsigned char t;
- /*
- * BYTE
- * {
- * 0:3 CodecIdSize
- * 4: Is Complex Coder
- * 5: There Are Attributes
- * 6: Reserved
- * 7: There are more alternative methods. (Not used anymore, must be 0).
- * }
- * BYTE CodecId[CodecIdSize]
- * if (Is Complex Coder)
- * {
- * UINT64 NumInStreams;
- * UINT64 NumOutStreams;
- * }
- * if (There Are Attributes)
- * {
- * UINT64 PropertiesSize
- * BYTE Properties[PropertiesSize]
- * }
- */
- t = *p;
- SZ_SKIP_BYTES(1);
- sz = t & 0xF;
- /* Codec ID */
- tmp = 0;
- for (j = 0; j < sz; j++) {
- tmp <<= 8;
- tmp += p[j];
- }
-
- msg_debug_archive("7zip: read codec id: %L", tmp);
-
- if (IS_SZ_ENCRYPTED(tmp)) {
- msg_debug_archive("7zip: encrypted codec: %L", tmp);
- arch->flags |= RSPAMD_ARCHIVE_ENCRYPTED;
- }
-
- SZ_SKIP_BYTES(sz);
-
- if (t & (1u << 4)) {
- /* Complex */
- SZ_READ_VINT(tmp); /* InStreams */
- ninstreams += tmp;
- SZ_READ_VINT(tmp); /* OutStreams */
- noutstreams += tmp;
- }
- else {
- /* XXX: is it correct ? */
- noutstreams++;
- ninstreams++;
- }
- if (t & (1u << 5)) {
- /* Attributes ... */
- SZ_READ_VINT(tmp); /* Size of attrs */
- SZ_SKIP_BYTES(tmp);
- }
- }
-
- if (noutstreams > 1) {
- /* BindPairs, WTF, huh */
- for (i = 0; i < noutstreams - 1; i++) {
- uint64_t tmp;
-
- SZ_READ_VINT(tmp);
- SZ_READ_VINT(tmp);
- }
- }
-
- int64_t npacked = (int64_t) ninstreams - (int64_t) noutstreams + 1;
- msg_debug_archive("7zip: instreams=%L, outstreams=%L, packed=%L",
- ninstreams, noutstreams, npacked);
-
- if (npacked > 1) {
- /* Gah... */
- for (i = 0; i < npacked; i++) {
- uint64_t tmp;
-
- SZ_READ_VINT(tmp);
- }
- }
-
- *pnstreams = noutstreams;
- (*ndigests) += npacked;
-
- return p;
- }
-
- static const unsigned char *
- rspamd_7zip_read_coders_info(struct rspamd_task *task,
- const unsigned char *p, const unsigned char *end,
- struct rspamd_archive *arch,
- unsigned int *pnum_folders, unsigned int *pnum_nodigest)
- {
- uint64_t num_folders = 0, i, tmp;
- unsigned char t;
- unsigned int *folder_nstreams = NULL, num_digests = 0, digests_read = 0;
-
- while (p != NULL && p < end) {
- /*
- * BYTE NID::kFolder (0x0B)
- * UINT64 NumFolders
- * BYTE External
- * switch(External)
- * {
- * case 0:
- * Folders[NumFolders]
- * case 1:
- * UINT64 DataStreamIndex
- * }
- * BYTE ID::kCodersUnPackSize (0x0C)
- * for(Folders)
- * for(Folder.NumOutStreams)
- * UINT64 UnPackSize;
- * []
- * BYTE NID::kCRC (0x0A)
- * UnPackDigests[NumFolders]
- * []
- * BYTE NID::kEnd
- */
-
- t = *p;
- SZ_SKIP_BYTES(1);
- msg_debug_archive("7zip: read coders info %xc", t);
-
- switch (t) {
- case kFolder:
- SZ_READ_VINT(num_folders);
- msg_debug_archive("7zip: nfolders=%L", num_folders);
-
- if (*p != 0) {
- /* External folders */
- SZ_SKIP_BYTES(1);
- SZ_READ_VINT(tmp);
- }
- else {
- SZ_SKIP_BYTES(1);
-
- if (num_folders > 8192) {
- /* Gah */
- return NULL;
- }
-
- if (folder_nstreams) {
- g_free(folder_nstreams);
- }
-
- folder_nstreams = g_malloc(sizeof(int) * num_folders);
-
- for (i = 0; i < num_folders && p != NULL && p < end; i++) {
- p = rspamd_7zip_read_folder(task, p, end, arch,
- &folder_nstreams[i], &num_digests);
- }
- }
- break;
- case kCodersUnPackSize:
- for (i = 0; i < num_folders && p != NULL && p < end; i++) {
- if (folder_nstreams) {
- for (unsigned int j = 0; j < folder_nstreams[i]; j++) {
- SZ_READ_VINT(tmp); /* Unpacked size */
- msg_debug_archive("7zip: unpacked size "
- "(folder=%d, stream=%d) = %L",
- (int) i, j, tmp);
- }
- }
- else {
- msg_err_task("internal 7zip error");
- }
- }
- break;
- case kCRC:
- /*
- * Here are dragons. Spec tells that here there could be up
- * to nfolders digests. However, according to the actual source
- * code, in case of multiple out streams there should be digests
- * for all out streams.
- *
- * In the real life (tm) it is even more idiotic: all these digests
- * are in another section! But that section needs number of digests
- * that are absent here. It is the most stupid thing I've ever seen
- * in any file format.
- *
- * I hope there *WAS* some reason to do such shit...
- */
- p = rspamd_7zip_read_digest(task, p, end, arch, num_digests,
- &digests_read);
- break;
- case kEnd:
- goto end;
- break;
- default:
- p = NULL;
- msg_debug_archive("bad 7zip type: %xc; %s", t, G_STRLOC);
- goto end;
- break;
- }
- }
-
- end:
-
- if (pnum_nodigest) {
- *pnum_nodigest = num_digests - digests_read;
- }
- if (pnum_folders) {
- *pnum_folders = num_folders;
- }
-
- if (folder_nstreams) {
- g_free(folder_nstreams);
- }
-
- return p;
- }
-
- static const unsigned char *
- rspamd_7zip_read_substreams_info(struct rspamd_task *task,
- const unsigned char *p, const unsigned char *end,
- struct rspamd_archive *arch,
- unsigned int num_folders, unsigned int num_nodigest)
- {
- unsigned char t;
- unsigned int i;
- uint64_t *folder_nstreams;
-
- if (num_folders > 8192) {
- /* Gah */
- return NULL;
- }
-
- folder_nstreams = g_alloca(sizeof(uint64_t) * num_folders);
- memset(folder_nstreams, 0, sizeof(uint64_t) * num_folders);
-
- while (p != NULL && p < end) {
- /*
- * []
- * BYTE NID::kNumUnPackStream; (0x0D)
- * UINT64 NumUnPackStreamsInFolders[NumFolders];
- * []
- *
- * []
- * BYTE NID::kSize (0x09)
- * UINT64 UnPackSizes[??]
- * []
- *
- *
- * []
- * BYTE NID::kCRC (0x0A)
- * Digests[Number of streams with unknown CRC]
- * []
-
- */
- t = *p;
- SZ_SKIP_BYTES(1);
-
- msg_debug_archive("7zip: read substream info %xc", t);
-
- switch (t) {
- case kNumUnPackStream:
- for (i = 0; i < num_folders; i++) {
- uint64_t tmp;
-
- SZ_READ_VINT(tmp);
- folder_nstreams[i] = tmp;
- }
- break;
- case kCRC:
- /*
- * Read the comment in the rspamd_7zip_read_coders_info
- */
- p = rspamd_7zip_read_digest(task, p, end, arch, num_nodigest,
- NULL);
- break;
- case kSize:
- /*
- * Another brain damaged logic, but we have to support it
- * as there are no ways to proceed without it.
- * In fact, it is just absent in the real life...
- */
- for (i = 0; i < num_folders; i++) {
- for (unsigned int j = 0; j < folder_nstreams[i]; j++) {
- uint64_t tmp;
-
- SZ_READ_VINT(tmp); /* Who cares indeed */
- }
- }
- break;
- case kEnd:
- goto end;
- break;
- default:
- p = NULL;
- msg_debug_archive("bad 7zip type: %xc; %s", t, G_STRLOC);
- goto end;
- break;
- }
- }
-
- end:
- return p;
- }
-
- static const unsigned char *
- rspamd_7zip_read_main_streams_info(struct rspamd_task *task,
- const unsigned char *p, const unsigned char *end,
- struct rspamd_archive *arch)
- {
- unsigned char t;
- unsigned int num_folders = 0, unknown_digests = 0;
-
- while (p != NULL && p < end) {
- t = *p;
- SZ_SKIP_BYTES(1);
- msg_debug_archive("7zip: read main streams info %xc", t);
-
- /*
- *
- * []
- * PackInfo
- * []
-
- * []
- * CodersInfo
- * []
- *
- * []
- * SubStreamsInfo
- * []
- *
- * BYTE NID::kEnd
- */
- switch (t) {
- case kPackInfo:
- p = rspamd_7zip_read_pack_info(task, p, end, arch);
- break;
- case kUnPackInfo:
- p = rspamd_7zip_read_coders_info(task, p, end, arch, &num_folders,
- &unknown_digests);
- break;
- case kSubStreamsInfo:
- p = rspamd_7zip_read_substreams_info(task, p, end, arch, num_folders,
- unknown_digests);
- break;
- break;
- case kEnd:
- goto end;
- break;
- default:
- p = NULL;
- msg_debug_archive("bad 7zip type: %xc; %s", t, G_STRLOC);
- goto end;
- break;
- }
- }
-
- end:
- return p;
- }
-
- static const unsigned char *
- rspamd_7zip_read_archive_props(struct rspamd_task *task,
- const unsigned char *p, const unsigned char *end,
- struct rspamd_archive *arch)
- {
- unsigned char proptype;
- uint64_t proplen;
-
- /*
- * for (;;)
- * {
- * BYTE PropertyType;
- * if (aType == 0)
- * break;
- * UINT64 PropertySize;
- * BYTE PropertyData[PropertySize];
- * }
- */
-
- if (p != NULL) {
- proptype = *p;
- SZ_SKIP_BYTES(1);
-
- while (proptype != 0) {
- SZ_READ_VINT(proplen);
-
- if (p + proplen < end) {
- p += proplen;
- }
- else {
- return NULL;
- }
-
- proptype = *p;
- SZ_SKIP_BYTES(1);
- }
- }
-
- return p;
- }
-
- static GString *
- rspamd_7zip_ucs2_to_utf8(struct rspamd_task *task, const unsigned char *p,
- const unsigned char *end)
- {
- GString *res;
- goffset dest_pos = 0, src_pos = 0;
- const gsize len = (end - p) / sizeof(uint16_t);
- uint16_t *up;
- UChar32 wc;
- UBool is_error = 0;
-
- res = g_string_sized_new((end - p) * 3 / 2 + sizeof(wc) + 1);
- up = (uint16_t *) p;
-
- while (src_pos < len) {
- U16_NEXT(up, src_pos, len, wc);
-
- if (wc > 0) {
- U8_APPEND(res->str, dest_pos,
- res->allocated_len - 1,
- wc, is_error);
- }
-
- if (is_error) {
- g_string_free(res, TRUE);
-
- return NULL;
- }
- }
-
- g_assert(dest_pos < res->allocated_len);
-
- res->len = dest_pos;
- res->str[dest_pos] = '\0';
-
- return res;
- }
-
- static const unsigned char *
- rspamd_7zip_read_files_info(struct rspamd_task *task,
- const unsigned char *p, const unsigned char *end,
- struct rspamd_archive *arch)
- {
- uint64_t nfiles = 0, sz, i;
- unsigned char t, b;
- struct rspamd_archive_file *fentry;
-
- SZ_READ_VINT(nfiles);
-
- for (; p != NULL && p < end;) {
- t = *p;
- SZ_SKIP_BYTES(1);
-
- msg_debug_archive("7zip: read file data type %xc", t);
-
- if (t == kEnd) {
- goto end;
- }
-
- /* This is SO SPECIAL, gah */
- SZ_READ_VINT(sz);
-
- switch (t) {
- case kEmptyStream:
- case kEmptyFile:
- case kAnti: /* AntiFile, OMFG */
- /* We don't care about these bits */
- case kCTime:
- case kATime:
- case kMTime:
- /* We don't care of these guys, but we still have to parse them, gah */
- if (sz > 0) {
- SZ_SKIP_BYTES(sz);
- }
- break;
- case kName:
- /* The most useful part in this whole bloody format */
- b = *p; /* External flag */
- SZ_SKIP_BYTES(1);
-
- if (b) {
- /* TODO: for the god sake, do something about external
- * filenames...
- */
- uint64_t tmp;
-
- SZ_READ_VINT(tmp);
- }
- else {
- for (i = 0; i < nfiles; i++) {
- /* Zero terminated wchar_t: happy converting... */
- /* First, find terminator */
- const unsigned char *fend = NULL, *tp = p;
- GString *res;
-
- while (tp < end - 1) {
- if (*tp == 0 && *(tp + 1) == 0) {
- fend = tp;
- break;
- }
-
- tp += 2;
- }
-
- if (fend == NULL || fend - p == 0) {
- /* Crap instead of fname */
- msg_debug_archive("bad 7zip name; %s", G_STRLOC);
- goto end;
- }
-
- res = rspamd_7zip_ucs2_to_utf8(task, p, fend);
-
- if (res != NULL) {
- fentry = g_malloc0(sizeof(*fentry));
- fentry->fname = res;
- g_ptr_array_add(arch->files, fentry);
- msg_debug_archive("7zip: found file %v", res);
- }
- else {
- msg_debug_archive("bad 7zip name; %s", G_STRLOC);
- }
- /* Skip zero terminating character */
- p = fend + 2;
- }
- }
- break;
- case kDummy:
- case kWinAttributes:
- if (sz > 0) {
- SZ_SKIP_BYTES(sz);
- }
- break;
- default:
- p = NULL;
- msg_debug_archive("bad 7zip type: %xc; %s", t, G_STRLOC);
- goto end;
- break;
- }
- }
-
- end:
- return p;
- }
-
- static const unsigned char *
- rspamd_7zip_read_next_section(struct rspamd_task *task,
- const unsigned char *p, const unsigned char *end,
- struct rspamd_archive *arch,
- struct rspamd_mime_part *part)
- {
- unsigned char t = *p;
-
- SZ_SKIP_BYTES(1);
-
- msg_debug_archive("7zip: read section %xc", t);
-
- switch (t) {
- case kHeader:
- /* We just skip byte and go further */
- break;
- case kEncodedHeader:
- /*
- * In fact, headers are just packed, but we assume it as
- * encrypted to distinguish from the normal archives
- */
- {
- msg_debug_archive("7zip: encoded header, needs to be uncompressed");
- struct archive *a = archive_read_new();
- archive_read_support_format_7zip(a);
- int r = archive_read_open_memory(a, part->parsed_data.begin, part->parsed_data.len);
- if (r != ARCHIVE_OK) {
- msg_debug_archive("7zip: cannot open memory archive: %s", archive_error_string(a));
- archive_read_free(a);
- return NULL;
- }
-
- /* Clean the existing files if any */
- rspamd_archive_dtor(arch);
- arch->files = g_ptr_array_new();
-
- struct archive_entry *ae;
-
- while (archive_read_next_header(a, &ae) == ARCHIVE_OK) {
- const char *name = archive_entry_pathname_utf8(ae);
- if (name) {
- msg_debug_archive("7zip: found file %s", name);
- struct rspamd_archive_file *f = g_malloc0(sizeof(*f));
- f->fname = g_string_new(name);
- g_ptr_array_add(arch->files, f);
- }
- archive_read_data_skip(a);
- }
-
- if (archive_read_has_encrypted_entries(a) > 0) {
- msg_debug_archive("7zip: found encrypted stuff");
- arch->flags |= RSPAMD_ARCHIVE_ENCRYPTED;
- }
-
- archive_read_free(a);
- p = NULL; /* Stop internal processor, as we rely on libarchive here */
- break;
- }
- case kArchiveProperties:
- p = rspamd_7zip_read_archive_props(task, p, end, arch);
- break;
- case kMainStreamsInfo:
- p = rspamd_7zip_read_main_streams_info(task, p, end, arch);
- break;
- case kAdditionalStreamsInfo:
- p = rspamd_7zip_read_main_streams_info(task, p, end, arch);
- break;
- case kFilesInfo:
- p = rspamd_7zip_read_files_info(task, p, end, arch);
- break;
- case kEnd:
- p = NULL;
- msg_debug_archive("7zip: read final section");
- break;
- default:
- p = NULL;
- msg_debug_archive("bad 7zip type: %xc; %s", t, G_STRLOC);
- break;
- }
-
- return p;
- }
-
- static void
- rspamd_archive_process_7zip(struct rspamd_task *task,
- struct rspamd_mime_part *part)
- {
- struct rspamd_archive *arch;
- const unsigned char *start, *p, *end;
- const unsigned char sz_magic[] = {'7', 'z', 0xBC, 0xAF, 0x27, 0x1C};
- uint64_t section_offset = 0, section_length = 0;
-
- start = part->parsed_data.begin;
- p = start;
- end = p + part->parsed_data.len;
-
- if (end - p <= sizeof(uint64_t) + sizeof(uint32_t) ||
- memcmp(p, sz_magic, sizeof(sz_magic)) != 0) {
- msg_debug_archive("7z archive is invalid (no 7z magic)");
-
- return;
- }
-
- arch = rspamd_mempool_alloc0(task->task_pool, sizeof(*arch));
- arch->files = g_ptr_array_new();
- arch->type = RSPAMD_ARCHIVE_7ZIP;
- rspamd_mempool_add_destructor(task->task_pool, rspamd_archive_dtor,
- arch);
-
- /* Magic (6 bytes) + version (2 bytes) + crc32 (4 bytes) */
- p += sizeof(uint64_t) + sizeof(uint32_t);
-
- SZ_READ_UINT64(section_offset);
- SZ_READ_UINT64(section_length);
-
- if (end - p > sizeof(uint32_t)) {
- p += sizeof(uint32_t);
- }
- else {
- msg_debug_archive("7z archive is invalid (truncated crc)");
-
- return;
- }
-
- if (end - p > section_offset) {
- p += section_offset;
- }
- else {
- msg_debug_archive("7z archive is invalid (incorrect section offset)");
-
- return;
- }
-
- while ((p = rspamd_7zip_read_next_section(task, p, end, arch, part)) != NULL)
- ;
-
- part->part_type = RSPAMD_MIME_PART_ARCHIVE;
- part->specific.arch = arch;
- if (part->cd != NULL) {
- arch->archive_name = &part->cd->filename;
- }
- arch->size = part->parsed_data.len;
- }
-
- static void
- rspamd_archive_process_gzip(struct rspamd_task *task,
- struct rspamd_mime_part *part)
- {
- struct rspamd_archive *arch;
- const unsigned char *start, *p, *end;
- const unsigned char gz_magic[] = {0x1F, 0x8B};
- unsigned char flags;
-
- start = part->parsed_data.begin;
- p = start;
- end = p + part->parsed_data.len;
-
- if (end - p <= 10 || memcmp(p, gz_magic, sizeof(gz_magic)) != 0) {
- msg_debug_archive("gzip archive is invalid (no gzip magic)");
-
- return;
- }
-
- arch = rspamd_mempool_alloc0(task->task_pool, sizeof(*arch));
- arch->files = g_ptr_array_sized_new(1);
- arch->type = RSPAMD_ARCHIVE_GZIP;
- if (part->cd) {
- arch->archive_name = &part->cd->filename;
- }
- rspamd_mempool_add_destructor(task->task_pool, rspamd_archive_dtor,
- arch);
-
- flags = p[3];
-
- if (flags & (1u << 5)) {
- arch->flags |= RSPAMD_ARCHIVE_ENCRYPTED;
- }
-
- if (flags & (1u << 3)) {
- /* We have file name presented in archive, try to use it */
- if (flags & (1u << 1)) {
- /* Multipart */
- p += 12;
- }
- else {
- p += 10;
- }
-
- if (flags & (1u << 2)) {
- /* Optional section */
- uint16_t optlen = 0;
-
- RAR_READ_UINT16(optlen);
-
- if (end <= p + optlen) {
- msg_debug_archive("gzip archive is invalid, bad extra length: %d",
- (int) optlen);
-
- return;
- }
-
- p += optlen;
- }
-
- /* Read file name */
- const unsigned char *fname_start = p;
-
- while (p < end) {
- if (*p == '\0') {
- if (p > fname_start) {
- struct rspamd_archive_file *f;
-
- f = g_malloc0(sizeof(*f));
-
- rspamd_archive_file_try_utf(task, arch, f,
- fname_start, p - fname_start);
-
- if (f->fname) {
- g_ptr_array_add(arch->files, f);
-
- if (f->flags & RSPAMD_ARCHIVE_FILE_OBFUSCATED) {
- arch->flags |= RSPAMD_ARCHIVE_HAS_OBFUSCATED_FILES;
- }
- }
- else {
- /* Invalid filename, skip */
- g_free(f);
- }
-
- goto set;
- }
- }
-
- p++;
- }
-
- /* Wrong filename, not zero terminated */
- msg_debug_archive("gzip archive is invalid, bad filename at pos %d",
- (int) (p - start));
-
- return;
- }
-
- /* Fallback, we need to extract file name from archive name if possible */
- if (part->cd && part->cd->filename.len > 0) {
- const char *dot_pos, *slash_pos;
-
- dot_pos = rspamd_memrchr(part->cd->filename.begin, '.',
- part->cd->filename.len);
-
- if (dot_pos) {
- struct rspamd_archive_file *f;
-
- slash_pos = rspamd_memrchr(part->cd->filename.begin, '/',
- part->cd->filename.len);
-
- if (slash_pos && slash_pos < dot_pos) {
- f = g_malloc0(sizeof(*f));
- f->fname = g_string_sized_new(dot_pos - slash_pos);
- g_string_append_len(f->fname, slash_pos + 1,
- dot_pos - slash_pos - 1);
-
- msg_debug_archive("fallback to gzip filename based on cd: %v",
- f->fname);
-
- g_ptr_array_add(arch->files, f);
-
- goto set;
- }
- else {
- const char *fname_start = part->cd->filename.begin;
-
- f = g_malloc0(sizeof(*f));
-
- if (memchr(fname_start, '.', part->cd->filename.len) != dot_pos) {
- /* Double dots, something like foo.exe.gz */
- f->fname = g_string_sized_new(dot_pos - fname_start);
- g_string_append_len(f->fname, fname_start,
- dot_pos - fname_start);
- }
- else {
- /* Single dot, something like foo.gzz */
- f->fname = g_string_sized_new(part->cd->filename.len);
- g_string_append_len(f->fname, fname_start,
- part->cd->filename.len);
- }
-
- msg_debug_archive("fallback to gzip filename based on cd: %v",
- f->fname);
-
- g_ptr_array_add(arch->files, f);
-
- goto set;
- }
- }
- }
-
- return;
-
- set:
- /* Set archive data */
- part->part_type = RSPAMD_MIME_PART_ARCHIVE;
- part->specific.arch = arch;
- arch->size = part->parsed_data.len;
- }
-
- static gboolean
- rspamd_archive_cheat_detect(struct rspamd_mime_part *part, const char *str,
- const unsigned char *magic_start, gsize magic_len)
- {
- struct rspamd_content_type *ct;
- const char *p;
- rspamd_ftok_t srch, *fname;
-
- ct = part->ct;
- RSPAMD_FTOK_ASSIGN(&srch, "application");
-
- if (ct && ct->type.len && ct->subtype.len > 0 && rspamd_ftok_cmp(&ct->type, &srch) == 0) {
- if (rspamd_substring_search_caseless(ct->subtype.begin, ct->subtype.len,
- str, strlen(str)) != -1) {
- /* We still need to check magic, see #1848 */
- if (magic_start != NULL) {
- if (part->parsed_data.len > magic_len &&
- memcmp(part->parsed_data.begin,
- magic_start, magic_len) == 0) {
- return TRUE;
- }
- /* No magic, refuse this type of archive */
- return FALSE;
- }
- else {
- return TRUE;
- }
- }
- }
-
- if (part->cd) {
- fname = &part->cd->filename;
-
- if (fname && fname->len > strlen(str)) {
- p = fname->begin + fname->len - strlen(str);
-
- if (rspamd_lc_cmp(p, str, strlen(str)) == 0) {
- if (*(p - 1) == '.') {
- if (magic_start != NULL) {
- if (part->parsed_data.len > magic_len &&
- memcmp(part->parsed_data.begin,
- magic_start, magic_len) == 0) {
- return TRUE;
- }
- /* No magic, refuse this type of archive */
- return FALSE;
- }
-
- return TRUE;
- }
- }
- }
-
- if (magic_start != NULL) {
- if (part->parsed_data.len > magic_len &&
- memcmp(part->parsed_data.begin, magic_start, magic_len) == 0) {
- return TRUE;
- }
- }
- }
- else {
- if (magic_start != NULL) {
- if (part->parsed_data.len > magic_len &&
- memcmp(part->parsed_data.begin, magic_start, magic_len) == 0) {
- return TRUE;
- }
- }
- }
-
- return FALSE;
- }
-
- void rspamd_archives_process(struct rspamd_task *task)
- {
- unsigned int i;
- struct rspamd_mime_part *part;
- const unsigned char rar_magic[] = {0x52, 0x61, 0x72, 0x21, 0x1A, 0x07};
- const unsigned char zip_magic[] = {0x50, 0x4b, 0x03, 0x04};
- const unsigned char sz_magic[] = {'7', 'z', 0xBC, 0xAF, 0x27, 0x1C};
- const unsigned char gz_magic[] = {0x1F, 0x8B, 0x08};
-
- PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, parts), i, part)
- {
- if (part->part_type == RSPAMD_MIME_PART_UNDEFINED) {
- if (part->parsed_data.len > 0) {
- if (rspamd_archive_cheat_detect(part, "zip",
- zip_magic, sizeof(zip_magic))) {
- rspamd_archive_process_zip(task, part);
- }
- else if (rspamd_archive_cheat_detect(part, "rar",
- rar_magic, sizeof(rar_magic))) {
- rspamd_archive_process_rar(task, part);
- }
- else if (rspamd_archive_cheat_detect(part, "7z",
- sz_magic, sizeof(sz_magic))) {
- rspamd_archive_process_7zip(task, part);
- }
- else if (rspamd_archive_cheat_detect(part, "gz",
- gz_magic, sizeof(gz_magic))) {
- rspamd_archive_process_gzip(task, part);
- }
-
- if (part->ct && (part->ct->flags & RSPAMD_CONTENT_TYPE_TEXT) &&
- part->part_type == RSPAMD_MIME_PART_ARCHIVE &&
- part->specific.arch) {
- struct rspamd_archive *arch = part->specific.arch;
-
- msg_info_task("found %s archive with incorrect content-type: %T/%T",
- rspamd_archive_type_str(arch->type),
- &part->ct->type, &part->ct->subtype);
-
- if (!(part->ct->flags & RSPAMD_CONTENT_TYPE_MISSING)) {
- part->ct->flags |= RSPAMD_CONTENT_TYPE_BROKEN;
- }
- }
- }
- }
- }
- }
-
-
- const char *
- rspamd_archive_type_str(enum rspamd_archive_type type)
- {
- const char *ret = "unknown";
-
- switch (type) {
- case RSPAMD_ARCHIVE_ZIP:
- ret = "zip";
- break;
- case RSPAMD_ARCHIVE_RAR:
- ret = "rar";
- break;
- case RSPAMD_ARCHIVE_7ZIP:
- ret = "7z";
- break;
- case RSPAMD_ARCHIVE_GZIP:
- ret = "gz";
- break;
- }
-
- return ret;
- }
|