12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730173117321733173417351736173717381739174017411742174317441745174617471748174917501751175217531754175517561757175817591760176117621763176417651766176717681769177017711772177317741775177617771778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057 |
- /*-
- * Copyright 2016 Vsevolod Stakhov
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
- #include "config.h"
- #include "message.h"
- #include "task.h"
- #include "archives.h"
- #include "libmime/mime_encoding.h"
- #include <unicode/uchar.h>
- #include <unicode/utf8.h>
- #include <unicode/utf16.h>
- #include <unicode/ucnv.h>
-
- #define msg_debug_archive(...) rspamd_conditional_debug_fast(NULL, NULL, \
- rspamd_archive_log_id, "archive", task->task_pool->tag.uid, \
- G_STRFUNC, \
- __VA_ARGS__)
-
- INIT_LOG_MODULE(archive)
-
- static void
- rspamd_archive_dtor(gpointer p)
- {
- struct rspamd_archive *arch = p;
- struct rspamd_archive_file *f;
- guint i;
-
- for (i = 0; i < arch->files->len; i++) {
- f = g_ptr_array_index(arch->files, i);
-
- if (f->fname) {
- g_string_free(f->fname, TRUE);
- }
-
- g_free(f);
- }
-
- g_ptr_array_free(arch->files, TRUE);
- }
-
- static bool
- rspamd_archive_file_try_utf(struct rspamd_task *task,
- struct rspamd_archive *arch,
- struct rspamd_archive_file *fentry,
- const gchar *in, gsize inlen)
- {
- const gchar *charset = NULL, *p, *end;
- GString *res;
-
- charset = rspamd_mime_charset_find_by_content(in, inlen, TRUE);
-
- if (charset) {
- UChar *tmp;
- UErrorCode uc_err = U_ZERO_ERROR;
- gint32 r, clen, dlen;
- struct rspamd_charset_converter *conv;
- UConverter *utf8_converter;
-
- conv = rspamd_mime_get_converter_cached(charset, task->task_pool,
- TRUE, &uc_err);
- utf8_converter = rspamd_get_utf8_converter();
-
- if (conv == NULL) {
- msg_info_task("cannot open converter for %s: %s",
- charset, u_errorName(uc_err));
- fentry->flags |= RSPAMD_ARCHIVE_FILE_OBFUSCATED;
- fentry->fname = g_string_new_len(in, inlen);
-
- return false;
- }
-
- tmp = g_malloc(sizeof(*tmp) * (inlen + 1));
- r = rspamd_converter_to_uchars(conv, tmp, inlen + 1,
- in, inlen, &uc_err);
- if (!U_SUCCESS(uc_err)) {
- msg_info_task("cannot convert data to unicode from %s: %s",
- charset, u_errorName(uc_err));
- g_free(tmp);
-
- fentry->flags |= RSPAMD_ARCHIVE_FILE_OBFUSCATED;
- fentry->fname = g_string_new_len(in, inlen);
-
- return NULL;
- }
-
- int i = 0;
-
- while (i < r) {
- UChar32 uc;
-
- U16_NEXT(tmp, i, r, uc);
-
- if (IS_ZERO_WIDTH_SPACE(uc) || u_iscntrl(uc)) {
- msg_info_task("control character in archive file name found: 0x%02xd "
- "(filename=%T)",
- uc, arch->archive_name);
- fentry->flags |= RSPAMD_ARCHIVE_FILE_OBFUSCATED;
- break;
- }
- }
-
- clen = ucnv_getMaxCharSize(utf8_converter);
- dlen = UCNV_GET_MAX_BYTES_FOR_STRING(r, clen);
- res = g_string_sized_new(dlen);
- r = ucnv_fromUChars(utf8_converter, res->str, dlen, tmp, r, &uc_err);
-
- if (!U_SUCCESS(uc_err)) {
- msg_info_task("cannot convert data from unicode from %s: %s",
- charset, u_errorName(uc_err));
- g_free(tmp);
- g_string_free(res, TRUE);
- fentry->flags |= RSPAMD_ARCHIVE_FILE_OBFUSCATED;
- fentry->fname = g_string_new_len(in, inlen);
-
- return NULL;
- }
-
- g_free(tmp);
- res->len = r;
-
- msg_debug_archive("converted from %s to UTF-8 inlen: %z, outlen: %d",
- charset, inlen, r);
- fentry->fname = res;
- }
- else {
- /* Convert unsafe characters to '?' */
- res = g_string_sized_new(inlen);
- p = in;
- end = in + inlen;
-
- while (p < end) {
- if (g_ascii_isgraph(*p)) {
- g_string_append_c(res, *p);
- }
- else {
- g_string_append_c(res, '?');
-
- if (*p < 0x7f && (g_ascii_iscntrl(*p) || *p == '\0')) {
- if (!(fentry->flags & RSPAMD_ARCHIVE_FILE_OBFUSCATED)) {
- msg_info_task("suspicious character in archive file name found: 0x%02xd "
- "(filename=%T)",
- (int) *p, arch->archive_name);
- fentry->flags |= RSPAMD_ARCHIVE_FILE_OBFUSCATED;
- }
- }
- }
-
- p++;
- }
- fentry->fname = res;
- }
-
- return true;
- }
-
- static void
- rspamd_archive_process_zip(struct rspamd_task *task,
- struct rspamd_mime_part *part)
- {
- const guchar *p, *start, *end, *eocd = NULL, *cd;
- const guint32 eocd_magic = 0x06054b50, cd_basic_len = 46;
- const guchar cd_magic[] = {0x50, 0x4b, 0x01, 0x02};
- const guint max_processed = 1024;
- guint32 cd_offset, cd_size, comp_size, uncomp_size, processed = 0;
- guint16 extra_len, fname_len, comment_len;
- struct rspamd_archive *arch;
- struct rspamd_archive_file *f = NULL;
-
- /* Zip files have interesting data at the end of archive */
- p = part->parsed_data.begin + part->parsed_data.len - 1;
- start = part->parsed_data.begin;
- end = p;
-
- /* Search for EOCD:
- * 22 bytes is a typical size of eocd without a comment and
- * end points one byte after the last character
- */
- p -= 21;
-
- while (p > start + sizeof(guint32)) {
- guint32 t;
-
- if (processed > max_processed) {
- break;
- }
-
- /* XXX: not an efficient approach */
- memcpy(&t, p, sizeof(t));
-
- if (GUINT32_FROM_LE(t) == eocd_magic) {
- eocd = p;
- break;
- }
-
- p--;
- processed++;
- }
-
-
- if (eocd == NULL) {
- /* Not a zip file */
- msg_info_task("zip archive is invalid (no EOCD)");
-
- return;
- }
-
- if (end - eocd < 21) {
- msg_info_task("zip archive is invalid (short EOCD)");
-
- return;
- }
-
-
- memcpy(&cd_size, eocd + 12, sizeof(cd_size));
- cd_size = GUINT32_FROM_LE(cd_size);
- memcpy(&cd_offset, eocd + 16, sizeof(cd_offset));
- cd_offset = GUINT32_FROM_LE(cd_offset);
-
- /* We need to check sanity as well */
- if (cd_offset + cd_size > (guint) (eocd - start)) {
- msg_info_task("zip archive is invalid (bad size/offset for CD)");
-
- return;
- }
-
- cd = start + cd_offset;
-
- arch = rspamd_mempool_alloc0(task->task_pool, sizeof(*arch));
- arch->files = g_ptr_array_new();
- arch->type = RSPAMD_ARCHIVE_ZIP;
- if (part->cd) {
- arch->archive_name = &part->cd->filename;
- }
- rspamd_mempool_add_destructor(task->task_pool, rspamd_archive_dtor,
- arch);
-
- while (cd < start + cd_offset + cd_size) {
- guint16 flags;
-
- /* Read central directory record */
- if (eocd - cd < cd_basic_len ||
- memcmp(cd, cd_magic, sizeof(cd_magic)) != 0) {
- msg_info_task("zip archive is invalid (bad cd record)");
-
- return;
- }
-
- memcpy(&flags, cd + 8, sizeof(guint16));
- flags = GUINT16_FROM_LE(flags);
- memcpy(&comp_size, cd + 20, sizeof(guint32));
- comp_size = GUINT32_FROM_LE(comp_size);
- memcpy(&uncomp_size, cd + 24, sizeof(guint32));
- uncomp_size = GUINT32_FROM_LE(uncomp_size);
- memcpy(&fname_len, cd + 28, sizeof(fname_len));
- fname_len = GUINT16_FROM_LE(fname_len);
- memcpy(&extra_len, cd + 30, sizeof(extra_len));
- extra_len = GUINT16_FROM_LE(extra_len);
- memcpy(&comment_len, cd + 32, sizeof(comment_len));
- comment_len = GUINT16_FROM_LE(comment_len);
-
- if (cd + fname_len + comment_len + extra_len + cd_basic_len > eocd) {
- msg_info_task("zip archive is invalid (too large cd record)");
-
- return;
- }
-
- f = g_malloc0(sizeof(*f));
- rspamd_archive_file_try_utf(task, arch, f, cd + cd_basic_len, fname_len);
-
- f->compressed_size = comp_size;
- f->uncompressed_size = uncomp_size;
-
- if (flags & 0x41u) {
- f->flags |= RSPAMD_ARCHIVE_FILE_ENCRYPTED;
- }
-
- if (f->fname) {
- if (f->flags & RSPAMD_ARCHIVE_FILE_OBFUSCATED) {
- arch->flags |= RSPAMD_ARCHIVE_HAS_OBFUSCATED_FILES;
- }
-
- g_ptr_array_add(arch->files, f);
- msg_debug_archive("found file in zip archive: %v", f->fname);
- }
- else {
- g_free(f);
-
- return;
- }
-
- /* Process extra fields */
- const guchar *extra = cd + fname_len + cd_basic_len;
- p = extra;
-
- while (p + sizeof(guint16) * 2 < extra + extra_len) {
- guint16 hid, hlen;
-
- memcpy(&hid, p, sizeof(guint16));
- hid = GUINT16_FROM_LE(hid);
- memcpy(&hlen, p + sizeof(guint16), sizeof(guint16));
- hlen = GUINT16_FROM_LE(hlen);
-
- if (hid == 0x0017) {
- f->flags |= RSPAMD_ARCHIVE_FILE_ENCRYPTED;
- }
-
- p += hlen + sizeof(guint16) * 2;
- }
-
- cd += fname_len + comment_len + extra_len + cd_basic_len;
- }
-
- part->part_type = RSPAMD_MIME_PART_ARCHIVE;
- part->specific.arch = arch;
-
- arch->size = part->parsed_data.len;
- }
-
- static inline gint
- rspamd_archive_rar_read_vint(const guchar *start, gsize remain, guint64 *res)
- {
- /*
- * From http://www.rarlab.com/technote.htm:
- * Variable length integer. Can include one or more bytes, where
- * lower 7 bits of every byte contain integer data and highest bit
- * in every byte is the continuation flag.
- * If highest bit is 0, this is the last byte in sequence.
- * So first byte contains 7 least significant bits of integer and
- * continuation flag. Second byte, if present, contains next 7 bits and so on.
- */
- guint64 t = 0;
- guint shift = 0;
- const guchar *p = start;
-
- while (remain > 0 && shift <= 57) {
- if (*p & 0x80) {
- t |= ((guint64) (*p & 0x7f)) << shift;
- }
- else {
- t |= ((guint64) (*p & 0x7f)) << shift;
- p++;
- break;
- }
-
- shift += 7;
- p++;
- remain--;
- }
-
- if (remain == 0 || shift > 64) {
- return -1;
- }
-
- *res = GUINT64_FROM_LE(t);
-
- return p - start;
- }
-
- #define RAR_SKIP_BYTES(n) \
- do { \
- if ((n) <= 0) { \
- msg_debug_archive("rar archive is invalid (bad skip value)"); \
- return; \
- } \
- if ((gsize) (end - p) < (n)) { \
- msg_debug_archive("rar archive is invalid (truncated)"); \
- return; \
- } \
- p += (n); \
- } while (0)
-
- #define RAR_READ_VINT() \
- do { \
- r = rspamd_archive_rar_read_vint(p, end - p, &vint); \
- if (r == -1) { \
- msg_debug_archive("rar archive is invalid (bad vint)"); \
- return; \
- } \
- else if (r == 0) { \
- msg_debug_archive("rar archive is invalid (BAD vint offset)"); \
- return; \
- } \
- } while (0)
-
- #define RAR_READ_VINT_SKIP() \
- do { \
- r = rspamd_archive_rar_read_vint(p, end - p, &vint); \
- if (r == -1) { \
- msg_debug_archive("rar archive is invalid (bad vint)"); \
- return; \
- } \
- p += r; \
- } while (0)
-
- #define RAR_READ_UINT16(n) \
- do { \
- if (end - p < (glong) sizeof(guint16)) { \
- msg_debug_archive("rar archive is invalid (bad int16)"); \
- return; \
- } \
- n = p[0] + (p[1] << 8); \
- p += sizeof(guint16); \
- } while (0)
-
- #define RAR_READ_UINT32(n) \
- do { \
- if (end - p < (glong) sizeof(guint32)) { \
- msg_debug_archive("rar archive is invalid (bad int32)"); \
- return; \
- } \
- n = (guint) p[0] + ((guint) p[1] << 8) + ((guint) p[2] << 16) + ((guint) p[3] << 24); \
- p += sizeof(guint32); \
- } while (0)
-
- static void
- rspamd_archive_process_rar_v4(struct rspamd_task *task, const guchar *start,
- const guchar *end, struct rspamd_mime_part *part)
- {
- const guchar *p = start, *start_section;
- guint8 type;
- guint flags;
- guint64 sz, comp_sz = 0, uncomp_sz = 0;
- struct rspamd_archive *arch;
- struct rspamd_archive_file *f;
-
- arch = rspamd_mempool_alloc0(task->task_pool, sizeof(*arch));
- arch->files = g_ptr_array_new();
- arch->type = RSPAMD_ARCHIVE_RAR;
- if (part->cd) {
- arch->archive_name = &part->cd->filename;
- }
- rspamd_mempool_add_destructor(task->task_pool, rspamd_archive_dtor,
- arch);
-
- while (p < end) {
- /* Crc16 */
- start_section = p;
- RAR_SKIP_BYTES(sizeof(guint16));
- type = *p;
- p++;
- RAR_READ_UINT16(flags);
-
- if (type == 0x73) {
- /* Main header, check for encryption */
- if (flags & 0x80) {
- arch->flags |= RSPAMD_ARCHIVE_ENCRYPTED;
- goto end;
- }
- }
-
- RAR_READ_UINT16(sz);
-
- if (flags & 0x8000) {
- /* We also need to read ADD_SIZE element */
- guint32 tmp;
-
- RAR_READ_UINT32(tmp);
- sz += tmp;
- /* This is also used as PACK_SIZE */
- comp_sz = tmp;
- }
-
- if (sz == 0) {
- /* Zero sized block - error */
- msg_debug_archive("rar archive is invalid (zero size block)");
-
- return;
- }
-
- if (type == 0x74) {
- guint fname_len;
-
- /* File header */
- /* Uncompressed size */
- RAR_READ_UINT32(uncomp_sz);
- /* Skip to NAME_SIZE element */
- RAR_SKIP_BYTES(11);
- RAR_READ_UINT16(fname_len);
-
- if (fname_len == 0 || fname_len > (gsize) (end - p)) {
- msg_debug_archive("rar archive is invalid (bad filename size: %d)",
- fname_len);
-
- return;
- }
-
- /* Attrs */
- RAR_SKIP_BYTES(4);
-
- if (flags & 0x100) {
- /* We also need to read HIGH_PACK_SIZE */
- guint32 tmp;
-
- RAR_READ_UINT32(tmp);
- sz += tmp;
- comp_sz += tmp;
- /* HIGH_UNP_SIZE */
- RAR_READ_UINT32(tmp);
- uncomp_sz += tmp;
- }
-
- f = g_malloc0(sizeof(*f));
-
- if (flags & 0x200) {
- /* We have unicode + normal version */
- guchar *tmp;
-
- tmp = memchr(p, '\0', fname_len);
-
- if (tmp != NULL) {
- /* Just use ASCII version */
- rspamd_archive_file_try_utf(task, arch, f, p, tmp - p);
- msg_debug_archive("found ascii filename in rarv4 archive: %v",
- f->fname);
- }
- else {
- /* We have UTF8 filename, use it as is */
- rspamd_archive_file_try_utf(task, arch, f, p, fname_len);
- msg_debug_archive("found utf filename in rarv4 archive: %v",
- f->fname);
- }
- }
- else {
- rspamd_archive_file_try_utf(task, arch, f, p, fname_len);
- msg_debug_archive("found ascii (old) filename in rarv4 archive: %v",
- f->fname);
- }
-
- f->compressed_size = comp_sz;
- f->uncompressed_size = uncomp_sz;
-
- if (flags & 0x4) {
- f->flags |= RSPAMD_ARCHIVE_FILE_ENCRYPTED;
- }
-
- if (f->fname) {
- if (f->flags & RSPAMD_ARCHIVE_FILE_OBFUSCATED) {
- arch->flags |= RSPAMD_ARCHIVE_HAS_OBFUSCATED_FILES;
- }
- g_ptr_array_add(arch->files, f);
- }
- else {
- g_free(f);
- }
- }
-
- p = start_section;
- RAR_SKIP_BYTES(sz);
- }
-
- end:
- part->part_type = RSPAMD_MIME_PART_ARCHIVE;
- part->specific.arch = arch;
- arch->size = part->parsed_data.len;
- }
-
- static void
- rspamd_archive_process_rar(struct rspamd_task *task,
- struct rspamd_mime_part *part)
- {
- const guchar *p, *end, *section_start;
- const guchar rar_v5_magic[] = {0x52, 0x61, 0x72, 0x21, 0x1A, 0x07, 0x01, 0x00},
- rar_v4_magic[] = {0x52, 0x61, 0x72, 0x21, 0x1A, 0x07, 0x00};
- const guint rar_encrypted_header = 4, rar_main_header = 1,
- rar_file_header = 2;
- guint64 vint, sz, comp_sz = 0, uncomp_sz = 0, flags = 0, type = 0,
- extra_sz = 0;
- struct rspamd_archive *arch;
- struct rspamd_archive_file *f;
- gint r;
-
- p = part->parsed_data.begin;
- end = p + part->parsed_data.len;
-
- if ((gsize) (end - p) <= sizeof(rar_v5_magic)) {
- msg_debug_archive("rar archive is invalid (too small)");
-
- return;
- }
-
- if (memcmp(p, rar_v5_magic, sizeof(rar_v5_magic)) == 0) {
- p += sizeof(rar_v5_magic);
- }
- else if (memcmp(p, rar_v4_magic, sizeof(rar_v4_magic)) == 0) {
- p += sizeof(rar_v4_magic);
-
- rspamd_archive_process_rar_v4(task, p, end, part);
- return;
- }
- else {
- msg_debug_archive("rar archive is invalid (no rar magic)");
-
- return;
- }
-
- /* Rar v5 format */
- arch = rspamd_mempool_alloc0(task->task_pool, sizeof(*arch));
- arch->files = g_ptr_array_new();
- arch->type = RSPAMD_ARCHIVE_RAR;
- if (part->cd) {
- arch->archive_name = &part->cd->filename;
- }
- rspamd_mempool_add_destructor(task->task_pool, rspamd_archive_dtor,
- arch);
-
- /* Now we can have either encryption header or archive header */
- /* Crc 32 */
- RAR_SKIP_BYTES(sizeof(guint32));
- /* Size */
- RAR_READ_VINT_SKIP();
- sz = vint;
- /* Type */
- section_start = p;
- RAR_READ_VINT_SKIP();
- type = vint;
- /* Header flags */
- RAR_READ_VINT_SKIP();
- flags = vint;
-
- if (flags & 0x1) {
- /* Have extra zone */
- RAR_READ_VINT_SKIP();
- }
- if (flags & 0x2) {
- /* Data zone is presented */
- RAR_READ_VINT_SKIP();
- sz += vint;
- }
-
- if (type == rar_encrypted_header) {
- /* We can't read any further information as archive is encrypted */
- arch->flags |= RSPAMD_ARCHIVE_ENCRYPTED;
- goto end;
- }
- else if (type != rar_main_header) {
- msg_debug_archive("rar archive is invalid (bad main header)");
-
- return;
- }
-
- /* Nothing useful in main header */
- p = section_start;
- RAR_SKIP_BYTES(sz);
-
- while (p < end) {
- gboolean has_extra = FALSE;
- /* Read the next header */
- /* Crc 32 */
- RAR_SKIP_BYTES(sizeof(guint32));
- /* Size */
- RAR_READ_VINT_SKIP();
-
- sz = vint;
- if (sz == 0) {
- /* Zero sized block - error */
- msg_debug_archive("rar archive is invalid (zero size block)");
-
- return;
- }
-
- section_start = p;
- /* Type */
- RAR_READ_VINT_SKIP();
- type = vint;
- /* Header flags */
- RAR_READ_VINT_SKIP();
- flags = vint;
-
- if (flags & 0x1) {
- /* Have extra zone */
- RAR_READ_VINT_SKIP();
- extra_sz = vint;
- has_extra = TRUE;
- }
-
- if (flags & 0x2) {
- /* Data zone is presented */
- RAR_READ_VINT_SKIP();
- sz += vint;
- comp_sz = vint;
- }
-
- if (type != rar_file_header) {
- p = section_start;
- RAR_SKIP_BYTES(sz);
- }
- else {
- /* We have a file header, go forward */
- guint64 fname_len;
- bool is_directory = false;
-
- /* File header specific flags */
- RAR_READ_VINT_SKIP();
- flags = vint;
-
- /* Unpacked size */
- RAR_READ_VINT_SKIP();
- uncomp_sz = vint;
- /* Attributes */
- RAR_READ_VINT_SKIP();
-
- if (flags & 0x2) {
- /* Unix mtime */
- RAR_SKIP_BYTES(sizeof(guint32));
- }
- if (flags & 0x4) {
- /* Crc32 */
- RAR_SKIP_BYTES(sizeof(guint32));
- }
- if (flags & 0x1) {
- /* Ignore directories for sanity purposes */
- is_directory = true;
- msg_debug_archive("skip directory record in a rar archive");
- }
-
- if (!is_directory) {
- /* Compression */
- RAR_READ_VINT_SKIP();
- /* Host OS */
- RAR_READ_VINT_SKIP();
- /* Filename length (finally!) */
- RAR_READ_VINT_SKIP();
- fname_len = vint;
-
- if (fname_len == 0 || fname_len > (gsize) (end - p)) {
- msg_debug_archive("rar archive is invalid (bad filename size)");
-
- return;
- }
-
- f = g_malloc0(sizeof(*f));
- f->uncompressed_size = uncomp_sz;
- f->compressed_size = comp_sz;
- rspamd_archive_file_try_utf(task, arch, f, p, fname_len);
-
- if (f->fname) {
- msg_debug_archive("added rarv5 file: %v", f->fname);
- g_ptr_array_add(arch->files, f);
- if (f->flags & RSPAMD_ARCHIVE_FILE_OBFUSCATED) {
- arch->flags |= RSPAMD_ARCHIVE_HAS_OBFUSCATED_FILES;
- }
- }
- else {
- g_free(f);
- f = NULL;
- }
-
- if (f && has_extra && extra_sz > 0 &&
- p + fname_len + extra_sz < end) {
- /* Try to find encryption record in extra field */
- const guchar *ex = p + fname_len;
-
- while (ex < p + extra_sz) {
- const guchar *t;
- gint64 cur_sz = 0, sec_type = 0;
-
- r = rspamd_archive_rar_read_vint(ex, extra_sz, &cur_sz);
- if (r == -1) {
- msg_debug_archive("rar archive is invalid (bad vint)");
- return;
- }
-
- t = ex + r;
-
- r = rspamd_archive_rar_read_vint(t, extra_sz - r, &sec_type);
- if (r == -1) {
- msg_debug_archive("rar archive is invalid (bad vint)");
- return;
- }
-
- if (sec_type == 0x01) {
- f->flags |= RSPAMD_ARCHIVE_FILE_ENCRYPTED;
- arch->flags |= RSPAMD_ARCHIVE_ENCRYPTED;
- break;
- }
-
- ex += cur_sz;
- }
- }
- }
-
- /* Restore p to the beginning of the header */
- p = section_start;
- RAR_SKIP_BYTES(sz);
- }
- }
-
- end:
- part->part_type = RSPAMD_MIME_PART_ARCHIVE;
- part->specific.arch = arch;
- arch->size = part->parsed_data.len;
- }
-
- static inline gint
- rspamd_archive_7zip_read_vint(const guchar *start, gsize remain, guint64 *res)
- {
- /*
- * REAL_UINT64 means real UINT64.
- * UINT64 means real UINT64 encoded with the following scheme:
- *
- * Size of encoding sequence depends from first byte:
- * First_Byte Extra_Bytes Value
- * (binary)
- * 0xxxxxxx : ( xxxxxxx )
- * 10xxxxxx BYTE y[1] : ( xxxxxx << (8 * 1)) + y
- * 110xxxxx BYTE y[2] : ( xxxxx << (8 * 2)) + y
- * ...
- * 1111110x BYTE y[6] : ( x << (8 * 6)) + y
- * 11111110 BYTE y[7] : y
- * 11111111 BYTE y[8] : y
- */
- guchar t;
-
- if (remain == 0) {
- return -1;
- }
-
- t = *start;
-
- if (!isset(&t, 7)) {
- /* Trivial case */
- *res = t;
- return 1;
- }
- else if (t == 0xFF) {
- if (remain >= sizeof(guint64) + 1) {
- memcpy(res, start + 1, sizeof(guint64));
- *res = GUINT64_FROM_LE(*res);
-
- return sizeof(guint64) + 1;
- }
- }
- else {
- gint cur_bit = 6, intlen = 1;
- const guchar bmask = 0xFF;
- guint64 tgt;
-
- while (cur_bit > 0) {
- if (!isset(&t, cur_bit)) {
- if (remain >= intlen + 1) {
- memcpy(&tgt, start + 1, intlen);
- tgt = GUINT64_FROM_LE(tgt);
- /* Shift back */
- tgt >>= sizeof(tgt) - NBBY * intlen;
- /* Add masked value */
- tgt += (guint64) (t & (bmask >> (NBBY - cur_bit)))
- << (NBBY * intlen);
- *res = tgt;
-
- return intlen + 1;
- }
- }
- cur_bit--;
- intlen++;
- }
- }
-
- return -1;
- }
-
- #define SZ_READ_VINT_SKIP() \
- do { \
- r = rspamd_archive_7zip_read_vint(p, end - p, &vint); \
- if (r == -1) { \
- msg_debug_archive("7z archive is invalid (bad vint)"); \
- return; \
- } \
- p += r; \
- } while (0)
- #define SZ_READ_VINT(var) \
- do { \
- int r; \
- r = rspamd_archive_7zip_read_vint(p, end - p, &(var)); \
- if (r == -1) { \
- msg_debug_archive("7z archive is invalid (bad vint): %s", G_STRLOC); \
- return NULL; \
- } \
- p += r; \
- } while (0)
-
- #define SZ_READ_UINT64(n) \
- do { \
- if (end - p < (goffset) sizeof(guint64)) { \
- msg_debug_archive("7zip archive is invalid (bad uint64): %s", G_STRLOC); \
- return; \
- } \
- memcpy(&(n), p, sizeof(guint64)); \
- n = GUINT64_FROM_LE(n); \
- p += sizeof(guint64); \
- } while (0)
- #define SZ_SKIP_BYTES(n) \
- do { \
- if (end - p >= (n)) { \
- p += (n); \
- } \
- else { \
- msg_debug_archive("7zip archive is invalid (truncated); wanted to read %d bytes, %d avail: %s", (gint) (n), (gint) (end - p), G_STRLOC); \
- return NULL; \
- } \
- } while (0)
-
- enum rspamd_7zip_header_mark {
- kEnd = 0x00,
- kHeader = 0x01,
- kArchiveProperties = 0x02,
- kAdditionalStreamsInfo = 0x03,
- kMainStreamsInfo = 0x04,
- kFilesInfo = 0x05,
- kPackInfo = 0x06,
- kUnPackInfo = 0x07,
- kSubStreamsInfo = 0x08,
- kSize = 0x09,
- kCRC = 0x0A,
- kFolder = 0x0B,
- kCodersUnPackSize = 0x0C,
- kNumUnPackStream = 0x0D,
- kEmptyStream = 0x0E,
- kEmptyFile = 0x0F,
- kAnti = 0x10,
- kName = 0x11,
- kCTime = 0x12,
- kATime = 0x13,
- kMTime = 0x14,
- kWinAttributes = 0x15,
- kComment = 0x16,
- kEncodedHeader = 0x17,
- kStartPos = 0x18,
- kDummy = 0x19,
- };
-
-
- #define _7Z_CRYPTO_MAIN_ZIP 0x06F10101 /* Main Zip crypto algo */
- #define _7Z_CRYPTO_RAR_29 0x06F10303 /* Rar29 AES-128 + (modified SHA-1) */
- #define _7Z_CRYPTO_AES_256_SHA_256 0x06F10701 /* AES-256 + SHA-256 */
-
- #define IS_SZ_ENCRYPTED(codec_id) (((codec_id) == _7Z_CRYPTO_MAIN_ZIP) || \
- ((codec_id) == _7Z_CRYPTO_RAR_29) || \
- ((codec_id) == _7Z_CRYPTO_AES_256_SHA_256))
-
- static const guchar *
- rspamd_7zip_read_bits(struct rspamd_task *task,
- const guchar *p, const guchar *end,
- struct rspamd_archive *arch, guint nbits,
- guint *pbits_set)
- {
- unsigned mask = 0, avail = 0, i;
- gboolean bit_set = 0;
-
- for (i = 0; i < nbits; i++) {
- if (mask == 0) {
- avail = *p;
- SZ_SKIP_BYTES(1);
- mask = 0x80;
- }
-
- bit_set = (avail & mask) ? 1 : 0;
-
- if (bit_set && pbits_set) {
- (*pbits_set)++;
- }
-
- mask >>= 1;
- }
-
- return p;
- }
-
- static const guchar *
- rspamd_7zip_read_digest(struct rspamd_task *task,
- const guchar *p, const guchar *end,
- struct rspamd_archive *arch,
- guint64 num_streams,
- guint *pdigest_read)
- {
- guchar all_defined = *p;
- guint64 i;
- guint num_defined = 0;
- /*
- * BYTE AllAreDefined
- * if (AllAreDefined == 0)
- * {
- * for(NumStreams)
- * BIT Defined
- * }
- * UINT32 CRCs[NumDefined]
- */
- SZ_SKIP_BYTES(1);
-
- if (all_defined) {
- num_defined = num_streams;
- }
- else {
- if (num_streams > 8192) {
- /* Gah */
- return NULL;
- }
-
- p = rspamd_7zip_read_bits(task, p, end, arch, num_streams, &num_defined);
-
- if (p == NULL) {
- return NULL;
- }
- }
-
- for (i = 0; i < num_defined; i++) {
- SZ_SKIP_BYTES(sizeof(guint32));
- }
-
- if (pdigest_read) {
- *pdigest_read = num_defined;
- }
-
- return p;
- }
-
- static const guchar *
- rspamd_7zip_read_pack_info(struct rspamd_task *task,
- const guchar *p, const guchar *end,
- struct rspamd_archive *arch)
- {
- guint64 pack_pos = 0, pack_streams = 0, i, cur_sz;
- guint num_digests = 0;
- guchar t;
- /*
- * UINT64 PackPos
- * UINT64 NumPackStreams
- *
- * []
- * BYTE NID::kSize (0x09)
- * UINT64 PackSizes[NumPackStreams]
- * []
- *
- * []
- * BYTE NID::kCRC (0x0A)
- * PackStreamDigests[NumPackStreams]
- * []
- * BYTE NID::kEnd
- */
-
- SZ_READ_VINT(pack_pos);
- SZ_READ_VINT(pack_streams);
-
- while (p != NULL && p < end) {
- t = *p;
- SZ_SKIP_BYTES(1);
- msg_debug_archive("7zip: read pack info %xc", t);
-
- switch (t) {
- case kSize:
- /* We need to skip pack_streams VINTS */
- for (i = 0; i < pack_streams; i++) {
- SZ_READ_VINT(cur_sz);
- }
- break;
- case kCRC:
- /* CRCs are more complicated */
- p = rspamd_7zip_read_digest(task, p, end, arch, pack_streams,
- &num_digests);
- break;
- case kEnd:
- goto end;
- break;
- default:
- p = NULL;
- msg_debug_archive("bad 7zip type: %xc; %s", t, G_STRLOC);
- goto end;
- break;
- }
- }
-
- end:
-
- return p;
- }
-
- static const guchar *
- rspamd_7zip_read_folder(struct rspamd_task *task,
- const guchar *p, const guchar *end,
- struct rspamd_archive *arch, guint *pnstreams, guint *ndigests)
- {
- guint64 ncoders = 0, i, j, noutstreams = 0, ninstreams = 0;
-
- SZ_READ_VINT(ncoders);
-
- for (i = 0; i < ncoders && p != NULL && p < end; i++) {
- guint64 sz, tmp;
- guchar t;
- /*
- * BYTE
- * {
- * 0:3 CodecIdSize
- * 4: Is Complex Coder
- * 5: There Are Attributes
- * 6: Reserved
- * 7: There are more alternative methods. (Not used anymore, must be 0).
- * }
- * BYTE CodecId[CodecIdSize]
- * if (Is Complex Coder)
- * {
- * UINT64 NumInStreams;
- * UINT64 NumOutStreams;
- * }
- * if (There Are Attributes)
- * {
- * UINT64 PropertiesSize
- * BYTE Properties[PropertiesSize]
- * }
- */
- t = *p;
- SZ_SKIP_BYTES(1);
- sz = t & 0xF;
- /* Codec ID */
- tmp = 0;
- for (j = 0; j < sz; j++) {
- tmp <<= 8;
- tmp += p[j];
- }
-
- msg_debug_archive("7zip: read codec id: %L", tmp);
-
- if (IS_SZ_ENCRYPTED(tmp)) {
- arch->flags |= RSPAMD_ARCHIVE_ENCRYPTED;
- }
-
- SZ_SKIP_BYTES(sz);
-
- if (t & (1u << 4)) {
- /* Complex */
- SZ_READ_VINT(tmp); /* InStreams */
- ninstreams += tmp;
- SZ_READ_VINT(tmp); /* OutStreams */
- noutstreams += tmp;
- }
- else {
- /* XXX: is it correct ? */
- noutstreams++;
- ninstreams++;
- }
- if (t & (1u << 5)) {
- /* Attributes ... */
- SZ_READ_VINT(tmp); /* Size of attrs */
- SZ_SKIP_BYTES(tmp);
- }
- }
-
- if (noutstreams > 1) {
- /* BindPairs, WTF, huh */
- for (i = 0; i < noutstreams - 1; i++) {
- guint64 tmp;
-
- SZ_READ_VINT(tmp);
- SZ_READ_VINT(tmp);
- }
- }
-
- gint64 npacked = (gint64) ninstreams - (gint64) noutstreams + 1;
- msg_debug_archive("7zip: instreams=%L, outstreams=%L, packed=%L",
- ninstreams, noutstreams, npacked);
-
- if (npacked > 1) {
- /* Gah... */
- for (i = 0; i < npacked; i++) {
- guint64 tmp;
-
- SZ_READ_VINT(tmp);
- }
- }
-
- *pnstreams = noutstreams;
- (*ndigests) += npacked;
-
- return p;
- }
-
- static const guchar *
- rspamd_7zip_read_coders_info(struct rspamd_task *task,
- const guchar *p, const guchar *end,
- struct rspamd_archive *arch,
- guint *pnum_folders, guint *pnum_nodigest)
- {
- guint64 num_folders = 0, i, tmp;
- guchar t;
- guint *folder_nstreams = NULL, num_digests = 0, digests_read = 0;
-
- while (p != NULL && p < end) {
- /*
- * BYTE NID::kFolder (0x0B)
- * UINT64 NumFolders
- * BYTE External
- * switch(External)
- * {
- * case 0:
- * Folders[NumFolders]
- * case 1:
- * UINT64 DataStreamIndex
- * }
- * BYTE ID::kCodersUnPackSize (0x0C)
- * for(Folders)
- * for(Folder.NumOutStreams)
- * UINT64 UnPackSize;
- * []
- * BYTE NID::kCRC (0x0A)
- * UnPackDigests[NumFolders]
- * []
- * BYTE NID::kEnd
- */
-
- t = *p;
- SZ_SKIP_BYTES(1);
- msg_debug_archive("7zip: read coders info %xc", t);
-
- switch (t) {
- case kFolder:
- SZ_READ_VINT(num_folders);
- msg_debug_archive("7zip: nfolders=%L", num_folders);
-
- if (*p != 0) {
- /* External folders */
- SZ_SKIP_BYTES(1);
- SZ_READ_VINT(tmp);
- }
- else {
- SZ_SKIP_BYTES(1);
-
- if (num_folders > 8192) {
- /* Gah */
- return NULL;
- }
-
- if (folder_nstreams) {
- g_free(folder_nstreams);
- }
-
- folder_nstreams = g_malloc(sizeof(int) * num_folders);
-
- for (i = 0; i < num_folders && p != NULL && p < end; i++) {
- p = rspamd_7zip_read_folder(task, p, end, arch,
- &folder_nstreams[i], &num_digests);
- }
- }
- break;
- case kCodersUnPackSize:
- for (i = 0; i < num_folders && p != NULL && p < end; i++) {
- if (folder_nstreams) {
- for (guint j = 0; j < folder_nstreams[i]; j++) {
- SZ_READ_VINT(tmp); /* Unpacked size */
- msg_debug_archive("7zip: unpacked size "
- "(folder=%d, stream=%d) = %L",
- (gint) i, j, tmp);
- }
- }
- else {
- msg_err_task("internal 7zip error");
- }
- }
- break;
- case kCRC:
- /*
- * Here are dragons. Spec tells that here there could be up
- * to nfolders digests. However, according to the actual source
- * code, in case of multiple out streams there should be digests
- * for all out streams.
- *
- * In the real life (tm) it is even more idiotic: all these digests
- * are in another section! But that section needs number of digests
- * that are absent here. It is the most stupid thing I've ever seen
- * in any file format.
- *
- * I hope there *WAS* some reason to do such shit...
- */
- p = rspamd_7zip_read_digest(task, p, end, arch, num_digests,
- &digests_read);
- break;
- case kEnd:
- goto end;
- break;
- default:
- p = NULL;
- msg_debug_archive("bad 7zip type: %xc; %s", t, G_STRLOC);
- goto end;
- break;
- }
- }
-
- end:
-
- if (pnum_nodigest) {
- *pnum_nodigest = num_digests - digests_read;
- }
- if (pnum_folders) {
- *pnum_folders = num_folders;
- }
-
- if (folder_nstreams) {
- g_free(folder_nstreams);
- }
-
- return p;
- }
-
- static const guchar *
- rspamd_7zip_read_substreams_info(struct rspamd_task *task,
- const guchar *p, const guchar *end,
- struct rspamd_archive *arch,
- guint num_folders, guint num_nodigest)
- {
- guchar t;
- guint i;
- guint64 *folder_nstreams;
-
- if (num_folders > 8192) {
- /* Gah */
- return NULL;
- }
-
- folder_nstreams = g_alloca(sizeof(guint64) * num_folders);
- memset(folder_nstreams, 0, sizeof(guint64) * num_folders);
-
- while (p != NULL && p < end) {
- /*
- * []
- * BYTE NID::kNumUnPackStream; (0x0D)
- * UINT64 NumUnPackStreamsInFolders[NumFolders];
- * []
- *
- * []
- * BYTE NID::kSize (0x09)
- * UINT64 UnPackSizes[??]
- * []
- *
- *
- * []
- * BYTE NID::kCRC (0x0A)
- * Digests[Number of streams with unknown CRC]
- * []
-
- */
- t = *p;
- SZ_SKIP_BYTES(1);
-
- msg_debug_archive("7zip: read substream info %xc", t);
-
- switch (t) {
- case kNumUnPackStream:
- for (i = 0; i < num_folders; i++) {
- guint64 tmp;
-
- SZ_READ_VINT(tmp);
- folder_nstreams[i] = tmp;
- }
- break;
- case kCRC:
- /*
- * Read the comment in the rspamd_7zip_read_coders_info
- */
- p = rspamd_7zip_read_digest(task, p, end, arch, num_nodigest,
- NULL);
- break;
- case kSize:
- /*
- * Another brain damaged logic, but we have to support it
- * as there are no ways to proceed without it.
- * In fact, it is just absent in the real life...
- */
- for (i = 0; i < num_folders; i++) {
- for (guint j = 0; j < folder_nstreams[i]; j++) {
- guint64 tmp;
-
- SZ_READ_VINT(tmp); /* Who cares indeed */
- }
- }
- break;
- case kEnd:
- goto end;
- break;
- default:
- p = NULL;
- msg_debug_archive("bad 7zip type: %xc; %s", t, G_STRLOC);
- goto end;
- break;
- }
- }
-
- end:
- return p;
- }
-
- static const guchar *
- rspamd_7zip_read_main_streams_info(struct rspamd_task *task,
- const guchar *p, const guchar *end,
- struct rspamd_archive *arch)
- {
- guchar t;
- guint num_folders = 0, unknown_digests = 0;
-
- while (p != NULL && p < end) {
- t = *p;
- SZ_SKIP_BYTES(1);
- msg_debug_archive("7zip: read main streams info %xc", t);
-
- /*
- *
- * []
- * PackInfo
- * []
-
- * []
- * CodersInfo
- * []
- *
- * []
- * SubStreamsInfo
- * []
- *
- * BYTE NID::kEnd
- */
- switch (t) {
- case kPackInfo:
- p = rspamd_7zip_read_pack_info(task, p, end, arch);
- break;
- case kUnPackInfo:
- p = rspamd_7zip_read_coders_info(task, p, end, arch, &num_folders,
- &unknown_digests);
- break;
- case kSubStreamsInfo:
- p = rspamd_7zip_read_substreams_info(task, p, end, arch, num_folders,
- unknown_digests);
- break;
- break;
- case kEnd:
- goto end;
- break;
- default:
- p = NULL;
- msg_debug_archive("bad 7zip type: %xc; %s", t, G_STRLOC);
- goto end;
- break;
- }
- }
-
- end:
- return p;
- }
-
- static const guchar *
- rspamd_7zip_read_archive_props(struct rspamd_task *task,
- const guchar *p, const guchar *end,
- struct rspamd_archive *arch)
- {
- guchar proptype;
- guint64 proplen;
-
- /*
- * for (;;)
- * {
- * BYTE PropertyType;
- * if (aType == 0)
- * break;
- * UINT64 PropertySize;
- * BYTE PropertyData[PropertySize];
- * }
- */
-
- if (p != NULL) {
- proptype = *p;
- SZ_SKIP_BYTES(1);
-
- while (proptype != 0) {
- SZ_READ_VINT(proplen);
-
- if (p + proplen < end) {
- p += proplen;
- }
- else {
- return NULL;
- }
-
- proptype = *p;
- SZ_SKIP_BYTES(1);
- }
- }
-
- return p;
- }
-
- static GString *
- rspamd_7zip_ucs2_to_utf8(struct rspamd_task *task, const guchar *p,
- const guchar *end)
- {
- GString *res;
- goffset dest_pos = 0, src_pos = 0;
- const gsize len = (end - p) / sizeof(guint16);
- guint16 *up;
- UChar32 wc;
- UBool is_error = 0;
-
- res = g_string_sized_new((end - p) * 3 / 2 + sizeof(wc) + 1);
- up = (guint16 *) p;
-
- while (src_pos < len) {
- U16_NEXT(up, src_pos, len, wc);
-
- if (wc > 0) {
- U8_APPEND(res->str, dest_pos,
- res->allocated_len - 1,
- wc, is_error);
- }
-
- if (is_error) {
- g_string_free(res, TRUE);
-
- return NULL;
- }
- }
-
- g_assert(dest_pos < res->allocated_len);
-
- res->len = dest_pos;
- res->str[dest_pos] = '\0';
-
- return res;
- }
-
- static const guchar *
- rspamd_7zip_read_files_info(struct rspamd_task *task,
- const guchar *p, const guchar *end,
- struct rspamd_archive *arch)
- {
- guint64 nfiles = 0, sz, i;
- guchar t, b;
- struct rspamd_archive_file *fentry;
-
- SZ_READ_VINT(nfiles);
-
- for (; p != NULL && p < end;) {
- t = *p;
- SZ_SKIP_BYTES(1);
-
- msg_debug_archive("7zip: read file data type %xc", t);
-
- if (t == kEnd) {
- goto end;
- }
-
- /* This is SO SPECIAL, gah */
- SZ_READ_VINT(sz);
-
- switch (t) {
- case kEmptyStream:
- case kEmptyFile:
- case kAnti: /* AntiFile, OMFG */
- /* We don't care about these bits */
- case kCTime:
- case kATime:
- case kMTime:
- /* We don't care of these guys, but we still have to parse them, gah */
- if (sz > 0) {
- SZ_SKIP_BYTES(sz);
- }
- break;
- case kName:
- /* The most useful part in this whole bloody format */
- b = *p; /* External flag */
- SZ_SKIP_BYTES(1);
-
- if (b) {
- /* TODO: for the god sake, do something about external
- * filenames...
- */
- guint64 tmp;
-
- SZ_READ_VINT(tmp);
- }
- else {
- for (i = 0; i < nfiles; i++) {
- /* Zero terminated wchar_t: happy converting... */
- /* First, find terminator */
- const guchar *fend = NULL, *tp = p;
- GString *res;
-
- while (tp < end - 1) {
- if (*tp == 0 && *(tp + 1) == 0) {
- fend = tp;
- break;
- }
-
- tp += 2;
- }
-
- if (fend == NULL || fend - p == 0) {
- /* Crap instead of fname */
- msg_debug_archive("bad 7zip name; %s", G_STRLOC);
- goto end;
- }
-
- res = rspamd_7zip_ucs2_to_utf8(task, p, fend);
-
- if (res != NULL) {
- fentry = g_malloc0(sizeof(*fentry));
- fentry->fname = res;
- g_ptr_array_add(arch->files, fentry);
- msg_debug_archive("7zip: found file %v", res);
- }
- else {
- msg_debug_archive("bad 7zip name; %s", G_STRLOC);
- }
- /* Skip zero terminating character */
- p = fend + 2;
- }
- }
- break;
- case kDummy:
- case kWinAttributes:
- if (sz > 0) {
- SZ_SKIP_BYTES(sz);
- }
- break;
- default:
- p = NULL;
- msg_debug_archive("bad 7zip type: %xc; %s", t, G_STRLOC);
- goto end;
- break;
- }
- }
-
- end:
- return p;
- }
-
- static const guchar *
- rspamd_7zip_read_next_section(struct rspamd_task *task,
- const guchar *p, const guchar *end,
- struct rspamd_archive *arch)
- {
- guchar t = *p;
-
- SZ_SKIP_BYTES(1);
-
- msg_debug_archive("7zip: read section %xc", t);
-
- switch (t) {
- case kHeader:
- /* We just skip byte and go further */
- break;
- case kEncodedHeader:
- /*
- * In fact, headers are just packed, but we assume it as
- * encrypted to distinguish from the normal archives
- */
- msg_debug_archive("7zip: encoded header, needs to be uncompressed");
- arch->flags |= RSPAMD_ARCHIVE_CANNOT_READ;
- p = NULL; /* Cannot get anything useful */
- break;
- case kArchiveProperties:
- p = rspamd_7zip_read_archive_props(task, p, end, arch);
- break;
- case kMainStreamsInfo:
- p = rspamd_7zip_read_main_streams_info(task, p, end, arch);
- break;
- case kAdditionalStreamsInfo:
- p = rspamd_7zip_read_main_streams_info(task, p, end, arch);
- break;
- case kFilesInfo:
- p = rspamd_7zip_read_files_info(task, p, end, arch);
- break;
- case kEnd:
- p = NULL;
- msg_debug_archive("7zip: read final section");
- break;
- default:
- p = NULL;
- msg_debug_archive("bad 7zip type: %xc; %s", t, G_STRLOC);
- break;
- }
-
- return p;
- }
-
- static void
- rspamd_archive_process_7zip(struct rspamd_task *task,
- struct rspamd_mime_part *part)
- {
- struct rspamd_archive *arch;
- const guchar *start, *p, *end;
- const guchar sz_magic[] = {'7', 'z', 0xBC, 0xAF, 0x27, 0x1C};
- guint64 section_offset = 0, section_length = 0;
-
- start = part->parsed_data.begin;
- p = start;
- end = p + part->parsed_data.len;
-
- if (end - p <= sizeof(guint64) + sizeof(guint32) ||
- memcmp(p, sz_magic, sizeof(sz_magic)) != 0) {
- msg_debug_archive("7z archive is invalid (no 7z magic)");
-
- return;
- }
-
- arch = rspamd_mempool_alloc0(task->task_pool, sizeof(*arch));
- arch->files = g_ptr_array_new();
- arch->type = RSPAMD_ARCHIVE_7ZIP;
- rspamd_mempool_add_destructor(task->task_pool, rspamd_archive_dtor,
- arch);
-
- /* Magic (6 bytes) + version (2 bytes) + crc32 (4 bytes) */
- p += sizeof(guint64) + sizeof(guint32);
-
- SZ_READ_UINT64(section_offset);
- SZ_READ_UINT64(section_length);
-
- if (end - p > sizeof(guint32)) {
- p += sizeof(guint32);
- }
- else {
- msg_debug_archive("7z archive is invalid (truncated crc)");
-
- return;
- }
-
- if (end - p > section_offset) {
- p += section_offset;
- }
- else {
- msg_debug_archive("7z archive is invalid (incorrect section offset)");
-
- return;
- }
-
- while ((p = rspamd_7zip_read_next_section(task, p, end, arch)) != NULL)
- ;
-
- part->part_type = RSPAMD_MIME_PART_ARCHIVE;
- part->specific.arch = arch;
- if (part->cd != NULL) {
- arch->archive_name = &part->cd->filename;
- }
- arch->size = part->parsed_data.len;
- }
-
- static void
- rspamd_archive_process_gzip(struct rspamd_task *task,
- struct rspamd_mime_part *part)
- {
- struct rspamd_archive *arch;
- const guchar *start, *p, *end;
- const guchar gz_magic[] = {0x1F, 0x8B};
- guchar flags;
-
- start = part->parsed_data.begin;
- p = start;
- end = p + part->parsed_data.len;
-
- if (end - p <= 10 || memcmp(p, gz_magic, sizeof(gz_magic)) != 0) {
- msg_debug_archive("gzip archive is invalid (no gzip magic)");
-
- return;
- }
-
- arch = rspamd_mempool_alloc0(task->task_pool, sizeof(*arch));
- arch->files = g_ptr_array_sized_new(1);
- arch->type = RSPAMD_ARCHIVE_GZIP;
- if (part->cd) {
- arch->archive_name = &part->cd->filename;
- }
- rspamd_mempool_add_destructor(task->task_pool, rspamd_archive_dtor,
- arch);
-
- flags = p[3];
-
- if (flags & (1u << 5)) {
- arch->flags |= RSPAMD_ARCHIVE_ENCRYPTED;
- }
-
- if (flags & (1u << 3)) {
- /* We have file name presented in archive, try to use it */
- if (flags & (1u << 1)) {
- /* Multipart */
- p += 12;
- }
- else {
- p += 10;
- }
-
- if (flags & (1u << 2)) {
- /* Optional section */
- guint16 optlen = 0;
-
- RAR_READ_UINT16(optlen);
-
- if (end <= p + optlen) {
- msg_debug_archive("gzip archive is invalid, bad extra length: %d",
- (int) optlen);
-
- return;
- }
-
- p += optlen;
- }
-
- /* Read file name */
- const guchar *fname_start = p;
-
- while (p < end) {
- if (*p == '\0') {
- if (p > fname_start) {
- struct rspamd_archive_file *f;
-
- f = g_malloc0(sizeof(*f));
-
- rspamd_archive_file_try_utf(task, arch, f,
- fname_start, p - fname_start);
-
- if (f->fname) {
- g_ptr_array_add(arch->files, f);
-
- if (f->flags & RSPAMD_ARCHIVE_FILE_OBFUSCATED) {
- arch->flags |= RSPAMD_ARCHIVE_HAS_OBFUSCATED_FILES;
- }
- }
- else {
- /* Invalid filename, skip */
- g_free(f);
- }
-
- goto set;
- }
- }
-
- p++;
- }
-
- /* Wrong filename, not zero terminated */
- msg_debug_archive("gzip archive is invalid, bad filename at pos %d",
- (int) (p - start));
-
- return;
- }
-
- /* Fallback, we need to extract file name from archive name if possible */
- if (part->cd && part->cd->filename.len > 0) {
- const gchar *dot_pos, *slash_pos;
-
- dot_pos = rspamd_memrchr(part->cd->filename.begin, '.',
- part->cd->filename.len);
-
- if (dot_pos) {
- struct rspamd_archive_file *f;
-
- slash_pos = rspamd_memrchr(part->cd->filename.begin, '/',
- part->cd->filename.len);
-
- if (slash_pos && slash_pos < dot_pos) {
- f = g_malloc0(sizeof(*f));
- f->fname = g_string_sized_new(dot_pos - slash_pos);
- g_string_append_len(f->fname, slash_pos + 1,
- dot_pos - slash_pos - 1);
-
- msg_debug_archive("fallback to gzip filename based on cd: %v",
- f->fname);
-
- g_ptr_array_add(arch->files, f);
-
- goto set;
- }
- else {
- const gchar *fname_start = part->cd->filename.begin;
-
- f = g_malloc0(sizeof(*f));
-
- if (memchr(fname_start, '.', part->cd->filename.len) != dot_pos) {
- /* Double dots, something like foo.exe.gz */
- f->fname = g_string_sized_new(dot_pos - fname_start);
- g_string_append_len(f->fname, fname_start,
- dot_pos - fname_start);
- }
- else {
- /* Single dot, something like foo.gzz */
- f->fname = g_string_sized_new(part->cd->filename.len);
- g_string_append_len(f->fname, fname_start,
- part->cd->filename.len);
- }
-
- msg_debug_archive("fallback to gzip filename based on cd: %v",
- f->fname);
-
- g_ptr_array_add(arch->files, f);
-
- goto set;
- }
- }
- }
-
- return;
-
- set:
- /* Set archive data */
- part->part_type = RSPAMD_MIME_PART_ARCHIVE;
- part->specific.arch = arch;
- arch->size = part->parsed_data.len;
- }
-
- static gboolean
- rspamd_archive_cheat_detect(struct rspamd_mime_part *part, const gchar *str,
- const guchar *magic_start, gsize magic_len)
- {
- struct rspamd_content_type *ct;
- const gchar *p;
- rspamd_ftok_t srch, *fname;
-
- ct = part->ct;
- RSPAMD_FTOK_ASSIGN(&srch, "application");
-
- if (ct && ct->type.len && ct->subtype.len > 0 && rspamd_ftok_cmp(&ct->type, &srch) == 0) {
- if (rspamd_substring_search_caseless(ct->subtype.begin, ct->subtype.len,
- str, strlen(str)) != -1) {
- /* We still need to check magic, see #1848 */
- if (magic_start != NULL) {
- if (part->parsed_data.len > magic_len &&
- memcmp(part->parsed_data.begin,
- magic_start, magic_len) == 0) {
- return TRUE;
- }
- /* No magic, refuse this type of archive */
- return FALSE;
- }
- else {
- return TRUE;
- }
- }
- }
-
- if (part->cd) {
- fname = &part->cd->filename;
-
- if (fname && fname->len > strlen(str)) {
- p = fname->begin + fname->len - strlen(str);
-
- if (rspamd_lc_cmp(p, str, strlen(str)) == 0) {
- if (*(p - 1) == '.') {
- if (magic_start != NULL) {
- if (part->parsed_data.len > magic_len &&
- memcmp(part->parsed_data.begin,
- magic_start, magic_len) == 0) {
- return TRUE;
- }
- /* No magic, refuse this type of archive */
- return FALSE;
- }
-
- return TRUE;
- }
- }
- }
-
- if (magic_start != NULL) {
- if (part->parsed_data.len > magic_len &&
- memcmp(part->parsed_data.begin, magic_start, magic_len) == 0) {
- return TRUE;
- }
- }
- }
- else {
- if (magic_start != NULL) {
- if (part->parsed_data.len > magic_len &&
- memcmp(part->parsed_data.begin, magic_start, magic_len) == 0) {
- return TRUE;
- }
- }
- }
-
- return FALSE;
- }
-
- void rspamd_archives_process(struct rspamd_task *task)
- {
- guint i;
- struct rspamd_mime_part *part;
- const guchar rar_magic[] = {0x52, 0x61, 0x72, 0x21, 0x1A, 0x07};
- const guchar zip_magic[] = {0x50, 0x4b, 0x03, 0x04};
- const guchar sz_magic[] = {'7', 'z', 0xBC, 0xAF, 0x27, 0x1C};
- const guchar gz_magic[] = {0x1F, 0x8B, 0x08};
-
- PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, parts), i, part)
- {
- if (part->part_type == RSPAMD_MIME_PART_UNDEFINED) {
- if (part->parsed_data.len > 0) {
- if (rspamd_archive_cheat_detect(part, "zip",
- zip_magic, sizeof(zip_magic))) {
- rspamd_archive_process_zip(task, part);
- }
- else if (rspamd_archive_cheat_detect(part, "rar",
- rar_magic, sizeof(rar_magic))) {
- rspamd_archive_process_rar(task, part);
- }
- else if (rspamd_archive_cheat_detect(part, "7z",
- sz_magic, sizeof(sz_magic))) {
- rspamd_archive_process_7zip(task, part);
- }
- else if (rspamd_archive_cheat_detect(part, "gz",
- gz_magic, sizeof(gz_magic))) {
- rspamd_archive_process_gzip(task, part);
- }
-
- if (part->ct && (part->ct->flags & RSPAMD_CONTENT_TYPE_TEXT) &&
- part->part_type == RSPAMD_MIME_PART_ARCHIVE &&
- part->specific.arch) {
- struct rspamd_archive *arch = part->specific.arch;
-
- msg_info_task("found %s archive with incorrect content-type: %T/%T",
- rspamd_archive_type_str(arch->type),
- &part->ct->type, &part->ct->subtype);
-
- if (!(part->ct->flags & RSPAMD_CONTENT_TYPE_MISSING)) {
- part->ct->flags |= RSPAMD_CONTENT_TYPE_BROKEN;
- }
- }
- }
- }
- }
- }
-
-
- const gchar *
- rspamd_archive_type_str(enum rspamd_archive_type type)
- {
- const gchar *ret = "unknown";
-
- switch (type) {
- case RSPAMD_ARCHIVE_ZIP:
- ret = "zip";
- break;
- case RSPAMD_ARCHIVE_RAR:
- ret = "rar";
- break;
- case RSPAMD_ARCHIVE_7ZIP:
- ret = "7z";
- break;
- case RSPAMD_ARCHIVE_GZIP:
- ret = "gz";
- break;
- }
-
- return ret;
- }
|