You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

archives.c 42KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931
  1. /*-
  2. * Copyright 2016 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "config.h"
  17. #include "message.h"
  18. #include "task.h"
  19. #include "archives.h"
  20. #include "libmime/mime_encoding.h"
  21. #include <unicode/uchar.h>
  22. #include <unicode/utf8.h>
  23. #include <unicode/utf16.h>
  24. #include <unicode/ucnv.h>
  /*
   * Debug logging helper for this file: forwards its arguments to
   * rspamd_conditional_debug_fast () under the "archive" module name, tagged
   * with the uid of the task memory pool and the calling function name.
   * It expands to an expression that reads `task`, so a variable named `task`
   * must be in scope wherever the macro is used.
   */
  25. #define msg_debug_archive(...) rspamd_conditional_debug_fast (NULL, NULL, \
  26. rspamd_archive_log_id, "archive", task->task_pool->tag.uid, \
  27. G_STRFUNC, \
  28. __VA_ARGS__)
  /*
   * NOTE(review): presumably this is what defines rspamd_archive_log_id used
   * by the macro above - confirm against the logger header.
   */
  29. INIT_LOG_MODULE(archive)
  30. static void
  31. rspamd_archive_dtor (gpointer p)
  32. {
  33. struct rspamd_archive *arch = p;
  34. struct rspamd_archive_file *f;
  35. guint i;
  36. for (i = 0; i < arch->files->len; i ++) {
  37. f = g_ptr_array_index (arch->files, i);
  38. if (f->fname) {
  39. g_string_free (f->fname, TRUE);
  40. }
  41. g_free (f);
  42. }
  43. g_ptr_array_free (arch->files, TRUE);
  44. }
  45. static GString *
  46. rspamd_archive_file_try_utf (struct rspamd_task *task,
  47. const gchar *in, gsize inlen)
  48. {
  49. const gchar *charset = NULL, *p, *end;
  50. GString *res;
  51. charset = rspamd_mime_charset_find_by_content (in, inlen);
  52. if (charset) {
  53. UChar *tmp;
  54. UErrorCode uc_err = U_ZERO_ERROR;
  55. gint32 r, clen, dlen;
  56. struct rspamd_charset_converter *conv;
  57. UConverter *utf8_converter;
  58. conv = rspamd_mime_get_converter_cached (charset, &uc_err);
  59. utf8_converter = rspamd_get_utf8_converter ();
  60. if (conv == NULL) {
  61. msg_err_task ("cannot open converter for %s: %s",
  62. charset, u_errorName (uc_err));
  63. return NULL;
  64. }
  65. tmp = g_malloc (sizeof (*tmp) * (inlen + 1));
  66. r = rspamd_converter_to_uchars (conv, tmp, inlen + 1,
  67. in, inlen, &uc_err);
  68. if (!U_SUCCESS (uc_err)) {
  69. msg_err_task ("cannot convert data to unicode from %s: %s",
  70. charset, u_errorName (uc_err));
  71. g_free (tmp);
  72. return NULL;
  73. }
  74. clen = ucnv_getMaxCharSize (utf8_converter);
  75. dlen = UCNV_GET_MAX_BYTES_FOR_STRING (r, clen);
  76. res = g_string_sized_new (dlen);
  77. r = ucnv_fromUChars (utf8_converter, res->str, dlen, tmp, r, &uc_err);
  78. if (!U_SUCCESS (uc_err)) {
  79. msg_err_task ("cannot convert data from unicode from %s: %s",
  80. charset, u_errorName (uc_err));
  81. g_free (tmp);
  82. g_string_free (res, TRUE);
  83. return NULL;
  84. }
  85. g_free (tmp);
  86. res->len = r;
  87. msg_debug_archive ("converted from %s to UTF-8 inlen: %z, outlen: %d",
  88. charset, inlen, r);
  89. }
  90. else {
  91. /* Convert unsafe characters to '?' */
  92. res = g_string_sized_new (inlen);
  93. p = in;
  94. end = in + inlen;
  95. while (p < end) {
  96. if (g_ascii_isgraph (*p)) {
  97. g_string_append_c (res, *p);
  98. }
  99. else {
  100. g_string_append_c (res, '?');
  101. }
  102. p ++;
  103. }
  104. }
  105. return res;
  106. }
  107. static void
  108. rspamd_archive_process_zip (struct rspamd_task *task,
  109. struct rspamd_mime_part *part)
  110. {
  111. const guchar *p, *start, *end, *eocd = NULL, *cd;
  112. const guint32 eocd_magic = 0x06054b50, cd_basic_len = 46;
  113. const guchar cd_magic[] = {0x50, 0x4b, 0x01, 0x02};
  114. const guint max_processed = 1024;
  115. guint32 cd_offset, cd_size, comp_size, uncomp_size, processed = 0;
  116. guint16 extra_len, fname_len, comment_len;
  117. struct rspamd_archive *arch;
  118. struct rspamd_archive_file *f;
  119. /* Zip files have interesting data at the end of archive */
  120. p = part->parsed_data.begin + part->parsed_data.len - 1;
  121. start = part->parsed_data.begin;
  122. end = p;
  123. /* Search for EOCD:
  124. * 22 bytes is a typical size of eocd without a comment and
  125. * end points one byte after the last character
  126. */
  127. p -= 21;
  128. while (p > start + sizeof (guint32)) {
  129. guint32 t;
  130. if (processed > max_processed) {
  131. break;
  132. }
  133. /* XXX: not an efficient approach */
  134. memcpy (&t, p, sizeof (t));
  135. if (GUINT32_FROM_LE (t) == eocd_magic) {
  136. eocd = p;
  137. break;
  138. }
  139. p --;
  140. processed ++;
  141. }
  142. if (eocd == NULL) {
  143. /* Not a zip file */
  144. msg_debug_archive ("zip archive is invalid (no EOCD)");
  145. return;
  146. }
  147. if (end - eocd < 21) {
  148. msg_debug_archive ("zip archive is invalid (short EOCD)");
  149. return;
  150. }
  151. memcpy (&cd_size, eocd + 12, sizeof (cd_size));
  152. cd_size = GUINT32_FROM_LE (cd_size);
  153. memcpy (&cd_offset, eocd + 16, sizeof (cd_offset));
  154. cd_offset = GUINT32_FROM_LE (cd_offset);
  155. /* We need to check sanity as well */
  156. if (cd_offset + cd_size != (guint)(eocd - start)) {
  157. msg_debug_archive ("zip archive is invalid (bad size/offset for CD)");
  158. return;
  159. }
  160. cd = start + cd_offset;
  161. arch = rspamd_mempool_alloc0 (task->task_pool, sizeof (*arch));
  162. arch->files = g_ptr_array_new ();
  163. arch->type = RSPAMD_ARCHIVE_ZIP;
  164. rspamd_mempool_add_destructor (task->task_pool, rspamd_archive_dtor,
  165. arch);
  166. while (cd < eocd) {
  167. /* Read central directory record */
  168. if (eocd - cd < cd_basic_len ||
  169. memcmp (cd, cd_magic, sizeof (cd_magic)) != 0) {
  170. msg_debug_archive ("zip archive is invalid (bad cd record)");
  171. return;
  172. }
  173. memcpy (&comp_size, cd + 20, sizeof (guint32));
  174. comp_size = GUINT32_FROM_LE (comp_size);
  175. memcpy (&uncomp_size, cd + 24, sizeof (guint32));
  176. uncomp_size = GUINT32_FROM_LE (uncomp_size);
  177. memcpy (&fname_len, cd + 28, sizeof (fname_len));
  178. fname_len = GUINT16_FROM_LE (fname_len);
  179. memcpy (&extra_len, cd + 30, sizeof (extra_len));
  180. extra_len = GUINT16_FROM_LE (extra_len);
  181. memcpy (&comment_len, cd + 32, sizeof (comment_len));
  182. comment_len = GUINT16_FROM_LE (comment_len);
  183. if (cd + fname_len + comment_len + extra_len + cd_basic_len > eocd) {
  184. msg_debug_archive ("zip archive is invalid (too large cd record)");
  185. return;
  186. }
  187. f = g_malloc0 (sizeof (*f));
  188. f->fname = rspamd_archive_file_try_utf (task,
  189. cd + cd_basic_len, fname_len);
  190. f->compressed_size = comp_size;
  191. f->uncompressed_size = uncomp_size;
  192. if (f->fname) {
  193. g_ptr_array_add (arch->files, f);
  194. msg_debug_archive ("found file in zip archive: %v", f->fname);
  195. }
  196. else {
  197. g_free (f);
  198. }
  199. cd += fname_len + comment_len + extra_len + cd_basic_len;
  200. }
  201. part->flags |= RSPAMD_MIME_PART_ARCHIVE;
  202. part->specific.arch = arch;
  203. if (part->cd) {
  204. arch->archive_name = &part->cd->filename;
  205. }
  206. arch->size = part->parsed_data.len;
  207. }
  208. static inline gint
  209. rspamd_archive_rar_read_vint (const guchar *start, gsize remain, guint64 *res)
  210. {
  211. /*
  212. * From http://www.rarlab.com/technote.htm:
  213. * Variable length integer. Can include one or more bytes, where
  214. * lower 7 bits of every byte contain integer data and highest bit
  215. * in every byte is the continuation flag.
  216. * If highest bit is 0, this is the last byte in sequence.
  217. * So first byte contains 7 least significant bits of integer and
  218. * continuation flag. Second byte, if present, contains next 7 bits and so on.
  219. */
  220. guint64 t = 0;
  221. guint shift = 0;
  222. const guchar *p = start;
  223. while (remain > 0 && shift <= 57) {
  224. if (*p & 0x80) {
  225. t |= ((guint64)(*p & 0x7f)) << shift;
  226. }
  227. else {
  228. t |= ((guint64)(*p & 0x7f)) << shift;
  229. p ++;
  230. break;
  231. }
  232. shift += 7;
  233. p++;
  234. remain --;
  235. }
  236. if (remain == 0 || shift > 64) {
  237. return -1;
  238. }
  239. *res = GUINT64_FROM_LE (t);
  240. return p - start;
  241. }
  /*
   * Helper macros for the RAR parsers below. They all operate on variables of
   * the calling function: the cursor `p`, the data limit `end` and (through
   * msg_debug_archive) `task`. On failure each of them logs a debug message
   * and executes a bare `return;`, so they can only be used in functions
   * returning void.
   */
  /*
   * Advance `p` by n bytes. Fails if n is not positive or if fewer than n
   * bytes remain before `end`. Note that n is evaluated more than once.
   */
  242. #define RAR_SKIP_BYTES(n) do { \
  243. if ((n) <= 0) { \
  244. msg_debug_archive ("rar archive is invalid (bad skip value)"); \
  245. return; \
  246. } \
  247. if ((gsize)(end - p) < (n)) { \
  248. msg_debug_archive ("rar archive is invalid (truncated)"); \
  249. return; \
  250. } \
  251. p += (n); \
  252. } while (0)
  /*
   * Decode a vint at `p` into the caller's `vint`, storing the encoded length
   * in the caller's `r`. The cursor `p` is NOT advanced. Fails when the
   * decoder returns -1 or 0.
   */
  253. #define RAR_READ_VINT() do { \
  254. r = rspamd_archive_rar_read_vint (p, end - p, &vint); \
  255. if (r == -1) { \
  256. msg_debug_archive ("rar archive is invalid (bad vint)"); \
  257. return; \
  258. } \
  259. else if (r == 0) { \
  260. msg_debug_archive ("rar archive is invalid (BAD vint offset)"); \
  261. return; \
  262. }\
  263. } while (0)
  /*
   * Decode a vint at `p` into the caller's `vint` and advance `p` past it.
   * Fails when the decoder returns -1.
   */
  264. #define RAR_READ_VINT_SKIP() do { \
  265. r = rspamd_archive_rar_read_vint (p, end - p, &vint); \
  266. if (r == -1) { \
  267. msg_debug_archive ("rar archive is invalid (bad vint)"); \
  268. return; \
  269. } \
  270. p += r; \
  271. } while (0)
  /*
   * Read a 16 bit little-endian integer at `p` into n and advance `p` by two
   * bytes. Fails if fewer than two bytes remain.
   */
  272. #define RAR_READ_UINT16(n) do { \
  273. if (end - p < (glong)sizeof (guint16)) { \
  274. msg_debug_archive ("rar archive is invalid (bad int16)"); \
  275. return; \
  276. } \
  277. n = p[0] + (p[1] << 8); \
  278. p += sizeof (guint16); \
  279. } while (0)
  /*
   * Read a 32 bit little-endian integer at `p` into n and advance `p` by four
   * bytes. Fails if fewer than four bytes remain.
   */
  280. #define RAR_READ_UINT32(n) do { \
  281. if (end - p < (glong)sizeof (guint32)) { \
  282. msg_debug_archive ("rar archive is invalid (bad int32)"); \
  283. return; \
  284. } \
  285. n = (guint)p[0] + ((guint)p[1] << 8) + ((guint)p[2] << 16) + ((guint)p[3] << 24); \
  286. p += sizeof (guint32); \
  287. } while (0)
  288. static void
  289. rspamd_archive_process_rar_v4 (struct rspamd_task *task, const guchar *start,
  290. const guchar *end, struct rspamd_mime_part *part)
  291. {
  292. const guchar *p = start, *start_section;
  293. guint8 type;
  294. guint flags;
  295. guint64 sz, comp_sz = 0, uncomp_sz = 0;
  296. struct rspamd_archive *arch;
  297. struct rspamd_archive_file *f;
  298. arch = rspamd_mempool_alloc0 (task->task_pool, sizeof (*arch));
  299. arch->files = g_ptr_array_new ();
  300. arch->type = RSPAMD_ARCHIVE_RAR;
  301. rspamd_mempool_add_destructor (task->task_pool, rspamd_archive_dtor,
  302. arch);
  303. while (p < end) {
  304. /* Crc16 */
  305. start_section = p;
  306. RAR_SKIP_BYTES (sizeof (guint16));
  307. type = *p;
  308. p ++;
  309. RAR_READ_UINT16 (flags);
  310. if (type == 0x73) {
  311. /* Main header, check for encryption */
  312. if (flags & 0x80) {
  313. arch->flags |= RSPAMD_ARCHIVE_ENCRYPTED;
  314. goto end;
  315. }
  316. }
  317. RAR_READ_UINT16 (sz);
  318. if (flags & 0x8000) {
  319. /* We also need to read ADD_SIZE element */
  320. guint32 tmp;
  321. RAR_READ_UINT32 (tmp);
  322. sz += tmp;
  323. /* This is also used as PACK_SIZE */
  324. comp_sz = tmp;
  325. }
  326. if (sz == 0) {
  327. /* Zero sized block - error */
  328. msg_debug_archive ("rar archive is invalid (zero size block)");
  329. return;
  330. }
  331. if (type == 0x74) {
  332. guint fname_len;
  333. /* File header */
  334. /* Uncompressed size */
  335. RAR_READ_UINT32 (uncomp_sz);
  336. /* Skip to NAME_SIZE element */
  337. RAR_SKIP_BYTES (11);
  338. RAR_READ_UINT16 (fname_len);
  339. if (fname_len == 0 || fname_len > (gsize)(end - p)) {
  340. msg_debug_archive ("rar archive is invalid (bad filename size: %d)",
  341. fname_len);
  342. return;
  343. }
  344. /* Attrs */
  345. RAR_SKIP_BYTES (4);
  346. if (flags & 0x100) {
  347. /* We also need to read HIGH_PACK_SIZE */
  348. guint32 tmp;
  349. RAR_READ_UINT32 (tmp);
  350. sz += tmp;
  351. comp_sz += tmp;
  352. /* HIGH_UNP_SIZE */
  353. RAR_READ_UINT32 (tmp);
  354. uncomp_sz += tmp;
  355. }
  356. f = g_malloc0 (sizeof (*f));
  357. if (flags & 0x200) {
  358. /* We have unicode + normal version */
  359. guchar *tmp;
  360. tmp = memchr (p, '\0', fname_len);
  361. if (tmp != NULL) {
  362. /* Just use ASCII version */
  363. f->fname = rspamd_archive_file_try_utf (task, p, tmp - p);
  364. msg_debug_archive ("found ascii filename in rarv4 archive: %v",
  365. f->fname);
  366. }
  367. else {
  368. /* We have UTF8 filename, use it as is */
  369. f->fname = rspamd_archive_file_try_utf (task, p, fname_len);
  370. msg_debug_archive ("found utf filename in rarv4 archive: %v",
  371. f->fname);
  372. }
  373. }
  374. else {
  375. f->fname = rspamd_archive_file_try_utf (task, p, fname_len);
  376. msg_debug_archive ("found ascii (old) filename in rarv4 archive: %v",
  377. f->fname);
  378. }
  379. f->compressed_size = comp_sz;
  380. f->uncompressed_size = uncomp_sz;
  381. if (flags & 0x4) {
  382. f->flags |= RSPAMD_ARCHIVE_FILE_ENCRYPTED;
  383. }
  384. if (f->fname) {
  385. g_ptr_array_add (arch->files, f);
  386. }
  387. else {
  388. g_free (f);
  389. }
  390. }
  391. p = start_section;
  392. RAR_SKIP_BYTES (sz);
  393. }
  394. end:
  395. part->flags |= RSPAMD_MIME_PART_ARCHIVE;
  396. part->specific.arch = arch;
  397. arch->archive_name = &part->cd->filename;
  398. arch->size = part->parsed_data.len;
  399. }
  400. static void
  401. rspamd_archive_process_rar (struct rspamd_task *task,
  402. struct rspamd_mime_part *part)
  403. {
  404. const guchar *p, *end, *section_start;
  405. const guchar rar_v5_magic[] = {0x52, 0x61, 0x72, 0x21, 0x1A, 0x07, 0x01, 0x00},
  406. rar_v4_magic[] = {0x52, 0x61, 0x72, 0x21, 0x1A, 0x07, 0x00};
  407. const guint rar_encrypted_header = 4, rar_main_header = 1,
  408. rar_file_header = 2;
  409. guint64 vint, sz, comp_sz = 0, uncomp_sz = 0, flags = 0, type = 0,
  410. extra_sz = 0;
  411. struct rspamd_archive *arch;
  412. struct rspamd_archive_file *f;
  413. gint r;
  414. p = part->parsed_data.begin;
  415. end = p + part->parsed_data.len;
  416. if ((gsize)(end - p) <= sizeof (rar_v5_magic)) {
  417. msg_debug_archive ("rar archive is invalid (too small)");
  418. return;
  419. }
  420. if (memcmp (p, rar_v5_magic, sizeof (rar_v5_magic)) == 0) {
  421. p += sizeof (rar_v5_magic);
  422. }
  423. else if (memcmp (p, rar_v4_magic, sizeof (rar_v4_magic)) == 0) {
  424. p += sizeof (rar_v4_magic);
  425. rspamd_archive_process_rar_v4 (task, p, end, part);
  426. return;
  427. }
  428. else {
  429. msg_debug_archive ("rar archive is invalid (no rar magic)");
  430. return;
  431. }
  432. /* Rar v5 format */
  433. arch = rspamd_mempool_alloc0 (task->task_pool, sizeof (*arch));
  434. arch->files = g_ptr_array_new ();
  435. arch->type = RSPAMD_ARCHIVE_RAR;
  436. rspamd_mempool_add_destructor (task->task_pool, rspamd_archive_dtor,
  437. arch);
  438. /* Now we can have either encryption header or archive header */
  439. /* Crc 32 */
  440. RAR_SKIP_BYTES (sizeof (guint32));
  441. /* Size */
  442. RAR_READ_VINT_SKIP ();
  443. sz = vint;
  444. /* Type */
  445. section_start = p;
  446. RAR_READ_VINT_SKIP ();
  447. type = vint;
  448. /* Header flags */
  449. RAR_READ_VINT_SKIP ();
  450. flags = vint;
  451. if (flags & 0x1) {
  452. /* Have extra zone */
  453. RAR_READ_VINT_SKIP ();
  454. }
  455. if (flags & 0x2) {
  456. /* Data zone is presented */
  457. RAR_READ_VINT_SKIP ();
  458. sz += vint;
  459. }
  460. if (type == rar_encrypted_header) {
  461. /* We can't read any further information as archive is encrypted */
  462. arch->flags |= RSPAMD_ARCHIVE_ENCRYPTED;
  463. goto end;
  464. }
  465. else if (type != rar_main_header) {
  466. msg_debug_archive ("rar archive is invalid (bad main header)");
  467. return;
  468. }
  469. /* Nothing useful in main header */
  470. p = section_start;
  471. RAR_SKIP_BYTES (sz);
  472. while (p < end) {
  473. gboolean has_extra = FALSE;
  474. /* Read the next header */
  475. /* Crc 32 */
  476. RAR_SKIP_BYTES (sizeof (guint32));
  477. /* Size */
  478. RAR_READ_VINT_SKIP ();
  479. sz = vint;
  480. if (sz == 0) {
  481. /* Zero sized block - error */
  482. msg_debug_archive ("rar archive is invalid (zero size block)");
  483. return;
  484. }
  485. section_start = p;
  486. /* Type */
  487. RAR_READ_VINT_SKIP ();
  488. type = vint;
  489. /* Header flags */
  490. RAR_READ_VINT_SKIP ();
  491. flags = vint;
  492. if (flags & 0x1) {
  493. /* Have extra zone */
  494. RAR_READ_VINT_SKIP ();
  495. extra_sz = vint;
  496. has_extra = TRUE;
  497. }
  498. if (flags & 0x2) {
  499. /* Data zone is presented */
  500. RAR_READ_VINT_SKIP ();
  501. sz += vint;
  502. comp_sz = vint;
  503. }
  504. if (type != rar_file_header) {
  505. p = section_start;
  506. RAR_SKIP_BYTES (sz);
  507. }
  508. else {
  509. /* We have a file header, go forward */
  510. guint64 fname_len;
  511. /* File header specific flags */
  512. RAR_READ_VINT_SKIP ();
  513. flags = vint;
  514. /* Unpacked size */
  515. RAR_READ_VINT_SKIP ();
  516. uncomp_sz = vint;
  517. /* Attributes */
  518. RAR_READ_VINT_SKIP ();
  519. if (flags & 0x2) {
  520. /* Unix mtime */
  521. RAR_SKIP_BYTES (sizeof (guint32));
  522. }
  523. if (flags & 0x4) {
  524. /* Crc32 */
  525. RAR_SKIP_BYTES (sizeof (guint32));
  526. }
  527. /* Compression */
  528. RAR_READ_VINT_SKIP ();
  529. /* Host OS */
  530. RAR_READ_VINT_SKIP ();
  531. /* Filename length (finally!) */
  532. RAR_READ_VINT_SKIP ();
  533. fname_len = vint;
  534. if (fname_len == 0 || fname_len > (gsize)(end - p)) {
  535. msg_debug_archive ("rar archive is invalid (bad filename size)");
  536. return;
  537. }
  538. f = g_malloc0 (sizeof (*f));
  539. f->uncompressed_size = uncomp_sz;
  540. f->compressed_size = comp_sz;
  541. f->fname = rspamd_archive_file_try_utf (task, p, fname_len);
  542. if (f->fname) {
  543. msg_debug_archive ("added rarv5 file: %v", f->fname);
  544. g_ptr_array_add (arch->files, f);
  545. }
  546. else {
  547. g_free (f);
  548. f = NULL;
  549. }
  550. if (f && has_extra && extra_sz > 0 &&
  551. p + fname_len + extra_sz < end) {
  552. /* Try to find encryption record in extra field */
  553. const guchar *ex = p + fname_len;
  554. while (ex < p + extra_sz) {
  555. const guchar *t;
  556. gint64 cur_sz = 0, sec_type = 0;
  557. r = rspamd_archive_rar_read_vint (ex, extra_sz, &cur_sz);
  558. if (r == -1) {
  559. msg_debug_archive ("rar archive is invalid (bad vint)");
  560. return;
  561. }
  562. t = ex + r;
  563. r = rspamd_archive_rar_read_vint (t, extra_sz - r, &sec_type);
  564. if (r == -1) {
  565. msg_debug_archive ("rar archive is invalid (bad vint)");
  566. return;
  567. }
  568. if (sec_type == 0x01) {
  569. f->flags |= RSPAMD_ARCHIVE_FILE_ENCRYPTED;
  570. arch->flags |= RSPAMD_ARCHIVE_ENCRYPTED;
  571. break;
  572. }
  573. ex += cur_sz;
  574. }
  575. }
  576. /* Restore p to the beginning of the header */
  577. p = section_start;
  578. RAR_SKIP_BYTES (sz);
  579. }
  580. }
  581. end:
  582. part->flags |= RSPAMD_MIME_PART_ARCHIVE;
  583. part->specific.arch = arch;
  584. if (part->cd != NULL) {
  585. arch->archive_name = &part->cd->filename;
  586. }
  587. arch->size = part->parsed_data.len;
  588. }
  589. static inline gint
  590. rspamd_archive_7zip_read_vint (const guchar *start, gsize remain, guint64 *res)
  591. {
  592. /*
  593. * REAL_UINT64 means real UINT64.
  594. * UINT64 means real UINT64 encoded with the following scheme:
  595. *
  596. * Size of encoding sequence depends from first byte:
  597. * First_Byte Extra_Bytes Value
  598. * (binary)
  599. * 0xxxxxxx : ( xxxxxxx )
  600. * 10xxxxxx BYTE y[1] : ( xxxxxx << (8 * 1)) + y
  601. * 110xxxxx BYTE y[2] : ( xxxxx << (8 * 2)) + y
  602. * ...
  603. * 1111110x BYTE y[6] : ( x << (8 * 6)) + y
  604. * 11111110 BYTE y[7] : y
  605. * 11111111 BYTE y[8] : y
  606. */
  607. guchar t;
  608. if (remain == 0) {
  609. return -1;
  610. }
  611. t = *start;
  612. if (!isset (&t, 7)) {
  613. /* Trivial case */
  614. *res = t;
  615. return 1;
  616. }
  617. else if (t == 0xFF) {
  618. if (remain >= sizeof (guint64) + 1) {
  619. memcpy (res, start + 1, sizeof (guint64));
  620. *res = GUINT64_FROM_LE (*res);
  621. return sizeof (guint64) + 1;
  622. }
  623. }
  624. else {
  625. gint cur_bit = 6, intlen = 1;
  626. const guchar bmask = 0xFF;
  627. guint64 tgt;
  628. while (cur_bit > 0) {
  629. if (!isset (&t, cur_bit)) {
  630. if (remain >= intlen + 1) {
  631. memcpy (&tgt, start + 1, intlen);
  632. tgt = GUINT64_FROM_LE (tgt);
  633. /* Shift back */
  634. tgt >>= sizeof (tgt) - NBBY * intlen;
  635. /* Add masked value */
  636. tgt += (guint64)(t & (bmask >> (NBBY - cur_bit)))
  637. << (NBBY * intlen);
  638. *res = tgt;
  639. return intlen + 1;
  640. }
  641. }
  642. cur_bit --;
  643. intlen ++;
  644. }
  645. }
  646. return -1;
  647. }
  /*
   * Helper macros for the 7zip parsers. They operate on the caller's cursor
   * `p` and data limit `end`, and log through msg_debug_archive (so `task`
   * must be in scope). On failure they return from the calling function;
   * note that the return statement differs between the macros.
   */
  /*
   * Decode a 7zip vint at `p` into the caller's `vint`, using the caller's
   * `r` for the encoded length, and advance `p`. On failure executes a bare
   * `return;` (for functions returning void).
   */
  648. #define SZ_READ_VINT_SKIP() do { \
  649. r = rspamd_archive_7zip_read_vint (p, end - p, &vint); \
  650. if (r == -1) { \
  651. msg_debug_archive ("7z archive is invalid (bad vint)"); \
  652. return; \
  653. } \
  654. p += r; \
  655. } while (0)
  /*
   * Decode a 7zip vint at `p` into var and advance `p`. Uses its own local
   * `r`. On failure executes `return NULL;` (for functions returning a
   * pointer).
   */
  656. #define SZ_READ_VINT(var) do { \
  657. int r; \
  658. r = rspamd_archive_7zip_read_vint (p, end - p, &(var)); \
  659. if (r == -1) { \
  660. msg_debug_archive ("7z archive is invalid (bad vint): %s", G_STRLOC); \
  661. return NULL; \
  662. } \
  663. p += r; \
  664. } while (0)
  /*
   * Read a fixed size 64 bit little-endian integer at `p` into n and advance
   * `p` by eight bytes. On failure executes a bare `return;`.
   */
  665. #define SZ_READ_UINT64(n) do { \
  666. if (end - p < (goffset)sizeof (guint64)) { \
  667. msg_debug_archive ("7zip archive is invalid (bad uint64): %s", G_STRLOC); \
  668. return; \
  669. } \
  670. memcpy (&(n), p, sizeof (guint64)); \
  671. n = GUINT64_FROM_LE(n); \
  672. p += sizeof (guint64); \
  673. } while (0)
  /*
   * Advance `p` by n bytes if that many bytes remain before `end`; otherwise
   * log and execute `return NULL;`. Note that n is evaluated more than once.
   */
  674. #define SZ_SKIP_BYTES(n) do { \
  675. if (end - p >= (n)) { \
  676. p += (n); \
  677. } \
  678. else { \
  679. msg_debug_archive ("7zip archive is invalid (truncated); wanted to read %d bytes, %d avail: %s", (gint)(n), (gint)(end - p), G_STRLOC); \
  680. return NULL; \
  681. } \
  682. } while (0)
  /*
   * One byte markers that introduce sections and properties inside a 7zip
   * header. The parsers in this file read such a byte from the input and
   * dispatch on it (e.g. kSize, kCRC and kEnd while reading pack info).
   * NOTE(review): the values look like the property IDs from the 7z format
   * description - confirm against the format documentation.
   */
  683. enum rspamd_7zip_header_mark {
  684. kEnd = 0x00,
  685. kHeader = 0x01,
  686. kArchiveProperties = 0x02,
  687. kAdditionalStreamsInfo = 0x03,
  688. kMainStreamsInfo = 0x04,
  689. kFilesInfo = 0x05,
  690. kPackInfo = 0x06,
  691. kUnPackInfo = 0x07,
  692. kSubStreamsInfo = 0x08,
  693. kSize = 0x09,
  694. kCRC = 0x0A,
  695. kFolder = 0x0B,
  696. kCodersUnPackSize = 0x0C,
  697. kNumUnPackStream = 0x0D,
  698. kEmptyStream = 0x0E,
  699. kEmptyFile = 0x0F,
  700. kAnti = 0x10,
  701. kName = 0x11,
  702. kCTime = 0x12,
  703. kATime = 0x13,
  704. kMTime = 0x14,
  705. kWinAttributes = 0x15,
  706. kComment = 0x16,
  707. kEncodedHeader = 0x17,
  708. kStartPos = 0x18,
  709. kDummy = 0x19,
  710. };
  /* Codec identifiers of the encryption methods recognised in 7zip coders */
  711. #define _7Z_CRYPTO_MAIN_ZIP 0x06F10101 /* Main Zip crypto algo */
  712. #define _7Z_CRYPTO_RAR_29 0x06F10303 /* Rar29 AES-128 + (modified SHA-1) */
  713. #define _7Z_CRYPTO_AES_256_SHA_256 0x06F10701 /* AES-256 + SHA-256 */
  /*
   * True when codec_id equals one of the three crypto codec ids above.
   * Note that codec_id is evaluated up to three times.
   */
  714. #define IS_SZ_ENCRYPTED(codec_id) (((codec_id) == _7Z_CRYPTO_MAIN_ZIP) || \
  715. ((codec_id) == _7Z_CRYPTO_RAR_29) || \
  716. ((codec_id) == _7Z_CRYPTO_AES_256_SHA_256))
  717. static const guchar *
  718. rspamd_7zip_read_bits (struct rspamd_task *task,
  719. const guchar *p, const guchar *end,
  720. struct rspamd_archive *arch, guint nbits,
  721. guint *pbits_set)
  722. {
  723. unsigned mask = 0, avail = 0, i;
  724. gboolean bit_set = 0;
  725. for (i = 0; i < nbits; i++) {
  726. if (mask == 0) {
  727. avail = *p;
  728. SZ_SKIP_BYTES(1);
  729. mask = 0x80;
  730. }
  731. bit_set = (avail & mask) ? 1 : 0;
  732. if (bit_set && pbits_set) {
  733. (*pbits_set) ++;
  734. }
  735. mask >>= 1;
  736. }
  737. return p;
  738. }
  739. static const guchar *
  740. rspamd_7zip_read_digest (struct rspamd_task *task,
  741. const guchar *p, const guchar *end,
  742. struct rspamd_archive *arch,
  743. guint64 num_streams,
  744. guint *pdigest_read)
  745. {
  746. guchar all_defined = *p;
  747. guint64 i;
  748. guint num_defined = 0;
  749. /*
  750. * BYTE AllAreDefined
  751. * if (AllAreDefined == 0)
  752. * {
  753. * for(NumStreams)
  754. * BIT Defined
  755. * }
  756. * UINT32 CRCs[NumDefined]
  757. */
  758. SZ_SKIP_BYTES(1);
  759. if (all_defined) {
  760. num_defined = num_streams;
  761. }
  762. else {
  763. if (num_streams > 8192) {
  764. /* Gah */
  765. return NULL;
  766. }
  767. p = rspamd_7zip_read_bits (task, p, end, arch, num_streams, &num_defined);
  768. if (p == NULL) {
  769. return NULL;
  770. }
  771. }
  772. for (i = 0; i < num_defined; i ++) {
  773. SZ_SKIP_BYTES(sizeof(guint32));
  774. }
  775. if (pdigest_read) {
  776. *pdigest_read = num_defined;
  777. }
  778. return p;
  779. }
  780. static const guchar *
  781. rspamd_7zip_read_pack_info (struct rspamd_task *task,
  782. const guchar *p, const guchar *end,
  783. struct rspamd_archive *arch)
  784. {
  785. guint64 pack_pos = 0, pack_streams = 0, i, cur_sz;
  786. guint num_digests = 0;
  787. guchar t;
  788. /*
  789. * UINT64 PackPos
  790. * UINT64 NumPackStreams
  791. *
  792. * []
  793. * BYTE NID::kSize (0x09)
  794. * UINT64 PackSizes[NumPackStreams]
  795. * []
  796. *
  797. * []
  798. * BYTE NID::kCRC (0x0A)
  799. * PackStreamDigests[NumPackStreams]
  800. * []
  801. * BYTE NID::kEnd
  802. */
  803. SZ_READ_VINT(pack_pos);
  804. SZ_READ_VINT(pack_streams);
  805. while (p != NULL && p < end) {
  806. t = *p;
  807. SZ_SKIP_BYTES(1);
  808. msg_debug_archive ("7zip: read pack info %xc", t);
  809. switch (t) {
  810. case kSize:
  811. /* We need to skip pack_streams VINTS */
  812. for (i = 0; i < pack_streams; i++) {
  813. SZ_READ_VINT(cur_sz);
  814. }
  815. break;
  816. case kCRC:
  817. /* CRCs are more complicated */
  818. p = rspamd_7zip_read_digest (task, p, end, arch, pack_streams,
  819. &num_digests);
  820. break;
  821. case kEnd:
  822. goto end;
  823. break;
  824. default:
  825. p = NULL;
  826. msg_debug_archive ("bad 7zip type: %xc; %s", t, G_STRLOC);
  827. goto end;
  828. break;
  829. }
  830. }
  831. end:
  832. return p;
  833. }
  834. static const guchar *
  835. rspamd_7zip_read_folder (struct rspamd_task *task,
  836. const guchar *p, const guchar *end,
  837. struct rspamd_archive *arch, guint *pnstreams, guint *ndigests)
  838. {
  839. guint64 ncoders = 0, i, j, noutstreams = 0, ninstreams = 0;
  840. SZ_READ_VINT (ncoders);
  841. for (i = 0; i < ncoders && p != NULL && p < end; i ++) {
  842. guint64 sz, tmp;
  843. guchar t;
  844. /*
  845. * BYTE
  846. * {
  847. * 0:3 CodecIdSize
  848. * 4: Is Complex Coder
  849. * 5: There Are Attributes
  850. * 6: Reserved
  851. * 7: There are more alternative methods. (Not used anymore, must be 0).
  852. * }
  853. * BYTE CodecId[CodecIdSize]
  854. * if (Is Complex Coder)
  855. * {
  856. * UINT64 NumInStreams;
  857. * UINT64 NumOutStreams;
  858. * }
  859. * if (There Are Attributes)
  860. * {
  861. * UINT64 PropertiesSize
  862. * BYTE Properties[PropertiesSize]
  863. * }
  864. */
  865. t = *p;
  866. SZ_SKIP_BYTES (1);
  867. sz = t & 0xF;
  868. /* Codec ID */
  869. tmp = 0;
  870. for (j = 0; j < sz; j++) {
  871. tmp <<= 8;
  872. tmp += p[j];
  873. }
  874. msg_debug_archive ("7zip: read codec id: %L", tmp);
  875. if (IS_SZ_ENCRYPTED (tmp)) {
  876. arch->flags |= RSPAMD_ARCHIVE_ENCRYPTED;
  877. }
  878. SZ_SKIP_BYTES (sz);
  879. if (t & (1u << 4)) {
  880. /* Complex */
  881. SZ_READ_VINT (tmp); /* InStreams */
  882. ninstreams += tmp;
  883. SZ_READ_VINT (tmp); /* OutStreams */
  884. noutstreams += tmp;
  885. }
  886. else {
  887. /* XXX: is it correct ? */
  888. noutstreams ++;
  889. ninstreams ++;
  890. }
  891. if (t & (1u << 5)) {
  892. /* Attributes ... */
  893. SZ_READ_VINT (tmp); /* Size of attrs */
  894. SZ_SKIP_BYTES (tmp);
  895. }
  896. }
  897. if (noutstreams > 1) {
  898. /* BindPairs, WTF, huh */
  899. for (i = 0; i < noutstreams - 1; i ++) {
  900. guint64 tmp;
  901. SZ_READ_VINT (tmp);
  902. SZ_READ_VINT (tmp);
  903. }
  904. }
  905. gint64 npacked = (gint64)ninstreams - (gint64)noutstreams + 1;
  906. msg_debug_archive ("7zip: instreams=%L, outstreams=%L, packed=%L",
  907. ninstreams, noutstreams, npacked);
  908. if (npacked > 1) {
  909. /* Gah... */
  910. for (i = 0; i < npacked; i ++) {
  911. guint64 tmp;
  912. SZ_READ_VINT (tmp);
  913. }
  914. }
  915. *pnstreams = noutstreams;
  916. (*ndigests) += npacked;
  917. return p;
  918. }
  919. static const guchar *
  920. rspamd_7zip_read_coders_info (struct rspamd_task *task,
  921. const guchar *p, const guchar *end,
  922. struct rspamd_archive *arch,
  923. guint *pnum_folders, guint *pnum_nodigest)
  924. {
  925. guint64 num_folders = 0, i, tmp;
  926. guchar t;
  927. guint *folder_nstreams = NULL, num_digests = 0, digests_read = 0;
  928. while (p != NULL && p < end) {
  929. /*
  930. * BYTE NID::kFolder (0x0B)
  931. * UINT64 NumFolders
  932. * BYTE External
  933. * switch(External)
  934. * {
  935. * case 0:
  936. * Folders[NumFolders]
  937. * case 1:
  938. * UINT64 DataStreamIndex
  939. * }
  940. * BYTE ID::kCodersUnPackSize (0x0C)
  941. * for(Folders)
  942. * for(Folder.NumOutStreams)
  943. * UINT64 UnPackSize;
  944. * []
  945. * BYTE NID::kCRC (0x0A)
  946. * UnPackDigests[NumFolders]
  947. * []
  948. * BYTE NID::kEnd
  949. */
  950. t = *p;
  951. SZ_SKIP_BYTES(1);
  952. msg_debug_archive ("7zip: read coders info %xc", t);
  953. switch (t) {
  954. case kFolder:
  955. SZ_READ_VINT (num_folders);
  956. msg_debug_archive ("7zip: nfolders=%L", num_folders);
  957. if (*p != 0) {
  958. /* External folders */
  959. SZ_SKIP_BYTES(1);
  960. SZ_READ_VINT (tmp);
  961. }
  962. else {
  963. SZ_SKIP_BYTES(1);
  964. if (num_folders > 8192) {
  965. /* Gah */
  966. return NULL;
  967. }
  968. folder_nstreams = g_malloc (sizeof (int) * num_folders);
  969. for (i = 0; i < num_folders && p != NULL && p < end; i++) {
  970. p = rspamd_7zip_read_folder (task, p, end, arch,
  971. &folder_nstreams[i], &num_digests);
  972. }
  973. g_free (folder_nstreams);
  974. }
  975. break;
  976. case kCodersUnPackSize:
  977. for (i = 0; i < num_folders && p != NULL && p < end; i++) {
  978. if (folder_nstreams) {
  979. for (guint j = 0; j < folder_nstreams[i]; j++) {
  980. guint64 tmp;
  981. SZ_READ_VINT (tmp); /* Unpacked size */
  982. msg_debug_archive ("7zip: unpacked size (folder=%d, stream=%d) = %L",
  983. i, j, tmp);
  984. }
  985. }
  986. else {
  987. msg_err_task ("internal 7zip error");
  988. }
  989. }
  990. break;
  991. case kCRC:
  992. /*
  993. * Here are dragons. Spec tells that here there could be up
  994. * to nfolders digests. However, according to the actual source
  995. * code, in case of multiple out streams there should be digests
  996. * for all out streams.
  997. *
  998. * In the real life (tm) it is even more idiotic: all these digests
  999. * are in another section! But that section needs number of digests
  1000. * that are absent here. It is the most stupid thing I've ever seen
  1001. * in any file format.
  1002. *
  1003. * I hope there *WAS* some reason to do such shit...
  1004. */
  1005. p = rspamd_7zip_read_digest (task, p, end, arch, num_digests,
  1006. &digests_read);
  1007. break;
  1008. case kEnd:
  1009. goto end;
  1010. break;
  1011. default:
  1012. p = NULL;
  1013. msg_debug_archive ("bad 7zip type: %xc; %s", t, G_STRLOC);
  1014. goto end;
  1015. break;
  1016. }
  1017. }
  1018. end:
  1019. if (pnum_nodigest) {
  1020. *pnum_nodigest = num_digests - digests_read;
  1021. }
  1022. if (pnum_folders) {
  1023. *pnum_folders = num_folders;
  1024. }
  1025. return p;
  1026. }
  1027. static const guchar *
  1028. rspamd_7zip_read_substreams_info (struct rspamd_task *task,
  1029. const guchar *p, const guchar *end,
  1030. struct rspamd_archive *arch,
  1031. guint num_folders, guint num_nodigest)
  1032. {
  1033. guchar t;
  1034. guint i;
  1035. guint64 *folder_nstreams;
  1036. if (num_folders > 8192) {
  1037. /* Gah */
  1038. return NULL;
  1039. }
  1040. folder_nstreams = g_alloca (sizeof (guint64) * num_folders);
  1041. while (p != NULL && p < end) {
  1042. /*
  1043. * []
  1044. * BYTE NID::kNumUnPackStream; (0x0D)
  1045. * UINT64 NumUnPackStreamsInFolders[NumFolders];
  1046. * []
  1047. *
  1048. * []
  1049. * BYTE NID::kSize (0x09)
  1050. * UINT64 UnPackSizes[??]
  1051. * []
  1052. *
  1053. *
  1054. * []
  1055. * BYTE NID::kCRC (0x0A)
  1056. * Digests[Number of streams with unknown CRC]
  1057. * []
  1058. */
  1059. t = *p;
  1060. SZ_SKIP_BYTES(1);
  1061. msg_debug_archive ("7zip: read substream info %xc", t);
  1062. switch (t) {
  1063. case kNumUnPackStream:
  1064. for (i = 0; i < num_folders; i ++) {
  1065. guint64 tmp;
  1066. SZ_READ_VINT (tmp);
  1067. folder_nstreams[i] = tmp;
  1068. }
  1069. break;
  1070. case kCRC:
  1071. /*
  1072. * Read the comment in the rspamd_7zip_read_coders_info
  1073. */
  1074. p = rspamd_7zip_read_digest (task, p, end, arch, num_nodigest,
  1075. NULL);
  1076. break;
  1077. case kSize:
  1078. /*
  1079. * Another brain damaged logic, but we have to support it
  1080. * as there are no ways to proceed without it.
  1081. * In fact, it is just absent in the real life...
  1082. */
  1083. for (i = 0; i < num_folders; i ++) {
  1084. for (guint j = 0; j < folder_nstreams[i]; j++) {
  1085. guint64 tmp;
  1086. SZ_READ_VINT (tmp); /* Who cares indeed */
  1087. }
  1088. }
  1089. break;
  1090. case kEnd:
  1091. goto end;
  1092. break;
  1093. default:
  1094. p = NULL;
  1095. msg_debug_archive ("bad 7zip type: %xc; %s", t, G_STRLOC);
  1096. goto end;
  1097. break;
  1098. }
  1099. }
  1100. end:
  1101. return p;
  1102. }
  1103. static const guchar *
  1104. rspamd_7zip_read_main_streams_info (struct rspamd_task *task,
  1105. const guchar *p, const guchar *end,
  1106. struct rspamd_archive *arch)
  1107. {
  1108. guchar t;
  1109. guint num_folders = 0, unknown_digests = 0;
  1110. while (p != NULL && p < end) {
  1111. t = *p;
  1112. SZ_SKIP_BYTES(1);
  1113. msg_debug_archive ("7zip: read main streams info %xc", t);
  1114. /*
  1115. *
  1116. * []
  1117. * PackInfo
  1118. * []
  1119. * []
  1120. * CodersInfo
  1121. * []
  1122. *
  1123. * []
  1124. * SubStreamsInfo
  1125. * []
  1126. *
  1127. * BYTE NID::kEnd
  1128. */
  1129. switch (t) {
  1130. case kPackInfo:
  1131. p = rspamd_7zip_read_pack_info (task, p, end, arch);
  1132. break;
  1133. case kUnPackInfo:
  1134. p = rspamd_7zip_read_coders_info (task, p, end, arch, &num_folders,
  1135. &unknown_digests);
  1136. break;
  1137. case kSubStreamsInfo:
  1138. p = rspamd_7zip_read_substreams_info (task, p, end, arch, num_folders,
  1139. unknown_digests);
  1140. break;
  1141. break;
  1142. case kEnd:
  1143. goto end;
  1144. break;
  1145. default:
  1146. p = NULL;
  1147. msg_debug_archive ("bad 7zip type: %xc; %s", t, G_STRLOC);
  1148. goto end;
  1149. break;
  1150. }
  1151. }
  1152. end:
  1153. return p;
  1154. }
  1155. static const guchar *
  1156. rspamd_7zip_read_archive_props (struct rspamd_task *task,
  1157. const guchar *p, const guchar *end,
  1158. struct rspamd_archive *arch)
  1159. {
  1160. guchar proptype;
  1161. guint64 proplen;
  1162. /*
  1163. * for (;;)
  1164. * {
  1165. * BYTE PropertyType;
  1166. * if (aType == 0)
  1167. * break;
  1168. * UINT64 PropertySize;
  1169. * BYTE PropertyData[PropertySize];
  1170. * }
  1171. */
  1172. proptype = *p;
  1173. SZ_SKIP_BYTES(1);
  1174. if (p != NULL) {
  1175. while (proptype != 0) {
  1176. SZ_READ_VINT(proplen);
  1177. if (p + proplen < end) {
  1178. p += proplen;
  1179. }
  1180. else {
  1181. return NULL;
  1182. }
  1183. proptype = *p;
  1184. SZ_SKIP_BYTES(1);
  1185. }
  1186. }
  1187. return p;
  1188. }
  1189. static GString *
  1190. rspamd_7zip_ucs2_to_utf8 (struct rspamd_task *task, const guchar *p,
  1191. const guchar *end)
  1192. {
  1193. GString *res;
  1194. goffset dest_pos = 0, src_pos = 0;
  1195. const gsize len = (end - p) / sizeof (guint16);
  1196. guint16 *up;
  1197. UChar32 wc;
  1198. UBool is_error = 0;
  1199. res = g_string_sized_new ((end - p) + sizeof (wc) * 2 + 1);
  1200. up = (guint16 *)p;
  1201. while (src_pos < len) {
  1202. U16_NEXT (up, src_pos, len, wc);
  1203. if (wc > 0) {
  1204. U8_APPEND (res->str, dest_pos, res->allocated_len, wc, is_error);
  1205. }
  1206. if (is_error) {
  1207. g_string_free (res, TRUE);
  1208. return NULL;
  1209. }
  1210. }
  1211. g_assert (dest_pos < res->allocated_len);
  1212. res->len = dest_pos;
  1213. res->str[dest_pos] = '\0';
  1214. return res;
  1215. }
  1216. static const guchar *
  1217. rspamd_7zip_read_files_info (struct rspamd_task *task,
  1218. const guchar *p, const guchar *end,
  1219. struct rspamd_archive *arch)
  1220. {
  1221. guint64 nfiles = 0, sz, i;
  1222. guchar t, b;
  1223. struct rspamd_archive_file *fentry;
  1224. SZ_READ_VINT (nfiles);
  1225. for (;p != NULL && p < end;) {
  1226. t = *p;
  1227. SZ_SKIP_BYTES (1);
  1228. msg_debug_archive ("7zip: read file data type %xc", t);
  1229. if (t == kEnd) {
  1230. goto end;
  1231. }
  1232. /* This is SO SPECIAL, gah */
  1233. SZ_READ_VINT (sz);
  1234. switch (t) {
  1235. case kEmptyStream:
  1236. case kEmptyFile:
  1237. case kAnti: /* AntiFile, OMFG */
  1238. /* We don't care about these bits */
  1239. case kCTime:
  1240. case kATime:
  1241. case kMTime:
  1242. /* We don't care of these guys, but we still have to parse them, gah */
  1243. if (sz > 0) {
  1244. SZ_SKIP_BYTES (sz);
  1245. }
  1246. break;
  1247. case kName:
  1248. /* The most useful part in this whole bloody format */
  1249. b = *p; /* External flag */
  1250. SZ_SKIP_BYTES (1);
  1251. if (b) {
  1252. /* TODO: for the god sake, do something about external
  1253. * filenames...
  1254. */
  1255. guint64 tmp;
  1256. SZ_READ_VINT (tmp);
  1257. }
  1258. else {
  1259. for (i = 0; i < nfiles; i ++) {
  1260. /* Zero terminated wchar_t: happy converting... */
  1261. /* First, find terminator */
  1262. const guchar *fend = NULL, *tp = p;
  1263. GString *res;
  1264. while (tp < end - 1) {
  1265. if (*tp == 0 && *(tp + 1) == 0) {
  1266. fend = tp;
  1267. break;
  1268. }
  1269. tp += 2;
  1270. }
  1271. if (fend == NULL || fend - p == 0) {
  1272. /* Crap instead of fname */
  1273. msg_debug_archive ("bad 7zip name; %s", G_STRLOC);
  1274. goto end;
  1275. }
  1276. res = rspamd_7zip_ucs2_to_utf8 (task, p, fend);
  1277. if (res != NULL) {
  1278. fentry = g_malloc0 (sizeof (*fentry));
  1279. fentry->fname = res;
  1280. g_ptr_array_add (arch->files, fentry);
  1281. msg_debug_archive ("7zip: found file %v", res);
  1282. }
  1283. else {
  1284. msg_debug_archive ("bad 7zip name; %s", G_STRLOC);
  1285. }
  1286. /* Skip zero terminating character */
  1287. p = fend + 2;
  1288. }
  1289. }
  1290. break;
  1291. case kDummy:
  1292. case kWinAttributes:
  1293. if (sz > 0) {
  1294. SZ_SKIP_BYTES (sz);
  1295. }
  1296. break;
  1297. default:
  1298. p = NULL;
  1299. msg_debug_archive ("bad 7zip type: %xc; %s", t, G_STRLOC);
  1300. goto end;
  1301. break;
  1302. }
  1303. }
  1304. end:
  1305. return p;
  1306. }
  1307. static const guchar *
  1308. rspamd_7zip_read_next_section (struct rspamd_task *task,
  1309. const guchar *p, const guchar *end,
  1310. struct rspamd_archive *arch)
  1311. {
  1312. guchar t = *p;
  1313. SZ_SKIP_BYTES(1);
  1314. msg_debug_archive ("7zip: read section %xc", t);
  1315. switch (t) {
  1316. case kHeader:
  1317. /* We just skip byte and go further */
  1318. break;
  1319. case kEncodedHeader:
  1320. /*
  1321. * In fact, headers are just packed, but we assume it as
  1322. * encrypted to distinguish from the normal archives
  1323. */
  1324. arch->flags |= RSPAMD_ARCHIVE_ENCRYPTED;
  1325. p = NULL; /* Cannot get anything useful */
  1326. break;
  1327. case kArchiveProperties:
  1328. p = rspamd_7zip_read_archive_props (task, p, end, arch);
  1329. break;
  1330. case kMainStreamsInfo:
  1331. p = rspamd_7zip_read_main_streams_info (task, p, end, arch);
  1332. break;
  1333. case kAdditionalStreamsInfo:
  1334. p = rspamd_7zip_read_main_streams_info (task, p, end, arch);
  1335. break;
  1336. case kFilesInfo:
  1337. p = rspamd_7zip_read_files_info (task, p, end, arch);
  1338. break;
  1339. case kEnd:
  1340. p = NULL;
  1341. msg_debug_archive ("7zip: read final section");
  1342. break;
  1343. default:
  1344. p = NULL;
  1345. msg_debug_archive ("bad 7zip type: %xc; %s", t, G_STRLOC);
  1346. break;
  1347. }
  1348. return p;
  1349. }
  1350. static void
  1351. rspamd_archive_process_7zip (struct rspamd_task *task,
  1352. struct rspamd_mime_part *part)
  1353. {
  1354. struct rspamd_archive *arch;
  1355. const guchar *start, *p, *end;
  1356. const guchar sz_magic[] = {'7', 'z', 0xBC, 0xAF, 0x27, 0x1C};
  1357. guint64 section_offset = 0, section_length = 0;
  1358. start = part->parsed_data.begin;
  1359. p = start;
  1360. end = p + part->parsed_data.len;
  1361. if (end - p <= sizeof (guint64) + sizeof (guint32) ||
  1362. memcmp (p, sz_magic, sizeof (sz_magic)) != 0) {
  1363. msg_debug_archive ("7z archive is invalid (no 7z magic)");
  1364. return;
  1365. }
  1366. arch = rspamd_mempool_alloc0 (task->task_pool, sizeof (*arch));
  1367. arch->files = g_ptr_array_new ();
  1368. arch->type = RSPAMD_ARCHIVE_7ZIP;
  1369. rspamd_mempool_add_destructor (task->task_pool, rspamd_archive_dtor,
  1370. arch);
  1371. /* Magic (6 bytes) + version (2 bytes) + crc32 (4 bytes) */
  1372. p += sizeof (guint64) + sizeof (guint32);
  1373. SZ_READ_UINT64(section_offset);
  1374. SZ_READ_UINT64(section_length);
  1375. if (end - p > sizeof (guint32)) {
  1376. p += sizeof (guint32);
  1377. }
  1378. else {
  1379. msg_debug_archive ("7z archive is invalid (truncated crc)");
  1380. return;
  1381. }
  1382. if (end - p > section_offset) {
  1383. p += section_offset;
  1384. }
  1385. else {
  1386. msg_debug_archive ("7z archive is invalid (incorrect section offset)");
  1387. return;
  1388. }
  1389. while ((p = rspamd_7zip_read_next_section (task, p, end, arch)) != NULL);
  1390. part->flags |= RSPAMD_MIME_PART_ARCHIVE;
  1391. part->specific.arch = arch;
  1392. if (part->cd != NULL) {
  1393. arch->archive_name = &part->cd->filename;
  1394. }
  1395. arch->size = part->parsed_data.len;
  1396. }
  1397. static void
  1398. rspamd_archive_process_gzip (struct rspamd_task *task,
  1399. struct rspamd_mime_part *part) {
  1400. struct rspamd_archive *arch;
  1401. const guchar *start, *p, *end;
  1402. const guchar gz_magic[] = {0x1F, 0x8B};
  1403. guchar flags;
  1404. start = part->parsed_data.begin;
  1405. p = start;
  1406. end = p + part->parsed_data.len;
  1407. if (end - p <= 10 || memcmp (p, gz_magic, sizeof (gz_magic)) != 0) {
  1408. msg_debug_archive ("gzip archive is invalid (no gzip magic)");
  1409. return;
  1410. }
  1411. arch = rspamd_mempool_alloc0 (task->task_pool, sizeof (*arch));
  1412. arch->files = g_ptr_array_sized_new (1);
  1413. arch->type = RSPAMD_ARCHIVE_GZIP;
  1414. rspamd_mempool_add_destructor (task->task_pool, rspamd_archive_dtor,
  1415. arch);
  1416. flags = p[3];
  1417. if (flags & (1u << 5)) {
  1418. arch->flags |= RSPAMD_ARCHIVE_ENCRYPTED;
  1419. }
  1420. if (flags & (1u << 3)) {
  1421. /* We have file name presented in archive, try to use it */
  1422. if (flags & (1u << 1)) {
  1423. /* Multipart */
  1424. p += 12;
  1425. }
  1426. else {
  1427. p += 10;
  1428. }
  1429. if (flags & (1u << 2)) {
  1430. /* Optional section */
  1431. guint16 optlen = 0;
  1432. RAR_READ_UINT16 (optlen);
  1433. if (end <= p + optlen) {
  1434. msg_debug_archive ("gzip archive is invalid, bad extra length: %d",
  1435. (int)optlen);
  1436. return;
  1437. }
  1438. p += optlen;
  1439. }
  1440. /* Read file name */
  1441. const guchar *fname_start = p;
  1442. while (p < end) {
  1443. if (*p == '\0') {
  1444. if (p > fname_start) {
  1445. struct rspamd_archive_file *f;
  1446. f = g_malloc0 (sizeof (*f));
  1447. f->fname = rspamd_archive_file_try_utf (task, fname_start,
  1448. p - fname_start);
  1449. g_ptr_array_add (arch->files, f);
  1450. goto set;
  1451. }
  1452. }
  1453. p ++;
  1454. }
  1455. /* Wrong filename, not zero terminated */
  1456. msg_debug_archive ("gzip archive is invalid, bad filename at pos %d",
  1457. (int)(p - start));
  1458. return;
  1459. }
  1460. /* Fallback, we need to extract file name from archive name if possible */
  1461. if (part->cd->filename.len > 0) {
  1462. const gchar *dot_pos, *slash_pos;
  1463. dot_pos = rspamd_memrchr (part->cd->filename.begin, '.',
  1464. part->cd->filename.len);
  1465. if (dot_pos) {
  1466. struct rspamd_archive_file *f;
  1467. slash_pos = rspamd_memrchr (part->cd->filename.begin, '/',
  1468. part->cd->filename.len);
  1469. if (slash_pos && slash_pos < dot_pos) {
  1470. f = g_malloc0 (sizeof (*f));
  1471. f->fname = g_string_sized_new (dot_pos - slash_pos);
  1472. g_string_append_len (f->fname, slash_pos + 1,
  1473. dot_pos - slash_pos - 1);
  1474. msg_debug_archive ("fallback to gzip filename based on cd: %v",
  1475. f->fname);
  1476. g_ptr_array_add (arch->files, f);
  1477. goto set;
  1478. }
  1479. else {
  1480. const gchar *fname_start = part->cd->filename.begin;
  1481. f = g_malloc0 (sizeof (*f));
  1482. if (memchr (fname_start, '.', part->cd->filename.len) != dot_pos) {
  1483. /* Double dots, something like foo.exe.gz */
  1484. f->fname = g_string_sized_new (dot_pos - fname_start);
  1485. g_string_append_len (f->fname, fname_start,
  1486. dot_pos - fname_start);
  1487. }
  1488. else {
  1489. /* Single dot, something like foo.gzz */
  1490. f->fname = g_string_sized_new (part->cd->filename.len);
  1491. g_string_append_len (f->fname, fname_start,
  1492. part->cd->filename.len);
  1493. }
  1494. msg_debug_archive ("fallback to gzip filename based on cd: %v",
  1495. f->fname);
  1496. g_ptr_array_add (arch->files, f);
  1497. goto set;
  1498. }
  1499. }
  1500. }
  1501. return;
  1502. set:
  1503. /* Set archive data */
  1504. part->flags |= RSPAMD_MIME_PART_ARCHIVE;
  1505. part->specific.arch = arch;
  1506. if (part->cd) {
  1507. arch->archive_name = &part->cd->filename;
  1508. }
  1509. arch->size = part->parsed_data.len;
  1510. }
  1511. static gboolean
  1512. rspamd_archive_cheat_detect (struct rspamd_mime_part *part, const gchar *str,
  1513. const guchar *magic_start, gsize magic_len)
  1514. {
  1515. struct rspamd_content_type *ct;
  1516. const gchar *p;
  1517. rspamd_ftok_t srch, *fname;
  1518. ct = part->ct;
  1519. RSPAMD_FTOK_ASSIGN (&srch, "application");
  1520. if (ct && ct->type.len && ct->subtype.len > 0 && rspamd_ftok_cmp (&ct->type,
  1521. &srch) == 0) {
  1522. if (rspamd_substring_search_caseless (ct->subtype.begin, ct->subtype.len,
  1523. str, strlen (str)) != -1) {
  1524. /* We still need to check magic, see #1848 */
  1525. if (magic_start != NULL) {
  1526. if (part->parsed_data.len > magic_len &&
  1527. memcmp (part->parsed_data.begin,
  1528. magic_start, magic_len) == 0) {
  1529. return TRUE;
  1530. }
  1531. /* No magic, refuse this type of archive */
  1532. return FALSE;
  1533. }
  1534. else {
  1535. return TRUE;
  1536. }
  1537. }
  1538. }
  1539. if (part->cd) {
  1540. fname = &part->cd->filename;
  1541. if (fname && fname->len > strlen (str)) {
  1542. p = fname->begin + fname->len - strlen (str);
  1543. if (rspamd_lc_cmp (p, str, strlen (str)) == 0) {
  1544. if (*(p - 1) == '.') {
  1545. if (magic_start != NULL) {
  1546. if (part->parsed_data.len > magic_len &&
  1547. memcmp (part->parsed_data.begin,
  1548. magic_start, magic_len) == 0) {
  1549. return TRUE;
  1550. }
  1551. /* No magic, refuse this type of archive */
  1552. return FALSE;
  1553. }
  1554. return TRUE;
  1555. }
  1556. }
  1557. }
  1558. if (magic_start != NULL) {
  1559. if (part->parsed_data.len > magic_len &&
  1560. memcmp (part->parsed_data.begin, magic_start, magic_len) == 0) {
  1561. return TRUE;
  1562. }
  1563. }
  1564. }
  1565. return FALSE;
  1566. }
  1567. void
  1568. rspamd_archives_process (struct rspamd_task *task)
  1569. {
  1570. guint i;
  1571. struct rspamd_mime_part *part;
  1572. const guchar rar_magic[] = {0x52, 0x61, 0x72, 0x21, 0x1A, 0x07};
  1573. const guchar zip_magic[] = {0x50, 0x4b, 0x03, 0x04};
  1574. const guchar sz_magic[] = {'7', 'z', 0xBC, 0xAF, 0x27, 0x1C};
  1575. const guchar gz_magic[] = {0x1F, 0x8B};
  1576. for (i = 0; i < task->parts->len; i ++) {
  1577. part = g_ptr_array_index (task->parts, i);
  1578. if (!(part->flags & (RSPAMD_MIME_PART_TEXT|RSPAMD_MIME_PART_IMAGE))) {
  1579. if (part->parsed_data.len > 0) {
  1580. if (rspamd_archive_cheat_detect (part, "zip",
  1581. zip_magic, sizeof (zip_magic))) {
  1582. rspamd_archive_process_zip (task, part);
  1583. }
  1584. else if (rspamd_archive_cheat_detect (part, "rar",
  1585. rar_magic, sizeof (rar_magic))) {
  1586. rspamd_archive_process_rar (task, part);
  1587. }
  1588. else if (rspamd_archive_cheat_detect (part, "7z",
  1589. sz_magic, sizeof (sz_magic))) {
  1590. rspamd_archive_process_7zip (task, part);
  1591. }
  1592. else if (rspamd_archive_cheat_detect (part, "gz",
  1593. gz_magic, sizeof (gz_magic))) {
  1594. rspamd_archive_process_gzip (task, part);
  1595. }
  1596. if (IS_CT_TEXT (part->ct) &&
  1597. (part->flags & RSPAMD_MIME_PART_ARCHIVE)) {
  1598. msg_info_task ("found archive with incorrect content-type: %T/%T",
  1599. &part->ct->type, &part->ct->subtype);
  1600. part->ct->flags |= RSPAMD_CONTENT_TYPE_BROKEN;
  1601. }
  1602. }
  1603. }
  1604. }
  1605. }
  1606. const gchar *
  1607. rspamd_archive_type_str (enum rspamd_archive_type type)
  1608. {
  1609. const gchar *ret = "unknown";
  1610. switch (type) {
  1611. case RSPAMD_ARCHIVE_ZIP:
  1612. ret = "zip";
  1613. break;
  1614. case RSPAMD_ARCHIVE_RAR:
  1615. ret = "rar";
  1616. break;
  1617. case RSPAMD_ARCHIVE_7ZIP:
  1618. ret = "7z";
  1619. break;
  1620. case RSPAMD_ARCHIVE_GZIP:
  1621. ret = "gz";
  1622. break;
  1623. }
  1624. return ret;
  1625. }