You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.


  1. /*-
  2. * Copyright 2016 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "config.h"
  17. #include "message.h"
  18. #include "task.h"
  19. #include "archives.h"
  20. #include "libmime/mime_encoding.h"
  21. #include <unicode/uchar.h>
  22. #include <unicode/utf8.h>
  23. #include <unicode/utf16.h>
  24. #include <unicode/ucnv.h>
  25. #define msg_debug_archive(...) rspamd_conditional_debug_fast (NULL, NULL, \
  26. rspamd_archive_log_id, "archive", task->task_pool->tag.uid, \
  27. G_STRFUNC, \
  28. __VA_ARGS__)
  29. INIT_LOG_MODULE(archive)
  30. static void
  31. rspamd_archive_dtor (gpointer p)
  32. {
  33. struct rspamd_archive *arch = p;
  34. struct rspamd_archive_file *f;
  35. guint i;
  36. for (i = 0; i < arch->files->len; i ++) {
  37. f = g_ptr_array_index (arch->files, i);
  38. if (f->fname) {
  39. g_string_free (f->fname, TRUE);
  40. }
  41. g_free (f);
  42. }
  43. g_ptr_array_free (arch->files, TRUE);
  44. }
  45. static bool
  46. rspamd_archive_file_try_utf (struct rspamd_task *task,
  47. struct rspamd_archive *arch,
  48. struct rspamd_archive_file *fentry,
  49. const gchar *in, gsize inlen)
  50. {
  51. const gchar *charset = NULL, *p, *end;
  52. GString *res;
  53. charset = rspamd_mime_charset_find_by_content (in, inlen, TRUE);
  54. if (charset) {
  55. UChar *tmp;
  56. UErrorCode uc_err = U_ZERO_ERROR;
  57. gint32 r, clen, dlen;
  58. struct rspamd_charset_converter *conv;
  59. UConverter *utf8_converter;
  60. conv = rspamd_mime_get_converter_cached (charset, task->task_pool,
  61. TRUE, &uc_err);
  62. utf8_converter = rspamd_get_utf8_converter ();
  63. if (conv == NULL) {
  64. msg_info_task ("cannot open converter for %s: %s",
  65. charset, u_errorName (uc_err));
  66. fentry->flags |= RSPAMD_ARCHIVE_FILE_OBFUSCATED;
  67. fentry->fname = g_string_new_len(in, inlen);
  68. return false;
  69. }
  70. tmp = g_malloc (sizeof (*tmp) * (inlen + 1));
  71. r = rspamd_converter_to_uchars (conv, tmp, inlen + 1,
  72. in, inlen, &uc_err);
  73. if (!U_SUCCESS (uc_err)) {
  74. msg_info_task ("cannot convert data to unicode from %s: %s",
  75. charset, u_errorName (uc_err));
  76. g_free (tmp);
  77. fentry->flags |= RSPAMD_ARCHIVE_FILE_OBFUSCATED;
  78. fentry->fname = g_string_new_len(in, inlen);
  79. return NULL;
  80. }
  81. int i = 0;
  82. while (i < r) {
  83. UChar32 uc;
  84. U16_NEXT(tmp, i, r, uc);
  85. if (IS_ZERO_WIDTH_SPACE(uc) || u_iscntrl(uc)) {
  86. msg_info_task("control character in archive file name found: 0x%02xd "
  87. "(filename=%T)", uc, arch->archive_name);
  88. fentry->flags |= RSPAMD_ARCHIVE_FILE_OBFUSCATED;
  89. break;
  90. }
  91. }
  92. clen = ucnv_getMaxCharSize (utf8_converter);
  93. dlen = UCNV_GET_MAX_BYTES_FOR_STRING (r, clen);
  94. res = g_string_sized_new (dlen);
  95. r = ucnv_fromUChars (utf8_converter, res->str, dlen, tmp, r, &uc_err);
  96. if (!U_SUCCESS (uc_err)) {
  97. msg_info_task ("cannot convert data from unicode from %s: %s",
  98. charset, u_errorName (uc_err));
  99. g_free (tmp);
  100. g_string_free (res, TRUE);
  101. fentry->flags |= RSPAMD_ARCHIVE_FILE_OBFUSCATED;
  102. fentry->fname = g_string_new_len(in, inlen);
  103. return NULL;
  104. }
  105. g_free (tmp);
  106. res->len = r;
  107. msg_debug_archive ("converted from %s to UTF-8 inlen: %z, outlen: %d",
  108. charset, inlen, r);
  109. fentry->fname = res;
  110. }
  111. else {
  112. /* Convert unsafe characters to '?' */
  113. res = g_string_sized_new (inlen);
  114. p = in;
  115. end = in + inlen;
  116. while (p < end) {
  117. if (g_ascii_isgraph (*p)) {
  118. g_string_append_c (res, *p);
  119. }
  120. else {
  121. g_string_append_c (res, '?');
  122. msg_info_task("non graph character in archive file name found: 0x%02xd "
  123. "(filename=%T)", (int)*p, arch->archive_name);
  124. fentry->flags |= RSPAMD_ARCHIVE_FILE_OBFUSCATED;
  125. }
  126. p ++;
  127. }
  128. fentry->fname = res;
  129. }
  130. return true;
  131. }
  132. static void
  133. rspamd_archive_process_zip (struct rspamd_task *task,
  134. struct rspamd_mime_part *part)
  135. {
  136. const guchar *p, *start, *end, *eocd = NULL, *cd;
  137. const guint32 eocd_magic = 0x06054b50, cd_basic_len = 46;
  138. const guchar cd_magic[] = {0x50, 0x4b, 0x01, 0x02};
  139. const guint max_processed = 1024;
  140. guint32 cd_offset, cd_size, comp_size, uncomp_size, processed = 0;
  141. guint16 extra_len, fname_len, comment_len;
  142. struct rspamd_archive *arch;
  143. struct rspamd_archive_file *f = NULL;
  144. /* Zip files have interesting data at the end of archive */
  145. p = part->parsed_data.begin + part->parsed_data.len - 1;
  146. start = part->parsed_data.begin;
  147. end = p;
  148. /* Search for EOCD:
  149. * 22 bytes is a typical size of eocd without a comment and
  150. * end points one byte after the last character
  151. */
  152. p -= 21;
  153. while (p > start + sizeof (guint32)) {
  154. guint32 t;
  155. if (processed > max_processed) {
  156. break;
  157. }
  158. /* XXX: not an efficient approach */
  159. memcpy (&t, p, sizeof (t));
  160. if (GUINT32_FROM_LE (t) == eocd_magic) {
  161. eocd = p;
  162. break;
  163. }
  164. p --;
  165. processed ++;
  166. }
  167. if (eocd == NULL) {
  168. /* Not a zip file */
  169. msg_info_task ("zip archive is invalid (no EOCD)");
  170. return;
  171. }
  172. if (end - eocd < 21) {
  173. msg_info_task ("zip archive is invalid (short EOCD)");
  174. return;
  175. }
  176. memcpy (&cd_size, eocd + 12, sizeof (cd_size));
  177. cd_size = GUINT32_FROM_LE (cd_size);
  178. memcpy (&cd_offset, eocd + 16, sizeof (cd_offset));
  179. cd_offset = GUINT32_FROM_LE (cd_offset);
  180. /* We need to check sanity as well */
  181. if (cd_offset + cd_size > (guint)(eocd - start)) {
  182. msg_info_task ("zip archive is invalid (bad size/offset for CD)");
  183. return;
  184. }
  185. cd = start + cd_offset;
  186. arch = rspamd_mempool_alloc0 (task->task_pool, sizeof (*arch));
  187. arch->files = g_ptr_array_new ();
  188. arch->type = RSPAMD_ARCHIVE_ZIP;
  189. if (part->cd) {
  190. arch->archive_name = &part->cd->filename;
  191. }
  192. rspamd_mempool_add_destructor (task->task_pool, rspamd_archive_dtor,
  193. arch);
  194. while (cd < start + cd_offset + cd_size) {
  195. guint16 flags;
  196. /* Read central directory record */
  197. if (eocd - cd < cd_basic_len ||
  198. memcmp (cd, cd_magic, sizeof (cd_magic)) != 0) {
  199. msg_info_task ("zip archive is invalid (bad cd record)");
  200. return;
  201. }
  202. memcpy (&flags, cd + 8, sizeof (guint16));
  203. flags = GUINT16_FROM_LE (flags);
  204. memcpy (&comp_size, cd + 20, sizeof (guint32));
  205. comp_size = GUINT32_FROM_LE (comp_size);
  206. memcpy (&uncomp_size, cd + 24, sizeof (guint32));
  207. uncomp_size = GUINT32_FROM_LE (uncomp_size);
  208. memcpy (&fname_len, cd + 28, sizeof (fname_len));
  209. fname_len = GUINT16_FROM_LE (fname_len);
  210. memcpy (&extra_len, cd + 30, sizeof (extra_len));
  211. extra_len = GUINT16_FROM_LE (extra_len);
  212. memcpy (&comment_len, cd + 32, sizeof (comment_len));
  213. comment_len = GUINT16_FROM_LE (comment_len);
  214. if (cd + fname_len + comment_len + extra_len + cd_basic_len > eocd) {
  215. msg_info_task ("zip archive is invalid (too large cd record)");
  216. return;
  217. }
  218. f = g_malloc0 (sizeof (*f));
  219. rspamd_archive_file_try_utf (task, arch, f, cd + cd_basic_len, fname_len);
  220. f->compressed_size = comp_size;
  221. f->uncompressed_size = uncomp_size;
  222. if (flags & 0x41u) {
  223. f->flags |= RSPAMD_ARCHIVE_FILE_ENCRYPTED;
  224. }
  225. if (f->fname) {
  226. if (f->flags & RSPAMD_ARCHIVE_FILE_OBFUSCATED) {
  227. arch->flags |= RSPAMD_ARCHIVE_HAS_OBFUSCATED_FILES;
  228. }
  229. g_ptr_array_add (arch->files, f);
  230. msg_debug_archive ("found file in zip archive: %v", f->fname);
  231. }
  232. else {
  233. g_free (f);
  234. return;
  235. }
  236. /* Process extra fields */
  237. const guchar *extra = cd + fname_len + cd_basic_len;
  238. p = extra;
  239. while (p + sizeof (guint16) * 2 < extra + extra_len) {
  240. guint16 hid, hlen;
  241. memcpy (&hid, p, sizeof (guint16));
  242. hid = GUINT16_FROM_LE (hid);
  243. memcpy (&hlen, p + sizeof (guint16), sizeof (guint16));
  244. hlen = GUINT16_FROM_LE (hlen);
  245. if (hid == 0x0017) {
  246. f->flags |= RSPAMD_ARCHIVE_FILE_ENCRYPTED;
  247. }
  248. p += hlen + sizeof (guint16) * 2;
  249. }
  250. cd += fname_len + comment_len + extra_len + cd_basic_len;
  251. }
  252. part->part_type = RSPAMD_MIME_PART_ARCHIVE;
  253. part->specific.arch = arch;
  254. arch->size = part->parsed_data.len;
  255. }
  256. static inline gint
  257. rspamd_archive_rar_read_vint (const guchar *start, gsize remain, guint64 *res)
  258. {
  259. /*
  260. * From http://www.rarlab.com/technote.htm:
  261. * Variable length integer. Can include one or more bytes, where
  262. * lower 7 bits of every byte contain integer data and highest bit
  263. * in every byte is the continuation flag.
  264. * If highest bit is 0, this is the last byte in sequence.
  265. * So first byte contains 7 least significant bits of integer and
  266. * continuation flag. Second byte, if present, contains next 7 bits and so on.
  267. */
  268. guint64 t = 0;
  269. guint shift = 0;
  270. const guchar *p = start;
  271. while (remain > 0 && shift <= 57) {
  272. if (*p & 0x80) {
  273. t |= ((guint64)(*p & 0x7f)) << shift;
  274. }
  275. else {
  276. t |= ((guint64)(*p & 0x7f)) << shift;
  277. p ++;
  278. break;
  279. }
  280. shift += 7;
  281. p++;
  282. remain --;
  283. }
  284. if (remain == 0 || shift > 64) {
  285. return -1;
  286. }
  287. *res = GUINT64_FROM_LE (t);
  288. return p - start;
  289. }
  /*
   * Helpers for the RAR parsers. All of them work on the local variables `p`
   * (read cursor) and `end` of the calling function, log through
   * msg_debug_archive and execute a plain `return;` on malformed input, so
   * they can only be used inside functions returning void. The VINT variants
   * additionally use the locals `r` and `vint` of the caller.
   */

  /* Advance the cursor by n bytes; n must be positive and fit into the data */
  #define RAR_SKIP_BYTES(n) do { \
      if ((n) <= 0) { \
          msg_debug_archive ("rar archive is invalid (bad skip value)"); \
          return; \
      } \
      else if ((gsize)(end - p) < (n)) { \
          msg_debug_archive ("rar archive is invalid (truncated)"); \
          return; \
      } \
      else { \
          p += (n); \
      } \
  } while (0)

  /* Decode a vint at the cursor into `vint` without moving the cursor */
  #define RAR_READ_VINT() do { \
      r = rspamd_archive_rar_read_vint (p, end - p, &vint); \
      if (r == -1) { \
          msg_debug_archive ("rar archive is invalid (bad vint)"); \
          return; \
      } \
      if (r == 0) { \
          msg_debug_archive ("rar archive is invalid (BAD vint offset)"); \
          return; \
      } \
  } while (0)

  /* Decode a vint at the cursor into `vint` and step over it */
  #define RAR_READ_VINT_SKIP() do { \
      r = rspamd_archive_rar_read_vint (p, end - p, &vint); \
      if (r == -1) { \
          msg_debug_archive ("rar archive is invalid (bad vint)"); \
          return; \
      } \
      p += r; \
  } while (0)

  /* Read a little-endian 16 bit value into n and step over it */
  #define RAR_READ_UINT16(n) do { \
      if (end - p < (glong)sizeof (guint16)) { \
          msg_debug_archive ("rar archive is invalid (bad int16)"); \
          return; \
      } \
      n = p[0] + (p[1] << 8); \
      p += sizeof (guint16); \
  } while (0)

  /* Read a little-endian 32 bit value into n and step over it */
  #define RAR_READ_UINT32(n) do { \
      if (end - p < (glong)sizeof (guint32)) { \
          msg_debug_archive ("rar archive is invalid (bad int32)"); \
          return; \
      } \
      n = (guint)p[0] + ((guint)p[1] << 8) + ((guint)p[2] << 16) + ((guint)p[3] << 24); \
      p += sizeof (guint32); \
  } while (0)
  336. static void
  337. rspamd_archive_process_rar_v4 (struct rspamd_task *task, const guchar *start,
  338. const guchar *end, struct rspamd_mime_part *part)
  339. {
  340. const guchar *p = start, *start_section;
  341. guint8 type;
  342. guint flags;
  343. guint64 sz, comp_sz = 0, uncomp_sz = 0;
  344. struct rspamd_archive *arch;
  345. struct rspamd_archive_file *f;
  346. arch = rspamd_mempool_alloc0 (task->task_pool, sizeof (*arch));
  347. arch->files = g_ptr_array_new ();
  348. arch->type = RSPAMD_ARCHIVE_RAR;
  349. if (part->cd) {
  350. arch->archive_name = &part->cd->filename;
  351. }
  352. rspamd_mempool_add_destructor (task->task_pool, rspamd_archive_dtor,
  353. arch);
  354. while (p < end) {
  355. /* Crc16 */
  356. start_section = p;
  357. RAR_SKIP_BYTES (sizeof (guint16));
  358. type = *p;
  359. p ++;
  360. RAR_READ_UINT16 (flags);
  361. if (type == 0x73) {
  362. /* Main header, check for encryption */
  363. if (flags & 0x80) {
  364. arch->flags |= RSPAMD_ARCHIVE_ENCRYPTED;
  365. goto end;
  366. }
  367. }
  368. RAR_READ_UINT16 (sz);
  369. if (flags & 0x8000) {
  370. /* We also need to read ADD_SIZE element */
  371. guint32 tmp;
  372. RAR_READ_UINT32 (tmp);
  373. sz += tmp;
  374. /* This is also used as PACK_SIZE */
  375. comp_sz = tmp;
  376. }
  377. if (sz == 0) {
  378. /* Zero sized block - error */
  379. msg_debug_archive ("rar archive is invalid (zero size block)");
  380. return;
  381. }
  382. if (type == 0x74) {
  383. guint fname_len;
  384. /* File header */
  385. /* Uncompressed size */
  386. RAR_READ_UINT32 (uncomp_sz);
  387. /* Skip to NAME_SIZE element */
  388. RAR_SKIP_BYTES (11);
  389. RAR_READ_UINT16 (fname_len);
  390. if (fname_len == 0 || fname_len > (gsize)(end - p)) {
  391. msg_debug_archive ("rar archive is invalid (bad filename size: %d)",
  392. fname_len);
  393. return;
  394. }
  395. /* Attrs */
  396. RAR_SKIP_BYTES (4);
  397. if (flags & 0x100) {
  398. /* We also need to read HIGH_PACK_SIZE */
  399. guint32 tmp;
  400. RAR_READ_UINT32 (tmp);
  401. sz += tmp;
  402. comp_sz += tmp;
  403. /* HIGH_UNP_SIZE */
  404. RAR_READ_UINT32 (tmp);
  405. uncomp_sz += tmp;
  406. }
  407. f = g_malloc0 (sizeof (*f));
  408. if (flags & 0x200) {
  409. /* We have unicode + normal version */
  410. guchar *tmp;
  411. tmp = memchr (p, '\0', fname_len);
  412. if (tmp != NULL) {
  413. /* Just use ASCII version */
  414. rspamd_archive_file_try_utf (task, arch, f, p, tmp - p);
  415. msg_debug_archive ("found ascii filename in rarv4 archive: %v",
  416. f->fname);
  417. }
  418. else {
  419. /* We have UTF8 filename, use it as is */
  420. rspamd_archive_file_try_utf (task, arch, f, p, fname_len);
  421. msg_debug_archive ("found utf filename in rarv4 archive: %v",
  422. f->fname);
  423. }
  424. }
  425. else {
  426. rspamd_archive_file_try_utf (task, arch, f, p, fname_len);
  427. msg_debug_archive ("found ascii (old) filename in rarv4 archive: %v",
  428. f->fname);
  429. }
  430. f->compressed_size = comp_sz;
  431. f->uncompressed_size = uncomp_sz;
  432. if (flags & 0x4) {
  433. f->flags |= RSPAMD_ARCHIVE_FILE_ENCRYPTED;
  434. }
  435. if (f->fname) {
  436. if (f->flags & RSPAMD_ARCHIVE_FILE_OBFUSCATED) {
  437. arch->flags |= RSPAMD_ARCHIVE_HAS_OBFUSCATED_FILES;
  438. }
  439. g_ptr_array_add (arch->files, f);
  440. }
  441. else {
  442. g_free (f);
  443. }
  444. }
  445. p = start_section;
  446. RAR_SKIP_BYTES (sz);
  447. }
  448. end:
  449. part->part_type = RSPAMD_MIME_PART_ARCHIVE;
  450. part->specific.arch = arch;
  451. arch->size = part->parsed_data.len;
  452. }
  453. static void
  454. rspamd_archive_process_rar (struct rspamd_task *task,
  455. struct rspamd_mime_part *part)
  456. {
  457. const guchar *p, *end, *section_start;
  458. const guchar rar_v5_magic[] = {0x52, 0x61, 0x72, 0x21, 0x1A, 0x07, 0x01, 0x00},
  459. rar_v4_magic[] = {0x52, 0x61, 0x72, 0x21, 0x1A, 0x07, 0x00};
  460. const guint rar_encrypted_header = 4, rar_main_header = 1,
  461. rar_file_header = 2;
  462. guint64 vint, sz, comp_sz = 0, uncomp_sz = 0, flags = 0, type = 0,
  463. extra_sz = 0;
  464. struct rspamd_archive *arch;
  465. struct rspamd_archive_file *f;
  466. gint r;
  467. p = part->parsed_data.begin;
  468. end = p + part->parsed_data.len;
  469. if ((gsize)(end - p) <= sizeof (rar_v5_magic)) {
  470. msg_debug_archive ("rar archive is invalid (too small)");
  471. return;
  472. }
  473. if (memcmp (p, rar_v5_magic, sizeof (rar_v5_magic)) == 0) {
  474. p += sizeof (rar_v5_magic);
  475. }
  476. else if (memcmp (p, rar_v4_magic, sizeof (rar_v4_magic)) == 0) {
  477. p += sizeof (rar_v4_magic);
  478. rspamd_archive_process_rar_v4 (task, p, end, part);
  479. return;
  480. }
  481. else {
  482. msg_debug_archive ("rar archive is invalid (no rar magic)");
  483. return;
  484. }
  485. /* Rar v5 format */
  486. arch = rspamd_mempool_alloc0 (task->task_pool, sizeof (*arch));
  487. arch->files = g_ptr_array_new ();
  488. arch->type = RSPAMD_ARCHIVE_RAR;
  489. if (part->cd) {
  490. arch->archive_name = &part->cd->filename;
  491. }
  492. rspamd_mempool_add_destructor (task->task_pool, rspamd_archive_dtor,
  493. arch);
  494. /* Now we can have either encryption header or archive header */
  495. /* Crc 32 */
  496. RAR_SKIP_BYTES (sizeof (guint32));
  497. /* Size */
  498. RAR_READ_VINT_SKIP ();
  499. sz = vint;
  500. /* Type */
  501. section_start = p;
  502. RAR_READ_VINT_SKIP ();
  503. type = vint;
  504. /* Header flags */
  505. RAR_READ_VINT_SKIP ();
  506. flags = vint;
  507. if (flags & 0x1) {
  508. /* Have extra zone */
  509. RAR_READ_VINT_SKIP ();
  510. }
  511. if (flags & 0x2) {
  512. /* Data zone is presented */
  513. RAR_READ_VINT_SKIP ();
  514. sz += vint;
  515. }
  516. if (type == rar_encrypted_header) {
  517. /* We can't read any further information as archive is encrypted */
  518. arch->flags |= RSPAMD_ARCHIVE_ENCRYPTED;
  519. goto end;
  520. }
  521. else if (type != rar_main_header) {
  522. msg_debug_archive ("rar archive is invalid (bad main header)");
  523. return;
  524. }
  525. /* Nothing useful in main header */
  526. p = section_start;
  527. RAR_SKIP_BYTES (sz);
  528. while (p < end) {
  529. gboolean has_extra = FALSE;
  530. /* Read the next header */
  531. /* Crc 32 */
  532. RAR_SKIP_BYTES (sizeof (guint32));
  533. /* Size */
  534. RAR_READ_VINT_SKIP ();
  535. sz = vint;
  536. if (sz == 0) {
  537. /* Zero sized block - error */
  538. msg_debug_archive ("rar archive is invalid (zero size block)");
  539. return;
  540. }
  541. section_start = p;
  542. /* Type */
  543. RAR_READ_VINT_SKIP ();
  544. type = vint;
  545. /* Header flags */
  546. RAR_READ_VINT_SKIP ();
  547. flags = vint;
  548. if (flags & 0x1) {
  549. /* Have extra zone */
  550. RAR_READ_VINT_SKIP ();
  551. extra_sz = vint;
  552. has_extra = TRUE;
  553. }
  554. if (flags & 0x2) {
  555. /* Data zone is presented */
  556. RAR_READ_VINT_SKIP ();
  557. sz += vint;
  558. comp_sz = vint;
  559. }
  560. if (type != rar_file_header) {
  561. p = section_start;
  562. RAR_SKIP_BYTES (sz);
  563. }
  564. else {
  565. /* We have a file header, go forward */
  566. guint64 fname_len;
  567. /* File header specific flags */
  568. RAR_READ_VINT_SKIP ();
  569. flags = vint;
  570. /* Unpacked size */
  571. RAR_READ_VINT_SKIP ();
  572. uncomp_sz = vint;
  573. /* Attributes */
  574. RAR_READ_VINT_SKIP ();
  575. if (flags & 0x2) {
  576. /* Unix mtime */
  577. RAR_SKIP_BYTES (sizeof (guint32));
  578. }
  579. if (flags & 0x4) {
  580. /* Crc32 */
  581. RAR_SKIP_BYTES (sizeof (guint32));
  582. }
  583. /* Compression */
  584. RAR_READ_VINT_SKIP ();
  585. /* Host OS */
  586. RAR_READ_VINT_SKIP ();
  587. /* Filename length (finally!) */
  588. RAR_READ_VINT_SKIP ();
  589. fname_len = vint;
  590. if (fname_len == 0 || fname_len > (gsize)(end - p)) {
  591. msg_debug_archive ("rar archive is invalid (bad filename size)");
  592. return;
  593. }
  594. f = g_malloc0 (sizeof (*f));
  595. f->uncompressed_size = uncomp_sz;
  596. f->compressed_size = comp_sz;
  597. rspamd_archive_file_try_utf (task, arch, f, p, fname_len);
  598. if (f->fname) {
  599. msg_debug_archive ("added rarv5 file: %v", f->fname);
  600. g_ptr_array_add (arch->files, f);
  601. if (f->flags & RSPAMD_ARCHIVE_FILE_OBFUSCATED) {
  602. arch->flags |= RSPAMD_ARCHIVE_HAS_OBFUSCATED_FILES;
  603. }
  604. }
  605. else {
  606. g_free (f);
  607. f = NULL;
  608. }
  609. if (f && has_extra && extra_sz > 0 &&
  610. p + fname_len + extra_sz < end) {
  611. /* Try to find encryption record in extra field */
  612. const guchar *ex = p + fname_len;
  613. while (ex < p + extra_sz) {
  614. const guchar *t;
  615. gint64 cur_sz = 0, sec_type = 0;
  616. r = rspamd_archive_rar_read_vint (ex, extra_sz, &cur_sz);
  617. if (r == -1) {
  618. msg_debug_archive ("rar archive is invalid (bad vint)");
  619. return;
  620. }
  621. t = ex + r;
  622. r = rspamd_archive_rar_read_vint (t, extra_sz - r, &sec_type);
  623. if (r == -1) {
  624. msg_debug_archive ("rar archive is invalid (bad vint)");
  625. return;
  626. }
  627. if (sec_type == 0x01) {
  628. f->flags |= RSPAMD_ARCHIVE_FILE_ENCRYPTED;
  629. arch->flags |= RSPAMD_ARCHIVE_ENCRYPTED;
  630. break;
  631. }
  632. ex += cur_sz;
  633. }
  634. }
  635. /* Restore p to the beginning of the header */
  636. p = section_start;
  637. RAR_SKIP_BYTES (sz);
  638. }
  639. }
  640. end:
  641. part->part_type = RSPAMD_MIME_PART_ARCHIVE;
  642. part->specific.arch = arch;
  643. arch->size = part->parsed_data.len;
  644. }
  645. static inline gint
  646. rspamd_archive_7zip_read_vint (const guchar *start, gsize remain, guint64 *res)
  647. {
  648. /*
  649. * REAL_UINT64 means real UINT64.
  650. * UINT64 means real UINT64 encoded with the following scheme:
  651. *
  652. * Size of encoding sequence depends from first byte:
  653. * First_Byte Extra_Bytes Value
  654. * (binary)
  655. * 0xxxxxxx : ( xxxxxxx )
  656. * 10xxxxxx BYTE y[1] : ( xxxxxx << (8 * 1)) + y
  657. * 110xxxxx BYTE y[2] : ( xxxxx << (8 * 2)) + y
  658. * ...
  659. * 1111110x BYTE y[6] : ( x << (8 * 6)) + y
  660. * 11111110 BYTE y[7] : y
  661. * 11111111 BYTE y[8] : y
  662. */
  663. guchar t;
  664. if (remain == 0) {
  665. return -1;
  666. }
  667. t = *start;
  668. if (!isset (&t, 7)) {
  669. /* Trivial case */
  670. *res = t;
  671. return 1;
  672. }
  673. else if (t == 0xFF) {
  674. if (remain >= sizeof (guint64) + 1) {
  675. memcpy (res, start + 1, sizeof (guint64));
  676. *res = GUINT64_FROM_LE (*res);
  677. return sizeof (guint64) + 1;
  678. }
  679. }
  680. else {
  681. gint cur_bit = 6, intlen = 1;
  682. const guchar bmask = 0xFF;
  683. guint64 tgt;
  684. while (cur_bit > 0) {
  685. if (!isset (&t, cur_bit)) {
  686. if (remain >= intlen + 1) {
  687. memcpy (&tgt, start + 1, intlen);
  688. tgt = GUINT64_FROM_LE (tgt);
  689. /* Shift back */
  690. tgt >>= sizeof (tgt) - NBBY * intlen;
  691. /* Add masked value */
  692. tgt += (guint64)(t & (bmask >> (NBBY - cur_bit)))
  693. << (NBBY * intlen);
  694. *res = tgt;
  695. return intlen + 1;
  696. }
  697. }
  698. cur_bit --;
  699. intlen ++;
  700. }
  701. }
  702. return -1;
  703. }
  /*
   * Helpers for the 7zip parsers. All of them work on the local variables
   * `p` (read cursor) and `end` of the calling function and log through
   * msg_debug_archive. SZ_READ_VINT_SKIP and SZ_READ_UINT64 leave the caller
   * with a plain `return;`, SZ_READ_VINT and SZ_SKIP_BYTES with
   * `return NULL;`, so each pair fits only functions of the matching kind.
   */

  /* Decode a vint into the caller's `vint` (using its `r`) and step over it */
  #define SZ_READ_VINT_SKIP() do { \
      r = rspamd_archive_7zip_read_vint (p, end - p, &vint); \
      if (r == -1) { \
          msg_debug_archive ("7z archive is invalid (bad vint)"); \
          return; \
      } \
      p += r; \
  } while (0)

  /* Decode a vint into `var` and step over it */
  #define SZ_READ_VINT(var) do { \
      int r = rspamd_archive_7zip_read_vint (p, end - p, &(var)); \
      if (r == -1) { \
          msg_debug_archive ("7z archive is invalid (bad vint): %s", G_STRLOC); \
          return NULL; \
      } \
      p += r; \
  } while (0)

  /* Read a little-endian 64 bit value into n and step over it */
  #define SZ_READ_UINT64(n) do { \
      if (end - p < (goffset)sizeof (guint64)) { \
          msg_debug_archive ("7zip archive is invalid (bad uint64): %s", G_STRLOC); \
          return; \
      } \
      memcpy (&(n), p, sizeof (guint64)); \
      n = GUINT64_FROM_LE(n); \
      p += sizeof (guint64); \
  } while (0)

  /* Advance the cursor by n bytes if that many bytes are left */
  #define SZ_SKIP_BYTES(n) do { \
      if (end - p < (n)) { \
          msg_debug_archive ("7zip archive is invalid (truncated); wanted to read %d bytes, %d avail: %s", (gint)(n), (gint)(end - p), G_STRLOC); \
          return NULL; \
      } \
      p += (n); \
  } while (0)
  /*
   * One-byte markers that introduce the sections and properties of a 7zip
   * header.
   */
  enum rspamd_7zip_header_mark {
      kEnd                   = 0x00,
      kHeader                = 0x01,
      kArchiveProperties     = 0x02,
      kAdditionalStreamsInfo = 0x03,
      kMainStreamsInfo       = 0x04,
      kFilesInfo             = 0x05,
      kPackInfo              = 0x06,
      kUnPackInfo            = 0x07,
      kSubStreamsInfo        = 0x08,
      kSize                  = 0x09,
      kCRC                   = 0x0A,
      kFolder                = 0x0B,
      kCodersUnPackSize      = 0x0C,
      kNumUnPackStream       = 0x0D,
      kEmptyStream           = 0x0E,
      kEmptyFile             = 0x0F,
      kAnti                  = 0x10,
      kName                  = 0x11,
      kCTime                 = 0x12,
      kATime                 = 0x13,
      kMTime                 = 0x14,
      kWinAttributes         = 0x15,
      kComment               = 0x16,
      kEncodedHeader         = 0x17,
      kStartPos              = 0x18,
      kDummy                 = 0x19,
  };
  /* Codec ids of the 7zip coders that encrypt data */
  #define _7Z_CRYPTO_MAIN_ZIP        0x06F10101 /* Main Zip crypto algo */
  #define _7Z_CRYPTO_RAR_29          0x06F10303 /* Rar29 AES-128 + (modified SHA-1) */
  #define _7Z_CRYPTO_AES_256_SHA_256 0x06F10701 /* AES-256 + SHA-256 */

  /* Non-zero if codec_id is one of the encrypting codecs listed above */
  #define IS_SZ_ENCRYPTED(codec_id) \
      (((codec_id) == _7Z_CRYPTO_MAIN_ZIP) || \
       ((codec_id) == _7Z_CRYPTO_RAR_29) || \
       ((codec_id) == _7Z_CRYPTO_AES_256_SHA_256))
  773. static const guchar *
  774. rspamd_7zip_read_bits (struct rspamd_task *task,
  775. const guchar *p, const guchar *end,
  776. struct rspamd_archive *arch, guint nbits,
  777. guint *pbits_set)
  778. {
  779. unsigned mask = 0, avail = 0, i;
  780. gboolean bit_set = 0;
  781. for (i = 0; i < nbits; i++) {
  782. if (mask == 0) {
  783. avail = *p;
  784. SZ_SKIP_BYTES(1);
  785. mask = 0x80;
  786. }
  787. bit_set = (avail & mask) ? 1 : 0;
  788. if (bit_set && pbits_set) {
  789. (*pbits_set) ++;
  790. }
  791. mask >>= 1;
  792. }
  793. return p;
  794. }
  795. static const guchar *
  796. rspamd_7zip_read_digest (struct rspamd_task *task,
  797. const guchar *p, const guchar *end,
  798. struct rspamd_archive *arch,
  799. guint64 num_streams,
  800. guint *pdigest_read)
  801. {
  802. guchar all_defined = *p;
  803. guint64 i;
  804. guint num_defined = 0;
  805. /*
  806. * BYTE AllAreDefined
  807. * if (AllAreDefined == 0)
  808. * {
  809. * for(NumStreams)
  810. * BIT Defined
  811. * }
  812. * UINT32 CRCs[NumDefined]
  813. */
  814. SZ_SKIP_BYTES(1);
  815. if (all_defined) {
  816. num_defined = num_streams;
  817. }
  818. else {
  819. if (num_streams > 8192) {
  820. /* Gah */
  821. return NULL;
  822. }
  823. p = rspamd_7zip_read_bits (task, p, end, arch, num_streams, &num_defined);
  824. if (p == NULL) {
  825. return NULL;
  826. }
  827. }
  828. for (i = 0; i < num_defined; i ++) {
  829. SZ_SKIP_BYTES(sizeof(guint32));
  830. }
  831. if (pdigest_read) {
  832. *pdigest_read = num_defined;
  833. }
  834. return p;
  835. }
  836. static const guchar *
  837. rspamd_7zip_read_pack_info (struct rspamd_task *task,
  838. const guchar *p, const guchar *end,
  839. struct rspamd_archive *arch)
  840. {
  841. guint64 pack_pos = 0, pack_streams = 0, i, cur_sz;
  842. guint num_digests = 0;
  843. guchar t;
  844. /*
  845. * UINT64 PackPos
  846. * UINT64 NumPackStreams
  847. *
  848. * []
  849. * BYTE NID::kSize (0x09)
  850. * UINT64 PackSizes[NumPackStreams]
  851. * []
  852. *
  853. * []
  854. * BYTE NID::kCRC (0x0A)
  855. * PackStreamDigests[NumPackStreams]
  856. * []
  857. * BYTE NID::kEnd
  858. */
  859. SZ_READ_VINT(pack_pos);
  860. SZ_READ_VINT(pack_streams);
  861. while (p != NULL && p < end) {
  862. t = *p;
  863. SZ_SKIP_BYTES(1);
  864. msg_debug_archive ("7zip: read pack info %xc", t);
  865. switch (t) {
  866. case kSize:
  867. /* We need to skip pack_streams VINTS */
  868. for (i = 0; i < pack_streams; i++) {
  869. SZ_READ_VINT(cur_sz);
  870. }
  871. break;
  872. case kCRC:
  873. /* CRCs are more complicated */
  874. p = rspamd_7zip_read_digest (task, p, end, arch, pack_streams,
  875. &num_digests);
  876. break;
  877. case kEnd:
  878. goto end;
  879. break;
  880. default:
  881. p = NULL;
  882. msg_debug_archive ("bad 7zip type: %xc; %s", t, G_STRLOC);
  883. goto end;
  884. break;
  885. }
  886. }
  887. end:
  888. return p;
  889. }
  890. static const guchar *
  891. rspamd_7zip_read_folder (struct rspamd_task *task,
  892. const guchar *p, const guchar *end,
  893. struct rspamd_archive *arch, guint *pnstreams, guint *ndigests)
  894. {
  895. guint64 ncoders = 0, i, j, noutstreams = 0, ninstreams = 0;
  896. SZ_READ_VINT (ncoders);
  897. for (i = 0; i < ncoders && p != NULL && p < end; i ++) {
  898. guint64 sz, tmp;
  899. guchar t;
  900. /*
  901. * BYTE
  902. * {
  903. * 0:3 CodecIdSize
  904. * 4: Is Complex Coder
  905. * 5: There Are Attributes
  906. * 6: Reserved
  907. * 7: There are more alternative methods. (Not used anymore, must be 0).
  908. * }
  909. * BYTE CodecId[CodecIdSize]
  910. * if (Is Complex Coder)
  911. * {
  912. * UINT64 NumInStreams;
  913. * UINT64 NumOutStreams;
  914. * }
  915. * if (There Are Attributes)
  916. * {
  917. * UINT64 PropertiesSize
  918. * BYTE Properties[PropertiesSize]
  919. * }
  920. */
  921. t = *p;
  922. SZ_SKIP_BYTES (1);
  923. sz = t & 0xF;
  924. /* Codec ID */
  925. tmp = 0;
  926. for (j = 0; j < sz; j++) {
  927. tmp <<= 8;
  928. tmp += p[j];
  929. }
  930. msg_debug_archive ("7zip: read codec id: %L", tmp);
  931. if (IS_SZ_ENCRYPTED (tmp)) {
  932. arch->flags |= RSPAMD_ARCHIVE_ENCRYPTED;
  933. }
  934. SZ_SKIP_BYTES (sz);
  935. if (t & (1u << 4)) {
  936. /* Complex */
  937. SZ_READ_VINT (tmp); /* InStreams */
  938. ninstreams += tmp;
  939. SZ_READ_VINT (tmp); /* OutStreams */
  940. noutstreams += tmp;
  941. }
  942. else {
  943. /* XXX: is it correct ? */
  944. noutstreams ++;
  945. ninstreams ++;
  946. }
  947. if (t & (1u << 5)) {
  948. /* Attributes ... */
  949. SZ_READ_VINT (tmp); /* Size of attrs */
  950. SZ_SKIP_BYTES (tmp);
  951. }
  952. }
  953. if (noutstreams > 1) {
  954. /* BindPairs, WTF, huh */
  955. for (i = 0; i < noutstreams - 1; i ++) {
  956. guint64 tmp;
  957. SZ_READ_VINT (tmp);
  958. SZ_READ_VINT (tmp);
  959. }
  960. }
  961. gint64 npacked = (gint64)ninstreams - (gint64)noutstreams + 1;
  962. msg_debug_archive ("7zip: instreams=%L, outstreams=%L, packed=%L",
  963. ninstreams, noutstreams, npacked);
  964. if (npacked > 1) {
  965. /* Gah... */
  966. for (i = 0; i < npacked; i ++) {
  967. guint64 tmp;
  968. SZ_READ_VINT (tmp);
  969. }
  970. }
  971. *pnstreams = noutstreams;
  972. (*ndigests) += npacked;
  973. return p;
  974. }
  975. static const guchar *
  976. rspamd_7zip_read_coders_info (struct rspamd_task *task,
  977. const guchar *p, const guchar *end,
  978. struct rspamd_archive *arch,
  979. guint *pnum_folders, guint *pnum_nodigest)
  980. {
  981. guint64 num_folders = 0, i, tmp;
  982. guchar t;
  983. guint *folder_nstreams = NULL, num_digests = 0, digests_read = 0;
  984. while (p != NULL && p < end) {
  985. /*
  986. * BYTE NID::kFolder (0x0B)
  987. * UINT64 NumFolders
  988. * BYTE External
  989. * switch(External)
  990. * {
  991. * case 0:
  992. * Folders[NumFolders]
  993. * case 1:
  994. * UINT64 DataStreamIndex
  995. * }
  996. * BYTE ID::kCodersUnPackSize (0x0C)
  997. * for(Folders)
  998. * for(Folder.NumOutStreams)
  999. * UINT64 UnPackSize;
  1000. * []
  1001. * BYTE NID::kCRC (0x0A)
  1002. * UnPackDigests[NumFolders]
  1003. * []
  1004. * BYTE NID::kEnd
  1005. */
  1006. t = *p;
  1007. SZ_SKIP_BYTES(1);
  1008. msg_debug_archive ("7zip: read coders info %xc", t);
  1009. switch (t) {
  1010. case kFolder:
  1011. SZ_READ_VINT (num_folders);
  1012. msg_debug_archive ("7zip: nfolders=%L", num_folders);
  1013. if (*p != 0) {
  1014. /* External folders */
  1015. SZ_SKIP_BYTES(1);
  1016. SZ_READ_VINT (tmp);
  1017. }
  1018. else {
  1019. SZ_SKIP_BYTES(1);
  1020. if (num_folders > 8192) {
  1021. /* Gah */
  1022. return NULL;
  1023. }
  1024. if (folder_nstreams) {
  1025. g_free (folder_nstreams);
  1026. }
  1027. folder_nstreams = g_malloc (sizeof (int) * num_folders);
  1028. for (i = 0; i < num_folders && p != NULL && p < end; i++) {
  1029. p = rspamd_7zip_read_folder (task, p, end, arch,
  1030. &folder_nstreams[i], &num_digests);
  1031. }
  1032. }
  1033. break;
  1034. case kCodersUnPackSize:
  1035. for (i = 0; i < num_folders && p != NULL && p < end; i++) {
  1036. if (folder_nstreams) {
  1037. for (guint j = 0; j < folder_nstreams[i]; j++) {
  1038. SZ_READ_VINT (tmp); /* Unpacked size */
  1039. msg_debug_archive ("7zip: unpacked size "
  1040. "(folder=%d, stream=%d) = %L",
  1041. (gint)i, j, tmp);
  1042. }
  1043. }
  1044. else {
  1045. msg_err_task ("internal 7zip error");
  1046. }
  1047. }
  1048. break;
  1049. case kCRC:
  1050. /*
  1051. * Here are dragons. Spec tells that here there could be up
  1052. * to nfolders digests. However, according to the actual source
  1053. * code, in case of multiple out streams there should be digests
  1054. * for all out streams.
  1055. *
  1056. * In the real life (tm) it is even more idiotic: all these digests
  1057. * are in another section! But that section needs number of digests
  1058. * that are absent here. It is the most stupid thing I've ever seen
  1059. * in any file format.
  1060. *
  1061. * I hope there *WAS* some reason to do such shit...
  1062. */
  1063. p = rspamd_7zip_read_digest (task, p, end, arch, num_digests,
  1064. &digests_read);
  1065. break;
  1066. case kEnd:
  1067. goto end;
  1068. break;
  1069. default:
  1070. p = NULL;
  1071. msg_debug_archive ("bad 7zip type: %xc; %s", t, G_STRLOC);
  1072. goto end;
  1073. break;
  1074. }
  1075. }
  1076. end:
  1077. if (pnum_nodigest) {
  1078. *pnum_nodigest = num_digests - digests_read;
  1079. }
  1080. if (pnum_folders) {
  1081. *pnum_folders = num_folders;
  1082. }
  1083. if (folder_nstreams) {
  1084. g_free (folder_nstreams);
  1085. }
  1086. return p;
  1087. }
  1088. static const guchar *
  1089. rspamd_7zip_read_substreams_info (struct rspamd_task *task,
  1090. const guchar *p, const guchar *end,
  1091. struct rspamd_archive *arch,
  1092. guint num_folders, guint num_nodigest)
  1093. {
  1094. guchar t;
  1095. guint i;
  1096. guint64 *folder_nstreams;
  1097. if (num_folders > 8192) {
  1098. /* Gah */
  1099. return NULL;
  1100. }
  1101. folder_nstreams = g_alloca (sizeof (guint64) * num_folders);
  1102. memset (folder_nstreams, 0, sizeof (guint64) * num_folders);
  1103. while (p != NULL && p < end) {
  1104. /*
  1105. * []
  1106. * BYTE NID::kNumUnPackStream; (0x0D)
  1107. * UINT64 NumUnPackStreamsInFolders[NumFolders];
  1108. * []
  1109. *
  1110. * []
  1111. * BYTE NID::kSize (0x09)
  1112. * UINT64 UnPackSizes[??]
  1113. * []
  1114. *
  1115. *
  1116. * []
  1117. * BYTE NID::kCRC (0x0A)
  1118. * Digests[Number of streams with unknown CRC]
  1119. * []
  1120. */
  1121. t = *p;
  1122. SZ_SKIP_BYTES(1);
  1123. msg_debug_archive ("7zip: read substream info %xc", t);
  1124. switch (t) {
  1125. case kNumUnPackStream:
  1126. for (i = 0; i < num_folders; i ++) {
  1127. guint64 tmp;
  1128. SZ_READ_VINT (tmp);
  1129. folder_nstreams[i] = tmp;
  1130. }
  1131. break;
  1132. case kCRC:
  1133. /*
  1134. * Read the comment in the rspamd_7zip_read_coders_info
  1135. */
  1136. p = rspamd_7zip_read_digest (task, p, end, arch, num_nodigest,
  1137. NULL);
  1138. break;
  1139. case kSize:
  1140. /*
  1141. * Another brain damaged logic, but we have to support it
  1142. * as there are no ways to proceed without it.
  1143. * In fact, it is just absent in the real life...
  1144. */
  1145. for (i = 0; i < num_folders; i ++) {
  1146. for (guint j = 0; j < folder_nstreams[i]; j++) {
  1147. guint64 tmp;
  1148. SZ_READ_VINT (tmp); /* Who cares indeed */
  1149. }
  1150. }
  1151. break;
  1152. case kEnd:
  1153. goto end;
  1154. break;
  1155. default:
  1156. p = NULL;
  1157. msg_debug_archive ("bad 7zip type: %xc; %s", t, G_STRLOC);
  1158. goto end;
  1159. break;
  1160. }
  1161. }
  1162. end:
  1163. return p;
  1164. }
  1165. static const guchar *
  1166. rspamd_7zip_read_main_streams_info (struct rspamd_task *task,
  1167. const guchar *p, const guchar *end,
  1168. struct rspamd_archive *arch)
  1169. {
  1170. guchar t;
  1171. guint num_folders = 0, unknown_digests = 0;
  1172. while (p != NULL && p < end) {
  1173. t = *p;
  1174. SZ_SKIP_BYTES(1);
  1175. msg_debug_archive ("7zip: read main streams info %xc", t);
  1176. /*
  1177. *
  1178. * []
  1179. * PackInfo
  1180. * []
  1181. * []
  1182. * CodersInfo
  1183. * []
  1184. *
  1185. * []
  1186. * SubStreamsInfo
  1187. * []
  1188. *
  1189. * BYTE NID::kEnd
  1190. */
  1191. switch (t) {
  1192. case kPackInfo:
  1193. p = rspamd_7zip_read_pack_info (task, p, end, arch);
  1194. break;
  1195. case kUnPackInfo:
  1196. p = rspamd_7zip_read_coders_info (task, p, end, arch, &num_folders,
  1197. &unknown_digests);
  1198. break;
  1199. case kSubStreamsInfo:
  1200. p = rspamd_7zip_read_substreams_info (task, p, end, arch, num_folders,
  1201. unknown_digests);
  1202. break;
  1203. break;
  1204. case kEnd:
  1205. goto end;
  1206. break;
  1207. default:
  1208. p = NULL;
  1209. msg_debug_archive ("bad 7zip type: %xc; %s", t, G_STRLOC);
  1210. goto end;
  1211. break;
  1212. }
  1213. }
  1214. end:
  1215. return p;
  1216. }
  1217. static const guchar *
  1218. rspamd_7zip_read_archive_props (struct rspamd_task *task,
  1219. const guchar *p, const guchar *end,
  1220. struct rspamd_archive *arch)
  1221. {
  1222. guchar proptype;
  1223. guint64 proplen;
  1224. /*
  1225. * for (;;)
  1226. * {
  1227. * BYTE PropertyType;
  1228. * if (aType == 0)
  1229. * break;
  1230. * UINT64 PropertySize;
  1231. * BYTE PropertyData[PropertySize];
  1232. * }
  1233. */
  1234. if (p != NULL) {
  1235. proptype = *p;
  1236. SZ_SKIP_BYTES(1);
  1237. while (proptype != 0) {
  1238. SZ_READ_VINT(proplen);
  1239. if (p + proplen < end) {
  1240. p += proplen;
  1241. }
  1242. else {
  1243. return NULL;
  1244. }
  1245. proptype = *p;
  1246. SZ_SKIP_BYTES(1);
  1247. }
  1248. }
  1249. return p;
  1250. }
  1251. static GString *
  1252. rspamd_7zip_ucs2_to_utf8 (struct rspamd_task *task, const guchar *p,
  1253. const guchar *end)
  1254. {
  1255. GString *res;
  1256. goffset dest_pos = 0, src_pos = 0;
  1257. const gsize len = (end - p) / sizeof (guint16);
  1258. guint16 *up;
  1259. UChar32 wc;
  1260. UBool is_error = 0;
  1261. res = g_string_sized_new ((end - p) * 3 / 2 + sizeof (wc) + 1);
  1262. up = (guint16 *)p;
  1263. while (src_pos < len) {
  1264. U16_NEXT (up, src_pos, len, wc);
  1265. if (wc > 0) {
  1266. U8_APPEND (res->str, dest_pos,
  1267. res->allocated_len - 1,
  1268. wc, is_error);
  1269. }
  1270. if (is_error) {
  1271. g_string_free (res, TRUE);
  1272. return NULL;
  1273. }
  1274. }
  1275. g_assert (dest_pos < res->allocated_len);
  1276. res->len = dest_pos;
  1277. res->str[dest_pos] = '\0';
  1278. return res;
  1279. }
  1280. static const guchar *
  1281. rspamd_7zip_read_files_info (struct rspamd_task *task,
  1282. const guchar *p, const guchar *end,
  1283. struct rspamd_archive *arch)
  1284. {
  1285. guint64 nfiles = 0, sz, i;
  1286. guchar t, b;
  1287. struct rspamd_archive_file *fentry;
  1288. SZ_READ_VINT (nfiles);
  1289. for (;p != NULL && p < end;) {
  1290. t = *p;
  1291. SZ_SKIP_BYTES (1);
  1292. msg_debug_archive ("7zip: read file data type %xc", t);
  1293. if (t == kEnd) {
  1294. goto end;
  1295. }
  1296. /* This is SO SPECIAL, gah */
  1297. SZ_READ_VINT (sz);
  1298. switch (t) {
  1299. case kEmptyStream:
  1300. case kEmptyFile:
  1301. case kAnti: /* AntiFile, OMFG */
  1302. /* We don't care about these bits */
  1303. case kCTime:
  1304. case kATime:
  1305. case kMTime:
  1306. /* We don't care of these guys, but we still have to parse them, gah */
  1307. if (sz > 0) {
  1308. SZ_SKIP_BYTES (sz);
  1309. }
  1310. break;
  1311. case kName:
  1312. /* The most useful part in this whole bloody format */
  1313. b = *p; /* External flag */
  1314. SZ_SKIP_BYTES (1);
  1315. if (b) {
  1316. /* TODO: for the god sake, do something about external
  1317. * filenames...
  1318. */
  1319. guint64 tmp;
  1320. SZ_READ_VINT (tmp);
  1321. }
  1322. else {
  1323. for (i = 0; i < nfiles; i ++) {
  1324. /* Zero terminated wchar_t: happy converting... */
  1325. /* First, find terminator */
  1326. const guchar *fend = NULL, *tp = p;
  1327. GString *res;
  1328. while (tp < end - 1) {
  1329. if (*tp == 0 && *(tp + 1) == 0) {
  1330. fend = tp;
  1331. break;
  1332. }
  1333. tp += 2;
  1334. }
  1335. if (fend == NULL || fend - p == 0) {
  1336. /* Crap instead of fname */
  1337. msg_debug_archive ("bad 7zip name; %s", G_STRLOC);
  1338. goto end;
  1339. }
  1340. res = rspamd_7zip_ucs2_to_utf8 (task, p, fend);
  1341. if (res != NULL) {
  1342. fentry = g_malloc0 (sizeof (*fentry));
  1343. fentry->fname = res;
  1344. g_ptr_array_add (arch->files, fentry);
  1345. msg_debug_archive ("7zip: found file %v", res);
  1346. }
  1347. else {
  1348. msg_debug_archive ("bad 7zip name; %s", G_STRLOC);
  1349. }
  1350. /* Skip zero terminating character */
  1351. p = fend + 2;
  1352. }
  1353. }
  1354. break;
  1355. case kDummy:
  1356. case kWinAttributes:
  1357. if (sz > 0) {
  1358. SZ_SKIP_BYTES (sz);
  1359. }
  1360. break;
  1361. default:
  1362. p = NULL;
  1363. msg_debug_archive ("bad 7zip type: %xc; %s", t, G_STRLOC);
  1364. goto end;
  1365. break;
  1366. }
  1367. }
  1368. end:
  1369. return p;
  1370. }
  1371. static const guchar *
  1372. rspamd_7zip_read_next_section (struct rspamd_task *task,
  1373. const guchar *p, const guchar *end,
  1374. struct rspamd_archive *arch)
  1375. {
  1376. guchar t = *p;
  1377. SZ_SKIP_BYTES(1);
  1378. msg_debug_archive ("7zip: read section %xc", t);
  1379. switch (t) {
  1380. case kHeader:
  1381. /* We just skip byte and go further */
  1382. break;
  1383. case kEncodedHeader:
  1384. /*
  1385. * In fact, headers are just packed, but we assume it as
  1386. * encrypted to distinguish from the normal archives
  1387. */
  1388. msg_debug_archive ("7zip: encoded header, needs to be uncompressed");
  1389. arch->flags |= RSPAMD_ARCHIVE_CANNOT_READ;
  1390. p = NULL; /* Cannot get anything useful */
  1391. break;
  1392. case kArchiveProperties:
  1393. p = rspamd_7zip_read_archive_props (task, p, end, arch);
  1394. break;
  1395. case kMainStreamsInfo:
  1396. p = rspamd_7zip_read_main_streams_info (task, p, end, arch);
  1397. break;
  1398. case kAdditionalStreamsInfo:
  1399. p = rspamd_7zip_read_main_streams_info (task, p, end, arch);
  1400. break;
  1401. case kFilesInfo:
  1402. p = rspamd_7zip_read_files_info (task, p, end, arch);
  1403. break;
  1404. case kEnd:
  1405. p = NULL;
  1406. msg_debug_archive ("7zip: read final section");
  1407. break;
  1408. default:
  1409. p = NULL;
  1410. msg_debug_archive ("bad 7zip type: %xc; %s", t, G_STRLOC);
  1411. break;
  1412. }
  1413. return p;
  1414. }
  1415. static void
  1416. rspamd_archive_process_7zip (struct rspamd_task *task,
  1417. struct rspamd_mime_part *part)
  1418. {
  1419. struct rspamd_archive *arch;
  1420. const guchar *start, *p, *end;
  1421. const guchar sz_magic[] = {'7', 'z', 0xBC, 0xAF, 0x27, 0x1C};
  1422. guint64 section_offset = 0, section_length = 0;
  1423. start = part->parsed_data.begin;
  1424. p = start;
  1425. end = p + part->parsed_data.len;
  1426. if (end - p <= sizeof (guint64) + sizeof (guint32) ||
  1427. memcmp (p, sz_magic, sizeof (sz_magic)) != 0) {
  1428. msg_debug_archive ("7z archive is invalid (no 7z magic)");
  1429. return;
  1430. }
  1431. arch = rspamd_mempool_alloc0 (task->task_pool, sizeof (*arch));
  1432. arch->files = g_ptr_array_new ();
  1433. arch->type = RSPAMD_ARCHIVE_7ZIP;
  1434. rspamd_mempool_add_destructor (task->task_pool, rspamd_archive_dtor,
  1435. arch);
  1436. /* Magic (6 bytes) + version (2 bytes) + crc32 (4 bytes) */
  1437. p += sizeof (guint64) + sizeof (guint32);
  1438. SZ_READ_UINT64(section_offset);
  1439. SZ_READ_UINT64(section_length);
  1440. if (end - p > sizeof (guint32)) {
  1441. p += sizeof (guint32);
  1442. }
  1443. else {
  1444. msg_debug_archive ("7z archive is invalid (truncated crc)");
  1445. return;
  1446. }
  1447. if (end - p > section_offset) {
  1448. p += section_offset;
  1449. }
  1450. else {
  1451. msg_debug_archive ("7z archive is invalid (incorrect section offset)");
  1452. return;
  1453. }
  1454. while ((p = rspamd_7zip_read_next_section (task, p, end, arch)) != NULL);
  1455. part->part_type = RSPAMD_MIME_PART_ARCHIVE;
  1456. part->specific.arch = arch;
  1457. if (part->cd != NULL) {
  1458. arch->archive_name = &part->cd->filename;
  1459. }
  1460. arch->size = part->parsed_data.len;
  1461. }
  1462. static void
  1463. rspamd_archive_process_gzip (struct rspamd_task *task,
  1464. struct rspamd_mime_part *part) {
  1465. struct rspamd_archive *arch;
  1466. const guchar *start, *p, *end;
  1467. const guchar gz_magic[] = {0x1F, 0x8B};
  1468. guchar flags;
  1469. start = part->parsed_data.begin;
  1470. p = start;
  1471. end = p + part->parsed_data.len;
  1472. if (end - p <= 10 || memcmp (p, gz_magic, sizeof (gz_magic)) != 0) {
  1473. msg_debug_archive ("gzip archive is invalid (no gzip magic)");
  1474. return;
  1475. }
  1476. arch = rspamd_mempool_alloc0 (task->task_pool, sizeof (*arch));
  1477. arch->files = g_ptr_array_sized_new (1);
  1478. arch->type = RSPAMD_ARCHIVE_GZIP;
  1479. if (part->cd) {
  1480. arch->archive_name = &part->cd->filename;
  1481. }
  1482. rspamd_mempool_add_destructor (task->task_pool, rspamd_archive_dtor,
  1483. arch);
  1484. flags = p[3];
  1485. if (flags & (1u << 5)) {
  1486. arch->flags |= RSPAMD_ARCHIVE_ENCRYPTED;
  1487. }
  1488. if (flags & (1u << 3)) {
  1489. /* We have file name presented in archive, try to use it */
  1490. if (flags & (1u << 1)) {
  1491. /* Multipart */
  1492. p += 12;
  1493. }
  1494. else {
  1495. p += 10;
  1496. }
  1497. if (flags & (1u << 2)) {
  1498. /* Optional section */
  1499. guint16 optlen = 0;
  1500. RAR_READ_UINT16 (optlen);
  1501. if (end <= p + optlen) {
  1502. msg_debug_archive ("gzip archive is invalid, bad extra length: %d",
  1503. (int)optlen);
  1504. return;
  1505. }
  1506. p += optlen;
  1507. }
  1508. /* Read file name */
  1509. const guchar *fname_start = p;
  1510. while (p < end) {
  1511. if (*p == '\0') {
  1512. if (p > fname_start) {
  1513. struct rspamd_archive_file *f;
  1514. f = g_malloc0 (sizeof (*f));
  1515. rspamd_archive_file_try_utf (task, arch, f,
  1516. fname_start, p - fname_start);
  1517. if (f->fname) {
  1518. g_ptr_array_add (arch->files, f);
  1519. if (f->flags & RSPAMD_ARCHIVE_FILE_OBFUSCATED) {
  1520. arch->flags |= RSPAMD_ARCHIVE_HAS_OBFUSCATED_FILES;
  1521. }
  1522. }
  1523. else {
  1524. /* Invalid filename, skip */
  1525. g_free (f);
  1526. }
  1527. goto set;
  1528. }
  1529. }
  1530. p ++;
  1531. }
  1532. /* Wrong filename, not zero terminated */
  1533. msg_debug_archive ("gzip archive is invalid, bad filename at pos %d",
  1534. (int)(p - start));
  1535. return;
  1536. }
  1537. /* Fallback, we need to extract file name from archive name if possible */
  1538. if (part->cd && part->cd->filename.len > 0) {
  1539. const gchar *dot_pos, *slash_pos;
  1540. dot_pos = rspamd_memrchr (part->cd->filename.begin, '.',
  1541. part->cd->filename.len);
  1542. if (dot_pos) {
  1543. struct rspamd_archive_file *f;
  1544. slash_pos = rspamd_memrchr (part->cd->filename.begin, '/',
  1545. part->cd->filename.len);
  1546. if (slash_pos && slash_pos < dot_pos) {
  1547. f = g_malloc0 (sizeof (*f));
  1548. f->fname = g_string_sized_new (dot_pos - slash_pos);
  1549. g_string_append_len (f->fname, slash_pos + 1,
  1550. dot_pos - slash_pos - 1);
  1551. msg_debug_archive ("fallback to gzip filename based on cd: %v",
  1552. f->fname);
  1553. g_ptr_array_add (arch->files, f);
  1554. goto set;
  1555. }
  1556. else {
  1557. const gchar *fname_start = part->cd->filename.begin;
  1558. f = g_malloc0 (sizeof (*f));
  1559. if (memchr (fname_start, '.', part->cd->filename.len) != dot_pos) {
  1560. /* Double dots, something like foo.exe.gz */
  1561. f->fname = g_string_sized_new (dot_pos - fname_start);
  1562. g_string_append_len (f->fname, fname_start,
  1563. dot_pos - fname_start);
  1564. }
  1565. else {
  1566. /* Single dot, something like foo.gzz */
  1567. f->fname = g_string_sized_new (part->cd->filename.len);
  1568. g_string_append_len (f->fname, fname_start,
  1569. part->cd->filename.len);
  1570. }
  1571. msg_debug_archive ("fallback to gzip filename based on cd: %v",
  1572. f->fname);
  1573. g_ptr_array_add (arch->files, f);
  1574. goto set;
  1575. }
  1576. }
  1577. }
  1578. return;
  1579. set:
  1580. /* Set archive data */
  1581. part->part_type = RSPAMD_MIME_PART_ARCHIVE;
  1582. part->specific.arch = arch;
  1583. arch->size = part->parsed_data.len;
  1584. }
  1585. static gboolean
  1586. rspamd_archive_cheat_detect (struct rspamd_mime_part *part, const gchar *str,
  1587. const guchar *magic_start, gsize magic_len)
  1588. {
  1589. struct rspamd_content_type *ct;
  1590. const gchar *p;
  1591. rspamd_ftok_t srch, *fname;
  1592. ct = part->ct;
  1593. RSPAMD_FTOK_ASSIGN (&srch, "application");
  1594. if (ct && ct->type.len && ct->subtype.len > 0 && rspamd_ftok_cmp (&ct->type,
  1595. &srch) == 0) {
  1596. if (rspamd_substring_search_caseless (ct->subtype.begin, ct->subtype.len,
  1597. str, strlen (str)) != -1) {
  1598. /* We still need to check magic, see #1848 */
  1599. if (magic_start != NULL) {
  1600. if (part->parsed_data.len > magic_len &&
  1601. memcmp (part->parsed_data.begin,
  1602. magic_start, magic_len) == 0) {
  1603. return TRUE;
  1604. }
  1605. /* No magic, refuse this type of archive */
  1606. return FALSE;
  1607. }
  1608. else {
  1609. return TRUE;
  1610. }
  1611. }
  1612. }
  1613. if (part->cd) {
  1614. fname = &part->cd->filename;
  1615. if (fname && fname->len > strlen (str)) {
  1616. p = fname->begin + fname->len - strlen (str);
  1617. if (rspamd_lc_cmp (p, str, strlen (str)) == 0) {
  1618. if (*(p - 1) == '.') {
  1619. if (magic_start != NULL) {
  1620. if (part->parsed_data.len > magic_len &&
  1621. memcmp (part->parsed_data.begin,
  1622. magic_start, magic_len) == 0) {
  1623. return TRUE;
  1624. }
  1625. /* No magic, refuse this type of archive */
  1626. return FALSE;
  1627. }
  1628. return TRUE;
  1629. }
  1630. }
  1631. }
  1632. if (magic_start != NULL) {
  1633. if (part->parsed_data.len > magic_len &&
  1634. memcmp (part->parsed_data.begin, magic_start, magic_len) == 0) {
  1635. return TRUE;
  1636. }
  1637. }
  1638. }
  1639. else {
  1640. if (magic_start != NULL) {
  1641. if (part->parsed_data.len > magic_len &&
  1642. memcmp (part->parsed_data.begin, magic_start, magic_len) == 0) {
  1643. return TRUE;
  1644. }
  1645. }
  1646. }
  1647. return FALSE;
  1648. }
  1649. void
  1650. rspamd_archives_process (struct rspamd_task *task)
  1651. {
  1652. guint i;
  1653. struct rspamd_mime_part *part;
  1654. const guchar rar_magic[] = {0x52, 0x61, 0x72, 0x21, 0x1A, 0x07};
  1655. const guchar zip_magic[] = {0x50, 0x4b, 0x03, 0x04};
  1656. const guchar sz_magic[] = {'7', 'z', 0xBC, 0xAF, 0x27, 0x1C};
  1657. const guchar gz_magic[] = {0x1F, 0x8B, 0x08};
  1658. PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, parts), i, part) {
  1659. if (part->part_type == RSPAMD_MIME_PART_UNDEFINED) {
  1660. if (part->parsed_data.len > 0) {
  1661. if (rspamd_archive_cheat_detect (part, "zip",
  1662. zip_magic, sizeof (zip_magic))) {
  1663. rspamd_archive_process_zip (task, part);
  1664. }
  1665. else if (rspamd_archive_cheat_detect (part, "rar",
  1666. rar_magic, sizeof (rar_magic))) {
  1667. rspamd_archive_process_rar (task, part);
  1668. }
  1669. else if (rspamd_archive_cheat_detect (part, "7z",
  1670. sz_magic, sizeof (sz_magic))) {
  1671. rspamd_archive_process_7zip (task, part);
  1672. }
  1673. else if (rspamd_archive_cheat_detect (part, "gz",
  1674. gz_magic, sizeof (gz_magic))) {
  1675. rspamd_archive_process_gzip (task, part);
  1676. }
  1677. if (part->ct && (part->ct->flags & RSPAMD_CONTENT_TYPE_TEXT) &&
  1678. part->part_type == RSPAMD_MIME_PART_ARCHIVE &&
  1679. part->specific.arch) {
  1680. struct rspamd_archive *arch = part->specific.arch;
  1681. msg_info_task ("found %s archive with incorrect content-type: %T/%T",
  1682. rspamd_archive_type_str (arch->type),
  1683. &part->ct->type, &part->ct->subtype);
  1684. if (!(part->ct->flags & RSPAMD_CONTENT_TYPE_MISSING)) {
  1685. part->ct->flags |= RSPAMD_CONTENT_TYPE_BROKEN;
  1686. }
  1687. }
  1688. }
  1689. }
  1690. }
  1691. }
  1692. const gchar *
  1693. rspamd_archive_type_str (enum rspamd_archive_type type)
  1694. {
  1695. const gchar *ret = "unknown";
  1696. switch (type) {
  1697. case RSPAMD_ARCHIVE_ZIP:
  1698. ret = "zip";
  1699. break;
  1700. case RSPAMD_ARCHIVE_RAR:
  1701. ret = "rar";
  1702. break;
  1703. case RSPAMD_ARCHIVE_7ZIP:
  1704. ret = "7z";
  1705. break;
  1706. case RSPAMD_ARCHIVE_GZIP:
  1707. ret = "gz";
  1708. break;
  1709. }
  1710. return ret;
  1711. }