You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

content_type.c 20KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832
  1. /*-
  2. * Copyright 2016 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "libmime/content_type.h"
  17. #include "smtp_parsers.h"
  18. #include "utlist.h"
  19. #include "libserver/url.h"
  20. #include "libmime/mime_encoding.h"
  21. static gboolean
  22. rspamd_rfc2231_decode (rspamd_mempool_t *pool,
  23. struct rspamd_content_type_param *param,
  24. gchar *value_start, gchar *value_end)
  25. {
  26. gchar *quote_pos;
  27. quote_pos = memchr (value_start, '\'', value_end - value_start);
  28. if (quote_pos == NULL) {
  29. /* Plain percent encoding */
  30. gsize r = rspamd_url_decode (value_start, value_start,
  31. value_end - value_start);
  32. param->value.begin = value_start;
  33. param->value.len = r;
  34. }
  35. else {
  36. /*
  37. * We can have encoding'language'data, or
  38. * encoding'data (in theory).
  39. * Try to handle both...
  40. */
  41. const gchar *charset = NULL;
  42. rspamd_ftok_t ctok;
  43. ctok.begin = value_start;
  44. ctok.len = quote_pos - value_start;
  45. if (ctok.len > 0) {
  46. charset = rspamd_mime_detect_charset (&ctok, pool);
  47. }
  48. /* Now, we can check for either next quote sign or, eh, ignore that */
  49. value_start = quote_pos + 1;
  50. quote_pos = memchr (value_start, '\'', value_end - value_start);
  51. if (quote_pos) {
  52. /* Ignore language */
  53. value_start = quote_pos + 1;
  54. }
  55. /* Perform percent decoding */
  56. gsize r = rspamd_url_decode (value_start, value_start,
  57. value_end - value_start);
  58. GError *err = NULL;
  59. if (charset == NULL) {
  60. /* Try heuristic */
  61. charset = rspamd_mime_charset_find_by_content (value_start, r);
  62. }
  63. if (charset == NULL) {
  64. msg_warn_pool ("cannot convert parameter from charset %T", &ctok);
  65. return FALSE;
  66. }
  67. param->value.begin = rspamd_mime_text_to_utf8 (pool,
  68. value_start, r,
  69. charset, &param->value.len, &err);
  70. if (param->value.begin == NULL) {
  71. msg_warn_pool ("cannot convert parameter from charset %s: %e",
  72. charset, err);
  73. if (err) {
  74. g_error_free (err);
  75. }
  76. return FALSE;
  77. }
  78. }
  79. param->flags |= RSPAMD_CONTENT_PARAM_RFC2231;
  80. return TRUE;
  81. }
  82. static gboolean
  83. rspamd_param_maybe_rfc2231_process (rspamd_mempool_t *pool,
  84. struct rspamd_content_type_param *param,
  85. gchar *name_start, gchar *name_end,
  86. gchar *value_start, gchar *value_end)
  87. {
  88. const gchar *star_pos;
  89. star_pos = memchr (name_start, '*', name_end - name_start);
  90. if (star_pos == NULL) {
  91. return FALSE;
  92. }
  93. /* We have three possibilities here:
  94. * 1. name* (just name + 2231 encoding)
  95. * 2. name*(\d+) (piecewise stuff but no rfc2231 encoding)
  96. * 3. name*(\d+)* (piecewise stuff and rfc2231 encoding)
  97. */
  98. if (star_pos == name_end - 1) {
  99. /* First */
  100. if (rspamd_rfc2231_decode (pool, param, value_start, value_end)) {
  101. param->name.begin = name_start;
  102. param->name.len = name_end - name_start - 1;
  103. }
  104. }
  105. else if (*(name_end - 1) == '*') {
  106. /* Third */
  107. /* Check number */
  108. gulong tmp;
  109. if (!rspamd_strtoul (star_pos + 1, name_end - star_pos - 2, &tmp)) {
  110. return FALSE;
  111. }
  112. param->flags |= RSPAMD_CONTENT_PARAM_PIECEWISE|RSPAMD_CONTENT_PARAM_RFC2231;
  113. param->rfc2231_id = tmp;
  114. param->name.begin = name_start;
  115. param->name.len = star_pos - name_start;
  116. param->value.begin = value_start;
  117. param->value.len = value_end - value_start;
  118. /* Deal with that later... */
  119. }
  120. else {
  121. /* Second case */
  122. gulong tmp;
  123. if (!rspamd_strtoul (star_pos + 1, name_end - star_pos - 1, &tmp)) {
  124. return FALSE;
  125. }
  126. param->flags |= RSPAMD_CONTENT_PARAM_PIECEWISE;
  127. param->rfc2231_id = tmp;
  128. param->name.begin = name_start;
  129. param->name.len = star_pos - name_start;
  130. param->value.begin = value_start;
  131. param->value.len = value_end - value_start;
  132. }
  133. return TRUE;
  134. }
  135. static gint32
  136. rspamd_cmp_pieces (struct rspamd_content_type_param *p1, struct rspamd_content_type_param *p2)
  137. {
  138. return p1->rfc2231_id - p2->rfc2231_id;
  139. }
  140. static void
  141. rspamd_postprocess_ct_attributes (rspamd_mempool_t *pool,
  142. GHashTable *htb,
  143. void (*proc)(rspamd_mempool_t *, struct rspamd_content_type_param *, gpointer ud),
  144. gpointer procd)
  145. {
  146. GHashTableIter it;
  147. gpointer k, v;
  148. struct rspamd_content_type_param *param, *sorted, *cur;
  149. if (htb == NULL) {
  150. return;
  151. }
  152. g_hash_table_iter_init (&it, htb);
  153. while (g_hash_table_iter_next (&it, &k, &v)) {
  154. param = (struct rspamd_content_type_param *)v;
  155. if (param->flags & RSPAMD_CONTENT_PARAM_PIECEWISE) {
  156. /* Reconstruct param */
  157. gsize tlen = 0;
  158. gchar *ndata, *pos;
  159. sorted = param;
  160. DL_SORT (sorted, rspamd_cmp_pieces);
  161. DL_FOREACH (sorted, cur) {
  162. tlen += cur->value.len;
  163. }
  164. ndata = rspamd_mempool_alloc (pool, tlen);
  165. pos = ndata;
  166. DL_FOREACH (sorted, cur) {
  167. memcpy (pos, cur->value.begin, cur->value.len);
  168. pos += cur->value.len;
  169. }
  170. if (param->flags & RSPAMD_CONTENT_PARAM_RFC2231) {
  171. if (!rspamd_rfc2231_decode (pool, param,
  172. ndata, pos)) {
  173. param->flags |= RSPAMD_CONTENT_PARAM_BROKEN;
  174. param->value.begin = ndata;
  175. param->value.len = tlen;
  176. }
  177. }
  178. else {
  179. param->value.begin = ndata;
  180. param->value.len = tlen;
  181. }
  182. /* Detach from list */
  183. param->next = NULL;
  184. param->prev = param;
  185. }
  186. proc (pool, param, procd);
  187. }
  188. }
  189. static void
  190. rspamd_content_type_postprocess (rspamd_mempool_t *pool,
  191. struct rspamd_content_type_param *param,
  192. gpointer ud)
  193. {
  194. rspamd_ftok_t srch;
  195. struct rspamd_content_type_param *found = NULL;
  196. struct rspamd_content_type *ct = (struct rspamd_content_type *)ud;
  197. RSPAMD_FTOK_ASSIGN (&srch, "charset");
  198. if (rspamd_ftok_casecmp (&param->name, &srch) == 0) {
  199. /* Adjust charset */
  200. found = param;
  201. ct->charset.begin = param->value.begin;
  202. ct->charset.len = param->value.len;
  203. }
  204. RSPAMD_FTOK_ASSIGN (&srch, "boundary");
  205. if (rspamd_ftok_casecmp (&param->name, &srch) == 0) {
  206. found = param;
  207. gchar *lc_boundary;
  208. /* Adjust boundary */
  209. lc_boundary = rspamd_mempool_alloc (pool, param->value.len);
  210. memcpy (lc_boundary, param->value.begin, param->value.len);
  211. rspamd_str_lc (lc_boundary, param->value.len);
  212. ct->boundary.begin = lc_boundary;
  213. ct->boundary.len = param->value.len;
  214. /* Preserve original (case sensitive) boundary */
  215. ct->orig_boundary.begin = param->value.begin;
  216. ct->orig_boundary.len = param->value.len;
  217. }
  218. if (!found) {
  219. /* Just lowercase */
  220. rspamd_str_lc ((gchar *)param->value.begin, param->value.len);
  221. }
  222. }
  223. static void
  224. rspamd_content_disposition_postprocess (rspamd_mempool_t *pool,
  225. struct rspamd_content_type_param *param,
  226. gpointer ud)
  227. {
  228. rspamd_ftok_t srch;
  229. struct rspamd_content_disposition *cd = (struct rspamd_content_disposition *)ud;
  230. srch.begin = "filename";
  231. srch.len = 8;
  232. if (rspamd_ftok_casecmp (&param->name, &srch) == 0) {
  233. /* Adjust filename */
  234. cd->filename.begin = param->value.begin;
  235. cd->filename.len = param->value.len;
  236. }
  237. }
  238. void
  239. rspamd_content_type_add_param (rspamd_mempool_t *pool,
  240. struct rspamd_content_type *ct,
  241. gchar *name_start, gchar *name_end,
  242. gchar *value_start, gchar *value_end)
  243. {
  244. struct rspamd_content_type_param *nparam;
  245. rspamd_ftok_t srch;
  246. struct rspamd_content_type_param *found = NULL;
  247. g_assert (ct != NULL);
  248. nparam = rspamd_mempool_alloc0 (pool, sizeof (*nparam));
  249. rspamd_str_lc (name_start, name_end - name_start);
  250. if (!rspamd_param_maybe_rfc2231_process (pool, nparam, name_start,
  251. name_end, value_start, value_end)) {
  252. nparam->name.begin = name_start;
  253. nparam->name.len = name_end - name_start;
  254. nparam->value.begin = value_start;
  255. nparam->value.len = value_end - value_start;
  256. }
  257. srch.begin = nparam->name.begin;
  258. srch.len = nparam->name.len;
  259. if (ct->attrs) {
  260. found = g_hash_table_lookup (ct->attrs, &srch);
  261. } else {
  262. ct->attrs = g_hash_table_new (rspamd_ftok_icase_hash,
  263. rspamd_ftok_icase_equal);
  264. }
  265. if (!found) {
  266. DL_APPEND (found, nparam);
  267. g_hash_table_insert (ct->attrs, &nparam->name, nparam);
  268. }
  269. else {
  270. DL_APPEND (found, nparam);
  271. }
  272. }
  273. static struct rspamd_content_type *
  274. rspamd_content_type_parser (gchar *in, gsize len, rspamd_mempool_t *pool)
  275. {
  276. guint obraces = 0, ebraces = 0, qlen = 0;
  277. gchar *p, *c, *end, *pname_start = NULL, *pname_end = NULL;
  278. struct rspamd_content_type *res = NULL, val;
  279. gboolean eqsign_seen = FALSE;
  280. enum {
  281. parse_type,
  282. parse_subtype,
  283. parse_after_subtype,
  284. parse_param_name,
  285. parse_param_after_name,
  286. parse_param_value,
  287. parse_param_value_after_quote,
  288. parse_space,
  289. parse_quoted,
  290. parse_comment,
  291. } state = parse_space, next_state = parse_type;
  292. p = in;
  293. c = p;
  294. end = p + len;
  295. memset (&val, 0, sizeof (val));
  296. val.cpy = in;
  297. while (p < end) {
  298. switch (state) {
  299. case parse_type:
  300. if (g_ascii_isspace (*p) || *p == ';') {
  301. /* We have type without subtype */
  302. val.type.begin = c;
  303. val.type.len = p - c;
  304. state = parse_after_subtype;
  305. } else if (*p == '/') {
  306. val.type.begin = c;
  307. val.type.len = p - c;
  308. state = parse_space;
  309. next_state = parse_subtype;
  310. p++;
  311. } else {
  312. p++;
  313. }
  314. break;
  315. case parse_subtype:
  316. if (g_ascii_isspace (*p) || *p == ';') {
  317. val.subtype.begin = c;
  318. val.subtype.len = p - c;
  319. state = parse_after_subtype;
  320. } else {
  321. p++;
  322. }
  323. break;
  324. case parse_after_subtype:
  325. if (*p == ';' || g_ascii_isspace (*p)) {
  326. p++;
  327. } else if (*p == '(') {
  328. c = p;
  329. state = parse_comment;
  330. next_state = parse_param_name;
  331. obraces = 1;
  332. ebraces = 0;
  333. pname_start = NULL;
  334. pname_end = NULL;
  335. eqsign_seen = FALSE;
  336. p++;
  337. } else {
  338. c = p;
  339. state = parse_param_name;
  340. pname_start = NULL;
  341. pname_end = NULL;
  342. eqsign_seen = FALSE;
  343. }
  344. break;
  345. case parse_param_name:
  346. if (*p == '=') {
  347. pname_start = c;
  348. pname_end = p;
  349. state = parse_param_after_name;
  350. eqsign_seen = TRUE;
  351. p++;
  352. } else if (g_ascii_isspace (*p)) {
  353. pname_start = c;
  354. pname_end = p;
  355. state = parse_param_after_name;
  356. } else {
  357. p++;
  358. }
  359. break;
  360. case parse_param_after_name:
  361. if (g_ascii_isspace (*p)) {
  362. p++;
  363. } else if (*p == '=') {
  364. if (eqsign_seen) {
  365. /* Treat as value start */
  366. c = p;
  367. eqsign_seen = FALSE;
  368. state = parse_space;
  369. next_state = parse_param_value;
  370. p++;
  371. } else {
  372. eqsign_seen = TRUE;
  373. p++;
  374. }
  375. } else {
  376. if (eqsign_seen) {
  377. state = parse_param_value;
  378. c = p;
  379. } else {
  380. /* Invalid parameter without value */
  381. c = p;
  382. state = parse_param_name;
  383. pname_start = NULL;
  384. pname_end = NULL;
  385. }
  386. }
  387. break;
  388. case parse_param_value:
  389. if (*p == '"') {
  390. p++;
  391. c = p;
  392. state = parse_quoted;
  393. next_state = parse_param_value_after_quote;
  394. } else if (g_ascii_isspace (*p)) {
  395. if (pname_start && pname_end && pname_end > pname_start) {
  396. rspamd_content_type_add_param (pool, &val, pname_start,
  397. pname_end, c, p);
  398. }
  399. state = parse_space;
  400. next_state = parse_param_name;
  401. pname_start = NULL;
  402. pname_end = NULL;
  403. } else if (*p == '(') {
  404. if (pname_start && pname_end && pname_end > pname_start) {
  405. rspamd_content_type_add_param (pool, &val, pname_start,
  406. pname_end, c, p);
  407. }
  408. obraces = 1;
  409. ebraces = 0;
  410. p++;
  411. state = parse_comment;
  412. next_state = parse_param_name;
  413. pname_start = NULL;
  414. pname_end = NULL;
  415. }
  416. else if (*p == ';') {
  417. if (pname_start && pname_end && pname_end > pname_start) {
  418. rspamd_content_type_add_param (pool, &val, pname_start,
  419. pname_end, c, p);
  420. }
  421. p ++;
  422. state = parse_space;
  423. next_state = parse_param_name;
  424. pname_start = NULL;
  425. pname_end = NULL;
  426. }
  427. else {
  428. p++;
  429. }
  430. break;
  431. case parse_param_value_after_quote:
  432. if (pname_start && pname_end && pname_end > pname_start) {
  433. rspamd_content_type_add_param (pool, &val, pname_start,
  434. pname_end, c, c + qlen);
  435. }
  436. if (*p == '"') {
  437. p ++;
  438. if (p == end) {
  439. /* Last quote: done... */
  440. break;
  441. }
  442. if (*p == ';') {
  443. p ++;
  444. state = parse_space;
  445. next_state = parse_param_name;
  446. pname_start = NULL;
  447. pname_end = NULL;
  448. continue;
  449. }
  450. }
  451. /* We should not normally be here in fact */
  452. if (g_ascii_isspace (*p)) {
  453. state = parse_space;
  454. next_state = parse_param_name;
  455. pname_start = NULL;
  456. pname_end = NULL;
  457. } else if (*p == '(') {
  458. obraces = 1;
  459. ebraces = 0;
  460. p++;
  461. state = parse_comment;
  462. next_state = parse_param_name;
  463. pname_start = NULL;
  464. pname_end = NULL;
  465. } else {
  466. state = parse_param_name;
  467. pname_start = NULL;
  468. pname_end = NULL;
  469. c = p;
  470. }
  471. break;
  472. case parse_quoted:
  473. if (*p == '\\') {
  474. /* Quoted pair */
  475. if (p + 1 < end) {
  476. p += 2;
  477. } else {
  478. p++;
  479. }
  480. } else if (*p == '"') {
  481. qlen = p - c;
  482. state = next_state;
  483. } else {
  484. p++;
  485. }
  486. break;
  487. case parse_comment:
  488. if (*p == '(') {
  489. obraces++;
  490. p++;
  491. } else if (*p == ')') {
  492. ebraces++;
  493. p++;
  494. if (ebraces == obraces && p < end) {
  495. if (g_ascii_isspace (*p)) {
  496. state = parse_space;
  497. } else {
  498. c = p;
  499. state = next_state;
  500. }
  501. }
  502. } else {
  503. p++;
  504. }
  505. break;
  506. case parse_space:
  507. if (g_ascii_isspace (*p)) {
  508. p++;
  509. } else if (*p == '(') {
  510. obraces = 1;
  511. ebraces = 0;
  512. p++;
  513. state = parse_comment;
  514. } else {
  515. c = p;
  516. state = next_state;
  517. }
  518. break;
  519. }
  520. }
  521. /* Process leftover */
  522. switch (state) {
  523. case parse_type:
  524. val.type.begin = c;
  525. val.type.len = p - c;
  526. break;
  527. case parse_subtype:
  528. val.subtype.begin = c;
  529. val.subtype.len = p - c;
  530. break;
  531. case parse_param_value:
  532. if (pname_start && pname_end && pname_end > pname_start) {
  533. if (p > c && *(p - 1) == ';') {
  534. p --;
  535. }
  536. rspamd_content_type_add_param (pool, &val, pname_start,
  537. pname_end, c, p);
  538. }
  539. break;
  540. case parse_param_value_after_quote:
  541. if (pname_start && pname_end && pname_end > pname_start) {
  542. rspamd_content_type_add_param (pool, &val, pname_start,
  543. pname_end, c, c + qlen);
  544. }
  545. break;
  546. default:
  547. break;
  548. }
  549. if (val.type.len > 0) {
  550. gchar *tmp;
  551. res = rspamd_mempool_alloc (pool, sizeof (val));
  552. memcpy (res, &val, sizeof (val));
  553. /*
  554. * Lowercase type and subtype as they are specified as case insensitive
  555. * in rfc2045 section 5.1
  556. */
  557. tmp = rspamd_mempool_alloc (pool, val.type.len);
  558. memcpy (tmp, val.type.begin, val.type.len);
  559. rspamd_str_lc (tmp, val.type.len);
  560. res->type.begin = tmp;
  561. if (val.subtype.len > 0) {
  562. tmp = rspamd_mempool_alloc (pool, val.subtype.len);
  563. memcpy (tmp, val.subtype.begin, val.subtype.len);
  564. rspamd_str_lc (tmp, val.subtype.len);
  565. res->subtype.begin = tmp;
  566. }
  567. }
  568. return res;
  569. }
  570. struct rspamd_content_type *
  571. rspamd_content_type_parse (const gchar *in,
  572. gsize len, rspamd_mempool_t *pool)
  573. {
  574. struct rspamd_content_type *res = NULL;
  575. rspamd_ftok_t srch;
  576. gchar *cpy;
  577. cpy = rspamd_mempool_alloc (pool, len + 1);
  578. rspamd_strlcpy (cpy, in, len + 1);
  579. if ((res = rspamd_content_type_parser (cpy, len, pool)) != NULL) {
  580. if (res->attrs) {
  581. rspamd_mempool_add_destructor (pool,
  582. (rspamd_mempool_destruct_t)g_hash_table_unref, res->attrs);
  583. rspamd_postprocess_ct_attributes (pool, res->attrs,
  584. rspamd_content_type_postprocess, res);
  585. }
  586. /* Now do some hacks to work with broken content types */
  587. if (res->subtype.len == 0) {
  588. res->flags |= RSPAMD_CONTENT_TYPE_BROKEN;
  589. RSPAMD_FTOK_ASSIGN (&srch, "text");
  590. if (rspamd_ftok_casecmp (&res->type, &srch) == 0) {
  591. /* Workaround for Content-Type: text */
  592. /* Assume text/plain */
  593. RSPAMD_FTOK_ASSIGN (&srch, "plain");
  594. }
  595. else {
  596. RSPAMD_FTOK_ASSIGN (&srch, "html");
  597. if (rspamd_ftok_casecmp (&res->type, &srch) == 0) {
  598. /* Workaround for Content-Type: html */
  599. RSPAMD_FTOK_ASSIGN (&res->type, "text");
  600. RSPAMD_FTOK_ASSIGN (&res->subtype, "html");
  601. }
  602. else {
  603. RSPAMD_FTOK_ASSIGN (&srch, "application");
  604. if (rspamd_ftok_casecmp (&res->type, &srch) == 0) {
  605. RSPAMD_FTOK_ASSIGN (&res->subtype, "octet-stream");
  606. }
  607. }
  608. }
  609. }
  610. else {
  611. /* Common mistake done by retards */
  612. RSPAMD_FTOK_ASSIGN (&srch, "alternate");
  613. if (rspamd_ftok_casecmp (&res->subtype, &srch) == 0) {
  614. res->flags |= RSPAMD_CONTENT_TYPE_BROKEN;
  615. RSPAMD_FTOK_ASSIGN (&res->subtype, "alternative");
  616. }
  617. }
  618. RSPAMD_FTOK_ASSIGN (&srch, "multipart");
  619. if (rspamd_ftok_casecmp (&res->type, &srch) == 0) {
  620. res->flags |= RSPAMD_CONTENT_TYPE_MULTIPART;
  621. RSPAMD_FTOK_ASSIGN (&srch, "encrypted");
  622. if (rspamd_ftok_casecmp (&res->subtype, &srch) == 0) {
  623. res->flags |= RSPAMD_CONTENT_TYPE_ENCRYPTED;
  624. }
  625. }
  626. else {
  627. RSPAMD_FTOK_ASSIGN (&srch, "text");
  628. if (rspamd_ftok_casecmp (&res->type, &srch) == 0) {
  629. res->flags |= RSPAMD_CONTENT_TYPE_TEXT;
  630. }
  631. else {
  632. RSPAMD_FTOK_ASSIGN (&srch, "message");
  633. if (rspamd_ftok_casecmp (&res->type, &srch) == 0) {
  634. RSPAMD_FTOK_ASSIGN (&srch, "delivery-status");
  635. if (rspamd_ftok_casecmp (&res->subtype, &srch) == 0) {
  636. res->flags |= RSPAMD_CONTENT_TYPE_TEXT|RSPAMD_CONTENT_TYPE_DSN;
  637. }
  638. else {
  639. RSPAMD_FTOK_ASSIGN (&srch, "notification");
  640. if (rspamd_substring_search_caseless (res->subtype.begin,
  641. res->subtype.len, srch.begin, srch.len) != -1) {
  642. res->flags |= RSPAMD_CONTENT_TYPE_TEXT|
  643. RSPAMD_CONTENT_TYPE_DSN;
  644. }
  645. else {
  646. res->flags |= RSPAMD_CONTENT_TYPE_MESSAGE;
  647. }
  648. }
  649. }
  650. }
  651. }
  652. }
  653. else {
  654. msg_warn_pool ("cannot parse content type: %*s", (gint)len, cpy);
  655. }
  656. return res;
  657. }
  658. void
  659. rspamd_content_disposition_add_param (rspamd_mempool_t *pool,
  660. struct rspamd_content_disposition *cd,
  661. const gchar *name_start, const gchar *name_end,
  662. const gchar *value_start, const gchar *value_end)
  663. {
  664. rspamd_ftok_t srch;
  665. gchar *name_cpy, *value_cpy, *name_cpy_end, *value_cpy_end;
  666. struct rspamd_content_type_param *found = NULL, *nparam;
  667. g_assert (cd != NULL);
  668. name_cpy = rspamd_mempool_alloc (pool, name_end - name_start);
  669. memcpy (name_cpy, name_start, name_end - name_start);
  670. name_cpy_end = name_cpy + (name_end - name_start);
  671. value_cpy = rspamd_mempool_alloc (pool, value_end - value_start);
  672. memcpy (value_cpy, value_start, value_end - value_start);
  673. value_cpy_end = value_cpy + (value_end - value_start);
  674. nparam = rspamd_mempool_alloc0 (pool, sizeof (*nparam));
  675. rspamd_str_lc (name_cpy, name_cpy_end - name_cpy);
  676. if (!rspamd_param_maybe_rfc2231_process (pool, nparam, name_cpy,
  677. name_cpy_end, value_cpy, value_cpy_end)) {
  678. nparam->name.begin = name_cpy;
  679. nparam->name.len = name_cpy_end - name_cpy;
  680. nparam->value.begin = value_cpy;
  681. nparam->value.len = value_cpy_end - value_cpy;
  682. }
  683. srch.begin = nparam->name.begin;
  684. srch.len = nparam->name.len;
  685. if (cd->attrs) {
  686. found = g_hash_table_lookup (cd->attrs, &srch);
  687. } else {
  688. cd->attrs = g_hash_table_new (rspamd_ftok_icase_hash,
  689. rspamd_ftok_icase_equal);
  690. }
  691. if (!found) {
  692. DL_APPEND (found, nparam);
  693. g_hash_table_insert (cd->attrs, &nparam->name, nparam);
  694. }
  695. else {
  696. DL_APPEND (found, nparam);
  697. }
  698. }
  699. struct rspamd_content_disposition *
  700. rspamd_content_disposition_parse (const gchar *in,
  701. gsize len, rspamd_mempool_t *pool)
  702. {
  703. struct rspamd_content_disposition *res = NULL, val;
  704. if (rspamd_content_disposition_parser (in, len, &val, pool)) {
  705. res = rspamd_mempool_alloc (pool, sizeof (val));
  706. memcpy (res, &val, sizeof (val));
  707. res->lc_data = rspamd_mempool_alloc (pool, len + 1);
  708. rspamd_strlcpy (res->lc_data, in, len + 1);
  709. rspamd_str_lc (res->lc_data, len);
  710. if (res->attrs) {
  711. rspamd_postprocess_ct_attributes (pool, res->attrs,
  712. rspamd_content_disposition_postprocess, res);
  713. rspamd_mempool_add_destructor (pool,
  714. (rspamd_mempool_destruct_t)g_hash_table_unref, res->attrs);
  715. }
  716. }
  717. else {
  718. msg_warn_pool ("cannot parse content disposition: %*s",
  719. (gint)len, in);
  720. }
  721. return res;
  722. }