You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

content_type.c 20KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854
  1. /*-
  2. * Copyright 2016 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "libmime/content_type.h"
  17. #include "smtp_parsers.h"
  18. #include "utlist.h"
  19. #include "libserver/url.h"
  20. #include "libmime/mime_encoding.h"
  21. static gboolean
  22. rspamd_rfc2231_decode (rspamd_mempool_t *pool,
  23. struct rspamd_content_type_param *param,
  24. gchar *value_start, gchar *value_end)
  25. {
  26. gchar *quote_pos;
  27. quote_pos = memchr (value_start, '\'', value_end - value_start);
  28. if (quote_pos == NULL) {
  29. /* Plain percent encoding */
  30. gsize r = rspamd_url_decode (value_start, value_start,
  31. value_end - value_start);
  32. param->value.begin = value_start;
  33. param->value.len = r;
  34. }
  35. else {
  36. /*
  37. * We can have encoding'language'data, or
  38. * encoding'data (in theory).
  39. * Try to handle both...
  40. */
  41. const gchar *charset = NULL;
  42. rspamd_ftok_t ctok;
  43. ctok.begin = value_start;
  44. ctok.len = quote_pos - value_start;
  45. if (ctok.len > 0) {
  46. charset = rspamd_mime_detect_charset (&ctok, pool);
  47. }
  48. /* Now, we can check for either next quote sign or, eh, ignore that */
  49. value_start = quote_pos + 1;
  50. quote_pos = memchr (value_start, '\'', value_end - value_start);
  51. if (quote_pos) {
  52. /* Ignore language */
  53. value_start = quote_pos + 1;
  54. }
  55. /* Perform percent decoding */
  56. gsize r = rspamd_url_decode (value_start, value_start,
  57. value_end - value_start);
  58. GError *err = NULL;
  59. if (charset == NULL) {
  60. /* Try heuristic */
  61. charset = rspamd_mime_charset_find_by_content (value_start, r, TRUE);
  62. }
  63. if (charset == NULL) {
  64. msg_warn_pool ("cannot convert parameter from charset %T", &ctok);
  65. return FALSE;
  66. }
  67. param->value.begin = rspamd_mime_text_to_utf8 (pool,
  68. value_start, r,
  69. charset, &param->value.len, &err);
  70. if (param->value.begin == NULL) {
  71. msg_warn_pool ("cannot convert parameter from charset %s: %e",
  72. charset, err);
  73. if (err) {
  74. g_error_free (err);
  75. }
  76. return FALSE;
  77. }
  78. }
  79. param->flags |= RSPAMD_CONTENT_PARAM_RFC2231;
  80. return TRUE;
  81. }
  82. static gboolean
  83. rspamd_param_maybe_rfc2231_process (rspamd_mempool_t *pool,
  84. struct rspamd_content_type_param *param,
  85. gchar *name_start, gchar *name_end,
  86. gchar *value_start, gchar *value_end)
  87. {
  88. const gchar *star_pos;
  89. star_pos = memchr (name_start, '*', name_end - name_start);
  90. if (star_pos == NULL) {
  91. return FALSE;
  92. }
  93. /* We have three possibilities here:
  94. * 1. name* (just name + 2231 encoding)
  95. * 2. name*(\d+) (piecewise stuff but no rfc2231 encoding)
  96. * 3. name*(\d+)* (piecewise stuff and rfc2231 encoding)
  97. */
  98. if (star_pos == name_end - 1) {
  99. /* First */
  100. if (rspamd_rfc2231_decode (pool, param, value_start, value_end)) {
  101. param->name.begin = name_start;
  102. param->name.len = name_end - name_start - 1;
  103. }
  104. }
  105. else if (*(name_end - 1) == '*') {
  106. /* Third */
  107. /* Check number */
  108. gulong tmp;
  109. if (!rspamd_strtoul (star_pos + 1, name_end - star_pos - 2, &tmp)) {
  110. return FALSE;
  111. }
  112. param->flags |= RSPAMD_CONTENT_PARAM_PIECEWISE|RSPAMD_CONTENT_PARAM_RFC2231;
  113. param->rfc2231_id = tmp;
  114. param->name.begin = name_start;
  115. param->name.len = star_pos - name_start;
  116. param->value.begin = value_start;
  117. param->value.len = value_end - value_start;
  118. /* Deal with that later... */
  119. }
  120. else {
  121. /* Second case */
  122. gulong tmp;
  123. if (!rspamd_strtoul (star_pos + 1, name_end - star_pos - 1, &tmp)) {
  124. return FALSE;
  125. }
  126. param->flags |= RSPAMD_CONTENT_PARAM_PIECEWISE;
  127. param->rfc2231_id = tmp;
  128. param->name.begin = name_start;
  129. param->name.len = star_pos - name_start;
  130. param->value.begin = value_start;
  131. param->value.len = value_end - value_start;
  132. }
  133. return TRUE;
  134. }
  135. static gint32
  136. rspamd_cmp_pieces (struct rspamd_content_type_param *p1, struct rspamd_content_type_param *p2)
  137. {
  138. return p1->rfc2231_id - p2->rfc2231_id;
  139. }
  140. static void
  141. rspamd_postprocess_ct_attributes (rspamd_mempool_t *pool,
  142. GHashTable *htb,
  143. void (*proc)(rspamd_mempool_t *, struct rspamd_content_type_param *, gpointer ud),
  144. gpointer procd)
  145. {
  146. GHashTableIter it;
  147. gpointer k, v;
  148. struct rspamd_content_type_param *param, *sorted, *cur;
  149. if (htb == NULL) {
  150. return;
  151. }
  152. g_hash_table_iter_init (&it, htb);
  153. while (g_hash_table_iter_next (&it, &k, &v)) {
  154. param = (struct rspamd_content_type_param *)v;
  155. if (param->flags & RSPAMD_CONTENT_PARAM_PIECEWISE) {
  156. /* Reconstruct param */
  157. gsize tlen = 0;
  158. gchar *ndata, *pos;
  159. sorted = param;
  160. DL_SORT (sorted, rspamd_cmp_pieces);
  161. DL_FOREACH (sorted, cur) {
  162. tlen += cur->value.len;
  163. }
  164. ndata = rspamd_mempool_alloc (pool, tlen);
  165. pos = ndata;
  166. DL_FOREACH (sorted, cur) {
  167. memcpy (pos, cur->value.begin, cur->value.len);
  168. pos += cur->value.len;
  169. }
  170. if (param->flags & RSPAMD_CONTENT_PARAM_RFC2231) {
  171. if (!rspamd_rfc2231_decode (pool, param,
  172. ndata, pos)) {
  173. param->flags |= RSPAMD_CONTENT_PARAM_BROKEN;
  174. param->value.begin = ndata;
  175. param->value.len = tlen;
  176. }
  177. }
  178. else {
  179. param->value.begin = ndata;
  180. param->value.len = tlen;
  181. }
  182. /* Detach from list */
  183. param->next = NULL;
  184. param->prev = param;
  185. }
  186. gboolean invalid_utf = FALSE;
  187. if (param->value.begin != NULL && param->value.len > 0) {
  188. param->value.begin = rspamd_mime_header_decode(pool, param->value.begin,
  189. param->value.len, &invalid_utf);
  190. param->value.len = strlen(param->value.begin);
  191. }
  192. if (invalid_utf) {
  193. param->flags |= RSPAMD_CONTENT_PARAM_BROKEN;
  194. }
  195. proc (pool, param, procd);
  196. }
  197. }
  198. static void
  199. rspamd_content_type_postprocess (rspamd_mempool_t *pool,
  200. struct rspamd_content_type_param *param,
  201. gpointer ud)
  202. {
  203. rspamd_ftok_t srch;
  204. struct rspamd_content_type_param *found = NULL;
  205. struct rspamd_content_type *ct = (struct rspamd_content_type *)ud;
  206. RSPAMD_FTOK_ASSIGN (&srch, "charset");
  207. if (rspamd_ftok_icase_equal (&param->name, &srch)) {
  208. /* Adjust charset */
  209. found = param;
  210. ct->charset.begin = param->value.begin;
  211. ct->charset.len = param->value.len;
  212. }
  213. RSPAMD_FTOK_ASSIGN (&srch, "boundary");
  214. if (rspamd_ftok_icase_equal (&param->name, &srch)) {
  215. found = param;
  216. gchar *lc_boundary;
  217. /* Adjust boundary */
  218. lc_boundary = rspamd_mempool_alloc (pool, param->value.len);
  219. memcpy (lc_boundary, param->value.begin, param->value.len);
  220. rspamd_str_lc (lc_boundary, param->value.len);
  221. ct->boundary.begin = lc_boundary;
  222. ct->boundary.len = param->value.len;
  223. /* Preserve original (case sensitive) boundary */
  224. ct->orig_boundary.begin = param->value.begin;
  225. ct->orig_boundary.len = param->value.len;
  226. }
  227. if (!found) {
  228. RSPAMD_FTOK_ASSIGN (&srch, "name");
  229. if (!rspamd_ftok_icase_equal (&param->name, &srch)) {
  230. /* Just lowercase */
  231. rspamd_str_lc_utf8 ((gchar *) param->value.begin, param->value.len);
  232. }
  233. }
  234. }
  235. static void
  236. rspamd_content_disposition_postprocess (rspamd_mempool_t *pool,
  237. struct rspamd_content_type_param *param,
  238. gpointer ud)
  239. {
  240. rspamd_ftok_t srch;
  241. struct rspamd_content_disposition *cd = (struct rspamd_content_disposition *)ud;
  242. srch.begin = "filename";
  243. srch.len = 8;
  244. if (rspamd_ftok_icase_equal (&param->name, &srch)) {
  245. /* Adjust filename */
  246. cd->filename.begin = param->value.begin;
  247. cd->filename.len = param->value.len;
  248. }
  249. }
  250. void
  251. rspamd_content_type_add_param (rspamd_mempool_t *pool,
  252. struct rspamd_content_type *ct,
  253. gchar *name_start, gchar *name_end,
  254. gchar *value_start, gchar *value_end)
  255. {
  256. struct rspamd_content_type_param *nparam;
  257. rspamd_ftok_t srch;
  258. struct rspamd_content_type_param *found = NULL;
  259. g_assert (ct != NULL);
  260. nparam = rspamd_mempool_alloc0 (pool, sizeof (*nparam));
  261. rspamd_str_lc (name_start, name_end - name_start);
  262. if (!rspamd_param_maybe_rfc2231_process (pool, nparam, name_start,
  263. name_end, value_start, value_end)) {
  264. nparam->name.begin = name_start;
  265. nparam->name.len = name_end - name_start;
  266. nparam->value.begin = value_start;
  267. nparam->value.len = value_end - value_start;
  268. }
  269. srch.begin = nparam->name.begin;
  270. srch.len = nparam->name.len;
  271. if (ct->attrs) {
  272. found = g_hash_table_lookup (ct->attrs, &srch);
  273. } else {
  274. ct->attrs = g_hash_table_new (rspamd_ftok_icase_hash,
  275. rspamd_ftok_icase_equal);
  276. }
  277. if (!found) {
  278. DL_APPEND (found, nparam);
  279. g_hash_table_insert (ct->attrs, &nparam->name, nparam);
  280. }
  281. else {
  282. DL_APPEND (found, nparam);
  283. }
  284. }
  285. static struct rspamd_content_type *
  286. rspamd_content_type_parser (gchar *in, gsize len, rspamd_mempool_t *pool)
  287. {
  288. guint obraces = 0, ebraces = 0, qlen = 0;
  289. gchar *p, *c, *end, *pname_start = NULL, *pname_end = NULL;
  290. struct rspamd_content_type *res = NULL, val;
  291. gboolean eqsign_seen = FALSE;
  292. enum {
  293. parse_type,
  294. parse_subtype,
  295. parse_after_subtype,
  296. parse_param_name,
  297. parse_param_after_name,
  298. parse_param_value,
  299. parse_param_value_after_quote,
  300. parse_space,
  301. parse_quoted,
  302. parse_comment,
  303. } state = parse_space, next_state = parse_type;
  304. p = in;
  305. c = p;
  306. end = p + len;
  307. memset (&val, 0, sizeof (val));
  308. val.cpy = in;
  309. while (p < end) {
  310. switch (state) {
  311. case parse_type:
  312. if (g_ascii_isspace (*p) || *p == ';') {
  313. /* We have type without subtype */
  314. val.type.begin = c;
  315. val.type.len = p - c;
  316. state = parse_after_subtype;
  317. } else if (*p == '/') {
  318. val.type.begin = c;
  319. val.type.len = p - c;
  320. state = parse_space;
  321. next_state = parse_subtype;
  322. p++;
  323. } else {
  324. p++;
  325. }
  326. break;
  327. case parse_subtype:
  328. if (g_ascii_isspace (*p) || *p == ';') {
  329. val.subtype.begin = c;
  330. val.subtype.len = p - c;
  331. state = parse_after_subtype;
  332. } else {
  333. p++;
  334. }
  335. break;
  336. case parse_after_subtype:
  337. if (*p == ';' || g_ascii_isspace (*p)) {
  338. p++;
  339. } else if (*p == '(') {
  340. c = p;
  341. state = parse_comment;
  342. next_state = parse_param_name;
  343. obraces = 1;
  344. ebraces = 0;
  345. pname_start = NULL;
  346. pname_end = NULL;
  347. eqsign_seen = FALSE;
  348. p++;
  349. } else {
  350. c = p;
  351. state = parse_param_name;
  352. pname_start = NULL;
  353. pname_end = NULL;
  354. eqsign_seen = FALSE;
  355. }
  356. break;
  357. case parse_param_name:
  358. if (*p == '=') {
  359. pname_start = c;
  360. pname_end = p;
  361. state = parse_param_after_name;
  362. eqsign_seen = TRUE;
  363. p++;
  364. } else if (g_ascii_isspace (*p)) {
  365. pname_start = c;
  366. pname_end = p;
  367. state = parse_param_after_name;
  368. } else {
  369. p++;
  370. }
  371. break;
  372. case parse_param_after_name:
  373. if (g_ascii_isspace (*p)) {
  374. p++;
  375. } else if (*p == '=') {
  376. if (eqsign_seen) {
  377. /* Treat as value start */
  378. c = p;
  379. eqsign_seen = FALSE;
  380. state = parse_param_value;
  381. p++;
  382. } else {
  383. eqsign_seen = TRUE;
  384. p++;
  385. }
  386. } else {
  387. if (eqsign_seen) {
  388. state = parse_param_value;
  389. c = p;
  390. } else {
  391. /* Invalid parameter without value */
  392. c = p;
  393. state = parse_param_name;
  394. pname_start = NULL;
  395. pname_end = NULL;
  396. }
  397. }
  398. break;
  399. case parse_param_value:
  400. if (*p == '"') {
  401. p++;
  402. c = p;
  403. state = parse_quoted;
  404. next_state = parse_param_value_after_quote;
  405. } else if (g_ascii_isspace (*p)) {
  406. if (pname_start && pname_end && pname_end > pname_start) {
  407. rspamd_content_type_add_param (pool, &val, pname_start,
  408. pname_end, c, p);
  409. }
  410. state = parse_space;
  411. next_state = parse_param_name;
  412. pname_start = NULL;
  413. pname_end = NULL;
  414. } else if (*p == '(') {
  415. if (pname_start && pname_end && pname_end > pname_start) {
  416. rspamd_content_type_add_param (pool, &val, pname_start,
  417. pname_end, c, p);
  418. }
  419. obraces = 1;
  420. ebraces = 0;
  421. p++;
  422. state = parse_comment;
  423. next_state = parse_param_name;
  424. pname_start = NULL;
  425. pname_end = NULL;
  426. }
  427. else if (*p == ';') {
  428. if (pname_start && pname_end && pname_end > pname_start) {
  429. rspamd_content_type_add_param (pool, &val, pname_start,
  430. pname_end, c, p);
  431. }
  432. p ++;
  433. state = parse_space;
  434. next_state = parse_param_name;
  435. pname_start = NULL;
  436. pname_end = NULL;
  437. }
  438. else {
  439. p++;
  440. }
  441. break;
  442. case parse_param_value_after_quote:
  443. if (pname_start && pname_end && pname_end > pname_start) {
  444. rspamd_content_type_add_param (pool, &val, pname_start,
  445. pname_end, c, c + qlen);
  446. }
  447. if (*p == '"') {
  448. p ++;
  449. if (p == end) {
  450. /* Last quote: done... */
  451. state = parse_space;
  452. break;
  453. }
  454. if (*p == ';') {
  455. p ++;
  456. state = parse_space;
  457. next_state = parse_param_name;
  458. pname_start = NULL;
  459. pname_end = NULL;
  460. continue;
  461. }
  462. }
  463. /* We should not normally be here in fact */
  464. if (g_ascii_isspace (*p)) {
  465. state = parse_space;
  466. next_state = parse_param_name;
  467. pname_start = NULL;
  468. pname_end = NULL;
  469. } else if (*p == '(') {
  470. obraces = 1;
  471. ebraces = 0;
  472. p++;
  473. state = parse_comment;
  474. next_state = parse_param_name;
  475. pname_start = NULL;
  476. pname_end = NULL;
  477. } else {
  478. state = parse_param_name;
  479. pname_start = NULL;
  480. pname_end = NULL;
  481. c = p;
  482. }
  483. break;
  484. case parse_quoted:
  485. if (*p == '\\') {
  486. /* Quoted pair */
  487. if (p + 1 < end) {
  488. p += 2;
  489. } else {
  490. p++;
  491. }
  492. } else if (*p == '"') {
  493. qlen = p - c;
  494. state = next_state;
  495. } else {
  496. p++;
  497. }
  498. break;
  499. case parse_comment:
  500. if (*p == '(') {
  501. obraces++;
  502. p++;
  503. } else if (*p == ')') {
  504. ebraces++;
  505. p++;
  506. if (ebraces == obraces && p < end) {
  507. if (g_ascii_isspace (*p)) {
  508. state = parse_space;
  509. } else {
  510. c = p;
  511. state = next_state;
  512. }
  513. }
  514. } else {
  515. p++;
  516. }
  517. break;
  518. case parse_space:
  519. if (g_ascii_isspace (*p)) {
  520. p++;
  521. } else if (*p == '(') {
  522. obraces = 1;
  523. ebraces = 0;
  524. p++;
  525. state = parse_comment;
  526. } else {
  527. c = p;
  528. state = next_state;
  529. }
  530. break;
  531. }
  532. }
  533. /* Process leftover */
  534. switch (state) {
  535. case parse_type:
  536. val.type.begin = c;
  537. val.type.len = p - c;
  538. break;
  539. case parse_subtype:
  540. val.subtype.begin = c;
  541. val.subtype.len = p - c;
  542. break;
  543. case parse_param_value:
  544. if (pname_start && pname_end && pname_end > pname_start) {
  545. if (p > c && *(p - 1) == ';') {
  546. p --;
  547. }
  548. rspamd_content_type_add_param (pool, &val, pname_start,
  549. pname_end, c, p);
  550. }
  551. break;
  552. case parse_param_value_after_quote:
  553. if (pname_start && pname_end && pname_end > pname_start) {
  554. rspamd_content_type_add_param (pool, &val, pname_start,
  555. pname_end, c, c + qlen);
  556. }
  557. break;
  558. default:
  559. break;
  560. }
  561. if (val.type.len > 0) {
  562. gchar *tmp;
  563. res = rspamd_mempool_alloc (pool, sizeof (val));
  564. memcpy (res, &val, sizeof (val));
  565. /*
  566. * Lowercase type and subtype as they are specified as case insensitive
  567. * in rfc2045 section 5.1
  568. */
  569. tmp = rspamd_mempool_alloc (pool, val.type.len);
  570. memcpy (tmp, val.type.begin, val.type.len);
  571. rspamd_str_lc (tmp, val.type.len);
  572. res->type.begin = tmp;
  573. if (val.subtype.len > 0) {
  574. tmp = rspamd_mempool_alloc (pool, val.subtype.len);
  575. memcpy (tmp, val.subtype.begin, val.subtype.len);
  576. rspamd_str_lc (tmp, val.subtype.len);
  577. res->subtype.begin = tmp;
  578. }
  579. }
  580. return res;
  581. }
  582. struct rspamd_content_type *
  583. rspamd_content_type_parse (const gchar *in,
  584. gsize len, rspamd_mempool_t *pool)
  585. {
  586. struct rspamd_content_type *res = NULL;
  587. rspamd_ftok_t srch;
  588. gchar *cpy;
  589. cpy = rspamd_mempool_alloc (pool, len + 1);
  590. rspamd_strlcpy (cpy, in, len + 1);
  591. if ((res = rspamd_content_type_parser (cpy, len, pool)) != NULL) {
  592. if (res->attrs) {
  593. rspamd_mempool_add_destructor (pool,
  594. (rspamd_mempool_destruct_t)g_hash_table_unref, res->attrs);
  595. rspamd_postprocess_ct_attributes (pool, res->attrs,
  596. rspamd_content_type_postprocess, res);
  597. }
  598. /* Now do some hacks to work with broken content types */
  599. if (res->subtype.len == 0) {
  600. res->flags |= RSPAMD_CONTENT_TYPE_BROKEN;
  601. RSPAMD_FTOK_ASSIGN (&srch, "text");
  602. if (rspamd_ftok_casecmp (&res->type, &srch) == 0) {
  603. /* Workaround for Content-Type: text */
  604. /* Assume text/plain */
  605. RSPAMD_FTOK_ASSIGN (&srch, "plain");
  606. }
  607. else {
  608. RSPAMD_FTOK_ASSIGN (&srch, "html");
  609. if (rspamd_ftok_casecmp (&res->type, &srch) == 0) {
  610. /* Workaround for Content-Type: html */
  611. RSPAMD_FTOK_ASSIGN (&res->type, "text");
  612. RSPAMD_FTOK_ASSIGN (&res->subtype, "html");
  613. }
  614. else {
  615. RSPAMD_FTOK_ASSIGN (&srch, "application");
  616. if (rspamd_ftok_casecmp (&res->type, &srch) == 0) {
  617. RSPAMD_FTOK_ASSIGN (&res->subtype, "octet-stream");
  618. }
  619. }
  620. }
  621. }
  622. else {
  623. /* Common mistake done by retards */
  624. RSPAMD_FTOK_ASSIGN (&srch, "alternate");
  625. if (rspamd_ftok_casecmp (&res->subtype, &srch) == 0) {
  626. res->flags |= RSPAMD_CONTENT_TYPE_BROKEN;
  627. RSPAMD_FTOK_ASSIGN (&res->subtype, "alternative");
  628. }
  629. /* PKCS7 smime */
  630. RSPAMD_FTOK_ASSIGN (&srch, "pkcs7-mime");
  631. if (rspamd_substring_search (res->subtype.begin, res->subtype.len,
  632. srch.begin, srch.len) != -1) {
  633. res->flags |= RSPAMD_CONTENT_TYPE_SMIME;
  634. }
  635. }
  636. RSPAMD_FTOK_ASSIGN (&srch, "multipart");
  637. if (rspamd_ftok_casecmp (&res->type, &srch) == 0) {
  638. res->flags |= RSPAMD_CONTENT_TYPE_MULTIPART;
  639. RSPAMD_FTOK_ASSIGN (&srch, "encrypted");
  640. if (rspamd_ftok_casecmp (&res->subtype, &srch) == 0) {
  641. res->flags |= RSPAMD_CONTENT_TYPE_ENCRYPTED;
  642. }
  643. }
  644. else {
  645. RSPAMD_FTOK_ASSIGN (&srch, "text");
  646. if (rspamd_ftok_casecmp (&res->type, &srch) == 0) {
  647. res->flags |= RSPAMD_CONTENT_TYPE_TEXT;
  648. }
  649. else {
  650. RSPAMD_FTOK_ASSIGN (&srch, "message");
  651. if (rspamd_ftok_casecmp (&res->type, &srch) == 0) {
  652. RSPAMD_FTOK_ASSIGN (&srch, "delivery-status");
  653. if (rspamd_ftok_casecmp (&res->subtype, &srch) == 0) {
  654. res->flags |= RSPAMD_CONTENT_TYPE_TEXT|RSPAMD_CONTENT_TYPE_DSN;
  655. }
  656. else {
  657. RSPAMD_FTOK_ASSIGN (&srch, "notification");
  658. if (rspamd_substring_search_caseless (res->subtype.begin,
  659. res->subtype.len, srch.begin, srch.len) != -1) {
  660. res->flags |= RSPAMD_CONTENT_TYPE_TEXT|
  661. RSPAMD_CONTENT_TYPE_DSN;
  662. }
  663. else {
  664. res->flags |= RSPAMD_CONTENT_TYPE_MESSAGE;
  665. }
  666. }
  667. }
  668. }
  669. }
  670. }
  671. else {
  672. msg_warn_pool ("cannot parse content type: %*s", (gint)len, cpy);
  673. }
  674. return res;
  675. }
  676. void
  677. rspamd_content_disposition_add_param (rspamd_mempool_t *pool,
  678. struct rspamd_content_disposition *cd,
  679. const gchar *name_start, const gchar *name_end,
  680. const gchar *value_start, const gchar *value_end)
  681. {
  682. rspamd_ftok_t srch;
  683. gchar *name_cpy, *value_cpy, *name_cpy_end, *value_cpy_end;
  684. struct rspamd_content_type_param *found = NULL, *nparam;
  685. g_assert (cd != NULL);
  686. name_cpy = rspamd_mempool_alloc (pool, name_end - name_start);
  687. memcpy (name_cpy, name_start, name_end - name_start);
  688. name_cpy_end = name_cpy + (name_end - name_start);
  689. value_cpy = rspamd_mempool_alloc (pool, value_end - value_start);
  690. memcpy (value_cpy, value_start, value_end - value_start);
  691. value_cpy_end = value_cpy + (value_end - value_start);
  692. nparam = rspamd_mempool_alloc0 (pool, sizeof (*nparam));
  693. rspamd_str_lc (name_cpy, name_cpy_end - name_cpy);
  694. if (!rspamd_param_maybe_rfc2231_process (pool, nparam, name_cpy,
  695. name_cpy_end, value_cpy, value_cpy_end)) {
  696. nparam->name.begin = name_cpy;
  697. nparam->name.len = name_cpy_end - name_cpy;
  698. nparam->value.begin = value_cpy;
  699. nparam->value.len = value_cpy_end - value_cpy;
  700. }
  701. srch.begin = nparam->name.begin;
  702. srch.len = nparam->name.len;
  703. if (cd->attrs) {
  704. found = g_hash_table_lookup (cd->attrs, &srch);
  705. } else {
  706. cd->attrs = g_hash_table_new (rspamd_ftok_icase_hash,
  707. rspamd_ftok_icase_equal);
  708. }
  709. if (!found) {
  710. DL_APPEND (found, nparam);
  711. g_hash_table_insert (cd->attrs, &nparam->name, nparam);
  712. }
  713. else {
  714. DL_APPEND (found, nparam);
  715. }
  716. }
  717. struct rspamd_content_disposition *
  718. rspamd_content_disposition_parse (const gchar *in,
  719. gsize len, rspamd_mempool_t *pool)
  720. {
  721. struct rspamd_content_disposition *res = NULL, val;
  722. if (rspamd_content_disposition_parser (in, len, &val, pool)) {
  723. res = rspamd_mempool_alloc (pool, sizeof (val));
  724. memcpy (res, &val, sizeof (val));
  725. res->lc_data = rspamd_mempool_alloc (pool, len + 1);
  726. rspamd_strlcpy (res->lc_data, in, len + 1);
  727. rspamd_str_lc (res->lc_data, len);
  728. if (res->attrs) {
  729. rspamd_postprocess_ct_attributes (pool, res->attrs,
  730. rspamd_content_disposition_postprocess, res);
  731. rspamd_mempool_add_destructor (pool,
  732. (rspamd_mempool_destruct_t)g_hash_table_unref, res->attrs);
  733. }
  734. }
  735. else {
  736. msg_warn_pool ("cannot parse content disposition: %*s",
  737. (gint)len, in);
  738. }
  739. return res;
  740. }