You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

content_type.c 21KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884
  1. /*
  2. * Copyright 2024 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "libmime/content_type.h"
  17. #include "smtp_parsers.h"
  18. #include "utlist.h"
  19. #include "libserver/url.h"
  20. #include "libmime/mime_encoding.h"
  21. static gboolean
  22. rspamd_rfc2231_decode(rspamd_mempool_t *pool,
  23. struct rspamd_content_type_param *param,
  24. gchar *value_start, gchar *value_end)
  25. {
  26. gchar *quote_pos;
  27. quote_pos = memchr(value_start, '\'', value_end - value_start);
  28. if (quote_pos == NULL) {
  29. /* Plain percent encoding */
  30. gsize r = rspamd_url_decode(value_start, value_start,
  31. value_end - value_start);
  32. param->value.begin = value_start;
  33. param->value.len = r;
  34. }
  35. else {
  36. /*
  37. * We can have encoding'language'data, or
  38. * encoding'data (in theory).
  39. * Try to handle both...
  40. */
  41. const gchar *charset = NULL;
  42. rspamd_ftok_t ctok;
  43. ctok.begin = value_start;
  44. ctok.len = quote_pos - value_start;
  45. if (ctok.len > 0) {
  46. charset = rspamd_mime_detect_charset(&ctok, pool);
  47. }
  48. /* Now, we can check for either next quote sign or, eh, ignore that */
  49. value_start = quote_pos + 1;
  50. quote_pos = memchr(value_start, '\'', value_end - value_start);
  51. if (quote_pos) {
  52. /* Ignore language */
  53. value_start = quote_pos + 1;
  54. }
  55. /* Perform percent decoding */
  56. gsize r = rspamd_url_decode(value_start, value_start,
  57. value_end - value_start);
  58. GError *err = NULL;
  59. if (charset == NULL) {
  60. /* Try heuristic */
  61. charset = rspamd_mime_charset_find_by_content(value_start, r, TRUE);
  62. }
  63. if (charset == NULL) {
  64. msg_warn_pool("cannot convert parameter from charset %T", &ctok);
  65. return FALSE;
  66. }
  67. param->value.begin = rspamd_mime_text_to_utf8(pool,
  68. value_start, r,
  69. charset, &param->value.len, &err);
  70. if (param->value.begin == NULL) {
  71. msg_warn_pool("cannot convert parameter from charset %s: %e",
  72. charset, err);
  73. if (err) {
  74. g_error_free(err);
  75. }
  76. return FALSE;
  77. }
  78. }
  79. param->flags |= RSPAMD_CONTENT_PARAM_RFC2231;
  80. return TRUE;
  81. }
  82. static gboolean
  83. rspamd_param_maybe_rfc2231_process(rspamd_mempool_t *pool,
  84. struct rspamd_content_type_param *param,
  85. gchar *name_start, gchar *name_end,
  86. gchar *value_start, gchar *value_end)
  87. {
  88. const gchar *star_pos;
  89. star_pos = memchr(name_start, '*', name_end - name_start);
  90. if (star_pos == NULL) {
  91. return FALSE;
  92. }
  93. /* We have three possibilities here:
  94. * 1. name* (just name + 2231 encoding)
  95. * 2. name*(\d+) (piecewise stuff but no rfc2231 encoding)
  96. * 3. name*(\d+)* (piecewise stuff and rfc2231 encoding)
  97. */
  98. if (star_pos == name_end - 1) {
  99. /* First */
  100. if (rspamd_rfc2231_decode(pool, param, value_start, value_end)) {
  101. param->name.begin = name_start;
  102. param->name.len = name_end - name_start - 1;
  103. }
  104. }
  105. else if (*(name_end - 1) == '*') {
  106. /* Third */
  107. /* Check number */
  108. gulong tmp;
  109. if (!rspamd_strtoul(star_pos + 1, name_end - star_pos - 2, &tmp)) {
  110. return FALSE;
  111. }
  112. param->flags |= RSPAMD_CONTENT_PARAM_PIECEWISE | RSPAMD_CONTENT_PARAM_RFC2231;
  113. param->rfc2231_id = tmp;
  114. param->name.begin = name_start;
  115. param->name.len = star_pos - name_start;
  116. param->value.begin = value_start;
  117. param->value.len = value_end - value_start;
  118. /* Deal with that later... */
  119. }
  120. else {
  121. /* Second case */
  122. gulong tmp;
  123. if (!rspamd_strtoul(star_pos + 1, name_end - star_pos - 1, &tmp)) {
  124. return FALSE;
  125. }
  126. param->flags |= RSPAMD_CONTENT_PARAM_PIECEWISE;
  127. param->rfc2231_id = tmp;
  128. param->name.begin = name_start;
  129. param->name.len = star_pos - name_start;
  130. param->value.begin = value_start;
  131. param->value.len = value_end - value_start;
  132. }
  133. return TRUE;
  134. }
  135. static int32_t
  136. rspamd_cmp_pieces(struct rspamd_content_type_param *p1, struct rspamd_content_type_param *p2)
  137. {
  138. return p1->rfc2231_id - p2->rfc2231_id;
  139. }
  140. static void
  141. rspamd_postprocess_ct_attributes(rspamd_mempool_t *pool,
  142. GHashTable *htb,
  143. void (*proc)(rspamd_mempool_t *, struct rspamd_content_type_param *, gpointer ud),
  144. gpointer procd)
  145. {
  146. GHashTableIter it;
  147. gpointer k, v;
  148. struct rspamd_content_type_param *param, *sorted, *cur;
  149. if (htb == NULL) {
  150. return;
  151. }
  152. g_hash_table_iter_init(&it, htb);
  153. while (g_hash_table_iter_next(&it, &k, &v)) {
  154. param = (struct rspamd_content_type_param *) v;
  155. if (param->flags & RSPAMD_CONTENT_PARAM_PIECEWISE) {
  156. /* Reconstruct param */
  157. gsize tlen = 0;
  158. gchar *ndata, *pos;
  159. sorted = param;
  160. DL_SORT(sorted, rspamd_cmp_pieces);
  161. DL_FOREACH(sorted, cur)
  162. {
  163. tlen += cur->value.len;
  164. }
  165. ndata = rspamd_mempool_alloc(pool, tlen);
  166. pos = ndata;
  167. DL_FOREACH(sorted, cur)
  168. {
  169. memcpy(pos, cur->value.begin, cur->value.len);
  170. pos += cur->value.len;
  171. }
  172. if (param->flags & RSPAMD_CONTENT_PARAM_RFC2231) {
  173. if (!rspamd_rfc2231_decode(pool, param,
  174. ndata, pos)) {
  175. param->flags |= RSPAMD_CONTENT_PARAM_BROKEN;
  176. param->value.begin = ndata;
  177. param->value.len = tlen;
  178. }
  179. }
  180. else {
  181. param->value.begin = ndata;
  182. param->value.len = tlen;
  183. }
  184. /* Detach from list */
  185. param->next = NULL;
  186. param->prev = param;
  187. }
  188. gboolean invalid_utf = FALSE;
  189. if (param->value.begin != NULL && param->value.len > 0) {
  190. param->value.begin = rspamd_mime_header_decode(pool, param->value.begin,
  191. param->value.len, &invalid_utf);
  192. param->value.len = strlen(param->value.begin);
  193. }
  194. if (invalid_utf) {
  195. param->flags |= RSPAMD_CONTENT_PARAM_BROKEN;
  196. }
  197. proc(pool, param, procd);
  198. }
  199. }
  200. static void
  201. rspamd_content_type_postprocess(rspamd_mempool_t *pool,
  202. struct rspamd_content_type_param *param,
  203. gpointer ud)
  204. {
  205. rspamd_ftok_t srch;
  206. struct rspamd_content_type_param *found = NULL;
  207. struct rspamd_content_type *ct = (struct rspamd_content_type *) ud;
  208. RSPAMD_FTOK_ASSIGN(&srch, "charset");
  209. if (rspamd_ftok_icase_equal(&param->name, &srch)) {
  210. /* Adjust charset */
  211. found = param;
  212. ct->charset.begin = param->value.begin;
  213. ct->charset.len = param->value.len;
  214. }
  215. RSPAMD_FTOK_ASSIGN(&srch, "boundary");
  216. if (rspamd_ftok_icase_equal(&param->name, &srch)) {
  217. found = param;
  218. gchar *lc_boundary;
  219. /* Adjust boundary */
  220. lc_boundary = rspamd_mempool_alloc(pool, param->value.len);
  221. memcpy(lc_boundary, param->value.begin, param->value.len);
  222. rspamd_str_lc(lc_boundary, param->value.len);
  223. ct->boundary.begin = lc_boundary;
  224. ct->boundary.len = param->value.len;
  225. /* Preserve original (case sensitive) boundary */
  226. ct->orig_boundary.begin = param->value.begin;
  227. ct->orig_boundary.len = param->value.len;
  228. }
  229. if (!found) {
  230. RSPAMD_FTOK_ASSIGN(&srch, "name");
  231. if (!rspamd_ftok_icase_equal(&param->name, &srch)) {
  232. /* Just lowercase */
  233. rspamd_str_lc_utf8((gchar *) param->value.begin, param->value.len);
  234. }
  235. }
  236. }
  237. static void
  238. rspamd_content_disposition_postprocess(rspamd_mempool_t *pool,
  239. struct rspamd_content_type_param *param,
  240. gpointer ud)
  241. {
  242. rspamd_ftok_t srch;
  243. struct rspamd_content_disposition *cd = (struct rspamd_content_disposition *) ud;
  244. srch.begin = "filename";
  245. srch.len = 8;
  246. if (rspamd_ftok_icase_equal(&param->name, &srch)) {
  247. /* Adjust filename */
  248. cd->filename.begin = param->value.begin;
  249. cd->filename.len = param->value.len;
  250. }
  251. }
  252. void rspamd_content_type_add_param(rspamd_mempool_t *pool,
  253. struct rspamd_content_type *ct,
  254. gchar *name_start, gchar *name_end,
  255. gchar *value_start, gchar *value_end)
  256. {
  257. struct rspamd_content_type_param *nparam;
  258. rspamd_ftok_t srch;
  259. struct rspamd_content_type_param *found = NULL;
  260. g_assert(ct != NULL);
  261. nparam = rspamd_mempool_alloc0(pool, sizeof(*nparam));
  262. rspamd_str_lc(name_start, name_end - name_start);
  263. if (!rspamd_param_maybe_rfc2231_process(pool, nparam, name_start,
  264. name_end, value_start, value_end)) {
  265. nparam->name.begin = name_start;
  266. nparam->name.len = name_end - name_start;
  267. nparam->value.begin = value_start;
  268. nparam->value.len = value_end - value_start;
  269. }
  270. srch.begin = nparam->name.begin;
  271. srch.len = nparam->name.len;
  272. if (ct->attrs) {
  273. found = g_hash_table_lookup(ct->attrs, &srch);
  274. }
  275. else {
  276. ct->attrs = g_hash_table_new(rspamd_ftok_icase_hash,
  277. rspamd_ftok_icase_equal);
  278. }
  279. if (!found) {
  280. DL_APPEND(found, nparam);
  281. g_hash_table_insert(ct->attrs, &nparam->name, nparam);
  282. }
  283. else {
  284. DL_APPEND(found, nparam);
  285. }
  286. }
  287. static struct rspamd_content_type *
  288. rspamd_content_type_parser(gchar *in, gsize len, rspamd_mempool_t *pool)
  289. {
  290. guint obraces = 0, ebraces = 0, qlen = 0;
  291. gchar *p, *c, *end, *pname_start = NULL, *pname_end = NULL;
  292. struct rspamd_content_type *res = NULL, val;
  293. gboolean eqsign_seen = FALSE;
  294. enum {
  295. parse_type,
  296. parse_subtype,
  297. parse_after_subtype,
  298. parse_param_name,
  299. parse_param_after_name,
  300. parse_param_value,
  301. parse_param_value_after_quote,
  302. parse_space,
  303. parse_quoted,
  304. parse_comment,
  305. } state = parse_space,
  306. next_state = parse_type;
  307. p = in;
  308. c = p;
  309. end = p + len;
  310. memset(&val, 0, sizeof(val));
  311. val.cpy = in;
  312. while (p < end) {
  313. switch (state) {
  314. case parse_type:
  315. if (g_ascii_isspace(*p) || *p == ';') {
  316. /* We have type without subtype */
  317. val.type.begin = c;
  318. val.type.len = p - c;
  319. state = parse_after_subtype;
  320. }
  321. else if (*p == '/') {
  322. val.type.begin = c;
  323. val.type.len = p - c;
  324. state = parse_space;
  325. next_state = parse_subtype;
  326. p++;
  327. }
  328. else {
  329. p++;
  330. }
  331. break;
  332. case parse_subtype:
  333. if (g_ascii_isspace(*p) || *p == ';') {
  334. val.subtype.begin = c;
  335. val.subtype.len = p - c;
  336. state = parse_after_subtype;
  337. }
  338. else {
  339. p++;
  340. }
  341. break;
  342. case parse_after_subtype:
  343. if (*p == ';' || g_ascii_isspace(*p)) {
  344. p++;
  345. }
  346. else if (*p == '(') {
  347. c = p;
  348. state = parse_comment;
  349. next_state = parse_param_name;
  350. obraces = 1;
  351. ebraces = 0;
  352. pname_start = NULL;
  353. pname_end = NULL;
  354. eqsign_seen = FALSE;
  355. p++;
  356. }
  357. else {
  358. c = p;
  359. state = parse_param_name;
  360. pname_start = NULL;
  361. pname_end = NULL;
  362. eqsign_seen = FALSE;
  363. }
  364. break;
  365. case parse_param_name:
  366. if (*p == '=') {
  367. pname_start = c;
  368. pname_end = p;
  369. state = parse_param_after_name;
  370. eqsign_seen = TRUE;
  371. p++;
  372. }
  373. else if (g_ascii_isspace(*p)) {
  374. pname_start = c;
  375. pname_end = p;
  376. state = parse_param_after_name;
  377. }
  378. else {
  379. p++;
  380. }
  381. break;
  382. case parse_param_after_name:
  383. if (g_ascii_isspace(*p)) {
  384. p++;
  385. }
  386. else if (*p == '=') {
  387. if (eqsign_seen) {
  388. /* Treat as value start */
  389. c = p;
  390. eqsign_seen = FALSE;
  391. state = parse_param_value;
  392. p++;
  393. }
  394. else {
  395. eqsign_seen = TRUE;
  396. p++;
  397. }
  398. }
  399. else {
  400. if (eqsign_seen) {
  401. state = parse_param_value;
  402. c = p;
  403. }
  404. else {
  405. /* Invalid parameter without value */
  406. c = p;
  407. state = parse_param_name;
  408. pname_start = NULL;
  409. pname_end = NULL;
  410. }
  411. }
  412. break;
  413. case parse_param_value:
  414. if (*p == '"') {
  415. p++;
  416. c = p;
  417. state = parse_quoted;
  418. next_state = parse_param_value_after_quote;
  419. }
  420. else if (g_ascii_isspace(*p)) {
  421. if (pname_start && pname_end && pname_end > pname_start) {
  422. rspamd_content_type_add_param(pool, &val, pname_start,
  423. pname_end, c, p);
  424. }
  425. state = parse_space;
  426. next_state = parse_param_name;
  427. pname_start = NULL;
  428. pname_end = NULL;
  429. }
  430. else if (*p == '(') {
  431. if (pname_start && pname_end && pname_end > pname_start) {
  432. rspamd_content_type_add_param(pool, &val, pname_start,
  433. pname_end, c, p);
  434. }
  435. obraces = 1;
  436. ebraces = 0;
  437. p++;
  438. state = parse_comment;
  439. next_state = parse_param_name;
  440. pname_start = NULL;
  441. pname_end = NULL;
  442. }
  443. else if (*p == ';') {
  444. if (pname_start && pname_end && pname_end > pname_start) {
  445. rspamd_content_type_add_param(pool, &val, pname_start,
  446. pname_end, c, p);
  447. }
  448. p++;
  449. state = parse_space;
  450. next_state = parse_param_name;
  451. pname_start = NULL;
  452. pname_end = NULL;
  453. }
  454. else {
  455. p++;
  456. }
  457. break;
  458. case parse_param_value_after_quote:
  459. if (pname_start && pname_end && pname_end > pname_start) {
  460. rspamd_content_type_add_param(pool, &val, pname_start,
  461. pname_end, c, c + qlen);
  462. }
  463. if (*p == '"') {
  464. p++;
  465. if (p == end) {
  466. /* Last quote: done... */
  467. state = parse_space;
  468. break;
  469. }
  470. if (*p == ';') {
  471. p++;
  472. state = parse_space;
  473. next_state = parse_param_name;
  474. pname_start = NULL;
  475. pname_end = NULL;
  476. continue;
  477. }
  478. }
  479. /* We should not normally be here in fact */
  480. if (g_ascii_isspace(*p)) {
  481. state = parse_space;
  482. next_state = parse_param_name;
  483. pname_start = NULL;
  484. pname_end = NULL;
  485. }
  486. else if (*p == '(') {
  487. obraces = 1;
  488. ebraces = 0;
  489. p++;
  490. state = parse_comment;
  491. next_state = parse_param_name;
  492. pname_start = NULL;
  493. pname_end = NULL;
  494. }
  495. else {
  496. state = parse_param_name;
  497. pname_start = NULL;
  498. pname_end = NULL;
  499. c = p;
  500. }
  501. break;
  502. case parse_quoted:
  503. if (*p == '\\') {
  504. /* Quoted pair */
  505. if (p + 1 < end) {
  506. p += 2;
  507. }
  508. else {
  509. p++;
  510. }
  511. }
  512. else if (*p == '"') {
  513. qlen = p - c;
  514. state = next_state;
  515. }
  516. else {
  517. p++;
  518. }
  519. break;
  520. case parse_comment:
  521. if (*p == '(') {
  522. obraces++;
  523. p++;
  524. }
  525. else if (*p == ')') {
  526. ebraces++;
  527. p++;
  528. if (ebraces == obraces && p < end) {
  529. if (g_ascii_isspace(*p)) {
  530. state = parse_space;
  531. }
  532. else {
  533. c = p;
  534. state = next_state;
  535. }
  536. }
  537. }
  538. else {
  539. p++;
  540. }
  541. break;
  542. case parse_space:
  543. if (g_ascii_isspace(*p)) {
  544. p++;
  545. }
  546. else if (*p == '(') {
  547. obraces = 1;
  548. ebraces = 0;
  549. p++;
  550. state = parse_comment;
  551. }
  552. else {
  553. c = p;
  554. state = next_state;
  555. }
  556. break;
  557. }
  558. }
  559. /* Process leftover */
  560. switch (state) {
  561. case parse_type:
  562. val.type.begin = c;
  563. val.type.len = p - c;
  564. break;
  565. case parse_subtype:
  566. val.subtype.begin = c;
  567. val.subtype.len = p - c;
  568. break;
  569. case parse_param_value:
  570. if (pname_start && pname_end && pname_end > pname_start) {
  571. if (p > c && *(p - 1) == ';') {
  572. p--;
  573. }
  574. rspamd_content_type_add_param(pool, &val, pname_start,
  575. pname_end, c, p);
  576. }
  577. break;
  578. case parse_param_value_after_quote:
  579. if (pname_start && pname_end && pname_end > pname_start) {
  580. rspamd_content_type_add_param(pool, &val, pname_start,
  581. pname_end, c, c + qlen);
  582. }
  583. break;
  584. default:
  585. break;
  586. }
  587. if (val.type.len > 0) {
  588. gchar *tmp;
  589. res = rspamd_mempool_alloc(pool, sizeof(val));
  590. memcpy(res, &val, sizeof(val));
  591. /*
  592. * Lowercase type and subtype as they are specified as case insensitive
  593. * in rfc2045 section 5.1
  594. */
  595. tmp = rspamd_mempool_alloc(pool, val.type.len);
  596. memcpy(tmp, val.type.begin, val.type.len);
  597. rspamd_str_lc(tmp, val.type.len);
  598. res->type.begin = tmp;
  599. if (val.subtype.len > 0) {
  600. tmp = rspamd_mempool_alloc(pool, val.subtype.len);
  601. memcpy(tmp, val.subtype.begin, val.subtype.len);
  602. rspamd_str_lc(tmp, val.subtype.len);
  603. res->subtype.begin = tmp;
  604. }
  605. }
  606. return res;
  607. }
  608. struct rspamd_content_type *
  609. rspamd_content_type_parse(const gchar *in,
  610. gsize len, rspamd_mempool_t *pool)
  611. {
  612. struct rspamd_content_type *res = NULL;
  613. rspamd_ftok_t srch;
  614. gchar *cpy;
  615. cpy = rspamd_mempool_alloc(pool, len + 1);
  616. rspamd_strlcpy(cpy, in, len + 1);
  617. if ((res = rspamd_content_type_parser(cpy, len, pool)) != NULL) {
  618. if (res->attrs) {
  619. rspamd_mempool_add_destructor(pool,
  620. (rspamd_mempool_destruct_t) g_hash_table_unref, res->attrs);
  621. rspamd_postprocess_ct_attributes(pool, res->attrs,
  622. rspamd_content_type_postprocess, res);
  623. }
  624. /* Now do some hacks to work with broken content types */
  625. if (res->subtype.len == 0) {
  626. res->flags |= RSPAMD_CONTENT_TYPE_BROKEN;
  627. RSPAMD_FTOK_ASSIGN(&srch, "text");
  628. if (rspamd_ftok_casecmp(&res->type, &srch) == 0) {
  629. /* Workaround for Content-Type: text */
  630. /* Assume text/plain */
  631. RSPAMD_FTOK_ASSIGN(&srch, "plain");
  632. }
  633. else {
  634. RSPAMD_FTOK_ASSIGN(&srch, "html");
  635. if (rspamd_ftok_casecmp(&res->type, &srch) == 0) {
  636. /* Workaround for Content-Type: html */
  637. RSPAMD_FTOK_ASSIGN(&res->type, "text");
  638. RSPAMD_FTOK_ASSIGN(&res->subtype, "html");
  639. }
  640. else {
  641. RSPAMD_FTOK_ASSIGN(&srch, "application");
  642. if (rspamd_ftok_casecmp(&res->type, &srch) == 0) {
  643. RSPAMD_FTOK_ASSIGN(&res->subtype, "octet-stream");
  644. }
  645. }
  646. }
  647. }
  648. else {
  649. /* Common mistake done by retards */
  650. RSPAMD_FTOK_ASSIGN(&srch, "alternate");
  651. if (rspamd_ftok_casecmp(&res->subtype, &srch) == 0) {
  652. res->flags |= RSPAMD_CONTENT_TYPE_BROKEN;
  653. RSPAMD_FTOK_ASSIGN(&res->subtype, "alternative");
  654. }
  655. /* PKCS7 smime */
  656. RSPAMD_FTOK_ASSIGN(&srch, "pkcs7-mime");
  657. if (rspamd_substring_search(res->subtype.begin, res->subtype.len,
  658. srch.begin, srch.len) != -1) {
  659. res->flags |= RSPAMD_CONTENT_TYPE_SMIME;
  660. }
  661. }
  662. RSPAMD_FTOK_ASSIGN(&srch, "multipart");
  663. if (rspamd_ftok_casecmp(&res->type, &srch) == 0) {
  664. res->flags |= RSPAMD_CONTENT_TYPE_MULTIPART;
  665. RSPAMD_FTOK_ASSIGN(&srch, "encrypted");
  666. if (rspamd_ftok_casecmp(&res->subtype, &srch) == 0) {
  667. res->flags |= RSPAMD_CONTENT_TYPE_ENCRYPTED;
  668. }
  669. }
  670. else {
  671. RSPAMD_FTOK_ASSIGN(&srch, "text");
  672. if (rspamd_ftok_casecmp(&res->type, &srch) == 0) {
  673. res->flags |= RSPAMD_CONTENT_TYPE_TEXT;
  674. }
  675. else {
  676. RSPAMD_FTOK_ASSIGN(&srch, "message");
  677. if (rspamd_ftok_casecmp(&res->type, &srch) == 0) {
  678. RSPAMD_FTOK_ASSIGN(&srch, "delivery-status");
  679. if (rspamd_ftok_casecmp(&res->subtype, &srch) == 0) {
  680. res->flags |= RSPAMD_CONTENT_TYPE_TEXT | RSPAMD_CONTENT_TYPE_DSN;
  681. }
  682. else {
  683. RSPAMD_FTOK_ASSIGN(&srch, "notification");
  684. if (rspamd_substring_search_caseless(res->subtype.begin,
  685. res->subtype.len, srch.begin, srch.len) != -1) {
  686. res->flags |= RSPAMD_CONTENT_TYPE_TEXT |
  687. RSPAMD_CONTENT_TYPE_DSN;
  688. }
  689. else {
  690. res->flags |= RSPAMD_CONTENT_TYPE_MESSAGE;
  691. }
  692. }
  693. }
  694. }
  695. }
  696. }
  697. else {
  698. msg_warn_pool("cannot parse content type: %*s", (gint) len, cpy);
  699. }
  700. return res;
  701. }
  702. void rspamd_content_disposition_add_param(rspamd_mempool_t *pool,
  703. struct rspamd_content_disposition *cd,
  704. const gchar *name_start, const gchar *name_end,
  705. const gchar *value_start, const gchar *value_end)
  706. {
  707. rspamd_ftok_t srch;
  708. gchar *name_cpy, *value_cpy, *name_cpy_end, *value_cpy_end;
  709. struct rspamd_content_type_param *found = NULL, *nparam;
  710. g_assert(cd != NULL);
  711. name_cpy = rspamd_mempool_alloc(pool, name_end - name_start);
  712. memcpy(name_cpy, name_start, name_end - name_start);
  713. name_cpy_end = name_cpy + (name_end - name_start);
  714. value_cpy = rspamd_mempool_alloc(pool, value_end - value_start);
  715. memcpy(value_cpy, value_start, value_end - value_start);
  716. value_cpy_end = value_cpy + (value_end - value_start);
  717. nparam = rspamd_mempool_alloc0(pool, sizeof(*nparam));
  718. rspamd_str_lc(name_cpy, name_cpy_end - name_cpy);
  719. if (!rspamd_param_maybe_rfc2231_process(pool, nparam, name_cpy,
  720. name_cpy_end, value_cpy, value_cpy_end)) {
  721. nparam->name.begin = name_cpy;
  722. nparam->name.len = name_cpy_end - name_cpy;
  723. nparam->value.begin = value_cpy;
  724. nparam->value.len = value_cpy_end - value_cpy;
  725. }
  726. srch.begin = nparam->name.begin;
  727. srch.len = nparam->name.len;
  728. if (cd->attrs) {
  729. found = g_hash_table_lookup(cd->attrs, &srch);
  730. }
  731. else {
  732. cd->attrs = g_hash_table_new(rspamd_ftok_icase_hash,
  733. rspamd_ftok_icase_equal);
  734. }
  735. if (!found) {
  736. DL_APPEND(found, nparam);
  737. g_hash_table_insert(cd->attrs, &nparam->name, nparam);
  738. }
  739. else {
  740. DL_APPEND(found, nparam);
  741. }
  742. }
  743. struct rspamd_content_disposition *
  744. rspamd_content_disposition_parse(const gchar *in,
  745. gsize len, rspamd_mempool_t *pool)
  746. {
  747. struct rspamd_content_disposition *res = NULL, val;
  748. if (rspamd_content_disposition_parser(in, len, &val, pool)) {
  749. if (val.type == RSPAMD_CT_UNKNOWN) {
  750. /* 'Fix' type to attachment as MUA does */
  751. val.type = RSPAMD_CT_ATTACHMENT;
  752. }
  753. res = rspamd_mempool_alloc(pool, sizeof(val));
  754. memcpy(res, &val, sizeof(val));
  755. res->lc_data = rspamd_mempool_alloc(pool, len + 1);
  756. rspamd_strlcpy(res->lc_data, in, len + 1);
  757. rspamd_str_lc(res->lc_data, len);
  758. if (res->attrs) {
  759. rspamd_postprocess_ct_attributes(pool, res->attrs,
  760. rspamd_content_disposition_postprocess, res);
  761. rspamd_mempool_add_destructor(pool,
  762. (rspamd_mempool_destruct_t) g_hash_table_unref, res->attrs);
  763. }
  764. }
  765. else {
  766. msg_warn_pool("cannot parse content disposition: %*s",
  767. (gint) len, in);
  768. }
  769. return res;
  770. }