You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

mime_headers.c 33KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441
  1. /*
  2. * Copyright 2024 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "mime_headers.h"
  17. #include "smtp_parsers.h"
  18. #include "mime_encoding.h"
  19. #include "received.h"
  20. #include "contrib/uthash/utlist.h"
  21. #include "libserver/mempool_vars_internal.h"
  22. #include "libserver/cfg_file.h"
  23. #include "libutil/util.h"
  24. #include <unicode/utf8.h>
  25. KHASH_INIT(rspamd_mime_headers_htb, gchar *,
  26. struct rspamd_mime_header *, 1,
  27. rspamd_strcase_hash, rspamd_strcase_equal);
  28. struct rspamd_mime_headers_table {
  29. khash_t(rspamd_mime_headers_htb) htb;
  30. ref_entry_t ref;
  31. };
  32. static void
  33. rspamd_mime_header_check_special(struct rspamd_task *task,
  34. struct rspamd_mime_header *rh)
  35. {
  36. uint64_t h;
  37. const gchar *p, *end;
  38. gchar *id;
  39. gint max_recipients = -1, len;
  40. if (task->cfg) {
  41. max_recipients = task->cfg->max_recipients;
  42. }
  43. h = rspamd_icase_hash(rh->name, strlen(rh->name), 0xdeadbabe);
  44. switch (h) {
  45. case 0x88705DC4D9D61ABULL: /* received */
  46. if (rspamd_received_header_parse(task, rh->decoded, strlen(rh->decoded), rh)) {
  47. rh->flags |= RSPAMD_HEADER_RECEIVED;
  48. }
  49. break;
  50. case 0x76F31A09F4352521ULL: /* to */
  51. MESSAGE_FIELD(task, rcpt_mime) = rspamd_email_address_from_mime(task->task_pool,
  52. rh->value, strlen(rh->value),
  53. MESSAGE_FIELD(task, rcpt_mime), max_recipients);
  54. rh->flags |= RSPAMD_HEADER_TO | RSPAMD_HEADER_RCPT | RSPAMD_HEADER_UNIQUE;
  55. break;
  56. case 0x7EB117C1480B76ULL: /* cc */
  57. MESSAGE_FIELD(task, rcpt_mime) = rspamd_email_address_from_mime(task->task_pool,
  58. rh->value, strlen(rh->value),
  59. MESSAGE_FIELD(task, rcpt_mime), max_recipients);
  60. rh->flags |= RSPAMD_HEADER_CC | RSPAMD_HEADER_RCPT | RSPAMD_HEADER_UNIQUE;
  61. break;
  62. case 0xE4923E11C4989C8DULL: /* bcc */
  63. MESSAGE_FIELD(task, rcpt_mime) = rspamd_email_address_from_mime(task->task_pool,
  64. rh->value, strlen(rh->value),
  65. MESSAGE_FIELD(task, rcpt_mime), max_recipients);
  66. rh->flags |= RSPAMD_HEADER_BCC | RSPAMD_HEADER_RCPT | RSPAMD_HEADER_UNIQUE;
  67. break;
  68. case 0x41E1985EDC1CBDE4ULL: /* from */
  69. MESSAGE_FIELD(task, from_mime) = rspamd_email_address_from_mime(task->task_pool,
  70. rh->value, strlen(rh->value),
  71. MESSAGE_FIELD(task, from_mime), max_recipients);
  72. rh->flags |= RSPAMD_HEADER_FROM | RSPAMD_HEADER_SENDER | RSPAMD_HEADER_UNIQUE;
  73. break;
  74. case 0x43A558FC7C240226ULL: /* message-id */ {
  75. rh->flags = RSPAMD_HEADER_MESSAGE_ID | RSPAMD_HEADER_UNIQUE;
  76. p = rh->decoded;
  77. len = rspamd_strip_smtp_comments_inplace(rh->decoded, strlen(p));
  78. rh->decoded[len] = '\0'; /* Zero terminate after stripping */
  79. /* Strip surrounding spaces */
  80. rh->decoded = g_strstrip(rh->decoded);
  81. end = p + len;
  82. if (*p == '<') {
  83. p++;
  84. }
  85. if (end > p) {
  86. gchar *d;
  87. if (*(end - 1) == '>') {
  88. end--;
  89. }
  90. id = rspamd_mempool_alloc(task->task_pool, end - p + 1);
  91. d = id;
  92. while (p < end) {
  93. if (g_ascii_isgraph(*p)) {
  94. *d++ = *p++;
  95. }
  96. else {
  97. *d++ = '?';
  98. p++;
  99. }
  100. }
  101. *d = '\0';
  102. MESSAGE_FIELD(task, message_id) = id;
  103. }
  104. break;
  105. }
  106. case 0xB91D3910358E8212ULL: /* subject */
  107. if (MESSAGE_FIELD(task, subject) == NULL) {
  108. MESSAGE_FIELD(task, subject) = rh->decoded;
  109. }
  110. rh->flags = RSPAMD_HEADER_SUBJECT | RSPAMD_HEADER_UNIQUE;
  111. break;
  112. case 0xEE4AA2EAAC61D6F4ULL: /* return-path */
  113. if (task->from_envelope == NULL) {
  114. task->from_envelope = rspamd_email_address_from_smtp(rh->decoded,
  115. strlen(rh->decoded));
  116. }
  117. rh->flags = RSPAMD_HEADER_RETURN_PATH | RSPAMD_HEADER_UNIQUE;
  118. break;
  119. case 0xB9EEFAD2E93C2161ULL: /* delivered-to */
  120. if (task->deliver_to == NULL) {
  121. task->deliver_to = rh->decoded;
  122. }
  123. rh->flags = RSPAMD_HEADER_DELIVERED_TO;
  124. break;
  125. case 0x2EC3BFF3C393FC10ULL: /* date */
  126. case 0xAC0DDB1A1D214CAULL: /* sender */
  127. case 0x54094572367AB695ULL: /* in-reply-to */
  128. case 0x81CD9E9131AB6A9AULL: /* content-type */
  129. case 0xC39BD9A75AA25B60ULL: /* content-transfer-encoding */
  130. case 0xB3F6704CB3AD6589ULL: /* references */
  131. rh->flags = RSPAMD_HEADER_UNIQUE;
  132. break;
  133. }
  134. }
  135. static void
  136. rspamd_mime_header_add(struct rspamd_task *task,
  137. khash_t(rspamd_mime_headers_htb) * target,
  138. struct rspamd_mime_header **order_ptr,
  139. struct rspamd_mime_header *rh,
  140. gboolean check_special)
  141. {
  142. khiter_t k;
  143. struct rspamd_mime_header *ex;
  144. int res;
  145. k = kh_put(rspamd_mime_headers_htb, target, rh->name, &res);
  146. if (res == 0) {
  147. ex = kh_value(target, k);
  148. DL_APPEND(ex, rh);
  149. msg_debug_task("append raw header %s: %s", rh->name, rh->value);
  150. }
  151. else {
  152. kh_value(target, k) = rh;
  153. rh->prev = rh;
  154. rh->next = NULL;
  155. msg_debug_task("add new raw header %s: %s", rh->name, rh->value);
  156. }
  157. LL_PREPEND2(*order_ptr, rh, ord_next);
  158. if (check_special) {
  159. rspamd_mime_header_check_special(task, rh);
  160. }
  161. }
  162. /* Convert raw headers to a list of struct raw_header * */
  163. void rspamd_mime_headers_process(struct rspamd_task *task,
  164. struct rspamd_mime_headers_table *target,
  165. struct rspamd_mime_header **order_ptr,
  166. const gchar *in, gsize len,
  167. gboolean check_newlines)
  168. {
  169. struct rspamd_mime_header *nh = NULL;
  170. const gchar *p, *c, *end;
  171. gchar *tmp, *tp;
  172. gint state = 0, l, next_state = 100, err_state = 100, t_state;
  173. gboolean valid_folding = FALSE, shift_by_one = FALSE;
  174. guint nlines_count[RSPAMD_TASK_NEWLINES_MAX];
  175. guint norder = 0;
  176. p = in;
  177. end = p + len;
  178. c = p;
  179. memset(nlines_count, 0, sizeof(nlines_count));
  180. msg_debug_task("start processing headers");
  181. while (p < end) {
  182. /* FSM for processing headers */
  183. switch (state) {
  184. case 0:
  185. /* Begin processing headers */
  186. if (!g_ascii_isalpha(*p)) {
  187. /* We have some garbage at the beginning of headers, skip this line */
  188. state = 100;
  189. next_state = 0;
  190. }
  191. else {
  192. state = 1;
  193. c = p;
  194. }
  195. break;
  196. case 1:
  197. /* We got something like header's name */
  198. if (*p == ':') {
  199. nh = rspamd_mempool_alloc0(task->task_pool,
  200. sizeof(struct rspamd_mime_header));
  201. l = p - c;
  202. tmp = rspamd_mempool_alloc(task->task_pool, l + 1);
  203. rspamd_null_safe_copy(c, l, tmp, l + 1);
  204. nh->name = tmp;
  205. nh->flags |= RSPAMD_HEADER_EMPTY_SEPARATOR;
  206. nh->raw_value = c;
  207. nh->raw_len = p - c; /* Including trailing ':' */
  208. p++;
  209. state = 2;
  210. c = p;
  211. }
  212. else if (g_ascii_isspace(*p)) {
  213. /* Not header but some garbage */
  214. if (target == MESSAGE_FIELD(task, raw_headers)) {
  215. /* Do not propagate flag from the attachments */
  216. task->flags |= RSPAMD_TASK_FLAG_BROKEN_HEADERS;
  217. }
  218. state = 100;
  219. next_state = 0;
  220. }
  221. else {
  222. p++;
  223. }
  224. break;
  225. case 2:
  226. /* We got header's name, so skip any \t or spaces */
  227. if (*p == '\t') {
  228. nh->flags &= ~RSPAMD_HEADER_EMPTY_SEPARATOR;
  229. nh->flags |= RSPAMD_HEADER_TAB_SEPARATED;
  230. p++;
  231. }
  232. else if (*p == ' ') {
  233. nh->flags &= ~RSPAMD_HEADER_EMPTY_SEPARATOR;
  234. p++;
  235. }
  236. else if (*p == '\n' || *p == '\r') {
  237. if (check_newlines) {
  238. if (*p == '\n') {
  239. nlines_count[RSPAMD_TASK_NEWLINES_LF]++;
  240. }
  241. else if (p + 1 < end && *(p + 1) == '\n') {
  242. nlines_count[RSPAMD_TASK_NEWLINES_CRLF]++;
  243. }
  244. else {
  245. nlines_count[RSPAMD_TASK_NEWLINES_CR]++;
  246. }
  247. }
  248. /* Process folding */
  249. state = 99;
  250. l = p - c;
  251. if (l > 0) {
  252. tmp = rspamd_mempool_alloc(task->task_pool, l + 1);
  253. rspamd_null_safe_copy(c, l, tmp, l + 1);
  254. nh->separator = tmp;
  255. }
  256. next_state = 3;
  257. err_state = 5;
  258. c = p;
  259. }
  260. else {
  261. /* Process value */
  262. l = p - c;
  263. if (l >= 0) {
  264. tmp = rspamd_mempool_alloc(task->task_pool, l + 1);
  265. rspamd_null_safe_copy(c, l, tmp, l + 1);
  266. nh->separator = tmp;
  267. }
  268. c = p;
  269. state = 3;
  270. }
  271. break;
  272. case 3:
  273. if (*p == '\r' || *p == '\n') {
  274. /* Hold folding */
  275. if (check_newlines) {
  276. if (*p == '\n') {
  277. nlines_count[RSPAMD_TASK_NEWLINES_LF]++;
  278. }
  279. else if (p + 1 < end && *(p + 1) == '\n') {
  280. nlines_count[RSPAMD_TASK_NEWLINES_CRLF]++;
  281. }
  282. else {
  283. nlines_count[RSPAMD_TASK_NEWLINES_CR]++;
  284. }
  285. }
  286. state = 99;
  287. next_state = 3;
  288. err_state = 4;
  289. }
  290. else if (p + 1 == end) {
  291. state = 4;
  292. }
  293. else {
  294. p++;
  295. }
  296. break;
  297. case 4:
  298. /* Copy header's value */
  299. /*
  300. * XXX:
  301. * The original decision to use here null terminated
  302. * strings was extremely poor!
  303. */
  304. l = p - c;
  305. tmp = rspamd_mempool_alloc(task->task_pool, l + 1);
  306. tp = tmp;
  307. t_state = 0;
  308. while (l--) {
  309. if (t_state == 0) {
  310. /* Before folding */
  311. if (*c == '\n' || *c == '\r') {
  312. t_state = 1;
  313. c++;
  314. *tp++ = ' ';
  315. }
  316. else {
  317. if (*c != '\0') {
  318. *tp++ = *c++;
  319. }
  320. else {
  321. c++;
  322. }
  323. }
  324. }
  325. else if (t_state == 1) {
  326. /* Inside folding */
  327. if (g_ascii_isspace(*c)) {
  328. c++;
  329. }
  330. else {
  331. t_state = 0;
  332. if (*c != '\0') {
  333. *tp++ = *c++;
  334. }
  335. else {
  336. c++;
  337. }
  338. }
  339. }
  340. }
  341. /* Strip last space that can be added by \r\n parsing */
  342. if (tp > tmp && *(tp - 1) == ' ') {
  343. tp--;
  344. }
  345. *tp = '\0';
  346. /* Strip the initial spaces that could also be added by folding */
  347. while (*tmp != '\0' && g_ascii_isspace(*tmp)) {
  348. tmp++;
  349. }
  350. if (p + 1 == end) {
  351. nh->raw_len = end - nh->raw_value;
  352. }
  353. else {
  354. nh->raw_len = p - nh->raw_value;
  355. }
  356. nh->value = tmp;
  357. gboolean broken_utf = FALSE;
  358. nh->decoded = rspamd_mime_header_decode(task->task_pool,
  359. nh->value, strlen(tmp), &broken_utf);
  360. if (broken_utf) {
  361. task->flags |= RSPAMD_TASK_FLAG_BAD_UNICODE;
  362. }
  363. if (nh->decoded == NULL) {
  364. /* As we strip comments in place... */
  365. nh->decoded = rspamd_mempool_strdup(task->task_pool, "");
  366. }
  367. /* We also validate utf8 and replace all non-valid utf8 chars */
  368. rspamd_mime_charset_utf_enforce(nh->decoded, strlen(nh->decoded));
  369. nh->order = norder++;
  370. rspamd_mime_header_add(task, &target->htb, order_ptr, nh, check_newlines);
  371. nh = NULL;
  372. state = 0;
  373. break;
  374. case 5:
  375. /* Header has only name, no value */
  376. nh->value = rspamd_mempool_strdup(task->task_pool, "");
  377. nh->decoded = rspamd_mempool_strdup(task->task_pool, "");
  378. nh->raw_len = p - nh->raw_value;
  379. if (shift_by_one) {
  380. nh->raw_len++;
  381. }
  382. nh->order = norder++;
  383. rspamd_mime_header_add(task, &target->htb, order_ptr, nh, check_newlines);
  384. nh = NULL;
  385. state = 0;
  386. break;
  387. case 99:
  388. /* Folding state */
  389. if (p + 1 == end) {
  390. state = err_state;
  391. /* Include the last character into the next header */
  392. shift_by_one = TRUE;
  393. }
  394. else {
  395. if (*p == '\r' || *p == '\n') {
  396. p++;
  397. valid_folding = FALSE;
  398. }
  399. else if (*p == '\t' || *p == ' ') {
  400. /* Valid folding */
  401. p++;
  402. valid_folding = TRUE;
  403. }
  404. else {
  405. if (valid_folding) {
  406. debug_task("go to state: %d->%d", state, next_state);
  407. state = next_state;
  408. }
  409. else {
  410. /* Fall back */
  411. debug_task("go to state: %d->%d", state, err_state);
  412. state = err_state;
  413. }
  414. }
  415. }
  416. break;
  417. case 100:
  418. /* Fail state, skip line */
  419. if (*p == '\r') {
  420. if (p + 1 < end && *(p + 1) == '\n') {
  421. nlines_count[RSPAMD_TASK_NEWLINES_CRLF]++;
  422. p++;
  423. }
  424. p++;
  425. state = next_state;
  426. }
  427. else if (*p == '\n') {
  428. nlines_count[RSPAMD_TASK_NEWLINES_LF]++;
  429. if (p + 1 < end && *(p + 1) == '\r') {
  430. p++;
  431. }
  432. p++;
  433. state = next_state;
  434. }
  435. else if (p + 1 == end) {
  436. state = next_state;
  437. p++;
  438. }
  439. else {
  440. p++;
  441. }
  442. break;
  443. }
  444. }
  445. /* Since we have prepended headers, we need to reverse the list to get the actual order */
  446. LL_REVERSE(*order_ptr);
  447. if (check_newlines) {
  448. guint max_cnt = 0;
  449. gint sel = 0;
  450. rspamd_cryptobox_hash_state_t hs;
  451. guchar hout[rspamd_cryptobox_HASHBYTES], *hexout;
  452. for (gint i = RSPAMD_TASK_NEWLINES_CR; i < RSPAMD_TASK_NEWLINES_MAX; i++) {
  453. if (nlines_count[i] > max_cnt) {
  454. max_cnt = nlines_count[i];
  455. sel = i;
  456. }
  457. }
  458. MESSAGE_FIELD(task, nlines_type) = sel;
  459. rspamd_cryptobox_hash_init(&hs, NULL, 0);
  460. LL_FOREACH(*order_ptr, nh)
  461. {
  462. if (nh->name && nh->flags != RSPAMD_HEADER_RECEIVED) {
  463. rspamd_cryptobox_hash_update(&hs, nh->name, strlen(nh->name));
  464. }
  465. }
  466. rspamd_cryptobox_hash_final(&hs, hout);
  467. hexout = rspamd_mempool_alloc(task->task_pool, sizeof(hout) * 2 + 1);
  468. hexout[sizeof(hout) * 2] = '\0';
  469. rspamd_encode_hex_buf(hout, sizeof(hout), hexout,
  470. sizeof(hout) * 2 + 1);
  471. rspamd_mempool_set_variable(task->task_pool,
  472. RSPAMD_MEMPOOL_HEADERS_HASH,
  473. hexout, NULL);
  474. }
  475. }
  476. static void
  477. rspamd_mime_header_maybe_save_token(rspamd_mempool_t *pool,
  478. GString *out,
  479. GByteArray *token,
  480. GByteArray *decoded_token,
  481. rspamd_ftok_t *old_charset,
  482. rspamd_ftok_t *new_charset)
  483. {
  484. if (new_charset->len == 0) {
  485. g_assert_not_reached();
  486. }
  487. if (old_charset->len > 0) {
  488. if (rspamd_ftok_casecmp(new_charset, old_charset) == 0) {
  489. rspamd_ftok_t srch;
  490. /*
  491. * Special case for iso-2022-jp:
  492. * https://github.com/vstakhov/rspamd/issues/1669
  493. */
  494. RSPAMD_FTOK_ASSIGN(&srch, "iso-2022-jp");
  495. if (rspamd_ftok_casecmp(new_charset, &srch) != 0) {
  496. /* We can concatenate buffers, just return */
  497. return;
  498. }
  499. }
  500. }
  501. /* We need to flush and decode old token to out string */
  502. if (rspamd_mime_to_utf8_byte_array(token, decoded_token, pool,
  503. rspamd_mime_detect_charset(new_charset, pool))) {
  504. g_string_append_len(out, decoded_token->data, decoded_token->len);
  505. }
  506. /* We also reset buffer */
  507. g_byte_array_set_size(token, 0);
  508. /*
  509. * Propagate charset
  510. *
  511. * Here are dragons: we save the original charset to allow buffers concat
  512. * in the condition at the beginning of the function.
  513. * However, it will likely cause unnecessary calls for
  514. * `rspamd_mime_detect_charset` which could be relatively expensive.
  515. * But we ignore that for now...
  516. */
  517. memcpy(old_charset, new_charset, sizeof(*old_charset));
  518. }
  519. static void
  520. rspamd_mime_header_sanity_check(GString *str)
  521. {
  522. gsize i;
  523. gchar t;
  524. for (i = 0; i < str->len; i++) {
  525. t = str->str[i];
  526. if (!((t & 0x80) || g_ascii_isgraph(t))) {
  527. if (g_ascii_isspace(t)) {
  528. /* Replace spaces characters with plain space */
  529. str->str[i] = ' ';
  530. }
  531. else {
  532. str->str[i] = '?';
  533. }
  534. }
  535. }
  536. }
  537. gchar *
  538. rspamd_mime_header_decode(rspamd_mempool_t *pool, const gchar *in,
  539. gsize inlen, gboolean *invalid_utf)
  540. {
  541. GString *out;
  542. const guchar *c, *p, *end;
  543. const gchar *tok_start = NULL;
  544. gsize tok_len = 0, pos;
  545. GByteArray *token = NULL, *decoded;
  546. rspamd_ftok_t cur_charset = {0, NULL}, old_charset = {0, NULL};
  547. gint encoding;
  548. gssize r;
  549. guint qmarks = 0;
  550. gchar *ret;
  551. enum {
  552. parse_normal = 0,
  553. got_eqsign,
  554. got_encoded_start,
  555. got_more_qmark,
  556. skip_spaces,
  557. } state = parse_normal;
  558. g_assert(in != NULL);
  559. c = in;
  560. p = in;
  561. end = in + inlen;
  562. out = g_string_sized_new(inlen);
  563. token = g_byte_array_sized_new(80);
  564. decoded = g_byte_array_sized_new(122);
  565. while (p < end) {
  566. switch (state) {
  567. case parse_normal:
  568. if (*p == '=') {
  569. g_string_append_len(out, c, p - c);
  570. c = p;
  571. state = got_eqsign;
  572. }
  573. else if (*p >= 128) {
  574. gint off = 0;
  575. UChar32 uc;
  576. /* Unencoded character */
  577. g_string_append_len(out, c, p - c);
  578. /* Check if that's valid UTF8 */
  579. U8_NEXT(p, off, end - p, uc);
  580. if (uc <= 0) {
  581. c = p + 1;
  582. /* 0xFFFD in UTF8 */
  583. g_string_append_len(out, " ", 3);
  584. off = 0;
  585. U8_APPEND_UNSAFE(out->str + out->len - 3,
  586. off, 0xfffd);
  587. if (invalid_utf) {
  588. *invalid_utf = TRUE;
  589. }
  590. }
  591. else {
  592. c = p;
  593. p = p + off;
  594. continue; /* To avoid p ++ after this block */
  595. }
  596. }
  597. p++;
  598. break;
  599. case got_eqsign:
  600. if (*p == '?') {
  601. state = got_encoded_start;
  602. qmarks = 0;
  603. }
  604. else {
  605. g_string_append_len(out, c, 1);
  606. c = p;
  607. state = parse_normal;
  608. continue; /* Deal with == case */
  609. }
  610. p++;
  611. break;
  612. case got_encoded_start:
  613. if (*p == '?') {
  614. state = got_more_qmark;
  615. qmarks++;
  616. /* Skip multiple ? signs */
  617. p++;
  618. while (p < end && *p == '?') {
  619. p++;
  620. }
  621. continue;
  622. }
  623. p++;
  624. break;
  625. case got_more_qmark:
  626. if (*p == '=') {
  627. if (qmarks < 3) {
  628. state = got_encoded_start;
  629. }
  630. else {
  631. /* Finished encoded boundary */
  632. if (*c == '"') {
  633. /* Quoted string, non-RFC conformant but used by retards */
  634. c++;
  635. }
  636. if (rspamd_rfc2047_parser(c, p - c + 1, &encoding,
  637. &cur_charset.begin, &cur_charset.len,
  638. &tok_start, &tok_len)) {
  639. /* We have a token, so we can decode it from `encoding` */
  640. if (token->len > 0) {
  641. if (old_charset.len == 0) {
  642. memcpy(&old_charset, &cur_charset,
  643. sizeof(old_charset));
  644. }
  645. rspamd_mime_header_maybe_save_token(pool, out,
  646. token, decoded,
  647. &old_charset, &cur_charset);
  648. }
  649. qmarks = 0;
  650. pos = token->len;
  651. g_byte_array_set_size(token, pos + tok_len);
  652. if (encoding == RSPAMD_RFC2047_QP) {
  653. r = rspamd_decode_qp2047_buf(tok_start, tok_len,
  654. token->data + pos, tok_len);
  655. if (r != -1) {
  656. token->len = pos + r;
  657. }
  658. else {
  659. /* Cannot decode qp */
  660. token->len -= tok_len;
  661. }
  662. }
  663. else {
  664. if (rspamd_cryptobox_base64_decode(tok_start, tok_len,
  665. token->data + pos, &tok_len)) {
  666. token->len = pos + tok_len;
  667. }
  668. else {
  669. /* Cannot decode */
  670. token->len -= tok_len;
  671. }
  672. }
  673. c = p + 1;
  674. state = skip_spaces;
  675. }
  676. else {
  677. /* Not encoded-word */
  678. old_charset.len = 0;
  679. if (token->len > 0) {
  680. rspamd_mime_header_maybe_save_token(pool, out,
  681. token, decoded,
  682. &old_charset, &cur_charset);
  683. }
  684. g_string_append_len(out, c, p - c);
  685. c = p;
  686. state = parse_normal;
  687. }
  688. } /* qmarks >= 3 */
  689. } /* p == '=' */
  690. else {
  691. state = got_encoded_start;
  692. }
  693. p++;
  694. break;
  695. case skip_spaces:
  696. if (g_ascii_isspace(*p)) {
  697. p++;
  698. }
  699. else if (*p == '=' && p < end - 1 && p[1] == '?') {
  700. /* Next boundary, can glue */
  701. c = p;
  702. p += 2;
  703. state = got_encoded_start;
  704. }
  705. else {
  706. /* Need to save spaces and decoded token */
  707. if (token->len > 0) {
  708. old_charset.len = 0;
  709. rspamd_mime_header_maybe_save_token(pool, out,
  710. token, decoded,
  711. &old_charset, &cur_charset);
  712. }
  713. g_string_append_len(out, c, p - c);
  714. c = p;
  715. state = parse_normal;
  716. }
  717. break;
  718. }
  719. }
  720. /* Leftover */
  721. switch (state) {
  722. case skip_spaces:
  723. if (token->len > 0 && cur_charset.len > 0) {
  724. old_charset.len = 0;
  725. rspamd_mime_header_maybe_save_token(pool, out,
  726. token, decoded,
  727. &old_charset, &cur_charset);
  728. }
  729. break;
  730. default:
  731. /* Just copy leftover */
  732. if (p > c) {
  733. g_string_append_len(out, c, p - c);
  734. }
  735. break;
  736. }
  737. g_byte_array_free(token, TRUE);
  738. g_byte_array_free(decoded, TRUE);
  739. rspamd_mime_header_sanity_check(out);
  740. rspamd_mempool_notify_alloc(pool, out->len);
  741. ret = g_string_free(out, FALSE);
  742. rspamd_mempool_add_destructor(pool, g_free, ret);
  743. return ret;
  744. }
  745. gchar *
  746. rspamd_mime_header_encode(const gchar *in, gsize len)
  747. {
  748. const gchar *p = in, *end = in + len;
  749. gchar *out, encode_buf[80 * sizeof(uint32_t)];
  750. GString *res;
  751. gboolean need_encoding = FALSE;
  752. /* Check if we need to encode */
  753. while (p < end) {
  754. if ((((guchar) *p) & 0x80) != 0) {
  755. need_encoding = TRUE;
  756. break;
  757. }
  758. p++;
  759. }
  760. if (!need_encoding) {
  761. out = g_malloc(len + 1);
  762. rspamd_strlcpy(out, in, len + 1);
  763. }
  764. else {
  765. /* Need encode */
  766. gsize ulen, pos;
  767. gint r;
  768. const gchar *prev;
  769. /* Choose step: =?UTF-8?Q?<qp>?= should be less than 76 chars */
  770. guint step = (76 - 12) / 3 + 1;
  771. ulen = g_utf8_strlen(in, len);
  772. res = g_string_sized_new(len * 2 + 1);
  773. pos = 0;
  774. prev = in;
  775. /* Adjust chunk size for unicode average length */
  776. step *= 1.0 * ulen / (gdouble) len;
  777. while (pos < ulen) {
  778. p = g_utf8_offset_to_pointer(in, pos);
  779. if (p > prev) {
  780. /* Encode and print */
  781. r = rspamd_encode_qp2047_buf(prev, p - prev,
  782. encode_buf, sizeof(encode_buf));
  783. if (r != -1) {
  784. if (res->len > 0) {
  785. rspamd_printf_gstring(res, " =?UTF-8?Q?%*s?=", r,
  786. encode_buf);
  787. }
  788. else {
  789. rspamd_printf_gstring(res, "=?UTF-8?Q?%*s?=", r,
  790. encode_buf);
  791. }
  792. }
  793. }
  794. pos += MIN(step, ulen - pos);
  795. prev = p;
  796. }
  797. /* Leftover */
  798. if (prev < end) {
  799. r = rspamd_encode_qp2047_buf(prev, end - prev,
  800. encode_buf, sizeof(encode_buf));
  801. if (r != -1) {
  802. if (res->len > 0) {
  803. rspamd_printf_gstring(res, " =?UTF-8?Q?%*s?=", r,
  804. encode_buf);
  805. }
  806. else {
  807. rspamd_printf_gstring(res, "=?UTF-8?Q?%*s?=", r,
  808. encode_buf);
  809. }
  810. }
  811. }
  812. out = g_string_free(res, FALSE);
  813. }
  814. return out;
  815. }
  816. gchar *
  817. rspamd_mime_message_id_generate(const gchar *fqdn)
  818. {
  819. GString *out;
  820. uint64_t rnd, clk;
  821. out = g_string_sized_new(strlen(fqdn) + 22);
  822. rnd = ottery_rand_uint64();
  823. clk = rspamd_get_calendar_ticks() * 1e6;
  824. rspamd_printf_gstring(out, "%*bs.%*bs@%s",
  825. (gint) sizeof(uint64_t) - 3, (guchar *) &clk,
  826. (gint) sizeof(uint64_t), (gchar *) &rnd,
  827. fqdn);
  828. return g_string_free(out, FALSE);
  829. }
  830. struct rspamd_mime_header *
  831. rspamd_message_get_header_from_hash(struct rspamd_mime_headers_table *hdrs,
  832. const gchar *field,
  833. gboolean need_modified)
  834. {
  835. if (hdrs == NULL) {
  836. return NULL;
  837. }
  838. khiter_t k;
  839. khash_t(rspamd_mime_headers_htb) *htb = &hdrs->htb;
  840. struct rspamd_mime_header *hdr;
  841. if (htb) {
  842. k = kh_get(rspamd_mime_headers_htb, htb, (gchar *) field);
  843. if (k == kh_end(htb)) {
  844. return NULL;
  845. }
  846. hdr = kh_value(htb, k);
  847. if (!need_modified) {
  848. if (hdr->flags & RSPAMD_HEADER_NON_EXISTING) {
  849. return NULL;
  850. }
  851. return hdr;
  852. }
  853. else {
  854. if (hdr->flags & RSPAMD_HEADER_MODIFIED) {
  855. return hdr->modified_chain;
  856. }
  857. return hdr;
  858. }
  859. }
  860. return NULL;
  861. }
  862. struct rspamd_mime_header *
  863. rspamd_message_get_header_array(struct rspamd_task *task, const gchar *field,
  864. gboolean need_modified)
  865. {
  866. return rspamd_message_get_header_from_hash(
  867. MESSAGE_FIELD_CHECK(task, raw_headers),
  868. field, need_modified);
  869. }
  870. gsize rspamd_mime_headers_count(struct rspamd_mime_headers_table *hdrs)
  871. {
  872. if (hdrs) {
  873. return kh_size(&hdrs->htb);
  874. }
  875. return 0;
  876. }
  877. bool rspamd_mime_headers_foreach(const struct rspamd_mime_headers_table *hdrs,
  878. rspamd_hdr_traverse_func_t func, void *ud)
  879. {
  880. const gchar *name;
  881. struct rspamd_mime_header *hdr;
  882. kh_foreach(&hdrs->htb, name, hdr, {
  883. if (!func(name, hdr, ud)) {
  884. return false;
  885. }
  886. });
  887. return true;
  888. }
  889. static void
  890. rspamd_message_headers_dtor(struct rspamd_mime_headers_table *hdrs)
  891. {
  892. if (hdrs) {
  893. kfree(hdrs->htb.keys);
  894. kfree(hdrs->htb.vals);
  895. kfree(hdrs->htb.flags);
  896. g_free(hdrs);
  897. }
  898. }
  /*
   * Take a reference on the headers table (REF_RETAIN).
   *
   * @param hdrs table to retain
   * @return the same pointer, for call chaining
   */
  struct rspamd_mime_headers_table *
  rspamd_message_headers_ref(struct rspamd_mime_headers_table *hdrs)
  {
      REF_RETAIN(hdrs);

      return hdrs;
  }
  /*
   * Drop a reference on the headers table (REF_RELEASE).
   *
   * @param hdrs table to release
   */
  void rspamd_message_headers_unref(struct rspamd_mime_headers_table *hdrs)
  {
      REF_RELEASE(hdrs);
  }
  909. struct rspamd_mime_headers_table *
  910. rspamd_message_headers_new(void)
  911. {
  912. struct rspamd_mime_headers_table *nhdrs;
  913. nhdrs = g_malloc0(sizeof(*nhdrs));
  914. REF_INIT_RETAIN(nhdrs, rspamd_message_headers_dtor);
  915. return nhdrs;
  916. }
  917. gsize rspamd_message_header_unfold_inplace(char *hdr, gsize len)
  918. {
  919. /*
  920. * t - tortoise (destination)
  921. * h - hare (source)
  922. */
  923. char *t = hdr, *h = hdr, *end = (hdr + len);
  924. enum {
  925. copy_chars,
  926. folding_cr,
  927. folding_lf,
  928. folding_ws,
  929. } state = copy_chars;
  930. while (h < end) {
  931. switch (state) {
  932. case copy_chars:
  933. if (*h == '\r') {
  934. state = folding_cr;
  935. h++;
  936. }
  937. else if (*h == '\n') {
  938. state = folding_lf;
  939. h++;
  940. }
  941. else {
  942. *t++ = *h++;
  943. }
  944. break;
  945. case folding_cr:
  946. if (*h == '\n') {
  947. state = folding_lf;
  948. h++;
  949. }
  950. else if (g_ascii_isspace(*h)) {
  951. state = folding_ws;
  952. h++;
  953. }
  954. else {
  955. /* It is weird, not like a folding, so we need to revert back */
  956. *t++ = '\r';
  957. state = copy_chars;
  958. }
  959. break;
  960. case folding_lf:
  961. if (g_ascii_isspace(*h)) {
  962. state = folding_ws;
  963. h++;
  964. }
  965. else {
  966. /* It is weird, not like a folding, so we need to revert back */
  967. *t++ = '\n';
  968. state = copy_chars;
  969. }
  970. break;
  971. case folding_ws:
  972. if (!g_ascii_isspace(*h)) {
  973. *t++ = ' ';
  974. state = copy_chars;
  975. }
  976. else {
  977. h++;
  978. }
  979. break;
  980. }
  981. }
  982. return t - hdr;
  983. }
  984. void rspamd_message_set_modified_header(struct rspamd_task *task,
  985. struct rspamd_mime_headers_table *hdrs,
  986. const gchar *hdr_name,
  987. const ucl_object_t *obj,
  988. struct rspamd_mime_header **order_ptr)
  989. {
  990. khiter_t k;
  991. khash_t(rspamd_mime_headers_htb) *htb = &hdrs->htb;
  992. struct rspamd_mime_header *hdr_elt, *existing_chain;
  993. int i;
  994. if (htb) {
  995. k = kh_get(rspamd_mime_headers_htb, htb, (gchar *) hdr_name);
  996. if (k == kh_end(htb)) {
  997. hdr_elt = rspamd_mempool_alloc0(task->task_pool, sizeof(*hdr_elt));
  998. hdr_elt->flags |= RSPAMD_HEADER_MODIFIED | RSPAMD_HEADER_NON_EXISTING;
  999. hdr_elt->name = rspamd_mempool_strdup(task->task_pool, hdr_name);
  1000. int r;
  1001. k = kh_put(rspamd_mime_headers_htb, htb, hdr_elt->name, &r);
  1002. kh_value(htb, k) = hdr_elt;
  1003. if (order_ptr) {
  1004. /*
  1005. * This iterates over all headers in O(N), but we have no other options here, as the
  1006. * list is already set.
  1007. */
  1008. LL_APPEND2(*order_ptr, hdr_elt, ord_next);
  1009. }
  1010. }
  1011. else {
  1012. hdr_elt = kh_value(htb, k);
  1013. }
  1014. }
  1015. else {
  1016. /* No hash, no modification */
  1017. msg_err_task("internal error: calling for set_modified_header for no headers");
  1018. return;
  1019. }
  1020. if (hdr_elt->flags & RSPAMD_HEADER_MODIFIED) {
  1021. existing_chain = hdr_elt->modified_chain;
  1022. }
  1023. else {
  1024. existing_chain = hdr_elt;
  1025. }
  1026. const ucl_object_t *elt, *cur;
  1027. ucl_object_iter_t it;
  1028. /* First, deal with removed headers, copying the relevant headers with remove flag */
  1029. elt = ucl_object_lookup(obj, "remove");
  1030. /*
  1031. * remove: {1, 2 ...}
  1032. * where number is the header's position starting from '1'
  1033. */
  1034. if (elt && ucl_object_type(elt) == UCL_ARRAY) {
  1035. /* First, use a temporary array to keep all headers */
  1036. GPtrArray *existing_ar = g_ptr_array_new();
  1037. struct rspamd_mime_header *cur_hdr;
  1038. /* Exclude removed headers */
  1039. LL_FOREACH(existing_chain, cur_hdr)
  1040. {
  1041. if (!(cur_hdr->flags & RSPAMD_HEADER_REMOVED)) {
  1042. g_ptr_array_add(existing_ar, cur_hdr);
  1043. }
  1044. }
  1045. it = NULL;
  1046. while ((cur = ucl_object_iterate(elt, &it, true)) != NULL) {
  1047. if (ucl_object_type(cur) == UCL_INT) {
  1048. int ord = ucl_object_toint(cur);
  1049. if (ord == 0) {
  1050. /* Remove all headers in the existing chain */
  1051. PTR_ARRAY_FOREACH(existing_ar, i, cur_hdr)
  1052. {
  1053. cur_hdr->flags |= RSPAMD_HEADER_MODIFIED | RSPAMD_HEADER_REMOVED;
  1054. }
  1055. }
  1056. else if (ord > 0) {
  1057. /* Start from the top */
  1058. if (ord <= existing_ar->len) {
  1059. cur_hdr = g_ptr_array_index(existing_ar, ord - 1);
  1060. cur_hdr->flags |= RSPAMD_HEADER_MODIFIED | RSPAMD_HEADER_REMOVED;
  1061. }
  1062. }
  1063. else {
  1064. /* Start from the bottom; ord < 0 */
  1065. if ((-ord) <= existing_ar->len) {
  1066. cur_hdr = g_ptr_array_index(existing_ar, existing_ar->len + ord);
  1067. cur_hdr->flags |= RSPAMD_HEADER_MODIFIED | RSPAMD_HEADER_REMOVED;
  1068. }
  1069. }
  1070. }
  1071. }
  1072. /*
  1073. * Next, we return all headers modified to the existing chain
  1074. * This implies an additional copy of all structures but is safe enough to
  1075. * deal with it
  1076. */
  1077. hdr_elt->flags |= RSPAMD_HEADER_MODIFIED;
  1078. hdr_elt->modified_chain = NULL;
  1079. PTR_ARRAY_FOREACH(existing_ar, i, cur_hdr)
  1080. {
  1081. if (!(cur_hdr->flags & RSPAMD_HEADER_REMOVED)) {
  1082. struct rspamd_mime_header *nhdr = rspamd_mempool_alloc(
  1083. task->task_pool, sizeof(*nhdr));
  1084. memcpy(nhdr, cur_hdr, sizeof(*nhdr));
  1085. nhdr->modified_chain = NULL;
  1086. nhdr->prev = NULL;
  1087. nhdr->next = NULL;
  1088. nhdr->ord_next = NULL;
  1089. DL_APPEND(hdr_elt->modified_chain, nhdr);
  1090. }
  1091. }
  1092. g_ptr_array_free(existing_ar, TRUE);
  1093. /* End of headers removal logic */
  1094. }
  1095. /* We can now deal with headers additions */
  1096. elt = ucl_object_lookup(obj, "add");
  1097. if (elt && ucl_object_type(elt) == UCL_ARRAY) {
  1098. if (!(hdr_elt->flags & RSPAMD_HEADER_MODIFIED)) {
  1099. /* Copy the header itself to the modified chain */
  1100. struct rspamd_mime_header *nhdr;
  1101. hdr_elt->flags |= RSPAMD_HEADER_MODIFIED;
  1102. nhdr = rspamd_mempool_alloc(
  1103. task->task_pool, sizeof(*nhdr));
  1104. memcpy(nhdr, hdr_elt, sizeof(*hdr_elt));
  1105. nhdr->modified_chain = NULL;
  1106. nhdr->next = NULL;
  1107. nhdr->ord_next = NULL;
  1108. nhdr->prev = nhdr;
  1109. hdr_elt->modified_chain = nhdr;
  1110. }
  1111. /*
  1112. * add: {{1, "foo"}, {-1, "bar"} ...}
  1113. * where number is the header's position starting from '1'
  1114. */
  1115. it = NULL;
  1116. while ((cur = ucl_object_iterate(elt, &it, true)) != NULL) {
  1117. if (ucl_object_type(cur) == UCL_ARRAY) {
  1118. const ucl_object_t *order = ucl_array_find_index(cur, 0),
  1119. *value = ucl_array_find_index(cur, 1);
  1120. if (order && value &&
  1121. (ucl_object_type(order) == UCL_INT &&
  1122. ucl_object_type(value) == UCL_STRING)) {
  1123. int ord = ucl_object_toint(order);
  1124. const char *raw_value;
  1125. gsize raw_len;
  1126. raw_value = ucl_object_tolstring(value, &raw_len);
  1127. if (raw_len == 0) {
  1128. continue;
  1129. }
  1130. struct rspamd_mime_header *nhdr = rspamd_mempool_alloc0(
  1131. task->task_pool, sizeof(*nhdr));
  1132. nhdr->flags |= RSPAMD_HEADER_ADDED;
  1133. nhdr->name = hdr_elt->name;
  1134. nhdr->value = rspamd_mempool_alloc(task->task_pool,
  1135. raw_len + 1);
  1136. /* Strlcpy will ensure that value will have no embedded \0 */
  1137. rspamd_strlcpy(nhdr->value, raw_value, raw_len + 1);
  1138. gsize value_len = rspamd_message_header_unfold_inplace(nhdr->value, raw_len);
  1139. nhdr->value[value_len] = '\0';
  1140. /* Deal with the raw value */
  1141. size_t namelen = strlen(hdr_elt->name);
  1142. char *rawbuf = rspamd_mempool_alloc(task->task_pool, namelen +
  1143. raw_len +
  1144. sizeof(": \r\n"));
  1145. /* Name: value<newline> */
  1146. nhdr->raw_value = rawbuf;
  1147. memcpy(rawbuf, hdr_elt->name, namelen);
  1148. rawbuf += namelen;
  1149. memcpy(rawbuf, ": ", sizeof(": ") - 1);
  1150. nhdr->separator = rspamd_mempool_strdup(task->task_pool, " ");
  1151. rawbuf += sizeof(": ") - 1;
  1152. memcpy(rawbuf, raw_value, raw_len);
  1153. nhdr->raw_len = raw_len;
  1154. if (MESSAGE_FIELD(task, nlines_type) == RSPAMD_TASK_NEWLINES_LF) {
  1155. rawbuf[raw_len++] = '\n';
  1156. }
  1157. else {
  1158. rawbuf[raw_len++] = '\r';
  1159. if (MESSAGE_FIELD(task, nlines_type) == RSPAMD_TASK_NEWLINES_CRLF) {
  1160. rawbuf[raw_len++] = '\n';
  1161. }
  1162. }
  1163. rawbuf[raw_len] = '\0';
  1164. nhdr->decoded = rspamd_mime_header_decode(task->task_pool,
  1165. raw_value, nhdr->raw_len,
  1166. NULL);
  1167. /* Now find a position to insert a value */
  1168. struct rspamd_mime_header **pos = &hdr_elt->modified_chain;
  1169. if (ord == 0) {
  1170. DL_PREPEND(hdr_elt->modified_chain, nhdr);
  1171. }
  1172. else if (ord == -1) {
  1173. DL_APPEND(hdr_elt->modified_chain, nhdr);
  1174. }
  1175. else if (ord > 0) {
  1176. while (ord > 0 && (*pos)) {
  1177. ord--;
  1178. pos = &((*pos)->next);
  1179. }
  1180. if (*pos) {
  1181. /* pos is &(elt)->next */
  1182. nhdr->next = (*pos);
  1183. nhdr->prev = (*pos)->prev;
  1184. (*pos)->prev = nhdr;
  1185. *pos = nhdr;
  1186. }
  1187. else {
  1188. /* Last element */
  1189. DL_APPEND(*pos, nhdr);
  1190. }
  1191. }
  1192. else {
  1193. /* NYI: negative order is not defined */
  1194. msg_err_task("internal error: calling for set_modified_header "
  1195. "with negative add order header");
  1196. }
  1197. }
  1198. else {
  1199. msg_err_task("internal error: calling for set_modified_header "
  1200. "with invalid header");
  1201. }
  1202. }
  1203. }
  1204. }
  1205. }
  1206. gsize rspamd_strip_smtp_comments_inplace(gchar *input, gsize len)
  1207. {
  1208. enum parser_state {
  1209. parse_normal,
  1210. parse_obrace,
  1211. parse_comment,
  1212. parse_quoted_copy,
  1213. parse_quoted_ignore,
  1214. } state = parse_normal,
  1215. next_state = parse_normal;
  1216. gchar *d = input, *end = input + len, *start = input;
  1217. gchar t;
  1218. int obraces = 0, ebraces = 0;
  1219. while (input < end) {
  1220. t = *input;
  1221. switch (state) {
  1222. case parse_normal:
  1223. if (t == '(') {
  1224. state = parse_obrace;
  1225. }
  1226. else if (t == '\\') {
  1227. state = parse_quoted_copy;
  1228. next_state = parse_normal;
  1229. }
  1230. else {
  1231. *d++ = t;
  1232. }
  1233. input++;
  1234. break;
  1235. case parse_obrace:
  1236. obraces++;
  1237. if (t == '(') {
  1238. obraces++;
  1239. }
  1240. else if (t == ')') {
  1241. ebraces++;
  1242. if (obraces == ebraces) {
  1243. obraces = 0;
  1244. ebraces = 0;
  1245. state = parse_normal;
  1246. }
  1247. }
  1248. else if (t == '\\') {
  1249. state = parse_quoted_ignore;
  1250. next_state = parse_comment;
  1251. }
  1252. else {
  1253. state = parse_comment;
  1254. }
  1255. input++;
  1256. break;
  1257. case parse_comment:
  1258. if (t == '(') {
  1259. state = parse_obrace;
  1260. }
  1261. else if (t == ')') {
  1262. ebraces++;
  1263. if (obraces == ebraces) {
  1264. obraces = 0;
  1265. ebraces = 0;
  1266. state = parse_normal;
  1267. }
  1268. }
  1269. else if (t == '\\') {
  1270. state = parse_quoted_ignore;
  1271. next_state = parse_comment;
  1272. }
  1273. input++;
  1274. break;
  1275. case parse_quoted_copy:
  1276. *d++ = t;
  1277. state = next_state;
  1278. input++;
  1279. break;
  1280. case parse_quoted_ignore:
  1281. state = next_state;
  1282. input++;
  1283. break;
  1284. }
  1285. }
  1286. return (d - start);
  1287. }