You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

message.c 43KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593
  1. /*
  2. * Copyright (c) 2009, Rambler media
  3. * All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions are met:
  7. * * Redistributions of source code must retain the above copyright
  8. * notice, this list of conditions and the following disclaimer.
  9. * * Redistributions in binary form must reproduce the above copyright
  10. * notice, this list of conditions and the following disclaimer in the
  11. * documentation and/or other materials provided with the distribution.
  12. *
  13. * THIS SOFTWARE IS PROVIDED BY Rambler media ''AS IS'' AND ANY
  14. * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  15. * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  16. * DISCLAIMED. IN NO EVENT SHALL Rambler BE LIABLE FOR ANY
  17. * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  18. * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  19. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  20. * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  21. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  22. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  23. */
  24. #include "config.h"
  25. #include "util.h"
  26. #include "main.h"
  27. #include "message.h"
  28. #include "cfg_file.h"
  29. #include "html.h"
  30. #include "modules.h"
  31. #include "images.h"
  32. #define RECURSION_LIMIT 30
  33. #define UTF8_CHARSET "UTF-8"
  34. GByteArray *
  35. strip_html_tags (struct worker_task *task, memory_pool_t * pool, struct mime_text_part *part, GByteArray * src, gint *stateptr)
  36. {
  37. uint8_t *tbuf = NULL, *p, *tp = NULL, *rp, *tbegin = NULL, c, lc;
  38. gint br, i = 0, depth = 0, in_q = 0;
  39. gint state = 0;
  40. GByteArray *buf;
  41. GNode *level_ptr = NULL;
  42. gboolean erase = FALSE;
  43. if (stateptr)
  44. state = *stateptr;
  45. buf = g_byte_array_sized_new (src->len);
  46. g_byte_array_append (buf, src->data, src->len);
  47. c = *src->data;
  48. lc = '\0';
  49. p = src->data;
  50. rp = buf->data;
  51. br = 0;
  52. while (i < src->len) {
  53. switch (c) {
  54. case '\0':
  55. break;
  56. case '<':
  57. if (g_ascii_isspace (*(p + 1))) {
  58. goto reg_char;
  59. }
  60. if (state == 0) {
  61. lc = '<';
  62. tbegin = p + 1;
  63. state = 1;
  64. }
  65. else if (state == 1) {
  66. depth++;
  67. }
  68. break;
  69. case '(':
  70. if (state == 2) {
  71. if (lc != '"' && lc != '\'') {
  72. lc = '(';
  73. br++;
  74. }
  75. }
  76. else if (state == 0 && !erase) {
  77. *(rp++) = c;
  78. }
  79. break;
  80. case ')':
  81. if (state == 2) {
  82. if (lc != '"' && lc != '\'') {
  83. lc = ')';
  84. br--;
  85. }
  86. }
  87. else if (state == 0 && !erase) {
  88. *(rp++) = c;
  89. }
  90. break;
  91. case '>':
  92. if (depth) {
  93. depth--;
  94. break;
  95. }
  96. if (in_q) {
  97. break;
  98. }
  99. switch (state) {
  100. case 1: /* HTML/XML */
  101. lc = '>';
  102. in_q = state = 0;
  103. erase = !add_html_node (task, pool, part, tbegin, p - tbegin, &level_ptr);
  104. break;
  105. case 2: /* PHP */
  106. if (!br && lc != '\"' && *(p - 1) == '?') {
  107. in_q = state = 0;
  108. tp = tbuf;
  109. }
  110. break;
  111. case 3:
  112. in_q = state = 0;
  113. tp = tbuf;
  114. break;
  115. case 4: /* JavaScript/CSS/etc... */
  116. if (p >= src->data + 2 && *(p - 1) == '-' && *(p - 2) == '-') {
  117. in_q = state = 0;
  118. tp = tbuf;
  119. }
  120. break;
  121. default:
  122. if (!erase) {
  123. *(rp++) = c;
  124. }
  125. break;
  126. }
  127. break;
  128. case '"':
  129. case '\'':
  130. if (state == 2 && *(p - 1) != '\\') {
  131. if (lc == c) {
  132. lc = '\0';
  133. }
  134. else if (lc != '\\') {
  135. lc = c;
  136. }
  137. }
  138. else if (state == 0 && !erase) {
  139. *(rp++) = c;
  140. }
  141. if (state && p != src->data && *(p - 1) != '\\' && (!in_q || *p == in_q)) {
  142. if (in_q) {
  143. in_q = 0;
  144. }
  145. else {
  146. in_q = *p;
  147. }
  148. }
  149. break;
  150. case '!':
  151. /* JavaScript & Other HTML scripting languages */
  152. if (state == 1 && *(p - 1) == '<') {
  153. state = 3;
  154. lc = c;
  155. }
  156. else {
  157. if (state == 0 && !erase) {
  158. *(rp++) = c;
  159. }
  160. }
  161. break;
  162. case '-':
  163. if (state == 3 && p >= src->data + 2 && *(p - 1) == '-' && *(p - 2) == '!') {
  164. state = 4;
  165. }
  166. else {
  167. goto reg_char;
  168. }
  169. break;
  170. case '?':
  171. if (state == 1 && *(p - 1) == '<') {
  172. br = 0;
  173. state = 2;
  174. break;
  175. }
  176. case 'E':
  177. case 'e':
  178. /* !DOCTYPE exception */
  179. if (state == 3 && p > src->data + 6
  180. && g_ascii_tolower (*(p - 1)) == 'p'
  181. && g_ascii_tolower (*(p - 2)) == 'y'
  182. && g_ascii_tolower (*(p - 3)) == 't' && g_ascii_tolower (*(p - 4)) == 'c' && g_ascii_tolower (*(p - 5)) == 'o' && g_ascii_tolower (*(p - 6)) == 'd') {
  183. state = 1;
  184. break;
  185. }
  186. /* fall-through */
  187. case 'l':
  188. /* swm: If we encounter '<?xml' then we shouldn't be in
  189. * state == 2 (PHP). Switch back to HTML.
  190. */
  191. if (state == 2 && p > src->data + 2 && *(p - 1) == 'm' && *(p - 2) == 'x') {
  192. state = 1;
  193. break;
  194. }
  195. /* fall-through */
  196. default:
  197. reg_char:
  198. if (state == 0 && !erase) {
  199. *(rp++) = c;
  200. }
  201. break;
  202. }
  203. i++;
  204. if (i < src->len) {
  205. c = *(++p);
  206. }
  207. }
  208. if (rp < buf->data + src->len) {
  209. *rp = '\0';
  210. g_byte_array_set_size (buf, rp - buf->data);
  211. }
  212. /* Check tag balancing */
  213. if (level_ptr && level_ptr->data != NULL) {
  214. part->is_balanced = FALSE;
  215. }
  216. if (stateptr) {
  217. *stateptr = state;
  218. }
  219. return buf;
  220. }
  221. static void
  222. parse_qmail_recv (memory_pool_t * pool, gchar *line, struct received_header *r)
  223. {
  224. gchar *s, *p, t;
  225. /* We are intersted only with received from network headers */
  226. if ((p = strstr (line, "from network")) == NULL) {
  227. r->is_error = 2;
  228. return;
  229. }
  230. p += sizeof ("from network") - 1;
  231. while (g_ascii_isspace (*p) || *p == '[') {
  232. p++;
  233. }
  234. /* format is ip/host */
  235. s = p;
  236. if (*p) {
  237. while (g_ascii_isdigit (*++p) || *p == '.');
  238. if (*p != '/') {
  239. r->is_error = 1;
  240. return;
  241. }
  242. else {
  243. *p = '\0';
  244. r->real_ip = memory_pool_strdup (pool, s);
  245. *p = '/';
  246. /* Now try to parse hostname */
  247. s = ++p;
  248. while (g_ascii_isalnum (*p) || *p == '.' || *p == '-' || *p == '_') {
  249. p++;
  250. }
  251. t = *p;
  252. *p = '\0';
  253. r->real_hostname = memory_pool_strdup (pool, s);
  254. *p = t;
  255. }
  256. }
  257. }
  258. static void
  259. parse_recv_header (memory_pool_t * pool, gchar *line, struct received_header *r)
  260. {
  261. gchar *p, *s, t, **res = NULL;
  262. gint state = 0, next_state = 0;
  263. g_strstrip (line);
  264. p = line;
  265. s = line;
  266. while (*p) {
  267. switch (state) {
  268. /* Initial state, search for from */
  269. case 0:
  270. if (*p == 'f' || *p == 'F') {
  271. if (g_ascii_tolower (*++p) == 'r' && g_ascii_tolower (*++p) == 'o' && g_ascii_tolower (*++p) == 'm') {
  272. p++;
  273. state = 99;
  274. next_state = 1;
  275. }
  276. }
  277. else if (g_ascii_tolower (*p) == 'b' && g_ascii_tolower (*(p + 1)) == 'y') {
  278. state = 3;
  279. }
  280. else {
  281. /* This can be qmail header, parse it separately */
  282. parse_qmail_recv (pool, line, r);
  283. return;
  284. }
  285. break;
  286. /* Read hostname */
  287. case 1:
  288. if (*p == '[') {
  289. /* This should be IP address */
  290. res = &r->from_ip;
  291. state = 98;
  292. next_state = 3;
  293. s = ++p;
  294. }
  295. else if (g_ascii_isalnum (*p) || *p == '.' || *p == '-' || *p == '_') {
  296. p++;
  297. }
  298. else {
  299. t = *p;
  300. *p = '\0';
  301. r->from_hostname = memory_pool_strdup (pool, s);
  302. *p = t;
  303. state = 99;
  304. next_state = 3;
  305. }
  306. break;
  307. /* Try to extract additional info */
  308. case 3:
  309. /* Try to extract ip or () info or by */
  310. if (g_ascii_tolower (*p) == 'b' && g_ascii_tolower (*(p + 1)) == 'y') {
  311. p += 2;
  312. /* Skip spaces after by */
  313. state = 99;
  314. next_state = 5;
  315. }
  316. else if (*p == '(') {
  317. state = 99;
  318. next_state = 4;
  319. p++;
  320. }
  321. else if (*p == '[') {
  322. /* Got ip before '(' so extract it */
  323. s = ++p;
  324. res = &r->from_ip;
  325. state = 98;
  326. next_state = 3;
  327. }
  328. else {
  329. p++;
  330. }
  331. break;
  332. /* We are in () block. Here can be found real hostname and real ip, this is written by some MTA */
  333. case 4:
  334. /* End of block */
  335. if (*p == ')') {
  336. p++;
  337. state = 3;
  338. }
  339. else if (g_ascii_isalnum (*p) || *p == '.' || *p == '-' || *p == '_') {
  340. p++;
  341. }
  342. else if (*p == '[') {
  343. s = ++p;
  344. state = 98;
  345. res = &r->real_ip;
  346. next_state = 3;
  347. }
  348. else {
  349. if (s != p) {
  350. /* Got some real hostname */
  351. /* check whether it is helo or p is not space symbol */
  352. if (!g_ascii_isspace (*p) || *(p + 1) != '[') {
  353. /* skip all */
  354. while (*p++ != ')' && *p != '\0');
  355. state = 3;
  356. }
  357. else {
  358. t = *p;
  359. *p = '\0';
  360. r->real_hostname = memory_pool_strdup (pool, s);
  361. *p = t;
  362. /* Now parse ip */
  363. p += 2;
  364. s = p;
  365. res = &r->real_ip;
  366. state = 98;
  367. next_state = 4;
  368. }
  369. }
  370. else {
  371. r->is_error = 1;
  372. return;
  373. }
  374. }
  375. break;
  376. /* Got by word */
  377. case 5:
  378. /* Here can be only hostname */
  379. if (g_ascii_isalnum (*p) || *p == '.' || *p == '-' || *p == '_') {
  380. p++;
  381. }
  382. else {
  383. /* We got something like hostname */
  384. t = *p;
  385. *p = '\0';
  386. r->by_hostname = memory_pool_strdup (pool, s);
  387. *p = t;
  388. /* Now end of parsing */
  389. return;
  390. }
  391. break;
  392. /* Extract ip */
  393. case 98:
  394. while (g_ascii_isdigit (*++p) || *p == '.');
  395. if (*p != ']') {
  396. /* Not an ip in fact */
  397. state = next_state;
  398. p++;
  399. }
  400. else {
  401. *p = '\0';
  402. *res = memory_pool_strdup (pool, s);
  403. *p = ']';
  404. p++;
  405. state = next_state;
  406. }
  407. break;
  408. /* Skip spaces */
  409. case 99:
  410. if (!g_ascii_isspace (*p)) {
  411. state = next_state;
  412. s = p;
  413. }
  414. else {
  415. p++;
  416. }
  417. break;
  418. case 100:
  419. r->is_error = 1;
  420. return;
  421. break;
  422. }
  423. }
  424. r->is_error = 1;
  425. return;
  426. }
  427. /* Convert raw headers to a list of struct raw_header * */
  428. static void
  429. process_raw_headers (struct worker_task *task)
  430. {
  431. struct raw_header *new;
  432. gchar *p, *c, *tmp, *tp;
  433. gint state = 0, l, next_state, err_state, t_state;
  434. gboolean valid_folding = FALSE;
  435. p = task->raw_headers;
  436. c = p;
  437. while (*p) {
  438. /* FSM for processing headers */
  439. switch (state) {
  440. case 0:
  441. /* Begin processing headers */
  442. if (!g_ascii_isalpha (*p)) {
  443. /* We have some garbadge at the beginning of headers, skip this line */
  444. state = 100;
  445. next_state = 0;
  446. }
  447. else {
  448. state = 1;
  449. c = p;
  450. }
  451. break;
  452. case 1:
  453. /* We got something like header's name */
  454. if (*p == ':') {
  455. new = memory_pool_alloc0 (task->task_pool, sizeof (struct raw_header));
  456. l = p - c;
  457. tmp = memory_pool_alloc (task->task_pool, l + 1);
  458. rspamd_strlcpy (tmp, c, l + 1);
  459. new->name = tmp;
  460. p ++;
  461. state = 2;
  462. }
  463. else if (g_ascii_isspace (*p)) {
  464. /* Not header but some garbadge */
  465. state = 100;
  466. next_state = 0;
  467. }
  468. else {
  469. p ++;
  470. }
  471. break;
  472. case 2:
  473. /* We got header's name, so skip any \t or spaces */
  474. if (*p == '\t') {
  475. new->tab_separated = TRUE;
  476. }
  477. else if (*p == ' '){
  478. p ++;
  479. }
  480. else if (*p == '\n' || *p == '\r') {
  481. /* Process folding */
  482. state = 99;
  483. next_state = 3;
  484. err_state = 5;
  485. c = p;
  486. }
  487. else {
  488. /* Process value */
  489. c = p;
  490. state = 3;
  491. }
  492. break;
  493. case 3:
  494. if (*p == '\r' || *p == '\n') {
  495. /* Hold folding */
  496. state = 99;
  497. next_state = 3;
  498. err_state = 4;
  499. }
  500. else {
  501. p ++;
  502. }
  503. break;
  504. case 4:
  505. /* Copy header's value */
  506. l = p - c;
  507. tmp = memory_pool_alloc (task->task_pool, l);
  508. tp = tmp;
  509. t_state = 0;
  510. while (l --) {
  511. if (t_state == 0) {
  512. /* Before folding */
  513. if (*c == '\n' || *c == '\r') {
  514. t_state = 1;
  515. }
  516. else {
  517. *tp ++ = *c ++;
  518. }
  519. }
  520. else if (t_state == 1) {
  521. /* Inside folding */
  522. if (g_ascii_isspace (*c)) {
  523. c++;
  524. }
  525. else {
  526. t_state = 0;
  527. *tp ++ = *c ++;
  528. }
  529. }
  530. }
  531. *tp = '\0';
  532. new->value = tmp;
  533. task->raw_headers_list = g_list_prepend (task->raw_headers_list, new);
  534. debug_task ("add raw header %s: %s", new->name, new->value);
  535. state = 0;
  536. break;
  537. case 5:
  538. /* Header has only name, no value */
  539. task->raw_headers_list = g_list_prepend (task->raw_headers_list, new);
  540. state = 0;
  541. debug_task ("add raw header %s: %s", new->name, new->value);
  542. break;
  543. case 99:
  544. /* Folding state */
  545. if (*p == '\r' || *p == '\n') {
  546. p ++;
  547. valid_folding = FALSE;
  548. }
  549. else if (*p == '\t' || *p == ' ') {
  550. /* Valid folding */
  551. p ++;
  552. valid_folding = TRUE;
  553. }
  554. else {
  555. if (valid_folding) {
  556. debug_task ("go to state: %d->%d", state, next_state);
  557. state = next_state;
  558. }
  559. else {
  560. /* Fall back */
  561. debug_task ("go to state: %d->%d", state, err_state);
  562. state = err_state;
  563. }
  564. }
  565. break;
  566. case 100:
  567. /* Fail state, skip line */
  568. if (*p == '\r') {
  569. if (*(p + 1) == '\n') {
  570. p ++;
  571. }
  572. p ++;
  573. state = next_state;
  574. }
  575. else if (*p == '\n') {
  576. if (*(p + 1) == '\r') {
  577. p ++;
  578. }
  579. p ++;
  580. state = next_state;
  581. }
  582. else {
  583. p ++;
  584. }
  585. break;
  586. }
  587. }
  588. }
  589. static void
  590. free_byte_array_callback (void *pointer)
  591. {
  592. GByteArray *arr = (GByteArray *) pointer;
  593. g_byte_array_free (arr, TRUE);
  594. }
  595. static void
  596. detect_real_charset (struct worker_task *task, GByteArray * part_content, struct mime_text_part *text_part)
  597. {
  598. /* First of all try to detect UTF symbols */
  599. text_part->is_utf = FALSE;
  600. /* At first decision try to validate a single character */
  601. if (g_utf8_get_char_validated (part_content->data, part_content->len) != -1) {
  602. /* Now validate the whole part */
  603. if (g_utf8_validate (part_content->data, part_content->len, NULL)) {
  604. text_part->is_utf = TRUE;
  605. text_part->real_charset = UTF8_CHARSET;
  606. return;
  607. }
  608. }
  609. /* Now try to detect specific symbols from some charsets */
  610. }
  611. static GByteArray *
  612. convert_text_to_utf (struct worker_task *task, GByteArray * part_content, GMimeContentType * type, struct mime_text_part *text_part)
  613. {
  614. GError *err = NULL;
  615. gsize read_bytes, write_bytes;
  616. const gchar *charset;
  617. gchar *res_str;
  618. GByteArray *result_array;
  619. if (task->cfg->raw_mode) {
  620. text_part->is_raw = TRUE;
  621. return part_content;
  622. }
  623. if ((charset = g_mime_content_type_get_parameter (type, "charset")) == NULL) {
  624. text_part->is_raw = TRUE;
  625. return part_content;
  626. }
  627. if (g_ascii_strcasecmp (charset, "utf-8") == 0 || g_ascii_strcasecmp (charset, "utf8") == 0) {
  628. text_part->is_raw = FALSE;
  629. return part_content;
  630. }
  631. res_str = g_convert_with_fallback (part_content->data, part_content->len, UTF8_CHARSET, charset, NULL, &read_bytes, &write_bytes, &err);
  632. if (res_str == NULL) {
  633. msg_warn ("cannot convert from %s to utf8: %s", charset, err ? err->message : "unknown problem");
  634. text_part->is_raw = TRUE;
  635. return part_content;
  636. }
  637. result_array = memory_pool_alloc (task->task_pool, sizeof (GByteArray));
  638. result_array->data = res_str;
  639. result_array->len = write_bytes;
  640. memory_pool_add_destructor (task->task_pool, (pool_destruct_func) g_free, res_str);
  641. text_part->is_raw = FALSE;
  642. return result_array;
  643. }
  644. static void
  645. process_text_part (struct worker_task *task, GByteArray *part_content, GMimeContentType *type,
  646. GMimeObject *part, GMimeObject *parent, gboolean is_empty)
  647. {
  648. struct mime_text_part *text_part;
  649. const gchar *cd;
  650. /* Skip attachements */
  651. #ifndef GMIME24
  652. cd = g_mime_part_get_content_disposition (GMIME_PART (part));
  653. if (cd && g_ascii_strcasecmp (cd, "attachment") == 0 && !task->cfg->check_text_attachements) {
  654. debug_task ("skip attachments for checking as text parts");
  655. return;
  656. }
  657. #else
  658. cd = g_mime_object_get_disposition (GMIME_OBJECT (part));
  659. if (cd && g_ascii_strcasecmp (cd, GMIME_DISPOSITION_ATTACHMENT) == 0 && !task->cfg->check_text_attachements) {
  660. debug_task ("skip attachments for checking as text parts");
  661. return;
  662. }
  663. #endif
  664. if (g_mime_content_type_is_type (type, "text", "html") || g_mime_content_type_is_type (type, "text", "xhtml")) {
  665. debug_task ("got urls from text/html part");
  666. text_part = memory_pool_alloc0 (task->task_pool, sizeof (struct mime_text_part));
  667. text_part->is_html = TRUE;
  668. if (is_empty) {
  669. text_part->is_empty = TRUE;
  670. text_part->orig = NULL;
  671. text_part->content = NULL;
  672. task->text_parts = g_list_prepend (task->text_parts, text_part);
  673. return;
  674. }
  675. text_part->orig = convert_text_to_utf (task, part_content, type, text_part);
  676. text_part->is_balanced = TRUE;
  677. text_part->html_nodes = NULL;
  678. text_part->parent = parent;
  679. text_part->html_urls = g_tree_new ((GCompareFunc) g_ascii_strcasecmp);
  680. text_part->urls = g_tree_new ((GCompareFunc) g_ascii_strcasecmp);
  681. text_part->content = strip_html_tags (task, task->task_pool, text_part, text_part->orig, NULL);
  682. if (text_part->html_nodes == NULL) {
  683. url_parse_text (task->task_pool, task, text_part, FALSE);
  684. }
  685. else {
  686. decode_entitles (text_part->content->data, &text_part->content->len);
  687. url_parse_text (task->task_pool, task, text_part, FALSE);
  688. #if 0
  689. url_parse_text (task->task_pool, task, text_part, TRUE);
  690. #endif
  691. }
  692. text_part->fuzzy = fuzzy_init_byte_array (text_part->content, task->task_pool);
  693. memory_pool_add_destructor (task->task_pool, (pool_destruct_func) free_byte_array_callback, text_part->content);
  694. memory_pool_add_destructor (task->task_pool, (pool_destruct_func) g_tree_destroy, text_part->html_urls);
  695. memory_pool_add_destructor (task->task_pool, (pool_destruct_func) g_tree_destroy, text_part->urls);
  696. task->text_parts = g_list_prepend (task->text_parts, text_part);
  697. }
  698. else if (g_mime_content_type_is_type (type, "text", "*")) {
  699. debug_task ("got urls from text/plain part");
  700. text_part = memory_pool_alloc0 (task->task_pool, sizeof (struct mime_text_part));
  701. text_part->is_html = FALSE;
  702. text_part->parent = parent;
  703. if (is_empty) {
  704. text_part->is_empty = TRUE;
  705. text_part->orig = NULL;
  706. text_part->content = NULL;
  707. task->text_parts = g_list_prepend (task->text_parts, text_part);
  708. return;
  709. }
  710. text_part->orig = convert_text_to_utf (task, part_content, type, text_part);
  711. text_part->content = text_part->orig;
  712. text_part->fuzzy = fuzzy_init_byte_array (text_part->content, task->task_pool);
  713. text_part->html_urls = NULL;
  714. text_part->urls = g_tree_new ((GCompareFunc) g_ascii_strcasecmp);
  715. url_parse_text (task->task_pool, task, text_part, FALSE);
  716. task->text_parts = g_list_prepend (task->text_parts, text_part);
  717. memory_pool_add_destructor (task->task_pool, (pool_destruct_func) g_tree_destroy, text_part->urls);
  718. }
  719. }
  720. #ifdef GMIME24
  721. static void
  722. mime_foreach_callback (GMimeObject * parent, GMimeObject * part, gpointer user_data)
  723. #else
  724. static void
  725. mime_foreach_callback (GMimeObject * part, gpointer user_data)
  726. #endif
  727. {
  728. struct worker_task *task = (struct worker_task *)user_data;
  729. struct mime_part *mime_part;
  730. GMimeContentType *type;
  731. GMimeDataWrapper *wrapper;
  732. GMimeStream *part_stream;
  733. GByteArray *part_content;
  734. task->parts_count++;
  735. /* 'part' points to the current part node that g_mime_message_foreach_part() is iterating over */
  736. /* find out what class 'part' is... */
  737. if (GMIME_IS_MESSAGE_PART (part)) {
  738. /* message/rfc822 or message/news */
  739. GMimeMessage *message;
  740. /* g_mime_message_foreach_part() won't descend into
  741. child message parts, so if we want to count any
  742. subparts of this child message, we'll have to call
  743. g_mime_message_foreach_part() again here. */
  744. message = g_mime_message_part_get_message ((GMimeMessagePart *) part);
  745. if (task->parser_recursion++ < RECURSION_LIMIT) {
  746. #ifdef GMIME24
  747. g_mime_message_foreach (message, mime_foreach_callback, task);
  748. #else
  749. g_mime_message_foreach_part (message, mime_foreach_callback, task);
  750. #endif
  751. }
  752. else {
  753. msg_err ("endless recursion detected: %d", task->parser_recursion);
  754. return;
  755. }
  756. g_object_unref (message);
  757. }
  758. else if (GMIME_IS_MESSAGE_PARTIAL (part)) {
  759. /* message/partial */
  760. /* this is an incomplete message part, probably a
  761. large message that the sender has broken into
  762. smaller parts and is sending us bit by bit. we
  763. could save some info about it so that we could
  764. piece this back together again once we get all the
  765. parts? */
  766. }
  767. else if (GMIME_IS_MULTIPART (part)) {
  768. /* multipart/mixed, multipart/alternative, multipart/related, multipart/signed, multipart/encrypted, etc... */
  769. task->parser_parent_part = part;
  770. #ifndef GMIME24
  771. debug_task ("detected multipart part");
  772. /* we'll get to finding out if this is a signed/encrypted multipart later... */
  773. if (task->parser_recursion++ < RECURSION_LIMIT) {
  774. g_mime_multipart_foreach ((GMimeMultipart *) part, mime_foreach_callback, task);
  775. }
  776. else {
  777. msg_err ("endless recursion detected: %d", task->parser_recursion);
  778. return;
  779. }
  780. #endif
  781. }
  782. else if (GMIME_IS_PART (part)) {
  783. /* a normal leaf part, could be text/plain or image/jpeg etc */
  784. #ifdef GMIME24
  785. type = (GMimeContentType *) g_mime_object_get_content_type (GMIME_OBJECT (part));
  786. #else
  787. type = (GMimeContentType *) g_mime_part_get_content_type (GMIME_PART (part));
  788. #endif
  789. if (type == NULL) {
  790. msg_warn ("type of part is unknown, assume text/plain");
  791. type = g_mime_content_type_new ("text", "plain");
  792. #ifdef GMIME24
  793. memory_pool_add_destructor (task->task_pool, (pool_destruct_func) g_object_unref, type);
  794. #else
  795. memory_pool_add_destructor (task->task_pool, (pool_destruct_func) g_mime_content_type_destroy, type);
  796. #endif
  797. }
  798. wrapper = g_mime_part_get_content_object (GMIME_PART (part));
  799. #ifdef GMIME24
  800. if (wrapper != NULL && GMIME_IS_DATA_WRAPPER (wrapper)) {
  801. #else
  802. if (wrapper != NULL) {
  803. #endif
  804. part_stream = g_mime_stream_mem_new ();
  805. if (g_mime_data_wrapper_write_to_stream (wrapper, part_stream) != -1) {
  806. g_mime_stream_mem_set_owner (GMIME_STREAM_MEM (part_stream), FALSE);
  807. part_content = g_mime_stream_mem_get_byte_array (GMIME_STREAM_MEM (part_stream));
  808. g_object_unref (part_stream);
  809. mime_part = memory_pool_alloc (task->task_pool, sizeof (struct mime_part));
  810. mime_part->type = type;
  811. mime_part->content = part_content;
  812. mime_part->parent = task->parser_parent_part;
  813. mime_part->filename = g_mime_part_get_filename (GMIME_PART (part));
  814. debug_task ("found part with content-type: %s/%s", type->type, type->subtype);
  815. task->parts = g_list_prepend (task->parts, mime_part);
  816. /* Skip empty parts */
  817. process_text_part (task, part_content, type, part, task->parser_parent_part, (part_content->len <= 0));
  818. }
  819. else {
  820. msg_warn ("write to stream failed: %d, %s", errno, strerror (errno));
  821. }
  822. #ifndef GMIME24
  823. g_object_unref (wrapper);
  824. #endif
  825. }
  826. else {
  827. msg_warn ("cannot get wrapper for mime part, type of part: %s/%s", type->type, type->subtype);
  828. }
  829. }
  830. else {
  831. g_assert_not_reached ();
  832. }
  833. }
  834. static void
  835. destroy_message (void *pointer)
  836. {
  837. GMimeMessage *msg = pointer;
  838. msg_debug ("freeing pointer %p", msg);
  839. g_object_unref (msg);
  840. }
  841. gint
  842. process_message (struct worker_task *task)
  843. {
  844. GMimeMessage *message;
  845. GMimeParser *parser;
  846. GMimeStream *stream;
  847. GByteArray *tmp;
  848. GList *first, *cur;
  849. GMimePart *part;
  850. GMimeDataWrapper *wrapper;
  851. struct received_header *recv;
  852. gchar *mid;
  853. tmp = memory_pool_alloc (task->task_pool, sizeof (GByteArray));
  854. tmp->data = task->msg->begin;
  855. tmp->len = task->msg->len;
  856. stream = g_mime_stream_mem_new_with_byte_array (tmp);
  857. /*
  858. * This causes g_mime_stream not to free memory by itself as it is memory allocated by
  859. * pool allocator
  860. */
  861. g_mime_stream_mem_set_owner (GMIME_STREAM_MEM (stream), FALSE);
  862. if (task->is_mime) {
  863. debug_task ("construct mime parser from string length %d", (gint)task->msg->len);
  864. /* create a new parser object to parse the stream */
  865. parser = g_mime_parser_new_with_stream (stream);
  866. g_object_unref (stream);
  867. /* parse the message from the stream */
  868. message = g_mime_parser_construct_message (parser);
  869. if (message == NULL) {
  870. msg_warn ("cannot construct mime from stream");
  871. return -1;
  872. }
  873. task->message = message;
  874. memory_pool_add_destructor (task->task_pool, (pool_destruct_func) destroy_message, task->message);
  875. task->parser_recursion = 0;
  876. #ifdef GMIME24
  877. g_mime_message_foreach (message, mime_foreach_callback, task);
  878. #else
  879. /*
  880. * This is rather strange, but gmime 2.2 do NOT pass top-level part to foreach callback
  881. * so we need to set up parent part by hands
  882. */
  883. task->parser_parent_part = g_mime_message_get_mime_part (message);
  884. g_object_unref (task->parser_parent_part);
  885. g_mime_message_foreach_part (message, mime_foreach_callback, task);
  886. #endif
  887. debug_task ("found %d parts in message", task->parts_count);
  888. if (task->queue_id == NULL) {
  889. task->queue_id = "undef";
  890. }
  891. task->message_id = g_mime_message_get_message_id (task->message);
  892. if (task->message_id == NULL) {
  893. task->message_id = "undef";
  894. }
  895. #ifdef GMIME24
  896. task->raw_headers = g_mime_object_get_headers (GMIME_OBJECT (task->message));
  897. #else
  898. task->raw_headers = g_mime_message_get_headers (task->message);
  899. #endif
  900. #ifdef RSPAMD_MAIN
  901. process_images (task);
  902. #endif
  903. /* Parse received headers */
  904. first = message_get_header (task->task_pool, message, "Received", FALSE);
  905. cur = first;
  906. while (cur) {
  907. recv = memory_pool_alloc0 (task->task_pool, sizeof (struct received_header));
  908. parse_recv_header (task->task_pool, cur->data, recv);
  909. task->received = g_list_prepend (task->received, recv);
  910. cur = g_list_next (cur);
  911. }
  912. if (first) {
  913. g_list_free (first);
  914. }
  915. if (task->raw_headers) {
  916. memory_pool_add_destructor (task->task_pool, (pool_destruct_func) g_free, task->raw_headers);
  917. memory_pool_add_destructor (task->task_pool, (pool_destruct_func) g_list_free, task->raw_headers_list);
  918. process_raw_headers (task);
  919. }
  920. task->rcpts = g_mime_message_get_all_recipients (message);
  921. if (task->rcpts) {
  922. #ifdef GMIME24
  923. memory_pool_add_destructor (task->task_pool, (pool_destruct_func) g_object_unref, task->rcpts);
  924. #else
  925. memory_pool_add_destructor (task->task_pool, (pool_destruct_func) internet_address_list_destroy, task->rcpts);
  926. #endif
  927. }
  928. /* free the parser (and the stream) */
  929. g_object_unref (parser);
  930. }
  931. else {
  932. /* We got only message, no mime headers or anything like this */
  933. /* Construct fake message for it */
  934. task->message = g_mime_message_new (TRUE);
  935. if (task->from) {
  936. g_mime_message_set_sender (task->message, task->from);
  937. }
  938. /* Construct part for it */
  939. part = g_mime_part_new_with_type ("text", "html");
  940. #ifdef GMIME24
  941. wrapper = g_mime_data_wrapper_new_with_stream (stream, GMIME_CONTENT_ENCODING_8BIT);
  942. #else
  943. wrapper = g_mime_data_wrapper_new_with_stream (stream, GMIME_PART_ENCODING_8BIT);
  944. #endif
  945. g_mime_part_set_content_object (part, wrapper);
  946. g_mime_message_set_mime_part (task->message, GMIME_OBJECT (part));
  947. /* Register destructors */
  948. memory_pool_add_destructor (task->task_pool, (pool_destruct_func) g_object_unref, wrapper);
  949. memory_pool_add_destructor (task->task_pool, (pool_destruct_func) g_object_unref, part);
  950. memory_pool_add_destructor (task->task_pool, (pool_destruct_func) destroy_message, task->message);
  951. /* Now parse in a normal way */
  952. task->parser_recursion = 0;
  953. #ifdef GMIME24
  954. g_mime_message_foreach (task->message, mime_foreach_callback, task);
  955. #else
  956. g_mime_message_foreach_part (task->message, mime_foreach_callback, task);
  957. #endif
  958. /* Generate message ID */
  959. mid = g_mime_utils_generate_message_id ("localhost.localdomain");
  960. memory_pool_add_destructor (task->task_pool, (pool_destruct_func) g_free, mid);
  961. g_mime_message_set_message_id (task->message, mid);
  962. task->message_id = mid;
  963. task->queue_id = mid;
  964. /* Set headers for message */
  965. if (task->subject) {
  966. g_mime_message_set_subject (task->message, task->subject);
  967. }
  968. /* Add recipients */
  969. #ifndef GMIME24
  970. if (task->rcpt) {
  971. cur = task->rcpt;
  972. while (cur) {
  973. g_mime_message_add_recipient (task->message, GMIME_RECIPIENT_TYPE_TO, NULL, (gchar *)cur->data);
  974. cur = g_list_next (cur);
  975. }
  976. }
  977. #endif
  978. }
  979. return 0;
  980. }
  981. struct gmime_raw_header {
  982. struct raw_header *next;
  983. gchar *name;
  984. gchar *value;
  985. };
  986. typedef struct _GMimeHeader {
  987. GHashTable *hash;
  988. GHashTable *writers;
  989. struct raw_header *headers;
  990. } local_GMimeHeader;
  /*
   * Known header field types. HEADER_UNKNOWN is the last value and is used as
   * the inclusive upper bound when scanning the fieldfunc table.
   */
  enum {
      HEADER_FROM       = 0,
      HEADER_REPLY_TO   = 1,
      HEADER_TO         = 2,
      HEADER_CC         = 3,
      HEADER_BCC        = 4,
      HEADER_SUBJECT    = 5,
      HEADER_DATE       = 6,
      HEADER_MESSAGE_ID = 7,
      HEADER_UNKNOWN    = 8
  };
  1003. #ifndef GMIME24
  1004. static void
  1005. header_iterate (memory_pool_t * pool, struct gmime_raw_header *h, GList ** ret, const gchar *field, gboolean strong)
  1006. {
  1007. while (h) {
  1008. if (G_LIKELY (!strong)) {
  1009. if (h->value && !g_strncasecmp (field, h->name, strlen (field))) {
  1010. if (pool != NULL) {
  1011. *ret = g_list_prepend (*ret, memory_pool_strdup (pool, h->value));
  1012. }
  1013. else {
  1014. *ret = g_list_prepend (*ret, g_strdup (h->value));
  1015. }
  1016. }
  1017. }
  1018. else {
  1019. if (h->value && !strncmp (field, h->name, strlen (field))) {
  1020. if (pool != NULL) {
  1021. *ret = g_list_prepend (*ret, memory_pool_strdup (pool, h->value));
  1022. }
  1023. else {
  1024. *ret = g_list_prepend (*ret, g_strdup (h->value));
  1025. }
  1026. }
  1027. }
  1028. h = h->next;
  1029. }
  1030. }
  1031. #else
  1032. static void
  1033. header_iterate (memory_pool_t * pool, GMimeHeaderList * ls, GList ** ret, const gchar *field, gboolean strong)
  1034. {
  1035. GMimeHeaderIter *iter;
  1036. const gchar *name;
  1037. if (ls == NULL) {
  1038. *ret = NULL;
  1039. return;
  1040. }
  1041. iter = g_mime_header_iter_new ();
  1042. if (g_mime_header_list_get_iter (ls, iter) && g_mime_header_iter_first (iter)) {
  1043. while (g_mime_header_iter_is_valid (iter)) {
  1044. name = g_mime_header_iter_get_name (iter);
  1045. if (G_LIKELY (!strong)) {
  1046. if (!g_strncasecmp (field, name, strlen (name))) {
  1047. if (pool != NULL) {
  1048. *ret = g_list_prepend (*ret, memory_pool_strdup (pool, g_mime_header_iter_get_value (iter)));
  1049. }
  1050. else {
  1051. *ret = g_list_prepend (*ret, g_strdup (g_mime_header_iter_get_value (iter)));
  1052. }
  1053. }
  1054. }
  1055. else {
  1056. if (!strncmp (field, name, strlen (name))) {
  1057. if (pool != NULL) {
  1058. *ret = g_list_prepend (*ret, memory_pool_strdup (pool, g_mime_header_iter_get_value (iter)));
  1059. }
  1060. else {
  1061. *ret = g_list_prepend (*ret, g_strdup (g_mime_header_iter_get_value (iter)));
  1062. }
  1063. }
  1064. }
  1065. if (!g_mime_header_iter_next (iter)) {
  1066. break;
  1067. }
  1068. }
  1069. }
  1070. g_mime_header_iter_free (iter);
  1071. }
  1072. #endif
  1073. struct multipart_cb_data {
  1074. GList *ret;
  1075. memory_pool_t *pool;
  1076. const gchar *field;
  1077. gboolean try_search;
  1078. gboolean strong;
  1079. gint rec;
  1080. };
  1081. #define MAX_REC 10
  1082. static void
  1083. #ifdef GMIME24
  1084. multipart_iterate (GMimeObject * parent, GMimeObject * part, gpointer user_data)
  1085. #else
  1086. multipart_iterate (GMimeObject * part, gpointer user_data)
  1087. #endif
  1088. {
  1089. struct multipart_cb_data *data = user_data;
  1090. #ifndef GMIME24
  1091. struct gmime_raw_header *h;
  1092. #endif
  1093. GList *l = NULL;
  1094. if (data->try_search && part != NULL && GMIME_IS_PART (part)) {
  1095. #ifdef GMIME24
  1096. GMimeHeaderList *ls;
  1097. ls = g_mime_object_get_header_list (GMIME_OBJECT (part));
  1098. header_iterate (data->pool, ls, &l, data->field, data->strong);
  1099. #else
  1100. h = part->headers->headers;
  1101. header_iterate (data->pool, h, &l, data->field, data->strong);
  1102. #endif
  1103. if (l == NULL) {
  1104. /* Header not found, abandon search results */
  1105. data->try_search = FALSE;
  1106. g_list_free (data->ret);
  1107. data->ret = NULL;
  1108. }
  1109. else {
  1110. data->ret = g_list_concat (l, data->ret);
  1111. }
  1112. }
  1113. else if (data->try_search && GMIME_IS_MULTIPART (part)) {
  1114. /* Maybe endless recursion here ? */
  1115. if (data->rec++ < MAX_REC) {
  1116. g_mime_multipart_foreach (GMIME_MULTIPART (part), multipart_iterate, data);
  1117. }
  1118. else {
  1119. msg_info ("maximum recurse limit is over, stop recursing, %d", data->rec);
  1120. data->try_search = FALSE;
  1121. }
  1122. }
  1123. }
  1124. static GList *
  1125. local_message_get_header (memory_pool_t * pool, GMimeMessage * message, const gchar *field, gboolean strong)
  1126. {
  1127. GList *gret = NULL;
  1128. GMimeObject *part;
  1129. struct multipart_cb_data cb = {
  1130. .try_search = TRUE,
  1131. .rec = 0,
  1132. .ret = NULL,
  1133. };
  1134. cb.pool = pool;
  1135. cb.field = field;
  1136. cb.strong = strong;
  1137. #ifndef GMIME24
  1138. struct raw_header *h;
  1139. if (field == NULL) {
  1140. return NULL;
  1141. }
  1142. msg_debug ("iterate over headers to find header %s", field);
  1143. h = GMIME_OBJECT (message)->headers->headers;
  1144. header_iterate (pool, h, &gret, field, strong);
  1145. if (gret == NULL) {
  1146. /* Try to iterate with mime part headers */
  1147. msg_debug ("iterate over headers of mime part to find header %s", field);
  1148. part = g_mime_message_get_mime_part (message);
  1149. if (part) {
  1150. h = part->headers->headers;
  1151. header_iterate (pool, h, &gret, field, strong);
  1152. if (gret == NULL && GMIME_IS_MULTIPART (part)) {
  1153. msg_debug ("iterate over headers of each multipart's subparts %s", field);
  1154. g_mime_multipart_foreach (GMIME_MULTIPART (part), multipart_iterate, &cb);
  1155. if (cb.ret != NULL) {
  1156. gret = cb.ret;
  1157. }
  1158. }
  1159. #ifndef GMIME24
  1160. g_object_unref (part);
  1161. #endif
  1162. }
  1163. }
  1164. return gret;
  1165. #else
  1166. GMimeHeaderList *ls;
  1167. ls = g_mime_object_get_header_list (GMIME_OBJECT (message));
  1168. header_iterate (pool, ls, &gret, field, strong);
  1169. if (gret == NULL) {
  1170. /* Try to iterate with mime part headers */
  1171. part = g_mime_message_get_mime_part (message);
  1172. if (part) {
  1173. ls = g_mime_object_get_header_list (GMIME_OBJECT (part));
  1174. header_iterate (pool, ls, &gret, field, strong);
  1175. if (gret == NULL && GMIME_IS_MULTIPART (part)) {
  1176. g_mime_multipart_foreach (GMIME_MULTIPART (part), multipart_iterate, &cb);
  1177. if (cb.ret != NULL) {
  1178. gret = cb.ret;
  1179. }
  1180. }
  1181. #ifndef GMIME24
  1182. g_object_unref (part);
  1183. #endif
  1184. }
  1185. }
  1186. return gret;
  1187. #endif
  1188. }
  1189. /**
  1190. * g_mime_message_set_date_from_string: Set the message sent-date
  1191. * @message: MIME Message
  1192. * @string: A string of date
  1193. *
  1194. * Set the sent-date on a MIME Message.
  1195. **/
  1196. void
  1197. local_mime_message_set_date_from_string (GMimeMessage * message, const gchar * string)
  1198. {
  1199. time_t date;
  1200. gint offset = 0;
  1201. date = g_mime_utils_header_decode_date (string, &offset);
  1202. g_mime_message_set_date (message, date, offset);
  1203. }
  1204. /*
  1205. * Replacements for standart gmime functions but converting adresses to IA
  1206. */
  1207. static const gchar *
  1208. local_message_get_sender (GMimeMessage * message)
  1209. {
  1210. gchar *res;
  1211. const gchar *from = g_mime_message_get_sender (message);
  1212. InternetAddressList *ia;
  1213. #ifndef GMIME24
  1214. ia = internet_address_parse_string (from);
  1215. #else
  1216. ia = internet_address_list_parse_string (from);
  1217. #endif
  1218. if (!ia) {
  1219. return NULL;
  1220. }
  1221. res = internet_address_list_to_string (ia, FALSE);
  1222. #ifndef GMIME24
  1223. internet_address_list_destroy (ia);
  1224. #else
  1225. g_object_unref (ia);
  1226. #endif
  1227. return res;
  1228. }
  1229. static const gchar *
  1230. local_message_get_reply_to (GMimeMessage * message)
  1231. {
  1232. gchar *res;
  1233. const gchar *from = g_mime_message_get_reply_to (message);
  1234. InternetAddressList *ia;
  1235. #ifndef GMIME24
  1236. ia = internet_address_parse_string (from);
  1237. #else
  1238. ia = internet_address_list_parse_string (from);
  1239. #endif
  1240. if (!ia) {
  1241. return NULL;
  1242. }
  1243. res = internet_address_list_to_string (ia, FALSE);
  1244. #ifndef GMIME24
  1245. internet_address_list_destroy (ia);
  1246. #else
  1247. g_object_unref (ia);
  1248. #endif
  1249. return res;
  1250. }
  #ifdef GMIME24
  /*
   * ADD_RECIPIENT_TEMPLATE (type, def) defines
   *   local_message_add_recipients_from_string_<type> (message, string, value)
   * which parses `value` as an address list and appends it to the recipients
   * of kind `def` of `message`.
   *
   * The functions are installed as SetListFunc entries of the fieldfunc table;
   * message_set_header () calls them with the header name as second argument
   * and the header value as third, so `string` (the name) is ignored and
   * `value` is what gets parsed. The temporary parsed list is released after
   * appending. The macro body ends at the closing brace without a line
   * continuation, so the instantiations below are not swallowed by the
   * definition.
   */
  # define ADD_RECIPIENT_TEMPLATE(type,def) \
  static void \
  local_message_add_recipients_from_string_##type (GMimeMessage *message, const gchar *string, const gchar *value) \
  { \
      InternetAddressList *il, *parsed; \
      \
      (void) string; \
      if (value == NULL) { \
          return; \
      } \
      il = g_mime_message_get_recipients (message, (def)); \
      parsed = internet_address_list_parse_string (value); \
      if (parsed != NULL) { \
          if (il != NULL) { \
              internet_address_list_append (il, parsed); \
          } \
          g_object_unref (parsed); \
      } \
  }
  ADD_RECIPIENT_TEMPLATE (to, GMIME_RECIPIENT_TYPE_TO)
  ADD_RECIPIENT_TEMPLATE (cc, GMIME_RECIPIENT_TYPE_CC)
  ADD_RECIPIENT_TEMPLATE (bcc, GMIME_RECIPIENT_TYPE_BCC)

  /*
   * GET_RECIPIENT_TEMPLATE (type, def) defines
   *   local_message_get_recipients_<type> (message, unused)
   * returning the recipients of kind `def` of `message`. The second argument
   * only exists to fit the GetRcptFunc signature.
   */
  # define GET_RECIPIENT_TEMPLATE(type,def) \
  static InternetAddressList* \
  local_message_get_recipients_##type (GMimeMessage *message, const gchar *unused) \
  { \
      return g_mime_message_get_recipients (message, (def)); \
  }
  GET_RECIPIENT_TEMPLATE (to, GMIME_RECIPIENT_TYPE_TO)
  GET_RECIPIENT_TEMPLATE (cc, GMIME_RECIPIENT_TYPE_CC)
  GET_RECIPIENT_TEMPLATE (bcc, GMIME_RECIPIENT_TYPE_BCC)
  #endif
  1275. /* different declarations for different types of set and get functions */
  1276. typedef const gchar *(*GetFunc) (GMimeMessage * message);
  1277. typedef InternetAddressList *(*GetRcptFunc) (GMimeMessage * message, const gchar *type);
  1278. typedef GList *(*GetListFunc) (memory_pool_t * pool, GMimeMessage * message, const gchar *type, gboolean strong);
  1279. typedef void (*SetFunc) (GMimeMessage * message, const gchar *value);
  1280. typedef void (*SetListFunc) (GMimeMessage * message, const gchar *field, const gchar *value);
  /** kinds of accessor entries in the fieldfunc table
   *
   * FUNC_CHARPTR
   *  - getter takes only the message and returns gchar*
   *  - setter takes the message and the value
   *
   * FUNC_CHARFREEPTR
   *  - same call shape as FUNC_CHARPTR, but message_get_header () releases the
   *    getter's result with g_free () after copying it
   *
   * FUNC_IA (from Internet Address)
   *  - accessors take an additional "field" argument
   *  - getter returns InternetAddressList*, which message_get_header ()
   *    converts into a GList* of address strings
   *
   * FUNC_LIST
   *  - accessors take an additional "field" argument (arbitrary header field name)
   *  - getter returns GList*
   **/
  enum {
      FUNC_CHARPTR     = 0,
      FUNC_CHARFREEPTR = 1,
      FUNC_IA          = 2,
      FUNC_LIST        = 3
  };
  1301. /**
  1302. * fieldfunc struct: structure of MIME fields and corresponding get and set
  1303. * functions.
  1304. **/
  1305. static struct {
  1306. gchar *name;
  1307. GetFunc func;
  1308. GetRcptFunc rcptfunc;
  1309. GetListFunc getlistfunc;
  1310. SetFunc setfunc;
  1311. SetListFunc setlfunc;
  1312. gint functype;
  1313. } fieldfunc[] =
  1314. {
  1315. {
  1316. "From", local_message_get_sender, NULL, NULL, g_mime_message_set_sender, NULL, FUNC_CHARFREEPTR}, {
  1317. "Reply-To", local_message_get_reply_to, NULL, NULL, g_mime_message_set_reply_to, NULL, FUNC_CHARFREEPTR},
  1318. #ifndef GMIME24
  1319. {
  1320. "To", NULL, (GetRcptFunc) g_mime_message_get_recipients, NULL, NULL, (SetListFunc) g_mime_message_add_recipients_from_string, FUNC_IA}, {
  1321. "Cc", NULL, (GetRcptFunc) g_mime_message_get_recipients, NULL, NULL, (SetListFunc) g_mime_message_add_recipients_from_string, FUNC_IA}, {
  1322. "Bcc", NULL, (GetRcptFunc) g_mime_message_get_recipients, NULL, NULL, (SetListFunc) g_mime_message_add_recipients_from_string, FUNC_IA}, {
  1323. "Date", (GetFunc) g_mime_message_get_date_string, NULL, NULL, local_mime_message_set_date_from_string, NULL, FUNC_CHARFREEPTR},
  1324. #else
  1325. {
  1326. "To", NULL, local_message_get_recipients_to, NULL, NULL, local_message_add_recipients_from_string_to, FUNC_IA}, {
  1327. "Cc", NULL, local_message_get_recipients_cc, NULL, NULL, local_message_add_recipients_from_string_cc, FUNC_IA}, {
  1328. "Bcc", NULL, local_message_get_recipients_bcc, NULL, NULL, local_message_add_recipients_from_string_bcc, FUNC_IA}, {
  1329. "Date", (GetFunc)g_mime_message_get_date_as_string, NULL, NULL, local_mime_message_set_date_from_string, NULL, FUNC_CHARFREEPTR},
  1330. #endif
  1331. {
  1332. "Subject", g_mime_message_get_subject, NULL, NULL, g_mime_message_set_subject, NULL, FUNC_CHARPTR}, {
  1333. "Message-Id", g_mime_message_get_message_id, NULL, NULL, g_mime_message_set_message_id, NULL, FUNC_CHARPTR},
  1334. #ifndef GMIME24
  1335. {
  1336. NULL, NULL, NULL, local_message_get_header, NULL, g_mime_message_add_header, FUNC_LIST}
  1337. #else
  1338. {
  1339. NULL, NULL, NULL, local_message_get_header, NULL, (SetListFunc)g_mime_object_append_header, FUNC_LIST}
  1340. #endif
  1341. };
  1342. /**
  1343. * message_set_header: set header of any type excluding special (Content- and MIME-Version:)
  1344. **/
  1345. void
  1346. message_set_header (GMimeMessage * message, const gchar *field, const gchar *value)
  1347. {
  1348. gint i;
  1349. if (!g_strcasecmp (field, "MIME-Version:") || !g_strncasecmp (field, "Content-", 8)) {
  1350. return;
  1351. }
  1352. for (i = 0; i <= HEADER_UNKNOWN; ++i) {
  1353. if (!fieldfunc[i].name || !g_strncasecmp (field, fieldfunc[i].name, strlen (fieldfunc[i].name))) {
  1354. switch (fieldfunc[i].functype) {
  1355. case FUNC_CHARPTR:
  1356. (*(fieldfunc[i].setfunc)) (message, value);
  1357. break;
  1358. case FUNC_IA:
  1359. (*(fieldfunc[i].setlfunc)) (message, fieldfunc[i].name, value);
  1360. break;
  1361. case FUNC_LIST:
  1362. (*(fieldfunc[i].setlfunc)) (message, field, value);
  1363. break;
  1364. }
  1365. break;
  1366. }
  1367. }
  1368. }
  1369. /**
  1370. * message_get_header: returns the list of 'any header' values
  1371. * (except of unsupported yet Content- and MIME-Version special headers)
  1372. *
  1373. * You should free the GList list by yourself.
  1374. **/
  1375. GList *
  1376. message_get_header (memory_pool_t * pool, GMimeMessage * message, const gchar *field, gboolean strong)
  1377. {
  1378. gint i;
  1379. gchar *ret = NULL, *ia_string;
  1380. GList *gret = NULL;
  1381. InternetAddressList *ia_list = NULL, *ia;
  1382. for (i = 0; i <= HEADER_UNKNOWN; ++i) {
  1383. if (!fieldfunc[i].name || !g_strncasecmp (field, fieldfunc[i].name, strlen (fieldfunc[i].name))) {
  1384. switch (fieldfunc[i].functype) {
  1385. case FUNC_CHARFREEPTR:
  1386. ret = (gchar *)(*(fieldfunc[i].func)) (message);
  1387. break;
  1388. case FUNC_CHARPTR:
  1389. ret = (gchar *)(*(fieldfunc[i].func)) (message);
  1390. break;
  1391. case FUNC_IA:
  1392. ia_list = (*(fieldfunc[i].rcptfunc)) (message, field);
  1393. ia = ia_list;
  1394. #ifndef GMIME24
  1395. while (ia && ia->address) {
  1396. ia_string = internet_address_to_string ((InternetAddress *) ia->address, FALSE);
  1397. if (pool != NULL) {
  1398. memory_pool_add_destructor (pool, (pool_destruct_func) g_free, ia_string);
  1399. }
  1400. gret = g_list_prepend (gret, ia_string);
  1401. ia = ia->next;
  1402. }
  1403. #else
  1404. i = internet_address_list_length (ia);
  1405. while (--i >= 0) {
  1406. ia_string = internet_address_to_string (internet_address_list_get_address (ia, i), FALSE);
  1407. if (pool != NULL) {
  1408. memory_pool_add_destructor (pool, (pool_destruct_func) g_free, ia_string);
  1409. }
  1410. gret = g_list_prepend (gret, ia_string);
  1411. }
  1412. #endif
  1413. break;
  1414. case FUNC_LIST:
  1415. gret = (*(fieldfunc[i].getlistfunc)) (pool, message, field, strong);
  1416. break;
  1417. }
  1418. break;
  1419. }
  1420. }
  1421. if (gret == NULL && ret != NULL) {
  1422. if (pool != NULL) {
  1423. gret = g_list_prepend (gret, memory_pool_strdup (pool, ret));
  1424. }
  1425. else {
  1426. gret = g_list_prepend (gret, g_strdup (ret));
  1427. }
  1428. }
  1429. if (fieldfunc[i].functype == FUNC_CHARFREEPTR && ret) {
  1430. g_free (ret);
  1431. }
  1432. return gret;
  1433. }
  1434. GList*
  1435. message_get_raw_header (struct worker_task *task, const gchar *field, gboolean strong)
  1436. {
  1437. GList *cur, *gret = NULL;
  1438. struct raw_header *rh;
  1439. cur = task->raw_headers_list;
  1440. while (cur) {
  1441. rh = cur->data;
  1442. if (strong) {
  1443. if (strcmp (rh->name, field) == 0) {
  1444. gret = g_list_prepend (gret, rh);
  1445. }
  1446. }
  1447. else {
  1448. if (g_ascii_strcasecmp (rh->name, field) == 0) {
  1449. gret = g_list_prepend (gret, rh);
  1450. }
  1451. }
  1452. cur = g_list_next (cur);
  1453. }
  1454. if (gret != NULL) {
  1455. memory_pool_add_destructor (task->task_pool, (pool_destruct_func)g_list_free, gret);
  1456. }
  1457. return gret;
  1458. }