Ви не можете вибрати більше 25 тем. Теми мають розпочинатися з літери або цифри, можуть містити дефіси (-) і не повинні перевищувати 35 символів.

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501
  1. /*-
  2. * Copyright 2016 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "config.h"
  17. #include "email_addr.h"
  18. #include "message.h"
  19. #include "printf.h"
  20. #include "smtp_parsers.h"
  21. static void
  22. rspamd_email_address_unescape (struct rspamd_email_address *addr)
  23. {
  24. const char *h, *end;
  25. char *t, *d;
  26. if (addr->user_len == 0) {
  27. return;
  28. }
  29. d = g_malloc (addr->user_len);
  30. t = d;
  31. h = addr->user;
  32. end = h + addr->user_len;
  33. while (h < end) {
  34. if (*h != '\\') {
  35. *t++ = *h;
  36. }
  37. h ++;
  38. }
  39. addr->user = d;
  40. addr->user_len = t - d;
  41. addr->flags |= RSPAMD_EMAIL_ADDR_USER_ALLOCATED;
  42. }
  43. struct rspamd_email_address *
  44. rspamd_email_address_from_smtp (const gchar *str, guint len)
  45. {
  46. struct rspamd_email_address addr, *ret;
  47. gsize nlen;
  48. if (str == NULL || len == 0) {
  49. return NULL;
  50. }
  51. rspamd_smtp_addr_parse (str, len, &addr);
  52. if (addr.flags & RSPAMD_EMAIL_ADDR_VALID) {
  53. ret = g_malloc (sizeof (*ret));
  54. memcpy (ret, &addr, sizeof (addr));
  55. if ((ret->flags & RSPAMD_EMAIL_ADDR_QUOTED) && ret->addr[0] == '"') {
  56. if (ret->flags & RSPAMD_EMAIL_ADDR_HAS_BACKSLASH) {
  57. /* We also need to unquote user */
  58. rspamd_email_address_unescape (ret);
  59. }
  60. /* We need to unquote addr */
  61. nlen = ret->domain_len + ret->user_len + 2;
  62. ret->addr = g_malloc (nlen + 1);
  63. ret->addr_len = rspamd_snprintf ((char *)ret->addr, nlen, "%*s@%*s",
  64. (gint)ret->user_len, ret->user,
  65. (gint)ret->domain_len, ret->domain);
  66. ret->flags |= RSPAMD_EMAIL_ADDR_ADDR_ALLOCATED;
  67. }
  68. return ret;
  69. }
  70. return NULL;
  71. }
  72. void
  73. rspamd_email_address_free (struct rspamd_email_address *addr)
  74. {
  75. if (addr) {
  76. if (addr->flags & RSPAMD_EMAIL_ADDR_ADDR_ALLOCATED) {
  77. g_free ((void *) addr->addr);
  78. }
  79. if (addr->flags & RSPAMD_EMAIL_ADDR_USER_ALLOCATED) {
  80. g_free ((void *) addr->user);
  81. }
  82. g_free (addr);
  83. }
  84. }
  85. static inline void
  86. rspamd_email_address_add (rspamd_mempool_t *pool,
  87. GPtrArray *ar,
  88. struct rspamd_email_address *addr,
  89. GString *name)
  90. {
  91. struct rspamd_email_address *elt;
  92. guint nlen;
  93. elt = g_malloc0 (sizeof (*elt));
  94. if (addr != NULL) {
  95. memcpy (elt, addr, sizeof (*addr));
  96. }
  97. else {
  98. elt->addr = "";
  99. elt->domain = "";
  100. elt->raw = "<>";
  101. elt->raw_len = 2;
  102. elt->user = "";
  103. elt->flags |= RSPAMD_EMAIL_ADDR_EMPTY;
  104. }
  105. if ((elt->flags & RSPAMD_EMAIL_ADDR_QUOTED) && elt->addr[0] == '"') {
  106. if (elt->flags & RSPAMD_EMAIL_ADDR_HAS_BACKSLASH) {
  107. /* We also need to unquote user */
  108. rspamd_email_address_unescape (elt);
  109. }
  110. /* We need to unquote addr */
  111. nlen = elt->domain_len + elt->user_len + 2;
  112. elt->addr = g_malloc (nlen + 1);
  113. elt->addr_len = rspamd_snprintf ((char *)elt->addr, nlen, "%*s@%*s",
  114. (gint)elt->user_len, elt->user,
  115. (gint)elt->domain_len, elt->domain);
  116. elt->flags |= RSPAMD_EMAIL_ADDR_ADDR_ALLOCATED;
  117. }
  118. if (name->len > 0) {
  119. rspamd_gstring_strip (name, " \t\v");
  120. elt->name = rspamd_mime_header_decode (pool, name->str, name->len, NULL);
  121. }
  122. g_ptr_array_add (ar, elt);
  123. }
  124. /*
  125. * Tries to parse an email address that doesn't conform RFC
  126. */
  127. static gboolean
  128. rspamd_email_address_parse_heuristic (const char *data, size_t len,
  129. struct rspamd_email_address *addr)
  130. {
  131. const gchar *p = data, *at = NULL, *end = data + len;
  132. gboolean ret = FALSE;
  133. memset (addr, 0, sizeof (*addr));
  134. if (*p == '<' && len > 1) {
  135. /* Angled address */
  136. addr->addr_len = rspamd_memcspn (p + 1, ">", len - 1);
  137. addr->addr = p + 1;
  138. addr->raw = p;
  139. addr->raw_len = len;
  140. ret = TRUE;
  141. p = p + 1;
  142. len = addr->addr_len;
  143. end = p + len;
  144. }
  145. else if (len > 0) {
  146. addr->addr = p;
  147. addr->addr_len = len;
  148. addr->raw = p;
  149. addr->raw_len = len;
  150. ret = TRUE;
  151. }
  152. if (ret) {
  153. at = memchr (p, '@', len);
  154. if (at != NULL && at + 1 < end) {
  155. addr->domain = at + 1;
  156. addr->domain_len = end - (at + 1);
  157. addr->user = p;
  158. addr->user_len = at - p;
  159. }
  160. if (rspamd_str_has_8bit (p, len)) {
  161. addr->flags |= RSPAMD_EMAIL_ADDR_HAS_8BIT;
  162. }
  163. }
  164. return ret;
  165. }
  166. static inline gboolean
  167. rspamd_email_address_check_and_add (const gchar *start, gsize len,
  168. GPtrArray *res,
  169. rspamd_mempool_t *pool,
  170. GString *ns)
  171. {
  172. struct rspamd_email_address addr;
  173. /* The whole email is likely address */
  174. memset (&addr, 0, sizeof (addr));
  175. rspamd_smtp_addr_parse (start, len, &addr);
  176. if (addr.flags & RSPAMD_EMAIL_ADDR_VALID) {
  177. rspamd_email_address_add (pool, res, &addr, ns);
  178. }
  179. else {
  180. /* Try heuristic */
  181. if (rspamd_email_address_parse_heuristic (start,
  182. len, &addr)) {
  183. rspamd_email_address_add (pool, res, &addr, ns);
  184. return TRUE;
  185. }
  186. else {
  187. return FALSE;
  188. }
  189. }
  190. return TRUE;
  191. }
  192. GPtrArray *
  193. rspamd_email_address_from_mime (rspamd_mempool_t *pool,
  194. const gchar *hdr, guint len,
  195. GPtrArray *src)
  196. {
  197. GPtrArray *res = src;
  198. gboolean seen_at = FALSE;
  199. const gchar *p = hdr, *end = hdr + len, *c = hdr, *t;
  200. GString *ns, *cpy;
  201. gint obraces, ebraces;
  202. enum {
  203. parse_name = 0,
  204. parse_quoted,
  205. parse_addr,
  206. skip_spaces
  207. } state = parse_name, next_state = parse_name;
  208. if (res == NULL) {
  209. res = g_ptr_array_sized_new (2);
  210. rspamd_mempool_add_destructor (pool, rspamd_email_address_list_destroy,
  211. res);
  212. }
  213. ns = g_string_sized_new (len);
  214. cpy = g_string_sized_new (len);
  215. rspamd_mempool_add_destructor (pool, rspamd_gstring_free_hard, cpy);
  216. /* First, we need to remove all comments as they are terrible */
  217. obraces = 0;
  218. ebraces = 0;
  219. while (p < end) {
  220. if (state == parse_name) {
  221. if (*p == '\\') {
  222. if (obraces == 0) {
  223. g_string_append_c (cpy, *p);
  224. }
  225. p++;
  226. }
  227. else {
  228. if (*p == '"') {
  229. state = parse_quoted;
  230. }
  231. else if (*p == '(') {
  232. obraces ++;
  233. }
  234. else if (*p == ')') {
  235. ebraces ++;
  236. }
  237. if (obraces == ebraces) {
  238. obraces = 0;
  239. ebraces = 0;
  240. }
  241. }
  242. if (p < end && obraces == 0) {
  243. g_string_append_c (cpy, *p);
  244. }
  245. }
  246. else {
  247. /* Quoted elt */
  248. if (*p == '\\') {
  249. g_string_append_c (cpy, *p);
  250. p++;
  251. }
  252. else {
  253. if (*p == '"') {
  254. state = parse_name;
  255. }
  256. }
  257. if (p < end) {
  258. g_string_append_c (cpy, *p);
  259. }
  260. }
  261. p++;
  262. }
  263. state = parse_name;
  264. p = cpy->str;
  265. c = p;
  266. end = p + cpy->len;
  267. while (p < end) {
  268. switch (state) {
  269. case parse_name:
  270. if (*p == '"') {
  271. /* We need to strip last spaces and update `ns` */
  272. if (p > c) {
  273. guint nspaces = 0;
  274. t = p - 1;
  275. while (t > c && g_ascii_isspace (*t)) {
  276. t --;
  277. nspaces ++;
  278. }
  279. g_string_append_len (ns, c, t - c + 1);
  280. if (nspaces > 0) {
  281. g_string_append_c (ns, ' ');
  282. }
  283. }
  284. state = parse_quoted;
  285. c = p + 1;
  286. }
  287. else if (*p == '<') {
  288. if (p > c) {
  289. t = p - 1;
  290. while (t > c && g_ascii_isspace (*t)) {
  291. t --;
  292. }
  293. g_string_append_len (ns, c, t - c + 1);
  294. }
  295. c = p;
  296. state = parse_addr;
  297. }
  298. else if (*p == ',') {
  299. if (p > c && seen_at) {
  300. /*
  301. * Last token must be the address:
  302. * e.g. Some name name@domain.com
  303. */
  304. t = p - 1;
  305. while (t > c && g_ascii_isspace (*t)) {
  306. t --;
  307. }
  308. if (!rspamd_email_address_check_and_add (c, t - c + 1,
  309. res, pool, ns)) {
  310. rspamd_email_address_add (pool, res, NULL, ns);
  311. }
  312. /* Cleanup for the next use */
  313. g_string_set_size (ns, 0);
  314. seen_at = FALSE;
  315. }
  316. state = skip_spaces;
  317. next_state = parse_name;
  318. }
  319. else if (*p == '@') {
  320. seen_at = TRUE;
  321. }
  322. p ++;
  323. break;
  324. case parse_quoted:
  325. if (*p == '"') {
  326. if (p > c) {
  327. g_string_append_len (ns, c, p - c);
  328. }
  329. if (p + 1 < end && g_ascii_isspace (p[1])) {
  330. g_string_append_c (ns, ' ');
  331. }
  332. state = skip_spaces;
  333. next_state = parse_name;
  334. }
  335. p ++;
  336. break;
  337. case parse_addr:
  338. if (*p == '>') {
  339. if (!rspamd_email_address_check_and_add (c, p - c + 1,
  340. res, pool, ns)) {
  341. rspamd_email_address_add (pool, res, NULL, ns);
  342. }
  343. /* Cleanup for the next use */
  344. g_string_set_size (ns, 0);
  345. seen_at = FALSE;
  346. state = skip_spaces;
  347. next_state = parse_name;
  348. }
  349. else if (*p == '@') {
  350. seen_at = TRUE;
  351. }
  352. p ++;
  353. break;
  354. case skip_spaces:
  355. if (!g_ascii_isspace (*p)) {
  356. c = p;
  357. state = next_state;
  358. }
  359. else {
  360. p ++;
  361. }
  362. break;
  363. }
  364. }
  365. /* Handle leftover */
  366. switch (state) {
  367. case parse_name:
  368. /* Assume the whole header as name (bad thing) */
  369. if (p > c) {
  370. while (p > c && g_ascii_isspace (*p)) {
  371. p --;
  372. }
  373. if (p > c) {
  374. if (seen_at) {
  375. /* The whole email is likely address */
  376. if (!rspamd_email_address_check_and_add (c, p - c,
  377. res, pool, ns)) {
  378. if (res->len == 0) {
  379. rspamd_email_address_add (pool, res, NULL, ns);
  380. }
  381. }
  382. } else {
  383. /* No @ seen */
  384. g_string_append_len (ns, c, p - c);
  385. if (res->len == 0) {
  386. rspamd_email_address_add (pool, res, NULL, ns);
  387. }
  388. }
  389. }
  390. else if (res->len == 0) {
  391. rspamd_email_address_add (pool, res, NULL, ns);
  392. }
  393. }
  394. break;
  395. case parse_addr:
  396. if (p > c) {
  397. if (!rspamd_email_address_check_and_add (c, p - c,
  398. res, pool, ns)) {
  399. if (res->len == 0) {
  400. rspamd_email_address_add (pool, res, NULL, ns);
  401. }
  402. }
  403. }
  404. break;
  405. case parse_quoted:
  406. /* Unfinished quoted string or a comment */
  407. break;
  408. default:
  409. /* Do nothing */
  410. break;
  411. }
  412. g_string_free (ns, TRUE);
  413. return res;
  414. }
  415. void
  416. rspamd_email_address_list_destroy (gpointer ptr)
  417. {
  418. GPtrArray *ar = ptr;
  419. guint i;
  420. struct rspamd_email_address *addr;
  421. PTR_ARRAY_FOREACH (ar, i, addr) {
  422. rspamd_email_address_free (addr);
  423. }
  424. g_ptr_array_free (ar, TRUE);
  425. }