You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

received.cxx 26KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030
  1. /*
  2. * Copyright 2024 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "config.h"
  17. #include "libserver/url.h"
  18. #include "lua/lua_common.h"
  19. #include "libserver/cfg_file.h"
  20. #include "libserver/mempool_vars_internal.h"
  21. #include "mime_string.hxx"
  22. #include "smtp_parsers.h"
  23. #include "message.h"
  24. #include "received.hxx"
  25. #include "frozen/string.h"
  26. #include "frozen/unordered_map.h"
  27. namespace rspamd::mime {
  /**
   * Kind of a clause found inside a `Received` header.
   */
  enum class received_part_type {
      /* Clause introduced by the `from` keyword */
      RSPAMD_RECEIVED_PART_FROM,
      /* Clause introduced by the `by` keyword */
      RSPAMD_RECEIVED_PART_BY,
      /* Clause introduced by the `for` keyword */
      RSPAMD_RECEIVED_PART_FOR,
      /* Clause introduced by the `with` keyword */
      RSPAMD_RECEIVED_PART_WITH,
      /* Clause introduced by the `id` keyword */
      RSPAMD_RECEIVED_PART_ID,
      /* Any other word; such parts are consumed but their text is not stored */
      RSPAMD_RECEIVED_PART_UNKNOWN,
  };
  36. struct received_part {
  37. received_part_type type;
  38. mime_string data;
  39. std::vector<mime_string> comments;
  40. explicit received_part(received_part_type t)
  41. : type(t),
  42. data(received_char_filter)
  43. {
  44. }
  45. };
  46. static inline auto
  47. received_part_set_or_append(const char *begin,
  48. gsize len,
  49. mime_string &dest) -> void
  50. {
  51. if (len == 0) {
  52. return;
  53. }
  54. dest.append(begin, len);
  55. dest.trim(" \t");
  56. }
  57. static auto
  58. received_process_part(const std::string_view &data,
  59. received_part_type type,
  60. std::ptrdiff_t &last,
  61. received_part &npart) -> bool
  62. {
  63. auto obraces = 0, ebraces = 0;
  64. auto seen_tcpinfo = false;
  65. enum _parse_state {
  66. skip_spaces,
  67. in_comment,
  68. read_data,
  69. read_tcpinfo,
  70. all_done
  71. } state,
  72. next_state;
  73. /* In this function, we just process comments and data separately */
  74. const auto *p = data.data();
  75. const auto *end = p + data.size();
  76. const auto *c = p;
  77. state = skip_spaces;
  78. next_state = read_data;
  79. while (p < end) {
  80. switch (state) {
  81. case skip_spaces:
  82. if (!g_ascii_isspace(*p)) {
  83. c = p;
  84. state = next_state;
  85. }
  86. else {
  87. p++;
  88. }
  89. break;
  90. case in_comment:
  91. if (*p == '(') {
  92. obraces++;
  93. }
  94. else if (*p == ')') {
  95. ebraces++;
  96. if (ebraces >= obraces) {
  97. if (type != received_part_type::RSPAMD_RECEIVED_PART_UNKNOWN) {
  98. if (p > c) {
  99. npart.comments.emplace_back(received_char_filter);
  100. auto &comment = npart.comments.back();
  101. received_part_set_or_append(c, p - c,
  102. comment);
  103. }
  104. }
  105. p++;
  106. c = p;
  107. state = skip_spaces;
  108. next_state = read_data;
  109. continue;
  110. }
  111. }
  112. p++;
  113. break;
  114. case read_data:
  115. if (*p == '(') {
  116. if (p > c) {
  117. if (type != received_part_type::RSPAMD_RECEIVED_PART_UNKNOWN) {
  118. received_part_set_or_append(c, p - c,
  119. npart.data);
  120. }
  121. }
  122. state = in_comment;
  123. obraces = 1;
  124. ebraces = 0;
  125. p++;
  126. c = p;
  127. }
  128. else if (g_ascii_isspace(*p)) {
  129. if (p > c) {
  130. if (type != received_part_type::RSPAMD_RECEIVED_PART_UNKNOWN) {
  131. received_part_set_or_append(c, p - c,
  132. npart.data);
  133. }
  134. }
  135. state = skip_spaces;
  136. next_state = read_data;
  137. c = p;
  138. }
  139. else if (*p == ';') {
  140. /* It is actually delimiter of date part if not in the comments */
  141. if (p > c) {
  142. if (type != received_part_type::RSPAMD_RECEIVED_PART_UNKNOWN) {
  143. received_part_set_or_append(c, p - c,
  144. npart.data);
  145. }
  146. }
  147. state = all_done;
  148. continue;
  149. }
  150. else if (npart.data.size() > 0) {
  151. /* We have already received data and find something with no ( */
  152. if (!seen_tcpinfo && type == received_part_type::RSPAMD_RECEIVED_PART_FROM) {
  153. /* Check if we have something special here, such as TCPinfo */
  154. if (*c == '[') {
  155. state = read_tcpinfo;
  156. p++;
  157. }
  158. else {
  159. state = all_done;
  160. continue;
  161. }
  162. }
  163. else {
  164. state = all_done;
  165. continue;
  166. }
  167. }
  168. else {
  169. p++;
  170. }
  171. break;
  172. case read_tcpinfo:
  173. if (*p == ']') {
  174. received_part_set_or_append(c, p - c + 1,
  175. npart.data);
  176. seen_tcpinfo = TRUE;
  177. state = skip_spaces;
  178. next_state = read_data;
  179. c = p;
  180. }
  181. p++;
  182. break;
  183. case all_done:
  184. if (p > data.data()) {
  185. last = p - data.data();
  186. return true;
  187. }
  188. else {
  189. /* Empty element */
  190. return false;
  191. }
  192. break;
  193. }
  194. }
  195. /* Leftover */
  196. switch (state) {
  197. case read_data:
  198. if (p > c) {
  199. if (type != received_part_type::RSPAMD_RECEIVED_PART_UNKNOWN) {
  200. received_part_set_or_append(c, p - c,
  201. npart.data);
  202. }
  203. last = p - data.data();
  204. return true;
  205. }
  206. break;
  207. case skip_spaces:
  208. if (p > data.data()) {
  209. last = p - data.data();
  210. return true;
  211. }
  212. default:
  213. break;
  214. }
  215. return false;
  216. }
  217. template<std::size_t N>
  218. constexpr auto lit_compare_lowercase(const char lit[N], const char *in) -> bool
  219. {
  220. for (auto i = 0; i < N; i++) {
  221. if (lc_map[(unsigned char) in[i]] != lit[i]) {
  222. return false;
  223. }
  224. }
  225. return true;
  226. }
  227. static auto
  228. received_spill(const std::string_view &in,
  229. std::ptrdiff_t &date_pos) -> std::vector<received_part>
  230. {
  231. std::vector<received_part> parts;
  232. std::ptrdiff_t pos = 0;
  233. auto seen_from = false, seen_by = false;
  234. const auto *p = in.data();
  235. const auto *end = p + in.size();
  236. auto skip_spaces = [&p, end]() {
  237. while (p < end && g_ascii_isspace(*p)) {
  238. p++;
  239. }
  240. };
  241. skip_spaces();
  242. /* Skip SMTP comments */
  243. if (*p == '(') {
  244. auto obraces = 0, ebraces = 0;
  245. while (p < end) {
  246. if (*p == ')') {
  247. ebraces++;
  248. }
  249. else if (*p == '(') {
  250. obraces++;
  251. }
  252. p++;
  253. if (obraces == ebraces) {
  254. /* Skip spaces after */
  255. skip_spaces();
  256. break;
  257. }
  258. }
  259. }
  260. auto len = end - p;
  261. if (len == 0) {
  262. return parts;
  263. }
  264. auto maybe_process_part = [&](received_part_type what) -> bool {
  265. parts.emplace_back(what);
  266. auto &rcvd_part = parts.back();
  267. auto chunk = std::string_view{p, (std::size_t)(end - p)};
  268. if (!received_process_part(chunk, what, pos, rcvd_part)) {
  269. parts.pop_back();
  270. return false;
  271. }
  272. return true;
  273. };
  274. if (len > 4 && lit_compare_lowercase<4>("from", p)) {
  275. p += sizeof("from") - 1;
  276. /* We can now store from part */
  277. if (!maybe_process_part(received_part_type::RSPAMD_RECEIVED_PART_FROM)) {
  278. /* Do not accept malformed from */
  279. return {};
  280. }
  281. g_assert(pos != 0);
  282. p += pos;
  283. len = end > p ? end - p : 0;
  284. seen_from = true;
  285. }
  286. if (len > 2 && lit_compare_lowercase<2>("by", p)) {
  287. p += sizeof("by") - 1;
  288. if (!maybe_process_part(received_part_type::RSPAMD_RECEIVED_PART_BY)) {
  289. return {};
  290. }
  291. g_assert(pos != 0);
  292. p += pos;
  293. len = end > p ? end - p : 0;
  294. seen_by = true;
  295. }
  296. if (!seen_from && !seen_by) {
  297. /* Useless received */
  298. return {};
  299. }
  300. while (p < end) {
  301. bool got_part = false;
  302. if (*p == ';') {
  303. /* We are at the date separator, stop here */
  304. date_pos = p - in.data() + 1;
  305. break;
  306. }
  307. else {
  308. if (len > sizeof("with") && lit_compare_lowercase<4>("with", p)) {
  309. p += sizeof("with") - 1;
  310. got_part = maybe_process_part(received_part_type::RSPAMD_RECEIVED_PART_WITH);
  311. }
  312. else if (len > sizeof("for") && lit_compare_lowercase<3>("for", p)) {
  313. p += sizeof("for") - 1;
  314. got_part = maybe_process_part(received_part_type::RSPAMD_RECEIVED_PART_FOR);
  315. }
  316. else if (len > sizeof("id") && lit_compare_lowercase<2>("id", p)) {
  317. p += sizeof("id") - 1;
  318. got_part = maybe_process_part(received_part_type::RSPAMD_RECEIVED_PART_ID);
  319. }
  320. else {
  321. while (p < end) {
  322. if (!(g_ascii_isspace(*p) || *p == '(' || *p == ';')) {
  323. p++;
  324. }
  325. else {
  326. break;
  327. }
  328. }
  329. if (p == end) {
  330. return {};
  331. }
  332. else if (*p == ';') {
  333. date_pos = p - in.data() + 1;
  334. break;
  335. }
  336. else {
  337. got_part = maybe_process_part(received_part_type::RSPAMD_RECEIVED_PART_UNKNOWN);
  338. }
  339. }
  340. if (!got_part) {
  341. p++;
  342. len = end > p ? end - p : 0;
  343. }
  344. else {
  345. g_assert(pos != 0);
  346. p += pos;
  347. len = end > p ? end - p : 0;
  348. }
  349. }
  350. }
  351. return parts;
  352. }
  353. #define RSPAMD_INET_ADDRESS_PARSE_RECEIVED \
  354. (rspamd_inet_address_parse_flags)(RSPAMD_INET_ADDRESS_PARSE_REMOTE | RSPAMD_INET_ADDRESS_PARSE_NO_UNIX)
  355. static auto
  356. received_process_rdns(rspamd_mempool_t *pool,
  357. const std::string_view &in,
  358. mime_string &dest) -> bool
  359. {
  360. auto seen_dot = false;
  361. const auto *p = in.data();
  362. const auto *end = p + in.size();
  363. if (in.empty()) {
  364. return false;
  365. }
  366. if (*p == '[' && *(end - 1) == ']' && in.size() > 2) {
  367. /* We have enclosed ip address */
  368. auto *addr = rspamd_parse_inet_address_pool(p + 1,
  369. (end - p) - 2,
  370. pool,
  371. RSPAMD_INET_ADDRESS_PARSE_RECEIVED);
  372. if (addr) {
  373. const char *addr_str;
  374. if (rspamd_inet_address_get_port(addr) != 0) {
  375. addr_str = rspamd_inet_address_to_string_pretty(addr);
  376. }
  377. else {
  378. addr_str = rspamd_inet_address_to_string(addr);
  379. }
  380. dest.assign_copy(std::string_view{addr_str});
  381. return true;
  382. }
  383. }
  384. auto hlen = 0u;
  385. while (p < end) {
  386. if (!g_ascii_isspace(*p) && rspamd_url_is_domain(*p)) {
  387. if (*p == '.') {
  388. seen_dot = true;
  389. }
  390. hlen++;
  391. }
  392. else {
  393. break;
  394. }
  395. p++;
  396. }
  397. if (hlen > 0) {
  398. if (p == end || (seen_dot && (g_ascii_isspace(*p) || *p == '[' || *p == '('))) {
  399. /* All data looks like a hostname */
  400. dest.assign_copy(std::string_view{in.data(), hlen});
  401. return true;
  402. }
  403. }
  404. return false;
  405. }
  406. static auto
  407. received_process_host_tcpinfo(rspamd_mempool_t *pool,
  408. received_header &rh,
  409. const std::string_view &in) -> bool
  410. {
  411. rspamd_inet_addr_t *addr = nullptr;
  412. auto ret = false;
  413. if (in.empty()) {
  414. return false;
  415. }
  416. if (in[0] == '[') {
  417. /* Likely Exim version */
  418. auto brace_pos = in.find(']');
  419. if (brace_pos != std::string_view::npos) {
  420. auto substr_addr = in.substr(1, brace_pos - 1);
  421. addr = rspamd_parse_inet_address_pool(substr_addr.data(),
  422. substr_addr.size(),
  423. pool,
  424. RSPAMD_INET_ADDRESS_PARSE_RECEIVED);
  425. if (addr) {
  426. rh.addr = addr;
  427. rh.real_ip.assign_copy(std::string_view(rspamd_inet_address_to_string(addr)));
  428. }
  429. }
  430. }
  431. else {
  432. if (g_ascii_isxdigit(in[0])) {
  433. /* Try to parse IP address */
  434. addr = rspamd_parse_inet_address_pool(in.data(),
  435. in.size(), pool, RSPAMD_INET_ADDRESS_PARSE_RECEIVED);
  436. if (addr) {
  437. rh.addr = addr;
  438. rh.real_ip.assign_copy(std::string_view(rspamd_inet_address_to_string(addr)));
  439. }
  440. }
  441. if (!addr) {
  442. /* Try canonical Postfix version: rdns [ip] */
  443. auto obrace_pos = in.find('[');
  444. if (obrace_pos != std::string_view::npos) {
  445. auto ebrace_pos = in.rfind(']');
  446. if (ebrace_pos != std::string_view::npos && ebrace_pos > obrace_pos) {
  447. auto substr_addr = in.substr(obrace_pos + 1,
  448. ebrace_pos - obrace_pos - 1);
  449. addr = rspamd_parse_inet_address_pool(substr_addr.data(),
  450. substr_addr.size(),
  451. pool,
  452. RSPAMD_INET_ADDRESS_PARSE_RECEIVED);
  453. if (addr) {
  454. rh.addr = addr;
  455. rh.real_ip.assign_copy(std::string_view(rspamd_inet_address_to_string(addr)));
  456. /* Process with rDNS */
  457. auto rdns_substr = in.substr(0, obrace_pos);
  458. if (received_process_rdns(pool, rdns_substr, rh.real_hostname)) {
  459. ret = true;
  460. }
  461. }
  462. }
  463. }
  464. else {
  465. /* Hostname or some crap, sigh... */
  466. if (received_process_rdns(pool, in, rh.real_hostname)) {
  467. ret = true;
  468. }
  469. }
  470. }
  471. }
  472. return ret;
  473. }
  474. static void
  475. received_process_from(rspamd_mempool_t *pool,
  476. const received_part &rpart,
  477. received_header &rh)
  478. {
  479. if (rpart.data.size() > 0) {
  480. /* We have seen multiple cases:
  481. * - [ip] (hostname/unknown [real_ip])
  482. * - helo (hostname/unknown [real_ip])
  483. * - [ip]
  484. * - hostname
  485. * - hostname ([ip]:port helo=xxx)
  486. * Maybe more...
  487. */
  488. auto seen_ip_in_data = false;
  489. if (!rpart.comments.empty()) {
  490. /* We can have info within comment as part of RFC */
  491. received_process_host_tcpinfo(
  492. pool, rh,
  493. rpart.comments[0].as_view());
  494. }
  495. if (rh.real_ip.size() == 0) {
  496. /* Try to do the same with data */
  497. if (received_process_host_tcpinfo(
  498. pool, rh,
  499. rpart.data.as_view())) {
  500. seen_ip_in_data = true;
  501. }
  502. }
  503. if (!seen_ip_in_data) {
  504. if (rh.real_ip.size() != 0) {
  505. /* Get announced hostname (usually helo) */
  506. received_process_rdns(pool,
  507. rpart.data.as_view(),
  508. rh.from_hostname);
  509. }
  510. else {
  511. received_process_host_tcpinfo(pool,
  512. rh, rpart.data.as_view());
  513. }
  514. }
  515. }
  516. else {
  517. /* rpart->dlen = 0 */
  518. if (!rpart.comments.empty()) {
  519. received_process_host_tcpinfo(
  520. pool, rh,
  521. rpart.comments[0].as_view());
  522. }
  523. }
  524. }
  525. static auto
  526. received_header_parse(received_header_chain &chain, rspamd_mempool_t *pool,
  527. const std::string_view &in,
  528. struct rspamd_mime_header *hdr) -> bool
  529. {
  530. std::ptrdiff_t date_pos = -1;
  531. static constexpr const auto protos_map = frozen::make_unordered_map<frozen::string, received_flags>({{"smtp", received_flags::SMTP},
  532. {"esmtp", received_flags::ESMTP},
  533. {"utf8esmtp", received_flags::ESMTP |
  534. received_flags::UTF8},
  535. {"esmtpa", received_flags::ESMTPA |
  536. received_flags::AUTHENTICATED},
  537. {"utf8esmtpa", received_flags::ESMTPA |
  538. received_flags::AUTHENTICATED |
  539. received_flags::UTF8},
  540. {"esmtpsa", received_flags::ESMTPSA |
  541. received_flags::SSL |
  542. received_flags::AUTHENTICATED},
  543. {"utf8esmtpsa", received_flags::ESMTPSA |
  544. received_flags::SSL |
  545. received_flags::AUTHENTICATED |
  546. received_flags::UTF8},
  547. {"esmtps", received_flags::ESMTPS |
  548. received_flags::SSL},
  549. {"utf8esmtps", received_flags::ESMTPS |
  550. received_flags::SSL |
  551. received_flags::UTF8},
  552. {"lmtp", received_flags::LMTP},
  553. {"imap", received_flags::IMAP},
  554. {"imaps", received_flags::IMAP |
  555. received_flags::SSL},
  556. {"http", received_flags::HTTP},
  557. {"https", received_flags::HTTP |
  558. received_flags::SSL},
  559. {"local", received_flags::LOCAL}});
  560. auto parts = received_spill(in, date_pos);
  561. if (parts.empty()) {
  562. return false;
  563. }
  564. auto &rh = chain.new_received();
  565. rh.flags = received_flags::UNKNOWN;
  566. rh.hdr = hdr;
  567. for (const auto &part: parts) {
  568. switch (part.type) {
  569. case received_part_type::RSPAMD_RECEIVED_PART_FROM:
  570. received_process_from(pool, part, rh);
  571. break;
  572. case received_part_type::RSPAMD_RECEIVED_PART_BY:
  573. received_process_rdns(pool,
  574. part.data.as_view(),
  575. rh.by_hostname);
  576. break;
  577. case received_part_type::RSPAMD_RECEIVED_PART_WITH:
  578. if (part.data.size() > 0) {
  579. auto proto_flag_it = protos_map.find(part.data.as_view());
  580. if (proto_flag_it != protos_map.end()) {
  581. rh.flags = proto_flag_it->second;
  582. }
  583. }
  584. break;
  585. case received_part_type::RSPAMD_RECEIVED_PART_FOR:
  586. rh.for_mbox.assign_copy(part.data);
  587. rh.for_addr = rspamd_email_address_from_smtp(rh.for_mbox.data(),
  588. rh.for_mbox.size());
  589. break;
  590. default:
  591. /* Do nothing */
  592. break;
  593. }
  594. }
  595. if (!rh.real_hostname.empty() && rh.from_hostname.empty()) {
  596. rh.from_hostname.assign_copy(rh.real_hostname);
  597. }
  598. if (date_pos > 0 && date_pos < in.size()) {
  599. auto date_sub = in.substr(date_pos);
  600. rh.timestamp = rspamd_parse_smtp_date((const unsigned char *) date_sub.data(),
  601. date_sub.size(), nullptr);
  602. }
  603. return true;
  604. }
  605. static auto
  606. received_maybe_fix_task(struct rspamd_task *task) -> bool
  607. {
  608. auto *recv_chain_ptr = static_cast<received_header_chain *>(MESSAGE_FIELD(task, received_headers));
  609. if (recv_chain_ptr) {
  610. auto need_recv_correction = false;
  611. auto top_recv_maybe = recv_chain_ptr->get_received(0);
  612. if (top_recv_maybe.has_value()) {
  613. auto &top_recv = top_recv_maybe.value().get();
  614. const auto *raddr = top_recv.addr;
  615. if (top_recv.real_ip.size() == 0 || (task->cfg && task->cfg->ignore_received)) {
  616. need_recv_correction = true;
  617. }
  618. else if (!(task->flags & RSPAMD_TASK_FLAG_NO_IP) && task->from_addr) {
  619. if (!raddr) {
  620. need_recv_correction = true;
  621. }
  622. else {
  623. if (rspamd_inet_address_compare(raddr, task->from_addr, FALSE) != 0) {
  624. need_recv_correction = true;
  625. }
  626. }
  627. }
  628. if (need_recv_correction && !(task->flags & RSPAMD_TASK_FLAG_NO_IP) && task->from_addr) {
  629. msg_debug_task("the first received seems to be"
  630. " not ours, prepend it with fake one");
  631. auto &trecv = recv_chain_ptr->new_received(received_header_chain::append_type::append_head);
  632. trecv.flags |= received_flags::ARTIFICIAL;
  633. if (task->flags & RSPAMD_TASK_FLAG_SSL) {
  634. trecv.flags |= received_flags::SSL;
  635. }
  636. if (task->auth_user) {
  637. trecv.flags |= received_flags::AUTHENTICATED;
  638. }
  639. trecv.real_ip.assign_copy(std::string_view(rspamd_inet_address_to_string(task->from_addr)));
  640. const auto *mta_name = (const char *) rspamd_mempool_get_variable(task->task_pool,
  641. RSPAMD_MEMPOOL_MTA_NAME);
  642. if (mta_name) {
  643. trecv.by_hostname.assign_copy(std::string_view(mta_name));
  644. }
  645. trecv.addr = rspamd_inet_address_copy(task->from_addr,
  646. task->task_pool);
  647. if (task->hostname) {
  648. trecv.real_hostname.assign_copy(std::string_view(task->hostname));
  649. trecv.from_hostname.assign_copy(trecv.real_hostname);
  650. }
  651. return true;
  652. }
  653. /* Extract data from received header if we were not given IP */
  654. if (!need_recv_correction && (task->flags & RSPAMD_TASK_FLAG_NO_IP) &&
  655. (task->cfg && !task->cfg->ignore_received)) {
  656. if (!top_recv.real_ip.empty()) {
  657. if (!rspamd_parse_inet_address(&task->from_addr,
  658. top_recv.real_ip.data(),
  659. top_recv.real_ip.size(),
  660. RSPAMD_INET_ADDRESS_PARSE_NO_UNIX)) {
  661. msg_warn_task("cannot get IP from received header: '%s'",
  662. top_recv.real_ip.data());
  663. task->from_addr = nullptr;
  664. }
  665. }
  666. if (!top_recv.real_hostname.empty()) {
  667. task->hostname = top_recv.real_hostname.data();
  668. }
  669. return true;
  670. }
  671. }
  672. }
  673. return false;
  674. }
  675. static auto
  676. received_export_to_lua(received_header_chain *chain, lua_State *L) -> bool
  677. {
  678. if (chain == nullptr) {
  679. return false;
  680. }
  681. lua_createtable(L, chain->size(), 0);
  682. auto push_flag = [L](const received_header &rh, received_flags fl, const char *name) {
  683. lua_pushboolean(L, !!(rh.flags & fl));
  684. lua_setfield(L, -2, name);
  685. };
  686. auto i = 1;
  687. for (const auto &rh: chain->as_vector()) {
  688. lua_createtable(L, 0, 10);
  689. if (rh.hdr && rh.hdr->decoded) {
  690. rspamd_lua_table_set(L, "raw", rh.hdr->decoded);
  691. }
  692. lua_createtable(L, 0, 3);
  693. push_flag(rh, received_flags::ARTIFICIAL, "artificial");
  694. push_flag(rh, received_flags::AUTHENTICATED, "authenticated");
  695. push_flag(rh, received_flags::SSL, "ssl");
  696. push_flag(rh, received_flags::UTF8, "utf8");
  697. lua_setfield(L, -2, "flags");
  698. auto push_nullable_string = [L](const mime_string &st, const char *field) {
  699. if (st.empty()) {
  700. lua_pushnil(L);
  701. }
  702. else {
  703. lua_pushlstring(L, st.data(), st.size());
  704. }
  705. lua_setfield(L, -2, field);
  706. };
  707. push_nullable_string(rh.from_hostname, "from_hostname");
  708. push_nullable_string(rh.real_hostname, "real_hostname");
  709. push_nullable_string(rh.real_ip, "from_ip");
  710. push_nullable_string(rh.by_hostname, "by_hostname");
  711. push_nullable_string(rh.for_mbox, "for");
  712. if (rh.addr) {
  713. rspamd_lua_ip_push(L, rh.addr);
  714. }
  715. else {
  716. lua_pushnil(L);
  717. }
  718. lua_setfield(L, -2, "real_ip");
  719. lua_pushstring(L, received_protocol_to_string(rh.flags));
  720. lua_setfield(L, -2, "proto");
  721. lua_pushinteger(L, rh.timestamp);
  722. lua_setfield(L, -2, "timestamp");
  723. lua_rawseti(L, -2, i++);
  724. }
  725. return true;
  726. }
  727. }// namespace rspamd::mime
  728. bool rspamd_received_header_parse(struct rspamd_task *task,
  729. const char *data, size_t sz,
  730. struct rspamd_mime_header *hdr)
  731. {
  732. auto *recv_chain_ptr = static_cast<rspamd::mime::received_header_chain *>(MESSAGE_FIELD(task, received_headers));
  733. if (recv_chain_ptr == nullptr) {
  734. /* This constructor automatically registers dtor in mempool */
  735. recv_chain_ptr = new rspamd::mime::received_header_chain(task);
  736. MESSAGE_FIELD(task, received_headers) = (void *) recv_chain_ptr;
  737. }
  738. return rspamd::mime::received_header_parse(*recv_chain_ptr, task->task_pool,
  739. std::string_view{data, sz}, hdr);
  740. }
  741. bool rspamd_received_maybe_fix_task(struct rspamd_task *task)
  742. {
  743. return rspamd::mime::received_maybe_fix_task(task);
  744. }
  745. bool rspamd_received_export_to_lua(struct rspamd_task *task, lua_State *L)
  746. {
  747. return rspamd::mime::received_export_to_lua(
  748. static_cast<rspamd::mime::received_header_chain *>(MESSAGE_FIELD(task, received_headers)),
  749. L);
  750. }
  751. /* Tests part */
  752. #define DOCTEST_CONFIG_IMPLEMENTATION_IN_DLL
  753. #include "doctest/doctest.h"
  754. TEST_SUITE("received")
  755. {
  756. TEST_CASE("parse received")
  757. {
  758. using namespace std::string_view_literals;
  759. using map_type = ankerl::unordered_dense::map<std::string_view, std::string_view>;
  760. std::vector<std::pair<std::string_view, map_type>> cases{
  761. // Simple received
  762. {"from smtp11.mailtrack.pl (smtp11.mailtrack.pl [185.243.30.90])"sv,
  763. {{"real_ip", "185.243.30.90"},
  764. {"real_hostname", "smtp11.mailtrack.pl"},
  765. {"from_hostname", "smtp11.mailtrack.pl"}}},
  766. // Real Postfix IPv6 received
  767. {"from server.chat-met-vreemden.nl (unknown [IPv6:2a01:7c8:aab6:26d:5054:ff:fed1:1da2])\n"
  768. "\t(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))\n"
  769. "\t(Client did not present a certificate)\n"
  770. "\tby mx1.freebsd.org (Postfix) with ESMTPS id CF0171862\n"
  771. "\tfor <test@example.com>; Mon, 6 Jul 2015 09:01:20 +0000 (UTC)\n"
  772. "\t(envelope-from upwest201diana@outlook.com)"sv,
  773. {{"real_ip", "2a01:7c8:aab6:26d:5054:ff:fed1:1da2"},
  774. {"from_hostname", "server.chat-met-vreemden.nl"},
  775. {"by_hostname", "mx1.freebsd.org"},
  776. {"for_mbox", "<test@example.com>"}}},
  777. // Exim IPv4 received
  778. {"from localhost ([127.0.0.1]:49019 helo=hummus.csx.cam.ac.uk)\n"
  779. " by hummus.csx.cam.ac.uk with esmtp (Exim 4.91-pdpfix1)\n"
  780. " (envelope-from <exim-dev-bounces@exim.org>)\n"
  781. " id 1fZ55o-0006DP-3H\n"
  782. " for <xxx@xxx.xxx>; Sat, 30 Jun 2018 02:54:28 +0100"sv,
  783. {
  784. {"from_hostname", "localhost"},
  785. {"real_ip", "127.0.0.1"},
  786. {"for_mbox", "<xxx@xxx.xxx>"},
  787. {"by_hostname", "hummus.csx.cam.ac.uk"},
  788. }},
  789. // Exim IPv6 received
  790. {"from smtp.spodhuis.org ([2a02:898:31:0:48:4558:736d:7470]:38689\n"
  791. " helo=mx.spodhuis.org)\n"
  792. " by hummus.csx.cam.ac.uk with esmtpsa (TLSv1.3:TLS_AES_256_GCM_SHA384:256)\n"
  793. " (Exim 4.91-pdpfix1+cc) (envelope-from <xxx@exim.org>)\n"
  794. " id 1fZ55k-0006CO-9M\n"
  795. " for exim-dev@exim.org; Sat, 30 Jun 2018 02:54:24 +0100"sv,
  796. {
  797. {"from_hostname", "smtp.spodhuis.org"},
  798. {"real_ip", "2a02:898:31:0:48:4558:736d:7470"},
  799. {"for_mbox", "exim-dev@exim.org"},
  800. {"by_hostname", "hummus.csx.cam.ac.uk"},
  801. }},
  802. // Haraka received
  803. {"from aaa.cn ([1.1.1.1]) by localhost.localdomain (Haraka/2.8.18) with "
  804. "ESMTPA id 349C9C2B-491A-4925-A687-3EF14038C344.1 envelope-from <huxin@xxx.com> "
  805. "(authenticated bits=0); Tue, 03 Jul 2018 14:18:13 +0200"sv,
  806. {
  807. {"from_hostname", "aaa.cn"},
  808. {"real_ip", "1.1.1.1"},
  809. {"by_hostname", "localhost.localdomain"},
  810. }},
  811. // Invalid by
  812. {"from [192.83.172.101] (HELLO 148.251.238.35) (148.251.238.35) "
  813. "by guovswzqkvry051@sohu.com with gg login "
  814. "by AOL 6.0 for Windows US sub 008 SMTP ; Tue, 03 Jul 2018 09:01:47 -0300"sv,
  815. {
  816. {"from_hostname", "192.83.172.101"},
  817. {"real_ip", "192.83.172.101"},
  818. }},
  819. // Invalid hostinfo
  820. {"from example.com ([]) by example.com with ESMTP id 2019091111 ;"
  821. " Thu, 26 Sep 2019 11:19:07 +0200"sv,
  822. {
  823. {"by_hostname", "example.com"},
  824. {"from_hostname", "example.com"},
  825. {"real_hostname", "example.com"},
  826. }},
  827. // Different real and announced hostnames + broken crap
  828. {"from 171-29.br (1-1-1-1.z.com.br [1.1.1.1]) by x.com.br (Postfix) "
  829. "with;ESMTP id 44QShF6xj4z1X for <hey@y.br>; Thu, 21 Mar 2019 23:45:46 -0300 "
  830. ": <g @yi.br>"sv,
  831. {
  832. {"real_ip", "1.1.1.1"},
  833. {"from_hostname", "171-29.br"},
  834. {"real_hostname", "1-1-1-1.z.com.br"},
  835. {"by_hostname", "x.com.br"},
  836. }},
  837. // Different real and announced ips + no hostname
  838. {"from [127.0.0.1] ([127.0.0.2]) by smtp.gmail.com with ESMTPSA id xxxololo"sv,
  839. {
  840. {"real_ip", "127.0.0.2"},
  841. {"from_hostname", "127.0.0.1"},
  842. {"by_hostname", "smtp.gmail.com"},
  843. }},
  844. // Different real and hostanes
  845. {"from 185.118.166.127 (steven2.zhou01.pserver.ru [185.118.166.127]) "
  846. "by mail.832zsu.cn (Postfix) with ESMTPA id AAD722133E34"sv,
  847. {
  848. {"real_ip", "185.118.166.127"},
  849. {"from_hostname", "185.118.166.127"},
  850. {"real_hostname", "steven2.zhou01.pserver.ru"},
  851. {"by_hostname", "mail.832zsu.cn"},
  852. }},
  853. // \0 in received must be filtered
  854. {"from smtp11.mailt\0rack.pl (smtp11.mail\0track.pl [1\085.243.30.90])"sv,
  855. {{"real_ip", "185.243.30.90"},
  856. {"real_hostname", "smtp11.mailtrack.pl"},
  857. {"from_hostname", "smtp11.mailtrack.pl"}}},
  858. // No from part
  859. {"by mail.832zsu.cn (Postfix) with ESMTPA id AAD722133E34"sv,
  860. {
  861. {"by_hostname", "mail.832zsu.cn"},
  862. }},
  863. // From part is in the comment
  864. {"(from asterisk@localhost)\n"
  865. " by pbx.xxx.com (8.14.7/8.14.7/Submit) id 076Go4wD014562;\n"
  866. " Thu, 6 Aug 2020 11:50:04 -0500"sv,
  867. {
  868. {"by_hostname", "pbx.xxx.com"},
  869. }},
  870. };
  871. rspamd_mempool_t *pool = rspamd_mempool_new_default("rcvd test", 0);
  872. for (auto &&c: cases) {
  873. SUBCASE(c.first.data())
  874. {
  875. rspamd::mime::received_header_chain chain;
  876. auto ret = rspamd::mime::received_header_parse(chain, pool,
  877. c.first, nullptr);
  878. CHECK(ret == true);
  879. auto &&rh = chain.get_received(0);
  880. CHECK(rh.has_value());
  881. auto res = rh.value().get().as_map();
  882. for (const auto &expected: c.second) {
  883. CHECK_MESSAGE(res.contains(expected.first), expected.first.data());
  884. CHECK(res[expected.first] == expected.second);
  885. }
  886. for (const auto &existing: res) {
  887. CHECK_MESSAGE(c.second.contains(existing.first), existing.first.data());
  888. CHECK(c.second[existing.first] == existing.second);
  889. }
  890. }
  891. }
  892. rspamd_mempool_delete(pool);
  893. }
  894. }