You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

received.cxx 25KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037
  1. /*-
  2. * Copyright 2021 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include <mempool_vars_internal.h>
  17. #include "config.h"
  18. #include "libserver/url.h"
  19. #include "lua/lua_common.h"
  20. #include "libserver/cfg_file.h"
  21. #include "mime_string.hxx"
  22. #include "smtp_parsers.h"
  23. #include "message.h"
  24. #include "received.hxx"
  25. #include "frozen/string.h"
  26. #include "frozen/unordered_map.h"
  27. namespace rspamd::mime {
/**
 * Kinds of clauses that the parser in this file distinguishes inside a
 * Received header. Each value except UNKNOWN corresponds to the keyword
 * that introduces the clause.
 */
enum class received_part_type {
    RSPAMD_RECEIVED_PART_FROM,    /* clause introduced by `from` */
    RSPAMD_RECEIVED_PART_BY,      /* clause introduced by `by` */
    RSPAMD_RECEIVED_PART_FOR,     /* clause introduced by `for` */
    RSPAMD_RECEIVED_PART_WITH,    /* clause introduced by `with` */
    RSPAMD_RECEIVED_PART_ID,      /* clause introduced by `id` */
    RSPAMD_RECEIVED_PART_UNKNOWN, /* any other token; its content is skipped, not stored */
};
/**
 * One parsed clause of a Received header: the clause kind, the text found
 * outside of parentheses and the text of each parenthesised comment.
 */
struct received_part {
    received_part_type type;           /* which clause this part represents */
    mime_string data;                  /* clause payload collected outside of comments */
    std::vector<mime_string> comments; /* comments, in the order they were encountered */

    /* `data` is constructed with received_char_filter; `comments` starts empty */
    explicit received_part(received_part_type t)
        : type(t),
          data(received_char_filter) {}
};
  44. static inline auto
  45. received_part_set_or_append(const gchar *begin,
  46. gsize len,
  47. mime_string &dest) -> void
  48. {
  49. if (len == 0) {
  50. return;
  51. }
  52. dest.append(begin, len);
  53. dest.trim(" \t");
  54. }
/**
 * Scans one clause of a Received header (the text that follows a keyword such
 * as `from` or `by`) and splits it into plain data and parenthesised comments.
 *
 * @param data  text to scan, starting right after the clause keyword
 * @param type  kind of the clause; for RSPAMD_RECEIVED_PART_UNKNOWN nothing is
 *              stored in `npart`, the text is only skipped
 * @param last  on success receives the number of bytes of `data` consumed
 * @param npart destination for the collected data and comments
 * @return true if something was consumed (`last` is set), false for an empty
 *         element or when the input ends inside a comment or a `[...]` block
 */
static auto
received_process_part(const std::string_view &data,
                      received_part_type type,
                      std::ptrdiff_t &last,
                      received_part &npart) -> bool
{
    /* Counters of opening/closing parentheses used to track nested comments */
    auto obraces = 0, ebraces = 0;
    /* Set once a `[...]` block has been appended to the data */
    auto seen_tcpinfo = false;
    enum _parse_state {
        skip_spaces,
        in_comment,
        read_data,
        read_tcpinfo,
        all_done
    } state, next_state;

    /* In this function, we just process comments and data separately */
    const auto *p = data.data();        /* current position */
    const auto *end = p + data.size();
    const auto *c = p;                  /* start of the token being collected */

    state = skip_spaces;
    next_state = read_data;

    while (p < end) {
        switch (state) {
        case skip_spaces:
            /* The first non-space character starts a new token; it is not consumed here */
            if (!g_ascii_isspace(*p)) {
                c = p;
                state = next_state;
            }
            else {
                p++;
            }
            break;
        case in_comment:
            if (*p == '(') {
                obraces++;
            }
            else if (*p == ')') {
                ebraces++;

                if (ebraces >= obraces) {
                    /* Outermost comment is closed: store its content without the parentheses */
                    if (type != received_part_type::RSPAMD_RECEIVED_PART_UNKNOWN) {
                        if (p > c) {
                            npart.comments.emplace_back(received_char_filter);
                            auto &comment = npart.comments.back();
                            received_part_set_or_append(c, p - c,
                                    comment);
                        }
                    }

                    p++;
                    c = p;
                    state = skip_spaces;
                    next_state = read_data;

                    continue;
                }
            }

            p++;
            break;
        case read_data:
            if (*p == '(') {
                /* A comment starts: flush the pending token first */
                if (p > c) {
                    if (type != received_part_type::RSPAMD_RECEIVED_PART_UNKNOWN) {
                        received_part_set_or_append(c, p - c,
                                npart.data);
                    }
                }

                state = in_comment;
                obraces = 1;
                ebraces = 0;
                p++;
                c = p;
            }
            else if (g_ascii_isspace (*p)) {
                /* End of token: flush it and skip the following whitespace */
                if (p > c) {
                    if (type != received_part_type::RSPAMD_RECEIVED_PART_UNKNOWN) {
                        received_part_set_or_append(c, p - c,
                                npart.data);
                    }
                }

                state = skip_spaces;
                next_state = read_data;
                c = p;
            }
            else if (*p == ';') {
                /* It is actually delimiter of date part if not in the comments */
                if (p > c) {
                    if (type != received_part_type::RSPAMD_RECEIVED_PART_UNKNOWN) {
                        received_part_set_or_append(c, p - c,
                                npart.data);
                    }
                }

                /* `p` stays on the ';' so that the caller sees it */
                state = all_done;
                continue;
            }
            else if (npart.data.size() > 0) {
                /* We have already received data and find something with no ( */
                if (!seen_tcpinfo && type == received_part_type::RSPAMD_RECEIVED_PART_FROM) {
                    /* Check if we have something special here, such as TCPinfo */
                    if (*c == '[') {
                        state = read_tcpinfo;
                        p++;
                    }
                    else {
                        state = all_done;
                        continue;
                    }
                }
                else {
                    state = all_done;
                    continue;
                }
            }
            else {
                p++;
            }
            break;
        case read_tcpinfo:
            if (*p == ']') {
                /* Append the whole bracketed block, including the closing ']' */
                received_part_set_or_append(c, p - c + 1,
                        npart.data);
                seen_tcpinfo = TRUE;
                state = skip_spaces;
                next_state = read_data;
                c = p;
            }
            p++;
            break;
        case all_done:
            if (p > data.data()) {
                last = p - data.data();
                return true;
            }
            else {
                /* Empty element */
                return false;
            }
            break;
        }
    }

    /* Leftover */
    switch (state) {
    case read_data:
        /* Input ended in the middle of a token: flush it */
        if (p > c) {
            if (type != received_part_type::RSPAMD_RECEIVED_PART_UNKNOWN) {
                received_part_set_or_append(c, p - c,
                        npart.data);
            }

            last = p - data.data();

            return true;
        }
        break;
    case skip_spaces:
        if (p > data.data()) {
            last = p - data.data();

            return true;
        }
        /* Nothing consumed: falls through to the default label and returns false */
    default:
        /* Input ended inside a comment or a `[...]` block */
        break;
    }

    return false;
}
  214. template <std::size_t N>
  215. constexpr auto lit_compare_lowercase(const char lit[N], const char *in) -> bool
  216. {
  217. for (auto i = 0; i < N; i ++) {
  218. if (lc_map[(unsigned char)in[i]] != lit[i]) {
  219. return false;
  220. }
  221. }
  222. return true;
  223. }
  224. static auto
  225. received_spill(const std::string_view &in,
  226. std::ptrdiff_t &date_pos) -> std::vector<received_part>
  227. {
  228. std::vector<received_part> parts;
  229. std::ptrdiff_t pos = 0;
  230. auto seen_from = false, seen_by = false;
  231. const auto *p = in.data();
  232. const auto *end = p + in.size();
  233. auto skip_spaces = [&p, end]() {
  234. while (p < end && g_ascii_isspace (*p)) {
  235. p++;
  236. }
  237. };
  238. skip_spaces();
  239. /* Skip SMTP comments */
  240. if (*p == '(') {
  241. auto obraces = 0, ebraces = 0;
  242. while (p < end) {
  243. if (*p == ')') {
  244. ebraces ++;
  245. }
  246. else if (*p == '(') {
  247. obraces ++;
  248. }
  249. p ++;
  250. if (obraces == ebraces) {
  251. /* Skip spaces after */
  252. skip_spaces();
  253. break;
  254. }
  255. }
  256. }
  257. auto len = end - p;
  258. if (len == 0) {
  259. return parts;
  260. }
  261. auto maybe_process_part = [&](received_part_type what) -> bool {
  262. parts.emplace_back(what);
  263. auto &rcvd_part = parts.back();
  264. auto chunk = std::string_view{p, (std::size_t)(end - p)};
  265. if (!received_process_part(chunk, what, pos, rcvd_part)) {
  266. parts.pop_back();
  267. return false;
  268. }
  269. return true;
  270. };
  271. if (len > 4 && lit_compare_lowercase<4>("from", p)) {
  272. p += sizeof("from") - 1;
  273. /* We can now store from part */
  274. if (!maybe_process_part(received_part_type::RSPAMD_RECEIVED_PART_FROM)) {
  275. /* Do not accept malformed from */
  276. return {};
  277. }
  278. g_assert (pos != 0);
  279. p += pos;
  280. len = end > p ? end - p : 0;
  281. seen_from = true;
  282. }
  283. if (len > 2 && lit_compare_lowercase<2>("by", p)) {
  284. p += sizeof("by") - 1;
  285. if (!maybe_process_part(received_part_type::RSPAMD_RECEIVED_PART_BY)) {
  286. return {};
  287. }
  288. g_assert (pos != 0);
  289. p += pos;
  290. len = end > p ? end - p : 0;
  291. seen_by = true;
  292. }
  293. if (!seen_from && !seen_by) {
  294. /* Useless received */
  295. return {};
  296. }
  297. while (p < end) {
  298. bool got_part = false;
  299. if (*p == ';') {
  300. /* We are at the date separator, stop here */
  301. date_pos = p - in.data() + 1;
  302. break;
  303. }
  304. else {
  305. if (len > sizeof("with") && lit_compare_lowercase<4>("with", p)) {
  306. p += sizeof("with") - 1;
  307. got_part = maybe_process_part(received_part_type::RSPAMD_RECEIVED_PART_WITH);
  308. }
  309. else if (len > sizeof("for") && lit_compare_lowercase<3>("for", p)) {
  310. p += sizeof("for") - 1;
  311. got_part = maybe_process_part(received_part_type::RSPAMD_RECEIVED_PART_FOR);
  312. }
  313. else if (len > sizeof("id") && lit_compare_lowercase<2>("id", p)) {
  314. p += sizeof("id") - 1;
  315. got_part = maybe_process_part(received_part_type::RSPAMD_RECEIVED_PART_ID);
  316. }
  317. else {
  318. while (p < end) {
  319. if (!(g_ascii_isspace (*p) || *p == '(' || *p == ';')) {
  320. p++;
  321. }
  322. else {
  323. break;
  324. }
  325. }
  326. if (p == end) {
  327. return {};
  328. }
  329. else if (*p == ';') {
  330. date_pos = p - in.data() + 1;
  331. break;
  332. }
  333. else {
  334. got_part = maybe_process_part(received_part_type::RSPAMD_RECEIVED_PART_UNKNOWN);
  335. }
  336. }
  337. if (!got_part) {
  338. p++;
  339. len = end > p ? end - p : 0;
  340. }
  341. else {
  342. g_assert (pos != 0);
  343. p += pos;
  344. len = end > p ? end - p : 0;
  345. }
  346. }
  347. }
  348. return parts;
  349. }
/*
 * Flags passed to rspamd_parse_inet_address_pool() by the Received parsers in
 * this file: RSPAMD_INET_ADDRESS_PARSE_REMOTE combined with
 * RSPAMD_INET_ADDRESS_PARSE_NO_UNIX, cast back to the flags enum type.
 */
#define RSPAMD_INET_ADDRESS_PARSE_RECEIVED \
    (rspamd_inet_address_parse_flags)(RSPAMD_INET_ADDRESS_PARSE_REMOTE|RSPAMD_INET_ADDRESS_PARSE_NO_UNIX)
  352. static auto
  353. received_process_rdns(rspamd_mempool_t *pool,
  354. const std::string_view &in,
  355. mime_string &dest) -> bool
  356. {
  357. auto seen_dot = false;
  358. const auto *p = in.data();
  359. const auto *end = p + in.size();
  360. if (in.empty()) {
  361. return false;
  362. }
  363. if (*p == '[' && *(end - 1) == ']' && in.size() > 2) {
  364. /* We have enclosed ip address */
  365. auto *addr = rspamd_parse_inet_address_pool(p + 1,
  366. (end - p) - 2,
  367. pool,
  368. RSPAMD_INET_ADDRESS_PARSE_RECEIVED);
  369. if (addr) {
  370. const gchar *addr_str;
  371. if (rspamd_inet_address_get_port(addr) != 0) {
  372. addr_str = rspamd_inet_address_to_string_pretty(addr);
  373. }
  374. else {
  375. addr_str = rspamd_inet_address_to_string(addr);
  376. }
  377. dest.assign_copy(std::string_view{addr_str});
  378. return true;
  379. }
  380. }
  381. auto hlen = 0u;
  382. while (p < end) {
  383. if (!g_ascii_isspace(*p) && rspamd_url_is_domain(*p)) {
  384. if (*p == '.') {
  385. seen_dot = true;
  386. }
  387. hlen++;
  388. }
  389. else {
  390. break;
  391. }
  392. p++;
  393. }
  394. if (hlen > 0) {
  395. if (p == end || (seen_dot && (g_ascii_isspace(*p) || *p == '[' || *p == '('))) {
  396. /* All data looks like a hostname */
  397. dest.assign_copy(std::string_view{in.data(), hlen});
  398. return true;
  399. }
  400. }
  401. return false;
  402. }
  403. static auto
  404. received_process_host_tcpinfo(rspamd_mempool_t *pool,
  405. received_header &rh,
  406. const std::string_view &in) -> bool
  407. {
  408. rspamd_inet_addr_t *addr = nullptr;
  409. auto ret = false;
  410. if (in.empty()) {
  411. return false;
  412. }
  413. if (in[0] == '[') {
  414. /* Likely Exim version */
  415. auto brace_pos = in.find(']');
  416. if (brace_pos != std::string_view::npos) {
  417. auto substr_addr = in.substr(1, brace_pos - 1);
  418. addr = rspamd_parse_inet_address_pool(substr_addr.data(),
  419. substr_addr.size(),
  420. pool,
  421. RSPAMD_INET_ADDRESS_PARSE_RECEIVED);
  422. if (addr) {
  423. rh.addr = addr;
  424. rh.real_ip.assign_copy(std::string_view(rspamd_inet_address_to_string(addr)));
  425. }
  426. }
  427. }
  428. else {
  429. if (g_ascii_isxdigit(in[0])) {
  430. /* Try to parse IP address */
  431. addr = rspamd_parse_inet_address_pool(in.data(),
  432. in.size(), pool, RSPAMD_INET_ADDRESS_PARSE_RECEIVED);
  433. if (addr) {
  434. rh.addr = addr;
  435. rh.real_ip.assign_copy(std::string_view(rspamd_inet_address_to_string(addr)));
  436. }
  437. }
  438. if (!addr) {
  439. /* Try canonical Postfix version: rdns [ip] */
  440. auto obrace_pos = in.find('[');
  441. if (obrace_pos != std::string_view::npos) {
  442. auto ebrace_pos = in.rfind(']');
  443. if (ebrace_pos != std::string_view::npos && ebrace_pos > obrace_pos) {
  444. auto substr_addr = in.substr(obrace_pos + 1,
  445. ebrace_pos - obrace_pos - 1);
  446. addr = rspamd_parse_inet_address_pool(substr_addr.data(),
  447. substr_addr.size(),
  448. pool,
  449. RSPAMD_INET_ADDRESS_PARSE_RECEIVED);
  450. if (addr) {
  451. rh.addr = addr;
  452. rh.real_ip.assign_copy(std::string_view(rspamd_inet_address_to_string(addr)));
  453. /* Process with rDNS */
  454. auto rdns_substr = in.substr(0, obrace_pos);
  455. if (received_process_rdns(pool,rdns_substr,rh.real_hostname)) {
  456. ret = true;
  457. }
  458. }
  459. }
  460. }
  461. else {
  462. /* Hostname or some crap, sigh... */
  463. if (received_process_rdns(pool, in, rh.real_hostname)) {
  464. ret = true;
  465. }
  466. }
  467. }
  468. }
  469. return ret;
  470. }
  471. static void
  472. received_process_from(rspamd_mempool_t *pool,
  473. const received_part &rpart,
  474. received_header &rh)
  475. {
  476. if (rpart.data.size() > 0) {
  477. /* We have seen multiple cases:
  478. * - [ip] (hostname/unknown [real_ip])
  479. * - helo (hostname/unknown [real_ip])
  480. * - [ip]
  481. * - hostname
  482. * - hostname ([ip]:port helo=xxx)
  483. * Maybe more...
  484. */
  485. auto seen_ip_in_data = false;
  486. if (!rpart.comments.empty()) {
  487. /* We can have info within comment as part of RFC */
  488. received_process_host_tcpinfo(
  489. pool, rh,
  490. rpart.comments[0].as_view());
  491. }
  492. if (rh.real_ip.size() == 0) {
  493. /* Try to do the same with data */
  494. if (received_process_host_tcpinfo(
  495. pool, rh,
  496. rpart.data.as_view())) {
  497. seen_ip_in_data = true;
  498. }
  499. }
  500. if (!seen_ip_in_data) {
  501. if (rh.real_ip.size() != 0) {
  502. /* Get announced hostname (usually helo) */
  503. received_process_rdns(pool,
  504. rpart.data.as_view(),
  505. rh.from_hostname);
  506. }
  507. else {
  508. received_process_host_tcpinfo(pool,
  509. rh, rpart.data.as_view());
  510. }
  511. }
  512. }
  513. else {
  514. /* rpart->dlen = 0 */
  515. if (!rpart.comments.empty()) {
  516. received_process_host_tcpinfo(
  517. pool, rh,
  518. rpart.comments[0].as_view());
  519. }
  520. }
  521. }
  522. static auto
  523. received_header_parse(received_header_chain &chain, rspamd_mempool_t *pool,
  524. const std::string_view &in,
  525. struct rspamd_mime_header *hdr) -> bool
  526. {
  527. std::ptrdiff_t date_pos = -1;
  528. static constexpr const auto protos_map = frozen::make_unordered_map<frozen::string, received_flags>({
  529. {"smtp", received_flags::SMTP},
  530. {"esmtp", received_flags::ESMTP},
  531. {"esmtpa", received_flags::ESMTPA |
  532. received_flags::AUTHENTICATED},
  533. {"esmtpsa", received_flags::ESMTPSA |
  534. received_flags::SSL |
  535. received_flags::AUTHENTICATED},
  536. {"esmtps", received_flags::ESMTPS |
  537. received_flags::SSL},
  538. {"lmtp", received_flags::LMTP},
  539. {"imap", received_flags::IMAP},
  540. {"imaps", received_flags::IMAP |
  541. received_flags::SSL},
  542. {"http", received_flags::HTTP},
  543. {"https", received_flags::HTTP |
  544. received_flags::SSL},
  545. {"local", received_flags::LOCAL}
  546. });
  547. auto parts = received_spill(in, date_pos);
  548. if (parts.empty()) {
  549. return false;
  550. }
  551. auto &rh = chain.new_received();
  552. rh.flags = received_flags::UNKNOWN;
  553. rh.hdr = hdr;
  554. for (const auto &part : parts) {
  555. switch (part.type) {
  556. case received_part_type::RSPAMD_RECEIVED_PART_FROM:
  557. received_process_from(pool, part, rh);
  558. break;
  559. case received_part_type::RSPAMD_RECEIVED_PART_BY:
  560. received_process_rdns(pool,
  561. part.data.as_view(),
  562. rh.by_hostname);
  563. break;
  564. case received_part_type::RSPAMD_RECEIVED_PART_WITH:
  565. if (part.data.size() > 0) {
  566. auto proto_flag_it = protos_map.find(part.data.as_view());
  567. if (proto_flag_it != protos_map.end()) {
  568. rh.flags = proto_flag_it->second;
  569. }
  570. }
  571. break;
  572. case received_part_type::RSPAMD_RECEIVED_PART_FOR:
  573. rh.for_mbox.assign_copy(part.data);
  574. rh.for_addr = rspamd_email_address_from_smtp(rh.for_mbox.data(),
  575. rh.for_mbox.size());
  576. break;
  577. default:
  578. /* Do nothing */
  579. break;
  580. }
  581. }
  582. if (!rh.real_hostname.empty() && rh.from_hostname.empty()) {
  583. rh.from_hostname.assign_copy(rh.real_hostname);
  584. }
  585. if (date_pos > 0 && date_pos < in.size()) {
  586. auto date_sub = in.substr(date_pos);
  587. rh.timestamp = rspamd_parse_smtp_date((const unsigned char*)date_sub.data(),
  588. date_sub.size(), nullptr);
  589. }
  590. return true;
  591. }
  592. static auto
  593. received_maybe_fix_task(struct rspamd_task *task) -> bool
  594. {
  595. auto *recv_chain_ptr = static_cast<received_header_chain *>(MESSAGE_FIELD(task, received_headers));
  596. if (recv_chain_ptr) {
  597. auto need_recv_correction = false;
  598. auto top_recv_maybe = recv_chain_ptr->get_received(0);
  599. if (top_recv_maybe.has_value()) {
  600. auto &top_recv = top_recv_maybe.value().get();
  601. const auto *raddr = top_recv.addr;
  602. if (top_recv.real_ip.size() == 0 || (task->cfg && task->cfg->ignore_received)) {
  603. need_recv_correction = true;
  604. }
  605. else if (!(task->flags & RSPAMD_TASK_FLAG_NO_IP) && task->from_addr) {
  606. if (!raddr) {
  607. need_recv_correction = true;
  608. }
  609. else {
  610. if (rspamd_inet_address_compare(raddr, task->from_addr, FALSE) != 0) {
  611. need_recv_correction = true;
  612. }
  613. }
  614. }
  615. if (need_recv_correction && !(task->flags & RSPAMD_TASK_FLAG_NO_IP)
  616. && task->from_addr) {
  617. msg_debug_task ("the first received seems to be"
  618. " not ours, prepend it with fake one");
  619. auto &trecv = recv_chain_ptr->new_received(received_header_chain::append_type::append_head);
  620. trecv.flags |= received_flags::ARTIFICIAL;
  621. if (task->flags & RSPAMD_TASK_FLAG_SSL) {
  622. trecv.flags |= received_flags::SSL;
  623. }
  624. if (task->auth_user) {
  625. trecv.flags |= received_flags::AUTHENTICATED;
  626. }
  627. trecv.real_ip.assign_copy(std::string_view(rspamd_inet_address_to_string(task->from_addr)));
  628. const auto *mta_name = (const char*)rspamd_mempool_get_variable(task->task_pool,
  629. RSPAMD_MEMPOOL_MTA_NAME);
  630. if (mta_name) {
  631. trecv.by_hostname.assign_copy(std::string_view(mta_name));
  632. }
  633. trecv.addr = rspamd_inet_address_copy(task->from_addr);
  634. if (task->hostname) {
  635. trecv.real_hostname.assign_copy(std::string_view(task->hostname));
  636. trecv.from_hostname.assign_copy(trecv.real_hostname);
  637. }
  638. return true;
  639. }
  640. /* Extract data from received header if we were not given IP */
  641. if (!need_recv_correction && (task->flags & RSPAMD_TASK_FLAG_NO_IP) &&
  642. (task->cfg && !task->cfg->ignore_received)) {
  643. if (!top_recv.real_ip.empty()) {
  644. if (!rspamd_parse_inet_address (&task->from_addr,
  645. top_recv.real_ip.data(),
  646. top_recv.real_ip.size(),
  647. RSPAMD_INET_ADDRESS_PARSE_NO_UNIX)) {
  648. msg_warn_task ("cannot get IP from received header: '%s'",
  649. top_recv.real_ip.data());
  650. task->from_addr = nullptr;
  651. }
  652. }
  653. if (!top_recv.real_hostname.empty()) {
  654. task->hostname = top_recv.real_hostname.data();
  655. }
  656. return true;
  657. }
  658. }
  659. }
  660. return false;
  661. }
  662. static auto
  663. received_export_to_lua(received_header_chain *chain, lua_State *L) -> bool
  664. {
  665. if (chain == nullptr) {
  666. return false;
  667. }
  668. lua_createtable(L, chain->size(), 0);
  669. auto push_flag = [L](const received_header &rh, received_flags fl, const char *name) {
  670. lua_pushboolean(L, !!(rh.flags & fl));
  671. lua_setfield(L, -2, name);
  672. };
  673. auto i = 1;
  674. for (const auto &rh : chain->as_vector()) {
  675. lua_createtable (L, 0, 10);
  676. if (rh.hdr && rh.hdr->decoded) {
  677. rspamd_lua_table_set(L, "raw", rh.hdr->decoded);
  678. }
  679. lua_createtable(L, 0, 3);
  680. push_flag(rh, received_flags::ARTIFICIAL, "artificial");
  681. push_flag(rh, received_flags::AUTHENTICATED, "authenticated");
  682. push_flag(rh, received_flags::SSL, "ssl");
  683. lua_setfield(L, -2, "flags");
  684. auto push_nullable_string = [L](const mime_string &st, const char *field) {
  685. if (st.empty()) {
  686. lua_pushnil(L);
  687. }
  688. else {
  689. lua_pushlstring(L, st.data(), st.size());
  690. }
  691. lua_setfield(L, -2, field);
  692. };
  693. push_nullable_string(rh.from_hostname, "from_hostname");
  694. push_nullable_string(rh.real_hostname, "real_hostname");
  695. push_nullable_string(rh.real_ip, "from_ip");
  696. push_nullable_string(rh.by_hostname, "by_hostname");
  697. push_nullable_string(rh.for_mbox, "for");
  698. if (rh.addr) {
  699. rspamd_lua_ip_push(L, rh.addr);
  700. }
  701. else {
  702. lua_pushnil(L);
  703. }
  704. lua_setfield(L, -2, "real_ip");
  705. lua_pushstring(L, received_protocol_to_string(rh.flags));
  706. lua_setfield(L, -2, "proto");
  707. lua_pushinteger(L, rh.timestamp);
  708. lua_setfield(L, -2, "timestamp");
  709. lua_rawseti(L, -2, i++);
  710. }
  711. return true;
  712. }
  713. } // namespace rspamd::mime
  714. bool
  715. rspamd_received_header_parse(struct rspamd_task *task,
  716. const char *data, size_t sz,
  717. struct rspamd_mime_header *hdr)
  718. {
  719. auto *recv_chain_ptr = static_cast<rspamd::mime::received_header_chain *>
  720. (MESSAGE_FIELD(task, received_headers));
  721. if (recv_chain_ptr == nullptr) {
  722. /* This constructor automatically registers dtor in mempool */
  723. recv_chain_ptr = new rspamd::mime::received_header_chain(task);
  724. MESSAGE_FIELD(task, received_headers) = (void *)recv_chain_ptr;
  725. }
  726. return rspamd::mime::received_header_parse(*recv_chain_ptr, task->task_pool,
  727. std::string_view{data, sz}, hdr);
  728. }
/**
 * C entry point that forwards to rspamd::mime::received_maybe_fix_task()
 * and returns its result unchanged.
 */
bool
rspamd_received_maybe_fix_task(struct rspamd_task *task)
{
    return rspamd::mime::received_maybe_fix_task(task);
}
/**
 * C entry point that passes the received chain stored in the task message
 * (possibly null) and the Lua state to rspamd::mime::received_export_to_lua()
 * and returns its result unchanged.
 */
bool
rspamd_received_export_to_lua(struct rspamd_task *task, lua_State *L)
{
    return rspamd::mime::received_export_to_lua(
            static_cast<rspamd::mime::received_header_chain *>(MESSAGE_FIELD(task, received_headers)),
            L);
}
  741. /* Tests part */
  742. #define DOCTEST_CONFIG_IMPLEMENTATION_IN_DLL
  743. #include "doctest/doctest.h"
/*
 * Table-driven tests for the Received header parser: each case pairs a raw
 * header value with the exact set of fields expected from as_map().
 */
TEST_SUITE("received") {
TEST_CASE("parse received")
{
    using namespace std::string_view_literals;
    using map_type = robin_hood::unordered_flat_map<std::string_view, std::string_view>;
    /* The `sv` suffix keeps the full length of inputs that contain embedded \0 */
    std::vector<std::pair<std::string_view, map_type>> cases{
            // Simple received
            {"from smtp11.mailtrack.pl (smtp11.mailtrack.pl [185.243.30.90])"sv,
                    {
                            {"real_ip", "185.243.30.90"},
                            {"real_hostname", "smtp11.mailtrack.pl"},
                            {"from_hostname", "smtp11.mailtrack.pl"}
                    }
            },
            // Real Postfix IPv6 received
            {"from server.chat-met-vreemden.nl (unknown [IPv6:2a01:7c8:aab6:26d:5054:ff:fed1:1da2])\n"
             "\t(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))\n"
             "\t(Client did not present a certificate)\n"
             "\tby mx1.freebsd.org (Postfix) with ESMTPS id CF0171862\n"
             "\tfor <test@example.com>; Mon, 6 Jul 2015 09:01:20 +0000 (UTC)\n"
             "\t(envelope-from upwest201diana@outlook.com)"sv,
                    {
                            {"real_ip", "2a01:7c8:aab6:26d:5054:ff:fed1:1da2"},
                            {"from_hostname", "server.chat-met-vreemden.nl"},
                            {"by_hostname", "mx1.freebsd.org"},
                            {"for_mbox", "<test@example.com>"}
                    }
            },
            // Exim IPv4 received
            {"from localhost ([127.0.0.1]:49019 helo=hummus.csx.cam.ac.uk)\n"
             " by hummus.csx.cam.ac.uk with esmtp (Exim 4.91-pdpfix1)\n"
             " (envelope-from <exim-dev-bounces@exim.org>)\n"
             " id 1fZ55o-0006DP-3H\n"
             " for <xxx@xxx.xxx>; Sat, 30 Jun 2018 02:54:28 +0100"sv,
                    {
                            {"from_hostname", "localhost"},
                            {"real_ip", "127.0.0.1"},
                            {"for_mbox", "<xxx@xxx.xxx>"},
                            {"by_hostname", "hummus.csx.cam.ac.uk"},
                    }
            },
            // Exim IPv6 received
            {"from smtp.spodhuis.org ([2a02:898:31:0:48:4558:736d:7470]:38689\n"
             " helo=mx.spodhuis.org)\n"
             " by hummus.csx.cam.ac.uk with esmtpsa (TLSv1.3:TLS_AES_256_GCM_SHA384:256)\n"
             " (Exim 4.91-pdpfix1+cc) (envelope-from <xxx@exim.org>)\n"
             " id 1fZ55k-0006CO-9M\n"
             " for exim-dev@exim.org; Sat, 30 Jun 2018 02:54:24 +0100"sv,
                    {
                            {"from_hostname", "smtp.spodhuis.org"},
                            {"real_ip", "2a02:898:31:0:48:4558:736d:7470"},
                            {"for_mbox", "exim-dev@exim.org"},
                            {"by_hostname", "hummus.csx.cam.ac.uk"},
                    }
            },
            // Haraka received
            {"from aaa.cn ([1.1.1.1]) by localhost.localdomain (Haraka/2.8.18) with "
             "ESMTPA id 349C9C2B-491A-4925-A687-3EF14038C344.1 envelope-from <huxin@xxx.com> "
             "(authenticated bits=0); Tue, 03 Jul 2018 14:18:13 +0200"sv,
                    {
                            {"from_hostname", "aaa.cn"},
                            {"real_ip", "1.1.1.1"},
                            {"by_hostname", "localhost.localdomain"},
                    }
            },
            // Invalid by
            {"from [192.83.172.101] (HELLO 148.251.238.35) (148.251.238.35) "
             "by guovswzqkvry051@sohu.com with gg login "
             "by AOL 6.0 for Windows US sub 008 SMTP ; Tue, 03 Jul 2018 09:01:47 -0300"sv,
                    {
                            {"from_hostname", "192.83.172.101"},
                            {"real_ip", "192.83.172.101"},
                    }
            },
            // Invalid hostinfo
            {"from example.com ([]) by example.com with ESMTP id 2019091111 ;"
             " Thu, 26 Sep 2019 11:19:07 +0200"sv,
                    {
                            {"by_hostname", "example.com"},
                            {"from_hostname", "example.com"},
                            {"real_hostname", "example.com"},
                    }
            },
            // Different real and announced hostnames + broken crap
            {"from 171-29.br (1-1-1-1.z.com.br [1.1.1.1]) by x.com.br (Postfix) "
             "with;ESMTP id 44QShF6xj4z1X for <hey@y.br>; Thu, 21 Mar 2019 23:45:46 -0300 "
             ": <g @yi.br>"sv,
                    {
                            {"real_ip", "1.1.1.1"},
                            {"from_hostname", "171-29.br"},
                            {"real_hostname", "1-1-1-1.z.com.br"},
                            {"by_hostname", "x.com.br"},
                    }
            },
            // Different real and announced ips + no hostname
            {"from [127.0.0.1] ([127.0.0.2]) by smtp.gmail.com with ESMTPSA id xxxololo"sv,
                    {
                            {"real_ip", "127.0.0.2"},
                            {"from_hostname", "127.0.0.1"},
                            {"by_hostname", "smtp.gmail.com"},
                    }
            },
            // Different real and announced hostnames
            {"from 185.118.166.127 (steven2.zhou01.pserver.ru [185.118.166.127]) "
             "by mail.832zsu.cn (Postfix) with ESMTPA id AAD722133E34"sv,
                    {
                            {"real_ip", "185.118.166.127"},
                            {"from_hostname", "185.118.166.127"},
                            {"real_hostname", "steven2.zhou01.pserver.ru"},
                            {"by_hostname", "mail.832zsu.cn"},
                    }
            },
            // \0 in received must be filtered
            {"from smtp11.mailt\0rack.pl (smtp11.mail\0track.pl [1\085.243.30.90])"sv,
                    {
                            {"real_ip", "185.243.30.90"},
                            {"real_hostname", "smtp11.mailtrack.pl"},
                            {"from_hostname", "smtp11.mailtrack.pl"}
                    }
            },
            // No from part
            {"by mail.832zsu.cn (Postfix) with ESMTPA id AAD722133E34"sv,
                    {
                            {"by_hostname", "mail.832zsu.cn"},
                    }
            },
            // From part is in the comment
            {"(from asterisk@localhost)\n"
             " by pbx.xxx.com (8.14.7/8.14.7/Submit) id 076Go4wD014562;\n"
             " Thu, 6 Aug 2020 11:50:04 -0500"sv,
                    {
                            {"by_hostname", "pbx.xxx.com"},
                    }
            },
    };
    rspamd_mempool_t *pool = rspamd_mempool_new_default("rcvd test", 0);

    for (auto &&c : cases) {
        /* One subcase per input, named after the raw header text */
        SUBCASE(c.first.data()) {
            rspamd::mime::received_header_chain chain;
            auto ret = rspamd::mime::received_header_parse(chain, pool,
                    c.first, nullptr);
            CHECK(ret == true);
            auto &&rh = chain.get_received(0);
            CHECK(rh.has_value());
            auto res = rh.value().get().as_map();

            /* Every expected field must be present with the expected value... */
            for (const auto &expected : c.second) {
                CHECK_MESSAGE(res.contains(expected.first), expected.first.data());
                CHECK(res[expected.first] == expected.second);
            }
            /* ...and the parser must not produce any field that is not expected */
            for (const auto &existing : res) {
                CHECK_MESSAGE(c.second.contains(existing.first), existing.first.data());
                CHECK(c.second[existing.first] == existing.second);
            }
        }
    }

    rspamd_mempool_delete(pool);
}
}