From 5d3795649ea758ab176195c68aeba5a50a972356 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Thu, 7 Feb 2019 13:57:24 +0000 Subject: [Project] Attach new received parser --- src/CMakeLists.txt | 16 ++++++++-------- src/libmime/mime_headers.c | 48 +++++++++++++++++++++++++++++++++++----------- test/lua/unit/received.lua | 25 ++++++++++++++++++++---- 3 files changed, 66 insertions(+), 23 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index ff7198270..71ce71119 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -117,7 +117,7 @@ LIST(LENGTH PLUGINSSRC RSPAMD_MODULES_NUM) SET(RAGEL_DEPENDS "${CMAKE_SOURCE_DIR}/src/ragel/smtp_address.rl" "${CMAKE_SOURCE_DIR}/src/ragel/smtp_date.rl" "${CMAKE_SOURCE_DIR}/src/ragel/smtp_ip.rl" - "${CMAKE_SOURCE_DIR}/src/ragel/smtp_whitespace.rl" + "${CMAKE_SOURCE_DIR}/src/ragel/smtp_base.rl" "${CMAKE_SOURCE_DIR}/src/ragel/smtp_received.rl" "${CMAKE_SOURCE_DIR}/src/ragel/content_disposition.rl") RAGEL_TARGET(ragel_smtp_addr @@ -125,11 +125,6 @@ RAGEL_TARGET(ragel_smtp_addr DEPENDS ${RAGEL_DEPENDS} COMPILE_FLAGS -T1 OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/smtp_addr_parser.rl.c) -RAGEL_TARGET(ragel_smtp_received - INPUTS ${CMAKE_SOURCE_DIR}/src/ragel/smtp_received_parser.rl - DEPENDS ${RAGEL_DEPENDS} - COMPILE_FLAGS -T1 - OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/smtp_received_parser.rl.c) RAGEL_TARGET(ragel_content_disposition INPUTS ${CMAKE_SOURCE_DIR}/src/ragel/content_disposition_parser.rl DEPENDS ${RAGEL_DEPENDS} @@ -145,6 +140,11 @@ RAGEL_TARGET(ragel_smtp_date DEPENDS ${RAGEL_DEPENDS} COMPILE_FLAGS -G2 OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/date_parser.rl.c) +RAGEL_TARGET(ragel_smtp_ip + INPUTS ${CMAKE_SOURCE_DIR}/src/ragel/smtp_ip_parser.rl + DEPENDS ${RAGEL_DEPENDS} + COMPILE_FLAGS -G2 + OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/ip_parser.rl.c) ######################### LINK SECTION ############################### ADD_LIBRARY(rspamd-server STATIC @@ -157,12 +157,12 @@ ADD_LIBRARY(rspamd-server STATIC ${CMAKE_CURRENT_BINARY_DIR}/modules.c ${PLUGINSSRC} "${RAGEL_ragel_smtp_addr_OUTPUTS}" - "${RAGEL_ragel_smtp_received_OUTPUTS}" "${RAGEL_ragel_newlines_strip_OUTPUTS}" "${RAGEL_ragel_content_type_OUTPUTS}" "${RAGEL_ragel_content_disposition_OUTPUTS}" "${RAGEL_ragel_rfc2047_OUTPUTS}" - "${RAGEL_ragel_smtp_date_OUTPUTS}") + "${RAGEL_ragel_smtp_date_OUTPUTS}" + "${RAGEL_ragel_smtp_ip_OUTPUTS}") TARGET_LINK_LIBRARIES(rspamd-server rspamd-http-parser) TARGET_LINK_LIBRARIES(rspamd-server rspamd-cdb) TARGET_LINK_LIBRARIES(rspamd-server rspamd-lpeg) diff --git a/src/libmime/mime_headers.c b/src/libmime/mime_headers.c index a9ebbdb3a..ec3d87e8a 100644 --- a/src/libmime/mime_headers.c +++ b/src/libmime/mime_headers.c @@ -1103,7 +1103,7 @@ rspamd_smtp_received_spill (struct rspamd_task *task, DL_APPEND (head, cur_part); } - while (p > end) { + while (p < end) { if (*p == ';') { /* We are at the date separator, stop here */ *date_pos = p - (const guchar *)data + 1; @@ -1171,21 +1171,39 @@ rspamd_smtp_received_process_rdns (struct rspamd_task *task, { const gchar *p, *end; gsize hlen = 0; + gboolean seen_dot = FALSE; p = begin; end = begin + len; while (p < end) { - if (rspamd_url_is_domain (*p)) { + if (!g_ascii_isspace (*p) && rspamd_url_is_domain (*p)) { + if (*p == '.') { + seen_dot = TRUE; + } + hlen ++; } + else { + break; + } p ++; } if (hlen > 0) { - if (p == end || g_ascii_isspace (*p) || *p == '[' || *p == '(') { - /* We have some hostname, accept it */ + if (p == end) { + /* All data looks like a hostname */ + gchar *dest; + + dest = rspamd_mempool_alloc (task->task_pool, + hlen + 1); + rspamd_strlcpy (dest, begin, hlen + 1); + *pdest = dest; + + return TRUE; + } + else if (seen_dot && (g_ascii_isspace (*p) || *p == '[' || *p == '(')) { gchar *dest; dest = rspamd_mempool_alloc (task->task_pool, @@ -1214,8 +1232,8 @@ rspamd_smtp_received_process_from_comment (struct rspamd_task *task, const gchar *brace_pos = memchr (comment->data, ']', comment->dlen); if (brace_pos) { - addr = rspamd_parse_smtp_ip (comment->data, - brace_pos - comment->data + 1, + addr = rspamd_parse_inet_address_pool (comment->data + 1, + brace_pos - comment->data - 1, task->task_pool); if (addr) { @@ -1245,8 +1263,8 @@ rspamd_smtp_received_process_from_comment (struct rspamd_task *task, ebrace_pos = memchr (obrace_pos, ']', dend - obrace_pos); if (ebrace_pos) { - addr = rspamd_parse_smtp_ip (obrace_pos, - ebrace_pos - obrace_pos + 1, + addr = rspamd_parse_inet_address_pool (obrace_pos + 1, + ebrace_pos - obrace_pos - 1, task->task_pool); if (addr) { @@ -1307,8 +1325,8 @@ rspamd_smtp_received_process_from (struct rspamd_task *task, rspamd_inet_addr_t *addr; if (brace_pos) { - addr = rspamd_parse_smtp_ip (rpart->data, - brace_pos - rpart->data + 1, + addr = rspamd_parse_inet_address_pool (rpart->data + 1, + brace_pos - rpart->data - 1, task->task_pool); if (addr) { @@ -1356,7 +1374,7 @@ rspamd_smtp_received_parse (struct rspamd_task *task, size_t len, struct received_header *rh) { - goffset date_pos = 0; + goffset date_pos = -1; struct rspamd_received_part *head, *cur; rspamd_ftok_t t1, t2; @@ -1452,6 +1470,9 @@ rspamd_smtp_received_parse (struct rspamd_task *task, } } + break; + default: + /* Do nothing */ break; } } @@ -1464,5 +1485,10 @@ rspamd_smtp_received_parse (struct rspamd_task *task, rh->from_hostname = rh->real_hostname; } + if (date_pos > 0 && date_pos < len) { + rh->timestamp = rspamd_parse_smtp_date (data + date_pos, + len - date_pos); + } + return 0; } \ No newline at end of file diff --git a/test/lua/unit/received.lua b/test/lua/unit/received.lua index ac21c0e83..8185d9ada 100644 --- a/test/lua/unit/received.lua +++ b/test/lua/unit/received.lua @@ -56,8 +56,8 @@ context("Received headers parser", function() for exim-dev@exim.org; Sat, 30 Jun 2018 02:54:24 +0100]], { from_hostname = 'smtp.spodhuis.org', - from_ip = '2a02:898:31:0:48:4558:736d:7470', - real_ip = '2a02:898:31:0:48:4558:736d:7470', + from_ip = '2a02:898:31::48:4558:736d:7470', + real_ip = '2a02:898:31::48:4558:736d:7470', by_hostname = 'hummus.csx.cam.ac.uk', } }, @@ -68,12 +68,29 @@ context("Received headers parser", function() real_ip = '1.1.1.1', } }, - {'from [192.83.172.101] by (HELLO 148.251.238.35 ) (148.251.238.35) by guovswzqkvry051@sohu.com with gg login by AOL 6.0 for Windows US sub 008 SMTP ; Tue, 03 Jul 2018 09:01:47 -0300', + {'from [192.83.172.101] (HELLO 148.251.238.35) (148.251.238.35) by guovswzqkvry051@sohu.com with gg login by AOL 6.0 for Windows US sub 008 SMTP ; Tue, 03 Jul 2018 09:01:47 -0300', { from_ip = '192.83.172.101', by_hostname = '', - } + }, + }, + {'from [61.174.163.26] (helo=host) by sc8-sf-list1.sourceforge.net with smtp (Exim 3.31-VA-mm2 #1 (Debian)) id 18t2z0-0001NX-00 for ; Wed, 12 Mar 2003 01:57:10 -0800', + { + from_ip = '61.174.163.26', + by_hostname = 'sc8-sf-list1.sourceforge.net', + }, }, + {[[from [127.0.0.1] (unknown [65.19.167.131]) + (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) + (Client did not present a certificate) + by mail01.someotherdomain.org (Postfix) with ESMTPSA id 43tYMW2yKHz50MHS + for ; Mon, 4 Feb 2019 16:39:35 +0000 (GMT)]], + { + from_ip = '65.19.167.131', + real_ip = '65.19.167.131', + by_hostname = 'mail01.someotherdomain.org', + } + } } local task = ffi.C.rspamd_task_new(nil, nil) -- cgit v1.2.3