source.dussan.org Git - rspamd.git/commitdiff
[Project] Add heuristical from parser to received parser
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Wed, 6 Feb 2019 14:47:20 +0000 (14:47 +0000)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Wed, 6 Feb 2019 14:47:20 +0000 (14:47 +0000)
src/libmime/email_addr.c
src/libmime/email_addr.h
src/libmime/mime_headers.c
src/libmime/mime_headers.h
src/libserver/task.h

index b8d4b04f9ee89ffa7ec83d02d3bafc0dc81c9c04..38de7b4f7449bdaad6f3b1b08fb7af8d3362e5f6 100644 (file)
@@ -496,30 +496,4 @@ rspamd_email_address_list_destroy (gpointer ptr)
        }
 
        g_ptr_array_free (ar, TRUE);
-}
-
-void rspamd_smtp_maybe_process_smtp_comment (struct rspamd_task *task,
-                                                                                        const char *data, size_t len,
-                                                                                        struct received_header *rh)
-{
-       if (!rh->by_hostname) {
-               /* Heuristic to detect IP addresses like in Exim received:
-                * [xxx]:port or [xxx]
-                */
-
-               if (*data == '[' && len > 2) {
-                       const gchar *p = data + 1;
-                       gsize iplen = rspamd_memcspn (p, "]", len - 1);
-
-                       if (iplen > 0) {
-                               guchar tbuf[sizeof(struct in6_addr) + sizeof(guint32)];
-
-                               if (rspamd_parse_inet_address_ip4 (p, iplen, tbuf) ||
-                                               rspamd_parse_inet_address_ip6 (p, iplen, tbuf)) {
-                                       rh->comment_ip = rspamd_mempool_alloc (task->task_pool, iplen + 1);
-                                       rspamd_strlcpy (rh->comment_ip, p, iplen + 1);
-                               }
-                       }
-               }
-       }
 }
\ No newline at end of file
index 129d2ba44874e8c0bf40476e2e10e5f646b8256a..a08d8ab3cc0aa53cd7c26f4088393d08f7862153 100644 (file)
@@ -53,16 +53,6 @@ struct rspamd_email_address {
 
 struct received_header;
 struct rspamd_task;
-/**
- * Try to parse SMTP comment to process stupid Exim received headers
- * @param task
- * @param data
- * @param len
- * @param rh
- */
-void rspamd_smtp_maybe_process_smtp_comment (struct rspamd_task *task,
-                                                                                        const char *data, size_t len,
-                                                                                        struct received_header *rh);
 
 /**
  * Create email address from a single rfc822 address (e.g. from mail from:)
index 19ad3262e82c6c1fbd2bd26d2d74fe44b92779b5..cb87bc46e7c12097b4d3cb0b2084f970a360eb0b 100644 (file)
@@ -19,6 +19,7 @@
 #include "mime_encoding.h"
 #include "contrib/uthash/utlist.h"
 #include "libserver/mempool_vars_internal.h"
+#include "libserver/url.h"
 #include <unicode/utf8.h>
 
 static void
@@ -37,19 +38,22 @@ rspamd_mime_header_check_special (struct rspamd_task *task,
                recv = rspamd_mempool_alloc0 (task->task_pool,
                                sizeof (struct received_header));
                recv->hdr = rh;
-               rspamd_smtp_received_parse (task, rh->decoded,
-                               strlen (rh->decoded), recv);
-               /* Set flags */
-               if (recv->type == RSPAMD_RECEIVED_ESMTPA ||
+
+               if (rspamd_smtp_received_parse (task, rh->decoded,
+                               strlen (rh->decoded), recv) != -1) {
+                       /* Set flags */
+                       if (recv->type == RSPAMD_RECEIVED_ESMTPA ||
                                recv->type == RSPAMD_RECEIVED_ESMTPSA) {
-                       recv->flags |= RSPAMD_RECEIVED_FLAG_AUTHENTICATED;
-               }
-               if (recv->type == RSPAMD_RECEIVED_ESMTPS ||
+                               recv->flags |= RSPAMD_RECEIVED_FLAG_AUTHENTICATED;
+                       }
+                       if (recv->type == RSPAMD_RECEIVED_ESMTPS ||
                                recv->type == RSPAMD_RECEIVED_ESMTPSA) {
-                       recv->flags |= RSPAMD_RECEIVED_FLAG_SSL;
+                               recv->flags |= RSPAMD_RECEIVED_FLAG_SSL;
+                       }
+
+                       g_ptr_array_add (task->received, recv);
                }
 
-               g_ptr_array_add (task->received, recv);
                rh->type = RSPAMD_HEADER_RECEIVED;
                break;
        case 0x76F31A09F4352521ULL:     /* to */
@@ -931,6 +935,8 @@ rspamd_smtp_received_process_part (struct rspamd_task *task,
                                                        memcpy (comment->data, c, p - c);
                                                        rspamd_str_lc (comment->data, p - c);
                                                        comment->dlen = p - c;
+                                                       comment->data = (gchar *)rspamd_string_len_strip (
+                                                                       comment->data, &comment->dlen, " \t");
 
                                                        if (!npart->head_comment) {
                                                                comment->prev = NULL;
@@ -964,6 +970,8 @@ rspamd_smtp_received_process_part (struct rspamd_task *task,
                                                memcpy (npart->data, c, p - c);
                                                rspamd_str_lc (npart->data, p - c);
                                                npart->dlen = p - c;
+                                               npart->data = (gchar *)rspamd_string_len_strip (
+                                                               npart->data, &npart->dlen, " \t");
                                        }
                                }
 
@@ -997,6 +1005,8 @@ rspamd_smtp_received_process_part (struct rspamd_task *task,
                                                memcpy (npart->data, c, p - c);
                                                rspamd_str_lc (npart->data, p - c);
                                                npart->dlen = p - c;
+                                               npart->data = (gchar *)rspamd_string_len_strip (
+                                                               npart->data, &npart->dlen, " \t");
                                        }
                                }
 
@@ -1029,6 +1039,8 @@ rspamd_smtp_received_process_part (struct rspamd_task *task,
                                memcpy (npart->data, c, p - c);
                                rspamd_str_lc (npart->data, p - c);
                                npart->dlen = p - c;
+                               npart->data = (gchar *)rspamd_string_len_strip (npart->data,
+                                               &npart->dlen, " \t");
                        }
 
                        return npart;
@@ -1161,13 +1173,205 @@ rspamd_smtp_received_spill (struct rspamd_task *task,
        return head;
 }
 
+/**
+ * Take the leading hostname-like run of `begin` and store a copy of it in
+ * the received header.
+ *
+ * Scanning stops at the first whitespace character or the first character
+ * rejected by rspamd_url_is_domain(). The run is accepted only when it is
+ * non-empty and is followed by the end of the buffer, whitespace, '[' or '('.
+ *
+ * @param task task whose memory pool provides storage for the copy
+ * @param begin start of the text to scan; only `len` bytes are examined
+ * @param len number of bytes available at `begin`
+ * @param rh received header to update
+ * @param is_real TRUE to fill rh->real_hostname, FALSE to fill rh->from_hostname
+ * @return TRUE if a hostname was stored, FALSE otherwise
+ */
+static gboolean
+rspamd_smtp_received_process_rdns (struct rspamd_task *task,
+                                                                  const gchar *begin,
+                                                                  gsize len,
+                                                                  struct received_header *rh,
+                                                                  gboolean is_real)
+{
+       const gchar *p, *end;
+       gsize hlen = 0;
+
+       p = begin;
+       end = begin + len;
+
+       while (p < end) {
+               /*
+                * Stop at the first byte that cannot belong to a hostname, so that
+                * hlen is the length of a contiguous prefix and p points at the
+                * delimiter that follows it. Whitespace is checked explicitly as a
+                * hostname never contains it.
+                */
+               if (g_ascii_isspace (*p) || !rspamd_url_is_domain (*p)) {
+                       break;
+               }
+
+               hlen ++;
+               p ++;
+       }
+
+       if (hlen > 0) {
+               /* p < end is guaranteed by short-circuit before *p is read */
+               if (p == end || g_ascii_isspace (*p) || *p == '[' || *p == '(') {
+                       /* We have some hostname, accept it */
+                       gchar *dest;
+
+                       dest = rspamd_mempool_alloc (task->task_pool,
+                                       hlen + 1);
+                       /* Copies hlen bytes and NUL terminates the result */
+                       rspamd_strlcpy (dest, begin, hlen + 1);
+
+                       if (is_real) {
+                               rh->real_hostname = dest;
+                       }
+                       else {
+                               rh->from_hostname = dest;
+                       }
+
+                       return TRUE;
+               }
+       }
+
+       return FALSE;
+}
+
+/**
+ * Extract the sender address and, where present, its reverse DNS name from
+ * the comment attached to the `from` part of a Received header.
+ *
+ * Recognised comment shapes:
+ * - `[ip]...`     (Exim style): only the address is taken
+ * - `ip`          (bare address): only the address is taken
+ * - `rdns [ip]`   (Postfix style): address plus hostname
+ * - `hostname`    (no brackets at all): hostname only
+ *
+ * On a successfully parsed address rh->addr, rh->real_ip and rh->from_ip are
+ * set; a hostname is stored in rh->real_hostname.
+ *
+ * @param task task whose memory pool is used for parsed data
+ * @param rh received header to update
+ * @param comment comment to inspect; callers ensure comment->dlen > 0
+ * @return TRUE only if a hostname was stored, FALSE otherwise (including
+ *         the cases where just an address was found)
+ */
+static gboolean
+rspamd_smtp_received_process_from_comment (struct rspamd_task *task,
+                                                                                  struct received_header *rh,
+                                                                                  struct rspamd_received_comment *comment)
+{
+       rspamd_inet_addr_t *addr = NULL;
+       gboolean ret = FALSE;
+
+       if (comment->data[0] == '[') {
+               /* Likely Exim version */
+
+               const gchar *brace_pos = memchr (comment->data, ']', comment->dlen);
+
+               if (brace_pos) {
+                       /*
+                        * Pass the whole `[ip]` literal including the closing bracket,
+                        * exactly as the Postfix branch below does.
+                        */
+                       addr = rspamd_parse_smtp_ip (comment->data,
+                                       brace_pos - comment->data + 1,
+                                       task->task_pool);
+
+                       if (addr) {
+                               rh->addr = addr;
+                               rh->real_ip = rspamd_inet_address_to_string (addr);
+                               rh->from_ip = rh->real_ip;
+                       }
+               }
+       }
+       else {
+               if (g_ascii_isxdigit (comment->data[0])) {
+                       /* Try to parse IP address */
+                       addr = rspamd_parse_inet_address_pool (comment->data,
+                                       comment->dlen, task->task_pool);
+
+                       if (addr) {
+                               rh->addr = addr;
+                               rh->real_ip = rspamd_inet_address_to_string (addr);
+                               rh->from_ip = rh->real_ip;
+                       }
+               }
+
+               if (!addr) {
+                       /*
+                        * Either the comment does not start with a hex digit or it
+                        * does but is not an address (hostnames such as `example.com`
+                        * start with a hex digit too).
+                        * Try canonical Postfix version: rdns [ip]
+                        */
+                       const gchar *obrace_pos = memchr (comment->data, '[',
+                                       comment->dlen), *ebrace_pos, *dend;
+
+                       if (obrace_pos) {
+                               dend = comment->data + comment->dlen;
+                               ebrace_pos = memchr (obrace_pos, ']', dend - obrace_pos);
+
+                               if (ebrace_pos) {
+                                       addr = rspamd_parse_smtp_ip (obrace_pos,
+                                                       ebrace_pos - obrace_pos + 1, task->task_pool);
+
+                                       if (addr) {
+                                               rh->addr = addr;
+                                               rh->real_ip = rspamd_inet_address_to_string (addr);
+                                               rh->from_ip = rh->real_ip;
+
+                                               /* Process with rDNS: text preceding the '[' */
+                                               if (rspamd_smtp_received_process_rdns (task,
+                                                               comment->data,
+                                                               obrace_pos - comment->data,
+                                                               rh,
+                                                               TRUE)) {
+                                                       ret = TRUE;
+                                               }
+                                       }
+                               }
+                       }
+                       else {
+                               /* Hostname or some crap, sigh... */
+                               if (rspamd_smtp_received_process_rdns (task,
+                                               comment->data,
+                                               comment->dlen,
+                                               rh,
+                                               TRUE)) {
+                                       ret = TRUE;
+                               }
+                       }
+               }
+       }
+
+       return ret;
+}
+
+/**
+ * Fill the sender related fields of a received header from the `from` part
+ * of a Received line.
+ *
+ * When the part has data, the sources are tried in this order: the head
+ * comment (if non-empty), otherwise a bracketed `[ip]` literal in the data,
+ * otherwise a bare address in the data. If no address was taken from the
+ * data and the comment yielded no hostname, the data itself is treated as
+ * a hostname and stored in rh->from_hostname. When the part has no data,
+ * only the head comment is examined.
+ *
+ * @param task task whose memory pool is used for parsed data
+ * @param rpart the `from` part (data plus optional comments)
+ * @param rh received header to update
+ */
+static void
+rspamd_smtp_received_process_from (struct rspamd_task *task,
+                                                                  struct rspamd_received_part *rpart,
+                                                                  struct received_header *rh)
+{
+       if (rpart->dlen > 0) {
+               /* We have seen multiple cases:
+                * - [ip] (hostname/unknown [real_ip])
+                * - helo (hostname/unknown [real_ip])
+                * - [ip]
+                * - hostname
+                * - hostname ([ip]:port helo=xxx)
+                * Maybe more...
+                */
+               gboolean seen_ip_in_data = FALSE, seen_rdns_in_comment = FALSE;
+
+               if (rpart->head_comment && rpart->head_comment->dlen > 0) {
+                       /* We can have info within comment as part of RFC */
+                       seen_rdns_in_comment = rspamd_smtp_received_process_from_comment (
+                                       task, rh, rpart->head_comment);
+               }
+               else if (rpart->data[0] == '[') {
+                       /* No comment, just something that looks like SMTP IP */
+                       const gchar *brace_pos = memchr (rpart->data, ']', rpart->dlen);
+                       rspamd_inet_addr_t *addr;
+
+                       if (brace_pos) {
+                               /*
+                                * Hand over the complete `[ip]` literal, closing bracket
+                                * included, the same way the `rdns [ip]` branch of
+                                * rspamd_smtp_received_process_from_comment does.
+                                */
+                               addr = rspamd_parse_smtp_ip (rpart->data,
+                                               brace_pos - rpart->data + 1, task->task_pool);
+
+                               if (addr) {
+                                       seen_ip_in_data = TRUE;
+                                       rh->addr = addr;
+                                       rh->real_ip = rspamd_inet_address_to_string (addr);
+                                       rh->from_ip = rh->real_ip;
+                               }
+                       }
+               }
+               else if (g_ascii_isxdigit (rpart->data[0])) {
+                       /* Try to parse IP address */
+                       rspamd_inet_addr_t *addr;
+                       addr = rspamd_parse_inet_address_pool (rpart->data,
+                                       rpart->dlen, task->task_pool);
+                       if (addr) {
+                               seen_ip_in_data = TRUE;
+                               rh->addr = addr;
+                               rh->real_ip = rspamd_inet_address_to_string (addr);
+                               rh->from_ip = rh->real_ip;
+                       }
+               }
+
+               if (!seen_ip_in_data && !seen_rdns_in_comment) {
+                       /* Get rDNS: the data is not an address, treat it as a name */
+                       rspamd_smtp_received_process_rdns (task,
+                                       rpart->data,
+                                       rpart->dlen,
+                                       rh,
+                                       FALSE);
+               }
+       }
+       else {
+               /* rpart->dlen = 0 */
+
+               if (rpart->head_comment && rpart->head_comment->dlen > 0) {
+                       rspamd_smtp_received_process_from_comment (task,
+                                       rh, rpart->head_comment);
+               }
+       }
+}
+
 int
 rspamd_smtp_received_parse (struct rspamd_task *task,
                                                        const char *data,
                                                        size_t len,
                                                        struct received_header *rh)
 {
-       const gchar *p, *c, *end;
        goffset date_pos = 0;
        struct rspamd_received_part *head, *cur;
 
@@ -1177,5 +1381,13 @@ rspamd_smtp_received_parse (struct rspamd_task *task,
                return -1;
        }
 
+       DL_FOREACH (head, cur) {
+               switch (cur->type) {
+               case RSPAMD_RECEIVED_PART_FROM:
+                       rspamd_smtp_received_process_from (task, cur, rh);
+                       break;
+               }
+       }
+
        return 0;
 }
\ No newline at end of file
index ceed5ab06a0d2a02afbf6dee35c2166590619db8..cd21b79d50656671ac948eda6d18c2975ba089a2 100644 (file)
@@ -72,13 +72,12 @@ enum rspamd_received_type {
 #define RSPAMD_RECEIVED_FLAG_AUTHENTICATED (1 << 2)
 
 struct received_header {
-       gchar *from_hostname;
-       gchar *from_ip;
-       gchar *real_hostname;
-       gchar *real_ip;
-       gchar *by_hostname;
-       gchar *for_mbox;
-       gchar *comment_ip;
+       const gchar *from_hostname;
+       const gchar *from_ip;
+       const gchar *real_hostname;
+       const gchar *real_ip;
+       const gchar *by_hostname;
+       const gchar *for_mbox;
        rspamd_inet_addr_t *addr;
        struct rspamd_mime_header *hdr;
        time_t timestamp;
index 7cdc0953809f0b5e9b78181e28c24b402c57c120..93e0ae0e8c953087555b2822a657f3e0be838d1b 100644 (file)
@@ -150,7 +150,7 @@ struct rspamd_task {
        gchar *deliver_to;                                                              /**< address to deliver                                                         */
        gchar *user;                                                                    /**< user to deliver                                                            */
        gchar *subject;                                                                 /**< subject (for non-mime)                                                     */
-       gchar *hostname;                                                                /**< hostname reported by MTA                                           */
+       const gchar *hostname;                                                  /**< hostname reported by MTA                                           */
        GHashTable *request_headers;                                    /**< HTTP headers in a request                                          */
        GHashTable *reply_headers;                                              /**< Custom reply headers                                                       */
        struct {