]> source.dussan.org Git - rspamd.git/commitdiff
[Minor] Insert spaces only if text part is not suspected to be a link
authorMikhail Galanin <mgalanin@mimecast.com>
Tue, 18 Sep 2018 09:34:37 +0000 (10:34 +0100)
committerMikhail Galanin <mgalanin@mimecast.com>
Tue, 18 Sep 2018 09:34:37 +0000 (10:34 +0100)
sa_body requires newlines to be replaced with spaces.
However, some email clients highlight a URL in angle brackets (<>) even if it's
broken by a newline. Let's just remove the \n if the current text part could
potentially be such a URL.

src/libmime/message.c

index f0a235a4c4880d8677bc95d041fcf86332ec8ac8..39768e0e009387dec9f55620d88b6a3c2a3c190b 100644 (file)
@@ -244,6 +244,8 @@ rspamd_strip_newlines_parse (const gchar *begin, const gchar *pe,
        const gchar *p = begin, *c = begin;
        gchar last_c = '\0';
        gboolean crlf_added = FALSE;
+       gboolean url_open_bracket = FALSE;
+
        enum {
                normal_char,
                seen_cr,
@@ -285,6 +287,8 @@ rspamd_strip_newlines_parse (const gchar *begin, const gchar *pe,
                                break;
                        }
 
+                       url_open_bracket = FALSE;
+
                        p ++;
                }
                else if (G_UNLIKELY (*p == '\n')) {
@@ -300,7 +304,7 @@ rspamd_strip_newlines_parse (const gchar *begin, const gchar *pe,
 
                                c = p + 1;
 
-                               if (IS_PART_HTML (part) || g_ascii_ispunct (last_c)) {
+                               if (IS_PART_HTML (part) || !url_open_bracket) {
                                        g_byte_array_append (part->utf_stripped_content,
                                                        (const guint8 *)" ", 1);
                                        g_ptr_array_add (part->newlines,
@@ -315,7 +319,7 @@ rspamd_strip_newlines_parse (const gchar *begin, const gchar *pe,
                        case seen_cr:
                                /* \r\n */
                                if (!crlf_added) {
-                                       if (IS_PART_HTML (part) || g_ascii_ispunct (last_c)) {
+                                       if (IS_PART_HTML (part) || !url_open_bracket) {
                                                g_byte_array_append (part->utf_stripped_content,
                                                                (const guint8 *) " ", 1);
                                                crlf_added = TRUE;
@@ -345,10 +349,18 @@ rspamd_strip_newlines_parse (const gchar *begin, const gchar *pe,
                                c = p + 1;
                                break;
                        }
+                       url_open_bracket = FALSE;
 
                        p ++;
                }
                else {
+                       if ((*p) == '<') {
+                               url_open_bracket = TRUE;
+                       }
+                       else if ((*p) == '>') {
+                               url_open_bracket = FALSE;
+                       }
+
                        switch (state) {
                        case normal_char:
                                if (G_UNLIKELY (*p) == ' ') {