diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2016-09-22 18:10:43 +0100 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2016-09-22 18:10:43 +0100 |
commit | b0d3ec5f41c220a755e2f394b60481a9a5cb2ff1 (patch) | |
tree | 78cf40dedb8ec330b5b618bc7a05bdbf4723beb1 | |
parent | 261d4e4a379119d40d6cc891548c99b4b517c6d5 (diff) | |
download | rspamd-b0d3ec5f41c220a755e2f394b60481a9a5cb2ff1.tar.gz rspamd-b0d3ec5f41c220a755e2f394b60481a9a5cb2ff1.zip |
[Feature] Try to guess line endings when folding headers
-rw-r--r-- | src/client/rspamc.c | 4 | ||||
-rw-r--r-- | src/libmime/message.c | 37 | ||||
-rw-r--r-- | src/libserver/dkim.c | 3 | ||||
-rw-r--r-- | src/libserver/protocol.c | 2 | ||||
-rw-r--r-- | src/libserver/task.h | 3 | ||||
-rw-r--r-- | src/libutil/str_util.c | 95 | ||||
-rw-r--r-- | src/libutil/str_util.h | 13 | ||||
-rw-r--r-- | src/lua/lua_util.c | 3 |
8 files changed, 133 insertions, 27 deletions
diff --git a/src/client/rspamc.c b/src/client/rspamc.c index ae3b3fe82..3f038ed57 100644 --- a/src/client/rspamc.c +++ b/src/client/rspamc.c @@ -1129,7 +1129,7 @@ rspamc_mime_output (FILE *out, ucl_object_t *result, GString *input, folded_symbuf = rspamd_header_value_fold ("X-Spam-Symbols", symbuf->str, - 0); + 0, RSPAMD_TASK_NEWLINES_CRLF); rspamd_printf_gstring (added_headers, "X-Spam-Symbols: %v\r\n", folded_symbuf); @@ -1153,7 +1153,7 @@ rspamc_mime_output (FILE *out, ucl_object_t *result, GString *input, } json_header_encoded = rspamd_encode_base64_fold (json_header, - strlen (json_header), 60, NULL); + strlen (json_header), 60, NULL, RSPAMD_TASK_NEWLINES_CRLF); free (json_header); rspamd_printf_gstring (added_headers, "X-Spam-Result: %s\r\n", diff --git a/src/libmime/message.c b/src/libmime/message.c index 8f4417db4..346105438 100644 --- a/src/libmime/message.c +++ b/src/libmime/message.c @@ -80,10 +80,12 @@ process_raw_headers (struct rspamd_task *task, GHashTable *target, gchar *tmp, *tp; gint state = 0, l, next_state = 100, err_state = 100, t_state; gboolean valid_folding = FALSE; + guint nlines_count[RSPAMD_TASK_NEWLINES_MAX]; p = in; end = p + len; c = p; + memset (nlines_count, 0, sizeof (nlines_count)); while (p < end) { /* FSM for processing headers */ @@ -140,6 +142,17 @@ process_raw_headers (struct rspamd_task *task, GHashTable *target, p++; } else if (*p == '\n' || *p == '\r') { + + if (*p == '\n') { + nlines_count[RSPAMD_TASK_NEWLINES_LF] ++; + } + else if (*(p + 1) == '\n') { + nlines_count[RSPAMD_TASK_NEWLINES_CRLF] ++; + } + else { + nlines_count[RSPAMD_TASK_NEWLINES_CR] ++; + } + /* Process folding */ state = 99; l = p - c; @@ -167,6 +180,15 @@ process_raw_headers (struct rspamd_task *task, GHashTable *target, case 3: if (*p == '\r' || *p == '\n') { /* Hold folding */ + if (*p == '\n') { + nlines_count[RSPAMD_TASK_NEWLINES_LF] ++; + } + else if (*(p + 1) == '\n') { + nlines_count[RSPAMD_TASK_NEWLINES_CRLF] ++; + } + else { + nlines_count[RSPAMD_TASK_NEWLINES_CR] ++; + } state = 99; next_state = 3; err_state = 4; @@ -279,12 +301,15 @@ process_raw_headers (struct rspamd_task *task, GHashTable *target, if (*p == '\r') { if (*(p + 1) == '\n') { + nlines_count[RSPAMD_TASK_NEWLINES_CRLF] ++; p++; } p++; state = next_state; } else if (*p == '\n') { + nlines_count[RSPAMD_TASK_NEWLINES_LF] ++; + if (*(p + 1) == '\r') { p++; } @@ -301,6 +326,18 @@ process_raw_headers (struct rspamd_task *task, GHashTable *target, break; } } + + guint max_cnt = 0; + gint sel = 0; + + for (gint i = 0; i < RSPAMD_TASK_NEWLINES_MAX; i ++) { + if (nlines_count[i] > max_cnt) { + max_cnt = nlines_count[i]; + sel = i; + } + } + + task->nlines_type = sel; } static void diff --git a/src/libserver/dkim.c b/src/libserver/dkim.c index 33ac2cb96..d545a78e4 100644 --- a/src/libserver/dkim.c +++ b/src/libserver/dkim.c @@ -2162,7 +2162,8 @@ rspamd_dkim_sign (struct rspamd_task *task, return NULL; } - b64_data = rspamd_encode_base64_fold (rsa_buf, rsa_len, 70, NULL); + b64_data = rspamd_encode_base64_fold (rsa_buf, rsa_len, 70, NULL, + task->nlines_type); rspamd_printf_gstring (hdr, "%s", b64_data); g_free (b64_data); diff --git a/src/libserver/protocol.c b/src/libserver/protocol.c index 413d48bf2..3ec4c7eca 100644 --- a/src/libserver/protocol.c +++ b/src/libserver/protocol.c @@ -1037,7 +1037,7 @@ rspamd_protocol_write_ucl (struct rspamd_task *task) if (dkim_sig) { GString *folded_header = rspamd_header_value_fold ("DKIM-Signature", - dkim_sig->str, 80); + dkim_sig->str, 80, task->nlines_type); ucl_object_insert_key (top, ucl_object_fromstring_common (folded_header->str, folded_header->len, UCL_STRING_RAW), diff --git a/src/libserver/task.h b/src/libserver/task.h index aa1f52e45..915d58aa3 100644 --- a/src/libserver/task.h +++ b/src/libserver/task.h @@ -114,7 +114,7 @@ enum rspamd_task_stage { #define RSPAMD_TASK_IS_EMPTY(task) (((task)->flags & RSPAMD_TASK_FLAG_EMPTY)) struct rspamd_email_address; - +enum rspamd_newlines_type; /** * Worker task structure @@ -161,6 +161,7 @@ struct rspamd_task { GPtrArray *rcpt_envelope; /**< array of rspamd_email_address */ InternetAddressList *from_mime; struct rspamd_email_address *from_envelope; + enum rspamd_newlines_type nlines_type; /**< type of newlines (detected on most of headers */ GList *messages; /**< list of messages that would be reported */ struct rspamd_re_runtime *re_rt; /**< regexp runtime */ diff --git a/src/libutil/str_util.c b/src/libutil/str_util.c index 4210adbe2..ca40c86e4 100644 --- a/src/libutil/str_util.c +++ b/src/libutil/str_util.c @@ -701,14 +701,17 @@ rspamd_decode_base32 (const gchar *in, gsize inlen, gsize *outlen) static gchar * rspamd_encode_base64_common (const guchar *in, gsize inlen, gint str_len, - gsize *outlen, gboolean fold) + gsize *outlen, gboolean fold, enum rspamd_newlines_type how) { +#define ADD_SPLIT do { \ + if (how == RSPAMD_TASK_NEWLINES_CR || how == RSPAMD_TASK_NEWLINES_CRLF) *o++ = '\r'; \ + if (how == RSPAMD_TASK_NEWLINES_LF || how == RSPAMD_TASK_NEWLINES_CRLF) *o++ = '\n'; \ + if (fold) *o++ = '\t'; \ +} while (0) #define CHECK_SPLIT \ do { if (str_len > 0 && cols >= str_len) { \ - *o++ = '\r'; \ - *o++ = '\n'; \ - if (fold) *o++ = '\t'; \ - cols = 0; \ + ADD_SPLIT; \ + cols = 0; \ } } \ while (0) @@ -724,7 +727,28 @@ while (0) if (str_len > 0) { g_assert (str_len > 8); - allocated_len += (allocated_len / str_len + 1) * (fold ? 3 : 2) + 1; + if (fold) { + switch (how) { + case RSPAMD_TASK_NEWLINES_CR: + case RSPAMD_TASK_NEWLINES_LF: + allocated_len += (allocated_len / str_len + 1) * 2 + 1; + break; + default: + allocated_len += (allocated_len / str_len + 1) * 3 + 1; + break; + } + } + else { + switch (how) { + case RSPAMD_TASK_NEWLINES_CR: + case RSPAMD_TASK_NEWLINES_LF: + allocated_len += (allocated_len / str_len + 1) * 1 + 1; + break; + default: + allocated_len += (allocated_len / str_len + 1) * 2 + 1; + break; + } + } } out = g_malloc (allocated_len); @@ -755,11 +779,7 @@ while (0) cols --; } - *o++ = '\r'; - *o++ = '\n'; - if (fold) { - *o ++ = '\t'; - } + ADD_SPLIT; /* Remaining bytes */ while (shift >= 16) { @@ -851,14 +871,15 @@ gchar * rspamd_encode_base64 (const guchar *in, gsize inlen, gint str_len, gsize *outlen) { - return rspamd_encode_base64_common (in, inlen, str_len, outlen, FALSE); + return rspamd_encode_base64_common (in, inlen, str_len, outlen, FALSE, + RSPAMD_TASK_NEWLINES_CRLF); } gchar * rspamd_encode_base64_fold (const guchar *in, gsize inlen, gint str_len, - gsize *outlen) + gsize *outlen, enum rspamd_newlines_type how) { - return rspamd_encode_base64_common (in, inlen, str_len, outlen, TRUE); + return rspamd_encode_base64_common (in, inlen, str_len, outlen, TRUE, how); } gsize @@ -1004,7 +1025,8 @@ rspamd_strings_levenshtein_distance (const gchar *s1, gsize s1len, GString * rspamd_header_value_fold (const gchar *name, const gchar *value, - guint fold_max) + guint fold_max, + enum rspamd_newlines_type how) { GString *res; const guint default_fold_max = 76; @@ -1066,7 +1088,7 @@ rspamd_header_value_fold (const gchar *name, c = p; state = read_quoted; } - else if (*p == '\r') { + else if (*p == '\r' || *p == '\n') { /* Reset line length */ cur_len = 0; @@ -1105,7 +1127,19 @@ rspamd_header_value_fold (const gchar *name, /* Here, we have token start at 'c' and token end at 'p' */ if (fold_type == fold_after) { g_string_append_len (res, c, p - c); - g_string_append_len (res, "\r\n\t", 3); + + switch (how) { + case RSPAMD_TASK_NEWLINES_LF: + g_string_append_len (res, "\n\t", 2); + break; + case RSPAMD_TASK_NEWLINES_CR: + g_string_append_len (res, "\r\t", 2); + break; + case RSPAMD_TASK_NEWLINES_CRLF: + default: + g_string_append_len (res, "\r\n\t", 3); + break; + } /* Skip space if needed */ if (g_ascii_isspace (*p)) { @@ -1118,7 +1152,19 @@ rspamd_header_value_fold (const gchar *name, c ++; } - g_string_append_len (res, "\r\n\t", 3); + switch (how) { + case RSPAMD_TASK_NEWLINES_LF: + g_string_append_len (res, "\n\t", 2); + break; + case RSPAMD_TASK_NEWLINES_CR: + g_string_append_len (res, "\r\t", 2); + break; + case RSPAMD_TASK_NEWLINES_CRLF: + default: + g_string_append_len (res, "\r\n\t", 3); + break; + } + g_string_append_len (res, c, p - c); } @@ -1155,7 +1201,18 @@ rspamd_header_value_fold (const gchar *name, if (g_ascii_isspace (*c)) { c ++; } - g_string_append_len (res, "\r\n\t", 3); + switch (how) { + case RSPAMD_TASK_NEWLINES_LF: + g_string_append_len (res, "\n\t", 2); + break; + case RSPAMD_TASK_NEWLINES_CR: + g_string_append_len (res, "\r\t", 2); + break; + case RSPAMD_TASK_NEWLINES_CRLF: + default: + g_string_append_len (res, "\r\n\t", 3); + break; + } g_string_append_len (res, c, p - c); } else { diff --git a/src/libutil/str_util.h b/src/libutil/str_util.h index 91c80ff5d..9b9bbe0c1 100644 --- a/src/libutil/str_util.h +++ b/src/libutil/str_util.h @@ -20,6 +20,14 @@ #include "ucl.h" #include "fstring.h" + +enum rspamd_newlines_type { + RSPAMD_TASK_NEWLINES_CR, + RSPAMD_TASK_NEWLINES_LF, + RSPAMD_TASK_NEWLINES_CRLF, + RSPAMD_TASK_NEWLINES_MAX +}; + /** * Compare two memory regions of size `l` using case insensitive matching */ @@ -193,7 +201,7 @@ gchar * rspamd_encode_base64 (const guchar *in, gsize inlen, gint str_len, * @return freshly allocated base64 encoded value or NULL if input is invalid */ gchar * rspamd_encode_base64_fold (const guchar *in, gsize inlen, gint str_len, - gsize *outlen); + gsize *outlen, enum rspamd_newlines_type how); /** * Decode URL encoded string in-place and return new length of a string, src and dst are NULL terminated @@ -227,7 +235,8 @@ gint rspamd_strings_levenshtein_distance (const gchar *s1, gsize s1len, */ GString *rspamd_header_value_fold (const gchar *name, const gchar *value, - guint fold_max); + guint fold_max, + enum rspamd_newlines_type how); /** * Search for a substring `srch` in the text `in` using Karp-Rabin algorithm diff --git a/src/lua/lua_util.c b/src/lua/lua_util.c index 81038ffdd..b3c30ab29 100644 --- a/src/lua/lua_util.c +++ b/src/lua/lua_util.c @@ -1032,7 +1032,8 @@ lua_util_fold_header (lua_State *L) value = luaL_checkstring (L, 2); if (name && value) { - folded = rspamd_header_value_fold (name, value, 0); + folded = rspamd_header_value_fold (name, value, 0, + RSPAMD_TASK_NEWLINES_CRLF); if (folded) { lua_pushlstring (L, folded->str, folded->len); |