summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@highsecure.ru> 2016-09-22 18:10:43 +0100
committer Vsevolod Stakhov <vsevolod@highsecure.ru> 2016-09-22 18:10:43 +0100
commit b0d3ec5f41c220a755e2f394b60481a9a5cb2ff1 (patch)
tree 78cf40dedb8ec330b5b618bc7a05bdbf4723beb1
parent 261d4e4a379119d40d6cc891548c99b4b517c6d5 (diff)
download rspamd-b0d3ec5f41c220a755e2f394b60481a9a5cb2ff1.tar.gz
rspamd-b0d3ec5f41c220a755e2f394b60481a9a5cb2ff1.zip
[Feature] Try to guess line endings when folding headers
-rw-r--r-- src/client/rspamc.c 4
-rw-r--r-- src/libmime/message.c 37
-rw-r--r-- src/libserver/dkim.c 3
-rw-r--r-- src/libserver/protocol.c 2
-rw-r--r-- src/libserver/task.h 3
-rw-r--r-- src/libutil/str_util.c 95
-rw-r--r-- src/libutil/str_util.h 13
-rw-r--r-- src/lua/lua_util.c 3
8 files changed, 133 insertions, 27 deletions
diff --git a/src/client/rspamc.c b/src/client/rspamc.c
index ae3b3fe82..3f038ed57 100644
--- a/src/client/rspamc.c
+++ b/src/client/rspamc.c
@@ -1129,7 +1129,7 @@ rspamc_mime_output (FILE *out, ucl_object_t *result, GString *input,
folded_symbuf = rspamd_header_value_fold ("X-Spam-Symbols",
symbuf->str,
- 0);
+ 0, RSPAMD_TASK_NEWLINES_CRLF);
rspamd_printf_gstring (added_headers, "X-Spam-Symbols: %v\r\n",
folded_symbuf);
@@ -1153,7 +1153,7 @@ rspamc_mime_output (FILE *out, ucl_object_t *result, GString *input,
}
json_header_encoded = rspamd_encode_base64_fold (json_header,
- strlen (json_header), 60, NULL);
+ strlen (json_header), 60, NULL, RSPAMD_TASK_NEWLINES_CRLF);
free (json_header);
rspamd_printf_gstring (added_headers,
"X-Spam-Result: %s\r\n",
diff --git a/src/libmime/message.c b/src/libmime/message.c
index 8f4417db4..346105438 100644
--- a/src/libmime/message.c
+++ b/src/libmime/message.c
@@ -80,10 +80,12 @@ process_raw_headers (struct rspamd_task *task, GHashTable *target,
gchar *tmp, *tp;
gint state = 0, l, next_state = 100, err_state = 100, t_state;
gboolean valid_folding = FALSE;
+ guint nlines_count[RSPAMD_TASK_NEWLINES_MAX];
p = in;
end = p + len;
c = p;
+ memset (nlines_count, 0, sizeof (nlines_count));
while (p < end) {
/* FSM for processing headers */
@@ -140,6 +142,17 @@ process_raw_headers (struct rspamd_task *task, GHashTable *target,
p++;
}
else if (*p == '\n' || *p == '\r') {
+
+ if (*p == '\n') {
+ nlines_count[RSPAMD_TASK_NEWLINES_LF] ++;
+ }
+ else if (*(p + 1) == '\n') {
+ nlines_count[RSPAMD_TASK_NEWLINES_CRLF] ++;
+ }
+ else {
+ nlines_count[RSPAMD_TASK_NEWLINES_CR] ++;
+ }
+
/* Process folding */
state = 99;
l = p - c;
@@ -167,6 +180,15 @@ process_raw_headers (struct rspamd_task *task, GHashTable *target,
case 3:
if (*p == '\r' || *p == '\n') {
/* Hold folding */
+ if (*p == '\n') {
+ nlines_count[RSPAMD_TASK_NEWLINES_LF] ++;
+ }
+ else if (*(p + 1) == '\n') {
+ nlines_count[RSPAMD_TASK_NEWLINES_CRLF] ++;
+ }
+ else {
+ nlines_count[RSPAMD_TASK_NEWLINES_CR] ++;
+ }
state = 99;
next_state = 3;
err_state = 4;
@@ -279,12 +301,15 @@ process_raw_headers (struct rspamd_task *task, GHashTable *target,
if (*p == '\r') {
if (*(p + 1) == '\n') {
+ nlines_count[RSPAMD_TASK_NEWLINES_CRLF] ++;
p++;
}
p++;
state = next_state;
}
else if (*p == '\n') {
+ nlines_count[RSPAMD_TASK_NEWLINES_LF] ++;
+
if (*(p + 1) == '\r') {
p++;
}
@@ -301,6 +326,18 @@ process_raw_headers (struct rspamd_task *task, GHashTable *target,
break;
}
}
+
+ guint max_cnt = 0;
+ gint sel = 0;
+
+ for (gint i = 0; i < RSPAMD_TASK_NEWLINES_MAX; i ++) {
+ if (nlines_count[i] > max_cnt) {
+ max_cnt = nlines_count[i];
+ sel = i;
+ }
+ }
+
+ task->nlines_type = sel;
}
static void
diff --git a/src/libserver/dkim.c b/src/libserver/dkim.c
index 33ac2cb96..d545a78e4 100644
--- a/src/libserver/dkim.c
+++ b/src/libserver/dkim.c
@@ -2162,7 +2162,8 @@ rspamd_dkim_sign (struct rspamd_task *task,
return NULL;
}
- b64_data = rspamd_encode_base64_fold (rsa_buf, rsa_len, 70, NULL);
+ b64_data = rspamd_encode_base64_fold (rsa_buf, rsa_len, 70, NULL,
+ task->nlines_type);
rspamd_printf_gstring (hdr, "%s", b64_data);
g_free (b64_data);
diff --git a/src/libserver/protocol.c b/src/libserver/protocol.c
index 413d48bf2..3ec4c7eca 100644
--- a/src/libserver/protocol.c
+++ b/src/libserver/protocol.c
@@ -1037,7 +1037,7 @@ rspamd_protocol_write_ucl (struct rspamd_task *task)
if (dkim_sig) {
GString *folded_header = rspamd_header_value_fold ("DKIM-Signature",
- dkim_sig->str, 80);
+ dkim_sig->str, 80, task->nlines_type);
ucl_object_insert_key (top,
ucl_object_fromstring_common (folded_header->str,
folded_header->len, UCL_STRING_RAW),
diff --git a/src/libserver/task.h b/src/libserver/task.h
index aa1f52e45..915d58aa3 100644
--- a/src/libserver/task.h
+++ b/src/libserver/task.h
@@ -114,7 +114,7 @@ enum rspamd_task_stage {
#define RSPAMD_TASK_IS_EMPTY(task) (((task)->flags & RSPAMD_TASK_FLAG_EMPTY))
struct rspamd_email_address;
-
+enum rspamd_newlines_type;
/**
* Worker task structure
@@ -161,6 +161,7 @@ struct rspamd_task {
GPtrArray *rcpt_envelope; /**< array of rspamd_email_address */
InternetAddressList *from_mime;
struct rspamd_email_address *from_envelope;
+ enum rspamd_newlines_type nlines_type; /**< type of newlines (detected on most of headers) */
GList *messages; /**< list of messages that would be reported */
struct rspamd_re_runtime *re_rt; /**< regexp runtime */
diff --git a/src/libutil/str_util.c b/src/libutil/str_util.c
index 4210adbe2..ca40c86e4 100644
--- a/src/libutil/str_util.c
+++ b/src/libutil/str_util.c
@@ -701,14 +701,17 @@ rspamd_decode_base32 (const gchar *in, gsize inlen, gsize *outlen)
static gchar *
rspamd_encode_base64_common (const guchar *in, gsize inlen, gint str_len,
- gsize *outlen, gboolean fold)
+ gsize *outlen, gboolean fold, enum rspamd_newlines_type how)
{
+#define ADD_SPLIT do { \
+ if (how == RSPAMD_TASK_NEWLINES_CR || how == RSPAMD_TASK_NEWLINES_CRLF) *o++ = '\r'; \
+ if (how == RSPAMD_TASK_NEWLINES_LF || how == RSPAMD_TASK_NEWLINES_CRLF) *o++ = '\n'; \
+ if (fold) *o++ = '\t'; \
+} while (0)
#define CHECK_SPLIT \
do { if (str_len > 0 && cols >= str_len) { \
- *o++ = '\r'; \
- *o++ = '\n'; \
- if (fold) *o++ = '\t'; \
- cols = 0; \
+ ADD_SPLIT; \
+ cols = 0; \
} } \
while (0)
@@ -724,7 +727,28 @@ while (0)
if (str_len > 0) {
g_assert (str_len > 8);
- allocated_len += (allocated_len / str_len + 1) * (fold ? 3 : 2) + 1;
+ if (fold) {
+ switch (how) {
+ case RSPAMD_TASK_NEWLINES_CR:
+ case RSPAMD_TASK_NEWLINES_LF:
+ allocated_len += (allocated_len / str_len + 1) * 2 + 1;
+ break;
+ default:
+ allocated_len += (allocated_len / str_len + 1) * 3 + 1;
+ break;
+ }
+ }
+ else {
+ switch (how) {
+ case RSPAMD_TASK_NEWLINES_CR:
+ case RSPAMD_TASK_NEWLINES_LF:
+ allocated_len += (allocated_len / str_len + 1) * 1 + 1;
+ break;
+ default:
+ allocated_len += (allocated_len / str_len + 1) * 2 + 1;
+ break;
+ }
+ }
}
out = g_malloc (allocated_len);
@@ -755,11 +779,7 @@ while (0)
cols --;
}
- *o++ = '\r';
- *o++ = '\n';
- if (fold) {
- *o ++ = '\t';
- }
+ ADD_SPLIT;
/* Remaining bytes */
while (shift >= 16) {
@@ -851,14 +871,15 @@ gchar *
rspamd_encode_base64 (const guchar *in, gsize inlen, gint str_len,
gsize *outlen)
{
- return rspamd_encode_base64_common (in, inlen, str_len, outlen, FALSE);
+ return rspamd_encode_base64_common (in, inlen, str_len, outlen, FALSE,
+ RSPAMD_TASK_NEWLINES_CRLF);
}
gchar *
rspamd_encode_base64_fold (const guchar *in, gsize inlen, gint str_len,
- gsize *outlen)
+ gsize *outlen, enum rspamd_newlines_type how)
{
- return rspamd_encode_base64_common (in, inlen, str_len, outlen, TRUE);
+ return rspamd_encode_base64_common (in, inlen, str_len, outlen, TRUE, how);
}
gsize
@@ -1004,7 +1025,8 @@ rspamd_strings_levenshtein_distance (const gchar *s1, gsize s1len,
GString *
rspamd_header_value_fold (const gchar *name,
const gchar *value,
- guint fold_max)
+ guint fold_max,
+ enum rspamd_newlines_type how)
{
GString *res;
const guint default_fold_max = 76;
@@ -1066,7 +1088,7 @@ rspamd_header_value_fold (const gchar *name,
c = p;
state = read_quoted;
}
- else if (*p == '\r') {
+ else if (*p == '\r' || *p == '\n') {
/* Reset line length */
cur_len = 0;
@@ -1105,7 +1127,19 @@ rspamd_header_value_fold (const gchar *name,
/* Here, we have token start at 'c' and token end at 'p' */
if (fold_type == fold_after) {
g_string_append_len (res, c, p - c);
- g_string_append_len (res, "\r\n\t", 3);
+
+ switch (how) {
+ case RSPAMD_TASK_NEWLINES_LF:
+ g_string_append_len (res, "\n\t", 2);
+ break;
+ case RSPAMD_TASK_NEWLINES_CR:
+ g_string_append_len (res, "\r\t", 2);
+ break;
+ case RSPAMD_TASK_NEWLINES_CRLF:
+ default:
+ g_string_append_len (res, "\r\n\t", 3);
+ break;
+ }
/* Skip space if needed */
if (g_ascii_isspace (*p)) {
@@ -1118,7 +1152,19 @@ rspamd_header_value_fold (const gchar *name,
c ++;
}
- g_string_append_len (res, "\r\n\t", 3);
+ switch (how) {
+ case RSPAMD_TASK_NEWLINES_LF:
+ g_string_append_len (res, "\n\t", 2);
+ break;
+ case RSPAMD_TASK_NEWLINES_CR:
+ g_string_append_len (res, "\r\t", 2);
+ break;
+ case RSPAMD_TASK_NEWLINES_CRLF:
+ default:
+ g_string_append_len (res, "\r\n\t", 3);
+ break;
+ }
+
g_string_append_len (res, c, p - c);
}
@@ -1155,7 +1201,18 @@ rspamd_header_value_fold (const gchar *name,
if (g_ascii_isspace (*c)) {
c ++;
}
- g_string_append_len (res, "\r\n\t", 3);
+ switch (how) {
+ case RSPAMD_TASK_NEWLINES_LF:
+ g_string_append_len (res, "\n\t", 2);
+ break;
+ case RSPAMD_TASK_NEWLINES_CR:
+ g_string_append_len (res, "\r\t", 2);
+ break;
+ case RSPAMD_TASK_NEWLINES_CRLF:
+ default:
+ g_string_append_len (res, "\r\n\t", 3);
+ break;
+ }
g_string_append_len (res, c, p - c);
}
else {
diff --git a/src/libutil/str_util.h b/src/libutil/str_util.h
index 91c80ff5d..9b9bbe0c1 100644
--- a/src/libutil/str_util.h
+++ b/src/libutil/str_util.h
@@ -20,6 +20,14 @@
#include "ucl.h"
#include "fstring.h"
+
+enum rspamd_newlines_type {
+ RSPAMD_TASK_NEWLINES_CR,
+ RSPAMD_TASK_NEWLINES_LF,
+ RSPAMD_TASK_NEWLINES_CRLF,
+ RSPAMD_TASK_NEWLINES_MAX
+};
+
/**
* Compare two memory regions of size `l` using case insensitive matching
*/
@@ -193,7 +201,7 @@ gchar * rspamd_encode_base64 (const guchar *in, gsize inlen, gint str_len,
* @return freshly allocated base64 encoded value or NULL if input is invalid
*/
gchar * rspamd_encode_base64_fold (const guchar *in, gsize inlen, gint str_len,
- gsize *outlen);
+ gsize *outlen, enum rspamd_newlines_type how);
/**
* Decode URL encoded string in-place and return new length of a string, src and dst are NULL terminated
@@ -227,7 +235,8 @@ gint rspamd_strings_levenshtein_distance (const gchar *s1, gsize s1len,
*/
GString *rspamd_header_value_fold (const gchar *name,
const gchar *value,
- guint fold_max);
+ guint fold_max,
+ enum rspamd_newlines_type how);
/**
* Search for a substring `srch` in the text `in` using Karp-Rabin algorithm
diff --git a/src/lua/lua_util.c b/src/lua/lua_util.c
index 81038ffdd..b3c30ab29 100644
--- a/src/lua/lua_util.c
+++ b/src/lua/lua_util.c
@@ -1032,7 +1032,8 @@ lua_util_fold_header (lua_State *L)
value = luaL_checkstring (L, 2);
if (name && value) {
- folded = rspamd_header_value_fold (name, value, 0);
+ folded = rspamd_header_value_fold (name, value, 0,
+ RSPAMD_TASK_NEWLINES_CRLF);
if (folded) {
lua_pushlstring (L, folded->str, folded->len);