From 963a8e9f83693a153f2133e6b946493601c43d21 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Mon, 22 Jul 2019 15:27:43 +0100 Subject: [PATCH] [Fix] Various fixes to QP encoding algorithm Issue: #2957 --- src/libutil/str_util.c | 180 ++++++++++++++++++++++++++++++++--------- 1 file changed, 144 insertions(+), 36 deletions(-) diff --git a/src/libutil/str_util.c b/src/libutil/str_util.c index 290110b2b..962ed1871 100644 --- a/src/libutil/str_util.c +++ b/src/libutil/str_util.c @@ -917,25 +917,61 @@ rspamd_encode_base64_fold (const guchar *in, gsize inlen, gint str_len, return rspamd_encode_base64_common (in, inlen, str_len, outlen, TRUE, how); } +#define QP_RANGE(x) (((x) >= 33 && (x) <= 60) || ((x) >= 62 && (x) <= 126) \ + || (x) == '\r' || (x) == '\n' || (x) == ' ' || (x) == '\t') +#define QP_SPAN_NORMAL(span, str_len) ((str_len) > 0 && \ + ((span) + 1) >= (str_len)) +#define QP_SPAN_SPECIAL(span, str_len) ((str_len) > 0 && \ + ((span) + 4) >= (str_len)) + gchar * rspamd_encode_qp_fold (const guchar *in, gsize inlen, gint str_len, gsize *outlen, enum rspamd_newlines_type how) { - gsize olen = 0, span = 0, i = 0; + gsize olen = 0, span = 0, i = 0, seen_spaces = 0; gchar *out; - gint ch; + gint ch, last_sp; const guchar *end = in + inlen, *p = in; static const gchar hexdigests[16] = "0123456789ABCDEF"; while (p < end) { ch = *p; - if (ch < 128 && ch != '\r' && ch != '\n') { + if (QP_RANGE(ch)) { olen ++; span ++; + + if (ch == '\r' || ch == '\n') { + if (seen_spaces > 0) { + /* We must encode spaces at the end of line */ + olen += 3; + seen_spaces = 0; + /* Special stuff for space character at the end */ + if (QP_SPAN_SPECIAL(span, str_len)) { + if (how == RSPAMD_TASK_NEWLINES_CRLF) { + /* =\r\n */ + olen += 3; + } + else { + olen += 2; + } + } + /* Continue with the same `ch` but without spaces logic */ + continue; + } + + span = 0; + } + else if (ch == ' ' || ch == '\t') { + seen_spaces ++; + last_sp = ch; + } + else { + seen_spaces = 0; + } } else { - if (str_len > 0 && span + 5 >= str_len) { + if (QP_SPAN_SPECIAL(span, str_len)) { if (how == RSPAMD_TASK_NEWLINES_CRLF) { /* =\r\n */ olen += 3; @@ -950,7 +986,7 @@ rspamd_encode_qp_fold (const guchar *in, gsize inlen, gint str_len, span += 3; } - if (str_len > 0 && span + 3 >= str_len) { + if (QP_SPAN_NORMAL(span, str_len)) { if (how == RSPAMD_TASK_NEWLINES_CRLF) { /* =\r\n */ olen += 3; @@ -964,21 +1000,112 @@ rspamd_encode_qp_fold (const guchar *in, gsize inlen, gint str_len, p ++; } + if (seen_spaces > 0) { + /* Reserve length for the last space encoded */ + olen += 3; + } + out = g_malloc (olen + 1); p = in; i = 0; span = 0; + seen_spaces = 0; while (p < end) { ch = *p; - if (ch < 128 && ch != '\r' && ch != '\n') { + if (QP_RANGE (ch)) { + if (ch == '\r' || ch == '\n') { + if (seen_spaces > 0) { + if (QP_SPAN_SPECIAL(span, str_len)) { + /* Add soft newline */ + i --; + + if (p + 1 < end || span + 3 >= str_len) { + switch (how) { + default: + case RSPAMD_TASK_NEWLINES_CRLF: + out[i++] = '='; + out[i++] = '\r'; + out[i++] = '\n'; + break; + case RSPAMD_TASK_NEWLINES_LF: + out[i++] = '='; + out[i++] = '\n'; + break; + case RSPAMD_TASK_NEWLINES_CR: + out[i++] = '='; + out[i++] = '\r'; + break; + } + } + + /* Now write encoded `last_sp` but after newline */ + out[i++] = '='; + out[i++] = hexdigests[((last_sp >> 4) & 0xF)]; + out[i++] = hexdigests[(last_sp & 0xF)]; + + span = 0; + } + else { + /* Encode last space */ + --i; + out[i++] = '='; + out[i++] = hexdigests[((last_sp >> 4) & 0xF)]; + out[i++] = hexdigests[(last_sp & 0xF)]; + seen_spaces = 0; + } + + continue; + } + span = 0; + } + else if (ch == ' ' || ch == '\t') { + seen_spaces ++; + last_sp = ch; + span ++; + } + else { + seen_spaces = 0; + span ++; + } + out[i++] = ch; - span ++; } else { - if (str_len > 0 && span + 5 >= str_len) { + if (QP_SPAN_SPECIAL(span, str_len)) { /* Add new line and then continue */ + if (p + 1 < end || span + 3 >= str_len) { + switch (how) { + default: + case RSPAMD_TASK_NEWLINES_CRLF: + out[i++] = '='; + out[i++] = '\r'; + out[i++] = '\n'; + break; + case RSPAMD_TASK_NEWLINES_LF: + out[i++] = '='; + out[i++] = '\n'; + break; + case RSPAMD_TASK_NEWLINES_CR: + out[i++] = '='; + out[i++] = '\r'; + break; + } + span = 0; + } + } + + out[i++] = '='; + out[i++] = hexdigests[((ch >> 4) & 0xF)]; + out[i++] = hexdigests[(ch & 0xF)]; + span += 3; + seen_spaces = 0; + } + + if (QP_SPAN_NORMAL(span, str_len)) { + /* Add new line and then continue */ + if (p + 1 < end || span > str_len || seen_spaces) { switch (how) { default: case RSPAMD_TASK_NEWLINES_CRLF: @@ -995,42 +1122,23 @@ rspamd_encode_qp_fold (const guchar *in, gsize inlen, gint str_len, out[i++] = '\r'; break; } - span = 0; + seen_spaces = 0; } - - out[i++] = '='; - out[i++] = hexdigests[((ch >> 4) & 0xF)]; - out[i++] = hexdigests[(ch & 0xF)]; - span += 3; - } - - if (str_len > 0 && span + 3 >= str_len) { - /* Add new line and then continue */ - switch (how) { - default: - case RSPAMD_TASK_NEWLINES_CRLF: - out[i++] = '='; - out[i++] = '\r'; - out[i++] = '\n'; - break; - case RSPAMD_TASK_NEWLINES_LF: - out[i++] = '='; - out[i++] = '\n'; - break; - case RSPAMD_TASK_NEWLINES_CR: - out[i++] = '='; - out[i++] = '\r'; - break; - } - - span = 0; } g_assert (i <= olen); p ++; } + /* Deal with the last space character */ + if (seen_spaces > 0) { + i --; + out[i++] = '='; + out[i++] = hexdigests[((last_sp >> 4) & 0xF)]; + out[i++] = hexdigests[(last_sp & 0xF)]; + } + out[i] = '\0'; if (outlen) { -- 2.39.5