From 58f7e9aa83e40fbfdc525760e935cde617aed28d Mon Sep 17 00:00:00 2001
From: Vsevolod Stakhov
Date: Tue, 8 Sep 2015 13:41:48 +0100
Subject: [PATCH] Add headers folding routine.

---
 src/client/rspamc.c    |  24 ++++-
 src/libutil/str_util.c | 193 +++++++++++++++++++++++++++++++++++++++++
 src/libutil/str_util.h |  10 +++
 3 files changed, 226 insertions(+), 1 deletion(-)

diff --git a/src/client/rspamc.c b/src/client/rspamc.c
index 98ef61a48..a3d67a807 100644
--- a/src/client/rspamc.c
+++ b/src/client/rspamc.c
@@ -858,8 +858,10 @@ rspamc_mime_output (FILE *out, ucl_object_t *result, GString *input, GError *err
 	GByteArray ar;
 	GMimeParser *parser;
 	GMimeMessage *message;
-	const ucl_object_t *metric, *res;
+	const ucl_object_t *cur, *metric, *res;
+	ucl_object_iter_t it = NULL;
 	const gchar *action = "no action";
+	GString *symbuf;
 	gint act;
 	gdouble score = 0.0, required_score = 0.0;
 	gchar scorebuf[32];
@@ -935,6 +937,26 @@ rspamc_mime_output (FILE *out, ucl_object_t *result, GString *input, GError *err
 	g_mime_object_append_header (GMIME_OBJECT (message), "X-Spam-Level",
 			scorebuf);
 
+	/* Short description of all symbols */
+	symbuf = g_string_sized_new (64);
+
+	while ((cur = ucl_iterate_object (metric, &it, true)) != NULL) {
+
+		if (ucl_object_type (cur) == UCL_OBJECT) {
+			rspamd_printf_gstring (symbuf, "%s,", ucl_object_key (cur));
+		}
+	}
+	/* Trim the last comma */
+	if (symbuf->len > 0 && symbuf->str[symbuf->len - 1] == ',') {
+		g_string_erase (symbuf, symbuf->len - 1, 1);
+	}
+
+	sc = g_mime_utils_header_encode_text (symbuf->str);
+	g_mime_object_append_header (GMIME_OBJECT (message), "X-Spam-Symbols",
+			sc);
+	g_free (sc);
+	g_string_free (symbuf, TRUE);
+
 	if (json || raw) {
 		/* We also append json data as a specific header */
 		if (json) {
diff --git a/src/libutil/str_util.c b/src/libutil/str_util.c
index 75a200db5..844962c25 100644
--- a/src/libutil/str_util.c
+++ b/src/libutil/str_util.c
@@ -863,3 +863,196 @@ rspamd_strings_levenshtein_distance (const gchar *s1, gsize s1len,
 
 	return column[s1len];
 }
+
+/*
+ * Fold a header value: "\r\n\t" is inserted at token boundaries (',', ';' or
+ * whitespace) once the current line grows past 80% of fold_max; quoted strings
+ * are copied without being folded inside. The length of "name: " is counted
+ * towards the first line. The returned GString is owned by the caller.
+ */
+GString *
+rspamd_header_value_fold (const gchar *name, const gchar *value)
+{
+	GString *res;
+	const guint fold_max = 76;
+	/* Integer form of the 80% threshold after which a fold is wanted */
+	const guint fold_min = fold_max * 4 / 5;
+	guint cur_len;
+	const gchar *p, *c;
+	gboolean first_token = TRUE;
+	enum {
+		fold_before,
+		fold_after
+	} fold_type = fold_before;
+	enum {
+		read_token,
+		read_quoted,
+		after_quote,
+		fold_token,
+	} state = read_token, next_state = read_token;
+
+	g_assert (name != NULL);
+	g_assert (value != NULL);
+
+	res = g_string_sized_new (strlen (value));
+
+	/* 'c' marks the start of the not yet copied chunk, 'p' is the cursor */
+	c = value;
+	p = c;
+	/* name: */
+	cur_len = strlen (name) + 2;
+
+	while (*p) {
+		switch (state) {
+		case read_token:
+			if (*p == ',' || *p == ';') {
+				/* We have something similar to the token's end, so check len */
+				if (cur_len > fold_min && cur_len < fold_max) {
+					/* We want fold */
+					fold_type = fold_after;
+					state = fold_token;
+					next_state = read_token;
+				}
+				else if (cur_len > fold_max && !first_token) {
+					fold_type = fold_before;
+					state = fold_token;
+					next_state = read_token;
+				}
+				else {
+					g_string_append_len (res, c, p - c);
+					c = p;
+					first_token = FALSE;
+				}
+				p ++;
+			}
+			else if (*p == '"') {
+				/* Fold before quoted tokens */
+				g_string_append_len (res, c, p - c);
+				c = p;
+				state = read_quoted;
+			}
+			else if (*p == '\r') {
+				/* Reset line length */
+				cur_len = 0;
+
+				while (g_ascii_isspace (*p)) {
+					p ++;
+				}
+
+				g_string_append_len (res, c, p - c);
+				c = p;
+			}
+			else if (g_ascii_isspace (*p)) {
+				if (cur_len > fold_min && cur_len < fold_max) {
+					/* We want fold */
+					fold_type = fold_after;
+					state = fold_token;
+					next_state = read_token;
+				}
+				else if (cur_len > fold_max && !first_token) {
+					fold_type = fold_before;
+					state = fold_token;
+					next_state = read_token;
+				}
+				else {
+					g_string_append_len (res, c, p - c);
+					c = p;
+					first_token = FALSE;
+					p ++;
+				}
+			}
+			else {
+				p ++;
+				cur_len ++;
+			}
+			break;
+		case fold_token:
+			/* Here, we have token start at 'c' and token end at 'p' */
+			if (fold_type == fold_after) {
+				g_string_append_len (res, c, p - c);
+				g_string_append_len (res, "\r\n\t", 3);
+
+				/* Skip space if needed */
+				if (g_ascii_isspace (*p)) {
+					p ++;
+				}
+
+				/* Nothing has been written to the new line yet */
+				cur_len = 0;
+			}
+			else {
+				/* Skip space if needed */
+				if (c < p && g_ascii_isspace (*c)) {
+					c ++;
+				}
+
+				g_string_append_len (res, "\r\n\t", 3);
+				g_string_append_len (res, c, p - c);
+				/* The token already occupies the new line, account for it */
+				cur_len = (guint)(p - c);
+			}
+
+			c = p;
+			state = next_state;
+			first_token = TRUE;
+			break;
+
+		case read_quoted:
+			if (p != c && *p == '"') {
+				state = after_quote;
+			}
+			p ++;
+			cur_len ++;
+			break;
+
+		case after_quote:
+			state = read_token;
+			/* Skip one more character after the quote */
+			p ++;
+			cur_len ++;
+			g_string_append_len (res, c, p - c);
+			c = p;
+			first_token = TRUE;
+			break;
+		}
+	}
+
+	/* Last token */
+	switch (state) {
+	case read_token:
+		if (cur_len > fold_max && !first_token) {
+			g_string_append_len (res, "\r\n\t", 3);
+			g_string_append_len (res, c, p - c);
+		}
+		else {
+			g_string_append_len (res, c, p - c);
+		}
+		break;
+	case read_quoted:
+	case after_quote:
+		g_string_append_len (res, c, p - c);
+		break;
+	case fold_token:
+		/*
+		 * Input ended right after a ',' or ';' that requested a fold: the
+		 * pending token still has to be flushed. A fold after the very last
+		 * token would only add an empty line, so it is not emitted.
+		 */
+		if (fold_type == fold_before) {
+			if (c < p && g_ascii_isspace (*c)) {
+				c ++;
+			}
+
+			g_string_append_len (res, "\r\n\t", 3);
+		}
+
+		g_string_append_len (res, c, p - c);
+		break;
+
+	default:
+		g_assert (p == c);
+		break;
+	}
+
+	return res;
+}
diff --git a/src/libutil/str_util.h b/src/libutil/str_util.h
index f8d9dd4dc..dcccc3d25 100644
--- a/src/libutil/str_util.h
+++ b/src/libutil/str_util.h
@@ -162,4 +162,14 @@ gsize rspamd_decode_url (gchar *dst, const gchar *src, gsize size);
 gint rspamd_strings_levenshtein_distance (const gchar *s1, gsize s1len,
 		const gchar *s2, gsize s2len);
 
+/**
+ * Fold a header value on token boundaries (',', ';' and whitespace),
+ * inserting "\r\n\t" where a line would otherwise grow too long
+ * @param name name of header (used only to account for the first line length)
+ * @param value value of header, must not be NULL
+ * @return newly allocated GString with the folded value; the caller owns it
+ * and frees it with g_string_free
+ */
+GString * rspamd_header_value_fold (const gchar *name, const gchar *value);
+
 #endif /* SRC_LIBUTIL_STR_UTIL_H_ */
-- 
2.39.5