From 800399108d67878a6d52db6c3cb28d15e01ccc2a Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Wed, 30 Sep 2015 15:57:58 +0100 Subject: [PATCH] Add routine to find end of headers position in mime messages. --- src/libutil/str_util.c | 108 +++++++++++++++++++++++++++++++++++++++++ src/libutil/str_util.h | 9 ++++ 2 files changed, 117 insertions(+) diff --git a/src/libutil/str_util.c b/src/libutil/str_util.c index 00f689bff..6465208f9 100644 --- a/src/libutil/str_util.c +++ b/src/libutil/str_util.c @@ -1069,3 +1069,111 @@ rspamd_substring_search (const gchar *in, gsize inlen, return -1; } + +goffset +rspamd_string_find_eoh (GString *input) +{ + const gchar *p, *c = NULL, *end; + enum { + skip_char = 0, + got_cr, + got_lf, + got_linebreak, + got_linebreak_cr, + got_linebreak_lf + } state = skip_char; + + g_assert (input != NULL); + + p = input->str; + end = p + input->len; + + while (p < end) { + switch (state) { + case skip_char: + if (*p == '\r') { + p++; + state = got_cr; + } + else if (*p == '\n') { + p++; + state = got_lf; + } + else { + p++; + } + break; + + case got_cr: + if (*p == '\r') { + /* + * Double \r\r, so need to check the current char + * if it is '\n', then we have \r\r\n sequence, that is NOT + * double end of line + */ + if (p < end && p[1] == '\n') { + p++; + state = got_lf; + } + else { + /* We have \r\r[^\n] */ + return p - input->str; + } + } + else if (*p == '\n') { + p++; + state = got_lf; + } + else { + p++; + state = skip_char; + } + break; + case got_lf: + if (*p == '\n') { + /* We have \n\n, which is obviously end of headers */ + return p - input->str; + } + else if (*p == '\r') { + state = got_linebreak; + } + else { + p++; + state = skip_char; + } + break; + case got_linebreak: + if (*p == '\r') { + c = p; + p++; + state = got_linebreak_cr; + } + else if (*p == '\n') { + c = p; + p++; + state = got_linebreak_lf; + } + else { + p++; + state = skip_char; + } + break; + case got_linebreak_cr: + if (*p == '\r') { + /* Got double \r\r after \n, so does not treat it as EOH */ + state = got_linebreak_cr; + p++; + } + else if (*p == '\n') { + state = got_linebreak_lf; + p++; + } + break; + case got_linebreak_lf: + g_assert (c != NULL); + return c - input->str; + } + } + + return -1; +} diff --git a/src/libutil/str_util.h b/src/libutil/str_util.h index 3babc71f9..b6c8b2981 100644 --- a/src/libutil/str_util.h +++ b/src/libutil/str_util.h @@ -183,4 +183,13 @@ GString *rspamd_header_value_fold (const gchar *name, goffset rspamd_substring_search (const gchar *in, gsize inlen, const gchar *srch, gsize srchlen); + +/** + * Search for end-of-headers mark in the input string. Returns position just after + * the last header in message (but before the last newline character). + * Hence, to obtain the real EOH position, it is also required to skip + * space characters + */ +goffset rspamd_string_find_eoh (GString *input); + #endif /* SRC_LIBUTIL_STR_UTIL_H_ */ -- 2.39.5