/**
 * Strip rfc822 comments (parenthesised sequences) from a string in place.
 *
 * The input is scanned once and the characters that survive are written back
 * to the beginning of the same buffer:
 *  - everything between '(' and its matching ')' is dropped, nested
 *    parentheses are tracked by depth, so "a((b)c)d" becomes "ad";
 *  - inside a comment a backslash hides the following character, so an
 *    escaped '(' or ')' does not change the nesting depth;
 *  - outside a comment a backslash is dropped and the following character is
 *    copied verbatim (an escaped '(' therefore does not open a comment);
 *  - a ')' with no open comment is copied as an ordinary character;
 *  - a comment that is never closed swallows the rest of the input.
 *
 * Double-quoted strings get no special treatment: parentheses inside quotes
 * are processed like any other. No terminating NUL is written; callers must
 * use the returned length.
 *
 * @param input input buffer, modified in place (may be NULL when len is 0)
 * @param len length of the input
 * @return new length of the input
 */
gsize
rspamd_strip_smtp_comments_inplace (gchar *input, gsize len)
{
	enum parser_state {
		parse_normal,
		parse_comment,
		parse_quoted_copy,
		parse_quoted_ignore,
	} state = parse_normal;
	gchar *d = input, *start = input, *end;
	gchar t;
	/* Number of currently open '(' - bounded by len, so gsize cannot overflow */
	gsize depth = 0;

	if (input == NULL || len == 0) {
		return 0;
	}

	end = input + len;

	while (input < end) {
		t = *input++;

		switch (state) {
		case parse_normal:
			if (t == '(') {
				depth = 1;
				state = parse_comment;
			}
			else if (t == '\\') {
				/* Drop the backslash itself, keep whatever follows it */
				state = parse_quoted_copy;
			}
			else {
				*d++ = t;
			}
			break;
		case parse_comment:
			if (t == '(') {
				depth++;
			}
			else if (t == ')') {
				if (--depth == 0) {
					/* Outermost comment is closed */
					state = parse_normal;
				}
			}
			else if (t == '\\') {
				/* Escaped character must not be counted as a parenthesis */
				state = parse_quoted_ignore;
			}
			break;
		case parse_quoted_copy:
			*d++ = t;
			state = parse_normal;
			break;
		case parse_quoted_ignore:
			state = parse_comment;
			break;
		}
	}

	return (gsize)(d - start);
}
+ */ + +/* Detached unit tests for the utils */ + +#ifndef RSPAMD_RSPAMD_CXX_UNIT_UTILS_HXX +#define RSPAMD_RSPAMD_CXX_UNIT_UTILS_HXX + +#define DOCTEST_CONFIG_IMPLEMENTATION_IN_DLL +#include "doctest/doctest.h" + +#include "libmime/mime_headers.h" +#include +#include +#include + +TEST_CASE("rspamd_strip_smtp_comments_inplace") { + std::vector> cases{ + {"abc", "abc"}, + {"abc(foo)", "abc"}, + {"abc(foo()", "abc"}, + {"abc(foo))", "abc)"}, + {"abc(foo(bar))", "abc"}, + {"(bar)abc(foo)", "abc"}, + {"ab(ololo)c(foo)", "abc"}, + {"ab(trol\\\1lo)c(foo)", "abc"}, + {"\\ab(trol\\\1lo)c(foo)", "abc"}, + }; + + for (const auto &c : cases) { + auto *cpy = new char[c.first.size()]; + memcpy(cpy, c.first.data(), c.first.size()); + auto nlen = rspamd_strip_smtp_comments_inplace(cpy, c.first.size()); + CHECK(std::string{cpy,nlen} == c.second); + } +} + +#endif