aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorVsevolod Stakhov <vsevolod@highsecure.ru>2021-06-18 13:35:05 +0100
committerVsevolod Stakhov <vsevolod@highsecure.ru>2021-06-18 13:35:05 +0100
commitde7ac4e37284fcd241060213619297df41a71dce (patch)
treeeebb222e637b021c78888a84f6022a3231aba0d6
parentf6321fbd323034d9763e53a56af014872a8a625a (diff)
downloadrspamd-de7ac4e37284fcd241060213619297df41a71dce.tar.gz
rspamd-de7ac4e37284fcd241060213619297df41a71dce.zip
[Minor] Add a simple routine to remove smtp comments in place
-rw-r--r--src/libmime/mime_headers.c87
-rw-r--r--src/libmime/mime_headers.h8
-rw-r--r--test/rspamd_cxx_unit.cxx3
-rw-r--r--test/rspamd_cxx_unit_utils.hxx51
4 files changed, 148 insertions, 1 deletions
diff --git a/src/libmime/mime_headers.c b/src/libmime/mime_headers.c
index 9dc336cf7..0bca94024 100644
--- a/src/libmime/mime_headers.c
+++ b/src/libmime/mime_headers.c
@@ -1967,4 +1967,91 @@ rspamd_message_set_modified_header (struct rspamd_task *task,
}
}
}
+}
+
+/**
+ * Remove parenthesised comments from a buffer, rewriting it in place.
+ *
+ * Rules applied while scanning `len` bytes starting at `input`:
+ *  - everything between a '(' and its matching ')' is dropped; comments may
+ *    nest to any depth;
+ *  - outside a comment, a backslash is dropped and the byte that follows it
+ *    is copied verbatim (so an escaped '(' does not open a comment);
+ *  - inside a comment, a backslash hides the byte that follows it, so an
+ *    escaped '(' or ')' does not change the nesting depth;
+ *  - a ')' met outside a comment is copied like any other byte;
+ *  - an unterminated comment swallows the rest of the input.
+ *
+ * The result is never longer than the input and no terminator is written,
+ * so callers must use the returned length.
+ *
+ * @param input buffer to process (modified in place)
+ * @param len number of bytes in `input`
+ * @return number of bytes kept at the beginning of `input`
+ */
+gsize
+rspamd_strip_smtp_comments_inplace (gchar *input, gsize len)
+{
+	enum parser_state {
+		parse_normal,
+		parse_comment,
+		parse_quoted_copy,
+		parse_quoted_ignore,
+	} state = parse_normal;
+	gchar *d = input, *end = input + len, *start = input;
+	gchar t;
+	/* Number of currently open '(' - cannot exceed len, so gsize never wraps */
+	gsize depth = 0;
+
+	while (input < end) {
+		t = *input++;
+
+		switch (state) {
+		case parse_normal:
+			if (t == '(') {
+				depth = 1;
+				state = parse_comment;
+			}
+			else if (t == '\\') {
+				/* Drop the backslash itself, keep the escaped byte */
+				state = parse_quoted_copy;
+			}
+			else {
+				*d++ = t;
+			}
+			break;
+		case parse_comment:
+			if (t == '(') {
+				/* Each brace is counted exactly once, when it is seen */
+				depth ++;
+			}
+			else if (t == ')') {
+				/* depth >= 1 whenever we are in this state */
+				if (--depth == 0) {
+					state = parse_normal;
+				}
+			}
+			else if (t == '\\') {
+				/* Escaped byte must not be treated as a brace */
+				state = parse_quoted_ignore;
+			}
+			break;
+		case parse_quoted_copy:
+			*d++ = t;
+			state = parse_normal;
+			break;
+		case parse_quoted_ignore:
+			state = parse_comment;
+			break;
+		}
+	}
+
+	return (d - start);
} \ No newline at end of file
diff --git a/src/libmime/mime_headers.h b/src/libmime/mime_headers.h
index ad8f1b68f..f24b0d6c6 100644
--- a/src/libmime/mime_headers.h
+++ b/src/libmime/mime_headers.h
@@ -206,6 +206,14 @@ struct rspamd_mime_headers_table * rspamd_message_headers_ref (struct rspamd_mim
*/
struct rspamd_mime_headers_table* rspamd_message_headers_new (void);
+/**
+ * Strip rfc822 style comments (text enclosed in parentheses) from a string
+ * in place. Backslash escapes are honoured: outside a comment the backslash
+ * is removed and the escaped character is kept, inside a comment the escaped
+ * character is skipped together with the rest of the comment.
+ * Whitespace is left untouched and no terminating zero is written, so the
+ * returned length must be used to delimit the result.
+ * @param input input buffer (modified in place)
+ * @param len length of the input in bytes
+ * @return new length of the input (never greater than `len`)
+ */
+gsize rspamd_strip_smtp_comments_inplace (gchar *input, gsize len);
+
#ifdef __cplusplus
}
#endif
diff --git a/test/rspamd_cxx_unit.cxx b/test/rspamd_cxx_unit.cxx
index 9d37ff56a..e67060dca 100644
--- a/test/rspamd_cxx_unit.cxx
+++ b/test/rspamd_cxx_unit.cxx
@@ -19,9 +19,10 @@
#include <memory>
#define DOCTEST_CONFIG_IMPLEMENTATION_IN_DLL
-
#include "doctest/doctest.h"
+#include "rspamd_cxx_unit_utils.hxx"
+
static gboolean verbose = false;
static const GOptionEntry entries[] =
{
diff --git a/test/rspamd_cxx_unit_utils.hxx b/test/rspamd_cxx_unit_utils.hxx
new file mode 100644
index 000000000..f9aa86a95
--- /dev/null
+++ b/test/rspamd_cxx_unit_utils.hxx
@@ -0,0 +1,51 @@
+/*-
+ * Copyright 2021 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* Detached unit tests for the utils */
+
+#ifndef RSPAMD_RSPAMD_CXX_UNIT_UTILS_HXX
+#define RSPAMD_RSPAMD_CXX_UNIT_UTILS_HXX
+
+#define DOCTEST_CONFIG_IMPLEMENTATION_IN_DLL
+#include "doctest/doctest.h"
+
+#include "libmime/mime_headers.h"
+#include <vector>
+#include <utility>
+#include <string>
+
+TEST_CASE("rspamd_strip_smtp_comments_inplace") {
+	/* Pairs of {input, expected content after stripping} */
+	const std::vector<std::pair<std::string, std::string>> cases{
+		{"abc", "abc"},
+		{"abc(foo)", "abc"},
+		{"abc(foo()", "abc"},
+		{"abc(foo))", "abc)"},
+		{"abc(foo(bar))", "abc"},
+		{"(bar)abc(foo)", "abc"},
+		{"ab(ololo)c(foo)", "abc"},
+		{"ab(trol\\\1lo)c(foo)", "abc"},
+		{"\\ab(trol\\\1lo)c(foo)", "abc"},
+	};
+
+	for (const auto &c : cases) {
+		/*
+		 * The routine rewrites its argument, so work on a private copy.
+		 * std::string owns the buffer and frees it at the end of each
+		 * iteration, so nothing leaks even when a check fails.
+		 */
+		std::string cpy{c.first};
+		auto nlen = rspamd_strip_smtp_comments_inplace(&cpy[0], cpy.size());
+		/* Only the first nlen bytes are meaningful after stripping */
+		cpy.resize(nlen);
+		CHECK(cpy == c.second);
+	}
+}
+
+#endif