]> source.dussan.org Git - rspamd.git/commitdiff
[Minor] Add a simple routine to remove smtp comments in place
author: Vsevolod Stakhov <vsevolod@highsecure.ru>
Fri, 18 Jun 2021 12:35:05 +0000 (13:35 +0100)
committer: Vsevolod Stakhov <vsevolod@highsecure.ru>
Fri, 18 Jun 2021 12:35:05 +0000 (13:35 +0100)
src/libmime/mime_headers.c
src/libmime/mime_headers.h
test/rspamd_cxx_unit.cxx
test/rspamd_cxx_unit_utils.hxx [new file with mode: 0644]

index 9dc336cf756ee03affe117ec28b6b529356fc440..0bca94024e8b6623dbfd295bbd5af536ae0c1b96 100644 (file)
@@ -1967,4 +1967,91 @@ rspamd_message_set_modified_header (struct rspamd_task *task,
                        }
                }
        }
+}
+
+/*
+ * Removes parenthesised comments from `input`, compacting the remaining
+ * bytes towards the start of the same buffer.
+ *
+ * Rules implemented below:
+ *  - `(` outside of a comment opens a comment; comments may be nested and a
+ *    comment ends only when every opened `(` has been matched by a `)`;
+ *  - everything inside a comment is dropped; a backslash inside a comment
+ *    escapes the next byte, so an escaped `(` or `)` does not change nesting;
+ *  - outside of a comment a backslash is dropped and the byte following it
+ *    is copied verbatim (so an escaped `(` does not open a comment);
+ *  - an unmatched `)` outside of a comment is ordinary data and is copied;
+ *  - an unterminated comment swallows the rest of the input;
+ *  - a lone backslash at the very end of the input is dropped.
+ *
+ * The function never writes past `input + len` and does not write a
+ * terminating NUL byte; callers must use the returned length.
+ *
+ * @param input buffer to process (modified in place)
+ * @param len number of bytes in `input`
+ * @return number of bytes kept at the start of `input`
+ */
+gsize
+rspamd_strip_smtp_comments_inplace (gchar *input, gsize len)
+{
+       enum parser_state {
+               parse_normal,        /* outside of comments: bytes are copied */
+               parse_comment,       /* inside `(...)`: bytes are dropped */
+               parse_quoted_copy,   /* byte after `\` outside a comment: copied */
+               parse_quoted_ignore, /* byte after `\` inside a comment: dropped */
+       } state = parse_normal;
+       gchar *d = input, *end = input + len, *start = input;
+       gchar t;
+       /*
+        * Number of currently open, unmatched '('. It is bumped exactly once
+        * per opening parenthesis, so inputs such as "(())" or "(a()b)" are
+        * balanced correctly.
+        */
+       gsize depth = 0;
+
+       while (input < end) {
+               t = *input++;
+
+               switch (state) {
+               case parse_normal:
+                       if (t == '(') {
+                               depth = 1;
+                               state = parse_comment;
+                       }
+                       else if (t == '\\') {
+                               state = parse_quoted_copy;
+                       }
+                       else {
+                               *d++ = t;
+                       }
+                       break;
+               case parse_comment:
+                       if (t == '(') {
+                               depth ++;
+                       }
+                       else if (t == ')') {
+                               /* depth is always >= 1 in this state */
+                               if (--depth == 0) {
+                                       state = parse_normal;
+                               }
+                       }
+                       else if (t == '\\') {
+                               state = parse_quoted_ignore;
+                       }
+                       break;
+               case parse_quoted_copy:
+                       *d++ = t;
+                       state = parse_normal;
+                       break;
+               case parse_quoted_ignore:
+                       state = parse_comment;
+                       break;
+               }
+       }
+
+       return (d - start);
 }
\ No newline at end of file
index ad8f1b68f95636bcb47f86fcc74d9b12914a3fb3..f24b0d6c6bc079df521243b33c619e5300a510db 100644 (file)
@@ -206,6 +206,14 @@ struct rspamd_mime_headers_table * rspamd_message_headers_ref (struct rspamd_mim
  */
 struct rspamd_mime_headers_table* rspamd_message_headers_new (void);
 
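+/**
+ * Strip rfc822 comments (parenthesised `(...)` sequences) from a string
+ * in place; whitespace is left untouched and a backslash escapes the
+ * character that follows it
+ * @param input input buffer (modified in place, no NUL terminator is written)
+ * @param len length of the input
+ * @return new length of the input
+ */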
+gsize rspamd_strip_smtp_comments_inplace (gchar *input, gsize len);
+
 #ifdef  __cplusplus
 }
 #endif
index 9d37ff56a1741864b6b5b73c32144c826874d495..e67060dcad6363e67f1f426b857cd839d4d2051c 100644 (file)
 #include <memory>
 
 #define DOCTEST_CONFIG_IMPLEMENTATION_IN_DLL
-
 #include "doctest/doctest.h"
 
+#include "rspamd_cxx_unit_utils.hxx"
+
 static gboolean verbose = false;
 static const GOptionEntry entries[] =
                {
diff --git a/test/rspamd_cxx_unit_utils.hxx b/test/rspamd_cxx_unit_utils.hxx
new file mode 100644 (file)
index 0000000..f9aa86a
--- /dev/null
@@ -0,0 +1,51 @@
+/*-
+ * Copyright 2021 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* Detached unit tests for the utils */
+
+#ifndef RSPAMD_RSPAMD_CXX_UNIT_UTILS_HXX
+#define RSPAMD_RSPAMD_CXX_UNIT_UTILS_HXX
+
+#define DOCTEST_CONFIG_IMPLEMENTATION_IN_DLL
+#include "doctest/doctest.h"
+
+#include "libmime/mime_headers.h"
+#include <vector>
+#include <utility>
+#include <string>
+
+/*
+ * Checks rspamd_strip_smtp_comments_inplace against a table of
+ * {input, expected output} pairs.
+ */
+TEST_CASE("rspamd_strip_smtp_comments_inplace") {
+       const std::vector<std::pair<std::string, std::string>> cases{
+                       {"abc", "abc"},
+                       {"abc(foo)", "abc"},
+                       /* Unterminated nested comment: the rest of the input is dropped */
+                       {"abc(foo()", "abc"},
+                       /* Extra closing brace outside of a comment is kept */
+                       {"abc(foo))", "abc)"},
+                       {"abc(foo(bar))", "abc"},
+                       {"(bar)abc(foo)", "abc"},
+                       {"ab(ololo)c(foo)", "abc"},
+                       /* Backslash inside a comment escapes the next byte */
+                       {"ab(trol\\\1lo)c(foo)", "abc"},
+                       /* Backslash outside of a comment is removed, next byte is kept */
+                       {"\\ab(trol\\\1lo)c(foo)", "abc"},
+       };
+
+       for (const auto &c : cases) {
+               /*
+                * Operate on a std::string copy: the buffer is writable and is
+                * released automatically at the end of each iteration.
+                */
+               std::string cpy(c.first);
+               auto nlen = rspamd_strip_smtp_comments_inplace(&cpy[0], cpy.size());
+               cpy.resize(nlen);
+               CHECK(cpy == c.second);
+       }
+}
+
+#endif