source.dussan.org Git - rspamd.git/commitdiff
[Feature] Add a simple benchmark for content type parsing
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Mon, 12 Dec 2016 14:47:45 +0000 (14:47 +0000)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Mon, 12 Dec 2016 15:43:35 +0000 (15:43 +0000)
utils/CMakeLists.txt
utils/content_type_bench.c [new file with mode: 0644]

index e0bb5c88636e789d94b87f5b93e42f7b971803db..28ea43139fa3c423af3bd1216ec136a0f9fd722c 100644 (file)
@@ -1,39 +1,33 @@
 SET(UTILSERVERSRC rspamd_http_server.c)
 SET(UTILBENCHSRC rspamd_http_bench.c)
 SET(RECVBENCHSRC received_parser_bench.c)
+SET(CTYPEBENCHSRC content_type_bench.c)
 
-ADD_EXECUTABLE(rspamd-http-server ${UTILSERVERSRC})
-SET_TARGET_PROPERTIES(rspamd-http-server PROPERTIES LINKER_LANGUAGE C)
-TARGET_LINK_LIBRARIES(rspamd-http-server rspamd-server)
-TARGET_LINK_LIBRARIES(rspamd-http-server rspamd-http-parser)
-TARGET_LINK_LIBRARIES(rspamd-http-server ${RSPAMD_REQUIRED_LIBRARIES})
+# ADD_UTIL(NAME <sources...>)
+# Declares the executable NAME built from the given sources and links it
+# against rspamd-server, the optional stemmer / rspamd-hiredis libraries
+# and ${RSPAMD_REQUIRED_LIBRARIES}.
+MACRO(ADD_UTIL NAME)
+       # ARGN is intentionally unquoted so that every extra argument is passed
+       # on as a separate source file instead of a single ';'-joined string.
+       ADD_EXECUTABLE("${NAME}" ${ARGN})
+       # The linker language is C++ when hyperscan is enabled, C otherwise.
+       IF (ENABLE_HYPERSCAN MATCHES "ON")
+               SET_TARGET_PROPERTIES("${NAME}" PROPERTIES LINKER_LANGUAGE CXX)
+       ELSE()
+               SET_TARGET_PROPERTIES("${NAME}" PROPERTIES LINKER_LANGUAGE C)
+       ENDIF()
+       TARGET_LINK_LIBRARIES("${NAME}" rspamd-server)
+       IF (ENABLE_SNOWBALL MATCHES "ON")
+               TARGET_LINK_LIBRARIES("${NAME}" stemmer)
+       ENDIF()
+       IF(ENABLE_HIREDIS MATCHES "ON")
+               TARGET_LINK_LIBRARIES("${NAME}" rspamd-hiredis)
+       ENDIF()
+       TARGET_LINK_LIBRARIES("${NAME}" ${RSPAMD_REQUIRED_LIBRARIES})
+ENDMACRO()
 
-ADD_EXECUTABLE(rspamd-http-bench ${UTILBENCHSRC})
-SET_TARGET_PROPERTIES(rspamd-http-bench PROPERTIES LINKER_LANGUAGE C)
-TARGET_LINK_LIBRARIES(rspamd-http-bench rspamd-http-parser)
-TARGET_LINK_LIBRARIES(rspamd-http-bench rspamd-server)
-TARGET_LINK_LIBRARIES(rspamd-http-bench ${RSPAMD_REQUIRED_LIBRARIES})
-
-ADD_EXECUTABLE(rspamd-received-bench ${RECVBENCHSRC})
-SET_TARGET_PROPERTIES(rspamd-received-bench PROPERTIES LINKER_LANGUAGE C)
-TARGET_LINK_LIBRARIES(rspamd-received-bench rspamd-server)
-IF (ENABLE_SNOWBALL MATCHES "ON")
-       TARGET_LINK_LIBRARIES(rspamd-received-bench stemmer)
-ENDIF()
-IF(ENABLE_HIREDIS MATCHES "ON")
-       TARGET_LINK_LIBRARIES(rspamd-received-bench rspamd-hiredis)
-ENDIF()
-TARGET_LINK_LIBRARIES(rspamd-received-bench ${RSPAMD_REQUIRED_LIBRARIES})
-
-IF (ENABLE_HYPERSCAN MATCHES "ON")
-       SET_TARGET_PROPERTIES(rspamd-http-bench PROPERTIES LINKER_LANGUAGE CXX)
-       SET_TARGET_PROPERTIES(rspamd-http-server PROPERTIES LINKER_LANGUAGE CXX)
-       SET_TARGET_PROPERTIES(rspamd-received-bench PROPERTIES LINKER_LANGUAGE CXX)
-ENDIF()
+# Utility and benchmark executables, one ADD_UTIL call per source list.
+ADD_UTIL(rspamd-http-server ${UTILSERVERSRC})
+ADD_UTIL(rspamd-http-bench ${UTILBENCHSRC})
+ADD_UTIL(rspamd-received-bench ${RECVBENCHSRC})
+ADD_UTIL(rspamd-ctype-bench ${CTYPEBENCHSRC})
 
 # Redirector
 IF (ENABLE_REDIRECTOR MATCHES "ON")
     CONFIGURE_FILE(redirector.pl.in redirector.pl @ONLY)
     INSTALL(PROGRAMS "${CMAKE_CURRENT_BINARY_DIR}/redirector.pl"
             DESTINATION bin RENAME rspamd-redirector)
-ENDIF (ENABLE_REDIRECTOR MATCHES "ON")
\ No newline at end of file
+ENDIF (ENABLE_REDIRECTOR MATCHES "ON")
diff --git a/utils/content_type_bench.c b/utils/content_type_bench.c
new file mode 100644 (file)
index 0000000..87b30ca
--- /dev/null
@@ -0,0 +1,123 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "config.h"
+#include "printf.h"
+#include "message.h"
+#include "util.h"
+#include "content_type.h"
+
+/* Benchmark counters, updated by rspamd_process_file () and printed by main (). */
+/* Sum of the tick deltas measured around each parse call */
+static gdouble total_time = 0;
+/* Number of input lines handed to the parser */
+static gint total_parsed = 0;
+/* Number of lines for which the parser returned a non-NULL result */
+static gint total_valid = 0;
+/* Number of results with a non-empty type */
+static gint total_type = 0;
+/* Number of results with a non-empty subtype */
+static gint total_subtype = 0;
+/* Number of results with a non-empty charset */
+static gint total_charset = 0;
+/* Number of results with a non-NULL attrs member */
+static gint total_attrs = 0;
+
+/**
+ * Reads `fname` line by line, strips trailing whitespace from every line and
+ * passes it to rspamd_content_type_parse (), updating the global benchmark
+ * counters with the time spent in the parser and with the fields found in
+ * the result.
+ *
+ * A failure to open the file is reported to stderr and the function returns
+ * immediately; a read error is reported to stderr after the read loop ends.
+ *
+ * @param fname path of the file to process
+ */
+static void
+rspamd_process_file (const gchar *fname)
+{
+       rspamd_mempool_t *pool;
+       GIOChannel *f;
+       GError *err = NULL;
+       GString *buf;
+       struct rspamd_content_type *ct;
+       gdouble t1, t2;
+       gsize len;
+
+       f = g_io_channel_new_file (fname, "r", &err);
+
+       if (!f) {
+               rspamd_fprintf (stderr, "cannot open %s: %e\n", fname, err);
+               g_error_free (err);
+
+               return;
+       }
+
+       /* NULL encoding: lines are read as raw bytes */
+       g_io_channel_set_encoding (f, NULL, NULL);
+       buf = g_string_sized_new (8192);
+       /* Pool handed to the parser; deleted once the whole file is processed */
+       pool = rspamd_mempool_new (rspamd_mempool_suggest_size (), "test");
+
+       while (g_io_channel_read_line_string (f, buf, NULL, &err)
+                       == G_IO_STATUS_NORMAL) {
+
+               /*
+                * Strip trailing whitespace (this includes the line terminator).
+                * g_string_truncate () is used instead of decrementing buf->len
+                * by hand so that the string stays NUL-terminated.
+                */
+               len = buf->len;
+
+               while (len > 0 && g_ascii_isspace (buf->str[len - 1])) {
+                       len --;
+               }
+
+               g_string_truncate (buf, len);
+
+               /* Only the parser call itself is timed */
+               t1 = rspamd_get_virtual_ticks ();
+               ct = rspamd_content_type_parse (buf->str, buf->len, pool);
+               t2 = rspamd_get_virtual_ticks ();
+
+               total_time += t2 - t1;
+               total_parsed ++;
+
+               if (ct) {
+                       total_valid ++;
+
+                       if (ct->type.len > 0) {
+                               total_type ++;
+                       }
+                       if (ct->subtype.len > 0) {
+                               total_subtype ++;
+                       }
+                       if (ct->charset.len > 0) {
+                               total_charset ++;
+                       }
+                       if (ct->attrs) {
+                               total_attrs ++;
+                       }
+               }
+       }
+
+       /* The loop also stops on a read error, in which case err is set */
+       if (err) {
+               rspamd_fprintf (stderr, "cannot read %s: %e\n", fname, err);
+               g_error_free (err);
+       }
+
+       g_io_channel_unref (f);
+       g_string_free (buf, TRUE);
+       rspamd_mempool_delete (pool);
+}
+
+/**
+ * Entry point: runs rspamd_process_file () on every command line argument
+ * and prints the accumulated counters.
+ *
+ * @return always 0
+ */
+int
+main (int argc, char **argv)
+{
+       gint i;
+
+       g_mime_init (0);
+
+       for (i = 1; i < argc; i ++) {
+               if (argv[i]) {
+                       rspamd_process_file (argv[i]);
+               }
+       }
+
+       /*
+        * The arguments below must stay in the same order as the format
+        * lines: charset is reported from total_charset, not total_type.
+        */
+       rspamd_printf ("Parsed %d content type headers in %.3f seconds\n"
+                       "Total valid: %d\n"
+                       "Total known type: %d\n"
+                       "Total known subtype: %d\n"
+                       "Total known charset: %d\n"
+                       "Total has attrs: %d\n",
+                       total_parsed, total_time,
+                       total_valid, total_type,
+                       total_subtype, total_charset,
+                       total_attrs);
+
+       g_mime_shutdown ();
+
+       return 0;
+}