From 0a4f8e4121ac3ef00b93c9c99b3b943bdf195ce8 Mon Sep 17 00:00:00 2001
From: Vsevolod Stakhov
Date: Mon, 12 Dec 2016 14:47:45 +0000
Subject: [PATCH] [Feature] Add a simple benchmark for content type parsing

---
 utils/CMakeLists.txt       |  50 +++++++--------
 utils/content_type_bench.c | 141 +++++++++++++++++++++++++++++++++++++
 2 files changed, 163 insertions(+), 28 deletions(-)
 create mode 100644 utils/content_type_bench.c

diff --git a/utils/CMakeLists.txt b/utils/CMakeLists.txt
index e0bb5c886..28ea43139 100644
--- a/utils/CMakeLists.txt
+++ b/utils/CMakeLists.txt
@@ -1,39 +1,33 @@
 SET(UTILSERVERSRC rspamd_http_server.c)
 SET(UTILBENCHSRC rspamd_http_bench.c)
 SET(RECVBENCHSRC received_parser_bench.c)
+SET(CTYPEBENCHSRC content_type_bench.c)
 
-ADD_EXECUTABLE(rspamd-http-server ${UTILSERVERSRC})
-SET_TARGET_PROPERTIES(rspamd-http-server PROPERTIES LINKER_LANGUAGE C)
-TARGET_LINK_LIBRARIES(rspamd-http-server rspamd-server)
-TARGET_LINK_LIBRARIES(rspamd-http-server rspamd-http-parser)
-TARGET_LINK_LIBRARIES(rspamd-http-server ${RSPAMD_REQUIRED_LIBRARIES})
+MACRO(ADD_UTIL NAME)
+	ADD_EXECUTABLE("${NAME}" "${ARGN}")
+	IF (ENABLE_HYPERSCAN MATCHES "ON")
+		SET_TARGET_PROPERTIES("${NAME}" PROPERTIES LINKER_LANGUAGE CXX)
+	ELSE()
+		SET_TARGET_PROPERTIES("${NAME}" PROPERTIES LINKER_LANGUAGE C)
+	ENDIF()
+	TARGET_LINK_LIBRARIES("${NAME}" rspamd-server)
+	IF (ENABLE_SNOWBALL MATCHES "ON")
+		TARGET_LINK_LIBRARIES("${NAME}" stemmer)
+	ENDIF()
+	IF(ENABLE_HIREDIS MATCHES "ON")
+		TARGET_LINK_LIBRARIES("${NAME}" rspamd-hiredis)
+	ENDIF()
+	TARGET_LINK_LIBRARIES("${NAME}" ${RSPAMD_REQUIRED_LIBRARIES})
+ENDMACRO()
 
-ADD_EXECUTABLE(rspamd-http-bench ${UTILBENCHSRC})
-SET_TARGET_PROPERTIES(rspamd-http-bench PROPERTIES LINKER_LANGUAGE C)
-TARGET_LINK_LIBRARIES(rspamd-http-bench rspamd-http-parser)
-TARGET_LINK_LIBRARIES(rspamd-http-bench rspamd-server)
-TARGET_LINK_LIBRARIES(rspamd-http-bench ${RSPAMD_REQUIRED_LIBRARIES})
-
-ADD_EXECUTABLE(rspamd-received-bench ${RECVBENCHSRC})
-SET_TARGET_PROPERTIES(rspamd-received-bench PROPERTIES LINKER_LANGUAGE C)
-TARGET_LINK_LIBRARIES(rspamd-received-bench rspamd-server)
-IF (ENABLE_SNOWBALL MATCHES "ON")
-	TARGET_LINK_LIBRARIES(rspamd-received-bench stemmer)
-ENDIF()
-IF(ENABLE_HIREDIS MATCHES "ON")
-	TARGET_LINK_LIBRARIES(rspamd-received-bench rspamd-hiredis)
-ENDIF()
-TARGET_LINK_LIBRARIES(rspamd-received-bench ${RSPAMD_REQUIRED_LIBRARIES})
-
-IF (ENABLE_HYPERSCAN MATCHES "ON")
-	SET_TARGET_PROPERTIES(rspamd-http-bench PROPERTIES LINKER_LANGUAGE CXX)
-	SET_TARGET_PROPERTIES(rspamd-http-server PROPERTIES LINKER_LANGUAGE CXX)
-	SET_TARGET_PROPERTIES(rspamd-received-bench PROPERTIES LINKER_LANGUAGE CXX)
-ENDIF()
+ADD_UTIL(rspamd-http-server ${UTILSERVERSRC})
+ADD_UTIL(rspamd-http-bench ${UTILBENCHSRC})
+ADD_UTIL(rspamd-received-bench ${RECVBENCHSRC})
+ADD_UTIL(rspamd-ctype-bench ${CTYPEBENCHSRC})
 
 # Redirector
 IF (ENABLE_REDIRECTOR MATCHES "ON")
 	CONFIGURE_FILE(redirector.pl.in redirector.pl @ONLY)
 
 	INSTALL(PROGRAMS "${CMAKE_CURRENT_BINARY_DIR}/redirector.pl" DESTINATION bin RENAME rspamd-redirector)
-ENDIF (ENABLE_REDIRECTOR MATCHES "ON")
\ No newline at end of file
+ENDIF (ENABLE_REDIRECTOR MATCHES "ON")
diff --git a/utils/content_type_bench.c b/utils/content_type_bench.c
new file mode 100644
index 000000000..87b30cacf
--- /dev/null
+++ b/utils/content_type_bench.c
@@ -0,0 +1,141 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "config.h"
+#include "printf.h"
+#include "message.h"
+#include "util.h"
+#include "content_type.h"
+
+/* Counters accumulated across all files given on the command line */
+static gdouble total_time = 0;
+static gint total_parsed = 0;
+static gint total_valid = 0;
+static gint total_type = 0;
+static gint total_subtype = 0;
+static gint total_charset = 0;
+static gint total_attrs = 0;
+
+/**
+ * Benchmark the content type parser on a single file.
+ *
+ * Every line of the file is stripped of trailing whitespace and passed to
+ * rspamd_content_type_parse; only the time spent in that call is added to
+ * total_time. Open and read errors are reported on stderr.
+ *
+ * @param fname path of the file to read, one header value per line
+ */
+static void
+rspamd_process_file (const gchar *fname)
+{
+	rspamd_mempool_t *pool;
+	GIOChannel *f;
+	GError *err = NULL;
+	GString *buf;
+	struct rspamd_content_type *ct;
+	gdouble t1, t2;
+
+	f = g_io_channel_new_file (fname, "r", &err);
+
+	if (!f) {
+		rspamd_fprintf (stderr, "cannot open %s: %e\n", fname, err);
+		g_error_free (err);
+
+		return;
+	}
+
+	/* NULL encoding: the channel hands back raw bytes without conversion */
+	g_io_channel_set_encoding (f, NULL, NULL);
+	buf = g_string_sized_new (8192);
+	pool = rspamd_mempool_new (rspamd_mempool_suggest_size (), "test");
+
+	while (g_io_channel_read_line_string (f, buf, NULL, &err)
+			== G_IO_STATUS_NORMAL) {
+
+		/* g_string_truncate keeps buf->str NUL-terminated while trimming */
+		while (buf->len > 0 && g_ascii_isspace (buf->str[buf->len - 1])) {
+			g_string_truncate (buf, buf->len - 1);
+		}
+
+		t1 = rspamd_get_virtual_ticks ();
+		ct = rspamd_content_type_parse (buf->str, buf->len, pool);
+		t2 = rspamd_get_virtual_ticks ();
+
+		total_time += t2 - t1;
+		total_parsed ++;
+
+		if (ct) {
+			total_valid ++;
+
+			if (ct->type.len > 0) {
+				total_type ++;
+			}
+			if (ct->subtype.len > 0) {
+				total_subtype ++;
+			}
+			if (ct->charset.len > 0) {
+				total_charset ++;
+			}
+			if (ct->attrs) {
+				total_attrs ++;
+			}
+		}
+	}
+
+	/* The loop also ends on a read error, in which case err is set */
+	if (err) {
+		rspamd_fprintf (stderr, "cannot read %s: %e\n", fname, err);
+		g_error_free (err);
+	}
+
+	g_io_channel_unref (f);
+	g_string_free (buf, TRUE);
+	rspamd_mempool_delete (pool);
+}
+
+/**
+ * Process every file named on the command line, then print the totals.
+ *
+ * @return always 0
+ */
+int
+main (int argc, char **argv)
+{
+	gint i;
+
+	g_mime_init (0);
+
+	for (i = 1; i < argc; i ++) {
+		if (argv[i]) {
+			rspamd_process_file (argv[i]);
+		}
+	}
+
+	rspamd_printf ("Parsed %d content type headers in %.3f seconds\n"
+			"Total valid: %d\n"
+			"Total known type: %d\n"
+			"Total known subtype: %d\n"
+			"Total known charset: %d\n"
+			"Total has attrs: %d\n",
+			total_parsed, total_time,
+			total_valid, total_type,
+			total_subtype, total_charset,
+			total_attrs);
+
+	g_mime_shutdown ();
+
+	return 0;
+}
-- 
2.39.5