From 683b90f4c6c744557f7429ce6ff77c0f7d2175e1 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Fri, 6 May 2011 19:18:40 +0400 Subject: [PATCH] * Major cleanup of cmake build system * Add initial version of statshow utility for statfiles debugging * Add debugging for statistics * Remove unused utilities --- CMakeLists.txt | 131 +---------------- config.h.in | 2 +- src/classifiers/bayes.c | 7 +- src/classifiers/classifiers.h | 1 + src/controller.c | 3 +- src/filter.c | 9 +- src/tokenizers/osb.c | 5 +- src/tokenizers/tokenizers.c | 4 +- src/tokenizers/tokenizers.h | 4 +- test/CMakeLists.txt | 41 ++++++ utils/CMakeLists.txt | 7 + utils/expression_parser.c | 54 ------- utils/statshow/CMakeLists.txt | 22 +++ utils/statshow/statshow.c | 262 ++++++++++++++++++++++++++++++++++ utils/url_extracter.c | 73 ---------- 15 files changed, 356 insertions(+), 269 deletions(-) create mode 100644 test/CMakeLists.txt create mode 100644 utils/CMakeLists.txt delete mode 100644 utils/expression_parser.c create mode 100644 utils/statshow/CMakeLists.txt create mode 100644 utils/statshow/statshow.c delete mode 100644 utils/url_extracter.c diff --git a/CMakeLists.txt b/CMakeLists.txt index 632b0e294..47770487a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -7,7 +7,7 @@ PROJECT(rspamd C) SET(RSPAMD_VERSION_MAJOR 0) SET(RSPAMD_VERSION_MINOR 3) -SET(RSPAMD_VERSION_PATCH 12) +SET(RSPAMD_VERSION_PATCH 13) SET(RSPAMD_VERSION "${RSPAMD_VERSION_MAJOR}.${RSPAMD_VERSION_MINOR}.${RSPAMD_VERSION_PATCH}") @@ -19,7 +19,6 @@ CMAKE_MINIMUM_REQUIRED(VERSION 2.6.0 FATAL_ERROR) OPTION(DEBUG_MODE "Enable debug output [default: ON]" ON) OPTION(ENABLE_OPTIMIZATION "Enable optimization [default: OFF]" OFF) -OPTION(ENABLE_PERL "Enable perl client API [default: OFF]" OFF) OPTION(SKIP_RELINK_RPATH "Skip relinking and full RPATH for the install tree" OFF) OPTION(ENABLE_REDIRECTOR "Enable redirector install [default: OFF]" OFF) OPTION(ENABLE_PROFILING "Enable profiling [default: OFF]" OFF) @@ -159,32 +158,6 @@ 
IF(CMAKE_SYSTEM_NAME STREQUAL "SunOS") MESSAGE(STATUS "Configuring for Solaris") ENDIF(CMAKE_SYSTEM_NAME STREQUAL "SunOS") -IF(ENABLE_PERL MATCHES "ON") - - IF(NOT PERL_EXECUTABLE) - MESSAGE(FATAL_ERROR "Error: Perl is not found but is required") - ENDIF(NOT PERL_EXECUTABLE) - - # Find perl libraries and cflags - EXECUTE_PROCESS(COMMAND ${PERL_EXECUTABLE} -MExtUtils::Embed -e ccopts OUTPUT_VARIABLE PERL_CFLAGS) - EXECUTE_PROCESS(COMMAND ${PERL_EXECUTABLE} -MExtUtils::Embed -e ldopts OUTPUT_VARIABLE PERL_LDFLAGS) - STRING(REGEX REPLACE "[\r\n]" " " PERL_CFLAGS ${PERL_CFLAGS}) - STRING(REGEX REPLACE " +$" "" PERL_CFLAGS ${PERL_CFLAGS}) - STRING(REGEX REPLACE "[\r\n]" " " PERL_LDFLAGS ${PERL_LDFLAGS}) - STRING(REGEX REPLACE " +$" "" PERL_LDFLAGS ${PERL_LDFLAGS}) - # Handle DynaLoader - STRING(REGEX MATCH "/[^ ]*/DynaLoader.a" PERL_DYNALOADER ${PERL_LDFLAGS}) - STRING(REGEX REPLACE "/[^ ]*/DynaLoader.a " "" PERL_LDFLAGS ${PERL_LDFLAGS}) - - IF(PERL_DYNALOADER) - EXECUTE_PROCESS(COMMAND ${CMAKE_COMMAND} -E copy ${PERL_DYNALOADER} ${rspamd_BINARY_DIR}/compat/libdynaloader.so) - LINK_DIRECTORIES(${rspamd_BINARY_DIR}/compat/) - ENDIF(PERL_DYNALOADER) -ELSE(ENABLE_PERL MATCHES "ON") - SET(WITHOUT_PERL 1) -ENDIF(ENABLE_PERL MATCHES "ON") - - INCLUDE(FindLua51) # Check for luajit IF(ENABLE_LUAJIT MATCHES "ON") @@ -525,11 +498,6 @@ ENDIF(SUPPORT_STD_FLAG) SET(CMAKE_C_FLAGS "${CMAKE_C_OPT_FLAGS} ${CMAKE_C_FLAGS} ${CMAKE_C_WARN_FLAGS}") - -IF(ENABLE_REDIRECTOR MATCHES "ON") - CONFIGURE_FILE(utils/redirector.pl.in utils/redirector.pl @ONLY) -ENDIF(ENABLE_REDIRECTOR MATCHES "ON") - IF(DEBUG_MODE MATCHES "ON") SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -g") ENDIF(DEBUG_MODE MATCHES "ON") @@ -620,8 +588,8 @@ ADD_SUBDIRECTORY(src/client) ADD_SUBDIRECTORY(src/json) ADD_SUBDIRECTORY(src/cdb) -# ADD_SUBDIRECTORY(src/evdns) -# ADD_SUBDIRECTORY(src/plugins/custom) +ADD_SUBDIRECTORY(utils) +ADD_SUBDIRECTORY(test) SET(TOKENIZERSSRC src/tokenizers/tokenizers.c src/tokenizers/osb.c) @@ -636,47 
+604,6 @@ SET(PLUGINSSRC src/plugins/surbl.c src/plugins/fuzzy_check.c src/plugins/spf.c) -SET(TESTSRC test/rspamd_expression_test.c - test/rspamd_memcached_test.c - test/rspamd_mem_pool_test.c - test/rspamd_statfile_test.c - test/rspamd_fuzzy_test.c - test/rspamd_test_suite.c - test/rspamd_url_test.c - test/rspamd_dns_test.c) - -SET(TESTDEPENDS src/mem_pool.c - src/hash.c - src/url.c - src/trie.c - src/util.c - src/radix.c - src/fuzzy.c - src/map.c - src/logger.c - src/memcached.c - src/message.c - src/html.c - src/expressions.c - src/statfile.c - src/events.c - src/upstream.c - src/dns.c) - -SET(UTILSSRC utils/url_extracter.c) -SET(EXPRSRC utils/expression_parser.c) - -SET(UTILSDEPENDS src/mem_pool.c - src/hash.c - src/url.c - src/trie.c - src/fuzzy.c - src/expressions.c - src/message.c - src/html.c - src/util.c - src/radix.c) - LIST(LENGTH PLUGINSSRC RSPAMD_MODULES_NUM) ############################ TARGETS SECTION ############################### @@ -685,20 +612,6 @@ ADD_CUSTOM_COMMAND(OUTPUT src/modules.c COMMAND ../utils/gen-modules.sh ${PLUGINSSRC} WORKING_DIRECTORY src) -IF(ENABLE_PERL MATCHES "ON") - ADD_CUSTOM_COMMAND(OUTPUT ${CMAKE_SOURCE_DIR}/perl/Makefile - DEPENDS ${CMAKE_SOURCE_DIR}/perl/Makefile.PL - COMMAND ${PERL_EXECUTABLE} ./Makefile.PL DESTDIR=${DESTDIR} PREFIX=${PREFIX} INSTALLMAN3DIR=${MAN_PREFIX}/man3 - WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/perl) - CONFIGURE_FILE(perl/Makefile.PL.in perl/Makefile.PL) - ADD_CUSTOM_TARGET(perlmodule - COMMAND ${CMAKE_MAKE_PROGRAM} DESTDIR=${DESTDIR} - DEPENDS ${CMAKE_SOURCE_DIR}/perl/Makefile - WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/perl - VERBATIM) - -ENDIF(ENABLE_PERL MATCHES "ON") - CONFIGURE_FILE(config.h.in src/config.h) CONFIGURE_FILE(contrib/exim/local_scan.c.in contrib/exim/local_scan_rspamd.c @ONLY) CONFIGURE_FILE(rspamd.xml.sample conf/rspamd.xml.sample @ONLY) @@ -710,19 +623,6 @@ SET_TARGET_PROPERTIES(rspamd PROPERTIES LINKER_LANGUAGE C) SET_TARGET_PROPERTIES(rspamd PROPERTIES COMPILE_FLAGS 
"-DRSPAMD_MAIN") SET_TARGET_PROPERTIES(rspamd PROPERTIES VERSION ${RSPAMD_VERSION}) -IF(ENABLE_PERL MATCHES "ON") - - SET_TARGET_PROPERTIES(rspamd PROPERTIES COMPILE_FLAGS ${PERL_CFLAGS} - LINK_FLAGS ${PERL_LDFLAGS}) - IF(PERL_DYNALOADER) - TARGET_LINK_LIBRARIES(rspamd dynaloader) - ENDIF(PERL_DYNALOADER) - -ENDIF(ENABLE_PERL MATCHES "ON") -IF(ENABLE_PERL MATCHES "ON") - ADD_DEPENDENCIES(rspamd perlmodule) -ENDIF(ENABLE_PERL MATCHES "ON") - TARGET_LINK_LIBRARIES(rspamd rspamd_lua) IF(ENABLE_LUAJIT MATCHES "ON") TARGET_LINK_LIBRARIES(rspamd "${LUAJIT_LIBRARY}") @@ -752,21 +652,6 @@ IF(ENABLE_GPERF_TOOLS MATCHES "ON") TARGET_LINK_LIBRARIES(rspamd profiler) ENDIF(ENABLE_GPERF_TOOLS MATCHES "ON") -ADD_EXECUTABLE(test/rspamd-test ${TESTDEPENDS} ${CONTRIBSRC} ${TESTSRC}) -SET_TARGET_PROPERTIES(test/rspamd-test PROPERTIES LINKER_LANGUAGE C) -SET_TARGET_PROPERTIES(test/rspamd-test PROPERTIES COMPILE_FLAGS "-DRSPAMD_TEST") -TARGET_LINK_LIBRARIES(test/rspamd-test event) -TARGET_LINK_LIBRARIES(test/rspamd-test ${GLIB2_LIBRARIES}) -TARGET_LINK_LIBRARIES(test/rspamd-test ${CMAKE_REQUIRED_LIBRARIES}) -IF(GMIME2_FOUND) - TARGET_LINK_LIBRARIES(test/rspamd-test ${GMIME2_LIBRARIES}) -ELSE(GMIME2_FOUND) - TARGET_LINK_LIBRARIES(test/rspamd-test ${GMIME24_LIBRARIES}) -ENDIF(GMIME2_FOUND) -IF(ENABLE_STATIC MATCHES "ON") - TARGET_LINK_LIBRARIES(test/rspamd-test ${PCRE_LIBRARIES}) -ENDIF(ENABLE_STATIC MATCHES "ON") - ##################### INSTALLATION ########################################## @@ -823,16 +708,6 @@ ENDFOREACH(LUA_CONF) INSTALL(FILES "doc/rspamd.8" DESTINATION man/man8) INSTALL(FILES "doc/rspamc.1" DESTINATION man/man1) -# Perl lib -IF(ENABLE_PERL MATCHES "ON") - INSTALL(CODE "EXECUTE_PROCESS(COMMAND make install WORKING_DIRECTORY perl)") -ENDIF(ENABLE_PERL MATCHES "ON") - -# Redirector -IF(ENABLE_REDIRECTOR MATCHES "ON") - INSTALL(PROGRAMS utils/redirector.pl DESTINATION bin RENAME rspamd-redirector) -ENDIF(ENABLE_REDIRECTOR MATCHES "ON") - # Start scripts 
IF(CMAKE_SYSTEM_NAME STREQUAL "FreeBSD" AND NOT BUILD_PORT) INSTALL(PROGRAMS freebsd/rspamd.sh DESTINATION etc/rc.d) diff --git a/config.h.in b/config.h.in index 902fbdee6..4d8af2e6d 100644 --- a/config.h.in +++ b/config.h.in @@ -113,7 +113,7 @@ #cmakedefine HAVE_SETITIMER 1 -#cmakedefine WITHOUT_PERL 1 +#define WITHOUT_PERL 1 #cmakedefine WITH_LUA 1 diff --git a/src/classifiers/bayes.c b/src/classifiers/bayes.c index 9ef2544b0..b4f7826e5 100644 --- a/src/classifiers/bayes.c +++ b/src/classifiers/bayes.c @@ -131,6 +131,7 @@ bayes_classify_callback (gpointer key, gpointer value, gpointer data) if (cur->post_probability < G_MINDOUBLE * 100) { cur->post_probability = G_MINDOUBLE * 100; } + } renorm = 0; for (i = 0; i < cd->statfiles_num; i ++) { @@ -144,6 +145,10 @@ bayes_classify_callback (gpointer key, gpointer value, gpointer data) if (cur->post_probability < G_MINDOUBLE * 10) { cur->post_probability = G_MINDOUBLE * 100; } + if (cd->ctx->debug) { + msg_info ("token: %s, statfile: %s, probability: %.4f, post_probability: %.4f", + node->extra, cur->st->symbol, cur->value, cur->post_probability); + } } return FALSE; @@ -156,7 +161,7 @@ bayes_init (memory_pool_t *pool, struct classifier_config *cfg) ctx->pool = pool; ctx->cfg = cfg; - + ctx->debug = FALSE; return ctx; } diff --git a/src/classifiers/classifiers.h b/src/classifiers/classifiers.h index 0e6df173a..601db0205 100644 --- a/src/classifiers/classifiers.h +++ b/src/classifiers/classifiers.h @@ -15,6 +15,7 @@ struct worker_task; struct classifier_ctx { memory_pool_t *pool; GHashTable *results; + gboolean debug; struct classifier_config *cfg; }; diff --git a/src/controller.c b/src/controller.c index 9504d3b1f..a06351bb6 100644 --- a/src/controller.c +++ b/src/controller.c @@ -850,7 +850,8 @@ controller_read_socket (f_str_t * in, void *arg) c.begin = part->content->data; c.len = part->content->len; - if (!session->learn_classifier->tokenizer->tokenize_func (session->learn_classifier->tokenizer, 
session->session_pool, &c, &tokens)) { + if (!session->learn_classifier->tokenizer->tokenize_func (session->learn_classifier->tokenizer, + session->session_pool, &c, &tokens, FALSE)) { i = rspamd_snprintf (out_buf, sizeof (out_buf), "weights failed, tokenizer error" CRLF END); free_task (task, FALSE); if (!rspamd_dispatcher_write (session->dispatcher, out_buf, i, FALSE, FALSE)) { diff --git a/src/filter.c b/src/filter.c index df8e1a9e0..2f8b27060 100644 --- a/src/filter.c +++ b/src/filter.c @@ -36,9 +36,6 @@ #include "classifiers/classifiers.h" #include "tokenizers/tokenizers.h" -#ifndef WITHOUT_PERL -# include "perl.h" -#endif #ifdef WITH_LUA # include "lua/lua_common.h" #endif @@ -615,7 +612,7 @@ classifiers_callback (gpointer value, void *arg) c.len = strlen (cur->data); if (c.len > 0) { c.begin = cur->data; - if (!cl->tokenizer->tokenize_func (cl->tokenizer, task->task_pool, &c, &tokens)) { + if (!cl->tokenizer->tokenize_func (cl->tokenizer, task->task_pool, &c, &tokens, FALSE)) { msg_info ("cannot tokenize input"); return; } @@ -630,7 +627,7 @@ classifiers_callback (gpointer value, void *arg) c.begin = text_part->content->data; c.len = text_part->content->len; /* Tree would be freed at task pool freeing */ - if (!cl->tokenizer->tokenize_func (cl->tokenizer, task->task_pool, &c, &tokens)) { + if (!cl->tokenizer->tokenize_func (cl->tokenizer, task->task_pool, &c, &tokens, FALSE)) { msg_info ("cannot tokenize input"); return; } @@ -857,7 +854,7 @@ learn_task (const gchar *statfile, struct worker_task *task, GError **err) /* Get tokens */ if (!cl->tokenizer->tokenize_func ( cl->tokenizer, task->task_pool, - &c, &tokens)) { + &c, &tokens, FALSE)) { g_set_error (err, filter_error_quark(), 2, "Cannot tokenize message"); return FALSE; } diff --git a/src/tokenizers/osb.c b/src/tokenizers/osb.c index ae59cf8ea..41bcce737 100644 --- a/src/tokenizers/osb.c +++ b/src/tokenizers/osb.c @@ -35,7 +35,7 @@ extern const int primes[]; int -osb_tokenize_text (struct tokenizer 
*tokenizer, memory_pool_t * pool, f_str_t * input, GTree ** tree) +osb_tokenize_text (struct tokenizer *tokenizer, memory_pool_t * pool, f_str_t * input, GTree ** tree, gboolean save_token) { token_node_t *new = NULL; f_str_t token = { NULL, 0, 0 }, *res; @@ -69,6 +69,9 @@ osb_tokenize_text (struct tokenizer *tokenizer, memory_pool_t * pool, f_str_t * new = memory_pool_alloc0 (pool, sizeof (token_node_t)); new->h1 = h1; new->h2 = h2; + if (save_token) { + new->extra = (uintptr_t)memory_pool_fstrdup (pool, &token); + } if (g_tree_lookup (*tree, new) == NULL) { g_tree_insert (*tree, new, new); diff --git a/src/tokenizers/tokenizers.c b/src/tokenizers/tokenizers.c index b7318bdfc..5af3fe6d5 100644 --- a/src/tokenizers/tokenizers.c +++ b/src/tokenizers/tokenizers.c @@ -239,13 +239,13 @@ tokenize_subject (struct worker_task *task, GTree ** tree) new = memory_pool_alloc (task->task_pool, sizeof (token_node_t)); subject.begin = task->subject; subject.len = strlen (task->subject); - osb_tokenizer->tokenize_func (osb_tokenizer, task->task_pool, &subject, tree); + osb_tokenizer->tokenize_func (osb_tokenizer, task->task_pool, &subject, tree, FALSE); } if ((sub = g_mime_message_get_subject (task->message)) != NULL) { new = memory_pool_alloc (task->task_pool, sizeof (token_node_t)); subject.begin = (gchar *)sub; subject.len = strlen (sub); - osb_tokenizer->tokenize_func (osb_tokenizer, task->task_pool, &subject, tree); + osb_tokenizer->tokenize_func (osb_tokenizer, task->task_pool, &subject, tree, FALSE); } } diff --git a/src/tokenizers/tokenizers.h b/src/tokenizers/tokenizers.h index 59a2684d0..741753328 100644 --- a/src/tokenizers/tokenizers.h +++ b/src/tokenizers/tokenizers.h @@ -24,7 +24,7 @@ typedef struct token_node_s { /* Common tokenizer structure */ struct tokenizer { char *name; - int (*tokenize_func)(struct tokenizer *tokenizer, memory_pool_t *pool, f_str_t *input, GTree **cur); + int (*tokenize_func)(struct tokenizer *tokenizer, memory_pool_t *pool, f_str_t *input, 
GTree **cur, gboolean save_token); f_str_t* (*get_next_word)(f_str_t *buf, f_str_t *token); }; @@ -35,7 +35,7 @@ struct tokenizer* get_tokenizer (char *name); /* Get next word from specified f_str_t buf */ f_str_t *get_next_word (f_str_t *buf, f_str_t *token); /* OSB tokenize function */ -int osb_tokenize_text (struct tokenizer *tokenizer, memory_pool_t *pool, f_str_t *input, GTree **cur); +int osb_tokenize_text (struct tokenizer *tokenizer, memory_pool_t *pool, f_str_t *input, GTree **cur, gboolean save_token); /* Common tokenizer for headers */ int tokenize_headers (memory_pool_t *pool, struct worker_task *task, GTree **cur); /* Make tokens for a subject */ diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt new file mode 100644 index 000000000..4255e7bf0 --- /dev/null +++ b/test/CMakeLists.txt @@ -0,0 +1,41 @@ +SET(TESTSRC rspamd_expression_test.c + rspamd_memcached_test.c + rspamd_mem_pool_test.c + rspamd_statfile_test.c + rspamd_fuzzy_test.c + rspamd_test_suite.c + rspamd_url_test.c + rspamd_dns_test.c) + +SET(TESTDEPENDS ../src/mem_pool.c + ../src/hash.c + ../src/url.c + ../src/trie.c + ../src/util.c + ../src/radix.c + ../src/fuzzy.c + ../src/map.c + ../src/logger.c + ../src/memcached.c + ../src/message.c + ../src/html.c + ../src/expressions.c + ../src/statfile.c + ../src/events.c + ../src/upstream.c + ../src/dns.c) + +ADD_EXECUTABLE(rspamd-test EXCLUDE_FROM_ALL ${TESTDEPENDS} ${CONTRIBSRC} ${TESTSRC}) +SET_TARGET_PROPERTIES(rspamd-test PROPERTIES LINKER_LANGUAGE C) +SET_TARGET_PROPERTIES(rspamd-test PROPERTIES COMPILE_FLAGS "-DRSPAMD_TEST") +TARGET_LINK_LIBRARIES(rspamd-test event) +TARGET_LINK_LIBRARIES(rspamd-test ${GLIB2_LIBRARIES}) +TARGET_LINK_LIBRARIES(rspamd-test ${CMAKE_REQUIRED_LIBRARIES}) +IF(GMIME2_FOUND) + TARGET_LINK_LIBRARIES(rspamd-test ${GMIME2_LIBRARIES}) +ELSE(GMIME2_FOUND) + TARGET_LINK_LIBRARIES(rspamd-test ${GMIME24_LIBRARIES}) +ENDIF(GMIME2_FOUND) +IF(ENABLE_STATIC MATCHES "ON") + TARGET_LINK_LIBRARIES(rspamd-test ${PCRE_LIBRARIES}) 
+ENDIF(ENABLE_STATIC MATCHES "ON") \ No newline at end of file diff --git a/utils/CMakeLists.txt b/utils/CMakeLists.txt new file mode 100644 index 000000000..8b9e5dca6 --- /dev/null +++ b/utils/CMakeLists.txt @@ -0,0 +1,7 @@ +ADD_SUBDIRECTORY(statshow) + +# Redirector +IF(ENABLE_REDIRECTOR MATCHES "ON") + CONFIGURE_FILE(redirector.pl.in redirector.pl @ONLY) + INSTALL(PROGRAMS redirector.pl DESTINATION bin RENAME rspamd-redirector) +ENDIF(ENABLE_REDIRECTOR MATCHES "ON") \ No newline at end of file diff --git a/utils/expression_parser.c b/utils/expression_parser.c deleted file mode 100644 index 38b52934f..000000000 --- a/utils/expression_parser.c +++ /dev/null @@ -1,54 +0,0 @@ -#include "../src/config.h" -#include "../src/main.h" -#include "../src/cfg_file.h" -#include "../src/expressions.h" - -rspamd_hash_t *counters = NULL; - -int -main (int argc, char **argv) -{ - memory_pool_t *pool; - struct expression *cur; - char *line, *outstr; - int r, s; - char buf[BUFSIZ]; - - pool = memory_pool_new (memory_pool_get_size ()); - - line = fgets (buf, sizeof (buf), stdin); - while (line) { - s = strlen (line); - if (buf[s - 1] == '\n') { - buf[s - 1] = '\0'; - } - if (buf[s - 2] == '\r') { - buf[s - 2] = '\0'; - } - - r = 0; - cur = parse_expression (pool, line); - s = strlen (line) * 4; - outstr = memory_pool_alloc (pool, s); - while (cur) { - if (cur->type == EXPR_REGEXP) { - r += snprintf (outstr + r, s - r, "OP:%s ", (char *)cur->content.operand); - } else if (cur->type == EXPR_STR) { - r += snprintf (outstr + r, s - r, "S:%s ", (char *)cur->content.operand); - - } else if (cur->type == EXPR_FUNCTION) { - r += snprintf (outstr + r, s - r, "F:%s ", ((struct expression_function *)cur->content.operand)->name); - } - else { - r += snprintf (outstr + r, s - r, "O:%c ", cur->content.operation); - } - cur = cur->next; - } - printf ("Parsed expression: '%s' -> '%s'\n", line, outstr); - line = fgets (buf, sizeof (buf), stdin); - } - - memory_pool_delete (pool); - - return 0; -} 
diff --git a/utils/statshow/CMakeLists.txt b/utils/statshow/CMakeLists.txt
new file mode 100644
index 000000000..12de6038e
--- /dev/null
+++ b/utils/statshow/CMakeLists.txt
@@ -0,0 +1,22 @@
+SET(STATSHOWSRC statshow.c)
+
+ADD_EXECUTABLE(statshow EXCLUDE_FROM_ALL ${CLASSIFIERSSRC} ${TOKENIZERSSRC} ${STATSHOWSRC})
+SET_TARGET_PROPERTIES(statshow PROPERTIES LINKER_LANGUAGE C)
+SET_TARGET_PROPERTIES(statshow PROPERTIES COMPILE_FLAGS "-I../../src")
+TARGET_LINK_LIBRARIES(statshow event)
+TARGET_LINK_LIBRARIES(statshow ${GLIB2_LIBRARIES})
+TARGET_LINK_LIBRARIES(statshow ${CMAKE_REQUIRED_LIBRARIES})
+IF(GMIME2_FOUND)
+	TARGET_LINK_LIBRARIES(statshow ${GMIME2_LIBRARIES})
+ELSE(GMIME2_FOUND)
+	TARGET_LINK_LIBRARIES(statshow ${GMIME24_LIBRARIES})
+ENDIF(GMIME2_FOUND)
+TARGET_LINK_LIBRARIES(statshow rspamd_lua)
+IF(ENABLE_LUAJIT MATCHES "ON")
+	TARGET_LINK_LIBRARIES(statshow "${LUAJIT_LIBRARY}")
+ELSE(ENABLE_LUAJIT MATCHES "ON")
+	TARGET_LINK_LIBRARIES(statshow "${LUA_LIBRARY}")
+ENDIF(ENABLE_LUAJIT MATCHES "ON")
+IF(ENABLE_STATIC MATCHES "ON")
+	TARGET_LINK_LIBRARIES(statshow ${PCRE_LIBRARIES})
+ENDIF(ENABLE_STATIC MATCHES "ON")
\ No newline at end of file
diff --git a/utils/statshow/statshow.c b/utils/statshow/statshow.c
new file mode 100644
index 000000000..7dc040a37
--- /dev/null
+++ b/utils/statshow/statshow.c
@@ -0,0 +1,310 @@
+/* Copyright (c) 2010, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *       * Redistributions of source code must retain the above copyright
+ *         notice, this list of conditions and the following disclaimer.
+ *       * Redistributions in binary form must reproduce the above copyright
+ *         notice, this list of conditions and the following disclaimer in the
+ *         documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Rambler BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "main.h"
+#include "cfg_file.h"
+#include "util.h"
+#include "map.h"
+#include "cfg_xml.h"
+#include "classifiers/classifiers.h"
+#include "tokenizers/tokenizers.h"
+#include "message.h"
+
+
+/* Path of the configuration file given by -c/--config, NULL if not specified */
+static gchar *cfg_name;
+
+/* Command line options understood by the utility */
+static GOptionEntry entries[] =
+{
+  { "config", 'c', 0, G_OPTION_ARG_STRING, &cfg_name, "Specify config file", NULL },
+  { NULL, 0, 0, G_OPTION_ARG_NONE, NULL, NULL, NULL }
+};
+
+/*
+ * Parse the command line and store the configuration path (NULL when the
+ * option is absent) in cfg->cfg_name. Parsed options are removed from argv,
+ * the remaining arguments are left for the caller. Exits with status 1 if
+ * the options cannot be parsed.
+ */
+static void
+read_cmd_line (gint *argc, gchar ***argv, struct config_file *cfg)
+{
+	GError *error = NULL;
+	GOptionContext *context;
+
+	context = g_option_context_new ("- run statshow utility");
+	g_option_context_set_summary (context, "Summary:\n Statshow utility version " RVERSION "\n Release id: " RID);
+	g_option_context_add_main_entries (context, entries, NULL);
+	if (!g_option_context_parse (context, argc, argv, &error)) {
+		fprintf (stderr, "option parsing failed: %s\n", error->message);
+		exit (1);
+	}
+	/* The context is not needed once the options are parsed */
+	g_option_context_free (context);
+	cfg->cfg_name = cfg_name;
+}
+
+/*
+ * Read the XML configuration named by cfg->cfg_name and run the post-load
+ * actions on it. Returns FALSE if the configuration cannot be read.
+ */
+static gboolean
+load_rspamd_config (struct config_file *cfg)
+{
+	if (! read_xml_config (cfg, cfg->cfg_name)) {
+		return FALSE;
+	}
+
+	/* Do post-load actions */
+	post_load_config (cfg);
+
+	return TRUE;
+}
+
+/*
+ * Callback for g_list_foreach over the configured classifiers: value is a
+ * struct classifier_config, arg is the task. Tokenizes the text parts of the
+ * message, unless tokens for this tokenizer are already stored in
+ * task->tokens, and runs the classifier with debugging enabled.
+ */
+static void
+classifiers_callback (gpointer value, void *arg)
+{
+	struct worker_task *task = arg;
+	struct classifier_config *cl = value;
+	struct classifier_ctx *ctx;
+	struct mime_text_part *text_part;
+	GTree *tokens = NULL;
+	GList *cur;
+	f_str_t c;
+
+	ctx = cl->classifier->init_func (task->task_pool, cl);
+	ctx->debug = TRUE;
+
+	if ((tokens = g_hash_table_lookup (task->tokens, cl->tokenizer)) == NULL) {
+		/* Start from the first text part, the cursor must not be used uninitialized */
+		cur = g_list_first (task->text_parts);
+		while (cur != NULL) {
+			text_part = (struct mime_text_part *)cur->data;
+			if (!text_part->is_empty) {
+				c.begin = text_part->content->data;
+				c.len = text_part->content->len;
+				/*
+				 * Keep the token text (save_token is TRUE): it is printed by
+				 * the classifier when ctx->debug is set.
+				 * Tree would be freed at task pool freeing.
+				 */
+				if (!cl->tokenizer->tokenize_func (cl->tokenizer, task->task_pool, &c, &tokens, TRUE)) {
+					msg_info ("cannot tokenize input");
+					return;
+				}
+			}
+			cur = g_list_next (cur);
+		}
+		g_hash_table_insert (task->tokens, cl->tokenizer, tokens);
+	}
+
+	if (tokens == NULL) {
+		return;
+	}
+
+	/* Take care of subject */
+	tokenize_subject (task, &tokens);
+	cl->classifier->classify_func (ctx, task->worker->srv->statfile_pool, tokens, task);
+}
+
+/*
+ * Parse the message stored in the first len bytes of buf and run all
+ * configured classifiers on it. The buffer is referenced, not copied, and
+ * stays owned by the caller.
+ */
+static void
+process_buffer (gchar *buf, gsize len, struct rspamd_main *rspamd)
+{
+	struct worker_task *task;
+	struct rspamd_worker *fake_worker;
+
+	/* Make fake worker for task, zeroed so that unset fields are not garbage */
+	fake_worker = g_malloc0 (sizeof (struct rspamd_worker));
+	fake_worker->srv = rspamd;
+
+	/* Make task */
+	task = construct_task (fake_worker);
+	/* Reference message */
+	task->msg = memory_pool_alloc (task->task_pool, sizeof (f_str_t));
+	task->msg->begin = buf;
+	task->msg->len = len;
+
+	/* Process message */
+	if (process_message (task) == 0) {
+		g_list_foreach (task->cfg->classifiers, classifiers_callback, task);
+	}
+	else {
+		msg_err ("cannot process message");
+	}
+
+	/* Released on both paths; the task itself is not freed here */
+	g_free (fake_worker);
+}
+
+/*
+ * Read a whole message from stdin into a growing buffer and process it.
+ */
+static void
+process_stdin (struct rspamd_main *rspamd)
+{
+	gchar *in_buf;
+	gsize r = 0, len, n;
+
+	/* Allocate input buffer */
+	len = BUFSIZ;
+	in_buf = g_malloc (len);
+
+	/* Read stdin; fread returns 0 on both EOF and error, which ends the loop */
+	while ((n = fread (in_buf + r, 1, len - r, stdin)) > 0) {
+		r += n;
+		if (len - r < len / 2) {
+			/* Grow buffer */
+			len *= 2;
+			in_buf = g_realloc (in_buf, len);
+		}
+	}
+
+	if (ferror (stdin)) {
+		msg_err ("read from stdin failed: %s", strerror (errno));
+	}
+	else {
+		process_buffer (in_buf, r, rspamd);
+	}
+	g_free (in_buf);
+}
+
+/*
+ * Read the whole file named filename into memory and process it as a
+ * message. Errors are logged and the file is skipped.
+ */
+static void
+process_file (const gchar *filename, struct rspamd_main *rspamd)
+{
+	struct stat st;
+	gchar *in_buf;
+	gsize size, r = 0;
+	gssize n;
+	gint fd;
+	gboolean ok = TRUE;
+
+	if ((fd = open (filename, O_RDONLY)) == -1) {
+		msg_err ("open failed: %s", strerror (errno));
+		return;
+	}
+
+	/* Stat the opened descriptor so that size and data belong to the same file */
+	if (fstat (fd, &st) == -1) {
+		msg_err ("stat failed: %s", strerror (errno));
+		close (fd);
+		return;
+	}
+
+	size = st.st_size;
+	in_buf = g_malloc (size);
+
+	while (r < size) {
+		n = read (fd, in_buf + r, size - r);
+		if (n > 0) {
+			r += n;
+		}
+		else if (n == 0) {
+			/* File is shorter than fstat reported, process what has been read */
+			break;
+		}
+		else if (errno != EINTR) {
+			msg_err ("read failed: %s", strerror (errno));
+			ok = FALSE;
+			break;
+		}
+	}
+	close (fd);
+
+	if (ok) {
+		process_buffer (in_buf, r, rspamd);
+	}
+	g_free (in_buf);
+}
+
+/*
+ * Usage: statshow [-c config] [file...]
+ * Processes each file given on the command line or, if there are none, a
+ * single message read from stdin.
+ */
+gint
+main (gint argc, gchar **argv, gchar **env)
+{
+	struct rspamd_main *rspamd;
+	gint i;
+
+	/* g_malloc0 terminates the program if allocation fails, so no NULL checks */
+	rspamd = g_malloc0 (sizeof (struct rspamd_main));
+	rspamd->server_pool = memory_pool_new (memory_pool_get_size ());
+	rspamd->cfg = g_malloc0 (sizeof (struct config_file));
+	rspamd->cfg->cfg_pool = memory_pool_new (memory_pool_get_size ());
+	init_defaults (rspamd->cfg);
+
+	read_cmd_line (&argc, &argv, rspamd->cfg);
+	if (rspamd->cfg->cfg_name == NULL) {
+		rspamd->cfg->cfg_name = FIXED_CONFIG_FILE;
+	}
+
+	/* First set logger to console logger */
+	rspamd_set_logger (RSPAMD_LOG_CONSOLE, TYPE_MAIN, rspamd->cfg);
+	(void)open_log ();
+	g_log_set_default_handler (rspamd_glib_log_function, rspamd->cfg);
+
+	/* Init classifiers options */
+	register_classifier_opt ("bayes", "min_tokens");
+	register_classifier_opt ("winnow", "min_tokens");
+	register_classifier_opt ("winnow", "learn_threshold");
+	/* Load config */
+	if (! load_rspamd_config (rspamd->cfg)) {
+		exit (EXIT_FAILURE);
+	}
+
+	/* GMime has to be initialized before messages are parsed */
+	g_mime_init (0);
+
+	/* Init statfile pool */
+	rspamd->statfile_pool = statfile_pool_new (rspamd->server_pool, rspamd->cfg->max_statfile_size);
+
+	/* Options were removed by read_cmd_line: remaining arguments are files */
+	if (argc > 1) {
+		for (i = 1; i < argc; i ++) {
+			process_file (argv[i], rspamd);
+		}
+	}
+	else {
+		process_stdin (rspamd);
+	}
+
+	return 0;
+}
diff --git a/utils/url_extracter.c b/utils/url_extracter.c
deleted file mode 100644
index 6130c51ef..000000000
--- a/utils/url_extracter.c
+++ /dev/null
@@ -1,73 +0,0 @@
-#include
-#include
-#include
-#include
-
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-
-#include
-
-#include "../src/config.h"
-#if !defined(HAVE_OWN_QUEUE_H) && defined(HAVE_SYS_QUEUE_H)
-#include
-#endif
-#ifdef HAVE_OWN_QUEUE_H
-#include "../src/queue.h"
-#endif
-
-#include "../src/main.h"
-#include "../src/cfg_file.h"
-#include "../src/url.h"
-#include "../src/util.h"
-#include "../src/message.h"
-
-rspamd_hash_t *counters = NULL;
-
-int
-main (int argc, char **argv)
-{
-	struct worker_task task;
-	struct uri *url;
-	char *buf = NULL;
-	size_t pos = 0, size = 65535;
-	GList *cur;
-
-	g_mem_set_vtable(glib_mem_profiler_table);
-	g_mime_init (0);
-	bzero (&task, sizeof (struct worker_task));
-	task.task_pool = memory_pool_new (memory_pool_get_size ());
-
-	/* Preallocate buffer */
-	buf = g_malloc (size);
-
-	while (!feof (stdin)) {
-		*(buf + pos) = getchar ();
-		pos ++;
-		if (pos == size) {
-			size *= 2;
-			buf = g_realloc (buf, size);
-		}
-	}
-
-	task.cfg = memory_pool_alloc0 (task.task_pool, sizeof (struct config_file));
-
-	task.msg = memory_pool_alloc (task.task_pool, sizeof (f_str_t));
-	task.msg->begin = buf;
-	task.msg->len = pos;
-	process_message (&task);
-
-	cur = task.urls;
-	while (cur) {
-		url = cur->data;
-		printf ("%s\n", struri (url));
-		cur = g_list_next (cur);
-	}
-
-	return 0;
-}
-- 
2.39.5