]> source.dussan.org Git - rspamd.git/commitdiff
* Major cleanup of cmake build system
authorVsevolod Stakhov <vsevolod@rambler-co.ru>
Fri, 6 May 2011 15:18:40 +0000 (19:18 +0400)
committerVsevolod Stakhov <vsevolod@rambler-co.ru>
Fri, 6 May 2011 15:18:40 +0000 (19:18 +0400)
* Add initial version of statshow utility for statfiles debugging
* Add debugging for statistics
* Remove unused utilities

15 files changed:
CMakeLists.txt
config.h.in
src/classifiers/bayes.c
src/classifiers/classifiers.h
src/controller.c
src/filter.c
src/tokenizers/osb.c
src/tokenizers/tokenizers.c
src/tokenizers/tokenizers.h
test/CMakeLists.txt [new file with mode: 0644]
utils/CMakeLists.txt [new file with mode: 0644]
utils/expression_parser.c [deleted file]
utils/statshow/CMakeLists.txt [new file with mode: 0644]
utils/statshow/statshow.c [new file with mode: 0644]
utils/url_extracter.c [deleted file]

index 632b0e294c3a9b645f63291dd6fe7804a9fd8e1e..47770487a408abe6cbac36738c2dab78cd4cc7c1 100644 (file)
@@ -7,7 +7,7 @@ PROJECT(rspamd C)
 
 SET(RSPAMD_VERSION_MAJOR 0)
 SET(RSPAMD_VERSION_MINOR 3)
-SET(RSPAMD_VERSION_PATCH 12)
+SET(RSPAMD_VERSION_PATCH 13)
 
 
 SET(RSPAMD_VERSION         "${RSPAMD_VERSION_MAJOR}.${RSPAMD_VERSION_MINOR}.${RSPAMD_VERSION_PATCH}")
@@ -19,7 +19,6 @@ CMAKE_MINIMUM_REQUIRED(VERSION 2.6.0 FATAL_ERROR)
 
 OPTION(DEBUG_MODE          "Enable debug output [default: ON]"                  ON)
 OPTION(ENABLE_OPTIMIZATION "Enable optimization [default: OFF]"                 OFF)
-OPTION(ENABLE_PERL         "Enable perl client API [default: OFF]"              OFF)
 OPTION(SKIP_RELINK_RPATH   "Skip relinking and full RPATH for the install tree" OFF)
 OPTION(ENABLE_REDIRECTOR   "Enable redirector install [default: OFF]"           OFF)
 OPTION(ENABLE_PROFILING    "Enable profiling [default: OFF]"                    OFF)
@@ -159,32 +158,6 @@ IF(CMAKE_SYSTEM_NAME STREQUAL "SunOS")
        MESSAGE(STATUS "Configuring for Solaris")
 ENDIF(CMAKE_SYSTEM_NAME STREQUAL "SunOS")
 
-IF(ENABLE_PERL MATCHES "ON")
-
-       IF(NOT PERL_EXECUTABLE)
-               MESSAGE(FATAL_ERROR "Error: Perl is not found but is required")
-       ENDIF(NOT PERL_EXECUTABLE)
-
-       # Find perl libraries and cflags
-       EXECUTE_PROCESS(COMMAND ${PERL_EXECUTABLE} -MExtUtils::Embed -e ccopts OUTPUT_VARIABLE PERL_CFLAGS)
-       EXECUTE_PROCESS(COMMAND ${PERL_EXECUTABLE} -MExtUtils::Embed -e ldopts OUTPUT_VARIABLE PERL_LDFLAGS)
-       STRING(REGEX REPLACE "[\r\n]" " " PERL_CFLAGS ${PERL_CFLAGS})
-       STRING(REGEX REPLACE " +$" "" PERL_CFLAGS ${PERL_CFLAGS})
-       STRING(REGEX REPLACE "[\r\n]" " " PERL_LDFLAGS ${PERL_LDFLAGS})
-       STRING(REGEX REPLACE " +$" "" PERL_LDFLAGS ${PERL_LDFLAGS})
-       # Handle DynaLoader
-       STRING(REGEX MATCH "/[^ ]*/DynaLoader.a" PERL_DYNALOADER ${PERL_LDFLAGS})
-       STRING(REGEX REPLACE "/[^ ]*/DynaLoader.a " "" PERL_LDFLAGS ${PERL_LDFLAGS})
-
-       IF(PERL_DYNALOADER)
-               EXECUTE_PROCESS(COMMAND ${CMAKE_COMMAND} -E copy ${PERL_DYNALOADER} ${rspamd_BINARY_DIR}/compat/libdynaloader.so)
-               LINK_DIRECTORIES(${rspamd_BINARY_DIR}/compat/)
-       ENDIF(PERL_DYNALOADER)
-ELSE(ENABLE_PERL MATCHES "ON")
-       SET(WITHOUT_PERL 1)
-ENDIF(ENABLE_PERL MATCHES "ON")
-
-
 INCLUDE(FindLua51)
 # Check for luajit
 IF(ENABLE_LUAJIT MATCHES "ON")
@@ -525,11 +498,6 @@ ENDIF(SUPPORT_STD_FLAG)
 
 SET(CMAKE_C_FLAGS "${CMAKE_C_OPT_FLAGS} ${CMAKE_C_FLAGS} ${CMAKE_C_WARN_FLAGS}")
 
-
-IF(ENABLE_REDIRECTOR MATCHES "ON")
-       CONFIGURE_FILE(utils/redirector.pl.in utils/redirector.pl @ONLY)
-ENDIF(ENABLE_REDIRECTOR MATCHES "ON")
-
 IF(DEBUG_MODE MATCHES "ON")
        SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -g")
 ENDIF(DEBUG_MODE MATCHES "ON")
@@ -620,8 +588,8 @@ ADD_SUBDIRECTORY(src/client)
 
 ADD_SUBDIRECTORY(src/json)
 ADD_SUBDIRECTORY(src/cdb)
-# ADD_SUBDIRECTORY(src/evdns)
-# ADD_SUBDIRECTORY(src/plugins/custom)
+ADD_SUBDIRECTORY(utils)
+ADD_SUBDIRECTORY(test)
 
 SET(TOKENIZERSSRC  src/tokenizers/tokenizers.c
                                src/tokenizers/osb.c)
@@ -636,47 +604,6 @@ SET(PLUGINSSRC     src/plugins/surbl.c
                                src/plugins/fuzzy_check.c
                                src/plugins/spf.c)
 
-SET(TESTSRC            test/rspamd_expression_test.c
-                               test/rspamd_memcached_test.c
-                               test/rspamd_mem_pool_test.c
-                               test/rspamd_statfile_test.c
-                               test/rspamd_fuzzy_test.c
-                               test/rspamd_test_suite.c
-                               test/rspamd_url_test.c
-                               test/rspamd_dns_test.c)
-
-SET(TESTDEPENDS        src/mem_pool.c
-                               src/hash.c
-                               src/url.c
-                               src/trie.c
-                               src/util.c
-                               src/radix.c
-                               src/fuzzy.c
-                               src/map.c
-                               src/logger.c
-                               src/memcached.c
-                               src/message.c
-                               src/html.c
-                               src/expressions.c
-                               src/statfile.c
-                               src/events.c
-                               src/upstream.c
-                               src/dns.c)
-
-SET(UTILSSRC   utils/url_extracter.c)
-SET(EXPRSRC    utils/expression_parser.c)
-
-SET(UTILSDEPENDS src/mem_pool.c
-                               src/hash.c
-                               src/url.c
-                               src/trie.c
-                               src/fuzzy.c
-                               src/expressions.c
-                               src/message.c
-                               src/html.c
-                               src/util.c
-                               src/radix.c)
-
 LIST(LENGTH PLUGINSSRC RSPAMD_MODULES_NUM)
 
 ############################ TARGETS SECTION ###############################
@@ -685,20 +612,6 @@ ADD_CUSTOM_COMMAND(OUTPUT src/modules.c
                                        COMMAND ../utils/gen-modules.sh ${PLUGINSSRC}
                                        WORKING_DIRECTORY src)
 
-IF(ENABLE_PERL MATCHES "ON")
-       ADD_CUSTOM_COMMAND(OUTPUT ${CMAKE_SOURCE_DIR}/perl/Makefile 
-                                               DEPENDS ${CMAKE_SOURCE_DIR}/perl/Makefile.PL
-                                               COMMAND ${PERL_EXECUTABLE} ./Makefile.PL DESTDIR=${DESTDIR} PREFIX=${PREFIX} INSTALLMAN3DIR=${MAN_PREFIX}/man3
-                                               WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/perl)
-       CONFIGURE_FILE(perl/Makefile.PL.in perl/Makefile.PL)
-       ADD_CUSTOM_TARGET(perlmodule
-                                               COMMAND ${CMAKE_MAKE_PROGRAM} DESTDIR=${DESTDIR}
-                                               DEPENDS ${CMAKE_SOURCE_DIR}/perl/Makefile
-                                               WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/perl
-                                               VERBATIM)
-       
-ENDIF(ENABLE_PERL MATCHES "ON")
-
 CONFIGURE_FILE(config.h.in src/config.h)
 CONFIGURE_FILE(contrib/exim/local_scan.c.in contrib/exim/local_scan_rspamd.c @ONLY)
 CONFIGURE_FILE(rspamd.xml.sample conf/rspamd.xml.sample @ONLY)
@@ -710,19 +623,6 @@ SET_TARGET_PROPERTIES(rspamd PROPERTIES LINKER_LANGUAGE C)
 SET_TARGET_PROPERTIES(rspamd PROPERTIES COMPILE_FLAGS "-DRSPAMD_MAIN")
 SET_TARGET_PROPERTIES(rspamd PROPERTIES VERSION ${RSPAMD_VERSION})
 
-IF(ENABLE_PERL MATCHES "ON")
-
-       SET_TARGET_PROPERTIES(rspamd PROPERTIES COMPILE_FLAGS ${PERL_CFLAGS}
-                                                                                       LINK_FLAGS ${PERL_LDFLAGS})
-       IF(PERL_DYNALOADER)
-               TARGET_LINK_LIBRARIES(rspamd dynaloader)
-       ENDIF(PERL_DYNALOADER)
-
-ENDIF(ENABLE_PERL MATCHES "ON")
-IF(ENABLE_PERL MATCHES "ON")
-       ADD_DEPENDENCIES(rspamd perlmodule)
-ENDIF(ENABLE_PERL MATCHES "ON")
-
 TARGET_LINK_LIBRARIES(rspamd rspamd_lua)
 IF(ENABLE_LUAJIT MATCHES "ON")
     TARGET_LINK_LIBRARIES(rspamd "${LUAJIT_LIBRARY}")
@@ -752,21 +652,6 @@ IF(ENABLE_GPERF_TOOLS MATCHES "ON")
        TARGET_LINK_LIBRARIES(rspamd profiler)
 ENDIF(ENABLE_GPERF_TOOLS MATCHES "ON")
 
-ADD_EXECUTABLE(test/rspamd-test ${TESTDEPENDS} ${CONTRIBSRC} ${TESTSRC})
-SET_TARGET_PROPERTIES(test/rspamd-test PROPERTIES LINKER_LANGUAGE C)
-SET_TARGET_PROPERTIES(test/rspamd-test PROPERTIES COMPILE_FLAGS "-DRSPAMD_TEST")
-TARGET_LINK_LIBRARIES(test/rspamd-test event)
-TARGET_LINK_LIBRARIES(test/rspamd-test ${GLIB2_LIBRARIES})
-TARGET_LINK_LIBRARIES(test/rspamd-test ${CMAKE_REQUIRED_LIBRARIES})
-IF(GMIME2_FOUND)
-       TARGET_LINK_LIBRARIES(test/rspamd-test ${GMIME2_LIBRARIES})
-ELSE(GMIME2_FOUND)
-       TARGET_LINK_LIBRARIES(test/rspamd-test ${GMIME24_LIBRARIES})
-ENDIF(GMIME2_FOUND)
-IF(ENABLE_STATIC MATCHES "ON")
-       TARGET_LINK_LIBRARIES(test/rspamd-test ${PCRE_LIBRARIES})
-ENDIF(ENABLE_STATIC MATCHES "ON")
-
 
 ##################### INSTALLATION ##########################################
 
@@ -823,16 +708,6 @@ ENDFOREACH(LUA_CONF)
 INSTALL(FILES "doc/rspamd.8" DESTINATION man/man8)
 INSTALL(FILES "doc/rspamc.1" DESTINATION man/man1)
 
-# Perl lib
-IF(ENABLE_PERL MATCHES "ON")
-       INSTALL(CODE "EXECUTE_PROCESS(COMMAND make install WORKING_DIRECTORY perl)")
-ENDIF(ENABLE_PERL MATCHES "ON")
-
-# Redirector
-IF(ENABLE_REDIRECTOR MATCHES "ON")
-       INSTALL(PROGRAMS utils/redirector.pl DESTINATION bin RENAME rspamd-redirector)
-ENDIF(ENABLE_REDIRECTOR MATCHES "ON")
-
 # Start scripts
 IF(CMAKE_SYSTEM_NAME STREQUAL "FreeBSD" AND NOT BUILD_PORT)
        INSTALL(PROGRAMS freebsd/rspamd.sh DESTINATION etc/rc.d)
index 902fbdee62640f6c0c1f2949f59bf3cdbcc44e8a..4d8af2e6ddf867312e96c3c60d9cdbc05d690e17 100644 (file)
 
 #cmakedefine HAVE_SETITIMER      1
 
-#cmakedefine WITHOUT_PERL        1
+#define      WITHOUT_PERL        1
 
 #cmakedefine WITH_LUA            1
 
index 9ef2544b0df959197debff2df4f52cef952bd095..b4f7826e5b2870c4d2ebd6813dacb94be554564a 100644 (file)
@@ -131,6 +131,7 @@ bayes_classify_callback (gpointer key, gpointer value, gpointer data)
                if (cur->post_probability < G_MINDOUBLE * 100) {
                        cur->post_probability = G_MINDOUBLE * 100;
                }
+
        }
        renorm = 0;
        for (i = 0; i < cd->statfiles_num; i ++) {
@@ -144,6 +145,10 @@ bayes_classify_callback (gpointer key, gpointer value, gpointer data)
                if (cur->post_probability < G_MINDOUBLE * 10) {
                        cur->post_probability = G_MINDOUBLE * 100;
                }
+               if (cd->ctx->debug) {
+                       msg_info ("token: %s, statfile: %s, probability: %.4f, post_probability: %.4f",
+                                       node->extra, cur->st->symbol, cur->value, cur->post_probability);
+               }
        }
 
        return FALSE;
@@ -156,7 +161,7 @@ bayes_init (memory_pool_t *pool, struct classifier_config *cfg)
 
        ctx->pool = pool;
        ctx->cfg = cfg;
-
+       ctx->debug = FALSE;
 
        return ctx;
 }
index 0e6df173a9843e6b78541d08e9ca2a24b4df9f15..601db0205f853d77d3ae8124247c4dc2395410fe 100644 (file)
@@ -15,6 +15,7 @@ struct worker_task;
 struct classifier_ctx {
        memory_pool_t *pool;
        GHashTable *results;
+       gboolean debug;
        struct classifier_config *cfg;
 };
 
index 9504d3b1fca0b3247be59784da4fe2db70faebb9..a06351bb63962cc100df0580244de91520fde79e 100644 (file)
@@ -850,7 +850,8 @@ controller_read_socket (f_str_t * in, void *arg)
 
                        c.begin = part->content->data;
                        c.len = part->content->len;
-                       if (!session->learn_classifier->tokenizer->tokenize_func (session->learn_classifier->tokenizer, session->session_pool, &c, &tokens)) {
+                       if (!session->learn_classifier->tokenizer->tokenize_func (session->learn_classifier->tokenizer,
+                                       session->session_pool, &c, &tokens, FALSE)) {
                                i = rspamd_snprintf (out_buf, sizeof (out_buf), "weights failed, tokenizer error" CRLF END);
                                free_task (task, FALSE);
                                if (!rspamd_dispatcher_write (session->dispatcher, out_buf, i, FALSE, FALSE)) {
index df8e1a9e05fadd091da12d8809995d13d54df2b6..2f8b27060bfdc6a6d15080398f72bf67427084c7 100644 (file)
@@ -36,9 +36,6 @@
 #include "classifiers/classifiers.h"
 #include "tokenizers/tokenizers.h"
 
-#ifndef WITHOUT_PERL
-#   include "perl.h"
-#endif
 #ifdef WITH_LUA
 #   include "lua/lua_common.h"
 #endif
@@ -615,7 +612,7 @@ classifiers_callback (gpointer value, void *arg)
                                c.len = strlen (cur->data);
                                if (c.len > 0) {
                                        c.begin = cur->data;
-                                       if (!cl->tokenizer->tokenize_func (cl->tokenizer, task->task_pool, &c, &tokens)) {
+                                       if (!cl->tokenizer->tokenize_func (cl->tokenizer, task->task_pool, &c, &tokens, FALSE)) {
                                                msg_info ("cannot tokenize input");
                                                return;
                                        }
@@ -630,7 +627,7 @@ classifiers_callback (gpointer value, void *arg)
                                c.begin = text_part->content->data;
                                c.len = text_part->content->len;
                                /* Tree would be freed at task pool freeing */
-                               if (!cl->tokenizer->tokenize_func (cl->tokenizer, task->task_pool, &c, &tokens)) {
+                               if (!cl->tokenizer->tokenize_func (cl->tokenizer, task->task_pool, &c, &tokens, FALSE)) {
                                        msg_info ("cannot tokenize input");
                                        return;
                                }
@@ -857,7 +854,7 @@ learn_task (const gchar *statfile, struct worker_task *task, GError **err)
                /* Get tokens */
                if (!cl->tokenizer->tokenize_func (
                                cl->tokenizer, task->task_pool,
-                               &c, &tokens)) {
+                               &c, &tokens, FALSE)) {
                        g_set_error (err, filter_error_quark(), 2, "Cannot tokenize message");
                        return FALSE;
                }
index ae59cf8ea70bdeeed1cf7609165cba79a254ac69..41bcce737ffe806cd405a8d4bc0c64fe6979f4a0 100644 (file)
@@ -35,7 +35,7 @@
 extern const int                primes[];
 
 int
-osb_tokenize_text (struct tokenizer *tokenizer, memory_pool_t * pool, f_str_t * input, GTree ** tree)
+osb_tokenize_text (struct tokenizer *tokenizer, memory_pool_t * pool, f_str_t * input, GTree ** tree, gboolean save_token)
 {
        token_node_t                   *new = NULL;
        f_str_t                         token = { NULL, 0, 0 }, *res;
@@ -69,6 +69,9 @@ osb_tokenize_text (struct tokenizer *tokenizer, memory_pool_t * pool, f_str_t *
                        new = memory_pool_alloc0 (pool, sizeof (token_node_t));
                        new->h1 = h1;
                        new->h2 = h2;
+                       if (save_token) {
+                               new->extra = (uintptr_t)memory_pool_fstrdup (pool, &token);
+                       }
 
                        if (g_tree_lookup (*tree, new) == NULL) {
                                g_tree_insert (*tree, new, new);
index b7318bdfcd14071be25b94a0797f8e561bb4c147..5af3fe6d5883046020822d13a6844182eb9784ef 100644 (file)
@@ -239,13 +239,13 @@ tokenize_subject (struct worker_task *task, GTree ** tree)
                new = memory_pool_alloc (task->task_pool, sizeof (token_node_t));
                subject.begin = task->subject;
                subject.len = strlen (task->subject);
-               osb_tokenizer->tokenize_func (osb_tokenizer, task->task_pool, &subject, tree);
+               osb_tokenizer->tokenize_func (osb_tokenizer, task->task_pool, &subject, tree, FALSE);
        }
        if ((sub = g_mime_message_get_subject (task->message)) != NULL) {
                new = memory_pool_alloc (task->task_pool, sizeof (token_node_t));
                subject.begin = (gchar *)sub;
                subject.len = strlen (sub);
-               osb_tokenizer->tokenize_func (osb_tokenizer, task->task_pool, &subject, tree);
+               osb_tokenizer->tokenize_func (osb_tokenizer, task->task_pool, &subject, tree, FALSE);
        }
 }
 
index 59a2684d071d5948438056cd6614698a9f8b96de..7417533289da10017cfabd2ce19fb85d725ccf53 100644 (file)
@@ -24,7 +24,7 @@ typedef struct token_node_s {
 /* Common tokenizer structure */
 struct tokenizer {
        char *name;
-       int (*tokenize_func)(struct tokenizer *tokenizer, memory_pool_t *pool, f_str_t *input, GTree **cur);
+       int (*tokenize_func)(struct tokenizer *tokenizer, memory_pool_t *pool, f_str_t *input, GTree **cur, gboolean save_token);
        f_str_t* (*get_next_word)(f_str_t *buf, f_str_t *token);
 };
 
@@ -35,7 +35,7 @@ struct tokenizer* get_tokenizer (char *name);
 /* Get next word from specified f_str_t buf */
 f_str_t *get_next_word (f_str_t *buf, f_str_t *token);
 /* OSB tokenize function */
-int osb_tokenize_text (struct tokenizer *tokenizer, memory_pool_t *pool, f_str_t *input, GTree **cur);
+int osb_tokenize_text (struct tokenizer *tokenizer, memory_pool_t *pool, f_str_t *input, GTree **cur, gboolean save_token);
 /* Common tokenizer for headers */
 int tokenize_headers (memory_pool_t *pool, struct worker_task *task, GTree **cur);
 /* Make tokens for a subject */
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
new file mode 100644 (file)
index 0000000..4255e7b
--- /dev/null
@@ -0,0 +1,41 @@
+# Unit test sources; paths are relative to this directory (test/).
+SET(TESTSRC
+       rspamd_expression_test.c
+       rspamd_memcached_test.c
+       rspamd_mem_pool_test.c
+       rspamd_statfile_test.c
+       rspamd_fuzzy_test.c
+       rspamd_test_suite.c
+       rspamd_url_test.c
+       rspamd_dns_test.c)
+
+# Core rspamd sources compiled directly into the test binary.
+SET(TESTDEPENDS
+       ../src/mem_pool.c
+       ../src/hash.c
+       ../src/url.c
+       ../src/trie.c
+       ../src/util.c
+       ../src/radix.c
+       ../src/fuzzy.c
+       ../src/map.c
+       ../src/logger.c
+       ../src/memcached.c
+       ../src/message.c
+       ../src/html.c
+       ../src/expressions.c
+       ../src/statfile.c
+       ../src/events.c
+       ../src/upstream.c
+       ../src/dns.c)
+
+# The test runner is excluded from the default build and has to be requested
+# explicitly as the rspamd-test target.
+# NOTE(review): CONTRIBSRC is inherited from the parent scope - confirm that
+# its entries still resolve when referenced from this subdirectory.
+ADD_EXECUTABLE(rspamd-test EXCLUDE_FROM_ALL ${TESTDEPENDS} ${CONTRIBSRC} ${TESTSRC})
+SET_TARGET_PROPERTIES(rspamd-test PROPERTIES
+       LINKER_LANGUAGE C
+       COMPILE_FLAGS "-DRSPAMD_TEST")
+
+# Link order is kept: libevent, glib, then the accumulated system libraries.
+TARGET_LINK_LIBRARIES(rspamd-test
+       event
+       ${GLIB2_LIBRARIES}
+       ${CMAKE_REQUIRED_LIBRARIES})
+
+# Use whichever gmime flavour was detected.
+IF(NOT GMIME2_FOUND)
+       TARGET_LINK_LIBRARIES(rspamd-test ${GMIME24_LIBRARIES})
+ELSE(NOT GMIME2_FOUND)
+       TARGET_LINK_LIBRARIES(rspamd-test ${GMIME2_LIBRARIES})
+ENDIF(NOT GMIME2_FOUND)
+
+# PCRE is linked explicitly only for static builds.
+IF(ENABLE_STATIC MATCHES "ON")
+       TARGET_LINK_LIBRARIES(rspamd-test ${PCRE_LIBRARIES})
+ENDIF(ENABLE_STATIC MATCHES "ON")
\ No newline at end of file
diff --git a/utils/CMakeLists.txt b/utils/CMakeLists.txt
new file mode 100644 (file)
index 0000000..8b9e5dc
--- /dev/null
@@ -0,0 +1,7 @@
+# Debugging utilities that are built on demand.
+ADD_SUBDIRECTORY(statshow)
+
+# Redirector
+# CONFIGURE_FILE resolves a relative output path against the current binary
+# directory, while INSTALL(PROGRAMS) resolves a relative path against the
+# current source directory. Both paths are spelled out so that the generated
+# script is the one that gets installed, also for out-of-source builds.
+IF(ENABLE_REDIRECTOR MATCHES "ON")
+       CONFIGURE_FILE("${CMAKE_CURRENT_SOURCE_DIR}/redirector.pl.in"
+                      "${CMAKE_CURRENT_BINARY_DIR}/redirector.pl" @ONLY)
+       INSTALL(PROGRAMS "${CMAKE_CURRENT_BINARY_DIR}/redirector.pl"
+               DESTINATION bin RENAME rspamd-redirector)
+ENDIF(ENABLE_REDIRECTOR MATCHES "ON")
\ No newline at end of file
diff --git a/utils/expression_parser.c b/utils/expression_parser.c
deleted file mode 100644 (file)
index 38b5293..0000000
+++ /dev/null
@@ -1,54 +0,0 @@
-#include "../src/config.h"
-#include "../src/main.h"
-#include "../src/cfg_file.h"
-#include "../src/expressions.h"
-
-rspamd_hash_t *counters = NULL;
-
-int 
-main (int argc, char **argv)
-{
-       memory_pool_t *pool;
-       struct expression *cur;
-       char *line, *outstr;
-       int r, s;
-       char buf[BUFSIZ];
-
-       pool = memory_pool_new (memory_pool_get_size ());
-       
-       line = fgets (buf, sizeof (buf), stdin);
-       while (line) {
-               s = strlen (line);
-               if (buf[s - 1] == '\n') {
-                       buf[s - 1] = '\0';
-               }
-               if (buf[s - 2] == '\r') {
-                       buf[s - 2] = '\0';
-               }
-
-               r = 0;
-               cur = parse_expression (pool, line);
-               s = strlen (line) * 4;
-               outstr = memory_pool_alloc (pool, s);
-               while (cur) {
-                       if (cur->type == EXPR_REGEXP) {
-                               r += snprintf (outstr + r, s - r, "OP:%s ", (char *)cur->content.operand);
-                       } else if (cur->type == EXPR_STR) {
-                               r += snprintf (outstr + r, s - r, "S:%s ", (char *)cur->content.operand);
-
-                       } else if (cur->type == EXPR_FUNCTION) {
-                               r += snprintf (outstr + r, s - r, "F:%s ", ((struct expression_function *)cur->content.operand)->name);
-                       }
-                       else {
-                               r += snprintf (outstr + r, s - r, "O:%c ", cur->content.operation);
-                       }
-                       cur = cur->next;
-               }
-               printf ("Parsed expression: '%s' -> '%s'\n", line, outstr);
-               line = fgets (buf, sizeof (buf), stdin);
-       }
-
-       memory_pool_delete (pool);
-
-       return 0;
-}
diff --git a/utils/statshow/CMakeLists.txt b/utils/statshow/CMakeLists.txt
new file mode 100644 (file)
index 0000000..12de603
--- /dev/null
@@ -0,0 +1,22 @@
+# statshow: debugging utility for statfiles. It is excluded from the default
+# build and has to be requested explicitly as the statshow target.
+SET(STATSHOWSRC        statshow.c)
+
+# CLASSIFIERSSRC and TOKENIZERSSRC are inherited from the top-level
+# CMakeLists.txt, where their entries are relative to the project root.
+# Relative source paths are resolved against the directory of the
+# CMakeLists.txt that uses them, so anchor them to the project root here.
+# NOTE(review): a subdirectory only sees variables that were SET before the
+# ADD_SUBDIRECTORY() call in the parent - confirm both lists are defined
+# before ADD_SUBDIRECTORY(utils), otherwise they expand to nothing here.
+SET(STATSHOWDEPENDS "")
+FOREACH(STATSHOW_DEP ${CLASSIFIERSSRC} ${TOKENIZERSSRC})
+       IF(IS_ABSOLUTE "${STATSHOW_DEP}")
+               LIST(APPEND STATSHOWDEPENDS "${STATSHOW_DEP}")
+       ELSE(IS_ABSOLUTE "${STATSHOW_DEP}")
+               LIST(APPEND STATSHOWDEPENDS "${rspamd_SOURCE_DIR}/${STATSHOW_DEP}")
+       ENDIF(IS_ABSOLUTE "${STATSHOW_DEP}")
+ENDFOREACH(STATSHOW_DEP)
+
+ADD_EXECUTABLE(statshow EXCLUDE_FROM_ALL ${STATSHOWDEPENDS} ${STATSHOWSRC})
+SET_TARGET_PROPERTIES(statshow PROPERTIES LINKER_LANGUAGE C)
+# A relative -I is interpreted against the compiler's working directory.
+# Use absolute paths instead: the source tree for the rspamd headers and the
+# binary tree for the generated src/config.h.
+SET_TARGET_PROPERTIES(statshow PROPERTIES
+       COMPILE_FLAGS "-I${rspamd_SOURCE_DIR}/src -I${rspamd_BINARY_DIR}/src")
+TARGET_LINK_LIBRARIES(statshow event)
+TARGET_LINK_LIBRARIES(statshow ${GLIB2_LIBRARIES})
+TARGET_LINK_LIBRARIES(statshow ${CMAKE_REQUIRED_LIBRARIES})
+# Use whichever gmime flavour was detected.
+IF(GMIME2_FOUND)
+       TARGET_LINK_LIBRARIES(statshow ${GMIME2_LIBRARIES})
+ELSE(GMIME2_FOUND)
+       TARGET_LINK_LIBRARIES(statshow ${GMIME24_LIBRARIES})
+ENDIF(GMIME2_FOUND)
+TARGET_LINK_LIBRARIES(statshow rspamd_lua)
+IF(ENABLE_LUAJIT MATCHES "ON")
+    TARGET_LINK_LIBRARIES(statshow "${LUAJIT_LIBRARY}")
+ELSE(ENABLE_LUAJIT MATCHES "ON")
+    TARGET_LINK_LIBRARIES(statshow "${LUA_LIBRARY}")
+ENDIF(ENABLE_LUAJIT MATCHES "ON")
+# PCRE is linked explicitly only for static builds.
+IF(ENABLE_STATIC MATCHES "ON")
+       TARGET_LINK_LIBRARIES(statshow ${PCRE_LIBRARIES})
+ENDIF(ENABLE_STATIC MATCHES "ON")
\ No newline at end of file
diff --git a/utils/statshow/statshow.c b/utils/statshow/statshow.c
new file mode 100644 (file)
index 0000000..7dc040a
--- /dev/null
@@ -0,0 +1,262 @@
+/* Copyright (c) 2010, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *       * Redistributions of source code must retain the above copyright
+ *         notice, this list of conditions and the following disclaimer.
+ *       * Redistributions in binary form must reproduce the above copyright
+ *         notice, this list of conditions and the following disclaimer in the
+ *         documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Rambler BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "main.h"
+#include "cfg_file.h"
+#include "util.h"
+#include "map.h"
+#include "cfg_xml.h"
+#include "classifiers/classifiers.h"
+#include "tokenizers/tokenizers.h"
+#include "message.h"
+
+
+static gchar                   *cfg_name; /* config path from -c/--config; stays NULL when the option is absent */
+
+static GOptionEntry entries[] = /* command line options understood by statshow */
+{
+  { "config", 'c', 0, G_OPTION_ARG_STRING, &cfg_name, "Specify config file", NULL },
+  { NULL, 0, 0, G_OPTION_ARG_NONE, NULL, NULL, NULL } /* terminating entry */
+};
+
+/*
+ * Parse command line options and store the selected config path in cfg.
+ *
+ * argc and argv are updated in place by the option parser, so after this
+ * call they hold only the arguments that were not recognised as options.
+ * The process exits with status 1 when parsing fails.
+ */
+static void
+read_cmd_line (gint *argc, gchar ***argv, struct config_file *cfg)
+{
+       GError                         *error = NULL;
+       GOptionContext                 *context;
+
+       context = g_option_context_new ("- run statshow utility");
+       g_option_context_set_summary (context, "Summary:\n  Statshow utility version " RVERSION "\n  Release id: " RID);
+       g_option_context_add_main_entries (context, entries, NULL);
+       if (!g_option_context_parse (context, argc, argv, &error)) {
+               fprintf (stderr, "option parsing failed: %s\n", error != NULL ? error->message : "unknown error");
+               exit (1);
+       }
+       /* The context is needed only while parsing; release it instead of leaking it */
+       g_option_context_free (context);
+       cfg->cfg_name = cfg_name;
+}
+
+/*
+ * Read the XML config named by cfg->cfg_name and, on success, run the
+ * post-load actions. Returns FALSE when the config cannot be read.
+ */
+static gboolean
+load_rspamd_config (struct config_file *cfg)
+{
+       gboolean                        loaded = FALSE;
+
+       if (read_xml_config (cfg, cfg->cfg_name)) {
+               /* Do post-load actions */
+               post_load_config (cfg);
+               loaded = TRUE;
+       }
+
+       return loaded;
+}
+
+/*
+ * g_list_foreach callback: run one configured classifier over a task with
+ * classifier debugging enabled.
+ *
+ * value is the struct classifier_config to run, arg is the struct
+ * worker_task being inspected. Tokens are cached in task->tokens per
+ * tokenizer, so the text parts are tokenized only once per tokenizer.
+ */
+static void
+classifiers_callback (gpointer value, void *arg)
+{
+       struct worker_task             *task = arg;
+       struct classifier_config       *cl = value;
+       struct classifier_ctx          *ctx;
+       struct mime_text_part          *text_part;
+       GTree                          *tokens = NULL;
+       GList                          *cur;
+       f_str_t                         c;
+
+       ctx = cl->classifier->init_func (task->task_pool, cl);
+       ctx->debug = TRUE;
+
+       if ((tokens = g_hash_table_lookup (task->tokens, cl->tokenizer)) == NULL) {
+               /*
+                * The list cursor used to be read without ever being initialised.
+                * Walk the text parts of the message.
+                * NOTE(review): assumes the parts live in task->text_parts, as the
+                * cast to struct mime_text_part below implies - confirm.
+                * Header based classification is not implemented in this utility
+                * (the former header branch could never be taken).
+                */
+               cur = g_list_first (task->text_parts);
+               while (cur != NULL) {
+                       text_part = (struct mime_text_part *)cur->data;
+                       if (!text_part->is_empty) {
+                               c.begin = text_part->content->data;
+                               c.len = text_part->content->len;
+                               /*
+                                * Keep the token text (save_token = TRUE): the classifier
+                                * debug output prints it for every token.
+                                * Tree would be freed at task pool freeing
+                                */
+                               if (!cl->tokenizer->tokenize_func (cl->tokenizer, task->task_pool, &c, &tokens, TRUE)) {
+                                       msg_info ("cannot tokenize input");
+                                       return;
+                               }
+                       }
+                       cur = g_list_next (cur);
+               }
+               g_hash_table_insert (task->tokens, cl->tokenizer, tokens);
+       }
+
+       if (tokens == NULL) {
+               /* Nothing to classify */
+               return;
+       }
+
+       /* Take care of subject */
+       tokenize_subject (task, &tokens);
+       cl->classifier->classify_func (ctx, task->worker->srv->statfile_pool, tokens, task);
+}
+
+/*
+ * Parse a raw message held in buf (len bytes) and run every configured
+ * classifier over it.
+ *
+ * buf is referenced, not copied, so it must stay valid until this function
+ * returns. rspamd is attached to a temporary worker so that a task can be
+ * constructed outside of a real worker process.
+ */
+static void
+process_buffer (gchar *buf, gsize len, struct rspamd_main *rspamd)
+{
+       struct worker_task              *task;
+       struct rspamd_worker            *fake_worker;
+
+       /* Make fake worker for task; zero it so that unused fields are not garbage */
+       fake_worker = g_malloc0 (sizeof (struct rspamd_worker));
+       fake_worker->srv = rspamd;
+
+       /* Make task */
+       task = construct_task (fake_worker);
+       /* Attach message */
+       task->msg = memory_pool_alloc (task->task_pool, sizeof (f_str_t));
+       task->msg->begin = buf;
+       task->msg->len = len;
+
+       /* Process message and classify it only when parsing succeeded */
+       if (process_message (task) == 0) {
+               g_list_foreach (task->cfg->classifiers, classifiers_callback, task);
+       }
+
+       /* Single exit point: the fake worker is released on the failure path as well */
+       g_free (fake_worker);
+}
+
+/*
+ * Read the whole of stdin into a growing buffer and process it as a single
+ * message. Nothing is processed when reading stdin fails.
+ */
+static void
+process_stdin (struct rspamd_main *rspamd)
+{
+       gchar                           *in_buf;
+       gsize                            r = 0, len, n;
+
+       /* Allocate input buffer */
+       len = BUFSIZ;
+       in_buf = g_malloc (len);
+
+       /*
+        * Read stdin. There is always free space in the buffer at this point, so
+        * a zero return from fread means end of file or a read error. Testing
+        * only feof would loop forever on a read error.
+        */
+       while ((n = fread (in_buf + r, 1, len - r, stdin)) > 0) {
+               r += n;
+               if (len - r < len / 2) {
+                       /* Grow buffer */
+                       len *= 2;
+                       in_buf = g_realloc (in_buf, len);
+               }
+       }
+
+       if (ferror (stdin)) {
+               msg_err ("read from stdin failed: %s", strerror (errno));
+               g_free (in_buf);
+               return;
+       }
+
+       process_buffer (in_buf, r, rspamd);
+       g_free (in_buf);
+}
+
+/*
+ * Read the file named by filename into memory and process it as a single
+ * message. Errors are logged and the file is skipped.
+ */
+static void
+process_file (const gchar *filename, struct rspamd_main *rspamd)
+{
+       struct stat                     st;
+       char                           *in_buf;
+       gsize                           r = 0, size;
+       gssize                          n;
+       gint                            fd;
+
+       if ((fd = open (filename, O_RDONLY)) == -1) {
+               msg_err ("open failed: %s", strerror (errno));
+               return;
+       }
+
+       /* Query the size through the descriptor, so it describes the file that was opened */
+       if (fstat (fd, &st) == -1) {
+               msg_err ("stat failed: %s", strerror (errno));
+               close (fd);
+               return;
+       }
+
+       size = st.st_size;
+       in_buf = g_malloc (size);
+
+       while (r < size) {
+               /* Ask only for the bytes that are still missing */
+               n = read (fd, in_buf + r, size - r);
+               if (n == -1) {
+                       if (errno == EINTR) {
+                               continue;
+                       }
+                       msg_err ("read failed: %s", strerror (errno));
+                       g_free (in_buf);
+                       close (fd);
+                       return;
+               }
+               if (n == 0) {
+                       /* File is shorter than reported; process what was read */
+                       break;
+               }
+               r += n;
+       }
+       close (fd);
+
+       process_buffer (in_buf, r, rspamd);
+       g_free (in_buf);
+}
+
+/*
+ * statshow entry point.
+ *
+ * Loads the rspamd configuration (from -c/--config, or FIXED_CONFIG_FILE
+ * when the option is absent), then classifies every file named on the
+ * command line, or stdin when no file is given, with classifier debugging
+ * enabled. Exits with EXIT_FAILURE when the configuration cannot be loaded.
+ */
+gint
+main (gint argc, gchar **argv, gchar **env)
+{
+       struct rspamd_main             *rspamd;
+       gint                            i;
+
+       /* g_malloc0 aborts on allocation failure, so the results need no NULL checks */
+       rspamd = (struct rspamd_main *)g_malloc0 (sizeof (struct rspamd_main));
+       rspamd->server_pool = memory_pool_new (memory_pool_get_size ());
+       rspamd->cfg = (struct config_file *)g_malloc0 (sizeof (struct config_file));
+       rspamd->cfg->cfg_pool = memory_pool_new (memory_pool_get_size ());
+       init_defaults (rspamd->cfg);
+
+       read_cmd_line (&argc, &argv, rspamd->cfg);
+       if (rspamd->cfg->cfg_name == NULL) {
+               rspamd->cfg->cfg_name = FIXED_CONFIG_FILE;
+       }
+
+       /* First set logger to console logger */
+       rspamd_set_logger (RSPAMD_LOG_CONSOLE, TYPE_MAIN, rspamd->cfg);
+       (void)open_log ();
+       g_log_set_default_handler (rspamd_glib_log_function, rspamd->cfg);
+
+       /* Init classifiers options */
+       register_classifier_opt ("bayes", "min_tokens");
+       register_classifier_opt ("winnow", "min_tokens");
+       register_classifier_opt ("winnow", "learn_threshold");
+       /* Load config */
+       if (! load_rspamd_config (rspamd->cfg)) {
+               exit (EXIT_FAILURE);
+       }
+
+       /* Init statfile pool */
+       rspamd->statfile_pool = statfile_pool_new (rspamd->server_pool, rspamd->cfg->max_statfile_size);
+
+       /* NOTE(review): no g_mime_init call is made before messages are parsed - confirm whether process_message needs one */
+
+       /*
+        * Every argument left after option parsing is a file to process. Iterate
+        * over the argument vector itself rather than over the first argument.
+        */
+       if (argc > 1) {
+               for (i = 1; i < argc; i ++) {
+                       process_file (argv[i], rspamd);
+               }
+       }
+       else {
+               process_stdin (rspamd);
+       }
+
+       return 0;
+}
diff --git a/utils/url_extracter.c b/utils/url_extracter.c
deleted file mode 100644 (file)
index 6130c51..0000000
+++ /dev/null
@@ -1,73 +0,0 @@
-#include <sys/types.h>
-#include <sys/time.h>
-#include <sys/wait.h>
-#include <sys/param.h>
-
-#include <netinet/in.h>
-#include <arpa/inet.h>
-#include <netdb.h>
-#include <syslog.h>
-#include <fcntl.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include <gmime/gmime.h>
-
-#include "../src/config.h"
-#if !defined(HAVE_OWN_QUEUE_H) && defined(HAVE_SYS_QUEUE_H)
-#include <sys/queue.h>
-#endif
-#ifdef HAVE_OWN_QUEUE_H
-#include "../src/queue.h"
-#endif
-
-#include "../src/main.h"
-#include "../src/cfg_file.h"
-#include "../src/url.h"
-#include "../src/util.h"
-#include "../src/message.h"
-
-rspamd_hash_t *counters = NULL;
-
-int
-main (int argc, char **argv)
-{
-       struct worker_task task;
-       struct uri *url;
-       char *buf = NULL;
-       size_t pos = 0, size = 65535;
-       GList *cur;
-       
-       g_mem_set_vtable(glib_mem_profiler_table);
-       g_mime_init (0);
-       bzero (&task, sizeof (struct worker_task));
-       task.task_pool = memory_pool_new (memory_pool_get_size ());
-       
-       /* Preallocate buffer */
-       buf = g_malloc (size);
-
-       while (!feof (stdin)) {
-               *(buf + pos) = getchar ();
-               pos ++;
-               if (pos == size) {
-                       size *= 2;
-                       buf = g_realloc (buf, size);
-               }
-       }
-       
-       task.cfg = memory_pool_alloc0 (task.task_pool, sizeof (struct config_file));
-
-       task.msg = memory_pool_alloc (task.task_pool, sizeof (f_str_t));
-       task.msg->begin = buf;
-       task.msg->len = pos;
-       process_message (&task);
-       
-       cur = task.urls;
-       while (cur) {
-               url = cur->data;
-               printf ("%s\n", struri (url));
-               cur = g_list_next (cur);
-       }
-       
-       return 0;
-}