aboutsummaryrefslogtreecommitdiffstats
path: root/utils
diff options
context:
space:
mode:
authorVsevolod Stakhov <vsevolod@rambler-co.ru>2011-05-06 19:18:40 +0400
committerVsevolod Stakhov <vsevolod@rambler-co.ru>2011-05-06 19:18:40 +0400
commit683b90f4c6c744557f7429ce6ff77c0f7d2175e1 (patch)
tree6e5f5cfdb0070cc7387d4045e955c6226d9f225d /utils
parent56586078f92c4cf71fad46e1f4888a49749a6313 (diff)
downloadrspamd-683b90f4c6c744557f7429ce6ff77c0f7d2175e1.tar.gz
rspamd-683b90f4c6c744557f7429ce6ff77c0f7d2175e1.zip
* Major cleanup of cmake build system
* Add initial version of statshow utility for statfiles debugging
* Add debugging for statistics
* Remove unused utilities
Diffstat (limited to 'utils')
-rw-r--r--utils/CMakeLists.txt7
-rw-r--r--utils/expression_parser.c54
-rw-r--r--utils/statshow/CMakeLists.txt22
-rw-r--r--utils/statshow/statshow.c262
-rw-r--r--utils/url_extracter.c73
5 files changed, 291 insertions, 127 deletions
diff --git a/utils/CMakeLists.txt b/utils/CMakeLists.txt
new file mode 100644
index 000000000..8b9e5dca6
--- /dev/null
+++ b/utils/CMakeLists.txt
@@ -0,0 +1,7 @@
+# Pull in the statshow utility, which has its own CMakeLists.txt in utils/statshow
+ADD_SUBDIRECTORY(statshow)
+
+# Redirector
+# When ENABLE_REDIRECTOR is ON, generate redirector.pl from redirector.pl.in
+# (@ONLY: only @VAR@ references are substituted, ${VAR} is left untouched)
+# and install the result into bin under the name rspamd-redirector.
+IF(ENABLE_REDIRECTOR MATCHES "ON")
+ CONFIGURE_FILE(redirector.pl.in redirector.pl @ONLY)
+ INSTALL(PROGRAMS redirector.pl DESTINATION bin RENAME rspamd-redirector)
+ENDIF(ENABLE_REDIRECTOR MATCHES "ON") \ No newline at end of file
diff --git a/utils/expression_parser.c b/utils/expression_parser.c
deleted file mode 100644
index 38b52934f..000000000
--- a/utils/expression_parser.c
+++ /dev/null
@@ -1,54 +0,0 @@
-#include "../src/config.h"
-#include "../src/main.h"
-#include "../src/cfg_file.h"
-#include "../src/expressions.h"
-
-rspamd_hash_t *counters = NULL;
-
-int
-main (int argc, char **argv)
-{
- memory_pool_t *pool;
- struct expression *cur;
- char *line, *outstr;
- int r, s;
- char buf[BUFSIZ];
-
- pool = memory_pool_new (memory_pool_get_size ());
-
- line = fgets (buf, sizeof (buf), stdin);
- while (line) {
- s = strlen (line);
- if (buf[s - 1] == '\n') {
- buf[s - 1] = '\0';
- }
- if (buf[s - 2] == '\r') {
- buf[s - 2] = '\0';
- }
-
- r = 0;
- cur = parse_expression (pool, line);
- s = strlen (line) * 4;
- outstr = memory_pool_alloc (pool, s);
- while (cur) {
- if (cur->type == EXPR_REGEXP) {
- r += snprintf (outstr + r, s - r, "OP:%s ", (char *)cur->content.operand);
- } else if (cur->type == EXPR_STR) {
- r += snprintf (outstr + r, s - r, "S:%s ", (char *)cur->content.operand);
-
- } else if (cur->type == EXPR_FUNCTION) {
- r += snprintf (outstr + r, s - r, "F:%s ", ((struct expression_function *)cur->content.operand)->name);
- }
- else {
- r += snprintf (outstr + r, s - r, "O:%c ", cur->content.operation);
- }
- cur = cur->next;
- }
- printf ("Parsed expression: '%s' -> '%s'\n", line, outstr);
- line = fgets (buf, sizeof (buf), stdin);
- }
-
- memory_pool_delete (pool);
-
- return 0;
-}
diff --git a/utils/statshow/CMakeLists.txt b/utils/statshow/CMakeLists.txt
new file mode 100644
index 000000000..12de6038e
--- /dev/null
+++ b/utils/statshow/CMakeLists.txt
@@ -0,0 +1,22 @@
+# Sources that belong to the statshow utility itself
+SET(STATSHOWSRC statshow.c)
+
+# EXCLUDE_FROM_ALL keeps statshow out of the default build; it is built only on request.
+# CLASSIFIERSSRC and TOKENIZERSSRC are not set in this file.
+# NOTE(review): presumably they are defined by the parent CMakeLists.txt - confirm.
+ADD_EXECUTABLE(statshow EXCLUDE_FROM_ALL ${CLASSIFIERSSRC} ${TOKENIZERSSRC} ${STATSHOWSRC})
+SET_TARGET_PROPERTIES(statshow PROPERTIES LINKER_LANGUAGE C)
+# NOTE(review): the include path is relative - confirm it resolves for out-of-source builds
+SET_TARGET_PROPERTIES(statshow PROPERTIES COMPILE_FLAGS "-I../../src")
+TARGET_LINK_LIBRARIES(statshow event)
+TARGET_LINK_LIBRARIES(statshow ${GLIB2_LIBRARIES})
+TARGET_LINK_LIBRARIES(statshow ${CMAKE_REQUIRED_LIBRARIES})
+# Link against whichever GMime variant was detected
+IF(GMIME2_FOUND)
+ TARGET_LINK_LIBRARIES(statshow ${GMIME2_LIBRARIES})
+ELSE(GMIME2_FOUND)
+ TARGET_LINK_LIBRARIES(statshow ${GMIME24_LIBRARIES})
+ENDIF(GMIME2_FOUND)
+TARGET_LINK_LIBRARIES(statshow rspamd_lua)
+# Lua runtime: LuaJIT when ENABLE_LUAJIT is ON, the plain Lua library otherwise
+IF(ENABLE_LUAJIT MATCHES "ON")
+ TARGET_LINK_LIBRARIES(statshow "${LUAJIT_LIBRARY}")
+ELSE(ENABLE_LUAJIT MATCHES "ON")
+ TARGET_LINK_LIBRARIES(statshow "${LUA_LIBRARY}")
+ENDIF(ENABLE_LUAJIT MATCHES "ON")
+# PCRE is added to the link line explicitly only for static builds
+IF(ENABLE_STATIC MATCHES "ON")
+ TARGET_LINK_LIBRARIES(statshow ${PCRE_LIBRARIES})
+ENDIF(ENABLE_STATIC MATCHES "ON") \ No newline at end of file
diff --git a/utils/statshow/statshow.c b/utils/statshow/statshow.c
new file mode 100644
index 000000000..7dc040a37
--- /dev/null
+++ b/utils/statshow/statshow.c
@@ -0,0 +1,262 @@
+/* Copyright (c) 2010, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Rambler BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "main.h"
+#include "cfg_file.h"
+#include "util.h"
+#include "map.h"
+#include "cfg_xml.h"
+#include "classifiers/classifiers.h"
+#include "tokenizers/tokenizers.h"
+#include "message.h"
+
+
+/* Config file path taken from -c/--config; remains NULL when the option is not given */
+static gchar *cfg_name;
+
+/* Command line options accepted by statshow; the all-NULL entry terminates the table */
+static GOptionEntry entries[] =
+{
+ { "config", 'c', 0, G_OPTION_ARG_STRING, &cfg_name, "Specify config file", NULL },
+ { NULL, 0, 0, G_OPTION_ARG_NONE, NULL, NULL, NULL }
+};
+
+/*
+ * Parse the command line with GLib's option parser and store the selected
+ * config file path in cfg->cfg_name (NULL when -c/--config was not given).
+ *
+ * argc and argv are passed by pointer because the parser rewrites them in place.
+ * On a parse error the message is printed to stderr and the process exits
+ * with status 1.
+ */
+static void
+read_cmd_line (gint *argc, gchar ***argv, struct config_file *cfg)
+{
+	GError *error = NULL;
+	GOptionContext *context;
+
+	context = g_option_context_new ("- run statshow utility");
+	g_option_context_set_summary (context, "Summary:\n Statshow utility version " RVERSION "\n Release id: " RID);
+	g_option_context_add_main_entries (context, entries, NULL);
+	if (!g_option_context_parse (context, argc, argv, &error)) {
+		fprintf (stderr, "option parsing failed: %s\n", error->message);
+		exit (1);
+	}
+	/* The context is only needed while parsing; release it instead of leaking it */
+	g_option_context_free (context);
+
+	cfg->cfg_name = cfg_name;
+}
+
+/*
+ * Read the XML configuration named by cfg->cfg_name into cfg and, if that
+ * succeeds, run the post-load step.
+ *
+ * Returns TRUE on success, FALSE when the configuration could not be read.
+ */
+static gboolean
+load_rspamd_config (struct config_file *cfg)
+{
+	gboolean loaded;
+
+	loaded = read_xml_config (cfg, cfg->cfg_name) ? TRUE : FALSE;
+	if (loaded) {
+		/* Post-load actions are applied only to a successfully parsed config */
+		post_load_config (cfg);
+	}
+
+	return loaded;
+}
+
+/*
+ * GFunc callback invoked by g_list_foreach () for every configured classifier.
+ *
+ * value is the struct classifier_config being processed, arg is the
+ * struct worker_task holding the parsed message.
+ *
+ * The classifier context is created with debugging enabled. The token tree for
+ * the classifier's tokenizer is looked up in task->tokens; when it is not there
+ * yet, it is built from the non-empty text parts of the message and stored in
+ * that table. The subject is then tokenized into the same tree and the
+ * classifier is run against the statfile pool of the owning rspamd_main.
+ *
+ * The original version walked an uninitialised list pointer and carried a
+ * "header" branch that could never be taken (header was always NULL); only
+ * text parts are tokenized here.
+ */
+static void
+classifiers_callback (gpointer value, void *arg)
+{
+	struct worker_task *task = arg;
+	struct classifier_config *cl = value;
+	struct classifier_ctx *ctx;
+	struct mime_text_part *text_part;
+	GTree *tokens = NULL;
+	GList *cur;
+	f_str_t c;
+
+	ctx = cl->classifier->init_func (task->task_pool, cl);
+	ctx->debug = TRUE;
+
+	tokens = g_hash_table_lookup (task->tokens, cl->tokenizer);
+	if (tokens == NULL) {
+		/*
+		 * NOTE(review): task->text_parts is assumed to be the list of
+		 * struct mime_text_part filled in by process_message () - confirm
+		 * the field name against main.h.
+		 */
+		for (cur = g_list_first (task->text_parts); cur != NULL; cur = g_list_next (cur)) {
+			text_part = (struct mime_text_part *)cur->data;
+			if (text_part->is_empty) {
+				continue;
+			}
+			c.begin = text_part->content->data;
+			c.len = text_part->content->len;
+			/* Tree would be freed at task pool freeing */
+			if (!cl->tokenizer->tokenize_func (cl->tokenizer, task->task_pool, &c, &tokens, FALSE)) {
+				msg_info ("cannot tokenize input");
+				return;
+			}
+		}
+		/* Cache the tree so other classifiers sharing this tokenizer can reuse it */
+		g_hash_table_insert (task->tokens, cl->tokenizer, tokens);
+	}
+
+	/* Nothing was tokenized (no usable text parts): nothing to classify */
+	if (tokens == NULL) {
+		return;
+	}
+
+	/* Take care of subject */
+	tokenize_subject (task, &tokens);
+	cl->classifier->classify_func (ctx, task->worker->srv->statfile_pool, tokens, task);
+}
+
+/*
+ * Run all configured classifiers over one in-memory message.
+ *
+ * buf/len describe the raw message; the buffer is referenced, not copied, so
+ * it must stay valid until this function returns. rspamd is the main
+ * structure that the temporary worker points back to.
+ *
+ * If process_message () reports a failure, no classifier is invoked.
+ */
+static void
+process_buffer (gchar *buf, gsize len, struct rspamd_main *rspamd)
+{
+	struct worker_task *task;
+	struct rspamd_worker *fake_worker;
+
+	/*
+	 * Make fake worker for task. It is zero-filled so that every field other
+	 * than srv has a defined value instead of heap garbage.
+	 */
+	fake_worker = g_malloc0 (sizeof (struct rspamd_worker));
+	fake_worker->srv = rspamd;
+
+	/* Make task */
+	task = construct_task (fake_worker);
+	/* Reference the message data from the task (the bytes are not duplicated) */
+	task->msg = memory_pool_alloc (task->task_pool, sizeof (f_str_t));
+	task->msg->begin = buf;
+	task->msg->len = len;
+
+	/* Process message and classify only when parsing succeeded */
+	if (process_message (task) == 0) {
+		g_list_foreach (task->cfg->classifiers, classifiers_callback, task);
+	}
+
+	/*
+	 * Released on every path; the original leaked it when parsing failed.
+	 * NOTE(review): the task itself is never released here - confirm which
+	 * destructor pairs with construct_task () and whether it should be called.
+	 */
+	g_free (fake_worker);
+}
+
+/*
+ * Read standard input until end of file into a growing heap buffer and pass
+ * the collected bytes to process_buffer ().
+ *
+ * A read error aborts the operation with a log message; the original loop
+ * tested only feof () and would spin forever once the stream entered the
+ * error state.
+ */
+static void
+process_stdin (struct rspamd_main *rspamd)
+{
+	gchar *in_buf;
+	gsize r = 0, len;
+
+	/* Allocate input buffer */
+	len = BUFSIZ;
+	in_buf = g_malloc (len);
+
+	/* Read stdin */
+	for (;;) {
+		r += fread (in_buf + r, 1, len - r, stdin);
+		if (ferror (stdin)) {
+			msg_err ("read failed: %s", strerror (errno));
+			g_free (in_buf);
+			return;
+		}
+		if (feof (stdin)) {
+			break;
+		}
+		if (len - r < len / 2) {
+			/* Grow buffer so that the next fread always has free space */
+			len *= 2;
+			in_buf = g_realloc (in_buf, len);
+		}
+	}
+
+	process_buffer (in_buf, r, rspamd);
+	g_free (in_buf);
+}
+
+/*
+ * Load a whole file into memory and pass its contents to process_buffer ().
+ *
+ * The file size is taken from stat (); at most that many bytes are read. If
+ * the file turns out to be shorter, only the bytes actually read are
+ * processed. Failures of stat (), open () or read () are logged and the file
+ * is skipped.
+ *
+ * Fixes over the original: the read length was computed as r - st.st_size
+ * (wrapping to a huge unsigned value), the result of read () was added to the
+ * counter unchecked (-1 on error, endless loop on early EOF), the descriptor
+ * was never closed and an open () failure was reported as "stat failed".
+ */
+static void
+process_file (const gchar *filename, struct rspamd_main *rspamd)
+{
+	struct stat st;
+	char *in_buf;
+	gsize size, r = 0;
+	ssize_t n;
+	gint fd;
+
+	if (stat (filename, &st) == -1) {
+		msg_err ("stat failed: %s", strerror (errno));
+		return;
+	}
+
+	if ((fd = open (filename, O_RDONLY)) == -1) {
+		msg_err ("open failed: %s", strerror (errno));
+		return;
+	}
+
+	size = st.st_size;
+	in_buf = g_malloc (size);
+
+	while (r < size) {
+		n = read (fd, in_buf + r, size - r);
+		if (n > 0) {
+			r += n;
+		}
+		else if (n == 0) {
+			/* File is shorter than stat () reported: use what was read */
+			break;
+		}
+		else if (errno != EINTR) {
+			msg_err ("read failed: %s", strerror (errno));
+			close (fd);
+			g_free (in_buf);
+			return;
+		}
+		/* EINTR: simply retry the read */
+	}
+	close (fd);
+
+	process_buffer (in_buf, r, rspamd);
+	g_free (in_buf);
+}
+
+/*
+ * Entry point of the statshow utility.
+ *
+ * Sets up a minimal rspamd_main with its config, parses the command line,
+ * switches logging to the console, registers the classifier options, loads
+ * the configuration and creates the statfile pool. Every remaining command
+ * line argument is then processed as a message file; with no arguments the
+ * message is read from standard input.
+ *
+ * Returns 0 on success; exits with EXIT_FAILURE when the configuration
+ * cannot be loaded.
+ */
+gint
+main (gint argc, gchar **argv, gchar **env)
+{
+	struct rspamd_main *rspamd;
+	gint i;
+
+	/*
+	 * g_malloc0 () terminates the program itself when memory is exhausted, so
+	 * no NULL checks are needed (the original check ran only after the
+	 * pointer had already been dereferenced).
+	 */
+	rspamd = g_malloc0 (sizeof (struct rspamd_main));
+	rspamd->server_pool = memory_pool_new (memory_pool_get_size ());
+	rspamd->cfg = g_malloc0 (sizeof (struct config_file));
+	rspamd->cfg->cfg_pool = memory_pool_new (memory_pool_get_size ());
+	init_defaults (rspamd->cfg);
+
+	read_cmd_line (&argc, &argv, rspamd->cfg);
+	if (rspamd->cfg->cfg_name == NULL) {
+		rspamd->cfg->cfg_name = FIXED_CONFIG_FILE;
+	}
+
+	/* First set logger to console logger */
+	rspamd_set_logger (RSPAMD_LOG_CONSOLE, TYPE_MAIN, rspamd->cfg);
+	(void)open_log ();
+	g_log_set_default_handler (rspamd_glib_log_function, rspamd->cfg);
+
+	/* Init classifiers options */
+	register_classifier_opt ("bayes", "min_tokens");
+	register_classifier_opt ("winnow", "min_tokens");
+	register_classifier_opt ("winnow", "learn_threshold");
+	/* Load config */
+	if (! load_rspamd_config (rspamd->cfg)) {
+		exit (EXIT_FAILURE);
+	}
+
+	/* Init statfile pool */
+	rspamd->statfile_pool = statfile_pool_new (rspamd->server_pool, rspamd->cfg->max_statfile_size);
+
+	if (argc > 1) {
+		/*
+		 * Each remaining argument is a file name. The original assigned
+		 * argv[1] (a gchar *) to a gchar ** cursor, which made the loop
+		 * interpret the bytes of the first file name as pointers.
+		 */
+		for (i = 1; i < argc; i ++) {
+			process_file (argv[i], rspamd);
+		}
+	}
+	else {
+		process_stdin (rspamd);
+	}
+
+	return 0;
+}
diff --git a/utils/url_extracter.c b/utils/url_extracter.c
deleted file mode 100644
index 6130c51ef..000000000
--- a/utils/url_extracter.c
+++ /dev/null
@@ -1,73 +0,0 @@
-#include <sys/types.h>
-#include <sys/time.h>
-#include <sys/wait.h>
-#include <sys/param.h>
-
-#include <netinet/in.h>
-#include <arpa/inet.h>
-#include <netdb.h>
-#include <syslog.h>
-#include <fcntl.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include <gmime/gmime.h>
-
-#include "../src/config.h"
-#if !defined(HAVE_OWN_QUEUE_H) && defined(HAVE_SYS_QUEUE_H)
-#include <sys/queue.h>
-#endif
-#ifdef HAVE_OWN_QUEUE_H
-#include "../src/queue.h"
-#endif
-
-#include "../src/main.h"
-#include "../src/cfg_file.h"
-#include "../src/url.h"
-#include "../src/util.h"
-#include "../src/message.h"
-
-rspamd_hash_t *counters = NULL;
-
-int
-main (int argc, char **argv)
-{
- struct worker_task task;
- struct uri *url;
- char *buf = NULL;
- size_t pos = 0, size = 65535;
- GList *cur;
-
- g_mem_set_vtable(glib_mem_profiler_table);
- g_mime_init (0);
- bzero (&task, sizeof (struct worker_task));
- task.task_pool = memory_pool_new (memory_pool_get_size ());
-
- /* Preallocate buffer */
- buf = g_malloc (size);
-
- while (!feof (stdin)) {
- *(buf + pos) = getchar ();
- pos ++;
- if (pos == size) {
- size *= 2;
- buf = g_realloc (buf, size);
- }
- }
-
- task.cfg = memory_pool_alloc0 (task.task_pool, sizeof (struct config_file));
-
- task.msg = memory_pool_alloc (task.task_pool, sizeof (f_str_t));
- task.msg->begin = buf;
- task.msg->len = pos;
- process_message (&task);
-
- cur = task.urls;
- while (cur) {
- url = cur->data;
- printf ("%s\n", struri (url));
- cur = g_list_next (cur);
- }
-
- return 0;
-}