/* Copyright (c) 2010, Vsevolod Stakhov
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *       * Redistributions of source code must retain the above copyright
 *         notice, this list of conditions and the following disclaimer.
 *       * Redistributions in binary form must reproduce the above copyright
 *         notice, this list of conditions and the following disclaimer in the
 *         documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL Rambler BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "config.h"
#include "main.h"
#include "cfg_file.h"
#include "util.h"
#include "map.h"
#include "cfg_xml.h"
#include "classifiers/classifiers.h"
#include "tokenizers/tokenizers.h"
#include "message.h"
#include "lua/lua_common.h"

/* This utility links no plugin modules: the table holds only the terminator */
module_t modules[] = {
	{NULL, NULL, NULL, NULL}
};

struct rspamd_main *rspamd_main = NULL;
/* Config path given with -c/--config; stays NULL when the option is absent */
static gchar *cfg_name;
extern rspamd_hash_t *counters;

static GOptionEntry entries[] =
{
	{ "config", 'c', 0, G_OPTION_ARG_STRING, &cfg_name, "Specify config file", NULL },
	{ NULL, 0, 0, G_OPTION_ARG_NONE, NULL, NULL, NULL }
};

/*
 * Parse command line options and store the selected config path in cfg.
 *
 * Recognised options are removed from argc/argv by GLib, so whatever is left
 * after this call are the positional arguments. On a parse error a message is
 * printed to stderr and the process exits with status 1.
 */
static void
read_cmd_line (gint *argc, gchar ***argv, struct config_file *cfg)
{
	GError *error = NULL;
	GOptionContext *context;

	context = g_option_context_new ("- run statshow utility");
	g_option_context_set_summary (context, "Summary:\n  Statshow utility version " RVERSION "\n  Release id: " RID);
	g_option_context_add_main_entries (context, entries, NULL);
	if (!g_option_context_parse (context, argc, argv, &error)) {
		fprintf (stderr, "option parsing failed: %s\n", error->message);
		exit (1);
	}
	/* The context is not needed once parsing is done; release it */
	g_option_context_free (context);
	cfg->cfg_name = cfg_name;
}

/*
 * Read the XML config named by cfg->cfg_name and run the post-load step.
 *
 * Returns TRUE on success, FALSE when the XML config cannot be read.
 */
static gboolean
load_rspamd_config (struct config_file *cfg)
{
	if (!read_xml_config (cfg, cfg->cfg_name)) {
		return FALSE;
	}

	/* Do post-load actions */
	post_load_config (cfg);

	return TRUE;
}

/*
 * GList foreach callback: run one configured classifier over a task.
 *
 * value is a struct classifier_config *, arg is the struct worker_task *.
 * Tokens are looked up in task->tokens by tokenizer; when absent they are
 * built from the non-empty text parts of the task and stored there. The
 * classifier context is created with debug enabled.
 */
static void
classifiers_callback (gpointer value, void *arg)
{
	struct worker_task             *task = arg;
	struct classifier_config       *cl = value;
	struct classifier_ctx          *ctx;
	struct mime_text_part          *text_part;
	GTree                          *tokens;
	GList                          *cur;
	f_str_t                         c;

	ctx = cl->classifier->init_func (task->task_pool, cl);
	ctx->debug = TRUE;

	/*
	 * The original code also carried a "header" tokenizing branch guarded by
	 * a local that was always NULL; that unreachable branch has been removed.
	 */
	tokens = g_hash_table_lookup (task->tokens, cl->tokenizer);
	if (tokens == NULL) {
		for (cur = g_list_first (task->text_parts); cur != NULL; cur = g_list_next (cur)) {
			text_part = (struct mime_text_part *)cur->data;
			if (text_part->is_empty) {
				continue;
			}
			c.begin = text_part->content->data;
			c.len = text_part->content->len;
			/* Tree would be freed at task pool freeing */
			if (!cl->tokenizer->tokenize_func (cl->tokenizer, task->task_pool, &c, &tokens,
					TRUE, text_part->is_utf, text_part->urls_offset)) {
				msg_info ("cannot tokenize input");
				return;
			}
		}
		/* Store the result for this tokenizer so later lookups can find it */
		g_hash_table_insert (task->tokens, cl->tokenizer, tokens);
	}

	/* Nothing was tokenized (e.g. no non-empty text parts) */
	if (tokens == NULL) {
		return;
	}

	/* Take care of subject */
	tokenize_subject (task, &tokens);
	cl->classifier->classify_func (ctx, task->worker->srv->statfile_pool, tokens, task);
}

/*
 * Build a task around an in-memory message and run every configured
 * classifier on it.
 *
 * buf/len describe the raw message; the buffer is referenced, not copied, so
 * it must stay valid for the duration of the call. The task itself is not
 * released here.
 */
static void
process_buffer (gchar *buf, gsize len, struct rspamd_main *rspamd)
{
	struct worker_task             *task;
	struct rspamd_worker           *fake_worker;

	/* Make fake worker for task; zeroed so no field is left indeterminate */
	fake_worker = g_malloc0 (sizeof (*fake_worker));
	fake_worker->srv = rspamd;

	/* Make task */
	task = construct_task (fake_worker);
	/* Point the task message at the caller's buffer */
	task->msg = memory_pool_alloc (task->task_pool, sizeof (f_str_t));
	task->msg->begin = buf;
	task->msg->len = len;

	/* Process message; classifiers run only when parsing succeeded */
	if (process_message (task) == 0) {
		g_list_foreach (task->cfg->classifiers, classifiers_callback, task);
	}

	/* Released on both the success and the failure path */
	g_free (fake_worker);
}

/*
 * Read all of stdin into a growing heap buffer and process it as one message.
 *
 * On a read error the error is logged and nothing is processed.
 */
static void
process_stdin (struct rspamd_main *rspamd)
{
	gchar                          *in_buf;
	gsize                           len = BUFSIZ, r = 0, n;

	/* Allocate input buffer */
	in_buf = g_malloc (len);

	/*
	 * Read stdin. The loop stops on a zero-length read, which covers both EOF
	 * and a stream error; testing only feof() would spin forever on an error.
	 */
	for (;;) {
		n = fread (in_buf + r, 1, len - r, stdin);
		if (n == 0) {
			break;
		}
		r += n;
		if (len - r < len / 2) {
			/* Grow buffer */
			len *= 2;
			in_buf = g_realloc (in_buf, len);
		}
	}

	if (ferror (stdin)) {
		msg_err ("read failed: %s", strerror (errno));
		g_free (in_buf);
		return;
	}

	process_buffer (in_buf, r, rspamd);
	g_free (in_buf);
}

/*
 * Read a whole file into memory and process it as one message.
 *
 * Failures of stat, open or read are logged and the file is skipped. If the
 * file turns out shorter than stat reported, the bytes actually read are
 * processed.
 */
static void
process_file (const gchar *filename, struct rspamd_main *rspamd)
{
	struct stat                     st;
	char                           *in_buf;
	gsize                           size, r = 0;
	gssize                          n;
	gint                            fd;

	if (stat (filename, &st) == -1) {
		msg_err ("stat failed: %s", strerror (errno));
		return;
	}

	if ((fd = open (filename, O_RDONLY)) == -1) {
		msg_err ("open failed: %s", strerror (errno));
		return;
	}

	size = (gsize)st.st_size;
	in_buf = g_malloc (size);

	while (r < size) {
		/* Ask only for the bytes that still fit into the buffer */
		n = read (fd, in_buf + r, size - r);
		if (n == -1) {
			if (errno == EINTR) {
				continue;
			}
			msg_err ("read failed: %s", strerror (errno));
			close (fd);
			g_free (in_buf);
			return;
		}
		if (n == 0) {
			/* EOF before the size reported by stat: stop instead of looping */
			break;
		}
		r += (gsize)n;
	}
	close (fd);

	process_buffer (in_buf, r, rspamd);
	g_free (in_buf);
}

/*
 * Entry point: set up a minimal rspamd environment (pools, config, console
 * logger, lua, counters, statfile pool), then process each file named on the
 * command line, or stdin when no file is given.
 */
gint
main (gint argc, gchar **argv, gchar **env)
{
	gchar                         **arg;

	/* Zeroed allocations; checked right away, before the first dereference */
	rspamd_main = (struct rspamd_main *)g_try_malloc0 (sizeof (struct rspamd_main));
	if (rspamd_main == NULL) {
		fprintf (stderr, "Cannot allocate memory\n");
		exit (EXIT_FAILURE);
	}
	rspamd_main->server_pool = memory_pool_new (memory_pool_get_size ());
	rspamd_main->cfg = (struct config_file *)g_try_malloc0 (sizeof (struct config_file));
	if (rspamd_main->cfg == NULL) {
		fprintf (stderr, "Cannot allocate memory\n");
		exit (EXIT_FAILURE);
	}

	rspamd_main->cfg->cfg_pool = memory_pool_new (memory_pool_get_size ());
	init_defaults (rspamd_main->cfg);

	read_cmd_line (&argc, &argv, rspamd_main->cfg);
	if (rspamd_main->cfg->cfg_name == NULL) {
		rspamd_main->cfg->cfg_name = FIXED_CONFIG_FILE;
	}

	/* First set logger to console logger */
	rspamd_set_logger (RSPAMD_LOG_CONSOLE, TYPE_MAIN, rspamd_main);
	(void)open_log (rspamd_main->logger);
	g_log_set_default_handler (rspamd_glib_log_function, rspamd_main);

	init_lua (rspamd_main->cfg);

	/* Init counters */
	counters = rspamd_hash_new_shared (rspamd_main->server_pool, g_str_hash, g_str_equal, 64);

	/* Init classifiers options */
	register_classifier_opt ("bayes", "min_tokens");
	register_classifier_opt ("winnow", "min_tokens");
	register_classifier_opt ("winnow", "learn_threshold");

	/* Load config */
	if (! load_rspamd_config (rspamd_main->cfg)) {
		exit (EXIT_FAILURE);
	}

	/* Init statfile pool */
	rspamd_main->statfile_pool = statfile_pool_new (rspamd_main->server_pool, rspamd_main->cfg->max_statfile_size);

	g_mime_init (0);
	rspamd_main->cfg->log_extended = FALSE;

	/* Check argc: options were stripped by read_cmd_line, the rest are files */
	if (argc > 1) {
		for (arg = &argv[1]; *arg != NULL; arg ++) {
			process_file (*arg, rspamd_main);
		}
	}
	else {
		process_stdin (rspamd_main);
	}

	return 0;
}