aboutsummaryrefslogtreecommitdiffstats
path: root/src/libserver
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@highsecure.ru> 2015-06-02 12:37:22 +0100
committer Vsevolod Stakhov <vsevolod@highsecure.ru> 2015-06-02 15:39:37 +0100
commit a31ea02deb31a3ec805a795a656ca62065fc38b0 (patch)
tree 10eda9e3513765c7ff3ad390f329b5132b2b2eb1 /src/libserver
parent a891c5b85e3ada0f6180c4ec3cc53013d5a4994d (diff)
download rspamd-a31ea02deb31a3ec805a795a656ca62065fc38b0.tar.gz
rspamd-a31ea02deb31a3ec805a795a656ca62065fc38b0.zip
Reorganize structure of filter components.
Diffstat (limited to 'src/libserver')
-rw-r--r-- src/libserver/CMakeLists.txt 1
-rw-r--r-- src/libserver/cfg_rcl.c 4
-rw-r--r-- src/libserver/composites.c 335
-rw-r--r-- src/libserver/composites.h 50
-rw-r--r-- src/libserver/task.c 89
-rw-r--r-- src/libserver/task.h 12
6 files changed, 489 insertions, 2 deletions
diff --git a/src/libserver/CMakeLists.txt b/src/libserver/CMakeLists.txt
index 4edbe054d..1f8df6c13 100644
--- a/src/libserver/CMakeLists.txt
+++ b/src/libserver/CMakeLists.txt
@@ -3,6 +3,7 @@ SET(LIBRSPAMDSERVERSRC
${CMAKE_CURRENT_SOURCE_DIR}/buffer.c
${CMAKE_CURRENT_SOURCE_DIR}/cfg_utils.c
${CMAKE_CURRENT_SOURCE_DIR}/cfg_rcl.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/composites.c
${CMAKE_CURRENT_SOURCE_DIR}/dkim.c
${CMAKE_CURRENT_SOURCE_DIR}/dns.c
${CMAKE_CURRENT_SOURCE_DIR}/dynamic_cfg.c
diff --git a/src/libserver/cfg_rcl.c b/src/libserver/cfg_rcl.c
index 314be10d0..fc8ada749 100644
--- a/src/libserver/cfg_rcl.c
+++ b/src/libserver/cfg_rcl.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2013, Vsevolod Stakhov
+/* Copyright (c) 2013-2015, Vsevolod Stakhov
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -28,7 +28,7 @@
#include "cfg_file.h"
#include "lua/lua_common.h"
#include "expression.h"
-
+#include "composites.h"
struct rspamd_rcl_default_handler_data {
struct rspamd_rcl_struct_parser pd;
diff --git a/src/libserver/composites.c b/src/libserver/composites.c
new file mode 100644
index 000000000..b6d2fb863
--- /dev/null
+++ b/src/libserver/composites.c
@@ -0,0 +1,335 @@
+/*
+ * Copyright (c) 2015, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#include "config.h"
+#include "logger.h"
+#include "expression.h"
+#include "task.h"
+#include "utlist.h"
+#include "filter.h"
+#include "composites.h"
+
+/*
+ * State shared by all composite evaluations performed for one metric result
+ * of one task.
+ */
+struct composites_data {
+ /* Task whose results are being processed */
+ struct rspamd_task *task;
+ /* Composite currently being evaluated */
+ struct rspamd_composite *composite;
+ /* Metric result whose symbols table is looked up and modified */
+ struct metric_result *metric_res;
+ /* Tree keyed by symbol name, values are struct symbol_remove_data */
+ GTree *symbols_to_remove;
+ /*
+  * Bitmap with two bits per composite: bit (id * 2) means "checked",
+  * bit (id * 2 + 1) holds the result of the evaluation
+  */
+ guint8 *checked;
+};
+
+/*
+ * Pending removal record for a symbol referenced by one or more composites.
+ * The removal is applied only if at least one composite in `comp' matched.
+ */
+struct symbol_remove_data {
+ /* Symbol found in the metric result */
+ struct symbol *ms;
+ /* If set, the symbol's score is subtracted from the metric score */
+ gboolean remove_weight;
+ /* If set, the symbol is removed from the metric's symbols table */
+ gboolean remove_symbol;
+ /* List of struct rspamd_composite that referenced this symbol */
+ GList *comp;
+};
+
+/* Forward declarations of the atom handlers referenced by the table below */
+static rspamd_expression_atom_t * rspamd_composite_expr_parse (const gchar *line, gsize len,
+ rspamd_mempool_t *pool, gpointer ud, GError **err);
+static gint rspamd_composite_expr_process (gpointer input, rspamd_expression_atom_t *atom);
+static gint rspamd_composite_expr_priority (rspamd_expression_atom_t *atom);
+static void rspamd_composite_expr_destroy (rspamd_expression_atom_t *atom);
+
+/*
+ * Atom handlers table for composite expressions; exported through
+ * composites.h
+ */
+const struct rspamd_atom_subr composite_expr_subr = {
+ .parse = rspamd_composite_expr_parse,
+ .process = rspamd_composite_expr_process,
+ .priority = rspamd_composite_expr_priority,
+ .destroy = rspamd_composite_expr_destroy
+};
+
+/*
+ * Error domain used for GError objects produced while parsing composites
+ */
+static GQuark
+rspamd_composites_quark (void)
+{
+ return g_quark_from_static_string ("composites");
+}
+
+/*
+ * Parse one composite atom starting at `line'.
+ *
+ * An atom is the run of characters up to the first delimiter (comma, blank,
+ * parenthesis, comparison/arithmetic/logic operator or newline). The atom
+ * text is copied into `pool' as a NUL terminated string and stored in the
+ * `data' field of the returned atom; `str' keeps pointing into `line'.
+ *
+ * Returns NULL and fills `err' if `line' starts with a delimiter.
+ */
+static rspamd_expression_atom_t *
+rspamd_composite_expr_parse (const gchar *line, gsize len,
+		rspamd_mempool_t *pool, gpointer ud, GError **err)
+{
+	static const gchar delimiters[] = ", \t()><+!|&\n";
+	rspamd_expression_atom_t *atom;
+	gsize name_len;
+
+	/* Composites are just sequences of symbols */
+	name_len = strcspn (line, delimiters);
+
+	if (name_len == 0) {
+		/* Invalid composite atom */
+		g_set_error (err, rspamd_composites_quark (), 100,
+				"Invalid composite: %s", line);
+
+		return NULL;
+	}
+
+	atom = rspamd_mempool_alloc0 (pool, sizeof (*atom));
+	atom->str = line;
+	atom->len = name_len;
+	atom->data = rspamd_mempool_alloc (pool, name_len + 1);
+	rspamd_strlcpy (atom->data, line, name_len + 1);
+
+	return atom;
+}
+
+/*
+ * Resolve a single symbol name referenced by the composite that is being
+ * evaluated (cd->composite).
+ *
+ * - If the symbol is present in the metric result, 1 is returned and *pms
+ *   points to it.
+ * - Otherwise, if the name is another composite, that composite is evaluated
+ *   (once) and its result is returned; *pms is whatever the metric result
+ *   holds for that name afterwards (possibly NULL).
+ * - Otherwise 0 is returned and *pms is NULL.
+ */
+static gint
+rspamd_composite_process_single_symbol (struct composites_data *cd,
+		const gchar *sym, struct symbol **pms)
+{
+	struct symbol *ms;
+	struct rspamd_composite *ncomp, *saved;
+	gint rc = 0;
+
+	ms = g_hash_table_lookup (cd->metric_res->symbols, sym);
+
+	if (ms != NULL) {
+		rc = 1;
+	}
+	else if ((ncomp = g_hash_table_lookup (cd->task->cfg->composite_symbols,
+			sym)) != NULL) {
+		if (isclr (cd->checked, ncomp->id * 2)) {
+			/*
+			 * Mark the current composite as checked to avoid cyclic
+			 * references: if the nested expression refers back to it, the
+			 * lookup lands in the `else' branch below instead of recursing.
+			 */
+			saved = cd->composite;
+			setbit (cd->checked, saved->id * 2);
+
+			/*
+			 * The nested expression must be evaluated in the context of the
+			 * nested composite. Otherwise every atom of it would see the
+			 * "checked" bit of the outer composite and return the outer
+			 * (not yet known) result instead of being evaluated.
+			 */
+			cd->composite = ncomp;
+			rc = rspamd_process_expression (ncomp->expr,
+					RSPAMD_EXPRESSION_FLAG_NOOPT, cd);
+
+			/* Restore the state of the outer composite */
+			cd->composite = saved;
+			clrbit (cd->checked, saved->id * 2);
+
+			if (rc) {
+				setbit (cd->checked, ncomp->id * 2 + 1);
+			}
+			setbit (cd->checked, ncomp->id * 2);
+
+			ms = g_hash_table_lookup (cd->metric_res->symbols, sym);
+		}
+		else {
+			/*
+			 * XXX: in case of cyclic references this would return 0
+			 */
+			rc = isset (cd->checked, ncomp->id * 2 + 1);
+		}
+	}
+
+	*pms = ms;
+	return rc;
+}
+
+/*
+ * Evaluate one atom of a composite expression.
+ *
+ * `input' is the struct composites_data of the current evaluation, the atom
+ * data is the symbol name, optionally prefixed with '~' or '-' and optionally
+ * of the form "g:<group>" to match any symbol of a group.
+ *
+ * Returns non-zero if the atom matched. When a matching symbol is found in
+ * the metric result, it is also registered in cd->symbols_to_remove together
+ * with the current composite.
+ */
+static gint
+rspamd_composite_expr_process (gpointer input, rspamd_expression_atom_t *atom)
+{
+	struct composites_data *cd = (struct composites_data *)input;
+	const gchar *name = atom->data;
+	struct symbol_remove_data *rd;
+	struct symbol *ms = NULL;
+	struct rspamd_symbols_group *gr;
+	struct rspamd_symbol_def *sdef;
+	gint rc = 0;
+	gchar prefix = '\0';
+
+	if (isset (cd->checked, cd->composite->id * 2)) {
+		/* We have already checked this composite, so just return its value */
+		return isset (cd->checked, cd->composite->id * 2 + 1);
+	}
+
+	if (*name == '~' || *name == '-') {
+		prefix = *name;
+		name ++;
+	}
+
+	if (strncmp (name, "g:", 2) != 0) {
+		rc = rspamd_composite_process_single_symbol (cd, name, &ms);
+	}
+	else {
+		/* Group reference: the first matching symbol of the group wins */
+		gr = g_hash_table_lookup (cd->task->cfg->symbols_groups, name + 2);
+
+		if (gr != NULL) {
+			LL_FOREACH (gr->symbols, sdef) {
+				rc = rspamd_composite_process_single_symbol (cd, sdef->name,
+						&ms);
+				if (rc) {
+					break;
+				}
+			}
+		}
+	}
+
+	if (!rc || ms == NULL) {
+		return rc;
+	}
+
+	/*
+	 * At this point we know that we need to do something about this symbol,
+	 * however, we don't know whether we need to delete it unfortunately,
+	 * that depends on the later decisions when the complete expression is
+	 * evaluated.
+	 */
+	rd = g_tree_lookup (cd->symbols_to_remove, ms->name);
+
+	if (rd != NULL) {
+		/*
+		 * XXX: what if we have different preferences regarding
+		 * weight and symbol removal in different composites?
+		 */
+		rd->comp = g_list_prepend (rd->comp, cd->composite);
+
+		return rc;
+	}
+
+	rd = rspamd_mempool_alloc (cd->task->task_pool, sizeof (*rd));
+	rd->ms = ms;
+
+	switch (prefix) {
+	case '~':
+		/* Remove the symbol but keep its weight */
+		rd->remove_symbol = TRUE;
+		rd->remove_weight = FALSE;
+		break;
+	case '-':
+		/* Keep both the symbol and its weight */
+		rd->remove_symbol = FALSE;
+		rd->remove_weight = FALSE;
+		break;
+	default:
+		/* Remove both the symbol and its weight */
+		rd->remove_symbol = TRUE;
+		rd->remove_weight = TRUE;
+		break;
+	}
+
+	rd->comp = g_list_prepend (NULL, cd->composite);
+	g_tree_insert (cd->symbols_to_remove, (gpointer)ms->name, rd);
+
+	return rc;
+}
+
+/*
+ * Priority handler for composite atoms: all atoms have the same priority (0),
+ * as we don't have preferences for composites
+ */
+static gint
+rspamd_composite_expr_priority (rspamd_expression_atom_t *atom)
+{
+ return 0;
+}
+
+/*
+ * Destroy handler for composite atoms: intentionally empty
+ */
+static void
+rspamd_composite_expr_destroy (rspamd_expression_atom_t *atom)
+{
+ /* Composite atoms are destroyed just with the pool */
+}
+
+/*
+ * Key comparator for the symbols_to_remove tree: keys are NUL terminated
+ * symbol names ordered with strcmp
+ */
+static gint
+remove_compare_data (gconstpointer a, gconstpointer b)
+{
+	return strcmp ((const gchar *)a, (const gchar *)b);
+}
+
+/*
+ * Hash table iterator over all configured composites.
+ *
+ * `key' is the composite symbol name, `value' is the struct rspamd_composite
+ * and `data' is the struct composites_data of the current metric. The
+ * composite expression is evaluated, the checked/result bits are updated and
+ * the composite symbol is inserted into the task results if it matched.
+ */
+static void
+composites_foreach_callback (gpointer key, gpointer value, void *data)
+{
+	struct composites_data *cd = data;
+	struct rspamd_composite *comp = value;
+	gint rc;
+
+	cd->composite = comp;
+
+	if (isset (cd->checked, comp->id * 2)) {
+		/*
+		 * This composite has already been evaluated as a dependency of
+		 * another composite. Evaluating the expression again is useless:
+		 * every atom would just return the cached result bit, and an
+		 * expression with negations would flip the cached result.
+		 */
+		rc = isset (cd->checked, comp->id * 2 + 1);
+	}
+	else {
+		rc = rspamd_process_expression (comp->expr,
+				RSPAMD_EXPRESSION_FLAG_NOOPT, cd);
+
+		/* Checked bit */
+		setbit (cd->checked, comp->id * 2);
+	}
+
+	/* Result bit */
+	if (rc) {
+		setbit (cd->checked, comp->id * 2 + 1);
+		rspamd_task_insert_result_single (cd->task, key, 1.0, NULL);
+	}
+	else {
+		clrbit (cd->checked, comp->id * 2 + 1);
+	}
+}
+
+
+/*
+ * Tree iterator over cd->symbols_to_remove.
+ *
+ * `key' is the symbol name, `value' is the struct symbol_remove_data and
+ * `data' is the struct composites_data. If any composite that referenced the
+ * symbol has its result bit set, the symbol and/or its weight are removed
+ * from the metric result according to the removal flags. The list of
+ * composites is freed in any case. Always returns FALSE so that the
+ * traversal continues.
+ */
+static gboolean
+composites_remove_symbols (gpointer key, gpointer value, gpointer data)
+{
+	struct composites_data *cd = data;
+	struct symbol_remove_data *rd = value;
+	struct rspamd_composite *comp;
+	GList *node;
+	gboolean matched = FALSE;
+
+	/*
+	 * XXX: actually, this is a weak assumption as we are unaware here about
+	 * negate operation and so on. We need to parse AST directly and remove
+	 * only those symbols that could be removed.
+	 */
+	for (node = rd->comp; node != NULL && !matched;
+			node = g_list_next (node)) {
+		comp = node->data;
+
+		if (isset (cd->checked, comp->id * 2 + 1)) {
+			matched = TRUE;
+		}
+	}
+
+	g_list_free (rd->comp);
+
+	if (!matched) {
+		return FALSE;
+	}
+
+	if (rd->remove_symbol) {
+		g_hash_table_remove (cd->metric_res->symbols, key);
+	}
+
+	if (rd->remove_weight) {
+		cd->metric_res->score -= rd->ms->score;
+	}
+
+	return FALSE;
+}
+
+/*
+ * Hash table iterator over task->results.
+ *
+ * `value' is the struct metric_result to process and `data' is the task.
+ * All configured composites are evaluated against this metric result, then
+ * the symbols registered for removal are processed.
+ */
+static void
+composites_metric_callback (gpointer key, gpointer value, gpointer data)
+{
+	struct rspamd_task *task = data;
+	struct metric_result *metric_res = value;
+	struct composites_data *cd;
+	guint ncomposites;
+
+	cd = rspamd_mempool_alloc (task->task_pool, sizeof (*cd));
+	cd->task = task;
+	cd->metric_res = metric_res;
+	cd->symbols_to_remove = g_tree_new (remove_compare_data);
+
+	/* Two bits per composite: checked and result, initially all zero */
+	ncomposites = g_hash_table_size (task->cfg->composite_symbols);
+	cd->checked = rspamd_mempool_alloc0 (task->task_pool,
+			NBYTES (ncomposites * 2));
+
+	/* Process hash table */
+	g_hash_table_foreach (task->cfg->composite_symbols,
+			composites_foreach_callback, cd);
+
+	/* Remove symbols that are in composites */
+	g_tree_foreach (cd->symbols_to_remove, composites_remove_symbols, cd);
+
+	/* Free the tree itself; the values are allocated from the task pool */
+	g_tree_destroy (cd->symbols_to_remove);
+}
+
+/*
+ * Public entry point: run composites processing for every metric result
+ * stored in task->results
+ */
+void
+rspamd_make_composites (struct rspamd_task *task)
+{
+ g_hash_table_foreach (task->results, composites_metric_callback, task);
+}
diff --git a/src/libserver/composites.h b/src/libserver/composites.h
new file mode 100644
index 000000000..fdcfe8241
--- /dev/null
+++ b/src/libserver/composites.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2015, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SRC_LIBSERVER_COMPOSITES_H_
+#define SRC_LIBSERVER_COMPOSITES_H_
+
+#include "config.h"
+
+struct rspamd_task;
+
+/**
+ * Atom handlers (subr) for composite expressions
+ */
+extern const struct rspamd_atom_subr composite_expr_subr;
+/**
+ * Composite structure: the expression of a composite and its numeric id
+ */
+struct rspamd_composite {
+ struct rspamd_expression *expr;
+ gint id;
+};
+
+/**
+ * Process all metric results of a task and evaluate the composites
+ * defined in the config against them
+ * @param task worker's task that represents a message from a user
+ */
+void rspamd_make_composites (struct rspamd_task *task);
+
+#endif /* SRC_LIBSERVER_COMPOSITES_H_ */
diff --git a/src/libserver/task.c b/src/libserver/task.c
index ba4f0c1bd..70da55afe 100644
--- a/src/libserver/task.c
+++ b/src/libserver/task.c
@@ -492,3 +492,92 @@ rspamd_task_re_cache_check (struct rspamd_task *task, const gchar *re)
return ret;
}
+
+/*
+ * Learn the message of a task; thin wrapper around rspamd_stat_learn.
+ * `cl' is not used here: the call is made with the task, the is_spam flag,
+ * the Lua state from the task config and `err'. The result of
+ * rspamd_stat_learn is returned as is.
+ */
+gboolean
+rspamd_learn_task_spam (struct rspamd_classifier_config *cl,
+ struct rspamd_task *task,
+ gboolean is_spam,
+ GError **err)
+{
+ return rspamd_stat_learn (task, is_spam, task->cfg->lua_state, err);
+}
+
+/*
+ * Return true if the task's result for `metric' has a score that is not less
+ * than the spam threshold of this metric. The threshold is taken from
+ * check_metric_settings or, if that fails, from the reject action of the
+ * metric; a non-positive threshold never matches. FALSE is returned if the
+ * task has no result for this metric.
+ */
+static gboolean
+check_metric_is_spam (struct rspamd_task *task, struct metric *metric)
+{
+	struct metric_result *res;
+	double threshold;
+
+	/* Avoid concurrency while checking results */
+#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION <= 30))
+	g_static_mutex_lock (&result_mtx);
+	res = g_hash_table_lookup (task->results, metric->name);
+	g_static_mutex_unlock (&result_mtx);
+#else
+	G_LOCK (result_mtx);
+	res = g_hash_table_lookup (task->results, metric->name);
+	G_UNLOCK (result_mtx);
+#endif
+
+	if (res == NULL) {
+		return FALSE;
+	}
+
+	if (!check_metric_settings (task, metric, &threshold)) {
+		threshold = metric->actions[METRIC_ACTION_REJECT].score;
+	}
+
+	return (threshold > 0 && res->score >= threshold);
+}
+
+/*
+ * Run the symbols cache for a task.
+ *
+ * Returns 0 if the task settings contain a "whitelist" key (the task is then
+ * flagged with RSPAMD_TASK_FLAG_SKIP and nothing is checked), 1 otherwise.
+ * Processing stops early as soon as some metric reaches its reject score,
+ * unless RSPAMD_TASK_FLAG_PASS_ALL is set. If session events are still
+ * pending after all symbols are planned, the task state is set to
+ * WAIT_FILTER.
+ */
+gint
+rspamd_process_filters (struct rspamd_task *task)
+{
+	GList *cur;
+	struct metric *metric;
+	gpointer item = NULL;
+
+	/* Insert default metric to be sure that it exists all the time */
+	rspamd_create_metric_result (task, DEFAULT_METRIC);
+
+	if (task->settings &&
+			ucl_object_find_key (task->settings, "whitelist") != NULL) {
+		msg_info ("<%s> is whitelisted", task->message_id);
+		task->flags |= RSPAMD_TASK_FLAG_SKIP;
+
+		return 0;
+	}
+
+	/* Process metrics symbols */
+	while (rspamd_symbols_cache_process_symbol (task, task->cfg->cache, &item)) {
+		/* Check reject actions */
+		for (cur = task->cfg->metrics_list; cur != NULL;
+				cur = g_list_next (cur)) {
+			metric = cur->data;
+
+			if ((task->flags & RSPAMD_TASK_FLAG_PASS_ALL) ||
+					!(metric->actions[METRIC_ACTION_REJECT].score > 0) ||
+					!check_metric_is_spam (task, metric)) {
+				continue;
+			}
+
+			msg_info ("<%s> has already scored more than %.2f, so do not "
+					"plan any more checks", task->message_id,
+					metric->actions[METRIC_ACTION_REJECT].score);
+
+			return 1;
+		}
+	}
+
+	if (rspamd_session_events_pending (task->s) != 0) {
+		task->state = WAIT_FILTER;
+	}
+
+	return 1;
+}
diff --git a/src/libserver/task.h b/src/libserver/task.h
index 5ce24176c..45e720eb8 100644
--- a/src/libserver/task.h
+++ b/src/libserver/task.h
@@ -239,4 +239,16 @@ guint rspamd_task_re_cache_add (struct rspamd_task *task, const gchar *re,
*/
guint rspamd_task_re_cache_check (struct rspamd_task *task, const gchar *re);
+/**
+ * Learn the message in a task
+ * @param cl classifier configuration
+ * @param task worker's task object
+ * @param is_spam learn flag (presumably TRUE for spam and FALSE for ham)
+ * @param err pointer to GError
+ * @return true if learning succeeded
+ */
+gboolean rspamd_learn_task_spam (struct rspamd_classifier_config *cl,
+ struct rspamd_task *task,
+ gboolean is_spam,
+ GError **err);
+
#endif /* TASK_H_ */