@@ -40,25 +40,6 @@ | |||
#define COMMON_PART_FACTOR 95 | |||
static rspamd_expression_atom_t * rspamd_composite_expr_parse (const gchar *line, gsize len, | |||
rspamd_mempool_t *pool, gpointer ud, GError **err); | |||
static gint rspamd_composite_expr_process (gpointer input, rspamd_expression_atom_t *atom); | |||
static gint rspamd_composite_expr_priority (rspamd_expression_atom_t *atom); | |||
static void rspamd_composite_expr_destroy (rspamd_expression_atom_t *atom); | |||
const struct rspamd_atom_subr composite_expr_subr = { | |||
.parse = rspamd_composite_expr_parse, | |||
.process = rspamd_composite_expr_process, | |||
.priority = rspamd_composite_expr_priority, | |||
.destroy = rspamd_composite_expr_destroy | |||
}; | |||
static inline GQuark | |||
filter_error_quark (void) | |||
{ | |||
return g_quark_from_static_string ("g-filter-error-quark"); | |||
} | |||
struct metric_result * | |||
rspamd_create_metric_result (struct rspamd_task *task, const gchar *name) | |||
{ | |||
@@ -343,392 +324,6 @@ check_metric_settings (struct rspamd_task *task, struct metric *metric, | |||
return FALSE; | |||
} | |||
/* Return true if metric has score that is more than spam score for it */ | |||
static gboolean | |||
check_metric_is_spam (struct rspamd_task *task, struct metric *metric) | |||
{ | |||
struct metric_result *res; | |||
double ms; | |||
/* Avoid concurrency while checking results */ | |||
#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION <= 30)) | |||
g_static_mutex_lock (&result_mtx); | |||
#else | |||
G_LOCK (result_mtx); | |||
#endif | |||
res = g_hash_table_lookup (task->results, metric->name); | |||
if (res) { | |||
#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION <= 30)) | |||
g_static_mutex_unlock (&result_mtx); | |||
#else | |||
G_UNLOCK (result_mtx); | |||
#endif | |||
if (!check_metric_settings (task, metric, &ms)) { | |||
ms = metric->actions[METRIC_ACTION_REJECT].score; | |||
} | |||
return (ms > 0 && res->score >= ms); | |||
} | |||
#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION <= 30)) | |||
g_static_mutex_unlock (&result_mtx); | |||
#else | |||
G_UNLOCK (result_mtx); | |||
#endif | |||
return FALSE; | |||
} | |||
gint | |||
rspamd_process_filters (struct rspamd_task *task) | |||
{ | |||
GList *cur; | |||
struct metric *metric; | |||
gpointer item = NULL; | |||
/* Insert default metric to be sure that it exists all the time */ | |||
rspamd_create_metric_result (task, DEFAULT_METRIC); | |||
if (task->settings) { | |||
const ucl_object_t *wl; | |||
wl = ucl_object_find_key (task->settings, "whitelist"); | |||
if (wl != NULL) { | |||
msg_info ("<%s> is whitelisted", task->message_id); | |||
task->flags |= RSPAMD_TASK_FLAG_SKIP; | |||
return 0; | |||
} | |||
} | |||
/* Process metrics symbols */ | |||
while (rspamd_symbols_cache_process_symbol (task, task->cfg->cache, &item)) { | |||
/* Check reject actions */ | |||
cur = task->cfg->metrics_list; | |||
while (cur) { | |||
metric = cur->data; | |||
if (!(task->flags & RSPAMD_TASK_FLAG_PASS_ALL) && | |||
metric->actions[METRIC_ACTION_REJECT].score > 0 && | |||
check_metric_is_spam (task, metric)) { | |||
msg_info ("<%s> has already scored more than %.2f, so do not " | |||
"plan any more checks", task->message_id, | |||
metric->actions[METRIC_ACTION_REJECT].score); | |||
return 1; | |||
} | |||
cur = g_list_next (cur); | |||
} | |||
} | |||
if (rspamd_session_events_pending (task->s) != 0) { | |||
task->state = WAIT_FILTER; | |||
} | |||
return 1; | |||
} | |||
struct composites_data { | |||
struct rspamd_task *task; | |||
struct rspamd_composite *composite; | |||
struct metric_result *metric_res; | |||
GTree *symbols_to_remove; | |||
guint8 *checked; | |||
}; | |||
struct symbol_remove_data { | |||
struct symbol *ms; | |||
gboolean remove_weight; | |||
gboolean remove_symbol; | |||
GList *comp; | |||
}; | |||
/* | |||
* Composites are just sequences of symbols | |||
*/ | |||
static rspamd_expression_atom_t * | |||
rspamd_composite_expr_parse (const gchar *line, gsize len, | |||
rspamd_mempool_t *pool, gpointer ud, GError **err) | |||
{ | |||
gsize clen; | |||
rspamd_expression_atom_t *res; | |||
clen = strcspn (line, ", \t()><+!|&\n"); | |||
if (clen == 0) { | |||
/* Invalid composite atom */ | |||
g_set_error (err, filter_error_quark (), 100, "Invalid composite: %s", | |||
line); | |||
return NULL; | |||
} | |||
res = rspamd_mempool_alloc0 (pool, sizeof (*res)); | |||
res->len = clen; | |||
res->str = line; | |||
res->data = rspamd_mempool_alloc (pool, clen + 1); | |||
rspamd_strlcpy (res->data, line, clen + 1); | |||
return res; | |||
} | |||
static gint | |||
rspamd_composite_process_single_symbol (struct composites_data *cd, | |||
const gchar *sym, struct symbol **pms) | |||
{ | |||
struct symbol *ms = NULL; | |||
gint rc = 0; | |||
struct rspamd_composite *ncomp; | |||
if ((ms = g_hash_table_lookup (cd->metric_res->symbols, sym)) == NULL) { | |||
if ((ncomp = | |||
g_hash_table_lookup (cd->task->cfg->composite_symbols, | |||
sym)) != NULL) { | |||
/* Set checked for this symbol to avoid cyclic references */ | |||
if (isclr (cd->checked, ncomp->id * 2)) { | |||
setbit (cd->checked, cd->composite->id * 2); | |||
rc = rspamd_process_expression (ncomp->expr, | |||
RSPAMD_EXPRESSION_FLAG_NOOPT, cd); | |||
clrbit (cd->checked, cd->composite->id * 2); | |||
if (rc) { | |||
setbit (cd->checked, ncomp->id * 2 + 1); | |||
} | |||
setbit (cd->checked, ncomp->id * 2); | |||
ms = g_hash_table_lookup (cd->metric_res->symbols, sym); | |||
} | |||
else { | |||
/* | |||
* XXX: in case of cyclic references this would return 0 | |||
*/ | |||
rc = isset (cd->checked, ncomp->id * 2 + 1); | |||
} | |||
} | |||
} | |||
else { | |||
rc = 1; | |||
} | |||
*pms = ms; | |||
return rc; | |||
} | |||
static gint | |||
rspamd_composite_expr_process (gpointer input, rspamd_expression_atom_t *atom) | |||
{ | |||
struct composites_data *cd = (struct composites_data *)input; | |||
const gchar *sym = atom->data; | |||
struct symbol_remove_data *rd; | |||
struct symbol *ms; | |||
struct rspamd_symbols_group *gr; | |||
struct rspamd_symbol_def *sdef; | |||
gint rc = 0; | |||
gchar t = '\0'; | |||
if (isset (cd->checked, cd->composite->id * 2)) { | |||
/* We have already checked this composite, so just return its value */ | |||
rc = isset (cd->checked, cd->composite->id * 2 + 1); | |||
return rc; | |||
} | |||
if (*sym == '~' || *sym == '-') { | |||
t = *sym ++; | |||
} | |||
if (strncmp (sym, "g:", 2) == 0) { | |||
gr = g_hash_table_lookup (cd->task->cfg->symbols_groups, sym + 2); | |||
if (gr != NULL) { | |||
LL_FOREACH (gr->symbols, sdef) { | |||
rc = rspamd_composite_process_single_symbol (cd, sdef->name, &ms); | |||
if (rc) { | |||
break; | |||
} | |||
} | |||
} | |||
} | |||
else { | |||
rc = rspamd_composite_process_single_symbol (cd, sym, &ms); | |||
} | |||
if (rc && ms) { | |||
/* | |||
* At this point we know that we need to do something about this symbol, | |||
* however, we don't know whether we need to delete it unfortunately, | |||
* that depends on the later decisions when the complete expression is | |||
* evaluated. | |||
*/ | |||
if ((rd = g_tree_lookup (cd->symbols_to_remove, ms->name)) == NULL) { | |||
rd = rspamd_mempool_alloc (cd->task->task_pool, sizeof (*rd)); | |||
rd->ms = ms; | |||
if (G_UNLIKELY (t == '~')) { | |||
rd->remove_weight = FALSE; | |||
rd->remove_symbol = TRUE; | |||
} | |||
else if (G_UNLIKELY (t == '-')) { | |||
rd->remove_symbol = FALSE; | |||
rd->remove_weight = FALSE; | |||
} | |||
else { | |||
rd->remove_symbol = TRUE; | |||
rd->remove_weight = TRUE; | |||
} | |||
rd->comp = g_list_prepend (NULL, cd->composite); | |||
g_tree_insert (cd->symbols_to_remove, | |||
(gpointer)ms->name, | |||
rd); | |||
} | |||
else { | |||
/* | |||
* XXX: what if we have different preferences regarding | |||
* weight and symbol removal in different composites? | |||
*/ | |||
rd->comp = g_list_prepend (rd->comp, cd->composite); | |||
} | |||
} | |||
return rc; | |||
} | |||
/* | |||
* We don't have preferences for composites | |||
*/ | |||
static gint | |||
rspamd_composite_expr_priority (rspamd_expression_atom_t *atom) | |||
{ | |||
return 0; | |||
} | |||
static void | |||
rspamd_composite_expr_destroy (rspamd_expression_atom_t *atom) | |||
{ | |||
/* Composite atoms are destroyed just with the pool */ | |||
} | |||
static gint | |||
remove_compare_data (gconstpointer a, gconstpointer b) | |||
{ | |||
const gchar *ca = a, *cb = b; | |||
return strcmp (ca, cb); | |||
} | |||
static void | |||
composites_foreach_callback (gpointer key, gpointer value, void *data) | |||
{ | |||
struct composites_data *cd = data; | |||
struct rspamd_composite *comp = value; | |||
gint rc; | |||
cd->composite = comp; | |||
rc = rspamd_process_expression (comp->expr, RSPAMD_EXPRESSION_FLAG_NOOPT, cd); | |||
/* Checked bit */ | |||
setbit (cd->checked, comp->id * 2); | |||
/* Result bit */ | |||
if (rc) { | |||
setbit (cd->checked, comp->id * 2 + 1); | |||
rspamd_task_insert_result_single (cd->task, key, 1.0, NULL); | |||
} | |||
else { | |||
clrbit (cd->checked, comp->id * 2 + 1); | |||
} | |||
} | |||
static gboolean | |||
composites_remove_symbols (gpointer key, gpointer value, gpointer data) | |||
{ | |||
struct composites_data *cd = data; | |||
struct symbol_remove_data *rd = value; | |||
GList *cur; | |||
struct rspamd_composite *comp; | |||
gboolean matched = FALSE; | |||
cur = rd->comp; | |||
/* | |||
* XXX: actually, this is a weak assumption as we are unaware here about | |||
* negate operation and so on. We need to parse AST directly and remove | |||
* only those symbols that could be removed. | |||
*/ | |||
while (cur) { | |||
comp = cur->data; | |||
if (isset (cd->checked, comp->id * 2 + 1)) { | |||
matched = TRUE; | |||
break; | |||
} | |||
cur = g_list_next (cur); | |||
} | |||
g_list_free (rd->comp); | |||
if (matched) { | |||
if (rd->remove_symbol) { | |||
g_hash_table_remove (cd->metric_res->symbols, key); | |||
} | |||
if (rd->remove_weight) { | |||
cd->metric_res->score -= rd->ms->score; | |||
} | |||
} | |||
return FALSE; | |||
} | |||
static void | |||
composites_metric_callback (gpointer key, gpointer value, gpointer data) | |||
{ | |||
struct rspamd_task *task = (struct rspamd_task *)data; | |||
struct composites_data *cd = | |||
rspamd_mempool_alloc (task->task_pool, sizeof (struct composites_data)); | |||
struct metric_result *metric_res = (struct metric_result *)value; | |||
cd->task = task; | |||
cd->metric_res = (struct metric_result *)metric_res; | |||
cd->symbols_to_remove = g_tree_new (remove_compare_data); | |||
cd->checked = | |||
rspamd_mempool_alloc0 (task->task_pool, | |||
NBYTES (g_hash_table_size (task->cfg->composite_symbols) * 2)); | |||
/* Process hash table */ | |||
g_hash_table_foreach (task->cfg->composite_symbols, | |||
composites_foreach_callback, | |||
cd); | |||
/* Remove symbols that are in composites */ | |||
g_tree_foreach (cd->symbols_to_remove, composites_remove_symbols, cd); | |||
/* Free list */ | |||
g_tree_destroy (cd->symbols_to_remove); | |||
} | |||
void | |||
rspamd_make_composites (struct rspamd_task *task) | |||
{ | |||
g_hash_table_foreach (task->results, composites_metric_callback, task); | |||
} | |||
/*
 * Callback data pairing a task with a locked Lua state.
 * NOTE(review): no code visible in this part of the file uses this structure.
 */
struct classifiers_cbdata {
	struct rspamd_task *task;
	struct lua_locked_state *nL;
};
void | |||
rspamd_process_statistics (struct rspamd_task *task) | |||
{ | |||
if (RSPAMD_TASK_IS_SKIPPED (task)) { | |||
return; | |||
} | |||
/* TODO: handle err here */ | |||
rspamd_stat_classify (task, task->cfg->lua_state, NULL); | |||
/* Process results */ | |||
rspamd_make_composites (task); | |||
} | |||
static void | |||
insert_metric_header (gpointer metric_name, gpointer metric_value, | |||
gpointer data) | |||
@@ -910,16 +505,3 @@ rspamd_check_action_metric (struct rspamd_task *task, | |||
return METRIC_ACTION_NOACTION; | |||
} | |||
gboolean | |||
rspamd_learn_task_spam (struct rspamd_classifier_config *cl, | |||
struct rspamd_task *task, | |||
gboolean is_spam, | |||
GError **err) | |||
{ | |||
return rspamd_stat_learn (task, is_spam, task->cfg->lua_state, err); | |||
} | |||
/* | |||
* vi:ts=4 | |||
*/ |
@@ -73,19 +73,6 @@ struct metric_result { | |||
double grow_factor; /**< current grow factor */ | |||
}; | |||
/** | |||
* Subr for composite expressions | |||
*/ | |||
extern const struct rspamd_atom_subr composite_expr_subr; | |||
/** | |||
* Composite structure | |||
*/ | |||
struct rspamd_composite { | |||
struct rspamd_expression *expr; | |||
gint id; | |||
}; | |||
/** | |||
* Create or return existing result for the specified metric name | |||
* @param task task object | |||
@@ -95,19 +82,6 @@ struct rspamd_composite { | |||
struct metric_result * rspamd_create_metric_result (struct rspamd_task *task, | |||
const gchar *name); | |||
/**
 * Process all filters
 * @param task worker's task that represents the message from the user
 * @return 0 if the task is whitelisted and processing is skipped, 1 otherwise
 */
gint rspamd_process_filters (struct rspamd_task *task); | |||
/**
 * Process the message with statfiles
 * @param task worker's task that represents the message from the user
 */
void rspamd_process_statistics (struct rspamd_task *task); | |||
/** | |||
* Insert a result to task | |||
* @param task worker's task that present message from user | |||
@@ -134,12 +108,6 @@ void rspamd_task_insert_result_single (struct rspamd_task *task, | |||
double flag, | |||
GList *opts); | |||
/**
 * Process all results and form composite symbols from the existing ones, as defined in the config
 * @param task worker's task that represents the message from the user
 */
void rspamd_make_composites (struct rspamd_task *task); | |||
/** | |||
* Default consolidation function for metric, it get all symbols and multiply symbol | |||
* weight by some factor that is specified in config. Default factor is 1. | |||
@@ -152,18 +120,6 @@ double rspamd_factor_consolidation_func (struct rspamd_task *task, | |||
const gchar *unused); | |||
/**
 * Learn from the message in a task
 * @param cl classifier configuration (not used by the current implementation)
 * @param task worker's task object
 * @param is_spam whether the message should be learned as spam
 * @param err pointer to GError
 * @return true if learning succeeded
 */
gboolean rspamd_learn_task_spam (struct rspamd_classifier_config *cl, | |||
struct rspamd_task *task, | |||
gboolean is_spam, | |||
GError **err); | |||
/* | |||
* Get action from a string | |||
*/ |
@@ -3,6 +3,7 @@ SET(LIBRSPAMDSERVERSRC | |||
${CMAKE_CURRENT_SOURCE_DIR}/buffer.c | |||
${CMAKE_CURRENT_SOURCE_DIR}/cfg_utils.c | |||
${CMAKE_CURRENT_SOURCE_DIR}/cfg_rcl.c | |||
${CMAKE_CURRENT_SOURCE_DIR}/composites.c | |||
${CMAKE_CURRENT_SOURCE_DIR}/dkim.c | |||
${CMAKE_CURRENT_SOURCE_DIR}/dns.c | |||
${CMAKE_CURRENT_SOURCE_DIR}/dynamic_cfg.c |
@@ -1,4 +1,4 @@ | |||
/* Copyright (c) 2013, Vsevolod Stakhov | |||
/* Copyright (c) 2013-2015, Vsevolod Stakhov | |||
* All rights reserved. | |||
* | |||
* Redistribution and use in source and binary forms, with or without | |||
@@ -28,7 +28,7 @@ | |||
#include "cfg_file.h" | |||
#include "lua/lua_common.h" | |||
#include "expression.h" | |||
#include "composites.h" | |||
struct rspamd_rcl_default_handler_data { | |||
struct rspamd_rcl_struct_parser pd; |
@@ -0,0 +1,335 @@ | |||
/* | |||
* Copyright (c) 2015, Vsevolod Stakhov | |||
* All rights reserved. | |||
* | |||
* Redistribution and use in source and binary forms, with or without | |||
* modification, are permitted provided that the following conditions are met: | |||
* * Redistributions of source code must retain the above copyright | |||
* notice, this list of conditions and the following disclaimer. | |||
* * Redistributions in binary form must reproduce the above copyright | |||
* notice, this list of conditions and the following disclaimer in the | |||
* documentation and/or other materials provided with the distribution. | |||
* | |||
* THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY | |||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | |||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | |||
* DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY | |||
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES | |||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | |||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND | |||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | |||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
*/ | |||
#include "config.h" | |||
#include "logger.h" | |||
#include "expression.h" | |||
#include "task.h" | |||
#include "utlist.h" | |||
#include "filter.h" | |||
#include "composites.h" | |||
struct composites_data { | |||
struct rspamd_task *task; | |||
struct rspamd_composite *composite; | |||
struct metric_result *metric_res; | |||
GTree *symbols_to_remove; | |||
guint8 *checked; | |||
}; | |||
struct symbol_remove_data { | |||
struct symbol *ms; | |||
gboolean remove_weight; | |||
gboolean remove_symbol; | |||
GList *comp; | |||
}; | |||
static rspamd_expression_atom_t * rspamd_composite_expr_parse (const gchar *line, gsize len, | |||
rspamd_mempool_t *pool, gpointer ud, GError **err); | |||
static gint rspamd_composite_expr_process (gpointer input, rspamd_expression_atom_t *atom); | |||
static gint rspamd_composite_expr_priority (rspamd_expression_atom_t *atom); | |||
static void rspamd_composite_expr_destroy (rspamd_expression_atom_t *atom); | |||
const struct rspamd_atom_subr composite_expr_subr = { | |||
.parse = rspamd_composite_expr_parse, | |||
.process = rspamd_composite_expr_process, | |||
.priority = rspamd_composite_expr_priority, | |||
.destroy = rspamd_composite_expr_destroy | |||
}; | |||
static GQuark | |||
rspamd_composites_quark (void) | |||
{ | |||
return g_quark_from_static_string ("composites"); | |||
} | |||
static rspamd_expression_atom_t * | |||
rspamd_composite_expr_parse (const gchar *line, gsize len, | |||
rspamd_mempool_t *pool, gpointer ud, GError **err) | |||
{ | |||
gsize clen; | |||
rspamd_expression_atom_t *res; | |||
/* | |||
* Composites are just sequences of symbols | |||
*/ | |||
clen = strcspn (line, ", \t()><+!|&\n"); | |||
if (clen == 0) { | |||
/* Invalid composite atom */ | |||
g_set_error (err, rspamd_composites_quark (), 100, "Invalid composite: %s", | |||
line); | |||
return NULL; | |||
} | |||
res = rspamd_mempool_alloc0 (pool, sizeof (*res)); | |||
res->len = clen; | |||
res->str = line; | |||
res->data = rspamd_mempool_alloc (pool, clen + 1); | |||
rspamd_strlcpy (res->data, line, clen + 1); | |||
return res; | |||
} | |||
static gint | |||
rspamd_composite_process_single_symbol (struct composites_data *cd, | |||
const gchar *sym, struct symbol **pms) | |||
{ | |||
struct symbol *ms = NULL; | |||
gint rc = 0; | |||
struct rspamd_composite *ncomp; | |||
if ((ms = g_hash_table_lookup (cd->metric_res->symbols, sym)) == NULL) { | |||
if ((ncomp = | |||
g_hash_table_lookup (cd->task->cfg->composite_symbols, | |||
sym)) != NULL) { | |||
/* Set checked for this symbol to avoid cyclic references */ | |||
if (isclr (cd->checked, ncomp->id * 2)) { | |||
setbit (cd->checked, cd->composite->id * 2); | |||
rc = rspamd_process_expression (ncomp->expr, | |||
RSPAMD_EXPRESSION_FLAG_NOOPT, cd); | |||
clrbit (cd->checked, cd->composite->id * 2); | |||
if (rc) { | |||
setbit (cd->checked, ncomp->id * 2 + 1); | |||
} | |||
setbit (cd->checked, ncomp->id * 2); | |||
ms = g_hash_table_lookup (cd->metric_res->symbols, sym); | |||
} | |||
else { | |||
/* | |||
* XXX: in case of cyclic references this would return 0 | |||
*/ | |||
rc = isset (cd->checked, ncomp->id * 2 + 1); | |||
} | |||
} | |||
} | |||
else { | |||
rc = 1; | |||
} | |||
*pms = ms; | |||
return rc; | |||
} | |||
static gint | |||
rspamd_composite_expr_process (gpointer input, rspamd_expression_atom_t *atom) | |||
{ | |||
struct composites_data *cd = (struct composites_data *)input; | |||
const gchar *sym = atom->data; | |||
struct symbol_remove_data *rd; | |||
struct symbol *ms; | |||
struct rspamd_symbols_group *gr; | |||
struct rspamd_symbol_def *sdef; | |||
gint rc = 0; | |||
gchar t = '\0'; | |||
if (isset (cd->checked, cd->composite->id * 2)) { | |||
/* We have already checked this composite, so just return its value */ | |||
rc = isset (cd->checked, cd->composite->id * 2 + 1); | |||
return rc; | |||
} | |||
if (*sym == '~' || *sym == '-') { | |||
t = *sym ++; | |||
} | |||
if (strncmp (sym, "g:", 2) == 0) { | |||
gr = g_hash_table_lookup (cd->task->cfg->symbols_groups, sym + 2); | |||
if (gr != NULL) { | |||
LL_FOREACH (gr->symbols, sdef) { | |||
rc = rspamd_composite_process_single_symbol (cd, sdef->name, &ms); | |||
if (rc) { | |||
break; | |||
} | |||
} | |||
} | |||
} | |||
else { | |||
rc = rspamd_composite_process_single_symbol (cd, sym, &ms); | |||
} | |||
if (rc && ms) { | |||
/* | |||
* At this point we know that we need to do something about this symbol, | |||
* however, we don't know whether we need to delete it unfortunately, | |||
* that depends on the later decisions when the complete expression is | |||
* evaluated. | |||
*/ | |||
if ((rd = g_tree_lookup (cd->symbols_to_remove, ms->name)) == NULL) { | |||
rd = rspamd_mempool_alloc (cd->task->task_pool, sizeof (*rd)); | |||
rd->ms = ms; | |||
if (G_UNLIKELY (t == '~')) { | |||
rd->remove_weight = FALSE; | |||
rd->remove_symbol = TRUE; | |||
} | |||
else if (G_UNLIKELY (t == '-')) { | |||
rd->remove_symbol = FALSE; | |||
rd->remove_weight = FALSE; | |||
} | |||
else { | |||
rd->remove_symbol = TRUE; | |||
rd->remove_weight = TRUE; | |||
} | |||
rd->comp = g_list_prepend (NULL, cd->composite); | |||
g_tree_insert (cd->symbols_to_remove, | |||
(gpointer)ms->name, | |||
rd); | |||
} | |||
else { | |||
/* | |||
* XXX: what if we have different preferences regarding | |||
* weight and symbol removal in different composites? | |||
*/ | |||
rd->comp = g_list_prepend (rd->comp, cd->composite); | |||
} | |||
} | |||
return rc; | |||
} | |||
/* | |||
* We don't have preferences for composites | |||
*/ | |||
static gint | |||
rspamd_composite_expr_priority (rspamd_expression_atom_t *atom) | |||
{ | |||
return 0; | |||
} | |||
static void | |||
rspamd_composite_expr_destroy (rspamd_expression_atom_t *atom) | |||
{ | |||
/* Composite atoms are destroyed just with the pool */ | |||
} | |||
static gint | |||
remove_compare_data (gconstpointer a, gconstpointer b) | |||
{ | |||
const gchar *ca = a, *cb = b; | |||
return strcmp (ca, cb); | |||
} | |||
static void | |||
composites_foreach_callback (gpointer key, gpointer value, void *data) | |||
{ | |||
struct composites_data *cd = data; | |||
struct rspamd_composite *comp = value; | |||
gint rc; | |||
cd->composite = comp; | |||
rc = rspamd_process_expression (comp->expr, RSPAMD_EXPRESSION_FLAG_NOOPT, cd); | |||
/* Checked bit */ | |||
setbit (cd->checked, comp->id * 2); | |||
/* Result bit */ | |||
if (rc) { | |||
setbit (cd->checked, comp->id * 2 + 1); | |||
rspamd_task_insert_result_single (cd->task, key, 1.0, NULL); | |||
} | |||
else { | |||
clrbit (cd->checked, comp->id * 2 + 1); | |||
} | |||
} | |||
static gboolean | |||
composites_remove_symbols (gpointer key, gpointer value, gpointer data) | |||
{ | |||
struct composites_data *cd = data; | |||
struct symbol_remove_data *rd = value; | |||
GList *cur; | |||
struct rspamd_composite *comp; | |||
gboolean matched = FALSE; | |||
cur = rd->comp; | |||
/* | |||
* XXX: actually, this is a weak assumption as we are unaware here about | |||
* negate operation and so on. We need to parse AST directly and remove | |||
* only those symbols that could be removed. | |||
*/ | |||
while (cur) { | |||
comp = cur->data; | |||
if (isset (cd->checked, comp->id * 2 + 1)) { | |||
matched = TRUE; | |||
break; | |||
} | |||
cur = g_list_next (cur); | |||
} | |||
g_list_free (rd->comp); | |||
if (matched) { | |||
if (rd->remove_symbol) { | |||
g_hash_table_remove (cd->metric_res->symbols, key); | |||
} | |||
if (rd->remove_weight) { | |||
cd->metric_res->score -= rd->ms->score; | |||
} | |||
} | |||
return FALSE; | |||
} | |||
static void | |||
composites_metric_callback (gpointer key, gpointer value, gpointer data) | |||
{ | |||
struct rspamd_task *task = (struct rspamd_task *)data; | |||
struct composites_data *cd = | |||
rspamd_mempool_alloc (task->task_pool, sizeof (struct composites_data)); | |||
struct metric_result *metric_res = (struct metric_result *)value; | |||
cd->task = task; | |||
cd->metric_res = (struct metric_result *)metric_res; | |||
cd->symbols_to_remove = g_tree_new (remove_compare_data); | |||
cd->checked = | |||
rspamd_mempool_alloc0 (task->task_pool, | |||
NBYTES (g_hash_table_size (task->cfg->composite_symbols) * 2)); | |||
/* Process hash table */ | |||
g_hash_table_foreach (task->cfg->composite_symbols, | |||
composites_foreach_callback, | |||
cd); | |||
/* Remove symbols that are in composites */ | |||
g_tree_foreach (cd->symbols_to_remove, composites_remove_symbols, cd); | |||
/* Free list */ | |||
g_tree_destroy (cd->symbols_to_remove); | |||
} | |||
void | |||
rspamd_make_composites (struct rspamd_task *task) | |||
{ | |||
g_hash_table_foreach (task->results, composites_metric_callback, task); | |||
} |
@@ -0,0 +1,50 @@ | |||
/* | |||
* Copyright (c) 2015, Vsevolod Stakhov | |||
* All rights reserved. | |||
* | |||
* Redistribution and use in source and binary forms, with or without | |||
* modification, are permitted provided that the following conditions are met: | |||
* * Redistributions of source code must retain the above copyright | |||
* notice, this list of conditions and the following disclaimer. | |||
* * Redistributions in binary form must reproduce the above copyright | |||
* notice, this list of conditions and the following disclaimer in the | |||
* documentation and/or other materials provided with the distribution. | |||
* | |||
* THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY | |||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | |||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | |||
* DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY | |||
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES | |||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | |||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND | |||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | |||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
*/ | |||
#ifndef SRC_LIBSERVER_COMPOSITES_H_ | |||
#define SRC_LIBSERVER_COMPOSITES_H_ | |||
#include "config.h" | |||
struct rspamd_task; | |||
/** | |||
* Subr for composite expressions | |||
*/ | |||
extern const struct rspamd_atom_subr composite_expr_subr; | |||
/** | |||
* Composite structure | |||
*/ | |||
struct rspamd_composite { | |||
struct rspamd_expression *expr; | |||
gint id; | |||
}; | |||
/**
 * Process all results and form composite symbols from the existing ones, as defined in the config
 * @param task worker's task that represents the message from the user
 */
void rspamd_make_composites (struct rspamd_task *task); | |||
#endif /* SRC_LIBSERVER_COMPOSITES_H_ */ |
@@ -492,3 +492,92 @@ rspamd_task_re_cache_check (struct rspamd_task *task, const gchar *re) | |||
return ret; | |||
} | |||
gboolean | |||
rspamd_learn_task_spam (struct rspamd_classifier_config *cl, | |||
struct rspamd_task *task, | |||
gboolean is_spam, | |||
GError **err) | |||
{ | |||
return rspamd_stat_learn (task, is_spam, task->cfg->lua_state, err); | |||
} | |||
/* Return true if metric has score that is more than spam score for it */ | |||
static gboolean | |||
check_metric_is_spam (struct rspamd_task *task, struct metric *metric) | |||
{ | |||
struct metric_result *res; | |||
double ms; | |||
/* Avoid concurrency while checking results */ | |||
#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION <= 30)) | |||
g_static_mutex_lock (&result_mtx); | |||
#else | |||
G_LOCK (result_mtx); | |||
#endif | |||
res = g_hash_table_lookup (task->results, metric->name); | |||
if (res) { | |||
#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION <= 30)) | |||
g_static_mutex_unlock (&result_mtx); | |||
#else | |||
G_UNLOCK (result_mtx); | |||
#endif | |||
if (!check_metric_settings (task, metric, &ms)) { | |||
ms = metric->actions[METRIC_ACTION_REJECT].score; | |||
} | |||
return (ms > 0 && res->score >= ms); | |||
} | |||
#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION <= 30)) | |||
g_static_mutex_unlock (&result_mtx); | |||
#else | |||
G_UNLOCK (result_mtx); | |||
#endif | |||
return FALSE; | |||
} | |||
gint | |||
rspamd_process_filters (struct rspamd_task *task) | |||
{ | |||
GList *cur; | |||
struct metric *metric; | |||
gpointer item = NULL; | |||
/* Insert default metric to be sure that it exists all the time */ | |||
rspamd_create_metric_result (task, DEFAULT_METRIC); | |||
if (task->settings) { | |||
const ucl_object_t *wl; | |||
wl = ucl_object_find_key (task->settings, "whitelist"); | |||
if (wl != NULL) { | |||
msg_info ("<%s> is whitelisted", task->message_id); | |||
task->flags |= RSPAMD_TASK_FLAG_SKIP; | |||
return 0; | |||
} | |||
} | |||
/* Process metrics symbols */ | |||
while (rspamd_symbols_cache_process_symbol (task, task->cfg->cache, &item)) { | |||
/* Check reject actions */ | |||
cur = task->cfg->metrics_list; | |||
while (cur) { | |||
metric = cur->data; | |||
if (!(task->flags & RSPAMD_TASK_FLAG_PASS_ALL) && | |||
metric->actions[METRIC_ACTION_REJECT].score > 0 && | |||
check_metric_is_spam (task, metric)) { | |||
msg_info ("<%s> has already scored more than %.2f, so do not " | |||
"plan any more checks", task->message_id, | |||
metric->actions[METRIC_ACTION_REJECT].score); | |||
return 1; | |||
} | |||
cur = g_list_next (cur); | |||
} | |||
} | |||
if (rspamd_session_events_pending (task->s) != 0) { | |||
task->state = WAIT_FILTER; | |||
} | |||
return 1; | |||
} |
@@ -239,4 +239,16 @@ guint rspamd_task_re_cache_add (struct rspamd_task *task, const gchar *re, | |||
*/ | |||
guint rspamd_task_re_cache_check (struct rspamd_task *task, const gchar *re); | |||
/**
 * Learn from the message in a task
 * @param cl classifier configuration (not used by the current implementation)
 * @param task worker's task object
 * @param is_spam whether the message should be learned as spam
 * @param err pointer to GError
 * @return true if learning succeeded
 */
gboolean rspamd_learn_task_spam (struct rspamd_classifier_config *cl, | |||
struct rspamd_task *task, | |||
gboolean is_spam, | |||
GError **err); | |||
#endif /* TASK_H_ */ |
@@ -26,6 +26,7 @@ | |||
#include "symbols_cache.h" | |||
#include "expression.h" | |||
#include "filter.h" | |||
#include "composites.h" | |||
#ifdef HAVE_SYS_UTSNAME_H | |||
#include <sys/utsname.h> | |||
#endif |
@@ -28,6 +28,7 @@ | |||
#include "message.h" | |||
#include "radix.h" | |||
#include "expression.h" | |||
#include "composites.h" | |||
#include "utlist.h" | |||
/*** |