aboutsummaryrefslogtreecommitdiffstats
path: root/src/plugins
diff options
context:
space:
mode:
author: Vsevolod Stakhov <vsevolod@highsecure.ru>, 2013-12-04 13:41:26 +0000
committer: Vsevolod Stakhov <vsevolod@highsecure.ru>, 2013-12-04 13:41:26 +0000
commit: 703fb40d6e37c5337a23694bce1bb114b7d7516a (patch)
tree: ebdba3aaa9018a01c21702c0785f00c609e77e21 /src/plugins
parent: d0314f0ca99d2485054b692de565729f4e961306 (diff)
download: rspamd-703fb40d6e37c5337a23694bce1bb114b7d7516a.tar.gz
download: rspamd-703fb40d6e37c5337a23694bce1bb114b7d7516a.zip
Rework fuzzy check module.
- All checks are now organized into rules.
- Allow rules to be marked read_only to avoid problems on learning.
- Use a better normalizer for the fuzzy module; it now returns values from 0 to 1.0 (like bayes does).
- Update configuration accordingly.
- Drop legacy configuration support.
- Detect tanh as well and provide a reasonable (linear) fallback.
Diffstat (limited to 'src/plugins')
-rw-r--r-- src/plugins/fuzzy_check.c 726
1 file changed, 374 insertions, 352 deletions
diff --git a/src/plugins/fuzzy_check.c b/src/plugins/fuzzy_check.c
index b79ecc452..9d56806d6 100644
--- a/src/plugins/fuzzy_check.c
+++ b/src/plugins/fuzzy_check.c
@@ -60,72 +60,80 @@
#define DEFAULT_PORT 11335
struct storage_server {
- struct upstream up;
- gchar *name;
- gchar *addr;
- guint16 port;
+ struct upstream up;
+ gchar *name;
+ gchar *addr;
+ guint16 port;
};
struct fuzzy_mapping {
- guint64 fuzzy_flag;
- const gchar *symbol;
+ guint64 fuzzy_flag;
+ const gchar *symbol;
double weight;
};
struct fuzzy_mime_type {
- gchar *type;
- gchar *subtype;
+ gchar *type;
+ gchar *subtype;
+};
+
+struct fuzzy_rule {
+ struct storage_server *servers;
+ gint servers_num;
+ const gchar *symbol;
+ GHashTable *mappings;
+ GList *mime_types;
+ double max_score;
+ gboolean read_only;
};
struct fuzzy_ctx {
- gint (*filter) (struct worker_task * task);
- const gchar *symbol;
- struct storage_server *servers;
- gint servers_num;
- memory_pool_t *fuzzy_pool;
- double max_score;
- guint32 min_hash_len;
- radix_tree_t *whitelist;
- GHashTable *mappings;
- GList *mime_types;
- guint32 min_bytes;
- guint32 min_height;
- guint32 min_width;
- guint32 io_timeout;
+ gint (*filter) (struct worker_task * task);
+ memory_pool_t *fuzzy_pool;
+ GList *fuzzy_rules;
+ const gchar *default_symbol;
+ guint32 min_hash_len;
+ radix_tree_t *whitelist;
+ guint32 min_bytes;
+ guint32 min_height;
+ guint32 min_width;
+ guint32 io_timeout;
};
struct fuzzy_client_session {
- gint state;
- fuzzy_hash_t *h;
- struct event ev;
- struct timeval tv;
- struct worker_task *task;
- struct storage_server *server;
- gint fd;
+ gint state;
+ fuzzy_hash_t *h;
+ struct event ev;
+ struct timeval tv;
+ struct worker_task *task;
+ struct storage_server *server;
+ struct fuzzy_rule *rule;
+ gint fd;
};
struct fuzzy_learn_session {
- struct event ev;
- fuzzy_hash_t *h;
- gint cmd;
- gint value;
- gint flag;
- gint *saved;
- GError **err;
- struct timeval tv;
- struct controller_session *session;
- struct storage_server *server;
- struct worker_task *task;
- gint fd;
+ struct event ev;
+ fuzzy_hash_t *h;
+ gint cmd;
+ gint value;
+ gint flag;
+ gint *saved;
+ GError **err;
+ struct timeval tv;
+ struct controller_session *session;
+ struct storage_server *server;
+ struct fuzzy_rule *rule;
+ struct worker_task *task;
+ gint fd;
};
-static struct fuzzy_ctx *fuzzy_module_ctx = NULL;
-static const gchar hex_digits[] = "0123456789abcdef";
+static struct fuzzy_ctx *fuzzy_module_ctx = NULL;
+static const gchar hex_digits[] = "0123456789abcdef";
-static gint fuzzy_mime_filter (struct worker_task *task);
-static void fuzzy_symbol_callback (struct worker_task *task, void *unused);
-static void fuzzy_add_handler (gchar **args, struct controller_session *session);
-static void fuzzy_delete_handler (gchar **args, struct controller_session *session);
+static void fuzzy_symbol_callback (struct worker_task *task, void *unused);
+static void fuzzy_add_handler (gchar **args, struct controller_session *session);
+static void fuzzy_delete_handler (gchar **args,
+ struct controller_session *session);
/* Initialization */
gint fuzzy_check_module_init (struct config_file *cfg, struct module_ctx **ctx);
@@ -139,81 +147,35 @@ module_t fuzzy_check_module = {
fuzzy_check_module_reconfig
};
-/* Flags string is in format <numeric_flag>:<SYMBOL>:weight[, <numeric_flag>:<SYMBOL>:weight...] */
static void
-parse_flags_string_old (struct config_file *cfg, const gchar *str)
-{
- gchar **strvec, *item, *err_str, **map_str;
- gint num, i, t;
- struct fuzzy_mapping *map;
-
- strvec = g_strsplit_set (str, ", ;", 0);
- num = g_strv_length (strvec);
-
- for (i = 0; i < num; i ++) {
- item = strvec[i];
- map_str = g_strsplit_set (item, ":", 3);
- t = g_strv_length (map_str);
- if (t != 3 && t != 2) {
- msg_err ("invalid fuzzy mapping: %s", item);
- }
- else {
- map = memory_pool_alloc (fuzzy_module_ctx->fuzzy_pool, sizeof (struct fuzzy_mapping));
- map->symbol = memory_pool_strdup (fuzzy_module_ctx->fuzzy_pool, map_str[1]);
-
- errno = 0;
- map->fuzzy_flag = strtol (map_str[0], &err_str, 10);
- if (errno != 0 || (err_str && *err_str != '\0')) {
- msg_info ("cannot parse flag %s: %s", map_str[0], strerror (errno));
- continue;
- }
- else if (t == 2) {
- /* Weight is skipped in definition */
- map->weight = fuzzy_module_ctx->max_score;
- }
- else {
- map->weight = strtol (map_str[2], &err_str, 10);
-
- }
- /* Add flag to hash table */
- g_hash_table_insert (fuzzy_module_ctx->mappings, GINT_TO_POINTER(map->fuzzy_flag), map);
- register_virtual_symbol (&cfg->cache, map->symbol, map->weight);
- }
- g_strfreev (map_str);
- }
-
- g_strfreev (strvec);
-}
-
-static void
-parse_flags_string (struct config_file *cfg, ucl_object_t *val)
+parse_flags_string (struct fuzzy_rule *rule, struct config_file *cfg, ucl_object_t *val)
{
ucl_object_t *elt;
struct fuzzy_mapping *map;
const gchar *sym = NULL;
if (val->type == UCL_STRING) {
- parse_flags_string_old (cfg, ucl_obj_tostring (val));
+ msg_err ("string mappings are deprecated and no longer supported, use new style configuration");
}
else if (val->type == UCL_OBJECT) {
- elt = ucl_obj_get_key (val, "symbol");
+ elt = ucl_object_find_key (val, "symbol");
if (elt == NULL || !ucl_object_tostring_safe (elt, &sym)) {
sym = ucl_object_key (val);
}
if (sym != NULL) {
map = memory_pool_alloc (fuzzy_module_ctx->fuzzy_pool, sizeof (struct fuzzy_mapping));
map->symbol = sym;
- elt = ucl_obj_get_key (val, "flag");
+ elt = ucl_object_find_key (val, "flag");
if (elt != NULL && ucl_obj_toint_safe (elt, &map->fuzzy_flag)) {
- elt = ucl_obj_get_key (val, "weight");
+ elt = ucl_object_find_key (val, "weight");
if (elt != NULL) {
map->weight = ucl_obj_todouble (elt);
}
else {
- map->weight = fuzzy_module_ctx->max_score;
+ map->weight = rule->max_score;
}
/* Add flag to hash table */
- g_hash_table_insert (fuzzy_module_ctx->mappings, GINT_TO_POINTER (map->fuzzy_flag), map);
+ g_hash_table_insert (rule->mappings, GINT_TO_POINTER (map->fuzzy_flag), map);
register_virtual_symbol (&cfg->cache, map->symbol, map->weight);
}
else {
@@ -261,12 +223,12 @@ parse_mime_types (const gchar *str)
}
static gboolean
-fuzzy_check_content_type (GMimeContentType *type)
+fuzzy_check_content_type (struct fuzzy_rule *rule, GMimeContentType *type)
{
struct fuzzy_mime_type *ft;
GList *cur;
- cur = fuzzy_module_ctx->mime_types;
+ cur = rule->mime_types;
while (cur) {
ft = cur->data;
if (g_mime_content_type_is_type (type, ft->type, ft->subtype)) {
@@ -279,7 +241,7 @@ fuzzy_check_content_type (GMimeContentType *type)
}
static void
-parse_servers_string (const gchar *str)
+parse_servers_string (struct fuzzy_rule *rule, const gchar *str)
{
gchar **strvec;
gint i, num;
@@ -288,18 +250,18 @@ parse_servers_string (const gchar *str)
strvec = g_strsplit_set (str, ",", 0);
num = g_strv_length (strvec);
- fuzzy_module_ctx->servers = memory_pool_alloc0 (fuzzy_module_ctx->fuzzy_pool, sizeof (struct storage_server) * num);
+ rule->servers = memory_pool_alloc0 (fuzzy_module_ctx->fuzzy_pool, sizeof (struct storage_server) * num);
for (i = 0; i < num; i++) {
g_strstrip (strvec[i]);
- cur = &fuzzy_module_ctx->servers[fuzzy_module_ctx->servers_num];
+ cur = &rule->servers[rule->servers_num];
if (parse_host_port (fuzzy_module_ctx->fuzzy_pool, strvec[i], &cur->addr, &cur->port)) {
if (cur->port == 0) {
cur->port = DEFAULT_PORT;
}
cur->name = memory_pool_strdup (fuzzy_module_ctx->fuzzy_pool, strvec[i]);
- fuzzy_module_ctx->servers_num++;
+ rule->servers_num++;
}
}
@@ -310,22 +272,14 @@ parse_servers_string (const gchar *str)
static double
fuzzy_normalize (gint32 in, double weight)
{
- double ms = weight, ams = fabs (ms), ain = fabs (in);
-
- if (ams > 0.001) {
- if (ain < ams / 2.) {
- return in;
- }
- else if (ain < ams * 2.) {
- ain = ain / 3. + ams / 3.;
- return in > 0 ? ain : -(ain);
- }
- else {
- return in > 0 ? ms : -(ms);
- }
+ if (weight == 0) {
+ return 0;
}
-
- return (double)in;
+#ifdef HAVE_TANH
+ return tanh ((double)in / weight);
+#else
+ return (in < weight ? in / weight : weight);
+#endif
}
static const gchar *
@@ -349,16 +303,86 @@ fuzzy_to_string (fuzzy_hash_t *h)
return strbuf;
}
+static struct fuzzy_rule *
+fuzzy_rule_new (const char *default_symbol, memory_pool_t *pool)
+{
+ struct fuzzy_rule *rule;
+
+ rule = memory_pool_alloc0 (pool, sizeof (struct fuzzy_rule));
+
+ rule->mappings = g_hash_table_new (g_direct_hash, g_direct_equal);
+ rule->symbol = default_symbol;
+ memory_pool_add_destructor (pool, (pool_destruct_func)g_hash_table_unref, rule->mappings);
+ rule->read_only = TRUE;
+
+ return rule;
+}
+
+static gint
+fuzzy_parse_rule (struct config_file *cfg, ucl_object_t *obj)
+{
+ ucl_object_t *value, *cur;
+ struct fuzzy_rule *rule;
+ ucl_object_iter_t it = NULL;
+
+ if (obj->type != UCL_OBJECT) {
+ msg_err ("invalid rule definition");
+ return -1;
+ }
+
+ rule = fuzzy_rule_new (fuzzy_module_ctx->default_symbol, fuzzy_module_ctx->fuzzy_pool);
+
+ if ((value = ucl_object_find_key (obj, "mime_types")) != NULL) {
+ if (value->type == UCL_ARRAY) {
+ value = value->value.av;
+ }
+ LL_FOREACH (value, cur) {
+ rule->mime_types = g_list_concat (rule->mime_types,
+ parse_mime_types (ucl_obj_tostring (cur)));
+ }
+ }
+
+ if ((value = ucl_object_find_key (obj, "servers")) != NULL) {
+ if (value->type == UCL_ARRAY) {
+ value = value->value.av;
+ }
+ LL_FOREACH (value, cur) {
+ parse_servers_string (rule, ucl_obj_tostring (cur));
+ }
+ }
+ if ((value = ucl_object_find_key (obj, "fuzzy_map")) != NULL) {
+ while ((cur = ucl_iterate_object (value, &it, true)) != NULL) {
+ parse_flags_string (rule, cfg, cur);
+ }
+ }
+
+ if (rule->servers_num == 0) {
+ msg_err ("no servers defined for fuzzy rule with symbol: %s", rule->symbol);
+ return -1;
+ }
+ else {
+ fuzzy_module_ctx->fuzzy_rules = g_list_prepend (fuzzy_module_ctx->fuzzy_rules, rule);
+ if (rule->symbol != fuzzy_module_ctx->default_symbol) {
+ register_virtual_symbol (&cfg->cache, rule->symbol, 1.0);
+ }
+ }
+
+ if ((value = ucl_object_find_key (obj, "max_score")) != NULL) {
+ rule->max_score = ucl_obj_todouble (value);
+ }
+ if ((value = ucl_object_find_key (obj, "read_only")) != NULL) {
+ rule->read_only = ucl_obj_toboolean (value);
+ }
+
+ return 0;
+}
+
gint
fuzzy_check_module_init (struct config_file *cfg, struct module_ctx **ctx)
{
fuzzy_module_ctx = g_malloc0 (sizeof (struct fuzzy_ctx));
- fuzzy_module_ctx->filter = fuzzy_mime_filter;
fuzzy_module_ctx->fuzzy_pool = memory_pool_new (memory_pool_get_size ());
- fuzzy_module_ctx->servers = NULL;
- fuzzy_module_ctx->servers_num = 0;
- fuzzy_module_ctx->mappings = g_hash_table_new (g_direct_hash, g_direct_equal);
*ctx = (struct module_ctx *)fuzzy_module_ctx;
@@ -369,20 +393,13 @@ gint
fuzzy_check_module_config (struct config_file *cfg)
{
ucl_object_t *value, *cur;
- ucl_object_iter_t it = NULL;
- gint res = TRUE;
+ gint res = TRUE;
if ((value = get_module_opt (cfg, "fuzzy_check", "symbol")) != NULL) {
- fuzzy_module_ctx->symbol = ucl_obj_tostring (value);
+ fuzzy_module_ctx->default_symbol = ucl_obj_tostring (value);
}
else {
- fuzzy_module_ctx->symbol = DEFAULT_SYMBOL;
- }
- if ((value = get_module_opt (cfg, "fuzzy_check", "max_score")) != NULL) {
- fuzzy_module_ctx->max_score = ucl_obj_todouble (value);
- }
- else {
- fuzzy_module_ctx->max_score = 0.;
+ fuzzy_module_ctx->default_symbol = DEFAULT_SYMBOL;
}
if ((value = get_module_opt (cfg, "fuzzy_check", "min_length")) != NULL) {
@@ -415,11 +432,6 @@ fuzzy_check_module_config (struct config_file *cfg)
else {
fuzzy_module_ctx->io_timeout = DEFAULT_IO_TIMEOUT;
}
- if ((value = get_module_opt (cfg, "fuzzy_check", "mime_types")) != NULL) {
- LL_FOREACH (value, cur) {
- fuzzy_module_ctx->mime_types = parse_mime_types (ucl_obj_tostring (cur));
- }
- }
if ((value = get_module_opt (cfg, "fuzzy_check", "whitelist")) != NULL) {
fuzzy_module_ctx->whitelist = radix_tree_create ();
@@ -433,21 +445,24 @@ fuzzy_check_module_config (struct config_file *cfg)
fuzzy_module_ctx->whitelist = NULL;
}
- if ((value = get_module_opt (cfg, "fuzzy_check", "servers")) != NULL) {
+ if ((value = get_module_opt (cfg, "fuzzy_check", "rule")) != NULL) {
LL_FOREACH (value, cur) {
- parse_servers_string (ucl_obj_tostring (cur));
- }
- }
- if ((value = get_module_opt (cfg, "fuzzy_check", "fuzzy_map")) != NULL) {
- while ((cur = ucl_iterate_object (value, &it, true)) != NULL) {
- parse_flags_string (cfg, cur);
+ if (fuzzy_parse_rule (cfg, cur) == -1) {
+ return -1;
+ }
}
}
- register_symbol (&cfg->cache, fuzzy_module_ctx->symbol, fuzzy_module_ctx->max_score, fuzzy_symbol_callback, NULL);
+ if (fuzzy_module_ctx->fuzzy_rules != NULL) {
+ register_callback_symbol (&cfg->cache, fuzzy_module_ctx->default_symbol,
+ 1.0, fuzzy_symbol_callback, NULL);
- register_custom_controller_command ("fuzzy_add", fuzzy_add_handler, TRUE, TRUE);
- register_custom_controller_command ("fuzzy_del", fuzzy_delete_handler, TRUE, TRUE);
+ register_custom_controller_command ("fuzzy_add", fuzzy_add_handler, TRUE, TRUE);
+ register_custom_controller_command ("fuzzy_del", fuzzy_delete_handler, TRUE, TRUE);
+ }
+ else {
+ msg_warn ("fuzzy module is enabled but no rules are defined");
+ }
return res;
}
@@ -456,11 +471,9 @@ gint
fuzzy_check_module_reconfig (struct config_file *cfg)
{
memory_pool_delete (fuzzy_module_ctx->fuzzy_pool);
- fuzzy_module_ctx->servers = NULL;
- fuzzy_module_ctx->servers_num = 0;
+
fuzzy_module_ctx->fuzzy_pool = memory_pool_new (memory_pool_get_size ());
- g_hash_table_remove_all (fuzzy_module_ctx->mappings);
return fuzzy_check_module_config (cfg);
}
@@ -518,18 +531,18 @@ fuzzy_io_callback (gint fd, short what, void *arg)
}
*err_str = '\0';
/* Get mapping by flag */
- if ((map = g_hash_table_lookup (fuzzy_module_ctx->mappings, GINT_TO_POINTER (flag))) == NULL) {
+ if ((map = g_hash_table_lookup (session->rule->mappings, GINT_TO_POINTER (flag))) == NULL) {
/* Default symbol and default weight */
- symbol = fuzzy_module_ctx->symbol;
- nval = fuzzy_normalize (value, fuzzy_module_ctx->max_score);
+ symbol = session->rule->symbol;
+ nval = fuzzy_normalize (value, session->rule->max_score);
}
else {
/* Get symbol and weight from map */
symbol = map->symbol;
nval = fuzzy_normalize (value, map->weight);
}
- msg_info ("<%s>, found fuzzy hash '%s' with weight: %.2f, in list: %d",
- session->task->message_id, fuzzy_to_string (session->h), flag, nval);
+ msg_info ("<%s>, found fuzzy hash '%s' with weight: %.2f, in list: %s:%d",
+ session->task->message_id, fuzzy_to_string (session->h), nval, symbol, flag);
rspamd_snprintf (buf, sizeof (buf), "%d: %d / %.2f", flag, value, nval);
insert_result (session->task, symbol, nval, g_list_prepend (NULL,
memory_pool_strdup (session->task->task_pool, buf)));
@@ -575,7 +588,9 @@ fuzzy_learn_callback (gint fd, short what, void *arg)
cmd.flag = session->flag;
if (write (fd, &cmd, sizeof (struct fuzzy_cmd)) == -1) {
if (*(session->err) == NULL) {
- g_set_error (session->err, g_quark_from_static_string ("fuzzy check"), 404, "write socket error: %s", strerror (errno));
+ g_set_error (session->err,
+ g_quark_from_static_string ("fuzzy check"),
+ errno, "write socket error: %s", strerror (errno));
}
goto err;
}
@@ -587,21 +602,27 @@ fuzzy_learn_callback (gint fd, short what, void *arg)
}
else if (what == EV_READ) {
if (read (fd, buf, sizeof (buf)) == -1) {
- msg_info ("cannot add fuzzy hash for message <%s>", session->task->message_id);
+ msg_info ("cannot add fuzzy hash for message <%s> to list %s:%d", session->task->message_id,
+ session->rule->symbol, session->flag);
if (*(session->err) == NULL) {
- g_set_error (session->err, g_quark_from_static_string ("fuzzy check"), 404, "read socket error: %s", strerror (errno));
+ g_set_error (session->err,
+ g_quark_from_static_string ("fuzzy check"),
+ errno, "read socket error: %s", strerror (errno));
}
goto err;
}
else if (buf[0] == 'O' && buf[1] == 'K') {
- msg_info ("added fuzzy hash '%s' to list: %d for message <%s>",
- fuzzy_to_string (session->h), session->flag, session->task->message_id);
+ msg_info ("added fuzzy hash '%s' to list: %s:%d for message <%s>",
+ fuzzy_to_string (session->h), session->rule->symbol,
+ session->flag, session->task->message_id);
goto ok;
}
else {
- msg_info ("cannot add fuzzy hash for message <%s>", session->task->message_id);
+ msg_info ("cannot add fuzzy hash for message <%s> to list %s:%d", session->task->message_id,
+ session->rule->symbol, session->flag);
if (*(session->err) == NULL) {
- g_set_error (session->err, g_quark_from_static_string ("fuzzy check"), 500, "add fuzzy error");
+ g_set_error (session->err,
+ g_quark_from_static_string ("fuzzy check"), EINVAL, "add fuzzy error");
}
goto ok;
}
@@ -614,7 +635,8 @@ fuzzy_learn_callback (gint fd, short what, void *arg)
return;
err:
- msg_err ("got error in IO with server %s:%d, %d, %s", session->server->name, session->server->port, errno, strerror (errno));
+ msg_err ("got error in IO with server %s, %d, %s",
+ session->server->name, errno, strerror (errno));
ok:
if (--(*(session->saved)) == 0) {
session->session->state = STATE_REPLY;
@@ -648,7 +670,7 @@ ok:
}
static inline void
-register_fuzzy_call (struct worker_task *task, fuzzy_hash_t *h)
+register_fuzzy_call (struct worker_task *task, struct fuzzy_rule *rule, fuzzy_hash_t *h)
{
struct fuzzy_client_session *session;
struct storage_server *selected;
@@ -656,11 +678,12 @@ register_fuzzy_call (struct worker_task *task, fuzzy_hash_t *h)
/* Get upstream */
#ifdef HAVE_CLOCK_GETTIME
- selected = (struct storage_server *)get_upstream_by_hash (fuzzy_module_ctx->servers, fuzzy_module_ctx->servers_num,
+ selected = (struct storage_server *)get_upstream_by_hash (rule->servers, rule->servers_num,
sizeof (struct storage_server), task->ts.tv_sec,
- DEFAULT_UPSTREAM_ERROR_TIME, DEFAULT_UPSTREAM_DEAD_TIME, DEFAULT_UPSTREAM_MAXERRORS, h->hash_pipe, sizeof (h->hash_pipe));
+ DEFAULT_UPSTREAM_ERROR_TIME, DEFAULT_UPSTREAM_DEAD_TIME, DEFAULT_UPSTREAM_MAXERRORS,
+ h->hash_pipe, sizeof (h->hash_pipe));
#else
- selected = (struct storage_server *)get_upstream_by_hash (fuzzy_module_ctx->servers, fuzzy_module_ctx->servers_num,
+ selected = (struct storage_server *)get_upstream_by_hash (rule->servers, rule->servers_num,
sizeof (struct storage_server), task->tv.tv_sec,
DEFAULT_UPSTREAM_ERROR_TIME, DEFAULT_UPSTREAM_DEAD_TIME, DEFAULT_UPSTREAM_MAXERRORS, h->hash_pipe, sizeof (h->hash_pipe));
#endif
@@ -678,15 +701,15 @@ register_fuzzy_call (struct worker_task *task, fuzzy_hash_t *h)
session->task = task;
session->fd = sock;
session->server = selected;
+ session->rule = rule;
event_add (&session->ev, &session->tv);
register_async_event (task->s, fuzzy_io_fin, session, g_quark_from_static_string ("fuzzy check"));
}
}
}
-/* This callback is called when we check message via fuzzy hashes storage */
static void
-fuzzy_symbol_callback (struct worker_task *task, void *unused)
+fuzzy_check_rule (struct worker_task *task, struct fuzzy_rule *rule)
{
struct mime_text_part *part;
struct mime_part *mime_part;
@@ -696,25 +719,6 @@ fuzzy_symbol_callback (struct worker_task *task, void *unused)
GList *cur;
fuzzy_hash_t *fake_fuzzy;
-
- /* Check whitelist */
-#ifdef HAVE_INET_PTON
- if (fuzzy_module_ctx->whitelist && !task->from_addr.ipv6 && task->from_addr.d.in4.s_addr != INADDR_NONE) {
- if (radix32tree_find (fuzzy_module_ctx->whitelist, ntohl ((guint32) task->from_addr.d.in4.s_addr)) != RADIX_NO_VALUE) {
- msg_info ("<%s>, address %s is whitelisted, skip fuzzy check",
- task->message_id, inet_ntoa (task->from_addr.d.in4));
- return;
- }
- }
-#else
- if (fuzzy_module_ctx->whitelist && task->from_addr.s_addr != 0) {
- if (radix32tree_find (fuzzy_module_ctx->whitelist, ntohl ((guint32) task->from_addr.s_addr)) != RADIX_NO_VALUE) {
- msg_info ("<%s>, address %s is whitelisted, skip fuzzy check",
- task->message_id, inet_ntoa (task->from_addr));
- return;
- }
- }
-#endif
cur = task->text_parts;
while (cur) {
@@ -747,8 +751,8 @@ fuzzy_symbol_callback (struct worker_task *task, void *unused)
continue;
}
- register_fuzzy_call (task, part->fuzzy);
- register_fuzzy_call (task, part->double_fuzzy);
+ register_fuzzy_call (task, rule, part->fuzzy);
+ register_fuzzy_call (task, rule, part->double_fuzzy);
cur = g_list_next (cur);
}
@@ -763,7 +767,7 @@ fuzzy_symbol_callback (struct worker_task *task, void *unused)
/* Construct fake fuzzy hash */
fake_fuzzy = memory_pool_alloc0 (task->task_pool, sizeof (fuzzy_hash_t));
rspamd_strlcpy (fake_fuzzy->hash_pipe, checksum, sizeof (fake_fuzzy->hash_pipe));
- register_fuzzy_call (task, fake_fuzzy);
+ register_fuzzy_call (task, rule, fake_fuzzy);
g_free (checksum);
}
}
@@ -774,13 +778,14 @@ fuzzy_symbol_callback (struct worker_task *task, void *unused)
cur = task->parts;
while (cur) {
mime_part = cur->data;
- if (mime_part->content->len > 0 && fuzzy_check_content_type (mime_part->type)) {
+ if (mime_part->content->len > 0 && fuzzy_check_content_type (rule, mime_part->type)) {
if (fuzzy_module_ctx->min_bytes <= 0 || mime_part->content->len >= fuzzy_module_ctx->min_bytes) {
- checksum = g_compute_checksum_for_data (G_CHECKSUM_MD5, mime_part->content->data, mime_part->content->len);
+ checksum = g_compute_checksum_for_data (G_CHECKSUM_MD5,
+ mime_part->content->data, mime_part->content->len);
/* Construct fake fuzzy hash */
fake_fuzzy = memory_pool_alloc0 (task->task_pool, sizeof (fuzzy_hash_t));
rspamd_strlcpy (fake_fuzzy->hash_pipe, checksum, sizeof (fake_fuzzy->hash_pipe));
- register_fuzzy_call (task, fake_fuzzy);
+ register_fuzzy_call (task, rule, fake_fuzzy);
g_free (checksum);
}
}
@@ -788,41 +793,64 @@ fuzzy_symbol_callback (struct worker_task *task, void *unused)
}
}
+/* This callback is called when we check message via fuzzy hashes storage */
+static void
+fuzzy_symbol_callback (struct worker_task *task, void *unused)
+{
+ struct fuzzy_rule *rule;
+ GList *cur;
+
+ /* Check whitelist */
+#ifdef HAVE_INET_PTON
+ if (fuzzy_module_ctx->whitelist && !task->from_addr.ipv6 && task->from_addr.d.in4.s_addr != INADDR_NONE) {
+ if (radix32tree_find (fuzzy_module_ctx->whitelist, ntohl ((guint32) task->from_addr.d.in4.s_addr)) != RADIX_NO_VALUE) {
+ msg_info ("<%s>, address %s is whitelisted, skip fuzzy check",
+ task->message_id, inet_ntoa (task->from_addr.d.in4));
+ return;
+ }
+ }
+#else
+ if (fuzzy_module_ctx->whitelist && task->from_addr.s_addr != 0) {
+ if (radix32tree_find (fuzzy_module_ctx->whitelist, ntohl ((guint32) task->from_addr.s_addr)) != RADIX_NO_VALUE) {
+ msg_info ("<%s>, address %s is whitelisted, skip fuzzy check",
+ task->message_id, inet_ntoa (task->from_addr));
+ return;
+ }
+ }
+#endif
+
+ cur = fuzzy_module_ctx->fuzzy_rules;
+ while (cur) {
+ rule = cur->data;
+ fuzzy_check_rule (task, rule);
+ cur = g_list_next (cur);
+ }
+}
+
static inline gboolean
-register_fuzzy_controller_call (struct controller_session *session, struct worker_task *task, fuzzy_hash_t *h,
- gint cmd, gint value, gint flag, gint *saved, GError **err)
+register_fuzzy_controller_call (struct controller_session *session,
+ struct fuzzy_rule *rule, struct worker_task *task, fuzzy_hash_t *h,
+ gint cmd, gint value, gint flag, gint *saved, GError **err)
{
struct fuzzy_learn_session *s;
struct storage_server *selected;
- gint sock, r;
- gchar out_buf[BUFSIZ];
+ gint sock;
/* Get upstream */
#ifdef HAVE_CLOCK_GETTIME
- selected = (struct storage_server *)get_upstream_by_hash (fuzzy_module_ctx->servers, fuzzy_module_ctx->servers_num,
+ selected = (struct storage_server *)get_upstream_by_hash (rule->servers, rule->servers_num,
sizeof (struct storage_server), task->ts.tv_sec,
- DEFAULT_UPSTREAM_ERROR_TIME, DEFAULT_UPSTREAM_DEAD_TIME, DEFAULT_UPSTREAM_MAXERRORS, h->hash_pipe, sizeof (h->hash_pipe));
+ DEFAULT_UPSTREAM_ERROR_TIME, DEFAULT_UPSTREAM_DEAD_TIME, DEFAULT_UPSTREAM_MAXERRORS,
+ h->hash_pipe, sizeof (h->hash_pipe));
#else
- selected = (struct storage_server *)get_upstream_by_hash (fuzzy_module_ctx->servers, fuzzy_module_ctx->servers_num,
+ selected = (struct storage_server *)get_upstream_by_hash (rule->servers, rule->servers_num,
sizeof (struct storage_server), task->tv.tv_sec,
- DEFAULT_UPSTREAM_ERROR_TIME, DEFAULT_UPSTREAM_DEAD_TIME, DEFAULT_UPSTREAM_MAXERRORS, h->hash_pipe, sizeof (h->hash_pipe));
+ DEFAULT_UPSTREAM_ERROR_TIME, DEFAULT_UPSTREAM_DEAD_TIME, DEFAULT_UPSTREAM_MAXERRORS,
+ h->hash_pipe, sizeof (h->hash_pipe));
#endif
if (selected) {
/* Create UDP socket */
if ((sock = make_universal_socket (selected->addr, selected->port, SOCK_DGRAM, TRUE, FALSE, FALSE)) == -1) {
- msg_warn ("cannot connect to %s, %d, %s", selected->name, errno, strerror (errno));
- session->state = STATE_REPLY;
- if (session->restful) {
- r = rspamd_snprintf (out_buf, sizeof (out_buf), "HTTP/1.0 404 No hashes have been written" CRLF CRLF);
- }
- else {
- r = rspamd_snprintf (out_buf, sizeof (out_buf), "no hashes have been written" CRLF "END" CRLF);
- }
- if (! rspamd_dispatcher_write (session->dispatcher, out_buf, r, FALSE, FALSE)) {
- return FALSE;
- }
- free_task (task, FALSE);
- rspamd_dispatcher_restore (session->dispatcher);
return FALSE;
}
else {
@@ -841,28 +869,117 @@ register_fuzzy_controller_call (struct controller_session *session, struct worke
s->saved = saved;
s->fd = sock;
s->err = err;
+ s->rule = rule;
event_add (&s->ev, &s->tv);
(*saved)++;
register_async_event (session->s, fuzzy_learn_fin, s, g_quark_from_static_string ("fuzzy check"));
return TRUE;
}
}
+
return FALSE;
}
-static void
-fuzzy_process_handler (struct controller_session *session, f_str_t * in)
+static gboolean
+fuzzy_process_rule (struct controller_session *session, struct fuzzy_rule *rule,
+ struct worker_task *task, GError **err, gint cmd, gint flag, gint value, gint *saved)
{
- struct worker_task *task;
struct mime_text_part *part;
struct mime_part *mime_part;
struct rspamd_image *image;
- GList *cur;
- GError **err;
- gint r, cmd = 0, value = 0, flag = 0, *saved, *sargs;
- gchar out_buf[BUFSIZ], *checksum;
+ GList *cur;
+ gchar *checksum;
fuzzy_hash_t fake_fuzzy;
+ /* Plan new event for writing */
+ cur = task->text_parts;
+
+ while (cur) {
+ part = cur->data;
+ if (part->is_empty || part->fuzzy == NULL || part->fuzzy->hash_pipe[0] == '\0' ||
+ (fuzzy_module_ctx->min_bytes > 0 && part->content->len < fuzzy_module_ctx->min_bytes)) {
+ /* Skip empty parts */
+ cur = g_list_next (cur);
+ continue;
+ }
+ if (! register_fuzzy_controller_call (session, rule, task,
+ part->fuzzy, cmd, value, flag, saved, err)) {
+ return FALSE;
+ }
+ if (! register_fuzzy_controller_call (session, rule, task,
+ part->double_fuzzy, cmd, value, flag, saved, err)) {
+ /* Cannot write hash */
+ return FALSE;
+ }
+ cur = g_list_next (cur);
+ }
+
+ /* Process images */
+ cur = task->images;
+ while (cur) {
+ image = cur->data;
+ if (image->data->len > 0) {
+ if (fuzzy_module_ctx->min_height <= 0 || image->height >= fuzzy_module_ctx->min_height) {
+ if (fuzzy_module_ctx->min_width <= 0 || image->width >= fuzzy_module_ctx->min_width) {
+ checksum = g_compute_checksum_for_data (G_CHECKSUM_MD5, image->data->data, image->data->len);
+ /* Construct fake fuzzy hash */
+ fake_fuzzy.block_size = 0;
+ memset (fake_fuzzy.hash_pipe, 0, sizeof (fake_fuzzy.hash_pipe));
+ rspamd_strlcpy (fake_fuzzy.hash_pipe, checksum, sizeof (fake_fuzzy.hash_pipe));
+ if (! register_fuzzy_controller_call (session, rule, task,
+ &fake_fuzzy, cmd, value, flag, saved, err)) {
+ g_free (checksum);
+ return FALSE;
+ }
+
+ msg_info ("save hash of image: [%s] to list: %d", checksum, flag);
+ g_free (checksum);
+ }
+ }
+ }
+ cur = g_list_next (cur);
+ }
+ /* Process other parts */
+ cur = task->parts;
+ while (cur) {
+ mime_part = cur->data;
+ if (mime_part->content->len > 0 && fuzzy_check_content_type (rule, mime_part->type)) {
+ if (fuzzy_module_ctx->min_bytes <= 0 || mime_part->content->len >= fuzzy_module_ctx->min_bytes) {
+ checksum = g_compute_checksum_for_data (G_CHECKSUM_MD5,
+ mime_part->content->data, mime_part->content->len);
+ /* Construct fake fuzzy hash */
+ fake_fuzzy.block_size = 0;
+ memset (fake_fuzzy.hash_pipe, 0, sizeof (fake_fuzzy.hash_pipe));
+ rspamd_strlcpy (fake_fuzzy.hash_pipe, checksum, sizeof (fake_fuzzy.hash_pipe));
+ if (! register_fuzzy_controller_call (session, rule, task,
+ &fake_fuzzy, cmd, value, flag, saved, err)) {
+ return FALSE;
+ }
+ msg_info ("save hash of part of type: %s/%s: [%s] to list %d",
+ mime_part->type->type, mime_part->type->subtype,
+ checksum, flag);
+ g_free (checksum);
+ }
+ }
+ cur = g_list_next (cur);
+ }
+
+ memory_pool_add_destructor (session->session_pool, (pool_destruct_func)free_task_soft, task);
+
+ return TRUE;
+}
+
+static void
+fuzzy_process_handler (struct controller_session *session, f_str_t * in)
+{
+ struct fuzzy_rule *rule;
+ gboolean processed = FALSE, res = TRUE;
+ GList *cur;
+ struct worker_task *task;
+ GError **err;
+ gint r, cmd = 0, value = 0, flag = 0, *saved, *sargs;
+ gchar out_buf[BUFSIZ];
+
/* Extract arguments */
if (session->other_data) {
sargs = session->other_data;
@@ -870,17 +987,17 @@ fuzzy_process_handler (struct controller_session *session, f_str_t * in)
value = sargs[1];
flag = sargs[2];
}
-
+
/* Prepare task */
task = construct_task (session->worker);
session->other_data = task;
session->state = STATE_WAIT;
-
+
/* Allocate message from string */
task->msg = memory_pool_alloc (task->task_pool, sizeof (f_str_t));
task->msg->begin = in->begin;
task->msg->len = in->len;
-
+
saved = memory_pool_alloc0 (session->session_pool, sizeof (gint));
err = memory_pool_alloc0 (session->session_pool, sizeof (GError *));
@@ -901,130 +1018,29 @@ fuzzy_process_handler (struct controller_session *session, f_str_t * in)
rspamd_dispatcher_restore (session->dispatcher);
return;
}
- else {
- /* Plan new event for writing */
- cur = task->text_parts;
+ cur = fuzzy_module_ctx->fuzzy_rules;
+ while (cur && res) {
+ rule = cur->data;
- while (cur) {
- part = cur->data;
- if (part->is_empty || part->fuzzy == NULL || part->fuzzy->hash_pipe[0] == '\0' ||
- (fuzzy_module_ctx->min_bytes > 0 && part->content->len < fuzzy_module_ctx->min_bytes)) {
- /* Skip empty parts */
- cur = g_list_next (cur);
- continue;
- }
- if (! register_fuzzy_controller_call (session, task, part->fuzzy, cmd, value, flag, saved, err)) {
- /* Cannot write hash */
- session->state = STATE_REPLY;
- if (session->restful) {
- r = rspamd_snprintf (out_buf, sizeof (out_buf), "HTTP/1.0 500 Cannot write fuzzy hash" CRLF CRLF);
- }
- else {
- r = rspamd_snprintf (out_buf, sizeof (out_buf), "cannot write fuzzy hash" CRLF "END" CRLF);
- }
- if (! rspamd_dispatcher_write (session->dispatcher, out_buf, r, FALSE, FALSE)) {
- return;
- }
- rspamd_dispatcher_restore (session->dispatcher);
- free_task (task, FALSE);
- return;
- }
- if (! register_fuzzy_controller_call (session, task, part->double_fuzzy, cmd, value, flag, saved, err)) {
- /* Cannot write hash */
- session->state = STATE_REPLY;
- if (session->restful) {
- r = rspamd_snprintf (out_buf, sizeof (out_buf), "HTTP/1.0 500 Cannot write fuzzy hash" CRLF CRLF);
- }
- else {
- r = rspamd_snprintf (out_buf, sizeof (out_buf), "cannot write fuzzy hash" CRLF "END" CRLF);
- }
- if (! rspamd_dispatcher_write (session->dispatcher, out_buf, r, FALSE, FALSE)) {
- return;
- }
- free_task (task, FALSE);
- rspamd_dispatcher_restore (session->dispatcher);
- return;
- }
- cur = g_list_next (cur);
- }
- /* Process images */
- cur = task->images;
- while (cur) {
- image = cur->data;
- if (image->data->len > 0) {
- if (fuzzy_module_ctx->min_height <= 0 || image->height >= fuzzy_module_ctx->min_height) {
- if (fuzzy_module_ctx->min_width <= 0 || image->width >= fuzzy_module_ctx->min_width) {
- checksum = g_compute_checksum_for_data (G_CHECKSUM_MD5, image->data->data, image->data->len);
- /* Construct fake fuzzy hash */
- fake_fuzzy.block_size = 0;
- bzero (fake_fuzzy.hash_pipe, sizeof (fake_fuzzy.hash_pipe));
- rspamd_strlcpy (fake_fuzzy.hash_pipe, checksum, sizeof (fake_fuzzy.hash_pipe));
- if (! register_fuzzy_controller_call (session, task, &fake_fuzzy, cmd, value, flag, saved, err)) {
- /* Cannot write hash */
- session->state = STATE_REPLY;
- if (session->restful) {
- r = rspamd_snprintf (out_buf, sizeof (out_buf), "HTTP/1.0 500 Cannot write fuzzy hash" CRLF CRLF);
- }
- else {
- r = rspamd_snprintf (out_buf, sizeof (out_buf), "cannot write fuzzy hash" CRLF "END" CRLF);
- }
- g_free (checksum);
- free_task (task, FALSE);
- if (! rspamd_dispatcher_write (session->dispatcher, out_buf, r, FALSE, FALSE)) {
- return;
- }
- rspamd_dispatcher_restore (session->dispatcher);
- return;
- }
-
- msg_info ("save hash of image: [%s] to list: %d", checksum, flag);
- g_free (checksum);
- }
- }
- }
+ if (rule->read_only) {
cur = g_list_next (cur);
+ continue;
}
- /* Process other parts */
- cur = task->parts;
- while (cur) {
- mime_part = cur->data;
- if (mime_part->content->len > 0 && fuzzy_check_content_type (mime_part->type)) {
- if (fuzzy_module_ctx->min_bytes <= 0 || mime_part->content->len >= fuzzy_module_ctx->min_bytes) {
- checksum = g_compute_checksum_for_data (G_CHECKSUM_MD5, mime_part->content->data, mime_part->content->len);
- /* Construct fake fuzzy hash */
- fake_fuzzy.block_size = 0;
- bzero (fake_fuzzy.hash_pipe, sizeof (fake_fuzzy.hash_pipe));
- rspamd_strlcpy (fake_fuzzy.hash_pipe, checksum, sizeof (fake_fuzzy.hash_pipe));
- if (! register_fuzzy_controller_call (session, task, &fake_fuzzy, cmd, value, flag, saved, err)) {
- /* Cannot write hash */
- session->state = STATE_REPLY;
- if (session->restful) {
- r = rspamd_snprintf (out_buf, sizeof (out_buf), "HTTP/1.0 500 Cannot write fuzzy hash" CRLF CRLF);
- }
- else {
- r = rspamd_snprintf (out_buf, sizeof (out_buf), "cannot write fuzzy hash" CRLF "END" CRLF);
- }
- g_free (checksum);
- free_task (task, FALSE);
- if (! rspamd_dispatcher_write (session->dispatcher, out_buf, r, FALSE, FALSE)) {
- return;
- }
- rspamd_dispatcher_restore (session->dispatcher);
- return;
- }
- msg_info ("save hash of part of type: %s/%s: [%s] to list %d",
- mime_part->type->type, mime_part->type->subtype,
- checksum, flag);
- g_free (checksum);
- }
- }
+
+ /* Check for flag */
+ if (g_hash_table_lookup (rule->mappings, GINT_TO_POINTER (flag)) == NULL) {
cur = g_list_next (cur);
+ continue;
}
- }
- memory_pool_add_destructor (session->session_pool, (pool_destruct_func)free_task_soft, task);
+ processed = TRUE;
+
+ res = fuzzy_process_rule (session, rule, task, err, cmd, flag, value, saved);
+
+ cur = g_list_next (cur);
+ }
- if (*saved == 0) {
+ if (!res) {
session->state = STATE_REPLY;
if (session->restful) {
r = rspamd_snprintf (out_buf, sizeof (out_buf), "HTTP/1.0 404 No hashes have been written" CRLF CRLF);
@@ -1037,6 +1053,19 @@ fuzzy_process_handler (struct controller_session *session, f_str_t * in)
}
rspamd_dispatcher_restore (session->dispatcher);
}
+ else if (!processed) {
+ session->state = STATE_REPLY;
+ if (session->restful) {
+ r = rspamd_snprintf (out_buf, sizeof (out_buf), "HTTP/1.0 404 No fuzzy rules matched" CRLF CRLF);
+ }
+ else {
+ r = rspamd_snprintf (out_buf, sizeof (out_buf), "no fuzzy rules matched" CRLF "END" CRLF);
+ }
+ if (! rspamd_dispatcher_write (session->dispatcher, out_buf, r, FALSE, FALSE)) {
+ return;
+ }
+ rspamd_dispatcher_restore (session->dispatcher);
+ }
}
static void
@@ -1155,10 +1184,3 @@ fuzzy_delete_handler (gchar **args, struct controller_session *session)
{
fuzzy_controller_handler (args, session, FUZZY_DEL);
}
-
-static gint
-fuzzy_mime_filter (struct worker_task *task)
-{
- /* XXX: remove this */
- return 0;
-}