From f2daba8fdb6045ed4be8a5458b8450a73363e725 Mon Sep 17 00:00:00 2001
From: Vsevolod Stakhov
Date: Mon, 23 May 2016 18:58:12 +0100
Subject: [PATCH] [Fix] More fixes to multi-flag fuzzy storage

Flag numbers are 1-based: the backend rejects flag 0 on add and stores
flag N as bit (N - 1), OR-ed with flags_mask when a record is written in
the multi-flag format. The storage now reports `flag + 1` in its reply, and
the plugin inserts one result per set bit when a reply has bit 31 set.
---
Review notes (amendments relative to the submitted patch):

 - fuzzy_insert_result() received a `flag` argument but used rep->flag for
   the mapping lookup, the log line and the result option, so a multi-flag
   reply inserted the same result for every set bit. It now uses `flag`.
 - The relearn branch for legacy records with an out-of-range flag stored
   `cmd->flag | flags_mask` (the raw number); it now stores the bit
   `(1U << (cmd->flag - 1)) | flags_mask` like the other branches.
 - fuzzy_insert_result() gained a header comment.
 - NOTE(review): RSPAMD_FUZZY_BACKEND_INSERT still binds
   `1U << (cmd->flag - 1)` without flags_mask, while the check path converts
   any value lacking flags_mask as a legacy flag number. Presumably freshly
   inserted records with flag >= 3 are then reported under another flag;
   confirm against the statement definitions before merging.
 - NOTE(review): line breaks and indentation of this patch were
   reconstructed; if context does not match the tree, apply with
   `git apply --ignore-whitespace`.

 src/fuzzy_storage.c           |   2 +-
 src/libserver/fuzzy_backend.c |  24 +++---
 src/plugins/fuzzy_check.c     | 133 ++++++++++++++++++++--------------
 3 files changed, 93 insertions(+), 66 deletions(-)

diff --git a/src/fuzzy_storage.c b/src/fuzzy_storage.c
index a10fd145d..bd888fd3c 100644
--- a/src/fuzzy_storage.c
+++ b/src/fuzzy_storage.c
@@ -701,7 +701,7 @@ reply:
 				flag = 0;
 			}
 
-			result.flag = flag;
+			result.flag = flag + 1;
 		}
 
 		memcpy (&session->reply.rep, &result, sizeof (result));
diff --git a/src/libserver/fuzzy_backend.c b/src/libserver/fuzzy_backend.c
index fa5518c2d..3f2bbafab 100644
--- a/src/libserver/fuzzy_backend.c
+++ b/src/libserver/fuzzy_backend.c
@@ -528,8 +528,8 @@ rspamd_fuzzy_backend_check (struct rspamd_fuzzy_backend *backend,
 			rep.flag = sqlite3_column_int (
 					prepared_stmts[RSPAMD_FUZZY_BACKEND_CHECK].stmt,
 					2);
-			if (!(rep.flag & flags_mask)) {
-				rep.flag = (1U << rep.flag) | flags_mask;
+			if (!(rep.flag & flags_mask) && rep.flag > 0) {
+				rep.flag = (1U << (rep.flag - 1)) | flags_mask;
 			}
 		}
 	}
@@ -615,8 +615,8 @@ rspamd_fuzzy_backend_check (struct rspamd_fuzzy_backend *backend,
 								prepared_stmts[RSPAMD_FUZZY_BACKEND_GET_DIGEST_BY_ID].stmt,
 								3);
 
-						if (!(rep.flag & flags_mask)) {
-							rep.flag = (1U << rep.flag) | flags_mask;
+						if (!(rep.flag & flags_mask) && rep.flag > 0) {
+							rep.flag = (1U << (rep.flag - 1)) | flags_mask;
 						}
 					}
 				}
@@ -673,7 +673,7 @@ rspamd_fuzzy_backend_add (struct rspamd_fuzzy_backend *backend,
 		return FALSE;
 	}
 
-	if (cmd->flag > 31) {
+	if (cmd->flag > 31 || cmd->flag == 0) {
 		msg_err_fuzzy_backend ("flag more than 31 is no longer supported");
 		return FALSE;
 	}
@@ -689,7 +689,7 @@ rspamd_fuzzy_backend_add (struct rspamd_fuzzy_backend *backend,
 				2);
 		rspamd_fuzzy_backend_cleanup_stmt (backend, RSPAMD_FUZZY_BACKEND_CHECK);
 
-		if ((flag & cmd->flag) == cmd->flag) {
+		if (flag & (1U << (cmd->flag - 1))) {
 			/* We need to increase weight */
 			rc = rspamd_fuzzy_backend_run_stmt (backend, TRUE,
 					RSPAMD_FUZZY_BACKEND_UPDATE,
@@ -706,24 +706,24 @@ rspamd_fuzzy_backend_add (struct rspamd_fuzzy_backend *backend,
 			/* We need to relearn actually */
 			if (flag & flags_mask) {
 				/* This is already new format */
-				flag |= cmd->flag;
+				flag |= (1U << (cmd->flag - 1));
 			}
 			else {
 				/* Convert to the new format */
-				if (flag > 31) {
+				if (flag > 31 || flag == 0) {
 					msg_warn_fuzzy_backend ("storage had flag more than 31, remove "
 							"it");
-					flag = cmd->flag | flags_mask;
+					flag = (1U << (cmd->flag - 1)) | flags_mask;
 				}
 				else {
-					flag = (1U << flag) | cmd->flag | flags_mask;
+					flag = (1U << (flag - 1)) | (1U << (cmd->flag - 1)) | flags_mask;
 				}
 			}
 
 			rc = rspamd_fuzzy_backend_run_stmt (backend, TRUE,
 					RSPAMD_FUZZY_BACKEND_UPDATE_FLAG,
 					(gint64) cmd->value,
-					(gint64) cmd->flag,
+					(gint64) flag,
 					cmd->digest);
 
 			if (rc != SQLITE_OK) {
@@ -738,7 +738,7 @@ rspamd_fuzzy_backend_add (struct rspamd_fuzzy_backend *backend,
 		rspamd_fuzzy_backend_cleanup_stmt (backend, RSPAMD_FUZZY_BACKEND_CHECK);
 		rc = rspamd_fuzzy_backend_run_stmt (backend, FALSE,
 				RSPAMD_FUZZY_BACKEND_INSERT,
-				(gint) (1U << cmd->flag),
+				(gint) (1U << (cmd->flag - 1)),
 				cmd->digest,
 				(gint64) cmd->value,
 				(gint64) timestamp);
diff --git a/src/plugins/fuzzy_check.c b/src/plugins/fuzzy_check.c
index c11ab40bf..721f1257a 100644
--- a/src/plugins/fuzzy_check.c
+++ b/src/plugins/fuzzy_check.c
@@ -1397,6 +1397,73 @@ fuzzy_process_reply (guchar **pos, gint *r, GPtrArray *req,
 	return NULL;
 }
 
+/*
+ * Insert a result for one flag of a matched fuzzy reply.
+ * `flag` is the 1-based flag number to report: rep->flag itself for a
+ * single-flag reply, or (bit index + 1) for each bit set in a multi-flag
+ * reply. The symbol comes from the rule mappings for that flag, or from
+ * the rule's default symbol when the flag has no mapping.
+ */
+static void
+fuzzy_insert_result (struct fuzzy_client_session *session,
+		const struct rspamd_fuzzy_reply *rep,
+		struct rspamd_fuzzy_cmd *cmd, guint flag)
+{
+	const gchar *symbol;
+	struct fuzzy_mapping *map;
+	struct rspamd_task *task = session->task;
+	double nval;
+	guchar buf[2048];
+
+	/* Get mapping by flag */
+	if ((map =
+			g_hash_table_lookup (session->rule->mappings,
+					GINT_TO_POINTER (flag))) == NULL) {
+		/* Default symbol and default weight */
+		symbol = session->rule->symbol;
+
+	}
+	else {
+		/* Get symbol and weight from map */
+		symbol = map->symbol;
+	}
+
+
+	/*
+	 * Hash is assumed to be found if probability is more than 0.5
+	 * In that case `value` means number of matches
+	 * Otherwise `value` means error code
+	 */
+
+	nval = fuzzy_normalize (rep->value,
+			session->rule->max_score);
+	nval *= rep->prob;
+	msg_info_task (
+			"found fuzzy hash %*xs with weight: "
+			"%.2f, in list: %s:%d%s",
+			rspamd_fuzzy_hash_len, cmd->digest,
+			nval,
+			symbol,
+			flag,
+			map == NULL ? "(unknown)" : "");
+	if (map != NULL || !session->rule->skip_unknown) {
+		rspamd_snprintf (buf,
+				sizeof (buf),
+				"%d:%*xs:%.2f",
+				flag,
+				rspamd_fuzzy_hash_len, cmd->digest,
+				rep->prob,
+				nval);
+		rspamd_task_insert_result_single (session->task,
+				symbol,
+				nval,
+				g_list_prepend (NULL,
+						rspamd_mempool_strdup (
+								session->task->task_pool,
+								buf)));
+	}
+}
+
 /* Fuzzy check callback */
 static void
 fuzzy_check_io_callback (gint fd, short what, void *arg)
@@ -1404,14 +1471,12 @@ fuzzy_check_io_callback (gint fd, short what, void *arg)
 	struct fuzzy_client_session *session = arg;
 	const struct rspamd_fuzzy_reply *rep;
 	struct rspamd_task *task;
-	struct fuzzy_mapping *map;
 	guchar buf[2048], *p;
-	const gchar *symbol;
 	struct fuzzy_cmd_io *io;
 	struct rspamd_fuzzy_cmd *cmd = NULL;
 	guint i;
 	gint r;
-	double nval;
+
 	enum {
 		return_error = 0,
 		return_want_more,
@@ -1434,68 +1499,30 @@ fuzzy_check_io_callback (gint fd, short what, void *arg)
 
 			while ((rep = fuzzy_process_reply (&p, &r,
 					session->commands, session->rule, &cmd)) != NULL) {
-				/* Get mapping by flag */
-				if ((map =
-						g_hash_table_lookup (session->rule->mappings,
-						GINT_TO_POINTER (rep->flag))) == NULL) {
-					/* Default symbol and default weight */
-					symbol = session->rule->symbol;
-
-				}
-				else {
-					/* Get symbol and weight from map */
-					symbol = map->symbol;
-				}
-
-
-				/*
-				 * Hash is assumed to be found if probability is more than 0.5
-				 * In that case `value` means number of matches
-				 * Otherwise `value` means error code
-				 */
 				if (rep->prob > 0.5) {
-					nval = fuzzy_normalize (rep->value,
-							session->rule->max_score);
-					nval *= rep->prob;
-					msg_info_task (
-						"found fuzzy hash %*xs with weight: "
-						"%.2f, in list: %s:%d%s",
-						rspamd_fuzzy_hash_len, cmd->digest,
-						nval,
-						symbol,
-						rep->flag,
-						map == NULL ? "(unknown)" : "");
-					if (map != NULL || !session->rule->skip_unknown) {
-						rspamd_snprintf (buf,
-							sizeof (buf),
-							"%d:%*xs:%.2f",
-							rep->flag,
-							rspamd_fuzzy_hash_len, cmd->digest,
-							rep->prob,
-							nval);
-						rspamd_task_insert_result_single (session->task,
-							symbol,
-							nval,
-							g_list_prepend (NULL,
-							rspamd_mempool_strdup (
-								session->task->task_pool,
-								buf)));
+					if (rep->flag & (1U << 31)) {
+						/* Multi-flag */
+						for (i = 0; i < 31; i ++) {
+							if ((1U << i) & rep->flag) {
+								fuzzy_insert_result (session, rep, cmd, i + 1);
+							}
+						}
+					}
+					else {
+						fuzzy_insert_result (session, rep, cmd, rep->flag);
 					}
 				}
 				else if (rep->value == 403) {
 					msg_info_task (
-						"fuzzy check error for %s(%d): forbidden",
-						symbol,
+						"fuzzy check error for %d: forbidden",
 						rep->flag);
 				}
 				else if (rep->value != 0) {
 					msg_info_task (
-						"fuzzy check error for %s(%d): unknown error (%d)",
-						symbol,
+						"fuzzy check error for %d: unknown error (%d)",
 						rep->flag,
 						rep->value);
 				}
-				/* Not found */
 				ret = return_finished;
 			}
 
-- 
2.39.5