From a21fb8ed5b1642031c2b612cac45d176e9fc00f8 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Tue, 14 Jul 2015 18:48:42 +0100 Subject: [PATCH] Start removing of old fuzzy algorithm. --- src/libmime/message.c | 2 -- src/libmime/mime_expressions.c | 22 +++++++++++++--------- src/libutil/fstring.c | 13 +++++++++++++ src/libutil/fstring.h | 3 +++ src/plugins/fuzzy_check.c | 15 ++------------- 5 files changed, 31 insertions(+), 24 deletions(-) diff --git a/src/libmime/message.c b/src/libmime/message.c index 2fcb4f7cd..b1d80f7e9 100644 --- a/src/libmime/message.c +++ b/src/libmime/message.c @@ -1360,7 +1360,6 @@ process_text_part (struct rspamd_task *task, } rspamd_url_text_extract (task->task_pool, task, text_part, TRUE); - rspamd_fuzzy_from_text_part (text_part, task->task_pool, task->cfg->max_diff); rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) free_byte_array_callback, text_part->content); @@ -1388,7 +1387,6 @@ process_text_part (struct rspamd_task *task, text_part); text_part->orig = part_content; rspamd_url_text_extract (task->task_pool, task, text_part, FALSE); - rspamd_fuzzy_from_text_part (text_part, task->task_pool, task->cfg->max_diff); g_ptr_array_add (task->text_parts, text_part); } else { diff --git a/src/libmime/mime_expressions.c b/src/libmime/mime_expressions.c index 112c7a37f..e64aa03e0 100644 --- a/src/libmime/mime_expressions.c +++ b/src/libmime/mime_expressions.c @@ -1179,6 +1179,7 @@ rspamd_parts_distance (struct rspamd_task * task, GArray * args, void *unused) struct expression_argument *arg; GMimeObject *parent; const GMimeContentType *ct; + guint tw, dw; gint *pdiff; if (args == NULL || args->len == 0) { @@ -1276,18 +1277,21 @@ rspamd_parts_distance (struct rspamd_task * task, GArray * args, void *unused) NULL); return FALSE; } - if (!IS_PART_EMPTY (p1) && !IS_PART_EMPTY (p2)) { - if (p1->diff_str != NULL && p2->diff_str != NULL) { - diff = rspamd_diff_distance_normalized (p1->diff_str, - p2->diff_str); - } - else { - diff = rspamd_fuzzy_compare_parts (p1, p2); - } - debug_task ( + if (!IS_PART_EMPTY (p1) && !IS_PART_EMPTY (p2) && + p1->normalized_words && p2->normalized_words) { + + tw = 0; + dw = 0; + diff = 100; + /* XXX: Need levenshtein distance for a set of words */ + + msg_debug ( + "different words: %d, total words: %d, " "got likeliness between parts of %d%%, threshold is %d%%", + dw, tw, diff, threshold); + *pdiff = diff; rspamd_mempool_set_variable (task->task_pool, "parts_distance", diff --git a/src/libutil/fstring.c b/src/libutil/fstring.c index 96c57131a..991cd3000 100644 --- a/src/libutil/fstring.c +++ b/src/libutil/fstring.c @@ -466,3 +466,16 @@ rspamd_fstrstrip (rspamd_fstring_t * str) str->len = r; } + +gboolean +rspamd_fstring_equal (const rspamd_fstring_t *s1, + const rspamd_fstring_t *s2) +{ + g_assert (s1 != NULL && s2 != NULL); + + if (s1->len == s2->len) { + return (memcmp (s1->begin, s2->begin, s1->len) == 0); + } + + return FALSE; +} diff --git a/src/libutil/fstring.h b/src/libutil/fstring.h index 3dbc2233b..27482877c 100644 --- a/src/libutil/fstring.h +++ b/src/libutil/fstring.h @@ -118,4 +118,7 @@ gchar * rspamd_fstr_c_str (rspamd_fstring_t *str, rspamd_mempool_t *pool); */ void rspamd_fstrstrip (rspamd_fstring_t *str); +gboolean rspamd_fstring_equal (const rspamd_fstring_t *s1, + const rspamd_fstring_t *s2); + #endif diff --git a/src/plugins/fuzzy_check.c b/src/plugins/fuzzy_check.c index 088a31979..ec849da54 100644 --- a/src/plugins/fuzzy_check.c +++ b/src/plugins/fuzzy_check.c @@ -978,18 +978,15 @@ fuzzy_generate_commands (struct rspamd_task *task, struct fuzzy_rule *rule, task->message_id, fuzzy_module_ctx->min_bytes); continue; } - /* Check length of hash */ - hashlen = strlen (part->fuzzy->hash_pipe); - if (hashlen == 0) { + if (part->words == NULL || part->words->len == 0) { msg_info ("<%s>, part hash empty, skip fuzzy check", task->message_id, fuzzy_module_ctx->min_hash_len); continue; } if (fuzzy_module_ctx->min_hash_len != 0 && - hashlen * part->fuzzy->block_size < - fuzzy_module_ctx->min_hash_len) { + part->words->len < fuzzy_module_ctx->min_hash_len) { msg_info ( "<%s>, part hash is shorter than %d symbols, skip fuzzy check", task->message_id, @@ -997,14 +994,6 @@ fuzzy_generate_commands (struct rspamd_task *task, struct fuzzy_rule *rule, continue; } - /* - * Try legacy first - */ - cmd = fuzzy_cmd_from_text_part (rule, c, flag, value, task->task_pool, - part, TRUE, NULL); - if (cmd) { - g_ptr_array_add (res, cmd); - } cmd = fuzzy_cmd_from_text_part (rule, c, flag, value, task->task_pool, part, FALSE, NULL); if (cmd) { -- 2.39.5