source.dussan.org Git - rspamd.git/commitdiff
Start removing the old fuzzy algorithm.
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Tue, 14 Jul 2015 17:48:42 +0000 (18:48 +0100)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Tue, 14 Jul 2015 17:48:42 +0000 (18:48 +0100)
src/libmime/message.c
src/libmime/mime_expressions.c
src/libutil/fstring.c
src/libutil/fstring.h
src/plugins/fuzzy_check.c

index 2fcb4f7cd36567c7469770a9b9d618c91ca7bd0a..b1d80f7e9dd59a4ad9300b250f99e978fc2bb0b4 100644 (file)
@@ -1360,7 +1360,6 @@ process_text_part (struct rspamd_task *task,
                }
                rspamd_url_text_extract (task->task_pool, task, text_part, TRUE);
 
-               rspamd_fuzzy_from_text_part (text_part, task->task_pool, task->cfg->max_diff);
                rspamd_mempool_add_destructor (task->task_pool,
                        (rspamd_mempool_destruct_t) free_byte_array_callback,
                        text_part->content);
@@ -1388,7 +1387,6 @@ process_text_part (struct rspamd_task *task,
                                text_part);
                text_part->orig = part_content;
                rspamd_url_text_extract (task->task_pool, task, text_part, FALSE);
-               rspamd_fuzzy_from_text_part (text_part, task->task_pool, task->cfg->max_diff);
                g_ptr_array_add (task->text_parts, text_part);
        }
        else {
index 112c7a37f04acb50ca558ce23e2e4202d869ed17..e64aa03e0a49a2eea73f6be7ba6894bda068e64e 100644 (file)
@@ -1179,6 +1179,7 @@ rspamd_parts_distance (struct rspamd_task * task, GArray * args, void *unused)
        struct expression_argument *arg;
        GMimeObject *parent;
        const GMimeContentType *ct;
+       guint tw, dw;
        gint *pdiff;
 
        if (args == NULL || args->len == 0) {
@@ -1276,18 +1277,21 @@ rspamd_parts_distance (struct rspamd_task * task, GArray * args, void *unused)
                                NULL);
                        return FALSE;
                }
-               if (!IS_PART_EMPTY (p1) && !IS_PART_EMPTY (p2)) {
-                       if (p1->diff_str != NULL && p2->diff_str != NULL) {
-                               diff = rspamd_diff_distance_normalized (p1->diff_str,
-                                               p2->diff_str);
-                       }
-                       else {
-                               diff = rspamd_fuzzy_compare_parts (p1, p2);
-                       }
-                       debug_task (
+               if (!IS_PART_EMPTY (p1) && !IS_PART_EMPTY (p2) &&
+                               p1->normalized_words && p2->normalized_words) {
+
+                       tw = 0;
+                       dw = 0;
+                       diff = 100;
+                       /* XXX: Need levenshtein distance for a set of words */
+
+                       msg_debug (
+                               "different words: %d, total words: %d, "
                                "got likeliness between parts of %d%%, threshold is %d%%",
+                               dw, tw,
                                diff,
                                threshold);
+
                        *pdiff = diff;
                        rspamd_mempool_set_variable (task->task_pool,
                                "parts_distance",
index 96c57131aee320e84cab6ce52a1eb063881ce9cf..991cd30001d3fd41099d93149ed5271e26330207 100644 (file)
@@ -466,3 +466,16 @@ rspamd_fstrstrip (rspamd_fstring_t * str)
 
        str->len = r;
 }
+/* Byte-wise equality of two fstrings: TRUE only when both lengths match and the first `len` bytes at `begin` are identical. Both pointers must be non-NULL (asserted). */
+gboolean
+rspamd_fstring_equal (const rspamd_fstring_t *s1,
+               const rspamd_fstring_t *s2)
+{
+       g_assert (s1 != NULL && s2 != NULL);
+
+       if (s1->len == s2->len) {
+               return (memcmp (s1->begin, s2->begin, s1->len) == 0);
+       }
+
+       return FALSE; /* lengths differ, so the contents cannot be equal */
+}
index 3dbc2233b872133f7d1807c27439ec4263303207..27482877c91dc224d2cd62274849994e90a1a24b 100644 (file)
@@ -118,4 +118,7 @@ gchar * rspamd_fstr_c_str (rspamd_fstring_t *str, rspamd_mempool_t *pool);
  */
 void rspamd_fstrstrip (rspamd_fstring_t *str);
 
+gboolean rspamd_fstring_equal (const rspamd_fstring_t *s1,
+               const rspamd_fstring_t *s2);
+
 #endif
index 088a3197908f69d2f5695efad9bcc14042ecd32d..ec849da54c6ff341e60a0ea0087eacbc273ea536 100644 (file)
@@ -978,18 +978,15 @@ fuzzy_generate_commands (struct rspamd_task *task, struct fuzzy_rule *rule,
                                task->message_id, fuzzy_module_ctx->min_bytes);
                        continue;
                }
-               /* Check length of hash */
-               hashlen = strlen (part->fuzzy->hash_pipe);
 
-               if (hashlen == 0) {
+               if (part->words == NULL || part->words->len == 0) {
                        msg_info ("<%s>, part hash empty, skip fuzzy check",
                                task->message_id, fuzzy_module_ctx->min_hash_len);
                        continue;
                }
 
                if (fuzzy_module_ctx->min_hash_len != 0 &&
-                       hashlen * part->fuzzy->block_size <
-                       fuzzy_module_ctx->min_hash_len) {
+                       part->words->len < fuzzy_module_ctx->min_hash_len) {
                        msg_info (
                                "<%s>, part hash is shorter than %d symbols, skip fuzzy check",
                                task->message_id,
@@ -997,14 +994,6 @@ fuzzy_generate_commands (struct rspamd_task *task, struct fuzzy_rule *rule,
                        continue;
                }
 
-               /*
-                * Try legacy first
-                */
-               cmd = fuzzy_cmd_from_text_part (rule, c, flag, value, task->task_pool,
-                               part, TRUE, NULL);
-               if (cmd) {
-                       g_ptr_array_add (res, cmd);
-               }
                cmd = fuzzy_cmd_from_text_part (rule, c, flag, value, task->task_pool,
                                part, FALSE, NULL);
                if (cmd) {