about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@highsecure.ru> 2015-07-14 18:48:42 +0100
committer Vsevolod Stakhov <vsevolod@highsecure.ru> 2015-07-14 18:48:42 +0100
commit a21fb8ed5b1642031c2b612cac45d176e9fc00f8 (patch)
tree 357b4cd6043da7bb12a0641c5f327118af1ac847
parent 02b6117a397bb5cba27ca63a7e2df1c5dbfd0125 (diff)
download rspamd-a21fb8ed5b1642031c2b612cac45d176e9fc00f8.tar.gz
rspamd-a21fb8ed5b1642031c2b612cac45d176e9fc00f8.zip
Start removing of old fuzzy algorithm.
-rw-r--r-- src/libmime/message.c 2
-rw-r--r-- src/libmime/mime_expressions.c 22
-rw-r--r-- src/libutil/fstring.c 13
-rw-r--r-- src/libutil/fstring.h 3
-rw-r--r-- src/plugins/fuzzy_check.c 15
5 files changed, 31 insertions, 24 deletions
diff --git a/src/libmime/message.c b/src/libmime/message.c
index 2fcb4f7cd..b1d80f7e9 100644
--- a/src/libmime/message.c
+++ b/src/libmime/message.c
@@ -1360,7 +1360,6 @@ process_text_part (struct rspamd_task *task,
}
rspamd_url_text_extract (task->task_pool, task, text_part, TRUE);
- rspamd_fuzzy_from_text_part (text_part, task->task_pool, task->cfg->max_diff);
rspamd_mempool_add_destructor (task->task_pool,
(rspamd_mempool_destruct_t) free_byte_array_callback,
text_part->content);
@@ -1388,7 +1387,6 @@ process_text_part (struct rspamd_task *task,
text_part);
text_part->orig = part_content;
rspamd_url_text_extract (task->task_pool, task, text_part, FALSE);
- rspamd_fuzzy_from_text_part (text_part, task->task_pool, task->cfg->max_diff);
g_ptr_array_add (task->text_parts, text_part);
}
else {
diff --git a/src/libmime/mime_expressions.c b/src/libmime/mime_expressions.c
index 112c7a37f..e64aa03e0 100644
--- a/src/libmime/mime_expressions.c
+++ b/src/libmime/mime_expressions.c
@@ -1179,6 +1179,7 @@ rspamd_parts_distance (struct rspamd_task * task, GArray * args, void *unused)
struct expression_argument *arg;
GMimeObject *parent;
const GMimeContentType *ct;
+ guint tw, dw;
gint *pdiff;
if (args == NULL || args->len == 0) {
@@ -1276,18 +1277,21 @@ rspamd_parts_distance (struct rspamd_task * task, GArray * args, void *unused)
NULL);
return FALSE;
}
- if (!IS_PART_EMPTY (p1) && !IS_PART_EMPTY (p2)) {
- if (p1->diff_str != NULL && p2->diff_str != NULL) {
- diff = rspamd_diff_distance_normalized (p1->diff_str,
- p2->diff_str);
- }
- else {
- diff = rspamd_fuzzy_compare_parts (p1, p2);
- }
- debug_task (
+ if (!IS_PART_EMPTY (p1) && !IS_PART_EMPTY (p2) &&
+ p1->normalized_words && p2->normalized_words) {
+
+ tw = 0;
+ dw = 0;
+ diff = 100;
+ /* XXX: Need levenshtein distance for a set of words */
+
+ msg_debug (
+ "different words: %d, total words: %d, "
"got likeliness between parts of %d%%, threshold is %d%%",
+ dw, tw,
diff,
threshold);
+
*pdiff = diff;
rspamd_mempool_set_variable (task->task_pool,
"parts_distance",
diff --git a/src/libutil/fstring.c b/src/libutil/fstring.c
index 96c57131a..991cd3000 100644
--- a/src/libutil/fstring.c
+++ b/src/libutil/fstring.c
@@ -466,3 +466,16 @@ rspamd_fstrstrip (rspamd_fstring_t * str)
str->len = r;
}
+
+gboolean
+rspamd_fstring_equal (const rspamd_fstring_t *s1,
+ const rspamd_fstring_t *s2)
+{
+ g_assert (s1 != NULL && s2 != NULL);
+
+ if (s1->len == s2->len) {
+ return (memcmp (s1->begin, s2->begin, s1->len) == 0);
+ }
+
+ return FALSE;
+}
diff --git a/src/libutil/fstring.h b/src/libutil/fstring.h
index 3dbc2233b..27482877c 100644
--- a/src/libutil/fstring.h
+++ b/src/libutil/fstring.h
@@ -118,4 +118,7 @@ gchar * rspamd_fstr_c_str (rspamd_fstring_t *str, rspamd_mempool_t *pool);
*/
void rspamd_fstrstrip (rspamd_fstring_t *str);
+gboolean rspamd_fstring_equal (const rspamd_fstring_t *s1,
+ const rspamd_fstring_t *s2);
+
#endif
diff --git a/src/plugins/fuzzy_check.c b/src/plugins/fuzzy_check.c
index 088a31979..ec849da54 100644
--- a/src/plugins/fuzzy_check.c
+++ b/src/plugins/fuzzy_check.c
@@ -978,18 +978,15 @@ fuzzy_generate_commands (struct rspamd_task *task, struct fuzzy_rule *rule,
task->message_id, fuzzy_module_ctx->min_bytes);
continue;
}
- /* Check length of hash */
- hashlen = strlen (part->fuzzy->hash_pipe);
- if (hashlen == 0) {
+ if (part->words == NULL || part->words->len == 0) {
msg_info ("<%s>, part hash empty, skip fuzzy check",
task->message_id, fuzzy_module_ctx->min_hash_len);
continue;
}
if (fuzzy_module_ctx->min_hash_len != 0 &&
- hashlen * part->fuzzy->block_size <
- fuzzy_module_ctx->min_hash_len) {
+ part->words->len < fuzzy_module_ctx->min_hash_len) {
msg_info (
"<%s>, part hash is shorter than %d symbols, skip fuzzy check",
task->message_id,
@@ -997,14 +994,6 @@ fuzzy_generate_commands (struct rspamd_task *task, struct fuzzy_rule *rule,
continue;
}
- /*
- * Try legacy first
- */
- cmd = fuzzy_cmd_from_text_part (rule, c, flag, value, task->task_pool,
- part, TRUE, NULL);
- if (cmd) {
- g_ptr_array_add (res, cmd);
- }
cmd = fuzzy_cmd_from_text_part (rule, c, flag, value, task->task_pool,
part, FALSE, NULL);
if (cmd) {