diff options
author | Vsevolod Stakhov <vsevolod@rambler-co.ru> | 2011-07-13 19:39:37 +0400 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@rambler-co.ru> | 2011-07-13 19:39:37 +0400 |
commit | b0ddff4f0d56a877305649a14b902b3f23140b4b (patch) | |
tree | 65a2066d13b480b062b20036280d18321fb91e94 /src/fuzzy.c | |
parent | 091e84951a2b032bb2930b300ffe43eaf01a304e (diff) | |
download | rspamd-b0ddff4f0d56a877305649a14b902b3f23140b4b.tar.gz rspamd-b0ddff4f0d56a877305649a14b902b3f23140b4b.zip |
* Add a new diff-based algorithm for comparing relatively short text parts
Diffstat (limited to 'src/fuzzy.c')
-rw-r--r-- | src/fuzzy.c | 24 |
1 file changed, 18 insertions, 6 deletions
diff --git a/src/fuzzy.c b/src/fuzzy.c index 5a2decb34..2639d68a7 100644 --- a/src/fuzzy.c +++ b/src/fuzzy.c @@ -313,7 +313,7 @@ fuzzy_init_byte_array (GByteArray * in, memory_pool_t * pool) } void -fuzzy_init_part (struct mime_text_part *part, memory_pool_t *pool) +fuzzy_init_part (struct mime_text_part *part, memory_pool_t *pool, gsize max_diff) { fuzzy_hash_t *new, *new2; gchar *c, *end, *begin; @@ -321,7 +321,7 @@ fuzzy_init_part (struct mime_text_part *part, memory_pool_t *pool) GList *cur_offset; struct process_exception *cur_ex = NULL; gunichar uc; - GString *debug; + gboolean write_diff = FALSE; cur_offset = part->urls_offset; if (cur_offset != NULL) { @@ -371,7 +371,15 @@ fuzzy_init_part (struct mime_text_part *part, memory_pool_t *pool) } } - debug = g_string_sized_new (real_len); + write_diff = real_len < max_diff; + + if (write_diff) { + part->diff_str = fstralloc (pool, real_len); + } + else { + part->diff_str = NULL; + } + new->block_size = fuzzy_blocksize (real_len); new2->block_size = new->block_size * 2; @@ -397,7 +405,9 @@ fuzzy_init_part (struct mime_text_part *part, memory_pool_t *pool) uc = g_utf8_get_char (c); if (g_unichar_isalnum (uc)) { fuzzy_update2 (new, new2, uc); - g_string_append_unichar (debug, uc); + if (write_diff) { + fstrpush_unichar (part->diff_str, uc); + } } c = g_utf8_next_char (c); } @@ -415,13 +425,15 @@ fuzzy_init_part (struct mime_text_part *part, memory_pool_t *pool) else { if (!g_ascii_isspace (*c) && !g_ascii_ispunct (*c)) { fuzzy_update2 (new, new2, *c); - g_string_append_c (debug, *c); + if (write_diff) { + fstrpush (part->diff_str, *c); + } } c++; } } } - msg_info ("make hash of string: %v", debug); + /* Check whether we have more bytes in a rolling window */ if (new->rh != 0) { new->hash_pipe[new->hi] = b64[new->h % 64]; |