summary | refs | log | tree | commit | diff | stats
path: root/src/fuzzy.c
diff options
context:
space:
mode:
author: Vsevolod Stakhov <vsevolod@rambler-co.ru> 2011-07-13 19:39:37 +0400
committer: Vsevolod Stakhov <vsevolod@rambler-co.ru> 2011-07-13 19:39:37 +0400
commit: b0ddff4f0d56a877305649a14b902b3f23140b4b (patch)
tree: 65a2066d13b480b062b20036280d18321fb91e94 /src/fuzzy.c
parent: 091e84951a2b032bb2930b300ffe43eaf01a304e (diff)
download: rspamd-b0ddff4f0d56a877305649a14b902b3f23140b4b.tar.gz
rspamd-b0ddff4f0d56a877305649a14b902b3f23140b4b.zip
* Add new algorithm based on diff algorithm to compare relatively short text parts
Diffstat (limited to 'src/fuzzy.c')
-rw-r--r-- src/fuzzy.c 24
1 files changed, 18 insertions, 6 deletions
diff --git a/src/fuzzy.c b/src/fuzzy.c
index 5a2decb34..2639d68a7 100644
--- a/src/fuzzy.c
+++ b/src/fuzzy.c
@@ -313,7 +313,7 @@ fuzzy_init_byte_array (GByteArray * in, memory_pool_t * pool)
}
void
-fuzzy_init_part (struct mime_text_part *part, memory_pool_t *pool)
+fuzzy_init_part (struct mime_text_part *part, memory_pool_t *pool, gsize max_diff)
{
fuzzy_hash_t *new, *new2;
gchar *c, *end, *begin;
@@ -321,7 +321,7 @@ fuzzy_init_part (struct mime_text_part *part, memory_pool_t *pool)
GList *cur_offset;
struct process_exception *cur_ex = NULL;
gunichar uc;
- GString *debug;
+ gboolean write_diff = FALSE;
cur_offset = part->urls_offset;
if (cur_offset != NULL) {
@@ -371,7 +371,15 @@ fuzzy_init_part (struct mime_text_part *part, memory_pool_t *pool)
}
}
- debug = g_string_sized_new (real_len);
+ write_diff = real_len < max_diff;
+
+ if (write_diff) {
+ part->diff_str = fstralloc (pool, real_len);
+ }
+ else {
+ part->diff_str = NULL;
+ }
+
new->block_size = fuzzy_blocksize (real_len);
new2->block_size = new->block_size * 2;
@@ -397,7 +405,9 @@ fuzzy_init_part (struct mime_text_part *part, memory_pool_t *pool)
uc = g_utf8_get_char (c);
if (g_unichar_isalnum (uc)) {
fuzzy_update2 (new, new2, uc);
- g_string_append_unichar (debug, uc);
+ if (write_diff) {
+ fstrpush_unichar (part->diff_str, uc);
+ }
}
c = g_utf8_next_char (c);
}
@@ -415,13 +425,15 @@ fuzzy_init_part (struct mime_text_part *part, memory_pool_t *pool)
else {
if (!g_ascii_isspace (*c) && !g_ascii_ispunct (*c)) {
fuzzy_update2 (new, new2, *c);
- g_string_append_c (debug, *c);
+ if (write_diff) {
+ fstrpush (part->diff_str, *c);
+ }
}
c++;
}
}
}
- msg_info ("make hash of string: %v", debug);
+
/* Check whether we have more bytes in a rolling window */
if (new->rh != 0) {
new->hash_pipe[new->hi] = b64[new->h % 64];