diff options
author | cebka@lenovo-laptop <cebka@lenovo-laptop> | 2010-02-08 19:23:53 +0300 |
---|---|---|
committer | cebka@lenovo-laptop <cebka@lenovo-laptop> | 2010-02-08 19:23:53 +0300 |
commit | 341a236aa614d66bd76764e3eb315b6df0688ef5 (patch) | |
tree | 8db27485dbf87bc338cf311d4b6c70070774a618 /src/fuzzy_storage.c | |
parent | 10c8ad2246130d77b19ee7036e3f0a74c47425a1 (diff) | |
download | rspamd-341a236aa614d66bd76764e3eb315b6df0688ef5.tar.gz rspamd-341a236aa614d66bd76764e3eb315b6df0688ef5.zip |
* Add the ability to assign a weight to fuzzy hashes; this can be very useful for letting users autolearn the fuzzy storage
Diffstat (limited to 'src/fuzzy_storage.c')
-rw-r--r-- | src/fuzzy_storage.c | 52 |
1 files changed, 47 insertions, 5 deletions
diff --git a/src/fuzzy_storage.c b/src/fuzzy_storage.c index e05a479f6..c3a29e17f 100644 --- a/src/fuzzy_storage.c +++ b/src/fuzzy_storage.c @@ -62,6 +62,7 @@ static struct timeval tmv; static struct event tev; struct rspamd_fuzzy_node { + int32_t value; fuzzy_hash_t h; uint64_t time; }; @@ -239,7 +240,7 @@ read_hashes_file (struct rspamd_worker *wrk) return TRUE; } -static gboolean +static int process_check_command (struct fuzzy_cmd *cmd) { GList *cur; @@ -248,7 +249,7 @@ process_check_command (struct fuzzy_cmd *cmd) int prob = 0; if (!bloom_check (bf, cmd->hash)) { - return FALSE; + return 0; } memcpy (s.hash_pipe, cmd->hash, sizeof (s.hash_pipe)); @@ -260,12 +261,38 @@ process_check_command (struct fuzzy_cmd *cmd) h = cur->data; if ((prob = fuzzy_compare_hashes (&h->h, &s)) > LEV_LIMIT) { msg_info ("fuzzy hash was found, probability %d%%", prob); - return TRUE; + return h->value; } cur = g_list_next (cur); } msg_debug ("fuzzy hash was NOT found, prob is %d%%", prob); + return 0; +} + +static gboolean +update_hash (struct fuzzy_cmd *cmd) +{ + GList *cur; + struct rspamd_fuzzy_node *h; + fuzzy_hash_t s; + int prob = 0; + + memcpy (s.hash_pipe, cmd->hash, sizeof (s.hash_pipe)); + s.block_size = cmd->blocksize; + cur = hashes[cmd->blocksize % BUCKETS]->head; + + /* XXX: too slow way */ + while (cur) { + h = cur->data; + if ((prob = fuzzy_compare_hashes (&h->h, &s)) > LEV_LIMIT) { + h->value += cmd->value; + msg_info ("fuzzy hash was found, probability %d%%, set new value to %d", prob, h->value); + return TRUE; + } + cur = g_list_next (cur); + } + return FALSE; } @@ -275,7 +302,9 @@ process_write_command (struct fuzzy_cmd *cmd) struct rspamd_fuzzy_node *h; if (bloom_check (bf, cmd->hash)) { - return FALSE; + if (update_hash (cmd)) { + return TRUE; + } } h = g_malloc (sizeof (struct rspamd_fuzzy_node)); @@ -343,9 +372,22 @@ else { \ static void process_fuzzy_command (struct fuzzy_session *session) { + int r; + char buf[64]; + switch (session->cmd.cmd) { case 
FUZZY_CHECK: - CMD_PROCESS (check); + if ((r = process_check_command (&session->cmd))) { + r = snprintf (buf, sizeof (buf), "OK %d" CRLF, r); + if (sendto (session->fd, buf, r, 0, (struct sockaddr *)&session->sa, session->salen) == -1) { + msg_err ("error while writing reply: %s", strerror (errno)); + } + } + else { + if (sendto (session->fd, "ERR" CRLF, sizeof ("ERR" CRLF) - 1, 0, (struct sockaddr *)&session->sa, session->salen) == -1) { + msg_err ("error while writing reply: %s", strerror (errno)); + } + } break; case FUZZY_WRITE: CMD_PROCESS (write); |