From 09c8e56e36b88709152929d9845d6bf8d5d443ab Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Fri, 9 Dec 2016 12:50:51 +0000 Subject: [PATCH] [Feature] Use shingles for fuzzy hashing of images --- src/plugins/fuzzy_check.c | 28 +++++++++++++--------------- test/rspamd_shingles_test.c | 12 ++++++------ 2 files changed, 19 insertions(+), 21 deletions(-) diff --git a/src/plugins/fuzzy_check.c b/src/plugins/fuzzy_check.c index f37ef45d0..1f3561706 100644 --- a/src/plugins/fuzzy_check.c +++ b/src/plugins/fuzzy_check.c @@ -1242,7 +1242,7 @@ fuzzy_cmd_from_text_part (struct fuzzy_rule *rule, msg_debug_pool ("loading shingles of type %s with key %*xs", rule->algorithm_str, 16, rule->shingles_key->str); - sh = rspamd_shingles_generate (words, + sh = rspamd_shingles_from_text (words, rule->shingles_key->str, pool, rspamd_shingles_default_filter, NULL, rule->alg); @@ -1299,9 +1299,8 @@ fuzzy_cmd_from_image_part (struct fuzzy_rule *rule, { struct rspamd_fuzzy_shingle_cmd *shcmd; struct rspamd_fuzzy_encrypted_shingle_cmd *encshcmd, *cached; - guint i; struct fuzzy_cmd_io *io; - guint64 shingles[RSPAMD_SHINGLE_SIZE]; + struct rspamd_shingle *sh; cached = fuzzy_cmd_get_cached (rule, pool, img); @@ -1318,25 +1317,23 @@ fuzzy_cmd_from_image_part (struct fuzzy_rule *rule, /* * Generate shingles */ - G_STATIC_ASSERT (G_N_ELEMENTS (img->fuzzy_sig) == RSPAMD_SHINGLE_SIZE); - - for (i = 0; i < RSPAMD_SHINGLE_SIZE; i ++) { - shingles[i] = rspamd_cryptobox_fast_hash_specific ( - RSPAMD_CRYPTOBOX_MUMHASH, - (const guchar *)&img->fuzzy_sig[i], - sizeof (img->fuzzy_sig[i]), 0); + sh = rspamd_shingles_from_image (img->dct, + rule->shingles_key->str, pool, + rspamd_shingles_default_filter, NULL, + rule->alg); + if (sh != NULL) { + memcpy (&shcmd->sgl, sh->hashes, sizeof (shcmd->sgl)); + shcmd->basic.shingles_count = RSPAMD_SHINGLE_SIZE; } + rspamd_cryptobox_hash (shcmd->basic.digest, - (const guchar *)img->fuzzy_sig, sizeof (img->fuzzy_sig), + (const guchar *)img->dct, sizeof (gdouble) * 64 * 
64, rule->hash_key->str, rule->hash_key->len); msg_debug_pool ("loading shingles of type %s with key %*xs", rule->algorithm_str, 16, rule->shingles_key->str); - memcpy (&shcmd->sgl, shingles, sizeof (shcmd->sgl)); - shcmd->basic.shingles_count = RSPAMD_SHINGLE_SIZE; - /* * We always save encrypted command as it can handle both * encrypted and unencrypted requests. @@ -1605,9 +1602,10 @@ fuzzy_insert_result (struct fuzzy_client_session *session, nval *= rep->prob; msg_info_task ( "found fuzzy hash %*xs with weight: " - "%.2f, in list: %s:%d%s", + "%.2f, probability %.2f, in list: %s:%d%s", (gint)sizeof (cmd->digest), cmd->digest, nval, + (gdouble)rep->prob, symbol, rep->flag, map == NULL ? "(unknown)" : ""); diff --git a/test/rspamd_shingles_test.c b/test/rspamd_shingles_test.c index 401a26309..e1367cca4 100644 --- a/test/rspamd_shingles_test.c +++ b/test/rspamd_shingles_test.c @@ -117,11 +117,11 @@ test_case (gsize cnt, gsize max_len, gdouble perm_factor, ottery_rand_bytes (key, sizeof (key)); input = generate_fuzzy_words (cnt, max_len); ts1 = rspamd_get_virtual_ticks (); - sgl = rspamd_shingles_generate (input, key, NULL, + sgl = rspamd_shingles_from_text (input, key, NULL, rspamd_shingles_default_filter, NULL, alg); ts2 = rspamd_get_virtual_ticks (); permute_vector (input, perm_factor); - sgl_permuted = rspamd_shingles_generate (input, key, NULL, + sgl_permuted = rspamd_shingles_from_text (input, key, NULL, rspamd_shingles_default_filter, NULL, alg); res = rspamd_shingles_compare (sgl, sgl_permuted); @@ -203,28 +203,28 @@ rspamd_shingles_test_func (void) g_array_append_val (input, tok); } - sgl = rspamd_shingles_generate (input, key, NULL, + sgl = rspamd_shingles_from_text (input, key, NULL, rspamd_shingles_default_filter, NULL, RSPAMD_SHINGLES_OLD); for (i = 0; i < RSPAMD_SHINGLE_SIZE; i ++) { g_assert (sgl->hashes[i] == expected_old[i]); } g_free (sgl); - sgl = rspamd_shingles_generate (input, key, NULL, + sgl = rspamd_shingles_from_text (input, key, NULL, 
rspamd_shingles_default_filter, NULL, RSPAMD_SHINGLES_XXHASH); for (i = 0; i < RSPAMD_SHINGLE_SIZE; i ++) { g_assert (sgl->hashes[i] == expected_xxhash[i]); } g_free (sgl); - sgl = rspamd_shingles_generate (input, key, NULL, + sgl = rspamd_shingles_from_text (input, key, NULL, rspamd_shingles_default_filter, NULL, RSPAMD_SHINGLES_MUMHASH); for (i = 0; i < RSPAMD_SHINGLE_SIZE; i ++) { g_assert (sgl->hashes[i] == expected_mumhash[i]); } g_free (sgl); - sgl = rspamd_shingles_generate (input, key, NULL, + sgl = rspamd_shingles_from_text (input, key, NULL, rspamd_shingles_default_filter, NULL, RSPAMD_SHINGLES_FAST); for (i = 0; i < RSPAMD_SHINGLE_SIZE; i ++) { g_assert (sgl->hashes[i] == expected_fasthash[i]); -- 2.39.5