about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Vsevolod Stakhov <vsevolod@highsecure.ru> 2016-12-09 12:50:51 +0000
committer: Vsevolod Stakhov <vsevolod@highsecure.ru> 2016-12-09 12:51:08 +0000
commit: 09c8e56e36b88709152929d9845d6bf8d5d443ab (patch)
tree: 41362367d16cb5dc26bcd92dfdfbd92b3f291dfa
parent: 4fd2f26b9f0985a64122187313cec5ae4ed35c78 (diff)
download: rspamd-09c8e56e36b88709152929d9845d6bf8d5d443ab.tar.gz
download: rspamd-09c8e56e36b88709152929d9845d6bf8d5d443ab.zip
[Feature] Use shingles for images fuzzying
-rw-r--r--  src/plugins/fuzzy_check.c  28
-rw-r--r--  test/rspamd_shingles_test.c  12
2 files changed, 19 insertions, 21 deletions
diff --git a/src/plugins/fuzzy_check.c b/src/plugins/fuzzy_check.c
index f37ef45d0..1f3561706 100644
--- a/src/plugins/fuzzy_check.c
+++ b/src/plugins/fuzzy_check.c
@@ -1242,7 +1242,7 @@ fuzzy_cmd_from_text_part (struct fuzzy_rule *rule,
msg_debug_pool ("loading shingles of type %s with key %*xs",
rule->algorithm_str,
16, rule->shingles_key->str);
- sh = rspamd_shingles_generate (words,
+ sh = rspamd_shingles_from_text (words,
rule->shingles_key->str, pool,
rspamd_shingles_default_filter, NULL,
rule->alg);
@@ -1299,9 +1299,8 @@ fuzzy_cmd_from_image_part (struct fuzzy_rule *rule,
{
struct rspamd_fuzzy_shingle_cmd *shcmd;
struct rspamd_fuzzy_encrypted_shingle_cmd *encshcmd, *cached;
- guint i;
struct fuzzy_cmd_io *io;
- guint64 shingles[RSPAMD_SHINGLE_SIZE];
+ struct rspamd_shingle *sh;
cached = fuzzy_cmd_get_cached (rule, pool, img);
@@ -1318,25 +1317,23 @@ fuzzy_cmd_from_image_part (struct fuzzy_rule *rule,
/*
* Generate shingles
*/
- G_STATIC_ASSERT (G_N_ELEMENTS (img->fuzzy_sig) == RSPAMD_SHINGLE_SIZE);
-
- for (i = 0; i < RSPAMD_SHINGLE_SIZE; i ++) {
- shingles[i] = rspamd_cryptobox_fast_hash_specific (
- RSPAMD_CRYPTOBOX_MUMHASH,
- (const guchar *)&img->fuzzy_sig[i],
- sizeof (img->fuzzy_sig[i]), 0);
+ sh = rspamd_shingles_from_image (img->dct,
+ rule->shingles_key->str, pool,
+ rspamd_shingles_default_filter, NULL,
+ rule->alg);
+ if (sh != NULL) {
+ memcpy (&shcmd->sgl, sh->hashes, sizeof (shcmd->sgl));
+ shcmd->basic.shingles_count = RSPAMD_SHINGLE_SIZE;
}
+
rspamd_cryptobox_hash (shcmd->basic.digest,
- (const guchar *)img->fuzzy_sig, sizeof (img->fuzzy_sig),
+ (const guchar *)img->dct, sizeof (gdouble) * 64 * 64,
rule->hash_key->str, rule->hash_key->len);
msg_debug_pool ("loading shingles of type %s with key %*xs",
rule->algorithm_str,
16, rule->shingles_key->str);
- memcpy (&shcmd->sgl, shingles, sizeof (shcmd->sgl));
- shcmd->basic.shingles_count = RSPAMD_SHINGLE_SIZE;
-
/*
* We always save encrypted command as it can handle both
* encrypted and unencrypted requests.
@@ -1605,9 +1602,10 @@ fuzzy_insert_result (struct fuzzy_client_session *session,
nval *= rep->prob;
msg_info_task (
"found fuzzy hash %*xs with weight: "
- "%.2f, in list: %s:%d%s",
+ "%.2f, probability %.2f, in list: %s:%d%s",
(gint)sizeof (cmd->digest), cmd->digest,
nval,
+ (gdouble)rep->prob,
symbol,
rep->flag,
map == NULL ? "(unknown)" : "");
diff --git a/test/rspamd_shingles_test.c b/test/rspamd_shingles_test.c
index 401a26309..e1367cca4 100644
--- a/test/rspamd_shingles_test.c
+++ b/test/rspamd_shingles_test.c
@@ -117,11 +117,11 @@ test_case (gsize cnt, gsize max_len, gdouble perm_factor,
ottery_rand_bytes (key, sizeof (key));
input = generate_fuzzy_words (cnt, max_len);
ts1 = rspamd_get_virtual_ticks ();
- sgl = rspamd_shingles_generate (input, key, NULL,
+ sgl = rspamd_shingles_from_text (input, key, NULL,
rspamd_shingles_default_filter, NULL, alg);
ts2 = rspamd_get_virtual_ticks ();
permute_vector (input, perm_factor);
- sgl_permuted = rspamd_shingles_generate (input, key, NULL,
+ sgl_permuted = rspamd_shingles_from_text (input, key, NULL,
rspamd_shingles_default_filter, NULL, alg);
res = rspamd_shingles_compare (sgl, sgl_permuted);
@@ -203,28 +203,28 @@ rspamd_shingles_test_func (void)
g_array_append_val (input, tok);
}
- sgl = rspamd_shingles_generate (input, key, NULL,
+ sgl = rspamd_shingles_from_text (input, key, NULL,
rspamd_shingles_default_filter, NULL, RSPAMD_SHINGLES_OLD);
for (i = 0; i < RSPAMD_SHINGLE_SIZE; i ++) {
g_assert (sgl->hashes[i] == expected_old[i]);
}
g_free (sgl);
- sgl = rspamd_shingles_generate (input, key, NULL,
+ sgl = rspamd_shingles_from_text (input, key, NULL,
rspamd_shingles_default_filter, NULL, RSPAMD_SHINGLES_XXHASH);
for (i = 0; i < RSPAMD_SHINGLE_SIZE; i ++) {
g_assert (sgl->hashes[i] == expected_xxhash[i]);
}
g_free (sgl);
- sgl = rspamd_shingles_generate (input, key, NULL,
+ sgl = rspamd_shingles_from_text (input, key, NULL,
rspamd_shingles_default_filter, NULL, RSPAMD_SHINGLES_MUMHASH);
for (i = 0; i < RSPAMD_SHINGLE_SIZE; i ++) {
g_assert (sgl->hashes[i] == expected_mumhash[i]);
}
g_free (sgl);
- sgl = rspamd_shingles_generate (input, key, NULL,
+ sgl = rspamd_shingles_from_text (input, key, NULL,
rspamd_shingles_default_filter, NULL, RSPAMD_SHINGLES_FAST);
for (i = 0; i < RSPAMD_SHINGLE_SIZE; i ++) {
g_assert (sgl->hashes[i] == expected_fasthash[i]);