summary | refs | log | tree | commit | diff | stats
path: root/src
diff options
context:
space:
mode:
author: Vsevolod Stakhov <vsevolod@highsecure.ru> 2016-12-08 18:35:12 +0000
committer: Vsevolod Stakhov <vsevolod@highsecure.ru> 2016-12-08 18:35:12 +0000
commit: 2c320e44e3f02bc823d8fa273e3382f00d2bea4b (patch)
tree: 516ae54c7fc79f63a72ec21ce45f28bc547dbaec /src
parent: f449e295fd9b11acdff158a091a4086dd34e6837 (diff)
download: rspamd-2c320e44e3f02bc823d8fa273e3382f00d2bea4b.tar.gz
download: rspamd-2c320e44e3f02bc823d8fa273e3382f00d2bea4b.zip
[Rework] Rework images fuzzy hashes algorithm
Diffstat (limited to 'src')
-rw-r--r-- src/libmime/images.c 112
-rw-r--r-- src/libmime/images.h 5
-rw-r--r-- src/plugins/fuzzy_check.c 14
3 files changed, 67 insertions, 64 deletions
diff --git a/src/libmime/images.c b/src/libmime/images.c
index e6fe8ee4b..81bdcce22 100644
--- a/src/libmime/images.c
+++ b/src/libmime/images.c
@@ -214,9 +214,7 @@ rspamd_image_normalize (struct rspamd_task *task, struct rspamd_image *img)
{
#ifdef WITH_GD
gdImagePtr src = NULL, dst = NULL;
- guint nw, nh, i, j, b = 0;
- gdouble avg, sum;
- guchar sig[rspamd_cryptobox_HASHBYTES];
+ guint nw, nh, i, j, b = 0, nmax, nmin;
if (img->data->len == 0 || img->data->len > G_MAXINT32) {
return;
@@ -258,18 +256,20 @@ rspamd_image_normalize (struct rspamd_task *task, struct rspamd_image *img)
gdImageGrayScale (dst);
gdImageDestroy (src);
- img->normalized_data = g_array_sized_new (FALSE, FALSE, sizeof (gint),
- nh * nw);
-
- avg = 0;
+ img->is_normalized = TRUE;
+ nmax = 0;
+ nmin = G_MAXUINT;
/* Calculate moving average */
for (i = 0; i < nh; i ++) {
for (j = 0; j < nw; j ++) {
- gint px = gdImageGetPixel (dst, j, i);
- avg += (px - avg) / (gdouble)(i * nh + j + 1);
-
- g_array_append_val (img->normalized_data, px);
+ guint px = (guint)gdImageGetPixel (dst, j, i);
+ if (px > nmax) {
+ nmax = px;
+ }
+ if (px < nmin) {
+ nmin = px;
+ }
}
}
@@ -279,7 +279,7 @@ rspamd_image_normalize (struct rspamd_task *task, struct rspamd_image *img)
* ****
* ****
*
- * Get sum of saturation values, and set bit if sum is > avg * 4
+ * Get sum of saturation values, and set bit if sum is > avg
* Then go further
*
* ****
@@ -287,58 +287,58 @@ rspamd_image_normalize (struct rspamd_task *task, struct rspamd_image *img)
*
* and repeat this algorithm.
*
- * So on each iteration we move by 16 pixels and calculate 2 bits of signature
- * hence, we produce ({64} / {4}) ^ 2 * 2 == 512 bits
+ * So on each iteration we move by 16 pixels and calculate 2 elements of
+ * signature
*/
for (i = 0; i < nh; i += 4) {
for (j = 0; j < nw; j += 4) {
- gint p[8];
-
- p[0] = g_array_index (img->normalized_data, gint, i * nh + j);
- p[1] = g_array_index (img->normalized_data, gint, i * nh + j + 1);
- p[2] = g_array_index (img->normalized_data, gint, i * nh + j + 2);
- p[3] = g_array_index (img->normalized_data, gint, i * nh + j + 3);
- p[4] = g_array_index (img->normalized_data, gint, (i + 1) * nh + j);
- p[5] = g_array_index (img->normalized_data, gint, (i + 1) * nh + j + 1);
- p[6] = g_array_index (img->normalized_data, gint, (i + 1) * nh + j + 2);
- p[7] = g_array_index (img->normalized_data, gint, (i + 1) * nh + j + 3);
- sum = p[0] + p[1] + p[2] + p[3] + p[4] + p[5] + p[6] + p[7];
-
- if (fabs (sum) >= fabs (avg * 8)) {
- setbit (sig, b);
- }
- else {
- clrbit (sig, b);
- }
- b ++;
-
- p[0] = g_array_index (img->normalized_data, gint, (i + 2) * nh + j);
- p[1] = g_array_index (img->normalized_data, gint, (i + 2) * nh + j + 1);
- p[2] = g_array_index (img->normalized_data, gint, (i + 2) * nh + j + 2);
- p[3] = g_array_index (img->normalized_data, gint, (i + 2) * nh + j + 3);
- p[4] = g_array_index (img->normalized_data, gint, (i + 3) * nh + j);
- p[5] = g_array_index (img->normalized_data, gint, (i + 3) * nh + j + 1);
- p[6] = g_array_index (img->normalized_data, gint, (i + 3) * nh + j + 2);
- p[7] = g_array_index (img->normalized_data, gint, (i + 3) * nh + j + 3);
-
- sum = p[0] + p[1] + p[2] + p[3] + p[4] + p[5] + p[6] + p[7];
-
- if (fabs (sum) >= fabs (avg * 8)) {
- setbit (sig, b);
- }
- else {
- clrbit (sig, b);
- }
- b ++;
+ guint p[8];
+ guint64 n = 0;
+
+ p[0] = nmax - (guint)gdImageGetPixel (dst, i, j) + nmin;
+ p[1] = nmax - (guint)gdImageGetPixel (dst, i, j + 1) + nmin;
+ p[2] = nmax - (guint)gdImageGetPixel (dst, i, j + 2) + nmin;
+ p[3] = nmax - (guint)gdImageGetPixel (dst, i, j + 3) + nmin;
+ p[4] = nmax - (guint)gdImageGetPixel (dst, i + 1, j) + nmin;
+ p[5] = nmax - (guint)gdImageGetPixel (dst, i + 1, j + 1) + nmin;
+ p[6] = nmax - (guint)gdImageGetPixel (dst, i + 1, j + 2) + nmin;
+ p[7] = nmax - (guint)gdImageGetPixel (dst, i + 1, j + 3) + nmin;
+
+ n |= ((guint64)(p[0] / (nmax - nmin) % 256)) << 0;
+ n |= ((guint64)(p[1] / (nmax - nmin) % 256)) << 8;
+ n |= ((guint64)(p[2] / (nmax - nmin) % 256)) << 16;
+ n |= ((guint64)(p[3] / (nmax - nmin) % 256)) << 24;
+ n |= ((guint64)(p[4] / (nmax - nmin) % 256)) << 32;
+ n |= ((guint64)(p[5] / (nmax - nmin) % 256)) << 40;
+ n |= ((guint64)(p[6] / (nmax - nmin) % 256)) << 48;
+ n |= ((guint64)(p[7] / (nmax - nmin) % 256)) << 56;
+ img->fuzzy_sig[b++] = n;
+
+ p[0] = nmax - (guint)gdImageGetPixel (dst, i + 2, j) + nmin;
+ p[1] = nmax - (guint)gdImageGetPixel (dst, i + 2, j + 1) + nmin;
+ p[2] = nmax - (guint)gdImageGetPixel (dst, i + 2, j + 2) + nmin;
+ p[3] = nmax - (guint)gdImageGetPixel (dst, i + 2, j + 3) + nmin;
+ p[4] = nmax - (guint)gdImageGetPixel (dst, i + 3, j) + nmin;
+ p[5] = nmax - (guint)gdImageGetPixel (dst, i + 3, j + 1) + nmin;
+ p[6] = nmax - (guint)gdImageGetPixel (dst, i + 3, j + 2) + nmin;
+ p[7] = nmax - (guint)gdImageGetPixel (dst, i + 3, j + 3) + nmin;
+
+ n |= ((guint64)(p[0] / (nmax - nmin) % 256)) << 0;
+ n |= ((guint64)(p[1] / (nmax - nmin) % 256)) << 8;
+ n |= ((guint64)(p[2] / (nmax - nmin) % 256)) << 16;
+ n |= ((guint64)(p[3] / (nmax - nmin) % 256)) << 24;
+ n |= ((guint64)(p[4] / (nmax - nmin) % 256)) << 32;
+ n |= ((guint64)(p[5] / (nmax - nmin) % 256)) << 40;
+ n |= ((guint64)(p[6] / (nmax - nmin) % 256)) << 48;
+ n |= ((guint64)(p[7] / (nmax - nmin) % 256)) << 56;
+ img->fuzzy_sig[b++] = n;
}
}
- msg_debug_task ("avg: %.0f, sig: %32xs, bits: %d", avg, sig, b);
- memcpy (img->fuzzy_sig, sig, sizeof (img->fuzzy_sig));
+ msg_debug_task ("min: %d, max: %d, sig: %32xs, elts: %d", nmin, nmax,
+ (const char *)img->fuzzy_sig, b);
gdImageDestroy (dst);
- rspamd_mempool_add_destructor (task->task_pool, rspamd_array_free_hard,
- img->normalized_data);
}
#endif
}
diff --git a/src/libmime/images.h b/src/libmime/images.h
index 1b46954e0..01d0afd22 100644
--- a/src/libmime/images.h
+++ b/src/libmime/images.h
@@ -2,7 +2,6 @@
#define IMAGES_H_
#include "config.h"
-#include "cryptobox.h"
struct html_image;
struct rspamd_task;
@@ -19,13 +18,13 @@ enum rspamd_image_type {
struct rspamd_image {
struct rspamd_mime_part *parent;
GByteArray *data;
- GArray *normalized_data;
- guchar fuzzy_sig[rspamd_cryptobox_HASHBYTES];
const gchar *filename;
struct html_image *html_image;
enum rspamd_image_type type;
guint32 width;
guint32 height;
+ gboolean is_normalized;
+ guint64 fuzzy_sig[32];
};
/*
diff --git a/src/plugins/fuzzy_check.c b/src/plugins/fuzzy_check.c
index 3213837a1..f37ef45d0 100644
--- a/src/plugins/fuzzy_check.c
+++ b/src/plugins/fuzzy_check.c
@@ -1318,12 +1318,16 @@ fuzzy_cmd_from_image_part (struct fuzzy_rule *rule,
/*
* Generate shingles
*/
- for (i = 0; i < sizeof (img->fuzzy_sig); i += 2) {
- shingles[i / 2] = rspamd_cryptobox_fast_hash_specific (
- RSPAMD_CRYPTOBOX_MUMHASH, &img->fuzzy_sig[i], 2, 0);
+ G_STATIC_ASSERT (G_N_ELEMENTS (img->fuzzy_sig) == RSPAMD_SHINGLE_SIZE);
+
+ for (i = 0; i < RSPAMD_SHINGLE_SIZE; i ++) {
+ shingles[i] = rspamd_cryptobox_fast_hash_specific (
+ RSPAMD_CRYPTOBOX_MUMHASH,
+ (const guchar *)&img->fuzzy_sig[i],
+ sizeof (img->fuzzy_sig[i]), 0);
}
rspamd_cryptobox_hash (shcmd->basic.digest,
- img->fuzzy_sig, sizeof (img->fuzzy_sig),
+ (const guchar *)img->fuzzy_sig, sizeof (img->fuzzy_sig),
rule->hash_key->str, rule->hash_key->len);
msg_debug_pool ("loading shingles of type %s with key %*xs",
@@ -2211,7 +2215,7 @@ fuzzy_generate_commands (struct rspamd_task *task, struct fuzzy_rule *rule,
g_ptr_array_add (res, io);
}
- if (image->normalized_data) {
+ if (image->is_normalized) {
io = fuzzy_cmd_from_image_part (rule, c, flag, value,
task->task_pool,
image);