aboutsummaryrefslogtreecommitdiffstats
path: root/src/libmime/images.c
diff options
context:
space:
mode:
authorVsevolod Stakhov <vsevolod@highsecure.ru>2016-12-08 17:17:08 +0000
committerVsevolod Stakhov <vsevolod@highsecure.ru>2016-12-08 17:47:44 +0000
commitc245ec1df1d9e53dd0f4f28239688d40bc3d9d28 (patch)
treebe7cb1da09a371b60ae2d8e2a3060c81fafcb02e /src/libmime/images.c
parent0b2fdd92b52b4fa37437ad03e7dfb82d796930a9 (diff)
downloadrspamd-c245ec1df1d9e53dd0f4f28239688d40bc3d9d28.tar.gz
rspamd-c245ec1df1d9e53dd0f4f28239688d40bc3d9d28.zip
[Feature] Implement new algorithm for fuzzy hashes of images
Diffstat (limited to 'src/libmime/images.c')
-rw-r--r--src/libmime/images.c89
1 files changed, 74 insertions, 15 deletions
diff --git a/src/libmime/images.c b/src/libmime/images.c
index a65c580bb..57528fb54 100644
--- a/src/libmime/images.c
+++ b/src/libmime/images.c
@@ -21,8 +21,9 @@
#ifdef WITH_GD
#include "gd.h"
+#include <math.h>
-#define RSPAMD_NORMALIZED_DIM 64
+#define RSPAMD_NORMALIZED_DIM rspamd_cryptobox_HASHBYTES / 8
#endif
static const guint8 png_signature[] = {137, 80, 78, 71, 13, 10, 26, 10};
@@ -213,7 +214,9 @@ rspamd_image_normalize (struct rspamd_task *task, struct rspamd_image *img)
{
#ifdef WITH_GD
gdImagePtr src = NULL, dst = NULL;
- guint nw, nh, i, j;
+ guint nw, nh, i, j, b = 0;
+ gdouble avg, sum;
+ guchar sig[rspamd_cryptobox_HASHBYTES];
if (img->data->len == 0 || img->data->len > G_MAXINT32) {
return;
@@ -247,35 +250,91 @@ rspamd_image_normalize (struct rspamd_task *task, struct rspamd_image *img)
}
else {
gdImageSetInterpolationMethod (src, GD_BILINEAR_FIXED);
- nw = img->width;
- nh = img->height;
- if (nh > RSPAMD_NORMALIZED_DIM) {
- nw = nw * RSPAMD_NORMALIZED_DIM / nh;
- nw = nw ? nw : 1;
- nh = RSPAMD_NORMALIZED_DIM;
- }
-
- if (nw > RSPAMD_NORMALIZED_DIM) {
- nh = nh * RSPAMD_NORMALIZED_DIM / nw;
- nh = nh ? nh : 1;
- nw = RSPAMD_NORMALIZED_DIM;
- }
+ nw = RSPAMD_NORMALIZED_DIM;
+ nh = RSPAMD_NORMALIZED_DIM;
dst = gdImageScale (src, nw, nh);
+ gdImageGrayScale (dst);
gdImageDestroy (src);
img->normalized_data = g_array_sized_new (FALSE, FALSE, sizeof (gint),
nh * nw);
+ avg = 0;
+
+ /* Calculate moving average */
for (i = 0; i < nh; i ++) {
for (j = 0; j < nw; j ++) {
gint px = gdImageGetPixel (dst, j, i);
+ avg += (px - avg) / (gdouble)(i * nh + j + 1);
g_array_append_val (img->normalized_data, px);
}
}
+ /*
+ * Split message into blocks:
+ *
+ * ****
+ * ****
+ *
+ * Get sum of saturation values, and set bit if sum is > avg * 4
+ * Then go further
+ *
+ * ****
+ * ****
+ *
+ * and repeat this algorithm.
+ *
+ * So on each iteration we move by 16 pixels and calculate 2 bits of signature
+ * hence, we produce ({64} / {4}) ^ 2 * 2 == 512 bits
+ */
+ for (i = 0; i < nh; i += 4) {
+ for (j = 0; j < nw; j += 4) {
+ gint p[8];
+
+ p[0] = g_array_index (img->normalized_data, gint, i * nh + j);
+ p[1] = g_array_index (img->normalized_data, gint, i * nh + j + 1);
+ p[2] = g_array_index (img->normalized_data, gint, i * nh + j + 2);
+ p[3] = g_array_index (img->normalized_data, gint, i * nh + j + 3);
+ p[4] = g_array_index (img->normalized_data, gint, (i + 1) * nh + j);
+ p[5] = g_array_index (img->normalized_data, gint, (i + 1) * nh + j + 1);
+ p[6] = g_array_index (img->normalized_data, gint, (i + 1) * nh + j + 2);
+ p[7] = g_array_index (img->normalized_data, gint, (i + 1) * nh + j + 3);
+ sum = p[0] + p[1] + p[2] + p[3] + p[4] + p[5] + p[6] + p[7];
+
+ if (fabs (sum) >= fabs (avg * 8)) {
+ setbit (sig, b);
+ }
+ else {
+ clrbit (sig, b);
+ }
+ b ++;
+
+ p[0] = g_array_index (img->normalized_data, gint, (i + 2) * nh + j);
+ p[1] = g_array_index (img->normalized_data, gint, (i + 2) * nh + j + 1);
+ p[2] = g_array_index (img->normalized_data, gint, (i + 2) * nh + j + 2);
+ p[3] = g_array_index (img->normalized_data, gint, (i + 2) * nh + j + 3);
+ p[4] = g_array_index (img->normalized_data, gint, (i + 3) * nh + j);
+ p[5] = g_array_index (img->normalized_data, gint, (i + 3) * nh + j + 1);
+ p[6] = g_array_index (img->normalized_data, gint, (i + 3) * nh + j + 2);
+ p[7] = g_array_index (img->normalized_data, gint, (i + 3) * nh + j + 3);
+
+ sum = p[0] + p[1] + p[2] + p[3] + p[4] + p[5] + p[6] + p[7];
+
+ if (fabs (sum) >= fabs (avg * 8)) {
+ setbit (sig, b);
+ }
+ else {
+ clrbit (sig, b);
+ }
+ b ++;
+ }
+ }
+
+ msg_debug_task ("avg: %.0f, sig: %32xs, bits: %d", avg, sig, b);
+
gdImageDestroy (dst);
rspamd_mempool_add_destructor (task->task_pool, rspamd_array_free_hard,
img->normalized_data);