summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorVsevolod Stakhov <vsevolod@highsecure.ru>2016-12-09 12:50:11 +0000
committerVsevolod Stakhov <vsevolod@highsecure.ru>2016-12-09 12:51:08 +0000
commit7ddd7a0d48c32f227a73f8d00c4e3ff34ab6feaf (patch)
treec33d9b4b561350f01e2dd43aa60b003311c21b39 /src
parent4ea095b0e117649f3d8f733023b983b047358871 (diff)
downloadrspamd-7ddd7a0d48c32f227a73f8d00c4e3ff34ab6feaf.tar.gz
rspamd-7ddd7a0d48c32f227a73f8d00c4e3ff34ab6feaf.zip
[Feature] Apply DCT using AAN for fuzzy signature
Diffstat (limited to 'src')
-rw-r--r--src/libmime/images.c266
-rw-r--r--src/libmime/images.h2
2 files changed, 201 insertions, 67 deletions
diff --git a/src/libmime/images.c b/src/libmime/images.c
index 81bdcce22..58e288bc5 100644
--- a/src/libmime/images.c
+++ b/src/libmime/images.c
@@ -23,7 +23,7 @@
#include "gd.h"
#include <math.h>
-#define RSPAMD_NORMALIZED_DIM rspamd_cryptobox_HASHBYTES / 8
+#define RSPAMD_NORMALIZED_DIM 64
#endif
static const guint8 png_signature[] = {137, 80, 78, 71, 13, 10, 26, 10};
@@ -209,12 +209,167 @@ process_bmp_image (struct rspamd_task *task, GByteArray *data)
return img;
}
+#define SET_P(off_i, off_j) do { /* Fill p[0..7] with eight pixels read from `dst`: \
+ * p[0..3] use first coordinate i + (off_i), p[4..7] use i + (off_i) + 1; in \
+ * both groups the second coordinate runs over j + (off_j) .. j + (off_j) + 3. \
+ * Expects `p`, `dst`, `i` and `j` to exist in the expansion scope. \
+ * NOTE(review): no use of this macro is visible in this hunk - confirm it is \
+ * still needed. */ \
+ p[0] = (guint)gdImageGetPixel (dst, i + (off_i), j + (off_j)); \
+ p[1] = (guint)gdImageGetPixel (dst, i + (off_i), j + (off_j) + 1); \
+ p[2] = (guint)gdImageGetPixel (dst, i + (off_i), j + (off_j) + 2); \
+ p[3] = (guint)gdImageGetPixel (dst, i + (off_i), j + (off_j) + 3); \
+ p[4] = (guint)gdImageGetPixel (dst, i + (off_i) + 1, j + (off_j)); \
+ p[5] = (guint)gdImageGetPixel (dst, i + (off_i) + 1, j + (off_j) + 1); \
+ p[6] = (guint)gdImageGetPixel (dst, i + (off_i) + 1, j + (off_j) + 2); \
+ p[7] = (guint)gdImageGetPixel (dst, i + (off_i) + 1, j + (off_j) + 3); \
+} while (0)
+
+#define SET_N() do { /* Accumulate into `n` the eight differences p[k] - navg for \
+ * k = 0..7; p[k] is cast to gint before the subtraction and the result is \
+ * cast to guint before being added. Expects `n`, `p` and `navg` to exist in \
+ * the expansion scope. NOTE(review): neither a use nor a matching #undef of \
+ * this macro is visible in this hunk - confirm it is still needed. */ \
+ n += (guint) ((gint)p[0] - navg); \
+ n += (guint) ((gint)p[1] - navg); \
+ n += (guint) ((gint)p[2] - navg); \
+ n += (guint) ((gint)p[3] - navg); \
+ n += (guint) ((gint)p[4] - navg); \
+ n += (guint) ((gint)p[5] - navg); \
+ n += (guint) ((gint)p[6] - navg); \
+ n += (guint) ((gint)p[7] - navg); \
+} while (0)
+
+#ifdef WITH_GD
+/*
+ * DCT from Emil Mikulic.
+ * http://unix4lyfe.org/dct/
+ *
+ * Integer (fixed-point) transform of one 8x8 block, done as two passes of the
+ * same 8-point butterfly network. The first pass reads `pixels` and fills the
+ * local `rows` scratch buffer; the second pass reads `rows` and writes `out`.
+ *
+ * @param pixels input 8x8 block of integer samples (read only)
+ * @param out destination for 64 gdouble values; the function writes
+ * out[i * 8 + 0] .. out[i * 8 + 7] for i = 0..7, so the caller must provide
+ * at least 64 contiguous elements
+ *
+ * The trigonometric constants below are pre-multiplied by 2^10 (2^7 for r2)
+ * and the products are shifted back when results are stored.
+ *
+ * NOTE(review): the first pass stores its eight results at indices
+ * 0, 4, 2, 6, 7, 1, 3, 5 of rows[i][], while the second pass stores the
+ * corresponding results at consecutive offsets 0..7 of out[i * 8 + ...].
+ * Confirm that this difference in ordering is intended.
+ */
+static void
+rspamd_image_dct_block (gint pixels[8][8], gdouble *out)
+{
+ gint i;
+ gint rows[8][8];
+
+ static const gint c1 = 1004 /* cos(pi/16) << 10 */,
+ s1 = 200 /* sin(pi/16) << 10 */,
+ c3 = 851 /* cos(3pi/16) << 10 */,
+ s3 = 569 /* sin(3pi/16) << 10 */,
+ r2c6 = 554 /* sqrt(2)*cos(6pi/16) << 10 */,
+ r2s6 = 1337 /* sqrt(2)*sin(6pi/16) << 10 */,
+ r2 = 181; /* sqrt(2) << 7*/
+
+ gint x0, x1, x2, x3, x4, x5, x6, x7, x8;
+
+ /* transform rows
+ * (first pass: iteration i reads pixels[0..7][i] and writes rows[i][0..7]) */
+ for (i = 0; i < 8; i++) {
+ x0 = pixels[0][i];
+ x1 = pixels[1][i];
+ x2 = pixels[2][i];
+ x3 = pixels[3][i];
+ x4 = pixels[4][i];
+ x5 = pixels[5][i];
+ x6 = pixels[6][i];
+ x7 = pixels[7][i];
+
+ /* Stage 1
+ * Sums and differences of the mirrored inputs (0/7, 1/6, 2/5, 3/4):
+ * sums end up in x8, x7, x6, x5 and differences in x0, x1, x2, x3. */
+ x8 = x7 + x0;
+ x0 -= x7;
+ x7 = x1 + x6;
+ x1 -= x6;
+ x6 = x2 + x5;
+ x2 -= x5;
+ x5 = x3 + x4;
+ x3 -= x4;
+
+ /* Stage 2
+ * Second level of sums/differences on the Stage 1 sums, then two
+ * scaled pair transforms that share one product (x6) each:
+ * x2 = c1*x1 - s1*x2, x1 = s1*x1 + c1*x2 (old values on the right)
+ * x3 = c3*x0 - s3*x3, x0 = s3*x0 + c3*x3
+ * After this stage x0..x3 carry the << 10 factor of the constants. */
+ x4 = x8 + x5;
+ x8 -= x5;
+ x5 = x7 + x6;
+ x7 -= x6;
+ x6 = c1 * (x1 + x2);
+ x2 = (-s1 - c1) * x2 + x6;
+ x1 = (s1 - c1) * x1 + x6;
+ x6 = c3 * (x0 + x3);
+ x3 = (-s3 - c3) * x3 + x6;
+ x0 = (s3 - c3) * x0 + x6;
+
+ /* Stage 3
+ * x6/x4 take the final sum/difference of the unscaled terms; x7/x8
+ * get the same shared-product pair transform with r2c6/r2s6
+ * (x7 = r2c6*x8 - r2s6*x7, x8 = r2s6*x8 + r2c6*x7), and the scaled
+ * x0..x3 are combined by plain sums and differences. */
+ x6 = x4 + x5;
+ x4 -= x5;
+ x5 = r2c6 * (x7 + x8);
+ x7 = (-r2s6 - r2c6) * x7 + x5;
+ x8 = (r2s6 - r2c6) * x8 + x5;
+ x5 = x0 + x2;
+ x0 -= x2;
+ x2 = x3 + x1;
+ x3 -= x1;
+
+ /* Stage 4 and output
+ * x6 and x4 were never multiplied by a constant and are stored as is.
+ * Terms carrying one << 10 constant are shifted right by 10; the two
+ * terms additionally multiplied by r2 (<< 7) are shifted by 17. */
+ rows[i][0] = x6;
+ rows[i][4] = x4;
+ rows[i][2] = x8 >> 10;
+ rows[i][6] = x7 >> 10;
+ rows[i][7] = (x2 - x5) >> 10;
+ rows[i][1] = (x2 + x5) >> 10;
+ rows[i][3] = (x3 * r2) >> 17;
+ rows[i][5] = (x0 * r2) >> 17;
+ }
+
+ /* transform columns
+ * (second pass: iteration i reads rows[0..7][i] and writes
+ * out[i * 8 + 0..7]; Stages 1-3 are the same network as in the first
+ * pass) */
+ for (i = 0; i < 8; i++) {
+ x0 = rows[0][i];
+ x1 = rows[1][i];
+ x2 = rows[2][i];
+ x3 = rows[3][i];
+ x4 = rows[4][i];
+ x5 = rows[5][i];
+ x6 = rows[6][i];
+ x7 = rows[7][i];
+
+ /* Stage 1 */
+ x8 = x7 + x0;
+ x0 -= x7;
+ x7 = x1 + x6;
+ x1 -= x6;
+ x6 = x2 + x5;
+ x2 -= x5;
+ x5 = x3 + x4;
+ x3 -= x4;
+
+ /* Stage 2 */
+ x4 = x8 + x5;
+ x8 -= x5;
+ x5 = x7 + x6;
+ x7 -= x6;
+ x6 = c1 * (x1 + x2);
+ x2 = (-s1 - c1) * x2 + x6;
+ x1 = (s1 - c1) * x1 + x6;
+ x6 = c3 * (x0 + x3);
+ x3 = (-s3 - c3) * x3 + x6;
+ x0 = (s3 - c3) * x0 + x6;
+
+ /* Stage 3 */
+ x6 = x4 + x5;
+ x4 -= x5;
+ x5 = r2c6 * (x7 + x8);
+ x7 = (-r2s6 - r2c6) * x7 + x5;
+ x8 = (r2s6 - r2c6) * x8 + x5;
+ x5 = x0 + x2;
+ x0 -= x2;
+ x2 = x3 + x1;
+ x3 -= x1;
+
+ /* Stage 4 and output
+ * Each value gets a constant added and is then shifted right: by 3
+ * for the unscaled terms, by 13 (10 + 3) for terms carrying one
+ * << 10 constant, and by 8 then 12 around the r2 (<< 7) multiply,
+ * so every output is shifted 3 bits beyond removing its fixed-point
+ * factor. The integer result is then converted to double.
+ * NOTE(review): the operands are signed, and right-shifting a
+ * negative signed value is implementation-defined in C. */
+ out[i * 8] = (double) ((x6 + 16) >> 3);
+ out[i * 8 + 1] = (double) ((x4 + 16) >> 3);
+ out[i * 8 + 2] = (double) ((x8 + 16384) >> 13);
+ out[i * 8 + 3] = (double) ((x7 + 16384) >> 13);
+ out[i * 8 + 4] = (double) ((x2 - x5 + 16384) >> 13);
+ out[i * 8 + 5] = (double) ((x2 + x5 + 16384) >> 13);
+ out[i * 8 + 6] = (double) (((x3 >> 8) * r2 + 8192) >> 12);
+ out[i * 8 + 7] = (double) (((x0 >> 8) * r2 + 8192) >> 12);
+ }
+}
+#endif
+
static void
rspamd_image_normalize (struct rspamd_task *task, struct rspamd_image *img)
{
#ifdef WITH_GD
gdImagePtr src = NULL, dst = NULL;
- guint nw, nh, i, j, b = 0, nmax, nmin;
+ guint i, j, k, l;
if (img->data->len == 0 || img->data->len > G_MAXINT32) {
return;
@@ -249,29 +404,14 @@ rspamd_image_normalize (struct rspamd_task *task, struct rspamd_image *img)
else {
gdImageSetInterpolationMethod (src, GD_BILINEAR_FIXED);
- nw = RSPAMD_NORMALIZED_DIM;
- nh = RSPAMD_NORMALIZED_DIM;
-
- dst = gdImageScale (src, nw, nh);
+ dst = gdImageScale (src, RSPAMD_NORMALIZED_DIM, RSPAMD_NORMALIZED_DIM);
gdImageGrayScale (dst);
gdImageDestroy (src);
img->is_normalized = TRUE;
- nmax = 0;
- nmin = G_MAXUINT;
-
- /* Calculate moving average */
- for (i = 0; i < nh; i ++) {
- for (j = 0; j < nw; j ++) {
- guint px = (guint)gdImageGetPixel (dst, j, i);
- if (px > nmax) {
- nmax = px;
- }
- if (px < nmin) {
- nmin = px;
- }
- }
- }
+ img->dct = g_malloc (sizeof (gdouble) * 64 * 64);
+ rspamd_mempool_add_destructor (task->task_pool, g_free,
+ img->dct);
/*
* Split message into blocks:
@@ -290,59 +430,53 @@ rspamd_image_normalize (struct rspamd_task *task, struct rspamd_image *img)
* So on each iteration we move by 16 pixels and calculate 2 elements of
* signature
*/
- for (i = 0; i < nh; i += 4) {
- for (j = 0; j < nw; j += 4) {
- guint p[8];
- guint64 n = 0;
-
- p[0] = nmax - (guint)gdImageGetPixel (dst, i, j) + nmin;
- p[1] = nmax - (guint)gdImageGetPixel (dst, i, j + 1) + nmin;
- p[2] = nmax - (guint)gdImageGetPixel (dst, i, j + 2) + nmin;
- p[3] = nmax - (guint)gdImageGetPixel (dst, i, j + 3) + nmin;
- p[4] = nmax - (guint)gdImageGetPixel (dst, i + 1, j) + nmin;
- p[5] = nmax - (guint)gdImageGetPixel (dst, i + 1, j + 1) + nmin;
- p[6] = nmax - (guint)gdImageGetPixel (dst, i + 1, j + 2) + nmin;
- p[7] = nmax - (guint)gdImageGetPixel (dst, i + 1, j + 3) + nmin;
-
- n |= ((guint64)(p[0] / (nmax - nmin) % 256)) << 0;
- n |= ((guint64)(p[1] / (nmax - nmin) % 256)) << 8;
- n |= ((guint64)(p[2] / (nmax - nmin) % 256)) << 16;
- n |= ((guint64)(p[3] / (nmax - nmin) % 256)) << 24;
- n |= ((guint64)(p[4] / (nmax - nmin) % 256)) << 32;
- n |= ((guint64)(p[5] / (nmax - nmin) % 256)) << 40;
- n |= ((guint64)(p[6] / (nmax - nmin) % 256)) << 48;
- n |= ((guint64)(p[7] / (nmax - nmin) % 256)) << 56;
- img->fuzzy_sig[b++] = n;
-
- p[0] = nmax - (guint)gdImageGetPixel (dst, i + 2, j) + nmin;
- p[1] = nmax - (guint)gdImageGetPixel (dst, i + 2, j + 1) + nmin;
- p[2] = nmax - (guint)gdImageGetPixel (dst, i + 2, j + 2) + nmin;
- p[3] = nmax - (guint)gdImageGetPixel (dst, i + 2, j + 3) + nmin;
- p[4] = nmax - (guint)gdImageGetPixel (dst, i + 3, j) + nmin;
- p[5] = nmax - (guint)gdImageGetPixel (dst, i + 3, j + 1) + nmin;
- p[6] = nmax - (guint)gdImageGetPixel (dst, i + 3, j + 2) + nmin;
- p[7] = nmax - (guint)gdImageGetPixel (dst, i + 3, j + 3) + nmin;
-
- n |= ((guint64)(p[0] / (nmax - nmin) % 256)) << 0;
- n |= ((guint64)(p[1] / (nmax - nmin) % 256)) << 8;
- n |= ((guint64)(p[2] / (nmax - nmin) % 256)) << 16;
- n |= ((guint64)(p[3] / (nmax - nmin) % 256)) << 24;
- n |= ((guint64)(p[4] / (nmax - nmin) % 256)) << 32;
- n |= ((guint64)(p[5] / (nmax - nmin) % 256)) << 40;
- n |= ((guint64)(p[6] / (nmax - nmin) % 256)) << 48;
- n |= ((guint64)(p[7] / (nmax - nmin) % 256)) << 56;
- img->fuzzy_sig[b++] = n;
+ for (i = 0; i < RSPAMD_NORMALIZED_DIM; i += 8) {
+ for (j = 0; j < RSPAMD_NORMALIZED_DIM; j += 8) {
+ gint p[8][8];
+
+ for (k = 0; k < 8; k ++) {
+ p[k][0] = gdImageGetPixel (dst, i + k, j);
+ p[k][1] = gdImageGetPixel (dst, i + k, j + 1);
+ p[k][2] = gdImageGetPixel (dst, i + k, j + 2);
+ p[k][3] = gdImageGetPixel (dst, i + k, j + 3);
+ p[k][4] = gdImageGetPixel (dst, i + k, j + 4);
+ p[k][5] = gdImageGetPixel (dst, i + k, j + 5);
+ p[k][6] = gdImageGetPixel (dst, i + k, j + 6);
+ p[k][7] = gdImageGetPixel (dst, i + k, j + 7);
+ }
+
+ rspamd_image_dct_block (p,
+ img->dct + i * RSPAMD_NORMALIZED_DIM + j);
+
+ gdouble avg = 0.0;
+
+ for (k = 0; k < 8; k ++) {
+ for (l = 0; l < 8; l ++) {
+ gdouble x = *(img->dct +
+ i * RSPAMD_NORMALIZED_DIM + j + k * 8 + l);
+ avg += (x - avg) / (gdouble)(k * 8 + l + 1);
+ }
+
+ }
+
+ for (k = 0; k < 8; k ++) {
+ for (l = 0; l < 8; l ++) {
+ gdouble* x = img->dct +
+ i * RSPAMD_NORMALIZED_DIM + j + k * 8 + l;
+ *x = *x >= avg ? 1.0 : 0.0;
+ }
+ }
+
}
}
- msg_debug_task ("min: %d, max: %d, sig: %32xs, elts: %d", nmin, nmax,
- (const char *)img->fuzzy_sig, b);
-
gdImageDestroy (dst);
}
#endif
}
+#undef SET_P
+
static void
process_image (struct rspamd_task *task, struct rspamd_mime_part *part)
{
diff --git a/src/libmime/images.h b/src/libmime/images.h
index 01d0afd22..1ac6c8786 100644
--- a/src/libmime/images.h
+++ b/src/libmime/images.h
@@ -24,7 +24,7 @@ struct rspamd_image {
guint32 width;
guint32 height;
gboolean is_normalized;
- guint64 fuzzy_sig[32];
+ gdouble *dct;
};
/*