]> source.dussan.org Git - rspamd.git/commitdiff
[Feature] Cache and simplify DCT and jpeg decode
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Fri, 9 Dec 2016 18:47:32 +0000 (18:47 +0000)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Fri, 9 Dec 2016 18:47:32 +0000 (18:47 +0000)
src/libmime/images.c
src/libmime/images.h

index 15ed6c6ecc5887ebe345be3506ea736510e0977b..d6cd602a91910c0825c2ebf01b2592b265926a8c 100644 (file)
 
 #ifdef USABLE_GD
 #include "gd.h"
+#include "hash.h"
 #include <math.h>
 
 #define RSPAMD_NORMALIZED_DIM 64
+#define RSPAMD_IMAGES_CACHE_SIZE 256
+
+static rspamd_lru_hash_t *images_hash = NULL;
 #endif
 
 static const guint8 png_signature[] = {137, 80, 78, 71, 13, 10, 26, 10};
@@ -35,7 +39,6 @@ static const guint8 bmp_signature[] = {'B', 'M'};
 
 static void process_image (struct rspamd_task *task, struct rspamd_mime_part *part);
 
-
 void
 rspamd_images_process (struct rspamd_task *task)
 {
@@ -340,6 +343,87 @@ rspamd_image_dct_block (gint pixels[8][8], gdouble *out)
                out[i * 8 + 7] = (double) (((x0 >> 8) * r2 + 8192) >> 12);
        }
 }
+
+struct rspamd_image_cache_entry { /* One cached result: lookup key plus the packed DCT bitmap stored for it */
+       guchar digest[64]; /* Key bytes, copied from img->parent->digest when the entry is created */
+       guchar dct[RSPAMD_DCT_LEN / NBBY]; /* Bit array, one bit per DCT cell (RSPAMD_DCT_LEN bits in total) */
+};
+
+static void
+rspamd_image_cache_entry_dtor (gpointer p) /* Destructor passed to the LRU cache; p is a struct rspamd_image_cache_entry */
+{
+       struct rspamd_image_cache_entry *entry = p;
+       g_slice_free1 (sizeof (*entry), entry); /* Pairs with the g_slice_alloc0 of the same size used when entries are created */
+}
+
+static guint32
+rspamd_image_dct_hash (gconstpointer p) /* Hash callback for the image cache; p points at the key (digest) bytes */
+{
+       return rspamd_cryptobox_fast_hash (p, rspamd_cryptobox_HASHBYTES, /* Hashes exactly rspamd_cryptobox_HASHBYTES bytes of the key */
+                       rspamd_hash_seed ());
+}
+
+static gboolean
+rspamd_image_dct_equal (gconstpointer a, gconstpointer b) /* Key equality callback for the image cache */
+{
+       return memcmp (a, b, rspamd_cryptobox_HASHBYTES) == 0; /* TRUE when the first rspamd_cryptobox_HASHBYTES bytes of both keys match */
+}
+
+static void
+rspamd_image_create_cache (struct rspamd_config *cfg) /* Creates the file-scope images_hash LRU; cfg is not used in this body */
+{
+       images_hash = rspamd_lru_hash_new_full (RSPAMD_IMAGES_CACHE_SIZE, NULL, /* NOTE(review): NULL is presumably the key destructor; keys point into the entry itself - confirm */
+                       rspamd_image_cache_entry_dtor, /* Frees a whole cache entry */
+                       rspamd_image_dct_hash, rspamd_image_dct_equal); /* Hash and equality callbacks operating on digest bytes */
+}
+
+static gboolean
+rspamd_image_check_hash (struct rspamd_task *task, struct rspamd_image *img) /* Looks up img by its parent's digest; on a hit fills img->dct and returns TRUE, otherwise returns FALSE */
+{
+       struct rspamd_image_cache_entry *found;
+
+       if (images_hash == NULL) { /* The cache is created lazily on the first lookup */
+               rspamd_image_create_cache (task->cfg);
+       }
+
+       found = rspamd_lru_hash_lookup (images_hash, img->parent->digest, /* Key is the digest of the image's parent part */
+                       task->tv.tv_sec); /* Task timestamp in seconds is passed to the LRU as the current time */
+
+       if (found) {
+               /* Cache hit: give the image its own copy of the cached DCT bitmap */
+               img->dct = g_malloc (RSPAMD_DCT_LEN / NBBY);
+               rspamd_mempool_add_destructor (task->task_pool, g_free, /* The copy is released with g_free via the task pool destructor */
+                               img->dct);
+               /* Copy as found could be destroyed by LRU */
+               memcpy (img->dct, found->dct, RSPAMD_DCT_LEN / NBBY);
+               img->is_normalized = TRUE; /* Marks the image as already processed so the caller can return early */
+
+               return TRUE;
+       }
+
+       return FALSE;
+}
+
+static void
+rspamd_image_save_hash (struct rspamd_task *task, struct rspamd_image *img) /* Stores img->dct in the cache under the parent's digest unless that key is already present */
+{
+       struct rspamd_image_cache_entry *found;
+
+       if (img->is_normalized) { /* Only images that already have a DCT bitmap are cached */
+               found = rspamd_lru_hash_lookup (images_hash, img->parent->digest, /* NOTE(review): images_hash is not NULL-checked here; relies on rspamd_image_check_hash having run first */
+                               task->tv.tv_sec);
+
+               if (!found) {
+                       found = g_slice_alloc0 (sizeof (*found)); /* Zero-filled entry; released by rspamd_image_cache_entry_dtor */
+                       memcpy (found->dct, img->dct, RSPAMD_DCT_LEN / NBBY);
+                       memcpy (found->digest, img->parent->digest, sizeof (found->digest)); /* NOTE(review): copies 64 bytes; assumes the parent digest buffer is at least that large - confirm */
+
+                       rspamd_lru_hash_insert (images_hash, found->digest, found, /* Key is the digest stored inside the entry, so key and value share one allocation */
+                                       task->tv.tv_sec, 0);
+               }
+       }
+}
+
 #endif
 
 static void
@@ -348,6 +432,7 @@ rspamd_image_normalize (struct rspamd_task *task, struct rspamd_image *img)
 #ifdef USABLE_GD
        gdImagePtr src = NULL, dst = NULL;
        guint i, j, k, l;
+       gdouble *dct;
 
        if (img->data->len == 0 || img->data->len > G_MAXINT32) {
                return;
@@ -358,6 +443,10 @@ rspamd_image_normalize (struct rspamd_task *task, struct rspamd_image *img)
                return;
        }
 
+       if (rspamd_image_check_hash (task, img)) {
+               return;
+       }
+
        switch (img->type) {
        case IMAGE_TYPE_JPG:
                src = gdImageCreateFromJpegPtr (img->data->len, img->data->data);
@@ -387,7 +476,8 @@ rspamd_image_normalize (struct rspamd_task *task, struct rspamd_image *img)
                gdImageDestroy (src);
 
                img->is_normalized = TRUE;
-               img->dct = g_malloc (sizeof (gdouble) * 64 * 64);
+               dct = g_malloc (sizeof (gdouble) * RSPAMD_DCT_LEN);
+               img->dct = g_malloc0 (RSPAMD_DCT_LEN / NBBY);
                rspamd_mempool_add_destructor (task->task_pool, g_free,
                                img->dct);
 
@@ -424,31 +514,37 @@ rspamd_image_normalize (struct rspamd_task *task, struct rspamd_image *img)
                                }
 
                                rspamd_image_dct_block (p,
-                                               img->dct + i * RSPAMD_NORMALIZED_DIM + j);
+                                               dct + i * RSPAMD_NORMALIZED_DIM + j);
 
                                gdouble avg = 0.0;
 
                                for (k = 0; k < 8; k ++) {
                                        for (l = 0; l < 8; l ++) {
-                                               gdouble x = *(img->dct +
+                                               gdouble x = *(dct +
                                                                i * RSPAMD_NORMALIZED_DIM + j + k * 8 + l);
                                                avg += (x - avg) / (gdouble)(k * 8 + l + 1);
                                        }
 
                                }
 
+
                                for (k = 0; k < 8; k ++) {
                                        for (l = 0; l < 8; l ++) {
-                                               gdouble* x = img->dct +
-                                                               i * RSPAMD_NORMALIZED_DIM + j + k * 8 + l;
-                                               *x = *x >= avg ? 1.0 : 0.0;
+                                               guint idx = i * RSPAMD_NORMALIZED_DIM + j + k * 8 + l;
+
+                                               if (dct[idx] >= avg) {
+                                                       setbit (img->dct, idx);
+                                               }
                                        }
                                }
 
+
                        }
                }
 
                gdImageDestroy (dst);
+               g_free (dct);
+               rspamd_image_save_hash (task, img);
        }
 #endif
 }
index 1ac6c87861fc0bcff1ef7c4895f9186524f78f18..1ad73f69e03ed72dddb2e3392183e7c3aaa845e5 100644 (file)
@@ -7,6 +7,8 @@ struct html_image;
 struct rspamd_task;
 struct rspamd_mime_part;
 
+#define RSPAMD_DCT_LEN (64 * 64)
+
 enum rspamd_image_type {
        IMAGE_TYPE_PNG = 0,
        IMAGE_TYPE_JPG,
@@ -24,7 +26,7 @@ struct rspamd_image {
        guint32 width;
        guint32 height;
        gboolean is_normalized;
-       gdouble *dct;
+       guchar *dct;
 };
 
 /*