about summary refs log tree commit diff stats
path: root/src/libutil/shingles.c
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@highsecure.ru> 2017-04-03 13:52:53 +0100
committer Vsevolod Stakhov <vsevolod@highsecure.ru> 2017-04-03 13:52:53 +0100
commit de922dd854760bcd362adbc3a2dd5a7d203ad10f (patch)
tree d89d3674b86346f26b0c3f060a8c5537bf4acb4e /src/libutil/shingles.c
parent 7641f747f2afc07bb5846ac32adf62250f6e7e28 (diff)
download rspamd-de922dd854760bcd362adbc3a2dd5a7d203ad10f.tar.gz
rspamd-de922dd854760bcd362adbc3a2dd5a7d203ad10f.zip
[Feature] Use cached shingles keys
Diffstat (limited to 'src/libutil/shingles.c')
-rw-r--r-- src/libutil/shingles.c 115
1 files changed, 95 insertions, 20 deletions
diff --git a/src/libutil/shingles.c b/src/libutil/shingles.c
index 2dd7faffc..ddac12ac4 100644
--- a/src/libutil/shingles.c
+++ b/src/libutil/shingles.c
@@ -20,6 +20,97 @@
#include "libstat/stat_api.h"
#define SHINGLES_WINDOW 3
+#define SHINGLES_KEY_SIZE rspamd_cryptobox_SIPKEYBYTES
+
+/*
+ * Hash callback for the shingles key cache: hashes exactly
+ * SHINGLES_KEY_SIZE bytes starting at `k`, seeded with rspamd_hash_seed ().
+ */
+static guint
+rspamd_shingles_keys_hash (gconstpointer k)
+{
+	/* The result is narrowed to guint, as required by the return type */
+	return (guint) rspamd_cryptobox_fast_hash (k,
+			SHINGLES_KEY_SIZE,
+			rspamd_hash_seed ());
+}
+
+/*
+ * Equality callback for the shingles key cache: two keys are equal when
+ * their first SHINGLES_KEY_SIZE bytes are identical.
+ */
+static gboolean
+rspamd_shingles_keys_equal (gconstpointer k1, gconstpointer k2)
+{
+	if (memcmp (k1, k2, SHINGLES_KEY_SIZE) != 0) {
+		return FALSE;
+	}
+
+	return TRUE;
+}
+
+/*
+ * Destroy callback for a cached key set: releases each of the
+ * RSPAMD_SHINGLE_SIZE key buffers and then the pointer array itself.
+ * `p` must be an array as produced by rspamd_shingles_keys_new ().
+ */
+static void
+rspamd_shingles_keys_free (gpointer p)
+{
+	guchar **keys = p;
+	guchar **slot, **end = keys + RSPAMD_SHINGLE_SIZE;
+
+	/* Release the individual key buffers first... */
+	for (slot = keys; slot != end; slot ++) {
+		g_free (*slot);
+	}
+
+	/* ...and then the array that held them */
+	g_free (keys);
+}
+
+/*
+ * Allocate a zero-filled key set: an array of RSPAMD_SHINGLE_SIZE pointers,
+ * each pointing to its own zeroed buffer of SHINGLES_KEY_SIZE bytes.
+ * The result is released with rspamd_shingles_keys_free ().
+ */
+static guchar **
+rspamd_shingles_keys_new (void)
+{
+	guchar **keys = g_malloc0 (sizeof (guchar *) * RSPAMD_SHINGLE_SIZE);
+	guint idx = 0;
+
+	while (idx < RSPAMD_SHINGLE_SIZE) {
+		keys[idx] = g_malloc0 (sizeof (guchar) * SHINGLES_KEY_SIZE);
+		idx ++;
+	}
+
+	return keys;
+}
+
+/*
+ * Return the set of RSPAMD_SHINGLE_SIZE keys derived from `key`.
+ *
+ * The derived keys are computed on the first request for a given `key` and
+ * stored in a function-local static hash table; later requests with the same
+ * SHINGLES_KEY_SIZE bytes return the stored array.
+ *
+ * The returned array is owned by the cache (the table was created with
+ * rspamd_shingles_keys_free as its value destructor), so callers must not
+ * free it.
+ *
+ * NOTE(review): the table is a plain static with no locking and entries are
+ * never removed here; presumably callers are single-threaded and use a
+ * bounded number of distinct keys -- confirm.
+ */
+static guchar **
+rspamd_shingles_get_keys_cached (const guchar key[SHINGLES_KEY_SIZE])
+{
+	static GHashTable *ht = NULL;
+	guchar **keys = NULL, *key_cpy;
+	rspamd_cryptobox_hash_state_t bs;
+	const guchar *cur_key;
+	guchar shabuf[rspamd_cryptobox_HASHBYTES];
+	guint i;
+
+	if (ht == NULL) {
+		/* First call: create the cache, nothing to look up yet */
+		ht = g_hash_table_new_full (rspamd_shingles_keys_hash,
+				rspamd_shingles_keys_equal, g_free, rspamd_shingles_keys_free);
+	}
+	else {
+		keys = g_hash_table_lookup (ht, key);
+	}
+
+	if (keys == NULL) {
+		keys = rspamd_shingles_keys_new ();
+		/* The table owns its keys, so store a private copy of the input */
+		key_cpy = g_malloc (SHINGLES_KEY_SIZE);
+		memcpy (key_cpy, key, SHINGLES_KEY_SIZE);
+
+		/*
+		 * Generate keys as a chain: each derived key is the leading
+		 * SHINGLES_KEY_SIZE bytes of the hash of the previous key, starting
+		 * from the caller-supplied one.
+		 *
+		 * NOTE(review): assumes SHINGLES_KEY_SIZE is 16 and does not exceed
+		 * rspamd_cryptobox_HASHBYTES -- confirm.
+		 */
+		cur_key = key;
+
+		for (i = 0; i < RSPAMD_SHINGLE_SIZE; i ++) {
+			rspamd_cryptobox_hash_init (&bs, NULL, 0);
+			rspamd_cryptobox_hash_update (&bs, cur_key, SHINGLES_KEY_SIZE);
+			rspamd_cryptobox_hash_final (&bs, shabuf);
+
+			/*
+			 * Index the output slot directly: only keys[0] up to
+			 * keys[RSPAMD_SHINGLE_SIZE - 1] exist, so nothing past the
+			 * end of the array is ever read.
+			 */
+			memcpy (keys[i], shabuf, SHINGLES_KEY_SIZE);
+			cur_key = keys[i];
+		}
+
+		g_hash_table_insert (ht, key_cpy, keys);
+	}
+
+	return keys;
+}
struct rspamd_shingle* RSPAMD_OPTIMIZE("unroll-loops")
rspamd_shingles_from_text (GArray *input,
@@ -31,12 +122,9 @@ rspamd_shingles_from_text (GArray *input,
{
struct rspamd_shingle *res;
guint64 **hashes;
- rspamd_sipkey_t keys[RSPAMD_SHINGLE_SIZE];
- guchar shabuf[rspamd_cryptobox_HASHBYTES], *out_key;
- const guchar *cur_key;
+ guchar **keys;
rspamd_fstring_t *row;
rspamd_stat_token_t *word;
- rspamd_cryptobox_hash_state_t bs;
guint64 val;
gint i, j, k;
gsize hlen, beg = 0;
@@ -49,29 +137,16 @@ rspamd_shingles_from_text (GArray *input,
res = g_malloc (sizeof (*res));
}
- rspamd_cryptobox_hash_init (&bs, NULL, 0);
row = rspamd_fstring_sized_new (256);
- cur_key = key;
- out_key = (guchar *)&keys[0];
/* Init hashes pipes and keys */
hashes = g_slice_alloc (sizeof (*hashes) * RSPAMD_SHINGLE_SIZE);
- hlen = input->len > SHINGLES_WINDOW ? (input->len - SHINGLES_WINDOW + 1) : 1;
+ hlen = input->len > SHINGLES_WINDOW ?
+ (input->len - SHINGLES_WINDOW + 1) : 1;
+ keys = rspamd_shingles_get_keys_cached (key);
for (i = 0; i < RSPAMD_SHINGLE_SIZE; i ++) {
hashes[i] = g_slice_alloc (hlen * sizeof (guint64));
- /*
- * To generate a set of hashes we just apply sha256 to the
- * initial key as many times as many hashes are required and
- * xor left and right parts of sha256 to get a single 16 bytes SIP key.
- */
- rspamd_cryptobox_hash_update (&bs, cur_key, 16);
- rspamd_cryptobox_hash_final (&bs, shabuf);
-
- memcpy (out_key, shabuf, 16);
- rspamd_cryptobox_hash_init (&bs, NULL, 0);
- cur_key = out_key;
- out_key += 16;
}
/* Now parse input words into a vector of hashes using rolling window */