summary | refs | log | tree | commit | diff | stats
path: root/src/libutil/shingles.c
diff options
context:
space:
mode:
author: Vsevolod Stakhov <vsevolod@highsecure.ru> 2016-05-12 09:48:29 +0100
committer: Vsevolod Stakhov <vsevolod@highsecure.ru> 2016-05-12 09:48:29 +0100
commit: 931615a2e0c1069c161bf2c9516732f576f20ee3 (patch)
tree: 9a195f6cc8da39dcc64eb8984189589e5a974f10 /src/libutil/shingles.c
parent: 25db12ee3fce3a41c1fd907373ead67015cef6ed (diff)
download: rspamd-931615a2e0c1069c161bf2c9516732f576f20ee3.tar.gz
rspamd-931615a2e0c1069c161bf2c9516732f576f20ee3.zip
[Feature] Further micro-optimizations for hashing and shingles
Diffstat (limited to 'src/libutil/shingles.c')
-rw-r--r-- src/libutil/shingles.c | 53
1 files changed, 27 insertions, 26 deletions
diff --git a/src/libutil/shingles.c b/src/libutil/shingles.c
index 3e238fa5c..8d1b147db 100644
--- a/src/libutil/shingles.c
+++ b/src/libutil/shingles.c
@@ -19,7 +19,7 @@
#define SHINGLES_WINDOW 3
-struct rspamd_shingle*
+struct rspamd_shingle* RSPAMD_OPTIMIZE("unroll-loops")
rspamd_shingles_generate (GArray *input,
const guchar key[16],
rspamd_mempool_t *pool,
@@ -28,7 +28,7 @@ rspamd_shingles_generate (GArray *input,
enum rspamd_shingle_alg alg)
{
struct rspamd_shingle *res;
- GArray *hashes[RSPAMD_SHINGLE_SIZE];
+ guint64 **hashes;
rspamd_sipkey_t keys[RSPAMD_SHINGLE_SIZE];
guchar shabuf[rspamd_cryptobox_HASHBYTES], *out_key;
const guchar *cur_key;
@@ -36,7 +36,8 @@ rspamd_shingles_generate (GArray *input,
rspamd_ftok_t *word;
rspamd_cryptobox_hash_state_t bs;
guint64 val;
- gint i, j, k, beg = 0;
+ gint i, j, k;
+ gsize hlen, beg = 0;
enum rspamd_cryptobox_fast_hash_type ht;
if (pool != NULL) {
@@ -52,9 +53,11 @@ rspamd_shingles_generate (GArray *input,
out_key = (guchar *)&keys[0];
/* Init hashes pipes and keys */
+ hashes = g_slice_alloc (sizeof (*hashes) * RSPAMD_SHINGLE_SIZE);
+ hlen = input->len > SHINGLES_WINDOW ? (input->len - SHINGLES_WINDOW + 1) : 1;
+
for (i = 0; i < RSPAMD_SHINGLE_SIZE; i ++) {
- hashes[i] = g_array_sized_new (FALSE, FALSE, sizeof (guint64),
- input->len + SHINGLES_WINDOW);
+ hashes[i] = g_slice_alloc (hlen * sizeof (guint64));
/*
* To generate a set of hashes we just apply sha256 to the
* initial key as many times as many hashes are required and
@@ -80,29 +83,34 @@ rspamd_shingles_generate (GArray *input,
word = &g_array_index (input, rspamd_ftok_t, j);
row = rspamd_fstring_append (row, word->begin, word->len);
}
- beg++;
/* Now we need to create a new row here */
for (j = 0; j < RSPAMD_SHINGLE_SIZE; j ++) {
rspamd_cryptobox_siphash ((guchar *)&val, row->str, row->len,
keys[j]);
- g_array_append_val (hashes[j], val);
+ g_assert (hlen > beg);
+ hashes[j][beg] = val;
}
+ beg++;
+
row = rspamd_fstring_assign (row, "", 0);
}
}
}
else {
guint64 res[SHINGLES_WINDOW * RSPAMD_SHINGLE_SIZE];
- guint64 RSPAMD_ALIGNED(32) tmpbuf[16];
- guint rlen;
- if (alg == RSPAMD_SHINGLES_XXHASH) {
+ switch (alg) {
+ case RSPAMD_SHINGLES_XXHASH:
ht = RSPAMD_CRYPTOBOX_XXHASH64;
- }
- else {
+ break;
+ case RSPAMD_SHINGLES_MUMHASH:
ht = RSPAMD_CRYPTOBOX_MUMHASH;
+ break;
+ default:
+ ht = RSPAMD_CRYPTOBOX_HASHFAST;
+ break;
}
memset (res, 0, sizeof (res));
@@ -119,26 +127,17 @@ rspamd_shingles_generate (GArray *input,
word = &g_array_index (input, rspamd_ftok_t, beg);
/* Insert the last element to the pipe */
- if (word->len >= sizeof (tmpbuf)) {
- rlen = sizeof (tmpbuf);
- memcpy (tmpbuf, word->begin, rlen);
- }
- else {
- rlen = word->len / sizeof (guint64) + 1;
- memset (tmpbuf, 0, rlen * sizeof (guint64));
- memcpy (tmpbuf, word->begin, word->len);
- }
-
res[j * SHINGLES_WINDOW + SHINGLES_WINDOW - 1] =
rspamd_cryptobox_fast_hash_specific (ht,
- tmpbuf,rlen * sizeof (guint64),
+ word->begin, word->len,
*(guint64 *)keys[j]);
val = 0;
for (k = 0; k < SHINGLES_WINDOW; k ++) {
val ^= res[j * SHINGLES_WINDOW + k];
}
- g_array_append_val (hashes[j], val);
+ g_assert (hlen > beg);
+ hashes[j][beg] = val;
}
beg++;
}
@@ -147,11 +146,13 @@ rspamd_shingles_generate (GArray *input,
/* Now we need to filter all hashes and make a shingles result */
for (i = 0; i < RSPAMD_SHINGLE_SIZE; i ++) {
- res->hashes[i] = filter ((guint64 *)hashes[i]->data, hashes[i]->len,
+ res->hashes[i] = filter (hashes[i], hlen,
i, key, filterd);
- g_array_free (hashes[i], TRUE);
+ g_slice_free1 (hlen * sizeof (guint64), hashes[i]);
}
+ g_slice_free1 (sizeof (*hashes) * RSPAMD_SHINGLE_SIZE, hashes);
+
rspamd_fstring_free (row);
return res;