Browse Source

[Minor] Use a more unified approach to hash strings

tags/3.3
Vsevolod Stakhov 1 year ago
parent
commit
bde1d6037b
No account linked to committer's email address
1 changed files with 22 additions and 35 deletions
  1. 22
    35
      src/libutil/fstring.c

+ 22
- 35
src/libutil/fstring.c View File

@@ -16,6 +16,7 @@
#include "fstring.h"
#include "str_util.h"
#include "contrib/fastutf8/fastutf8.h"
#include "contrib/mumhash/mum.h"


#ifdef WITH_JEMALLOC
@@ -230,32 +231,10 @@ rspamd_fstring_erase (rspamd_fstring_t *str, gsize pos, gsize len)
}

/* Compat code */
static guint32
fstrhash_c (gchar c, guint32 hval)
static guint64
fstrhash_c (guint64 c, guint64 hval)
{
guint32 tmp;
/*
* xor in the current byte against each byte of hval
* (which alone guarantees that every bit of input will have
* an effect on the output)
*/
tmp = c & 0xFF;
tmp = tmp | (tmp << 8) | (tmp << 16) | (tmp << 24);
hval ^= tmp;

/* add some bits out of the middle as low order bits */
hval = hval + ((hval >> 12) & 0x0000ffff);

/* swap most and min significative bytes */
tmp = (hval << 24) | ((hval >> 24) & 0xff);
/* zero most and min significative bytes of hval */
hval &= 0x00ffff00;
hval |= tmp;
/*
* rotate hval 3 bits to the left (thereby making the
* 3rd msb of the above mess the hsb of the output hash)
*/
return (hval << 3) + (hval >> 29);
return mum_hash_step(hval, c);
}


@@ -266,9 +245,8 @@ guint32
rspamd_fstrhash_lc (const rspamd_ftok_t * str, gboolean is_utf)
{
gsize i;
guint32 j, hval;
guint64 hval;
const gchar *p, *end = NULL;
gchar t;
gunichar uc;

if (str == NULL) {
@@ -285,18 +263,27 @@ rspamd_fstrhash_lc (const rspamd_ftok_t * str, gboolean is_utf)
}
while (p < end) {
uc = g_unichar_tolower (g_utf8_get_char (p));
for (j = 0; j < sizeof (gunichar); j++) {
t = (uc >> (j * 8)) & 0xff;
if (t != 0) {
hval = fstrhash_c (t, hval);
}
}
hval = fstrhash_c (uc, hval);
p = g_utf8_next_char (p);
}
}
else {
for (i = 0; i < str->len; i++, p++) {
hval = fstrhash_c (g_ascii_tolower (*p), hval);
gsize large_steps = str->len / sizeof (guint64);
for (i = 0; i < large_steps; i++, p += sizeof (guint64)) {
/* Copy to the uint64 lowercasing each byte */
union {
char c[sizeof(guint64)];
guint64 iu64;
} t;
for (int j = 0; j < sizeof(guint64); j ++) {
t.c[j] = g_ascii_tolower(p[j]);
}
hval = fstrhash_c (t.iu64, hval);
}

gsize remain = str->len % sizeof(guint64);
for (i = 0; i < remain; i ++, p ++) {
hval = fstrhash_c (g_ascii_tolower(*p), hval);
}
}


Loading…
Cancel
Save