#include "config.h"
#include "bloom.h"
+#include "xxhash.h"
/* 4 bits are used for counting (implementing delete operation) */
#define SIZE_BIT 4
/*
 * Read counter number n from the packed array a (SIZE_BIT bits per counter,
 * CHAR_BIT / SIZE_BIT counters per byte).
 * The counter is returned masked in place (not shifted down), so the result
 * is non-zero if and only if the counter is non-zero.
 * Both arguments are parenthesized so that expressions such as
 * GETBIT (arr, i + 1) index and shift correctly.
 */
#define GETBIT(a, n) ((a)[(n) * SIZE_BIT / CHAR_BIT] & (0xF << ((n) % (CHAR_BIT/SIZE_BIT) * SIZE_BIT)))
/* Common hash functions */
-guint
-bloom_sax_hash (const gchar *key)
-{
- guint h = 0;
-
- while (*key)
- h ^= (h << 5) + (h >> 2) + (gchar)*key++;
-
- return h;
-}
-
-guint
-bloom_sdbm_hash (const gchar *key)
-{
- guint h = 0;
-
- while (*key)
- h = (gchar)*key++ + (h << 6) + (h << 16) - h;
-
- return h;
-}
-
-guint
-bloom_fnv_hash (const gchar *key)
-{
- guint h = 0;
-
- while (*key) {
- h ^= (gchar)*key++;
- h += (h << 1) + (h << 4) + (h << 7) + (h << 8) + (h << 24);
- }
-
- return h;
-}
-
-guint
-bloom_rs_hash (const gchar *key)
-{
- guint b = 378551;
- guint a = 63689;
- guint hash = 0;
-
- while (*key) {
- hash = hash * a + (gchar)*key++;
- a = a * b;
- }
-
- return hash;
-}
-
-guint
-bloom_js_hash (const gchar *key)
-{
- guint hash = 1315423911;
-
- while (*key) {
- hash ^= ((hash << 5) + (gchar)*key++ + (hash >> 2));
- }
-
- return hash;
-}
-
-
-guint
-bloom_elf_hash (const gchar *key)
-{
- guint hash = 0;
- guint x = 0;
-
- while (*key) {
- hash = (hash << 4) + (gchar)*key++;
- if ((x = hash & 0xF0000000L) != 0) {
- hash ^= (x >> 24);
- }
- hash &= ~x;
- }
-
- return hash;
-}
-
-
-guint
-bloom_bkdr_hash (const gchar *key)
-{
- guint seed = 131; /* 31 131 1313 13131 131313 etc.. */
- guint hash = 0;
-
- while (*key) {
- hash = (hash * seed) + (gchar)*key++;
- }
-
- return hash;
-}
-guint
-bloom_ap_hash (const gchar *key)
+rspamd_bloom_filter_t *
+rspamd_bloom_create (size_t size, size_t nfuncs, ...)
{
- guint hash = 0xAAAAAAAA;
- guint i = 0;
-
- while (*key) {
- hash ^= ((i & 1) == 0) ? ((hash << 7) ^ ((gchar)*key) * (hash >> 3)) : (~((hash << 11) + (((gchar)*key) ^ (hash >> 5))));
- key++;
- }
-
- return hash;
-}
-
-bloom_filter_t *
-bloom_create (size_t size, size_t nfuncs, ...)
-{
- bloom_filter_t *bloom;
+ rspamd_bloom_filter_t *bloom;
va_list l;
gsize n;
- if (!(bloom = g_malloc (sizeof (bloom_filter_t)))) {
+ if (!(bloom = g_malloc (sizeof (rspamd_bloom_filter_t)))) {
return NULL;
}
if (!(bloom->a = g_new0 (gchar, (size + CHAR_BIT - 1) / CHAR_BIT * SIZE_BIT))) {
g_free (bloom);
return NULL;
}
- if (!(bloom->funcs = (hashfunc_t *) g_malloc (nfuncs * sizeof (hashfunc_t)))) {
+ if (!(bloom->seeds = g_new0 (guint32, nfuncs))) {
g_free (bloom->a);
g_free (bloom);
return NULL;
va_start (l, nfuncs);
for (n = 0; n < nfuncs; ++n) {
- bloom->funcs[n] = va_arg (l, hashfunc_t);
+ bloom->seeds[n] = va_arg (l, guint32);
}
va_end (l);
}
+/*
+ * Release a bloom filter: frees the counters array, the seeds array and
+ * the filter structure itself. Passing NULL is a no-op.
+ */
void
-bloom_destroy (bloom_filter_t * bloom)
+rspamd_bloom_destroy (rspamd_bloom_filter_t * bloom)
{
+	/* Nothing to free; avoid dereferencing a NULL filter below */
+	if (bloom == NULL) {
+		return;
+	}
 g_free (bloom->a);
- g_free (bloom->funcs);
+ g_free (bloom->seeds);
 g_free (bloom);
}
gboolean
-bloom_add (bloom_filter_t * bloom, const gchar *s)
+rspamd_bloom_add (rspamd_bloom_filter_t * bloom, const gchar *s)
{
- size_t n;
+ size_t n, len;
u_char t;
guint v;
+ if (s == NULL) {
+ return FALSE;
+ }
+ len = strlen (s);
for (n = 0; n < bloom->nfuncs; ++n) {
- v = bloom->funcs[n] (s) % bloom->asize;
+ v = XXH32 (s, len, bloom->seeds[n]) % bloom->asize;
INCBIT (bloom->a, v, t);
}
}
gboolean
-bloom_del (bloom_filter_t * bloom, const gchar *s)
+rspamd_bloom_del (rspamd_bloom_filter_t * bloom, const gchar *s)
{
- size_t n;
+ size_t n, len;
u_char t;
guint v;
+ if (s == NULL) {
+ return FALSE;
+ }
+ len = strlen (s);
for (n = 0; n < bloom->nfuncs; ++n) {
- v = bloom->funcs[n] (s) % bloom->asize;
+ v = XXH32 (s, len, bloom->seeds[n]) % bloom->asize;
DECBIT (bloom->a, v, t);
}
}
gboolean
-bloom_check (bloom_filter_t * bloom, const gchar *s)
+rspamd_bloom_check (rspamd_bloom_filter_t * bloom, const gchar *s)
{
- size_t n;
+ size_t n, len;
guint v;
+ if (s == NULL) {
+ return FALSE;
+ }
+ len = strlen (s);
for (n = 0; n < bloom->nfuncs; ++n) {
- v = bloom->funcs[n] (s) % bloom->asize;
- if (!(GETBIT (bloom->a, v)))
+ v = XXH32 (s, len, bloom->seeds[n]) % bloom->asize;
+ if (!(GETBIT (bloom->a, v))) {
return FALSE;
+ }
}
return TRUE;
#include "config.h"
-typedef guint (*hashfunc_t) (const gchar *);
-
-typedef struct bloom_filter_s {
+typedef struct rspamd_bloom_filter_s {
size_t asize;
gchar *a;
size_t nfuncs;
- hashfunc_t *funcs;
-} bloom_filter_t;
+ guint32 *seeds;
+} rspamd_bloom_filter_t;
-/* Hash functions */
-guint bloom_sax_hash (const gchar *key);
-guint bloom_sdbm_hash (const gchar *key);
-guint bloom_fnv_hash (const gchar *key);
-guint bloom_rs_hash (const gchar *key);
-guint bloom_js_hash (const gchar *key);
-guint bloom_elf_hash (const gchar *key);
-guint bloom_bkdr_hash (const gchar *key);
-guint bloom_ap_hash (const gchar *key);
-#define DEFAULT_BLOOM_HASHES 8, bloom_sax_hash, bloom_sdbm_hash, bloom_fnv_hash, bloom_rs_hash, bloom_js_hash, bloom_elf_hash, bloom_bkdr_hash, bloom_ap_hash
+/*
+ * Some random uint32 seeds for hashing
+ */
+#define RSPAMD_DEFAULT_BLOOM_HASHES 8, 0x61782caaU, 0x79ab8141U, 0xe45ee2d1U, \
+ 0xf97542d1U, 0x1e2623edU, 0xf5a23cfeU, 0xa41b2508U, 0x85abdce8U
/*
 * Create new bloom filter
 * @param size size of the filter
 * @param nfuncs number of hash seeds that follow
 * @param ... list of guint32 seeds, one per hash (see RSPAMD_DEFAULT_BLOOM_HASHES)
 */
-bloom_filter_t* bloom_create (size_t size, size_t nfuncs, ...);
+rspamd_bloom_filter_t* rspamd_bloom_create (size_t size, size_t nfuncs, ...);
/*
* Destroy bloom filter
*/
-void bloom_destroy (bloom_filter_t * bloom);
+void rspamd_bloom_destroy (rspamd_bloom_filter_t * bloom);
/*
* Add a string to bloom filter
*/
-gboolean bloom_add (bloom_filter_t * bloom, const gchar *s);
+gboolean rspamd_bloom_add (rspamd_bloom_filter_t * bloom, const gchar *s);
/*
* Delete a string from bloom filter
*/
-gboolean bloom_del (bloom_filter_t * bloom, const gchar *s);
+gboolean rspamd_bloom_del (rspamd_bloom_filter_t * bloom, const gchar *s);
/*
* Check whether this string is in bloom filter (algorithm produces FALSE-POSITIVES, so result must be checked if it is positive)
*/
-gboolean bloom_check (bloom_filter_t * bloom, const gchar *s);
+gboolean rspamd_bloom_check (rspamd_bloom_filter_t * bloom, const gchar *s);
#endif
static GQueue *hashes[BUCKETS];
static GQueue *frequent;
static GHashTable *static_hash;
-static rspamd_bloom_filter_t *bf;
+static bloom_filter_t *bf;
/* Number of cache modifications */
static guint32 mods = 0;
}
server_stat->fuzzy_hashes --;
g_hash_table_remove (static_hash, node->h.hash_pipe);
- rspamd_bloom_del (bf, node->h.hash_pipe);
+ bloom_del (bf, node->h.hash_pipe);
g_slice_free1 (sizeof (struct rspamd_fuzzy_node), node);
}
else {
node = (struct rspamd_fuzzy_node *)cur->data;
head = hashes[node->h.block_size % BUCKETS];
g_queue_delete_link (head, cur);
- rspamd_bloom_del (bf, node->h.hash_pipe);
+ bloom_del (bf, node->h.hash_pipe);
if (node->time != INVALID_NODE_TIME) {
server_stat->fuzzy_hashes_expired ++;
}
g_queue_push_head (hashes[node->h.block_size % BUCKETS], node);
}
}
- rspamd_bloom_add (bf, node->h.hash_pipe);
+ bloom_add (bf, node->h.hash_pipe);
if (touch_stat) {
server_stat->fuzzy_hashes ++;
}
struct rspamd_fuzzy_node *h;
- if (!rspamd_bloom_check (bf, cmd->hash)) {
+ if (!bloom_check (bf, cmd->hash)) {
return 0;
}
tmp = cur;
cur = g_list_next (cur);
g_queue_delete_link (hash, tmp);
- rspamd_bloom_del (bf, s->hash_pipe);
+ bloom_del (bf, s->hash_pipe);
msg_info ("fuzzy hash was successfully deleted");
server_stat->fuzzy_hashes --;
mods++;
fuzzy_hash_t s;
gboolean res = FALSE;
- if (!rspamd_bloom_check (bf, cmd->hash)) {
+ if (!bloom_check (bf, cmd->hash)) {
return FALSE;
}
rspamd_mutex_unlock (ctx->update_mtx);
}
+/*
+ * Parse ctx->update_map as a comma-separated list of IPv4 addresses/masks
+ * and add every element accepted by parse_ipmask_v4 () to ctx->update_ips,
+ * creating the radix tree on first use.
+ * @param ctx fuzzy storage context; ctx->update_map must not be NULL
+ * @return TRUE if ctx->update_ips is non-NULL after parsing
+ */
+static gboolean
+parse_fuzzy_update_list (struct rspamd_fuzzy_storage_ctx *ctx)
+{
+	gchar **strvec, **cur;
+	struct in_addr ina;
+	guint32 mask;
+
+	strvec = g_strsplit_set (ctx->update_map, ",", 0);
+	cur = strvec;
+
+	while (*cur != NULL) {
+		/* XXX: handle only ipv4 addresses */
+		if (parse_ipmask_v4 (*cur, &ina, &mask)) {
+			if (ctx->update_ips == NULL) {
+				ctx->update_ips = radix_tree_create ();
+			}
+			radix32tree_add (ctx->update_ips, htonl (ina.s_addr), mask, 1);
+		}
+		cur ++;
+	}
+
+	/* The split vector is owned by us; release it to avoid a leak */
+	g_strfreev (strvec);
+
+	return (ctx->update_ips != NULL);
+}
+
+
gpointer
init_fuzzy (struct config_file *cfg)
{
if (ctx->update_map != NULL) {
if (!add_map (worker->srv->cfg, ctx->update_map, "Allow fuzzy updates from specified addresses",
read_radix_list, fin_radix_list, (void **)&ctx->update_ips)) {
- if (!rspamd_parse_ip_list (ctx->update_map, &ctx->update_ips)) {
+ if (!parse_fuzzy_update_list (ctx)) {
msg_warn ("cannot load or parse ip list from '%s'", ctx->update_map);
}
}