#include "bloom.h"
#include "fuzzy_storage.h"
+#ifdef WITH_JUDY
+#include <Judy.h>
+#endif
+
/* This number is used as the limit when comparing two fuzzy hashes; its value can vary from 0 to 100 */
#define LEV_LIMIT 99
/* This number is used as the limit when deciding whether to write a new hash file or not */
/* Number of unsuccessful bind retries */
#define MAX_RETRIES 40
/* Weight of hash to consider it frequent */
-#define FREQUENT_SCORE 100
+#define DEFAULT_FREQUENT_SCORE 100
static GQueue *hashes[BUCKETS];
static GQueue *frequent;
+#ifdef WITH_JUDY
+static gpointer jtree;
+static gboolean use_judy = FALSE;
+#endif
static bloom_filter_t *bf;
/* Number of cache modifications */
static uint32_t mods = 0;
+/* Frequent score number */
+static uint32_t frequent_score = DEFAULT_FREQUENT_SCORE;
/* For evtimer */
static struct timeval tmv;
static struct event tev;
GList *cur, *tmp;
struct rspamd_fuzzy_node *node;
uint64_t expire, now;
+#ifdef WITH_JUDY
+ PPvoid_t pvalue;
+ char indexbuf[1024], tmpindex[1024];
+#endif
/* Check for modifications */
if (mods < MOD_LIMIT) {
expire = DEFAULT_EXPIRE;
}
- /* Sync section */
- if ((fd = open (filename, O_WRONLY)) != -1) {
- /* Aqquire a lock */
- (void)lock_file (fd, FALSE);
- (void)unlock_file (fd, FALSE);
- }
-
if ((fd = open (filename, O_WRONLY | O_TRUNC | O_CREAT, S_IWUSR | S_IRUSR | S_IRGRP | S_IROTH)) == -1) {
msg_err ("cannot create hash file %s: %s", filename, strerror (errno));
return;
(void)lock_file (fd, FALSE);
now = (uint64_t) time (NULL);
+
+#ifdef WITH_JUDY
+ if (use_judy) {
+ indexbuf[0] = '\0';
+ pvalue = JudySLFirst (jtree, indexbuf, PJE0);
+ while (pvalue) {
+ node = *((struct rspamd_fuzzy_node **)pvalue);
+ if (now - node->time > expire) {
+ /* Remove expired item */
+ g_strlcpy (tmpindex, indexbuf, sizeof (tmpindex));
+ pvalue = JudySLNext (jtree, tmpindex, PJE0);
+ JudySLDel (&jtree, indexbuf, PJE0);
+ g_strlcpy (indexbuf, tmpindex, sizeof (indexbuf));
+ bloom_del (bf, node->h.hash_pipe);
+ server_stat->fuzzy_hashes_expired ++;
+ server_stat->fuzzy_hashes --;
+ g_free (node);
+ continue;
+ }
+ if (write (fd, node, sizeof (struct rspamd_fuzzy_node)) == -1) {
+ msg_err ("cannot write file %s: %s", filename, strerror (errno));
+ }
+ pvalue = JudySLNext (jtree, indexbuf, PJE0);
+ }
+ }
+ else {
+#endif
cur = frequent->head;
while (cur) {
node = cur->data;
cur = g_list_next (cur);
}
}
+#ifdef WITH_JUDY
+ }
+#endif
(void)unlock_file (fd, FALSE);
close (fd);
struct stat st;
char *filename;
struct rspamd_fuzzy_node *node;
+#ifdef WITH_JUDY
+ PPvoid_t pvalue;
+ if (use_judy) {
+ jtree = NULL;
+ }
+ else {
+#endif
for (i = 0; i < BUCKETS; i++) {
hashes[i] = g_queue_new ();
}
frequent = g_queue_new ();
+#ifdef WITH_JUDY
+ }
+#endif
filename = g_hash_table_lookup (wrk->cf->params, "hashfile");
if (filename == NULL) {
if (r != sizeof (struct rspamd_fuzzy_node)) {
break;
}
- if (node->value > FREQUENT_SCORE) {
+#ifdef WITH_JUDY
+ if (use_judy) {
+ pvalue = JudySLIns (&jtree, node->h.hash_pipe, PJE0);
+ *pvalue = node;
+ }
+ else {
+#endif
+ if (node->value > frequent_score) {
g_queue_push_head (frequent, node);
}
else {
g_queue_push_head (hashes[node->h.block_size % BUCKETS], node);
}
+#ifdef WITH_JUDY
+ }
+#endif
bloom_add (bf, node->h.hash_pipe);
server_stat->fuzzy_hashes ++;
}
+#ifdef WITH_JUDY
+ if (!use_judy) {
+#endif
/* Sort everything */
g_queue_sort (frequent, compare_nodes, NULL);
for (i = 0; i < BUCKETS; i ++) {
g_queue_sort (hashes[i], compare_nodes, NULL);
}
+#ifdef WITH_JUDY
+ }
+#endif
(void)unlock_file (fd, FALSE);
close (fd);
GList *cur;
struct rspamd_fuzzy_node *h;
int prob = 0;
-
+#ifdef WITH_JUDY
+ PPvoid_t pvalue;
+
+ if (use_judy) {
+ pvalue = JudySLGet (jtree, s->hash_pipe, PJE0);
+ if (pvalue != NULL) {
+ h = *((struct rspamd_fuzzy_node **)pvalue);
+ msg_info ("fuzzy hash was found in judy tree");
+ if (update_value) {
+ h->value += update_value;
+ }
+ return h->value;
+ }
+ }
+ else {
+#endif
cur = frequent->head;
while (cur) {
h = cur->data;
h->value += update_value;
msg_info ("new hash weight: %d", h->value);
}
- if (h->value > FREQUENT_SCORE) {
+ if (h->value > frequent_score) {
g_queue_unlink (hash, cur);
g_queue_push_head_link (frequent, cur);
msg_info ("moved hash to frequent list");
}
cur = g_list_next (cur);
}
+#ifdef WITH_JUDY
+ }
+#endif
return 0;
}
/*
 * Build a fuzzy_hash_t from a fuzzy command and hand it to check_hash_node().
 *
 * cmd->hash is copied into the local hash_pipe buffer and cmd->blocksize is
 * stored as the block size. The lookup is done against the bucket
 * hashes[cmd->blocksize % BUCKETS], with cmd->value passed as the amount to
 * add to a matching node. Whatever check_hash_node() returns is returned
 * unchanged.
 */
static gboolean
update_hash (struct fuzzy_cmd *cmd)
{
- GList *cur;
fuzzy_hash_t s;
memcpy (s.hash_pipe, cmd->hash, sizeof (s.hash_pipe));
s.block_size = cmd->blocksize;
- cur = hashes[cmd->blocksize % BUCKETS]->head;
return check_hash_node (hashes[cmd->blocksize % BUCKETS], &s, cmd->value);
}
process_write_command (struct fuzzy_cmd *cmd)
{
struct rspamd_fuzzy_node *h;
+#ifdef WITH_JUDY
+ PPvoid_t pvalue;
+#endif
if (bloom_check (bf, cmd->hash)) {
if (update_hash (cmd)) {
memcpy (&h->h.hash_pipe, &cmd->hash, sizeof (cmd->hash));
h->h.block_size = cmd->blocksize;
h->time = (uint64_t) time (NULL);
+ h->value = cmd->value;
+#ifdef WITH_JUDY
+ if (use_judy) {
+ pvalue = JudySLIns (&jtree, h->h.hash_pipe, PJE0);
+ *pvalue = h;
+ }
+ else {
+#endif
+
g_queue_push_head (hashes[cmd->blocksize % BUCKETS], h);
+#ifdef WITH_JUDY
+ }
+#endif
bloom_add (bf, cmd->hash);
mods++;
server_stat->fuzzy_hashes ++;
GList *cur, *tmp;
struct rspamd_fuzzy_node *h;
gboolean res = FALSE;
-
+#ifdef WITH_JUDY
+ PPvoid_t pvalue;
+
+ if (use_judy) {
+ pvalue = JudySLGet (jtree, s->hash_pipe, PJE0);
+ if (pvalue) {
+ res = JudySLDel (&jtree, s->hash_pipe, PJE0);
+ g_free (*pvalue);
+ }
+ }
+ else {
+#endif
cur = hash->head;
/* XXX: too slow way */
}
cur = g_list_next (cur);
}
+#ifdef WITH_JUDY
+ }
+#endif
return res;
memcpy (s.hash_pipe, cmd->hash, sizeof (s.hash_pipe));
s.block_size = cmd->blocksize;
-
+#ifdef WITH_JUDY
+ if (use_judy) {
+ return delete_hash (NULL, &s);
+ }
+ else {
+#endif
res = delete_hash (frequent, &s);
if (!res) {
res = delete_hash (hashes[cmd->blocksize % BUCKETS], &s);
else {
(void)delete_hash (hashes[cmd->blocksize % BUCKETS], &s);
}
+#ifdef WITH_JUDY
+ }
+#endif
return res;
}
struct sigaction signals;
struct event sev;
int retries = 0;
+ char *value;
worker->srv->pid = getpid ();
worker->srv->type = TYPE_FUZZY;
signal_add (&worker->sig_ev, NULL);
signal_set (&sev, SIGTERM, sigterm_handler, (void *)worker);
signal_add (&sev, NULL);
+ /* Get params */
+ if ((value = g_hash_table_lookup (worker->cf->params, "frequent_score")) != NULL) {
+ frequent_score = strtol (value, NULL, 10);
+ }
+ if ((value = g_hash_table_lookup (worker->cf->params, "use_judy")) != NULL) {
+#ifdef WITH_JUDY
+ use_judy = TRUE;
+#else
+ msg_err ("cannot use judy storage as judy support is not compiled in");
+#endif
+ }
/* Init bloom filter */
bf = bloom_create (20000000L, DEFAULT_BLOOM_HASHES);