]> source.dussan.org Git - rspamd.git/commitdiff
Migrate to lc-compressed btrie algorithm
authorVsevolod Stakhov <vsevolod@highsecure.ru>
Thu, 11 Feb 2016 12:58:51 +0000 (12:58 +0000)
committerVsevolod Stakhov <vsevolod@highsecure.ru>
Thu, 11 Feb 2016 12:58:51 +0000 (12:58 +0000)
src/CMakeLists.txt
src/libutil/radix.c

index 601b5eca7614d9df3866b01c7484f1857fb8b4cc..6191be291f00743c5ef8f0e3f0afab63b4c3b91c 100644 (file)
@@ -107,6 +107,7 @@ ADD_LIBRARY(rspamd-server STATIC ${RSPAMD_CRYPTOBOX} ${RSPAMD_UTIL} ${RSPAMD_LUA
 TARGET_LINK_LIBRARIES(rspamd-server rspamd-http-parser)
 TARGET_LINK_LIBRARIES(rspamd-server rspamd-cdb)
 TARGET_LINK_LIBRARIES(rspamd-server rspamd-lpeg)
+TARGET_LINK_LIBRARIES(rspamd-server lcbtrie)
 
 IF (ENABLE_CLANG_PLUGIN MATCHES "ON")
        ADD_DEPENDENCIES(rspamd-server rspamd-clang)
index 99f4c88d8e161a33e75d74a2a10b32b91e3b61db..2b67d791740ba79018e0722b9ec49746e58841a4 100644 (file)
@@ -17,6 +17,7 @@
 #include "radix.h"
 #include "rspamd.h"
 #include "mem_pool.h"
+#include "btrie.h"
 
 #define msg_err_radix(...) rspamd_default_log_function (G_LOG_LEVEL_CRITICAL, \
         "radix", tree->pool->tag.uid, \
         G_STRFUNC, \
         __VA_ARGS__)
 
-struct radix_compressed_node {
-       union {
-               struct {
-                       struct radix_compressed_node *right;
-                       struct radix_compressed_node *left;
-               } n;
-               struct {
-                       uint8_t *key;
-                       guint keylen;
-                       guint level;
-               } s;
-       } d;
-       uintptr_t value;
-       gboolean skipped;
-};
-
-
 struct radix_tree_compressed {
-       struct radix_compressed_node *root;
        rspamd_mempool_t *pool;
        size_t size;
+       struct btrie *tree;
 };
 
-static gboolean
-radix_compare_compressed (struct radix_compressed_node *node,
-               const guint8 *key, guint keylen, guint cur_level)
-{
-       const guint8 *nk;
-       const guint8 *k;
-       guint8 bit;
-       guint shift, rbits, skip;
-
-       if (node->d.s.keylen > keylen) {
-               /* Obvious case */
-               return FALSE;
-       }
-
-
-       /* Compare byte aligned levels of a compressed node */
-       shift = node->d.s.level / NBBY;
-       /*
-        * We know that at least of cur_level bits are the same,
-        * se we can optimize search slightly
-        */
-       if (shift > 0) {
-               skip = cur_level / NBBY;
-               if (shift > skip &&
-                               memcmp (node->d.s.key + skip, key + skip, shift - skip) != 0) {
-                       return FALSE;
-               }
-       }
-
-       rbits = node->d.s.level % NBBY;
-       if (rbits > 0) {
-               /* Precisely compare remaining bits */
-               nk = node->d.s.key + shift;
-               k = key + shift;
-
-               bit = 1U << 7;
-
-               while (rbits > 0) {
-                       if ((*nk & bit) != (*k & bit)) {
-                               return FALSE;
-                       }
-                       bit >>= 1;
-                       rbits --;
-               }
-       }
-
-       return TRUE;
-}
-
 uintptr_t
 radix_find_compressed (radix_compressed_t * tree, const guint8 *key, gsize keylen)
 {
-       struct radix_compressed_node *node;
-       guint32 bit;
-       gsize kremain = keylen / sizeof (guint32);
-       uintptr_t value;
-       const guint32 *k = (guint32 *)key;
-       guint32 kv = ntohl (*k);
-       guint cur_level = 0;
-
-       bit = 1U << 31;
-       value = RADIX_NO_VALUE;
-       node = tree->root;
-
-       msg_debug_radix ("trying to find key: %*xs", (gint)keylen, key);
-       while (node && kremain) {
-               if (node->skipped) {
-                       msg_debug_radix ("finding in the compressed node: %p at level %d",
-                                       (gpointer)node->value, cur_level);
-                       /* It is obviously a leaf node */
-                       if (radix_compare_compressed (node, key, keylen, cur_level)) {
-                               return node->value;
-                       }
-                       else {
-                               return value;
-                       }
-               }
-               if (node->value != RADIX_NO_VALUE) {
-                       value = node->value;
-               }
-
-               msg_debug_radix ("finding value cur value: %p, left: %p, "
-                               "right: %p, go %s, level: %d",
-                               (gpointer)node->value, node->d.n.left,
-                               node->d.n.right, (kv & bit) ? "right" : "left",
-                               cur_level);
-               if (kv & bit) {
-                       node = node->d.n.right;
-               }
-               else {
-                       node = node->d.n.left;
-               }
-
-               bit >>= 1;
-               if (bit == 0) {
-                       k ++;
-                       bit = 1U << 31;
-                       kv = ntohl (*k);
-                       kremain --;
-               }
-               cur_level ++;
-       }
-
-       if (node) {
-               /* We still have a node but no more key, so we can search for skipped node */
-               if (node->skipped) {
-                       msg_debug_radix ("finding in the compressed node: %p at level %d",
-                                       (gpointer)node->value, cur_level);
-                       /* It is obviously a leaf node */
-                       if (radix_compare_compressed (node, key, keylen, cur_level)) {
-                               return node->value;
-                       }
-               }
-       }
-
-       return value;
-}
-
-
-static struct radix_compressed_node *
-radix_uncompress_path (radix_compressed_t *tree,
-               struct radix_compressed_node *node,
-               guint start_level,
-               guint levels_uncompress)
-{
-       guint8 *nkey = node->d.s.key + start_level / NBBY;
-       guint8 bit = 1U << (7 - start_level % NBBY);
-       struct radix_compressed_node *leaf, *next;
-
-       /* Make compressed leaf */
-       leaf = rspamd_mempool_alloc (tree->pool, sizeof (*node));
-       memcpy (leaf, node, sizeof (*node));
-
-       /* Make compressed node as uncompressed */
-       node->skipped = FALSE;
-       node->value = RADIX_NO_VALUE;
-
-       msg_debug_radix (
-                       "uncompress %ud levels of tree from %ud to %ud, stored key: %*xs",
-                       levels_uncompress,
-                       start_level,
-                       start_level + levels_uncompress,
-                       leaf->d.s.keylen,
-                       leaf->d.s.key);
-
-       /* Uncompress the desired path */
-       while (levels_uncompress) {
-               next = rspamd_mempool_alloc (tree->pool, sizeof (*node));
-
-               next->skipped = FALSE;
-               next->value = RADIX_NO_VALUE;
-
-               if (*nkey & bit) {
-                       node->d.n.right = next;
-                       node->d.n.left = NULL;
-               }
-               else {
-                       node->d.n.left = next;
-                       node->d.n.right = NULL;
-               }
-
-               msg_debug_radix ("uncompress path for node: %p, left: %p, "
-                               "right: %p, go %s", (gpointer)node->value, node->d.n.left,
-                               node->d.n.right, (*nkey & bit) ? "right" : "left");
-
-               bit >>= 1;
-               if (bit == 0) {
-                       nkey ++;
-                       bit = 1U << 7;
-               }
-               node = next;
-               levels_uncompress --;
-       }
-
-       /* Attach leaf node, that was previously a compressed node */
-       msg_debug_radix ("attach leaf node to %s with value %p", (*nkey & bit) ? "right" : "left",
-                       (gpointer)leaf->value);
-       if (*nkey & bit) {
-               node->d.n.right = leaf;
-               node->d.n.left = NULL;
-       }
-       else {
-               node->d.n.left = leaf;
-               node->d.n.right = NULL;
-       }
-
-       /* Return node */
-       return node;
-}
-
-
-static struct radix_compressed_node *
-radix_make_leaf_node (radix_compressed_t *tree,
-               guint8 *key, guint keylen, guint level,
-               uintptr_t value,
-               gboolean compressed)
-{
-       struct radix_compressed_node *node;
-
-       node = rspamd_mempool_alloc (tree->pool, sizeof (struct radix_compressed_node));
-       if (compressed) {
-               node->skipped = TRUE;
-               node->d.s.keylen = keylen;
-               node->d.s.key = rspamd_mempool_alloc (tree->pool, node->d.s.keylen);
-               node->d.s.level = level;
-               memcpy (node->d.s.key, key, node->d.s.keylen);
-       }
-       else {
-               /* Uncompressed leaf node */
-               memset (node, 0, sizeof (*node));
-       }
-       node->value = value;
-       msg_debug_radix ("insert new leaf node with value %p to level %d",
-                       (gpointer)value, level);
+       gconstpointer ret;
 
-       return node;
-}
+       g_assert (tree != NULL);
 
-static void
-radix_move_up_compressed_leaf (radix_compressed_t *tree,
-               struct radix_compressed_node *leaf,
-               struct radix_compressed_node *parent, uintptr_t value,
-               guint8 *key, guint keylen, guint leaf_level)
-{
-       parent->value = leaf->value;
-
-       leaf->value = value;
-       //g_slice_free1 (leaf->d.s.keylen, leaf->d.s.key);
-       leaf->d.s.keylen = keylen;
-       leaf->d.s.key = rspamd_mempool_alloc (tree->pool, leaf->d.s.keylen);
-       memcpy (leaf->d.s.key, key, keylen);
-       leaf->d.s.level = leaf_level;
-}
-
-static uintptr_t
-radix_replace_node (radix_compressed_t *tree,
-               struct radix_compressed_node *node,
-               guint8 *key, gsize keylen,
-               uintptr_t value)
-{
-       uintptr_t oldval;
-
-       if (node->skipped) {
-               /*
-                * For leaf nodes we have to deal with the keys as well, since
-                * we might find that keys are different for the same leaf node
-                */
-               //g_slice_free1 (node->d.s.keylen, node->d.s.key);
-               node->d.s.keylen = keylen;
-               node->d.s.key = rspamd_mempool_alloc (tree->pool, node->d.s.keylen);
-               memcpy (node->d.s.key, key, node->d.s.keylen);
-               oldval = node->value;
-               node->value = value;
-               msg_debug_radix ("replace value for leaf node with: %p, old value: %p",
-                               (gpointer)value, (gpointer)oldval);
-       }
-       else {
-               oldval = node->value;
-               node->value = value;
-               msg_debug_radix ("replace value for node with: %p, old value: %p",
-                               (gpointer)value, (gpointer)oldval);
-       }
+       ret = btrie_lookup (tree->tree, key, keylen * NBBY);
 
-       return oldval;
-}
-
-static uintptr_t
-radix_uncompress_node (radix_compressed_t *tree,
-               struct radix_compressed_node *node,
-               guint8 *key, gsize keylen,
-               uintptr_t value,
-               guint cur_level,
-               guint target_level,
-               guint8 bit)
-{
-       /* Find the largest common prefix of the compressed node and target node */
-       gsize kremain = keylen - cur_level / NBBY;
-       guint8 *nkey = node->d.s.key + cur_level / NBBY;
-       guint8 *k = key + cur_level / NBBY;
-       guint levels_uncompress = 0, start_level = cur_level;
-       gboolean masked = FALSE;
-       struct radix_compressed_node *leaf;
-
-       msg_debug_radix ("want to uncompress nodes from level %ud to level %ud, "
-                       "compressed node level: %ud",
-                       cur_level, target_level, node->d.s.level);
-       while (cur_level < target_level) {
-               guint8 kb = *k & bit;
-               guint8 nb = *nkey & bit;
-
-               if (cur_level >= node->d.s.level) {
-                       msg_debug_radix ("found available masked path at level %ud", cur_level);
-                       masked = TRUE;
-                       break;
-               }
-               if (kb != nb) {
-                       msg_debug_radix ("found available path at level %ud", cur_level);
-                       break;
-               }
-
-               cur_level ++;
-               levels_uncompress ++;
-               bit >>= 1;
-               if (bit == 0) {
-                       k ++;
-                       nkey ++;
-                       bit = 1U << 7;
-                       kremain --;
-               }
-       }
-
-       if (kremain == 0) {
-               /* Nodes are equal */
-               return radix_replace_node (tree, node, key, keylen, value);
-       }
-       else {
-               /*
-                * We need to uncompress the common path
-                */
-               struct radix_compressed_node *nnode;
-
-               nnode = radix_uncompress_path (tree, node, start_level, levels_uncompress);
-
-               /*
-                * Now nnode is the last uncompressed node with compressed leaf inside
-                * and we also know that the current bit is different
-                *
-                * - if we have target_level == cur_level, then we can safely assign the
-                * value of that parent node
-                * - otherwise we insert new compressed leaf node
-                */
-               if (cur_level == target_level) {
-                       msg_debug_radix ("insert detached leaf node with value: %p",
-                                       (gpointer)value);
-                       nnode->value = value;
-               }
-               else if (masked) {
-                       /*
-                        * Here we just add the previous value of node to the current node
-                        * and replace value in the leaf
-                        */
-                       if (nnode->d.n.left != NULL) {
-                               leaf = nnode->d.n.left;
-                       }
-                       else {
-                               leaf = nnode->d.n.right;
-                       }
-                       msg_debug_radix ("move leaf node with value: %p, to level %ud, "
-                                       "set leaf node value to %p and level %ud", (gpointer)nnode->value,
-                                       cur_level,
-                                       (gpointer)value, target_level);
-                       radix_move_up_compressed_leaf (tree, leaf, nnode, value, key, keylen,
-                                       target_level);
-               }
-               else {
-                       node = radix_make_leaf_node (tree, key, keylen,
-                                       target_level, value, TRUE);
-                       if (nnode->d.n.left == NULL) {
-                               nnode->d.n.left = node;
-                       }
-                       else {
-                               nnode->d.n.right = node;
-                       }
-               }
-               tree->size ++;
+       if (ret == NULL) {
+               return RADIX_NO_VALUE;
        }
 
-       return value;
+       return (uintptr_t)ret;
 }
 
 
@@ -435,125 +65,21 @@ radix_insert_compressed (radix_compressed_t * tree,
        gsize masklen,
        uintptr_t value)
 {
-       struct radix_compressed_node *node, *next = NULL, **prev;
-       gsize keybits = keylen * NBBY;
-       guint target_level = (keylen * NBBY - masklen);
-       guint cur_level = 0;
-       guint8 bit, *k = key;
-       gsize kremain = keylen;
-       uintptr_t oldval = RADIX_NO_VALUE;
-
-       bit = 1U << 7;
-       node = tree->root;
+       guint keybits = keylen * NBBY;
+       uintptr_t old;
 
+       g_assert (tree != NULL);
        g_assert (keybits >= masklen);
-       msg_debug_radix ("want insert value %p with mask %z, key: %*xs",
-                       (gpointer)value, masklen, (int)keylen, key);
-
-       node = tree->root;
-       next = node;
-       prev = &tree->root;
-
-       /* Search for the place to insert element */
-       while (node && cur_level < target_level) {
-               if (node->skipped) {
-                       /* We have found skipped node and we need to uncompress it */
-                       return radix_uncompress_node (tree, node, key, keylen, value,
-                                       cur_level, target_level, bit);
-               }
-               if (*k & bit) {
-                       next = node->d.n.right;
-                       prev = &node->d.n.right;
-               }
-               else {
-                       next = node->d.n.left;
-                       prev = &node->d.n.left;
-               }
 
-               if (next == NULL) {
-                       /* Need to insert some nodes */
-                       break;
-               }
+       msg_debug_radix ("want insert value %p with mask %z, key: %*xs",
+                       (gpointer)value, masklen, (int)keybits, key);
 
-               bit >>= 1;
-               if (bit == 0) {
-                       k ++;
-                       bit = 1U << 7;
-                       kremain --;
-               }
-               cur_level ++;
-               node = next;
-       }
+       old = radix_find_compressed (tree, key, keylen);
 
-       if (next == NULL) {
-               next = radix_make_leaf_node (tree, key, keylen, target_level, value,
-                               TRUE);
-               *prev = next;
-               tree->size ++;
-       }
-       else if (next->value == RADIX_NO_VALUE) {
-               msg_debug_radix ("insert value node with %p", (gpointer)value);
-               next->value = value;
-               tree->size ++;
-       }
-       else {
-               if (next->skipped) {
-                       /*
-                        * For skipped node we replace value if the level of skipped node
-                        * is equal to the target level
-                        */
-                       if (next->d.s.level == target_level) {
-                               oldval = radix_replace_node (tree, next, key, keylen, value);
-                       }
-                       else if (next->d.s.level > target_level) {
-                               /*
-                                * Here we must create new normal node and insert compressed leaf
-                                * one level below
-                                */
-                               node = radix_make_leaf_node (tree, key, keylen,
-                                               target_level, value, FALSE);
-                               *prev = node;
-                               if (*k & bit) {
-                                       node->d.n.right = next;
-                               }
-                               else {
-                                       node->d.n.left = next;
-                               }
-                               oldval = next->value;
-                               tree->size ++;
-                       }
-                       else {
-                               /*
-                                * We must convert old compressed node to a normal node and
-                                * create new compressed leaf attached to that normal node
-                                */
-                               node = radix_make_leaf_node (tree, key, keylen,
-                                               target_level, value, TRUE);
-                               *prev = next;
-                               msg_debug_radix ("move leaf node with value: %p, to level %ud, "
-                                               "set leaf node value to %p and level %ud", (gpointer)next->value,
-                                               cur_level,
-                                               (gpointer)value, target_level);
-                               next->skipped = FALSE;
-                               if (*k & bit) {
-                                       next->d.n.right = node;
-                                       next->d.n.left = NULL;
-                               }
-                               else {
-                                       next->d.n.left = node;
-                                       next->d.n.right = NULL;
-                               }
-                               oldval = next->value;
-                               tree->size ++;
-                       }
-               }
-               else {
-                       oldval = radix_replace_node (tree, next, key, keylen, value);
-               }
-               return oldval;
-       }
+       btrie_add_prefix (tree->tree, key, keybits - masklen,
+                       (gconstpointer)value);
 
-       return next->value;
+       return old;
 }
 
 
@@ -569,7 +95,7 @@ radix_create_compressed (void)
 
        tree->pool = rspamd_mempool_new (rspamd_mempool_suggest_size (), NULL);
        tree->size = 0;
-       tree->root = NULL;
+       tree->tree = btrie_init (tree->pool);
 
        return tree;
 }