From 894920c51a99656725823e7d23f6ca788206db02 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Thu, 11 Feb 2016 12:58:51 +0000 Subject: [PATCH] Migrate to lc-compressed btrie algorithm --- src/CMakeLists.txt | 1 + src/libutil/radix.c | 510 ++------------------------------------------ 2 files changed, 19 insertions(+), 492 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 601b5eca7..6191be291 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -107,6 +107,7 @@ ADD_LIBRARY(rspamd-server STATIC ${RSPAMD_CRYPTOBOX} ${RSPAMD_UTIL} ${RSPAMD_LUA TARGET_LINK_LIBRARIES(rspamd-server rspamd-http-parser) TARGET_LINK_LIBRARIES(rspamd-server rspamd-cdb) TARGET_LINK_LIBRARIES(rspamd-server rspamd-lpeg) +TARGET_LINK_LIBRARIES(rspamd-server lcbtrie) IF (ENABLE_CLANG_PLUGIN MATCHES "ON") ADD_DEPENDENCIES(rspamd-server rspamd-clang) diff --git a/src/libutil/radix.c b/src/libutil/radix.c index 99f4c88d8..2b67d7917 100644 --- a/src/libutil/radix.c +++ b/src/libutil/radix.c @@ -17,6 +17,7 @@ #include "radix.h" #include "rspamd.h" #include "mem_pool.h" +#include "btrie.h" #define msg_err_radix(...) rspamd_default_log_function (G_LOG_LEVEL_CRITICAL, \ "radix", tree->pool->tag.uid, \ @@ -35,397 +36,26 @@ G_STRFUNC, \ __VA_ARGS__) -struct radix_compressed_node { - union { - struct { - struct radix_compressed_node *right; - struct radix_compressed_node *left; - } n; - struct { - uint8_t *key; - guint keylen; - guint level; - } s; - } d; - uintptr_t value; - gboolean skipped; -}; - - struct radix_tree_compressed { - struct radix_compressed_node *root; rspamd_mempool_t *pool; size_t size; + struct btrie *tree; }; -static gboolean -radix_compare_compressed (struct radix_compressed_node *node, - const guint8 *key, guint keylen, guint cur_level) -{ - const guint8 *nk; - const guint8 *k; - guint8 bit; - guint shift, rbits, skip; - - if (node->d.s.keylen > keylen) { - /* Obvious case */ - return FALSE; - } - - - /* Compare byte aligned levels of a compressed node */ - shift = node->d.s.level / NBBY; - /* - * We know that at least of cur_level bits are the same, - * se we can optimize search slightly - */ - if (shift > 0) { - skip = cur_level / NBBY; - if (shift > skip && - memcmp (node->d.s.key + skip, key + skip, shift - skip) != 0) { - return FALSE; - } - } - - rbits = node->d.s.level % NBBY; - if (rbits > 0) { - /* Precisely compare remaining bits */ - nk = node->d.s.key + shift; - k = key + shift; - - bit = 1U << 7; - - while (rbits > 0) { - if ((*nk & bit) != (*k & bit)) { - return FALSE; - } - bit >>= 1; - rbits --; - } - } - - return TRUE; -} - uintptr_t radix_find_compressed (radix_compressed_t * tree, const guint8 *key, gsize keylen) { - struct radix_compressed_node *node; - guint32 bit; - gsize kremain = keylen / sizeof (guint32); - uintptr_t value; - const guint32 *k = (guint32 *)key; - guint32 kv = ntohl (*k); - guint cur_level = 0; - - bit = 1U << 31; - value = RADIX_NO_VALUE; - node = tree->root; - - msg_debug_radix ("trying to find key: %*xs", (gint)keylen, key); - while (node && kremain) { - if (node->skipped) { - msg_debug_radix ("finding in the compressed node: %p at level %d", - (gpointer)node->value, cur_level); - /* It is obviously a leaf node */ - if (radix_compare_compressed (node, key, keylen, cur_level)) { - return node->value; - } - else { - return value; - } - } - if (node->value != RADIX_NO_VALUE) { - value = node->value; - } - - msg_debug_radix ("finding value cur value: %p, left: %p, " - "right: %p, go %s, level: %d", - (gpointer)node->value, node->d.n.left, - node->d.n.right, (kv & bit) ? "right" : "left", - cur_level); - if (kv & bit) { - node = node->d.n.right; - } - else { - node = node->d.n.left; - } - - bit >>= 1; - if (bit == 0) { - k ++; - bit = 1U << 31; - kv = ntohl (*k); - kremain --; - } - cur_level ++; - } - - if (node) { - /* We still have a node but no more key, so we can search for skipped node */ - if (node->skipped) { - msg_debug_radix ("finding in the compressed node: %p at level %d", - (gpointer)node->value, cur_level); - /* It is obviously a leaf node */ - if (radix_compare_compressed (node, key, keylen, cur_level)) { - return node->value; - } - } - } - - return value; -} - - -static struct radix_compressed_node * -radix_uncompress_path (radix_compressed_t *tree, - struct radix_compressed_node *node, - guint start_level, - guint levels_uncompress) -{ - guint8 *nkey = node->d.s.key + start_level / NBBY; - guint8 bit = 1U << (7 - start_level % NBBY); - struct radix_compressed_node *leaf, *next; - - /* Make compressed leaf */ - leaf = rspamd_mempool_alloc (tree->pool, sizeof (*node)); - memcpy (leaf, node, sizeof (*node)); - - /* Make compressed node as uncompressed */ - node->skipped = FALSE; - node->value = RADIX_NO_VALUE; - - msg_debug_radix ( - "uncompress %ud levels of tree from %ud to %ud, stored key: %*xs", - levels_uncompress, - start_level, - start_level + levels_uncompress, - leaf->d.s.keylen, - leaf->d.s.key); - - /* Uncompress the desired path */ - while (levels_uncompress) { - next = rspamd_mempool_alloc (tree->pool, sizeof (*node)); - - next->skipped = FALSE; - next->value = RADIX_NO_VALUE; - - if (*nkey & bit) { - node->d.n.right = next; - node->d.n.left = NULL; - } - else { - node->d.n.left = next; - node->d.n.right = NULL; - } - - msg_debug_radix ("uncompress path for node: %p, left: %p, " - "right: %p, go %s", (gpointer)node->value, node->d.n.left, - node->d.n.right, (*nkey & bit) ? "right" : "left"); - - bit >>= 1; - if (bit == 0) { - nkey ++; - bit = 1U << 7; - } - node = next; - levels_uncompress --; - } - - /* Attach leaf node, that was previously a compressed node */ - msg_debug_radix ("attach leaf node to %s with value %p", (*nkey & bit) ? "right" : "left", - (gpointer)leaf->value); - if (*nkey & bit) { - node->d.n.right = leaf; - node->d.n.left = NULL; - } - else { - node->d.n.left = leaf; - node->d.n.right = NULL; - } - - /* Return node */ - return node; -} - - -static struct radix_compressed_node * -radix_make_leaf_node (radix_compressed_t *tree, - guint8 *key, guint keylen, guint level, - uintptr_t value, - gboolean compressed) -{ - struct radix_compressed_node *node; - - node = rspamd_mempool_alloc (tree->pool, sizeof (struct radix_compressed_node)); - if (compressed) { - node->skipped = TRUE; - node->d.s.keylen = keylen; - node->d.s.key = rspamd_mempool_alloc (tree->pool, node->d.s.keylen); - node->d.s.level = level; - memcpy (node->d.s.key, key, node->d.s.keylen); - } - else { - /* Uncompressed leaf node */ - memset (node, 0, sizeof (*node)); - } - node->value = value; - msg_debug_radix ("insert new leaf node with value %p to level %d", - (gpointer)value, level); + gconstpointer ret; - return node; -} + g_assert (tree != NULL); -static void -radix_move_up_compressed_leaf (radix_compressed_t *tree, - struct radix_compressed_node *leaf, - struct radix_compressed_node *parent, uintptr_t value, - guint8 *key, guint keylen, guint leaf_level) -{ - parent->value = leaf->value; - - leaf->value = value; - //g_slice_free1 (leaf->d.s.keylen, leaf->d.s.key); - leaf->d.s.keylen = keylen; - leaf->d.s.key = rspamd_mempool_alloc (tree->pool, leaf->d.s.keylen); - memcpy (leaf->d.s.key, key, keylen); - leaf->d.s.level = leaf_level; -} - -static uintptr_t -radix_replace_node (radix_compressed_t *tree, - struct radix_compressed_node *node, - guint8 *key, gsize keylen, - uintptr_t value) -{ - uintptr_t oldval; - - if (node->skipped) { - /* - * For leaf nodes we have to deal with the keys as well, since - * we might find that keys are different for the same leaf node - */ - //g_slice_free1 (node->d.s.keylen, node->d.s.key); - node->d.s.keylen = keylen; - node->d.s.key = rspamd_mempool_alloc (tree->pool, node->d.s.keylen); - memcpy (node->d.s.key, key, node->d.s.keylen); - oldval = node->value; - node->value = value; - msg_debug_radix ("replace value for leaf node with: %p, old value: %p", - (gpointer)value, (gpointer)oldval); - } - else { - oldval = node->value; - node->value = value; - msg_debug_radix ("replace value for node with: %p, old value: %p", - (gpointer)value, (gpointer)oldval); - } + ret = btrie_lookup (tree->tree, key, keylen * NBBY); - return oldval; -} - -static uintptr_t -radix_uncompress_node (radix_compressed_t *tree, - struct radix_compressed_node *node, - guint8 *key, gsize keylen, - uintptr_t value, - guint cur_level, - guint target_level, - guint8 bit) -{ - /* Find the largest common prefix of the compressed node and target node */ - gsize kremain = keylen - cur_level / NBBY; - guint8 *nkey = node->d.s.key + cur_level / NBBY; - guint8 *k = key + cur_level / NBBY; - guint levels_uncompress = 0, start_level = cur_level; - gboolean masked = FALSE; - struct radix_compressed_node *leaf; - - msg_debug_radix ("want to uncompress nodes from level %ud to level %ud, " - "compressed node level: %ud", - cur_level, target_level, node->d.s.level); - while (cur_level < target_level) { - guint8 kb = *k & bit; - guint8 nb = *nkey & bit; - - if (cur_level >= node->d.s.level) { - msg_debug_radix ("found available masked path at level %ud", cur_level); - masked = TRUE; - break; - } - if (kb != nb) { - msg_debug_radix ("found available path at level %ud", cur_level); - break; - } - - cur_level ++; - levels_uncompress ++; - bit >>= 1; - if (bit == 0) { - k ++; - nkey ++; - bit = 1U << 7; - kremain --; - } - } - - if (kremain == 0) { - /* Nodes are equal */ - return radix_replace_node (tree, node, key, keylen, value); - } - else { - /* - * We need to uncompress the common path - */ - struct radix_compressed_node *nnode; - - nnode = radix_uncompress_path (tree, node, start_level, levels_uncompress); - - /* - * Now nnode is the last uncompressed node with compressed leaf inside - * and we also know that the current bit is different - * - * - if we have target_level == cur_level, then we can safely assign the - * value of that parent node - * - otherwise we insert new compressed leaf node - */ - if (cur_level == target_level) { - msg_debug_radix ("insert detached leaf node with value: %p", - (gpointer)value); - nnode->value = value; - } - else if (masked) { - /* - * Here we just add the previous value of node to the current node - * and replace value in the leaf - */ - if (nnode->d.n.left != NULL) { - leaf = nnode->d.n.left; - } - else { - leaf = nnode->d.n.right; - } - msg_debug_radix ("move leaf node with value: %p, to level %ud, " - "set leaf node value to %p and level %ud", (gpointer)nnode->value, - cur_level, - (gpointer)value, target_level); - radix_move_up_compressed_leaf (tree, leaf, nnode, value, key, keylen, - target_level); - } - else { - node = radix_make_leaf_node (tree, key, keylen, - target_level, value, TRUE); - if (nnode->d.n.left == NULL) { - nnode->d.n.left = node; - } - else { - nnode->d.n.right = node; - } - } - tree->size ++; + if (ret == NULL) { + return RADIX_NO_VALUE; } - return value; + return (uintptr_t)ret; } @@ -435,125 +65,21 @@ radix_insert_compressed (radix_compressed_t * tree, gsize masklen, uintptr_t value) { - struct radix_compressed_node *node, *next = NULL, **prev; - gsize keybits = keylen * NBBY; - guint target_level = (keylen * NBBY - masklen); - guint cur_level = 0; - guint8 bit, *k = key; - gsize kremain = keylen; - uintptr_t oldval = RADIX_NO_VALUE; - - bit = 1U << 7; - node = tree->root; + guint keybits = keylen * NBBY; + uintptr_t old; + g_assert (tree != NULL); g_assert (keybits >= masklen); - msg_debug_radix ("want insert value %p with mask %z, key: %*xs", - (gpointer)value, masklen, (int)keylen, key); - - node = tree->root; - next = node; - prev = &tree->root; - - /* Search for the place to insert element */ - while (node && cur_level < target_level) { - if (node->skipped) { - /* We have found skipped node and we need to uncompress it */ - return radix_uncompress_node (tree, node, key, keylen, value, - cur_level, target_level, bit); - } - if (*k & bit) { - next = node->d.n.right; - prev = &node->d.n.right; - } - else { - next = node->d.n.left; - prev = &node->d.n.left; - } - if (next == NULL) { - /* Need to insert some nodes */ - break; - } + msg_debug_radix ("want insert value %p with mask %z, key: %*xs", + (gpointer)value, masklen, (int)keybits, key); - bit >>= 1; - if (bit == 0) { - k ++; - bit = 1U << 7; - kremain --; - } - cur_level ++; - node = next; - } + old = radix_find_compressed (tree, key, keylen); - if (next == NULL) { - next = radix_make_leaf_node (tree, key, keylen, target_level, value, - TRUE); - *prev = next; - tree->size ++; - } - else if (next->value == RADIX_NO_VALUE) { - msg_debug_radix ("insert value node with %p", (gpointer)value); - next->value = value; - tree->size ++; - } - else { - if (next->skipped) { - /* - * For skipped node we replace value if the level of skipped node - * is equal to the target level - */ - if (next->d.s.level == target_level) { - oldval = radix_replace_node (tree, next, key, keylen, value); - } - else if (next->d.s.level > target_level) { - /* - * Here we must create new normal node and insert compressed leaf - * one level below - */ - node = radix_make_leaf_node (tree, key, keylen, - target_level, value, FALSE); - *prev = node; - if (*k & bit) { - node->d.n.right = next; - } - else { - node->d.n.left = next; - } - oldval = next->value; - tree->size ++; - } - else { - /* - * We must convert old compressed node to a normal node and - * create new compressed leaf attached to that normal node - */ - node = radix_make_leaf_node (tree, key, keylen, - target_level, value, TRUE); - *prev = next; - msg_debug_radix ("move leaf node with value: %p, to level %ud, " - "set leaf node value to %p and level %ud", (gpointer)next->value, - cur_level, - (gpointer)value, target_level); - next->skipped = FALSE; - if (*k & bit) { - next->d.n.right = node; - next->d.n.left = NULL; - } - else { - next->d.n.left = node; - next->d.n.right = NULL; - } - oldval = next->value; - tree->size ++; - } - } - else { - oldval = radix_replace_node (tree, next, key, keylen, value); - } - return oldval; - } + btrie_add_prefix (tree->tree, key, keybits - masklen, + (gconstpointer)value); - return next->value; + return old; } @@ -569,7 +95,7 @@ radix_create_compressed (void) tree->pool = rspamd_mempool_new (rspamd_mempool_suggest_size (), NULL); tree->size = 0; - tree->root = NULL; + tree->tree = btrie_init (tree->pool); return tree; } -- 2.39.5