From 34b343ece7bc2c8cb868223b8c0a447c279a2f24 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Mon, 15 Sep 2014 10:45:19 +0100 Subject: [PATCH] Implement new path-compressed radix trie. - The performance benefit over the old algorithm is about 10 times. - Memory usage is significantly lower as well. - Now radix trie can accept any IPv4/IPv6 values --- src/libutil/radix.c | 311 +++++++++++++++++++++++++++++++++++++++++++- src/libutil/radix.h | 18 +++ 2 files changed, 322 insertions(+), 7 deletions(-) diff --git a/src/libutil/radix.c b/src/libutil/radix.c index 79c4883d8..cd40c7905 100644 --- a/src/libutil/radix.c +++ b/src/libutil/radix.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2009-2012, Vsevolod Stakhov + * Copyright (c) 2009-2014, Vsevolod Stakhov * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -25,6 +25,7 @@ #include "config.h" #include "radix.h" +#include "main.h" #include "mem_pool.h" static void * radix_alloc (radix_tree_t * tree); @@ -43,6 +44,28 @@ struct radix_tree_s { rspamd_mempool_t *pool; }; +struct radix_compressed_node { + union { + struct { + struct radix_compressed_node *right; + struct radix_compressed_node *left; + } n; + struct { + uint8_t *key; + size_t keylen; + } s; + } d; + gboolean skipped; + uintptr_t value; +}; + + +struct radix_tree_compressed { + struct radix_compressed_node *root; + size_t size; + rspamd_mempool_t *pool; +}; + radix_tree_t * radix_tree_create (void) { @@ -69,12 +92,6 @@ radix_tree_create (void) return tree; } -enum radix_insert_type { - RADIX_INSERT, - RADIX_ADD, - RADIX_REPLACE -}; - static uintptr_t radix32tree_insert_common (radix_tree_t * tree, guint32 key, @@ -350,6 +367,286 @@ radix32_tree_find_addr (radix_tree_t *tree, rspamd_inet_addr_t *addr) return radix32tree_find (tree, ntohl (addr->addr.s4.sin_addr.s_addr)); } +uintptr_t +radix_find_compressed (radix_compressed_t * tree, guint8 *key, gsize keylen) +{ + struct radix_compressed_node *node; + guint8 bit; + gsize kremain = keylen; + uintptr_t value; + + bit = 1 << 7; + value = RADIX_NO_VALUE; + node = tree->root; + + while (node && kremain) { + if (node->value != RADIX_NO_VALUE) { + value = node->value; + } + + if (node->skipped) { + /* It is obviously a leaf node */ + if (node->d.s.keylen <= keylen && + memcmp (node->d.s.key, key, node->d.s.keylen) == 0) { + return node->value; + } + else { + return value; + } + } + + if (*key & bit) { + node = node->d.n.right; + } + else { + node = node->d.n.left; + } + + bit >>= 1; + if (bit == 0) { + key ++; + bit = 1 << 7; + kremain --; + } + } + + return value; +} + + +static struct radix_compressed_node * +radix_uncompress_path (struct radix_compressed_node *node, + guint start_level, + guint levels_uncompress) +{ + guint8 *nkey = node->d.s.key + start_level / NBBY; + guint8 bit = 1 << (8 - start_level % NBBY); + struct radix_compressed_node *leaf, *next; + + /* Make compressed leaf */ + leaf = g_slice_alloc (sizeof (*node)); + memcpy (leaf, node, sizeof (*node)); + + node->skipped = FALSE; + + msg_debug ("uncompress %ud levels of tree", levels_uncompress); + while (levels_uncompress) { + next = g_slice_alloc (sizeof (*node)); + + next->skipped = FALSE; + next->d.n.right = NULL; + next->d.n.left = NULL; + next->value = RADIX_NO_VALUE; + + if (*nkey & bit) { + node->d.n.right = next; + node->d.n.left = NULL; + } + else { + node->d.n.left = next; + node->d.n.right = NULL; + } + + bit >>= 1; + if (bit == 0) { + nkey ++; + bit = 1 << 7; + } + node = next; + levels_uncompress --; + } + + /* Attach leaf node */ + if (*nkey & bit) { + node->d.n.right = leaf; + node->d.n.left = NULL; + } + else { + node->d.n.left = leaf; + node->d.n.right = NULL; + } + + /* Return node */ + return node; +} + + +static struct radix_compressed_node * +radix_make_leaf_node (guint8 *key, gsize keylen, gsize masklen, + uintptr_t value, + gboolean compressed) +{ + struct radix_compressed_node *node; + + node = g_slice_alloc (sizeof (struct radix_compressed_node)); + if (compressed) { + node->skipped = TRUE; + node->d.s.keylen = keylen; + node->d.s.key = g_slice_alloc (node->d.s.keylen); + memcpy (node->d.s.key, key, node->d.s.keylen); + } + else { + /* Uncompressed leaf node */ + memset (node, 0, sizeof (*node)); + } + node->value = value; + + return node; +} + +static uintptr_t +radix_uncompress_node (struct radix_compressed_node *node, + guint8 *key, gsize keylen, + uintptr_t value, + guint cur_level, + guint target_level, + guint8 bit) +{ + /* Find the largest common prefix of the compressed node and target node */ + gsize kremain = keylen - cur_level / NBBY; + guint8 *nkey = node->d.s.key + cur_level / NBBY; + guint levels_uncompress = 0, start_level = cur_level; + + msg_debug ("want to uncompress nodes from level %ud to level %ud", + cur_level, target_level); + while (cur_level < target_level) { + guint8 kb = *key & bit; + guint8 nb = *nkey & bit; + + if (kb != nb) { + msg_debug ("found available path at level %ud", cur_level); + break; + } + cur_level ++; + levels_uncompress ++; + bit >>= 1; + if (bit == 0) { + key ++; + nkey ++; + bit = 1 << 7; + kremain --; + } + } + + if (kremain == 0) { + /* Nodes are equal */ + uintptr_t oldval = node->value; + node->value = value; + return oldval; + } + else { + /* + * We need to uncompress the common path + */ + struct radix_compressed_node *nnode; + + nnode = radix_uncompress_path (node, start_level, levels_uncompress); + + /* + * Now nnode is the last uncompressed node with compressed leaf inside + * and we also know that the current bit is different + * + * - if we have target_level == cur_level, then we can safely assign the + * value of that parent node + * - otherwise we insert new compressed leaf node + */ + if (cur_level == target_level) { + nnode->value = value; + } + else { + node = radix_make_leaf_node (key, keylen, value, + keylen - target_level / NBBY, TRUE); + if (nnode->d.n.left == NULL) { + nnode->d.n.left = node; + } + else { + nnode->d.n.right = node; + } + } + } + + return value; +} + + +uintptr_t +radix_insert_compressed (radix_compressed_t * tree, + guint8 *key, gsize keylen, + gsize masklen, + uintptr_t value) +{ + struct radix_compressed_node *node, *next = NULL, **prev; + gsize keybits = keylen * NBBY; + guint target_level = (keylen * NBBY - masklen); + guint cur_level = 0; + guint8 bit; + gsize kremain = keylen; + + bit = 1 << 7; + node = tree->root; + + g_assert (keybits >= masklen); + + node = tree->root; + prev = &tree->root; + + /* Search for the place to insert element */ + while (node && cur_level < target_level) { + if (node->skipped) { + /* We have found skipped node and we need to uncompress it */ + return radix_uncompress_node (node, key, keylen, value, cur_level, + target_level, bit); + } + if (*key & bit) { + next = node->d.n.right; + prev = &node->d.n.right; + } + else { + next = node->d.n.left; + prev = &node->d.n.left; + } + + if (next == NULL) { + /* Need to insert some nodes */ + break; + } + + bit >>= 1; + if (bit == 0) { + key ++; + bit = 1 << 7; + kremain --; + } + cur_level ++; + node = next; + } + + if (next == NULL) { + next = radix_make_leaf_node (key, keylen, masklen, value, + TRUE); + *prev = next; + } + + return next->value; +} + + +radix_compressed_t * +radix_tree_create_compressed (void) +{ + radix_compressed_t *tree; + + tree = g_slice_alloc (sizeof (radix_tree_t)); + if (tree == NULL) { + return NULL; + } + + tree->size = 0; + tree->root = NULL; + + return tree; +} + /* * vi:ts=4 */ diff --git a/src/libutil/radix.h b/src/libutil/radix.h index 5b014355e..3ecb52eba 100644 --- a/src/libutil/radix.h +++ b/src/libutil/radix.h @@ -9,6 +9,13 @@ typedef struct radix_node_s radix_node_t; typedef struct radix_tree_s radix_tree_t; +typedef struct radix_tree_compressed radix_compressed_t; + +enum radix_insert_type { + RADIX_INSERT, + RADIX_ADD, + RADIX_REPLACE +}; typedef gboolean (*radix_tree_traverse_func)(guint32 key, guint32 mask, uintptr_t value, void *user_data); @@ -86,4 +93,15 @@ void radix32tree_traverse (radix_tree_t *tree, */ void radix_tree_free (radix_tree_t *tree); +uintptr_t +radix_insert_compressed (radix_compressed_t * tree, + guint8 *key, gsize keylen, + gsize masklen, + uintptr_t value); + +uintptr_t radix_find_compressed (radix_compressed_t * tree, guint8 *key, + gsize keylen); + +radix_compressed_t *radix_tree_create_compressed (void); + #endif -- 2.39.5