From ef128a94ce6fded66b0dbf62354798abf30b1ab0 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Tue, 21 Apr 2015 14:05:16 +0100 Subject: [PATCH] Remove old trie code. --- src/libserver/url.c | 1 - src/libutil/CMakeLists.txt | 1 - src/libutil/trie.c | 242 ------------------------------------- src/libutil/trie.h | 91 -------------- src/lua/lua_config.c | 1 - src/plugins/surbl.c | 1 - 6 files changed, 337 deletions(-) delete mode 100644 src/libutil/trie.c delete mode 100644 src/libutil/trie.h diff --git a/src/libserver/url.c b/src/libserver/url.c index c31a727c7..7faf5acc3 100644 --- a/src/libserver/url.c +++ b/src/libserver/url.c @@ -30,7 +30,6 @@ #include "fstring.h" #include "main.h" #include "message.h" -#include "trie.h" #include "http.h" #include "acism.h" diff --git a/src/libutil/CMakeLists.txt b/src/libutil/CMakeLists.txt index 3ef5b1d07..728eeaa47 100644 --- a/src/libutil/CMakeLists.txt +++ b/src/libutil/CMakeLists.txt @@ -18,7 +18,6 @@ SET(LIBRSPAMDUTILSRC ${CMAKE_CURRENT_SOURCE_DIR}/regexp.c ${CMAKE_CURRENT_SOURCE_DIR}/rrd.c ${CMAKE_CURRENT_SOURCE_DIR}/shingles.c - ${CMAKE_CURRENT_SOURCE_DIR}/trie.c ${CMAKE_CURRENT_SOURCE_DIR}/upstream.c ${CMAKE_CURRENT_SOURCE_DIR}/util.c) # Rspamdutil diff --git a/src/libutil/trie.c b/src/libutil/trie.c deleted file mode 100644 index b3bdeaf4a..000000000 --- a/src/libutil/trie.c +++ /dev/null @@ -1,242 +0,0 @@ -/* Copyright (c) 2010, Vsevolod Stakhov - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "config.h" -#include "mem_pool.h" -#include "trie.h" - -rspamd_trie_t * -rspamd_trie_create (gboolean icase) -{ - rspamd_trie_t *new; - - new = g_malloc (sizeof (rspamd_trie_t)); - - new->icase = icase; - new->pool = rspamd_mempool_new (rspamd_mempool_suggest_size ()); - new->root.fail = NULL; - new->root.final = 0; - new->root.id = 0; - new->root.next = NULL; - new->root.match = NULL; - new->fail_states = g_ptr_array_sized_new (8); - - return new; -} - -/* - * Insert a single character as the specified level of the suffix tree - */ -static struct rspamd_trie_state * -rspamd_trie_insert_char (rspamd_trie_t *trie, - guint depth, - struct rspamd_trie_state *pos, - gchar c) -{ - struct rspamd_trie_match *new_match; - struct rspamd_trie_state *new_pos; - - /* New match is inserted before pos */ - new_match = - rspamd_mempool_alloc (trie->pool, sizeof (struct rspamd_trie_match)); - new_match->next = pos->match; - new_match->c = c; - - /* Now set match link */ - pos->match = new_match; - - new_match->state = - rspamd_mempool_alloc (trie->pool, sizeof (struct rspamd_trie_state)); - new_pos = new_match->state; - new_pos->match = NULL; - new_pos->fail = &trie->root; - new_pos->final = 0; - new_pos->id = -1; - - if (trie->fail_states->len < depth + 1) { - /* Grow fail states array if depth is more than its size */ - guint size = trie->fail_states->len; - - size = MAX (size * 2, depth + 1); - g_ptr_array_set_size (trie->fail_states, size); - } - - new_pos->next = trie->fail_states->pdata[depth]; - trie->fail_states->pdata[depth] = new_pos; - - return new_pos; -} - -/* Traverse the specified node to find corresponding match */ -static inline struct rspamd_trie_match * -check_match (struct rspamd_trie_state *s, gchar c) -{ - struct rspamd_trie_match *match = s->match; - - while (match && match->c != c) { - match = match->next; - } - - return match; -} - -void -rspamd_trie_insert (rspamd_trie_t *trie, const gchar *pattern, gint pattern_id) -{ - const guchar *p = pattern; - struct rspamd_trie_state *q, *q1, *r, *cur_node; - struct rspamd_trie_match *m, *n; - guint i, depth = 0; - gchar c; - - /* Insert pattern to the trie */ - - cur_node = &trie->root; - - while (*p) { - c = trie->icase ? g_ascii_tolower (*p) : *p; - m = check_match (cur_node, c); - if (m == NULL) { - /* Insert a character at specified level depth */ - cur_node = rspamd_trie_insert_char (trie, depth, cur_node, c); - } - else { - cur_node = m->state; - } - p++; - depth++; - } - - cur_node->final = depth; - cur_node->id = pattern_id; - - /* Update fail states and build fail states graph */ - /* Go through the whole depth of prefixes */ - for (i = 0; i < trie->fail_states->len; i++) { - q = trie->fail_states->pdata[i]; - while (q) { - m = q->match; - while (m) { - c = m->c; - q1 = m->state; - r = q->fail; - /* Move q->fail to last known fail location for this character (or to NULL) */ - while (r && (n = check_match (r, c)) == NULL) { - r = r->fail; - } - - /* We have found new fail location for character c, so set it in q1 */ - if (r != NULL) { - q1->fail = n->state; - if (q1->fail->final > q1->final) { - q1->final = q1->fail->final; - - if (q1->id == -1) { - q1->id = q1->fail->id; - } - } - } - else { - /* Search from root */ - if ((n = check_match (&trie->root, c))) { - q1->fail = n->state; - } - else { - q1->fail = &trie->root; - } - } - - m = m->next; - } - - q = q->next; - } - } -} - -const gchar * -rspamd_trie_lookup (rspamd_trie_t *trie, - const gchar *buffer, - gsize buflen, - gint *matched_id) -{ - const guchar *p = buffer, *prev, *ret; - struct rspamd_trie_state *cur_node; - struct rspamd_trie_match *m = NULL; - gchar c; - - - cur_node = &trie->root; - prev = p; - ret = p; - - while (buflen) { - c = trie->icase ? g_ascii_tolower (*p) : *p; - - /* Match pattern or use fail-path to restore state */ - while (cur_node != NULL && (m = check_match (cur_node, c)) == NULL) { - cur_node = cur_node->fail; - } - - /* Shift left in the text */ - if (cur_node == &trie->root) { - /* 1 character pattern found */ - ret = prev; - } - else if (cur_node == NULL) { - /* We have tried the pattern but eventually it was not found */ - cur_node = &trie->root; - ret = p; - p++; - prev = p; - buflen--; - continue; - } - - if (m != NULL) { - /* Match found */ - cur_node = m->state; - - if (cur_node->final) { - /* The complete pattern found */ - if (matched_id != NULL) { - *matched_id = cur_node->id; - } - return (const gchar *) ret; - } - } - p++; - prev = p; - buflen--; - } - - return NULL; -} - -void -rspamd_trie_free (rspamd_trie_t *trie) -{ - g_ptr_array_free (trie->fail_states, TRUE); - rspamd_mempool_delete (trie->pool); - g_free (trie); -} diff --git a/src/libutil/trie.h b/src/libutil/trie.h deleted file mode 100644 index 037ffd9ee..000000000 --- a/src/libutil/trie.h +++ /dev/null @@ -1,91 +0,0 @@ -/* Copyright (c) 2010, Vsevolod Stakhov - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY Rambler media ''AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL Rambler BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - - -#ifndef TRIE_H_ -#define TRIE_H_ - -#include "config.h" -#include "mem_pool.h" - -/* - * Rspamd implements basic bitwise prefixed trie structure - */ - -struct rspamd_trie_match; - -struct rspamd_trie_state { - struct rspamd_trie_state *next; - struct rspamd_trie_state *fail; - struct rspamd_trie_match *match; - guint final; - gint id; -}; - -struct rspamd_trie_match { - struct rspamd_trie_match *next; - struct rspamd_trie_state *state; - gchar c; -}; - -typedef struct rspamd_trie_s { - struct rspamd_trie_state root; - GPtrArray *fail_states; - gboolean icase; - rspamd_mempool_t *pool; -} rspamd_trie_t; - -/* - * Create a new suffix trie - */ -rspamd_trie_t * rspamd_trie_create (gboolean icase); - -/* - * Insert a pattern into the trie - * @param trie suffix trie - * @param pattern text of element - * @param pattern_id id of element - */ -void rspamd_trie_insert (rspamd_trie_t *trie, - const gchar *pattern, - gint pattern_id); - -/* - * Search for a text using suffix trie - * @param trie suffix trie - * @param buffer a text where to search for trie patterns - * @param buflen a length of text - * @param mached_id on a successfull search here would be stored id of pattern found - * @return Position in a text where pattern was found or NULL if no patterns were found - */ -const gchar * rspamd_trie_lookup (rspamd_trie_t *trie, - const gchar *buffer, - gsize buflen, - gint *matched_id); - -/* - * Deallocate suffix trie - */ -void rspamd_trie_free (rspamd_trie_t *trie); - -#endif /* TRIE_H_ */ diff --git a/src/lua/lua_config.c b/src/lua/lua_config.c index 54c4d74de..ad980e57e 100644 --- a/src/lua/lua_config.c +++ b/src/lua/lua_config.c @@ -27,7 +27,6 @@ #include "map.h" #include "message.h" #include "radix.h" -#include "trie.h" #include "expression.h" /*** diff --git a/src/plugins/surbl.c b/src/plugins/surbl.c index 351dd1317..7a22001b5 100644 --- a/src/plugins/surbl.c +++ b/src/plugins/surbl.c @@ -178,7 +178,6 @@ redirector_insert (gpointer st, gconstpointer key, gpointer value) rspamd_strlcpy (pattern, begin, pat.len + 1); pat.ptr = pattern; - //rspamd_trie_insert (surbl_module_ctx->redirector_trie, new, idx); g_array_append_val (cbdata->patterns, pat); if (g_ascii_isspace (*p)) { -- 2.39.5