aboutsummaryrefslogtreecommitdiffstats
path: root/src/tokenizers/tokenizers.h
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@rambler-co.ru> 2008-12-04 18:41:00 +0300
committer Vsevolod Stakhov <vsevolod@rambler-co.ru> 2008-12-04 18:41:00 +0300
commit 249c0583d2a12ddde67e05251e47f256a58cfd05 (patch)
tree 97c1db6e72d4bec5a2640425127c2d094fadea86 /src/tokenizers/tokenizers.h
parent 42b81716ece887b0011b1e40b0101ad37598997e (diff)
download rspamd-249c0583d2a12ddde67e05251e47f256a58cfd05.tar.gz
rspamd-249c0583d2a12ddde67e05251e47f256a58cfd05.zip
* Use a binary tree in tokenizers, which provides fast checking for unique tokens with O(log n) complexity
Diffstat (limited to 'src/tokenizers/tokenizers.h')
-rw-r--r-- src/tokenizers/tokenizers.h 12
1 files changed, 6 insertions, 6 deletions
diff --git a/src/tokenizers/tokenizers.h b/src/tokenizers/tokenizers.h
index 96a2027a5..c3453a945 100644
--- a/src/tokenizers/tokenizers.h
+++ b/src/tokenizers/tokenizers.h
@@ -14,26 +14,26 @@
/* Size for features pipe */
#define FEATURE_WINDOW_SIZE 5
-typedef struct token_list_s {
+typedef struct token_node_s {
uint32_t h1;
uint32_t h2;
- struct token_list_s *next;
-} token_list_t;
-
+} token_node_t;
/* Common tokenizer structure */
struct tokenizer {
char *name;
- token_list_t* (*tokenize_func)(struct tokenizer *tokenizer, memory_pool_t *pool, f_str_t *input);
+ GTree* (*tokenize_func)(struct tokenizer *tokenizer, memory_pool_t *pool, f_str_t *input);
f_str_t* (*get_next_word)(f_str_t *buf, f_str_t *token);
};
+/* Compare two token nodes */
+int token_node_compare_func (gconstpointer a, gconstpointer b);
/* Get tokenizer structure by name or return NULL if this name is not found */
struct tokenizer* get_tokenizer (char *name);
/* Get next word from specified f_str_t buf */
f_str_t *get_next_word (f_str_t *buf, f_str_t *token);
/* OSB tokenize function */
-token_list_t* osb_tokenize_text (struct tokenizer *tokenizer, memory_pool_t *pool, f_str_t *input);
+GTree* osb_tokenize_text (struct tokenizer *tokenizer, memory_pool_t *pool, f_str_t *input);
/* Array of all defined tokenizers */
extern struct tokenizer tokenizers[];