diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2015-01-23 13:50:17 +0000 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2015-01-23 13:50:17 +0000 |
commit | 8a05515078bc8fd3d642778fcae0d005a38ec7b0 (patch) | |
tree | 239d86bbbb5ba6bafdfa6aeaa41e848dcdc46079 /src/libstat/tokenizers | |
parent | 8f5509c65dc6907a7581518246a200236088423c (diff) | |
download | rspamd-8a05515078bc8fd3d642778fcae0d005a38ec7b0.tar.gz rspamd-8a05515078bc8fd3d642778fcae0d005a38ec7b0.zip |
Reorganize libstat API.
Diffstat (limited to 'src/libstat/tokenizers')
-rw-r--r-- | src/libstat/tokenizers/tokenizers.c | 18 | ||||
-rw-r--r-- | src/libstat/tokenizers/tokenizers.h | 49 |
2 files changed, 49 insertions, 18 deletions
diff --git a/src/libstat/tokenizers/tokenizers.c b/src/libstat/tokenizers/tokenizers.c
index 10e4b92d5..eee41a971 100644
--- a/src/libstat/tokenizers/tokenizers.c
+++ b/src/libstat/tokenizers/tokenizers.c
@@ -29,10 +29,6 @@
 #include "main.h"
 #include "tokenizers.h"
 
-struct tokenizer tokenizers[] = {
-	{"osb-text", osb_tokenize_text, rspamd_tokenizer_get_word},
-};
-
 const int primes[] = {
 	1, 7,
 	3, 13,
@@ -75,20 +71,6 @@ const gchar t_delimiters[255] = {
 	0, 0, 0, 0, 0
 };
 
-struct tokenizer *
-rspamd_stat_get_tokenizer (const char *name)
-{
-	guint i;
-
-	for (i = 0; i < sizeof (tokenizers) / sizeof (tokenizers[0]); i++) {
-		if (strcmp (tokenizers[i].name, name) == 0) {
-			return &tokenizers[i];
-		}
-	}
-
-	return NULL;
-}
-
 int
 token_node_compare_func (gconstpointer a, gconstpointer b)
 {
diff --git a/src/libstat/tokenizers/tokenizers.h b/src/libstat/tokenizers/tokenizers.h
new file mode 100644
index 000000000..8ee11cea1
--- /dev/null
+++ b/src/libstat/tokenizers/tokenizers.h
@@ -0,0 +1,49 @@
+#ifndef TOKENIZERS_H
+#define TOKENIZERS_H
+
+#include "config.h"
+#include "mem_pool.h"
+#include "fstring.h"
+#include "main.h"
+#include "stat_api.h"
+
+/* Common tokenizer structure */
+struct tokenizer {
+	gchar *name;
+	gint (*tokenize_func)(struct tokenizer *tokenizer,
+		rspamd_mempool_t *pool,
+		GArray *words,
+		GTree **cur,
+		gboolean save_token,
+		gboolean is_utf,
+		GList *exceptions);
+	gchar * (*get_next_word)(rspamd_fstring_t *buf, rspamd_fstring_t *token, GList **exceptions);
+};
+
+/* Compare two token nodes */
+int token_node_compare_func (gconstpointer a, gconstpointer b);
+
+/* Get next word from specified f_str_t buf */
+gchar * rspamd_tokenizer_get_word (rspamd_fstring_t *buf,
+	rspamd_fstring_t *token, GList **exceptions);
+
+/* Tokenize text into array of words (rspamd_fstring_t type) */
+GArray * rspamd_tokenize_text (gchar *text, gsize len, gboolean is_utf,
+	gsize min_len, GList **exceptions);
+
+/* OSB tokenize function */
+int osb_tokenize_text (struct tokenizer *tokenizer,
+	rspamd_mempool_t *pool,
+	GArray *input,
+	GTree **cur,
+	gboolean save_token,
+	gboolean is_utf,
+	GList *exceptions);
+
+/* Make tokens for a subject */
+void tokenize_subject (struct rspamd_task *task, GTree ** tree);
+
+#endif
+/*
+ * vi:ts=4
+ */