diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2015-01-23 13:50:17 +0000 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2015-01-23 13:50:17 +0000 |
commit | 8a05515078bc8fd3d642778fcae0d005a38ec7b0 (patch) | |
tree | 239d86bbbb5ba6bafdfa6aeaa41e848dcdc46079 /src/libstat/tokenizers/tokenizers.h | |
parent | 8f5509c65dc6907a7581518246a200236088423c (diff) | |
download | rspamd-8a05515078bc8fd3d642778fcae0d005a38ec7b0.tar.gz rspamd-8a05515078bc8fd3d642778fcae0d005a38ec7b0.zip |
Reorganize libstat API.
Diffstat (limited to 'src/libstat/tokenizers/tokenizers.h')
-rw-r--r-- | src/libstat/tokenizers/tokenizers.h | 49 |
1 files changed, 49 insertions, 0 deletions
```diff
diff --git a/src/libstat/tokenizers/tokenizers.h b/src/libstat/tokenizers/tokenizers.h
new file mode 100644
index 000000000..8ee11cea1
--- /dev/null
+++ b/src/libstat/tokenizers/tokenizers.h
@@ -0,0 +1,49 @@
+#ifndef TOKENIZERS_H
+#define TOKENIZERS_H
+
+#include "config.h"
+#include "mem_pool.h"
+#include "fstring.h"
+#include "main.h"
+#include "stat_api.h"
+
+/* Common tokenizer structure */
+struct tokenizer {
+	gchar *name;
+	gint (*tokenize_func)(struct tokenizer *tokenizer,
+			rspamd_mempool_t *pool,
+			GArray *words,
+			GTree **cur,
+			gboolean save_token,
+			gboolean is_utf,
+			GList *exceptions);
+	gchar * (*get_next_word)(rspamd_fstring_t *buf, rspamd_fstring_t *token, GList **exceptions);
+};
+
+/* Compare two token nodes */
+int token_node_compare_func (gconstpointer a, gconstpointer b);
+
+/* Get next word from specified f_str_t buf */
+gchar * rspamd_tokenizer_get_word (rspamd_fstring_t *buf,
+		rspamd_fstring_t *token, GList **exceptions);
+
+/* Tokenize text into array of words (rspamd_fstring_t type) */
+GArray * rspamd_tokenize_text (gchar *text, gsize len, gboolean is_utf,
+		gsize min_len, GList **exceptions);
+
+/* OSB tokenize function */
+int osb_tokenize_text (struct tokenizer *tokenizer,
+	rspamd_mempool_t *pool,
+	GArray *input,
+	GTree **cur,
+	gboolean save_token,
+	gboolean is_utf,
+	GList *exceptions);
+
+/* Make tokens for a subject */
+void tokenize_subject (struct rspamd_task *task, GTree ** tree);
+
+#endif
+/*
+ * vi:ts=4
+ */
```