From: Vsevolod Stakhov Date: Fri, 23 Jan 2015 13:50:17 +0000 (+0000) Subject: Reorganize libstat API. X-Git-Tag: 0.9.0~846 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=8a05515078bc8fd3d642778fcae0d005a38ec7b0;p=rspamd.git Reorganize libstat API. --- diff --git a/src/libstat/CMakeLists.txt b/src/libstat/CMakeLists.txt index 4c02c49c6..97725d2f9 100644 --- a/src/libstat/CMakeLists.txt +++ b/src/libstat/CMakeLists.txt @@ -1,14 +1,11 @@ # Librspamdserver -SET(LIBSTATSRC - stat_config.c) +SET(LIBSTATSRC stat_config.c) SET(TOKENIZERSSRC tokenizers/tokenizers.c tokenizers/osb.c) -SET(CLASSIFIERSSRC classifiers/classifiers.c - classifiers/bayes.c) +SET(CLASSIFIERSSRC classifiers/bayes.c) -SET(BACKENDSSRC backends/backends.c - backends/mmaped_file.c) +SET(BACKENDSSRC backends/mmaped_file.c) ADD_LIBRARY(rspamd-stat ${LINK_TYPE} ${LIBSTATSRC} ${TOKENIZERSSRC} diff --git a/src/libstat/backends.h b/src/libstat/backends.h deleted file mode 100644 index ecd41173d..000000000 --- a/src/libstat/backends.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright (c) 2015, Vsevolod Stakhov - * - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. 
IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ -#ifndef BACKENDS_H_ -#define BACKENDS_H_ - -#include "config.h" -#include "cfg_file.h" - -#define RSPAMD_DEFAULT_BACKEND "mmap" - -struct rspamd_stat_backend { - const char *name; - gpointer (*init)(struct rspamd_statfile_config *cfg); - gpointer ctx; -}; - -extern struct rspamd_stat_backend statfile_backends[]; - -struct rspamd_stat_backend *rspamd_stat_get_backend (const char *name); - -#endif /* BACKENDS_H_ */ diff --git a/src/libstat/backends/backends.c b/src/libstat/backends/backends.c deleted file mode 100644 index 0701a2ff9..000000000 --- a/src/libstat/backends/backends.c +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright (c) 2015, Vsevolod Stakhov - * - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. 
IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "main.h" -#include "backends.h" -#include "mmaped_file.h" - -struct rspamd_stat_backend statfile_backends[] = { - { - .name = RSPAMD_DEFAULT_BACKEND, - .init = rspamd_mmaped_file_init, - } -}; - - -struct rspamd_stat_backend * -rspamd_stat_get_backend (const char *name) -{ - guint i; - - for (i = 0; i < G_N_ELEMENTS (statfile_backends); i++) { - if (strcmp (statfile_backends[i].name, name) == 0) { - return &statfile_backends[i]; - } - } - - return NULL; -} diff --git a/src/libstat/backends/backends.h b/src/libstat/backends/backends.h new file mode 100644 index 000000000..37a7560cd --- /dev/null +++ b/src/libstat/backends/backends.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2015, Vsevolod Stakhov + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef BACKENDS_H_ +#define BACKENDS_H_ + +#include "config.h" + +#define RSPAMD_DEFAULT_BACKEND "mmap" + +/* Forwarded declarations */ +struct rspamd_classifier_config; +struct rspamd_statfile_config; +struct rspamd_config; + +struct rspamd_stat_backend { + const char *name; + gpointer (*init)(struct rspamd_statfile_config *cfg); + gpointer ctx; +}; + +gpointer rspamd_mmaped_file_init(struct rspamd_config *cfg); + +#endif /* BACKENDS_H_ */ diff --git a/src/libstat/backends/mmaped_file.c b/src/libstat/backends/mmaped_file.c index 9ac725fe7..cef17f465 100644 --- a/src/libstat/backends/mmaped_file.c +++ b/src/libstat/backends/mmaped_file.c @@ -24,7 +24,6 @@ #include "config.h" -#include "mmaped_file.h" #include "main.h" #define CHAIN_LENGTH 128 diff --git a/src/libstat/backends/mmaped_file.h b/src/libstat/backends/mmaped_file.h deleted file mode 100644 index f3f25c8cb..000000000 --- a/src/libstat/backends/mmaped_file.h +++ /dev/null @@ -1,20 +0,0 @@ -/** - * @file statfile.h - * Describes common methods for accessing statistics files and caching them in memory - */ - -#ifndef RSPAMD_STATFILE_H -#define RSPAMD_STATFILE_H - -#include "config.h" - - -/* Forwarded declarations */ -struct rspamd_classifier_config; -struct rspamd_statfile_config; -struct rspamd_config; - -gpointer -rspamd_mmaped_file_init(struct rspamd_config *cfg); - -#endif diff --git a/src/libstat/classifiers.h b/src/libstat/classifiers.h deleted file mode 100644 index 8c05bc6db..000000000 --- 
a/src/libstat/classifiers.h +++ /dev/null @@ -1,62 +0,0 @@ -#ifndef CLASSIFIERS_H -#define CLASSIFIERS_H - -#include "config.h" -#include "mem_pool.h" -#include "tokenizers.h" -#include <lua.h> - -/* Consider this value as 0 */ -#define ALPHA 0.0001 - -struct rspamd_classifier_config; -struct rspamd_task; - -struct classifier_ctx { - rspamd_mempool_t *pool; - GHashTable *results; - gboolean debug; - struct rspamd_classifier_config *cfg; -}; - -struct classify_weight { - const char *name; - long double weight; -}; - -/* Common classifier structure */ -struct classifier { - char *name; - struct classifier_ctx * (*init_func)(rspamd_mempool_t *pool, - struct rspamd_classifier_config *cf); - gboolean (*classify_func)(struct classifier_ctx * ctx, - GTree *input, struct rspamd_task *task, - lua_State *L); - gboolean (*learn_spam_func)(struct classifier_ctx * ctx, - GTree *input, struct rspamd_task *task, gboolean is_spam, lua_State *L, - GError **err); -}; - -/* Get classifier structure by name or return NULL if this name is not found */ -struct classifier * rspamd_stat_get_classifier (const char *name); - -/* Bayes algorithm */ -struct classifier_ctx * bayes_init (rspamd_mempool_t *pool, - struct rspamd_classifier_config *cf); -gboolean bayes_classify (struct classifier_ctx * ctx, - GTree *input, - struct rspamd_task *task, - lua_State *L); -gboolean bayes_learn_spam (struct classifier_ctx * ctx, - GTree *input, - struct rspamd_task *task, - gboolean is_spam, - lua_State *L, - GError **err); -/* Array of all defined classifiers */ -extern struct classifier classifiers[]; - -#endif -/* - * vi:ts=4 - */ diff --git a/src/libstat/classifiers/classifiers.c b/src/libstat/classifiers/classifiers.c deleted file mode 100644 index 4d78f1f81..000000000 --- a/src/libstat/classifiers/classifiers.c +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright (c) 2009-2012, Vsevolod Stakhov - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -/* - * Common classifier functions - */ - -#include "classifiers.h" - -struct classifier classifiers[] = { - { - .name = "bayes", - .init_func = bayes_init, - .classify_func = bayes_classify, - .learn_spam_func = bayes_learn_spam, - } -}; - -struct classifier * -rspamd_stat_get_classifier (const char *name) -{ - guint i; - - for (i = 0; i < sizeof (classifiers) / sizeof (classifiers[0]); i++) { - if (strcmp (classifiers[i].name, name) == 0) { - return &classifiers[i]; - } - } - - return NULL; -} - -/* - * vi:ts=4 - */ diff --git a/src/libstat/classifiers/classifiers.h b/src/libstat/classifiers/classifiers.h new file mode 100644 index 000000000..4ae1ba23d --- /dev/null +++ b/src/libstat/classifiers/classifiers.h @@ -0,0 +1,49 @@ +#ifndef CLASSIFIERS_H +#define CLASSIFIERS_H + +#include "config.h" + +/* Consider this value as 0 */ +#define ALPHA 0.0001 + +struct rspamd_classifier_config; +struct rspamd_task; + +/* Common classifier structure */ +struct classifier_ctx { + rspamd_mempool_t *pool; + GHashTable *results; + gboolean debug; + struct rspamd_classifier_config *cfg; +}; + +struct classifier { + char *name; + struct classifier_ctx * (*init_func)(rspamd_mempool_t *pool, + struct rspamd_classifier_config *cf); + gboolean (*classify_func)(struct classifier_ctx * ctx, + GTree *input, struct rspamd_task *task, + lua_State *L); + gboolean (*learn_spam_func)(struct classifier_ctx * ctx, + GTree *input, struct rspamd_task *task, gboolean is_spam, lua_State *L, + GError **err); +}; + +/* Bayes algorithm */ +struct classifier_ctx * bayes_init (rspamd_mempool_t *pool, + struct rspamd_classifier_config *cf); +gboolean bayes_classify (struct classifier_ctx * ctx, + GTree *input, + struct rspamd_task *task, + lua_State *L); +gboolean bayes_learn_spam (struct classifier_ctx * ctx, + GTree *input, + struct rspamd_task *task, + gboolean is_spam, + lua_State *L, + GError **err); + +#endif +/* + * vi:ts=4 + */ diff --git a/src/libstat/stat_api.h b/src/libstat/stat_api.h 
index 64b3f0b92..0e2bf86b8 100644 --- a/src/libstat/stat_api.h +++ b/src/libstat/stat_api.h @@ -31,35 +31,6 @@ * High level statistics API */ -struct rspamd_statfile_runtime { - struct rspamd_statfile_config *st; - guint64 hits; - guint64 total_hits; -}; - -struct rspamd_classifier_runtime { - double ham_prob; - double spam_prob; - guint64 total_spam; - guint64 total_ham; - guint64 processed_tokens; - gsize max_tokens; -}; - -struct rspamd_token_result { - double value; - struct rspamd_statfile_runtime *st_runtime; - - struct rspamd_classifier_runtime *cl_runtime; -}; - -#define RSPAMD_MAX_TOKEN_LEN 64 -typedef struct token_node_s { - guchar data[RSPAMD_MAX_TOKEN_LEN]; - guint datalen; - GArray *results; -} rspamd_token_t; - /** * Initialise statistics modules * @param cfg diff --git a/src/libstat/stat_config.c b/src/libstat/stat_config.c index fd2c0f165..a539f3c80 100644 --- a/src/libstat/stat_config.c +++ b/src/libstat/stat_config.c @@ -27,4 +27,34 @@ #include "stat_api.h" #include "main.h" #include "cfg_rcl.h" +#include "stat_internal.h" +#include "backends/mmaped_file.h" +static struct rspamd_stat_ctx *stat_ctx = NULL; + +static struct classifier classifiers[] = { + { + .name = "bayes", + .init_func = bayes_init, + .classify_func = bayes_classify, + .learn_spam_func = bayes_learn_spam, + } +}; + +static struct tokenizer tokenizers[] = { + {"osb-text", osb_tokenize_text, rspamd_tokenizer_get_word}, +}; + +struct rspamd_stat_backend statfile_backends[] = { + { + .name = RSPAMD_DEFAULT_BACKEND, + .init = rspamd_mmaped_file_init, + } +}; + + +void +rspamd_stat_init (struct rspamd_config *cfg) +{ + +} diff --git a/src/libstat/stat_internal.h b/src/libstat/stat_internal.h new file mode 100644 index 000000000..4aa625b27 --- /dev/null +++ b/src/libstat/stat_internal.h @@ -0,0 +1,70 @@ +/* Copyright (c) 2015, Vsevolod Stakhov + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#ifndef STAT_INTERNAL_H_ +#define STAT_INTERNAL_H_ + +#include "config.h" +#include "task.h" +#include "classifiers/classifiers.h" +#include "tokenizers/tokenizers.h" +#include "backends/backends.h" + +struct rspamd_statfile_runtime { + struct rspamd_statfile_config *st; + guint64 hits; + guint64 total_hits; +}; + +struct rspamd_classifier_runtime { + double ham_prob; + double spam_prob; + guint64 total_spam; + guint64 total_ham; + guint64 processed_tokens; + gsize max_tokens; +}; + +struct rspamd_token_result { + double value; + struct rspamd_statfile_runtime *st_runtime; + + struct rspamd_classifier_runtime *cl_runtime; +}; + +#define RSPAMD_MAX_TOKEN_LEN 64 +typedef struct token_node_s { + guchar data[RSPAMD_MAX_TOKEN_LEN]; + guint datalen; + GArray *results; +} rspamd_token_t; + +struct rspamd_stat_ctx { + struct classifier *classifiers; + guint classifiers_count; + struct tokenizer *tokenizers; + guint tokenizers_count; + struct rspamd_stat_backend *backends; + guint backends_count; +}; + +#endif /* STAT_INTERNAL_H_ */ diff --git a/src/libstat/tokenizers.h b/src/libstat/tokenizers.h deleted file mode 100644 index 73d07a5c4..000000000 --- a/src/libstat/tokenizers.h +++ /dev/null @@ -1,55 +0,0 @@ -#ifndef TOKENIZERS_H -#define TOKENIZERS_H - -#include "config.h" -#include "mem_pool.h" -#include "fstring.h" -#include "main.h" -#include "stat_api.h" - -/* Common tokenizer structure */ -struct tokenizer { - gchar *name; - gint (*tokenize_func)(struct tokenizer *tokenizer, - rspamd_mempool_t *pool, - GArray *words, - GTree **cur, - gboolean save_token, - gboolean is_utf, - GList *exceptions); - gchar * (*get_next_word)(rspamd_fstring_t *buf, rspamd_fstring_t *token, GList **exceptions); -}; - -/* Compare two token nodes */ -int token_node_compare_func (gconstpointer a, gconstpointer b); - -/* Get tokenizer structure by name or return NULL if this name is not found */ -struct tokenizer * rspamd_stat_get_tokenizer (const char *name); - -/* Get next word from 
specified f_str_t buf */ -gchar * rspamd_tokenizer_get_word (rspamd_fstring_t *buf, - rspamd_fstring_t *token, GList **exceptions); - -/* Tokenize text into array of words (rspamd_fstring_t type) */ -GArray * rspamd_tokenize_text (gchar *text, gsize len, gboolean is_utf, - gsize min_len, GList **exceptions); - -/* OSB tokenize function */ -int osb_tokenize_text (struct tokenizer *tokenizer, - rspamd_mempool_t *pool, - GArray *input, - GTree **cur, - gboolean save_token, - gboolean is_utf, - GList *exceptions); - -/* Make tokens for a subject */ -void tokenize_subject (struct rspamd_task *task, GTree ** tree); - -/* Array of all defined tokenizers */ -extern struct tokenizer tokenizers[]; - -#endif -/* - * vi:ts=4 - */ diff --git a/src/libstat/tokenizers/tokenizers.c b/src/libstat/tokenizers/tokenizers.c index 10e4b92d5..eee41a971 100644 --- a/src/libstat/tokenizers/tokenizers.c +++ b/src/libstat/tokenizers/tokenizers.c @@ -29,10 +29,6 @@ #include "main.h" #include "tokenizers.h" -struct tokenizer tokenizers[] = { - {"osb-text", osb_tokenize_text, rspamd_tokenizer_get_word}, -}; - const int primes[] = { 1, 7, 3, 13, @@ -75,20 +71,6 @@ const gchar t_delimiters[255] = { 0, 0, 0, 0, 0 }; -struct tokenizer * -rspamd_stat_get_tokenizer (const char *name) -{ - guint i; - - for (i = 0; i < sizeof (tokenizers) / sizeof (tokenizers[0]); i++) { - if (strcmp (tokenizers[i].name, name) == 0) { - return &tokenizers[i]; - } - } - - return NULL; -} - int token_node_compare_func (gconstpointer a, gconstpointer b) { diff --git a/src/libstat/tokenizers/tokenizers.h b/src/libstat/tokenizers/tokenizers.h new file mode 100644 index 000000000..8ee11cea1 --- /dev/null +++ b/src/libstat/tokenizers/tokenizers.h @@ -0,0 +1,49 @@ +#ifndef TOKENIZERS_H +#define TOKENIZERS_H + +#include "config.h" +#include "mem_pool.h" +#include "fstring.h" +#include "main.h" +#include "stat_api.h" + +/* Common tokenizer structure */ +struct tokenizer { + gchar *name; + gint (*tokenize_func)(struct tokenizer 
*tokenizer, + rspamd_mempool_t *pool, + GArray *words, + GTree **cur, + gboolean save_token, + gboolean is_utf, + GList *exceptions); + gchar * (*get_next_word)(rspamd_fstring_t *buf, rspamd_fstring_t *token, GList **exceptions); +}; + +/* Compare two token nodes */ +int token_node_compare_func (gconstpointer a, gconstpointer b); + +/* Get next word from specified f_str_t buf */ +gchar * rspamd_tokenizer_get_word (rspamd_fstring_t *buf, + rspamd_fstring_t *token, GList **exceptions); + +/* Tokenize text into array of words (rspamd_fstring_t type) */ +GArray * rspamd_tokenize_text (gchar *text, gsize len, gboolean is_utf, + gsize min_len, GList **exceptions); + +/* OSB tokenize function */ +int osb_tokenize_text (struct tokenizer *tokenizer, + rspamd_mempool_t *pool, + GArray *input, + GTree **cur, + gboolean save_token, + gboolean is_utf, + GList *exceptions); + +/* Make tokens for a subject */ +void tokenize_subject (struct rspamd_task *task, GTree ** tree); + +#endif +/* + * vi:ts=4 + */