diff options
Diffstat (limited to 'src/libstat')
-rw-r--r-- | src/libstat/backends/backends.h | 136 | ||||
-rw-r--r-- | src/libstat/classifiers/classifiers.h | 75 | ||||
-rw-r--r-- | src/libstat/learn_cache/learn_cache.h | 67 | ||||
-rw-r--r-- | src/libstat/stat_api.h | 26 | ||||
-rw-r--r-- | src/libstat/stat_internal.h | 37 | ||||
-rw-r--r-- | src/libstat/tokenizers/tokenizers.h | 43 |
6 files changed, 236 insertions, 148 deletions
diff --git a/src/libstat/backends/backends.h b/src/libstat/backends/backends.h index 025e9bf34..fa3785540 100644 --- a/src/libstat/backends/backends.h +++ b/src/libstat/backends/backends.h @@ -21,6 +21,10 @@ #define RSPAMD_DEFAULT_BACKEND "mmap" +#ifdef __cplusplus +extern "C" { +#endif + /* Forwarded declarations */ struct rspamd_classifier_config; struct rspamd_statfile_config; @@ -32,73 +36,93 @@ struct rspamd_task; struct rspamd_stat_backend { const char *name; - gpointer (*init)(struct rspamd_stat_ctx *ctx, struct rspamd_config *cfg, - struct rspamd_statfile *st); - gpointer (*runtime)(struct rspamd_task *task, - struct rspamd_statfile_config *stcf, gboolean learn, gpointer ctx); - gboolean (*process_tokens)(struct rspamd_task *task, GPtrArray *tokens, - gint id, - gpointer ctx); - gboolean (*finalize_process)(struct rspamd_task *task, - gpointer runtime, gpointer ctx); - gboolean (*learn_tokens)(struct rspamd_task *task, GPtrArray *tokens, - gint id, - gpointer ctx); - gulong (*total_learns)(struct rspamd_task *task, - gpointer runtime, gpointer ctx); - gboolean (*finalize_learn)(struct rspamd_task *task, - gpointer runtime, gpointer ctx, GError **err); - gulong (*inc_learns)(struct rspamd_task *task, - gpointer runtime, gpointer ctx); - gulong (*dec_learns)(struct rspamd_task *task, - gpointer runtime, gpointer ctx); - ucl_object_t* (*get_stat)(gpointer runtime, gpointer ctx); - void (*close)(gpointer ctx); - - gpointer (*load_tokenizer_config)(gpointer runtime, gsize *sz); + + gpointer (*init) (struct rspamd_stat_ctx *ctx, struct rspamd_config *cfg, + struct rspamd_statfile *st); + + gpointer (*runtime) (struct rspamd_task *task, + struct rspamd_statfile_config *stcf, gboolean learn, gpointer ctx); + + gboolean (*process_tokens) (struct rspamd_task *task, GPtrArray *tokens, + gint id, + gpointer ctx); + + gboolean (*finalize_process) (struct rspamd_task *task, + gpointer runtime, gpointer ctx); + + gboolean (*learn_tokens) (struct rspamd_task *task, GPtrArray *tokens, + gint id, + gpointer ctx); + + gulong (*total_learns) (struct rspamd_task *task, + gpointer runtime, gpointer ctx); + + gboolean (*finalize_learn) (struct rspamd_task *task, + gpointer runtime, gpointer ctx, GError **err); + + gulong (*inc_learns) (struct rspamd_task *task, + gpointer runtime, gpointer ctx); + + gulong (*dec_learns) (struct rspamd_task *task, + gpointer runtime, gpointer ctx); + + ucl_object_t *(*get_stat) (gpointer runtime, gpointer ctx); + + void (*close) (gpointer ctx); + + gpointer (*load_tokenizer_config) (gpointer runtime, gsize *sz); + gpointer ctx; }; #define RSPAMD_STAT_BACKEND_DEF(name) \ - gpointer rspamd_##name##_init (struct rspamd_stat_ctx *ctx, \ - struct rspamd_config *cfg, struct rspamd_statfile *st); \ - gpointer rspamd_##name##_runtime (struct rspamd_task *task, \ - struct rspamd_statfile_config *stcf, \ - gboolean learn, gpointer ctx); \ - gboolean rspamd_##name##_process_tokens (struct rspamd_task *task, \ + gpointer rspamd_##name##_init (struct rspamd_stat_ctx *ctx, \ + struct rspamd_config *cfg, struct rspamd_statfile *st); \ + gpointer rspamd_##name##_runtime (struct rspamd_task *task, \ + struct rspamd_statfile_config *stcf, \ + gboolean learn, gpointer ctx); \ + gboolean rspamd_##name##_process_tokens (struct rspamd_task *task, \ GPtrArray *tokens, gint id, \ - gpointer ctx); \ - gboolean rspamd_##name##_finalize_process (struct rspamd_task *task, \ - gpointer runtime, \ - gpointer ctx); \ - gboolean rspamd_##name##_learn_tokens (struct rspamd_task *task, \ + gpointer ctx); \ + gboolean rspamd_##name##_finalize_process (struct rspamd_task *task, \ + gpointer runtime, \ + gpointer ctx); \ + gboolean rspamd_##name##_learn_tokens (struct rspamd_task *task, \ GPtrArray *tokens, gint id, \ - gpointer ctx); \ - gboolean rspamd_##name##_finalize_learn (struct rspamd_task *task, \ - gpointer runtime, \ - gpointer ctx, GError **err); \ - gulong rspamd_##name##_total_learns (struct rspamd_task *task, \ - gpointer runtime, \ - gpointer ctx); \ - gulong rspamd_##name##_inc_learns (struct rspamd_task *task, \ - gpointer runtime, \ - gpointer ctx); \ - gulong rspamd_##name##_dec_learns (struct rspamd_task *task, \ - gpointer runtime, \ - gpointer ctx); \ - gulong rspamd_##name##_learns (struct rspamd_task *task, \ - gpointer runtime, \ - gpointer ctx); \ - ucl_object_t * rspamd_##name##_get_stat (gpointer runtime, \ - gpointer ctx); \ - gpointer rspamd_##name##_load_tokenizer_config (gpointer runtime, \ - gsize *len); \ - void rspamd_##name##_close (gpointer ctx) + gpointer ctx); \ + gboolean rspamd_##name##_finalize_learn (struct rspamd_task *task, \ + gpointer runtime, \ + gpointer ctx, GError **err); \ + gulong rspamd_##name##_total_learns (struct rspamd_task *task, \ + gpointer runtime, \ + gpointer ctx); \ + gulong rspamd_##name##_inc_learns (struct rspamd_task *task, \ + gpointer runtime, \ + gpointer ctx); \ + gulong rspamd_##name##_dec_learns (struct rspamd_task *task, \ + gpointer runtime, \ + gpointer ctx); \ + gulong rspamd_##name##_learns (struct rspamd_task *task, \ + gpointer runtime, \ + gpointer ctx); \ + ucl_object_t * rspamd_##name##_get_stat (gpointer runtime, \ + gpointer ctx); \ + gpointer rspamd_##name##_load_tokenizer_config (gpointer runtime, \ + gsize *len); \ + void rspamd_##name##_close (gpointer ctx) RSPAMD_STAT_BACKEND_DEF(mmaped_file); + RSPAMD_STAT_BACKEND_DEF(sqlite3); + #ifdef WITH_HIREDIS + RSPAMD_STAT_BACKEND_DEF(redis); + +#endif + +#ifdef __cplusplus +} #endif #endif /* BACKENDS_H_ */ diff --git a/src/libstat/classifiers/classifiers.h b/src/libstat/classifiers/classifiers.h index 738a5e8c9..4e159fb13 100644 --- a/src/libstat/classifiers/classifiers.h +++ b/src/libstat/classifiers/classifiers.h @@ -9,6 +9,10 @@ /* Consider this value as 0 */ #define ALPHA 0.0001 +#ifdef __cplusplus +extern "C" { +#endif + struct rspamd_classifier_config; struct rspamd_task; struct rspamd_config; @@ -18,49 +22,58 @@ struct token_node_s; struct rspamd_stat_classifier { char *name; - gboolean (*init_func)(struct rspamd_config *cfg, - struct ev_loop *ev_base, - struct rspamd_classifier *cl); - gboolean (*classify_func)(struct rspamd_classifier * ctx, - GPtrArray *tokens, - struct rspamd_task *task); - gboolean (*learn_spam_func)(struct rspamd_classifier * ctx, - GPtrArray *input, - struct rspamd_task *task, - gboolean is_spam, - gboolean unlearn, - GError **err); - void (*fin_func)(struct rspamd_classifier *cl); + + gboolean (*init_func) (struct rspamd_config *cfg, + struct ev_loop *ev_base, + struct rspamd_classifier *cl); + + gboolean (*classify_func) (struct rspamd_classifier *ctx, + GPtrArray *tokens, + struct rspamd_task *task); + + gboolean (*learn_spam_func) (struct rspamd_classifier *ctx, + GPtrArray *input, + struct rspamd_task *task, + gboolean is_spam, + gboolean unlearn, + GError **err); + + void (*fin_func) (struct rspamd_classifier *cl); }; /* Bayes algorithm */ gboolean bayes_init (struct rspamd_config *cfg, struct ev_loop *ev_base, struct rspamd_classifier *); + gboolean bayes_classify (struct rspamd_classifier *ctx, - GPtrArray *tokens, - struct rspamd_task *task); + GPtrArray *tokens, + struct rspamd_task *task); + gboolean bayes_learn_spam (struct rspamd_classifier *ctx, - GPtrArray *tokens, - struct rspamd_task *task, - gboolean is_spam, - gboolean unlearn, - GError **err); + GPtrArray *tokens, + struct rspamd_task *task, + gboolean is_spam, + gboolean unlearn, + GError **err); + void bayes_fin (struct rspamd_classifier *); /* Generic lua classifier */ gboolean lua_classifier_init (struct rspamd_config *cfg, struct ev_loop *ev_base, struct rspamd_classifier *); + gboolean lua_classifier_classify (struct rspamd_classifier *ctx, - GPtrArray *tokens, - struct rspamd_task *task); + GPtrArray *tokens, + struct rspamd_task *task); + gboolean lua_classifier_learn_spam (struct rspamd_classifier *ctx, - GPtrArray *tokens, - struct rspamd_task *task, - gboolean is_spam, - gboolean unlearn, - GError **err); + GPtrArray *tokens, + struct rspamd_task *task, + gboolean is_spam, + gboolean unlearn, + GError **err); extern guint rspamd_bayes_log_id; #define msg_debug_bayes(...) rspamd_conditional_debug_fast (NULL, task->from_addr, \ @@ -68,7 +81,9 @@ extern guint rspamd_bayes_log_id; G_STRFUNC, \ __VA_ARGS__) + +#ifdef __cplusplus +} +#endif + #endif -/* - * vi:ts=4 - */ diff --git a/src/libstat/learn_cache/learn_cache.h b/src/libstat/learn_cache/learn_cache.h index 6673d2239..1816c9f7a 100644 --- a/src/libstat/learn_cache/learn_cache.h +++ b/src/libstat/learn_cache/learn_cache.h @@ -19,6 +19,10 @@ #include "config.h" #include "ucl.h" +#ifdef __cplusplus +extern "C" { +#endif + #define RSPAMD_DEFAULT_CACHE "sqlite3" struct rspamd_task; @@ -28,40 +32,53 @@ struct rspamd_statfile; struct rspamd_stat_cache { const char *name; - gpointer (*init)(struct rspamd_stat_ctx *ctx, - struct rspamd_config *cfg, - struct rspamd_statfile *st, - const ucl_object_t *cf); - gpointer (*runtime)(struct rspamd_task *task, - gpointer ctx, gboolean learn); - gint (*check)(struct rspamd_task *task, - gboolean is_spam, - gpointer runtime); - gint (*learn)(struct rspamd_task *task, - gboolean is_spam, - gpointer runtime); + + gpointer (*init) (struct rspamd_stat_ctx *ctx, + struct rspamd_config *cfg, + struct rspamd_statfile *st, + const ucl_object_t *cf); + + gpointer (*runtime) (struct rspamd_task *task, + gpointer ctx, gboolean learn); + + gint (*check) (struct rspamd_task *task, + gboolean is_spam, + gpointer runtime); + + gint (*learn) (struct rspamd_task *task, + gboolean is_spam, + gpointer runtime); + void (*close) (gpointer ctx); + gpointer ctx; }; #define RSPAMD_STAT_CACHE_DEF(name) \ - gpointer rspamd_stat_cache_##name##_init (struct rspamd_stat_ctx *ctx, \ - struct rspamd_config *cfg, \ - struct rspamd_statfile *st, \ - const ucl_object_t *cf); \ - gpointer rspamd_stat_cache_##name##_runtime (struct rspamd_task *task, \ - gpointer ctx, gboolean learn); \ - gint rspamd_stat_cache_##name##_check (struct rspamd_task *task, \ - gboolean is_spam, \ - gpointer runtime); \ - gint rspamd_stat_cache_##name##_learn (struct rspamd_task *task, \ - gboolean is_spam, \ - gpointer runtime); \ - void rspamd_stat_cache_##name##_close (gpointer ctx) + gpointer rspamd_stat_cache_##name##_init (struct rspamd_stat_ctx *ctx, \ + struct rspamd_config *cfg, \ + struct rspamd_statfile *st, \ + const ucl_object_t *cf); \ + gpointer rspamd_stat_cache_##name##_runtime (struct rspamd_task *task, \ + gpointer ctx, gboolean learn); \ + gint rspamd_stat_cache_##name##_check (struct rspamd_task *task, \ + gboolean is_spam, \ + gpointer runtime); \ + gint rspamd_stat_cache_##name##_learn (struct rspamd_task *task, \ + gboolean is_spam, \ + gpointer runtime); \ + void rspamd_stat_cache_##name##_close (gpointer ctx) RSPAMD_STAT_CACHE_DEF(sqlite3); + #ifdef WITH_HIREDIS + RSPAMD_STAT_CACHE_DEF(redis); + +#endif + +#ifdef __cplusplus +} #endif #endif /* LEARN_CACHE_H_ */ diff --git a/src/libstat/stat_api.h b/src/libstat/stat_api.h index 40a6bc716..f91c8b79a 100644 --- a/src/libstat/stat_api.h +++ b/src/libstat/stat_api.h @@ -18,9 +18,13 @@ #include "config.h" #include "task.h" -#include <lua.h> +#include "lua/lua_common.h" #include "contrib/libev/ev.h" +#ifdef __cplusplus +extern "C" { +#endif + /** * @file stat_api.h * High level statistics API @@ -89,7 +93,7 @@ void rspamd_stat_close (void); * @param task */ void rspamd_stat_process_tokenize (struct rspamd_stat_ctx *st_ctx, - struct rspamd_task *task); + struct rspamd_task *task); /** * Classify the task specified and insert symbols if needed @@ -99,7 +103,7 @@ void rspamd_stat_process_tokenize (struct rspamd_stat_ctx *st_ctx, * @return TRUE if task has been classified */ rspamd_stat_result_t rspamd_stat_classify (struct rspamd_task *task, - lua_State *L, guint stage, GError **err); + lua_State *L, guint stage, GError **err); /** @@ -119,9 +123,9 @@ gboolean rspamd_stat_check_autolearn (struct rspamd_task *task); * @return TRUE if task has been learned */ rspamd_stat_result_t rspamd_stat_learn (struct rspamd_task *task, - gboolean spam, lua_State *L, const gchar *classifier, - guint stage, - GError **err); + gboolean spam, lua_State *L, const gchar *classifier, + guint stage, + GError **err); /** * Get the overall statistics for all statfile backends @@ -130,10 +134,14 @@ rspamd_stat_result_t rspamd_stat_learn (struct rspamd_task *task, * @return array of statistical information */ rspamd_stat_result_t rspamd_stat_statistics (struct rspamd_task *task, - struct rspamd_config *cfg, - guint64 *total_learns, - ucl_object_t **res); + struct rspamd_config *cfg, + guint64 *total_learns, + ucl_object_t **res); void rspamd_stat_unload (void); +#ifdef __cplusplus +} +#endif + #endif /* STAT_API_H_ */ diff --git a/src/libstat/stat_internal.h b/src/libstat/stat_internal.h index 50dbae9c1..967a3c4d6 100644 --- a/src/libstat/stat_internal.h +++ b/src/libstat/stat_internal.h @@ -24,6 +24,10 @@ #include "backends/backends.h" #include "learn_cache/learn_cache.h" +#ifdef __cplusplus +extern "C" { +#endif + struct rspamd_statfile_runtime { struct rspamd_statfile_config *st; gpointer backend_runtime; @@ -54,10 +58,11 @@ struct rspamd_statfile { struct rspamd_stat_async_elt; -typedef void (*rspamd_stat_async_handler)(struct rspamd_stat_async_elt *elt, - gpointer ud); -typedef void (*rspamd_stat_async_cleanup)(struct rspamd_stat_async_elt *elt, - gpointer ud); +typedef void (*rspamd_stat_async_handler) (struct rspamd_stat_async_elt *elt, + gpointer ud); + +typedef void (*rspamd_stat_async_cleanup) (struct rspamd_stat_async_elt *elt, + gpointer ud); struct rspamd_stat_async_elt { rspamd_stat_async_handler handler; @@ -102,18 +107,26 @@ typedef enum rspamd_learn_cache_result { RSPAMD_LEARN_INGORE } rspamd_learn_t; -struct rspamd_stat_ctx * rspamd_stat_get_ctx (void); -struct rspamd_stat_classifier * rspamd_stat_get_classifier (const gchar *name); -struct rspamd_stat_backend * rspamd_stat_get_backend (const gchar *name); -struct rspamd_stat_tokenizer * rspamd_stat_get_tokenizer (const gchar *name); -struct rspamd_stat_cache * rspamd_stat_get_cache (const gchar *name); -struct rspamd_stat_async_elt* rspamd_stat_ctx_register_async ( +struct rspamd_stat_ctx *rspamd_stat_get_ctx (void); + +struct rspamd_stat_classifier *rspamd_stat_get_classifier (const gchar *name); + +struct rspamd_stat_backend *rspamd_stat_get_backend (const gchar *name); + +struct rspamd_stat_tokenizer *rspamd_stat_get_tokenizer (const gchar *name); + +struct rspamd_stat_cache *rspamd_stat_get_cache (const gchar *name); + +struct rspamd_stat_async_elt *rspamd_stat_ctx_register_async ( rspamd_stat_async_handler handler, rspamd_stat_async_cleanup cleanup, gpointer d, gdouble timeout); -static GQuark rspamd_stat_quark (void) -{ +static GQuark rspamd_stat_quark (void) { return g_quark_from_static_string ("rspamd-statistics"); } +#ifdef __cplusplus +} +#endif + #endif /* STAT_INTERNAL_H_ */ diff --git a/src/libstat/tokenizers/tokenizers.h b/src/libstat/tokenizers/tokenizers.h index 784426d31..bf4987c7a 100644 --- a/src/libstat/tokenizers/tokenizers.h +++ b/src/libstat/tokenizers/tokenizers.h @@ -11,20 +11,26 @@ #define RSPAMD_DEFAULT_TOKENIZER "osb" +#ifdef __cplusplus +extern "C" { +#endif + struct rspamd_tokenizer_runtime; struct rspamd_stat_ctx; /* Common tokenizer structure */ struct rspamd_stat_tokenizer { gchar *name; + gpointer (*get_config) (rspamd_mempool_t *pool, struct rspamd_tokenizer_config *cf, gsize *len); - gint (*tokenize_func)(struct rspamd_stat_ctx *ctx, - struct rspamd_task *task, - GArray *words, - gboolean is_utf, - const gchar *prefix, - GPtrArray *result); + + gint (*tokenize_func) (struct rspamd_stat_ctx *ctx, + struct rspamd_task *task, + GArray *words, + gboolean is_utf, + const gchar *prefix, + GPtrArray *result); }; enum rspamd_tokenize_type { @@ -38,13 +44,13 @@ gint token_node_compare_func (gconstpointer a, gconstpointer b); /* Tokenize text into array of words (rspamd_stat_token_t type) */ -GArray * rspamd_tokenize_text (const gchar *text, gsize len, - const UText *utxt, - enum rspamd_tokenize_type how, - struct rspamd_config *cfg, - GList *exceptions, - guint64 *hash, - GArray *cur_words); +GArray *rspamd_tokenize_text (const gchar *text, gsize len, + const UText *utxt, + enum rspamd_tokenize_type how, + struct rspamd_config *cfg, + GList *exceptions, + guint64 *hash, + GArray *cur_words); /* OSB tokenize function */ gint rspamd_tokenizer_osb (struct rspamd_stat_ctx *ctx, @@ -59,14 +65,19 @@ gpointer rspamd_tokenizer_osb_get_config (rspamd_mempool_t *pool, gsize *len); struct rspamd_lang_detector; + void rspamd_normalize_single_word (rspamd_stat_token_t *tok, rspamd_mempool_t *pool); + void rspamd_normalize_words (GArray *words, rspamd_mempool_t *pool); + void rspamd_stem_words (GArray *words, rspamd_mempool_t *pool, const gchar *language, struct rspamd_lang_detector *d); void rspamd_tokenize_meta_words (struct rspamd_task *task); + +#ifdef __cplusplus +} +#endif + #endif -/* - * vi:ts=4 - */ |