aboutsummaryrefslogtreecommitdiffstats
path: root/src/libstat
diff options
context:
space:
mode:
Diffstat (limited to 'src/libstat')
-rw-r--r--src/libstat/backends/backends.h136
-rw-r--r--src/libstat/classifiers/classifiers.h75
-rw-r--r--src/libstat/learn_cache/learn_cache.h67
-rw-r--r--src/libstat/stat_api.h26
-rw-r--r--src/libstat/stat_internal.h37
-rw-r--r--src/libstat/tokenizers/tokenizers.h43
6 files changed, 236 insertions, 148 deletions
diff --git a/src/libstat/backends/backends.h b/src/libstat/backends/backends.h
index 025e9bf34..fa3785540 100644
--- a/src/libstat/backends/backends.h
+++ b/src/libstat/backends/backends.h
@@ -21,6 +21,10 @@
#define RSPAMD_DEFAULT_BACKEND "mmap"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
/* Forwarded declarations */
struct rspamd_classifier_config;
struct rspamd_statfile_config;
@@ -32,73 +36,93 @@ struct rspamd_task;
struct rspamd_stat_backend {
const char *name;
- gpointer (*init)(struct rspamd_stat_ctx *ctx, struct rspamd_config *cfg,
- struct rspamd_statfile *st);
- gpointer (*runtime)(struct rspamd_task *task,
- struct rspamd_statfile_config *stcf, gboolean learn, gpointer ctx);
- gboolean (*process_tokens)(struct rspamd_task *task, GPtrArray *tokens,
- gint id,
- gpointer ctx);
- gboolean (*finalize_process)(struct rspamd_task *task,
- gpointer runtime, gpointer ctx);
- gboolean (*learn_tokens)(struct rspamd_task *task, GPtrArray *tokens,
- gint id,
- gpointer ctx);
- gulong (*total_learns)(struct rspamd_task *task,
- gpointer runtime, gpointer ctx);
- gboolean (*finalize_learn)(struct rspamd_task *task,
- gpointer runtime, gpointer ctx, GError **err);
- gulong (*inc_learns)(struct rspamd_task *task,
- gpointer runtime, gpointer ctx);
- gulong (*dec_learns)(struct rspamd_task *task,
- gpointer runtime, gpointer ctx);
- ucl_object_t* (*get_stat)(gpointer runtime, gpointer ctx);
- void (*close)(gpointer ctx);
-
- gpointer (*load_tokenizer_config)(gpointer runtime, gsize *sz);
+
+ gpointer (*init) (struct rspamd_stat_ctx *ctx, struct rspamd_config *cfg,
+ struct rspamd_statfile *st);
+
+ gpointer (*runtime) (struct rspamd_task *task,
+ struct rspamd_statfile_config *stcf, gboolean learn, gpointer ctx);
+
+ gboolean (*process_tokens) (struct rspamd_task *task, GPtrArray *tokens,
+ gint id,
+ gpointer ctx);
+
+ gboolean (*finalize_process) (struct rspamd_task *task,
+ gpointer runtime, gpointer ctx);
+
+ gboolean (*learn_tokens) (struct rspamd_task *task, GPtrArray *tokens,
+ gint id,
+ gpointer ctx);
+
+ gulong (*total_learns) (struct rspamd_task *task,
+ gpointer runtime, gpointer ctx);
+
+ gboolean (*finalize_learn) (struct rspamd_task *task,
+ gpointer runtime, gpointer ctx, GError **err);
+
+ gulong (*inc_learns) (struct rspamd_task *task,
+ gpointer runtime, gpointer ctx);
+
+ gulong (*dec_learns) (struct rspamd_task *task,
+ gpointer runtime, gpointer ctx);
+
+ ucl_object_t *(*get_stat) (gpointer runtime, gpointer ctx);
+
+ void (*close) (gpointer ctx);
+
+ gpointer (*load_tokenizer_config) (gpointer runtime, gsize *sz);
+
gpointer ctx;
};
#define RSPAMD_STAT_BACKEND_DEF(name) \
- gpointer rspamd_##name##_init (struct rspamd_stat_ctx *ctx, \
- struct rspamd_config *cfg, struct rspamd_statfile *st); \
- gpointer rspamd_##name##_runtime (struct rspamd_task *task, \
- struct rspamd_statfile_config *stcf, \
- gboolean learn, gpointer ctx); \
- gboolean rspamd_##name##_process_tokens (struct rspamd_task *task, \
+ gpointer rspamd_##name##_init (struct rspamd_stat_ctx *ctx, \
+ struct rspamd_config *cfg, struct rspamd_statfile *st); \
+ gpointer rspamd_##name##_runtime (struct rspamd_task *task, \
+ struct rspamd_statfile_config *stcf, \
+ gboolean learn, gpointer ctx); \
+ gboolean rspamd_##name##_process_tokens (struct rspamd_task *task, \
GPtrArray *tokens, gint id, \
- gpointer ctx); \
- gboolean rspamd_##name##_finalize_process (struct rspamd_task *task, \
- gpointer runtime, \
- gpointer ctx); \
- gboolean rspamd_##name##_learn_tokens (struct rspamd_task *task, \
+ gpointer ctx); \
+ gboolean rspamd_##name##_finalize_process (struct rspamd_task *task, \
+ gpointer runtime, \
+ gpointer ctx); \
+ gboolean rspamd_##name##_learn_tokens (struct rspamd_task *task, \
GPtrArray *tokens, gint id, \
- gpointer ctx); \
- gboolean rspamd_##name##_finalize_learn (struct rspamd_task *task, \
- gpointer runtime, \
- gpointer ctx, GError **err); \
- gulong rspamd_##name##_total_learns (struct rspamd_task *task, \
- gpointer runtime, \
- gpointer ctx); \
- gulong rspamd_##name##_inc_learns (struct rspamd_task *task, \
- gpointer runtime, \
- gpointer ctx); \
- gulong rspamd_##name##_dec_learns (struct rspamd_task *task, \
- gpointer runtime, \
- gpointer ctx); \
- gulong rspamd_##name##_learns (struct rspamd_task *task, \
- gpointer runtime, \
- gpointer ctx); \
- ucl_object_t * rspamd_##name##_get_stat (gpointer runtime, \
- gpointer ctx); \
- gpointer rspamd_##name##_load_tokenizer_config (gpointer runtime, \
- gsize *len); \
- void rspamd_##name##_close (gpointer ctx)
+ gpointer ctx); \
+ gboolean rspamd_##name##_finalize_learn (struct rspamd_task *task, \
+ gpointer runtime, \
+ gpointer ctx, GError **err); \
+ gulong rspamd_##name##_total_learns (struct rspamd_task *task, \
+ gpointer runtime, \
+ gpointer ctx); \
+ gulong rspamd_##name##_inc_learns (struct rspamd_task *task, \
+ gpointer runtime, \
+ gpointer ctx); \
+ gulong rspamd_##name##_dec_learns (struct rspamd_task *task, \
+ gpointer runtime, \
+ gpointer ctx); \
+ gulong rspamd_##name##_learns (struct rspamd_task *task, \
+ gpointer runtime, \
+ gpointer ctx); \
+ ucl_object_t * rspamd_##name##_get_stat (gpointer runtime, \
+ gpointer ctx); \
+ gpointer rspamd_##name##_load_tokenizer_config (gpointer runtime, \
+ gsize *len); \
+ void rspamd_##name##_close (gpointer ctx)
RSPAMD_STAT_BACKEND_DEF(mmaped_file);
+
RSPAMD_STAT_BACKEND_DEF(sqlite3);
+
#ifdef WITH_HIREDIS
+
RSPAMD_STAT_BACKEND_DEF(redis);
+
+#endif
+
+#ifdef __cplusplus
+}
#endif
#endif /* BACKENDS_H_ */
diff --git a/src/libstat/classifiers/classifiers.h b/src/libstat/classifiers/classifiers.h
index 738a5e8c9..4e159fb13 100644
--- a/src/libstat/classifiers/classifiers.h
+++ b/src/libstat/classifiers/classifiers.h
@@ -9,6 +9,10 @@
/* Consider this value as 0 */
#define ALPHA 0.0001
+#ifdef __cplusplus
+extern "C" {
+#endif
+
struct rspamd_classifier_config;
struct rspamd_task;
struct rspamd_config;
@@ -18,49 +22,58 @@ struct token_node_s;
struct rspamd_stat_classifier {
char *name;
- gboolean (*init_func)(struct rspamd_config *cfg,
- struct ev_loop *ev_base,
- struct rspamd_classifier *cl);
- gboolean (*classify_func)(struct rspamd_classifier * ctx,
- GPtrArray *tokens,
- struct rspamd_task *task);
- gboolean (*learn_spam_func)(struct rspamd_classifier * ctx,
- GPtrArray *input,
- struct rspamd_task *task,
- gboolean is_spam,
- gboolean unlearn,
- GError **err);
- void (*fin_func)(struct rspamd_classifier *cl);
+
+ gboolean (*init_func) (struct rspamd_config *cfg,
+ struct ev_loop *ev_base,
+ struct rspamd_classifier *cl);
+
+ gboolean (*classify_func) (struct rspamd_classifier *ctx,
+ GPtrArray *tokens,
+ struct rspamd_task *task);
+
+ gboolean (*learn_spam_func) (struct rspamd_classifier *ctx,
+ GPtrArray *input,
+ struct rspamd_task *task,
+ gboolean is_spam,
+ gboolean unlearn,
+ GError **err);
+
+ void (*fin_func) (struct rspamd_classifier *cl);
};
/* Bayes algorithm */
gboolean bayes_init (struct rspamd_config *cfg,
struct ev_loop *ev_base,
struct rspamd_classifier *);
+
gboolean bayes_classify (struct rspamd_classifier *ctx,
- GPtrArray *tokens,
- struct rspamd_task *task);
+ GPtrArray *tokens,
+ struct rspamd_task *task);
+
gboolean bayes_learn_spam (struct rspamd_classifier *ctx,
- GPtrArray *tokens,
- struct rspamd_task *task,
- gboolean is_spam,
- gboolean unlearn,
- GError **err);
+ GPtrArray *tokens,
+ struct rspamd_task *task,
+ gboolean is_spam,
+ gboolean unlearn,
+ GError **err);
+
void bayes_fin (struct rspamd_classifier *);
/* Generic lua classifier */
gboolean lua_classifier_init (struct rspamd_config *cfg,
struct ev_loop *ev_base,
struct rspamd_classifier *);
+
gboolean lua_classifier_classify (struct rspamd_classifier *ctx,
- GPtrArray *tokens,
- struct rspamd_task *task);
+ GPtrArray *tokens,
+ struct rspamd_task *task);
+
gboolean lua_classifier_learn_spam (struct rspamd_classifier *ctx,
- GPtrArray *tokens,
- struct rspamd_task *task,
- gboolean is_spam,
- gboolean unlearn,
- GError **err);
+ GPtrArray *tokens,
+ struct rspamd_task *task,
+ gboolean is_spam,
+ gboolean unlearn,
+ GError **err);
extern guint rspamd_bayes_log_id;
#define msg_debug_bayes(...) rspamd_conditional_debug_fast (NULL, task->from_addr, \
@@ -68,7 +81,9 @@ extern guint rspamd_bayes_log_id;
G_STRFUNC, \
__VA_ARGS__)
+
+#ifdef __cplusplus
+}
+#endif
+
#endif
-/*
- * vi:ts=4
- */
diff --git a/src/libstat/learn_cache/learn_cache.h b/src/libstat/learn_cache/learn_cache.h
index 6673d2239..1816c9f7a 100644
--- a/src/libstat/learn_cache/learn_cache.h
+++ b/src/libstat/learn_cache/learn_cache.h
@@ -19,6 +19,10 @@
#include "config.h"
#include "ucl.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
#define RSPAMD_DEFAULT_CACHE "sqlite3"
struct rspamd_task;
@@ -28,40 +32,53 @@ struct rspamd_statfile;
struct rspamd_stat_cache {
const char *name;
- gpointer (*init)(struct rspamd_stat_ctx *ctx,
- struct rspamd_config *cfg,
- struct rspamd_statfile *st,
- const ucl_object_t *cf);
- gpointer (*runtime)(struct rspamd_task *task,
- gpointer ctx, gboolean learn);
- gint (*check)(struct rspamd_task *task,
- gboolean is_spam,
- gpointer runtime);
- gint (*learn)(struct rspamd_task *task,
- gboolean is_spam,
- gpointer runtime);
+
+ gpointer (*init) (struct rspamd_stat_ctx *ctx,
+ struct rspamd_config *cfg,
+ struct rspamd_statfile *st,
+ const ucl_object_t *cf);
+
+ gpointer (*runtime) (struct rspamd_task *task,
+ gpointer ctx, gboolean learn);
+
+ gint (*check) (struct rspamd_task *task,
+ gboolean is_spam,
+ gpointer runtime);
+
+ gint (*learn) (struct rspamd_task *task,
+ gboolean is_spam,
+ gpointer runtime);
+
void (*close) (gpointer ctx);
+
gpointer ctx;
};
#define RSPAMD_STAT_CACHE_DEF(name) \
- gpointer rspamd_stat_cache_##name##_init (struct rspamd_stat_ctx *ctx, \
- struct rspamd_config *cfg, \
- struct rspamd_statfile *st, \
- const ucl_object_t *cf); \
- gpointer rspamd_stat_cache_##name##_runtime (struct rspamd_task *task, \
- gpointer ctx, gboolean learn); \
- gint rspamd_stat_cache_##name##_check (struct rspamd_task *task, \
- gboolean is_spam, \
- gpointer runtime); \
- gint rspamd_stat_cache_##name##_learn (struct rspamd_task *task, \
- gboolean is_spam, \
- gpointer runtime); \
- void rspamd_stat_cache_##name##_close (gpointer ctx)
+ gpointer rspamd_stat_cache_##name##_init (struct rspamd_stat_ctx *ctx, \
+ struct rspamd_config *cfg, \
+ struct rspamd_statfile *st, \
+ const ucl_object_t *cf); \
+ gpointer rspamd_stat_cache_##name##_runtime (struct rspamd_task *task, \
+ gpointer ctx, gboolean learn); \
+ gint rspamd_stat_cache_##name##_check (struct rspamd_task *task, \
+ gboolean is_spam, \
+ gpointer runtime); \
+ gint rspamd_stat_cache_##name##_learn (struct rspamd_task *task, \
+ gboolean is_spam, \
+ gpointer runtime); \
+ void rspamd_stat_cache_##name##_close (gpointer ctx)
RSPAMD_STAT_CACHE_DEF(sqlite3);
+
#ifdef WITH_HIREDIS
+
RSPAMD_STAT_CACHE_DEF(redis);
+
+#endif
+
+#ifdef __cplusplus
+}
#endif
#endif /* LEARN_CACHE_H_ */
diff --git a/src/libstat/stat_api.h b/src/libstat/stat_api.h
index 40a6bc716..f91c8b79a 100644
--- a/src/libstat/stat_api.h
+++ b/src/libstat/stat_api.h
@@ -18,9 +18,13 @@
#include "config.h"
#include "task.h"
-#include <lua.h>
+#include "lua/lua_common.h"
#include "contrib/libev/ev.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
/**
* @file stat_api.h
* High level statistics API
@@ -89,7 +93,7 @@ void rspamd_stat_close (void);
* @param task
*/
void rspamd_stat_process_tokenize (struct rspamd_stat_ctx *st_ctx,
- struct rspamd_task *task);
+ struct rspamd_task *task);
/**
* Classify the task specified and insert symbols if needed
@@ -99,7 +103,7 @@ void rspamd_stat_process_tokenize (struct rspamd_stat_ctx *st_ctx,
* @return TRUE if task has been classified
*/
rspamd_stat_result_t rspamd_stat_classify (struct rspamd_task *task,
- lua_State *L, guint stage, GError **err);
+ lua_State *L, guint stage, GError **err);
/**
@@ -119,9 +123,9 @@ gboolean rspamd_stat_check_autolearn (struct rspamd_task *task);
* @return TRUE if task has been learned
*/
rspamd_stat_result_t rspamd_stat_learn (struct rspamd_task *task,
- gboolean spam, lua_State *L, const gchar *classifier,
- guint stage,
- GError **err);
+ gboolean spam, lua_State *L, const gchar *classifier,
+ guint stage,
+ GError **err);
/**
* Get the overall statistics for all statfile backends
@@ -130,10 +134,14 @@ rspamd_stat_result_t rspamd_stat_learn (struct rspamd_task *task,
* @return array of statistical information
*/
rspamd_stat_result_t rspamd_stat_statistics (struct rspamd_task *task,
- struct rspamd_config *cfg,
- guint64 *total_learns,
- ucl_object_t **res);
+ struct rspamd_config *cfg,
+ guint64 *total_learns,
+ ucl_object_t **res);
void rspamd_stat_unload (void);
+#ifdef __cplusplus
+}
+#endif
+
#endif /* STAT_API_H_ */
diff --git a/src/libstat/stat_internal.h b/src/libstat/stat_internal.h
index 50dbae9c1..967a3c4d6 100644
--- a/src/libstat/stat_internal.h
+++ b/src/libstat/stat_internal.h
@@ -24,6 +24,10 @@
#include "backends/backends.h"
#include "learn_cache/learn_cache.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
struct rspamd_statfile_runtime {
struct rspamd_statfile_config *st;
gpointer backend_runtime;
@@ -54,10 +58,11 @@ struct rspamd_statfile {
struct rspamd_stat_async_elt;
-typedef void (*rspamd_stat_async_handler)(struct rspamd_stat_async_elt *elt,
- gpointer ud);
-typedef void (*rspamd_stat_async_cleanup)(struct rspamd_stat_async_elt *elt,
- gpointer ud);
+typedef void (*rspamd_stat_async_handler) (struct rspamd_stat_async_elt *elt,
+ gpointer ud);
+
+typedef void (*rspamd_stat_async_cleanup) (struct rspamd_stat_async_elt *elt,
+ gpointer ud);
struct rspamd_stat_async_elt {
rspamd_stat_async_handler handler;
@@ -102,18 +107,26 @@ typedef enum rspamd_learn_cache_result {
RSPAMD_LEARN_INGORE
} rspamd_learn_t;
-struct rspamd_stat_ctx * rspamd_stat_get_ctx (void);
-struct rspamd_stat_classifier * rspamd_stat_get_classifier (const gchar *name);
-struct rspamd_stat_backend * rspamd_stat_get_backend (const gchar *name);
-struct rspamd_stat_tokenizer * rspamd_stat_get_tokenizer (const gchar *name);
-struct rspamd_stat_cache * rspamd_stat_get_cache (const gchar *name);
-struct rspamd_stat_async_elt* rspamd_stat_ctx_register_async (
+struct rspamd_stat_ctx *rspamd_stat_get_ctx (void);
+
+struct rspamd_stat_classifier *rspamd_stat_get_classifier (const gchar *name);
+
+struct rspamd_stat_backend *rspamd_stat_get_backend (const gchar *name);
+
+struct rspamd_stat_tokenizer *rspamd_stat_get_tokenizer (const gchar *name);
+
+struct rspamd_stat_cache *rspamd_stat_get_cache (const gchar *name);
+
+struct rspamd_stat_async_elt *rspamd_stat_ctx_register_async (
rspamd_stat_async_handler handler, rspamd_stat_async_cleanup cleanup,
gpointer d, gdouble timeout);
-static GQuark rspamd_stat_quark (void)
-{
+static GQuark rspamd_stat_quark (void) {
return g_quark_from_static_string ("rspamd-statistics");
}
+#ifdef __cplusplus
+}
+#endif
+
#endif /* STAT_INTERNAL_H_ */
diff --git a/src/libstat/tokenizers/tokenizers.h b/src/libstat/tokenizers/tokenizers.h
index 784426d31..bf4987c7a 100644
--- a/src/libstat/tokenizers/tokenizers.h
+++ b/src/libstat/tokenizers/tokenizers.h
@@ -11,20 +11,26 @@
#define RSPAMD_DEFAULT_TOKENIZER "osb"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
struct rspamd_tokenizer_runtime;
struct rspamd_stat_ctx;
/* Common tokenizer structure */
struct rspamd_stat_tokenizer {
gchar *name;
+
gpointer (*get_config) (rspamd_mempool_t *pool,
struct rspamd_tokenizer_config *cf, gsize *len);
- gint (*tokenize_func)(struct rspamd_stat_ctx *ctx,
- struct rspamd_task *task,
- GArray *words,
- gboolean is_utf,
- const gchar *prefix,
- GPtrArray *result);
+
+ gint (*tokenize_func) (struct rspamd_stat_ctx *ctx,
+ struct rspamd_task *task,
+ GArray *words,
+ gboolean is_utf,
+ const gchar *prefix,
+ GPtrArray *result);
};
enum rspamd_tokenize_type {
@@ -38,13 +44,13 @@ gint token_node_compare_func (gconstpointer a, gconstpointer b);
/* Tokenize text into array of words (rspamd_stat_token_t type) */
-GArray * rspamd_tokenize_text (const gchar *text, gsize len,
- const UText *utxt,
- enum rspamd_tokenize_type how,
- struct rspamd_config *cfg,
- GList *exceptions,
- guint64 *hash,
- GArray *cur_words);
+GArray *rspamd_tokenize_text (const gchar *text, gsize len,
+ const UText *utxt,
+ enum rspamd_tokenize_type how,
+ struct rspamd_config *cfg,
+ GList *exceptions,
+ guint64 *hash,
+ GArray *cur_words);
/* OSB tokenize function */
gint rspamd_tokenizer_osb (struct rspamd_stat_ctx *ctx,
@@ -59,14 +65,19 @@ gpointer rspamd_tokenizer_osb_get_config (rspamd_mempool_t *pool,
gsize *len);
struct rspamd_lang_detector;
+
void rspamd_normalize_single_word (rspamd_stat_token_t *tok, rspamd_mempool_t *pool);
+
void rspamd_normalize_words (GArray *words, rspamd_mempool_t *pool);
+
void rspamd_stem_words (GArray *words, rspamd_mempool_t *pool,
const gchar *language,
struct rspamd_lang_detector *d);
void rspamd_tokenize_meta_words (struct rspamd_task *task);
+
+#ifdef __cplusplus
+}
+#endif
+
#endif
-/*
- * vi:ts=4
- */