aboutsummaryrefslogtreecommitdiffstats
path: root/src/libstat/classifiers.h
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@highsecure.ru> 2015-01-16 15:28:40 +0000
committer Vsevolod Stakhov <vsevolod@highsecure.ru> 2015-01-16 15:28:40 +0000
commit b5597411a2a4f9b46c0076ccddb95f8eacc1cb7f (patch)
tree a647a4306708df37a3ea1d97666fd2d325e24464 /src/libstat/classifiers.h
parent ffd95d7c71307bb9540f07bbaac3b04859226837 (diff)
download rspamd-b5597411a2a4f9b46c0076ccddb95f8eacc1cb7f.tar.gz
rspamd-b5597411a2a4f9b46c0076ccddb95f8eacc1cb7f.zip
Reorganize statfiles and classifiers into libstat.
Diffstat (limited to 'src/libstat/classifiers.h')
-rw-r--r-- src/libstat/classifiers.h 111
1 files changed, 111 insertions, 0 deletions
diff --git a/src/libstat/classifiers.h b/src/libstat/classifiers.h
new file mode 100644
index 000000000..fd1b63bcf
--- /dev/null
+++ b/src/libstat/classifiers.h
@@ -0,0 +1,111 @@
+#ifndef CLASSIFIERS_H
+#define CLASSIFIERS_H
+
+#include "config.h"
+#include "mem_pool.h"
+#include "statfile.h"
+#include "tokenizers.h"
+#include <lua.h>
+
+/* Consider this value as 0 (NOTE(review): presumably a near-zero threshold for floating-point results; confirm at use sites) */
+#define ALPHA 0.0001
+
+struct rspamd_classifier_config; /* forward declaration: this header only uses pointers to it */
+struct rspamd_task; /* forward declaration: this header only uses pointers to it */
+
+struct classifier_ctx { /* Context type returned by a classifier's init function and taken as the first argument by its other callbacks */
+ rspamd_mempool_t *pool; /* memory pool; presumably the one given to the init function -- TODO confirm */
+ GHashTable *results; /* hash table of results; key/value types are not visible in this header */
+ gboolean debug; /* debug flag; NOTE(review): its effect is defined by the classifier implementations */
+ struct rspamd_classifier_config *cfg; /* classifier configuration; presumably the cf given to the init function -- TODO confirm */
+};
+
+struct classify_weight { /* A name/weight pair; presumably the element type of the list returned by weights_func -- TODO confirm */
+ const char *name; /* name this weight belongs to (NOTE(review): likely a statfile or symbol name -- confirm) */
+ long double weight; /* weight value */
+};
+
+/* Common classifier structure: a name plus the set of callbacks implementing one algorithm */
+struct classifier {
+ char *name; /* classifier name; presumably what get_classifier() matches against -- TODO confirm */
+ struct classifier_ctx * (*init_func)(rspamd_mempool_t *pool, /* returns a context; presumably the one later passed as ctx to the callbacks below */
+ struct rspamd_classifier_config *cf);
+ gboolean (*classify_func)(struct classifier_ctx * ctx, /* classification callback; meaning of the gboolean result is set by the implementation */
+ statfile_pool_t *pool, GTree *input, struct rspamd_task *task,
+ lua_State *L);
+ gboolean (*learn_func)(struct classifier_ctx * ctx, statfile_pool_t *pool, /* learning callback addressed by symbol name, not tied to a task */
+ const char *symbol, GTree *input, gboolean in_class,
+ double *sum, double multiplier, GError **err); /* sum is passed by pointer (presumably an output); err follows the GError out-parameter form */
+ gboolean (*learn_spam_func)(struct classifier_ctx * ctx, /* task-based learning callback taking an is_spam flag */
+ statfile_pool_t *pool,
+ GTree *input, struct rspamd_task *task, gboolean is_spam, lua_State *L,
+ GError **err);
+ GList * (*weights_func)(struct classifier_ctx * ctx, statfile_pool_t *pool, /* returns a GList; element type and ownership are not visible here */
+ GTree *input, struct rspamd_task *task);
+};
+
+/* Look up a classifier structure by name; returns NULL if no classifier with this name is found */
+struct classifier * get_classifier (const char *name);
+
+/* Winnow algorithm */
+struct classifier_ctx * winnow_init (rspamd_mempool_t *pool, /* same signature as struct classifier's init_func */
+ struct rspamd_classifier_config *cf);
+gboolean winnow_classify (struct classifier_ctx * ctx, /* same signature as struct classifier's classify_func */
+ statfile_pool_t *pool,
+ GTree *input, /* NOTE(review): presumably the token tree produced by a tokenizer -- confirm */
+ struct rspamd_task *task,
+ lua_State *L);
+gboolean winnow_learn (struct classifier_ctx * ctx, /* same signature as struct classifier's learn_func */
+ statfile_pool_t *pool,
+ const char *symbol, /* NOTE(review): presumably selects the statfile to learn into -- confirm */
+ GTree *input,
+ gboolean in_class,
+ double *sum, /* passed by pointer; presumably written by the function -- confirm */
+ double multiplier,
+ GError **err); /* GError out-parameter; presumably set on failure -- confirm */
+gboolean winnow_learn_spam (struct classifier_ctx * ctx, /* same signature as struct classifier's learn_spam_func */
+ statfile_pool_t *pool,
+ GTree *input,
+ struct rspamd_task *task,
+ gboolean is_spam, /* NOTE(review): presumably TRUE to learn the input as spam -- confirm */
+ lua_State *L,
+ GError **err); /* GError out-parameter; presumably set on failure -- confirm */
+GList * winnow_weights (struct classifier_ctx * ctx, /* same signature as struct classifier's weights_func */
+ statfile_pool_t *pool,
+ GTree *input,
+ struct rspamd_task *task); /* returned list: element type and ownership are not visible in this header */
+
+/* Bayes algorithm */
+struct classifier_ctx * bayes_init (rspamd_mempool_t *pool, /* same signature as struct classifier's init_func */
+ struct rspamd_classifier_config *cf);
+gboolean bayes_classify (struct classifier_ctx * ctx, /* same signature as struct classifier's classify_func */
+ statfile_pool_t *pool,
+ GTree *input, /* NOTE(review): presumably the token tree produced by a tokenizer -- confirm */
+ struct rspamd_task *task,
+ lua_State *L);
+gboolean bayes_learn (struct classifier_ctx * ctx, /* same signature as struct classifier's learn_func */
+ statfile_pool_t *pool,
+ const char *symbol, /* NOTE(review): presumably selects the statfile to learn into -- confirm */
+ GTree *input,
+ gboolean in_class,
+ double *sum, /* passed by pointer; presumably written by the function -- confirm */
+ double multiplier,
+ GError **err); /* GError out-parameter; presumably set on failure -- confirm */
+gboolean bayes_learn_spam (struct classifier_ctx * ctx, /* same signature as struct classifier's learn_spam_func */
+ statfile_pool_t *pool,
+ GTree *input,
+ struct rspamd_task *task,
+ gboolean is_spam, /* NOTE(review): presumably TRUE to learn the input as spam -- confirm */
+ lua_State *L,
+ GError **err); /* GError out-parameter; presumably set on failure -- confirm */
+GList * bayes_weights (struct classifier_ctx * ctx, /* same signature as struct classifier's weights_func */
+ statfile_pool_t *pool,
+ GTree *input,
+ struct rspamd_task *task); /* returned list: element type and ownership are not visible in this header */
+/* Array of all defined classifiers (declared here with unspecified length; the definition is not in this header) */
+extern struct classifier classifiers[];
+
+#endif
+/*
+ * vi:ts=4
+ */