diff options
Diffstat (limited to 'src/libstat/stat_api.h')
-rw-r--r-- | src/libstat/stat_api.h | 81 |
1 files changed, 57 insertions, 24 deletions
diff --git a/src/libstat/stat_api.h b/src/libstat/stat_api.h
index f28922588..aa6111a8b 100644
--- a/src/libstat/stat_api.h
+++ b/src/libstat/stat_api.h
@@ -20,6 +20,7 @@
 #include "task.h"
 #include "lua/lua_common.h"
 #include "contrib/libev/ev.h"
+#include "libserver/word.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -30,36 +31,14 @@ extern "C" {
  * High level statistics API
  */
 
-#define RSPAMD_STAT_TOKEN_FLAG_TEXT (1u << 0)
-#define RSPAMD_STAT_TOKEN_FLAG_META (1u << 1)
-#define RSPAMD_STAT_TOKEN_FLAG_LUA_META (1u << 2)
-#define RSPAMD_STAT_TOKEN_FLAG_EXCEPTION (1u << 3)
-#define RSPAMD_STAT_TOKEN_FLAG_HEADER (1u << 4)
-#define RSPAMD_STAT_TOKEN_FLAG_UNIGRAM (1u << 5)
-#define RSPAMD_STAT_TOKEN_FLAG_UTF (1u << 6)
-#define RSPAMD_STAT_TOKEN_FLAG_NORMALISED (1u << 7)
-#define RSPAMD_STAT_TOKEN_FLAG_STEMMED (1u << 8)
-#define RSPAMD_STAT_TOKEN_FLAG_BROKEN_UNICODE (1u << 9)
-#define RSPAMD_STAT_TOKEN_FLAG_STOP_WORD (1u << 10)
-#define RSPAMD_STAT_TOKEN_FLAG_SKIPPED (1u << 11)
-#define RSPAMD_STAT_TOKEN_FLAG_INVISIBLE_SPACES (1u << 12)
-#define RSPAMD_STAT_TOKEN_FLAG_EMOJI (1u << 13)
-
-typedef struct rspamd_stat_token_s {
-	rspamd_ftok_t original; /* utf8 raw */
-	rspamd_ftok_unicode_t unicode; /* array of unicode characters, normalized, lowercased */
-	rspamd_ftok_t normalized; /* normalized and lowercased utf8 */
-	rspamd_ftok_t stemmed; /* stemmed utf8 */
-	unsigned int flags;
-} rspamd_stat_token_t;
 
 #define RSPAMD_TOKEN_VALUE_TYPE float
 typedef struct token_node_s {
 	uint64_t data;
 	unsigned int window_idx;
 	unsigned int flags;
-	rspamd_stat_token_t *t1;
-	rspamd_stat_token_t *t2;
+	rspamd_word_t *t1;
+	rspamd_word_t *t2;
 	RSPAMD_TOKEN_VALUE_TYPE values[0];
 } rspamd_token_t;
 
@@ -129,6 +108,23 @@ rspamd_stat_result_t rspamd_stat_learn(struct rspamd_task *task,
 									   GError **err);
 
 /**
+ * Learn task as a specific class, task must be processed prior to this call
+ * @param task task to learn
+ * @param class_name name of the class to learn (e.g., "spam", "ham", "transactional")
+ * @param L lua state
+ * @param classifier NULL to learn all classifiers, name to learn a specific one
+ * @param stage learning stage
+ * @param err error returned
+ * @return TRUE if task has been learned
+ */
+rspamd_stat_result_t rspamd_stat_learn_class(struct rspamd_task *task,
+											 const char *class_name,
+											 lua_State *L,
+											 const char *classifier,
+											 unsigned int stage,
+											 GError **err);
+
+/**
  * Get the overall statistics for all statfile backends
  * @param cfg configuration
  * @param total_learns the total number of learns is stored here
@@ -141,6 +137,43 @@ rspamd_stat_result_t rspamd_stat_statistics(struct rspamd_task *task,
 
 void rspamd_stat_unload(void);
 
+/**
+ * Multi-class classification result structure
+ */
+typedef struct {
+	char **class_names; /**< Array of class names */
+	double *probabilities; /**< Array of probabilities for each class */
+	unsigned int num_classes; /**< Number of classes */
+	const char *winning_class; /**< Name of the winning class (reference, not owned) */
+	double confidence; /**< Confidence of the winning class */
+} rspamd_multiclass_result_t;
+
+/**
+ * Set multi-class classification result for a task
+ */
+void rspamd_task_set_multiclass_result(struct rspamd_task *task,
+									   rspamd_multiclass_result_t *result);
+
+/**
+ * Get multi-class classification result from a task
+ */
+rspamd_multiclass_result_t *rspamd_task_get_multiclass_result(struct rspamd_task *task);
+
+/**
+ * Free multi-class result structure
+ */
+void rspamd_multiclass_result_free(rspamd_multiclass_result_t *result);
+
+/**
+ * Set autolearn class for a task
+ */
+void rspamd_task_set_autolearn_class(struct rspamd_task *task, const char *class_name);
+
+/**
+ * Get autolearn class from a task
+ */
+const char *rspamd_task_get_autolearn_class(struct rspamd_task *task);
+
 #ifdef __cplusplus
 }
 #endif