about | summary | refs | log | tree | commit | diff | stats
path: root/src/libstat/tokenizers
diff options
context:
space:
mode:
author: Vsevolod Stakhov <vsevolod@highsecure.ru> 2019-07-08 15:22:05 +0100
committer: Vsevolod Stakhov <vsevolod@highsecure.ru> 2019-07-08 15:22:05 +0100
commit: c271eb36656a4ff88a9c8c1d59934949260275a3 (patch)
tree: 3dd586e38b5b37cfebc7c12b6ff6f434904167c7 /src/libstat/tokenizers
parent: b266445f47dec06392a7e058f499325fa3c052b9 (diff)
download: rspamd-c271eb36656a4ff88a9c8c1d59934949260275a3.tar.gz
download: rspamd-c271eb36656a4ff88a9c8c1d59934949260275a3.zip
[Rework] Add C++ guards to all headers
Diffstat (limited to 'src/libstat/tokenizers')
-rw-r--r-- src/libstat/tokenizers/tokenizers.h 43
1 files changed, 27 insertions, 16 deletions
diff --git a/src/libstat/tokenizers/tokenizers.h b/src/libstat/tokenizers/tokenizers.h
index 784426d31..bf4987c7a 100644
--- a/src/libstat/tokenizers/tokenizers.h
+++ b/src/libstat/tokenizers/tokenizers.h
@@ -11,20 +11,26 @@
#define RSPAMD_DEFAULT_TOKENIZER "osb"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
struct rspamd_tokenizer_runtime;
struct rspamd_stat_ctx;
/* Common tokenizer structure */
struct rspamd_stat_tokenizer {
gchar *name;
+
gpointer (*get_config) (rspamd_mempool_t *pool,
struct rspamd_tokenizer_config *cf, gsize *len);
- gint (*tokenize_func)(struct rspamd_stat_ctx *ctx,
- struct rspamd_task *task,
- GArray *words,
- gboolean is_utf,
- const gchar *prefix,
- GPtrArray *result);
+
+ gint (*tokenize_func) (struct rspamd_stat_ctx *ctx,
+ struct rspamd_task *task,
+ GArray *words,
+ gboolean is_utf,
+ const gchar *prefix,
+ GPtrArray *result);
};
enum rspamd_tokenize_type {
@@ -38,13 +44,13 @@ gint token_node_compare_func (gconstpointer a, gconstpointer b);
/* Tokenize text into array of words (rspamd_stat_token_t type) */
-GArray * rspamd_tokenize_text (const gchar *text, gsize len,
- const UText *utxt,
- enum rspamd_tokenize_type how,
- struct rspamd_config *cfg,
- GList *exceptions,
- guint64 *hash,
- GArray *cur_words);
+GArray *rspamd_tokenize_text (const gchar *text, gsize len,
+ const UText *utxt,
+ enum rspamd_tokenize_type how,
+ struct rspamd_config *cfg,
+ GList *exceptions,
+ guint64 *hash,
+ GArray *cur_words);
/* OSB tokenize function */
gint rspamd_tokenizer_osb (struct rspamd_stat_ctx *ctx,
@@ -59,14 +65,19 @@ gpointer rspamd_tokenizer_osb_get_config (rspamd_mempool_t *pool,
gsize *len);
struct rspamd_lang_detector;
+
void rspamd_normalize_single_word (rspamd_stat_token_t *tok, rspamd_mempool_t *pool);
+
void rspamd_normalize_words (GArray *words, rspamd_mempool_t *pool);
+
void rspamd_stem_words (GArray *words, rspamd_mempool_t *pool,
const gchar *language,
struct rspamd_lang_detector *d);
void rspamd_tokenize_meta_words (struct rspamd_task *task);
+
+#ifdef __cplusplus
+}
+#endif
+
#endif
-/*
- * vi:ts=4
- */