summary | refs | log | tree | commit | diff | stats
path: root/src/libstat/tokenizers/tokenizers.h
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@highsecure.ru> 2015-01-23 13:50:17 +0000
committer Vsevolod Stakhov <vsevolod@highsecure.ru> 2015-01-23 13:50:17 +0000
commit 8a05515078bc8fd3d642778fcae0d005a38ec7b0 (patch)
tree 239d86bbbb5ba6bafdfa6aeaa41e848dcdc46079 /src/libstat/tokenizers/tokenizers.h
parent 8f5509c65dc6907a7581518246a200236088423c (diff)
download rspamd-8a05515078bc8fd3d642778fcae0d005a38ec7b0.tar.gz
rspamd-8a05515078bc8fd3d642778fcae0d005a38ec7b0.zip
Reorganize libstat API.
Diffstat (limited to 'src/libstat/tokenizers/tokenizers.h')
-rw-r--r-- src/libstat/tokenizers/tokenizers.h 49
1 files changed, 49 insertions, 0 deletions
diff --git a/src/libstat/tokenizers/tokenizers.h b/src/libstat/tokenizers/tokenizers.h
new file mode 100644
index 000000000..8ee11cea1
--- /dev/null
+++ b/src/libstat/tokenizers/tokenizers.h
@@ -0,0 +1,49 @@
+#ifndef TOKENIZERS_H
+#define TOKENIZERS_H
+
+#include "config.h"
+#include "mem_pool.h"
+#include "fstring.h"
+#include "main.h"
+#include "stat_api.h"
+
+/* Common tokenizer structure: a name plus two function-pointer hooks */
+struct tokenizer {
+ gchar *name; /* presumably the tokenizer's identifier -- confirm against callers */
+ gint (*tokenize_func)(struct tokenizer *tokenizer,
+ rspamd_mempool_t *pool,
+ GArray *words,
+ GTree **cur,
+ gboolean save_token,
+ gboolean is_utf,
+ GList *exceptions); /* same parameter types as osb_tokenize_text declared below */
+ gchar * (*get_next_word)(rspamd_fstring_t *buf, rspamd_fstring_t *token, GList **exceptions); /* same signature as rspamd_tokenizer_get_word declared below */
+};
+
+/* Compare two token nodes passed as generic pointers; int result (presumably GLib comparator ordering -- confirm) */
+int token_node_compare_func (gconstpointer a, gconstpointer b);
+
+/* Get next word from the specified rspamd_fstring_t buf */
+gchar * rspamd_tokenizer_get_word (rspamd_fstring_t *buf,
+ rspamd_fstring_t *token, GList **exceptions);
+
+/* Tokenize text into a GArray of words (rspamd_fstring_t type); min_len presumably filters short words -- confirm */
+GArray * rspamd_tokenize_text (gchar *text, gsize len, gboolean is_utf,
+ gsize min_len, GList **exceptions);
+
+/* OSB tokenize function; its parameter types match the tokenize_func hook of struct tokenizer */
+int osb_tokenize_text (struct tokenizer *tokenizer,
+ rspamd_mempool_t *pool,
+ GArray *input,
+ GTree **cur,
+ gboolean save_token,
+ gboolean is_utf,
+ GList *exceptions);
+
+/* Make tokens for a subject (presumably the subject of task, collected via *tree -- confirm) */
+void tokenize_subject (struct rspamd_task *task, GTree ** tree);
+
+#endif
+/*
+ * vi:ts=4
+ */