aboutsummaryrefslogtreecommitdiffstats
path: root/src/tokenizers/tokenizers.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/tokenizers/tokenizers.h')
-rw-r--r--src/tokenizers/tokenizers.h15
1 files changed, 15 insertions, 0 deletions
diff --git a/src/tokenizers/tokenizers.h b/src/tokenizers/tokenizers.h
index 6b4bff5e0..96a2027a5 100644
--- a/src/tokenizers/tokenizers.h
+++ b/src/tokenizers/tokenizers.h
@@ -20,8 +20,23 @@ typedef struct token_list_s {
struct token_list_s *next;
} token_list_t;
+
+/*
+ * Common tokenizer structure.
+ *
+ * name          - tokenizer name; presumably the key matched by
+ *                 get_tokenizer() (confirm against the implementation)
+ * tokenize_func - callback that takes this tokenizer, a memory pool and the
+ *                 input string and returns a pointer to a token_list_t
+ * get_next_word - callback with the same signature as the get_next_word()
+ *                 function declared in this header
+ */
+struct tokenizer {
+ char *name;
+ token_list_t* (*tokenize_func)(struct tokenizer *tokenizer, memory_pool_t *pool, f_str_t *input);
+ f_str_t* (*get_next_word)(f_str_t *buf, f_str_t *token);
+};
+
+/*
+ * Get tokenizer structure by name.
+ * Returns a pointer to the matching struct tokenizer, or NULL if this name is
+ * not found.
+ * NOTE(review): presumably searches the tokenizers[] array -- confirm against
+ * the implementation.
+ */
+struct tokenizer* get_tokenizer (char *name);
/*
 * Get next word from specified f_str_t buf.
 * NOTE(review): how `token` is used and what the returned pointer refers to
 * are not visible in this header -- confirm against the implementation.
 */
f_str_t *get_next_word (f_str_t *buf, f_str_t *token);
+/*
+ * OSB tokenize function. Its signature matches the tokenize_func member of
+ * struct tokenizer, so it can be stored there as a callback.
+ * NOTE(review): OSB presumably stands for Orthogonal Sparse Bigrams -- confirm.
+ */
+token_list_t* osb_tokenize_text (struct tokenizer *tokenizer, memory_pool_t *pool, f_str_t *input);
+
+/*
+ * Array of all defined tokenizers. This is only an extern declaration; the
+ * array itself is defined elsewhere. No size is given here, so code including
+ * this header cannot use sizeof to count the entries.
+ * NOTE(review): how the end of the array is marked is not visible here.
+ */
+extern struct tokenizer tokenizers[];
#endif
/*