You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

tokenizers.h 1.0KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344
  1. #ifndef TOKENIZERS_H
  2. #define TOKENIZERS_H
  3. #include <sys/types.h>
  4. #include "../config.h"
  5. #ifdef HAVE_STDINT_H
  6. #include <stdint.h>
  7. #endif
  8. #include "../mem_pool.h"
  9. #include "../fstring.h"
  10. #include "../main.h"
  /*
   * Size of the features pipe (window).
   * NOTE(review): the unit is presumably "tokens" -- confirm where the macro is used.
   */
  #define FEATURE_WINDOW_SIZE 5
  /*
   * Node of a singly linked token list: two 32-bit values plus a link
   * to the following node.
   * NOTE(review): h1/h2 look like a pair of hash values describing one
   * token -- confirm against the tokenizer implementation.
   */
  struct token_list_s {
      uint32_t h1;
      uint32_t h2;
      struct token_list_s *next;    /* following node in the list */
  };

  typedef struct token_list_s token_list_t;
  18. /* Common tokenizer structure */
  19. struct tokenizer {
  20. char *name;
  21. token_list_t* (*tokenize_func)(struct tokenizer *tokenizer, memory_pool_t *pool, f_str_t *input);
  22. f_str_t* (*get_next_word)(f_str_t *buf, f_str_t *token);
  23. };
  /*
   * Look up a tokenizer structure by its name.
   * Returns NULL if the name is not found.
   */
  struct tokenizer *get_tokenizer(char *name);
  26. /* Get next word from specified f_str_t buf */
  27. f_str_t *get_next_word (f_str_t *buf, f_str_t *token);
  28. /* OSB tokenize function */
  29. token_list_t* osb_tokenize_text (struct tokenizer *tokenizer, memory_pool_t *pool, f_str_t *input);
  /*
   * Array of all defined tokenizers.
   * Only declared here (extern, no size given); the definition is not in
   * this header.
   */
  extern struct tokenizer tokenizers[];
  32. #endif
  33. /*
  34. * vi:ts=4
  35. */