diff options
Diffstat (limited to 'src/tokenizers/tokenizers.c')
-rw-r--r-- | src/tokenizers/tokenizers.c | 45 |
1 files changed, 45 insertions, 0 deletions
diff --git a/src/tokenizers/tokenizers.c b/src/tokenizers/tokenizers.c new file mode 100644 index 000000000..132a57ce0 --- /dev/null +++ b/src/tokenizers/tokenizers.c @@ -0,0 +1,45 @@ +/* + * Common tokenization functions + */ + +#include <sys/types.h> +#include "tokenizers.h" + +/* Get next word from specified f_str_t buf */ +f_str_t * +get_next_word (f_str_t *buf, f_str_t *token) +{ + size_t remain; + char *pos; + + if (buf == NULL) { + return NULL; + } + + if (token->begin == NULL) { + token->begin = buf->begin; + } + + remain = buf->len - (token->begin - buf->begin); + if (remain <= 0) { + return NULL; + } + + token->begin = token->begin + token->len; + token->len = 0; + + pos = token->begin; + /* Skip non graph symbols */ + while (remain-- && !g_ascii_isgraph (*pos ++)) { + token->begin ++; + } + while (remain-- && g_ascii_isgraph (*pos ++)) { + token->len ++; + } + + return token; +} + +/* + * vi:ts=4 + */ |