summary refs log tree commit diff stats
path: root/src/tokenizers/tokenizers.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/tokenizers/tokenizers.c')
-rw-r--r-- src/tokenizers/tokenizers.c 45
1 files changed, 45 insertions, 0 deletions
diff --git a/src/tokenizers/tokenizers.c b/src/tokenizers/tokenizers.c
new file mode 100644
index 000000000..132a57ce0
--- /dev/null
+++ b/src/tokenizers/tokenizers.c
@@ -0,0 +1,45 @@
+/*
+ * Common tokenization functions
+ */
+
+#include <sys/types.h>
+#include "tokenizers.h"
+
+/*
+ * Get next word from specified f_str_t buf.
+ *
+ * A word is a maximal run of bytes for which g_ascii_isgraph () is true;
+ * every other byte is treated as a separator and skipped.
+ *
+ * `token` carries the iteration state between calls. Pass it with
+ * begin == NULL to start at the beginning of `buf`; on each later call it
+ * must still describe the word returned by the previous call, and scanning
+ * resumes right after that word.
+ *
+ * Returns `token` (begin/len updated to describe the next word, pointing
+ * into the memory of `buf`, nothing is copied) or NULL when:
+ *  - `buf` or `token` is NULL, or `buf` has no data;
+ *  - `token` does not lie inside `buf`;
+ *  - no more words are left in `buf`.
+ *
+ * NOTE(review): assumes f_str_t has a char * `begin` and an unsigned
+ * (size_t-like) `len`; the type is declared in tokenizers.h - confirm.
+ */
+f_str_t *
+get_next_word (f_str_t *buf, f_str_t *token)
+{
+	size_t offset;
+	size_t remain;
+	char *pos;
+
+	if (buf == NULL || token == NULL || buf->begin == NULL) {
+		return NULL;
+	}
+
+	if (token->begin == NULL) {
+		/* Fresh iteration: start at the first byte with an empty previous word */
+		token->begin = buf->begin;
+		token->len = 0;
+	}
+
+	/*
+	 * Offset of the previous word inside buf. A begin pointer that lies
+	 * before buf->begin turns into a huge unsigned value here and is
+	 * rejected by the bounds check below instead of underflowing `remain`.
+	 */
+	offset = (size_t)(token->begin - buf->begin);
+	if (offset > buf->len || token->len > buf->len - offset) {
+		return NULL;
+	}
+
+	/* Step over the previous word first, then measure what is really left */
+	pos = token->begin + token->len;
+	remain = buf->len - offset - token->len;
+
+	/*
+	 * Skip non graph symbols. The byte is only consumed inside the loop
+	 * body, so the first byte of the word is left for the next loop, and
+	 * `remain` is tested before it is decremented so it can never wrap.
+	 */
+	while (remain > 0 && !g_ascii_isgraph (*pos)) {
+		pos ++;
+		remain --;
+	}
+
+	token->begin = pos;
+	token->len = 0;
+
+	/* Collect the word itself */
+	while (remain > 0 && g_ascii_isgraph (*pos)) {
+		pos ++;
+		remain --;
+		token->len ++;
+	}
+
+	if (token->len == 0) {
+		/* Only separators were left: there is no next word */
+		return NULL;
+	}
+
+	return token;
+}
+
+/*
+ * vi:ts=4
+ */