about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- src/tokenizers/osb.c 1
-rw-r--r-- src/tokenizers/tokenizers.c 4
2 files changed, 3 insertions, 2 deletions
diff --git a/src/tokenizers/osb.c b/src/tokenizers/osb.c
index bc57255cb..8c1b4618a 100644
--- a/src/tokenizers/osb.c
+++ b/src/tokenizers/osb.c
@@ -86,6 +86,7 @@ osb_tokenize_text (struct tokenizer *tokenizer, memory_pool_t * pool, f_str_t *
}
k ++;
token.begin = res;
+ g_assert (k < 10000);
}
return TRUE;
diff --git a/src/tokenizers/tokenizers.c b/src/tokenizers/tokenizers.c
index be73e506d..16dc763ed 100644
--- a/src/tokenizers/tokenizers.c
+++ b/src/tokenizers/tokenizers.c
@@ -148,7 +148,7 @@ get_next_word (f_str_t * buf, f_str_t * token, GList **exceptions)
if (ex != NULL && ex->pos == pos) {
/* Go to the next exception */
*exceptions = g_list_next (*exceptions);
- return p + ex->len + 1;
+ return p + ex->len;
}
pos++;
p++;
@@ -160,7 +160,7 @@ get_next_word (f_str_t * buf, f_str_t * token, GList **exceptions)
while (remain > 0 && !t_delimiters[*p]) {
if (ex != NULL && ex->pos == pos) {
*exceptions = g_list_next (*exceptions);
- return p + ex->len + 1;
+ return p + ex->len;
}
token->len++;
pos++;