From 96d7668ecaa4c57633689abf25188e2a0f08bdaf Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Mon, 27 Jun 2011 18:50:29 +0400 Subject: [PATCH] Fix incorrect calculation of token length. --- src/tokenizers/osb.c | 1 + src/tokenizers/tokenizers.c | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/tokenizers/osb.c b/src/tokenizers/osb.c index bc57255cb..8c1b4618a 100644 --- a/src/tokenizers/osb.c +++ b/src/tokenizers/osb.c @@ -86,6 +86,7 @@ osb_tokenize_text (struct tokenizer *tokenizer, memory_pool_t * pool, f_str_t * } k ++; token.begin = res; + g_assert (k < 10000); } return TRUE; diff --git a/src/tokenizers/tokenizers.c b/src/tokenizers/tokenizers.c index be73e506d..16dc763ed 100644 --- a/src/tokenizers/tokenizers.c +++ b/src/tokenizers/tokenizers.c @@ -148,7 +148,7 @@ get_next_word (f_str_t * buf, f_str_t * token, GList **exceptions) if (ex != NULL && ex->pos == pos) { /* Go to the next exception */ *exceptions = g_list_next (*exceptions); - return p + ex->len + 1; + return p + ex->len; } pos++; p++; @@ -160,7 +160,7 @@ get_next_word (f_str_t * buf, f_str_t * token, GList **exceptions) while (remain > 0 && !t_delimiters[*p]) { if (ex != NULL && ex->pos == pos) { *exceptions = g_list_next (*exceptions); - return p + ex->len + 1; + return p + ex->len; } token->len++; pos++; -- 2.39.5