aboutsummaryrefslogtreecommitdiffstats
path: root/src/tokenizers
diff options
context:
space:
mode:
author: Vsevolod Stakhov <vsevolod@rambler-co.ru> 2011-06-27 18:50:29 +0400
committer: Vsevolod Stakhov <vsevolod@rambler-co.ru> 2011-06-27 18:50:29 +0400
commit: 96d7668ecaa4c57633689abf25188e2a0f08bdaf (patch)
tree: 6ca62f2c44a4a0089ee49470357d439b0b131c0c /src/tokenizers
parent: a3fa4d672341fd2f1888d3a2f2ed85ae57913b78 (diff)
download: rspamd-96d7668ecaa4c57633689abf25188e2a0f08bdaf.tar.gz
rspamd-96d7668ecaa4c57633689abf25188e2a0f08bdaf.zip
Fix incorrect calculation of token length.
Diffstat (limited to 'src/tokenizers')
-rw-r--r-- src/tokenizers/osb.c 1
-rw-r--r-- src/tokenizers/tokenizers.c 4
2 files changed, 3 insertions, 2 deletions
diff --git a/src/tokenizers/osb.c b/src/tokenizers/osb.c
index bc57255cb..8c1b4618a 100644
--- a/src/tokenizers/osb.c
+++ b/src/tokenizers/osb.c
@@ -86,6 +86,7 @@ osb_tokenize_text (struct tokenizer *tokenizer, memory_pool_t * pool, f_str_t *
}
k ++;
token.begin = res;
+ g_assert (k < 10000);
}
return TRUE;
diff --git a/src/tokenizers/tokenizers.c b/src/tokenizers/tokenizers.c
index be73e506d..16dc763ed 100644
--- a/src/tokenizers/tokenizers.c
+++ b/src/tokenizers/tokenizers.c
@@ -148,7 +148,7 @@ get_next_word (f_str_t * buf, f_str_t * token, GList **exceptions)
if (ex != NULL && ex->pos == pos) {
/* Go to the next exception */
*exceptions = g_list_next (*exceptions);
- return p + ex->len + 1;
+ return p + ex->len;
}
pos++;
p++;
@@ -160,7 +160,7 @@ get_next_word (f_str_t * buf, f_str_t * token, GList **exceptions)
while (remain > 0 && !t_delimiters[*p]) {
if (ex != NULL && ex->pos == pos) {
*exceptions = g_list_next (*exceptions);
- return p + ex->len + 1;
+ return p + ex->len;
}
token->len++;
pos++;