aboutsummaryrefslogtreecommitdiffstats
path: root/src/tokenizers
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@rambler-co.ru> 2009-01-19 17:01:08 +0300
committer Vsevolod Stakhov <vsevolod@rambler-co.ru> 2009-01-19 17:01:08 +0300
commit 87c9659fdd08bbbc0eb796afccf7237a03181498 (patch)
tree 9d18d2064ac00b566c48eda1c1b58a31f8c7dd72 /src/tokenizers
parent fe5ad5874aad220fb12a259e607f89ce5fae7465 (diff)
download rspamd-87c9659fdd08bbbc0eb796afccf7237a03181498.tar.gz
rspamd-87c9659fdd08bbbc0eb796afccf7237a03181498.zip
* Rewrite perl client for rspamd, now it allows access to both normal and control interfaces
* Fix small errors in tokenizer and controller interface
Diffstat (limited to 'src/tokenizers')
-rw-r--r-- src/tokenizers/osb.c 1
-rw-r--r-- src/tokenizers/tokenizers.c 6
2 files changed, 5 insertions, 2 deletions
diff --git a/src/tokenizers/osb.c b/src/tokenizers/osb.c
index afd2febd8..451644675 100644
--- a/src/tokenizers/osb.c
+++ b/src/tokenizers/osb.c
@@ -43,6 +43,7 @@ osb_tokenize_text (struct tokenizer *tokenizer, memory_pool_t *pool, f_str_t *in
hashpipe[i] = hashpipe[i - 1];
}
hashpipe[0] = fstrhash (&token);
+ msg_debug ("osb_tokenize_text: text token %s, hash: %d", fstrcstr (&token, pool), hashpipe[0]);
for (i = 1; i < FEATURE_WINDOW_SIZE; i ++) {
h1 = hashpipe[0]* primes[0] + hashpipe[i] * primes[i<<1];
diff --git a/src/tokenizers/tokenizers.c b/src/tokenizers/tokenizers.c
index 280ebd477..f0481e00d 100644
--- a/src/tokenizers/tokenizers.c
+++ b/src/tokenizers/tokenizers.c
@@ -60,11 +60,13 @@ get_next_word (f_str_t *buf, f_str_t *token)
pos = token->begin;
/* Skip non graph symbols */
- while (remain-- && !g_ascii_isgraph (*pos ++)) {
+ while (remain-- && !g_ascii_isgraph (*pos)) {
token->begin ++;
+ pos ++;
}
- while (remain-- && g_ascii_isgraph (*pos ++)) {
+ while (remain-- && g_ascii_isgraph (*pos)) {
token->len ++;
+ pos ++;
}
if (token->len == 0) {