diff options
author | Vsevolod Stakhov <vsevolod@rambler-co.ru> | 2009-07-09 20:45:11 +0400 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@rambler-co.ru> | 2009-07-09 20:45:11 +0400 |
commit | 2234daebbb352b444b322d43cc6c1093f0ce949c (patch) | |
tree | 320131facabccd4f5aa3eddc465bc50a707b2b00 /src/tokenizers/tokenizers.c | |
parent | 19baadf6a0e6b2554de67b674a2c6f30efda13bb (diff) | |
download | rspamd-2234daebbb352b444b322d43cc6c1093f0ce949c.tar.gz rspamd-2234daebbb352b444b322d43cc6c1093f0ce949c.zip |
* Make autolearn work
Diffstat (limited to 'src/tokenizers/tokenizers.c')
-rw-r--r-- | src/tokenizers/tokenizers.c | 8 |
1 file changed, 3 insertions, 5 deletions
diff --git a/src/tokenizers/tokenizers.c b/src/tokenizers/tokenizers.c
index 4527e699c..7db1af12c 100644
--- a/src/tokenizers/tokenizers.c
+++ b/src/tokenizers/tokenizers.c
@@ -78,12 +78,11 @@ f_str_t *
 get_next_word (f_str_t *buf, f_str_t *token)
 {
 	size_t remain;
-	char *pos;
+	unsigned char *pos;
 
 	if (buf == NULL) {
 		return NULL;
 	}
-
 	if (token->begin == NULL) {
 		token->begin = buf->begin;
 	}
@@ -95,15 +94,14 @@ get_next_word (f_str_t *buf, f_str_t *token)
 	if (remain <= 0) {
 		return NULL;
 	}
-
 	pos = token->begin;
 	/* Skip non graph symbols */
-	while (remain > 0 && !g_ascii_isgraph (*pos)) {
+	while (remain > 0 && (!g_ascii_isgraph (*pos) && *pos < 127)) {
 		token->begin ++;
 		pos ++;
 		remain --;
 	}
-	while (remain > 0 && g_ascii_isgraph (*pos)) {
+	while (remain > 0 && (g_ascii_isgraph (*pos) || *pos > 127)) {
 		token->len ++;
 		pos ++;
 		remain --;