summary | refs | log | tree | commit | diff | stats
path: root/src/tokenizers/tokenizers.c
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@rambler-co.ru> 2009-07-09 20:45:11 +0400
committer Vsevolod Stakhov <vsevolod@rambler-co.ru> 2009-07-09 20:45:11 +0400
commit 2234daebbb352b444b322d43cc6c1093f0ce949c (patch)
tree 320131facabccd4f5aa3eddc465bc50a707b2b00 /src/tokenizers/tokenizers.c
parent 19baadf6a0e6b2554de67b674a2c6f30efda13bb (diff)
download rspamd-2234daebbb352b444b322d43cc6c1093f0ce949c.tar.gz
rspamd-2234daebbb352b444b322d43cc6c1093f0ce949c.zip
* Make autolearn work
Diffstat (limited to 'src/tokenizers/tokenizers.c')
-rw-r--r-- src/tokenizers/tokenizers.c 8
1 files changed, 3 insertions, 5 deletions
diff --git a/src/tokenizers/tokenizers.c b/src/tokenizers/tokenizers.c
index 4527e699c..7db1af12c 100644
--- a/src/tokenizers/tokenizers.c
+++ b/src/tokenizers/tokenizers.c
@@ -78,12 +78,11 @@ f_str_t *
get_next_word (f_str_t *buf, f_str_t *token)
{
size_t remain;
- char *pos;
+ unsigned char *pos;
if (buf == NULL) {
return NULL;
}
-
if (token->begin == NULL) {
token->begin = buf->begin;
}
@@ -95,15 +94,14 @@ get_next_word (f_str_t *buf, f_str_t *token)
if (remain <= 0) {
return NULL;
}
-
pos = token->begin;
/* Skip non graph symbols */
- while (remain > 0 && !g_ascii_isgraph (*pos)) {
+ while (remain > 0 && (!g_ascii_isgraph (*pos) && *pos < 127)) {
token->begin ++;
pos ++;
remain --;
}
- while (remain > 0 && g_ascii_isgraph (*pos)) {
+ while (remain > 0 && (g_ascii_isgraph (*pos) || *pos > 127)) {
token->len ++;
pos ++;
remain --;