diff options
author | Vsevolod Stakhov <vsevolod@rambler-co.ru> | 2009-01-21 17:25:06 +0300 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@rambler-co.ru> | 2009-01-21 17:25:06 +0300 |
commit | 1dc0f6ad2c2e97e11881a7e1b0a4142e65f50898 (patch) | |
tree | f0a714e2e87ebd50f6016c8cc7f2a8e03a9cc2d8 /src/tokenizers/tokenizers.c | |
parent | 87c9659fdd08bbbc0eb796afccf7237a03181498 (diff) | |
download | rspamd-1dc0f6ad2c2e97e11881a7e1b0a4142e65f50898.tar.gz rspamd-1dc0f6ad2c2e97e11881a7e1b0a4142e65f50898.zip |
* Rewrite message parser
* Change mime parts storage
* Add HTML tag stripping (ported from PHP code)
* Rework learning to process only text and stripped HTML parts
Diffstat (limited to 'src/tokenizers/tokenizers.c')
-rw-r--r-- | src/tokenizers/tokenizers.c | 6 |
1 files changed, 4 insertions, 2 deletions
diff --git a/src/tokenizers/tokenizers.c b/src/tokenizers/tokenizers.c index f0481e00d..6c92f9a97 100644 --- a/src/tokenizers/tokenizers.c +++ b/src/tokenizers/tokenizers.c @@ -60,13 +60,15 @@ get_next_word (f_str_t *buf, f_str_t *token) pos = token->begin; /* Skip non graph symbols */ - while (remain-- && !g_ascii_isgraph (*pos)) { + while (remain > 0 && !g_ascii_isgraph (*pos)) { token->begin ++; pos ++; + remain --; } - while (remain-- && g_ascii_isgraph (*pos)) { + while (remain > 0 && g_ascii_isgraph (*pos)) { token->len ++; pos ++; + remain --; } if (token->len == 0) { |