aboutsummaryrefslogtreecommitdiffstats
path: root/src/libstat
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@highsecure.ru> 2019-08-27 18:20:59 +0100
committer Vsevolod Stakhov <vsevolod@highsecure.ru> 2019-08-27 18:20:59 +0100
commit a7f5e5eb06168374f1ee25b744e4b37f1ad4c8a0 (patch)
tree afaed34b1643b8cd4c2d80b4e13aa09a092fea28 /src/libstat
parent a0da48f14fee2e3c66927111ed5a05086001196f (diff)
download rspamd-a7f5e5eb06168374f1ee25b744e4b37f1ad4c8a0.tar.gz
rspamd-a7f5e5eb06168374f1ee25b744e4b37f1ad4c8a0.zip
[Fix] Fix normalization of non-alphabet based languages
Diffstat (limited to 'src/libstat')
-rw-r--r-- src/libstat/backends/redis_backend.c 3
-rw-r--r-- src/libstat/tokenizers/tokenizers.c 8
2 files changed, 4 insertions, 7 deletions
diff --git a/src/libstat/backends/redis_backend.c b/src/libstat/backends/redis_backend.c
index 9ac6fb445..9dd3624fb 100644
--- a/src/libstat/backends/redis_backend.c
+++ b/src/libstat/backends/redis_backend.c
@@ -526,7 +526,8 @@ rspamd_redis_tokens_to_query (struct rspamd_task *task,
"HSET %b_tokens %b %b",
prefix, (size_t) prefix_len,
n0, (size_t) l0,
- tok->t1->stemmed.begin, tok->t1->stemmed.len);
+ tok->t1->stemmed.begin,
+ tok->t1->stemmed.len);
}
}
else {
diff --git a/src/libstat/tokenizers/tokenizers.c b/src/libstat/tokenizers/tokenizers.c
index ea3c84c67..000f2033c 100644
--- a/src/libstat/tokenizers/tokenizers.c
+++ b/src/libstat/tokenizers/tokenizers.c
@@ -679,14 +679,10 @@ rspamd_uchars_to_ucs32 (const UChar *src, gsize srclen,
}
#endif
- if (cat == U_UPPERCASE_LETTER ||
- cat == U_LOWERCASE_LETTER ||
- cat == U_DECIMAL_DIGIT_NUMBER ||
+ if ((cat >= U_UPPERCASE_LETTER && cat <= U_OTHER_NUMBER) ||
cat == U_CONNECTOR_PUNCTUATION ||
cat == U_MATH_SYMBOL ||
- cat == U_CURRENCY_SYMBOL ||
- cat == U_INITIAL_PUNCTUATION ||
- cat == U_FINAL_PUNCTUATION) {
+ cat == U_CURRENCY_SYMBOL) {
*d++ = u_tolower (t);
}
}