source.dussan.org Git - rspamd.git/commitdiff
[Fix] Fix normalization of non-alphabet based languages
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Tue, 27 Aug 2019 17:20:59 +0000 (18:20 +0100)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Wed, 28 Aug 2019 09:05:24 +0000 (10:05 +0100)
src/libstat/backends/redis_backend.c
src/libstat/tokenizers/tokenizers.c

index d54767c129f4a166bb4191148384a42e39e9c1c1..baeb2308d8bea6a317e1ad8f3bf4c668943f7b99 100644 (file)
@@ -527,7 +527,8 @@ rspamd_redis_tokens_to_query (struct rspamd_task *task,
                                                                "HSET %b_tokens %b %b",
                                                                prefix, (size_t) prefix_len,
                                                                n0, (size_t) l0,
-                                                               tok->t1->stemmed.begin, tok->t1->stemmed.len);
+                                                               tok->t1->stemmed.begin,
+                                                               tok->t1->stemmed.len);
                                        }
                                }
                                else {
index f69378f9b99abb132c5b9923c523c3cc4b361a5a..ffa1af9db1f77e7135c48d4f34038bb05b9a56cc 100644 (file)
@@ -627,14 +627,10 @@ rspamd_uchars_to_ucs32 (const UChar *src, gsize srclen,
                        }
 #endif
 
-                       if (cat == U_UPPERCASE_LETTER ||
-                                       cat == U_LOWERCASE_LETTER ||
-                                       cat == U_DECIMAL_DIGIT_NUMBER ||
+                       if ((cat >= U_UPPERCASE_LETTER && cat <= U_OTHER_NUMBER) ||
                                        cat == U_CONNECTOR_PUNCTUATION ||
                                        cat == U_MATH_SYMBOL ||
-                                       cat == U_CURRENCY_SYMBOL ||
-                                       cat == U_INITIAL_PUNCTUATION ||
-                                       cat == U_FINAL_PUNCTUATION) {
+                                       cat == U_CURRENCY_SYMBOL) {
                                *d++ = u_tolower (t);
                        }
                }