From: Vsevolod Stakhov Date: Mon, 2 Aug 2021 20:24:14 +0000 (+0100) Subject: [Minor] Allow to compare utf8 strings of different length X-Git-Tag: 3.0~60 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=79339e5d4f52643b702b207313a3230dc6a97bba;p=rspamd.git [Minor] Allow to compare utf8 strings of different length --- diff --git a/src/libutil/cxx/utf8_util.cxx b/src/libutil/cxx/utf8_util.cxx index 8b99d1f35..cf6e70fe6 100644 --- a/src/libutil/cxx/utf8_util.cxx +++ b/src/libutil/cxx/utf8_util.cxx @@ -176,18 +176,23 @@ struct rspamd_icu_collate_storage { static rspamd_icu_collate_storage collate_storage; int -rspamd_utf8_strcmp(const char *s1, const char *s2, gsize n) +rspamd_utf8_strcmp_sizes(const char *s1, gsize n1, const char *s2, gsize n2) { - if (n >= std::numeric_limits<int>::max()) { + if (n1 >= std::numeric_limits<int>::max() || n2 >= std::numeric_limits<int>::max()) { /* * It's hard to say what to do here... But libicu wants int, so we fall * back to g_ascii_strcasecmp which can deal with size_t */ - return g_ascii_strncasecmp(s1, s2, n); + if (n1 == n2) { + return g_ascii_strncasecmp(s1, s2, n1); + } + else { + return n1 - n2; + } } UErrorCode success = U_ZERO_ERROR; - auto res = collate_storage.collator->compareUTF8({s1, (int) n}, {s2, (int) n}, + auto res = collate_storage.collator->compareUTF8({s1, (int) n1}, {s2, (int) n2}, success); switch (res) { @@ -201,6 +206,12 @@ rspamd_utf8_strcmp(const char *s1, const char *s2, gsize n) } } +int +rspamd_utf8_strcmp(const char *s1, const char *s2, gsize n) +{ + return rspamd_utf8_strcmp_sizes(s1, n, s2, n); +} + TEST_SUITE("utf8 utils") { TEST_CASE("utf8 normalise") { std::tuple cases[] = { diff --git a/src/libutil/cxx/utf8_util.h b/src/libutil/cxx/utf8_util.h index 28bd6a144..a9476f78d 100644 --- a/src/libutil/cxx/utf8_util.h +++ b/src/libutil/cxx/utf8_util.h @@ -59,6 +59,15 @@ enum rspamd_normalise_result rspamd_normalise_unicode_inplace(gchar *start, gsiz * @return an integer greater than, equal to, or less than 0,
according as the string s1 is greater than, equal to, or less than the string s2. */ int rspamd_utf8_strcmp(const char *s1, const char *s2, gsize n); +/** + * Similar to rspamd_utf8_strcmp but accepts two sizes + * @param s1 + * @param n1 + * @param s2 + * @param n2 + * @return + */ +int rspamd_utf8_strcmp_sizes(const char *s1, gsize n1, const char *s2, gsize n2); #ifdef __cplusplus }