summary | refs | log | tree | commit | diff | stats
path: root/src/libutil
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@highsecure.ru> 2021-08-02 21:24:14 +0100
committer Vsevolod Stakhov <vsevolod@highsecure.ru> 2021-08-02 21:24:14 +0100
commit 79339e5d4f52643b702b207313a3230dc6a97bba (patch)
tree da11a77305355a490cf6c8656fe8c5a0435c71d1 /src/libutil
parent 06ba232b45946fc52c5d812551ac50c2343e3b99 (diff)
download rspamd-79339e5d4f52643b702b207313a3230dc6a97bba.tar.gz
rspamd-79339e5d4f52643b702b207313a3230dc6a97bba.zip
[Minor] Allow to compare utf8 strings of different length
Diffstat (limited to 'src/libutil')
-rw-r--r-- src/libutil/cxx/utf8_util.cxx 19
-rw-r--r-- src/libutil/cxx/utf8_util.h 9
2 files changed, 24 insertions, 4 deletions
diff --git a/src/libutil/cxx/utf8_util.cxx b/src/libutil/cxx/utf8_util.cxx
index 8b99d1f35..cf6e70fe6 100644
--- a/src/libutil/cxx/utf8_util.cxx
+++ b/src/libutil/cxx/utf8_util.cxx
@@ -176,18 +176,23 @@ struct rspamd_icu_collate_storage {
static rspamd_icu_collate_storage collate_storage;
int
-rspamd_utf8_strcmp(const char *s1, const char *s2, gsize n)
+rspamd_utf8_strcmp_sizes(const char *s1, gsize n1, const char *s2, gsize n2)
{
- if (n >= std::numeric_limits<int>::max()) {
+ if (n1 >= std::numeric_limits<int>::max() || n2 >= std::numeric_limits<int>::max()) {
/*
* It's hard to say what to do here... But libicu wants int, so we fall
* back to g_ascii_strncasecmp which can deal with size_t
*/
- return g_ascii_strncasecmp(s1, s2, n);
+ if (n1 == n2) {
+ return g_ascii_strncasecmp(s1, s2, n1);
+ }
+ else {
+ return n1 - n2;
+ }
}
UErrorCode success = U_ZERO_ERROR;
- auto res = collate_storage.collator->compareUTF8({s1, (int) n}, {s2, (int) n},
+ auto res = collate_storage.collator->compareUTF8({s1, (int) n1}, {s2, (int) n2},
success);
switch (res) {
@@ -201,6 +206,12 @@ rspamd_utf8_strcmp(const char *s1, const char *s2, gsize n)
}
}
+int
+rspamd_utf8_strcmp(const char *s1, const char *s2, gsize n)
+{
+ return rspamd_utf8_strcmp_sizes(s1, n, s2, n);
+}
+
TEST_SUITE("utf8 utils") {
TEST_CASE("utf8 normalise") {
std::tuple<const char *, const char *, int> cases[] = {
diff --git a/src/libutil/cxx/utf8_util.h b/src/libutil/cxx/utf8_util.h
index 28bd6a144..a9476f78d 100644
--- a/src/libutil/cxx/utf8_util.h
+++ b/src/libutil/cxx/utf8_util.h
@@ -59,6 +59,15 @@ enum rspamd_normalise_result rspamd_normalise_unicode_inplace(gchar *start, gsiz
* @return an integer greater than, equal to, or less than 0, according as the string s1 is greater than, equal to, or less than the string s2.
*/
int rspamd_utf8_strcmp(const char *s1, const char *s2, gsize n);
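+/**
+ * Similar to rspamd_utf8_strcmp but accepts two sizes, so strings of different length can be compared
+ * @param s1 first string
+ * @param n1 length of s1 in bytes
+ * @param s2 second string
+ * @param n2 length of s2 in bytes
+ * @return as for rspamd_utf8_strcmp: an integer greater than, equal to, or less than 0, according as s1 is greater than, equal to, or less than s2
+ */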
+int rspamd_utf8_strcmp_sizes(const char *s1, gsize n1, const char *s2, gsize n2);
#ifdef __cplusplus
}