about | summary | refs | log | tree | commit | diff | stats
path: root/src/libutil/str_util.c
diff options
context:
space:
mode:
author: Vsevolod Stakhov <vsevolod@highsecure.ru> 2019-08-13 09:46:18 +0100
committer: Vsevolod Stakhov <vsevolod@highsecure.ru> 2019-08-13 09:46:18 +0100
commit: f1e9625920e4e9add168e30c0441a4312b23c890 (patch)
tree: b272a83d3f8c6af8fec2eb139d415d1b8d672b1f /src/libutil/str_util.c
parent: e900e3fce155c8ad08c69b3be0668e25262b15d1 (diff)
download: rspamd-f1e9625920e4e9add168e30c0441a4312b23c890.tar.gz
download: rspamd-f1e9625920e4e9add168e30c0441a4312b23c890.zip
[Minor] Rework utf8 lowercasing
Diffstat (limited to 'src/libutil/str_util.c')
-rw-r--r-- src/libutil/str_util.c 46
1 files changed, 19 insertions, 27 deletions
diff --git a/src/libutil/str_util.c b/src/libutil/str_util.c
index 1f2c4629f..4ce84fa65 100644
--- a/src/libutil/str_util.c
+++ b/src/libutil/str_util.c
@@ -62,7 +62,7 @@ const guchar lc_map[256] = {
0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff
};
-void
+guint
rspamd_str_lc (gchar *str, guint size)
{
guint leftover = size % 4;
@@ -93,6 +93,7 @@ rspamd_str_lc (gchar *str, guint size)
*dest = lc_map[(guchar)str[i]];
}
+ return size;
}
gint
@@ -144,42 +145,33 @@ rspamd_lc_cmp (const gchar *s, const gchar *d, gsize l)
* string to lower case, so some locale peculiarities are simply ignored
* If the target string is longer than initial one, then we just trim it
*/
-void
+guint
rspamd_str_lc_utf8 (gchar *str, guint size)
{
- const gchar *s = str, *p;
- gchar *d = str, tst[6];
- gint remain = size;
- gint r;
- gunichar uc;
+ guchar *d = (guchar *)str, tst[6];
+ gint32 i = 0, prev = 0;
+ UChar32 uc;
- while (remain > 0) {
- p = g_utf8_next_char (s);
+ while (i < size) {
+ prev = i;
- if (p - s > remain) {
- break;
- }
+ U8_NEXT ((guint8*)str, i, size, uc);
+ uc = u_tolower (uc);
- uc = g_utf8_get_char (s);
- uc = g_unichar_tolower (uc);
+ gint32 olen = 0;
+ U8_APPEND_UNSAFE (tst, olen, uc);
- if (remain >= 6) {
- r = g_unichar_to_utf8 (uc, d);
+ if (olen <= (i - prev)) {
+ memcpy (d, tst, olen);
+ d += olen;
}
else {
- /* We must be cautious here to avoid broken unicode being append */
- r = g_unichar_to_utf8 (uc, tst);
- if (r > remain) {
- break;
- }
- else {
- memcpy (d, tst, r);
- }
+ /* Lowercasing has increased the length, so we need to ignore it */
+ d += i - prev;
}
- remain -= r;
- s = p;
- d += r;
}
+
+ return d - (guchar *)str;
}
gboolean