From: Vsevolod Stakhov Date: Wed, 7 Oct 2015 11:26:38 +0000 (+0100) Subject: Generalize lowercase comparison. X-Git-Tag: 1.0.5~66^2~18 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=3b9d4282aea747995442cbebcc6b4908c6b52049;p=rspamd.git Generalize lowercase comparison. --- diff --git a/src/libutil/fstring.c b/src/libutil/fstring.c index d59ccaa4d..232b0021c 100644 --- a/src/libutil/fstring.c +++ b/src/libutil/fstring.c @@ -23,6 +23,7 @@ */ #include "fstring.h" +#include "str_util.h" static const gsize default_initial_size = 48; /* Maximum size when we double the size of new string */ @@ -257,56 +258,6 @@ rspamd_fstring_equal (const rspamd_fstring_t *s1, return FALSE; } -extern const guchar lc_map[256]; - -static gint -rspamd_fstring_lc_cmp (const gchar *s, const gchar *d, gsize l) -{ - guint fp, i; - guchar c1, c2, c3, c4; - union { - guchar c[4]; - guint32 n; - } cmp1, cmp2; - gsize leftover = l % 4; - gint ret = 0; - - fp = l - leftover; - - for (i = 0; i != fp; i += 4) { - c1 = s[i], c2 = s[i + 1], c3 = s[i + 2], c4 = s[i + 3]; - cmp1.c[0] = lc_map[c1]; - cmp1.c[1] = lc_map[c2]; - cmp1.c[2] = lc_map[c3]; - cmp1.c[3] = lc_map[c4]; - - c1 = d[i], c2 = d[i + 1], c3 = d[i + 2], c4 = d[i + 3]; - cmp2.c[0] = lc_map[c1]; - cmp2.c[1] = lc_map[c2]; - cmp2.c[2] = lc_map[c3]; - cmp2.c[3] = lc_map[c4]; - - if (cmp1.n != cmp2.n) { - return cmp1.n - cmp2.n; - } - - s += 4; - d += 4; - } - - while (leftover > 0) { - if (g_ascii_tolower (*s) != g_ascii_tolower (*d)) { - return (*s) - (*d); - } - - leftover--; - s++; - d++; - } - - return ret; -} - gint rspamd_fstring_casecmp (const rspamd_fstring_t *s1, const rspamd_fstring_t *s2) @@ -316,7 +267,7 @@ rspamd_fstring_casecmp (const rspamd_fstring_t *s1, g_assert (s1 != NULL && s2 != NULL); if (s1->len == s2->len) { - ret = rspamd_fstring_lc_cmp (s1->str, s2->str, s1->len); + ret = rspamd_lc_cmp (s1->str, s2->str, s1->len); } else { ret = s1->len - s2->len; @@ -347,7 +298,7 @@ rspamd_ftok_casecmp (const rspamd_ftok_t 
*s1, g_assert (s1 != NULL && s2 != NULL); if (s1->len == s2->len) { - ret = rspamd_fstring_lc_cmp (s1->begin, s2->begin, s1->len); + ret = rspamd_lc_cmp (s1->begin, s2->begin, s1->len); } else { ret = s1->len - s2->len; diff --git a/src/libutil/str_util.c b/src/libutil/str_util.c index f68b975a4..a2093e484 100644 --- a/src/libutil/str_util.c +++ b/src/libutil/str_util.c @@ -28,7 +28,7 @@ #include "mem_pool.h" #include "xxhash.h" -const guchar lc_map[256] = { +static const guchar lc_map[256] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, @@ -94,6 +94,54 @@ rspamd_str_lc (gchar *str, guint size) } +gint +rspamd_lc_cmp (const gchar *s, const gchar *d, gsize l) +{ + guint fp, i; + guchar c1, c2, c3, c4; + union { + guchar c[4]; + guint32 n; + } cmp1, cmp2; + gsize leftover = l % 4; + gint ret = 0; + + fp = l - leftover; + + for (i = 0; i != fp; i += 4) { + c1 = s[0], c2 = s[1], c3 = s[2], c4 = s[3]; + cmp1.c[0] = lc_map[c1]; + cmp1.c[1] = lc_map[c2]; + cmp1.c[2] = lc_map[c3]; + cmp1.c[3] = lc_map[c4]; + + c1 = d[0], c2 = d[1], c3 = d[2], c4 = d[3]; + cmp2.c[0] = lc_map[c1]; + cmp2.c[1] = lc_map[c2]; + cmp2.c[2] = lc_map[c3]; + cmp2.c[3] = lc_map[c4]; + + if (cmp1.n != cmp2.n) { + return cmp1.n - cmp2.n; + } + /* s and d advance every pass, so the loads above use offsets 0..3, not i */ + s += 4; + d += 4; + } + + while (leftover > 0) { + if (g_ascii_tolower (*s) != g_ascii_tolower (*d)) { + return (*s) - (*d); + } + + leftover--; + s++; + d++; + } + + return ret; +} + /* * The purpose of this function is fast and in place conversion of a unicode * string to lower case, so some locale peculiarities are simply ignored diff --git a/src/libutil/str_util.h b/src/libutil/str_util.h index fdadc4811..801bcc109 100644 --- a/src/libutil/str_util.h +++ b/src/libutil/str_util.h @@ -28,6 +28,11 @@ #include "config.h" +/** + * Compare two memory regions of size `l` using case insensitive matching + */ +gint rspamd_lc_cmp (const gchar 
*s, const gchar *d, gsize l); + /** * Convert string to lowercase in-place using ASCII conversion */