diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2015-07-17 15:39:46 +0100 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2015-07-17 15:39:46 +0100 |
commit | 7041327261fc00c1ad88e5dc3eb905558bc4b5d5 (patch) | |
tree | f1c88e57b35a57629bd13928b26a996543fd9ea4 | |
parent | 584ebb4ea0a09d341cea73dce2a930fe1fa1dbb6 (diff) | |
download | rspamd-7041327261fc00c1ad88e5dc3eb905558bc4b5d5.tar.gz rspamd-7041327261fc00c1ad88e5dc3eb905558bc4b5d5.zip |
Move string utilities to a separate module.
-rw-r--r-- | src/libutil/CMakeLists.txt | 1 | ||||
-rw-r--r-- | src/libutil/str_util.c | 709 | ||||
-rw-r--r-- | src/libutil/str_util.h | 135 | ||||
-rw-r--r-- | src/libutil/util.c | 684 | ||||
-rw-r--r-- | src/libutil/util.h | 96 |
5 files changed, 846 insertions, 779 deletions
diff --git a/src/libutil/CMakeLists.txt b/src/libutil/CMakeLists.txt index 29c3b2429..61e5d6d15 100644 --- a/src/libutil/CMakeLists.txt +++ b/src/libutil/CMakeLists.txt @@ -19,6 +19,7 @@ SET(LIBRSPAMDUTILSRC ${CMAKE_CURRENT_SOURCE_DIR}/rrd.c ${CMAKE_CURRENT_SOURCE_DIR}/shingles.c ${CMAKE_CURRENT_SOURCE_DIR}/sqlite_utils.c + ${CMAKE_CURRENT_SOURCE_DIR}/str_util.c ${CMAKE_CURRENT_SOURCE_DIR}/upstream.c ${CMAKE_CURRENT_SOURCE_DIR}/util.c) # Rspamdutil diff --git a/src/libutil/str_util.c b/src/libutil/str_util.c new file mode 100644 index 000000000..58105be36 --- /dev/null +++ b/src/libutil/str_util.c @@ -0,0 +1,709 @@ +/* + * Copyright (c) 2015, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + + +#include "config.h" +#include "util.h" +#include "mem_pool.h" +#include "xxhash.h" + +static const guchar lc_map[256] = { + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, + 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, + 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, + 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, + 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, + 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, + 0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, + 0x78, 0x79, 0x7a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, + 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, + 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, + 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, + 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, + 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, + 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, + 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, + 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, + 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, + 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, + 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, + 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, + 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, + 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff +}; + +void +rspamd_str_lc (gchar *str, guint size) +{ + guint leftover = size % 4; + guint fp, i; + const uint8_t* s = (const uint8_t*) str; + gchar *dest = str; + guchar c1, c2, c3, c4; + + fp = size - leftover; + + for (i = 0; i != fp; i += 4) { + c1 = s[i], c2 = s[i + 1], c3 = s[i + 2], c4 = s[i + 3]; + dest[0] = lc_map[c1]; + dest[1] = lc_map[c2]; + dest[2] = lc_map[c3]; + dest[3] = lc_map[c4]; + dest += 4; + } + + switch (leftover) { + case 3: + *dest++ = lc_map[(guchar)str[i++]]; + case 2: + *dest++ = lc_map[(guchar)str[i++]]; + case 1: + *dest++ = lc_map[(guchar)str[i]]; + } + +} + +/* + * The purpose of this function is fast and in place conversion of a unicode + * string to lower case, so some locale peculiarities are simply ignored + * If the target string is longer than initial one, then we just trim it + */ +void +rspamd_str_lc_utf8 (gchar *str, guint size) +{ + const gchar *s = str, *p; + gchar *d = str, tst[6]; + gint remain = size; + gint r; + gunichar uc; + + while (remain > 0) { + uc = g_utf8_get_char (s); + uc = g_unichar_tolower (uc); + p = g_utf8_next_char (s); + + if (p - s > remain) { + break; + } + + if (remain >= 6) { + r = g_unichar_to_utf8 (uc, d); + } + else { + /* We must be cautious here to avoid broken unicode being append */ + r = g_unichar_to_utf8 (uc, tst); + if (r > remain) { + break; + } + else { + memcpy (d, tst, r); + } + } + remain -= r; + s = p; + d += r; + } +} + +gboolean +rspamd_strcase_equal (gconstpointer v, gconstpointer v2) +{ + if (g_ascii_strcasecmp ((const gchar *)v, (const gchar *)v2) == 0) { + return TRUE; + } + + return FALSE; +} + +static guint +rspamd_icase_hash (const gchar *in, gsize len) +{ + guint leftover = len % 4; + guint fp, i; + const uint8_t* s = (const uint8_t*) in; + union { + struct { + guchar c1, c2, c3, c4; + } c; + guint32 pp; + } u; + XXH64_state_t st; + + fp = len - leftover; + XXH64_reset (&st, rspamd_hash_seed ()); + + for (i = 0; i != fp; i += 4) { + u.c.c1 = s[i], u.c.c2 = s[i + 1], u.c.c3 = s[i + 2], u.c.c4 = s[i + 3]; + u.c.c1 = lc_map[u.c.c1]; + u.c.c2 = lc_map[u.c.c2]; + u.c.c3 = lc_map[u.c.c3]; + u.c.c4 = lc_map[u.c.c4]; + XXH64_update (&st, &u.pp, sizeof (u)); + } + + u.pp = 0; + switch (leftover) { + case 3: + u.c.c3 = lc_map[(guchar)s[i++]]; + case 2: + u.c.c2 = lc_map[(guchar)s[i++]]; + case 1: + u.c.c1 = lc_map[(guchar)s[i]]; + XXH64_update (&st, &u.pp, leftover); + break; + } + + return XXH64_digest (&st); +} + +guint +rspamd_strcase_hash (gconstpointer key) +{ + const gchar *p = key; + gsize len; + + len = strlen (p); + + return rspamd_icase_hash (p, len); +} + +guint +rspamd_str_hash (gconstpointer key) +{ + gsize len; + + len = strlen ((const gchar *)key); + + return XXH64 (key, len, rspamd_hash_seed ()); +} + +gboolean +rspamd_str_equal (gconstpointer v, gconstpointer v2) +{ + return strcmp ((const gchar *)v, (const gchar *)v2) == 0; +} + +gboolean +rspamd_fstring_icase_equal (gconstpointer v, gconstpointer v2) +{ + const rspamd_fstring_t *f1 = v, *f2 = v2; + if (f1->len == f2->len && + g_ascii_strncasecmp (f1->begin, f2->begin, f1->len) == 0) { + return TRUE; + } + + return FALSE; +} + + +guint +rspamd_fstring_icase_hash (gconstpointer key) +{ + const rspamd_fstring_t *f = key; + + return rspamd_icase_hash (f->begin, f->len); +} + +gboolean +rspamd_gstring_icase_equal (gconstpointer v, gconstpointer v2) +{ + const GString *f1 = v, *f2 = v2; + if (f1->len == f2->len && + g_ascii_strncasecmp (f1->str, f2->str, f1->len) == 0) { + return TRUE; + } + + return FALSE; +} + + +guint +rspamd_gstring_icase_hash (gconstpointer key) +{ + const GString *f = key; + + return rspamd_icase_hash (f->str, f->len); +} + +gsize +rspamd_strlcpy (gchar *dst, const gchar *src, gsize siz) +{ + gchar *d = dst; + const gchar *s = src; + gsize n = siz; + + /* Copy as many bytes as will fit */ + if (n != 0) { + while (--n != 0) { + if ((*d++ = *s++) == '\0') { + break; + } + } + } + + if (n == 0 && siz != 0) { + *d = '\0'; + } + + return (s - src - 1); /* count does not include NUL */ +} + +gsize +rspamd_strlcpy_tolower (gchar *dst, const gchar *src, gsize siz) +{ + gchar *d = dst; + const gchar *s = src; + gsize n = siz; + + /* Copy as many bytes as will fit */ + if (n != 0) { + while (--n != 0) { + if ((*d++ = g_ascii_tolower (*s++)) == '\0') { + break; + } + } + } + + if (n == 0 && siz != 0) { + *d = '\0'; + } + + return (s - src - 1); /* count does not include NUL */ +} + + +/* + * Find the first occurrence of find in s, ignore case. + */ +gchar * +rspamd_strncasestr (const gchar *s, const gchar *find, gint len) +{ + gchar c, sc; + gsize mlen; + + if ((c = *find++) != 0) { + c = g_ascii_tolower (c); + mlen = strlen (find); + do { + do { + if ((sc = *s++) == 0 || len-- == 0) + return (NULL); + } while (g_ascii_tolower (sc) != c); + } while (g_ascii_strncasecmp (s, find, mlen) != 0); + s--; + } + return ((gchar *)s); +} + +/* + * Try to convert string of length to long + */ +gboolean +rspamd_strtol (const gchar *s, gsize len, glong *value) +{ + const gchar *p = s, *end = s + len; + gchar c; + glong v = 0; + const glong cutoff = G_MAXLONG / 10, cutlim = G_MAXLONG % 10; + gboolean neg; + + /* Case negative values */ + if (*p == '-') { + neg = TRUE; + p++; + } + else { + neg = FALSE; + } + /* Some preparations for range errors */ + + while (p < end) { + c = *p; + if (c >= '0' && c <= '9') { + c -= '0'; + if (v > cutoff || (v == cutoff && c > cutlim)) { + /* Range error */ + *value = neg ? G_MINLONG : G_MAXLONG; + return FALSE; + } + else { + v *= 10; + v += c; + } + } + else { + return FALSE; + } + p++; + } + + *value = neg ? -(v) : v; + return TRUE; +} + +/* + * Try to convert string of length to long + */ +gboolean +rspamd_strtoul (const gchar *s, gsize len, gulong *value) +{ + const gchar *p = s, *end = s + len; + gchar c; + gulong v = 0; + const gulong cutoff = G_MAXULONG / 10, cutlim = G_MAXULONG % 10; + + /* Some preparations for range errors */ + while (p < end) { + c = *p; + if (c >= '0' && c <= '9') { + c -= '0'; + if (v > cutoff || (v == cutoff && (guint8)c > cutlim)) { + /* Range error */ + *value = G_MAXULONG; + return FALSE; + } + else { + v *= 10; + v += c; + } + } + else { + return FALSE; + } + p++; + } + + *value = v; + return TRUE; +} + +/** + * Utility function to provide mem_pool copy for rspamd_hash_table_copy function + * @param data string to copy + * @param ud memory pool to use + * @return + */ +gpointer +rspamd_str_pool_copy (gconstpointer data, gpointer ud) +{ + rspamd_mempool_t *pool = ud; + + return data ? rspamd_mempool_strdup (pool, data) : NULL; +} + +/* + * We use here z-base32 encoding described here: + * http://philzimmermann.com/docs/human-oriented-base-32-encoding.txt + */ + +gchar * +rspamd_encode_base32 (const guchar *in, gsize inlen) +{ + gint remain = -1, x; + gsize i, r; + gsize allocated_len = inlen * 8 / 5 + 2; + gchar *out; + static const char b32[]="ybndrfg8ejkmcpqxot1uwisza345h769"; + + out = g_malloc (allocated_len); + for (i = 0, r = 0; i < inlen; i++) { + switch (i % 5) { + case 0: + /* 8 bits of input and 3 to remain */ + x = in[i]; + remain = in[i] >> 5; + out[r++] = b32[x & 0x1F]; + break; + case 1: + /* 11 bits of input, 1 to remain */ + x = remain | in[i] << 3; + out[r++] = b32[x & 0x1F]; + out[r++] = b32[x >> 5 & 0x1F]; + remain = x >> 10; + break; + case 2: + /* 9 bits of input, 4 to remain */ + x = remain | in[i] << 1; + out[r++] = b32[x & 0x1F]; + remain = x >> 5; + break; + case 3: + /* 12 bits of input, 2 to remain */ + x = remain | in[i] << 4; + out[r++] = b32[x & 0x1F]; + out[r++] = b32[x >> 5 & 0x1F]; + remain = x >> 10 & 0x3; + break; + case 4: + /* 10 bits of output, nothing to remain */ + x = remain | in[i] << 2; + out[r++] = b32[x & 0x1F]; + out[r++] = b32[x >> 5 & 0x1F]; + remain = -1; + break; + default: + /* Not to be happen */ + break; + } + + } + if (remain >= 0) { + out[r++] = b32[remain]; + } + + out[r] = 0; + g_assert (r < allocated_len); + + return out; +} + +static const guchar b32_dec[] = { + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0x12, 0xff, 0x19, 0x1a, 0x1b, 0x1e, 0x1d, + 0x07, 0x1f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0x18, 0x01, 0x0c, 0x03, 0x08, 0x05, 0x06, + 0x1c, 0x15, 0x09, 0x0a, 0xff, 0x0b, 0x02, 0x10, + 0x0d, 0x0e, 0x04, 0x16, 0x11, 0x13, 0xff, 0x14, + 0x0f, 0x00, 0x17, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0x18, 0x01, 0x0c, 0x03, 0x08, 0x05, 0x06, + 0x1c, 0x15, 0x09, 0x0a, 0xff, 0x0b, 0x02, 0x10, + 0x0d, 0x0e, 0x04, 0x16, 0x11, 0x13, 0xff, 0x14, + 0x0f, 0x00, 0x17, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff +}; + +guchar* +rspamd_decode_base32 (const gchar *in, gsize inlen, gsize *outlen) +{ + guchar *res, decoded; + guchar c; + guint acc = 0U; + guint processed_bits = 0; + gsize olen = 0, i, allocated_len = inlen * 5 / 8 + 2; + + res = g_malloc (allocated_len); + + for (i = 0; i < inlen; i ++) { + c = (guchar)in[i]; + + if (processed_bits >= 8) { + processed_bits -= 8; + res[olen++] = acc & 0xFF; + acc >>= 8; + } + + decoded = b32_dec[c]; + if (decoded == 0xff) { + g_free (res); + return NULL; + } + + acc = (decoded << processed_bits) | acc; + processed_bits += 5; + } + + if (processed_bits > 0) { + res[olen++] = (acc & 0xFF); + } + + g_assert (olen <= allocated_len); + + *outlen = olen; + + return res; +} + + +gchar * +rspamd_encode_base64 (const guchar *in, gsize inlen, gint str_len, gsize *outlen) +{ +#define CHECK_SPLIT \ + do { if (str_len > 0 && cols >= str_len) { \ + *o++ = '\r'; \ + *o++ = '\n'; \ + cols = 0; \ + } } \ +while (0) + + gsize allocated_len = (inlen / 3) * 4 + 4; + gchar *out, *o; + guint64 n; + guint32 rem, t, carry; + gint cols, shift; + static const char b64_enc[] = + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz" + "0123456789+/"; + + if (str_len > 0) { + g_assert (str_len > 8); + allocated_len += (allocated_len / str_len + 1) * 2 + 1; + } + + out = g_malloc (allocated_len); + o = out; + cols = 0; + + while (inlen > 6) { + n = *(guint64 *)in; + n = GUINT64_TO_BE (n); + + if (str_len <= 0 || cols <= str_len - 8) { + *o++ = b64_enc[(n >> 58) & 0x3F]; + *o++ = b64_enc[(n >> 52) & 0x3F]; + *o++ = b64_enc[(n >> 46) & 0x3F]; + *o++ = b64_enc[(n >> 40) & 0x3F]; + *o++ = b64_enc[(n >> 34) & 0x3F]; + *o++ = b64_enc[(n >> 28) & 0x3F]; + *o++ = b64_enc[(n >> 22) & 0x3F]; + *o++ = b64_enc[(n >> 16) & 0x3F]; + cols += 8; + } + else { + cols = str_len - cols; + shift = 58; + while (cols) { + *o++ = b64_enc[(n >> shift) & 0x3F]; + shift -= 6; + cols --; + } + + *o++ = '\r'; + *o++ = '\n'; + + /* Remaining bytes */ + while (shift >= 16) { + *o++ = b64_enc[(n >> shift) & 0x3F]; + shift -= 6; + cols ++; + } + } + + in += 6; + inlen -= 6; + } + + CHECK_SPLIT; + + rem = 0; + carry = 0; + + for (;;) { + /* Padding + remaining data (0 - 2 bytes) */ + switch (rem) { + case 0: + if (inlen-- == 0) { + goto end; + } + t = *in++; + *o++ = b64_enc[t >> 2]; + carry = (t << 4) & 0x30; + rem = 1; + cols ++; + case 1: + if (inlen-- == 0) { + goto end; + } + CHECK_SPLIT; + t = *in++; + *o++ = b64_enc[carry | (t >> 4)]; + carry = (t << 2) & 0x3C; + rem = 2; + cols ++; + default: + if (inlen-- == 0) { + goto end; + } + CHECK_SPLIT; + t = *in ++; + *o++ = b64_enc[carry | (t >> 6)]; + cols ++; + CHECK_SPLIT; + *o++ = b64_enc[t & 0x3F]; + cols ++; + CHECK_SPLIT; + rem = 0; + } + } + +end: + if (rem == 1) { + *o++ = b64_enc[carry]; + cols ++; + CHECK_SPLIT; + *o++ = '='; + cols ++; + CHECK_SPLIT; + *o++ = '='; + cols ++; + CHECK_SPLIT; + } + else if (rem == 2) { + *o++ = b64_enc[carry]; + cols ++; + CHECK_SPLIT; + *o++ = '='; + cols ++; + } + + CHECK_SPLIT; + + *o = '\0'; + + if (outlen != NULL) { + *outlen = o - out; + } + + return out; +} diff --git a/src/libutil/str_util.h b/src/libutil/str_util.h new file mode 100644 index 000000000..a9c920c31 --- /dev/null +++ b/src/libutil/str_util.h @@ -0,0 +1,135 @@ +/* + * Copyright (c) 2015, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + + +#ifndef SRC_LIBUTIL_STR_UTIL_H_ +#define SRC_LIBUTIL_STR_UTIL_H_ + +#include "config.h" + +/** + * Convert string to lowercase in-place using ASCII conversion + */ +void rspamd_str_lc (gchar *str, guint size); +/** + * Convert string to lowercase in-place using utf (limited) conversion + */ +void rspamd_str_lc_utf8 (gchar *str, guint size); + +/* + * Hash table utility functions for case insensitive hashing + */ +guint rspamd_strcase_hash (gconstpointer key); +gboolean rspamd_strcase_equal (gconstpointer v, gconstpointer v2); + +/* + * Hash table utility functions for case sensitive hashing + */ +guint rspamd_str_hash (gconstpointer key); +gboolean rspamd_str_equal (gconstpointer v, gconstpointer v2); + + +/* + * Hash table utility functions for hashing fixed strings + */ +guint rspamd_fstring_icase_hash (gconstpointer key); +gboolean rspamd_fstring_icase_equal (gconstpointer v, gconstpointer v2); +guint rspamd_gstring_icase_hash (gconstpointer key); +gboolean rspamd_gstring_icase_equal (gconstpointer v, gconstpointer v2); + +/** + * Copy src to dest limited to len, in compare with standart strlcpy(3) rspamd strlcpy does not + * traverse the whole string and it is possible to use it for non NULL terminated strings. This is + * more like memccpy(dst, src, size, '\0') + * + * @param dst destination string + * @param src source string + * @param siz length of destination buffer + * @return bytes copied + */ +gsize rspamd_strlcpy (gchar *dst, const gchar *src, gsize siz); + +/** + * Lowercase strlcpy variant + * @param dst + * @param src + * @param siz + * @return + */ +gsize rspamd_strlcpy_tolower (gchar *dst, const gchar *src, gsize siz); + +/* + * Find string find in string s ignoring case + */ +gchar * rspamd_strncasestr (const gchar *s, const gchar *find, gint len); + +/* + * Try to convert string of length to long + */ +gboolean rspamd_strtol (const gchar *s, gsize len, glong *value); + +/* + * Try to convert string of length to unsigned long + */ +gboolean rspamd_strtoul (const gchar *s, gsize len, gulong *value); + +/** + * Utility function to provide mem_pool copy for rspamd_hash_table_copy function + * @param data string to copy + * @param ud memory pool to use + * @return + */ +gpointer rspamd_str_pool_copy (gconstpointer data, gpointer ud); + +/** + * Encode string using base32 encoding + * @param in input + * @param inlen input length + * @return freshly allocated base32 encoding of a specified string + */ +gchar * rspamd_encode_base32 (const guchar *in, gsize inlen); + +/** + * Decode string using base32 encoding + * @param in input + * @param inlen input length + * @return freshly allocated base32 decoded value or NULL if input is invalid + */ +guchar* rspamd_decode_base32 (const gchar *in, gsize inlen, gsize *outlen); + +/** + * Encode string using base64 encoding + * @param in input + * @param inlen input length + * @param str_len maximum string length (if <= 0 then no lines are split) + * @return freshly allocated base64 encoded value or NULL if input is invalid + */ +gchar * rspamd_encode_base64 (const guchar *in, gsize inlen, gint str_len, + gsize *outlen); + +#ifndef g_tolower +# define g_tolower(x) (((x) >= 'A' && (x) <= 'Z') ? (x) - 'A' + 'a' : (x)) +#endif + +#endif /* SRC_LIBUTIL_STR_UTIL_H_ */ diff --git a/src/libutil/util.c b/src/libutil/util.c index bc58fc70a..02f1721ff 100644 --- a/src/libutil/util.c +++ b/src/libutil/util.c @@ -660,114 +660,6 @@ rspamd_pass_signal (GHashTable * workers, gint signo) g_hash_table_foreach (workers, pass_signal_cb, GINT_TO_POINTER (signo)); } -static const guchar lc_map[256] = { - 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, - 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, - 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, - 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, - 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, - 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, - 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, - 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, - 0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, - 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, - 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, - 0x78, 0x79, 0x7a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, - 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, - 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, - 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, - 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, - 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, - 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, - 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, - 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, - 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, - 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, - 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, - 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, - 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, - 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, - 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, - 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, - 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, - 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, - 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, - 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff -}; - -void -rspamd_str_lc (gchar *str, guint size) -{ - guint leftover = size % 4; - guint fp, i; - const uint8_t* s = (const uint8_t*) str; - gchar *dest = str; - guchar c1, c2, c3, c4; - - fp = size - leftover; - - for (i = 0; i != fp; i += 4) { - c1 = s[i], c2 = s[i + 1], c3 = s[i + 2], c4 = s[i + 3]; - dest[0] = lc_map[c1]; - dest[1] = lc_map[c2]; - dest[2] = lc_map[c3]; - dest[3] = lc_map[c4]; - dest += 4; - } - - switch (leftover) { - case 3: - *dest++ = lc_map[(guchar)str[i++]]; - case 2: - *dest++ = lc_map[(guchar)str[i++]]; - case 1: - *dest++ = lc_map[(guchar)str[i]]; - } - -} - -/* - * The purpose of this function is fast and in place conversion of a unicode - * string to lower case, so some locale peculiarities are simply ignored - * If the target string is longer than initial one, then we just trim it - */ -void -rspamd_str_lc_utf8 (gchar *str, guint size) -{ - const gchar *s = str, *p; - gchar *d = str, tst[6]; - gint remain = size; - gint r; - gunichar uc; - - while (remain > 0) { - uc = g_utf8_get_char (s); - uc = g_unichar_tolower (uc); - p = g_utf8_next_char (s); - - if (p - s > remain) { - break; - } - - if (remain >= 6) { - r = g_unichar_to_utf8 (uc, d); - } - else { - /* We must be cautious here to avoid broken unicode being append */ - r = g_unichar_to_utf8 (uc, tst); - if (r > remain) { - break; - } - else { - memcpy (d, tst, r); - } - } - remain -= r; - s = p; - d += r; - } -} - #ifndef HAVE_SETPROCTITLE static gchar *title_buffer = 0; @@ -1208,130 +1100,6 @@ rspamd_log_check_time (gdouble start_real, gdouble start_virtual, gint resolutio return (const gchar *)res; } -#ifndef g_tolower -# define g_tolower(x) (((x) >= 'A' && (x) <= 'Z') ? (x) - 'A' + 'a' : (x)) -#endif - - -gboolean -rspamd_strcase_equal (gconstpointer v, gconstpointer v2) -{ - if (g_ascii_strcasecmp ((const gchar *)v, (const gchar *)v2) == 0) { - return TRUE; - } - - return FALSE; -} - -static guint -rspamd_icase_hash (const gchar *in, gsize len) -{ - guint leftover = len % 4; - guint fp, i; - const uint8_t* s = (const uint8_t*) in; - union { - struct { - guchar c1, c2, c3, c4; - } c; - guint32 pp; - } u; - XXH64_state_t st; - - fp = len - leftover; - XXH64_reset (&st, rspamd_hash_seed ()); - - for (i = 0; i != fp; i += 4) { - u.c.c1 = s[i], u.c.c2 = s[i + 1], u.c.c3 = s[i + 2], u.c.c4 = s[i + 3]; - u.c.c1 = lc_map[u.c.c1]; - u.c.c2 = lc_map[u.c.c2]; - u.c.c3 = lc_map[u.c.c3]; - u.c.c4 = lc_map[u.c.c4]; - XXH64_update (&st, &u.pp, sizeof (u)); - } - - u.pp = 0; - switch (leftover) { - case 3: - u.c.c3 = lc_map[(guchar)s[i++]]; - case 2: - u.c.c2 = lc_map[(guchar)s[i++]]; - case 1: - u.c.c1 = lc_map[(guchar)s[i]]; - XXH64_update (&st, &u.pp, leftover); - break; - } - - return XXH64_digest (&st); -} - -guint -rspamd_strcase_hash (gconstpointer key) -{ - const gchar *p = key; - gsize len; - - len = strlen (p); - - return rspamd_icase_hash (p, len); -} - -guint -rspamd_str_hash (gconstpointer key) -{ - gsize len; - - len = strlen ((const gchar *)key); - - return XXH64 (key, len, rspamd_hash_seed ()); -} - -gboolean -rspamd_str_equal (gconstpointer v, gconstpointer v2) -{ - return strcmp ((const gchar *)v, (const gchar *)v2) == 0; -} - -gboolean -rspamd_fstring_icase_equal (gconstpointer v, gconstpointer v2) -{ - const rspamd_fstring_t *f1 = v, *f2 = v2; - if (f1->len == f2->len && - g_ascii_strncasecmp (f1->begin, f2->begin, f1->len) == 0) { - return TRUE; - } - - return FALSE; -} - - -guint -rspamd_fstring_icase_hash (gconstpointer key) -{ - const rspamd_fstring_t *f = key; - - return rspamd_icase_hash (f->begin, f->len); -} - -gboolean -rspamd_gstring_icase_equal (gconstpointer v, gconstpointer v2) -{ - const GString *f1 = v, *f2 = v2; - if (f1->len == f2->len && - g_ascii_strncasecmp (f1->str, f2->str, f1->len) == 0) { - return TRUE; - } - - return FALSE; -} - - -guint -rspamd_gstring_icase_hash (gconstpointer key) -{ - const GString *f = key; - - return rspamd_icase_hash (f->str, f->len); -} void gperf_profiler_init (struct rspamd_config *cfg, const gchar *descr) @@ -1494,53 +1262,6 @@ g_ptr_array_new_full (guint reserved_size, } #endif - -gsize -rspamd_strlcpy (gchar *dst, const gchar *src, gsize siz) -{ - gchar *d = dst; - const gchar *s = src; - gsize n = siz; - - /* Copy as many bytes as will fit */ - if (n != 0) { - while (--n != 0) { - if ((*d++ = *s++) == '\0') { - break; - } - } - } - - if (n == 0 && siz != 0) { - *d = '\0'; - } - - return (s - src - 1); /* count does not include NUL */ -} - -gsize -rspamd_strlcpy_tolower (gchar *dst, const gchar *src, gsize siz) -{ - gchar *d = dst; - const gchar *s = src; - gsize n = siz; - - /* Copy as many bytes as will fit */ - if (n != 0) { - while (--n != 0) { - if ((*d++ = g_ascii_tolower (*s++)) == '\0') { - break; - } - } - } - - if (n == 0 && siz != 0) { - *d = '\0'; - } - - return (s - src - 1); /* count does not include NUL */ -} - guint rspamd_url_hash (gconstpointer u) { @@ -1607,111 +1328,6 @@ rspamd_urls_cmp (gconstpointer a, gconstpointer b) return r == 0; } -/* - * Find the first occurrence of find in s, ignore case. - */ -gchar * -rspamd_strncasestr (const gchar *s, const gchar *find, gint len) -{ - gchar c, sc; - gsize mlen; - - if ((c = *find++) != 0) { - c = g_ascii_tolower (c); - mlen = strlen (find); - do { - do { - if ((sc = *s++) == 0 || len-- == 0) - return (NULL); - } while (g_ascii_tolower (sc) != c); - } while (g_ascii_strncasecmp (s, find, mlen) != 0); - s--; - } - return ((gchar *)s); -} - -/* - * Try to convert string of length to long - */ -gboolean -rspamd_strtol (const gchar *s, gsize len, glong *value) -{ - const gchar *p = s, *end = s + len; - gchar c; - glong v = 0; - const glong cutoff = G_MAXLONG / 10, cutlim = G_MAXLONG % 10; - gboolean neg; - - /* Case negative values */ - if (*p == '-') { - neg = TRUE; - p++; - } - else { - neg = FALSE; - } - /* Some preparations for range errors */ - - while (p < end) { - c = *p; - if (c >= '0' && c <= '9') { - c -= '0'; - if (v > cutoff || (v == cutoff && c > cutlim)) { - /* Range error */ - *value = neg ? G_MINLONG : G_MAXLONG; - return FALSE; - } - else { - v *= 10; - v += c; - } - } - else { - return FALSE; - } - p++; - } - - *value = neg ? -(v) : v; - return TRUE; -} - -/* - * Try to convert string of length to long - */ -gboolean -rspamd_strtoul (const gchar *s, gsize len, gulong *value) -{ - const gchar *p = s, *end = s + len; - gchar c; - gulong v = 0; - const gulong cutoff = G_MAXULONG / 10, cutlim = G_MAXULONG % 10; - - /* Some preparations for range errors */ - while (p < end) { - c = *p; - if (c >= '0' && c <= '9') { - c -= '0'; - if (v > cutoff || (v == cutoff && (guint8)c > cutlim)) { - /* Range error */ - *value = G_MAXULONG; - return FALSE; - } - else { - v *= 10; - v += c; - } - } - else { - return FALSE; - } - p++; - } - - *value = v; - return TRUE; -} - gint rspamd_fallocate (gint fd, off_t offset, off_t len) { @@ -1978,20 +1594,6 @@ rspamd_hash_table_copy (GHashTable *src, GHashTable *dst, } } -/** - * Utility function to provide mem_pool copy for rspamd_hash_table_copy function - * @param data string to copy - * @param ud memory pool to use - * @return - */ -gpointer -rspamd_str_pool_copy (gconstpointer data, gpointer ud) -{ - rspamd_mempool_t *pool = ud; - - return data ? rspamd_mempool_strdup (pool, data) : NULL; -} - static volatile sig_atomic_t saved_signo[NSIG]; static @@ -2190,292 +1792,6 @@ rspamd_ucl_emit_gstring (ucl_object_t *obj, ucl_object_emit_full (obj, emit_type, &func); } -/* - * We use here z-base32 encoding described here: - * http://philzimmermann.com/docs/human-oriented-base-32-encoding.txt - */ - -gchar * -rspamd_encode_base32 (const guchar *in, gsize inlen) -{ - gint remain = -1, x; - gsize i, r; - gsize allocated_len = inlen * 8 / 5 + 2; - gchar *out; - static const char b32[]="ybndrfg8ejkmcpqxot1uwisza345h769"; - - out = g_malloc (allocated_len); - for (i = 0, r = 0; i < inlen; i++) { - switch (i % 5) { - case 0: - /* 8 bits of input and 3 to remain */ - x = in[i]; - remain = in[i] >> 5; - out[r++] = b32[x & 0x1F]; - break; - case 1: - /* 11 bits of input, 1 to remain */ - x = remain | in[i] << 3; - out[r++] = b32[x & 0x1F]; - out[r++] = b32[x >> 5 & 0x1F]; - remain = x >> 10; - break; - case 2: - /* 9 bits of input, 4 to remain */ - x = remain | in[i] << 1; - out[r++] = b32[x & 0x1F]; - remain = x >> 5; - break; - case 3: - /* 12 bits of input, 2 to remain */ - x = remain | in[i] << 4; - out[r++] = b32[x & 0x1F]; - out[r++] = b32[x >> 5 & 0x1F]; - remain = x >> 10 & 0x3; - break; - case 4: - /* 10 bits of output, nothing to remain */ - x = remain | in[i] << 2; - out[r++] = b32[x & 0x1F]; - out[r++] = b32[x >> 5 & 0x1F]; - remain = -1; - break; - default: - /* Not to be happen */ - break; - } - - } - if (remain >= 0) { - out[r++] = b32[remain]; - } - - out[r] = 0; - g_assert (r < allocated_len); - - return out; -} - -static const guchar b32_dec[] = { - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0x12, 0xff, 0x19, 0x1a, 0x1b, 0x1e, 0x1d, - 0x07, 0x1f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0x18, 0x01, 0x0c, 0x03, 0x08, 0x05, 0x06, - 0x1c, 0x15, 0x09, 0x0a, 0xff, 0x0b, 0x02, 0x10, - 0x0d, 0x0e, 0x04, 0x16, 0x11, 0x13, 0xff, 0x14, - 0x0f, 0x00, 0x17, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0x18, 0x01, 0x0c, 0x03, 0x08, 0x05, 0x06, - 0x1c, 0x15, 0x09, 0x0a, 0xff, 0x0b, 0x02, 0x10, - 0x0d, 0x0e, 0x04, 0x16, 0x11, 0x13, 0xff, 0x14, - 0x0f, 0x00, 0x17, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff -}; - -guchar* -rspamd_decode_base32 (const gchar *in, gsize inlen, gsize *outlen) -{ - guchar *res, decoded; - guchar c; - guint acc = 0U; - guint processed_bits = 0; - gsize olen = 0, i, allocated_len = inlen * 5 / 8 + 2; - - res = g_malloc (allocated_len); - - for (i = 0; i < inlen; i ++) { - c = (guchar)in[i]; - - if (processed_bits >= 8) { - processed_bits -= 8; - res[olen++] = acc & 0xFF; - acc >>= 8; - } - - decoded = b32_dec[c]; - if (decoded == 0xff) { - g_free (res); - return NULL; - } - - acc = (decoded << processed_bits) | acc; - processed_bits += 5; - } - - if (processed_bits > 0) { - res[olen++] = (acc & 0xFF); - } - - g_assert (olen <= allocated_len); - - *outlen = olen; - - return res; -} - - -gchar * -rspamd_encode_base64 (const guchar *in, gsize inlen, gint str_len, gsize *outlen) -{ -#define CHECK_SPLIT \ - do { if (str_len > 0 && cols >= str_len) { \ - *o++ = '\r'; \ - *o++ = '\n'; \ - cols = 0; \ - } } \ -while (0) - - gsize allocated_len = (inlen / 3) * 4 + 4; - gchar *out, *o; - guint64 n; - guint32 rem, t, carry; - gint cols, shift; - static const char b64_enc[] = - "ABCDEFGHIJKLMNOPQRSTUVWXYZ" - "abcdefghijklmnopqrstuvwxyz" - "0123456789+/"; - - if (str_len > 0) { - g_assert (str_len > 8); - allocated_len += (allocated_len / str_len + 1) * 2 + 1; - } - - out = g_malloc (allocated_len); - o = out; - cols = 0; - - while (inlen > 6) { - n = *(guint64 *)in; - n = GUINT64_TO_BE (n); - - if (str_len <= 0 || cols <= str_len - 8) { - *o++ = b64_enc[(n >> 58) & 0x3F]; - *o++ = b64_enc[(n >> 52) & 0x3F]; - *o++ = b64_enc[(n >> 46) & 0x3F]; - *o++ = b64_enc[(n >> 40) & 0x3F]; - *o++ = b64_enc[(n >> 34) & 0x3F]; - *o++ = b64_enc[(n >> 28) & 0x3F]; - *o++ = b64_enc[(n >> 22) & 0x3F]; - *o++ = b64_enc[(n >> 16) & 0x3F]; - cols += 8; - } - else { - cols = str_len - cols; - shift = 58; - while (cols) { - *o++ = b64_enc[(n >> shift) & 0x3F]; - shift -= 6; - cols --; - } - - *o++ = '\r'; - *o++ = '\n'; - - /* Remaining bytes */ - while (shift >= 16) { - *o++ = b64_enc[(n >> shift) & 0x3F]; - shift -= 6; - cols ++; - } - } - - in += 6; - inlen -= 6; - } - - CHECK_SPLIT; - - rem = 0; - carry = 0; - - for (;;) { - /* Padding + remaining data (0 - 2 bytes) */ - switch (rem) { - case 0: - if (inlen-- == 0) { - goto end; - } - t = *in++; - *o++ = b64_enc[t >> 2]; - carry = (t << 4) & 0x30; - rem = 1; - cols ++; - case 1: - if (inlen-- == 0) { - goto end; - } - CHECK_SPLIT; - t = *in++; - *o++ = b64_enc[carry | (t >> 4)]; - carry = (t << 2) & 0x3C; - rem = 2; - cols ++; - default: - if (inlen-- == 0) { - goto end; - } - CHECK_SPLIT; - t = *in ++; - *o++ = b64_enc[carry | (t >> 6)]; - cols ++; - CHECK_SPLIT; - *o++ = b64_enc[t & 0x3F]; - cols ++; - CHECK_SPLIT; - rem = 0; - } - } - -end: - if (rem == 1) { - *o++ = b64_enc[carry]; - cols ++; - CHECK_SPLIT; - *o++ = '='; - cols ++; - CHECK_SPLIT; - *o++ = '='; - cols ++; - CHECK_SPLIT; - } - else if (rem == 2) { - *o++ = b64_enc[carry]; - cols ++; - CHECK_SPLIT; - *o++ = '='; - cols ++; - } - - CHECK_SPLIT; - - *o = '\0'; - - if (outlen != NULL) { - *outlen = o - out; - } - - return out; -} - gdouble rspamd_get_ticks (void) { diff --git a/src/libutil/util.h b/src/libutil/util.h index d3dcdbcf7..ee7bf01f8 100644 --- a/src/libutil/util.h +++ b/src/libutil/util.h @@ -7,6 +7,7 @@ #include "fstring.h" #include "ucl.h" #include "addr.h" +#include "str_util.h" struct rspamd_config; struct rspamd_main; @@ -105,11 +106,6 @@ void rspamd_signals_init (struct sigaction *sa, void (*sig_handler)(gint)); * Send specified signal to each worker */ void rspamd_pass_signal (GHashTable *, gint ); -/* - * Convert string to lowercase - */ -void rspamd_str_lc (gchar *str, guint size); -void rspamd_str_lc_utf8 (gchar *str, guint size); #ifndef HAVE_SETPROCTITLE /* @@ -167,27 +163,6 @@ gboolean rspamd_file_lock (gint fd, gboolean async); gboolean rspamd_file_unlock (gint fd, gboolean async); /* - * Hash table utility functions for case insensitive hashing - */ -guint rspamd_strcase_hash (gconstpointer key); -gboolean rspamd_strcase_equal (gconstpointer v, gconstpointer v2); - -/* - * Hash table utility functions for case sensitive hashing - */ -guint rspamd_str_hash (gconstpointer key); -gboolean rspamd_str_equal (gconstpointer v, gconstpointer v2); - - -/* - * Hash table utility functions for hashing fixed strings - */ -guint rspamd_fstring_icase_hash (gconstpointer key); -gboolean rspamd_fstring_icase_equal (gconstpointer v, gconstpointer v2); -guint rspamd_gstring_icase_hash (gconstpointer key); -gboolean rspamd_gstring_icase_equal (gconstpointer v, gconstpointer v2); - -/* * Google perf-tools initialization function */ void gperf_profiler_init (struct rspamd_config *cfg, const gchar *descr); @@ -203,27 +178,6 @@ void g_queue_clear (GQueue *queue); #endif -/** - * Copy src to dest limited to len, in compare with standart strlcpy(3) rspamd strlcpy does not - * traverse the whole string and it is possible to use it for non NULL terminated strings. This is - * more like memccpy(dst, src, size, '\0') - * - * @param dst destination string - * @param src source string - * @param siz length of destination buffer - * @return bytes copied - */ -gsize rspamd_strlcpy (gchar *dst, const gchar *src, gsize siz); - -/** - * Lowercase strlcpy variant - * @param dst - * @param src - * @param siz - * @return - */ -gsize rspamd_strlcpy_tolower (gchar *dst, const gchar *src, gsize siz); - /* * Convert milliseconds to timeval fields */ @@ -245,21 +199,6 @@ gboolean rspamd_emails_cmp (gconstpointer a, gconstpointer b); /* Compare two urls for building emails hash */ gboolean rspamd_urls_cmp (gconstpointer a, gconstpointer b); -/* - * Find string find in string s ignoring case - */ -gchar * rspamd_strncasestr (const gchar *s, const gchar *find, gint len); - -/* - * Try to convert string of length to long - */ -gboolean rspamd_strtol (const gchar *s, gsize len, glong *value); - -/* - * Try to convert string of length to unsigned long - */ -gboolean rspamd_strtoul (const gchar *s, gsize len, gulong *value); - /** * Try to allocate a file on filesystem (using fallocate or posix_fallocate) * @param fd descriptor @@ -385,13 +324,6 @@ void rspamd_hash_table_copy (GHashTable *src, GHashTable *dst, gpointer (*value_copy_func)(gconstpointer data, gpointer ud), gpointer ud); -/** - * Utility function to provide mem_pool copy for rspamd_hash_table_copy function - * @param data string to copy - * @param ud memory pool to use - * @return - */ -gpointer rspamd_str_pool_copy (gconstpointer data, gpointer ud); /** * Read passphrase from tty @@ -414,32 +346,6 @@ void rspamd_ucl_emit_gstring (ucl_object_t *obj, GString *target); /** - * Encode string using base32 encoding - * @param in input - * @param inlen input length - * @return freshly allocated base32 encoding of a specified string - */ -gchar * rspamd_encode_base32 (const guchar *in, gsize inlen); - -/** - * Decode string using base32 encoding - * @param in input - * @param inlen input length - * @return freshly allocated base32 decoded value or NULL if input is invalid - */ -guchar* rspamd_decode_base32 (const gchar *in, gsize inlen, gsize *outlen); - -/** - * Encode string using base64 encoding - * @param in input - * @param inlen input length - * @param str_len maximum string length (if <= 0 then no lines are split) - * @return freshly allocated base64 encoded value or NULL if input is invalid - */ -gchar * rspamd_encode_base64 (const guchar *in, gsize inlen, gint str_len, - gsize *outlen); - -/** * Portably return the current clock ticks as seconds * @return */ |