aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorVsevolod Stakhov <vsevolod@highsecure.ru>2015-07-17 15:39:46 +0100
committerVsevolod Stakhov <vsevolod@highsecure.ru>2015-07-17 15:39:46 +0100
commit7041327261fc00c1ad88e5dc3eb905558bc4b5d5 (patch)
treef1c88e57b35a57629bd13928b26a996543fd9ea4 /src
parent584ebb4ea0a09d341cea73dce2a930fe1fa1dbb6 (diff)
downloadrspamd-7041327261fc00c1ad88e5dc3eb905558bc4b5d5.tar.gz
rspamd-7041327261fc00c1ad88e5dc3eb905558bc4b5d5.zip
Move string utilities to a separate module.
Diffstat (limited to 'src')
-rw-r--r--src/libutil/CMakeLists.txt1
-rw-r--r--src/libutil/str_util.c709
-rw-r--r--src/libutil/str_util.h135
-rw-r--r--src/libutil/util.c684
-rw-r--r--src/libutil/util.h96
5 files changed, 846 insertions, 779 deletions
diff --git a/src/libutil/CMakeLists.txt b/src/libutil/CMakeLists.txt
index 29c3b2429..61e5d6d15 100644
--- a/src/libutil/CMakeLists.txt
+++ b/src/libutil/CMakeLists.txt
@@ -19,6 +19,7 @@ SET(LIBRSPAMDUTILSRC
${CMAKE_CURRENT_SOURCE_DIR}/rrd.c
${CMAKE_CURRENT_SOURCE_DIR}/shingles.c
${CMAKE_CURRENT_SOURCE_DIR}/sqlite_utils.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/str_util.c
${CMAKE_CURRENT_SOURCE_DIR}/upstream.c
${CMAKE_CURRENT_SOURCE_DIR}/util.c)
# Rspamdutil
diff --git a/src/libutil/str_util.c b/src/libutil/str_util.c
new file mode 100644
index 000000000..58105be36
--- /dev/null
+++ b/src/libutil/str_util.c
@@ -0,0 +1,709 @@
+/*
+ * Copyright (c) 2015, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#include "config.h"
+#include "util.h"
+#include "mem_pool.h"
+#include "xxhash.h"
+
+/*
+ * Byte-to-byte lowercase table: 0x41..0x5a ('A'..'Z') map to 0x61..0x7a
+ * ('a'..'z'); every other value, including all bytes >= 0x80, maps to itself.
+ */
+static const guchar lc_map[256] = {
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
+ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
+ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
+ 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
+ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
+ 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
+ 0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
+ 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
+ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
+ 0x78, 0x79, 0x7a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
+ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
+ 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
+ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
+ 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
+ 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
+ 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
+ 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
+ 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
+ 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
+ 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
+ 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
+ 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
+ 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
+ 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
+ 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
+ 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff
+};
+
+/*
+ * Lowercase `size` bytes of `str` in place using the ASCII-only lc_map table.
+ * The buffer does not need to be NUL-terminated.
+ */
+void
+rspamd_str_lc (gchar *str, guint size)
+{
+	guchar *bytes = (guchar *) str;
+	guint tail = size % 4;
+	guint aligned = size - tail;
+	guint pos;
+
+	/* Bulk part: four bytes per iteration */
+	for (pos = 0; pos < aligned; pos += 4) {
+		bytes[pos] = lc_map[bytes[pos]];
+		bytes[pos + 1] = lc_map[bytes[pos + 1]];
+		bytes[pos + 2] = lc_map[bytes[pos + 2]];
+		bytes[pos + 3] = lc_map[bytes[pos + 3]];
+	}
+
+	/* Remaining 0..3 bytes */
+	while (pos < size) {
+		bytes[pos] = lc_map[bytes[pos]];
+		pos ++;
+	}
+}
+
+/*
+ * The purpose of this function is fast, in-place conversion of a unicode
+ * string to lower case, so some locale peculiarities are simply ignored.
+ * If the target string is longer than the initial one, then we just trim it.
+ *
+ * `str` is both read (through `s`) and written (through `d`); `size` is the
+ * number of bytes available. No NUL terminator is written by this function.
+ */
+void
+rspamd_str_lc_utf8 (gchar *str, guint size)
+{
+ const gchar *s = str, *p;
+ gchar *d = str, tst[6];
+ gint remain = size;
+ gint r;
+ gunichar uc;
+
+ while (remain > 0) {
+ /* Decode the code point at the read position and lowercase it */
+ uc = g_utf8_get_char (s);
+ uc = g_unichar_tolower (uc);
+ p = g_utf8_next_char (s);
+
+ /* Stop if the encoded input character claims more bytes than remain */
+ if (p - s > remain) {
+ break;
+ }
+
+ if (remain >= 6) {
+ /* Enough room left for any encoding length: write directly */
+ r = g_unichar_to_utf8 (uc, d);
+ }
+ else {
+ /* We must be cautious here to avoid broken unicode being appended */
+ r = g_unichar_to_utf8 (uc, tst);
+ if (r > remain) {
+ break;
+ }
+ else {
+ memcpy (d, tst, r);
+ }
+ }
+ /* Note: the budget is reduced by the number of bytes written (r) */
+ remain -= r;
+ s = p;
+ d += r;
+ }
+}
+
+/*
+ * GEqualFunc-compatible comparison of two NUL-terminated strings,
+ * ignoring ASCII case.
+ */
+gboolean
+rspamd_strcase_equal (gconstpointer v, gconstpointer v2)
+{
+	const gchar *lhs = v, *rhs = v2;
+
+	return g_ascii_strcasecmp (lhs, rhs) == 0 ? TRUE : FALSE;
+}
+
+/*
+ * Case-insensitive (ASCII) hash of `len` bytes starting at `in`.
+ * Input is lowercased through lc_map and fed to XXH64 in 4-byte chunks;
+ * the 64-bit digest is returned truncated to guint.
+ */
+static guint
+rspamd_icase_hash (const gchar *in, gsize len)
+{
+	const guchar *bytes = (const guchar *) in;
+	guint tail = len % 4;
+	guint body = len - tail;
+	guint pos, k;
+	guchar chunk[4];
+	XXH64_state_t state;
+
+	XXH64_reset (&state, rspamd_hash_seed ());
+
+	for (pos = 0; pos != body; pos += 4) {
+		for (k = 0; k < 4; k ++) {
+			chunk[k] = lc_map[bytes[pos + k]];
+		}
+		XXH64_update (&state, chunk, sizeof (chunk));
+	}
+
+	if (tail > 0) {
+		chunk[0] = chunk[1] = chunk[2] = chunk[3] = 0;
+		/*
+		 * The trailing bytes are stored in reverse order (first leftover
+		 * byte goes to the highest slot); this must stay as is to keep
+		 * hash values stable.
+		 */
+		for (k = 0; k < tail; k ++) {
+			chunk[tail - 1 - k] = lc_map[bytes[pos + k]];
+		}
+		XXH64_update (&state, chunk, tail);
+	}
+
+	return XXH64_digest (&state);
+}
+
+/*
+ * GHashFunc-compatible case-insensitive hash of a NUL-terminated string.
+ */
+guint
+rspamd_strcase_hash (gconstpointer key)
+{
+	const gchar *str = key;
+
+	return rspamd_icase_hash (str, strlen (str));
+}
+
+/*
+ * GHashFunc-compatible case-sensitive hash of a NUL-terminated string
+ * (XXH64 over the string bytes, seeded with rspamd_hash_seed ()).
+ */
+guint
+rspamd_str_hash (gconstpointer key)
+{
+	const gchar *str = key;
+	gsize slen = strlen (str);
+
+	return XXH64 (key, slen, rspamd_hash_seed ());
+}
+
+/*
+ * GEqualFunc-compatible exact comparison of two NUL-terminated strings.
+ */
+gboolean
+rspamd_str_equal (gconstpointer v, gconstpointer v2)
+{
+	const gchar *lhs = v, *rhs = v2;
+
+	if (strcmp (lhs, rhs) != 0) {
+		return FALSE;
+	}
+
+	return TRUE;
+}
+
+/*
+ * GEqualFunc-compatible comparison of two rspamd_fstring_t values:
+ * equal when lengths match and contents match ignoring ASCII case.
+ */
+gboolean
+rspamd_fstring_icase_equal (gconstpointer v, gconstpointer v2)
+{
+	const rspamd_fstring_t *lhs = v;
+	const rspamd_fstring_t *rhs = v2;
+
+	if (lhs->len != rhs->len) {
+		return FALSE;
+	}
+
+	return g_ascii_strncasecmp (lhs->begin, rhs->begin, lhs->len) == 0 ?
+			TRUE : FALSE;
+}
+
+
+/*
+ * GHashFunc-compatible case-insensitive hash of an rspamd_fstring_t.
+ */
+guint
+rspamd_fstring_icase_hash (gconstpointer key)
+{
+	const rspamd_fstring_t *fstr = key;
+	const gchar *data = fstr->begin;
+	gsize dlen = fstr->len;
+
+	return rspamd_icase_hash (data, dlen);
+}
+
+/*
+ * GEqualFunc-compatible comparison of two GString values:
+ * equal when lengths match and contents match ignoring ASCII case.
+ */
+gboolean
+rspamd_gstring_icase_equal (gconstpointer v, gconstpointer v2)
+{
+	const GString *lhs = v;
+	const GString *rhs = v2;
+
+	if (lhs->len != rhs->len) {
+		return FALSE;
+	}
+
+	return g_ascii_strncasecmp (lhs->str, rhs->str, lhs->len) == 0 ?
+			TRUE : FALSE;
+}
+
+
+/*
+ * GHashFunc-compatible case-insensitive hash of a GString.
+ */
+guint
+rspamd_gstring_icase_hash (gconstpointer key)
+{
+	const GString *gstr = key;
+	const gchar *data = gstr->str;
+	gsize dlen = gstr->len;
+
+	return rspamd_icase_hash (data, dlen);
+}
+
+/*
+ * Copy at most siz - 1 bytes from src to dst and always NUL-terminate dst
+ * when siz != 0. Unlike strlcpy(3) the source is never read past the point
+ * where copying stops.
+ *
+ * Returns the number of bytes stored in dst, not counting the terminating
+ * NUL (0 when siz == 0). The previous implementation returned a value that
+ * was one too small whenever the loop counter ran out (truncation or exact
+ * fit) and wrapped around to (gsize)-1 for siz of 0 or 1.
+ */
+gsize
+rspamd_strlcpy (gchar *dst, const gchar *src, gsize siz)
+{
+	gchar *d = dst;
+	gsize nleft = siz;
+
+	if (nleft == 0) {
+		/* No room even for the terminator: nothing is written */
+		return 0;
+	}
+
+	/* Copy as many bytes as will fit, leaving one slot for the NUL */
+	while (--nleft != 0) {
+		if ((*d = *src++) == '\0') {
+			return (d - dst);
+		}
+		d ++;
+	}
+
+	/* Destination is full: terminate it explicitly */
+	*d = '\0';
+
+	return (d - dst);
+}
+
+/*
+ * Same as rspamd_strlcpy, but every copied byte is passed through
+ * g_ascii_tolower. At most siz - 1 bytes are copied and dst is always
+ * NUL-terminated when siz != 0.
+ *
+ * Returns the number of bytes stored in dst, not counting the terminating
+ * NUL (0 when siz == 0). The previous implementation returned a value that
+ * was one too small whenever the loop counter ran out (truncation or exact
+ * fit) and wrapped around to (gsize)-1 for siz of 0 or 1.
+ */
+gsize
+rspamd_strlcpy_tolower (gchar *dst, const gchar *src, gsize siz)
+{
+	gchar *d = dst;
+	gsize nleft = siz;
+
+	if (nleft == 0) {
+		/* No room even for the terminator: nothing is written */
+		return 0;
+	}
+
+	/* Copy as many bytes as will fit, leaving one slot for the NUL */
+	while (--nleft != 0) {
+		if ((*d = g_ascii_tolower (*src++)) == '\0') {
+			return (d - dst);
+		}
+		d ++;
+	}
+
+	/* Destination is full: terminate it explicitly */
+	*d = '\0';
+
+	return (d - dst);
+}
+
+
+/*
+ * Find the first occurrence of the NUL-terminated string `find` in `s`,
+ * ignoring ASCII case and looking at no more than `len` bytes of `s`
+ * (the search also stops at a NUL byte in `s`).
+ *
+ * Returns a pointer to the match inside `s`, `s` itself when `find` is
+ * empty, or NULL when there is no match within the limit.
+ *
+ * A negative `len` keeps its historical meaning of "no length limit".
+ */
+gchar *
+rspamd_strncasestr (const gchar *s, const gchar *find, gint len)
+{
+	gchar c, sc;
+	gsize mlen;
+
+	if ((c = *find++) != 0) {
+		c = g_ascii_tolower (c);
+		mlen = strlen (find);
+		do {
+			do {
+				/* Check the budget first so s[len] is never read */
+				if (len-- == 0 || (sc = *s++) == 0) {
+					return (NULL);
+				}
+			} while (g_ascii_tolower (sc) != c);
+
+			/* The rest of the needle must also fit into the limit */
+			if (len >= 0 && mlen > (gsize)len) {
+				return (NULL);
+			}
+		} while (g_ascii_strncasecmp (s, find, mlen) != 0);
+		s--;
+	}
+
+	return ((gchar *)s);
+}
+
+/*
+ * Try to convert the first `len` bytes of `s` (decimal digits with an
+ * optional leading '-') to long. The input does not need to be
+ * NUL-terminated.
+ *
+ * Returns TRUE and stores the result in *value on success. Returns FALSE
+ * on a non-digit byte (*value is left untouched) or on overflow (*value is
+ * set to G_MAXLONG, or G_MINLONG for negative input). An empty input, or a
+ * lone '-', yields TRUE with *value == 0.
+ */
+gboolean
+rspamd_strtol (const gchar *s, gsize len, glong *value)
+{
+	const gchar *p = s, *end = s + len;
+	gchar c;
+	glong v = 0;
+	const glong cutoff = G_MAXLONG / 10, cutlim = G_MAXLONG % 10;
+	gboolean neg = FALSE;
+
+	/* Only inspect the sign when there is at least one byte to read */
+	if (len > 0 && *p == '-') {
+		neg = TRUE;
+		p++;
+	}
+
+	while (p < end) {
+		c = *p;
+		if (c >= '0' && c <= '9') {
+			c -= '0';
+			/* Would v * 10 + c exceed G_MAXLONG? */
+			if (v > cutoff || (v == cutoff && c > cutlim)) {
+				/* Range error */
+				*value = neg ? G_MINLONG : G_MAXLONG;
+				return FALSE;
+			}
+			else {
+				v *= 10;
+				v += c;
+			}
+		}
+		else {
+			return FALSE;
+		}
+		p++;
+	}
+
+	*value = neg ? -(v) : v;
+	return TRUE;
+}
+
+/*
+ * Try to convert the first `len` bytes of `s` (decimal digits only) to
+ * unsigned long. Returns FALSE on a non-digit byte (leaving *value
+ * untouched) or on overflow (storing G_MAXULONG in *value).
+ */
+gboolean
+rspamd_strtoul (const gchar *s, gsize len, gulong *value)
+{
+	const gulong cutoff = G_MAXULONG / 10, cutlim = G_MAXULONG % 10;
+	gulong acc = 0;
+	gsize pos;
+	guint8 digit;
+
+	for (pos = 0; pos < len; pos ++) {
+		if (s[pos] < '0' || s[pos] > '9') {
+			return FALSE;
+		}
+
+		digit = s[pos] - '0';
+
+		if (acc > cutoff || (acc == cutoff && digit > cutlim)) {
+			/* Range error */
+			*value = G_MAXULONG;
+			return FALSE;
+		}
+
+		acc = acc * 10 + digit;
+	}
+
+	*value = acc;
+	return TRUE;
+}
+
+/**
+ * Copy callback for rspamd_hash_table_copy: duplicates a string into a
+ * memory pool
+ * @param data string to copy (may be NULL)
+ * @param ud memory pool to use
+ * @return pool-allocated copy of data, or NULL if data is NULL
+ */
+gpointer
+rspamd_str_pool_copy (gconstpointer data, gpointer ud)
+{
+	rspamd_mempool_t *pool = ud;
+
+	if (data == NULL) {
+		return NULL;
+	}
+
+	return rspamd_mempool_strdup (pool, data);
+}
+
+/*
+ * We use here z-base32 encoding described here:
+ * http://philzimmermann.com/docs/human-oriented-base-32-encoding.txt
+ */
+
+/*
+ * Encode `inlen` bytes of `in` with the z-base32 alphabet.
+ * Returns a NUL-terminated string allocated with g_malloc; the caller is
+ * expected to release it with g_free.
+ * Input bytes are consumed in groups of five; `remain` carries the bits of
+ * the current group that have not been emitted yet (-1 means no pending bits).
+ */
+gchar *
+rspamd_encode_base32 (const guchar *in, gsize inlen)
+{
+ gint remain = -1, x;
+ gsize i, r;
+ /* 8 output characters per 5 input bytes, plus a partial one and the NUL */
+ gsize allocated_len = inlen * 8 / 5 + 2;
+ gchar *out;
+ static const char b32[]="ybndrfg8ejkmcpqxot1uwisza345h769";
+
+ out = g_malloc (allocated_len);
+ for (i = 0, r = 0; i < inlen; i++) {
+ switch (i % 5) {
+ case 0:
+ /* 8 bits of input and 3 to remain */
+ x = in[i];
+ remain = in[i] >> 5;
+ out[r++] = b32[x & 0x1F];
+ break;
+ case 1:
+ /* 11 bits of input, 1 to remain */
+ x = remain | in[i] << 3;
+ out[r++] = b32[x & 0x1F];
+ out[r++] = b32[x >> 5 & 0x1F];
+ remain = x >> 10;
+ break;
+ case 2:
+ /* 9 bits of input, 4 to remain */
+ x = remain | in[i] << 1;
+ out[r++] = b32[x & 0x1F];
+ remain = x >> 5;
+ break;
+ case 3:
+ /* 12 bits of input, 2 to remain */
+ x = remain | in[i] << 4;
+ out[r++] = b32[x & 0x1F];
+ out[r++] = b32[x >> 5 & 0x1F];
+ remain = x >> 10 & 0x3;
+ break;
+ case 4:
+ /* 10 bits of output, nothing to remain */
+ x = remain | in[i] << 2;
+ out[r++] = b32[x & 0x1F];
+ out[r++] = b32[x >> 5 & 0x1F];
+ remain = -1;
+ break;
+ default:
+ /* Should never happen: i % 5 is always in 0..4 */
+ break;
+ }
+
+ }
+ /* Flush the bits left over from an incomplete 5-byte group */
+ if (remain >= 0) {
+ out[r++] = b32[remain];
+ }
+
+ out[r] = 0;
+ /* Sanity check of the size estimate (performed after the write above) */
+ g_assert (r < allocated_len);
+
+ return out;
+}
+
+/*
+ * Reverse lookup for the z-base32 alphabet "ybndrfg8ejkmcpqxot1uwisza345h769":
+ * indexed by input byte, yields the 5-bit value or 0xff for bytes that are
+ * not part of the alphabet. Upper and lower case letters map to the same
+ * values (rows 0x40.. and 0x60.. are identical).
+ */
+static const guchar b32_dec[] = {
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0x12, 0xff, 0x19, 0x1a, 0x1b, 0x1e, 0x1d,
+ 0x07, 0x1f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0x18, 0x01, 0x0c, 0x03, 0x08, 0x05, 0x06,
+ 0x1c, 0x15, 0x09, 0x0a, 0xff, 0x0b, 0x02, 0x10,
+ 0x0d, 0x0e, 0x04, 0x16, 0x11, 0x13, 0xff, 0x14,
+ 0x0f, 0x00, 0x17, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0x18, 0x01, 0x0c, 0x03, 0x08, 0x05, 0x06,
+ 0x1c, 0x15, 0x09, 0x0a, 0xff, 0x0b, 0x02, 0x10,
+ 0x0d, 0x0e, 0x04, 0x16, 0x11, 0x13, 0xff, 0x14,
+ 0x0f, 0x00, 0x17, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+};
+
+/*
+ * Decode `inlen` characters of z-base32 text from `in`.
+ * Returns a g_malloc'ed buffer and stores its length in *outlen (which is
+ * dereferenced unconditionally, so it must not be NULL). If a byte that is
+ * not in the alphabet is met, the buffer is freed and NULL is returned
+ * without touching *outlen. The result is not NUL-terminated.
+ */
+guchar*
+rspamd_decode_base32 (const gchar *in, gsize inlen, gsize *outlen)
+{
+ guchar *res, decoded;
+ guchar c;
+ /* Bit accumulator: new 5-bit values are placed above the pending bits */
+ guint acc = 0U;
+ guint processed_bits = 0;
+ gsize olen = 0, i, allocated_len = inlen * 5 / 8 + 2;
+
+ res = g_malloc (allocated_len);
+
+ for (i = 0; i < inlen; i ++) {
+ c = (guchar)in[i];
+
+ /* Emit a full byte collected so far before adding more bits */
+ if (processed_bits >= 8) {
+ processed_bits -= 8;
+ res[olen++] = acc & 0xFF;
+ acc >>= 8;
+ }
+
+ decoded = b32_dec[c];
+ if (decoded == 0xff) {
+ /* Invalid input character */
+ g_free (res);
+ return NULL;
+ }
+
+ acc = (decoded << processed_bits) | acc;
+ processed_bits += 5;
+ }
+
+ /* Flush whatever is left in the accumulator as one final byte */
+ if (processed_bits > 0) {
+ res[olen++] = (acc & 0xFF);
+ }
+
+ /* Sanity check of the size estimate (performed after the writes above) */
+ g_assert (olen <= allocated_len);
+
+ *outlen = olen;
+
+ return res;
+}
+
+
+/*
+ * Encode `inlen` bytes of `in` as base64.
+ *
+ * If str_len > 0 the output is folded with "\r\n" so that no line is
+ * longer than str_len characters (str_len must be > 8, enforced by
+ * g_assert); if str_len <= 0 the output is a single line.
+ *
+ * Returns a NUL-terminated string allocated with g_malloc (release it with
+ * g_free); if outlen is not NULL, the length without the NUL is stored there.
+ *
+ * Changes against the previous version:
+ *  - the buffer now has room for the terminating NUL when inlen is not a
+ *    multiple of 3 (it used to be one byte short);
+ *  - the fast path loads 8 bytes only when 8 bytes are available and uses
+ *    memcpy instead of an unaligned, type-punned guint64 read.
+ */
+gchar *
+rspamd_encode_base64 (const guchar *in, gsize inlen, gint str_len, gsize *outlen)
+{
+#define CHECK_SPLIT \
+	do { if (str_len > 0 && cols >= str_len) { \
+				*o++ = '\r'; \
+				*o++ = '\n'; \
+				cols = 0; \
+	} } \
+while (0)
+
+	/* 4 characters per 3 bytes, 4 more for a partial group, 1 for the NUL */
+	gsize allocated_len = (inlen / 3) * 4 + 5;
+	gchar *out, *o;
+	guint64 n;
+	guint32 rem, t, carry;
+	gint cols, shift;
+	static const char b64_enc[] =
+		"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+		"abcdefghijklmnopqrstuvwxyz"
+		"0123456789+/";
+
+	if (str_len > 0) {
+		g_assert (str_len > 8);
+		/* Reserve space for the CRLF pairs */
+		allocated_len += (allocated_len / str_len + 1) * 2 + 1;
+	}
+
+	out = g_malloc (allocated_len);
+	o = out;
+	cols = 0;
+
+	/*
+	 * Fast path: 6 input bytes -> 8 output characters per iteration.
+	 * A whole 8-byte word is loaded, hence at least 8 bytes must be left.
+	 */
+	while (inlen >= sizeof (n)) {
+		memcpy (&n, in, sizeof (n));
+		n = GUINT64_TO_BE (n);
+
+		if (str_len <= 0 || cols <= str_len - 8) {
+			*o++ = b64_enc[(n >> 58) & 0x3F];
+			*o++ = b64_enc[(n >> 52) & 0x3F];
+			*o++ = b64_enc[(n >> 46) & 0x3F];
+			*o++ = b64_enc[(n >> 40) & 0x3F];
+			*o++ = b64_enc[(n >> 34) & 0x3F];
+			*o++ = b64_enc[(n >> 28) & 0x3F];
+			*o++ = b64_enc[(n >> 22) & 0x3F];
+			*o++ = b64_enc[(n >> 16) & 0x3F];
+			cols += 8;
+		}
+		else {
+			/* The group crosses a line boundary: fill the line first */
+			cols = str_len - cols;
+			shift = 58;
+			while (cols) {
+				*o++ = b64_enc[(n >> shift) & 0x3F];
+				shift -= 6;
+				cols --;
+			}
+
+			*o++ = '\r';
+			*o++ = '\n';
+
+			/* Remaining characters of the group start the next line */
+			while (shift >= 16) {
+				*o++ = b64_enc[(n >> shift) & 0x3F];
+				shift -= 6;
+				cols ++;
+			}
+		}
+
+		in += 6;
+		inlen -= 6;
+	}
+
+	CHECK_SPLIT;
+
+	rem = 0;
+	carry = 0;
+
+	for (;;) {
+		/* Remaining data byte by byte; rem counts bytes of the open group */
+		switch (rem) {
+		case 0:
+			if (inlen-- == 0) {
+				goto end;
+			}
+			t = *in++;
+			*o++ = b64_enc[t >> 2];
+			carry = (t << 4) & 0x30;
+			rem = 1;
+			cols ++;
+			/* fallthrough */
+		case 1:
+			if (inlen-- == 0) {
+				goto end;
+			}
+			CHECK_SPLIT;
+			t = *in++;
+			*o++ = b64_enc[carry | (t >> 4)];
+			carry = (t << 2) & 0x3C;
+			rem = 2;
+			cols ++;
+			/* fallthrough */
+		default:
+			if (inlen-- == 0) {
+				goto end;
+			}
+			CHECK_SPLIT;
+			t = *in ++;
+			*o++ = b64_enc[carry | (t >> 6)];
+			cols ++;
+			CHECK_SPLIT;
+			*o++ = b64_enc[t & 0x3F];
+			cols ++;
+			CHECK_SPLIT;
+			rem = 0;
+		}
+	}
+
+end:
+	/* Emit the pending bits of an incomplete group and the '=' padding */
+	if (rem == 1) {
+		*o++ = b64_enc[carry];
+		cols ++;
+		CHECK_SPLIT;
+		*o++ = '=';
+		cols ++;
+		CHECK_SPLIT;
+		*o++ = '=';
+		cols ++;
+		CHECK_SPLIT;
+	}
+	else if (rem == 2) {
+		*o++ = b64_enc[carry];
+		cols ++;
+		CHECK_SPLIT;
+		*o++ = '=';
+		cols ++;
+	}
+
+	CHECK_SPLIT;
+
+	*o = '\0';
+
+	if (outlen != NULL) {
+		*outlen = o - out;
+	}
+
+	return out;
+#undef CHECK_SPLIT
+}
diff --git a/src/libutil/str_util.h b/src/libutil/str_util.h
new file mode 100644
index 000000000..a9c920c31
--- /dev/null
+++ b/src/libutil/str_util.h
@@ -0,0 +1,135 @@
+/*
+ * Copyright (c) 2015, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#ifndef SRC_LIBUTIL_STR_UTIL_H_
+#define SRC_LIBUTIL_STR_UTIL_H_
+
+#include "config.h"
+
+/**
+ * Convert string to lowercase in-place using ASCII conversion
+ */
+void rspamd_str_lc (gchar *str, guint size);
+/**
+ * Convert string to lowercase in-place using utf (limited) conversion
+ */
+void rspamd_str_lc_utf8 (gchar *str, guint size);
+
+/*
+ * Hash table utility functions for case insensitive hashing
+ */
+guint rspamd_strcase_hash (gconstpointer key);
+gboolean rspamd_strcase_equal (gconstpointer v, gconstpointer v2);
+
+/*
+ * Hash table utility functions for case sensitive hashing
+ */
+guint rspamd_str_hash (gconstpointer key);
+gboolean rspamd_str_equal (gconstpointer v, gconstpointer v2);
+
+
+/*
+ * Hash table utility functions for hashing fixed strings
+ */
+guint rspamd_fstring_icase_hash (gconstpointer key);
+gboolean rspamd_fstring_icase_equal (gconstpointer v, gconstpointer v2);
+guint rspamd_gstring_icase_hash (gconstpointer key);
+gboolean rspamd_gstring_icase_equal (gconstpointer v, gconstpointer v2);
+
+/**
+ * Copy src to dst, limited to siz bytes. In contrast to the standard strlcpy(3),
+ * rspamd strlcpy does not traverse the whole source string, so it can be used for
+ * strings that are not NUL-terminated. This is more like memccpy(dst, src, '\0', siz)
+ *
+ * @param dst destination string
+ * @param src source string
+ * @param siz length of destination buffer
+ * @return bytes copied
+ */
+gsize rspamd_strlcpy (gchar *dst, const gchar *src, gsize siz);
+
+/**
+ * Lowercase strlcpy variant
+ * @param dst
+ * @param src
+ * @param siz
+ * @return
+ */
+gsize rspamd_strlcpy_tolower (gchar *dst, const gchar *src, gsize siz);
+
+/*
+ * Find string find in string s ignoring case
+ */
+gchar * rspamd_strncasestr (const gchar *s, const gchar *find, gint len);
+
+/*
+ * Try to convert string of length to long
+ */
+gboolean rspamd_strtol (const gchar *s, gsize len, glong *value);
+
+/*
+ * Try to convert string of length to unsigned long
+ */
+gboolean rspamd_strtoul (const gchar *s, gsize len, gulong *value);
+
+/**
+ * Utility function to provide mem_pool copy for rspamd_hash_table_copy function
+ * @param data string to copy
+ * @param ud memory pool to use
+ * @return
+ */
+gpointer rspamd_str_pool_copy (gconstpointer data, gpointer ud);
+
+/**
+ * Encode string using base32 encoding
+ * @param in input
+ * @param inlen input length
+ * @return freshly allocated base32 encoding of a specified string
+ */
+gchar * rspamd_encode_base32 (const guchar *in, gsize inlen);
+
+/**
+ * Decode string using base32 encoding
+ * @param in input
+ * @param inlen input length
+ * @param outlen output: length of the decoded data
+ * @return freshly allocated base32 decoded value or NULL if input is invalid
+ */
+guchar* rspamd_decode_base32 (const gchar *in, gsize inlen, gsize *outlen);
+
+/**
+ * Encode string using base64 encoding
+ * @param in input
+ * @param inlen input length
+ * @param str_len maximum string length (if <= 0 then no lines are split)
+ * @param outlen output: length of the encoded string
+ * @return freshly allocated base64 encoded value
+ */
+gchar * rspamd_encode_base64 (const guchar *in, gsize inlen, gint str_len,
+ gsize *outlen);
+
+/*
+ * ASCII-only fallback for g_tolower. Note that the argument is evaluated
+ * more than once, so it must not have side effects.
+ */
+#ifndef g_tolower
+# define g_tolower(x) (((x) >= 'A' && (x) <= 'Z') ? (x) - 'A' + 'a' : (x))
+#endif
+
+#endif /* SRC_LIBUTIL_STR_UTIL_H_ */
diff --git a/src/libutil/util.c b/src/libutil/util.c
index bc58fc70a..02f1721ff 100644
--- a/src/libutil/util.c
+++ b/src/libutil/util.c
@@ -660,114 +660,6 @@ rspamd_pass_signal (GHashTable * workers, gint signo)
g_hash_table_foreach (workers, pass_signal_cb, GINT_TO_POINTER (signo));
}
-static const guchar lc_map[256] = {
- 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
- 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
- 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
- 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
- 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
- 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
- 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
- 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
- 0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
- 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
- 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
- 0x78, 0x79, 0x7a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
- 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
- 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
- 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
- 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
- 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
- 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
- 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
- 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
- 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
- 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
- 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
- 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
- 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
- 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
- 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
- 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
- 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
- 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
- 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
- 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff
-};
-
-void
-rspamd_str_lc (gchar *str, guint size)
-{
- guint leftover = size % 4;
- guint fp, i;
- const uint8_t* s = (const uint8_t*) str;
- gchar *dest = str;
- guchar c1, c2, c3, c4;
-
- fp = size - leftover;
-
- for (i = 0; i != fp; i += 4) {
- c1 = s[i], c2 = s[i + 1], c3 = s[i + 2], c4 = s[i + 3];
- dest[0] = lc_map[c1];
- dest[1] = lc_map[c2];
- dest[2] = lc_map[c3];
- dest[3] = lc_map[c4];
- dest += 4;
- }
-
- switch (leftover) {
- case 3:
- *dest++ = lc_map[(guchar)str[i++]];
- case 2:
- *dest++ = lc_map[(guchar)str[i++]];
- case 1:
- *dest++ = lc_map[(guchar)str[i]];
- }
-
-}
-
-/*
- * The purpose of this function is fast and in place conversion of a unicode
- * string to lower case, so some locale peculiarities are simply ignored
- * If the target string is longer than initial one, then we just trim it
- */
-void
-rspamd_str_lc_utf8 (gchar *str, guint size)
-{
- const gchar *s = str, *p;
- gchar *d = str, tst[6];
- gint remain = size;
- gint r;
- gunichar uc;
-
- while (remain > 0) {
- uc = g_utf8_get_char (s);
- uc = g_unichar_tolower (uc);
- p = g_utf8_next_char (s);
-
- if (p - s > remain) {
- break;
- }
-
- if (remain >= 6) {
- r = g_unichar_to_utf8 (uc, d);
- }
- else {
- /* We must be cautious here to avoid broken unicode being append */
- r = g_unichar_to_utf8 (uc, tst);
- if (r > remain) {
- break;
- }
- else {
- memcpy (d, tst, r);
- }
- }
- remain -= r;
- s = p;
- d += r;
- }
-}
-
#ifndef HAVE_SETPROCTITLE
static gchar *title_buffer = 0;
@@ -1208,130 +1100,6 @@ rspamd_log_check_time (gdouble start_real, gdouble start_virtual, gint resolutio
return (const gchar *)res;
}
-#ifndef g_tolower
-# define g_tolower(x) (((x) >= 'A' && (x) <= 'Z') ? (x) - 'A' + 'a' : (x))
-#endif
-
-
-gboolean
-rspamd_strcase_equal (gconstpointer v, gconstpointer v2)
-{
- if (g_ascii_strcasecmp ((const gchar *)v, (const gchar *)v2) == 0) {
- return TRUE;
- }
-
- return FALSE;
-}
-
-static guint
-rspamd_icase_hash (const gchar *in, gsize len)
-{
- guint leftover = len % 4;
- guint fp, i;
- const uint8_t* s = (const uint8_t*) in;
- union {
- struct {
- guchar c1, c2, c3, c4;
- } c;
- guint32 pp;
- } u;
- XXH64_state_t st;
-
- fp = len - leftover;
- XXH64_reset (&st, rspamd_hash_seed ());
-
- for (i = 0; i != fp; i += 4) {
- u.c.c1 = s[i], u.c.c2 = s[i + 1], u.c.c3 = s[i + 2], u.c.c4 = s[i + 3];
- u.c.c1 = lc_map[u.c.c1];
- u.c.c2 = lc_map[u.c.c2];
- u.c.c3 = lc_map[u.c.c3];
- u.c.c4 = lc_map[u.c.c4];
- XXH64_update (&st, &u.pp, sizeof (u));
- }
-
- u.pp = 0;
- switch (leftover) {
- case 3:
- u.c.c3 = lc_map[(guchar)s[i++]];
- case 2:
- u.c.c2 = lc_map[(guchar)s[i++]];
- case 1:
- u.c.c1 = lc_map[(guchar)s[i]];
- XXH64_update (&st, &u.pp, leftover);
- break;
- }
-
- return XXH64_digest (&st);
-}
-
-guint
-rspamd_strcase_hash (gconstpointer key)
-{
- const gchar *p = key;
- gsize len;
-
- len = strlen (p);
-
- return rspamd_icase_hash (p, len);
-}
-
-guint
-rspamd_str_hash (gconstpointer key)
-{
- gsize len;
-
- len = strlen ((const gchar *)key);
-
- return XXH64 (key, len, rspamd_hash_seed ());
-}
-
-gboolean
-rspamd_str_equal (gconstpointer v, gconstpointer v2)
-{
- return strcmp ((const gchar *)v, (const gchar *)v2) == 0;
-}
-
-gboolean
-rspamd_fstring_icase_equal (gconstpointer v, gconstpointer v2)
-{
- const rspamd_fstring_t *f1 = v, *f2 = v2;
- if (f1->len == f2->len &&
- g_ascii_strncasecmp (f1->begin, f2->begin, f1->len) == 0) {
- return TRUE;
- }
-
- return FALSE;
-}
-
-
-guint
-rspamd_fstring_icase_hash (gconstpointer key)
-{
- const rspamd_fstring_t *f = key;
-
- return rspamd_icase_hash (f->begin, f->len);
-}
-
-gboolean
-rspamd_gstring_icase_equal (gconstpointer v, gconstpointer v2)
-{
- const GString *f1 = v, *f2 = v2;
- if (f1->len == f2->len &&
- g_ascii_strncasecmp (f1->str, f2->str, f1->len) == 0) {
- return TRUE;
- }
-
- return FALSE;
-}
-
-
-guint
-rspamd_gstring_icase_hash (gconstpointer key)
-{
- const GString *f = key;
-
- return rspamd_icase_hash (f->str, f->len);
-}
void
gperf_profiler_init (struct rspamd_config *cfg, const gchar *descr)
@@ -1494,53 +1262,6 @@ g_ptr_array_new_full (guint reserved_size,
}
#endif
-
-gsize
-rspamd_strlcpy (gchar *dst, const gchar *src, gsize siz)
-{
- gchar *d = dst;
- const gchar *s = src;
- gsize n = siz;
-
- /* Copy as many bytes as will fit */
- if (n != 0) {
- while (--n != 0) {
- if ((*d++ = *s++) == '\0') {
- break;
- }
- }
- }
-
- if (n == 0 && siz != 0) {
- *d = '\0';
- }
-
- return (s - src - 1); /* count does not include NUL */
-}
-
-gsize
-rspamd_strlcpy_tolower (gchar *dst, const gchar *src, gsize siz)
-{
- gchar *d = dst;
- const gchar *s = src;
- gsize n = siz;
-
- /* Copy as many bytes as will fit */
- if (n != 0) {
- while (--n != 0) {
- if ((*d++ = g_ascii_tolower (*s++)) == '\0') {
- break;
- }
- }
- }
-
- if (n == 0 && siz != 0) {
- *d = '\0';
- }
-
- return (s - src - 1); /* count does not include NUL */
-}
-
guint
rspamd_url_hash (gconstpointer u)
{
@@ -1607,111 +1328,6 @@ rspamd_urls_cmp (gconstpointer a, gconstpointer b)
return r == 0;
}
-/*
- * Find the first occurrence of find in s, ignore case.
- */
-gchar *
-rspamd_strncasestr (const gchar *s, const gchar *find, gint len)
-{
- gchar c, sc;
- gsize mlen;
-
- if ((c = *find++) != 0) {
- c = g_ascii_tolower (c);
- mlen = strlen (find);
- do {
- do {
- if ((sc = *s++) == 0 || len-- == 0)
- return (NULL);
- } while (g_ascii_tolower (sc) != c);
- } while (g_ascii_strncasecmp (s, find, mlen) != 0);
- s--;
- }
- return ((gchar *)s);
-}
-
-/*
- * Try to convert string of length to long
- */
-gboolean
-rspamd_strtol (const gchar *s, gsize len, glong *value)
-{
- const gchar *p = s, *end = s + len;
- gchar c;
- glong v = 0;
- const glong cutoff = G_MAXLONG / 10, cutlim = G_MAXLONG % 10;
- gboolean neg;
-
- /* Case negative values */
- if (*p == '-') {
- neg = TRUE;
- p++;
- }
- else {
- neg = FALSE;
- }
- /* Some preparations for range errors */
-
- while (p < end) {
- c = *p;
- if (c >= '0' && c <= '9') {
- c -= '0';
- if (v > cutoff || (v == cutoff && c > cutlim)) {
- /* Range error */
- *value = neg ? G_MINLONG : G_MAXLONG;
- return FALSE;
- }
- else {
- v *= 10;
- v += c;
- }
- }
- else {
- return FALSE;
- }
- p++;
- }
-
- *value = neg ? -(v) : v;
- return TRUE;
-}
-
-/*
- * Try to convert string of length to long
- */
-gboolean
-rspamd_strtoul (const gchar *s, gsize len, gulong *value)
-{
- const gchar *p = s, *end = s + len;
- gchar c;
- gulong v = 0;
- const gulong cutoff = G_MAXULONG / 10, cutlim = G_MAXULONG % 10;
-
- /* Some preparations for range errors */
- while (p < end) {
- c = *p;
- if (c >= '0' && c <= '9') {
- c -= '0';
- if (v > cutoff || (v == cutoff && (guint8)c > cutlim)) {
- /* Range error */
- *value = G_MAXULONG;
- return FALSE;
- }
- else {
- v *= 10;
- v += c;
- }
- }
- else {
- return FALSE;
- }
- p++;
- }
-
- *value = v;
- return TRUE;
-}
-
gint
rspamd_fallocate (gint fd, off_t offset, off_t len)
{
@@ -1978,20 +1594,6 @@ rspamd_hash_table_copy (GHashTable *src, GHashTable *dst,
}
}
-/**
- * Utility function to provide mem_pool copy for rspamd_hash_table_copy function
- * @param data string to copy
- * @param ud memory pool to use
- * @return
- */
-gpointer
-rspamd_str_pool_copy (gconstpointer data, gpointer ud)
-{
- rspamd_mempool_t *pool = ud;
-
- return data ? rspamd_mempool_strdup (pool, data) : NULL;
-}
-
static volatile sig_atomic_t saved_signo[NSIG];
static
@@ -2190,292 +1792,6 @@ rspamd_ucl_emit_gstring (ucl_object_t *obj,
ucl_object_emit_full (obj, emit_type, &func);
}
-/*
- * We use here z-base32 encoding described here:
- * http://philzimmermann.com/docs/human-oriented-base-32-encoding.txt
- */
-
-gchar *
-rspamd_encode_base32 (const guchar *in, gsize inlen)
-{
- gint remain = -1, x;
- gsize i, r;
- gsize allocated_len = inlen * 8 / 5 + 2;
- gchar *out;
- static const char b32[]="ybndrfg8ejkmcpqxot1uwisza345h769";
-
- out = g_malloc (allocated_len);
- for (i = 0, r = 0; i < inlen; i++) {
- switch (i % 5) {
- case 0:
- /* 8 bits of input and 3 to remain */
- x = in[i];
- remain = in[i] >> 5;
- out[r++] = b32[x & 0x1F];
- break;
- case 1:
- /* 11 bits of input, 1 to remain */
- x = remain | in[i] << 3;
- out[r++] = b32[x & 0x1F];
- out[r++] = b32[x >> 5 & 0x1F];
- remain = x >> 10;
- break;
- case 2:
- /* 9 bits of input, 4 to remain */
- x = remain | in[i] << 1;
- out[r++] = b32[x & 0x1F];
- remain = x >> 5;
- break;
- case 3:
- /* 12 bits of input, 2 to remain */
- x = remain | in[i] << 4;
- out[r++] = b32[x & 0x1F];
- out[r++] = b32[x >> 5 & 0x1F];
- remain = x >> 10 & 0x3;
- break;
- case 4:
- /* 10 bits of output, nothing to remain */
- x = remain | in[i] << 2;
- out[r++] = b32[x & 0x1F];
- out[r++] = b32[x >> 5 & 0x1F];
- remain = -1;
- break;
- default:
- /* Not to be happen */
- break;
- }
-
- }
- if (remain >= 0) {
- out[r++] = b32[remain];
- }
-
- out[r] = 0;
- g_assert (r < allocated_len);
-
- return out;
-}
-
-static const guchar b32_dec[] = {
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0xff, 0x12, 0xff, 0x19, 0x1a, 0x1b, 0x1e, 0x1d,
- 0x07, 0x1f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0xff, 0x18, 0x01, 0x0c, 0x03, 0x08, 0x05, 0x06,
- 0x1c, 0x15, 0x09, 0x0a, 0xff, 0x0b, 0x02, 0x10,
- 0x0d, 0x0e, 0x04, 0x16, 0x11, 0x13, 0xff, 0x14,
- 0x0f, 0x00, 0x17, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0xff, 0x18, 0x01, 0x0c, 0x03, 0x08, 0x05, 0x06,
- 0x1c, 0x15, 0x09, 0x0a, 0xff, 0x0b, 0x02, 0x10,
- 0x0d, 0x0e, 0x04, 0x16, 0x11, 0x13, 0xff, 0x14,
- 0x0f, 0x00, 0x17, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
-};
-
-guchar*
-rspamd_decode_base32 (const gchar *in, gsize inlen, gsize *outlen)
-{
- guchar *res, decoded;
- guchar c;
- guint acc = 0U;
- guint processed_bits = 0;
- gsize olen = 0, i, allocated_len = inlen * 5 / 8 + 2;
-
- res = g_malloc (allocated_len);
-
- for (i = 0; i < inlen; i ++) {
- c = (guchar)in[i];
-
- if (processed_bits >= 8) {
- processed_bits -= 8;
- res[olen++] = acc & 0xFF;
- acc >>= 8;
- }
-
- decoded = b32_dec[c];
- if (decoded == 0xff) {
- g_free (res);
- return NULL;
- }
-
- acc = (decoded << processed_bits) | acc;
- processed_bits += 5;
- }
-
- if (processed_bits > 0) {
- res[olen++] = (acc & 0xFF);
- }
-
- g_assert (olen <= allocated_len);
-
- *outlen = olen;
-
- return res;
-}
-
-
-gchar *
-rspamd_encode_base64 (const guchar *in, gsize inlen, gint str_len, gsize *outlen)
-{
-#define CHECK_SPLIT \
- do { if (str_len > 0 && cols >= str_len) { \
- *o++ = '\r'; \
- *o++ = '\n'; \
- cols = 0; \
- } } \
-while (0)
-
- gsize allocated_len = (inlen / 3) * 4 + 4;
- gchar *out, *o;
- guint64 n;
- guint32 rem, t, carry;
- gint cols, shift;
- static const char b64_enc[] =
- "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
- "abcdefghijklmnopqrstuvwxyz"
- "0123456789+/";
-
- if (str_len > 0) {
- g_assert (str_len > 8);
- allocated_len += (allocated_len / str_len + 1) * 2 + 1;
- }
-
- out = g_malloc (allocated_len);
- o = out;
- cols = 0;
-
- while (inlen > 6) {
- n = *(guint64 *)in;
- n = GUINT64_TO_BE (n);
-
- if (str_len <= 0 || cols <= str_len - 8) {
- *o++ = b64_enc[(n >> 58) & 0x3F];
- *o++ = b64_enc[(n >> 52) & 0x3F];
- *o++ = b64_enc[(n >> 46) & 0x3F];
- *o++ = b64_enc[(n >> 40) & 0x3F];
- *o++ = b64_enc[(n >> 34) & 0x3F];
- *o++ = b64_enc[(n >> 28) & 0x3F];
- *o++ = b64_enc[(n >> 22) & 0x3F];
- *o++ = b64_enc[(n >> 16) & 0x3F];
- cols += 8;
- }
- else {
- cols = str_len - cols;
- shift = 58;
- while (cols) {
- *o++ = b64_enc[(n >> shift) & 0x3F];
- shift -= 6;
- cols --;
- }
-
- *o++ = '\r';
- *o++ = '\n';
-
- /* Remaining bytes */
- while (shift >= 16) {
- *o++ = b64_enc[(n >> shift) & 0x3F];
- shift -= 6;
- cols ++;
- }
- }
-
- in += 6;
- inlen -= 6;
- }
-
- CHECK_SPLIT;
-
- rem = 0;
- carry = 0;
-
- for (;;) {
- /* Padding + remaining data (0 - 2 bytes) */
- switch (rem) {
- case 0:
- if (inlen-- == 0) {
- goto end;
- }
- t = *in++;
- *o++ = b64_enc[t >> 2];
- carry = (t << 4) & 0x30;
- rem = 1;
- cols ++;
- case 1:
- if (inlen-- == 0) {
- goto end;
- }
- CHECK_SPLIT;
- t = *in++;
- *o++ = b64_enc[carry | (t >> 4)];
- carry = (t << 2) & 0x3C;
- rem = 2;
- cols ++;
- default:
- if (inlen-- == 0) {
- goto end;
- }
- CHECK_SPLIT;
- t = *in ++;
- *o++ = b64_enc[carry | (t >> 6)];
- cols ++;
- CHECK_SPLIT;
- *o++ = b64_enc[t & 0x3F];
- cols ++;
- CHECK_SPLIT;
- rem = 0;
- }
- }
-
-end:
- if (rem == 1) {
- *o++ = b64_enc[carry];
- cols ++;
- CHECK_SPLIT;
- *o++ = '=';
- cols ++;
- CHECK_SPLIT;
- *o++ = '=';
- cols ++;
- CHECK_SPLIT;
- }
- else if (rem == 2) {
- *o++ = b64_enc[carry];
- cols ++;
- CHECK_SPLIT;
- *o++ = '=';
- cols ++;
- }
-
- CHECK_SPLIT;
-
- *o = '\0';
-
- if (outlen != NULL) {
- *outlen = o - out;
- }
-
- return out;
-}
-
gdouble
rspamd_get_ticks (void)
{
diff --git a/src/libutil/util.h b/src/libutil/util.h
index d3dcdbcf7..ee7bf01f8 100644
--- a/src/libutil/util.h
+++ b/src/libutil/util.h
@@ -7,6 +7,7 @@
#include "fstring.h"
#include "ucl.h"
#include "addr.h"
+#include "str_util.h"
struct rspamd_config;
struct rspamd_main;
@@ -105,11 +106,6 @@ void rspamd_signals_init (struct sigaction *sa, void (*sig_handler)(gint));
* Send specified signal to each worker
*/
void rspamd_pass_signal (GHashTable *, gint );
-/*
- * Convert string to lowercase
- */
-void rspamd_str_lc (gchar *str, guint size);
-void rspamd_str_lc_utf8 (gchar *str, guint size);
#ifndef HAVE_SETPROCTITLE
/*
@@ -167,27 +163,6 @@ gboolean rspamd_file_lock (gint fd, gboolean async);
gboolean rspamd_file_unlock (gint fd, gboolean async);
/*
- * Hash table utility functions for case insensitive hashing
- */
-guint rspamd_strcase_hash (gconstpointer key);
-gboolean rspamd_strcase_equal (gconstpointer v, gconstpointer v2);
-
-/*
- * Hash table utility functions for case sensitive hashing
- */
-guint rspamd_str_hash (gconstpointer key);
-gboolean rspamd_str_equal (gconstpointer v, gconstpointer v2);
-
-
-/*
- * Hash table utility functions for hashing fixed strings
- */
-guint rspamd_fstring_icase_hash (gconstpointer key);
-gboolean rspamd_fstring_icase_equal (gconstpointer v, gconstpointer v2);
-guint rspamd_gstring_icase_hash (gconstpointer key);
-gboolean rspamd_gstring_icase_equal (gconstpointer v, gconstpointer v2);
-
-/*
* Google perf-tools initialization function
*/
void gperf_profiler_init (struct rspamd_config *cfg, const gchar *descr);
@@ -203,27 +178,6 @@ void g_queue_clear (GQueue *queue);
#endif
-/**
- * Copy src to dest limited to len, in compare with standart strlcpy(3) rspamd strlcpy does not
- * traverse the whole string and it is possible to use it for non NULL terminated strings. This is
- * more like memccpy(dst, src, size, '\0')
- *
- * @param dst destination string
- * @param src source string
- * @param siz length of destination buffer
- * @return bytes copied
- */
-gsize rspamd_strlcpy (gchar *dst, const gchar *src, gsize siz);
-
-/**
- * Lowercase strlcpy variant
- * @param dst
- * @param src
- * @param siz
- * @return
- */
-gsize rspamd_strlcpy_tolower (gchar *dst, const gchar *src, gsize siz);
-
/*
* Convert milliseconds to timeval fields
*/
@@ -245,21 +199,6 @@ gboolean rspamd_emails_cmp (gconstpointer a, gconstpointer b);
/* Compare two urls for building emails hash */
gboolean rspamd_urls_cmp (gconstpointer a, gconstpointer b);
-/*
- * Find string find in string s ignoring case
- */
-gchar * rspamd_strncasestr (const gchar *s, const gchar *find, gint len);
-
-/*
- * Try to convert string of length to long
- */
-gboolean rspamd_strtol (const gchar *s, gsize len, glong *value);
-
-/*
- * Try to convert string of length to unsigned long
- */
-gboolean rspamd_strtoul (const gchar *s, gsize len, gulong *value);
-
/**
* Try to allocate a file on filesystem (using fallocate or posix_fallocate)
* @param fd descriptor
@@ -385,13 +324,6 @@ void rspamd_hash_table_copy (GHashTable *src, GHashTable *dst,
gpointer (*value_copy_func)(gconstpointer data, gpointer ud),
gpointer ud);
-/**
- * Utility function to provide mem_pool copy for rspamd_hash_table_copy function
- * @param data string to copy
- * @param ud memory pool to use
- * @return
- */
-gpointer rspamd_str_pool_copy (gconstpointer data, gpointer ud);
/**
* Read passphrase from tty
@@ -414,32 +346,6 @@ void rspamd_ucl_emit_gstring (ucl_object_t *obj,
GString *target);
/**
- * Encode string using base32 encoding
- * @param in input
- * @param inlen input length
- * @return freshly allocated base32 encoding of a specified string
- */
-gchar * rspamd_encode_base32 (const guchar *in, gsize inlen);
-
-/**
- * Decode string using base32 encoding
- * @param in input
- * @param inlen input length
- * @return freshly allocated base32 decoded value or NULL if input is invalid
- */
-guchar* rspamd_decode_base32 (const gchar *in, gsize inlen, gsize *outlen);
-
-/**
- * Encode string using base64 encoding
- * @param in input
- * @param inlen input length
- * @param str_len maximum string length (if <= 0 then no lines are split)
- * @return freshly allocated base64 encoded value or NULL if input is invalid
- */
-gchar * rspamd_encode_base64 (const guchar *in, gsize inlen, gint str_len,
- gsize *outlen);
-
-/**
* Portably return the current clock ticks as seconds
* @return
*/