aboutsummaryrefslogtreecommitdiffstats
path: root/src/libutil
diff options
context:
space:
mode:
authorVsevolod Stakhov <vsevolod@highsecure.ru>2014-04-21 16:25:51 +0100
committerVsevolod Stakhov <vsevolod@highsecure.ru>2014-04-21 16:25:51 +0100
commit61555065f3d1c8badcc9573691232f1b6e42988c (patch)
tree563d5b7cb8c468530f7e79c4da0a75267b1184e1 /src/libutil
parentad5bf825b7f33bc10311673991f0cc888e69c0b1 (diff)
downloadrspamd-61555065f3d1c8badcc9573691232f1b6e42988c.tar.gz
rspamd-61555065f3d1c8badcc9573691232f1b6e42988c.zip
Rework project structure, remove trash files.
Diffstat (limited to 'src/libutil')
-rw-r--r--src/libutil/CMakeLists.txt50
-rw-r--r--src/libutil/aio_event.c487
-rw-r--r--src/libutil/aio_event.h67
-rw-r--r--src/libutil/bloom.c153
-rw-r--r--src/libutil/bloom.h48
-rw-r--r--src/libutil/diff.c445
-rw-r--r--src/libutil/diff.h74
-rw-r--r--src/libutil/fstring.c461
-rw-r--r--src/libutil/fstring.h120
-rw-r--r--src/libutil/fuzzy.c498
-rw-r--r--src/libutil/fuzzy.h69
-rw-r--r--src/libutil/hash.c489
-rw-r--r--src/libutil/hash.h160
-rw-r--r--src/libutil/http.c1222
-rw-r--r--src/libutil/http.h278
-rw-r--r--src/libutil/logger.c769
-rw-r--r--src/libutil/logger.h117
-rw-r--r--src/libutil/map.c1148
-rw-r--r--src/libutil/map.h134
-rw-r--r--src/libutil/mem_pool.c776
-rw-r--r--src/libutil/mem_pool.h299
-rw-r--r--src/libutil/memcached.c831
-rw-r--r--src/libutil/memcached.h142
-rw-r--r--src/libutil/printf.c635
-rw-r--r--src/libutil/printf.h75
-rw-r--r--src/libutil/radix.c311
-rw-r--r--src/libutil/radix.h82
-rw-r--r--src/libutil/rrd.c1015
-rw-r--r--src/libutil/rrd.h374
-rw-r--r--src/libutil/trie.c230
-rw-r--r--src/libutil/trie.h86
-rw-r--r--src/libutil/upstream.c525
-rw-r--r--src/libutil/upstream.h127
-rw-r--r--src/libutil/util.c2275
-rw-r--r--src/libutil/util.h491
35 files changed, 15063 insertions, 0 deletions
diff --git a/src/libutil/CMakeLists.txt b/src/libutil/CMakeLists.txt
new file mode 100644
index 000000000..2a5ab46c5
--- /dev/null
+++ b/src/libutil/CMakeLists.txt
@@ -0,0 +1,50 @@
+# Librspamd-util: source files that make up the rspamd-util library
+SET(LIBRSPAMDUTILSRC aio_event.c
+ bloom.c
+ diff.c
+ fstring.c
+ fuzzy.c
+ hash.c
+ http.c
+ logger.c
+ map.c
+ memcached.c
+ mem_pool.c
+ printf.c
+ radix.c
+ rrd.c
+ trie.c
+ upstream.c
+ util.c)
+# Build the rspamd-util target; LINK_TYPE is defined outside this file (presumably STATIC or SHARED - confirm)
+ADD_LIBRARY(rspamd-util ${LINK_TYPE} ${LIBRSPAMDUTILSRC})
+IF(CMAKE_COMPILER_IS_GNUCC) # the aliasing flag below is applied for GCC builds only
+SET_TARGET_PROPERTIES(rspamd-util PROPERTIES COMPILE_FLAGS "-fno-strict-aliasing")
+ENDIF(CMAKE_COMPILER_IS_GNUCC)
+
+
+TARGET_LINK_LIBRARIES(rspamd-util ${RSPAMD_REQUIRED_LIBRARIES}) # libraries collected by the parent CMake scripts
+TARGET_LINK_LIBRARIES(rspamd-util pcre)
+TARGET_LINK_LIBRARIES(rspamd-util ucl)
+TARGET_LINK_LIBRARIES(rspamd-util ottery)
+TARGET_LINK_LIBRARIES(rspamd-util rspamd-http-parser)
+TARGET_LINK_LIBRARIES(rspamd-util event)
+TARGET_LINK_LIBRARIES(rspamd-util xxhash)
+IF(OPENSSL_FOUND) # OpenSSL is linked only when it was detected
+ TARGET_LINK_LIBRARIES(rspamd-util ${OPENSSL_LIBRARIES})
+ENDIF(OPENSSL_FOUND)
+
+IF(NOT DEBIAN_BUILD) # the library VERSION property is skipped for Debian builds
+SET_TARGET_PROPERTIES(rspamd-util PROPERTIES VERSION ${RSPAMD_VERSION})
+ENDIF(NOT DEBIAN_BUILD)
+
+IF(GLIB_COMPAT)
+ INCLUDE_DIRECTORIES("${CMAKE_CURRENT_SOURCE_DIR}/contrib/lgpl") # NOTE(review): CMAKE_CURRENT_SOURCE_DIR is src/libutil here - confirm contrib/lgpl exists below it
+ TARGET_LINK_LIBRARIES(rspamd-util glibadditions)
+ENDIF(GLIB_COMPAT)
+
+IF(NO_SHARED MATCHES "OFF") # install the library only when NO_SHARED is OFF
+ INSTALL(TARGETS rspamd-util
+ LIBRARY DESTINATION ${LIBDIR}
+ PUBLIC_HEADER DESTINATION include)
+ENDIF(NO_SHARED MATCHES "OFF") \ No newline at end of file
diff --git a/src/libutil/aio_event.c b/src/libutil/aio_event.c
new file mode 100644
index 000000000..ccda37083
--- /dev/null
+++ b/src/libutil/aio_event.c
@@ -0,0 +1,487 @@
+/* Copyright (c) 2010-2011, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "aio_event.h"
+#include "main.h"
+
+#ifdef HAVE_SYS_EVENTFD_H
+#include <sys/eventfd.h>
+#endif
+
+#ifdef HAVE_AIO_H
+#include <aio.h>
+#endif
+
+/*
+ * Linux syscall numbers for the native AIO calls and for eventfd.
+ * They are hard-coded so that this file does not depend on libaio; only
+ * i386 and x86_64 are known here. The eventfd number differs between the
+ * two ABIs (323 on i386, 284 on x86_64), so it is selected per architecture
+ * as well.
+ */
+#if defined(__i386__)
+# define SYS_io_setup 245
+# define SYS_io_destroy 246
+# define SYS_io_getevents 247
+# define SYS_io_submit 248
+# define SYS_io_cancel 249
+# define SYS_eventfd 323
+#elif defined(__x86_64__)
+# define SYS_io_setup 206
+# define SYS_io_destroy 207
+# define SYS_io_getevents 208
+# define SYS_io_submit 209
+# define SYS_io_cancel 210
+# define SYS_eventfd 284
+#else
+# warning "aio is not supported on this platform, please contact author for details"
+# define SYS_io_setup 0
+# define SYS_io_destroy 0
+# define SYS_io_getevents 0
+# define SYS_io_submit 0
+# define SYS_io_cancel 0
+# define SYS_eventfd 0
+#endif
+
+/* Number of in-flight requests asked from io_setup() */
+#define MAX_AIO_EV 64
+
+struct io_cbdata { /* per-request state; its address travels through iocb.aio_data and comes back in io_event.data */
+ gint fd; /* descriptor the request was issued on, handed to cb */
+ rspamd_aio_cb cb; /* completion callback */
+ guint64 len; /* requested length, handed to cb */
+ gpointer buf; /* caller's buffer, handed to cb */
+ gpointer io_buf; /* aligned private copy used by writes (NULL for reads); freed after cb runs */
+ gpointer ud; /* opaque user data, handed to cb */
+};
+
+#ifdef LINUX
+
+/* Linux-specific type mappings and helpers that let this file avoid a dependency on libaio */
+
+typedef unsigned long aio_context_t; /* kernel AIO context handle, filled in by io_setup() */
+
+typedef enum io_iocb_cmd { /* operation codes stored in iocb.aio_lio_opcode */
+ IO_CMD_PREAD = 0, /* used by rspamd_aio_read */
+ IO_CMD_PWRITE = 1, /* used by rspamd_aio_write */
+
+ IO_CMD_FSYNC = 2,
+ IO_CMD_FDSYNC = 3,
+
+ IO_CMD_POLL = 5,
+ IO_CMD_NOOP = 6, /* used by rspamd_aio_close */
+} io_iocb_cmd_t;
+
+#if defined(__LITTLE_ENDIAN) /* PADDED(x,y) orders two 32-bit fields according to the host byte order */
+#define PADDED(x,y) x, y
+#elif defined(__BIG_ENDIAN)
+#define PADDED(x,y) y, x
+#else
+#error edit for your odd byteorder.
+#endif
+
+/*
+ * We always use a 64-bit off_t when communicating
+ * with userland. It is up to libraries to do the
+ * proper padding and aio_error abstraction.
+ */
+
+struct iocb { /* request descriptor handed to io_submit() */
+ /* these are internal to the kernel/libc. */
+ guint64 aio_data; /* data to be returned in event's data */
+ guint32 PADDED(aio_key, aio_reserved1);
+ /* the kernel sets aio_key to the req # */
+
+ /* common fields */
+ guint16 aio_lio_opcode; /* one of the IO_CMD_* values of io_iocb_cmd_t */
+ gint16 aio_reqprio;
+ guint32 aio_fildes; /* descriptor the operation applies to */
+
+ guint64 aio_buf; /* buffer address stored as an integer */
+ guint64 aio_nbytes; /* number of bytes to transfer */
+ gint64 aio_offset; /* file offset of the transfer */
+
+ /* extra parameters */
+ guint64 aio_reserved2; /* TODO: use this for a (struct sigevent *) */
+
+ /* flags for the "struct iocb" */
+ guint32 aio_flags;
+
+ /*
+ * if the IOCB_FLAG_RESFD flag of "aio_flags" is set, this is an
+ * eventfd to signal AIO readiness to
+ */
+ guint32 aio_resfd;
+};
+
+struct io_event { /* completion record filled in by io_getevents() */
+ guint64 data; /* the data field from the iocb; this file stores a struct io_cbdata pointer there */
+ guint64 obj; /* what iocb this event came from */
+ gint64 res; /* result code for this event; forwarded to the completion callback */
+ gint64 res2; /* secondary result */
+};
+
+/* Linux specific io calls */
+/* Raw io_setup syscall: asks the kernel for an AIO context sized for nr_reqs requests and stores its handle in *ctx */
+static int
+io_setup (guint nr_reqs, aio_context_t *ctx)
+{
+	long rc;
+
+	rc = syscall (SYS_io_setup, nr_reqs, ctx);
+
+	return rc;
+}
+
+/* Raw io_destroy syscall for the context handle ctx */
+static int
+io_destroy (aio_context_t ctx)
+{
+	long rc;
+
+	rc = syscall (SYS_io_destroy, ctx);
+
+	return rc;
+}
+
+/*
+ * Raw io_getevents syscall: collects between min_nr and nr completion
+ * records of context ctx into events, waiting at most tmo.
+ */
+static int
+io_getevents (aio_context_t ctx, long min_nr, long nr, struct io_event *events, struct timespec *tmo)
+{
+	long rc;
+
+	rc = syscall (SYS_io_getevents, ctx, min_nr, nr, events, tmo);
+
+	return rc;
+}
+
+/* Raw io_submit syscall: queues the n request descriptors pointed to by paiocb on context ctx */
+static int
+io_submit (aio_context_t ctx, long n, struct iocb **paiocb)
+{
+	long rc;
+
+	rc = syscall (SYS_io_submit, ctx, n, paiocb);
+
+	return rc;
+}
+
+/* Raw io_cancel syscall for request iocb on context ctx; result receives the event record */
+static int
+io_cancel (aio_context_t ctx, struct iocb *iocb, struct io_event *result)
+{
+	long rc;
+
+	rc = syscall (SYS_io_cancel, ctx, iocb, result);
+
+	return rc;
+}
+
+# ifndef HAVE_SYS_EVENTFD_H /* no <sys/eventfd.h>: provide eventfd() through the raw syscall */
+static int
+eventfd (guint initval, guint flags)
+{
+ return syscall (SYS_eventfd, initval); /* NOTE(review): flags is accepted but not passed to the kernel */
+}
+# endif
+
+#endif
+
+/**
+ * AIO context: state shared by all rspamd_aio_* calls that use the same
+ * event base. It is allocated by rspamd_aio_init().
+ */
+struct aio_context {
+ struct event_base *base; /* libevent base the eventfd handler is attached to */
+ gboolean has_aio; /**< Whether we have aio support on a system */
+#ifdef LINUX
+ /* Eventfd variant */
+ gint event_fd; /* eventfd used as aio_resfd for every submitted request */
+ struct event eventfd_ev; /* read event on event_fd, handled by rspamd_eventfdcb */
+ aio_context_t io_ctx; /* kernel context handle from io_setup() */
+#elif defined(HAVE_AIO_H)
+ /* POSIX aio */
+ struct event rtsigs[128]; /* not referenced elsewhere in this file yet */
+#endif
+};
+
+#ifdef LINUX
+/*
+ * Eventfd read callback.
+ * Reads the completion counter from the eventfd, then drains that many
+ * completion records from the kernel and runs the user callback for each.
+ * @param fd the eventfd that became readable
+ * @param what libevent event mask (unused)
+ * @param ud the owning struct aio_context
+ */
+static void
+rspamd_eventfdcb (gint fd, gshort what, gpointer ud)
+{
+	struct aio_context *ctx = ud;
+	guint64 ready;
+	gint done, i;
+	struct io_event event[32];
+	struct timespec ts;
+	struct io_cbdata *ev_data;
+
+	/* Eventfd returns number of events ready got from kernel */
+	if (read (fd, &ready, sizeof (ready)) != sizeof (ready)) {
+		if (errno != EAGAIN) {
+			msg_err ("eventfd read returned error: %s", strerror (errno));
+		}
+		/* The counter was not read, so there is nothing reliable to drain */
+		return;
+	}
+
+	/* Zero timeout: never block inside the event loop */
+	ts.tv_sec = 0;
+	ts.tv_nsec = 0;
+
+	while (ready) {
+		/* Get events ready */
+		done = io_getevents (ctx->io_ctx, 1, G_N_ELEMENTS (event), event, &ts);
+
+		if (done > 0) {
+			ready -= done;
+
+			for (i = 0; i < done; i ++) {
+				/* aio_data carried the io_cbdata pointer set at submit time */
+				ev_data = (struct io_cbdata *) (uintptr_t) event[i].data;
+				/* Call this callback */
+				ev_data->cb (ev_data->fd, event[i].res, ev_data->len, ev_data->buf, ev_data->ud);
+				/* free (NULL) is a no-op, reads have no private buffer */
+				free (ev_data->io_buf);
+				g_slice_free1 (sizeof (struct io_cbdata), ev_data);
+			}
+		}
+		else if (done == 0) {
+			/* No more events are ready */
+			return;
+		}
+		else {
+			msg_err ("io_getevents failed: %s", strerror (errno));
+			return;
+		}
+	}
+}
+
+#endif
+
+/**
+ * Initialize aio with specified event base
+ * @param base libevent base that will dispatch completion notifications
+ * @return newly allocated context; has_aio is TRUE only when eventfd and
+ * the kernel AIO context were both set up, otherwise callers transparently
+ * get the blocking fallbacks
+ */
+struct aio_context*
+rspamd_aio_init (struct event_base *base)
+{
+	struct aio_context *ctx;
+
+	/* First of all we need to detect which type of aio we can try to use */
+	ctx = g_malloc0 (sizeof (struct aio_context));
+	ctx->base = base;
+
+#ifdef LINUX
+	/* On linux we are trying to use io (3) and eventfd for notifying */
+	ctx->event_fd = eventfd (0, 0);
+	if (ctx->event_fd == -1) {
+		msg_err ("eventfd failed: %s", strerror (errno));
+	}
+	else if (make_socket_nonblocking (ctx->event_fd) == -1) {
+		msg_err ("non blocking for eventfd failed: %s", strerror (errno));
+		close (ctx->event_fd);
+		ctx->event_fd = -1;
+	}
+	else if (io_setup (MAX_AIO_EV, &ctx->io_ctx) == -1) {
+		/*
+		 * The kernel context is created before the event is registered,
+		 * so a failure here never leaves an event for a closed
+		 * descriptor inside the event base.
+		 */
+		msg_err ("io_setup failed: %s", strerror (errno));
+		close (ctx->event_fd);
+		ctx->event_fd = -1;
+	}
+	else {
+		event_set (&ctx->eventfd_ev, ctx->event_fd, EV_READ|EV_PERSIST, rspamd_eventfdcb, ctx);
+		event_base_set (ctx->base, &ctx->eventfd_ev);
+		event_add (&ctx->eventfd_ev, NULL);
+		ctx->has_aio = TRUE;
+	}
+#elif defined(HAVE_AIO_H)
+	/* TODO: implement this */
+#endif
+
+	return ctx;
+}
+
+/**
+ * Open file for aio
+ * @param ctx aio context
+ * @param path file to open
+ * @param flags open(2) flags; O_DIRECT is added on Linux when aio is available
+ * @return file descriptor or -1
+ */
+gint
+rspamd_aio_open (struct aio_context *ctx, const gchar *path, int flags)
+{
+	if (ctx->has_aio) {
+#ifdef LINUX
+		return open (path, flags | O_DIRECT);
+#elif defined(HAVE_AIO_H)
+		return open (path, flags);
+#else
+		return -1;
+#endif
+	}
+
+	/* Fallback */
+	return open (path, flags);
+}
+
+/**
+ * Asynchronous read of file
+ * @param fd descriptor to read from
+ * @param buf destination buffer
+ * @param len number of bytes to read
+ * @param offset file offset to read at
+ * @param ctx aio context
+ * @param cb callback invoked when the read is finished
+ * @param ud opaque data for the callback
+ * @return len when the request was queued asynchronously, the result of
+ * read(2) when the blocking fallback was used (the callback has already
+ * run in that case), -1 on submit or seek failure
+ */
+gint
+rspamd_aio_read (gint fd, gpointer buf, guint64 len, guint64 offset, struct aio_context *ctx, rspamd_aio_cb cb, gpointer ud)
+{
+	gint64 pos;
+	gint r;
+
+	if (ctx->has_aio) {
+#ifdef LINUX
+		struct io_cbdata *cbdata;
+		struct iocb request, *batch[1];
+		gint serrno;
+
+		cbdata = g_slice_alloc (sizeof (struct io_cbdata));
+		cbdata->cb = cb;
+		cbdata->buf = buf;
+		cbdata->len = len;
+		cbdata->ud = ud;
+		cbdata->fd = fd;
+		cbdata->io_buf = NULL;
+
+		memset (&request, 0, sizeof (request));
+		request.aio_fildes = fd;
+		request.aio_lio_opcode = IO_CMD_PREAD;
+		request.aio_reqprio = 0;
+		request.aio_buf = (guint64)((uintptr_t)buf);
+		request.aio_nbytes = len;
+		request.aio_offset = offset;
+		request.aio_flags |= (1 << 0) /* IOCB_FLAG_RESFD */;
+		request.aio_resfd = ctx->event_fd;
+		request.aio_data = (guint64)((uintptr_t)cbdata);
+		batch[0] = &request;
+
+		/* Iocb is copied to kernel internally, so it is safe to put it on stack */
+		if (io_submit (ctx->io_ctx, 1, batch) == 1) {
+			return len;
+		}
+
+		/* Nothing was queued, so no completion will ever release cbdata */
+		serrno = errno;
+		g_slice_free1 (sizeof (struct io_cbdata), cbdata);
+		errno = serrno;
+
+		if (serrno != EAGAIN && serrno != ENOSYS) {
+			return -1;
+		}
+		/* Fall back to sync read */
+#elif defined(HAVE_AIO_H)
+#endif
+	}
+
+	/* Blocking variant */
+#ifdef _LARGEFILE64_SOURCE
+	pos = lseek64 (fd, offset, SEEK_SET);
+#else
+	pos = lseek (fd, offset, SEEK_SET);
+#endif
+	/* Only -1 signals failure: offset 0 is a perfectly valid position */
+	if (pos == -1) {
+		return -1;
+	}
+
+	r = read (fd, buf, len);
+	if (r >= 0) {
+		cb (fd, 0, r, buf, ud);
+	}
+	else {
+		cb (fd, r, -1, buf, ud);
+	}
+
+	return r;
+}
+
+/**
+ * Asynchronous write of file
+ * @param fd descriptor to write to
+ * @param buf data to write; it is copied, so the caller may reuse it after
+ * an asynchronous submit
+ * @param len number of bytes to write
+ * @param offset file offset to write at
+ * @param ctx aio context
+ * @param cb callback invoked when the write is finished
+ * @param ud opaque data for the callback
+ * @return len when the request was queued asynchronously, the result of
+ * write(2) when the blocking fallback was used (the callback has already
+ * run in that case), -1 on allocation, submit or seek failure
+ */
+gint
+rspamd_aio_write (gint fd, gpointer buf, guint64 len, guint64 offset, struct aio_context *ctx, rspamd_aio_cb cb, gpointer ud)
+{
+	gint64 pos;
+	gint r;
+
+	if (ctx->has_aio) {
+#ifdef LINUX
+		struct io_cbdata *cbdata;
+		struct iocb request, *batch[1];
+		gint serrno;
+
+		cbdata = g_slice_alloc (sizeof (struct io_cbdata));
+		cbdata->cb = cb;
+		cbdata->buf = buf;
+		cbdata->len = len;
+		cbdata->ud = ud;
+		cbdata->fd = fd;
+		/* We need to align pointer on boundary of 512 bytes here */
+		if (posix_memalign (&cbdata->io_buf, 512, len) != 0) {
+			g_slice_free1 (sizeof (struct io_cbdata), cbdata);
+			return -1;
+		}
+		memcpy (cbdata->io_buf, buf, len);
+
+		memset (&request, 0, sizeof (request));
+		request.aio_fildes = fd;
+		request.aio_lio_opcode = IO_CMD_PWRITE;
+		request.aio_reqprio = 0;
+		request.aio_buf = (guint64)((uintptr_t)cbdata->io_buf);
+		request.aio_nbytes = len;
+		request.aio_offset = offset;
+		request.aio_flags |= (1 << 0) /* IOCB_FLAG_RESFD */;
+		request.aio_resfd = ctx->event_fd;
+		request.aio_data = (guint64)((uintptr_t)cbdata);
+		batch[0] = &request;
+
+		/* Iocb is copied to kernel internally, so it is safe to put it on stack */
+		if (io_submit (ctx->io_ctx, 1, batch) == 1) {
+			return len;
+		}
+
+		/* Nothing was queued, so release the copy and the request state here */
+		serrno = errno;
+		free (cbdata->io_buf);
+		g_slice_free1 (sizeof (struct io_cbdata), cbdata);
+		errno = serrno;
+
+		if (serrno != EAGAIN && serrno != ENOSYS) {
+			return -1;
+		}
+		/* Fall back to sync write */
+#elif defined(HAVE_AIO_H)
+#endif
+	}
+
+	/* Blocking variant */
+#ifdef _LARGEFILE64_SOURCE
+	pos = lseek64 (fd, offset, SEEK_SET);
+#else
+	pos = lseek (fd, offset, SEEK_SET);
+#endif
+	/* Only -1 signals failure: offset 0 is a perfectly valid position */
+	if (pos == -1) {
+		return -1;
+	}
+
+	r = write (fd, buf, len);
+	if (r >= 0) {
+		cb (fd, 0, r, buf, ud);
+	}
+	else {
+		cb (fd, r, -1, buf, ud);
+	}
+
+	return r;
+}
+
+/**
+ * Close of aio operations
+ * @param fd descriptor to close
+ * @param ctx aio context
+ * @return with aio on Linux the result of io_cancel() (the result of
+ * close() is discarded there), otherwise the result of close()
+ */
+gint
+rspamd_aio_close (gint fd, struct aio_context *ctx)
+{
+ gint r = -1;
+
+ if (ctx->has_aio) {
+#ifdef LINUX
+ struct iocb iocb;
+ struct io_event ev;
+
+ memset (&iocb, 0, sizeof (struct iocb));
+ iocb.aio_fildes = fd;
+ iocb.aio_lio_opcode = IO_CMD_NOOP;
+
+ /* NOTE(review): this passes a fresh stack iocb that was never submitted - confirm that io_cancel() can match it to the pending requests of fd */
+ r = io_cancel (ctx->io_ctx, &iocb, &ev);
+ close (fd);
+ return r;
+
+#elif defined(HAVE_AIO_H)
+#endif
+ }
+
+ r = close (fd);
+
+ return r;
+}
diff --git a/src/libutil/aio_event.h b/src/libutil/aio_event.h
new file mode 100644
index 000000000..45f6015de
--- /dev/null
+++ b/src/libutil/aio_event.h
@@ -0,0 +1,67 @@
+/* Copyright (c) 2010-2011, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#ifndef AIO_EVENT_H_
+#define AIO_EVENT_H_
+
+#include "config.h"
+
+/**
+ * AIO context
+ */
+struct aio_context;
+
+/**
+ * Callback for notifying about a finished aio request
+ * @param fd descriptor the request was issued on
+ * @param res result code: the kernel result for asynchronous requests, 0 after a successful blocking fallback
+ * @param len length associated with the request (bytes transferred in the blocking fallback)
+ * @param data buffer that was passed to rspamd_aio_read or rspamd_aio_write
+ * @param ud opaque user data supplied with the request
+ */
+typedef void (*rspamd_aio_cb) (gint fd, gint res, guint64 len, gpointer data, gpointer ud);
+
+/**
+ * Initialize aio with specified event base
+ * @param base event base used to deliver completion notifications
+ * @return newly allocated aio context
+ */
+struct aio_context* rspamd_aio_init (struct event_base *base);
+
+/**
+ * Open file for aio
+ * @param ctx aio context
+ * @param path path of the file
+ * @param flags flags for open(2)
+ * @return file descriptor or -1 on error
+ */
+gint rspamd_aio_open (struct aio_context *ctx, const gchar *path, int flags);
+
+/**
+ * Asynchronous read of file
+ * @param fd descriptor to read from
+ * @param buf destination buffer
+ * @param len number of bytes to read
+ * @param offset file offset of the read
+ * @param ctx aio context
+ * @param cb callback called when the operation is finished
+ * @param ud opaque data passed to the callback
+ * @return len when the request was queued, -1 on error
+ */
+gint rspamd_aio_read (gint fd, gpointer buf, guint64 len, guint64 offset,
+ struct aio_context *ctx, rspamd_aio_cb cb, gpointer ud);
+
+/**
+ * Asynchronous write of file
+ * @param fd descriptor to write to
+ * @param buf data to write
+ * @param len number of bytes to write
+ * @param offset file offset of the write
+ * @param ctx aio context
+ * @param cb callback called when the operation is finished
+ * @param ud opaque data passed to the callback
+ * @return len when the request was queued, -1 on error
+ */
+gint rspamd_aio_write (gint fd, gpointer buf, guint64 len, guint64 offset,
+ struct aio_context *ctx, rspamd_aio_cb cb, gpointer ud);
+
+/**
+ * Close of aio operations
+ * @param fd descriptor to close
+ * @param ctx aio context
+ */
+gint rspamd_aio_close (gint fd, struct aio_context *ctx);
+
+#endif /* AIO_EVENT_H_ */
diff --git a/src/libutil/bloom.c b/src/libutil/bloom.c
new file mode 100644
index 000000000..f857d2e49
--- /dev/null
+++ b/src/libutil/bloom.c
@@ -0,0 +1,153 @@
+/*
+ * Copyright (c) 2009-2012, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "bloom.h"
+#include "xxhash.h"
+
+/* 4 bits are used for counting (implementing delete operation) */
+#define SIZE_BIT 4
+
+/* Byte that holds counter n, and the bit offset of that counter inside the byte */
+#define BLOOM_BYTE(n) ((n) * SIZE_BIT / CHAR_BIT)
+#define BLOOM_SHIFT(n) (((n) % (CHAR_BIT / SIZE_BIT)) * SIZE_BIT)
+
+/*
+ * These macros work on the 4-bit counter number n of the byte array a.
+ * acc is a scratch lvalue supplied by the caller. The counter is shifted
+ * down to the low nibble before it is changed, so counters stored in the
+ * high nibble are incremented and decremented correctly as well.
+ * Counters wrap around modulo 16.
+ */
+#define INCBIT(a, n, acc) do { \
+	(acc) = ((a)[BLOOM_BYTE (n)] >> BLOOM_SHIFT (n)) & 0xF; \
+	(acc) ++; \
+	(acc) &= 0xF; \
+	\
+	(a)[BLOOM_BYTE (n)] &= ~(0xF << BLOOM_SHIFT (n)); \
+	(a)[BLOOM_BYTE (n)] |= ((acc) << BLOOM_SHIFT (n)); \
+} while (0)
+
+#define DECBIT(a, n, acc) do { \
+	(acc) = ((a)[BLOOM_BYTE (n)] >> BLOOM_SHIFT (n)) & 0xF; \
+	(acc) --; \
+	(acc) &= 0xF; \
+	\
+	(a)[BLOOM_BYTE (n)] &= ~(0xF << BLOOM_SHIFT (n)); \
+	(a)[BLOOM_BYTE (n)] |= ((acc) << BLOOM_SHIFT (n)); \
+} while (0)
+
+/* Non-zero when counter n is non-zero */
+#define GETBIT(a, n) ((a)[BLOOM_BYTE (n)] & (0xF << BLOOM_SHIFT (n)))
+
+/* Common hash functions */
+
+
+/*
+ * Allocate a counting bloom filter with `size` counters and `nfuncs` hash
+ * seeds. The variadic arguments are the guint32 seeds, exactly nfuncs of
+ * them. Returns NULL when one of the allocations yields NULL.
+ */
+rspamd_bloom_filter_t *
+rspamd_bloom_create (size_t size, size_t nfuncs, ...)
+{
+	rspamd_bloom_filter_t *filter;
+	va_list seeds;
+	gsize i;
+
+	filter = g_malloc (sizeof (rspamd_bloom_filter_t));
+	if (filter == NULL) {
+		return NULL;
+	}
+
+	filter->a = g_new0 (gchar, (size + CHAR_BIT - 1) / CHAR_BIT * SIZE_BIT);
+	if (filter->a == NULL) {
+		g_free (filter);
+		return NULL;
+	}
+
+	filter->seeds = g_new0 (guint32, nfuncs);
+	if (filter->seeds == NULL) {
+		g_free (filter->a);
+		g_free (filter);
+		return NULL;
+	}
+
+	/* Copy the seeds out of the variadic tail */
+	va_start (seeds, nfuncs);
+	i = 0;
+	while (i < nfuncs) {
+		filter->seeds[i] = va_arg (seeds, guint32);
+		i ++;
+	}
+	va_end (seeds);
+
+	filter->asize = size;
+	filter->nfuncs = nfuncs;
+
+	return filter;
+}
+
+/*
+ * Release the counter array, the seed array and the filter itself.
+ * A NULL filter is accepted and ignored, like g_free () does.
+ */
+void
+rspamd_bloom_destroy (rspamd_bloom_filter_t * bloom)
+{
+	if (bloom == NULL) {
+		return;
+	}
+
+	g_free (bloom->a);
+	g_free (bloom->seeds);
+	g_free (bloom);
+}
+
+/*
+ * Increment the counter selected by every seeded hash of the string s.
+ * Returns FALSE for a NULL string, TRUE otherwise.
+ */
+gboolean
+rspamd_bloom_add (rspamd_bloom_filter_t * bloom, const gchar *s)
+{
+	size_t i, slen;
+	u_char counter;
+	guint cell;
+
+	if (s == NULL) {
+		return FALSE;
+	}
+
+	slen = strlen (s);
+	i = 0;
+	while (i < bloom->nfuncs) {
+		cell = XXH32 (s, slen, bloom->seeds[i]) % bloom->asize;
+		INCBIT (bloom->a, cell, counter);
+		++i;
+	}
+
+	return TRUE;
+}
+
+/*
+ * Decrement the counter selected by every seeded hash of the string s.
+ * Returns FALSE for a NULL string, TRUE otherwise.
+ */
+gboolean
+rspamd_bloom_del (rspamd_bloom_filter_t * bloom, const gchar *s)
+{
+	size_t i, slen;
+	u_char counter;
+	guint cell;
+
+	if (s == NULL) {
+		return FALSE;
+	}
+
+	slen = strlen (s);
+	i = 0;
+	while (i < bloom->nfuncs) {
+		cell = XXH32 (s, slen, bloom->seeds[i]) % bloom->asize;
+		DECBIT (bloom->a, cell, counter);
+		++i;
+	}
+
+	return TRUE;
+}
+
+/*
+ * Test whether every counter selected by the seeded hashes of s is
+ * non-zero. Returns FALSE for a NULL string or as soon as one counter is
+ * zero, TRUE otherwise.
+ */
+gboolean
+rspamd_bloom_check (rspamd_bloom_filter_t * bloom, const gchar *s)
+{
+	size_t i, slen;
+	guint cell;
+	gboolean present = TRUE;
+
+	if (s == NULL) {
+		return FALSE;
+	}
+
+	slen = strlen (s);
+	for (i = 0; present && i < bloom->nfuncs; i ++) {
+		cell = XXH32 (s, slen, bloom->seeds[i]) % bloom->asize;
+		if (GETBIT (bloom->a, cell) == 0) {
+			present = FALSE;
+		}
+	}
+
+	return present;
+}
diff --git a/src/libutil/bloom.h b/src/libutil/bloom.h
new file mode 100644
index 000000000..380143c80
--- /dev/null
+++ b/src/libutil/bloom.h
@@ -0,0 +1,48 @@
+#ifndef __RSPAMD_BLOOM_H__
+#define __RSPAMD_BLOOM_H__
+
+#include "config.h"
+
+typedef struct rspamd_bloom_filter_s { /* counting bloom filter state */
+ size_t asize; /* number of counters; hash values are reduced modulo this */
+ gchar *a; /* packed array of 4-bit counters */
+ size_t nfuncs; /* number of hash seeds in use */
+ guint32 *seeds; /* nfuncs seeds for XXH32 */
+} rspamd_bloom_filter_t;
+
+
+/*
+ * Some random uint32 seeds for hashing.
+ * The macro expands to the seed count (8) followed by the seeds, so it
+ * fills both the nfuncs argument and the variadic tail of rspamd_bloom_create.
+ */
+#define RSPAMD_DEFAULT_BLOOM_HASHES 8, 0x61782caaU, 0x79ab8141U, 0xe45ee2d1U, \
+ 0xf97542d1U, 0x1e2623edU, 0xf5a23cfeU, 0xa41b2508U, 0x85abdce8U
+
+/*
+ * Create new bloom filter
+ * @param size number of counters in the bloom buffer
+ * @param nfuncs number of hash seeds that follow
+ * @param ... nfuncs guint32 seeds, one per hash function
+ * @return new filter or NULL if an allocation yields NULL
+ */
+rspamd_bloom_filter_t* rspamd_bloom_create (size_t size, size_t nfuncs, ...);
+
+/*
+ * Destroy bloom filter: frees the counters, the seeds and the filter itself
+ */
+void rspamd_bloom_destroy (rspamd_bloom_filter_t * bloom);
+
+/*
+ * Add a string to bloom filter
+ * @return FALSE if s is NULL, TRUE otherwise
+ */
+gboolean rspamd_bloom_add (rspamd_bloom_filter_t * bloom, const gchar *s);
+
+/*
+ * Delete a string from bloom filter
+ * @return FALSE if s is NULL, TRUE otherwise
+ */
+gboolean rspamd_bloom_del (rspamd_bloom_filter_t * bloom, const gchar *s);
+
+/*
+ * Check whether this string is in the bloom filter. The algorithm can produce
+ * false positives, so a positive result must be verified by the caller.
+ * @return FALSE if s is NULL or definitely absent, TRUE if it may be present
+ */
+gboolean rspamd_bloom_check (rspamd_bloom_filter_t * bloom, const gchar *s);
+
+#endif
diff --git a/src/libutil/diff.c b/src/libutil/diff.c
new file mode 100644
index 000000000..4038d8680
--- /dev/null
+++ b/src/libutil/diff.c
@@ -0,0 +1,445 @@
+/* diff - compute a shortest edit script (SES) given two sequences
+ * Copyright (c) 2004 Michael B. Allen <mba2000 ioplex.com>
+ * Copyright (c) 2010-2014, Vsevolod Stakhov
+ *
+ * The MIT License
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/* This algorithm is basically Myers' solution to SES/LCS with
+ * the Hirschberg linear space refinement as described in the
+ * following publication:
+ *
+ * E. Myers, ``An O(ND) Difference Algorithm and Its Variations,''
+ * Algorithmica 1, 2 (1986), 251-266.
+ * http://www.cs.arizona.edu/people/gene/PAPERS/diff.ps
+ *
+ * This is the same algorithm used by GNU diff(1).
+ */
+
+
+#include "config.h"
+#include "diff.h"
+
+
+#define FV(k) _v(ctx, (k), 0) /* forward-search value for diagonal k; needs a local named ctx */
+#define RV(k) _v(ctx, (k), 1) /* reverse-search value for diagonal k; needs a local named ctx */
+
+#define MAX_DIFF 1024 /* dmax and initial script capacity used by compare_diff_distance_unnormalized */
+
+struct _ctx /* working state of one rspamd_diff run */
+{
+ GArray *buf; /* gint array holding the forward and reverse diagonal values */
+ GArray *ses; /* edit script being built; NULL when only the distance is wanted */
+ gint si; /* set to 0 by rspamd_diff; not updated elsewhere in this file */
+ gint dmax; /* search is abandoned once the distance reaches this limit */
+};
+
+struct middle_snake /* run of matches found by _find_middle_snake */
+{
+ gint x, y, u, v; /* (x, y) is where the snake starts, (u, v) where it ends; offsets into a and b */
+};
+
+/* Grow arr to k elements when it is shorter than that; never shrinks it */
+static
+void maybe_resize_array(GArray *arr, guint k)
+{
+	if (arr->len >= k) {
+		return;
+	}
+
+	g_array_set_size (arr, k);
+}
+
+/*
+ * Store val as the value of diagonal k for the forward (r == 0) or
+ * reverse (r == 1) search, growing the backing array when needed.
+ */
+static void
+_setv(struct _ctx *ctx, gint k, gint r, gint val)
+{
+	gint j;
+
+	/*
+	 * Pack diagonals -N..N of both directions into non-negative indexes
+	 */
+	j = k <= 0 ? -k * 4 + r : k * 4 + (r - 2);
+
+	/* Slot j must exist, so the array needs at least j + 1 elements */
+	maybe_resize_array (ctx->buf, j + 1);
+	g_array_index (ctx->buf, gint, j) = val;
+}
+
+/*
+ * Read the value of diagonal k for the forward (r == 0) or reverse
+ * (r == 1) search. A diagonal that has never been stored reads as 0
+ * instead of touching memory beyond the current array length.
+ */
+static gint
+_v(struct _ctx *ctx, gint k, gint r)
+{
+	gint j;
+
+	/* Same index packing as in _setv */
+	j = k <= 0 ? -k * 4 + r : k * 4 + (r - 2);
+
+	if (j < 0 || (guint) j >= ctx->buf->len) {
+		return 0;
+	}
+
+	return g_array_index (ctx->buf, gint, j);
+}
+
+static gint /* Find the middle snake between a[aoff .. aoff+n) and b[boff .. boff+m).
+ * The forward search (FV) and the reverse search (RV) advance by one edit
+ * per round until their paths overlap on a diagonal.
+ * On success ms receives the start (x, y) and end (u, v) of the snake and
+ * the edit distance is returned (2*d-1 or 2*d). ctx->dmax is returned when
+ * the limit is reached; -1 with errno set to EFAULT if no overlap was found.
+ */
+_find_middle_snake(const void *a, gint aoff, gint n, const void *b,
+ gint boff, gint m, struct _ctx *ctx, struct middle_snake *ms)
+{
+ gint delta, odd, mid, d;
+
+ delta = n - m; /* diagonal on which the reverse search starts */
+ odd = delta & 1; /* overlap is detected in the forward pass when delta is odd, otherwise in the reverse pass */
+ mid = (n + m) / 2;
+ mid += odd; /* upper bound for the number of rounds */
+
+ _setv (ctx, 1, 0, 0); /* forward seed: diagonal 1 starts at x = 0 */
+ _setv (ctx, delta - 1, 1, n); /* reverse seed: diagonal delta - 1 starts at x = n */
+
+ for (d = 0; d <= mid; d++) {
+ gint k, x, y;
+
+ if ((2 * d - 1) >= ctx->dmax) { /* give up once the distance limit is reached */
+ return ctx->dmax;
+ }
+
+ for (k = d; k >= -d; k -= 2) { /* forward pass over diagonals -d .. d */
+ if (k == -d || (k != d && FV(k - 1) < FV(k + 1))) {
+ x = FV(k + 1); /* step down from diagonal k + 1 */
+ }
+ else {
+ x = FV(k - 1) + 1; /* step right from diagonal k - 1 */
+ }
+ y = x - k;
+
+ ms->x = x; /* tentative snake start */
+ ms->y = y;
+ const guchar *a0 = (const guchar *) a + aoff;
+ const guchar *b0 = (const guchar *) b + boff;
+ while (x < n && y < m && a0[x] == b0[y]) { /* follow the run of equal bytes */
+ x++;
+ y++;
+ }
+ _setv (ctx, k, 0, x); /* furthest x reached on diagonal k */
+
+ if (odd && k >= (delta - (d - 1)) && k <= (delta + (d - 1))) {
+ if (x >= RV(k)) { /* forward path met the reverse path */
+ ms->u = x;
+ ms->v = y;
+ return 2 * d - 1;
+ }
+ }
+ }
+ for (k = d; k >= -d; k -= 2) { /* reverse pass, diagonals are shifted by delta */
+ gint kr = (n - m) + k;
+
+ if (k == d || (k != -d && RV(kr - 1) < RV(kr + 1))) {
+ x = RV(kr - 1);
+ }
+ else {
+ x = RV(kr + 1) - 1;
+ }
+ y = x - kr;
+
+ ms->u = x; /* tentative snake end */
+ ms->v = y;
+ const guchar *a0 = (const guchar *) a + aoff;
+ const guchar *b0 = (const guchar *) b + boff;
+ while (x > 0 && y > 0 && a0[x - 1] == b0[y - 1]) { /* follow the run of equal bytes backwards */
+ x--;
+ y--;
+ }
+ _setv (ctx, kr, 1, x); /* smallest x reached on diagonal kr */
+
+ if (!odd && kr >= -d && kr <= d) {
+ if (x <= FV(kr)) { /* reverse path met the forward path */
+ ms->x = x;
+ ms->y = y;
+ return 2 * d;
+ }
+ }
+ }
+ }
+
+ errno = EFAULT; /* no overlap within mid rounds */
+
+ return -1;
+}
+
+/*
+ * Record an edit of type op covering len elements at offset off.
+ * Nothing happens for an empty edit or when no script is collected.
+ * An edit with the same op as the last one in the script extends that
+ * entry instead of adding a new one.
+ */
+static void
+_edit(struct _ctx *ctx, gint op, gint off, gint len)
+{
+	struct diff_edit *last, fresh;
+
+	if (len == 0 || ctx->ses == NULL) {
+		return;
+	}
+
+	if (ctx->ses->len != 0) {
+		last = &g_array_index (ctx->ses, struct diff_edit, ctx->ses->len - 1);
+		if (last->op == op) {
+			/* Coalesce with the previous edit */
+			last->len += len;
+			return;
+		}
+	}
+
+	fresh.op = op;
+	fresh.off = off;
+	fresh.len = len;
+	g_array_append_val (ctx->ses, fresh);
+}
+
+static gint /* Recursively build the edit script for a[aoff .. aoff+n) and b[boff .. boff+m).
+ * Returns the edit distance of this sub-problem, ctx->dmax when the limit
+ * was reached, or -1 on failure.
+ */
+_ses(const void *a, gint aoff, gint n, const void *b, gint boff,
+ gint m, struct _ctx *ctx)
+{
+ struct middle_snake ms = {
+ .x = 0,
+ .y = 0,
+ .u = 0,
+ .v = 0
+ };
+ gint d;
+
+ if (n == 0) { /* a is exhausted: the rest of b is inserted */
+ _edit (ctx, DIFF_INSERT, boff, m);
+ d = m;
+ }
+ else if (m == 0) { /* b is exhausted: the rest of a is deleted */
+ _edit (ctx, DIFF_DELETE, aoff, n);
+ d = n;
+ }
+ else {
+ /* Find the middle "snake" around which we
+ * recursively solve the sub-problems.
+ */
+ d = _find_middle_snake (a, aoff, n, b, boff, m, ctx, &ms);
+ if (d == -1) {
+ return -1;
+ }
+ else if (d >= ctx->dmax) { /* limit reached: stop without completing the script */
+ return ctx->dmax;
+ }
+ else if (ctx->ses == NULL) { /* only the distance was requested */
+ return d;
+ }
+ else if (d > 1) {
+ if (_ses (a, aoff, ms.x, b, boff, ms.y, ctx) == -1) { /* part before the snake */
+ return -1;
+ }
+
+ _edit (ctx, DIFF_MATCH, aoff + ms.x, ms.u - ms.x); /* the snake itself */
+
+ aoff += ms.u;
+ boff += ms.v;
+ n -= ms.u;
+ m -= ms.v;
+ if (_ses (a, aoff, n, b, boff, m, ctx) == -1) { /* part after the snake */
+ return -1;
+ }
+ }
+ else {
+ gint x = ms.x;
+ gint u = ms.u;
+
+ /* There are only 4 base cases when the
+ * edit distance is 1.
+ *
+ * n > m m > n
+ *
+ * - |
+ * \ \ x != u
+ * \ \
+ *
+ * \ \
+ * \ \ x == u
+ * - |
+ */
+
+ if (m > n) { /* b is longer by one element: a single insert */
+ if (x == u) {
+ _edit (ctx, DIFF_MATCH, aoff, n);
+ _edit (ctx, DIFF_INSERT, boff + (m - 1), 1);
+ }
+ else {
+ _edit (ctx, DIFF_INSERT, boff, 1);
+ _edit (ctx, DIFF_MATCH, aoff, n);
+ }
+ }
+ else { /* otherwise a single delete from a */
+ if (x == u) {
+ _edit (ctx, DIFF_MATCH, aoff, m);
+ _edit (ctx, DIFF_DELETE, aoff + (n - 1), 1);
+ }
+ else {
+ _edit (ctx, DIFF_DELETE, aoff, 1);
+ _edit (ctx, DIFF_MATCH, aoff + 1, m);
+ }
+ }
+ }
+ }
+
+ return d;
+}
+
+/*
+ * Compute the shortest edit script that turns a[aoff .. aoff+n) into
+ * b[boff .. boff+m).
+ * @param dmax the search is abandoned once the distance reaches this limit
+ * @param ses array of struct diff_edit receiving the script, may be NULL
+ * @param sn if not NULL and ses is given, receives the number of edits in ses
+ * @return edit distance, dmax when the limit was reached, -1 on failure
+ */
+gint
+rspamd_diff(const void *a, gint aoff, gint n, const void *b, gint boff, gint m,
+		gint dmax, GArray *ses, gint *sn)
+{
+	struct _ctx ctx;
+	gint d, x, y;
+	GArray *tmp;
+	const guchar *a0, *b0;
+
+	tmp = g_array_sized_new (FALSE, TRUE, sizeof(gint), dmax);
+	ctx.buf = tmp;
+	ctx.ses = ses;
+	ctx.si = 0;
+	ctx.dmax = dmax;
+
+	/* The _ses function assumes the SES will begin or end with a delete
+	 * or insert. The following will ensure this is true by eating any
+	 * beginning matches. This is also a quick way to process sequences
+	 * that match entirely.
+	 */
+	x = y = 0;
+	a0 = (const guchar *) a + aoff;
+	b0 = (const guchar *) b + boff;
+	while (x < n && y < m && a0[x] == b0[y]) {
+		x++;
+		y++;
+	}
+	_edit (&ctx, DIFF_MATCH, aoff, x);
+
+	d = _ses (a, aoff + x, n - x, b, boff + y, m - y, &ctx);
+
+	if (d != -1 && ses != NULL && sn != NULL) {
+		/* Report how many edits the script contains */
+		*sn = ses->len;
+	}
+
+	g_array_free (tmp, TRUE);
+
+	return d;
+}
+
+/*
+ * Number of inserted plus deleted bytes needed to turn s1 into s2.
+ * When the diff itself fails the strings are treated as completely
+ * different and the maximum distance (both lengths) is returned.
+ */
+static guint32
+compare_diff_distance_unnormalized (f_str_t *s1, f_str_t *s2)
+{
+	GArray *ses;
+	struct diff_edit *e;
+	guint i;
+	guint32 distance = 0;
+
+	ses = g_array_sized_new (FALSE, TRUE, sizeof (struct diff_edit), MAX_DIFF);
+
+	if (rspamd_diff (s1->begin, 0, s1->len,
+			s2->begin, 0, s2->len, MAX_DIFF, ses, NULL) == -1) {
+		/* Diff failed, strings are different */
+		g_array_free (ses, TRUE);
+		return s1->len + s2->len;
+	}
+
+	for (i = 0; i < ses->len; i ++) {
+		e = &g_array_index(ses, struct diff_edit, i);
+		if (e->op != DIFF_MATCH) {
+			distance += e->len;
+		}
+	}
+
+	g_array_free (ses, TRUE);
+
+	return distance;
+}
+
+/*
+ * Similarity of two strings in percent: 100 for identical strings,
+ * 0 for the lowest score the formula can give.
+ */
+guint32
+compare_diff_distance (f_str_t *s1, f_str_t *s2)
+{
+	guint64 total, penalty;
+
+	total = (guint64) s1->len + s2->len;
+	if (total == 0) {
+		/* Two empty strings are identical; also avoids division by zero */
+		return 100;
+	}
+
+	penalty = (2 * (guint64) compare_diff_distance_unnormalized (s1, s2) * 100) / total;
+
+	/* The penalty may exceed 100, never let the unsigned result wrap */
+	return penalty >= 100 ? 0 : 100 - (guint32) penalty;
+}
+
+
+/*
+ * Copy bytes from *src into out, dropping ASCII whitespace and lowering
+ * ASCII letters, until the input is exhausted or out holds outlen bytes.
+ * *src and *remain are advanced past the consumed input.
+ * Returns the number of bytes stored in out.
+ */
+static gsize
+diff_normalize_chunk (gchar **src, gsize *remain, gchar *out, gsize outlen)
+{
+	gchar *in = *src, *o = out;
+	gsize left = *remain;
+
+	while (left > 0 && (gsize) (o - out) < outlen) {
+		if (!g_ascii_isspace (*in)) {
+			*o++ = g_ascii_tolower (*in);
+		}
+		in ++;
+		left --;
+	}
+
+	*src = in;
+	*remain = left;
+
+	return o - out;
+}
+
+/* Count the bytes of p[0 .. len) that are not ASCII whitespace */
+static guint32
+diff_count_non_space (const gchar *p, gsize len)
+{
+	guint32 cnt = 0;
+
+	while (len > 0) {
+		if (!g_ascii_isspace (*p)) {
+			cnt ++;
+		}
+		p ++;
+		len --;
+	}
+
+	return cnt;
+}
+
+/*
+ * Similarity of two strings in percent, ignoring ASCII whitespace and
+ * ASCII letter case. The strings are compared chunk by chunk through
+ * fixed-size buffers; whatever is left of the longer string counts as
+ * difference.
+ */
+guint32
+compare_diff_distance_normalized (f_str_t *s1, f_str_t *s2)
+{
+	gchar b1[BUFSIZ], b2[BUFSIZ], *p1, *p2;
+	gsize r1, r2;
+	f_str_t t1, t2;
+	guint32 cur_diff = 0;
+	guint64 total, penalty;
+
+	r1 = s1->len;
+	r2 = s2->len;
+	p1 = s1->begin;
+	p2 = s2->begin;
+
+	while (r1 > 0 && r2 > 0) {
+		/* Copy strings to the buffer normalized */
+		t1.begin = b1;
+		t1.len = diff_normalize_chunk (&p1, &r1, b1, sizeof (b1));
+
+		t2.begin = b2;
+		t2.len = diff_normalize_chunk (&p2, &r2, b2, sizeof (b2));
+
+		cur_diff += compare_diff_distance_unnormalized (&t1, &t2);
+	}
+
+	/* At most one of the strings still has unprocessed input */
+	if (r1 > 0) {
+		cur_diff += diff_count_non_space (p1, r1);
+	}
+	else if (r2 > 0) {
+		cur_diff += diff_count_non_space (p2, r2);
+	}
+
+	total = (guint64) s1->len + s2->len;
+	if (total == 0) {
+		/* Two empty strings are identical; also avoids division by zero */
+		return 100;
+	}
+
+	penalty = (2 * (guint64) cur_diff * 100) / total;
+
+	/* The penalty may exceed 100, never let the unsigned result wrap */
+	return penalty >= 100 ? 0 : 100 - (guint32) penalty;
+}
diff --git a/src/libutil/diff.h b/src/libutil/diff.h
new file mode 100644
index 000000000..cea5e5d4a
--- /dev/null
+++ b/src/libutil/diff.h
@@ -0,0 +1,74 @@
+/* Copyright (c) 2010, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Rambler BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#ifndef DIFF_H_
+#define DIFF_H_
+
+#include "config.h"
+#include "fstring.h"
+
+/*
+ * Kind of a single edit script entry (compared against diff_edit.op)
+ */
+typedef enum
+{
+ DIFF_MATCH = 1,
+ DIFF_DELETE,
+ DIFF_INSERT
+} diff_op;
+
+/*
+ * One element of an edit script
+ */
+struct diff_edit
+{
+ gshort op; /* one of the diff_op values */
+ gint off; /* offset into s1 if MATCH or DELETE, but into s2 if INSERT */
+ gint len; /* length of the run this entry covers */
+};
+
+/*
+ * Calculate difference between two strings using diff algorithm
+ * @param a the first line begin
+ * @param aoff the first line offset
+ * @param n the first line length
+ * @param b the second line begin
+ * @param boff the second line offset
+ * @param m the second line length
+ * @param dmax maximum differences number
+ * @param ses here would be stored the shortest script to transform a to b
+ * (an array of struct diff_edit)
+ * @param sn here would be stored a number of differences between a and b
+ * @return distance between strings or -1 in case of error
+ */
+gint rspamd_diff(const void *a, gint aoff, gint n, const void *b, gint boff, gint m,
+ gint dmax, GArray *ses, gint *sn);
+
+/*
+ * Calculate distance between two strings (in percentage) using diff algorithm.
+ * The value is derived from the edit distance relative to the summed length
+ * of both strings.
+ * @param s1 the first string
+ * @param s2 the second string
+ * @return 100 in case of identical strings and 0 in case of totally different strings.
+ */
+guint32 compare_diff_distance (f_str_t *s1, f_str_t *s2);
+
+/*
+ * Calculate distance between two strings (in percentage) using diff algorithm. Strings are normalized before:
+ * all spaces are removed and all characters are lowercased.
+ * @param s1 the first string
+ * @param s2 the second string
+ * @return 100 in case of identical strings and 0 in case of totally different strings.
+*/
+guint32 compare_diff_distance_normalized (f_str_t *s1, f_str_t *s2);
+
+#endif /* DIFF_H_ */
diff --git a/src/libutil/fstring.c b/src/libutil/fstring.c
new file mode 100644
index 000000000..098824101
--- /dev/null
+++ b/src/libutil/fstring.c
@@ -0,0 +1,461 @@
+/*
+ * Copyright (c) 2009-2012, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "fstring.h"
+
+/*
+ * Find the first occurrence of a character in a string.
+ * Returns its zero-based offset or -1 if the character is absent.
+ */
+ssize_t
+fstrchr (f_str_t * src, gchar c)
+{
+	size_t idx;
+
+	for (idx = 0; idx != src->len; idx++) {
+		if (src->begin[idx] == c) {
+			return idx;
+		}
+	}
+
+	return -1;
+}
+
+/*
+ * Search last occurrence of character in string.
+ * Returns its zero-based offset or -1 if the character is absent.
+ */
+ssize_t
+fstrrchr (f_str_t * src, gchar c)
+{
+	size_t cur = src->len;
+
+	/*
+	 * Visit indices len - 1 down to 0: begin[len] lies past the string
+	 * and must not be read, while index 0 is a valid match position.
+	 */
+	while (cur > 0) {
+		cur--;
+		if (src->begin[cur] == c) {
+			return cur;
+		}
+	}
+
+	return -1;
+}
+
+/*
+ * Search for pattern in orig.
+ * Returns the offset of the first full match or -1 if there is none.
+ * An empty pattern matches at offset 0.
+ */
+ssize_t
+fstrstr (f_str_t * orig, f_str_t * pattern)
+{
+	size_t cur, pcur, last;
+
+	if (pattern->len > orig->len) {
+		return -1;
+	}
+	if (pattern->len == 0) {
+		/* Do not dereference an empty pattern */
+		return 0;
+	}
+
+	/* The last offset where the whole pattern still fits into orig */
+	last = orig->len - pattern->len;
+
+	for (cur = 0; cur <= last; cur++) {
+		for (pcur = 0; pcur < pattern->len; pcur++) {
+			if (orig->begin[cur + pcur] != pattern->begin[pcur]) {
+				break;
+			}
+		}
+		if (pcur == pattern->len) {
+			/* Every pattern byte matched */
+			return cur;
+		}
+	}
+
+	return -1;
+}
+
+/*
+ * Search for pattern in orig ignoring ASCII case.
+ * Returns the offset of the first full match or -1 if there is none.
+ * An empty pattern matches at offset 0.
+ */
+ssize_t
+fstrstri (f_str_t * orig, f_str_t * pattern)
+{
+	size_t cur, pcur, last;
+
+	if (pattern->len > orig->len) {
+		return -1;
+	}
+	if (pattern->len == 0) {
+		/* Do not dereference an empty pattern */
+		return 0;
+	}
+
+	/* The last offset where the whole pattern still fits into orig */
+	last = orig->len - pattern->len;
+
+	for (cur = 0; cur <= last; cur++) {
+		for (pcur = 0; pcur < pattern->len; pcur++) {
+			if (g_ascii_tolower (orig->begin[cur + pcur]) !=
+				g_ascii_tolower (pattern->begin[pcur])) {
+				break;
+			}
+		}
+		if (pcur == pattern->len) {
+			/* Every pattern byte matched */
+			return cur;
+		}
+	}
+
+	return -1;
+}
+
+/*
+ * Extract the next word from text, splitting on any character of sep.
+ * The word is stored in state->word and state->pos is advanced.
+ *
+ * Return: -1 - no new words can be extracted
+ * 1 - word was extracted and there are more words
+ * 0 - last word extracted
+ */
+gint
+fstrtok (f_str_t * text, const gchar *sep, f_tok_t * state)
+{
+	size_t idx;
+	const gchar *s;
+
+	if (state->pos >= text->len) {
+		return -1;
+	}
+
+	for (idx = state->pos; idx < text->len; idx++) {
+		for (s = sep; *s != '\0'; s++) {
+			if (text->begin[idx] == *s) {
+				/* Separator found: the word ends right before it */
+				state->word.begin = text->begin + state->pos;
+				state->word.len = idx - state->pos;
+				state->pos = idx + 1;
+				return 1;
+			}
+		}
+	}
+
+	/* No separator up to the end of text: the rest is the last word */
+	state->word.begin = text->begin + state->pos;
+	state->word.len = idx - state->pos;
+	state->pos = idx;
+
+	return 0;
+}
+
+/*
+ * Copy the bytes of src to the beginning of dest.
+ * Returns the number of bytes copied, or 0 if dest is too small.
+ * Only the bytes are copied: dest->len is left untouched.
+ */
+size_t
+fstrcpy (f_str_t * dest, f_str_t * src)
+{
+	size_t n;
+
+	if (src->len > dest->size) {
+		return 0;
+	}
+
+	/* src->len fits into dest->size here, so src->len bounds the copy */
+	for (n = 0; n < src->len; n++) {
+		dest->begin[n] = src->begin[n];
+	}
+
+	return n;
+}
+
+/*
+ * Append src to the end of dest and update dest->len.
+ * Returns the number of bytes appended, or 0 if dest has no room for them.
+ */
+size_t
+fstrcat (f_str_t * dest, f_str_t * src)
+{
+	size_t n;
+	gchar *tail;
+
+	if (src->len + dest->len > dest->size) {
+		return 0;
+	}
+
+	tail = dest->begin + dest->len;
+	for (n = 0; n < src->len; n++) {
+		tail[n] = src->begin[n];
+	}
+
+	dest->len += src->len;
+
+	return n;
+}
+
+/*
+ * Make a 0-terminated copy of a string in the given memory pool
+ */
+gchar *
+fstrcstr (f_str_t * str, rspamd_mempool_t * pool)
+{
+	const size_t n = str->len;
+	gchar *copy = rspamd_mempool_alloc (pool, n + 1);
+
+	/* memccpy stops after the first \0 found in the source, if any */
+	memccpy (copy, str->begin, '\0', n);
+	copy[n] = 0;
+
+	return copy;
+}
+
+/*
+ * Push one character to fstr.
+ * Returns 1 on success or 0 if the string is full and has to be
+ * reallocated by the caller.
+ */
+gint
+fstrpush (f_str_t * dest, gchar c)
+{
+	/* The write goes to begin[len], which is valid only while len < size */
+	if (dest->len >= dest->size) {
+		return 0;
+	}
+
+	*(dest->begin + dest->len) = c;
+	dest->len++;
+	return 1;
+}
+
+/*
+ * Push one unicode character to fstr, encoded as UTF-8.
+ * Returns the number of bytes appended or 0 if the encoded character
+ * does not fit into the remaining space.
+ */
+gint
+fstrpush_unichar (f_str_t * dest, gunichar c)
+{
+	gint l;
+
+	/* With a NULL buffer g_unichar_to_utf8 only computes the encoded length */
+	l = g_unichar_to_utf8 (c, NULL);
+
+	if (dest->len >= dest->size || (size_t)l > dest->size - dest->len) {
+		/* Need to reallocate string */
+		return 0;
+	}
+
+	l = g_unichar_to_utf8 (c, dest->begin + dest->len);
+	dest->len += l;
+	return l;
+}
+
+/*
+ * Allocate an empty f_str_t with room for len bytes from the pool
+ */
+f_str_t *
+fstralloc (rspamd_mempool_t * pool, size_t len)
+{
+	f_str_t *str;
+
+	str = rspamd_mempool_alloc (pool, sizeof (*str));
+	str->begin = rspamd_mempool_alloc (pool, len);
+	str->len = 0;
+	str->size = len;
+
+	return str;
+}
+
+/*
+ * Allocate an empty f_str_t with room for len bytes using the
+ * temporary allocator of the pool
+ */
+f_str_t *
+fstralloc_tmp (rspamd_mempool_t * pool, size_t len)
+{
+	f_str_t *str;
+
+	str = rspamd_mempool_alloc_tmp (pool, sizeof (*str));
+	str->begin = rspamd_mempool_alloc_tmp (pool, len);
+	str->len = 0;
+	str->size = len;
+
+	return str;
+}
+
+/*
+ * Truncate string to its len: returns a copy whose capacity equals the
+ * used length. The original is returned when it is NULL, empty or has no
+ * spare capacity.
+ */
+f_str_t *
+fstrtruncate (rspamd_mempool_t * pool, f_str_t * orig)
+{
+	f_str_t *res;
+
+	if (orig == NULL || orig->len == 0 || orig->size <= orig->len) {
+		return orig;
+	}
+
+	res = fstralloc (pool, orig->len);
+	if (res == NULL) {
+		return NULL;
+	}
+	/* fstrcpy moves bytes only, so the used length is recorded here */
+	res->len = fstrcpy (res, orig);
+
+	return res;
+}
+
+/*
+ * Enlarge string to new size: returns a copy with capacity newlen that
+ * keeps the content of orig. The original is returned when it is NULL or
+ * is already large enough.
+ */
+f_str_t *
+fstrgrow (rspamd_mempool_t * pool, f_str_t * orig, size_t newlen)
+{
+	f_str_t *res;
+
+	/* An empty string may be grown as well, so len is not checked here */
+	if (orig == NULL || orig->size >= newlen) {
+		return orig;
+	}
+
+	res = fstralloc (pool, newlen);
+	if (res == NULL) {
+		return NULL;
+	}
+	/* fstrcpy moves bytes only, so the used length is recorded here */
+	res->len = fstrcpy (res, orig);
+
+	return res;
+}
+
+/*
+ * Mix one byte into a running 32 bit hash value and return the new value
+ */
+static guint32
+fstrhash_c (gchar c, guint32 hval)
+{
+	guint32 byte = c & 0xFF;
+	guint32 edges;
+
+	/*
+	 * Replicate the byte into all four bytes of a word and xor it in,
+	 * so every input bit affects the output
+	 */
+	hval ^= byte | (byte << 8) | (byte << 16) | (byte << 24);
+
+	/* Fold some bits from the middle into the low order bits */
+	hval += (hval >> 12) & 0x0000ffff;
+
+	/* Exchange the highest and the lowest byte, keep the middle ones */
+	edges = (hval << 24) | ((hval >> 24) & 0xff);
+	hval = (hval & 0x00ffff00) | edges;
+
+	/* Rotate the result 3 bits to the left */
+	return (hval << 3) + (hval >> 29);
+}
+
+/*
+ * Return hash value for a string.
+ * The hash is seeded with the string length; NULL hashes to 0.
+ */
+guint32
+fstrhash (f_str_t * str)
+{
+	size_t i;
+	guint32 hval;
+	gchar *c;
+
+	if (str == NULL) {
+		return 0;
+	}
+
+	/* Touch str only after the NULL check */
+	c = str->begin;
+	hval = str->len;
+
+	for (i = 0; i < str->len; i++, c++) {
+		hval = fstrhash_c (*c, hval);
+	}
+	return hval;
+}
+
+/*
+ * Return hash value for a string converted to lowercase.
+ * With is_utf set the string is decoded as UTF-8 and every non-zero byte
+ * of each lowercased code point is hashed; input that is not valid UTF-8
+ * is hashed bytewise with ASCII lowercasing, like with is_utf unset.
+ * The hash is seeded with the string length; NULL hashes to 0.
+ */
+guint32
+fstrhash_lowercase (f_str_t * str, gboolean is_utf)
+{
+	gsize i;
+	guint32 j, hval;
+	const gchar *p, *end = NULL;
+	gchar t;
+	gunichar uc;
+
+	if (str == NULL) {
+		return 0;
+	}
+
+	/* Touch str only after the NULL check */
+	p = str->begin;
+	hval = str->len;
+
+	if (str->len == 0) {
+		/* Nothing to hash, the seed is the result */
+		return hval;
+	}
+
+	if (is_utf && g_utf8_validate (p, str->len, &end)) {
+		/* The whole string is valid UTF-8, end points right past it */
+		while (p < end) {
+			uc = g_unichar_tolower (g_utf8_get_char (p));
+			for (j = 0; j < sizeof (gunichar); j ++) {
+				t = (uc >> (j * 8)) & 0xff;
+				if (t != 0) {
+					hval = fstrhash_c (t, hval);
+				}
+			}
+			p = g_utf8_next_char (p);
+		}
+	}
+	else {
+		for (i = 0; i < str->len; i++, p++) {
+			hval = fstrhash_c (g_ascii_tolower (*p), hval);
+		}
+	}
+
+	return hval;
+}
+
+/*
+ * Strip leading and trailing ASCII whitespace from a string in place.
+ * The content is moved to the beginning of the buffer and len is reduced.
+ */
+void
+fstrstrip (f_str_t * str)
+{
+	size_t lead = 0, len;
+
+	/* Count leading whitespace */
+	while (lead < str->len && g_ascii_isspace (str->begin[lead])) {
+		lead++;
+	}
+
+	if (lead > 0) {
+		memmove (str->begin, str->begin + lead, str->len - lead);
+		str->len -= lead;
+	}
+
+	/*
+	 * Drop trailing whitespace: the last character is begin[len - 1],
+	 * begin[len] is already past the string and must not be examined
+	 */
+	len = str->len;
+	while (len > 0 && g_ascii_isspace (str->begin[len - 1])) {
+		len--;
+	}
+
+	str->len = len;
+}
diff --git a/src/libutil/fstring.h b/src/libutil/fstring.h
new file mode 100644
index 000000000..bd680e365
--- /dev/null
+++ b/src/libutil/fstring.h
@@ -0,0 +1,120 @@
+/*
+ * Functions for handling with fixed size strings
+ */
+
+#ifndef FSTRING_H
+#define FSTRING_H
+
+#include "config.h"
+#include "mem_pool.h"
+
+/*
+ * Recompute the free space of an f_str_buf_t and the used length of its
+ * underlying string from the current pos pointer.
+ * NOTE(review): expands to two statements and evaluates x several times, so
+ * it is only safe as a standalone statement with a side-effect free argument.
+ */
+#define update_buf_size(x) (x)->free = (x)->buf->size - ((x)->pos - (x)->buf->begin); (x)->buf->len = (x)->pos - (x)->buf->begin
+
+/*
+ * Fixed size string: a buffer with explicit used length and capacity
+ */
+typedef struct f_str_s {
+ gchar *begin; /* start of the data */
+ size_t len; /* number of bytes currently used */
+ size_t size; /* number of bytes allocated at begin */
+} f_str_t;
+
+/*
+ * Cursor over a fixed size string (see update_buf_size)
+ */
+typedef struct f_str_buf_s {
+ f_str_t *buf; /* underlying string */
+ gchar *pos; /* current position inside buf */
+ size_t free; /* bytes left between pos and the end of buf */
+} f_str_buf_t;
+
+/*
+ * Tokenizer state used by fstrtok
+ */
+typedef struct f_tok_s {
+ f_str_t word; /* the last extracted word, points into the tokenized text */
+ size_t pos; /* offset in the text where the next search starts */
+} f_tok_t;
+
+/*
+ * Search first occurrence of character in string
+ * @return offset of the character or -1 if it was not found
+ */
+ssize_t fstrchr (f_str_t *src, gchar c);
+
+/*
+ * Search last occurrence of character in string
+ * @return offset of the character or -1 if it was not found
+ */
+ssize_t fstrrchr (f_str_t *src, gchar c);
+
+/*
+ * Search for pattern in orig
+ * @return offset of the match or -1 if it was not found
+ */
+ssize_t fstrstr (f_str_t *orig, f_str_t *pattern);
+
+/*
+ * Search for pattern in orig ignoring case
+ * @return offset of the match or -1 if it was not found
+ */
+ssize_t fstrstri (f_str_t *orig, f_str_t *pattern);
+
+/*
+ * Split string by tokens
+ * word contains parsed word
+ * @return -1 if no new words can be extracted, 1 if a word was extracted
+ * and there are more words, 0 if the last word was extracted
+ */
+gint fstrtok (f_str_t *text, const gchar *sep, f_tok_t *state);
+
+/*
+ * Copy one string into other
+ * @return number of bytes copied or 0 if dest is too small
+ */
+size_t fstrcpy (f_str_t *dest, f_str_t *src);
+
+/*
+ * Concatenate two strings
+ * @return number of bytes appended or 0 if dest is too small
+ */
+size_t fstrcat (f_str_t *dest, f_str_t *src);
+
+/*
+ * Push one character to fstr
+ * @return 1 on success or 0 if the string needs to be reallocated
+ */
+gint fstrpush (f_str_t *dest, gchar c);
+
+/*
+ * Push one unicode character to fstr encoded as UTF-8
+ * @return number of bytes appended or 0 if the string needs to be reallocated
+ */
+gint fstrpush_unichar (f_str_t *dest, gunichar c);
+
+/*
+ * Allocate memory for f_str_t
+ */
+f_str_t* fstralloc (rspamd_mempool_t *pool, size_t len);
+
+/*
+ * Allocate memory for f_str_t from temporary pool
+ */
+f_str_t* fstralloc_tmp (rspamd_mempool_t *pool, size_t len);
+
+/*
+ * Truncate string to its len
+ */
+f_str_t* fstrtruncate (rspamd_mempool_t *pool, f_str_t *orig);
+
+/*
+ * Enlarge string to new size
+ */
+f_str_t* fstrgrow (rspamd_mempool_t *pool, f_str_t *orig, size_t newlen);
+
+/*
+ * Return specified character
+ */
+#define fstridx(str, pos) *((str)->begin + (pos))
+
+/*
+ * Return fast hash value for fixed string
+ */
+guint32 fstrhash (f_str_t *str);
+
+/*
+ * Return fast hash value for fixed string converted to lowercase
+ */
+guint32 fstrhash_lowercase (f_str_t *str, gboolean is_utf);
+/*
+ * Make copy of string to 0-terminated string
+ */
+gchar* fstrcstr (f_str_t *str, rspamd_mempool_t *pool);
+
+/*
+ * Strip fstr string from space symbols
+ */
+void fstrstrip (f_str_t *str);
+
+#endif
diff --git a/src/libutil/fuzzy.c b/src/libutil/fuzzy.c
new file mode 100644
index 000000000..7e8a01ce3
--- /dev/null
+++ b/src/libutil/fuzzy.c
@@ -0,0 +1,498 @@
+/*
+ * Copyright (c) 2009-2012, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#include "config.h"
+#include "mem_pool.h"
+#include "fstring.h"
+#include "fuzzy.h"
+#include "message.h"
+#include "url.h"
+#include "main.h"
+
+/* Number of bytes kept in the rolling hash window */
+#define ROLL_WINDOW_SIZE 9
+/* The smallest block size, fuzzy_blocksize doubles it as needed */
+#define MIN_FUZZY_BLOCK_SIZE 3
+/* Value the block hash is reset to after a block was emitted */
+#define HASH_INIT 0x28021967
+
+/* Alphabet used to encode one block hash as one output character */
+static const char *b64 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+
+/* State of the rolling hash */
+struct roll_state {
+ guint32 h[3]; /* the three components summed by fuzzy_roll_hash */
+ gchar window[ROLL_WINDOW_SIZE]; /* the last symbols seen, used as a ring buffer */
+ gint n; /* next slot of window to overwrite */
+};
+
+/*
+ * Single file-wide rolling state, zeroed at the start of fuzzy_init and
+ * fuzzy_init_part.
+ * NOTE(review): shared mutable state, concurrent hashing would interfere -
+ * confirm that callers are single threaded.
+ */
+static struct roll_state rs;
+
+
+/*
+ * Rolling hash function based on Adler-32 checksum.
+ * Feeds one symbol into the file-wide rolling state and returns the
+ * updated hash value.
+ */
+static guint32
+fuzzy_roll_hash (guint c)
+{
+	gint slot;
+
+	/* Wrap around when the window is full */
+	if (rs.n == ROLL_WINDOW_SIZE) {
+		rs.n = 0;
+	}
+	slot = rs.n;
+
+	rs.h[1] = rs.h[1] - rs.h[0] + ROLL_WINDOW_SIZE * c;
+	rs.h[0] = rs.h[0] + c - rs.window[slot];
+
+	/* The new symbol replaces the oldest one in the window */
+	rs.window[slot] = c;
+	rs.n = slot + 1;
+
+	rs.h[2] = (rs.h[2] << 5) ^ c;
+
+	return rs.h[0] + rs.h[1] + rs.h[2];
+}
+
+/*
+ * A simple non-rolling hash, based on the FNV hash.
+ * Mixes symbol c into hval and returns the new value.
+ */
+static guint32
+fuzzy_fnv_hash (guint c, guint32 hval)
+{
+	guint32 mixed = hval ^ c;
+
+	return mixed + (mixed << 1) + (mixed << 4) + (mixed << 7) +
+		(mixed << 8) + (mixed << 24);
+}
+
+/*
+ * Calculate blocksize depending on length of input: the smallest
+ * doubling of MIN_FUZZY_BLOCK_SIZE for which FUZZY_HASHLEN - 1 blocks
+ * cover len symbols.
+ */
+static guint32
+fuzzy_blocksize (guint32 len)
+{
+	guint32 bs;
+
+	for (bs = MIN_FUZZY_BLOCK_SIZE; bs * (FUZZY_HASHLEN - 1) < len; bs *= 2) {
+		/* keep doubling */
+	}
+
+	return bs;
+}
+
+
+/*
+ * Update hash with new symbol: both hashes are advanced and, when the
+ * rolling hash hits a block boundary, one character is written to the
+ * hash pipe.
+ */
+static void
+fuzzy_update (fuzzy_hash_t * h, guint c)
+{
+	h->rh = fuzzy_roll_hash (c);
+	h->h = fuzzy_fnv_hash (c, h->h);
+
+	if (h->rh % h->block_size != (h->block_size - 1)) {
+		/* Not a block boundary */
+		return;
+	}
+
+	h->hash_pipe[h->hi] = b64[h->h % 64];
+	/* Once the pipe is nearly full the last slot is overwritten instead */
+	if (h->hi < FUZZY_HASHLEN - 2) {
+		h->hi++;
+		h->h = HASH_INIT;
+	}
+}
+
+/*
+ * Update two hashes with one symbol. The rolling hash is advanced once and
+ * shared, while each hash keeps its own block hash and block size.
+ */
+static void
+fuzzy_update2 (fuzzy_hash_t * h1, fuzzy_hash_t *h2, guint c)
+{
+	fuzzy_hash_t *both[2], *cur;
+	guint k;
+
+	h1->rh = fuzzy_roll_hash (c);
+	h1->h = fuzzy_fnv_hash (c, h1->h);
+	h2->rh = h1->rh;
+	h2->h = fuzzy_fnv_hash (c, h2->h);
+
+	both[0] = h1;
+	both[1] = h2;
+
+	for (k = 0; k < 2; k++) {
+		cur = both[k];
+		if (cur->rh % cur->block_size == (cur->block_size - 1)) {
+			/* Block boundary: emit one character for this hash */
+			cur->hash_pipe[cur->hi] = b64[cur->h % 64];
+			if (cur->hi < FUZZY_HASHLEN - 2) {
+				cur->h = HASH_INIT;
+				cur->hi++;
+			}
+		}
+	}
+}
+
+/*
+ * Levenshtein distance between string1 and string2.
+ *
+ * A replacement adds 1 to the cost when the two characters differ.
+ * Only a single row of the cost matrix is kept in memory.
+ *
+ * @param s1 the first string
+ * @param len1 length of the first string
+ * @param s2 the second string
+ * @param len2 length of the second string
+ * @return the distance between the strings
+ */
+guint32
+lev_distance (gchar *s1, gint len1, gchar *s2, gint len2)
+{
+ gint i;
+ gint *row; /* we only need to keep one row of costs */
+ gint *end;
+ gint half, nx;
+ gchar *sx, *char2p, char1;
+ gint *p, D, x, offset, c3;
+
+ /* strip common prefix */
+ while (len1 > 0 && len2 > 0 && *s1 == *s2) {
+ len1--;
+ len2--;
+ s1++;
+ s2++;
+ }
+
+ /* strip common suffix */
+ while (len1 > 0 && len2 > 0 && s1[len1 - 1] == s2[len2 - 1]) {
+ len1--;
+ len2--;
+ }
+
+ /* catch trivial cases */
+ if (len1 == 0) {
+ return len2;
+ }
+
+ if (len2 == 0) {
+ return len1;
+ }
+
+ /* make the inner cycle (i.e. string2) the longer one */
+ if (len1 > len2) {
+ nx = len1;
+ sx = s1;
+ len1 = len2;
+ len2 = nx;
+ s1 = s2;
+ s2 = sx;
+ }
+ /* check len1 == 1 separately */
+ if (len1 == 1) {
+ /* one character: everything else in s2 is extra, minus one if it occurs there */
+ return len2 - (memchr (s2, *s1, len2) != NULL);
+ }
+
+ /* from here on both lengths include the extra leading matrix column/row */
+ len1++;
+ len2++;
+ half = len1 >> 1;
+
+ /* initialize first row */
+ row = g_malloc (len2 * sizeof (gint));
+ end = row + len2 - 1;
+ for (i = 0; i < len2; i++) {
+ row[i] = i;
+ }
+
+ /* in this case we don't have to scan two corner triangles (of size len1/2)
+ * in the matrix because no best path can go through them. note this
+ * breaks when len1 == len2 == 2 so the memchr() special case above is
+ * necessary */
+ row[0] = len1 - half - 1;
+ for (i = 1; i < len1; i++) {
+ char1 = s1[i - 1];
+ /* skip the upper triangle */
+ if (i >= len1 - half) {
+ offset = i - (len1 - half);
+ char2p = s2 + offset;
+ p = row + offset;
+ c3 = *(p++) + (char1 != *(char2p++));
+ x = *p;
+ x++;
+ D = x;
+ if (x > c3)
+ x = c3;
+ *(p++) = x;
+ }
+ else {
+ p = row + 1;
+ char2p = s2;
+ D = x = i;
+ }
+ /* skip the lower triangle */
+ if (i <= half + 1)
+ end = row + len2 + i - half - 2;
+ /* main */
+ while (p <= end) {
+ c3 = --D + (char1 != *(char2p++));
+ x++;
+ if (x > c3)
+ x = c3;
+ D = *p;
+ D++;
+ if (x > D)
+ x = D;
+ *(p++) = x;
+ }
+ /* lower triangle sentinel */
+ if (i <= half) {
+ c3 = --D + (char1 != *char2p);
+ x++;
+ if (x > c3)
+ x = c3;
+ *p = x;
+ }
+ }
+
+ /* the result is the last cell that was computed */
+ i = *end;
+ g_free (row);
+ return i;
+}
+
+/*
+ * Calculate fuzzy hash for specified string.
+ * The input is scanned twice: the first pass counts the symbols that take
+ * part in hashing to choose the block size, the second pass hashes them.
+ * Whitespace, punctuation and the tail of long runs of the same character
+ * are skipped in both passes.
+ * @param in input string
+ * @param pool pool object the hash is allocated from
+ * @return fuzzy hash object allocated in pool
+ */
+fuzzy_hash_t *
+fuzzy_init (f_str_t * in, rspamd_mempool_t * pool)
+{
+ fuzzy_hash_t *new;
+ guint i, repeats = 0;
+ gchar *c = in->begin, last = '\0';
+ gsize real_len = 0;
+
+ new = rspamd_mempool_alloc0 (pool, sizeof (fuzzy_hash_t));
+ /* Reset the file-wide rolling hash state */
+ bzero (&rs, sizeof (rs));
+ /* First pass: count the symbols that will be hashed */
+ for (i = 0; i < in->len; i++) {
+ if (*c == last) {
+ repeats++;
+ }
+ else {
+ repeats = 0;
+ }
+ if (!g_ascii_isspace (*c) && !g_ascii_ispunct (*c) && repeats < 3) {
+ real_len ++;
+ }
+ last = *c;
+ c++;
+ }
+
+ new->block_size = fuzzy_blocksize (real_len);
+ c = in->begin;
+
+ /*
+ * Second pass: feed the same symbols to the hash.
+ * NOTE(review): last and repeats are not reset here, so the run detection
+ * starts with the state left by the first pass. Resetting them would change
+ * the hashes produced for some inputs - confirm before touching.
+ */
+ for (i = 0; i < in->len; i++) {
+ if (*c == last) {
+ repeats++;
+ }
+ else {
+ repeats = 0;
+ }
+ if (!g_ascii_isspace (*c) && !g_ascii_ispunct (*c) && repeats < 3) {
+ fuzzy_update (new, *c);
+ }
+ last = *c;
+ c++;
+ }
+
+ /* Check whether we have more bytes in a rolling window */
+ if (new->rh != 0) {
+ new->hash_pipe[new->hi] = b64[new->h % 64];
+ }
+
+ return new;
+}
+
+/*
+ * Calculate fuzzy hash for a byte array by wrapping its data into a
+ * temporary f_str_t and passing it to fuzzy_init
+ */
+fuzzy_hash_t *
+fuzzy_init_byte_array (GByteArray * in, rspamd_mempool_t * pool)
+{
+	f_str_t view;
+
+	view.len = in->len;
+	view.begin = (gchar *)in->data;
+
+	return fuzzy_init (&view, pool);
+}
+
+/*
+ * Calculate fuzzy hashes for a text part and store them in part->fuzzy and
+ * part->double_fuzzy (the latter uses a doubled block size).
+ * The content is scanned twice: the first pass counts the symbols that take
+ * part in hashing, the second pass hashes them. Ranges listed in
+ * part->urls_offset are skipped in both passes. For UTF parts only
+ * alphanumeric characters are used, otherwise everything except ASCII
+ * whitespace and punctuation.
+ * If the number of counted symbols is non-zero and below max_diff, the
+ * symbols are also collected in part->diff_str, otherwise diff_str is NULL.
+ * @param part text part object
+ * @param pool pool object used for all allocations
+ * @param max_diff maximum text length to collect diff_str for
+ */
+void
+fuzzy_init_part (struct mime_text_part *part, rspamd_mempool_t *pool, gsize max_diff)
+{
+ fuzzy_hash_t *new, *new2;
+ gchar *c, *end, *begin;
+ gsize real_len = 0, len = part->content->len;
+ GList *cur_offset;
+ struct process_exception *cur_ex = NULL;
+ gunichar uc;
+ gboolean write_diff = FALSE;
+
+ cur_offset = part->urls_offset;
+ if (cur_offset != NULL) {
+ cur_ex = cur_offset->data;
+ }
+
+ begin = (gchar *)part->content->data;
+ c = begin;
+ new = rspamd_mempool_alloc0 (pool, sizeof (fuzzy_hash_t));
+ new2 = rspamd_mempool_alloc0 (pool, sizeof (fuzzy_hash_t));
+ /* Reset the file-wide rolling hash state */
+ bzero (&rs, sizeof (rs));
+ end = c + len;
+
+ /* First pass: count the symbols that will be hashed */
+ if (part->is_utf) {
+ while (c < end) {
+ if (cur_ex != NULL && (gint)cur_ex->pos == c - begin) {
+ /* Jump over the excluded range and move to the next exception */
+ c += cur_ex->len + 1;
+ cur_offset = g_list_next (cur_offset);
+ if (cur_offset != NULL) {
+ cur_ex = cur_offset->data;
+ }
+ }
+ else {
+ uc = g_utf8_get_char (c);
+ if (g_unichar_isalnum (uc)) {
+ real_len ++;
+ }
+ c = g_utf8_next_char (c);
+ }
+ }
+ }
+ else {
+ while (c < end) {
+ if (cur_ex != NULL && (gint)cur_ex->pos == c - begin) {
+ /* Jump over the excluded range and move to the next exception */
+ c += cur_ex->len + 1;
+ cur_offset = g_list_next (cur_offset);
+ if (cur_offset != NULL) {
+ cur_ex = cur_offset->data;
+ }
+ }
+ else {
+ if (!g_ascii_isspace (*c) && !g_ascii_ispunct (*c)) {
+ real_len ++;
+ }
+ c++;
+ }
+ }
+ }
+
+ write_diff = real_len > 0 && real_len < max_diff;
+
+ if (write_diff) {
+ /*
+ * NOTE(review): for UTF parts real_len counts characters while
+ * fstrpush_unichar appends UTF-8 bytes, so this capacity may be too
+ * small for non-ASCII text - confirm.
+ */
+ part->diff_str = fstralloc (pool, real_len);
+ }
+ else {
+ part->diff_str = NULL;
+ }
+
+ new->block_size = fuzzy_blocksize (real_len);
+ new2->block_size = new->block_size * 2;
+
+ /* Second pass: restart from the first exception and the first byte */
+ cur_offset = part->urls_offset;
+ if (cur_offset != NULL) {
+ cur_ex = cur_offset->data;
+ }
+
+ begin = (gchar *)part->content->data;
+ c = begin;
+ end = c + len;
+ if (part->is_utf) {
+
+ while (c < end) {
+ if (cur_ex != NULL && (gint)cur_ex->pos == c - begin) {
+ c += cur_ex->len + 1;
+ cur_offset = g_list_next (cur_offset);
+ if (cur_offset != NULL) {
+ cur_ex = cur_offset->data;
+ }
+ }
+ else {
+ uc = g_utf8_get_char (c);
+ if (g_unichar_isalnum (uc)) {
+ fuzzy_update2 (new, new2, uc);
+ if (write_diff) {
+ fstrpush_unichar (part->diff_str, uc);
+ }
+ }
+ c = g_utf8_next_char (c);
+ }
+ }
+ }
+ else {
+ while (c < end) {
+ if (cur_ex != NULL && (gint)cur_ex->pos == c - begin) {
+ c += cur_ex->len + 1;
+ cur_offset = g_list_next (cur_offset);
+ if (cur_offset != NULL) {
+ cur_ex = cur_offset->data;
+ }
+ }
+ else {
+ if (!g_ascii_isspace (*c) && !g_ascii_ispunct (*c)) {
+ fuzzy_update2 (new, new2, *c);
+ if (write_diff) {
+ fstrpush (part->diff_str, *c);
+ }
+ }
+ c++;
+ }
+ }
+ }
+
+ /* Check whether we have more bytes in a rolling window */
+ if (new->rh != 0) {
+ new->hash_pipe[new->hi] = b64[new->h % 64];
+ }
+ if (new2->rh != 0) {
+ new2->hash_pipe[new2->hi] = b64[new2->h % 64];
+ }
+
+ part->fuzzy = new;
+ part->double_fuzzy = new2;
+}
+
+/*
+ * Compare score of difference between two hashes.
+ * Hashes with different block sizes are treated as different; two empty
+ * hashes are treated as identical.
+ * @param h1 first hash
+ * @param h2 second hash
+ * @return 0 - different hashes, 100 - identical hashes
+ */
+gint
+fuzzy_compare_hashes (fuzzy_hash_t * h1, fuzzy_hash_t * h2)
+{
+	gint res, l1, l2;
+
+	/* If we have hashes of different size, input strings are too different */
+	if (h1->block_size != h2->block_size) {
+		return 0;
+	}
+
+	l1 = strlen (h1->hash_pipe);
+	l2 = strlen (h2->hash_pipe);
+
+	if (l1 == 0 || l2 == 0) {
+		return (l1 == 0 && l2 == 0) ? 100 : 0;
+	}
+
+	res = lev_distance (h1->hash_pipe, l1, h2->hash_pipe, l2);
+	res = 100 - (2 * res * 100) / (l1 + l2);
+
+	/*
+	 * For hashes of very different length the penalty exceeds 100:
+	 * keep the result inside the documented 0..100 range
+	 */
+	return res < 0 ? 0 : res;
+}
+
+/*
+ * Compare two text parts by their fuzzy hashes. If the block sizes of the
+ * plain hashes differ, the doubled hash of one part is tried against the
+ * plain hash of the other one. Returns the comparison score, or 0 if a
+ * part has no hash or no pair of hashes has matching block sizes.
+ */
+gint
+fuzzy_compare_parts (struct mime_text_part *p1, struct mime_text_part *p2)
+{
+	if (p1->fuzzy == NULL || p2->fuzzy == NULL) {
+		return 0;
+	}
+
+	if (p1->fuzzy->block_size == p2->fuzzy->block_size) {
+		return fuzzy_compare_hashes (p1->fuzzy, p2->fuzzy);
+	}
+
+	if (p1->double_fuzzy->block_size == p2->fuzzy->block_size) {
+		return fuzzy_compare_hashes (p1->double_fuzzy, p2->fuzzy);
+	}
+
+	if (p2->double_fuzzy->block_size == p1->fuzzy->block_size) {
+		return fuzzy_compare_hashes (p2->double_fuzzy, p1->fuzzy);
+	}
+
+	return 0;
+}
+
+/*
+ * vi:ts=4
+ */
diff --git a/src/libutil/fuzzy.h b/src/libutil/fuzzy.h
new file mode 100644
index 000000000..c226c5765
--- /dev/null
+++ b/src/libutil/fuzzy.h
@@ -0,0 +1,69 @@
+/**
+ * @file fuzzy.h
+ * Fuzzy hashes API
+ */
+
+#ifndef RSPAMD_FUZZY_H
+#define RSPAMD_FUZZY_H
+
+#include "config.h"
+#include "mem_pool.h"
+#include "fstring.h"
+
+/** Size of the hash pipe buffer in bytes */
+#define FUZZY_HASHLEN 64
+
+/**
+ * Fuzzy hash together with the state used while it is being computed
+ */
+typedef struct fuzzy_hash_s {
+ gchar hash_pipe[FUZZY_HASHLEN]; /**< result hash */
+ guint32 block_size; /**< current blocksize */
+ guint32 rh; /**< roll hash value */
+ guint32 h; /**< hash of block */
+ guint32 hi; /**< current index in hash pipe */
+} fuzzy_hash_t;
+
+struct mime_text_part;
+
+/**
+ * Calculate fuzzy hash for specified string
+ * @param in input string
+ * @param pool pool object
+ * @return fuzzy_hash object allocated in pool
+ */
+fuzzy_hash_t * fuzzy_init (f_str_t *in, rspamd_mempool_t *pool);
+/**
+ * Calculate fuzzy hash for specified byte array
+ * @param in input string
+ * @param pool pool object
+ * @return fuzzy_hash object allocated in pool
+ */
+fuzzy_hash_t * fuzzy_init_byte_array (GByteArray *in, rspamd_mempool_t *pool);
+
+/**
+ * Calculate fuzzy hashes for specified text part; the results are stored
+ * in the part itself, nothing is returned
+ * @param part text part object
+ * @param pool pool object
+ * @param max_diff maximum text length to use diff algorithm in comparisons
+ */
+void fuzzy_init_part (struct mime_text_part *part, rspamd_mempool_t *pool, gsize max_diff);
+
+/**
+ * Compare score of difference between two hashes
+ * @param h1 first hash
+ * @param h2 second hash
+ * @return result in percents 0 - different hashes, 100 - identical hashes
+ */
+gint fuzzy_compare_hashes (fuzzy_hash_t *h1, fuzzy_hash_t *h2);
+
+/*
+ * Compare two text parts and return the score produced by
+ * fuzzy_compare_hashes for their hashes, or 0 if they cannot be compared
+ */
+gint fuzzy_compare_parts (struct mime_text_part *p1, struct mime_text_part *p2);
+
+/*
+ * Calculate Levenshtein distance between two strings. Note: this algorithm should be used
+ * only for short texts - it runs too slow on long ones.
+ */
+guint32 lev_distance (gchar *s1, gint len1, gchar *s2, gint len2);
+
+
+#endif
diff --git a/src/libutil/hash.c b/src/libutil/hash.c
new file mode 100644
index 000000000..3bb381651
--- /dev/null
+++ b/src/libutil/hash.c
@@ -0,0 +1,489 @@
+/*
+ * Copyright (c) 2009-2012, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "hash.h"
+
+#define HASH_TABLE_MIN_SIZE 19
+#define HASH_TABLE_MAX_SIZE 13845163
+
+/*
+ * Find the link that points at the node for `key`.
+ *
+ * The returned pointer is never NULL: when the key is present it is the
+ * address of the link referencing its node, otherwise it is the address of
+ * the terminating NULL link of the bucket chain, so callers may both test
+ * `*result` and insert a new node through it.
+ * If `hash_return` is not NULL, the computed hash value is stored there.
+ * For shared hashes the chain is walked under the read lock, which is
+ * released before returning.
+ */
+static inline struct rspamd_hash_node **
+rspamd_hash_lookup_node (rspamd_hash_t * hash, gconstpointer key, guint * hash_return)
+{
+	struct rspamd_hash_node **slot, *cur;
+	guint hval;
+
+	hval = (*hash->hash_func) (key);
+
+	if (hash->shared) {
+		rspamd_mempool_rlock_rwlock (hash->lock);
+	}
+
+	slot = &hash->nodes[hval % hash->size];
+	if (hash_return != NULL) {
+		*hash_return = hval;
+	}
+
+	/*
+	 * The choice between a user supplied comparator and plain pointer
+	 * identity is made once, outside of the chain walk, to keep the
+	 * inner loops as small as possible.
+	 */
+	if (hash->key_equal_func != NULL) {
+		for (cur = *slot; cur != NULL; cur = *slot) {
+			/* Compare cached full hashes first, call the comparator only on a match */
+			if (cur->key_hash == hval && hash->key_equal_func (cur->key, key)) {
+				break;
+			}
+			slot = &cur->next;
+		}
+	}
+	else {
+		for (cur = *slot; cur != NULL; cur = *slot) {
+			if (cur->key == key) {
+				break;
+			}
+			slot = &cur->next;
+		}
+	}
+
+	if (hash->shared) {
+		rspamd_mempool_runlock_rwlock (hash->lock);
+	}
+
+	return slot;
+}
+
+/*
+ * Unlink the node referenced by `**node_ptr_ptr` from its bucket chain and
+ * decrease the node counter. The table is not resized and the node memory
+ * is not released here. Shared hashes are modified under the write lock.
+ */
+static void
+rspamd_hash_remove_node (rspamd_hash_t * hash, struct rspamd_hash_node ***node_ptr_ptr)
+{
+	struct rspamd_hash_node **link, *victim;
+
+	if (hash->shared) {
+		rspamd_mempool_wlock_rwlock (hash->lock);
+	}
+
+	link = *node_ptr_ptr;
+	victim = *link;
+	/* Bypass the victim: its predecessor now points at its successor */
+	*link = victim->next;
+	hash->nnodes--;
+
+	if (hash->shared) {
+		rspamd_mempool_wunlock_rwlock (hash->lock);
+	}
+}
+
+/*
+ * Resizes the hash table to the optimal size based on the number of
+ * nodes currently held.
+ *
+ * The new bucket array is taken from the hash pool (shared memory for
+ * shared hashes); the old array is simply abandoned as no release call is
+ * made for it. Existing nodes are relinked into the new array using their
+ * cached `key_hash`, so no hash function calls are needed.
+ */
+static void
+rspamd_hash_resize (rspamd_hash_t * hash)
+{
+	struct rspamd_hash_node **new_nodes;
+	struct rspamd_hash_node *node, *next;
+	guint hash_val;
+	gint new_size, i;
+
+	new_size = g_spaced_primes_closest (hash->nnodes);
+	new_size = CLAMP (new_size, HASH_TABLE_MIN_SIZE, HASH_TABLE_MAX_SIZE);
+
+	/*
+	 * The bucket array MUST be zeroed: during rehashing every bucket head
+	 * is read as the tail of the chain being built, so an uninitialised
+	 * slot would leave a garbage `next` pointer at the end of the chain.
+	 */
+	if (hash->shared) {
+		new_nodes = rspamd_mempool_alloc0_shared (hash->pool, sizeof (struct rspamd_hash_node *) * new_size);
+	}
+	else {
+		new_nodes = rspamd_mempool_alloc0 (hash->pool, sizeof (struct rspamd_hash_node *) * new_size);
+	}
+
+	if (hash->shared) {
+		rspamd_mempool_wlock_rwlock (hash->lock);
+	}
+
+	for (i = 0; i < hash->size; i++) {
+		for (node = hash->nodes[i]; node; node = next) {
+			/* Remember the successor before the node is relinked */
+			next = node->next;
+			hash_val = node->key_hash % new_size;
+			/* Push the node to the head of its new bucket */
+			node->next = new_nodes[hash_val];
+			new_nodes[hash_val] = node;
+		}
+	}
+
+	hash->nodes = new_nodes;
+	hash->size = new_size;
+
+	if (hash->shared) {
+		rspamd_mempool_wunlock_rwlock (hash->lock);
+	}
+}
+
+/*
+ * Resize the table when the bucket count is at least three times off the
+ * node count in either direction, staying inside the
+ * [HASH_TABLE_MIN_SIZE, HASH_TABLE_MAX_SIZE] limits.
+ */
+static inline void
+rspamd_hash_maybe_resize (rspamd_hash_t * hash)
+{
+	gint nnodes = hash->nnodes;
+	gint size = hash->size;
+
+	/* Too many buckets for the nodes held: shrink */
+	if (size >= 3 * nnodes && size > HASH_TABLE_MIN_SIZE) {
+		rspamd_hash_resize (hash);
+		return;
+	}
+
+	/* Too few buckets for the nodes held: grow */
+	if (3 * size <= nnodes && size < HASH_TABLE_MAX_SIZE) {
+		rspamd_hash_resize (hash);
+	}
+}
+
+/*
+ * Create a new (non-shared) hash whose structure and bucket array are
+ * allocated from `pool`. When `hash_func` is NULL, g_direct_hash is used;
+ * when `key_equal_func` is NULL, keys are compared by pointer value.
+ */
+rspamd_hash_t *
+rspamd_hash_new (rspamd_mempool_t * pool, GHashFunc hash_func, GEqualFunc key_equal_func)
+{
+	rspamd_hash_t *res;
+
+	res = rspamd_mempool_alloc (pool, sizeof (rspamd_hash_t));
+
+	res->pool = pool;
+	res->shared = 0;
+	res->nnodes = 0;
+	res->size = HASH_TABLE_MIN_SIZE;
+	/* Buckets must start empty, hence the zeroing allocator */
+	res->nodes = rspamd_mempool_alloc0 (pool, sizeof (struct rspamd_hash_node *) * res->size);
+	res->key_equal_func = key_equal_func;
+
+	if (hash_func != NULL) {
+		res->hash_func = hash_func;
+	}
+	else {
+		res->hash_func = g_direct_hash;
+	}
+
+	return res;
+}
+
+/*
+ * Create new hash in specified pool using shared memory
+ *
+ * `size` is the number of buckets. A non-positive value would make the
+ * bucket index calculation (`hash % size`) divide by zero or request a
+ * bogus allocation, so it is replaced by HASH_TABLE_MIN_SIZE.
+ * When `hash_func` is NULL, g_direct_hash is used; when `key_equal_func`
+ * is NULL, keys are compared by pointer value.
+ */
+rspamd_hash_t *
+rspamd_hash_new_shared (rspamd_mempool_t * pool, GHashFunc hash_func, GEqualFunc key_equal_func, gint size)
+{
+	rspamd_hash_t *hash;
+
+	if (size <= 0) {
+		size = HASH_TABLE_MIN_SIZE;
+	}
+
+	hash = rspamd_mempool_alloc_shared (pool, sizeof (rspamd_hash_t));
+	hash->size = size;
+	hash->nnodes = 0;
+	hash->hash_func = hash_func ? hash_func : g_direct_hash;
+	hash->key_equal_func = key_equal_func;
+	hash->nodes = rspamd_mempool_alloc0_shared (pool, sizeof (struct rspamd_hash_node *) * hash->size);
+	hash->shared = 1;
+	/* Get rwlock from pool for locking on lookup/insert/remove operations */
+	hash->lock = rspamd_mempool_get_rwlock (pool);
+	hash->pool = pool;
+
+	return hash;
+}
+
+/*
+ * Insert a key/value pair. If the key is already present, both the stored
+ * key and value are replaced; otherwise a new node is allocated from the
+ * hash pool and linked at the end of its bucket chain.
+ * Shared hashes are modified under the write lock and are never resized
+ * here; non-shared hashes are resized when needed.
+ */
+void
+rspamd_hash_insert (rspamd_hash_t * hash, gpointer key, gpointer value)
+{
+	struct rspamd_hash_node **slot, *entry;
+	guint hval;
+
+	g_return_if_fail (hash != NULL);
+
+	slot = rspamd_hash_lookup_node (hash, key, &hval);
+
+	if (hash->shared) {
+		rspamd_mempool_wlock_rwlock (hash->lock);
+	}
+
+	entry = *slot;
+	if (entry == NULL) {
+		/* No such key yet: build a complete node before publishing it */
+		entry = hash->shared ?
+			rspamd_mempool_alloc_shared (hash->pool, sizeof (struct rspamd_hash_node)) :
+			rspamd_mempool_alloc (hash->pool, sizeof (struct rspamd_hash_node));
+
+		entry->key = key;
+		entry->value = value;
+		entry->key_hash = hval;
+		entry->next = NULL;
+
+		*slot = entry;
+		hash->nnodes++;
+	}
+	else {
+		/* Key exists: overwrite in place */
+		entry->key = key;
+		entry->value = value;
+	}
+
+	if (hash->shared) {
+		rspamd_mempool_wunlock_rwlock (hash->lock);
+	}
+	else {
+		rspamd_hash_maybe_resize (hash);
+	}
+}
+
+/*
+ * Remove the node stored under `key`.
+ * Returns TRUE if a node was found and unlinked, FALSE if the key is
+ * absent (or `hash` is NULL). The table may be resized after a removal.
+ */
+gboolean
+rspamd_hash_remove (rspamd_hash_t * hash, gpointer key)
+{
+	struct rspamd_hash_node **slot;
+
+	g_return_val_if_fail (hash != NULL, FALSE);
+
+	slot = rspamd_hash_lookup_node (hash, key, NULL);
+
+	if (*slot != NULL) {
+		rspamd_hash_remove_node (hash, &slot);
+		rspamd_hash_maybe_resize (hash);
+
+		return TRUE;
+	}
+
+	return FALSE;
+}
+
+/*
+ * Return the value stored under `key`, or NULL when the key is absent
+ * (or `hash` is NULL). A stored NULL value cannot be told apart from a
+ * missing key.
+ */
+gpointer
+rspamd_hash_lookup (rspamd_hash_t * hash, gpointer key)
+{
+	struct rspamd_hash_node *found;
+
+	g_return_val_if_fail (hash != NULL, NULL);
+
+	found = *rspamd_hash_lookup_node (hash, key, NULL);
+	if (found == NULL) {
+		return NULL;
+	}
+
+	return found->value;
+}
+
+/*
+ * Iterate through the hash, calling `func (key, value, user_data)` for
+ * every stored node, bucket by bucket. Shared hashes are traversed under
+ * the read lock. Nothing is done when `hash` or `func` is NULL.
+ */
+void
+rspamd_hash_foreach (rspamd_hash_t * hash, GHFunc func, gpointer user_data)
+{
+	struct rspamd_hash_node *cur;
+	gint bucket;
+
+	g_return_if_fail (hash != NULL);
+	g_return_if_fail (func != NULL);
+
+	if (hash->shared) {
+		rspamd_mempool_rlock_rwlock (hash->lock);
+	}
+
+	for (bucket = 0; bucket < hash->size; bucket++) {
+		cur = hash->nodes[bucket];
+		while (cur != NULL) {
+			(*func) (cur->key, cur->value, user_data);
+			cur = cur->next;
+		}
+	}
+
+	if (hash->shared) {
+		rspamd_mempool_runlock_rwlock (hash->lock);
+	}
+}
+
+/**
+ * LRU hashing
+ */
+
+/*
+ * Destroy notifier for LRU elements: releases the user value (when a
+ * value destructor is set), removes the element's link from the owner's
+ * queue and frees the element itself.
+ */
+static void
+rspamd_lru_hash_destroy_node (gpointer v)
+{
+	rspamd_lru_element_t *elt = v;
+	rspamd_lru_hash_t *owner = elt->hash;
+
+	if (owner->value_destroy != NULL) {
+		owner->value_destroy (elt->data);
+	}
+
+	g_queue_delete_link (owner->q, elt->link);
+	g_slice_free1 (sizeof (rspamd_lru_element_t), elt);
+}
+
+/*
+ * Allocate an LRU element describing `key`/`value` stored at time `now`
+ * with the given `ttl`. The `link` member is left unset: it is filled in
+ * by the caller once the element has been pushed to the queue.
+ */
+static rspamd_lru_element_t*
+rspamd_lru_create_node (rspamd_lru_hash_t *hash, gpointer key, gpointer value, time_t now, guint ttl)
+{
+	rspamd_lru_element_t *elt;
+
+	elt = g_slice_alloc (sizeof (rspamd_lru_element_t));
+
+	elt->hash = hash;
+	elt->key = key;
+	elt->data = value;
+	elt->ttl = ttl;
+	elt->store_time = now;
+
+	return elt;
+}
+
+/**
+ * Create new lru hash with GHashTable as storage
+ * @param hash_func pointer to hash function
+ * @param key_equal_func pointer to function for comparing keys
+ * @param maxsize maximum elements in a hash
+ * @param maxage maximum age of element
+ * @param key_destroy destructor for keys, handed over to the GHashTable
+ * @param value_destroy destructor for values, called when an element is destroyed
+ * @return new rspamd_lru_hash object
+ */
+rspamd_lru_hash_t*
+rspamd_lru_hash_new (GHashFunc hash_func, GEqualFunc key_equal_func, gint maxsize, gint maxage,
+		GDestroyNotify key_destroy, GDestroyNotify value_destroy)
+{
+	rspamd_lru_hash_t *lru;
+
+	lru = g_malloc (sizeof (rspamd_lru_hash_t));
+
+	/* Elements are destroyed by the table itself via rspamd_lru_hash_destroy_node */
+	lru->storage = g_hash_table_new_full (hash_func, key_equal_func, key_destroy, rspamd_lru_hash_destroy_node);
+	lru->maxsize = maxsize;
+	lru->maxage = maxage;
+	/* Keys are released by the GHashTable, so no destructor is kept here */
+	lru->key_destroy = NULL;
+	lru->value_destroy = value_destroy;
+	lru->q = g_queue_new ();
+
+	/* Plug GHashTable operations in as the storage backend */
+	lru->lookup_func = (lru_cache_lookup_func)g_hash_table_lookup;
+	lru->insert_func = (lru_cache_insert_func)g_hash_table_replace;
+	lru->delete_func = (lru_cache_delete_func)g_hash_table_remove;
+	lru->destroy_func = (lru_cache_destroy_func)g_hash_table_destroy;
+
+	return lru;
+}
+/**
+ * Create new lru hash with custom storage
+ * @param hash_func pointer to hash function (not used by this function)
+ * @param key_equal_func pointer to function for comparing keys (not used by this function)
+ * @param maxsize maximum elements in a hash
+ * @param maxage maximum age of element
+ * @param key_destroy destructor for keys
+ * @param value_destroy destructor for values
+ * @param storage opaque storage object passed to the callbacks below
+ * @param insert_func callback that stores an element in storage
+ * @param lookup_func callback that finds an element in storage
+ * @param delete_func callback that removes an element from storage
+ * @return new rspamd_lru_hash object; its storage destructor is left unset
+ */
+rspamd_lru_hash_t*
+rspamd_lru_hash_new_full (GHashFunc hash_func, GEqualFunc key_equal_func,
+		gint maxsize, gint maxage, GDestroyNotify key_destroy, GDestroyNotify value_destroy,
+		gpointer storage, lru_cache_insert_func insert_func, lru_cache_lookup_func lookup_func,
+		lru_cache_delete_func delete_func)
+{
+	rspamd_lru_hash_t *lru;
+
+	lru = g_malloc (sizeof (rspamd_lru_hash_t));
+
+	lru->storage = storage;
+	lru->maxsize = maxsize;
+	lru->maxage = maxage;
+	lru->key_destroy = key_destroy;
+	lru->value_destroy = value_destroy;
+	lru->q = g_queue_new ();
+
+	lru->lookup_func = lookup_func;
+	lru->insert_func = insert_func;
+	lru->delete_func = delete_func;
+	/* The caller owns the storage, so it is never destroyed by the lru hash */
+	lru->destroy_func = NULL;
+
+	return lru;
+}
+
+/**
+ * Lookup item from hash
+ * @param hash hash object
+ * @param key key to find
+ * @param now current time, compared with the store time of elements
+ * @return value of key or NULL if key is not found or has expired
+ */
+gpointer
+rspamd_lru_hash_lookup (rspamd_lru_hash_t *hash, gpointer key, time_t now)
+{
+	rspamd_lru_element_t *elt;
+
+	elt = hash->lookup_func (hash->storage, key);
+	if (elt == NULL) {
+		return NULL;
+	}
+
+	/* Per-element ttl: zero means that the element has no own ttl */
+	if (elt->ttl != 0 && now - elt->store_time > elt->ttl) {
+		hash->delete_func (hash->storage, key);
+		return NULL;
+	}
+
+	/* Global maximum age: on a stale hit expire everything stale from the queue tail */
+	if (hash->maxage > 0 && now - elt->store_time > hash->maxage) {
+		for (elt = g_queue_peek_tail (hash->q);
+				elt != NULL && now - elt->store_time > hash->maxage;
+				elt = g_queue_peek_tail (hash->q)) {
+			hash->delete_func (hash->storage, elt->key);
+		}
+
+		return NULL;
+	}
+
+	return elt->data;
+}
+/**
+ * Insert item in hash
+ * @param hash hash object
+ * @param key key to insert
+ * @param value value of key
+ * @param now current time, saved as the store time of the element
+ * @param ttl ttl of the element, 0 means no own ttl
+ *
+ * An existing element with the same key is deleted first. Otherwise, if
+ * the queue has reached maxsize, elements older than maxage are expired
+ * from the queue tail; when nothing could be expired that way, the tail
+ * element is deleted explicitly.
+ */
+void
+rspamd_lru_hash_insert (rspamd_lru_hash_t *hash, gpointer key, gpointer value,
+		time_t now, guint ttl)
+{
+	rspamd_lru_element_t *elt;
+	gint evicted = 0;
+
+	elt = hash->lookup_func (hash->storage, key);
+
+	if (elt != NULL) {
+		/* Replace: drop the old element stored under this key */
+		hash->delete_func (hash->storage, elt->key);
+	}
+	else if (hash->maxsize > 0 &&
+			(gint)g_queue_get_length (hash->q) >= hash->maxsize) {
+		/* The hash is full: make room starting from the oldest element */
+		elt = g_queue_peek_tail (hash->q);
+
+		if (hash->maxage > 0) {
+			/* Stop at the first fresh element or at an element without a key */
+			while (elt != NULL && now - elt->store_time > hash->maxage &&
+					elt->key != NULL) {
+				hash->delete_func (hash->storage, elt->key);
+				elt = g_queue_peek_tail (hash->q);
+				evicted ++;
+			}
+		}
+
+		if (evicted == 0 && elt->key != NULL) {
+			/* Nothing has expired, remove the tail explicitly */
+			hash->delete_func (hash->storage, elt->key);
+		}
+	}
+
+	elt = rspamd_lru_create_node (hash, key, value, now, ttl);
+	hash->insert_func (hash->storage, key, elt);
+	/* Newest elements live at the queue head; remember the link for fast removal */
+	g_queue_push_head (hash->q, elt);
+	elt->link = g_queue_peek_head_link (hash->q);
+}
+
+/*
+ * Destroy an lru hash: the storage is destroyed only when a storage
+ * destructor is set, then the queue and the hash object are freed.
+ */
+void
+rspamd_lru_hash_destroy (rspamd_lru_hash_t *hash)
+{
+	lru_cache_destroy_func destroy_storage = hash->destroy_func;
+
+	if (destroy_storage != NULL) {
+		destroy_storage (hash->storage);
+	}
+
+	g_queue_free (hash->q);
+	g_free (hash);
+}
+
+/*
+ * vi:ts=4
+ */
diff --git a/src/libutil/hash.h b/src/libutil/hash.h
new file mode 100644
index 000000000..c5d4639af
--- /dev/null
+++ b/src/libutil/hash.h
@@ -0,0 +1,160 @@
+/**
+ * @file hash.h
+ * Hash table implementation that allows using memory pools for storage as well as using
+ * shared memory for this purpose
+ */
+
+#ifndef RSPAMD_HASH_H
+#define RSPAMD_HASH_H
+
+#include "mem_pool.h"
+
+/** Single node of a bucket chain */
+struct rspamd_hash_node {
+ gpointer key; /**< key of the node */
+ gpointer value; /**< value stored under the key */
+ guint key_hash; /**< full hash value of the key, cached at insertion */
+ struct rspamd_hash_node *next; /**< next node in the same bucket or NULL */
+};
+
+/** Hash table with chaining whose memory is taken from a memory pool */
+typedef struct rspamd_hash_s {
+ gint size; /**< number of buckets */
+ gint nnodes; /**< number of stored nodes */
+ struct rspamd_hash_node **nodes; /**< array of bucket heads */
+
+ GHashFunc hash_func; /**< hash function for keys */
+ GEqualFunc key_equal_func; /**< key comparator, NULL means comparing keys as pointers */
+ gint shared; /**< non-zero if the hash lives in shared memory and uses the lock */
+ rspamd_mempool_rwlock_t *lock; /**< rwlock, set only for shared hashes */
+ rspamd_mempool_t *pool; /**< pool used for all allocations */
+} rspamd_hash_t;
+
+/* Callbacks that form the storage backend of an lru hash */
+/** Store value under key in storage */
+typedef void (*lru_cache_insert_func)(gpointer storage, gpointer key, gpointer value);
+/** Find the value stored under key, NULL if there is none */
+typedef gpointer (*lru_cache_lookup_func)(gpointer storage, gpointer key);
+/** Remove the value stored under key */
+typedef gboolean (*lru_cache_delete_func)(gpointer storage, gpointer key);
+/** Destroy the storage itself */
+typedef void (*lru_cache_destroy_func)(gpointer storage);
+
+/** LRU hash: a storage backend plus a queue that orders elements by insertion */
+typedef struct rspamd_lru_hash_s {
+ gint maxsize; /**< maximum number of elements, not enforced if <= 0 */
+ gint maxage; /**< maximum age of an element, not enforced if <= 0 */
+ GDestroyNotify value_destroy; /**< destructor for values, may be NULL */
+ GDestroyNotify key_destroy; /**< destructor for keys, may be NULL */
+ GQueue *q; /**< queue of elements, the newest one is at the head */
+ gpointer storage; /**< opaque storage passed to the callbacks */
+ lru_cache_insert_func insert_func; /**< storage insert callback */
+ lru_cache_lookup_func lookup_func; /**< storage lookup callback */
+ lru_cache_delete_func delete_func; /**< storage delete callback */
+ lru_cache_destroy_func destroy_func; /**< storage destructor, NULL if storage is not owned */
+} rspamd_lru_hash_t;
+
+/** Element of an lru hash: this is what is actually kept in the storage */
+typedef struct rspamd_lru_element_s {
+ gpointer data; /**< user value */
+ gpointer key; /**< user key */
+ time_t store_time; /**< time of insertion as passed by the caller */
+ guint ttl; /**< own ttl of the element, 0 means none */
+ rspamd_lru_hash_t *hash; /**< owning lru hash */
+ GList *link; /**< link of this element in the queue of the owning hash */
+} rspamd_lru_element_t;
+
+
+/** Number of nodes stored in a hash (x is a pointer to rspamd_hash_t) */
+#define rspamd_hash_size(x) (x)->nnodes
+
+/**
+ * Create new hash in specified pool
+ * @param pool memory pool object
+ * @param hash_func pointer to hash function, g_direct_hash is used if NULL
+ * @param key_equal_func pointer to function for comparing keys, keys are compared as pointers if NULL
+ * @return new rspamd_hash object
+ */
+rspamd_hash_t* rspamd_hash_new (rspamd_mempool_t *pool, GHashFunc hash_func, GEqualFunc key_equal_func);
+
+/**
+ * Create new hash in specified pool using shared memory
+ * @param pool memory pool object
+ * @param hash_func pointer to hash function, g_direct_hash is used if NULL
+ * @param key_equal_func pointer to function for comparing keys, keys are compared as pointers if NULL
+ * @param size number of buckets, must be positive
+ * @return new rspamd_hash object
+ */
+rspamd_hash_t* rspamd_hash_new_shared (rspamd_mempool_t *pool, GHashFunc hash_func, GEqualFunc key_equal_func, gint size);
+
+/**
+ * Insert item in hash; an existing item with the same key is replaced
+ * @param hash hash object
+ * @param key key to insert
+ * @param value value of key
+ */
+void rspamd_hash_insert (rspamd_hash_t *hash, gpointer key, gpointer value);
+
+/**
+ * Remove item from hash
+ * @param hash hash object
+ * @param key key to delete
+ * @return TRUE if the key was found and removed, FALSE otherwise
+ */
+gboolean rspamd_hash_remove (rspamd_hash_t *hash, gpointer key);
+
+/**
+ * Lookup item from hash
+ * @param hash hash object
+ * @param key key to find
+ * @return value of key or NULL if key is not found
+ */
+gpointer rspamd_hash_lookup (rspamd_hash_t *hash, gpointer key);
+
+/**
+ * Iterate through hash
+ * @param hash hash object
+ * @param func user's function that would be called for each key/value pair
+ * @param user_data pointer to user's data that would be passed to user's function
+ */
+void rspamd_hash_foreach (rspamd_hash_t *hash, GHFunc func, gpointer user_data);
+
+/**
+ * Create new lru hash
+ * @param hash_func pointer to hash function
+ * @param key_equal_func pointer to function for comparing keys
+ * @param maxsize maximum elements in a hash
+ * @param maxage maximum age of element
+ * @param key_destroy destructor for keys
+ * @param value_destroy destructor for values
+ * @return new rspamd_lru_hash object
+ */
+rspamd_lru_hash_t* rspamd_lru_hash_new (GHashFunc hash_func, GEqualFunc key_equal_func,
+ gint maxsize, gint maxage, GDestroyNotify key_destroy, GDestroyNotify value_destroy);
+
+/**
+ * Create new lru hash with custom storage
+ * @param hash_func pointer to hash function
+ * @param key_equal_func pointer to function for comparing keys
+ * @param maxsize maximum elements in a hash
+ * @param maxage maximum age of element
+ * @param key_destroy destructor for keys
+ * @param value_destroy destructor for values
+ * @param storage opaque storage object passed to the callbacks
+ * @param insert_func callback used to store an element
+ * @param lookup_func callback used to find an element
+ * @param delete_func callback used to remove an element
+ * @return new rspamd_lru_hash object
+ */
+rspamd_lru_hash_t* rspamd_lru_hash_new_full (GHashFunc hash_func, GEqualFunc key_equal_func,
+ gint maxsize, gint maxage, GDestroyNotify key_destroy, GDestroyNotify value_destroy,
+ gpointer storage, lru_cache_insert_func insert_func, lru_cache_lookup_func lookup_func,
+ lru_cache_delete_func delete_func);
+/**
+ * Lookup item from hash
+ * @param hash hash object
+ * @param key key to find
+ * @param now current time used to check expiration
+ * @return value of key or NULL if key is not found or has expired
+ */
+gpointer rspamd_lru_hash_lookup (rspamd_lru_hash_t *hash, gpointer key, time_t now);
+/**
+ * Insert item in hash
+ * @param hash hash object
+ * @param key key to insert
+ * @param value value of key
+ * @param now current time, saved as the store time of the item
+ * @param ttl ttl of the item, 0 means no own ttl
+ */
+void rspamd_lru_hash_insert (rspamd_lru_hash_t *hash, gpointer key, gpointer value,
+ time_t now, guint ttl);
+
+/**
+ * Remove lru hash
+ * @param hash hash object
+ */
+
+void rspamd_lru_hash_destroy (rspamd_lru_hash_t *hash);
+
+#endif
+
+/*
+ * vi:ts=4
+ */
diff --git a/src/libutil/http.c b/src/libutil/http.c
new file mode 100644
index 000000000..491468352
--- /dev/null
+++ b/src/libutil/http.c
@@ -0,0 +1,1222 @@
+/* Copyright (c) 2014, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "http.h"
+#include "utlist.h"
+#include "util.h"
+#include "printf.h"
+#include "logger.h"
+
+/**
+ * Private state of an HTTP connection used by the parser callbacks and by
+ * the read/write event handler of this file.
+ */
+struct rspamd_http_connection_private {
+ GString *buf; /**< buffer that is filled by read() and fed to the parser */
+ gboolean new_header; /**< TRUE after a header value was seen: the next field starts a new header */
+ struct rspamd_http_header *header; /**< header that is being assembled now */
+ struct http_parser parser; /**< parser state */
+ struct http_parser_settings parser_cb; /**< parser callbacks */
+ struct event ev; /**< IO event of the connection */
+ struct timeval tv; /**< NOTE(review): presumably storage for the timeout - not used in the visible code, confirm */
+ struct timeval *ptv; /**< timeout passed to event_add */
+ struct rspamd_http_message *msg; /**< message being read or written */
+ struct iovec *out; /**< output vector passed to writev */
+ guint outlen; /**< number of elements in out */
+ gsize wr_pos; /**< number of bytes already written */
+ gsize wr_total; /**< total number of bytes to write */
+};
+
+/** Known file types; the values are used as indexes in http_file_types */
+enum http_magic_type {
+ HTTP_MAGIC_PLAIN = 0,
+ HTTP_MAGIC_HTML,
+ HTTP_MAGIC_CSS,
+ HTTP_MAGIC_JS,
+ HTTP_MAGIC_PNG,
+ HTTP_MAGIC_JPG
+};
+
+/** Map of file extensions to content types, indexed by enum http_magic_type */
+static const struct _rspamd_http_magic {
+ const gchar *ext; /* file extension without the leading dot */
+ const gchar *ct; /* corresponding content type */
+} http_file_types[] = {
+ [HTTP_MAGIC_PLAIN] = { "txt", "text/plain" },
+ [HTTP_MAGIC_HTML] = { "html", "text/html" },
+ [HTTP_MAGIC_CSS] = { "css", "text/css" },
+ [HTTP_MAGIC_JS] = { "js", "application/javascript" },
+ [HTTP_MAGIC_PNG] = { "png", "image/png" },
+ [HTTP_MAGIC_JPG] = { "jpg", "image/jpeg" },
+};
+
+/*
+ * Abbreviated names of week days (starting from Sunday) and of months.
+ * NOTE(review): presumably used to print HTTP dates - the users of these
+ * tables are outside of this part of the file, confirm.
+ */
+static gchar *http_week[] = { "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat" };
+static gchar *http_month[] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun",
+ "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" };
+
+
+#define HTTP_ERROR http_error_quark ()
+GQuark
+http_error_quark (void)
+{
+ return g_quark_from_static_string ("http-error-quark");
+}
+
+/*
+ * Map an HTTP status code to a reason phrase. Exact codes are checked
+ * first, all other codes get the generic phrase of their class.
+ */
+static const gchar *
+rspamd_http_code_to_str (gint code)
+{
+	switch (code) {
+	case 200:
+		return "OK";
+	case 404:
+		return "Not found";
+	case 401:
+	case 403:
+		return "Not authorized";
+	default:
+		break;
+	}
+
+	if (code >= 300 && code < 400) {
+		return "See Other";
+	}
+	if (code >= 400 && code < 500) {
+		return "Bad request";
+	}
+	if (code >= 500 && code < 600) {
+		return "Internal server error";
+	}
+
+	return "Unknown error";
+}
+
+/*
+ * Obtained from nginx
+ * Copyright (C) Igor Sysoev
+ */
+/*
+ * Number of days in each month of a non-leap year, indexed by month
+ * starting from 0 (January); February 29 is checked separately by the
+ * date parser.
+ */
+static guint mday[] = { 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 };
+
+/**
+ * Parse an HTTP date and convert it to the number of seconds since the
+ * Unix epoch. Three formats are recognised (see the enum below): rfc822,
+ * rfc850 and the ISO C asctime format. Nothing after the seconds (or after
+ * the year in the asctime format) is examined.
+ * @param header string with a date
+ * @param len length of the string, if 0 then strlen is used to find it
+ * @return parsed time or (time_t)-1 if the string is not a valid date
+ */
+time_t
+rspamd_http_parse_date (const gchar *header, gsize len)
+{
+ const gchar *p, *end;
+ gint month;
+ guint day, year, hour, min, sec;
+ guint64 time;
+ enum {
+ no = 0, rfc822, /* Tue, 10 Nov 2002 23:50:13 */
+ rfc850, /* Tuesday, 10-Dec-02 23:50:13 */
+ isoc /* Tue Dec 10 23:50:13 2002 */
+ } fmt;
+
+ fmt = 0;
+ if (len > 0) {
+ end = header + len;
+ }
+ else {
+ end = header + strlen (header);
+ }
+
+#if (NGX_SUPPRESS_WARN)
+ day = 32;
+ year = 2038;
+#endif
+
+ /*
+ * Skip the week day: a comma ends it in rfc822 and rfc850, while a space
+ * that comes first means the asctime format
+ */
+ for (p = header; p < end; p++) {
+ if (*p == ',') {
+ break;
+ }
+
+ if (*p == ' ') {
+ fmt = isoc;
+ break;
+ }
+ }
+
+ /* Skip the delimiter itself and all spaces after it */
+ for (p++; p < end; p++)
+ if (*p != ' ') {
+ break;
+ }
+
+ /* The rest must be long enough for the fixed-offset reads below */
+ if (end - p < 18) {
+ return (time_t)-1;
+ }
+
+ if (fmt != isoc) {
+ /* Two digits of the day, the next character selects the format */
+ if (*p < '0' || *p > '9' || *(p + 1) < '0' || *(p + 1) > '9') {
+ return (time_t)-1;
+ }
+
+ day = (*p - '0') * 10 + *(p + 1) - '0';
+ p += 2;
+
+ if (*p == ' ') {
+ if (end - p < 18) {
+ return (time_t)-1;
+ }
+ fmt = rfc822;
+
+ }
+ else if (*p == '-') {
+ fmt = rfc850;
+
+ }
+ else {
+ return (time_t)-1;
+ }
+
+ p++;
+ }
+
+ /*
+ * Month name: only the characters that are needed to tell the months
+ * apart are checked, the result is a month number starting from 0
+ */
+ switch (*p) {
+
+ case 'J':
+ month = *(p + 1) == 'a' ? 0 : *(p + 2) == 'n' ? 5 : 6;
+ break;
+
+ case 'F':
+ month = 1;
+ break;
+
+ case 'M':
+ month = *(p + 2) == 'r' ? 2 : 4;
+ break;
+
+ case 'A':
+ month = *(p + 1) == 'p' ? 3 : 7;
+ break;
+
+ case 'S':
+ month = 8;
+ break;
+
+ case 'O':
+ month = 9;
+ break;
+
+ case 'N':
+ month = 10;
+ break;
+
+ case 'D':
+ month = 11;
+ break;
+
+ default:
+ return (time_t)-1;
+ }
+
+ p += 3;
+
+ if ((fmt == rfc822 && *p != ' ') || (fmt == rfc850 && *p != '-')) {
+ return (time_t)-1;
+ }
+
+ p++;
+
+ if (fmt == rfc822) {
+ /* Four digits of the year */
+ if (*p < '0' || *p > '9' || *(p + 1) < '0' || *(p + 1) > '9'
+ || *(p + 2) < '0' || *(p + 2) > '9' || *(p + 3) < '0'
+ || *(p + 3) > '9') {
+ return (time_t)-1;
+ }
+
+ year = (*p - '0') * 1000 + (*(p + 1) - '0') * 100
+ + (*(p + 2) - '0') * 10 + *(p + 3) - '0';
+ p += 4;
+
+ }
+ else if (fmt == rfc850) {
+ /* Two digits of the year: values below 70 belong to the 2000s */
+ if (*p < '0' || *p > '9' || *(p + 1) < '0' || *(p + 1) > '9') {
+ return (time_t)-1;
+ }
+
+ year = (*p - '0') * 10 + *(p + 1) - '0';
+ year += (year < 70) ? 2000 : 1900;
+ p += 2;
+ }
+
+ if (fmt == isoc) {
+ /* In the asctime format the day follows the month: one or two digits */
+ if (*p == ' ') {
+ p++;
+ }
+
+ if (*p < '0' || *p > '9') {
+ return (time_t)-1;
+ }
+
+ day = *p++ - '0';
+
+ if (*p != ' ') {
+ if (*p < '0' || *p > '9') {
+ return (time_t)-1;
+ }
+
+ day = day * 10 + *p++ - '0';
+ }
+
+ if (end - p < 14) {
+ return (time_t)-1;
+ }
+ }
+
+ /* Time of day in the form of hh:mm:ss */
+ if (*p++ != ' ') {
+ return (time_t)-1;
+ }
+
+ if (*p < '0' || *p > '9' || *(p + 1) < '0' || *(p + 1) > '9') {
+ return (time_t)-1;
+ }
+
+ hour = (*p - '0') * 10 + *(p + 1) - '0';
+ p += 2;
+
+ if (*p++ != ':') {
+ return (time_t)-1;
+ }
+
+ if (*p < '0' || *p > '9' || *(p + 1) < '0' || *(p + 1) > '9') {
+ return (time_t)-1;
+ }
+
+ min = (*p - '0') * 10 + *(p + 1) - '0';
+ p += 2;
+
+ if (*p++ != ':') {
+ return (time_t)-1;
+ }
+
+ if (*p < '0' || *p > '9' || *(p + 1) < '0' || *(p + 1) > '9') {
+ return (time_t)-1;
+ }
+
+ sec = (*p - '0') * 10 + *(p + 1) - '0';
+
+ if (fmt == isoc) {
+ /* In the asctime format the four digits of the year come last */
+ p += 2;
+
+ if (*p++ != ' ') {
+ return (time_t)-1;
+ }
+
+ if (*p < '0' || *p > '9' || *(p + 1) < '0' || *(p + 1) > '9'
+ || *(p + 2) < '0' || *(p + 2) > '9' || *(p + 3) < '0'
+ || *(p + 3) > '9') {
+ return (time_t)-1;
+ }
+
+ year = (*p - '0') * 1000 + (*(p + 1) - '0') * 100
+ + (*(p + 2) - '0') * 10 + *(p + 3) - '0';
+ }
+
+ if (hour > 23 || min > 59 || sec > 59) {
+ return (time_t)-1;
+ }
+
+ /* February 29 is valid in leap years only, other days are checked by the table */
+ if (day == 29 && month == 1) {
+ if ((year & 3) || ((year % 100 == 0) && (year % 400) != 0)) {
+ return (time_t)-1;
+ }
+
+ }
+ else if (day > mday[month]) {
+ return (time_t)-1;
+ }
+
+ /*
+ * shift new year to March 1 and start months from 1 (not 0),
+ * it is needed for Gauss' formula
+ */
+
+ if (--month <= 0) {
+ month += 12;
+ year -= 1;
+ }
+
+ /* Gauss' formula for Gregorian days since March 1, 1 BC */
+
+ time = (guint64) (
+ /* days in years including leap years since March 1, 1 BC */
+
+ 365 * year + year / 4 - year / 100 + year / 400
+
+ /* days before the month */
+
+ + 367 * month / 12 - 30
+
+ /* days before the day */
+
+ + day - 1
+
+ /*
+ * 719527 days were between March 1, 1 BC and March 1, 1970,
+ * 31 and 28 days were in January and February 1970
+ */
+
+ - 719527 + 31 + 28) * 86400 + hour * 3600 + min * 60 + sec;
+
+ return (time_t) time;
+}
+
+/*
+ * If the header that has just been completed is "Date" (compared case
+ * insensitively), parse its value and save the result in the message.
+ */
+static inline void
+rspamd_http_check_date (struct rspamd_http_connection_private *priv)
+{
+	struct rspamd_http_header *hdr = priv->header;
+
+	if (g_ascii_strcasecmp (hdr->name->str, "date") != 0) {
+		return;
+	}
+
+	priv->msg->date = rspamd_http_parse_date (hdr->value->str, hdr->value->len);
+}
+
+/*
+ * Parser callback: a chunk of the url has been read. The chunk is appended
+ * to the url of the current message, so the url may arrive in pieces.
+ */
+static gint
+rspamd_http_on_url (http_parser* parser, const gchar *at, size_t length)
+{
+	struct rspamd_http_connection *conn = parser->data;
+	struct rspamd_http_connection_private *priv = conn->priv;
+
+	g_string_append_len (priv->msg->url, at, length);
+
+	return 0;
+}
+
+/*
+ * Parser callback: a chunk of a header name has been read.
+ * When a value of the previous header has already been seen, that header
+ * is complete: it is appended to the message and a fresh header is
+ * started. Otherwise the chunk continues the name of the current header.
+ */
+static gint
+rspamd_http_on_header_field (http_parser* parser, const gchar *at, size_t length)
+{
+	struct rspamd_http_connection *conn = parser->data;
+	struct rspamd_http_connection_private *priv = conn->priv;
+
+	if (priv->header != NULL && priv->new_header) {
+		/* Previous header is finished: hand it over to the message */
+		DL_APPEND (priv->msg->headers, priv->header);
+		rspamd_http_check_date (priv);
+		priv->header = NULL;
+	}
+
+	if (priv->header == NULL) {
+		/* Start a new header with empty name and value */
+		priv->header = g_slice_alloc (sizeof (struct rspamd_http_header));
+		priv->header->name = g_string_sized_new (32);
+		priv->header->value = g_string_sized_new (32);
+	}
+
+	priv->new_header = FALSE;
+	g_string_append_len (priv->header->name, at, length);
+
+	return 0;
+}
+
+/*
+ * Parser callback: a chunk of a header value has been read. The chunk is
+ * appended to the value of the current header and the header is marked as
+ * having a value. Returns -1 if there is no current header.
+ */
+static gint
+rspamd_http_on_header_value (http_parser* parser, const gchar *at, size_t length)
+{
+	struct rspamd_http_connection *conn = parser->data;
+	struct rspamd_http_connection_private *priv = conn->priv;
+
+	if (priv->header != NULL) {
+		priv->new_header = TRUE;
+		g_string_append_len (priv->header->value, at, length);
+
+		return 0;
+	}
+
+	/* A value without a name should not happen */
+	return -1;
+}
+
+/*
+ * Parser callback: all headers have been read. The last pending header is
+ * appended to the message, the body buffer is created (preallocated from
+ * the content length when it is known) and the method is saved.
+ */
+static int
+rspamd_http_on_headers_complete (http_parser* parser)
+{
+	struct rspamd_http_connection *conn = parser->data;
+	struct rspamd_http_connection_private *priv = conn->priv;
+	gsize prealloc = BUFSIZ;
+
+	if (priv->header != NULL) {
+		DL_APPEND (priv->msg->headers, priv->header);
+		rspamd_http_check_date (priv);
+		priv->header = NULL;
+	}
+
+	/* Zero and ULLONG_MAX content lengths are not used for preallocation */
+	if (parser->content_length != 0 && parser->content_length != ULLONG_MAX) {
+		prealloc = parser->content_length + 1;
+	}
+	priv->msg->body = g_string_sized_new (prealloc);
+
+	priv->msg->method = parser->method;
+
+	return 0;
+}
+
+/*
+ * Parser callback: a chunk of the body has been read.
+ * The chunk is always appended to the body of the current message. If the
+ * connection works in the partial body mode, the chunk is also passed to
+ * the body handler and its result is returned to the parser.
+ * The body handler is optional (a connection may be created without it),
+ * so it is called only when it is set.
+ */
+static int
+rspamd_http_on_body (http_parser* parser, const gchar *at, size_t length)
+{
+	struct rspamd_http_connection *conn = (struct rspamd_http_connection *)parser->data;
+	struct rspamd_http_connection_private *priv;
+
+	priv = conn->priv;
+
+	g_string_append_len (priv->msg->body, at, length);
+
+	if ((conn->opts & RSPAMD_HTTP_BODY_PARTIAL) && conn->body_handler != NULL) {
+		return (conn->body_handler (conn, priv->msg, at, length));
+	}
+
+	return 0;
+}
+
+/*
+ * Parser callback: the whole message has been read.
+ * The body handler (if any) is called first: with the complete body, or
+ * with a NULL chunk in the partial body mode. If it returns 0 (or is not
+ * set), the finish handler is called. The connection is referenced around
+ * each handler call.
+ */
+static int
+rspamd_http_on_message_complete (http_parser* parser)
+{
+	struct rspamd_http_connection *conn = parser->data;
+	struct rspamd_http_connection_private *priv = conn->priv;
+	int rc = 0;
+
+	if (conn->body_handler != NULL) {
+		rspamd_http_connection_ref (conn);
+		if (!(conn->opts & RSPAMD_HTTP_BODY_PARTIAL)) {
+			rc = conn->body_handler (conn, priv->msg, priv->msg->body->str, priv->msg->body->len);
+		}
+		else {
+			/* All chunks were already delivered: signal the end of the body */
+			rc = conn->body_handler (conn, priv->msg, NULL, 0);
+		}
+		rspamd_http_connection_unref (conn);
+	}
+
+	if (rc != 0) {
+		return rc;
+	}
+
+	rspamd_http_connection_ref (conn);
+	rc = conn->finish_handler (conn, priv->msg);
+	rspamd_http_connection_unref (conn);
+
+	return rc;
+}
+
+/*
+ * Write the pending part of the output vector to the socket.
+ *
+ * `wr_pos` is the number of bytes of `out` that are already written and
+ * `wr_total` is the total size of `out`. On each call the first `wr_pos`
+ * bytes of the vector are skipped and the rest is passed to writev.
+ * When everything is written the finish handler is called, on a write
+ * error the error handler is called, otherwise the event is added again
+ * to continue writing later.
+ */
+static void
+rspamd_http_write_helper (struct rspamd_http_connection *conn)
+{
+	struct rspamd_http_connection_private *priv;
+	struct iovec *start, *partial = NULL;
+	struct iovec saved = { .iov_base = NULL, .iov_len = 0 };
+	guint niov, i;
+	gsize remain;
+	gssize r;
+	GError *err;
+
+	priv = conn->priv;
+
+	if (priv->wr_pos == priv->wr_total) {
+		rspamd_http_connection_ref (conn);
+		conn->finish_handler (conn, priv->msg);
+		rspamd_http_connection_unref (conn);
+		return;
+	}
+
+	start = &priv->out[0];
+	niov = priv->outlen;
+	remain = priv->wr_pos;
+	for (i = 0; i < priv->outlen && remain > 0; i ++) {
+		/* Find out the first iov required */
+		start = &priv->out[i];
+		if (start->iov_len <= remain) {
+			/* This element is written completely, skip it */
+			remain -= start->iov_len;
+			start = &priv->out[i + 1];
+			niov --;
+		}
+		else {
+			/*
+			 * This element is written partially: advance it for this
+			 * writev call only. The original element is saved and restored
+			 * below, because `wr_pos` is counted from the very beginning of
+			 * the vector and the next call skips these bytes once again.
+			 */
+			saved = *start;
+			partial = start;
+			start->iov_base = (void *)((char *)start->iov_base + remain);
+			start->iov_len -= remain;
+			remain = 0;
+		}
+	}
+
+	r = writev (conn->fd, start, MIN (IOV_MAX, niov));
+
+	if (partial != NULL) {
+		/* Plain structure copy, errno of writev is not touched */
+		*partial = saved;
+	}
+
+	if (r == -1) {
+		err = g_error_new (HTTP_ERROR, errno, "IO write error: %s", strerror (errno));
+		rspamd_http_connection_ref (conn);
+		conn->error_handler (conn, err);
+		rspamd_http_connection_unref (conn);
+		g_error_free (err);
+		return;
+	}
+	else {
+		priv->wr_pos += r;
+	}
+
+	if (priv->wr_pos >= priv->wr_total) {
+		rspamd_http_connection_ref (conn);
+		conn->finish_handler (conn, priv->msg);
+		rspamd_http_connection_unref (conn);
+	}
+	else {
+		/* Want to write more */
+		event_add (&priv->ev, priv->ptv);
+	}
+}
+
+/*
+ * Event callback of a connection.
+ * On timeout the error handler is called; on write readiness the pending
+ * output is written; on read readiness the data is read to the buffer and
+ * fed to the parser, and the error handler is called if either the read or
+ * the parser fails.
+ */
+static void
+rspamd_http_event_handler (int fd, short what, gpointer ud)
+{
+	struct rspamd_http_connection *conn = ud;
+	struct rspamd_http_connection_private *priv;
+	GString *buf;
+	gssize nread;
+	GError *err;
+
+	priv = conn->priv;
+	buf = priv->buf;
+
+	if (what == EV_TIMEOUT) {
+		err = g_error_new (HTTP_ERROR, ETIMEDOUT,
+				"IO timeout");
+		rspamd_http_connection_ref (conn);
+		conn->error_handler (conn, err);
+		rspamd_http_connection_unref (conn);
+		g_error_free (err);
+		return;
+	}
+
+	if (what == EV_WRITE) {
+		rspamd_http_write_helper (conn);
+		return;
+	}
+
+	if (what != EV_READ) {
+		/* Other event combinations are ignored */
+		return;
+	}
+
+	nread = read (fd, buf->str, buf->allocated_len);
+	if (nread == -1) {
+		err = g_error_new (HTTP_ERROR, errno, "IO read error: %s", strerror (errno));
+		conn->error_handler (conn, err);
+		g_error_free (err);
+		return;
+	}
+
+	buf->len = nread;
+	/* Parser callbacks may call user handlers: keep the connection alive meanwhile */
+	rspamd_http_connection_ref (conn);
+	if (http_parser_execute (&priv->parser, &priv->parser_cb, buf->str, nread) != (size_t)nread) {
+		err = g_error_new (HTTP_ERROR, priv->parser.http_errno,
+				"HTTP parser error: %s", http_errno_description (priv->parser.http_errno));
+		conn->error_handler (conn, err);
+		g_error_free (err);
+	}
+	rspamd_http_connection_unref (conn);
+}
+
+/*
+ * Create a new connection object with the specified handlers and options.
+ * The error and finish handlers are mandatory (NULL is returned without
+ * them), the body handler is optional. The connection is created with one
+ * reference and without a socket (fd is -1). A server connection parses
+ * requests, any other type parses responses.
+ */
+struct rspamd_http_connection*
+rspamd_http_connection_new (rspamd_http_body_handler_t body_handler,
+		rspamd_http_error_handler_t error_handler,
+		rspamd_http_finish_handler_t finish_handler,
+		enum rspamd_http_options opts,
+		enum rspamd_http_connection_type type)
+{
+	struct rspamd_http_connection *conn;
+	struct rspamd_http_connection_private *priv;
+	enum http_parser_type ptype;
+
+	if (error_handler == NULL || finish_handler == NULL) {
+		return NULL;
+	}
+
+	conn = g_slice_alloc0 (sizeof (struct rspamd_http_connection));
+	conn->ref = 1;
+	conn->fd = -1;
+	conn->type = type;
+	conn->opts = opts;
+	conn->finish_handler = finish_handler;
+	conn->error_handler = error_handler;
+	conn->body_handler = body_handler;
+
+	/* Init priv */
+	priv = g_slice_alloc0 (sizeof (struct rspamd_http_connection_private));
+	ptype = (type == RSPAMD_HTTP_SERVER) ? HTTP_REQUEST : HTTP_RESPONSE;
+	http_parser_init (&priv->parser, ptype);
+	/* Callbacks get the connection back from the parser data */
+	priv->parser.data = conn;
+	priv->parser_cb.on_url = rspamd_http_on_url;
+	priv->parser_cb.on_header_field = rspamd_http_on_header_field;
+	priv->parser_cb.on_header_value = rspamd_http_on_header_value;
+	priv->parser_cb.on_headers_complete = rspamd_http_on_headers_complete;
+	priv->parser_cb.on_body = rspamd_http_on_body;
+	priv->parser_cb.on_message_complete = rspamd_http_on_message_complete;
+
+	conn->priv = priv;
+
+	return conn;
+}
+
+/**
+ * Drop everything that belongs to the current message: the message itself,
+ * the pending event, the IO buffer and the output iovec array.
+ */
+void
+rspamd_http_connection_reset (struct rspamd_http_connection *conn)
+{
+	struct rspamd_http_connection_private *state = conn->priv;
+
+	/* Message currently attached to the connection */
+	if (state->msg != NULL) {
+		rspamd_http_message_free (state->msg);
+		state->msg = NULL;
+	}
+
+	/* Pending IO event */
+	event_del (&state->ev);
+
+	/* IO buffer */
+	if (state->buf != NULL) {
+		g_string_free (state->buf, TRUE);
+		state->buf = NULL;
+	}
+
+	/* Output vector, sized by outlen */
+	if (state->out != NULL) {
+		g_slice_free1 (sizeof (struct iovec) * state->outlen, state->out);
+		state->out = NULL;
+	}
+}
+
+/**
+ * Destroy a connection: reset its per-message state, then release the
+ * private and the public structures.
+ */
+void
+rspamd_http_connection_free (struct rspamd_http_connection *conn)
+{
+	/* Remember the private part, it is released after the reset */
+	struct rspamd_http_connection_private *state = conn->priv;
+
+	rspamd_http_connection_reset (conn);
+
+	g_slice_free1 (sizeof (struct rspamd_http_connection_private), state);
+	g_slice_free1 (sizeof (struct rspamd_http_connection), conn);
+}
+
+/**
+ * Attach a descriptor to the connection and start reading a message from it.
+ *
+ * A fresh message object is created (request for a server connection,
+ * response for a client one) and a persistent read event is registered.
+ *
+ * @param conn connection
+ * @param ud opaque user data stored in the connection
+ * @param fd descriptor to read from
+ * @param timeout optional IO timeout, copied into the connection
+ * @param base event base used for the read event
+ */
+void
+rspamd_http_connection_read_message (struct rspamd_http_connection *conn,
+	gpointer ud, gint fd, struct timeval *timeout, struct event_base *base)
+{
+	struct rspamd_http_connection_private *state = conn->priv;
+	enum http_parser_type msg_type;
+
+	conn->fd = fd;
+	conn->ud = ud;
+
+	msg_type = (conn->type == RSPAMD_HTTP_SERVER) ? HTTP_REQUEST : HTTP_RESPONSE;
+	state->msg = rspamd_http_new_message (msg_type);
+
+	/* Keep a private copy of the timeout, the caller's one may go away */
+	if (timeout != NULL) {
+		memcpy (&state->tv, timeout, sizeof (struct timeval));
+		state->ptv = &state->tv;
+	}
+	else {
+		state->ptv = NULL;
+	}
+
+	state->header = NULL;
+	state->new_header = TRUE;
+	state->buf = g_string_sized_new (BUFSIZ);
+
+	event_set (&state->ev, fd, EV_READ | EV_PERSIST, rspamd_http_event_handler, conn);
+	event_base_set (base, &state->ev);
+	event_add (&state->ev, state->ptv);
+}
+
+/**
+ * Serialise a message and schedule it for writing on a descriptor.
+ *
+ * The first line and the fixed headers are formatted into priv->buf; custom
+ * headers and the body are referenced in place through an iovec array, so
+ * `msg` is attached to the connection (priv->msg) and must stay alive until
+ * the write is finished.
+ *
+ * For plain HTTP methods (below HTTP_SYMBOLS) the method is forced to GET
+ * when there is no body and to POST otherwise. Other methods use the legacy
+ * spamc framing and require a body; without one nothing is scheduled.
+ *
+ * @param conn connection
+ * @param msg message to send
+ * @param host value of the Host header for requests, NULL selects HTTP/1.0
+ * @param mime_type content type of a reply, NULL means "text/plain"
+ * @param ud opaque user data stored in the connection
+ * @param fd descriptor to write to
+ * @param timeout optional IO timeout, copied into the connection
+ * @param base event base used for the write event
+ */
+void
+rspamd_http_connection_write_message (struct rspamd_http_connection *conn,
+	struct rspamd_http_message *msg, const gchar *host, const gchar *mime_type,
+	gpointer ud, gint fd, struct timeval *timeout, struct event_base *base)
+{
+	struct rspamd_http_connection_private *priv = conn->priv;
+	struct rspamd_http_header *hdr;
+	struct tm t, *ptm;
+	gchar datebuf[64], *pbody;
+	gint i;
+	gsize bodylen;
+
+	conn->fd = fd;
+	conn->ud = ud;
+	priv->msg = msg;
+
+	if (timeout == NULL) {
+		priv->ptv = NULL;
+	}
+	else {
+		memcpy (&priv->tv, timeout, sizeof (struct timeval));
+		priv->ptv = &priv->tv;
+	}
+	priv->header = NULL;
+	priv->buf = g_string_sized_new (128);
+
+	/*
+	 * Base iovec slots:
+	 *  HTTP without body: <buf><CRLF>
+	 *  HTTP with body:    <buf><CRLF><body>
+	 *  spamc reply:       <buf><body>
+	 */
+	if (msg->method < HTTP_SYMBOLS) {
+		if (msg->body == NULL || msg->body->len == 0) {
+			pbody = NULL;
+			bodylen = 0;
+			priv->outlen = 2;
+			msg->method = HTTP_GET;
+		}
+		else {
+			pbody = msg->body->str;
+			bodylen = msg->body->len;
+			priv->outlen = 3;
+			msg->method = HTTP_POST;
+		}
+	}
+	else if (msg->body != NULL) {
+		pbody = msg->body->str;
+		bodylen = msg->body->len;
+		priv->outlen = 2;
+	}
+	else {
+		/* Invalid body for spamc method */
+		return;
+	}
+
+	if (conn->type == RSPAMD_HTTP_SERVER) {
+		/* Format reply */
+		if (msg->method < HTTP_SYMBOLS) {
+			ptm = gmtime (&msg->date);
+			t = *ptm;
+			rspamd_snprintf (datebuf, sizeof (datebuf), "%s, %02d %s %4d %02d:%02d:%02d GMT",
+				http_week[t.tm_wday],
+				t.tm_mday,
+				http_month[t.tm_mon],
+				t.tm_year + 1900,
+				t.tm_hour,
+				t.tm_min,
+				t.tm_sec);
+			if (mime_type == NULL) {
+				mime_type = "text/plain";
+			}
+			/* bodylen, not msg->body->len: the body may be NULL here */
+			rspamd_printf_gstring (priv->buf, "HTTP/1.1 %d %s\r\n"
+				"Connection: close\r\n"
+				"Server: %s\r\n"
+				"Date: %s\r\n"
+				"Content-Length: %z\r\n"
+				"Content-Type: %s\r\n",
+				msg->code, rspamd_http_code_to_str (msg->code),
+				"rspamd/" RVERSION,
+				datebuf,
+				bodylen,
+				mime_type);
+		}
+		else {
+			/* Legacy spamd reply */
+			rspamd_printf_gstring (priv->buf, "RSPAMD/1.3 0 EX_OK\r\n");
+		}
+	}
+	else {
+		/* Format request */
+		if (host != NULL) {
+			rspamd_printf_gstring (priv->buf, "%s %v HTTP/1.1\r\n"
+				"Connection: close\r\n"
+				"Host: %s\r\n"
+				"Content-Length: %z\r\n",
+				http_method_str (msg->method), msg->url, host, bodylen);
+		}
+		else {
+			/* Fallback to HTTP/1.0 */
+			rspamd_printf_gstring (priv->buf, "%s %v HTTP/1.0\r\n"
+				"Content-Length: %z\r\n",
+				http_method_str (msg->method), msg->url, bodylen);
+		}
+	}
+	/* Allocate iov */
+	priv->wr_total = bodylen + priv->buf->len + 2;
+	DL_FOREACH (msg->headers, hdr) {
+		/* <name><: ><value><\r\n> */
+		priv->wr_total += hdr->name->len + hdr->value->len + 4;
+		priv->outlen += 4;
+	}
+	priv->out = g_slice_alloc (sizeof (struct iovec) * priv->outlen);
+	priv->wr_pos = 0;
+
+	/* Now set up all iov */
+	priv->out[0].iov_base = priv->buf->str;
+	priv->out[0].iov_len = priv->buf->len;
+	i = 1;
+	DL_FOREACH (msg->headers, hdr) {
+		priv->out[i].iov_base = hdr->name->str;
+		priv->out[i++].iov_len = hdr->name->len;
+		priv->out[i].iov_base = ": ";
+		priv->out[i++].iov_len = 2;
+		priv->out[i].iov_base = hdr->value->str;
+		priv->out[i++].iov_len = hdr->value->len;
+		priv->out[i].iov_base = "\r\n";
+		priv->out[i++].iov_len = 2;
+	}
+	if (msg->method < HTTP_SYMBOLS) {
+		priv->out[i].iov_base = "\r\n";
+		priv->out[i++].iov_len = 2;
+	}
+	else {
+		/* No CRLF for compatibility reply */
+		priv->wr_total -= 2;
+	}
+	/*
+	 * Key on pbody, not on msg->body: a non-NULL but empty body has no slot
+	 * reserved above and must not be written past the end of the array.
+	 */
+	if (pbody != NULL) {
+		priv->out[i].iov_base = pbody;
+		priv->out[i++].iov_len = bodylen;
+	}
+
+	event_set (&priv->ev, fd, EV_WRITE, rspamd_http_event_handler, conn);
+	event_base_set (base, &priv->ev);
+	event_add (&priv->ev, priv->ptv);
+}
+
+/**
+ * Allocate an empty HTTP message.
+ *
+ * A request gets an empty url string; a response has no url and defaults to
+ * code 200. The structure is zero-filled, so no field (notably `code` of a
+ * request) is ever left uninitialised.
+ *
+ * @param type HTTP_REQUEST or HTTP_RESPONSE
+ * @return new message, to be released with rspamd_http_message_free
+ */
+struct rspamd_http_message*
+rspamd_http_new_message (enum http_parser_type type)
+{
+	struct rspamd_http_message *new;
+
+	new = g_slice_alloc0 (sizeof (struct rspamd_http_message));
+	if (type == HTTP_REQUEST) {
+		new->url = g_string_sized_new (32);
+	}
+	else {
+		new->url = NULL;
+		new->code = 200;
+	}
+	new->headers = NULL;
+	new->date = 0;
+	new->body = NULL;
+	new->type = type;
+	new->method = HTTP_GET;
+
+	return new;
+}
+
+/**
+ * Release a message with all of its headers, its body and its url.
+ */
+void
+rspamd_http_message_free (struct rspamd_http_message *msg)
+{
+	struct rspamd_http_header *cur, *next;
+
+	/* Walk the header list, saving the link before each node is freed */
+	for (cur = msg->headers; cur != NULL; cur = next) {
+		next = cur->next;
+		g_string_free (cur->name, TRUE);
+		g_string_free (cur->value, TRUE);
+		g_slice_free1 (sizeof (struct rspamd_http_header), cur);
+	}
+
+	if (msg->body != NULL) {
+		g_string_free (msg->body, TRUE);
+	}
+
+	if (msg->url != NULL) {
+		g_string_free (msg->url, TRUE);
+	}
+
+	g_slice_free1 (sizeof (struct rspamd_http_message), msg);
+}
+
+/**
+ * Append a header to the message; both strings are copied.
+ * Nothing happens if any argument is NULL.
+ */
+void rspamd_http_message_add_header (struct rspamd_http_message *msg,
+	const gchar *name,
+	const gchar *value)
+{
+	struct rspamd_http_header *item;
+
+	if (msg == NULL || name == NULL || value == NULL) {
+		return;
+	}
+
+	item = g_slice_alloc (sizeof (struct rspamd_http_header));
+	item->name = g_string_new (name);
+	item->value = g_string_new (value);
+	/* DL_APPEND initialises the list links of the new node */
+	DL_APPEND (msg->headers, item);
+}
+
+/**
+ * Find the first header with the given name.
+ *
+ * Header field names are case-insensitive in HTTP, so "content-type" matches
+ * "Content-Type".
+ *
+ * @param msg message to search, may be NULL
+ * @param name header name, may be NULL
+ * @return value of the header (owned by the message) or NULL if not found
+ */
+const gchar*
+rspamd_http_message_find_header (struct rspamd_http_message *msg, const gchar *name)
+{
+	struct rspamd_http_header *hdr;
+	const gchar *res = NULL;
+	gsize slen;
+
+	if (msg == NULL || name == NULL) {
+		return NULL;
+	}
+
+	slen = strlen (name);
+
+	LL_FOREACH (msg->headers, hdr) {
+		/* Length check first: cheap, and it rules out prefix matches */
+		if (hdr->name->len == slen &&
+			g_ascii_strncasecmp (hdr->name->str, name, slen) == 0) {
+			res = hdr->value->str;
+			break;
+		}
+	}
+
+	return res;
+}
+
+/*
+ * HTTP router functions
+ */
+
+/**
+ * Finish a router connection entry: close its socket, drop the reference to
+ * the HTTP connection, notify the router finish handler, unlink the entry
+ * from the router list and release it.
+ *
+ * The finish handler is called before the entry memory is released, and the
+ * entry is removed from router->conns so that the list never keeps a
+ * dangling pointer.
+ *
+ * @param entry entry to destroy, NULL is ignored
+ */
+static void
+rspamd_http_entry_free (struct rspamd_http_connection_entry *entry)
+{
+	if (entry != NULL) {
+		close (entry->conn->fd);
+		rspamd_http_connection_unref (entry->conn);
+		if (entry->rt->finish_handler) {
+			entry->rt->finish_handler (entry);
+		}
+		LL_DELETE (entry->rt->conns, entry);
+		g_slice_free1 (sizeof (struct rspamd_http_connection_entry), entry);
+	}
+}
+
+/**
+ * Connection error callback installed by the router.
+ *
+ * The router level error handler, if any, is always notified. If the error
+ * happened while a reply was being written the entry is destroyed; otherwise
+ * the error text is sent back to the client as a plain text reply.
+ */
+static void
+rspamd_http_router_error_handler (struct rspamd_http_connection *conn, GError *err)
+{
+	struct rspamd_http_connection_entry *entry = conn->ud;
+	struct rspamd_http_message *reply;
+	gboolean replying = entry->is_reply;
+
+	if (entry->rt->error_handler != NULL) {
+		entry->rt->error_handler (entry, err);
+	}
+
+	if (replying) {
+		/* At this point we need to finish this session and close owned socket */
+		rspamd_http_entry_free (entry);
+		return;
+	}
+
+	/* Here we can write a reply to a client */
+	reply = rspamd_http_new_message (HTTP_RESPONSE);
+	reply->date = time (NULL);
+	reply->code = err->code;
+	reply->body = g_string_new (err->message);
+
+	rspamd_http_connection_reset (entry->conn);
+	rspamd_http_connection_write_message (entry->conn, reply, NULL,
+		"text/plain", entry, entry->conn->fd, entry->rt->ptv, entry->rt->ev_base);
+	entry->is_reply = TRUE;
+}
+
+/**
+ * Pick a content type by the extension of a path (the text after the last
+ * dot), using the http_file_types table. Paths without a dot and unknown
+ * extensions get the HTTP_MAGIC_PLAIN entry.
+ */
+static const gchar *
+rspamd_http_router_detect_ct (const gchar *path)
+{
+	const gchar *ext = strrchr (path, '.');
+	guint idx = 0;
+
+	if (ext != NULL) {
+		/* Skip the dot itself */
+		ext ++;
+
+		while (idx < G_N_ELEMENTS (http_file_types)) {
+			if (strcmp (http_file_types[idx].ext, ext) == 0) {
+				return http_file_types[idx].ct;
+			}
+			idx ++;
+		}
+	}
+
+	return http_file_types[HTTP_MAGIC_PLAIN].ct;
+}
+
+/**
+ * Try to serve the requested url as a static file from the router's
+ * default_fs_path.
+ *
+ * The path is always built as <default_fs_path>/<url> and canonicalised with
+ * realpath(). A directory is retried once with "index.html" appended to the
+ * url. The file is served only if its canonical directory is
+ * default_fs_path itself or lies below it.
+ *
+ * @param entry router connection entry
+ * @param msg parsed request, its url may get "/index.html" appended
+ * @param expand_path TRUE on the initial call; FALSE on the index.html retry
+ *        so that a directory is not expanded twice
+ * @return TRUE if a reply with the file content has been scheduled
+ */
+static gboolean
+rspamd_http_router_try_file (struct rspamd_http_connection_entry *entry,
+	struct rspamd_http_message *msg, gboolean expand_path)
+{
+	struct stat st;
+	gint fd;
+	gchar filebuf[PATH_MAX], realbuf[PATH_MAX], dirbuf[PATH_MAX], *dir;
+	const gchar *root = entry->rt->default_fs_path;
+	gsize rootlen;
+	struct rspamd_http_message *reply_msg;
+
+	/* XXX: filter filename component only */
+	/* The root prefix is needed on the index.html retry as well */
+	rspamd_snprintf (filebuf, sizeof (filebuf), "%s%c%v",
+		root, G_DIR_SEPARATOR, msg->url);
+
+	if (realpath (filebuf, realbuf) == NULL ||
+		lstat (realbuf, &st) == -1) {
+		return FALSE;
+	}
+
+	if (S_ISDIR (st.st_mode) && expand_path) {
+		/* Try to append 'index.html' to the url */
+		g_string_append_printf (msg->url, "%c%s", G_DIR_SEPARATOR,
+			"index.html");
+		return rspamd_http_router_try_file (entry, msg, FALSE);
+	}
+	else if (!S_ISREG (st.st_mode)) {
+		return FALSE;
+	}
+
+	/*
+	 * We also need to ensure that file is inside the defined dir.
+	 * dirname() may modify its argument, so it works on a copy and realbuf
+	 * keeps the full file name for open() below.
+	 */
+	g_strlcpy (dirbuf, realbuf, sizeof (dirbuf));
+	dir = dirname (dirbuf);
+
+	/* Ignore trailing separators of the configured root */
+	rootlen = strlen (root);
+	while (rootlen > 1 && root[rootlen - 1] == G_DIR_SEPARATOR) {
+		rootlen --;
+	}
+
+	if (dir == NULL || strncmp (dir, root, rootlen) != 0) {
+		return FALSE;
+	}
+	/* The match must end on a path component boundary: /srv/www != /srv/www2 */
+	if (rootlen > 0 && root[rootlen - 1] != G_DIR_SEPARATOR &&
+		dir[rootlen] != '\0' && dir[rootlen] != G_DIR_SEPARATOR) {
+		return FALSE;
+	}
+
+	fd = open (realbuf, O_RDONLY);
+	if (fd == -1) {
+		return FALSE;
+	}
+
+	reply_msg = rspamd_http_new_message (HTTP_RESPONSE);
+	reply_msg->date = time (NULL);
+	reply_msg->code = 200;
+	reply_msg->body = g_string_sized_new (st.st_size);
+
+	if (read (fd, reply_msg->body->str, st.st_size) != st.st_size) {
+		close (fd);
+		rspamd_http_message_free (reply_msg);
+		return FALSE;
+	}
+
+	reply_msg->body->len = st.st_size;
+	reply_msg->body->str[st.st_size] = '\0';
+	close (fd);
+
+	rspamd_http_connection_reset (entry->conn);
+
+	/* Content type is chosen by the file extension */
+	rspamd_http_connection_write_message (entry->conn, reply_msg, NULL,
+		rspamd_http_router_detect_ct (realbuf), entry, entry->conn->fd,
+		entry->rt->ptv, entry->rt->ev_base);
+
+	return TRUE;
+}
+
+/**
+ * Connection finish callback installed by the router.
+ *
+ * While a request is being read (is_reply == FALSE) the call means that the
+ * request is complete: the url is looked up among the registered paths and
+ * the matching handler is called. Without a handler the url is tried as a
+ * static file, and a 404 reply is sent when no file root is configured or
+ * the file cannot be served.
+ *
+ * When a reply is being written (is_reply == TRUE) the call means that the
+ * reply is out and the entry is destroyed.
+ *
+ * @return result of the path handler if one was called, 0 otherwise
+ */
+static int
+rspamd_http_router_finish_handler (struct rspamd_http_connection *conn,
+	struct rspamd_http_message *msg)
+{
+	struct rspamd_http_connection_entry *entry = conn->ud;
+	rspamd_http_router_handler_t handler = NULL;
+	gpointer found;
+	struct rspamd_http_message *err_msg;
+	GError *err;
+
+	G_STATIC_ASSERT (sizeof (rspamd_http_router_handler_t) == sizeof (gpointer));
+
+	if (entry->is_reply) {
+		/* Request is finished, it is safe to free a connection */
+		rspamd_http_entry_free (entry);
+	}
+	else {
+		/* Search for path */
+		if (msg->url != NULL && msg->url->len != 0) {
+			found = g_hash_table_lookup (entry->rt->paths, msg->url->str);
+			/* memcpy avoids a direct data to function pointer cast */
+			memcpy (&handler, &found, sizeof (found));
+		}
+		entry->is_reply = TRUE;
+		if (handler != NULL) {
+			return handler (entry, msg);
+		}
+		else {
+			/* 404 only if the static file fallback did not produce a reply */
+			if (entry->rt->default_fs_path == NULL ||
+				!rspamd_http_router_try_file (entry, msg, TRUE)) {
+				err = g_error_new (HTTP_ERROR, 404,
+					"Not found");
+				if (entry->rt->error_handler != NULL) {
+					entry->rt->error_handler (entry, err);
+				}
+				err_msg = rspamd_http_new_message (HTTP_RESPONSE);
+				err_msg->date = time (NULL);
+				err_msg->code = err->code;
+				err_msg->body = g_string_new (err->message);
+				rspamd_http_connection_reset (entry->conn);
+				rspamd_http_connection_write_message (entry->conn, err_msg, NULL,
+					"text/plain", entry, entry->conn->fd,
+					entry->rt->ptv, entry->rt->ev_base);
+				g_error_free (err);
+			}
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * Create a router with an empty path table.
+ *
+ * The timeout, if given, is copied into the router. `default_fs_path` is
+ * duplicated only when it names an existing directory; otherwise an error is
+ * logged and static file serving stays disabled.
+ */
+struct rspamd_http_connection_router*
+rspamd_http_router_new (rspamd_http_router_error_handler_t eh,
+	rspamd_http_router_finish_handler_t fh,
+	struct timeval *timeout, struct event_base *base,
+	const char *default_fs_path)
+{
+	struct rspamd_http_connection_router *router;
+	struct stat sb;
+
+	router = g_slice_alloc (sizeof (struct rspamd_http_connection_router));
+	router->paths = g_hash_table_new (rspamd_strcase_hash, rspamd_strcase_equal);
+	router->conns = NULL;
+	router->error_handler = eh;
+	router->finish_handler = fh;
+	router->ev_base = base;
+
+	if (timeout == NULL) {
+		router->ptv = NULL;
+	}
+	else {
+		router->tv = *timeout;
+		router->ptv = &router->tv;
+	}
+
+	router->default_fs_path = NULL;
+
+	if (default_fs_path != NULL) {
+		if (stat (default_fs_path, &sb) == -1) {
+			msg_err ("cannot stat %s", default_fs_path);
+		}
+		else if (!S_ISDIR (sb.st_mode)) {
+			msg_err ("path %s is not a directory", default_fs_path);
+		}
+		else {
+			router->default_fs_path = g_strdup (default_fs_path);
+		}
+	}
+
+	return router;
+}
+
+/**
+ * Register a handler for an exact path.
+ *
+ * The path pointer is stored as is (not copied), so the string must outlive
+ * the router. Calls with a NULL argument are ignored.
+ */
+void
+rspamd_http_router_add_path (struct rspamd_http_connection_router *router,
+	const gchar *path, rspamd_http_router_handler_t handler)
+{
+	gpointer value;
+	G_STATIC_ASSERT (sizeof (rspamd_http_router_handler_t) == sizeof (gpointer));
+
+	if (router == NULL || path == NULL || handler == NULL) {
+		return;
+	}
+
+	/* Function pointer is stored in the table as a bit copy */
+	memcpy (&value, &handler, sizeof (value));
+	g_hash_table_insert (router->paths, (gpointer)path, value);
+}
+
+/**
+ * Start serving an accepted socket: create an entry with its own server
+ * side HTTP connection, begin reading the request and link the entry into
+ * the router's connection list.
+ */
+void
+rspamd_http_router_handle_socket (struct rspamd_http_connection_router *router,
+	gint fd, gpointer ud)
+{
+	struct rspamd_http_connection_entry *entry;
+
+	entry = g_slice_alloc (sizeof (struct rspamd_http_connection_entry));
+	entry->is_reply = FALSE;
+	entry->ud = ud;
+	entry->rt = router;
+
+	/* No body handler: the router works with complete messages only */
+	entry->conn = rspamd_http_connection_new (NULL, rspamd_http_router_error_handler,
+		rspamd_http_router_finish_handler, 0, RSPAMD_HTTP_SERVER);
+
+	rspamd_http_connection_read_message (entry->conn, entry, fd, router->ptv,
+		router->ev_base);
+	LL_PREPEND (router->conns, entry);
+}
+
+/**
+ * Destroy a router: finish every connection entry still linked to it, then
+ * release the file root, the path table and the router itself.
+ * A NULL router is ignored.
+ */
+void
+rspamd_http_router_free (struct rspamd_http_connection_router *router)
+{
+	struct rspamd_http_connection_entry *cur, *next;
+
+	if (router == NULL) {
+		return;
+	}
+
+	LL_FOREACH_SAFE (router->conns, cur, next) {
+		rspamd_http_entry_free (cur);
+	}
+
+	/* g_free accepts NULL, no guard is needed */
+	g_free (router->default_fs_path);
+	g_hash_table_unref (router->paths);
+	g_slice_free1 (sizeof (struct rspamd_http_connection_router), router);
+}
diff --git a/src/libutil/http.h b/src/libutil/http.h
new file mode 100644
index 000000000..8af4429c6
--- /dev/null
+++ b/src/libutil/http.h
@@ -0,0 +1,278 @@
+/* Copyright (c) 2014, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef HTTP_H_
+#define HTTP_H_
+
+/**
+ * @file http.h
+ *
+ * This is an interface for HTTP client and server connections. This code uses
+ * the HTTP parser written by Joyent Inc, which is based on nginx code.
+ */
+
+#include "config.h"
+#include "http_parser.h"
+
+/**
+ * Role of a connection. A server connection parses requests and formats
+ * replies, a client connection parses responses and formats requests.
+ */
+enum rspamd_http_connection_type {
+ RSPAMD_HTTP_SERVER,
+ RSPAMD_HTTP_CLIENT
+};
+
+/**
+ * HTTP header structure: a single name/value pair, linked into the header
+ * list of a message
+ */
+struct rspamd_http_header {
+ GString *name; /* header name */
+ GString *value; /* header value */
+ struct rspamd_http_header *next, *prev; /* list links */
+};
+
+/**
+ * HTTP message structure, used for requests and replies
+ */
+struct rspamd_http_message {
+ GString *url; /* request url, NULL for a freshly created response */
+ struct rspamd_http_header *headers; /* list of headers, NULL when empty */
+ GString *body; /* message body, may be NULL */
+ enum http_parser_type type; /* HTTP_REQUEST or HTTP_RESPONSE */
+ time_t date; /* timestamp used for the Date header of a reply */
+ gint code; /* status code of a response (200 by default) */
+ enum http_method method; /* request method, HTTP_GET by default */
+};
+
+
+/**
+ * Options for HTTP connection, passed to rspamd_http_connection_new
+ */
+enum rspamd_http_options {
+ /* NOTE(review): presumably the body is passed to the body handler in chunks
+  * as it arrives rather than as a whole - confirm against http.c */
+ RSPAMD_HTTP_BODY_PARTIAL = 0x1//!< RSPAMD_HTTP_BODY_PARTIAL
+};
+
+struct rspamd_http_connection_private;
+struct rspamd_http_connection;
+struct rspamd_http_connection_router;
+struct rspamd_http_connection_entry;
+
+/**
+ * Connection level callback that receives body data: `chunk` points to `len`
+ * bytes of the body of `msg`. May be NULL for a connection.
+ */
+typedef int (*rspamd_http_body_handler_t) (struct rspamd_http_connection *conn,
+ struct rspamd_http_message *msg,
+ const gchar *chunk,
+ gsize len);
+
+/**
+ * Connection level callback for IO errors, parser errors and timeouts.
+ * `err` is owned by the caller and is freed after the callback returns.
+ */
+typedef void (*rspamd_http_error_handler_t) (struct rspamd_http_connection *conn, GError *err);
+
+/**
+ * Connection level callback invoked when a message has been processed
+ * completely (for a write: when all data has been written).
+ */
+typedef int (*rspamd_http_finish_handler_t) (struct rspamd_http_connection *conn,
+ struct rspamd_http_message *msg);
+
+/**
+ * Router callback bound to a path; called with the parsed request. Its return
+ * value is passed through as the result of the connection finish handler.
+ */
+typedef int (*rspamd_http_router_handler_t) (struct rspamd_http_connection_entry *conn_ent,
+ struct rspamd_http_message *msg);
+/**
+ * Router callback notified about an error on one of the router connections.
+ */
+typedef void (*rspamd_http_router_error_handler_t) (struct rspamd_http_connection_entry *conn_ent,
+ GError *err);
+/**
+ * Router callback notified when a connection entry is being destroyed.
+ */
+typedef void (*rspamd_http_router_finish_handler_t) (struct rspamd_http_connection_entry *conn_ent);
+
+/**
+ * HTTP connection structure
+ */
+struct rspamd_http_connection {
+ struct rspamd_http_connection_private *priv; /* parser and IO state, opaque for users */
+ rspamd_http_body_handler_t body_handler; /* optional */
+ rspamd_http_error_handler_t error_handler; /* mandatory */
+ rspamd_http_finish_handler_t finish_handler; /* mandatory */
+ gpointer ud; /* user data set by read_message/write_message */
+ enum rspamd_http_options opts;
+ enum rspamd_http_connection_type type;
+ gint fd; /* -1 until read_message/write_message attaches a descriptor */
+ gint ref; /* reference counter, starts at 1 */
+};
+
+/**
+ * A single client connection served by a router
+ */
+struct rspamd_http_connection_entry {
+ struct rspamd_http_connection_router *rt; /* owning router */
+ struct rspamd_http_connection *conn; /* underlying HTTP connection */
+ gpointer ud; /* user data passed to rspamd_http_router_handle_socket */
+ gboolean is_reply; /* FALSE while the request is read, TRUE once a reply is being written */
+ struct rspamd_http_connection_entry *next; /* link in the router's conns list */
+};
+
+/**
+ * HTTP router: dispatches requests to handlers by path and optionally serves
+ * static files
+ */
+struct rspamd_http_connection_router {
+ struct rspamd_http_connection_entry *conns; /* list of connection entries */
+ GHashTable *paths; /* path string -> rspamd_http_router_handler_t */
+ struct timeval tv; /* copy of the IO timeout, valid only if ptv is not NULL */
+ struct timeval *ptv; /* points to tv, or NULL when no timeout is set */
+ struct event_base *ev_base; /* event base used for all connections */
+ gchar *default_fs_path; /* root for static files, NULL if disabled */
+ rspamd_http_router_error_handler_t error_handler; /* optional */
+ rspamd_http_router_finish_handler_t finish_handler; /* optional */
+};
+
+/**
+ * Create new http connection
+ * @param body_handler handler for body data, may be NULL
+ * @param error_handler handler for errors, must not be NULL
+ * @param finish_handler handler called when a message is finished, must not be NULL
+ * @param opts options
+ * @param type server or client connection
+ * @return new connection structure or NULL if a mandatory handler is missing
+ */
+struct rspamd_http_connection* rspamd_http_connection_new (
+ rspamd_http_body_handler_t body_handler,
+ rspamd_http_error_handler_t error_handler,
+ rspamd_http_finish_handler_t finish_handler,
+ enum rspamd_http_options opts,
+ enum rspamd_http_connection_type type);
+
+/**
+ * Handle a request using socket fd and user data ud
+ * @param conn connection structure
+ * @param ud opaque user data
+ * @param fd fd to read/write
+ * @param timeout IO timeout or NULL for no timeout; the value is copied
+ * @param base event base used to schedule the read event
+ */
+void rspamd_http_connection_read_message (
+ struct rspamd_http_connection *conn,
+ gpointer ud,
+ gint fd,
+ struct timeval *timeout,
+ struct event_base *base);
+
+/**
+ * Send reply using initialised connection
+ * @param conn connection structure
+ * @param msg HTTP message; it is attached to the connection and freed when the
+ * connection is reset
+ * @param host value of the Host header for requests, NULL means HTTP/1.0
+ * @param mime_type content type for replies, NULL means "text/plain"
+ * @param ud opaque user data
+ * @param fd fd to read/write
+ * @param timeout IO timeout or NULL for no timeout; the value is copied
+ * @param base event base used to schedule the write event
+ */
+void rspamd_http_connection_write_message (
+ struct rspamd_http_connection *conn,
+ struct rspamd_http_message *msg,
+ const gchar *host,
+ const gchar *mime_type,
+ gpointer ud,
+ gint fd,
+ struct timeval *timeout,
+ struct event_base *base);
+
+/**
+ * Free connection structure
+ * @param conn
+ */
+void rspamd_http_connection_free (struct rspamd_http_connection *conn);
+
+/**
+ * Take one more reference to a connection
+ * @param conn connection to retain
+ * @return the same connection, for call chaining
+ */
+static inline struct rspamd_http_connection *
+rspamd_http_connection_ref (struct rspamd_http_connection *conn)
+{
+	conn->ref += 1;
+
+	return conn;
+}
+
+/**
+ * Decrease a refcount for a connection and free it if refcount is equal to zero
+ *
+ * Declared `static inline`, like rspamd_http_connection_ref, so that source
+ * files which include this header without calling the function do not get an
+ * unused static function.
+ * @param conn
+ */
+static inline void
+rspamd_http_connection_unref (struct rspamd_http_connection *conn)
+{
+	if (--conn->ref <= 0) {
+		rspamd_http_connection_free (conn);
+	}
+}
+
+/**
+ * Reset connection for a new request
+ * @param conn
+ */
+void rspamd_http_connection_reset (struct rspamd_http_connection *conn);
+
+/**
+ * Create new HTTP message
+ * @param type HTTP_REQUEST or HTTP_RESPONSE; a response gets code 200 by default
+ * @return new message object
+ */
+struct rspamd_http_message* rspamd_http_new_message (enum http_parser_type type);
+
+/**
+ * Append a header to reply
+ * @param rep
+ * @param name
+ * @param value
+ */
+void rspamd_http_message_add_header (struct rspamd_http_message *rep, const gchar *name, const gchar *value);
+
+/**
+ * Search for a specified header in message
+ * @param rep message
+ * @param name name of header
+ */
+const gchar* rspamd_http_message_find_header (struct rspamd_http_message *rep, const gchar *name);
+
+/**
+ * Free HTTP reply
+ * @param rep
+ */
+void rspamd_http_message_free (struct rspamd_http_message *msg);
+
+/**
+ * Parse HTTP date header and return it as time_t
+ * @param header HTTP date header
+ * @param len length of header
+ * @return time_t or (time_t)-1 in case of error
+ */
+time_t rspamd_http_parse_date (const gchar *header, gsize len);
+
+/**
+ * Create new http connection router and the associated HTTP connection
+ * @param eh error handler callback
+ * @param fh finish handler callback
+ * @param default_fs_path if not NULL try to serve static files from
+ * the specified directory
+ * @return
+ */
+struct rspamd_http_connection_router* rspamd_http_router_new (
+ rspamd_http_router_error_handler_t eh,
+ rspamd_http_router_finish_handler_t fh,
+ struct timeval *timeout,
+ struct event_base *base,
+ const char *default_fs_path);
+
+/**
+ * Add new path to the router
+ */
+void rspamd_http_router_add_path (struct rspamd_http_connection_router *router,
+ const gchar *path, rspamd_http_router_handler_t handler);
+
+/**
+ * Handle new accepted socket
+ * @param router router object
+ * @param fd server socket
+ * @param ud opaque userdata
+ */
+void rspamd_http_router_handle_socket (struct rspamd_http_connection_router *router,
+ gint fd, gpointer ud);
+
+/**
+ * Free router and all connections associated
+ * @param router
+ */
+void rspamd_http_router_free (struct rspamd_http_connection_router *router);
+
+#endif /* HTTP_H_ */
diff --git a/src/libutil/logger.c b/src/libutil/logger.c
new file mode 100644
index 000000000..01814d24d
--- /dev/null
+++ b/src/libutil/logger.c
@@ -0,0 +1,769 @@
+/*
+ * Copyright (c) 2009-2012, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#include "config.h"
+#include "logger.h"
+#include "util.h"
+#include "main.h"
+#include "map.h"
+
+/* Number of times a message must be repeated before it is treated as a repeated one */
+#define REPEATS_MIN 3
+#define REPEATS_MAX 300
+
+/**
+ * Static structure that store logging parameters
+ * It is NOT shared between processes and is created by main process
+ */
+struct rspamd_logger_s {
+ rspamd_log_func_t log_func; /* backend writer: file/console or syslog */
+ struct config_file *cfg; /* configuration the logger was set up from */
+ /* Output buffer, allocated only when buffered logging is configured */
+ struct {
+ guint32 size; /* allocated size of buf */
+ guint32 used; /* presumably the number of bytes currently buffered - confirm */
+ u_char *buf;
+ } io_buf;
+ gint fd; /* descriptor of the log file (STDERR_FILENO for console) */
+ gboolean is_buffered;
+ gboolean enabled; /* FALSE disables direct writes */
+ gboolean is_debug;
+ gboolean throttling; /* set after certain write errors, cleared by a successful write */
+ time_t throttling_time; /* time when throttling was turned on */
+ sig_atomic_t do_reopen_log;
+ enum rspamd_log_type type; /* console, syslog or file */
+ pid_t pid; /* pid of the process that set up the logger */
+ GQuark process_type;
+ radix_tree_t *debug_ip; /* addresses for which debug logs are enabled */
+ /* State of the repeated messages detection */
+ guint32 last_line_cksum;
+ guint32 repeats;
+ gchar *saved_message;
+ gchar *saved_function;
+ GMutex *mtx;
+};
+
+static const gchar lf_chr = '\n';
+
+static rspamd_logger_t *default_logger = NULL;
+
+
+static void
+syslog_log_function (const gchar * log_domain, const gchar *function,
+ GLogLevelFlags log_level, const gchar * message,
+ gboolean forced, gpointer arg);
+static void
+file_log_function (const gchar * log_domain, const gchar *function,
+ GLogLevelFlags log_level, const gchar * message,
+ gboolean forced, gpointer arg);
+
+/**
+ * Hash a log line; the result is used to detect repeated messages.
+ *
+ * For every byte the accumulator is first increased by a fixed set of its
+ * own left shifts and then xor-ed with the byte.
+ */
+static inline guint32
+rspamd_log_calculate_cksum (const gchar *message, size_t mlen)
+{
+	guint32 acc = 0;
+	size_t pos;
+
+	for (pos = 0; pos < mlen; pos ++) {
+		acc += (acc<<1) + (acc<<4) + (acc<<7) + (acc<<8) + (acc<<24);
+		acc ^= (guint32)message[pos];
+	}
+
+	return acc;
+}
+
+/*
+ * Write a line to log file (unbuffered)
+ *
+ * `data` is either a plain buffer of `count` bytes or, if `is_iov` is set, an
+ * array of `count` iovec structures. Nothing is written while the logger is
+ * disabled.
+ *
+ * On a write error the reaction depends on errno:
+ *  - EIO, EINTR: reopen the log and retry the line;
+ *  - EFAULT, EINVAL, EFBIG, ENOSPC: turn throttling on;
+ *  - EPIPE, EBADF: disable the logger.
+ * errno is saved right after the failed call because the calls made while
+ * handling the error may overwrite it.
+ */
+static void
+direct_write_log_line (rspamd_logger_t *rspamd_log, void *data, gint count, gboolean is_iov)
+{
+	gchar errmsg[128];
+	struct iovec *iov;
+	const gchar *line;
+	gint r, saved_errno;
+
+	if (rspamd_log->enabled) {
+		if (is_iov) {
+			iov = (struct iovec *)data;
+			r = writev (rspamd_log->fd, iov, count);
+		}
+		else {
+			line = (const gchar *)data;
+			r = write (rspamd_log->fd, line, count);
+		}
+		if (r == -1) {
+			saved_errno = errno;
+			/* We cannot write message to file, so we need to detect error and make decision */
+			r = rspamd_snprintf (errmsg, sizeof (errmsg), "direct_write_log_line: cannot write log line: %s", strerror (saved_errno));
+			if (saved_errno == EIO || saved_errno == EINTR) {
+				/* Descriptor is somehow invalid, try to restart */
+				reopen_log (rspamd_log);
+				if (write (rspamd_log->fd, errmsg, r) != -1) {
+					/* Try again */
+					direct_write_log_line (rspamd_log, data, count, is_iov);
+				}
+			}
+			else if (saved_errno == EFAULT || saved_errno == EINVAL || saved_errno == EFBIG || saved_errno == ENOSPC) {
+				/* Rare case */
+				rspamd_log->throttling = TRUE;
+				rspamd_log->throttling_time = time (NULL);
+			}
+			else if (saved_errno == EPIPE || saved_errno == EBADF) {
+				/* We write to some pipe and it disappears, disable logging or we has opened bad file descriptor */
+				rspamd_log->enabled = FALSE;
+			}
+		}
+		else if (rspamd_log->throttling) {
+			rspamd_log->throttling = FALSE;
+		}
+	}
+}
+
+/**
+ * Make a string safe for a single log line, in place: CR and LF become
+ * spaces, any other non-printable or non-ASCII byte becomes '?'.
+ *
+ * The line break test goes first: CR and LF are not printable, so testing
+ * printability first would turn them into '?' and the space replacement
+ * would never run.
+ */
+static void
+rspamd_escape_log_string (gchar *str)
+{
+	guchar *p = (guchar *)str;
+
+	while (*p) {
+		if (*p == '\n' || *p == '\r') {
+			*p = ' ';
+		}
+		else if ((*p & 0x80) || !g_ascii_isprint (*p)) {
+			*p = '?';
+		}
+		p ++;
+	}
+}
+
+/* Logging utility functions */
+/**
+ * Open the log destination selected by the configuration.
+ *
+ * Console needs no setup, syslog is opened with the configured facility, a
+ * file is opened for appending (created if needed) and handed to uid/gid.
+ * On success the logger is marked as enabled.
+ *
+ * @return 0 on success, -1 on failure or for an unknown log type
+ */
+gint
+open_log_priv (rspamd_logger_t *rspamd_log, uid_t uid, gid_t gid)
+{
+	gint res = -1;
+
+	switch (rspamd_log->cfg->log_type) {
+	case RSPAMD_LOG_CONSOLE:
+		/* Do nothing with console */
+		res = 0;
+		break;
+	case RSPAMD_LOG_SYSLOG:
+		openlog ("rspamd", LOG_NDELAY | LOG_PID, rspamd_log->cfg->log_facility);
+		res = 0;
+		break;
+	case RSPAMD_LOG_FILE:
+		rspamd_log->fd = open (rspamd_log->cfg->log_file, O_CREAT | O_WRONLY | O_APPEND,
+			S_IWUSR | S_IRUSR | S_IRGRP | S_IROTH);
+		if (rspamd_log->fd == -1) {
+			fprintf (stderr, "open_log: cannot open desired log file: %s, %s",
+				rspamd_log->cfg->log_file, strerror (errno));
+		}
+		else if (fchown (rspamd_log->fd, uid, gid) == -1) {
+			fprintf (stderr, "open_log: cannot chown desired log file: %s, %s",
+				rspamd_log->cfg->log_file, strerror (errno));
+			close (rspamd_log->fd);
+		}
+		else {
+			res = 0;
+		}
+		break;
+	}
+
+	if (res == 0) {
+		rspamd_log->enabled = TRUE;
+	}
+
+	return res;
+}
+
+/**
+ * Flush and close the log destination and mark the logger as disabled.
+ *
+ * For a file log, a pending "repeated message" record is written first.
+ * The file is then synced and closed in every case: writing the record must
+ * not skip the close, otherwise the descriptor leaks on each reopen and the
+ * logger stays enabled.
+ *
+ * @param uid unused
+ * @param gid unused
+ */
+void
+close_log_priv (rspamd_logger_t *rspamd_log, uid_t uid, gid_t gid)
+{
+	gchar tmpbuf[256];
+	flush_log_buf (rspamd_log);
+
+	switch (rspamd_log->type) {
+	case RSPAMD_LOG_CONSOLE:
+		/* Do nothing special */
+		break;
+	case RSPAMD_LOG_SYSLOG:
+		closelog ();
+		break;
+	case RSPAMD_LOG_FILE:
+		if (rspamd_log->enabled) {
+			if (rspamd_log->repeats > REPEATS_MIN) {
+				rspamd_snprintf (tmpbuf, sizeof (tmpbuf), "Last message repeated %ud times", rspamd_log->repeats);
+				rspamd_log->repeats = 0;
+				if (rspamd_log->saved_message) {
+					file_log_function (NULL, rspamd_log->saved_function, rspamd_log->cfg->log_level, rspamd_log->saved_message, TRUE, rspamd_log);
+					g_free (rspamd_log->saved_message);
+					g_free (rspamd_log->saved_function);
+					rspamd_log->saved_message = NULL;
+					rspamd_log->saved_function = NULL;
+				}
+				/* It is safe to use temporary buffer here as it is not static */
+				file_log_function (NULL, __FUNCTION__, rspamd_log->cfg->log_level, tmpbuf, TRUE, rspamd_log);
+				/* The records above may have been buffered, push them out before closing */
+				flush_log_buf (rspamd_log);
+			}
+
+			if (fsync (rspamd_log->fd) == -1) {
+				msg_err ("error syncing log file: %s", strerror (errno));
+			}
+			close (rspamd_log->fd);
+		}
+		break;
+	}
+
+	rspamd_log->enabled = FALSE;
+}
+
+/**
+ * Close the log and open it again with the given owner.
+ *
+ * @return 0 if the log has been opened again, -1 otherwise
+ */
+gint
+reopen_log_priv (rspamd_logger_t *rspamd_log, uid_t uid, gid_t gid)
+{
+	close_log_priv (rspamd_log, uid, gid);
+
+	if (open_log_priv (rspamd_log, uid, gid) != 0) {
+		return -1;
+	}
+
+	msg_info ("log file reopened");
+
+	return 0;
+}
+
+/**
+ * Open log file or initialize other structures
+ *
+ * Thin wrapper around open_log_priv that passes -1 as both uid and gid.
+ * @return 0 on success, -1 on failure
+ */
+gint
+open_log (rspamd_logger_t *logger)
+{
+ return open_log_priv (logger, -1, -1);
+}
+/**
+ * Close log file or destroy other structures
+ *
+ * Thin wrapper around close_log_priv that passes -1 as both uid and gid.
+ */
+void
+close_log (rspamd_logger_t *logger)
+{
+ close_log_priv (logger, -1, -1);
+}
+/**
+ * Close and open log again
+ *
+ * Thin wrapper around reopen_log_priv that passes -1 as both uid and gid.
+ * @return 0 on success, -1 on failure
+ */
+gint
+reopen_log (rspamd_logger_t *logger)
+{
+ return reopen_log_priv (logger, -1, -1);
+}
+
+/**
+ * Set up the logger of an rspamd instance.
+ * Allocates rspamd->logger on first use, selects the output function from
+ * cfg->log_type, configures optional output buffering and (re)builds the
+ * radix tree of IP addresses for which debug logging is enabled. The logger
+ * is also installed as the default logger.
+ *
+ * NOTE(review): a new mutex and, when buffering is enabled, a new I/O buffer
+ * are allocated on every call; previously allocated ones are not released
+ * here.
+ *
+ * @param cfg configuration supplying the log type; stored in the logger
+ * @param ptype quark naming the type of the current process
+ * @param rspamd main structure whose logger is created or updated
+ */
+void
+rspamd_set_logger (struct config_file *cfg, GQuark ptype, struct rspamd_main *rspamd)
+{
+	gchar **strvec, *p, *err;
+	gint num, i;
+	gulong k;
+	struct in_addr addr;
+	guint32 mask;
+
+	if (rspamd->logger == NULL) {
+		rspamd->logger = g_malloc (sizeof (rspamd_logger_t));
+		memset (rspamd->logger, 0, sizeof (rspamd_logger_t));
+	}
+
+	rspamd->logger->type = cfg->log_type;
+	rspamd->logger->pid = getpid ();
+	rspamd->logger->process_type = ptype;
+
+#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION <= 30))
+	rspamd->logger->mtx = g_mutex_new ();
+#else
+	rspamd->logger->mtx = g_malloc (sizeof (GMutex));
+	g_mutex_init (rspamd->logger->mtx);
+#endif
+
+	switch (cfg->log_type) {
+	case RSPAMD_LOG_CONSOLE:
+		rspamd->logger->log_func = file_log_function;
+		rspamd->logger->fd = STDERR_FILENO;
+		break;
+	case RSPAMD_LOG_SYSLOG:
+		rspamd->logger->log_func = syslog_log_function;
+		break;
+	case RSPAMD_LOG_FILE:
+		rspamd->logger->log_func = file_log_function;
+		break;
+	}
+
+	rspamd->logger->cfg = cfg;
+	/* Set up buffer */
+	if (rspamd->cfg->log_buffered) {
+		if (rspamd->cfg->log_buf_size != 0) {
+			rspamd->logger->io_buf.size = rspamd->cfg->log_buf_size;
+		}
+		else {
+			rspamd->logger->io_buf.size = BUFSIZ;
+		}
+		rspamd->logger->is_buffered = TRUE;
+		rspamd->logger->io_buf.buf = g_malloc (rspamd->logger->io_buf.size);
+	}
+	/* Set up conditional logging */
+	if (rspamd->cfg->debug_ip_map != NULL) {
+		/* Try to add it as map first of all */
+		if (rspamd->logger->debug_ip) {
+			radix_tree_free (rspamd->logger->debug_ip);
+		}
+		rspamd->logger->debug_ip = radix_tree_create ();
+		if (!add_map (rspamd->cfg, rspamd->cfg->debug_ip_map, "IP addresses for which debug logs are enabled",
+				read_radix_list, fin_radix_list, (void **)&rspamd->logger->debug_ip)) {
+			/* Try to parse it as list */
+			strvec = g_strsplit_set (rspamd->cfg->debug_ip_map, ",; ", 0);
+			num = g_strv_length (strvec);
+
+			for (i = 0; i < num; i++) {
+				g_strstrip (strvec[i]);
+
+				if ((p = strchr (strvec[i], '/')) != NULL) {
+					/* Try to extract mask */
+					*p = '\0';
+					p ++;
+					errno = 0;
+					k = strtoul (p, &err, 10);
+					/* Reject empty, malformed or out-of-range prefix lengths */
+					if (errno != 0 || err == p || *err != '\0' || k > 32) {
+						continue;
+					}
+				}
+				else {
+					k = 32;
+				}
+				if (inet_aton (strvec[i], &addr)) {
+					/*
+					 * Build the mask from scratch for every entry: shifting a
+					 * shared variable would accumulate the shifts of previous
+					 * entries, and a shift by 32 (prefix /0) is undefined.
+					 */
+					mask = (k == 0) ? 0 : (guint32)(0xFFFFFFFFU << (32 - k));
+					radix32tree_insert (rspamd->logger->debug_ip, ntohl (addr.s_addr), mask, 1);
+				}
+			}
+			g_strfreev (strvec);
+		}
+	}
+	else if (rspamd->logger->debug_ip) {
+		radix_tree_free (rspamd->logger->debug_ip);
+		rspamd->logger->debug_ip = NULL;
+	}
+
+	default_logger = rspamd->logger;
+}
+
+/**
+ * Used after fork() for updating structure params.
+ * Stores the pid of the calling process and the given process type in the
+ * logger.
+ * @param ptype quark naming the type of the current process
+ * @param rspamd_log logger to update
+ */
+void
+update_log_pid (GQuark ptype, rspamd_logger_t *rspamd_log)
+{
+ rspamd_log->pid = getpid ();
+ rspamd_log->process_type = ptype;
+}
+
+/**
+ * Write out whatever is accumulated in the logger's I/O buffer and mark the
+ * buffer empty. Does nothing for unbuffered loggers and for logger types
+ * other than console and file.
+ */
+void
+flush_log_buf (rspamd_logger_t *rspamd_log)
+{
+	if (!rspamd_log->is_buffered) {
+		return;
+	}
+
+	if (rspamd_log->type != RSPAMD_LOG_CONSOLE && rspamd_log->type != RSPAMD_LOG_FILE) {
+		return;
+	}
+
+	direct_write_log_line (rspamd_log, rspamd_log->io_buf.buf, rspamd_log->io_buf.used, FALSE);
+	rspamd_log->io_buf.used = 0;
+}
+
+
+/**
+ * Format a message and pass it to a logger (va_list flavour).
+ * When rspamd_log is NULL the default logger is used; when there is no
+ * default logger either, messages whose level is >= G_LOG_LEVEL_INFO are
+ * printed to stderr. Otherwise the message is handed to the logger's output
+ * function, under the logger mutex, if its level is <= the configured one.
+ */
+void
+rspamd_common_logv (rspamd_logger_t *rspamd_log, GLogLevelFlags log_level, const gchar *function,
+		const gchar *fmt, va_list args)
+{
+	static gchar logbuf[BUFSIZ];
+	u_char *tail;
+	rspamd_logger_t *logger;
+
+	logger = (rspamd_log != NULL) ? rspamd_log : default_logger;
+
+	if (logger == NULL) {
+		/* No logger at all: fall back to stderr */
+		if (log_level >= G_LOG_LEVEL_INFO) {
+			tail = rspamd_vsnprintf (logbuf, sizeof (logbuf), fmt, args);
+			*tail = '\0';
+			rspamd_escape_log_string (logbuf);
+			fprintf (stderr, "%s\n", logbuf);
+		}
+		return;
+	}
+
+	if (log_level > logger->cfg->log_level) {
+		/* Message is more verbose than configured */
+		return;
+	}
+
+	g_mutex_lock (logger->mtx);
+	tail = rspamd_vsnprintf (logbuf, sizeof (logbuf), fmt, args);
+	*tail = '\0';
+	rspamd_escape_log_string (logbuf);
+	logger->log_func (NULL, function, log_level, logbuf, FALSE, logger);
+	g_mutex_unlock (logger->mtx);
+}
+
+/**
+ * Variadic front end of rspamd_common_logv (): collects the arguments that
+ * follow fmt into a va_list and forwards everything unchanged.
+ * @param rspamd_log logger to use (NULL is accepted by rspamd_common_logv ())
+ * @param log_level level of the message
+ * @param function name of the calling function
+ * @param fmt format string followed by its arguments
+ */
+void
+rspamd_common_log_function (rspamd_logger_t *rspamd_log, GLogLevelFlags log_level,
+ const gchar *function, const gchar *fmt, ...)
+{
+ va_list vp;
+
+ va_start (vp, fmt);
+ rspamd_common_logv (rspamd_log, log_level, function, fmt, vp);
+ va_end (vp);
+}
+
+/**
+ * va_list logging entry point that does not name a logger: calls
+ * rspamd_common_logv () with a NULL logger.
+ */
+void
+rspamd_default_logv (GLogLevelFlags log_level, const gchar *function,
+ const gchar *fmt, va_list args)
+{
+ rspamd_common_logv (NULL, log_level, function, fmt, args);
+}
+
+/**
+ * Variadic front end of rspamd_default_logv (): collects the arguments that
+ * follow fmt into a va_list and forwards them.
+ */
+void
+rspamd_default_log_function (GLogLevelFlags log_level,
+ const gchar *function, const gchar *fmt, ...)
+{
+
+ va_list vp;
+
+ va_start (vp, fmt);
+ rspamd_default_logv (log_level, function, fmt, vp);
+ va_end (vp);
+}
+
+
+/**
+ * Append every element of an iovec array to the logger's I/O buffer.
+ * No bounds checking is done here: the caller must make sure beforehand
+ * that the data fits.
+ */
+static void
+fill_buffer (rspamd_logger_t *rspamd_log, const struct iovec *iov, gint iovcnt)
+{
+	const struct iovec *cur = iov;
+	gint left;
+
+	for (left = iovcnt; left > 0; left --, cur ++) {
+		memcpy (rspamd_log->io_buf.buf + rspamd_log->io_buf.used, cur->iov_base, cur->iov_len);
+		rspamd_log->io_buf.used += cur->iov_len;
+	}
+}
+
+/*
+ * Deliver one log line, given as an iovec array, either straight to the
+ * output (direct_write_log_line) or into the logger's I/O buffer.
+ */
+static void
+file_log_helper (rspamd_logger_t *rspamd_log, const struct iovec *iov, gint iovcnt)
+{
+	size_t total = 0;
+	gint idx;
+
+	if (! rspamd_log->is_buffered) {
+		/* Unbuffered logger: write the line immediately */
+		direct_write_log_line (rspamd_log, (void *)iov, iovcnt, TRUE);
+		return;
+	}
+
+	/* Total number of bytes in this line */
+	for (idx = 0; idx < iovcnt; idx ++) {
+		total += iov[idx].iov_len;
+	}
+
+	if (rspamd_log->io_buf.size < total) {
+		/* The line can never fit into the buffer: flush and write it directly */
+		flush_log_buf (rspamd_log);
+		direct_write_log_line (rspamd_log, (void *)iov, iovcnt, TRUE);
+		return;
+	}
+
+	if (rspamd_log->io_buf.used + total >= rspamd_log->io_buf.size) {
+		/* Not enough free space left: make room first */
+		flush_log_buf (rspamd_log);
+	}
+
+	fill_buffer (rspamd_log, iov, iovcnt);
+}
+
+/**
+ * Syslog backend: maps the glib log level to a syslog priority and emits
+ * the message, prefixed with the function name when one is given.
+ * Forced messages without a function name always go out as LOG_DEBUG.
+ * Levels below G_LOG_LEVEL_CRITICAL produce no output.
+ */
+static void
+syslog_log_function (const gchar * log_domain, const gchar *function, GLogLevelFlags log_level, const gchar * message, gboolean forced, gpointer arg)
+{
+	rspamd_logger_t *logger = arg;
+	gint priority;
+
+	if (! logger->enabled) {
+		return;
+	}
+
+	if (!forced && log_level > logger->cfg->log_level) {
+		return;
+	}
+
+	if ((forced && function == NULL) || log_level >= G_LOG_LEVEL_DEBUG) {
+		priority = LOG_DEBUG;
+	}
+	else if (log_level >= G_LOG_LEVEL_INFO) {
+		priority = LOG_INFO;
+	}
+	else if (log_level >= G_LOG_LEVEL_WARNING) {
+		priority = LOG_WARNING;
+	}
+	else if (log_level >= G_LOG_LEVEL_CRITICAL) {
+		priority = LOG_ERR;
+	}
+	else {
+		return;
+	}
+
+	if (function == NULL) {
+		syslog (priority, "%s", message);
+	}
+	else {
+		syslog (priority, "%s: %s", function, message);
+	}
+}
+
+/**
+ * Main file interface for logging.
+ * Writes one message through file_log_helper () for the logger passed in
+ * arg. Runs of identical messages (detected by comparing a checksum of the
+ * text with that of the previous message) are suppressed and later reported
+ * as "Last message repeated N times". With cfg->log_extended each line is
+ * prefixed with a timestamp, pid, process type and optional function name.
+ * @param log_domain only passed on to recursive calls
+ * @param function name of the calling function or NULL
+ * @param log_level level of the message
+ * @param message NUL-terminated message text
+ * @param forced if TRUE the configured log level is not checked
+ * @param arg pointer to the rspamd_logger_t to write to
+ */
+static void
+file_log_function (const gchar * log_domain, const gchar *function, GLogLevelFlags log_level, const gchar * message, gboolean forced, gpointer arg)
+{
+ gchar tmpbuf[256], timebuf[32];
+ time_t now;
+ struct tm *tms;
+ struct iovec iov[4];
+ gint r = 0;
+ guint32 cksum;
+ size_t mlen;
+ const gchar *cptype = NULL;
+ gboolean got_time = FALSE;
+ rspamd_logger_t *rspamd_log = arg;
+
+ if (! rspamd_log->enabled) {
+ return;
+ }
+
+
+ if (forced || log_level <= rspamd_log->cfg->log_level) {
+ /* Check throttling due to write errors */
+ if (rspamd_log->throttling) {
+ now = time (NULL);
+ if (rspamd_log->throttling_time != now) {
+ rspamd_log->throttling_time = now;
+ got_time = TRUE;
+ }
+ else {
+ /* Do not try to write to file too often while throttling */
+ return;
+ }
+ }
+ /* Check repeats */
+ mlen = strlen (message);
+ cksum = rspamd_log_calculate_cksum (message, mlen);
+ if (cksum == rspamd_log->last_line_cksum) {
+ rspamd_log->repeats ++;
+ if (rspamd_log->repeats > REPEATS_MIN && rspamd_log->repeats < REPEATS_MAX) {
+ /* Do not log anything */
+ /* Remember the first suppressed message so it can be printed later */
+ if (rspamd_log->saved_message == 0) {
+ rspamd_log->saved_message = g_strdup (message);
+ rspamd_log->saved_function = g_strdup (function);
+ }
+ return;
+ }
+ else if (rspamd_log->repeats > REPEATS_MAX) {
+ rspamd_snprintf (tmpbuf, sizeof (tmpbuf), "Last message repeated %ud times", rspamd_log->repeats);
+ /* Counter is zeroed before the recursive calls below so they are written out */
+ rspamd_log->repeats = 0;
+ /* It is safe to use temporary buffer here as it is not static */
+ if (rspamd_log->saved_message) {
+ file_log_function (log_domain, rspamd_log->saved_function, log_level, rspamd_log->saved_message, forced, arg);
+ }
+ file_log_function (log_domain, __FUNCTION__, log_level, tmpbuf, forced, arg);
+ file_log_function (log_domain, function, log_level, message, forced, arg);
+ /* Go straight back to suppressing further repeats */
+ rspamd_log->repeats = REPEATS_MIN + 1;
+ return;
+ }
+ }
+ else {
+ /* Reset counter if new message differs from saved message */
+ rspamd_log->last_line_cksum = cksum;
+ if (rspamd_log->repeats > REPEATS_MIN) {
+ rspamd_snprintf (tmpbuf, sizeof (tmpbuf), "Last message repeated %ud times", rspamd_log->repeats);
+ rspamd_log->repeats = 0;
+ if (rspamd_log->saved_message) {
+ file_log_function (log_domain, rspamd_log->saved_function, log_level, rspamd_log->saved_message, forced, arg);
+ g_free (rspamd_log->saved_message);
+ g_free (rspamd_log->saved_function);
+ rspamd_log->saved_message = NULL;
+ rspamd_log->saved_function = NULL;
+ }
+ file_log_function (log_domain, __FUNCTION__, log_level, tmpbuf, forced, arg);
+ /* It is safe to use temporary buffer here as it is not static */
+ file_log_function (log_domain, function, log_level, message, forced, arg);
+ return;
+ }
+ else {
+ rspamd_log->repeats = 0;
+ }
+ }
+
+ if (rspamd_log->cfg->log_extended) {
+ /* Reuse the time obtained by the throttling check if there is one */
+ if (! got_time) {
+ now = time (NULL);
+ }
+
+ /* Format time */
+ tms = localtime (&now);
+
+ strftime (timebuf, sizeof (timebuf), "%F %H:%M:%S", tms);
+ cptype = g_quark_to_string (rspamd_log->process_type);
+
+ /* r counts the bytes of the color prefix already placed in tmpbuf */
+ if (rspamd_log->cfg->log_color) {
+ if (log_level >= G_LOG_LEVEL_INFO) {
+ /* White */
+ r = rspamd_snprintf (tmpbuf, sizeof (tmpbuf), "\033[1;37m");
+ }
+ else if (log_level >= G_LOG_LEVEL_WARNING) {
+ /* SGR 2;32 selects faint green (this comment previously said magenta) */
+ r = rspamd_snprintf (tmpbuf, sizeof (tmpbuf), "\033[2;32m");
+ }
+ else if (log_level >= G_LOG_LEVEL_CRITICAL) {
+ /* Red */
+ r = rspamd_snprintf (tmpbuf, sizeof (tmpbuf), "\033[1;31m");
+ }
+ }
+ else {
+ r = 0;
+ }
+ if (function == NULL) {
+ r += rspamd_snprintf (tmpbuf + r, sizeof (tmpbuf) - r, "%s #%P(%s) ", timebuf, rspamd_log->pid, cptype);
+ }
+ else {
+ r += rspamd_snprintf (tmpbuf + r, sizeof (tmpbuf) -r, "%s #%P(%s) %s: ", timebuf, rspamd_log->pid, cptype, function);
+ }
+ /* Construct IOV for log line */
+ iov[0].iov_base = tmpbuf;
+ iov[0].iov_len = r;
+ iov[1].iov_base = (void *)message;
+ iov[1].iov_len = mlen;
+ iov[2].iov_base = (void *)&lf_chr;
+ iov[2].iov_len = 1;
+ if (rspamd_log->cfg->log_color) {
+ /* Color reset sequence goes after the line feed */
+ iov[3].iov_base = "\033[0m";
+ iov[3].iov_len = sizeof ("\033[0m") - 1;
+ /* Call helper (for buffering) */
+ file_log_helper (rspamd_log, iov, 4);
+ }
+ else {
+ /* Call helper (for buffering) */
+ file_log_helper (rspamd_log, iov, 3);
+ }
+ }
+ else {
+ /* Plain mode: message and line feed only, no prefix */
+ iov[0].iov_base = (void *)message;
+ iov[0].iov_len = mlen;
+ iov[1].iov_base = (void *)&lf_chr;
+ iov[1].iov_len = 1;
+ if (rspamd_log->cfg->log_color) {
+ /* NOTE(review): a reset sequence is appended although no color prefix is written in this mode */
+ iov[2].iov_base = "\033[0m";
+ iov[2].iov_len = sizeof ("\033[0m") - 1;
+ /* Call helper (for buffering) */
+ file_log_helper (rspamd_log, iov, 3);
+ }
+ else {
+ /* Call helper (for buffering) */
+ file_log_helper (rspamd_log, iov, 2);
+ }
+ }
+ }
+}
+
+/**
+ * Emit a debug message, optionally restricted by client address.
+ * Nothing is written unless the configured level reaches
+ * G_LOG_LEVEL_DEBUG or temporary debugging is switched on. When the logger
+ * has a debug IP tree and an IPv4 address is supplied, the message is
+ * dropped if that address is not found in the tree.
+ */
+void
+rspamd_conditional_debug (rspamd_logger_t *rspamd_log,
+		rspamd_inet_addr_t *addr, const gchar *function, const gchar *fmt, ...)
+{
+	static gchar logbuf[BUFSIZ];
+	va_list ap;
+	u_char *tail;
+
+	if (rspamd_log->cfg->log_level < G_LOG_LEVEL_DEBUG && !rspamd_log->is_debug) {
+		return;
+	}
+
+	if (rspamd_log->debug_ip != NULL && addr != NULL && addr->af == AF_INET) {
+		if (radix32tree_find (rspamd_log->debug_ip,
+				ntohl (addr->addr.s4.sin_addr.s_addr)) == RADIX_NO_VALUE) {
+			/* Address is not selected for debugging */
+			return;
+		}
+	}
+
+	g_mutex_lock (rspamd_log->mtx);
+	va_start (ap, fmt);
+	tail = rspamd_vsnprintf (logbuf, sizeof (logbuf), fmt, ap);
+	*tail = '\0';
+	rspamd_escape_log_string (logbuf);
+	va_end (ap);
+	rspamd_log->log_func (NULL, function, G_LOG_LEVEL_DEBUG, logbuf, TRUE, rspamd_log);
+	g_mutex_unlock (rspamd_log->mtx);
+}
+/**
+ * Log handler with the signature glib expects: passes the message to the
+ * output function of the logger given in arg, holding the logger mutex.
+ * Disabled loggers are left untouched.
+ */
+void
+rspamd_glib_log_function (const gchar *log_domain, GLogLevelFlags log_level, const gchar *message, gpointer arg)
+{
+	rspamd_logger_t *logger = arg;
+
+	if (!logger->enabled) {
+		return;
+	}
+
+	g_mutex_lock (logger->mtx);
+	logger->log_func (log_domain, NULL, log_level, message, FALSE, logger);
+	g_mutex_unlock (logger->mtx);
+}
+
+/**
+ * Temporarily turn on debugging: sets the logger's is_debug flag, which
+ * rspamd_conditional_debug () checks in addition to the configured level.
+ */
+void
+rspamd_log_debug (rspamd_logger_t *rspamd_log)
+{
+ rspamd_log->is_debug = TRUE;
+}
+
+/**
+ * Turn off temporary debugging: clears the logger's is_debug flag.
+ */
+void
+rspamd_log_nodebug (rspamd_logger_t *rspamd_log)
+{
+ rspamd_log->is_debug = FALSE;
+}
diff --git a/src/libutil/logger.h b/src/libutil/logger.h
new file mode 100644
index 000000000..b0766b938
--- /dev/null
+++ b/src/libutil/logger.h
@@ -0,0 +1,117 @@
+#ifndef RSPAMD_LOGGER_H
+#define RSPAMD_LOGGER_H
+
+#include "config.h"
+#include "cfg_file.h"
+#include "radix.h"
+#include "util.h"
+
+
+/**
+ * Signature of a logger output function.
+ * @param log_domain glib log domain (may be NULL)
+ * @param function name of the calling function (may be NULL)
+ * @param log_level level of the message
+ * @param message text to write
+ * @param forced if TRUE, write regardless of the configured log level
+ * @param arg the logger (rspamd_logger_t *) to write to
+ */
+typedef void (*rspamd_log_func_t)(const gchar * log_domain, const gchar *function,
+ GLogLevelFlags log_level, const gchar * message,
+ gboolean forced, gpointer arg);
+
+typedef struct rspamd_logger_s rspamd_logger_t;
+/**
+ * Init logger
+ */
+void rspamd_set_logger (struct config_file *cfg, GQuark ptype, struct rspamd_main *main);
+/**
+ * Open log file or initialize other structures
+ */
+gint open_log (rspamd_logger_t *logger);
+/**
+ * Close log file or destroy other structures
+ */
+void close_log (rspamd_logger_t *logger);
+/**
+ * Close and open log again
+ */
+gint reopen_log (rspamd_logger_t *logger);
+
+/**
+ * Open log file or initialize other structures for privileged processes
+ */
+gint open_log_priv (rspamd_logger_t *logger, uid_t uid, gid_t gid);
+/**
+ * Close log file or destroy other structures for privileged processes
+ */
+void close_log_priv (rspamd_logger_t *logger, uid_t uid, gid_t gid);
+/**
+ * Close and open log again for privileged processes
+ */
+gint reopen_log_priv (rspamd_logger_t *logger, uid_t uid, gid_t gid);
+
+/**
+ * Set log pid
+ */
+void update_log_pid (GQuark ptype, rspamd_logger_t *logger);
+
+/**
+ * Flush log buffer for some types of logging
+ */
+void flush_log_buf (rspamd_logger_t *logger);
+/**
+ * Log function that is compatible with glib messages
+ */
+void rspamd_glib_log_function (const gchar *log_domain,
+ GLogLevelFlags log_level, const gchar *message, gpointer arg);
+
+/**
+ * Function with variable number of arguments support
+ */
+void rspamd_common_log_function (rspamd_logger_t *logger,
+ GLogLevelFlags log_level, const gchar *function, const gchar *fmt, ...);
+
+void rspamd_common_logv (rspamd_logger_t *logger,
+ GLogLevelFlags log_level, const gchar *function, const gchar *fmt, va_list args);
+
+/**
+ * Conditional debug function
+ */
+void rspamd_conditional_debug (rspamd_logger_t *logger,
+ rspamd_inet_addr_t *addr, const gchar *function, const gchar *fmt, ...) ;
+
+/**
+ * Function with variable number of arguments support that uses static default logger
+ */
+void rspamd_default_log_function (GLogLevelFlags log_level, const gchar *function,
+ const gchar *fmt, ...);
+
+/**
+ * Varargs version of default log function
+ * @param log_level
+ * @param function
+ * @param fmt
+ * @param args
+ */
+void rspamd_default_logv (GLogLevelFlags log_level, const gchar *function, const gchar *fmt, va_list args);
+
+/**
+ * Temporarily turn on debug
+ */
+void rspamd_log_debug (rspamd_logger_t *logger);
+
+/**
+ * Turn off debug
+ */
+void rspamd_log_nodebug (rspamd_logger_t *logger);
+
+/* Typical functions */
+
+/*
+ * Logging in postfix style.
+ * Each macro adds __FUNCTION__ as the function name. When RSPAMD_MAIN is
+ * defined the macros use rspamd_main->logger, so a variable named
+ * rspamd_main must be in scope at the call site; debug_task additionally
+ * needs a variable named task. Otherwise the macros go through
+ * rspamd_default_log_function (), which names no logger.
+ */
+#if defined(RSPAMD_MAIN)
+#define msg_err(...) rspamd_common_log_function(rspamd_main->logger, G_LOG_LEVEL_CRITICAL, __FUNCTION__, __VA_ARGS__)
+#define msg_warn(...) rspamd_common_log_function(rspamd_main->logger, G_LOG_LEVEL_WARNING, __FUNCTION__, __VA_ARGS__)
+#define msg_info(...) rspamd_common_log_function(rspamd_main->logger, G_LOG_LEVEL_INFO, __FUNCTION__, __VA_ARGS__)
+#define msg_debug(...) rspamd_conditional_debug(rspamd_main->logger, NULL, __FUNCTION__, __VA_ARGS__)
+#define debug_task(...) rspamd_conditional_debug(rspamd_main->logger, &task->from_addr, __FUNCTION__, __VA_ARGS__)
+#else
+#define msg_err(...) rspamd_default_log_function(G_LOG_LEVEL_CRITICAL, __FUNCTION__, __VA_ARGS__)
+#define msg_warn(...) rspamd_default_log_function(G_LOG_LEVEL_WARNING, __FUNCTION__, __VA_ARGS__)
+#define msg_info(...) rspamd_default_log_function(G_LOG_LEVEL_INFO, __FUNCTION__, __VA_ARGS__)
+#define msg_debug(...) rspamd_default_log_function(G_LOG_LEVEL_DEBUG, __FUNCTION__, __VA_ARGS__)
+#define debug_task(...) rspamd_default_log_function(G_LOG_LEVEL_DEBUG, __FUNCTION__, __VA_ARGS__)
+#endif
+
+#endif
diff --git a/src/libutil/map.c b/src/libutil/map.c
new file mode 100644
index 000000000..703622585
--- /dev/null
+++ b/src/libutil/map.c
@@ -0,0 +1,1148 @@
+/*
+ * Copyright (c) 2009-2012, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Implementation of map files handling
+ */
+#include "config.h"
+#include "map.h"
+#include "http.h"
+#include "main.h"
+#include "util.h"
+#include "mem_pool.h"
+
+static const gchar *hash_fill = "1";
+
+/* State of an HTTP reply while its status line and headers are being parsed */
+struct http_reply {
+ /* Numeric status code taken from the status line */
+ gint code;
+ /* Header name -> header value, filled by parse_http_reply () */
+ GHashTable *headers;
+ /* Name of the header whose value is being read; NULL once it has been stored */
+ gchar *cur_header;
+ /* Current state of the parse_http_reply () state machine (0..6, 6 = headers done) */
+ gint parser_state;
+};
+
+/*
+ * Per-request context for an HTTP map fetch.
+ * NOTE(review): no user of this structure is visible in this part of the
+ * file; the field notes below are inferred from the field types and names -
+ * confirm against the code that uses them.
+ */
+struct http_callback_data {
+ /* presumably the event registered for the socket and its event base */
+ struct event ev;
+ struct event_base *ev_base;
+ /* presumably the timeout used with the event */
+ struct timeval tv;
+ /* map being fetched and its HTTP-specific data */
+ struct rspamd_map *map;
+ struct http_map_data *data;
+ /* reply parsing state */
+ struct http_reply *reply;
+ /* data handed to the map read/fin callbacks */
+ struct map_cb_data cbdata;
+
+ gint state;
+ gint fd;
+};
+
+/* Value in seconds after which we would try to do stat on list file */
+
+/* HTTP timeouts */
+#define HTTP_CONNECT_TIMEOUT 2
+#define HTTP_READ_TIMEOUT 10
+
+/**
+ * Open a TCP connection to the HTTP server described by data.
+ * @param is_async passed to make_tcp_socket () as its last argument
+ * @return connected socket, or -1 (after logging the error) on failure
+ */
+static gint
+connect_http (struct rspamd_map *map, struct http_map_data *data, gboolean is_async)
+{
+	gint fd = make_tcp_socket (data->addr, FALSE, is_async);
+
+	if (fd != -1) {
+		return fd;
+	}
+
+	msg_info ("cannot connect to http server %s: %d, %s", data->host, errno, strerror (errno));
+	return -1;
+}
+
+/**
+ * Build a GET request for the map and write it to the socket.
+ * A leading '/' is added to the path when missing; an If-Modified-Since
+ * header is included when the map has been checked before
+ * (last_checked != 0). A failed write is only logged.
+ */
+static void
+write_http_request (struct rspamd_map *map, struct http_map_data *data, gint sock)
+{
+	gchar request[BUFSIZ], since[128];
+	const gchar *slash;
+	struct tm *gmt;
+	gint off;
+
+	/* Timestamp of the previous check, formatted for If-Modified-Since */
+	gmt = gmtime (&data->last_checked);
+	strftime (since, sizeof (since), "%a, %d %b %Y %H:%M:%S %Z", gmt);
+
+	slash = (*data->path == '/') ? "" : "/";
+	off = rspamd_snprintf (request, sizeof (request), "GET %s%s HTTP/1.1" CRLF "Connection: close" CRLF "Host: %s" CRLF, slash, data->path, data->host);
+
+	if (data->last_checked != 0) {
+		off += rspamd_snprintf (request + off, sizeof (request) - off, "If-Modified-Since: %s" CRLF, since);
+	}
+
+	/* Empty line terminates the request headers */
+	off += rspamd_snprintf (request + off, sizeof (request) - off, CRLF);
+
+	if (write (sock, request, off) == -1) {
+		msg_err ("failed to write request: %d, %s", errno, strerror (errno));
+	}
+}
+
+/**
+ * FSM for parsing HTTP reply.
+ * Consumes the status line and headers found in chunk, storing the status
+ * code and the headers in reply. The state is kept in reply->parser_state
+ * so parsing can continue with the next chunk:
+ *   0 - looking for the space that precedes the status code
+ *   1 - skipping to end of line
+ *   2 - reading a header name (or detecting the empty line)
+ *   3 - skipping spaces after the ':' of a header
+ *   4 - reading a header value
+ *   5 - empty line seen, stepping over its last character
+ *   6 - headers done
+ * @param chunk data to parse
+ * @param len number of bytes in chunk
+ * @param reply reply structure updated by the parser
+ * @return pointer to the first byte that was not consumed, or NULL if the
+ * status code could not be parsed
+ */
+static gchar *
+parse_http_reply (gchar * chunk, gint len, struct http_reply *reply)
+{
+ gchar *s, *p, *err_str, *tmp;
+ p = chunk;
+ s = chunk;
+
+ while (p - chunk < len) {
+ switch (reply->parser_state) {
+ /* Search status code */
+ case 0:
+ /* Search for status code */
+ if (*p != ' ') {
+ p++;
+ }
+ else {
+ /* Try to parse HTTP reply code */
+ reply->code = strtoul (++p, (gchar **)&err_str, 10);
+ if (*err_str != ' ') {
+ msg_info ("error while reading HTTP status code: %s", p);
+ return NULL;
+ }
+ /* Now skip to end of status string */
+ reply->parser_state = 1;
+ continue;
+ }
+ break;
+ /* Skip to end of line */
+ case 1:
+ if (*p == '\n') {
+ /* Switch to read header state */
+ reply->parser_state = 2;
+ }
+ /* Each skipped symbol is proceeded */
+ s = ++p;
+ break;
+ /* Read header name */
+ case 2:
+ if (*p == ':') {
+ reply->cur_header = g_malloc (p - s + 1);
+ rspamd_strlcpy (reply->cur_header, s, p - s + 1);
+ reply->parser_state = 3;
+ }
+ /* NOTE(review): *(p + 1) is read without checking that p + 1 is still inside chunk */
+ else if (*p == '\r' && *(p + 1) == '\n') {
+ /* Last empty line */
+ reply->parser_state = 5;
+ }
+ p++;
+ break;
+ /* Skip spaces after header name */
+ case 3:
+ if (*p != ' ') {
+ s = p;
+ reply->parser_state = 4;
+ }
+ else {
+ p++;
+ }
+ break;
+ /* Read header value */
+ case 4:
+ if (*p == '\r') {
+ if (reply->cur_header != NULL) {
+ tmp = g_malloc (p - s + 1);
+ rspamd_strlcpy (tmp, s, p - s + 1);
+ /* The hash table takes both the name and the value */
+ g_hash_table_insert (reply->headers, reply->cur_header, tmp);
+ reply->cur_header = NULL;
+ }
+ reply->parser_state = 1;
+ }
+ p++;
+ break;
+ case 5:
+ /* Set pointer to beginning of HTTP body */
+ p++;
+ s = p;
+ reply->parser_state = 6;
+ break;
+ case 6:
+ /* Headers parsed, just return */
+ return p;
+ break;
+ }
+ }
+
+ return s;
+}
+
+/**
+ * Read and parse the header of an HTTP chunk.
+ * Copies the leading hexadecimal digits of buf (at most 31 of them),
+ * converts them to the chunk size, then skips the rest of the line up to
+ * and including the '\n'. The size is stored in data->chunk and
+ * data->chunk_remain. No byte at or beyond buf + len is ever read.
+ * @param buf start of the chunk header
+ * @param len number of valid bytes in buf
+ * @param data map data that receives the chunk size
+ * @return number of bytes of buf that belong to the header, or -1 if the
+ * size could not be converted
+ */
+static gint
+read_chunk_header (gchar * buf, gint len, struct http_map_data *data)
+{
+	gchar chunkbuf[32], *p, *c, *err_str;
+	gint skip = 0;
+
+	p = chunkbuf;
+	c = buf;
+	/* Find hex digits; the length check must come before *c is read */
+	while (skip < len && p - chunkbuf < (gint)(sizeof (chunkbuf) - 1) && g_ascii_isxdigit (*c)) {
+		*p++ = *c++;
+		skip++;
+	}
+	*p = '\0';
+	data->chunk = strtoul (chunkbuf, &err_str, 16);
+	if (*err_str != '\0') {
+		return -1;
+	}
+
+	/* Now skip to CRLF */
+	while (skip < len && *c != '\n') {
+		c++;
+		skip++;
+	}
+	if (skip < len && *c == '\n') {
+		skip++;
+		c++;
+	}
+	data->chunk_remain = data->chunk;
+
+	return skip;
+}
+
+/**
+ * Helper callback for reading chunked reply.
+ * Parses chunk headers where needed and feeds the chunk payload found in
+ * buf to the map's read callback. Bytes the callback did not consume are
+ * moved to the start of buf and their count is stored in data->rlen.
+ * @param buf buffer with reply body data
+ * @param len number of bytes in buf
+ * @param map map whose read_callback receives the payload
+ * @param data HTTP map data holding the chunk state (chunked, chunk, chunk_remain, rlen)
+ * @param cbdata callback data passed to read_callback
+ * @return FALSE on a malformed chunk header or when a zero-sized chunk is
+ * read, TRUE otherwise
+ */
+static gboolean
+read_http_chunked (gchar * buf, size_t len, struct rspamd_map *map, struct http_map_data *data, struct map_cb_data *cbdata)
+{
+ gchar *p = buf, *remain;
+ gint skip = 0;
+
+ /* chunked == 1: no chunk header has been read yet */
+ if (data->chunked == 1) {
+ /* Read first chunk data */
+ if ((skip = read_chunk_header (buf, len, data)) != -1) {
+ p += skip;
+ len -= skip;
+ data->chunked = 2;
+ }
+ else {
+ msg_info ("invalid chunked reply: %*s", (gint)len, buf);
+ return FALSE;
+ }
+ }
+
+ if (data->chunk_remain == 0) {
+ /* Read another chunk */
+ /* NOTE(review): the header is parsed from buf, not from p */
+ if ((skip = read_chunk_header (buf, len, data)) != -1) {
+ p += skip;
+ len -= skip;
+ }
+ else {
+ msg_info ("invalid chunked reply: %*s", (gint)len, buf);
+ return FALSE;
+ }
+ /* A chunk of size zero ends the reading */
+ if (data->chunk == 0) {
+ return FALSE;
+ }
+ }
+
+ if (data->chunk_remain <= len ) {
+ /* Call callback and move remaining buffer */
+ remain = map->read_callback (map->pool, p, data->chunk_remain, cbdata);
+ if (remain != NULL && remain != p + data->chunk_remain) {
+ /* Copy remaining buffer to start of buffer */
+ data->rlen = len - (remain - p);
+ memmove (buf, remain, data->rlen);
+ data->chunk_remain -= data->rlen;
+ }
+ else {
+ /* Copy other part */
+ data->rlen = len - data->chunk_remain;
+ if (data->rlen > 0) {
+ memmove (buf, p + data->chunk_remain, data->rlen);
+ }
+ data->chunk_remain = 0;
+ }
+
+ }
+ else {
+ /* Just read another portion of chunk */
+ data->chunk_remain -= len;
+ remain = map->read_callback (map->pool, p, len, cbdata);
+ if (remain != NULL && remain != p + len) {
+ /* copy remaining buffer to start of buffer */
+ data->rlen = len - (remain - p);
+ memmove (buf, remain, data->rlen);
+ }
+ }
+
+ return TRUE;
+}
+
+/**
+ * Callback for reading HTTP reply.
+ * Performs one read () from fd into data->read_buf (after any bytes kept
+ * from the previous call), runs the header parser and, once the headers
+ * are complete, passes body data to the map's read callback, either
+ * directly or through read_http_chunked ().
+ * @param map map being loaded
+ * @param data HTTP map data (read buffer, chunk state, last_checked)
+ * @param reply reply parsing state
+ * @param cbdata callback data passed to the read callback
+ * @param fd socket to read from
+ * @return TRUE if the caller should call again, FALSE on read error/EOF,
+ * on a status other than 200 and when reading is finished
+ */
+static gboolean
+read_http_common (struct rspamd_map *map, struct http_map_data *data, struct http_reply *reply, struct map_cb_data *cbdata, gint fd)
+{
+ gchar *remain, *pos;
+ ssize_t r;
+ gchar *te, *date;
+
+ if ((r = read (fd, data->read_buf + data->rlen, sizeof (data->read_buf) - data->rlen)) > 0) {
+ /* r now covers the bytes kept from the previous call plus the new ones */
+ r += data->rlen;
+ data->rlen = 0;
+ remain = parse_http_reply (data->read_buf, r, reply);
+ if (remain != NULL && remain != data->read_buf) {
+ /* copy remaining data->read_buffer to start of data->read_buffer */
+ data->rlen = r - (remain - data->read_buf);
+ memmove (data->read_buf, remain, data->rlen);
+ r = data->rlen;
+ data->rlen = 0;
+ }
+ if (r <= 0) {
+ return TRUE;
+ }
+ /* parser_state 6 means that all headers have been parsed */
+ if (reply->parser_state == 6) {
+ /* If reply header is parsed successfully, try to read further data */
+ if (reply->code != 200 && reply->code != 304) {
+ msg_err ("got error reply from server %s, %d", data->host, reply->code);
+ return FALSE;
+ }
+ else if (reply->code == 304) {
+ /* Do not read anything */
+ return FALSE;
+ }
+ pos = data->read_buf;
+ /* Check for chunked */
+ /* chunked: 0 - not decided yet, 1 - chunked encoding, -1 - not chunked */
+ if (data->chunked == 0) {
+ if ((te = g_hash_table_lookup (reply->headers, "Transfer-Encoding")) != NULL) {
+ if (g_ascii_strcasecmp (te, "chunked") == 0) {
+ data->chunked = 1;
+ }
+ else {
+ data->chunked = -1;
+ }
+ }
+ else {
+ data->chunked = -1;
+ }
+ }
+ /* Check for date */
+ date = g_hash_table_lookup (reply->headers, "Date");
+ if (date != NULL) {
+ data->last_checked = rspamd_http_parse_date (date, -1);
+ }
+ else {
+ /* (time_t)-1 marks that the reply carried no Date header */
+ data->last_checked = (time_t)-1;
+ }
+
+ if (data->chunked > 0) {
+ return read_http_chunked (data->read_buf, r, map, data, cbdata);
+ }
+ /* Read more data */
+ remain = map->read_callback (map->pool, pos, r, cbdata);
+ if (remain != NULL && remain != pos + r) {
+ /* copy remaining data->read_buffer to start of data->read_buffer */
+ data->rlen = r - (remain - pos);
+ memmove (pos, remain, data->rlen);
+ }
+ }
+ }
+ else {
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+/**
+ * Sync read of HTTP reply.
+ * Connects to the server, sends the request and calls read_http_common ()
+ * until it returns FALSE, then runs the map's fin callback and publishes
+ * the new data through map->user_data. If the reply carried no Date
+ * header, the current time is recorded as the time of the last check.
+ * @param map map to load; both callbacks must be set
+ * @param data HTTP-specific data of the map
+ */
+static void
+read_http_sync (struct rspamd_map *map, struct http_map_data *data)
+{
+	struct map_cb_data cbdata;
+	gint fd;
+	struct http_reply *repl;
+
+	if (map->read_callback == NULL || map->fin_callback == NULL) {
+		msg_err ("bad callback for reading map file");
+		return;
+	}
+
+	/* Connect synced */
+	if ((fd = connect_http (map, data, FALSE)) == -1) {
+		return;
+	}
+	write_http_request (map, data, fd);
+
+	cbdata.state = 0;
+	cbdata.map = map;
+	cbdata.prev_data = *map->user_data;
+	cbdata.cur_data = NULL;
+
+	repl = g_malloc (sizeof (struct http_reply));
+	repl->parser_state = 0;
+	repl->code = 404;
+	/* g_malloc () does not zero memory: start with no pending header name */
+	repl->cur_header = NULL;
+	repl->headers = g_hash_table_new_full (rspamd_strcase_hash, rspamd_strcase_equal, g_free, g_free);
+
+	while (read_http_common (map, data, repl, &cbdata, fd));
+
+	close (fd);
+
+	map->fin_callback (map->pool, &cbdata);
+	*map->user_data = cbdata.cur_data;
+	if (data->last_checked == (time_t)-1) {
+		data->last_checked = time (NULL);
+	}
+
+	/*
+	 * A header name is owned by the reply until its value has been stored
+	 * in the hash table; release one left over by a truncated reply.
+	 */
+	g_free (repl->cur_header);
+	g_hash_table_destroy (repl->headers);
+	g_free (repl);
+}
+
+/**
+ * Load a map from a local file.
+ * The file is read in pieces of up to BUFSIZ - 1 bytes; every piece is
+ * NUL-terminated and given to the map's read callback. Data the callback
+ * reports as unconsumed is moved to the start of the buffer and presented
+ * again together with the next piece. When the file is exhausted the fin
+ * callback is run and the new data is published through map->user_data.
+ */
+static void
+read_map_file (struct rspamd_map *map, struct file_map_data *data)
+{
+	struct map_cb_data cbdata;
+	gchar buf[BUFSIZ], *rest;
+	ssize_t nread;
+	gint fd, kept = 0;
+
+	if (map->read_callback == NULL || map->fin_callback == NULL) {
+		msg_err ("bad callback for reading map file");
+		return;
+	}
+
+	fd = open (data->filename, O_RDONLY);
+	if (fd == -1) {
+		msg_warn ("cannot open file '%s': %s", data->filename, strerror (errno));
+		return;
+	}
+
+	cbdata.state = 0;
+	cbdata.prev_data = *map->user_data;
+	cbdata.cur_data = NULL;
+	cbdata.map = map;
+
+	for (;;) {
+		nread = read (fd, buf + kept, sizeof (buf) - kept - 1);
+		if (nread <= 0) {
+			break;
+		}
+
+		nread += kept;
+		buf[nread] = '\0';
+		rest = map->read_callback (map->pool, buf, nread, &cbdata);
+		if (rest != NULL) {
+			/* Keep the unconsumed tail for the next round */
+			kept = nread - (rest - buf);
+			memmove (buf, rest, kept);
+		}
+	}
+
+	close (fd);
+
+	map->fin_callback (map->pool, &cbdata);
+	*map->user_data = cbdata.cur_data;
+}
+
+/**
+ * FSM for parsing lists of "key value" lines.
+ * Every complete line found in chunk is split into a key and an optional
+ * value, which are copied into pool and passed to func together with
+ * data->cur_data. '#' starts a comment that runs to the end of the line.
+ * The parser state is kept in data->state:
+ *   0   - reading a key or a value
+ *   2   - skipping whitespace between key and value
+ *   99  - skipping a comment
+ *   100 - skipping line ends and whitespace after a line
+ * @param pool memory pool used for keys and values
+ * @param chunk data to parse
+ * @param len number of bytes in chunk
+ * @param data callback data holding the state and the target container
+ * @param func function that inserts a key/value pair into the container
+ * @return pointer to the start of the last token that was being read
+ */
+gchar *
+abstract_parse_kv_list (rspamd_mempool_t * pool, gchar * chunk, gint len, struct map_cb_data *data, insert_func func)
+{
+	gchar *c, *p, *key = NULL, *value = NULL;
+
+	p = chunk;
+	c = p;
+
+	while (p - chunk < len) {
+		switch (data->state) {
+		case 0:
+			/* read key */
+			/* Check here comments, eol and end of buffer */
+			if (*p == '#') {
+				if (key != NULL && p - c >= 0) {
+					value = rspamd_mempool_alloc (pool, p - c + 1);
+					memcpy (value, c, p - c);
+					value[p - c] = '\0';
+					value = g_strstrip (value);
+					func (data->cur_data, key, value);
+					msg_debug ("insert kv pair: %s -> %s", key, value);
+				}
+				data->state = 99;
+			}
+			else if (*p == '\r' || *p == '\n' || p - chunk == len - 1) {
+				if (key != NULL && p - c >= 0) {
+					value = rspamd_mempool_alloc (pool, p - c + 1);
+					memcpy (value, c, p - c);
+					value[p - c] = '\0';
+
+					value = g_strstrip (value);
+					func (data->cur_data, key, value);
+					msg_debug ("insert kv pair: %s -> %s", key, value);
+				}
+				else if (key == NULL && p - c > 0) {
+					/* Key only line */
+					key = rspamd_mempool_alloc (pool, p - c + 1);
+					memcpy (key, c, p - c);
+					key[p - c] = '\0';
+					value = rspamd_mempool_alloc (pool, 1);
+					*value = '\0';
+					func (data->cur_data, key, value);
+					msg_debug ("insert kv pair: %s -> %s", key, value);
+				}
+				data->state = 100;
+				key = NULL;
+			}
+			else if (g_ascii_isspace (*p)) {
+				if (p - c > 0) {
+					key = rspamd_mempool_alloc (pool, p - c + 1);
+					memcpy (key, c, p - c);
+					key[p - c] = '\0';
+					data->state = 2;
+				}
+				else {
+					/*
+					 * Whitespace before any key character: step over it.
+					 * Without advancing p neither the position nor the
+					 * state changes and the loop never terminates.
+					 */
+					key = NULL;
+					p ++;
+					c = p;
+				}
+			}
+			else {
+				p ++;
+			}
+			break;
+		case 2:
+			/* Skip spaces before value */
+			if (!g_ascii_isspace (*p)) {
+				c = p;
+				data->state = 0;
+			}
+			else {
+				p ++;
+			}
+			break;
+		case 99:
+			/* SKIP_COMMENT */
+			/* Skip comment till end of line */
+			if (*p == '\r' || *p == '\n') {
+				while (p - chunk < len && (*p == '\r' || *p == '\n')) {
+					p++;
+				}
+				c = p;
+				key = NULL;
+				data->state = 0;
+			}
+			else {
+				p++;
+			}
+			break;
+		case 100:
+			/* Skip \r\n and whitespaces */
+			if (*p == '\r' || *p == '\n' || g_ascii_isspace (*p)) {
+				p ++;
+			}
+			else {
+				c = p;
+				key = NULL;
+				data->state = 0;
+			}
+			break;
+		}
+	}
+
+	return c;
+}
+
+/**
+ * FSM for parsing plain lists (one entry per line).
+ * Every complete line of chunk is stripped of surrounding whitespace,
+ * copied into pool and, when not empty, passed to func as a key with a
+ * constant filler value. '#' starts a comment that runs to the end of the
+ * line. data->state is 0 while reading an entry and 1 while skipping a
+ * comment.
+ * @param pool memory pool used for the stored strings
+ * @param chunk data to parse
+ * @param len number of bytes in chunk
+ * @param data callback data holding the state and the target container
+ * @param func function that inserts an entry into the container
+ * @return pointer to the start of the first line that is not complete yet
+ */
+gchar *
+abstract_parse_list (rspamd_mempool_t * pool, gchar * chunk, gint len, struct map_cb_data *data, insert_func func)
+{
+	gchar *s, *p, *str, *start;
+
+	p = chunk;
+	start = p;
+
+	/* Scratch copy of the line that is being read */
+	str = g_malloc (len + 1);
+	s = str;
+
+	while (p - chunk < len) {
+		switch (data->state) {
+		/* READ_SYMBOL */
+		case 0:
+			if (*p == '#') {
+				/* Got comment */
+				if (s != str) {
+					/* Save previous string in lines like: "127.0.0.1 #localhost" */
+					*s = '\0';
+					s = rspamd_mempool_strdup (pool, g_strstrip (str));
+					if (strlen (s) > 0) {
+						func (data->cur_data, s, hash_fill);
+					}
+					s = str;
+					start = p;
+				}
+				data->state = 1;
+			}
+			else if (*p == '\r' || *p == '\n') {
+				/* Got EOL marker, save stored string */
+				if (s != str) {
+					*s = '\0';
+					s = rspamd_mempool_strdup (pool, g_strstrip (str));
+					if (strlen (s) > 0) {
+						func (data->cur_data, s, hash_fill);
+					}
+					s = str;
+				}
+				/* Skip EOL symbols; check the bound before reading *p */
+				while (p - chunk < len && (*p == '\r' || *p == '\n')) {
+					p++;
+				}
+				start = p;
+			}
+			else {
+				/* Store new string in s */
+				*s = *p;
+				s++;
+				p++;
+			}
+			break;
+		/* SKIP_COMMENT */
+		case 1:
+			/* Skip comment till end of line */
+			if (*p == '\r' || *p == '\n') {
+				while (p - chunk < len && (*p == '\r' || *p == '\n')) {
+					p++;
+				}
+				s = str;
+				start = p;
+				data->state = 0;
+			}
+			else {
+				p++;
+			}
+			break;
+		}
+	}
+
+	g_free (str);
+
+	return start;
+}
+
+/**
+ * Radix tree helper function: insert every "ip" or "ip/prefixlen" item found
+ * in a list element into the radix tree.
+ *
+ * @param st radix tree (radix_tree_t *) to fill
+ * @param key string with one or more items separated by space, ',' or ';'
+ * @param value unused, present only to match the insertion callback shape
+ *
+ * Processing stops at the first item whose address part cannot be parsed.
+ */
+static void
+radix_tree_insert_helper (gpointer st, gconstpointer key, gpointer value)
+{
+	radix_tree_t *tree = st;
+
+	guint32 mask;
+	guint32 ip;
+	gchar *token, *ipnet, *err_str, **strv, **cur;
+	struct in_addr ina;
+	gint k;
+
+	/* Split string if there are multiple items inside a single string */
+	strv = g_strsplit_set ((gchar *)key, " ,;", 0);
+
+	for (cur = strv; *cur != NULL; cur++) {
+		if (**cur == '\0') {
+			continue;
+		}
+		/* Every item starts with a host mask, a previous item must not leak into it */
+		mask = 0xFFFFFFFF;
+
+		/* Extract ipnet */
+		ipnet = *cur;
+		token = strsep (&ipnet, "/");
+
+		if (ipnet != NULL) {
+			errno = 0;
+			/* Get mask */
+			k = strtoul (ipnet, &err_str, 10);
+			if (errno != 0) {
+				msg_warn ("invalid netmask, error detected on symbol: %s, erorr: %s", err_str, strerror (errno));
+				k = 32;
+			}
+			else if (k > 32 || k < 0) {
+				msg_warn ("invalid netmask value: %d", k);
+				k = 32;
+			}
+			/*
+			 * Calculate mask based on CIDR presentation; shifting a 32 bit
+			 * value by 32 is undefined, so /0 is handled explicitly
+			 */
+			mask = (k == 0) ? 0 : mask << (32 - k);
+		}
+
+		/* Check IP */
+		if (inet_aton (token, &ina) == 0) {
+			msg_err ("invalid ip address: %s", token);
+			/* Leave the loop instead of returning so that strv is released */
+			break;
+		}
+
+		/* Insert ip in a tree */
+		ip = ntohl ((guint32) ina.s_addr);
+		k = radix32tree_insert (tree, ip, mask, 1);
+		if (k == -1) {
+			msg_warn ("cannot insert ip to tree: %s, mask %X", inet_ntoa (ina), mask);
+		}
+		else if (k == 1) {
+			msg_warn ("ip %s, mask %X, value already exists", inet_ntoa (ina), mask);
+		}
+	}
+
+	g_strfreev (strv);
+}
+
+/* Helpers */
+/**
+ * Map read callback for host lists: lazily creates a case-insensitive hash
+ * table in data->cur_data and feeds the chunk to the generic list parser.
+ */
+gchar *
+read_host_list (rspamd_mempool_t * pool, gchar * chunk, gint len, struct map_cb_data *data)
+{
+	insert_func inserter = (insert_func) g_hash_table_insert;
+
+	if (!data->cur_data) {
+		data->cur_data = g_hash_table_new (rspamd_strcase_hash, rspamd_strcase_equal);
+	}
+
+	return abstract_parse_list (pool, chunk, len, data, inserter);
+}
+
+/**
+ * Map finalize callback for host lists: destroys the previous hash table, if any.
+ */
+void
+fin_host_list (rspamd_mempool_t * pool, struct map_cb_data *data)
+{
+	GHashTable *old_hosts = data->prev_data;
+
+	if (old_hosts != NULL) {
+		g_hash_table_destroy (old_hosts);
+	}
+}
+
+/**
+ * Map read callback for key-value lists: lazily creates a case-insensitive
+ * hash table in data->cur_data and feeds the chunk to the key-value parser.
+ */
+gchar *
+read_kv_list (rspamd_mempool_t * pool, gchar * chunk, gint len, struct map_cb_data *data)
+{
+	insert_func inserter = (insert_func) g_hash_table_insert;
+
+	if (!data->cur_data) {
+		data->cur_data = g_hash_table_new (rspamd_strcase_hash, rspamd_strcase_equal);
+	}
+
+	return abstract_parse_kv_list (pool, chunk, len, data, inserter);
+}
+
+/**
+ * Map finalize callback for key-value lists: destroys the previous hash table, if any.
+ */
+void
+fin_kv_list (rspamd_mempool_t * pool, struct map_cb_data *data)
+{
+	GHashTable *old_kv = data->prev_data;
+
+	if (old_kv != NULL) {
+		g_hash_table_destroy (old_kv);
+	}
+}
+
+/**
+ * Map read callback for ip/mask lists: lazily creates a radix tree in
+ * data->cur_data and feeds the chunk to the generic list parser.
+ */
+gchar *
+read_radix_list (rspamd_mempool_t * pool, gchar * chunk, gint len, struct map_cb_data *data)
+{
+	insert_func inserter = (insert_func) radix_tree_insert_helper;
+
+	if (!data->cur_data) {
+		data->cur_data = radix_tree_create ();
+	}
+
+	return abstract_parse_list (pool, chunk, len, data, inserter);
+}
+
+/**
+ * Map finalize callback for ip/mask lists: frees the previous radix tree, if any.
+ */
+void
+fin_radix_list (rspamd_mempool_t * pool, struct map_cb_data *data)
+{
+	if (data->prev_data == NULL) {
+		return;
+	}
+
+	radix_tree_free (data->prev_data);
+}
+
+/**
+ * Periodic timer callback for file maps: re-arms the timer with a jittered
+ * timeout and rereads the file when its modification time has advanced (or
+ * when the file was unavailable before).
+ */
+static void
+file_callback (gint fd, short what, void *ud)
+{
+	struct rspamd_map *map = ud;
+	struct file_map_data *fmap = map->map_data;
+	struct stat cur_st;
+	gdouble next_check;
+
+	/* Schedule the next check first, so that every return path below keeps the timer alive */
+	evtimer_del (&map->ev);
+	next_check = (map->cfg->map_timeout + g_random_double () * map->cfg->map_timeout);
+	double_to_tv (next_check, &map->tv);
+
+	evtimer_add (&map->ev, &map->tv);
+
+	if (g_atomic_int_get (map->locked)) {
+		msg_info ("don't try to reread map as it is locked by other process, will reread it later");
+		return;
+	}
+
+	if (stat (fmap->filename, &cur_st) == -1) {
+		/* File is not accessible right now */
+		return;
+	}
+	if (cur_st.st_mtime <= fmap->st.st_mtime && fmap->st.st_mtime != -1) {
+		/* Not modified since the last check */
+		return;
+	}
+
+	/* Remember the new state of the file */
+	memcpy (&fmap->st, &cur_st, sizeof (struct stat));
+
+	msg_info ("rereading map file %s", fmap->filename);
+	read_map_file (map, fmap);
+}
+
+/**
+ * Release everything owned by an asynchronous HTTP request: the reply with
+ * its headers, the map lock, the pending event, the socket and the callback
+ * data itself.
+ */
+static void
+free_http_cbdata (struct http_callback_data *cbd)
+{
+	struct http_reply *reply = cbd->reply;
+
+	if (reply != NULL) {
+		g_hash_table_destroy (reply->headers);
+		g_free (reply);
+	}
+
+	/* The map can be reread again from now on */
+	g_atomic_int_set (cbd->map->locked, 0);
+
+	event_del (&cbd->ev);
+	close (cbd->fd);
+	g_free (cbd);
+}
+
+/**
+ * Async HTTP request parser
+ *
+ * Event callback driving a single asynchronous HTTP map request.
+ * cbd->state values used here: 0 - waiting for the socket to become writable,
+ * 1 - request sent, nothing read yet, 2 - at least one read has been processed.
+ *
+ * @param fd socket that triggered the event
+ * @param what event flags; compared by equality against EV_WRITE and EV_READ,
+ * anything else is treated as a failure
+ * @param ud struct http_callback_data for this request
+ */
+static void
+http_async_callback (gint fd, short what, void *ud)
+{
+ struct http_callback_data *cbd = ud;
+
+ /* Begin of connection */
+ if (what == EV_WRITE) {
+ if (cbd->state == 0) {
+ /* Can write request */
+ write_http_request (cbd->map, cbd->data, fd);
+ /* Plan reading */
+ event_set (&cbd->ev, cbd->fd, EV_READ | EV_PERSIST, http_async_callback, cbd);
+ event_base_set (cbd->ev_base, &cbd->ev);
+ cbd->tv.tv_sec = HTTP_READ_TIMEOUT;
+ cbd->tv.tv_usec = 0;
+ cbd->state = 1;
+ /* Allocate reply structure */
+ cbd->reply = g_malloc (sizeof (struct http_reply));
+ cbd->reply->parser_state = 0;
+ /* Reply code defaults to 404 until a real status is stored by the reader */
+ cbd->reply->code = 404;
+ cbd->reply->headers = g_hash_table_new_full (rspamd_strcase_hash, rspamd_strcase_equal, g_free, g_free);
+ /* Fresh parsing context: the currently published data becomes prev_data */
+ cbd->cbdata.state = 0;
+ cbd->cbdata.prev_data = *cbd->map->user_data;
+ cbd->cbdata.cur_data = NULL;
+ cbd->cbdata.map = cbd->map;
+ /* Reset the read buffer and chunked-transfer bookkeeping */
+ cbd->data->rlen = 0;
+ cbd->data->chunk = 0;
+ cbd->data->chunk_remain = 0;
+ cbd->data->chunked = FALSE;
+ cbd->data->read_buf[0] = '\0';
+
+ event_add (&cbd->ev, &cbd->tv);
+ }
+ else {
+ msg_err ("bad state when got write readiness");
+ free_http_cbdata (cbd);
+ return;
+ }
+ }
+ /* Got reply, parse it */
+ else if (what == EV_READ) {
+ if (cbd->state >= 1) {
+ /* NOTE(review): a false return presumably means end of reply or a read error - confirm in read_http_common */
+ if (!read_http_common (cbd->map, cbd->data, cbd->reply, &cbd->cbdata, cbd->fd)) {
+ /* Handle Not-Modified in a special way */
+ if (cbd->reply->code == 304) {
+ if (cbd->data->last_checked == (time_t)-1) {
+ cbd->data->last_checked = time (NULL);
+ }
+ msg_info ("data is not modified for server %s", cbd->data->host);
+ }
+ else if (cbd->cbdata.cur_data != NULL) {
+ /* Destroy old data and start reading request data */
+ cbd->map->fin_callback (cbd->map->pool, &cbd->cbdata);
+ /* Publish the freshly parsed data to the map user */
+ *cbd->map->user_data = cbd->cbdata.cur_data;
+ if (cbd->data->last_checked == (time_t)-1) {
+ cbd->data->last_checked = time (NULL);
+ }
+ }
+ if (cbd->state == 1 && cbd->reply->code == 200) {
+ /* Write to log that data is modified */
+ msg_info ("rereading map data from %s", cbd->data->host);
+ }
+
+ /* Request is finished either way: release resources and unlock the map */
+ free_http_cbdata (cbd);
+ return;
+ }
+ else if (cbd->state == 1) {
+ /* Write to log that data is modified */
+ msg_info ("rereading map data from %s", cbd->data->host);
+ }
+ /* More data is expected, remember that the first read has been handled */
+ cbd->state = 2;
+ }
+ }
+ else {
+ /* Neither plain EV_WRITE nor plain EV_READ was reported */
+ msg_err ("connection with http server terminated incorrectly");
+ free_http_cbdata (cbd);
+ }
+}
+
+/**
+ * Periodic timer callback for HTTP maps: re-arms the timer with a jittered
+ * timeout, takes the map lock and starts an asynchronous connection whose
+ * progress is handled by http_async_callback.
+ */
+static void
+http_callback (gint fd, short what, void *ud)
+{
+	struct rspamd_map *map = ud;
+	struct http_map_data *hdata = map->map_data;
+	struct http_callback_data *req;
+	gdouble next_check;
+	gint sock;
+
+	/* Schedule the next check before doing anything else */
+	evtimer_del (&map->ev);
+	next_check = (map->cfg->map_timeout + g_random_double () * map->cfg->map_timeout);
+	double_to_tv (next_check, &map->tv);
+	evtimer_add (&map->ev, &map->tv);
+
+	if (g_atomic_int_get (map->locked)) {
+		msg_info ("don't try to reread map as it is locked by other process, will reread it later");
+		return;
+	}
+
+	g_atomic_int_inc (map->locked);
+
+	/* Start a non-blocking connect */
+	sock = connect_http (map, hdata, TRUE);
+	if (sock == -1) {
+		/* Nothing was started, drop the lock taken above */
+		g_atomic_int_set (map->locked, 0);
+		return;
+	}
+
+	/* Wait for the socket to become writable */
+	req = g_malloc (sizeof (struct http_callback_data));
+	req->map = map;
+	req->data = hdata;
+	req->state = 0;
+	req->fd = sock;
+	req->reply = NULL;
+	req->ev_base = map->ev_base;
+	req->tv.tv_sec = HTTP_CONNECT_TIMEOUT;
+	req->tv.tv_usec = 0;
+	event_set (&req->ev, sock, EV_WRITE, http_async_callback, req);
+	event_base_set (req->ev_base, &req->ev);
+	event_add (&req->ev, &req->tv);
+}
+
+/*
+ * Start watching all maps of a configuration: perform the initial synchronous
+ * load of each map and register its periodic check timer in ev_base.
+ */
+void
+start_map_watch (struct config_file *cfg, struct event_base *ev_base)
+{
+	GList *item;
+	struct rspamd_map *map;
+	struct file_map_data *fmap;
+	gdouble first_check;
+
+	for (item = cfg->maps; item != NULL; item = g_list_next (item)) {
+		map = item->data;
+		map->ev_base = ev_base;
+
+		if (map->protocol == MAP_PROTO_FILE) {
+			evtimer_set (&map->ev, file_callback, map);
+			event_base_set (map->ev_base, &map->ev);
+
+			/* Initial read; a file that was missing at registration time is skipped */
+			fmap = map->map_data;
+			if (fmap->st.st_mtime != -1) {
+				read_map_file (map, map->map_data);
+			}
+
+			/* First check of a file map is planned after half of the jittered timeout */
+			first_check = (map->cfg->map_timeout + g_random_double () * map->cfg->map_timeout) / 2.;
+			double_to_tv (first_check, &map->tv);
+			evtimer_add (&map->ev, &map->tv);
+		}
+		else if (map->protocol == MAP_PROTO_HTTP) {
+			evtimer_set (&map->ev, http_callback, map);
+			event_base_set (map->ev_base, &map->ev);
+
+			/* Initial synchronous read */
+			read_http_sync (map, map->map_data);
+
+			/* Plan the first check with jitter */
+			first_check = (map->cfg->map_timeout + g_random_double () * map->cfg->map_timeout);
+			double_to_tv (first_check, &map->tv);
+			evtimer_add (&map->ev, &map->tv);
+		}
+	}
+}
+
+/**
+ * Forget all registered maps: free the list of maps and delete the pool that
+ * holds the map objects.
+ */
+void
+remove_all_maps (struct config_file *cfg)
+{
+	g_list_free (cfg->maps);
+	cfg->maps = NULL;
+
+	if (cfg->map_pool == NULL) {
+		return;
+	}
+
+	rspamd_mempool_delete (cfg->map_pool);
+	cfg->map_pool = NULL;
+}
+
+/**
+ * Detect the protocol of a map definition.
+ *
+ * Recognised forms are "http://..." and "file://..." (prefix is matched case
+ * insensitively) and a bare absolute path starting with '/'.
+ *
+ * @param map_line map definition
+ * @param res if both res and pos are non-NULL, receives MAP_PROTO_HTTP or MAP_PROTO_FILE
+ * @param pos if both res and pos are non-NULL, receives the position right
+ * after the scheme prefix (the line itself for a bare path)
+ * @return TRUE if the protocol is recognised, FALSE otherwise
+ */
+gboolean
+check_map_proto (const gchar *map_line, gint *res, const gchar **pos)
+{
+	gint proto;
+	const gchar *start;
+
+	if (g_ascii_strncasecmp (map_line, "http://", sizeof ("http://") - 1) == 0) {
+		proto = MAP_PROTO_HTTP;
+		start = map_line + sizeof ("http://") - 1;
+	}
+	else if (g_ascii_strncasecmp (map_line, "file://", sizeof ("file://") - 1) == 0) {
+		proto = MAP_PROTO_FILE;
+		start = map_line + sizeof ("file://") - 1;
+	}
+	else if (*map_line == '/') {
+		/* Trivial file case */
+		proto = MAP_PROTO_FILE;
+		start = map_line;
+	}
+	else {
+		msg_debug ("invalid map fetching protocol: %s", map_line);
+		return FALSE;
+	}
+
+	/* Output arguments are optional for every recognised form, including the bare path */
+	if (res && pos) {
+		*res = proto;
+		*pos = start;
+	}
+
+	return TRUE;
+}
+
+/**
+ * Register a new map in the configuration.
+ *
+ * For file maps the file may be absent (it is then picked up later by the
+ * watcher); for HTTP maps the host is resolved and a test connection is made
+ * immediately.
+ *
+ * @param cfg configuration that owns the map
+ * @param map_line map definition: "http://host[:port]/path", "file:///path" or "/path"
+ * @param description optional human readable description, may be NULL
+ * @param read_callback callback invoked for each chunk of map data
+ * @param fin_callback callback invoked when a reload has been completed
+ * @param user_data location where the parsed data is published
+ * @return TRUE if the map has been added, FALSE on any error
+ */
+gboolean
+add_map (struct config_file *cfg, const gchar *map_line, const gchar *description,
+	map_cb_t read_callback, map_fin_cb_t fin_callback, void **user_data)
+{
+	struct rspamd_map *new_map;
+	enum fetch_proto proto;
+	gint proto_id = 0;
+	const gchar *def = NULL, *p, *hostend, *slash;
+	struct file_map_data *fdata;
+	struct http_map_data *hdata;
+	gchar portbuf[6];
+	gint i, s, r;
+	struct addrinfo hints, *res;
+
+	/* First of all detect protocol line; use a real gint instead of aliasing the enum */
+	if (!check_map_proto (map_line, &proto_id, &def)) {
+		return FALSE;
+	}
+	proto = proto_id;
+
+	/* Constant pool */
+	if (cfg->map_pool == NULL) {
+		cfg->map_pool = rspamd_mempool_new (rspamd_mempool_suggest_size ());
+	}
+	new_map = rspamd_mempool_alloc0 (cfg->map_pool, sizeof (struct rspamd_map));
+	new_map->read_callback = read_callback;
+	new_map->fin_callback = fin_callback;
+	new_map->user_data = user_data;
+	new_map->protocol = proto;
+	new_map->cfg = cfg;
+	new_map->id = g_random_int ();
+	/* The lock lives in shared memory */
+	new_map->locked = rspamd_mempool_alloc0_shared (cfg->cfg_pool, sizeof (gint));
+
+	if (proto == MAP_PROTO_FILE) {
+		new_map->uri = rspamd_mempool_strdup (cfg->cfg_pool, def);
+		def = new_map->uri;
+	}
+	else {
+		new_map->uri = rspamd_mempool_strdup (cfg->cfg_pool, map_line);
+	}
+	if (description != NULL) {
+		new_map->description = rspamd_mempool_strdup (cfg->cfg_pool, description);
+	}
+
+	/* Now check for each proto separately */
+	if (proto == MAP_PROTO_FILE) {
+		fdata = rspamd_mempool_alloc0 (cfg->map_pool, sizeof (struct file_map_data));
+		if (access (def, R_OK) == -1) {
+			if (errno != ENOENT) {
+				msg_err ("cannot open file '%s': %s", def, strerror (errno));
+				return FALSE;
+
+			}
+			msg_info ("map '%s' is not found, but it can be loaded automatically later", def);
+			/* We still can add this file */
+			fdata->st.st_mtime = -1;
+		}
+		else if (stat (def, &fdata->st) == -1) {
+			/* File vanished between access() and stat(): treat it as not yet present */
+			fdata->st.st_mtime = -1;
+		}
+		fdata->filename = rspamd_mempool_strdup (cfg->map_pool, def);
+		new_map->map_data = fdata;
+	}
+	else if (proto == MAP_PROTO_HTTP) {
+		hdata = rspamd_mempool_alloc0 (cfg->map_pool, sizeof (struct http_map_data));
+		/*
+		 * Try to search port: a ':' counts as a port separator only when it
+		 * belongs to the host part, i.e. precedes the first '/'
+		 */
+		slash = strchr (def, '/');
+		p = strchr (def, ':');
+		if (p != NULL && (slash == NULL || p < slash)) {
+			hostend = p;
+			i = 0;
+			p++;
+			while (g_ascii_isdigit (*p) && i < (gint)sizeof (portbuf) - 1) {
+				portbuf[i++] = *p++;
+			}
+			if (*p != '/') {
+				msg_info ("bad http map definition: %s", def);
+				return FALSE;
+			}
+			portbuf[i] = '\0';
+			hdata->port = atoi (portbuf);
+		}
+		else {
+			/* Default http port */
+			rspamd_snprintf (portbuf, sizeof (portbuf), "80");
+			hdata->port = 80;
+			/* Now separate host from path */
+			if (slash == NULL) {
+				msg_info ("bad http map definition: %s", def);
+				return FALSE;
+			}
+			p = slash;
+			hostend = p;
+		}
+		hdata->host = rspamd_mempool_alloc (cfg->map_pool, hostend - def + 1);
+		rspamd_strlcpy (hdata->host, def, hostend - def + 1);
+		hdata->path = rspamd_mempool_strdup (cfg->map_pool, p);
+		hdata->rlen = 0;
+		/* Now try to resolve */
+		memset (&hints, 0, sizeof (hints));
+		hints.ai_family = AF_UNSPEC; /* Allow IPv4 or IPv6 */
+		hints.ai_socktype = SOCK_STREAM; /* Stream socket */
+		hints.ai_flags = 0;
+		hints.ai_protocol = 0; /* Any protocol */
+		hints.ai_canonname = NULL;
+		hints.ai_addr = NULL;
+		hints.ai_next = NULL;
+
+		if ((r = getaddrinfo (hdata->host, portbuf, &hints, &res)) == 0) {
+			hdata->addr = res;
+			/* Resolved addresses are released together with the configuration pool */
+			rspamd_mempool_add_destructor (cfg->cfg_pool, (rspamd_mempool_destruct_t)freeaddrinfo, hdata->addr);
+		}
+		else {
+			msg_err ("address resolution for %s failed: %s", hdata->host, gai_strerror (r));
+			return FALSE;
+		}
+		/* Now try to connect */
+		if ((s = make_tcp_socket (hdata->addr, FALSE, FALSE)) == -1) {
+			msg_info ("cannot connect to http server %s: %d, %s", hdata->host, errno, strerror (errno));
+			return FALSE;
+		}
+		close (s);
+		new_map->map_data = hdata;
+	}
+	/* Temp pool */
+	new_map->pool = rspamd_mempool_new (rspamd_mempool_suggest_size ());
+
+	cfg->maps = g_list_prepend (cfg->maps, new_map);
+
+	return TRUE;
+}
diff --git a/src/libutil/map.h b/src/libutil/map.h
new file mode 100644
index 000000000..1f34cdcc0
--- /dev/null
+++ b/src/libutil/map.h
@@ -0,0 +1,134 @@
+#ifndef RSPAMD_MAP_H
+#define RSPAMD_MAP_H
+
+#include "config.h"
+#include "mem_pool.h"
+#include "radix.h"
+
+/**
+ * Maps API is designed to load list data from different dynamic sources.
+ * It monitors files and HTTP locations for modifications and reloads them if they are
+ * modified.
+ */
+
+/* Source type of a map, as detected by check_map_proto */
+enum fetch_proto {
+ /* Local file: "file://" prefix or a bare path starting with '/' */
+ MAP_PROTO_FILE,
+ /* Remote resource: "http://" prefix */
+ MAP_PROTO_HTTP,
+};
+
+/**
+ * Data specific to file maps
+ */
+struct file_map_data {
+ /* Path of the watched file */
+ const gchar *filename;
+ /* Last seen stat data; st_mtime is set to -1 while the file does not exist */
+ struct stat st;
+};
+
+/**
+ * Data specific to HTTP maps
+ */
+struct http_map_data {
+ /* Resolved server address list (result of getaddrinfo) */
+ struct addrinfo *addr;
+ /* Server port, 80 unless given in the map definition */
+ guint16 port;
+ /* Request path, starting with '/' */
+ gchar *path;
+ /* Host name taken from the map definition */
+ gchar *host;
+ /* Set to the current time after a reply has been handled while it was (time_t)-1 */
+ time_t last_checked;
+ /* Reset to FALSE before each request; presumably set when the reply uses chunked encoding - TODO confirm */
+ gshort chunked;
+ /* Read buffer, emptied before each request */
+ gchar read_buf[BUFSIZ];
+ /* Reset to 0 before each request; presumably the number of bytes held in read_buf - TODO confirm */
+ guint32 rlen;
+ /* Chunked-transfer bookkeeping, reset to 0 before each request */
+ guint32 chunk;
+ guint32 chunk_remain;
+};
+
+struct map_cb_data;
+
+/**
+ * Callback types
+ */
+typedef gchar* (*map_cb_t)(rspamd_mempool_t *pool, gchar *chunk, gint len, struct map_cb_data *data);
+typedef void (*map_fin_cb_t)(rspamd_mempool_t *pool, struct map_cb_data *data);
+
+/**
+ * Common map object
+ */
+struct config_file;
+struct rspamd_map {
+ /* Per-map pool, passed to fin_callback */
+ rspamd_mempool_t *pool;
+ /* Owning configuration */
+ struct config_file *cfg;
+ /* Source type, selects the type behind map_data */
+ enum fetch_proto protocol;
+ /* Called for chunks of loaded map data */
+ map_cb_t read_callback;
+ /* Called once a reload is complete, before the new data is published */
+ map_fin_cb_t fin_callback;
+ /* Location where the current parsed data is stored for the map user */
+ void **user_data;
+ /* Periodic check timer, its timeout and the event base it is attached to */
+ struct event ev;
+ struct timeval tv;
+ struct event_base *ev_base;
+ /* struct file_map_data or struct http_map_data, depending on protocol */
+ void *map_data;
+ /* File path for file maps, the whole map definition line otherwise */
+ gchar *uri;
+ /* Optional description, NULL if none was given */
+ gchar *description;
+ /* Random identifier assigned when the map is added */
+ guint32 id;
+ /* NOTE(review): not written by the code visible here - presumably a checksum of the loaded data, confirm */
+ guint32 checksum;
+ /* Shared lock for temporary disabling of map reading (e.g. when this map is written by UI) */
+ gint *locked;
+};
+
+/**
+ * Callback data for async load
+ */
+struct map_cb_data {
+ /* Map being loaded */
+ struct rspamd_map *map;
+ /* Parser FSM state, 0 at the start of a load; preserved between chunks */
+ gint state;
+ /* Data published before this load started; released by the fin callback */
+ void *prev_data;
+ /* Data being built by the read callback; NULL until the callback creates it */
+ void *cur_data;
+};
+
+
+/**
+ * Check map protocol
+ */
+gboolean check_map_proto (const gchar *map_line, gint *res, const gchar **pos);
+/**
+ * Add map from line
+ */
+gboolean add_map (struct config_file *cfg, const gchar *map_line, const gchar *description,
+ map_cb_t read_callback, map_fin_cb_t fin_callback, void **user_data);
+
+/**
+ * Start watching of maps by adding events to libevent event loop
+ */
+void start_map_watch (struct config_file *cfg, struct event_base *ev_base);
+
+/**
+ * Remove all maps watched (remove events)
+ */
+void remove_all_maps (struct config_file *cfg);
+
+typedef void (*insert_func) (gpointer st, gconstpointer key, gconstpointer value);
+
+/**
+ * Common callbacks for frequent types of lists
+ */
+
+/**
+ * Radix list is a list like ip/mask
+ */
+gchar* read_radix_list (rspamd_mempool_t *pool, gchar *chunk, gint len, struct map_cb_data *data);
+void fin_radix_list (rspamd_mempool_t *pool, struct map_cb_data *data);
+
+/**
+ * Host list is an ordinal list of hosts or domains
+ */
+gchar* read_host_list (rspamd_mempool_t *pool, gchar *chunk, gint len, struct map_cb_data *data);
+void fin_host_list (rspamd_mempool_t *pool, struct map_cb_data *data);
+
+/**
+ * Kv list is an ordinal list of keys and values separated by whitespace
+ */
+gchar* read_kv_list (rspamd_mempool_t *pool, gchar *chunk, gint len, struct map_cb_data *data);
+void fin_kv_list (rspamd_mempool_t *pool, struct map_cb_data *data);
+
+/**
+ * FSM for lists parsing (support comments, blank lines and partial replies)
+ */
+gchar * abstract_parse_list (rspamd_mempool_t * pool, gchar * chunk, gint len, struct map_cb_data *data, insert_func func);
+
+#endif
diff --git a/src/libutil/mem_pool.c b/src/libutil/mem_pool.c
new file mode 100644
index 000000000..8f1105add
--- /dev/null
+++ b/src/libutil/mem_pool.c
@@ -0,0 +1,776 @@
+/*
+ * Copyright (c) 2009-2012, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "mem_pool.h"
+#include "fstring.h"
+#include "logger.h"
+#include "util.h"
+#include "main.h"
+
+/* Sleep time for spin lock in nanoseconds */
+#define MUTEX_SLEEP_TIME 10000000L
+#define MUTEX_SPIN_COUNT 100
+
+#ifdef _THREAD_SAFE
+pthread_mutex_t stat_mtx = PTHREAD_MUTEX_INITIALIZER;
+# define STAT_LOCK() do { pthread_mutex_lock (&stat_mtx); } while (0)
+# define STAT_UNLOCK() do { pthread_mutex_unlock (&stat_mtx); } while (0)
+#else
+# define STAT_LOCK() do {} while (0)
+# define STAT_UNLOCK() do {} while (0)
+#endif
+
+#define POOL_MTX_LOCK() do { rspamd_mutex_lock (pool->mtx); } while (0)
+#define POOL_MTX_UNLOCK() do { rspamd_mutex_unlock (pool->mtx); } while (0)
+
+/*
+ * This define specifies whether we should check all pools for free space for new object
+ * or just begin scan from current (recently attached) pool
+ * If MEMORY_GREEDY is defined, then we scan all pools to find free space (more CPU usage, slower
+ * but requires less memory). If it is not defined check only current pool and if object is too large
+ * to place in it allocate new one (this may cause huge CPU usage in some cases too, but generally faster than
+ * greedy method)
+ */
+#undef MEMORY_GREEDY
+
+/* Internal statistic */
+static rspamd_mempool_stat_t *mem_pool_stat = NULL;
+
+/**
+ * Function that return free space in pool page
+ * @param chain pool page struct
+ * @return number of bytes still usable in the page; MEM_ALIGNMENT bytes are
+ * held back so that an aligned allocation of the returned size always fits.
+ * The result may be negative when the page is almost full.
+ */
+static gint
+pool_chain_free (struct _pool_chain *chain)
+{
+ return (gint)chain->len - (chain->pos - chain->begin + MEM_ALIGNMENT);
+}
+
+/**
+ * Allocate a new ordinary pool page with `size` bytes of storage and account
+ * for it in the global statistics. Aborts if memory cannot be obtained.
+ * @param size size of the page storage, must be positive
+ * @return new page that is not linked to any chain yet
+ */
+static struct _pool_chain *
+pool_chain_new (gsize size)
+{
+	struct _pool_chain *page;
+
+	g_return_val_if_fail (size > 0, NULL);
+
+	page = g_slice_alloc (sizeof (struct _pool_chain));
+	if (!page) {
+		msg_err ("cannot allocate %z bytes, aborting", sizeof (struct _pool_chain));
+		abort ();
+	}
+
+	page->begin = g_slice_alloc (size);
+	if (!page->begin) {
+		msg_err ("cannot allocate %z bytes, aborting", size);
+		abort ();
+	}
+
+	page->len = size;
+	page->next = NULL;
+	/* The first allocation starts at an aligned address */
+	page->pos = align_ptr (page->begin, MEM_ALIGNMENT);
+
+	STAT_LOCK ();
+	mem_pool_stat->bytes_allocated += size;
+	mem_pool_stat->chunks_allocated++;
+	STAT_UNLOCK ();
+
+	return page;
+}
+
+/**
+ * Allocate a new shared pool page: a MAP_SHARED mapping that holds the page
+ * header immediately followed by `size` bytes of storage.
+ * @param size size of the page storage
+ * @return new page that is not linked to any chain yet; NULL if /dev/zero
+ * cannot be opened (HAVE_MMAP_ZERO builds only). Aborts if mmap fails.
+ */
+static struct _pool_chain_shared *
+pool_chain_new_shared (gsize size)
+{
+	struct _pool_chain_shared *chain;
+	gpointer map;
+	/* Header and storage live in the same mapping */
+	gsize map_size = size + sizeof (struct _pool_chain_shared);
+
+#if defined(HAVE_MMAP_ANON)
+	map = mmap (NULL, map_size, PROT_READ | PROT_WRITE, MAP_ANON | MAP_SHARED, -1, 0);
+#elif defined(HAVE_MMAP_ZERO)
+	gint fd;
+
+	fd = open ("/dev/zero", O_RDWR);
+	if (fd == -1) {
+		return NULL;
+	}
+	map = mmap (NULL, map_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+	/* The mapping stays valid after the descriptor is closed */
+	close (fd);
+#else
+# error No mmap methods are defined
+#endif
+	if (map == MAP_FAILED) {
+		msg_err ("cannot allocate %z bytes, aborting", map_size);
+		abort ();
+	}
+	chain = (struct _pool_chain_shared *)map;
+	chain->begin = ((guint8 *) chain) + sizeof (struct _pool_chain_shared);
+	chain->pos = align_ptr (chain->begin, MEM_ALIGNMENT);
+	chain->len = size;
+	chain->lock = NULL;
+	chain->next = NULL;
+	STAT_LOCK ();
+	mem_pool_stat->shared_chunks_allocated++;
+	mem_pool_stat->bytes_allocated += size;
+	STAT_UNLOCK ();
+
+	return chain;
+}
+
+
+/**
+ * Allocate new memory pool
+ *
+ * On the first call the global statistics block is also created in a shared
+ * mapping. Aborts if memory cannot be obtained.
+ * @param size size of pool's page, must be positive
+ * @return new memory pool object
+ */
+rspamd_mempool_t *
+rspamd_mempool_new (gsize size)
+{
+	rspamd_mempool_t *new;
+	gpointer map;
+
+	g_return_val_if_fail (size > 0, NULL);
+	/* Allocate statistic structure if it is not allocated before */
+	if (mem_pool_stat == NULL) {
+#if defined(HAVE_MMAP_ANON)
+		map = mmap (NULL, sizeof (rspamd_mempool_stat_t), PROT_READ | PROT_WRITE, MAP_ANON | MAP_SHARED, -1, 0);
+		if (map == MAP_FAILED) {
+			msg_err ("cannot allocate %z bytes, aborting", sizeof (rspamd_mempool_stat_t));
+			abort ();
+		}
+		mem_pool_stat = (rspamd_mempool_stat_t *)map;
+#elif defined(HAVE_MMAP_ZERO)
+		gint fd;
+
+		fd = open ("/dev/zero", O_RDWR);
+		g_assert (fd != -1);
+		map = mmap (NULL, sizeof (rspamd_mempool_stat_t), PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+		/* The mapping stays valid after the descriptor is closed */
+		close (fd);
+		if (map == MAP_FAILED) {
+			msg_err ("cannot allocate %z bytes, aborting", sizeof (rspamd_mempool_stat_t));
+			abort ();
+		}
+		mem_pool_stat = (rspamd_mempool_stat_t *)map;
+#else
+# error No mmap methods are defined
+#endif
+		memset (map, 0, sizeof (rspamd_mempool_stat_t));
+	}
+
+	new = g_slice_alloc (sizeof (rspamd_mempool_t));
+	if (new == NULL) {
+		msg_err ("cannot allocate %z bytes, aborting", sizeof (rspamd_mempool_t));
+		abort ();
+	}
+
+	new->cur_pool = pool_chain_new (size);
+	new->shared_pool = NULL;
+	new->first_pool = new->cur_pool;
+	/* Temporary and shared chains are created on demand */
+	new->cur_pool_tmp = NULL;
+	new->first_pool_tmp = NULL;
+	new->destructors = NULL;
+	/* Set it upon first call of set variable */
+	new->variables = NULL;
+	new->mtx = rspamd_mutex_new ();
+
+	mem_pool_stat->pools_allocated++;
+
+	return new;
+}
+
+/**
+ * Common allocation routine for the ordinary and the temporary page chains.
+ * @param pool memory pool, NULL makes the function return NULL
+ * @param size number of bytes requested
+ * @param is_tmp TRUE to allocate from the temporary chain, FALSE for the ordinary one
+ * @return pointer to `size` bytes inside a pool page or NULL if pool is NULL
+ */
+static void *
+memory_pool_alloc_common (rspamd_mempool_t * pool, gsize size, gboolean is_tmp)
+{
+ guint8 *tmp;
+ struct _pool_chain *new, *cur;
+ gint free;
+
+ if (pool) {
+ POOL_MTX_LOCK ();
+#ifdef MEMORY_GREEDY
+ /* Greedy mode: scan the whole chain from its first page */
+ if (is_tmp) {
+ cur = pool->first_pool_tmp;
+ }
+ else {
+ cur = pool->first_pool;
+ }
+#else
+ /* Default mode: start from the most recently attached page */
+ if (is_tmp) {
+ cur = pool->cur_pool_tmp;
+ }
+ else {
+ cur = pool->cur_pool;
+ }
+#endif
+ /* Find free space in pool chain */
+ while (cur != NULL &&
+ (free = pool_chain_free (cur)) < (gint)size &&
+ cur->next != NULL) {
+ cur = cur->next;
+ }
+
+ /* `free` is only read when cur is not NULL, i.e. after the loop has assigned it */
+ if (cur == NULL || (free < (gint)size && cur->next == NULL)) {
+ /* Allocate new pool */
+ if (cur == NULL) {
+ /* The selected chain has no pages yet: create its first page */
+ if (pool->first_pool->len >= size + MEM_ALIGNMENT) {
+ new = pool_chain_new (pool->first_pool->len);
+ }
+ else {
+ new = pool_chain_new (size + pool->first_pool->len + MEM_ALIGNMENT);
+ }
+ /* Connect to pool subsystem */
+ if (is_tmp) {
+ pool->first_pool_tmp = new;
+ }
+ else {
+ pool->first_pool = new;
+ }
+ }
+ else {
+ /* Last page is too full: append a page, enlarged if the request exceeds the usual page size */
+ if (cur->len >= size + MEM_ALIGNMENT) {
+ new = pool_chain_new (cur->len);
+ }
+ else {
+ mem_pool_stat->oversized_chunks++;
+ new = pool_chain_new (size + pool->first_pool->len + MEM_ALIGNMENT);
+ }
+ /* Attach new pool to chain */
+ cur->next = new;
+ }
+ if (is_tmp) {
+ pool->cur_pool_tmp = new;
+ }
+ else {
+ pool->cur_pool = new;
+ }
+ /* No need to align again */
+ tmp = new->pos;
+ new->pos = tmp + size;
+ POOL_MTX_UNLOCK ();
+ return tmp;
+ }
+ /* No need to allocate page */
+ tmp = align_ptr (cur->pos, MEM_ALIGNMENT);
+ cur->pos = tmp + size;
+ POOL_MTX_UNLOCK ();
+ return tmp;
+ }
+ return NULL;
+}
+
+
+/**
+ * Allocate `size` bytes from the ordinary page chain of the pool.
+ * @return pointer to the memory (not zeroed) or NULL if pool is NULL
+ */
+void *
+rspamd_mempool_alloc (rspamd_mempool_t * pool, gsize size)
+{
+ return memory_pool_alloc_common (pool, size, FALSE);
+}
+
+/**
+ * Allocate `size` bytes from the temporary page chain of the pool.
+ * @return pointer to the memory (not zeroed) or NULL if pool is NULL
+ */
+void *
+rspamd_mempool_alloc_tmp (rspamd_mempool_t * pool, gsize size)
+{
+ return memory_pool_alloc_common (pool, size, TRUE);
+}
+
+/**
+ * Allocate zero-filled memory from the ordinary page chain of the pool.
+ * @return pointer to the memory or NULL if the allocation returned NULL
+ */
+void *
+rspamd_mempool_alloc0 (rspamd_mempool_t * pool, gsize size)
+{
+	void *mem;
+
+	mem = rspamd_mempool_alloc (pool, size);
+	if (mem == NULL) {
+		return NULL;
+	}
+
+	memset (mem, 0, size);
+	return mem;
+}
+
+/**
+ * Allocate zero-filled memory from the temporary page chain of the pool.
+ * @return pointer to the memory or NULL if the allocation returned NULL
+ */
+void *
+rspamd_mempool_alloc0_tmp (rspamd_mempool_t * pool, gsize size)
+{
+	void *mem;
+
+	mem = rspamd_mempool_alloc_tmp (pool, size);
+	if (mem == NULL) {
+		return NULL;
+	}
+
+	memset (mem, 0, size);
+	return mem;
+}
+
+/**
+ * Allocate zero-filled memory from the shared page chain of the pool.
+ * @return pointer to the memory or NULL if the allocation returned NULL
+ */
+void *
+rspamd_mempool_alloc0_shared (rspamd_mempool_t * pool, gsize size)
+{
+	void *mem;
+
+	mem = rspamd_mempool_alloc_shared (pool, size);
+	if (mem == NULL) {
+		return NULL;
+	}
+
+	memset (mem, 0, size);
+	return mem;
+}
+
+/**
+ * Allocate `size` bytes from the shared (mmap-backed) page chain of the pool.
+ * The first shared page is created on demand.
+ * @param pool memory pool, NULL makes the function return NULL
+ * @param size number of bytes requested, must be positive
+ * @return aligned pointer to the memory (not zeroed) or NULL on failure
+ */
+void *
+rspamd_mempool_alloc_shared (rspamd_mempool_t * pool, gsize size)
+{
+	guint8 *tmp;
+	struct _pool_chain_shared *new, *cur;
+	gint free;
+
+	if (pool == NULL) {
+		return NULL;
+	}
+
+	g_return_val_if_fail (size > 0, NULL);
+
+	POOL_MTX_LOCK ();
+	cur = pool->shared_pool;
+	if (!cur) {
+		cur = pool_chain_new_shared (pool->first_pool->len);
+		if (cur == NULL) {
+			/* Shared page could not be created */
+			POOL_MTX_UNLOCK ();
+			return NULL;
+		}
+		pool->shared_pool = cur;
+	}
+
+	/* Find free space in pool chain */
+	while ((free = pool_chain_free ((struct _pool_chain *) cur)) < (gint) size && cur->next) {
+		cur = cur->next;
+	}
+
+	if (free < (gint) size && cur->next == NULL) {
+		/* Allocate new pool */
+		if (cur->len >= size + MEM_ALIGNMENT) {
+			new = pool_chain_new_shared (cur->len);
+		}
+		else {
+			mem_pool_stat->oversized_chunks++;
+			new = pool_chain_new_shared (size + pool->first_pool->len + MEM_ALIGNMENT);
+		}
+		if (new == NULL) {
+			POOL_MTX_UNLOCK ();
+			return NULL;
+		}
+		/* Attach new pool to chain */
+		cur->next = new;
+		/*
+		 * Hand out the aligned start of the page, exactly the region that is
+		 * reserved by advancing pos; page size itself is already accounted
+		 * in the statistics by pool_chain_new_shared
+		 */
+		tmp = new->pos;
+		new->pos = tmp + size;
+		POOL_MTX_UNLOCK ();
+		return tmp;
+	}
+
+	tmp = align_ptr (cur->pos, MEM_ALIGNMENT);
+	cur->pos = tmp + size;
+	POOL_MTX_UNLOCK ();
+	return tmp;
+}
+
+
+/**
+ * Duplicate a NUL terminated string into the ordinary chain of the pool.
+ * @return copy of the string or NULL if src is NULL
+ */
+gchar *
+rspamd_mempool_strdup (rspamd_mempool_t * pool, const gchar *src)
+{
+	gsize slen;
+	gchar *copy;
+
+	if (!src) {
+		return NULL;
+	}
+
+	slen = strlen (src);
+	copy = rspamd_mempool_alloc (pool, slen + 1);
+	memcpy (copy, src, slen);
+	copy[slen] = '\0';
+
+	return copy;
+}
+
+/**
+ * Make a NUL terminated copy of a counted string in the ordinary chain of the pool.
+ * @return copy of the string or NULL if src is NULL
+ */
+gchar *
+rspamd_mempool_fstrdup (rspamd_mempool_t * pool, const struct f_str_s *src)
+{
+	gchar *copy;
+
+	if (!src) {
+		return NULL;
+	}
+
+	copy = rspamd_mempool_alloc (pool, src->len + 1);
+	memcpy (copy, src->begin, src->len);
+	/* The source is not required to carry a terminator, add our own */
+	copy[src->len] = '\0';
+
+	return copy;
+}
+
+
+/**
+ * Duplicate a NUL terminated string into the shared chain of the pool.
+ * @return copy of the string or NULL if src is NULL
+ */
+gchar *
+rspamd_mempool_strdup_shared (rspamd_mempool_t * pool, const gchar *src)
+{
+	gsize slen;
+	gchar *copy;
+
+	if (!src) {
+		return NULL;
+	}
+
+	slen = strlen (src);
+	copy = rspamd_mempool_alloc_shared (pool, slen + 1);
+	memcpy (copy, src, slen);
+	copy[slen] = '\0';
+
+	return copy;
+}
+
+/*
+ * Find the shared page that contains a pointer.
+ * A pointer belongs to a page when it lies in [begin, begin + len).
+ * Returns NULL if pointer is not inside any shared page of the pool.
+ */
+static struct _pool_chain_shared *
+memory_pool_find_pool (rspamd_mempool_t * pool, void *pointer)
+{
+	struct _pool_chain_shared *cur;
+	guint8 *addr = pointer;
+
+	for (cur = pool->shared_pool; cur != NULL; cur = cur->next) {
+		/* The address one past the end is outside of the page */
+		if (addr >= cur->begin && addr < (cur->begin + cur->len)) {
+			return cur;
+		}
+	}
+
+	return NULL;
+}
+
+/**
+ * One waiting step for a contended pool mutex.
+ * Decrements the spin counter; once it reaches zero the owner is inspected to
+ * detect a lock that will never be released. Otherwise yields/sleeps for
+ * MUTEX_SLEEP_TIME nanoseconds.
+ * @return 0 if the caller may stop waiting (the lock is held by this very
+ * process or its owner no longer exists), 1 if it should retry
+ */
+static inline gint
+__mutex_spin (rspamd_mempool_mutex_t * mutex)
+{
+ /* check spin count */
+ if (g_atomic_int_dec_and_test (&mutex->spin)) {
+ /* This may be deadlock, so check owner of this lock */
+ if (mutex->owner == getpid ()) {
+ /* This mutex was locked by calling process, so it is just double lock and we can easily unlock it */
+ g_atomic_int_set (&mutex->spin, MUTEX_SPIN_COUNT);
+ return 0;
+ }
+ else if (kill (mutex->owner, 0) == -1) {
+ /* Owner process was not found, so release lock */
+ g_atomic_int_set (&mutex->spin, MUTEX_SPIN_COUNT);
+ return 0;
+ }
+ /* Spin again */
+ g_atomic_int_set (&mutex->spin, MUTEX_SPIN_COUNT);
+ }
+ /* Give other processes a chance to run before sleeping */
+#ifdef HAVE_ASM_PAUSE
+ __asm __volatile ("pause");
+#elif defined(HAVE_SCHED_YIELD)
+ (void)sched_yield ();
+#endif
+
+#if defined(HAVE_NANOSLEEP)
+ struct timespec ts;
+ ts.tv_sec = 0;
+ ts.tv_nsec = MUTEX_SLEEP_TIME;
+ /* Spin */
+ /* Restart the sleep with the remaining time whenever a signal interrupts it */
+ while (nanosleep (&ts, &ts) == -1 && errno == EINTR);
+#else
+# error No methods to spin are defined
+#endif
+ return 1;
+}
+
+/**
+ * Wait until the mutex lock word can be switched from 0 to 1, or until the
+ * waiting step reports that waiting can be stopped.
+ */
+static void
+memory_pool_mutex_spin (rspamd_mempool_mutex_t * mutex)
+{
+	for (;;) {
+		if (g_atomic_int_compare_and_exchange (&mutex->lock, 0, 1)) {
+			/* Lock word has been taken */
+			break;
+		}
+		if (__mutex_spin (mutex) == 0) {
+			/* Waiting step told us to stop */
+			break;
+		}
+	}
+}
+
+/*
+ * Lock the shared page that contains `pointer`. The page mutex is created on
+ * first use; pointers outside of the shared pages are silently ignored.
+ */
+void
+rspamd_mempool_lock_shared (rspamd_mempool_t * pool, void *pointer)
+{
+	struct _pool_chain_shared *page = memory_pool_find_pool (pool, pointer);
+
+	if (page != NULL) {
+		if (page->lock == NULL) {
+			page->lock = rspamd_mempool_get_mutex (pool);
+		}
+		rspamd_mempool_lock_mutex (page->lock);
+	}
+}
+
+/*
+ * Unlock the shared page that contains `pointer`. If the page has no mutex
+ * yet, one is created and nothing is unlocked; pointers outside of the shared
+ * pages are silently ignored.
+ */
+void
+rspamd_mempool_unlock_shared (rspamd_mempool_t * pool, void *pointer)
+{
+	struct _pool_chain_shared *page = memory_pool_find_pool (pool, pointer);
+
+	if (page == NULL) {
+		return;
+	}
+
+	if (page->lock != NULL) {
+		rspamd_mempool_unlock_mutex (page->lock);
+	}
+	else {
+		page->lock = rspamd_mempool_get_mutex (pool);
+	}
+}
+
+/**
+ * Register a destructor that is called with `data` when the pool is deleted.
+ * The record is allocated from the pool itself and pushed on top of the
+ * destructors list.
+ * @param function name of the registering function
+ * @param line source location of the registration
+ */
+void
+rspamd_mempool_add_destructor_full (rspamd_mempool_t * pool, rspamd_mempool_destruct_t func, void *data,
+	const gchar *function, const gchar *line)
+{
+	struct _pool_destructors *node;
+
+	node = rspamd_mempool_alloc (pool, sizeof (struct _pool_destructors));
+	if (node == NULL) {
+		return;
+	}
+
+	POOL_MTX_LOCK ();
+	node->data = data;
+	node->func = func;
+	node->loc = line;
+	node->function = function;
+	/* Push on top of the list */
+	node->prev = pool->destructors;
+	pool->destructors = node;
+	POOL_MTX_UNLOCK ();
+}
+
+/**
+ * Replace the argument of the most recently registered destructor that
+ * matches both `func` and `old_data`; does nothing if there is no such record.
+ */
+void
+rspamd_mempool_replace_destructor (rspamd_mempool_t * pool, rspamd_mempool_destruct_t func, void *old_data, void *new_data)
+{
+	struct _pool_destructors *node;
+
+	for (node = pool->destructors; node != NULL; node = node->prev) {
+		if (node->func == func && node->data == old_data) {
+			node->data = new_data;
+			break;
+		}
+	}
+}
+
+/**
+ * Destroy a memory pool: run the registered destructors, release the
+ * ordinary, temporary and shared pages, the variables table, the mutex and
+ * the pool object itself.
+ * @param pool pool to destroy, must not be used afterwards
+ */
+void
+rspamd_mempool_delete (rspamd_mempool_t * pool)
+{
+ struct _pool_chain *cur = pool->first_pool, *tmp;
+ struct _pool_chain_shared *cur_shared = pool->shared_pool, *tmp_shared;
+ struct _pool_destructors *destructor = pool->destructors;
+
+ POOL_MTX_LOCK ();
+ /* Call all pool destructors */
+ /* The list is walked from the most recently registered record backwards */
+ while (destructor) {
+ /* Avoid calling destructors for NULL pointers */
+ if (destructor->data != NULL) {
+ destructor->func (destructor->data);
+ }
+ destructor = destructor->prev;
+ }
+
+ /* Free ordinary pages; destructor records live in them, so this goes after the loop above */
+ while (cur) {
+ tmp = cur;
+ cur = cur->next;
+ STAT_LOCK ();
+ mem_pool_stat->chunks_freed++;
+ mem_pool_stat->bytes_allocated -= tmp->len;
+ STAT_UNLOCK ();
+ g_slice_free1 (tmp->len, tmp->begin);
+ g_slice_free (struct _pool_chain, tmp);
+ }
+ /* Clean temporary pools */
+ cur = pool->first_pool_tmp;
+ while (cur) {
+ tmp = cur;
+ cur = cur->next;
+ STAT_LOCK ();
+ mem_pool_stat->chunks_freed++;
+ mem_pool_stat->bytes_allocated -= tmp->len;
+ STAT_UNLOCK ();
+ g_slice_free1 (tmp->len, tmp->begin);
+ g_slice_free (struct _pool_chain, tmp);
+ }
+ /* Unmap shared memory */
+ while (cur_shared) {
+ tmp_shared = cur_shared;
+ cur_shared = cur_shared->next;
+ STAT_LOCK ();
+ mem_pool_stat->chunks_freed++;
+ mem_pool_stat->bytes_allocated -= tmp_shared->len;
+ STAT_UNLOCK ();
+ /* Page header and storage share one mapping, so unmap both at once */
+ munmap ((void *)tmp_shared, tmp_shared->len + sizeof (struct _pool_chain_shared));
+ }
+ if (pool->variables) {
+ g_hash_table_destroy (pool->variables);
+ }
+
+ mem_pool_stat->pools_freed++;
+ POOL_MTX_UNLOCK ();
+ rspamd_mutex_free (pool->mtx);
+ g_slice_free (rspamd_mempool_t, pool);
+}
+
+/*
+ * Release every chunk of the pool's temporary chain.
+ *
+ * After the chunks are freed the chain pointers are reset, otherwise
+ * first_pool_tmp/cur_pool_tmp would keep referencing freed memory and
+ * rspamd_mempool_delete() would free the same chunks a second time.
+ * NOTE(review): this relies on the temporary allocator treating a NULL
+ * first_pool_tmp/cur_pool_tmp as "no chain yet" - confirm against
+ * rspamd_mempool_alloc_tmp().
+ */
+void
+rspamd_mempool_cleanup_tmp (rspamd_mempool_t* pool)
+{
+	struct _pool_chain *cur, *tmp;
+
+	POOL_MTX_LOCK ();
+	cur = pool->first_pool_tmp;
+	while (cur) {
+		tmp = cur;
+		cur = cur->next;
+		STAT_LOCK ();
+		mem_pool_stat->chunks_freed++;
+		mem_pool_stat->bytes_allocated -= tmp->len;
+		STAT_UNLOCK ();
+		g_slice_free1 (tmp->len, tmp->begin);
+		g_slice_free (struct _pool_chain, tmp);
+	}
+	/* The chain no longer exists: do not leave dangling pointers behind */
+	pool->first_pool_tmp = NULL;
+	pool->cur_pool_tmp = NULL;
+	/* NOTE(review): no pool is freed here, yet the counter is kept as before */
+	mem_pool_stat->pools_freed++;
+	POOL_MTX_UNLOCK ();
+}
+
+/*
+ * Copy the global allocator counters into the caller supplied structure.
+ * The counters are read without taking any lock.
+ */
+void
+rspamd_mempool_stat (rspamd_mempool_stat_t * st)
+{
+	/* Pool level counters */
+	st->pools_allocated = mem_pool_stat->pools_allocated;
+	st->pools_freed = mem_pool_stat->pools_freed;
+	/* Chunk level counters */
+	st->chunks_allocated = mem_pool_stat->chunks_allocated;
+	st->chunks_freed = mem_pool_stat->chunks_freed;
+	st->shared_chunks_allocated = mem_pool_stat->shared_chunks_allocated;
+	st->oversized_chunks = mem_pool_stat->oversized_chunks;
+	/* Byte counter */
+	st->bytes_allocated = mem_pool_stat->bytes_allocated;
+}
+
+/* Lower bound for the suggested chunk size: 8Kb */
+#define FIXED_POOL_SIZE 8192
+/*
+ * Suggest a pool chunk size: the system page size, but never less than
+ * FIXED_POOL_SIZE (this also covers a negative sysconf() result).
+ */
+gsize
+rspamd_mempool_suggest_size (void)
+{
+	glong page_size;
+
+#ifdef HAVE_GETPAGESIZE
+	page_size = getpagesize ();
+#else
+	page_size = sysconf (_SC_PAGESIZE);
+#endif
+	if (page_size > FIXED_POOL_SIZE) {
+		return page_size;
+	}
+
+	return FIXED_POOL_SIZE;
+}
+
+/*
+ * Allocate a pool mutex in the pool's shared memory and initialise it as
+ * unlocked, unowned, with the default spin budget.
+ * Returns NULL when no pool is given.
+ */
+rspamd_mempool_mutex_t *
+rspamd_mempool_get_mutex (rspamd_mempool_t * pool)
+{
+	rspamd_mempool_mutex_t *mtx;
+
+	if (pool == NULL) {
+		return NULL;
+	}
+
+	mtx = rspamd_mempool_alloc_shared (pool, sizeof (rspamd_mempool_mutex_t));
+	mtx->spin = MUTEX_SPIN_COUNT;
+	mtx->owner = 0;
+	mtx->lock = 0;
+
+	return mtx;
+}
+
+/*
+ * Acquire a pool mutex and record the calling process as its owner.
+ * memory_pool_mutex_spin() is defined elsewhere in this file; it is expected
+ * to return once the lock word is held - NOTE(review): confirm that it cannot
+ * return without the lock, because the owner is overwritten unconditionally.
+ */
+void
+rspamd_mempool_lock_mutex (rspamd_mempool_mutex_t * mutex)
+{
+ memory_pool_mutex_spin (mutex);
+ mutex->owner = getpid ();
+}
+
+/*
+ * Release a pool mutex: the owner is cleared first, then the lock word is
+ * atomically switched from 1 back to 0. The result of the exchange is
+ * ignored, so unlocking a mutex whose lock word is not 1 changes only the
+ * owner field.
+ */
+void
+rspamd_mempool_unlock_mutex (rspamd_mempool_mutex_t * mutex)
+{
+ mutex->owner = 0;
+ (void)g_atomic_int_compare_and_exchange (&mutex->lock, 1, 0);
+}
+
+/*
+ * Build a read/write lock in the pool's shared memory. It consists of two
+ * pool mutexes: one used as the readers counter, one as the writer lock.
+ */
+rspamd_mempool_rwlock_t *
+rspamd_mempool_get_rwlock (rspamd_mempool_t * pool)
+{
+	rspamd_mempool_rwlock_t *rw = rspamd_mempool_alloc_shared (pool, sizeof (rspamd_mempool_rwlock_t));
+
+	rw->__r_lock = rspamd_mempool_get_mutex (pool);
+	rw->__w_lock = rspamd_mempool_get_mutex (pool);
+
+	return rw;
+}
+
+/*
+ * Acquire the lock for reading: wait while the writer lock word is non-zero,
+ * then increment the readers counter (kept in __r_lock->lock) and store the
+ * caller's pid as the last reader.
+ * NOTE(review): the writer check and the counter increment are two separate
+ * steps, so a writer may slip in between them - confirm this is acceptable.
+ */
+void
+rspamd_mempool_rlock_rwlock (rspamd_mempool_rwlock_t * lock)
+{
+ /* Spin on write lock */
+ while (g_atomic_int_get (&lock->__w_lock->lock)) {
+ /* Stop waiting as soon as __mutex_spin() reports failure (zero) */
+ if (!__mutex_spin (lock->__w_lock)) {
+ break;
+ }
+ }
+
+ g_atomic_int_inc (&lock->__r_lock->lock);
+ lock->__r_lock->owner = getpid ();
+}
+
+/*
+ * Acquire the lock for writing: take the writer mutex, then wait until the
+ * readers counter drops to zero. The result of __mutex_spin() is ignored
+ * here, so the loop ends only when no readers are left.
+ */
+void
+rspamd_mempool_wlock_rwlock (rspamd_mempool_rwlock_t * lock)
+{
+ /* Spin on write lock first */
+ rspamd_mempool_lock_mutex (lock->__w_lock);
+ /* Now we have write lock set up */
+ /* Wait all readers */
+ while (g_atomic_int_get (&lock->__r_lock->lock)) {
+ __mutex_spin (lock->__r_lock);
+ }
+}
+
+/*
+ * Release a read lock: decrement the readers counter unless it is already
+ * zero.
+ * NOTE(review): the zero check and the decrement are not one atomic step.
+ */
+void
+rspamd_mempool_runlock_rwlock (rspamd_mempool_rwlock_t * lock)
+{
+ if (g_atomic_int_get (&lock->__r_lock->lock)) {
+ (void)g_atomic_int_dec_and_test (&lock->__r_lock->lock);
+ }
+}
+
+/*
+ * Release a write lock by unlocking the writer mutex.
+ */
+void
+rspamd_mempool_wunlock_rwlock (rspamd_mempool_rwlock_t * lock)
+{
+ rspamd_mempool_unlock_mutex (lock->__w_lock);
+}
+
+/*
+ * Bind `value` to `name` inside the pool. The variables table is created on
+ * first use and the key is a pool-owned copy of `name`. When a destructor is
+ * given it is registered for `value` on the pool.
+ */
+void
+rspamd_mempool_set_variable (rspamd_mempool_t *pool, const gchar *name, gpointer value, rspamd_mempool_destruct_t destructor)
+{
+	GHashTable *vars = pool->variables;
+
+	if (vars == NULL) {
+		vars = g_hash_table_new (rspamd_str_hash, rspamd_str_equal);
+		pool->variables = vars;
+	}
+
+	g_hash_table_insert (vars, rspamd_mempool_strdup (pool, name), value);
+
+	if (destructor == NULL) {
+		return;
+	}
+	rspamd_mempool_add_destructor (pool, destructor, value);
+}
+
+/*
+ * Look a pool variable up by name; NULL is returned when the pool has no
+ * variables table or the table has no such key.
+ */
+gpointer
+rspamd_mempool_get_variable (rspamd_mempool_t *pool, const gchar *name)
+{
+	GHashTable *vars = pool->variables;
+
+	return vars != NULL ? g_hash_table_lookup (vars, name) : NULL;
+}
+
+
+/*
+ * vi:ts=4
+ */
diff --git a/src/libutil/mem_pool.h b/src/libutil/mem_pool.h
new file mode 100644
index 000000000..f759ed60a
--- /dev/null
+++ b/src/libutil/mem_pool.h
@@ -0,0 +1,299 @@
+/**
+ * @file mem_pool.h
+ * \brief Memory pools library.
+ *
+ * Memory pools library. The library implements an efficient way to
+ * store data in memory while avoiding many malloc/free calls. The trade-off is
+ * that objects live in a pool for a rather long time and are not freed
+ * immediately after use; when we know for certain how long these objects are
+ * needed, we can use a pool for them.
+ */
+
+#ifndef RSPAMD_MEM_POOL_H
+#define RSPAMD_MEM_POOL_H
+
+#include "config.h"
+
+
+struct f_str_s;
+
+/** Alignment granularity: size of the platform word */
+#define MEM_ALIGNMENT sizeof(unsigned long) /* platform word */
+/**
+ * Round pointer p up to the next multiple of a (a must be a power of two,
+ * because the result is obtained by masking the low bits).
+ * Both arguments are parenthesized so that expressions can be passed safely.
+ */
+#define align_ptr(p, a) \
+	(guint8 *) (((uintptr_t) (p) + ((uintptr_t) (a) - 1)) & ~((uintptr_t) (a) - 1))
+
+/**
+ * Destructor type definition
+ */
+typedef void (*rspamd_mempool_destruct_t)(void *ptr);
+
+/**
+ * Pool mutex structure
+ */
+typedef struct memory_pool_mutex_s {
+ /* lock word: switched 0 -> 1 on lock and 1 -> 0 on unlock; the rwlock code reuses it as a readers counter */
+ gint lock;
+ /* pid of the process that took the lock last, 0 when released */
+ pid_t owner;
+ /* spin budget, initialised to MUTEX_SPIN_COUNT when the mutex is created */
+ guint spin;
+} rspamd_mempool_mutex_t;
+
+/**
+ * Pool page structure: one node of a singly linked list of memory blocks.
+ * The node and its data block are two separate allocations.
+ */
+struct _pool_chain {
+ guint8 *begin; /**< begin of pool chain block */
+ guint8 *pos; /**< current start of free space in block */
+ gsize len; /**< length of block */
+ struct _pool_chain *next; /**< chain link */
+};
+
+/**
+ * Shared pool page. The header is part of the same mapping as its data:
+ * the pool is torn down with munmap (header, len + sizeof (header)).
+ */
+struct _pool_chain_shared {
+ guint8 *begin; /**< begin of the data block */
+ guint8 *pos; /**< current start of free space in block */
+ gsize len; /**< length of the data block (header not included) */
+ struct _pool_chain_shared *next; /**< chain link */
+ rspamd_mempool_mutex_t *lock; /**< page lock, created on first lock/unlock request */
+};
+
+/**
+ * Destructors list item structure. New items are put at the head of the
+ * list, so destructors run in reverse order of registration.
+ */
+struct _pool_destructors {
+ rspamd_mempool_destruct_t func; /**< pointer to destructor */
+ void *data; /**< data to free */
+ const gchar *function; /**< function from which this destructor was added */
+ const gchar *loc; /**< source location (G_STRLOC) where the destructor was added */
+ struct _pool_destructors *prev; /**< chain link */
+};
+
+/**
+ * Memory pool type: ordinary, temporary and shared page chains plus the
+ * destructors list and per-pool named variables.
+ */
+struct rspamd_mutex_s;
+typedef struct memory_pool_s {
+ struct _pool_chain *cur_pool; /**< currently used page */
+ struct _pool_chain *first_pool; /**< first page */
+ struct _pool_chain *cur_pool_tmp; /**< currently used temporary page */
+ struct _pool_chain *first_pool_tmp; /**< first temporary page */
+ struct _pool_chain_shared *shared_pool; /**< shared chain */
+ struct _pool_destructors *destructors; /**< destructors chain */
+ GHashTable *variables; /**< private memory pool variables, NULL until the first variable is set */
+ struct rspamd_mutex_s *mtx; /**< threads lock */
+} rspamd_mempool_t;
+
+/**
+ * Statistics structure; filled by rspamd_mempool_stat() from the allocator's
+ * global counters.
+ */
+typedef struct memory_pool_stat_s {
+ gsize pools_allocated; /**< total number of allocated pools */
+ gsize pools_freed; /**< number of freed pools */
+ gsize bytes_allocated; /**< bytes that are allocated with pool allocator */
+ gsize chunks_allocated; /**< number of chunks that are allocated */
+ gsize shared_chunks_allocated; /**< shared chunks allocated */
+ gsize chunks_freed; /**< chunks freed */
+ gsize oversized_chunks; /**< oversized chunks */
+} rspamd_mempool_stat_t;
+
+/**
+ * Rwlock for locking shared memory regions. Built from two pool mutexes:
+ * the lock word of the read mutex counts active readers, the write mutex is
+ * held by the writer.
+ */
+typedef struct memory_pool_rwlock_s {
+ rspamd_mempool_mutex_t *__r_lock; /**< read mutex (private) */
+ rspamd_mempool_mutex_t *__w_lock; /**< write mutex (private) */
+} rspamd_mempool_rwlock_t;
+
+/**
+ * Allocate new memory pool
+ * @param size size of pool's page
+ * @return new memory pool object
+ */
+rspamd_mempool_t* rspamd_mempool_new (gsize size);
+
+/**
+ * Get memory from pool
+ * @param pool memory pool object
+ * @param size bytes to allocate
+ * @return pointer to allocated object
+ */
+void* rspamd_mempool_alloc (rspamd_mempool_t* pool, gsize size);
+
+/**
+ * Get memory from temporary pool
+ * @param pool memory pool object
+ * @param size bytes to allocate
+ * @return pointer to allocated object
+ */
+void* rspamd_mempool_alloc_tmp (rspamd_mempool_t* pool, gsize size);
+
+/**
+ * Get memory and set it to zero
+ * @param pool memory pool object
+ * @param size bytes to allocate
+ * @return pointer to allocated object
+ */
+void* rspamd_mempool_alloc0 (rspamd_mempool_t* pool, gsize size);
+
+/**
+ * Temporary-pool counterpart of rspamd_mempool_alloc0
+ * (NOTE(review): implementation is not part of this header - confirm)
+ * @param pool memory pool object
+ * @param size bytes to allocate
+ * @return pointer to allocated object
+ */
+void* rspamd_mempool_alloc0_tmp (rspamd_mempool_t* pool, gsize size);
+
+/**
+ * Cleanup temporary data in pool: every page of the temporary chain is freed
+ * @param pool memory pool object
+ */
+void rspamd_mempool_cleanup_tmp (rspamd_mempool_t* pool);
+
+/**
+ * Make a copy of string in pool
+ * @param pool memory pool object
+ * @param src source string
+ * @return pointer to newly created string that is copy of src
+ */
+gchar* rspamd_mempool_strdup (rspamd_mempool_t* pool, const gchar *src);
+
+/**
+ * Make a copy of fixed string in pool as null terminated string
+ * @param pool memory pool object
+ * @param src source string
+ * @return pointer to newly created string that is copy of src
+ */
+gchar* rspamd_mempool_fstrdup (rspamd_mempool_t* pool, const struct f_str_s *src);
+
+/**
+ * Allocate piece of shared memory
+ * @param pool memory pool object
+ * @param size bytes to allocate
+ * @return pointer to allocated object
+ */
+void* rspamd_mempool_alloc_shared (rspamd_mempool_t* pool, gsize size);
+/* Presumably the zero-filling and string-copying shared variants - NOTE(review): confirm against mem_pool.c */
+void* rspamd_mempool_alloc0_shared (rspamd_mempool_t *pool, gsize size);
+gchar* rspamd_mempool_strdup_shared (rspamd_mempool_t* pool, const gchar *src);
+
+/**
+ * Lock chunk of shared memory in which pointer is placed.
+ * Nothing is done when the pointer is not found in the pool's shared chunks.
+ * @param pool memory pool object
+ * @param pointer pointer of shared memory object that is to be locked (the whole page that contains that object is locked)
+ */
+void rspamd_mempool_lock_shared (rspamd_mempool_t *pool, void *pointer);
+
+/**
+ * Unlock chunk of shared memory in which pointer is placed.
+ * Nothing is done when the pointer is not found in the pool's shared chunks.
+ * @param pool memory pool object
+ * @param pointer pointer of shared memory object that is to be unlocked (the whole page that contains that object is unlocked)
+ */
+void rspamd_mempool_unlock_shared (rspamd_mempool_t *pool, void *pointer);
+
+/**
+ * Add destructor callback to pool
+ * @param pool memory pool object
+ * @param func pointer to function-destructor
+ * @param data pointer to data that would be passed to destructor
+ * @param function name of the function that registers the destructor
+ * @param line source location of the registration
+ */
+void rspamd_mempool_add_destructor_full (rspamd_mempool_t *pool, rspamd_mempool_destruct_t func, void *data,
+ const gchar *function, const gchar *line);
+
+/* Macros for common usage: the caller's function name and location are filled in automatically */
+#define rspamd_mempool_add_destructor(pool, func, data) \
+ rspamd_mempool_add_destructor_full(pool, func, data, G_STRFUNC, G_STRLOC)
+
+/**
+ * Replace destructor callback to pool for specified pointer.
+ * Only the first registered entry that matches both func and old_data is
+ * changed; nothing happens if there is no such entry.
+ * @param pool memory pool object
+ * @param func pointer to function-destructor
+ * @param old_data pointer to old data
+ * @param new_data pointer to data that would be passed to destructor
+ */
+void rspamd_mempool_replace_destructor (rspamd_mempool_t *pool,
+ rspamd_mempool_destruct_t func, void *old_data, void *new_data);
+
+/**
+ * Delete pool, free all its chunks and call destructors chain.
+ * Destructors are called before any chunk is released; the pool object
+ * itself is freed as well and must not be used afterwards.
+ * @param pool memory pool object
+ */
+void rspamd_mempool_delete (rspamd_mempool_t *pool);
+
+/**
+ * Get new mutex from pool (allocated in shared memory)
+ * @param pool memory pool object
+ * @return mutex object (unlocked) or NULL if pool is NULL
+ */
+rspamd_mempool_mutex_t* rspamd_mempool_get_mutex (rspamd_mempool_t *pool);
+
+/**
+ * Lock mutex; the pid of the caller is stored as the mutex owner
+ * @param mutex mutex to lock
+ */
+void rspamd_mempool_lock_mutex (rspamd_mempool_mutex_t *mutex);
+
+/**
+ * Unlock mutex; the stored owner is reset
+ * @param mutex mutex to unlock
+ */
+void rspamd_mempool_unlock_mutex (rspamd_mempool_mutex_t *mutex);
+
+/**
+ * Create new rwlock and place it in shared memory
+ * @param pool memory pool object
+ * @return rwlock object
+ */
+rspamd_mempool_rwlock_t* rspamd_mempool_get_rwlock (rspamd_mempool_t *pool);
+
+/**
+ * Acquire read lock
+ * @param lock rwlock object
+ */
+void rspamd_mempool_rlock_rwlock (rspamd_mempool_rwlock_t *lock);
+
+/**
+ * Acquire write lock
+ * @param lock rwlock object
+ */
+void rspamd_mempool_wlock_rwlock (rspamd_mempool_rwlock_t *lock);
+
+/**
+ * Release read lock
+ * @param lock rwlock object
+ */
+void rspamd_mempool_runlock_rwlock (rspamd_mempool_rwlock_t *lock);
+
+/**
+ * Release write lock
+ * @param lock rwlock object
+ */
+void rspamd_mempool_wunlock_rwlock (rspamd_mempool_rwlock_t *lock);
+
+/**
+ * Get pool allocator statistics
+ * @param st stat pool struct that receives a copy of the global counters
+ */
+void rspamd_mempool_stat (rspamd_mempool_stat_t *st);
+
+/**
+ * Get optimal pool size based on page size for this system
+ * @return size of memory page in system, but not less than 8Kb
+ */
+gsize rspamd_mempool_suggest_size (void);
+
+/**
+ * Set memory pool variable
+ * @param pool memory pool object
+ * @param name name of variable (copied into the pool)
+ * @param value value of variable
+ * @param destructor pointer to function-destructor, may be NULL
+ */
+void rspamd_mempool_set_variable (rspamd_mempool_t *pool, const gchar *name,
+ gpointer value, rspamd_mempool_destruct_t destructor);
+
+/**
+ * Get memory pool variable
+ * @param pool memory pool object
+ * @param name name of variable
+ * @return NULL or pointer to variable data
+ */
+gpointer rspamd_mempool_get_variable (rspamd_mempool_t *pool, const gchar *name);
+
+
+#endif
diff --git a/src/libutil/memcached.c b/src/libutil/memcached.c
new file mode 100644
index 000000000..e4c9be9d2
--- /dev/null
+++ b/src/libutil/memcached.c
@@ -0,0 +1,831 @@
+/*
+ * Copyright (c) 2009-2012, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifdef _THREAD_SAFE
+# include <pthread.h>
+#endif
+
+#include <stdarg.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/param.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sysexits.h>
+#include <unistd.h>
+#include <syslog.h>
+
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <sys/socket.h>
+#include <sys/poll.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/uio.h>
+#include <event.h>
+#include <glib.h>
+
+#include "memcached.h"
+
+/* Line terminator of the memcached text protocol */
+#define CRLF "\r\n"
+/* Reply lines this client recognises */
+#define END_TRAILER "END" CRLF
+#define STORED_TRAILER "STORED" CRLF
+/* The protocol keyword is NOT_STORED (with underscore), like NOT_FOUND */
+#define NOT_STORED_TRAILER "NOT_STORED" CRLF
+#define EXISTS_TRAILER "EXISTS" CRLF
+#define DELETED_TRAILER "DELETED" CRLF
+#define NOT_FOUND_TRAILER "NOT_FOUND" CRLF
+/* Error replies carry a message after the keyword, hence no CRLF here */
+#define CLIENT_ERROR_TRAILER "CLIENT_ERROR"
+#define SERVER_ERROR_TRAILER "SERVER_ERROR"
+
+/* Size of the stack buffer used for both requests and replies */
+#define READ_BUFSIZ 1500
+/* How many foreign udp datagrams are skipped before giving up */
+#define MAX_RETRIES 3
+
+/*
+ * Header for udp protocol: it is sent in front of every request and read in
+ * front of every reply. Replies whose req_id differs from the context's
+ * request counter are skipped by the handlers.
+ */
+struct memc_udp_header {
+ guint16 req_id; /* request id, filled from ctx->count (no byte order conversion is applied) */
+ guint16 seq_num; /* sequence number, left zero on requests */
+ guint16 dg_sent; /* number of datagrams in the message, always 1 (network order) on requests */
+ guint16 unused; /* left zero */
+};
+
+/* Forward declarations: the handlers re-arm the event with socket_callback and parse replies with memc_parse_header */
+static void socket_callback (gint fd, short what, void *arg);
+static gint memc_parse_header (gchar *buf, size_t * len, gchar **end);
+
+/*
+ * Debug logger: emits two debug level records (server address with the
+ * caller's line, then the formatted message) when MEMC_OPT_DEBUG is set in
+ * the context options; silent otherwise.
+ */
+static void
+memc_log (const memcached_ctx_t * ctx, gint line, const gchar *fmt, ...)
+{
+	va_list ap;
+
+	if ((ctx->options & MEMC_OPT_DEBUG) == 0) {
+		return;
+	}
+
+	va_start (ap, fmt);
+	g_log (G_LOG_DOMAIN, G_LOG_LEVEL_DEBUG, "memc_debug(%d): host: %s, port: %d", line, inet_ntoa (ctx->addr), ntohs (ctx->port));
+	g_logv (G_LOG_DOMAIN, G_LOG_LEVEL_DEBUG, fmt, ap);
+	va_end (ap);
+}
+
+/*
+ * Callback for write command.
+ *
+ * EV_WRITE:   send "<cmd> <key> 0 <expire> <bufsize>\r\n<data>\r\n" (prefixed
+ *             with the udp frame header for UDP_TEXT) and re-arm the event
+ *             for reading.
+ * EV_READ:    read the one-line reply and report OK / CLIENT_ERROR / EXISTS /
+ *             SERVER_ERROR through the user callback.
+ * EV_TIMEOUT: report SERVER_TIMEOUT.
+ *
+ * Compared with the previous revision the read path returns right after a
+ * failed readv() (the callback used to be invoked twice) and the reply is
+ * always NUL terminated before it is logged and compared.
+ * NOTE(review): the length returned by snprintf() is used as is; a key long
+ * enough to truncate the request line is not detected here.
+ */
+static void
+write_handler (gint fd, short what, memcached_ctx_t * ctx)
+{
+	gchar read_buf[READ_BUFSIZ];
+	gint retries;
+	ssize_t r;
+	struct memc_udp_header header;
+	struct iovec iov[4];
+
+	/* Write something to memcached */
+	if (what == EV_WRITE) {
+		if (ctx->protocol == UDP_TEXT) {
+			/* Send udp header */
+			bzero (&header, sizeof (header));
+			header.dg_sent = htons (1);
+			header.req_id = ctx->count;
+		}
+
+		r = snprintf (read_buf, READ_BUFSIZ, "%s %s 0 %d %zu" CRLF, ctx->cmd, ctx->param->key, ctx->param->expire, ctx->param->bufsize);
+		memc_log (ctx, __LINE__, "memc_write: send write request to memcached: %s", read_buf);
+
+		if (ctx->protocol == UDP_TEXT) {
+			iov[0].iov_base = &header;
+			iov[0].iov_len = sizeof (struct memc_udp_header);
+			if (ctx->param->bufpos == 0) {
+				iov[1].iov_base = read_buf;
+				iov[1].iov_len = r;
+			}
+			else {
+				/* Command line is sent only with the first part of data */
+				iov[1].iov_base = NULL;
+				iov[1].iov_len = 0;
+			}
+			iov[2].iov_base = ctx->param->buf + ctx->param->bufpos;
+			iov[2].iov_len = ctx->param->bufsize - ctx->param->bufpos;
+			iov[3].iov_base = CRLF;
+			iov[3].iov_len = sizeof (CRLF) - 1;
+			if (writev (ctx->sock, iov, 4) == -1) {
+				memc_log (ctx, __LINE__, "memc_write: writev failed: %s", strerror (errno));
+			}
+		}
+		else {
+			iov[0].iov_base = read_buf;
+			iov[0].iov_len = r;
+			iov[1].iov_base = ctx->param->buf + ctx->param->bufpos;
+			iov[1].iov_len = ctx->param->bufsize - ctx->param->bufpos;
+			iov[2].iov_base = CRLF;
+			iov[2].iov_len = sizeof (CRLF) - 1;
+			if (writev (ctx->sock, iov, 3) == -1) {
+				memc_log (ctx, __LINE__, "memc_write: writev failed: %s", strerror (errno));
+			}
+		}
+		/* Request is out: wait for the reply now */
+		event_del (&ctx->mem_ev);
+		event_set (&ctx->mem_ev, ctx->sock, EV_READ | EV_PERSIST | EV_TIMEOUT, socket_callback, (void *)ctx);
+		event_add (&ctx->mem_ev, &ctx->timeout);
+	}
+	else if (what == EV_READ) {
+		/* Read header */
+		retries = 0;
+		while (ctx->protocol == UDP_TEXT) {
+			iov[0].iov_base = &header;
+			iov[0].iov_len = sizeof (struct memc_udp_header);
+			iov[1].iov_base = read_buf;
+			/* Keep one byte for the terminating NUL */
+			iov[1].iov_len = READ_BUFSIZ - 1;
+			if ((r = readv (ctx->sock, iov, 2)) == -1) {
+				event_del (&ctx->mem_ev);
+				ctx->callback (ctx, SERVER_ERROR, ctx->callback_data);
+				return;
+			}
+			if (header.req_id != ctx->count && retries < MAX_RETRIES) {
+				retries++;
+				/* Not our reply packet */
+				continue;
+			}
+			/* readv() counted the frame header too, keep the payload length only */
+			r = r > (ssize_t)sizeof (header) ? r - (ssize_t)sizeof (header) : 0;
+			break;
+		}
+		if (ctx->protocol != UDP_TEXT) {
+			r = read (ctx->sock, read_buf, READ_BUFSIZ - 1);
+		}
+		/* A failed or empty read yields an empty reply, reported as SERVER_ERROR below */
+		read_buf[r > 0 ? r : 0] = '\0';
+		memc_log (ctx, __LINE__, "memc_write: read reply from memcached: %s", read_buf);
+		/* Increment count */
+		ctx->count++;
+		event_del (&ctx->mem_ev);
+		if (strncmp (read_buf, STORED_TRAILER, sizeof (STORED_TRAILER) - 1) == 0) {
+			ctx->callback (ctx, OK, ctx->callback_data);
+		}
+		else if (strncmp (read_buf, NOT_STORED_TRAILER, sizeof (NOT_STORED_TRAILER) - 1) == 0) {
+			ctx->callback (ctx, CLIENT_ERROR, ctx->callback_data);
+		}
+		else if (strncmp (read_buf, EXISTS_TRAILER, sizeof (EXISTS_TRAILER) - 1) == 0) {
+			ctx->callback (ctx, EXISTS, ctx->callback_data);
+		}
+		else {
+			ctx->callback (ctx, SERVER_ERROR, ctx->callback_data);
+		}
+	}
+	else if (what == EV_TIMEOUT) {
+		event_del (&ctx->mem_ev);
+		ctx->callback (ctx, SERVER_TIMEOUT, ctx->callback_data);
+	}
+}
+
+/*
+ * Callback for read command.
+ *
+ * EV_WRITE:   send "<cmd> <key>\r\n" and re-arm the event for reading.
+ * EV_READ:    read a part of the reply. The first part (param->bufpos == 0)
+ *             must start with the VALUE line; data bytes are appended to
+ *             param->buf until the "\r\nEND\r\n" trailer is seen.
+ * EV_TIMEOUT: report SERVER_TIMEOUT.
+ *
+ * Fixes over the previous revision:
+ *  - udp: the frame header is no longer counted as payload and one byte of
+ *    the buffer is reserved, so terminating the reply cannot write past it;
+ *  - the "everything is here" test counts only bytes after the VALUE line;
+ *  - the trailer is searched in the received data (it used to be searched
+ *    in the not yet written destination buffer) and removed completely;
+ *  - data is never copied past param->bufsize (WRONG_LENGTH is reported);
+ *  - printf conversions match the argument types.
+ * Known limitations kept as is: a trailer split between two reads is not
+ * recognised, data that happens to end with "END\r\n" at the end of a read is
+ * taken for the trailer, and ctx->count is advanced after every partial read.
+ */
+static void
+read_handler (gint fd, short what, memcached_ctx_t * ctx)
+{
+	gchar read_buf[READ_BUFSIZ];
+	gchar *p;
+	ssize_t r;
+	size_t datalen;
+	struct memc_udp_header header;
+	struct iovec iov[2];
+	gint retries = 0, t;
+	gboolean finished = FALSE;
+	/* Bytes that follow the data block: CRLF "END" CRLF */
+	const ssize_t trailer_len = (ssize_t)(sizeof (CRLF) - 1 + sizeof (END_TRAILER) - 1);
+
+	if (what == EV_WRITE) {
+		/* Send command to memcached */
+		if (ctx->protocol == UDP_TEXT) {
+			/* Send udp header */
+			bzero (&header, sizeof (header));
+			header.dg_sent = htons (1);
+			header.req_id = ctx->count;
+		}
+
+		r = snprintf (read_buf, READ_BUFSIZ, "%s %s" CRLF, ctx->cmd, ctx->param->key);
+		memc_log (ctx, __LINE__, "memc_read: send read request to memcached: %s", read_buf);
+		if (ctx->protocol == UDP_TEXT) {
+			iov[0].iov_base = &header;
+			iov[0].iov_len = sizeof (struct memc_udp_header);
+			iov[1].iov_base = read_buf;
+			iov[1].iov_len = r;
+			if (writev (ctx->sock, iov, 2) == -1) {
+				memc_log (ctx, __LINE__, "memc_write: writev failed: %s", strerror (errno));
+			}
+		}
+		else {
+			if (write (ctx->sock, read_buf, r) == -1) {
+				memc_log (ctx, __LINE__, "memc_write: write failed: %s", strerror (errno));
+			}
+		}
+		event_del (&ctx->mem_ev);
+		event_set (&ctx->mem_ev, ctx->sock, EV_READ | EV_PERSIST | EV_TIMEOUT, socket_callback, (void *)ctx);
+		event_add (&ctx->mem_ev, &ctx->timeout);
+	}
+	else if (what == EV_READ) {
+		while (ctx->protocol == UDP_TEXT) {
+			iov[0].iov_base = &header;
+			iov[0].iov_len = sizeof (struct memc_udp_header);
+			iov[1].iov_base = read_buf;
+			/* Keep one byte for the terminating NUL */
+			iov[1].iov_len = READ_BUFSIZ - 1;
+			if ((r = readv (ctx->sock, iov, 2)) == -1) {
+				event_del (&ctx->mem_ev);
+				ctx->callback (ctx, SERVER_ERROR, ctx->callback_data);
+				return;
+			}
+			/* readv() counted the frame header too, keep the payload length only */
+			r = r > (ssize_t)sizeof (header) ? r - (ssize_t)sizeof (header) : 0;
+			read_buf[r] = '\0';
+			memc_log (ctx, __LINE__, "memc_read: got read_buf: %s", read_buf);
+			if (header.req_id != ctx->count && retries < MAX_RETRIES) {
+				memc_log (ctx, __LINE__, "memc_read: got wrong packet id: %d, %d was awaited", header.req_id, ctx->count);
+				retries++;
+				/* Not our reply packet */
+				continue;
+			}
+			break;
+		}
+		if (ctx->protocol != UDP_TEXT) {
+			r = read (ctx->sock, read_buf, READ_BUFSIZ - 1);
+		}
+
+		if (r > 0) {
+			read_buf[r] = 0;
+			if (ctx->param->bufpos == 0) {
+				t = memc_parse_header (read_buf, &datalen, &p);
+				if (t < 0) {
+					event_del (&ctx->mem_ev);
+					memc_log (ctx, __LINE__, "memc_read: cannot parse memcached reply");
+					ctx->callback (ctx, SERVER_ERROR, ctx->callback_data);
+					return;
+				}
+				else if (t == 0) {
+					memc_log (ctx, __LINE__, "memc_read: record does not exists");
+					event_del (&ctx->mem_ev);
+					ctx->callback (ctx, NOT_EXISTS, ctx->callback_data);
+					return;
+				}
+
+				if (datalen > ctx->param->bufsize) {
+					memc_log (ctx, __LINE__, "memc_read: user's buffer is too small: %zu, %zu required", ctx->param->bufsize, datalen);
+					event_del (&ctx->mem_ev);
+					ctx->callback (ctx, WRONG_LENGTH, ctx->callback_data);
+					return;
+				}
+				/* From now on r counts only the bytes that follow the VALUE line */
+				r -= p - read_buf;
+				/* Check if we already have all data in buffer */
+				if (r >= (ssize_t)datalen + trailer_len) {
+					/* Store all data in param's buffer */
+					memcpy (ctx->param->buf + ctx->param->bufpos, p, datalen);
+					/* Increment count */
+					ctx->count++;
+					event_del (&ctx->mem_ev);
+					ctx->callback (ctx, OK, ctx->callback_data);
+					return;
+				}
+			}
+			else {
+				p = read_buf;
+			}
+
+			/* Does this part end with the trailer? Then it is the last one */
+			if (r >= trailer_len &&
+				strncmp (p + r - (sizeof (END_TRAILER) - 1), END_TRAILER, sizeof (END_TRAILER) - 1) == 0) {
+				r -= trailer_len;
+				finished = TRUE;
+			}
+
+			/* Never write past the end of user's buffer */
+			if (ctx->param->bufpos > ctx->param->bufsize ||
+				(size_t)r > ctx->param->bufsize - ctx->param->bufpos) {
+				memc_log (ctx, __LINE__, "memc_read: user's buffer is too small: %zu, %zu required", ctx->param->bufsize, ctx->param->bufpos + (size_t)r);
+				event_del (&ctx->mem_ev);
+				ctx->callback (ctx, WRONG_LENGTH, ctx->callback_data);
+				return;
+			}
+
+			/* Store this part of data in param's buffer */
+			memcpy (ctx->param->buf + ctx->param->bufpos, p, r);
+			if (finished) {
+				event_del (&ctx->mem_ev);
+				ctx->callback (ctx, OK, ctx->callback_data);
+				return;
+			}
+			ctx->param->bufpos += r;
+		}
+		else {
+			memc_log (ctx, __LINE__, "memc_read: read(v) failed: %zd, %s", r, strerror (errno));
+			event_del (&ctx->mem_ev);
+			ctx->callback (ctx, SERVER_ERROR, ctx->callback_data);
+			return;
+		}
+
+		ctx->count++;
+	}
+	else if (what == EV_TIMEOUT) {
+		event_del (&ctx->mem_ev);
+		ctx->callback (ctx, SERVER_TIMEOUT, ctx->callback_data);
+	}
+
+}
+
+/*
+ * Callback for delete command.
+ *
+ * EV_WRITE:   send "delete <key>\r\n" and re-arm the event for reading.
+ * EV_READ:    read the one-line reply and report OK / NOT_EXISTS /
+ *             SERVER_ERROR through the user callback.
+ * EV_TIMEOUT: report SERVER_TIMEOUT.
+ *
+ * Fixes over the previous revision: the DELETED reply is compared with its
+ * own length (the length of STORED was used), param->bufpos is no longer
+ * overwritten with the result of writev(), and the reply is always NUL
+ * terminated before it is compared.
+ */
+static void
+delete_handler (gint fd, short what, memcached_ctx_t * ctx)
+{
+	gchar read_buf[READ_BUFSIZ];
+	gint retries;
+	ssize_t r;
+	struct memc_udp_header header;
+	struct iovec iov[2];
+
+	/* Write something to memcached */
+	if (what == EV_WRITE) {
+		if (ctx->protocol == UDP_TEXT) {
+			/* Send udp header */
+			bzero (&header, sizeof (header));
+			header.dg_sent = htons (1);
+			header.req_id = ctx->count;
+		}
+		r = snprintf (read_buf, READ_BUFSIZ, "delete %s" CRLF, ctx->param->key);
+		memc_log (ctx, __LINE__, "memc_delete: send delete request to memcached: %s", read_buf);
+
+		if (ctx->protocol == UDP_TEXT) {
+			iov[0].iov_base = &header;
+			iov[0].iov_len = sizeof (struct memc_udp_header);
+			iov[1].iov_base = read_buf;
+			iov[1].iov_len = r;
+			if (writev (ctx->sock, iov, 2) == -1) {
+				memc_log (ctx, __LINE__, "memc_write: writev failed: %s", strerror (errno));
+			}
+		}
+		else {
+			if (write (ctx->sock, read_buf, r) == -1) {
+				memc_log (ctx, __LINE__, "memc_write: write failed: %s", strerror (errno));
+			}
+		}
+		event_del (&ctx->mem_ev);
+		event_set (&ctx->mem_ev, ctx->sock, EV_READ | EV_PERSIST | EV_TIMEOUT, socket_callback, (void *)ctx);
+		event_add (&ctx->mem_ev, &ctx->timeout);
+	}
+	else if (what == EV_READ) {
+		/* Read header */
+		retries = 0;
+		while (ctx->protocol == UDP_TEXT) {
+			iov[0].iov_base = &header;
+			iov[0].iov_len = sizeof (struct memc_udp_header);
+			iov[1].iov_base = read_buf;
+			/* Keep one byte for the terminating NUL */
+			iov[1].iov_len = READ_BUFSIZ - 1;
+			if ((r = readv (ctx->sock, iov, 2)) == -1) {
+				event_del (&ctx->mem_ev);
+				ctx->callback (ctx, SERVER_ERROR, ctx->callback_data);
+				return;
+			}
+			if (header.req_id != ctx->count && retries < MAX_RETRIES) {
+				retries++;
+				/* Not our reply packet */
+				continue;
+			}
+			/* readv() counted the frame header too, keep the payload length only */
+			r = r > (ssize_t)sizeof (header) ? r - (ssize_t)sizeof (header) : 0;
+			break;
+		}
+		if (ctx->protocol != UDP_TEXT) {
+			r = read (ctx->sock, read_buf, READ_BUFSIZ - 1);
+		}
+		/* A failed or empty read yields an empty reply, reported as SERVER_ERROR below */
+		read_buf[r > 0 ? r : 0] = '\0';
+		/* Increment count */
+		ctx->count++;
+		event_del (&ctx->mem_ev);
+		if (strncmp (read_buf, DELETED_TRAILER, sizeof (DELETED_TRAILER) - 1) == 0) {
+			ctx->callback (ctx, OK, ctx->callback_data);
+		}
+		else if (strncmp (read_buf, NOT_FOUND_TRAILER, sizeof (NOT_FOUND_TRAILER) - 1) == 0) {
+			ctx->callback (ctx, NOT_EXISTS, ctx->callback_data);
+		}
+		else {
+			ctx->callback (ctx, SERVER_ERROR, ctx->callback_data);
+		}
+	}
+	else if (what == EV_TIMEOUT) {
+		event_del (&ctx->mem_ev);
+		ctx->callback (ctx, SERVER_TIMEOUT, ctx->callback_data);
+	}
+}
+
+/*
+ * Event dispatcher for the memcached socket: routes the event to the
+ * handler of the operation currently stored in the context. A connect
+ * operation is completed here: write readiness means success (the event is
+ * re-armed for reading without timeout), anything else is a timeout.
+ */
+static void
+socket_callback (gint fd, short what, void *arg)
+{
+	memcached_ctx_t *ctx = arg;
+
+	switch (ctx->op) {
+	case CMD_WRITE:
+		write_handler (fd, what, ctx);
+		break;
+	case CMD_READ:
+		read_handler (fd, what, ctx);
+		break;
+	case CMD_DELETE:
+		delete_handler (fd, what, ctx);
+		break;
+	case CMD_CONNECT:
+		ctx->cmd = "connect";
+		if (what != EV_WRITE) {
+			ctx->callback (ctx, SERVER_TIMEOUT, ctx->callback_data);
+			ctx->alive = 0;
+			break;
+		}
+		/* We have write readiness after connect call, so reinit event */
+		event_del (&ctx->mem_ev);
+		event_set (&ctx->mem_ev, ctx->sock, EV_READ | EV_PERSIST | EV_TIMEOUT, socket_callback, (void *)ctx);
+		event_add (&ctx->mem_ev, NULL);
+		ctx->callback (ctx, OK, ctx->callback_data);
+		ctx->alive = 1;
+		break;
+	case CMD_NULL:
+		/* Nothing is in progress */
+		break;
+	}
+}
+
+/*
+ * Fallback completion callback: it only writes the outcome of the finished
+ * command to the debug log. The user data pointer is not used.
+ */
+static void
+common_memc_callback (memcached_ctx_t * ctx, memc_error_t error, void *data)
+{
+	const gchar *cmd_name = ctx->cmd;
+
+	memc_log (ctx, __LINE__, "common_memc_callback: result of memc command '%s' is '%s'", cmd_name, memc_strerror (error));
+}
+
+/*
+ * Make socket for udp connection.
+ * Creates a non-blocking datagram socket, schedules the connect event and
+ * connects the socket to the server stored in the context.
+ * Returns -1 if the socket cannot be created, otherwise the result of
+ * connect().
+ */
+static gint
+memc_make_udp_sock (memcached_ctx_t * ctx)
+{
+	struct sockaddr_in sc;
+	gint ofl;
+
+	/* Clear the whole address structure (only pointer-size bytes were cleared before) */
+	memset (&sc, 0, sizeof (sc));
+	sc.sin_family = AF_INET;
+	sc.sin_port = ctx->port;
+	memcpy (&sc.sin_addr, &ctx->addr, sizeof (struct in_addr));
+
+	ctx->sock = socket (PF_INET, SOCK_DGRAM, 0);
+
+	if (ctx->sock == -1) {
+		memc_log (ctx, __LINE__, "memc_make_udp_sock: socket() failed: %s", strerror (errno));
+		return -1;
+	}
+
+	/* set nonblocking */
+	ofl = fcntl (ctx->sock, F_GETFL, 0);
+	fcntl (ctx->sock, F_SETFL, ofl | O_NONBLOCK);
+
+	/*
+	 * Call connect to set default destination for datagrams
+	 * May not block
+	 */
+	ctx->op = CMD_CONNECT;
+	event_set (&ctx->mem_ev, ctx->sock, EV_WRITE | EV_TIMEOUT, socket_callback, (void *)ctx);
+	event_add (&ctx->mem_ev, NULL);
+	return connect (ctx->sock, (struct sockaddr *)&sc, sizeof (struct sockaddr_in));
+}
+
+/*
+ * Make socket for tcp connection.
+ * Creates a non-blocking stream socket and starts connecting it to the
+ * server stored in the context; completion is reported through the connect
+ * event (with the context timeout).
+ * Returns 0 when the connection is established or in progress, -1 on
+ * failure (the socket is closed and ctx->sock is set to -1).
+ */
+static gint
+memc_make_tcp_sock (memcached_ctx_t * ctx)
+{
+	struct sockaddr_in sc;
+	gint ofl, saved_errno;
+
+	/* Clear the whole address structure (only pointer-size bytes were cleared before) */
+	memset (&sc, 0, sizeof (sc));
+	sc.sin_family = AF_INET;
+	sc.sin_port = ctx->port;
+	memcpy (&sc.sin_addr, &ctx->addr, sizeof (struct in_addr));
+
+	ctx->sock = socket (PF_INET, SOCK_STREAM, 0);
+
+	if (ctx->sock == -1) {
+		memc_log (ctx, __LINE__, "memc_make_tcp_sock: socket() failed: %s", strerror (errno));
+		return -1;
+	}
+
+	/* set nonblocking */
+	ofl = fcntl (ctx->sock, F_GETFL, 0);
+	fcntl (ctx->sock, F_SETFL, ofl | O_NONBLOCK);
+
+	if (connect (ctx->sock, (struct sockaddr *)&sc, sizeof (struct sockaddr_in)) == -1) {
+		if (errno != EINPROGRESS) {
+			/* close() may change errno, remember the reason of the failure */
+			saved_errno = errno;
+			close (ctx->sock);
+			ctx->sock = -1;
+			memc_log (ctx, __LINE__, "memc_make_tcp_sock: connect() failed: %s", strerror (saved_errno));
+			return -1;
+		}
+	}
+	ctx->op = CMD_CONNECT;
+	event_set (&ctx->mem_ev, ctx->sock, EV_WRITE | EV_TIMEOUT, socket_callback, (void *)ctx);
+	event_add (&ctx->mem_ev, &ctx->timeout);
+	return 0;
+}
+
+/*
+ * Parse VALUE reply from server and set len argument to value returned by memcached.
+ *
+ * buf must be NUL terminated. On return *end points right after the first
+ * CRLF of buf (it is set whenever a CRLF is found, even on failure).
+ * Returns 1 when a VALUE line is parsed (*len is set), 0 when the reply is
+ * just END (no such record) and -1 on a malformed reply. A VALUE line
+ * without a byte count inside the line is now rejected: strtoul() skips
+ * white space including CR/LF, so digits of the data block could be taken
+ * for the length before.
+ */
+static gint
+memc_parse_header (gchar *buf, size_t * len, gchar **end)
+{
+	gchar *p, *c, *num_end;
+	gint i;
+	unsigned long bytes;
+
+	/* VALUE <key> <flags> <bytes> [<cas unique>]\r\n */
+	c = strstr (buf, CRLF);
+	if (c == NULL) {
+		return -1;
+	}
+	*end = c + sizeof (CRLF) - 1;
+
+	if (strncmp (buf, "VALUE ", sizeof ("VALUE ") - 1) == 0) {
+		p = buf + sizeof ("VALUE ") - 1;
+
+		/* Read bytes value and ignore all other fields, such as flags and key */
+		for (i = 0; i < 2; i++) {
+			/* Advance to the next space; p passes c if the line has none */
+			while (p++ < c && *p != ' ');
+
+			if (p > c) {
+				return -1;
+			}
+		}
+		bytes = strtoul (p, &num_end, 10);
+		if (num_end == p || num_end > c) {
+			/* No number at all, or the number is not part of this line */
+			return -1;
+		}
+		*len = bytes;
+		return 1;
+	}
+	/* If value not found memcached return just END\r\n , in this case return 0 */
+	else if (strncmp (buf, END_TRAILER, sizeof (END_TRAILER) - 1) == 0) {
+		return 0;
+	}
+
+	return -1;
+}
+
+
+/*
+ * Start an asynchronous read style command: the request is remembered in
+ * the context and sent once the socket becomes writable; the outcome is
+ * delivered through the context callback. The call itself always returns OK.
+ */
+memc_error_t
+memc_read (memcached_ctx_t * ctx, const gchar *cmd, memcached_param_t * param)
+{
+	/* Describe the pending operation */
+	ctx->op = CMD_READ;
+	ctx->cmd = cmd;
+	ctx->param = param;
+
+	/* Wait for write readiness (or timeout) */
+	event_set (&ctx->mem_ev, ctx->sock, EV_WRITE | EV_TIMEOUT, socket_callback, (void *)ctx);
+	event_add (&ctx->mem_ev, &ctx->timeout);
+
+	return OK;
+}
+
+/*
+ * Start an asynchronous store style command: the expiration time is saved
+ * into param, the request is remembered in the context and sent once the
+ * socket becomes writable. The call itself always returns OK.
+ */
+memc_error_t
+memc_write (memcached_ctx_t * ctx, const gchar *cmd, memcached_param_t * param, gint expire)
+{
+	param->expire = expire;
+
+	/* Describe the pending operation */
+	ctx->op = CMD_WRITE;
+	ctx->cmd = cmd;
+	ctx->param = param;
+
+	/* Wait for write readiness (or timeout) */
+	event_set (&ctx->mem_ev, ctx->sock, EV_WRITE | EV_TIMEOUT, socket_callback, (void *)ctx);
+	event_add (&ctx->mem_ev, &ctx->timeout);
+
+	return OK;
+}
+
+/*
+ * Start an asynchronous delete command for the key given in param; the
+ * request is sent once the socket becomes writable. The call itself always
+ * returns OK.
+ */
+memc_error_t
+memc_delete (memcached_ctx_t * ctx, memcached_param_t * param)
+{
+	/* Describe the pending operation */
+	ctx->op = CMD_DELETE;
+	ctx->cmd = "delete";
+	ctx->param = param;
+
+	/* Wait for write readiness (or timeout) */
+	event_set (&ctx->mem_ev, ctx->sock, EV_WRITE | EV_TIMEOUT, socket_callback, (void *)ctx);
+	event_add (&ctx->mem_ev, &ctx->timeout);
+
+	return OK;
+}
+
+/*
+ * Mirrored write: issue cmd on every server of the ctx array (memcached_num
+ * elements) whose alive flag is 1, walking from the last element to the first.
+ * A server whose write fails is logged and marked as not alive.
+ * Returns OK when no write failed, otherwise the error of the last failure.
+ */
+memc_error_t
+memc_write_mirror (memcached_ctx_t * ctx, size_t memcached_num, const gchar *cmd, memcached_param_t * param, gint expire)
+{
+	memc_error_t status = OK, rc;
+	memcached_ctx_t *srv;
+	size_t left;
+
+	for (left = memcached_num; left > 0; left--) {
+		srv = &ctx[left - 1];
+
+		if (srv->alive != 1) {
+			continue;
+		}
+
+		rc = memc_write (srv, cmd, param, expire);
+		if (rc == OK) {
+			continue;
+		}
+
+		memc_log (srv, __LINE__, "memc_write_mirror: cannot write to mirror server: %s", memc_strerror (rc));
+		status = rc;
+		srv->alive = 0;
+	}
+
+	return status;
+}
+
+/*
+ * Mirrored read: try the servers of the ctx array (memcached_num elements)
+ * whose alive flag is 1, walking from the last element to the first, and stop
+ * at the first one for which memc_read() returns OK.
+ *
+ * A server that fails with anything but NOT_EXISTS is logged and marked as
+ * not alive.
+ *
+ * Returns OK as soon as one server accepted the read (even if servers tried
+ * before it failed), otherwise the error of the last server tried; OK is also
+ * returned when no server is alive.
+ */
+memc_error_t
+memc_read_mirror (memcached_ctx_t * ctx, size_t memcached_num, const gchar *cmd, memcached_param_t * param)
+{
+	memc_error_t r, result = OK;
+
+	while (memcached_num--) {
+		if (ctx[memcached_num].alive == 1) {
+			r = memc_read (&ctx[memcached_num], cmd, param);
+			if (r != OK) {
+				result = r;
+				if (r != NOT_EXISTS) {
+					ctx[memcached_num].alive = 0;
+					memc_log (&ctx[memcached_num], __LINE__, "memc_read_mirror: cannot read from mirror server: %s", memc_strerror (r));
+				}
+				else {
+					memc_log (&ctx[memcached_num], __LINE__, "memc_read_mirror: record not exists");
+				}
+			}
+			else {
+				/* A fallback server succeeded: forget earlier failures */
+				result = OK;
+				break;
+			}
+		}
+	}
+
+	return result;
+}
+
+/*
+ * Mirrored delete: issue a delete on every server of the ctx array
+ * (memcached_num elements) whose alive flag is 1, walking from the last
+ * element to the first. The cmd argument is not used.
+ * A server failing with anything but NOT_EXISTS is marked as not alive and
+ * logged. Returns OK when nothing failed, otherwise the last error seen.
+ */
+memc_error_t
+memc_delete_mirror (memcached_ctx_t * ctx, size_t memcached_num, const gchar *cmd, memcached_param_t * param)
+{
+	memc_error_t status = OK, rc;
+	memcached_ctx_t *srv;
+	size_t left;
+
+	for (left = memcached_num; left > 0; left--) {
+		srv = &ctx[left - 1];
+
+		if (srv->alive != 1) {
+			continue;
+		}
+
+		rc = memc_delete (srv, param);
+		if (rc == OK) {
+			continue;
+		}
+
+		status = rc;
+		if (rc != NOT_EXISTS) {
+			srv->alive = 0;
+			memc_log (srv, __LINE__, "memc_delete_mirror: cannot delete from mirror server: %s", memc_strerror (rc));
+		}
+	}
+
+	return status;
+}
+
+
+/*
+ * Reset the per-connection state of ctx and create its socket according to
+ * ctx->protocol. A NULL ctx->callback is replaced with common_memc_callback.
+ * Returns -1 for a NULL ctx and for the binary protocols (not implemented),
+ * otherwise whatever the socket creation helper returns.
+ */
+gint
+memc_init_ctx (memcached_ctx_t * ctx)
+{
+	if (ctx == NULL) {
+		return -1;
+	}
+
+	ctx->op = CMD_NULL;
+	ctx->alive = 0;
+	ctx->count = 0;
+
+	if (ctx->callback == NULL) {
+		/* Fall back to the default callback */
+		ctx->callback = common_memc_callback;
+	}
+
+	if (ctx->protocol == UDP_TEXT) {
+		return memc_make_udp_sock (ctx);
+	}
+	if (ctx->protocol == TCP_TEXT) {
+		return memc_make_tcp_sock (ctx);
+	}
+
+	/* UDP_BIN, TCP_BIN and unknown values: not implemented */
+	return -1;
+}
+
+/*
+ * Initialize every context of the ctx array (memcached_num elements) whose
+ * alive flag is 1, walking from the last element to the first.
+ * A context that cannot be initialized is marked as not alive and logged.
+ * Returns 1 if at least one context was initialized, -1 otherwise.
+ */
+gint
+memc_init_ctx_mirror (memcached_ctx_t * ctx, size_t memcached_num)
+{
+	gint status = -1;
+	memcached_ctx_t *srv;
+	size_t left;
+
+	for (left = memcached_num; left > 0; left--) {
+		srv = &ctx[left - 1];
+
+		if (srv->alive != 1) {
+			continue;
+		}
+
+		if (memc_init_ctx (srv) != -1) {
+			status = 1;
+			continue;
+		}
+
+		srv->alive = 0;
+		memc_log (srv, __LINE__, "memc_init_ctx_mirror: cannot connect to server");
+	}
+
+	return status;
+}
+
+/*
+ * Close context connection: remove the pending event and close the socket.
+ *
+ * Returns -1 when ctx is NULL or has no socket (sock == -1), otherwise the
+ * result of close(). The descriptor is forgotten (sock is reset to -1) even
+ * when close() reports an error, so that a repeated call cannot close a
+ * descriptor number that has meanwhile been reused for something else.
+ */
+gint
+memc_close_ctx (memcached_ctx_t * ctx)
+{
+	gint r;
+
+	if (ctx == NULL || ctx->sock == -1) {
+		return -1;
+	}
+
+	event_del (&ctx->mem_ev);
+	r = close (ctx->sock);
+	ctx->sock = -1;
+
+	return r;
+}
+
+/*
+ * Close every context of the ctx array (memcached_num elements) whose alive
+ * flag is 1, walking from the last element to the first. A context that
+ * fails to close is logged and marked as not alive.
+ * Returns the result of the last memc_close_ctx() call made, or 0 when no
+ * context was alive.
+ */
+gint
+memc_close_ctx_mirror (memcached_ctx_t * ctx, size_t memcached_num)
+{
+	gint last_rc = 0;
+	memcached_ctx_t *srv;
+	size_t left;
+
+	for (left = memcached_num; left > 0; left--) {
+		srv = &ctx[left - 1];
+
+		if (srv->alive != 1) {
+			continue;
+		}
+
+		last_rc = memc_close_ctx (srv);
+		if (last_rc != -1) {
+			continue;
+		}
+
+		memc_log (srv, __LINE__, "memc_close_ctx_mirror: cannot close connection to server properly");
+		srv->alive = 0;
+	}
+
+	return last_rc;
+}
+
+
+/*
+ * Return a static, human readable description of err;
+ * "Unknown error" is returned for values outside of memc_error_t.
+ */
+const gchar *
+memc_strerror (memc_error_t err)
+{
+	switch (err) {
+	case OK:
+		return "Ok";
+	case BAD_COMMAND:
+		return "Bad command";
+	case CLIENT_ERROR:
+		return "Client error";
+	case SERVER_ERROR:
+		return "Server error";
+	case SERVER_TIMEOUT:
+		return "Server timeout";
+	case NOT_EXISTS:
+		return "Key not found";
+	case EXISTS:
+		return "Key already exists";
+	case WRONG_LENGTH:
+		return "Wrong result length";
+	default:
+		break;
+	}
+
+	return "Unknown error";
+}
+
+/*
+ * vi:ts=4
+ */
diff --git a/src/libutil/memcached.h b/src/libutil/memcached.h
new file mode 100644
index 000000000..098e26eea
--- /dev/null
+++ b/src/libutil/memcached.h
@@ -0,0 +1,142 @@
+#ifndef MEMCACHED_H
+#define MEMCACHED_H
+
+#include <sys/types.h>
+#include <netinet/in.h>
+#include <sys/time.h>
+#include <time.h>
+
+#define MAXKEYLEN 250
+
+#define MEMC_OPT_DEBUG 0x1
+
+struct event;
+
+typedef enum memc_error { /* result codes; memc_strerror() maps them to text */
+ OK, /* "Ok" */
+ BAD_COMMAND, /* "Bad command" */
+ CLIENT_ERROR, /* "Client error" */
+ SERVER_ERROR, /* "Server error" */
+ SERVER_TIMEOUT, /* "Server timeout" */
+ NOT_EXISTS, /* "Key not found" */
+ EXISTS, /* "Key already exists" */
+ WRONG_LENGTH /* "Wrong result length" */
+} memc_error_t;
+
+/* Only the text protocols are handled by memc_init_ctx(); it returns -1 for the binary ones */
+typedef enum memc_proto {
+ UDP_TEXT, /* memc_init_ctx() calls memc_make_udp_sock() */
+ TCP_TEXT, /* memc_init_ctx() calls memc_make_tcp_sock() */
+ UDP_BIN, /* not implemented */
+ TCP_BIN /* not implemented */
+} memc_proto_t;
+
+typedef enum memc_op { /* operation currently stored in memcached_ctx_t.op */
+ CMD_NULL, /* set by memc_init_ctx() */
+ CMD_CONNECT,
+ CMD_READ, /* set by memc_read() */
+ CMD_WRITE, /* set by memc_write() */
+ CMD_DELETE, /* set by memc_delete() */
+} memc_opt_t;
+
+typedef struct memcached_param_s { /* description of one record for a memcached operation */
+ gchar key[MAXKEYLEN]; /* record key */
+ u_char *buf; /* presumably the caller's data buffer - TODO confirm against socket_callback */
+ size_t bufsize; /* presumably the size of buf in bytes - TODO confirm */
+ size_t bufpos; /* presumably the current offset inside buf - TODO confirm */
+ gint expire; /* overwritten by memc_write() with its expire argument */
+} memcached_param_t;
+
+
+/* State of a single memcached connection. Port must be in network byte order */
+typedef struct memcached_ctx_s {
+ memc_proto_t protocol; /* selects the socket helper used by memc_init_ctx() */
+ struct in_addr addr;
+ guint16 port;
+ gint sock; /* memc_close_ctx() treats -1 as "no socket" */
+ struct timeval timeout; /* passed to event_add() for every scheduled operation */
+ /* Counter that is used for memcached operations in network byte order; reset to 0 by memc_init_ctx() */
+ guint16 count;
+ /* Flag that signals that this memcached server is alive; the *_mirror functions only use servers with alive == 1 */
+ short alive;
+ /* Options that can be specified for memcached connection */
+ short options;
+ /* Current operation */
+ memc_opt_t op;
+ /* Current command */
+ const gchar *cmd;
+ /* Current param */
+ memcached_param_t *param;
+ /* Callback for current operation; memc_init_ctx() installs common_memc_callback when this is NULL */
+ void (*callback) (struct memcached_ctx_s *ctx, memc_error_t error, void *data);
+ /* Data for callback function */
+ void *callback_data;
+ /* Event structure */
+ struct event mem_ev;
+} memcached_ctx_t;
+
+typedef void (*memcached_callback_t) (memcached_ctx_t *ctx, memc_error_t error, void *data);
+
+/*
+ * Initialize connection to memcached server:
+ * addr, port and timeout fields in ctx must be filled with valid values
+ * Return:
+ * 0 - success
+ * -1 - error (error is stored in errno)
+ */
+gint memc_init_ctx (memcached_ctx_t *ctx);
+gint memc_init_ctx_mirror (memcached_ctx_t *ctx, size_t memcached_num);
+/*
+ * Memcached function for getting, setting, adding values to memcached server
+ * ctx - valid memcached context
+ * key - key to extract (max 250 characters as it specified in memcached API)
+ * buf, elemsize, nelem - allocated buffer of length nelem structures each of elemsize
+ * that would contain extracted data (NOT NULL TERMINATED)
+ * Return:
+ * memc_error_t
+ * nelem is changed according to actual number of extracted data
+ *
+ * "set" means "store this data".
+ *
+ * "add" means "store this data, but only if the server *doesn't* already
+ * hold data for this key".
+
+ * "replace" means "store this data, but only if the server *does*
+ * already hold data for this key".
+
+ * "append" means "add this data to an existing key after existing data".
+
+ * "prepend" means "add this data to an existing key before existing data".
+ */
+#define memc_get(ctx, param) memc_read(ctx, "get", param)
+#define memc_set(ctx, param, expire) memc_write(ctx, "set", param, expire)
+#define memc_add(ctx, param, expire) memc_write(ctx, "add", param, expire)
+#define memc_replace(ctx, param, expire) memc_write(ctx, "replace", param, expire)
+#define memc_append(ctx, param, expire) memc_write(ctx, "append", param, expire)
+#define memc_prepend(ctx, param, expire) memc_write(ctx, "prepend", param, expire)
+
+/* Functions that works with mirror of memcached servers */
+#define memc_get_mirror(ctx, num, param) memc_read_mirror(ctx, num, "get", param)
+#define memc_set_mirror(ctx, num, param, expire) memc_write_mirror(ctx, num, "set", param, expire)
+#define memc_add_mirror(ctx, num, param, expire) memc_write_mirror(ctx, num, "add", param, expire)
+#define memc_replace_mirror(ctx, num, param, expire) memc_write_mirror(ctx, num, "replace", param, expire)
+#define memc_append_mirror(ctx, num, param, expire) memc_write_mirror(ctx, num, "append", param, expire)
+#define memc_prepend_mirror(ctx, num, param, expire) memc_write_mirror(ctx, num, "prepend", param, expire)
+
+
+memc_error_t memc_read (memcached_ctx_t *ctx, const gchar *cmd, memcached_param_t *param);
+memc_error_t memc_write (memcached_ctx_t *ctx, const gchar *cmd, memcached_param_t *param, gint expire);
+memc_error_t memc_delete (memcached_ctx_t *ctx, memcached_param_t *params);
+
+memc_error_t memc_write_mirror (memcached_ctx_t *ctx, size_t memcached_num, const gchar *cmd, memcached_param_t *param, gint expire);
+memc_error_t memc_read_mirror (memcached_ctx_t *ctx, size_t memcached_num, const gchar *cmd, memcached_param_t *param);
+memc_error_t memc_delete_mirror (memcached_ctx_t *ctx, size_t memcached_num, const gchar *cmd, memcached_param_t *param);
+
+/* Return symbolic name of memcached error*/
+const gchar * memc_strerror (memc_error_t err);
+
+/* Destroy socket from ctx */
+gint memc_close_ctx (memcached_ctx_t *ctx);
+gint memc_close_ctx_mirror (memcached_ctx_t *ctx, size_t memcached_num);
+
+#endif
diff --git a/src/libutil/printf.c b/src/libutil/printf.c
new file mode 100644
index 000000000..d72ec95c8
--- /dev/null
+++ b/src/libutil/printf.c
@@ -0,0 +1,635 @@
+/* Copyright (c) 2010, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "printf.h"
+#include "fstring.h"
+#include "main.h"
+
+/**
+ * From FreeBSD libutil code
+ */
+static const int maxscale = 6; /* upper bound for the number of divisions done by rspamd_humanize_number() */
+
+/**
+ * Print num into [buf, last) scaled down with a unit suffix.
+ * @param buf start of the output area
+ * @param last end of the output area
+ * @param num number to print
+ * @param bytes if true divide by 1024 and use "B" for unscaled values,
+ * otherwise divide by 1000 and use no suffix for unscaled values
+ * @return pointer behind the printed text, or buf if the area is too small
+ */
+static gchar *
+rspamd_humanize_number (gchar *buf, gchar *last, gint64 num, gboolean bytes)
+{
+	/* One suffix per scale step, every entry occupies 3 bytes */
+	const gchar *suffixes = bytes ?
+		"B\0\0k\0\0M\0\0G\0\0T\0\0P\0\0E" :
+		"\0\0\0k\0\0M\0\0G\0\0T\0\0P\0\0E";
+	const gint64 base = bytes ? 1024 : 1000;
+	gsize avail = last - buf, required;
+	int scale, rest = 0, sgn, printed;
+
+	if (num < 0) {
+		sgn = -1;
+		num = -num;
+		required = 3;	/* sign, digit and one more byte */
+	}
+	else {
+		sgn = 1;
+		required = 2;	/* digit and one more byte */
+	}
+
+	/* Check if enough room for the number + suffix + `\0' */
+	if (avail < required + 1) {
+		return buf;
+	}
+
+	/* Scale down while the value is above the base, keeping the last remainder */
+	scale = 0;
+	while (scale < maxscale && num > base) {
+		rest = num % base;
+		num /= base;
+		scale++;
+	}
+
+	printed = rspamd_snprintf (buf, avail, "%L%s",
+			sgn * (num + (rest + 50) / 1000),
+			&suffixes[scale * 3]);
+
+	return buf + printed;
+}
+
+
+/**
+ * Print an unsigned number into [buf, last).
+ * @param buf start of the output area
+ * @param last end of the output area, nothing is written at or behind it
+ * @param ui64 number to print
+ * @param zero padding character
+ * @param hexadecimal 0 - decimal, 1 - lower case hex, anything else - upper case hex
+ * @param width minimal field width, shorter numbers are left padded with zero
+ * @return pointer behind the last character written (no '\0' is appended)
+ */
+static gchar *
+rspamd_sprintf_num (gchar *buf, gchar *last, guint64 ui64, gchar zero,
+	guint hexadecimal, guint width)
+{
+	static gchar hex[] = "0123456789abcdef";
+	static gchar HEX[] = "0123456789ABCDEF";
+	gchar temp[sizeof ("18446744073709551615")];
+	gchar *tail = temp + sizeof (temp), *p = tail;
+	const gchar *alphabet;
+	guint32 ui32;
+	size_t ndigits, filled;
+
+	/* Digits are produced from the least significant one, filling temp backwards */
+	if (hexadecimal == 0) {
+		if (ui64 <= G_MAXUINT32) {
+			/*
+			 * 64-bit division may be a library call on 32-bit targets,
+			 * so use 32-bit arithmetic whenever the value fits
+			 */
+			ui32 = (guint32) ui64;
+
+			do {
+				*--p = (gchar) (ui32 % 10 + '0');
+				ui32 /= 10;
+			} while (ui32 != 0);
+		}
+		else {
+			do {
+				*--p = (gchar) (ui64 % 10 + '0');
+				ui64 /= 10;
+			} while (ui64 != 0);
+		}
+	}
+	else {
+		alphabet = (hexadecimal == 1) ? hex : HEX;
+
+		do {
+			*--p = alphabet[(guint32) (ui64 & 0xf)];
+			ui64 >>= 4;
+		} while (ui64 != 0);
+	}
+
+	ndigits = tail - p;
+
+	/* Left padding up to width */
+	for (filled = ndigits; filled < width && buf < last; filled++) {
+		*buf++ = zero;
+	}
+
+	/* Copy as many digits as still fit */
+	if (buf + ndigits > last) {
+		ndigits = last - buf;
+	}
+	memcpy (buf, p, ndigits);
+
+	return buf + ndigits;
+}
+
+struct rspamd_printf_char_buf { /* destination state used by rspamd_vsnprintf() */
+ char *begin; /* start of the destination buffer */
+ char *pos; /* next byte to be written */
+ glong remain; /* bytes that may still be written at pos */
+};
+
+/**
+ * Append callback writing into a bounded character buffer.
+ * @param buf data to append
+ * @param buflen length of data
+ * @param ud struct rspamd_printf_char_buf describing the destination
+ * @return number of bytes copied (buflen at most), or the non-positive
+ * remaining space if the buffer is already full
+ */
+static glong
+rspamd_printf_append_char (const gchar *buf, glong buflen, gpointer ud)
+{
+	struct rspamd_printf_char_buf *out = (struct rspamd_printf_char_buf *)ud;
+	glong n;
+
+	if (out->remain <= 0) {
+		return out->remain;
+	}
+
+	n = (buflen < out->remain) ? buflen : out->remain;
+	memcpy (out->pos, buf, n);
+	out->pos += n;
+	out->remain -= n;
+
+	return n;
+}
+
+/**
+ * Append callback writing to a stdio stream.
+ * @param ud destination FILE *
+ * @return number of bytes written as reported by fwrite()
+ */
+static glong
+rspamd_printf_append_file (const gchar *buf, glong buflen, gpointer ud)
+{
+	FILE *stream = (FILE *)ud;
+	glong nwritten;
+
+	nwritten = fwrite (buf, 1, buflen, stream);
+
+	return nwritten;
+}
+
+/**
+ * Append callback writing to a GString.
+ * @param ud destination GString *
+ * @return buflen
+ */
+static glong
+rspamd_printf_append_gstring (const gchar *buf, glong buflen, gpointer ud)
+{
+	g_string_append_len ((GString *)ud, buf, buflen);
+
+	return buflen;
+}
+
+/**
+ * Format fmt and the following arguments into the stream f.
+ * @return value returned by rspamd_vprintf_common()
+ */
+glong
+rspamd_fprintf (FILE *f, const gchar *fmt, ...)
+{
+	glong nwritten;
+	va_list ap;
+
+	va_start (ap, fmt);
+	nwritten = rspamd_vprintf_common (rspamd_printf_append_file, f, fmt, ap);
+	va_end (ap);
+
+	return nwritten;
+}
+
+/**
+ * Format fmt and the following arguments into the stream f and flush the
+ * stream afterwards.
+ * @return value returned by rspamd_vprintf_common()
+ */
+glong
+rspamd_log_fprintf (FILE *f, const gchar *fmt, ...)
+{
+	glong nwritten;
+	va_list ap;
+
+	va_start (ap, fmt);
+	nwritten = rspamd_vprintf_common (rspamd_printf_append_file, f, fmt, ap);
+	va_end (ap);
+
+	fflush (f);
+
+	return nwritten;
+}
+
+
+/**
+ * Format fmt and the following arguments into buf, which holds max bytes.
+ * @return distance between buf and the pointer returned by rspamd_vsnprintf()
+ */
+glong
+rspamd_snprintf (gchar *buf, glong max, const gchar *fmt, ...)
+{
+	va_list ap;
+	gchar *finish;
+
+	va_start (ap, fmt);
+	finish = rspamd_vsnprintf (buf, max, fmt, ap);
+	va_end (ap);
+
+	return finish - buf;
+}
+
+/**
+ * Format fmt with args into buf and terminate the result with '\0'.
+ * @param buf destination buffer
+ * @param max size of buf in bytes, the terminator included
+ * @param fmt format string
+ * @param args arguments for fmt
+ * @return pointer to the terminating '\0'; buf itself, untouched, when max
+ * leaves no room even for the terminator
+ */
+gchar *
+rspamd_vsnprintf (gchar *buf, glong max, const gchar *fmt, va_list args)
+{
+	struct rspamd_printf_char_buf dst;
+
+	if (max <= 0) {
+		/* Writing the terminator would already be out of bounds */
+		return buf;
+	}
+
+	dst.begin = buf;
+	dst.pos = dst.begin;
+	/* Keep one byte for the terminator */
+	dst.remain = max - 1;
+	(void)rspamd_vprintf_common (rspamd_printf_append_char, &dst, fmt, args);
+	*dst.pos = '\0';
+
+	return dst.pos;
+}
+
+/**
+ * Format fmt and the following arguments and append the result to s.
+ * @return value returned by rspamd_vprintf_common()
+ */
+glong
+rspamd_printf_gstring (GString *s, const gchar *fmt, ...)
+{
+	glong nwritten;
+	va_list ap;
+
+	va_start (ap, fmt);
+	nwritten = rspamd_vprintf_common (rspamd_printf_append_gstring, s, fmt, ap);
+	va_end (ap);
+
+	return nwritten;
+}
+
+/*
+ * Pass len bytes of buf to the append callback, then step over the conversion
+ * character and restart the pending literal run behind it.
+ * Formatting is aborted (goto oob) only when the callback took less than it
+ * was given; a zero length append (e.g. "%s" with an empty string) is
+ * therefore not mistaken for a full destination.
+ * len is evaluated twice and must have no side effects.
+ * Uses wr, written, func, apd, fmt and buf_start of the enclosing function.
+ */
+#define RSPAMD_PRINTF_APPEND(buf, len) \
+	do { \
+		wr = func ((buf), (len), apd); \
+		if (wr > 0) { \
+			written += wr; \
+		} \
+		if (wr < (glong)(len)) { \
+			goto oob; \
+		} \
+		fmt ++; \
+		buf_start = fmt; \
+	} while(0)
+
+/**
+ * Core of all rspamd printf-like functions: walk over fmt and hand every
+ * piece of output to the append callback.
+ * @param func callback that consumes the produced output
+ * @param apd opaque pointer passed to func
+ * @param fmt format string
+ * @param args arguments for fmt
+ * @return sum of the values returned by func; formatting stops as soon as
+ * func refuses data, and 0 is returned for a negative '*' length
+ */
+glong
+rspamd_vprintf_common (rspamd_printf_append_func func, gpointer apd, const gchar *fmt, va_list args)
+{
+	gchar zero, numbuf[G_ASCII_DTOSTR_BUF_SIZE], *p, *last, c;
+	const gchar *buf_start = fmt;
+	gint d;
+	long double f, scale;
+	glong written = 0, wr, slen;
+	gint64 i64;
+	guint64 ui64;
+	guint width, sign, hex, humanize, bytes, frac_width, i;
+	f_str_t *v;
+	GString *gs;
+	gboolean bv;
+
+	while (*fmt) {
+
+		/*
+		 * Literal text is not copied character by character: the run
+		 * [buf_start, fmt) is flushed when a conversion starts and once
+		 * more at the end of the format
+		 */
+
+		if (*fmt == '%') {
+
+			/* Append what we have in buf */
+			if (fmt > buf_start) {
+				wr = func (buf_start, fmt - buf_start, apd);
+				if (wr <= 0) {
+					goto oob;
+				}
+				written += wr;
+			}
+
+			i64 = 0;
+			ui64 = 0;
+
+			zero = (gchar) ((*++fmt == '0') ? '0' : ' ');
+			width = 0;
+			sign = 1;
+			hex = 0;
+			bytes = 0;
+			humanize = 0;
+			frac_width = 0;
+			slen = -1;
+
+			while (*fmt >= '0' && *fmt <= '9') {
+				width = width * 10 + *fmt++ - '0';
+			}
+
+
+			/* Modifiers; a '.' precision ends the modifier list */
+			for ( ;; ) {
+				switch (*fmt) {
+
+				case 'u':
+					sign = 0;
+					fmt++;
+					continue;
+
+				case 'm':
+					fmt++;
+					continue;
+
+				case 'X':
+					hex = 2;
+					sign = 0;
+					fmt++;
+					continue;
+
+				case 'x':
+					hex = 1;
+					sign = 0;
+					fmt++;
+					continue;
+				case 'H':
+					humanize = 1;
+					bytes = 1;
+					sign = 0;
+					fmt ++;
+					continue;
+				case 'h':
+					humanize = 1;
+					sign = 0;
+					fmt ++;
+					continue;
+				case '.':
+					fmt++;
+
+					while (*fmt >= '0' && *fmt <= '9') {
+						frac_width = frac_width * 10 + *fmt++ - '0';
+					}
+
+					break;
+
+				case '*':
+					d = (gint)va_arg (args, gint);
+					if (G_UNLIKELY (d < 0)) {
+						msg_err ("critical error: size is less than 0");
+						return 0;
+					}
+					slen = (glong)d;
+					fmt++;
+					continue;
+
+				default:
+					break;
+				}
+
+				break;
+			}
+
+
+			/*
+			 * Conversions. Those that produce their output themselves end
+			 * with `continue'; the integer ones only load i64/ui64 and
+			 * `break' to the common number printing code below.
+			 */
+			switch (*fmt) {
+
+			case '\0':
+				/*
+				 * The format ends inside a conversion (e.g. a lone '%'):
+				 * stop here instead of stepping over the terminator
+				 */
+				return written;
+
+			case 'V':
+				v = va_arg (args, f_str_t *);
+				RSPAMD_PRINTF_APPEND (v->begin, v->len);
+
+				continue;
+
+			case 'v':
+				gs = va_arg (args, GString *);
+				RSPAMD_PRINTF_APPEND (gs->str, gs->len);
+
+				continue;
+
+			case 's':
+				p = va_arg (args, gchar *);
+				if (p == NULL) {
+					p = "(NULL)";
+				}
+
+				if (slen == -1) {
+					/* NULL terminated string */
+					slen = strlen (p);
+				}
+
+				RSPAMD_PRINTF_APPEND (p, slen);
+
+				continue;
+
+			case 'O':
+				i64 = (gint64) va_arg (args, off_t);
+				sign = 1;
+				break;
+
+			case 'P':
+				i64 = (gint64) va_arg (args, pid_t);
+				sign = 1;
+				break;
+
+			case 'T':
+				i64 = (gint64) va_arg (args, time_t);
+				sign = 1;
+				break;
+
+			case 'z':
+				if (sign) {
+					i64 = (gint64) va_arg (args, ssize_t);
+				} else {
+					ui64 = (guint64) va_arg (args, size_t);
+				}
+				break;
+
+			case 'd':
+				if (sign) {
+					i64 = (gint64) va_arg (args, gint);
+				} else {
+					ui64 = (guint64) va_arg (args, guint);
+				}
+				break;
+
+			case 'l':
+				if (sign) {
+					i64 = (gint64) va_arg(args, glong);
+				} else {
+					ui64 = (guint64) va_arg(args, gulong);
+				}
+				break;
+
+			case 'D':
+				if (sign) {
+					i64 = (gint64) va_arg(args, gint32);
+				} else {
+					ui64 = (guint64) va_arg(args, guint32);
+				}
+				break;
+
+			case 'L':
+				if (sign) {
+					i64 = va_arg (args, gint64);
+				} else {
+					ui64 = va_arg (args, guint64);
+				}
+				break;
+
+
+			case 'f':
+			case 'F':
+				if (*fmt == 'f') {
+					f = (long double) va_arg (args, double);
+				}
+				else {
+					f = (long double) va_arg (args, long double);
+				}
+				p = numbuf;
+				last = p + sizeof (numbuf);
+				if (f < 0) {
+					*p++ = '-';
+					f = -f;
+				}
+
+				/* Integer part */
+				ui64 = (gint64) f;
+
+				p = rspamd_sprintf_num (p, last, ui64, zero, 0, width);
+
+				if (frac_width) {
+
+					if (p < last) {
+						*p++ = '.';
+					}
+
+					scale = 1.0;
+
+					for (i = 0; i < frac_width; i++) {
+						scale *= 10.0;
+					}
+
+					/*
+					 * (gint64) cast is required for msvc6:
+					 * it can not convert guint64 to double
+					 */
+					ui64 = (guint64) ((f - (gint64) ui64) * scale);
+
+					p = rspamd_sprintf_num (p, last, ui64, '0', 0, frac_width);
+				}
+
+				slen = p - numbuf;
+				RSPAMD_PRINTF_APPEND (numbuf, slen);
+
+				continue;
+
+			case 'g':
+			case 'G':
+				if (*fmt == 'g') {
+					f = (long double) va_arg (args, double);
+				}
+				else {
+					f = (long double) va_arg (args, long double);
+				}
+
+				g_ascii_formatd (numbuf, sizeof (numbuf), "%g", (double)f);
+				slen = strlen (numbuf);
+				RSPAMD_PRINTF_APPEND (numbuf, slen);
+
+				continue;
+
+			case 'b':
+				/*
+				 * gboolean is a gint and is passed as such through `...';
+				 * it must not be fetched as a double
+				 */
+				bv = (gboolean) va_arg (args, gint);
+				RSPAMD_PRINTF_APPEND (bv ? "true" : "false", bv ? 4 : 5);
+
+				continue;
+
+			case 'p':
+				ui64 = (uintptr_t) va_arg (args, void *);
+				hex = 2;
+				sign = 0;
+				zero = '0';
+				width = sizeof (void *) * 2;
+				break;
+
+			case 'c':
+				c = va_arg (args, gint);
+				c &= 0xff;
+				RSPAMD_PRINTF_APPEND (&c, 1);
+
+				continue;
+
+			case 'Z':
+				c = '\0';
+				RSPAMD_PRINTF_APPEND (&c, 1);
+
+				continue;
+
+			case 'N':
+				c = LF;
+				RSPAMD_PRINTF_APPEND (&c, 1);
+
+				continue;
+
+			case '%':
+				c = '%';
+				RSPAMD_PRINTF_APPEND (&c, 1);
+
+				continue;
+
+			default:
+				/* Unknown conversion: print the character itself */
+				c = *fmt;
+				RSPAMD_PRINTF_APPEND (&c, 1);
+
+				continue;
+			}
+
+			/* Print number */
+			p = numbuf;
+			last = p + sizeof (numbuf);
+			if (sign) {
+				if (i64 < 0) {
+					*p++ = '-';
+					ui64 = (guint64) -i64;
+
+				} else {
+					ui64 = (guint64) i64;
+				}
+			}
+
+			if (!humanize) {
+				p = rspamd_sprintf_num (p, last, ui64, zero, hex, width);
+			}
+			else {
+				p = rspamd_humanize_number (p, last, ui64, bytes);
+			}
+			slen = p - numbuf;
+			RSPAMD_PRINTF_APPEND (numbuf, slen);
+
+		} else {
+			fmt++;
+		}
+	}
+
+	/* Finish buffer */
+	if (fmt > buf_start) {
+		wr = func (buf_start, fmt - buf_start, apd);
+		if (wr <= 0) {
+			goto oob;
+		}
+		written += wr;
+	}
+
+oob:
+	return written;
+}
+
diff --git a/src/libutil/printf.h b/src/libutil/printf.h
new file mode 100644
index 000000000..a4e03791d
--- /dev/null
+++ b/src/libutil/printf.h
@@ -0,0 +1,75 @@
+/* Copyright (c) 2010, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Rambler BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#ifndef PRINTF_H_
+#define PRINTF_H_
+
+#include "config.h"
+
+/*
+ * supported formats:
+ * %[0][width][x][X]O off_t
+ * %[0][width]T time_t
+ * %[0][width][u][x|X|h|H]z ssize_t/size_t
+ * %[0][width][u][x|X|h|H]d gint/guint
+ * %[0][width][u][x|X|h|H]l long
+ * %[0][width][u][x|X|h|H]D gint32/guint32
+ * %[0][width][u][x|X|h|H]L gint64/guint64
+ * %[0][width][.width]f double
+ * %[0][width][.width]F long double
+ * %[0][width][.width]g double
+ * %[0][width][.width]G long double
+ * %b boolean (true or false)
+ * %P pid_t
+ * %r rlim_t (documented only: rspamd_vprintf_common has no handler for it)
+ * %p void *
+ * %V f_str_t *
+ * %v GString *
+ * %s null-terminated string
+ * %*s length and string
+ * %Z '\0'
+ * %N '\n'
+ * %c gchar
+ * %% %
+ *
+ */
+
+/**
+ * Callback used for common printf operations
+ * @param buf buffer to append
+ * @param buflen length of the buffer
+ * @param ud opaque pointer
+ * @return number of characters written
+ */
+typedef glong (*rspamd_printf_append_func)(const gchar *buf, glong buflen, gpointer ud);
+
+glong rspamd_fprintf (FILE *f, const gchar *fmt, ...);
+glong rspamd_log_fprintf (FILE *f, const gchar *fmt, ...);
+glong rspamd_snprintf (gchar *buf, glong max, const gchar *fmt, ...);
+gchar *rspamd_vsnprintf (gchar *buf, glong max, const gchar *fmt, va_list args);
+glong rspamd_printf_gstring (GString *s, const gchar *fmt, ...);
+
+glong rspamd_vprintf_common (rspamd_printf_append_func func, gpointer apd, const gchar *fmt, va_list args);
+
+#endif /* PRINTF_H_ */
diff --git a/src/libutil/radix.c b/src/libutil/radix.c
new file mode 100644
index 000000000..1a05db178
--- /dev/null
+++ b/src/libutil/radix.c
@@ -0,0 +1,311 @@
+/*
+ * Copyright (c) 2009-2012, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#include "config.h"
+#include "radix.h"
+#include "mem_pool.h"
+
+static void *radix_alloc (radix_tree_t * tree);
+
+/**
+ * Create an empty radix tree: the tree structure itself, a memory pool for
+ * its nodes and a root node that holds no value.
+ * @return new tree, or NULL if an allocation failed; nothing is leaked on
+ * failure
+ */
+radix_tree_t *
+radix_tree_create (void)
+{
+	radix_tree_t *tree;
+
+	tree = g_malloc (sizeof (radix_tree_t));
+	if (tree == NULL) {
+		return NULL;
+	}
+
+	tree->pool = rspamd_mempool_new (rspamd_mempool_suggest_size ());
+	tree->size = 0;
+
+	tree->root = radix_alloc (tree);
+	if (tree->root == NULL) {
+		/* Release what has been set up so far */
+		rspamd_mempool_delete (tree->pool);
+		g_free (tree);
+		return NULL;
+	}
+
+	tree->root->right = NULL;
+	tree->root->left = NULL;
+	tree->root->parent = NULL;
+	tree->root->value = RADIX_NO_VALUE;
+
+	return tree;
+}
+
+enum radix_insert_type { /* what radix32tree_insert_common() does when the node already holds a value */
+ RADIX_INSERT, /* keep the stored value and return 1 */
+ RADIX_ADD, /* add the new value to the stored one and return the new value */
+ RADIX_REPLACE /* overwrite the stored value and return 1 */
+};
+
+/*
+ * Common part of insert/add/replace.
+ * Follows the bits of key selected by mask from the most significant one,
+ * creating missing nodes, and stores value in the node reached.
+ * If that node already holds a value the result depends on type:
+ * RADIX_INSERT leaves it alone and returns 1, RADIX_ADD adds value to it and
+ * returns value, RADIX_REPLACE overwrites it and returns 1.
+ * Returns 0 when the value was stored in a node that had none and
+ * (uintptr_t)-1 when a node could not be allocated.
+ */
+static uintptr_t
+radix32tree_insert_common (radix_tree_t * tree, guint32 key, guint32 mask, uintptr_t value, enum radix_insert_type type)
+{
+	guint32 bit = 0x80000000;
+	radix_node_t *cur = tree->root, *child = tree->root;
+
+	/* Descend along the nodes that already exist */
+	while (bit & mask) {
+		child = (key & bit) ? cur->right : cur->left;
+
+		if (child == NULL) {
+			break;
+		}
+
+		bit >>= 1;
+		cur = child;
+	}
+
+	if (child != NULL) {
+		/* The whole path exists, cur is the node of this key/mask */
+		if (cur->value != RADIX_NO_VALUE) {
+			if (type == RADIX_INSERT) {
+				return 1;
+			}
+			else if (type == RADIX_ADD) {
+				cur->value += value;
+				return value;
+			}
+			else if (type == RADIX_REPLACE) {
+				cur->value = value;
+				return 1;
+			}
+		}
+
+		cur->value = value;
+		cur->key = key;
+		return 0;
+	}
+
+	/* Create the missing rest of the path */
+	for (; bit & mask; bit >>= 1) {
+		child = radix_alloc (tree);
+		if (child == NULL) {
+			return -1;
+		}
+
+		child->parent = cur;
+		child->left = NULL;
+		child->right = NULL;
+		child->value = RADIX_NO_VALUE;
+
+		if (key & bit) {
+			cur->right = child;
+		}
+		else {
+			cur->left = child;
+		}
+
+		cur = child;
+	}
+
+	cur->value = value;
+	cur->key = key;
+
+	return 0;
+}
+
+/* Store value for key/mask unless the node already holds one (see radix.h for the result codes) */
+gint
+radix32tree_insert (radix_tree_t *tree, guint32 key, guint32 mask, uintptr_t value)
+{
+	uintptr_t rc;
+
+	rc = radix32tree_insert_common (tree, key, mask, value, RADIX_INSERT);
+
+	return (gint)rc;
+}
+
+/* Add value to the one stored for key/mask, or store it if there is none */
+uintptr_t
+radix32tree_add (radix_tree_t *tree, guint32 key, guint32 mask, uintptr_t value)
+{
+	uintptr_t rc;
+
+	rc = radix32tree_insert_common (tree, key, mask, value, RADIX_ADD);
+
+	return rc;
+}
+
+/* Store value for key/mask, overwriting a value that is already there */
+gint
+radix32tree_replace (radix_tree_t *tree, guint32 key, guint32 mask, uintptr_t value)
+{
+	uintptr_t rc;
+
+	rc = radix32tree_insert_common (tree, key, mask, value, RADIX_REPLACE);
+
+	return (gint)rc;
+}
+
+/*
+ * In-order walk over the subtree of node: left child, the node itself, right
+ * child. func is called for every node that holds a value and receives the
+ * depth of the node (level) as its second argument.
+ * Returns TRUE as soon as func returns TRUE, which stops the walk, and FALSE
+ * after the whole subtree has been visited.
+ */
+static gboolean
+radix_recurse_nodes (radix_node_t *node, radix_tree_traverse_func func, void *user_data, gint level)
+{
+	const gint deeper = level + 1;
+
+	if (node->left && radix_recurse_nodes (node->left, func, user_data, deeper)) {
+		return TRUE;
+	}
+
+	if (node->value != RADIX_NO_VALUE && func (node->key, level, node->value, user_data)) {
+		return TRUE;
+	}
+
+	if (node->right && radix_recurse_nodes (node->right, func, user_data, deeper)) {
+		return TRUE;
+	}
+
+	return FALSE;
+}
+
+/* Call func for every stored value, starting the walk at the root (depth 0) */
+void
+radix32tree_traverse (radix_tree_t *tree, radix_tree_traverse_func func, void *user_data)
+{
+	radix_node_t *start = tree->root;
+
+	radix_recurse_nodes (start, func, user_data, 0);
+}
+
+
+/*
+ * Remove the value stored for key/mask.
+ * A node with children only loses its value. A leaf is unlinked from its
+ * parent together with every ancestor that is left with neither children nor
+ * a value; the root is never unlinked.
+ * Returns 0 on success and -1 when the path does not exist, ends at the root
+ * or ends at an inner node that holds no value.
+ */
+gint
+radix32tree_delete (radix_tree_t * tree, guint32 key, guint32 mask)
+{
+	guint32 bit = 0x80000000;
+	radix_node_t *cur = tree->root, *up;
+
+	/* Locate the node of this key/mask */
+	for (; cur != NULL && (bit & mask); bit >>= 1) {
+		cur = (key & bit) ? cur->right : cur->left;
+	}
+
+	if (cur == NULL || cur->parent == NULL) {
+		return -1;
+	}
+
+	if (cur->right || cur->left) {
+		/* Inner node: it has to stay, only the value can be dropped */
+		if (cur->value == RADIX_NO_VALUE) {
+			return -1;
+		}
+
+		cur->value = RADIX_NO_VALUE;
+		return 0;
+	}
+
+	/* Leaf: unlink it and every ancestor that becomes useless */
+	do {
+		up = cur->parent;
+
+		if (up->right == cur) {
+			up->right = NULL;
+		}
+		else {
+			up->left = NULL;
+		}
+
+		cur = up;
+	} while (!cur->right && !cur->left &&
+			cur->value == RADIX_NO_VALUE &&
+			cur->parent != NULL);
+
+	return 0;
+}
+
+
+/*
+ * Look key up, following its bits from the most significant one.
+ * Returns the value of the deepest node on that path that holds one, or
+ * RADIX_NO_VALUE when no node on the path holds a value.
+ */
+uintptr_t
+radix32tree_find (radix_tree_t * tree, guint32 key)
+{
+	guint32 bit = 0x80000000;
+	uintptr_t found = RADIX_NO_VALUE;
+	radix_node_t *cur;
+
+	for (cur = tree->root; cur != NULL; bit >>= 1) {
+		if (cur->value != RADIX_NO_VALUE) {
+			/* Remember the most specific value seen so far */
+			found = cur->value;
+		}
+
+		cur = (key & bit) ? cur->right : cur->left;
+	}
+
+	return found;
+}
+
+
+/*
+ * Allocate one uninitialized node from the pool of the tree and add its size
+ * to tree->size.
+ */
+static void *
+radix_alloc (radix_tree_t * tree)
+{
+	void *node = rspamd_mempool_alloc (tree->pool, sizeof (radix_node_t));
+
+	tree->size += sizeof (radix_node_t);
+
+	return node;
+}
+
+/*
+ * Destroy the tree: the pool holding all nodes is deleted first, then the
+ * tree structure itself is freed. A NULL tree is rejected by
+ * g_return_if_fail().
+ */
+void
+radix_tree_free (radix_tree_t * tree)
+{
+	g_return_if_fail (tree != NULL);
+
+	rspamd_mempool_delete (tree->pool);
+	g_free (tree);
+}
+
+/*
+ * vi:ts=4
+ */
diff --git a/src/libutil/radix.h b/src/libutil/radix.h
new file mode 100644
index 000000000..4cc2873c7
--- /dev/null
+++ b/src/libutil/radix.h
@@ -0,0 +1,82 @@
+#ifndef RADIX_H
+#define RADIX_H
+
+#include "config.h"
+#include "mem_pool.h"
+
+#define RADIX_NO_VALUE (uintptr_t)-1
+
+typedef struct radix_node_s radix_node_t;
+
+struct radix_node_s { /* one node of the binary trie */
+ radix_node_t *right; /* child followed when the current key bit is 1 */
+ radix_node_t *left; /* child followed when the current key bit is 0 */
+ radix_node_t *parent; /* NULL for the root node */
+ uintptr_t value; /* RADIX_NO_VALUE when the node holds no value */
+ guint32 key; /* key passed to the insert call that stored the value */
+};
+
+
+typedef struct { /* radix tree handle returned by radix_tree_create() */
+ radix_node_t *root; /* root node, created together with the tree */
+ size_t size; /* grows by sizeof (radix_node_t) for every node allocated */
+ rspamd_mempool_t *pool; /* pool all nodes are allocated from */
+} radix_tree_t;
+
+typedef gboolean (*radix_tree_traverse_func)(guint32 key, guint32 mask, uintptr_t value, void *user_data);
+
+/**
+ * Create new radix tree
+ */
+radix_tree_t *radix_tree_create (void);
+
+/**
+ * Insert value to radix tree
+ * returns: 1 if value already exists
+ * 0 if operation was successful
+ * -1 if there was some error
+ */
+gint radix32tree_insert (radix_tree_t *tree, guint32 key, guint32 mask, uintptr_t value);
+
+/**
+ * Add value to radix tree or insert it if value does not exists
+ * returns: value if value already exists and was added
+ * 0 if value was inserted
+ * -1 if there was some error
+ */
+uintptr_t radix32tree_add (radix_tree_t *tree, guint32 key, guint32 mask, uintptr_t value);
+
+/**
+ * Replace value in radix tree or insert it if value does not exists
+ * returns: 1 if value already exists and was replaced
+ * 0 if value was inserted
+ * -1 if there was some error
+ */
+gint radix32tree_replace (radix_tree_t *tree, guint32 key, guint32 mask, uintptr_t value);
+
+/**
+ * Delete value from radix tree
+ * returns: 0 if value was deleted
+ * -1 if the key/mask was not found or holds no value
+ */
+gint radix32tree_delete (radix_tree_t *tree, guint32 key, guint32 mask);
+
+/**
+ * Find value in radix tree
+ * returns: value if value was found
+ * RADIX_NO_VALUE if value was not found
+ */
+uintptr_t radix32tree_find (radix_tree_t *tree, guint32 key);
+
+/**
+ * Traverse via the whole tree calling specified callback
+ */
+void radix32tree_traverse (radix_tree_t *tree, radix_tree_traverse_func func, void *user_data);
+
+/**
+ * Frees radix tree
+ */
+void radix_tree_free (radix_tree_t *tree);
+
+#endif
diff --git a/src/libutil/rrd.c b/src/libutil/rrd.c
new file mode 100644
index 000000000..a0e21eaed
--- /dev/null
+++ b/src/libutil/rrd.c
@@ -0,0 +1,1015 @@
+/* Copyright (c) 2010-2012, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "rrd.h"
+#include "util.h"
+
+static GQuark
+rrd_error_quark (void)
+{
+ return g_quark_from_static_string ("rrd-error");
+}
+
+/**
+ * Map a data source type name to its numeric value
+ * @param str type name, compared without regard to ASCII case
+ * @return matching rrd_dst_type, or -1 if the name is not known
+ */
+enum rrd_dst_type
+rrd_dst_from_string (const gchar *str)
+{
+	/* Checked in this order; the first match wins */
+	static const struct {
+		const gchar *name;
+		enum rrd_dst_type type;
+	} known[] = {
+		{ "counter", RRD_DST_COUNTER },
+		{ "absolute", RRD_DST_ABSOLUTE },
+		{ "gauge", RRD_DST_GAUGE },
+		{ "cdef", RRD_DST_CDEF },
+		{ "derive", RRD_DST_DERIVE }
+	};
+	guint idx;
+
+	for (idx = 0; idx < G_N_ELEMENTS (known); idx ++) {
+		if (g_ascii_strcasecmp (str, known[idx].name) == 0) {
+			return known[idx].type;
+		}
+	}
+
+	return -1;
+}
+
+/**
+ * Map a numeric data source type to its upper-case name
+ * @param type data source type
+ * @return static string with the name, "U" for any unlisted value
+ */
+const gchar*
+rrd_dst_to_string (enum rrd_dst_type type)
+{
+	const gchar *name = "U";
+
+	switch (type) {
+	case RRD_DST_COUNTER:
+		name = "COUNTER";
+		break;
+	case RRD_DST_ABSOLUTE:
+		name = "ABSOLUTE";
+		break;
+	case RRD_DST_GAUGE:
+		name = "GAUGE";
+		break;
+	case RRD_DST_CDEF:
+		name = "CDEF";
+		break;
+	case RRD_DST_DERIVE:
+		name = "DERIVE";
+		break;
+	default:
+		break;
+	}
+
+	return name;
+}
+
+/**
+ * Map a consolidation function name to its numeric value
+ * @param str function name, compared without regard to ASCII case
+ * @return matching rrd_cf_type, or -1 if the name is not known
+ */
+enum rrd_cf_type
+rrd_cf_from_string (const gchar *str)
+{
+	/* XXX: add other CF functions supported by rrd */
+	static const struct {
+		const gchar *name;
+		enum rrd_cf_type type;
+	} known[] = {
+		{ "average", RRD_CF_AVERAGE },
+		{ "minimum", RRD_CF_MINIMUM },
+		{ "maximum", RRD_CF_MAXIMUM },
+		{ "last", RRD_CF_LAST }
+	};
+	guint idx;
+
+	for (idx = 0; idx < G_N_ELEMENTS (known); idx ++) {
+		if (g_ascii_strcasecmp (str, known[idx].name) == 0) {
+			return known[idx].type;
+		}
+	}
+
+	return -1;
+}
+
+/**
+ * Map a numeric consolidation function type to its upper-case name
+ * @param type consolidation function type
+ * @return static string with the name, "U" for any unlisted value
+ */
+const gchar*
+rrd_cf_to_string (enum rrd_cf_type type)
+{
+	const gchar *name = "U";
+
+	/* XXX: add other CF functions supported by rrd */
+	switch (type) {
+	case RRD_CF_AVERAGE:
+		name = "AVERAGE";
+		break;
+	case RRD_CF_MINIMUM:
+		name = "MINIMUM";
+		break;
+	case RRD_CF_MAXIMUM:
+		name = "MAXIMUM";
+		break;
+	case RRD_CF_LAST:
+		name = "LAST";
+		break;
+	default:
+		break;
+	}
+
+	return name;
+}
+
+/**
+ * Fill a round robin archive definition with default settings
+ * @param cf_name consolidation function name, copied (truncated if needed) to rra->cf_nam
+ * @param pdp_cnt value stored in rra->pdp_cnt
+ * @param rows value stored in rra->row_cnt
+ * @param rra definition to fill
+ */
+void
+rrd_make_default_rra (const gchar *cf_name, gulong pdp_cnt, gulong rows, struct rrd_rra_def *rra)
+{
+	/* Every parameter is zero except RRA_cdp_xff_val, which defaults to 0.5 */
+	memset (rra->par, 0, sizeof (rra->par));
+	rra->par[RRA_cdp_xff_val].dv = 0.5;
+
+	rspamd_strlcpy (rra->cf_nam, cf_name, sizeof (rra->cf_nam));
+	rra->row_cnt = rows;
+	rra->pdp_cnt = pdp_cnt;
+}
+
+/**
+ * Fill a data source definition with default settings: type "COUNTER",
+ * RRD_DS_mrhb_cnt of two pdp steps and NaN minimum and maximum values
+ * @param name data source name, copied (truncated if needed) to ds->ds_nam
+ * @param pdp_step step of primary data points
+ * @param ds definition to fill
+ */
+void
+rrd_make_default_ds (const gchar *name, gulong pdp_step, struct rrd_ds_def *ds)
+{
+	rspamd_strlcpy (ds->dst, "COUNTER", sizeof (ds->dst));
+	rspamd_strlcpy (ds->ds_nam, name, sizeof (ds->ds_nam));
+
+	memset (ds->par, 0, sizeof (ds->par));
+	ds->par[RRD_DS_max_val].dv = NAN;
+	ds->par[RRD_DS_min_val].dv = NAN;
+	ds->par[RRD_DS_mrhb_cnt].lv = pdp_step * 2;
+}
+
+/**
+ * Check rrd file for correctness (size, cookies, etc)
+ * @param filename path of the file to validate
+ * @param need_data if TRUE, every RRA definition is read as well and the file
+ * size must match the header plus all archive rows exactly
+ * @param err error pointer, set on any failure
+ * @return TRUE if the file passed all checks
+ */
+static gboolean
+rspamd_rrd_check_file (const gchar *filename, gboolean need_data, GError **err)
+{
+	gint fd;
+	gulong i;
+	struct stat st;
+	struct rrd_file_head head;
+	struct rrd_rra_def rra;
+	/* 64 bit so that counts read from the file cannot wrap a gint */
+	guint64 head_size;
+	gboolean ret = FALSE;
+
+	fd = open (filename, O_RDWR);
+	if (fd == -1) {
+		g_set_error (err, rrd_error_quark (), errno, "rrd open error: %s", strerror (errno));
+		return FALSE;
+	}
+
+	if (fstat (fd, &st) == -1) {
+		g_set_error (err, rrd_error_quark (), errno, "rrd stat error: %s", strerror (errno));
+		goto out;
+	}
+	if (st.st_size < (goffset)sizeof (struct rrd_file_head)) {
+		/* We have trimmed file */
+		g_set_error (err, rrd_error_quark (), EINVAL, "rrd size is bad: %" G_GINT64_FORMAT,
+				(gint64)st.st_size);
+		goto out;
+	}
+
+	/* Try to read header */
+	if (read (fd, &head, sizeof (head)) != sizeof (head)) {
+		g_set_error (err, rrd_error_quark (), errno, "rrd read head error: %s", strerror (errno));
+		goto out;
+	}
+	/* Check magic; errno is meaningless here, so it is not reported */
+	if (memcmp (head.cookie, RRD_COOKIE, sizeof (head.cookie)) != 0 ||
+		memcmp (head.version, RRD_VERSION, sizeof (head.version)) != 0 ||
+		head.float_cookie != RRD_FLOAT_COOKIE) {
+		g_set_error (err, rrd_error_quark (), EINVAL,
+				"rrd head cookies error: bad magic, version or float cookie");
+		goto out;
+	}
+	/* Check for other params */
+	if (head.ds_cnt <= 0 || head.rra_cnt <= 0) {
+		g_set_error (err, rrd_error_quark (), EINVAL, "rrd head error: no data sources or archives");
+		goto out;
+	}
+	/* Now we can calculate the overall size of rrd */
+	head_size = (guint64)sizeof (struct rrd_file_head) +
+			(guint64)sizeof (struct rrd_ds_def) * head.ds_cnt +
+			(guint64)sizeof (struct rrd_rra_def) * head.rra_cnt +
+			(guint64)sizeof (struct rrd_live_head) +
+			(guint64)sizeof (struct rrd_pdp_prep) * head.ds_cnt +
+			(guint64)sizeof (struct rrd_cdp_prep) * head.ds_cnt * head.rra_cnt +
+			(guint64)sizeof (struct rrd_rra_ptr) * head.rra_cnt;
+	if ((guint64)st.st_size < head_size) {
+		g_set_error (err, rrd_error_quark (), EINVAL,
+				"rrd file seems to have stripped header: %" G_GUINT64_FORMAT, head_size);
+		goto out;
+	}
+
+	if (need_data) {
+		/* Now check rra: skip the DS definitions that follow the header */
+		if (lseek (fd, sizeof (struct rrd_ds_def) * head.ds_cnt, SEEK_CUR) == -1) {
+			g_set_error (err, rrd_error_quark (), errno, "rrd head lseek error: %s", strerror (errno));
+			goto out;
+		}
+		for (i = 0; i < head.rra_cnt; i ++) {
+			if (read (fd, &rra, sizeof (rra)) != sizeof (rra)) {
+				g_set_error (err, rrd_error_quark (), errno, "rrd read rra error: %s", strerror (errno));
+				goto out;
+			}
+			/* Each archive stores row_cnt rows of ds_cnt doubles */
+			head_size += (guint64)rra.row_cnt * head.ds_cnt * sizeof (gdouble);
+		}
+
+		if ((guint64)st.st_size != head_size) {
+			g_set_error (err, rrd_error_quark (), EINVAL,
+					"rrd file seems to have incorrect size: %" G_GINT64_FORMAT
+					", must be %" G_GUINT64_FORMAT,
+					(gint64)st.st_size, head_size);
+			goto out;
+		}
+	}
+
+	ret = TRUE;
+
+out:
+	close (fd);
+	return ret;
+}
+
+/**
+ * Set the section pointers of an rrd file structure to their places inside
+ * the mmapped region. Sections follow each other in this order: file head,
+ * DS definitions, RRA definitions, live head, pdp prep, cdp prep, RRA
+ * pointers and, for a completed file only, the values area.
+ * @param file rrd file with a valid map
+ * @param completed if FALSE, rrd_value is set to NULL
+ */
+static void
+rspamd_rrd_adjust_pointers (struct rspamd_rrd_file *file, gboolean completed)
+{
+	guint8 *cursor = file->map;
+	struct rrd_file_head *head;
+
+	head = (struct rrd_file_head *)cursor;
+	file->stat_head = head;
+	cursor += sizeof (struct rrd_file_head);
+
+	file->ds_def = (struct rrd_ds_def *)cursor;
+	cursor += sizeof (struct rrd_ds_def) * head->ds_cnt;
+
+	file->rra_def = (struct rrd_rra_def *)cursor;
+	cursor += sizeof (struct rrd_rra_def) * head->rra_cnt;
+
+	file->live_head = (struct rrd_live_head *)cursor;
+	cursor += sizeof (struct rrd_live_head);
+
+	file->pdp_prep = (struct rrd_pdp_prep *)cursor;
+	cursor += sizeof (struct rrd_pdp_prep) * head->ds_cnt;
+
+	file->cdp_prep = (struct rrd_cdp_prep *)cursor;
+	cursor += sizeof (struct rrd_cdp_prep) * head->rra_cnt * head->ds_cnt;
+
+	file->rra_ptr = (struct rrd_rra_ptr *)cursor;
+
+	if (!completed) {
+		file->rrd_value = NULL;
+		return;
+	}
+
+	cursor += sizeof (struct rrd_rra_ptr) * head->rra_cnt;
+	file->rrd_value = (gdouble *)cursor;
+}
+
+/**
+ * Open a completed or an incomplete rrd file: validate it, mmap it
+ * read-write and set up the section pointers
+ * @param filename path to file
+ * @param completed TRUE if the file must already contain the values area
+ * @param err error pointer, set on any failure
+ * @return new rrd file structure or NULL on failure
+ */
+static struct rspamd_rrd_file*
+rspamd_rrd_open_common (const gchar *filename, gboolean completed, GError **err)
+{
+	struct rspamd_rrd_file *rrd;
+	gint fd;
+	struct stat st;
+
+	if (!rspamd_rrd_check_file (filename, completed, err)) {
+		return NULL;
+	}
+
+	rrd = g_slice_alloc0 (sizeof (struct rspamd_rrd_file));
+
+	if (rrd == NULL) {
+		g_set_error (err, rrd_error_quark (), ENOMEM, "not enough memory");
+		return NULL;
+	}
+
+	/* Open file */
+	fd = open (filename, O_RDWR);
+	if (fd == -1) {
+		g_set_error (err, rrd_error_quark (), errno, "rrd open error: %s", strerror (errno));
+		goto err_free;
+	}
+
+	if (fstat (fd, &st) == -1) {
+		g_set_error (err, rrd_error_quark (), errno, "rrd stat error: %s", strerror (errno));
+		goto err_close;
+	}
+
+	/* Mmap file */
+	rrd->size = st.st_size;
+	rrd->map = mmap (NULL, st.st_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+	if (rrd->map == MAP_FAILED) {
+		/* Report before close () so that errno still belongs to mmap */
+		g_set_error (err, rrd_error_quark (), ENOMEM, "mmap failed: %s", strerror (errno));
+		goto err_close;
+	}
+
+	/* The mapping stays valid after the descriptor is closed */
+	close (fd);
+
+	/* Adjust pointers */
+	rspamd_rrd_adjust_pointers (rrd, completed);
+
+	/* Mark it as finalized */
+	rrd->finalized = completed;
+
+	rrd->filename = g_strdup (filename);
+
+	return rrd;
+
+err_close:
+	close (fd);
+err_free:
+	/* Every failure path releases the structure, nothing is leaked */
+	g_slice_free1 (sizeof (struct rspamd_rrd_file), rrd);
+	return NULL;
+}
+
+/**
+ * Open (and mmap) existing RRD file. The file is opened as a completed one:
+ * rspamd_rrd_open_common () is called with completed = TRUE.
+ * @param filename path
+ * @param err error pointer
+ * @return rrd file structure, or NULL if the file could not be opened
+ */
+struct rspamd_rrd_file*
+rspamd_rrd_open (const gchar *filename, GError **err)
+{
+ return rspamd_rrd_open_common (filename, TRUE, err);
+}
+
+/* Write `count` copies of the `size` byte record at `rec` to fd; on failure set err and return FALSE */
+static gboolean
+rspamd_rrd_write_records (gint fd, gconstpointer rec, gsize size, gulong count, GError **err)
+{
+	gulong i;
+
+	for (i = 0; i < count; i ++) {
+		if (write (fd, rec, size) != (gssize)size) {
+			g_set_error (err, rrd_error_quark (), errno, "rrd write error: %s", strerror (errno));
+			return FALSE;
+		}
+	}
+
+	return TRUE;
+}
+
+/**
+ * Create basic header for rrd file. The file is created (or truncated),
+ * filled with default header sections and then opened as an incomplete
+ * file, i.e. without the values area.
+ * @param filename file path
+ * @param ds_count number of data sources
+ * @param rra_count number of round robin archives
+ * @param pdp_step step of primary data points
+ * @param err error pointer
+ * @return rrd file structure for the new file or NULL on failure
+ */
+struct rspamd_rrd_file*
+rspamd_rrd_create (const gchar *filename, gulong ds_count, gulong rra_count, gulong pdp_step, GError **err)
+{
+	struct rrd_file_head head;
+	struct rrd_ds_def ds;
+	struct rrd_rra_def rra;
+	struct rrd_live_head lh;
+	struct rrd_pdp_prep pdp;
+	struct rrd_cdp_prep cdp;
+	struct rrd_rra_ptr rra_ptr;
+	gint fd;
+	gulong i;
+	struct timeval tv;
+
+	/* Open file */
+	fd = open (filename, O_RDWR | O_CREAT | O_TRUNC, 0644);
+	if (fd == -1) {
+		g_set_error (err, rrd_error_quark (), errno, "rrd create error: %s", strerror (errno));
+		return NULL;
+	}
+
+	/*
+	 * Every record is zeroed as a whole before its fields are set, so that
+	 * no uninitialised stack bytes end up in the file
+	 */
+
+	/* Fill header */
+	memset (&head, 0, sizeof (head));
+	head.rra_cnt = rra_count;
+	head.ds_cnt = ds_count;
+	head.pdp_step = pdp_step;
+	memcpy (head.cookie, RRD_COOKIE, sizeof (head.cookie));
+	memcpy (head.version, RRD_VERSION, sizeof (head.version));
+	head.float_cookie = RRD_FLOAT_COOKIE;
+	if (!rspamd_rrd_write_records (fd, &head, sizeof (head), 1, err)) {
+		goto err;
+	}
+
+	/* Fill DS section */
+	memset (&ds, 0, sizeof (ds));
+	rspamd_strlcpy (ds.dst, "COUNTER", sizeof (ds.dst));
+	if (!rspamd_rrd_write_records (fd, &ds, sizeof (ds), ds_count, err)) {
+		goto err;
+	}
+
+	/* Fill RRA section */
+	memset (&rra, 0, sizeof (rra));
+	rspamd_strlcpy (rra.cf_nam, "AVERAGE", sizeof (rra.cf_nam));
+	rra.pdp_cnt = 1;
+	if (!rspamd_rrd_write_records (fd, &rra, sizeof (rra), rra_count, err)) {
+		goto err;
+	}
+
+	/* Fill live header */
+	memset (&lh, 0, sizeof (lh));
+	gettimeofday (&tv, NULL);
+	lh.last_up = tv.tv_sec;
+	lh.last_up_usec = tv.tv_usec;
+	if (!rspamd_rrd_write_records (fd, &lh, sizeof (lh), 1, err)) {
+		goto err;
+	}
+
+	/* Fill pdp prep */
+	memset (&pdp, 0, sizeof (pdp));
+	rspamd_strlcpy (pdp.last_ds, "U", sizeof (pdp.last_ds));
+	pdp.scratch[PDP_val].dv = 0.;
+	pdp.scratch[PDP_unkn_sec_cnt].lv = 0;
+	if (!rspamd_rrd_write_records (fd, &pdp, sizeof (pdp), ds_count, err)) {
+		goto err;
+	}
+
+	/* Fill cdp prep: ds_count records for each archive */
+	memset (&cdp, 0, sizeof (cdp));
+	cdp.scratch[CDP_val].dv = NAN;
+	for (i = 0; i < rra_count; i ++) {
+		if (!rspamd_rrd_write_records (fd, &cdp, sizeof (cdp), ds_count, err)) {
+			goto err;
+		}
+	}
+
+	/* Set row pointers */
+	memset (&rra_ptr, 0, sizeof (rra_ptr));
+	if (!rspamd_rrd_write_records (fd, &rra_ptr, sizeof (rra_ptr), rra_count, err)) {
+		goto err;
+	}
+
+	close (fd);
+
+	return rspamd_rrd_open_common (filename, FALSE, err);
+
+err:
+	close (fd);
+	return NULL;
+}
+
+/**
+ * Copy data source definitions into the mapped rrd file
+ * @param file rrd file structure
+ * @param ds array whose data holds struct rrd_ds_def records; its len is
+ * compared against a size in bytes and must cover exactly ds_cnt definitions
+ * @param err error pointer
+ * @return TRUE if data sources were added
+ */
+gboolean
+rspamd_rrd_add_ds (struct rspamd_rrd_file *file, GArray *ds, GError **err)
+{
+	gboolean valid = FALSE;
+
+	if (file != NULL) {
+		valid = (ds->len == file->stat_head->ds_cnt * sizeof (struct rrd_ds_def));
+	}
+
+	if (valid) {
+		/* Straightforward memcpy */
+		memcpy (file->ds_def, ds->data, ds->len);
+		return TRUE;
+	}
+
+	g_set_error (err, rrd_error_quark (), EINVAL, "rrd add ds failed: wrong arguments");
+	return FALSE;
+}
+
+/**
+ * Copy round robin archive definitions into the mapped rrd file
+ * @param file rrd file structure
+ * @param rra array whose data holds struct rrd_rra_def records; its len is
+ * compared against a size in bytes and must cover exactly rra_cnt definitions
+ * @param err error pointer
+ * @return TRUE if archives were added
+ */
+gboolean
+rspamd_rrd_add_rra (struct rspamd_rrd_file *file, GArray *rra, GError **err)
+{
+	gboolean valid = FALSE;
+
+	if (file != NULL) {
+		valid = (rra->len == file->stat_head->rra_cnt * sizeof (struct rrd_rra_def));
+	}
+
+	if (valid) {
+		/* Straightforward memcpy */
+		memcpy (file->rra_def, rra->data, rra->len);
+		return TRUE;
+	}
+
+	g_set_error (err, rrd_error_quark (), EINVAL, "rrd add rra failed: wrong arguments");
+	return FALSE;
+}
+
+/**
+ * Finalize rrd file header and initialize all RRA in the file: reset the
+ * unknown pdp counters, pick a random current row for every archive, append
+ * a values area filled with NaN and remap the grown file.
+ * On failure the structure is left untouched: it keeps its previous, still
+ * valid mapping and remains owned by the caller.
+ * @param file rrd file structure
+ * @param err error pointer
+ * @return TRUE if rrd file is ready for use
+ */
+gboolean
+rspamd_rrd_finalize (struct rspamd_rrd_file *file, GError **err)
+{
+	gint fd;
+	guint i, j;
+	gulong ds_cnt;
+	guint64 remain = 0, done = 0;
+	gsize off, chunk;
+	gssize r;
+	gdouble vbuf[1024];
+	struct stat st;
+	gpointer map;
+
+	if (file == NULL || file->filename == NULL) {
+		g_set_error (err, rrd_error_quark (), EINVAL, "rrd finalize failed: wrong arguments");
+		return FALSE;
+	}
+
+	fd = open (file->filename, O_RDWR);
+	if (fd == -1) {
+		g_set_error (err, rrd_error_quark (), errno, "rrd open error: %s", strerror (errno));
+		return FALSE;
+	}
+
+	if (lseek (fd, 0, SEEK_END) == -1) {
+		g_set_error (err, rrd_error_quark (), errno, "rrd seek error: %s", strerror (errno));
+		close (fd);
+		return FALSE;
+	}
+
+	ds_cnt = file->stat_head->ds_cnt;
+
+	/* Adjust CDP and row pointer of each archive */
+	for (i = 0; i < file->stat_head->rra_cnt; i ++) {
+		for (j = 0; j < ds_cnt; j ++) {
+			file->cdp_prep[i * ds_cnt + j].scratch[CDP_unkn_pdp_cnt].lv = 0;
+		}
+		/* Randomize row pointer; an empty archive would divide by zero */
+		if (file->rra_def[i].row_cnt != 0) {
+			file->rra_ptr[i].cur_row = g_random_int () % file->rra_def[i].row_cnt;
+		}
+		else {
+			file->rra_ptr[i].cur_row = 0;
+		}
+		/* Calculate size of the values area in bytes */
+		remain += (guint64)file->rra_def[i].row_cnt * ds_cnt * sizeof (gdouble);
+	}
+
+	/* Write values */
+	for (i = 0; i < G_N_ELEMENTS (vbuf); i ++) {
+		vbuf[i] = NAN;
+	}
+
+	while (remain > 0) {
+		/*
+		 * vbuf repeats a single gdouble pattern, therefore a short write
+		 * can be resumed in the middle of a value
+		 */
+		off = done % sizeof (gdouble);
+		chunk = MIN (remain, (guint64)(sizeof (vbuf) - off));
+		r = write (fd, (const guint8 *)vbuf + off, chunk);
+		if (r == -1 && errno == EINTR) {
+			continue;
+		}
+		if (r <= 0) {
+			g_set_error (err, rrd_error_quark (), errno, "rrd write error: %s", strerror (errno));
+			close (fd);
+			return FALSE;
+		}
+		done += r;
+		remain -= r;
+	}
+
+	if (fstat (fd, &st) == -1) {
+		g_set_error (err, rrd_error_quark (), errno, "rrd stat error: %s", strerror (errno));
+		close (fd);
+		return FALSE;
+	}
+
+	/* Mmap again; the old mapping is dropped only when the new one exists */
+	map = mmap (NULL, st.st_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+	if (map == MAP_FAILED) {
+		g_set_error (err, rrd_error_quark (), ENOMEM, "mmap failed: %s", strerror (errno));
+		close (fd);
+		return FALSE;
+	}
+	close (fd);
+
+	munmap (file->map, file->size);
+	file->map = map;
+	file->size = st.st_size;
+
+	/* Adjust pointers */
+	rspamd_rrd_adjust_pointers (file, TRUE);
+
+	file->finalized = TRUE;
+
+	return TRUE;
+}
+
+/**
+ * Update pdp_prep data: compute the new primary data point of every data
+ * source from the submitted values and remember the submitted values as
+ * last_ds strings
+ * @param file rrd file
+ * @param vals new values, one per data source
+ * @param pdp_new new pdp array (output), one element per data source
+ * @param interval time elapsed from the last update
+ * @return FALSE if a data source whose heartbeat has not expired has a type
+ * other than COUNTER, DERIVE, GAUGE or ABSOLUTE; TRUE otherwise
+ */
+static gboolean
+rspamd_rrd_update_pdp_prep (struct rspamd_rrd_file *file, gdouble *vals, gdouble *pdp_new, gdouble interval)
+{
+ guint i;
+ enum rrd_dst_type type;
+
+ for (i = 0; i < file->stat_head->ds_cnt; i ++) {
+ type = rrd_dst_from_string (file->ds_def[i].dst);
+
+ if (file->ds_def[i].par[RRD_DS_mrhb_cnt].lv < interval) { /* heartbeat exceeded: forget the previous value */
+ rspamd_strlcpy (file->pdp_prep[i].last_ds, "U", sizeof (file->pdp_prep[i].last_ds));
+ }
+
+ if (file->ds_def[i].par[RRD_DS_mrhb_cnt].lv >= interval) {
+ switch (type) {
+ case RRD_DST_COUNTER:
+ case RRD_DST_DERIVE:
+ if (file->pdp_prep[i].last_ds[0] == 'U') { /* no previous value to take a difference from */
+ pdp_new[i] = NAN;
+ }
+ else {
+ pdp_new[i] = vals[i] - strtod (file->pdp_prep[i].last_ds, NULL); /* difference to the stored previous value */
+ }
+ break;
+ case RRD_DST_GAUGE:
+ pdp_new[i] = vals[i] * interval;
+ break;
+ case RRD_DST_ABSOLUTE:
+ pdp_new[i] = vals[i];
+ break;
+ default:
+ return FALSE; /* any other type, including the -1 returned for an unknown name */
+ }
+ }
+ else {
+ pdp_new[i] = NAN;
+ }
+ /* Copy value to the last_ds */
+ if (!isnan (vals[i])) {
+ rspamd_snprintf (file->pdp_prep[i].last_ds, sizeof (file->pdp_prep[i].last_ds), "%.4f", vals[i]);
+ }
+ else {
+ file->pdp_prep[i].last_ds[0] = 'U';
+ file->pdp_prep[i].last_ds[1] = '\0';
+ }
+ }
+
+
+ return TRUE;
+}
+
+/**
+ * Update step for this pdp: for every data source produce the value of the
+ * finished step in pdp_temp and restart its scratch area for the next step
+ * @param file rrd file
+ * @param pdp_new new pdp array
+ * @param pdp_temp temp pdp array (output)
+ * @param interval time till last update
+ * @param pre_int pre interval
+ * @param post_int post interval
+ * @param pdp_diff time till last pdp update
+ */
+static void
+rspamd_rrd_update_pdp_step (struct rspamd_rrd_file *file, gdouble *pdp_new, gdouble *pdp_temp, gdouble interval,
+ gdouble pre_int, gdouble post_int, gulong pdp_diff)
+{
+ guint i;
+ rrd_value_t *scratch;
+ gulong heartbeat;
+
+
+ for (i = 0; i < file->stat_head->ds_cnt; i ++) {
+ scratch = file->pdp_prep[i].scratch;
+ heartbeat = file->ds_def[i].par[RRD_DS_mrhb_cnt].lv;
+ if (!isnan (pdp_new[i])) {
+ if (isnan (scratch[PDP_val].dv)) {
+ scratch[PDP_val].dv = 0;
+ }
+ scratch[PDP_val].dv += pdp_new[i] / interval * pre_int; /* share of the new value that belongs to the finished step */
+ pre_int = 0.0; /* NOTE(review): pre_int is the function parameter, so it stays 0 for all following data sources -- confirm this is intended */
+ }
+ /* Check interval value for heartbeat for this DS */
+ if ((interval > heartbeat) || (file->stat_head->pdp_step / 2.0 < scratch[PDP_unkn_sec_cnt].lv)) { /* heartbeat exceeded or more than half of the step is unknown */
+ pdp_temp[i] = NAN;
+ }
+ else {
+ pdp_temp[i] = scratch[PDP_val].dv /
+ ((double) (pdp_diff - scratch[PDP_unkn_sec_cnt].lv) - pre_int);
+ }
+
+ if (isnan (pdp_new[i])) { /* restart the scratch area with the part that falls after the step boundary */
+ scratch[PDP_unkn_sec_cnt].lv = floor (post_int);
+ scratch[PDP_val].dv = NAN;
+ } else {
+ scratch[PDP_unkn_sec_cnt].lv = 0;
+ scratch[PDP_val].dv = pdp_new[i] / interval * post_int;
+ }
+ }
+}
+
+/**
+ * Update CDP for this rra
+ * @param file rrd file
+ * @param pdp_steps how many pdp steps elapsed from the last update
+ * @param pdp_offset offset from pdp
+ * @param rra_steps how many steps must be updated for each rra
+ * @param rra_index index of desired rra
+ * @param pdp_temp temporary pdp points, one per data source
+ */
+static void
+rspamd_rrd_update_cdp (struct rspamd_rrd_file *file, gdouble pdp_steps, gdouble pdp_offset, gulong *rra_steps, gulong rra_index,
+		gdouble *pdp_temp)
+{
+	guint i;
+	struct rrd_rra_def *rra;
+	rrd_value_t *scratch;
+	enum rrd_cf_type cf;
+	gdouble last_cdp, cur_cdp;
+	gulong pdp_in_cdp;
+
+	rra = &file->rra_def[rra_index];
+	cf = rrd_cf_from_string (rra->cf_nam);
+
+	/* Iterate over all DS for this RRA */
+	for (i = 0; i < file->stat_head->ds_cnt; i ++) {
+		/* Get CDP for this RRA and DS */
+		scratch = file->cdp_prep[rra_index * file->stat_head->ds_cnt + i].scratch;
+		if (rra->pdp_cnt > 1) {
+			/* Do we have any CDP to update for this rra ? */
+			if (rra_steps[rra_index] > 0) {
+				if (isnan (pdp_temp[i])) {
+					/* New pdp is nan */
+					/* Increment unknown points count */
+					scratch[CDP_unkn_pdp_cnt].lv += pdp_offset;
+					/* Reset secondary value */
+					scratch[CDP_secondary_val].dv = NAN;
+				}
+				else {
+					scratch[CDP_secondary_val].dv = pdp_temp[i];
+				}
+
+				/*
+				 * Check XFF for this rra. The factor is stored as a double
+				 * (rrd_make_default_rra sets .dv = 0.5), so it must be read
+				 * through .dv as well
+				 */
+				if (scratch[CDP_unkn_pdp_cnt].lv > rra->pdp_cnt * rra->par[RRA_cdp_xff_val].dv) {
+					/* XFF is reached */
+					scratch[CDP_primary_val].dv = NAN;
+				}
+				else {
+					/* Need to initialize CDP using specified consolidation */
+					switch (cf) {
+					case RRD_CF_AVERAGE:
+						last_cdp = isnan (scratch[CDP_val].dv) ? 0.0 : scratch[CDP_val].dv;
+						cur_cdp = isnan (pdp_temp[i]) ? 0.0 : pdp_temp[i];
+						scratch[CDP_primary_val].dv = (last_cdp + cur_cdp * pdp_offset) / (rra->pdp_cnt - scratch[CDP_unkn_pdp_cnt].lv);
+						break;
+					case RRD_CF_MAXIMUM:
+						last_cdp = isnan (scratch[CDP_val].dv) ? -INFINITY : scratch[CDP_val].dv;
+						cur_cdp = isnan (pdp_temp[i]) ? -INFINITY : pdp_temp[i];
+						scratch[CDP_primary_val].dv = MAX (last_cdp, cur_cdp);
+						break;
+					case RRD_CF_MINIMUM:
+						last_cdp = isnan (scratch[CDP_val].dv) ? INFINITY : scratch[CDP_val].dv;
+						cur_cdp = isnan (pdp_temp[i]) ? INFINITY : pdp_temp[i];
+						scratch[CDP_primary_val].dv = MIN (last_cdp, cur_cdp);
+						break;
+					case RRD_CF_LAST:
+					default:
+						scratch[CDP_primary_val].dv = pdp_temp[i];
+						break;
+					}
+				}
+				/* Init carry of this CDP */
+				pdp_in_cdp = (pdp_steps - pdp_offset) / rra->pdp_cnt;
+				if (pdp_in_cdp == 0 || isnan (pdp_temp[i])) {
+					/* Set overflow */
+					switch (cf) {
+					case RRD_CF_AVERAGE:
+						scratch[CDP_val].dv = 0;
+						break;
+					case RRD_CF_MAXIMUM:
+						scratch[CDP_val].dv = -INFINITY;
+						break;
+					case RRD_CF_MINIMUM:
+						scratch[CDP_val].dv = INFINITY;
+						break;
+					default:
+						scratch[CDP_val].dv = NAN;
+						break;
+					}
+				}
+				else {
+					/* Special carry for average */
+					if (cf == RRD_CF_AVERAGE) {
+						scratch[CDP_val].dv = pdp_temp[i] * pdp_in_cdp;
+					}
+					else {
+						scratch[CDP_val].dv = pdp_temp[i];
+					}
+				}
+			}
+			/* In this case we just need to update cdp_prep for this RRA */
+			else {
+				if (isnan (pdp_temp[i])) {
+					/* Just increase undefined zone */
+					scratch[CDP_unkn_pdp_cnt].lv += pdp_steps;
+				}
+				else {
+					/* Calculate cdp value */
+					last_cdp = scratch[CDP_val].dv;
+					switch (cf) {
+					case RRD_CF_AVERAGE:
+						if (isnan (last_cdp)) {
+							scratch[CDP_val].dv = pdp_temp[i] * pdp_steps;
+						}
+						else {
+							scratch[CDP_val].dv = last_cdp + pdp_temp[i] * pdp_steps;
+						}
+						break;
+					case RRD_CF_MAXIMUM:
+						scratch[CDP_val].dv = MAX (last_cdp, pdp_temp[i]);
+						break;
+					case RRD_CF_MINIMUM:
+						scratch[CDP_val].dv = MIN (last_cdp, pdp_temp[i]);
+						break;
+					case RRD_CF_LAST:
+						scratch[CDP_val].dv = pdp_temp[i];
+						break;
+					default:
+						scratch[CDP_val].dv = NAN;
+						break;
+					}
+				}
+			}
+		}
+		else {
+			/* We have nothing to consolidate, but we may miss some pdp */
+			if (pdp_steps > 2) {
+				/* Just write PDP value */
+				scratch[CDP_primary_val].dv = pdp_temp[i];
+				scratch[CDP_secondary_val].dv = pdp_temp[i];
+			}
+		}
+	}
+}
+
+/**
+ * Update RRA in a file: for every archive advance its current row once per
+ * pending step and store the consolidated values of all data sources there.
+ * The first step of an archive takes the primary CDP values, any further
+ * step takes the secondary ones.
+ * @param file rrd file (must have the values area mapped)
+ * @param rra_steps steps for each rra
+ */
+void
+rspamd_rrd_write_rra (struct rspamd_rrd_file *file, gulong *rra_steps)
+{
+	guint i, j, scratch_idx, cdp_idx, k;
+	struct rrd_rra_def *rra;
+	gdouble *rra_start, *rra_row;
+	gulong ds_cnt = file->stat_head->ds_cnt;
+
+	/* Archives are stored one after another, each as row_cnt rows of ds_cnt values */
+	rra_start = file->rrd_value;
+
+	/* Iterate over all RRA */
+	for (i = 0; i < file->stat_head->rra_cnt; i ++) {
+		rra = &file->rra_def[i];
+
+		if (rra->row_cnt == 0) {
+			/* Empty archive occupies no space, there is nowhere to write */
+			continue;
+		}
+
+		/* How much steps need to be updated */
+		for (j = 0, scratch_idx = CDP_primary_val; j < rra_steps[i]; j ++, scratch_idx = CDP_secondary_val) {
+			/* Move row ptr */
+			if (++file->rra_ptr[i].cur_row >= rra->row_cnt) {
+				file->rra_ptr[i].cur_row = 0;
+			}
+			/* Calculate seek: start of this archive plus whole rows before the current one */
+			rra_row = rra_start + ds_cnt * file->rra_ptr[i].cur_row;
+			/* Iterate over DS */
+			for (k = 0; k < ds_cnt; k ++) {
+				cdp_idx = i * ds_cnt + k;
+				memcpy (rra_row, &file->cdp_prep[cdp_idx].scratch[scratch_idx].dv, sizeof (gdouble));
+				rra_row ++;
+			}
+		}
+
+		/* Skip to the area of the next archive */
+		rra_start += rra->row_cnt * ds_cnt;
+	}
+}
+
+/**
+ * Add record to rrd file
+ * @param file rrd file object, must be finalized
+ * @param points array whose data holds one gdouble per data source; its len
+ * is compared against a size in bytes and must be ds_cnt * sizeof (gdouble)
+ * @param err error pointer
+ * @return TRUE if a row has been added
+ */
+gboolean
+rspamd_rrd_add_record (struct rspamd_rrd_file* file, GArray *points, GError **err)
+{
+	gdouble interval, *pdp_new, *pdp_temp, pre_int, post_int;
+	guint i;
+	gulong pdp_steps, cur_pdp_count, prev_pdp_step, cur_pdp_step,
+		prev_pdp_age, cur_pdp_age, *rra_steps, pdp_offset;
+	struct timeval tv;
+
+	if (file == NULL || points == NULL ||
+		file->stat_head->ds_cnt * sizeof (gdouble) != points->len) {
+		g_set_error (err, rrd_error_quark (), EINVAL, "rrd add points failed: wrong arguments");
+		return FALSE;
+	}
+
+	/*
+	 * A file that is not finalized has no values area to write rows to,
+	 * and a zero pdp step would divide by zero below
+	 */
+	if (!file->finalized || file->stat_head->pdp_step == 0) {
+		g_set_error (err, rrd_error_quark (), EINVAL, "rrd add points failed: file is not ready");
+		return FALSE;
+	}
+
+	/* Get interval */
+	gettimeofday (&tv, NULL);
+	interval = (gdouble)(tv.tv_sec - file->live_head->last_up) +
+			(gdouble)(tv.tv_usec - file->live_head->last_up_usec) / 1e6f;
+
+	/* Update PDP preparation values */
+	pdp_new = g_malloc (sizeof (gdouble) * file->stat_head->ds_cnt);
+	pdp_temp = g_malloc (sizeof (gdouble) * file->stat_head->ds_cnt);
+	/* How much steps need to be updated in each RRA */
+	rra_steps = g_malloc0 (sizeof (gulong) * file->stat_head->rra_cnt);
+
+	if (!rspamd_rrd_update_pdp_prep (file, (gdouble *)points->data, pdp_new, interval)) {
+		g_set_error (err, rrd_error_quark (), EINVAL, "rrd update pdp failed: wrong arguments");
+		g_free (pdp_new);
+		g_free (pdp_temp);
+		g_free (rra_steps);
+		return FALSE;
+	}
+
+	/* Calculate elapsed steps */
+	/* Age in seconds for previous pdp store */
+	prev_pdp_age = file->live_head->last_up % file->stat_head->pdp_step;
+	/* Time in seconds for last pdp update */
+	prev_pdp_step = file->live_head->last_up - prev_pdp_age;
+	/* Age in seconds from current time to required pdp time */
+	cur_pdp_age = tv.tv_sec % file->stat_head->pdp_step;
+	/* Time of desired pdp step */
+	cur_pdp_step = tv.tv_sec - cur_pdp_age;
+
+	if (cur_pdp_step > prev_pdp_step) {
+		pre_int = (gdouble)(cur_pdp_step - file->live_head->last_up) - ((double)file->live_head->last_up_usec) / 1e6f;
+		post_int = (gdouble)cur_pdp_age + ((double)tv.tv_usec) / 1e6f;
+	}
+	else {
+		pre_int = interval;
+		post_int = 0;
+	}
+	cur_pdp_count = cur_pdp_step / file->stat_head->pdp_step;
+	pdp_steps = (cur_pdp_step - prev_pdp_step) / file->stat_head->pdp_step;
+
+	if (pdp_steps == 0) {
+		/* Simple update of pdp prep */
+		for (i = 0; i < file->stat_head->ds_cnt; i ++) {
+			if (isnan (pdp_new[i])) {
+				/* Increment unknown period */
+				file->pdp_prep[i].scratch[PDP_unkn_sec_cnt].lv += floor (interval);
+			}
+			else {
+				if (isnan (file->pdp_prep[i].scratch[PDP_val].dv)) {
+					/* Reset pdp to the current value */
+					file->pdp_prep[i].scratch[PDP_val].dv = pdp_new[i];
+				}
+				else {
+					/* Increment pdp value */
+					file->pdp_prep[i].scratch[PDP_val].dv += pdp_new[i];
+				}
+			}
+		}
+	}
+	else {
+		/* Complex update of PDP, CDP and RRA */
+
+		/* Update PDP for this step */
+		rspamd_rrd_update_pdp_step (file, pdp_new, pdp_temp, interval, pre_int, post_int, pdp_steps * file->stat_head->pdp_step);
+
+		/* Update CDP points for each RRA*/
+		for (i = 0; i < file->stat_head->rra_cnt; i ++) {
+			/* Calculate pdp offset for this RRA */
+			pdp_offset = file->rra_def[i].pdp_cnt - cur_pdp_count % file->rra_def[i].pdp_cnt;
+			/* How much steps we got for this RRA */
+			if (pdp_offset <= pdp_steps) {
+				rra_steps[i] = (pdp_steps - pdp_offset) / file->rra_def[i].pdp_cnt + 1;
+			}
+			else {
+				/* This rra have not passed enough pdp steps */
+				rra_steps[i] = 0;
+			}
+			/* Update this specific CDP */
+			rspamd_rrd_update_cdp (file, pdp_steps, pdp_offset, rra_steps, i, pdp_temp);
+		}
+
+		/*
+		 * Write RRA once, when the steps of all archives are known: the
+		 * writer walks over every archive by itself
+		 */
+		rspamd_rrd_write_rra (file, rra_steps);
+	}
+	file->live_head->last_up = tv.tv_sec;
+	file->live_head->last_up_usec = tv.tv_usec;
+
+	/* Sync and invalidate */
+	msync (file->map, file->size, MS_ASYNC | MS_INVALIDATE);
+
+	g_free (pdp_new);
+	g_free (pdp_temp);
+	g_free (rra_steps);
+
+	return TRUE;
+}
+
+/**
+ * Close rrd file: unmap it and release the structure with its filename
+ * @param file rrd file structure
+ * @return 0 on success; -1 with errno set to EINVAL if file is NULL
+ */
+gint
+rspamd_rrd_close (struct rspamd_rrd_file* file)
+{
+	if (file != NULL) {
+		munmap (file->map, file->size);
+		/* g_free () accepts NULL, no separate check is needed */
+		g_free (file->filename);
+		g_slice_free1 (sizeof (struct rspamd_rrd_file), file);
+
+		return 0;
+	}
+
+	errno = EINVAL;
+	return -1;
+}
diff --git a/src/libutil/rrd.h b/src/libutil/rrd.h
new file mode 100644
index 000000000..ff6902894
--- /dev/null
+++ b/src/libutil/rrd.h
@@ -0,0 +1,374 @@
+/* Copyright (c) 2010-2012, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#ifndef RRD_H_
+#define RRD_H_
+
+#include "config.h"
+
+/**
+ * This file contains basic structure and functions to operate with round-robin databases
+ */
+
+#define RRD_COOKIE "RRD"
+#define RRD_VERSION "0003"
+#define RRD_FLOAT_COOKIE ((double)8.642135E130)
+
+typedef union {
+ unsigned long lv;
+ double dv;
+} rrd_value_t;
+
+struct rrd_file_head {
+ /* Data Base Identification Section ** */
+ gchar cookie[4]; /* magic string, "RRD" (see RRD_COOKIE) */
+ gchar version[5]; /* version of the format (see RRD_VERSION) */
+ gdouble float_cookie; /* is it the correct double representation ? (see RRD_FLOAT_COOKIE) */
+
+ /* Data Base Structure Definition **** */
+ gulong ds_cnt; /* how many different data sources provide input to the rrd */
+ gulong rra_cnt; /* how many rras will be maintained in the rrd */
+ gulong pdp_step; /* pdp interval in seconds */
+
+ rrd_value_t par[10]; /* global parameters ... unused
+ at the moment */
+};
+
+enum rrd_dst_type {
+ RRD_DST_COUNTER = 0, /* data source types available */
+ RRD_DST_ABSOLUTE,
+ RRD_DST_GAUGE,
+ RRD_DST_DERIVE,
+ RRD_DST_CDEF
+};
+enum rrd_ds_param {
+ RRD_DS_mrhb_cnt = 0, /* minimum required heartbeat */
+ RRD_DS_min_val, /* the processed input of a ds must */
+ RRD_DS_max_val, /* be between max_val and min_val
+ * both can be set to UNKNOWN if you
+ * do not care. Data outside the limits
+ * set to UNKNOWN */
+ RRD_DS_cdef = RRD_DS_mrhb_cnt
+}; /* pointer to encoded rpn expression only applies to DST_CDEF */
+
+
+/* The magic number here is one less than DS_NAM_SIZE */
+#define RRD_DS_NAM_SIZE 20
+
+#define RRD_DST_SIZE 20
+
+struct rrd_ds_def {
+ gchar ds_nam[RRD_DS_NAM_SIZE]; /* Name of the data source (null terminated) */
+ gchar dst[RRD_DST_SIZE]; /* Type of data source (null terminated) */
+ rrd_value_t par[10]; /* index of this array see ds_param_en */
+};
+
+/* RRA definition */
+
+enum rrd_cf_type {
+ RRD_CF_AVERAGE = 0, /* data consolidation functions */
+ RRD_CF_MINIMUM,
+ RRD_CF_MAXIMUM,
+ RRD_CF_LAST,
+ RRD_CF_HWPREDICT,
+ /* An array of predictions using the seasonal
+ * Holt-Winters algorithm. Requires an RRA of type
+ * CF_SEASONAL for this data source. */
+ RRD_CF_SEASONAL,
+ /* An array of seasonal effects. Requires an RRA of
+ * type CF_HWPREDICT for this data source. */
+ RRD_CF_DEVPREDICT,
+ /* An array of deviation predictions based upon
+ * smoothed seasonal deviations. Requires an RRA of
+ * type CF_DEVSEASONAL for this data source. */
+ RRD_CF_DEVSEASONAL,
+ /* An array of smoothed seasonal deviations. Requires
+ * an RRA of type CF_HWPREDICT for this data source.
+ * */
+ RRD_CF_FAILURES,
+ /* HWPREDICT that follows a moving baseline */
+ RRD_CF_MHWPREDICT
+ /* new entries must come last !!! */
+};
+
+
+#define MAX_RRA_PAR_EN 10
+
+enum rrd_rra_param {
+ RRA_cdp_xff_val = 0, /* what part of the consolidated
+ * datapoint must be known, to produce a
+ * valid entry in the rra */
+ /* CF_HWPREDICT: */
+ RRA_hw_alpha = 1,
+ /* exponential smoothing parameter for the intercept in
+ * the Holt-Winters prediction algorithm. */
+ RRA_hw_beta = 2,
+ /* exponential smoothing parameter for the slope in
+ * the Holt-Winters prediction algorithm. */
+
+ RRA_dependent_rra_idx = 3,
+ /* For CF_HWPREDICT: index of the RRA with the seasonal
+ * effects of the Holt-Winters algorithm (of type
+ * CF_SEASONAL).
+ * For CF_DEVPREDICT: index of the RRA with the seasonal
+ * deviation predictions (of type CF_DEVSEASONAL).
+ * For CF_SEASONAL: index of the RRA with the Holt-Winters
+ * intercept and slope coefficient (of type CF_HWPREDICT).
+ * For CF_DEVSEASONAL: index of the RRA with the
+ * Holt-Winters prediction (of type CF_HWPREDICT).
+ * For CF_FAILURES: index of the CF_DEVSEASONAL array.
+ * */
+
+ /* CF_SEASONAL and CF_DEVSEASONAL: */
+ RRA_seasonal_gamma = 1,
+ /* exponential smoothing parameter for seasonal effects. */
+
+ RRA_seasonal_smoothing_window = 2,
+ /* fraction of the season to include in the running average
+ * smoother */
+
+ /* RRA_dependent_rra_idx = 3, */
+
+ RRA_seasonal_smooth_idx = 4,
+ /* an integer between 0 and row_count - 1 which
+ * is index in the seasonal cycle for applying
+ * the period smoother. */
+
+ /* CF_FAILURES: */
+ RRA_delta_pos = 1, /* confidence bound scaling parameters */
+ RRA_delta_neg = 2,
+ /* RRA_dependent_rra_idx = 3, */
+ RRA_window_len = 4,
+ RRA_failure_threshold = 5
+ /* For CF_FAILURES, number of violations within the last
+ * window required to mark a failure. */
+};
+
+
+#define RRD_CF_NAM_SIZE 20
+
+struct rrd_rra_def {
+ gchar cf_nam[RRD_CF_NAM_SIZE]; /* consolidation function (null term) */
+ gulong row_cnt; /* number of entries in the store */
+ gulong pdp_cnt; /* how many primary data points are
+ * required for a consolidated data point?*/
+ rrd_value_t par[MAX_RRA_PAR_EN]; /* index see rra_param_en */
+
+};
+
+struct rrd_live_head {
+ time_t last_up; /* when was rrd last updated */
+ glong last_up_usec; /* micro seconds part of the update timestamp. Always >= 0 */
+};
+
+#define RRD_LAST_DS_LEN 30
+
+enum rrd_pdp_param {
+ PDP_unkn_sec_cnt = 0, /* how many seconds of the current
+ * pdp value is unknown data? */
+ PDP_val
+}; /* current value of the pdp.
+ this depends on dst */
+
+struct rrd_pdp_prep {
+ gchar last_ds[RRD_LAST_DS_LEN]; /* the last reading from the data
+ * source. this is stored in ASCII
+ * to cater for very large counters
+ * we might encounter in connection
+ * with SNMP. */
+ rrd_value_t scratch[10]; /* contents according to pdp_par_en */
+};
+
+#define RRD_MAX_CDP_PAR_EN 10
+#define RRD_MAX_CDP_FAILURES_IDX 8
+/* max CDP scratch entries avail to record violations for a FAILURES RRA */
+#define RRD_MAX_FAILURES_WINDOW_LEN 28
+
+enum rrd_cdp_param {
+ CDP_val = 0,
+ /* the base_interval is always an
+ * average */
+ CDP_unkn_pdp_cnt,
+ /* how many unknown pdp were
+ * integrated. This and the cdp_xff
+ * will decide if this is going to
+ * be a UNKNOWN or a valid value */
+ CDP_hw_intercept,
+ /* Current intercept coefficient for the Holt-Winters
+ * prediction algorithm. */
+ CDP_hw_last_intercept,
+ /* Last iteration intercept coefficient for the Holt-Winters
+ * prediction algorithm. */
+ CDP_hw_slope,
+ /* Current slope coefficient for the Holt-Winters
+ * prediction algorithm. */
+ CDP_hw_last_slope,
+ /* Last iteration slope coefficient. */
+ CDP_null_count,
+ /* Number of sequential Unknown (DNAN) values + 1 preceding
+ * the current prediction.
+ * */
+ CDP_last_null_count,
+ /* Last iteration count of Unknown (DNAN) values. */
+ CDP_primary_val = 8,
+ /* optimization for bulk updates: the value of the first CDP
+ * value to be written in the bulk update. */
+ CDP_secondary_val = 9,
+ /* optimization for bulk updates: the value of subsequent
+ * CDP values to be written in the bulk update. */
+ CDP_hw_seasonal = CDP_hw_intercept,
+ /* Current seasonal coefficient for the Holt-Winters
+ * prediction algorithm. This is stored in CDP prep to avoid
+ * redundant seek operations. */
+ CDP_hw_last_seasonal = CDP_hw_last_intercept,
+ /* Last iteration seasonal coefficient. */
+ CDP_seasonal_deviation = CDP_hw_intercept,
+ CDP_last_seasonal_deviation = CDP_hw_last_intercept,
+ CDP_init_seasonal = CDP_null_count
+};
+
+struct rrd_cdp_prep {
+ rrd_value_t scratch[RRD_MAX_CDP_PAR_EN];
+ /* contents according to cdp_par_en *
+ * init state should be NAN */
+};
+
+struct rrd_rra_ptr {
+ gulong cur_row; /* current row in the rra */
+};
+
+/* Final rrd file structure */
+struct rspamd_rrd_file {
+ struct rrd_file_head *stat_head; /* the static header */
+ struct rrd_ds_def *ds_def; /* list of data source definitions */
+ struct rrd_rra_def *rra_def; /* list of round robin archive def */
+ struct rrd_live_head *live_head; /* rrd v >= 3 last_up with us */
+ struct rrd_pdp_prep *pdp_prep; /* pdp data prep area */
+ struct rrd_cdp_prep *cdp_prep; /* cdp prep area */
+ struct rrd_rra_ptr *rra_ptr; /* list of rra pointers */
+ gdouble *rrd_value; /* list of rrd values */
+
+ gchar *filename;
+ guint8* map; /* mmapped area */
+ gsize size; /* its size */
+ gboolean finalized;
+};
+
+
+/* Public API */
+
+/**
+ * Open (and mmap) existing RRD file
+ * @param filename path
+ * @param err error pointer
+ * @return rrd file structure
+ */
+struct rspamd_rrd_file* rspamd_rrd_open (const gchar *filename, GError **err);
+
+/**
+ * Create basic header for rrd file
+ * @param filename file path
+ * @param ds_count number of data sources
+ * @param rra_count number of round robin archives
+ * @param pdp_step step of primary data points
+ * @param err error pointer
+ * @return rrd file structure for the newly created file
+ */
+struct rspamd_rrd_file* rspamd_rrd_create (const gchar *filename, gulong ds_count, gulong rra_count, gulong pdp_step, GError **err);
+
+/**
+ * Add data sources to rrd file
+ * @param file rrd file object
+ * @param ds array of struct rrd_ds_def
+ * @param err error pointer
+ * @return TRUE if data sources were added
+ */
+gboolean rspamd_rrd_add_ds (struct rspamd_rrd_file* file, GArray *ds, GError **err);
+
+/**
+ * Add round robin archives to rrd file
+ * @param file rrd file object
+ * @param rra array of struct rrd_rra_def
+ * @param err error pointer
+ * @return TRUE if archives were added
+ */
+gboolean rspamd_rrd_add_rra (struct rspamd_rrd_file *file, GArray *rra, GError **err);
+
+/**
+ * Finalize rrd file header and initialize all RRA in the file
+ * @param file rrd file object
+ * @param err error pointer
+ * @return TRUE if rrd file is ready for use
+ */
+gboolean rspamd_rrd_finalize (struct rspamd_rrd_file *file, GError **err);
+
+/**
+ * Add record to rrd file
+ * @param file rrd file object
+ * @param points points (must be row suitable for this RRA, depending on ds count)
+ * @param err error pointer
+ * @return TRUE if a row has been added
+ */
+gboolean rspamd_rrd_add_record (struct rspamd_rrd_file* file, GArray *points, GError **err);
+
+/**
+ * Close rrd file
+ * @param file
+ * @return
+ */
+gint rspamd_rrd_close (struct rspamd_rrd_file* file);
+
+/*
+ * Conversion functions
+ */
+
+/**
+ * Convert rrd dst type from string to numeric value
+ */
+enum rrd_dst_type rrd_dst_from_string (const gchar *str);
+/**
+ * Convert numeric presentation of dst to string
+ */
+const gchar* rrd_dst_to_string (enum rrd_dst_type type);
+/**
+ * Convert rrd consolidation function type from string to numeric value
+ */
+enum rrd_cf_type rrd_cf_from_string (const gchar *str);
+/**
+ * Convert numeric presentation of cf to string
+ */
+const gchar* rrd_cf_to_string (enum rrd_cf_type type);
+
+/* Default RRA and DS */
+
+/**
+ * Create default RRA
+ */
+void rrd_make_default_rra (const gchar *cf_name, gulong pdp_cnt, gulong rows, struct rrd_rra_def *rra);
+
+/**
+ * Create default DS
+ */
+void rrd_make_default_ds (const gchar *name, gulong pdp_step, struct rrd_ds_def *ds);
+#endif /* RRD_H_ */
diff --git a/src/libutil/trie.c b/src/libutil/trie.c
new file mode 100644
index 000000000..394c4e939
--- /dev/null
+++ b/src/libutil/trie.c
@@ -0,0 +1,230 @@
+/* Copyright (c) 2010, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "mem_pool.h"
+#include "trie.h"
+
+/*
+ * Allocate an empty trie that owns a private memory pool for its states.
+ * @param icase if TRUE, characters are lowercased (ASCII) on insert and lookup
+ * @return new trie, to be released with rspamd_trie_free ()
+ */
+rspamd_trie_t*
+rspamd_trie_create (gboolean icase)
+{
+	rspamd_trie_t *trie = g_malloc (sizeof (rspamd_trie_t));
+
+	/* The root state matches nothing and has neither fail link nor siblings */
+	trie->root.next = NULL;
+	trie->root.fail = NULL;
+	trie->root.match = NULL;
+	trie->root.final = 0;
+	trie->root.id = 0;
+
+	trie->icase = icase;
+	trie->pool = rspamd_mempool_new (rspamd_mempool_suggest_size ());
+	trie->fail_states = g_ptr_array_sized_new (8);
+
+	return trie;
+}
+
+/*
+ * Create a new state reachable from `pos` by character `c` and register it
+ * in the per-depth list of states (trie->fail_states[depth]) that is later
+ * walked to build fail links.
+ * @return the newly created state
+ */
+static struct rspamd_trie_state *
+rspamd_trie_insert_char (rspamd_trie_t *trie, guint depth, struct rspamd_trie_state *pos, gchar c)
+{
+	struct rspamd_trie_match *edge;
+	struct rspamd_trie_state *state;
+	guint known_levels;
+
+	/* The new transition is pushed to the head of the transitions list of pos */
+	edge = rspamd_mempool_alloc (trie->pool, sizeof (struct rspamd_trie_match));
+	edge->c = c;
+	edge->next = pos->match;
+	pos->match = edge;
+
+	/* Fresh state: no transitions, fails to the root, does not finish any pattern */
+	state = rspamd_mempool_alloc (trie->pool, sizeof (struct rspamd_trie_state));
+	edge->state = state;
+	state->match = NULL;
+	state->fail = &trie->root;
+	state->final = 0;
+	state->id = -1;
+
+	/* Make sure that the per-depth array has a slot for this depth */
+	known_levels = trie->fail_states->len;
+	if (known_levels < depth + 1) {
+		g_ptr_array_set_size (trie->fail_states, MAX (known_levels * 2, depth + 1));
+	}
+
+	/* Link the state to the head of the list of states of the same depth */
+	state->next = trie->fail_states->pdata[depth];
+	trie->fail_states->pdata[depth] = state;
+
+	return state;
+}
+
+/*
+ * Scan the transitions list of state `s` for character `c`
+ * @return matching transition or NULL if the state has no transition by `c`
+ */
+static inline struct rspamd_trie_match *
+check_match (struct rspamd_trie_state *s, gchar c)
+{
+	struct rspamd_trie_match *cur;
+
+	for (cur = s->match; cur != NULL; cur = cur->next) {
+		if (cur->c == c) {
+			break;
+		}
+	}
+
+	return cur;
+}
+
+/*
+ * Insert a pattern into the trie and rebuild fail links for all states.
+ * States are processed level by level (trie->fail_states[i] lists the states
+ * created at depth i), so the fail state of a node is always completed before
+ * the node itself is visited.
+ * @param trie trie to modify
+ * @param pattern NUL-terminated pattern (lowercased if trie->icase is set)
+ * @param pattern_id id reported by rspamd_trie_lookup () when this pattern matches
+ */
+void
+rspamd_trie_insert (rspamd_trie_t *trie, const gchar *pattern, gint pattern_id)
+{
+	const guchar *p = (const guchar *)pattern;
+	struct rspamd_trie_state *q, *q1, *r, *cur_node;
+	struct rspamd_trie_match *m, *n = NULL;
+	guint i, depth = 0;
+	gchar c;
+
+	/* Insert pattern to the trie */
+
+	cur_node = &trie->root;
+
+	while (*p) {
+		c = trie->icase ? g_ascii_tolower (*p) : *p;
+		m = check_match (cur_node, c);
+		if (m == NULL) {
+			/* Insert a character at specified level depth */
+			cur_node = rspamd_trie_insert_char (trie, depth, cur_node, c);
+		}
+		else {
+			cur_node = m->state;
+		}
+		p ++;
+		depth ++;
+	}
+
+	/* The last state of the path finishes the pattern: store its length and id */
+	cur_node->final = depth;
+	cur_node->id = pattern_id;
+
+	/* Update fail states and build fail states graph */
+	/* Go through the whole depth of prefixes */
+	for (i = 0; i < trie->fail_states->len; i++) {
+		q = trie->fail_states->pdata[i];
+		while (q) {
+			m = q->match;
+			while (m) {
+				c = m->c;
+				q1 = m->state;
+				r = q->fail;
+				/* Move q->fail to last known fail location for this character (or to NULL) */
+				while (r && (n = check_match (r, c)) == NULL) {
+					r = r->fail;
+				}
+
+				/* We have found new fail location for character c, so set it in q1 */
+				if (r != NULL) {
+					q1->fail = n->state;
+					if (q1->fail->final > q1->final) {
+						/*
+						 * q1 ends with a shorter pattern that is finished in its
+						 * fail state: inherit both the length and the id, otherwise
+						 * lookup would report a match with the placeholder id -1
+						 */
+						q1->final = q1->fail->final;
+						q1->id = q1->fail->id;
+					}
+				}
+				else {
+					/* Search from root */
+					if ((n = check_match (&trie->root, c))) {
+						q1->fail = n->state;
+					}
+					else {
+						q1->fail = &trie->root;
+					}
+				}
+
+				m = m->next;
+			}
+
+			q = q->next;
+		}
+	}
+}
+
+/*
+ * Scan `buflen` bytes of `buffer` and stop at the first state that finishes
+ * some pattern.
+ * @param matched_id if not NULL, receives the id stored in the final state
+ * @return position tracked for the match or NULL if nothing was found
+ */
+const gchar*
+rspamd_trie_lookup (rspamd_trie_t *trie, const gchar *buffer, gsize buflen, gint *matched_id)
+{
+	const guchar *pos = (const guchar *)buffer, *mark, *start;
+	struct rspamd_trie_state *state = &trie->root;
+	struct rspamd_trie_match *edge = NULL;
+	gchar ch;
+
+	mark = pos;
+	start = pos;
+
+	for (; buflen != 0; buflen --) {
+		ch = trie->icase ? g_ascii_tolower (*pos) : *pos;
+
+		/* Follow fail links until some state accepts the character */
+		while (state != NULL) {
+			edge = check_match (state, ch);
+			if (edge != NULL) {
+				break;
+			}
+			state = state->fail;
+		}
+
+		if (state == NULL) {
+			/* Even the root has no transition: restart from the next character */
+			state = &trie->root;
+			start = pos;
+			pos ++;
+			mark = pos;
+			continue;
+		}
+
+		if (state == &trie->root) {
+			/* Transition is taken from the root, so a match starts here */
+			start = mark;
+		}
+
+		if (edge != NULL) {
+			state = edge->state;
+
+			if (state->final) {
+				/* The complete pattern found */
+				if (matched_id != NULL) {
+					*matched_id = state->id;
+				}
+				return (const gchar *) start;
+			}
+		}
+
+		pos ++;
+		mark = pos;
+	}
+
+	return NULL;
+}
+
+/*
+ * Release a trie: the per-depth states array, the memory pool that holds
+ * all states and transitions, and the trie structure itself.
+ * Passing NULL is a no-op.
+ */
+void
+rspamd_trie_free (rspamd_trie_t *trie)
+{
+	if (trie == NULL) {
+		return;
+	}
+
+	g_ptr_array_free (trie->fail_states, TRUE);
+	rspamd_mempool_delete (trie->pool);
+	g_free (trie);
+}
diff --git a/src/libutil/trie.h b/src/libutil/trie.h
new file mode 100644
index 000000000..2792ee4a5
--- /dev/null
+++ b/src/libutil/trie.h
@@ -0,0 +1,86 @@
+/* Copyright (c) 2010, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Rambler media ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Rambler BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#ifndef TRIE_H_
+#define TRIE_H_
+
+#include "config.h"
+#include "mem_pool.h"
+
+/*
+ * Rspamd implements basic bitwise prefixed trie structure
+ */
+
+struct rspamd_trie_match;
+
+struct rspamd_trie_state {
+ struct rspamd_trie_state *next;
+ struct rspamd_trie_state *fail;
+ struct rspamd_trie_match *match;
+ guint final;
+ gint id;
+};
+
+struct rspamd_trie_match {
+ struct rspamd_trie_match *next;
+ struct rspamd_trie_state *state;
+ gchar c;
+};
+
+typedef struct rspamd_trie_s {
+ struct rspamd_trie_state root;
+ GPtrArray *fail_states;
+ gboolean icase;
+ rspamd_mempool_t *pool;
+} rspamd_trie_t;
+
+/*
+ * Create a new suffix trie
+ */
+rspamd_trie_t* rspamd_trie_create (gboolean icase);
+
+/*
+ * Insert a pattern into the trie
+ * @param trie suffix trie
+ * @param pattern text of element
+ * @param pattern_id id of element
+ */
+void rspamd_trie_insert (rspamd_trie_t *trie, const gchar *pattern, gint pattern_id);
+
+/*
+ * Search for a text using suffix trie
+ * @param trie suffix trie
+ * @param buffer a text where to search for trie patterns
+ * @param buflen a length of text
+ * @param matched_id on a successful search the id of the pattern found is stored here (may be NULL)
+ * @return Position in a text where pattern was found or NULL if no patterns were found
+ */
+const gchar* rspamd_trie_lookup (rspamd_trie_t *trie, const gchar *buffer, gsize buflen, gint *matched_id);
+
+/*
+ * Deallocate suffix trie
+ */
+void rspamd_trie_free (rspamd_trie_t *trie);
+
+#endif /* TRIE_H_ */
diff --git a/src/libutil/upstream.c b/src/libutil/upstream.c
new file mode 100644
index 000000000..f82d3ba50
--- /dev/null
+++ b/src/libutil/upstream.c
@@ -0,0 +1,525 @@
+/*
+ * Copyright (c) 2009-2012, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "upstream.h"
+
+
+#ifdef _THREAD_SAFE
+pthread_rwlock_t upstream_mtx = PTHREAD_RWLOCK_INITIALIZER;
+# define U_RLOCK() do { pthread_rwlock_rdlock (&upstream_mtx); } while (0)
+# define U_WLOCK() do { pthread_rwlock_wrlock (&upstream_mtx); } while (0)
+# define U_UNLOCK() do { pthread_rwlock_unlock (&upstream_mtx); } while (0)
+#else
+# define U_RLOCK() do {} while (0)
+# define U_WLOCK() do {} while (0)
+# define U_UNLOCK() do {} while (0)
+#endif
+
+#define MAX_TRIES 20
+#define HASH_COMPAT
+
+/*
+ * Poly: 0xedb88320
+ * Init: 0x0
+ */
+
+static const guint32 crc32lookup[256] = {
+ 0x00000000U, 0x77073096U, 0xee0e612cU, 0x990951baU, 0x076dc419U, 0x706af48fU,
+ 0xe963a535U, 0x9e6495a3U, 0x0edb8832U, 0x79dcb8a4U, 0xe0d5e91eU, 0x97d2d988U,
+ 0x09b64c2bU, 0x7eb17cbdU, 0xe7b82d07U, 0x90bf1d91U, 0x1db71064U, 0x6ab020f2U,
+ 0xf3b97148U, 0x84be41deU, 0x1adad47dU, 0x6ddde4ebU, 0xf4d4b551U, 0x83d385c7U,
+ 0x136c9856U, 0x646ba8c0U, 0xfd62f97aU, 0x8a65c9ecU, 0x14015c4fU, 0x63066cd9U,
+ 0xfa0f3d63U, 0x8d080df5U, 0x3b6e20c8U, 0x4c69105eU, 0xd56041e4U, 0xa2677172U,
+ 0x3c03e4d1U, 0x4b04d447U, 0xd20d85fdU, 0xa50ab56bU, 0x35b5a8faU, 0x42b2986cU,
+ 0xdbbbc9d6U, 0xacbcf940U, 0x32d86ce3U, 0x45df5c75U, 0xdcd60dcfU, 0xabd13d59U,
+ 0x26d930acU, 0x51de003aU, 0xc8d75180U, 0xbfd06116U, 0x21b4f4b5U, 0x56b3c423U,
+ 0xcfba9599U, 0xb8bda50fU, 0x2802b89eU, 0x5f058808U, 0xc60cd9b2U, 0xb10be924U,
+ 0x2f6f7c87U, 0x58684c11U, 0xc1611dabU, 0xb6662d3dU, 0x76dc4190U, 0x01db7106U,
+ 0x98d220bcU, 0xefd5102aU, 0x71b18589U, 0x06b6b51fU, 0x9fbfe4a5U, 0xe8b8d433U,
+ 0x7807c9a2U, 0x0f00f934U, 0x9609a88eU, 0xe10e9818U, 0x7f6a0dbbU, 0x086d3d2dU,
+ 0x91646c97U, 0xe6635c01U, 0x6b6b51f4U, 0x1c6c6162U, 0x856530d8U, 0xf262004eU,
+ 0x6c0695edU, 0x1b01a57bU, 0x8208f4c1U, 0xf50fc457U, 0x65b0d9c6U, 0x12b7e950U,
+ 0x8bbeb8eaU, 0xfcb9887cU, 0x62dd1ddfU, 0x15da2d49U, 0x8cd37cf3U, 0xfbd44c65U,
+ 0x4db26158U, 0x3ab551ceU, 0xa3bc0074U, 0xd4bb30e2U, 0x4adfa541U, 0x3dd895d7U,
+ 0xa4d1c46dU, 0xd3d6f4fbU, 0x4369e96aU, 0x346ed9fcU, 0xad678846U, 0xda60b8d0U,
+ 0x44042d73U, 0x33031de5U, 0xaa0a4c5fU, 0xdd0d7cc9U, 0x5005713cU, 0x270241aaU,
+ 0xbe0b1010U, 0xc90c2086U, 0x5768b525U, 0x206f85b3U, 0xb966d409U, 0xce61e49fU,
+ 0x5edef90eU, 0x29d9c998U, 0xb0d09822U, 0xc7d7a8b4U, 0x59b33d17U, 0x2eb40d81U,
+ 0xb7bd5c3bU, 0xc0ba6cadU, 0xedb88320U, 0x9abfb3b6U, 0x03b6e20cU, 0x74b1d29aU,
+ 0xead54739U, 0x9dd277afU, 0x04db2615U, 0x73dc1683U, 0xe3630b12U, 0x94643b84U,
+ 0x0d6d6a3eU, 0x7a6a5aa8U, 0xe40ecf0bU, 0x9309ff9dU, 0x0a00ae27U, 0x7d079eb1U,
+ 0xf00f9344U, 0x8708a3d2U, 0x1e01f268U, 0x6906c2feU, 0xf762575dU, 0x806567cbU,
+ 0x196c3671U, 0x6e6b06e7U, 0xfed41b76U, 0x89d32be0U, 0x10da7a5aU, 0x67dd4accU,
+ 0xf9b9df6fU, 0x8ebeeff9U, 0x17b7be43U, 0x60b08ed5U, 0xd6d6a3e8U, 0xa1d1937eU,
+ 0x38d8c2c4U, 0x4fdff252U, 0xd1bb67f1U, 0xa6bc5767U, 0x3fb506ddU, 0x48b2364bU,
+ 0xd80d2bdaU, 0xaf0a1b4cU, 0x36034af6U, 0x41047a60U, 0xdf60efc3U, 0xa867df55U,
+ 0x316e8eefU, 0x4669be79U, 0xcb61b38cU, 0xbc66831aU, 0x256fd2a0U, 0x5268e236U,
+ 0xcc0c7795U, 0xbb0b4703U, 0x220216b9U, 0x5505262fU, 0xc5ba3bbeU, 0xb2bd0b28U,
+ 0x2bb45a92U, 0x5cb36a04U, 0xc2d7ffa7U, 0xb5d0cf31U, 0x2cd99e8bU, 0x5bdeae1dU,
+ 0x9b64c2b0U, 0xec63f226U, 0x756aa39cU, 0x026d930aU, 0x9c0906a9U, 0xeb0e363fU,
+ 0x72076785U, 0x05005713U, 0x95bf4a82U, 0xe2b87a14U, 0x7bb12baeU, 0x0cb61b38U,
+ 0x92d28e9bU, 0xe5d5be0dU, 0x7cdcefb7U, 0x0bdbdf21U, 0x86d3d2d4U, 0xf1d4e242U,
+ 0x68ddb3f8U, 0x1fda836eU, 0x81be16cdU, 0xf6b9265bU, 0x6fb077e1U, 0x18b74777U,
+ 0x88085ae6U, 0xff0f6a70U, 0x66063bcaU, 0x11010b5cU, 0x8f659effU, 0xf862ae69U,
+ 0x616bffd3U, 0x166ccf45U, 0xa00ae278U, 0xd70dd2eeU, 0x4e048354U, 0x3903b3c2U,
+ 0xa7672661U, 0xd06016f7U, 0x4969474dU, 0x3e6e77dbU, 0xaed16a4aU, 0xd9d65adcU,
+ 0x40df0b66U, 0x37d83bf0U, 0xa9bcae53U, 0xdebb9ec5U, 0x47b2cf7fU, 0x30b5ffe9U,
+ 0xbdbdf21cU, 0xcabac28aU, 0x53b39330U, 0x24b4a3a6U, 0xbad03605U, 0xcdd70693U,
+ 0x54de5729U, 0x23d967bfU, 0xb3667a2eU, 0xc4614ab8U, 0x5d681b02U, 0x2a6f2b94U,
+ 0xb40bbe37U, 0xc30c8ea1U, 0x5a05df1bU, 0x2d02ef8dU
+};
+
+/*
+ * Re-evaluate the state of a single upstream:
+ *  - a dead upstream is revived (errors, time and dead flag are cleared, weight
+ *    is reset to priority) once revive_timeout has passed since it was marked;
+ *  - an alive upstream is marked dead (weight 0, time = now) when it has at
+ *    least max_errors errors and error_timeout has passed since up->time.
+ */
+static void
+check_upstream (struct upstream *up, time_t now, time_t error_timeout, time_t revive_timeout, size_t max_errors)
+{
+	if (!up->dead) {
+		if (up->errors >= max_errors && now - up->time >= error_timeout) {
+			U_WLOCK ();
+			up->dead = 1;
+			up->time = now;
+			up->weight = 0;
+			U_UNLOCK ();
+		}
+
+		return;
+	}
+
+	if (now - up->time >= revive_timeout) {
+		U_WLOCK ();
+		up->dead = 0;
+		up->errors = 0;
+		up->time = 0;
+		up->weight = up->priority;
+		U_UNLOCK ();
+	}
+}
+
+/*
+ * Call this function after failed upstream request.
+ * Increments the errors counter; the time of the first error in a row is
+ * remembered in up->time (it stays untouched while it is non-zero).
+ * The whole update is done under the write lock, so the counter is never
+ * modified outside of it.
+ */
+void
+upstream_fail (struct upstream *up, time_t now)
+{
+	U_WLOCK ();
+	if (up->time == 0) {
+		up->time = now;
+	}
+	up->errors++;
+	U_UNLOCK ();
+}
+
+/*
+ * Call this function after successful upstream request.
+ * Clears the errors counter and the error time and consumes one unit of the
+ * dynamic weight. The weight is never decremented below zero: it is a signed
+ * 16 bit value that get_upstream_round_robin () compares as unsigned, so a
+ * negative weight would be treated as a huge one.
+ * @param up upstream to update
+ * @param now current time (unused, kept for API symmetry with upstream_fail)
+ */
+void
+upstream_ok (struct upstream *up, time_t now)
+{
+	(void)now;
+
+	U_WLOCK ();
+	if (up->errors != 0) {
+		up->errors = 0;
+		up->time = 0;
+	}
+
+	if (up->weight > 0) {
+		up->weight--;
+	}
+	U_UNLOCK ();
+}
+
+/*
+ * Mark all upstreams as active: clear error state and dead flag and reset the
+ * dynamic weight to the fixed priority. This function is used when all
+ * upstreams are marked as inactive.
+ * @param ups array of structures that start with struct upstream
+ * @param members number of elements in the array
+ * @param msize size of a single element
+ */
+void
+revive_all_upstreams (void *ups, size_t members, size_t msize)
+{
+	guchar *base;
+	struct upstream *up;
+	guint idx;
+
+	U_WLOCK ();
+	base = ups;
+	for (idx = 0; idx < members; idx++) {
+		up = (struct upstream *)(base + idx * msize);
+		up->dead = 0;
+		up->errors = 0;
+		up->time = 0;
+		up->weight = up->priority;
+	}
+	U_UNLOCK ();
+}
+
+/*
+ * Run check_upstream () for every element and count the upstreams that are
+ * still alive. If none is alive, all upstreams are revived.
+ * @return number of alive upstreams (equals members after a full revive)
+ */
+static gint
+rescan_upstreams (void *ups, size_t members, size_t msize, time_t now, time_t error_timeout, time_t revive_timeout, size_t max_errors)
+{
+	guchar *base = ups;
+	struct upstream *up;
+	guint idx, alive = members;
+
+	for (idx = 0; idx < members; idx++) {
+		up = (struct upstream *)(base + idx * msize);
+		check_upstream (up, now, error_timeout, revive_timeout, max_errors);
+		alive -= up->dead;
+	}
+
+	if (alive != 0) {
+		return alive;
+	}
+
+	/* All upstreams are dead */
+	revive_all_upstreams (ups, members, msize);
+	alive = members;
+
+	return alive;
+}
+
+/*
+ * Return the alive upstream with ordinal number `selected` (dead upstreams
+ * are skipped and not counted).
+ * The scan is bounded by the number of members, so no element past the end
+ * of the array is ever examined.
+ * @return selected upstream or NULL if there are fewer alive upstreams
+ */
+static struct upstream *
+get_upstream_by_number (void *ups, size_t members, size_t msize, gint selected)
+{
+	guchar *p = ups;
+	struct upstream *cur, *found = NULL;
+	size_t idx;
+	gint alive_seen = 0;
+
+	U_RLOCK ();
+	for (idx = 0; idx < members; idx++, p += msize) {
+		cur = (struct upstream *)p;
+
+		if (cur->dead) {
+			/* Skip inactive upstreams */
+			continue;
+		}
+		if (alive_seen == selected) {
+			found = cur;
+			break;
+		}
+		alive_seen++;
+	}
+	U_UNLOCK ();
+
+	return found;
+}
+
+/*
+ * Table driven CRC-32 (crc32lookup) of a key, seeded with `hash`, so calls
+ * can be chained to hash several pieces of data.
+ * If keylen is 0 the key is treated as a NUL-terminated string.
+ */
+static guint32
+get_hash_for_key (guint32 hash, const gchar *key, size_t keylen)
+{
+	guint32 crc = ~hash;
+	const gchar *cur = key, *last = key + keylen;
+
+	if (keylen == 0) {
+		for (; *cur; cur++) {
+			crc = (crc >> 8) ^ crc32lookup[(crc ^ (u_char)*cur) & 0x000000ffU];
+		}
+	}
+	else {
+		for (; cur < last; cur++) {
+			crc = (crc >> 8) ^ crc32lookup[(crc ^ (u_char)*cur) & 0x000000ffU];
+		}
+	}
+
+	return ~crc;
+}
+
+/*
+ * Recheck all upstreams and return random active upstream.
+ * @return selected upstream or NULL if the array is empty
+ */
+struct upstream *
+get_random_upstream (void *ups, size_t members, size_t msize, time_t now, time_t error_timeout,
+	time_t revive_timeout, size_t max_errors)
+{
+	gint alive, selected;
+
+	alive = rescan_upstreams (ups, members, msize, now, error_timeout, revive_timeout, max_errors);
+
+	if (alive <= 0) {
+		/* Nothing to select from (members == 0): avoid modulo by zero */
+		return NULL;
+	}
+
+	selected = rand () % alive;
+
+	return get_upstream_by_number (ups, members, msize, selected);
+}
+
+/*
+ * Return upstream selected by the hash of the key.
+ * The hash (reduced to 15 bits when HASH_COMPAT is defined) modulo members
+ * gives the initial slot. If that upstream is dead, the slot is advanced by
+ * a hash of the try number combined with the key, up to MAX_TRIES times.
+ * @return alive upstream or NULL if none was found
+ */
+struct upstream *
+get_upstream_by_hash (void *ups, size_t members, size_t msize, time_t now, time_t error_timeout,
+	time_t revive_timeout, size_t max_errors, const gchar *key, size_t keylen)
+{
+	gint alive, tries = 0, r;
+	guint32 h, ht;
+	gchar numbuf[4];
+	struct upstream *cur;
+
+	alive = rescan_upstreams (ups, members, msize, now, error_timeout, revive_timeout, max_errors);
+
+	if (alive == 0) {
+		return NULL;
+	}
+
+	h = get_hash_for_key (0, key, keylen);
+#ifdef HASH_COMPAT
+	h = (h >> 16) & 0x7fff;
+#endif
+	h %= members;
+
+	for (;;) {
+		cur = (struct upstream *)((gchar *)ups + msize * h);
+		if (!cur->dead) {
+			break;
+		}
+		/* Rehash: the try number (at most two digits) is mixed with the key */
+		r = snprintf (numbuf, sizeof (numbuf), "%d", tries);
+		ht = get_hash_for_key (0, numbuf, r);
+		ht = get_hash_for_key (ht, key, keylen);
+#ifdef HASH_COMPAT
+		h += (ht >> 16) & 0x7fff;
+#else
+		h += ht;
+#endif
+		h %= members;
+		tries++;
+		if (tries > MAX_TRIES) {
+			return NULL;
+		}
+	}
+
+	return cur;
+}
+
+/*
+ * Recheck all upstreams and return the alive upstream with the largest
+ * dynamic weight. When no alive upstream has a non-zero weight, the weights
+ * of all upstreams are reset to their priorities and the alive upstream with
+ * the largest priority is selected. The first element of the array is the
+ * fallback result.
+ */
+struct upstream *
+get_upstream_round_robin (void *ups, size_t members, size_t msize, time_t now, time_t error_timeout,
+	time_t revive_timeout, size_t max_errors)
+{
+	guint best = 0, idx;
+	struct upstream *up, *chosen;
+	u_char *pos;
+
+	/* Recheck all upstreams */
+	(void)rescan_upstreams (ups, members, msize, now, error_timeout, revive_timeout, max_errors);
+
+	chosen = (struct upstream *)ups;
+
+	U_RLOCK ();
+	for (idx = 0, pos = ups; idx < members; idx++, pos += msize) {
+		up = (struct upstream *)pos;
+		if (!up->dead && best < (guint)up->weight) {
+			best = up->weight;
+			chosen = up;
+		}
+	}
+	U_UNLOCK ();
+
+	if (best != 0) {
+		return chosen;
+	}
+
+	/* All weights are exhausted: start a new round */
+	U_WLOCK ();
+	for (idx = 0, pos = ups; idx < members; idx++, pos += msize) {
+		up = (struct upstream *)pos;
+		up->weight = up->priority;
+		if (!up->dead && best < up->priority) {
+			best = up->priority;
+			chosen = up;
+		}
+	}
+	U_UNLOCK ();
+
+	return chosen;
+}
+
+/*
+ * Recheck all upstreams and return the alive upstream with the largest fixed
+ * priority (master-slaves); dynamic weights are not consulted. The first
+ * element of the array is the fallback result.
+ */
+struct upstream *
+get_upstream_master_slave (void *ups, size_t members, size_t msize, time_t now, time_t error_timeout,
+	time_t revive_timeout, size_t max_errors)
+{
+	guint best = 0, idx;
+	struct upstream *up, *chosen;
+	u_char *pos;
+
+	/* Recheck all upstreams */
+	(void)rescan_upstreams (ups, members, msize, now, error_timeout, revive_timeout, max_errors);
+
+	chosen = (struct upstream *)ups;
+
+	U_RLOCK ();
+	for (idx = 0, pos = ups; idx < members; idx++, pos += msize) {
+		up = (struct upstream *)pos;
+		if (!up->dead && best < up->priority) {
+			best = up->priority;
+			chosen = up;
+		}
+	}
+	U_UNLOCK ();
+
+	return chosen;
+}
+
+/*
+ * Ketama manipulation functions
+ */
+
+/*
+ * qsort () comparator for guint32 ketama points, ascending order.
+ * The values are compared explicitly: the difference of two 32 bit unsigned
+ * values does not fit into gint and gives a wrong sign for distant points.
+ */
+static gint
+ketama_sort_cmp (const void *a1, const void *a2)
+{
+	guint32 l = *(const guint32 *)a1, r = *(const guint32 *)a2;
+
+	return (l > r) - (l < r);
+}
+
+/*
+ * Add ketama points for specified upstream.
+ * The points array is allocated on the first call and grown when a later call
+ * asks for more points than it can hold. The points are a chain of hashes
+ * seeded with the hash of up_key and are kept sorted in ascending order.
+ * @param up upstream to fill
+ * @param up_key key that identifies the upstream
+ * @param keylen length of up_key
+ * @param keypoints number of points to generate
+ * @return 0 on success, -1 on allocation failure
+ */
+gint
+upstream_ketama_add (struct upstream *up, gchar *up_key, size_t keylen, size_t keypoints)
+{
+	guint32 h = 0, *points;
+	gchar tmp[4];
+	guint i;
+
+	/* Allocate ketama points array or grow it if it is too small */
+	if (up->ketama_points == NULL || up->ketama_points_size < keypoints) {
+		if (keypoints > ((size_t)-1) / sizeof (guint32)) {
+			/* Size computation would overflow */
+			return -1;
+		}
+		points = realloc (up->ketama_points, sizeof (guint32) * keypoints);
+		if (points == NULL) {
+			/* The old array (if any) is left intact */
+			return -1;
+		}
+		up->ketama_points = points;
+	}
+	/* Only the first keypoints entries are valid after this call */
+	up->ketama_points_size = keypoints;
+
+	h = get_hash_for_key (h, up_key, keylen);
+
+	for (i = 0; i < keypoints; i++) {
+		/* Serialize the point number as 4 little endian bytes */
+		tmp[0] = i & 0xff;
+		tmp[1] = (i >> 8) & 0xff;
+		tmp[2] = (i >> 16) & 0xff;
+		tmp[3] = (i >> 24) & 0xff;
+
+		h = get_hash_for_key (h, tmp, sizeof (tmp));
+		up->ketama_points[i] = h;
+	}
+	/* Keep points sorted */
+	qsort (up->ketama_points, keypoints, sizeof (guint32), ketama_sort_cmp);
+
+	return 0;
+}
+
+/*
+ * Return upstream by hash: the alive upstream that owns the ketama point
+ * nearest to the hash of the key is selected.
+ * Every upstream of the array is examined. The points of an upstream are
+ * expected to be sorted in ascending order (upstream_ketama_add () sorts them),
+ * so the nearest point is one of the two neighbours of the hash found by a
+ * binary search.
+ * @return nearest upstream; the first alive upstream if no alive upstream has
+ * ketama points; NULL if there are no upstreams at all
+ */
+struct upstream *
+get_upstream_by_hash_ketama (void *ups, size_t members, size_t msize,
+	time_t now, time_t error_timeout, time_t revive_timeout, size_t max_errors, const gchar *key, size_t keylen)
+{
+	guint alive, i;
+	guint32 h, dist, min_diff = 0;
+	size_t lo, hi, mid;
+	guchar *p;
+	struct upstream *cur, *nearest = NULL, *first_alive = NULL;
+
+	alive = rescan_upstreams (ups, members, msize, now, error_timeout, revive_timeout, max_errors);
+
+	if (alive == 0) {
+		return NULL;
+	}
+
+	h = get_hash_for_key (0, key, keylen);
+
+	U_RLOCK ();
+	p = ups;
+	for (i = 0; i < members; i++, p += msize) {
+		cur = (struct upstream *)p;
+
+		if (cur->dead) {
+			continue;
+		}
+		if (first_alive == NULL) {
+			first_alive = cur;
+		}
+		if (cur->ketama_points == NULL || cur->ketama_points_size == 0) {
+			continue;
+		}
+
+		/* Lower bound: index of the first point that is not less than h */
+		lo = 0;
+		hi = cur->ketama_points_size;
+		while (lo < hi) {
+			mid = lo + (hi - lo) / 2;
+			if (cur->ketama_points[mid] < h) {
+				lo = mid + 1;
+			}
+			else {
+				hi = mid;
+			}
+		}
+
+		/* Neighbour at or above the hash */
+		if (lo < cur->ketama_points_size) {
+			dist = cur->ketama_points[lo] - h;
+			if (nearest == NULL || dist < min_diff) {
+				min_diff = dist;
+				nearest = cur;
+			}
+		}
+		/* Neighbour below the hash */
+		if (lo > 0) {
+			dist = h - cur->ketama_points[lo - 1];
+			if (nearest == NULL || dist < min_diff) {
+				min_diff = dist;
+				nearest = cur;
+			}
+		}
+	}
+	U_UNLOCK ();
+
+	return nearest != NULL ? nearest : first_alive;
+}
+
+/* Drop the locking helpers defined at the top of this file */
+#undef U_RLOCK
+#undef U_WLOCK
+#undef U_UNLOCK
+/*
+ * vi:ts=4
+ */
diff --git a/src/libutil/upstream.h b/src/libutil/upstream.h
new file mode 100644
index 000000000..da0a00013
--- /dev/null
+++ b/src/libutil/upstream.h
@@ -0,0 +1,127 @@
+#ifndef UPSTREAM_H
+#define UPSTREAM_H
+
+#include <sys/types.h>
+#include <stdint.h>
+
+/**
+ * Structure of generic upstream
+ */
+struct upstream {
+ guint errors; /**< Errors for this upstream */
+ time_t time; /**< Time of marking */
+ guint dead; /**< Dead flag */
+ guint priority; /**< Fixed priority */
+ gint16 weight; /**< Dynamic weight */
+ guint32 *ketama_points; /**< Ketama points array */
+ size_t ketama_points_size; /**< Ketama array size */
+};
+
+/**
+ * Upstream error logic
+ * 1. During error time we count upstream_ok and upstream_fail
+ * 2. If failcount is more than maxerrors then we mark upstream as unavailable for dead time
+ * 3. After dead time we mark upstream as alive and go to the step 1
+ * 4. If all upstreams are dead, mark every upstream as alive
+ */
+
+/**
+ * Add an error to an upstream
+ */
+void upstream_fail (struct upstream *up, time_t now);
+
+/**
+ * Increase upstream successes count
+ */
+void upstream_ok (struct upstream *up, time_t now);
+
+/**
+ * Make all upstreams alive
+ */
+void revive_all_upstreams (void *ups, size_t members, size_t msize);
+
+/**
+ * Add ketama points for upstream
+ */
+gint upstream_ketama_add (struct upstream *up, gchar *up_key, size_t keylen, size_t keypoints);
+
+/**
+ * Get a random upstream from array of upstreams
+ * @param ups array of structures that contains struct upstream as their first element
+ * @param members number of elements in array
+ * @param msize size of each member
+ * @param now current time
+ * @param error_timeout time during which we are counting errors
+ * @param revive_timeout time during which a failed upstream is considered dead
+ * @param max_errors maximum errors during error_timeout to mark upstream dead
+ */
+struct upstream* get_random_upstream (void *ups, size_t members, size_t msize,
+ time_t now, time_t error_timeout,
+ time_t revive_timeout, size_t max_errors);
+
+/**
+ * Get upstream based on hash from array of upstreams
+ * @param ups array of structures that contains struct upstream as their first element
+ * @param members number of elements in array
+ * @param msize size of each member
+ * @param now current time
+ * @param error_timeout time during which we are counting errors
+ * @param revive_timeout time during which a failed upstream is considered dead
+ * @param max_errors maximum errors during error_timeout to mark upstream dead
+ * @param key key for hashing
+ * @param keylen length of the key
+ */
+struct upstream* get_upstream_by_hash (void *ups, size_t members, size_t msize,
+ time_t now, time_t error_timeout,
+ time_t revive_timeout, size_t max_errors,
+ const gchar *key, size_t keylen);
+
+/**
+ * Get an upstream from array of upstreams based on its current weight
+ * @param ups array of structures that contains struct upstream as their first element
+ * @param members number of elements in array
+ * @param msize size of each member
+ * @param now current time
+ * @param error_timeout time during which we are counting errors
+ * @param revive_timeout time during which a failed upstream is considered dead
+ * @param max_errors maximum errors during error_timeout to mark upstream dead
+ */
+struct upstream* get_upstream_round_robin (void *ups, size_t members, size_t msize,
+ time_t now, time_t error_timeout,
+ time_t revive_timeout, size_t max_errors);
+
+/**
+ * Get upstream based on hash from array of upstreams; this function uses the ketama algorithm
+ * @param ups array of structures that contains struct upstream as their first element
+ * @param members number of elements in array
+ * @param msize size of each member
+ * @param now current time
+ * @param error_timeout time during which we are counting errors
+ * @param revive_timeout time during which a failed upstream is considered dead
+ * @param max_errors maximum errors during error_timeout to mark upstream dead
+ * @param key key for hashing
+ * @param keylen length of the key
+ */
+struct upstream* get_upstream_by_hash_ketama (void *ups, size_t members, size_t msize, time_t now,
+ time_t error_timeout, time_t revive_timeout, size_t max_errors,
+ const gchar *key, size_t keylen);
+
+/**
+ * Get an upstream from array of upstreams based on its current priority (not weight)
+ * @param ups array of structures that contains struct upstream as their first element
+ * @param members number of elements in array
+ * @param msize size of each member
+ * @param now current time
+ * @param error_timeout time during which we are counting errors
+ * @param revive_timeout time during which a failed upstream is considered dead
+ * @param max_errors maximum errors during error_timeout to mark upstream dead
+ */
+struct upstream* get_upstream_master_slave (void *ups, size_t members, size_t msize,
+ time_t now, time_t error_timeout,
+ time_t revive_timeout, size_t max_errors);
+
+
+#endif /* UPSTREAM_H */
+/*
+ * vi:ts=4
+ */
diff --git a/src/libutil/util.c b/src/libutil/util.c
new file mode 100644
index 000000000..03b38e087
--- /dev/null
+++ b/src/libutil/util.c
@@ -0,0 +1,2275 @@
+/*
+ * Copyright (c) 2009-2012, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#include "config.h"
+#include "util.h"
+#include "cfg_file.h"
+#include "main.h"
+#include "statfile.h"
+#include "filter.h"
+#include "message.h"
+
+#ifdef HAVE_OPENSSL
+#include <openssl/rand.h>
+#include <openssl/err.h>
+#endif
+
+#ifdef HAVE_TERMIOS_H
+#include <termios.h>
+#endif
+#ifdef HAVE_READPASSPHRASE_H
+#include <readpassphrase.h>
+#endif
+
+/* Check log messages intensity once per minute */
+#define CHECK_TIME 60
+/* More than 2 log messages per second */
+#define BUF_INTENSITY 2
+/* Default connect timeout for sync sockets */
+#define CONNECT_TIMEOUT 3
+
+ /**
+  * Switch a descriptor to non-blocking mode.
+  *
+  * @param fd descriptor to modify
+  * @return 0 on success, -1 if either fcntl() call fails (a warning is logged)
+  */
+ gint
+ make_socket_nonblocking (gint fd)
+ {
+     gint ofl;
+
+     ofl = fcntl (fd, F_GETFL, 0);
+     if (ofl == -1) {
+         /* Do not feed the -1 error marker back into F_SETFL as a flag set */
+         msg_warn ("fcntl failed: %d, '%s'", errno, strerror (errno));
+         return -1;
+     }
+
+     if (fcntl (fd, F_SETFL, ofl | O_NONBLOCK) == -1) {
+         msg_warn ("fcntl failed: %d, '%s'", errno, strerror (errno));
+         return -1;
+     }
+     return 0;
+ }
+
+ /**
+  * Switch a descriptor back to blocking mode.
+  *
+  * @param fd descriptor to modify
+  * @return 0 on success, -1 if either fcntl() call fails (a warning is logged)
+  */
+ gint
+ make_socket_blocking (gint fd)
+ {
+     gint ofl;
+
+     ofl = fcntl (fd, F_GETFL, 0);
+     if (ofl == -1) {
+         /* Do not feed the -1 error marker back into F_SETFL as a flag set */
+         msg_warn ("fcntl failed: %d, '%s'", errno, strerror (errno));
+         return -1;
+     }
+
+     if (fcntl (fd, F_SETFL, ofl & (~O_NONBLOCK)) == -1) {
+         msg_warn ("fcntl failed: %d, '%s'", errno, strerror (errno));
+         return -1;
+     }
+     return 0;
+ }
+
+ /**
+  * Wait for events on a single descriptor, restarting poll() when it is
+  * interrupted by a signal.
+  *
+  * @param fd descriptor to watch
+  * @param timeout timeout handed to poll() unchanged
+  * @param events poll() event mask to wait for
+  * @return the last poll() result (negative on a non-EINTR failure)
+  */
+ gint
+ poll_sync_socket (gint fd, gint timeout, short events)
+ {
+     struct pollfd pfd;
+     gint ret;
+
+     pfd.fd = fd;
+     pfd.events = events;
+     pfd.revents = 0;
+
+     for (;;) {
+         ret = poll (&pfd, 1, timeout);
+         if (ret >= 0 || errno != EINTR) {
+             break;
+         }
+     }
+
+     return ret;
+ }
+
+ /**
+  * Create sockets for the addresses of an addrinfo chain and bind or connect them.
+  *
+  * Every socket is created non-blocking with FD_CLOEXEC set. Server sockets get
+  * SO_REUSEADDR (and IPV6_V6ONLY for AF_INET6 when available) before bind().
+  * When a connect is in progress and @async is false, the function waits up to
+  * CONNECT_TIMEOUT seconds and then makes the socket blocking again.
+  *
+  * @param type socket type passed to socket()
+  * @param addr chain of addresses to try
+  * @param is_server bind() when true, connect() otherwise
+  * @param async do not wait for an in-progress connect
+  * @param list when NULL the function stops at the first usable socket; otherwise
+  *             every usable descriptor is prepended to *list and all addresses are tried
+  * @return with list == NULL: the first usable descriptor or -1; with a list: the
+  *         descriptor of the last address processed, or -1 if that one failed
+  */
+ static gint
+ make_inet_socket (gint type, struct addrinfo *addr, gboolean is_server, gboolean async, GList **list)
+ {
+     /* fd starts as -1 so that an empty address chain reports failure */
+     gint fd = -1, r, on = 1, s_error;
+     socklen_t optlen;
+     struct addrinfo *cur;
+
+     cur = addr;
+     while (cur) {
+         /* Create socket */
+         fd = socket (cur->ai_family, type, 0);
+         if (fd == -1) {
+             msg_warn ("socket failed: %d, '%s'", errno, strerror (errno));
+             goto out;
+         }
+
+         if (make_socket_nonblocking (fd) < 0) {
+             goto out;
+         }
+
+         /* Set close on exec */
+         if (fcntl (fd, F_SETFD, FD_CLOEXEC) == -1) {
+             msg_warn ("fcntl failed: %d, '%s'", errno, strerror (errno));
+             goto out;
+         }
+
+         if (is_server) {
+             setsockopt (fd, SOL_SOCKET, SO_REUSEADDR, (const void *)&on, sizeof (gint));
+ #ifdef HAVE_IPV6_V6ONLY
+             if (cur->ai_family == AF_INET6) {
+                 setsockopt (fd, IPPROTO_IPV6, IPV6_V6ONLY, (const void *)&on, sizeof (gint));
+             }
+ #endif
+             r = bind (fd, cur->ai_addr, cur->ai_addrlen);
+         }
+         else {
+             r = connect (fd, cur->ai_addr, cur->ai_addrlen);
+         }
+
+         if (r == -1) {
+             if (errno != EINPROGRESS) {
+                 msg_warn ("bind/connect failed: %d, '%s'", errno, strerror (errno));
+                 goto out;
+             }
+             if (!async) {
+                 /* Try to poll */
+                 if (poll_sync_socket (fd, CONNECT_TIMEOUT * 1000, POLLOUT) <= 0) {
+                     errno = ETIMEDOUT;
+                     msg_warn ("bind/connect failed: timeout");
+                     goto out;
+                 }
+                 else {
+                     /* Make synced again */
+                     if (make_socket_blocking (fd) < 0) {
+                         goto out;
+                     }
+                 }
+             }
+         }
+         else {
+             /*
+              * Still need to check SO_ERROR on socket. s_error is preset so
+              * that a failing getsockopt() is not mistaken for a socket error.
+              */
+             s_error = 0;
+             optlen = sizeof (s_error);
+             if (getsockopt (fd, SOL_SOCKET, SO_ERROR, (void *)&s_error, &optlen) == 0 && s_error) {
+                 errno = s_error;
+                 goto out;
+             }
+         }
+         if (list == NULL) {
+             /* Go out immediately */
+             break;
+         }
+         else if (fd != -1) {
+             *list = g_list_prepend (*list, GINT_TO_POINTER (fd));
+             cur = cur->ai_next;
+             continue;
+         }
+ out:
+         /* Failure for this address: drop the descriptor and try the next one */
+         if (fd != -1) {
+             close (fd);
+         }
+         fd = -1;
+         cur = cur->ai_next;
+     }
+     return (fd);
+ }
+
+ /**
+  * Create a single stream (SOCK_STREAM) socket for the first usable address.
+  *
+  * @return descriptor returned by make_inet_socket() (-1 on failure)
+  */
+ gint
+ make_tcp_socket (struct addrinfo *addr, gboolean is_server, gboolean async)
+ {
+     gint sock;
+
+     sock = make_inet_socket (SOCK_STREAM, addr, is_server, async, NULL);
+
+     return sock;
+ }
+
+ /**
+  * Create a single datagram (SOCK_DGRAM) socket for the first usable address.
+  *
+  * @return descriptor returned by make_inet_socket() (-1 on failure)
+  */
+ gint
+ make_udp_socket (struct addrinfo *addr, gboolean is_server, gboolean async)
+ {
+     gint sock;
+
+     sock = make_inet_socket (SOCK_DGRAM, addr, is_server, async, NULL);
+
+     return sock;
+ }
+
+ /**
+  * Create a unix domain socket and bind or connect it to @path.
+  *
+  * For server sockets an already existing socket file is unlinked first; a path
+  * that exists but is not a socket is rejected. The socket is created
+  * non-blocking with FD_CLOEXEC set; when a connect is in progress and @async
+  * is false the function waits up to CONNECT_TIMEOUT seconds and then makes
+  * the socket blocking again.
+  *
+  * @param path filesystem path of the socket (copied into addr->sun_path)
+  * @param addr address structure filled in by this function
+  * @param type socket type passed to socket()
+  * @param is_server bind() when true, connect() otherwise
+  * @param async do not wait for an in-progress connect
+  * @return descriptor on success, -1 on failure (errno is preserved across close())
+  */
+ gint
+ make_unix_socket (const gchar *path, struct sockaddr_un *addr, gint type, gboolean is_server, gboolean async)
+ {
+     gint fd = -1, s_error, r, serrno, on = 1;
+     socklen_t optlen;
+     struct stat st;
+
+     if (path == NULL)
+         return -1;
+
+     addr->sun_family = AF_UNIX;
+
+     rspamd_strlcpy (addr->sun_path, path, sizeof (addr->sun_path));
+ #ifdef FREEBSD
+     addr->sun_len = SUN_LEN (addr);
+ #endif
+
+     if (is_server) {
+         /* Unlink socket if it exists already */
+         if (lstat (addr->sun_path, &st) != -1) {
+             if (S_ISSOCK (st.st_mode)) {
+                 if (unlink (addr->sun_path) == -1) {
+                     msg_warn ("unlink %s failed: %d, '%s'", addr->sun_path, errno, strerror (errno));
+                     goto out;
+                 }
+             }
+             else {
+                 msg_warn ("%s is not a socket", addr->sun_path);
+                 goto out;
+             }
+         }
+     }
+     fd = socket (PF_LOCAL, type, 0);
+
+     if (fd == -1) {
+         msg_warn ("socket failed %s: %d, '%s'", addr->sun_path, errno, strerror (errno));
+         return -1;
+     }
+
+     if (make_socket_nonblocking (fd) < 0) {
+         goto out;
+     }
+
+     /* Set close on exec */
+     if (fcntl (fd, F_SETFD, FD_CLOEXEC) == -1) {
+         msg_warn ("fcntl failed %s: %d, '%s'", addr->sun_path, errno, strerror (errno));
+         goto out;
+     }
+     if (is_server) {
+         setsockopt (fd, SOL_SOCKET, SO_REUSEADDR, (const void *)&on, sizeof (gint));
+         r = bind (fd, (struct sockaddr *)addr, SUN_LEN (addr));
+     }
+     else {
+         r = connect (fd, (struct sockaddr *)addr, SUN_LEN (addr));
+     }
+
+     if (r == -1) {
+         if (errno != EINPROGRESS) {
+             msg_warn ("bind/connect failed %s: %d, '%s'", addr->sun_path, errno, strerror (errno));
+             goto out;
+         }
+         if (!async) {
+             /* Try to poll */
+             if (poll_sync_socket (fd, CONNECT_TIMEOUT * 1000, POLLOUT) <= 0) {
+                 errno = ETIMEDOUT;
+                 msg_warn ("bind/connect failed %s: timeout", addr->sun_path);
+                 goto out;
+             }
+             else {
+                 /* Make synced again */
+                 if (make_socket_blocking (fd) < 0) {
+                     goto out;
+                 }
+             }
+         }
+     }
+     else {
+         /*
+          * Still need to check SO_ERROR on socket. s_error is preset so that
+          * a failing getsockopt() is not mistaken for a socket error.
+          */
+         s_error = 0;
+         optlen = sizeof (s_error);
+         if (getsockopt (fd, SOL_SOCKET, SO_ERROR, (void *)&s_error, &optlen) == 0 && s_error) {
+             errno = s_error;
+             goto out;
+         }
+     }
+
+     return (fd);
+
+ out:
+     /* close() may overwrite errno, so keep the original failure reason */
+     serrno = errno;
+     if (fd != -1) {
+         close (fd);
+     }
+     errno = serrno;
+     return (-1);
+ }
+
+/**
+ * Make a universal socket
+ * @param credits host, ip or path to unix socket
+ * @param port port (used for network sockets)
+ * @param async make this socket asynced
+ * @param is_server make this socket as server socket
+ * @param try_resolve try name resolution for a socket (BLOCKING)
+ */
+gint
+make_universal_socket (const gchar *credits, guint16 port,
+ gint type, gboolean async, gboolean is_server, gboolean try_resolve)
+{
+ struct sockaddr_un un;
+ struct stat st;
+ struct addrinfo hints, *res;
+ gint r;
+ gchar portbuf[8];
+
+ if (*credits == '/') {
+ if (is_server) {
+ return make_unix_socket (credits, &un, type, is_server, async);
+ }
+ else {
+ r = stat (credits, &st);
+ if (r == -1) {
+ /* Unix socket doesn't exists it must be created first */
+ errno = ENOENT;
+ return -1;
+ }
+ else {
+ if ((st.st_mode & S_IFSOCK) == 0) {
+ /* Path is not valid socket */
+ errno = EINVAL;
+ return -1;
+ }
+ else {
+ return make_unix_socket (credits, &un, type, is_server, async);
+ }
+ }
+ }
+ }
+ else {
+ /* TCP related part */
+ memset (&hints, 0, sizeof (hints));
+ hints.ai_family = AF_UNSPEC; /* Allow IPv4 or IPv6 */
+ hints.ai_socktype = type; /* Type of the socket */
+ hints.ai_flags = is_server ? AI_PASSIVE : 0;
+ hints.ai_protocol = 0; /* Any protocol */
+ hints.ai_canonname = NULL;
+ hints.ai_addr = NULL;
+ hints.ai_next = NULL;
+
+ if (!try_resolve) {
+ hints.ai_flags |= AI_NUMERICHOST | AI_NUMERICSERV;
+ }
+
+ rspamd_snprintf (portbuf, sizeof (portbuf), "%d", (int)port);
+ if ((r = getaddrinfo (credits, portbuf, &hints, &res)) == 0) {
+ r = make_inet_socket (type, res, is_server, async, NULL);
+ freeaddrinfo (res);
+ return r;
+ }
+ else {
+ msg_err ("address resolution for %s failed: %s", credits, gai_strerror (r));
+ return FALSE;
+ }
+ }
+}
+
+/**
+ * Make universal stream socket
+ * @param credits host, ip or path to unix socket
+ * @param port port (used for network sockets)
+ * @param async make this socket asynced
+ * @param is_server make this socket as server socket
+ * @param try_resolve try name resolution for a socket (BLOCKING)
+ */
+GList*
+make_universal_sockets_list (const gchar *credits, guint16 port,
+ gint type, gboolean async, gboolean is_server, gboolean try_resolve)
+{
+ struct sockaddr_un un;
+ struct stat st;
+ struct addrinfo hints, *res;
+ gint r, fd, serrno;
+ gchar portbuf[8], **strv, **cur;
+ GList *result = NULL, *rcur;
+
+ strv = g_strsplit_set (credits, ",", -1);
+ if (strv == NULL) {
+ msg_err ("invalid sockets credentials: %s", credits);
+ return NULL;
+ }
+ cur = strv;
+ while (*cur != NULL) {
+ if (*credits == '/') {
+ if (is_server) {
+ fd = make_unix_socket (credits, &un, type, is_server, async);
+ }
+ else {
+ r = stat (credits, &st);
+ if (r == -1) {
+ /* Unix socket doesn't exists it must be created first */
+ errno = ENOENT;
+ goto err;
+ }
+ else {
+ if ((st.st_mode & S_IFSOCK) == 0) {
+ /* Path is not valid socket */
+ errno = EINVAL;
+ goto err;
+ }
+ else {
+ fd = make_unix_socket (credits, &un, type, is_server, async);
+ }
+ }
+ }
+ if (fd != -1) {
+ result = g_list_prepend (result, GINT_TO_POINTER (fd));
+ }
+ else {
+ goto err;
+ }
+ }
+ else {
+ /* TCP related part */
+ memset (&hints, 0, sizeof (hints));
+ hints.ai_family = AF_UNSPEC; /* Allow IPv4 or IPv6 */
+ hints.ai_socktype = type; /* Type of the socket */
+ hints.ai_flags = is_server ? AI_PASSIVE : 0;
+ hints.ai_protocol = 0; /* Any protocol */
+ hints.ai_canonname = NULL;
+ hints.ai_addr = NULL;
+ hints.ai_next = NULL;
+
+ if (!try_resolve) {
+ hints.ai_flags |= AI_NUMERICHOST | AI_NUMERICSERV;
+ }
+
+ rspamd_snprintf (portbuf, sizeof (portbuf), "%d", (int)port);
+ if ((r = getaddrinfo (credits, portbuf, &hints, &res)) == 0) {
+ r = make_inet_socket (type, res, is_server, async, &result);
+ freeaddrinfo (res);
+ if (r == -1) {
+ goto err;
+ }
+ }
+ else {
+ msg_err ("address resolution for %s failed: %s", credits, gai_strerror (r));
+ goto err;
+ }
+ }
+ cur ++;
+ }
+
+ g_strfreev (strv);
+ return result;
+
+err:
+ g_strfreev (strv);
+ serrno = errno;
+ rcur = result;
+ while (rcur != NULL) {
+ fd = GPOINTER_TO_INT (rcur->data);
+ if (fd != -1) {
+ close (fd);
+ }
+ rcur = g_list_next (rcur);
+ }
+ if (result != NULL) {
+ g_list_free (result);
+ }
+
+ errno = serrno;
+ return NULL;
+}
+
+ /**
+  * Create a connected pair of AF_LOCAL stream sockets with FD_CLOEXEC set on both ends.
+  *
+  * @param pair receives the two descriptors
+  * @return 0 on success, -1 on failure (both descriptors are closed if they were created)
+  */
+ gint
+ make_socketpair (gint pair[2])
+ {
+     gint r, serrno;
+
+     r = socketpair (AF_LOCAL, SOCK_STREAM, 0, pair);
+
+     if (r == -1) {
+         /* pair[] is not filled on failure, so it must not be passed to the logger */
+         msg_warn ("socketpair failed: %d, '%s'", errno, strerror (errno));
+         return -1;
+     }
+     /* Set close on exec */
+     if (fcntl (pair[0], F_SETFD, FD_CLOEXEC) == -1) {
+         msg_warn ("fcntl failed: %d, '%s'", errno, strerror (errno));
+         goto out;
+     }
+     if (fcntl (pair[1], F_SETFD, FD_CLOEXEC) == -1) {
+         msg_warn ("fcntl failed: %d, '%s'", errno, strerror (errno));
+         goto out;
+     }
+
+     return 0;
+
+ out:
+     /* Keep the fcntl() failure reason visible to the caller */
+     serrno = errno;
+     close (pair[0]);
+     close (pair[1]);
+     errno = serrno;
+     return (-1);
+ }
+
+ /**
+ * Open the configured pid file and write the current pid into it.
+ *
+ * The pid file handle is stored in main->pfh. When main->is_privilleged is set
+ * the file owner is forced to 0:0; a failing fchown() is only logged.
+ *
+ * @param main main structure; cfg->pid_file, pfh and is_privilleged are used
+ * @return 0 when the pid file was opened, -1 when no pid file is configured or
+ * it cannot be opened. NOTE(review): the result of rspamd_pidfile_write() is
+ * not checked, so 0 does not guarantee that the pid was written.
+ */
+ gint
+ write_pid (struct rspamd_main *main)
+ {
+ pid_t pid;
+
+ if (main->cfg->pid_file == NULL) {
+ return -1;
+ }
+ main->pfh = rspamd_pidfile_open (main->cfg->pid_file, 0644, &pid);
+
+ if (main->pfh == NULL) {
+ return -1;
+ }
+
+ if (main->is_privilleged) {
+ /* Force root user as owner of pid file */
+ /* The two branches differ only in how the descriptor is obtained from the handle */
+ #ifdef HAVE_PIDFILE_FILENO
+ if (fchown (pidfile_fileno (main->pfh), 0, 0) == -1) {
+ #else
+ if (fchown (main->pfh->pf_fd, 0, 0) == -1) {
+ #endif
+ msg_err ("cannot chown of pidfile %s to 0:0 user", main->cfg->pid_file);
+ }
+ }
+
+ rspamd_pidfile_write (main->pfh);
+
+ return 0;
+ }
+
+ /**
+ * Install one handler for SIGTERM, SIGINT, SIGHUP, SIGCHLD, SIGUSR1, SIGUSR2
+ * and SIGALRM, and ignore SIGPIPE.
+ *
+ * All handled signals are added to the handler's signal mask. With
+ * HAVE_SA_SIGINFO the handler is installed as a three-argument SA_SIGINFO
+ * handler, otherwise as a plain one-argument handler.
+ *
+ * @param signals sigaction structure that is filled in and passed to sigaction()
+ * @param sig_handler handler installed for every signal listed above
+ */
+ #ifdef HAVE_SA_SIGINFO
+ void
+ init_signals (struct sigaction *signals, void (*sig_handler)(gint, siginfo_t *, void *))
+ #else
+ void
+ init_signals (struct sigaction *signals, void (*sig_handler)(gint))
+ #endif
+ {
+ struct sigaction sigpipe_act;
+ /* Setting up signal handlers */
+ /* SIGUSR1 - reopen config file */
+ /* SIGUSR2 - worker is ready for accept */
+ sigemptyset (&signals->sa_mask);
+ sigaddset (&signals->sa_mask, SIGTERM);
+ sigaddset (&signals->sa_mask, SIGINT);
+ sigaddset (&signals->sa_mask, SIGHUP);
+ sigaddset (&signals->sa_mask, SIGCHLD);
+ sigaddset (&signals->sa_mask, SIGUSR1);
+ sigaddset (&signals->sa_mask, SIGUSR2);
+ sigaddset (&signals->sa_mask, SIGALRM);
+
+
+ #ifdef HAVE_SA_SIGINFO
+ signals->sa_flags = SA_SIGINFO;
+ signals->sa_handler = NULL;
+ signals->sa_sigaction = sig_handler;
+ #else
+ signals->sa_handler = sig_handler;
+ signals->sa_flags = 0;
+ #endif
+ /* The same sigaction structure is registered for every handled signal */
+ sigaction (SIGTERM, signals, NULL);
+ sigaction (SIGINT, signals, NULL);
+ sigaction (SIGHUP, signals, NULL);
+ sigaction (SIGCHLD, signals, NULL);
+ sigaction (SIGUSR1, signals, NULL);
+ sigaction (SIGUSR2, signals, NULL);
+ sigaction (SIGALRM, signals, NULL);
+
+ /* Ignore SIGPIPE as we handle write errors manually */
+ sigemptyset (&sigpipe_act.sa_mask);
+ sigaddset (&sigpipe_act.sa_mask, SIGPIPE);
+ sigpipe_act.sa_handler = SIG_IGN;
+ sigpipe_act.sa_flags = 0;
+ sigaction (SIGPIPE, &sigpipe_act, NULL);
+ }
+
+ /**
+  * GHashTable iteration callback: deliver a signal to one worker.
+  *
+  * @param key hash table key (unused)
+  * @param value struct rspamd_worker whose pid receives the signal
+  * @param ud signal number packed with GINT_TO_POINTER
+  */
+ static void
+ pass_signal_cb (gpointer key, gpointer value, gpointer ud)
+ {
+     struct rspamd_worker *worker = value;
+
+     kill (worker->pid, GPOINTER_TO_INT (ud));
+ }
+
+ /**
+  * Send a signal to every worker stored in a hash table.
+  *
+  * @param workers hash table whose values are struct rspamd_worker pointers
+  * @param signo signal number to deliver
+  */
+ void
+ pass_signal_worker (GHashTable * workers, gint signo)
+ {
+     gpointer packed_signo = GINT_TO_POINTER (signo);
+
+     g_hash_table_foreach (workers, pass_signal_cb, packed_signo);
+ }
+
+ /**
+  * Lowercase a buffer in place using ASCII rules.
+  *
+  * @param str buffer to convert (does not have to be NUL terminated)
+  * @param size number of bytes to convert
+  */
+ void
+ convert_to_lowercase (gchar *str, guint size)
+ {
+     guint pos;
+
+     for (pos = 0; pos < size; pos++) {
+         str[pos] = g_ascii_tolower (str[pos]);
+     }
+ }
+
+#ifndef HAVE_SETPROCTITLE
+
+ /* Memory region that setproctitle() overwrites; set up by init_title() */
+ static gchar *title_buffer = 0;
+ /* Size of title_buffer in bytes; 0 until init_title() has succeeded */
+ static size_t title_buffer_size = 0;
+ /* Short program name (points into title_progname_full) and the full invocation name copy */
+ static gchar *title_progname, *title_progname_full;
+
+ /**
+  * Replacement for setproctitle(3): write "<progname>: <formatted text>" (or just
+  * "<progname>" when @fmt is NULL) into the buffer prepared by init_title().
+  *
+  * @param fmt printf-style format or NULL
+  * @return 0 on success; -1 when no title buffer is available (errno is set to
+  *         ENOMEM) or when the title does not fit into the buffer
+  */
+ gint
+ setproctitle (const gchar *fmt, ...)
+ {
+     ssize_t prefix_len, tail_len;
+     size_t used;
+     va_list args;
+
+     if (title_buffer == NULL || title_buffer_size == 0) {
+         errno = ENOMEM;
+         return -1;
+     }
+
+     memset (title_buffer, '\0', title_buffer_size);
+
+     if (fmt == NULL) {
+         /* No custom text: the title is the program name alone */
+         prefix_len = snprintf (title_buffer, title_buffer_size, "%s", title_progname);
+         if (prefix_len < 0 || (size_t) prefix_len >= title_buffer_size) {
+             return -1;
+         }
+     }
+     else {
+         prefix_len = snprintf (title_buffer, title_buffer_size, "%s: ", title_progname);
+         if (prefix_len < 0 || (size_t) prefix_len >= title_buffer_size) {
+             return -1;
+         }
+
+         va_start (args, fmt);
+         tail_len = vsnprintf (title_buffer + prefix_len, title_buffer_size - prefix_len, fmt, args);
+         va_end (args);
+         if (tail_len < 0 || (size_t) tail_len >= title_buffer_size - prefix_len) {
+             return -1;
+         }
+     }
+
+     /* Wipe everything behind the new title */
+     used = strlen (title_buffer);
+     memset (title_buffer + used, '\0', title_buffer_size - used);
+
+     return 0;
+ }
+
+/*
+ It has to be _init function, because __attribute__((constructor))
+ functions gets called without arguments.
+*/
+
+gint
+init_title (gint argc, gchar *argv[], gchar *envp[])
+{
+#if defined(DARWIN) || defined(SOLARIS)
+ /* XXX: try to handle these OSes too */
+ return 0;
+#else
+ gchar *begin_of_buffer = 0, *end_of_buffer = 0;
+ gint i;
+
+ for (i = 0; i < argc; ++i) {
+ if (!begin_of_buffer)
+ begin_of_buffer = argv[i];
+ if (!end_of_buffer || end_of_buffer + 1 == argv[i])
+ end_of_buffer = argv[i] + strlen (argv[i]);
+ }
+
+ for (i = 0; envp[i]; ++i) {
+ if (!begin_of_buffer)
+ begin_of_buffer = envp[i];
+ if (!end_of_buffer || end_of_buffer + 1 == envp[i])
+ end_of_buffer = envp[i] + strlen (envp[i]);
+ }
+
+ if (!end_of_buffer)
+ return 0;
+
+ gchar **new_environ = g_malloc ((i + 1) * sizeof (envp[0]));
+
+ if (!new_environ)
+ return 0;
+
+ for (i = 0; envp[i]; ++i) {
+ if (!(new_environ[i] = g_strdup (envp[i])))
+ goto cleanup_enomem;
+ }
+ new_environ[i] = 0;
+
+ if (program_invocation_name) {
+ title_progname_full = g_strdup (program_invocation_name);
+
+ if (!title_progname_full)
+ goto cleanup_enomem;
+
+ gchar *p = strrchr (title_progname_full, '/');
+
+ if (p)
+ title_progname = p + 1;
+ else
+ title_progname = title_progname_full;
+
+ program_invocation_name = title_progname_full;
+ program_invocation_short_name = title_progname;
+ }
+
+ environ = new_environ;
+ title_buffer = begin_of_buffer;
+ title_buffer_size = end_of_buffer - begin_of_buffer;
+
+ return 0;
+
+ cleanup_enomem:
+ for (--i; i >= 0; --i) {
+ g_free (new_environ[i]);
+ }
+ g_free (new_environ);
+ return 0;
+#endif
+}
+#endif
+
+#ifndef HAVE_PIDFILE
+extern gchar *__progname;
+static gint _rspamd_pidfile_remove (rspamd_pidfh_t *pfh, gint freeit);
+
+ /**
+  * Check that the descriptor remembered in a pid file handle still refers to
+  * the file that was opened (same device and inode).
+  *
+  * @return 0 when the descriptor matches, the errno value of a failed fstat(),
+  *         or -1 for a NULL/closed handle or a device/inode mismatch
+  */
+ static gint
+ rspamd_pidfile_verify (rspamd_pidfh_t *pfh)
+ {
+     struct stat st;
+
+     if (pfh == NULL || pfh->pf_fd == -1) {
+         return (-1);
+     }
+
+     /* Check remembered descriptor */
+     if (fstat (pfh->pf_fd, &st) == -1) {
+         return (errno);
+     }
+
+     return (st.st_dev == pfh->pf_dev && st.st_ino == pfh->pf_ino) ? 0 : -1;
+ }
+
+ /**
+  * Read a pid from a pid file.
+  *
+  * @param path pid file to read
+  * @param pidptr receives the parsed pid
+  * @return 0 on success, the errno value of a failed open()/read(), EAGAIN for
+  *         an empty file, EINVAL when the content is not entirely a decimal number
+  */
+ static gint
+ rspamd_pidfile_read (const gchar *path, pid_t * pidptr)
+ {
+     gchar digits[16], *stop;
+     gint saved_errno, pidfd, nread;
+
+     pidfd = open (path, O_RDONLY);
+     if (pidfd == -1) {
+         return (errno);
+     }
+
+     nread = read (pidfd, digits, sizeof (digits) - 1);
+     /* Remember errno in case close() wants to change it */
+     saved_errno = errno;
+     close (pidfd);
+
+     if (nread == -1) {
+         return saved_errno;
+     }
+     if (nread == 0) {
+         return EAGAIN;
+     }
+     digits[nread] = '\0';
+
+     *pidptr = strtol (digits, &stop, 10);
+
+     /* Every byte that was read must belong to the number */
+     return (stop == &digits[nread]) ? 0 : EINVAL;
+ }
+
+ /**
+  * Open (creating if needed) and exclusively lock a pid file.
+  *
+  * @param path pid file path; NULL selects /var/run/<prgname>.pid
+  * @param mode permissions used when the file is created
+  * @param pidptr when not NULL and the file is locked by someone else, receives
+  *               the pid stored in the file
+  * @return new handle, or NULL on failure with errno set: ENAMETOOLONG when the
+  *         path does not fit, EEXIST when the file is locked and the owner's pid
+  *         was read into *pidptr, otherwise the reason of the failed call
+  */
+ rspamd_pidfh_t *
+ rspamd_pidfile_open (const gchar *path, mode_t mode, pid_t * pidptr)
+ {
+     rspamd_pidfh_t *pfh;
+     struct stat sb;
+     gint error, fd, len, count;
+     struct timespec rqtp;
+
+     pfh = g_malloc (sizeof (*pfh));
+     if (pfh == NULL)
+         return NULL;
+
+     if (path == NULL)
+         len = snprintf (pfh->pf_path, sizeof (pfh->pf_path), "/var/run/%s.pid", g_get_prgname ());
+     else
+         len = snprintf (pfh->pf_path, sizeof (pfh->pf_path), "%s", path);
+     if (len >= (gint)sizeof (pfh->pf_path)) {
+         g_free (pfh);
+         errno = ENAMETOOLONG;
+         return NULL;
+     }
+
+     /*
+      * Open the PID file and obtain exclusive lock.
+      * The file is NOT truncated by open(): truncating before the lock is
+      * held would erase the PID written by the process that owns the lock.
+      */
+     fd = open (pfh->pf_path, O_WRONLY | O_CREAT | O_NONBLOCK, mode);
+     if (fd != -1 && !lock_file (fd, TRUE)) {
+         /* Somebody else holds the lock (or locking failed): treat as open failure */
+         error = errno;
+         close (fd);
+         fd = -1;
+         errno = (error == EAGAIN || error == EACCES) ? EWOULDBLOCK : error;
+     }
+     if (fd == -1) {
+         error = errno;
+         rqtp.tv_sec = 0;
+         rqtp.tv_nsec = 5000000;
+         if (error == EWOULDBLOCK && pidptr != NULL) {
+             /* The owner may not have written its pid yet: retry a few times */
+             for (count = 0;;) {
+                 error = rspamd_pidfile_read (pfh->pf_path, pidptr);
+                 if (error == 0) {
+                     error = EEXIST;
+                     break;
+                 }
+                 if (error != EAGAIN || ++count > 3) {
+                     break;
+                 }
+                 nanosleep (&rqtp, 0);
+             }
+         }
+         g_free (pfh);
+         errno = error;
+         return NULL;
+     }
+
+     /*
+      * We own the lock now: remove old PID immediately. PID file will be
+      * truncated again in pidfile_write(), so pidfile_write() can be called
+      * multiple times.
+      * Remember file information, so in pidfile_write() we are sure we write
+      * to the proper descriptor.
+      */
+     if (ftruncate (fd, 0) == -1 || fstat (fd, &sb) == -1) {
+         error = errno;
+         unlink (pfh->pf_path);
+         close (fd);
+         g_free (pfh);
+         errno = error;
+         return NULL;
+     }
+
+     pfh->pf_fd = fd;
+     pfh->pf_dev = sb.st_dev;
+     pfh->pf_ino = sb.st_ino;
+
+     return pfh;
+ }
+
+ /**
+  * Write the pid of the calling process into an opened pid file.
+  *
+  * The file is truncated first, so the function may be called repeatedly.
+  *
+  * @param pfh handle returned by rspamd_pidfile_open()
+  * @return 0 on success, -1 on failure with errno set; when truncating or
+  *         writing fails the pid file is removed
+  */
+ gint
+ rspamd_pidfile_write (rspamd_pidfh_t *pfh)
+ {
+     gchar text[16];
+     gint saved_errno, pidfd;
+     gsize text_len;
+
+     /*
+      * Check remembered descriptor, so we don't overwrite some other
+      * file if pidfile was closed and descriptor reused.
+      */
+     errno = rspamd_pidfile_verify (pfh);
+     if (errno != 0) {
+         /* Don't close descriptor, because we are not sure if it's ours */
+         return -1;
+     }
+     pidfd = pfh->pf_fd;
+
+     /* Truncate PID file, so multiple calls of pidfile_write() are allowed */
+     if (ftruncate (pidfd, 0) == -1) {
+         goto remove_and_fail;
+     }
+
+     rspamd_snprintf (text, sizeof (text), "%P", getpid ());
+     text_len = strlen (text);
+     if (pwrite (pidfd, text, text_len, 0) != (ssize_t) text_len) {
+         goto remove_and_fail;
+     }
+
+     return 0;
+
+ remove_and_fail:
+     /* Report the original failure, not whatever the cleanup sets */
+     saved_errno = errno;
+     _rspamd_pidfile_remove (pfh, 0);
+     errno = saved_errno;
+     return -1;
+ }
+
+ /**
+  * Close a pid file without removing it and free the handle.
+  *
+  * @param pfh handle returned by rspamd_pidfile_open()
+  * @return 0 on success, -1 with errno set when the handle fails verification
+  *         (the handle is then left untouched) or when close() fails
+  */
+ gint
+ rspamd_pidfile_close (rspamd_pidfh_t *pfh)
+ {
+     gint rc;
+
+     rc = rspamd_pidfile_verify (pfh);
+     if (rc != 0) {
+         errno = rc;
+         return -1;
+     }
+
+     if (close (pfh->pf_fd) == -1) {
+         rc = errno;
+     }
+     g_free (pfh);
+
+     if (rc == 0) {
+         return 0;
+     }
+
+     errno = rc;
+     return -1;
+ }
+
+ /**
+ * Unlink a pid file, release its lock and close its descriptor.
+ *
+ * All three steps are attempted even if an earlier one fails; the first
+ * failure reason is the one reported through errno.
+ *
+ * @param pfh pid file handle
+ * @param freeit when non-zero the handle is freed, otherwise its descriptor is
+ * marked as closed (pf_fd = -1) and the handle stays allocated
+ * @return 0 on success, -1 with errno set when verification or any step failed
+ */
+ static gint
+ _rspamd_pidfile_remove (rspamd_pidfh_t *pfh, gint freeit)
+ {
+ gint error;
+
+ error = rspamd_pidfile_verify (pfh);
+ if (error != 0) {
+ errno = error;
+ return -1;
+ }
+
+ /* error is 0 here; from now on it keeps the first failure only */
+ if (unlink (pfh->pf_path) == -1)
+ error = errno;
+ if (!unlock_file (pfh->pf_fd, FALSE)) {
+ if (error == 0)
+ error = errno;
+ }
+ if (close (pfh->pf_fd) == -1) {
+ if (error == 0)
+ error = errno;
+ }
+ if (freeit)
+ g_free (pfh);
+ else
+ pfh->pf_fd = -1;
+ if (error != 0) {
+ errno = error;
+ return -1;
+ }
+ return 0;
+ }
+
+ /**
+  * Remove a pid file and free its handle.
+  *
+  * @param pfh handle returned by rspamd_pidfile_open()
+  * @return result of _rspamd_pidfile_remove(): 0 on success, -1 with errno set
+  */
+ gint
+ rspamd_pidfile_remove (rspamd_pidfh_t *pfh)
+ {
+     gint rc;
+
+     rc = _rspamd_pidfile_remove (pfh, 1);
+
+     return rc;
+ }
+#endif
+
+ /**
+  * Replace %r with rcpt value and %f with from value, new string is allocated in pool
+  *
+  * @param pool memory pool used for the resulting string
+  * @param pattern file name pattern
+  * @param rcpt recipient substituted for %r (NULL is treated as an empty string)
+  * @param from sender substituted for %f (NULL is treated as an empty string)
+  * @return the pattern itself when it contains neither %r nor %f, otherwise a
+  *         new NUL terminated string allocated from the pool
+  */
+ gchar *
+ resolve_stat_filename (rspamd_mempool_t * pool, gchar *pattern, gchar *rcpt, gchar *from)
+ {
+     gint need_to_format = 0, len = 0;
+     gint rcptlen, fromlen;
+     gchar *c, *result, *s;
+
+     rcptlen = rcpt ? strlen (rcpt) : 0;
+     fromlen = from ? strlen (from) : 0;
+
+     /* Calculate length; every character, including the first one, is examined */
+     c = pattern;
+     while (*c != '\0') {
+         if (c[0] == '%' && c[1] == 'r') {
+             len += rcptlen;
+             c += 2;
+             need_to_format = 1;
+         }
+         else if (c[0] == '%' && c[1] == 'f') {
+             len += fromlen;
+             c += 2;
+             need_to_format = 1;
+         }
+         else {
+             len++;
+             c++;
+         }
+     }
+
+     /* Do not allocate extra memory if we do not need to format string */
+     if (!need_to_format) {
+         return pattern;
+     }
+
+     /* Allocate new string, with room for the terminating NUL */
+     result = rspamd_mempool_alloc (pool, len + 1);
+     s = result;
+
+     /* Format string */
+     c = pattern;
+     while (*c != '\0') {
+         if (c[0] == '%' && c[1] == 'r') {
+             c += 2;
+             if (rcptlen > 0) {
+                 memcpy (s, rcpt, rcptlen);
+                 s += rcptlen;
+             }
+         }
+         else if (c[0] == '%' && c[1] == 'f') {
+             c += 2;
+             if (fromlen > 0) {
+                 memcpy (s, from, fromlen);
+                 s += fromlen;
+             }
+         }
+         else {
+             *s++ = *c++;
+         }
+     }
+
+     *s = '\0';
+
+     return result;
+ }
+
+ /**
+  * Format the time spent on a check as "<real>ms real, <virtual>ms virtual".
+  *
+  * With HAVE_CLOCK_GETTIME the real time is measured from @tv with
+  * gettimeofday() and the virtual time from @begin with clock_gettime();
+  * otherwise both values are the wall clock time elapsed since @begin.
+  *
+  * @param resolution number of digits after the decimal point (clamped to 0..999
+  *                   so that the generated format always fits its buffer)
+  * @param scan_time receives the real time in milliseconds
+  * @return pointer to a static buffer that is overwritten by the next call
+  */
+ #ifdef HAVE_CLOCK_GETTIME
+ const gchar *
+ calculate_check_time (struct timeval *tv, struct timespec *begin, gint resolution, guint32 *scan_time)
+ #else
+ const gchar *
+ calculate_check_time (struct timeval *begin, gint resolution, guint32 *scan_time)
+ #endif
+ {
+     double vdiff, diff;
+     static gchar res[64];
+     static gchar fmt[sizeof ("%.10f ms real, %.10f ms virtual")];
+     struct timeval tv_now;
+
+     if (gettimeofday (&tv_now, NULL) == -1) {
+         msg_warn ("gettimeofday failed: %s", strerror (errno));
+     }
+ #ifdef HAVE_CLOCK_GETTIME
+     struct timespec ts;
+
+     diff = (tv_now.tv_sec - tv->tv_sec) * 1000. + /* Seconds */
+         (tv_now.tv_usec - tv->tv_usec) / 1000.; /* Microseconds */
+ #ifdef HAVE_CLOCK_PROCESS_CPUTIME_ID
+     clock_gettime (CLOCK_PROCESS_CPUTIME_ID, &ts);
+ #elif defined(HAVE_CLOCK_VIRTUAL)
+     clock_gettime (CLOCK_VIRTUAL, &ts);
+ #else
+     clock_gettime (CLOCK_REALTIME, &ts);
+ #endif
+
+     vdiff = (ts.tv_sec - begin->tv_sec) * 1000. + /* Seconds */
+         (ts.tv_nsec - begin->tv_nsec) / 1000000.; /* Nanoseconds */
+ #else
+     diff = (tv_now.tv_sec - begin->tv_sec) * 1000. + /* Seconds */
+         (tv_now.tv_usec - begin->tv_usec) / 1000.; /* Microseconds */
+
+     vdiff = diff;
+ #endif
+
+     *scan_time = diff;
+
+     /*
+      * fmt has room for a precision of at most three digits; a negative
+      * precision would not form a valid conversion at all.
+      */
+     if (resolution < 0) {
+         resolution = 0;
+     }
+     else if (resolution > 999) {
+         resolution = 999;
+     }
+
+     snprintf (fmt, sizeof (fmt), "%%.%dfms real, %%.%dfms virtual", resolution, resolution);
+     snprintf (res, sizeof (res), fmt, diff, vdiff);
+
+     return (const gchar *)res;
+ }
+
+ /* Fallback ASCII lowercase conversion; the argument is evaluated more than once */
+ #ifndef g_tolower
+ # define g_tolower(x) (((x) >= 'A' && (x) <= 'Z') ? (x) - 'A' + 'a' : (x))
+ #endif
+
+
+ /**
+  * Equality callback for hash tables keyed by NUL terminated strings,
+  * ignoring ASCII case.
+  *
+  * @return TRUE when both strings are equal, FALSE otherwise
+  */
+ gboolean
+ rspamd_strcase_equal (gconstpointer v, gconstpointer v2)
+ {
+     const gchar *lhs = (const gchar *)v, *rhs = (const gchar *)v2;
+
+     return g_ascii_strcasecmp (lhs, rhs) == 0 ? TRUE : FALSE;
+ }
+
+
+ /**
+  * Hash callback for NUL terminated strings that ignores ASCII case.
+  *
+  * The string is lowercased into a 256 byte buffer chunk by chunk; the
+  * murmur32 hashes of the chunks are XORed together.
+  *
+  * @param key NUL terminated string
+  * @return hash value (0 for an empty string)
+  */
+ guint
+ rspamd_strcase_hash (gconstpointer key)
+ {
+     const gchar *cur;
+     gchar chunk[256];
+     guint hash = 0, filled = 0;
+
+     for (cur = key; *cur != '\0'; cur++) {
+         chunk[filled++] = g_ascii_tolower (*cur);
+         if (filled == sizeof (chunk)) {
+             /* Buffer is full: fold it into the hash and start over */
+             hash ^= murmur32_hash (chunk, filled);
+             filled = 0;
+         }
+     }
+
+     if (filled > 0) {
+         hash ^= murmur32_hash (chunk, filled);
+     }
+
+     return hash;
+ }
+
+ /**
+  * Hash callback for NUL terminated strings (case sensitive).
+  *
+  * @param key NUL terminated string
+  * @return murmur32 hash of the string without its terminator
+  */
+ guint
+ rspamd_str_hash (gconstpointer key)
+ {
+     gsize key_len = strlen ((const gchar *)key);
+
+     return murmur32_hash (key, key_len);
+ }
+
+ /**
+  * Equality callback for hash tables keyed by NUL terminated strings
+  * (case sensitive).
+  *
+  * @return non-zero when both strings are equal
+  */
+ gboolean
+ rspamd_str_equal (gconstpointer v, gconstpointer v2)
+ {
+     const gchar *lhs = (const gchar *)v;
+     const gchar *rhs = (const gchar *)v2;
+
+     return strcmp (lhs, rhs) == 0;
+ }
+
+ /**
+  * Equality callback for hash tables keyed by f_str_t, ignoring ASCII case.
+  *
+  * @return TRUE when both strings have the same length and content
+  */
+ gboolean
+ fstr_strcase_equal (gconstpointer v, gconstpointer v2)
+ {
+     const f_str_t *lhs = v, *rhs = v2;
+
+     /* Strings of different length can never match */
+     if (lhs->len != rhs->len) {
+         return FALSE;
+     }
+
+     return g_ascii_strncasecmp (lhs->begin, rhs->begin, lhs->len) == 0 ? TRUE : FALSE;
+ }
+
+
+ /**
+  * Hash callback for f_str_t keys that ignores ASCII case.
+  *
+  * The string is lowercased into a 256 byte buffer chunk by chunk; the
+  * murmur32 hashes of the chunks are XORed together.
+  *
+  * @param key f_str_t to hash
+  * @return hash value (0 for an empty string)
+  */
+ guint
+ fstr_strcase_hash (gconstpointer key)
+ {
+     const f_str_t *str = key;
+     const gchar *cur;
+     gchar chunk[256];
+     guint hash = 0, filled = 0;
+
+     for (cur = str->begin; cur - str->begin < (gint)str->len; cur++) {
+         chunk[filled++] = g_ascii_tolower (*cur);
+         if (filled == sizeof (chunk)) {
+             /* Buffer is full: fold it into the hash and start over */
+             hash ^= murmur32_hash (chunk, filled);
+             filled = 0;
+         }
+     }
+
+     if (filled > 0) {
+         hash ^= murmur32_hash (chunk, filled);
+     }
+
+     return hash;
+ }
+
+ /**
+ * Start the google perftools CPU profiler for the current process.
+ *
+ * Does nothing unless the code is built with WITH_GPERF_TOOLS. The profile is
+ * written to "<profile_path>-<descr>.<pid>"; when cfg->profile_path is not set
+ * it is initialised to "<temp_dir>/rspamd-profile".
+ *
+ * @param cfg configuration; profile_path and temp_dir are used
+ * @param descr tag that becomes part of the profile file name
+ */
+ void
+ gperf_profiler_init (struct config_file *cfg, const gchar *descr)
+ {
+ #if defined(WITH_GPERF_TOOLS)
+ gchar prof_path[PATH_MAX];
+
+ if (getenv ("CPUPROFILE")) {
+
+ /* disable inherited Profiler enabled in master process */
+ ProfilerStop ();
+ }
+ /* Set the default profile path if none is configured */
+ if (cfg->profile_path == NULL) {
+ cfg->profile_path = g_strdup_printf ("%s/rspamd-profile", cfg->temp_dir);
+ }
+
+ snprintf (prof_path, sizeof (prof_path), "%s-%s.%d", cfg->profile_path, descr, (gint)getpid ());
+ if (ProfilerStart (prof_path)) {
+ /* start ITIMER_PROF timer */
+ ProfilerRegisterThread ();
+ }
+ else {
+ msg_warn ("cannot start google perftools profiler");
+ }
+
+ #endif
+ }
+
+#ifdef HAVE_FLOCK
+/* Flock version */
+ /**
+  * Take an exclusive flock() lock on a descriptor.
+  *
+  * @param fd descriptor to lock
+  * @param async when TRUE do not block if the lock is held by someone else
+  * @return TRUE when the lock was obtained, FALSE otherwise; a warning is logged
+  *         unless a non-blocking attempt failed with EAGAIN
+  */
+ gboolean
+ lock_file (gint fd, gboolean async)
+ {
+     gint op = async ? (LOCK_EX | LOCK_NB) : LOCK_EX;
+
+     if (flock (fd, op) != -1) {
+         return TRUE;
+     }
+
+     /* A busy lock in non-blocking mode is an expected outcome, not worth a warning */
+     if (!(async && errno == EAGAIN)) {
+         msg_warn ("lock on file failed: %s", strerror (errno));
+     }
+
+     return FALSE;
+ }
+
+ /**
+  * Release a flock() lock held on a descriptor.
+  *
+  * @param fd descriptor to unlock
+  * @param async when TRUE LOCK_NB is added to the flock() operation
+  * @return TRUE when the lock was released, FALSE otherwise; a warning is logged
+  *         unless a non-blocking attempt failed with EAGAIN
+  */
+ gboolean
+ unlock_file (gint fd, gboolean async)
+ {
+     gint flags;
+
+     if (async) {
+         flags = LOCK_UN | LOCK_NB;
+     }
+     else {
+         flags = LOCK_UN;
+     }
+
+     if (flock (fd, flags) == -1) {
+         if (async && errno == EAGAIN) {
+             return FALSE;
+         }
+         /* Name the operation that actually failed */
+         msg_warn ("unlock on file failed: %s", strerror (errno));
+         return FALSE;
+     }
+
+     return TRUE;
+ }
+#else /* HAVE_FLOCK */
+ /* Fcntl version */
+ /**
+  * Take a write lock on a whole file with fcntl().
+  *
+  * @param fd descriptor to lock
+  * @param async when TRUE use F_SETLK (do not wait), otherwise F_SETLKW
+  * @return TRUE when the lock was obtained, FALSE otherwise; a warning is logged
+  *         unless a non-blocking attempt failed with EAGAIN or EACCES
+  */
+ gboolean
+ lock_file (gint fd, gboolean async)
+ {
+     struct flock whole_file;
+     gint cmd = async ? F_SETLK : F_SETLKW;
+
+     /* Start 0 and length 0 from SEEK_SET cover the entire file */
+     memset (&whole_file, 0, sizeof (whole_file));
+     whole_file.l_type = F_WRLCK;
+     whole_file.l_whence = SEEK_SET;
+     whole_file.l_start = 0;
+     whole_file.l_len = 0;
+
+     if (fcntl (fd, cmd, &whole_file) != -1) {
+         return TRUE;
+     }
+
+     /* A busy lock in non-blocking mode is an expected outcome, not worth a warning */
+     if (!(async && (errno == EAGAIN || errno == EACCES))) {
+         msg_warn ("lock on file failed: %s", strerror (errno));
+     }
+
+     return FALSE;
+ }
+
+/**
+ * Release a fcntl(2) lock (F_UNLCK with l_start = 0 and l_len = 0).
+ * @param fd descriptor to unlock
+ * @param async if TRUE use F_SETLK, otherwise F_SETLKW
+ * @return TRUE if the lock has been released
+ */
+gboolean
+unlock_file (gint fd, gboolean async)
+{
+	struct flock fl = {
+		.l_type = F_UNLCK,
+		.l_whence = SEEK_SET,
+		.l_start = 0,
+		.l_len = 0
+	};
+
+	if (fcntl (fd, async ? F_SETLK : F_SETLKW, &fl) == -1) {
+		if (async && (errno == EAGAIN || errno == EACCES)) {
+			return FALSE;
+		}
+		/* Report the operation that actually failed (was logged as "lock") */
+		msg_warn ("unlock on file failed: %s", strerror (errno));
+		return FALSE;
+	}
+
+	return TRUE;
+
+}
+#endif /* HAVE_FLOCK */
+
+
+#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION < 22))
+void
+g_ptr_array_unref (GPtrArray *array)
+{
+ g_ptr_array_free (array, TRUE); /* fallback for GLib < 2.22: frees the array and its pointer storage unconditionally, no reference counting here */
+}
+#endif
+#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION < 14))
+/* Fallback for GLib < 2.14: free all links of the queue and reset it to the empty state */
+void
+g_queue_clear (GQueue *queue)
+{
+	g_return_if_fail (queue != NULL);
+
+	g_list_free (queue->head);
+
+	queue->length = 0;
+	queue->tail = NULL;
+	queue->head = NULL;
+}
+#endif
+
+/**
+ * Copy at most siz - 1 bytes from src to dst, stopping at the first NUL
+ * in src, and always NUL terminate dst when siz is not 0.
+ * @param dst destination buffer
+ * @param src source string (need not be NUL terminated if it has at least siz - 1 bytes)
+ * @param siz size of the destination buffer
+ * @return number of bytes copied, not counting the terminating NUL
+ */
+gsize
+rspamd_strlcpy (gchar *dst, const gchar *src, gsize siz)
+{
+	gchar *d = dst;
+	const gchar *s = src;
+	gsize n = siz;
+
+	/* Copy as many bytes as will fit */
+	if (n != 0) {
+		while (--n != 0) {
+			if ((*d++ = *s++) == '\0') {
+				/* Step back so that the terminator is not counted */
+				d--;
+				break;
+			}
+		}
+
+		if (n == 0) {
+			/* Source was truncated (or siz == 1): terminate explicitly */
+			*d = '\0';
+		}
+	}
+
+	/*
+	 * Count from the destination side: the previous (s - src - 1) result
+	 * was one short on truncation and wrapped to (gsize)-1 for siz 0 or 1.
+	 */
+	return (d - dst);
+}
+
+/**
+ * Lowercasing variant of bounded string copy: copies at most siz - 1 bytes
+ * from src to dst, converting each byte with g_ascii_tolower(), stopping at
+ * the first NUL, and always NUL terminates dst when siz is not 0.
+ * @param dst destination buffer
+ * @param src source string
+ * @param siz size of the destination buffer
+ * @return number of bytes copied, not counting the terminating NUL
+ */
+gsize
+rspamd_strlcpy_tolower (gchar *dst, const gchar *src, gsize siz)
+{
+	gchar *d = dst;
+	const gchar *s = src;
+	gsize n = siz;
+
+	/* Copy as many bytes as will fit */
+	if (n != 0) {
+		while (--n != 0) {
+			if ((*d++ = g_ascii_tolower (*s++)) == '\0') {
+				/* Step back so that the terminator is not counted */
+				d--;
+				break;
+			}
+		}
+
+		if (n == 0) {
+			/* Source was truncated (or siz == 1): terminate explicitly */
+			*d = '\0';
+		}
+	}
+
+	/*
+	 * Count from the destination side: the previous (s - src - 1) result
+	 * was one short on truncation and wrapped to (gsize)-1 for siz 0 or 1.
+	 */
+	return (d - dst);
+}
+
+/*
+ * Compare two emails for building emails tree.
+ * Ordering: host length first, then host (ASCII case-insensitive), then
+ * user length, then user (ASCII case-insensitive).
+ */
+gint
+compare_email_func (gconstpointer a, gconstpointer b)
+{
+	const struct uri *u1 = a, *u2 = b;
+	gint r;
+
+	if (u1->hostlen != u2->hostlen || u1->hostlen == 0) {
+		return u1->hostlen - u2->hostlen;
+	}
+
+	r = g_ascii_strncasecmp (u1->host, u2->host, u1->hostlen);
+	if (r != 0) {
+		return r;
+	}
+
+	/* Hosts are equal, decide by the user part */
+	if (u1->userlen != u2->userlen || u1->userlen == 0) {
+		return u1->userlen - u2->userlen;
+	}
+
+	return g_ascii_strncasecmp (u1->user, u2->user, u1->userlen);
+}
+
+/*
+ * Compare two urls: host length first, then host (ASCII case-insensitive).
+ * Urls with equal hosts but a different is_phished flag never compare equal.
+ */
+gint
+compare_url_func (gconstpointer a, gconstpointer b)
+{
+	const struct uri *u1 = a, *u2 = b;
+	int r;
+
+	if (u1->hostlen != u2->hostlen || u1->hostlen == 0) {
+		return u1->hostlen - u2->hostlen;
+	}
+
+	r = g_ascii_strncasecmp (u1->host, u2->host, u1->hostlen);
+	if (r != 0) {
+		return r;
+	}
+
+	if (u1->is_phished != u2->is_phished) {
+		/* Always insert phished urls to the tree */
+		return -1;
+	}
+
+	return r;
+}
+
+/*
+ * Find the first occurrence of find in s, ignore case.
+ * At most len bytes of s are examined and the whole match must lie inside
+ * these len bytes. A negative len keeps the historical behaviour of
+ * searching up to the terminating NUL of s.
+ * Returns a pointer to the match, s itself for an empty find, or NULL.
+ */
+gchar *
+rspamd_strncasestr (const gchar *s, const gchar *find, gint len)
+{
+	gchar c, sc;
+	gsize mlen;
+	gboolean bounded = (len >= 0);
+
+	if ((c = *find++) != 0) {
+		c = g_ascii_tolower (c);
+		mlen = strlen (find);
+		do {
+			do {
+				/* Check the budget before touching *s to avoid reading s[len] */
+				if ((bounded && len-- < 1) || (sc = *s++) == 0) {
+					return (NULL);
+				}
+			} while (g_ascii_tolower (sc) != c);
+
+			if (bounded && mlen > (gsize)len) {
+				/* The rest of the needle cannot fit into the remaining bytes */
+				return (NULL);
+			}
+		} while (g_ascii_strncasecmp (s, find, mlen) != 0);
+		s--;
+	}
+	return ((gchar *)s);
+}
+
+/*
+ * Try to convert string of length to long.
+ * Accepts an optional leading '-' followed by decimal digits only.
+ * Returns FALSE on any other character or on overflow; on overflow *value is
+ * set to G_MINLONG or G_MAXLONG depending on the sign.
+ */
+gboolean
+rspamd_strtol (const gchar *s, gsize len, glong *value)
+{
+	const gchar *p = s, *end = s + len;
+	gchar c;
+	glong v = 0;
+	const glong cutoff = G_MAXLONG / 10, cutlim = G_MAXLONG % 10;
+	gboolean neg = FALSE;
+
+	/* Case negative values; never dereference p when the input is empty */
+	if (p < end && *p == '-') {
+		neg = TRUE;
+		p ++;
+	}
+
+	while (p < end) {
+		c = *p;
+		if (c >= '0' && c <= '9') {
+			c -= '0';
+			/* Would v * 10 + c exceed G_MAXLONG? */
+			if (v > cutoff || (v == cutoff && c > cutlim)) {
+				/* Range error */
+				*value = neg ? G_MINLONG : G_MAXLONG;
+				return FALSE;
+			}
+			else {
+				v *= 10;
+				v += c;
+			}
+		}
+		else {
+			return FALSE;
+		}
+		p ++;
+	}
+
+	*value = neg ? -(v) : v;
+	return TRUE;
+}
+
+/*
+ * Try to convert string of length to unsigned long.
+ * Only decimal digits are accepted; on overflow *value is set to G_MAXULONG
+ * and FALSE is returned.
+ */
+gboolean
+rspamd_strtoul (const gchar *s, gsize len, gulong *value)
+{
+	const gchar *cur, *end = s + len;
+	const gulong cutoff = G_MAXULONG / 10, cutlim = G_MAXULONG % 10;
+	gulong acc = 0;
+	gchar c;
+
+	for (cur = s; cur < end; cur ++) {
+		c = *cur;
+		if (c < '0' || c > '9') {
+			return FALSE;
+		}
+
+		c -= '0';
+		/* Would acc * 10 + c exceed G_MAXULONG? */
+		if (acc > cutoff || (acc == cutoff && (guint8)c > cutlim)) {
+			/* Range error */
+			*value = G_MAXULONG;
+			return FALSE;
+		}
+
+		acc *= 10;
+		acc += c;
+	}
+
+	*value = acc;
+	return TRUE;
+}
+
+/**
+ * Try to preallocate space for a file using fallocate or posix_fallocate.
+ * @param fd descriptor
+ * @param offset start of the region
+ * @param len length of the region
+ * @return 0 on success (or when no preallocation call is available),
+ * -1 with errno set on failure
+ */
+gint
+rspamd_fallocate (gint fd, off_t offset, off_t len)
+{
+#if defined(HAVE_FALLOCATE)
+	return fallocate (fd, 0, offset, len);
+#elif defined(HAVE_POSIX_FALLOCATE)
+	/*
+	 * posix_fallocate returns the error number directly and does not set
+	 * errno, so convert it to the -1/errno convention used by fallocate
+	 */
+	gint rc = posix_fallocate (fd, offset, len);
+
+	if (rc != 0) {
+		errno = rc;
+		return -1;
+	}
+
+	return 0;
+#else
+	/* Return 0 as nothing can be done on this system */
+	return 0;
+#endif
+}
+
+
+/**
+ * Allocate a mutex from the GLib slice allocator and initialise it
+ * @return new mutex
+ */
+inline rspamd_mutex_t*
+rspamd_mutex_new (void)
+{
+	rspamd_mutex_t *mtx = g_slice_alloc (sizeof (rspamd_mutex_t));
+
+#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION > 30))
+	g_mutex_init (&mtx->mtx);
+#else
+	g_static_mutex_init (&mtx->mtx);
+#endif
+
+	return mtx;
+}
+
+/**
+ * Lock mutex: calls g_mutex_lock() when GLib is 2.x with minor version above 30
+ * and g_static_mutex_lock() otherwise
+ * @param mtx mutex to lock
+ */
+inline void
+rspamd_mutex_lock (rspamd_mutex_t *mtx)
+{
+#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION > 30))
+ g_mutex_lock (&mtx->mtx);
+#else
+ g_static_mutex_lock (&mtx->mtx);
+#endif
+}
+
+/**
+ * Unlock mutex: calls g_mutex_unlock() when GLib is 2.x with minor version above 30
+ * and g_static_mutex_unlock() otherwise
+ * @param mtx mutex to unlock
+ */
+inline void
+rspamd_mutex_unlock (rspamd_mutex_t *mtx)
+{
+#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION > 30))
+ g_mutex_unlock (&mtx->mtx);
+#else
+ g_static_mutex_unlock (&mtx->mtx);
+#endif
+}
+
+/**
+ * Release resources held by the mutex and free the structure allocated by
+ * rspamd_mutex_new()
+ * @param mtx mutex to destroy
+ */
+void
+rspamd_mutex_free (rspamd_mutex_t *mtx)
+{
+#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION > 30))
+	g_mutex_clear (&mtx->mtx);
+#else
+	/* A GStaticMutex owns resources of its own that must be released too */
+	g_static_mutex_free (&mtx->mtx);
+#endif
+	g_slice_free1 (sizeof (rspamd_mutex_t), mtx);
+}
+
+/**
+ * Allocate a read-write lock with g_malloc() and initialise it
+ * @return new rwlock
+ */
+rspamd_rwlock_t*
+rspamd_rwlock_new (void)
+{
+	rspamd_rwlock_t *lock = g_malloc (sizeof (rspamd_rwlock_t));
+
+#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION > 30))
+	g_rw_lock_init (&lock->rwlock);
+#else
+	g_static_rw_lock_init (&lock->rwlock);
+#endif
+
+	return lock;
+}
+
+/**
+ * Lock rwlock for writing: calls g_rw_lock_writer_lock() when GLib is 2.x with
+ * minor version above 30 and g_static_rw_lock_writer_lock() otherwise
+ * @param mtx rwlock to lock
+ */
+inline void
+rspamd_rwlock_writer_lock (rspamd_rwlock_t *mtx)
+{
+#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION > 30))
+ g_rw_lock_writer_lock (&mtx->rwlock);
+#else
+ g_static_rw_lock_writer_lock (&mtx->rwlock);
+#endif
+}
+
+/**
+ * Lock rwlock for reading: calls g_rw_lock_reader_lock() when GLib is 2.x with
+ * minor version above 30 and g_static_rw_lock_reader_lock() otherwise
+ * @param mtx rwlock to lock
+ */
+inline void
+rspamd_rwlock_reader_lock (rspamd_rwlock_t *mtx)
+{
+#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION > 30))
+ g_rw_lock_reader_lock (&mtx->rwlock);
+#else
+ g_static_rw_lock_reader_lock (&mtx->rwlock);
+#endif
+}
+
+/**
+ * Unlock rwlock from writing: calls g_rw_lock_writer_unlock() when GLib is 2.x
+ * with minor version above 30 and g_static_rw_lock_writer_unlock() otherwise
+ * @param mtx rwlock to unlock
+ */
+inline void
+rspamd_rwlock_writer_unlock (rspamd_rwlock_t *mtx)
+{
+#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION > 30))
+ g_rw_lock_writer_unlock (&mtx->rwlock);
+#else
+ g_static_rw_lock_writer_unlock (&mtx->rwlock);
+#endif
+}
+
+/**
+ * Unlock rwlock from reading: calls g_rw_lock_reader_unlock() when GLib is 2.x
+ * with minor version above 30 and g_static_rw_lock_reader_unlock() otherwise
+ * @param mtx rwlock to unlock
+ */
+inline void
+rspamd_rwlock_reader_unlock (rspamd_rwlock_t *mtx)
+{
+#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION > 30))
+ g_rw_lock_reader_unlock (&mtx->rwlock);
+#else
+ g_static_rw_lock_reader_unlock (&mtx->rwlock);
+#endif
+}
+
+/**
+ * Release resources held by the rwlock and free the structure allocated by
+ * rspamd_rwlock_new()
+ * @param mtx rwlock to destroy
+ */
+void
+rspamd_rwlock_free (rspamd_rwlock_t *mtx)
+{
+#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION > 30))
+	g_rw_lock_clear (&mtx->rwlock);
+#else
+	/* A GStaticRWLock owns resources of its own that must be released too */
+	g_static_rw_lock_free (&mtx->rwlock);
+#endif
+	/*
+	 * rspamd_rwlock_new() allocates with g_malloc(), so the memory must go
+	 * back through g_free() and not through the slice allocator
+	 */
+	g_free (mtx);
+}
+
+struct rspamd_thread_data {
+ gchar *name; /* heap allocated "<name>-<id>" string, freed by rspamd_thread_func */
+ gint id; /* sequence number taken from the static counter in rspamd_create_thread */
+ GThreadFunc func; /* user function executed inside the new thread */
+ gpointer data; /* argument passed to func */
+};
+
+/*
+ * Thread entry point used by rspamd_create_thread: blocks a fixed set of
+ * signals in the calling thread, runs the user function and frees the
+ * rspamd_thread_data passed in ud. Returns the user function's result.
+ */
+static gpointer
+rspamd_thread_func (gpointer ud)
+{
+	struct rspamd_thread_data *td = ud;
+	sigset_t s_mask;
+
+	/* Ignore signals in thread */
+	sigemptyset (&s_mask);
+	sigaddset (&s_mask, SIGTERM);
+	sigaddset (&s_mask, SIGINT);
+	sigaddset (&s_mask, SIGHUP);
+	sigaddset (&s_mask, SIGCHLD);
+	sigaddset (&s_mask, SIGUSR1);
+	sigaddset (&s_mask, SIGUSR2);
+	sigaddset (&s_mask, SIGALRM);
+	sigaddset (&s_mask, SIGPIPE);
+
+	/*
+	 * The effect of sigprocmask() is unspecified in a multi-threaded
+	 * process, pthread_sigmask() is the per-thread equivalent
+	 */
+	pthread_sigmask (SIG_BLOCK, &s_mask, NULL);
+
+	ud = td->func (td->data);
+	g_free (td->name);
+	g_free (td);
+
+	return ud;
+}
+
+/**
+ * Create new named thread
+ * @param name name pattern, the thread name is "<name>-<sequence number>"
+ * @param func function to start
+ * @param data data to pass to function
+ * @param err error pointer
+ * @return new thread object that can be joined or NULL on failure
+ */
+GThread*
+rspamd_create_thread (const gchar *name, GThreadFunc func, gpointer data, GError **err)
+{
+	GThread *new;
+	struct rspamd_thread_data *td;
+	/* NOTE(review): the counter is not updated atomically - confirm callers are serialized */
+	static gint32 id;
+	gsize name_len;
+
+	/* Room for the pattern, '-', any 32 bit decimal number and the NUL */
+	name_len = strlen (name) + sizeof ("-2147483648");
+	td = g_malloc (sizeof (struct rspamd_thread_data));
+	td->id = ++id;
+	td->name = g_malloc (name_len);
+	td->func = func;
+	td->data = data;
+
+	rspamd_snprintf (td->name, name_len, "%s-%d", name, td->id);
+#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION > 30))
+	new = g_thread_try_new (td->name, rspamd_thread_func, td, err);
+#else
+	new = g_thread_create (rspamd_thread_func, td, TRUE, err);
+#endif
+
+	if (new == NULL) {
+		/* The thread never started, so rspamd_thread_func will not free td */
+		g_free (td->name);
+		g_free (td);
+	}
+
+	return new;
+}
+
+/**
+ * 32 bit murmur-style hash of a byte buffer.
+ * @param in input data
+ * @param len length of the input data
+ * @return hash value, 0 if in is NULL or len is 0
+ */
+guint32
+murmur32_hash (const guint8 *in, gsize len)
+{
+	const guint32 c1 = 0xcc9e2d51;
+	const guint32 c2 = 0x1b873593;
+
+	const int nblocks = len / 4;
+	const guint8 *tail;
+	guint32 h = 0;
+	gint i;
+	guint32 k;
+
+	if (in == NULL || len == 0) {
+		return 0;
+	}
+
+	tail = (const guint8 *)(in + (nblocks * 4));
+
+	for (i = 0; i < nblocks; i++) {
+		/*
+		 * Input is an arbitrary byte pointer: read each block with memcpy
+		 * instead of dereferencing a casted guint32 pointer, which is an
+		 * unaligned and aliasing-violating access
+		 */
+		memcpy (&k, in + (gsize)i * 4, sizeof (k));
+
+		k *= c1;
+		k = (k << 15) | (k >> (32 - 15));
+		k *= c2;
+
+		h ^= k;
+		h = (h << 13) | (h >> (32 - 13));
+		h = (h * 5) + 0xe6546b64;
+	}
+
+	k = 0;
+	switch (len & 3) {
+	case 3:
+		k ^= tail[2] << 16;
+		/* fallthrough */
+	case 2:
+		k ^= tail[1] << 8;
+		/* fallthrough */
+	case 1:
+		k ^= tail[0];
+		k *= c1;
+		/*
+		 * NOTE(review): this is not a pure rotation (left 13, right 17) and
+		 * differs from the rotate-by-15 used for full blocks; kept as is
+		 * because changing it would change every produced hash value
+		 */
+		k = (k << 13) | (k >> (32 - 15));
+		k *= c2;
+		h ^= k;
+	}
+
+	h ^= len;
+
+	/* Final avalanche */
+	h ^= h >> 16;
+	h *= 0x85ebca6b;
+	h ^= h >> 13;
+	h *= 0xc2b2ae35;
+	h ^= h >> 16;
+
+	return h;
+}
+
+/**
+ * 128 bit murmur-style hash of a byte buffer.
+ * @param in input data
+ * @param len length of the input data
+ * @param out array of 2 guint64 values that receives the hash; it is left
+ * untouched if in is NULL, len is 0 or out is NULL
+ */
+void
+murmur128_hash (const guint8 *in, gsize len, guint64 out[])
+{
+	const guint64 c1 = 0x87c37b91114253d5ULL;
+	const guint64 c2 = 0x4cf5ad432745937fULL;
+	const gint nblocks = len / 16;
+	const guint8 *tail;
+	guint64 h1 = 0;
+	guint64 h2 = 0;
+	int i;
+	guint64 k1, k2;
+
+	if (in == NULL || len == 0 || out == NULL) {
+		return;
+	}
+
+	tail = (const guint8 *)(in + (nblocks * 16));
+
+	for (i = 0; i < nblocks; i++) {
+		/*
+		 * Input is an arbitrary byte pointer: read each block with memcpy
+		 * instead of dereferencing a casted guint64 pointer, which is an
+		 * unaligned and aliasing-violating access
+		 */
+		memcpy (&k1, in + (gsize)i * 16, sizeof (k1));
+		memcpy (&k2, in + (gsize)i * 16 + 8, sizeof (k2));
+
+		k1 *= c1;
+		k1 = (k1 << 31) | (k1 >> (64 - 31));
+		k1 *= c2;
+		h1 ^= k1;
+
+		h1 = (h1 << 27) | (h1 >> (64 - 27));
+		h1 += h2;
+		h1 = h1*5+0x52dce729;
+
+		k2 *= c2;
+		k2 = (k2 << 33) | (k2 >> (64 - 33));
+		k2 *= c1;
+		h2 ^= k2;
+
+		h2 = (h2 << 31) | (h2 >> (64 - 31));
+		h2 += h1;
+		h2 = h2*5+0x38495ab5;
+	}
+
+	/* Mix in the trailing 1..15 bytes; every case falls through on purpose */
+	k1 = k2 = 0;
+	switch (len & 15) {
+	case 15:
+		k2 ^= (guint64)(tail[14]) << 48;
+		/* fallthrough */
+	case 14:
+		k2 ^= (guint64)(tail[13]) << 40;
+		/* fallthrough */
+	case 13:
+		k2 ^= (guint64)(tail[12]) << 32;
+		/* fallthrough */
+	case 12:
+		k2 ^= (guint64)(tail[11]) << 24;
+		/* fallthrough */
+	case 11:
+		k2 ^= (guint64)(tail[10]) << 16;
+		/* fallthrough */
+	case 10:
+		k2 ^= (guint64)(tail[ 9]) << 8;
+		/* fallthrough */
+	case 9:
+		k2 ^= (guint64)(tail[ 8]) << 0;
+		k2 *= c2;
+		k2 = (k2 << 33) | (k2 >> (64 - 33));
+		k2 *= c1;
+		h2 ^= k2;
+		/* fallthrough */
+	case 8:
+		k1 ^= (guint64)(tail[ 7]) << 56;
+		/* fallthrough */
+	case 7:
+		k1 ^= (guint64)(tail[ 6]) << 48;
+		/* fallthrough */
+	case 6:
+		k1 ^= (guint64)(tail[ 5]) << 40;
+		/* fallthrough */
+	case 5:
+		k1 ^= (guint64)(tail[ 4]) << 32;
+		/* fallthrough */
+	case 4:
+		k1 ^= (guint64)(tail[ 3]) << 24;
+		/* fallthrough */
+	case 3:
+		k1 ^= (guint64)(tail[ 2]) << 16;
+		/* fallthrough */
+	case 2:
+		k1 ^= (guint64)(tail[ 1]) << 8;
+		/* fallthrough */
+	case 1:
+		k1 ^= (guint64)(tail[ 0]) << 0;
+		k1 *= c1;
+		k1 = (k1 << 31) | (k1 >> (64 - 31));
+		k1 *= c2;
+		h1 ^= k1;
+	}
+
+	/* finalization */
+
+	h1 ^= len;
+	h2 ^= len;
+
+	h1 += h2;
+	h2 += h1;
+
+	h1 ^= h1 >> 33;
+	h1 *= 0xff51afd7ed558ccdULL;
+	h1 ^= h1 >> 33;
+	h1 *= 0xc4ceb9fe1a85ec53ULL;
+	h1 ^= h1 >> 33;
+
+	h2 ^= h2 >> 33;
+	h2 *= 0xff51afd7ed558ccdULL;
+	h2 ^= h2 >> 33;
+	h2 *= 0xc4ceb9fe1a85ec53ULL;
+	h2 ^= h2 >> 33;
+
+	h1 += h2;
+	h2 += h1;
+
+	out[0] = h1;
+	out[1] = h2;
+}
+
+struct hash_copy_callback_data {
+ gpointer (*key_copy_func)(gconstpointer data, gpointer ud); /* optional key copier, NULL means reuse the source key pointer */
+ gpointer (*value_copy_func)(gconstpointer data, gpointer ud); /* optional value copier, NULL means reuse the source value pointer */
+ gpointer ud; /* user data passed to both copy functions */
+ GHashTable *dst; /* table that receives the copied pairs */
+};
+
+/*
+ * g_hash_table_foreach callback: copy (or reuse) a key/value pair and insert
+ * it into the destination table stored in struct hash_copy_callback_data
+ */
+static void
+copy_foreach_callback (gpointer key, gpointer value, gpointer ud)
+{
+	struct hash_copy_callback_data *ctx = ud;
+	gpointer new_key = (gpointer)key, new_value = (gpointer)value;
+
+	if (ctx->key_copy_func) {
+		new_key = ctx->key_copy_func (key, ctx->ud);
+	}
+	if (ctx->value_copy_func) {
+		new_value = ctx->value_copy_func (value, ctx->ud);
+	}
+
+	g_hash_table_insert (ctx->dst, new_key, new_value);
+}
+/**
+ * Copy all pairs of one hash table to another, optionally transforming
+ * keys and values. Does nothing if src or dst is NULL.
+ * @param src source hash
+ * @param dst destination hash
+ * @param key_copy_func function called to copy or modify keys (or NULL to reuse the pointers)
+ * @param value_copy_func function called to copy or modify values (or NULL to reuse the pointers)
+ * @param ud user data for copy functions
+ */
+void rspamd_hash_table_copy (GHashTable *src, GHashTable *dst,
+		gpointer (*key_copy_func)(gconstpointer data, gpointer ud),
+		gpointer (*value_copy_func)(gconstpointer data, gpointer ud),
+		gpointer ud)
+{
+	struct hash_copy_callback_data ctx;
+
+	if (src == NULL || dst == NULL) {
+		return;
+	}
+
+	ctx.dst = dst;
+	ctx.ud = ud;
+	ctx.key_copy_func = key_copy_func;
+	ctx.value_copy_func = value_copy_func;
+
+	g_hash_table_foreach (src, copy_foreach_callback, &ctx);
+}
+
+/**
+ * Copy function for rspamd_hash_table_copy that duplicates a string
+ * in a memory pool
+ * @param data string to copy (may be NULL)
+ * @param ud memory pool to use
+ * @return copy made by rspamd_mempool_strdup or NULL if data is NULL
+ */
+gpointer
+rspamd_str_pool_copy (gconstpointer data, gpointer ud)
+{
+	rspamd_mempool_t *pool = ud;
+
+	if (!data) {
+		return NULL;
+	}
+
+	return rspamd_mempool_strdup (pool, data);
+}
+
+/**
+ * Parse ipv4 address with optional prefix length ("a.b.c.d" or "a.b.c.d/NN")
+ * @param line string to parse
+ * @param ina destination address, filled by inet_aton
+ * @param mask receives G_MAXUINT32 shifted left by (32 - prefix length);
+ * the prefix length defaults to 32 when it is absent or empty
+ * @return TRUE if both address and prefix length are valid
+ */
+gboolean
+parse_ipmask_v4 (const char *line, struct in_addr *ina, int *mask)
+{
+	const char *slash, *m;
+	char ip_buf[INET_ADDRSTRLEN + 1];
+	gsize ip_len;
+	guint bits = 32;
+
+	slash = strchr (line, '/');
+	ip_len = (slash != NULL) ? (gsize)(slash - line) : strlen (line);
+	if (ip_len >= sizeof (ip_buf)) {
+		/* Over-long input is truncated to the buffer as before */
+		ip_len = sizeof (ip_buf) - 1;
+	}
+
+	/*
+	 * Copy exactly the address part; the old strlcpy call was given the
+	 * address length as buffer size and so dropped its last character
+	 */
+	memcpy (ip_buf, line, ip_len);
+	ip_buf[ip_len] = '\0';
+
+	if (!inet_aton (ip_buf, ina)) {
+		return FALSE;
+	}
+
+	if (slash != NULL && slash[1] != '\0') {
+		/* Also parse mask: one or two decimal digits */
+		m = slash + 1;
+		if (!g_ascii_isdigit (m[0])) {
+			return FALSE;
+		}
+		bits = m[0] - '0';
+		if (g_ascii_isdigit (m[1])) {
+			bits = bits * 10 + (m[1] - '0');
+		}
+		if (bits > 32) {
+			return FALSE;
+		}
+	}
+
+	/* Shifting a 32 bit value by 32 is undefined, so handle /0 explicitly */
+	*mask = (bits == 0) ? 0 : (int)(G_MAXUINT32 << (32 - bits));
+
+	return TRUE;
+}
+
+static volatile sig_atomic_t saved_signo[NSIG]; /* per-signal flags, set to 1 by read_pass_tmp_sig_handler */
+
+static
+void read_pass_tmp_sig_handler (int s)
+{
+
+ saved_signo[s] = 1; /* only record that signal s has arrived */
+}
+
+#ifndef _PATH_TTY
+# define _PATH_TTY "/dev/tty"
+#endif
+
+/**
+ * Read passphrase from the controlling tty with echo turned off
+ * @param buf buffer to fill with a password
+ * @param size size of the buffer
+ * @param rwflag unused flag
+ * @param key unused key
+ * @return size of password read, 0 if the tty cannot be used
+ */
+gint
+rspamd_read_passphrase (gchar *buf, gint size, gint rwflag, gpointer key)
+{
+#ifdef HAVE_PASSPHRASE_H
+	if (readpassphrase ("Enter passphrase: ", buf, size, RPP_ECHO_OFF | RPP_REQUIRE_TTY) == NULL) {
+		return 0;
+	}
+
+	return strlen (buf);
+#else
+	struct sigaction sa, savealrm, saveint, savehup, savequit, saveterm;
+	struct sigaction savetstp, savettin, savettou, savepipe;
+	struct termios term, oterm;
+	gint input, output, i;
+	gchar *end, *p, ch;
+
+restart:
+	if ((input = output = open (_PATH_TTY, O_RDWR)) == -1) {
+		errno = ENOTTY;
+		return 0;
+	}
+	if (fcntl (input, F_SETFD, FD_CLOEXEC) == -1) {
+		msg_warn ("fcntl failed: %d, '%s'", errno, strerror (errno));
+	}
+
+	/* Turn echo off */
+	if (tcgetattr (input, &oterm) != 0) {
+		/* Do not leak the tty descriptor on this error path */
+		close (input);
+		errno = ENOTTY;
+		return 0;
+	}
+	memcpy(&term, &oterm, sizeof(term));
+	term.c_lflag &= ~(ECHO | ECHONL);
+	(void)tcsetattr(input, TCSAFLUSH, &term);
+	(void)write (output, "Enter passphrase: ", sizeof ("Enter passphrase: ") - 1);
+
+	/* Save the current sighandler */
+	for (i = 0; i < NSIG; i++) {
+		saved_signo[i] = 0;
+	}
+	sigemptyset(&sa.sa_mask);
+	sa.sa_flags = 0;
+	sa.sa_handler = read_pass_tmp_sig_handler;
+	(void)sigaction (SIGALRM, &sa, &savealrm);
+	(void)sigaction (SIGHUP, &sa, &savehup);
+	(void)sigaction (SIGINT, &sa, &saveint);
+	(void)sigaction (SIGPIPE, &sa, &savepipe);
+	(void)sigaction (SIGQUIT, &sa, &savequit);
+	(void)sigaction (SIGTERM, &sa, &saveterm);
+	(void)sigaction (SIGTSTP, &sa, &savetstp);
+	(void)sigaction (SIGTTIN, &sa, &savettin);
+	(void)sigaction (SIGTTOU, &sa, &savettou);
+
+	/* Now read a passphrase; input that does not fit the buffer is dropped */
+	p = buf;
+	end = p + size - 1;
+	while (read (input, &ch, 1) == 1 && ch != '\n' && ch != '\r') {
+		if (p < end) {
+			*p++ = ch;
+		}
+	}
+	*p = '\0';
+	(void)write (output, "\n", 1);
+
+	/* Restore terminal state */
+	if (memcmp (&term, &oterm, sizeof (term)) != 0) {
+		while (tcsetattr (input, TCSAFLUSH, &oterm) == -1 &&
+			errno == EINTR && !saved_signo[SIGTTOU]);
+	}
+
+	/* Restore signal handlers */
+	(void)sigaction (SIGALRM, &savealrm, NULL);
+	(void)sigaction (SIGHUP, &savehup, NULL);
+	(void)sigaction (SIGINT, &saveint, NULL);
+	(void)sigaction (SIGQUIT, &savequit, NULL);
+	(void)sigaction (SIGPIPE, &savepipe, NULL);
+	(void)sigaction (SIGTERM, &saveterm, NULL);
+	(void)sigaction (SIGTSTP, &savetstp, NULL);
+	(void)sigaction (SIGTTIN, &savettin, NULL);
+	(void)sigaction (SIGTTOU, &savettou, NULL);
+
+	close (input);
+
+	/* Send signals pending: re-raise everything caught while reading */
+	for (i = 0; i < NSIG; i++) {
+		if (saved_signo[i]) {
+			kill(getpid(), i);
+			switch (i) {
+			case SIGTSTP:
+			case SIGTTIN:
+			case SIGTTOU:
+				/* Start the prompt over after a job control signal */
+				goto restart;
+			}
+		}
+	}
+
+	return p - buf;
+#endif
+}
+
+/**
+ * Check that an address is not one of the placeholder values
+ * @param addr address to check
+ * @return TRUE for an AF_INET address other than INADDR_ANY and INADDR_NONE
+ * or an AF_INET6 address other than the unspecified one; FALSE for any other
+ * value or address family
+ */
+gboolean
+rspamd_ip_is_valid (rspamd_inet_addr_t *addr)
+{
+	const struct in_addr ip4_any = { INADDR_ANY }, ip4_none = { INADDR_NONE };
+	const struct in6_addr ip6_any = IN6ADDR_ANY_INIT;
+
+	if (G_LIKELY (addr->af == AF_INET)) {
+		if (memcmp (&addr->addr.s4.sin_addr, &ip4_any, sizeof (struct in_addr)) == 0) {
+			return FALSE;
+		}
+		if (memcmp (&addr->addr.s4.sin_addr, &ip4_none, sizeof (struct in_addr)) == 0) {
+			return FALSE;
+		}
+
+		return TRUE;
+	}
+
+	if (G_UNLIKELY (addr->af == AF_INET6)) {
+		if (memcmp (&addr->addr.s6.sin6_addr, &ip6_any, sizeof (struct in6_addr)) != 0) {
+			return TRUE;
+		}
+	}
+
+	return FALSE;
+}
+
+/*
+ * GString ucl emitting functions
+ */
+/*
+ * ucl emitter callback: append character c repeated len times to the
+ * GString passed in ud. Always returns 0.
+ */
+static int
+rspamd_gstring_append_character (unsigned char c, size_t len, void *ud)
+{
+	GString *buf = ud;
+	gsize old_len;
+
+	if (len == 1) {
+		g_string_append_c (buf, c);
+	}
+	else {
+		/*
+		 * Let GString grow to the final length so that both len and the
+		 * terminating NUL are maintained by GLib, then fill the new tail.
+		 * The previous code patched buf->len by hand and could leave the
+		 * string without a terminator.
+		 */
+		old_len = buf->len;
+		g_string_set_size (buf, old_len + len);
+		memset (buf->str + old_len, c, len);
+	}
+
+	return 0;
+}
+
+static int
+rspamd_gstring_append_len (const unsigned char *str, size_t len, void *ud)
+{
+ GString *buf = ud; /* ud carries the target GString */
+
+ g_string_append_len (buf, str, len);
+
+ return 0; /* always returns 0 */
+}
+
+static int
+rspamd_gstring_append_int (int64_t val, void *ud)
+{
+ GString *buf = ud; /* ud carries the target GString */
+
+ rspamd_printf_gstring (buf, "%L", (intmax_t)val); /* NOTE(review): presumably %L is the rspamd printf conversion for 64 bit integers - confirm */
+ return 0; /* always returns 0 */
+}
+
+/*
+ * ucl emitter callback: append a double to the GString passed in ud.
+ * Values equal to their int truncation are printed as "%.1f", values within
+ * 0.0000001 of it with DBL_DIG significant digits, everything else as "%f".
+ * Always returns 0.
+ */
+static int
+rspamd_gstring_append_double (double val, void *ud)
+{
+	GString *buf = ud;
+	const double delta = 0.0000001;
+	const double truncated = (double)(int)val;
+
+	if (val == truncated) {
+		rspamd_printf_gstring (buf, "%.1f", val);
+		return 0;
+	}
+
+	if (fabs (val - truncated) < delta) {
+		/* Write at maximum precision */
+		rspamd_printf_gstring (buf, "%.*g", DBL_DIG, val);
+		return 0;
+	}
+
+	rspamd_printf_gstring (buf, "%f", val);
+
+	return 0;
+}
+
+/**
+ * Emit a ucl object into a GString using the GString emitter callbacks
+ * @param obj object to emit
+ * @param emit_type output format passed to ucl_object_emit_full
+ * @param target string that receives the output
+ */
+void
+rspamd_ucl_emit_gstring (ucl_object_t *obj, enum ucl_emitter emit_type, GString *target)
+{
+	struct ucl_emitter_functions gstring_funcs = {
+		.ucl_emitter_append_character = rspamd_gstring_append_character,
+		.ucl_emitter_append_len = rspamd_gstring_append_len,
+		.ucl_emitter_append_int = rspamd_gstring_append_int,
+		.ucl_emitter_append_double = rspamd_gstring_append_double,
+		.ud = target
+	};
+
+	ucl_object_emit_full (obj, emit_type, &gstring_funcs);
+}
+
+/**
+ * Accept a connection, store the peer address and make the new socket
+ * non-blocking and close-on-exec
+ * @param sock listening socket
+ * @param addr filled with peer address, its length and family
+ * @return new descriptor, 0 if accept failed with EAGAIN, EINTR or
+ * EWOULDBLOCK, -1 on any other error (errno is preserved)
+ */
+gint
+rspamd_accept_from_socket (gint sock, rspamd_inet_addr_t *addr)
+{
+	gint nfd, serrno;
+	socklen_t len = sizeof (addr->addr.ss);
+
+	nfd = accept (sock, &addr->addr.sa, &len);
+	if (nfd == -1) {
+		if (errno == EAGAIN || errno == EINTR || errno == EWOULDBLOCK) {
+			return 0;
+		}
+		return -1;
+	}
+
+	addr->slen = len;
+	addr->af = addr->addr.sa.sa_family;
+
+	if (make_socket_nonblocking (nfd) >= 0) {
+		/* Set close on exec */
+		if (fcntl (nfd, F_SETFD, FD_CLOEXEC) != -1) {
+			return (nfd);
+		}
+		msg_warn ("fcntl failed: %d, '%s'", errno, strerror (errno));
+	}
+
+	/* Failure: close the descriptor but keep the errno of the failed call */
+	serrno = errno;
+	close (nfd);
+	errno = serrno;
+	return (-1);
+
+}
+
+/**
+ * Parse a textual IPv6 or IPv4 address (IPv6 is tried first)
+ * @param target filled with address, family and sockaddr length on success
+ * @param src string to parse
+ * @return TRUE if src is a valid address; on failure the family fields of
+ * target are not modified
+ */
+gboolean
+rspamd_parse_inet_address (rspamd_inet_addr_t *target, const char *src)
+{
+	if (inet_pton (AF_INET6, src, &target->addr.s6.sin6_addr) == 1) {
+		target->af = AF_INET6;
+		target->slen = sizeof (target->addr.s6);
+	}
+	else if (inet_pton (AF_INET, src, &target->addr.s4.sin_addr) == 1) {
+		target->af = AF_INET;
+		target->slen = sizeof (target->addr.s4);
+	}
+	else {
+		/* Nothing parsed: do not copy a possibly uninitialised target->af */
+		return FALSE;
+	}
+
+	target->addr.sa.sa_family = target->af;
+
+	return TRUE;
+}
+
+/**
+ * Get a printable form of an address
+ * @param addr address to print
+ * @return result of inet_ntop written to a function-local static buffer for
+ * AF_INET and AF_INET6, the socket path for AF_UNIX, "undefined" otherwise
+ */
+const char*
+rspamd_inet_address_to_string (rspamd_inet_addr_t *addr)
+{
+	/* Static storage: the result is overwritten by the next call */
+	static char addr_str[INET6_ADDRSTRLEN + 1];
+	const void *raw;
+
+	if (addr->af == AF_UNIX) {
+		return addr->addr.su.sun_path;
+	}
+
+	if (addr->af == AF_INET) {
+		raw = &addr->addr.s4.sin_addr;
+	}
+	else if (addr->af == AF_INET6) {
+		raw = &addr->addr.s6.sin6_addr;
+	}
+	else {
+		return "undefined";
+	}
+
+	return inet_ntop (addr->af, raw, addr_str, sizeof (addr_str));
+}
+
+/**
+ * Get the port of an address in host byte order
+ * @param addr address
+ * @return port for AF_INET and AF_INET6 addresses, 0 for any other family
+ */
+uint16_t
+rspamd_inet_address_get_port (rspamd_inet_addr_t *addr)
+{
+	if (addr->af == AF_INET) {
+		return ntohs (addr->addr.s4.sin_port);
+	}
+
+	if (addr->af == AF_INET6) {
+		return ntohs (addr->addr.s6.sin6_port);
+	}
+
+	return 0;
+}
diff --git a/src/libutil/util.h b/src/libutil/util.h
new file mode 100644
index 000000000..0605fe87d
--- /dev/null
+++ b/src/libutil/util.h
@@ -0,0 +1,491 @@
+#ifndef RSPAMD_UTIL_H
+#define RSPAMD_UTIL_H
+
+#include "config.h"
+#include "mem_pool.h"
+#include "radix.h"
+#include "statfile.h"
+#include "printf.h"
+#include "fstring.h"
+#include "ucl.h"
+
+struct config_file;
+struct rspamd_main;
+struct workq;
+struct statfile;
+struct classifier_config;
+
+/**
+ * Union that is used for storing sockaddrs: the same storage can be accessed
+ * as sockaddr_storage, generic sockaddr, IPv4, IPv6 or unix socket address
+ */
+union sa_union {
+ struct sockaddr_storage ss;
+ struct sockaddr sa;
+ struct sockaddr_in s4;
+ struct sockaddr_in6 s6;
+ struct sockaddr_un su;
+};
+
+typedef struct _rspamd_inet_addr_s {
+ union sa_union addr; /* socket address storage */
+ socklen_t slen; /* length of the address stored in addr */
+ int af; /* address family of addr */
+} rspamd_inet_addr_t;
+
+
+/*
+ * Create socket and bind or connect it to specified address and port
+ */
+gint make_tcp_socket (struct addrinfo *, gboolean is_server, gboolean async);
+/*
+ * Create socket and bind or connect it to specified address and port
+ */
+gint make_udp_socket (struct addrinfo *, gboolean is_server, gboolean async);
+
+/*
+ * Create and bind or connect unix socket
+ */
+gint make_unix_socket (const gchar *, struct sockaddr_un *, gint type, gboolean is_server, gboolean async);
+
+/**
+ * Make a universal socket
+ * @param credits host, ip or path to unix socket
+ * @param port port (used for network sockets)
+ * @param type type of socket (SO_STREAM or SO_DGRAM)
+ * @param async make this socket asynced
+ * @param is_server make this socket as server socket
+ * @param try_resolve try name resolution for a socket (BLOCKING)
+ */
+gint make_universal_socket (const gchar *credits, guint16 port, gint type,
+ gboolean async, gboolean is_server, gboolean try_resolve);
+
+/**
+ * Make a universal sockets
+ * @param credits host, ip or path to unix socket (several items may be separated by ',')
+ * @param port port (used for network sockets)
+ * @param type type of socket (SO_STREAM or SO_DGRAM)
+ * @param async make this socket asynced
+ * @param is_server make this socket as server socket
+ * @param try_resolve try name resolution for a socket (BLOCKING)
+ */
+GList* make_universal_sockets_list (const gchar *credits, guint16 port, gint type,
+ gboolean async, gboolean is_server, gboolean try_resolve);
+/*
+ * Create socketpair
+ */
+gint make_socketpair (gint pair[2]);
+
+/*
+ * Write pid to file
+ */
+gint write_pid (struct rspamd_main *);
+
+/*
+ * Make specified socket non-blocking
+ */
+gint make_socket_nonblocking (gint);
+/*
+ * Make specified socket blocking
+ */
+gint make_socket_blocking (gint);
+
+/*
+ * Poll a sync socket for specified events
+ */
+gint poll_sync_socket (gint fd, gint timeout, short events);
+
+/*
+ * Init signals
+ */
+#ifdef HAVE_SA_SIGINFO
+void init_signals (struct sigaction *sa, void (*sig_handler)(gint, siginfo_t *, void *));
+#else
+void init_signals (struct sigaction *sa, void (*sig_handler)(gint));
+#endif
+
+/*
+ * Send specified signal to each worker
+ */
+void pass_signal_worker (GHashTable *, gint );
+/*
+ * Convert string to lowercase
+ */
+void convert_to_lowercase (gchar *str, guint size);
+
+#ifndef HAVE_SETPROCTITLE
+/*
+ * Process title utility functions
+ */
+gint init_title(gint argc, gchar *argv[], gchar *envp[]);
+gint setproctitle(const gchar *fmt, ...);
+#endif
+
+#ifndef HAVE_PIDFILE
+/*
+ * Pidfile functions from FreeBSD libutil code.
+ * Without HAVE_PIDFILE the handle type and the rspamd_pidfile_* functions are
+ * declared here; otherwise they are aliases for struct pidfh and pidfile_*.
+ */
+typedef struct rspamd_pidfh_s {
+ gint pf_fd;
+#ifdef HAVE_PATH_MAX
+ gchar pf_path[PATH_MAX + 1];
+#elif defined(HAVE_MAXPATHLEN)
+ gchar pf_path[MAXPATHLEN + 1];
+#else
+ gchar pf_path[1024 + 1];
+#endif
+ dev_t pf_dev;
+ ino_t pf_ino;
+} rspamd_pidfh_t;
+rspamd_pidfh_t *rspamd_pidfile_open(const gchar *path, mode_t mode, pid_t *pidptr);
+gint rspamd_pidfile_write(rspamd_pidfh_t *pfh);
+gint rspamd_pidfile_close(rspamd_pidfh_t *pfh);
+gint rspamd_pidfile_remove(rspamd_pidfh_t *pfh);
+#else
+typedef struct pidfh rspamd_pidfh_t;
+#define rspamd_pidfile_open pidfile_open
+#define rspamd_pidfile_write pidfile_write
+#define rspamd_pidfile_close pidfile_close
+#define rspamd_pidfile_remove pidfile_remove
+#endif
+
+/*
+ * Replace %r with rcpt value and %f with from value, new string is allocated in pool
+ */
+gchar* resolve_stat_filename (rspamd_mempool_t *pool, gchar *pattern, gchar *rcpt, gchar *from);
+#ifdef HAVE_CLOCK_GETTIME
+/*
+ * Calculate check time with specified resolution of timer
+ */
+const gchar* calculate_check_time (struct timeval *tv, struct timespec *begin, gint resolution, guint32 *scan_ms);
+#else
+const gchar* calculate_check_time (struct timeval *begin, gint resolution, guint32 *scan_ms);
+#endif
+
+/*
+ * File locking functions
+ */
+gboolean lock_file (gint fd, gboolean async);
+gboolean unlock_file (gint fd, gboolean async);
+
+/*
+ * Hash table utility functions for case insensitive hashing
+ */
+guint rspamd_strcase_hash (gconstpointer key);
+gboolean rspamd_strcase_equal (gconstpointer v, gconstpointer v2);
+
+/*
+ * Hash table utility functions for case sensitive hashing
+ */
+guint rspamd_str_hash (gconstpointer key);
+gboolean rspamd_str_equal (gconstpointer v, gconstpointer v2);
+
+
+/*
+ * Hash table utility functions for hashing fixed strings
+ */
+guint fstr_strcase_hash (gconstpointer key);
+gboolean fstr_strcase_equal (gconstpointer v, gconstpointer v2);
+
+/*
+ * Google perf-tools initialization function
+ */
+void gperf_profiler_init (struct config_file *cfg, const gchar *descr);
+
+/*
+ * Workarounds for older versions of glib
+ */
+#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION < 22))
+void g_ptr_array_unref (GPtrArray *array);
+#endif
+#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION < 14))
+void g_queue_clear (GQueue *queue);
+#endif
+
+
+/**
+ * Copy src to dest limited to len. Unlike the standard strlcpy(3), rspamd strlcpy does not
+ * traverse the whole string, so it can be used for strings that are not NUL terminated. This is
+ * more like memccpy(dst, src, '\0', size)
+ *
+ * @param dst destination string
+ * @param src source string
+ * @param siz length of destination buffer
+ * @return bytes copied
+ */
+gsize rspamd_strlcpy (gchar *dst, const gchar *src, gsize siz);
+
+/**
+ * Lowercase strlcpy variant
+ * @param dst
+ * @param src
+ * @param siz
+ * @return
+ */
+gsize rspamd_strlcpy_tolower (gchar *dst, const gchar *src, gsize siz);
+
+/*
+ * Convert milliseconds or fractional seconds to timeval fields and
+ * timeval back to milliseconds
+ */
+#define msec_to_tv(msec, tv) do { (tv)->tv_sec = (msec) / 1000; (tv)->tv_usec = ((msec) - (tv)->tv_sec * 1000) * 1000; } while(0)
+#define double_to_tv(dbl, tv) do { (tv)->tv_sec = (int)(dbl); (tv)->tv_usec = ((dbl) - (int)(dbl))*1000*1000; } while(0)
+/* The whole expansion is parenthesized so that it can be used inside larger expressions */
+#define tv_to_msec(tv) ((tv)->tv_sec * 1000 + (tv)->tv_usec / 1000)
+
+/* Compare two emails for building emails tree */
+gint compare_email_func (gconstpointer a, gconstpointer b);
+
+/* Compare two urls for building urls tree */
+gint compare_url_func (gconstpointer a, gconstpointer b);
+
+/*
+ * Find string find in string s ignoring case
+ */
+gchar* rspamd_strncasestr (const gchar *s, const gchar *find, gint len);
+
+/*
+ * Try to convert string of length to long
+ */
+gboolean rspamd_strtol (const gchar *s, gsize len, glong *value);
+
+/*
+ * Try to convert string of length to unsigned long
+ */
+gboolean rspamd_strtoul (const gchar *s, gsize len, gulong *value);
+
+/**
+ * Try to allocate a file on filesystem (using fallocate or posix_fallocate)
+ * @param fd descriptor
+ * @param offset offset of file
+ * @param len length to allocate
+ * @return -1 in case of failure
+ */
+gint rspamd_fallocate (gint fd, off_t offset, off_t len);
+
+/**
+ * Return worker's control structure by its type
+ * @param type
+ * @return worker's control structure or NULL
+ */
+extern worker_t* get_worker_by_type (GQuark type);
+
+/**
+ * Utils for working with threads to be compatible with all glib versions.
+ * The mutex wraps GMutex when GLib is 2.x with minor version above 30 and
+ * GStaticMutex otherwise.
+ */
+typedef struct rspamd_mutex_s {
+#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION > 30))
+ GMutex mtx;
+#else
+ GStaticMutex mtx;
+#endif
+} rspamd_mutex_t;
+
+typedef struct rspamd_rwlock_s {
+#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION > 30))
+ GRWLock rwlock;
+#else
+ GStaticRWLock rwlock;
+#endif
+} rspamd_rwlock_t; /* GRWLock or GStaticRWLock depending on the GLib version */
+
+
+/**
+ * Create new mutex
+ * @return mutex or NULL
+ */
+rspamd_mutex_t* rspamd_mutex_new (void);
+
+/**
+ * Lock mutex
+ * @param mtx
+ */
+void rspamd_mutex_lock (rspamd_mutex_t *mtx);
+
+/**
+ * Unlock mutex
+ * @param mtx
+ */
+void rspamd_mutex_unlock (rspamd_mutex_t *mtx);
+
+/**
+ * Clear rspamd mutex
+ * @param mtx
+ */
+void rspamd_mutex_free (rspamd_mutex_t *mtx);
+
+/**
+ * Create new rwlock
+ * @return
+ */
+rspamd_rwlock_t* rspamd_rwlock_new (void);
+
+/**
+ * Lock rwlock for writing
+ * @param mtx
+ */
+void rspamd_rwlock_writer_lock (rspamd_rwlock_t *mtx);
+
+/**
+ * Lock rwlock for reading
+ * @param mtx
+ */
+void rspamd_rwlock_reader_lock (rspamd_rwlock_t *mtx);
+
+/**
+ * Unlock rwlock from writing
+ * @param mtx
+ */
+void rspamd_rwlock_writer_unlock (rspamd_rwlock_t *mtx);
+
+/**
+ * Unlock rwlock from reading
+ * @param mtx
+ */
+void rspamd_rwlock_reader_unlock (rspamd_rwlock_t *mtx);
+
+/**
+ * Free rwlock
+ * @param mtx
+ */
+void rspamd_rwlock_free (rspamd_rwlock_t *mtx);
+
+static inline void
+rspamd_cond_wait (GCond *cond, rspamd_mutex_t *mtx)
+{
+#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION > 30))
+ g_cond_wait (cond, &mtx->mtx);
+#else
+ g_cond_wait (cond, g_static_mutex_get_mutex (&mtx->mtx)); /* GStaticMutex has to be converted to GMutex for g_cond_wait */
+#endif
+}
+
+/**
+ * Create new named thread
+ * @param name name pattern
+ * @param func function to start
+ * @param data data to pass to function
+ * @param err error pointer
+ * @return new thread object that can be joined
+ */
+GThread* rspamd_create_thread (const gchar *name, GThreadFunc func, gpointer data, GError **err);
+
+/**
+ * Return 32bit murmur hash value for specified input
+ * @param in input data
+ * @param len length of the input data
+ * @code
+ * MurmurHash3 was created by Austin Appleby in 2008. The canonical
+ * implementations are in C++ and placed in the public domain.
+ *
+ * https://sites.google.com/site/murmurhash/
+ *
+ * Seungyoung Kim has ported its canonical implementation to C language
+ * in 2012 and published it as a part of qLibc component.
+ * @endcode
+ * @return
+ */
+guint32 murmur32_hash (const guint8 *in, gsize len);
+
+/**
+ * Compute 128bit murmur hash value for specified input
+ * @param in input data
+ * @param len length of the input data
+ * @param out array of 2 guint64 variables that receives the hash value
+ * @code
+ * MurmurHash3 was created by Austin Appleby in 2008. The canonical
+ * implementations are in C++ and placed in the public domain.
+ *
+ * https://sites.google.com/site/murmurhash/
+ *
+ * Seungyoung Kim has ported its canonical implementation to C language
+ * in 2012 and published it as a part of qLibc component.
+ * @endcode
+ */
+void murmur128_hash (const guint8 *in, gsize len, guint64 out[]);
+
+/**
+ * Deep copy of one hash table to another
+ * @param src source hash
+ * @param dst destination hash
+ * @param key_copy_func function called to copy or modify keys (or NULL)
+ * @param value_copy_func function called to copy or modify values (or NULL)
+ * @param ud user data for copy functions
+ */
+void rspamd_hash_table_copy (GHashTable *src, GHashTable *dst,
+ gpointer (*key_copy_func)(gconstpointer data, gpointer ud),
+ gpointer (*value_copy_func)(gconstpointer data, gpointer ud),
+ gpointer ud);
+
+/**
+ * Utility function to provide mem_pool copy for rspamd_hash_table_copy function
+ * @param data string to copy
+ * @param ud memory pool to use
+ * @return
+ */
+gpointer rspamd_str_pool_copy (gconstpointer data, gpointer ud);
+
+/**
+ * Parse ipv4 address with optional mask in CIDR format
+ * @param line cidr notation of ipv4 address
+ * @param ina destination address
+ * @param mask destination mask
+ * @return
+ */
+gboolean parse_ipmask_v4 (const char *line, struct in_addr *ina, int *mask);
+
+/**
+ * Read passphrase from tty
+ * @param buf buffer to fill with a password
+ * @param size size of the buffer
+ * @param rwflag unused flag
+ * @param key unused key
+ * @return size of password read
+ */
+gint rspamd_read_passphrase (gchar *buf, gint size, gint rwflag, gpointer key);
+
+/**
+ * Check whether specified ip is valid (not INADDR_ANY or INADDR_NONE) for ipv4 or ipv6
+ * @param addr inet address (AF_INET or AF_INET6) to check
+ * @return TRUE if the address is valid
+ */
+gboolean rspamd_ip_is_valid (rspamd_inet_addr_t *addr);
+
+/**
+ * Emit UCL object to gstring
+ * @param obj object to emit
+ * @param emit_type emitter type
+ * @param target target string
+ */
+void rspamd_ucl_emit_gstring (ucl_object_t *obj, enum ucl_emitter emit_type, GString *target);
+
+/**
+ * Accept from listening socket filling addr structure
+ * @param sock listening socket
+ * @param addr
+ * @return
+ */
+gint rspamd_accept_from_socket (gint sock, rspamd_inet_addr_t *addr);
+
+/**
+ * Try to parse address from string
+ * @param target target to fill
+ * @param src IP string representation
+ * @return TRUE if addr has been parsed
+ */
+gboolean rspamd_parse_inet_address (rspamd_inet_addr_t *target, const char *src);
+
+/**
+ * Returns string representation of inet address
+ * @param addr
+ * @return statically allocated string pointer (not thread safe)
+ */
+const char* rspamd_inet_address_to_string (rspamd_inet_addr_t *addr);
+
+/**
+ * Returns port number for the specified inet address in host byte order
+ * @param addr
+ * @return
+ */
+uint16_t rspamd_inet_address_get_port (rspamd_inet_addr_t *addr);
+
+#endif