diff options
Diffstat (limited to 'src/libutil')
35 files changed, 15063 insertions, 0 deletions
# diff --git a/src/libutil/CMakeLists.txt b/src/libutil/CMakeLists.txt new file mode 100644 index 000000000..2a5ab46c5 --- /dev/null +++ b/src/libutil/CMakeLists.txt @@ -0,0 +1,50 @@
# Librspamd-util
# Sources that are compiled into the rspamd-util library target below.
SET(LIBRSPAMDUTILSRC			aio_event.c
								bloom.c
								diff.c
								fstring.c
								fuzzy.c
								hash.c
								http.c
								logger.c
								map.c
								memcached.c
								mem_pool.c
								printf.c
								radix.c
								rrd.c
								trie.c
								upstream.c
								util.c)
# Rspamdutil
# LINK_TYPE is set outside this file and selects how the library is built.
ADD_LIBRARY(rspamd-util ${LINK_TYPE} ${LIBRSPAMDUTILSRC})
# Strict aliasing optimisations are switched off for GCC builds of this target.
IF(CMAKE_COMPILER_IS_GNUCC)
SET_TARGET_PROPERTIES(rspamd-util PROPERTIES COMPILE_FLAGS "-fno-strict-aliasing")
ENDIF(CMAKE_COMPILER_IS_GNUCC)


# Libraries the target is linked against.
TARGET_LINK_LIBRARIES(rspamd-util ${RSPAMD_REQUIRED_LIBRARIES})
TARGET_LINK_LIBRARIES(rspamd-util pcre)
TARGET_LINK_LIBRARIES(rspamd-util ucl)
TARGET_LINK_LIBRARIES(rspamd-util ottery)
TARGET_LINK_LIBRARIES(rspamd-util rspamd-http-parser)
TARGET_LINK_LIBRARIES(rspamd-util event)
TARGET_LINK_LIBRARIES(rspamd-util xxhash)
# OpenSSL is linked only when it was found by the parent project.
IF(OPENSSL_FOUND)
	TARGET_LINK_LIBRARIES(rspamd-util ${OPENSSL_LIBRARIES})
ENDIF(OPENSSL_FOUND)

# The VERSION property is set for every build except Debian builds.
IF(NOT DEBIAN_BUILD)
SET_TARGET_PROPERTIES(rspamd-util PROPERTIES VERSION ${RSPAMD_VERSION})
ENDIF(NOT DEBIAN_BUILD)

# NOTE(review): CMAKE_CURRENT_SOURCE_DIR is this directory (src/libutil);
# confirm that contrib/lgpl really lives below it and not below the top-level
# source directory.
IF(GLIB_COMPAT)
	INCLUDE_DIRECTORIES("${CMAKE_CURRENT_SOURCE_DIR}/contrib/lgpl")
	TARGET_LINK_LIBRARIES(rspamd-util glibadditions)
ENDIF(GLIB_COMPAT)

# The library is installed only when NO_SHARED is "OFF".
IF(NO_SHARED MATCHES "OFF")
	INSTALL(TARGETS rspamd-util
		LIBRARY DESTINATION ${LIBDIR}
		PUBLIC_HEADER DESTINATION include)
ENDIF(NO_SHARED MATCHES "OFF")
\ No newline at end of file diff --git a/src/libutil/aio_event.c b/src/libutil/aio_event.c new file mode 100644 index 000000000..ccda37083 --- /dev/null +++ b/src/libutil/aio_event.c @@ -0,0 +1,487 @@ +/* Copyright (c) 2010-2011, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "config.h" +#include "aio_event.h" +#include "main.h" + +#ifdef HAVE_SYS_EVENTFD_H +#include <sys/eventfd.h> +#endif + +#ifdef HAVE_AIO_H +#include <aio.h> +#endif + +/* Linux syscall numbers */ +#if defined(__i386__) +# define SYS_io_setup 245 +# define SYS_io_destroy 246 +# define SYS_io_getevents 247 +# define SYS_io_submit 248 +# define SYS_io_cancel 249 +#elif defined(__x86_64__) +# define SYS_io_setup 206 +# define SYS_io_destroy 207 +# define SYS_io_getevents 208 +# define SYS_io_submit 209 +# define SYS_io_cancel 210 +#else +# warning "aio is not supported on this platform, please contact author for details" +# define SYS_io_setup 0 +# define SYS_io_destroy 0 +# define SYS_io_getevents 0 +# define SYS_io_submit 0 +# define SYS_io_cancel 0 +#endif + +#define SYS_eventfd 323 +#define MAX_AIO_EV 64 + +struct io_cbdata { + gint fd; + rspamd_aio_cb cb; + guint64 len; + gpointer buf; + gpointer io_buf; + gpointer ud; +}; + +#ifdef LINUX + +/* Linux specific mappings and utilities to avoid using of libaio */ + +typedef unsigned long aio_context_t; + +typedef enum io_iocb_cmd { + IO_CMD_PREAD = 0, + IO_CMD_PWRITE = 1, + + IO_CMD_FSYNC = 2, + IO_CMD_FDSYNC = 3, + + IO_CMD_POLL = 5, + IO_CMD_NOOP = 6, +} io_iocb_cmd_t; + +#if defined(__LITTLE_ENDIAN) +#define PADDED(x,y) x, y +#elif defined(__BIG_ENDIAN) +#define PADDED(x,y) y, x +#else +#error edit for your odd byteorder. +#endif + +/* + * we always use a 64bit off_t when communicating + * with userland. its up to libraries to do the + * proper padding and aio_error abstraction + */ + +struct iocb { + /* these are internal to the kernel/libc. 
*/ + guint64 aio_data; /* data to be returned in event's data */ + guint32 PADDED(aio_key, aio_reserved1); + /* the kernel sets aio_key to the req # */ + + /* common fields */ + guint16 aio_lio_opcode; /* see IOCB_CMD_ above */ + gint16 aio_reqprio; + guint32 aio_fildes; + + guint64 aio_buf; + guint64 aio_nbytes; + gint64 aio_offset; + + /* extra parameters */ + guint64 aio_reserved2; /* TODO: use this for a (struct sigevent *) */ + + /* flags for the "struct iocb" */ + guint32 aio_flags; + + /* + * if the IOCB_FLAG_RESFD flag of "aio_flags" is set, this is an + * eventfd to signal AIO readiness to + */ + guint32 aio_resfd; +}; + +struct io_event { + guint64 data; /* the data field from the iocb */ + guint64 obj; /* what iocb this event came from */ + gint64 res; /* result code for this event */ + gint64 res2; /* secondary result */ +}; + +/* Linux specific io calls */ +static int +io_setup (guint nr_reqs, aio_context_t *ctx) +{ + return syscall (SYS_io_setup, nr_reqs, ctx); +} + +static int +io_destroy (aio_context_t ctx) +{ + return syscall (SYS_io_destroy, ctx); +} + +static int +io_getevents (aio_context_t ctx, long min_nr, long nr, struct io_event *events, struct timespec *tmo) +{ + return syscall (SYS_io_getevents, ctx, min_nr, nr, events, tmo); +} + +static int +io_submit (aio_context_t ctx, long n, struct iocb **paiocb) +{ + return syscall (SYS_io_submit, ctx, n, paiocb); +} + +static int +io_cancel (aio_context_t ctx, struct iocb *iocb, struct io_event *result) +{ + return syscall (SYS_io_cancel, ctx, iocb, result); +} + +# ifndef HAVE_SYS_EVENTFD_H +static int +eventfd (guint initval, guint flags) +{ + return syscall (SYS_eventfd, initval); +} +# endif + +#endif + +/** + * AIO context + */ +struct aio_context { + struct event_base *base; + gboolean has_aio; /**< Whether we have aio support on a system */ +#ifdef LINUX + /* Eventfd variant */ + gint event_fd; + struct event eventfd_ev; + aio_context_t io_ctx; +#elif defined(HAVE_AIO_H) + /* POSIX aio */ + 
struct event rtsigs[128]; +#endif +}; + +#ifdef LINUX +/* Eventfd read callback */ +static void +rspamd_eventfdcb (gint fd, gshort what, gpointer ud) +{ + struct aio_context *ctx = ud; + guint64 ready; + gint done, i; + struct io_event event[32]; + struct timespec ts; + struct io_cbdata *ev_data; + + /* Eventfd returns number of events ready got from kernel */ + if (read (fd, &ready, 8) != 8) { + if (errno == EAGAIN) { + return; + } + msg_err ("eventfd read returned error: %s", strerror (errno)); + } + + ts.tv_sec = 0; + ts.tv_nsec = 0; + + while (ready) { + /* Get events ready */ + done = io_getevents (ctx->io_ctx, 1, 32, event, &ts); + + if (done > 0) { + ready -= done; + + for (i = 0; i < done; i ++) { + ev_data = (struct io_cbdata *) (uintptr_t) event[i].data; + /* Call this callback */ + ev_data->cb (ev_data->fd, event[i].res, ev_data->len, ev_data->buf, ev_data->ud); + if (ev_data->io_buf) { + free (ev_data->io_buf); + } + g_slice_free1 (sizeof (struct io_cbdata), ev_data); + } + } + else if (done == 0) { + /* No more events are ready */ + return; + } + else { + msg_err ("io_getevents failed: %s", strerror (errno)); + return; + } + } +} + +#endif + +/** + * Initialize aio with specified event base + */ +struct aio_context* +rspamd_aio_init (struct event_base *base) +{ + struct aio_context *new; + + /* First of all we need to detect which type of aio we can try to use */ + new = g_malloc0 (sizeof (struct aio_context)); + new->base = base; + +#ifdef LINUX + /* On linux we are trying to use io (3) and eventfd for notifying */ + new->event_fd = eventfd (0, 0); + if (new->event_fd == -1) { + msg_err ("eventfd failed: %s", strerror (errno)); + } + else { + /* Set this socket non-blocking */ + if (make_socket_nonblocking (new->event_fd) == -1) { + msg_err ("non blocking for eventfd failed: %s", strerror (errno)); + close (new->event_fd); + } + else { + event_set (&new->eventfd_ev, new->event_fd, EV_READ|EV_PERSIST, rspamd_eventfdcb, new); + event_base_set 
(new->base, &new->eventfd_ev); + event_add (&new->eventfd_ev, NULL); + if (io_setup (MAX_AIO_EV, &new->io_ctx) == -1) { + msg_err ("io_setup failed: %s", strerror (errno)); + close (new->event_fd); + } + else { + new->has_aio = TRUE; + } + } + } +#elif defined(HAVE_AIO_H) + /* TODO: implement this */ +#endif + + return new; +} + +/** + * Open file for aio + */ +gint +rspamd_aio_open (struct aio_context *ctx, const gchar *path, int flags) +{ + gint fd = -1; + /* Fallback */ + if (!ctx->has_aio) { + return open (path, flags); + } +#ifdef LINUX + + fd = open (path, flags | O_DIRECT); + + return fd; +#elif defined(HAVE_AIO_H) + fd = open (path, flags); +#endif + + return fd; +} + +/** + * Asynchronous read of file + */ +gint +rspamd_aio_read (gint fd, gpointer buf, guint64 len, guint64 offset, struct aio_context *ctx, rspamd_aio_cb cb, gpointer ud) +{ + struct io_cbdata *cbdata; + gint r = -1; + + if (ctx->has_aio) { +#ifdef LINUX + struct iocb *iocb[1]; + + cbdata = g_slice_alloc (sizeof (struct io_cbdata)); + cbdata->cb = cb; + cbdata->buf = buf; + cbdata->len = len; + cbdata->ud = ud; + cbdata->fd = fd; + cbdata->io_buf = NULL; + + iocb[0] = alloca (sizeof (struct iocb)); + memset (iocb[0], 0, sizeof (struct iocb)); + iocb[0]->aio_fildes = fd; + iocb[0]->aio_lio_opcode = IO_CMD_PREAD; + iocb[0]->aio_reqprio = 0; + iocb[0]->aio_buf = (guint64)((uintptr_t)buf); + iocb[0]->aio_nbytes = len; + iocb[0]->aio_offset = offset; + iocb[0]->aio_flags |= (1 << 0) /* IOCB_FLAG_RESFD */; + iocb[0]->aio_resfd = ctx->event_fd; + iocb[0]->aio_data = (guint64)((uintptr_t)cbdata); + + /* Iocb is copied to kernel internally, so it is safe to put it on stack */ + if (io_submit (ctx->io_ctx, 1, iocb) == 1) { + return len; + } + else { + if (errno == EAGAIN || errno == ENOSYS) { + /* Fall back to sync read */ + goto blocking; + } + return -1; + } + +#elif defined(HAVE_AIO_H) +#endif + } + else { + /* Blocking variant */ +blocking: +#ifdef _LARGEFILE64_SOURCE + r = lseek64 (fd, offset, 
SEEK_SET); +#else + r = lseek (fd, offset, SEEK_SET); +#endif + if (r > 0) { + r = read (fd, buf, len); + if (r >= 0) { + cb (fd, 0, r, buf, ud); + } + else { + cb (fd, r, -1, buf, ud); + } + } + } + + return r; +} + +/** + * Asynchronous write of file + */ +gint +rspamd_aio_write (gint fd, gpointer buf, guint64 len, guint64 offset, struct aio_context *ctx, rspamd_aio_cb cb, gpointer ud) +{ + struct io_cbdata *cbdata; + gint r = -1; + + if (ctx->has_aio) { +#ifdef LINUX + struct iocb *iocb[1]; + + cbdata = g_slice_alloc (sizeof (struct io_cbdata)); + cbdata->cb = cb; + cbdata->buf = buf; + cbdata->len = len; + cbdata->ud = ud; + cbdata->fd = fd; + /* We need to align pointer on boundary of 512 bytes here */ + if (posix_memalign (&cbdata->io_buf, 512, len) != 0) { + return -1; + } + memcpy (cbdata->io_buf, buf, len); + + iocb[0] = alloca (sizeof (struct iocb)); + memset (iocb[0], 0, sizeof (struct iocb)); + iocb[0]->aio_fildes = fd; + iocb[0]->aio_lio_opcode = IO_CMD_PWRITE; + iocb[0]->aio_reqprio = 0; + iocb[0]->aio_buf = (guint64)((uintptr_t)cbdata->io_buf); + iocb[0]->aio_nbytes = len; + iocb[0]->aio_offset = offset; + iocb[0]->aio_flags |= (1 << 0) /* IOCB_FLAG_RESFD */; + iocb[0]->aio_resfd = ctx->event_fd; + iocb[0]->aio_data = (guint64)((uintptr_t)cbdata); + + /* Iocb is copied to kernel internally, so it is safe to put it on stack */ + if (io_submit (ctx->io_ctx, 1, iocb) == 1) { + return len; + } + else { + if (errno == EAGAIN || errno == ENOSYS) { + /* Fall back to sync read */ + goto blocking; + } + return -1; + } + +#elif defined(HAVE_AIO_H) +#endif + } + else { + /* Blocking variant */ +blocking: +#ifdef _LARGEFILE64_SOURCE + r = lseek64 (fd, offset, SEEK_SET); +#else + r = lseek (fd, offset, SEEK_SET); +#endif + if (r > 0) { + r = write (fd, buf, len); + if (r >= 0) { + cb (fd, 0, r, buf, ud); + } + else { + cb (fd, r, -1, buf, ud); + } + } + } + + return r; +} + +/** + * Close of aio operations + */ +gint +rspamd_aio_close (gint fd, struct aio_context 
*ctx) +{ + gint r = -1; + + if (ctx->has_aio) { +#ifdef LINUX + struct iocb iocb; + struct io_event ev; + + memset (&iocb, 0, sizeof (struct iocb)); + iocb.aio_fildes = fd; + iocb.aio_lio_opcode = IO_CMD_NOOP; + + /* Iocb is copied to kernel internally, so it is safe to put it on stack */ + r = io_cancel (ctx->io_ctx, &iocb, &ev); + close (fd); + return r; + +#elif defined(HAVE_AIO_H) +#endif + } + + r = close (fd); + + return r; +} diff --git a/src/libutil/aio_event.h b/src/libutil/aio_event.h new file mode 100644 index 000000000..45f6015de --- /dev/null +++ b/src/libutil/aio_event.h @@ -0,0 +1,67 @@ +/* Copyright (c) 2010-2011, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/


#ifndef AIO_EVENT_H_
#define AIO_EVENT_H_

#include "config.h"

/**
 * AIO context (opaque; allocated by rspamd_aio_init)
 */
struct aio_context;

/**
 * Callback for notifying about a finished operation
 * @param fd descriptor the operation was issued for
 * @param res result code of the operation
 * @param len length associated with the operation
 * @param data buffer that was passed to the read or write call
 * @param ud user data that was passed to the read or write call
 */
typedef void (*rspamd_aio_cb) (gint fd, gint res, guint64 len, gpointer data, gpointer ud);

/**
 * Initialize aio with specified event base
 * @param base event base used for completion notifications
 * @return new aio context
 */
struct aio_context* rspamd_aio_init (struct event_base *base);

/**
 * Open file for aio
 * @param ctx aio context
 * @param path path of the file to open
 * @param flags flags passed to open
 * @return file descriptor as returned by open
 */
gint rspamd_aio_open (struct aio_context *ctx, const gchar *path, int flags);

/**
 * Asynchronous read of file
 * @param fd descriptor to read from
 * @param buf buffer to read into
 * @param len number of bytes to read
 * @param offset file offset to start from
 * @param ctx aio context
 * @param cb callback to call when the read has finished
 * @param ud user data for the callback
 */
gint rspamd_aio_read (gint fd, gpointer buf, guint64 len, guint64 offset,
		struct aio_context *ctx, rspamd_aio_cb cb, gpointer ud);

/**
 * Asynchronous write of file
 * @param fd descriptor to write to
 * @param buf buffer with data to write
 * @param len number of bytes to write
 * @param offset file offset to start from
 * @param ctx aio context
 * @param cb callback to call when the write has finished
 * @param ud user data for the callback
 */
gint rspamd_aio_write (gint fd, gpointer buf, guint64 len, guint64 offset,
		struct aio_context *ctx, rspamd_aio_cb cb, gpointer ud);

/**
 * Close of aio operations
 * @param fd descriptor to close
 * @param ctx aio context
 */
gint rspamd_aio_close (gint fd, struct aio_context *ctx);

#endif /* AIO_EVENT_H_ */
/* diff --git a/src/libutil/bloom.c b/src/libutil/bloom.c new file mode 100644 index 000000000..f857d2e49 --- /dev/null +++ b/src/libutil/bloom.c @@ -0,0 +1,153 @@ */
/*
 * Copyright (c) 2009-2012, Vsevolod Stakhov
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 * * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 * * Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.
IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "bloom.h" +#include "xxhash.h" + +/* 4 bits are used for counting (implementing delete operation) */ +#define SIZE_BIT 4 + +/* These macroes are for 4 bits for counting element */ +#define INCBIT(a, n, acc) do { \ + acc = a[n * SIZE_BIT / CHAR_BIT] & (0xF << (n % (CHAR_BIT / SIZE_BIT) * SIZE_BIT)); \ + acc ++; \ + acc &= 0xF; \ + \ + a[n * SIZE_BIT / CHAR_BIT] &= (0xF << (4 - (n % (CHAR_BIT/SIZE_BIT) * SIZE_BIT))); \ + a[n * SIZE_BIT / CHAR_BIT] |= (acc << (n % (CHAR_BIT/SIZE_BIT) * SIZE_BIT)); \ +} while (0); + +#define DECBIT(a, n, acc) do { \ + acc = a[n * SIZE_BIT / CHAR_BIT] & (0xF << (n % (CHAR_BIT / SIZE_BIT) * SIZE_BIT)); \ + acc --; \ + acc &= 0xF; \ + \ + a[n * SIZE_BIT / CHAR_BIT] &= (0xF << (4 - (n % (CHAR_BIT/SIZE_BIT) * SIZE_BIT))); \ + a[n * SIZE_BIT / CHAR_BIT] |= (acc << (n % (CHAR_BIT/SIZE_BIT) * SIZE_BIT)); \ +} while (0); + +#define GETBIT(a, n) (a[n * SIZE_BIT / CHAR_BIT] & (0xF << (n % (CHAR_BIT/SIZE_BIT) * SIZE_BIT))) + +/* Common hash functions */ + + +rspamd_bloom_filter_t * +rspamd_bloom_create (size_t size, size_t nfuncs, ...) 
+{ + rspamd_bloom_filter_t *bloom; + va_list l; + gsize n; + + if (!(bloom = g_malloc (sizeof (rspamd_bloom_filter_t)))) { + return NULL; + } + if (!(bloom->a = g_new0 (gchar, (size + CHAR_BIT - 1) / CHAR_BIT * SIZE_BIT))) { + g_free (bloom); + return NULL; + } + if (!(bloom->seeds = g_new0 (guint32, nfuncs))) { + g_free (bloom->a); + g_free (bloom); + return NULL; + } + + va_start (l, nfuncs); + for (n = 0; n < nfuncs; ++n) { + bloom->seeds[n] = va_arg (l, guint32); + } + va_end (l); + + bloom->nfuncs = nfuncs; + bloom->asize = size; + + return bloom; +} + +void +rspamd_bloom_destroy (rspamd_bloom_filter_t * bloom) +{ + g_free (bloom->a); + g_free (bloom->seeds); + g_free (bloom); +} + +gboolean +rspamd_bloom_add (rspamd_bloom_filter_t * bloom, const gchar *s) +{ + size_t n, len; + u_char t; + guint v; + + if (s == NULL) { + return FALSE; + } + len = strlen (s); + for (n = 0; n < bloom->nfuncs; ++n) { + v = XXH32 (s, len, bloom->seeds[n]) % bloom->asize; + INCBIT (bloom->a, v, t); + } + + return TRUE; +} + +gboolean +rspamd_bloom_del (rspamd_bloom_filter_t * bloom, const gchar *s) +{ + size_t n, len; + u_char t; + guint v; + + if (s == NULL) { + return FALSE; + } + len = strlen (s); + for (n = 0; n < bloom->nfuncs; ++n) { + v = XXH32 (s, len, bloom->seeds[n]) % bloom->asize; + DECBIT (bloom->a, v, t); + } + + return TRUE; + +} + +gboolean +rspamd_bloom_check (rspamd_bloom_filter_t * bloom, const gchar *s) +{ + size_t n, len; + guint v; + + if (s == NULL) { + return FALSE; + } + len = strlen (s); + for (n = 0; n < bloom->nfuncs; ++n) { + v = XXH32 (s, len, bloom->seeds[n]) % bloom->asize; + if (!(GETBIT (bloom->a, v))) { + return FALSE; + } + } + + return TRUE; +} diff --git a/src/libutil/bloom.h b/src/libutil/bloom.h new file mode 100644 index 000000000..380143c80 --- /dev/null +++ b/src/libutil/bloom.h @@ -0,0 +1,48 @@ +#ifndef __RSPAMD_BLOOM_H__ +#define __RSPAMD_BLOOM_H__ + +#include "config.h" + +typedef struct rspamd_bloom_filter_s { + size_t asize; + gchar 
*a; + size_t nfuncs; + guint32 *seeds; +} rspamd_bloom_filter_t; + + +/* + * Some random uint32 seeds for hashing + */ +#define RSPAMD_DEFAULT_BLOOM_HASHES 8, 0x61782caaU, 0x79ab8141U, 0xe45ee2d1U, \ + 0xf97542d1U, 0x1e2623edU, 0xf5a23cfeU, 0xa41b2508U, 0x85abdce8U + +/* + * Create new bloom filter + * @param size length of bloom buffer + * @param nfuncs number of hash functions + * @param ... hash functions list + */ +rspamd_bloom_filter_t* rspamd_bloom_create (size_t size, size_t nfuncs, ...); + +/* + * Destroy bloom filter + */ +void rspamd_bloom_destroy (rspamd_bloom_filter_t * bloom); + +/* + * Add a string to bloom filter + */ +gboolean rspamd_bloom_add (rspamd_bloom_filter_t * bloom, const gchar *s); + +/* + * Delete a string from bloom filter + */ +gboolean rspamd_bloom_del (rspamd_bloom_filter_t * bloom, const gchar *s); + +/* + * Check whether this string is in bloom filter (algorithm produces FALSE-POSITIVES, so result must be checked if it is positive) + */ +gboolean rspamd_bloom_check (rspamd_bloom_filter_t * bloom, const gchar *s); + +#endif diff --git a/src/libutil/diff.c b/src/libutil/diff.c new file mode 100644 index 000000000..4038d8680 --- /dev/null +++ b/src/libutil/diff.c @@ -0,0 +1,445 @@ +/* diff - compute a shortest edit script (SES) given two sequences + * Copyright (c) 2004 Michael B. 
Allen <mba2000 ioplex.com> + * Copyright (c) 2010-2014, Vsevolod Stakhov + * + * The MIT License + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/* This algorithm is basically Myers' solution to SES/LCS with + * the Hirschberg linear space refinement as described in the + * following publication: + * + * E. Myers, ``An O(ND) Difference Algorithm and Its Variations,'' + * Algorithmica 1, 2 (1986), 251-266. + * http://www.cs.arizona.edu/people/gene/PAPERS/diff.ps + * + * This is the same algorithm used by GNU diff(1). 
+ */ + + +#include "config.h" +#include "diff.h" + + +#define FV(k) _v(ctx, (k), 0) +#define RV(k) _v(ctx, (k), 1) + +#define MAX_DIFF 1024 + +struct _ctx +{ + GArray *buf; + GArray *ses; + gint si; + gint dmax; +}; + +struct middle_snake +{ + gint x, y, u, v; +}; + +static +void maybe_resize_array(GArray *arr, guint k) +{ + if (k > arr->len) { + g_array_set_size (arr, k); + } + +} + +static void +_setv(struct _ctx *ctx, gint k, gint r, gint val) +{ + gint j; + gint *i; + /* Pack -N to N ginto 0 to N * 2 + */ + j = k <= 0 ? -k * 4 + r : k * 4 + (r - 2); + + maybe_resize_array (ctx->buf, j); + i = (gint *) &g_array_index (ctx->buf, gint, j); + *i = val; +} + +static gint +_v(struct _ctx *ctx, gint k, gint r) +{ + gint j; + + j = k <= 0 ? -k * 4 + r : k * 4 + (r - 2); + + return *((gint *) &g_array_index (ctx->buf, gint, j)); +} + +static gint +_find_middle_snake(const void *a, gint aoff, gint n, const void *b, + gint boff, gint m, struct _ctx *ctx, struct middle_snake *ms) +{ + gint delta, odd, mid, d; + + delta = n - m; + odd = delta & 1; + mid = (n + m) / 2; + mid += odd; + + _setv (ctx, 1, 0, 0); + _setv (ctx, delta - 1, 1, n); + + for (d = 0; d <= mid; d++) { + gint k, x, y; + + if ((2 * d - 1) >= ctx->dmax) { + return ctx->dmax; + } + + for (k = d; k >= -d; k -= 2) { + if (k == -d || (k != d && FV(k - 1) < FV(k + 1))) { + x = FV(k + 1); + } + else { + x = FV(k - 1) + 1; + } + y = x - k; + + ms->x = x; + ms->y = y; + const guchar *a0 = (const guchar *) a + aoff; + const guchar *b0 = (const guchar *) b + boff; + while (x < n && y < m && a0[x] == b0[y]) { + x++; + y++; + } + _setv (ctx, k, 0, x); + + if (odd && k >= (delta - (d - 1)) && k <= (delta + (d - 1))) { + if (x >= RV(k)) { + ms->u = x; + ms->v = y; + return 2 * d - 1; + } + } + } + for (k = d; k >= -d; k -= 2) { + gint kr = (n - m) + k; + + if (k == d || (k != -d && RV(kr - 1) < RV(kr + 1))) { + x = RV(kr - 1); + } + else { + x = RV(kr + 1) - 1; + } + y = x - kr; + + ms->u = x; + ms->v = y; + const guchar 
*a0 = (const guchar *) a + aoff; + const guchar *b0 = (const guchar *) b + boff; + while (x > 0 && y > 0 && a0[x - 1] == b0[y - 1]) { + x--; + y--; + } + _setv (ctx, kr, 1, x); + + if (!odd && kr >= -d && kr <= d) { + if (x <= FV(kr)) { + ms->x = x; + ms->y = y; + return 2 * d; + } + } + } + } + + errno = EFAULT; + + return -1; +} + +static void +_edit(struct _ctx *ctx, gint op, gint off, gint len) +{ + struct diff_edit *e = NULL, newe; + + if (len == 0 || ctx->ses == NULL) { + return; + } + /* + * Add an edit to the SES (or + * coalesce if the op is the same) + */ + if (ctx->ses->len != 0) { + e = &g_array_index (ctx->ses, struct diff_edit, ctx->ses->len - 1); + } + if (e == NULL || e->op != op) { + newe.op = op; + newe.off = off; + newe.len = len; + g_array_append_val (ctx->ses, newe); + } + else { + e->len += len; + } +} + +static gint +_ses(const void *a, gint aoff, gint n, const void *b, gint boff, + gint m, struct _ctx *ctx) +{ + struct middle_snake ms = { + .x = 0, + .y = 0, + .u = 0, + .v = 0 + }; + gint d; + + if (n == 0) { + _edit (ctx, DIFF_INSERT, boff, m); + d = m; + } + else if (m == 0) { + _edit (ctx, DIFF_DELETE, aoff, n); + d = n; + } + else { + /* Find the middle "snake" around which we + * recursively solve the sub-problems. + */ + d = _find_middle_snake (a, aoff, n, b, boff, m, ctx, &ms); + if (d == -1) { + return -1; + } + else if (d >= ctx->dmax) { + return ctx->dmax; + } + else if (ctx->ses == NULL) { + return d; + } + else if (d > 1) { + if (_ses (a, aoff, ms.x, b, boff, ms.y, ctx) == -1) { + return -1; + } + + _edit (ctx, DIFF_MATCH, aoff + ms.x, ms.u - ms.x); + + aoff += ms.u; + boff += ms.v; + n -= ms.u; + m -= ms.v; + if (_ses (a, aoff, n, b, boff, m, ctx) == -1) { + return -1; + } + } + else { + gint x = ms.x; + gint u = ms.u; + + /* There are only 4 base cases when the + * edit distance is 1. 
+ * + * n > m m > n + * + * - | + * \ \ x != u + * \ \ + * + * \ \ + * \ \ x == u + * - | + */ + + if (m > n) { + if (x == u) { + _edit (ctx, DIFF_MATCH, aoff, n); + _edit (ctx, DIFF_INSERT, boff + (m - 1), 1); + } + else { + _edit (ctx, DIFF_INSERT, boff, 1); + _edit (ctx, DIFF_MATCH, aoff, n); + } + } + else { + if (x == u) { + _edit (ctx, DIFF_MATCH, aoff, m); + _edit (ctx, DIFF_DELETE, aoff + (n - 1), 1); + } + else { + _edit (ctx, DIFF_DELETE, aoff, 1); + _edit (ctx, DIFF_MATCH, aoff + 1, m); + } + } + } + } + + return d; +} + +gint +rspamd_diff(const void *a, gint aoff, gint n, const void *b, gint boff, gint m, + gint dmax, GArray *ses, gint *sn) +{ + struct _ctx ctx; + gint d, x, y; + struct diff_edit *e = NULL; + GArray *tmp; + + tmp = g_array_sized_new (FALSE, TRUE, sizeof(gint), dmax); + ctx.buf = tmp; + ctx.ses = ses; + ctx.si = 0; + ctx.dmax = dmax; + + /* The _ses function assumes the SES will begin or end with a delete + * or insert. The following will insure this is true by eating any + * beginning matches. This is also a quick to process sequences + * that match entirely. + */ + x = y = 0; + const guchar *a0 = (const guchar *) a + aoff; + const guchar *b0 = (const guchar *) b + boff; + while (x < n && y < m && a0[x] == b0[y]) { + x++; + y++; + } + _edit (&ctx, DIFF_MATCH, aoff, x); + + if ((d = _ses (a, aoff + x, n - x, b, boff + y, m - y, &ctx)) == -1) { + g_array_free (tmp, TRUE); + return -1; + } + if (ses && sn && e) { + *sn = e->op ? 
ctx.si + 1 : 0; + } + + g_array_free (tmp, TRUE); + return d; +} + +static guint32 +compare_diff_distance_unnormalized (f_str_t *s1, f_str_t *s2) +{ + GArray *ses; + struct diff_edit *e; + guint i; + guint32 distance = 0; + + ses = g_array_sized_new (FALSE, TRUE, sizeof (struct diff_edit), MAX_DIFF); + + if (rspamd_diff (s1->begin, 0, s1->len, + s2->begin, 0, s2->len, MAX_DIFF, ses, NULL) == -1) { + /* Diff failed, strings are different */ + g_array_free (ses, TRUE); + return 0; + } + + for (i = 0; i < ses->len; i ++) { + e = &g_array_index(ses, struct diff_edit, i); + if (e->op != DIFF_MATCH) { + distance += e->len; + } + } + + g_array_free (ses, TRUE); + + return distance; +} + +guint32 +compare_diff_distance (f_str_t *s1, f_str_t *s2) +{ + + return 100 - (2 * compare_diff_distance_unnormalized (s1, s2) * 100) / (s1->len + s2->len); +} + + +guint32 +compare_diff_distance_normalized (f_str_t *s1, f_str_t *s2) +{ + gchar b1[BUFSIZ], b2[BUFSIZ], *t, *h, *p1, *p2; + gsize r1, r2; + f_str_t t1, t2; + guint32 cur_diff = 0; + + r1 = s1->len; + r2 = s2->len; + p1 = s1->begin; + p2 = s2->begin; + + while (r1 > 0 && r2 > 0) { + /* Copy strings to the buffer normalized */ + h = p1; + t = b1; + + /* The first string */ + while (r1 > 0 && t - b1 < (gint)sizeof (b1)) { + if (!g_ascii_isspace (*h)) { + *t++ = g_ascii_tolower (*h); + } + h ++; + p1 ++; + r1 --; + } + + t1.begin = b1; + t1.len = t - b1; + + /* The second string */ + h = p2; + t = b2; + while (r2 > 0 && t - b2 < (gint)sizeof (b2)) { + if (!g_ascii_isspace (*h)) { + *t++ = g_ascii_tolower (*h); + } + h ++; + p2 ++; + r2 --; + } + + t2.begin = b2; + t2.len = t - b2; + + cur_diff += compare_diff_distance_unnormalized (&t1, &t2); + } + + if (r1 > 0) { + h = p1; + while (r1 > 0) { + if (!g_ascii_isspace (*h)) { + cur_diff ++; + } + r1 --; + h ++; + } + } + else if (r2 > 0) { + h = p2; + while (r2 > 0) { + if (!g_ascii_isspace (*h)) { + cur_diff ++; + } + r2 --; + h ++; + } + } + + return 100 - (2 * cur_diff * 100) / 
(s1->len + s2->len); +} diff --git a/src/libutil/diff.h b/src/libutil/diff.h new file mode 100644 index 000000000..cea5e5d4a --- /dev/null +++ b/src/libutil/diff.h @@ -0,0 +1,74 @@ +/* Copyright (c) 2010, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Rambler BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/


#ifndef DIFF_H_
#define DIFF_H_

#include "config.h"
#include "fstring.h"

/* Kind of a single edit in the edit script */
typedef enum
{
	DIFF_MATCH = 1,
	DIFF_DELETE,
	DIFF_INSERT
} diff_op;

/* One element of the edit script */
struct diff_edit
{
	gshort op;		/* one of diff_op values */
	gint off;		/* offset into s1 if MATCH or DELETE but into s2 if INSERT */
	gint len;		/* number of elements covered by this edit */
};

/*
 * Calculate difference between two strings using diff algorithm
 * @param a the first line begin
 * @param aoff the first line offset
 * @param n the first line length
 * @param b the second line begin
 * @param boff the second line offset
 * @param m the second line length
 * @param dmax maximum differences number
 * @param ses here would be stored the shortest script to transform a to b
 * @param sn here would be stored a number of differences between a and b
 * @return distance between strings or -1 in case of error
 */
gint rspamd_diff(const void *a, gint aoff, gint n, const void *b, gint boff, gint m,
		gint dmax, GArray *ses, gint *sn);

/*
 * Calculate distance between two strings (in percentage) using diff algorithm.
 * @return 100 in case of identical strings and 0 in case of totally different strings.
 */
guint32 compare_diff_distance (f_str_t *s1, f_str_t *s2);

/*
 * Calculate distance between two strings (in percentage) using diff algorithm. Strings are normalized before:
 * all spaces are removed and all characters are lowercased.
 * @return 100 in case of identical strings and 0 in case of totally different strings.
*/
guint32 compare_diff_distance_normalized (f_str_t *s1, f_str_t *s2);

#endif /* DIFF_H_ */
/* diff --git a/src/libutil/fstring.c b/src/libutil/fstring.c new file mode 100644 index 000000000..098824101 --- /dev/null +++ b/src/libutil/fstring.c @@ -0,0 +1,461 @@ */
/*
 * Copyright (c) 2009-2012, Vsevolod Stakhov
 * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "fstring.h" + +/* + * Search first occurence of character in string + */ +ssize_t +fstrchr (f_str_t * src, gchar c) +{ + register size_t cur = 0; + + while (cur < src->len) { + if (*(src->begin + cur) == c) { + return cur; + } + cur++; + } + + return -1; +} + +/* + * Search last occurence of character in string + */ +ssize_t +fstrrchr (f_str_t * src, gchar c) +{ + register ssize_t cur = src->len; + + while (cur > 0) { + if (*(src->begin + cur) == c) { + return cur; + } + cur--; + } + + return -1; +} + +/* + * Search for pattern in orig + */ +ssize_t +fstrstr (f_str_t * orig, f_str_t * pattern) +{ + register size_t cur = 0, pcur = 0; + + if (pattern->len > orig->len) { + return -1; + } + + while (cur < orig->len) { + if (*(orig->begin + cur) == *pattern->begin) { + while (cur < orig->len && pcur < pattern->len) { + if (*(orig->begin + cur) != *(pattern->begin + pcur)) { + pcur = 0; + break; + } + cur++; + pcur++; + } + return cur - pattern->len; + } + cur++; + } + + return -1; + +} + +/* + * Search for pattern in orig ignoring case + */ +ssize_t +fstrstri (f_str_t * orig, f_str_t * pattern) +{ + register size_t cur = 0, pcur = 0; + + if (pattern->len > orig->len) { + return -1; + } + + while (cur < orig->len) { + if (g_ascii_tolower (*(orig->begin + cur)) == g_ascii_tolower (*pattern->begin)) { + while (cur < orig->len && pcur < pattern->len) { + if (g_ascii_tolower (*(orig->begin + cur)) != g_ascii_tolower (*(pattern->begin + pcur))) { + pcur = 0; + break; + } + cur++; + pcur++; + } + return cur - pattern->len; + } + cur++; + } + + return -1; + +} + +/* + * Split string by tokens + * word contains parsed word + * + * Return: -1 - no new words can be extracted + * 1 - word was extracted and there are more words + * 0 - last word extracted + */ +gint +fstrtok (f_str_t * text, const gchar *sep, f_tok_t * state) +{ + register size_t cur; + const gchar *csep = sep; + + if (state->pos >= text->len) { + return -1; + } + + cur = state->pos; + + while (cur 
< text->len) { + while (*csep) { + if (*(text->begin + cur) == *csep) { + state->word.begin = (text->begin + state->pos); + state->word.len = cur - state->pos; + state->pos = cur + 1; + return 1; + } + csep++; + } + csep = sep; + cur++; + } + + /* Last word */ + state->word.begin = (text->begin + state->pos); + state->word.len = cur - state->pos; + state->pos = cur; + + return 0; +} + +/* + * Copy one string into other + */ +size_t +fstrcpy (f_str_t * dest, f_str_t * src) +{ + register size_t cur = 0; + + if (dest->size < src->len) { + return 0; + } + + while (cur < src->len && cur < dest->size) { + *(dest->begin + cur) = *(src->begin + cur); + cur++; + } + + return cur; +} + +/* + * Concatenate two strings + */ +size_t +fstrcat (f_str_t * dest, f_str_t * src) +{ + register size_t cur = 0; + gchar *p = dest->begin + dest->len; + + if (dest->size < src->len + dest->len) { + return 0; + } + + while (cur < src->len) { + *p = *(src->begin + cur); + p++; + cur++; + } + + dest->len += src->len; + + return cur; + +} + +/* + * Make copy of string to 0-terminated string + */ +gchar * +fstrcstr (f_str_t * str, rspamd_mempool_t * pool) +{ + gchar *res; + res = rspamd_mempool_alloc (pool, str->len + 1); + + /* Do not allow multiply \0 characters */ + memccpy (res, str->begin, '\0', str->len); + res[str->len] = 0; + + return res; +} + +/* + * Push one character to fstr + */ +gint +fstrpush (f_str_t * dest, gchar c) +{ + if (dest->size < dest->len) { + /* Need to reallocate string */ + return 0; + } + + *(dest->begin + dest->len) = c; + dest->len++; + return 1; +} + +/* + * Push one character to fstr + */ +gint +fstrpush_unichar (f_str_t * dest, gunichar c) +{ + int l; + if (dest->size < dest->len) { + /* Need to reallocate string */ + return 0; + } + + l = g_unichar_to_utf8 (c, dest->begin + dest->len); + dest->len += l; + return l; +} + +/* + * Allocate memory for f_str_t + */ +f_str_t * +fstralloc (rspamd_mempool_t * pool, size_t len) +{ + f_str_t *res = rspamd_mempool_alloc 
(pool, sizeof (f_str_t)); + + res->begin = rspamd_mempool_alloc (pool, len); + + res->size = len; + res->len = 0; + return res; +} + +/* + * Allocate memory for f_str_t from temporary pool + */ +f_str_t * +fstralloc_tmp (rspamd_mempool_t * pool, size_t len) +{ + f_str_t *res = rspamd_mempool_alloc_tmp (pool, sizeof (f_str_t)); + + res->begin = rspamd_mempool_alloc_tmp (pool, len); + + res->size = len; + res->len = 0; + return res; +} + +/* + * Truncate string to its len + */ +f_str_t * +fstrtruncate (rspamd_mempool_t * pool, f_str_t * orig) +{ + f_str_t *res; + + if (orig == NULL || orig->len == 0 || orig->size <= orig->len) { + return orig; + } + + res = fstralloc (pool, orig->len); + if (res == NULL) { + return NULL; + } + fstrcpy (res, orig); + + return res; +} + +/* + * Enlarge string to new size + */ +f_str_t * +fstrgrow (rspamd_mempool_t * pool, f_str_t * orig, size_t newlen) +{ + f_str_t *res; + + if (orig == NULL || orig->len == 0 || orig->size >= newlen) { + return orig; + } + + res = fstralloc (pool, newlen); + if (res == NULL) { + return NULL; + } + fstrcpy (res, orig); + + return res; +} + +static guint32 +fstrhash_c (gchar c, guint32 hval) +{ + guint32 tmp; + /* + * xor in the current byte against each byte of hval + * (which alone gaurantees that every bit of input will have + * an effect on the output) + */ + tmp = c & 0xFF; + tmp = tmp | (tmp << 8) | (tmp << 16) | (tmp << 24); + hval ^= tmp; + + /* add some bits out of the middle as low order bits */ + hval = hval + ((hval >> 12) & 0x0000ffff); + + /* swap most and min significative bytes */ + tmp = (hval << 24) | ((hval >> 24) & 0xff); + /* zero most and min significative bytes of hval */ + hval &= 0x00ffff00; + hval |= tmp; + /* + * rotate hval 3 bits to the left (thereby making the + * 3rd msb of the above mess the hsb of the output hash) + */ + return (hval << 3) + (hval >> 29); +} + +/* + * Return hash value for a string + */ +guint32 +fstrhash (f_str_t * str) +{ + size_t i; + guint32 hval; + 
gchar *c = str->begin; + + if (str == NULL) { + return 0; + } + hval = str->len; + + for (i = 0; i < str->len; i++, c++) { + hval = fstrhash_c (*c, hval); + } + return hval; +} + +/* + * Return hash value for a string + */ +guint32 +fstrhash_lowercase (f_str_t * str, gboolean is_utf) +{ + gsize i; + guint32 j, hval; + const gchar *p = str->begin, *end = NULL; + gchar t; + gunichar uc; + + if (str == NULL) { + return 0; + } + hval = str->len; + + if (is_utf) { + while (end < str->begin + str->len) { + if (!g_utf8_validate (p, str->len, &end)) { + return fstrhash_lowercase (str, FALSE); + } + while (p < end) { + uc = g_unichar_tolower (g_utf8_get_char (p)); + for (j = 0; j < sizeof (gunichar); j ++) { + t = (uc >> (j * 8)) & 0xff; + if (t != 0) { + hval = fstrhash_c (t, hval); + } + } + p = g_utf8_next_char (p); + } + p = end + 1; + } + + } + else { + for (i = 0; i < str->len; i++, p++) { + hval = fstrhash_c (g_ascii_tolower (*p), hval); + } + } + + return hval; +} + +void +fstrstrip (f_str_t * str) +{ + gchar *p = str->begin; + guint r = 0; + + while (r < str->len) { + if (g_ascii_isspace (*p)) { + p++; + r++; + } + else { + break; + } + } + + if (r > 0) { + memmove (str->begin, p, str->len - r); + str->len -= r; + } + + r = str->len; + p = str->begin + str->len; + while (r > 0) { + if (g_ascii_isspace (*p)) { + p--; + r--; + } + else { + break; + } + } + + str->len = r; +} diff --git a/src/libutil/fstring.h b/src/libutil/fstring.h new file mode 100644 index 000000000..bd680e365 --- /dev/null +++ b/src/libutil/fstring.h @@ -0,0 +1,120 @@ +/* + * Functions for handling with fixed size strings + */ + +#ifndef FSTRING_H +#define FSTRING_H + +#include "config.h" +#include "mem_pool.h" + +#define update_buf_size(x) (x)->free = (x)->buf->size - ((x)->pos - (x)->buf->begin); (x)->buf->len = (x)->pos - (x)->buf->begin + +typedef struct f_str_s { + gchar *begin; + size_t len; + size_t size; +} f_str_t; + +typedef struct f_str_buf_s { + f_str_t *buf; + gchar *pos; + size_t 
free; +} f_str_buf_t; + +typedef struct f_tok_s { + f_str_t word; + size_t pos; +} f_tok_t; + +/* + * Search first occurence of character in string + */ +ssize_t fstrchr (f_str_t *src, gchar c); + +/* + * Search last occurence of character in string + */ +ssize_t fstrrchr (f_str_t *src, gchar c); + +/* + * Search for pattern in orig + */ +ssize_t fstrstr (f_str_t *orig, f_str_t *pattern); + +/* + * Search for pattern in orig ignoring case + */ +ssize_t fstrstri (f_str_t *orig, f_str_t *pattern); + +/* + * Split string by tokens + * word contains parsed word + */ +gint fstrtok (f_str_t *text, const gchar *sep, f_tok_t *state); + +/* + * Copy one string into other + */ +size_t fstrcpy (f_str_t *dest, f_str_t *src); + +/* + * Concatenate two strings + */ +size_t fstrcat (f_str_t *dest, f_str_t *src); + +/* + * Push one character to fstr + */ +gint fstrpush (f_str_t *dest, gchar c); + +/* + * Push one character to fstr + */ +gint fstrpush_unichar (f_str_t *dest, gunichar c); + +/* + * Allocate memory for f_str_t + */ +f_str_t* fstralloc (rspamd_mempool_t *pool, size_t len); + +/* + * Allocate memory for f_str_t from temporary pool + */ +f_str_t* fstralloc_tmp (rspamd_mempool_t *pool, size_t len); + +/* + * Truncate string to its len + */ +f_str_t* fstrtruncate (rspamd_mempool_t *pool, f_str_t *orig); + +/* + * Enlarge string to new size + */ +f_str_t* fstrgrow (rspamd_mempool_t *pool, f_str_t *orig, size_t newlen); + +/* + * Return specified character + */ +#define fstridx(str, pos) *((str)->begin + (pos)) + +/* + * Return fast hash value for fixed string + */ +guint32 fstrhash (f_str_t *str); + +/* + * Return fast hash value for fixed string converted to lowercase + */ +guint32 fstrhash_lowercase (f_str_t *str, gboolean is_utf); +/* + * Make copy of string to 0-terminated string + */ +gchar* fstrcstr (f_str_t *str, rspamd_mempool_t *pool); + +/* + * Strip fstr string from space symbols + */ +void fstrstrip (f_str_t *str); + +#endif diff --git a/src/libutil/fuzzy.c 
b/src/libutil/fuzzy.c new file mode 100644 index 000000000..7e8a01ce3 --- /dev/null +++ b/src/libutil/fuzzy.c @@ -0,0 +1,498 @@ +/* + * Copyright (c) 2009-2012, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + + +#include "config.h" +#include "mem_pool.h" +#include "fstring.h" +#include "fuzzy.h" +#include "message.h" +#include "url.h" +#include "main.h" + +#define ROLL_WINDOW_SIZE 9 +#define MIN_FUZZY_BLOCK_SIZE 3 +#define HASH_INIT 0x28021967 + +static const char *b64 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + +struct roll_state { + guint32 h[3]; + gchar window[ROLL_WINDOW_SIZE]; + gint n; +}; + +static struct roll_state rs; + + +/* Rolling hash function based on Adler-32 checksum */ +static guint32 +fuzzy_roll_hash (guint c) +{ + /* Check window position */ + if (rs.n == ROLL_WINDOW_SIZE) { + rs.n = 0; + } + + rs.h[1] -= rs.h[0]; + rs.h[1] += ROLL_WINDOW_SIZE * c; + + rs.h[0] += c; + rs.h[0] -= rs.window[rs.n]; + + /* Save current symbol */ + rs.window[rs.n] = c; + rs.n++; + + rs.h[2] <<= 5; + rs.h[2] ^= c; + + return rs.h[0] + rs.h[1] + rs.h[2]; +} + +/* A simple non-rolling hash, based on the FNV hash */ +static guint32 +fuzzy_fnv_hash (guint c, guint32 hval) +{ + hval ^= c; + hval += (hval << 1) + (hval << 4) + (hval << 7) + (hval << 8) + (hval << 24); + return hval; +} + +/* Calculate blocksize depending on length of input */ +static guint32 +fuzzy_blocksize (guint32 len) +{ + guint32 nlen = MIN_FUZZY_BLOCK_SIZE; + + while (nlen * (FUZZY_HASHLEN - 1) < len) { + nlen *= 2; + } + return nlen; +} + + +/* Update hash with new symbol */ +static void +fuzzy_update (fuzzy_hash_t * h, guint c) +{ + h->rh = fuzzy_roll_hash (c); + h->h = fuzzy_fnv_hash (c, h->h); + + if (h->rh % h->block_size == (h->block_size - 1)) { + h->hash_pipe[h->hi] = b64[h->h % 64]; + if (h->hi < FUZZY_HASHLEN - 2) { + h->h = HASH_INIT; + h->hi++; + } + } +} + +static void +fuzzy_update2 (fuzzy_hash_t * h1, fuzzy_hash_t *h2, guint c) +{ + h1->rh = fuzzy_roll_hash (c); + h1->h = fuzzy_fnv_hash (c, h1->h); + h2->rh = h1->rh; + h2->h = fuzzy_fnv_hash (c, h2->h); + + if (h1->rh % h1->block_size == (h1->block_size - 1)) { + h1->hash_pipe[h1->hi] = b64[h1->h % 64]; + if 
(h1->hi < FUZZY_HASHLEN - 2) { + h1->h = HASH_INIT; + h1->hi++; + } + } + if (h2->rh % h2->block_size == (h2->block_size - 1)) { + h2->hash_pipe[h2->hi] = b64[h2->h % 64]; + if (h2->hi < FUZZY_HASHLEN - 2) { + h2->h = HASH_INIT; + h2->hi++; + } + } +} + +/* + * Levenshtein distance between string1 and string2. + * + * Replace cost is normally 1, and 2 with nonzero xcost. + */ +guint32 +lev_distance (gchar *s1, gint len1, gchar *s2, gint len2) +{ + gint i; + gint *row; /* we only need to keep one row of costs */ + gint *end; + gint half, nx; + gchar *sx, *char2p, char1; + gint *p, D, x, offset, c3; + + /* strip common prefix */ + while (len1 > 0 && len2 > 0 && *s1 == *s2) { + len1--; + len2--; + s1++; + s2++; + } + + /* strip common suffix */ + while (len1 > 0 && len2 > 0 && s1[len1 - 1] == s2[len2 - 1]) { + len1--; + len2--; + } + + /* catch trivial cases */ + if (len1 == 0) { + return len2; + } + + if (len2 == 0) { + return len1; + } + + /* make the inner cycle (i.e. string2) the longer one */ + if (len1 > len2) { + nx = len1; + sx = s1; + len1 = len2; + len2 = nx; + s1 = s2; + s2 = sx; + } + /* check len1 == 1 separately */ + if (len1 == 1) { + return len2 - (memchr (s2, *s1, len2) != NULL); + } + + len1++; + len2++; + half = len1 >> 1; + + /* initalize first row */ + row = g_malloc (len2 * sizeof (gint)); + end = row + len2 - 1; + for (i = 0; i < len2; i++) { + row[i] = i; + } + + /* in this case we don't have to scan two corner triangles (of size len1/2) + * in the matrix because no best path can go throught them. 
note this + * breaks when len1 == len2 == 2 so the memchr() special case above is + * necessary */ + row[0] = len1 - half - 1; + for (i = 1; i < len1; i++) { + char1 = s1[i - 1]; + /* skip the upper triangle */ + if (i >= len1 - half) { + offset = i - (len1 - half); + char2p = s2 + offset; + p = row + offset; + c3 = *(p++) + (char1 != *(char2p++)); + x = *p; + x++; + D = x; + if (x > c3) + x = c3; + *(p++) = x; + } + else { + p = row + 1; + char2p = s2; + D = x = i; + } + /* skip the lower triangle */ + if (i <= half + 1) + end = row + len2 + i - half - 2; + /* main */ + while (p <= end) { + c3 = --D + (char1 != *(char2p++)); + x++; + if (x > c3) + x = c3; + D = *p; + D++; + if (x > D) + x = D; + *(p++) = x; + } + /* lower triangle sentinel */ + if (i <= half) { + c3 = --D + (char1 != *char2p); + x++; + if (x > c3) + x = c3; + *p = x; + } + } + + i = *end; + g_free (row); + return i; +} + +/* Calculate fuzzy hash for specified string */ +fuzzy_hash_t * +fuzzy_init (f_str_t * in, rspamd_mempool_t * pool) +{ + fuzzy_hash_t *new; + guint i, repeats = 0; + gchar *c = in->begin, last = '\0'; + gsize real_len = 0; + + new = rspamd_mempool_alloc0 (pool, sizeof (fuzzy_hash_t)); + bzero (&rs, sizeof (rs)); + for (i = 0; i < in->len; i++) { + if (*c == last) { + repeats++; + } + else { + repeats = 0; + } + if (!g_ascii_isspace (*c) && !g_ascii_ispunct (*c) && repeats < 3) { + real_len ++; + } + last = *c; + c++; + } + + new->block_size = fuzzy_blocksize (real_len); + c = in->begin; + + for (i = 0; i < in->len; i++) { + if (*c == last) { + repeats++; + } + else { + repeats = 0; + } + if (!g_ascii_isspace (*c) && !g_ascii_ispunct (*c) && repeats < 3) { + fuzzy_update (new, *c); + } + last = *c; + c++; + } + + /* Check whether we have more bytes in a rolling window */ + if (new->rh != 0) { + new->hash_pipe[new->hi] = b64[new->h % 64]; + } + + return new; +} + +fuzzy_hash_t * +fuzzy_init_byte_array (GByteArray * in, rspamd_mempool_t * pool) +{ + f_str_t f; + + f.begin = (gchar 
*)in->data; + f.len = in->len; + + return fuzzy_init (&f, pool); +} + +void +fuzzy_init_part (struct mime_text_part *part, rspamd_mempool_t *pool, gsize max_diff) +{ + fuzzy_hash_t *new, *new2; + gchar *c, *end, *begin; + gsize real_len = 0, len = part->content->len; + GList *cur_offset; + struct process_exception *cur_ex = NULL; + gunichar uc; + gboolean write_diff = FALSE; + + cur_offset = part->urls_offset; + if (cur_offset != NULL) { + cur_ex = cur_offset->data; + } + + begin = (gchar *)part->content->data; + c = begin; + new = rspamd_mempool_alloc0 (pool, sizeof (fuzzy_hash_t)); + new2 = rspamd_mempool_alloc0 (pool, sizeof (fuzzy_hash_t)); + bzero (&rs, sizeof (rs)); + end = c + len; + + if (part->is_utf) { + while (c < end) { + if (cur_ex != NULL && (gint)cur_ex->pos == c - begin) { + c += cur_ex->len + 1; + cur_offset = g_list_next (cur_offset); + if (cur_offset != NULL) { + cur_ex = cur_offset->data; + } + } + else { + uc = g_utf8_get_char (c); + if (g_unichar_isalnum (uc)) { + real_len ++; + } + c = g_utf8_next_char (c); + } + } + } + else { + while (c < end) { + if (cur_ex != NULL && (gint)cur_ex->pos == c - begin) { + c += cur_ex->len + 1; + cur_offset = g_list_next (cur_offset); + if (cur_offset != NULL) { + cur_ex = cur_offset->data; + } + } + else { + if (!g_ascii_isspace (*c) && !g_ascii_ispunct (*c)) { + real_len ++; + } + c++; + } + } + } + + write_diff = real_len > 0 && real_len < max_diff; + + if (write_diff) { + part->diff_str = fstralloc (pool, real_len); + } + else { + part->diff_str = NULL; + } + + new->block_size = fuzzy_blocksize (real_len); + new2->block_size = new->block_size * 2; + + cur_offset = part->urls_offset; + if (cur_offset != NULL) { + cur_ex = cur_offset->data; + } + + begin = (gchar *)part->content->data; + c = begin; + end = c + len; + if (part->is_utf) { + + while (c < end) { + if (cur_ex != NULL && (gint)cur_ex->pos == c - begin) { + c += cur_ex->len + 1; + cur_offset = g_list_next (cur_offset); + if (cur_offset != NULL) { 
+ cur_ex = cur_offset->data; + } + } + else { + uc = g_utf8_get_char (c); + if (g_unichar_isalnum (uc)) { + fuzzy_update2 (new, new2, uc); + if (write_diff) { + fstrpush_unichar (part->diff_str, uc); + } + } + c = g_utf8_next_char (c); + } + } + } + else { + while (c < end) { + if (cur_ex != NULL && (gint)cur_ex->pos == c - begin) { + c += cur_ex->len + 1; + cur_offset = g_list_next (cur_offset); + if (cur_offset != NULL) { + cur_ex = cur_offset->data; + } + } + else { + if (!g_ascii_isspace (*c) && !g_ascii_ispunct (*c)) { + fuzzy_update2 (new, new2, *c); + if (write_diff) { + fstrpush (part->diff_str, *c); + } + } + c++; + } + } + } + + /* Check whether we have more bytes in a rolling window */ + if (new->rh != 0) { + new->hash_pipe[new->hi] = b64[new->h % 64]; + } + if (new2->rh != 0) { + new2->hash_pipe[new2->hi] = b64[new2->h % 64]; + } + + part->fuzzy = new; + part->double_fuzzy = new2; +} + +/* Compare score of difference between two hashes 0 - different hashes, 100 - identical hashes */ +gint +fuzzy_compare_hashes (fuzzy_hash_t * h1, fuzzy_hash_t * h2) +{ + gint res, l1, l2; + + /* If we have hashes of different size, input strings are too different */ + if (h1->block_size != h2->block_size) { + return 0; + } + + l1 = strlen (h1->hash_pipe); + l2 = strlen (h2->hash_pipe); + + if (l1 == 0 || l2 == 0) { + if (l1 == 0 && l2 == 0) { + return 100; + } + else { + return 0; + } + } + + res = lev_distance (h1->hash_pipe, l1, h2->hash_pipe, l2); + res = 100 - (2 * res * 100) / (l1 + l2); + + return res; +} + +gint +fuzzy_compare_parts (struct mime_text_part *p1, struct mime_text_part *p2) +{ + if (p1->fuzzy != NULL && p2->fuzzy != NULL) { + if (p1->fuzzy->block_size == p2->fuzzy->block_size) { + return fuzzy_compare_hashes (p1->fuzzy, p2->fuzzy); + } + else if (p1->double_fuzzy->block_size == p2->fuzzy->block_size) { + return fuzzy_compare_hashes (p1->double_fuzzy, p2->fuzzy); + } + else if (p2->double_fuzzy->block_size == p1->fuzzy->block_size) { + return 
fuzzy_compare_hashes (p2->double_fuzzy, p1->fuzzy); + } + } + + return 0; +} + +/* + * vi:ts=4 + */ diff --git a/src/libutil/fuzzy.h b/src/libutil/fuzzy.h new file mode 100644 index 000000000..c226c5765 --- /dev/null +++ b/src/libutil/fuzzy.h @@ -0,0 +1,69 @@ +/** + * @file fuzzy.h + * Fuzzy hashes API + */ + +#ifndef RSPAMD_FUZZY_H +#define RSPAMD_FUZZY_H + +#include "config.h" +#include "mem_pool.h" +#include "fstring.h" + +#define FUZZY_HASHLEN 64 + +typedef struct fuzzy_hash_s { + gchar hash_pipe[FUZZY_HASHLEN]; /**< result hash */ + guint32 block_size; /**< current blocksize */ + guint32 rh; /**< roll hash value */ + guint32 h; /**< hash of block */ + guint32 hi; /**< current index in hash pipe */ +} fuzzy_hash_t; + +struct mime_text_part; + +/** + * Calculate fuzzy hash for specified string + * @param in input string + * @param pool pool object + * @return fuzzy_hash object allocated in pool + */ +fuzzy_hash_t * fuzzy_init (f_str_t *in, rspamd_mempool_t *pool); +/** + * Calculate fuzzy hash for specified byte array + * @param in input string + * @param pool pool object + * @return fuzzy_hash object allocated in pool + */ +fuzzy_hash_t * fuzzy_init_byte_array (GByteArray *in, rspamd_mempool_t *pool); + +/** + * Calculate fuzzy hash for specified text part + * @param part text part object + * @param pool pool object + * @param max_diff maximum text length to use diff algorithm in comparasions + * @return fuzzy_hash object allocated in pool + */ +void fuzzy_init_part (struct mime_text_part *part, rspamd_mempool_t *pool, gsize max_diff); + +/** + * Compare score of difference between two hashes + * @param h1 first hash + * @param h2 second hash + * @return result in percents 0 - different hashes, 100 - identical hashes + */ +gint fuzzy_compare_hashes (fuzzy_hash_t *h1, fuzzy_hash_t *h2); + +/* + * Compare two text parts and return percents of difference + */ +gint fuzzy_compare_parts (struct mime_text_part *p1, struct mime_text_part *p2); + +/* + * Calculate 
levenstein distance between two strings. Note: this algorithm should be used + * only for short texts - it runs too slow on long ones. + */ +guint32 lev_distance (gchar *s1, gint len1, gchar *s2, gint len2); + + +#endif diff --git a/src/libutil/hash.c b/src/libutil/hash.c new file mode 100644 index 000000000..3bb381651 --- /dev/null +++ b/src/libutil/hash.c @@ -0,0 +1,489 @@ +/* + * Copyright (c) 2009-2012, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "hash.h" + +#define HASH_TABLE_MIN_SIZE 19 +#define HASH_TABLE_MAX_SIZE 13845163 + +/* + * Performs a lookup in the hash table. Virtually all hash operations + * will use this function internally. 
+ */ +static inline struct rspamd_hash_node ** +rspamd_hash_lookup_node (rspamd_hash_t * hash, gconstpointer key, guint * hash_return) +{ + struct rspamd_hash_node **node_ptr, *node; + guint hash_value; + hash_value = (*hash->hash_func) (key); + + if (hash->shared) { + rspamd_mempool_rlock_rwlock (hash->lock); + } + node_ptr = &hash->nodes[hash_value % hash->size]; + + if (hash_return) + *hash_return = hash_value; + + /* Hash table lookup needs to be fast. + * We therefore remove the extra conditional of testing + * whether to call the key_equal_func or not from + * the inner loop. + * + * Additional optimisation: first check if our full hash + * values are equal so we can avoid calling the full-blown + * key equality function in most cases. + */ + if (hash->key_equal_func) { + while ((node = *node_ptr)) { + if (node->key_hash == hash_value && hash->key_equal_func (node->key, key)) { + break; + } + node_ptr = &(*node_ptr)->next; + } + } + else { + while ((node = *node_ptr)) { + if (node->key == key) { + break; + } + node_ptr = &(*node_ptr)->next; + } + } + if (hash->shared) { + rspamd_mempool_runlock_rwlock (hash->lock); + } + return node_ptr; +} + +/* + * Removes a node from the hash table and updates the node count. + * No table resize is performed. + */ +static void +rspamd_hash_remove_node (rspamd_hash_t * hash, struct rspamd_hash_node ***node_ptr_ptr) +{ + struct rspamd_hash_node **node_ptr, *node; + + if (hash->shared) { + rspamd_mempool_wlock_rwlock (hash->lock); + } + node_ptr = *node_ptr_ptr; + node = *node_ptr; + + *node_ptr = node->next; + + hash->nnodes--; + if (hash->shared) { + rspamd_mempool_wunlock_rwlock (hash->lock); + } +} + +/* + * Resizes the hash table to the optimal size based on the number of + * nodes currently held. 
+ */ +static void +rspamd_hash_resize (rspamd_hash_t * hash) +{ + struct rspamd_hash_node **new_nodes; + struct rspamd_hash_node *node, *next; + guint hash_val; + gint new_size, i; + + new_size = g_spaced_primes_closest (hash->nnodes); + new_size = CLAMP (new_size, HASH_TABLE_MIN_SIZE, HASH_TABLE_MAX_SIZE); + + if (hash->shared) { + new_nodes = rspamd_mempool_alloc_shared (hash->pool, sizeof (struct rspamd_hash_node *) * new_size); + } + else { + new_nodes = rspamd_mempool_alloc (hash->pool, sizeof (struct rspamd_hash_node *) * new_size); + } + + if (hash->shared) { + rspamd_mempool_wlock_rwlock (hash->lock); + } + + for (i = 0; i < hash->size; i++) { + for (node = hash->nodes[i]; node; node = next) { + next = node->next; + hash_val = node->key_hash % new_size; + node->next = new_nodes[hash_val]; + new_nodes[hash_val] = node; + } + } + + hash->nodes = new_nodes; + hash->size = new_size; + + if (hash->shared) { + rspamd_mempool_wunlock_rwlock (hash->lock); + } +} + +/* + * Resizes the hash table, if needed. + */ +static inline void +rspamd_hash_maybe_resize (rspamd_hash_t * hash) +{ + gint nnodes = hash->nnodes; + gint size = hash->size; + + if ((size >= 3 * nnodes && size > HASH_TABLE_MIN_SIZE) || (3 * size <= nnodes && size < HASH_TABLE_MAX_SIZE)) { + rspamd_hash_resize (hash); + } +} + +/* Create new hash in specified pool */ +rspamd_hash_t * +rspamd_hash_new (rspamd_mempool_t * pool, GHashFunc hash_func, GEqualFunc key_equal_func) +{ + rspamd_hash_t *hash; + + hash = rspamd_mempool_alloc (pool, sizeof (rspamd_hash_t)); + hash->size = HASH_TABLE_MIN_SIZE; + hash->nnodes = 0; + hash->hash_func = hash_func ? 
hash_func : g_direct_hash; + hash->key_equal_func = key_equal_func; + hash->nodes = rspamd_mempool_alloc0 (pool, sizeof (struct rspamd_hash_node *) * hash->size); + hash->shared = 0; + hash->pool = pool; + + return hash; +} + +/* + * Create new hash in specified pool using shared memory + */ +rspamd_hash_t * +rspamd_hash_new_shared (rspamd_mempool_t * pool, GHashFunc hash_func, GEqualFunc key_equal_func, gint size) +{ + rspamd_hash_t *hash; + + hash = rspamd_mempool_alloc_shared (pool, sizeof (rspamd_hash_t)); + hash->size = size; + hash->nnodes = 0; + hash->hash_func = hash_func ? hash_func : g_direct_hash; + hash->key_equal_func = key_equal_func; + hash->nodes = rspamd_mempool_alloc0_shared (pool, sizeof (struct rspamd_hash_node *) * hash->size); + hash->shared = 1; + /* Get mutex from pool for locking on insert/remove operations */ + hash->lock = rspamd_mempool_get_rwlock (pool); + hash->pool = pool; + + return hash; +} + +/* + * Insert item in hash + */ +void +rspamd_hash_insert (rspamd_hash_t * hash, gpointer key, gpointer value) +{ + struct rspamd_hash_node **node_ptr, *node; + guint key_hash; + + g_return_if_fail (hash != NULL); + node_ptr = rspamd_hash_lookup_node (hash, key, &key_hash); + + if (hash->shared) { + rspamd_mempool_wlock_rwlock (hash->lock); + } + if ((node = *node_ptr)) { + node->key = key; + node->value = value; + } + else { + if (hash->shared) { + node = rspamd_mempool_alloc_shared (hash->pool, sizeof (struct rspamd_hash_node)); + } + else { + node = rspamd_mempool_alloc (hash->pool, sizeof (struct rspamd_hash_node)); + } + + node->key = key; + node->value = value; + node->key_hash = key_hash; + node->next = NULL; + + *node_ptr = node; + hash->nnodes++; + } + if (hash->shared) { + rspamd_mempool_wunlock_rwlock (hash->lock); + } + + if (!hash->shared) { + rspamd_hash_maybe_resize (hash); + } +} + +/* + * Remove item from hash + */ +gboolean +rspamd_hash_remove (rspamd_hash_t * hash, gpointer key) +{ + struct rspamd_hash_node **node_ptr; + + 
g_return_val_if_fail (hash != NULL, FALSE); + + node_ptr = rspamd_hash_lookup_node (hash, key, NULL); + if (*node_ptr == NULL) + return FALSE; + + rspamd_hash_remove_node (hash, &node_ptr); + rspamd_hash_maybe_resize (hash); + + return TRUE; +} + +/* + * Lookup item from hash + */ +gpointer +rspamd_hash_lookup (rspamd_hash_t * hash, gpointer key) +{ + struct rspamd_hash_node *node; + g_return_val_if_fail (hash != NULL, NULL); + + node = *rspamd_hash_lookup_node (hash, key, NULL); + + return node ? node->value : NULL; +} + +/* + * Iterate throught hash + */ +void +rspamd_hash_foreach (rspamd_hash_t * hash, GHFunc func, gpointer user_data) +{ + struct rspamd_hash_node *node; + gint i; + + g_return_if_fail (hash != NULL); + g_return_if_fail (func != NULL); + + if (hash->shared) { + rspamd_mempool_rlock_rwlock (hash->lock); + } + for (i = 0; i < hash->size; i++) { + for (node = hash->nodes[i]; node; node = node->next) { + (*func) (node->key, node->value, user_data); + } + } + if (hash->shared) { + rspamd_mempool_runlock_rwlock (hash->lock); + } +} + +/** + * LRU hashing + */ + +static void +rspamd_lru_hash_destroy_node (gpointer v) +{ + rspamd_lru_element_t *node = v; + + if (node->hash->value_destroy) { + node->hash->value_destroy (node->data); + } + g_queue_delete_link (node->hash->q, node->link); + g_slice_free1 (sizeof (rspamd_lru_element_t), node); +} + +static rspamd_lru_element_t* +rspamd_lru_create_node (rspamd_lru_hash_t *hash, gpointer key, gpointer value, time_t now, guint ttl) +{ + rspamd_lru_element_t *node; + + node = g_slice_alloc (sizeof (rspamd_lru_element_t)); + node->data = value; + node->key = key; + node->store_time = now; + node->ttl = ttl; + node->hash = hash; + + return node; +} + +/** + * Create new lru hash with GHashTable as storage + * @param maxsize maximum elements in a hash + * @param maxage maximum age of elemnt + * @param hash_func pointer to hash function + * @param key_equal_func pointer to function for comparing keys + * @return new 
rspamd_hash object + */ +rspamd_lru_hash_t* +rspamd_lru_hash_new (GHashFunc hash_func, GEqualFunc key_equal_func, gint maxsize, gint maxage, + GDestroyNotify key_destroy, GDestroyNotify value_destroy) +{ + rspamd_lru_hash_t *new; + + new = g_malloc (sizeof (rspamd_lru_hash_t)); + new->storage = g_hash_table_new_full (hash_func, key_equal_func, key_destroy, rspamd_lru_hash_destroy_node); + new->maxage = maxage; + new->maxsize = maxsize; + new->value_destroy = value_destroy; + new->key_destroy = NULL; + new->q = g_queue_new (); + new->insert_func = (lru_cache_insert_func)g_hash_table_replace; + new->lookup_func = (lru_cache_lookup_func)g_hash_table_lookup; + new->delete_func = (lru_cache_delete_func)g_hash_table_remove; + new->destroy_func = (lru_cache_destroy_func)g_hash_table_destroy; + + return new; +} +/** + * Create new lru hash with custom storage + * @param maxsize maximum elements in a hash + * @param maxage maximum age of elemnt + * @param hash_func pointer to hash function + * @param key_equal_func pointer to function for comparing keys + * @return new rspamd_hash object + */ +rspamd_lru_hash_t* +rspamd_lru_hash_new_full (GHashFunc hash_func, GEqualFunc key_equal_func, + gint maxsize, gint maxage, GDestroyNotify key_destroy, GDestroyNotify value_destroy, + gpointer storage, lru_cache_insert_func insert_func, lru_cache_lookup_func lookup_func, + lru_cache_delete_func delete_func) +{ + rspamd_lru_hash_t *new; + + new = g_malloc (sizeof (rspamd_lru_hash_t)); + new->storage = storage; + new->maxage = maxage; + new->maxsize = maxsize; + new->value_destroy = value_destroy; + new->key_destroy = key_destroy; + new->q = g_queue_new (); + new->insert_func = insert_func; + new->lookup_func = lookup_func; + new->delete_func = delete_func; + new->destroy_func = NULL; + + return new; +} + +/** + * Lookup item from hash + * @param hash hash object + * @param key key to find + * @return value of key or NULL if key is not found + */ +gpointer +rspamd_lru_hash_lookup 
(rspamd_lru_hash_t *hash, gpointer key, time_t now) +{ + rspamd_lru_element_t *res; + + if ((res = hash->lookup_func (hash->storage, key)) != NULL) { + if (res->ttl != 0) { + if (now - res->store_time > res->ttl) { + hash->delete_func (hash->storage, key); + return NULL; + } + } + if (hash->maxage > 0) { + if (now - res->store_time > hash->maxage) { + res = g_queue_peek_tail (hash->q); + /* Expire elements from queue tail */ + while (res != NULL && now - res->store_time > hash->maxage) { + hash->delete_func (hash->storage, res->key); + res = g_queue_peek_tail (hash->q); + } + + return NULL; + } + } + return res->data; + } + + return NULL; +} +/** + * Insert item in hash + * @param hash hash object + * @param key key to insert + * @param value value of key + */ +void +rspamd_lru_hash_insert (rspamd_lru_hash_t *hash, gpointer key, gpointer value, + time_t now, guint ttl) +{ + rspamd_lru_element_t *res; + gint removed = 0; + + if ((res = hash->lookup_func (hash->storage, key)) != NULL) { + hash->delete_func (hash->storage, res->key); + } + else { + if (hash->maxsize > 0 && + (gint)g_queue_get_length (hash->q) >= hash->maxsize) { + /* Expire some elements */ + res = g_queue_peek_tail (hash->q); + if (hash->maxage > 0) { + while (res != NULL && now - res->store_time > hash->maxage) { + if (res->key != NULL) { + hash->delete_func (hash->storage, res->key); + } + else { + break; + } + res = g_queue_peek_tail (hash->q); + removed ++; + } + } + if (removed == 0) { + /* Remove explicitly */ + if (res->key != NULL) { + hash->delete_func (hash->storage, res->key); + } + } + } + } + + res = rspamd_lru_create_node (hash, key, value, now, ttl); + hash->insert_func (hash->storage, key, res); + g_queue_push_head (hash->q, res); + res->link = g_queue_peek_head_link (hash->q); +} + +void +rspamd_lru_hash_destroy (rspamd_lru_hash_t *hash) +{ + if (hash->destroy_func) { + hash->destroy_func (hash->storage); + } + g_queue_free (hash->q); + g_free (hash); +} + +/* + * vi:ts=4 + */ diff 
--git a/src/libutil/hash.h b/src/libutil/hash.h new file mode 100644 index 000000000..c5d4639af --- /dev/null +++ b/src/libutil/hash.h @@ -0,0 +1,160 @@
/**
 * @file hash.h
 * Hash table implementation that allows using memory pools for storage as well as using
 * shared memory for this purpose
 */

#ifndef RSPAMD_HASH_H
#define RSPAMD_HASH_H

#include "mem_pool.h"

/* One key/value pair; nodes of the same bucket are chained through `next` */
struct rspamd_hash_node {
	gpointer key;
	gpointer value;
	guint key_hash;	/* presumably the cached hash of `key` - TODO confirm in hash.c */
	struct rspamd_hash_node *next;
};

typedef struct rspamd_hash_s {
	gint size;		/* number of buckets in `nodes` */
	gint nnodes;	/* value reported by rspamd_hash_size () */
	struct rspamd_hash_node **nodes;

	GHashFunc hash_func;
	GEqualFunc key_equal_func;
	gint shared;	/* non-zero: `lock` is taken while iterating */
	rspamd_mempool_rwlock_t *lock;
	rspamd_mempool_t *pool;
} rspamd_hash_t;

/* Storage callbacks used by the lru hash */
typedef void (*lru_cache_insert_func)(gpointer storage, gpointer key, gpointer value);
typedef gpointer (*lru_cache_lookup_func)(gpointer storage, gpointer key);
typedef gboolean (*lru_cache_delete_func)(gpointer storage, gpointer key);
typedef void (*lru_cache_destroy_func)(gpointer storage);

typedef struct rspamd_lru_hash_s {
	gint maxsize;	/* maximum number of elements, <= 0 disables the limit */
	gint maxage;	/* maximum age of an element, <= 0 disables the check */
	GDestroyNotify value_destroy;
	GDestroyNotify key_destroy;
	GQueue *q;		/* elements ordered by insertion, newest at the head */
	gpointer storage;
	lru_cache_insert_func insert_func;
	lru_cache_lookup_func lookup_func;
	lru_cache_delete_func delete_func;
	lru_cache_destroy_func destroy_func;	/* may be NULL for custom storage */
} rspamd_lru_hash_t;

typedef struct rspamd_lru_element_s {
	gpointer data;			/* user value */
	gpointer key;
	time_t store_time;		/* time of insertion */
	guint ttl;				/* per-element time to live, 0 means none */
	rspamd_lru_hash_t *hash;	/* owning lru hash */
	GList *link;			/* link of this element inside hash->q */
} rspamd_lru_element_t;


/* Number of nodes stored in a rspamd_hash_t */
#define rspamd_hash_size(x) (x)->nnodes

/**
 * Create new hash in specified pool
 * @param pool memory pool object
 * @param hash_func pointer to hash function
 * @param key_equal_func pointer to function for comparing keys
 * @return new rspamd_hash object
 */
rspamd_hash_t* rspamd_hash_new (rspamd_mempool_t *pool, GHashFunc hash_func, GEqualFunc key_equal_func);

/**
 * Create new hash in specified pool using shared memory
 * @param pool memory pool object
 * @param hash_func pointer to hash function
 * @param key_equal_func pointer to function for comparing keys
 * @param size presumably the number of buckets to allocate - TODO confirm in hash.c
 * @return new rspamd_hash object
 */
rspamd_hash_t* rspamd_hash_new_shared (rspamd_mempool_t *pool, GHashFunc hash_func, GEqualFunc key_equal_func, gint size);

/**
 * Insert item in hash
 * @param hash hash object
 * @param key key to insert
 * @param value value of key
 */
void rspamd_hash_insert (rspamd_hash_t *hash, gpointer key, gpointer value);

/**
 * Remove item from hash
 * @param hash hash object
 * @param key key to delete
 * @return TRUE if the key was found and removed, FALSE otherwise
 */
gboolean rspamd_hash_remove (rspamd_hash_t *hash, gpointer key);

/**
 * Lookup item from hash
 * @param hash hash object
 * @param key key to find
 * @return value of key or NULL if key is not found
 */
gpointer rspamd_hash_lookup (rspamd_hash_t *hash, gpointer key);

/**
 * Iterate through hash
 * @param hash hash object
 * @param func user's function that would be called for each key/value pair
 * @param user_data pointer to user's data that would be passed to user's function
 */
void rspamd_hash_foreach (rspamd_hash_t *hash, GHFunc func, gpointer user_data);

/**
 * Create new lru hash
 * @param hash_func pointer to hash function
 * @param key_equal_func pointer to function for comparing keys
 * @param maxsize maximum elements in a hash
 * @param maxage maximum age of element
 * @param key_destroy destructor for keys
 * @param value_destroy destructor for values
 * @return new rspamd_hash object
 */
rspamd_lru_hash_t* rspamd_lru_hash_new (GHashFunc hash_func, GEqualFunc key_equal_func,
	gint maxsize, gint maxage, GDestroyNotify key_destroy, GDestroyNotify value_destroy);

/**
 * Create new lru hash with custom storage
 * @param hash_func pointer to hash function
 * @param key_equal_func pointer to function for comparing keys
 * @param maxsize maximum elements in a hash
 * @param maxage maximum age of element
 * @param key_destroy destructor for keys
 * @param value_destroy destructor for values
 * @param storage custom storage object passed to the callbacks
 * @param insert_func callback inserting a key/element pair into storage
 * @param lookup_func callback looking up an element by key
 * @param delete_func callback deleting an element by key
 * @return new rspamd_hash object
 */
rspamd_lru_hash_t* rspamd_lru_hash_new_full (GHashFunc hash_func, GEqualFunc key_equal_func,
	gint maxsize, gint maxage, GDestroyNotify key_destroy, GDestroyNotify value_destroy,
	gpointer storage, lru_cache_insert_func insert_func, lru_cache_lookup_func lookup_func,
	lru_cache_delete_func delete_func);
/**
 * Lookup item from hash
 * @param hash hash object
 * @param key key to find
 * @param now current time used to check expiration
 * @return value of key or NULL if key is not found
 */
gpointer rspamd_lru_hash_lookup (rspamd_lru_hash_t *hash, gpointer key, time_t now);
/**
 * Insert item in hash
 * @param hash hash object
 * @param key key to insert
 * @param value value of key
 * @param now current time stored with the element
 * @param ttl time to live of the element, 0 for no per-element limit
 */
void rspamd_lru_hash_insert (rspamd_lru_hash_t *hash, gpointer key, gpointer value,
	time_t now, guint ttl);

/**
 * Remove lru hash
 * @param hash hash object
 */

void rspamd_lru_hash_destroy (rspamd_lru_hash_t *hash);

#endif

/*
 * vi:ts=4
 */
diff --git a/src/libutil/http.c b/src/libutil/http.c new file mode 100644 index 000000000..491468352 --- /dev/null +++ b/src/libutil/http.c @@ -0,0 +1,1222 @@
/* Copyright (c) 2014, Vsevolod Stakhov
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *       * Redistributions of source code must retain the above copyright
 *         notice, this list of conditions and the following disclaimer.
 *       * Redistributions in binary form must reproduce the above copyright
 *         notice, this list of conditions and the following disclaimer in the
 *         documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.
IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "config.h"
#include "http.h"
#include "utlist.h"
#include "util.h"
#include "printf.h"
#include "logger.h"

/* Private per-connection state, hidden behind rspamd_http_connection->priv */
struct rspamd_http_connection_private {
	GString *buf;						/* read buffer, or the formatted first lines of a written message */
	gboolean new_header;				/* TRUE after a header value chunk: the next field chunk starts a new header */
	struct rspamd_http_header *header;	/* header currently being parsed */
	struct http_parser parser;
	struct http_parser_settings parser_cb;
	struct event ev;					/* libevent event for the connection socket */
	struct timeval tv;					/* private copy of the I/O timeout */
	struct timeval *ptv;				/* &tv, or NULL when no timeout is used */
	struct rspamd_http_message *msg;	/* message being read or written */
	struct iovec *out;					/* output vector for writev () */
	guint outlen;						/* number of elements in `out` */
	gsize wr_pos;						/* bytes written so far */
	gsize wr_total;						/* total bytes to write */
};

/* Indexes into http_file_types[] */
enum http_magic_type {
	HTTP_MAGIC_PLAIN = 0,
	HTTP_MAGIC_HTML,
	HTTP_MAGIC_CSS,
	HTTP_MAGIC_JS,
	HTTP_MAGIC_PNG,
	HTTP_MAGIC_JPG
};

/* File extension to Content-Type mapping used for static files */
static const struct _rspamd_http_magic {
	const gchar *ext;
	const gchar *ct;
} http_file_types[] = {
	[HTTP_MAGIC_PLAIN] = { "txt", "text/plain" },
	[HTTP_MAGIC_HTML] = { "html", "text/html" },
	[HTTP_MAGIC_CSS] = { "css", "text/css" },
	[HTTP_MAGIC_JS] = { "js", "application/javascript" },
	[HTTP_MAGIC_PNG] = { "png", "image/png" },
	[HTTP_MAGIC_JPG] = { "jpg", "image/jpeg" },
};

/* Day and month names used to format the Date header */
static gchar *http_week[] = { "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat" };
static gchar *http_month[] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun",
							   "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" };


/* GError domain for all errors reported by this module */
#define HTTP_ERROR http_error_quark ()
GQuark
http_error_quark (void)
{
	return g_quark_from_static_string ("http-error-quark");
}

/*
 * Map an HTTP status code to a reason phrase. Specific codes are tested
 * before the generic class ranges, so the order of the checks matters.
 */
static const gchar *
rspamd_http_code_to_str (gint code)
{
	if (code == 200) {
		return "OK";
	}
	else if (code == 404) {
		return "Not found";
	}
	else if (code == 403 || code == 401) {
		return "Not authorized";
	}
	else if (code >= 400 && code < 500) {
		return "Bad request";
	}
	else if (code >= 300 && code < 400) {
		return "See Other";
	}
	else if (code >= 500 && code < 600) {
		return "Internal server error";
	}

	return "Unknown error";
}

/*
 * Obtained from nginx
 * Copyright (C) Igor Sysoev
 */
/* Days per month, non-leap year; February 29 is validated separately */
static guint mday[] = { 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 };

/*
 * Parse an HTTP date in one of the three formats listed in the enum below.
 * @param header date string
 * @param len length of the string, 0 means that strlen () is used
 * @return seconds since the epoch or (time_t)-1 if the date is malformed
 */
time_t
rspamd_http_parse_date (const gchar *header, gsize len)
{
	const gchar *p, *end;
	gint month;
	guint day, year, hour, min, sec;
	guint64 time;
	enum {
		no = 0, rfc822, /* Tue, 10 Nov 2002 23:50:13 */
		rfc850, /* Tuesday, 10-Dec-02 23:50:13 */
		isoc /* Tue Dec 10 23:50:13 2002 */
	} fmt;

	fmt = 0;
	if (len > 0) {
		end = header + len;
	}
	else {
		end = header + strlen (header);
	}

#if (NGX_SUPPRESS_WARN)
	day = 32;
	year = 2038;
#endif

	/* Skip the week day: a comma ends it in rfc822/rfc850, a space in isoc */
	for (p = header; p < end; p++) {
		if (*p == ',') {
			break;
		}

		if (*p == ' ') {
			fmt = isoc;
			break;
		}
	}

	for (p++; p < end; p++)
		if (*p != ' ') {
			break;
		}

	if (end - p < 18) {
		return (time_t)-1;
	}

	if (fmt != isoc) {
		/* Two digit day, the following separator tells rfc822 from rfc850 */
		if (*p < '0' || *p > '9' || *(p + 1) < '0' || *(p + 1) > '9') {
			return (time_t)-1;
		}

		day = (*p - '0') * 10 + *(p + 1) - '0';
		p += 2;

		if (*p == ' ') {
			if (end - p < 18) {
				return (time_t)-1;
			}
			fmt = rfc822;

		}
		else if (*p == '-') {
			fmt = rfc850;

		}
		else {
			return (time_t)-1;
		}

		p++;
	}

	/* Month name: only the distinguishing letters are inspected */
	switch (*p) {

	case 'J':
		month = *(p + 1) == 'a' ? 0 : *(p + 2) == 'n' ? 5 : 6;
		break;

	case 'F':
		month = 1;
		break;

	case 'M':
		month = *(p + 2) == 'r' ? 2 : 4;
		break;

	case 'A':
		month = *(p + 1) == 'p' ? 3 : 7;
		break;

	case 'S':
		month = 8;
		break;

	case 'O':
		month = 9;
		break;

	case 'N':
		month = 10;
		break;

	case 'D':
		month = 11;
		break;

	default:
		return (time_t)-1;
	}

	p += 3;

	if ((fmt == rfc822 && *p != ' ') || (fmt == rfc850 && *p != '-')) {
		return (time_t)-1;
	}

	p++;

	if (fmt == rfc822) {
		/* Four digit year */
		if (*p < '0' || *p > '9' || *(p + 1) < '0' || *(p + 1) > '9'
			|| *(p + 2) < '0' || *(p + 2) > '9' || *(p + 3) < '0'
			|| *(p + 3) > '9') {
			return (time_t)-1;
		}

		year = (*p - '0') * 1000 + (*(p + 1) - '0') * 100
			+ (*(p + 2) - '0') * 10 + *(p + 3) - '0';
		p += 4;

	}
	else if (fmt == rfc850) {
		/* Two digit year: 00-69 map to 20xx, 70-99 to 19xx */
		if (*p < '0' || *p > '9' || *(p + 1) < '0' || *(p + 1) > '9') {
			return (time_t)-1;
		}

		year = (*p - '0') * 10 + *(p + 1) - '0';
		year += (year < 70) ? 2000 : 1900;
		p += 2;
	}

	if (fmt == isoc) {
		/* Day of month, one or two digits, possibly space padded */
		if (*p == ' ') {
			p++;
		}

		if (*p < '0' || *p > '9') {
			return (time_t)-1;
		}

		day = *p++ - '0';

		if (*p != ' ') {
			if (*p < '0' || *p > '9') {
				return (time_t)-1;
			}

			day = day * 10 + *p++ - '0';
		}

		if (end - p < 14) {
			return (time_t)-1;
		}
	}

	if (*p++ != ' ') {
		return (time_t)-1;
	}

	/* hh:mm:ss */
	if (*p < '0' || *p > '9' || *(p + 1) < '0' || *(p + 1) > '9') {
		return (time_t)-1;
	}

	hour = (*p - '0') * 10 + *(p + 1) - '0';
	p += 2;

	if (*p++ != ':') {
		return (time_t)-1;
	}

	if (*p < '0' || *p > '9' || *(p + 1) < '0' || *(p + 1) > '9') {
		return (time_t)-1;
	}

	min = (*p - '0') * 10 + *(p + 1) - '0';
	p += 2;

	if (*p++ != ':') {
		return (time_t)-1;
	}

	if (*p < '0' || *p > '9' || *(p + 1) < '0' || *(p + 1) > '9') {
		return (time_t)-1;
	}

	sec = (*p - '0') * 10 + *(p + 1) - '0';

	if (fmt == isoc) {
		/* In the isoc format the four digit year follows the time */
		p += 2;

		if (*p++ != ' ') {
			return (time_t)-1;
		}

		if (*p < '0' || *p > '9' || *(p + 1) < '0' || *(p + 1) > '9'
			|| *(p + 2) < '0' || *(p + 2) > '9' || *(p + 3) < '0'
			|| *(p + 3) > '9') {
			return (time_t)-1;
		}

		year = (*p - '0') * 1000 + (*(p + 1) - '0') * 100
			+ (*(p + 2) - '0') * 10 + *(p + 3) - '0';
	}

	if (hour > 23 || min > 59 || sec > 59) {
		return (time_t)-1;
	}

	/* February 29 is accepted in leap years only */
	if (day == 29 && month == 1) {
		if ((year & 3) || ((year % 100 == 0) && (year % 400) != 0)) {
			return (time_t)-1;
		}

	}
	else if (day > mday[month]) {
		return (time_t)-1;
	}

	/*
	 * shift new year to March 1 and start months from 1 (not 0),
	 * it is needed for Gauss' formula
	 */

	if (--month <= 0) {
		month += 12;
		year -= 1;
	}

	/* Gauss' formula for Gregorian days since March 1, 1 BC */

	time = (guint64) (
		/* days in years including leap years since March 1, 1 BC */

		365 * year + year / 4 - year / 100 + year / 400

		/* days before the month */

		+ 367 * month / 12 - 30

		/* days before the day */

		+ day - 1

		/*
		 * 719527 days were between March 1, 1 BC and March 1, 1970,
		 * 31 and 28 days were in January and February 1970
		 */

		- 719527 + 31 + 28) * 86400 + hour * 3600 + min * 60 + sec;

	return (time_t) time;
}

/*
 * If the header that has just been completed is "Date" (any letter case),
 * parse it and store the result in the message.
 */
static inline void
rspamd_http_check_date (struct rspamd_http_connection_private *priv)
{
	if (g_ascii_strcasecmp (priv->header->name->str, "date") == 0) {
		priv->msg->date = rspamd_http_parse_date (priv->header->value->str,
				priv->header->value->len);
	}
}

/* http_parser callback: append a chunk of the request URL to the message */
static gint
rspamd_http_on_url (http_parser* parser, const gchar *at, size_t length)
{
	struct rspamd_http_connection *conn = (struct rspamd_http_connection *)parser->data;
	struct rspamd_http_connection_private *priv;

	priv = conn->priv;

	g_string_append_len (priv->msg->url, at, length);

	return 0;
}

/*
 * http_parser callback: append a chunk of a header name. A fresh header is
 * allocated for the very first chunk and whenever a value chunk has been seen
 * since the previous name chunk; in the latter case the completed header is
 * first linked into the message.
 */
static gint
rspamd_http_on_header_field (http_parser* parser, const gchar *at, size_t length)
{
	struct rspamd_http_connection *conn = (struct rspamd_http_connection *)parser->data;
	struct rspamd_http_connection_private *priv;

	priv = conn->priv;

	if (priv->header == NULL) {
		priv->header = g_slice_alloc (sizeof (struct rspamd_http_header));
		priv->header->name =
g_string_sized_new (32); + priv->header->value = g_string_sized_new (32); + } + else if (priv->new_header) { + DL_APPEND (priv->msg->headers, priv->header); + rspamd_http_check_date (priv); + priv->header = g_slice_alloc (sizeof (struct rspamd_http_header)); + priv->header->name = g_string_sized_new (32); + priv->header->value = g_string_sized_new (32); + } + + priv->new_header = FALSE; + g_string_append_len (priv->header->name, at, length); + + return 0; +} + +static gint +rspamd_http_on_header_value (http_parser* parser, const gchar *at, size_t length) +{ + struct rspamd_http_connection *conn = (struct rspamd_http_connection *)parser->data; + struct rspamd_http_connection_private *priv; + + priv = conn->priv; + + if (priv->header == NULL) { + /* Should not happen */ + return -1; + } + + priv->new_header = TRUE; + g_string_append_len (priv->header->value, at, length); + + return 0; +} + +static int +rspamd_http_on_headers_complete (http_parser* parser) +{ + struct rspamd_http_connection *conn = (struct rspamd_http_connection *)parser->data; + struct rspamd_http_connection_private *priv; + + priv = conn->priv; + + if (priv->header != NULL) { + DL_APPEND (priv->msg->headers, priv->header); + rspamd_http_check_date (priv); + priv->header = NULL; + } + + if (parser->content_length != 0 && parser->content_length != ULLONG_MAX) { + priv->msg->body = g_string_sized_new (parser->content_length + 1); + } + else { + priv->msg->body = g_string_sized_new (BUFSIZ); + } + + priv->msg->method = parser->method; + + return 0; +} + +static int +rspamd_http_on_body (http_parser* parser, const gchar *at, size_t length) +{ + struct rspamd_http_connection *conn = (struct rspamd_http_connection *)parser->data; + struct rspamd_http_connection_private *priv; + + priv = conn->priv; + + g_string_append_len (priv->msg->body, at, length); + + if (conn->opts & RSPAMD_HTTP_BODY_PARTIAL) { + return (conn->body_handler (conn, priv->msg, at, length)); + } + + return 0; +} + +static int 
+rspamd_http_on_message_complete (http_parser* parser) +{ + struct rspamd_http_connection *conn = (struct rspamd_http_connection *)parser->data; + struct rspamd_http_connection_private *priv; + int ret = 0; + + priv = conn->priv; + + if (conn->body_handler != NULL) { + rspamd_http_connection_ref (conn); + if (conn->opts & RSPAMD_HTTP_BODY_PARTIAL) { + ret = conn->body_handler (conn, priv->msg, NULL, 0); + } + else { + ret = conn->body_handler (conn, priv->msg, priv->msg->body->str, priv->msg->body->len); + } + rspamd_http_connection_unref (conn); + } + + if (ret == 0) { + rspamd_http_connection_ref (conn); + ret = conn->finish_handler (conn, priv->msg); + rspamd_http_connection_unref (conn); + } + + return ret; +} + +static void +rspamd_http_write_helper (struct rspamd_http_connection *conn) +{ + struct rspamd_http_connection_private *priv; + struct iovec *start; + guint niov, i; + gsize remain; + gssize r; + GError *err; + + priv = conn->priv; + + if (priv->wr_pos == priv->wr_total) { + rspamd_http_connection_ref (conn); + conn->finish_handler (conn, priv->msg); + rspamd_http_connection_unref (conn); + return; + } + + start = &priv->out[0]; + niov = priv->outlen; + remain = priv->wr_pos; + for (i = 0; i < priv->outlen && remain > 0; i ++) { + /* Find out the first iov required */ + start = &priv->out[i]; + if (start->iov_len <= remain) { + remain -= start->iov_len; + start = &priv->out[i + 1]; + niov --; + } + else { + start->iov_base = (void *)((char *)start->iov_base + remain); + start->iov_len -= remain; + remain = 0; + } + } + + r = writev (conn->fd, start, MIN (IOV_MAX, niov)); + + if (r == -1) { + err = g_error_new (HTTP_ERROR, errno, "IO write error: %s", strerror (errno)); + rspamd_http_connection_ref (conn); + conn->error_handler (conn, err); + rspamd_http_connection_unref (conn); + g_error_free (err); + return; + } + else { + priv->wr_pos += r; + } + + if (priv->wr_pos >= priv->wr_total) { + rspamd_http_connection_ref (conn); + conn->finish_handler 
(conn, priv->msg); + rspamd_http_connection_unref (conn); + } + else { + /* Want to write more */ + event_add (&priv->ev, priv->ptv); + } +} + +static void +rspamd_http_event_handler (int fd, short what, gpointer ud) +{ + struct rspamd_http_connection *conn = (struct rspamd_http_connection *)ud; + struct rspamd_http_connection_private *priv; + GString *buf; + gssize r; + GError *err; + + priv = conn->priv; + buf = priv->buf; + + if (what == EV_READ) { + r = read (fd, buf->str, buf->allocated_len); + if (r == -1) { + err = g_error_new (HTTP_ERROR, errno, "IO read error: %s", strerror (errno)); + conn->error_handler (conn, err); + g_error_free (err); + return; + } + else { + buf->len = r; + rspamd_http_connection_ref (conn); + if (http_parser_execute (&priv->parser, &priv->parser_cb, buf->str, r) != (size_t)r) { + err = g_error_new (HTTP_ERROR, priv->parser.http_errno, + "HTTP parser error: %s", http_errno_description (priv->parser.http_errno)); + conn->error_handler (conn, err); + g_error_free (err); + rspamd_http_connection_unref (conn); + return; + } + rspamd_http_connection_unref (conn); + } + } + else if (what == EV_TIMEOUT) { + err = g_error_new (HTTP_ERROR, ETIMEDOUT, + "IO timeout"); + rspamd_http_connection_ref (conn); + conn->error_handler (conn, err); + rspamd_http_connection_unref (conn); + g_error_free (err); + return; + } + else if (what == EV_WRITE) { + rspamd_http_write_helper (conn); + } +} + +struct rspamd_http_connection* +rspamd_http_connection_new (rspamd_http_body_handler_t body_handler, + rspamd_http_error_handler_t error_handler, + rspamd_http_finish_handler_t finish_handler, + enum rspamd_http_options opts, + enum rspamd_http_connection_type type) +{ + struct rspamd_http_connection *new; + struct rspamd_http_connection_private *priv; + + if (error_handler == NULL || finish_handler == NULL) { + return NULL; + } + + new = g_slice_alloc0 (sizeof (struct rspamd_http_connection)); + new->opts = opts; + new->type = type; + new->body_handler = 
body_handler; + new->error_handler = error_handler; + new->finish_handler = finish_handler; + new->fd = -1; + new->ref = 1; + + /* Init priv */ + priv = g_slice_alloc0 (sizeof (struct rspamd_http_connection_private)); + http_parser_init (&priv->parser, type == RSPAMD_HTTP_SERVER ? HTTP_REQUEST : HTTP_RESPONSE); + priv->parser.data = new; + priv->parser_cb.on_url = rspamd_http_on_url; + priv->parser_cb.on_header_field = rspamd_http_on_header_field; + priv->parser_cb.on_header_value = rspamd_http_on_header_value; + priv->parser_cb.on_headers_complete = rspamd_http_on_headers_complete; + priv->parser_cb.on_body = rspamd_http_on_body; + priv->parser_cb.on_message_complete = rspamd_http_on_message_complete; + + new->priv = priv; + + return new; +} + +void +rspamd_http_connection_reset (struct rspamd_http_connection *conn) +{ + struct rspamd_http_connection_private *priv; + struct rspamd_http_message *msg; + + priv = conn->priv; + msg = priv->msg; + + /* Clear request */ + if (msg != NULL) { + rspamd_http_message_free (msg); + priv->msg = NULL; + } + + /* Clear priv */ + event_del (&priv->ev); + if (priv->buf != NULL) { + g_string_free (priv->buf, TRUE); + priv->buf = NULL; + } + if (priv->out != NULL) { + g_slice_free1 (sizeof (struct iovec) * priv->outlen, priv->out); + priv->out = NULL; + } +} + +void +rspamd_http_connection_free (struct rspamd_http_connection *conn) +{ + struct rspamd_http_connection_private *priv; + + priv = conn->priv; + rspamd_http_connection_reset (conn); + g_slice_free1 (sizeof (struct rspamd_http_connection_private), priv); + g_slice_free1 (sizeof (struct rspamd_http_connection), conn); +} + +void +rspamd_http_connection_read_message (struct rspamd_http_connection *conn, + gpointer ud, gint fd, struct timeval *timeout, struct event_base *base) +{ + struct rspamd_http_connection_private *priv = conn->priv; + struct rspamd_http_message *req; + + conn->fd = fd; + conn->ud = ud; + req = rspamd_http_new_message (conn->type == RSPAMD_HTTP_SERVER ? 
HTTP_REQUEST : HTTP_RESPONSE); + priv->msg = req; + + if (timeout == NULL) { + priv->ptv = NULL; + } + else { + memcpy (&priv->tv, timeout, sizeof (struct timeval)); + priv->ptv = &priv->tv; + } + priv->header = NULL; + priv->buf = g_string_sized_new (BUFSIZ); + priv->new_header = TRUE; + + event_set (&priv->ev, fd, EV_READ | EV_PERSIST, rspamd_http_event_handler, conn); + event_base_set (base, &priv->ev); + event_add (&priv->ev, priv->ptv); +} + +void +rspamd_http_connection_write_message (struct rspamd_http_connection *conn, + struct rspamd_http_message *msg, const gchar *host, const gchar *mime_type, + gpointer ud, gint fd, struct timeval *timeout, struct event_base *base) +{ + struct rspamd_http_connection_private *priv = conn->priv; + struct rspamd_http_header *hdr; + struct tm t, *ptm; + gchar datebuf[64], *pbody; + gint i; + gsize bodylen; + + conn->fd = fd; + conn->ud = ud; + priv->msg = msg; + + if (timeout == NULL) { + priv->ptv = NULL; + } + else { + memcpy (&priv->tv, timeout, sizeof (struct timeval)); + priv->ptv = &priv->tv; + } + priv->header = NULL; + priv->buf = g_string_sized_new (128); + + if (msg->method < HTTP_SYMBOLS) { + if (msg->body == NULL || msg->body->len == 0) { + pbody = NULL; + bodylen = 0; + priv->outlen = 2; + msg->method = HTTP_GET; + } + else { + pbody = msg->body->str; + bodylen = msg->body->len; + priv->outlen = 3; + msg->method = HTTP_POST; + } + } + else if (msg->body != NULL) { + pbody = msg->body->str; + bodylen = msg->body->len; + priv->outlen = 2; + } + else { + /* Invalid body for spamc method */ + return; + } + + if (conn->type == RSPAMD_HTTP_SERVER) { + /* Format reply */ + if (msg->method < HTTP_SYMBOLS) { + ptm = gmtime (&msg->date); + t = *ptm; + rspamd_snprintf (datebuf, sizeof (datebuf), "%s, %02d %s %4d %02d:%02d:%02d GMT", + http_week[t.tm_wday], + t.tm_mday, + http_month[t.tm_mon], + t.tm_year + 1900, + t.tm_hour, + t.tm_min, + t.tm_sec); + if (mime_type == NULL) { + mime_type = "text/plain"; + } + 
rspamd_printf_gstring (priv->buf, "HTTP/1.1 %d %s\r\n" + "Connection: close\r\n" + "Server: %s\r\n" + "Date: %s\r\n" + "Content-Length: %z\r\n" + "Content-Type: %s\r\n", + msg->code, rspamd_http_code_to_str (msg->code), + "rspamd/" RVERSION, + datebuf, + msg->body->len, + mime_type); + } + else { + /* Legacy spamd reply */ + rspamd_printf_gstring (priv->buf, "RSPAMD/1.3 0 EX_OK\r\n"); + } + } + else { + /* Format request */ + if (host != NULL) { + rspamd_printf_gstring (priv->buf, "%s %v HTTP/1.1\r\n" + "Connection: close\r\n" + "Host: %s\r\n" + "Content-Length: %z\r\n", + http_method_str (msg->method), msg->url, host, msg->body->len); + } + else { + /* Fallback to HTTP/1.0 */ + rspamd_printf_gstring (priv->buf, "%s %v HTTP/1.0\r\n" + "Content-Length: %z\r\n", + http_method_str (msg->method), msg->url, msg->body->len); + } + } + /* Allocate iov */ + priv->wr_total = bodylen + priv->buf->len + 2; + DL_FOREACH (msg->headers, hdr) { + /* <name><: ><value><\r\n> */ + priv->wr_total += hdr->name->len + hdr->value->len + 4; + priv->outlen += 4; + } + priv->out = g_slice_alloc (sizeof (struct iovec) * priv->outlen); + priv->wr_pos = 0; + + /* Now set up all iov */ + priv->out[0].iov_base = priv->buf->str; + priv->out[0].iov_len = priv->buf->len; + i = 1; + LL_FOREACH (msg->headers, hdr) { + priv->out[i].iov_base = hdr->name->str; + priv->out[i++].iov_len = hdr->name->len; + priv->out[i].iov_base = ": "; + priv->out[i++].iov_len = 2; + priv->out[i].iov_base = hdr->value->str; + priv->out[i++].iov_len = hdr->value->len; + priv->out[i].iov_base = "\r\n"; + priv->out[i++].iov_len = 2; + } + if (msg->method < HTTP_SYMBOLS) { + priv->out[i].iov_base = "\r\n"; + priv->out[i++].iov_len = 2; + } + else { + /* No CRLF for compatibility reply */ + priv->wr_total -= 2; + } + if (msg->body != NULL) { + priv->out[i].iov_base = pbody; + priv->out[i++].iov_len = bodylen; + } + + event_set (&priv->ev, fd, EV_WRITE, rspamd_http_event_handler, conn); + event_base_set (base, &priv->ev); + 
event_add (&priv->ev, priv->ptv); +} + +struct rspamd_http_message* +rspamd_http_new_message (enum http_parser_type type) +{ + struct rspamd_http_message *new; + + new = g_slice_alloc (sizeof (struct rspamd_http_message)); + if (type == HTTP_REQUEST) { + new->url = g_string_sized_new (32); + } + else { + new->url = NULL; + new->code = 200; + } + new->headers = NULL; + new->date = 0; + new->body = NULL; + new->type = type; + new->method = HTTP_GET; + + return new; +} + +void +rspamd_http_message_free (struct rspamd_http_message *msg) +{ + struct rspamd_http_header *hdr, *tmp_hdr; + + LL_FOREACH_SAFE (msg->headers, hdr, tmp_hdr) { + g_string_free (hdr->name, TRUE); + g_string_free (hdr->value, TRUE); + g_slice_free1 (sizeof (struct rspamd_http_header), hdr); + } + if (msg->body != NULL) { + g_string_free (msg->body, TRUE); + } + if (msg->url != NULL) { + g_string_free (msg->url, TRUE); + } + g_slice_free1 (sizeof (struct rspamd_http_message), msg); +} + +void rspamd_http_message_add_header (struct rspamd_http_message *msg, + const gchar *name, + const gchar *value) +{ + struct rspamd_http_header *hdr; + + if (msg != NULL && name != NULL && value != NULL) { + hdr = g_slice_alloc (sizeof (struct rspamd_http_header)); + hdr->name = g_string_new (name); + hdr->value = g_string_new (value); + DL_APPEND (msg->headers, hdr); + } +} + +const gchar* +rspamd_http_message_find_header (struct rspamd_http_message *msg, const gchar *name) +{ + struct rspamd_http_header *hdr; + const gchar *res = NULL; + guint slen = strlen (name); + + if (msg != NULL) { + LL_FOREACH (msg->headers, hdr) { + if (hdr->name->len == slen) { + if (memcmp (hdr->name->str, name, slen) == 0) { + res = hdr->value->str; + break; + } + } + } + } + + return res; +} + +/* + * HTTP router functions + */ + +static void +rspamd_http_entry_free (struct rspamd_http_connection_entry *entry) +{ + if (entry != NULL) { + close (entry->conn->fd); + rspamd_http_connection_unref (entry->conn); + g_slice_free1 (sizeof 
(struct rspamd_http_connection_entry), entry); + if (entry->rt->finish_handler) { + entry->rt->finish_handler (entry); + } + } +} + +static void +rspamd_http_router_error_handler (struct rspamd_http_connection *conn, GError *err) +{ + struct rspamd_http_connection_entry *entry = conn->ud; + struct rspamd_http_message *msg; + + if (entry->is_reply) { + /* At this point we need to finish this session and close owned socket */ + if (entry->rt->error_handler != NULL) { + entry->rt->error_handler (entry, err); + } + rspamd_http_entry_free (entry); + } + else { + /* Here we can write a reply to a client */ + if (entry->rt->error_handler != NULL) { + entry->rt->error_handler (entry, err); + } + msg = rspamd_http_new_message (HTTP_RESPONSE); + msg->date = time (NULL); + msg->code = err->code; + msg->body = g_string_new (err->message); + rspamd_http_connection_reset (entry->conn); + rspamd_http_connection_write_message (entry->conn, msg, NULL, + "text/plain", entry, entry->conn->fd, entry->rt->ptv, entry->rt->ev_base); + entry->is_reply = TRUE; + } +} + +static const gchar * +rspamd_http_router_detect_ct (const gchar *path) +{ + const gchar *dot; + guint i; + + dot = strrchr (path, '.'); + if (dot == NULL) { + return http_file_types[HTTP_MAGIC_PLAIN].ct; + } + dot ++; + + for (i = 0; i < G_N_ELEMENTS (http_file_types); i ++) { + if (strcmp (http_file_types[i].ext, dot) == 0) { + return http_file_types[i].ct; + } + } + + return http_file_types[HTTP_MAGIC_PLAIN].ct; +} + +static gboolean +rspamd_http_router_try_file (struct rspamd_http_connection_entry *entry, + struct rspamd_http_message *msg, gboolean expand_path) +{ + struct stat st; + gint fd; + gchar filebuf[PATH_MAX], realbuf[PATH_MAX], *dir; + struct rspamd_http_message *reply_msg; + + /* XXX: filter filename component only */ + if (expand_path) { + rspamd_snprintf (filebuf, sizeof (filebuf), "%s%c%v", + entry->rt->default_fs_path, G_DIR_SEPARATOR, msg->url); + } + else { + rspamd_snprintf (filebuf, sizeof (filebuf), 
"%v", + msg->url); + } + + if (realpath (filebuf, realbuf) == NULL || + lstat (realbuf, &st) == -1) { + return FALSE; + } + + if (S_ISDIR (st.st_mode) && expand_path) { + /* Try to append 'index.html' to the url */ + g_string_append_printf (msg->url, "%c%s", G_DIR_SEPARATOR, + "index.html"); + return rspamd_http_router_try_file (entry, msg, FALSE); + } + else if (!S_ISREG (st.st_mode)) { + return FALSE; + } + + /* We also need to ensure that file is inside the defined dir */ + dir = dirname (realbuf); + if (dir == NULL || strncmp (dir, entry->rt->default_fs_path, + strlen (entry->rt->default_fs_path)) != 0) { + return FALSE; + } + + fd = open (realbuf, O_RDONLY); + if (fd == -1) { + return FALSE; + } + + reply_msg = rspamd_http_new_message (HTTP_RESPONSE); + reply_msg->date = time (NULL); + reply_msg->code = 200; + reply_msg->body = g_string_sized_new (st.st_size); + + if (read (fd, reply_msg->body->str, st.st_size) != st.st_size) { + close (fd); + rspamd_http_message_free (reply_msg); + return FALSE; + } + + reply_msg->body->len = st.st_size; + reply_msg->body->str[st.st_size] = '\0'; + close (fd); + + rspamd_http_connection_reset (entry->conn); + + /* XXX: detect content type */ + rspamd_http_connection_write_message (entry->conn, reply_msg, NULL, + rspamd_http_router_detect_ct (realbuf), entry, entry->conn->fd, + entry->rt->ptv, entry->rt->ev_base); + + return TRUE; +} + +static int +rspamd_http_router_finish_handler (struct rspamd_http_connection *conn, + struct rspamd_http_message *msg) +{ + struct rspamd_http_connection_entry *entry = conn->ud; + rspamd_http_router_handler_t handler = NULL; + gpointer found; + struct rspamd_http_message *err_msg; + GError *err; + + G_STATIC_ASSERT (sizeof (rspamd_http_router_handler_t) == sizeof (gpointer)); + + if (entry->is_reply) { + /* Request is finished, it is safe to free a connection */ + rspamd_http_entry_free (entry); + } + else { + /* Search for path */ + if (msg->url != NULL && msg->url->len != 0) { + found = 
g_hash_table_lookup (entry->rt->paths, msg->url->str); + memcpy (&handler, &found, sizeof (found)); + } + entry->is_reply = TRUE; + if (handler != NULL) { + return handler (entry, msg); + } + else { + if (entry->rt->default_fs_path == NULL || + rspamd_http_router_try_file (entry, msg, TRUE)) { + err = g_error_new (HTTP_ERROR, 404, + "Not found"); + if (entry->rt->error_handler != NULL) { + entry->rt->error_handler (entry, err); + } + err_msg = rspamd_http_new_message (HTTP_RESPONSE); + err_msg->date = time (NULL); + err_msg->code = err->code; + err_msg->body = g_string_new (err->message); + rspamd_http_connection_reset (entry->conn); + rspamd_http_connection_write_message (entry->conn, err_msg, NULL, + "text/plain", entry, entry->conn->fd, + entry->rt->ptv, entry->rt->ev_base); + g_error_free (err); + } + } + } + + return 0; +} + +struct rspamd_http_connection_router* +rspamd_http_router_new (rspamd_http_router_error_handler_t eh, + rspamd_http_router_finish_handler_t fh, + struct timeval *timeout, struct event_base *base, + const char *default_fs_path) +{ + struct rspamd_http_connection_router* new; + struct stat st; + + new = g_slice_alloc (sizeof (struct rspamd_http_connection_router)); + new->paths = g_hash_table_new (rspamd_strcase_hash, rspamd_strcase_equal); + new->conns = NULL; + new->error_handler = eh; + new->finish_handler = fh; + new->ev_base = base; + if (timeout) { + new->tv = *timeout; + new->ptv = &new->tv; + } + else { + new->ptv = NULL; + } + + new->default_fs_path = NULL; + if (default_fs_path != NULL) { + if (stat (default_fs_path, &st) == -1) { + msg_err ("cannot stat %s", default_fs_path); + } + else { + if (!S_ISDIR (st.st_mode)) { + msg_err ("path %s is not a directory", default_fs_path); + } + else { + new->default_fs_path = g_strdup (default_fs_path); + } + } + } + + return new; +} + +void +rspamd_http_router_add_path (struct rspamd_http_connection_router *router, + const gchar *path, rspamd_http_router_handler_t handler) +{ + gpointer 
ptr; + G_STATIC_ASSERT (sizeof (rspamd_http_router_handler_t) == sizeof (gpointer)); + + if (path != NULL && handler != NULL && router != NULL) { + memcpy (&ptr, &handler, sizeof (ptr)); + g_hash_table_insert (router->paths, (gpointer)path, ptr); + } +} + +void +rspamd_http_router_handle_socket (struct rspamd_http_connection_router *router, + gint fd, gpointer ud) +{ + struct rspamd_http_connection_entry *conn; + + conn = g_slice_alloc (sizeof (struct rspamd_http_connection_entry)); + conn->rt = router; + conn->ud = ud; + conn->is_reply = FALSE; + + conn->conn = rspamd_http_connection_new (NULL, rspamd_http_router_error_handler, + rspamd_http_router_finish_handler, 0, RSPAMD_HTTP_SERVER); + + rspamd_http_connection_read_message (conn->conn, conn, fd, router->ptv, + router->ev_base); + LL_PREPEND (router->conns, conn); +} + +void +rspamd_http_router_free (struct rspamd_http_connection_router *router) +{ + struct rspamd_http_connection_entry *conn, *tmp; + + if (router) { + LL_FOREACH_SAFE (router->conns, conn, tmp) { + rspamd_http_entry_free (conn); + } + + if (router->default_fs_path != NULL) { + g_free (router->default_fs_path); + } + g_hash_table_unref (router->paths); + g_slice_free1 (sizeof (struct rspamd_http_connection_router), router); + } +} diff --git a/src/libutil/http.h b/src/libutil/http.h new file mode 100644 index 000000000..8af4429c6 --- /dev/null +++ b/src/libutil/http.h @@ -0,0 +1,278 @@ +/* Copyright (c) 2014, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef HTTP_H_ +#define HTTP_H_ + +/** + * @file http.h + * + * This is an interface for HTTP client and conn. This code uses HTTP parser written + * by Joyent Inc based on nginx code. 
/**
 * HTTP message structure, used for requests and replies
 */
struct rspamd_http_message {
	GString *url;	/* allocated by rspamd_http_new_message() for HTTP_REQUEST, NULL for other types */
	struct rspamd_http_header *headers;	/* linked list of headers (walked with LL_FOREACH), NULL when empty */
	GString *body;	/* message body, NULL until one is attached */
	enum http_parser_type type;	/* type passed to rspamd_http_new_message() */
	time_t date;	/* set to 0 by rspamd_http_new_message(); repliers fill it with time (NULL) */
	gint code;	/* status code; preset to 200 for non-request messages, not initialised for requests */
	enum http_method method;	/* set to HTTP_GET by rspamd_http_new_message() */
};
/**
 * HTTP router: maps request paths to handlers and tracks the connections it
 * is currently serving.
 */
struct rspamd_http_connection_router {
	struct rspamd_http_connection_entry *conns;	/* list of entries, prepended by rspamd_http_router_handle_socket() */
	GHashTable *paths;	/* path string -> handler, created with rspamd_strcase_hash/rspamd_strcase_equal */
	struct timeval tv;	/* private copy of the timeout given to rspamd_http_router_new() */
	struct timeval *ptv;	/* points to tv when a timeout was given, NULL otherwise */
	struct event_base *ev_base;	/* event base passed to connection read/write calls */
	gchar *default_fs_path;	/* g_strdup'ed static files directory, NULL if unset or not a directory */
	rspamd_http_router_error_handler_t error_handler;	/* optional, checked against NULL before use */
	rspamd_http_router_finish_handler_t finish_handler;	/* optional, called when an entry is released */
};
return conn; +} + +/** + * Decrease a refcount for a connection and free it if refcount is equal to zero + * @param conn + */ +static void +rspamd_http_connection_unref (struct rspamd_http_connection *conn) +{ + if (--conn->ref <= 0) { + rspamd_http_connection_free (conn); + } +} + +/** + * Reset connection for a new request + * @param conn + */ +void rspamd_http_connection_reset (struct rspamd_http_connection *conn); + +/** + * Create new HTTP reply + * @param code code to pass + * @return new reply object + */ +struct rspamd_http_message* rspamd_http_new_message (enum http_parser_type type); + +/** + * Append a header to reply + * @param rep + * @param name + * @param value + */ +void rspamd_http_message_add_header (struct rspamd_http_message *rep, const gchar *name, const gchar *value); + +/** + * Search for a specified header in message + * @param rep message + * @param name name of header + */ +const gchar* rspamd_http_message_find_header (struct rspamd_http_message *rep, const gchar *name); + +/** + * Free HTTP reply + * @param rep + */ +void rspamd_http_message_free (struct rspamd_http_message *msg); + +/** + * Parse HTTP date header and return it as time_t + * @param header HTTP date header + * @param len length of header + * @return time_t or (time_t)-1 in case of error + */ +time_t rspamd_http_parse_date (const gchar *header, gsize len); + +/** + * Create new http connection router and the associated HTTP connection + * @param eh error handler callback + * @param fh finish handler callback + * @param default_fs_path if not NULL try to serve static files from + * the specified directory + * @return + */ +struct rspamd_http_connection_router* rspamd_http_router_new ( + rspamd_http_router_error_handler_t eh, + rspamd_http_router_finish_handler_t fh, + struct timeval *timeout, + struct event_base *base, + const char *default_fs_path); + +/** + * Add new path to the router + */ +void rspamd_http_router_add_path (struct rspamd_http_connection_router *router, + 
const gchar *path, rspamd_http_router_handler_t handler); + +/** + * Handle new accepted socket + * @param router router object + * @param fd server socket + * @param ud opaque userdata + */ +void rspamd_http_router_handle_socket (struct rspamd_http_connection_router *router, + gint fd, gpointer ud); + +/** + * Free router and all connections associated + * @param router + */ +void rspamd_http_router_free (struct rspamd_http_connection_router *router); + +#endif /* HTTP_H_ */ diff --git a/src/libutil/logger.c b/src/libutil/logger.c new file mode 100644 index 000000000..01814d24d --- /dev/null +++ b/src/libutil/logger.c @@ -0,0 +1,769 @@ +/* + * Copyright (c) 2009-2012, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + + +#include "config.h" +#include "logger.h" +#include "util.h" +#include "main.h" +#include "map.h" + +/* How much message should be repeated before it is count to be repeated one */ +#define REPEATS_MIN 3 +#define REPEATS_MAX 300 + +/** + * Static structure that store logging parameters + * It is NOT shared between processes and is created by main process + */ +struct rspamd_logger_s { + rspamd_log_func_t log_func; + struct config_file *cfg; + struct { + guint32 size; + guint32 used; + u_char *buf; + } io_buf; + gint fd; + gboolean is_buffered; + gboolean enabled; + gboolean is_debug; + gboolean throttling; + time_t throttling_time; + sig_atomic_t do_reopen_log; + enum rspamd_log_type type; + pid_t pid; + GQuark process_type; + radix_tree_t *debug_ip; + guint32 last_line_cksum; + guint32 repeats; + gchar *saved_message; + gchar *saved_function; + GMutex *mtx; +}; + +static const gchar lf_chr = '\n'; + +static rspamd_logger_t *default_logger = NULL; + + +static void +syslog_log_function (const gchar * log_domain, const gchar *function, + GLogLevelFlags log_level, const gchar * message, + gboolean forced, gpointer arg); +static void +file_log_function (const gchar * log_domain, const gchar *function, + GLogLevelFlags log_level, const gchar * message, + gboolean forced, gpointer arg); + +/** + * Calculate checksum for log line (used for repeating logic) + */ +static inline guint32 +rspamd_log_calculate_cksum (const gchar *message, size_t mlen) +{ + const gchar *bp = message; + const gchar *be = bp + mlen; + guint32 hval = 0; + + while (bp < be) { + hval += (hval<<1) + (hval<<4) + (hval<<7) + (hval<<8) + (hval<<24); + hval ^= (guint32)*bp++; + } + + /* return our new hash value */ + return hval; + +} + +/* + * Write a line to log file (unbuffered) + */ +static void +direct_write_log_line (rspamd_logger_t *rspamd_log, void *data, gint count, gboolean is_iov) +{ + gchar errmsg[128]; + struct iovec *iov; + const gchar *line; + gint r; + + if (rspamd_log->enabled) 
{ + if (is_iov) { + iov = (struct iovec *)data; + r = writev (rspamd_log->fd, iov, count); + } + else { + line = (const gchar *)data; + r = write (rspamd_log->fd, line, count); + } + if (r == -1) { + /* We cannot write message to file, so we need to detect error and make decision */ + r = rspamd_snprintf (errmsg, sizeof (errmsg), "direct_write_log_line: cannot write log line: %s", strerror (errno)); + if (errno == EIO || errno == EINTR) { + /* Descriptor is somehow invalid, try to restart */ + reopen_log (rspamd_log); + if (write (rspamd_log->fd, errmsg, r) != -1) { + /* Try again */ + direct_write_log_line (rspamd_log, data, count, is_iov); + } + } + else if (errno == EFAULT || errno == EINVAL || errno == EFBIG || errno == ENOSPC) { + /* Rare case */ + rspamd_log->throttling = TRUE; + rspamd_log->throttling_time = time (NULL); + } + else if (errno == EPIPE || errno == EBADF) { + /* We write to some pipe and it disappears, disable logging or we has opened bad file descriptor */ + rspamd_log->enabled = FALSE; + } + } + else if (rspamd_log->throttling) { + rspamd_log->throttling = FALSE; + } + } +} + +static void +rspamd_escape_log_string (gchar *str) +{ + guchar *p = (guchar *)str; + + while (*p) { + if ((*p & 0x80) || !g_ascii_isprint (*p)) { + *p = '?'; + } + else if (*p == '\n' || *p == '\r') { + *p = ' '; + } + p ++; + } +} + +/* Logging utility functions */ +gint +open_log_priv (rspamd_logger_t *rspamd_log, uid_t uid, gid_t gid) +{ + switch (rspamd_log->cfg->log_type) { + case RSPAMD_LOG_CONSOLE: + /* Do nothing with console */ + rspamd_log->enabled = TRUE; + return 0; + case RSPAMD_LOG_SYSLOG: + openlog ("rspamd", LOG_NDELAY | LOG_PID, rspamd_log->cfg->log_facility); + rspamd_log->enabled = TRUE; + return 0; + case RSPAMD_LOG_FILE: + rspamd_log->fd = open (rspamd_log->cfg->log_file, O_CREAT | O_WRONLY | O_APPEND, + S_IWUSR | S_IRUSR | S_IRGRP | S_IROTH); + if (rspamd_log->fd == -1) { + fprintf (stderr, "open_log: cannot open desired log file: %s, %s", + 
rspamd_log->cfg->log_file, strerror (errno)); + return -1; + } + if (fchown (rspamd_log->fd, uid, gid) == -1) { + fprintf (stderr, "open_log: cannot chown desired log file: %s, %s", + rspamd_log->cfg->log_file, strerror (errno)); + close (rspamd_log->fd); + return -1; + } + rspamd_log->enabled = TRUE; + return 0; + } + return -1; +} + +void +close_log_priv (rspamd_logger_t *rspamd_log, uid_t uid, gid_t gid) +{ + gchar tmpbuf[256]; + flush_log_buf (rspamd_log); + + switch (rspamd_log->type) { + case RSPAMD_LOG_CONSOLE: + /* Do nothing special */ + break; + case RSPAMD_LOG_SYSLOG: + closelog (); + break; + case RSPAMD_LOG_FILE: + if (rspamd_log->enabled) { + if (rspamd_log->repeats > REPEATS_MIN) { + rspamd_snprintf (tmpbuf, sizeof (tmpbuf), "Last message repeated %ud times", rspamd_log->repeats); + rspamd_log->repeats = 0; + if (rspamd_log->saved_message) { + file_log_function (NULL, rspamd_log->saved_function, rspamd_log->cfg->log_level, rspamd_log->saved_message, TRUE, rspamd_log); + g_free (rspamd_log->saved_message); + g_free (rspamd_log->saved_function); + rspamd_log->saved_message = NULL; + rspamd_log->saved_function = NULL; + } + /* It is safe to use temporary buffer here as it is not static */ + file_log_function (NULL, __FUNCTION__, rspamd_log->cfg->log_level, tmpbuf, TRUE, rspamd_log); + return; + } + + if (fsync (rspamd_log->fd) == -1) { + msg_err ("error syncing log file: %s", strerror (errno)); + } + close (rspamd_log->fd); + } + break; + } + + rspamd_log->enabled = FALSE; +} + +gint +reopen_log_priv (rspamd_logger_t *rspamd_log, uid_t uid, gid_t gid) +{ + close_log_priv (rspamd_log, uid, gid); + if (open_log_priv (rspamd_log, uid, gid) == 0) { + msg_info ("log file reopened"); + return 0; + } + + return -1; +} + +/** + * Open log file or initialize other structures + */ +gint +open_log (rspamd_logger_t *logger) +{ + return open_log_priv (logger, -1, -1); +} +/** + * Close log file or destroy other structures + */ +void +close_log (rspamd_logger_t 
*logger) +{ + close_log_priv (logger, -1, -1); +} +/** + * Close and open log again + */ +gint +reopen_log (rspamd_logger_t *logger) +{ + return reopen_log_priv (logger, -1, -1); +} + +/* + * Setup logger + */ +void +rspamd_set_logger (struct config_file *cfg, GQuark ptype, struct rspamd_main *rspamd) +{ + gchar **strvec, *p, *err; + gint num, i, k; + struct in_addr addr; + guint32 mask = 0xFFFFFFFF; + + if (rspamd->logger == NULL) { + rspamd->logger = g_malloc (sizeof (rspamd_logger_t)); + memset (rspamd->logger, 0, sizeof (rspamd_logger_t)); + } + + rspamd->logger->type = cfg->log_type; + rspamd->logger->pid = getpid (); + rspamd->logger->process_type = ptype; + +#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION <= 30)) + rspamd->logger->mtx = g_mutex_new (); +#else + rspamd->logger->mtx = g_malloc (sizeof (GMutex)); + g_mutex_init (rspamd->logger->mtx); +#endif + + switch (cfg->log_type) { + case RSPAMD_LOG_CONSOLE: + rspamd->logger->log_func = file_log_function; + rspamd->logger->fd = STDERR_FILENO; + break; + case RSPAMD_LOG_SYSLOG: + rspamd->logger->log_func = syslog_log_function; + break; + case RSPAMD_LOG_FILE: + rspamd->logger->log_func = file_log_function; + break; + } + + rspamd->logger->cfg = cfg; + /* Set up buffer */ + if (rspamd->cfg->log_buffered) { + if (rspamd->cfg->log_buf_size != 0) { + rspamd->logger->io_buf.size = rspamd->cfg->log_buf_size; + } + else { + rspamd->logger->io_buf.size = BUFSIZ; + } + rspamd->logger->is_buffered = TRUE; + rspamd->logger->io_buf.buf = g_malloc (rspamd->logger->io_buf.size); + } + /* Set up conditional logging */ + if (rspamd->cfg->debug_ip_map != NULL) { + /* Try to add it as map first of all */ + if (rspamd->logger->debug_ip) { + radix_tree_free (rspamd->logger->debug_ip); + } + rspamd->logger->debug_ip = radix_tree_create (); + if (!add_map (rspamd->cfg, rspamd->cfg->debug_ip_map, "IP addresses for which debug logs are enabled", + read_radix_list, fin_radix_list, (void **)&rspamd->logger->debug_ip)) { + /* 
Try to parse it as list */ + strvec = g_strsplit_set (rspamd->cfg->debug_ip_map, ",; ", 0); + num = g_strv_length (strvec); + + for (i = 0; i < num; i++) { + g_strstrip (strvec[i]); + + if ((p = strchr (strvec[i], '/')) != NULL) { + /* Try to extract mask */ + *p = '\0'; + p ++; + errno = 0; + k = strtoul (p, &err, 10); + if (errno != 0 || *err != '\0' || k > 32) { + continue; + } + } + else { + k = 32; + } + if (inet_aton (strvec[i], &addr)) { + /* Check ip */ + mask = mask << (32 - k); + radix32tree_insert (rspamd->logger->debug_ip, ntohl (addr.s_addr), mask, 1); + } + } + g_strfreev (strvec); + } + } + else if (rspamd->logger->debug_ip) { + radix_tree_free (rspamd->logger->debug_ip); + rspamd->logger->debug_ip = NULL; + } + + default_logger = rspamd->logger; +} + +/** + * Used after fork() for updating structure params + */ +void +update_log_pid (GQuark ptype, rspamd_logger_t *rspamd_log) +{ + rspamd_log->pid = getpid (); + rspamd_log->process_type = ptype; +} + +/** + * Flush logging buffer + */ +void +flush_log_buf (rspamd_logger_t *rspamd_log) +{ + if (rspamd_log->is_buffered && (rspamd_log->type == RSPAMD_LOG_CONSOLE || rspamd_log->type == RSPAMD_LOG_FILE)) { + direct_write_log_line (rspamd_log, rspamd_log->io_buf.buf, rspamd_log->io_buf.used, FALSE); + rspamd_log->io_buf.used = 0; + } +} + + +void +rspamd_common_logv (rspamd_logger_t *rspamd_log, GLogLevelFlags log_level, const gchar *function, + const gchar *fmt, va_list args) +{ + static gchar logbuf[BUFSIZ]; + u_char *end; + + if (rspamd_log == NULL) { + rspamd_log = default_logger; + } + + if (rspamd_log == NULL) { + /* Just fprintf message to stderr */ + if (log_level >= G_LOG_LEVEL_INFO) { + end = rspamd_vsnprintf (logbuf, sizeof (logbuf), fmt, args); + *end = '\0'; + rspamd_escape_log_string (logbuf); + fprintf (stderr, "%s\n", logbuf); + } + } + else if (log_level <= rspamd_log->cfg->log_level) { + g_mutex_lock (rspamd_log->mtx); + end = rspamd_vsnprintf (logbuf, sizeof (logbuf), fmt, args); + *end 
= '\0'; + rspamd_escape_log_string (logbuf); + rspamd_log->log_func (NULL, function, log_level, logbuf, FALSE, rspamd_log); + g_mutex_unlock (rspamd_log->mtx); + } +} + +/** + * This log functions select real logger and write message if level is less or equal to configured log level + */ +void +rspamd_common_log_function (rspamd_logger_t *rspamd_log, GLogLevelFlags log_level, + const gchar *function, const gchar *fmt, ...) +{ + va_list vp; + + va_start (vp, fmt); + rspamd_common_logv (rspamd_log, log_level, function, fmt, vp); + va_end (vp); +} + +void +rspamd_default_logv (GLogLevelFlags log_level, const gchar *function, + const gchar *fmt, va_list args) +{ + rspamd_common_logv (NULL, log_level, function, fmt, args); +} + +void +rspamd_default_log_function (GLogLevelFlags log_level, + const gchar *function, const gchar *fmt, ...) +{ + + va_list vp; + + va_start (vp, fmt); + rspamd_default_logv (log_level, function, fmt, vp); + va_end (vp); +} + + +/** + * Fill buffer with message (limits must be checked BEFORE this call) + */ +static void +fill_buffer (rspamd_logger_t *rspamd_log, const struct iovec *iov, gint iovcnt) +{ + gint i; + + for (i = 0; i < iovcnt; i ++) { + memcpy (rspamd_log->io_buf.buf + rspamd_log->io_buf.used, iov[i].iov_base, iov[i].iov_len); + rspamd_log->io_buf.used += iov[i].iov_len; + } + +} + +/* + * Write message to buffer or to file (using direct_write_log_line function) + */ +static void +file_log_helper (rspamd_logger_t *rspamd_log, const struct iovec *iov, gint iovcnt) +{ + size_t len = 0; + gint i; + + if (! 
rspamd_log->is_buffered) { + /* Write string directly */ + direct_write_log_line (rspamd_log, (void *)iov, iovcnt, TRUE); + } + else { + /* Calculate total length */ + for (i = 0; i < iovcnt; i ++) { + len += iov[i].iov_len; + } + /* Fill buffer */ + if (rspamd_log->io_buf.size < len) { + /* Buffer is too small to hold this string, so write it dirrectly */ + flush_log_buf (rspamd_log); + direct_write_log_line (rspamd_log, (void *)iov, iovcnt, TRUE); + } + else if (rspamd_log->io_buf.used + len >= rspamd_log->io_buf.size) { + /* Buffer is full, try to write it dirrectly */ + flush_log_buf (rspamd_log); + fill_buffer (rspamd_log, iov, iovcnt); + } + else { + /* Copy incoming string to buffer */ + fill_buffer (rspamd_log, iov, iovcnt); + } + } +} + +/** + * Syslog interface for logging + */ +static void +syslog_log_function (const gchar * log_domain, const gchar *function, GLogLevelFlags log_level, const gchar * message, gboolean forced, gpointer arg) +{ + rspamd_logger_t *rspamd_log = arg; + + if (! 
rspamd_log->enabled) { + return; + } + if (function == NULL) { + if (forced || log_level <= rspamd_log->cfg->log_level) { + if (forced || log_level >= G_LOG_LEVEL_DEBUG) { + syslog (LOG_DEBUG, "%s", message); + } + else if (log_level >= G_LOG_LEVEL_INFO) { + syslog (LOG_INFO, "%s", message); + } + else if (log_level >= G_LOG_LEVEL_WARNING) { + syslog (LOG_WARNING, "%s", message); + } + else if (log_level >= G_LOG_LEVEL_CRITICAL) { + syslog (LOG_ERR, "%s", message); + } + } + } + else { + if (forced || log_level <= rspamd_log->cfg->log_level) { + if (log_level >= G_LOG_LEVEL_DEBUG) { + syslog (LOG_DEBUG, "%s: %s", function, message); + } + else if (log_level >= G_LOG_LEVEL_INFO) { + syslog (LOG_INFO, "%s: %s", function, message); + } + else if (log_level >= G_LOG_LEVEL_WARNING) { + syslog (LOG_WARNING, "%s: %s", function, message); + } + else if (log_level >= G_LOG_LEVEL_CRITICAL) { + syslog (LOG_ERR, "%s: %s", function, message); + } + } + } +} + +/** + * Main file interface for logging + */ +static void +file_log_function (const gchar * log_domain, const gchar *function, GLogLevelFlags log_level, const gchar * message, gboolean forced, gpointer arg) +{ + gchar tmpbuf[256], timebuf[32]; + time_t now; + struct tm *tms; + struct iovec iov[4]; + gint r = 0; + guint32 cksum; + size_t mlen; + const gchar *cptype = NULL; + gboolean got_time = FALSE; + rspamd_logger_t *rspamd_log = arg; + + if (! 
rspamd_log->enabled) { + return; + } + + + if (forced || log_level <= rspamd_log->cfg->log_level) { + /* Check throttling due to write errors */ + if (rspamd_log->throttling) { + now = time (NULL); + if (rspamd_log->throttling_time != now) { + rspamd_log->throttling_time = now; + got_time = TRUE; + } + else { + /* Do not try to write to file too often while throttling */ + return; + } + } + /* Check repeats */ + mlen = strlen (message); + cksum = rspamd_log_calculate_cksum (message, mlen); + if (cksum == rspamd_log->last_line_cksum) { + rspamd_log->repeats ++; + if (rspamd_log->repeats > REPEATS_MIN && rspamd_log->repeats < REPEATS_MAX) { + /* Do not log anything */ + if (rspamd_log->saved_message == 0) { + rspamd_log->saved_message = g_strdup (message); + rspamd_log->saved_function = g_strdup (function); + } + return; + } + else if (rspamd_log->repeats > REPEATS_MAX) { + rspamd_snprintf (tmpbuf, sizeof (tmpbuf), "Last message repeated %ud times", rspamd_log->repeats); + rspamd_log->repeats = 0; + /* It is safe to use temporary buffer here as it is not static */ + if (rspamd_log->saved_message) { + file_log_function (log_domain, rspamd_log->saved_function, log_level, rspamd_log->saved_message, forced, arg); + } + file_log_function (log_domain, __FUNCTION__, log_level, tmpbuf, forced, arg); + file_log_function (log_domain, function, log_level, message, forced, arg); + rspamd_log->repeats = REPEATS_MIN + 1; + return; + } + } + else { + /* Reset counter if new message differs from saved message */ + rspamd_log->last_line_cksum = cksum; + if (rspamd_log->repeats > REPEATS_MIN) { + rspamd_snprintf (tmpbuf, sizeof (tmpbuf), "Last message repeated %ud times", rspamd_log->repeats); + rspamd_log->repeats = 0; + if (rspamd_log->saved_message) { + file_log_function (log_domain, rspamd_log->saved_function, log_level, rspamd_log->saved_message, forced, arg); + g_free (rspamd_log->saved_message); + g_free (rspamd_log->saved_function); + rspamd_log->saved_message = NULL; + 
rspamd_log->saved_function = NULL; + } + file_log_function (log_domain, __FUNCTION__, log_level, tmpbuf, forced, arg); + /* It is safe to use temporary buffer here as it is not static */ + file_log_function (log_domain, function, log_level, message, forced, arg); + return; + } + else { + rspamd_log->repeats = 0; + } + } + + if (rspamd_log->cfg->log_extended) { + if (! got_time) { + now = time (NULL); + } + + /* Format time */ + tms = localtime (&now); + + strftime (timebuf, sizeof (timebuf), "%F %H:%M:%S", tms); + cptype = g_quark_to_string (rspamd_log->process_type); + + if (rspamd_log->cfg->log_color) { + if (log_level >= G_LOG_LEVEL_INFO) { + /* White */ + r = rspamd_snprintf (tmpbuf, sizeof (tmpbuf), "\033[1;37m"); + } + else if (log_level >= G_LOG_LEVEL_WARNING) { + /* Magenta */ + r = rspamd_snprintf (tmpbuf, sizeof (tmpbuf), "\033[2;32m"); + } + else if (log_level >= G_LOG_LEVEL_CRITICAL) { + /* Red */ + r = rspamd_snprintf (tmpbuf, sizeof (tmpbuf), "\033[1;31m"); + } + } + else { + r = 0; + } + if (function == NULL) { + r += rspamd_snprintf (tmpbuf + r, sizeof (tmpbuf) - r, "%s #%P(%s) ", timebuf, rspamd_log->pid, cptype); + } + else { + r += rspamd_snprintf (tmpbuf + r, sizeof (tmpbuf) -r, "%s #%P(%s) %s: ", timebuf, rspamd_log->pid, cptype, function); + } + /* Construct IOV for log line */ + iov[0].iov_base = tmpbuf; + iov[0].iov_len = r; + iov[1].iov_base = (void *)message; + iov[1].iov_len = mlen; + iov[2].iov_base = (void *)&lf_chr; + iov[2].iov_len = 1; + if (rspamd_log->cfg->log_color) { + iov[3].iov_base = "\033[0m"; + iov[3].iov_len = sizeof ("\033[0m") - 1; + /* Call helper (for buffering) */ + file_log_helper (rspamd_log, iov, 4); + } + else { + /* Call helper (for buffering) */ + file_log_helper (rspamd_log, iov, 3); + } + } + else { + iov[0].iov_base = (void *)message; + iov[0].iov_len = mlen; + iov[1].iov_base = (void *)&lf_chr; + iov[1].iov_len = 1; + if (rspamd_log->cfg->log_color) { + iov[2].iov_base = "\033[0m"; + iov[2].iov_len = sizeof 
("\033[0m") - 1; + /* Call helper (for buffering) */ + file_log_helper (rspamd_log, iov, 3); + } + else { + /* Call helper (for buffering) */ + file_log_helper (rspamd_log, iov, 2); + } + } + } +} + +/** + * Write log line depending on ip + */ +void +rspamd_conditional_debug (rspamd_logger_t *rspamd_log, + rspamd_inet_addr_t *addr, const gchar *function, const gchar *fmt, ...) +{ + static gchar logbuf[BUFSIZ]; + va_list vp; + u_char *end; + + if (rspamd_log->cfg->log_level >= G_LOG_LEVEL_DEBUG || rspamd_log->is_debug) { + if (rspamd_log->debug_ip && addr != NULL) { + if (addr->af == AF_INET && radix32tree_find (rspamd_log->debug_ip, + ntohl (addr->addr.s4.sin_addr.s_addr)) == RADIX_NO_VALUE) { + return; + } + } + g_mutex_lock (rspamd_log->mtx); + va_start (vp, fmt); + end = rspamd_vsnprintf (logbuf, sizeof (logbuf), fmt, vp); + *end = '\0'; + rspamd_escape_log_string (logbuf); + va_end (vp); + rspamd_log->log_func (NULL, function, G_LOG_LEVEL_DEBUG, logbuf, TRUE, rspamd_log); + g_mutex_unlock (rspamd_log->mtx); + } +} +/** + * Wrapper for glib logger + */ +void +rspamd_glib_log_function (const gchar *log_domain, GLogLevelFlags log_level, const gchar *message, gpointer arg) +{ + rspamd_logger_t *rspamd_log = arg; + + if (rspamd_log->enabled) { + g_mutex_lock (rspamd_log->mtx); + rspamd_log->log_func (log_domain, NULL, log_level, message, FALSE, rspamd_log); + g_mutex_unlock (rspamd_log->mtx); + } +} + +/** + * Temporary turn on debugging + */ +void +rspamd_log_debug (rspamd_logger_t *rspamd_log) +{ + rspamd_log->is_debug = TRUE; +} + +/** + * Turn off temporary debugging + */ +void +rspamd_log_nodebug (rspamd_logger_t *rspamd_log) +{ + rspamd_log->is_debug = FALSE; +} diff --git a/src/libutil/logger.h b/src/libutil/logger.h new file mode 100644 index 000000000..b0766b938 --- /dev/null +++ b/src/libutil/logger.h @@ -0,0 +1,117 @@ +#ifndef RSPAMD_LOGGER_H +#define RSPAMD_LOGGER_H + +#include "config.h" +#include "cfg_file.h" +#include "radix.h" +#include "util.h" + + 
+typedef void (*rspamd_log_func_t)(const gchar * log_domain, const gchar *function, + GLogLevelFlags log_level, const gchar * message, + gboolean forced, gpointer arg); + +typedef struct rspamd_logger_s rspamd_logger_t; +/** + * Init logger + */ +void rspamd_set_logger (struct config_file *cfg, GQuark ptype, struct rspamd_main *main); +/** + * Open log file or initialize other structures + */ +gint open_log (rspamd_logger_t *logger); +/** + * Close log file or destroy other structures + */ +void close_log (rspamd_logger_t *logger); +/** + * Close and open log again + */ +gint reopen_log (rspamd_logger_t *logger); + +/** + * Open log file or initialize other structures for privileged processes + */ +gint open_log_priv (rspamd_logger_t *logger, uid_t uid, gid_t gid); +/** + * Close log file or destroy other structures for privileged processes + */ +void close_log_priv (rspamd_logger_t *logger, uid_t uid, gid_t gid); +/** + * Close and open log again for privileged processes + */ +gint reopen_log_priv (rspamd_logger_t *logger, uid_t uid, gid_t gid); + +/** + * Set log pid + */ +void update_log_pid (GQuark ptype, rspamd_logger_t *logger); + +/** + * Flush log buffer for some types of logging + */ +void flush_log_buf (rspamd_logger_t *logger); +/** + * Log function that is compatible for glib messages + */ +void rspamd_glib_log_function (const gchar *log_domain, + GLogLevelFlags log_level, const gchar *message, gpointer arg); + +/** + * Function with variable number of arguments support + */ +void rspamd_common_log_function (rspamd_logger_t *logger, + GLogLevelFlags log_level, const gchar *function, const gchar *fmt, ...); + +void rspamd_common_logv (rspamd_logger_t *logger, + GLogLevelFlags log_level, const gchar *function, const gchar *fmt, va_list args); + +/** + * Conditional debug function + */ +void rspamd_conditional_debug (rspamd_logger_t *logger, + rspamd_inet_addr_t *addr, const gchar *function, const gchar *fmt, ...) 
; + +/** + * Function with variable number of arguments support that uses static default logger + */ +void rspamd_default_log_function (GLogLevelFlags log_level, const gchar *function, + const gchar *fmt, ...); + +/** + * Varargs version of default log function + * @param log_level + * @param function + * @param fmt + * @param args + */ +void rspamd_default_logv (GLogLevelFlags log_level, const gchar *function, const gchar *fmt, va_list args); + +/** + * Temporary turn on debug + */ +void rspamd_log_debug (rspamd_logger_t *logger); + +/** + * Turn off debug + */ +void rspamd_log_nodebug (rspamd_logger_t *logger); + +/* Typical functions */ + +/* Logging in postfix style */ +#if defined(RSPAMD_MAIN) +#define msg_err(...) rspamd_common_log_function(rspamd_main->logger, G_LOG_LEVEL_CRITICAL, __FUNCTION__, __VA_ARGS__) +#define msg_warn(...) rspamd_common_log_function(rspamd_main->logger, G_LOG_LEVEL_WARNING, __FUNCTION__, __VA_ARGS__) +#define msg_info(...) rspamd_common_log_function(rspamd_main->logger, G_LOG_LEVEL_INFO, __FUNCTION__, __VA_ARGS__) +#define msg_debug(...) rspamd_conditional_debug(rspamd_main->logger, NULL, __FUNCTION__, __VA_ARGS__) +#define debug_task(...) rspamd_conditional_debug(rspamd_main->logger, &task->from_addr, __FUNCTION__, __VA_ARGS__) +#else +#define msg_err(...) rspamd_default_log_function(G_LOG_LEVEL_CRITICAL, __FUNCTION__, __VA_ARGS__) +#define msg_warn(...) rspamd_default_log_function(G_LOG_LEVEL_WARNING, __FUNCTION__, __VA_ARGS__) +#define msg_info(...) rspamd_default_log_function(G_LOG_LEVEL_INFO, __FUNCTION__, __VA_ARGS__) +#define msg_debug(...) rspamd_default_log_function(G_LOG_LEVEL_DEBUG, __FUNCTION__, __VA_ARGS__) +#define debug_task(...) 
rspamd_default_log_function(G_LOG_LEVEL_DEBUG, __FUNCTION__, __VA_ARGS__) +#endif + +#endif diff --git a/src/libutil/map.c b/src/libutil/map.c new file mode 100644 index 000000000..703622585 --- /dev/null +++ b/src/libutil/map.c @@ -0,0 +1,1148 @@ +/* + * Copyright (c) 2009-2012, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/* + * Implementation of map files handling + */ +#include "config.h" +#include "map.h" +#include "http.h" +#include "main.h" +#include "util.h" +#include "mem_pool.h" + +static const gchar *hash_fill = "1"; /* value passed to the insert function for every element by abstract_parse_list */ + +/* HTTP reply, filled in incrementally by parse_http_reply */ +struct http_reply { + gint code; /* status code read from the status line */ + GHashTable *headers; /* header name -> header value */ + gchar *cur_header; /* name of the header whose value is being read */ + gint parser_state; /* current state of the parse_http_reply FSM */ +}; + +/* State of one asynchronous HTTP map request */ struct http_callback_data { + struct event ev; + struct event_base *ev_base; + struct timeval tv; + struct rspamd_map *map; + struct http_map_data *data; + struct http_reply *reply; + struct map_cb_data cbdata; + + gint state; /* 0: request not sent yet, 1: request sent, 2: reply data has been read */ + gint fd; +}; + +/* Value in seconds after which we would try to do stat on list file */ + +/* HTTP timeouts */ +#define HTTP_CONNECT_TIMEOUT 2 +#define HTTP_READ_TIMEOUT 10 + +/** + * Helper for HTTP connection establishment: opens a TCP socket to data->addr + * with make_tcp_socket, passing is_async through to it. + * Returns the socket, or -1 after logging the error; the map argument is not used. + */ +static gint +connect_http (struct rspamd_map *map, struct http_map_data *data, gboolean is_async) +{ + gint sock; + + if ((sock = make_tcp_socket (data->addr, FALSE, is_async)) == -1) { + msg_info ("cannot connect to http server %s: %d, %s", data->host, errno, strerror (errno)); + return -1; + } + + return sock; +} + +/** + * Write HTTP request + */ +static void +write_http_request (struct rspamd_map *map, struct http_map_data *data, gint sock) +{ + gchar outbuf[BUFSIZ], datebuf[128]; + gint r; + struct tm *tm; + + tm = gmtime (&data->last_checked); + strftime (datebuf, sizeof (datebuf), "%a, %d %b %Y %H:%M:%S %Z", tm); + r = rspamd_snprintf (outbuf, sizeof (outbuf), "GET %s%s HTTP/1.1" CRLF "Connection: close" CRLF "Host: %s" CRLF, (*data->path == '/') ? 
"" : "/", data->path, data->host); + if (data->last_checked != 0) { + r += rspamd_snprintf (outbuf + r, sizeof (outbuf) - r, "If-Modified-Since: %s" CRLF, datebuf); + } + + r += rspamd_snprintf (outbuf + r, sizeof (outbuf) - r, CRLF); + + if (write (sock, outbuf, r) == -1) { + msg_err ("failed to write request: %d, %s", errno, strerror (errno)); + } +} + +/** + * FSM for parsing HTTP reply + */ +static gchar * +parse_http_reply (gchar * chunk, gint len, struct http_reply *reply) +{ + gchar *s, *p, *err_str, *tmp; + p = chunk; + s = chunk; + + while (p - chunk < len) { + switch (reply->parser_state) { + /* Search status code */ + case 0: + /* Search for status code */ + if (*p != ' ') { + p++; + } + else { + /* Try to parse HTTP reply code */ + reply->code = strtoul (++p, (gchar **)&err_str, 10); + if (*err_str != ' ') { + msg_info ("error while reading HTTP status code: %s", p); + return NULL; + } + /* Now skip to end of status string */ + reply->parser_state = 1; + continue; + } + break; + /* Skip to end of line */ + case 1: + if (*p == '\n') { + /* Switch to read header state */ + reply->parser_state = 2; + } + /* Each skipped symbol is proceeded */ + s = ++p; + break; + /* Read header value */ + case 2: + if (*p == ':') { + reply->cur_header = g_malloc (p - s + 1); + rspamd_strlcpy (reply->cur_header, s, p - s + 1); + reply->parser_state = 3; + } + else if (*p == '\r' && *(p + 1) == '\n') { + /* Last empty line */ + reply->parser_state = 5; + } + p++; + break; + /* Skip spaces after header name */ + case 3: + if (*p != ' ') { + s = p; + reply->parser_state = 4; + } + else { + p++; + } + break; + /* Read header value */ + case 4: + if (*p == '\r') { + if (reply->cur_header != NULL) { + tmp = g_malloc (p - s + 1); + rspamd_strlcpy (tmp, s, p - s + 1); + g_hash_table_insert (reply->headers, reply->cur_header, tmp); + reply->cur_header = NULL; + } + reply->parser_state = 1; + } + p++; + break; + case 5: + /* Set pointer to begining of HTTP body */ + p++; + s = p; + 
reply->parser_state = 6; + break; + case 6: + /* Headers parsed, just return */ + return p; + break; + } + } + + return s; +} + +/** + * Read and parse chunked header + */ +static gint +read_chunk_header (gchar * buf, gint len, struct http_map_data *data) +{ + gchar chunkbuf[32], *p, *c, *err_str; + gint skip = 0; + + p = chunkbuf; + c = buf; + /* Find hex digits */ + while (g_ascii_isxdigit (*c) && p - chunkbuf < (gint)(sizeof (chunkbuf) - 1) && skip < len) { + *p++ = *c++; + skip++; + } + *p = '\0'; + data->chunk = strtoul (chunkbuf, &err_str, 16); + if (*err_str != '\0') { + return -1; + } + + /* Now skip to CRLF */ + while (*c != '\n' && skip < len) { + c++; + skip++; + } + if (*c == '\n' && skip < len) { + skip++; + c++; + } + data->chunk_remain = data->chunk; + + return skip; +} + +/** + * Helper callback for reading chunked reply + */ +static gboolean +read_http_chunked (gchar * buf, size_t len, struct rspamd_map *map, struct http_map_data *data, struct map_cb_data *cbdata) +{ + gchar *p = buf, *remain; + gint skip = 0; + + if (data->chunked == 1) { + /* Read first chunk data */ + if ((skip = read_chunk_header (buf, len, data)) != -1) { + p += skip; + len -= skip; + data->chunked = 2; + } + else { + msg_info ("invalid chunked reply: %*s", (gint)len, buf); + return FALSE; + } + } + + if (data->chunk_remain == 0) { + /* Read another chunk */ + if ((skip = read_chunk_header (buf, len, data)) != -1) { + p += skip; + len -= skip; + } + else { + msg_info ("invalid chunked reply: %*s", (gint)len, buf); + return FALSE; + } + if (data->chunk == 0) { + return FALSE; + } + } + + if (data->chunk_remain <= len ) { + /* Call callback and move remaining buffer */ + remain = map->read_callback (map->pool, p, data->chunk_remain, cbdata); + if (remain != NULL && remain != p + data->chunk_remain) { + /* Copy remaining buffer to start of buffer */ + data->rlen = len - (remain - p); + memmove (buf, remain, data->rlen); + data->chunk_remain -= data->rlen; + } + else { + /* Copy 
other part */ + data->rlen = len - data->chunk_remain; + if (data->rlen > 0) { + memmove (buf, p + data->chunk_remain, data->rlen); + } + data->chunk_remain = 0; + } + + } + else { + /* Just read another portion of chunk */ + data->chunk_remain -= len; + remain = map->read_callback (map->pool, p, len, cbdata); + if (remain != NULL && remain != p + len) { + /* copy remaining buffer to start of buffer */ + data->rlen = len - (remain - p); + memmove (buf, remain, data->rlen); + } + } + + return TRUE; +} + +/** + * Callback for reading HTTP reply + */ +static gboolean +read_http_common (struct rspamd_map *map, struct http_map_data *data, struct http_reply *reply, struct map_cb_data *cbdata, gint fd) +{ + gchar *remain, *pos; + ssize_t r; + gchar *te, *date; + + if ((r = read (fd, data->read_buf + data->rlen, sizeof (data->read_buf) - data->rlen)) > 0) { + r += data->rlen; + data->rlen = 0; + remain = parse_http_reply (data->read_buf, r, reply); + if (remain != NULL && remain != data->read_buf) { + /* copy remaining data->read_buffer to start of data->read_buffer */ + data->rlen = r - (remain - data->read_buf); + memmove (data->read_buf, remain, data->rlen); + r = data->rlen; + data->rlen = 0; + } + if (r <= 0) { + return TRUE; + } + if (reply->parser_state == 6) { + /* If reply header is parsed successfully, try to read further data */ + if (reply->code != 200 && reply->code != 304) { + msg_err ("got error reply from server %s, %d", data->host, reply->code); + return FALSE; + } + else if (reply->code == 304) { + /* Do not read anything */ + return FALSE; + } + pos = data->read_buf; + /* Check for chunked */ + if (data->chunked == 0) { + if ((te = g_hash_table_lookup (reply->headers, "Transfer-Encoding")) != NULL) { + if (g_ascii_strcasecmp (te, "chunked") == 0) { + data->chunked = 1; + } + else { + data->chunked = -1; + } + } + else { + data->chunked = -1; + } + } + /* Check for date */ + date = g_hash_table_lookup (reply->headers, "Date"); + if (date != NULL) { + 
data->last_checked = rspamd_http_parse_date (date, -1); + } + else { + data->last_checked = (time_t)-1; + } + + if (data->chunked > 0) { + return read_http_chunked (data->read_buf, r, map, data, cbdata); + } + /* Read more data */ + remain = map->read_callback (map->pool, pos, r, cbdata); + if (remain != NULL && remain != pos + r) { + /* copy remaining data->read_buffer to start of data->read_buffer */ + data->rlen = r - (remain - pos); + memmove (pos, remain, data->rlen); + } + } + } + else { + return FALSE; + } + + return TRUE; +}

/**
 * Sync read of HTTP reply.
 *
 * Connects to the map server, sends the request and feeds the reply to
 * read_http_common until it returns FALSE.  Afterwards the map's fin_callback
 * is invoked and *map->user_data is replaced by the newly read data.  If the
 * reply left last_checked at (time_t)-1, it is set to the current time.
 *
 * @param map map being loaded; both of its callbacks must be set
 * @param data HTTP specific state of the map
 */
static void
read_http_sync (struct rspamd_map *map, struct http_map_data *data)
{
	struct map_cb_data cbdata;
	gint fd;
	struct http_reply *repl;

	if (map->read_callback == NULL || map->fin_callback == NULL) {
		msg_err ("bad callback for reading map file");
		return;
	}

	/* Connect synced */
	if ((fd = connect_http (map, data, FALSE)) == -1) {
		return;
	}
	write_http_request (map, data, fd);

	cbdata.state = 0;
	cbdata.map = map;
	cbdata.prev_data = *map->user_data;
	cbdata.cur_data = NULL;

	/*
	 * Start from a clean per-request parsing state, the same way the
	 * asynchronous path does before it reads a reply.
	 */
	data->rlen = 0;
	data->chunk = 0;
	data->chunk_remain = 0;
	data->chunked = 0;
	data->read_buf[0] = '\0';

	repl = g_malloc (sizeof (struct http_reply));
	repl->parser_state = 0;
	repl->code = 404;
	/* Must be NULL so that a leftover header name can be freed safely below */
	repl->cur_header = NULL;
	repl->headers = g_hash_table_new_full (rspamd_strcase_hash, rspamd_strcase_equal, g_free, g_free);

	while (read_http_common (map, data, repl, &cbdata, fd));

	close (fd);

	map->fin_callback (map->pool, &cbdata);
	*map->user_data = cbdata.cur_data;
	if (data->last_checked == (time_t)-1) {
		data->last_checked = time (NULL);
	}

	/*
	 * A header name whose value never arrived is still owned by the reply,
	 * names that were completed have been handed over to the hash table.
	 */
	g_free (repl->cur_header);
	g_hash_table_destroy (repl->headers);
	g_free (repl);
}

/** + * Callback for reading data from file + */ +static void +read_map_file (struct rspamd_map *map, struct file_map_data *data) +{ + struct map_cb_data cbdata; + gchar buf[BUFSIZ], *remain; + ssize_t r; + gint fd, rlen; + + if (map->read_callback == NULL || map->fin_callback == NULL) { + msg_err ("bad callback for reading map file"); + return; + } + + if ((fd = open (data->filename, O_RDONLY)) 
== -1) { + msg_warn ("cannot open file '%s': %s", data->filename, strerror (errno)); + return; + } + + cbdata.state = 0; + cbdata.prev_data = *map->user_data; + cbdata.cur_data = NULL; + cbdata.map = map; + + rlen = 0; + while ((r = read (fd, buf + rlen, sizeof (buf) - rlen - 1)) > 0) { + r += rlen; + buf[r] = '\0'; + remain = map->read_callback (map->pool, buf, r, &cbdata); + if (remain != NULL) { + /* copy remaining buffer to start of buffer */ + rlen = r - (remain - buf); + memmove (buf, remain, rlen); + } + } + + close (fd); + + map->fin_callback (map->pool, &cbdata); + *map->user_data = cbdata.cur_data; +} + +/** + * FSM for parsing lists + */ +gchar * +abstract_parse_kv_list (rspamd_mempool_t * pool, gchar * chunk, gint len, struct map_cb_data *data, insert_func func) +{ + gchar *c, *p, *key = NULL, *value = NULL; + + p = chunk; + c = p; + + while (p - chunk < len) { + switch (data->state) { + case 0: + /* read key */ + /* Check here comments, eol and end of buffer */ + if (*p == '#') { + if (key != NULL && p - c >= 0) { + value = rspamd_mempool_alloc (pool, p - c + 1); + memcpy (value, c, p - c); + value[p - c] = '\0'; + value = g_strstrip (value); + func (data->cur_data, key, value); + msg_debug ("insert kv pair: %s -> %s", key, value); + } + data->state = 99; + } + else if (*p == '\r' || *p == '\n' || p - chunk == len - 1) { + if (key != NULL && p - c >= 0) { + value = rspamd_mempool_alloc (pool, p - c + 1); + memcpy (value, c, p - c); + value[p - c] = '\0'; + + value = g_strstrip (value); + func (data->cur_data, key, value); + msg_debug ("insert kv pair: %s -> %s", key, value); + } + else if (key == NULL && p - c > 0) { + /* Key only line */ + key = rspamd_mempool_alloc (pool, p - c + 1); + memcpy (key, c, p - c); + key[p - c] = '\0'; + value = rspamd_mempool_alloc (pool, 1); + *value = '\0'; + func (data->cur_data, key, value); + msg_debug ("insert kv pair: %s -> %s", key, value); + } + data->state = 100; + key = NULL; + } + else if (g_ascii_isspace 
(*p)) { + if (p - c > 0) { + key = rspamd_mempool_alloc (pool, p - c + 1); + memcpy (key, c, p - c); + key[p - c] = '\0'; + data->state = 2; + } + else { + key = NULL; + } + } + else { + p ++; + } + break; + case 2: + /* Skip spaces before value */ + if (!g_ascii_isspace (*p)) { + c = p; + data->state = 0; + } + else { + p ++; + } + break; + case 99: + /* SKIP_COMMENT */ + /* Skip comment till end of line */ + if (*p == '\r' || *p == '\n') { + while ((*p == '\r' || *p == '\n') && p - chunk < len) { + p++; + } + c = p; + key = NULL; + data->state = 0; + } + else { + p++; + } + break; + case 100: + /* Skip \r\n and whitespaces */ + if (*p == '\r' || *p == '\n' || g_ascii_isspace (*p)) { + p ++; + } + else { + c = p; + key = NULL; + data->state = 0; + } + break; + } + } + + return c; +} + +gchar * +abstract_parse_list (rspamd_mempool_t * pool, gchar * chunk, gint len, struct map_cb_data *data, insert_func func) +{ + gchar *s, *p, *str, *start; + + p = chunk; + start = p; + + str = g_malloc (len + 1); + s = str; + + while (p - chunk < len) { + switch (data->state) { + /* READ_SYMBOL */ + case 0: + if (*p == '#') { + /* Got comment */ + if (s != str) { + /* Save previous string in lines like: "127.0.0.1 #localhost" */ + *s = '\0'; + s = rspamd_mempool_strdup (pool, g_strstrip (str)); + if (strlen (s) > 0) { + func (data->cur_data, s, hash_fill); + } + s = str; + start = p; + } + data->state = 1; + } + else if (*p == '\r' || *p == '\n') { + /* Got EOL marker, save stored string */ + if (s != str) { + *s = '\0'; + s = rspamd_mempool_strdup (pool, g_strstrip (str)); + if (strlen (s) > 0) { + func (data->cur_data, s, hash_fill); + } + s = str; + } + /* Skip EOL symbols */ + while ((*p == '\r' || *p == '\n') && p - chunk < len) { + p++; + } + start = p; + } + else { + /* Store new string in s */ + *s = *p; + s++; + p++; + } + break; + /* SKIP_COMMENT */ + case 1: + /* Skip comment till end of line */ + if (*p == '\r' || *p == '\n') { + while ((*p == '\r' || *p == '\n') && p 
- chunk < len) { + p++; + } + s = str; + start = p; + data->state = 0; + } + else { + p++; + } + break; + } + } + + g_free (str); + + return start; +} + +/** + * Radix tree helper function + */ +static void +radix_tree_insert_helper (gpointer st, gconstpointer key, gpointer value) +{ + radix_tree_t *tree = st; + + guint32 mask = 0xFFFFFFFF; + guint32 ip; + gchar *token, *ipnet, *err_str, **strv, **cur; + struct in_addr ina; + gint k; + + /* Split string if there are multiple items inside a single string */ + strv = g_strsplit_set ((gchar *)key, " ,;", 0); + cur = strv; + while (*cur) { + if (**cur == '\0') { + cur++; + continue; + } + /* Extract ipnet */ + ipnet = *cur; + token = strsep (&ipnet, "/"); + + if (ipnet != NULL) { + errno = 0; + /* Get mask */ + k = strtoul (ipnet, &err_str, 10); + if (errno != 0) { + msg_warn ("invalid netmask, error detected on symbol: %s, erorr: %s", err_str, strerror (errno)); + k = 32; + } + else if (k > 32 || k < 0) { + msg_warn ("invalid netmask value: %d", k); + k = 32; + } + /* Calculate mask based on CIDR presentation */ + mask = mask << (32 - k); + } + + /* Check IP */ + if (inet_aton (token, &ina) == 0) { + msg_err ("invalid ip address: %s", token); + return; + } + + /* Insert ip in a tree */ + ip = ntohl ((guint32) ina.s_addr); + k = radix32tree_insert (tree, ip, mask, 1); + if (k == -1) { + msg_warn ("cannot insert ip to tree: %s, mask %X", inet_ntoa (ina), mask); + } + else if (k == 1) { + msg_warn ("ip %s, mask %X, value already exists", inet_ntoa (ina), mask); + } + cur++; + } + + g_strfreev (strv); +} + +/* Helpers */ +gchar * +read_host_list (rspamd_mempool_t * pool, gchar * chunk, gint len, struct map_cb_data *data) +{ + if (data->cur_data == NULL) { + data->cur_data = g_hash_table_new (rspamd_strcase_hash, rspamd_strcase_equal); + } + return abstract_parse_list (pool, chunk, len, data, (insert_func) g_hash_table_insert); +} + +void +fin_host_list (rspamd_mempool_t * pool, struct map_cb_data *data) +{ + if 
(data->prev_data) { + g_hash_table_destroy (data->prev_data); + } +} + +gchar * +read_kv_list (rspamd_mempool_t * pool, gchar * chunk, gint len, struct map_cb_data *data) +{ + if (data->cur_data == NULL) { + data->cur_data = g_hash_table_new (rspamd_strcase_hash, rspamd_strcase_equal); + } + return abstract_parse_kv_list (pool, chunk, len, data, (insert_func) g_hash_table_insert); +} + +void +fin_kv_list (rspamd_mempool_t * pool, struct map_cb_data *data) +{ + if (data->prev_data) { + g_hash_table_destroy (data->prev_data); + } +} + +gchar * +read_radix_list (rspamd_mempool_t * pool, gchar * chunk, gint len, struct map_cb_data *data) +{ + if (data->cur_data == NULL) { + data->cur_data = radix_tree_create (); + } + return abstract_parse_list (pool, chunk, len, data, (insert_func) radix_tree_insert_helper); +} + +void +fin_radix_list (rspamd_mempool_t * pool, struct map_cb_data *data) +{ + if (data->prev_data) { + radix_tree_free (data->prev_data); + } +} + +/** + * Common file callback + */ +static void +file_callback (gint fd, short what, void *ud) +{ + struct rspamd_map *map = ud; + struct file_map_data *data = map->map_data; + struct stat st; + gdouble jittered_sec; + + /* Plan event again with jitter */ + evtimer_del (&map->ev); + jittered_sec = (map->cfg->map_timeout + g_random_double () * map->cfg->map_timeout); + double_to_tv (jittered_sec, &map->tv); + + evtimer_add (&map->ev, &map->tv); + + if (g_atomic_int_get (map->locked)) { + msg_info ("don't try to reread map as it is locked by other process, will reread it later"); + return; + } + + if (stat (data->filename, &st) != -1 && (st.st_mtime > data->st.st_mtime || data->st.st_mtime == -1)) { + /* File was modified since last check */ + memcpy (&data->st, &st, sizeof (struct stat)); + } + else { + return; + } + + msg_info ("rereading map file %s", data->filename); + read_map_file (map, data); +} + +/** + * Callback for destroying HTTP callback data + */ +static void +free_http_cbdata (struct 
http_callback_data *cbd) +{ + if (cbd->reply) { + g_hash_table_destroy (cbd->reply->headers); + g_free (cbd->reply); + } + g_atomic_int_set (cbd->map->locked, 0); + event_del (&cbd->ev); + close (cbd->fd); + g_free (cbd); +} + +/** + * Async HTTP request parser + */ +static void +http_async_callback (gint fd, short what, void *ud) +{ + struct http_callback_data *cbd = ud; + + /* Begin of connection */ + if (what == EV_WRITE) { + if (cbd->state == 0) { + /* Can write request */ + write_http_request (cbd->map, cbd->data, fd); + /* Plan reading */ + event_set (&cbd->ev, cbd->fd, EV_READ | EV_PERSIST, http_async_callback, cbd); + event_base_set (cbd->ev_base, &cbd->ev); + cbd->tv.tv_sec = HTTP_READ_TIMEOUT; + cbd->tv.tv_usec = 0; + cbd->state = 1; + /* Allocate reply structure */ + cbd->reply = g_malloc (sizeof (struct http_reply)); + cbd->reply->parser_state = 0; + cbd->reply->code = 404; + cbd->reply->headers = g_hash_table_new_full (rspamd_strcase_hash, rspamd_strcase_equal, g_free, g_free); + cbd->cbdata.state = 0; + cbd->cbdata.prev_data = *cbd->map->user_data; + cbd->cbdata.cur_data = NULL; + cbd->cbdata.map = cbd->map; + cbd->data->rlen = 0; + cbd->data->chunk = 0; + cbd->data->chunk_remain = 0; + cbd->data->chunked = FALSE; + cbd->data->read_buf[0] = '\0'; + + event_add (&cbd->ev, &cbd->tv); + } + else { + msg_err ("bad state when got write readiness"); + free_http_cbdata (cbd); + return; + } + } + /* Got reply, parse it */ + else if (what == EV_READ) { + if (cbd->state >= 1) { + if (!read_http_common (cbd->map, cbd->data, cbd->reply, &cbd->cbdata, cbd->fd)) { + /* Handle Not-Modified in a special way */ + if (cbd->reply->code == 304) { + if (cbd->data->last_checked == (time_t)-1) { + cbd->data->last_checked = time (NULL); + } + msg_info ("data is not modified for server %s", cbd->data->host); + } + else if (cbd->cbdata.cur_data != NULL) { + /* Destroy old data and start reading request data */ + cbd->map->fin_callback (cbd->map->pool, &cbd->cbdata); + 
*cbd->map->user_data = cbd->cbdata.cur_data; + if (cbd->data->last_checked == (time_t)-1) { + cbd->data->last_checked = time (NULL); + } + } + if (cbd->state == 1 && cbd->reply->code == 200) { + /* Write to log that data is modified */ + msg_info ("rereading map data from %s", cbd->data->host); + } + + free_http_cbdata (cbd); + return; + } + else if (cbd->state == 1) { + /* Write to log that data is modified */ + msg_info ("rereading map data from %s", cbd->data->host); + } + cbd->state = 2; + } + } + else { + msg_err ("connection with http server terminated incorrectly"); + free_http_cbdata (cbd); + } +}

/**
 * Async HTTP callback: timer handler of an HTTP map.
 *
 * The timer is re-armed first.  If the map is not locked, it is locked, a
 * connection is started and a write event with the connect timeout is
 * registered; http_async_callback continues from there.  The lock is dropped
 * again when the connection cannot be started.
 *
 * @param fd unused
 * @param what unused
 * @param ud the struct rspamd_map this timer belongs to
 */
static void
http_callback (gint fd, short what, void *ud)
{
	struct rspamd_map *map = ud;
	struct http_map_data *hdata = map->map_data;
	struct http_callback_data *cbd;
	gdouble next_check;
	gint sock;

	/* Plan event again with jitter */
	evtimer_del (&map->ev);
	next_check = (map->cfg->map_timeout + g_random_double () * map->cfg->map_timeout);
	double_to_tv (next_check, &map->tv);
	evtimer_add (&map->ev, &map->tv);

	if (g_atomic_int_get (map->locked)) {
		msg_info ("don't try to reread map as it is locked by other process, will reread it later");
		return;
	}

	g_atomic_int_inc (map->locked);

	/* Connect asynced */
	sock = connect_http (map, hdata, TRUE);
	if (sock == -1) {
		g_atomic_int_set (map->locked, 0);
		return;
	}

	/* Plan event */
	cbd = g_malloc (sizeof (struct http_callback_data));
	cbd->map = map;
	cbd->data = hdata;
	cbd->reply = NULL;
	cbd->state = 0;
	cbd->fd = sock;
	cbd->ev_base = map->ev_base;
	cbd->tv.tv_sec = HTTP_CONNECT_TIMEOUT;
	cbd->tv.tv_usec = 0;
	event_set (&cbd->ev, sock, EV_WRITE, http_async_callback, cbd);
	event_base_set (cbd->ev_base, &cbd->ev);
	event_add (&cbd->ev, &cbd->tv);
}

/* Start watching event for all maps */ +void +start_map_watch (struct config_file *cfg, struct event_base *ev_base) +{ + GList *cur = cfg->maps; + struct rspamd_map 
*map; + struct file_map_data *fdata; + gdouble jittered_sec; + + /* First of all do synced read of data */ + while (cur) { + map = cur->data; + map->ev_base = ev_base; + if (map->protocol == MAP_PROTO_FILE) { + evtimer_set (&map->ev, file_callback, map); + event_base_set (map->ev_base, &map->ev); + /* Read initial data */ + fdata = map->map_data; + if (fdata->st.st_mtime != -1) { + /* Do not try to read non-existent file */ + read_map_file (map, map->map_data); + } + /* Plan event with jitter */ + jittered_sec = (map->cfg->map_timeout + g_random_double () * map->cfg->map_timeout) / 2.; + double_to_tv (jittered_sec, &map->tv); + evtimer_add (&map->ev, &map->tv); + } + else if (map->protocol == MAP_PROTO_HTTP) { + evtimer_set (&map->ev, http_callback, map); + event_base_set (map->ev_base, &map->ev); + /* Read initial data */ + read_http_sync (map, map->map_data); + /* Plan event with jitter */ + jittered_sec = (map->cfg->map_timeout + g_random_double () * map->cfg->map_timeout); + double_to_tv (jittered_sec, &map->tv); + evtimer_add (&map->ev, &map->tv); + } + cur = g_list_next (cur); + } +}

/**
 * Drop the list of maps and delete the pool they were allocated from.
 *
 * @param cfg configuration whose maps and map_pool are released
 */
void
remove_all_maps (struct config_file *cfg)
{
	g_list_free (cfg->maps);
	cfg->maps = NULL;
	if (cfg->map_pool != NULL) {
		rspamd_mempool_delete (cfg->map_pool);
		cfg->map_pool = NULL;
	}
}

/**
 * Detect the protocol of a map definition.
 *
 * Recognised forms are "http://...", "file://..." and a bare path starting
 * with '/'.  res and pos are optional: they are filled only when both are
 * non-NULL, so the function can also be used purely as a validity check.
 *
 * @param map_line map definition
 * @param res where to store MAP_PROTO_HTTP or MAP_PROTO_FILE
 * @param pos where to store the start of the protocol specific part
 * @return TRUE if the definition uses a known protocol, FALSE otherwise
 */
gboolean
check_map_proto (const gchar *map_line, gint *res, const gchar **pos)
{
	if (map_line == NULL) {
		return FALSE;
	}

	if (g_ascii_strncasecmp (map_line, "http://", sizeof ("http://") - 1) == 0) {
		if (res && pos) {
			*res = MAP_PROTO_HTTP;
			*pos = map_line + sizeof ("http://") - 1;
		}
	}
	else if (g_ascii_strncasecmp (map_line, "file://", sizeof ("file://") - 1) == 0) {
		if (res && pos) {
			*res = MAP_PROTO_FILE;
			*pos = map_line + sizeof ("file://") - 1;
		}
	}
	else if (*map_line == '/') {
		/* Trivial file case; the outputs are optional here as well */
		if (res && pos) {
			*res = MAP_PROTO_FILE;
			*pos = map_line;
		}
	}
	else {
		msg_debug ("invalid map fetching protocol: %s", map_line);
		return FALSE;
	}

	return TRUE;
}

gboolean +add_map (struct 
config_file *cfg, const gchar *map_line, const gchar *description, + map_cb_t read_callback, map_fin_cb_t fin_callback, void **user_data) +{ + struct rspamd_map *new_map; + enum fetch_proto proto; + const gchar *def, *p, *hostend; + struct file_map_data *fdata; + struct http_map_data *hdata; + gchar portbuf[6]; + gint i, s, r; + struct addrinfo hints, *res; + + /* First of all detect protocol line */ + if (!check_map_proto (map_line, (int *)&proto, &def)) { + return FALSE; + } + /* Constant pool */ + if (cfg->map_pool == NULL) { + cfg->map_pool = rspamd_mempool_new (rspamd_mempool_suggest_size ()); + } + new_map = rspamd_mempool_alloc0 (cfg->map_pool, sizeof (struct rspamd_map)); + new_map->read_callback = read_callback; + new_map->fin_callback = fin_callback; + new_map->user_data = user_data; + new_map->protocol = proto; + new_map->cfg = cfg; + new_map->id = g_random_int (); + new_map->locked = rspamd_mempool_alloc0_shared (cfg->cfg_pool, sizeof (gint)); + + if (proto == MAP_PROTO_FILE) { + new_map->uri = rspamd_mempool_strdup (cfg->cfg_pool, def); + def = new_map->uri; + } + else { + new_map->uri = rspamd_mempool_strdup (cfg->cfg_pool, map_line); + } + if (description != NULL) { + new_map->description = rspamd_mempool_strdup (cfg->cfg_pool, description); + } + + /* Now check for each proto separately */ + if (proto == MAP_PROTO_FILE) { + fdata = rspamd_mempool_alloc0 (cfg->map_pool, sizeof (struct file_map_data)); + if (access (def, R_OK) == -1) { + if (errno != ENOENT) { + msg_err ("cannot open file '%s': %s", def, strerror (errno)); + return FALSE; + + } + msg_info ("map '%s' is not found, but it can be loaded automatically later", def); + /* We still can add this file */ + fdata->st.st_mtime = -1; + } + else { + stat (def, &fdata->st); + } + fdata->filename = rspamd_mempool_strdup (cfg->map_pool, def); + new_map->map_data = fdata; + } + else if (proto == MAP_PROTO_HTTP) { + hdata = rspamd_mempool_alloc0 (cfg->map_pool, sizeof (struct http_map_data)); + /* Try 
to search port */ + if ((p = strchr (def, ':')) != NULL) { + hostend = p; + i = 0; + p++; + while (g_ascii_isdigit (*p) && i < (gint)sizeof (portbuf) - 1) { + portbuf[i++] = *p++; + } + if (*p != '/') { + msg_info ("bad http map definition: %s", def); + return FALSE; + } + portbuf[i] = '\0'; + hdata->port = atoi (portbuf); + } + else { + /* Default http port */ + rspamd_snprintf (portbuf, sizeof (portbuf), "80"); + hdata->port = 80; + /* Now separate host from path */ + if ((p = strchr (def, '/')) == NULL) { + msg_info ("bad http map definition: %s", def); + return FALSE; + } + hostend = p; + } + hdata->host = rspamd_mempool_alloc (cfg->map_pool, hostend - def + 1); + rspamd_strlcpy (hdata->host, def, hostend - def + 1); + hdata->path = rspamd_mempool_strdup (cfg->map_pool, p); + hdata->rlen = 0; + /* Now try to resolve */ + memset (&hints, 0, sizeof (hints)); + hints.ai_family = AF_UNSPEC; /* Allow IPv4 or IPv6 */ + hints.ai_socktype = SOCK_STREAM; /* Stream socket */ + hints.ai_flags = 0; + hints.ai_protocol = 0; /* Any protocol */ + hints.ai_canonname = NULL; + hints.ai_addr = NULL; + hints.ai_next = NULL; + + if ((r = getaddrinfo (hdata->host, portbuf, &hints, &res)) == 0) { + hdata->addr = res; + rspamd_mempool_add_destructor (cfg->cfg_pool, (rspamd_mempool_destruct_t)freeaddrinfo, hdata->addr); + } + else { + msg_err ("address resolution for %s failed: %s", hdata->host, gai_strerror (r)); + return FALSE; + } + /* Now try to connect */ + if ((s = make_tcp_socket (hdata->addr, FALSE, FALSE)) == -1) { + msg_info ("cannot connect to http server %s: %d, %s", hdata->host, errno, strerror (errno)); + return FALSE; + } + close (s); + new_map->map_data = hdata; + } + /* Temp pool */ + new_map->pool = rspamd_mempool_new (rspamd_mempool_suggest_size ()); + + cfg->maps = g_list_prepend (cfg->maps, new_map); + + return TRUE; +} diff --git a/src/libutil/map.h b/src/libutil/map.h new file mode 100644 index 000000000..1f34cdcc0 --- /dev/null +++ b/src/libutil/map.h @@ -0,0 
+1,134 @@ +#ifndef RSPAMD_MAP_H +#define RSPAMD_MAP_H + +#include "config.h" +#include "mem_pool.h" +#include "radix.h" + +/** + * Maps API is designed to load lists data from different dynamic sources. + * It monitor files and HTTP locations for modifications and reload them if they are + * modified. + */ + +enum fetch_proto { + MAP_PROTO_FILE, + MAP_PROTO_HTTP, +}; + +/** + * Data specific to file maps + */ +struct file_map_data { + const gchar *filename; + struct stat st; +}; + +/** + * Data specific to HTTP maps + */ +struct http_map_data { + struct addrinfo *addr; + guint16 port; + gchar *path; + gchar *host; + time_t last_checked; + gshort chunked; + gchar read_buf[BUFSIZ]; + guint32 rlen; + guint32 chunk; + guint32 chunk_remain; +}; + +struct map_cb_data; + +/** + * Callback types + */ +typedef gchar* (*map_cb_t)(rspamd_mempool_t *pool, gchar *chunk, gint len, struct map_cb_data *data); +typedef void (*map_fin_cb_t)(rspamd_mempool_t *pool, struct map_cb_data *data); + +/** + * Common map object + */ +struct config_file; +struct rspamd_map { + rspamd_mempool_t *pool; + struct config_file *cfg; + enum fetch_proto protocol; + map_cb_t read_callback; + map_fin_cb_t fin_callback; + void **user_data; + struct event ev; + struct timeval tv; + struct event_base *ev_base; + void *map_data; + gchar *uri; + gchar *description; + guint32 id; + guint32 checksum; + /* Shared lock for temporary disabling of map reading (e.g. 
when this map is written by UI) */ + gint *locked; +}; + +/** + * Callback data for async load + */ +struct map_cb_data { + struct rspamd_map *map; + gint state; + void *prev_data; + void *cur_data; +}; + + +/** + * Check map protocol + */ +gboolean check_map_proto (const gchar *map_line, gint *res, const gchar **pos); +/** + * Add map from line + */ +gboolean add_map (struct config_file *cfg, const gchar *map_line, const gchar *description, + map_cb_t read_callback, map_fin_cb_t fin_callback, void **user_data); + +/** + * Start watching of maps by adding events to libevent event loop + */ +void start_map_watch (struct config_file *cfg, struct event_base *ev_base); + +/** + * Remove all maps watched (remove events) + */ +void remove_all_maps (struct config_file *cfg); + +typedef void (*insert_func) (gpointer st, gconstpointer key, gconstpointer value); + +/** + * Common callbacks for frequent types of lists + */ + +/** + * Radix list is a list like ip/mask + */ +gchar* read_radix_list (rspamd_mempool_t *pool, gchar *chunk, gint len, struct map_cb_data *data); +void fin_radix_list (rspamd_mempool_t *pool, struct map_cb_data *data); + +/** + * Host list is an ordinal list of hosts or domains + */ +gchar* read_host_list (rspamd_mempool_t *pool, gchar *chunk, gint len, struct map_cb_data *data); +void fin_host_list (rspamd_mempool_t *pool, struct map_cb_data *data); + +/** + * Kv list is an ordinal list of keys and values separated by whitespace + */ +gchar* read_kv_list (rspamd_mempool_t *pool, gchar *chunk, gint len, struct map_cb_data *data); +void fin_kv_list (rspamd_mempool_t *pool, struct map_cb_data *data); + +/** + * FSM for lists parsing (support comments, blank lines and partial replies) + */ +gchar * abstract_parse_list (rspamd_mempool_t * pool, gchar * chunk, gint len, struct map_cb_data *data, insert_func func); + +#endif diff --git a/src/libutil/mem_pool.c b/src/libutil/mem_pool.c new file mode 100644 index 000000000..8f1105add --- /dev/null +++ 
b/src/libutil/mem_pool.c @@ -0,0 +1,776 @@ +/* + * Copyright (c) 2009-2012, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "config.h" +#include "mem_pool.h" +#include "fstring.h" +#include "logger.h" +#include "util.h" +#include "main.h" + +/* Sleep time for spin lock in nanoseconds */ +#define MUTEX_SLEEP_TIME 10000000L +#define MUTEX_SPIN_COUNT 100 + +#ifdef _THREAD_SAFE +pthread_mutex_t stat_mtx = PTHREAD_MUTEX_INITIALIZER; +# define STAT_LOCK() do { pthread_mutex_lock (&stat_mtx); } while (0) +# define STAT_UNLOCK() do { pthread_mutex_unlock (&stat_mtx); } while (0) +#else +# define STAT_LOCK() do {} while (0) +# define STAT_UNLOCK() do {} while (0) +#endif + +#define POOL_MTX_LOCK() do { rspamd_mutex_lock (pool->mtx); } while (0) +#define POOL_MTX_UNLOCK() do { rspamd_mutex_unlock (pool->mtx); } while (0) + +/* + * This define specify whether we should check all pools for free space for new object + * or just begin scan from current (recently attached) pool + * If MEMORY_GREEDY is defined, then we scan all pools to find free space (more CPU usage, slower + * but requires less memory). If it is not defined check only current pool and if object is too large + * to place in it allocate new one (this may cause huge CPU usage in some cases too, but generally faster than + * greedy method) + */ +#undef MEMORY_GREEDY + +/* Internal statistic */ +static rspamd_mempool_stat_t *mem_pool_stat = NULL; + +/** + * Function that return free space in pool page + * @param x pool page struct + */ +static gint +pool_chain_free (struct _pool_chain *chain) +{ + return (gint)chain->len - (chain->pos - chain->begin + MEM_ALIGNMENT); +} + +static struct _pool_chain * +pool_chain_new (gsize size) +{ + struct _pool_chain *chain; + + g_return_val_if_fail (size > 0, NULL); + + chain = g_slice_alloc (sizeof (struct _pool_chain)); + + if (chain == NULL) { + msg_err ("cannot allocate %z bytes, aborting", sizeof (struct _pool_chain)); + abort (); + } + + chain->begin = g_slice_alloc (size); + if (chain->begin == NULL) { + msg_err ("cannot allocate %z bytes, aborting", size); + abort (); + } + + 
chain->pos = align_ptr (chain->begin, MEM_ALIGNMENT); + chain->len = size; + chain->next = NULL; + STAT_LOCK (); + mem_pool_stat->bytes_allocated += size; + mem_pool_stat->chunks_allocated++; + STAT_UNLOCK (); + + return chain; +} + +/** + * Create a new shared chunk: the chain header is placed at the start of a shared mapping and the payload follows it + * @param size payload size of the chunk + * @return new shared chain element (the process is aborted if the mapping cannot be created) + */ +static struct _pool_chain_shared * +pool_chain_new_shared (gsize size) +{ + struct _pool_chain_shared *chain; + gpointer map; + + +#if defined(HAVE_MMAP_ANON) + map = mmap (NULL, size + sizeof (struct _pool_chain_shared), PROT_READ | PROT_WRITE, MAP_ANON | MAP_SHARED, -1, 0); + if (map == MAP_FAILED) { + msg_err ("cannot allocate %z bytes, aborting", size + sizeof (struct _pool_chain_shared)); + abort (); + } + chain = (struct _pool_chain_shared *)map; + chain->begin = ((guint8 *) chain) + sizeof (struct _pool_chain_shared); +#elif defined(HAVE_MMAP_ZERO) + gint fd; + + fd = open ("/dev/zero", O_RDWR); + if (fd == -1) { + /* Callers in this file dereference the result unconditionally, so fail loudly like the mmap error path */ + msg_err ("cannot open /dev/zero: %s, aborting", strerror (errno)); + abort (); + } + map = mmap (NULL, size + sizeof (struct _pool_chain_shared), PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + /* The mapping stays valid after close, so do not leak one descriptor per chunk */ + close (fd); + if (map == MAP_FAILED) { + msg_err ("cannot allocate %z bytes, aborting", size + sizeof (struct _pool_chain_shared)); + abort (); + } + chain = (struct _pool_chain_shared *)map; + chain->begin = ((guint8 *) chain) + sizeof (struct _pool_chain_shared); +#else +# error No mmap methods are defined +#endif + chain->pos = align_ptr (chain->begin, MEM_ALIGNMENT); + chain->len = size; + chain->lock = NULL; + chain->next = NULL; + STAT_LOCK (); + mem_pool_stat->shared_chunks_allocated++; + mem_pool_stat->bytes_allocated += size; + STAT_UNLOCK (); + + return chain; +} + + +/** + * Allocate new memory pool + * @param size size of pool's page + * @return new memory pool object + */ +rspamd_mempool_t * +rspamd_mempool_new (gsize size) +{ + rspamd_mempool_t *new; + gpointer map; + + g_return_val_if_fail (size > 0, NULL); + /* Allocate statistic structure if it is not allocated before */ + if (mem_pool_stat == NULL) { +#if defined(HAVE_MMAP_ANON) + map = mmap (NULL, sizeof (rspamd_mempool_stat_t), PROT_READ | PROT_WRITE, 
MAP_ANON | MAP_SHARED, -1, 0); + if (map == MAP_FAILED) { + msg_err ("cannot allocate %z bytes, aborting", sizeof (rspamd_mempool_stat_t)); + abort (); + } + mem_pool_stat = (rspamd_mempool_stat_t *)map; +#elif defined(HAVE_MMAP_ZERO) + gint fd; + + fd = open ("/dev/zero", O_RDWR); + g_assert (fd != -1); + map = mmap (NULL, sizeof (rspamd_mempool_stat_t), PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + /* The mapping stays valid after close, so release the descriptor instead of leaking it */ + close (fd); + if (map == MAP_FAILED) { + msg_err ("cannot allocate %z bytes, aborting", sizeof (rspamd_mempool_stat_t)); + abort (); + } + mem_pool_stat = (rspamd_mempool_stat_t *)map; +#else +# error No mmap methods are defined +#endif + memset (map, 0, sizeof (rspamd_mempool_stat_t)); + } + + new = g_slice_alloc (sizeof (rspamd_mempool_t)); + if (new == NULL) { + msg_err ("cannot allocate %z bytes, aborting", sizeof (rspamd_mempool_t)); + abort (); + } + + /* A fresh pool starts with one regular chunk and no temporary or shared chunks */ + new->cur_pool = pool_chain_new (size); + new->shared_pool = NULL; + new->first_pool = new->cur_pool; + new->cur_pool_tmp = NULL; + new->first_pool_tmp = NULL; + new->destructors = NULL; + /* Set it upon first call of set variable */ + new->variables = NULL; + new->mtx = rspamd_mutex_new (); + + mem_pool_stat->pools_allocated++; + + return new; +} + +static void * +memory_pool_alloc_common (rspamd_mempool_t * pool, gsize size, gboolean is_tmp) +{ + guint8 *tmp; + struct _pool_chain *new, *cur; + gint free; + + if (pool) { + POOL_MTX_LOCK (); +#ifdef MEMORY_GREEDY + if (is_tmp) { + cur = pool->first_pool_tmp; + } + else { + cur = pool->first_pool; + } +#else + if (is_tmp) { + cur = pool->cur_pool_tmp; + } + else { + cur = pool->cur_pool; + } +#endif + /* Find free space in pool chain */ + while (cur != NULL && + (free = pool_chain_free (cur)) < (gint)size && + cur->next != NULL) { + cur = cur->next; + } + + if (cur == NULL || (free < (gint)size && cur->next == NULL)) { + /* Allocate new pool */ + if (cur == NULL) { + if (pool->first_pool->len >= size + MEM_ALIGNMENT) { + new = pool_chain_new (pool->first_pool->len); + } + else { + 
new = pool_chain_new (size + pool->first_pool->len + MEM_ALIGNMENT); + } + /* Connect to pool subsystem */ + if (is_tmp) { + pool->first_pool_tmp = new; + } + else { + pool->first_pool = new; + } + } + else { + if (cur->len >= size + MEM_ALIGNMENT) { + new = pool_chain_new (cur->len); + } + else { + mem_pool_stat->oversized_chunks++; + new = pool_chain_new (size + pool->first_pool->len + MEM_ALIGNMENT); + } + /* Attach new pool to chain */ + cur->next = new; + } + if (is_tmp) { + pool->cur_pool_tmp = new; + } + else { + pool->cur_pool = new; + } + /* No need to align again */ + tmp = new->pos; + new->pos = tmp + size; + POOL_MTX_UNLOCK (); + return tmp; + } + /* No need to allocate page */ + tmp = align_ptr (cur->pos, MEM_ALIGNMENT); + cur->pos = tmp + size; + POOL_MTX_UNLOCK (); + return tmp; + } + return NULL; +} + + +void * +rspamd_mempool_alloc (rspamd_mempool_t * pool, gsize size) +{ + return memory_pool_alloc_common (pool, size, FALSE); +} + +void * +rspamd_mempool_alloc_tmp (rspamd_mempool_t * pool, gsize size) +{ + return memory_pool_alloc_common (pool, size, TRUE); +} + +void * +rspamd_mempool_alloc0 (rspamd_mempool_t * pool, gsize size) +{ + void *pointer = rspamd_mempool_alloc (pool, size); + if (pointer) { + memset (pointer, 0, size); + } + return pointer; +} + +void * +rspamd_mempool_alloc0_tmp (rspamd_mempool_t * pool, gsize size) +{ + void *pointer = rspamd_mempool_alloc_tmp (pool, size); + if (pointer) { + memset (pointer, 0, size); + } + return pointer; +} + +void * +rspamd_mempool_alloc0_shared (rspamd_mempool_t * pool, gsize size) +{ + void *pointer = rspamd_mempool_alloc_shared (pool, size); + if (pointer) { + memset (pointer, 0, size); + } + return pointer; +} + +void * +rspamd_mempool_alloc_shared (rspamd_mempool_t * pool, gsize size) +{ + guint8 *tmp; + struct _pool_chain_shared *new, *cur; + gint free; + + if (pool) { + g_return_val_if_fail(size > 0, NULL); + + POOL_MTX_LOCK () + ; + cur = pool->shared_pool; + if (!cur) { + cur = 
pool_chain_new_shared (pool->first_pool->len); + pool->shared_pool = cur; + } + + /* Find free space in pool chain */ + while ((free = pool_chain_free ((struct _pool_chain *) cur)) + < (gint) size && cur->next) { + cur = cur->next; + } + if (free < (gint) size && cur->next == NULL) { + /* Allocate new pool */ + + if (cur->len >= size + MEM_ALIGNMENT) { + new = pool_chain_new_shared (cur->len); + } + else { + mem_pool_stat->oversized_chunks++; + new = pool_chain_new_shared ( + size + pool->first_pool->len + MEM_ALIGNMENT); + } + /* Attach new pool to chain */ + cur->next = new; + /* Hand out the aligned position rather than the raw chunk start so the result matches the bookkeeping in pos; the chunk bytes are already accounted by pool_chain_new_shared */ + tmp = new->pos; + new->pos = tmp + size; + POOL_MTX_UNLOCK () + ; + return tmp; + } + tmp = align_ptr(cur->pos, MEM_ALIGNMENT); + cur->pos = tmp + size; + POOL_MTX_UNLOCK () + ; + return tmp; + } + return NULL; +} + + +gchar * +rspamd_mempool_strdup (rspamd_mempool_t * pool, const gchar *src) +{ + gsize len; + gchar *newstr; + + if (src == NULL) { + return NULL; + } + + len = strlen (src); + newstr = rspamd_mempool_alloc (pool, len + 1); + memcpy (newstr, src, len); + newstr[len] = '\0'; + return newstr; +} + +gchar * +rspamd_mempool_fstrdup (rspamd_mempool_t * pool, const struct f_str_s *src) +{ + gchar *newstr; + + if (src == NULL) { + return NULL; + } + + newstr = rspamd_mempool_alloc (pool, src->len + 1); + memcpy (newstr, src->begin, src->len); + newstr[src->len] = '\0'; + return newstr; +} + + +gchar * +rspamd_mempool_strdup_shared (rspamd_mempool_t * pool, const gchar *src) +{ + gsize len; + gchar *newstr; + + if (src == NULL) { + return NULL; + } + + len = strlen (src); + newstr = rspamd_mempool_alloc_shared (pool, len + 1); + memcpy (newstr, src, len); + newstr[len] = '\0'; + return newstr; +} + +/* Find pool for a pointer, returns NULL if pointer is not in pool */ +static struct _pool_chain_shared * +memory_pool_find_pool (rspamd_mempool_t * pool, void *pointer) +{ + struct _pool_chain_shared *cur = pool->shared_pool; + + 
while (cur) { + /* begin + len is one past the last byte of the chunk, so the upper bound is exclusive */ + if ((guint8 *) pointer >= cur->begin && (guint8 *) pointer < (cur->begin + cur->len)) { + return cur; + } + cur = cur->next; + } + + return NULL; +} + +static inline gint +__mutex_spin (rspamd_mempool_mutex_t * mutex) +{ + /* check spin count */ + if (g_atomic_int_dec_and_test (&mutex->spin)) { + /* This may be deadlock, so check owner of this lock */ + if (mutex->owner == getpid ()) { + /* This mutex was locked by calling process, so it is just double lock and we can easily unlock it */ + g_atomic_int_set (&mutex->spin, MUTEX_SPIN_COUNT); + return 0; + } + else if (kill (mutex->owner, 0) == -1) { + /* Owner process was not found, so release lock */ + g_atomic_int_set (&mutex->spin, MUTEX_SPIN_COUNT); + return 0; + } + /* Spin again */ + g_atomic_int_set (&mutex->spin, MUTEX_SPIN_COUNT); + } +#ifdef HAVE_ASM_PAUSE + __asm __volatile ("pause"); +#elif defined(HAVE_SCHED_YIELD) + (void)sched_yield (); +#endif + +#if defined(HAVE_NANOSLEEP) + struct timespec ts; + ts.tv_sec = 0; + ts.tv_nsec = MUTEX_SLEEP_TIME; + /* Spin */ + while (nanosleep (&ts, &ts) == -1 && errno == EINTR); +#else +# error No methods to spin are defined +#endif + return 1; +} + +static void +memory_pool_mutex_spin (rspamd_mempool_mutex_t * mutex) +{ + while (!g_atomic_int_compare_and_exchange (&mutex->lock, 0, 1)) { + if (!__mutex_spin (mutex)) { + return; + } + } +} + +/* Simple implementation of spinlock */ +void +rspamd_mempool_lock_shared (rspamd_mempool_t * pool, void *pointer) +{ + struct _pool_chain_shared *chain; + + chain = memory_pool_find_pool (pool, pointer); + if (chain == NULL) { + return; + } + if (chain->lock == NULL) { + chain->lock = rspamd_mempool_get_mutex (pool); + } + rspamd_mempool_lock_mutex (chain->lock); +} + +void +rspamd_mempool_unlock_shared (rspamd_mempool_t * pool, void *pointer) +{ + struct _pool_chain_shared *chain; + + chain = memory_pool_find_pool (pool, pointer); + if (chain == NULL) { + return; + } + if (chain->lock == NULL) { + chain->lock = 
rspamd_mempool_get_mutex (pool); + return; + } + + rspamd_mempool_unlock_mutex (chain->lock); +} + +void +rspamd_mempool_add_destructor_full (rspamd_mempool_t * pool, rspamd_mempool_destruct_t func, void *data, + const gchar *function, const gchar *line) +{ + struct _pool_destructors *cur; + + cur = rspamd_mempool_alloc (pool, sizeof (struct _pool_destructors)); + if (cur) { + POOL_MTX_LOCK (); + cur->func = func; + cur->data = data; + cur->function = function; + cur->loc = line; + cur->prev = pool->destructors; + pool->destructors = cur; + POOL_MTX_UNLOCK (); + } +} + +void +rspamd_mempool_replace_destructor (rspamd_mempool_t * pool, rspamd_mempool_destruct_t func, void *old_data, void *new_data) +{ + struct _pool_destructors *tmp; + + tmp = pool->destructors; + while (tmp) { + if (tmp->func == func && tmp->data == old_data) { + tmp->func = func; + tmp->data = new_data; + break; + } + tmp = tmp->prev; + } + +} + +void +rspamd_mempool_delete (rspamd_mempool_t * pool) +{ + struct _pool_chain *cur = pool->first_pool, *tmp; + struct _pool_chain_shared *cur_shared = pool->shared_pool, *tmp_shared; + struct _pool_destructors *destructor = pool->destructors; + + POOL_MTX_LOCK (); + /* Call all pool destructors */ + while (destructor) { + /* Avoid calling destructors for NULL pointers */ + if (destructor->data != NULL) { + destructor->func (destructor->data); + } + destructor = destructor->prev; + } + + while (cur) { + tmp = cur; + cur = cur->next; + STAT_LOCK (); + mem_pool_stat->chunks_freed++; + mem_pool_stat->bytes_allocated -= tmp->len; + STAT_UNLOCK (); + g_slice_free1 (tmp->len, tmp->begin); + g_slice_free (struct _pool_chain, tmp); + } + /* Clean temporary pools */ + cur = pool->first_pool_tmp; + while (cur) { + tmp = cur; + cur = cur->next; + STAT_LOCK (); + mem_pool_stat->chunks_freed++; + mem_pool_stat->bytes_allocated -= tmp->len; + STAT_UNLOCK (); + g_slice_free1 (tmp->len, tmp->begin); + g_slice_free (struct _pool_chain, tmp); + } + /* Unmap shared memory */ 
+ while (cur_shared) { + tmp_shared = cur_shared; + cur_shared = cur_shared->next; + STAT_LOCK (); + mem_pool_stat->chunks_freed++; + mem_pool_stat->bytes_allocated -= tmp_shared->len; + STAT_UNLOCK (); + munmap ((void *)tmp_shared, tmp_shared->len + sizeof (struct _pool_chain_shared)); + } + if (pool->variables) { + g_hash_table_destroy (pool->variables); + } + + mem_pool_stat->pools_freed++; + POOL_MTX_UNLOCK (); + rspamd_mutex_free (pool->mtx); + g_slice_free (rspamd_mempool_t, pool); +} + +/** + * Release every temporary chunk of the pool, regular and shared chunks are left untouched + * @param pool memory pool object + */ +void +rspamd_mempool_cleanup_tmp (rspamd_mempool_t* pool) +{ + struct _pool_chain *cur, *tmp; + + POOL_MTX_LOCK (); + cur = pool->first_pool_tmp; + while (cur) { + tmp = cur; + cur = cur->next; + STAT_LOCK (); + mem_pool_stat->chunks_freed++; + mem_pool_stat->bytes_allocated -= tmp->len; + STAT_UNLOCK (); + g_slice_free1 (tmp->len, tmp->begin); + g_slice_free (struct _pool_chain, tmp); + } + /* Forget the released chain, otherwise the next temporary allocation or rspamd_mempool_delete would touch freed chunks */ + pool->first_pool_tmp = NULL; + pool->cur_pool_tmp = NULL; + mem_pool_stat->pools_freed++; + POOL_MTX_UNLOCK (); +} + +/** + * Copy the global allocator counters to the caller supplied structure + * @param st structure to fill + */ +void +rspamd_mempool_stat (rspamd_mempool_stat_t * st) +{ + st->pools_allocated = mem_pool_stat->pools_allocated; + st->pools_freed = mem_pool_stat->pools_freed; + st->shared_chunks_allocated = mem_pool_stat->shared_chunks_allocated; + st->bytes_allocated = mem_pool_stat->bytes_allocated; + st->chunks_allocated = mem_pool_stat->chunks_allocated; + st->chunks_freed = mem_pool_stat->chunks_freed; + st->oversized_chunks = mem_pool_stat->oversized_chunks; +} + +/* By default allocate 8Kb chunks of memory */ +#define FIXED_POOL_SIZE 8192 +gsize +rspamd_mempool_suggest_size (void) +{ +#ifdef HAVE_GETPAGESIZE + return MAX (getpagesize (), FIXED_POOL_SIZE); +#else + return MAX (sysconf (_SC_PAGESIZE), FIXED_POOL_SIZE); +#endif +} + +rspamd_mempool_mutex_t * +rspamd_mempool_get_mutex (rspamd_mempool_t * pool) +{ + rspamd_mempool_mutex_t *res; + if (pool != NULL) { + res = rspamd_mempool_alloc_shared (pool, sizeof (rspamd_mempool_mutex_t)); + res->lock 
= 0; + res->owner = 0; + res->spin = MUTEX_SPIN_COUNT; + return res; + } + return NULL; +} + +void +rspamd_mempool_lock_mutex (rspamd_mempool_mutex_t * mutex) +{ + memory_pool_mutex_spin (mutex); + mutex->owner = getpid (); +} + +void +rspamd_mempool_unlock_mutex (rspamd_mempool_mutex_t * mutex) +{ + mutex->owner = 0; + (void)g_atomic_int_compare_and_exchange (&mutex->lock, 1, 0); +} + +rspamd_mempool_rwlock_t * +rspamd_mempool_get_rwlock (rspamd_mempool_t * pool) +{ + rspamd_mempool_rwlock_t *lock; + + lock = rspamd_mempool_alloc_shared (pool, sizeof (rspamd_mempool_rwlock_t)); + lock->__r_lock = rspamd_mempool_get_mutex (pool); + lock->__w_lock = rspamd_mempool_get_mutex (pool); + + return lock; +} + +void +rspamd_mempool_rlock_rwlock (rspamd_mempool_rwlock_t * lock) +{ + /* Spin on write lock */ + while (g_atomic_int_get (&lock->__w_lock->lock)) { + if (!__mutex_spin (lock->__w_lock)) { + break; + } + } + + g_atomic_int_inc (&lock->__r_lock->lock); + lock->__r_lock->owner = getpid (); +} + +void +rspamd_mempool_wlock_rwlock (rspamd_mempool_rwlock_t * lock) +{ + /* Spin on write lock first */ + rspamd_mempool_lock_mutex (lock->__w_lock); + /* Now we have write lock set up */ + /* Wait all readers */ + while (g_atomic_int_get (&lock->__r_lock->lock)) { + __mutex_spin (lock->__r_lock); + } +} + +void +rspamd_mempool_runlock_rwlock (rspamd_mempool_rwlock_t * lock) +{ + if (g_atomic_int_get (&lock->__r_lock->lock)) { + (void)g_atomic_int_dec_and_test (&lock->__r_lock->lock); + } +} + +void +rspamd_mempool_wunlock_rwlock (rspamd_mempool_rwlock_t * lock) +{ + rspamd_mempool_unlock_mutex (lock->__w_lock); +} + +void +rspamd_mempool_set_variable (rspamd_mempool_t *pool, const gchar *name, gpointer value, rspamd_mempool_destruct_t destructor) +{ + if (pool->variables == NULL) { + pool->variables = g_hash_table_new (rspamd_str_hash, rspamd_str_equal); + } + + g_hash_table_insert (pool->variables, rspamd_mempool_strdup (pool, name), value); + if (destructor != NULL) { + 
rspamd_mempool_add_destructor (pool, destructor, value); + } +} + +gpointer +rspamd_mempool_get_variable (rspamd_mempool_t *pool, const gchar *name) +{ + if (pool->variables == NULL) { + return NULL; + } + + return g_hash_table_lookup (pool->variables, name); +} + + +/* + * vi:ts=4 + */ diff --git a/src/libutil/mem_pool.h b/src/libutil/mem_pool.h new file mode 100644 index 000000000..f759ed60a --- /dev/null +++ b/src/libutil/mem_pool.h @@ -0,0 +1,299 @@ +/** + * @file mem_pool.h + * \brief Memory pools library. + * + * Memory pools library. Library is designed to implement efficient way to + * store data in memory avoiding calling of many malloc/free. It has overhead + * because of fact that objects live in pool for rather long time and are not freed + * immediately after use, but if we know certainly when these objects can be used, we + * can use pool for them + */ + +#ifndef RSPAMD_MEM_POOL_H +#define RSPAMD_MEM_POOL_H + +#include "config.h" + + +struct f_str_s; + +#define MEM_ALIGNMENT sizeof(unsigned long) /* platform word */ +#define align_ptr(p, a) \ + (guint8 *) (((uintptr_t) (p) + ((uintptr_t) a - 1)) & ~((uintptr_t) a - 1)) + +/** + * Destructor type definition + */ +typedef void (*rspamd_mempool_destruct_t)(void *ptr); + +/** + * Pool mutex structure + */ +typedef struct memory_pool_mutex_s { + gint lock; + pid_t owner; + guint spin; +} rspamd_mempool_mutex_t; + +/** + * Pool page structure + */ +struct _pool_chain { + guint8 *begin; /**< begin of pool chain block */ + guint8 *pos; /**< current start of free space in block */ + gsize len; /**< length of block */ + struct _pool_chain *next; /**< chain link */ +}; + +/** + * Shared pool page + */ +struct _pool_chain_shared { + guint8 *begin; + guint8 *pos; + gsize len; + struct _pool_chain_shared *next; + rspamd_mempool_mutex_t *lock; +}; + +/** + * Destructors list item structure + */ +struct _pool_destructors { + rspamd_mempool_destruct_t func; /**< pointer to destructor */ + void *data; /**< data to free 
*/ + const gchar *function; /**< function from which this destructor was added */ + const gchar *loc; /**< line number */ + struct _pool_destructors *prev; /**< chain link */ +}; + +/** + * Memory pool type + */ +struct rspamd_mutex_s; +typedef struct memory_pool_s { + struct _pool_chain *cur_pool; /**< currently used page */ + struct _pool_chain *first_pool; /**< first page */ + struct _pool_chain *cur_pool_tmp; /**< currently used temporary page */ + struct _pool_chain *first_pool_tmp; /**< first temporary page */ + struct _pool_chain_shared *shared_pool; /**< shared chain */ + struct _pool_destructors *destructors; /**< destructors chain */ + GHashTable *variables; /**< private memory pool variables */ + struct rspamd_mutex_s *mtx; /**< threads lock */ +} rspamd_mempool_t; + +/** + * Statistics structure + */ +typedef struct memory_pool_stat_s { + gsize pools_allocated; /**< total number of allocated pools */ + gsize pools_freed; /**< number of freed pools */ + gsize bytes_allocated; /**< bytes that are allocated with pool allocator */ + gsize chunks_allocated; /**< number of chunks that are allocated */ + gsize shared_chunks_allocated; /**< shared chunks allocated */ + gsize chunks_freed; /**< chunks freed */ + gsize oversized_chunks; /**< oversized chunks */ +} rspamd_mempool_stat_t; + +/** + * Rwlock for locking shared memory regions + */ +typedef struct memory_pool_rwlock_s { + rspamd_mempool_mutex_t *__r_lock; /**< read mutex (private) */ + rspamd_mempool_mutex_t *__w_lock; /**< write mutex (private) */ +} rspamd_mempool_rwlock_t; + +/** + * Allocate new memory poll + * @param size size of pool's page + * @return new memory pool object + */ +rspamd_mempool_t* rspamd_mempool_new (gsize size); + +/** + * Get memory from pool + * @param pool memory pool object + * @param size bytes to allocate + * @return pointer to allocated object + */ +void* rspamd_mempool_alloc (rspamd_mempool_t* pool, gsize size); + +/** + * Get memory from temporary pool + * @param pool 
memory pool object + * @param size bytes to allocate + * @return pointer to allocated object + */ +void* rspamd_mempool_alloc_tmp (rspamd_mempool_t* pool, gsize size); + +/** + * Get memory and set it to zero + * @param pool memory pool object + * @param size bytes to allocate + * @return pointer to allocated object + */ +void* rspamd_mempool_alloc0 (rspamd_mempool_t* pool, gsize size); + +/** + * Get memory and set it to zero + * @param pool memory pool object + * @param size bytes to allocate + * @return pointer to allocated object + */ +void* rspamd_mempool_alloc0_tmp (rspamd_mempool_t* pool, gsize size); + +/** + * Cleanup temporary data in pool + */ +void rspamd_mempool_cleanup_tmp (rspamd_mempool_t* pool); + +/** + * Make a copy of string in pool + * @param pool memory pool object + * @param src source string + * @return pointer to newly created string that is copy of src + */ +gchar* rspamd_mempool_strdup (rspamd_mempool_t* pool, const gchar *src); + +/** + * Make a copy of fixed string in pool as null terminated string + * @param pool memory pool object + * @param src source string + * @return pointer to newly created string that is copy of src + */ +gchar* rspamd_mempool_fstrdup (rspamd_mempool_t* pool, const struct f_str_s *src); + +/** + * Allocate piece of shared memory + * @param pool memory pool object + * @param size bytes to allocate + */ +void* rspamd_mempool_alloc_shared (rspamd_mempool_t* pool, gsize size); +void* rspamd_mempool_alloc0_shared (rspamd_mempool_t *pool, gsize size); +gchar* rspamd_mempool_strdup_shared (rspamd_mempool_t* pool, const gchar *src); + +/** + * Lock chunk of shared memory in which pointer is placed + * @param pool memory pool object + * @param pointer pointer of shared memory object that is to be locked (the whole page that contains that object is locked) + */ +void rspamd_mempool_lock_shared (rspamd_mempool_t *pool, void *pointer); + +/** + * Unlock chunk of shared memory in which pointer is placed + * @param pool 
memory pool object + * @param pointer pointer of shared memory object that is to be unlocked (the whole page that contains that object is unlocked) + */ +void rspamd_mempool_unlock_shared (rspamd_mempool_t *pool, void *pointer); + +/** + * Add destructor callback to pool + * @param pool memory pool object + * @param func pointer to function-destructor + * @param data pointer to data that would be passed to destructor + */ +void rspamd_mempool_add_destructor_full (rspamd_mempool_t *pool, rspamd_mempool_destruct_t func, void *data, + const gchar *function, const gchar *line); + +/* Macros for common usage */ +#define rspamd_mempool_add_destructor(pool, func, data) \ + rspamd_mempool_add_destructor_full(pool, func, data, G_STRFUNC, G_STRLOC) + +/** + * Replace destructor callback to pool for specified pointer + * @param pool memory pool object + * @param func pointer to function-destructor + * @param old_data pointer to old data + * @param new_data pointer to data that would be passed to destructor + */ +void rspamd_mempool_replace_destructor (rspamd_mempool_t *pool, + rspamd_mempool_destruct_t func, void *old_data, void *new_data); + +/** + * Delete pool, free all its chunks and call destructors chain + * @param pool memory pool object + */ +void rspamd_mempool_delete (rspamd_mempool_t *pool); + +/** + * Get new mutex from pool (allocated in shared memory) + * @param pool memory pool object + * @return mutex object + */ +rspamd_mempool_mutex_t* rspamd_mempool_get_mutex (rspamd_mempool_t *pool); + +/** + * Lock mutex + * @param mutex mutex to lock + */ +void rspamd_mempool_lock_mutex (rspamd_mempool_mutex_t *mutex); + +/** + * Unlock mutex + * @param mutex mutex to unlock + */ +void rspamd_mempool_unlock_mutex (rspamd_mempool_mutex_t *mutex); + +/** + * Create new rwlock and place it in shared memory + * @param pool memory pool object + * @return rwlock object + */ +rspamd_mempool_rwlock_t* rspamd_mempool_get_rwlock (rspamd_mempool_t *pool); + +/** + * Acquire read lock + 
* @param lock rwlock object + */ +void rspamd_mempool_rlock_rwlock (rspamd_mempool_rwlock_t *lock); + +/** + * Acquire write lock + * @param lock rwlock object + */ +void rspamd_mempool_wlock_rwlock (rspamd_mempool_rwlock_t *lock); + +/** + * Release read lock + * @param lock rwlock object + */ +void rspamd_mempool_runlock_rwlock (rspamd_mempool_rwlock_t *lock); + +/** + * Release write lock + * @param lock rwlock object + */ +void rspamd_mempool_wunlock_rwlock (rspamd_mempool_rwlock_t *lock); + +/** + * Get pool allocator statistics + * @param st stat pool struct + */ +void rspamd_mempool_stat (rspamd_mempool_stat_t *st); + +/** + * Get optimal pool size based on page size for this system + * @return size of memory page in system + */ +gsize rspamd_mempool_suggest_size (void); + +/** + * Set memory pool variable + * @param pool memory pool object + * @param name name of variable + * @param value value of variable + * @param destructor pointer to function-destructor + */ +void rspamd_mempool_set_variable (rspamd_mempool_t *pool, const gchar *name, + gpointer value, rspamd_mempool_destruct_t destructor); + +/** + * Get memory pool variable + * @param pool memory pool object + * @param name name of variable + * @return NULL or pointer to variable data + */ +gpointer rspamd_mempool_get_variable (rspamd_mempool_t *pool, const gchar *name); + + +#endif diff --git a/src/libutil/memcached.c b/src/libutil/memcached.c new file mode 100644 index 000000000..e4c9be9d2 --- /dev/null +++ b/src/libutil/memcached.c @@ -0,0 +1,831 @@ +/* + * Copyright (c) 2009-2012, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifdef _THREAD_SAFE +# include <pthread.h> +#endif + +#include <stdarg.h> + +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/param.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sysexits.h> +#include <unistd.h> +#include <syslog.h> + +#include <netinet/in.h> +#include <arpa/inet.h> +#include <sys/socket.h> +#include <sys/poll.h> +#include <errno.h> +#include <fcntl.h> +#include <sys/uio.h> +#include <event.h> +#include <glib.h> + +#include "memcached.h" + +#define CRLF "\r\n" +#define END_TRAILER "END" CRLF +#define STORED_TRAILER "STORED" CRLF +#define NOT_STORED_TRAILER "NOT_STORED" CRLF /* the text protocol spells this reply with an underscore */ +#define EXISTS_TRAILER "EXISTS" CRLF +#define DELETED_TRAILER "DELETED" CRLF +#define NOT_FOUND_TRAILER "NOT_FOUND" CRLF +#define CLIENT_ERROR_TRAILER "CLIENT_ERROR" +#define SERVER_ERROR_TRAILER "SERVER_ERROR" + +#define READ_BUFSIZ 1500 +#define MAX_RETRIES 3 + +/* Header for udp protocol */ +struct memc_udp_header { + guint16 req_id; + guint16 seq_num; + guint16 dg_sent; + guint16 unused; 
+}; + +static void socket_callback (gint fd, short what, void *arg); +static gint memc_parse_header (gchar *buf, size_t * len, gchar **end); + +/* + * Write to syslog if OPT_DEBUG is specified + */ +static void +memc_log (const memcached_ctx_t * ctx, gint line, const gchar *fmt, ...) +{ + va_list args; + if (ctx->options & MEMC_OPT_DEBUG) { + va_start (args, fmt); + g_log (G_LOG_DOMAIN, G_LOG_LEVEL_DEBUG, "memc_debug(%d): host: %s, port: %d", line, inet_ntoa (ctx->addr), ntohs (ctx->port)); + g_logv (G_LOG_DOMAIN, G_LOG_LEVEL_DEBUG, fmt, args); + va_end (args); + } +} + +/* + * Callback for write command + */ +static void +write_handler (gint fd, short what, memcached_ctx_t * ctx) +{ + gchar read_buf[READ_BUFSIZ]; + gint retries; + ssize_t r; + struct memc_udp_header header; + struct iovec iov[4]; + + /* Write something to memcached */ + if (what == EV_WRITE) { + if (ctx->protocol == UDP_TEXT) { + /* Send udp header */ + bzero (&header, sizeof (header)); + header.dg_sent = htons (1); + header.req_id = ctx->count; + } + + r = snprintf (read_buf, READ_BUFSIZ, "%s %s 0 %d %zu" CRLF, ctx->cmd, ctx->param->key, ctx->param->expire, ctx->param->bufsize); + memc_log (ctx, __LINE__, "memc_write: send write request to memcached: %s", read_buf); + + if (ctx->protocol == UDP_TEXT) { + iov[0].iov_base = &header; + iov[0].iov_len = sizeof (struct memc_udp_header); + if (ctx->param->bufpos == 0) { + iov[1].iov_base = read_buf; + iov[1].iov_len = r; + } + else { + iov[1].iov_base = NULL; + iov[1].iov_len = 0; + } + iov[2].iov_base = ctx->param->buf + ctx->param->bufpos; + iov[2].iov_len = ctx->param->bufsize - ctx->param->bufpos; + iov[3].iov_base = CRLF; + iov[3].iov_len = sizeof (CRLF) - 1; + if (writev (ctx->sock, iov, 4) == -1) { + memc_log (ctx, __LINE__, "memc_write: writev failed: %s", strerror (errno)); + } + } + else { + iov[0].iov_base = read_buf; + iov[0].iov_len = r; + iov[1].iov_base = ctx->param->buf + ctx->param->bufpos; + iov[1].iov_len = ctx->param->bufsize - 
ctx->param->bufpos; + iov[2].iov_base = CRLF; + iov[2].iov_len = sizeof (CRLF) - 1; + if (writev (ctx->sock, iov, 3) == -1) { + memc_log (ctx, __LINE__, "memc_write: writev failed: %s", strerror (errno)); + } + } + event_del (&ctx->mem_ev); + event_set (&ctx->mem_ev, ctx->sock, EV_READ | EV_PERSIST | EV_TIMEOUT, socket_callback, (void *)ctx); + event_add (&ctx->mem_ev, &ctx->timeout); + } + else if (what == EV_READ) { + /* Read header */ + retries = 0; + while (ctx->protocol == UDP_TEXT) { + iov[0].iov_base = &header; + iov[0].iov_len = sizeof (struct memc_udp_header); + iov[1].iov_base = read_buf; + iov[1].iov_len = READ_BUFSIZ; + if ((r = readv (ctx->sock, iov, 2)) == -1) { + event_del (&ctx->mem_ev); + ctx->callback (ctx, SERVER_ERROR, ctx->callback_data); + } + if (header.req_id != ctx->count && retries < MAX_RETRIES) { + retries++; + /* Not our reply packet */ + continue; + } + break; + } + if (ctx->protocol != UDP_TEXT) { + r = read (ctx->sock, read_buf, READ_BUFSIZ - 1); + } + memc_log (ctx, __LINE__, "memc_write: read reply from memcached: %s", read_buf); + /* Increment count */ + ctx->count++; + event_del (&ctx->mem_ev); + if (strncmp (read_buf, STORED_TRAILER, sizeof (STORED_TRAILER) - 1) == 0) { + ctx->callback (ctx, OK, ctx->callback_data); + } + else if (strncmp (read_buf, NOT_STORED_TRAILER, sizeof (NOT_STORED_TRAILER) - 1) == 0) { + ctx->callback (ctx, CLIENT_ERROR, ctx->callback_data); + } + else if (strncmp (read_buf, EXISTS_TRAILER, sizeof (EXISTS_TRAILER) - 1) == 0) { + ctx->callback (ctx, EXISTS, ctx->callback_data); + } + else { + ctx->callback (ctx, SERVER_ERROR, ctx->callback_data); + } + } + else if (what == EV_TIMEOUT) { + event_del (&ctx->mem_ev); + ctx->callback (ctx, SERVER_TIMEOUT, ctx->callback_data); + } +} + +/* + * Callback for read command + */ +static void +read_handler (gint fd, short what, memcached_ctx_t * ctx) +{ + gchar read_buf[READ_BUFSIZ]; + gchar *p; + ssize_t r; + size_t datalen; + struct memc_udp_header header; + struct 
iovec iov[2]; + gint retries = 0, t; + + if (what == EV_WRITE) { + /* Send command to memcached */ + if (ctx->protocol == UDP_TEXT) { + /* Send udp header */ + bzero (&header, sizeof (header)); + header.dg_sent = htons (1); + header.req_id = ctx->count; + } + + r = snprintf (read_buf, READ_BUFSIZ, "%s %s" CRLF, ctx->cmd, ctx->param->key); + memc_log (ctx, __LINE__, "memc_read: send read request to memcached: %s", read_buf); + if (ctx->protocol == UDP_TEXT) { + iov[0].iov_base = &header; + iov[0].iov_len = sizeof (struct memc_udp_header); + iov[1].iov_base = read_buf; + iov[1].iov_len = r; + if (writev (ctx->sock, iov, 2) == -1) { + memc_log (ctx, __LINE__, "memc_write: writev failed: %s", strerror (errno)); + } + } + else { + if (write (ctx->sock, read_buf, r) == -1) { + memc_log (ctx, __LINE__, "memc_write: write failed: %s", strerror (errno)); + } + } + event_del (&ctx->mem_ev); + event_set (&ctx->mem_ev, ctx->sock, EV_READ | EV_PERSIST | EV_TIMEOUT, socket_callback, (void *)ctx); + event_add (&ctx->mem_ev, &ctx->timeout); + } + else if (what == EV_READ) { + while (ctx->protocol == UDP_TEXT) { + iov[0].iov_base = &header; + iov[0].iov_len = sizeof (struct memc_udp_header); + iov[1].iov_base = read_buf; + iov[1].iov_len = READ_BUFSIZ; + if ((r = readv (ctx->sock, iov, 2)) == -1) { + event_del (&ctx->mem_ev); + ctx->callback (ctx, SERVER_ERROR, ctx->callback_data); + return; + } + memc_log (ctx, __LINE__, "memc_read: got read_buf: %s", read_buf); + if (header.req_id != ctx->count && retries < MAX_RETRIES) { + memc_log (ctx, __LINE__, "memc_read: got wrong packet id: %d, %d was awaited", header.req_id, ctx->count); + retries++; + /* Not our reply packet */ + continue; + } + break; + } + if (ctx->protocol != UDP_TEXT) { + r = read (ctx->sock, read_buf, READ_BUFSIZ - 1); + } + + if (r > 0) { + read_buf[r] = 0; + if (ctx->param->bufpos == 0) { + t = memc_parse_header (read_buf, &datalen, &p); + if (t < 0) { + event_del (&ctx->mem_ev); + memc_log (ctx, __LINE__, 
"memc_read: cannot parse memcached reply"); + ctx->callback (ctx, SERVER_ERROR, ctx->callback_data); + return; + } + else if (t == 0) { + memc_log (ctx, __LINE__, "memc_read: record does not exists"); + event_del (&ctx->mem_ev); + ctx->callback (ctx, NOT_EXISTS, ctx->callback_data); + return; + } + + if (datalen > ctx->param->bufsize) { + memc_log (ctx, __LINE__, "memc_read: user's buffer is too small: %zd, %zd required", ctx->param->bufsize, datalen); + event_del (&ctx->mem_ev); + ctx->callback (ctx, WRONG_LENGTH, ctx->callback_data); + return; + } + /* Check if we already have all data in buffer */ + if (r >= (ssize_t)(datalen + sizeof (END_TRAILER) + sizeof (CRLF) - 2)) { + /* Store all data in param's buffer */ + memcpy (ctx->param->buf + ctx->param->bufpos, p, datalen); + /* Increment count */ + ctx->count++; + event_del (&ctx->mem_ev); + ctx->callback (ctx, OK, ctx->callback_data); + return; + } + /* Subtract from sum parsed header's length */ + r -= p - read_buf; + } + else { + p = read_buf; + } + + if (strncmp (ctx->param->buf + ctx->param->bufpos + r - sizeof (END_TRAILER) - sizeof (CRLF) + 2, END_TRAILER, sizeof (END_TRAILER) - 1) == 0) { + r -= sizeof (END_TRAILER) - sizeof (CRLF) - 2; + memcpy (ctx->param->buf + ctx->param->bufpos, p, r); + event_del (&ctx->mem_ev); + ctx->callback (ctx, OK, ctx->callback_data); + return; + } + /* Store this part of data in param's buffer */ + memcpy (ctx->param->buf + ctx->param->bufpos, p, r); + ctx->param->bufpos += r; + } + else { + memc_log (ctx, __LINE__, "memc_read: read(v) failed: %d, %s", r, strerror (errno)); + event_del (&ctx->mem_ev); + ctx->callback (ctx, SERVER_ERROR, ctx->callback_data); + return; + } + + ctx->count++; + } + else if (what == EV_TIMEOUT) { + event_del (&ctx->mem_ev); + ctx->callback (ctx, SERVER_TIMEOUT, ctx->callback_data); + } + +} + +/* + * Callback for delete command + */ +static void +delete_handler (gint fd, short what, memcached_ctx_t * ctx) +{ + gchar read_buf[READ_BUFSIZ]; + gint 
retries; + ssize_t r; + struct memc_udp_header header; + struct iovec iov[2]; + + /* Write something to memcached */ + if (what == EV_WRITE) { + if (ctx->protocol == UDP_TEXT) { + /* Send udp header */ + bzero (&header, sizeof (header)); + header.dg_sent = htons (1); + header.req_id = ctx->count; + } + r = snprintf (read_buf, READ_BUFSIZ, "delete %s" CRLF, ctx->param->key); + memc_log (ctx, __LINE__, "memc_delete: send delete request to memcached: %s", read_buf); + + if (ctx->protocol == UDP_TEXT) { + iov[0].iov_base = &header; + iov[0].iov_len = sizeof (struct memc_udp_header); + iov[1].iov_base = read_buf; + iov[1].iov_len = r; + ctx->param->bufpos = writev (ctx->sock, iov, 2); + if (ctx->param->bufpos == (size_t)-1) { + memc_log (ctx, __LINE__, "memc_write: writev failed: %s", strerror (errno)); + } + } + else { + if (write (ctx->sock, read_buf, r) == -1) { + memc_log (ctx, __LINE__, "memc_write: write failed: %s", strerror (errno)); + } + } + event_del (&ctx->mem_ev); + event_set (&ctx->mem_ev, ctx->sock, EV_READ | EV_PERSIST | EV_TIMEOUT, socket_callback, (void *)ctx); + event_add (&ctx->mem_ev, &ctx->timeout); + } + else if (what == EV_READ) { + /* Read header */ + retries = 0; + while (ctx->protocol == UDP_TEXT) { + iov[0].iov_base = &header; + iov[0].iov_len = sizeof (struct memc_udp_header); + iov[1].iov_base = read_buf; + iov[1].iov_len = READ_BUFSIZ; + if ((r = readv (ctx->sock, iov, 2)) == -1) { + event_del (&ctx->mem_ev); + ctx->callback (ctx, SERVER_ERROR, ctx->callback_data); + return; + } + if (header.req_id != ctx->count && retries < MAX_RETRIES) { + retries++; + /* Not our reply packet */ + continue; + } + break; + } + if (ctx->protocol != UDP_TEXT) { + r = read (ctx->sock, read_buf, READ_BUFSIZ - 1); + } + /* Increment count */ + ctx->count++; + event_del (&ctx->mem_ev); + if (strncmp (read_buf, DELETED_TRAILER, sizeof (DELETED_TRAILER) - 1) == 0) { /* compare the full DELETED reply, not STORED's length */ + ctx->callback (ctx, OK, ctx->callback_data); + } + else if (strncmp (read_buf, NOT_FOUND_TRAILER, 
sizeof (NOT_FOUND_TRAILER) - 1) == 0) { + ctx->callback (ctx, NOT_EXISTS, ctx->callback_data); + } + else { + ctx->callback (ctx, SERVER_ERROR, ctx->callback_data); + } + } + else if (what == EV_TIMEOUT) { + event_del (&ctx->mem_ev); + ctx->callback (ctx, SERVER_TIMEOUT, ctx->callback_data); + } +} + +/* + * Callback for our socket events + */ +static void +socket_callback (gint fd, short what, void *arg) +{ + memcached_ctx_t *ctx = (memcached_ctx_t *) arg; + + switch (ctx->op) { + case CMD_NULL: + /* Do nothing here */ + break; + case CMD_CONNECT: + /* We have write readiness after connect call, so reinit event */ + ctx->cmd = "connect"; + if (what == EV_WRITE) { + event_del (&ctx->mem_ev); + event_set (&ctx->mem_ev, ctx->sock, EV_READ | EV_PERSIST | EV_TIMEOUT, socket_callback, (void *)ctx); + event_add (&ctx->mem_ev, NULL); + ctx->callback (ctx, OK, ctx->callback_data); + ctx->alive = 1; + } + else { + ctx->callback (ctx, SERVER_TIMEOUT, ctx->callback_data); + ctx->alive = 0; + } + break; + case CMD_WRITE: + write_handler (fd, what, ctx); + break; + case CMD_READ: + read_handler (fd, what, ctx); + break; + case CMD_DELETE: + delete_handler (fd, what, ctx); + break; + } +} + +/* + * Common callback function for memcached operations if no user's callback is specified + */ +static void +common_memc_callback (memcached_ctx_t * ctx, memc_error_t error, void *data) +{ + memc_log (ctx, __LINE__, "common_memc_callback: result of memc command '%s' is '%s'", ctx->cmd, memc_strerror (error)); +} + +/* + * Make socket for udp connection + */ +static gint +memc_make_udp_sock (memcached_ctx_t * ctx) +{ + struct sockaddr_in sc; + gint ofl; + + bzero (&sc, sizeof (struct sockaddr_in *)); + sc.sin_family = AF_INET; + sc.sin_port = ctx->port; + memcpy (&sc.sin_addr, &ctx->addr, sizeof (struct in_addr)); + + ctx->sock = socket (PF_INET, SOCK_DGRAM, 0); + + if (ctx->sock == -1) { + memc_log (ctx, __LINE__, "memc_make_udp_sock: socket() failed: %s", strerror (errno)); + return -1; + 
} + + /* set nonblocking */ + ofl = fcntl (ctx->sock, F_GETFL, 0); + fcntl (ctx->sock, F_SETFL, ofl | O_NONBLOCK); + + /* + * Call connect to set default destination for datagrams + * May not block + */ + ctx->op = CMD_CONNECT; + event_set (&ctx->mem_ev, ctx->sock, EV_WRITE | EV_TIMEOUT, socket_callback, (void *)ctx); + event_add (&ctx->mem_ev, NULL); + return connect (ctx->sock, (struct sockaddr *)&sc, sizeof (struct sockaddr_in)); +} + +/* + * Make socket for tcp connection + */ +static gint +memc_make_tcp_sock (memcached_ctx_t * ctx) +{ + struct sockaddr_in sc; + gint ofl, r; + + bzero (&sc, sizeof (sc)); /* clear the whole structure, not just pointer-size bytes */ + sc.sin_family = AF_INET; + sc.sin_port = ctx->port; + memcpy (&sc.sin_addr, &ctx->addr, sizeof (struct in_addr)); + + ctx->sock = socket (PF_INET, SOCK_STREAM, 0); + + if (ctx->sock == -1) { + memc_log (ctx, __LINE__, "memc_make_tcp_sock: socket() failed: %s", strerror (errno)); + return -1; + } + + /* set nonblocking */ + ofl = fcntl (ctx->sock, F_GETFL, 0); + fcntl (ctx->sock, F_SETFL, ofl | O_NONBLOCK); + + if ((r = connect (ctx->sock, (struct sockaddr *)&sc, sizeof (struct sockaddr_in))) == -1) { + if (errno != EINPROGRESS) { + close (ctx->sock); + ctx->sock = -1; + memc_log (ctx, __LINE__, "memc_make_tcp_sock: connect() failed: %s", strerror (errno)); + return -1; + } + } + ctx->op = CMD_CONNECT; + event_set (&ctx->mem_ev, ctx->sock, EV_WRITE | EV_TIMEOUT, socket_callback, (void *)ctx); + event_add (&ctx->mem_ev, &ctx->timeout); + return 0; +} + +/* + * Parse VALUE reply from server and set len argument to value returned by memcached + */ +static gint +memc_parse_header (gchar *buf, size_t * len, gchar **end) +{ + gchar *p, *c; + gint i; + + /* VALUE <key> <flags> <bytes> [<cas unique>]\r\n */ + c = strstr (buf, CRLF); + if (c == NULL) { + return -1; + } + *end = c + sizeof (CRLF) - 1; + + if (strncmp (buf, "VALUE ", sizeof ("VALUE ") - 1) == 0) { + p = buf + sizeof ("VALUE ") - 1; + + /* Read bytes value and ignore all other fields, 
such as flags and key */ + for (i = 0; i < 2; i++) { + while (p++ < c && *p != ' '); + + if (p > c) { + return -1; + } + } + *len = strtoul (p, &c, 10); + return 1; + } + /* If value not found memcached return just END\r\n , in this case return 0 */ + else if (strncmp (buf, END_TRAILER, sizeof (END_TRAILER) - 1) == 0) { + return 0; + } + + return -1; +} + + +/* + * Common read command handler for memcached + */ +memc_error_t +memc_read (memcached_ctx_t * ctx, const gchar *cmd, memcached_param_t * param) +{ + ctx->cmd = cmd; + ctx->op = CMD_READ; + ctx->param = param; + event_set (&ctx->mem_ev, ctx->sock, EV_WRITE | EV_TIMEOUT, socket_callback, (void *)ctx); + event_add (&ctx->mem_ev, &ctx->timeout); + + return OK; +} + +/* + * Common write command handler for memcached + */ +memc_error_t +memc_write (memcached_ctx_t * ctx, const gchar *cmd, memcached_param_t * param, gint expire) +{ + ctx->cmd = cmd; + ctx->op = CMD_WRITE; + ctx->param = param; + param->expire = expire; + event_set (&ctx->mem_ev, ctx->sock, EV_WRITE | EV_TIMEOUT, socket_callback, (void *)ctx); + event_add (&ctx->mem_ev, &ctx->timeout); + + return OK; +} + +/* + * Delete command handler + */ +memc_error_t +memc_delete (memcached_ctx_t * ctx, memcached_param_t * param) +{ + ctx->cmd = "delete"; + ctx->op = CMD_DELETE; + ctx->param = param; + event_set (&ctx->mem_ev, ctx->sock, EV_WRITE | EV_TIMEOUT, socket_callback, (void *)ctx); + event_add (&ctx->mem_ev, &ctx->timeout); + + return OK; +} + +/* + * Write handler for memcached mirroring + * writing is done to each memcached server + */ +memc_error_t +memc_write_mirror (memcached_ctx_t * ctx, size_t memcached_num, const gchar *cmd, memcached_param_t * param, gint expire) +{ + memc_error_t r, result = OK; + + while (memcached_num--) { + if (ctx[memcached_num].alive == 1) { + r = memc_write (&ctx[memcached_num], cmd, param, expire); + if (r != OK) { + memc_log (&ctx[memcached_num], __LINE__, "memc_write_mirror: cannot write to mirror server: %s", 
memc_strerror (r)); + result = r; + ctx[memcached_num].alive = 0; + } + } + } + + return result; +} + +/* + * Read handler for memcached mirroring + * reading is done from first active memcached server + */ +memc_error_t +memc_read_mirror (memcached_ctx_t * ctx, size_t memcached_num, const gchar *cmd, memcached_param_t * param) +{ + memc_error_t r, result = OK; + + while (memcached_num--) { + if (ctx[memcached_num].alive == 1) { + r = memc_read (&ctx[memcached_num], cmd, param); + if (r != OK) { + result = r; + if (r != NOT_EXISTS) { + ctx[memcached_num].alive = 0; + memc_log (&ctx[memcached_num], __LINE__, "memc_read_mirror: cannot write read from mirror server: %s", memc_strerror (r)); + } + else { + memc_log (&ctx[memcached_num], __LINE__, "memc_read_mirror: record not exists", memc_strerror (r)); + } + } + else { + break; + } + } + } + + return result; +} + +/* + * Delete handler for memcached mirroring + * deleting is done for each active memcached server + */ +memc_error_t +memc_delete_mirror (memcached_ctx_t * ctx, size_t memcached_num, const gchar *cmd, memcached_param_t * param) +{ + memc_error_t r, result = OK; + + while (memcached_num--) { + if (ctx[memcached_num].alive == 1) { + r = memc_delete (&ctx[memcached_num], param); + if (r != OK) { + result = r; + if (r != NOT_EXISTS) { + ctx[memcached_num].alive = 0; + memc_log (&ctx[memcached_num], __LINE__, "memc_delete_mirror: cannot delete from mirror server: %s", memc_strerror (r)); + } + } + } + } + + return result; +} + + +/* + * Initialize memcached context for specified protocol + */ +gint +memc_init_ctx (memcached_ctx_t * ctx) +{ + if (ctx == NULL) { + return -1; + } + + ctx->count = 0; + ctx->alive = 0; + ctx->op = CMD_NULL; + /* Set default callback */ + if (ctx->callback == NULL) { + ctx->callback = common_memc_callback; + } + + switch (ctx->protocol) { + case UDP_TEXT: + return memc_make_udp_sock (ctx); + break; + case TCP_TEXT: + return memc_make_tcp_sock (ctx); + break; + /* Not implemented */ 
+ case UDP_BIN: + case TCP_BIN: + default: + return -1; + } +} + +/* + * Mirror init + */ +gint +memc_init_ctx_mirror (memcached_ctx_t * ctx, size_t memcached_num) +{ + gint r, result = -1; + while (memcached_num--) { + if (ctx[memcached_num].alive == 1) { + r = memc_init_ctx (&ctx[memcached_num]); + if (r == -1) { + ctx[memcached_num].alive = 0; + memc_log (&ctx[memcached_num], __LINE__, "memc_init_ctx_mirror: cannot connect to server"); + } + else { + result = 1; + } + } + } + + return result; +} + +/* + * Close context connection + */ +gint +memc_close_ctx (memcached_ctx_t * ctx) +{ + if (ctx != NULL && ctx->sock != -1) { + event_del (&ctx->mem_ev); + return close (ctx->sock); + } + + return -1; +} + +/* + * Mirror close + */ +gint +memc_close_ctx_mirror (memcached_ctx_t * ctx, size_t memcached_num) +{ + gint r = 0; + while (memcached_num--) { + if (ctx[memcached_num].alive == 1) { + r = memc_close_ctx (&ctx[memcached_num]); + if (r == -1) { + memc_log (&ctx[memcached_num], __LINE__, "memc_close_ctx_mirror: cannot close connection to server properly"); + ctx[memcached_num].alive = 0; + } + } + } + + return r; +} + + +const gchar * +memc_strerror (memc_error_t err) +{ + const gchar *p; + + switch (err) { + case OK: + p = "Ok"; + break; + case BAD_COMMAND: + p = "Bad command"; + break; + case CLIENT_ERROR: + p = "Client error"; + break; + case SERVER_ERROR: + p = "Server error"; + break; + case SERVER_TIMEOUT: + p = "Server timeout"; + break; + case NOT_EXISTS: + p = "Key not found"; + break; + case EXISTS: + p = "Key already exists"; + break; + case WRONG_LENGTH: + p = "Wrong result length"; + break; + default: + p = "Unknown error"; + break; + } + + return p; +} + +/* + * vi:ts=4 + */ diff --git a/src/libutil/memcached.h b/src/libutil/memcached.h new file mode 100644 index 000000000..098e26eea --- /dev/null +++ b/src/libutil/memcached.h @@ -0,0 +1,142 @@ +#ifndef MEMCACHED_H +#define MEMCACHED_H + +#include <sys/types.h> +#include <netinet/in.h> +#include 
<sys/time.h> +#include <time.h> + +#define MAXKEYLEN 250 + +#define MEMC_OPT_DEBUG 0x1 + +struct event; + +typedef enum memc_error { + OK, + BAD_COMMAND, + CLIENT_ERROR, + SERVER_ERROR, + SERVER_TIMEOUT, + NOT_EXISTS, + EXISTS, + WRONG_LENGTH +} memc_error_t; + +/* XXX: Only UDP_TEXT is supported at present */ +typedef enum memc_proto { + UDP_TEXT, + TCP_TEXT, + UDP_BIN, + TCP_BIN +} memc_proto_t; + +typedef enum memc_op { + CMD_NULL, + CMD_CONNECT, + CMD_READ, + CMD_WRITE, + CMD_DELETE, +} memc_opt_t; + +typedef struct memcached_param_s { + gchar key[MAXKEYLEN]; + u_char *buf; + size_t bufsize; + size_t bufpos; + gint expire; +} memcached_param_t; + + +/* Port must be in network byte order */ +typedef struct memcached_ctx_s { + memc_proto_t protocol; + struct in_addr addr; + guint16 port; + gint sock; + struct timeval timeout; + /* Counter that is used for memcached operations in network byte order */ + guint16 count; + /* Flag that signalize that this memcached is alive */ + short alive; + /* Options that can be specified for memcached connection */ + short options; + /* Current operation */ + memc_opt_t op; + /* Current command */ + const gchar *cmd; + /* Current param */ + memcached_param_t *param; + /* Callback for current operation */ + void (*callback) (struct memcached_ctx_s *ctx, memc_error_t error, void *data); + /* Data for callback function */ + void *callback_data; + /* Event structure */ + struct event mem_ev; +} memcached_ctx_t; + +typedef void (*memcached_callback_t) (memcached_ctx_t *ctx, memc_error_t error, void *data); + +/* + * Initialize connection to memcached server: + * addr, port and timeout fields in ctx must be filled with valid values + * Return: + * 0 - success + * -1 - error (error is stored in errno) + */ +gint memc_init_ctx (memcached_ctx_t *ctx); +gint memc_init_ctx_mirror (memcached_ctx_t *ctx, size_t memcached_num); +/* + * Memcached function for getting, setting, adding values to memcached server + * ctx - valid memcached context 
+ * key - key to extract (max 250 characters as it specified in memcached API) + * buf, elemsize, nelem - allocated buffer of length nelem structures each of elemsize + * that would contain extracted data (NOT NULL TERMINATED) + * Return: + * memc_error_t + * nelem is changed according to actual number of extracted data + * + * "set" means "store this data". + * + * "add" means "store this data, but only if the server *doesn't* already + * hold data for this key". + + * "replace" means "store this data, but only if the server *does* + * already hold data for this key". + + * "append" means "add this data to an existing key after existing data". + + * "prepend" means "add this data to an existing key before existing data". + */ +#define memc_get(ctx, param) memc_read(ctx, "get", param) +#define memc_set(ctx, param, expire) memc_write(ctx, "set", param, expire) +#define memc_add(ctx, param, expire) memc_write(ctx, "add", param, expire) +#define memc_replace(ctx, param, expire) memc_write(ctx, "replace", param, expire) +#define memc_append(ctx, param, expire) memc_write(ctx, "append", param, expire) +#define memc_prepend(ctx, param, expire) memc_write(ctx, "prepend", param, expire) + +/* Functions that works with mirror of memcached servers */ +#define memc_get_mirror(ctx, num, param) memc_read_mirror(ctx, num, "get", param) +#define memc_set_mirror(ctx, num, param, expire) memc_write_mirror(ctx, num, "set", param, expire) +#define memc_add_mirror(ctx, num, param, expire) memc_write_mirror(ctx, num, "add", param, expire) +#define memc_replace_mirror(ctx, num, param, expire) memc_write_mirror(ctx, num, "replace", param, expire) +#define memc_append_mirror(ctx, num, param, expire) memc_write_mirror(ctx, num, "append", param, expire) +#define memc_prepend_mirror(ctx, num, param, expire) memc_write_mirror(ctx, num, "prepend", param, expire) + + +memc_error_t memc_read (memcached_ctx_t *ctx, const gchar *cmd, memcached_param_t *param); +memc_error_t memc_write 
(memcached_ctx_t *ctx, const gchar *cmd, memcached_param_t *param, gint expire); +memc_error_t memc_delete (memcached_ctx_t *ctx, memcached_param_t *params); + +memc_error_t memc_write_mirror (memcached_ctx_t *ctx, size_t memcached_num, const gchar *cmd, memcached_param_t *param, gint expire); +memc_error_t memc_read_mirror (memcached_ctx_t *ctx, size_t memcached_num, const gchar *cmd, memcached_param_t *param); +memc_error_t memc_delete_mirror (memcached_ctx_t *ctx, size_t memcached_num, const gchar *cmd, memcached_param_t *param); + +/* Return symbolic name of memcached error*/ +const gchar * memc_strerror (memc_error_t err); + +/* Destroy socket from ctx */ +gint memc_close_ctx (memcached_ctx_t *ctx); +gint memc_close_ctx_mirror (memcached_ctx_t *ctx, size_t memcached_num); + +#endif diff --git a/src/libutil/printf.c b/src/libutil/printf.c new file mode 100644 index 000000000..d72ec95c8 --- /dev/null +++ b/src/libutil/printf.c @@ -0,0 +1,635 @@ +/* Copyright (c) 2010, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "printf.h" +#include "fstring.h" +#include "main.h" + +/** + * From FreeBSD libutil code + */ +static const int maxscale = 6; + +static gchar * +rspamd_humanize_number (gchar *buf, gchar *last, gint64 num, gboolean bytes) +{ + const gchar *prefixes; + int i, r, remainder, sign; + gint64 divisor; + gsize baselen, len = last - buf; + + remainder = 0; + + baselen = 1; + if (!bytes) { + divisor = 1000; + prefixes = "\0\0\0k\0\0M\0\0G\0\0T\0\0P\0\0E"; + } + else { + divisor = 1024; + prefixes = "B\0\0k\0\0M\0\0G\0\0T\0\0P\0\0E"; + } + + +#define SCALE2PREFIX(scale) (&prefixes[(scale) * 3]) + + if (num < 0) { + sign = -1; + num = -num; + baselen += 2; /* sign, digit */ + } + else { + sign = 1; + baselen += 1; /* digit */ + } + + /* Check if enough room for `x y' + suffix + `\0' */ + if (len < baselen + 1) { + return buf; + } + + /* + * Divide the number until it fits the given column. + * If there will be an overflow by the rounding below, + * divide once more. 
+ */ + for (i = 0; i < maxscale && num > divisor; i++) { + remainder = num % divisor; + num /= divisor; + } + + r = rspamd_snprintf (buf, len, "%L%s", + sign * (num + (remainder + 50) / 1000), + SCALE2PREFIX (i)); + +#undef SCALE2PREFIX + + return buf + r; +} + + +static gchar * +rspamd_sprintf_num (gchar *buf, gchar *last, guint64 ui64, gchar zero, + guint hexadecimal, guint width) +{ + gchar *p, temp[sizeof ("18446744073709551615")]; + size_t len; + guint32 ui32; + static gchar hex[] = "0123456789abcdef"; + static gchar HEX[] = "0123456789ABCDEF"; + + p = temp + sizeof(temp); + + if (hexadecimal == 0) { + + if (ui64 <= G_MAXUINT32) { + + /* + * To divide 64-bit numbers and to find remainders + * on the x86 platform gcc and icc call the libc functions + * [u]divdi3() and [u]moddi3(), they call another function + * in its turn. On FreeBSD it is the qdivrem() function, + * its source code is about 170 lines of the code. + * The glibc counterpart is about 150 lines of the code. + * + * For 32-bit numbers and some divisors gcc and icc use + * a inlined multiplication and shifts. 
For example, + * guint "i32 / 10" is compiled to + * + * (i32 * 0xCCCCCCCD) >> 35 + */ + + ui32 = (guint32) ui64; + + do { + *--p = (gchar) (ui32 % 10 + '0'); + } while (ui32 /= 10); + + } else { + do { + *--p = (gchar) (ui64 % 10 + '0'); + } while (ui64 /= 10); + } + + } else if (hexadecimal == 1) { + + do { + + /* the "(guint32)" cast disables the BCC's warning */ + *--p = hex[(guint32) (ui64 & 0xf)]; + + } while (ui64 >>= 4); + + } else { /* hexadecimal == 2 */ + + do { + + /* the "(guint32)" cast disables the BCC's warning */ + *--p = HEX[(guint32) (ui64 & 0xf)]; + + } while (ui64 >>= 4); + } + + /* zero or space padding */ + + len = (temp + sizeof (temp)) - p; + + while (len++ < width && buf < last) { + *buf++ = zero; + } + + /* number safe copy */ + + len = (temp + sizeof (temp)) - p; + + if (buf + len > last) { + len = last - buf; + } + + return ((gchar *)memcpy (buf, p, len)) + len; +} + +struct rspamd_printf_char_buf { + char *begin; + char *pos; + glong remain; +}; + +static glong +rspamd_printf_append_char (const gchar *buf, glong buflen, gpointer ud) +{ + struct rspamd_printf_char_buf *dst = (struct rspamd_printf_char_buf *)ud; + glong wr; + + if (dst->remain <= 0) { + return dst->remain; + } + + wr = MIN (dst->remain, buflen); + memcpy (dst->pos, buf, wr); + dst->remain -= wr; + dst->pos += wr; + + return wr; +} + +static glong +rspamd_printf_append_file (const gchar *buf, glong buflen, gpointer ud) +{ + FILE *dst = (FILE *)ud; + + return fwrite (buf, 1, buflen, dst); +} + +static glong +rspamd_printf_append_gstring (const gchar *buf, glong buflen, gpointer ud) +{ + GString *dst = (GString *)ud; + + g_string_append_len (dst, buf, buflen); + + return buflen; +} + +glong +rspamd_fprintf (FILE *f, const gchar *fmt, ...) +{ + va_list args; + glong r; + + va_start (args, fmt); + r = rspamd_vprintf_common (rspamd_printf_append_file, f, fmt, args); + va_end (args); + + return r; +} + +glong +rspamd_log_fprintf (FILE *f, const gchar *fmt, ...) 
+{ + va_list args; + glong r; + + va_start (args, fmt); + r = rspamd_vprintf_common (rspamd_printf_append_file, f, fmt, args); + va_end (args); + + fflush (f); + + return r; +} + + +glong +rspamd_snprintf (gchar *buf, glong max, const gchar *fmt, ...) +{ + gchar *r; + va_list args; + + va_start (args, fmt); + r = rspamd_vsnprintf (buf, max, fmt, args); + va_end (args); + + return (r - buf); +} + +gchar * +rspamd_vsnprintf (gchar *buf, glong max, const gchar *fmt, va_list args) +{ + struct rspamd_printf_char_buf dst; + + dst.begin = buf; + dst.pos = dst.begin; + dst.remain = max - 1; + (void)rspamd_vprintf_common (rspamd_printf_append_char, &dst, fmt, args); + *dst.pos = '\0'; + + return dst.pos; +} + +glong +rspamd_printf_gstring (GString *s, const gchar *fmt, ...) +{ + va_list args; + glong r; + + va_start (args, fmt); + r = rspamd_vprintf_common (rspamd_printf_append_gstring, s, fmt, args); + va_end (args); + + return r; +} + +#define RSPAMD_PRINTF_APPEND(buf, len) \ + do { \ + wr = func ((buf), (len), apd); \ + if (wr <= 0) { \ + goto oob; \ + } \ + written += wr; \ + fmt ++; \ + buf_start = fmt; \ + } while(0) + +glong +rspamd_vprintf_common (rspamd_printf_append_func func, gpointer apd, const gchar *fmt, va_list args) +{ + gchar zero, numbuf[G_ASCII_DTOSTR_BUF_SIZE], *p, *last, c; + const gchar *buf_start = fmt; + gint d; + long double f, scale; + glong written = 0, wr, slen; + gint64 i64; + guint64 ui64; + guint width, sign, hex, humanize, bytes, frac_width, i; + f_str_t *v; + GString *gs; + gboolean bv; + + while (*fmt) { + + /* + * "buf < last" means that we could copy at least one character: + * the plain character, "%%", "%c", and minus without the checking + */ + + if (*fmt == '%') { + + /* Append what we have in buf */ + if (fmt > buf_start) { + wr = func (buf_start, fmt - buf_start, apd); + if (wr <= 0) { + goto oob; + } + written += wr; + } + + i64 = 0; + ui64 = 0; + + zero = (gchar) ((*++fmt == '0') ? 
'0' : ' '); + width = 0; + sign = 1; + hex = 0; + bytes = 0; + humanize = 0; + frac_width = 0; + slen = -1; + + while (*fmt >= '0' && *fmt <= '9') { + width = width * 10 + *fmt++ - '0'; + } + + + for ( ;; ) { + switch (*fmt) { + + case 'u': + sign = 0; + fmt++; + continue; + + case 'm': + fmt++; + continue; + + case 'X': + hex = 2; + sign = 0; + fmt++; + continue; + + case 'x': + hex = 1; + sign = 0; + fmt++; + continue; + case 'H': + humanize = 1; + bytes = 1; + sign = 0; + fmt ++; + continue; + case 'h': + humanize = 1; + sign = 0; + fmt ++; + continue; + case '.': + fmt++; + + while (*fmt >= '0' && *fmt <= '9') { + frac_width = frac_width * 10 + *fmt++ - '0'; + } + + break; + + case '*': + d = (gint)va_arg (args, gint); + if (G_UNLIKELY (d < 0)) { + msg_err ("critical error: size is less than 0"); + return 0; + } + slen = (glong)d; + fmt++; + continue; + + default: + break; + } + + break; + } + + + switch (*fmt) { + + case 'V': + v = va_arg (args, f_str_t *); + RSPAMD_PRINTF_APPEND (v->begin, v->len); + + continue; + + case 'v': + gs = va_arg (args, GString *); + RSPAMD_PRINTF_APPEND (gs->str, gs->len); + + continue; + + case 's': + p = va_arg (args, gchar *); + if (p == NULL) { + p = "(NULL)"; + } + + if (slen == -1) { + /* NULL terminated string */ + slen = strlen (p); + } + + RSPAMD_PRINTF_APPEND (p, slen); + + continue; + + case 'O': + i64 = (gint64) va_arg (args, off_t); + sign = 1; + break; + + case 'P': + i64 = (gint64) va_arg (args, pid_t); + sign = 1; + break; + + case 'T': + i64 = (gint64) va_arg (args, time_t); + sign = 1; + break; + + case 'z': + if (sign) { + i64 = (gint64) va_arg (args, ssize_t); + } else { + ui64 = (guint64) va_arg (args, size_t); + } + break; + + case 'd': + if (sign) { + i64 = (gint64) va_arg (args, gint); + } else { + ui64 = (guint64) va_arg (args, guint); + } + break; + + case 'l': + if (sign) { + i64 = (gint64) va_arg(args, glong); + } else { + ui64 = (guint64) va_arg(args, gulong); + } + break; + + case 'D': + if (sign) { + 
i64 = (gint64) va_arg(args, gint32); + } else { + ui64 = (guint64) va_arg(args, guint32); + } + break; + + case 'L': + if (sign) { + i64 = va_arg (args, gint64); + } else { + ui64 = va_arg (args, guint64); + } + break; + + + case 'f': + case 'F': + if (*fmt == 'f') { + f = (long double) va_arg (args, double); + } + else { + f = (long double) va_arg (args, long double); + } + p = numbuf; + last = p + sizeof (numbuf); + if (f < 0) { + *p++ = '-'; + f = -f; + } + + ui64 = (gint64) f; + + p = rspamd_sprintf_num (p, last, ui64, zero, 0, width); + + if (frac_width) { + + if (p < last) { + *p++ = '.'; + } + + scale = 1.0; + + for (i = 0; i < frac_width; i++) { + scale *= 10.0; + } + + /* + * (gint64) cast is required for msvc6: + * it can not convert guint64 to double + */ + ui64 = (guint64) ((f - (gint64) ui64) * scale); + + p = rspamd_sprintf_num (p, last, ui64, '0', 0, frac_width); + } + + slen = p - numbuf; + RSPAMD_PRINTF_APPEND (numbuf, slen); + + continue; + + case 'g': + case 'G': + if (*fmt == 'g') { + f = (long double) va_arg (args, double); + } + else { + f = (long double) va_arg (args, long double); + } + + g_ascii_formatd (numbuf, sizeof (numbuf), "%g", (double)f); + slen = strlen (numbuf); + RSPAMD_PRINTF_APPEND (numbuf, slen); + + continue; + + case 'b': + bv = (gboolean) va_arg (args, double); + RSPAMD_PRINTF_APPEND (bv ? "true" : "false", bv ? 
4 : 5); + + continue; + + case 'p': + ui64 = (uintptr_t) va_arg (args, void *); + hex = 2; + sign = 0; + zero = '0'; + width = sizeof (void *) * 2; + break; + + case 'c': + c = va_arg (args, gint); + c &= 0xff; + RSPAMD_PRINTF_APPEND (&c, 1); + + continue; + + case 'Z': + c = '\0'; + RSPAMD_PRINTF_APPEND (&c, 1); + + continue; + + case 'N': + c = LF; + RSPAMD_PRINTF_APPEND (&c, 1); + + continue; + + case '%': + c = '%'; + RSPAMD_PRINTF_APPEND (&c, 1); + + continue; + + default: + c = *fmt; + RSPAMD_PRINTF_APPEND (&c, 1); + + continue; + } + + /* Print number */ + p = numbuf; + last = p + sizeof (numbuf); + if (sign) { + if (i64 < 0) { + *p++ = '-'; + ui64 = (guint64) -i64; + + } else { + ui64 = (guint64) i64; + } + } + + if (!humanize) { + p = rspamd_sprintf_num (p, last, ui64, zero, hex, width); + } + else { + p = rspamd_humanize_number (p, last, ui64, bytes); + } + slen = p - numbuf; + RSPAMD_PRINTF_APPEND (numbuf, slen); + + } else { + fmt++; + } + } + + /* Finish buffer */ + if (fmt > buf_start) { + wr = func (buf_start, fmt - buf_start, apd); + if (wr <= 0) { + goto oob; + } + written += wr; + } + +oob: + return written; +} + diff --git a/src/libutil/printf.h b/src/libutil/printf.h new file mode 100644 index 000000000..a4e03791d --- /dev/null +++ b/src/libutil/printf.h @@ -0,0 +1,75 @@ +/* Copyright (c) 2010, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
#ifndef PRINTF_H_
#define PRINTF_H_

#include "config.h"

/*
 * supported formats:
 *	%[0][width][x][X]O		    off_t
 *	%[0][width]T			    time_t
 *	%[0][width][u][x|X|h|H]z	ssize_t/size_t
 *	%[0][width][u][x|X|h|H]d	gint/guint
 *	%[0][width][u][x|X|h|H]l	long
 *	%[0][width][u][x|X|h|H]D	gint32/guint32
 *	%[0][width][u][x|X|h|H]L	gint64/guint64
 *	%[0][width][.width]f	    double
 *	%[0][width][.width]F	    long double
 *	%[0][width][.width]g	    double
 *	%[0][width][.width]G	    long double
 *	%b						    boolean (true or false)
 *	%P						    pid_t
 *	%r						    rlim_t
 *	%p						    void *
 *	%V						    f_str_t *
 *	%v						    GString *
 *	%s						    null-terminated string
 *	%*s					        length and string
 *	%Z						    '\0'
 *	%N						    '\n'
 *	%c						    gchar
 *	%%						    %
 *
 * NOTE(review): rspamd_vprintf_common() in printf.c has no case for 'r', so
 * "%r" currently prints a literal 'r' - confirm whether rlim_t support is
 * still intended.
 */

/**
 * Callback used for common printf operations
 * @param buf buffer to append
 * @param buflen length of the buffer
 * @param ud opaque pointer
 * @return number of characters written; a value <= 0 stops the formatting
 */
typedef glong (*rspamd_printf_append_func)(const gchar *buf, glong buflen, gpointer ud);

/* Formatted output to a stdio stream */
glong rspamd_fprintf (FILE *f, const gchar *fmt, ...);
/* Same as rspamd_fprintf() but the stream is flushed afterwards */
glong rspamd_log_fprintf (FILE *f, const gchar *fmt, ...);
/* Formatted output to a buffer of `max' bytes; returns the length of the result without the final '\0' */
glong rspamd_snprintf (gchar *buf, glong max, const gchar *fmt, ...);
/* va_list variant of rspamd_snprintf(); returns the pointer to the final '\0' */
gchar *rspamd_vsnprintf (gchar *buf, glong max, const gchar *fmt, va_list args);
/* Formatted output appended to a GString */
glong rspamd_printf_gstring (GString *s, const gchar *fmt, ...);

/* Generic formatter that passes every output fragment to `func' together with `apd' */
glong rspamd_vprintf_common (rspamd_printf_append_func func, gpointer apd, const gchar *fmt, va_list args);

#endif /* PRINTF_H_ */
+ */ + + +#include "config.h" +#include "radix.h" +#include "mem_pool.h" + +static void *radix_alloc (radix_tree_t * tree); + +radix_tree_t * +radix_tree_create (void) +{ + radix_tree_t *tree; + + tree = g_malloc (sizeof (radix_tree_t)); + if (tree == NULL) { + return NULL; + } + + tree->pool = rspamd_mempool_new (rspamd_mempool_suggest_size ()); + tree->size = 0; + + tree->root = radix_alloc (tree); + if (tree->root == NULL) { + return NULL; + } + + tree->root->right = NULL; + tree->root->left = NULL; + tree->root->parent = NULL; + tree->root->value = RADIX_NO_VALUE; + + return tree; +} + +enum radix_insert_type { + RADIX_INSERT, + RADIX_ADD, + RADIX_REPLACE +}; + +static uintptr_t +radix32tree_insert_common (radix_tree_t * tree, guint32 key, guint32 mask, uintptr_t value, enum radix_insert_type type) +{ + guint32 bit; + radix_node_t *node, *next; + + bit = 0x80000000; + + node = tree->root; + next = tree->root; + /* Find a place in trie to insert */ + while (bit & mask) { + if (key & bit) { + next = node->right; + } + else { + next = node->left; + } + + if (next == NULL) { + break; + } + + bit >>= 1; + node = next; + } + + if (next) { + if (node->value != RADIX_NO_VALUE) { + /* Value was found, switch on insert type */ + switch (type) { + case RADIX_INSERT: + return 1; + case RADIX_ADD: + node->value += value; + return value; + case RADIX_REPLACE: + node->value = value; + return 1; + } + } + + node->value = value; + node->key = key; + return 0; + } + /* Inserting value in trie creating all path components */ + while (bit & mask) { + next = radix_alloc (tree); + if (next == NULL) { + return -1; + } + + next->right = NULL; + next->left = NULL; + next->parent = node; + next->value = RADIX_NO_VALUE; + + if (key & bit) { + node->right = next; + + } + else { + node->left = next; + } + + bit >>= 1; + node = next; + } + + node->value = value; + node->key = key; + + return 0; +} + +gint +radix32tree_insert (radix_tree_t *tree, guint32 key, guint32 mask, uintptr_t value) 
+{ + return (gint)radix32tree_insert_common (tree, key, mask, value, RADIX_INSERT); +} + +uintptr_t +radix32tree_add (radix_tree_t *tree, guint32 key, guint32 mask, uintptr_t value) +{ + return radix32tree_insert_common (tree, key, mask, value, RADIX_ADD); +} + +gint +radix32tree_replace (radix_tree_t *tree, guint32 key, guint32 mask, uintptr_t value) +{ + return (gint)radix32tree_insert_common (tree, key, mask, value, RADIX_REPLACE); +} + +/* + * per recursion step: + * ptr + ptr + ptr + gint = 4 words + * result = 1 word + * 5 words total in stack + */ +static gboolean +radix_recurse_nodes (radix_node_t *node, radix_tree_traverse_func func, void *user_data, gint level) +{ + if (node->left) { + if (radix_recurse_nodes (node->left, func, user_data, level + 1)) { + return TRUE; + } + } + + if (node->value != RADIX_NO_VALUE) { + if (func (node->key, level, node->value, user_data)) { + return TRUE; + } + } + + if (node->right) { + if (radix_recurse_nodes (node->right, func, user_data, level + 1)) { + return TRUE; + } + } + + return FALSE; +} + +void +radix32tree_traverse (radix_tree_t *tree, radix_tree_traverse_func func, void *user_data) +{ + radix_recurse_nodes (tree->root, func, user_data, 0); +} + + +gint +radix32tree_delete (radix_tree_t * tree, guint32 key, guint32 mask) +{ + guint32 bit; + radix_node_t *node; + + bit = 0x80000000; + node = tree->root; + + while (node && (bit & mask)) { + if (key & bit) { + node = node->right; + + } + else { + node = node->left; + } + + bit >>= 1; + } + + if (node == NULL || node->parent == NULL) { + return -1; + } + + if (node->right || node->left) { + if (node->value != RADIX_NO_VALUE) { + node->value = RADIX_NO_VALUE; + return 0; + } + + return -1; + } + + for (;;) { + if (node->parent->right == node) { + node->parent->right = NULL; + + } + else { + node->parent->left = NULL; + } + + node = node->parent; + + if (node->right || node->left) { + break; + } + + if (node->value != RADIX_NO_VALUE) { + break; + } + + if 
(node->parent == NULL) { + break; + } + } + + return 0; +} + + +uintptr_t +radix32tree_find (radix_tree_t * tree, guint32 key) +{ + guint32 bit; + uintptr_t value; + radix_node_t *node; + + bit = 0x80000000; + value = RADIX_NO_VALUE; + node = tree->root; + + while (node) { + if (node->value != RADIX_NO_VALUE) { + value = node->value; + } + + if (key & bit) { + node = node->right; + + } + else { + node = node->left; + } + + bit >>= 1; + } + + return value; +} + + +static void * +radix_alloc (radix_tree_t * tree) +{ + gchar *p; + + p = rspamd_mempool_alloc (tree->pool, sizeof (radix_node_t)); + + tree->size += sizeof (radix_node_t); + + return p; +} + +void +radix_tree_free (radix_tree_t * tree) +{ + + g_return_if_fail (tree != NULL); + rspamd_mempool_delete (tree->pool); + g_free (tree); +} + +/* + * vi:ts=4 + */ diff --git a/src/libutil/radix.h b/src/libutil/radix.h new file mode 100644 index 000000000..4cc2873c7 --- /dev/null +++ b/src/libutil/radix.h @@ -0,0 +1,82 @@ +#ifndef RADIX_H +#define RADIX_H + +#include "config.h" +#include "mem_pool.h" + +#define RADIX_NO_VALUE (uintptr_t)-1 + +typedef struct radix_node_s radix_node_t; + +struct radix_node_s { + radix_node_t *right; + radix_node_t *left; + radix_node_t *parent; + uintptr_t value; + guint32 key; +}; + + +typedef struct { + radix_node_t *root; + size_t size; + rspamd_mempool_t *pool; +} radix_tree_t; + +typedef gboolean (*radix_tree_traverse_func)(guint32 key, guint32 mask, uintptr_t value, void *user_data); + +/** + * Create new radix tree + */ +radix_tree_t *radix_tree_create (void); + +/** + * Insert value to radix tree + * returns: 1 if value already exists + * 0 if operation was successfull + * -1 if there was some error + */ +gint radix32tree_insert (radix_tree_t *tree, guint32 key, guint32 mask, uintptr_t value); + +/** + * Add value to radix tree or insert it if value does not exists + * returns: value if value already exists and was added + * 0 if value was inserted + * -1 if there was some error 
#ifndef RADIX_H
#define RADIX_H

#include "config.h"
#include "mem_pool.h"

/* Marks a node without a value; also what a failed lookup returns */
#define RADIX_NO_VALUE (uintptr_t)-1

typedef struct radix_node_s radix_node_t;

/* Node of the binary trie: one level per key bit, starting from the most significant one */
struct radix_node_s {
	radix_node_t *right;	/* child for a set key bit */
	radix_node_t *left;		/* child for a cleared key bit */
	radix_node_t *parent;	/* NULL for the root */
	uintptr_t value;		/* RADIX_NO_VALUE if nothing is stored here */
	guint32 key;			/* key passed when the value was stored */
};


typedef struct {
	radix_node_t *root;
	size_t size;			/* bytes accounted for the allocated nodes */
	rspamd_mempool_t *pool;	/* pool all nodes are allocated from */
} radix_tree_t;

/*
 * Traverse callback; radix.c passes the depth of the node in the tree as the
 * second argument. Returning TRUE stops the traversal.
 */
typedef gboolean (*radix_tree_traverse_func)(guint32 key, guint32 mask, uintptr_t value, void *user_data);

/**
 * Create new radix tree
 */
radix_tree_t *radix_tree_create (void);

/**
 * Insert value to radix tree
 * returns: 1 if value already exists
 * 			0 if operation was successful
 * 			-1 if there was some error
 */
gint radix32tree_insert (radix_tree_t *tree, guint32 key, guint32 mask, uintptr_t value);

/**
 * Add value to radix tree or insert it if value does not exist
 * returns: value if value already exists and was added
 * 			0 if value was inserted
 * 			-1 if there was some error (as uintptr_t this equals RADIX_NO_VALUE)
 */
uintptr_t radix32tree_add (radix_tree_t *tree, guint32 key, guint32 mask, uintptr_t value);

/**
 * Replace value in radix tree or insert it if value does not exist
 * returns: 1 if value already exists and was replaced
 * 			0 if value was inserted
 * 			-1 if there was some error
 */
gint radix32tree_replace (radix_tree_t *tree, guint32 key, guint32 mask, uintptr_t value);

/**
 * Delete value from radix tree
 * returns: 0 if value was deleted
 * 			-1 if there is no such node or it holds no value
 * (the implementation in radix.c never returns 1)
 */
gint radix32tree_delete (radix_tree_t *tree, guint32 key, guint32 mask);

/**
 * Find value in radix tree
 * returns: value if value was found
 * 			RADIX_NO_VALUE if value was not found
 */
uintptr_t radix32tree_find (radix_tree_t *tree, guint32 key);

/**
 * Traverse via the whole tree calling specified callback
 */
void radix32tree_traverse (radix_tree_t *tree, radix_tree_traverse_func func, void *user_data);

/**
 * Frees radix tree
 */
void radix_tree_free (radix_tree_t *tree);

#endif
IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "rrd.h" +#include "util.h" + +static GQuark +rrd_error_quark (void) +{ + return g_quark_from_static_string ("rrd-error"); +} + +/** + * Convert rrd dst type from string to numeric value + */ +enum rrd_dst_type +rrd_dst_from_string (const gchar *str) +{ + if (g_ascii_strcasecmp (str, "counter") == 0) { + return RRD_DST_COUNTER; + } + else if (g_ascii_strcasecmp (str, "absolute") == 0) { + return RRD_DST_ABSOLUTE; + } + else if (g_ascii_strcasecmp (str, "gauge") == 0) { + return RRD_DST_GAUGE; + } + else if (g_ascii_strcasecmp (str, "cdef") == 0) { + return RRD_DST_CDEF; + } + else if (g_ascii_strcasecmp (str, "derive") == 0) { + return RRD_DST_DERIVE; + } + return -1; +} + +/** + * Convert numeric presentation of dst to string + */ +const gchar* +rrd_dst_to_string (enum rrd_dst_type type) +{ + switch (type) { + case RRD_DST_COUNTER: + return "COUNTER"; + case RRD_DST_ABSOLUTE: + return "ABSOLUTE"; + case RRD_DST_GAUGE: + return "GAUGE"; + case RRD_DST_CDEF: + return "CDEF"; + case RRD_DST_DERIVE: + return "DERIVE"; + default: + return "U"; + } + + return "U"; +} + +/** + * Convert rrd consolidation function type from string to numeric value + */ +enum rrd_cf_type +rrd_cf_from_string (const gchar *str) +{ + if (g_ascii_strcasecmp (str, "average") == 0) { + return RRD_CF_AVERAGE; + } + else if (g_ascii_strcasecmp (str, "minimum") == 0) { + return RRD_CF_MINIMUM; + } + else if (g_ascii_strcasecmp (str, "maximum") == 0) { + return 
RRD_CF_MAXIMUM; + } + else if (g_ascii_strcasecmp (str, "last") == 0) { + return RRD_CF_LAST; + } + /* XXX: add other CF functions supported by rrd */ + + return -1; +} + +/** + * Convert numeric presentation of cf to string + */ +const gchar* +rrd_cf_to_string (enum rrd_cf_type type) +{ + switch (type) { + case RRD_CF_AVERAGE: + return "AVERAGE"; + case RRD_CF_MINIMUM: + return "MINIMUM"; + case RRD_CF_MAXIMUM: + return "MAXIMUM"; + case RRD_CF_LAST: + return "LAST"; + default: + return "U"; + } + + /* XXX: add other CF functions supported by rrd */ + + return "U"; +} + +void +rrd_make_default_rra (const gchar *cf_name, gulong pdp_cnt, gulong rows, struct rrd_rra_def *rra) +{ + rra->pdp_cnt = pdp_cnt; + rra->row_cnt = rows; + rspamd_strlcpy (rra->cf_nam, cf_name, sizeof (rra->cf_nam)); + memset (rra->par, 0, sizeof (rra->par)); + rra->par[RRA_cdp_xff_val].dv = 0.5; +} + +void +rrd_make_default_ds (const gchar *name, gulong pdp_step, struct rrd_ds_def *ds) +{ + rspamd_strlcpy (ds->ds_nam, name, sizeof (ds->ds_nam)); + rspamd_strlcpy (ds->dst, "COUNTER", sizeof (ds->dst)); + memset (ds->par, 0, sizeof (ds->par)); + ds->par[RRD_DS_mrhb_cnt].lv = pdp_step * 2; + ds->par[RRD_DS_min_val].dv = NAN; + ds->par[RRD_DS_max_val].dv = NAN; +} + +/** + * Check rrd file for correctness (size, cookies, etc) + */ +static gboolean +rspamd_rrd_check_file (const gchar *filename, gboolean need_data, GError **err) +{ + gint fd, i; + struct stat st; + struct rrd_file_head head; + struct rrd_rra_def rra; + gint head_size; + + fd = open (filename, O_RDWR); + if (fd == -1) { + g_set_error (err, rrd_error_quark (), errno, "rrd open error: %s", strerror (errno)); + return FALSE; + } + + if (fstat (fd, &st) == -1) { + g_set_error (err, rrd_error_quark (), errno, "rrd stat error: %s", strerror (errno)); + close (fd); + return FALSE; + } + if (st.st_size < (goffset)sizeof (struct rrd_file_head)) { + /* We have trimmed file */ + g_set_error (err, rrd_error_quark (), EINVAL, "rrd size is bad: 
%ud", (guint)st.st_size); + close (fd); + return FALSE; + } + + /* Try to read header */ + if (read (fd, &head, sizeof (head)) != sizeof (head)) { + g_set_error (err, rrd_error_quark (), errno, "rrd read head error: %s", strerror (errno)); + close (fd); + return FALSE; + } + /* Check magic */ + if (memcmp (head.cookie, RRD_COOKIE, sizeof (head.cookie)) != 0 || + memcmp (head.version, RRD_VERSION, sizeof (head.version)) != 0 || + head.float_cookie != RRD_FLOAT_COOKIE) { + g_set_error (err, rrd_error_quark (), EINVAL, "rrd head cookies error: %s", strerror (errno)); + close (fd); + return FALSE; + } + /* Check for other params */ + if (head.ds_cnt <= 0 || head.rra_cnt <= 0) { + g_set_error (err, rrd_error_quark (), EINVAL, "rrd head cookies error: %s", strerror (errno)); + close (fd); + return FALSE; + } + /* Now we can calculate the overall size of rrd */ + head_size = sizeof (struct rrd_file_head) + + sizeof (struct rrd_ds_def) * head.ds_cnt + + sizeof (struct rrd_rra_def) * head.rra_cnt + + sizeof (struct rrd_live_head) + + sizeof (struct rrd_pdp_prep) * head.ds_cnt + + sizeof (struct rrd_cdp_prep) * head.ds_cnt * head.rra_cnt + + sizeof (struct rrd_rra_ptr) * head.rra_cnt; + if (st.st_size < (goffset)head_size) { + g_set_error (err, rrd_error_quark (), errno, "rrd file seems to have stripped header: %d", head_size); + close (fd); + return FALSE; + } + + if (need_data) { + /* Now check rra */ + if (lseek (fd, sizeof (struct rrd_ds_def) * head.ds_cnt, SEEK_CUR) == -1) { + g_set_error (err, rrd_error_quark (), errno, "rrd head lseek error: %s", strerror (errno)); + close (fd); + return FALSE; + } + for (i = 0; i < (gint)head.rra_cnt; i ++) { + if (read (fd, &rra, sizeof (rra)) != sizeof (rra)) { + g_set_error (err, rrd_error_quark (), errno, "rrd read rra error: %s", strerror (errno)); + close (fd); + return FALSE; + } + head_size += rra.row_cnt * head.ds_cnt * sizeof (gdouble); + } + + if (st.st_size != head_size) { + g_set_error (err, rrd_error_quark (), EINVAL, 
"rrd file seems to have incorrect size: %d, must be %d", (gint)st.st_size, head_size); + close (fd); + return FALSE; + } + } + + close (fd); + return TRUE; +} + +/** + * Adjust pointers in mmapped rrd file + * @param file + */ +static void +rspamd_rrd_adjust_pointers (struct rspamd_rrd_file *file, gboolean completed) +{ + guint8 *ptr; + + ptr = file->map; + file->stat_head = (struct rrd_file_head *)ptr; + ptr += sizeof (struct rrd_file_head); + file->ds_def = (struct rrd_ds_def *)ptr; + ptr += sizeof (struct rrd_ds_def) * file->stat_head->ds_cnt; + file->rra_def = (struct rrd_rra_def *)ptr; + ptr += sizeof (struct rrd_rra_def) * file->stat_head->rra_cnt; + file->live_head = (struct rrd_live_head *)ptr; + ptr += sizeof (struct rrd_live_head); + file->pdp_prep = (struct rrd_pdp_prep *)ptr; + ptr += sizeof (struct rrd_pdp_prep) * file->stat_head->ds_cnt; + file->cdp_prep = (struct rrd_cdp_prep *)ptr; + ptr += sizeof (struct rrd_cdp_prep) * file->stat_head->rra_cnt * file->stat_head->ds_cnt; + file->rra_ptr = (struct rrd_rra_ptr *)ptr; + if (completed) { + ptr += sizeof (struct rrd_rra_ptr) * file->stat_head->rra_cnt; + file->rrd_value = (gdouble *)ptr; + } + else { + file->rrd_value = NULL; + } +} + +/** + * Open completed or incompleted rrd file + * @param filename + * @param completed + * @param err + * @return + */ +static struct rspamd_rrd_file* +rspamd_rrd_open_common (const gchar *filename, gboolean completed, GError **err) +{ + struct rspamd_rrd_file *new; + gint fd; + struct stat st; + + if (!rspamd_rrd_check_file (filename, completed, err)) { + return NULL; + } + + new = g_slice_alloc0 (sizeof (struct rspamd_rrd_file)); + + if (new == NULL) { + g_set_error (err, rrd_error_quark (), ENOMEM, "not enough memory"); + return NULL; + } + + /* Open file */ + fd = open (filename, O_RDWR); + if (fd == -1) { + g_set_error (err, rrd_error_quark (), errno, "rrd open error: %s", strerror (errno)); + return FALSE; + } + + if (fstat (fd, &st) == -1) { + g_set_error (err, 
rrd_error_quark (), errno, "rrd stat error: %s", strerror (errno)); + close (fd); + return FALSE; + } + /* Mmap file */ + new->size = st.st_size; + if ((new->map = mmap (NULL, st.st_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0)) == MAP_FAILED) { + close (fd); + g_set_error (err, rrd_error_quark (), ENOMEM, "mmap failed: %s", strerror (errno)); + g_slice_free1 (sizeof (struct rspamd_rrd_file), new); + return NULL; + } + + close (fd); + + /* Adjust pointers */ + rspamd_rrd_adjust_pointers (new, completed); + + /* Mark it as finalized */ + new->finalized = completed; + + new->filename = g_strdup (filename); + + return new; +} + +/** + * Open (and mmap) existing RRD file + * @param filename path + * @param err error pointer + * @return rrd file structure + */ +struct rspamd_rrd_file* +rspamd_rrd_open (const gchar *filename, GError **err) +{ + return rspamd_rrd_open_common (filename, TRUE, err); +} + +/** + * Create basic header for rrd file + * @param filename file path + * @param ds_count number of data sources + * @param rra_count number of round robin archives + * @param pdp_step step of primary data points + * @param err error pointer + * @return TRUE if file has been created + */ +struct rspamd_rrd_file* +rspamd_rrd_create (const gchar *filename, gulong ds_count, gulong rra_count, gulong pdp_step, GError **err) +{ + struct rspamd_rrd_file *new; + struct rrd_file_head head; + struct rrd_ds_def ds; + struct rrd_rra_def rra; + struct rrd_live_head lh; + struct rrd_pdp_prep pdp; + struct rrd_cdp_prep cdp; + struct rrd_rra_ptr rra_ptr; + gint fd; + guint i, j; + struct timeval tv; + + /* Open file */ + fd = open (filename, O_RDWR | O_CREAT | O_TRUNC, 0644); + if (fd == -1) { + g_set_error (err, rrd_error_quark (), errno, "rrd create error: %s", strerror (errno)); + return NULL; + } + + /* Fill header */ + memset (&head, 0, sizeof (head)); + head.rra_cnt = rra_count; + head.ds_cnt = ds_count; + head.pdp_step = pdp_step; + memcpy (head.cookie, RRD_COOKIE, sizeof 
(head.cookie)); + memcpy (head.version, RRD_VERSION, sizeof (head.version)); + head.float_cookie = RRD_FLOAT_COOKIE; + + if (write (fd, &head, sizeof (head)) != sizeof (head)) { + close (fd); + g_set_error (err, rrd_error_quark (), errno, "rrd write error: %s", strerror (errno)); + return NULL; + } + + /* Fill DS section */ + memset (&ds.ds_nam, 0, sizeof (ds.ds_nam)); + memcpy (&ds.dst, "COUNTER", sizeof ("COUNTER")); + memset (&ds.par, 0, sizeof (ds.par)); + for (i = 0; i < ds_count; i ++) { + if (write (fd, &ds, sizeof (ds)) != sizeof (ds)) { + close (fd); + g_set_error (err, rrd_error_quark (), errno, "rrd write error: %s", strerror (errno)); + return NULL; + } + } + + /* Fill RRA section */ + memcpy (&rra.cf_nam, "AVERAGE", sizeof ("AVERAGE")); + rra.pdp_cnt = 1; + memset (&rra.par, 0, sizeof (rra.par)); + for (i = 0; i < rra_count; i ++) { + if (write (fd, &rra, sizeof (rra)) != sizeof (rra)) { + close (fd); + g_set_error (err, rrd_error_quark (), errno, "rrd write error: %s", strerror (errno)); + return NULL; + } + } + + /* Fill live header */ + gettimeofday (&tv, NULL); + lh.last_up = tv.tv_sec; + lh.last_up_usec = tv.tv_usec; + + if (write (fd, &lh, sizeof (lh)) != sizeof (lh)) { + close (fd); + g_set_error (err, rrd_error_quark (), errno, "rrd write error: %s", strerror (errno)); + return NULL; + } + + /* Fill pdp prep */ + memcpy (&pdp.last_ds, "U", sizeof ("U")); + memset (&pdp.scratch, 0, sizeof (pdp.scratch)); + pdp.scratch[PDP_val].dv = 0.; + pdp.scratch[PDP_unkn_sec_cnt].lv = 0; + for (i = 0; i < ds_count; i ++) { + if (write (fd, &pdp, sizeof (pdp)) != sizeof (pdp)) { + close (fd); + g_set_error (err, rrd_error_quark (), errno, "rrd write error: %s", strerror (errno)); + return NULL; + } + } + + /* Fill cdp prep */ + memset (&cdp.scratch, 0, sizeof (cdp.scratch)); + cdp.scratch[CDP_val].dv = NAN; + for (i = 0; i < rra_count; i ++) { + cdp.scratch[CDP_unkn_pdp_cnt].lv = 0; + for (j = 0; j < ds_count; j ++) { + if (write (fd, &cdp, sizeof (cdp)) != 
sizeof (cdp)) { + close (fd); + g_set_error (err, rrd_error_quark (), errno, "rrd write error: %s", strerror (errno)); + return NULL; + } + } + } + + /* Set row pointers */ + memset (&rra_ptr, 0, sizeof (rra_ptr)); + for (i = 0; i < rra_count; i ++) { + if (write (fd, &rra_ptr, sizeof (rra_ptr)) != sizeof (rra_ptr)) { + close (fd); + g_set_error (err, rrd_error_quark (), errno, "rrd write error: %s", strerror (errno)); + return NULL; + } + } + + close (fd); + new = rspamd_rrd_open_common (filename, FALSE, err); + + return new; +} + +/** + * Add data sources to rrd file + * @param filename path to file + * @param ds array of struct rrd_ds_def + * @param err error pointer + * @return TRUE if data sources were added + */ +gboolean +rspamd_rrd_add_ds (struct rspamd_rrd_file *file, GArray *ds, GError **err) +{ + + if (file == NULL || file->stat_head->ds_cnt * sizeof (struct rrd_ds_def) != ds->len) { + g_set_error (err, rrd_error_quark (), EINVAL, "rrd add ds failed: wrong arguments"); + return FALSE; + } + + /* Straightforward memcpy */ + memcpy (file->ds_def, ds->data, ds->len); + + return TRUE; +} + +/** + * Add round robin archives to rrd file + * @param filename path to file + * @param ds array of struct rrd_rra_def + * @param err error pointer + * @return TRUE if archives were added + */ +gboolean +rspamd_rrd_add_rra (struct rspamd_rrd_file *file, GArray *rra, GError **err) +{ + if (file == NULL || file->stat_head->rra_cnt * sizeof (struct rrd_rra_def) != rra->len) { + g_set_error (err, rrd_error_quark (), EINVAL, "rrd add rra failed: wrong arguments"); + return FALSE; + } + + /* Straightforward memcpy */ + memcpy (file->rra_def, rra->data, rra->len); + + return TRUE; +} + +/** + * Finalize rrd file header and initialize all RRA in the file + * @param filename file path + * @param err error pointer + * @return TRUE if rrd file is ready for use + */ +gboolean +rspamd_rrd_finalize (struct rspamd_rrd_file *file, GError **err) +{ + gint fd; + guint i; + gint count = 0; 
+ gdouble vbuf[1024]; + struct stat st; + + if (file == NULL || file->filename == NULL) { + g_set_error (err, rrd_error_quark (), EINVAL, "rrd add rra failed: wrong arguments"); + return FALSE; + } + + fd = open (file->filename, O_RDWR); + if (fd == -1) { + g_set_error (err, rrd_error_quark (), errno, "rrd open error: %s", strerror (errno)); + return FALSE; + } + + if (lseek (fd, 0, SEEK_END) == -1) { + g_set_error (err, rrd_error_quark (), errno, "rrd seek error: %s", strerror (errno)); + close (fd); + return FALSE; + } + + /* Adjust CDP */ + for (i = 0; i < file->stat_head->rra_cnt; i ++) { + file->cdp_prep->scratch[CDP_unkn_pdp_cnt].lv = 0; + /* Randomize row pointer */ + file->rra_ptr->cur_row = g_random_int () % file->rra_def[i].row_cnt; + /* Calculate values count */ + count += file->rra_def[i].row_cnt * file->stat_head->ds_cnt; + } + + munmap (file->map, file->size); + /* Write values */ + for (i = 0; i < G_N_ELEMENTS (vbuf); i ++) { + vbuf[i] = NAN; + } + + while (count > 0) { + /* Write values in buffered matter */ + if (write (fd, vbuf, MIN ((gint)G_N_ELEMENTS (vbuf), count) * sizeof (gdouble)) == -1) { + g_set_error (err, rrd_error_quark (), errno, "rrd write error: %s", strerror (errno)); + close (fd); + return FALSE; + } + count -= G_N_ELEMENTS (vbuf); + } + + if (fstat (fd, &st) == -1) { + g_set_error (err, rrd_error_quark (), errno, "rrd stat error: %s", strerror (errno)); + close (fd); + return FALSE; + } + + /* Mmap again */ + file->size = st.st_size; + if ((file->map = mmap (NULL, st.st_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0)) == MAP_FAILED) { + close (fd); + g_set_error (err, rrd_error_quark (), ENOMEM, "mmap failed: %s", strerror (errno)); + g_slice_free1 (sizeof (struct rspamd_rrd_file), file); + return FALSE; + } + close (fd); + /* Adjust pointers */ + rspamd_rrd_adjust_pointers (file, TRUE); + + file->finalized = TRUE; + + return TRUE; +} + +/** + * Update pdp_prep data + * @param file rrd file + * @param vals new values + * @param 
pdp_new new pdp array + * @param interval time elapsed from the last update + * @return + */ +static gboolean +rspamd_rrd_update_pdp_prep (struct rspamd_rrd_file *file, gdouble *vals, gdouble *pdp_new, gdouble interval) +{ + guint i; + enum rrd_dst_type type; + + for (i = 0; i < file->stat_head->ds_cnt; i ++) { + type = rrd_dst_from_string (file->ds_def[i].dst); + + if (file->ds_def[i].par[RRD_DS_mrhb_cnt].lv < interval) { + rspamd_strlcpy (file->pdp_prep[i].last_ds, "U", sizeof (file->pdp_prep[i].last_ds)); + } + + if (file->ds_def[i].par[RRD_DS_mrhb_cnt].lv >= interval) { + switch (type) { + case RRD_DST_COUNTER: + case RRD_DST_DERIVE: + if (file->pdp_prep[i].last_ds[0] == 'U') { + pdp_new[i] = NAN; + } + else { + pdp_new[i] = vals[i] - strtod (file->pdp_prep[i].last_ds, NULL); + } + break; + case RRD_DST_GAUGE: + pdp_new[i] = vals[i] * interval; + break; + case RRD_DST_ABSOLUTE: + pdp_new[i] = vals[i]; + break; + default: + return FALSE; + } + } + else { + pdp_new[i] = NAN; + } + /* Copy value to the last_ds */ + if (!isnan (vals[i])) { + rspamd_snprintf (file->pdp_prep[i].last_ds, sizeof (file->pdp_prep[i].last_ds), "%.4f", vals[i]); + } + else { + file->pdp_prep[i].last_ds[0] = 'U'; + file->pdp_prep[i].last_ds[1] = '\0'; + } + } + + + return TRUE; +} + +/** + * Update step for this pdp + * @param file + * @param pdp_new new pdp array + * @param pdp_temp temp pdp array + * @param interval time till last update + * @param pre_int pre interval + * @param post_int post intervall + * @param pdp_diff time till last pdp update + */ +static void +rspamd_rrd_update_pdp_step (struct rspamd_rrd_file *file, gdouble *pdp_new, gdouble *pdp_temp, gdouble interval, + gdouble pre_int, gdouble post_int, gulong pdp_diff) +{ + guint i; + rrd_value_t *scratch; + gulong heartbeat; + + + for (i = 0; i < file->stat_head->ds_cnt; i ++) { + scratch = file->pdp_prep[i].scratch; + heartbeat = file->ds_def[i].par[RRD_DS_mrhb_cnt].lv; + if (!isnan (pdp_new[i])) { + if (isnan 
(scratch[PDP_val].dv)) { + scratch[PDP_val].dv = 0; + } + scratch[PDP_val].dv += pdp_new[i] / interval * pre_int; + pre_int = 0.0; + } + /* Check interval value for heartbeat for this DS */ + if ((interval > heartbeat) || (file->stat_head->pdp_step / 2.0 < scratch[PDP_unkn_sec_cnt].lv)) { + pdp_temp[i] = NAN; + } + else { + pdp_temp[i] = scratch[PDP_val].dv / + ((double) (pdp_diff - scratch[PDP_unkn_sec_cnt].lv) - pre_int); + } + + if (isnan (pdp_new[i])) { + scratch[PDP_unkn_sec_cnt].lv = floor (post_int); + scratch[PDP_val].dv = NAN; + } else { + scratch[PDP_unkn_sec_cnt].lv = 0; + scratch[PDP_val].dv = pdp_new[i] / interval * post_int; + } + } +} + +/** + * Update CDP for this rra + * @param file rrd file + * @param pdp_steps how much pdp steps elapsed from the last update + * @param pdp_offset offset from pdp + * @param rra_steps how much steps must be updated for this rra + * @param rra_index index of desired rra + * @param pdp_temp temporary pdp points + */ +static void +rspamd_rrd_update_cdp (struct rspamd_rrd_file *file, gdouble pdp_steps, gdouble pdp_offset, gulong *rra_steps, gulong rra_index, + gdouble *pdp_temp) +{ + guint i; + struct rrd_rra_def *rra; + rrd_value_t *scratch; + enum rrd_cf_type cf; + gdouble last_cdp, cur_cdp; + gulong pdp_in_cdp; + + rra = &file->rra_def[rra_index]; + cf = rrd_cf_from_string (rra->cf_nam); + + /* Iterate over all DS for this RRA */ + for (i = 0; i < file->stat_head->ds_cnt; i ++) { + /* Get CDP for this RRA and DS */ + scratch = file->cdp_prep[rra_index * file->stat_head->ds_cnt + i].scratch; + if (rra->pdp_cnt > 1) { + /* Do we have any CDP to update for this rra ? 
*/ + if (rra_steps[rra_index] > 0) { + if (isnan (pdp_temp[i])) { + /* New pdp is nan */ + /* Increment unknown points count */ + scratch[CDP_unkn_pdp_cnt].lv += pdp_offset; + /* Reset secondary value */ + scratch[CDP_secondary_val].dv = NAN; + } + else { + scratch[CDP_secondary_val].dv = pdp_temp[i]; + } + + /* Check XFF for this rra */ + if (scratch[CDP_unkn_pdp_cnt].lv > rra->pdp_cnt * rra->par[RRA_cdp_xff_val].lv) { + /* XFF is reached */ + scratch[CDP_primary_val].dv = NAN; + } + else { + /* Need to initialize CDP using specified consolidation */ + switch (cf) { + case RRD_CF_AVERAGE: + last_cdp = isnan (scratch[CDP_val].dv) ? 0.0 : scratch[CDP_val].dv; + cur_cdp = isnan (pdp_temp[i]) ? 0.0 : pdp_temp[i]; + scratch[CDP_primary_val].dv = (last_cdp + cur_cdp * pdp_offset) / (rra->pdp_cnt - scratch[CDP_unkn_pdp_cnt].lv); + break; + case RRD_CF_MAXIMUM: + last_cdp = isnan (scratch[CDP_val].dv) ? -INFINITY : scratch[CDP_val].dv; + cur_cdp = isnan (pdp_temp[i]) ? -INFINITY : pdp_temp[i]; + scratch[CDP_primary_val].dv = MAX (last_cdp, cur_cdp); + break; + case RRD_CF_MINIMUM: + last_cdp = isnan (scratch[CDP_val].dv) ? INFINITY : scratch[CDP_val].dv; + cur_cdp = isnan (pdp_temp[i]) ? 
INFINITY : pdp_temp[i]; + scratch[CDP_primary_val].dv = MIN (last_cdp, cur_cdp); + break; + case RRD_CF_LAST: + default: + scratch[CDP_primary_val].dv = pdp_temp[i]; + break; + } + } + /* Init carry of this CDP */ + pdp_in_cdp = (pdp_steps - pdp_offset) / rra->pdp_cnt; + if (pdp_in_cdp == 0 || isnan (pdp_temp[i])) { + /* Set overflow */ + switch (cf) { + case RRD_CF_AVERAGE: + scratch[CDP_val].dv = 0; + break; + case RRD_CF_MAXIMUM: + scratch[CDP_val].dv = -INFINITY; + break; + case RRD_CF_MINIMUM: + scratch[CDP_val].dv = INFINITY; + break; + default: + scratch[CDP_val].dv = NAN; + break; + } + } + else { + /* Special carry for average */ + if (cf == RRD_CF_AVERAGE) { + scratch[CDP_val].dv = pdp_temp[i] * pdp_in_cdp; + } + else { + scratch[CDP_val].dv = pdp_temp[i]; + } + } + } + /* In this case we just need to update cdp_prep for this RRA */ + else { + if (isnan (pdp_temp[i])) { + /* Just increase undefined zone */ + scratch[CDP_unkn_pdp_cnt].lv += pdp_steps; + } + else { + /* Calculate cdp value */ + last_cdp = scratch[CDP_val].dv; + switch (cf) { + case RRD_CF_AVERAGE: + if (isnan (last_cdp)) { + scratch[CDP_val].dv = pdp_temp[i] * pdp_steps; + } + else { + scratch[CDP_val].dv = last_cdp + pdp_temp[i] * pdp_steps; + } + break; + case RRD_CF_MAXIMUM: + scratch[CDP_val].dv = MAX (last_cdp, pdp_temp[i]); + break; + case RRD_CF_MINIMUM: + scratch[CDP_val].dv = MIN (last_cdp, pdp_temp[i]); + break; + case RRD_CF_LAST: + scratch[CDP_val].dv = pdp_temp[i]; + break; + default: + scratch[CDP_val].dv = NAN; + break; + } + } + } + } + else { + /* We have nothing to consolidate, but we may miss some pdp */ + if (pdp_steps > 2) { + /* Just write PDP value */ + scratch[CDP_primary_val].dv = pdp_temp[i]; + scratch[CDP_secondary_val].dv = pdp_temp[i]; + } + } + } +} + +/** + * Update RRA in a file + * @param file rrd file + * @param rra_steps steps for each rra + * @param now current time + */ +void +rspamd_rrd_write_rra (struct rspamd_rrd_file *file, gulong *rra_steps) +{ + 
guint i, j, scratch_idx, cdp_idx, k; + struct rrd_rra_def *rra; + gdouble *rra_row; + + /* Iterate over all RRA */ + for (i = 0; i < file->stat_head->rra_cnt; i ++) { + rra = &file->rra_def[i]; + /* How much steps need to be updated */ + for (j = 0, scratch_idx = CDP_primary_val; j < rra_steps[i]; j ++, scratch_idx = CDP_secondary_val) { + /* Move row ptr */ + if (++file->rra_ptr[i].cur_row >= rra->row_cnt) { + file->rra_ptr[i].cur_row = 0; + } + /* Calculate seek */ + rra_row = file->rrd_value + (file->stat_head->ds_cnt * i + file->rra_ptr[i].cur_row); + /* Iterate over DS */ + for (k = 0; k < file->stat_head->ds_cnt; k ++) { + cdp_idx = i * file->stat_head->ds_cnt + k; + memcpy (rra_row, &file->cdp_prep[cdp_idx].scratch[scratch_idx].dv, sizeof (gdouble)); + rra_row ++; + } + } + } +} + +/** + * Add record to rrd file + * @param file rrd file object + * @param points points (must be row suitable for this RRA, depending on ds count) + * @param err error pointer + * @return TRUE if a row has been added + */ +gboolean +rspamd_rrd_add_record (struct rspamd_rrd_file* file, GArray *points, GError **err) +{ + gdouble interval, *pdp_new, *pdp_temp, pre_int, post_int; + guint i; + gulong pdp_steps, cur_pdp_count, prev_pdp_step, cur_pdp_step, + prev_pdp_age, cur_pdp_age, *rra_steps, pdp_offset; + struct timeval tv; + + if (file == NULL || file->stat_head->ds_cnt * sizeof (gdouble) != points->len) { + g_set_error (err, rrd_error_quark (), EINVAL, "rrd add points failed: wrong arguments"); + return FALSE; + } + + /* Get interval */ + gettimeofday (&tv, NULL); + interval = (gdouble)(tv.tv_sec - file->live_head->last_up) + + (gdouble)(tv.tv_usec - file->live_head->last_up_usec) / 1e6f; + + /* Update PDP preparation values */ + pdp_new = g_malloc (sizeof (gdouble) * file->stat_head->ds_cnt); + pdp_temp = g_malloc (sizeof (gdouble) * file->stat_head->ds_cnt); + /* How much steps need to be updated in each RRA */ + rra_steps = g_malloc0 (sizeof (gulong) * 
file->stat_head->rra_cnt); + + if (!rspamd_rrd_update_pdp_prep (file, (gdouble *)points->data, pdp_new, interval)) { + g_set_error (err, rrd_error_quark (), EINVAL, "rrd update pdp failed: wrong arguments"); + g_free (pdp_new); + g_free (pdp_temp); + g_free (rra_steps); + return FALSE; + } + + /* Calculate elapsed steps */ + /* Age in seconds for previous pdp store */ + prev_pdp_age = file->live_head->last_up % file->stat_head->pdp_step; + /* Time in seconds for last pdp update */ + prev_pdp_step = file->live_head->last_up - prev_pdp_age; + /* Age in seconds from current time to required pdp time */ + cur_pdp_age = tv.tv_sec % file->stat_head->pdp_step; + /* Time of desired pdp step */ + cur_pdp_step = tv.tv_sec - cur_pdp_age; + + if (cur_pdp_step > prev_pdp_step) { + pre_int = (gdouble)(cur_pdp_step - file->live_head->last_up) - ((double)file->live_head->last_up_usec) / 1e6f; + post_int = (gdouble)cur_pdp_age + ((double)tv.tv_usec) / 1e6f; + } + else { + pre_int = interval; + post_int = 0; + } + cur_pdp_count = cur_pdp_step / file->stat_head->pdp_step; + pdp_steps = (cur_pdp_step - prev_pdp_step) / file->stat_head->pdp_step; + + + if (pdp_steps == 0) { + /* Simple update of pdp prep */ + for (i = 0; i < file->stat_head->ds_cnt; i ++) { + if (isnan (pdp_new[i])) { + /* Increment unknown period */ + file->pdp_prep[i].scratch[PDP_unkn_sec_cnt].lv += floor (interval); + } + else { + if (isnan (file->pdp_prep[i].scratch[PDP_val].dv)) { + /* Reset pdp to the current value */ + file->pdp_prep[i].scratch[PDP_val].dv = pdp_new[i]; + } + else { + /* Increment pdp value */ + file->pdp_prep[i].scratch[PDP_val].dv += pdp_new[i]; + } + } + } + } + else { + /* Complex update of PDP, CDP and RRA */ + + /* Update PDP for this step */ + rspamd_rrd_update_pdp_step (file, pdp_new, pdp_temp, interval, pre_int, post_int, pdp_steps * file->stat_head->pdp_step); + + + /* Update CDP points for each RRA*/ + for (i = 0; i < file->stat_head->rra_cnt; i ++) { + /* Calculate pdp offset for 
this RRA */ + pdp_offset = file->rra_def[i].pdp_cnt - cur_pdp_count % file->rra_def[i].pdp_cnt; + /* How much steps we got for this RRA */ + if (pdp_offset <= pdp_steps) { + rra_steps[i] = (pdp_steps - pdp_offset) / file->rra_def[i].pdp_cnt + 1; + } + else { + /* This rra have not passed enough pdp steps */ + rra_steps[i] = 0; + } + /* Update this specific CDP */ + rspamd_rrd_update_cdp (file, pdp_steps, pdp_offset, rra_steps, i, pdp_temp); + /* Write RRA */ + rspamd_rrd_write_rra (file, rra_steps); + } + } + file->live_head->last_up = tv.tv_sec; + file->live_head->last_up_usec = tv.tv_usec; + + /* Sync and invalidate */ + msync (file->map, file->size, MS_ASYNC | MS_INVALIDATE); + + g_free (pdp_new); + g_free (pdp_temp); + g_free (rra_steps); + + return TRUE; +} + +/** + * Close rrd file + * @param file + * @return + */ +gint +rspamd_rrd_close (struct rspamd_rrd_file* file) +{ + if (file == NULL) { + errno = EINVAL; + return -1; + } + + munmap (file->map, file->size); + if (file->filename != NULL) { + g_free (file->filename); + } + g_slice_free1 (sizeof (struct rspamd_rrd_file), file); + + return 0; +} diff --git a/src/libutil/rrd.h b/src/libutil/rrd.h new file mode 100644 index 000000000..ff6902894 --- /dev/null +++ b/src/libutil/rrd.h @@ -0,0 +1,374 @@ +/* Copyright (c) 2010-2012, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
*
 * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */


#ifndef RRD_H_
#define RRD_H_

#include "config.h"

/**
 * This file contains basic structures and functions to operate with round-robin databases
 */

#define RRD_COOKIE "RRD"
#define RRD_VERSION "0003"
#define RRD_FLOAT_COOKIE ((double)8.642135E130)

/* A single parameter slot: read either as an integer or as a double */
typedef union {
	unsigned long lv;
	double dv;
} rrd_value_t;

struct rrd_file_head {
	/* Data Base Identification Section ** */
	gchar cookie[4]; /* RRD */
	gchar version[5]; /* version of the format */
	gdouble float_cookie; /* is it the correct double representation ? */

	/* Data Base Structure Definition **** */
	gulong ds_cnt; /* how many different ds provide input to the rrd */
	gulong rra_cnt; /* how many rras will be maintained in the rrd */
	gulong pdp_step; /* pdp interval in seconds */

	rrd_value_t par[10]; /* global parameters ... unused
							at the moment */
};

enum rrd_dst_type {
	RRD_DST_COUNTER = 0, /* data source types available */
	RRD_DST_ABSOLUTE,
	RRD_DST_GAUGE,
	RRD_DST_DERIVE,
	RRD_DST_CDEF
};
enum rrd_ds_param {
	RRD_DS_mrhb_cnt = 0, /* minimum required heartbeat */
	RRD_DS_min_val, /* the processed input of a ds must */
	RRD_DS_max_val, /* be between max_val and min_val
					 * both can be set to UNKNOWN if you
					 * do not care. Data outside the limits
					 * set to UNKNOWN */
	RRD_DS_cdef = RRD_DS_mrhb_cnt
}; /* pointer to encoded rpn expression only applies to DST_CDEF */


/* The magic number here is one less than DS_NAM_SIZE */
#define RRD_DS_NAM_SIZE 20

#define RRD_DST_SIZE 20

struct rrd_ds_def {
	gchar ds_nam[RRD_DS_NAM_SIZE]; /* Name of the data source (null terminated) */
	gchar dst[RRD_DST_SIZE]; /* Type of data source (null terminated) */
	rrd_value_t par[10]; /* index of this array see ds_param_en */
};

/* RRA definition */

enum rrd_cf_type {
	RRD_CF_AVERAGE = 0, /* data consolidation functions */
	RRD_CF_MINIMUM,
	RRD_CF_MAXIMUM,
	RRD_CF_LAST,
	RRD_CF_HWPREDICT,
	/* An array of predictions using the seasonal
	 * Holt-Winters algorithm. Requires an RRA of type
	 * CF_SEASONAL for this data source. */
	RRD_CF_SEASONAL,
	/* An array of seasonal effects. Requires an RRA of
	 * type CF_HWPREDICT for this data source. */
	RRD_CF_DEVPREDICT,
	/* An array of deviation predictions based upon
	 * smoothed seasonal deviations. Requires an RRA of
	 * type CF_DEVSEASONAL for this data source. */
	RRD_CF_DEVSEASONAL,
	/* An array of smoothed seasonal deviations. Requires
	 * an RRA of type CF_HWPREDICT for this data source.
	 * */
	RRD_CF_FAILURES,
	/* HWPREDICT that follows a moving baseline */
	RRD_CF_MHWPREDICT
	/* new entries must come last !!! */
};


#define MAX_RRA_PAR_EN 10

/*
 * Indexes into rrd_rra_def.par; the same numeric index is given a different
 * name for each family of consolidation functions.
 */
enum rrd_rra_param {
	RRA_cdp_xff_val = 0, /* what part of the consolidated
						  * datapoint must be known, to produce a
						  * valid entry in the rra */
	/* CF_HWPREDICT: */
	RRA_hw_alpha = 1,
	/* exponential smoothing parameter for the intercept in
	 * the Holt-Winters prediction algorithm. */
	RRA_hw_beta = 2,
	/* exponential smoothing parameter for the slope in
	 * the Holt-Winters prediction algorithm. */

	RRA_dependent_rra_idx = 3,
	/* For CF_HWPREDICT: index of the RRA with the seasonal
	 * effects of the Holt-Winters algorithm (of type
	 * CF_SEASONAL).
	 * For CF_DEVPREDICT: index of the RRA with the seasonal
	 * deviation predictions (of type CF_DEVSEASONAL).
	 * For CF_SEASONAL: index of the RRA with the Holt-Winters
	 * intercept and slope coefficient (of type CF_HWPREDICT).
	 * For CF_DEVSEASONAL: index of the RRA with the
	 * Holt-Winters prediction (of type CF_HWPREDICT).
	 * For CF_FAILURES: index of the CF_DEVSEASONAL array.
	 * */

	/* CF_SEASONAL and CF_DEVSEASONAL: */
	RRA_seasonal_gamma = 1,
	/* exponential smoothing parameter for seasonal effects. */

	RRA_seasonal_smoothing_window = 2,
	/* fraction of the season to include in the running average
	 * smoother */

	/* RRA_dependent_rra_idx = 3, */

	RRA_seasonal_smooth_idx = 4,
	/* an integer between 0 and row_count - 1 which
	 * is index in the seasonal cycle for applying
	 * the period smoother. */

	/* CF_FAILURES: */
	RRA_delta_pos = 1, /* confidence bound scaling parameters */
	RRA_delta_neg = 2,
	/* RRA_dependent_rra_idx = 3, */
	RRA_window_len = 4,
	RRA_failure_threshold = 5
	/* For CF_FAILURES, number of violations within the last
	 * window required to mark a failure. */
};


#define RRD_CF_NAM_SIZE 20

struct rrd_rra_def {
	gchar cf_nam[RRD_CF_NAM_SIZE]; /* consolidation function (null term) */
	gulong row_cnt; /* number of entries in the store */
	gulong pdp_cnt; /* how many primary data points are
					 * required for a consolidated data point?*/
	rrd_value_t par[MAX_RRA_PAR_EN]; /* index see rra_param_en */

};

struct rrd_live_head {
	time_t last_up; /* when was rrd last updated */
	glong last_up_usec; /* micro seconds part of the update timestamp. Always >= 0 */
};

#define RRD_LAST_DS_LEN 30

enum rrd_pdp_param {
	PDP_unkn_sec_cnt = 0, /* how many seconds of the current
						   * pdp value is unknown data? */
	PDP_val
}; /* current value of the pdp.
	  this depends on dst */

struct rrd_pdp_prep {
	gchar last_ds[RRD_LAST_DS_LEN]; /* the last reading from the data
									 * source. this is stored in ASCII
									 * to cater for very large counters
									 * we might encounter in connection
									 * with SNMP. */
	rrd_value_t scratch[10]; /* contents according to pdp_par_en */
};

#define RRD_MAX_CDP_PAR_EN 10
#define RRD_MAX_CDP_FAILURES_IDX 8
/* max CDP scratch entries avail to record violations for a FAILURES RRA */
#define RRD_MAX_FAILURES_WINDOW_LEN 28

/* Indexes into rrd_cdp_prep.scratch; several names alias the same slot */
enum rrd_cdp_param {
	CDP_val = 0,
	/* the base_interval is always an
	 * average */
	CDP_unkn_pdp_cnt,
	/* how many unknown pdp were
	 * integrated. This and the cdp_xff
	 * will decide if this is going to
	 * be a UNKNOWN or a valid value */
	CDP_hw_intercept,
	/* Current intercept coefficient for the Holt-Winters
	 * prediction algorithm. */
	CDP_hw_last_intercept,
	/* Last iteration intercept coefficient for the Holt-Winters
	 * prediction algorithm. */
	CDP_hw_slope,
	/* Current slope coefficient for the Holt-Winters
	 * prediction algorithm. */
	CDP_hw_last_slope,
	/* Last iteration slope coefficient. */
	CDP_null_count,
	/* Number of sequential Unknown (DNAN) values + 1 preceding
	 * the current prediction.
	 * */
	CDP_last_null_count,
	/* Last iteration count of Unknown (DNAN) values. */
	CDP_primary_val = 8,
	/* optimization for bulk updates: the value of the first CDP
	 * value to be written in the bulk update. */
	CDP_secondary_val = 9,
	/* optimization for bulk updates: the value of subsequent
	 * CDP values to be written in the bulk update. */
	CDP_hw_seasonal = CDP_hw_intercept,
	/* Current seasonal coefficient for the Holt-Winters
	 * prediction algorithm. This is stored in CDP prep to avoid
	 * redundant seek operations. */
	CDP_hw_last_seasonal = CDP_hw_last_intercept,
	/* Last iteration seasonal coefficient. */
	CDP_seasonal_deviation = CDP_hw_intercept,
	CDP_last_seasonal_deviation = CDP_hw_last_intercept,
	CDP_init_seasonal = CDP_null_count
};

struct rrd_cdp_prep {
	rrd_value_t scratch[RRD_MAX_CDP_PAR_EN];
	/* contents according to cdp_par_en *
	 * init state should be NAN */
};

struct rrd_rra_ptr {
	gulong cur_row; /* current row in the rra */
};

/* Final rrd file structure */
struct rspamd_rrd_file {
	struct rrd_file_head *stat_head; /* the static header */
	struct rrd_ds_def *ds_def; /* list of data source definitions */
	struct rrd_rra_def *rra_def; /* list of round robin archive def */
	struct rrd_live_head *live_head; /* rrd v >= 3 last_up with us */
	struct rrd_pdp_prep *pdp_prep; /* pdp data prep area */
	struct rrd_cdp_prep *cdp_prep; /* cdp prep area */
	struct rrd_rra_ptr *rra_ptr; /* list of rra pointers */
	gdouble *rrd_value; /* list of rrd values */

	gchar *filename;
	guint8* map; /* mmapped area */
	gsize size; /* its size */
	gboolean finalized;
};


/* Public API */

/**
 * Open (and mmap) existing RRD file
 * @param filename path
 * @param err error pointer
 * @return rrd file structure
 */
struct rspamd_rrd_file* rspamd_rrd_open (const gchar *filename, GError **err);

/**
 * Create basic header for rrd file
 * @param filename file path
 * @param ds_count number of data sources
 * @param rra_count number of round robin archives
 * @param pdp_step step of primary data points
 * @param err error pointer
 * @return rrd file structure
 */
struct rspamd_rrd_file* rspamd_rrd_create (const gchar *filename, gulong ds_count, gulong rra_count, gulong pdp_step, GError **err);

/**
 * Add data sources to rrd file
 * @param file rrd file structure
 * @param ds array of struct rrd_ds_def
 * @param err error pointer
 * @return TRUE if data sources were added
 */
gboolean rspamd_rrd_add_ds (struct rspamd_rrd_file* file, GArray *ds, GError **err);

/**
 * Add round robin archives to rrd
file
 * @param file rrd file structure
 * @param rra array of struct rrd_rra_def
 * @param err error pointer
 * @return TRUE if archives were added
 */
gboolean rspamd_rrd_add_rra (struct rspamd_rrd_file *file, GArray *rra, GError **err);

/**
 * Finalize rrd file header and initialize all RRA in the file
 * @param file rrd file structure
 * @param err error pointer
 * @return TRUE if rrd file is ready for use
 */
gboolean rspamd_rrd_finalize (struct rspamd_rrd_file *file, GError **err);

/**
 * Add record to rrd file
 * @param file rrd file object
 * @param points points (must be row suitable for this RRA, depending on ds count)
 * @param err error pointer
 * @return TRUE if a row has been added
 */
gboolean rspamd_rrd_add_record (struct rspamd_rrd_file* file, GArray *points, GError **err);

/**
 * Close rrd file
 * @param file
 * @return
 */
gint rspamd_rrd_close (struct rspamd_rrd_file* file);

/*
 * Conversion functions
 */

/**
 * Convert rrd dst type from string to numeric value
 */
enum rrd_dst_type rrd_dst_from_string (const gchar *str);
/**
 * Convert numeric presentation of dst to string
 */
const gchar* rrd_dst_to_string (enum rrd_dst_type type);
/**
 * Convert rrd consolidation function type from string to numeric value
 */
enum rrd_cf_type rrd_cf_from_string (const gchar *str);
/**
 * Convert numeric presentation of cf to string
 */
const gchar* rrd_cf_to_string (enum rrd_cf_type type);

/* Default RRA and DS */

/**
 * Create default RRA
 */
void rrd_make_default_rra (const gchar *cf_name, gulong pdp_cnt, gulong rows, struct rrd_rra_def *rra);

/**
 * Create default DS
 */
void rrd_make_default_ds (const gchar *name, gulong pdp_step, struct rrd_ds_def *ds);
#endif /* RRD_H_ */
diff --git a/src/libutil/trie.c b/src/libutil/trie.c new file mode 100644 index 000000000..394c4e939 --- /dev/null +++ b/src/libutil/trie.c @@ -0,0 +1,230 @@
/* Copyright (c) 2010, Vsevolod Stakhov
 * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "config.h" +#include "mem_pool.h" +#include "trie.h" + +rspamd_trie_t* +rspamd_trie_create (gboolean icase) +{ + rspamd_trie_t *new; + + new = g_malloc (sizeof (rspamd_trie_t)); + + new->icase = icase; + new->pool = rspamd_mempool_new (rspamd_mempool_suggest_size ()); + new->root.fail = NULL; + new->root.final = 0; + new->root.id = 0; + new->root.next = NULL; + new->root.match = NULL; + new->fail_states = g_ptr_array_sized_new (8); + + return new; +} + +/* + * Insert a single character as the specified level of the suffix tree + */ +static struct rspamd_trie_state * +rspamd_trie_insert_char (rspamd_trie_t *trie, guint depth, struct rspamd_trie_state *pos, gchar c) +{ + struct rspamd_trie_match *new_match; + struct rspamd_trie_state *new_pos; + + /* New match is inserted before pos */ + new_match = rspamd_mempool_alloc (trie->pool, sizeof (struct rspamd_trie_match)); + new_match->next = pos->match; + new_match->c = c; + + /* Now set match link */ + pos->match = new_match; + + new_match->state = rspamd_mempool_alloc (trie->pool, sizeof (struct rspamd_trie_state)); + new_pos = new_match->state; + new_pos->match = NULL; + new_pos->fail = &trie->root; + new_pos->final = 0; + new_pos->id = -1; + + if (trie->fail_states->len < depth + 1) { + /* Grow fail states array if depth is more than its size */ + guint size = trie->fail_states->len; + + size = MAX (size * 2, depth + 1); + g_ptr_array_set_size (trie->fail_states, size); + } + + new_pos->next = trie->fail_states->pdata[depth]; + trie->fail_states->pdata[depth] = new_pos; + + return new_pos; +} + +/* Traverse the specified node to find corresponding match */ +static inline struct rspamd_trie_match * +check_match (struct rspamd_trie_state *s, gchar c) +{ + struct rspamd_trie_match *match = s->match; + + while (match && match->c != c) { + match = match->next; + } + + return match; +} + +void +rspamd_trie_insert (rspamd_trie_t *trie, const gchar *pattern, gint pattern_id) +{ + const guchar *p = pattern; + 
struct rspamd_trie_state *q, *q1, *r, *cur_node; + struct rspamd_trie_match *m, *n; + guint i, depth = 0; + gchar c; + + /* Insert pattern to the trie */ + + cur_node = &trie->root; + + while (*p) { + c = trie->icase ? g_ascii_tolower (*p) : *p; + m = check_match (cur_node, c); + if (m == NULL) { + /* Insert a character at specified level depth */ + cur_node = rspamd_trie_insert_char (trie, depth, cur_node, c); + } + else { + cur_node = m->state; + } + p ++; + depth ++; + } + + cur_node->final = depth; + cur_node->id = pattern_id; + + /* Update fail states and build fail states graph */ + /* Go through the whole depth of prefixes */ + for (i = 0; i < trie->fail_states->len; i++) { + q = trie->fail_states->pdata[i]; + while (q) { + m = q->match; + while (m) { + c = m->c; + q1 = m->state; + r = q->fail; + /* Move q->fail to last known fail location for this character (or to NULL) */ + while (r && (n = check_match (r, c)) == NULL) { + r = r->fail; + } + + /* We have found new fail location for character c, so set it in q1 */ + if (r != NULL) { + q1->fail = n->state; + if (q1->fail->final > q1->final) { + q1->final = q1->fail->final; + } + } + else { + /* Search from root */ + if ((n = check_match (&trie->root, c))) { + q1->fail = n->state; + } + else { + q1->fail = &trie->root; + } + } + + m = m->next; + } + + q = q->next; + } + } +} + +const gchar* +rspamd_trie_lookup (rspamd_trie_t *trie, const gchar *buffer, gsize buflen, gint *matched_id) +{ + const guchar *p = buffer, *prev, *ret; + struct rspamd_trie_state *cur_node; + struct rspamd_trie_match *m = NULL; + gchar c; + + + cur_node = &trie->root; + prev = p; + ret = p; + + while (buflen) { + c = trie->icase ? 
g_ascii_tolower (*p) : *p; + + /* Match pattern or use fail-path to restore state */ + while (cur_node != NULL && (m = check_match (cur_node, c)) == NULL) { + cur_node = cur_node->fail; + } + + /* Shift left in the text */ + if (cur_node == &trie->root) { + /* 1 character pattern found */ + ret = prev; + } + else if (cur_node == NULL) { + /* We have tried the pattern but eventually it was not found */ + cur_node = &trie->root; + ret = p; + p ++; + prev = p; + buflen --; + continue; + } + + if (m != NULL) { + /* Match found */ + cur_node = m->state; + + if (cur_node->final) { + /* The complete pattern found */ + if (matched_id != NULL) { + *matched_id = cur_node->id; + } + return (const gchar *) ret; + } + } + p ++; + prev = p; + buflen --; + } + + return NULL; +} + +void +rspamd_trie_free (rspamd_trie_t *trie) +{ + g_ptr_array_free (trie->fail_states, TRUE); + rspamd_mempool_delete (trie->pool); + g_free (trie); +} diff --git a/src/libutil/trie.h b/src/libutil/trie.h new file mode 100644 index 000000000..2792ee4a5 --- /dev/null +++ b/src/libutil/trie.h @@ -0,0 +1,86 @@ +/* Copyright (c) 2010, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY Rambler media ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL Rambler BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + + +#ifndef TRIE_H_ +#define TRIE_H_ + +#include "config.h" +#include "mem_pool.h" + +/* + * Rspamd implements basic bitwise prefixed trie structure + */ + +struct rspamd_trie_match; + +struct rspamd_trie_state { + struct rspamd_trie_state *next; + struct rspamd_trie_state *fail; + struct rspamd_trie_match *match; + guint final; + gint id; +}; + +struct rspamd_trie_match { + struct rspamd_trie_match *next; + struct rspamd_trie_state *state; + gchar c; +}; + +typedef struct rspamd_trie_s { + struct rspamd_trie_state root; + GPtrArray *fail_states; + gboolean icase; + rspamd_mempool_t *pool; +} rspamd_trie_t; + +/* + * Create a new suffix trie + */ +rspamd_trie_t* rspamd_trie_create (gboolean icase); + +/* + * Insert a pattern into the trie + * @param trie suffix trie + * @param pattern text of element + * @param pattern_id id of element + */ +void rspamd_trie_insert (rspamd_trie_t *trie, const gchar *pattern, gint pattern_id); + +/* + * Search for a text using suffix trie + * @param trie suffix trie + * @param buffer a text where to search for trie patterns + * @param buflen a length of text + * @param mached_id on a successfull search here would be stored id of pattern found + * @return Position in a text where pattern was found or NULL if no patterns were found + */ +const gchar* rspamd_trie_lookup (rspamd_trie_t *trie, const gchar *buffer, gsize buflen, gint *matched_id); + +/* + * Deallocate suffix trie + */ +void rspamd_trie_free (rspamd_trie_t *trie); + 
+#endif /* TRIE_H_ */ diff --git a/src/libutil/upstream.c b/src/libutil/upstream.c new file mode 100644 index 000000000..f82d3ba50 --- /dev/null +++ b/src/libutil/upstream.c @@ -0,0 +1,525 @@ +/* + * Copyright (c) 2009-2012, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "config.h" +#include "upstream.h" + + +#ifdef _THREAD_SAFE +pthread_rwlock_t upstream_mtx = PTHREAD_RWLOCK_INITIALIZER; +# define U_RLOCK() do { pthread_rwlock_rdlock (&upstream_mtx); } while (0) +# define U_WLOCK() do { pthread_rwlock_wrlock (&upstream_mtx); } while (0) +# define U_UNLOCK() do { pthread_rwlock_unlock (&upstream_mtx); } while (0) +#else +# define U_RLOCK() do {} while (0) +# define U_WLOCK() do {} while (0) +# define U_UNLOCK() do {} while (0) +#endif + +#define MAX_TRIES 20 +#define HASH_COMPAT + +/* + * Poly: 0xedb88320 + * Init: 0x0 + */ + +static const guint32 crc32lookup[256] = { + 0x00000000U, 0x77073096U, 0xee0e612cU, 0x990951baU, 0x076dc419U, 0x706af48fU, + 0xe963a535U, 0x9e6495a3U, 0x0edb8832U, 0x79dcb8a4U, 0xe0d5e91eU, 0x97d2d988U, + 0x09b64c2bU, 0x7eb17cbdU, 0xe7b82d07U, 0x90bf1d91U, 0x1db71064U, 0x6ab020f2U, + 0xf3b97148U, 0x84be41deU, 0x1adad47dU, 0x6ddde4ebU, 0xf4d4b551U, 0x83d385c7U, + 0x136c9856U, 0x646ba8c0U, 0xfd62f97aU, 0x8a65c9ecU, 0x14015c4fU, 0x63066cd9U, + 0xfa0f3d63U, 0x8d080df5U, 0x3b6e20c8U, 0x4c69105eU, 0xd56041e4U, 0xa2677172U, + 0x3c03e4d1U, 0x4b04d447U, 0xd20d85fdU, 0xa50ab56bU, 0x35b5a8faU, 0x42b2986cU, + 0xdbbbc9d6U, 0xacbcf940U, 0x32d86ce3U, 0x45df5c75U, 0xdcd60dcfU, 0xabd13d59U, + 0x26d930acU, 0x51de003aU, 0xc8d75180U, 0xbfd06116U, 0x21b4f4b5U, 0x56b3c423U, + 0xcfba9599U, 0xb8bda50fU, 0x2802b89eU, 0x5f058808U, 0xc60cd9b2U, 0xb10be924U, + 0x2f6f7c87U, 0x58684c11U, 0xc1611dabU, 0xb6662d3dU, 0x76dc4190U, 0x01db7106U, + 0x98d220bcU, 0xefd5102aU, 0x71b18589U, 0x06b6b51fU, 0x9fbfe4a5U, 0xe8b8d433U, + 0x7807c9a2U, 0x0f00f934U, 0x9609a88eU, 0xe10e9818U, 0x7f6a0dbbU, 0x086d3d2dU, + 0x91646c97U, 0xe6635c01U, 0x6b6b51f4U, 0x1c6c6162U, 0x856530d8U, 0xf262004eU, + 0x6c0695edU, 0x1b01a57bU, 0x8208f4c1U, 0xf50fc457U, 0x65b0d9c6U, 0x12b7e950U, + 0x8bbeb8eaU, 0xfcb9887cU, 0x62dd1ddfU, 0x15da2d49U, 0x8cd37cf3U, 0xfbd44c65U, + 0x4db26158U, 0x3ab551ceU, 0xa3bc0074U, 0xd4bb30e2U, 0x4adfa541U, 0x3dd895d7U, + 
0xa4d1c46dU, 0xd3d6f4fbU, 0x4369e96aU, 0x346ed9fcU, 0xad678846U, 0xda60b8d0U, + 0x44042d73U, 0x33031de5U, 0xaa0a4c5fU, 0xdd0d7cc9U, 0x5005713cU, 0x270241aaU, + 0xbe0b1010U, 0xc90c2086U, 0x5768b525U, 0x206f85b3U, 0xb966d409U, 0xce61e49fU, + 0x5edef90eU, 0x29d9c998U, 0xb0d09822U, 0xc7d7a8b4U, 0x59b33d17U, 0x2eb40d81U, + 0xb7bd5c3bU, 0xc0ba6cadU, 0xedb88320U, 0x9abfb3b6U, 0x03b6e20cU, 0x74b1d29aU, + 0xead54739U, 0x9dd277afU, 0x04db2615U, 0x73dc1683U, 0xe3630b12U, 0x94643b84U, + 0x0d6d6a3eU, 0x7a6a5aa8U, 0xe40ecf0bU, 0x9309ff9dU, 0x0a00ae27U, 0x7d079eb1U, + 0xf00f9344U, 0x8708a3d2U, 0x1e01f268U, 0x6906c2feU, 0xf762575dU, 0x806567cbU, + 0x196c3671U, 0x6e6b06e7U, 0xfed41b76U, 0x89d32be0U, 0x10da7a5aU, 0x67dd4accU, + 0xf9b9df6fU, 0x8ebeeff9U, 0x17b7be43U, 0x60b08ed5U, 0xd6d6a3e8U, 0xa1d1937eU, + 0x38d8c2c4U, 0x4fdff252U, 0xd1bb67f1U, 0xa6bc5767U, 0x3fb506ddU, 0x48b2364bU, + 0xd80d2bdaU, 0xaf0a1b4cU, 0x36034af6U, 0x41047a60U, 0xdf60efc3U, 0xa867df55U, + 0x316e8eefU, 0x4669be79U, 0xcb61b38cU, 0xbc66831aU, 0x256fd2a0U, 0x5268e236U, + 0xcc0c7795U, 0xbb0b4703U, 0x220216b9U, 0x5505262fU, 0xc5ba3bbeU, 0xb2bd0b28U, + 0x2bb45a92U, 0x5cb36a04U, 0xc2d7ffa7U, 0xb5d0cf31U, 0x2cd99e8bU, 0x5bdeae1dU, + 0x9b64c2b0U, 0xec63f226U, 0x756aa39cU, 0x026d930aU, 0x9c0906a9U, 0xeb0e363fU, + 0x72076785U, 0x05005713U, 0x95bf4a82U, 0xe2b87a14U, 0x7bb12baeU, 0x0cb61b38U, + 0x92d28e9bU, 0xe5d5be0dU, 0x7cdcefb7U, 0x0bdbdf21U, 0x86d3d2d4U, 0xf1d4e242U, + 0x68ddb3f8U, 0x1fda836eU, 0x81be16cdU, 0xf6b9265bU, 0x6fb077e1U, 0x18b74777U, + 0x88085ae6U, 0xff0f6a70U, 0x66063bcaU, 0x11010b5cU, 0x8f659effU, 0xf862ae69U, + 0x616bffd3U, 0x166ccf45U, 0xa00ae278U, 0xd70dd2eeU, 0x4e048354U, 0x3903b3c2U, + 0xa7672661U, 0xd06016f7U, 0x4969474dU, 0x3e6e77dbU, 0xaed16a4aU, 0xd9d65adcU, + 0x40df0b66U, 0x37d83bf0U, 0xa9bcae53U, 0xdebb9ec5U, 0x47b2cf7fU, 0x30b5ffe9U, + 0xbdbdf21cU, 0xcabac28aU, 0x53b39330U, 0x24b4a3a6U, 0xbad03605U, 0xcdd70693U, + 0x54de5729U, 0x23d967bfU, 0xb3667a2eU, 0xc4614ab8U, 0x5d681b02U, 0x2a6f2b94U, + 
0xb40bbe37U, 0xc30c8ea1U, 0x5a05df1bU, 0x2d02ef8dU +}; + +/* + * Check upstream parameters and mark it whether valid or dead + */ +static void +check_upstream (struct upstream *up, time_t now, time_t error_timeout, time_t revive_timeout, size_t max_errors) +{ + if (up->dead) { + if (now - up->time >= revive_timeout) { + U_WLOCK (); + up->dead = 0; + up->errors = 0; + up->time = 0; + up->weight = up->priority; + U_UNLOCK (); + } + } + else { + if (now - up->time >= error_timeout && up->errors >= max_errors) { + U_WLOCK (); + up->dead = 1; + up->time = now; + up->weight = 0; + U_UNLOCK (); + } + } +} + +/* + * Call this function after failed upstream request + */ +void +upstream_fail (struct upstream *up, time_t now) +{ + if (up->time != 0) { + up->errors++; + } + else { + U_WLOCK (); + up->time = now; + up->errors++; + U_UNLOCK (); + } +} + +/* + * Call this function after successfull upstream request + */ +void +upstream_ok (struct upstream *up, time_t now) +{ + if (up->errors != 0) { + U_WLOCK (); + up->errors = 0; + up->time = 0; + U_UNLOCK (); + } + + up->weight--; +} + +/* + * Mark all upstreams as active. 
This function is used when all upstreams are marked as inactive + */ +void +revive_all_upstreams (void *ups, size_t members, size_t msize) +{ + guint i; + struct upstream *cur; + guchar *p; + + U_WLOCK (); + p = ups; + for (i = 0; i < members; i++) { + cur = (struct upstream *)p; + cur->time = 0; + cur->errors = 0; + cur->dead = 0; + cur->weight = cur->priority; + p += msize; + } + U_UNLOCK (); +} + +/* + * Scan all upstreams for errors and mark upstreams dead or alive depends on conditions, + * return number of alive upstreams + */ +static gint +rescan_upstreams (void *ups, size_t members, size_t msize, time_t now, time_t error_timeout, time_t revive_timeout, size_t max_errors) +{ + guint i, alive; + struct upstream *cur; + guchar *p; + + /* Recheck all upstreams */ + p = ups; + alive = members; + for (i = 0; i < members; i++) { + cur = (struct upstream *)p; + check_upstream (cur, now, error_timeout, revive_timeout, max_errors); + alive -= cur->dead; + p += msize; + } + + /* All upstreams are dead */ + if (alive == 0) { + revive_all_upstreams (ups, members, msize); + alive = members; + } + + + return alive; + +} + +/* Return alive upstream by its number */ +static struct upstream * +get_upstream_by_number (void *ups, size_t members, size_t msize, gint selected) +{ + guint i; + u_char *p, *c; + struct upstream *cur; + + i = 0; + p = ups; + c = ups; + U_RLOCK (); + for (;;) { + /* Out of range, return NULL */ + if (p > c + members * msize) { + break; + } + + cur = (struct upstream *)p; + p += msize; + + if (cur->dead) { + /* Skip inactive upstreams */ + continue; + } + /* Return selected upstream */ + if ((gint)i == selected) { + U_UNLOCK (); + return cur; + } + i++; + } + U_UNLOCK (); + + /* Error */ + return NULL; + +} + +/* + * Get hash key for specified key (perl hash) + */ +static guint32 +get_hash_for_key (guint32 hash, const gchar *key, size_t keylen) +{ + guint32 h, index; + const gchar *end = key + keylen; + + h = ~hash; + + if (end != key) { + while (key < 
end) { + index = (h ^ (u_char) * key) & 0x000000ffU; + h = (h >> 8) ^ crc32lookup[index]; + ++key; + } + } + else { + while (*key) { + index = (h ^ (u_char) * key) & 0x000000ffU; + h = (h >> 8) ^ crc32lookup[index]; + ++key; + } + } + + return (~h); +} + +/* + * Recheck all upstreams and return random active upstream + */ +struct upstream * +get_random_upstream (void *ups, size_t members, size_t msize, time_t now, time_t error_timeout, + time_t revive_timeout, size_t max_errors) +{ + gint alive, selected; + + alive = rescan_upstreams (ups, members, msize, now, error_timeout, revive_timeout, max_errors); + selected = rand () % alive; + + return get_upstream_by_number (ups, members, msize, selected); +} + +/* + * Return upstream by hash, that is calculated from active upstreams number + */ +struct upstream * +get_upstream_by_hash (void *ups, size_t members, size_t msize, time_t now, time_t error_timeout, + time_t revive_timeout, size_t max_errors, const gchar *key, size_t keylen) +{ + gint alive, tries = 0, r; + guint32 h = 0, ht; + gchar *p, numbuf[4]; + struct upstream *cur; + + alive = rescan_upstreams (ups, members, msize, now, error_timeout, revive_timeout, max_errors); + + if (alive == 0) { + return NULL; + } + + h = get_hash_for_key (0, key, keylen); +#ifdef HASH_COMPAT + h = (h >> 16) & 0x7fff; +#endif + h %= members; + + for (;;) { + p = (gchar *)ups + msize * h; + cur = (struct upstream *)p; + if (!cur->dead) { + break; + } + r = snprintf (numbuf, sizeof (numbuf), "%d", tries); + ht = get_hash_for_key (0, numbuf, r); + ht = get_hash_for_key (ht, key, keylen); +#ifdef HASH_COMPAT + h += (ht >> 16) & 0x7fff; +#else + h += ht; +#endif + h %= members; + tries++; + if (tries > MAX_TRIES) { + return NULL; + } + } + + U_RLOCK (); + p = ups; + U_UNLOCK (); + return cur; +} + +/* + * Recheck all upstreams and return upstream in round-robin order according to weight and priority + */ +struct upstream * +get_upstream_round_robin (void *ups, size_t members, size_t 
msize, time_t now, time_t error_timeout, + time_t revive_timeout, size_t max_errors) +{ + guint max_weight, i; + struct upstream *cur, *selected = NULL; + u_char *p; + + /* Recheck all upstreams */ + (void)rescan_upstreams (ups, members, msize, now, error_timeout, revive_timeout, max_errors); + + p = ups; + max_weight = 0; + selected = (struct upstream *)p; + U_RLOCK (); + for (i = 0; i < members; i++) { + cur = (struct upstream *)p; + if (!cur->dead) { + if (max_weight < (guint)cur->weight) { + max_weight = cur->weight; + selected = cur; + } + } + p += msize; + } + U_UNLOCK (); + + if (max_weight == 0) { + p = ups; + U_WLOCK (); + for (i = 0; i < members; i++) { + cur = (struct upstream *)p; + cur->weight = cur->priority; + if (!cur->dead) { + if (max_weight < cur->priority) { + max_weight = cur->priority; + selected = cur; + } + } + p += msize; + } + U_UNLOCK (); + } + + return selected; +} + +/* + * Recheck all upstreams and return upstream in round-robin order according to only priority (master-slaves) + */ +struct upstream * +get_upstream_master_slave (void *ups, size_t members, size_t msize, time_t now, time_t error_timeout, + time_t revive_timeout, size_t max_errors) +{ + guint max_weight, i; + struct upstream *cur, *selected = NULL; + u_char *p; + + /* Recheck all upstreams */ + (void)rescan_upstreams (ups, members, msize, now, error_timeout, revive_timeout, max_errors); + + p = ups; + max_weight = 0; + selected = (struct upstream *)p; + U_RLOCK (); + for (i = 0; i < members; i++) { + cur = (struct upstream *)p; + if (!cur->dead) { + if (max_weight < cur->priority) { + max_weight = cur->priority; + selected = cur; + } + } + p += msize; + } + U_UNLOCK (); + + return selected; +} + +/* + * Ketama manipulation functions + */ + +static gint +ketama_sort_cmp (const void *a1, const void *a2) +{ + return *((guint32 *) a1) - *((guint32 *) a2); +} + +/* + * Add ketama points for specified upstream + */ +gint +upstream_ketama_add (struct upstream *up, gchar *up_key, 
size_t keylen, size_t keypoints) +{ + guint32 h = 0; + gchar tmp[4]; + guint i; + + /* Allocate ketama points array */ + if (up->ketama_points == NULL) { + up->ketama_points_size = keypoints; + up->ketama_points = malloc (sizeof (guint32) * up->ketama_points_size); + if (up->ketama_points == NULL) { + return -1; + } + } + + h = get_hash_for_key (h, up_key, keylen); + + for (i = 0; i < keypoints; i++) { + tmp[0] = i & 0xff; + tmp[1] = (i >> 8) & 0xff; + tmp[2] = (i >> 16) & 0xff; + tmp[3] = (i >> 24) & 0xff; + + h = get_hash_for_key (h, tmp, sizeof (tmp) * sizeof (gchar)); + up->ketama_points[i] = h; + } + /* Keep points sorted */ + qsort (up->ketama_points, keypoints, sizeof (guint32), ketama_sort_cmp); + + return 0; +} + +/* + * Return upstream by hash and find nearest ketama point in some server + */ +struct upstream * +get_upstream_by_hash_ketama (void *ups, size_t members, size_t msize, + time_t now, time_t error_timeout, time_t revive_timeout, size_t max_errors, const gchar *key, size_t keylen) +{ + guint alive, i; + guint32 h = 0, step, middle, d, min_diff = UINT_MAX; + gchar *p; + struct upstream *cur = NULL, *nearest = NULL; + + alive = rescan_upstreams (ups, members, msize, now, error_timeout, revive_timeout, max_errors); + + if (alive == 0) { + return NULL; + } + + h = get_hash_for_key (h, key, keylen); + + U_RLOCK (); + p = ups; + nearest = (struct upstream *)p; + for (i = 0; i < members; i++) { + cur = (struct upstream *)p; + if (!cur->dead && cur->ketama_points != NULL) { + /* Find nearest ketama point for this key */ + step = cur->ketama_points_size / 2; + middle = step; + while (step != 1) { + d = cur->ketama_points[middle] - h; + if (abs (d) < (gint)min_diff) { + min_diff = abs (d); + nearest = cur; + } + step /= 2; + if (d > 0) { + middle -= step; + } + else { + middle += step; + } + } + } + } + U_UNLOCK (); + return nearest; +} + +#undef U_LOCK +#undef U_UNLOCK +/* + * vi:ts=4 + */ diff --git a/src/libutil/upstream.h b/src/libutil/upstream.h new 
file mode 100644 index 000000000..da0a00013 --- /dev/null +++ b/src/libutil/upstream.h @@ -0,0 +1,127 @@ +#ifndef UPSTREAM_H +#define UPSTREAM_H + +#include <sys/types.h> +#include <stdint.h> + +/** + * Structure of generic upstream + */ +struct upstream { + guint errors; /**< Errors for this upstream */ + time_t time; /**< Time of marking */ + guint dead; /**< Dead flag */ + guint priority; /**< Fixed priority */ + gint16 weight; /**< Dynamic weight */ + guint32 *ketama_points; /**< Ketama points array */ + size_t ketama_points_size; /**< Ketama array size */ +}; + +/** + * Upstream error logic + * 1. During error time we count upstream_ok and upstream_fail + * 2. If failcount is more then maxerrors then we mark upstream as unavailable for dead time + * 3. After dead time we mark upstream as alive and go to the step 1 + * 4. If all upstreams are dead, marks every upstream as alive + */ + +/** + * Add an error to an upstream + */ +void upstream_fail (struct upstream *up, time_t now); + +/** + * Increase upstream successes count + */ +void upstream_ok (struct upstream *up, time_t now); + +/** + * Make all upstreams alive + */ +void revive_all_upstreams (void *ups, size_t members, size_t msize); + +/** + * Add ketama points for upstream + */ +gint upstream_ketama_add (struct upstream *up, gchar *up_key, size_t keylen, size_t keypoints); + +/** + * Get a random upstream from array of upstreams + * @param ups array of structures that contains struct upstream as their first element + * @param members number of elements in array + * @param msize size of each member + * @param now current time + * @param error_timeout time during which we are counting errors + * @param revive_timeout time during which we counts upstream dead + * @param max_errors maximum errors during error_timeout to mark upstream dead + */ +struct upstream* get_random_upstream (void *ups, size_t members, size_t msize, + time_t now, time_t error_timeout, + time_t revive_timeout, size_t max_errors); + 
+/** + * Get upstream based on hash from array of upstreams + * @param ups array of structures that contains struct upstream as their first element + * @param members number of elements in array + * @param msize size of each member + * @param now current time + * @param error_timeout time during which we are counting errors + * @param revive_timeout time during which we counts upstream dead + * @param max_errors maximum errors during error_timeout to mark upstream dead + * @param key key for hashing + * @param keylen length of the key + */ +struct upstream* get_upstream_by_hash (void *ups, size_t members, size_t msize, + time_t now, time_t error_timeout, + time_t revive_timeout, size_t max_errors, + const gchar *key, size_t keylen); + +/** + * Get an upstream from array of upstreams based on its current weight + * @param ups array of structures that contains struct upstream as their first element + * @param members number of elements in array + * @param msize size of each member + * @param now current time + * @param error_timeout time during which we are counting errors + * @param revive_timeout time during which we counts upstream dead + * @param max_errors maximum errors during error_timeout to mark upstream dead + */ +struct upstream* get_upstream_round_robin (void *ups, size_t members, size_t msize, + time_t now, time_t error_timeout, + time_t revive_timeout, size_t max_errors); + +/** + * Get upstream based on hash from array of upstreams, this functions is using ketama algorithm + * @param ups array of structures that contains struct upstream as their first element + * @param members number of elements in array + * @param msize size of each member + * @param now current time + * @param error_timeout time during which we are counting errors + * @param revive_timeout time during which we counts upstream dead + * @param max_errors maximum errors during error_timeout to mark upstream dead + * @param key key for hashing + * @param keylen length of the key + */ 
+struct upstream* get_upstream_by_hash_ketama (void *ups, size_t members, size_t msize, time_t now, + time_t error_timeout, time_t revive_timeout, size_t max_errors, + const gchar *key, size_t keylen); + +/** + * Get an upstream from array of upstreams based on its current priority (not weight) + * @param ups array of structures that contains struct upstream as their first element + * @param members number of elements in array + * @param msize size of each member + * @param now current time + * @param error_timeout time during which we are counting errors + * @param revive_timeout time during which we counts upstream dead + * @param max_errors maximum errors during error_timeout to mark upstream dead + */ +struct upstream* get_upstream_master_slave (void *ups, size_t members, size_t msize, + time_t now, time_t error_timeout, + time_t revive_timeout, size_t max_errors); + + +#endif /* UPSTREAM_H */ +/* + * vi:ts=4 + */ diff --git a/src/libutil/util.c b/src/libutil/util.c new file mode 100644 index 000000000..03b38e087 --- /dev/null +++ b/src/libutil/util.c @@ -0,0 +1,2275 @@ +/* + * Copyright (c) 2009-2012, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + + +#include "config.h" +#include "util.h" +#include "cfg_file.h" +#include "main.h" +#include "statfile.h" +#include "filter.h" +#include "message.h" + +#ifdef HAVE_OPENSSL +#include <openssl/rand.h> +#include <openssl/err.h> +#endif + +#ifdef HAVE_TERMIOS_H +#include <termios.h> +#endif +#ifdef HAVE_READPASSPHRASE_H +#include <readpassphrase.h> +#endif + +/* Check log messages intensity once per minute */ +#define CHECK_TIME 60 +/* More than 2 log messages per second */ +#define BUF_INTENSITY 2 +/* Default connect timeout for sync sockets */ +#define CONNECT_TIMEOUT 3 + +gint +make_socket_nonblocking (gint fd) +{ + gint ofl; + + ofl = fcntl (fd, F_GETFL, 0); + + if (fcntl (fd, F_SETFL, ofl | O_NONBLOCK) == -1) { + msg_warn ("fcntl failed: %d, '%s'", errno, strerror (errno)); + return -1; + } + return 0; +} + +gint +make_socket_blocking (gint fd) +{ + gint ofl; + + ofl = fcntl (fd, F_GETFL, 0); + + if (fcntl (fd, F_SETFL, ofl & (~O_NONBLOCK)) == -1) { + msg_warn ("fcntl failed: %d, '%s'", errno, strerror (errno)); + return -1; + } + return 0; +} + +gint +poll_sync_socket (gint fd, gint timeout, short events) +{ + gint r; + struct pollfd fds[1]; + + fds->fd = fd; + fds->events = events; + fds->revents = 0; + while ((r = poll (fds, 1, timeout)) < 0) { + if (errno != EINTR) { + break; + } + } + + return r; +} + +static gint +make_inet_socket (gint type, struct addrinfo *addr, gboolean is_server, gboolean async, GList **list) +{ + gint fd, r, optlen, on = 1, 
s_error; + struct addrinfo *cur; + + cur = addr; + while (cur) { + /* Create socket */ + fd = socket (cur->ai_family, type, 0); + if (fd == -1) { + msg_warn ("socket failed: %d, '%s'", errno, strerror (errno)); + goto out; + } + + if (make_socket_nonblocking (fd) < 0) { + goto out; + } + + /* Set close on exec */ + if (fcntl (fd, F_SETFD, FD_CLOEXEC) == -1) { + msg_warn ("fcntl failed: %d, '%s'", errno, strerror (errno)); + goto out; + } + + if (is_server) { + setsockopt (fd, SOL_SOCKET, SO_REUSEADDR, (const void *)&on, sizeof (gint)); +#ifdef HAVE_IPV6_V6ONLY + if (cur->ai_family == AF_INET6) { + setsockopt (fd, IPPROTO_IPV6, IPV6_V6ONLY, (const void *)&on, sizeof (gint)); + } +#endif + r = bind (fd, cur->ai_addr, cur->ai_addrlen); + } + else { + r = connect (fd, cur->ai_addr, cur->ai_addrlen); + } + + if (r == -1) { + if (errno != EINPROGRESS) { + msg_warn ("bind/connect failed: %d, '%s'", errno, strerror (errno)); + goto out; + } + if (!async) { + /* Try to poll */ + if (poll_sync_socket (fd, CONNECT_TIMEOUT * 1000, POLLOUT) <= 0) { + errno = ETIMEDOUT; + msg_warn ("bind/connect failed: timeout"); + goto out; + } + else { + /* Make synced again */ + if (make_socket_blocking (fd) < 0) { + goto out; + } + } + } + } + else { + /* Still need to check SO_ERROR on socket */ + optlen = sizeof (s_error); + getsockopt (fd, SOL_SOCKET, SO_ERROR, (void *)&s_error, &optlen); + if (s_error) { + errno = s_error; + goto out; + } + } + if (list == NULL) { + /* Go out immediately */ + break; + } + else if (fd != -1) { + *list = g_list_prepend (*list, GINT_TO_POINTER (fd)); + cur = cur->ai_next; + continue; + } +out: + if (fd != -1) { + close (fd); + } + fd = -1; + cur = cur->ai_next; + } + return (fd); +} + +gint +make_tcp_socket (struct addrinfo *addr, gboolean is_server, gboolean async) +{ + return make_inet_socket (SOCK_STREAM, addr, is_server, async, NULL); +} + +gint +make_udp_socket (struct addrinfo *addr, gboolean is_server, gboolean async) +{ + return make_inet_socket 
(SOCK_DGRAM, addr, is_server, async, NULL); +} + +gint +make_unix_socket (const gchar *path, struct sockaddr_un *addr, gint type, gboolean is_server, gboolean async) +{ + gint fd = -1, s_error, r, optlen, serrno, on = 1; + struct stat st; + + if (path == NULL) + return -1; + + addr->sun_family = AF_UNIX; + + rspamd_strlcpy (addr->sun_path, path, sizeof (addr->sun_path)); +#ifdef FREEBSD + addr->sun_len = SUN_LEN (addr); +#endif + + if (is_server) { + /* Unlink socket if it exists already */ + if (lstat (addr->sun_path, &st) != -1) { + if (S_ISSOCK (st.st_mode)) { + if (unlink (addr->sun_path) == -1) { + msg_warn ("unlink %s failed: %d, '%s'", addr->sun_path, errno, strerror (errno)); + goto out; + } + } + else { + msg_warn ("%s is not a socket", addr->sun_path); + goto out; + } + } + } + fd = socket (PF_LOCAL, type, 0); + + if (fd == -1) { + msg_warn ("socket failed %s: %d, '%s'", addr->sun_path, errno, strerror (errno)); + return -1; + } + + if (make_socket_nonblocking (fd) < 0) { + goto out; + } + + /* Set close on exec */ + if (fcntl (fd, F_SETFD, FD_CLOEXEC) == -1) { + msg_warn ("fcntl failed %s: %d, '%s'", addr->sun_path, errno, strerror (errno)); + goto out; + } + if (is_server) { + setsockopt (fd, SOL_SOCKET, SO_REUSEADDR, (const void *)&on, sizeof (gint)); + r = bind (fd, (struct sockaddr *)addr, SUN_LEN (addr)); + } + else { + r = connect (fd, (struct sockaddr *)addr, SUN_LEN (addr)); + } + + if (r == -1) { + if (errno != EINPROGRESS) { + msg_warn ("bind/connect failed %s: %d, '%s'", addr->sun_path, errno, strerror (errno)); + goto out; + } + if (!async) { + /* Try to poll */ + if (poll_sync_socket (fd, CONNECT_TIMEOUT * 1000, POLLOUT) <= 0) { + errno = ETIMEDOUT; + msg_warn ("bind/connect failed %s: timeout", addr->sun_path); + goto out; + } + else { + /* Make synced again */ + if (make_socket_blocking (fd) < 0) { + goto out; + } + } + } + } + else { + /* Still need to check SO_ERROR on socket */ + optlen = sizeof (s_error); + getsockopt (fd, SOL_SOCKET, 
SO_ERROR, (void *)&s_error, &optlen); + if (s_error) { + errno = s_error; + goto out; + } + } + + + return (fd); + + out: + serrno = errno; + if (fd != -1) { + close (fd); + } + errno = serrno; + return (-1); +} + +/** + * Make a universal socket + * @param credits host, ip or path to unix socket + * @param port port (used for network sockets) + * @param async make this socket asynced + * @param is_server make this socket as server socket + * @param try_resolve try name resolution for a socket (BLOCKING) + */ +gint +make_universal_socket (const gchar *credits, guint16 port, + gint type, gboolean async, gboolean is_server, gboolean try_resolve) +{ + struct sockaddr_un un; + struct stat st; + struct addrinfo hints, *res; + gint r; + gchar portbuf[8]; + + if (*credits == '/') { + if (is_server) { + return make_unix_socket (credits, &un, type, is_server, async); + } + else { + r = stat (credits, &st); + if (r == -1) { + /* Unix socket doesn't exists it must be created first */ + errno = ENOENT; + return -1; + } + else { + if ((st.st_mode & S_IFSOCK) == 0) { + /* Path is not valid socket */ + errno = EINVAL; + return -1; + } + else { + return make_unix_socket (credits, &un, type, is_server, async); + } + } + } + } + else { + /* TCP related part */ + memset (&hints, 0, sizeof (hints)); + hints.ai_family = AF_UNSPEC; /* Allow IPv4 or IPv6 */ + hints.ai_socktype = type; /* Type of the socket */ + hints.ai_flags = is_server ? 
AI_PASSIVE : 0; + hints.ai_protocol = 0; /* Any protocol */ + hints.ai_canonname = NULL; + hints.ai_addr = NULL; + hints.ai_next = NULL; + + if (!try_resolve) { + hints.ai_flags |= AI_NUMERICHOST | AI_NUMERICSERV; + } + + rspamd_snprintf (portbuf, sizeof (portbuf), "%d", (int)port); + if ((r = getaddrinfo (credits, portbuf, &hints, &res)) == 0) { + r = make_inet_socket (type, res, is_server, async, NULL); + freeaddrinfo (res); + return r; + } + else { + msg_err ("address resolution for %s failed: %s", credits, gai_strerror (r)); + return FALSE; + } + } +} + +/** + * Make universal stream socket + * @param credits host, ip or path to unix socket + * @param port port (used for network sockets) + * @param async make this socket asynced + * @param is_server make this socket as server socket + * @param try_resolve try name resolution for a socket (BLOCKING) + */ +GList* +make_universal_sockets_list (const gchar *credits, guint16 port, + gint type, gboolean async, gboolean is_server, gboolean try_resolve) +{ + struct sockaddr_un un; + struct stat st; + struct addrinfo hints, *res; + gint r, fd, serrno; + gchar portbuf[8], **strv, **cur; + GList *result = NULL, *rcur; + + strv = g_strsplit_set (credits, ",", -1); + if (strv == NULL) { + msg_err ("invalid sockets credentials: %s", credits); + return NULL; + } + cur = strv; + while (*cur != NULL) { + if (*credits == '/') { + if (is_server) { + fd = make_unix_socket (credits, &un, type, is_server, async); + } + else { + r = stat (credits, &st); + if (r == -1) { + /* Unix socket doesn't exists it must be created first */ + errno = ENOENT; + goto err; + } + else { + if ((st.st_mode & S_IFSOCK) == 0) { + /* Path is not valid socket */ + errno = EINVAL; + goto err; + } + else { + fd = make_unix_socket (credits, &un, type, is_server, async); + } + } + } + if (fd != -1) { + result = g_list_prepend (result, GINT_TO_POINTER (fd)); + } + else { + goto err; + } + } + else { + /* TCP related part */ + memset (&hints, 0, sizeof 
(hints)); + hints.ai_family = AF_UNSPEC; /* Allow IPv4 or IPv6 */ + hints.ai_socktype = type; /* Type of the socket */ + hints.ai_flags = is_server ? AI_PASSIVE : 0; + hints.ai_protocol = 0; /* Any protocol */ + hints.ai_canonname = NULL; + hints.ai_addr = NULL; + hints.ai_next = NULL; + + if (!try_resolve) { + hints.ai_flags |= AI_NUMERICHOST | AI_NUMERICSERV; + } + + rspamd_snprintf (portbuf, sizeof (portbuf), "%d", (int)port); + if ((r = getaddrinfo (credits, portbuf, &hints, &res)) == 0) { + r = make_inet_socket (type, res, is_server, async, &result); + freeaddrinfo (res); + if (r == -1) { + goto err; + } + } + else { + msg_err ("address resolution for %s failed: %s", credits, gai_strerror (r)); + goto err; + } + } + cur ++; + } + + g_strfreev (strv); + return result; + +err: + g_strfreev (strv); + serrno = errno; + rcur = result; + while (rcur != NULL) { + fd = GPOINTER_TO_INT (rcur->data); + if (fd != -1) { + close (fd); + } + rcur = g_list_next (rcur); + } + if (result != NULL) { + g_list_free (result); + } + + errno = serrno; + return NULL; +} + +gint +make_socketpair (gint pair[2]) +{ + gint r; + + r = socketpair (AF_LOCAL, SOCK_STREAM, 0, pair); + + if (r == -1) { + msg_warn ("socketpair failed: %d, '%s'", errno, strerror (errno), pair[0], pair[1]); + return -1; + } + /* Set close on exec */ + if (fcntl (pair[0], F_SETFD, FD_CLOEXEC) == -1) { + msg_warn ("fcntl failed: %d, '%s'", errno, strerror (errno)); + goto out; + } + if (fcntl (pair[1], F_SETFD, FD_CLOEXEC) == -1) { + msg_warn ("fcntl failed: %d, '%s'", errno, strerror (errno)); + goto out; + } + + return 0; + +out: + close (pair[0]); + close (pair[1]); + return (-1); +} + +gint +write_pid (struct rspamd_main *main) +{ + pid_t pid; + + if (main->cfg->pid_file == NULL) { + return -1; + } + main->pfh = rspamd_pidfile_open (main->cfg->pid_file, 0644, &pid); + + if (main->pfh == NULL) { + return -1; + } + + if (main->is_privilleged) { + /* Force root user as owner of pid file */ +#ifdef 
HAVE_PIDFILE_FILENO + if (fchown (pidfile_fileno (main->pfh), 0, 0) == -1) { +#else + if (fchown (main->pfh->pf_fd, 0, 0) == -1) { +#endif + msg_err ("cannot chown of pidfile %s to 0:0 user", main->cfg->pid_file); + } + } + + rspamd_pidfile_write (main->pfh); + + return 0; +} + +#ifdef HAVE_SA_SIGINFO +void +init_signals (struct sigaction *signals, void (*sig_handler)(gint, siginfo_t *, void *)) +#else +void +init_signals (struct sigaction *signals, void (*sig_handler)(gint)) +#endif +{ + struct sigaction sigpipe_act; + /* Setting up signal handlers */ + /* SIGUSR1 - reopen config file */ + /* SIGUSR2 - worker is ready for accept */ + sigemptyset (&signals->sa_mask); + sigaddset (&signals->sa_mask, SIGTERM); + sigaddset (&signals->sa_mask, SIGINT); + sigaddset (&signals->sa_mask, SIGHUP); + sigaddset (&signals->sa_mask, SIGCHLD); + sigaddset (&signals->sa_mask, SIGUSR1); + sigaddset (&signals->sa_mask, SIGUSR2); + sigaddset (&signals->sa_mask, SIGALRM); + + +#ifdef HAVE_SA_SIGINFO + signals->sa_flags = SA_SIGINFO; + signals->sa_handler = NULL; + signals->sa_sigaction = sig_handler; +#else + signals->sa_handler = sig_handler; + signals->sa_flags = 0; +#endif + sigaction (SIGTERM, signals, NULL); + sigaction (SIGINT, signals, NULL); + sigaction (SIGHUP, signals, NULL); + sigaction (SIGCHLD, signals, NULL); + sigaction (SIGUSR1, signals, NULL); + sigaction (SIGUSR2, signals, NULL); + sigaction (SIGALRM, signals, NULL); + + /* Ignore SIGPIPE as we handle write errors manually */ + sigemptyset (&sigpipe_act.sa_mask); + sigaddset (&sigpipe_act.sa_mask, SIGPIPE); + sigpipe_act.sa_handler = SIG_IGN; + sigpipe_act.sa_flags = 0; + sigaction (SIGPIPE, &sigpipe_act, NULL); +} + +static void +pass_signal_cb (gpointer key, gpointer value, gpointer ud) +{ + struct rspamd_worker *cur = value; + gint signo = GPOINTER_TO_INT (ud); + + kill (cur->pid, signo); +} + +void +pass_signal_worker (GHashTable * workers, gint signo) +{ + g_hash_table_foreach (workers, pass_signal_cb, 
GINT_TO_POINTER (signo)); +} + +void +convert_to_lowercase (gchar *str, guint size) +{ + while (size--) { + *str = g_ascii_tolower (*str); + str++; + } +} + +#ifndef HAVE_SETPROCTITLE + +static gchar *title_buffer = 0; +static size_t title_buffer_size = 0; +static gchar *title_progname, *title_progname_full; + +gint +setproctitle (const gchar *fmt, ...) +{ + if (!title_buffer || !title_buffer_size) { + errno = ENOMEM; + return -1; + } + + memset (title_buffer, '\0', title_buffer_size); + + ssize_t written; + + if (fmt) { + ssize_t written2; + va_list ap; + + written = snprintf (title_buffer, title_buffer_size, "%s: ", title_progname); + if (written < 0 || (size_t) written >= title_buffer_size) + return -1; + + va_start (ap, fmt); + written2 = vsnprintf (title_buffer + written, title_buffer_size - written, fmt, ap); + va_end (ap); + if (written2 < 0 || (size_t) written2 >= title_buffer_size - written) + return -1; + } + else { + written = snprintf (title_buffer, title_buffer_size, "%s", title_progname); + if (written < 0 || (size_t) written >= title_buffer_size) + return -1; + } + + written = strlen (title_buffer); + memset (title_buffer + written, '\0', title_buffer_size - written); + + return 0; +} + +/* + It has to be _init function, because __attribute__((constructor)) + functions gets called without arguments. 
/*
 * Prepare the buffer later used by setproctitle(): find the memory region
 * occupied by the argv and envp strings, move the environment to freshly
 * allocated copies and remember that region in title_buffer and
 * title_buffer_size.
 * Always returns 0; on DARWIN and SOLARIS nothing is done.
 */
gint
init_title (gint argc, gchar *argv[], gchar *envp[])
{
#if defined(DARWIN) || defined(SOLARIS)
	/* XXX: try to handle these OSes too */
	return 0;
#else
	gchar *begin_of_buffer = 0, *end_of_buffer = 0;
	gint i;

	/*
	 * The region starts at the first argv string and is extended only
	 * while the next string starts right after the previous terminator
	 */
	for (i = 0; i < argc; ++i) {
		if (!begin_of_buffer)
			begin_of_buffer = argv[i];
		if (!end_of_buffer || end_of_buffer + 1 == argv[i])
			end_of_buffer = argv[i] + strlen (argv[i]);
	}

	/* Same scan for the environment strings; i ends up as their count */
	for (i = 0; envp[i]; ++i) {
		if (!begin_of_buffer)
			begin_of_buffer = envp[i];
		if (!end_of_buffer || end_of_buffer + 1 == envp[i])
			end_of_buffer = envp[i] + strlen (envp[i]);
	}

	if (!end_of_buffer)
		return 0;

	/* One extra slot for the terminating NULL entry */
	gchar **new_environ = g_malloc ((i + 1) * sizeof (envp[0]));

	if (!new_environ)
		return 0;

	/* Duplicate every environment string so the original area can be reused */
	for (i = 0; envp[i]; ++i) {
		if (!(new_environ[i] = g_strdup (envp[i])))
			goto cleanup_enomem;
	}
	new_environ[i] = 0;

	if (program_invocation_name) {
		/* Keep a private copy of the program name and point the globals to it */
		title_progname_full = g_strdup (program_invocation_name);

		if (!title_progname_full)
			goto cleanup_enomem;

		/* Short name is the part after the last '/' (if any) */
		gchar *p = strrchr (title_progname_full, '/');

		if (p)
			title_progname = p + 1;
		else
			title_progname = title_progname_full;

		program_invocation_name = title_progname_full;
		program_invocation_short_name = title_progname;
	}

	/* Switch to the copied environment and publish the title buffer */
	environ = new_environ;
	title_buffer = begin_of_buffer;
	title_buffer_size = end_of_buffer - begin_of_buffer;

	return 0;

 cleanup_enomem:
	/* Release the strings copied so far (entries below index i) */
	for (--i; i >= 0; --i) {
		g_free (new_environ[i]);
	}
	g_free (new_environ);
	return 0;
#endif
}
+ */ + if (fstat (pfh->pf_fd, &sb) == -1) + return (errno); + if (sb.st_dev != pfh->pf_dev || sb.st_ino != pfh->pf_ino) + return -1; + return 0; +} + +static gint +rspamd_pidfile_read (const gchar *path, pid_t * pidptr) +{ + gchar buf[16], *endptr; + gint error, fd, i; + + fd = open (path, O_RDONLY); + if (fd == -1) + return (errno); + + i = read (fd, buf, sizeof (buf) - 1); + error = errno; /* Remember errno in case close() wants to change it. */ + close (fd); + if (i == -1) + return error; + else if (i == 0) + return EAGAIN; + buf[i] = '\0'; + + *pidptr = strtol (buf, &endptr, 10); + if (endptr != &buf[i]) + return EINVAL; + + return 0; +} + +rspamd_pidfh_t * +rspamd_pidfile_open (const gchar *path, mode_t mode, pid_t * pidptr) +{ + rspamd_pidfh_t *pfh; + struct stat sb; + gint error, fd, len, count; + struct timespec rqtp; + + pfh = g_malloc (sizeof (*pfh)); + if (pfh == NULL) + return NULL; + + if (path == NULL) + len = snprintf (pfh->pf_path, sizeof (pfh->pf_path), "/var/run/%s.pid", g_get_prgname ()); + else + len = snprintf (pfh->pf_path, sizeof (pfh->pf_path), "%s", path); + if (len >= (gint)sizeof (pfh->pf_path)) { + g_free (pfh); + errno = ENAMETOOLONG; + return NULL; + } + + /* + * Open the PID file and obtain exclusive lock. + * We truncate PID file here only to remove old PID immediatelly, + * PID file will be truncated again in pidfile_write(), so + * pidfile_write() can be called multiple times. 
+ */ + fd = open (pfh->pf_path, O_WRONLY | O_CREAT | O_TRUNC | O_NONBLOCK, mode); + lock_file (fd, TRUE); + if (fd == -1) { + count = 0; + rqtp.tv_sec = 0; + rqtp.tv_nsec = 5000000; + if (errno == EWOULDBLOCK && pidptr != NULL) { + again: + errno = rspamd_pidfile_read (pfh->pf_path, pidptr); + if (errno == 0) + errno = EEXIST; + else if (errno == EAGAIN) { + if (++count <= 3) { + nanosleep (&rqtp, 0); + goto again; + } + } + } + g_free (pfh); + return NULL; + } + /* + * Remember file information, so in pidfile_write() we are sure we write + * to the proper descriptor. + */ + if (fstat (fd, &sb) == -1) { + error = errno; + unlink (pfh->pf_path); + close (fd); + g_free (pfh); + errno = error; + return NULL; + } + + pfh->pf_fd = fd; + pfh->pf_dev = sb.st_dev; + pfh->pf_ino = sb.st_ino; + + return pfh; +} + +gint +rspamd_pidfile_write (rspamd_pidfh_t *pfh) +{ + gchar pidstr[16]; + gint error, fd; + + /* + * Check remembered descriptor, so we don't overwrite some other + * file if pidfile was closed and descriptor reused. + */ + errno = rspamd_pidfile_verify (pfh); + if (errno != 0) { + /* + * Don't close descriptor, because we are not sure if it's ours. + */ + return -1; + } + fd = pfh->pf_fd; + + /* + * Truncate PID file, so multiple calls of pidfile_write() are allowed. 
+ */ + if (ftruncate (fd, 0) == -1) { + error = errno; + _rspamd_pidfile_remove (pfh, 0); + errno = error; + return -1; + } + + rspamd_snprintf (pidstr, sizeof (pidstr), "%P", getpid ()); + if (pwrite (fd, pidstr, strlen (pidstr), 0) != (ssize_t) strlen (pidstr)) { + error = errno; + _rspamd_pidfile_remove (pfh, 0); + errno = error; + return -1; + } + + return 0; +} + +gint +rspamd_pidfile_close (rspamd_pidfh_t *pfh) +{ + gint error; + + error = rspamd_pidfile_verify (pfh); + if (error != 0) { + errno = error; + return -1; + } + + if (close (pfh->pf_fd) == -1) + error = errno; + g_free (pfh); + if (error != 0) { + errno = error; + return -1; + } + return 0; +} + +static gint +_rspamd_pidfile_remove (rspamd_pidfh_t *pfh, gint freeit) +{ + gint error; + + error = rspamd_pidfile_verify (pfh); + if (error != 0) { + errno = error; + return -1; + } + + if (unlink (pfh->pf_path) == -1) + error = errno; + if (!unlock_file (pfh->pf_fd, FALSE)) { + if (error == 0) + error = errno; + } + if (close (pfh->pf_fd) == -1) { + if (error == 0) + error = errno; + } + if (freeit) + g_free (pfh); + else + pfh->pf_fd = -1; + if (error != 0) { + errno = error; + return -1; + } + return 0; +} + +gint +rspamd_pidfile_remove (rspamd_pidfh_t *pfh) +{ + + return (_rspamd_pidfile_remove (pfh, 1)); +} +#endif + +/* Replace %r with rcpt value and %f with from value, new string is allocated in pool */ +gchar * +resolve_stat_filename (rspamd_mempool_t * pool, gchar *pattern, gchar *rcpt, gchar *from) +{ + gint need_to_format = 0, len = 0; + gint rcptlen, fromlen; + gchar *c = pattern, *new, *s; + + if (rcpt) { + rcptlen = strlen (rcpt); + } + else { + rcptlen = 0; + } + + if (from) { + fromlen = strlen (from); + } + else { + fromlen = 0; + } + + /* Calculate length */ + while (*c++) { + if (*c == '%' && *(c + 1) == 'r') { + len += rcptlen; + c += 2; + need_to_format = 1; + continue; + } + else if (*c == '%' && *(c + 1) == 'f') { + len += fromlen; + c += 2; + need_to_format = 1; + continue; + } + 
len++; + } + + /* Do not allocate extra memory if we do not need to format string */ + if (!need_to_format) { + return pattern; + } + + /* Allocate new string */ + new = rspamd_mempool_alloc (pool, len); + c = pattern; + s = new; + + /* Format string */ + while (*c++) { + if (*c == '%' && *(c + 1) == 'r') { + c += 2; + memcpy (s, rcpt, rcptlen); + s += rcptlen; + continue; + } + else if (*c == '%' && *(c + 1) == 'r') { + c += 2; + memcpy (s, from, fromlen); + s += fromlen; + continue; + } + *s++ = *c; + } + + *s = '\0'; + + return new; +} + +#ifdef HAVE_CLOCK_GETTIME +const gchar * +calculate_check_time (struct timeval *tv, struct timespec *begin, gint resolution, guint32 *scan_time) +#else +const gchar * +calculate_check_time (struct timeval *begin, gint resolution, guint32 *scan_time) +#endif +{ + double vdiff, diff; + static gchar res[64]; + static gchar fmt[sizeof ("%.10f ms real, %.10f ms virtual")]; + struct timeval tv_now; + + if (gettimeofday (&tv_now, NULL) == -1) { + msg_warn ("gettimeofday failed: %s", strerror (errno)); + } +#ifdef HAVE_CLOCK_GETTIME + struct timespec ts; + + diff = (tv_now.tv_sec - tv->tv_sec) * 1000. + /* Seconds */ + (tv_now.tv_usec - tv->tv_usec) / 1000.; /* Microseconds */ +#ifdef HAVE_CLOCK_PROCESS_CPUTIME_ID + clock_gettime (CLOCK_PROCESS_CPUTIME_ID, &ts); +#elif defined(HAVE_CLOCK_VIRTUAL) + clock_gettime (CLOCK_VIRTUAL, &ts); +#else + clock_gettime (CLOCK_REALTIME, &ts); +#endif + + vdiff = (ts.tv_sec - begin->tv_sec) * 1000. + /* Seconds */ + (ts.tv_nsec - begin->tv_nsec) / 1000000.; /* Nanoseconds */ +#else + diff = (tv_now.tv_sec - begin->tv_sec) * 1000. + /* Seconds */ + (tv_now.tv_usec - begin->tv_usec) / 1000.; /* Microseconds */ + + vdiff = diff; +#endif + + *scan_time = diff; + + sprintf (fmt, "%%.%dfms real, %%.%dfms virtual", resolution, resolution); + snprintf (res, sizeof (res), fmt, diff, vdiff); + + return (const gchar *)res; +} + +#ifndef g_tolower +# define g_tolower(x) (((x) >= 'A' && (x) <= 'Z') ? 
(x) - 'A' + 'a' : (x)) +#endif + + +gboolean +rspamd_strcase_equal (gconstpointer v, gconstpointer v2) +{ + if (g_ascii_strcasecmp ((const gchar *)v, (const gchar *)v2) == 0) { + return TRUE; + } + + return FALSE; +} + + +guint +rspamd_strcase_hash (gconstpointer key) +{ + const gchar *p = key; + gchar buf[256]; + guint h = 0, i = 0; + + + while (*p != '\0') { + buf[i] = g_ascii_tolower (*p); + i++; + p++; + if (i == sizeof (buf)) { + h ^= murmur32_hash (buf, i); + i = 0; + } + } + + if (i > 0) { + h ^= murmur32_hash (buf, i); + } + + return h; +} + +guint +rspamd_str_hash (gconstpointer key) +{ + gsize len; + + len = strlen ((const gchar *)key); + + return murmur32_hash (key, len); +} + +gboolean +rspamd_str_equal (gconstpointer v, gconstpointer v2) +{ + return strcmp ((const gchar *)v, (const gchar *)v2) == 0; +} + +gboolean +fstr_strcase_equal (gconstpointer v, gconstpointer v2) +{ + const f_str_t *f1 = v, *f2 = v2; + if (f1->len == f2->len && g_ascii_strncasecmp (f1->begin, f2->begin, f1->len) == 0) { + return TRUE; + } + + return FALSE; +} + + +guint +fstr_strcase_hash (gconstpointer key) +{ + const f_str_t *f = key; + const gchar *p; + guint h = 0, i = 0; + gchar buf[256]; + + p = f->begin; + while (p - f->begin < (gint)f->len) { + buf[i] = g_ascii_tolower (*p); + i++; + p++; + if (i == sizeof (buf)) { + h ^= murmur32_hash (buf, i); + i = 0; + } + } + + if (i > 0) { + h ^= murmur32_hash (buf, i); + } + + return h; +} + +void +gperf_profiler_init (struct config_file *cfg, const gchar *descr) +{ +#if defined(WITH_GPERF_TOOLS) + gchar prof_path[PATH_MAX]; + + if (getenv ("CPUPROFILE")) { + + /* disable inherited Profiler enabled in master process */ + ProfilerStop (); + } + /* Try to create temp directory for gmon.out and chdir to it */ + if (cfg->profile_path == NULL) { + cfg->profile_path = g_strdup_printf ("%s/rspamd-profile", cfg->temp_dir); + } + + snprintf (prof_path, sizeof (prof_path), "%s-%s.%d", cfg->profile_path, descr, (gint)getpid ()); + if 
(ProfilerStart (prof_path)) { + /* start ITIMER_PROF timer */ + ProfilerRegisterThread (); + } + else { + msg_warn ("cannot start google perftools profiler"); + } + +#endif +} + +#ifdef HAVE_FLOCK +/* Flock version */ +gboolean +lock_file (gint fd, gboolean async) +{ + gint flags; + + if (async) { + flags = LOCK_EX | LOCK_NB; + } + else { + flags = LOCK_EX; + } + + if (flock (fd, flags) == -1) { + if (async && errno == EAGAIN) { + return FALSE; + } + msg_warn ("lock on file failed: %s", strerror (errno)); + return FALSE; + } + + return TRUE; +} + +gboolean +unlock_file (gint fd, gboolean async) +{ + gint flags; + + if (async) { + flags = LOCK_UN | LOCK_NB; + } + else { + flags = LOCK_UN; + } + + if (flock (fd, flags) == -1) { + if (async && errno == EAGAIN) { + return FALSE; + } + msg_warn ("lock on file failed: %s", strerror (errno)); + return FALSE; + } + + return TRUE; + +} +#else /* HAVE_FLOCK */ +/* Fctnl version */ +gboolean +lock_file (gint fd, gboolean async) +{ + struct flock fl = { + .l_type = F_WRLCK, + .l_whence = SEEK_SET, + .l_start = 0, + .l_len = 0 + }; + + if (fcntl (fd, async ? F_SETLK : F_SETLKW, &fl) == -1) { + if (async && (errno == EAGAIN || errno == EACCES)) { + return FALSE; + } + msg_warn ("lock on file failed: %s", strerror (errno)); + return FALSE; + } + + return TRUE; +} + +gboolean +unlock_file (gint fd, gboolean async) +{ + struct flock fl = { + .l_type = F_UNLCK, + .l_whence = SEEK_SET, + .l_start = 0, + .l_len = 0 + }; + + if (fcntl (fd, async ? 
F_SETLK : F_SETLKW, &fl) == -1) { + if (async && (errno == EAGAIN || errno == EACCES)) { + return FALSE; + } + msg_warn ("lock on file failed: %s", strerror (errno)); + return FALSE; + } + + return TRUE; + +} +#endif /* HAVE_FLOCK */ + + +#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION < 22)) +void +g_ptr_array_unref (GPtrArray *array) +{ + g_ptr_array_free (array, TRUE); +} +#endif +#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION < 14)) +void +g_queue_clear (GQueue *queue) +{ + g_return_if_fail (queue != NULL); + + g_list_free (queue->head); + queue->head = queue->tail = NULL; + queue->length = 0; +} +#endif + +gsize +rspamd_strlcpy (gchar *dst, const gchar *src, gsize siz) +{ + gchar *d = dst; + const gchar *s = src; + gsize n = siz; + + /* Copy as many bytes as will fit */ + if (n != 0) { + while (--n != 0) { + if ((*d++ = *s++) == '\0') { + break; + } + } + } + + if (n == 0 && siz != 0) { + *d = '\0'; + } + + return (s - src - 1); /* count does not include NUL */ +} + +gsize +rspamd_strlcpy_tolower (gchar *dst, const gchar *src, gsize siz) +{ + gchar *d = dst; + const gchar *s = src; + gsize n = siz; + + /* Copy as many bytes as will fit */ + if (n != 0) { + while (--n != 0) { + if ((*d++ = g_ascii_tolower (*s++)) == '\0') { + break; + } + } + } + + if (n == 0 && siz != 0) { + *d = '\0'; + } + + return (s - src - 1); /* count does not include NUL */ +} + +/* Compare two emails for building emails tree */ +gint +compare_email_func (gconstpointer a, gconstpointer b) +{ + const struct uri *u1 = a, *u2 = b; + gint r; + + if (u1->hostlen != u2->hostlen || u1->hostlen == 0) { + return u1->hostlen - u2->hostlen; + } + else { + if ((r = g_ascii_strncasecmp (u1->host, u2->host, u1->hostlen)) == 0){ + if (u1->userlen != u2->userlen || u1->userlen == 0) { + return u1->userlen - u2->userlen; + } + else { + return g_ascii_strncasecmp (u1->user, u2->user, u1->userlen); + } + } + else { + return r; + } + } + + return 0; +} + +gint +compare_url_func (gconstpointer 
a, gconstpointer b) +{ + const struct uri *u1 = a, *u2 = b; + int r; + + if (u1->hostlen != u2->hostlen || u1->hostlen == 0) { + return u1->hostlen - u2->hostlen; + } + else { + r = g_ascii_strncasecmp (u1->host, u2->host, u1->hostlen); + if (r == 0 && u1->is_phished != u2->is_phished) { + /* Always insert phished urls to the tree */ + return -1; + } + } + + return r; +} + +/* + * Find the first occurrence of find in s, ignore case. + */ +gchar * +rspamd_strncasestr (const gchar *s, const gchar *find, gint len) +{ + gchar c, sc; + gsize mlen; + + if ((c = *find++) != 0) { + c = g_ascii_tolower (c); + mlen = strlen (find); + do { + do { + if ((sc = *s++) == 0 || len -- == 0) + return (NULL); + } while (g_ascii_tolower (sc) != c); + } while (g_ascii_strncasecmp (s, find, mlen) != 0); + s--; + } + return ((gchar *)s); +} + +/* + * Try to convert string of length to long + */ +gboolean +rspamd_strtol (const gchar *s, gsize len, glong *value) +{ + const gchar *p = s, *end = s + len; + gchar c; + glong v = 0; + const glong cutoff = G_MAXLONG / 10, cutlim = G_MAXLONG % 10; + gboolean neg; + + /* Case negative values */ + if (*p == '-') { + neg = TRUE; + p ++; + } + else { + neg = FALSE; + } + /* Some preparations for range errors */ + + while (p < end) { + c = *p; + if (c >= '0' && c <= '9') { + c -= '0'; + if (v > cutoff || (v == cutoff && c > cutlim)) { + /* Range error */ + *value = neg ? G_MINLONG : G_MAXLONG; + return FALSE; + } + else { + v *= 10; + v += c; + } + } + else { + return FALSE; + } + p ++; + } + + *value = neg ? 
-(v) : v; + return TRUE; +} + +/* + * Try to convert string of length to long + */ +gboolean +rspamd_strtoul (const gchar *s, gsize len, gulong *value) +{ + const gchar *p = s, *end = s + len; + gchar c; + gulong v = 0; + const gulong cutoff = G_MAXULONG / 10, cutlim = G_MAXULONG % 10; + + /* Some preparations for range errors */ + while (p < end) { + c = *p; + if (c >= '0' && c <= '9') { + c -= '0'; + if (v > cutoff || (v == cutoff && (guint8)c > cutlim)) { + /* Range error */ + *value = G_MAXULONG; + return FALSE; + } + else { + v *= 10; + v += c; + } + } + else { + return FALSE; + } + p ++; + } + + *value = v; + return TRUE; +} + +gint +rspamd_fallocate (gint fd, off_t offset, off_t len) +{ +#if defined(HAVE_FALLOCATE) + return fallocate (fd, 0, offset, len); +#elif defined(HAVE_POSIX_FALLOCATE) + return posix_fallocate (fd, offset, len); +#else + /* Return 0 as nothing can be done on this system */ + return 0; +#endif +} + + +/** + * Create new mutex + * @return mutex or NULL + */ +inline rspamd_mutex_t* +rspamd_mutex_new (void) +{ + rspamd_mutex_t *new; + + new = g_slice_alloc (sizeof (rspamd_mutex_t)); +#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION > 30)) + g_mutex_init (&new->mtx); +#else + g_static_mutex_init (&new->mtx); +#endif + + return new; +} + +/** + * Lock mutex + * @param mtx + */ +inline void +rspamd_mutex_lock (rspamd_mutex_t *mtx) +{ +#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION > 30)) + g_mutex_lock (&mtx->mtx); +#else + g_static_mutex_lock (&mtx->mtx); +#endif +} + +/** + * Unlock mutex + * @param mtx + */ +inline void +rspamd_mutex_unlock (rspamd_mutex_t *mtx) +{ +#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION > 30)) + g_mutex_unlock (&mtx->mtx); +#else + g_static_mutex_unlock (&mtx->mtx); +#endif +} + +void +rspamd_mutex_free (rspamd_mutex_t *mtx) +{ +#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION > 30)) + g_mutex_clear (&mtx->mtx); +#endif + g_slice_free1 (sizeof (rspamd_mutex_t), mtx); +} + +/** + * Create 
new rwlock + * @return + */ +rspamd_rwlock_t* +rspamd_rwlock_new (void) +{ + rspamd_rwlock_t *new; + + new = g_malloc (sizeof (rspamd_rwlock_t)); +#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION > 30)) + g_rw_lock_init (&new->rwlock); +#else + g_static_rw_lock_init (&new->rwlock); +#endif + + return new; +} + +/** + * Lock rwlock for writing + * @param mtx + */ +inline void +rspamd_rwlock_writer_lock (rspamd_rwlock_t *mtx) +{ +#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION > 30)) + g_rw_lock_writer_lock (&mtx->rwlock); +#else + g_static_rw_lock_writer_lock (&mtx->rwlock); +#endif +} + +/** + * Lock rwlock for reading + * @param mtx + */ +inline void +rspamd_rwlock_reader_lock (rspamd_rwlock_t *mtx) +{ +#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION > 30)) + g_rw_lock_reader_lock (&mtx->rwlock); +#else + g_static_rw_lock_reader_lock (&mtx->rwlock); +#endif +} + +/** + * Unlock rwlock from writing + * @param mtx + */ +inline void +rspamd_rwlock_writer_unlock (rspamd_rwlock_t *mtx) +{ +#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION > 30)) + g_rw_lock_writer_unlock (&mtx->rwlock); +#else + g_static_rw_lock_writer_unlock (&mtx->rwlock); +#endif +} + +/** + * Unlock rwlock from reading + * @param mtx + */ +inline void +rspamd_rwlock_reader_unlock (rspamd_rwlock_t *mtx) +{ +#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION > 30)) + g_rw_lock_reader_unlock (&mtx->rwlock); +#else + g_static_rw_lock_reader_unlock (&mtx->rwlock); +#endif +} + +void +rspamd_rwlock_free (rspamd_rwlock_t *mtx) +{ +#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION > 30)) + g_rw_lock_clear (&mtx->rwlock); +#endif + g_slice_free1 (sizeof (rspamd_rwlock_t), mtx); +} + +struct rspamd_thread_data { + gchar *name; + gint id; + GThreadFunc func; + gpointer data; +}; + +static gpointer +rspamd_thread_func (gpointer ud) +{ + struct rspamd_thread_data *td = ud; + sigset_t s_mask; + + /* Ignore signals in thread */ + sigemptyset (&s_mask); + sigaddset (&s_mask, SIGTERM); + 
sigaddset (&s_mask, SIGINT); + sigaddset (&s_mask, SIGHUP); + sigaddset (&s_mask, SIGCHLD); + sigaddset (&s_mask, SIGUSR1); + sigaddset (&s_mask, SIGUSR2); + sigaddset (&s_mask, SIGALRM); + sigaddset (&s_mask, SIGPIPE); + + sigprocmask (SIG_BLOCK, &s_mask, NULL); + + ud = td->func (td->data); + g_free (td->name); + g_free (td); + + return ud; +} + +/** + * Create new named thread + * @param name name pattern + * @param func function to start + * @param data data to pass to function + * @param err error pointer + * @return new thread object that can be joined + */ +GThread* +rspamd_create_thread (const gchar *name, GThreadFunc func, gpointer data, GError **err) +{ + GThread *new; + struct rspamd_thread_data *td; + static gint32 id; + guint r; + + r = strlen (name); + td = g_malloc (sizeof (struct rspamd_thread_data)); + td->id = ++id; + td->name = g_malloc (r + sizeof ("4294967296")); + td->func = func; + td->data = data; + + rspamd_snprintf (td->name, r + sizeof ("4294967296"), "%s-%d", name, id); +#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION > 30)) + new = g_thread_try_new (td->name, rspamd_thread_func, td, err); +#else + new = g_thread_create (rspamd_thread_func, td, TRUE, err); +#endif + + return new; +} + +guint32 +murmur32_hash (const guint8 *in, gsize len) +{ + + + const guint32 c1 = 0xcc9e2d51; + const guint32 c2 = 0x1b873593; + + const int nblocks = len / 4; + const guint32 *blocks = (const guint32 *)(in); + const guint8 *tail; + guint32 h = 0; + gint i; + guint32 k; + + if (in == NULL || len == 0) { + return 0; + } + + tail = (const guint8 *)(in + (nblocks * 4)); + + for (i = 0; i < nblocks; i++) { + k = blocks[i]; + + k *= c1; + k = (k << 15) | (k >> (32 - 15)); + k *= c2; + + h ^= k; + h = (h << 13) | (h >> (32 - 13)); + h = (h * 5) + 0xe6546b64; + } + + k = 0; + switch (len & 3) { + case 3: + k ^= tail[2] << 16; + case 2: + k ^= tail[1] << 8; + case 1: + k ^= tail[0]; + k *= c1; + k = (k << 13) | (k >> (32 - 15)); + k *= c2; + h ^= k; + }; + + 
h ^= len; + + h ^= h >> 16; + h *= 0x85ebca6b; + h ^= h >> 13; + h *= 0xc2b2ae35; + h ^= h >> 16; + + return h; +} + +void +murmur128_hash (const guint8 *in, gsize len, guint64 out[]) +{ + const guint64 c1 = 0x87c37b91114253d5ULL; + const guint64 c2 = 0x4cf5ad432745937fULL; + const gint nblocks = len / 16; + const guint64 *blocks = (const guint64 *)(in); + const guint8 *tail; + guint64 h1 = 0; + guint64 h2 = 0; + int i; + guint64 k1, k2; + + if (in == NULL || len == 0 || out == NULL) { + return; + } + + tail = (const guint8 *)(in + (nblocks * 16)); + + for (i = 0; i < nblocks; i++) { + k1 = blocks[i*2+0]; + k2 = blocks[i*2+1]; + + k1 *= c1; + k1 = (k1 << 31) | (k1 >> (64 - 31)); + k1 *= c2; + h1 ^= k1; + + h1 = (h1 << 27) | (h1 >> (64 - 27)); + h1 += h2; + h1 = h1*5+0x52dce729; + + k2 *= c2; + k2 = (k2 << 33) | (k2 >> (64 - 33)); + k2 *= c1; + h2 ^= k2; + + h2 = (h2 << 31) | (h2 >> (64 - 31)); + h2 += h1; + h2 = h2*5+0x38495ab5; + } + + k1 = k2 = 0; + switch (len & 15) { + case 15: + k2 ^= (guint64)(tail[14]) << 48; + case 14: + k2 ^= (guint64)(tail[13]) << 40; + case 13: + k2 ^= (guint64)(tail[12]) << 32; + case 12: + k2 ^= (guint64)(tail[11]) << 24; + case 11: + k2 ^= (guint64)(tail[10]) << 16; + case 10: + k2 ^= (guint64)(tail[ 9]) << 8; + case 9: + k2 ^= (guint64)(tail[ 8]) << 0; + k2 *= c2; + k2 = (k2 << 33) | (k2 >> (64 - 33)); + k2 *= c1; + h2 ^= k2; + + case 8: + k1 ^= (guint64)(tail[ 7]) << 56; + case 7: + k1 ^= (guint64)(tail[ 6]) << 48; + case 6: + k1 ^= (guint64)(tail[ 5]) << 40; + case 5: + k1 ^= (guint64)(tail[ 4]) << 32; + case 4: + k1 ^= (guint64)(tail[ 3]) << 24; + case 3: + k1 ^= (guint64)(tail[ 2]) << 16; + case 2: + k1 ^= (guint64)(tail[ 1]) << 8; + case 1: + k1 ^= (guint64)(tail[ 0]) << 0; + k1 *= c1; + k1 = (k1 << 31) | (k1 >> (64 - 31)); + k1 *= c2; + h1 ^= k1; + }; + + //---------- + // finalization + + h1 ^= len; + h2 ^= len; + + h1 += h2; + h2 += h1; + + h1 ^= h1 >> 33; + h1 *= 0xff51afd7ed558ccdULL; + h1 ^= h1 >> 33; + h1 *= 
0xc4ceb9fe1a85ec53ULL; + h1 ^= h1 >> 33; + + h2 ^= h2 >> 33; + h2 *= 0xff51afd7ed558ccdULL; + h2 ^= h2 >> 33; + h2 *= 0xc4ceb9fe1a85ec53ULL; + h2 ^= h2 >> 33; + + h1 += h2; + h2 += h1; + + out[0] = h1; + out[1] = h2; +} + +struct hash_copy_callback_data { + gpointer (*key_copy_func)(gconstpointer data, gpointer ud); + gpointer (*value_copy_func)(gconstpointer data, gpointer ud); + gpointer ud; + GHashTable *dst; +}; + +static void +copy_foreach_callback (gpointer key, gpointer value, gpointer ud) +{ + struct hash_copy_callback_data *cb = ud; + gpointer nkey, nvalue; + + nkey = cb->key_copy_func ? cb->key_copy_func (key, cb->ud) : (gpointer)key; + nvalue = cb->value_copy_func ? cb->value_copy_func (value, cb->ud) : (gpointer)value; + g_hash_table_insert (cb->dst, nkey, nvalue); +} +/** + * Deep copy of one hash table to another + * @param src source hash + * @param dst destination hash + * @param key_copy_func function called to copy or modify keys (or NULL) + * @param value_copy_func function called to copy or modify values (or NULL) + * @param ud user data for copy functions + */ +void rspamd_hash_table_copy (GHashTable *src, GHashTable *dst, + gpointer (*key_copy_func)(gconstpointer data, gpointer ud), + gpointer (*value_copy_func)(gconstpointer data, gpointer ud), + gpointer ud) +{ + struct hash_copy_callback_data cb; + if (src != NULL && dst != NULL) { + cb.key_copy_func = key_copy_func; + cb.value_copy_func = value_copy_func; + cb.ud = ud; + cb.dst = dst; + g_hash_table_foreach (src, copy_foreach_callback, &cb); + } +} + +/** + * Utility function to provide mem_pool copy for rspamd_hash_table_copy function + * @param data string to copy + * @param ud memory pool to use + * @return + */ +gpointer +rspamd_str_pool_copy (gconstpointer data, gpointer ud) +{ + rspamd_mempool_t *pool = ud; + + return data ? 
rspamd_mempool_strdup (pool, data) : NULL; +} + +gboolean +parse_ipmask_v4 (const char *line, struct in_addr *ina, int *mask) +{ + const char *pos; + char ip_buf[INET_ADDRSTRLEN + 1], mask_buf[3] = { '\0', '\0', '\0' }; + + bzero (ip_buf, sizeof (ip_buf)); + + if ((pos = strchr (line, '/')) != NULL) { + rspamd_strlcpy (ip_buf, line, MIN ((gsize)(pos - line), sizeof (ip_buf))); + rspamd_strlcpy (mask_buf, pos + 1, sizeof (mask_buf)); + } + else { + rspamd_strlcpy (ip_buf, line, sizeof (ip_buf)); + } + + if (!inet_aton (ip_buf, ina)) { + return FALSE; + } + + if (mask_buf[0] != '\0') { + /* Also parse mask */ + *mask = (mask_buf[0] - '0') * 10 + mask_buf[1] - '0'; + if (*mask > 32) { + return FALSE; + } + } + else { + *mask = 32; + } + + *mask = G_MAXUINT32 << (32 - *mask); + + return TRUE; +} + +static volatile sig_atomic_t saved_signo[NSIG]; + +static +void read_pass_tmp_sig_handler (int s) +{ + + saved_signo[s] = 1; +} + +#ifndef _PATH_TTY +# define _PATH_TTY "/dev/tty" +#endif + +gint +rspamd_read_passphrase (gchar *buf, gint size, gint rwflag, gpointer key) +{ +#ifdef HAVE_PASSPHRASE_H + gint len = 0; + gchar pass[BUFSIZ]; + + if (readpassphrase ("Enter passphrase: ", buf, size, RPP_ECHO_OFF | RPP_REQUIRE_TTY) == NULL) { + return 0; + } + + return strlen (buf); +#else + struct sigaction sa, savealrm, saveint, savehup, savequit, saveterm; + struct sigaction savetstp, savettin, savettou, savepipe; + struct termios term, oterm; + gint input, output, i; + gchar *end, *p, ch; + +restart: + if ((input = output = open (_PATH_TTY, O_RDWR)) == -1) { + errno = ENOTTY; + return 0; + } + if (fcntl (input, F_SETFD, FD_CLOEXEC) == -1) { + msg_warn ("fcntl failed: %d, '%s'", errno, strerror (errno)); + } + + /* Turn echo off */ + if (tcgetattr (input, &oterm) != 0) { + errno = ENOTTY; + return 0; + } + memcpy(&term, &oterm, sizeof(term)); + term.c_lflag &= ~(ECHO | ECHONL); + (void)tcsetattr(input, TCSAFLUSH, &term); + (void)write (output, "Enter passphrase: ", sizeof ("Enter 
passphrase: ") - 1); + + /* Save the current sighandler */ + for (i = 0; i < NSIG; i++) { + saved_signo[i] = 0; + } + sigemptyset(&sa.sa_mask); + sa.sa_flags = 0; + sa.sa_handler = read_pass_tmp_sig_handler; + (void)sigaction (SIGALRM, &sa, &savealrm); + (void)sigaction (SIGHUP, &sa, &savehup); + (void)sigaction (SIGINT, &sa, &saveint); + (void)sigaction (SIGPIPE, &sa, &savepipe); + (void)sigaction (SIGQUIT, &sa, &savequit); + (void)sigaction (SIGTERM, &sa, &saveterm); + (void)sigaction (SIGTSTP, &sa, &savetstp); + (void)sigaction (SIGTTIN, &sa, &savettin); + (void)sigaction (SIGTTOU, &sa, &savettou); + + /* Now read a passphrase */ + p = buf; + end = p + size - 1; + while (read (input, &ch, 1) == 1 && ch != '\n' && ch != '\r') { + if (p < end) { + *p++ = ch; + } + } + *p = '\0'; + (void)write (output, "\n", 1); + + /* Restore terminal state */ + if (memcmp (&term, &oterm, sizeof (term)) != 0) { + while (tcsetattr (input, TCSAFLUSH, &oterm) == -1 && + errno == EINTR && !saved_signo[SIGTTOU]); + } + + /* Restore signal handlers */ + (void)sigaction (SIGALRM, &savealrm, NULL); + (void)sigaction (SIGHUP, &savehup, NULL); + (void)sigaction (SIGINT, &saveint, NULL); + (void)sigaction (SIGQUIT, &savequit, NULL); + (void)sigaction (SIGPIPE, &savepipe, NULL); + (void)sigaction (SIGTERM, &saveterm, NULL); + (void)sigaction (SIGTSTP, &savetstp, NULL); + (void)sigaction (SIGTTIN, &savettin, NULL); + (void)sigaction (SIGTTOU, &savettou, NULL); + + close (input); + + /* Send signals pending */ + for (i = 0; i < NSIG; i++) { + if (saved_signo[i]) { + kill(getpid(), i); + switch (i) { + case SIGTSTP: + case SIGTTIN: + case SIGTTOU: + goto restart; + } + } + } + + return p - buf; +#endif +} + +gboolean +rspamd_ip_is_valid (rspamd_inet_addr_t *addr) +{ + const struct in_addr ip4_any = { INADDR_ANY }, ip4_none = { INADDR_NONE }; + const struct in6_addr ip6_any = IN6ADDR_ANY_INIT; + + gboolean ret = FALSE; + + if (G_LIKELY (addr->af == AF_INET)) { + if (memcmp 
(&addr->addr.s4.sin_addr, &ip4_any, sizeof (struct in_addr)) != 0 && + memcmp (&addr->addr.s4.sin_addr, &ip4_none, + sizeof (struct in_addr)) != 0) { + ret = TRUE; + } + } + else if (G_UNLIKELY (addr->af == AF_INET6)) { + if (memcmp (&addr->addr.s6.sin6_addr, &ip6_any, + sizeof (struct in6_addr)) != 0) { + ret = TRUE; + } + } + + return ret; +} + +/* + * GString ucl emitting functions + */ +static int +rspamd_gstring_append_character (unsigned char c, size_t len, void *ud) +{ + GString *buf = ud; + gsize old_len; + + if (len == 1) { + g_string_append_c (buf, c); + } + else { + if (buf->allocated_len - buf->len <= len) { + old_len = buf->len; + g_string_set_size (buf, buf->len + len + 1); + buf->len = old_len; + } + memset (&buf->str[buf->len], c, len); + buf->len += len; + } + + return 0; +} + +static int +rspamd_gstring_append_len (const unsigned char *str, size_t len, void *ud) +{ + GString *buf = ud; + + g_string_append_len (buf, str, len); + + return 0; +} + +static int +rspamd_gstring_append_int (int64_t val, void *ud) +{ + GString *buf = ud; + + rspamd_printf_gstring (buf, "%L", (intmax_t)val); + return 0; +} + +static int +rspamd_gstring_append_double (double val, void *ud) +{ + GString *buf = ud; + const double delta = 0.0000001; + + if (val == (double)(int)val) { + rspamd_printf_gstring (buf, "%.1f", val); + } + else if (fabs (val - (double)(int)val) < delta) { + /* Write at maximum precision */ + rspamd_printf_gstring (buf, "%.*g", DBL_DIG, val); + } + else { + rspamd_printf_gstring (buf, "%f", val); + } + + return 0; +} + +void +rspamd_ucl_emit_gstring (ucl_object_t *obj, enum ucl_emitter emit_type, GString *target) +{ + struct ucl_emitter_functions func = { + .ucl_emitter_append_character = rspamd_gstring_append_character, + .ucl_emitter_append_len = rspamd_gstring_append_len, + .ucl_emitter_append_int = rspamd_gstring_append_int, + .ucl_emitter_append_double = rspamd_gstring_append_double + }; + + func.ud = target; + ucl_object_emit_full (obj, 
emit_type, &func); +} + +gint +rspamd_accept_from_socket (gint sock, rspamd_inet_addr_t *addr) +{ + gint nfd, serrno; + socklen_t len = sizeof (addr->addr.ss); + + if ((nfd = accept (sock, &addr->addr.sa, &len)) == -1) { + if (errno == EAGAIN || errno == EINTR || errno == EWOULDBLOCK) { + return 0; + } + return -1; + } + + addr->slen = len; + addr->af = addr->addr.sa.sa_family; + + if (make_socket_nonblocking (nfd) < 0) { + goto out; + } + + /* Set close on exec */ + if (fcntl (nfd, F_SETFD, FD_CLOEXEC) == -1) { + msg_warn ("fcntl failed: %d, '%s'", errno, strerror (errno)); + goto out; + } + + return (nfd); + + out: + serrno = errno; + close (nfd); + errno = serrno; + return (-1); + +} + +gboolean +rspamd_parse_inet_address (rspamd_inet_addr_t *target, const char *src) +{ + gboolean ret = FALSE; + + if (inet_pton (AF_INET6, src, &target->addr.s6.sin6_addr) == 1) { + target->af = AF_INET6; + target->slen = sizeof (target->addr.s6); + ret = TRUE; + } + else if (inet_pton (AF_INET, src, &target->addr.s4.sin_addr) == 1) { + target->af = AF_INET; + target->slen = sizeof (target->addr.s4); + ret = TRUE; + } + + target->addr.sa.sa_family = target->af; + + return ret; +} + +const char* +rspamd_inet_address_to_string (rspamd_inet_addr_t *addr) +{ + static char addr_str[INET6_ADDRSTRLEN + 1]; + + switch (addr->af) { + case AF_INET: + return inet_ntop (addr->af, &addr->addr.s4.sin_addr, addr_str, + sizeof (addr_str)); + case AF_INET6: + return inet_ntop (addr->af, &addr->addr.s6.sin6_addr, addr_str, + sizeof (addr_str)); + case AF_UNIX: + return addr->addr.su.sun_path; + } + + return "undefined"; +} + +uint16_t +rspamd_inet_address_get_port (rspamd_inet_addr_t *addr) +{ + switch (addr->af) { + case AF_INET: + return ntohs (addr->addr.s4.sin_port); + case AF_INET6: + return ntohs (addr->addr.s6.sin6_port); + } + + return 0; +} diff --git a/src/libutil/util.h b/src/libutil/util.h new file mode 100644 index 000000000..0605fe87d --- /dev/null +++ b/src/libutil/util.h @@ -0,0 
+1,491 @@ +#ifndef RSPAMD_UTIL_H +#define RSPAMD_UTIL_H + +#include "config.h" +#include "mem_pool.h" +#include "radix.h" +#include "statfile.h" +#include "printf.h" +#include "fstring.h" +#include "ucl.h" + +struct config_file; +struct rspamd_main; +struct workq; +struct statfile; +struct classifier_config; + +/** + * Union that is used for storing sockaddrs + */ +union sa_union { + struct sockaddr_storage ss; + struct sockaddr sa; + struct sockaddr_in s4; + struct sockaddr_in6 s6; + struct sockaddr_un su; +}; + +typedef struct _rspamd_inet_addr_s { + union sa_union addr; + socklen_t slen; + int af; +} rspamd_inet_addr_t; + + +/* + * Create socket and bind or connect it to specified address and port + */ +gint make_tcp_socket (struct addrinfo *, gboolean is_server, gboolean async); +/* + * Create socket and bind or connect it to specified address and port + */ +gint make_udp_socket (struct addrinfo *, gboolean is_server, gboolean async); + +/* + * Create and bind or connect unix socket + */ +gint make_unix_socket (const gchar *, struct sockaddr_un *, gint type, gboolean is_server, gboolean async); + +/** + * Make a universal socket + * @param credits host, ip or path to unix socket + * @param port port (used for network sockets) + * @param type type of socket (SO_STREAM or SO_DGRAM) + * @param async make this socket asynced + * @param is_server make this socket as server socket + * @param try_resolve try name resolution for a socket (BLOCKING) + */ +gint make_universal_socket (const gchar *credits, guint16 port, gint type, + gboolean async, gboolean is_server, gboolean try_resolve); + +/** + * Make a universal sockets + * @param credits host, ip or path to unix socket (several items may be separated by ',') + * @param port port (used for network sockets) + * @param type type of socket (SO_STREAM or SO_DGRAM) + * @param async make this socket asynced + * @param is_server make this socket as server socket + * @param try_resolve try name resolution for a socket 
(BLOCKING) + */ +GList* make_universal_sockets_list (const gchar *credits, guint16 port, gint type, + gboolean async, gboolean is_server, gboolean try_resolve); +/* + * Create socketpair + */ +gint make_socketpair (gint pair[2]); + +/* + * Write pid to file + */ +gint write_pid (struct rspamd_main *); + +/* + * Make specified socket non-blocking + */ +gint make_socket_nonblocking (gint); +/* + * Make specified socket blocking + */ +gint make_socket_blocking (gint); + +/* + * Poll a sync socket for specified events + */ +gint poll_sync_socket (gint fd, gint timeout, short events); + +/* + * Init signals + */ +#ifdef HAVE_SA_SIGINFO +void init_signals (struct sigaction *sa, void (*sig_handler)(gint, siginfo_t *, void *)); +#else +void init_signals (struct sigaction *sa, void (*sig_handler)(gint)); +#endif + +/* + * Send specified signal to each worker + */ +void pass_signal_worker (GHashTable *, gint ); +/* + * Convert string to lowercase + */ +void convert_to_lowercase (gchar *str, guint size); + +#ifndef HAVE_SETPROCTITLE +/* + * Process title utility functions + */ +gint init_title(gint argc, gchar *argv[], gchar *envp[]); +gint setproctitle(const gchar *fmt, ...); +#endif + +#ifndef HAVE_PIDFILE +/* + * Pidfile functions from FreeBSD libutil code + */ +typedef struct rspamd_pidfh_s { + gint pf_fd; +#ifdef HAVE_PATH_MAX + gchar pf_path[PATH_MAX + 1]; +#elif defined(HAVE_MAXPATHLEN) + gchar pf_path[MAXPATHLEN + 1]; +#else + gchar pf_path[1024 + 1]; +#endif + dev_t pf_dev; + ino_t pf_ino; +} rspamd_pidfh_t; +rspamd_pidfh_t *rspamd_pidfile_open(const gchar *path, mode_t mode, pid_t *pidptr); +gint rspamd_pidfile_write(rspamd_pidfh_t *pfh); +gint rspamd_pidfile_close(rspamd_pidfh_t *pfh); +gint rspamd_pidfile_remove(rspamd_pidfh_t *pfh); +#else +typedef struct pidfh rspamd_pidfh_t; +#define rspamd_pidfile_open pidfile_open +#define rspamd_pidfile_write pidfile_write +#define rspamd_pidfile_close pidfile_close +#define rspamd_pidfile_remove pidfile_remove +#endif + +/* 
+ * Replace %r with rcpt value and %f with from value, new string is allocated in pool + */ +gchar* resolve_stat_filename (rspamd_mempool_t *pool, gchar *pattern, gchar *rcpt, gchar *from); +#ifdef HAVE_CLOCK_GETTIME +/* + * Calculate check time with specified resolution of timer + */ +const gchar* calculate_check_time (struct timeval *tv, struct timespec *begin, gint resolution, guint32 *scan_ms); +#else +const gchar* calculate_check_time (struct timeval *begin, gint resolution, guint32 *scan_ms); +#endif + +/* + * File locking functions + */ +gboolean lock_file (gint fd, gboolean async); +gboolean unlock_file (gint fd, gboolean async); + +/* + * Hash table utility functions for case insensitive hashing + */ +guint rspamd_strcase_hash (gconstpointer key); +gboolean rspamd_strcase_equal (gconstpointer v, gconstpointer v2); + +/* + * Hash table utility functions for case sensitive hashing + */ +guint rspamd_str_hash (gconstpointer key); +gboolean rspamd_str_equal (gconstpointer v, gconstpointer v2); + + +/* + * Hash table utility functions for hashing fixed strings + */ +guint fstr_strcase_hash (gconstpointer key); +gboolean fstr_strcase_equal (gconstpointer v, gconstpointer v2); + +/* + * Google perf-tools initialization function + */ +void gperf_profiler_init (struct config_file *cfg, const gchar *descr); + +/* + * Workarounds for older versions of glib + */ +#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION < 22)) +void g_ptr_array_unref (GPtrArray *array); +#endif +#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION < 14)) +void g_queue_clear (GQueue *queue); +#endif + + +/** + * Copy src to dest limited to len, in compare with standart strlcpy(3) rspamd strlcpy does not + * traverse the whole string and it is possible to use it for non NULL terminated strings. 
This is + * more like memccpy(dst, src, size, '\0') + * + * @param dst destination string + * @param src source string + * @param siz length of destination buffer + * @return bytes copied + */ +gsize rspamd_strlcpy (gchar *dst, const gchar *src, gsize siz); + +/** + * Lowercase strlcpy variant + * @param dst + * @param src + * @param siz + * @return + */ +gsize rspamd_strlcpy_tolower (gchar *dst, const gchar *src, gsize siz); + +/* + * Convert milliseconds to timeval fields + */ +#define msec_to_tv(msec, tv) do { (tv)->tv_sec = (msec) / 1000; (tv)->tv_usec = ((msec) - (tv)->tv_sec * 1000) * 1000; } while(0) +#define double_to_tv(dbl, tv) do { (tv)->tv_sec = (int)(dbl); (tv)->tv_usec = ((dbl) - (int)(dbl))*1000*1000; } while(0) +#define tv_to_msec(tv) (tv)->tv_sec * 1000 + (tv)->tv_usec / 1000 + +/* Compare two emails for building emails tree */ +gint compare_email_func (gconstpointer a, gconstpointer b); + +/* Compare two urls for building emails tree */ +gint compare_url_func (gconstpointer a, gconstpointer b); + +/* + * Find string find in string s ignoring case + */ +gchar* rspamd_strncasestr (const gchar *s, const gchar *find, gint len); + +/* + * Try to convert string of length to long + */ +gboolean rspamd_strtol (const gchar *s, gsize len, glong *value); + +/* + * Try to convert string of length to unsigned long + */ +gboolean rspamd_strtoul (const gchar *s, gsize len, gulong *value); + +/** + * Try to allocate a file on filesystem (using fallocate or posix_fallocate) + * @param fd descriptor + * @param offset offset of file + * @param len length to allocate + * @return -1 in case of failure + */ +gint rspamd_fallocate (gint fd, off_t offset, off_t len); + +/** + * Return worker's control structure by its type + * @param type + * @return worker's control structure or NULL + */ +extern worker_t* get_worker_by_type (GQuark type); + +/** + * Utils for working with threads to be compatible with all glib versions + */ +typedef struct rspamd_mutex_s { +#if 
((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION > 30)) + GMutex mtx; +#else + GStaticMutex mtx; +#endif +} rspamd_mutex_t; + +typedef struct rspamd_rwlock_s { +#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION > 30)) + GRWLock rwlock; +#else + GStaticRWLock rwlock; +#endif +} rspamd_rwlock_t; + + +/** + * Create new mutex + * @return mutex or NULL + */ +rspamd_mutex_t* rspamd_mutex_new (void); + +/** + * Lock mutex + * @param mtx + */ +void rspamd_mutex_lock (rspamd_mutex_t *mtx); + +/** + * Unlock mutex + * @param mtx + */ +void rspamd_mutex_unlock (rspamd_mutex_t *mtx); + +/** + * Clear rspamd mutex + * @param mtx + */ +void rspamd_mutex_free (rspamd_mutex_t *mtx); + +/** + * Create new rwloc + * @return + */ +rspamd_rwlock_t* rspamd_rwlock_new (void); + +/** + * Lock rwlock for writing + * @param mtx + */ +void rspamd_rwlock_writer_lock (rspamd_rwlock_t *mtx); + +/** + * Lock rwlock for reading + * @param mtx + */ +void rspamd_rwlock_reader_lock (rspamd_rwlock_t *mtx); + +/** + * Unlock rwlock from writing + * @param mtx + */ +void rspamd_rwlock_writer_unlock (rspamd_rwlock_t *mtx); + +/** + * Unlock rwlock from reading + * @param mtx + */ +void rspamd_rwlock_reader_unlock (rspamd_rwlock_t *mtx); + +/** + * Free rwlock + * @param mtx + */ +void rspamd_rwlock_free (rspamd_rwlock_t *mtx); + +static inline void +rspamd_cond_wait (GCond *cond, rspamd_mutex_t *mtx) +{ +#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION > 30)) + g_cond_wait (cond, &mtx->mtx); +#else + g_cond_wait (cond, g_static_mutex_get_mutex (&mtx->mtx)); +#endif +} + +/** + * Create new named thread + * @param name name pattern + * @param func function to start + * @param data data to pass to function + * @param err error pointer + * @return new thread object that can be joined + */ +GThread* rspamd_create_thread (const gchar *name, GThreadFunc func, gpointer data, GError **err); + +/** + * Return 32bit murmur hash value for specified input + * @param in input data + * @param len length 
of the input data + * @code + * MurmurHash3 was created by Austin Appleby in 2008. The cannonical + * implementations are in C++ and placed in the public. + * + * https://sites.google.com/site/murmurhash/ + * + * Seungyoung Kim has ported it's cannonical implementation to C language + * in 2012 and published it as a part of qLibc component. + * @endcode + * @return + */ +guint32 murmur32_hash (const guint8 *in, gsize len); + +/** + * Return 32bit murmur hash value for specified input + * @param in input data + * @param len length of the input data + * @param out array of 2 guint64 variables + * @code + * MurmurHash3 was created by Austin Appleby in 2008. The cannonical + * implementations are in C++ and placed in the public. + * + * https://sites.google.com/site/murmurhash/ + * + * Seungyoung Kim has ported it's cannonical implementation to C language + * in 2012 and published it as a part of qLibc component. + * @endcode + * @return + */ +void murmur128_hash (const guint8 *in, gsize len, guint64 out[]); + +/** + * Deep copy of one hash table to another + * @param src source hash + * @param dst destination hash + * @param key_copy_func function called to copy or modify keys (or NULL) + * @param value_copy_func function called to copy or modify values (or NULL) + * @param ud user data for copy functions + */ +void rspamd_hash_table_copy (GHashTable *src, GHashTable *dst, + gpointer (*key_copy_func)(gconstpointer data, gpointer ud), + gpointer (*value_copy_func)(gconstpointer data, gpointer ud), + gpointer ud); + +/** + * Utility function to provide mem_pool copy for rspamd_hash_table_copy function + * @param data string to copy + * @param ud memory pool to use + * @return + */ +gpointer rspamd_str_pool_copy (gconstpointer data, gpointer ud); + +/** + * Parse ipv4 address with optional mask in CIDR format + * @param line cidr notation of ipv4 address + * @param ina destination address + * @param mask destination mask + * @return + */ +gboolean parse_ipmask_v4 (const 
char *line, struct in_addr *ina, int *mask); + +/** + * Read passphrase from tty + * @param buf buffer to fill with a password + * @param size size of the buffer + * @param rwflag unused flag + * @param key unused key + * @return size of password read + */ +gint rspamd_read_passphrase (gchar *buf, gint size, gint rwflag, gpointer key); + +/** + * Check whether specified ip is valid (not INADDR_ANY or INADDR_NONE) for ipv4 or ipv6 + * @param ptr pointer to struct in_addr or struct in6_addr + * @param af address family (AF_INET or AF_INET6) + * @return TRUE if the address is valid + */ +gboolean rspamd_ip_is_valid (rspamd_inet_addr_t *addr); + +/** + * Emit UCL object to gstring + * @param obj object to emit + * @param emit_type emitter type + * @param target target string + */ +void rspamd_ucl_emit_gstring (ucl_object_t *obj, enum ucl_emitter emit_type, GString *target); + +/** + * Accept from listening socket filling addr structure + * @param sock listening socket + * @param addr + * @return + */ +gint rspamd_accept_from_socket (gint sock, rspamd_inet_addr_t *addr); + +/** + * Try to parse address from string + * @param target target to fill + * @param src IP string representation + * @return TRUE if addr has been parsed + */ +gboolean rspamd_parse_inet_address (rspamd_inet_addr_t *target, const char *src); + +/** + * Returns string representation of inet address + * @param addr + * @return statically allocated string pointer (not thread safe) + */ +const char* rspamd_inet_address_to_string (rspamd_inet_addr_t *addr); + +/** + * Returns port number for the specified inet address in host byte order + * @param addr + * @return + */ +uint16_t rspamd_inet_address_get_port (rspamd_inet_addr_t *addr); + +#endif |