summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--config.h.in6
-rw-r--r--contrib/metrohash/metro.h141
-rw-r--r--contrib/mumhash/mum.h372
-rw-r--r--src/controller.c86
-rw-r--r--src/fuzzy_storage.c6
-rw-r--r--src/libcryptobox/cryptobox.c103
-rw-r--r--src/libcryptobox/cryptobox.h46
-rw-r--r--src/libcryptobox/keypairs_cache.c4
-rw-r--r--src/libmime/filter.c5
-rw-r--r--src/libmime/message.c23
-rw-r--r--src/libserver/cfg_file.h3
-rw-r--r--src/libserver/cfg_rcl.c12
-rw-r--r--src/libserver/cfg_utils.c51
-rw-r--r--src/libserver/dynamic_cfg.c5
-rw-r--r--src/libserver/events.c12
-rw-r--r--src/libserver/fuzzy_backend.c46
-rw-r--r--src/libserver/fuzzy_backend.h4
-rw-r--r--src/libserver/protocol.c5
-rw-r--r--src/libserver/re_cache.c14
-rw-r--r--src/libserver/task.c26
-rw-r--r--src/libserver/url.c9
-rw-r--r--src/libstat/tokenizers/osb.c7
-rw-r--r--src/libutil/addr.c16
-rw-r--r--src/libutil/bloom.c11
-rw-r--r--src/libutil/fstring.c16
-rw-r--r--src/libutil/fstring.h8
-rw-r--r--src/libutil/logger.c10
-rw-r--r--src/libutil/map.c1005
-rw-r--r--src/libutil/map.h33
-rw-r--r--src/libutil/map_private.h66
-rw-r--r--src/libutil/shingles.c98
-rw-r--r--src/libutil/shingles.h10
-rw-r--r--src/libutil/str_util.c38
-rw-r--r--src/libutil/upstream.c5
-rw-r--r--src/libutil/util.c7
-rw-r--r--src/lua/lua_common.c14
-rw-r--r--src/lua/lua_logger.c2
-rw-r--r--src/lua/lua_map.c351
-rw-r--r--src/lua/lua_task.c5
-rw-r--r--src/plugins/dkim_check.c9
-rw-r--r--src/plugins/fuzzy_check.c142
-rw-r--r--src/plugins/spf.c4
-rw-r--r--src/plugins/surbl.c36
-rw-r--r--src/rspamd.c20
-rw-r--r--src/rspamd_proxy.c11
-rw-r--r--test/lua/unit/url.lua4
-rw-r--r--test/rspamd_shingles_test.c127
-rw-r--r--utils/rspamd_stats.pl173
48 files changed, 2412 insertions, 795 deletions
diff --git a/config.h.in b/config.h.in
index d858f65c6..f7bdedbfb 100644
--- a/config.h.in
+++ b/config.h.in
@@ -323,8 +323,14 @@ typedef off_t goffset;
# define RSPAMD_ALIGNED(x) __declspec(align(x))
#elif defined(__GNUC__)
# define RSPAMD_ALIGNED(x) __attribute__((aligned(x)))
+#ifndef __clang__
+# define RSPAMD_OPTIMIZE(x) __attribute__((__optimize__ (x)))
+#else
+# define RSPAMD_OPTIMIZE(x)
+#endif
#else
# define RSPAMD_ALIGNED(x)
+# define RSPAMD_OPTIMIZE(x)
#endif
#endif
diff --git a/contrib/metrohash/metro.h b/contrib/metrohash/metro.h
new file mode 100644
index 000000000..36c44d65c
--- /dev/null
+++ b/contrib/metrohash/metro.h
@@ -0,0 +1,141 @@
+/*-
+The MIT License (MIT)
+
+Copyright (c) 2015 J. Andrew Rogers
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+ */
+#ifndef CONTRIB_METROHASH_METRO_H_
+#define CONTRIB_METROHASH_METRO_H_
+
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#ifdef _MSC_VER
+typedef unsigned __int32 uint32_t;
+typedef unsigned __int64 uint64_t;
+#else
+#include <stdint.h>
+#endif
+
+/* rotate right idiom recognized by compiler*/
+inline static uint64_t rotate_right(uint64_t v, unsigned k)
+{
+ return (v >> k) | (v << (64 - k));
+}
+
+inline static uint64_t read_u64(const void * const ptr)
+{
+ return * (uint64_t *) ptr;
+}
+
+inline static uint64_t read_u32(const void * const ptr)
+{
+ return * (uint32_t *) ptr;
+}
+
+inline static uint64_t read_u16(const void * const ptr)
+{
+ return * (uint16_t *) ptr;
+}
+
+inline static uint64_t read_u8 (const void * const ptr)
+{
+ return * (uint8_t *) ptr;
+}
+
+static inline uint64_t metrohash64_1(const uint8_t * key, uint64_t len, uint64_t seed)
+{
+ static const uint64_t k0 = 0xC83A91E1;
+ static const uint64_t k1 = 0x8648DBDB;
+ static const uint64_t k2 = 0x7BDEC03B;
+ static const uint64_t k3 = 0x2F5870A5;
+
+ const uint8_t * ptr = key;
+ const uint8_t * const end = ptr + len;
+
+ uint64_t hash = ((((uint64_t) seed) + k2) * k0) + len;
+
+ if (len >= 32)
+ {
+ uint64_t v[4];
+ v[0] = hash;
+ v[1] = hash;
+ v[2] = hash;
+ v[3] = hash;
+
+ do
+ {
+ v[0] += read_u64(ptr) * k0; ptr += 8; v[0] = rotate_right(v[0],29) + v[2];
+ v[1] += read_u64(ptr) * k1; ptr += 8; v[1] = rotate_right(v[1],29) + v[3];
+ v[2] += read_u64(ptr) * k2; ptr += 8; v[2] = rotate_right(v[2],29) + v[0];
+ v[3] += read_u64(ptr) * k3; ptr += 8; v[3] = rotate_right(v[3],29) + v[1];
+ }
+ while (ptr <= (end - 32));
+
+ v[2] ^= rotate_right(((v[0] + v[3]) * k0) + v[1], 33) * k1;
+ v[3] ^= rotate_right(((v[1] + v[2]) * k1) + v[0], 33) * k0;
+ v[0] ^= rotate_right(((v[0] + v[2]) * k0) + v[3], 33) * k1;
+ v[1] ^= rotate_right(((v[1] + v[3]) * k1) + v[2], 33) * k0;
+ hash += v[0] ^ v[1];
+ }
+
+ if ((end - ptr) >= 16)
+ {
+ uint64_t v0 = hash + (read_u64(ptr) * k0); ptr += 8; v0 = rotate_right(v0,33) * k1;
+ uint64_t v1 = hash + (read_u64(ptr) * k1); ptr += 8; v1 = rotate_right(v1,33) * k2;
+ v0 ^= rotate_right(v0 * k0, 35) + v1;
+ v1 ^= rotate_right(v1 * k3, 35) + v0;
+ hash += v1;
+ }
+
+ if ((end - ptr) >= 8)
+ {
+ hash += read_u64(ptr) * k3; ptr += 8;
+ hash ^= rotate_right(hash, 33) * k1;
+
+ }
+
+ if ((end - ptr) >= 4)
+ {
+ hash += read_u32(ptr) * k3; ptr += 4;
+ hash ^= rotate_right(hash, 15) * k1;
+ }
+
+ if ((end - ptr) >= 2)
+ {
+ hash += read_u16(ptr) * k3; ptr += 2;
+ hash ^= rotate_right(hash, 13) * k1;
+ }
+
+ if ((end - ptr) >= 1)
+ {
+ hash += read_u8 (ptr) * k3;
+ hash ^= rotate_right(hash, 25) * k1;
+ }
+
+ hash ^= rotate_right(hash, 33);
+ hash *= k0;
+ hash ^= rotate_right(hash, 33);
+
+ return hash;
+}
+
+#endif /* CONTRIB_METROHASH_METRO_H_ */
diff --git a/contrib/mumhash/mum.h b/contrib/mumhash/mum.h
new file mode 100644
index 000000000..1daebf3de
--- /dev/null
+++ b/contrib/mumhash/mum.h
@@ -0,0 +1,372 @@
+/* Copyright (c) 2016 Vladimir Makarov <vmakarov@gcc.gnu.org>
+
+ Permission is hereby granted, free of charge, to any person
+ obtaining a copy of this software and associated documentation
+ files (the "Software"), to deal in the Software without
+ restriction, including without limitation the rights to use, copy,
+ modify, merge, publish, distribute, sublicense, and/or sell copies
+ of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
+*/
+
+/* This file implements MUM (MUltiply and Mix) hashing. We randomize
+ input data by 64x64-bit multiplication and mixing hi- and low-parts
+ of the multiplication result by using an addition and then mix it
+ into the current state. We use prime numbers randomly generated
+ with the equal probability of their bit values for the
+ multiplication. When all primes are used once, the state is
+ randomized and the same prime numbers are used again for data
+ randomization.
+
+ The MUM hashing passes all SMHasher tests. Pseudo Random Number
+ Generator based on MUM also passes NIST Statistical Test Suite for
+ Random and Pseudorandom Number Generators for Cryptographic
+ Applications (version 2.2.1) with 1000 bitstreams each containing
+ 1M bits. MUM hashing is also faster Spooky64 and City64 on small
+ strings (at least upto 512-bit) on Haswell and Power7. The MUM bulk
+ speed (speed on very long data) is bigger than Spooky and City on
+ Power7. On Haswell the bulk speed is bigger than Spooky one and
+ close to City speed. */
+
+#ifndef __MUM_HASH__
+#define __MUM_HASH__
+
+#include "config.h"
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#ifdef _MSC_VER
+typedef unsigned __int32 uint32_t;
+typedef unsigned __int64 uint64_t;
+#else
+#include <stdint.h>
+#endif
+
+#ifdef __GNUC__
+#define _MUM_ATTRIBUTE_UNUSED __attribute__((unused))
+#define _MUM_OPTIMIZE(opts) RSPAMD_OPTIMIZE(opts)
+#define _MUM_TARGET(opts)
+#else
+#define _MUM_ATTRIBUTE_UNUSED
+#define _MUM_OPTIMIZE(opts)
+#define _MUM_TARGET(opts)
+#endif
+
+/* Macro saying to use 128-bit integers implemented by GCC for some
+ targets. */
+#ifndef _MUM_USE_INT128
+/* In GCC uint128_t is defined if HOST_BITS_PER_WIDE_INT >= 64.
+ HOST_WIDE_INT is long if HOST_BITS_PER_LONG > HOST_BITS_PER_INT,
+ otherwise int. */
+#if defined(__GNUC__) && UINT_MAX != ULONG_MAX
+#define _MUM_USE_INT128 1
+#else
+#define _MUM_USE_INT128 0
+#endif
+#endif
+
+
+/* Here are different primes randomly generated with the equal
+ probability of their bit values. They are used to randomize input
+ values. */
+static uint64_t _mum_hash_step_prime = 0x2e0bb864e9ea7df5ULL;
+static uint64_t _mum_key_step_prime = 0xcdb32970830fcaa1ULL;
+static uint64_t _mum_block_start_prime = 0xc42b5e2e6480b23bULL;
+static uint64_t _mum_unroll_prime = 0x7b51ec3d22f7096fULL;
+static uint64_t _mum_tail_prime = 0xaf47d47c99b1461bULL;
+static uint64_t _mum_finish_prime1 = 0xa9a7ae7ceff79f3fULL;
+static uint64_t _mum_finish_prime2 = 0xaf47d47c99b1461bULL;
+
+static uint64_t _mum_primes [] = {
+ 0X9ebdcae10d981691, 0X32b9b9b97a27ac7d, 0X29b5584d83d35bbd, 0X4b04e0e61401255f,
+ 0X25e8f7b1f1c9d027, 0X80d4c8c000f3e881, 0Xbd1255431904b9dd, 0X8a3bd4485eee6d81,
+ 0X3bc721b2aad05197, 0X71b1a19b907d6e33, 0X525e6c1084a8534b, 0X9e4c2cd340c1299f,
+ 0Xde3add92e94caa37, 0X7e14eadb1f65311d, 0X3f5aa40f89812853, 0X33b15a3b587d15c9,
+};
+
+/* Multiply 64-bit V and P and return sum of high and low parts of the
+ result. */
+static inline uint64_t
+_mum (uint64_t v, uint64_t p) {
+ uint64_t hi, lo;
+#if _MUM_USE_INT128
+#if defined(__aarch64__)
+ /* AARCH64 needs 2 insns to calculate 128-bit result of the
+ multiplication. If we use a generic code we actually call a
+ function doing 128x128->128 bit multiplication. The function is
+ very slow. */
+ lo = v * p, hi;
+ asm ("umulh %0, %1, %2" : "=r" (hi) : "r" (v), "r" (p));
+#else
+ __uint128_t r = (__uint128_t) v * (__uint128_t) p;
+ hi = (uint64_t) (r >> 64);
+ lo = (uint64_t) r;
+#endif
+#else
+ /* Implementation of 64x64->128-bit multiplication by four 32x32->64
+ bit multiplication. */
+ uint64_t hv = v >> 32, hp = p >> 32;
+ uint64_t lv = (uint32_t) v, lp = (uint32_t) p;
+ uint64_t rh = hv * hp;
+ uint64_t rm_0 = hv * lp;
+ uint64_t rm_1 = hp * lv;
+ uint64_t rl = lv * lp;
+ uint64_t t, carry = 0;
+
+ /* We could ignore a carry bit here if we did not care about the
+ same hash for 32-bit and 64-bit targets. */
+ t = rl + (rm_0 << 32);
+#ifdef MUM_TARGET_INDEPENDENT_HASH
+ carry = t < rl;
+#endif
+ lo = t + (rm_1 << 32);
+#ifdef MUM_TARGET_INDEPENDENT_HASH
+ carry += lo < t;
+#endif
+ hi = rh + (rm_0 >> 32) + (rm_1 >> 32) + carry;
+#endif
+ /* We could use XOR here too but, for some reasons, on Haswell and
+ Power7 using an addition improves hashing performance by 10% for
+ small strings. */
+ return hi + lo;
+}
+
+#if defined(_MSC_VER)
+#define _mum_bswap_64(x) _byteswap_uint64_t (x)
+#elif defined(__APPLE__)
+#include <libkern/OSByteOrder.h>
+#define _mum_bswap_64(x) OSSwapInt64 (x)
+#elif defined(__GNUC__)
+#define _mum_bswap64(x) __builtin_bswap64 (x)
+#else
+#include <byteswap.h>
+#define _mum_bswap64(x) bswap64 (x)
+#endif
+
+static inline uint64_t
+_mum_le (uint64_t v) {
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ || !defined(MUM_TARGET_INDEPENDENT_HASH)
+ return v;
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+ return _mum_bswap64 (v);
+#else
+#error "Unknown endianess"
+#endif
+}
+/* Macro defining how many times the most nested loop in
+ _mum_hash_aligned will be unrolled by the compiler (although it can
+ make an own decision:). Use only a constant here to help a
+ compiler to unroll a major loop.
+
+ The macro value affects the result hash for strings > 128 bit. The
+ unroll factor greatly affects the hashing speed. We prefer the
+ speed. */
+#ifndef _MUM_UNROLL_FACTOR_POWER
+#if defined(__PPC64__) && !defined(MUM_TARGET_INDEPENDENT_HASH)
+#define _MUM_UNROLL_FACTOR_POWER 3
+#elif defined(__aarch64__) && !defined(MUM_TARGET_INDEPENDENT_HASH)
+#define _MUM_UNROLL_FACTOR_POWER 4
+#else
+#define _MUM_UNROLL_FACTOR_POWER 2
+#endif
+#endif
+
+#if _MUM_UNROLL_FACTOR_POWER < 1
+#error "too small unroll factor"
+#elif _MUM_UNROLL_FACTOR_POWER > 4
+#error "We have not enough primes for such unroll factor"
+#endif
+
+#define _MUM_UNROLL_FACTOR (1 << _MUM_UNROLL_FACTOR_POWER)
+
+static inline uint64_t _MUM_OPTIMIZE("unroll-loops")
+_mum_hash_aligned (uint64_t start, const void *key, size_t len) {
+ uint64_t result = start;
+ const unsigned char *str = (const unsigned char *) key;
+ union {uint64_t i; unsigned char s[sizeof (uint64_t)];} p;
+ int i;
+ size_t n;
+
+ result = _mum (result, _mum_block_start_prime);
+ while (len > _MUM_UNROLL_FACTOR * sizeof (uint64_t)) {
+ /* This loop could be vectorized when we have vector insns for
+ 64x64->128-bit multiplication. AVX2 currently only have a
+ vector insn for 4 32x32->64-bit multiplication. */
+ for (i = 0; i < _MUM_UNROLL_FACTOR; i++)
+ result ^= _mum (_mum_le (((uint64_t *) str)[i]), _mum_primes[i]);
+ len -= _MUM_UNROLL_FACTOR * sizeof (uint64_t);
+ str += _MUM_UNROLL_FACTOR * sizeof (uint64_t);
+ /* We will use the same prime numbers on the next iterations --
+ randomize the state. */
+ result = _mum (result, _mum_unroll_prime);
+ }
+ n = len / sizeof (uint64_t);
+ for (i = 0; i < n; i++)
+ result ^= _mum (_mum_le (((uint64_t *) str)[i]), _mum_primes[i]);
+ len -= n * sizeof (uint64_t); str += n * sizeof (uint64_t);
+ if (len) {
+ p.i = 0; memcpy (p.s, str, len);
+ result ^= _mum (_mum_le (((uint64_t *) p.s)[0]), _mum_tail_prime);
+ }
+ return result;
+}
+
+/* Final randomization of H. */
+static inline uint64_t
+_mum_final (uint64_t h) {
+ h ^= _mum (h, _mum_finish_prime1);
+ h ^= _mum (h, _mum_finish_prime2);
+ return h;
+}
+
+#if defined(__x86_64__) && defined(__GNUC__)
+#if 0
+/* We want to use AVX2 insn MULX instead of generic x86-64 MULQ where
+ it is possible. Although on modern Intel processors MULQ takes
+ 3-cycles vs. 4 for MULX, MULX permits more freedom in insn
+ scheduling as it uses less fixed registers. */
+static inline uint64_t _MUM_TARGET("arch=haswell")
+_mum_hash_avx2 (const void * key, size_t len, uint64_t seed) {
+ return _mum_final (_mum_hash_aligned (seed + len, key, len));
+}
+#endif
+#endif
+
+#ifndef _MUM_UNALIGNED_ACCESS
+#if defined(__x86_64__) || defined(__i386__) || defined(__PPC64__) \
+ || defined(__s390__) || defined(__m32c__) || defined(cris) \
+ || defined(__CR16__) || defined(__vax__) || defined(__m68k__) \
+ || defined(__aarch64__)
+#define _MUM_UNALIGNED_ACCESS 1
+#else
+#define _MUM_UNALIGNED_ACCESS 0
+#endif
+#endif
+
+/* When we need an aligned access to data being hashed we move part of
+ the unaligned data to an aligned block of given size and then
+ process it, repeating processing the data by the block. */
+#ifndef _MUM_BLOCK_LEN
+#define _MUM_BLOCK_LEN 1024
+#endif
+
+#if _MUM_BLOCK_LEN < 8
+#error "too small block length"
+#endif
+
+static inline uint64_t
+#if defined(__x86_64__)
+_MUM_TARGET("inline-all-stringops")
+#endif
+_mum_hash_default (const void *key, size_t len, uint64_t seed) {
+ uint64_t result;
+ const unsigned char *str = (const unsigned char *) key;
+ size_t block_len;
+ uint64_t buf[_MUM_BLOCK_LEN / sizeof (uint64_t)];
+
+ result = seed + len;
+ if (_MUM_UNALIGNED_ACCESS || ((size_t) str & 0x7) == 0)
+ result = _mum_hash_aligned (result, key, len);
+ else {
+ while (len != 0) {
+ block_len = len < _MUM_BLOCK_LEN ? len : _MUM_BLOCK_LEN;
+ memcpy (buf, str, block_len);
+ result = _mum_hash_aligned (result, buf, block_len);
+ len -= block_len;
+ str += block_len;
+ }
+ }
+ return _mum_final (result);
+}
+
+static inline uint64_t
+_mum_next_factor (void) {
+ uint64_t start = 0;
+ int i;
+
+ for (i = 0; i < 8; i++)
+ start = (start << 8) | rand() % 256;
+ return start;
+}
+
+/* ++++++++++++++++++++++++++ Interface functions: +++++++++++++++++++ */
+
+/* Set random multiplicators depending on SEED. */
+static inline void
+mum_hash_randomize (uint64_t seed) {
+ int i;
+
+ srand (seed);
+ _mum_hash_step_prime = _mum_next_factor ();
+ _mum_key_step_prime = _mum_next_factor ();
+ _mum_finish_prime1 = _mum_next_factor ();
+ _mum_finish_prime2 = _mum_next_factor ();
+ _mum_block_start_prime = _mum_next_factor ();
+ _mum_unroll_prime = _mum_next_factor ();
+ _mum_tail_prime = _mum_next_factor ();
+ for (i = 0; i < sizeof (_mum_primes) / sizeof (uint64_t); i++)
+ _mum_primes[i] = _mum_next_factor ();
+}
+
+/* Start hashing data with SEED. Return the state. */
+static inline uint64_t
+mum_hash_init (uint64_t seed) {
+ return seed;
+}
+
+/* Process data KEY with the state H and return the updated state. */
+static inline uint64_t
+mum_hash_step (uint64_t h, uint64_t key)
+{
+ return _mum (h, _mum_hash_step_prime) ^ _mum (key, _mum_key_step_prime);
+}
+
+/* Return the result of hashing using the current state H. */
+static inline uint64_t
+mum_hash_finish (uint64_t h) {
+ return _mum_final (h);
+}
+
+/* Fast hashing of KEY with SEED. The hash is always the same for the
+ same key on any target. */
+static inline size_t
+mum_hash64 (uint64_t key, uint64_t seed) {
+ return mum_hash_finish (mum_hash_step (mum_hash_init (seed), key));
+}
+
+/* Hash data KEY of length LEN and SEED. The hash depends on the
+ target endianess and the unroll factor. */
+static inline uint64_t
+mum_hash (const void *key, size_t len, uint64_t seed) {
+#if defined(__x86_64__) && defined(__GNUC__)
+#if 0
+ static int avx2_support = 0;
+
+ if (avx2_support > 0)
+ return _mum_hash_avx2 (key, len, seed);
+
+ else if (! avx2_support) {
+ __builtin_cpu_init ();
+ avx2_support = __builtin_cpu_supports ("avx2") ? 1 : -1;
+ if (avx2_support > 0)
+ return _mum_hash_avx2 (key, len, seed);
+ }
+#endif
+#endif
+ return _mum_hash_default (key, len, seed);
+}
+
+#endif
diff --git a/src/controller.c b/src/controller.c
index 06782e850..8b16eda19 100644
--- a/src/controller.c
+++ b/src/controller.c
@@ -145,7 +145,7 @@ struct rspamd_controller_worker_ctx {
/* SSL private key */
gchar *ssl_key;
/* A map of secure IP */
- GList *secure_ip;
+ const ucl_object_t *secure_ip;
radix_compressed_t *secure_map;
/* Static files dir */
@@ -775,10 +775,10 @@ rspamd_controller_handle_maps (struct rspamd_http_connection_entry *conn_ent,
struct rspamd_controller_session *session = conn_ent->ud;
GList *cur, *tmp = NULL;
struct rspamd_map *map;
+ struct rspamd_map_backend *bk;
gboolean editable;
ucl_object_t *obj, *top;
-
if (!rspamd_controller_check_password (conn_ent, session, msg, FALSE)) {
return 0;
}
@@ -788,8 +788,10 @@ rspamd_controller_handle_maps (struct rspamd_http_connection_entry *conn_ent,
cur = session->ctx->cfg->maps;
while (cur) {
map = cur->data;
- if (map->protocol == MAP_PROTO_FILE) {
- if (access (map->uri, R_OK) == 0) {
+ bk = g_ptr_array_index (map->backends, 0);
+
+ if (bk->protocol == MAP_PROTO_FILE) {
+ if (access (bk->uri, R_OK) == 0) {
tmp = g_list_prepend (tmp, map);
}
}
@@ -799,7 +801,8 @@ rspamd_controller_handle_maps (struct rspamd_http_connection_entry *conn_ent,
cur = tmp;
while (cur) {
map = cur->data;
- editable = (access (map->uri, W_OK) == 0);
+ bk = g_ptr_array_index (map->backends, 0);
+ editable = (access (bk->uri, W_OK) == 0);
obj = ucl_object_typed_new (UCL_OBJECT);
ucl_object_insert_key (obj, ucl_object_fromint (map->id),
@@ -808,7 +811,7 @@ rspamd_controller_handle_maps (struct rspamd_http_connection_entry *conn_ent,
ucl_object_insert_key (obj, ucl_object_fromstring (map->description),
"description", 0, false);
}
- ucl_object_insert_key (obj, ucl_object_fromstring (map->uri),
+ ucl_object_insert_key (obj, ucl_object_fromstring (bk->uri),
"uri", 0, false);
ucl_object_insert_key (obj, ucl_object_frombool (editable),
"editable", 0, false);
@@ -840,6 +843,7 @@ rspamd_controller_handle_get_map (struct rspamd_http_connection_entry *conn_ent,
struct rspamd_controller_session *session = conn_ent->ud;
GList *cur;
struct rspamd_map *map;
+ struct rspamd_map_backend *bk;
const rspamd_ftok_t *idstr;
struct stat st;
gint fd;
@@ -870,7 +874,8 @@ rspamd_controller_handle_get_map (struct rspamd_http_connection_entry *conn_ent,
cur = session->ctx->cfg->maps;
while (cur) {
map = cur->data;
- if (map->id == id && map->protocol == MAP_PROTO_FILE) {
+ bk = g_ptr_array_index (map->backends, 0);
+ if (map->id == id && bk->protocol == MAP_PROTO_FILE) {
found = TRUE;
break;
}
@@ -883,8 +888,10 @@ rspamd_controller_handle_get_map (struct rspamd_http_connection_entry *conn_ent,
return 0;
}
- if (stat (map->uri, &st) == -1 || (fd = open (map->uri, O_RDONLY)) == -1) {
- msg_err_session ("cannot open map %s: %s", map->uri, strerror (errno));
+ bk = g_ptr_array_index (map->backends, 0);
+
+ if (stat (bk->uri, &st) == -1 || (fd = open (bk->uri, O_RDONLY)) == -1) {
+ msg_err_session ("cannot open map %s: %s", bk->uri, strerror (errno));
rspamd_controller_send_error (conn_ent, 500, "500 map open error");
return 0;
}
@@ -898,7 +905,7 @@ rspamd_controller_handle_get_map (struct rspamd_http_connection_entry *conn_ent,
if (read (fd, reply->body->str, st.st_size) == -1) {
close (fd);
rspamd_http_message_free (reply);
- msg_err_session ("cannot read map %s: %s", map->uri, strerror (errno));
+ msg_err_session ("cannot read map %s: %s", bk->uri, strerror (errno));
rspamd_controller_send_error (conn_ent, 500, "500 map read error");
return 0;
}
@@ -1800,6 +1807,7 @@ rspamd_controller_handle_savemap (struct rspamd_http_connection_entry *conn_ent,
struct rspamd_controller_session *session = conn_ent->ud;
GList *cur;
struct rspamd_map *map;
+ struct rspamd_map_backend *bk;
struct rspamd_controller_worker_ctx *ctx;
const rspamd_ftok_t *idstr;
gulong id;
@@ -1838,7 +1846,8 @@ rspamd_controller_handle_savemap (struct rspamd_http_connection_entry *conn_ent,
cur = ctx->cfg->maps;
while (cur) {
map = cur->data;
- if (map->id == id && map->protocol == MAP_PROTO_FILE) {
+ bk = g_ptr_array_index (map->backends, 0);
+ if (map->id == id && bk->protocol == MAP_PROTO_FILE) {
found = TRUE;
break;
}
@@ -1851,24 +1860,24 @@ rspamd_controller_handle_savemap (struct rspamd_http_connection_entry *conn_ent,
return 0;
}
- if (g_atomic_int_get (map->locked)) {
- msg_info_session ("map locked: %s", map->uri);
+ bk = g_ptr_array_index (map->backends, 0);
+ if (g_atomic_int_compare_and_exchange (map->locked, 0, 1)) {
+ msg_info_session ("map locked: %s", bk->uri);
rspamd_controller_send_error (conn_ent, 404, "Map is locked");
return 0;
}
/* Set lock */
- g_atomic_int_set (map->locked, 1);
- fd = open (map->uri, O_WRONLY | O_TRUNC);
+ fd = open (bk->uri, O_WRONLY | O_TRUNC);
if (fd == -1) {
g_atomic_int_set (map->locked, 0);
- msg_info_session ("map %s open error: %s", map->uri, strerror (errno));
+ msg_info_session ("map %s open error: %s", bk->uri, strerror (errno));
rspamd_controller_send_error (conn_ent, 404, "Map id not found");
return 0;
}
if (write (fd, msg->body_buf.begin, msg->body_buf.len) == -1) {
- msg_info_session ("map %s write error: %s", map->uri, strerror (errno));
+ msg_info_session ("map %s write error: %s", bk->uri, strerror (errno));
close (fd);
g_atomic_int_set (map->locked, 0);
rspamd_controller_send_error (conn_ent, 500, "Map write error");
@@ -1877,7 +1886,7 @@ rspamd_controller_handle_savemap (struct rspamd_http_connection_entry *conn_ent,
msg_info_session ("<%s>, map %s saved",
rspamd_inet_address_to_string (session->from_addr),
- map->uri);
+ bk->uri);
/* Close and unlock */
close (fd);
g_atomic_int_set (map->locked, 0);
@@ -2481,7 +2490,7 @@ init_controller_worker (struct rspamd_config *cfg)
rspamd_rcl_register_worker_option (cfg,
type,
"secure_ip",
- rspamd_rcl_parse_struct_string_list,
+ rspamd_rcl_parse_struct_ucl,
ctx,
G_STRUCT_OFFSET (struct rspamd_controller_worker_ctx, secure_ip),
0,
@@ -2490,7 +2499,7 @@ init_controller_worker (struct rspamd_config *cfg)
rspamd_rcl_register_worker_option (cfg,
type,
"trusted_ips",
- rspamd_rcl_parse_struct_string_list,
+ rspamd_rcl_parse_struct_ucl,
ctx,
G_STRUCT_OFFSET (struct rspamd_controller_worker_ctx, secure_ip),
0,
@@ -2536,12 +2545,12 @@ void
start_controller_worker (struct rspamd_worker *worker)
{
struct rspamd_controller_worker_ctx *ctx = worker->ctx;
- GList *cur;
struct module_ctx *mctx;
GHashTableIter iter;
gpointer key, value;
struct rspamd_keypair_cache *cache;
- gchar *secure_ip;
+ const ucl_object_t *cur;
+ ucl_object_iter_t it = NULL;
gpointer m;
ctx->ev_base = rspamd_prepare_worker (worker,
@@ -2556,26 +2565,31 @@ start_controller_worker (struct rspamd_worker *worker)
ctx->custom_commands = g_hash_table_new (rspamd_strcase_hash,
rspamd_strcase_equal);
if (ctx->secure_ip != NULL) {
- cur = ctx->secure_ip;
-
- while (cur) {
- secure_ip = cur->data;
- /* Try map syntax */
- if (!rspamd_map_is_map (secure_ip)) {
- if (!radix_add_generic_iplist (secure_ip,
- &ctx->secure_map)) {
- msg_warn_ctx ("cannot load or parse ip list from '%s'",
- secure_ip);
+ if (ucl_object_type (ctx->secure_ip) == UCL_ARRAY) {
+ while ((cur = ucl_object_iterate (ctx->secure_ip, &it, true)) != NULL) {
+ /* Try map syntax */
+ if (ucl_object_type (cur) == UCL_STRING &&
+ !rspamd_map_is_map (ucl_object_tostring (cur))) {
+ if (!radix_add_generic_iplist (ucl_object_tostring (cur),
+ &ctx->secure_map)) {
+ msg_warn_ctx ("cannot load or parse ip list from '%s'",
+ ucl_object_tostring (cur));
+ }
+ }
+ else {
+ rspamd_map_add_from_ucl (worker->srv->cfg, cur,
+ "Allow webui access from the specified IP",
+ rspamd_radix_read, rspamd_radix_fin,
+ (void **)&ctx->secure_map);
}
}
- else {
- rspamd_map_add (worker->srv->cfg, secure_ip,
+ }
+ else {
+ rspamd_map_add_from_ucl (worker->srv->cfg, ctx->secure_ip,
"Allow webui access from the specified IP",
rspamd_radix_read, rspamd_radix_fin,
(void **)&ctx->secure_map);
- }
- cur = g_list_next (cur);
}
}
diff --git a/src/fuzzy_storage.c b/src/fuzzy_storage.c
index f43cb3f44..7803307b0 100644
--- a/src/fuzzy_storage.c
+++ b/src/fuzzy_storage.c
@@ -209,6 +209,7 @@ rspamd_fuzzy_process_updates_queue (struct rspamd_fuzzy_storage_ctx *ctx)
struct rspamd_fuzzy_cmd *cmd;
gpointer ptr;
guint nupdates = 0;
+ time_t now = time (NULL);
if (ctx->updates_pending &&
g_queue_get_length (ctx->updates_pending) > 0 &&
@@ -227,7 +228,7 @@ rspamd_fuzzy_process_updates_queue (struct rspamd_fuzzy_storage_ctx *ctx)
}
if (cmd->cmd == FUZZY_WRITE) {
- rspamd_fuzzy_backend_add (ctx->backend, ptr);
+ rspamd_fuzzy_backend_add (ctx->backend, ptr, now);
}
else {
rspamd_fuzzy_backend_del (ctx->backend, ptr);
@@ -248,7 +249,8 @@ rspamd_fuzzy_process_updates_queue (struct rspamd_fuzzy_storage_ctx *ctx)
}
g_queue_clear (ctx->updates_pending);
- msg_info ("updated fuzzy storage: %ud updates processed", nupdates);
+ msg_info ("updated fuzzy storage: %ud updates processed, version: %d",
+ nupdates, rspamd_fuzzy_backend_version (ctx->backend));
}
else {
msg_err ("cannot commit update transaction to fuzzy backend, "
diff --git a/src/libcryptobox/cryptobox.c b/src/libcryptobox/cryptobox.c
index 1680f1848..71180da12 100644
--- a/src/libcryptobox/cryptobox.c
+++ b/src/libcryptobox/cryptobox.c
@@ -31,7 +31,10 @@
#include "catena/catena.h"
#include "ottery.h"
#include "printf.h"
-
+#include "xxhash.h"
+#define MUM_TARGET_INDEPENDENT_HASH 1 /* For 32/64 bit equal hashes */
+#include "../../contrib/mumhash/mum.h"
+#include "../../contrib/metrohash/metro.h"
#ifdef HAVE_CPUID_H
#include <cpuid.h>
#endif
@@ -1408,3 +1411,101 @@ void rspamd_cryptobox_hash (guchar *out,
rspamd_cryptobox_hash_update (&st, data, len);
rspamd_cryptobox_hash_final (&st, out);
}
+
+
+void
+rspamd_cryptobox_fast_hash_init (rspamd_cryptobox_fast_hash_state_t *st,
+ guint64 seed)
+{
+#if defined(__LP64__) || defined(_LP64)
+ XXH64_state_t *rst = (XXH64_state_t *)st;
+ XXH64_reset (rst, seed);
+#else
+ XXH32_state_t *rst = (XXH32_state_t *)st;
+ XXH32_reset (rst, seed);
+#endif
+}
+
+void
+rspamd_cryptobox_fast_hash_update (rspamd_cryptobox_fast_hash_state_t *st,
+ const void *data, gsize len)
+{
+#if defined(__LP64__) || defined(_LP64)
+ XXH64_state_t *rst = (XXH64_state_t *)st;
+ XXH64_update (rst, data, len);
+#else
+ XXH32_state_t *rst = (XXH32_state_t *)st;
+ XXH32_update (rst, data, len);
+#endif
+}
+
+guint64
+rspamd_cryptobox_fast_hash_final (rspamd_cryptobox_fast_hash_state_t *st)
+{
+#if defined(__LP64__) || defined(_LP64)
+ XXH64_state_t *rst = (XXH64_state_t *)st;
+ return XXH64_digest (rst);
+#else
+ XXH32_state_t *rst = (XXH32_state_t *)st;
+ XXH32_digest (rst);
+#endif
+
+}
+
+/**
+ * One in all function
+ */
+static inline guint64
+rspamd_cryptobox_fast_hash_machdep (const void *data,
+ gsize len, guint64 seed)
+{
+ if (len >= 8 && len % 8 == 0) {
+ return mum_hash (data, len, seed);
+ }
+ else {
+#if defined(__LP64__) || defined(_LP64)
+ return metrohash64_1 (data, len, seed);
+#endif
+ }
+
+ return XXH32 (data, len, seed);
+}
+
+static inline guint64
+rspamd_cryptobox_fast_hash_indep (const void *data,
+ gsize len, guint64 seed)
+{
+ if (len >= 8 && len % 8 == 0) {
+ return mum_hash (data, len, seed);
+ }
+
+ return metrohash64_1 (data, len, seed);
+}
+
+guint64
+rspamd_cryptobox_fast_hash (const void *data,
+ gsize len, guint64 seed)
+{
+ return rspamd_cryptobox_fast_hash_machdep (data, len, seed);
+}
+
+guint64
+rspamd_cryptobox_fast_hash_specific (
+ enum rspamd_cryptobox_fast_hash_type type,
+ const void *data,
+ gsize len, guint64 seed)
+{
+ switch (type) {
+ case RSPAMD_CRYPTOBOX_XXHASH32:
+ return XXH32 (data, len, seed);
+ case RSPAMD_CRYPTOBOX_XXHASH64:
+ return XXH64 (data, len, seed);
+ case RSPAMD_CRYPTOBOX_MUMHASH:
+ return mum_hash (data, len, seed);
+ case RSPAMD_CRYPTOBOX_HASHFAST_INDEPENDENT:
+ return rspamd_cryptobox_fast_hash_indep (data, len, seed);
+ case RSPAMD_CRYPTOBOX_HASHFAST:
+ default:
+ return rspamd_cryptobox_fast_hash_machdep (data, len, seed);
+ }
+}
diff --git a/src/libcryptobox/cryptobox.h b/src/libcryptobox/cryptobox.h
index 9631bd8d0..6facf0a0e 100644
--- a/src/libcryptobox/cryptobox.h
+++ b/src/libcryptobox/cryptobox.h
@@ -324,4 +324,50 @@ void rspamd_cryptobox_hash (guchar *out,
const guchar *key,
gsize keylen);
+/* Non crypto hash IUF interface */
+typedef struct RSPAMD_ALIGNED(32) rspamd_cryptobox_fast_hash_state_s {
+ unsigned char opaque[88];
+} rspamd_cryptobox_fast_hash_state_t;
+
+/**
+ * Init cryptobox hash state using key if needed, `st` must point to the buffer
+ * with at least rspamd_cryptobox_HASHSTATEBYTES bytes length. If keylen == 0, then
+ * non-keyed hash is generated
+ */
+void rspamd_cryptobox_fast_hash_init (rspamd_cryptobox_fast_hash_state_t *st,
+ guint64 seed);
+
+/**
+ * Update hash with data portion
+ */
+void rspamd_cryptobox_fast_hash_update (rspamd_cryptobox_fast_hash_state_t *st,
+ const void *data, gsize len);
+
+/**
+ * Output hash to the buffer of rspamd_cryptobox_HASHBYTES length
+ */
+guint64 rspamd_cryptobox_fast_hash_final (rspamd_cryptobox_fast_hash_state_t *st);
+
+/**
+ * One in all function
+ */
+guint64 rspamd_cryptobox_fast_hash (const void *data,
+ gsize len, guint64 seed);
+
+enum rspamd_cryptobox_fast_hash_type {
+ RSPAMD_CRYPTOBOX_XXHASH64 = 0,
+ RSPAMD_CRYPTOBOX_XXHASH32,
+ RSPAMD_CRYPTOBOX_MUMHASH,
+ RSPAMD_CRYPTOBOX_METROHASH,
+ RSPAMD_CRYPTOBOX_HASHFAST,
+ RSPAMD_CRYPTOBOX_HASHFAST_INDEPENDENT
+};
+/**
+ * Platform independent version
+ */
+guint64 rspamd_cryptobox_fast_hash_specific (
+ enum rspamd_cryptobox_fast_hash_type type,
+ const void *data,
+ gsize len, guint64 seed);
+
#endif /* CRYPTOBOX_H_ */
diff --git a/src/libcryptobox/keypairs_cache.c b/src/libcryptobox/keypairs_cache.c
index 069158789..887aaa85d 100644
--- a/src/libcryptobox/keypairs_cache.c
+++ b/src/libcryptobox/keypairs_cache.c
@@ -18,7 +18,6 @@
#include "keypairs_cache.h"
#include "keypair_private.h"
#include "hash.h"
-#include "xxhash.h"
struct rspamd_keypair_elt {
struct rspamd_cryptobox_nm *nm;
@@ -43,7 +42,8 @@ rspamd_keypair_hash (gconstpointer ptr)
{
struct rspamd_keypair_elt *elt = (struct rspamd_keypair_elt *)ptr;
- return XXH64 (elt->pair, sizeof (elt->pair), rspamd_hash_seed ());
+ return rspamd_cryptobox_fast_hash (elt->pair, sizeof (elt->pair),
+ rspamd_hash_seed ());
}
static gboolean
diff --git a/src/libmime/filter.c b/src/libmime/filter.c
index e1a33f3e2..6aaa19aaf 100644
--- a/src/libmime/filter.c
+++ b/src/libmime/filter.c
@@ -19,7 +19,7 @@
#include "rspamd.h"
#include "message.h"
#include "lua/lua_common.h"
-#include "xxhash.h"
+#include "cryptobox.h"
#include <math.h>
@@ -273,7 +273,8 @@ rspamd_action_from_str (const gchar *data, gint *result)
{
guint64 h;
- h = XXH64 (data, strlen (data), 0xdeadbabe);
+ h = rspamd_cryptobox_fast_hash_specific (RSPAMD_CRYPTOBOX_XXHASH64,
+ data, strlen (data), 0xdeadbabe);
switch (h) {
case 0x9917BFDB46332B8CULL: /* reject */
diff --git a/src/libmime/message.c b/src/libmime/message.c
index 6c4004f61..791bd6837 100644
--- a/src/libmime/message.c
+++ b/src/libmime/message.c
@@ -24,7 +24,7 @@
#include "email_addr.h"
#include "utlist.h"
#include "tokenizers/tokenizers.h"
-#include "xxhash.h"
+#include "cryptobox.h"
#ifdef WITH_SNOWBALL
#include "libstemmer.h"
@@ -42,6 +42,7 @@
static const gchar gtube_pattern[] = "XJS*C4JDBQADN1.NSBN3*2IDNEN*"
"GTUBE-STANDARD-ANTI-UBE-TEST-EMAIL*C.34X";
static rspamd_regexp_t *utf_compatible_re = NULL;
+static const guint64 words_hash_seed = 0xdeadbabe;
static GQuark
rspamd_message_quark (void)
@@ -1074,7 +1075,12 @@ rspamd_normalize_text_part (struct rspamd_task *task,
}
if (w->len > 0) {
- h = XXH64 (w->begin, w->len, rspamd_hash_seed ());
+ /*
+ * We use static hash seed if we would want to use that in shingles
+ * computation in future
+ */
+ h = rspamd_cryptobox_fast_hash_specific (RSPAMD_CRYPTOBOX_HASHFAST_INDEPENDENT,
+ w->begin, w->len, words_hash_seed);
g_array_append_val (part->normalized_hashes, h);
}
}
@@ -1094,7 +1100,7 @@ rspamd_words_levenshtein_distance (struct rspamd_task *task,
{
guint s1len, s2len, x, y, lastdiag, olddiag;
guint *column, ret;
- guint64 *h1, *h2;
+ guint64 h1, h2;
gint eq;
static const guint max_words = 8192;
@@ -1118,9 +1124,9 @@ rspamd_words_levenshtein_distance (struct rspamd_task *task,
for (y = 1, lastdiag = x - 1; y <= s1len; y++) {
olddiag = column[y];
- h1 = &g_array_index (w1, guint64, y - 1);
- h2 = &g_array_index (w2, guint64, x - 1);
- eq = h1 == h2;
+ h1 = g_array_index (w1, guint64, y - 1);
+ h2 = g_array_index (w2, guint64, x - 1);
+ eq = (h1 == h2) ? 1 : 0;
/*
* Cost of replacement is twice higher than cost of add/delete
* to calculate percentage properly
@@ -1262,7 +1268,6 @@ process_text_part (struct rspamd_task *task,
type,
text_part);
text_part->orig = part_content;
- rspamd_url_text_extract (task->task_pool, task, text_part, FALSE);
g_ptr_array_add (task->text_parts, text_part);
}
else {
@@ -1304,6 +1309,10 @@ process_text_part (struct rspamd_task *task,
c = p + 1;
}
}
+
+ if (!IS_PART_HTML (text_part)) {
+ rspamd_url_text_extract (task->task_pool, task, text_part, FALSE);
+ }
}
struct mime_foreach_data {
diff --git a/src/libserver/cfg_file.h b/src/libserver/cfg_file.h
index cd6d25683..e14fbd90a 100644
--- a/src/libserver/cfg_file.h
+++ b/src/libserver/cfg_file.h
@@ -277,7 +277,7 @@ struct rspamd_config {
gchar *pid_file; /**< name of pid file */
gchar *temp_dir; /**< dir for temp files */
gchar *control_socket_path; /**< path to the control socket */
- gchar *local_addrs; /**< tree of local addresses */
+ const ucl_object_t *local_addrs; /**< tree of local addresses */
#ifdef WITH_GPERF_TOOLS
gchar *profile_path;
#endif
@@ -353,7 +353,6 @@ struct rspamd_config {
gint clock_res; /**< resolution of clock used */
GList *maps; /**< maps active */
- rspamd_mempool_t *map_pool; /**< static maps pool */
gdouble map_timeout; /**< maps watch timeout */
struct symbols_cache *cache; /**< symbols cache object */
diff --git a/src/libserver/cfg_rcl.c b/src/libserver/cfg_rcl.c
index b90c1116b..f774ac126 100644
--- a/src/libserver/cfg_rcl.c
+++ b/src/libserver/cfg_rcl.c
@@ -1894,7 +1894,7 @@ rspamd_rcl_config_init (struct rspamd_config *cfg)
"Limit of files count in `cores_dir`");
rspamd_rcl_add_default_handler (sub,
"local_addrs",
- rspamd_rcl_parse_struct_string,
+ rspamd_rcl_parse_struct_ucl,
G_STRUCT_OFFSET (struct rspamd_config, local_addrs),
0,
"Use the specified addresses as local ones");
@@ -2976,13 +2976,13 @@ static guint
rspamd_worker_param_key_hash (gconstpointer p)
{
const struct rspamd_worker_param_key *k = p;
- XXH64_state_t st;
+ rspamd_cryptobox_fast_hash_state_t st;
- XXH64_reset (&st, rspamd_hash_seed ());
- XXH64_update (&st, k->name, strlen (k->name));
- XXH64_update (&st, &k->ptr, sizeof (gpointer));
+ rspamd_cryptobox_fast_hash_init (&st, rspamd_hash_seed ());
+ rspamd_cryptobox_fast_hash_update (&st, k->name, strlen (k->name));
+ rspamd_cryptobox_fast_hash_update (&st, &k->ptr, sizeof (gpointer));
- return XXH64_digest (&st);
+ return rspamd_cryptobox_fast_hash_final (&st);
}
static gboolean
diff --git a/src/libserver/cfg_utils.c b/src/libserver/cfg_utils.c
index 8cc54c792..85fd6af80 100644
--- a/src/libserver/cfg_utils.c
+++ b/src/libserver/cfg_utils.c
@@ -42,13 +42,11 @@ struct rspamd_ucl_map_cbdata {
struct rspamd_config *cfg;
GString *buf;
};
-static gchar * rspamd_ucl_read_cb (rspamd_mempool_t * pool,
- gchar * chunk,
+static gchar * rspamd_ucl_read_cb (gchar * chunk,
gint len,
struct map_cb_data *data,
gboolean final);
-static void rspamd_ucl_fin_cb (rspamd_mempool_t * pool,
- struct map_cb_data *data);
+static void rspamd_ucl_fin_cb (struct map_cb_data *data);
gboolean
rspamd_parse_bind_line (struct rspamd_config *cfg,
@@ -1135,8 +1133,7 @@ rspamd_config_check_statfiles (struct rspamd_classifier_config *cf)
}
static gchar *
-rspamd_ucl_read_cb (rspamd_mempool_t * pool,
- gchar * chunk,
+rspamd_ucl_read_cb (gchar * chunk,
gint len,
struct map_cb_data *data,
gboolean final)
@@ -1157,7 +1154,7 @@ rspamd_ucl_read_cb (rspamd_mempool_t * pool,
}
static void
-rspamd_ucl_fin_cb (rspamd_mempool_t * pool, struct map_cb_data *data)
+rspamd_ucl_fin_cb (struct map_cb_data *data)
{
struct rspamd_ucl_map_cbdata *cbdata = data->cur_data, *prev =
data->prev_data;
@@ -1180,34 +1177,26 @@ rspamd_ucl_fin_cb (rspamd_mempool_t * pool, struct map_cb_data *data)
return;
}
- checksum = XXH64 (cbdata->buf->str, cbdata->buf->len, 0);
- if (data->map->checksum != checksum) {
- /* New data available */
- parser = ucl_parser_new (0);
- if (!ucl_parser_add_chunk (parser, cbdata->buf->str,
+ checksum = rspamd_cryptobox_fast_hash (cbdata->buf->str, cbdata->buf->len, 0);
+ /* New data available */
+ parser = ucl_parser_new (0);
+ if (!ucl_parser_add_chunk (parser, cbdata->buf->str,
cbdata->buf->len)) {
- msg_err_config ("cannot parse map %s: %s",
- data->map->uri,
+ msg_err_config ("cannot parse map %s: %s",
+ data->map->name,
ucl_parser_get_error (parser));
- ucl_parser_free (parser);
- }
- else {
- obj = ucl_parser_get_object (parser);
- ucl_parser_free (parser);
- it = NULL;
-
- while ((cur = ucl_object_iterate (obj, &it, true))) {
- ucl_object_replace_key (cbdata->cfg->rcl_obj, (ucl_object_t *)cur,
- cur->key, cur->keylen, false);
- }
- ucl_object_unref (obj);
- data->map->checksum = checksum;
- }
+ ucl_parser_free (parser);
}
else {
- msg_info_config ("do not reload map %s, checksum is the same: %d",
- data->map->uri,
- checksum);
+ obj = ucl_parser_get_object (parser);
+ ucl_parser_free (parser);
+ it = NULL;
+
+ while ((cur = ucl_object_iterate (obj, &it, true))) {
+ ucl_object_replace_key (cbdata->cfg->rcl_obj, (ucl_object_t *)cur,
+ cur->key, cur->keylen, false);
+ }
+ ucl_object_unref (obj);
}
}
diff --git a/src/libserver/dynamic_cfg.c b/src/libserver/dynamic_cfg.c
index 3a76f20f9..d31418588 100644
--- a/src/libserver/dynamic_cfg.c
+++ b/src/libserver/dynamic_cfg.c
@@ -134,8 +134,7 @@ apply_dynamic_conf (const ucl_object_t *top, struct rspamd_config *cfg)
/* Callbacks for reading json dynamic rules */
static gchar *
-json_config_read_cb (rspamd_mempool_t * pool,
- gchar * chunk,
+json_config_read_cb (gchar * chunk,
gint len,
struct map_cb_data *data,
gboolean final)
@@ -167,7 +166,7 @@ json_config_read_cb (rspamd_mempool_t * pool,
}
static void
-json_config_fin_cb (rspamd_mempool_t * pool, struct map_cb_data *data)
+json_config_fin_cb (struct map_cb_data *data)
{
struct config_json_buf *jb;
ucl_object_t *top;
diff --git a/src/libserver/events.c b/src/libserver/events.c
index a8cb086cb..44a5d9191 100644
--- a/src/libserver/events.c
+++ b/src/libserver/events.c
@@ -16,7 +16,7 @@
#include "config.h"
#include "rspamd.h"
#include "events.h"
-#include "xxhash.h"
+#include "cryptobox.h"
#define RSPAMD_SESSION_FLAG_WATCHING (1 << 0)
#define RSPAMD_SESSION_FLAG_DESTROYING (1 << 1)
@@ -81,7 +81,7 @@ static guint
rspamd_event_hash (gconstpointer a)
{
const struct rspamd_async_event *ev = a;
- XXH64_state_t st;
+ rspamd_cryptobox_fast_hash_state_t st;
union {
event_finalizer_t f;
gpointer p;
@@ -89,11 +89,11 @@ rspamd_event_hash (gconstpointer a)
u.f = ev->fin;
- XXH64_reset (&st, rspamd_hash_seed ());
- XXH64_update (&st, &ev->user_data, sizeof (gpointer));
- XXH64_update (&st, &u, sizeof (u));
+ rspamd_cryptobox_fast_hash_init (&st, rspamd_hash_seed ());
+ rspamd_cryptobox_fast_hash_update (&st, &ev->user_data, sizeof (gpointer));
+ rspamd_cryptobox_fast_hash_update (&st, &u, sizeof (u));
- return XXH64_digest (&st);
+ return rspamd_cryptobox_fast_hash_final (&st);
}
diff --git a/src/libserver/fuzzy_backend.c b/src/libserver/fuzzy_backend.c
index a13d27f09..26e595e5f 100644
--- a/src/libserver/fuzzy_backend.c
+++ b/src/libserver/fuzzy_backend.c
@@ -93,6 +93,7 @@ enum rspamd_fuzzy_statement_idx {
RSPAMD_FUZZY_BACKEND_EXPIRE,
RSPAMD_FUZZY_BACKEND_VACUUM,
RSPAMD_FUZZY_BACKEND_DELETE_ORPHANED,
+ RSPAMD_FUZZY_BACKEND_VERSION,
RSPAMD_FUZZY_BACKEND_MAX
};
static struct rspamd_fuzzy_stmts {
@@ -212,6 +213,13 @@ static struct rspamd_fuzzy_stmts {
.stmt = NULL,
.result = SQLITE_DONE
},
+ {
+ .idx = RSPAMD_FUZZY_BACKEND_VERSION,
+ .sql = "PRAGMA user_version;",
+ .args = "",
+ .stmt = NULL,
+ .result = SQLITE_ROW
+ },
};
static GQuark
@@ -624,7 +632,8 @@ rspamd_fuzzy_backend_prepare_update (struct rspamd_fuzzy_backend *backend)
gboolean
rspamd_fuzzy_backend_add (struct rspamd_fuzzy_backend *backend,
- const struct rspamd_fuzzy_cmd *cmd)
+ const struct rspamd_fuzzy_cmd *cmd,
+ time_t timestamp)
{
int rc, i;
gint64 id, flag;
@@ -680,7 +689,7 @@ rspamd_fuzzy_backend_add (struct rspamd_fuzzy_backend *backend,
(gint) cmd->flag,
cmd->digest,
(gint64) cmd->value,
- (gint64) time (NULL));
+ (gint64) timestamp);
if (rc == SQLITE_OK) {
if (cmd->shingles_count > 0) {
@@ -722,7 +731,9 @@ rspamd_fuzzy_backend_add (struct rspamd_fuzzy_backend *backend,
gboolean
rspamd_fuzzy_backend_finish_update (struct rspamd_fuzzy_backend *backend)
{
- gint rc, wal_frames, wal_checkpointed;
+ gint rc, wal_frames, wal_checkpointed, ver;
+ gint64 version = 0;
+ gchar version_buf[128];
rc = rspamd_fuzzy_backend_run_stmt (backend, TRUE,
RSPAMD_FUZZY_BACKEND_TRANSACTION_COMMIT);
@@ -745,6 +756,17 @@ rspamd_fuzzy_backend_finish_update (struct rspamd_fuzzy_backend *backend)
}
}
+ /* Get and update version */
+ ver = rspamd_fuzzy_backend_version (backend);
+ ++ver;
+ rspamd_snprintf (version_buf, sizeof (version_buf), "PRAGMA user_version=%d;",
+ ver);
+
+ if (sqlite3_exec (backend->db, version_buf, NULL, NULL, NULL) != SQLITE_OK) {
+ msg_err_fuzzy_backend ("cannot set database version to %L: %s",
+ version, sqlite3_errmsg (backend->db));
+ }
+
return TRUE;
}
@@ -953,6 +975,24 @@ rspamd_fuzzy_backend_count (struct rspamd_fuzzy_backend *backend)
return 0;
}
+gint
+rspamd_fuzzy_backend_version (struct rspamd_fuzzy_backend *backend)
+{
+ gint ret = 0;
+
+ if (backend) {
+ if (rspamd_fuzzy_backend_run_stmt (backend, FALSE,
+ RSPAMD_FUZZY_BACKEND_VERSION) == SQLITE_OK) {
+ ret = sqlite3_column_int64 (
+ prepared_stmts[RSPAMD_FUZZY_BACKEND_VERSION].stmt, 0);
+ }
+
+ rspamd_fuzzy_backend_cleanup_stmt (backend, RSPAMD_FUZZY_BACKEND_VERSION);
+ }
+
+ return ret;
+}
+
gsize
rspamd_fuzzy_backend_expired (struct rspamd_fuzzy_backend *backend)
{
diff --git a/src/libserver/fuzzy_backend.h b/src/libserver/fuzzy_backend.h
index 04da9bbba..bcd199d1a 100644
--- a/src/libserver/fuzzy_backend.h
+++ b/src/libserver/fuzzy_backend.h
@@ -56,7 +56,8 @@ gboolean rspamd_fuzzy_backend_prepare_update (struct rspamd_fuzzy_backend *backe
*/
gboolean rspamd_fuzzy_backend_add (
struct rspamd_fuzzy_backend *backend,
- const struct rspamd_fuzzy_cmd *cmd);
+ const struct rspamd_fuzzy_cmd *cmd,
+ time_t timestamp);
/**
* Delete digest from the database
@@ -89,6 +90,7 @@ gboolean rspamd_fuzzy_backend_sync (struct rspamd_fuzzy_backend *backend,
void rspamd_fuzzy_backend_close (struct rspamd_fuzzy_backend *backend);
gsize rspamd_fuzzy_backend_count (struct rspamd_fuzzy_backend *backend);
+gint rspamd_fuzzy_backend_version (struct rspamd_fuzzy_backend *backend);
gsize rspamd_fuzzy_backend_expired (struct rspamd_fuzzy_backend *backend);
const gchar * rspamd_fuzzy_backend_id (struct rspamd_fuzzy_backend *backend);
diff --git a/src/libserver/protocol.c b/src/libserver/protocol.c
index a394ddaff..d314d3fdc 100644
--- a/src/libserver/protocol.c
+++ b/src/libserver/protocol.c
@@ -23,7 +23,7 @@
#include "http.h"
#include "email_addr.h"
#include "worker_private.h"
-#include "xxhash.h"
+#include "cryptobox.h"
/* Max line size */
#define OUTBUFSIZ BUFSIZ
@@ -401,7 +401,8 @@ rspamd_protocol_handle_headers (struct rspamd_task *task,
guint64 h;
guint32 *hp;
- h = XXH64 (hv_tok->begin, hv_tok->len, 0xdeadbabe);
+ h = rspamd_cryptobox_fast_hash_specific (RSPAMD_CRYPTOBOX_XXHASH64,
+ hv_tok->begin, hv_tok->len, 0xdeadbabe);
hp = rspamd_mempool_alloc (task->task_pool, sizeof (*hp));
memcpy (hp, &h, sizeof (*hp));
rspamd_mempool_set_variable (task->task_pool, "settings_hash",
diff --git a/src/libserver/re_cache.c b/src/libserver/re_cache.c
index 5882114f9..3e308415d 100644
--- a/src/libserver/re_cache.c
+++ b/src/libserver/re_cache.c
@@ -15,7 +15,6 @@
*/
#include "libmime/message.h"
#include "re_cache.h"
-#include "xxhash.h"
#include "cryptobox.h"
#include "ref.h"
#include "libserver/url.h"
@@ -123,16 +122,16 @@ rspamd_re_cache_class_id (enum rspamd_re_type type,
gpointer type_data,
gsize datalen)
{
- XXH64_state_t st;
+ rspamd_cryptobox_fast_hash_state_t st;
- XXH64_reset (&st, 0xdeadbabe);
- XXH64_update (&st, &type, sizeof (type));
+ rspamd_cryptobox_fast_hash_init (&st, 0xdeadbabe);
+ rspamd_cryptobox_fast_hash_update (&st, &type, sizeof (type));
if (datalen > 0) {
- XXH64_update (&st, type_data, datalen);
+ rspamd_cryptobox_fast_hash_update (&st, type_data, datalen);
}
- return XXH64_digest (&st);
+ return rspamd_cryptobox_fast_hash_final (&st);
}
static void
@@ -1174,7 +1173,8 @@ rspamd_re_cache_type_from_string (const char *str)
*/
if (str != NULL) {
- h = XXH64 (str, strlen (str), 0xdeadbabe);
+ h = rspamd_cryptobox_fast_hash_specific (RSPAMD_CRYPTOBOX_XXHASH64,
+ str, strlen (str), 0xdeadbabe);
switch (h) {
case G_GUINT64_CONSTANT(0x298b9c8a58887d44): /* header */
diff --git a/src/libserver/task.c b/src/libserver/task.c
index 3d6387b52..ce95b927d 100644
--- a/src/libserver/task.c
+++ b/src/libserver/task.c
@@ -26,6 +26,11 @@
#include "utlist.h"
#include <math.h>
+/*
+ * Do not print more than this amount of elts
+ */
+static const int max_log_elts = 7;
+
static GQuark
rspamd_task_quark (void)
{
@@ -752,7 +757,7 @@ rspamd_task_log_metric_res (struct rspamd_task *task,
rspamd_fstring_t *symbuf;
struct symbol *sym;
GPtrArray *sorted_symbols;
- guint i;
+ guint i, j;
mres = g_hash_table_lookup (task->results, DEFAULT_METRIC);
@@ -810,8 +815,16 @@ rspamd_task_log_metric_res (struct rspamd_task *task,
rspamd_printf_fstring (&symbuf, "{");
+ j = 0;
+
for (cur = sym->options; cur != NULL; cur = g_list_next (cur)) {
rspamd_printf_fstring (&symbuf, "%s;", cur->data);
+
+ if (j >= max_log_elts) {
+ rspamd_printf_fstring (&symbuf, "...;");
+ break;
+ }
+ j ++;
}
rspamd_printf_fstring (&symbuf, "}");
@@ -892,6 +905,7 @@ rspamd_task_write_ialist (struct rspamd_task *task,
lim = internet_address_list_length (ialist);
}
+
varbuf = rspamd_fstring_new ();
for (i = 0; i < lim; i++) {
@@ -908,6 +922,11 @@ rspamd_task_write_ialist (struct rspamd_task *task,
varbuf = rspamd_fstring_append (varbuf, ",", 1);
}
}
+
+ if (i >= max_log_elts) {
+ varbuf = rspamd_fstring_append (varbuf, "...", 3);
+ break;
+ }
}
if (varbuf->len > 0) {
@@ -951,6 +970,11 @@ rspamd_task_write_addr_list (struct rspamd_task *task,
varbuf = rspamd_fstring_append (varbuf, ",", 1);
}
}
+
+ if (i >= max_log_elts) {
+ varbuf = rspamd_fstring_append (varbuf, "...", 3);
+ break;
+ }
}
if (varbuf->len > 0) {
diff --git a/src/libserver/url.c b/src/libserver/url.c
index fe70585be..70a5f3c9b 100644
--- a/src/libserver/url.c
+++ b/src/libserver/url.c
@@ -1795,10 +1795,9 @@ url_tld_end (struct url_callback_data *cb,
{
const gchar *p;
- /* A url must be finished by tld, so it must be followed by space character */
p = pos + match->m_len;
- if (p == cb->end || g_ascii_isspace (*p) || *p == ',') {
+ if (p == cb->end) {
match->m_len = p - match->m_begin;
return TRUE;
}
@@ -2302,7 +2301,7 @@ rspamd_url_text_extract (rspamd_mempool_t *pool,
{
struct rspamd_url_mimepart_cbdata mcbd;
- if (part->content == NULL || part->content->len == 0) {
+ if (part->stripped_content == NULL || part->stripped_content->len == 0) {
msg_warn_task ("got empty text part");
return;
}
@@ -2310,8 +2309,8 @@ rspamd_url_text_extract (rspamd_mempool_t *pool,
mcbd.task = task;
mcbd.part = part;
- rspamd_url_find_multiple (task->task_pool, part->content->data,
- part->content->len, is_html,
+ rspamd_url_find_multiple (task->task_pool, part->stripped_content->data,
+ part->stripped_content->len, is_html,
rspamd_url_text_part_callback, &mcbd);
/* Handle offsets of this part */
diff --git a/src/libstat/tokenizers/osb.c b/src/libstat/tokenizers/osb.c
index 906c1de25..c2e050f23 100644
--- a/src/libstat/tokenizers/osb.c
+++ b/src/libstat/tokenizers/osb.c
@@ -19,7 +19,6 @@
#include "tokenizers.h"
#include "stat_internal.h"
-#include "xxhash.h"
#include "cryptobox.h"
/* Size for features pipe */
@@ -280,7 +279,8 @@ rspamd_tokenizer_osb (struct rspamd_stat_ctx *ctx,
window_size = osb_cf->window_size;
if (prefix) {
- seed = XXH64 (prefix, strlen (prefix), osb_cf->seed);
+ seed = rspamd_cryptobox_fast_hash_specific (RSPAMD_CRYPTOBOX_XXHASH64,
+ prefix, strlen (prefix), osb_cf->seed);
}
else {
seed = osb_cf->seed;
@@ -300,7 +300,8 @@ rspamd_tokenizer_osb (struct rspamd_stat_ctx *ctx,
else {
/* We know that the words are normalized */
if (osb_cf->ht == RSPAMD_OSB_HASH_XXHASH) {
- cur = XXH64 (token->begin, token->len, osb_cf->seed);
+ cur = rspamd_cryptobox_fast_hash_specific (RSPAMD_CRYPTOBOX_XXHASH64,
+ token->begin, token->len, osb_cf->seed);
}
else {
rspamd_cryptobox_siphash ((guchar *)&cur, token->begin,
diff --git a/src/libutil/addr.c b/src/libutil/addr.c
index 8fc85e6c7..18414a98c 100644
--- a/src/libutil/addr.c
+++ b/src/libutil/addr.c
@@ -17,7 +17,7 @@
#include "addr.h"
#include "util.h"
#include "logger.h"
-#include "xxhash.h"
+#include "cryptobox.h"
#include "radix.h"
#include "unix-std.h"
/* pwd and grp */
@@ -1352,28 +1352,28 @@ guint
rspamd_inet_address_hash (gconstpointer a)
{
const rspamd_inet_addr_t *addr = a;
- XXH64_state_t st;
+ rspamd_cryptobox_fast_hash_state_t st;
- XXH64_reset (&st, rspamd_hash_seed ());
- XXH64_update (&st, &addr->af, sizeof (addr->af));
+ rspamd_cryptobox_fast_hash_init (&st, rspamd_hash_seed ());
+ rspamd_cryptobox_fast_hash_update (&st, &addr->af, sizeof (addr->af));
if (addr->af == AF_UNIX && addr->u.un) {
- XXH64_update (&st, addr->u.un, sizeof (*addr->u.un));
+ rspamd_cryptobox_fast_hash_update (&st, addr->u.un, sizeof (*addr->u.un));
}
else {
/* We ignore port part here */
if (addr->af == AF_INET) {
- XXH64_update (&st, &addr->u.in.addr.s4.sin_addr,
+ rspamd_cryptobox_fast_hash_update (&st, &addr->u.in.addr.s4.sin_addr,
sizeof (addr->u.in.addr.s4.sin_addr));
}
else {
- XXH64_update (&st, &addr->u.in.addr.s6.sin6_addr,
+ rspamd_cryptobox_fast_hash_update (&st, &addr->u.in.addr.s6.sin6_addr,
sizeof (addr->u.in.addr.s6.sin6_addr));
}
}
- return XXH64_digest (&st);
+ return rspamd_cryptobox_fast_hash_final (&st);
}
gboolean
diff --git a/src/libutil/bloom.c b/src/libutil/bloom.c
index 8c8b80ae6..2447b1b10 100644
--- a/src/libutil/bloom.c
+++ b/src/libutil/bloom.c
@@ -15,7 +15,7 @@
*/
#include "config.h"
#include "bloom.h"
-#include "xxhash.h"
+#include "cryptobox.h"
/* 4 bits are used for counting (implementing delete operation) */
#define SIZE_BIT 4
@@ -107,7 +107,8 @@ rspamd_bloom_add (rspamd_bloom_filter_t * bloom, const gchar *s)
}
len = strlen (s);
for (n = 0; n < bloom->nfuncs; ++n) {
- v = XXH64 (s, len, bloom->seeds[n]) % bloom->asize;
+ v = rspamd_cryptobox_fast_hash_specific (RSPAMD_CRYPTOBOX_XXHASH64,
+ s, len, bloom->seeds[n]) % bloom->asize;
INCBIT (bloom->a, v, t);
}
@@ -126,7 +127,8 @@ rspamd_bloom_del (rspamd_bloom_filter_t * bloom, const gchar *s)
}
len = strlen (s);
for (n = 0; n < bloom->nfuncs; ++n) {
- v = XXH64 (s, len, bloom->seeds[n]) % bloom->asize;
+ v = rspamd_cryptobox_fast_hash_specific (RSPAMD_CRYPTOBOX_XXHASH64,
+ s, len, bloom->seeds[n]) % bloom->asize;
DECBIT (bloom->a, v, t);
}
@@ -145,7 +147,8 @@ rspamd_bloom_check (rspamd_bloom_filter_t * bloom, const gchar *s)
}
len = strlen (s);
for (n = 0; n < bloom->nfuncs; ++n) {
- v = XXH64 (s, len, bloom->seeds[n]) % bloom->asize;
+ v = rspamd_cryptobox_fast_hash_specific (RSPAMD_CRYPTOBOX_XXHASH64,
+ s, len, bloom->seeds[n]) % bloom->asize;
if (!(GETBIT (bloom->a, v))) {
return FALSE;
}
diff --git a/src/libutil/fstring.c b/src/libutil/fstring.c
index 997dd6b46..285940f9c 100644
--- a/src/libutil/fstring.c
+++ b/src/libutil/fstring.c
@@ -400,3 +400,19 @@ rspamd_ftok_cstr_equal (const rspamd_ftok_t *s, const gchar *pat,
return (rspamd_ftok_cmp (s, &srch) == 0);
}
+
+gchar *
+rspamd_ftokdup (const rspamd_ftok_t *src)
+{
+ gchar *newstr;
+
+ if (src == NULL) {
+ return NULL;
+ }
+
+ newstr = g_malloc (src->len + 1);
+ memcpy (newstr, src->begin, src->len);
+ newstr[src->len] = '\0';
+
+ return newstr;
+}
diff --git a/src/libutil/fstring.h b/src/libutil/fstring.h
index 127557e40..10916d876 100644
--- a/src/libutil/fstring.h
+++ b/src/libutil/fstring.h
@@ -153,4 +153,12 @@ rspamd_ftok_t *rspamd_ftok_map (const rspamd_fstring_t *s);
rspamd_fstring_t * rspamd_fstring_grow (rspamd_fstring_t *str,
gsize needed_len) G_GNUC_WARN_UNUSED_RESULT;
+/**
+ * Copies ftok to zero terminated string (must be freed using g_free)
+ * @param src
+ * @return
+ */
+gchar *rspamd_ftokdup (const rspamd_ftok_t *src) G_GNUC_WARN_UNUSED_RESULT;
+
+
#endif
diff --git a/src/libutil/logger.c b/src/libutil/logger.c
index 21e489b09..f81730448 100644
--- a/src/libutil/logger.c
+++ b/src/libutil/logger.c
@@ -18,7 +18,7 @@
#include "util.h"
#include "rspamd.h"
#include "map.h"
-#include "xxhash.h"
+#include "cryptobox.h"
#include "unix-std.h"
#ifdef HAVE_SYSLOG_H
@@ -82,12 +82,6 @@ static rspamd_logger_t *default_logger = NULL;
} \
} while (0)
-#if defined(__LP64__) || defined(_LP64)
-#define XXH_ONESHOT XXH64
-#else
-#define XXH_ONESHOT XXH32
-#endif
-
static void
syslog_log_function (const gchar *log_domain, const gchar *module,
const gchar *id, const gchar *function,
@@ -106,7 +100,7 @@ static void
static inline guint64
rspamd_log_calculate_cksum (const gchar *message, size_t mlen)
{
- return XXH_ONESHOT (message, mlen, rspamd_hash_seed ());
+ return rspamd_cryptobox_fast_hash (message, mlen, rspamd_hash_seed ());
}
/*
diff --git a/src/libutil/map.c b/src/libutil/map.c
index 8de6e76c2..f99c35784 100644
--- a/src/libutil/map.c
+++ b/src/libutil/map.c
@@ -35,10 +35,29 @@
#include <pcre2.h>
#endif
+#undef MAP_DEBUG_REFS
+#ifdef MAP_DEBUG_REFS
+#define MAP_RETAIN(x) do { \
+ msg_err ("retain ref %p, refcount: %d -> %d", (x), (x)->ref.refcount, (x)->ref.refcount + 1); \
+ REF_RETAIN(x); \
+} while (0)
+
+#define MAP_RELEASE(x) do { \
+ msg_err ("release ref %p, refcount: %d -> %d", (x), (x)->ref.refcount, (x)->ref.refcount - 1); \
+ REF_RELEASE(x); \
+} while (0)
+#else
+#define MAP_RETAIN REF_RETAIN
+#define MAP_RELEASE REF_RELEASE
+#endif
+
static const gchar *hash_fill = "1";
-static void free_http_cbdata_common (struct http_callback_data *cbd);
+static void free_http_cbdata_common (struct http_callback_data *cbd, gboolean plan_new);
static void free_http_cbdata_dtor (gpointer p);
static void free_http_cbdata (struct http_callback_data *cbd);
+static void rspamd_map_periodic_callback (gint fd, short what, void *ud);
+static void rspamd_map_schedule_periodic (struct rspamd_map *map, gboolean locked,
+ gboolean initial, gboolean errored);
/**
* Write HTTP request
*/
@@ -47,9 +66,9 @@ write_http_request (struct http_callback_data *cbd)
{
gchar datebuf[128];
struct rspamd_http_message *msg;
- rspamd_mempool_t *pool;
+ struct rspamd_map *map;
- pool = cbd->map->pool;
+ map = cbd->map;
if (cbd->fd != -1) {
close (cbd->fd);
@@ -60,10 +79,15 @@ write_http_request (struct http_callback_data *cbd)
if (cbd->fd != -1) {
msg = rspamd_http_new_message (HTTP_REQUEST);
+ if (cbd->check) {
+ msg->method = HTTP_HEAD;
+ }
+
if (cbd->stage == map_load_file) {
msg->url = rspamd_fstring_new_init (cbd->data->path, strlen (cbd->data->path));
- if (cbd->data->last_checked != 0 && cbd->stage == map_load_file) {
+ if (cbd->check &&
+ cbd->data->last_checked != 0 && cbd->stage == map_load_file) {
rspamd_http_date_format (datebuf, sizeof (datebuf),
cbd->data->last_checked);
rspamd_http_message_add_header (msg, "If-Modified-Since", datebuf);
@@ -83,11 +107,12 @@ write_http_request (struct http_callback_data *cbd)
rspamd_http_connection_write_message (cbd->conn, msg, cbd->data->host,
NULL, cbd, cbd->fd, &cbd->tv, cbd->ev_base);
- REF_RETAIN (cbd);
+ MAP_RETAIN (cbd);
}
else {
- msg_err_pool ("cannot connect to %s: %s", cbd->data->host,
+ msg_err_map ("cannot connect to %s: %s", cbd->data->host,
strerror (errno));
+ cbd->periodic->errored = TRUE;
}
}
@@ -99,7 +124,6 @@ rspamd_map_check_sig_pk (const char *fname,
struct rspamd_cryptobox_pubkey *pk)
{
gchar fpath[PATH_MAX];
- rspamd_mempool_t *pool = map->pool;
guchar *data;
GString *b32_key;
gsize len = 0;
@@ -109,12 +133,12 @@ rspamd_map_check_sig_pk (const char *fname,
data = rspamd_file_xmap (fpath, PROT_READ, &len);
if (data == NULL) {
- msg_err_pool ("can't open signature %s: %s", fpath, strerror (errno));
+ msg_err_map ("can't open signature %s: %s", fpath, strerror (errno));
return FALSE;
}
if (len != rspamd_cryptobox_signature_bytes (RSPAMD_CRYPTOBOX_MODE_25519)) {
- msg_err_pool ("can't open signature %s: invalid signature", fpath);
+ msg_err_map ("can't open signature %s: invalid signature", fpath);
munmap (data, len);
return FALSE;
@@ -122,7 +146,7 @@ rspamd_map_check_sig_pk (const char *fname,
if (!rspamd_cryptobox_verify (data, input, inlen,
rspamd_pubkey_get_pk (pk, NULL), RSPAMD_CRYPTOBOX_MODE_25519)) {
- msg_err_pool ("can't verify signature %s: incorrect signature", fpath);
+ msg_err_map ("can't verify signature %s: incorrect signature", fpath);
munmap (data, len);
return FALSE;
@@ -130,7 +154,7 @@ rspamd_map_check_sig_pk (const char *fname,
b32_key = rspamd_pubkey_print (pk,
RSPAMD_KEYPAIR_BASE32|RSPAMD_KEYPAIR_PUBKEY);
- msg_info_pool ("verified signature in file %s using trusted key %v",
+ msg_info_map ("verified signature in file %s using trusted key %v",
fpath, b32_key);
g_string_free (b32_key, TRUE);
@@ -141,25 +165,26 @@ rspamd_map_check_sig_pk (const char *fname,
static gboolean
rspamd_map_check_file_sig (const char *fname,
- struct rspamd_map *map, const guchar *input,
+ struct rspamd_map *map,
+ struct rspamd_map_backend *bk,
+ const guchar *input,
gsize inlen)
{
gchar fpath[PATH_MAX];
- rspamd_mempool_t *pool = map->pool;
guchar *data;
struct rspamd_cryptobox_pubkey *pk = NULL;
GString *b32_key;
gboolean ret;
gsize len = 0;
- if (map->trusted_pubkey == NULL) {
+ if (bk->trusted_pubkey == NULL) {
/* Try to load and check pubkey */
rspamd_snprintf (fpath, sizeof (fpath), "%s.pub", fname);
data = rspamd_file_xmap (fpath, PROT_READ, &len);
if (data == NULL) {
- msg_err_pool ("can't open pubkey %s: %s", fpath, strerror (errno));
+ msg_err_map ("can't open pubkey %s: %s", fpath, strerror (errno));
return FALSE;
}
@@ -168,7 +193,7 @@ rspamd_map_check_file_sig (const char *fname,
munmap (data, len);
if (pk == NULL) {
- msg_err_pool ("can't load pubkey %s", fpath);
+ msg_err_map ("can't load pubkey %s", fpath);
return FALSE;
}
@@ -178,7 +203,7 @@ rspamd_map_check_file_sig (const char *fname,
g_assert (b32_key != NULL);
if (g_hash_table_lookup (map->cfg->trusted_keys, b32_key->str) == NULL) {
- msg_err_pool ("pubkey loaded from %s is untrusted: %v", fpath,
+ msg_err_map ("pubkey loaded from %s is untrusted: %v", fpath,
b32_key);
g_string_free (b32_key, TRUE);
rspamd_pubkey_unref (pk);
@@ -189,7 +214,7 @@ rspamd_map_check_file_sig (const char *fname,
g_string_free (b32_key, TRUE);
}
else {
- pk = rspamd_pubkey_ref (map->trusted_pubkey);
+ pk = rspamd_pubkey_ref (bk->trusted_pubkey);
}
ret = rspamd_map_check_sig_pk (fname, map, input, inlen, pk);
@@ -202,10 +227,11 @@ rspamd_map_check_file_sig (const char *fname,
* Callback for destroying HTTP callback data
*/
static void
-free_http_cbdata_common (struct http_callback_data *cbd)
+free_http_cbdata_common (struct http_callback_data *cbd, gboolean plan_new)
{
char fpath[PATH_MAX];
struct stat st;
+ struct map_periodic_cbdata *periodic = cbd->periodic;
if (cbd->out_fd != -1) {
close (cbd->out_fd);
@@ -243,7 +269,8 @@ free_http_cbdata_common (struct http_callback_data *cbd)
rspamd_inet_address_destroy (cbd->addr);
}
- g_atomic_int_set (cbd->map->locked, 0);
+ MAP_RELEASE (cbd->bk);
+ MAP_RELEASE (periodic);
g_slice_free1 (sizeof (struct http_callback_data), cbd);
}
@@ -253,18 +280,18 @@ free_http_cbdata (struct http_callback_data *cbd)
cbd->map->dtor = NULL;
cbd->map->dtor_data = NULL;
- free_http_cbdata_common (cbd);
+ free_http_cbdata_common (cbd, TRUE);
}
static void
free_http_cbdata_dtor (gpointer p)
{
struct http_callback_data *cbd = p;
- rspamd_mempool_t *pool;
+ struct rspamd_map *map;
- pool = cbd->map->pool;
- msg_warn_pool ("connection with http server is terminated: worker is stopping");
- free_http_cbdata_common (cbd);
+ map = cbd->map;
+ msg_warn_map ("connection with http server is terminated: worker is stopping");
+ free_http_cbdata_common (cbd, FALSE);
}
/*
@@ -275,13 +302,13 @@ http_map_error (struct rspamd_http_connection *conn,
GError *err)
{
struct http_callback_data *cbd = conn->ud;
- rspamd_mempool_t *pool;
-
- pool = cbd->map->pool;
+ struct rspamd_map *map;
- msg_err_pool ("connection with http server terminated incorrectly: %s",
- err->message);
- REF_RELEASE (cbd);
+ map = cbd->map;
+ cbd->periodic->errored = TRUE;
+ msg_err_map ("connection with http server terminated incorrectly: %e", err);
+ rspamd_map_periodic_callback (-1, EV_TIMEOUT, cbd->periodic);
+ MAP_RELEASE (cbd);
}
static int
@@ -290,17 +317,27 @@ http_map_finish (struct rspamd_http_connection *conn,
{
struct http_callback_data *cbd = conn->ud;
struct rspamd_map *map;
- rspamd_mempool_t *pool;
+ struct rspamd_map_backend *bk;
char fpath[PATH_MAX];
guchar *aux_data, *in = NULL;
gsize inlen = 0;
struct stat st;
map = cbd->map;
- pool = cbd->map->pool;
+ bk = cbd->bk;
if (msg->code == 200) {
+ if (cbd->check) {
+ cbd->periodic->need_modify = TRUE;
+ /* Reset the whole chain */
+ cbd->periodic->cur_backend = 0;
+ rspamd_map_periodic_callback (-1, EV_TIMEOUT, cbd->periodic);
+ MAP_RELEASE (cbd);
+
+ return 0;
+ }
+
if (cbd->stage == map_load_file) {
if (msg->last_modified) {
cbd->data->last_checked = msg->last_modified;
@@ -310,13 +347,13 @@ http_map_finish (struct rspamd_http_connection *conn,
}
/* Maybe we need to check signature ? */
- if (map->is_signed) {
+ if (bk->is_signed) {
close (cbd->out_fd);
- if (map->trusted_pubkey) {
+ if (bk->trusted_pubkey) {
/* No need to load key */
cbd->stage = map_load_signature;
- cbd->pk = rspamd_pubkey_ref (map->trusted_pubkey);
+ cbd->pk = rspamd_pubkey_ref (bk->trusted_pubkey);
rspamd_snprintf (fpath, sizeof (fpath), "%s.sig",
cbd->tmpfile);
}
@@ -329,24 +366,25 @@ http_map_finish (struct rspamd_http_connection *conn,
cbd->out_fd = rspamd_file_xopen (fpath, O_RDWR|O_CREAT, 00644);
if (cbd->out_fd == -1) {
- msg_err_pool ("cannot open pubkey file %s for writing: %s",
+ msg_err_map ("cannot open pubkey file %s for writing: %s",
fpath, strerror (errno));
- goto end;
+ goto err;
}
rspamd_http_connection_reset (cbd->conn);
write_http_request (cbd);
+ MAP_RELEASE (cbd);
- goto end;
+ return 0;
}
else {
/* Unsinged version - just open file */
in = rspamd_file_xmap (cbd->tmpfile, PROT_READ, &inlen);
if (in == NULL) {
- msg_err_pool ("cannot read tempfile %s: %s", cbd->tmpfile,
+ msg_err_map ("cannot read tempfile %s: %s", cbd->tmpfile,
strerror (errno));
- goto end;
+ goto err;
}
}
}
@@ -355,9 +393,9 @@ http_map_finish (struct rspamd_http_connection *conn,
(void)lseek (cbd->out_fd, 0, SEEK_SET);
if (fstat (cbd->out_fd, &st) == -1) {
- msg_err_pool ("cannot stat pubkey file %s: %s",
+ msg_err_map ("cannot stat pubkey file %s: %s",
fpath, strerror (errno));
- goto end;
+ goto err;
}
aux_data = mmap (NULL, st.st_size, PROT_READ, MAP_SHARED,
@@ -366,9 +404,9 @@ http_map_finish (struct rspamd_http_connection *conn,
cbd->out_fd = -1;
if (aux_data == MAP_FAILED) {
- msg_err_pool ("cannot map pubkey file %s: %s",
+ msg_err_map ("cannot map pubkey file %s: %s",
fpath, strerror (errno));
- goto end;
+ goto err;
}
cbd->pk = rspamd_pubkey_from_base32 (aux_data, st.st_size,
@@ -376,25 +414,26 @@ http_map_finish (struct rspamd_http_connection *conn,
munmap (aux_data, st.st_size);
if (cbd->pk == NULL) {
- msg_err_pool ("cannot load pubkey file %s: bad pubkey",
+ msg_err_map ("cannot load pubkey file %s: bad pubkey",
fpath);
- goto end;
+ goto err;
}
rspamd_snprintf (fpath, sizeof (fpath), "%s.sig", cbd->tmpfile);
cbd->out_fd = rspamd_file_xopen (fpath, O_RDWR|O_CREAT, 00644);
if (cbd->out_fd == -1) {
- msg_err_pool ("cannot open signature file %s for writing: %s",
+ msg_err_map ("cannot open signature file %s for writing: %s",
fpath, strerror (errno));
- goto end;
+ goto err;
}
cbd->stage = map_load_signature;
rspamd_http_connection_reset (cbd->conn);
write_http_request (cbd);
+ MAP_RELEASE (cbd);
- goto end;
+ return 0;
}
else if (cbd->stage == map_load_signature) {
/* We can now check signature */
@@ -404,26 +443,25 @@ http_map_finish (struct rspamd_http_connection *conn,
in = rspamd_file_xmap (cbd->tmpfile, PROT_READ, &inlen);
if (in == NULL) {
- msg_err_pool ("cannot read tempfile %s: %s", cbd->tmpfile,
+ msg_err_map ("cannot read tempfile %s: %s", cbd->tmpfile,
strerror (errno));
- goto end;
+ goto err;
}
if (!rspamd_map_check_sig_pk (cbd->tmpfile, map, in, inlen, cbd->pk)) {
- goto end;
+ goto err;
}
}
g_assert (in != NULL);
- map->read_callback (map->pool, in, inlen, &cbd->cbdata, TRUE);
- map->fin_callback (map->pool, &cbd->cbdata);
-
- *map->user_data = cbd->cbdata.cur_data;
- msg_info_pool ("read map data from %s", cbd->data->host);
+ map->read_callback (in, inlen, &cbd->periodic->cbdata, TRUE);
+ msg_info_map ("read map data from %s", cbd->data->host);
+ cbd->periodic->cur_backend ++;
+ rspamd_map_periodic_callback (-1, EV_TIMEOUT, cbd->periodic);
}
- else if (msg->code == 304 && cbd->stage == map_load_file) {
- msg_debug_pool ("data is not modified for server %s",
+ else if (msg->code == 304 && (cbd->check && cbd->stage == map_load_file)) {
+ msg_debug_map ("data is not modified for server %s",
cbd->data->host);
if (msg->last_modified) {
@@ -434,12 +472,17 @@ http_map_finish (struct rspamd_http_connection *conn,
}
}
else {
- msg_info_pool ("cannot load map %s from %s: HTTP error %d",
- map->uri, cbd->data->host, msg->code);
+ msg_info_map ("cannot load map %s from %s: HTTP error %d",
+ bk->uri, cbd->data->host, msg->code);
}
-end:
- REF_RELEASE (cbd);
+ MAP_RELEASE (cbd);
+ return 0;
+
+err:
+ cbd->periodic->errored = 1;
+ rspamd_map_periodic_callback (-1, EV_TIMEOUT, cbd->periodic);
+ MAP_RELEASE (cbd);
return 0;
}
@@ -451,18 +494,18 @@ http_map_read (struct rspamd_http_connection *conn,
gsize len)
{
struct http_callback_data *cbd = conn->ud;
- rspamd_mempool_t *pool;
+ struct rspamd_map *map;
if (msg->code != 200 || len == 0) {
/* Ignore not full replies */
return 0;
}
- pool = cbd->map->pool;
+ map = cbd->map;
if (write (cbd->out_fd, chunk, len) == -1) {
- msg_err_pool ("cannot write to %s: %s", cbd->tmpfile, strerror (errno));
- REF_RELEASE (cbd);
+ msg_err_map ("cannot write to %s: %s", cbd->tmpfile, strerror (errno));
+ MAP_RELEASE (cbd);
return -1;
}
@@ -474,38 +517,32 @@ http_map_read (struct rspamd_http_connection *conn,
* Callback for reading data from file
*/
static gboolean
-read_map_file (struct rspamd_map *map, struct file_map_data *data)
+read_map_file (struct rspamd_map *map, struct file_map_data *data,
+ struct rspamd_map_backend *bk, struct map_periodic_cbdata *periodic)
{
- struct map_cb_data cbdata;
guchar *bytes;
gsize len;
- rspamd_mempool_t *pool = map->pool;
if (map->read_callback == NULL || map->fin_callback == NULL) {
- msg_err_pool ("bad callback for reading map file");
+ msg_err_map ("bad callback for reading map file");
return FALSE;
}
if (access (data->filename, R_OK) == -1) {
/* File does not exist, skipping */
- msg_err_pool ("map file is unavailable for reading");
- return FALSE;
+ msg_err_map ("map file is unavailable for reading");
+ return TRUE;
}
bytes = rspamd_file_xmap (data->filename, PROT_READ, &len);
if (bytes == NULL) {
- msg_err_pool ("can't open map %s: %s", data->filename, strerror (errno));
+ msg_err_map ("can't open map %s: %s", data->filename, strerror (errno));
return FALSE;
}
- cbdata.state = 0;
- cbdata.prev_data = *map->user_data;
- cbdata.cur_data = NULL;
- cbdata.map = map;
-
- if (map->is_signed) {
- if (!rspamd_map_check_file_sig (data->filename, map, bytes, len)) {
+ if (bk->is_signed) {
+ if (!rspamd_map_check_file_sig (data->filename, map, bk, bytes, len)) {
munmap (bytes, len);
return FALSE;
@@ -513,9 +550,7 @@ read_map_file (struct rspamd_map *map, struct file_map_data *data)
}
if (len > 0) {
- map->read_callback (map->pool, bytes, len, &cbdata, TRUE);
- map->fin_callback (map->pool, &cbdata);
- *map->user_data = cbdata.cur_data;
+ map->read_callback (bytes, len, &periodic->cbdata, TRUE);
}
munmap (bytes, len);
@@ -524,85 +559,74 @@ read_map_file (struct rspamd_map *map, struct file_map_data *data)
}
static void
-jitter_timeout_event (struct rspamd_map *map,
- gboolean locked, gboolean initial, gboolean errored)
+rspamd_map_periodic_dtor (struct map_periodic_cbdata *periodic)
{
- const gdouble error_mult = 20.0, lock_mult = 0.5;
- gdouble jittered_sec;
- gdouble timeout;
-
- if (initial) {
- timeout = 0.0;
- }
- else if (errored) {
- timeout = map->cfg->map_timeout * error_mult;
- }
- else if (locked) {
- timeout = map->cfg->map_timeout * lock_mult;
+ if (periodic->need_modify) {
+ /* We are done */
+ periodic->map->fin_callback (&periodic->cbdata);
+ *periodic->map->user_data = periodic->cbdata.cur_data;
}
else {
- timeout = map->cfg->map_timeout;
+ /* Not modified */
}
- /* Plan event again with jitter */
- evtimer_del (&map->ev);
- jittered_sec = rspamd_time_jitter (timeout, 0);
- double_to_tv (jittered_sec, &map->tv);
-
- evtimer_add (&map->ev, &map->tv);
+ rspamd_map_schedule_periodic (periodic->map, FALSE, FALSE, FALSE);
+ g_atomic_int_set (periodic->map->locked, 0);
+ g_slice_free1 (sizeof (*periodic), periodic);
}
-/**
- * Common file callback
- */
static void
-file_callback (gint fd, short what, void *ud)
+rspamd_map_schedule_periodic (struct rspamd_map *map,
+ gboolean locked, gboolean initial, gboolean errored)
{
- struct rspamd_map *map = ud;
- struct file_map_data *data = map->map_data;
- struct stat st;
- rspamd_mempool_t *pool;
+ const gdouble error_mult = 20.0, lock_mult = 0.5;
+ gdouble jittered_sec;
+ gdouble timeout;
+ struct map_periodic_cbdata *cbd;
- pool = map->pool;
+ timeout = map->poll_timeout;
- if (!g_atomic_int_compare_and_exchange (map->locked, 0, 1)) {
- msg_debug_pool (
- "don't try to reread map as it is locked by other process, will reread it later");
- jitter_timeout_event (map, TRUE, FALSE, FALSE);
- return;
+ if (initial) {
+ timeout = 0.0;
}
- if (stat (data->filename, &st) != -1 &&
- (st.st_mtime > data->st.st_mtime || data->st.st_mtime == -1)) {
- /* File was modified since last check */
- memcpy (&data->st, &st, sizeof (struct stat));
+ if (errored) {
+ timeout = map->poll_timeout * error_mult;
}
- else {
- g_atomic_int_set (map->locked, 0);
- jitter_timeout_event (map, FALSE, FALSE, FALSE);
- return;
+ else if (locked) {
+ timeout = map->poll_timeout * lock_mult;
}
- msg_info_pool ("rereading map file %s", data->filename);
+ cbd = g_slice_alloc0 (sizeof (*cbd));
+ cbd->cbdata.state = 0;
+ cbd->cbdata.prev_data = *map->user_data;
+ cbd->cbdata.cur_data = NULL;
+ cbd->cbdata.map = map;
+ cbd->map = map;
+ REF_INIT_RETAIN (cbd, rspamd_map_periodic_dtor);
- if (!read_map_file (map, data)) {
- jitter_timeout_event (map, FALSE, FALSE, TRUE);
+ if (initial) {
+ evtimer_set (&map->ev, rspamd_map_periodic_callback, cbd);
+ event_base_set (map->ev_base, &map->ev);
}
else {
- jitter_timeout_event (map, FALSE, FALSE, FALSE);
+ evtimer_del (&map->ev);
+ evtimer_set (&map->ev, rspamd_map_periodic_callback, cbd);
}
- g_atomic_int_set (map->locked, 0);
-}
+ jittered_sec = rspamd_time_jitter (timeout, 0);
+ double_to_tv (jittered_sec, &map->tv);
+ evtimer_add (&map->ev, &map->tv);
+}
static void
rspamd_map_dns_callback (struct rdns_reply *reply, void *arg)
{
struct http_callback_data *cbd = arg;
- rspamd_mempool_t *pool;
+ struct rspamd_map *map;
- pool = cbd->map->pool;
+ map = cbd->map;
if (reply->code == RDNS_RC_NOERROR) {
/*
@@ -641,36 +665,25 @@ rspamd_map_dns_callback (struct rdns_reply *reply, void *arg)
}
else {
/* We could not resolve host, so cowardly fail here */
- msg_err_pool ("cannot resolve %s", cbd->data->host);
+ msg_err_map ("cannot resolve %s", cbd->data->host);
}
}
- REF_RELEASE (cbd);
+ MAP_RELEASE (cbd);
}
/**
* Async HTTP callback
*/
static void
-http_callback (gint fd, short what, void *ud)
+rspamd_map_common_http_callback (struct rspamd_map *map, struct rspamd_map_backend *bk,
+ struct map_periodic_cbdata *periodic, gboolean check)
{
- struct rspamd_map *map = ud;
struct http_map_data *data;
struct http_callback_data *cbd;
- rspamd_mempool_t *pool;
gchar tmpbuf[PATH_MAX];
- data = map->map_data;
- pool = map->pool;
-
- if (!g_atomic_int_compare_and_exchange (map->locked, 0, 1)) {
- msg_debug_pool (
- "don't try to reread map as it is locked by other process, will reread it later");
- jitter_timeout_event (map, TRUE, FALSE, FALSE);
- return;
- }
-
- /* Plan event */
+ data = bk->data.hd;
cbd = g_slice_alloc0 (sizeof (struct http_callback_data));
rspamd_snprintf (tmpbuf, sizeof (tmpbuf),
@@ -679,10 +692,11 @@ http_callback (gint fd, short what, void *ud)
cbd->out_fd = mkstemp (tmpbuf);
if (cbd->out_fd == -1) {
- g_slice_free1 (sizeof (*cbd), cbd);
- msg_err_pool ("cannot create tempfile: %s", strerror (errno));
- jitter_timeout_event (map, FALSE, FALSE, TRUE);
+ msg_err_map ("cannot create tempfile: %s", strerror (errno));
g_atomic_int_set (map->locked, 0);
+ g_slice_free1 (sizeof (*cbd), cbd);
+ periodic->errored = TRUE;
+ rspamd_map_periodic_callback (-1, EV_TIMEOUT, periodic);
return;
}
@@ -692,39 +706,165 @@ http_callback (gint fd, short what, void *ud)
cbd->map = map;
cbd->data = data;
cbd->fd = -1;
- cbd->cbdata.state = 0;
- cbd->cbdata.prev_data = *cbd->map->user_data;
- cbd->cbdata.cur_data = NULL;
- cbd->cbdata.map = cbd->map;
+ cbd->check = check;
+ cbd->periodic = periodic;
+ MAP_RETAIN (periodic);
+ cbd->bk = bk;
+ MAP_RETAIN (bk);
cbd->stage = map_resolve_host2;
double_to_tv (map->cfg->map_timeout, &cbd->tv);
REF_INIT_RETAIN (cbd, free_http_cbdata);
- msg_debug_pool ("reading map data from %s", data->host);
+ msg_debug_map ("%s map data from %s", check ? "checking" : "reading",
+ data->host);
/* Send both A and AAAA requests */
if (map->r->r) {
if (rdns_make_request_full (map->r->r, rspamd_map_dns_callback, cbd,
map->cfg->dns_timeout, map->cfg->dns_retransmits, 1,
data->host, RDNS_REQUEST_A)) {
- REF_RETAIN (cbd);
+ MAP_RETAIN (cbd);
}
if (rdns_make_request_full (map->r->r, rspamd_map_dns_callback, cbd,
map->cfg->dns_timeout, map->cfg->dns_retransmits, 1,
data->host, RDNS_REQUEST_AAAA)) {
- REF_RETAIN (cbd);
+ MAP_RETAIN (cbd);
}
- jitter_timeout_event (map, FALSE, FALSE, FALSE);
map->dtor = free_http_cbdata_dtor;
map->dtor_data = cbd;
}
else {
- msg_warn_pool ("cannot load map: DNS resolver is not initialized");
- jitter_timeout_event (map, FALSE, FALSE, TRUE);
+ msg_warn_map ("cannot load map: DNS resolver is not initialized");
+ cbd->periodic->errored = TRUE;
}
- /* We don't need own ref as it is now refcounted by DNS requests */
- REF_RELEASE (cbd);
+ /* We don't need own ref as it is now ref counted by DNS handlers */
+ MAP_RELEASE (cbd);
+}
+
+static void
+rspamd_map_http_check_callback (gint fd, short what, void *ud)
+{
+ struct map_periodic_cbdata *cbd = ud;
+ struct rspamd_map *map;
+ struct rspamd_map_backend *bk;
+
+ map = cbd->map;
+ bk = g_ptr_array_index (cbd->map->backends, cbd->cur_backend);
+
+ rspamd_map_common_http_callback (map, bk, cbd, TRUE);
+}
+
+static void
+rspamd_map_http_read_callback (gint fd, short what, void *ud)
+{
+ struct map_periodic_cbdata *cbd = ud;
+ struct rspamd_map *map;
+ struct rspamd_map_backend *bk;
+
+ map = cbd->map;
+ bk = g_ptr_array_index (cbd->map->backends, cbd->cur_backend);
+ rspamd_map_common_http_callback (map, bk, cbd, FALSE);
+}
+
+static void
+rspamd_map_file_check_callback (gint fd, short what, void *ud)
+{
+ struct rspamd_map *map;
+ struct map_periodic_cbdata *periodic = ud;
+ struct file_map_data *data;
+ struct rspamd_map_backend *bk;
+ struct stat st;
+
+ map = periodic->map;
+
+ bk = g_ptr_array_index (map->backends, periodic->cur_backend);
+ data = bk->data.fd;
+
+ if (stat (data->filename, &st) != -1 &&
+ (st.st_mtime > data->st.st_mtime || data->st.st_mtime == -1)) {
+ /* File was modified since last check */
+ memcpy (&data->st, &st, sizeof (struct stat));
+ periodic->need_modify = TRUE;
+ periodic->cur_backend = 0;
+ rspamd_map_periodic_callback (-1, EV_TIMEOUT, periodic);
+
+ return;
+ }
+
+ /* Switch to the next backend */
+ periodic->cur_backend ++;
+ rspamd_map_periodic_callback (-1, EV_TIMEOUT, periodic);
+}
+
+static void
+rspamd_map_file_read_callback (gint fd, short what, void *ud)
+{
+ struct rspamd_map *map;
+ struct map_periodic_cbdata *periodic = ud;
+ struct file_map_data *data;
+ struct rspamd_map_backend *bk;
+
+ map = periodic->map;
+
+ bk = g_ptr_array_index (map->backends, periodic->cur_backend);
+ data = bk->data.fd;
+
+ msg_info_map ("rereading map file %s", data->filename);
+
+ if (!read_map_file (map, data, bk, periodic)) {
+ periodic->errored = TRUE;
+ }
+
+ /* Switch to the next backend */
+ periodic->cur_backend ++;
+ rspamd_map_periodic_callback (-1, EV_TIMEOUT, periodic);
+}
+
+static void
+rspamd_map_periodic_callback (gint fd, short what, void *ud)
+{
+ struct rspamd_map_backend *bk;
+ struct map_periodic_cbdata *cbd = ud;
+
+ if (cbd->errored) {
+ /* We should not check other backends if some backend has failed */
+ rspamd_map_schedule_periodic (cbd->map, FALSE, FALSE, TRUE);
+ g_atomic_int_set (cbd->map->locked, 0);
+ MAP_RELEASE (cbd);
+
+ return;
+ }
+
+ /* For each backend we need to check for modifications */
+ if (cbd->cur_backend >= cbd->map->backends->len) {
+ /* Last backend */
+ MAP_RELEASE (cbd);
+
+ return;
+ }
+
+ bk = g_ptr_array_index (cbd->map->backends, cbd->cur_backend);
+ g_assert (bk != NULL);
+
+ if (cbd->need_modify) {
+ /* Load data from the next backend */
+ if (bk->protocol == MAP_PROTO_HTTP) {
+ rspamd_map_http_read_callback (fd, what, cbd);
+ }
+ else {
+ rspamd_map_file_read_callback (fd, what, cbd);
+ }
+ }
+ else {
+ /* Check the next backend */
+ if (bk->protocol == MAP_PROTO_HTTP) {
+ rspamd_map_http_check_callback (fd, what, cbd);
+ }
+ else {
+ rspamd_map_file_check_callback (fd, what, cbd);
+ }
+ }
}
/* Start watching event for all maps */
@@ -735,29 +875,21 @@ rspamd_map_watch (struct rspamd_config *cfg,
{
GList *cur = cfg->maps;
struct rspamd_map *map;
- struct file_map_data *fdata;
/* First of all do synced read of data */
while (cur) {
map = cur->data;
map->ev_base = ev_base;
map->r = resolver;
- event_base_set (map->ev_base, &map->ev);
- if (map->protocol == MAP_PROTO_FILE) {
- evtimer_set (&map->ev, file_callback, map);
- /* Read initial data */
- fdata = map->map_data;
- if (fdata->st.st_mtime != -1) {
- /* Do not try to read non-existent file */
- read_map_file (map, map->map_data);
- }
- /* Plan event with jitter */
- jitter_timeout_event (map, FALSE, TRUE, FALSE);
+ if (!g_atomic_int_compare_and_exchange (map->locked, 0, 1)) {
+ msg_debug_map (
+ "don't try to reread map as it is locked by other process, "
+ "will reread it later");
+ rspamd_map_schedule_periodic (map, TRUE, TRUE, FALSE);
}
- else if (map->protocol == MAP_PROTO_HTTP) {
- evtimer_set (&map->ev, http_callback, map);
- jitter_timeout_event (map, FALSE, TRUE, FALSE);
+ else {
+ rspamd_map_schedule_periodic (map, FALSE, TRUE, FALSE);
}
cur = g_list_next (cur);
@@ -769,10 +901,17 @@ rspamd_map_remove_all (struct rspamd_config *cfg)
{
struct rspamd_map *map;
GList *cur;
+ struct rspamd_map_backend *bk;
+ guint i;
for (cur = cfg->maps; cur != NULL; cur = g_list_next (cur)) {
map = cur->data;
+ for (i = 0; i < map->backends->len; i ++) {
+ bk = g_ptr_array_index (map->backends, i);
+ MAP_RELEASE (bk);
+ }
+
if (map->dtor) {
map->dtor (map->dtor_data);
}
@@ -780,26 +919,21 @@ rspamd_map_remove_all (struct rspamd_config *cfg)
g_list_free (cfg->maps);
cfg->maps = NULL;
-
- if (cfg->map_pool != NULL) {
- rspamd_mempool_delete (cfg->map_pool);
- cfg->map_pool = NULL;
- }
}
static const gchar *
rspamd_map_check_proto (struct rspamd_config *cfg,
- const gchar *map_line, struct rspamd_map *map)
+ const gchar *map_line, struct rspamd_map_backend *bk)
{
const gchar *pos = map_line, *end, *end_key;
- g_assert (map != NULL);
+ g_assert (bk != NULL);
g_assert (pos != NULL);
end = pos + strlen (pos);
if (g_ascii_strncasecmp (pos, "sign+", sizeof ("sign+") - 1) == 0) {
- map->is_signed = TRUE;
+ bk->is_signed = TRUE;
pos += sizeof ("sign+") - 1;
}
@@ -808,10 +942,10 @@ rspamd_map_check_proto (struct rspamd_config *cfg,
end_key = memchr (pos, '+', end - pos);
if (end_key != NULL) {
- map->trusted_pubkey = rspamd_pubkey_from_base32 (pos, end_key - pos,
+ bk->trusted_pubkey = rspamd_pubkey_from_base32 (pos, end_key - pos,
RSPAMD_KEYPAIR_SIGN, RSPAMD_CRYPTOBOX_MODE_25519);
- if (map->trusted_pubkey == NULL) {
+ if (bk->trusted_pubkey == NULL) {
msg_err_config ("cannot read pubkey from map: %s",
map_line);
return NULL;
@@ -820,10 +954,10 @@ rspamd_map_check_proto (struct rspamd_config *cfg,
}
else if (end - pos > 64) {
/* Try hex encoding */
- map->trusted_pubkey = rspamd_pubkey_from_hex (pos, 64,
+ bk->trusted_pubkey = rspamd_pubkey_from_hex (pos, 64,
RSPAMD_KEYPAIR_SIGN, RSPAMD_CRYPTOBOX_MODE_25519);
- if (map->trusted_pubkey == NULL) {
+ if (bk->trusted_pubkey == NULL) {
msg_err_config ("cannot read pubkey from map: %s",
map_line);
return NULL;
@@ -841,24 +975,24 @@ rspamd_map_check_proto (struct rspamd_config *cfg,
}
}
- map->protocol = MAP_PROTO_FILE;
+ bk->protocol = MAP_PROTO_FILE;
if (g_ascii_strncasecmp (pos, "http://",
sizeof ("http://") - 1) == 0) {
- map->protocol = MAP_PROTO_HTTP;
+ bk->protocol = MAP_PROTO_HTTP;
/* Include http:// */
- map->uri = rspamd_mempool_strdup (cfg->cfg_pool, pos);
+ bk->uri = g_strdup (pos);
pos += sizeof ("http://") - 1;
}
else if (g_ascii_strncasecmp (pos, "file://", sizeof ("file://") -
1) == 0) {
pos += sizeof ("file://") - 1;
/* Exclude file:// */
- map->uri = rspamd_mempool_strdup (cfg->cfg_pool, pos);
+ bk->uri = g_strdup (pos);
}
else if (*pos == '/') {
/* Trivial file case */
- map->uri = rspamd_mempool_strdup (cfg->cfg_pool, pos);
+ bk->uri = g_strdup (pos);
}
else {
msg_err_config ("invalid map fetching protocol: %s", map_line);
@@ -893,95 +1027,79 @@ rspamd_map_is_map (const gchar *map_line)
return ret;
}
-struct rspamd_map *
-rspamd_map_add (struct rspamd_config *cfg,
- const gchar *map_line,
- const gchar *description,
- map_cb_t read_callback,
- map_fin_cb_t fin_callback,
- void **user_data)
+static void
+rspamd_map_backend_dtor (struct rspamd_map_backend *bk)
{
- struct rspamd_map *new_map;
- const gchar *def;
- struct file_map_data *fdata;
- struct http_map_data *hdata;
- gchar *cksum_encoded, cksum[rspamd_cryptobox_HASHBYTES];
- rspamd_mempool_t *pool;
- struct http_parser_url up;
- rspamd_ftok_t tok;
-
- if (cfg->map_pool == NULL) {
- cfg->map_pool = rspamd_mempool_new (rspamd_mempool_suggest_size (),
- "map");
- memcpy (cfg->map_pool->tag.uid, cfg->cfg_pool->tag.uid,
- sizeof (cfg->map_pool->tag.uid));
+ if (bk->protocol == MAP_PROTO_FILE) {
+ g_free (bk->data.fd->filename);
+ g_slice_free1 (sizeof (*bk->data.fd), bk->data.fd);
+ }
+ else {
+ g_free (bk->data.hd->host);
+ g_free (bk->data.hd->path);
+ g_slice_free1 (sizeof (*bk->data.hd), bk->data.hd);
}
- new_map = rspamd_mempool_alloc0 (cfg->map_pool, sizeof (struct rspamd_map));
+ g_slice_free1 (sizeof (*bk), bk);
+}
- /* First of all detect protocol line */
- if (rspamd_map_check_proto (cfg, map_line, new_map) == NULL) {
- return NULL;
- }
+static struct rspamd_map_backend *
+rspamd_map_parse_backend (struct rspamd_config *cfg, const gchar *map_line)
+{
+ struct rspamd_map_backend *bk;
+ struct file_map_data *fdata = NULL;
+ struct http_map_data *hdata = NULL;
+ struct http_parser_url up;
+ rspamd_ftok_t tok;
- new_map->read_callback = read_callback;
- new_map->fin_callback = fin_callback;
- new_map->user_data = user_data;
- new_map->cfg = cfg;
- new_map->id = g_random_int ();
- new_map->locked =
- rspamd_mempool_alloc0_shared (cfg->cfg_pool, sizeof (gint));
- def = new_map->uri;
+ bk = g_slice_alloc0 (sizeof (*bk));
+ REF_INIT_RETAIN (bk, rspamd_map_backend_dtor);
- if (description != NULL) {
- new_map->description =
- rspamd_mempool_strdup (cfg->cfg_pool, description);
+ if (!rspamd_map_check_proto (cfg, map_line, bk)) {
+ goto err;
}
/* Now check for each proto separately */
- if (new_map->protocol == MAP_PROTO_FILE) {
- fdata =
- rspamd_mempool_alloc0 (cfg->map_pool,
- sizeof (struct file_map_data));
- if (access (def, R_OK) == -1) {
+ if (bk->protocol == MAP_PROTO_FILE) {
+ fdata = g_slice_alloc0 (sizeof (struct file_map_data));
+
+ if (access (bk->uri, R_OK) == -1) {
if (errno != ENOENT) {
- msg_err_config ("cannot open file '%s': %s", def, strerror
- (errno));
+ msg_err_config ("cannot open file '%s': %s", bk->uri, strerror (errno));
return NULL;
}
msg_info_config (
- "map '%s' is not found, but it can be loaded automatically later",
- def);
+ "map '%s' is not found, but it can be loaded automatically later",
+ bk->uri);
/* We still can add this file */
fdata->st.st_mtime = -1;
}
else {
- stat (def, &fdata->st);
+ stat (bk->uri, &fdata->st);
}
- fdata->filename = rspamd_mempool_strdup (cfg->map_pool, def);
- new_map->map_data = fdata;
+
+ fdata->filename = g_strdup (bk->uri);
+ bk->data.fd = fdata;
}
- else if (new_map->protocol == MAP_PROTO_HTTP) {
- hdata =
- rspamd_mempool_alloc0 (cfg->map_pool,
- sizeof (struct http_map_data));
+ else if (bk->protocol == MAP_PROTO_HTTP) {
+ hdata = g_slice_alloc0 (sizeof (struct http_map_data));
memset (&up, 0, sizeof (up));
- if (http_parser_parse_url (new_map->uri, strlen (new_map->uri), FALSE,
+ if (http_parser_parse_url (bk->uri, strlen (bk->uri), FALSE,
&up) != 0) {
- msg_err_config ("cannot parse HTTP url: %s", new_map->uri);
- return NULL;
+ msg_err_config ("cannot parse HTTP url: %s", bk->uri);
+ goto err;
}
else {
if (!(up.field_set & 1 << UF_HOST)) {
- msg_err_config ("cannot parse HTTP url: %s: no host", new_map->uri);
+ msg_err_config ("cannot parse HTTP url: %s: no host", bk->uri);
return NULL;
}
- tok.begin = new_map->uri + up.field_data[UF_HOST].off;
+ tok.begin = bk->uri + up.field_data[UF_HOST].off;
tok.len = up.field_data[UF_HOST].len;
- hdata->host = rspamd_mempool_ftokdup (cfg->map_pool, &tok);
+ hdata->host = rspamd_ftokdup (&tok);
if (up.field_set & 1 << UF_PORT) {
hdata->port = up.port;
@@ -991,31 +1109,235 @@ rspamd_map_add (struct rspamd_config *cfg,
}
if (up.field_set & 1 << UF_PATH) {
- tok.begin = new_map->uri + up.field_data[UF_PATH].off;
+ tok.begin = bk->uri + up.field_data[UF_PATH].off;
tok.len = strlen (tok.begin);
- hdata->path = rspamd_mempool_ftokdup (cfg->map_pool, &tok);
+ hdata->path = rspamd_ftokdup (&tok);
}
}
- new_map->map_data = hdata;
+ bk->data.hd = hdata;
+ }
+
+ return bk;
+
+err:
+ MAP_RELEASE (bk);
+
+ if (hdata) {
+ g_slice_free1 (sizeof (*hdata), hdata);
+ }
+
+ if (fdata) {
+ g_slice_free1 (sizeof (*fdata), fdata);
+ }
+
+ return NULL;
+}
+static void
+rspamd_map_calculate_hash (struct rspamd_map *map)
+{
+ struct rspamd_map_backend *bk;
+ guint i;
+ rspamd_cryptobox_hash_state_t st;
+ gchar *cksum_encoded, cksum[rspamd_cryptobox_HASHBYTES];
+
+ rspamd_cryptobox_hash_init (&st, NULL, 0);
+
+ for (i = 0; i < map->backends->len; i ++) {
+ bk = g_ptr_array_index (map->backends, i);
+ rspamd_cryptobox_hash_update (&st, bk->uri, strlen (bk->uri));
}
- /* Temp pool */
- rspamd_cryptobox_hash (cksum, new_map->uri, strlen (new_map->uri), NULL, 0);
+ rspamd_cryptobox_hash_final (&st, cksum);
cksum_encoded = rspamd_encode_base32 (cksum, sizeof (cksum));
- new_map->pool = rspamd_mempool_new (rspamd_mempool_suggest_size (), "map");
- rspamd_strlcpy (new_map->pool->tag.uid, cksum_encoded,
- sizeof (new_map->pool->tag.uid));
+ rspamd_strlcpy (map->tag, cksum_encoded, sizeof (map->tag));
g_free (cksum_encoded);
- pool = new_map->pool;
- msg_info_pool ("added map %s", new_map->uri);
+}
+struct rspamd_map *
+rspamd_map_add (struct rspamd_config *cfg,
+ const gchar *map_line,
+ const gchar *description,
+ map_cb_t read_callback,
+ map_fin_cb_t fin_callback,
+ void **user_data)
+{
+ struct rspamd_map *map;
+ struct rspamd_map_backend *bk;
+
+ bk = rspamd_map_parse_backend (cfg, map_line);
+ if (bk == NULL) {
+ return NULL;
+ }
+
+ map = g_slice_alloc0 (sizeof (struct rspamd_map));
+ map->read_callback = read_callback;
+ map->fin_callback = fin_callback;
+ map->user_data = user_data;
+ map->cfg = cfg;
+ map->id = g_random_int ();
+ map->locked =
+ rspamd_mempool_alloc0_shared (cfg->cfg_pool, sizeof (gint));
+ map->backends = g_ptr_array_sized_new (1);
+ g_ptr_array_add (map->backends, bk);
+ map->name = g_strdup (map_line);
+ map->poll_timeout = cfg->map_timeout;
+
+ if (description != NULL) {
+ map->description = g_strdup (description);
+ }
- cfg->maps = g_list_prepend (cfg->maps, new_map);
+ rspamd_map_calculate_hash (map);
+ msg_info_map ("added map %s", bk->uri);
- return new_map;
+ cfg->maps = g_list_prepend (cfg->maps, map);
+
+ return map;
+}
+
+struct rspamd_map*
+rspamd_map_add_from_ucl (struct rspamd_config *cfg,
+ const ucl_object_t *obj,
+ const gchar *description,
+ map_cb_t read_callback,
+ map_fin_cb_t fin_callback,
+ void **user_data)
+{
+ ucl_object_iter_t it = NULL;
+ const ucl_object_t *cur, *elt;
+ struct rspamd_map *map;
+ struct rspamd_map_backend *bk;
+
+ g_assert (obj != NULL);
+
+ if (ucl_object_type (obj) == UCL_STRING) {
+ /* Just a plain string */
+ return rspamd_map_add (cfg, ucl_object_tostring (obj), NULL,
+ read_callback, fin_callback, user_data);
+ }
+
+ map = g_slice_alloc0 (sizeof (struct rspamd_map));
+ map->read_callback = read_callback;
+ map->fin_callback = fin_callback;
+ map->user_data = user_data;
+ map->cfg = cfg;
+ map->id = g_random_int ();
+ map->locked =
+ rspamd_mempool_alloc0_shared (cfg->cfg_pool, sizeof (gint));
+ map->backends = g_ptr_array_new ();
+ map->poll_timeout = cfg->map_timeout;
+
+ if (description) {
+ map->description = g_strdup (description);
+ }
+
+ if (ucl_object_type (obj) == UCL_ARRAY) {
+ /* Add array of maps as multiple backends */
+ while ((cur = ucl_object_iterate (obj, &it, true)) != NULL) {
+ if (ucl_object_type (cur) == UCL_STRING) {
+ bk = rspamd_map_parse_backend (cfg, ucl_object_tostring (cur));
+
+ if (bk != NULL) {
+ g_ptr_array_add (map->backends, bk);
+
+ if (!map->name) {
+ map->name = g_strdup (ucl_object_tostring (cur));
+ }
+ }
+ }
+ else {
+ msg_err_config ("bad map element type: %s",
+ ucl_object_type_to_string (ucl_object_type (cur)));
+ }
+ }
+
+ if (map->backends->len == 0) {
+ msg_err_config ("map has no urls to be loaded");
+ goto err;
+ }
+ }
+ else if (ucl_object_type (obj) == UCL_OBJECT) {
+ elt = ucl_object_lookup (obj, "name");
+ if (elt && ucl_object_type (elt) == UCL_STRING) {
+ map->name = g_strdup (ucl_object_tostring (elt));
+ }
+
+ elt = ucl_object_lookup (obj, "description");
+ if (elt && ucl_object_type (elt) == UCL_STRING) {
+ if (map->description) {
+ g_free (map->description);
+ }
+
+ map->description = g_strdup (ucl_object_tostring (elt));
+ }
+
+ elt = ucl_object_lookup_any (obj, "timeout", "poll", "poll_time",
+ "watch_interval", NULL);
+ if (elt) {
+ map->poll_timeout = ucl_object_todouble (elt);
+ }
+
+ elt = ucl_object_lookup_any (obj, "upstreams", "url", "urls", NULL);
+ if (elt == NULL) {
+ msg_err_config ("map has no urls to be loaded");
+ goto err;
+ }
+
+ if (ucl_object_type (obj) == UCL_ARRAY) {
+ /* Add array of maps as multiple backends */
+ while ((cur = ucl_object_iterate (elt, &it, true)) != NULL) {
+ if (ucl_object_type (cur) == UCL_STRING) {
+ bk = rspamd_map_parse_backend (cfg, ucl_object_tostring (cur));
+
+ if (bk != NULL) {
+ g_ptr_array_add (map->backends, bk);
+
+ if (!map->name) {
+ map->name = g_strdup (ucl_object_tostring (cur));
+ }
+ }
+ }
+ else {
+ msg_err_config ("bad map element type: %s",
+ ucl_object_type_to_string (ucl_object_type (cur)));
+ goto err;
+ }
+ }
+
+ if (map->backends->len == 0) {
+ msg_err_config ("map has no urls to be loaded");
+ goto err;
+ }
+ }
+ else if (ucl_object_type (elt) == UCL_STRING) {
+ bk = rspamd_map_parse_backend (cfg, ucl_object_tostring (elt));
+
+ if (bk != NULL) {
+ g_ptr_array_add (map->backends, bk);
+
+ if (!map->name) {
+ map->name = g_strdup (ucl_object_tostring (cur));
+ }
+ }
+ }
+
+ if (map->backends->len == 0) {
+ msg_err_config ("map has no urls to be loaded");
+ goto err;
+ }
+ }
+
+ return map;
+
+err:
+ g_ptr_array_free (map->backends, TRUE);
+ g_free (map->name);
+ g_free (map->description);
+ g_slice_free1 (sizeof (*map), map);
+
+ return NULL;
}
/**
@@ -1023,18 +1345,18 @@ rspamd_map_add (struct rspamd_config *cfg,
*/
#define MAP_STORE_KEY do { \
- key = rspamd_mempool_alloc (pool, p - c + 1); \
+ key = g_malloc (p - c + 1); \
rspamd_strlcpy (key, c, p - c + 1); \
} while (0)
#define MAP_STORE_VALUE do { \
- value = rspamd_mempool_alloc (pool, p - c + 1); \
+ value = g_malloc (p - c + 1); \
rspamd_strlcpy (value, c, p - c + 1); \
value = g_strstrip (value); \
} while (0)
gchar *
-rspamd_parse_kv_list (rspamd_mempool_t * pool,
+rspamd_parse_kv_list (
gchar * chunk,
gint len,
struct map_cb_data *data,
@@ -1058,6 +1380,7 @@ rspamd_parse_kv_list (rspamd_mempool_t * pool,
};
gchar *c, *p, *key = NULL, *value = NULL, *end;
+ struct rspamd_map *map = data->map;
p = chunk;
c = p;
@@ -1095,8 +1418,9 @@ rspamd_parse_kv_list (rspamd_mempool_t * pool,
/* Store a single key */
MAP_STORE_KEY;
func (data->cur_data, key, default_value);
- msg_debug_pool ("insert key only pair: %s -> %s",
+ msg_debug_map ("insert key only pair: %s -> %s",
key, default_value);
+ g_free (key);
}
key = NULL;
@@ -1107,8 +1431,9 @@ rspamd_parse_kv_list (rspamd_mempool_t * pool,
/* Store a single key */
MAP_STORE_KEY;
func (data->cur_data, key, default_value);
- msg_debug_pool ("insert key only pair: %s -> %s",
+ msg_debug_map ("insert key only pair: %s -> %s",
key, default_value);
+ g_free (key);
}
data->state = map_read_eol;
@@ -1176,8 +1501,9 @@ rspamd_parse_kv_list (rspamd_mempool_t * pool,
/* Store a single key */
MAP_STORE_KEY;
func (data->cur_data, key, default_value);
- msg_debug_pool ("insert key only pair: %s -> %s",
+ msg_debug_map ("insert key only pair: %s -> %s",
key, default_value);
+ g_free (key);
key = NULL;
}
@@ -1188,8 +1514,10 @@ rspamd_parse_kv_list (rspamd_mempool_t * pool,
/* Store a single key */
MAP_STORE_KEY;
func (data->cur_data, key, default_value);
- msg_debug_pool ("insert key only pair: %s -> %s",
+
+ msg_debug_map ("insert key only pair: %s -> %s",
key, default_value);
+ g_free (key);
key = NULL;
}
@@ -1234,15 +1562,18 @@ rspamd_parse_kv_list (rspamd_mempool_t * pool,
/* Store a single key */
MAP_STORE_VALUE;
func (data->cur_data, key, value);
- msg_debug_pool ("insert key value pair: %s -> %s",
+ msg_debug_map ("insert key value pair: %s -> %s",
key, value);
+ g_free (key);
+ g_free (value);
key = NULL;
value = NULL;
}
else {
func (data->cur_data, key, default_value);
- msg_debug_pool ("insert key only pair: %s -> %s",
+ msg_debug_map ("insert key only pair: %s -> %s",
key, default_value);
+ g_free (key);
key = NULL;
}
@@ -1253,15 +1584,18 @@ rspamd_parse_kv_list (rspamd_mempool_t * pool,
/* Store a single key */
MAP_STORE_VALUE;
func (data->cur_data, key, value);
- msg_debug_pool ("insert key value pair: %s -> %s",
+ msg_debug_map ("insert key value pair: %s -> %s",
key, value);
+ g_free (key);
+ g_free (value);
key = NULL;
value = NULL;
}
else {
func (data->cur_data, key, default_value);
- msg_debug_pool ("insert key only pair: %s -> %s",
+ msg_debug_map ("insert key only pair: %s -> %s",
key, default_value);
+ g_free (key);
key = NULL;
}
@@ -1314,8 +1648,9 @@ rspamd_parse_kv_list (rspamd_mempool_t * pool,
/* Store a single key */
MAP_STORE_KEY;
func (data->cur_data, key, default_value);
- msg_debug_pool ("insert key only pair: %s -> %s",
+ msg_debug_map ("insert key only pair: %s -> %s",
key, default_value);
+ g_free (key);
key = NULL;
}
break;
@@ -1325,15 +1660,18 @@ rspamd_parse_kv_list (rspamd_mempool_t * pool,
/* Store a single key */
MAP_STORE_VALUE;
func (data->cur_data, key, value);
- msg_debug_pool ("insert key value pair: %s -> %s",
+ msg_debug_map ("insert key value pair: %s -> %s",
key, value);
+ g_free (key);
+ g_free (value);
key = NULL;
value = NULL;
}
else {
func (data->cur_data, key, default_value);
- msg_debug_pool ("insert key only pair: %s -> %s",
+ msg_debug_map ("insert key only pair: %s -> %s",
key, default_value);
+ g_free (key);
key = NULL;
}
break;
@@ -1347,80 +1685,95 @@ rspamd_parse_kv_list (rspamd_mempool_t * pool,
* Radix tree helper function
*/
static void
-radix_tree_insert_helper (gpointer st, gconstpointer key, gpointer value)
+radix_tree_insert_helper (gpointer st, gconstpointer key, gconstpointer value)
{
radix_compressed_t *tree = (radix_compressed_t *)st;
rspamd_radix_add_iplist ((gchar *)key, " ,;", tree, value);
}
+static void
+hash_insert_helper (gpointer st, gconstpointer key, gconstpointer value)
+{
+ GHashTable *ht = st;
+ gpointer k, v;
+
+ k = g_strdup (key);
+ v = g_strdup (value);
+ g_hash_table_replace (ht, k, v);
+}
+
/* Helpers */
gchar *
-rspamd_hosts_read (rspamd_mempool_t * pool,
+rspamd_hosts_read (
gchar * chunk,
gint len,
struct map_cb_data *data,
gboolean final)
{
if (data->cur_data == NULL) {
- data->cur_data = g_hash_table_new (rspamd_strcase_hash,
- rspamd_strcase_equal);
+ data->cur_data = g_hash_table_new_full (rspamd_strcase_hash,
+ rspamd_strcase_equal, g_free, g_free);
}
- return rspamd_parse_kv_list (pool,
+ return rspamd_parse_kv_list (
chunk,
len,
data,
- (insert_func) g_hash_table_insert,
+ hash_insert_helper,
hash_fill,
final);
}
void
-rspamd_hosts_fin (rspamd_mempool_t * pool, struct map_cb_data *data)
+rspamd_hosts_fin (struct map_cb_data *data)
{
+ struct rspamd_map *map = data->map;
+
if (data->prev_data) {
- g_hash_table_destroy (data->prev_data);
+ g_hash_table_unref (data->prev_data);
}
if (data->cur_data) {
- msg_info_pool ("read hash of %d elements", g_hash_table_size
+ msg_info_map ("read hash of %d elements", g_hash_table_size
(data->cur_data));
}
}
gchar *
-rspamd_kv_list_read (rspamd_mempool_t * pool,
+rspamd_kv_list_read (
gchar * chunk,
gint len,
struct map_cb_data *data,
gboolean final)
{
if (data->cur_data == NULL) {
- data->cur_data = g_hash_table_new (rspamd_strcase_hash,
- rspamd_strcase_equal);
+ data->cur_data = g_hash_table_new_full (rspamd_strcase_hash,
+ rspamd_strcase_equal, g_free, g_free);
}
- return rspamd_parse_kv_list (pool,
+ return rspamd_parse_kv_list (
chunk,
len,
data,
- (insert_func) g_hash_table_insert,
+ hash_insert_helper,
"",
final);
}
void
-rspamd_kv_list_fin (rspamd_mempool_t * pool, struct map_cb_data *data)
+rspamd_kv_list_fin (struct map_cb_data *data)
{
+ struct rspamd_map *map = data->map;
+
if (data->prev_data) {
- g_hash_table_destroy (data->prev_data);
+ g_hash_table_unref (data->prev_data);
}
if (data->cur_data) {
- msg_info_pool ("read hash of %d elements", g_hash_table_size
+ msg_info_map ("read hash of %d elements", g_hash_table_size
(data->cur_data));
}
}
gchar *
-rspamd_radix_read (rspamd_mempool_t * pool,
+rspamd_radix_read (
gchar * chunk,
gint len,
struct map_cb_data *data,
@@ -1428,30 +1781,33 @@ rspamd_radix_read (rspamd_mempool_t * pool,
{
radix_compressed_t *tree;
rspamd_mempool_t *rpool;
+ struct rspamd_map *map = data->map;
if (data->cur_data == NULL) {
tree = radix_create_compressed ();
rpool = radix_get_pool (tree);
- memcpy (rpool->tag.uid, pool->tag.uid, sizeof (rpool->tag.uid));
+ memcpy (rpool->tag.uid, map->tag, sizeof (rpool->tag.uid));
data->cur_data = tree;
}
- return rspamd_parse_kv_list (pool,
+ return rspamd_parse_kv_list (
chunk,
len,
data,
- (insert_func) radix_tree_insert_helper,
+ radix_tree_insert_helper,
hash_fill,
final);
}
void
-rspamd_radix_fin (rspamd_mempool_t * pool, struct map_cb_data *data)
+rspamd_radix_fin (struct map_cb_data *data)
{
+ struct rspamd_map *map = data->map;
+
if (data->prev_data) {
radix_destroy_compressed (data->prev_data);
}
if (data->cur_data) {
- msg_info_pool ("read radix trie of %z elements: %s",
+ msg_info_map ("read radix trie of %z elements: %s",
radix_get_size (data->cur_data), radix_get_info (data->cur_data));
}
}
@@ -1494,6 +1850,10 @@ rspamd_regexp_map_destroy (struct rspamd_regexp_map *re_map)
rspamd_regexp_unref (re);
}
+ for (i = 0; i < re_map->values->len; i ++) {
+ g_free (g_ptr_array_index (re_map->values, i));
+ }
+
g_ptr_array_free (re_map->regexps, TRUE);
g_ptr_array_free (re_map->values, TRUE);
@@ -1519,18 +1879,18 @@ rspamd_regexp_map_destroy (struct rspamd_regexp_map *re_map)
}
static void
-rspamd_re_map_insert_helper (gpointer st, gpointer key, gpointer value)
+rspamd_re_map_insert_helper (gpointer st, gconstpointer key, gconstpointer value)
{
struct rspamd_regexp_map *re_map = st;
+ struct rspamd_map *map;
rspamd_regexp_t *re;
GError *err = NULL;
- rspamd_mempool_t *pool;
- pool = re_map->map->pool;
+ map = re_map->map;
re = rspamd_regexp_new (key, NULL, &err);
if (re == NULL) {
- msg_err_pool ("cannot parse regexp %s: %e", key, err);
+ msg_err_map ("cannot parse regexp %s: %e", key, err);
if (err) {
g_error_free (err);
@@ -1540,7 +1900,7 @@ rspamd_re_map_insert_helper (gpointer st, gpointer key, gpointer value)
}
g_ptr_array_add (re_map->regexps, re);
- g_ptr_array_add (re_map->values, value);
+ g_ptr_array_add (re_map->values, g_strdup (value));
}
static void
@@ -1550,14 +1910,14 @@ rspamd_re_map_finalize (struct rspamd_regexp_map *re_map)
guint i;
hs_platform_info_t plt;
hs_compile_error_t *err;
- rspamd_mempool_t *pool;
+ struct rspamd_map *map;
rspamd_regexp_t *re;
gint pcre_flags;
- pool = re_map->map->pool;
+ map = re_map->map;
if (hs_populate_platform (&plt) != HS_SUCCESS) {
- msg_err_pool ("cannot populate hyperscan platform");
+ msg_err_map ("cannot populate hyperscan platform");
return;
}
@@ -1605,7 +1965,7 @@ rspamd_re_map_finalize (struct rspamd_regexp_map *re_map)
&re_map->hs_db,
&err) != HS_SUCCESS) {
- msg_err_pool ("cannot create tree of regexp when processing '%s': %s",
+ msg_err_map ("cannot create tree of regexp when processing '%s': %s",
re_map->patterns[err->expression], err->message);
re_map->hs_db = NULL;
hs_free_compile_error (err);
@@ -1614,7 +1974,7 @@ rspamd_re_map_finalize (struct rspamd_regexp_map *re_map)
}
if (hs_alloc_scratch (re_map->hs_db, &re_map->hs_scratch) != HS_SUCCESS) {
- msg_err_pool ("cannot allocate scratch space for hyperscan");
+ msg_err_map ("cannot allocate scratch space for hyperscan");
hs_free_database (re_map->hs_db);
re_map->hs_db = NULL;
}
@@ -1622,7 +1982,7 @@ rspamd_re_map_finalize (struct rspamd_regexp_map *re_map)
}
gchar *
-rspamd_regexp_list_read (rspamd_mempool_t *pool,
+rspamd_regexp_list_read (
gchar *chunk,
gint len,
struct map_cb_data *data,
@@ -1635,19 +1995,20 @@ rspamd_regexp_list_read (rspamd_mempool_t *pool,
data->cur_data = re_map;
}
- return rspamd_parse_kv_list (pool,
+ return rspamd_parse_kv_list (
chunk,
len,
data,
- (insert_func) rspamd_re_map_insert_helper,
+ rspamd_re_map_insert_helper,
hash_fill,
final);
}
void
-rspamd_regexp_list_fin (rspamd_mempool_t *pool, struct map_cb_data *data)
+rspamd_regexp_list_fin (struct map_cb_data *data)
{
struct rspamd_regexp_map *re_map;
+ struct rspamd_map *map = data->map;
if (data->prev_data) {
rspamd_regexp_map_destroy (data->prev_data);
@@ -1655,7 +2016,7 @@ rspamd_regexp_list_fin (rspamd_mempool_t *pool, struct map_cb_data *data)
if (data->cur_data) {
re_map = data->cur_data;
rspamd_re_map_finalize (re_map);
- msg_info_pool ("read regexp list of %ud elements",
+ msg_info_map ("read regexp list of %ud elements",
re_map->regexps->len);
}
}
diff --git a/src/libutil/map.h b/src/libutil/map.h
index f7cbc3076..3b6439efb 100644
--- a/src/libutil/map.h
+++ b/src/libutil/map.h
@@ -4,6 +4,7 @@
#include "config.h"
#include <event.h>
+#include "ucl.h"
#include "mem_pool.h"
#include "radix.h"
#include "dns.h"
@@ -18,9 +19,9 @@ struct map_cb_data;
/**
* Callback types
*/
-typedef gchar * (*map_cb_t)(rspamd_mempool_t *pool, gchar *chunk, gint len,
+typedef gchar * (*map_cb_t)(gchar *chunk, gint len,
struct map_cb_data *data, gboolean final);
-typedef void (*map_fin_cb_t)(rspamd_mempool_t *pool, struct map_cb_data *data);
+typedef void (*map_fin_cb_t)(struct map_cb_data *data);
/**
* Common map object
@@ -56,6 +57,16 @@ struct rspamd_map* rspamd_map_add (struct rspamd_config *cfg,
void **user_data);
/**
+ * Add map from ucl
+ */
+struct rspamd_map* rspamd_map_add_from_ucl (struct rspamd_config *cfg,
+ const ucl_object_t *obj,
+ const gchar *description,
+ map_cb_t read_callback,
+ map_fin_cb_t fin_callback,
+ void **user_data);
+
+/**
* Start watching of maps by adding events to libevent event loop
*/
void rspamd_map_watch (struct rspamd_config *cfg,
@@ -77,50 +88,50 @@ typedef void (*insert_func) (gpointer st, gconstpointer key,
/**
* Radix list is a list like ip/mask
*/
-gchar * rspamd_radix_read (rspamd_mempool_t *pool,
+gchar * rspamd_radix_read (
gchar *chunk,
gint len,
struct map_cb_data *data,
gboolean final);
-void rspamd_radix_fin (rspamd_mempool_t *pool, struct map_cb_data *data);
+void rspamd_radix_fin (struct map_cb_data *data);
/**
* Host list is an ordinal list of hosts or domains
*/
-gchar * rspamd_hosts_read (rspamd_mempool_t *pool,
+gchar * rspamd_hosts_read (
gchar *chunk,
gint len,
struct map_cb_data *data,
gboolean final);
-void rspamd_hosts_fin (rspamd_mempool_t *pool, struct map_cb_data *data);
+void rspamd_hosts_fin (struct map_cb_data *data);
/**
* Kv list is an ordinal list of keys and values separated by whitespace
*/
-gchar * rspamd_kv_list_read (rspamd_mempool_t *pool,
+gchar * rspamd_kv_list_read (
gchar *chunk,
gint len,
struct map_cb_data *data,
gboolean final);
-void rspamd_kv_list_fin (rspamd_mempool_t *pool, struct map_cb_data *data);
+void rspamd_kv_list_fin (struct map_cb_data *data);
/**
* Regexp list is a list of regular expressions
*/
struct rspamd_regexp_map;
-gchar * rspamd_regexp_list_read (rspamd_mempool_t *pool,
+gchar * rspamd_regexp_list_read (
gchar *chunk,
gint len,
struct map_cb_data *data,
gboolean final);
-void rspamd_regexp_list_fin (rspamd_mempool_t *pool, struct map_cb_data *data);
+void rspamd_regexp_list_fin (struct map_cb_data *data);
/**
* FSM for lists parsing (support comments, blank lines and partial replies)
*/
gchar *
-rspamd_parse_kv_list (rspamd_mempool_t * pool,
+rspamd_parse_kv_list (
gchar * chunk,
gint len,
struct map_cb_data *data,
diff --git a/src/libutil/map_private.h b/src/libutil/map_private.h
index c26517574..9bdca5f90 100644
--- a/src/libutil/map_private.h
+++ b/src/libutil/map_private.h
@@ -24,30 +24,57 @@
typedef void (*rspamd_map_dtor) (gpointer p);
+#define msg_err_map(...) rspamd_default_log_function (G_LOG_LEVEL_CRITICAL, \
+ "map", map->tag, \
+ G_STRFUNC, \
+ __VA_ARGS__)
+#define msg_warn_map(...) rspamd_default_log_function (G_LOG_LEVEL_WARNING, \
+ "map", map->tag, \
+ G_STRFUNC, \
+ __VA_ARGS__)
+#define msg_info_map(...) rspamd_default_log_function (G_LOG_LEVEL_INFO, \
+ "map", map->tag, \
+ G_STRFUNC, \
+ __VA_ARGS__)
+#define msg_debug_map(...) rspamd_default_log_function (G_LOG_LEVEL_DEBUG, \
+ "map", map->tag, \
+ G_STRFUNC, \
+ __VA_ARGS__)
+
enum fetch_proto {
MAP_PROTO_FILE,
MAP_PROTO_HTTP,
};
-struct rspamd_map {
- rspamd_mempool_t *pool;
- struct rspamd_dns_resolver *r;
+
+struct rspamd_map_backend {
+ enum fetch_proto protocol;
gboolean is_signed;
struct rspamd_cryptobox_pubkey *trusted_pubkey;
+ union {
+ struct file_map_data *fd;
+ struct http_map_data *hd;
+ } data;
+ gchar *uri;
+ ref_entry_t ref;
+};
+
+struct rspamd_map {
+ struct rspamd_dns_resolver *r;
struct rspamd_config *cfg;
- enum fetch_proto protocol;
+ GPtrArray *backends;
map_cb_t read_callback;
map_fin_cb_t fin_callback;
void **user_data;
- struct event ev;
- struct timeval tv;
struct event_base *ev_base;
- void *map_data;
- gchar *uri;
gchar *description;
+ gchar *name;
guint32 id;
- guint32 checksum;
+ struct event ev;
+ struct timeval tv;
+ gdouble poll_timeout;
/* Shared lock for temporary disabling of map reading (e.g. when this map is written by UI) */
gint *locked;
+ gchar tag[MEMPOOL_UID_LEN];
rspamd_map_dtor dtor;
gpointer dtor_data;
};
@@ -56,7 +83,7 @@ struct rspamd_map {
* Data specific to file maps
*/
struct file_map_data {
- const gchar *filename;
+ gchar *filename;
struct stat st;
};
@@ -64,12 +91,12 @@ struct file_map_data {
* Data specific to HTTP maps
*/
struct http_map_data {
- struct addrinfo *addr;
- guint16 port;
gchar *path;
gchar *host;
+ gchar *last_signature;
time_t last_checked;
gboolean request_sent;
+ guint16 port;
};
enum rspamd_map_http_stage {
@@ -80,20 +107,31 @@ enum rspamd_map_http_stage {
map_load_signature
};
+struct map_periodic_cbdata {
+ struct rspamd_map *map;
+ struct map_cb_data cbdata;
+ gboolean need_modify;
+ gboolean errored;
+ guint cur_backend;
+ ref_entry_t ref;
+};
+
struct http_callback_data {
struct event_base *ev_base;
struct rspamd_http_connection *conn;
rspamd_inet_addr_t *addr;
- struct timeval tv;
struct rspamd_map *map;
+ struct rspamd_map_backend *bk;
struct http_map_data *data;
- struct map_cb_data cbdata;
+ struct map_periodic_cbdata *periodic;
struct rspamd_cryptobox_pubkey *pk;
+ gboolean check;
gchar *tmpfile;
enum rspamd_map_http_stage stage;
gint out_fd;
gint fd;
+ struct timeval tv;
ref_entry_t ref;
};
diff --git a/src/libutil/shingles.c b/src/libutil/shingles.c
index c2acae6d3..66f6b457c 100644
--- a/src/libutil/shingles.c
+++ b/src/libutil/shingles.c
@@ -19,15 +19,16 @@
#define SHINGLES_WINDOW 3
-struct rspamd_shingle*
+struct rspamd_shingle* RSPAMD_OPTIMIZE("unroll-loops")
rspamd_shingles_generate (GArray *input,
const guchar key[16],
rspamd_mempool_t *pool,
rspamd_shingles_filter filter,
- gpointer filterd)
+ gpointer filterd,
+ enum rspamd_shingle_alg alg)
{
struct rspamd_shingle *res;
- GArray *hashes[RSPAMD_SHINGLE_SIZE];
+ guint64 **hashes;
rspamd_sipkey_t keys[RSPAMD_SHINGLE_SIZE];
guchar shabuf[rspamd_cryptobox_HASHBYTES], *out_key;
const guchar *cur_key;
@@ -35,7 +36,9 @@ rspamd_shingles_generate (GArray *input,
rspamd_ftok_t *word;
rspamd_cryptobox_hash_state_t bs;
guint64 val;
- gint i, j, beg = 0;
+ gint i, j, k;
+ gsize hlen, beg = 0;
+ enum rspamd_cryptobox_fast_hash_type ht;
if (pool != NULL) {
res = rspamd_mempool_alloc (pool, sizeof (*res));
@@ -50,9 +53,11 @@ rspamd_shingles_generate (GArray *input,
out_key = (guchar *)&keys[0];
/* Init hashes pipes and keys */
+ hashes = g_slice_alloc (sizeof (*hashes) * RSPAMD_SHINGLE_SIZE);
+ hlen = input->len > SHINGLES_WINDOW ? (input->len - SHINGLES_WINDOW + 1) : 1;
+
for (i = 0; i < RSPAMD_SHINGLE_SIZE; i ++) {
- hashes[i] = g_array_sized_new (FALSE, FALSE, sizeof (guint64),
- input->len + SHINGLES_WINDOW);
+ hashes[i] = g_slice_alloc (hlen * sizeof (guint64));
/*
* To generate a set of hashes we just apply sha256 to the
* initial key as many times as many hashes are required and
@@ -71,32 +76,83 @@ rspamd_shingles_generate (GArray *input,
}
/* Now parse input words into a vector of hashes using rolling window */
- for (i = 0; i <= (gint)input->len; i ++) {
- if (i - beg >= SHINGLES_WINDOW || i == (gint)input->len) {
- for (j = beg; j < i; j ++) {
- word = &g_array_index (input, rspamd_ftok_t, j);
- row = rspamd_fstring_append (row, word->begin, word->len);
+ if (alg == RSPAMD_SHINGLES_OLD) {
+ for (i = 0; i <= (gint)input->len; i ++) {
+ if (i - beg >= SHINGLES_WINDOW || i == (gint)input->len) {
+ for (j = beg; j < i; j ++) {
+ word = &g_array_index (input, rspamd_ftok_t, j);
+ row = rspamd_fstring_append (row, word->begin, word->len);
+ }
+
+ /* Now we need to create a new row here */
+ for (j = 0; j < RSPAMD_SHINGLE_SIZE; j ++) {
+ rspamd_cryptobox_siphash ((guchar *)&val, row->str, row->len,
+ keys[j]);
+ g_assert (hlen > beg);
+ hashes[j][beg] = val;
+ }
+
+ beg++;
+
+ row = rspamd_fstring_assign (row, "", 0);
}
- beg++;
+ }
+ }
+ else {
+ guint64 res[SHINGLES_WINDOW * RSPAMD_SHINGLE_SIZE];
+
+ switch (alg) {
+ case RSPAMD_SHINGLES_XXHASH:
+ ht = RSPAMD_CRYPTOBOX_XXHASH64;
+ break;
+ case RSPAMD_SHINGLES_MUMHASH:
+ ht = RSPAMD_CRYPTOBOX_MUMHASH;
+ break;
+ default:
+ ht = RSPAMD_CRYPTOBOX_HASHFAST_INDEPENDENT;
+ break;
+ }
- /* Now we need to create a new row here */
- for (j = 0; j < RSPAMD_SHINGLE_SIZE; j ++) {
- rspamd_cryptobox_siphash ((guchar *)&val, row->str, row->len,
- keys[j]);
- g_array_append_val (hashes[j], val);
+ memset (res, 0, sizeof (res));
+
+ for (i = 0; i <= (gint)input->len; i ++) {
+ if (i - beg >= SHINGLES_WINDOW || i == (gint)input->len) {
+
+ for (j = 0; j < RSPAMD_SHINGLE_SIZE; j ++) {
+ /* Shift hashes window to right */
+ for (k = 0; k < SHINGLES_WINDOW - 1; k ++) {
+ res[j * SHINGLES_WINDOW + k] =
+ res[j * SHINGLES_WINDOW + k + 1];
+ }
+
+ word = &g_array_index (input, rspamd_ftok_t, beg);
+ /* Insert the last element to the pipe */
+ res[j * SHINGLES_WINDOW + SHINGLES_WINDOW - 1] =
+ rspamd_cryptobox_fast_hash_specific (ht,
+ word->begin, word->len,
+ *(guint64 *)keys[j]);
+ val = 0;
+ for (k = 0; k < SHINGLES_WINDOW; k ++) {
+ val ^= res[j * SHINGLES_WINDOW + k] >> (8 * (SHINGLES_WINDOW - k - 1));
+ }
+
+ g_assert (hlen > beg);
+ hashes[j][beg] = val;
+ }
+ beg++;
}
-
- row = rspamd_fstring_assign (row, "", 0);
}
}
/* Now we need to filter all hashes and make a shingles result */
for (i = 0; i < RSPAMD_SHINGLE_SIZE; i ++) {
- res->hashes[i] = filter ((guint64 *)hashes[i]->data, hashes[i]->len,
+ res->hashes[i] = filter (hashes[i], hlen,
i, key, filterd);
- g_array_free (hashes[i], TRUE);
+ g_slice_free1 (hlen * sizeof (guint64), hashes[i]);
}
+ g_slice_free1 (sizeof (*hashes) * RSPAMD_SHINGLE_SIZE, hashes);
+
rspamd_fstring_free (row);
return res;
diff --git a/src/libutil/shingles.h b/src/libutil/shingles.h
index d252a78f6..fd7d3bfc7 100644
--- a/src/libutil/shingles.h
+++ b/src/libutil/shingles.h
@@ -25,6 +25,13 @@ struct rspamd_shingle {
guint64 hashes[RSPAMD_SHINGLE_SIZE];
};
+enum rspamd_shingle_alg {
+ RSPAMD_SHINGLES_OLD = 0,
+ RSPAMD_SHINGLES_XXHASH,
+ RSPAMD_SHINGLES_MUMHASH,
+ RSPAMD_SHINGLES_FAST
+};
+
/**
* Shingles filtering function
* @param input input array of hashes
@@ -48,7 +55,8 @@ struct rspamd_shingle* rspamd_shingles_generate (GArray *input,
const guchar key[16],
rspamd_mempool_t *pool,
rspamd_shingles_filter filter,
- gpointer filterd);
+ gpointer filterd,
+ enum rspamd_shingle_alg alg);
/**
* Compares two shingles and return result as a floating point value - 1.0
diff --git a/src/libutil/str_util.c b/src/libutil/str_util.c
index 7d40b15fa..a25dc32d5 100644
--- a/src/libutil/str_util.c
+++ b/src/libutil/str_util.c
@@ -15,7 +15,7 @@
*/
#include "config.h"
#include "util.h"
-#include "xxhash.h"
+#include "cryptobox.h"
#include "url.h"
#include <math.h>
@@ -182,20 +182,6 @@ rspamd_strcase_equal (gconstpointer v, gconstpointer v2)
return FALSE;
}
-#if defined(__LP64__) || defined(_LP64)
-#define XXH_STATE XXH64_state_t
-#define XXH_RESET XXH64_reset
-#define XXH_UPDATE XXH64_update
-#define XXH_DIGEST XXH64_digest
-#define XXH_ONESHOT XXH64
-#else
-#define XXH_STATE XXH32_state_t
-#define XXH_RESET XXH32_reset
-#define XXH_UPDATE XXH32_update
-#define XXH_DIGEST XXH32_digest
-#define XXH_ONESHOT XXH32
-#endif
-
static guint
rspamd_icase_hash (const gchar *in, gsize len)
{
@@ -208,10 +194,10 @@ rspamd_icase_hash (const gchar *in, gsize len)
} c;
guint32 pp;
} u;
- XXH_STATE st;
+ rspamd_cryptobox_fast_hash_state_t st;
fp = len - leftover;
- XXH_RESET (&st, rspamd_hash_seed ());
+ rspamd_cryptobox_fast_hash_init (&st, rspamd_hash_seed ());
for (i = 0; i != fp; i += 4) {
u.c.c1 = s[i], u.c.c2 = s[i + 1], u.c.c3 = s[i + 2], u.c.c4 = s[i + 3];
@@ -219,7 +205,7 @@ rspamd_icase_hash (const gchar *in, gsize len)
u.c.c2 = lc_map[u.c.c2];
u.c.c3 = lc_map[u.c.c3];
u.c.c4 = lc_map[u.c.c4];
- XXH_UPDATE (&st, &u.pp, sizeof (u));
+ rspamd_cryptobox_fast_hash_update (&st, &u.pp, sizeof (u));
}
u.pp = 0;
@@ -230,11 +216,11 @@ rspamd_icase_hash (const gchar *in, gsize len)
u.c.c2 = lc_map[(guchar)s[i++]];
case 1:
u.c.c1 = lc_map[(guchar)s[i]];
- XXH_UPDATE (&st, &u.pp, leftover);
+ rspamd_cryptobox_fast_hash_update (&st, &u.pp, leftover);
break;
}
- return XXH_DIGEST (&st);
+ return rspamd_cryptobox_fast_hash_final (&st);
}
guint
@@ -255,7 +241,7 @@ rspamd_str_hash (gconstpointer key)
len = strlen ((const gchar *)key);
- return XXH_ONESHOT (key, len, rspamd_hash_seed ());
+ return rspamd_cryptobox_fast_hash (key, len, rspamd_hash_seed ());
}
gboolean
@@ -1814,17 +1800,17 @@ guint
rspamd_url_hash (gconstpointer u)
{
const struct rspamd_url *url = u;
- XXH_STATE st;
+ rspamd_cryptobox_fast_hash_state_t st;
- XXH_RESET (&st, rspamd_hash_seed ());
+ rspamd_cryptobox_fast_hash_init (&st, rspamd_hash_seed ());
if (url->urllen > 0) {
- XXH_UPDATE (&st, url->string, url->urllen);
+ rspamd_cryptobox_fast_hash_update (&st, url->string, url->urllen);
}
- XXH_UPDATE (&st, &url->flags, sizeof (url->flags));
+ rspamd_cryptobox_fast_hash_update (&st, &url->flags, sizeof (url->flags));
- return XXH_DIGEST (&st);
+ return rspamd_cryptobox_fast_hash_final (&st);
}
/* Compare two emails for building emails tree */
diff --git a/src/libutil/upstream.c b/src/libutil/upstream.c
index 020039d71..fe07e89d2 100644
--- a/src/libutil/upstream.c
+++ b/src/libutil/upstream.c
@@ -19,7 +19,7 @@
#include "ref.h"
#include "cfg_file.h"
#include "rdns.h"
-#include "xxhash.h"
+#include "cryptobox.h"
#include "utlist.h"
struct upstream_inet_addr_entry {
@@ -785,7 +785,8 @@ rspamd_upstream_get_hashed (struct upstream_list *ups, const guint8 *key, guint
guint32 idx;
/* Generate 64 bits input key */
- k = XXH64 (key, keylen, ups->hash_seed);
+ k = rspamd_cryptobox_fast_hash_specific (RSPAMD_CRYPTOBOX_XXHASH64,
+ key, keylen, ups->hash_seed);
rspamd_mutex_lock (ups->lock);
idx = rspamd_consistent_hash (k, ups->alive->len);
diff --git a/src/libutil/util.c b/src/libutil/util.c
index 4fc3eb613..4ce90ba06 100644
--- a/src/libutil/util.c
+++ b/src/libutil/util.c
@@ -1981,12 +1981,13 @@ rspamd_config_libs (struct rspamd_external_libs_ctx *ctx,
if (ctx != NULL) {
if (cfg->local_addrs) {
- if (!rspamd_map_is_map (cfg->local_addrs)) {
- radix_add_generic_iplist (cfg->local_addrs,
+ if (ucl_object_type (cfg->local_addrs) == UCL_STRING &&
+ !rspamd_map_is_map (ucl_object_tostring (cfg->local_addrs))) {
+ radix_add_generic_iplist (ucl_object_tostring (cfg->local_addrs),
(radix_compressed_t **)ctx->local_addrs);
}
else {
- rspamd_map_add (cfg, cfg->local_addrs,
+ rspamd_map_add_from_ucl (cfg, cfg->local_addrs,
"Local addresses", rspamd_radix_read, rspamd_radix_fin,
(void **) ctx->local_addrs);
}
diff --git a/src/lua/lua_common.c b/src/lua/lua_common.c
index 63aab5830..4563d9f84 100644
--- a/src/lua/lua_common.c
+++ b/src/lua/lua_common.c
@@ -677,6 +677,20 @@ rspamd_lua_parse_table_arguments (lua_State *L, gint pos,
lua_pop (L, 1);
}
break;
+ case 'O':
+ if (t != LUA_TNONE) {
+ *(va_arg (ap, ucl_object_t **)) = ucl_object_lua_import (L,
+ idx);
+ }
+ else {
+ failed = TRUE;
+ *(va_arg (ap, ucl_object_t **)) = NULL;
+ }
+
+ if (is_table) {
+ lua_pop (L, 1);
+ }
+ break;
case 'U':
if (t == LUA_TNIL || t == LUA_TNONE) {
failed = TRUE;
diff --git a/src/lua/lua_logger.c b/src/lua/lua_logger.c
index 462838ba1..a01cb8436 100644
--- a/src/lua/lua_logger.c
+++ b/src/lua/lua_logger.c
@@ -509,7 +509,7 @@ lua_logger_logx (lua_State *L, GLogLevelFlags level, gboolean is_string)
if (map) {
if (map->map) {
- uid = map->map->pool->tag.uid;
+ uid = map->map->tag;
}
else {
uid = "embedded";
diff --git a/src/lua/lua_map.c b/src/lua/lua_map.c
index c01088343..a74ee205c 100644
--- a/src/lua/lua_map.c
+++ b/src/lua/lua_map.c
@@ -271,7 +271,7 @@ lua_config_add_kv_map (lua_State *L)
static gchar *
-lua_map_read (rspamd_mempool_t *pool, gchar *chunk, gint len,
+lua_map_read (gchar *chunk, gint len,
struct map_cb_data *data,
gboolean final)
{
@@ -301,10 +301,13 @@ lua_map_read (rspamd_mempool_t *pool, gchar *chunk, gint len,
}
static void
-lua_map_fin (rspamd_mempool_t * pool, struct map_cb_data *data)
+lua_map_fin (struct map_cb_data *data)
{
struct lua_map_callback_data *cbdata;
struct rspamd_lua_map **pmap;
+ struct rspamd_map *map;
+
+ map = data->map;
if (data->prev_data) {
data->prev_data = NULL;
@@ -314,12 +317,12 @@ lua_map_fin (rspamd_mempool_t * pool, struct map_cb_data *data)
cbdata = (struct lua_map_callback_data *)data->cur_data;
}
else {
- msg_err_pool ("no data read for map");
+ msg_err_map ("no data read for map");
return;
}
if (cbdata->ref == -1) {
- msg_err_pool ("map has no callback set");
+ msg_err_map ("map has no callback set");
}
else if (cbdata->data != NULL && cbdata->data->len != 0) {
lua_rawgeti (cbdata->L, LUA_REGISTRYINDEX, cbdata->ref);
@@ -329,7 +332,7 @@ lua_map_fin (rspamd_mempool_t * pool, struct map_cb_data *data)
rspamd_lua_setclass (cbdata->L, "rspamd{map}", -1);
if (lua_pcall (cbdata->L, 2, 0, 0) != 0) {
- msg_info_pool ("call to %s failed: %s", "local function",
+ msg_info_map ("call to %s failed: %s", "local function",
lua_tostring (cbdata->L, -1));
lua_pop (cbdata->L, 1);
}
@@ -342,8 +345,9 @@ gint
lua_config_add_map (lua_State *L)
{
struct rspamd_config *cfg = lua_check_config (L, 1);
- const gchar *map_line = NULL, *description = NULL;
+ const char *description = NULL;
const gchar *type = NULL;
+ ucl_object_t *map_obj = NULL;
struct lua_map_callback_data *cbdata;
struct rspamd_lua_map *map, **pmap;
struct rspamd_map *m;
@@ -351,132 +355,22 @@ lua_config_add_map (lua_State *L)
GError *err = NULL;
if (cfg) {
- if (lua_type (L, 2) == LUA_TTABLE) {
- if (!rspamd_lua_parse_table_arguments (L, 2, &err,
- "*type=S;description=S;callback=F;*url=S",
- &type, &description, &cbidx, &map_line)) {
- ret = luaL_error (L, "invalid table arguments: %s", err->message);
- g_error_free (err);
-
- return ret;
- }
-
- g_assert (type != NULL && map_line != NULL);
-
- if (strcmp (type, "callback") == 0) {
- if (cbidx == -1) {
- ret = luaL_error (L, "invalid table arguments: callback missing");
- return ret;
- }
-
- map = rspamd_mempool_alloc0 (cfg->cfg_pool, sizeof (*map));
- map->type = RSPAMD_LUA_MAP_CALLBACK;
- map->data.cbdata = rspamd_mempool_alloc0 (cfg->cfg_pool,
- sizeof (*map->data.cbdata));
- cbdata = map->data.cbdata;
- cbdata->L = L;
- cbdata->data = NULL;
- cbdata->lua_map = map;
- cbdata->ref = cbidx;
-
- if ((m = rspamd_map_add (cfg, map_line, description,
- lua_map_read, lua_map_fin,
- (void **)&map->data.cbdata)) == NULL) {
- msg_warn_config ("invalid map %s", map_line);
- luaL_unref (L, LUA_REGISTRYINDEX, cbidx);
- lua_pushnil (L);
+ if (!rspamd_lua_parse_table_arguments (L, 2, &err,
+ "*url=O;description=S;callback=F;type=S",
+ &map_obj, &description, &cbidx, &type)) {
+ ret = luaL_error (L, "invalid table arguments: %s", err->message);
+ g_error_free (err);
- return 1;
- }
- }
- else if (strcmp (type, "set") == 0) {
- map = rspamd_mempool_alloc0 (cfg->cfg_pool, sizeof (*map));
- map->data.hash = g_hash_table_new (rspamd_strcase_hash,
- rspamd_strcase_equal);
- map->type = RSPAMD_LUA_MAP_SET;
-
- if ((m = rspamd_map_add (cfg, map_line, description,
- rspamd_hosts_read,
- rspamd_hosts_fin,
- (void **)&map->data.hash)) == NULL) {
- msg_warn_config ("invalid set map %s", map_line);
- g_hash_table_destroy (map->data.hash);
- lua_pushnil (L);
-
- return 1;
- }
- }
- else if (strcmp (type, "map") == 0) {
- map = rspamd_mempool_alloc0 (cfg->cfg_pool, sizeof (*map));
- map->data.hash = g_hash_table_new (rspamd_strcase_hash,
- rspamd_strcase_equal);
- map->type = RSPAMD_LUA_MAP_HASH;
-
- if ((m = rspamd_map_add (cfg, map_line, description,
- rspamd_kv_list_read,
- rspamd_kv_list_fin,
- (void **)&map->data.hash)) == NULL) {
- msg_warn_config ("invalid hash map %s", map_line);
- g_hash_table_destroy (map->data.hash);
- lua_pushnil (L);
- return 1;
- }
- }
- else if (strcmp (type, "radix") == 0) {
- map = rspamd_mempool_alloc0 (cfg->cfg_pool, sizeof (*map));
- map->data.radix = radix_create_compressed ();
- map->type = RSPAMD_LUA_MAP_RADIX;
-
- if ((m = rspamd_map_add (cfg, map_line, description,
- rspamd_radix_read,
- rspamd_radix_fin,
- (void **)&map->data.radix)) == NULL) {
- msg_warn_config ("invalid radix map %s", map_line);
- radix_destroy_compressed (map->data.radix);
- lua_pushnil (L);
- return 1;
- }
- }
- else if (strcmp (type, "regexp") == 0) {
- map = rspamd_mempool_alloc0 (cfg->cfg_pool, sizeof (*map));
- map->data.re_map = NULL;
- map->type = RSPAMD_LUA_MAP_REGEXP;
-
- if ((m = rspamd_map_add (cfg, map_line, description,
- rspamd_regexp_list_read,
- rspamd_regexp_list_fin,
- (void **)&map->data.re_map)) == NULL) {
- msg_warn_config ("invalid regexp map %s", map_line);
- lua_pushnil (L);
- return 1;
- }
- }
- else {
- ret = luaL_error (L, "invalid arguments: unknown type '%s'", type);
+ return ret;
+ }
- return ret;
- }
+ g_assert (map_obj != NULL);
- map->map = m;
- pmap = lua_newuserdata (L, sizeof (void *));
- *pmap = map;
- rspamd_lua_setclass (L, "rspamd{map}", -1);
+ if (type == NULL) {
+ type = "callback";
}
- else {
- /*
- * Legacy format add_map(map_line, description, callback)
- */
- map_line = luaL_checkstring (L, 2);
-
- if (lua_gettop (L) == 4) {
- description = lua_tostring (L, 3);
- cbidx = 4;
- }
- else {
- description = NULL;
- cbidx = 3;
- }
+ if (strcmp (type, "callback") == 0) {
map = rspamd_mempool_alloc0 (cfg->cfg_pool, sizeof (*map));
map->type = RSPAMD_LUA_MAP_CALLBACK;
map->data.cbdata = rspamd_mempool_alloc0 (cfg->cfg_pool,
@@ -485,38 +379,104 @@ lua_config_add_map (lua_State *L)
cbdata->L = L;
cbdata->data = NULL;
cbdata->lua_map = map;
+ cbdata->ref = cbidx;
+
+ if ((m = rspamd_map_add_from_ucl (cfg, map_obj, description,
+ lua_map_read, lua_map_fin,
+ (void **)&map->data.cbdata)) == NULL) {
+
+ if (cbidx != -1) {
+ luaL_unref (L, LUA_REGISTRYINDEX, cbidx);
+ }
+
+ lua_pushnil (L);
- if (lua_type (L, cbidx) == LUA_TFUNCTION) {
- lua_pushvalue (L, cbidx);
- /* Get a reference */
- cbdata->ref = luaL_ref (L, LUA_REGISTRYINDEX);
+ return 1;
}
- else {
- /*
- * Now we can create maps with delayed callbacks, to allow better
- * closures generation
- */
- cbdata->ref = -1;
+ }
+ else if (strcmp (type, "set") == 0) {
+ map = rspamd_mempool_alloc0 (cfg->cfg_pool, sizeof (*map));
+ map->data.hash = g_hash_table_new (rspamd_strcase_hash,
+ rspamd_strcase_equal);
+ map->type = RSPAMD_LUA_MAP_SET;
+
+ if ((m = rspamd_map_add_from_ucl (cfg, map_obj, description,
+ rspamd_hosts_read,
+ rspamd_hosts_fin,
+ (void **)&map->data.hash)) == NULL) {
+ g_hash_table_destroy (map->data.hash);
+ lua_pushnil (L);
+ ucl_object_unref (map_obj);
+
+ return 1;
}
+ }
+ else if (strcmp (type, "map") == 0) {
+ map = rspamd_mempool_alloc0 (cfg->cfg_pool, sizeof (*map));
+ map->data.hash = g_hash_table_new (rspamd_strcase_hash,
+ rspamd_strcase_equal);
+ map->type = RSPAMD_LUA_MAP_HASH;
+
+ if ((m = rspamd_map_add_from_ucl (cfg, map_obj, description,
+ rspamd_kv_list_read,
+ rspamd_kv_list_fin,
+ (void **)&map->data.hash)) == NULL) {
+ g_hash_table_destroy (map->data.hash);
+ lua_pushnil (L);
+ ucl_object_unref (map_obj);
- if ((m = rspamd_map_add (cfg, map_line, description,
- lua_map_read, lua_map_fin,
- (void **)&map->data.cbdata)) == NULL) {
- msg_warn_config ("invalid map %s", map_line);
+ return 1;
+ }
+ }
+ else if (strcmp (type, "radix") == 0) {
+ map = rspamd_mempool_alloc0 (cfg->cfg_pool, sizeof (*map));
+ map->data.radix = radix_create_compressed ();
+ map->type = RSPAMD_LUA_MAP_RADIX;
+
+ if ((m = rspamd_map_add_from_ucl (cfg, map_obj, description,
+ rspamd_radix_read,
+ rspamd_radix_fin,
+ (void **)&map->data.radix)) == NULL) {
+ radix_destroy_compressed (map->data.radix);
lua_pushnil (L);
+ ucl_object_unref (map_obj);
+
+ return 1;
}
- else {
- map->map = m;
- pmap = lua_newuserdata (L, sizeof (void *));
- *pmap = map;
- rspamd_lua_setclass (L, "rspamd{map}", -1);
+ }
+ else if (strcmp (type, "regexp") == 0) {
+ map = rspamd_mempool_alloc0 (cfg->cfg_pool, sizeof (*map));
+ map->data.re_map = NULL;
+ map->type = RSPAMD_LUA_MAP_REGEXP;
+
+ if ((m = rspamd_map_add_from_ucl (cfg, map_obj, description,
+ rspamd_regexp_list_read,
+ rspamd_regexp_list_fin,
+ (void **)&map->data.re_map)) == NULL) {
+ lua_pushnil (L);
+ ucl_object_unref (map_obj);
+
+ return 1;
}
}
+ else {
+ ret = luaL_error (L, "invalid arguments: unknown type '%s'", type);
+ ucl_object_unref (map_obj);
+
+ return ret;
+ }
+
+ map->map = m;
+ pmap = lua_newuserdata (L, sizeof (void *));
+ *pmap = map;
+ rspamd_lua_setclass (L, "rspamd{map}", -1);
}
else {
return luaL_error (L, "invalid arguments");
}
+ ucl_object_unref (map_obj);
+
return 1;
}
@@ -614,11 +574,17 @@ lua_map_is_signed (lua_State *L)
{
struct rspamd_lua_map *map = lua_check_map (L, 1);
gboolean ret = FALSE;
+ struct rspamd_map_backend *bk;
+ guint i;
if (map != NULL) {
if (map->map) {
- if (map->map->is_signed) {
- ret = TRUE;
+ for (i = 0; i < map->map->backends->len; i ++) {
+ bk = g_ptr_array_index (map->map->backends, i);
+ if (bk->is_signed) {
+ ret = TRUE;
+ break;
+ }
}
}
}
@@ -635,19 +601,28 @@ lua_map_get_proto (lua_State *L)
{
struct rspamd_lua_map *map = lua_check_map (L, 1);
const gchar *ret = "undefined";
+ struct rspamd_map_backend *bk;
+ guint i;
if (map != NULL) {
if ((map->flags & RSPAMD_LUA_MAP_FLAG_EMBEDDED) || map->map == NULL) {
ret = "embedded";
+ lua_pushstring (L, ret);
+
+ return 1;
}
else {
- switch (map->map->protocol) {
- case MAP_PROTO_FILE:
- ret = "file";
- break;
- case MAP_PROTO_HTTP:
- ret = "http";
- break;
+ for (i = 0; i < map->map->backends->len; i ++) {
+ bk = g_ptr_array_index (map->map->backends, i);
+ switch (bk->protocol) {
+ case MAP_PROTO_FILE:
+ ret = "file";
+ break;
+ case MAP_PROTO_HTTP:
+ ret = "http";
+ break;
+ }
+ lua_pushstring (L, ret);
}
}
}
@@ -655,14 +630,16 @@ lua_map_get_proto (lua_State *L)
return luaL_error (L, "invalid arguments");
}
- lua_pushstring (L, ret);
- return 1;
+
+ return map->map->backends->len;
}
static int
lua_map_get_sign_key (lua_State *L)
{
struct rspamd_lua_map *map = lua_check_map (L, 1);
+ struct rspamd_map_backend *bk;
+ guint i;
GString *ret = NULL;
if (map != NULL) {
@@ -671,33 +648,42 @@ lua_map_get_sign_key (lua_State *L)
return 1;
}
- if (map->map && map->map->trusted_pubkey) {
- ret = rspamd_pubkey_print (map->map->trusted_pubkey,
- RSPAMD_KEYPAIR_PUBKEY|RSPAMD_KEYPAIR_BASE32);
+ for (i = 0; i < map->map->backends->len; i ++) {
+ bk = g_ptr_array_index (map->map->backends, i);
+
+ if (bk->trusted_pubkey) {
+ ret = rspamd_pubkey_print (bk->trusted_pubkey,
+ RSPAMD_KEYPAIR_PUBKEY|RSPAMD_KEYPAIR_BASE32);
+ }
+ else {
+ ret = NULL;
+ }
+
+ if (ret) {
+ lua_pushlstring (L, ret->str, ret->len);
+ g_string_free (ret, TRUE);
+ }
+ else {
+ lua_pushnil (L);
+ }
}
}
else {
return luaL_error (L, "invalid arguments");
}
- if (ret) {
- lua_pushlstring (L, ret->str, ret->len);
- g_string_free (ret, TRUE);
- }
- else {
- lua_pushnil (L);
- }
-
- return 1;
+ return map->map->backends->len;
}
static int
lua_map_set_sign_key (lua_State *L)
{
struct rspamd_lua_map *map = lua_check_map (L, 1);
+ struct rspamd_map_backend *bk;
const gchar *pk_str;
struct rspamd_cryptobox_pubkey *pk;
gsize len;
+ guint i;
pk_str = lua_tolstring (L, 2, &len);
@@ -714,12 +700,17 @@ lua_map_set_sign_key (lua_State *L)
return luaL_error (L, "invalid pubkey string");
}
- if (map->map->trusted_pubkey) {
- /* Unref old pk */
- rspamd_pubkey_unref (map->map->trusted_pubkey);
+ for (i = 0; i < map->map->backends->len; i ++) {
+ bk = g_ptr_array_index (map->map->backends, i);
+ if (bk->trusted_pubkey) {
+ /* Unref old pk */
+ rspamd_pubkey_unref (bk->trusted_pubkey);
+ }
+
+ bk->trusted_pubkey = rspamd_pubkey_ref (pk);
}
- map->map->trusted_pubkey = pk;
+ rspamd_pubkey_unref (pk);
}
else {
return luaL_error (L, "invalid arguments");
@@ -753,21 +744,29 @@ lua_map_get_uri (lua_State *L)
{
struct rspamd_lua_map *map = lua_check_map (L, 1);
const gchar *ret = "undefined";
+ struct rspamd_map_backend *bk;
+ guint i;
if (map != NULL) {
if ((map->flags & RSPAMD_LUA_MAP_FLAG_EMBEDDED) || map->map == NULL) {
ret = "embedded";
+ lua_pushstring (L, ret);
+
+ return 1;
}
else {
- ret = map->map->uri;
+ for (i = 0; i < map->map->backends->len; i ++) {
+ bk = g_ptr_array_index (map->map->backends, i);
+ ret = bk->uri;
+ lua_pushstring (L, ret);
+ }
}
}
else {
return luaL_error (L, "invalid arguments");
}
- lua_pushstring (L, ret);
- return 1;
+ return map->map->backends->len;
}
void
diff --git a/src/lua/lua_task.c b/src/lua/lua_task.c
index 31b0aac76..7a756679b 100644
--- a/src/lua/lua_task.c
+++ b/src/lua/lua_task.c
@@ -23,7 +23,7 @@
#include "cfg_file.h"
#include "email_addr.h"
#include "utlist.h"
-#include "xxhash.h"
+#include "cryptobox.h"
/***
* @module rspamd_task
@@ -1568,7 +1568,8 @@ lua_task_str_to_get_type (lua_State *L, gint pos)
type = lua_tolstring (L, pos, &sz);
if (type && sz > 0) {
- h = XXH64 (type, sz, 0xdeadbabe);
+ h = rspamd_cryptobox_fast_hash_specific (RSPAMD_CRYPTOBOX_XXHASH64,
+ type, sz, 0xdeadbabe);
switch (h) {
case 0xDA081341FB600389ULL: /* mime */
diff --git a/src/plugins/dkim_check.c b/src/plugins/dkim_check.c
index 9cf89674e..3ea31485f 100644
--- a/src/plugins/dkim_check.c
+++ b/src/plugins/dkim_check.c
@@ -271,13 +271,14 @@ dkim_module_config (struct rspamd_config *cfg)
}
if ((value =
rspamd_config_get_module_opt (cfg, "dkim", "whitelist")) != NULL) {
+
str = ucl_obj_tostring (value);
- if (!rspamd_map_is_map (str)) {
+ if (str && !rspamd_map_is_map (str)) {
radix_add_generic_iplist (str,
&dkim_module_ctx->whitelist_ip);
}
else {
- rspamd_map_add (cfg, str,
+ rspamd_map_add_from_ucl (cfg, value,
"DKIM whitelist", rspamd_radix_read, rspamd_radix_fin,
(void **)&dkim_module_ctx->whitelist_ip);
@@ -285,7 +286,7 @@ dkim_module_config (struct rspamd_config *cfg)
}
if ((value =
rspamd_config_get_module_opt (cfg, "dkim", "domains")) != NULL) {
- if (!rspamd_map_add (cfg, ucl_obj_tostring (value),
+ if (!rspamd_map_add_from_ucl (cfg, value,
"DKIM domains", rspamd_kv_list_read, rspamd_kv_list_fin,
(void **)&dkim_module_ctx->dkim_domains)) {
msg_warn_config ("cannot load dkim domains list from %s",
@@ -297,7 +298,7 @@ dkim_module_config (struct rspamd_config *cfg)
}
if (!got_trusted && (value =
rspamd_config_get_module_opt (cfg, "dkim", "trusted_domains")) != NULL) {
- if (!rspamd_map_add (cfg, ucl_obj_tostring (value),
+ if (!rspamd_map_add_from_ucl (cfg, value,
"DKIM domains", rspamd_kv_list_read, rspamd_kv_list_fin,
(void **)&dkim_module_ctx->dkim_domains)) {
msg_warn_config ("cannot load dkim domains list from %s",
diff --git a/src/plugins/fuzzy_check.c b/src/plugins/fuzzy_check.c
index 385b8aadc..bf4cd3a0a 100644
--- a/src/plugins/fuzzy_check.c
+++ b/src/plugins/fuzzy_check.c
@@ -69,6 +69,8 @@ struct fuzzy_mime_type {
struct fuzzy_rule {
struct upstream_list *servers;
const gchar *symbol;
+ const gchar *algorithm_str;
+ enum rspamd_shingle_alg alg;
GHashTable *mappings;
GList *mime_types;
GList *fuzzy_headers;
@@ -363,6 +365,7 @@ fuzzy_parse_rule (struct rspamd_config *cfg, const ucl_object_t *obj, gint cb_id
rule = fuzzy_rule_new (fuzzy_module_ctx->default_symbol,
fuzzy_module_ctx->fuzzy_pool);
rule->learn_condition_cb = -1;
+ rule->alg = RSPAMD_SHINGLES_OLD;
if ((value = ucl_object_lookup (obj, "mime_types")) != NULL) {
it = NULL;
@@ -410,6 +413,46 @@ fuzzy_parse_rule (struct rspamd_config *cfg, const ucl_object_t *obj, gint cb_id
rule->skip_unknown = ucl_obj_toboolean (value);
}
+ if ((value = ucl_object_lookup (obj, "algorithm")) != NULL) {
+ rule->algorithm_str = ucl_object_tostring (value);
+
+ if (rule->algorithm_str) {
+ if (g_ascii_strcasecmp (rule->algorithm_str, "old") == 0 ||
+ g_ascii_strcasecmp (rule->algorithm_str, "siphash") == 0) {
+ rule->alg = RSPAMD_SHINGLES_OLD;
+ }
+ else if (g_ascii_strcasecmp (rule->algorithm_str, "xxhash") == 0) {
+ rule->alg = RSPAMD_SHINGLES_XXHASH;
+ }
+ else if (g_ascii_strcasecmp (rule->algorithm_str, "mumhash") == 0) {
+ rule->alg = RSPAMD_SHINGLES_MUMHASH;
+ }
+ else if (g_ascii_strcasecmp (rule->algorithm_str, "fasthash") == 0 ||
+ g_ascii_strcasecmp (rule->algorithm_str, "fast") == 0) {
+ rule->alg = RSPAMD_SHINGLES_FAST;
+ }
+ else {
+ msg_warn_config ("unknown algorithm: %s, use siphash by default");
+ }
+ }
+ }
+
+ /* Set a consistent and short string name */
+ switch (rule->alg) {
+ case RSPAMD_SHINGLES_OLD:
+ rule->algorithm_str = "sip";
+ break;
+ case RSPAMD_SHINGLES_XXHASH:
+ rule->algorithm_str = "xx";
+ break;
+ case RSPAMD_SHINGLES_MUMHASH:
+ rule->algorithm_str = "mum";
+ break;
+ case RSPAMD_SHINGLES_FAST:
+ rule->algorithm_str = "fast";
+ break;
+ }
+
if ((value = ucl_object_lookup (obj, "servers")) != NULL) {
rule->servers = rspamd_upstreams_create (cfg->ups_ctx);
@@ -832,12 +875,12 @@ fuzzy_check_module_config (struct rspamd_config *cfg)
str = ucl_obj_tostring (value);
- if (!rspamd_map_is_map (str)) {
+ if (str && !rspamd_map_is_map (str)) {
radix_add_generic_iplist (str,
&fuzzy_module_ctx->whitelist);
}
else {
- rspamd_map_add (cfg, str,
+ rspamd_map_add_from_ucl (cfg, value,
"Fuzzy whitelist", rspamd_radix_read, rspamd_radix_fin,
(void **)&fuzzy_module_ctx->whitelist);
@@ -1023,6 +1066,37 @@ fuzzy_cmd_from_task_meta (struct fuzzy_rule *rule,
return io;
}
+static void *
+fuzzy_cmd_get_cached (struct fuzzy_rule *rule,
+ rspamd_mempool_t *pool,
+ struct mime_text_part *part)
+{
+ gchar key[32];
+ gint key_part;
+
+ memcpy (&key_part, rule->shingles_key->str, sizeof (key_part));
+ rspamd_snprintf (key, sizeof (key), "%p%s%d", part, rule->algorithm_str,
+ key_part);
+
+ return rspamd_mempool_get_variable (pool, key);
+}
+
+static void
+fuzzy_cmd_set_cached (struct fuzzy_rule *rule,
+ rspamd_mempool_t *pool,
+ struct mime_text_part *part,
+ struct rspamd_fuzzy_encrypted_shingle_cmd *data)
+{
+ gchar key[32];
+ gint key_part;
+
+ memcpy (&key_part, rule->shingles_key->str, sizeof (key_part));
+ rspamd_snprintf (key, sizeof (key), "%p%s%d", part, rule->algorithm_str,
+ key_part);
+ /* Key is copied */
+ rspamd_mempool_set_variable (pool, key, data, NULL);
+}
+
/*
* Create fuzzy command from a text part
*/
@@ -1035,7 +1109,7 @@ fuzzy_cmd_from_text_part (struct fuzzy_rule *rule,
struct mime_text_part *part)
{
struct rspamd_fuzzy_shingle_cmd *shcmd;
- struct rspamd_fuzzy_encrypted_shingle_cmd *encshcmd;
+ struct rspamd_fuzzy_encrypted_shingle_cmd *encshcmd, *cached;
struct rspamd_shingle *sh;
guint i;
rspamd_cryptobox_hash_state_t st;
@@ -1043,40 +1117,56 @@ fuzzy_cmd_from_text_part (struct fuzzy_rule *rule,
GArray *words;
struct fuzzy_cmd_io *io;
- if (rule->peer_key) {
- encshcmd = rspamd_mempool_alloc0 (pool, sizeof (*encshcmd));
+ cached = fuzzy_cmd_get_cached (rule, pool, part);
+
+ if (cached) {
+ /* Copy cached */
+ encshcmd = rspamd_mempool_alloc (pool, sizeof (*encshcmd));
+ memcpy (encshcmd, cached, sizeof (*encshcmd));
shcmd = &encshcmd->cmd;
}
else {
- shcmd = rspamd_mempool_alloc0 (pool, sizeof (*shcmd));
- encshcmd = NULL;
- }
+ encshcmd = rspamd_mempool_alloc0 (pool, sizeof (*encshcmd));
+ shcmd = &encshcmd->cmd;
- /*
- * Generate hash from all words in the part
- */
- rspamd_cryptobox_hash_init (&st, rule->hash_key->str, rule->hash_key->len);
- words = fuzzy_preprocess_words (part, pool);
+ /*
+ * Generate hash from all words in the part
+ */
+ rspamd_cryptobox_hash_init (&st, rule->hash_key->str, rule->hash_key->len);
+ words = fuzzy_preprocess_words (part, pool);
- for (i = 0; i < words->len; i ++) {
- word = &g_array_index (words, rspamd_ftok_t, i);
- rspamd_cryptobox_hash_update (&st, word->begin, word->len);
- }
- rspamd_cryptobox_hash_final (&st, shcmd->basic.digest);
+ for (i = 0; i < words->len; i ++) {
+ word = &g_array_index (words, rspamd_ftok_t, i);
+ rspamd_cryptobox_hash_update (&st, word->begin, word->len);
+ }
+ rspamd_cryptobox_hash_final (&st, shcmd->basic.digest);
+
+ msg_debug_pool ("loading shingles of type %s with key %*xs",
+ rule->algorithm_str,
+ 16, rule->shingles_key->str);
+ sh = rspamd_shingles_generate (words,
+ rule->shingles_key->str, pool,
+ rspamd_shingles_default_filter, NULL,
+ rule->alg);
+ if (sh != NULL) {
+ memcpy (&shcmd->sgl, sh, sizeof (shcmd->sgl));
+ shcmd->basic.shingles_count = RSPAMD_SHINGLE_SIZE;
+ }
- msg_debug_pool ("loading shingles with key %*xs", 16,
- rule->shingles_key->str);
- sh = rspamd_shingles_generate (words,
- rule->shingles_key->str, pool,
- rspamd_shingles_default_filter, NULL);
- if (sh != NULL) {
- memcpy (&shcmd->sgl, sh, sizeof (shcmd->sgl));
- shcmd->basic.shingles_count = RSPAMD_SHINGLE_SIZE;
+ /*
+ * We always save encrypted command as it can handle both
+ * encrypted and unencrypted requests.
+ *
+ * Since it is copied when obtained from the cache, it is safe to use
+ * it this way.
+ */
+ fuzzy_cmd_set_cached (rule, pool, part, encshcmd);
}
shcmd->basic.tag = ottery_rand_uint32 ();
shcmd->basic.cmd = c;
shcmd->basic.version = RSPAMD_FUZZY_VERSION;
+
if (c != FUZZY_CHECK) {
shcmd->basic.flag = flag;
shcmd->basic.value = weight;
diff --git a/src/plugins/spf.c b/src/plugins/spf.c
index 153422f51..67c8732e7 100644
--- a/src/plugins/spf.c
+++ b/src/plugins/spf.c
@@ -213,12 +213,12 @@ spf_module_config (struct rspamd_config *cfg)
str = ucl_obj_tostring (value);
- if (!rspamd_map_is_map (str)) {
+ if (str && !rspamd_map_is_map (str)) {
radix_add_generic_iplist (str,
&spf_module_ctx->whitelist_ip);
}
else {
- rspamd_map_add (cfg, str,
+ rspamd_map_add_from_ucl (cfg, value,
"SPF whitelist", rspamd_radix_read, rspamd_radix_fin,
(void **)&spf_module_ctx->whitelist_ip);
diff --git a/src/plugins/surbl.c b/src/plugins/surbl.c
index 4dda832b3..87b8effa7 100644
--- a/src/plugins/surbl.c
+++ b/src/plugins/surbl.c
@@ -80,7 +80,7 @@ module_t surbl_module = {
};
static void
-exception_insert (gpointer st, gconstpointer key, gpointer value)
+exception_insert (gpointer st, gconstpointer key, gconstpointer value)
{
GHashTable **t = st;
gint level = 0;
@@ -103,27 +103,27 @@ exception_insert (gpointer st, gconstpointer key, gpointer value)
val = g_malloc (sizeof (rspamd_ftok_t));
val->begin = key;
val->len = strlen (key);
+
if (t[level] == NULL) {
t[level] = g_hash_table_new_full (rspamd_ftok_icase_hash,
rspamd_ftok_icase_equal,
g_free,
- NULL);
+ g_free);
}
- g_hash_table_insert (t[level], val, value);
+
+ g_hash_table_insert (t[level], val, g_strdup (value));
}
static gchar *
-read_exceptions_list (rspamd_mempool_t * pool,
- gchar * chunk,
+read_exceptions_list (gchar * chunk,
gint len,
struct map_cb_data *data,
gboolean final)
{
if (data->cur_data == NULL) {
- data->cur_data = rspamd_mempool_alloc0 (pool,
- sizeof (GHashTable *) * MAX_LEVELS);
+ data->cur_data = g_malloc (sizeof (GHashTable *) * MAX_LEVELS);
}
- return rspamd_parse_kv_list (pool,
+ return rspamd_parse_kv_list (
chunk,
len,
data,
@@ -133,7 +133,7 @@ read_exceptions_list (rspamd_mempool_t * pool,
}
static void
-fin_exceptions_list (rspamd_mempool_t * pool, struct map_cb_data *data)
+fin_exceptions_list (struct map_cb_data *data)
{
GHashTable **t;
gint i;
@@ -145,11 +145,12 @@ fin_exceptions_list (rspamd_mempool_t * pool, struct map_cb_data *data)
g_hash_table_destroy (t[i]);
}
}
+ g_free (t);
}
}
static void
-redirector_insert (gpointer st, gconstpointer key, gpointer value)
+redirector_insert (gpointer st, gconstpointer key, gconstpointer value)
{
GHashTable *tld_hash = st;
const gchar *p = key, *begin = key;
@@ -200,8 +201,7 @@ redirector_item_free (gpointer p)
}
static gchar *
-read_redirectors_list (rspamd_mempool_t * pool,
- gchar * chunk,
+read_redirectors_list (gchar * chunk,
gint len,
struct map_cb_data *data,
gboolean final)
@@ -217,17 +217,17 @@ read_redirectors_list (rspamd_mempool_t * pool,
data->cur_data = tld_hash;
}
- return rspamd_parse_kv_list (pool,
+ return rspamd_parse_kv_list (
chunk,
len,
data,
- (insert_func) redirector_insert,
+ redirector_insert,
"",
final);
}
void
-fin_redirectors_list (rspamd_mempool_t * pool, struct map_cb_data *data)
+fin_redirectors_list (struct map_cb_data *data)
{
GHashTable *tld_hash;
@@ -528,7 +528,7 @@ surbl_module_config (struct rspamd_config *cfg)
if ((value =
rspamd_config_get_module_opt (cfg, "surbl",
"redirector_hosts_map")) != NULL) {
- if (!rspamd_map_add (cfg, ucl_obj_tostring (value),
+ if (!rspamd_map_add_from_ucl (cfg, value,
"SURBL redirectors list", read_redirectors_list, fin_redirectors_list,
(void **)&surbl_module_ctx->redirector_map_data)) {
@@ -546,7 +546,7 @@ surbl_module_config (struct rspamd_config *cfg)
}
if ((value =
rspamd_config_get_module_opt (cfg, "surbl", "exceptions")) != NULL) {
- if (rspamd_map_add (cfg, ucl_obj_tostring (value),
+ if (rspamd_map_add_from_ucl (cfg, value,
"SURBL exceptions list", read_exceptions_list, fin_exceptions_list,
(void **)&surbl_module_ctx->exceptions)) {
surbl_module_ctx->tld2_file = rspamd_mempool_strdup (
@@ -556,7 +556,7 @@ surbl_module_config (struct rspamd_config *cfg)
}
if ((value =
rspamd_config_get_module_opt (cfg, "surbl", "whitelist")) != NULL) {
- if (rspamd_map_add (cfg, ucl_obj_tostring (value),
+ if (rspamd_map_add_from_ucl (cfg, value,
"SURBL whitelist", rspamd_hosts_read, rspamd_hosts_fin,
(void **)&surbl_module_ctx->whitelist)) {
surbl_module_ctx->whitelist_file = rspamd_mempool_strdup (
diff --git a/src/rspamd.c b/src/rspamd.c
index e67977cbd..72e676267 100644
--- a/src/rspamd.c
+++ b/src/rspamd.c
@@ -21,7 +21,7 @@
#include "libserver/worker_util.h"
#include "libserver/rspamd_control.h"
#include "ottery.h"
-#include "xxhash.h"
+#include "cryptobox.h"
#include "utlist.h"
#include "unix-std.h"
/* sysexits */
@@ -409,30 +409,30 @@ systemd_get_socket (struct rspamd_main *rspamd_main, gint number)
static inline uintptr_t
make_listen_key (struct rspamd_worker_bind_conf *cf)
{
- XXH64_state_t st;
+ rspamd_cryptobox_fast_hash_state_t st;
guint i, keylen;
guint8 *key;
rspamd_inet_addr_t *addr;
guint16 port;
- XXH64_reset (&st, rspamd_hash_seed ());
+ rspamd_cryptobox_fast_hash_init (&st, rspamd_hash_seed ());
if (cf->is_systemd) {
- XXH64_update (&st, "systemd", sizeof ("systemd"));
- XXH64_update (&st, &cf->cnt, sizeof (cf->cnt));
+ rspamd_cryptobox_fast_hash_update (&st, "systemd", sizeof ("systemd"));
+ rspamd_cryptobox_fast_hash_update (&st, &cf->cnt, sizeof (cf->cnt));
}
else {
- XXH64_update (&st, cf->name, strlen (cf->name));
+ rspamd_cryptobox_fast_hash_update (&st, cf->name, strlen (cf->name));
for (i = 0; i < cf->cnt; i ++) {
addr = g_ptr_array_index (cf->addrs, i);
key = rspamd_inet_address_get_radix_key (
addr, &keylen);
- XXH64_update (&st, key, keylen);
+ rspamd_cryptobox_fast_hash_update (&st, key, keylen);
port = rspamd_inet_address_get_port (addr);
- XXH64_update (&st, &port, sizeof (port));
+ rspamd_cryptobox_fast_hash_update (&st, &port, sizeof (port));
}
}
- return XXH64_digest (&st);
+ return rspamd_cryptobox_fast_hash_final (&st);
}
static void
@@ -959,7 +959,7 @@ rspamd_control_handler (gint fd, short what, gpointer arg)
static guint
rspamd_spair_hash (gconstpointer p)
{
- return XXH64 (p, PAIR_ID_LEN, rspamd_hash_seed ());
+ return rspamd_cryptobox_fast_hash (p, PAIR_ID_LEN, rspamd_hash_seed ());
}
static gboolean
diff --git a/src/rspamd_proxy.c b/src/rspamd_proxy.c
index 06192dafc..32a2937e8 100644
--- a/src/rspamd_proxy.c
+++ b/src/rspamd_proxy.c
@@ -69,6 +69,7 @@ struct rspamd_http_upstream {
struct rspamd_http_mirror {
gchar *name;
+ gchar *settings_id;
struct upstream_list *u;
struct rspamd_cryptobox_pubkey *key;
gdouble prob;
@@ -362,6 +363,11 @@ rspamd_proxy_parse_mirror (rspamd_mempool_t *pool,
lua_settop (L, 0);
}
+ elt = ucl_object_lookup_any (obj, "settings", "settings_id", NULL);
+ if (elt && ucl_object_type (elt) == UCL_STRING) {
+ up->settings_id = g_strdup (ucl_object_tostring (elt));
+ }
+
g_ptr_array_add (ctx->mirrors, up);
return TRUE;
@@ -902,6 +908,11 @@ proxy_open_mirror_connections (struct rspamd_proxy_session *session)
rspamd_http_message_remove_header (msg, "Content-Length");
rspamd_http_message_remove_header (msg, "Key");
+ if (m->settings_id != NULL) {
+ rspamd_http_message_remove_header (msg, "Settings-ID");
+ rspamd_http_message_add_header (msg, "Settings-ID", m->settings_id);
+ }
+
bk_conn->backend_conn = rspamd_http_connection_new (
NULL,
proxy_backend_mirror_error_handler,
diff --git a/test/lua/unit/url.lua b/test/lua/unit/url.lua
index 06082afe0..de274425d 100644
--- a/test/lua/unit/url.lua
+++ b/test/lua/unit/url.lua
@@ -17,8 +17,8 @@ context("URL check functions", function()
test("Extract urls from text", function()
local pool = mpool.create()
local cases = {
- {"test.com text", {"test.com", nil}},
- {" test.com text", {"test.com", nil}},
+ {"test.com", {"test.com", nil}},
+ {" test.com", {"test.com", nil}},
{"<test.com> text", {"test.com", nil}},
{"test.com. text", {"test.com", nil}},
{"mailto:A.User@example.com text", {"example.com", "A.User"}},
diff --git a/test/rspamd_shingles_test.c b/test/rspamd_shingles_test.c
index 3eed5eee5..971502c93 100644
--- a/test/rspamd_shingles_test.c
+++ b/test/rspamd_shingles_test.c
@@ -41,6 +41,7 @@ generate_fuzzy_words (gsize cnt, gsize max_len)
for (i = 0; i < cnt; i ++) {
wlen = ottery_rand_range (max_len) + 1;
+ /* wlen = max_len; */
w.len = wlen;
t = g_malloc (wlen);
@@ -81,7 +82,8 @@ free_fuzzy_words (GArray *ar)
}
static void
-test_case (gsize cnt, gsize max_len, gdouble perm_factor)
+test_case (gsize cnt, gsize max_len, gdouble perm_factor,
+ enum rspamd_shingle_alg alg)
{
GArray *input;
struct rspamd_shingle *sgl, *sgl_permuted;
@@ -91,32 +93,129 @@ test_case (gsize cnt, gsize max_len, gdouble perm_factor)
ottery_rand_bytes (key, sizeof (key));
input = generate_fuzzy_words (cnt, max_len);
- ts1 = rspamd_get_ticks ();
+ ts1 = rspamd_get_virtual_ticks ();
sgl = rspamd_shingles_generate (input, key, NULL,
- rspamd_shingles_default_filter, NULL);
- ts2 = rspamd_get_ticks ();
+ rspamd_shingles_default_filter, NULL, alg);
+ ts2 = rspamd_get_virtual_ticks ();
permute_vector (input, perm_factor);
sgl_permuted = rspamd_shingles_generate (input, key, NULL,
- rspamd_shingles_default_filter, NULL);
+ rspamd_shingles_default_filter, NULL, alg);
res = rspamd_shingles_compare (sgl, sgl_permuted);
- msg_debug ("percentage of common shingles: %.3f, generate time: %hd usec",
- res, (gint)(ts1 - ts2) * 1000);
- g_assert_cmpfloat (fabs ((1.0 - res) - sqrt (perm_factor)), <=, 0.20);
+ msg_info ("%d (%z words of %z max len, %.2f perm factor):"
+ " percentage of common shingles: %.3f, generate time: %.4f sec",
+ alg, cnt, max_len, perm_factor, res, ts2 - ts1);
+ //g_assert_cmpfloat (fabs ((1.0 - res) - sqrt (perm_factor)), <=, 0.25);
free_fuzzy_words (input);
g_free (sgl);
g_free (sgl_permuted);
}
+static const guint64 expected_old[RSPAMD_SHINGLE_SIZE] = {
+ 0x2a97e024235cedc5, 0x46238acbcc55e9e0, 0x2378ff151af075b3, 0xde1f29a95cad109,
+ 0x5d3bbbdb5db5d19f, 0x4d75a0ec52af10a6, 0x215ecd6372e755b5, 0x7b52295758295350,
+ 0x17387d1beddc7f62, 0x26264ca879ffcada, 0x49d4a65ec0ab9914, 0xa2763e6995350cf,
+ 0x3f4570231449c13f, 0x3309f857a0e54ee5, 0x24e4c5b561b0fce3, 0x1f153e3b275bfd1b,
+ 0x4d067dbc97c3fd78, 0x9ffa2d076fa4f8bc, 0x3d8907f84b9ffc6c, 0x1cfd664c5262d256,
+ 0xcdd7e744b699c15, 0x5544a2bbe05124f7, 0x5a4029b5d6a06f7, 0xd5adfbdc756c0e4,
+ 0xa504b23d9689a67e, 0x15d945f7007de115, 0xbf676c0522a2c51d, 0x1c8d8163ad4b0f93,
+ 0xa2c4ba20799344d7, 0x27c6f13c02134388, 0xa1d443d31fd5a3, 0x99fbca9f8563080,
+};
+
+static const guint64 expected_xxhash[RSPAMD_SHINGLE_SIZE] = {
+ 0x33b134be11a705a, 0x36e2ea657aa36903, 0x6547b57f7470ce9d, 0x8253eb6d2f8f158e,
+ 0x1cc99e3cf22388f, 0x2396da27ea36ffe8, 0x1b457d208ad3d96c, 0x2d6ac733d7a2c107,
+ 0x17849cbed75cc4d1, 0x4dd94e772330e804, 0x39f592fa32014ed4, 0xa2f6229ad356461,
+ 0x6dc825879a057b37, 0x886b12cef4338b05, 0x8b23af68c186518a, 0x16932b40339aaf02,
+ 0x412090c6bb0b719c, 0x4d4a88cbdf1935f3, 0x233bcbddb5f67a7, 0x474719442a33dcca,
+ 0x2da7ec30563e622, 0x7ab90086960e1ad2, 0x3ea2b45582539f75, 0x108cd9287d95a6c5,
+ 0x69ba7c67c115597, 0x10880860eb75e982, 0x16f3d90e6ab995a6, 0x5f24ea09379b9f5c,
+ 0x3c2dc04088e8fe54, 0x340b8cf1c6f1227, 0x193bc348ed2e9ce7, 0x68454ef43da9c748,
+};
+
+static const guint64 expected_mumhash[RSPAMD_SHINGLE_SIZE] = {
+ 0x38d35473b80a7fc3, 0x1300531adc2d16a1, 0x26883bc89f78f4bd, 0x57de365ef6d1a62,
+ 0x773603185fcbb20a, 0x39c6cbd7ebbeaa88, 0x676c7445ad167e70, 0x432315d1ecc4c0b1,
+ 0x1380b95756dbb078, 0x9ee12832fa53b90e, 0x72970be210f0dd0b, 0x62909bd520f5956,
+ 0x66196965a45eb32a, 0x2466a9ca5436620e, 0x157b828b10e10f6e, 0x429bb673a523a7e5,
+ 0x51a6ace94f320f88, 0x23f53a30bd7d7147, 0xbee557664d3bc34c, 0x65730c88cd212a9,
+ 0x87e72c0cd05fd0e, 0x417a744669baeb3d, 0x78e26f7917829324, 0x439777dcfc25fdf4,
+ 0x582eac6ff013f00b, 0x1e40aa90e367f4af, 0x301d14a28d6c23a2, 0x34140ecb21b6c69,
+ 0x390a091c8b4c31b9, 0x2e35fecf9fff0ae7, 0x94322e1a5cf31f1b, 0x33cb9190905e049a,
+};
+
+static const guint64 expected_fasthash[RSPAMD_SHINGLE_SIZE] = {
+ 0x3843a716f94828a6, 0x13fd5386dda3b28d, 0x71cb09de527c40a, 0x5d6f59ffd839c62,
+ 0x7ce3633acd568476, 0x9014298cbd00167, 0x6708ec29eedb5350, 0x2882931ff2c5c410,
+ 0x1839d8b947b12571, 0x58f7bc3829173302, 0x4dac8103da51abc4, 0x6c5cbcc6fb1de28,
+ 0x31fefcef9bafb755, 0x6f2d1a0b1feca401, 0x3e71f3718e520b06, 0x42f6ba11164ab231,
+ 0x21164d010bd76f4a, 0x4c597ccc7b60f620, 0x2cf1ca3383b77574, 0x54ff9c01660b8add,
+ 0x2ca344758f40380d, 0x1b962321bd37d0f2, 0x9323bb99c32bc418, 0x375659d0eef2b8f2,
+ 0x1dbd23a1030084b7, 0x83cb978dee06aa0a, 0x42c97be5b27a7763, 0x3b6d6b7270ed765,
+ 0x125c12fdba584aed, 0x1c826397afe58763, 0x8bdbe2d43f3eda96, 0x954cda70edf6591f,
+};
+
void
rspamd_shingles_test_func (void)
{
- //test_case (5, 100, 0.5);
- test_case (200, 10, 0.1);
- test_case (500, 20, 0.01);
- test_case (5000, 20, 0.01);
- test_case (5000, 15, 0);
- test_case (5000, 30, 1.0);
+ enum rspamd_shingle_alg alg = RSPAMD_SHINGLES_OLD;
+ struct rspamd_shingle *sgl;
+ guchar key[16];
+ GArray *input;
+ rspamd_ftok_t tok;
+ int i;
+
+ memset (key, 0, sizeof (key));
+ input = g_array_sized_new (FALSE, FALSE, sizeof (rspamd_ftok_t), 5);
+
+ for (i = 0; i < 5; i ++) {
+ gchar *b = g_alloca (8);
+ memset (b, 0, 8);
+ memcpy (b + 1, "test", 4);
+ b[0] = 'a' + i;
+ tok.begin = b;
+ tok.len = 5 + ((i + 1) % 4);
+ g_array_append_val (input, tok);
+ }
+
+ sgl = rspamd_shingles_generate (input, key, NULL,
+ rspamd_shingles_default_filter, NULL, RSPAMD_SHINGLES_OLD);
+ for (i = 0; i < RSPAMD_SHINGLE_SIZE; i ++) {
+ g_assert (sgl->hashes[i] == expected_old[i]);
+ }
+ g_free (sgl);
+
+ sgl = rspamd_shingles_generate (input, key, NULL,
+ rspamd_shingles_default_filter, NULL, RSPAMD_SHINGLES_XXHASH);
+ for (i = 0; i < RSPAMD_SHINGLE_SIZE; i ++) {
+ g_assert (sgl->hashes[i] == expected_xxhash[i]);
+ }
+ g_free (sgl);
+
+ sgl = rspamd_shingles_generate (input, key, NULL,
+ rspamd_shingles_default_filter, NULL, RSPAMD_SHINGLES_MUMHASH);
+ for (i = 0; i < RSPAMD_SHINGLE_SIZE; i ++) {
+ g_assert (sgl->hashes[i] == expected_mumhash[i]);
+ }
+ g_free (sgl);
+
+ sgl = rspamd_shingles_generate (input, key, NULL,
+ rspamd_shingles_default_filter, NULL, RSPAMD_SHINGLES_FAST);
+ for (i = 0; i < RSPAMD_SHINGLE_SIZE; i ++) {
+ g_assert (sgl->hashes[i] == expected_fasthash[i]);
+ }
+ g_free (sgl);
+
+ for (alg = RSPAMD_SHINGLES_OLD; alg <= RSPAMD_SHINGLES_FAST; alg ++) {
+ test_case (200, 10, 0.1, alg);
+ test_case (500, 20, 0.01, alg);
+ test_case (5000, 20, 0.01, alg);
+ test_case (5000, 15, 0, alg);
+ test_case (5000, 30, 1.0, alg);
+ test_case (50000, 30, 0.02, alg);
+ test_case (50000, 5, 0.02, alg);
+ test_case (50000, 16, 0.02, alg);
+ }
}
diff --git a/utils/rspamd_stats.pl b/utils/rspamd_stats.pl
new file mode 100644
index 000000000..efb89bab3
--- /dev/null
+++ b/utils/rspamd_stats.pl
@@ -0,0 +1,173 @@
+#!/usr/bin/env perl
+
+use Data::Dumper;
+use Getopt::Long;
+use warnings;
+use strict;
+use Time::Piece;
+
+my @symbols_search;
+my $start = "";
+my $end = "";
+my $reject_score = 30.0;
+my $junk_score = 7.5;
+my $log_file = "/var/log/rspamd/rspamd.log";
+my $dateformat = "%Y-%m-%d %H:%M:%S";
+
+GetOptions(
+ "reject-score=f" => \$reject_score,
+ "junk-score=f" => \$junk_score,
+ "start=s" => \$start,
+ "end=s" => \$end,
+ "symbol=s@" => \@symbols_search,
+ "log=s" => \$log_file,
+ "dateformat=s" => \$dateformat);
+
+# Global vars
+my $total = 0;
+my $total_spam = 0;
+my $total_junk = 0;
+my $junk_symbols = 0;
+my $spam_symbols = 0;
+my $ham_symbols = 0;
+my $ham_spam_change = 0;
+my $ham_junk_change = 0;
+my $diff_alpha = 0.1;
+my %sym_res;
+
+my $st = 0;
+my $ed = 0;
+
+if ($start ne "") {
+ $st = Time::Piece->strptime($start, $dateformat);
+}
+
+if ($end ne "") {
+ $ed = Time::Piece->strptime($end, $dateformat);
+}
+
+my $rspamd_log;
+
+if ($log_file eq '-') {
+ $rspamd_log = \*STDIN;
+}
+else {
+ open($rspamd_log, '<', $log_file) or die "cannot open $log_file";
+}
+
+foreach my $s (@symbols_search) {
+ $sym_res{$s} = {
+ hits => 0,
+ spam_hits => 0,
+ junk_hits => 0,
+ spam_change => 0,
+ junk_change => 0,
+ weight => 0,
+ };
+}
+
+while(<$rspamd_log>) {
+ if (/^.*rspamd_task_write_log.*$/) {
+ my @elts = split /\s+/;
+ my $ts = $elts[0] . ' ' . $elts[1];
+
+ if ($st or $ed) {
+ my $dt = Time::Piece->strptime($ts, $dateformat) or die "cannot parse $ts";
+
+ if ($dt) {
+ if ($st != 0 && $dt < $st) {
+ next;
+ }
+ if ($ed != 0 && $dt > $ed) {
+ next;
+ }
+ }
+ }
+
+ if ($_ !~ /\[(-?\d+(?:\.\d+)?)\/(-?\d+(?:\.\d+)?)\]\s+\[([^\]]+)\]/) {
+ #print "BAD\n";
+ next;
+ }
+
+ $total ++;
+ my $score = $1 * 1.0;
+
+ if ($score >= $reject_score) {
+ $total_spam ++;
+ }
+ elsif ($score >= $junk_score) {
+ $total_junk ++;
+ }
+
+ # Symbols
+ my @symbols = split /,/, $3;
+
+ foreach my $s (@symbols_search) {
+ my @selected = grep /$s/, @symbols;
+
+ if (scalar(@selected) > 0) {
+ $selected[0] =~ /^[^\(]+\(([^\)]+)\).*$/;
+ my $sym_score = $1;
+
+ if ($sym_score < $diff_alpha) {
+ next;
+ }
+
+ my $r = $sym_res{$s};
+ $r->{hits} ++;
+ $r->{weight} += $sym_score;
+ my $is_spam = 0;
+ my $is_junk = 0;
+
+ if ($score >= $reject_score) {
+ $is_spam = 1;
+ $r->{spam_hits} ++;
+ }
+ elsif ($score >= $junk_score) {
+ $is_junk = 1;
+ $r->{junk_hits} ++;
+ }
+
+ my $score_without = $score - $sym_score;
+
+ if ($is_spam && $score_without < $reject_score) {
+ $r->{spam_change} ++;
+ }
+ if ($is_junk && $score_without < $junk_score) {
+ $r->{junk_change} ++;
+ }
+ }
+ }
+ }
+}
+
+my $total_ham = $total - ($total_spam + $total_junk);
+
+if ($total > 0) {
+ while (my ($s, $r) = each(%sym_res)) {
+ if ($r->{hits} > 0) {
+ my $th = $r->{hits};
+ my $sh = $r->{spam_hits};
+ my $jh = $r->{junk_hits};
+ my $hh = $r->{hits} - $sh - $jh;
+ my $htp = $hh * 100.0 / $total_ham if $total_ham != 0;
+ my $stp = $sh * 100.0 / $total_spam if $total_spam != 0;
+ my $jtp = $jh * 100.0 / $total_junk if $total_junk != 0;
+ printf "Symbol: %s (weight %.3f) (%d hits, %.3f%%)\nHam hits: %d (%.3f%%), total ham: %d (ham with $s: %.3f%%)\nSpam hits: %d (%.3f%%), total spam: %d (spam with $s: %.3f%%)\nJunk hits: %d (%.3f%%), total junk: %d (junk with $s: %.3f%%)\n",
+ $s, $r->{weight} / $r->{hits}, $th, ($th / $total * 100.0),
+ $hh, ($hh / $th * 100.0), $total_ham, ($htp or 0),
+ $sh, ($sh / $th * 100.0), $total_spam, ($stp or 0),
+ $jh, ($jh / $th * 100.0), $total_junk, ($jtp or 0);
+ my $schp = $r->{spam_change} / $total_spam * 100.0 if $total_spam;
+ my $jchp = $r->{junk_change} / $total_junk * 100.0 if $total_junk;
+ printf "Spam changes (ham/junk -> spam): %d (%.3f%%), total percentage (changes / spam hits): %.3f%%\nJunk changes (ham -> junk): %d (%.3f%%), total percentage (changes / junk hits): %.3f%%\n",
+ $r->{spam_change}, ($r->{spam_change} / $th * 100.0), ($schp or 0),
+ $r->{junk_change}, ($r->{junk_change} / $th * 100.0), ($jchp or 0);
+ }
+ else {
+ print "Symbol $s has not been met\n";
+ }
+
+ print '*' x 20 . "\n";
+ }
+}