diff options
48 files changed, 2412 insertions, 795 deletions
diff --git a/config.h.in b/config.h.in index d858f65c6..f7bdedbfb 100644 --- a/config.h.in +++ b/config.h.in @@ -323,8 +323,14 @@ typedef off_t goffset; # define RSPAMD_ALIGNED(x) __declspec(align(x)) #elif defined(__GNUC__) # define RSPAMD_ALIGNED(x) __attribute__((aligned(x))) +#ifndef __clang__ +# define RSPAMD_OPTIMIZE(x) __attribute__((__optimize__ (x))) +#else +# define RSPAMD_OPTIMIZE(x) +#endif #else # define RSPAMD_ALIGNED(x) +# define RSPAMD_OPTIMIZE(x) #endif #endif diff --git a/contrib/metrohash/metro.h b/contrib/metrohash/metro.h new file mode 100644 index 000000000..36c44d65c --- /dev/null +++ b/contrib/metrohash/metro.h @@ -0,0 +1,141 @@ +/*- +The MIT License (MIT) + +Copyright (c) 2015 J. Andrew Rogers + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + */ +#ifndef CONTRIB_METROHASH_METRO_H_ +#define CONTRIB_METROHASH_METRO_H_ + +#include <stddef.h> +#include <stdlib.h> +#include <string.h> +#include <limits.h> +#ifdef _MSC_VER +typedef unsigned __int32 uint32_t; +typedef unsigned __int64 uint64_t; +#else +#include <stdint.h> +#endif + +/* rotate right idiom recognized by compiler*/ +inline static uint64_t rotate_right(uint64_t v, unsigned k) +{ + return (v >> k) | (v << (64 - k)); +} + +inline static uint64_t read_u64(const void * const ptr) +{ + return * (uint64_t *) ptr; +} + +inline static uint64_t read_u32(const void * const ptr) +{ + return * (uint32_t *) ptr; +} + +inline static uint64_t read_u16(const void * const ptr) +{ + return * (uint16_t *) ptr; +} + +inline static uint64_t read_u8 (const void * const ptr) +{ + return * (uint8_t *) ptr; +} + +static inline uint64_t metrohash64_1(const uint8_t * key, uint64_t len, uint64_t seed) +{ + static const uint64_t k0 = 0xC83A91E1; + static const uint64_t k1 = 0x8648DBDB; + static const uint64_t k2 = 0x7BDEC03B; + static const uint64_t k3 = 0x2F5870A5; + + const uint8_t * ptr = key; + const uint8_t * const end = ptr + len; + + uint64_t hash = ((((uint64_t) seed) + k2) * k0) + len; + + if (len >= 32) + { + uint64_t v[4]; + v[0] = hash; + v[1] = hash; + v[2] = hash; + v[3] = hash; + + do + { + v[0] += read_u64(ptr) * k0; ptr += 8; v[0] = rotate_right(v[0],29) + v[2]; + v[1] += read_u64(ptr) * k1; ptr += 8; v[1] = rotate_right(v[1],29) + v[3]; + v[2] += read_u64(ptr) * k2; ptr += 8; v[2] = rotate_right(v[2],29) + v[0]; + v[3] += read_u64(ptr) * k3; ptr += 8; v[3] = rotate_right(v[3],29) + v[1]; + } + while (ptr <= (end - 32)); + + v[2] ^= rotate_right(((v[0] + v[3]) * k0) + v[1], 33) * k1; + v[3] ^= rotate_right(((v[1] + v[2]) * k1) + v[0], 33) * k0; + v[0] ^= rotate_right(((v[0] + v[2]) * k0) + v[3], 33) * k1; + v[1] ^= rotate_right(((v[1] + v[3]) * k1) + v[2], 33) * k0; + hash += v[0] ^ v[1]; + } + + if ((end - ptr) >= 16) + { + uint64_t v0 = hash + (read_u64(ptr) * k0); ptr += 8; v0 = rotate_right(v0,33) * k1; + uint64_t v1 = hash + (read_u64(ptr) * k1); ptr += 8; v1 = rotate_right(v1,33) * k2; + v0 ^= rotate_right(v0 * k0, 35) + v1; + v1 ^= rotate_right(v1 * k3, 35) + v0; + hash += v1; + } + + if ((end - ptr) >= 8) + { + hash += read_u64(ptr) * k3; ptr += 8; + hash ^= rotate_right(hash, 33) * k1; + + } + + if ((end - ptr) >= 4) + { + hash += read_u32(ptr) * k3; ptr += 4; + hash ^= rotate_right(hash, 15) * k1; + } + + if ((end - ptr) >= 2) + { + hash += read_u16(ptr) * k3; ptr += 2; + hash ^= rotate_right(hash, 13) * k1; + } + + if ((end - ptr) >= 1) + { + hash += read_u8 (ptr) * k3; + hash ^= rotate_right(hash, 25) * k1; + } + + hash ^= rotate_right(hash, 33); + hash *= k0; + hash ^= rotate_right(hash, 33); + + return hash; +} + +#endif /* CONTRIB_METROHASH_METRO_H_ */ diff --git a/contrib/mumhash/mum.h b/contrib/mumhash/mum.h new file mode 100644 index 000000000..1daebf3de --- /dev/null +++ b/contrib/mumhash/mum.h @@ -0,0 +1,372 @@ +/* Copyright (c) 2016 Vladimir Makarov <vmakarov@gcc.gnu.org> + + Permission is hereby granted, free of charge, to any person + obtaining a copy of this software and associated documentation + files (the "Software"), to deal in the Software without + restriction, including without limitation the rights to use, copy, + modify, merge, publish, distribute, sublicense, and/or sell copies + of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. +*/ + +/* This file implements MUM (MUltiply and Mix) hashing. We randomize + input data by 64x64-bit multiplication and mixing hi- and low-parts + of the multiplication result by using an addition and then mix it + into the current state. We use prime numbers randomly generated + with the equal probability of their bit values for the + multiplication. When all primes are used once, the state is + randomized and the same prime numbers are used again for data + randomization. + + The MUM hashing passes all SMHasher tests. Pseudo Random Number + Generator based on MUM also passes NIST Statistical Test Suite for + Random and Pseudorandom Number Generators for Cryptographic + Applications (version 2.2.1) with 1000 bitstreams each containing + 1M bits. MUM hashing is also faster Spooky64 and City64 on small + strings (at least upto 512-bit) on Haswell and Power7. The MUM bulk + speed (speed on very long data) is bigger than Spooky and City on + Power7. On Haswell the bulk speed is bigger than Spooky one and + close to City speed. */ + +#ifndef __MUM_HASH__ +#define __MUM_HASH__ + +#include "config.h" +#include <stddef.h> +#include <stdlib.h> +#include <string.h> +#include <limits.h> +#ifdef _MSC_VER +typedef unsigned __int32 uint32_t; +typedef unsigned __int64 uint64_t; +#else +#include <stdint.h> +#endif + +#ifdef __GNUC__ +#define _MUM_ATTRIBUTE_UNUSED __attribute__((unused)) +#define _MUM_OPTIMIZE(opts) RSPAMD_OPTIMIZE(opts) +#define _MUM_TARGET(opts) +#else +#define _MUM_ATTRIBUTE_UNUSED +#define _MUM_OPTIMIZE(opts) +#define _MUM_TARGET(opts) +#endif + +/* Macro saying to use 128-bit integers implemented by GCC for some + targets. */ +#ifndef _MUM_USE_INT128 +/* In GCC uint128_t is defined if HOST_BITS_PER_WIDE_INT >= 64. + HOST_WIDE_INT is long if HOST_BITS_PER_LONG > HOST_BITS_PER_INT, + otherwise int. */ +#if defined(__GNUC__) && UINT_MAX != ULONG_MAX +#define _MUM_USE_INT128 1 +#else +#define _MUM_USE_INT128 0 +#endif +#endif + + +/* Here are different primes randomly generated with the equal + probability of their bit values. They are used to randomize input + values. */ +static uint64_t _mum_hash_step_prime = 0x2e0bb864e9ea7df5ULL; +static uint64_t _mum_key_step_prime = 0xcdb32970830fcaa1ULL; +static uint64_t _mum_block_start_prime = 0xc42b5e2e6480b23bULL; +static uint64_t _mum_unroll_prime = 0x7b51ec3d22f7096fULL; +static uint64_t _mum_tail_prime = 0xaf47d47c99b1461bULL; +static uint64_t _mum_finish_prime1 = 0xa9a7ae7ceff79f3fULL; +static uint64_t _mum_finish_prime2 = 0xaf47d47c99b1461bULL; + +static uint64_t _mum_primes [] = { + 0X9ebdcae10d981691, 0X32b9b9b97a27ac7d, 0X29b5584d83d35bbd, 0X4b04e0e61401255f, + 0X25e8f7b1f1c9d027, 0X80d4c8c000f3e881, 0Xbd1255431904b9dd, 0X8a3bd4485eee6d81, + 0X3bc721b2aad05197, 0X71b1a19b907d6e33, 0X525e6c1084a8534b, 0X9e4c2cd340c1299f, + 0Xde3add92e94caa37, 0X7e14eadb1f65311d, 0X3f5aa40f89812853, 0X33b15a3b587d15c9, +}; + +/* Multiply 64-bit V and P and return sum of high and low parts of the + result. */ +static inline uint64_t +_mum (uint64_t v, uint64_t p) { + uint64_t hi, lo; +#if _MUM_USE_INT128 +#if defined(__aarch64__) + /* AARCH64 needs 2 insns to calculate 128-bit result of the + multiplication. If we use a generic code we actually call a + function doing 128x128->128 bit multiplication. The function is + very slow. */ + lo = v * p, hi; + asm ("umulh %0, %1, %2" : "=r" (hi) : "r" (v), "r" (p)); +#else + __uint128_t r = (__uint128_t) v * (__uint128_t) p; + hi = (uint64_t) (r >> 64); + lo = (uint64_t) r; +#endif +#else + /* Implementation of 64x64->128-bit multiplication by four 32x32->64 + bit multiplication. */ + uint64_t hv = v >> 32, hp = p >> 32; + uint64_t lv = (uint32_t) v, lp = (uint32_t) p; + uint64_t rh = hv * hp; + uint64_t rm_0 = hv * lp; + uint64_t rm_1 = hp * lv; + uint64_t rl = lv * lp; + uint64_t t, carry = 0; + + /* We could ignore a carry bit here if we did not care about the + same hash for 32-bit and 64-bit targets. */ + t = rl + (rm_0 << 32); +#ifdef MUM_TARGET_INDEPENDENT_HASH + carry = t < rl; +#endif + lo = t + (rm_1 << 32); +#ifdef MUM_TARGET_INDEPENDENT_HASH + carry += lo < t; +#endif + hi = rh + (rm_0 >> 32) + (rm_1 >> 32) + carry; +#endif + /* We could use XOR here too but, for some reasons, on Haswell and + Power7 using an addition improves hashing performance by 10% for + small strings. */ + return hi + lo; +} + +#if defined(_MSC_VER) +#define _mum_bswap_64(x) _byteswap_uint64_t (x) +#elif defined(__APPLE__) +#include <libkern/OSByteOrder.h> +#define _mum_bswap_64(x) OSSwapInt64 (x) +#elif defined(__GNUC__) +#define _mum_bswap64(x) __builtin_bswap64 (x) +#else +#include <byteswap.h> +#define _mum_bswap64(x) bswap64 (x) +#endif + +static inline uint64_t +_mum_le (uint64_t v) { +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ || !defined(MUM_TARGET_INDEPENDENT_HASH) + return v; +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + return _mum_bswap64 (v); +#else +#error "Unknown endianess" +#endif +} +/* Macro defining how many times the most nested loop in + _mum_hash_aligned will be unrolled by the compiler (although it can + make an own decision:). Use only a constant here to help a + compiler to unroll a major loop. + + The macro value affects the result hash for strings > 128 bit. The + unroll factor greatly affects the hashing speed. We prefer the + speed. */ +#ifndef _MUM_UNROLL_FACTOR_POWER +#if defined(__PPC64__) && !defined(MUM_TARGET_INDEPENDENT_HASH) +#define _MUM_UNROLL_FACTOR_POWER 3 +#elif defined(__aarch64__) && !defined(MUM_TARGET_INDEPENDENT_HASH) +#define _MUM_UNROLL_FACTOR_POWER 4 +#else +#define _MUM_UNROLL_FACTOR_POWER 2 +#endif +#endif + +#if _MUM_UNROLL_FACTOR_POWER < 1 +#error "too small unroll factor" +#elif _MUM_UNROLL_FACTOR_POWER > 4 +#error "We have not enough primes for such unroll factor" +#endif + +#define _MUM_UNROLL_FACTOR (1 << _MUM_UNROLL_FACTOR_POWER) + +static inline uint64_t _MUM_OPTIMIZE("unroll-loops") +_mum_hash_aligned (uint64_t start, const void *key, size_t len) { + uint64_t result = start; + const unsigned char *str = (const unsigned char *) key; + union {uint64_t i; unsigned char s[sizeof (uint64_t)];} p; + int i; + size_t n; + + result = _mum (result, _mum_block_start_prime); + while (len > _MUM_UNROLL_FACTOR * sizeof (uint64_t)) { + /* This loop could be vectorized when we have vector insns for + 64x64->128-bit multiplication. AVX2 currently only have a + vector insn for 4 32x32->64-bit multiplication. */ + for (i = 0; i < _MUM_UNROLL_FACTOR; i++) + result ^= _mum (_mum_le (((uint64_t *) str)[i]), _mum_primes[i]); + len -= _MUM_UNROLL_FACTOR * sizeof (uint64_t); + str += _MUM_UNROLL_FACTOR * sizeof (uint64_t); + /* We will use the same prime numbers on the next iterations -- + randomize the state. */ + result = _mum (result, _mum_unroll_prime); + } + n = len / sizeof (uint64_t); + for (i = 0; i < n; i++) + result ^= _mum (_mum_le (((uint64_t *) str)[i]), _mum_primes[i]); + len -= n * sizeof (uint64_t); str += n * sizeof (uint64_t); + if (len) { + p.i = 0; memcpy (p.s, str, len); + result ^= _mum (_mum_le (((uint64_t *) p.s)[0]), _mum_tail_prime); + } + return result; +} + +/* Final randomization of H. */ +static inline uint64_t +_mum_final (uint64_t h) { + h ^= _mum (h, _mum_finish_prime1); + h ^= _mum (h, _mum_finish_prime2); + return h; +} + +#if defined(__x86_64__) && defined(__GNUC__) +#if 0 +/* We want to use AVX2 insn MULX instead of generic x86-64 MULQ where + it is possible. Although on modern Intel processors MULQ takes + 3-cycles vs. 4 for MULX, MULX permits more freedom in insn + scheduling as it uses less fixed registers. */ +static inline uint64_t _MUM_TARGET("arch=haswell") +_mum_hash_avx2 (const void * key, size_t len, uint64_t seed) { + return _mum_final (_mum_hash_aligned (seed + len, key, len)); +} +#endif +#endif + +#ifndef _MUM_UNALIGNED_ACCESS +#if defined(__x86_64__) || defined(__i386__) || defined(__PPC64__) \ + || defined(__s390__) || defined(__m32c__) || defined(cris) \ + || defined(__CR16__) || defined(__vax__) || defined(__m68k__) \ + || defined(__aarch64__) +#define _MUM_UNALIGNED_ACCESS 1 +#else +#define _MUM_UNALIGNED_ACCESS 0 +#endif +#endif + +/* When we need an aligned access to data being hashed we move part of + the unaligned data to an aligned block of given size and then + process it, repeating processing the data by the block. */ +#ifndef _MUM_BLOCK_LEN +#define _MUM_BLOCK_LEN 1024 +#endif + +#if _MUM_BLOCK_LEN < 8 +#error "too small block length" +#endif + +static inline uint64_t +#if defined(__x86_64__) +_MUM_TARGET("inline-all-stringops") +#endif +_mum_hash_default (const void *key, size_t len, uint64_t seed) { + uint64_t result; + const unsigned char *str = (const unsigned char *) key; + size_t block_len; + uint64_t buf[_MUM_BLOCK_LEN / sizeof (uint64_t)]; + + result = seed + len; + if (_MUM_UNALIGNED_ACCESS || ((size_t) str & 0x7) == 0) + result = _mum_hash_aligned (result, key, len); + else { + while (len != 0) { + block_len = len < _MUM_BLOCK_LEN ? len : _MUM_BLOCK_LEN; + memcpy (buf, str, block_len); + result = _mum_hash_aligned (result, buf, block_len); + len -= block_len; + str += block_len; + } + } + return _mum_final (result); +} + +static inline uint64_t +_mum_next_factor (void) { + uint64_t start = 0; + int i; + + for (i = 0; i < 8; i++) + start = (start << 8) | rand() % 256; + return start; +} + +/* ++++++++++++++++++++++++++ Interface functions: +++++++++++++++++++ */ + +/* Set random multiplicators depending on SEED. */ +static inline void +mum_hash_randomize (uint64_t seed) { + int i; + + srand (seed); + _mum_hash_step_prime = _mum_next_factor (); + _mum_key_step_prime = _mum_next_factor (); + _mum_finish_prime1 = _mum_next_factor (); + _mum_finish_prime2 = _mum_next_factor (); + _mum_block_start_prime = _mum_next_factor (); + _mum_unroll_prime = _mum_next_factor (); + _mum_tail_prime = _mum_next_factor (); + for (i = 0; i < sizeof (_mum_primes) / sizeof (uint64_t); i++) + _mum_primes[i] = _mum_next_factor (); +} + +/* Start hashing data with SEED. Return the state. */ +static inline uint64_t +mum_hash_init (uint64_t seed) { + return seed; +} + +/* Process data KEY with the state H and return the updated state. */ +static inline uint64_t +mum_hash_step (uint64_t h, uint64_t key) +{ + return _mum (h, _mum_hash_step_prime) ^ _mum (key, _mum_key_step_prime); +} + +/* Return the result of hashing using the current state H. */ +static inline uint64_t +mum_hash_finish (uint64_t h) { + return _mum_final (h); +} + +/* Fast hashing of KEY with SEED. The hash is always the same for the + same key on any target. */ +static inline size_t +mum_hash64 (uint64_t key, uint64_t seed) { + return mum_hash_finish (mum_hash_step (mum_hash_init (seed), key)); +} + +/* Hash data KEY of length LEN and SEED. The hash depends on the + target endianess and the unroll factor. */ +static inline uint64_t +mum_hash (const void *key, size_t len, uint64_t seed) { +#if defined(__x86_64__) && defined(__GNUC__) +#if 0 + static int avx2_support = 0; + + if (avx2_support > 0) + return _mum_hash_avx2 (key, len, seed); + + else if (! avx2_support) { + __builtin_cpu_init (); + avx2_support = __builtin_cpu_supports ("avx2") ? 1 : -1; + if (avx2_support > 0) + return _mum_hash_avx2 (key, len, seed); + } +#endif +#endif + return _mum_hash_default (key, len, seed); +} + +#endif diff --git a/src/controller.c b/src/controller.c index 06782e850..8b16eda19 100644 --- a/src/controller.c +++ b/src/controller.c @@ -145,7 +145,7 @@ struct rspamd_controller_worker_ctx { /* SSL private key */ gchar *ssl_key; /* A map of secure IP */ - GList *secure_ip; + const ucl_object_t *secure_ip; radix_compressed_t *secure_map; /* Static files dir */ @@ -775,10 +775,10 @@ rspamd_controller_handle_maps (struct rspamd_http_connection_entry *conn_ent, struct rspamd_controller_session *session = conn_ent->ud; GList *cur, *tmp = NULL; struct rspamd_map *map; + struct rspamd_map_backend *bk; gboolean editable; ucl_object_t *obj, *top; - if (!rspamd_controller_check_password (conn_ent, session, msg, FALSE)) { return 0; } @@ -788,8 +788,10 @@ rspamd_controller_handle_maps (struct rspamd_http_connection_entry *conn_ent, cur = session->ctx->cfg->maps; while (cur) { map = cur->data; - if (map->protocol == MAP_PROTO_FILE) { - if (access (map->uri, R_OK) == 0) { + bk = g_ptr_array_index (map->backends, 0); + + if (bk->protocol == MAP_PROTO_FILE) { + if (access (bk->uri, R_OK) == 0) { tmp = g_list_prepend (tmp, map); } } @@ -799,7 +801,8 @@ rspamd_controller_handle_maps (struct rspamd_http_connection_entry *conn_ent, cur = tmp; while (cur) { map = cur->data; - editable = (access (map->uri, W_OK) == 0); + bk = g_ptr_array_index (map->backends, 0); + editable = (access (bk->uri, W_OK) == 0); obj = ucl_object_typed_new (UCL_OBJECT); ucl_object_insert_key (obj, ucl_object_fromint (map->id), @@ -808,7 +811,7 @@ rspamd_controller_handle_maps (struct rspamd_http_connection_entry *conn_ent, ucl_object_insert_key (obj, ucl_object_fromstring (map->description), "description", 0, false); } - ucl_object_insert_key (obj, ucl_object_fromstring (map->uri), + ucl_object_insert_key (obj, ucl_object_fromstring (bk->uri), "uri", 0, false); ucl_object_insert_key (obj, ucl_object_frombool (editable), "editable", 0, false); @@ -840,6 +843,7 @@ rspamd_controller_handle_get_map (struct rspamd_http_connection_entry *conn_ent, struct rspamd_controller_session *session = conn_ent->ud; GList *cur; struct rspamd_map *map; + struct rspamd_map_backend *bk; const rspamd_ftok_t *idstr; struct stat st; gint fd; @@ -870,7 +874,8 @@ rspamd_controller_handle_get_map (struct rspamd_http_connection_entry *conn_ent, cur = session->ctx->cfg->maps; while (cur) { map = cur->data; - if (map->id == id && map->protocol == MAP_PROTO_FILE) { + bk = g_ptr_array_index (map->backends, 0); + if (map->id == id && bk->protocol == MAP_PROTO_FILE) { found = TRUE; break; } @@ -883,8 +888,10 @@ rspamd_controller_handle_get_map (struct rspamd_http_connection_entry *conn_ent, return 0; } - if (stat (map->uri, &st) == -1 || (fd = open (map->uri, O_RDONLY)) == -1) { - msg_err_session ("cannot open map %s: %s", map->uri, strerror (errno)); + bk = g_ptr_array_index (map->backends, 0); + + if (stat (bk->uri, &st) == -1 || (fd = open (bk->uri, O_RDONLY)) == -1) { + msg_err_session ("cannot open map %s: %s", bk->uri, strerror (errno)); rspamd_controller_send_error (conn_ent, 500, "500 map open error"); return 0; } @@ -898,7 +905,7 @@ rspamd_controller_handle_get_map (struct rspamd_http_connection_entry *conn_ent, if (read (fd, reply->body->str, st.st_size) == -1) { close (fd); rspamd_http_message_free (reply); - msg_err_session ("cannot read map %s: %s", map->uri, strerror (errno)); + msg_err_session ("cannot read map %s: %s", bk->uri, strerror (errno)); rspamd_controller_send_error (conn_ent, 500, "500 map read error"); return 0; } @@ -1800,6 +1807,7 @@ rspamd_controller_handle_savemap (struct rspamd_http_connection_entry *conn_ent, struct rspamd_controller_session *session = conn_ent->ud; GList *cur; struct rspamd_map *map; + struct rspamd_map_backend *bk; struct rspamd_controller_worker_ctx *ctx; const rspamd_ftok_t *idstr; gulong id; @@ -1838,7 +1846,8 @@ rspamd_controller_handle_savemap (struct rspamd_http_connection_entry *conn_ent, cur = ctx->cfg->maps; while (cur) { map = cur->data; - if (map->id == id && map->protocol == MAP_PROTO_FILE) { + bk = g_ptr_array_index (map->backends, 0); + if (map->id == id && bk->protocol == MAP_PROTO_FILE) { found = TRUE; break; } @@ -1851,24 +1860,24 @@ rspamd_controller_handle_savemap (struct rspamd_http_connection_entry *conn_ent, return 0; } - if (g_atomic_int_get (map->locked)) { - msg_info_session ("map locked: %s", map->uri); + bk = g_ptr_array_index (map->backends, 0); + if (g_atomic_int_compare_and_exchange (map->locked, 0, 1)) { + msg_info_session ("map locked: %s", bk->uri); rspamd_controller_send_error (conn_ent, 404, "Map is locked"); return 0; } /* Set lock */ - g_atomic_int_set (map->locked, 1); - fd = open (map->uri, O_WRONLY | O_TRUNC); + fd = open (bk->uri, O_WRONLY | O_TRUNC); if (fd == -1) { g_atomic_int_set (map->locked, 0); - msg_info_session ("map %s open error: %s", map->uri, strerror (errno)); + msg_info_session ("map %s open error: %s", bk->uri, strerror (errno)); rspamd_controller_send_error (conn_ent, 404, "Map id not found"); return 0; } if (write (fd, msg->body_buf.begin, msg->body_buf.len) == -1) { - msg_info_session ("map %s write error: %s", map->uri, strerror (errno)); + msg_info_session ("map %s write error: %s", bk->uri, strerror (errno)); close (fd); g_atomic_int_set (map->locked, 0); rspamd_controller_send_error (conn_ent, 500, "Map write error"); @@ -1877,7 +1886,7 @@ rspamd_controller_handle_savemap (struct rspamd_http_connection_entry *conn_ent, msg_info_session ("<%s>, map %s saved", rspamd_inet_address_to_string (session->from_addr), - map->uri); + bk->uri); /* Close and unlock */ close (fd); g_atomic_int_set (map->locked, 0); @@ -2481,7 +2490,7 @@ init_controller_worker (struct rspamd_config *cfg) rspamd_rcl_register_worker_option (cfg, type, "secure_ip", - rspamd_rcl_parse_struct_string_list, + rspamd_rcl_parse_struct_ucl, ctx, G_STRUCT_OFFSET (struct rspamd_controller_worker_ctx, secure_ip), 0, @@ -2490,7 +2499,7 @@ init_controller_worker (struct rspamd_config *cfg) rspamd_rcl_register_worker_option (cfg, type, "trusted_ips", - rspamd_rcl_parse_struct_string_list, + rspamd_rcl_parse_struct_ucl, ctx, G_STRUCT_OFFSET (struct rspamd_controller_worker_ctx, secure_ip), 0, @@ -2536,12 +2545,12 @@ void start_controller_worker (struct rspamd_worker *worker) { struct rspamd_controller_worker_ctx *ctx = worker->ctx; - GList *cur; struct module_ctx *mctx; GHashTableIter iter; gpointer key, value; struct rspamd_keypair_cache *cache; - gchar *secure_ip; + const ucl_object_t *cur; + ucl_object_iter_t it = NULL; gpointer m; ctx->ev_base = rspamd_prepare_worker (worker, @@ -2556,26 +2565,31 @@ start_controller_worker (struct rspamd_worker *worker) ctx->custom_commands = g_hash_table_new (rspamd_strcase_hash, rspamd_strcase_equal); if (ctx->secure_ip != NULL) { - cur = ctx->secure_ip; - - while (cur) { - secure_ip = cur->data; - /* Try map syntax */ - if (!rspamd_map_is_map (secure_ip)) { - if (!radix_add_generic_iplist (secure_ip, - &ctx->secure_map)) { - msg_warn_ctx ("cannot load or parse ip list from '%s'", - secure_ip); + if (ucl_object_type (ctx->secure_ip) == UCL_ARRAY) { + while ((cur = ucl_object_iterate (ctx->secure_ip, &it, true)) != NULL) { + /* Try map syntax */ + if (ucl_object_type (cur) == UCL_STRING && + !rspamd_map_is_map (ucl_object_tostring (cur))) { + if (!radix_add_generic_iplist (ucl_object_tostring (cur), + &ctx->secure_map)) { + msg_warn_ctx ("cannot load or parse ip list from '%s'", + ucl_object_tostring (cur)); + } + } + else { + rspamd_map_add_from_ucl (worker->srv->cfg, cur, + "Allow webui access from the specified IP", + rspamd_radix_read, rspamd_radix_fin, + (void **)&ctx->secure_map); } } - else { - rspamd_map_add (worker->srv->cfg, secure_ip, + } + else { + rspamd_map_add_from_ucl (worker->srv->cfg, ctx->secure_ip, "Allow webui access from the specified IP", rspamd_radix_read, rspamd_radix_fin, (void **)&ctx->secure_map); - } - cur = g_list_next (cur); } } diff --git a/src/fuzzy_storage.c b/src/fuzzy_storage.c index f43cb3f44..7803307b0 100644 --- a/src/fuzzy_storage.c +++ b/src/fuzzy_storage.c @@ -209,6 +209,7 @@ rspamd_fuzzy_process_updates_queue (struct rspamd_fuzzy_storage_ctx *ctx) struct rspamd_fuzzy_cmd *cmd; gpointer ptr; guint nupdates = 0; + time_t now = time (NULL); if (ctx->updates_pending && g_queue_get_length (ctx->updates_pending) > 0 && @@ -227,7 +228,7 @@ rspamd_fuzzy_process_updates_queue (struct rspamd_fuzzy_storage_ctx *ctx) } if (cmd->cmd == FUZZY_WRITE) { - rspamd_fuzzy_backend_add (ctx->backend, ptr); + rspamd_fuzzy_backend_add (ctx->backend, ptr, now); } else { rspamd_fuzzy_backend_del (ctx->backend, ptr); @@ -248,7 +249,8 @@ rspamd_fuzzy_process_updates_queue (struct rspamd_fuzzy_storage_ctx *ctx) } g_queue_clear (ctx->updates_pending); - msg_info ("updated fuzzy storage: %ud updates processed", nupdates); + msg_info ("updated fuzzy storage: %ud updates processed, version: %d", + nupdates, rspamd_fuzzy_backend_version (ctx->backend)); } else { msg_err ("cannot commit update transaction to fuzzy backend, " diff --git a/src/libcryptobox/cryptobox.c b/src/libcryptobox/cryptobox.c index 1680f1848..71180da12 100644 --- a/src/libcryptobox/cryptobox.c +++ b/src/libcryptobox/cryptobox.c @@ -31,7 +31,10 @@ #include "catena/catena.h" #include "ottery.h" #include "printf.h" - +#include "xxhash.h" +#define MUM_TARGET_INDEPENDENT_HASH 1 /* For 32/64 bit equal hashes */ +#include "../../contrib/mumhash/mum.h" +#include "../../contrib/metrohash/metro.h" #ifdef HAVE_CPUID_H #include <cpuid.h> #endif @@ -1408,3 +1411,101 @@ void rspamd_cryptobox_hash (guchar *out, rspamd_cryptobox_hash_update (&st, data, len); rspamd_cryptobox_hash_final (&st, out); } + + +void +rspamd_cryptobox_fast_hash_init (rspamd_cryptobox_fast_hash_state_t *st, + guint64 seed) +{ +#if defined(__LP64__) || defined(_LP64) + XXH64_state_t *rst = (XXH64_state_t *)st; + XXH64_reset (rst, seed); +#else + XXH32_state_t *rst = (XXH32_state_t *)st; + XXH32_reset (rst, seed); +#endif +} + +void +rspamd_cryptobox_fast_hash_update (rspamd_cryptobox_fast_hash_state_t *st, + const void *data, gsize len) +{ +#if defined(__LP64__) || defined(_LP64) + XXH64_state_t *rst = (XXH64_state_t *)st; + XXH64_update (rst, data, len); +#else + XXH32_state_t *rst = (XXH32_state_t *)st; + XXH32_update (rst, data, len); +#endif +} + +guint64 +rspamd_cryptobox_fast_hash_final (rspamd_cryptobox_fast_hash_state_t *st) +{ +#if defined(__LP64__) || defined(_LP64) + XXH64_state_t *rst = (XXH64_state_t *)st; + return XXH64_digest (rst); +#else + XXH32_state_t *rst = (XXH32_state_t *)st; + XXH32_digest (rst); +#endif + +} + +/** + * One in all function + */ +static inline guint64 +rspamd_cryptobox_fast_hash_machdep (const void *data, + gsize len, guint64 seed) +{ + if (len >= 8 && len % 8 == 0) { + return mum_hash (data, len, seed); + } + else { +#if defined(__LP64__) || defined(_LP64) + return metrohash64_1 (data, len, seed); +#endif + } + + return XXH32 (data, len, seed); +} + +static inline guint64 +rspamd_cryptobox_fast_hash_indep (const void *data, + gsize len, guint64 seed) +{ + if (len >= 8 && len % 8 == 0) { + return mum_hash (data, len, seed); + } + + return metrohash64_1 (data, len, seed); +} + +guint64 +rspamd_cryptobox_fast_hash (const void *data, + gsize len, guint64 seed) +{ + return rspamd_cryptobox_fast_hash_machdep (data, len, seed); +} + +guint64 +rspamd_cryptobox_fast_hash_specific ( + enum rspamd_cryptobox_fast_hash_type type, + const void *data, + gsize len, guint64 seed) +{ + switch (type) { + case RSPAMD_CRYPTOBOX_XXHASH32: + return XXH32 (data, len, seed); + case RSPAMD_CRYPTOBOX_XXHASH64: + return XXH64 (data, len, seed); + case RSPAMD_CRYPTOBOX_MUMHASH: + return mum_hash (data, len, seed); + case RSPAMD_CRYPTOBOX_HASHFAST_INDEPENDENT: + return rspamd_cryptobox_fast_hash_indep (data, len, seed); + case RSPAMD_CRYPTOBOX_HASHFAST: + default: + return rspamd_cryptobox_fast_hash_machdep (data, len, seed); + } +} diff --git a/src/libcryptobox/cryptobox.h b/src/libcryptobox/cryptobox.h index 9631bd8d0..6facf0a0e 100644 --- a/src/libcryptobox/cryptobox.h +++ b/src/libcryptobox/cryptobox.h @@ -324,4 +324,50 @@ void rspamd_cryptobox_hash (guchar *out, const guchar *key, gsize keylen); +/* Non crypto hash IUF interface */ +typedef struct RSPAMD_ALIGNED(32) rspamd_cryptobox_fast_hash_state_s { + unsigned char opaque[88]; +} rspamd_cryptobox_fast_hash_state_t; + +/** + * Init cryptobox hash state using key if needed, `st` must point to the buffer + * with at least rspamd_cryptobox_HASHSTATEBYTES bytes length. If keylen == 0, then + * non-keyed hash is generated + */ +void rspamd_cryptobox_fast_hash_init (rspamd_cryptobox_fast_hash_state_t *st, + guint64 seed); + +/** + * Update hash with data portion + */ +void rspamd_cryptobox_fast_hash_update (rspamd_cryptobox_fast_hash_state_t *st, + const void *data, gsize len); + +/** + * Output hash to the buffer of rspamd_cryptobox_HASHBYTES length + */ +guint64 rspamd_cryptobox_fast_hash_final (rspamd_cryptobox_fast_hash_state_t *st); + +/** + * One in all function + */ +guint64 rspamd_cryptobox_fast_hash (const void *data, + gsize len, guint64 seed); + +enum rspamd_cryptobox_fast_hash_type { + RSPAMD_CRYPTOBOX_XXHASH64 = 0, + RSPAMD_CRYPTOBOX_XXHASH32, + RSPAMD_CRYPTOBOX_MUMHASH, + RSPAMD_CRYPTOBOX_METROHASH, + RSPAMD_CRYPTOBOX_HASHFAST, + RSPAMD_CRYPTOBOX_HASHFAST_INDEPENDENT +}; +/** + * Platform independent version + */ +guint64 rspamd_cryptobox_fast_hash_specific ( + enum rspamd_cryptobox_fast_hash_type type, + const void *data, + gsize len, guint64 seed); + #endif /* CRYPTOBOX_H_ */ diff --git a/src/libcryptobox/keypairs_cache.c b/src/libcryptobox/keypairs_cache.c index 069158789..887aaa85d 100644 --- a/src/libcryptobox/keypairs_cache.c +++ b/src/libcryptobox/keypairs_cache.c @@ -18,7 +18,6 @@ #include "keypairs_cache.h" #include "keypair_private.h" #include "hash.h" -#include "xxhash.h" struct rspamd_keypair_elt { struct rspamd_cryptobox_nm *nm; @@ -43,7 +42,8 @@ rspamd_keypair_hash (gconstpointer ptr) { struct rspamd_keypair_elt *elt = (struct rspamd_keypair_elt *)ptr; - return XXH64 (elt->pair, sizeof (elt->pair), rspamd_hash_seed ()); + return rspamd_cryptobox_fast_hash (elt->pair, sizeof (elt->pair), + rspamd_hash_seed ()); } static gboolean diff --git a/src/libmime/filter.c b/src/libmime/filter.c index e1a33f3e2..6aaa19aaf 100644 --- a/src/libmime/filter.c +++ b/src/libmime/filter.c @@ -19,7 +19,7 @@ #include "rspamd.h" #include "message.h" #include "lua/lua_common.h" -#include "xxhash.h" +#include "cryptobox.h" #include <math.h> @@ -273,7 +273,8 @@ rspamd_action_from_str (const gchar *data, gint *result) { guint64 h; - h = XXH64 (data, strlen (data), 0xdeadbabe); + h = rspamd_cryptobox_fast_hash_specific (RSPAMD_CRYPTOBOX_XXHASH64, + data, strlen (data), 0xdeadbabe); switch (h) { case 0x9917BFDB46332B8CULL: /* reject */ diff --git a/src/libmime/message.c b/src/libmime/message.c index 6c4004f61..791bd6837 100644 --- a/src/libmime/message.c +++ b/src/libmime/message.c @@ -24,7 +24,7 @@ #include "email_addr.h" #include "utlist.h" #include "tokenizers/tokenizers.h" -#include "xxhash.h" +#include "cryptobox.h" #ifdef WITH_SNOWBALL #include "libstemmer.h" @@ -42,6 +42,7 @@ static const gchar gtube_pattern[] = "XJS*C4JDBQADN1.NSBN3*2IDNEN*" "GTUBE-STANDARD-ANTI-UBE-TEST-EMAIL*C.34X"; static rspamd_regexp_t *utf_compatible_re = NULL; +static const guint64 words_hash_seed = 0xdeadbabe; static GQuark rspamd_message_quark (void) @@ -1074,7 +1075,12 @@ rspamd_normalize_text_part (struct rspamd_task *task, } if (w->len > 0) { - h = XXH64 (w->begin, w->len, rspamd_hash_seed ()); + /* + * We use static hash seed if we would want to use that in shingles + * computation in future + */ + h = rspamd_cryptobox_fast_hash_specific (RSPAMD_CRYPTOBOX_HASHFAST_INDEPENDENT, + w->begin, w->len, words_hash_seed); g_array_append_val (part->normalized_hashes, h); } } @@ -1094,7 +1100,7 @@ rspamd_words_levenshtein_distance (struct rspamd_task *task, { guint s1len, s2len, x, y, lastdiag, olddiag; guint *column, ret; - guint64 *h1, *h2; + guint64 h1, h2; gint eq; static const guint max_words = 8192; @@ -1118,9 +1124,9 @@ rspamd_words_levenshtein_distance (struct rspamd_task *task, for (y = 1, lastdiag = x - 1; y <= s1len; y++) { olddiag = column[y]; - h1 = &g_array_index (w1, guint64, y - 1); - h2 = &g_array_index (w2, guint64, x - 1); - eq = h1 == h2; + h1 = g_array_index (w1, guint64, y - 1); + h2 = g_array_index (w2, guint64, x - 1); + eq = (h1 == h2) ? 1 : 0; /* * Cost of replacement is twice higher than cost of add/delete * to calculate percentage properly @@ -1262,7 +1268,6 @@ process_text_part (struct rspamd_task *task, type, text_part); text_part->orig = part_content; - rspamd_url_text_extract (task->task_pool, task, text_part, FALSE); g_ptr_array_add (task->text_parts, text_part); } else { @@ -1304,6 +1309,10 @@ process_text_part (struct rspamd_task *task, c = p + 1; } } + + if (!IS_PART_HTML (text_part)) { + rspamd_url_text_extract (task->task_pool, task, text_part, FALSE); + } } struct mime_foreach_data { diff --git a/src/libserver/cfg_file.h b/src/libserver/cfg_file.h index cd6d25683..e14fbd90a 100644 --- a/src/libserver/cfg_file.h +++ b/src/libserver/cfg_file.h @@ -277,7 +277,7 @@ struct rspamd_config { gchar *pid_file; /**< name of pid file */ gchar *temp_dir; /**< dir for temp files */ gchar *control_socket_path; /**< path to the control socket */ - gchar *local_addrs; /**< tree of local addresses */ + const ucl_object_t *local_addrs; /**< tree of local addresses */ #ifdef WITH_GPERF_TOOLS gchar *profile_path; #endif @@ -353,7 +353,6 @@ struct rspamd_config { gint clock_res; /**< resolution of clock used */ GList *maps; /**< maps active */ - rspamd_mempool_t *map_pool; /**< static maps pool */ gdouble map_timeout; /**< maps watch timeout */ struct symbols_cache *cache; /**< symbols cache object */ diff --git a/src/libserver/cfg_rcl.c b/src/libserver/cfg_rcl.c index b90c1116b..f774ac126 100644 --- a/src/libserver/cfg_rcl.c +++ b/src/libserver/cfg_rcl.c @@ -1894,7 +1894,7 @@ rspamd_rcl_config_init (struct rspamd_config *cfg) "Limit of files count in `cores_dir`"); rspamd_rcl_add_default_handler (sub, "local_addrs", - rspamd_rcl_parse_struct_string, + rspamd_rcl_parse_struct_ucl, G_STRUCT_OFFSET (struct rspamd_config, local_addrs), 0, "Use the specified addresses as local ones"); @@ -2976,13 +2976,13 @@ static guint rspamd_worker_param_key_hash (gconstpointer p) { const struct rspamd_worker_param_key *k = p; - XXH64_state_t st; + rspamd_cryptobox_fast_hash_state_t st; - XXH64_reset (&st, rspamd_hash_seed ()); - XXH64_update (&st, k->name, strlen (k->name)); - XXH64_update (&st, &k->ptr, sizeof (gpointer)); + rspamd_cryptobox_fast_hash_init (&st, rspamd_hash_seed ()); + rspamd_cryptobox_fast_hash_update (&st, k->name, strlen (k->name)); + rspamd_cryptobox_fast_hash_update (&st, &k->ptr, sizeof (gpointer)); - return XXH64_digest (&st); + return rspamd_cryptobox_fast_hash_final (&st); } static gboolean diff --git a/src/libserver/cfg_utils.c b/src/libserver/cfg_utils.c index 8cc54c792..85fd6af80 100644 --- a/src/libserver/cfg_utils.c +++ b/src/libserver/cfg_utils.c @@ -42,13 +42,11 @@ struct rspamd_ucl_map_cbdata { struct rspamd_config *cfg; GString *buf; }; -static gchar * rspamd_ucl_read_cb (rspamd_mempool_t * pool, - gchar * chunk, +static gchar * rspamd_ucl_read_cb (gchar * chunk, gint len, struct map_cb_data *data, gboolean final); -static void rspamd_ucl_fin_cb (rspamd_mempool_t * pool, - struct map_cb_data *data); +static void rspamd_ucl_fin_cb (struct map_cb_data *data); gboolean rspamd_parse_bind_line (struct rspamd_config *cfg, @@ -1135,8 +1133,7 @@ rspamd_config_check_statfiles (struct rspamd_classifier_config *cf) } static gchar * -rspamd_ucl_read_cb (rspamd_mempool_t * pool, - gchar * chunk, +rspamd_ucl_read_cb (gchar * chunk, gint len, struct map_cb_data *data, gboolean final) @@ -1157,7 +1154,7 @@ rspamd_ucl_read_cb (rspamd_mempool_t * pool, } static void -rspamd_ucl_fin_cb (rspamd_mempool_t * pool, struct map_cb_data *data) +rspamd_ucl_fin_cb (struct map_cb_data *data) { struct rspamd_ucl_map_cbdata *cbdata = data->cur_data, *prev = data->prev_data; @@ -1180,34 +1177,26 @@ rspamd_ucl_fin_cb (rspamd_mempool_t * pool, struct map_cb_data *data) return; } - checksum = XXH64 (cbdata->buf->str, cbdata->buf->len, 0); - if (data->map->checksum != checksum) { - /* New data available */ - parser = ucl_parser_new (0); - if (!ucl_parser_add_chunk (parser, cbdata->buf->str, + checksum = rspamd_cryptobox_fast_hash (cbdata->buf->str, cbdata->buf->len, 0); + /* New data available */ + parser = ucl_parser_new (0); + if (!ucl_parser_add_chunk (parser, cbdata->buf->str, cbdata->buf->len)) { - msg_err_config ("cannot parse map %s: %s", - data->map->uri, + msg_err_config ("cannot parse map %s: %s", + data->map->name, ucl_parser_get_error (parser)); - ucl_parser_free (parser); - } - else { - obj = ucl_parser_get_object (parser); - ucl_parser_free (parser); - it = NULL; - - while ((cur = ucl_object_iterate (obj, &it, true))) { - ucl_object_replace_key (cbdata->cfg->rcl_obj, (ucl_object_t *)cur, - cur->key, cur->keylen, false); - } - ucl_object_unref (obj); - data->map->checksum = checksum; - } + ucl_parser_free (parser); } else { - msg_info_config ("do not reload map %s, checksum is the same: %d", - data->map->uri, - checksum); + obj = ucl_parser_get_object (parser); + ucl_parser_free (parser); + it = NULL; + + while ((cur = ucl_object_iterate (obj, &it, true))) { + ucl_object_replace_key (cbdata->cfg->rcl_obj, (ucl_object_t *)cur, + cur->key, cur->keylen, false); + } + ucl_object_unref (obj); } } diff --git a/src/libserver/dynamic_cfg.c b/src/libserver/dynamic_cfg.c index 3a76f20f9..d31418588 100644 --- a/src/libserver/dynamic_cfg.c +++ b/src/libserver/dynamic_cfg.c @@ -134,8 +134,7 @@ apply_dynamic_conf (const ucl_object_t *top, struct rspamd_config *cfg) /* Callbacks for reading json dynamic rules */ static gchar * -json_config_read_cb (rspamd_mempool_t * pool, - gchar * chunk, +json_config_read_cb (gchar * chunk, gint len, struct map_cb_data *data, gboolean final) @@ -167,7 +166,7 @@ json_config_read_cb (rspamd_mempool_t * pool, } static void -json_config_fin_cb (rspamd_mempool_t * pool, struct map_cb_data *data) +json_config_fin_cb (struct map_cb_data *data) { struct config_json_buf *jb; ucl_object_t *top; diff --git a/src/libserver/events.c b/src/libserver/events.c index a8cb086cb..44a5d9191 100644 --- a/src/libserver/events.c +++ b/src/libserver/events.c @@ -16,7 +16,7 @@ #include "config.h" #include "rspamd.h" #include "events.h" -#include "xxhash.h" +#include "cryptobox.h" #define RSPAMD_SESSION_FLAG_WATCHING (1 << 0) #define RSPAMD_SESSION_FLAG_DESTROYING (1 << 1) @@ -81,7 +81,7 @@ static guint rspamd_event_hash (gconstpointer a) { const struct rspamd_async_event *ev = a; - XXH64_state_t st; + rspamd_cryptobox_fast_hash_state_t st; union { event_finalizer_t f; gpointer p; @@ -89,11 +89,11 @@ rspamd_event_hash (gconstpointer a) u.f = ev->fin; - XXH64_reset (&st, rspamd_hash_seed ()); - XXH64_update (&st, &ev->user_data, sizeof (gpointer)); - XXH64_update (&st, &u, sizeof (u)); + rspamd_cryptobox_fast_hash_init (&st, rspamd_hash_seed ()); + rspamd_cryptobox_fast_hash_update (&st, &ev->user_data, sizeof (gpointer)); + rspamd_cryptobox_fast_hash_update (&st, &u, sizeof (u)); - return XXH64_digest (&st); + return rspamd_cryptobox_fast_hash_final (&st); } diff --git a/src/libserver/fuzzy_backend.c b/src/libserver/fuzzy_backend.c index a13d27f09..26e595e5f 100644 --- a/src/libserver/fuzzy_backend.c +++ b/src/libserver/fuzzy_backend.c @@ -93,6 +93,7 @@ enum rspamd_fuzzy_statement_idx { RSPAMD_FUZZY_BACKEND_EXPIRE, RSPAMD_FUZZY_BACKEND_VACUUM, RSPAMD_FUZZY_BACKEND_DELETE_ORPHANED, + RSPAMD_FUZZY_BACKEND_VERSION, RSPAMD_FUZZY_BACKEND_MAX }; static struct rspamd_fuzzy_stmts { @@ -212,6 +213,13 @@ static struct rspamd_fuzzy_stmts { .stmt = NULL, .result = SQLITE_DONE }, + { + .idx = RSPAMD_FUZZY_BACKEND_VERSION, + .sql = "PRAGMA user_version;", + .args = "", + .stmt = NULL, + .result = SQLITE_ROW + }, }; static GQuark @@ -624,7 +632,8 @@ rspamd_fuzzy_backend_prepare_update (struct rspamd_fuzzy_backend *backend) gboolean rspamd_fuzzy_backend_add (struct rspamd_fuzzy_backend *backend, - const struct rspamd_fuzzy_cmd *cmd) + const struct rspamd_fuzzy_cmd *cmd, + time_t timestamp) { int rc, i; gint64 id, flag; @@ -680,7 +689,7 @@ rspamd_fuzzy_backend_add (struct rspamd_fuzzy_backend *backend, (gint) cmd->flag, cmd->digest, (gint64) cmd->value, - (gint64) time (NULL)); + (gint64) timestamp); if (rc == SQLITE_OK) { if (cmd->shingles_count > 0) { @@ -722,7 +731,9 @@ rspamd_fuzzy_backend_add (struct rspamd_fuzzy_backend *backend, gboolean rspamd_fuzzy_backend_finish_update (struct rspamd_fuzzy_backend *backend) { - gint rc, wal_frames, wal_checkpointed; + gint rc, wal_frames, wal_checkpointed, ver; + gint64 version = 0; + gchar version_buf[128]; rc = rspamd_fuzzy_backend_run_stmt (backend, TRUE, RSPAMD_FUZZY_BACKEND_TRANSACTION_COMMIT); @@ -745,6 +756,17 @@ rspamd_fuzzy_backend_finish_update (struct rspamd_fuzzy_backend *backend) } } + /* Get and update version */ + ver = rspamd_fuzzy_backend_version (backend); + ++ver; + rspamd_snprintf (version_buf, sizeof (version_buf), "PRAGMA user_version=%d;", + ver); + + if (sqlite3_exec (backend->db, version_buf, NULL, NULL, NULL) != SQLITE_OK) { + msg_err_fuzzy_backend ("cannot set database version to %L: %s", + version, sqlite3_errmsg (backend->db)); + } + return TRUE; } @@ -953,6 +975,24 @@ rspamd_fuzzy_backend_count (struct rspamd_fuzzy_backend *backend) return 0; } +gint +rspamd_fuzzy_backend_version (struct rspamd_fuzzy_backend *backend) +{ + gint ret = 0; + + if (backend) { + if (rspamd_fuzzy_backend_run_stmt (backend, FALSE, + RSPAMD_FUZZY_BACKEND_VERSION) == SQLITE_OK) { + ret = sqlite3_column_int64 ( + prepared_stmts[RSPAMD_FUZZY_BACKEND_VERSION].stmt, 0); + } + + rspamd_fuzzy_backend_cleanup_stmt (backend, RSPAMD_FUZZY_BACKEND_VERSION); + } + + return ret; +} + gsize rspamd_fuzzy_backend_expired (struct rspamd_fuzzy_backend *backend) { diff --git a/src/libserver/fuzzy_backend.h b/src/libserver/fuzzy_backend.h index 04da9bbba..bcd199d1a 100644 --- a/src/libserver/fuzzy_backend.h +++ b/src/libserver/fuzzy_backend.h @@ -56,7 +56,8 @@ gboolean rspamd_fuzzy_backend_prepare_update (struct rspamd_fuzzy_backend *backe */ gboolean rspamd_fuzzy_backend_add ( struct rspamd_fuzzy_backend *backend, - const struct rspamd_fuzzy_cmd *cmd); + const struct rspamd_fuzzy_cmd *cmd, + time_t timestamp); /** * Delete digest from the database @@ -89,6 +90,7 @@ gboolean rspamd_fuzzy_backend_sync (struct rspamd_fuzzy_backend *backend, void rspamd_fuzzy_backend_close (struct rspamd_fuzzy_backend *backend); gsize rspamd_fuzzy_backend_count (struct rspamd_fuzzy_backend *backend); +gint rspamd_fuzzy_backend_version (struct rspamd_fuzzy_backend *backend); gsize rspamd_fuzzy_backend_expired (struct rspamd_fuzzy_backend *backend); const gchar * rspamd_fuzzy_backend_id (struct rspamd_fuzzy_backend *backend); diff --git a/src/libserver/protocol.c b/src/libserver/protocol.c index a394ddaff..d314d3fdc 100644 --- a/src/libserver/protocol.c +++ b/src/libserver/protocol.c @@ -23,7 +23,7 @@ #include "http.h" #include "email_addr.h" #include "worker_private.h" -#include "xxhash.h" +#include "cryptobox.h" /* Max line size */ #define OUTBUFSIZ BUFSIZ @@ -401,7 +401,8 @@ rspamd_protocol_handle_headers (struct rspamd_task *task, guint64 h; guint32 *hp; - h = XXH64 (hv_tok->begin, hv_tok->len, 0xdeadbabe); + h = rspamd_cryptobox_fast_hash_specific (RSPAMD_CRYPTOBOX_XXHASH64, + hv_tok->begin, hv_tok->len, 0xdeadbabe); hp = rspamd_mempool_alloc (task->task_pool, sizeof (*hp)); memcpy (hp, &h, sizeof (*hp)); rspamd_mempool_set_variable (task->task_pool, "settings_hash", diff --git a/src/libserver/re_cache.c b/src/libserver/re_cache.c index 5882114f9..3e308415d 100644 --- a/src/libserver/re_cache.c +++ b/src/libserver/re_cache.c @@ -15,7 +15,6 @@ */ #include "libmime/message.h" #include "re_cache.h" -#include "xxhash.h" #include "cryptobox.h" #include "ref.h" #include "libserver/url.h" @@ -123,16 +122,16 @@ rspamd_re_cache_class_id (enum rspamd_re_type type, gpointer type_data, gsize datalen) { - XXH64_state_t st; + rspamd_cryptobox_fast_hash_state_t st; - XXH64_reset (&st, 0xdeadbabe); - XXH64_update (&st, &type, sizeof (type)); + rspamd_cryptobox_fast_hash_init (&st, 0xdeadbabe); + rspamd_cryptobox_fast_hash_update (&st, &type, sizeof (type)); if (datalen > 0) { - XXH64_update (&st, type_data, datalen); + rspamd_cryptobox_fast_hash_update (&st, type_data, datalen); } - return XXH64_digest (&st); + return rspamd_cryptobox_fast_hash_final (&st); } static void @@ -1174,7 +1173,8 @@ rspamd_re_cache_type_from_string (const char *str) */ if (str != NULL) { - h = XXH64 (str, strlen (str), 0xdeadbabe); + h = rspamd_cryptobox_fast_hash_specific (RSPAMD_CRYPTOBOX_XXHASH64, + str, strlen (str), 0xdeadbabe); switch (h) { case G_GUINT64_CONSTANT(0x298b9c8a58887d44): /* header */ diff --git a/src/libserver/task.c b/src/libserver/task.c index 3d6387b52..ce95b927d 100644 --- a/src/libserver/task.c +++ b/src/libserver/task.c @@ -26,6 +26,11 @@ #include "utlist.h" #include <math.h> +/* + * Do not print more than this amount of elts + */ +static const int max_log_elts = 7; + static GQuark rspamd_task_quark (void) { @@ -752,7 +757,7 @@ rspamd_task_log_metric_res (struct rspamd_task *task, rspamd_fstring_t *symbuf; struct symbol *sym; GPtrArray *sorted_symbols; - guint i; + guint i, j; mres = g_hash_table_lookup (task->results, DEFAULT_METRIC); @@ -810,8 +815,16 @@ rspamd_task_log_metric_res (struct rspamd_task *task, rspamd_printf_fstring (&symbuf, "{"); + j = 0; + for (cur = sym->options; cur != NULL; cur = g_list_next (cur)) { rspamd_printf_fstring (&symbuf, "%s;", cur->data); + + if (j >= max_log_elts) { + rspamd_printf_fstring (&symbuf, "...;"); + break; + } + j ++; } rspamd_printf_fstring (&symbuf, "}"); @@ -892,6 +905,7 @@ rspamd_task_write_ialist (struct rspamd_task *task, lim = internet_address_list_length (ialist); } + varbuf = rspamd_fstring_new (); for (i = 0; i < lim; i++) { @@ -908,6 +922,11 @@ rspamd_task_write_ialist (struct rspamd_task *task, varbuf = rspamd_fstring_append (varbuf, ",", 1); } } + + if (i >= max_log_elts) { + varbuf = rspamd_fstring_append (varbuf, "...", 3); + break; + } } if (varbuf->len > 0) { @@ -951,6 +970,11 @@ rspamd_task_write_addr_list (struct rspamd_task *task, varbuf = rspamd_fstring_append (varbuf, ",", 1); } } + + if (i >= max_log_elts) { + varbuf = rspamd_fstring_append (varbuf, "...", 3); + break; + } } if (varbuf->len > 0) { diff --git a/src/libserver/url.c b/src/libserver/url.c index fe70585be..70a5f3c9b 100644 --- a/src/libserver/url.c +++ b/src/libserver/url.c @@ -1795,10 +1795,9 @@ url_tld_end (struct url_callback_data *cb, { const gchar *p; - /* A url must be finished by tld, so it must be followed by space character */ p = pos + match->m_len; - if (p == cb->end || g_ascii_isspace (*p) || *p == ',') { + if (p == cb->end) { match->m_len = p - match->m_begin; return TRUE; } @@ -2302,7 +2301,7 @@ rspamd_url_text_extract (rspamd_mempool_t *pool, { struct rspamd_url_mimepart_cbdata mcbd; - if (part->content == NULL || part->content->len == 0) { + if (part->stripped_content == NULL || part->stripped_content->len == 0) { msg_warn_task ("got empty text part"); return; } @@ -2310,8 +2309,8 @@ rspamd_url_text_extract (rspamd_mempool_t *pool, mcbd.task = task; mcbd.part = part; - rspamd_url_find_multiple (task->task_pool, part->content->data, - part->content->len, is_html, + rspamd_url_find_multiple (task->task_pool, part->stripped_content->data, + part->stripped_content->len, is_html, rspamd_url_text_part_callback, &mcbd); /* Handle offsets of this part */ diff --git a/src/libstat/tokenizers/osb.c b/src/libstat/tokenizers/osb.c index 906c1de25..c2e050f23 100644 --- a/src/libstat/tokenizers/osb.c +++ b/src/libstat/tokenizers/osb.c @@ -19,7 +19,6 @@ #include "tokenizers.h" #include "stat_internal.h" -#include "xxhash.h" #include "cryptobox.h" /* Size for features pipe */ @@ -280,7 +279,8 @@ rspamd_tokenizer_osb (struct rspamd_stat_ctx *ctx, window_size = osb_cf->window_size; if (prefix) { - seed = XXH64 (prefix, strlen (prefix), osb_cf->seed); + seed = rspamd_cryptobox_fast_hash_specific (RSPAMD_CRYPTOBOX_XXHASH64, + prefix, strlen (prefix), osb_cf->seed); } else { seed = osb_cf->seed; @@ -300,7 +300,8 @@ rspamd_tokenizer_osb (struct rspamd_stat_ctx *ctx, else { /* We know that the words are normalized */ if (osb_cf->ht == RSPAMD_OSB_HASH_XXHASH) { - cur = XXH64 (token->begin, token->len, osb_cf->seed); + cur = rspamd_cryptobox_fast_hash_specific (RSPAMD_CRYPTOBOX_XXHASH64, + token->begin, token->len, osb_cf->seed); } else { rspamd_cryptobox_siphash ((guchar *)&cur, token->begin, diff --git a/src/libutil/addr.c b/src/libutil/addr.c index 8fc85e6c7..18414a98c 100644 --- a/src/libutil/addr.c +++ b/src/libutil/addr.c @@ -17,7 +17,7 @@ #include "addr.h" #include "util.h" #include "logger.h" -#include "xxhash.h" +#include "cryptobox.h" #include "radix.h" #include "unix-std.h" /* pwd and grp */ @@ -1352,28 +1352,28 @@ guint rspamd_inet_address_hash (gconstpointer a) { const rspamd_inet_addr_t *addr = a; - XXH64_state_t st; + rspamd_cryptobox_fast_hash_state_t st; - XXH64_reset (&st, rspamd_hash_seed ()); - XXH64_update (&st, &addr->af, sizeof (addr->af)); + rspamd_cryptobox_fast_hash_init (&st, rspamd_hash_seed ()); + rspamd_cryptobox_fast_hash_update (&st, &addr->af, sizeof (addr->af)); if (addr->af == AF_UNIX && addr->u.un) { - XXH64_update (&st, addr->u.un, sizeof (*addr->u.un)); + rspamd_cryptobox_fast_hash_update (&st, addr->u.un, sizeof (*addr->u.un)); } else { /* We ignore port part here */ if (addr->af == AF_INET) { - XXH64_update (&st, &addr->u.in.addr.s4.sin_addr, + rspamd_cryptobox_fast_hash_update (&st, &addr->u.in.addr.s4.sin_addr, sizeof (addr->u.in.addr.s4.sin_addr)); } else { - XXH64_update (&st, &addr->u.in.addr.s6.sin6_addr, + rspamd_cryptobox_fast_hash_update (&st, &addr->u.in.addr.s6.sin6_addr, sizeof (addr->u.in.addr.s6.sin6_addr)); } } - return XXH64_digest (&st); + return rspamd_cryptobox_fast_hash_final (&st); } gboolean diff --git a/src/libutil/bloom.c b/src/libutil/bloom.c index 8c8b80ae6..2447b1b10 100644 --- a/src/libutil/bloom.c +++ b/src/libutil/bloom.c @@ -15,7 +15,7 @@ */ #include "config.h" #include "bloom.h" -#include "xxhash.h" +#include "cryptobox.h" /* 4 bits are used for counting (implementing delete operation) */ #define SIZE_BIT 4 @@ -107,7 +107,8 @@ rspamd_bloom_add (rspamd_bloom_filter_t * bloom, const gchar *s) } len = strlen (s); for (n = 0; n < bloom->nfuncs; ++n) { - v = XXH64 (s, len, bloom->seeds[n]) % bloom->asize; + v = rspamd_cryptobox_fast_hash_specific (RSPAMD_CRYPTOBOX_XXHASH64, + s, len, bloom->seeds[n]) % bloom->asize; INCBIT (bloom->a, v, t); } @@ -126,7 +127,8 @@ rspamd_bloom_del (rspamd_bloom_filter_t * bloom, const gchar *s) } len = strlen (s); for (n = 0; n < bloom->nfuncs; ++n) { - v = XXH64 (s, len, bloom->seeds[n]) % bloom->asize; + v = rspamd_cryptobox_fast_hash_specific (RSPAMD_CRYPTOBOX_XXHASH64, + s, len, bloom->seeds[n]) % bloom->asize; DECBIT (bloom->a, v, t); } @@ -145,7 +147,8 @@ rspamd_bloom_check (rspamd_bloom_filter_t * bloom, const gchar *s) } len = strlen (s); for (n = 0; n < bloom->nfuncs; ++n) { - v = XXH64 (s, len, bloom->seeds[n]) % bloom->asize; + v = rspamd_cryptobox_fast_hash_specific (RSPAMD_CRYPTOBOX_XXHASH64, + s, len, bloom->seeds[n]) % bloom->asize; if (!(GETBIT (bloom->a, v))) { return FALSE; } diff --git a/src/libutil/fstring.c b/src/libutil/fstring.c index 997dd6b46..285940f9c 100644 --- a/src/libutil/fstring.c +++ b/src/libutil/fstring.c @@ -400,3 +400,19 @@ rspamd_ftok_cstr_equal (const rspamd_ftok_t *s, const gchar *pat, return (rspamd_ftok_cmp (s, &srch) == 0); } + +gchar * +rspamd_ftokdup (const rspamd_ftok_t *src) +{ + gchar *newstr; + + if (src == NULL) { + return NULL; + } + + newstr = g_malloc (src->len + 1); + memcpy (newstr, src->begin, src->len); + newstr[src->len] = '\0'; + + return newstr; +} diff --git a/src/libutil/fstring.h b/src/libutil/fstring.h index 127557e40..10916d876 100644 --- a/src/libutil/fstring.h +++ b/src/libutil/fstring.h @@ -153,4 +153,12 @@ rspamd_ftok_t *rspamd_ftok_map (const rspamd_fstring_t *s); rspamd_fstring_t * rspamd_fstring_grow (rspamd_fstring_t *str, gsize needed_len) G_GNUC_WARN_UNUSED_RESULT; +/** + * Copies ftok to zero terminated string (must be freed using g_free) + * @param src + * @return + */ +gchar *rspamd_ftokdup (const rspamd_ftok_t *src) G_GNUC_WARN_UNUSED_RESULT; + + #endif diff --git a/src/libutil/logger.c b/src/libutil/logger.c index 21e489b09..f81730448 100644 --- a/src/libutil/logger.c +++ b/src/libutil/logger.c @@ -18,7 +18,7 @@ #include "util.h" #include "rspamd.h" #include "map.h" -#include "xxhash.h" +#include "cryptobox.h" #include "unix-std.h" #ifdef HAVE_SYSLOG_H @@ -82,12 +82,6 @@ static rspamd_logger_t *default_logger = NULL; } \ } while (0) -#if defined(__LP64__) || defined(_LP64) -#define XXH_ONESHOT XXH64 -#else -#define XXH_ONESHOT XXH32 -#endif - static void syslog_log_function (const gchar *log_domain, const gchar *module, const gchar *id, const gchar *function, @@ -106,7 +100,7 @@ static void static inline guint64 rspamd_log_calculate_cksum (const gchar *message, size_t mlen) { - return XXH_ONESHOT (message, mlen, rspamd_hash_seed ()); + return rspamd_cryptobox_fast_hash (message, mlen, rspamd_hash_seed ()); } /* diff --git a/src/libutil/map.c b/src/libutil/map.c index 8de6e76c2..f99c35784 100644 --- a/src/libutil/map.c +++ b/src/libutil/map.c @@ -35,10 +35,29 @@ #include <pcre2.h> #endif +#undef MAP_DEBUG_REFS +#ifdef MAP_DEBUG_REFS +#define MAP_RETAIN(x) do { \ + msg_err ("retain ref %p, refcount: %d -> %d", (x), (x)->ref.refcount, (x)->ref.refcount + 1); \ + REF_RETAIN(x); \ +} while (0) + +#define MAP_RELEASE(x) do { \ + msg_err ("release ref %p, refcount: %d -> %d", (x), (x)->ref.refcount, (x)->ref.refcount - 1); \ + REF_RELEASE(x); \ +} while (0) +#else +#define MAP_RETAIN REF_RETAIN +#define MAP_RELEASE REF_RELEASE +#endif + static const gchar *hash_fill = "1"; -static void free_http_cbdata_common (struct http_callback_data *cbd); +static void free_http_cbdata_common (struct http_callback_data *cbd, gboolean plan_new); static void free_http_cbdata_dtor (gpointer p); static void free_http_cbdata (struct http_callback_data *cbd); +static void rspamd_map_periodic_callback (gint fd, short what, void *ud); +static void rspamd_map_schedule_periodic (struct rspamd_map *map, gboolean locked, + gboolean initial, gboolean errored); /** * Write HTTP request */ @@ -47,9 +66,9 @@ write_http_request (struct http_callback_data *cbd) { gchar datebuf[128]; struct rspamd_http_message *msg; - rspamd_mempool_t *pool; + struct rspamd_map *map; - pool = cbd->map->pool; + map = cbd->map; if (cbd->fd != -1) { close (cbd->fd); @@ -60,10 +79,15 @@ write_http_request (struct http_callback_data *cbd) if (cbd->fd != -1) { msg = rspamd_http_new_message (HTTP_REQUEST); + if (cbd->check) { + msg->method = HTTP_HEAD; + } + if (cbd->stage == map_load_file) { msg->url = rspamd_fstring_new_init (cbd->data->path, strlen (cbd->data->path)); - if (cbd->data->last_checked != 0 && cbd->stage == map_load_file) { + if (cbd->check && + cbd->data->last_checked != 0 && cbd->stage == map_load_file) { rspamd_http_date_format (datebuf, sizeof (datebuf), cbd->data->last_checked); rspamd_http_message_add_header (msg, "If-Modified-Since", datebuf); @@ -83,11 +107,12 @@ write_http_request (struct http_callback_data *cbd) rspamd_http_connection_write_message (cbd->conn, msg, cbd->data->host, NULL, cbd, cbd->fd, &cbd->tv, cbd->ev_base); - REF_RETAIN (cbd); + MAP_RETAIN (cbd); } else { - msg_err_pool ("cannot connect to %s: %s", cbd->data->host, + msg_err_map ("cannot connect to %s: %s", cbd->data->host, strerror (errno)); + cbd->periodic->errored = TRUE; } } @@ -99,7 +124,6 @@ rspamd_map_check_sig_pk (const char *fname, struct rspamd_cryptobox_pubkey *pk) { gchar fpath[PATH_MAX]; - rspamd_mempool_t *pool = map->pool; guchar *data; GString *b32_key; gsize len = 0; @@ -109,12 +133,12 @@ rspamd_map_check_sig_pk (const char *fname, data = rspamd_file_xmap (fpath, PROT_READ, &len); if (data == NULL) { - msg_err_pool ("can't open signature %s: %s", fpath, strerror (errno)); + msg_err_map ("can't open signature %s: %s", fpath, strerror (errno)); return FALSE; } if (len != rspamd_cryptobox_signature_bytes (RSPAMD_CRYPTOBOX_MODE_25519)) { - msg_err_pool ("can't open signature %s: invalid signature", fpath); + msg_err_map ("can't open signature %s: invalid signature", fpath); munmap (data, len); return FALSE; @@ -122,7 +146,7 @@ rspamd_map_check_sig_pk (const char *fname, if (!rspamd_cryptobox_verify (data, input, inlen, rspamd_pubkey_get_pk (pk, NULL), RSPAMD_CRYPTOBOX_MODE_25519)) { - msg_err_pool ("can't verify signature %s: incorrect signature", fpath); + msg_err_map ("can't verify signature %s: incorrect signature", fpath); munmap (data, len); return FALSE; @@ -130,7 +154,7 @@ rspamd_map_check_sig_pk (const char *fname, b32_key = rspamd_pubkey_print (pk, RSPAMD_KEYPAIR_BASE32|RSPAMD_KEYPAIR_PUBKEY); - msg_info_pool ("verified signature in file %s using trusted key %v", + msg_info_map ("verified signature in file %s using trusted key %v", fpath, b32_key); g_string_free (b32_key, TRUE); @@ -141,25 +165,26 @@ rspamd_map_check_sig_pk (const char *fname, static gboolean rspamd_map_check_file_sig (const char *fname, - struct rspamd_map *map, const guchar *input, + struct rspamd_map *map, + struct rspamd_map_backend *bk, + const guchar *input, gsize inlen) { gchar fpath[PATH_MAX]; - rspamd_mempool_t *pool = map->pool; guchar *data; struct rspamd_cryptobox_pubkey *pk = NULL; GString *b32_key; gboolean ret; gsize len = 0; - if (map->trusted_pubkey == NULL) { + if (bk->trusted_pubkey == NULL) { /* Try to load and check pubkey */ rspamd_snprintf (fpath, sizeof (fpath), "%s.pub", fname); data = rspamd_file_xmap (fpath, PROT_READ, &len); if (data == NULL) { - msg_err_pool ("can't open pubkey %s: %s", fpath, strerror (errno)); + msg_err_map ("can't open pubkey %s: %s", fpath, strerror (errno)); return FALSE; } @@ -168,7 +193,7 @@ rspamd_map_check_file_sig (const char *fname, munmap (data, len); if (pk == NULL) { - msg_err_pool ("can't load pubkey %s", fpath); + msg_err_map ("can't load pubkey %s", fpath); return FALSE; } @@ -178,7 +203,7 @@ rspamd_map_check_file_sig (const char *fname, g_assert (b32_key != NULL); if (g_hash_table_lookup (map->cfg->trusted_keys, b32_key->str) == NULL) { - msg_err_pool ("pubkey loaded from %s is untrusted: %v", fpath, + msg_err_map ("pubkey loaded from %s is untrusted: %v", fpath, b32_key); g_string_free (b32_key, TRUE); rspamd_pubkey_unref (pk); @@ -189,7 +214,7 @@ rspamd_map_check_file_sig (const char *fname, g_string_free (b32_key, TRUE); } else { - pk = rspamd_pubkey_ref (map->trusted_pubkey); + pk = rspamd_pubkey_ref (bk->trusted_pubkey); } ret = rspamd_map_check_sig_pk (fname, map, input, inlen, pk); @@ -202,10 +227,11 @@ rspamd_map_check_file_sig (const char *fname, * Callback for destroying HTTP callback data */ static void -free_http_cbdata_common (struct http_callback_data *cbd) +free_http_cbdata_common (struct http_callback_data *cbd, gboolean plan_new) { char fpath[PATH_MAX]; struct stat st; + struct map_periodic_cbdata *periodic = cbd->periodic; if (cbd->out_fd != -1) { close (cbd->out_fd); @@ -243,7 +269,8 @@ free_http_cbdata_common (struct http_callback_data *cbd) rspamd_inet_address_destroy (cbd->addr); } - g_atomic_int_set (cbd->map->locked, 0); + MAP_RELEASE (cbd->bk); + MAP_RELEASE (periodic); g_slice_free1 (sizeof (struct http_callback_data), cbd); } @@ -253,18 +280,18 @@ free_http_cbdata (struct http_callback_data *cbd) cbd->map->dtor = NULL; cbd->map->dtor_data = NULL; - free_http_cbdata_common (cbd); + free_http_cbdata_common (cbd, TRUE); } static void free_http_cbdata_dtor (gpointer p) { struct http_callback_data *cbd = p; - rspamd_mempool_t *pool; + struct rspamd_map *map; - pool = cbd->map->pool; - msg_warn_pool ("connection with http server is terminated: worker is stopping"); - free_http_cbdata_common (cbd); + map = cbd->map; + msg_warn_map ("connection with http server is terminated: worker is stopping"); + free_http_cbdata_common (cbd, FALSE); } /* @@ -275,13 +302,13 @@ http_map_error (struct rspamd_http_connection *conn, GError *err) { struct http_callback_data *cbd = conn->ud; - rspamd_mempool_t *pool; - - pool = cbd->map->pool; + struct rspamd_map *map; - msg_err_pool ("connection with http server terminated incorrectly: %s", - err->message); - REF_RELEASE (cbd); + map = cbd->map; + cbd->periodic->errored = TRUE; + msg_err_map ("connection with http server terminated incorrectly: %e", err); + rspamd_map_periodic_callback (-1, EV_TIMEOUT, cbd->periodic); + MAP_RELEASE (cbd); } static int @@ -290,17 +317,27 @@ http_map_finish (struct rspamd_http_connection *conn, { struct http_callback_data *cbd = conn->ud; struct rspamd_map *map; - rspamd_mempool_t *pool; + struct rspamd_map_backend *bk; char fpath[PATH_MAX]; guchar *aux_data, *in = NULL; gsize inlen = 0; struct stat st; map = cbd->map; - pool = cbd->map->pool; + bk = cbd->bk; if (msg->code == 200) { + if (cbd->check) { + cbd->periodic->need_modify = TRUE; + /* Reset the whole chain */ + cbd->periodic->cur_backend = 0; + rspamd_map_periodic_callback (-1, EV_TIMEOUT, cbd->periodic); + MAP_RELEASE (cbd); + + return 0; + } + if (cbd->stage == map_load_file) { if (msg->last_modified) { cbd->data->last_checked = msg->last_modified; @@ -310,13 +347,13 @@ http_map_finish (struct rspamd_http_connection *conn, } /* Maybe we need to check signature ? */ - if (map->is_signed) { + if (bk->is_signed) { close (cbd->out_fd); - if (map->trusted_pubkey) { + if (bk->trusted_pubkey) { /* No need to load key */ cbd->stage = map_load_signature; - cbd->pk = rspamd_pubkey_ref (map->trusted_pubkey); + cbd->pk = rspamd_pubkey_ref (bk->trusted_pubkey); rspamd_snprintf (fpath, sizeof (fpath), "%s.sig", cbd->tmpfile); } @@ -329,24 +366,25 @@ http_map_finish (struct rspamd_http_connection *conn, cbd->out_fd = rspamd_file_xopen (fpath, O_RDWR|O_CREAT, 00644); if (cbd->out_fd == -1) { - msg_err_pool ("cannot open pubkey file %s for writing: %s", + msg_err_map ("cannot open pubkey file %s for writing: %s", fpath, strerror (errno)); - goto end; + goto err; } rspamd_http_connection_reset (cbd->conn); write_http_request (cbd); + MAP_RELEASE (cbd); - goto end; + return 0; } else { /* Unsinged version - just open file */ in = rspamd_file_xmap (cbd->tmpfile, PROT_READ, &inlen); if (in == NULL) { - msg_err_pool ("cannot read tempfile %s: %s", cbd->tmpfile, + msg_err_map ("cannot read tempfile %s: %s", cbd->tmpfile, strerror (errno)); - goto end; + goto err; } } } @@ -355,9 +393,9 @@ http_map_finish (struct rspamd_http_connection *conn, (void)lseek (cbd->out_fd, 0, SEEK_SET); if (fstat (cbd->out_fd, &st) == -1) { - msg_err_pool ("cannot stat pubkey file %s: %s", + msg_err_map ("cannot stat pubkey file %s: %s", fpath, strerror (errno)); - goto end; + goto err; } aux_data = mmap (NULL, st.st_size, PROT_READ, MAP_SHARED, @@ -366,9 +404,9 @@ http_map_finish (struct rspamd_http_connection *conn, cbd->out_fd = -1; if (aux_data == MAP_FAILED) { - msg_err_pool ("cannot map pubkey file %s: %s", + msg_err_map ("cannot map pubkey file %s: %s", fpath, strerror (errno)); - goto end; + goto err; } cbd->pk = rspamd_pubkey_from_base32 (aux_data, st.st_size, @@ -376,25 +414,26 @@ http_map_finish (struct rspamd_http_connection *conn, munmap (aux_data, st.st_size); if (cbd->pk == NULL) { - msg_err_pool ("cannot load pubkey file %s: bad pubkey", + msg_err_map ("cannot load pubkey file %s: bad pubkey", fpath); - goto end; + goto err; } rspamd_snprintf (fpath, sizeof (fpath), "%s.sig", cbd->tmpfile); cbd->out_fd = rspamd_file_xopen (fpath, O_RDWR|O_CREAT, 00644); if (cbd->out_fd == -1) { - msg_err_pool ("cannot open signature file %s for writing: %s", + msg_err_map ("cannot open signature file %s for writing: %s", fpath, strerror (errno)); - goto end; + goto err; } cbd->stage = map_load_signature; rspamd_http_connection_reset (cbd->conn); write_http_request (cbd); + MAP_RELEASE (cbd); - goto end; + return 0; } else if (cbd->stage == map_load_signature) { /* We can now check signature */ @@ -404,26 +443,25 @@ http_map_finish (struct rspamd_http_connection *conn, in = rspamd_file_xmap (cbd->tmpfile, PROT_READ, &inlen); if (in == NULL) { - msg_err_pool ("cannot read tempfile %s: %s", cbd->tmpfile, + msg_err_map ("cannot read tempfile %s: %s", cbd->tmpfile, strerror (errno)); - goto end; + goto err; } if (!rspamd_map_check_sig_pk (cbd->tmpfile, map, in, inlen, cbd->pk)) { - goto end; + goto err; } } g_assert (in != NULL); - map->read_callback (map->pool, in, inlen, &cbd->cbdata, TRUE); - map->fin_callback (map->pool, &cbd->cbdata); - - *map->user_data = cbd->cbdata.cur_data; - msg_info_pool ("read map data from %s", cbd->data->host); + map->read_callback (in, inlen, &cbd->periodic->cbdata, TRUE); + msg_info_map ("read map data from %s", cbd->data->host); + cbd->periodic->cur_backend ++; + rspamd_map_periodic_callback (-1, EV_TIMEOUT, cbd->periodic); } - else if (msg->code == 304 && cbd->stage == map_load_file) { - msg_debug_pool ("data is not modified for server %s", + else if (msg->code == 304 && (cbd->check && cbd->stage == map_load_file)) { + msg_debug_map ("data is not modified for server %s", cbd->data->host); if (msg->last_modified) { @@ -434,12 +472,17 @@ http_map_finish (struct rspamd_http_connection *conn, } } else { - msg_info_pool ("cannot load map %s from %s: HTTP error %d", - map->uri, cbd->data->host, msg->code); + msg_info_map ("cannot load map %s from %s: HTTP error %d", + bk->uri, cbd->data->host, msg->code); } -end: - REF_RELEASE (cbd); + MAP_RELEASE (cbd); + return 0; + +err: + cbd->periodic->errored = 1; + rspamd_map_periodic_callback (-1, EV_TIMEOUT, cbd->periodic); + MAP_RELEASE (cbd); return 0; } @@ -451,18 +494,18 @@ http_map_read (struct rspamd_http_connection *conn, gsize len) { struct http_callback_data *cbd = conn->ud; - rspamd_mempool_t *pool; + struct rspamd_map *map; if (msg->code != 200 || len == 0) { /* Ignore not full replies */ return 0; } - pool = cbd->map->pool; + map = cbd->map; if (write (cbd->out_fd, chunk, len) == -1) { - msg_err_pool ("cannot write to %s: %s", cbd->tmpfile, strerror (errno)); - REF_RELEASE (cbd); + msg_err_map ("cannot write to %s: %s", cbd->tmpfile, strerror (errno)); + MAP_RELEASE (cbd); return -1; } @@ -474,38 +517,32 @@ http_map_read (struct rspamd_http_connection *conn, * Callback for reading data from file */ static gboolean -read_map_file (struct rspamd_map *map, struct file_map_data *data) +read_map_file (struct rspamd_map *map, struct file_map_data *data, + struct rspamd_map_backend *bk, struct map_periodic_cbdata *periodic) { - struct map_cb_data cbdata; guchar *bytes; gsize len; - rspamd_mempool_t *pool = map->pool; if (map->read_callback == NULL || map->fin_callback == NULL) { - msg_err_pool ("bad callback for reading map file"); + msg_err_map ("bad callback for reading map file"); return FALSE; } if (access (data->filename, R_OK) == -1) { /* File does not exist, skipping */ - msg_err_pool ("map file is unavailable for reading"); - return FALSE; + msg_err_map ("map file is unavailable for reading"); + return TRUE; } bytes = rspamd_file_xmap (data->filename, PROT_READ, &len); if (bytes == NULL) { - msg_err_pool ("can't open map %s: %s", data->filename, strerror (errno)); + msg_err_map ("can't open map %s: %s", data->filename, strerror (errno)); return FALSE; } - cbdata.state = 0; - cbdata.prev_data = *map->user_data; - cbdata.cur_data = NULL; - cbdata.map = map; - - if (map->is_signed) { - if (!rspamd_map_check_file_sig (data->filename, map, bytes, len)) { + if (bk->is_signed) { + if (!rspamd_map_check_file_sig (data->filename, map, bk, bytes, len)) { munmap (bytes, len); return FALSE; @@ -513,9 +550,7 @@ read_map_file (struct rspamd_map *map, struct file_map_data *data) } if (len > 0) { - map->read_callback (map->pool, bytes, len, &cbdata, TRUE); - map->fin_callback (map->pool, &cbdata); - *map->user_data = cbdata.cur_data; + map->read_callback (bytes, len, &periodic->cbdata, TRUE); } munmap (bytes, len); @@ -524,85 +559,74 @@ read_map_file (struct rspamd_map *map, struct file_map_data *data) } static void -jitter_timeout_event (struct rspamd_map *map, - gboolean locked, gboolean initial, gboolean errored) +rspamd_map_periodic_dtor (struct map_periodic_cbdata *periodic) { - const gdouble error_mult = 20.0, lock_mult = 0.5; - gdouble jittered_sec; - gdouble timeout; - - if (initial) { - timeout = 0.0; - } - else if (errored) { - timeout = map->cfg->map_timeout * error_mult; - } - else if (locked) { - timeout = map->cfg->map_timeout * lock_mult; + if (periodic->need_modify) { + /* We are done */ + periodic->map->fin_callback (&periodic->cbdata); + *periodic->map->user_data = periodic->cbdata.cur_data; } else { - timeout = map->cfg->map_timeout; + /* Not modified */ } - /* Plan event again with jitter */ - evtimer_del (&map->ev); - jittered_sec = rspamd_time_jitter (timeout, 0); - double_to_tv (jittered_sec, &map->tv); - - evtimer_add (&map->ev, &map->tv); + rspamd_map_schedule_periodic (periodic->map, FALSE, FALSE, FALSE); + g_atomic_int_set (periodic->map->locked, 0); + g_slice_free1 (sizeof (*periodic), periodic); } -/** - * Common file callback - */ static void -file_callback (gint fd, short what, void *ud) +rspamd_map_schedule_periodic (struct rspamd_map *map, + gboolean locked, gboolean initial, gboolean errored) { - struct rspamd_map *map = ud; - struct file_map_data *data = map->map_data; - struct stat st; - rspamd_mempool_t *pool; + const gdouble error_mult = 20.0, lock_mult = 0.5; + gdouble jittered_sec; + gdouble timeout; + struct map_periodic_cbdata *cbd; - pool = map->pool; + timeout = map->poll_timeout; - if (!g_atomic_int_compare_and_exchange (map->locked, 0, 1)) { - msg_debug_pool ( - "don't try to reread map as it is locked by other process, will reread it later"); - jitter_timeout_event (map, TRUE, FALSE, FALSE); - return; + if (initial) { + timeout = 0.0; } - if (stat (data->filename, &st) != -1 && - (st.st_mtime > data->st.st_mtime || data->st.st_mtime == -1)) { - /* File was modified since last check */ - memcpy (&data->st, &st, sizeof (struct stat)); + if (errored) { + timeout = map->poll_timeout * error_mult; } - else { - g_atomic_int_set (map->locked, 0); - jitter_timeout_event (map, FALSE, FALSE, FALSE); - return; + else if (locked) { + timeout = map->poll_timeout * lock_mult; } - msg_info_pool ("rereading map file %s", data->filename); + cbd = g_slice_alloc0 (sizeof (*cbd)); + cbd->cbdata.state = 0; + cbd->cbdata.prev_data = *map->user_data; + cbd->cbdata.cur_data = NULL; + cbd->cbdata.map = map; + cbd->map = map; + REF_INIT_RETAIN (cbd, rspamd_map_periodic_dtor); - if (!read_map_file (map, data)) { - jitter_timeout_event (map, FALSE, FALSE, TRUE); + if (initial) { + evtimer_set (&map->ev, rspamd_map_periodic_callback, cbd); + event_base_set (map->ev_base, &map->ev); } else { - jitter_timeout_event (map, FALSE, FALSE, FALSE); + evtimer_del (&map->ev); + evtimer_set (&map->ev, rspamd_map_periodic_callback, cbd); } - g_atomic_int_set (map->locked, 0); -} + jittered_sec = rspamd_time_jitter (timeout, 0); + double_to_tv (jittered_sec, &map->tv); + evtimer_add (&map->ev, &map->tv); +} static void rspamd_map_dns_callback (struct rdns_reply *reply, void *arg) { struct http_callback_data *cbd = arg; - rspamd_mempool_t *pool; + struct rspamd_map *map; - pool = cbd->map->pool; + map = cbd->map; if (reply->code == RDNS_RC_NOERROR) { /* @@ -641,36 +665,25 @@ rspamd_map_dns_callback (struct rdns_reply *reply, void *arg) } else { /* We could not resolve host, so cowardly fail here */ - msg_err_pool ("cannot resolve %s", cbd->data->host); + msg_err_map ("cannot resolve %s", cbd->data->host); } } - REF_RELEASE (cbd); + MAP_RELEASE (cbd); } /** * Async HTTP callback */ static void -http_callback (gint fd, short what, void *ud) +rspamd_map_common_http_callback (struct rspamd_map *map, struct rspamd_map_backend *bk, + struct map_periodic_cbdata *periodic, gboolean check) { - struct rspamd_map *map = ud; struct http_map_data *data; struct http_callback_data *cbd; - rspamd_mempool_t *pool; gchar tmpbuf[PATH_MAX]; - data = map->map_data; - pool = map->pool; - - if (!g_atomic_int_compare_and_exchange (map->locked, 0, 1)) { - msg_debug_pool ( - "don't try to reread map as it is locked by other process, will reread it later"); - jitter_timeout_event (map, TRUE, FALSE, FALSE); - return; - } - - /* Plan event */ + data = bk->data.hd; cbd = g_slice_alloc0 (sizeof (struct http_callback_data)); rspamd_snprintf (tmpbuf, sizeof (tmpbuf), @@ -679,10 +692,11 @@ http_callback (gint fd, short what, void *ud) cbd->out_fd = mkstemp (tmpbuf); if (cbd->out_fd == -1) { - g_slice_free1 (sizeof (*cbd), cbd); - msg_err_pool ("cannot create tempfile: %s", strerror (errno)); - jitter_timeout_event (map, FALSE, FALSE, TRUE); + msg_err_map ("cannot create tempfile: %s", strerror (errno)); g_atomic_int_set (map->locked, 0); + g_slice_free1 (sizeof (*cbd), cbd); + periodic->errored = TRUE; + rspamd_map_periodic_callback (-1, EV_TIMEOUT, periodic); return; } @@ -692,39 +706,165 @@ http_callback (gint fd, short what, void *ud) cbd->map = map; cbd->data = data; cbd->fd = -1; - cbd->cbdata.state = 0; - cbd->cbdata.prev_data = *cbd->map->user_data; - cbd->cbdata.cur_data = NULL; - cbd->cbdata.map = cbd->map; + cbd->check = check; + cbd->periodic = periodic; + MAP_RETAIN (periodic); + cbd->bk = bk; + MAP_RETAIN (bk); cbd->stage = map_resolve_host2; double_to_tv (map->cfg->map_timeout, &cbd->tv); REF_INIT_RETAIN (cbd, free_http_cbdata); - msg_debug_pool ("reading map data from %s", data->host); + msg_debug_map ("%s map data from %s", check ? "checking" : "reading", + data->host); /* Send both A and AAAA requests */ if (map->r->r) { if (rdns_make_request_full (map->r->r, rspamd_map_dns_callback, cbd, map->cfg->dns_timeout, map->cfg->dns_retransmits, 1, data->host, RDNS_REQUEST_A)) { - REF_RETAIN (cbd); + MAP_RETAIN (cbd); } if (rdns_make_request_full (map->r->r, rspamd_map_dns_callback, cbd, map->cfg->dns_timeout, map->cfg->dns_retransmits, 1, data->host, RDNS_REQUEST_AAAA)) { - REF_RETAIN (cbd); + MAP_RETAIN (cbd); } - jitter_timeout_event (map, FALSE, FALSE, FALSE); map->dtor = free_http_cbdata_dtor; map->dtor_data = cbd; } else { - msg_warn_pool ("cannot load map: DNS resolver is not initialized"); - jitter_timeout_event (map, FALSE, FALSE, TRUE); + msg_warn_map ("cannot load map: DNS resolver is not initialized"); + cbd->periodic->errored = TRUE; } - /* We don't need own ref as it is now refcounted by DNS requests */ - REF_RELEASE (cbd); + /* We don't need own ref as it is now ref counted by DNS handlers */ + MAP_RELEASE (cbd); +} + +static void +rspamd_map_http_check_callback (gint fd, short what, void *ud) +{ + struct map_periodic_cbdata *cbd = ud; + struct rspamd_map *map; + struct rspamd_map_backend *bk; + + map = cbd->map; + bk = g_ptr_array_index (cbd->map->backends, cbd->cur_backend); + + rspamd_map_common_http_callback (map, bk, cbd, TRUE); +} + +static void +rspamd_map_http_read_callback (gint fd, short what, void *ud) +{ + struct map_periodic_cbdata *cbd = ud; + struct rspamd_map *map; + struct rspamd_map_backend *bk; + + map = cbd->map; + bk = g_ptr_array_index (cbd->map->backends, cbd->cur_backend); + rspamd_map_common_http_callback (map, bk, cbd, FALSE); +} + +static void +rspamd_map_file_check_callback (gint fd, short what, void *ud) +{ + struct rspamd_map *map; + struct map_periodic_cbdata *periodic = ud; + struct file_map_data *data; + struct rspamd_map_backend *bk; + struct stat st; + + map = periodic->map; + + bk = g_ptr_array_index (map->backends, periodic->cur_backend); + data = bk->data.fd; + + if (stat (data->filename, &st) != -1 && + (st.st_mtime > data->st.st_mtime || data->st.st_mtime == -1)) { + /* File was modified since last check */ + memcpy (&data->st, &st, sizeof (struct stat)); + periodic->need_modify = TRUE; + periodic->cur_backend = 0; + rspamd_map_periodic_callback (-1, EV_TIMEOUT, periodic); + + return; + } + + /* Switch to the next backend */ + periodic->cur_backend ++; + rspamd_map_periodic_callback (-1, EV_TIMEOUT, periodic); +} + +static void +rspamd_map_file_read_callback (gint fd, short what, void *ud) +{ + struct rspamd_map *map; + struct map_periodic_cbdata *periodic = ud; + struct file_map_data *data; + struct rspamd_map_backend *bk; + + map = periodic->map; + + bk = g_ptr_array_index (map->backends, periodic->cur_backend); + data = bk->data.fd; + + msg_info_map ("rereading map file %s", data->filename); + + if (!read_map_file (map, data, bk, periodic)) { + periodic->errored = TRUE; + } + + /* Switch to the next backend */ + periodic->cur_backend ++; + rspamd_map_periodic_callback (-1, EV_TIMEOUT, periodic); +} + +static void +rspamd_map_periodic_callback (gint fd, short what, void *ud) +{ + struct rspamd_map_backend *bk; + struct map_periodic_cbdata *cbd = ud; + + if (cbd->errored) { + /* We should not check other backends if some backend has failed */ + rspamd_map_schedule_periodic (cbd->map, FALSE, FALSE, TRUE); + g_atomic_int_set (cbd->map->locked, 0); + MAP_RELEASE (cbd); + + return; + } + + /* For each backend we need to check for modifications */ + if (cbd->cur_backend >= cbd->map->backends->len) { + /* Last backend */ + MAP_RELEASE (cbd); + + return; + } + + bk = g_ptr_array_index (cbd->map->backends, cbd->cur_backend); + g_assert (bk != NULL); + + if (cbd->need_modify) { + /* Load data from the next backend */ + if (bk->protocol == MAP_PROTO_HTTP) { + rspamd_map_http_read_callback (fd, what, cbd); + } + else { + rspamd_map_file_read_callback (fd, what, cbd); + } + } + else { + /* Check the next backend */ + if (bk->protocol == MAP_PROTO_HTTP) { + rspamd_map_http_check_callback (fd, what, cbd); + } + else { + rspamd_map_file_check_callback (fd, what, cbd); + } + } } /* Start watching event for all maps */ @@ -735,29 +875,21 @@ rspamd_map_watch (struct rspamd_config *cfg, { GList *cur = cfg->maps; struct rspamd_map *map; - struct file_map_data *fdata; /* First of all do synced read of data */ while (cur) { map = cur->data; map->ev_base = ev_base; map->r = resolver; - event_base_set (map->ev_base, &map->ev); - if (map->protocol == MAP_PROTO_FILE) { - evtimer_set (&map->ev, file_callback, map); - /* Read initial data */ - fdata = map->map_data; - if (fdata->st.st_mtime != -1) { - /* Do not try to read non-existent file */ - read_map_file (map, map->map_data); - } - /* Plan event with jitter */ - jitter_timeout_event (map, FALSE, TRUE, FALSE); + if (!g_atomic_int_compare_and_exchange (map->locked, 0, 1)) { + msg_debug_map ( + "don't try to reread map as it is locked by other process, " + "will reread it later"); + rspamd_map_schedule_periodic (map, TRUE, TRUE, FALSE); } - else if (map->protocol == MAP_PROTO_HTTP) { - evtimer_set (&map->ev, http_callback, map); - jitter_timeout_event (map, FALSE, TRUE, FALSE); + else { + rspamd_map_schedule_periodic (map, FALSE, TRUE, FALSE); } cur = g_list_next (cur); @@ -769,10 +901,17 @@ rspamd_map_remove_all (struct rspamd_config *cfg) { struct rspamd_map *map; GList *cur; + struct rspamd_map_backend *bk; + guint i; for (cur = cfg->maps; cur != NULL; cur = g_list_next (cur)) { map = cur->data; + for (i = 0; i < map->backends->len; i ++) { + bk = g_ptr_array_index (map->backends, i); + MAP_RELEASE (bk); + } + if (map->dtor) { map->dtor (map->dtor_data); } @@ -780,26 +919,21 @@ rspamd_map_remove_all (struct rspamd_config *cfg) g_list_free (cfg->maps); cfg->maps = NULL; - - if (cfg->map_pool != NULL) { - rspamd_mempool_delete (cfg->map_pool); - cfg->map_pool = NULL; - } } static const gchar * rspamd_map_check_proto (struct rspamd_config *cfg, - const gchar *map_line, struct rspamd_map *map) + const gchar *map_line, struct rspamd_map_backend *bk) { const gchar *pos = map_line, *end, *end_key; - g_assert (map != NULL); + g_assert (bk != NULL); g_assert (pos != NULL); end = pos + strlen (pos); if (g_ascii_strncasecmp (pos, "sign+", sizeof ("sign+") - 1) == 0) { - map->is_signed = TRUE; + bk->is_signed = TRUE; pos += sizeof ("sign+") - 1; } @@ -808,10 +942,10 @@ rspamd_map_check_proto (struct rspamd_config *cfg, end_key = memchr (pos, '+', end - pos); if (end_key != NULL) { - map->trusted_pubkey = rspamd_pubkey_from_base32 (pos, end_key - pos, + bk->trusted_pubkey = rspamd_pubkey_from_base32 (pos, end_key - pos, RSPAMD_KEYPAIR_SIGN, RSPAMD_CRYPTOBOX_MODE_25519); - if (map->trusted_pubkey == NULL) { + if (bk->trusted_pubkey == NULL) { msg_err_config ("cannot read pubkey from map: %s", map_line); return NULL; @@ -820,10 +954,10 @@ rspamd_map_check_proto (struct rspamd_config *cfg, } else if (end - pos > 64) { /* Try hex encoding */ - map->trusted_pubkey = rspamd_pubkey_from_hex (pos, 64, + bk->trusted_pubkey = rspamd_pubkey_from_hex (pos, 64, RSPAMD_KEYPAIR_SIGN, RSPAMD_CRYPTOBOX_MODE_25519); - if (map->trusted_pubkey == NULL) { + if (bk->trusted_pubkey == NULL) { msg_err_config ("cannot read pubkey from map: %s", map_line); return NULL; @@ -841,24 +975,24 @@ rspamd_map_check_proto (struct rspamd_config *cfg, } } - map->protocol = MAP_PROTO_FILE; + bk->protocol = MAP_PROTO_FILE; if (g_ascii_strncasecmp (pos, "http://", sizeof ("http://") - 1) == 0) { - map->protocol = MAP_PROTO_HTTP; + bk->protocol = MAP_PROTO_HTTP; /* Include http:// */ - map->uri = rspamd_mempool_strdup (cfg->cfg_pool, pos); + bk->uri = g_strdup (pos); pos += sizeof ("http://") - 1; } else if (g_ascii_strncasecmp (pos, "file://", sizeof ("file://") - 1) == 0) { pos += sizeof ("file://") - 1; /* Exclude file:// */ - map->uri = rspamd_mempool_strdup (cfg->cfg_pool, pos); + bk->uri = g_strdup (pos); } else if (*pos == '/') { /* Trivial file case */ - map->uri = rspamd_mempool_strdup (cfg->cfg_pool, pos); + bk->uri = g_strdup (pos); } else { msg_err_config ("invalid map fetching protocol: %s", map_line); @@ -893,95 +1027,79 @@ rspamd_map_is_map (const gchar *map_line) return ret; } -struct rspamd_map * -rspamd_map_add (struct rspamd_config *cfg, - const gchar *map_line, - const gchar *description, - map_cb_t read_callback, - map_fin_cb_t fin_callback, - void **user_data) +static void +rspamd_map_backend_dtor (struct rspamd_map_backend *bk) { - struct rspamd_map *new_map; - const gchar *def; - struct file_map_data *fdata; - struct http_map_data *hdata; - gchar *cksum_encoded, cksum[rspamd_cryptobox_HASHBYTES]; - rspamd_mempool_t *pool; - struct http_parser_url up; - rspamd_ftok_t tok; - - if (cfg->map_pool == NULL) { - cfg->map_pool = rspamd_mempool_new (rspamd_mempool_suggest_size (), - "map"); - memcpy (cfg->map_pool->tag.uid, cfg->cfg_pool->tag.uid, - sizeof (cfg->map_pool->tag.uid)); + if (bk->protocol == MAP_PROTO_FILE) { + g_free (bk->data.fd->filename); + g_slice_free1 (sizeof (*bk->data.fd), bk->data.fd); + } + else { + g_free (bk->data.hd->host); + g_free (bk->data.hd->path); + g_slice_free1 (sizeof (*bk->data.hd), bk->data.hd); } - new_map = rspamd_mempool_alloc0 (cfg->map_pool, sizeof (struct rspamd_map)); + g_slice_free1 (sizeof (*bk), bk); +} - /* First of all detect protocol line */ - if (rspamd_map_check_proto (cfg, map_line, new_map) == NULL) { - return NULL; - } +static struct rspamd_map_backend * +rspamd_map_parse_backend (struct rspamd_config *cfg, const gchar *map_line) +{ + struct rspamd_map_backend *bk; + struct file_map_data *fdata = NULL; + struct http_map_data *hdata = NULL; + struct http_parser_url up; + rspamd_ftok_t tok; - new_map->read_callback = read_callback; - new_map->fin_callback = fin_callback; - new_map->user_data = user_data; - new_map->cfg = cfg; - new_map->id = g_random_int (); - new_map->locked = - rspamd_mempool_alloc0_shared (cfg->cfg_pool, sizeof (gint)); - def = new_map->uri; + bk = g_slice_alloc0 (sizeof (*bk)); + REF_INIT_RETAIN (bk, rspamd_map_backend_dtor); - if (description != NULL) { - new_map->description = - rspamd_mempool_strdup (cfg->cfg_pool, description); + if (!rspamd_map_check_proto (cfg, map_line, bk)) { + goto err; } /* Now check for each proto separately */ - if (new_map->protocol == MAP_PROTO_FILE) { - fdata = - rspamd_mempool_alloc0 (cfg->map_pool, - sizeof (struct file_map_data)); - if (access (def, R_OK) == -1) { + if (bk->protocol == MAP_PROTO_FILE) { + fdata = g_slice_alloc0 (sizeof (struct file_map_data)); + + if (access (bk->uri, R_OK) == -1) { if (errno != ENOENT) { - msg_err_config ("cannot open file '%s': %s", def, strerror - (errno)); + msg_err_config ("cannot open file '%s': %s", bk->uri, strerror (errno)); return NULL; } msg_info_config ( - "map '%s' is not found, but it can be loaded automatically later", - def); + "map '%s' is not found, but it can be loaded automatically later", + bk->uri); /* We still can add this file */ fdata->st.st_mtime = -1; } else { - stat (def, &fdata->st); + stat (bk->uri, &fdata->st); } - fdata->filename = rspamd_mempool_strdup (cfg->map_pool, def); - new_map->map_data = fdata; + + fdata->filename = g_strdup (bk->uri); + bk->data.fd = fdata; } - else if (new_map->protocol == MAP_PROTO_HTTP) { - hdata = - rspamd_mempool_alloc0 (cfg->map_pool, - sizeof (struct http_map_data)); + else if (bk->protocol == MAP_PROTO_HTTP) { + hdata = g_slice_alloc0 (sizeof (struct http_map_data)); memset (&up, 0, sizeof (up)); - if (http_parser_parse_url (new_map->uri, strlen (new_map->uri), FALSE, + if (http_parser_parse_url (bk->uri, strlen (bk->uri), FALSE, &up) != 0) { - msg_err_config ("cannot parse HTTP url: %s", new_map->uri); - return NULL; + msg_err_config ("cannot parse HTTP url: %s", bk->uri); + goto err; } else { if (!(up.field_set & 1 << UF_HOST)) { - msg_err_config ("cannot parse HTTP url: %s: no host", new_map->uri); + msg_err_config ("cannot parse HTTP url: %s: no host", bk->uri); return NULL; } - tok.begin = new_map->uri + up.field_data[UF_HOST].off; + tok.begin = bk->uri + up.field_data[UF_HOST].off; tok.len = up.field_data[UF_HOST].len; - hdata->host = rspamd_mempool_ftokdup (cfg->map_pool, &tok); + hdata->host = rspamd_ftokdup (&tok); if (up.field_set & 1 << UF_PORT) { hdata->port = up.port; @@ -991,31 +1109,235 @@ rspamd_map_add (struct rspamd_config *cfg, } if (up.field_set & 1 << UF_PATH) { - tok.begin = new_map->uri + up.field_data[UF_PATH].off; + tok.begin = bk->uri + up.field_data[UF_PATH].off; tok.len = strlen (tok.begin); - hdata->path = rspamd_mempool_ftokdup (cfg->map_pool, &tok); + hdata->path = rspamd_ftokdup (&tok); } } - new_map->map_data = hdata; + bk->data.hd = hdata; + } + + return bk; + +err: + MAP_RELEASE (bk); + + if (hdata) { + g_slice_free1 (sizeof (*hdata), hdata); + } + + if (fdata) { + g_slice_free1 (sizeof (*fdata), fdata); + } + + return NULL; +} +static void +rspamd_map_calculate_hash (struct rspamd_map *map) +{ + struct rspamd_map_backend *bk; + guint i; + rspamd_cryptobox_hash_state_t st; + gchar *cksum_encoded, cksum[rspamd_cryptobox_HASHBYTES]; + + rspamd_cryptobox_hash_init (&st, NULL, 0); + + for (i = 0; i < map->backends->len; i ++) { + bk = g_ptr_array_index (map->backends, i); + rspamd_cryptobox_hash_update (&st, bk->uri, strlen (bk->uri)); } - /* Temp pool */ - rspamd_cryptobox_hash (cksum, new_map->uri, strlen (new_map->uri), NULL, 0); + rspamd_cryptobox_hash_final (&st, cksum); cksum_encoded = rspamd_encode_base32 (cksum, sizeof (cksum)); - new_map->pool = rspamd_mempool_new (rspamd_mempool_suggest_size (), "map"); - rspamd_strlcpy (new_map->pool->tag.uid, cksum_encoded, - sizeof (new_map->pool->tag.uid)); + rspamd_strlcpy (map->tag, cksum_encoded, sizeof (map->tag)); g_free (cksum_encoded); - pool = new_map->pool; - msg_info_pool ("added map %s", new_map->uri); +} +struct rspamd_map * +rspamd_map_add (struct rspamd_config *cfg, + const gchar *map_line, + const gchar *description, + map_cb_t read_callback, + map_fin_cb_t fin_callback, + void **user_data) +{ + struct rspamd_map *map; + struct rspamd_map_backend *bk; + + bk = rspamd_map_parse_backend (cfg, map_line); + if (bk == NULL) { + return NULL; + } + + map = g_slice_alloc0 (sizeof (struct rspamd_map)); + map->read_callback = read_callback; + map->fin_callback = fin_callback; + map->user_data = user_data; + map->cfg = cfg; + map->id = g_random_int (); + map->locked = + rspamd_mempool_alloc0_shared (cfg->cfg_pool, sizeof (gint)); + map->backends = g_ptr_array_sized_new (1); + g_ptr_array_add (map->backends, bk); + map->name = g_strdup (map_line); + map->poll_timeout = cfg->map_timeout; + + if (description != NULL) { + map->description = g_strdup (description); + } - cfg->maps = g_list_prepend (cfg->maps, new_map); + rspamd_map_calculate_hash (map); + msg_info_map ("added map %s", bk->uri); - return new_map; + cfg->maps = g_list_prepend (cfg->maps, map); + + return map; +} + +struct rspamd_map* +rspamd_map_add_from_ucl (struct rspamd_config *cfg, + const ucl_object_t *obj, + const gchar *description, + map_cb_t read_callback, + map_fin_cb_t fin_callback, + void **user_data) +{ + ucl_object_iter_t it = NULL; + const ucl_object_t *cur, *elt; + struct rspamd_map *map; + struct rspamd_map_backend *bk; + + g_assert (obj != NULL); + + if (ucl_object_type (obj) == UCL_STRING) { + /* Just a plain string */ + return rspamd_map_add (cfg, ucl_object_tostring (obj), NULL, + read_callback, fin_callback, user_data); + } + + map = g_slice_alloc0 (sizeof (struct rspamd_map)); + map->read_callback = read_callback; + map->fin_callback = fin_callback; + map->user_data = user_data; + map->cfg = cfg; + map->id = g_random_int (); + map->locked = + rspamd_mempool_alloc0_shared (cfg->cfg_pool, sizeof (gint)); + map->backends = g_ptr_array_new (); + map->poll_timeout = cfg->map_timeout; + + if (description) { + map->description = g_strdup (description); + } + + if (ucl_object_type (obj) == UCL_ARRAY) { + /* Add array of maps as multiple backends */ + while ((cur = ucl_object_iterate (obj, &it, true)) != NULL) { + if (ucl_object_type (cur) == UCL_STRING) { + bk = rspamd_map_parse_backend (cfg, ucl_object_tostring (cur)); + + if (bk != NULL) { + g_ptr_array_add (map->backends, bk); + + if (!map->name) { + map->name = g_strdup (ucl_object_tostring (cur)); + } + } + } + else { + msg_err_config ("bad map element type: %s", + ucl_object_type_to_string (ucl_object_type (cur))); + } + } + + if (map->backends->len == 0) { + msg_err_config ("map has no urls to be loaded"); + goto err; + } + } + else if (ucl_object_type (obj) == UCL_OBJECT) { + elt = ucl_object_lookup (obj, "name"); + if (elt && ucl_object_type (elt) == UCL_STRING) { + map->name = g_strdup (ucl_object_tostring (elt)); + } + + elt = ucl_object_lookup (obj, "description"); + if (elt && ucl_object_type (elt) == UCL_STRING) { + if (map->description) { + g_free (map->description); + } + + map->description = g_strdup (ucl_object_tostring (elt)); + } + + elt = ucl_object_lookup_any (obj, "timeout", "poll", "poll_time", + "watch_interval", NULL); + if (elt) { + map->poll_timeout = ucl_object_todouble (elt); + } + + elt = ucl_object_lookup_any (obj, "upstreams", "url", "urls", NULL); + if (elt == NULL) { + msg_err_config ("map has no urls to be loaded"); + goto err; + } + + if (ucl_object_type (obj) == UCL_ARRAY) { + /* Add array of maps as multiple backends */ + while ((cur = ucl_object_iterate (elt, &it, true)) != NULL) { + if (ucl_object_type (cur) == UCL_STRING) { + bk = rspamd_map_parse_backend (cfg, ucl_object_tostring (cur)); + + if (bk != NULL) { + g_ptr_array_add (map->backends, bk); + + if (!map->name) { + map->name = g_strdup (ucl_object_tostring (cur)); + } + } + } + else { + msg_err_config ("bad map element type: %s", + ucl_object_type_to_string (ucl_object_type (cur))); + goto err; + } + } + + if (map->backends->len == 0) { + msg_err_config ("map has no urls to be loaded"); + goto err; + } + } + else if (ucl_object_type (elt) == UCL_STRING) { + bk = rspamd_map_parse_backend (cfg, ucl_object_tostring (elt)); + + if (bk != NULL) { + g_ptr_array_add (map->backends, bk); + + if (!map->name) { + map->name = g_strdup (ucl_object_tostring (cur)); + } + } + } + + if (map->backends->len == 0) { + msg_err_config ("map has no urls to be loaded"); + goto err; + } + } + + return map; + +err: + g_ptr_array_free (map->backends, TRUE); + g_free (map->name); + g_free (map->description); + g_slice_free1 (sizeof (*map), map); + + return NULL; } /** @@ -1023,18 +1345,18 @@ rspamd_map_add (struct rspamd_config *cfg, */ #define MAP_STORE_KEY do { \ - key = rspamd_mempool_alloc (pool, p - c + 1); \ + key = g_malloc (p - c + 1); \ rspamd_strlcpy (key, c, p - c + 1); \ } while (0) #define MAP_STORE_VALUE do { \ - value = rspamd_mempool_alloc (pool, p - c + 1); \ + value = g_malloc (p - c + 1); \ rspamd_strlcpy (value, c, p - c + 1); \ value = g_strstrip (value); \ } while (0) gchar * -rspamd_parse_kv_list (rspamd_mempool_t * pool, +rspamd_parse_kv_list ( gchar * chunk, gint len, struct map_cb_data *data, @@ -1058,6 +1380,7 @@ rspamd_parse_kv_list (rspamd_mempool_t * pool, }; gchar *c, *p, *key = NULL, *value = NULL, *end; + struct rspamd_map *map = data->map; p = chunk; c = p; @@ -1095,8 +1418,9 @@ rspamd_parse_kv_list (rspamd_mempool_t * pool, /* Store a single key */ MAP_STORE_KEY; func (data->cur_data, key, default_value); - msg_debug_pool ("insert key only pair: %s -> %s", + msg_debug_map ("insert key only pair: %s -> %s", key, default_value); + g_free (key); } key = NULL; @@ -1107,8 +1431,9 @@ rspamd_parse_kv_list (rspamd_mempool_t * pool, /* Store a single key */ MAP_STORE_KEY; func (data->cur_data, key, default_value); - msg_debug_pool ("insert key only pair: %s -> %s", + msg_debug_map ("insert key only pair: %s -> %s", key, default_value); + g_free (key); } data->state = map_read_eol; @@ -1176,8 +1501,9 @@ rspamd_parse_kv_list (rspamd_mempool_t * pool, /* Store a single key */ MAP_STORE_KEY; func (data->cur_data, key, default_value); - msg_debug_pool ("insert key only pair: %s -> %s", + msg_debug_map ("insert key only pair: %s -> %s", key, default_value); + g_free (key); key = NULL; } @@ -1188,8 +1514,10 @@ rspamd_parse_kv_list (rspamd_mempool_t * pool, /* Store a single key */ MAP_STORE_KEY; func (data->cur_data, key, default_value); - msg_debug_pool ("insert key only pair: %s -> %s", + + msg_debug_map ("insert key only pair: %s -> %s", key, default_value); + g_free (key); key = NULL; } @@ -1234,15 +1562,18 @@ rspamd_parse_kv_list (rspamd_mempool_t * pool, /* Store a single key */ MAP_STORE_VALUE; func (data->cur_data, key, value); - msg_debug_pool ("insert key value pair: %s -> %s", + msg_debug_map ("insert key value pair: %s -> %s", key, value); + g_free (key); + g_free (value); key = NULL; value = NULL; } else { func (data->cur_data, key, default_value); - msg_debug_pool ("insert key only pair: %s -> %s", + msg_debug_map ("insert key only pair: %s -> %s", key, default_value); + g_free (key); key = NULL; } @@ -1253,15 +1584,18 @@ rspamd_parse_kv_list (rspamd_mempool_t * pool, /* Store a single key */ MAP_STORE_VALUE; func (data->cur_data, key, value); - msg_debug_pool ("insert key value pair: %s -> %s", + msg_debug_map ("insert key value pair: %s -> %s", key, value); + g_free (key); + g_free (value); key = NULL; value = NULL; } else { func (data->cur_data, key, default_value); - msg_debug_pool ("insert key only pair: %s -> %s", + msg_debug_map ("insert key only pair: %s -> %s", key, default_value); + g_free (key); key = NULL; } @@ -1314,8 +1648,9 @@ rspamd_parse_kv_list (rspamd_mempool_t * pool, /* Store a single key */ MAP_STORE_KEY; func (data->cur_data, key, default_value); - msg_debug_pool ("insert key only pair: %s -> %s", + msg_debug_map ("insert key only pair: %s -> %s", key, default_value); + g_free (key); key = NULL; } break; @@ -1325,15 +1660,18 @@ rspamd_parse_kv_list (rspamd_mempool_t * pool, /* Store a single key */ MAP_STORE_VALUE; func (data->cur_data, key, value); - msg_debug_pool ("insert key value pair: %s -> %s", + msg_debug_map ("insert key value pair: %s -> %s", key, value); + g_free (key); + g_free (value); key = NULL; value = NULL; } else { func (data->cur_data, key, default_value); - msg_debug_pool ("insert key only pair: %s -> %s", + msg_debug_map ("insert key only pair: %s -> %s", key, default_value); + g_free (key); key = NULL; } break; @@ -1347,80 +1685,95 @@ rspamd_parse_kv_list (rspamd_mempool_t * pool, * Radix tree helper function */ static void -radix_tree_insert_helper (gpointer st, gconstpointer key, gpointer value) +radix_tree_insert_helper (gpointer st, gconstpointer key, gconstpointer value) { radix_compressed_t *tree = (radix_compressed_t *)st; rspamd_radix_add_iplist ((gchar *)key, " ,;", tree, value); } +static void +hash_insert_helper (gpointer st, gconstpointer key, gconstpointer value) +{ + GHashTable *ht = st; + gpointer k, v; + + k = g_strdup (key); + v = g_strdup (value); + g_hash_table_replace (ht, k, v); +} + /* Helpers */ gchar * -rspamd_hosts_read (rspamd_mempool_t * pool, +rspamd_hosts_read ( gchar * chunk, gint len, struct map_cb_data *data, gboolean final) { if (data->cur_data == NULL) { - data->cur_data = g_hash_table_new (rspamd_strcase_hash, - rspamd_strcase_equal); + data->cur_data = g_hash_table_new_full (rspamd_strcase_hash, + rspamd_strcase_equal, g_free, g_free); } - return rspamd_parse_kv_list (pool, + return rspamd_parse_kv_list ( chunk, len, data, - (insert_func) g_hash_table_insert, + hash_insert_helper, hash_fill, final); } void -rspamd_hosts_fin (rspamd_mempool_t * pool, struct map_cb_data *data) +rspamd_hosts_fin (struct map_cb_data *data) { + struct rspamd_map *map = data->map; + if (data->prev_data) { - g_hash_table_destroy (data->prev_data); + g_hash_table_unref (data->prev_data); } if (data->cur_data) { - msg_info_pool ("read hash of %d elements", g_hash_table_size + msg_info_map ("read hash of %d elements", g_hash_table_size (data->cur_data)); } } gchar * -rspamd_kv_list_read (rspamd_mempool_t * pool, +rspamd_kv_list_read ( gchar * chunk, gint len, struct map_cb_data *data, gboolean final) { if (data->cur_data == NULL) { - data->cur_data = g_hash_table_new (rspamd_strcase_hash, - rspamd_strcase_equal); + data->cur_data = g_hash_table_new_full (rspamd_strcase_hash, + rspamd_strcase_equal, g_free, g_free); } - return rspamd_parse_kv_list (pool, + return rspamd_parse_kv_list ( chunk, len, data, - (insert_func) g_hash_table_insert, + hash_insert_helper, "", final); } void -rspamd_kv_list_fin (rspamd_mempool_t * pool, struct map_cb_data *data) +rspamd_kv_list_fin (struct map_cb_data *data) { + struct rspamd_map *map = data->map; + if (data->prev_data) { - g_hash_table_destroy (data->prev_data); + g_hash_table_unref (data->prev_data); } if (data->cur_data) { - msg_info_pool ("read hash of %d elements", g_hash_table_size + msg_info_map ("read hash of %d elements", g_hash_table_size (data->cur_data)); } } gchar * -rspamd_radix_read (rspamd_mempool_t * pool, +rspamd_radix_read ( gchar * chunk, gint len, struct map_cb_data *data, @@ -1428,30 +1781,33 @@ rspamd_radix_read (rspamd_mempool_t * pool, { radix_compressed_t *tree; rspamd_mempool_t *rpool; + struct rspamd_map *map = data->map; if (data->cur_data == NULL) { tree = radix_create_compressed (); rpool = radix_get_pool (tree); - memcpy (rpool->tag.uid, pool->tag.uid, sizeof (rpool->tag.uid)); + memcpy (rpool->tag.uid, map->tag, sizeof (rpool->tag.uid)); data->cur_data = tree; } - return rspamd_parse_kv_list (pool, + return rspamd_parse_kv_list ( chunk, len, data, - (insert_func) radix_tree_insert_helper, + radix_tree_insert_helper, hash_fill, final); } void -rspamd_radix_fin (rspamd_mempool_t * pool, struct map_cb_data *data) +rspamd_radix_fin (struct map_cb_data *data) { + struct rspamd_map *map = data->map; + if (data->prev_data) { radix_destroy_compressed (data->prev_data); } if (data->cur_data) { - msg_info_pool ("read radix trie of %z elements: %s", + msg_info_map ("read radix trie of %z elements: %s", radix_get_size (data->cur_data), radix_get_info (data->cur_data)); } } @@ -1494,6 +1850,10 @@ rspamd_regexp_map_destroy (struct rspamd_regexp_map *re_map) rspamd_regexp_unref (re); } + for (i = 0; i < re_map->values->len; i ++) { + g_free (g_ptr_array_index (re_map->values, i)); + } + g_ptr_array_free (re_map->regexps, TRUE); g_ptr_array_free (re_map->values, TRUE); @@ -1519,18 +1879,18 @@ rspamd_regexp_map_destroy (struct rspamd_regexp_map *re_map) } static void -rspamd_re_map_insert_helper (gpointer st, gpointer key, gpointer value) +rspamd_re_map_insert_helper (gpointer st, gconstpointer key, gconstpointer value) { struct rspamd_regexp_map *re_map = st; + struct rspamd_map *map; rspamd_regexp_t *re; GError *err = NULL; - rspamd_mempool_t *pool; - pool = re_map->map->pool; + map = re_map->map; re = rspamd_regexp_new (key, NULL, &err); if (re == NULL) { - msg_err_pool ("cannot parse regexp %s: %e", key, err); + msg_err_map ("cannot parse regexp %s: %e", key, err); if (err) { g_error_free (err); @@ -1540,7 +1900,7 @@ rspamd_re_map_insert_helper (gpointer st, gpointer key, gpointer value) } g_ptr_array_add (re_map->regexps, re); - g_ptr_array_add (re_map->values, value); + g_ptr_array_add (re_map->values, g_strdup (value)); } static void @@ -1550,14 +1910,14 @@ rspamd_re_map_finalize (struct rspamd_regexp_map *re_map) guint i; hs_platform_info_t plt; hs_compile_error_t *err; - rspamd_mempool_t *pool; + struct rspamd_map *map; rspamd_regexp_t *re; gint pcre_flags; - pool = re_map->map->pool; + map = re_map->map; if (hs_populate_platform (&plt) != HS_SUCCESS) { - msg_err_pool ("cannot populate hyperscan platform"); + msg_err_map ("cannot populate hyperscan platform"); return; } @@ -1605,7 +1965,7 @@ rspamd_re_map_finalize (struct rspamd_regexp_map *re_map) &re_map->hs_db, &err) != HS_SUCCESS) { - msg_err_pool ("cannot create tree of regexp when processing '%s': %s", + msg_err_map ("cannot create tree of regexp when processing '%s': %s", re_map->patterns[err->expression], err->message); re_map->hs_db = NULL; hs_free_compile_error (err); @@ -1614,7 +1974,7 @@ rspamd_re_map_finalize (struct rspamd_regexp_map *re_map) } if (hs_alloc_scratch (re_map->hs_db, &re_map->hs_scratch) != HS_SUCCESS) { - msg_err_pool ("cannot allocate scratch space for hyperscan"); + msg_err_map ("cannot allocate scratch space for hyperscan"); hs_free_database (re_map->hs_db); re_map->hs_db = NULL; } @@ -1622,7 +1982,7 @@ rspamd_re_map_finalize (struct rspamd_regexp_map *re_map) } gchar * -rspamd_regexp_list_read (rspamd_mempool_t *pool, +rspamd_regexp_list_read ( gchar *chunk, gint len, struct map_cb_data *data, @@ -1635,19 +1995,20 @@ rspamd_regexp_list_read (rspamd_mempool_t *pool, data->cur_data = re_map; } - return rspamd_parse_kv_list (pool, + return rspamd_parse_kv_list ( chunk, len, data, - (insert_func) rspamd_re_map_insert_helper, + rspamd_re_map_insert_helper, hash_fill, final); } void -rspamd_regexp_list_fin (rspamd_mempool_t *pool, struct map_cb_data *data) +rspamd_regexp_list_fin (struct map_cb_data *data) { struct rspamd_regexp_map *re_map; + struct rspamd_map *map = data->map; if (data->prev_data) { rspamd_regexp_map_destroy (data->prev_data); @@ -1655,7 +2016,7 @@ rspamd_regexp_list_fin (rspamd_mempool_t *pool, struct map_cb_data *data) if (data->cur_data) { re_map = data->cur_data; rspamd_re_map_finalize (re_map); - msg_info_pool ("read regexp list of %ud elements", + msg_info_map ("read regexp list of %ud elements", re_map->regexps->len); } } diff --git a/src/libutil/map.h b/src/libutil/map.h index f7cbc3076..3b6439efb 100644 --- a/src/libutil/map.h +++ b/src/libutil/map.h @@ -4,6 +4,7 @@ #include "config.h" #include <event.h> +#include "ucl.h" #include "mem_pool.h" #include "radix.h" #include "dns.h" @@ -18,9 +19,9 @@ struct map_cb_data; /** * Callback types */ -typedef gchar * (*map_cb_t)(rspamd_mempool_t *pool, gchar *chunk, gint len, +typedef gchar * (*map_cb_t)(gchar *chunk, gint len, struct map_cb_data *data, gboolean final); -typedef void (*map_fin_cb_t)(rspamd_mempool_t *pool, struct map_cb_data *data); +typedef void (*map_fin_cb_t)(struct map_cb_data *data); /** * Common map object @@ -56,6 +57,16 @@ struct rspamd_map* rspamd_map_add (struct rspamd_config *cfg, void **user_data); /** + * Add map from ucl + */ +struct rspamd_map* rspamd_map_add_from_ucl (struct rspamd_config *cfg, + const ucl_object_t *obj, + const gchar *description, + map_cb_t read_callback, + map_fin_cb_t fin_callback, + void **user_data); + +/** * Start watching of maps by adding events to libevent event loop */ void rspamd_map_watch (struct rspamd_config *cfg, @@ -77,50 +88,50 @@ typedef void (*insert_func) (gpointer st, gconstpointer key, /** * Radix list is a list like ip/mask */ -gchar * rspamd_radix_read (rspamd_mempool_t *pool, +gchar * rspamd_radix_read ( gchar *chunk, gint len, struct map_cb_data *data, gboolean final); -void rspamd_radix_fin (rspamd_mempool_t *pool, struct map_cb_data *data); +void rspamd_radix_fin (struct map_cb_data *data); /** * Host list is an ordinal list of hosts or domains */ -gchar * rspamd_hosts_read (rspamd_mempool_t *pool, +gchar * rspamd_hosts_read ( gchar *chunk, gint len, struct map_cb_data *data, gboolean final); -void rspamd_hosts_fin (rspamd_mempool_t *pool, struct map_cb_data *data); +void rspamd_hosts_fin (struct map_cb_data *data); /** * Kv list is an ordinal list of keys and values separated by whitespace */ -gchar * rspamd_kv_list_read (rspamd_mempool_t *pool, +gchar * rspamd_kv_list_read ( gchar *chunk, gint len, struct map_cb_data *data, gboolean final); -void rspamd_kv_list_fin (rspamd_mempool_t *pool, struct map_cb_data *data); +void rspamd_kv_list_fin (struct map_cb_data *data); /** * Regexp list is a list of regular expressions */ struct rspamd_regexp_map; -gchar * rspamd_regexp_list_read (rspamd_mempool_t *pool, +gchar * rspamd_regexp_list_read ( gchar *chunk, gint len, struct map_cb_data *data, gboolean final); -void rspamd_regexp_list_fin (rspamd_mempool_t *pool, struct map_cb_data *data); +void rspamd_regexp_list_fin (struct map_cb_data *data); /** * FSM for lists parsing (support comments, blank lines and partial replies) */ gchar * -rspamd_parse_kv_list (rspamd_mempool_t * pool, +rspamd_parse_kv_list ( gchar * chunk, gint len, struct map_cb_data *data, diff --git a/src/libutil/map_private.h b/src/libutil/map_private.h index c26517574..9bdca5f90 100644 --- a/src/libutil/map_private.h +++ b/src/libutil/map_private.h @@ -24,30 +24,57 @@ typedef void (*rspamd_map_dtor) (gpointer p); +#define msg_err_map(...) rspamd_default_log_function (G_LOG_LEVEL_CRITICAL, \ + "map", map->tag, \ + G_STRFUNC, \ + __VA_ARGS__) +#define msg_warn_map(...) rspamd_default_log_function (G_LOG_LEVEL_WARNING, \ + "map", map->tag, \ + G_STRFUNC, \ + __VA_ARGS__) +#define msg_info_map(...) rspamd_default_log_function (G_LOG_LEVEL_INFO, \ + "map", map->tag, \ + G_STRFUNC, \ + __VA_ARGS__) +#define msg_debug_map(...) rspamd_default_log_function (G_LOG_LEVEL_DEBUG, \ + "map", map->tag, \ + G_STRFUNC, \ + __VA_ARGS__) + enum fetch_proto { MAP_PROTO_FILE, MAP_PROTO_HTTP, }; -struct rspamd_map { - rspamd_mempool_t *pool; - struct rspamd_dns_resolver *r; + +struct rspamd_map_backend { + enum fetch_proto protocol; gboolean is_signed; struct rspamd_cryptobox_pubkey *trusted_pubkey; + union { + struct file_map_data *fd; + struct http_map_data *hd; + } data; + gchar *uri; + ref_entry_t ref; +}; + +struct rspamd_map { + struct rspamd_dns_resolver *r; struct rspamd_config *cfg; - enum fetch_proto protocol; + GPtrArray *backends; map_cb_t read_callback; map_fin_cb_t fin_callback; void **user_data; - struct event ev; - struct timeval tv; struct event_base *ev_base; - void *map_data; - gchar *uri; gchar *description; + gchar *name; guint32 id; - guint32 checksum; + struct event ev; + struct timeval tv; + gdouble poll_timeout; /* Shared lock for temporary disabling of map reading (e.g. when this map is written by UI) */ gint *locked; + gchar tag[MEMPOOL_UID_LEN]; rspamd_map_dtor dtor; gpointer dtor_data; }; @@ -56,7 +83,7 @@ struct rspamd_map { * Data specific to file maps */ struct file_map_data { - const gchar *filename; + gchar *filename; struct stat st; }; @@ -64,12 +91,12 @@ struct file_map_data { * Data specific to HTTP maps */ struct http_map_data { - struct addrinfo *addr; - guint16 port; gchar *path; gchar *host; + gchar *last_signature; time_t last_checked; gboolean request_sent; + guint16 port; }; enum rspamd_map_http_stage { @@ -80,20 +107,31 @@ enum rspamd_map_http_stage { map_load_signature }; +struct map_periodic_cbdata { + struct rspamd_map *map; + struct map_cb_data cbdata; + gboolean need_modify; + gboolean errored; + guint cur_backend; + ref_entry_t ref; +}; + struct http_callback_data { struct event_base *ev_base; struct rspamd_http_connection *conn; rspamd_inet_addr_t *addr; - struct timeval tv; struct rspamd_map *map; + struct rspamd_map_backend *bk; struct http_map_data *data; - struct map_cb_data cbdata; + struct map_periodic_cbdata *periodic; struct rspamd_cryptobox_pubkey *pk; + gboolean check; gchar *tmpfile; enum rspamd_map_http_stage stage; gint out_fd; gint fd; + struct timeval tv; ref_entry_t ref; }; diff --git a/src/libutil/shingles.c b/src/libutil/shingles.c index c2acae6d3..66f6b457c 100644 --- a/src/libutil/shingles.c +++ b/src/libutil/shingles.c @@ -19,15 +19,16 @@ #define SHINGLES_WINDOW 3 -struct rspamd_shingle* +struct rspamd_shingle* RSPAMD_OPTIMIZE("unroll-loops") rspamd_shingles_generate (GArray *input, const guchar key[16], rspamd_mempool_t *pool, rspamd_shingles_filter filter, - gpointer filterd) + gpointer filterd, + enum rspamd_shingle_alg alg) { struct rspamd_shingle *res; - GArray *hashes[RSPAMD_SHINGLE_SIZE]; + guint64 **hashes; rspamd_sipkey_t keys[RSPAMD_SHINGLE_SIZE]; guchar shabuf[rspamd_cryptobox_HASHBYTES], *out_key; const guchar *cur_key; @@ -35,7 +36,9 @@ rspamd_shingles_generate (GArray *input, rspamd_ftok_t *word; rspamd_cryptobox_hash_state_t bs; guint64 val; - gint i, j, beg = 0; + gint i, j, k; + gsize hlen, beg = 0; + enum rspamd_cryptobox_fast_hash_type ht; if (pool != NULL) { res = rspamd_mempool_alloc (pool, sizeof (*res)); @@ -50,9 +53,11 @@ rspamd_shingles_generate (GArray *input, out_key = (guchar *)&keys[0]; /* Init hashes pipes and keys */ + hashes = g_slice_alloc (sizeof (*hashes) * RSPAMD_SHINGLE_SIZE); + hlen = input->len > SHINGLES_WINDOW ? (input->len - SHINGLES_WINDOW + 1) : 1; + for (i = 0; i < RSPAMD_SHINGLE_SIZE; i ++) { - hashes[i] = g_array_sized_new (FALSE, FALSE, sizeof (guint64), - input->len + SHINGLES_WINDOW); + hashes[i] = g_slice_alloc (hlen * sizeof (guint64)); /* * To generate a set of hashes we just apply sha256 to the * initial key as many times as many hashes are required and @@ -71,32 +76,83 @@ rspamd_shingles_generate (GArray *input, } /* Now parse input words into a vector of hashes using rolling window */ - for (i = 0; i <= (gint)input->len; i ++) { - if (i - beg >= SHINGLES_WINDOW || i == (gint)input->len) { - for (j = beg; j < i; j ++) { - word = &g_array_index (input, rspamd_ftok_t, j); - row = rspamd_fstring_append (row, word->begin, word->len); + if (alg == RSPAMD_SHINGLES_OLD) { + for (i = 0; i <= (gint)input->len; i ++) { + if (i - beg >= SHINGLES_WINDOW || i == (gint)input->len) { + for (j = beg; j < i; j ++) { + word = &g_array_index (input, rspamd_ftok_t, j); + row = rspamd_fstring_append (row, word->begin, word->len); + } + + /* Now we need to create a new row here */ + for (j = 0; j < RSPAMD_SHINGLE_SIZE; j ++) { + rspamd_cryptobox_siphash ((guchar *)&val, row->str, row->len, + keys[j]); + g_assert (hlen > beg); + hashes[j][beg] = val; + } + + beg++; + + row = rspamd_fstring_assign (row, "", 0); } - beg++; + } + } + else { + guint64 res[SHINGLES_WINDOW * RSPAMD_SHINGLE_SIZE]; + + switch (alg) { + case RSPAMD_SHINGLES_XXHASH: + ht = RSPAMD_CRYPTOBOX_XXHASH64; + break; + case RSPAMD_SHINGLES_MUMHASH: + ht = RSPAMD_CRYPTOBOX_MUMHASH; + break; + default: + ht = RSPAMD_CRYPTOBOX_HASHFAST_INDEPENDENT; + break; + } - /* Now we need to create a new row here */ - for (j = 0; j < RSPAMD_SHINGLE_SIZE; j ++) { - rspamd_cryptobox_siphash ((guchar *)&val, row->str, row->len, - keys[j]); - g_array_append_val (hashes[j], val); + memset (res, 0, sizeof (res)); + + for (i = 0; i <= (gint)input->len; i ++) { + if (i - beg >= SHINGLES_WINDOW || i == (gint)input->len) { + + for (j = 0; j < RSPAMD_SHINGLE_SIZE; j ++) { + /* Shift hashes window to right */ + for (k = 0; k < SHINGLES_WINDOW - 1; k ++) { + res[j * SHINGLES_WINDOW + k] = + res[j * SHINGLES_WINDOW + k + 1]; + } + + word = &g_array_index (input, rspamd_ftok_t, beg); + /* Insert the last element to the pipe */ + res[j * SHINGLES_WINDOW + SHINGLES_WINDOW - 1] = + rspamd_cryptobox_fast_hash_specific (ht, + word->begin, word->len, + *(guint64 *)keys[j]); + val = 0; + for (k = 0; k < SHINGLES_WINDOW; k ++) { + val ^= res[j * SHINGLES_WINDOW + k] >> (8 * (SHINGLES_WINDOW - k - 1)); + } + + g_assert (hlen > beg); + hashes[j][beg] = val; + } + beg++; } - - row = rspamd_fstring_assign (row, "", 0); } } /* Now we need to filter all hashes and make a shingles result */ for (i = 0; i < RSPAMD_SHINGLE_SIZE; i ++) { - res->hashes[i] = filter ((guint64 *)hashes[i]->data, hashes[i]->len, + res->hashes[i] = filter (hashes[i], hlen, i, key, filterd); - g_array_free (hashes[i], TRUE); + g_slice_free1 (hlen * sizeof (guint64), hashes[i]); } + g_slice_free1 (sizeof (*hashes) * RSPAMD_SHINGLE_SIZE, hashes); + rspamd_fstring_free (row); return res; diff --git a/src/libutil/shingles.h b/src/libutil/shingles.h index d252a78f6..fd7d3bfc7 100644 --- a/src/libutil/shingles.h +++ b/src/libutil/shingles.h @@ -25,6 +25,13 @@ struct rspamd_shingle { guint64 hashes[RSPAMD_SHINGLE_SIZE]; }; +enum rspamd_shingle_alg { + RSPAMD_SHINGLES_OLD = 0, + RSPAMD_SHINGLES_XXHASH, + RSPAMD_SHINGLES_MUMHASH, + RSPAMD_SHINGLES_FAST +}; + /** * Shingles filtering function * @param input input array of hashes @@ -48,7 +55,8 @@ struct rspamd_shingle* rspamd_shingles_generate (GArray *input, const guchar key[16], rspamd_mempool_t *pool, rspamd_shingles_filter filter, - gpointer filterd); + gpointer filterd, + enum rspamd_shingle_alg alg); /** * Compares two shingles and return result as a floating point value - 1.0 diff --git a/src/libutil/str_util.c b/src/libutil/str_util.c index 7d40b15fa..a25dc32d5 100644 --- a/src/libutil/str_util.c +++ b/src/libutil/str_util.c @@ -15,7 +15,7 @@ */ #include "config.h" #include "util.h" -#include "xxhash.h" +#include "cryptobox.h" #include "url.h" #include <math.h> @@ -182,20 +182,6 @@ rspamd_strcase_equal (gconstpointer v, gconstpointer v2) return FALSE; } -#if defined(__LP64__) || defined(_LP64) -#define XXH_STATE XXH64_state_t -#define XXH_RESET XXH64_reset -#define XXH_UPDATE XXH64_update -#define XXH_DIGEST XXH64_digest -#define XXH_ONESHOT XXH64 -#else -#define XXH_STATE XXH32_state_t -#define XXH_RESET XXH32_reset -#define XXH_UPDATE XXH32_update -#define XXH_DIGEST XXH32_digest -#define XXH_ONESHOT XXH32 -#endif - static guint rspamd_icase_hash (const gchar *in, gsize len) { @@ -208,10 +194,10 @@ rspamd_icase_hash (const gchar *in, gsize len) } c; guint32 pp; } u; - XXH_STATE st; + rspamd_cryptobox_fast_hash_state_t st; fp = len - leftover; - XXH_RESET (&st, rspamd_hash_seed ()); + rspamd_cryptobox_fast_hash_init (&st, rspamd_hash_seed ()); for (i = 0; i != fp; i += 4) { u.c.c1 = s[i], u.c.c2 = s[i + 1], u.c.c3 = s[i + 2], u.c.c4 = s[i + 3]; @@ -219,7 +205,7 @@ rspamd_icase_hash (const gchar *in, gsize len) u.c.c2 = lc_map[u.c.c2]; u.c.c3 = lc_map[u.c.c3]; u.c.c4 = lc_map[u.c.c4]; - XXH_UPDATE (&st, &u.pp, sizeof (u)); + rspamd_cryptobox_fast_hash_update (&st, &u.pp, sizeof (u)); } u.pp = 0; @@ -230,11 +216,11 @@ rspamd_icase_hash (const gchar *in, gsize len) u.c.c2 = lc_map[(guchar)s[i++]]; case 1: u.c.c1 = lc_map[(guchar)s[i]]; - XXH_UPDATE (&st, &u.pp, leftover); + rspamd_cryptobox_fast_hash_update (&st, &u.pp, leftover); break; } - return XXH_DIGEST (&st); + return rspamd_cryptobox_fast_hash_final (&st); } guint @@ -255,7 +241,7 @@ rspamd_str_hash (gconstpointer key) len = strlen ((const gchar *)key); - return XXH_ONESHOT (key, len, rspamd_hash_seed ()); + return rspamd_cryptobox_fast_hash (key, len, rspamd_hash_seed ()); } gboolean @@ -1814,17 +1800,17 @@ guint rspamd_url_hash (gconstpointer u) { const struct rspamd_url *url = u; - XXH_STATE st; + rspamd_cryptobox_fast_hash_state_t st; - XXH_RESET (&st, rspamd_hash_seed ()); + rspamd_cryptobox_fast_hash_init (&st, rspamd_hash_seed ()); if (url->urllen > 0) { - XXH_UPDATE (&st, url->string, url->urllen); + rspamd_cryptobox_fast_hash_update (&st, url->string, url->urllen); } - XXH_UPDATE (&st, &url->flags, sizeof (url->flags)); + rspamd_cryptobox_fast_hash_update (&st, &url->flags, sizeof (url->flags)); - return XXH_DIGEST (&st); + return rspamd_cryptobox_fast_hash_final (&st); } /* Compare two emails for building emails tree */ diff --git a/src/libutil/upstream.c b/src/libutil/upstream.c index 020039d71..fe07e89d2 100644 --- a/src/libutil/upstream.c +++ b/src/libutil/upstream.c @@ -19,7 +19,7 @@ #include "ref.h" #include "cfg_file.h" #include "rdns.h" -#include "xxhash.h" +#include "cryptobox.h" #include "utlist.h" struct upstream_inet_addr_entry { @@ -785,7 +785,8 @@ rspamd_upstream_get_hashed (struct upstream_list *ups, const guint8 *key, guint guint32 idx; /* Generate 64 bits input key */ - k = XXH64 (key, keylen, ups->hash_seed); + k = rspamd_cryptobox_fast_hash_specific (RSPAMD_CRYPTOBOX_XXHASH64, + key, keylen, ups->hash_seed); rspamd_mutex_lock (ups->lock); idx = rspamd_consistent_hash (k, ups->alive->len); diff --git a/src/libutil/util.c b/src/libutil/util.c index 4fc3eb613..4ce90ba06 100644 --- a/src/libutil/util.c +++ b/src/libutil/util.c @@ -1981,12 +1981,13 @@ rspamd_config_libs (struct rspamd_external_libs_ctx *ctx, if (ctx != NULL) { if (cfg->local_addrs) { - if (!rspamd_map_is_map (cfg->local_addrs)) { - radix_add_generic_iplist (cfg->local_addrs, + if (ucl_object_type (cfg->local_addrs) == UCL_STRING && + !rspamd_map_is_map (ucl_object_tostring (cfg->local_addrs))) { + radix_add_generic_iplist (ucl_object_tostring (cfg->local_addrs), (radix_compressed_t **)ctx->local_addrs); } else { - rspamd_map_add (cfg, cfg->local_addrs, + rspamd_map_add_from_ucl (cfg, cfg->local_addrs, "Local addresses", rspamd_radix_read, rspamd_radix_fin, (void **) ctx->local_addrs); } diff --git a/src/lua/lua_common.c b/src/lua/lua_common.c index 63aab5830..4563d9f84 100644 --- a/src/lua/lua_common.c +++ b/src/lua/lua_common.c @@ -677,6 +677,20 @@ rspamd_lua_parse_table_arguments (lua_State *L, gint pos, lua_pop (L, 1); } break; + case 'O': + if (t != LUA_TNONE) { + *(va_arg (ap, ucl_object_t **)) = ucl_object_lua_import (L, + idx); + } + else { + failed = TRUE; + *(va_arg (ap, ucl_object_t **)) = NULL; + } + + if (is_table) { + lua_pop (L, 1); + } + break; case 'U': if (t == LUA_TNIL || t == LUA_TNONE) { failed = TRUE; diff --git a/src/lua/lua_logger.c b/src/lua/lua_logger.c index 462838ba1..a01cb8436 100644 --- a/src/lua/lua_logger.c +++ b/src/lua/lua_logger.c @@ -509,7 +509,7 @@ lua_logger_logx (lua_State *L, GLogLevelFlags level, gboolean is_string) if (map) { if (map->map) { - uid = map->map->pool->tag.uid; + uid = map->map->tag; } else { uid = "embedded"; diff --git a/src/lua/lua_map.c b/src/lua/lua_map.c index c01088343..a74ee205c 100644 --- a/src/lua/lua_map.c +++ b/src/lua/lua_map.c @@ -271,7 +271,7 @@ lua_config_add_kv_map (lua_State *L) static gchar * -lua_map_read (rspamd_mempool_t *pool, gchar *chunk, gint len, +lua_map_read (gchar *chunk, gint len, struct map_cb_data *data, gboolean final) { @@ -301,10 +301,13 @@ lua_map_read (rspamd_mempool_t *pool, gchar *chunk, gint len, } static void -lua_map_fin (rspamd_mempool_t * pool, struct map_cb_data *data) +lua_map_fin (struct map_cb_data *data) { struct lua_map_callback_data *cbdata; struct rspamd_lua_map **pmap; + struct rspamd_map *map; + + map = data->map; if (data->prev_data) { data->prev_data = NULL; @@ -314,12 +317,12 @@ lua_map_fin (rspamd_mempool_t * pool, struct map_cb_data *data) cbdata = (struct lua_map_callback_data *)data->cur_data; } else { - msg_err_pool ("no data read for map"); + msg_err_map ("no data read for map"); return; } if (cbdata->ref == -1) { - msg_err_pool ("map has no callback set"); + msg_err_map ("map has no callback set"); } else if (cbdata->data != NULL && cbdata->data->len != 0) { lua_rawgeti (cbdata->L, LUA_REGISTRYINDEX, cbdata->ref); @@ -329,7 +332,7 @@ lua_map_fin (rspamd_mempool_t * pool, struct map_cb_data *data) rspamd_lua_setclass (cbdata->L, "rspamd{map}", -1); if (lua_pcall (cbdata->L, 2, 0, 0) != 0) { - msg_info_pool ("call to %s failed: %s", "local function", + msg_info_map ("call to %s failed: %s", "local function", lua_tostring (cbdata->L, -1)); lua_pop (cbdata->L, 1); } @@ -342,8 +345,9 @@ gint lua_config_add_map (lua_State *L) { struct rspamd_config *cfg = lua_check_config (L, 1); - const gchar *map_line = NULL, *description = NULL; + const char *description = NULL; const gchar *type = NULL; + ucl_object_t *map_obj = NULL; struct lua_map_callback_data *cbdata; struct rspamd_lua_map *map, **pmap; struct rspamd_map *m; @@ -351,132 +355,22 @@ lua_config_add_map (lua_State *L) GError *err = NULL; if (cfg) { - if (lua_type (L, 2) == LUA_TTABLE) { - if (!rspamd_lua_parse_table_arguments (L, 2, &err, - "*type=S;description=S;callback=F;*url=S", - &type, &description, &cbidx, &map_line)) { - ret = luaL_error (L, "invalid table arguments: %s", err->message); - g_error_free (err); - - return ret; - } - - g_assert (type != NULL && map_line != NULL); - - if (strcmp (type, "callback") == 0) { - if (cbidx == -1) { - ret = luaL_error (L, "invalid table arguments: callback missing"); - return ret; - } - - map = rspamd_mempool_alloc0 (cfg->cfg_pool, sizeof (*map)); - map->type = RSPAMD_LUA_MAP_CALLBACK; - map->data.cbdata = rspamd_mempool_alloc0 (cfg->cfg_pool, - sizeof (*map->data.cbdata)); - cbdata = map->data.cbdata; - cbdata->L = L; - cbdata->data = NULL; - cbdata->lua_map = map; - cbdata->ref = cbidx; - - if ((m = rspamd_map_add (cfg, map_line, description, - lua_map_read, lua_map_fin, - (void **)&map->data.cbdata)) == NULL) { - msg_warn_config ("invalid map %s", map_line); - luaL_unref (L, LUA_REGISTRYINDEX, cbidx); - lua_pushnil (L); + if (!rspamd_lua_parse_table_arguments (L, 2, &err, + "*url=O;description=S;callback=F;type=S", + &map_obj, &description, &cbidx, &type)) { + ret = luaL_error (L, "invalid table arguments: %s", err->message); + g_error_free (err); - return 1; - } - } - else if (strcmp (type, "set") == 0) { - map = rspamd_mempool_alloc0 (cfg->cfg_pool, sizeof (*map)); - map->data.hash = g_hash_table_new (rspamd_strcase_hash, - rspamd_strcase_equal); - map->type = RSPAMD_LUA_MAP_SET; - - if ((m = rspamd_map_add (cfg, map_line, description, - rspamd_hosts_read, - rspamd_hosts_fin, - (void **)&map->data.hash)) == NULL) { - msg_warn_config ("invalid set map %s", map_line); - g_hash_table_destroy (map->data.hash); - lua_pushnil (L); - - return 1; - } - } - else if (strcmp (type, "map") == 0) { - map = rspamd_mempool_alloc0 (cfg->cfg_pool, sizeof (*map)); - map->data.hash = g_hash_table_new (rspamd_strcase_hash, - rspamd_strcase_equal); - map->type = RSPAMD_LUA_MAP_HASH; - - if ((m = rspamd_map_add (cfg, map_line, description, - rspamd_kv_list_read, - rspamd_kv_list_fin, - (void **)&map->data.hash)) == NULL) { - msg_warn_config ("invalid hash map %s", map_line); - g_hash_table_destroy (map->data.hash); - lua_pushnil (L); - return 1; - } - } - else if (strcmp (type, "radix") == 0) { - map = rspamd_mempool_alloc0 (cfg->cfg_pool, sizeof (*map)); - map->data.radix = radix_create_compressed (); - map->type = RSPAMD_LUA_MAP_RADIX; - - if ((m = rspamd_map_add (cfg, map_line, description, - rspamd_radix_read, - rspamd_radix_fin, - (void **)&map->data.radix)) == NULL) { - msg_warn_config ("invalid radix map %s", map_line); - radix_destroy_compressed (map->data.radix); - lua_pushnil (L); - return 1; - } - } - else if (strcmp (type, "regexp") == 0) { - map = rspamd_mempool_alloc0 (cfg->cfg_pool, sizeof (*map)); - map->data.re_map = NULL; - map->type = RSPAMD_LUA_MAP_REGEXP; - - if ((m = rspamd_map_add (cfg, map_line, description, - rspamd_regexp_list_read, - rspamd_regexp_list_fin, - (void **)&map->data.re_map)) == NULL) { - msg_warn_config ("invalid regexp map %s", map_line); - lua_pushnil (L); - return 1; - } - } - else { - ret = luaL_error (L, "invalid arguments: unknown type '%s'", type); + return ret; + } - return ret; - } + g_assert (map_obj != NULL); - map->map = m; - pmap = lua_newuserdata (L, sizeof (void *)); - *pmap = map; - rspamd_lua_setclass (L, "rspamd{map}", -1); + if (type == NULL) { + type = "callback"; } - else { - /* - * Legacy format add_map(map_line, description, callback) - */ - map_line = luaL_checkstring (L, 2); - - if (lua_gettop (L) == 4) { - description = lua_tostring (L, 3); - cbidx = 4; - } - else { - description = NULL; - cbidx = 3; - } + if (strcmp (type, "callback") == 0) { map = rspamd_mempool_alloc0 (cfg->cfg_pool, sizeof (*map)); map->type = RSPAMD_LUA_MAP_CALLBACK; map->data.cbdata = rspamd_mempool_alloc0 (cfg->cfg_pool, @@ -485,38 +379,104 @@ lua_config_add_map (lua_State *L) cbdata->L = L; cbdata->data = NULL; cbdata->lua_map = map; + cbdata->ref = cbidx; + + if ((m = rspamd_map_add_from_ucl (cfg, map_obj, description, + lua_map_read, lua_map_fin, + (void **)&map->data.cbdata)) == NULL) { + + if (cbidx != -1) { + luaL_unref (L, LUA_REGISTRYINDEX, cbidx); + } + + lua_pushnil (L); - if (lua_type (L, cbidx) == LUA_TFUNCTION) { - lua_pushvalue (L, cbidx); - /* Get a reference */ - cbdata->ref = luaL_ref (L, LUA_REGISTRYINDEX); + return 1; } - else { - /* - * Now we can create maps with delayed callbacks, to allow better - * closures generation - */ - cbdata->ref = -1; + } + else if (strcmp (type, "set") == 0) { + map = rspamd_mempool_alloc0 (cfg->cfg_pool, sizeof (*map)); + map->data.hash = g_hash_table_new (rspamd_strcase_hash, + rspamd_strcase_equal); + map->type = RSPAMD_LUA_MAP_SET; + + if ((m = rspamd_map_add_from_ucl (cfg, map_obj, description, + rspamd_hosts_read, + rspamd_hosts_fin, + (void **)&map->data.hash)) == NULL) { + g_hash_table_destroy (map->data.hash); + lua_pushnil (L); + ucl_object_unref (map_obj); + + return 1; } + } + else if (strcmp (type, "map") == 0) { + map = rspamd_mempool_alloc0 (cfg->cfg_pool, sizeof (*map)); + map->data.hash = g_hash_table_new (rspamd_strcase_hash, + rspamd_strcase_equal); + map->type = RSPAMD_LUA_MAP_HASH; + + if ((m = rspamd_map_add_from_ucl (cfg, map_obj, description, + rspamd_kv_list_read, + rspamd_kv_list_fin, + (void **)&map->data.hash)) == NULL) { + g_hash_table_destroy (map->data.hash); + lua_pushnil (L); + ucl_object_unref (map_obj); - if ((m = rspamd_map_add (cfg, map_line, description, - lua_map_read, lua_map_fin, - (void **)&map->data.cbdata)) == NULL) { - msg_warn_config ("invalid map %s", map_line); + return 1; + } + } + else if (strcmp (type, "radix") == 0) { + map = rspamd_mempool_alloc0 (cfg->cfg_pool, sizeof (*map)); + map->data.radix = radix_create_compressed (); + map->type = RSPAMD_LUA_MAP_RADIX; + + if ((m = rspamd_map_add_from_ucl (cfg, map_obj, description, + rspamd_radix_read, + rspamd_radix_fin, + (void **)&map->data.radix)) == NULL) { + radix_destroy_compressed (map->data.radix); lua_pushnil (L); + ucl_object_unref (map_obj); + + return 1; } - else { - map->map = m; - pmap = lua_newuserdata (L, sizeof (void *)); - *pmap = map; - rspamd_lua_setclass (L, "rspamd{map}", -1); + } + else if (strcmp (type, "regexp") == 0) { + map = rspamd_mempool_alloc0 (cfg->cfg_pool, sizeof (*map)); + map->data.re_map = NULL; + map->type = RSPAMD_LUA_MAP_REGEXP; + + if ((m = rspamd_map_add_from_ucl (cfg, map_obj, description, + rspamd_regexp_list_read, + rspamd_regexp_list_fin, + (void **)&map->data.re_map)) == NULL) { + lua_pushnil (L); + ucl_object_unref (map_obj); + + return 1; } } + else { + ret = luaL_error (L, "invalid arguments: unknown type '%s'", type); + ucl_object_unref (map_obj); + + return ret; + } + + map->map = m; + pmap = lua_newuserdata (L, sizeof (void *)); + *pmap = map; + rspamd_lua_setclass (L, "rspamd{map}", -1); } else { return luaL_error (L, "invalid arguments"); } + ucl_object_unref (map_obj); + return 1; } @@ -614,11 +574,17 @@ lua_map_is_signed (lua_State *L) { struct rspamd_lua_map *map = lua_check_map (L, 1); gboolean ret = FALSE; + struct rspamd_map_backend *bk; + guint i; if (map != NULL) { if (map->map) { - if (map->map->is_signed) { - ret = TRUE; + for (i = 0; i < map->map->backends->len; i ++) { + bk = g_ptr_array_index (map->map->backends, i); + if (bk->is_signed) { + ret = TRUE; + break; + } } } } @@ -635,19 +601,28 @@ lua_map_get_proto (lua_State *L) { struct rspamd_lua_map *map = lua_check_map (L, 1); const gchar *ret = "undefined"; + struct rspamd_map_backend *bk; + guint i; if (map != NULL) { if ((map->flags & RSPAMD_LUA_MAP_FLAG_EMBEDDED) || map->map == NULL) { ret = "embedded"; + lua_pushstring (L, ret); + + return 1; } else { - switch (map->map->protocol) { - case MAP_PROTO_FILE: - ret = "file"; - break; - case MAP_PROTO_HTTP: - ret = "http"; - break; + for (i = 0; i < map->map->backends->len; i ++) { + bk = g_ptr_array_index (map->map->backends, i); + switch (bk->protocol) { + case MAP_PROTO_FILE: + ret = "file"; + break; + case MAP_PROTO_HTTP: + ret = "http"; + break; + } + lua_pushstring (L, ret); } } } @@ -655,14 +630,16 @@ lua_map_get_proto (lua_State *L) return luaL_error (L, "invalid arguments"); } - lua_pushstring (L, ret); - return 1; + + return map->map->backends->len; } static int lua_map_get_sign_key (lua_State *L) { struct rspamd_lua_map *map = lua_check_map (L, 1); + struct rspamd_map_backend *bk; + guint i; GString *ret = NULL; if (map != NULL) { @@ -671,33 +648,42 @@ lua_map_get_sign_key (lua_State *L) return 1; } - if (map->map && map->map->trusted_pubkey) { - ret = rspamd_pubkey_print (map->map->trusted_pubkey, - RSPAMD_KEYPAIR_PUBKEY|RSPAMD_KEYPAIR_BASE32); + for (i = 0; i < map->map->backends->len; i ++) { + bk = g_ptr_array_index (map->map->backends, i); + + if (bk->trusted_pubkey) { + ret = rspamd_pubkey_print (bk->trusted_pubkey, + RSPAMD_KEYPAIR_PUBKEY|RSPAMD_KEYPAIR_BASE32); + } + else { + ret = NULL; + } + + if (ret) { + lua_pushlstring (L, ret->str, ret->len); + g_string_free (ret, TRUE); + } + else { + lua_pushnil (L); + } } } else { return luaL_error (L, "invalid arguments"); } - if (ret) { - lua_pushlstring (L, ret->str, ret->len); - g_string_free (ret, TRUE); - } - else { - lua_pushnil (L); - } - - return 1; + return map->map->backends->len; } static int lua_map_set_sign_key (lua_State *L) { struct rspamd_lua_map *map = lua_check_map (L, 1); + struct rspamd_map_backend *bk; const gchar *pk_str; struct rspamd_cryptobox_pubkey *pk; gsize len; + guint i; pk_str = lua_tolstring (L, 2, &len); @@ -714,12 +700,17 @@ lua_map_set_sign_key (lua_State *L) return luaL_error (L, "invalid pubkey string"); } - if (map->map->trusted_pubkey) { - /* Unref old pk */ - rspamd_pubkey_unref (map->map->trusted_pubkey); + for (i = 0; i < map->map->backends->len; i ++) { + bk = g_ptr_array_index (map->map->backends, i); + if (bk->trusted_pubkey) { + /* Unref old pk */ + rspamd_pubkey_unref (bk->trusted_pubkey); + } + + bk->trusted_pubkey = rspamd_pubkey_ref (pk); } - map->map->trusted_pubkey = pk; + rspamd_pubkey_unref (pk); } else { return luaL_error (L, "invalid arguments"); @@ -753,21 +744,29 @@ lua_map_get_uri (lua_State *L) { struct rspamd_lua_map *map = lua_check_map (L, 1); const gchar *ret = "undefined"; + struct rspamd_map_backend *bk; + guint i; if (map != NULL) { if ((map->flags & RSPAMD_LUA_MAP_FLAG_EMBEDDED) || map->map == NULL) { ret = "embedded"; + lua_pushstring (L, ret); + + return 1; } else { - ret = map->map->uri; + for (i = 0; i < map->map->backends->len; i ++) { + bk = g_ptr_array_index (map->map->backends, i); + ret = bk->uri; + lua_pushstring (L, ret); + } } } else { return luaL_error (L, "invalid arguments"); } - lua_pushstring (L, ret); - return 1; + return map->map->backends->len; } void diff --git a/src/lua/lua_task.c b/src/lua/lua_task.c index 31b0aac76..7a756679b 100644 --- a/src/lua/lua_task.c +++ b/src/lua/lua_task.c @@ -23,7 +23,7 @@ #include "cfg_file.h" #include "email_addr.h" #include "utlist.h" -#include "xxhash.h" +#include "cryptobox.h" /*** * @module rspamd_task @@ -1568,7 +1568,8 @@ lua_task_str_to_get_type (lua_State *L, gint pos) type = lua_tolstring (L, pos, &sz); if (type && sz > 0) { - h = XXH64 (type, sz, 0xdeadbabe); + h = rspamd_cryptobox_fast_hash_specific (RSPAMD_CRYPTOBOX_XXHASH64, + type, sz, 0xdeadbabe); switch (h) { case 0xDA081341FB600389ULL: /* mime */ diff --git a/src/plugins/dkim_check.c b/src/plugins/dkim_check.c index 9cf89674e..3ea31485f 100644 --- a/src/plugins/dkim_check.c +++ b/src/plugins/dkim_check.c @@ -271,13 +271,14 @@ dkim_module_config (struct rspamd_config *cfg) } if ((value = rspamd_config_get_module_opt (cfg, "dkim", "whitelist")) != NULL) { + str = ucl_obj_tostring (value); - if (!rspamd_map_is_map (str)) { + if (str && !rspamd_map_is_map (str)) { radix_add_generic_iplist (str, &dkim_module_ctx->whitelist_ip); } else { - rspamd_map_add (cfg, str, + rspamd_map_add_from_ucl (cfg, value, "DKIM whitelist", rspamd_radix_read, rspamd_radix_fin, (void **)&dkim_module_ctx->whitelist_ip); @@ -285,7 +286,7 @@ dkim_module_config (struct rspamd_config *cfg) } if ((value = rspamd_config_get_module_opt (cfg, "dkim", "domains")) != NULL) { - if (!rspamd_map_add (cfg, ucl_obj_tostring (value), + if (!rspamd_map_add_from_ucl (cfg, value, "DKIM domains", rspamd_kv_list_read, rspamd_kv_list_fin, (void **)&dkim_module_ctx->dkim_domains)) { msg_warn_config ("cannot load dkim domains list from %s", @@ -297,7 +298,7 @@ dkim_module_config (struct rspamd_config *cfg) } if (!got_trusted && (value = rspamd_config_get_module_opt (cfg, "dkim", "trusted_domains")) != NULL) { - if (!rspamd_map_add (cfg, ucl_obj_tostring (value), + if (!rspamd_map_add_from_ucl (cfg, value, "DKIM domains", rspamd_kv_list_read, rspamd_kv_list_fin, (void **)&dkim_module_ctx->dkim_domains)) { msg_warn_config ("cannot load dkim domains list from %s", diff --git a/src/plugins/fuzzy_check.c b/src/plugins/fuzzy_check.c index 385b8aadc..bf4cd3a0a 100644 --- a/src/plugins/fuzzy_check.c +++ b/src/plugins/fuzzy_check.c @@ -69,6 +69,8 @@ struct fuzzy_mime_type { struct fuzzy_rule { struct upstream_list *servers; const gchar *symbol; + const gchar *algorithm_str; + enum rspamd_shingle_alg alg; GHashTable *mappings; GList *mime_types; GList *fuzzy_headers; @@ -363,6 +365,7 @@ fuzzy_parse_rule (struct rspamd_config *cfg, const ucl_object_t *obj, gint cb_id rule = fuzzy_rule_new (fuzzy_module_ctx->default_symbol, fuzzy_module_ctx->fuzzy_pool); rule->learn_condition_cb = -1; + rule->alg = RSPAMD_SHINGLES_OLD; if ((value = ucl_object_lookup (obj, "mime_types")) != NULL) { it = NULL; @@ -410,6 +413,46 @@ fuzzy_parse_rule (struct rspamd_config *cfg, const ucl_object_t *obj, gint cb_id rule->skip_unknown = ucl_obj_toboolean (value); } + if ((value = ucl_object_lookup (obj, "algorithm")) != NULL) { + rule->algorithm_str = ucl_object_tostring (value); + + if (rule->algorithm_str) { + if (g_ascii_strcasecmp (rule->algorithm_str, "old") == 0 || + g_ascii_strcasecmp (rule->algorithm_str, "siphash") == 0) { + rule->alg = RSPAMD_SHINGLES_OLD; + } + else if (g_ascii_strcasecmp (rule->algorithm_str, "xxhash") == 0) { + rule->alg = RSPAMD_SHINGLES_XXHASH; + } + else if (g_ascii_strcasecmp (rule->algorithm_str, "mumhash") == 0) { + rule->alg = RSPAMD_SHINGLES_MUMHASH; + } + else if (g_ascii_strcasecmp (rule->algorithm_str, "fasthash") == 0 || + g_ascii_strcasecmp (rule->algorithm_str, "fast") == 0) { + rule->alg = RSPAMD_SHINGLES_FAST; + } + else { + msg_warn_config ("unknown algorithm: %s, use siphash by default"); + } + } + } + + /* Set a consistent and short string name */ + switch (rule->alg) { + case RSPAMD_SHINGLES_OLD: + rule->algorithm_str = "sip"; + break; + case RSPAMD_SHINGLES_XXHASH: + rule->algorithm_str = "xx"; + break; + case RSPAMD_SHINGLES_MUMHASH: + rule->algorithm_str = "mum"; + break; + case RSPAMD_SHINGLES_FAST: + rule->algorithm_str = "fast"; + break; + } + if ((value = ucl_object_lookup (obj, "servers")) != NULL) { rule->servers = rspamd_upstreams_create (cfg->ups_ctx); @@ -832,12 +875,12 @@ fuzzy_check_module_config (struct rspamd_config *cfg) str = ucl_obj_tostring (value); - if (!rspamd_map_is_map (str)) { + if (str && !rspamd_map_is_map (str)) { radix_add_generic_iplist (str, &fuzzy_module_ctx->whitelist); } else { - rspamd_map_add (cfg, str, + rspamd_map_add_from_ucl (cfg, value, "Fuzzy whitelist", rspamd_radix_read, rspamd_radix_fin, (void **)&fuzzy_module_ctx->whitelist); @@ -1023,6 +1066,37 @@ fuzzy_cmd_from_task_meta (struct fuzzy_rule *rule, return io; } +static void * +fuzzy_cmd_get_cached (struct fuzzy_rule *rule, + rspamd_mempool_t *pool, + struct mime_text_part *part) +{ + gchar key[32]; + gint key_part; + + memcpy (&key_part, rule->shingles_key->str, sizeof (key_part)); + rspamd_snprintf (key, sizeof (key), "%p%s%d", part, rule->algorithm_str, + key_part); + + return rspamd_mempool_get_variable (pool, key); +} + +static void +fuzzy_cmd_set_cached (struct fuzzy_rule *rule, + rspamd_mempool_t *pool, + struct mime_text_part *part, + struct rspamd_fuzzy_encrypted_shingle_cmd *data) +{ + gchar key[32]; + gint key_part; + + memcpy (&key_part, rule->shingles_key->str, sizeof (key_part)); + rspamd_snprintf (key, sizeof (key), "%p%s%d", part, rule->algorithm_str, + key_part); + /* Key is copied */ + rspamd_mempool_set_variable (pool, key, data, NULL); +} + /* * Create fuzzy command from a text part */ @@ -1035,7 +1109,7 @@ fuzzy_cmd_from_text_part (struct fuzzy_rule *rule, struct mime_text_part *part) { struct rspamd_fuzzy_shingle_cmd *shcmd; - struct rspamd_fuzzy_encrypted_shingle_cmd *encshcmd; + struct rspamd_fuzzy_encrypted_shingle_cmd *encshcmd, *cached; struct rspamd_shingle *sh; guint i; rspamd_cryptobox_hash_state_t st; @@ -1043,40 +1117,56 @@ fuzzy_cmd_from_text_part (struct fuzzy_rule *rule, GArray *words; struct fuzzy_cmd_io *io; - if (rule->peer_key) { - encshcmd = rspamd_mempool_alloc0 (pool, sizeof (*encshcmd)); + cached = fuzzy_cmd_get_cached (rule, pool, part); + + if (cached) { + /* Copy cached */ + encshcmd = rspamd_mempool_alloc (pool, sizeof (*encshcmd)); + memcpy (encshcmd, cached, sizeof (*encshcmd)); shcmd = &encshcmd->cmd; } else { - shcmd = rspamd_mempool_alloc0 (pool, sizeof (*shcmd)); - encshcmd = NULL; - } + encshcmd = rspamd_mempool_alloc0 (pool, sizeof (*encshcmd)); + shcmd = &encshcmd->cmd; - /* - * Generate hash from all words in the part - */ - rspamd_cryptobox_hash_init (&st, rule->hash_key->str, rule->hash_key->len); - words = fuzzy_preprocess_words (part, pool); + /* + * Generate hash from all words in the part + */ + rspamd_cryptobox_hash_init (&st, rule->hash_key->str, rule->hash_key->len); + words = fuzzy_preprocess_words (part, pool); - for (i = 0; i < words->len; i ++) { - word = &g_array_index (words, rspamd_ftok_t, i); - rspamd_cryptobox_hash_update (&st, word->begin, word->len); - } - rspamd_cryptobox_hash_final (&st, shcmd->basic.digest); + for (i = 0; i < words->len; i ++) { + word = &g_array_index (words, rspamd_ftok_t, i); + rspamd_cryptobox_hash_update (&st, word->begin, word->len); + } + rspamd_cryptobox_hash_final (&st, shcmd->basic.digest); + + msg_debug_pool ("loading shingles of type %s with key %*xs", + rule->algorithm_str, + 16, rule->shingles_key->str); + sh = rspamd_shingles_generate (words, + rule->shingles_key->str, pool, + rspamd_shingles_default_filter, NULL, + rule->alg); + if (sh != NULL) { + memcpy (&shcmd->sgl, sh, sizeof (shcmd->sgl)); + shcmd->basic.shingles_count = RSPAMD_SHINGLE_SIZE; + } - msg_debug_pool ("loading shingles with key %*xs", 16, - rule->shingles_key->str); - sh = rspamd_shingles_generate (words, - rule->shingles_key->str, pool, - rspamd_shingles_default_filter, NULL); - if (sh != NULL) { - memcpy (&shcmd->sgl, sh, sizeof (shcmd->sgl)); - shcmd->basic.shingles_count = RSPAMD_SHINGLE_SIZE; + /* + * We always save encrypted command as it can handle both + * encrypted and unencrypted requests. + * + * Since it is copied when obtained from the cache, it is safe to use + * it this way. + */ + fuzzy_cmd_set_cached (rule, pool, part, encshcmd); } shcmd->basic.tag = ottery_rand_uint32 (); shcmd->basic.cmd = c; shcmd->basic.version = RSPAMD_FUZZY_VERSION; + if (c != FUZZY_CHECK) { shcmd->basic.flag = flag; shcmd->basic.value = weight; diff --git a/src/plugins/spf.c b/src/plugins/spf.c index 153422f51..67c8732e7 100644 --- a/src/plugins/spf.c +++ b/src/plugins/spf.c @@ -213,12 +213,12 @@ spf_module_config (struct rspamd_config *cfg) str = ucl_obj_tostring (value); - if (!rspamd_map_is_map (str)) { + if (str && !rspamd_map_is_map (str)) { radix_add_generic_iplist (str, &spf_module_ctx->whitelist_ip); } else { - rspamd_map_add (cfg, str, + rspamd_map_add_from_ucl (cfg, value, "SPF whitelist", rspamd_radix_read, rspamd_radix_fin, (void **)&spf_module_ctx->whitelist_ip); diff --git a/src/plugins/surbl.c b/src/plugins/surbl.c index 4dda832b3..87b8effa7 100644 --- a/src/plugins/surbl.c +++ b/src/plugins/surbl.c @@ -80,7 +80,7 @@ module_t surbl_module = { }; static void -exception_insert (gpointer st, gconstpointer key, gpointer value) +exception_insert (gpointer st, gconstpointer key, gconstpointer value) { GHashTable **t = st; gint level = 0; @@ -103,27 +103,27 @@ exception_insert (gpointer st, gconstpointer key, gpointer value) val = g_malloc (sizeof (rspamd_ftok_t)); val->begin = key; val->len = strlen (key); + if (t[level] == NULL) { t[level] = g_hash_table_new_full (rspamd_ftok_icase_hash, rspamd_ftok_icase_equal, g_free, - NULL); + g_free); } - g_hash_table_insert (t[level], val, value); + + g_hash_table_insert (t[level], val, g_strdup (value)); } static gchar * -read_exceptions_list (rspamd_mempool_t * pool, - gchar * chunk, +read_exceptions_list (gchar * chunk, gint len, struct map_cb_data *data, gboolean final) { if (data->cur_data == NULL) { - data->cur_data = rspamd_mempool_alloc0 (pool, - sizeof (GHashTable *) * MAX_LEVELS); + data->cur_data = g_malloc (sizeof (GHashTable *) * MAX_LEVELS); } - return rspamd_parse_kv_list (pool, + return rspamd_parse_kv_list ( chunk, len, data, @@ -133,7 +133,7 @@ read_exceptions_list (rspamd_mempool_t * pool, } static void -fin_exceptions_list (rspamd_mempool_t * pool, struct map_cb_data *data) +fin_exceptions_list (struct map_cb_data *data) { GHashTable **t; gint i; @@ -145,11 +145,12 @@ fin_exceptions_list (rspamd_mempool_t * pool, struct map_cb_data *data) g_hash_table_destroy (t[i]); } } + g_free (t); } } static void -redirector_insert (gpointer st, gconstpointer key, gpointer value) +redirector_insert (gpointer st, gconstpointer key, gconstpointer value) { GHashTable *tld_hash = st; const gchar *p = key, *begin = key; @@ -200,8 +201,7 @@ redirector_item_free (gpointer p) } static gchar * -read_redirectors_list (rspamd_mempool_t * pool, - gchar * chunk, +read_redirectors_list (gchar * chunk, gint len, struct map_cb_data *data, gboolean final) @@ -217,17 +217,17 @@ read_redirectors_list (rspamd_mempool_t * pool, data->cur_data = tld_hash; } - return rspamd_parse_kv_list (pool, + return rspamd_parse_kv_list ( chunk, len, data, - (insert_func) redirector_insert, + redirector_insert, "", final); } void -fin_redirectors_list (rspamd_mempool_t * pool, struct map_cb_data *data) +fin_redirectors_list (struct map_cb_data *data) { GHashTable *tld_hash; @@ -528,7 +528,7 @@ surbl_module_config (struct rspamd_config *cfg) if ((value = rspamd_config_get_module_opt (cfg, "surbl", "redirector_hosts_map")) != NULL) { - if (!rspamd_map_add (cfg, ucl_obj_tostring (value), + if (!rspamd_map_add_from_ucl (cfg, value, "SURBL redirectors list", read_redirectors_list, fin_redirectors_list, (void **)&surbl_module_ctx->redirector_map_data)) { @@ -546,7 +546,7 @@ surbl_module_config (struct rspamd_config *cfg) } if ((value = rspamd_config_get_module_opt (cfg, "surbl", "exceptions")) != NULL) { - if (rspamd_map_add (cfg, ucl_obj_tostring (value), + if (rspamd_map_add_from_ucl (cfg, value, "SURBL exceptions list", read_exceptions_list, fin_exceptions_list, (void **)&surbl_module_ctx->exceptions)) { surbl_module_ctx->tld2_file = rspamd_mempool_strdup ( @@ -556,7 +556,7 @@ surbl_module_config (struct rspamd_config *cfg) } if ((value = rspamd_config_get_module_opt (cfg, "surbl", "whitelist")) != NULL) { - if (rspamd_map_add (cfg, ucl_obj_tostring (value), + if (rspamd_map_add_from_ucl (cfg, value, "SURBL whitelist", rspamd_hosts_read, rspamd_hosts_fin, (void **)&surbl_module_ctx->whitelist)) { surbl_module_ctx->whitelist_file = rspamd_mempool_strdup ( diff --git a/src/rspamd.c b/src/rspamd.c index e67977cbd..72e676267 100644 --- a/src/rspamd.c +++ b/src/rspamd.c @@ -21,7 +21,7 @@ #include "libserver/worker_util.h" #include "libserver/rspamd_control.h" #include "ottery.h" -#include "xxhash.h" +#include "cryptobox.h" #include "utlist.h" #include "unix-std.h" /* sysexits */ @@ -409,30 +409,30 @@ systemd_get_socket (struct rspamd_main *rspamd_main, gint number) static inline uintptr_t make_listen_key (struct rspamd_worker_bind_conf *cf) { - XXH64_state_t st; + rspamd_cryptobox_fast_hash_state_t st; guint i, keylen; guint8 *key; rspamd_inet_addr_t *addr; guint16 port; - XXH64_reset (&st, rspamd_hash_seed ()); + rspamd_cryptobox_fast_hash_init (&st, rspamd_hash_seed ()); if (cf->is_systemd) { - XXH64_update (&st, "systemd", sizeof ("systemd")); - XXH64_update (&st, &cf->cnt, sizeof (cf->cnt)); + rspamd_cryptobox_fast_hash_update (&st, "systemd", sizeof ("systemd")); + rspamd_cryptobox_fast_hash_update (&st, &cf->cnt, sizeof (cf->cnt)); } else { - XXH64_update (&st, cf->name, strlen (cf->name)); + rspamd_cryptobox_fast_hash_update (&st, cf->name, strlen (cf->name)); for (i = 0; i < cf->cnt; i ++) { addr = g_ptr_array_index (cf->addrs, i); key = rspamd_inet_address_get_radix_key ( addr, &keylen); - XXH64_update (&st, key, keylen); + rspamd_cryptobox_fast_hash_update (&st, key, keylen); port = rspamd_inet_address_get_port (addr); - XXH64_update (&st, &port, sizeof (port)); + rspamd_cryptobox_fast_hash_update (&st, &port, sizeof (port)); } } - return XXH64_digest (&st); + return rspamd_cryptobox_fast_hash_final (&st); } static void @@ -959,7 +959,7 @@ rspamd_control_handler (gint fd, short what, gpointer arg) static guint rspamd_spair_hash (gconstpointer p) { - return XXH64 (p, PAIR_ID_LEN, rspamd_hash_seed ()); + return rspamd_cryptobox_fast_hash (p, PAIR_ID_LEN, rspamd_hash_seed ()); } static gboolean diff --git a/src/rspamd_proxy.c b/src/rspamd_proxy.c index 06192dafc..32a2937e8 100644 --- a/src/rspamd_proxy.c +++ b/src/rspamd_proxy.c @@ -69,6 +69,7 @@ struct rspamd_http_upstream { struct rspamd_http_mirror { gchar *name; + gchar *settings_id; struct upstream_list *u; struct rspamd_cryptobox_pubkey *key; gdouble prob; @@ -362,6 +363,11 @@ rspamd_proxy_parse_mirror (rspamd_mempool_t *pool, lua_settop (L, 0); } + elt = ucl_object_lookup_any (obj, "settings", "settings_id", NULL); + if (elt && ucl_object_type (elt) == UCL_STRING) { + up->settings_id = g_strdup (ucl_object_tostring (elt)); + } + g_ptr_array_add (ctx->mirrors, up); return TRUE; @@ -902,6 +908,11 @@ proxy_open_mirror_connections (struct rspamd_proxy_session *session) rspamd_http_message_remove_header (msg, "Content-Length"); rspamd_http_message_remove_header (msg, "Key"); + if (m->settings_id != NULL) { + rspamd_http_message_remove_header (msg, "Settings-ID"); + rspamd_http_message_add_header (msg, "Settings-ID", m->settings_id); + } + bk_conn->backend_conn = rspamd_http_connection_new ( NULL, proxy_backend_mirror_error_handler, diff --git a/test/lua/unit/url.lua b/test/lua/unit/url.lua index 06082afe0..de274425d 100644 --- a/test/lua/unit/url.lua +++ b/test/lua/unit/url.lua @@ -17,8 +17,8 @@ context("URL check functions", function() test("Extract urls from text", function() local pool = mpool.create() local cases = { - {"test.com text", {"test.com", nil}}, - {" test.com text", {"test.com", nil}}, + {"test.com", {"test.com", nil}}, + {" test.com", {"test.com", nil}}, {"<test.com> text", {"test.com", nil}}, {"test.com. text", {"test.com", nil}}, {"mailto:A.User@example.com text", {"example.com", "A.User"}}, diff --git a/test/rspamd_shingles_test.c b/test/rspamd_shingles_test.c index 3eed5eee5..971502c93 100644 --- a/test/rspamd_shingles_test.c +++ b/test/rspamd_shingles_test.c @@ -41,6 +41,7 @@ generate_fuzzy_words (gsize cnt, gsize max_len) for (i = 0; i < cnt; i ++) { wlen = ottery_rand_range (max_len) + 1; + /* wlen = max_len; */ w.len = wlen; t = g_malloc (wlen); @@ -81,7 +82,8 @@ free_fuzzy_words (GArray *ar) } static void -test_case (gsize cnt, gsize max_len, gdouble perm_factor) +test_case (gsize cnt, gsize max_len, gdouble perm_factor, + enum rspamd_shingle_alg alg) { GArray *input; struct rspamd_shingle *sgl, *sgl_permuted; @@ -91,32 +93,129 @@ test_case (gsize cnt, gsize max_len, gdouble perm_factor) ottery_rand_bytes (key, sizeof (key)); input = generate_fuzzy_words (cnt, max_len); - ts1 = rspamd_get_ticks (); + ts1 = rspamd_get_virtual_ticks (); sgl = rspamd_shingles_generate (input, key, NULL, - rspamd_shingles_default_filter, NULL); - ts2 = rspamd_get_ticks (); + rspamd_shingles_default_filter, NULL, alg); + ts2 = rspamd_get_virtual_ticks (); permute_vector (input, perm_factor); sgl_permuted = rspamd_shingles_generate (input, key, NULL, - rspamd_shingles_default_filter, NULL); + rspamd_shingles_default_filter, NULL, alg); res = rspamd_shingles_compare (sgl, sgl_permuted); - msg_debug ("percentage of common shingles: %.3f, generate time: %hd usec", - res, (gint)(ts1 - ts2) * 1000); - g_assert_cmpfloat (fabs ((1.0 - res) - sqrt (perm_factor)), <=, 0.20); + msg_info ("%d (%z words of %z max len, %.2f perm factor):" + " percentage of common shingles: %.3f, generate time: %.4f sec", + alg, cnt, max_len, perm_factor, res, ts2 - ts1); + //g_assert_cmpfloat (fabs ((1.0 - res) - sqrt (perm_factor)), <=, 0.25); free_fuzzy_words (input); g_free (sgl); g_free (sgl_permuted); } +static const guint64 expected_old[RSPAMD_SHINGLE_SIZE] = { + 0x2a97e024235cedc5, 0x46238acbcc55e9e0, 0x2378ff151af075b3, 0xde1f29a95cad109, + 0x5d3bbbdb5db5d19f, 0x4d75a0ec52af10a6, 0x215ecd6372e755b5, 0x7b52295758295350, + 0x17387d1beddc7f62, 0x26264ca879ffcada, 0x49d4a65ec0ab9914, 0xa2763e6995350cf, + 0x3f4570231449c13f, 0x3309f857a0e54ee5, 0x24e4c5b561b0fce3, 0x1f153e3b275bfd1b, + 0x4d067dbc97c3fd78, 0x9ffa2d076fa4f8bc, 0x3d8907f84b9ffc6c, 0x1cfd664c5262d256, + 0xcdd7e744b699c15, 0x5544a2bbe05124f7, 0x5a4029b5d6a06f7, 0xd5adfbdc756c0e4, + 0xa504b23d9689a67e, 0x15d945f7007de115, 0xbf676c0522a2c51d, 0x1c8d8163ad4b0f93, + 0xa2c4ba20799344d7, 0x27c6f13c02134388, 0xa1d443d31fd5a3, 0x99fbca9f8563080, +}; + +static const guint64 expected_xxhash[RSPAMD_SHINGLE_SIZE] = { + 0x33b134be11a705a, 0x36e2ea657aa36903, 0x6547b57f7470ce9d, 0x8253eb6d2f8f158e, + 0x1cc99e3cf22388f, 0x2396da27ea36ffe8, 0x1b457d208ad3d96c, 0x2d6ac733d7a2c107, + 0x17849cbed75cc4d1, 0x4dd94e772330e804, 0x39f592fa32014ed4, 0xa2f6229ad356461, + 0x6dc825879a057b37, 0x886b12cef4338b05, 0x8b23af68c186518a, 0x16932b40339aaf02, + 0x412090c6bb0b719c, 0x4d4a88cbdf1935f3, 0x233bcbddb5f67a7, 0x474719442a33dcca, + 0x2da7ec30563e622, 0x7ab90086960e1ad2, 0x3ea2b45582539f75, 0x108cd9287d95a6c5, + 0x69ba7c67c115597, 0x10880860eb75e982, 0x16f3d90e6ab995a6, 0x5f24ea09379b9f5c, + 0x3c2dc04088e8fe54, 0x340b8cf1c6f1227, 0x193bc348ed2e9ce7, 0x68454ef43da9c748, +}; + +static const guint64 expected_mumhash[RSPAMD_SHINGLE_SIZE] = { + 0x38d35473b80a7fc3, 0x1300531adc2d16a1, 0x26883bc89f78f4bd, 0x57de365ef6d1a62, + 0x773603185fcbb20a, 0x39c6cbd7ebbeaa88, 0x676c7445ad167e70, 0x432315d1ecc4c0b1, + 0x1380b95756dbb078, 0x9ee12832fa53b90e, 0x72970be210f0dd0b, 0x62909bd520f5956, + 0x66196965a45eb32a, 0x2466a9ca5436620e, 0x157b828b10e10f6e, 0x429bb673a523a7e5, + 0x51a6ace94f320f88, 0x23f53a30bd7d7147, 0xbee557664d3bc34c, 0x65730c88cd212a9, + 0x87e72c0cd05fd0e, 0x417a744669baeb3d, 0x78e26f7917829324, 0x439777dcfc25fdf4, + 0x582eac6ff013f00b, 0x1e40aa90e367f4af, 0x301d14a28d6c23a2, 0x34140ecb21b6c69, + 0x390a091c8b4c31b9, 0x2e35fecf9fff0ae7, 0x94322e1a5cf31f1b, 0x33cb9190905e049a, +}; + +static const guint64 expected_fasthash[RSPAMD_SHINGLE_SIZE] = { + 0x3843a716f94828a6, 0x13fd5386dda3b28d, 0x71cb09de527c40a, 0x5d6f59ffd839c62, + 0x7ce3633acd568476, 0x9014298cbd00167, 0x6708ec29eedb5350, 0x2882931ff2c5c410, + 0x1839d8b947b12571, 0x58f7bc3829173302, 0x4dac8103da51abc4, 0x6c5cbcc6fb1de28, + 0x31fefcef9bafb755, 0x6f2d1a0b1feca401, 0x3e71f3718e520b06, 0x42f6ba11164ab231, + 0x21164d010bd76f4a, 0x4c597ccc7b60f620, 0x2cf1ca3383b77574, 0x54ff9c01660b8add, + 0x2ca344758f40380d, 0x1b962321bd37d0f2, 0x9323bb99c32bc418, 0x375659d0eef2b8f2, + 0x1dbd23a1030084b7, 0x83cb978dee06aa0a, 0x42c97be5b27a7763, 0x3b6d6b7270ed765, + 0x125c12fdba584aed, 0x1c826397afe58763, 0x8bdbe2d43f3eda96, 0x954cda70edf6591f, +}; + void rspamd_shingles_test_func (void) { - //test_case (5, 100, 0.5); - test_case (200, 10, 0.1); - test_case (500, 20, 0.01); - test_case (5000, 20, 0.01); - test_case (5000, 15, 0); - test_case (5000, 30, 1.0); + enum rspamd_shingle_alg alg = RSPAMD_SHINGLES_OLD; + struct rspamd_shingle *sgl; + guchar key[16]; + GArray *input; + rspamd_ftok_t tok; + int i; + + memset (key, 0, sizeof (key)); + input = g_array_sized_new (FALSE, FALSE, sizeof (rspamd_ftok_t), 5); + + for (i = 0; i < 5; i ++) { + gchar *b = g_alloca (8); + memset (b, 0, 8); + memcpy (b + 1, "test", 4); + b[0] = 'a' + i; + tok.begin = b; + tok.len = 5 + ((i + 1) % 4); + g_array_append_val (input, tok); + } + + sgl = rspamd_shingles_generate (input, key, NULL, + rspamd_shingles_default_filter, NULL, RSPAMD_SHINGLES_OLD); + for (i = 0; i < RSPAMD_SHINGLE_SIZE; i ++) { + g_assert (sgl->hashes[i] == expected_old[i]); + } + g_free (sgl); + + sgl = rspamd_shingles_generate (input, key, NULL, + rspamd_shingles_default_filter, NULL, RSPAMD_SHINGLES_XXHASH); + for (i = 0; i < RSPAMD_SHINGLE_SIZE; i ++) { + g_assert (sgl->hashes[i] == expected_xxhash[i]); + } + g_free (sgl); + + sgl = rspamd_shingles_generate (input, key, NULL, + rspamd_shingles_default_filter, NULL, RSPAMD_SHINGLES_MUMHASH); + for (i = 0; i < RSPAMD_SHINGLE_SIZE; i ++) { + g_assert (sgl->hashes[i] == expected_mumhash[i]); + } + g_free (sgl); + + sgl = rspamd_shingles_generate (input, key, NULL, + rspamd_shingles_default_filter, NULL, RSPAMD_SHINGLES_FAST); + for (i = 0; i < RSPAMD_SHINGLE_SIZE; i ++) { + g_assert (sgl->hashes[i] == expected_fasthash[i]); + } + g_free (sgl); + + for (alg = RSPAMD_SHINGLES_OLD; alg <= RSPAMD_SHINGLES_FAST; alg ++) { + test_case (200, 10, 0.1, alg); + test_case (500, 20, 0.01, alg); + test_case (5000, 20, 0.01, alg); + test_case (5000, 15, 0, alg); + test_case (5000, 30, 1.0, alg); + test_case (50000, 30, 0.02, alg); + test_case (50000, 5, 0.02, alg); + test_case (50000, 16, 0.02, alg); + } } diff --git a/utils/rspamd_stats.pl b/utils/rspamd_stats.pl new file mode 100644 index 000000000..efb89bab3 --- /dev/null +++ b/utils/rspamd_stats.pl @@ -0,0 +1,173 @@ +#!/usr/bin/env perl + +use Data::Dumper; +use Getopt::Long; +use warnings; +use strict; +use Time::Piece; + +my @symbols_search; +my $start = ""; +my $end = ""; +my $reject_score = 30.0; +my $junk_score = 7.5; +my $log_file = "/var/log/rspamd/rspamd.log"; +my $dateformat = "%Y-%m-%d %H:%M:%S"; + +GetOptions( + "reject-score=f" => \$reject_score, + "junk-score=f" => \$junk_score, + "start=s" => \$start, + "end=s" => \$end, + "symbol=s@" => \@symbols_search, + "log=s" => \$log_file, + "dateformat=s" => \$dateformat); + +# Global vars +my $total = 0; +my $total_spam = 0; +my $total_junk = 0; +my $junk_symbols = 0; +my $spam_symbols = 0; +my $ham_symbols = 0; +my $ham_spam_change = 0; +my $ham_junk_change = 0; +my $diff_alpha = 0.1; +my %sym_res; + +my $st = 0; +my $ed = 0; + +if ($start ne "") { + $st = Time::Piece->strptime($start, $dateformat); +} + +if ($end ne "") { + $ed = Time::Piece->strptime($end, $dateformat); +} + +my $rspamd_log; + +if ($log_file eq '-') { + $rspamd_log = \*STDIN; +} +else { + open($rspamd_log, '<', $log_file) or die "cannot open $log_file"; +} + +foreach my $s (@symbols_search) { + $sym_res{$s} = { + hits => 0, + spam_hits => 0, + junk_hits => 0, + spam_change => 0, + junk_change => 0, + weight => 0, + }; +} + +while(<$rspamd_log>) { + if (/^.*rspamd_task_write_log.*$/) { + my @elts = split /\s+/; + my $ts = $elts[0] . ' ' . $elts[1]; + + if ($st or $ed) { + my $dt = Time::Piece->strptime($ts, $dateformat) or die "cannot parse $ts"; + + if ($dt) { + if ($st != 0 && $dt < $st) { + next; + } + if ($ed != 0 && $dt > $ed) { + next; + } + } + } + + if ($_ !~ /\[(-?\d+(?:\.\d+)?)\/(-?\d+(?:\.\d+)?)\]\s+\[([^\]]+)\]/) { + #print "BAD\n"; + next; + } + + $total ++; + my $score = $1 * 1.0; + + if ($score >= $reject_score) { + $total_spam ++; + } + elsif ($score >= $junk_score) { + $total_junk ++; + } + + # Symbols + my @symbols = split /,/, $3; + + foreach my $s (@symbols_search) { + my @selected = grep /$s/, @symbols; + + if (scalar(@selected) > 0) { + $selected[0] =~ /^[^\(]+\(([^\)]+)\).*$/; + my $sym_score = $1; + + if ($sym_score < $diff_alpha) { + next; + } + + my $r = $sym_res{$s}; + $r->{hits} ++; + $r->{weight} += $sym_score; + my $is_spam = 0; + my $is_junk = 0; + + if ($score >= $reject_score) { + $is_spam = 1; + $r->{spam_hits} ++; + } + elsif ($score >= $junk_score) { + $is_junk = 1; + $r->{junk_hits} ++; + } + + my $score_without = $score - $sym_score; + + if ($is_spam && $score_without < $reject_score) { + $r->{spam_change} ++; + } + if ($is_junk && $score_without < $junk_score) { + $r->{junk_change} ++; + } + } + } + } +} + +my $total_ham = $total - ($total_spam + $total_junk); + +if ($total > 0) { + while (my ($s, $r) = each(%sym_res)) { + if ($r->{hits} > 0) { + my $th = $r->{hits}; + my $sh = $r->{spam_hits}; + my $jh = $r->{junk_hits}; + my $hh = $r->{hits} - $sh - $jh; + my $htp = $hh * 100.0 / $total_ham if $total_ham != 0; + my $stp = $sh * 100.0 / $total_spam if $total_spam != 0; + my $jtp = $jh * 100.0 / $total_junk if $total_junk != 0; + printf "Symbol: %s (weight %.3f) (%d hits, %.3f%%)\nHam hits: %d (%.3f%%), total ham: %d (ham with $s: %.3f%%)\nSpam hits: %d (%.3f%%), total spam: %d (spam with $s: %.3f%%)\nJunk hits: %d (%.3f%%), total junk: %d (junk with $s: %.3f%%)\n", + $s, $r->{weight} / $r->{hits}, $th, ($th / $total * 100.0), + $hh, ($hh / $th * 100.0), $total_ham, ($htp or 0), + $sh, ($sh / $th * 100.0), $total_spam, ($stp or 0), + $jh, ($jh / $th * 100.0), $total_junk, ($jtp or 0); + my $schp = $r->{spam_change} / $total_spam * 100.0 if $total_spam; + my $jchp = $r->{junk_change} / $total_junk * 100.0 if $total_junk; + printf "Spam changes (ham/junk -> spam): %d (%.3f%%), total percentage (changes / spam hits): %.3f%%\nJunk changes (ham -> junk): %d (%.3f%%), total percentage (changes / junk hits): %.3f%%\n", + $r->{spam_change}, ($r->{spam_change} / $th * 100.0), ($schp or 0), + $r->{junk_change}, ($r->{junk_change} / $th * 100.0), ($jchp or 0); + } + else { + print "Symbol $s has not been met\n"; + } + + print '*' x 20 . "\n"; + } +} |