diff options
author | Larry Hynes <larry@larryhynes.com> | 2016-05-13 19:45:03 +0100 |
---|---|---|
committer | Larry Hynes <larry@larryhynes.com> | 2016-05-13 19:45:03 +0100 |
commit | 96b4bd69ef1e5e30cb0d1ffad2617b2c7bfd629d (patch) | |
tree | 2179566b59a65ae27500c2e4e9dcae7149d60486 /src | |
parent | 5559a4a7b7ff358545a434434a94c3b9e82e0976 (diff) | |
parent | 25b777b360662a391454f64346589dbec792831f (diff) | |
download | rspamd-96b4bd69ef1e5e30cb0d1ffad2617b2c7bfd629d.tar.gz rspamd-96b4bd69ef1e5e30cb0d1ffad2617b2c7bfd629d.zip |
Merge remote-tracking branch 'upstream/master'
* upstream/master: (36 commits)
[Feature] Add versions to fuzzy storage
[Fix] Another fix for maps scheduling
[Fix] Fix events handling when scheduling map wacth
[Fix] Try to fix false positive URL detections in text parts
[Fix] Include fuzzy key to distinguish storages with different keys
[Feature] Rework and improve fuzzy storage
[Test] Add static test for shingles
[Minor] Adjust hashes
[Feature] Use metrohash as well
[Minor] 8 bytes fits perfect for mumhash
[Fix] Fix handling of the same words
[Feature] Further micro-optimizations for hashing and shingles
[Feature] Optimize alignment to speed up hashing
[Feature] Try to select the optimal possible function for input
[Feature] Limit logging of elements that could have too many items
[Feature] Add more algorithms for shingles generation
[Fix] Fix compilation issue
[Feature] Use mumhash for words hashing
[Feature] Add and use mumhash for non-crypto hashing
[Feature] Add a simple script to evaluate rspamd rules in the logs
...
Diffstat (limited to 'src')
42 files changed, 1605 insertions, 779 deletions
diff --git a/src/controller.c b/src/controller.c index 06782e850..8b16eda19 100644 --- a/src/controller.c +++ b/src/controller.c @@ -145,7 +145,7 @@ struct rspamd_controller_worker_ctx { /* SSL private key */ gchar *ssl_key; /* A map of secure IP */ - GList *secure_ip; + const ucl_object_t *secure_ip; radix_compressed_t *secure_map; /* Static files dir */ @@ -775,10 +775,10 @@ rspamd_controller_handle_maps (struct rspamd_http_connection_entry *conn_ent, struct rspamd_controller_session *session = conn_ent->ud; GList *cur, *tmp = NULL; struct rspamd_map *map; + struct rspamd_map_backend *bk; gboolean editable; ucl_object_t *obj, *top; - if (!rspamd_controller_check_password (conn_ent, session, msg, FALSE)) { return 0; } @@ -788,8 +788,10 @@ rspamd_controller_handle_maps (struct rspamd_http_connection_entry *conn_ent, cur = session->ctx->cfg->maps; while (cur) { map = cur->data; - if (map->protocol == MAP_PROTO_FILE) { - if (access (map->uri, R_OK) == 0) { + bk = g_ptr_array_index (map->backends, 0); + + if (bk->protocol == MAP_PROTO_FILE) { + if (access (bk->uri, R_OK) == 0) { tmp = g_list_prepend (tmp, map); } } @@ -799,7 +801,8 @@ rspamd_controller_handle_maps (struct rspamd_http_connection_entry *conn_ent, cur = tmp; while (cur) { map = cur->data; - editable = (access (map->uri, W_OK) == 0); + bk = g_ptr_array_index (map->backends, 0); + editable = (access (bk->uri, W_OK) == 0); obj = ucl_object_typed_new (UCL_OBJECT); ucl_object_insert_key (obj, ucl_object_fromint (map->id), @@ -808,7 +811,7 @@ rspamd_controller_handle_maps (struct rspamd_http_connection_entry *conn_ent, ucl_object_insert_key (obj, ucl_object_fromstring (map->description), "description", 0, false); } - ucl_object_insert_key (obj, ucl_object_fromstring (map->uri), + ucl_object_insert_key (obj, ucl_object_fromstring (bk->uri), "uri", 0, false); ucl_object_insert_key (obj, ucl_object_frombool (editable), "editable", 0, false); @@ -840,6 +843,7 @@ rspamd_controller_handle_get_map (struct rspamd_http_connection_entry *conn_ent, struct rspamd_controller_session *session = conn_ent->ud; GList *cur; struct rspamd_map *map; + struct rspamd_map_backend *bk; const rspamd_ftok_t *idstr; struct stat st; gint fd; @@ -870,7 +874,8 @@ rspamd_controller_handle_get_map (struct rspamd_http_connection_entry *conn_ent, cur = session->ctx->cfg->maps; while (cur) { map = cur->data; - if (map->id == id && map->protocol == MAP_PROTO_FILE) { + bk = g_ptr_array_index (map->backends, 0); + if (map->id == id && bk->protocol == MAP_PROTO_FILE) { found = TRUE; break; } @@ -883,8 +888,10 @@ rspamd_controller_handle_get_map (struct rspamd_http_connection_entry *conn_ent, return 0; } - if (stat (map->uri, &st) == -1 || (fd = open (map->uri, O_RDONLY)) == -1) { - msg_err_session ("cannot open map %s: %s", map->uri, strerror (errno)); + bk = g_ptr_array_index (map->backends, 0); + + if (stat (bk->uri, &st) == -1 || (fd = open (bk->uri, O_RDONLY)) == -1) { + msg_err_session ("cannot open map %s: %s", bk->uri, strerror (errno)); rspamd_controller_send_error (conn_ent, 500, "500 map open error"); return 0; } @@ -898,7 +905,7 @@ rspamd_controller_handle_get_map (struct rspamd_http_connection_entry *conn_ent, if (read (fd, reply->body->str, st.st_size) == -1) { close (fd); rspamd_http_message_free (reply); - msg_err_session ("cannot read map %s: %s", map->uri, strerror (errno)); + msg_err_session ("cannot read map %s: %s", bk->uri, strerror (errno)); rspamd_controller_send_error (conn_ent, 500, "500 map read error"); return 0; } @@ -1800,6 +1807,7 @@ rspamd_controller_handle_savemap (struct rspamd_http_connection_entry *conn_ent, struct rspamd_controller_session *session = conn_ent->ud; GList *cur; struct rspamd_map *map; + struct rspamd_map_backend *bk; struct rspamd_controller_worker_ctx *ctx; const rspamd_ftok_t *idstr; gulong id; @@ -1838,7 +1846,8 @@ rspamd_controller_handle_savemap (struct rspamd_http_connection_entry *conn_ent, cur = ctx->cfg->maps; while (cur) { map = cur->data; - if (map->id == id && map->protocol == MAP_PROTO_FILE) { + bk = g_ptr_array_index (map->backends, 0); + if (map->id == id && bk->protocol == MAP_PROTO_FILE) { found = TRUE; break; } @@ -1851,24 +1860,24 @@ rspamd_controller_handle_savemap (struct rspamd_http_connection_entry *conn_ent, return 0; } - if (g_atomic_int_get (map->locked)) { - msg_info_session ("map locked: %s", map->uri); + bk = g_ptr_array_index (map->backends, 0); + if (g_atomic_int_compare_and_exchange (map->locked, 0, 1)) { + msg_info_session ("map locked: %s", bk->uri); rspamd_controller_send_error (conn_ent, 404, "Map is locked"); return 0; } /* Set lock */ - g_atomic_int_set (map->locked, 1); - fd = open (map->uri, O_WRONLY | O_TRUNC); + fd = open (bk->uri, O_WRONLY | O_TRUNC); if (fd == -1) { g_atomic_int_set (map->locked, 0); - msg_info_session ("map %s open error: %s", map->uri, strerror (errno)); + msg_info_session ("map %s open error: %s", bk->uri, strerror (errno)); rspamd_controller_send_error (conn_ent, 404, "Map id not found"); return 0; } if (write (fd, msg->body_buf.begin, msg->body_buf.len) == -1) { - msg_info_session ("map %s write error: %s", map->uri, strerror (errno)); + msg_info_session ("map %s write error: %s", bk->uri, strerror (errno)); close (fd); g_atomic_int_set (map->locked, 0); rspamd_controller_send_error (conn_ent, 500, "Map write error"); @@ -1877,7 +1886,7 @@ rspamd_controller_handle_savemap (struct rspamd_http_connection_entry *conn_ent, msg_info_session ("<%s>, map %s saved", rspamd_inet_address_to_string (session->from_addr), - map->uri); + bk->uri); /* Close and unlock */ close (fd); g_atomic_int_set (map->locked, 0); @@ -2481,7 +2490,7 @@ init_controller_worker (struct rspamd_config *cfg) rspamd_rcl_register_worker_option (cfg, type, "secure_ip", - rspamd_rcl_parse_struct_string_list, + rspamd_rcl_parse_struct_ucl, ctx, G_STRUCT_OFFSET (struct rspamd_controller_worker_ctx, secure_ip), 0, @@ -2490,7 +2499,7 @@ init_controller_worker (struct rspamd_config *cfg) rspamd_rcl_register_worker_option (cfg, type, "trusted_ips", - rspamd_rcl_parse_struct_string_list, + rspamd_rcl_parse_struct_ucl, ctx, G_STRUCT_OFFSET (struct rspamd_controller_worker_ctx, secure_ip), 0, @@ -2536,12 +2545,12 @@ void start_controller_worker (struct rspamd_worker *worker) { struct rspamd_controller_worker_ctx *ctx = worker->ctx; - GList *cur; struct module_ctx *mctx; GHashTableIter iter; gpointer key, value; struct rspamd_keypair_cache *cache; - gchar *secure_ip; + const ucl_object_t *cur; + ucl_object_iter_t it = NULL; gpointer m; ctx->ev_base = rspamd_prepare_worker (worker, @@ -2556,26 +2565,31 @@ start_controller_worker (struct rspamd_worker *worker) ctx->custom_commands = g_hash_table_new (rspamd_strcase_hash, rspamd_strcase_equal); if (ctx->secure_ip != NULL) { - cur = ctx->secure_ip; - - while (cur) { - secure_ip = cur->data; - /* Try map syntax */ - if (!rspamd_map_is_map (secure_ip)) { - if (!radix_add_generic_iplist (secure_ip, - &ctx->secure_map)) { - msg_warn_ctx ("cannot load or parse ip list from '%s'", - secure_ip); + if (ucl_object_type (ctx->secure_ip) == UCL_ARRAY) { + while ((cur = ucl_object_iterate (ctx->secure_ip, &it, true)) != NULL) { + /* Try map syntax */ + if (ucl_object_type (cur) == UCL_STRING && + !rspamd_map_is_map (ucl_object_tostring (cur))) { + if (!radix_add_generic_iplist (ucl_object_tostring (cur), + &ctx->secure_map)) { + msg_warn_ctx ("cannot load or parse ip list from '%s'", + ucl_object_tostring (cur)); + } + } + else { + rspamd_map_add_from_ucl (worker->srv->cfg, cur, + "Allow webui access from the specified IP", + rspamd_radix_read, rspamd_radix_fin, + (void **)&ctx->secure_map); } } - else { - rspamd_map_add (worker->srv->cfg, secure_ip, + } + else { + rspamd_map_add_from_ucl (worker->srv->cfg, ctx->secure_ip, "Allow webui access from the specified IP", rspamd_radix_read, rspamd_radix_fin, (void **)&ctx->secure_map); - } - cur = g_list_next (cur); } } diff --git a/src/fuzzy_storage.c b/src/fuzzy_storage.c index f43cb3f44..7803307b0 100644 --- a/src/fuzzy_storage.c +++ b/src/fuzzy_storage.c @@ -209,6 +209,7 @@ rspamd_fuzzy_process_updates_queue (struct rspamd_fuzzy_storage_ctx *ctx) struct rspamd_fuzzy_cmd *cmd; gpointer ptr; guint nupdates = 0; + time_t now = time (NULL); if (ctx->updates_pending && g_queue_get_length (ctx->updates_pending) > 0 && @@ -227,7 +228,7 @@ rspamd_fuzzy_process_updates_queue (struct rspamd_fuzzy_storage_ctx *ctx) } if (cmd->cmd == FUZZY_WRITE) { - rspamd_fuzzy_backend_add (ctx->backend, ptr); + rspamd_fuzzy_backend_add (ctx->backend, ptr, now); } else { rspamd_fuzzy_backend_del (ctx->backend, ptr); @@ -248,7 +249,8 @@ rspamd_fuzzy_process_updates_queue (struct rspamd_fuzzy_storage_ctx *ctx) } g_queue_clear (ctx->updates_pending); - msg_info ("updated fuzzy storage: %ud updates processed", nupdates); + msg_info ("updated fuzzy storage: %ud updates processed, version: %d", + nupdates, rspamd_fuzzy_backend_version (ctx->backend)); } else { msg_err ("cannot commit update transaction to fuzzy backend, " diff --git a/src/libcryptobox/cryptobox.c b/src/libcryptobox/cryptobox.c index 1680f1848..71180da12 100644 --- a/src/libcryptobox/cryptobox.c +++ b/src/libcryptobox/cryptobox.c @@ -31,7 +31,10 @@ #include "catena/catena.h" #include "ottery.h" #include "printf.h" - +#include "xxhash.h" +#define MUM_TARGET_INDEPENDENT_HASH 1 /* For 32/64 bit equal hashes */ +#include "../../contrib/mumhash/mum.h" +#include "../../contrib/metrohash/metro.h" #ifdef HAVE_CPUID_H #include <cpuid.h> #endif @@ -1408,3 +1411,101 @@ void rspamd_cryptobox_hash (guchar *out, rspamd_cryptobox_hash_update (&st, data, len); rspamd_cryptobox_hash_final (&st, out); } + + +void +rspamd_cryptobox_fast_hash_init (rspamd_cryptobox_fast_hash_state_t *st, + guint64 seed) +{ +#if defined(__LP64__) || defined(_LP64) + XXH64_state_t *rst = (XXH64_state_t *)st; + XXH64_reset (rst, seed); +#else + XXH32_state_t *rst = (XXH32_state_t *)st; + XXH32_reset (rst, seed); +#endif +} + +void +rspamd_cryptobox_fast_hash_update (rspamd_cryptobox_fast_hash_state_t *st, + const void *data, gsize len) +{ +#if defined(__LP64__) || defined(_LP64) + XXH64_state_t *rst = (XXH64_state_t *)st; + XXH64_update (rst, data, len); +#else + XXH32_state_t *rst = (XXH32_state_t *)st; + XXH32_update (rst, data, len); +#endif +} + +guint64 +rspamd_cryptobox_fast_hash_final (rspamd_cryptobox_fast_hash_state_t *st) +{ +#if defined(__LP64__) || defined(_LP64) + XXH64_state_t *rst = (XXH64_state_t *)st; + return XXH64_digest (rst); +#else + XXH32_state_t *rst = (XXH32_state_t *)st; + XXH32_digest (rst); +#endif + +} + +/** + * One in all function + */ +static inline guint64 +rspamd_cryptobox_fast_hash_machdep (const void *data, + gsize len, guint64 seed) +{ + if (len >= 8 && len % 8 == 0) { + return mum_hash (data, len, seed); + } + else { +#if defined(__LP64__) || defined(_LP64) + return metrohash64_1 (data, len, seed); +#endif + } + + return XXH32 (data, len, seed); +} + +static inline guint64 +rspamd_cryptobox_fast_hash_indep (const void *data, + gsize len, guint64 seed) +{ + if (len >= 8 && len % 8 == 0) { + return mum_hash (data, len, seed); + } + + return metrohash64_1 (data, len, seed); +} + +guint64 +rspamd_cryptobox_fast_hash (const void *data, + gsize len, guint64 seed) +{ + return rspamd_cryptobox_fast_hash_machdep (data, len, seed); +} + +guint64 +rspamd_cryptobox_fast_hash_specific ( + enum rspamd_cryptobox_fast_hash_type type, + const void *data, + gsize len, guint64 seed) +{ + switch (type) { + case RSPAMD_CRYPTOBOX_XXHASH32: + return XXH32 (data, len, seed); + case RSPAMD_CRYPTOBOX_XXHASH64: + return XXH64 (data, len, seed); + case RSPAMD_CRYPTOBOX_MUMHASH: + return mum_hash (data, len, seed); + case RSPAMD_CRYPTOBOX_HASHFAST_INDEPENDENT: + return rspamd_cryptobox_fast_hash_indep (data, len, seed); + case RSPAMD_CRYPTOBOX_HASHFAST: + default: + return rspamd_cryptobox_fast_hash_machdep (data, len, seed); + } +} diff --git a/src/libcryptobox/cryptobox.h b/src/libcryptobox/cryptobox.h index 9631bd8d0..6facf0a0e 100644 --- a/src/libcryptobox/cryptobox.h +++ b/src/libcryptobox/cryptobox.h @@ -324,4 +324,50 @@ void rspamd_cryptobox_hash (guchar *out, const guchar *key, gsize keylen); +/* Non crypto hash IUF interface */ +typedef struct RSPAMD_ALIGNED(32) rspamd_cryptobox_fast_hash_state_s { + unsigned char opaque[88]; +} rspamd_cryptobox_fast_hash_state_t; + +/** + * Init cryptobox hash state using key if needed, `st` must point to the buffer + * with at least rspamd_cryptobox_HASHSTATEBYTES bytes length. If keylen == 0, then + * non-keyed hash is generated + */ +void rspamd_cryptobox_fast_hash_init (rspamd_cryptobox_fast_hash_state_t *st, + guint64 seed); + +/** + * Update hash with data portion + */ +void rspamd_cryptobox_fast_hash_update (rspamd_cryptobox_fast_hash_state_t *st, + const void *data, gsize len); + +/** + * Output hash to the buffer of rspamd_cryptobox_HASHBYTES length + */ +guint64 rspamd_cryptobox_fast_hash_final (rspamd_cryptobox_fast_hash_state_t *st); + +/** + * One in all function + */ +guint64 rspamd_cryptobox_fast_hash (const void *data, + gsize len, guint64 seed); + +enum rspamd_cryptobox_fast_hash_type { + RSPAMD_CRYPTOBOX_XXHASH64 = 0, + RSPAMD_CRYPTOBOX_XXHASH32, + RSPAMD_CRYPTOBOX_MUMHASH, + RSPAMD_CRYPTOBOX_METROHASH, + RSPAMD_CRYPTOBOX_HASHFAST, + RSPAMD_CRYPTOBOX_HASHFAST_INDEPENDENT +}; +/** + * Platform independent version + */ +guint64 rspamd_cryptobox_fast_hash_specific ( + enum rspamd_cryptobox_fast_hash_type type, + const void *data, + gsize len, guint64 seed); + #endif /* CRYPTOBOX_H_ */ diff --git a/src/libcryptobox/keypairs_cache.c b/src/libcryptobox/keypairs_cache.c index 069158789..887aaa85d 100644 --- a/src/libcryptobox/keypairs_cache.c +++ b/src/libcryptobox/keypairs_cache.c @@ -18,7 +18,6 @@ #include "keypairs_cache.h" #include "keypair_private.h" #include "hash.h" -#include "xxhash.h" struct rspamd_keypair_elt { struct rspamd_cryptobox_nm *nm; @@ -43,7 +42,8 @@ rspamd_keypair_hash (gconstpointer ptr) { struct rspamd_keypair_elt *elt = (struct rspamd_keypair_elt *)ptr; - return XXH64 (elt->pair, sizeof (elt->pair), rspamd_hash_seed ()); + return rspamd_cryptobox_fast_hash (elt->pair, sizeof (elt->pair), + rspamd_hash_seed ()); } static gboolean diff --git a/src/libmime/filter.c b/src/libmime/filter.c index e1a33f3e2..6aaa19aaf 100644 --- a/src/libmime/filter.c +++ b/src/libmime/filter.c @@ -19,7 +19,7 @@ #include "rspamd.h" #include "message.h" #include "lua/lua_common.h" -#include "xxhash.h" +#include "cryptobox.h" #include <math.h> @@ -273,7 +273,8 @@ rspamd_action_from_str (const gchar *data, gint *result) { guint64 h; - h = XXH64 (data, strlen (data), 0xdeadbabe); + h = rspamd_cryptobox_fast_hash_specific (RSPAMD_CRYPTOBOX_XXHASH64, + data, strlen (data), 0xdeadbabe); switch (h) { case 0x9917BFDB46332B8CULL: /* reject */ diff --git a/src/libmime/message.c b/src/libmime/message.c index 6c4004f61..791bd6837 100644 --- a/src/libmime/message.c +++ b/src/libmime/message.c @@ -24,7 +24,7 @@ #include "email_addr.h" #include "utlist.h" #include "tokenizers/tokenizers.h" -#include "xxhash.h" +#include "cryptobox.h" #ifdef WITH_SNOWBALL #include "libstemmer.h" @@ -42,6 +42,7 @@ static const gchar gtube_pattern[] = "XJS*C4JDBQADN1.NSBN3*2IDNEN*" "GTUBE-STANDARD-ANTI-UBE-TEST-EMAIL*C.34X"; static rspamd_regexp_t *utf_compatible_re = NULL; +static const guint64 words_hash_seed = 0xdeadbabe; static GQuark rspamd_message_quark (void) @@ -1074,7 +1075,12 @@ rspamd_normalize_text_part (struct rspamd_task *task, } if (w->len > 0) { - h = XXH64 (w->begin, w->len, rspamd_hash_seed ()); + /* + * We use static hash seed if we would want to use that in shingles + * computation in future + */ + h = rspamd_cryptobox_fast_hash_specific (RSPAMD_CRYPTOBOX_HASHFAST_INDEPENDENT, + w->begin, w->len, words_hash_seed); g_array_append_val (part->normalized_hashes, h); } } @@ -1094,7 +1100,7 @@ rspamd_words_levenshtein_distance (struct rspamd_task *task, { guint s1len, s2len, x, y, lastdiag, olddiag; guint *column, ret; - guint64 *h1, *h2; + guint64 h1, h2; gint eq; static const guint max_words = 8192; @@ -1118,9 +1124,9 @@ rspamd_words_levenshtein_distance (struct rspamd_task *task, for (y = 1, lastdiag = x - 1; y <= s1len; y++) { olddiag = column[y]; - h1 = &g_array_index (w1, guint64, y - 1); - h2 = &g_array_index (w2, guint64, x - 1); - eq = h1 == h2; + h1 = g_array_index (w1, guint64, y - 1); + h2 = g_array_index (w2, guint64, x - 1); + eq = (h1 == h2) ? 1 : 0; /* * Cost of replacement is twice higher than cost of add/delete * to calculate percentage properly @@ -1262,7 +1268,6 @@ process_text_part (struct rspamd_task *task, type, text_part); text_part->orig = part_content; - rspamd_url_text_extract (task->task_pool, task, text_part, FALSE); g_ptr_array_add (task->text_parts, text_part); } else { @@ -1304,6 +1309,10 @@ process_text_part (struct rspamd_task *task, c = p + 1; } } + + if (!IS_PART_HTML (text_part)) { + rspamd_url_text_extract (task->task_pool, task, text_part, FALSE); + } } struct mime_foreach_data { diff --git a/src/libserver/cfg_file.h b/src/libserver/cfg_file.h index cd6d25683..e14fbd90a 100644 --- a/src/libserver/cfg_file.h +++ b/src/libserver/cfg_file.h @@ -277,7 +277,7 @@ struct rspamd_config { gchar *pid_file; /**< name of pid file */ gchar *temp_dir; /**< dir for temp files */ gchar *control_socket_path; /**< path to the control socket */ - gchar *local_addrs; /**< tree of local addresses */ + const ucl_object_t *local_addrs; /**< tree of local addresses */ #ifdef WITH_GPERF_TOOLS gchar *profile_path; #endif @@ -353,7 +353,6 @@ struct rspamd_config { gint clock_res; /**< resolution of clock used */ GList *maps; /**< maps active */ - rspamd_mempool_t *map_pool; /**< static maps pool */ gdouble map_timeout; /**< maps watch timeout */ struct symbols_cache *cache; /**< symbols cache object */ diff --git a/src/libserver/cfg_rcl.c b/src/libserver/cfg_rcl.c index b90c1116b..f774ac126 100644 --- a/src/libserver/cfg_rcl.c +++ b/src/libserver/cfg_rcl.c @@ -1894,7 +1894,7 @@ rspamd_rcl_config_init (struct rspamd_config *cfg) "Limit of files count in `cores_dir`"); rspamd_rcl_add_default_handler (sub, "local_addrs", - rspamd_rcl_parse_struct_string, + rspamd_rcl_parse_struct_ucl, G_STRUCT_OFFSET (struct rspamd_config, local_addrs), 0, "Use the specified addresses as local ones"); @@ -2976,13 +2976,13 @@ static guint rspamd_worker_param_key_hash (gconstpointer p) { const struct rspamd_worker_param_key *k = p; - XXH64_state_t st; + rspamd_cryptobox_fast_hash_state_t st; - XXH64_reset (&st, rspamd_hash_seed ()); - XXH64_update (&st, k->name, strlen (k->name)); - XXH64_update (&st, &k->ptr, sizeof (gpointer)); + rspamd_cryptobox_fast_hash_init (&st, rspamd_hash_seed ()); + rspamd_cryptobox_fast_hash_update (&st, k->name, strlen (k->name)); + rspamd_cryptobox_fast_hash_update (&st, &k->ptr, sizeof (gpointer)); - return XXH64_digest (&st); + return rspamd_cryptobox_fast_hash_final (&st); } static gboolean diff --git a/src/libserver/cfg_utils.c b/src/libserver/cfg_utils.c index 8cc54c792..85fd6af80 100644 --- a/src/libserver/cfg_utils.c +++ b/src/libserver/cfg_utils.c @@ -42,13 +42,11 @@ struct rspamd_ucl_map_cbdata { struct rspamd_config *cfg; GString *buf; }; -static gchar * rspamd_ucl_read_cb (rspamd_mempool_t * pool, - gchar * chunk, +static gchar * rspamd_ucl_read_cb (gchar * chunk, gint len, struct map_cb_data *data, gboolean final); -static void rspamd_ucl_fin_cb (rspamd_mempool_t * pool, - struct map_cb_data *data); +static void rspamd_ucl_fin_cb (struct map_cb_data *data); gboolean rspamd_parse_bind_line (struct rspamd_config *cfg, @@ -1135,8 +1133,7 @@ rspamd_config_check_statfiles (struct rspamd_classifier_config *cf) } static gchar * -rspamd_ucl_read_cb (rspamd_mempool_t * pool, - gchar * chunk, +rspamd_ucl_read_cb (gchar * chunk, gint len, struct map_cb_data *data, gboolean final) @@ -1157,7 +1154,7 @@ rspamd_ucl_read_cb (rspamd_mempool_t * pool, } static void -rspamd_ucl_fin_cb (rspamd_mempool_t * pool, struct map_cb_data *data) +rspamd_ucl_fin_cb (struct map_cb_data *data) { struct rspamd_ucl_map_cbdata *cbdata = data->cur_data, *prev = data->prev_data; @@ -1180,34 +1177,26 @@ rspamd_ucl_fin_cb (rspamd_mempool_t * pool, struct map_cb_data *data) return; } - checksum = XXH64 (cbdata->buf->str, cbdata->buf->len, 0); - if (data->map->checksum != checksum) { - /* New data available */ - parser = ucl_parser_new (0); - if (!ucl_parser_add_chunk (parser, cbdata->buf->str, + checksum = rspamd_cryptobox_fast_hash (cbdata->buf->str, cbdata->buf->len, 0); + /* New data available */ + parser = ucl_parser_new (0); + if (!ucl_parser_add_chunk (parser, cbdata->buf->str, cbdata->buf->len)) { - msg_err_config ("cannot parse map %s: %s", - data->map->uri, + msg_err_config ("cannot parse map %s: %s", + data->map->name, ucl_parser_get_error (parser)); - ucl_parser_free (parser); - } - else { - obj = ucl_parser_get_object (parser); - ucl_parser_free (parser); - it = NULL; - - while ((cur = ucl_object_iterate (obj, &it, true))) { - ucl_object_replace_key (cbdata->cfg->rcl_obj, (ucl_object_t *)cur, - cur->key, cur->keylen, false); - } - ucl_object_unref (obj); - data->map->checksum = checksum; - } + ucl_parser_free (parser); } else { - msg_info_config ("do not reload map %s, checksum is the same: %d", - data->map->uri, - checksum); + obj = ucl_parser_get_object (parser); + ucl_parser_free (parser); + it = NULL; + + while ((cur = ucl_object_iterate (obj, &it, true))) { + ucl_object_replace_key (cbdata->cfg->rcl_obj, (ucl_object_t *)cur, + cur->key, cur->keylen, false); + } + ucl_object_unref (obj); } } diff --git a/src/libserver/dynamic_cfg.c b/src/libserver/dynamic_cfg.c index 3a76f20f9..d31418588 100644 --- a/src/libserver/dynamic_cfg.c +++ b/src/libserver/dynamic_cfg.c @@ -134,8 +134,7 @@ apply_dynamic_conf (const ucl_object_t *top, struct rspamd_config *cfg) /* Callbacks for reading json dynamic rules */ static gchar * -json_config_read_cb (rspamd_mempool_t * pool, - gchar * chunk, +json_config_read_cb (gchar * chunk, gint len, struct map_cb_data *data, gboolean final) @@ -167,7 +166,7 @@ json_config_read_cb (rspamd_mempool_t * pool, } static void -json_config_fin_cb (rspamd_mempool_t * pool, struct map_cb_data *data) +json_config_fin_cb (struct map_cb_data *data) { struct config_json_buf *jb; ucl_object_t *top; diff --git a/src/libserver/events.c b/src/libserver/events.c index a8cb086cb..44a5d9191 100644 --- a/src/libserver/events.c +++ b/src/libserver/events.c @@ -16,7 +16,7 @@ #include "config.h" #include "rspamd.h" #include "events.h" -#include "xxhash.h" +#include "cryptobox.h" #define RSPAMD_SESSION_FLAG_WATCHING (1 << 0) #define RSPAMD_SESSION_FLAG_DESTROYING (1 << 1) @@ -81,7 +81,7 @@ static guint rspamd_event_hash (gconstpointer a) { const struct rspamd_async_event *ev = a; - XXH64_state_t st; + rspamd_cryptobox_fast_hash_state_t st; union { event_finalizer_t f; gpointer p; @@ -89,11 +89,11 @@ rspamd_event_hash (gconstpointer a) u.f = ev->fin; - XXH64_reset (&st, rspamd_hash_seed ()); - XXH64_update (&st, &ev->user_data, sizeof (gpointer)); - XXH64_update (&st, &u, sizeof (u)); + rspamd_cryptobox_fast_hash_init (&st, rspamd_hash_seed ()); + rspamd_cryptobox_fast_hash_update (&st, &ev->user_data, sizeof (gpointer)); + rspamd_cryptobox_fast_hash_update (&st, &u, sizeof (u)); - return XXH64_digest (&st); + return rspamd_cryptobox_fast_hash_final (&st); } diff --git a/src/libserver/fuzzy_backend.c b/src/libserver/fuzzy_backend.c index a13d27f09..26e595e5f 100644 --- a/src/libserver/fuzzy_backend.c +++ b/src/libserver/fuzzy_backend.c @@ -93,6 +93,7 @@ enum rspamd_fuzzy_statement_idx { RSPAMD_FUZZY_BACKEND_EXPIRE, RSPAMD_FUZZY_BACKEND_VACUUM, RSPAMD_FUZZY_BACKEND_DELETE_ORPHANED, + RSPAMD_FUZZY_BACKEND_VERSION, RSPAMD_FUZZY_BACKEND_MAX }; static struct rspamd_fuzzy_stmts { @@ -212,6 +213,13 @@ static struct rspamd_fuzzy_stmts { .stmt = NULL, .result = SQLITE_DONE }, + { + .idx = RSPAMD_FUZZY_BACKEND_VERSION, + .sql = "PRAGMA user_version;", + .args = "", + .stmt = NULL, + .result = SQLITE_ROW + }, }; static GQuark @@ -624,7 +632,8 @@ rspamd_fuzzy_backend_prepare_update (struct rspamd_fuzzy_backend *backend) gboolean rspamd_fuzzy_backend_add (struct rspamd_fuzzy_backend *backend, - const struct rspamd_fuzzy_cmd *cmd) + const struct rspamd_fuzzy_cmd *cmd, + time_t timestamp) { int rc, i; gint64 id, flag; @@ -680,7 +689,7 @@ rspamd_fuzzy_backend_add (struct rspamd_fuzzy_backend *backend, (gint) cmd->flag, cmd->digest, (gint64) cmd->value, - (gint64) time (NULL)); + (gint64) timestamp); if (rc == SQLITE_OK) { if (cmd->shingles_count > 0) { @@ -722,7 +731,9 @@ rspamd_fuzzy_backend_add (struct rspamd_fuzzy_backend *backend, gboolean rspamd_fuzzy_backend_finish_update (struct rspamd_fuzzy_backend *backend) { - gint rc, wal_frames, wal_checkpointed; + gint rc, wal_frames, wal_checkpointed, ver; + gint64 version = 0; + gchar version_buf[128]; rc = rspamd_fuzzy_backend_run_stmt (backend, TRUE, RSPAMD_FUZZY_BACKEND_TRANSACTION_COMMIT); @@ -745,6 +756,17 @@ rspamd_fuzzy_backend_finish_update (struct rspamd_fuzzy_backend *backend) } } + /* Get and update version */ + ver = rspamd_fuzzy_backend_version (backend); + ++ver; + rspamd_snprintf (version_buf, sizeof (version_buf), "PRAGMA user_version=%d;", + ver); + + if (sqlite3_exec (backend->db, version_buf, NULL, NULL, NULL) != SQLITE_OK) { + msg_err_fuzzy_backend ("cannot set database version to %L: %s", + version, sqlite3_errmsg (backend->db)); + } + return TRUE; } @@ -953,6 +975,24 @@ rspamd_fuzzy_backend_count (struct rspamd_fuzzy_backend *backend) return 0; } +gint +rspamd_fuzzy_backend_version (struct rspamd_fuzzy_backend *backend) +{ + gint ret = 0; + + if (backend) { + if (rspamd_fuzzy_backend_run_stmt (backend, FALSE, + RSPAMD_FUZZY_BACKEND_VERSION) == SQLITE_OK) { + ret = sqlite3_column_int64 ( + prepared_stmts[RSPAMD_FUZZY_BACKEND_VERSION].stmt, 0); + } + + rspamd_fuzzy_backend_cleanup_stmt (backend, RSPAMD_FUZZY_BACKEND_VERSION); + } + + return ret; +} + gsize rspamd_fuzzy_backend_expired (struct rspamd_fuzzy_backend *backend) { diff --git a/src/libserver/fuzzy_backend.h b/src/libserver/fuzzy_backend.h index 04da9bbba..bcd199d1a 100644 --- a/src/libserver/fuzzy_backend.h +++ b/src/libserver/fuzzy_backend.h @@ -56,7 +56,8 @@ gboolean rspamd_fuzzy_backend_prepare_update (struct rspamd_fuzzy_backend *backe */ gboolean rspamd_fuzzy_backend_add ( struct rspamd_fuzzy_backend *backend, - const struct rspamd_fuzzy_cmd *cmd); + const struct rspamd_fuzzy_cmd *cmd, + time_t timestamp); /** * Delete digest from the database @@ -89,6 +90,7 @@ gboolean rspamd_fuzzy_backend_sync (struct rspamd_fuzzy_backend *backend, void rspamd_fuzzy_backend_close (struct rspamd_fuzzy_backend *backend); gsize rspamd_fuzzy_backend_count (struct rspamd_fuzzy_backend *backend); +gint rspamd_fuzzy_backend_version (struct rspamd_fuzzy_backend *backend); gsize rspamd_fuzzy_backend_expired (struct rspamd_fuzzy_backend *backend); const gchar * rspamd_fuzzy_backend_id (struct rspamd_fuzzy_backend *backend); diff --git a/src/libserver/protocol.c b/src/libserver/protocol.c index a394ddaff..d314d3fdc 100644 --- a/src/libserver/protocol.c +++ b/src/libserver/protocol.c @@ -23,7 +23,7 @@ #include "http.h" #include "email_addr.h" #include "worker_private.h" -#include "xxhash.h" +#include "cryptobox.h" /* Max line size */ #define OUTBUFSIZ BUFSIZ @@ -401,7 +401,8 @@ rspamd_protocol_handle_headers (struct rspamd_task *task, guint64 h; guint32 *hp; - h = XXH64 (hv_tok->begin, hv_tok->len, 0xdeadbabe); + h = rspamd_cryptobox_fast_hash_specific (RSPAMD_CRYPTOBOX_XXHASH64, + hv_tok->begin, hv_tok->len, 0xdeadbabe); hp = rspamd_mempool_alloc (task->task_pool, sizeof (*hp)); memcpy (hp, &h, sizeof (*hp)); rspamd_mempool_set_variable (task->task_pool, "settings_hash", diff --git a/src/libserver/re_cache.c b/src/libserver/re_cache.c index 5882114f9..3e308415d 100644 --- a/src/libserver/re_cache.c +++ b/src/libserver/re_cache.c @@ -15,7 +15,6 @@ */ #include "libmime/message.h" #include "re_cache.h" -#include "xxhash.h" #include "cryptobox.h" #include "ref.h" #include "libserver/url.h" @@ -123,16 +122,16 @@ rspamd_re_cache_class_id (enum rspamd_re_type type, gpointer type_data, gsize datalen) { - XXH64_state_t st; + rspamd_cryptobox_fast_hash_state_t st; - XXH64_reset (&st, 0xdeadbabe); - XXH64_update (&st, &type, sizeof (type)); + rspamd_cryptobox_fast_hash_init (&st, 0xdeadbabe); + rspamd_cryptobox_fast_hash_update (&st, &type, sizeof (type)); if (datalen > 0) { - XXH64_update (&st, type_data, datalen); + rspamd_cryptobox_fast_hash_update (&st, type_data, datalen); } - return XXH64_digest (&st); + return rspamd_cryptobox_fast_hash_final (&st); } static void @@ -1174,7 +1173,8 @@ rspamd_re_cache_type_from_string (const char *str) */ if (str != NULL) { - h = XXH64 (str, strlen (str), 0xdeadbabe); + h = rspamd_cryptobox_fast_hash_specific (RSPAMD_CRYPTOBOX_XXHASH64, + str, strlen (str), 0xdeadbabe); switch (h) { case G_GUINT64_CONSTANT(0x298b9c8a58887d44): /* header */ diff --git a/src/libserver/task.c b/src/libserver/task.c index 3d6387b52..ce95b927d 100644 --- a/src/libserver/task.c +++ b/src/libserver/task.c @@ -26,6 +26,11 @@ #include "utlist.h" #include <math.h> +/* + * Do not print more than this amount of elts + */ +static const int max_log_elts = 7; + static GQuark rspamd_task_quark (void) { @@ -752,7 +757,7 @@ rspamd_task_log_metric_res (struct rspamd_task *task, rspamd_fstring_t *symbuf; struct symbol *sym; GPtrArray *sorted_symbols; - guint i; + guint i, j; mres = g_hash_table_lookup (task->results, DEFAULT_METRIC); @@ -810,8 +815,16 @@ rspamd_task_log_metric_res (struct rspamd_task *task, rspamd_printf_fstring (&symbuf, "{"); + j = 0; + for (cur = sym->options; cur != NULL; cur = g_list_next (cur)) { rspamd_printf_fstring (&symbuf, "%s;", cur->data); + + if (j >= max_log_elts) { + rspamd_printf_fstring (&symbuf, "...;"); + break; + } + j ++; } rspamd_printf_fstring (&symbuf, "}"); @@ -892,6 +905,7 @@ rspamd_task_write_ialist (struct rspamd_task *task, lim = internet_address_list_length (ialist); } + varbuf = rspamd_fstring_new (); for (i = 0; i < lim; i++) { @@ -908,6 +922,11 @@ rspamd_task_write_ialist (struct rspamd_task *task, varbuf = rspamd_fstring_append (varbuf, ",", 1); } } + + if (i >= max_log_elts) { + varbuf = rspamd_fstring_append (varbuf, "...", 3); + break; + } } if (varbuf->len > 0) { @@ -951,6 +970,11 @@ rspamd_task_write_addr_list (struct rspamd_task *task, varbuf = rspamd_fstring_append (varbuf, ",", 1); } } + + if (i >= max_log_elts) { + varbuf = rspamd_fstring_append (varbuf, "...", 3); + break; + } } if (varbuf->len > 0) { diff --git a/src/libserver/url.c b/src/libserver/url.c index fe70585be..70a5f3c9b 100644 --- a/src/libserver/url.c +++ b/src/libserver/url.c @@ -1795,10 +1795,9 @@ url_tld_end (struct url_callback_data *cb, { const gchar *p; - /* A url must be finished by tld, so it must be followed by space character */ p = pos + match->m_len; - if (p == cb->end || g_ascii_isspace (*p) || *p == ',') { + if (p == cb->end) { match->m_len = p - match->m_begin; return TRUE; } @@ -2302,7 +2301,7 @@ rspamd_url_text_extract (rspamd_mempool_t *pool, { struct rspamd_url_mimepart_cbdata mcbd; - if (part->content == NULL || part->content->len == 0) { + if (part->stripped_content == NULL || part->stripped_content->len == 0) { msg_warn_task ("got empty text part"); return; } @@ -2310,8 +2309,8 @@ rspamd_url_text_extract (rspamd_mempool_t *pool, mcbd.task = task; mcbd.part = part; - rspamd_url_find_multiple (task->task_pool, part->content->data, - part->content->len, is_html, + rspamd_url_find_multiple (task->task_pool, part->stripped_content->data, + part->stripped_content->len, is_html, rspamd_url_text_part_callback, &mcbd); /* Handle offsets of this part */ diff --git a/src/libstat/tokenizers/osb.c b/src/libstat/tokenizers/osb.c index 906c1de25..c2e050f23 100644 --- a/src/libstat/tokenizers/osb.c +++ b/src/libstat/tokenizers/osb.c @@ -19,7 +19,6 @@ #include "tokenizers.h" #include "stat_internal.h" -#include "xxhash.h" #include "cryptobox.h" /* Size for features pipe */ @@ -280,7 +279,8 @@ rspamd_tokenizer_osb (struct rspamd_stat_ctx *ctx, window_size = osb_cf->window_size; if (prefix) { - seed = XXH64 (prefix, strlen (prefix), osb_cf->seed); + seed = rspamd_cryptobox_fast_hash_specific (RSPAMD_CRYPTOBOX_XXHASH64, + prefix, strlen (prefix), osb_cf->seed); } else { seed = osb_cf->seed; @@ -300,7 +300,8 @@ rspamd_tokenizer_osb (struct rspamd_stat_ctx *ctx, else { /* We know that the words are normalized */ if (osb_cf->ht == RSPAMD_OSB_HASH_XXHASH) { - cur = XXH64 (token->begin, token->len, osb_cf->seed); + cur = rspamd_cryptobox_fast_hash_specific (RSPAMD_CRYPTOBOX_XXHASH64, + token->begin, token->len, osb_cf->seed); } else { rspamd_cryptobox_siphash ((guchar *)&cur, token->begin, diff --git a/src/libutil/addr.c b/src/libutil/addr.c index 8fc85e6c7..18414a98c 100644 --- a/src/libutil/addr.c +++ b/src/libutil/addr.c @@ -17,7 +17,7 @@ #include "addr.h" #include "util.h" #include "logger.h" -#include "xxhash.h" +#include "cryptobox.h" #include "radix.h" #include "unix-std.h" /* pwd and grp */ @@ -1352,28 +1352,28 @@ guint rspamd_inet_address_hash (gconstpointer a) { const rspamd_inet_addr_t *addr = a; - XXH64_state_t st; + rspamd_cryptobox_fast_hash_state_t st; - XXH64_reset (&st, rspamd_hash_seed ()); - XXH64_update (&st, &addr->af, sizeof (addr->af)); + rspamd_cryptobox_fast_hash_init (&st, rspamd_hash_seed ()); + rspamd_cryptobox_fast_hash_update (&st, &addr->af, sizeof (addr->af)); if (addr->af == AF_UNIX && addr->u.un) { - XXH64_update (&st, addr->u.un, sizeof (*addr->u.un)); + rspamd_cryptobox_fast_hash_update (&st, addr->u.un, sizeof (*addr->u.un)); } else { /* We ignore port part here */ if (addr->af == AF_INET) { - XXH64_update (&st, &addr->u.in.addr.s4.sin_addr, + rspamd_cryptobox_fast_hash_update (&st, &addr->u.in.addr.s4.sin_addr, sizeof (addr->u.in.addr.s4.sin_addr)); } else { - XXH64_update (&st, &addr->u.in.addr.s6.sin6_addr, + rspamd_cryptobox_fast_hash_update (&st, &addr->u.in.addr.s6.sin6_addr, sizeof (addr->u.in.addr.s6.sin6_addr)); } } - return XXH64_digest (&st); + return rspamd_cryptobox_fast_hash_final (&st); } gboolean diff --git a/src/libutil/bloom.c b/src/libutil/bloom.c index 8c8b80ae6..2447b1b10 100644 --- a/src/libutil/bloom.c +++ b/src/libutil/bloom.c @@ -15,7 +15,7 @@ */ #include "config.h" #include "bloom.h" -#include "xxhash.h" +#include "cryptobox.h" /* 4 bits are used for counting (implementing delete operation) */ #define SIZE_BIT 4 @@ -107,7 +107,8 @@ rspamd_bloom_add (rspamd_bloom_filter_t * bloom, const gchar *s) } len = strlen (s); for (n = 0; n < bloom->nfuncs; ++n) { - v = XXH64 (s, len, bloom->seeds[n]) % bloom->asize; + v = rspamd_cryptobox_fast_hash_specific (RSPAMD_CRYPTOBOX_XXHASH64, + s, len, bloom->seeds[n]) % bloom->asize; INCBIT (bloom->a, v, t); } @@ -126,7 +127,8 @@ rspamd_bloom_del (rspamd_bloom_filter_t * bloom, const gchar *s) } len = strlen (s); for (n = 0; n < bloom->nfuncs; ++n) { - v = XXH64 (s, len, bloom->seeds[n]) % bloom->asize; + v = rspamd_cryptobox_fast_hash_specific (RSPAMD_CRYPTOBOX_XXHASH64, + s, len, bloom->seeds[n]) % bloom->asize; DECBIT (bloom->a, v, t); } @@ -145,7 +147,8 @@ rspamd_bloom_check (rspamd_bloom_filter_t * bloom, const gchar *s) } len = strlen (s); for (n = 0; n < bloom->nfuncs; ++n) { - v = XXH64 (s, len, bloom->seeds[n]) % bloom->asize; + v = rspamd_cryptobox_fast_hash_specific (RSPAMD_CRYPTOBOX_XXHASH64, + s, len, bloom->seeds[n]) % bloom->asize; if (!(GETBIT (bloom->a, v))) { return FALSE; } diff --git a/src/libutil/fstring.c b/src/libutil/fstring.c index 997dd6b46..285940f9c 100644 --- a/src/libutil/fstring.c +++ b/src/libutil/fstring.c @@ -400,3 +400,19 @@ rspamd_ftok_cstr_equal (const rspamd_ftok_t *s, const gchar *pat, return (rspamd_ftok_cmp (s, &srch) == 0); } + +gchar * +rspamd_ftokdup (const rspamd_ftok_t *src) +{ + gchar *newstr; + + if (src == NULL) { + return NULL; + } + + newstr = g_malloc (src->len + 1); + memcpy (newstr, src->begin, src->len); + newstr[src->len] = '\0'; + + return newstr; +} diff --git a/src/libutil/fstring.h b/src/libutil/fstring.h index 127557e40..10916d876 100644 --- a/src/libutil/fstring.h +++ b/src/libutil/fstring.h @@ -153,4 +153,12 @@ rspamd_ftok_t *rspamd_ftok_map (const rspamd_fstring_t *s); rspamd_fstring_t * rspamd_fstring_grow (rspamd_fstring_t *str, gsize needed_len) G_GNUC_WARN_UNUSED_RESULT; +/** + * Copies ftok to zero terminated string (must be freed using g_free) + * @param src + * @return + */ +gchar *rspamd_ftokdup (const rspamd_ftok_t *src) G_GNUC_WARN_UNUSED_RESULT; + + #endif diff --git a/src/libutil/logger.c b/src/libutil/logger.c index 21e489b09..f81730448 100644 --- a/src/libutil/logger.c +++ b/src/libutil/logger.c @@ -18,7 +18,7 @@ #include "util.h" #include "rspamd.h" #include "map.h" -#include "xxhash.h" +#include "cryptobox.h" #include "unix-std.h" #ifdef HAVE_SYSLOG_H @@ -82,12 +82,6 @@ static rspamd_logger_t *default_logger = NULL; } \ } while (0) -#if defined(__LP64__) || defined(_LP64) -#define XXH_ONESHOT XXH64 -#else -#define XXH_ONESHOT XXH32 -#endif - static void syslog_log_function (const gchar *log_domain, const gchar *module, const gchar *id, const gchar *function, @@ -106,7 +100,7 @@ static void static inline guint64 rspamd_log_calculate_cksum (const gchar *message, size_t mlen) { - return XXH_ONESHOT (message, mlen, rspamd_hash_seed ()); + return rspamd_cryptobox_fast_hash (message, mlen, rspamd_hash_seed ()); } /* diff --git a/src/libutil/map.c b/src/libutil/map.c index 8de6e76c2..f99c35784 100644 --- a/src/libutil/map.c +++ b/src/libutil/map.c @@ -35,10 +35,29 @@ #include <pcre2.h> #endif +#undef MAP_DEBUG_REFS +#ifdef MAP_DEBUG_REFS +#define MAP_RETAIN(x) do { \ + msg_err ("retain ref %p, refcount: %d -> %d", (x), (x)->ref.refcount, (x)->ref.refcount + 1); \ + REF_RETAIN(x); \ +} while (0) + +#define MAP_RELEASE(x) do { \ + msg_err ("release ref %p, refcount: %d -> %d", (x), (x)->ref.refcount, (x)->ref.refcount - 1); \ + REF_RELEASE(x); \ +} while (0) +#else +#define MAP_RETAIN REF_RETAIN +#define MAP_RELEASE REF_RELEASE +#endif + static const gchar *hash_fill = "1"; -static void free_http_cbdata_common (struct http_callback_data *cbd); +static void free_http_cbdata_common (struct http_callback_data *cbd, gboolean plan_new); static void free_http_cbdata_dtor (gpointer p); static void free_http_cbdata (struct http_callback_data *cbd); +static void rspamd_map_periodic_callback (gint fd, short what, void *ud); +static void rspamd_map_schedule_periodic (struct rspamd_map *map, gboolean locked, + gboolean initial, gboolean errored); /** * Write HTTP request */ @@ -47,9 +66,9 @@ write_http_request (struct http_callback_data *cbd) { gchar datebuf[128]; struct rspamd_http_message *msg; - rspamd_mempool_t *pool; + struct rspamd_map *map; - pool = cbd->map->pool; + map = cbd->map; if (cbd->fd != -1) { close (cbd->fd); @@ -60,10 +79,15 @@ write_http_request (struct http_callback_data *cbd) if (cbd->fd != -1) { msg = rspamd_http_new_message (HTTP_REQUEST); + if (cbd->check) { + msg->method = HTTP_HEAD; + } + if (cbd->stage == map_load_file) { msg->url = rspamd_fstring_new_init (cbd->data->path, strlen (cbd->data->path)); - if (cbd->data->last_checked != 0 && cbd->stage == map_load_file) { + if (cbd->check && + cbd->data->last_checked != 0 && cbd->stage == map_load_file) { rspamd_http_date_format (datebuf, sizeof (datebuf), cbd->data->last_checked); rspamd_http_message_add_header (msg, "If-Modified-Since", datebuf); @@ -83,11 +107,12 @@ write_http_request (struct http_callback_data *cbd) rspamd_http_connection_write_message (cbd->conn, msg, cbd->data->host, NULL, cbd, cbd->fd, &cbd->tv, cbd->ev_base); - REF_RETAIN (cbd); + MAP_RETAIN (cbd); } else { - msg_err_pool ("cannot connect to %s: %s", cbd->data->host, + msg_err_map ("cannot connect to %s: %s", cbd->data->host, strerror (errno)); + cbd->periodic->errored = TRUE; } } @@ -99,7 +124,6 @@ rspamd_map_check_sig_pk (const char *fname, struct rspamd_cryptobox_pubkey *pk) { gchar fpath[PATH_MAX]; - rspamd_mempool_t *pool = map->pool; guchar *data; GString *b32_key; gsize len = 0; @@ -109,12 +133,12 @@ rspamd_map_check_sig_pk (const char *fname, data = rspamd_file_xmap (fpath, PROT_READ, &len); if (data == NULL) { - msg_err_pool ("can't open signature %s: %s", fpath, strerror (errno)); + msg_err_map ("can't open signature %s: %s", fpath, strerror (errno)); return FALSE; } if (len != rspamd_cryptobox_signature_bytes (RSPAMD_CRYPTOBOX_MODE_25519)) { - msg_err_pool ("can't open signature %s: invalid signature", fpath); + msg_err_map ("can't open signature %s: invalid signature", fpath); munmap (data, len); return FALSE; @@ -122,7 +146,7 @@ rspamd_map_check_sig_pk (const char *fname, if (!rspamd_cryptobox_verify (data, input, inlen, rspamd_pubkey_get_pk (pk, NULL), RSPAMD_CRYPTOBOX_MODE_25519)) { - msg_err_pool ("can't verify signature %s: incorrect signature", fpath); + msg_err_map ("can't verify signature %s: incorrect signature", fpath); munmap (data, len); return FALSE; @@ -130,7 +154,7 @@ rspamd_map_check_sig_pk (const char *fname, b32_key = rspamd_pubkey_print (pk, RSPAMD_KEYPAIR_BASE32|RSPAMD_KEYPAIR_PUBKEY); - msg_info_pool ("verified signature in file %s using trusted key %v", + msg_info_map ("verified signature in file %s using trusted key %v", fpath, b32_key); g_string_free (b32_key, TRUE); @@ -141,25 +165,26 @@ rspamd_map_check_sig_pk (const char *fname, static gboolean rspamd_map_check_file_sig (const char *fname, - struct rspamd_map *map, const guchar *input, + struct rspamd_map *map, + struct rspamd_map_backend *bk, + const guchar *input, gsize inlen) { gchar fpath[PATH_MAX]; - rspamd_mempool_t *pool = map->pool; guchar *data; struct rspamd_cryptobox_pubkey *pk = NULL; GString *b32_key; gboolean ret; gsize len = 0; - if (map->trusted_pubkey == NULL) { + if (bk->trusted_pubkey == NULL) { /* Try to load and check pubkey */ rspamd_snprintf (fpath, sizeof (fpath), "%s.pub", fname); data = rspamd_file_xmap (fpath, PROT_READ, &len); if (data == NULL) { - msg_err_pool ("can't open pubkey %s: %s", fpath, strerror (errno)); + msg_err_map ("can't open pubkey %s: %s", fpath, strerror (errno)); return FALSE; } @@ -168,7 +193,7 @@ rspamd_map_check_file_sig (const char *fname, munmap (data, len); if (pk == NULL) { - msg_err_pool ("can't load pubkey %s", fpath); + msg_err_map ("can't load pubkey %s", fpath); return FALSE; } @@ -178,7 +203,7 @@ rspamd_map_check_file_sig (const char *fname, g_assert (b32_key != NULL); if (g_hash_table_lookup (map->cfg->trusted_keys, b32_key->str) == NULL) { - msg_err_pool ("pubkey loaded from %s is untrusted: %v", fpath, + msg_err_map ("pubkey loaded from %s is untrusted: %v", fpath, b32_key); g_string_free (b32_key, TRUE); rspamd_pubkey_unref (pk); @@ -189,7 +214,7 @@ rspamd_map_check_file_sig (const char *fname, g_string_free (b32_key, TRUE); } else { - pk = rspamd_pubkey_ref (map->trusted_pubkey); + pk = rspamd_pubkey_ref (bk->trusted_pubkey); } ret = rspamd_map_check_sig_pk (fname, map, input, inlen, pk); @@ -202,10 +227,11 @@ rspamd_map_check_file_sig (const char *fname, * Callback for destroying HTTP callback data */ static void -free_http_cbdata_common (struct http_callback_data *cbd) +free_http_cbdata_common (struct http_callback_data *cbd, gboolean plan_new) { char fpath[PATH_MAX]; struct stat st; + struct map_periodic_cbdata *periodic = cbd->periodic; if (cbd->out_fd != -1) { close (cbd->out_fd); @@ -243,7 +269,8 @@ free_http_cbdata_common (struct http_callback_data *cbd) rspamd_inet_address_destroy (cbd->addr); } - g_atomic_int_set (cbd->map->locked, 0); + MAP_RELEASE (cbd->bk); + MAP_RELEASE (periodic); g_slice_free1 (sizeof (struct http_callback_data), cbd); } @@ -253,18 +280,18 @@ free_http_cbdata (struct http_callback_data *cbd) cbd->map->dtor = NULL; cbd->map->dtor_data = NULL; - free_http_cbdata_common (cbd); + free_http_cbdata_common (cbd, TRUE); } static void free_http_cbdata_dtor (gpointer p) { struct http_callback_data *cbd = p; - rspamd_mempool_t *pool; + struct rspamd_map *map; - pool = cbd->map->pool; - msg_warn_pool ("connection with http server is terminated: worker is stopping"); - free_http_cbdata_common (cbd); + map = cbd->map; + msg_warn_map ("connection with http server is terminated: worker is stopping"); + free_http_cbdata_common (cbd, FALSE); } /* @@ -275,13 +302,13 @@ http_map_error (struct rspamd_http_connection *conn, GError *err) { struct http_callback_data *cbd = conn->ud; - rspamd_mempool_t *pool; - - pool = cbd->map->pool; + struct rspamd_map *map; - msg_err_pool ("connection with http server terminated incorrectly: %s", - err->message); - REF_RELEASE (cbd); + map = cbd->map; + cbd->periodic->errored = TRUE; + msg_err_map ("connection with http server terminated incorrectly: %e", err); + rspamd_map_periodic_callback (-1, EV_TIMEOUT, cbd->periodic); + MAP_RELEASE (cbd); } static int @@ -290,17 +317,27 @@ http_map_finish (struct rspamd_http_connection *conn, { struct http_callback_data *cbd = conn->ud; struct rspamd_map *map; - rspamd_mempool_t *pool; + struct rspamd_map_backend *bk; char fpath[PATH_MAX]; guchar *aux_data, *in = NULL; gsize inlen = 0; struct stat st; map = cbd->map; - pool = cbd->map->pool; + bk = cbd->bk; if (msg->code == 200) { + if (cbd->check) { + cbd->periodic->need_modify = TRUE; + /* Reset the whole chain */ + cbd->periodic->cur_backend = 0; + rspamd_map_periodic_callback (-1, EV_TIMEOUT, cbd->periodic); + MAP_RELEASE (cbd); + + return 0; + } + if (cbd->stage == map_load_file) { if (msg->last_modified) { cbd->data->last_checked = msg->last_modified; @@ -310,13 +347,13 @@ http_map_finish (struct rspamd_http_connection *conn, } /* Maybe we need to check signature ? */ - if (map->is_signed) { + if (bk->is_signed) { close (cbd->out_fd); - if (map->trusted_pubkey) { + if (bk->trusted_pubkey) { /* No need to load key */ cbd->stage = map_load_signature; - cbd->pk = rspamd_pubkey_ref (map->trusted_pubkey); + cbd->pk = rspamd_pubkey_ref (bk->trusted_pubkey); rspamd_snprintf (fpath, sizeof (fpath), "%s.sig", cbd->tmpfile); } @@ -329,24 +366,25 @@ http_map_finish (struct rspamd_http_connection *conn, cbd->out_fd = rspamd_file_xopen (fpath, O_RDWR|O_CREAT, 00644); if (cbd->out_fd == -1) { - msg_err_pool ("cannot open pubkey file %s for writing: %s", + msg_err_map ("cannot open pubkey file %s for writing: %s", fpath, strerror (errno)); - goto end; + goto err; } rspamd_http_connection_reset (cbd->conn); write_http_request (cbd); + MAP_RELEASE (cbd); - goto end; + return 0; } else { /* Unsinged version - just open file */ in = rspamd_file_xmap (cbd->tmpfile, PROT_READ, &inlen); if (in == NULL) { - msg_err_pool ("cannot read tempfile %s: %s", cbd->tmpfile, + msg_err_map ("cannot read tempfile %s: %s", cbd->tmpfile, strerror (errno)); - goto end; + goto err; } } } @@ -355,9 +393,9 @@ http_map_finish (struct rspamd_http_connection *conn, (void)lseek (cbd->out_fd, 0, SEEK_SET); if (fstat (cbd->out_fd, &st) == -1) { - msg_err_pool ("cannot stat pubkey file %s: %s", + msg_err_map ("cannot stat pubkey file %s: %s", fpath, strerror (errno)); - goto end; + goto err; } aux_data = mmap (NULL, st.st_size, PROT_READ, MAP_SHARED, @@ -366,9 +404,9 @@ http_map_finish (struct rspamd_http_connection *conn, cbd->out_fd = -1; if (aux_data == MAP_FAILED) { - msg_err_pool ("cannot map pubkey file %s: %s", + msg_err_map ("cannot map pubkey file %s: %s", fpath, strerror (errno)); - goto end; + goto err; } cbd->pk = rspamd_pubkey_from_base32 (aux_data, st.st_size, @@ -376,25 +414,26 @@ http_map_finish (struct rspamd_http_connection *conn, munmap (aux_data, st.st_size); if (cbd->pk == NULL) { - msg_err_pool ("cannot load pubkey file %s: bad pubkey", + msg_err_map ("cannot load pubkey file %s: bad pubkey", fpath); - goto end; + goto err; } rspamd_snprintf (fpath, sizeof (fpath), "%s.sig", cbd->tmpfile); cbd->out_fd = rspamd_file_xopen (fpath, O_RDWR|O_CREAT, 00644); if (cbd->out_fd == -1) { - msg_err_pool ("cannot open signature file %s for writing: %s", + msg_err_map ("cannot open signature file %s for writing: %s", fpath, strerror (errno)); - goto end; + goto err; } cbd->stage = map_load_signature; rspamd_http_connection_reset (cbd->conn); write_http_request (cbd); + MAP_RELEASE (cbd); - goto end; + return 0; } else if (cbd->stage == map_load_signature) { /* We can now check signature */ @@ -404,26 +443,25 @@ http_map_finish (struct rspamd_http_connection *conn, in = rspamd_file_xmap (cbd->tmpfile, PROT_READ, &inlen); if (in == NULL) { - msg_err_pool ("cannot read tempfile %s: %s", cbd->tmpfile, + msg_err_map ("cannot read tempfile %s: %s", cbd->tmpfile, strerror (errno)); - goto end; + goto err; } if (!rspamd_map_check_sig_pk (cbd->tmpfile, map, in, inlen, cbd->pk)) { - goto end; + goto err; } } g_assert (in != NULL); - map->read_callback (map->pool, in, inlen, &cbd->cbdata, TRUE); - map->fin_callback (map->pool, &cbd->cbdata); - - *map->user_data = cbd->cbdata.cur_data; - msg_info_pool ("read map data from %s", cbd->data->host); + map->read_callback (in, inlen, &cbd->periodic->cbdata, TRUE); + msg_info_map ("read map data from %s", cbd->data->host); + cbd->periodic->cur_backend ++; + rspamd_map_periodic_callback (-1, EV_TIMEOUT, cbd->periodic); } - else if (msg->code == 304 && cbd->stage == map_load_file) { - msg_debug_pool ("data is not modified for server %s", + else if (msg->code == 304 && (cbd->check && cbd->stage == map_load_file)) { + msg_debug_map ("data is not modified for server %s", cbd->data->host); if (msg->last_modified) { @@ -434,12 +472,17 @@ http_map_finish (struct rspamd_http_connection *conn, } } else { - msg_info_pool ("cannot load map %s from %s: HTTP error %d", - map->uri, cbd->data->host, msg->code); + msg_info_map ("cannot load map %s from %s: HTTP error %d", + bk->uri, cbd->data->host, msg->code); } -end: - REF_RELEASE (cbd); + MAP_RELEASE (cbd); + return 0; + +err: + cbd->periodic->errored = 1; + rspamd_map_periodic_callback (-1, EV_TIMEOUT, cbd->periodic); + MAP_RELEASE (cbd); return 0; } @@ -451,18 +494,18 @@ http_map_read (struct rspamd_http_connection *conn, gsize len) { struct http_callback_data *cbd = conn->ud; - rspamd_mempool_t *pool; + struct rspamd_map *map; if (msg->code != 200 || len == 0) { /* Ignore not full replies */ return 0; } - pool = cbd->map->pool; + map = cbd->map; if (write (cbd->out_fd, chunk, len) == -1) { - msg_err_pool ("cannot write to %s: %s", cbd->tmpfile, strerror (errno)); - REF_RELEASE (cbd); + msg_err_map ("cannot write to %s: %s", cbd->tmpfile, strerror (errno)); + MAP_RELEASE (cbd); return -1; } @@ -474,38 +517,32 @@ http_map_read (struct rspamd_http_connection *conn, * Callback for reading data from file */ static gboolean -read_map_file (struct rspamd_map *map, struct file_map_data *data) +read_map_file (struct rspamd_map *map, struct file_map_data *data, + struct rspamd_map_backend *bk, struct map_periodic_cbdata *periodic) { - struct map_cb_data cbdata; guchar *bytes; gsize len; - rspamd_mempool_t *pool = map->pool; if (map->read_callback == NULL || map->fin_callback == NULL) { - msg_err_pool ("bad callback for reading map file"); + msg_err_map ("bad callback for reading map file"); return FALSE; } if (access (data->filename, R_OK) == -1) { /* File does not exist, skipping */ - msg_err_pool ("map file is unavailable for reading"); - return FALSE; + msg_err_map ("map file is unavailable for reading"); + return TRUE; } bytes = rspamd_file_xmap (data->filename, PROT_READ, &len); if (bytes == NULL) { - msg_err_pool ("can't open map %s: %s", data->filename, strerror (errno)); + msg_err_map ("can't open map %s: %s", data->filename, strerror (errno)); return FALSE; } - cbdata.state = 0; - cbdata.prev_data = *map->user_data; - cbdata.cur_data = NULL; - cbdata.map = map; - - if (map->is_signed) { - if (!rspamd_map_check_file_sig (data->filename, map, bytes, len)) { + if (bk->is_signed) { + if (!rspamd_map_check_file_sig (data->filename, map, bk, bytes, len)) { munmap (bytes, len); return FALSE; @@ -513,9 +550,7 @@ read_map_file (struct rspamd_map *map, struct file_map_data *data) } if (len > 0) { - map->read_callback (map->pool, bytes, len, &cbdata, TRUE); - map->fin_callback (map->pool, &cbdata); - *map->user_data = cbdata.cur_data; + map->read_callback (bytes, len, &periodic->cbdata, TRUE); } munmap (bytes, len); @@ -524,85 +559,74 @@ read_map_file (struct rspamd_map *map, struct file_map_data *data) } static void -jitter_timeout_event (struct rspamd_map *map, - gboolean locked, gboolean initial, gboolean errored) +rspamd_map_periodic_dtor (struct map_periodic_cbdata *periodic) { - const gdouble error_mult = 20.0, lock_mult = 0.5; - gdouble jittered_sec; - gdouble timeout; - - if (initial) { - timeout = 0.0; - } - else if (errored) { - timeout = map->cfg->map_timeout * error_mult; - } - else if (locked) { - timeout = map->cfg->map_timeout * lock_mult; + if (periodic->need_modify) { + /* We are done */ + periodic->map->fin_callback (&periodic->cbdata); + *periodic->map->user_data = periodic->cbdata.cur_data; } else { - timeout = map->cfg->map_timeout; + /* Not modified */ } - /* Plan event again with jitter */ - evtimer_del (&map->ev); - jittered_sec = rspamd_time_jitter (timeout, 0); - double_to_tv (jittered_sec, &map->tv); - - evtimer_add (&map->ev, &map->tv); + rspamd_map_schedule_periodic (periodic->map, FALSE, FALSE, FALSE); + g_atomic_int_set (periodic->map->locked, 0); + g_slice_free1 (sizeof (*periodic), periodic); } -/** - * Common file callback - */ static void -file_callback (gint fd, short what, void *ud) +rspamd_map_schedule_periodic (struct rspamd_map *map, + gboolean locked, gboolean initial, gboolean errored) { - struct rspamd_map *map = ud; - struct file_map_data *data = map->map_data; - struct stat st; - rspamd_mempool_t *pool; + const gdouble error_mult = 20.0, lock_mult = 0.5; + gdouble jittered_sec; + gdouble timeout; + struct map_periodic_cbdata *cbd; - pool = map->pool; + timeout = map->poll_timeout; - if (!g_atomic_int_compare_and_exchange (map->locked, 0, 1)) { - msg_debug_pool ( - "don't try to reread map as it is locked by other process, will reread it later"); - jitter_timeout_event (map, TRUE, FALSE, FALSE); - return; + if (initial) { + timeout = 0.0; } - if (stat (data->filename, &st) != -1 && - (st.st_mtime > data->st.st_mtime || data->st.st_mtime == -1)) { - /* File was modified since last check */ - memcpy (&data->st, &st, sizeof (struct stat)); + if (errored) { + timeout = map->poll_timeout * error_mult; } - else { - g_atomic_int_set (map->locked, 0); - jitter_timeout_event (map, FALSE, FALSE, FALSE); - return; + else if (locked) { + timeout = map->poll_timeout * lock_mult; } - msg_info_pool ("rereading map file %s", data->filename); + cbd = g_slice_alloc0 (sizeof (*cbd)); + cbd->cbdata.state = 0; + cbd->cbdata.prev_data = *map->user_data; + cbd->cbdata.cur_data = NULL; + cbd->cbdata.map = map; + cbd->map = map; + REF_INIT_RETAIN (cbd, rspamd_map_periodic_dtor); - if (!read_map_file (map, data)) { - jitter_timeout_event (map, FALSE, FALSE, TRUE); + if (initial) { + evtimer_set (&map->ev, rspamd_map_periodic_callback, cbd); + event_base_set (map->ev_base, &map->ev); } else { - jitter_timeout_event (map, FALSE, FALSE, FALSE); + evtimer_del (&map->ev); + evtimer_set (&map->ev, rspamd_map_periodic_callback, cbd); } - g_atomic_int_set (map->locked, 0); -} + jittered_sec = rspamd_time_jitter (timeout, 0); + double_to_tv (jittered_sec, &map->tv); + evtimer_add (&map->ev, &map->tv); +} static void rspamd_map_dns_callback (struct rdns_reply *reply, void *arg) { struct http_callback_data *cbd = arg; - rspamd_mempool_t *pool; + struct rspamd_map *map; - pool = cbd->map->pool; + map = cbd->map; if (reply->code == RDNS_RC_NOERROR) { /* @@ -641,36 +665,25 @@ rspamd_map_dns_callback (struct rdns_reply *reply, void *arg) } else { /* We could not resolve host, so cowardly fail here */ - msg_err_pool ("cannot resolve %s", cbd->data->host); + msg_err_map ("cannot resolve %s", cbd->data->host); } } - REF_RELEASE (cbd); + MAP_RELEASE (cbd); } /** * Async HTTP callback */ static void -http_callback (gint fd, short what, void *ud) +rspamd_map_common_http_callback (struct rspamd_map *map, struct rspamd_map_backend *bk, + struct map_periodic_cbdata *periodic, gboolean check) { - struct rspamd_map *map = ud; struct http_map_data *data; struct http_callback_data *cbd; - rspamd_mempool_t *pool; gchar tmpbuf[PATH_MAX]; - data = map->map_data; - pool = map->pool; - - if (!g_atomic_int_compare_and_exchange (map->locked, 0, 1)) { - msg_debug_pool ( - "don't try to reread map as it is locked by other process, will reread it later"); - jitter_timeout_event (map, TRUE, FALSE, FALSE); - return; - } - - /* Plan event */ + data = bk->data.hd; cbd = g_slice_alloc0 (sizeof (struct http_callback_data)); rspamd_snprintf (tmpbuf, sizeof (tmpbuf), @@ -679,10 +692,11 @@ http_callback (gint fd, short what, void *ud) cbd->out_fd = mkstemp (tmpbuf); if (cbd->out_fd == -1) { - g_slice_free1 (sizeof (*cbd), cbd); - msg_err_pool ("cannot create tempfile: %s", strerror (errno)); - jitter_timeout_event (map, FALSE, FALSE, TRUE); + msg_err_map ("cannot create tempfile: %s", strerror (errno)); g_atomic_int_set (map->locked, 0); + g_slice_free1 (sizeof (*cbd), cbd); + periodic->errored = TRUE; + rspamd_map_periodic_callback (-1, EV_TIMEOUT, periodic); return; } @@ -692,39 +706,165 @@ http_callback (gint fd, short what, void *ud) cbd->map = map; cbd->data = data; cbd->fd = -1; - cbd->cbdata.state = 0; - cbd->cbdata.prev_data = *cbd->map->user_data; - cbd->cbdata.cur_data = NULL; - cbd->cbdata.map = cbd->map; + cbd->check = check; + cbd->periodic = periodic; + MAP_RETAIN (periodic); + cbd->bk = bk; + MAP_RETAIN (bk); cbd->stage = map_resolve_host2; double_to_tv (map->cfg->map_timeout, &cbd->tv); REF_INIT_RETAIN (cbd, free_http_cbdata); - msg_debug_pool ("reading map data from %s", data->host); + msg_debug_map ("%s map data from %s", check ? "checking" : "reading", + data->host); /* Send both A and AAAA requests */ if (map->r->r) { if (rdns_make_request_full (map->r->r, rspamd_map_dns_callback, cbd, map->cfg->dns_timeout, map->cfg->dns_retransmits, 1, data->host, RDNS_REQUEST_A)) { - REF_RETAIN (cbd); + MAP_RETAIN (cbd); } if (rdns_make_request_full (map->r->r, rspamd_map_dns_callback, cbd, map->cfg->dns_timeout, map->cfg->dns_retransmits, 1, data->host, RDNS_REQUEST_AAAA)) { - REF_RETAIN (cbd); + MAP_RETAIN (cbd); } - jitter_timeout_event (map, FALSE, FALSE, FALSE); map->dtor = free_http_cbdata_dtor; map->dtor_data = cbd; } else { - msg_warn_pool ("cannot load map: DNS resolver is not initialized"); - jitter_timeout_event (map, FALSE, FALSE, TRUE); + msg_warn_map ("cannot load map: DNS resolver is not initialized"); + cbd->periodic->errored = TRUE; } - /* We don't need own ref as it is now refcounted by DNS requests */ - REF_RELEASE (cbd); + /* We don't need own ref as it is now ref counted by DNS handlers */ + MAP_RELEASE (cbd); +} + +static void +rspamd_map_http_check_callback (gint fd, short what, void *ud) +{ + struct map_periodic_cbdata *cbd = ud; + struct rspamd_map *map; + struct rspamd_map_backend *bk; + + map = cbd->map; + bk = g_ptr_array_index (cbd->map->backends, cbd->cur_backend); + + rspamd_map_common_http_callback (map, bk, cbd, TRUE); +} + +static void +rspamd_map_http_read_callback (gint fd, short what, void *ud) +{ + struct map_periodic_cbdata *cbd = ud; + struct rspamd_map *map; + struct rspamd_map_backend *bk; + + map = cbd->map; + bk = g_ptr_array_index (cbd->map->backends, cbd->cur_backend); + rspamd_map_common_http_callback (map, bk, cbd, FALSE); +} + +static void +rspamd_map_file_check_callback (gint fd, short what, void *ud) +{ + struct rspamd_map *map; + struct map_periodic_cbdata *periodic = ud; + struct file_map_data *data; + struct rspamd_map_backend *bk; + struct stat st; + + map = periodic->map; + + bk = g_ptr_array_index (map->backends, periodic->cur_backend); + data = bk->data.fd; + + if (stat (data->filename, &st) != -1 && + (st.st_mtime > data->st.st_mtime || data->st.st_mtime == -1)) { + /* File was modified since last check */ + memcpy (&data->st, &st, sizeof (struct stat)); + periodic->need_modify = TRUE; + periodic->cur_backend = 0; + rspamd_map_periodic_callback (-1, EV_TIMEOUT, periodic); + + return; + } + + /* Switch to the next backend */ + periodic->cur_backend ++; + rspamd_map_periodic_callback (-1, EV_TIMEOUT, periodic); +} + +static void +rspamd_map_file_read_callback (gint fd, short what, void *ud) +{ + struct rspamd_map *map; + struct map_periodic_cbdata *periodic = ud; + struct file_map_data *data; + struct rspamd_map_backend *bk; + + map = periodic->map; + + bk = g_ptr_array_index (map->backends, periodic->cur_backend); + data = bk->data.fd; + + msg_info_map ("rereading map file %s", data->filename); + + if (!read_map_file (map, data, bk, periodic)) { + periodic->errored = TRUE; + } + + /* Switch to the next backend */ + periodic->cur_backend ++; + rspamd_map_periodic_callback (-1, EV_TIMEOUT, periodic); +} + +static void +rspamd_map_periodic_callback (gint fd, short what, void *ud) +{ + struct rspamd_map_backend *bk; + struct map_periodic_cbdata *cbd = ud; + + if (cbd->errored) { + /* We should not check other backends if some backend has failed */ + rspamd_map_schedule_periodic (cbd->map, FALSE, FALSE, TRUE); + g_atomic_int_set (cbd->map->locked, 0); + MAP_RELEASE (cbd); + + return; + } + + /* For each backend we need to check for modifications */ + if (cbd->cur_backend >= cbd->map->backends->len) { + /* Last backend */ + MAP_RELEASE (cbd); + + return; + } + + bk = g_ptr_array_index (cbd->map->backends, cbd->cur_backend); + g_assert (bk != NULL); + + if (cbd->need_modify) { + /* Load data from the next backend */ + if (bk->protocol == MAP_PROTO_HTTP) { + rspamd_map_http_read_callback (fd, what, cbd); + } + else { + rspamd_map_file_read_callback (fd, what, cbd); + } + } + else { + /* Check the next backend */ + if (bk->protocol == MAP_PROTO_HTTP) { + rspamd_map_http_check_callback (fd, what, cbd); + } + else { + rspamd_map_file_check_callback (fd, what, cbd); + } + } } /* Start watching event for all maps */ @@ -735,29 +875,21 @@ rspamd_map_watch (struct rspamd_config *cfg, { GList *cur = cfg->maps; struct rspamd_map *map; - struct file_map_data *fdata; /* First of all do synced read of data */ while (cur) { map = cur->data; map->ev_base = ev_base; map->r = resolver; - event_base_set (map->ev_base, &map->ev); - if (map->protocol == MAP_PROTO_FILE) { - evtimer_set (&map->ev, file_callback, map); - /* Read initial data */ - fdata = map->map_data; - if (fdata->st.st_mtime != -1) { - /* Do not try to read non-existent file */ - read_map_file (map, map->map_data); - } - /* Plan event with jitter */ - jitter_timeout_event (map, FALSE, TRUE, FALSE); + if (!g_atomic_int_compare_and_exchange (map->locked, 0, 1)) { + msg_debug_map ( + "don't try to reread map as it is locked by other process, " + "will reread it later"); + rspamd_map_schedule_periodic (map, TRUE, TRUE, FALSE); } - else if (map->protocol == MAP_PROTO_HTTP) { - evtimer_set (&map->ev, http_callback, map); - jitter_timeout_event (map, FALSE, TRUE, FALSE); + else { + rspamd_map_schedule_periodic (map, FALSE, TRUE, FALSE); } cur = g_list_next (cur); @@ -769,10 +901,17 @@ rspamd_map_remove_all (struct rspamd_config *cfg) { struct rspamd_map *map; GList *cur; + struct rspamd_map_backend *bk; + guint i; for (cur = cfg->maps; cur != NULL; cur = g_list_next (cur)) { map = cur->data; + for (i = 0; i < map->backends->len; i ++) { + bk = g_ptr_array_index (map->backends, i); + MAP_RELEASE (bk); + } + if (map->dtor) { map->dtor (map->dtor_data); } @@ -780,26 +919,21 @@ rspamd_map_remove_all (struct rspamd_config *cfg) g_list_free (cfg->maps); cfg->maps = NULL; - - if (cfg->map_pool != NULL) { - rspamd_mempool_delete (cfg->map_pool); - cfg->map_pool = NULL; - } } static const gchar * rspamd_map_check_proto (struct rspamd_config *cfg, - const gchar *map_line, struct rspamd_map *map) + const gchar *map_line, struct rspamd_map_backend *bk) { const gchar *pos = map_line, *end, *end_key; - g_assert (map != NULL); + g_assert (bk != NULL); g_assert (pos != NULL); end = pos + strlen (pos); if (g_ascii_strncasecmp (pos, "sign+", sizeof ("sign+") - 1) == 0) { - map->is_signed = TRUE; + bk->is_signed = TRUE; pos += sizeof ("sign+") - 1; } @@ -808,10 +942,10 @@ rspamd_map_check_proto (struct rspamd_config *cfg, end_key = memchr (pos, '+', end - pos); if (end_key != NULL) { - map->trusted_pubkey = rspamd_pubkey_from_base32 (pos, end_key - pos, + bk->trusted_pubkey = rspamd_pubkey_from_base32 (pos, end_key - pos, RSPAMD_KEYPAIR_SIGN, RSPAMD_CRYPTOBOX_MODE_25519); - if (map->trusted_pubkey == NULL) { + if (bk->trusted_pubkey == NULL) { msg_err_config ("cannot read pubkey from map: %s", map_line); return NULL; @@ -820,10 +954,10 @@ rspamd_map_check_proto (struct rspamd_config *cfg, } else if (end - pos > 64) { /* Try hex encoding */ - map->trusted_pubkey = rspamd_pubkey_from_hex (pos, 64, + bk->trusted_pubkey = rspamd_pubkey_from_hex (pos, 64, RSPAMD_KEYPAIR_SIGN, RSPAMD_CRYPTOBOX_MODE_25519); - if (map->trusted_pubkey == NULL) { + if (bk->trusted_pubkey == NULL) { msg_err_config ("cannot read pubkey from map: %s", map_line); return NULL; @@ -841,24 +975,24 @@ rspamd_map_check_proto (struct rspamd_config *cfg, } } - map->protocol = MAP_PROTO_FILE; + bk->protocol = MAP_PROTO_FILE; if (g_ascii_strncasecmp (pos, "http://", sizeof ("http://") - 1) == 0) { - map->protocol = MAP_PROTO_HTTP; + bk->protocol = MAP_PROTO_HTTP; /* Include http:// */ - map->uri = rspamd_mempool_strdup (cfg->cfg_pool, pos); + bk->uri = g_strdup (pos); pos += sizeof ("http://") - 1; } else if (g_ascii_strncasecmp (pos, "file://", sizeof ("file://") - 1) == 0) { pos += sizeof ("file://") - 1; /* Exclude file:// */ - map->uri = rspamd_mempool_strdup (cfg->cfg_pool, pos); + bk->uri = g_strdup (pos); } else if (*pos == '/') { /* Trivial file case */ - map->uri = rspamd_mempool_strdup (cfg->cfg_pool, pos); + bk->uri = g_strdup (pos); } else { msg_err_config ("invalid map fetching protocol: %s", map_line); @@ -893,95 +1027,79 @@ rspamd_map_is_map (const gchar *map_line) return ret; } -struct rspamd_map * -rspamd_map_add (struct rspamd_config *cfg, - const gchar *map_line, - const gchar *description, - map_cb_t read_callback, - map_fin_cb_t fin_callback, - void **user_data) +static void +rspamd_map_backend_dtor (struct rspamd_map_backend *bk) { - struct rspamd_map *new_map; - const gchar *def; - struct file_map_data *fdata; - struct http_map_data *hdata; - gchar *cksum_encoded, cksum[rspamd_cryptobox_HASHBYTES]; - rspamd_mempool_t *pool; - struct http_parser_url up; - rspamd_ftok_t tok; - - if (cfg->map_pool == NULL) { - cfg->map_pool = rspamd_mempool_new (rspamd_mempool_suggest_size (), - "map"); - memcpy (cfg->map_pool->tag.uid, cfg->cfg_pool->tag.uid, - sizeof (cfg->map_pool->tag.uid)); + if (bk->protocol == MAP_PROTO_FILE) { + g_free (bk->data.fd->filename); + g_slice_free1 (sizeof (*bk->data.fd), bk->data.fd); + } + else { + g_free (bk->data.hd->host); + g_free (bk->data.hd->path); + g_slice_free1 (sizeof (*bk->data.hd), bk->data.hd); } - new_map = rspamd_mempool_alloc0 (cfg->map_pool, sizeof (struct rspamd_map)); + g_slice_free1 (sizeof (*bk), bk); +} - /* First of all detect protocol line */ - if (rspamd_map_check_proto (cfg, map_line, new_map) == NULL) { - return NULL; - } +static struct rspamd_map_backend * +rspamd_map_parse_backend (struct rspamd_config *cfg, const gchar *map_line) +{ + struct rspamd_map_backend *bk; + struct file_map_data *fdata = NULL; + struct http_map_data *hdata = NULL; + struct http_parser_url up; + rspamd_ftok_t tok; - new_map->read_callback = read_callback; - new_map->fin_callback = fin_callback; - new_map->user_data = user_data; - new_map->cfg = cfg; - new_map->id = g_random_int (); - new_map->locked = - rspamd_mempool_alloc0_shared (cfg->cfg_pool, sizeof (gint)); - def = new_map->uri; + bk = g_slice_alloc0 (sizeof (*bk)); + REF_INIT_RETAIN (bk, rspamd_map_backend_dtor); - if (description != NULL) { - new_map->description = - rspamd_mempool_strdup (cfg->cfg_pool, description); + if (!rspamd_map_check_proto (cfg, map_line, bk)) { + goto err; } /* Now check for each proto separately */ - if (new_map->protocol == MAP_PROTO_FILE) { - fdata = - rspamd_mempool_alloc0 (cfg->map_pool, - sizeof (struct file_map_data)); - if (access (def, R_OK) == -1) { + if (bk->protocol == MAP_PROTO_FILE) { + fdata = g_slice_alloc0 (sizeof (struct file_map_data)); + + if (access (bk->uri, R_OK) == -1) { if (errno != ENOENT) { - msg_err_config ("cannot open file '%s': %s", def, strerror - (errno)); + msg_err_config ("cannot open file '%s': %s", bk->uri, strerror (errno)); return NULL; } msg_info_config ( - "map '%s' is not found, but it can be loaded automatically later", - def); + "map '%s' is not found, but it can be loaded automatically later", + bk->uri); /* We still can add this file */ fdata->st.st_mtime = -1; } else { - stat (def, &fdata->st); + stat (bk->uri, &fdata->st); } - fdata->filename = rspamd_mempool_strdup (cfg->map_pool, def); - new_map->map_data = fdata; + + fdata->filename = g_strdup (bk->uri); + bk->data.fd = fdata; } - else if (new_map->protocol == MAP_PROTO_HTTP) { - hdata = - rspamd_mempool_alloc0 (cfg->map_pool, - sizeof (struct http_map_data)); + else if (bk->protocol == MAP_PROTO_HTTP) { + hdata = g_slice_alloc0 (sizeof (struct http_map_data)); memset (&up, 0, sizeof (up)); - if (http_parser_parse_url (new_map->uri, strlen (new_map->uri), FALSE, + if (http_parser_parse_url (bk->uri, strlen (bk->uri), FALSE, &up) != 0) { - msg_err_config ("cannot parse HTTP url: %s", new_map->uri); - return NULL; + msg_err_config ("cannot parse HTTP url: %s", bk->uri); + goto err; } else { if (!(up.field_set & 1 << UF_HOST)) { - msg_err_config ("cannot parse HTTP url: %s: no host", new_map->uri); + msg_err_config ("cannot parse HTTP url: %s: no host", bk->uri); return NULL; } - tok.begin = new_map->uri + up.field_data[UF_HOST].off; + tok.begin = bk->uri + up.field_data[UF_HOST].off; tok.len = up.field_data[UF_HOST].len; - hdata->host = rspamd_mempool_ftokdup (cfg->map_pool, &tok); + hdata->host = rspamd_ftokdup (&tok); if (up.field_set & 1 << UF_PORT) { hdata->port = up.port; @@ -991,31 +1109,235 @@ rspamd_map_add (struct rspamd_config *cfg, } if (up.field_set & 1 << UF_PATH) { - tok.begin = new_map->uri + up.field_data[UF_PATH].off; + tok.begin = bk->uri + up.field_data[UF_PATH].off; tok.len = strlen (tok.begin); - hdata->path = rspamd_mempool_ftokdup (cfg->map_pool, &tok); + hdata->path = rspamd_ftokdup (&tok); } } - new_map->map_data = hdata; + bk->data.hd = hdata; + } + + return bk; + +err: + MAP_RELEASE (bk); + + if (hdata) { + g_slice_free1 (sizeof (*hdata), hdata); + } + + if (fdata) { + g_slice_free1 (sizeof (*fdata), fdata); + } + + return NULL; +} +static void +rspamd_map_calculate_hash (struct rspamd_map *map) +{ + struct rspamd_map_backend *bk; + guint i; + rspamd_cryptobox_hash_state_t st; + gchar *cksum_encoded, cksum[rspamd_cryptobox_HASHBYTES]; + + rspamd_cryptobox_hash_init (&st, NULL, 0); + + for (i = 0; i < map->backends->len; i ++) { + bk = g_ptr_array_index (map->backends, i); + rspamd_cryptobox_hash_update (&st, bk->uri, strlen (bk->uri)); } - /* Temp pool */ - rspamd_cryptobox_hash (cksum, new_map->uri, strlen (new_map->uri), NULL, 0); + rspamd_cryptobox_hash_final (&st, cksum); cksum_encoded = rspamd_encode_base32 (cksum, sizeof (cksum)); - new_map->pool = rspamd_mempool_new (rspamd_mempool_suggest_size (), "map"); - rspamd_strlcpy (new_map->pool->tag.uid, cksum_encoded, - sizeof (new_map->pool->tag.uid)); + rspamd_strlcpy (map->tag, cksum_encoded, sizeof (map->tag)); g_free (cksum_encoded); - pool = new_map->pool; - msg_info_pool ("added map %s", new_map->uri); +} +struct rspamd_map * +rspamd_map_add (struct rspamd_config *cfg, + const gchar *map_line, + const gchar *description, + map_cb_t read_callback, + map_fin_cb_t fin_callback, + void **user_data) +{ + struct rspamd_map *map; + struct rspamd_map_backend *bk; + + bk = rspamd_map_parse_backend (cfg, map_line); + if (bk == NULL) { + return NULL; + } + + map = g_slice_alloc0 (sizeof (struct rspamd_map)); + map->read_callback = read_callback; + map->fin_callback = fin_callback; + map->user_data = user_data; + map->cfg = cfg; + map->id = g_random_int (); + map->locked = + rspamd_mempool_alloc0_shared (cfg->cfg_pool, sizeof (gint)); + map->backends = g_ptr_array_sized_new (1); + g_ptr_array_add (map->backends, bk); + map->name = g_strdup (map_line); + map->poll_timeout = cfg->map_timeout; + + if (description != NULL) { + map->description = g_strdup (description); + } - cfg->maps = g_list_prepend (cfg->maps, new_map); + rspamd_map_calculate_hash (map); + msg_info_map ("added map %s", bk->uri); - return new_map; + cfg->maps = g_list_prepend (cfg->maps, map); + + return map; +} + +struct rspamd_map* +rspamd_map_add_from_ucl (struct rspamd_config *cfg, + const ucl_object_t *obj, + const gchar *description, + map_cb_t read_callback, + map_fin_cb_t fin_callback, + void **user_data) +{ + ucl_object_iter_t it = NULL; + const ucl_object_t *cur, *elt; + struct rspamd_map *map; + struct rspamd_map_backend *bk; + + g_assert (obj != NULL); + + if (ucl_object_type (obj) == UCL_STRING) { + /* Just a plain string */ + return rspamd_map_add (cfg, ucl_object_tostring (obj), NULL, + read_callback, fin_callback, user_data); + } + + map = g_slice_alloc0 (sizeof (struct rspamd_map)); + map->read_callback = read_callback; + map->fin_callback = fin_callback; + map->user_data = user_data; + map->cfg = cfg; + map->id = g_random_int (); + map->locked = + rspamd_mempool_alloc0_shared (cfg->cfg_pool, sizeof (gint)); + map->backends = g_ptr_array_new (); + map->poll_timeout = cfg->map_timeout; + + if (description) { + map->description = g_strdup (description); + } + + if (ucl_object_type (obj) == UCL_ARRAY) { + /* Add array of maps as multiple backends */ + while ((cur = ucl_object_iterate (obj, &it, true)) != NULL) { + if (ucl_object_type (cur) == UCL_STRING) { + bk = rspamd_map_parse_backend (cfg, ucl_object_tostring (cur)); + + if (bk != NULL) { + g_ptr_array_add (map->backends, bk); + + if (!map->name) { + map->name = g_strdup (ucl_object_tostring (cur)); + } + } + } + else { + msg_err_config ("bad map element type: %s", + ucl_object_type_to_string (ucl_object_type (cur))); + } + } + + if (map->backends->len == 0) { + msg_err_config ("map has no urls to be loaded"); + goto err; + } + } + else if (ucl_object_type (obj) == UCL_OBJECT) { + elt = ucl_object_lookup (obj, "name"); + if (elt && ucl_object_type (elt) == UCL_STRING) { + map->name = g_strdup (ucl_object_tostring (elt)); + } + + elt = ucl_object_lookup (obj, "description"); + if (elt && ucl_object_type (elt) == UCL_STRING) { + if (map->description) { + g_free (map->description); + } + + map->description = g_strdup (ucl_object_tostring (elt)); + } + + elt = ucl_object_lookup_any (obj, "timeout", "poll", "poll_time", + "watch_interval", NULL); + if (elt) { + map->poll_timeout = ucl_object_todouble (elt); + } + + elt = ucl_object_lookup_any (obj, "upstreams", "url", "urls", NULL); + if (elt == NULL) { + msg_err_config ("map has no urls to be loaded"); + goto err; + } + + if (ucl_object_type (obj) == UCL_ARRAY) { + /* Add array of maps as multiple backends */ + while ((cur = ucl_object_iterate (elt, &it, true)) != NULL) { + if (ucl_object_type (cur) == UCL_STRING) { + bk = rspamd_map_parse_backend (cfg, ucl_object_tostring (cur)); + + if (bk != NULL) { + g_ptr_array_add (map->backends, bk); + + if (!map->name) { + map->name = g_strdup (ucl_object_tostring (cur)); + } + } + } + else { + msg_err_config ("bad map element type: %s", + ucl_object_type_to_string (ucl_object_type (cur))); + goto err; + } + } + + if (map->backends->len == 0) { + msg_err_config ("map has no urls to be loaded"); + goto err; + } + } + else if (ucl_object_type (elt) == UCL_STRING) { + bk = rspamd_map_parse_backend (cfg, ucl_object_tostring (elt)); + + if (bk != NULL) { + g_ptr_array_add (map->backends, bk); + + if (!map->name) { + map->name = g_strdup (ucl_object_tostring (cur)); + } + } + } + + if (map->backends->len == 0) { + msg_err_config ("map has no urls to be loaded"); + goto err; + } + } + + return map; + +err: + g_ptr_array_free (map->backends, TRUE); + g_free (map->name); + g_free (map->description); + g_slice_free1 (sizeof (*map), map); + + return NULL; } /** @@ -1023,18 +1345,18 @@ rspamd_map_add (struct rspamd_config *cfg, */ #define MAP_STORE_KEY do { \ - key = rspamd_mempool_alloc (pool, p - c + 1); \ + key = g_malloc (p - c + 1); \ rspamd_strlcpy (key, c, p - c + 1); \ } while (0) #define MAP_STORE_VALUE do { \ - value = rspamd_mempool_alloc (pool, p - c + 1); \ + value = g_malloc (p - c + 1); \ rspamd_strlcpy (value, c, p - c + 1); \ value = g_strstrip (value); \ } while (0) gchar * -rspamd_parse_kv_list (rspamd_mempool_t * pool, +rspamd_parse_kv_list ( gchar * chunk, gint len, struct map_cb_data *data, @@ -1058,6 +1380,7 @@ rspamd_parse_kv_list (rspamd_mempool_t * pool, }; gchar *c, *p, *key = NULL, *value = NULL, *end; + struct rspamd_map *map = data->map; p = chunk; c = p; @@ -1095,8 +1418,9 @@ rspamd_parse_kv_list (rspamd_mempool_t * pool, /* Store a single key */ MAP_STORE_KEY; func (data->cur_data, key, default_value); - msg_debug_pool ("insert key only pair: %s -> %s", + msg_debug_map ("insert key only pair: %s -> %s", key, default_value); + g_free (key); } key = NULL; @@ -1107,8 +1431,9 @@ rspamd_parse_kv_list (rspamd_mempool_t * pool, /* Store a single key */ MAP_STORE_KEY; func (data->cur_data, key, default_value); - msg_debug_pool ("insert key only pair: %s -> %s", + msg_debug_map ("insert key only pair: %s -> %s", key, default_value); + g_free (key); } data->state = map_read_eol; @@ -1176,8 +1501,9 @@ rspamd_parse_kv_list (rspamd_mempool_t * pool, /* Store a single key */ MAP_STORE_KEY; func (data->cur_data, key, default_value); - msg_debug_pool ("insert key only pair: %s -> %s", + msg_debug_map ("insert key only pair: %s -> %s", key, default_value); + g_free (key); key = NULL; } @@ -1188,8 +1514,10 @@ rspamd_parse_kv_list (rspamd_mempool_t * pool, /* Store a single key */ MAP_STORE_KEY; func (data->cur_data, key, default_value); - msg_debug_pool ("insert key only pair: %s -> %s", + + msg_debug_map ("insert key only pair: %s -> %s", key, default_value); + g_free (key); key = NULL; } @@ -1234,15 +1562,18 @@ rspamd_parse_kv_list (rspamd_mempool_t * pool, /* Store a single key */ MAP_STORE_VALUE; func (data->cur_data, key, value); - msg_debug_pool ("insert key value pair: %s -> %s", + msg_debug_map ("insert key value pair: %s -> %s", key, value); + g_free (key); + g_free (value); key = NULL; value = NULL; } else { func (data->cur_data, key, default_value); - msg_debug_pool ("insert key only pair: %s -> %s", + msg_debug_map ("insert key only pair: %s -> %s", key, default_value); + g_free (key); key = NULL; } @@ -1253,15 +1584,18 @@ rspamd_parse_kv_list (rspamd_mempool_t * pool, /* Store a single key */ MAP_STORE_VALUE; func (data->cur_data, key, value); - msg_debug_pool ("insert key value pair: %s -> %s", + msg_debug_map ("insert key value pair: %s -> %s", key, value); + g_free (key); + g_free (value); key = NULL; value = NULL; } else { func (data->cur_data, key, default_value); - msg_debug_pool ("insert key only pair: %s -> %s", + msg_debug_map ("insert key only pair: %s -> %s", key, default_value); + g_free (key); key = NULL; } @@ -1314,8 +1648,9 @@ rspamd_parse_kv_list (rspamd_mempool_t * pool, /* Store a single key */ MAP_STORE_KEY; func (data->cur_data, key, default_value); - msg_debug_pool ("insert key only pair: %s -> %s", + msg_debug_map ("insert key only pair: %s -> %s", key, default_value); + g_free (key); key = NULL; } break; @@ -1325,15 +1660,18 @@ rspamd_parse_kv_list (rspamd_mempool_t * pool, /* Store a single key */ MAP_STORE_VALUE; func (data->cur_data, key, value); - msg_debug_pool ("insert key value pair: %s -> %s", + msg_debug_map ("insert key value pair: %s -> %s", key, value); + g_free (key); + g_free (value); key = NULL; value = NULL; } else { func (data->cur_data, key, default_value); - msg_debug_pool ("insert key only pair: %s -> %s", + msg_debug_map ("insert key only pair: %s -> %s", key, default_value); + g_free (key); key = NULL; } break; @@ -1347,80 +1685,95 @@ rspamd_parse_kv_list (rspamd_mempool_t * pool, * Radix tree helper function */ static void -radix_tree_insert_helper (gpointer st, gconstpointer key, gpointer value) +radix_tree_insert_helper (gpointer st, gconstpointer key, gconstpointer value) { radix_compressed_t *tree = (radix_compressed_t *)st; rspamd_radix_add_iplist ((gchar *)key, " ,;", tree, value); } +static void +hash_insert_helper (gpointer st, gconstpointer key, gconstpointer value) +{ + GHashTable *ht = st; + gpointer k, v; + + k = g_strdup (key); + v = g_strdup (value); + g_hash_table_replace (ht, k, v); +} + /* Helpers */ gchar * -rspamd_hosts_read (rspamd_mempool_t * pool, +rspamd_hosts_read ( gchar * chunk, gint len, struct map_cb_data *data, gboolean final) { if (data->cur_data == NULL) { - data->cur_data = g_hash_table_new (rspamd_strcase_hash, - rspamd_strcase_equal); + data->cur_data = g_hash_table_new_full (rspamd_strcase_hash, + rspamd_strcase_equal, g_free, g_free); } - return rspamd_parse_kv_list (pool, + return rspamd_parse_kv_list ( chunk, len, data, - (insert_func) g_hash_table_insert, + hash_insert_helper, hash_fill, final); } void -rspamd_hosts_fin (rspamd_mempool_t * pool, struct map_cb_data *data) +rspamd_hosts_fin (struct map_cb_data *data) { + struct rspamd_map *map = data->map; + if (data->prev_data) { - g_hash_table_destroy (data->prev_data); + g_hash_table_unref (data->prev_data); } if (data->cur_data) { - msg_info_pool ("read hash of %d elements", g_hash_table_size + msg_info_map ("read hash of %d elements", g_hash_table_size (data->cur_data)); } } gchar * -rspamd_kv_list_read (rspamd_mempool_t * pool, +rspamd_kv_list_read ( gchar * chunk, gint len, struct map_cb_data *data, gboolean final) { if (data->cur_data == NULL) { - data->cur_data = g_hash_table_new (rspamd_strcase_hash, - rspamd_strcase_equal); + data->cur_data = g_hash_table_new_full (rspamd_strcase_hash, + rspamd_strcase_equal, g_free, g_free); } - return rspamd_parse_kv_list (pool, + return rspamd_parse_kv_list ( chunk, len, data, - (insert_func) g_hash_table_insert, + hash_insert_helper, "", final); } void -rspamd_kv_list_fin (rspamd_mempool_t * pool, struct map_cb_data *data) +rspamd_kv_list_fin (struct map_cb_data *data) { + struct rspamd_map *map = data->map; + if (data->prev_data) { - g_hash_table_destroy (data->prev_data); + g_hash_table_unref (data->prev_data); } if (data->cur_data) { - msg_info_pool ("read hash of %d elements", g_hash_table_size + msg_info_map ("read hash of %d elements", g_hash_table_size (data->cur_data)); } } gchar * -rspamd_radix_read (rspamd_mempool_t * pool, +rspamd_radix_read ( gchar * chunk, gint len, struct map_cb_data *data, @@ -1428,30 +1781,33 @@ rspamd_radix_read (rspamd_mempool_t * pool, { radix_compressed_t *tree; rspamd_mempool_t *rpool; + struct rspamd_map *map = data->map; if (data->cur_data == NULL) { tree = radix_create_compressed (); rpool = radix_get_pool (tree); - memcpy (rpool->tag.uid, pool->tag.uid, sizeof (rpool->tag.uid)); + memcpy (rpool->tag.uid, map->tag, sizeof (rpool->tag.uid)); data->cur_data = tree; } - return rspamd_parse_kv_list (pool, + return rspamd_parse_kv_list ( chunk, len, data, - (insert_func) radix_tree_insert_helper, + radix_tree_insert_helper, hash_fill, final); } void -rspamd_radix_fin (rspamd_mempool_t * pool, struct map_cb_data *data) +rspamd_radix_fin (struct map_cb_data *data) { + struct rspamd_map *map = data->map; + if (data->prev_data) { radix_destroy_compressed (data->prev_data); } if (data->cur_data) { - msg_info_pool ("read radix trie of %z elements: %s", + msg_info_map ("read radix trie of %z elements: %s", radix_get_size (data->cur_data), radix_get_info (data->cur_data)); } } @@ -1494,6 +1850,10 @@ rspamd_regexp_map_destroy (struct rspamd_regexp_map *re_map) rspamd_regexp_unref (re); } + for (i = 0; i < re_map->values->len; i ++) { + g_free (g_ptr_array_index (re_map->values, i)); + } + g_ptr_array_free (re_map->regexps, TRUE); g_ptr_array_free (re_map->values, TRUE); @@ -1519,18 +1879,18 @@ rspamd_regexp_map_destroy (struct rspamd_regexp_map *re_map) } static void -rspamd_re_map_insert_helper (gpointer st, gpointer key, gpointer value) +rspamd_re_map_insert_helper (gpointer st, gconstpointer key, gconstpointer value) { struct rspamd_regexp_map *re_map = st; + struct rspamd_map *map; rspamd_regexp_t *re; GError *err = NULL; - rspamd_mempool_t *pool; - pool = re_map->map->pool; + map = re_map->map; re = rspamd_regexp_new (key, NULL, &err); if (re == NULL) { - msg_err_pool ("cannot parse regexp %s: %e", key, err); + msg_err_map ("cannot parse regexp %s: %e", key, err); if (err) { g_error_free (err); @@ -1540,7 +1900,7 @@ rspamd_re_map_insert_helper (gpointer st, gpointer key, gpointer value) } g_ptr_array_add (re_map->regexps, re); - g_ptr_array_add (re_map->values, value); + g_ptr_array_add (re_map->values, g_strdup (value)); } static void @@ -1550,14 +1910,14 @@ rspamd_re_map_finalize (struct rspamd_regexp_map *re_map) guint i; hs_platform_info_t plt; hs_compile_error_t *err; - rspamd_mempool_t *pool; + struct rspamd_map *map; rspamd_regexp_t *re; gint pcre_flags; - pool = re_map->map->pool; + map = re_map->map; if (hs_populate_platform (&plt) != HS_SUCCESS) { - msg_err_pool ("cannot populate hyperscan platform"); + msg_err_map ("cannot populate hyperscan platform"); return; } @@ -1605,7 +1965,7 @@ rspamd_re_map_finalize (struct rspamd_regexp_map *re_map) &re_map->hs_db, &err) != HS_SUCCESS) { - msg_err_pool ("cannot create tree of regexp when processing '%s': %s", + msg_err_map ("cannot create tree of regexp when processing '%s': %s", re_map->patterns[err->expression], err->message); re_map->hs_db = NULL; hs_free_compile_error (err); @@ -1614,7 +1974,7 @@ rspamd_re_map_finalize (struct rspamd_regexp_map *re_map) } if (hs_alloc_scratch (re_map->hs_db, &re_map->hs_scratch) != HS_SUCCESS) { - msg_err_pool ("cannot allocate scratch space for hyperscan"); + msg_err_map ("cannot allocate scratch space for hyperscan"); hs_free_database (re_map->hs_db); re_map->hs_db = NULL; } @@ -1622,7 +1982,7 @@ rspamd_re_map_finalize (struct rspamd_regexp_map *re_map) } gchar * -rspamd_regexp_list_read (rspamd_mempool_t *pool, +rspamd_regexp_list_read ( gchar *chunk, gint len, struct map_cb_data *data, @@ -1635,19 +1995,20 @@ rspamd_regexp_list_read (rspamd_mempool_t *pool, data->cur_data = re_map; } - return rspamd_parse_kv_list (pool, + return rspamd_parse_kv_list ( chunk, len, data, - (insert_func) rspamd_re_map_insert_helper, + rspamd_re_map_insert_helper, hash_fill, final); } void -rspamd_regexp_list_fin (rspamd_mempool_t *pool, struct map_cb_data *data) +rspamd_regexp_list_fin (struct map_cb_data *data) { struct rspamd_regexp_map *re_map; + struct rspamd_map *map = data->map; if (data->prev_data) { rspamd_regexp_map_destroy (data->prev_data); @@ -1655,7 +2016,7 @@ rspamd_regexp_list_fin (rspamd_mempool_t *pool, struct map_cb_data *data) if (data->cur_data) { re_map = data->cur_data; rspamd_re_map_finalize (re_map); - msg_info_pool ("read regexp list of %ud elements", + msg_info_map ("read regexp list of %ud elements", re_map->regexps->len); } } diff --git a/src/libutil/map.h b/src/libutil/map.h index f7cbc3076..3b6439efb 100644 --- a/src/libutil/map.h +++ b/src/libutil/map.h @@ -4,6 +4,7 @@ #include "config.h" #include <event.h> +#include "ucl.h" #include "mem_pool.h" #include "radix.h" #include "dns.h" @@ -18,9 +19,9 @@ struct map_cb_data; /** * Callback types */ -typedef gchar * (*map_cb_t)(rspamd_mempool_t *pool, gchar *chunk, gint len, +typedef gchar * (*map_cb_t)(gchar *chunk, gint len, struct map_cb_data *data, gboolean final); -typedef void (*map_fin_cb_t)(rspamd_mempool_t *pool, struct map_cb_data *data); +typedef void (*map_fin_cb_t)(struct map_cb_data *data); /** * Common map object @@ -56,6 +57,16 @@ struct rspamd_map* rspamd_map_add (struct rspamd_config *cfg, void **user_data); /** + * Add map from ucl + */ +struct rspamd_map* rspamd_map_add_from_ucl (struct rspamd_config *cfg, + const ucl_object_t *obj, + const gchar *description, + map_cb_t read_callback, + map_fin_cb_t fin_callback, + void **user_data); + +/** * Start watching of maps by adding events to libevent event loop */ void rspamd_map_watch (struct rspamd_config *cfg, @@ -77,50 +88,50 @@ typedef void (*insert_func) (gpointer st, gconstpointer key, /** * Radix list is a list like ip/mask */ -gchar * rspamd_radix_read (rspamd_mempool_t *pool, +gchar * rspamd_radix_read ( gchar *chunk, gint len, struct map_cb_data *data, gboolean final); -void rspamd_radix_fin (rspamd_mempool_t *pool, struct map_cb_data *data); +void rspamd_radix_fin (struct map_cb_data *data); /** * Host list is an ordinal list of hosts or domains */ -gchar * rspamd_hosts_read (rspamd_mempool_t *pool, +gchar * rspamd_hosts_read ( gchar *chunk, gint len, struct map_cb_data *data, gboolean final); -void rspamd_hosts_fin (rspamd_mempool_t *pool, struct map_cb_data *data); +void rspamd_hosts_fin (struct map_cb_data *data); /** * Kv list is an ordinal list of keys and values separated by whitespace */ -gchar * rspamd_kv_list_read (rspamd_mempool_t *pool, +gchar * rspamd_kv_list_read ( gchar *chunk, gint len, struct map_cb_data *data, gboolean final); -void rspamd_kv_list_fin (rspamd_mempool_t *pool, struct map_cb_data *data); +void rspamd_kv_list_fin (struct map_cb_data *data); /** * Regexp list is a list of regular expressions */ struct rspamd_regexp_map; -gchar * rspamd_regexp_list_read (rspamd_mempool_t *pool, +gchar * rspamd_regexp_list_read ( gchar *chunk, gint len, struct map_cb_data *data, gboolean final); -void rspamd_regexp_list_fin (rspamd_mempool_t *pool, struct map_cb_data *data); +void rspamd_regexp_list_fin (struct map_cb_data *data); /** * FSM for lists parsing (support comments, blank lines and partial replies) */ gchar * -rspamd_parse_kv_list (rspamd_mempool_t * pool, +rspamd_parse_kv_list ( gchar * chunk, gint len, struct map_cb_data *data, diff --git a/src/libutil/map_private.h b/src/libutil/map_private.h index c26517574..9bdca5f90 100644 --- a/src/libutil/map_private.h +++ b/src/libutil/map_private.h @@ -24,30 +24,57 @@ typedef void (*rspamd_map_dtor) (gpointer p); +#define msg_err_map(...) rspamd_default_log_function (G_LOG_LEVEL_CRITICAL, \ + "map", map->tag, \ + G_STRFUNC, \ + __VA_ARGS__) +#define msg_warn_map(...) rspamd_default_log_function (G_LOG_LEVEL_WARNING, \ + "map", map->tag, \ + G_STRFUNC, \ + __VA_ARGS__) +#define msg_info_map(...) rspamd_default_log_function (G_LOG_LEVEL_INFO, \ + "map", map->tag, \ + G_STRFUNC, \ + __VA_ARGS__) +#define msg_debug_map(...) rspamd_default_log_function (G_LOG_LEVEL_DEBUG, \ + "map", map->tag, \ + G_STRFUNC, \ + __VA_ARGS__) + enum fetch_proto { MAP_PROTO_FILE, MAP_PROTO_HTTP, }; -struct rspamd_map { - rspamd_mempool_t *pool; - struct rspamd_dns_resolver *r; + +struct rspamd_map_backend { + enum fetch_proto protocol; gboolean is_signed; struct rspamd_cryptobox_pubkey *trusted_pubkey; + union { + struct file_map_data *fd; + struct http_map_data *hd; + } data; + gchar *uri; + ref_entry_t ref; +}; + +struct rspamd_map { + struct rspamd_dns_resolver *r; struct rspamd_config *cfg; - enum fetch_proto protocol; + GPtrArray *backends; map_cb_t read_callback; map_fin_cb_t fin_callback; void **user_data; - struct event ev; - struct timeval tv; struct event_base *ev_base; - void *map_data; - gchar *uri; gchar *description; + gchar *name; guint32 id; - guint32 checksum; + struct event ev; + struct timeval tv; + gdouble poll_timeout; /* Shared lock for temporary disabling of map reading (e.g. when this map is written by UI) */ gint *locked; + gchar tag[MEMPOOL_UID_LEN]; rspamd_map_dtor dtor; gpointer dtor_data; }; @@ -56,7 +83,7 @@ struct rspamd_map { * Data specific to file maps */ struct file_map_data { - const gchar *filename; + gchar *filename; struct stat st; }; @@ -64,12 +91,12 @@ struct file_map_data { * Data specific to HTTP maps */ struct http_map_data { - struct addrinfo *addr; - guint16 port; gchar *path; gchar *host; + gchar *last_signature; time_t last_checked; gboolean request_sent; + guint16 port; }; enum rspamd_map_http_stage { @@ -80,20 +107,31 @@ enum rspamd_map_http_stage { map_load_signature }; +struct map_periodic_cbdata { + struct rspamd_map *map; + struct map_cb_data cbdata; + gboolean need_modify; + gboolean errored; + guint cur_backend; + ref_entry_t ref; +}; + struct http_callback_data { struct event_base *ev_base; struct rspamd_http_connection *conn; rspamd_inet_addr_t *addr; - struct timeval tv; struct rspamd_map *map; + struct rspamd_map_backend *bk; struct http_map_data *data; - struct map_cb_data cbdata; + struct map_periodic_cbdata *periodic; struct rspamd_cryptobox_pubkey *pk; + gboolean check; gchar *tmpfile; enum rspamd_map_http_stage stage; gint out_fd; gint fd; + struct timeval tv; ref_entry_t ref; }; diff --git a/src/libutil/shingles.c b/src/libutil/shingles.c index c2acae6d3..66f6b457c 100644 --- a/src/libutil/shingles.c +++ b/src/libutil/shingles.c @@ -19,15 +19,16 @@ #define SHINGLES_WINDOW 3 -struct rspamd_shingle* +struct rspamd_shingle* RSPAMD_OPTIMIZE("unroll-loops") rspamd_shingles_generate (GArray *input, const guchar key[16], rspamd_mempool_t *pool, rspamd_shingles_filter filter, - gpointer filterd) + gpointer filterd, + enum rspamd_shingle_alg alg) { struct rspamd_shingle *res; - GArray *hashes[RSPAMD_SHINGLE_SIZE]; + guint64 **hashes; rspamd_sipkey_t keys[RSPAMD_SHINGLE_SIZE]; guchar shabuf[rspamd_cryptobox_HASHBYTES], *out_key; const guchar *cur_key; @@ -35,7 +36,9 @@ rspamd_shingles_generate (GArray *input, rspamd_ftok_t *word; rspamd_cryptobox_hash_state_t bs; guint64 val; - gint i, j, beg = 0; + gint i, j, k; + gsize hlen, beg = 0; + enum rspamd_cryptobox_fast_hash_type ht; if (pool != NULL) { res = rspamd_mempool_alloc (pool, sizeof (*res)); @@ -50,9 +53,11 @@ rspamd_shingles_generate (GArray *input, out_key = (guchar *)&keys[0]; /* Init hashes pipes and keys */ + hashes = g_slice_alloc (sizeof (*hashes) * RSPAMD_SHINGLE_SIZE); + hlen = input->len > SHINGLES_WINDOW ? (input->len - SHINGLES_WINDOW + 1) : 1; + for (i = 0; i < RSPAMD_SHINGLE_SIZE; i ++) { - hashes[i] = g_array_sized_new (FALSE, FALSE, sizeof (guint64), - input->len + SHINGLES_WINDOW); + hashes[i] = g_slice_alloc (hlen * sizeof (guint64)); /* * To generate a set of hashes we just apply sha256 to the * initial key as many times as many hashes are required and @@ -71,32 +76,83 @@ rspamd_shingles_generate (GArray *input, } /* Now parse input words into a vector of hashes using rolling window */ - for (i = 0; i <= (gint)input->len; i ++) { - if (i - beg >= SHINGLES_WINDOW || i == (gint)input->len) { - for (j = beg; j < i; j ++) { - word = &g_array_index (input, rspamd_ftok_t, j); - row = rspamd_fstring_append (row, word->begin, word->len); + if (alg == RSPAMD_SHINGLES_OLD) { + for (i = 0; i <= (gint)input->len; i ++) { + if (i - beg >= SHINGLES_WINDOW || i == (gint)input->len) { + for (j = beg; j < i; j ++) { + word = &g_array_index (input, rspamd_ftok_t, j); + row = rspamd_fstring_append (row, word->begin, word->len); + } + + /* Now we need to create a new row here */ + for (j = 0; j < RSPAMD_SHINGLE_SIZE; j ++) { + rspamd_cryptobox_siphash ((guchar *)&val, row->str, row->len, + keys[j]); + g_assert (hlen > beg); + hashes[j][beg] = val; + } + + beg++; + + row = rspamd_fstring_assign (row, "", 0); } - beg++; + } + } + else { + guint64 res[SHINGLES_WINDOW * RSPAMD_SHINGLE_SIZE]; + + switch (alg) { + case RSPAMD_SHINGLES_XXHASH: + ht = RSPAMD_CRYPTOBOX_XXHASH64; + break; + case RSPAMD_SHINGLES_MUMHASH: + ht = RSPAMD_CRYPTOBOX_MUMHASH; + break; + default: + ht = RSPAMD_CRYPTOBOX_HASHFAST_INDEPENDENT; + break; + } - /* Now we need to create a new row here */ - for (j = 0; j < RSPAMD_SHINGLE_SIZE; j ++) { - rspamd_cryptobox_siphash ((guchar *)&val, row->str, row->len, - keys[j]); - g_array_append_val (hashes[j], val); + memset (res, 0, sizeof (res)); + + for (i = 0; i <= (gint)input->len; i ++) { + if (i - beg >= SHINGLES_WINDOW || i == (gint)input->len) { + + for (j = 0; j < RSPAMD_SHINGLE_SIZE; j ++) { + /* Shift hashes window to right */ + for (k = 0; k < SHINGLES_WINDOW - 1; k ++) { + res[j * SHINGLES_WINDOW + k] = + res[j * SHINGLES_WINDOW + k + 1]; + } + + word = &g_array_index (input, rspamd_ftok_t, beg); + /* Insert the last element to the pipe */ + res[j * SHINGLES_WINDOW + SHINGLES_WINDOW - 1] = + rspamd_cryptobox_fast_hash_specific (ht, + word->begin, word->len, + *(guint64 *)keys[j]); + val = 0; + for (k = 0; k < SHINGLES_WINDOW; k ++) { + val ^= res[j * SHINGLES_WINDOW + k] >> (8 * (SHINGLES_WINDOW - k - 1)); + } + + g_assert (hlen > beg); + hashes[j][beg] = val; + } + beg++; } - - row = rspamd_fstring_assign (row, "", 0); } } /* Now we need to filter all hashes and make a shingles result */ for (i = 0; i < RSPAMD_SHINGLE_SIZE; i ++) { - res->hashes[i] = filter ((guint64 *)hashes[i]->data, hashes[i]->len, + res->hashes[i] = filter (hashes[i], hlen, i, key, filterd); - g_array_free (hashes[i], TRUE); + g_slice_free1 (hlen * sizeof (guint64), hashes[i]); } + g_slice_free1 (sizeof (*hashes) * RSPAMD_SHINGLE_SIZE, hashes); + rspamd_fstring_free (row); return res; diff --git a/src/libutil/shingles.h b/src/libutil/shingles.h index d252a78f6..fd7d3bfc7 100644 --- a/src/libutil/shingles.h +++ b/src/libutil/shingles.h @@ -25,6 +25,13 @@ struct rspamd_shingle { guint64 hashes[RSPAMD_SHINGLE_SIZE]; }; +enum rspamd_shingle_alg { + RSPAMD_SHINGLES_OLD = 0, + RSPAMD_SHINGLES_XXHASH, + RSPAMD_SHINGLES_MUMHASH, + RSPAMD_SHINGLES_FAST +}; + /** * Shingles filtering function * @param input input array of hashes @@ -48,7 +55,8 @@ struct rspamd_shingle* rspamd_shingles_generate (GArray *input, const guchar key[16], rspamd_mempool_t *pool, rspamd_shingles_filter filter, - gpointer filterd); + gpointer filterd, + enum rspamd_shingle_alg alg); /** * Compares two shingles and return result as a floating point value - 1.0 diff --git a/src/libutil/str_util.c b/src/libutil/str_util.c index 7d40b15fa..a25dc32d5 100644 --- a/src/libutil/str_util.c +++ b/src/libutil/str_util.c @@ -15,7 +15,7 @@ */ #include "config.h" #include "util.h" -#include "xxhash.h" +#include "cryptobox.h" #include "url.h" #include <math.h> @@ -182,20 +182,6 @@ rspamd_strcase_equal (gconstpointer v, gconstpointer v2) return FALSE; } -#if defined(__LP64__) || defined(_LP64) -#define XXH_STATE XXH64_state_t -#define XXH_RESET XXH64_reset -#define XXH_UPDATE XXH64_update -#define XXH_DIGEST XXH64_digest -#define XXH_ONESHOT XXH64 -#else -#define XXH_STATE XXH32_state_t -#define XXH_RESET XXH32_reset -#define XXH_UPDATE XXH32_update -#define XXH_DIGEST XXH32_digest -#define XXH_ONESHOT XXH32 -#endif - static guint rspamd_icase_hash (const gchar *in, gsize len) { @@ -208,10 +194,10 @@ rspamd_icase_hash (const gchar *in, gsize len) } c; guint32 pp; } u; - XXH_STATE st; + rspamd_cryptobox_fast_hash_state_t st; fp = len - leftover; - XXH_RESET (&st, rspamd_hash_seed ()); + rspamd_cryptobox_fast_hash_init (&st, rspamd_hash_seed ()); for (i = 0; i != fp; i += 4) { u.c.c1 = s[i], u.c.c2 = s[i + 1], u.c.c3 = s[i + 2], u.c.c4 = s[i + 3]; @@ -219,7 +205,7 @@ rspamd_icase_hash (const gchar *in, gsize len) u.c.c2 = lc_map[u.c.c2]; u.c.c3 = lc_map[u.c.c3]; u.c.c4 = lc_map[u.c.c4]; - XXH_UPDATE (&st, &u.pp, sizeof (u)); + rspamd_cryptobox_fast_hash_update (&st, &u.pp, sizeof (u)); } u.pp = 0; @@ -230,11 +216,11 @@ rspamd_icase_hash (const gchar *in, gsize len) u.c.c2 = lc_map[(guchar)s[i++]]; case 1: u.c.c1 = lc_map[(guchar)s[i]]; - XXH_UPDATE (&st, &u.pp, leftover); + rspamd_cryptobox_fast_hash_update (&st, &u.pp, leftover); break; } - return XXH_DIGEST (&st); + return rspamd_cryptobox_fast_hash_final (&st); } guint @@ -255,7 +241,7 @@ rspamd_str_hash (gconstpointer key) len = strlen ((const gchar *)key); - return XXH_ONESHOT (key, len, rspamd_hash_seed ()); + return rspamd_cryptobox_fast_hash (key, len, rspamd_hash_seed ()); } gboolean @@ -1814,17 +1800,17 @@ guint rspamd_url_hash (gconstpointer u) { const struct rspamd_url *url = u; - XXH_STATE st; + rspamd_cryptobox_fast_hash_state_t st; - XXH_RESET (&st, rspamd_hash_seed ()); + rspamd_cryptobox_fast_hash_init (&st, rspamd_hash_seed ()); if (url->urllen > 0) { - XXH_UPDATE (&st, url->string, url->urllen); + rspamd_cryptobox_fast_hash_update (&st, url->string, url->urllen); } - XXH_UPDATE (&st, &url->flags, sizeof (url->flags)); + rspamd_cryptobox_fast_hash_update (&st, &url->flags, sizeof (url->flags)); - return XXH_DIGEST (&st); + return rspamd_cryptobox_fast_hash_final (&st); } /* Compare two emails for building emails tree */ diff --git a/src/libutil/upstream.c b/src/libutil/upstream.c index 020039d71..fe07e89d2 100644 --- a/src/libutil/upstream.c +++ b/src/libutil/upstream.c @@ -19,7 +19,7 @@ #include "ref.h" #include "cfg_file.h" #include "rdns.h" -#include "xxhash.h" +#include "cryptobox.h" #include "utlist.h" struct upstream_inet_addr_entry { @@ -785,7 +785,8 @@ rspamd_upstream_get_hashed (struct upstream_list *ups, const guint8 *key, guint guint32 idx; /* Generate 64 bits input key */ - k = XXH64 (key, keylen, ups->hash_seed); + k = rspamd_cryptobox_fast_hash_specific (RSPAMD_CRYPTOBOX_XXHASH64, + key, keylen, ups->hash_seed); rspamd_mutex_lock (ups->lock); idx = rspamd_consistent_hash (k, ups->alive->len); diff --git a/src/libutil/util.c b/src/libutil/util.c index 4fc3eb613..4ce90ba06 100644 --- a/src/libutil/util.c +++ b/src/libutil/util.c @@ -1981,12 +1981,13 @@ rspamd_config_libs (struct rspamd_external_libs_ctx *ctx, if (ctx != NULL) { if (cfg->local_addrs) { - if (!rspamd_map_is_map (cfg->local_addrs)) { - radix_add_generic_iplist (cfg->local_addrs, + if (ucl_object_type (cfg->local_addrs) == UCL_STRING && + !rspamd_map_is_map (ucl_object_tostring (cfg->local_addrs))) { + radix_add_generic_iplist (ucl_object_tostring (cfg->local_addrs), (radix_compressed_t **)ctx->local_addrs); } else { - rspamd_map_add (cfg, cfg->local_addrs, + rspamd_map_add_from_ucl (cfg, cfg->local_addrs, "Local addresses", rspamd_radix_read, rspamd_radix_fin, (void **) ctx->local_addrs); } diff --git a/src/lua/lua_common.c b/src/lua/lua_common.c index 63aab5830..4563d9f84 100644 --- a/src/lua/lua_common.c +++ b/src/lua/lua_common.c @@ -677,6 +677,20 @@ rspamd_lua_parse_table_arguments (lua_State *L, gint pos, lua_pop (L, 1); } break; + case 'O': + if (t != LUA_TNONE) { + *(va_arg (ap, ucl_object_t **)) = ucl_object_lua_import (L, + idx); + } + else { + failed = TRUE; + *(va_arg (ap, ucl_object_t **)) = NULL; + } + + if (is_table) { + lua_pop (L, 1); + } + break; case 'U': if (t == LUA_TNIL || t == LUA_TNONE) { failed = TRUE; diff --git a/src/lua/lua_logger.c b/src/lua/lua_logger.c index 462838ba1..a01cb8436 100644 --- a/src/lua/lua_logger.c +++ b/src/lua/lua_logger.c @@ -509,7 +509,7 @@ lua_logger_logx (lua_State *L, GLogLevelFlags level, gboolean is_string) if (map) { if (map->map) { - uid = map->map->pool->tag.uid; + uid = map->map->tag; } else { uid = "embedded"; diff --git a/src/lua/lua_map.c b/src/lua/lua_map.c index c01088343..a74ee205c 100644 --- a/src/lua/lua_map.c +++ b/src/lua/lua_map.c @@ -271,7 +271,7 @@ lua_config_add_kv_map (lua_State *L) static gchar * -lua_map_read (rspamd_mempool_t *pool, gchar *chunk, gint len, +lua_map_read (gchar *chunk, gint len, struct map_cb_data *data, gboolean final) { @@ -301,10 +301,13 @@ lua_map_read (rspamd_mempool_t *pool, gchar *chunk, gint len, } static void -lua_map_fin (rspamd_mempool_t * pool, struct map_cb_data *data) +lua_map_fin (struct map_cb_data *data) { struct lua_map_callback_data *cbdata; struct rspamd_lua_map **pmap; + struct rspamd_map *map; + + map = data->map; if (data->prev_data) { data->prev_data = NULL; @@ -314,12 +317,12 @@ lua_map_fin (rspamd_mempool_t * pool, struct map_cb_data *data) cbdata = (struct lua_map_callback_data *)data->cur_data; } else { - msg_err_pool ("no data read for map"); + msg_err_map ("no data read for map"); return; } if (cbdata->ref == -1) { - msg_err_pool ("map has no callback set"); + msg_err_map ("map has no callback set"); } else if (cbdata->data != NULL && cbdata->data->len != 0) { lua_rawgeti (cbdata->L, LUA_REGISTRYINDEX, cbdata->ref); @@ -329,7 +332,7 @@ lua_map_fin (rspamd_mempool_t * pool, struct map_cb_data *data) rspamd_lua_setclass (cbdata->L, "rspamd{map}", -1); if (lua_pcall (cbdata->L, 2, 0, 0) != 0) { - msg_info_pool ("call to %s failed: %s", "local function", + msg_info_map ("call to %s failed: %s", "local function", lua_tostring (cbdata->L, -1)); lua_pop (cbdata->L, 1); } @@ -342,8 +345,9 @@ gint lua_config_add_map (lua_State *L) { struct rspamd_config *cfg = lua_check_config (L, 1); - const gchar *map_line = NULL, *description = NULL; + const char *description = NULL; const gchar *type = NULL; + ucl_object_t *map_obj = NULL; struct lua_map_callback_data *cbdata; struct rspamd_lua_map *map, **pmap; struct rspamd_map *m; @@ -351,132 +355,22 @@ lua_config_add_map (lua_State *L) GError *err = NULL; if (cfg) { - if (lua_type (L, 2) == LUA_TTABLE) { - if (!rspamd_lua_parse_table_arguments (L, 2, &err, - "*type=S;description=S;callback=F;*url=S", - &type, &description, &cbidx, &map_line)) { - ret = luaL_error (L, "invalid table arguments: %s", err->message); - g_error_free (err); - - return ret; - } - - g_assert (type != NULL && map_line != NULL); - - if (strcmp (type, "callback") == 0) { - if (cbidx == -1) { - ret = luaL_error (L, "invalid table arguments: callback missing"); - return ret; - } - - map = rspamd_mempool_alloc0 (cfg->cfg_pool, sizeof (*map)); - map->type = RSPAMD_LUA_MAP_CALLBACK; - map->data.cbdata = rspamd_mempool_alloc0 (cfg->cfg_pool, - sizeof (*map->data.cbdata)); - cbdata = map->data.cbdata; - cbdata->L = L; - cbdata->data = NULL; - cbdata->lua_map = map; - cbdata->ref = cbidx; - - if ((m = rspamd_map_add (cfg, map_line, description, - lua_map_read, lua_map_fin, - (void **)&map->data.cbdata)) == NULL) { - msg_warn_config ("invalid map %s", map_line); - luaL_unref (L, LUA_REGISTRYINDEX, cbidx); - lua_pushnil (L); + if (!rspamd_lua_parse_table_arguments (L, 2, &err, + "*url=O;description=S;callback=F;type=S", + &map_obj, &description, &cbidx, &type)) { + ret = luaL_error (L, "invalid table arguments: %s", err->message); + g_error_free (err); - return 1; - } - } - else if (strcmp (type, "set") == 0) { - map = rspamd_mempool_alloc0 (cfg->cfg_pool, sizeof (*map)); - map->data.hash = g_hash_table_new (rspamd_strcase_hash, - rspamd_strcase_equal); - map->type = RSPAMD_LUA_MAP_SET; - - if ((m = rspamd_map_add (cfg, map_line, description, - rspamd_hosts_read, - rspamd_hosts_fin, - (void **)&map->data.hash)) == NULL) { - msg_warn_config ("invalid set map %s", map_line); - g_hash_table_destroy (map->data.hash); - lua_pushnil (L); - - return 1; - } - } - else if (strcmp (type, "map") == 0) { - map = rspamd_mempool_alloc0 (cfg->cfg_pool, sizeof (*map)); - map->data.hash = g_hash_table_new (rspamd_strcase_hash, - rspamd_strcase_equal); - map->type = RSPAMD_LUA_MAP_HASH; - - if ((m = rspamd_map_add (cfg, map_line, description, - rspamd_kv_list_read, - rspamd_kv_list_fin, - (void **)&map->data.hash)) == NULL) { - msg_warn_config ("invalid hash map %s", map_line); - g_hash_table_destroy (map->data.hash); - lua_pushnil (L); - return 1; - } - } - else if (strcmp (type, "radix") == 0) { - map = rspamd_mempool_alloc0 (cfg->cfg_pool, sizeof (*map)); - map->data.radix = radix_create_compressed (); - map->type = RSPAMD_LUA_MAP_RADIX; - - if ((m = rspamd_map_add (cfg, map_line, description, - rspamd_radix_read, - rspamd_radix_fin, - (void **)&map->data.radix)) == NULL) { - msg_warn_config ("invalid radix map %s", map_line); - radix_destroy_compressed (map->data.radix); - lua_pushnil (L); - return 1; - } - } - else if (strcmp (type, "regexp") == 0) { - map = rspamd_mempool_alloc0 (cfg->cfg_pool, sizeof (*map)); - map->data.re_map = NULL; - map->type = RSPAMD_LUA_MAP_REGEXP; - - if ((m = rspamd_map_add (cfg, map_line, description, - rspamd_regexp_list_read, - rspamd_regexp_list_fin, - (void **)&map->data.re_map)) == NULL) { - msg_warn_config ("invalid regexp map %s", map_line); - lua_pushnil (L); - return 1; - } - } - else { - ret = luaL_error (L, "invalid arguments: unknown type '%s'", type); + return ret; + } - return ret; - } + g_assert (map_obj != NULL); - map->map = m; - pmap = lua_newuserdata (L, sizeof (void *)); - *pmap = map; - rspamd_lua_setclass (L, "rspamd{map}", -1); + if (type == NULL) { + type = "callback"; } - else { - /* - * Legacy format add_map(map_line, description, callback) - */ - map_line = luaL_checkstring (L, 2); - - if (lua_gettop (L) == 4) { - description = lua_tostring (L, 3); - cbidx = 4; - } - else { - description = NULL; - cbidx = 3; - } + if (strcmp (type, "callback") == 0) { map = rspamd_mempool_alloc0 (cfg->cfg_pool, sizeof (*map)); map->type = RSPAMD_LUA_MAP_CALLBACK; map->data.cbdata = rspamd_mempool_alloc0 (cfg->cfg_pool, @@ -485,38 +379,104 @@ lua_config_add_map (lua_State *L) cbdata->L = L; cbdata->data = NULL; cbdata->lua_map = map; + cbdata->ref = cbidx; + + if ((m = rspamd_map_add_from_ucl (cfg, map_obj, description, + lua_map_read, lua_map_fin, + (void **)&map->data.cbdata)) == NULL) { + + if (cbidx != -1) { + luaL_unref (L, LUA_REGISTRYINDEX, cbidx); + } + + lua_pushnil (L); - if (lua_type (L, cbidx) == LUA_TFUNCTION) { - lua_pushvalue (L, cbidx); - /* Get a reference */ - cbdata->ref = luaL_ref (L, LUA_REGISTRYINDEX); + return 1; } - else { - /* - * Now we can create maps with delayed callbacks, to allow better - * closures generation - */ - cbdata->ref = -1; + } + else if (strcmp (type, "set") == 0) { + map = rspamd_mempool_alloc0 (cfg->cfg_pool, sizeof (*map)); + map->data.hash = g_hash_table_new (rspamd_strcase_hash, + rspamd_strcase_equal); + map->type = RSPAMD_LUA_MAP_SET; + + if ((m = rspamd_map_add_from_ucl (cfg, map_obj, description, + rspamd_hosts_read, + rspamd_hosts_fin, + (void **)&map->data.hash)) == NULL) { + g_hash_table_destroy (map->data.hash); + lua_pushnil (L); + ucl_object_unref (map_obj); + + return 1; } + } + else if (strcmp (type, "map") == 0) { + map = rspamd_mempool_alloc0 (cfg->cfg_pool, sizeof (*map)); + map->data.hash = g_hash_table_new (rspamd_strcase_hash, + rspamd_strcase_equal); + map->type = RSPAMD_LUA_MAP_HASH; + + if ((m = rspamd_map_add_from_ucl (cfg, map_obj, description, + rspamd_kv_list_read, + rspamd_kv_list_fin, + (void **)&map->data.hash)) == NULL) { + g_hash_table_destroy (map->data.hash); + lua_pushnil (L); + ucl_object_unref (map_obj); - if ((m = rspamd_map_add (cfg, map_line, description, - lua_map_read, lua_map_fin, - (void **)&map->data.cbdata)) == NULL) { - msg_warn_config ("invalid map %s", map_line); + return 1; + } + } + else if (strcmp (type, "radix") == 0) { + map = rspamd_mempool_alloc0 (cfg->cfg_pool, sizeof (*map)); + map->data.radix = radix_create_compressed (); + map->type = RSPAMD_LUA_MAP_RADIX; + + if ((m = rspamd_map_add_from_ucl (cfg, map_obj, description, + rspamd_radix_read, + rspamd_radix_fin, + (void **)&map->data.radix)) == NULL) { + radix_destroy_compressed (map->data.radix); lua_pushnil (L); + ucl_object_unref (map_obj); + + return 1; } - else { - map->map = m; - pmap = lua_newuserdata (L, sizeof (void *)); - *pmap = map; - rspamd_lua_setclass (L, "rspamd{map}", -1); + } + else if (strcmp (type, "regexp") == 0) { + map = rspamd_mempool_alloc0 (cfg->cfg_pool, sizeof (*map)); + map->data.re_map = NULL; + map->type = RSPAMD_LUA_MAP_REGEXP; + + if ((m = rspamd_map_add_from_ucl (cfg, map_obj, description, + rspamd_regexp_list_read, + rspamd_regexp_list_fin, + (void **)&map->data.re_map)) == NULL) { + lua_pushnil (L); + ucl_object_unref (map_obj); + + return 1; } } + else { + ret = luaL_error (L, "invalid arguments: unknown type '%s'", type); + ucl_object_unref (map_obj); + + return ret; + } + + map->map = m; + pmap = lua_newuserdata (L, sizeof (void *)); + *pmap = map; + rspamd_lua_setclass (L, "rspamd{map}", -1); } else { return luaL_error (L, "invalid arguments"); } + ucl_object_unref (map_obj); + return 1; } @@ -614,11 +574,17 @@ lua_map_is_signed (lua_State *L) { struct rspamd_lua_map *map = lua_check_map (L, 1); gboolean ret = FALSE; + struct rspamd_map_backend *bk; + guint i; if (map != NULL) { if (map->map) { - if (map->map->is_signed) { - ret = TRUE; + for (i = 0; i < map->map->backends->len; i ++) { + bk = g_ptr_array_index (map->map->backends, i); + if (bk->is_signed) { + ret = TRUE; + break; + } } } } @@ -635,19 +601,28 @@ lua_map_get_proto (lua_State *L) { struct rspamd_lua_map *map = lua_check_map (L, 1); const gchar *ret = "undefined"; + struct rspamd_map_backend *bk; + guint i; if (map != NULL) { if ((map->flags & RSPAMD_LUA_MAP_FLAG_EMBEDDED) || map->map == NULL) { ret = "embedded"; + lua_pushstring (L, ret); + + return 1; } else { - switch (map->map->protocol) { - case MAP_PROTO_FILE: - ret = "file"; - break; - case MAP_PROTO_HTTP: - ret = "http"; - break; + for (i = 0; i < map->map->backends->len; i ++) { + bk = g_ptr_array_index (map->map->backends, i); + switch (bk->protocol) { + case MAP_PROTO_FILE: + ret = "file"; + break; + case MAP_PROTO_HTTP: + ret = "http"; + break; + } + lua_pushstring (L, ret); } } } @@ -655,14 +630,16 @@ lua_map_get_proto (lua_State *L) return luaL_error (L, "invalid arguments"); } - lua_pushstring (L, ret); - return 1; + + return map->map->backends->len; } static int lua_map_get_sign_key (lua_State *L) { struct rspamd_lua_map *map = lua_check_map (L, 1); + struct rspamd_map_backend *bk; + guint i; GString *ret = NULL; if (map != NULL) { @@ -671,33 +648,42 @@ lua_map_get_sign_key (lua_State *L) return 1; } - if (map->map && map->map->trusted_pubkey) { - ret = rspamd_pubkey_print (map->map->trusted_pubkey, - RSPAMD_KEYPAIR_PUBKEY|RSPAMD_KEYPAIR_BASE32); + for (i = 0; i < map->map->backends->len; i ++) { + bk = g_ptr_array_index (map->map->backends, i); + + if (bk->trusted_pubkey) { + ret = rspamd_pubkey_print (bk->trusted_pubkey, + RSPAMD_KEYPAIR_PUBKEY|RSPAMD_KEYPAIR_BASE32); + } + else { + ret = NULL; + } + + if (ret) { + lua_pushlstring (L, ret->str, ret->len); + g_string_free (ret, TRUE); + } + else { + lua_pushnil (L); + } } } else { return luaL_error (L, "invalid arguments"); } - if (ret) { - lua_pushlstring (L, ret->str, ret->len); - g_string_free (ret, TRUE); - } - else { - lua_pushnil (L); - } - - return 1; + return map->map->backends->len; } static int lua_map_set_sign_key (lua_State *L) { struct rspamd_lua_map *map = lua_check_map (L, 1); + struct rspamd_map_backend *bk; const gchar *pk_str; struct rspamd_cryptobox_pubkey *pk; gsize len; + guint i; pk_str = lua_tolstring (L, 2, &len); @@ -714,12 +700,17 @@ lua_map_set_sign_key (lua_State *L) return luaL_error (L, "invalid pubkey string"); } - if (map->map->trusted_pubkey) { - /* Unref old pk */ - rspamd_pubkey_unref (map->map->trusted_pubkey); + for (i = 0; i < map->map->backends->len; i ++) { + bk = g_ptr_array_index (map->map->backends, i); + if (bk->trusted_pubkey) { + /* Unref old pk */ + rspamd_pubkey_unref (bk->trusted_pubkey); + } + + bk->trusted_pubkey = rspamd_pubkey_ref (pk); } - map->map->trusted_pubkey = pk; + rspamd_pubkey_unref (pk); } else { return luaL_error (L, "invalid arguments"); @@ -753,21 +744,29 @@ lua_map_get_uri (lua_State *L) { struct rspamd_lua_map *map = lua_check_map (L, 1); const gchar *ret = "undefined"; + struct rspamd_map_backend *bk; + guint i; if (map != NULL) { if ((map->flags & RSPAMD_LUA_MAP_FLAG_EMBEDDED) || map->map == NULL) { ret = "embedded"; + lua_pushstring (L, ret); + + return 1; } else { - ret = map->map->uri; + for (i = 0; i < map->map->backends->len; i ++) { + bk = g_ptr_array_index (map->map->backends, i); + ret = bk->uri; + lua_pushstring (L, ret); + } } } else { return luaL_error (L, "invalid arguments"); } - lua_pushstring (L, ret); - return 1; + return map->map->backends->len; } void diff --git a/src/lua/lua_task.c b/src/lua/lua_task.c index 31b0aac76..7a756679b 100644 --- a/src/lua/lua_task.c +++ b/src/lua/lua_task.c @@ -23,7 +23,7 @@ #include "cfg_file.h" #include "email_addr.h" #include "utlist.h" -#include "xxhash.h" +#include "cryptobox.h" /*** * @module rspamd_task @@ -1568,7 +1568,8 @@ lua_task_str_to_get_type (lua_State *L, gint pos) type = lua_tolstring (L, pos, &sz); if (type && sz > 0) { - h = XXH64 (type, sz, 0xdeadbabe); + h = rspamd_cryptobox_fast_hash_specific (RSPAMD_CRYPTOBOX_XXHASH64, + type, sz, 0xdeadbabe); switch (h) { case 0xDA081341FB600389ULL: /* mime */ diff --git a/src/plugins/dkim_check.c b/src/plugins/dkim_check.c index 9cf89674e..3ea31485f 100644 --- a/src/plugins/dkim_check.c +++ b/src/plugins/dkim_check.c @@ -271,13 +271,14 @@ dkim_module_config (struct rspamd_config *cfg) } if ((value = rspamd_config_get_module_opt (cfg, "dkim", "whitelist")) != NULL) { + str = ucl_obj_tostring (value); - if (!rspamd_map_is_map (str)) { + if (str && !rspamd_map_is_map (str)) { radix_add_generic_iplist (str, &dkim_module_ctx->whitelist_ip); } else { - rspamd_map_add (cfg, str, + rspamd_map_add_from_ucl (cfg, value, "DKIM whitelist", rspamd_radix_read, rspamd_radix_fin, (void **)&dkim_module_ctx->whitelist_ip); @@ -285,7 +286,7 @@ dkim_module_config (struct rspamd_config *cfg) } if ((value = rspamd_config_get_module_opt (cfg, "dkim", "domains")) != NULL) { - if (!rspamd_map_add (cfg, ucl_obj_tostring (value), + if (!rspamd_map_add_from_ucl (cfg, value, "DKIM domains", rspamd_kv_list_read, rspamd_kv_list_fin, (void **)&dkim_module_ctx->dkim_domains)) { msg_warn_config ("cannot load dkim domains list from %s", @@ -297,7 +298,7 @@ dkim_module_config (struct rspamd_config *cfg) } if (!got_trusted && (value = rspamd_config_get_module_opt (cfg, "dkim", "trusted_domains")) != NULL) { - if (!rspamd_map_add (cfg, ucl_obj_tostring (value), + if (!rspamd_map_add_from_ucl (cfg, value, "DKIM domains", rspamd_kv_list_read, rspamd_kv_list_fin, (void **)&dkim_module_ctx->dkim_domains)) { msg_warn_config ("cannot load dkim domains list from %s", diff --git a/src/plugins/fuzzy_check.c b/src/plugins/fuzzy_check.c index 385b8aadc..bf4cd3a0a 100644 --- a/src/plugins/fuzzy_check.c +++ b/src/plugins/fuzzy_check.c @@ -69,6 +69,8 @@ struct fuzzy_mime_type { struct fuzzy_rule { struct upstream_list *servers; const gchar *symbol; + const gchar *algorithm_str; + enum rspamd_shingle_alg alg; GHashTable *mappings; GList *mime_types; GList *fuzzy_headers; @@ -363,6 +365,7 @@ fuzzy_parse_rule (struct rspamd_config *cfg, const ucl_object_t *obj, gint cb_id rule = fuzzy_rule_new (fuzzy_module_ctx->default_symbol, fuzzy_module_ctx->fuzzy_pool); rule->learn_condition_cb = -1; + rule->alg = RSPAMD_SHINGLES_OLD; if ((value = ucl_object_lookup (obj, "mime_types")) != NULL) { it = NULL; @@ -410,6 +413,46 @@ fuzzy_parse_rule (struct rspamd_config *cfg, const ucl_object_t *obj, gint cb_id rule->skip_unknown = ucl_obj_toboolean (value); } + if ((value = ucl_object_lookup (obj, "algorithm")) != NULL) { + rule->algorithm_str = ucl_object_tostring (value); + + if (rule->algorithm_str) { + if (g_ascii_strcasecmp (rule->algorithm_str, "old") == 0 || + g_ascii_strcasecmp (rule->algorithm_str, "siphash") == 0) { + rule->alg = RSPAMD_SHINGLES_OLD; + } + else if (g_ascii_strcasecmp (rule->algorithm_str, "xxhash") == 0) { + rule->alg = RSPAMD_SHINGLES_XXHASH; + } + else if (g_ascii_strcasecmp (rule->algorithm_str, "mumhash") == 0) { + rule->alg = RSPAMD_SHINGLES_MUMHASH; + } + else if (g_ascii_strcasecmp (rule->algorithm_str, "fasthash") == 0 || + g_ascii_strcasecmp (rule->algorithm_str, "fast") == 0) { + rule->alg = RSPAMD_SHINGLES_FAST; + } + else { + msg_warn_config ("unknown algorithm: %s, use siphash by default"); + } + } + } + + /* Set a consistent and short string name */ + switch (rule->alg) { + case RSPAMD_SHINGLES_OLD: + rule->algorithm_str = "sip"; + break; + case RSPAMD_SHINGLES_XXHASH: + rule->algorithm_str = "xx"; + break; + case RSPAMD_SHINGLES_MUMHASH: + rule->algorithm_str = "mum"; + break; + case RSPAMD_SHINGLES_FAST: + rule->algorithm_str = "fast"; + break; + } + if ((value = ucl_object_lookup (obj, "servers")) != NULL) { rule->servers = rspamd_upstreams_create (cfg->ups_ctx); @@ -832,12 +875,12 @@ fuzzy_check_module_config (struct rspamd_config *cfg) str = ucl_obj_tostring (value); - if (!rspamd_map_is_map (str)) { + if (str && !rspamd_map_is_map (str)) { radix_add_generic_iplist (str, &fuzzy_module_ctx->whitelist); } else { - rspamd_map_add (cfg, str, + rspamd_map_add_from_ucl (cfg, value, "Fuzzy whitelist", rspamd_radix_read, rspamd_radix_fin, (void **)&fuzzy_module_ctx->whitelist); @@ -1023,6 +1066,37 @@ fuzzy_cmd_from_task_meta (struct fuzzy_rule *rule, return io; } +static void * +fuzzy_cmd_get_cached (struct fuzzy_rule *rule, + rspamd_mempool_t *pool, + struct mime_text_part *part) +{ + gchar key[32]; + gint key_part; + + memcpy (&key_part, rule->shingles_key->str, sizeof (key_part)); + rspamd_snprintf (key, sizeof (key), "%p%s%d", part, rule->algorithm_str, + key_part); + + return rspamd_mempool_get_variable (pool, key); +} + +static void +fuzzy_cmd_set_cached (struct fuzzy_rule *rule, + rspamd_mempool_t *pool, + struct mime_text_part *part, + struct rspamd_fuzzy_encrypted_shingle_cmd *data) +{ + gchar key[32]; + gint key_part; + + memcpy (&key_part, rule->shingles_key->str, sizeof (key_part)); + rspamd_snprintf (key, sizeof (key), "%p%s%d", part, rule->algorithm_str, + key_part); + /* Key is copied */ + rspamd_mempool_set_variable (pool, key, data, NULL); +} + /* * Create fuzzy command from a text part */ @@ -1035,7 +1109,7 @@ fuzzy_cmd_from_text_part (struct fuzzy_rule *rule, struct mime_text_part *part) { struct rspamd_fuzzy_shingle_cmd *shcmd; - struct rspamd_fuzzy_encrypted_shingle_cmd *encshcmd; + struct rspamd_fuzzy_encrypted_shingle_cmd *encshcmd, *cached; struct rspamd_shingle *sh; guint i; rspamd_cryptobox_hash_state_t st; @@ -1043,40 +1117,56 @@ fuzzy_cmd_from_text_part (struct fuzzy_rule *rule, GArray *words; struct fuzzy_cmd_io *io; - if (rule->peer_key) { - encshcmd = rspamd_mempool_alloc0 (pool, sizeof (*encshcmd)); + cached = fuzzy_cmd_get_cached (rule, pool, part); + + if (cached) { + /* Copy cached */ + encshcmd = rspamd_mempool_alloc (pool, sizeof (*encshcmd)); + memcpy (encshcmd, cached, sizeof (*encshcmd)); shcmd = &encshcmd->cmd; } else { - shcmd = rspamd_mempool_alloc0 (pool, sizeof (*shcmd)); - encshcmd = NULL; - } + encshcmd = rspamd_mempool_alloc0 (pool, sizeof (*encshcmd)); + shcmd = &encshcmd->cmd; - /* - * Generate hash from all words in the part - */ - rspamd_cryptobox_hash_init (&st, rule->hash_key->str, rule->hash_key->len); - words = fuzzy_preprocess_words (part, pool); + /* + * Generate hash from all words in the part + */ + rspamd_cryptobox_hash_init (&st, rule->hash_key->str, rule->hash_key->len); + words = fuzzy_preprocess_words (part, pool); - for (i = 0; i < words->len; i ++) { - word = &g_array_index (words, rspamd_ftok_t, i); - rspamd_cryptobox_hash_update (&st, word->begin, word->len); - } - rspamd_cryptobox_hash_final (&st, shcmd->basic.digest); + for (i = 0; i < words->len; i ++) { + word = &g_array_index (words, rspamd_ftok_t, i); + rspamd_cryptobox_hash_update (&st, word->begin, word->len); + } + rspamd_cryptobox_hash_final (&st, shcmd->basic.digest); + + msg_debug_pool ("loading shingles of type %s with key %*xs", + rule->algorithm_str, + 16, rule->shingles_key->str); + sh = rspamd_shingles_generate (words, + rule->shingles_key->str, pool, + rspamd_shingles_default_filter, NULL, + rule->alg); + if (sh != NULL) { + memcpy (&shcmd->sgl, sh, sizeof (shcmd->sgl)); + shcmd->basic.shingles_count = RSPAMD_SHINGLE_SIZE; + } - msg_debug_pool ("loading shingles with key %*xs", 16, - rule->shingles_key->str); - sh = rspamd_shingles_generate (words, - rule->shingles_key->str, pool, - rspamd_shingles_default_filter, NULL); - if (sh != NULL) { - memcpy (&shcmd->sgl, sh, sizeof (shcmd->sgl)); - shcmd->basic.shingles_count = RSPAMD_SHINGLE_SIZE; + /* + * We always save encrypted command as it can handle both + * encrypted and unencrypted requests. + * + * Since it is copied when obtained from the cache, it is safe to use + * it this way. + */ + fuzzy_cmd_set_cached (rule, pool, part, encshcmd); } shcmd->basic.tag = ottery_rand_uint32 (); shcmd->basic.cmd = c; shcmd->basic.version = RSPAMD_FUZZY_VERSION; + if (c != FUZZY_CHECK) { shcmd->basic.flag = flag; shcmd->basic.value = weight; diff --git a/src/plugins/spf.c b/src/plugins/spf.c index 153422f51..67c8732e7 100644 --- a/src/plugins/spf.c +++ b/src/plugins/spf.c @@ -213,12 +213,12 @@ spf_module_config (struct rspamd_config *cfg) str = ucl_obj_tostring (value); - if (!rspamd_map_is_map (str)) { + if (str && !rspamd_map_is_map (str)) { radix_add_generic_iplist (str, &spf_module_ctx->whitelist_ip); } else { - rspamd_map_add (cfg, str, + rspamd_map_add_from_ucl (cfg, value, "SPF whitelist", rspamd_radix_read, rspamd_radix_fin, (void **)&spf_module_ctx->whitelist_ip); diff --git a/src/plugins/surbl.c b/src/plugins/surbl.c index 4dda832b3..87b8effa7 100644 --- a/src/plugins/surbl.c +++ b/src/plugins/surbl.c @@ -80,7 +80,7 @@ module_t surbl_module = { }; static void -exception_insert (gpointer st, gconstpointer key, gpointer value) +exception_insert (gpointer st, gconstpointer key, gconstpointer value) { GHashTable **t = st; gint level = 0; @@ -103,27 +103,27 @@ exception_insert (gpointer st, gconstpointer key, gpointer value) val = g_malloc (sizeof (rspamd_ftok_t)); val->begin = key; val->len = strlen (key); + if (t[level] == NULL) { t[level] = g_hash_table_new_full (rspamd_ftok_icase_hash, rspamd_ftok_icase_equal, g_free, - NULL); + g_free); } - g_hash_table_insert (t[level], val, value); + + g_hash_table_insert (t[level], val, g_strdup (value)); } static gchar * -read_exceptions_list (rspamd_mempool_t * pool, - gchar * chunk, +read_exceptions_list (gchar * chunk, gint len, struct map_cb_data *data, gboolean final) { if (data->cur_data == NULL) { - data->cur_data = rspamd_mempool_alloc0 (pool, - sizeof (GHashTable *) * MAX_LEVELS); + data->cur_data = g_malloc (sizeof (GHashTable *) * MAX_LEVELS); } - return rspamd_parse_kv_list (pool, + return rspamd_parse_kv_list ( chunk, len, data, @@ -133,7 +133,7 @@ read_exceptions_list (rspamd_mempool_t * pool, } static void -fin_exceptions_list (rspamd_mempool_t * pool, struct map_cb_data *data) +fin_exceptions_list (struct map_cb_data *data) { GHashTable **t; gint i; @@ -145,11 +145,12 @@ fin_exceptions_list (rspamd_mempool_t * pool, struct map_cb_data *data) g_hash_table_destroy (t[i]); } } + g_free (t); } } static void -redirector_insert (gpointer st, gconstpointer key, gpointer value) +redirector_insert (gpointer st, gconstpointer key, gconstpointer value) { GHashTable *tld_hash = st; const gchar *p = key, *begin = key; @@ -200,8 +201,7 @@ redirector_item_free (gpointer p) } static gchar * -read_redirectors_list (rspamd_mempool_t * pool, - gchar * chunk, +read_redirectors_list (gchar * chunk, gint len, struct map_cb_data *data, gboolean final) @@ -217,17 +217,17 @@ read_redirectors_list (rspamd_mempool_t * pool, data->cur_data = tld_hash; } - return rspamd_parse_kv_list (pool, + return rspamd_parse_kv_list ( chunk, len, data, - (insert_func) redirector_insert, + redirector_insert, "", final); } void -fin_redirectors_list (rspamd_mempool_t * pool, struct map_cb_data *data) +fin_redirectors_list (struct map_cb_data *data) { GHashTable *tld_hash; @@ -528,7 +528,7 @@ surbl_module_config (struct rspamd_config *cfg) if ((value = rspamd_config_get_module_opt (cfg, "surbl", "redirector_hosts_map")) != NULL) { - if (!rspamd_map_add (cfg, ucl_obj_tostring (value), + if (!rspamd_map_add_from_ucl (cfg, value, "SURBL redirectors list", read_redirectors_list, fin_redirectors_list, (void **)&surbl_module_ctx->redirector_map_data)) { @@ -546,7 +546,7 @@ surbl_module_config (struct rspamd_config *cfg) } if ((value = rspamd_config_get_module_opt (cfg, "surbl", "exceptions")) != NULL) { - if (rspamd_map_add (cfg, ucl_obj_tostring (value), + if (rspamd_map_add_from_ucl (cfg, value, "SURBL exceptions list", read_exceptions_list, fin_exceptions_list, (void **)&surbl_module_ctx->exceptions)) { surbl_module_ctx->tld2_file = rspamd_mempool_strdup ( @@ -556,7 +556,7 @@ surbl_module_config (struct rspamd_config *cfg) } if ((value = rspamd_config_get_module_opt (cfg, "surbl", "whitelist")) != NULL) { - if (rspamd_map_add (cfg, ucl_obj_tostring (value), + if (rspamd_map_add_from_ucl (cfg, value, "SURBL whitelist", rspamd_hosts_read, rspamd_hosts_fin, (void **)&surbl_module_ctx->whitelist)) { surbl_module_ctx->whitelist_file = rspamd_mempool_strdup ( diff --git a/src/rspamd.c b/src/rspamd.c index e67977cbd..72e676267 100644 --- a/src/rspamd.c +++ b/src/rspamd.c @@ -21,7 +21,7 @@ #include "libserver/worker_util.h" #include "libserver/rspamd_control.h" #include "ottery.h" -#include "xxhash.h" +#include "cryptobox.h" #include "utlist.h" #include "unix-std.h" /* sysexits */ @@ -409,30 +409,30 @@ systemd_get_socket (struct rspamd_main *rspamd_main, gint number) static inline uintptr_t make_listen_key (struct rspamd_worker_bind_conf *cf) { - XXH64_state_t st; + rspamd_cryptobox_fast_hash_state_t st; guint i, keylen; guint8 *key; rspamd_inet_addr_t *addr; guint16 port; - XXH64_reset (&st, rspamd_hash_seed ()); + rspamd_cryptobox_fast_hash_init (&st, rspamd_hash_seed ()); if (cf->is_systemd) { - XXH64_update (&st, "systemd", sizeof ("systemd")); - XXH64_update (&st, &cf->cnt, sizeof (cf->cnt)); + rspamd_cryptobox_fast_hash_update (&st, "systemd", sizeof ("systemd")); + rspamd_cryptobox_fast_hash_update (&st, &cf->cnt, sizeof (cf->cnt)); } else { - XXH64_update (&st, cf->name, strlen (cf->name)); + rspamd_cryptobox_fast_hash_update (&st, cf->name, strlen (cf->name)); for (i = 0; i < cf->cnt; i ++) { addr = g_ptr_array_index (cf->addrs, i); key = rspamd_inet_address_get_radix_key ( addr, &keylen); - XXH64_update (&st, key, keylen); + rspamd_cryptobox_fast_hash_update (&st, key, keylen); port = rspamd_inet_address_get_port (addr); - XXH64_update (&st, &port, sizeof (port)); + rspamd_cryptobox_fast_hash_update (&st, &port, sizeof (port)); } } - return XXH64_digest (&st); + return rspamd_cryptobox_fast_hash_final (&st); } static void @@ -959,7 +959,7 @@ rspamd_control_handler (gint fd, short what, gpointer arg) static guint rspamd_spair_hash (gconstpointer p) { - return XXH64 (p, PAIR_ID_LEN, rspamd_hash_seed ()); + return rspamd_cryptobox_fast_hash (p, PAIR_ID_LEN, rspamd_hash_seed ()); } static gboolean diff --git a/src/rspamd_proxy.c b/src/rspamd_proxy.c index 06192dafc..32a2937e8 100644 --- a/src/rspamd_proxy.c +++ b/src/rspamd_proxy.c @@ -69,6 +69,7 @@ struct rspamd_http_upstream { struct rspamd_http_mirror { gchar *name; + gchar *settings_id; struct upstream_list *u; struct rspamd_cryptobox_pubkey *key; gdouble prob; @@ -362,6 +363,11 @@ rspamd_proxy_parse_mirror (rspamd_mempool_t *pool, lua_settop (L, 0); } + elt = ucl_object_lookup_any (obj, "settings", "settings_id", NULL); + if (elt && ucl_object_type (elt) == UCL_STRING) { + up->settings_id = g_strdup (ucl_object_tostring (elt)); + } + g_ptr_array_add (ctx->mirrors, up); return TRUE; @@ -902,6 +908,11 @@ proxy_open_mirror_connections (struct rspamd_proxy_session *session) rspamd_http_message_remove_header (msg, "Content-Length"); rspamd_http_message_remove_header (msg, "Key"); + if (m->settings_id != NULL) { + rspamd_http_message_remove_header (msg, "Settings-ID"); + rspamd_http_message_add_header (msg, "Settings-ID", m->settings_id); + } + bk_conn->backend_conn = rspamd_http_connection_new ( NULL, proxy_backend_mirror_error_handler, |