diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2020-02-11 12:34:40 +0000 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2020-02-11 12:34:40 +0000 |
commit | 41e28d1148a308957735875abb271d22ba0b4432 (patch) | |
tree | 8d549898192d0d513568140cd71e9475f982dea9 /src/libserver/maps | |
parent | cb070eea5c1a2e39336171b37688202f334f7d70 (diff) | |
download | rspamd-41e28d1148a308957735875abb271d22ba0b4432.tar.gz rspamd-41e28d1148a308957735875abb271d22ba0b4432.zip |
[Rework] Further project structure reorganisation
Diffstat (limited to 'src/libserver/maps')
-rw-r--r-- | src/libserver/maps/map.c | 2923 | ||||
-rw-r--r-- | src/libserver/maps/map.h | 138 | ||||
-rw-r--r-- | src/libserver/maps/map_helpers.c | 1397 | ||||
-rw-r--r-- | src/libserver/maps/map_helpers.h | 246 | ||||
-rw-r--r-- | src/libserver/maps/map_private.h | 219 |
5 files changed, 4923 insertions, 0 deletions
diff --git a/src/libserver/maps/map.c b/src/libserver/maps/map.c new file mode 100644 index 000000000..ff3a38f90 --- /dev/null +++ b/src/libserver/maps/map.c @@ -0,0 +1,2923 @@ +/*- + * Copyright 2019 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * Implementation of map files handling + */ + +#include "config.h" +#include "map.h" +#include "map_private.h" +#include "libserver/http/http_connection.h" +#include "libserver/http/http_private.h" +#include "rspamd.h" +#include "contrib/zstd/zstd.h" +#include "contrib/libev/ev.h" +#include "contrib/uthash/utlist.h" + +#undef MAP_DEBUG_REFS +#ifdef MAP_DEBUG_REFS +#define MAP_RETAIN(x, t) do { \ + msg_err (G_GNUC_PRETTY_FUNCTION ": " t ": retain ref %p, refcount: %d -> %d", (x), (x)->ref.refcount, (x)->ref.refcount + 1); \ + REF_RETAIN(x); \ +} while (0) + +#define MAP_RELEASE(x, t) do { \ + msg_err (G_GNUC_PRETTY_FUNCTION ": " t ": release ref %p, refcount: %d -> %d", (x), (x)->ref.refcount, (x)->ref.refcount - 1); \ + REF_RELEASE(x); \ +} while (0) +#else +#define MAP_RETAIN(x, t) REF_RETAIN(x) +#define MAP_RELEASE(x, t) REF_RELEASE(x) +#endif + +enum rspamd_map_periodic_opts { + RSPAMD_MAP_SCHEDULE_NORMAL = 0, + RSPAMD_MAP_SCHEDULE_ERROR = (1u << 0u), + RSPAMD_MAP_SCHEDULE_LOCKED = (1u << 1u), + RSPAMD_MAP_SCHEDULE_INIT = (1u << 2u), +}; + +static void free_http_cbdata_common (struct http_callback_data *cbd, + gboolean plan_new); +static void free_http_cbdata_dtor (gpointer p); +static void free_http_cbdata (struct http_callback_data *cbd); +static void rspamd_map_process_periodic (struct map_periodic_cbdata *cbd); +static void rspamd_map_schedule_periodic (struct rspamd_map *map, int how); +static gboolean read_map_file_chunks (struct rspamd_map *map, + struct map_cb_data *cbdata, + const gchar *fname, + gsize len, + goffset off); +static gboolean rspamd_map_save_http_cached_file (struct rspamd_map *map, + struct rspamd_map_backend *bk, + struct http_map_data *htdata, + const guchar *data, + gsize len); +static gboolean rspamd_map_update_http_cached_file (struct rspamd_map *map, + struct rspamd_map_backend *bk, + struct http_map_data *htdata); + +guint rspamd_map_log_id = (guint)-1; +RSPAMD_CONSTRUCTOR(rspamd_map_log_init) +{ + rspamd_map_log_id = rspamd_logger_add_debug_module("map"); +} + +/** + * Write HTTP request + */ +static void +write_http_request (struct http_callback_data *cbd) +{ + gchar datebuf[128]; + struct rspamd_http_message *msg; + + msg = rspamd_http_new_message (HTTP_REQUEST); + + if (cbd->bk->protocol == MAP_PROTO_HTTPS) { + msg->flags |= RSPAMD_HTTP_FLAG_SSL; + } + + if (cbd->check) { + msg->method = HTTP_HEAD; + } + + msg->url = rspamd_fstring_append (msg->url, + cbd->data->path, strlen (cbd->data->path)); + + if (cbd->check) { + if (cbd->data->last_modified != 0) { + rspamd_http_date_format (datebuf, sizeof (datebuf), + cbd->data->last_modified); + rspamd_http_message_add_header (msg, "If-Modified-Since", + datebuf); + } + if (cbd->data->etag) { + rspamd_http_message_add_header_len (msg, "If-None-Match", + cbd->data->etag->str, cbd->data->etag->len); + } + } + + msg->url = rspamd_fstring_append (msg->url, cbd->data->rest, + strlen (cbd->data->rest)); + + if (cbd->data->userinfo) { + rspamd_http_message_add_header (msg, "Authorization", + cbd->data->userinfo); + } + + MAP_RETAIN (cbd, "http_callback_data"); + rspamd_http_connection_write_message (cbd->conn, + msg, + cbd->data->host, + NULL, + cbd, + cbd->timeout); +} + +/** + * Callback for destroying HTTP callback data + */ +static void +free_http_cbdata_common (struct http_callback_data *cbd, gboolean plan_new) +{ + struct map_periodic_cbdata *periodic = cbd->periodic; + + if (cbd->shmem_data) { + rspamd_http_message_shmem_unref (cbd->shmem_data); + } + + if (cbd->pk) { + rspamd_pubkey_unref (cbd->pk); + } + + if (cbd->conn) { + rspamd_http_connection_unref (cbd->conn); + cbd->conn = NULL; + } + + if (cbd->addrs) { + rspamd_inet_addr_t *addr; + guint i; + + PTR_ARRAY_FOREACH (cbd->addrs, i, addr) { + rspamd_inet_address_free (addr); + } + + g_ptr_array_free (cbd->addrs, TRUE); + } + + + MAP_RELEASE (cbd->bk, "rspamd_map_backend"); + + if (periodic) { + /* Detached in case of HTTP error */ + MAP_RELEASE (periodic, "periodic"); + } + + g_free (cbd); +} + +static void +free_http_cbdata (struct http_callback_data *cbd) +{ + cbd->map->tmp_dtor = NULL; + cbd->map->tmp_dtor_data = NULL; + + free_http_cbdata_common (cbd, TRUE); +} + +static void +free_http_cbdata_dtor (gpointer p) +{ + struct http_callback_data *cbd = p; + struct rspamd_map *map; + + map = cbd->map; + if (cbd->stage == http_map_http_conn) { + REF_RELEASE (cbd); + } + else { + /* We cannot terminate DNS requests sent */ + cbd->stage = http_map_terminated; + } + + msg_warn_map ("%s: " + "connection with http server is terminated: worker is stopping", + map->name); +} + +/* + * HTTP callbacks + */ +static void +http_map_error (struct rspamd_http_connection *conn, + GError *err) +{ + struct http_callback_data *cbd = conn->ud; + struct rspamd_map *map; + + map = cbd->map; + + if (cbd->periodic) { + cbd->periodic->errored = TRUE; + msg_err_map ("error reading %s(%s): " + "connection with http server terminated incorrectly: %e", + cbd->bk->uri, + cbd->addr ? rspamd_inet_address_to_string_pretty (cbd->addr) : "", + err); + + rspamd_map_process_periodic (cbd->periodic); + } + + MAP_RELEASE (cbd, "http_callback_data"); +} + +static void +rspamd_map_cache_cb (struct ev_loop *loop, ev_timer *w, int revents) +{ + struct rspamd_http_map_cached_cbdata *cache_cbd = (struct rspamd_http_map_cached_cbdata *) + w->data; + struct rspamd_map *map; + struct http_map_data *data; + + map = cache_cbd->map; + data = cache_cbd->data; + + if (cache_cbd->gen != cache_cbd->data->gen) { + /* We have another update, so this cache element is obviously expired */ + /* + * Important!: we do not set cache availability to zero here, as there + * might be fresh cache + */ + msg_info_map ("cached data is now expired (gen mismatch %L != %L) for %s", + cache_cbd->gen, cache_cbd->data->gen, map->name); + MAP_RELEASE (cache_cbd->shm, "rspamd_http_map_cached_cbdata"); + ev_timer_stop (loop, &cache_cbd->timeout); + g_free (cache_cbd); + } + else if (cache_cbd->data->last_checked >= cache_cbd->last_checked) { + /* + * We checked map but we have not found anything more recent, + * reschedule cache check + */ + if (cache_cbd->map->poll_timeout > + rspamd_get_calendar_ticks () - cache_cbd->data->last_checked) { + w->repeat = cache_cbd->map->poll_timeout - + (rspamd_get_calendar_ticks () - cache_cbd->data->last_checked); + } + else { + w->repeat = cache_cbd->map->poll_timeout; + } + + cache_cbd->last_checked = cache_cbd->data->last_checked; + msg_debug_map ("cached data is up to date for %s", map->name); + ev_timer_again (loop, &cache_cbd->timeout); + } + else { + data->cur_cache_cbd = NULL; + g_atomic_int_set (&data->cache->available, 0); + MAP_RELEASE (cache_cbd->shm, "rspamd_http_map_cached_cbdata"); + msg_info_map ("cached data is now expired for %s", map->name); + ev_timer_stop (loop, &cache_cbd->timeout); + g_free (cache_cbd); + } +} + +static int +http_map_finish (struct rspamd_http_connection *conn, + struct rspamd_http_message *msg) +{ + struct http_callback_data *cbd = conn->ud; + struct rspamd_map *map; + struct rspamd_map_backend *bk; + struct http_map_data *data; + struct rspamd_http_map_cached_cbdata *cache_cbd; + const rspamd_ftok_t *expires_hdr, *etag_hdr; + char next_check_date[128]; + guchar *in = NULL; + gsize dlen = 0; + + map = cbd->map; + bk = cbd->bk; + data = bk->data.hd; + + if (msg->code == 200) { + + if (cbd->check) { + msg_info_map ("need to reread map from %s", cbd->bk->uri); + cbd->periodic->need_modify = TRUE; + /* Reset the whole chain */ + cbd->periodic->cur_backend = 0; + /* Reset cache, old cached data will be cleaned on timeout */ + g_atomic_int_set (&data->cache->available, 0); + data->cur_cache_cbd = NULL; + + rspamd_map_process_periodic (cbd->periodic); + MAP_RELEASE (cbd, "http_callback_data"); + + return 0; + } + + cbd->data->last_checked = msg->date; + + if (msg->last_modified) { + cbd->data->last_modified = msg->last_modified; + } + else { + cbd->data->last_modified = msg->date; + } + + + /* Unsigned version - just open file */ + cbd->shmem_data = rspamd_http_message_shmem_ref (msg); + cbd->data_len = msg->body_buf.len; + + if (cbd->data_len == 0) { + msg_err_map ("cannot read empty map"); + goto err; + } + + g_assert (cbd->shmem_data != NULL); + + in = rspamd_shmem_xmap (cbd->shmem_data->shm_name, PROT_READ, &dlen); + + if (in == NULL) { + msg_err_map ("cannot read tempfile %s: %s", + cbd->shmem_data->shm_name, + strerror (errno)); + goto err; + } + + /* Check for expires */ + double cached_timeout = map->poll_timeout * 2; + + expires_hdr = rspamd_http_message_find_header (msg, "Expires"); + + if (expires_hdr) { + time_t hdate; + + hdate = rspamd_http_parse_date (expires_hdr->begin, expires_hdr->len); + + if (hdate != (time_t)-1 && hdate > msg->date) { + cached_timeout = map->next_check - msg->date + + map->poll_timeout * 2; + + map->next_check = hdate; + } + } + + /* Check for etag */ + etag_hdr = rspamd_http_message_find_header (msg, "ETag"); + + if (etag_hdr) { + if (cbd->data->etag) { + /* Remove old etag */ + rspamd_fstring_free (cbd->data->etag); + } + + cbd->data->etag = rspamd_fstring_new_init (etag_hdr->begin, + etag_hdr->len); + } + else { + if (cbd->data->etag) { + /* Remove and clear old etag */ + rspamd_fstring_free (cbd->data->etag); + cbd->data->etag = NULL; + } + } + + MAP_RETAIN (cbd->shmem_data, "shmem_data"); + cbd->data->gen ++; + /* + * We know that a map is in the locked state + */ + g_atomic_int_set (&data->cache->available, 1); + /* Store cached data */ + rspamd_strlcpy (data->cache->shmem_name, cbd->shmem_data->shm_name, + sizeof (data->cache->shmem_name)); + data->cache->len = cbd->data_len; + data->cache->last_modified = cbd->data->last_modified; + cache_cbd = g_malloc0 (sizeof (*cache_cbd)); + cache_cbd->shm = cbd->shmem_data; + cache_cbd->event_loop = cbd->event_loop; + cache_cbd->map = map; + cache_cbd->data = cbd->data; + cache_cbd->last_checked = cbd->data->last_checked; + cache_cbd->gen = cbd->data->gen; + MAP_RETAIN (cache_cbd->shm, "shmem_data"); + + ev_timer_init (&cache_cbd->timeout, rspamd_map_cache_cb, cached_timeout, + 0.0); + ev_timer_start (cbd->event_loop, &cache_cbd->timeout); + cache_cbd->timeout.data = cache_cbd; + data->cur_cache_cbd = cache_cbd; + + if (map->next_check) { + rspamd_http_date_format (next_check_date, sizeof (next_check_date), + map->next_check); + } + else { + rspamd_http_date_format (next_check_date, sizeof (next_check_date), + rspamd_get_calendar_ticks () + map->poll_timeout); + } + + + if (cbd->bk->is_compressed) { + ZSTD_DStream *zstream; + ZSTD_inBuffer zin; + ZSTD_outBuffer zout; + guchar *out; + gsize outlen, r; + + zstream = ZSTD_createDStream (); + ZSTD_initDStream (zstream); + + zin.pos = 0; + zin.src = in; + zin.size = dlen; + + if ((outlen = ZSTD_getDecompressedSize (zin.src, zin.size)) == 0) { + outlen = ZSTD_DStreamOutSize (); + } + + out = g_malloc (outlen); + + zout.dst = out; + zout.pos = 0; + zout.size = outlen; + + while (zin.pos < zin.size) { + r = ZSTD_decompressStream (zstream, &zout, &zin); + + if (ZSTD_isError (r)) { + msg_err_map ("%s(%s): cannot decompress data: %s", + cbd->bk->uri, + rspamd_inet_address_to_string_pretty (cbd->addr), + ZSTD_getErrorName (r)); + ZSTD_freeDStream (zstream); + g_free (out); + MAP_RELEASE (cbd->shmem_data, "shmem_data"); + goto err; + } + + if (zout.pos == zout.size) { + /* We need to extend output buffer */ + zout.size = zout.size * 2 + 1.0; + out = g_realloc (zout.dst, zout.size); + zout.dst = out; + } + } + + ZSTD_freeDStream (zstream); + msg_info_map ("%s(%s): read map data %z bytes compressed, " + "%z uncompressed, next check at %s", + cbd->bk->uri, + rspamd_inet_address_to_string_pretty (cbd->addr), + dlen, zout.pos, next_check_date); + map->read_callback (out, zout.pos, &cbd->periodic->cbdata, TRUE); + rspamd_map_save_http_cached_file (map, bk, cbd->data, out, zout.pos); + g_free (out); + } + else { + msg_info_map ("%s(%s): read map data %z bytes, next check at %s", + cbd->bk->uri, + rspamd_inet_address_to_string_pretty (cbd->addr), + dlen, next_check_date); + rspamd_map_save_http_cached_file (map, bk, cbd->data, in, cbd->data_len); + map->read_callback (in, cbd->data_len, &cbd->periodic->cbdata, TRUE); + } + + MAP_RELEASE (cbd->shmem_data, "shmem_data"); + + cbd->periodic->cur_backend ++; + munmap (in, dlen); + rspamd_map_process_periodic (cbd->periodic); + } + else if (msg->code == 304 && cbd->check) { + cbd->data->last_checked = msg->date; + + if (msg->last_modified) { + cbd->data->last_modified = msg->last_modified; + } + else { + cbd->data->last_modified = msg->date; + } + + expires_hdr = rspamd_http_message_find_header (msg, "Expires"); + + if (expires_hdr) { + time_t hdate; + + hdate = rspamd_http_parse_date (expires_hdr->begin, expires_hdr->len); + if (hdate != (time_t)-1 && hdate > msg->date) { + map->next_check = hdate; + } + } + + etag_hdr = rspamd_http_message_find_header (msg, "ETag"); + + if (etag_hdr) { + if (cbd->data->etag) { + /* Remove old etag */ + rspamd_fstring_free (cbd->data->etag); + cbd->data->etag = rspamd_fstring_new_init (etag_hdr->begin, + etag_hdr->len); + } + } + + if (map->next_check) { + rspamd_http_date_format (next_check_date, sizeof (next_check_date), + map->next_check); + msg_info_map ("data is not modified for server %s, next check at %s " + "(http cache based)", + cbd->data->host, next_check_date); + } + else { + rspamd_http_date_format (next_check_date, sizeof (next_check_date), + rspamd_get_calendar_ticks () + map->poll_timeout); + msg_info_map ("data is not modified for server %s, next check at %s " + "(timer based)", + cbd->data->host, next_check_date); + } + + rspamd_map_update_http_cached_file (map, bk, cbd->data); + cbd->periodic->cur_backend ++; + rspamd_map_process_periodic (cbd->periodic); + } + else { + msg_info_map ("cannot load map %s from %s: HTTP error %d", + bk->uri, cbd->data->host, msg->code); + goto err; + } + + MAP_RELEASE (cbd, "http_callback_data"); + return 0; + +err: + cbd->periodic->errored = 1; + rspamd_map_process_periodic (cbd->periodic); + MAP_RELEASE (cbd, "http_callback_data"); + + return 0; +} + +static gboolean +read_map_file_chunks (struct rspamd_map *map, struct map_cb_data *cbdata, + const gchar *fname, gsize len, goffset off) +{ + gint fd; + gssize r, avail; + gsize buflen = 1024 * 1024; + gchar *pos, *bytes; + + fd = rspamd_file_xopen (fname, O_RDONLY, 0, TRUE); + + if (fd == -1) { + msg_err_map ("can't open map for buffered reading %s: %s", + fname, strerror (errno)); + return FALSE; + } + + if (lseek (fd, off, SEEK_SET) == -1) { + msg_err_map ("can't seek in map to pos %d for buffered reading %s: %s", + (gint)off, fname, strerror (errno)); + return FALSE; + } + + buflen = MIN (len, buflen); + bytes = g_malloc (buflen); + avail = buflen; + pos = bytes; + + while ((r = read (fd, pos, avail)) > 0) { + gchar *end = bytes + (pos - bytes) + r; + msg_debug_map ("%s: read map chunk, %z bytes", fname, + r); + pos = map->read_callback (bytes, end - bytes, cbdata, r == len); + + if (pos && pos > bytes && pos < end) { + guint remain = end - pos; + + memmove (bytes, pos, remain); + pos = bytes + remain; + /* Need to preserve the remain */ + avail = ((gssize)buflen) - remain; + + if (avail <= 0) { + /* Try realloc, too large element */ + g_assert (buflen >= remain); + bytes = g_realloc (bytes, buflen * 2); + + pos = bytes + remain; /* Adjust */ + avail += buflen; + buflen *= 2; + } + } + else { + avail = buflen; + pos = bytes; + } + + len -= r; + } + + if (r == -1) { + msg_err_map ("can't read from map %s: %s", fname, strerror (errno)); + close (fd); + g_free (bytes); + + return FALSE; + } + + close (fd); + g_free (bytes); + + return TRUE; +} + +static gboolean +rspamd_map_check_sig_pk_mem (const guchar *sig, + gsize siglen, + struct rspamd_map *map, + const guchar *input, + gsize inlen, + struct rspamd_cryptobox_pubkey *pk) +{ + GString *b32_key; + gboolean ret = TRUE; + + if (siglen != rspamd_cryptobox_signature_bytes (RSPAMD_CRYPTOBOX_MODE_25519)) { + msg_err_map ("can't open signature for %s: invalid size: %z", map->name, siglen); + + ret = FALSE; + } + + if (ret && !rspamd_cryptobox_verify (sig, siglen, input, inlen, + rspamd_pubkey_get_pk (pk, NULL), RSPAMD_CRYPTOBOX_MODE_25519)) { + msg_err_map ("can't verify signature for %s: incorrect signature", map->name); + + ret = FALSE; + } + + if (ret) { + b32_key = rspamd_pubkey_print (pk, + RSPAMD_KEYPAIR_BASE32 | RSPAMD_KEYPAIR_PUBKEY); + msg_info_map ("verified signature for %s using trusted key %v", + map->name, b32_key); + g_string_free (b32_key, TRUE); + } + + return ret; +} + +static gboolean +rspamd_map_check_file_sig (const char *fname, + struct rspamd_map *map, + struct rspamd_map_backend *bk, + const guchar *input, + gsize inlen) { + guchar *data; + struct rspamd_cryptobox_pubkey *pk = NULL; + GString *b32_key; + gboolean ret = TRUE; + gsize len = 0; + gchar fpath[PATH_MAX]; + + if (bk->trusted_pubkey == NULL) { + /* Try to load and check pubkey */ + rspamd_snprintf (fpath, sizeof (fpath), "%s.pub", fname); + data = rspamd_file_xmap (fpath, PROT_READ, &len, TRUE); + + if (data == NULL) { + msg_err_map ("can't open pubkey %s: %s", fpath, strerror (errno)); + return FALSE; + } + + pk = rspamd_pubkey_from_base32 (data, len, RSPAMD_KEYPAIR_SIGN, + RSPAMD_CRYPTOBOX_MODE_25519); + munmap (data, len); + + if (pk == NULL) { + msg_err_map ("can't load pubkey %s", fpath); + return FALSE; + } + + /* We just check pk against the trusted db of keys */ + b32_key = rspamd_pubkey_print (pk, + RSPAMD_KEYPAIR_BASE32 | RSPAMD_KEYPAIR_PUBKEY); + g_assert (b32_key != NULL); + + if (g_hash_table_lookup (map->cfg->trusted_keys, b32_key->str) == NULL) { + msg_err_map ("pubkey loaded from %s is untrusted: %v", fpath, + b32_key); + g_string_free (b32_key, TRUE); + rspamd_pubkey_unref (pk); + + return FALSE; + } + + g_string_free (b32_key, TRUE); + } + else { + pk = rspamd_pubkey_ref (bk->trusted_pubkey); + } + + rspamd_snprintf (fpath, sizeof (fpath), "%s.sig", fname); + data = rspamd_shmem_xmap (fpath, PROT_READ, &len); + + if (data == NULL) { + msg_err_map ("can't open signature %s: %s", fpath, strerror (errno)); + ret = FALSE; + } + + if (ret) { + ret = rspamd_map_check_sig_pk_mem (data, len, map, input, inlen, pk); + munmap (data, len); + } + + rspamd_pubkey_unref (pk); + + return ret; +} + +/** + * Callback for reading data from file + */ +static gboolean +read_map_file (struct rspamd_map *map, struct file_map_data *data, + struct rspamd_map_backend *bk, struct map_periodic_cbdata *periodic) +{ + gchar *bytes; + gsize len; + struct stat st; + + if (map->read_callback == NULL || map->fin_callback == NULL) { + msg_err_map ("%s: bad callback for reading map file", + data->filename); + return FALSE; + } + + if (stat (data->filename, &st) == -1) { + /* File does not exist, skipping */ + if (errno != ENOENT) { + msg_err_map ("%s: map file is unavailable for reading: %s", + data->filename, strerror (errno)); + + return FALSE; + } + else { + msg_info_map ("%s: map file is not found; " + "it will be read automatically if created", + data->filename); + return TRUE; + } + } + + ev_stat_stat (map->event_loop, &data->st_ev); + len = st.st_size; + + if (bk->is_signed) { + bytes = rspamd_file_xmap (data->filename, PROT_READ, &len, TRUE); + + if (bytes == NULL) { + msg_err_map ("can't open map %s: %s", data->filename, strerror (errno)); + return FALSE; + } + + if (!rspamd_map_check_file_sig (data->filename, map, bk, bytes, len)) { + munmap (bytes, len); + + return FALSE; + } + + munmap (bytes, len); + } + + if (len > 0) { + if (bk->is_compressed) { + bytes = rspamd_file_xmap (data->filename, PROT_READ, &len, TRUE); + + if (bytes == NULL) { + msg_err_map ("can't open map %s: %s", data->filename, strerror (errno)); + return FALSE; + } + + ZSTD_DStream *zstream; + ZSTD_inBuffer zin; + ZSTD_outBuffer zout; + guchar *out; + gsize outlen, r; + + zstream = ZSTD_createDStream (); + ZSTD_initDStream (zstream); + + zin.pos = 0; + zin.src = bytes; + zin.size = len; + + if ((outlen = ZSTD_getDecompressedSize (zin.src, zin.size)) == 0) { + outlen = ZSTD_DStreamOutSize (); + } + + out = g_malloc (outlen); + + zout.dst = out; + zout.pos = 0; + zout.size = outlen; + + while (zin.pos < zin.size) { + r = ZSTD_decompressStream (zstream, &zout, &zin); + + if (ZSTD_isError (r)) { + msg_err_map ("%s: cannot decompress data: %s", + data->filename, + ZSTD_getErrorName (r)); + ZSTD_freeDStream (zstream); + g_free (out); + munmap (bytes, len); + return FALSE; + } + + if (zout.pos == zout.size) { + /* We need to extend output buffer */ + zout.size = zout.size * 2 + 1; + out = g_realloc (zout.dst, zout.size); + zout.dst = out; + } + } + + ZSTD_freeDStream (zstream); + msg_info_map ("%s: read map data, %z bytes compressed, " + "%z uncompressed)", data->filename, + len, zout.pos); + map->read_callback (out, zout.pos, &periodic->cbdata, TRUE); + g_free (out); + + munmap (bytes, len); + } + else { + /* Perform buffered read: fail-safe */ + if (!read_map_file_chunks (map, &periodic->cbdata, data->filename, + len, 0)) { + return FALSE; + } + } + } + else { + /* Empty map */ + map->read_callback (NULL, 0, &periodic->cbdata, TRUE); + } + + return TRUE; +} + +static gboolean +read_map_static (struct rspamd_map *map, struct static_map_data *data, + struct rspamd_map_backend *bk, struct map_periodic_cbdata *periodic) +{ + guchar *bytes; + gsize len; + + if (map->read_callback == NULL || map->fin_callback == NULL) { + msg_err_map ("%s: bad callback for reading map file", map->name); + data->processed = TRUE; + return FALSE; + } + + bytes = data->data; + len = data->len; + + if (len > 0) { + if (bk->is_compressed) { + ZSTD_DStream *zstream; + ZSTD_inBuffer zin; + ZSTD_outBuffer zout; + guchar *out; + gsize outlen, r; + + zstream = ZSTD_createDStream (); + ZSTD_initDStream (zstream); + + zin.pos = 0; + zin.src = bytes; + zin.size = len; + + if ((outlen = ZSTD_getDecompressedSize (zin.src, zin.size)) == 0) { + outlen = ZSTD_DStreamOutSize (); + } + + out = g_malloc (outlen); + + zout.dst = out; + zout.pos = 0; + zout.size = outlen; + + while (zin.pos < zin.size) { + r = ZSTD_decompressStream (zstream, &zout, &zin); + + if (ZSTD_isError (r)) { + msg_err_map ("%s: cannot decompress data: %s", + map->name, + ZSTD_getErrorName (r)); + ZSTD_freeDStream (zstream); + g_free (out); + + return FALSE; + } + + if (zout.pos == zout.size) { + /* We need to extend output buffer */ + zout.size = zout.size * 2 + 1; + out = g_realloc (zout.dst, zout.size); + zout.dst = out; + } + } + + ZSTD_freeDStream (zstream); + msg_info_map ("%s: read map data, %z bytes compressed, " + "%z uncompressed)", + map->name, + len, zout.pos); + map->read_callback (out, zout.pos, &periodic->cbdata, TRUE); + g_free (out); + } + else { + msg_info_map ("%s: read map data, %z bytes", + map->name, len); + map->read_callback (bytes, len, &periodic->cbdata, TRUE); + } + } + else { + map->read_callback (NULL, 0, &periodic->cbdata, TRUE); + } + + data->processed = TRUE; + + return TRUE; +} + +static void +rspamd_map_periodic_dtor (struct map_periodic_cbdata *periodic) +{ + struct rspamd_map *map; + + map = periodic->map; + msg_debug_map ("periodic dtor %p", periodic); + + if (periodic->need_modify) { + /* We are done */ + periodic->map->fin_callback (&periodic->cbdata, periodic->map->user_data); + } + else { + /* Not modified */ + } + + if (periodic->locked) { + g_atomic_int_set (periodic->map->locked, 0); + msg_debug_map ("unlocked map %s", periodic->map->name); + + if (periodic->map->wrk->state == rspamd_worker_state_running) { + rspamd_map_schedule_periodic (periodic->map, + RSPAMD_SYMBOL_RESULT_NORMAL); + } + else { + msg_debug_map ("stop scheduling periodics for %s; terminating state", + periodic->map->name); + } + } + + g_free (periodic); +} + +/* Called on timer execution */ +static void +rspamd_map_periodic_callback (struct ev_loop *loop, ev_timer *w, int revents) +{ + struct map_periodic_cbdata *cbd = (struct map_periodic_cbdata *)w->data; + + MAP_RETAIN (cbd, "periodic"); + ev_timer_stop (loop, w); + rspamd_map_process_periodic (cbd); + MAP_RELEASE (cbd, "periodic"); +} + +static void +rspamd_map_schedule_periodic (struct rspamd_map *map, int how) +{ + const gdouble error_mult = 20.0, lock_mult = 0.1; + static const gdouble min_timer_interval = 2.0; + const gchar *reason = "unknown reason"; + gdouble jittered_sec; + gdouble timeout; + struct map_periodic_cbdata *cbd; + + if (map->scheduled_check || (map->wrk && + map->wrk->state != rspamd_worker_state_running)) { + /* + * Do not schedule check if some check is already scheduled or + * if worker is going to die + */ + return; + } + + if (!(how & RSPAMD_MAP_SCHEDULE_INIT) && map->static_only) { + /* No need to schedule anything for static maps */ + return; + } + + if (map->non_trivial && map->next_check != 0) { + timeout = map->next_check - rspamd_get_calendar_ticks (); + + if (timeout > 0 && timeout < map->poll_timeout) { + /* Early check case, jitter */ + gdouble poll_timeout = map->poll_timeout; + + if (how & RSPAMD_MAP_SCHEDULE_ERROR) { + poll_timeout = map->poll_timeout * error_mult; + reason = "early active non-trivial check (after error)"; + } + else if (how & RSPAMD_MAP_SCHEDULE_LOCKED) { + poll_timeout = map->poll_timeout * lock_mult; + reason = "early active non-trivial check (after being locked)"; + } + else { + reason = "early active non-trivial check"; + } + + jittered_sec = MIN (timeout, poll_timeout); + + } + else if (timeout <= 0) { + /* Data is already expired, need to check */ + jittered_sec = 0.0; + reason = "expired non-trivial data"; + } + else { + /* No need to check now, wait till next_check */ + jittered_sec = timeout; + reason = "valid non-trivial data"; + } + } + else { + timeout = map->poll_timeout; + + if (how & RSPAMD_MAP_SCHEDULE_INIT) { + timeout = 0.0; + reason = "init scheduled check"; + } + else { + if (how & RSPAMD_MAP_SCHEDULE_ERROR) { + timeout = map->poll_timeout * error_mult; + reason = "errored scheduled check"; + } + else if (how & RSPAMD_MAP_SCHEDULE_LOCKED) { + timeout = map->poll_timeout * lock_mult; + reason = "locked scheduled check"; + } + else { + reason = "normal scheduled check"; + } + } + + jittered_sec = rspamd_time_jitter (timeout, 0); + } + + /* Now, we do some sanity checks for jittered seconds */ + if (!(how & RSPAMD_MAP_SCHEDULE_INIT)) { + /* Never allow too low interval between timer checks, it is epxensive */ + if (jittered_sec < min_timer_interval) { + jittered_sec = rspamd_time_jitter (min_timer_interval, 0); + } + + if (map->non_trivial) { + /* + * Even if we are reported that we need to reload cache often, we + * still want to be sane in terms of events... + */ + if (jittered_sec < min_timer_interval * 2.0) { + if (map->nelts > 0) { + jittered_sec = min_timer_interval * 3.0; + } + } + } + } + + cbd = g_malloc0 (sizeof (*cbd)); + cbd->cbdata.state = 0; + cbd->cbdata.prev_data = *map->user_data; + cbd->cbdata.cur_data = NULL; + cbd->cbdata.map = map; + cbd->map = map; + map->scheduled_check = cbd; + REF_INIT_RETAIN (cbd, rspamd_map_periodic_dtor); + + cbd->ev.data = cbd; + ev_timer_init (&cbd->ev, rspamd_map_periodic_callback, jittered_sec, 0.0); + ev_timer_start (map->event_loop, &cbd->ev); + + msg_debug_map ("schedule new periodic event %p in %.3f seconds for %s; reason: %s", + cbd, jittered_sec, map->name, reason); +} + +static void +rspamd_map_dns_callback (struct rdns_reply *reply, void *arg) +{ + struct http_callback_data *cbd = arg; + struct rdns_reply_entry *cur_rep; + struct rspamd_map *map; + guint flags = RSPAMD_HTTP_CLIENT_SIMPLE|RSPAMD_HTTP_CLIENT_SHARED; + + map = cbd->map; + + msg_debug_map ("got dns reply with code %s on stage %d", + rdns_strerror (reply->code), cbd->stage); + + if (cbd->stage == http_map_terminated) { + MAP_RELEASE (cbd, "http_callback_data"); + return; + } + + if (reply->code == RDNS_RC_NOERROR) { + /* + * We just get the first address hoping that a resolver performs + * round-robin rotation well + */ + + DL_FOREACH (reply->entries, cur_rep) { + rspamd_inet_addr_t *addr; + addr = rspamd_inet_address_from_rnds (reply->entries); + + if (addr != NULL) { + rspamd_inet_address_set_port (addr, cbd->data->port); + g_ptr_array_add (cbd->addrs, (void *)addr); + } + } + + if (cbd->stage == http_map_resolve_host2) { + /* We have still one request pending */ + cbd->stage = http_map_resolve_host1; + } + else if (cbd->stage == http_map_resolve_host1) { + cbd->stage = http_map_http_conn; + } + } + else if (cbd->stage < http_map_http_conn) { + if (cbd->stage == http_map_resolve_host2) { + /* We have still one request pending */ + cbd->stage = http_map_resolve_host1; + } + else if (cbd->addrs->len == 0) { + /* We could not resolve host, so cowardly fail here */ + msg_err_map ("cannot resolve %s: %s", cbd->data->host, + rdns_strerror (reply->code)); + cbd->periodic->errored = 1; + rspamd_map_process_periodic (cbd->periodic); + } + else { + /* We have at least one address, so we can continue... */ + cbd->stage = http_map_http_conn; + } + } + + if (cbd->stage == http_map_http_conn && cbd->addrs->len > 0) { + guint selected_addr_idx; + + selected_addr_idx = rspamd_random_uint64_fast () % cbd->addrs->len; + cbd->addr = (rspamd_inet_addr_t *)g_ptr_array_index (cbd->addrs, + selected_addr_idx); + + msg_debug_map ("open http connection to %s", + rspamd_inet_address_to_string_pretty (cbd->addr)); + cbd->conn = rspamd_http_connection_new_client (NULL, + NULL, + http_map_error, + http_map_finish, + flags, + cbd->addr); + + if (cbd->conn != NULL) { + write_http_request (cbd); + } + else { + cbd->periodic->errored = TRUE; + msg_err_map ("error reading %s(%s): " + "connection with http server terminated incorrectly: %s", + cbd->bk->uri, + cbd->addr ? rspamd_inet_address_to_string_pretty (cbd->addr) : "", + strerror (errno)); + + rspamd_map_process_periodic (cbd->periodic); + } + } + + MAP_RELEASE (cbd, "http_callback_data"); +} + +static gboolean +rspamd_map_read_cached (struct rspamd_map *map, struct rspamd_map_backend *bk, + struct map_periodic_cbdata *periodic, const gchar *host) +{ + gsize len; + gpointer in; + struct http_map_data *data; + + data = bk->data.hd; + + in = rspamd_shmem_xmap (data->cache->shmem_name, PROT_READ, &len); + + if (in == NULL) { + msg_err ("cannot map cache from %s: %s", data->cache->shmem_name, + strerror (errno)); + return FALSE; + } + + if (len < data->cache->len) { + msg_err ("cannot map cache from %s: bad length %z, %z expected", + data->cache->shmem_name, + len, data->cache->len); + munmap (in, len); + + return FALSE; + } + + if (bk->is_compressed) { + ZSTD_DStream *zstream; + ZSTD_inBuffer zin; + ZSTD_outBuffer zout; + guchar *out; + gsize outlen, r; + + zstream = ZSTD_createDStream (); + ZSTD_initDStream (zstream); + + zin.pos = 0; + zin.src = in; + zin.size = len; + + if ((outlen = ZSTD_getDecompressedSize (zin.src, zin.size)) == 0) { + outlen = ZSTD_DStreamOutSize (); + } + + out = g_malloc (outlen); + + zout.dst = out; + zout.pos = 0; + zout.size = outlen; + + while (zin.pos < zin.size) { + r = ZSTD_decompressStream (zstream, &zout, &zin); + + if (ZSTD_isError (r)) { + msg_err_map ("%s: cannot decompress data: %s", + bk->uri, + ZSTD_getErrorName (r)); + ZSTD_freeDStream (zstream); + g_free (out); + munmap (in, len); + return FALSE; + } + + if (zout.pos == zout.size) { + /* We need to extend output buffer */ + zout.size = zout.size * 2 + 1; + out = g_realloc (zout.dst, zout.size); + zout.dst = out; + } + } + + ZSTD_freeDStream (zstream); + msg_info_map ("%s: read map data cached %z bytes compressed, " + "%z uncompressed", bk->uri, + len, zout.pos); + map->read_callback (out, zout.pos, &periodic->cbdata, TRUE); + g_free (out); + } + else { + msg_info_map ("%s: read map data cached %z bytes", bk->uri, + len); + map->read_callback (in, len, &periodic->cbdata, TRUE); + } + + munmap (in, len); + + return TRUE; +} + +static gboolean +rspamd_map_has_http_cached_file (struct rspamd_map *map, + struct rspamd_map_backend *bk) +{ + gchar path[PATH_MAX]; + guchar digest[rspamd_cryptobox_HASHBYTES]; + struct rspamd_config *cfg = map->cfg; + struct stat st; + + if (cfg->maps_cache_dir == NULL || cfg->maps_cache_dir[0] == '\0') { + return FALSE; + } + + rspamd_cryptobox_hash (digest, bk->uri, strlen (bk->uri), NULL, 0); + rspamd_snprintf (path, sizeof (path), "%s%c%*xs.map", cfg->maps_cache_dir, + G_DIR_SEPARATOR, 20, digest); + + if (stat (path, &st) != -1 && st.st_size > + sizeof (struct rspamd_http_file_data)) { + return TRUE; + } + + return FALSE; +} + +static gboolean +rspamd_map_save_http_cached_file (struct rspamd_map *map, + struct rspamd_map_backend *bk, + struct http_map_data *htdata, + const guchar *data, + gsize len) +{ + gchar path[PATH_MAX]; + guchar digest[rspamd_cryptobox_HASHBYTES]; + struct rspamd_config *cfg = map->cfg; + gint fd; + struct rspamd_http_file_data header; + + if (cfg->maps_cache_dir == NULL || cfg->maps_cache_dir[0] == '\0') { + return FALSE; + } + + rspamd_cryptobox_hash (digest, bk->uri, strlen (bk->uri), NULL, 0); + rspamd_snprintf (path, sizeof (path), "%s%c%*xs.map", cfg->maps_cache_dir, + G_DIR_SEPARATOR, 20, digest); + + fd = rspamd_file_xopen (path, O_WRONLY | O_TRUNC | O_CREAT, + 00600, FALSE); + + if (fd == -1) { + return FALSE; + } + + if (!rspamd_file_lock (fd, FALSE)) { + msg_err_map ("cannot lock file %s: %s", path, strerror (errno)); + close (fd); + + return FALSE; + } + + memcpy (header.magic, rspamd_http_file_magic, sizeof (rspamd_http_file_magic)); + header.mtime = htdata->last_modified; + header.next_check = map->next_check; + header.data_off = sizeof (header); + + if (htdata->etag) { + header.data_off += RSPAMD_FSTRING_LEN (htdata->etag); + header.etag_len = RSPAMD_FSTRING_LEN (htdata->etag); + } + else { + header.etag_len = 0; + } + + if (write (fd, &header, sizeof (header)) != sizeof (header)) { + msg_err_map ("cannot write file %s (header stage): %s", path, strerror (errno)); + rspamd_file_unlock (fd, FALSE); + close (fd); + + return FALSE; + } + + if (header.etag_len > 0) { + if (write (fd, RSPAMD_FSTRING_DATA (htdata->etag), header.etag_len) != + header.etag_len) { + msg_err_map ("cannot write file %s (etag stage): %s", path, strerror (errno)); + rspamd_file_unlock (fd, FALSE); + close (fd); + + return FALSE; + } + } + + /* Now write the rest */ + if (write (fd, data, len) != len) { + msg_err_map ("cannot write file %s (data stage): %s", path, strerror (errno)); + rspamd_file_unlock (fd, FALSE); + close (fd); + + return FALSE; + } + + rspamd_file_unlock (fd, FALSE); + close (fd); + + msg_info_map ("saved data from %s in %s, %uz bytes", bk->uri, path, len + + sizeof (header) + header.etag_len); + + return TRUE; +} + +static gboolean +rspamd_map_update_http_cached_file (struct rspamd_map *map, + struct rspamd_map_backend *bk, + struct http_map_data *htdata) +{ + gchar path[PATH_MAX]; + guchar digest[rspamd_cryptobox_HASHBYTES]; + struct rspamd_config *cfg = map->cfg; + gint fd; + struct rspamd_http_file_data header; + + if (!rspamd_map_has_http_cached_file (map, bk)) { + return FALSE; + } + + rspamd_cryptobox_hash (digest, bk->uri, strlen (bk->uri), NULL, 0); + rspamd_snprintf (path, sizeof (path), "%s%c%*xs.map", cfg->maps_cache_dir, + G_DIR_SEPARATOR, 20, digest); + + fd = rspamd_file_xopen (path, O_WRONLY, + 00600, FALSE); + + if (fd == -1) { + return FALSE; + } + + if (!rspamd_file_lock (fd, FALSE)) { + msg_err_map ("cannot lock file %s: %s", path, strerror (errno)); + close (fd); + + return FALSE; + } + + memcpy (header.magic, rspamd_http_file_magic, sizeof (rspamd_http_file_magic)); + header.mtime = htdata->last_modified; + header.next_check = map->next_check; + header.data_off = sizeof (header); + + if (htdata->etag) { + header.data_off += RSPAMD_FSTRING_LEN (htdata->etag); + header.etag_len = RSPAMD_FSTRING_LEN (htdata->etag); + } + else { + header.etag_len = 0; + } + + if (write (fd, &header, sizeof (header)) != sizeof (header)) { + msg_err_map ("cannot update file %s (header stage): %s", path, strerror (errno)); + rspamd_file_unlock (fd, FALSE); + close (fd); + + return FALSE; + } + + if (header.etag_len > 0) { + if (write (fd, RSPAMD_FSTRING_DATA (htdata->etag), header.etag_len) != + header.etag_len) { + msg_err_map ("cannot update file %s (etag stage): %s", path, strerror (errno)); + rspamd_file_unlock (fd, FALSE); + close (fd); + + return FALSE; + } + } + + rspamd_file_unlock (fd, FALSE); + close (fd); + + return TRUE; +} + + +static gboolean +rspamd_map_read_http_cached_file (struct rspamd_map *map, + struct rspamd_map_backend *bk, + struct http_map_data *htdata, + struct map_cb_data *cbdata) +{ + gchar path[PATH_MAX]; + guchar digest[rspamd_cryptobox_HASHBYTES]; + struct rspamd_config *cfg = map->cfg; + gint fd; + struct stat st; + struct rspamd_http_file_data header; + + if (cfg->maps_cache_dir == NULL || cfg->maps_cache_dir[0] == '\0') { + return FALSE; + } + + rspamd_cryptobox_hash (digest, bk->uri, strlen (bk->uri), NULL, 0); + rspamd_snprintf (path, sizeof (path), "%s%c%*xs.map", cfg->maps_cache_dir, + G_DIR_SEPARATOR, 20, digest); + + fd = rspamd_file_xopen (path, O_RDONLY, 00600, FALSE); + + if (fd == -1) { + return FALSE; + } + + if (!rspamd_file_lock (fd, FALSE)) { + msg_err_map ("cannot lock file %s: %s", path, strerror (errno)); + close (fd); + + return FALSE; + } + + (void)fstat (fd, &st); + + if (read (fd, &header, sizeof (header)) != sizeof (header)) { + msg_err_map ("cannot read file %s (header stage): %s", path, strerror (errno)); + rspamd_file_unlock (fd, FALSE); + close (fd); + + return FALSE; + } + + if (memcmp (header.magic, rspamd_http_file_magic, + sizeof (rspamd_http_file_magic)) != 0) { + msg_warn_map ("invalid or old version magic in file %s; ignore it", path); + rspamd_file_unlock (fd, FALSE); + close (fd); + + return FALSE; + } + + map->next_check = header.next_check; + htdata->last_modified = header.mtime; + + if (header.etag_len > 0) { + rspamd_fstring_t *etag = rspamd_fstring_sized_new (header.etag_len); + + if (read (fd, RSPAMD_FSTRING_DATA (etag), header.etag_len) != header.etag_len) { + msg_err_map ("cannot read file %s (etag stage): %s", path, + strerror (errno)); + rspamd_file_unlock (fd, FALSE); + rspamd_fstring_free (etag); + close (fd); + + return FALSE; + } + + etag->len = header.etag_len; + + if (htdata->etag) { + /* FIXME: should be dealt somehow better */ + msg_warn_map ("etag is already defined as %V; cached is %V; ignore cached", + htdata->etag, etag); + rspamd_fstring_free (etag); + } + else { + htdata->etag = etag; + } + } + + rspamd_file_unlock (fd, FALSE); + close (fd); + + /* Now read file data */ + /* Perform buffered read: fail-safe */ + if (!read_map_file_chunks (map, cbdata, path, + st.st_size - header.data_off, header.data_off)) { + return FALSE; + } + + struct tm tm; + gchar ncheck_buf[32], lm_buf[32]; + + rspamd_localtime (map->next_check, &tm); + strftime (ncheck_buf, sizeof (ncheck_buf) - 1, "%Y-%m-%d %H:%M:%S", &tm); + rspamd_localtime (htdata->last_modified, &tm); + strftime (lm_buf, sizeof (lm_buf) - 1, "%Y-%m-%d %H:%M:%S", &tm); + + msg_info_map ("read cached data for %s from %s, %uz bytes; next check at: %s;" + " last modified on: %s; etag: %V", + bk->uri, + path, + (size_t)(st.st_size - header.data_off), + ncheck_buf, + lm_buf, + htdata->etag); + + return TRUE; +} + +/** + * Async HTTP callback + */ +static void +rspamd_map_common_http_callback (struct rspamd_map *map, + struct rspamd_map_backend *bk, + struct map_periodic_cbdata *periodic, + gboolean check) +{ + struct http_map_data *data; + struct http_callback_data *cbd; + guint flags = RSPAMD_HTTP_CLIENT_SIMPLE|RSPAMD_HTTP_CLIENT_SHARED; + + data = bk->data.hd; + + if (g_atomic_int_get (&data->cache->available) == 1) { + /* Read cached data */ + if (check) { + if (data->last_modified < data->cache->last_modified) { + periodic->need_modify = TRUE; + /* Reset the whole chain */ + periodic->cur_backend = 0; + rspamd_map_process_periodic (periodic); + } + else { + if (map->active_http) { + /* Check even if there is a cached version */ + goto check; + } + else { + /* Switch to the next backend */ + periodic->cur_backend++; + rspamd_map_process_periodic (periodic); + } + } + + return; + } + else { + if (map->active_http && + data->last_modified > data->cache->last_modified) { + goto check; + } + else if (rspamd_map_read_cached (map, bk, periodic, data->host)) { + /* Switch to the next backend */ + periodic->cur_backend++; + data->last_modified = data->cache->last_modified; + rspamd_map_process_periodic (periodic); + + return; + } + } + } + else if (!map->active_http) { + /* Switch to the next backend */ + periodic->cur_backend ++; + rspamd_map_process_periodic (periodic); + + return; + } + +check: + cbd = g_malloc0 (sizeof (struct http_callback_data)); + + cbd->event_loop = map->event_loop; + cbd->addrs = g_ptr_array_sized_new (4); + cbd->map = map; + cbd->data = data; + cbd->check = check; + cbd->periodic = periodic; + MAP_RETAIN (periodic, "periodic"); + cbd->bk = bk; + MAP_RETAIN (bk, "rspamd_map_backend"); + cbd->stage = http_map_terminated; + REF_INIT_RETAIN (cbd, free_http_cbdata); + + msg_debug_map ("%s map data from %s", check ? "checking" : "reading", + data->host); + + /* Try address */ + rspamd_inet_addr_t *addr = NULL; + + if (rspamd_parse_inet_address (&addr, data->host, + strlen (data->host), RSPAMD_INET_ADDRESS_PARSE_DEFAULT)) { + rspamd_inet_address_set_port (addr, cbd->data->port); + g_ptr_array_add (cbd->addrs, (void *)addr); + cbd->conn = rspamd_http_connection_new_client ( + NULL, + NULL, + http_map_error, + http_map_finish, + flags, + addr); + + if (cbd->conn != NULL) { + cbd->stage = http_map_http_conn; + write_http_request (cbd); + cbd->addr = addr; + MAP_RELEASE (cbd, "http_callback_data"); + } + else { + msg_warn_map ("cannot load map: cannot connect to %s: %s", + data->host, strerror (errno)); + MAP_RELEASE (cbd, "http_callback_data"); + } + + return; + } + else if (map->r->r) { + /* Send both A and AAAA requests */ + guint nreq = 0; + + if (rdns_make_request_full (map->r->r, rspamd_map_dns_callback, cbd, + map->cfg->dns_timeout, map->cfg->dns_retransmits, 1, + data->host, RDNS_REQUEST_A)) { + MAP_RETAIN (cbd, "http_callback_data"); + nreq ++; + } + if (rdns_make_request_full (map->r->r, rspamd_map_dns_callback, cbd, + map->cfg->dns_timeout, map->cfg->dns_retransmits, 1, + data->host, RDNS_REQUEST_AAAA)) { + MAP_RETAIN (cbd, "http_callback_data"); + nreq ++; + } + + if (nreq == 2) { + cbd->stage = http_map_resolve_host2; + } + else if (nreq == 1) { + cbd->stage = http_map_resolve_host1; + } + + map->tmp_dtor = free_http_cbdata_dtor; + map->tmp_dtor_data = cbd; + } + else { + msg_warn_map ("cannot load map: DNS resolver is not initialized"); + cbd->periodic->errored = TRUE; + } + + MAP_RELEASE (cbd, "http_callback_data"); +} + +static void +rspamd_map_http_check_callback (struct map_periodic_cbdata *cbd) +{ + struct rspamd_map *map; + struct rspamd_map_backend *bk; + + map = cbd->map; + bk = g_ptr_array_index (cbd->map->backends, cbd->cur_backend); + + rspamd_map_common_http_callback (map, bk, cbd, TRUE); +} + +static void +rspamd_map_http_read_callback (struct map_periodic_cbdata *cbd) +{ + struct rspamd_map *map; + struct rspamd_map_backend *bk; + + map = cbd->map; + bk = g_ptr_array_index (cbd->map->backends, cbd->cur_backend); + rspamd_map_common_http_callback (map, bk, cbd, FALSE); +} + +static void +rspamd_map_file_check_callback (struct map_periodic_cbdata *periodic) +{ + struct rspamd_map *map; + struct file_map_data *data; + struct rspamd_map_backend *bk; + + map = periodic->map; + bk = g_ptr_array_index (map->backends, periodic->cur_backend); + data = bk->data.fd; + + if (data->need_modify) { + periodic->need_modify = TRUE; + periodic->cur_backend = 0; + data->need_modify = FALSE; + + rspamd_map_process_periodic (periodic); + + return; + } + + map = periodic->map; + /* Switch to the next backend as the rest is handled by ev_stat */ + periodic->cur_backend ++; + rspamd_map_process_periodic (periodic); +} + +static void +rspamd_map_static_check_callback (struct map_periodic_cbdata *periodic) +{ + struct rspamd_map *map; + struct static_map_data *data; + struct rspamd_map_backend *bk; + + map = periodic->map; + bk = g_ptr_array_index (map->backends, periodic->cur_backend); + data = bk->data.sd; + + if (!data->processed) { + periodic->need_modify = TRUE; + periodic->cur_backend = 0; + + rspamd_map_process_periodic (periodic); + + return; + } + + /* Switch to the next backend */ + periodic->cur_backend ++; + rspamd_map_process_periodic (periodic); +} + +static void +rspamd_map_file_read_callback (struct map_periodic_cbdata *periodic) +{ + struct rspamd_map *map; + struct file_map_data *data; + struct rspamd_map_backend *bk; + + map = periodic->map; + + bk = g_ptr_array_index (map->backends, periodic->cur_backend); + data = bk->data.fd; + + msg_info_map ("rereading map file %s", data->filename); + + if (!read_map_file (map, data, bk, periodic)) { + periodic->errored = TRUE; + } + + /* Switch to the next backend */ + periodic->cur_backend ++; + rspamd_map_process_periodic (periodic); +} + +static void +rspamd_map_static_read_callback (struct map_periodic_cbdata *periodic) +{ + struct rspamd_map *map; + struct static_map_data *data; + struct rspamd_map_backend *bk; + + map = periodic->map; + + bk = g_ptr_array_index (map->backends, periodic->cur_backend); + data = bk->data.sd; + + msg_info_map ("rereading static map"); + + if (!read_map_static (map, data, bk, periodic)) { + periodic->errored = TRUE; + } + + /* Switch to the next backend */ + periodic->cur_backend ++; + rspamd_map_process_periodic (periodic); +} + +static void +rspamd_map_process_periodic (struct map_periodic_cbdata *cbd) +{ + struct rspamd_map_backend *bk; + struct rspamd_map *map; + + map = cbd->map; + map->scheduled_check = NULL; + + if (!map->file_only && !cbd->locked) { + if (!g_atomic_int_compare_and_exchange (cbd->map->locked, + 0, 1)) { + msg_debug_map ( + "don't try to reread map %s as it is locked by other process, " + "will reread it later", cbd->map->name); + rspamd_map_schedule_periodic (map, RSPAMD_MAP_SCHEDULE_LOCKED); + MAP_RELEASE (cbd, "periodic"); + + return; + } + else { + msg_debug_map ("locked map %s", cbd->map->name); + cbd->locked = TRUE; + } + } + + if (cbd->errored) { + /* We should not check other backends if some backend has failed */ + rspamd_map_schedule_periodic (cbd->map, RSPAMD_MAP_SCHEDULE_ERROR); + + if (cbd->locked) { + g_atomic_int_set (cbd->map->locked, 0); + cbd->locked = FALSE; + } + + msg_debug_map ("unlocked map %s, refcount=%d", cbd->map->name, + cbd->ref.refcount); + MAP_RELEASE (cbd, "periodic"); + + return; + } + + /* For each backend we need to check for modifications */ + if (cbd->cur_backend >= cbd->map->backends->len) { + /* Last backend */ + msg_debug_map ("finished map: %d of %d", cbd->cur_backend, + cbd->map->backends->len); + MAP_RELEASE (cbd, "periodic"); + + return; + } + + if (cbd->map->wrk && cbd->map->wrk->state == rspamd_worker_state_running) { + bk = g_ptr_array_index (cbd->map->backends, cbd->cur_backend); + g_assert (bk != NULL); + + if (cbd->need_modify) { + /* Load data from the next backend */ + switch (bk->protocol) { + case MAP_PROTO_HTTP: + case MAP_PROTO_HTTPS: + rspamd_map_http_read_callback (cbd); + break; + case MAP_PROTO_FILE: + rspamd_map_file_read_callback (cbd); + break; + case MAP_PROTO_STATIC: + rspamd_map_static_read_callback (cbd); + break; + } + } else { + /* Check the next backend */ + switch (bk->protocol) { + case MAP_PROTO_HTTP: + case MAP_PROTO_HTTPS: + rspamd_map_http_check_callback (cbd); + break; + case MAP_PROTO_FILE: + rspamd_map_file_check_callback (cbd); + break; + case MAP_PROTO_STATIC: + rspamd_map_static_check_callback (cbd); + break; + } + } + } +} + +static void +rspamd_map_on_stat (struct ev_loop *loop, ev_stat *w, int revents) +{ + struct rspamd_map *map = (struct rspamd_map *)w->data; + + if (w->attr.st_nlink > 0) { + + if (w->attr.st_mtime > w->prev.st_mtime) { + msg_info_map ("old mtime is %t (size = %Hz), " + "new mtime is %t (size = %Hz) for map file %s", + w->prev.st_mtime, (gsize)w->prev.st_size, + w->attr.st_mtime, (gsize)w->attr.st_size, + w->path); + + /* Fire need modify flag */ + struct rspamd_map_backend *bk; + guint i; + + PTR_ARRAY_FOREACH (map->backends, i, bk) { + if (bk->protocol == MAP_PROTO_FILE) { + bk->data.fd->need_modify = TRUE; + } + } + + map->next_check = 0; + + if (map->scheduled_check) { + ev_timer_stop (map->event_loop, &map->scheduled_check->ev); + MAP_RELEASE (map->scheduled_check, "rspamd_map_on_stat"); + map->scheduled_check = NULL; + } + + rspamd_map_schedule_periodic (map, RSPAMD_MAP_SCHEDULE_INIT); + } + } +} + +/* Start watching event for all maps */ +void +rspamd_map_watch (struct rspamd_config *cfg, + struct ev_loop *event_loop, + struct rspamd_dns_resolver *resolver, + struct rspamd_worker *worker, + enum rspamd_map_watch_type how) +{ + GList *cur = cfg->maps; + struct rspamd_map *map; + struct rspamd_map_backend *bk; + guint i; + + g_assert (how > RSPAMD_MAP_WATCH_MIN && how < RSPAMD_MAP_WATCH_MAX); + + /* First of all do synced read of data */ + while (cur) { + map = cur->data; + map->event_loop = event_loop; + map->r = resolver; + + if (map->wrk == NULL && how != RSPAMD_MAP_WATCH_WORKER) { + /* Generic scanner map */ + map->wrk = worker; + + if (how == RSPAMD_MAP_WATCH_PRIMARY_CONTROLLER) { + map->active_http = TRUE; + } + else { + map->active_http = FALSE; + } + } + else if (map->wrk != NULL && map->wrk == worker) { + /* Map is bound to a specific worker */ + map->active_http = TRUE; + } + else { + /* Skip map for this worker as irrelevant */ + cur = g_list_next (cur); + continue; + } + + if (!map->active_http) { + /* Check cached version more frequently as it is cheap */ + + if (map->poll_timeout >= cfg->map_timeout && + cfg->map_file_watch_multiplier < 1.0) { + map->poll_timeout = + map->poll_timeout * cfg->map_file_watch_multiplier; + } + } + + map->file_only = TRUE; + map->static_only = TRUE; + + PTR_ARRAY_FOREACH (map->backends, i, bk) { + bk->event_loop = event_loop; + + if (bk->protocol == MAP_PROTO_FILE) { + struct file_map_data *data; + + data = bk->data.fd; + + ev_stat_init (&data->st_ev, rspamd_map_on_stat, + data->filename, map->poll_timeout * cfg->map_file_watch_multiplier); + data->st_ev.data = map; + ev_stat_start (event_loop, &data->st_ev); + map->static_only = FALSE; + } + else if ((bk->protocol == MAP_PROTO_HTTP || + bk->protocol == MAP_PROTO_HTTPS)) { + if (map->active_http) { + map->non_trivial = TRUE; + } + + map->static_only = FALSE; + map->file_only = FALSE; + } + } + + rspamd_map_schedule_periodic (map, RSPAMD_MAP_SCHEDULE_INIT); + + cur = g_list_next (cur); + } +} + +void +rspamd_map_preload (struct rspamd_config *cfg) +{ + GList *cur = cfg->maps; + struct rspamd_map *map; + struct rspamd_map_backend *bk; + guint i; + gboolean map_ok; + + /* First of all do synced read of data */ + while (cur) { + map = cur->data; + map_ok = TRUE; + + PTR_ARRAY_FOREACH (map->backends, i, bk) { + if (!(bk->protocol == MAP_PROTO_FILE || + bk->protocol == MAP_PROTO_STATIC)) { + + if (bk->protocol == MAP_PROTO_HTTP || + bk->protocol == MAP_PROTO_HTTPS) { + if (!rspamd_map_has_http_cached_file (map, bk)) { + + if (!map->fallback_backend) { + map_ok = FALSE; + } + break; + } + else { + continue; /* We are yet fine */ + } + } + map_ok = FALSE; + break; + } + } + + if (map_ok) { + struct map_periodic_cbdata fake_cbd; + gboolean succeed = TRUE; + + memset (&fake_cbd, 0, sizeof (fake_cbd)); + fake_cbd.cbdata.state = 0; + fake_cbd.cbdata.prev_data = *map->user_data; + fake_cbd.cbdata.cur_data = NULL; + fake_cbd.cbdata.map = map; + fake_cbd.map = map; + + PTR_ARRAY_FOREACH (map->backends, i, bk) { + fake_cbd.cur_backend = i; + + if (bk->protocol == MAP_PROTO_FILE) { + if (!read_map_file (map, bk->data.fd, bk, &fake_cbd)) { + succeed = FALSE; + break; + } + } + else if (bk->protocol == MAP_PROTO_STATIC) { + if (!read_map_static (map, bk->data.sd, bk, &fake_cbd)) { + succeed = FALSE; + break; + } + } + else if (bk->protocol == MAP_PROTO_HTTP || + bk->protocol == MAP_PROTO_HTTPS) { + if (!rspamd_map_read_http_cached_file (map, bk, bk->data.hd, + &fake_cbd.cbdata)) { + + if (map->fallback_backend) { + /* Try fallback */ + g_assert (map->fallback_backend->protocol == + MAP_PROTO_FILE); + if (!read_map_file (map, + map->fallback_backend->data.fd, + map->fallback_backend, &fake_cbd)) { + succeed = FALSE; + break; + } + } + else { + succeed = FALSE; + break; + } + } + } + else { + g_assert_not_reached (); + } + } + + if (succeed) { + map->fin_callback (&fake_cbd.cbdata, map->user_data); + } + else { + msg_info_map ("preload of %s failed", map->name); + } + + } + + cur = g_list_next (cur); + } +} + +void +rspamd_map_remove_all (struct rspamd_config *cfg) +{ + struct rspamd_map *map; + GList *cur; + struct rspamd_map_backend *bk; + struct map_cb_data cbdata; + guint i; + + for (cur = cfg->maps; cur != NULL; cur = g_list_next (cur)) { + map = cur->data; + + if (map->tmp_dtor) { + map->tmp_dtor (map->tmp_dtor_data); + } + + if (map->dtor) { + cbdata.prev_data = NULL; + cbdata.map = map; + cbdata.cur_data = *map->user_data; + + map->dtor (&cbdata); + *map->user_data = NULL; + } + + for (i = 0; i < map->backends->len; i ++) { + bk = g_ptr_array_index (map->backends, i); + + MAP_RELEASE (bk, "rspamd_map_backend"); + } + + if (map->fallback_backend) { + MAP_RELEASE (map->fallback_backend, "rspamd_map_backend"); + } + } + + g_list_free (cfg->maps); + cfg->maps = NULL; +} + +static const gchar * +rspamd_map_check_proto (struct rspamd_config *cfg, + const gchar *map_line, struct rspamd_map_backend *bk) +{ + const gchar *pos = map_line, *end, *end_key; + + g_assert (bk != NULL); + g_assert (pos != NULL); + + end = pos + strlen (pos); + + /* Static check */ + if (g_ascii_strcasecmp (pos, "static") == 0) { + bk->protocol = MAP_PROTO_STATIC; + bk->uri = g_strdup (pos); + + return pos; + } + else if (g_ascii_strcasecmp (pos, "zst+static") == 0) { + bk->protocol = MAP_PROTO_STATIC; + bk->uri = g_strdup (pos + 4); + bk->is_compressed = TRUE; + + return pos + 4; + } + + for (;;) { + if (g_ascii_strncasecmp (pos, "sign+", sizeof ("sign+") - 1) == 0) { + bk->is_signed = TRUE; + pos += sizeof ("sign+") - 1; + } + else if (g_ascii_strncasecmp (pos, "fallback+", sizeof ("fallback+") - 1) == 0) { + bk->is_fallback = TRUE; + pos += sizeof ("fallback+") - 1; + } + else if (g_ascii_strncasecmp (pos, "key=", sizeof ("key=") - 1) == 0) { + pos += sizeof ("key=") - 1; + end_key = memchr (pos, '+', end - pos); + + if (end_key != NULL) { + bk->trusted_pubkey = rspamd_pubkey_from_base32 (pos, end_key - pos, + RSPAMD_KEYPAIR_SIGN, RSPAMD_CRYPTOBOX_MODE_25519); + + if (bk->trusted_pubkey == NULL) { + msg_err_config ("cannot read pubkey from map: %s", + map_line); + return NULL; + } + pos = end_key + 1; + } else if (end - pos > 64) { + /* Try hex encoding */ + bk->trusted_pubkey = rspamd_pubkey_from_hex (pos, 64, + RSPAMD_KEYPAIR_SIGN, RSPAMD_CRYPTOBOX_MODE_25519); + + if (bk->trusted_pubkey == NULL) { + msg_err_config ("cannot read pubkey from map: %s", + map_line); + return NULL; + } + pos += 64; + } else { + msg_err_config ("cannot read pubkey from map: %s", + map_line); + return NULL; + } + + if (*pos == '+' || *pos == ':') { + pos++; + } + } + else { + /* No known flags */ + break; + } + } + + bk->protocol = MAP_PROTO_FILE; + + if (g_ascii_strncasecmp (pos, "http://", sizeof ("http://") - 1) == 0) { + bk->protocol = MAP_PROTO_HTTP; + /* Include http:// */ + bk->uri = g_strdup (pos); + pos += sizeof ("http://") - 1; + } + else if (g_ascii_strncasecmp (pos, "https://", sizeof ("https://") - 1) == 0) { + bk->protocol = MAP_PROTO_HTTPS; + /* Include https:// */ + bk->uri = g_strdup (pos); + pos += sizeof ("https://") - 1; + } + else if (g_ascii_strncasecmp (pos, "file://", sizeof ("file://") - 1) == 0) { + pos += sizeof ("file://") - 1; + /* Exclude file:// */ + bk->uri = g_strdup (pos); + } + else if (*pos == '/') { + /* Trivial file case */ + bk->uri = g_strdup (pos); + } + else { + msg_err_config ("invalid map fetching protocol: %s", map_line); + + return NULL; + } + + if (bk->protocol != MAP_PROTO_FILE && bk->is_signed) { + msg_err_config ("signed maps are no longer supported for HTTP(s): %s", map_line); + } + + return pos; +} + +gboolean +rspamd_map_is_map (const gchar *map_line) +{ + gboolean ret = FALSE; + + g_assert (map_line != NULL); + + if (map_line[0] == '/') { + ret = TRUE; + } + else if (g_ascii_strncasecmp (map_line, "sign+", sizeof ("sign+") - 1) == 0) { + ret = TRUE; + } + else if (g_ascii_strncasecmp (map_line, "fallback+", sizeof ("fallback+") - 1) == 0) { + ret = TRUE; + } + else if (g_ascii_strncasecmp (map_line, "file://", sizeof ("file://") - 1) == 0) { + ret = TRUE; + } + else if (g_ascii_strncasecmp (map_line, "http://", sizeof ("http://") - 1) == 0) { + ret = TRUE; + } + else if (g_ascii_strncasecmp (map_line, "https://", sizeof ("https://") - 1) == 0) { + ret = TRUE; + } + + return ret; +} + +static void +rspamd_map_backend_dtor (struct rspamd_map_backend *bk) +{ + g_free (bk->uri); + + switch (bk->protocol) { + case MAP_PROTO_FILE: + if (bk->data.fd) { + ev_stat_stop (bk->event_loop, &bk->data.fd->st_ev); + g_free (bk->data.fd->filename); + g_free (bk->data.fd); + } + break; + case MAP_PROTO_STATIC: + if (bk->data.sd) { + if (bk->data.sd->data) { + g_free (bk->data.sd->data); + } + + g_free (bk->data.sd); + } + break; + case MAP_PROTO_HTTP: + case MAP_PROTO_HTTPS: + if (bk->data.hd) { + struct http_map_data *data = bk->data.hd; + + g_free (data->host); + g_free (data->path); + g_free (data->rest); + + if (data->userinfo) { + g_free (data->userinfo); + } + + if (data->etag) { + rspamd_fstring_free (data->etag); + } + + if (g_atomic_int_compare_and_exchange (&data->cache->available, 1, 0)) { + if (data->cur_cache_cbd) { + MAP_RELEASE (data->cur_cache_cbd->shm, + "rspamd_http_map_cached_cbdata"); + ev_timer_stop (data->cur_cache_cbd->event_loop, + &data->cur_cache_cbd->timeout); + g_free (data->cur_cache_cbd); + data->cur_cache_cbd = NULL; + } + + unlink (data->cache->shmem_name); + } + + g_free (bk->data.hd); + } + break; + } + + if (bk->trusted_pubkey) { + rspamd_pubkey_unref (bk->trusted_pubkey); + } + + g_free (bk); +} + +static struct rspamd_map_backend * +rspamd_map_parse_backend (struct rspamd_config *cfg, const gchar *map_line) +{ + struct rspamd_map_backend *bk; + struct file_map_data *fdata = NULL; + struct http_map_data *hdata = NULL; + struct static_map_data *sdata = NULL; + struct http_parser_url up; + const gchar *end, *p; + rspamd_ftok_t tok; + + bk = g_malloc0 (sizeof (*bk)); + REF_INIT_RETAIN (bk, rspamd_map_backend_dtor); + + if (!rspamd_map_check_proto (cfg, map_line, bk)) { + goto err; + } + + if (bk->is_fallback && bk->protocol != MAP_PROTO_FILE) { + msg_err_config ("fallback backend must be file for %s", bk->uri); + + goto err; + } + + end = map_line + strlen (map_line); + if (end - map_line > 5) { + p = end - 5; + if (g_ascii_strcasecmp (p, ".zstd") == 0) { + bk->is_compressed = TRUE; + } + p = end - 4; + if (g_ascii_strcasecmp (p, ".zst") == 0) { + bk->is_compressed = TRUE; + } + } + + /* Now check for each proto separately */ + if (bk->protocol == MAP_PROTO_FILE) { + fdata = g_malloc0 (sizeof (struct file_map_data)); + + if (access (bk->uri, R_OK) == -1) { + if (errno != ENOENT) { + msg_err_config ("cannot open file '%s': %s", bk->uri, strerror (errno)); + goto err; + } + + msg_info_config ( + "map '%s' is not found, but it can be loaded automatically later", + bk->uri); + } + + fdata->filename = g_strdup (bk->uri); + bk->data.fd = fdata; + } + else if (bk->protocol == MAP_PROTO_HTTP || bk->protocol == MAP_PROTO_HTTPS) { + hdata = g_malloc0 (sizeof (struct http_map_data)); + + memset (&up, 0, sizeof (up)); + if (http_parser_parse_url (bk->uri, strlen (bk->uri), FALSE, + &up) != 0) { + msg_err_config ("cannot parse HTTP url: %s", bk->uri); + goto err; + } + else { + if (!(up.field_set & 1u << UF_HOST)) { + msg_err_config ("cannot parse HTTP url: %s: no host", bk->uri); + goto err; + } + + tok.begin = bk->uri + up.field_data[UF_HOST].off; + tok.len = up.field_data[UF_HOST].len; + hdata->host = rspamd_ftokdup (&tok); + + if (up.field_set & (1u << UF_PORT)) { + hdata->port = up.port; + } + else { + if (bk->protocol == MAP_PROTO_HTTP) { + hdata->port = 80; + } + else { + hdata->port = 443; + } + } + + if (up.field_set & (1u << UF_PATH)) { + tok.begin = bk->uri + up.field_data[UF_PATH].off; + tok.len = up.field_data[UF_PATH].len; + + hdata->path = rspamd_ftokdup (&tok); + + /* We also need to check query + fragment */ + if (up.field_set & ((1u << UF_QUERY) | (1u << UF_FRAGMENT))) { + tok.begin = bk->uri + up.field_data[UF_PATH].off + + up.field_data[UF_PATH].len; + tok.len = strlen (tok.begin); + hdata->rest = rspamd_ftokdup (&tok); + } + else { + hdata->rest = g_strdup (""); + } + } + + if (up.field_set & (1u << UF_USERINFO)) { + /* Create authorisation header for basic auth */ + guint len = sizeof ("Basic ") + + up.field_data[UF_USERINFO].len * 8 / 5 + 4; + hdata->userinfo = g_malloc (len); + rspamd_snprintf (hdata->userinfo, len, "Basic %*Bs", + (int)up.field_data[UF_USERINFO].len, + bk->uri + up.field_data[UF_USERINFO].off); + } + } + + hdata->cache = rspamd_mempool_alloc0_shared (cfg->cfg_pool, + sizeof (*hdata->cache)); + + bk->data.hd = hdata; + } + else if (bk->protocol == MAP_PROTO_STATIC) { + sdata = g_malloc0 (sizeof (*sdata)); + bk->data.sd = sdata; + } + + bk->id = rspamd_cryptobox_fast_hash_specific (RSPAMD_CRYPTOBOX_T1HA, + bk->uri, strlen (bk->uri), 0xdeadbabe); + + return bk; + +err: + MAP_RELEASE (bk, "rspamd_map_backend"); + + if (hdata) { + g_free (hdata); + } + + return NULL; +} + +static void +rspamd_map_calculate_hash (struct rspamd_map *map) +{ + struct rspamd_map_backend *bk; + guint i; + rspamd_cryptobox_hash_state_t st; + gchar *cksum_encoded, cksum[rspamd_cryptobox_HASHBYTES]; + + rspamd_cryptobox_hash_init (&st, NULL, 0); + + for (i = 0; i < map->backends->len; i ++) { + bk = g_ptr_array_index (map->backends, i); + rspamd_cryptobox_hash_update (&st, bk->uri, strlen (bk->uri)); + } + + rspamd_cryptobox_hash_final (&st, cksum); + cksum_encoded = rspamd_encode_base32 (cksum, sizeof (cksum)); + rspamd_strlcpy (map->tag, cksum_encoded, sizeof (map->tag)); + g_free (cksum_encoded); +} + +static gboolean +rspamd_map_add_static_string (struct rspamd_config *cfg, + const ucl_object_t *elt, + GString *target) +{ + gsize sz; + const gchar *dline; + + if (ucl_object_type (elt) != UCL_STRING) { + msg_err_config ("map has static backend but `data` is " + "not string like: %s", + ucl_object_type_to_string (elt->type)); + return FALSE; + } + + /* Otherwise, we copy data to the backend */ + dline = ucl_object_tolstring (elt, &sz); + + if (sz == 0) { + msg_err_config ("map has static backend but empty no data"); + return FALSE; + } + + g_string_append_len (target, dline, sz); + g_string_append_c (target, '\n'); + + return TRUE; +} + +struct rspamd_map * +rspamd_map_add (struct rspamd_config *cfg, + const gchar *map_line, + const gchar *description, + map_cb_t read_callback, + map_fin_cb_t fin_callback, + map_dtor_t dtor, + void **user_data, + struct rspamd_worker *worker) +{ + struct rspamd_map *map; + struct rspamd_map_backend *bk; + + bk = rspamd_map_parse_backend (cfg, map_line); + if (bk == NULL) { + return NULL; + } + + if (bk->is_fallback) { + msg_err_config ("cannot add map with fallback only backend: %s", bk->uri); + REF_RELEASE (bk); + + return NULL; + } + + map = rspamd_mempool_alloc0 (cfg->cfg_pool, sizeof (struct rspamd_map)); + map->read_callback = read_callback; + map->fin_callback = fin_callback; + map->dtor = dtor; + map->user_data = user_data; + map->cfg = cfg; + map->id = rspamd_random_uint64_fast (); + map->locked = + rspamd_mempool_alloc0_shared (cfg->cfg_pool, sizeof (gint)); + map->backends = g_ptr_array_sized_new (1); + map->wrk = worker; + rspamd_mempool_add_destructor (cfg->cfg_pool, rspamd_ptr_array_free_hard, + map->backends); + g_ptr_array_add (map->backends, bk); + map->name = rspamd_mempool_strdup (cfg->cfg_pool, map_line); + + if (bk->protocol == MAP_PROTO_FILE) { + map->poll_timeout = (cfg->map_timeout * cfg->map_file_watch_multiplier); + } else { + map->poll_timeout = cfg->map_timeout; + } + + if (description != NULL) { + map->description = rspamd_mempool_strdup (cfg->cfg_pool, description); + } + + rspamd_map_calculate_hash (map); + msg_info_map ("added map %s", bk->uri); + + cfg->maps = g_list_prepend (cfg->maps, map); + + return map; +} + +static inline void +rspamd_map_add_backend (struct rspamd_map *map, struct rspamd_map_backend *bk) +{ + if (bk->is_fallback) { + if (map->fallback_backend) { + msg_warn_map ("redefining fallback backend from %s to %s", + map->fallback_backend->uri, bk->uri); + } + + map->fallback_backend = bk; + } + else { + g_ptr_array_add (map->backends, bk); + } +} + +struct rspamd_map* +rspamd_map_add_from_ucl (struct rspamd_config *cfg, + const ucl_object_t *obj, + const gchar *description, + map_cb_t read_callback, + map_fin_cb_t fin_callback, + map_dtor_t dtor, + void **user_data, + struct rspamd_worker *worker) +{ + ucl_object_iter_t it = NULL; + const ucl_object_t *cur, *elt; + struct rspamd_map *map; + struct rspamd_map_backend *bk; + guint i; + + g_assert (obj != NULL); + + if (ucl_object_type (obj) == UCL_STRING) { + /* Just a plain string */ + return rspamd_map_add (cfg, ucl_object_tostring (obj), description, + read_callback, fin_callback, dtor, user_data, worker); + } + + map = rspamd_mempool_alloc0 (cfg->cfg_pool, sizeof (struct rspamd_map)); + map->read_callback = read_callback; + map->fin_callback = fin_callback; + map->dtor = dtor; + map->user_data = user_data; + map->cfg = cfg; + map->id = rspamd_random_uint64_fast (); + map->locked = + rspamd_mempool_alloc0_shared (cfg->cfg_pool, sizeof (gint)); + map->backends = g_ptr_array_new (); + map->wrk = worker; + rspamd_mempool_add_destructor (cfg->cfg_pool, rspamd_ptr_array_free_hard, + map->backends); + map->poll_timeout = cfg->map_timeout; + + if (description) { + map->description = rspamd_mempool_strdup (cfg->cfg_pool, description); + } + + if (ucl_object_type (obj) == UCL_ARRAY) { + /* Add array of maps as multiple backends */ + while ((cur = ucl_object_iterate (obj, &it, true)) != NULL) { + if (ucl_object_type (cur) == UCL_STRING) { + bk = rspamd_map_parse_backend (cfg, ucl_object_tostring (cur)); + + if (bk != NULL) { + rspamd_map_add_backend (map, bk); + + if (!map->name) { + map->name = rspamd_mempool_strdup (cfg->cfg_pool, + ucl_object_tostring (cur)); + } + } + } + else { + msg_err_config ("bad map element type: %s", + ucl_object_type_to_string (ucl_object_type (cur))); + } + } + + if (map->backends->len == 0) { + msg_err_config ("map has no urls to be loaded: empty list"); + goto err; + } + } + else if (ucl_object_type (obj) == UCL_OBJECT) { + elt = ucl_object_lookup (obj, "name"); + if (elt && ucl_object_type (elt) == UCL_STRING) { + map->name = rspamd_mempool_strdup (cfg->cfg_pool, + ucl_object_tostring (elt)); + } + + elt = ucl_object_lookup (obj, "description"); + if (elt && ucl_object_type (elt) == UCL_STRING) { + map->description = rspamd_mempool_strdup (cfg->cfg_pool, + ucl_object_tostring (elt)); + } + + elt = ucl_object_lookup_any (obj, "timeout", "poll", "poll_time", + "watch_interval", NULL); + if (elt) { + map->poll_timeout = ucl_object_todouble (elt); + } + + elt = ucl_object_lookup_any (obj, "upstreams", "url", "urls", NULL); + if (elt == NULL) { + msg_err_config ("map has no urls to be loaded: no elt"); + goto err; + } + + if (ucl_object_type (elt) == UCL_ARRAY) { + /* Add array of maps as multiple backends */ + it = ucl_object_iterate_new (elt); + + while ((cur = ucl_object_iterate_safe (it, true)) != NULL) { + if (ucl_object_type (cur) == UCL_STRING) { + bk = rspamd_map_parse_backend (cfg, ucl_object_tostring (cur)); + + if (bk != NULL) { + rspamd_map_add_backend (map, bk); + + if (!map->name) { + map->name = rspamd_mempool_strdup (cfg->cfg_pool, + ucl_object_tostring (cur)); + } + } + } + else { + msg_err_config ("bad map element type: %s", + ucl_object_type_to_string (ucl_object_type (cur))); + ucl_object_iterate_free (it); + goto err; + } + } + + ucl_object_iterate_free (it); + + if (map->backends->len == 0) { + msg_err_config ("map has no urls to be loaded: empty object list"); + goto err; + } + } + else if (ucl_object_type (elt) == UCL_STRING) { + bk = rspamd_map_parse_backend (cfg, ucl_object_tostring (elt)); + + if (bk != NULL) { + rspamd_map_add_backend (map, bk); + + if (!map->name) { + map->name = rspamd_mempool_strdup (cfg->cfg_pool, + ucl_object_tostring (elt)); + } + } + } + + if (!map->backends || map->backends->len == 0) { + msg_err_config ("map has no urls to be loaded: no valid backends"); + goto err; + } + } + else { + msg_err_config ("map has invalid type for value: %s", + ucl_object_type_to_string (ucl_object_type (obj))); + goto err; + } + + gboolean all_local = TRUE; + + PTR_ARRAY_FOREACH (map->backends, i, bk) { + if (bk->protocol == MAP_PROTO_STATIC) { + GString *map_data; + /* We need data field in ucl */ + elt = ucl_object_lookup (obj, "data"); + + if (elt == NULL) { + msg_err_config ("map has static backend but no `data` field"); + goto err; + } + + + if (ucl_object_type (elt) == UCL_STRING) { + map_data = g_string_sized_new (32); + + if (rspamd_map_add_static_string (cfg, elt, map_data)) { + bk->data.sd->data = map_data->str; + bk->data.sd->len = map_data->len; + g_string_free (map_data, FALSE); + } + else { + g_string_free (map_data, TRUE); + msg_err_config ("map has static backend with invalid `data` field"); + goto err; + } + } + else if (ucl_object_type (elt) == UCL_ARRAY) { + map_data = g_string_sized_new (32); + it = ucl_object_iterate_new (elt); + + while ((cur = ucl_object_iterate_safe (it, true))) { + if (!rspamd_map_add_static_string (cfg, cur, map_data)) { + g_string_free (map_data, TRUE); + msg_err_config ("map has static backend with invalid " + "`data` field"); + ucl_object_iterate_free (it); + goto err; + } + } + + ucl_object_iterate_free (it); + bk->data.sd->data = map_data->str; + bk->data.sd->len = map_data->len; + g_string_free (map_data, FALSE); + } + } + else if (bk->protocol != MAP_PROTO_FILE) { + all_local = FALSE; + } + } + + if (all_local) { + map->poll_timeout = (map->poll_timeout * + cfg->map_file_watch_multiplier); + } + + rspamd_map_calculate_hash (map); + msg_debug_map ("added map from ucl"); + + cfg->maps = g_list_prepend (cfg->maps, map); + + return map; + +err: + + if (map) { + PTR_ARRAY_FOREACH (map->backends, i, bk) { + MAP_RELEASE (bk, "rspamd_map_backend"); + } + } + + return NULL; +} + +rspamd_map_traverse_function +rspamd_map_get_traverse_function (struct rspamd_map *map) +{ + if (map) { + return map->traverse_function; + } + + return NULL; +} + +void +rspamd_map_traverse (struct rspamd_map *map, rspamd_map_traverse_cb cb, + gpointer cbdata, gboolean reset_hits) +{ + if (*map->user_data && map->traverse_function) { + map->traverse_function (*map->user_data, cb, cbdata, reset_hits); + } +} diff --git a/src/libserver/maps/map.h b/src/libserver/maps/map.h new file mode 100644 index 000000000..ce49bacbb --- /dev/null +++ b/src/libserver/maps/map.h @@ -0,0 +1,138 @@ +#ifndef RSPAMD_MAP_H +#define RSPAMD_MAP_H + +#include "config.h" +#include "contrib/libev/ev.h" + +#include "ucl.h" +#include "mem_pool.h" +#include "radix.h" +#include "dns.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Maps API is designed to load lists data from different dynamic sources. + * It monitor files and HTTP locations for modifications and reload them if they are + * modified. + */ +struct map_cb_data; +struct rspamd_worker; + +/** + * Callback types + */ +typedef gchar *(*map_cb_t) (gchar *chunk, gint len, + struct map_cb_data *data, gboolean final); + +typedef void (*map_fin_cb_t) (struct map_cb_data *data, void **target); + +typedef void (*map_dtor_t) (struct map_cb_data *data); + +typedef gboolean (*rspamd_map_traverse_cb) (gconstpointer key, + gconstpointer value, gsize hits, gpointer ud); + +typedef void (*rspamd_map_traverse_function) (void *data, + rspamd_map_traverse_cb cb, + gpointer cbdata, gboolean reset_hits); + +/** + * Common map object + */ +struct rspamd_config; +struct rspamd_map; + +/** + * Callback data for async load + */ +struct map_cb_data { + struct rspamd_map *map; + gint state; + void *prev_data; + void *cur_data; +}; + +/** + * Returns TRUE if line looks like a map definition + * @param map_line + * @return + */ +gboolean rspamd_map_is_map (const gchar *map_line); + +/** + * Add map from line + */ +struct rspamd_map *rspamd_map_add (struct rspamd_config *cfg, + const gchar *map_line, + const gchar *description, + map_cb_t read_callback, + map_fin_cb_t fin_callback, + map_dtor_t dtor, + void **user_data, + struct rspamd_worker *worker); + +/** + * Add map from ucl + */ +struct rspamd_map *rspamd_map_add_from_ucl (struct rspamd_config *cfg, + const ucl_object_t *obj, + const gchar *description, + map_cb_t read_callback, + map_fin_cb_t fin_callback, + map_dtor_t dtor, + void **user_data, + struct rspamd_worker *worker); + +enum rspamd_map_watch_type { + RSPAMD_MAP_WATCH_MIN = 9, + RSPAMD_MAP_WATCH_PRIMARY_CONTROLLER, + RSPAMD_MAP_WATCH_SCANNER, + RSPAMD_MAP_WATCH_WORKER, + RSPAMD_MAP_WATCH_MAX +}; + +/** + * Start watching of maps by adding events to libevent event loop + */ +void rspamd_map_watch (struct rspamd_config *cfg, + struct ev_loop *event_loop, + struct rspamd_dns_resolver *resolver, + struct rspamd_worker *worker, + enum rspamd_map_watch_type how); + +/** + * Preloads maps where all backends are file + * @param cfg + */ +void rspamd_map_preload (struct rspamd_config *cfg); + +/** + * Remove all maps watched (remove events) + */ +void rspamd_map_remove_all (struct rspamd_config *cfg); + +/** + * Get traverse function for specific map + * @param map + * @return + */ +rspamd_map_traverse_function rspamd_map_get_traverse_function (struct rspamd_map *map); + +/** + * Perform map traverse + * @param map + * @param cb + * @param cbdata + * @param reset_hits + * @return + */ +void rspamd_map_traverse (struct rspamd_map *map, rspamd_map_traverse_cb cb, + gpointer cbdata, gboolean reset_hits); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/libserver/maps/map_helpers.c b/src/libserver/maps/map_helpers.c new file mode 100644 index 000000000..d179d44f5 --- /dev/null +++ b/src/libserver/maps/map_helpers.c @@ -0,0 +1,1397 @@ +/*- + * Copyright 2018 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "map_helpers.h" +#include "map_private.h" +#include "khash.h" +#include "radix.h" +#include "rspamd.h" +#include "cryptobox.h" +#include "contrib/fastutf8/fastutf8.h" + +#ifdef WITH_HYPERSCAN +#include "hs.h" +#endif +#ifndef WITH_PCRE2 +#include <pcre.h> +#else +#include <pcre2.h> +#endif + + +static const guint64 map_hash_seed = 0xdeadbabeULL; +static const gchar *hash_fill = "1"; + +struct rspamd_map_helper_value { + gsize hits; + gconstpointer key; + gchar value[]; /* Null terminated */ +}; + +KHASH_INIT (rspamd_map_hash, const gchar *, + struct rspamd_map_helper_value *, true, + rspamd_strcase_hash, rspamd_strcase_equal); + +struct rspamd_radix_map_helper { + rspamd_mempool_t *pool; + khash_t(rspamd_map_hash) *htb; + radix_compressed_t *trie; + rspamd_cryptobox_fast_hash_state_t hst; +}; + +struct rspamd_hash_map_helper { + rspamd_mempool_t *pool; + khash_t(rspamd_map_hash) *htb; + rspamd_cryptobox_fast_hash_state_t hst; +}; + +struct rspamd_regexp_map_helper { + rspamd_mempool_t *pool; + struct rspamd_map *map; + GPtrArray *regexps; + GPtrArray *values; + khash_t(rspamd_map_hash) *htb; + rspamd_cryptobox_fast_hash_state_t hst; + enum rspamd_regexp_map_flags map_flags; +#ifdef WITH_HYPERSCAN + hs_database_t *hs_db; + hs_scratch_t *hs_scratch; + gchar **patterns; + gint *flags; + gint *ids; +#endif +}; + +/** + * FSM for parsing lists + */ + +#define MAP_STORE_KEY do { \ + while (g_ascii_isspace (*c) && p > c) { c ++; } \ + key = g_malloc (p - c + 1); \ + rspamd_strlcpy (key, c, p - c + 1); \ + key = g_strstrip (key); \ +} while (0) + +#define MAP_STORE_VALUE do { \ + while (g_ascii_isspace (*c) && p > c) { c ++; } \ + value = g_malloc (p - c + 1); \ + rspamd_strlcpy (value, c, p - c + 1); \ + value = g_strstrip (value); \ +} while (0) + +gchar * +rspamd_parse_kv_list ( + gchar * chunk, + gint len, + struct map_cb_data *data, + insert_func func, + const gchar *default_value, + gboolean final) +{ + enum { + map_skip_spaces_before_key = 0, + map_read_key, + map_read_key_quoted, + map_read_key_slashed, + map_skip_spaces_after_key, + map_backslash_quoted, + map_backslash_slashed, + map_read_key_after_slash, + map_read_value, + map_read_comment_start, + map_skip_comment, + map_read_eol, + }; + + gchar *c, *p, *key = NULL, *value = NULL, *end; + struct rspamd_map *map = data->map; + guint line_number = 0; + + p = chunk; + c = p; + end = p + len; + + while (p < end) { + switch (data->state) { + case map_skip_spaces_before_key: + if (g_ascii_isspace (*p)) { + p ++; + } + else { + if (*p == '"') { + p++; + c = p; + data->state = map_read_key_quoted; + } + else if (*p == '/') { + /* Note that c is on '/' here as '/' is a part of key */ + c = p; + p++; + data->state = map_read_key_slashed; + } + else { + c = p; + data->state = map_read_key; + } + } + break; + case map_read_key: + /* read key */ + /* Check here comments, eol and end of buffer */ + if (*p == '#' && (p == c || *(p - 1) != '\\')) { + if (p - c > 0) { + /* Store a single key */ + MAP_STORE_KEY; + func (data->cur_data, key, default_value); + msg_debug_map ("insert key only pair: %s -> %s; line: %d", + key, default_value, line_number); + g_free (key); + } + + key = NULL; + data->state = map_read_comment_start; + } + else if (*p == '\r' || *p == '\n') { + if (p - c > 0) { + /* Store a single key */ + MAP_STORE_KEY; + func (data->cur_data, key, default_value); + msg_debug_map ("insert key only pair: %s -> %s; line: %d", + key, default_value, line_number); + g_free (key); + } + + data->state = map_read_eol; + key = NULL; + } + else if (g_ascii_isspace (*p)) { + if (p - c > 0) { + MAP_STORE_KEY; + data->state = map_skip_spaces_after_key; + } + else { + msg_err_map ("empty or invalid key found on line %d", line_number); + data->state = map_skip_comment; + } + } + else { + p++; + } + break; + case map_read_key_quoted: + if (*p == '\\') { + data->state = map_backslash_quoted; + p ++; + } + else if (*p == '"') { + /* Allow empty keys in this case */ + if (p - c >= 0) { + MAP_STORE_KEY; + data->state = map_skip_spaces_after_key; + } + else { + g_assert_not_reached (); + } + p ++; + } + else { + p ++; + } + break; + case map_read_key_slashed: + if (*p == '\\') { + data->state = map_backslash_slashed; + p ++; + } + else if (*p == '/') { + /* Allow empty keys in this case */ + if (p - c >= 0) { + data->state = map_read_key_after_slash; + } + else { + g_assert_not_reached (); + } + } + else { + p ++; + } + break; + case map_read_key_after_slash: + /* + * This state is equal to reading of key but '/' is not + * treated specially + */ + if (*p == '#') { + if (p - c > 0) { + /* Store a single key */ + MAP_STORE_KEY; + func (data->cur_data, key, default_value); + msg_debug_map ("insert key only pair: %s -> %s; line: %d", + key, default_value, line_number); + g_free (key); + key = NULL; + } + + data->state = map_read_comment_start; + } + else if (*p == '\r' || *p == '\n') { + if (p - c > 0) { + /* Store a single key */ + MAP_STORE_KEY; + func (data->cur_data, key, default_value); + + msg_debug_map ("insert key only pair: %s -> %s; line: %d", + key, default_value, line_number); + g_free (key); + key = NULL; + } + + data->state = map_read_eol; + key = NULL; + } + else if (g_ascii_isspace (*p)) { + if (p - c > 0) { + MAP_STORE_KEY; + data->state = map_skip_spaces_after_key; + } + else { + msg_err_map ("empty or invalid key found on line %d", line_number); + data->state = map_skip_comment; + } + } + else { + p ++; + } + break; + case map_backslash_quoted: + p ++; + data->state = map_read_key_quoted; + break; + case map_backslash_slashed: + p ++; + data->state = map_read_key_slashed; + break; + case map_skip_spaces_after_key: + if (*p == ' ' || *p == '\t') { + p ++; + } + else { + c = p; + data->state = map_read_value; + } + break; + case map_read_value: + if (key == NULL) { + /* Ignore line */ + msg_err_map ("empty or invalid key found on line %d", line_number); + data->state = map_skip_comment; + } + else { + if (*p == '#') { + if (p - c > 0) { + /* Store a single key */ + MAP_STORE_VALUE; + func (data->cur_data, key, value); + msg_debug_map ("insert key value pair: %s -> %s; line: %d", + key, value, line_number); + g_free (key); + g_free (value); + key = NULL; + value = NULL; + } else { + func (data->cur_data, key, default_value); + msg_debug_map ("insert key only pair: %s -> %s; line: %d", + key, default_value, line_number); + g_free (key); + key = NULL; + } + + data->state = map_read_comment_start; + } else if (*p == '\r' || *p == '\n') { + if (p - c > 0) { + /* Store a single key */ + MAP_STORE_VALUE; + func (data->cur_data, key, value); + msg_debug_map ("insert key value pair: %s -> %s", + key, value); + g_free (key); + g_free (value); + key = NULL; + value = NULL; + } else { + func (data->cur_data, key, default_value); + msg_debug_map ("insert key only pair: %s -> %s", + key, default_value); + g_free (key); + key = NULL; + } + + data->state = map_read_eol; + key = NULL; + } + else { + p++; + } + } + break; + case map_read_comment_start: + if (*p == '#') { + data->state = map_skip_comment; + p ++; + key = NULL; + value = NULL; + } + else { + g_assert_not_reached (); + } + break; + case map_skip_comment: + if (*p == '\r' || *p == '\n') { + data->state = map_read_eol; + } + else { + p ++; + } + break; + case map_read_eol: + /* Skip \r\n and whitespaces */ + if (*p == '\r' || *p == '\n') { + if (*p == '\n') { + /* We don't care about \r only line separators, they are too rare */ + line_number ++; + } + p++; + } + else { + data->state = map_skip_spaces_before_key; + } + break; + default: + g_assert_not_reached (); + break; + } + } + + if (final) { + /* Examine the state */ + switch (data->state) { + case map_read_key: + if (p - c > 0) { + /* Store a single key */ + MAP_STORE_KEY; + func (data->cur_data, key, default_value); + msg_debug_map ("insert key only pair: %s -> %s", + key, default_value); + g_free (key); + key = NULL; + } + break; + case map_read_value: + if (key == NULL) { + /* Ignore line */ + msg_err_map ("empty or invalid key found on line %d", line_number); + data->state = map_skip_comment; + } + else { + if (p - c > 0) { + /* Store a single key */ + MAP_STORE_VALUE; + func (data->cur_data, key, value); + msg_debug_map ("insert key value pair: %s -> %s", + key, value); + g_free (key); + g_free (value); + key = NULL; + value = NULL; + } else { + func (data->cur_data, key, default_value); + msg_debug_map ("insert key only pair: %s -> %s", + key, default_value); + g_free (key); + key = NULL; + } + } + break; + } + + data->state = map_skip_spaces_before_key; + } + + return c; +} + +/** + * Radix tree helper function + */ +void +rspamd_map_helper_insert_radix (gpointer st, gconstpointer key, gconstpointer value) +{ + struct rspamd_radix_map_helper *r = (struct rspamd_radix_map_helper *)st; + struct rspamd_map_helper_value *val; + gsize vlen; + khiter_t k; + gconstpointer nk; + gint res; + + vlen = strlen (value); + val = rspamd_mempool_alloc0 (r->pool, sizeof (*val) + + vlen + 1); + memcpy (val->value, value, vlen); + + k = kh_get (rspamd_map_hash, r->htb, key); + + if (k == kh_end (r->htb)) { + nk = rspamd_mempool_strdup (r->pool, key); + k = kh_put (rspamd_map_hash, r->htb, nk, &res); + } + + nk = kh_key (r->htb, k); + val->key = nk; + kh_value (r->htb, k) = val; + rspamd_radix_add_iplist (key, ",", r->trie, val, FALSE); + rspamd_cryptobox_fast_hash_update (&r->hst, nk, strlen (nk)); +} + +void +rspamd_map_helper_insert_radix_resolve (gpointer st, gconstpointer key, gconstpointer value) +{ + struct rspamd_radix_map_helper *r = (struct rspamd_radix_map_helper *)st; + struct rspamd_map_helper_value *val; + gsize vlen; + khiter_t k; + gconstpointer nk; + gint res; + + vlen = strlen (value); + val = rspamd_mempool_alloc0 (r->pool, sizeof (*val) + + vlen + 1); + memcpy (val->value, value, vlen); + + k = kh_get (rspamd_map_hash, r->htb, key); + + if (k == kh_end (r->htb)) { + nk = rspamd_mempool_strdup (r->pool, key); + k = kh_put (rspamd_map_hash, r->htb, nk, &res); + } + + nk = kh_key (r->htb, k); + val->key = nk; + kh_value (r->htb, k) = val; + rspamd_radix_add_iplist (key, ",", r->trie, val, TRUE); + rspamd_cryptobox_fast_hash_update (&r->hst, nk, strlen (nk)); +} + +void +rspamd_map_helper_insert_hash (gpointer st, gconstpointer key, gconstpointer value) +{ + struct rspamd_hash_map_helper *ht = st; + struct rspamd_map_helper_value *val; + khiter_t k; + gconstpointer nk; + gsize vlen; + gint r; + + k = kh_get (rspamd_map_hash, ht->htb, key); + vlen = strlen (value); + + if (k == kh_end (ht->htb)) { + nk = rspamd_mempool_strdup (ht->pool, key); + k = kh_put (rspamd_map_hash, ht->htb, nk, &r); + } + else { + val = kh_value (ht->htb, k); + + if (strcmp (value, val->value) == 0) { + /* Same element, skip */ + return; + } + } + + /* Null termination due to alloc0 */ + val = rspamd_mempool_alloc0 (ht->pool, sizeof (*val) + vlen + 1); + memcpy (val->value, value, vlen); + + nk = kh_key (ht->htb, k); + val->key = nk; + kh_value (ht->htb, k) = val; + rspamd_cryptobox_fast_hash_update (&ht->hst, nk, strlen (nk)); +} + +void +rspamd_map_helper_insert_re (gpointer st, gconstpointer key, gconstpointer value) +{ + struct rspamd_regexp_map_helper *re_map = st; + struct rspamd_map *map; + rspamd_regexp_t *re; + gchar *escaped; + GError *err = NULL; + gint pcre_flags; + gsize escaped_len; + struct rspamd_map_helper_value *val; + khiter_t k; + gconstpointer nk; + gsize vlen; + gint r; + + map = re_map->map; + + if (re_map->map_flags & RSPAMD_REGEXP_MAP_FLAG_GLOB) { + escaped = rspamd_str_regexp_escape (key, strlen (key), &escaped_len, + RSPAMD_REGEXP_ESCAPE_GLOB|RSPAMD_REGEXP_ESCAPE_UTF); + re = rspamd_regexp_new (escaped, NULL, &err); + g_free (escaped); + } + else { + re = rspamd_regexp_new (key, NULL, &err); + } + + if (re == NULL) { + msg_err_map ("cannot parse regexp %s: %e", key, err); + + if (err) { + g_error_free (err); + } + + return; + } + + vlen = strlen (value); + val = rspamd_mempool_alloc0 (re_map->pool, sizeof (*val) + + vlen + 1); + memcpy (val->value, value, vlen); + + k = kh_get (rspamd_map_hash, re_map->htb, key); + + if (k == kh_end (re_map->htb)) { + nk = rspamd_mempool_strdup (re_map->pool, key); + k = kh_put (rspamd_map_hash, re_map->htb, nk, &r); + } + + nk = kh_key (re_map->htb, k); + val->key = nk; + kh_value (re_map->htb, k) = val; + rspamd_cryptobox_fast_hash_update (&re_map->hst, nk, strlen (nk)); + + pcre_flags = rspamd_regexp_get_pcre_flags (re); + +#ifndef WITH_PCRE2 + if (pcre_flags & PCRE_FLAG(UTF8)) { + re_map->map_flags |= RSPAMD_REGEXP_MAP_FLAG_UTF; + } +#else + if (pcre_flags & PCRE_FLAG(UTF)) { + re_map->map_flags |= RSPAMD_REGEXP_MAP_FLAG_UTF; + } +#endif + + g_ptr_array_add (re_map->regexps, re); + g_ptr_array_add (re_map->values, val); +} + +static void +rspamd_map_helper_traverse_regexp (void *data, + rspamd_map_traverse_cb cb, + gpointer cbdata, + gboolean reset_hits) +{ + gconstpointer k; + struct rspamd_map_helper_value *val; + struct rspamd_regexp_map_helper *re_map = data; + + kh_foreach (re_map->htb, k, val, { + if (!cb (k, val->value, val->hits, cbdata)) { + break; + } + + if (reset_hits) { + val->hits = 0; + } + }); +} + +struct rspamd_hash_map_helper * +rspamd_map_helper_new_hash (struct rspamd_map *map) +{ + struct rspamd_hash_map_helper *htb; + rspamd_mempool_t *pool; + + if (map) { + pool = rspamd_mempool_new (rspamd_mempool_suggest_size (), + map->tag, 0); + } + else { + pool = rspamd_mempool_new (rspamd_mempool_suggest_size (), + NULL, 0); + } + + htb = rspamd_mempool_alloc0 (pool, sizeof (*htb)); + htb->htb = kh_init (rspamd_map_hash); + htb->pool = pool; + rspamd_cryptobox_fast_hash_init (&htb->hst, map_hash_seed); + + return htb; +} + +void +rspamd_map_helper_destroy_hash (struct rspamd_hash_map_helper *r) +{ + if (r == NULL || r->pool == NULL) { + return; + } + + rspamd_mempool_t *pool = r->pool; + kh_destroy (rspamd_map_hash, r->htb); + memset (r, 0, sizeof (*r)); + rspamd_mempool_delete (pool); +} + +static void +rspamd_map_helper_traverse_hash (void *data, + rspamd_map_traverse_cb cb, + gpointer cbdata, + gboolean reset_hits) +{ + gconstpointer k; + struct rspamd_map_helper_value *val; + struct rspamd_hash_map_helper *ht = data; + + kh_foreach (ht->htb, k, val, { + if (!cb (k, val->value, val->hits, cbdata)) { + break; + } + + if (reset_hits) { + val->hits = 0; + } + }); +} + +struct rspamd_radix_map_helper * +rspamd_map_helper_new_radix (struct rspamd_map *map) +{ + struct rspamd_radix_map_helper *r; + rspamd_mempool_t *pool; + + if (map) { + pool = rspamd_mempool_new (rspamd_mempool_suggest_size (), + map->tag, 0); + } + else { + pool = rspamd_mempool_new (rspamd_mempool_suggest_size (), + NULL, 0); + } + + r = rspamd_mempool_alloc0 (pool, sizeof (*r)); + r->trie = radix_create_compressed_with_pool (pool); + r->htb = kh_init (rspamd_map_hash); + r->pool = pool; + rspamd_cryptobox_fast_hash_init (&r->hst, map_hash_seed); + + return r; +} + +void +rspamd_map_helper_destroy_radix (struct rspamd_radix_map_helper *r) +{ + if (r == NULL || !r->pool) { + return; + } + + kh_destroy (rspamd_map_hash, r->htb); + rspamd_mempool_t *pool = r->pool; + memset (r, 0, sizeof (*r)); + rspamd_mempool_delete (pool); +} + +static void +rspamd_map_helper_traverse_radix (void *data, + rspamd_map_traverse_cb cb, + gpointer cbdata, + gboolean reset_hits) +{ + gconstpointer k; + struct rspamd_map_helper_value *val; + struct rspamd_radix_map_helper *r = data; + + kh_foreach (r->htb, k, val, { + if (!cb (k, val->value, val->hits, cbdata)) { + break; + } + + if (reset_hits) { + val->hits = 0; + } + }); +} + +struct rspamd_regexp_map_helper * +rspamd_map_helper_new_regexp (struct rspamd_map *map, + enum rspamd_regexp_map_flags flags) +{ + struct rspamd_regexp_map_helper *re_map; + rspamd_mempool_t *pool; + + pool = rspamd_mempool_new (rspamd_mempool_suggest_size (), + map->tag, 0); + + re_map = rspamd_mempool_alloc0 (pool, sizeof (*re_map)); + re_map->pool = pool; + re_map->values = g_ptr_array_new (); + re_map->regexps = g_ptr_array_new (); + re_map->map = map; + re_map->map_flags = flags; + re_map->htb = kh_init (rspamd_map_hash); + rspamd_cryptobox_fast_hash_init (&re_map->hst, map_hash_seed); + + return re_map; +} + + +void +rspamd_map_helper_destroy_regexp (struct rspamd_regexp_map_helper *re_map) +{ + rspamd_regexp_t *re; + guint i; + + if (!re_map || !re_map->regexps) { + return; + } + +#ifdef WITH_HYPERSCAN + if (re_map->hs_scratch) { + hs_free_scratch (re_map->hs_scratch); + } + if (re_map->hs_db) { + hs_free_database (re_map->hs_db); + } + if (re_map->patterns) { + for (i = 0; i < re_map->regexps->len; i ++) { + g_free (re_map->patterns[i]); + } + + g_free (re_map->patterns); + } + if (re_map->flags) { + g_free (re_map->flags); + } + if (re_map->ids) { + g_free (re_map->ids); + } +#endif + + for (i = 0; i < re_map->regexps->len; i ++) { + re = g_ptr_array_index (re_map->regexps, i); + rspamd_regexp_unref (re); + } + + g_ptr_array_free (re_map->regexps, TRUE); + g_ptr_array_free (re_map->values, TRUE); + kh_destroy (rspamd_map_hash, re_map->htb); + + rspamd_mempool_t *pool = re_map->pool; + memset (re_map, 0, sizeof (*re_map)); + rspamd_mempool_delete (pool); +} + +gchar * +rspamd_kv_list_read ( + gchar * chunk, + gint len, + struct map_cb_data *data, + gboolean final) +{ + if (data->cur_data == NULL) { + data->cur_data = rspamd_map_helper_new_hash (data->map); + } + + return rspamd_parse_kv_list ( + chunk, + len, + data, + rspamd_map_helper_insert_hash, + "", + final); +} + +void +rspamd_kv_list_fin (struct map_cb_data *data, void **target) +{ + struct rspamd_map *map = data->map; + struct rspamd_hash_map_helper *htb; + + if (data->cur_data) { + htb = (struct rspamd_hash_map_helper *)data->cur_data; + msg_info_map ("read hash of %d elements", kh_size (htb->htb)); + data->map->traverse_function = rspamd_map_helper_traverse_hash; + data->map->nelts = kh_size (htb->htb); + data->map->digest = rspamd_cryptobox_fast_hash_final (&htb->hst); + } + + if (target) { + *target = data->cur_data; + } + + if (data->prev_data) { + htb = (struct rspamd_hash_map_helper *)data->prev_data; + rspamd_map_helper_destroy_hash (htb); + } +} + +void +rspamd_kv_list_dtor (struct map_cb_data *data) +{ + struct rspamd_hash_map_helper *htb; + + if (data->cur_data) { + htb = (struct rspamd_hash_map_helper *)data->cur_data; + rspamd_map_helper_destroy_hash (htb); + } +} + +gchar * +rspamd_radix_read ( + gchar * chunk, + gint len, + struct map_cb_data *data, + gboolean final) +{ + struct rspamd_radix_map_helper *r; + struct rspamd_map *map = data->map; + + if (data->cur_data == NULL) { + r = rspamd_map_helper_new_radix (map); + data->cur_data = r; + } + + return rspamd_parse_kv_list ( + chunk, + len, + data, + rspamd_map_helper_insert_radix, + hash_fill, + final); +} + +void +rspamd_radix_fin (struct map_cb_data *data, void **target) +{ + struct rspamd_map *map = data->map; + struct rspamd_radix_map_helper *r; + + if (data->cur_data) { + r = (struct rspamd_radix_map_helper *)data->cur_data; + msg_info_map ("read radix trie of %z elements: %s", + radix_get_size (r->trie), radix_get_info (r->trie)); + data->map->traverse_function = rspamd_map_helper_traverse_radix; + data->map->nelts = kh_size (r->htb); + data->map->digest = rspamd_cryptobox_fast_hash_final (&r->hst); + } + + if (target) { + *target = data->cur_data; + } + + if (data->prev_data) { + r = (struct rspamd_radix_map_helper *)data->prev_data; + rspamd_map_helper_destroy_radix (r); + } +} + +void +rspamd_radix_dtor (struct map_cb_data *data) +{ + struct rspamd_radix_map_helper *r; + + if (data->cur_data) { + r = (struct rspamd_radix_map_helper *)data->cur_data; + rspamd_map_helper_destroy_radix (r); + } +} + +static void +rspamd_re_map_finalize (struct rspamd_regexp_map_helper *re_map) +{ +#ifdef WITH_HYPERSCAN + guint i; + hs_platform_info_t plt; + hs_compile_error_t *err; + struct rspamd_map *map; + rspamd_regexp_t *re; + gint pcre_flags; + + map = re_map->map; + + if (!(map->cfg->libs_ctx->crypto_ctx->cpu_config & CPUID_SSSE3)) { + msg_info_map ("disable hyperscan for map %s, ssse3 instructons are not supported by CPU", + map->name); + return; + } + + if (hs_populate_platform (&plt) != HS_SUCCESS) { + msg_err_map ("cannot populate hyperscan platform"); + return; + } + + re_map->patterns = g_new (gchar *, re_map->regexps->len); + re_map->flags = g_new (gint, re_map->regexps->len); + re_map->ids = g_new (gint, re_map->regexps->len); + + for (i = 0; i < re_map->regexps->len; i ++) { + const gchar *pat; + gchar *escaped; + gint pat_flags; + + re = g_ptr_array_index (re_map->regexps, i); + pcre_flags = rspamd_regexp_get_pcre_flags (re); + pat = rspamd_regexp_get_pattern (re); + pat_flags = rspamd_regexp_get_flags (re); + + if (pat_flags & RSPAMD_REGEXP_FLAG_UTF) { + escaped = rspamd_str_regexp_escape (pat, strlen (pat), NULL, + RSPAMD_REGEXP_ESCAPE_RE|RSPAMD_REGEXP_ESCAPE_UTF); + re_map->flags[i] |= HS_FLAG_UTF8; + } + else { + escaped = rspamd_str_regexp_escape (pat, strlen (pat), NULL, + RSPAMD_REGEXP_ESCAPE_RE); + } + + re_map->patterns[i] = escaped; + re_map->flags[i] = HS_FLAG_SINGLEMATCH; + +#ifndef WITH_PCRE2 + if (pcre_flags & PCRE_FLAG(UTF8)) { + re_map->flags[i] |= HS_FLAG_UTF8; + } +#else + if (pcre_flags & PCRE_FLAG(UTF)) { + re_map->flags[i] |= HS_FLAG_UTF8; + } +#endif + if (pcre_flags & PCRE_FLAG(CASELESS)) { + re_map->flags[i] |= HS_FLAG_CASELESS; + } + if (pcre_flags & PCRE_FLAG(MULTILINE)) { + re_map->flags[i] |= HS_FLAG_MULTILINE; + } + if (pcre_flags & PCRE_FLAG(DOTALL)) { + re_map->flags[i] |= HS_FLAG_DOTALL; + } + if (rspamd_regexp_get_maxhits (re) == 1) { + re_map->flags[i] |= HS_FLAG_SINGLEMATCH; + } + + re_map->ids[i] = i; + } + + if (re_map->regexps->len > 0 && re_map->patterns) { + if (hs_compile_multi ((const gchar **)re_map->patterns, + re_map->flags, + re_map->ids, + re_map->regexps->len, + HS_MODE_BLOCK, + &plt, + &re_map->hs_db, + &err) != HS_SUCCESS) { + + msg_err_map ("cannot create tree of regexp when processing '%s': %s", + err->expression >= 0 ? + re_map->patterns[err->expression] : + "unknown regexp", err->message); + re_map->hs_db = NULL; + hs_free_compile_error (err); + + return; + } + + if (hs_alloc_scratch (re_map->hs_db, &re_map->hs_scratch) != HS_SUCCESS) { + msg_err_map ("cannot allocate scratch space for hyperscan"); + hs_free_database (re_map->hs_db); + re_map->hs_db = NULL; + } + } + else { + msg_err_map ("regexp map is empty"); + } +#endif +} + +gchar * +rspamd_regexp_list_read_single ( + gchar *chunk, + gint len, + struct map_cb_data *data, + gboolean final) +{ + struct rspamd_regexp_map_helper *re_map; + + if (data->cur_data == NULL) { + re_map = rspamd_map_helper_new_regexp (data->map, 0); + data->cur_data = re_map; + } + + return rspamd_parse_kv_list ( + chunk, + len, + data, + rspamd_map_helper_insert_re, + hash_fill, + final); +} + +gchar * +rspamd_glob_list_read_single ( + gchar *chunk, + gint len, + struct map_cb_data *data, + gboolean final) +{ + struct rspamd_regexp_map_helper *re_map; + + if (data->cur_data == NULL) { + re_map = rspamd_map_helper_new_regexp (data->map, RSPAMD_REGEXP_MAP_FLAG_GLOB); + data->cur_data = re_map; + } + + return rspamd_parse_kv_list ( + chunk, + len, + data, + rspamd_map_helper_insert_re, + hash_fill, + final); +} + +gchar * +rspamd_regexp_list_read_multiple ( + gchar *chunk, + gint len, + struct map_cb_data *data, + gboolean final) +{ + struct rspamd_regexp_map_helper *re_map; + + if (data->cur_data == NULL) { + re_map = rspamd_map_helper_new_regexp (data->map, + RSPAMD_REGEXP_MAP_FLAG_MULTIPLE); + data->cur_data = re_map; + } + + return rspamd_parse_kv_list ( + chunk, + len, + data, + rspamd_map_helper_insert_re, + hash_fill, + final); +} + +gchar * +rspamd_glob_list_read_multiple ( + gchar *chunk, + gint len, + struct map_cb_data *data, + gboolean final) +{ + struct rspamd_regexp_map_helper *re_map; + + if (data->cur_data == NULL) { + re_map = rspamd_map_helper_new_regexp (data->map, + RSPAMD_REGEXP_MAP_FLAG_GLOB|RSPAMD_REGEXP_MAP_FLAG_MULTIPLE); + data->cur_data = re_map; + } + + return rspamd_parse_kv_list ( + chunk, + len, + data, + rspamd_map_helper_insert_re, + hash_fill, + final); +} + + +void +rspamd_regexp_list_fin (struct map_cb_data *data, void **target) +{ + struct rspamd_regexp_map_helper *re_map; + struct rspamd_map *map = data->map; + + if (data->cur_data) { + re_map = data->cur_data; + rspamd_re_map_finalize (re_map); + msg_info_map ("read regexp list of %ud elements", + re_map->regexps->len); + data->map->traverse_function = rspamd_map_helper_traverse_regexp; + data->map->nelts = kh_size (re_map->htb); + data->map->digest = rspamd_cryptobox_fast_hash_final (&re_map->hst); + } + + if (target) { + *target = data->cur_data; + } + + if (data->prev_data) { + rspamd_map_helper_destroy_regexp (data->prev_data); + } +} +void +rspamd_regexp_list_dtor (struct map_cb_data *data) +{ + if (data->cur_data) { + rspamd_map_helper_destroy_regexp (data->cur_data); + } +} + +#ifdef WITH_HYPERSCAN +static int +rspamd_match_hs_single_handler (unsigned int id, unsigned long long from, + unsigned long long to, + unsigned int flags, void *context) +{ + guint *i = context; + /* Always return non-zero as we need a single match here */ + + *i = id; + + return 1; +} +#endif + +gconstpointer +rspamd_match_regexp_map_single (struct rspamd_regexp_map_helper *map, + const gchar *in, gsize len) +{ + guint i; + rspamd_regexp_t *re; + gint res = 0; + gpointer ret = NULL; + struct rspamd_map_helper_value *val; + gboolean validated = FALSE; + + g_assert (in != NULL); + + if (map == NULL || len == 0 || map->regexps == NULL) { + return NULL; + } + + if (map->map_flags & RSPAMD_REGEXP_MAP_FLAG_UTF) { + if (rspamd_fast_utf8_validate (in, len) == 0) { + validated = TRUE; + } + } + else { + validated = TRUE; + } + +#ifdef WITH_HYPERSCAN + if (map->hs_db && map->hs_scratch) { + + if (validated) { + + res = hs_scan (map->hs_db, in, len, 0, map->hs_scratch, + rspamd_match_hs_single_handler, (void *)&i); + + if (res == HS_SCAN_TERMINATED) { + res = 1; + val = g_ptr_array_index (map->values, i); + + ret = val->value; + val->hits ++; + } + + return ret; + } + } +#endif + + if (!res) { + /* PCRE version */ + for (i = 0; i < map->regexps->len; i ++) { + re = g_ptr_array_index (map->regexps, i); + + if (rspamd_regexp_search (re, in, len, NULL, NULL, !validated, NULL)) { + val = g_ptr_array_index (map->values, i); + + ret = val->value; + val->hits ++; + break; + } + } + } + + return ret; +} + +#ifdef WITH_HYPERSCAN +struct rspamd_multiple_cbdata { + GPtrArray *ar; + struct rspamd_regexp_map_helper *map; +}; + +static int +rspamd_match_hs_multiple_handler (unsigned int id, unsigned long long from, + unsigned long long to, + unsigned int flags, void *context) +{ + struct rspamd_multiple_cbdata *cbd = context; + struct rspamd_map_helper_value *val; + + + if (id < cbd->map->values->len) { + val = g_ptr_array_index (cbd->map->values, id); + val->hits ++; + g_ptr_array_add (cbd->ar, val->value); + } + + /* Always return zero as we need all matches here */ + return 0; +} +#endif + +GPtrArray* +rspamd_match_regexp_map_all (struct rspamd_regexp_map_helper *map, + const gchar *in, gsize len) +{ + guint i; + rspamd_regexp_t *re; + GPtrArray *ret; + gint res = 0; + gboolean validated = FALSE; + struct rspamd_map_helper_value *val; + + if (map == NULL || map->regexps == NULL || len == 0) { + return NULL; + } + + g_assert (in != NULL); + + if (map->map_flags & RSPAMD_REGEXP_MAP_FLAG_UTF) { + if (rspamd_fast_utf8_validate (in, len) == 0) { + validated = TRUE; + } + } + else { + validated = TRUE; + } + + ret = g_ptr_array_new (); + +#ifdef WITH_HYPERSCAN + if (map->hs_db && map->hs_scratch) { + + if (validated) { + struct rspamd_multiple_cbdata cbd; + + cbd.ar = ret; + cbd.map = map; + + if (hs_scan (map->hs_db, in, len, 0, map->hs_scratch, + rspamd_match_hs_multiple_handler, &cbd) == HS_SUCCESS) { + res = 1; + } + } + } +#endif + + if (!res) { + /* PCRE version */ + for (i = 0; i < map->regexps->len; i ++) { + re = g_ptr_array_index (map->regexps, i); + + if (rspamd_regexp_search (re, in, len, NULL, NULL, + !validated, NULL)) { + val = g_ptr_array_index (map->values, i); + val->hits ++; + g_ptr_array_add (ret, val->value); + } + } + } + + if (ret->len > 0) { + return ret; + } + + g_ptr_array_free (ret, TRUE); + + return NULL; +} + +gconstpointer +rspamd_match_hash_map (struct rspamd_hash_map_helper *map, const gchar *in) +{ + khiter_t k; + struct rspamd_map_helper_value *val; + + if (map == NULL || map->htb == NULL) { + return NULL; + } + + k = kh_get (rspamd_map_hash, map->htb, in); + + if (k != kh_end (map->htb)) { + val = kh_value (map->htb, k); + val->hits ++; + + return val->value; + } + + return NULL; +} + +gconstpointer +rspamd_match_radix_map (struct rspamd_radix_map_helper *map, + const guchar *in, gsize inlen) +{ + struct rspamd_map_helper_value *val; + + if (map == NULL || map->trie == NULL) { + return NULL; + } + + val = (struct rspamd_map_helper_value *)radix_find_compressed (map->trie, + in, inlen); + + if (val != (gconstpointer)RADIX_NO_VALUE) { + val->hits ++; + + return val->value; + } + + return NULL; +} + +gconstpointer +rspamd_match_radix_map_addr (struct rspamd_radix_map_helper *map, + const rspamd_inet_addr_t *addr) +{ + struct rspamd_map_helper_value *val; + + if (map == NULL || map->trie == NULL) { + return NULL; + } + + val = (struct rspamd_map_helper_value *)radix_find_compressed_addr (map->trie, addr); + + if (val != (gconstpointer)RADIX_NO_VALUE) { + val->hits ++; + + return val->value; + } + + return NULL; +}
\ No newline at end of file diff --git a/src/libserver/maps/map_helpers.h b/src/libserver/maps/map_helpers.h new file mode 100644 index 000000000..4f7b5b804 --- /dev/null +++ b/src/libserver/maps/map_helpers.h @@ -0,0 +1,246 @@ +/*- + * Copyright 2018 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef RSPAMD_MAP_HELPERS_H +#define RSPAMD_MAP_HELPERS_H + +#include "config.h" +#include "map.h" +#include "addr.h" + +/** + * @file map_helpers.h + * + * Defines helper structures to deal with different map types + */ + + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Common structures, abstract for simplicity + */ +struct rspamd_radix_map_helper; +struct rspamd_hash_map_helper; +struct rspamd_regexp_map_helper; +struct rspamd_map_helper_value; + +enum rspamd_regexp_map_flags { + RSPAMD_REGEXP_MAP_FLAG_UTF = (1u << 0), + RSPAMD_REGEXP_MAP_FLAG_MULTIPLE = (1u << 1), + RSPAMD_REGEXP_MAP_FLAG_GLOB = (1u << 2), +}; + +typedef void (*insert_func) (gpointer st, gconstpointer key, + gconstpointer value); + +/** + * Radix list is a list like ip/mask + */ +gchar *rspamd_radix_read ( + gchar *chunk, + gint len, + struct map_cb_data *data, + gboolean final); + +void rspamd_radix_fin (struct map_cb_data *data, void **target); + +void rspamd_radix_dtor (struct map_cb_data *data); + +/** + * Kv list is an ordinal list of keys and values separated by whitespace + */ +gchar *rspamd_kv_list_read ( + gchar *chunk, + gint len, + struct map_cb_data *data, + gboolean final); + +void rspamd_kv_list_fin (struct map_cb_data *data, void **target); + +void rspamd_kv_list_dtor (struct map_cb_data *data); + +/** + * Regexp list is a list of regular expressions + */ + +gchar *rspamd_regexp_list_read_single ( + gchar *chunk, + gint len, + struct map_cb_data *data, + gboolean final); + +gchar *rspamd_regexp_list_read_multiple ( + gchar *chunk, + gint len, + struct map_cb_data *data, + gboolean final); + +gchar *rspamd_glob_list_read_single ( + gchar *chunk, + gint len, + struct map_cb_data *data, + gboolean final); + +gchar *rspamd_glob_list_read_multiple ( + gchar *chunk, + gint len, + struct map_cb_data *data, + gboolean final); + +void rspamd_regexp_list_fin (struct map_cb_data *data, void **target); + +void rspamd_regexp_list_dtor (struct map_cb_data *data); + +/** + * FSM for lists parsing (support comments, blank lines and partial replies) + */ +gchar * +rspamd_parse_kv_list ( + gchar *chunk, + gint len, + struct map_cb_data *data, + insert_func func, + const gchar *default_value, + gboolean final); + +/** + * Find a single (any) matching regexp for the specified text or NULL if + * no matches found + * @param map + * @param in + * @param len + * @return + */ +gconstpointer rspamd_match_regexp_map_single (struct rspamd_regexp_map_helper *map, + const gchar *in, gsize len); + +/** + * Find a multiple (all) matching regexp for the specified text or NULL if + * no matches found. Returns GPtrArray that *must* be freed by a caller if not NULL + * @param map + * @param in + * @param len + * @return + */ +GPtrArray *rspamd_match_regexp_map_all (struct rspamd_regexp_map_helper *map, + const gchar *in, gsize len); + +/** + * Find value matching specific key in a hash map + * @param map + * @param in + * @param len + * @return + */ +gconstpointer rspamd_match_hash_map (struct rspamd_hash_map_helper *map, + const gchar *in); + +/** + * Find value matching specific key in a hash map + * @param map + * @param in raw ip address + * @param inlen ip address length (4 for IPv4 and 16 for IPv6) + * @return + */ +gconstpointer rspamd_match_radix_map (struct rspamd_radix_map_helper *map, + const guchar *in, gsize inlen); + +gconstpointer rspamd_match_radix_map_addr (struct rspamd_radix_map_helper *map, + const rspamd_inet_addr_t *addr); + +/** + * Creates radix map helper + * @param map + * @return + */ +struct rspamd_radix_map_helper *rspamd_map_helper_new_radix (struct rspamd_map *map); + +/** + * Inserts new value into radix map + * @param st + * @param key + * @param value + */ +void rspamd_map_helper_insert_radix (gpointer st, gconstpointer key, gconstpointer value); + +/** + * Inserts new value into radix map performing synchronous resolving + * @param st + * @param key + * @param value + */ +void rspamd_map_helper_insert_radix_resolve (gpointer st, gconstpointer key, + gconstpointer value); + +/** + * Destroys radix map helper + * @param r + */ +void rspamd_map_helper_destroy_radix (struct rspamd_radix_map_helper *r); + + +/** + * Creates hash map helper + * @param map + * @return + */ +struct rspamd_hash_map_helper *rspamd_map_helper_new_hash (struct rspamd_map *map); + +/** + * Inserts a new value into a hash map + * @param st + * @param key + * @param value + */ +void rspamd_map_helper_insert_hash (gpointer st, gconstpointer key, gconstpointer value); + +/** + * Destroys hash map helper + * @param r + */ +void rspamd_map_helper_destroy_hash (struct rspamd_hash_map_helper *r); + +/** + * Create new regexp map + * @param map + * @param flags + * @return + */ +struct rspamd_regexp_map_helper *rspamd_map_helper_new_regexp (struct rspamd_map *map, + enum rspamd_regexp_map_flags flags); + +/** + * Inserts a new regexp into regexp map + * @param st + * @param key + * @param value + */ +void rspamd_map_helper_insert_re (gpointer st, gconstpointer key, gconstpointer value); + +/** + * Destroy regexp map + * @param re_map + */ +void rspamd_map_helper_destroy_regexp (struct rspamd_regexp_map_helper *re_map); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/libserver/maps/map_private.h b/src/libserver/maps/map_private.h new file mode 100644 index 000000000..347f63538 --- /dev/null +++ b/src/libserver/maps/map_private.h @@ -0,0 +1,219 @@ +/*- + * Copyright 2016 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef SRC_LIBUTIL_MAP_PRIVATE_H_ +#define SRC_LIBUTIL_MAP_PRIVATE_H_ + +#include "config.h" +#include "mem_pool.h" +#include "keypair.h" +#include "unix-std.h" +#include "map.h" +#include "ref.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef void (*rspamd_map_tmp_dtor) (gpointer p); + +extern guint rspamd_map_log_id; +#define msg_err_map(...) rspamd_default_log_function (G_LOG_LEVEL_CRITICAL, \ + "map", map->tag, \ + G_STRFUNC, \ + __VA_ARGS__) +#define msg_warn_map(...) rspamd_default_log_function (G_LOG_LEVEL_WARNING, \ + "map", map->tag, \ + G_STRFUNC, \ + __VA_ARGS__) +#define msg_info_map(...) rspamd_default_log_function (G_LOG_LEVEL_INFO, \ + "map", map->tag, \ + G_STRFUNC, \ + __VA_ARGS__) +#define msg_debug_map(...) rspamd_conditional_debug_fast (NULL, NULL, \ + rspamd_map_log_id, "map", map->tag, \ + G_STRFUNC, \ + __VA_ARGS__) + +enum fetch_proto { + MAP_PROTO_FILE, + MAP_PROTO_HTTP, + MAP_PROTO_HTTPS, + MAP_PROTO_STATIC +}; + +/** + * Data specific to file maps + */ +struct file_map_data { + gchar *filename; + gboolean need_modify; + ev_stat st_ev; +}; + + +struct http_map_data; + +struct rspamd_http_map_cached_cbdata { + ev_timer timeout; + struct ev_loop *event_loop; + struct rspamd_storage_shmem *shm; + struct rspamd_map *map; + struct http_map_data *data; + guint64 gen; + time_t last_checked; +}; + +struct rspamd_map_cachepoint { + gint available; + gsize len; + time_t last_modified; + gchar shmem_name[256]; +}; + +/** + * Data specific to HTTP maps + */ +struct http_map_data { + /* Shared cache data */ + struct rspamd_map_cachepoint *cache; + /* Non-shared for cache owner, used to cleanup cache */ + struct rspamd_http_map_cached_cbdata *cur_cache_cbd; + gchar *userinfo; + gchar *path; + gchar *host; + gchar *rest; + rspamd_fstring_t *etag; + time_t last_modified; + time_t last_checked; + gboolean request_sent; + guint64 gen; + guint16 port; +}; + +struct static_map_data { + guchar *data; + gsize len; + gboolean processed; +}; + +union rspamd_map_backend_data { + struct file_map_data *fd; + struct http_map_data *hd; + struct static_map_data *sd; +}; + +struct rspamd_map_backend { + enum fetch_proto protocol; + gboolean is_signed; + gboolean is_compressed; + gboolean is_fallback; + struct ev_loop *event_loop; + guint32 id; + struct rspamd_cryptobox_pubkey *trusted_pubkey; + union rspamd_map_backend_data data; + gchar *uri; + ref_entry_t ref; +}; + +struct map_periodic_cbdata; + +struct rspamd_map { + struct rspamd_dns_resolver *r; + struct rspamd_config *cfg; + GPtrArray *backends; + struct rspamd_map_backend *fallback_backend; + map_cb_t read_callback; + map_fin_cb_t fin_callback; + map_dtor_t dtor; + void **user_data; + struct ev_loop *event_loop; + struct rspamd_worker *wrk; + gchar *description; + gchar *name; + guint32 id; + struct map_periodic_cbdata *scheduled_check; + rspamd_map_tmp_dtor tmp_dtor; + gpointer tmp_dtor_data; + rspamd_map_traverse_function traverse_function; + gpointer lua_map; + gsize nelts; + guint64 digest; + /* Should we check HTTP or just load cached data */ + ev_tstamp timeout; + gdouble poll_timeout; + time_t next_check; + gboolean active_http; + gboolean non_trivial; /* E.g. has http backends in active mode */ + gboolean file_only; /* No HTTP backends found */ + gboolean static_only; /* No need to check */ + /* Shared lock for temporary disabling of map reading (e.g. when this map is written by UI) */ + gint *locked; + gchar tag[MEMPOOL_UID_LEN]; +}; + +enum rspamd_map_http_stage { + http_map_resolve_host2 = 0, /* 2 requests sent */ + http_map_resolve_host1, /* 1 requests sent */ + http_map_http_conn, /* http connection */ + http_map_terminated /* terminated when doing resolving */ +}; + +struct map_periodic_cbdata { + struct rspamd_map *map; + struct map_cb_data cbdata; + ev_timer ev; + gboolean need_modify; + gboolean errored; + gboolean locked; + guint cur_backend; + ref_entry_t ref; +}; + +static const gchar rspamd_http_file_magic[] = + {'r', 'm', 'c', 'd', '2', '0', '0', '0'}; + +struct rspamd_http_file_data { + guchar magic[sizeof (rspamd_http_file_magic)]; + goffset data_off; + gulong mtime; + gulong next_check; + gulong etag_len; +}; + +struct http_callback_data { + struct ev_loop *event_loop; + struct rspamd_http_connection *conn; + GPtrArray *addrs; + rspamd_inet_addr_t *addr; + struct rspamd_map *map; + struct rspamd_map_backend *bk; + struct http_map_data *data; + struct map_periodic_cbdata *periodic; + struct rspamd_cryptobox_pubkey *pk; + struct rspamd_storage_shmem *shmem_data; + gsize data_len; + gboolean check; + enum rspamd_map_http_stage stage; + ev_tstamp timeout; + + ref_entry_t ref; +}; + +#ifdef __cplusplus +} +#endif + +#endif /* SRC_LIBUTIL_MAP_PRIVATE_H_ */ |