/* * Copyright 2023 Vsevolod Stakhov * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include "config.h" #include "rspamd.h" #include "message.h" #include "utlist.h" #include "libserver/http/http_private.h" #include "worker_private.h" #include "libserver/cfg_file_private.h" #include "libmime/scan_result_private.h" #include "lua/lua_common.h" #include "unix-std.h" #include "protocol_internal.h" #include "libserver/mempool_vars_internal.h" #include "contrib/fastutf8/fastutf8.h" #include "task.h" #include #ifdef SYS_ZSTD #include "zstd.h" #else #include "contrib/zstd/zstd.h" #endif INIT_LOG_MODULE(protocol) #define msg_err_protocol(...) rspamd_default_log_function(G_LOG_LEVEL_CRITICAL, \ "protocol", task->task_pool->tag.uid, \ G_STRFUNC, \ __VA_ARGS__) #define msg_warn_protocol(...) rspamd_default_log_function(G_LOG_LEVEL_WARNING, \ "protocol", task->task_pool->tag.uid, \ G_STRFUNC, \ __VA_ARGS__) #define msg_info_protocol(...) rspamd_default_log_function(G_LOG_LEVEL_INFO, \ "protocol", task->task_pool->tag.uid, \ G_STRFUNC, \ __VA_ARGS__) #define msg_debug_protocol(...) rspamd_conditional_debug_fast(NULL, NULL, \ rspamd_protocol_log_id, "protocol", task->task_pool->tag.uid, \ G_STRFUNC, \ __VA_ARGS__) static GQuark rspamd_protocol_quark(void) { return g_quark_from_static_string("protocol-error"); } /* * Remove <> from the fixed string and copy it to the pool */ static gchar * rspamd_protocol_escape_braces(struct rspamd_task *task, rspamd_ftok_t *in) { guint nchars = 0; const gchar *p; rspamd_ftok_t tok; gboolean has_obrace = FALSE; g_assert(in != NULL); g_assert(in->len > 0); p = in->begin; while ((g_ascii_isspace(*p) || *p == '<') && nchars < in->len) { if (*p == '<') { has_obrace = TRUE; } p++; nchars++; } tok.begin = p; p = in->begin + in->len - 1; tok.len = in->len - nchars; while (g_ascii_isspace(*p) && tok.len > 0) { p--; tok.len--; } if (has_obrace && *p == '>') { tok.len--; } return rspamd_mempool_ftokdup(task->task_pool, &tok); } #define COMPARE_CMD(str, cmd, len) (sizeof(cmd) - 1 == (len) && rspamd_lc_cmp((str), (cmd), (len)) == 0) static gboolean rspamd_protocol_handle_url(struct rspamd_task *task, struct rspamd_http_message *msg) { GHashTable *query_args; GHashTableIter it; struct http_parser_url u; const gchar *p; gsize pathlen; rspamd_ftok_t *key, *value; gpointer k, v; if (msg->url == NULL || msg->url->len == 0) { g_set_error(&task->err, rspamd_protocol_quark(), 400, "missing command"); return FALSE; } if (http_parser_parse_url(msg->url->str, msg->url->len, 0, &u) != 0) { g_set_error(&task->err, rspamd_protocol_quark(), 400, "bad request URL"); return FALSE; } if (!(u.field_set & (1 << UF_PATH))) { g_set_error(&task->err, rspamd_protocol_quark(), 400, "bad request URL: missing path"); return FALSE; } p = msg->url->str + u.field_data[UF_PATH].off; pathlen = u.field_data[UF_PATH].len; if (*p == '/') { p++; pathlen--; } switch (*p) { case 'c': case 'C': /* check */ if (COMPARE_CMD(p, MSG_CMD_CHECK_V2, pathlen)) { task->cmd = CMD_CHECK_V2; msg_debug_protocol("got checkv2 command"); } else if (COMPARE_CMD(p, MSG_CMD_CHECK, pathlen)) { task->cmd = CMD_CHECK; msg_debug_protocol("got check command"); } else { goto err; } break; case 's': case 'S': /* symbols, skip */ if (COMPARE_CMD(p, MSG_CMD_SYMBOLS, pathlen)) { task->cmd = CMD_CHECK; msg_debug_protocol("got symbols -> old check command"); } else if (COMPARE_CMD(p, MSG_CMD_SCAN, pathlen)) { task->cmd = CMD_CHECK; msg_debug_protocol("got scan -> old check command"); } else if (COMPARE_CMD(p, MSG_CMD_SKIP, pathlen)) { msg_debug_protocol("got skip command"); task->cmd = CMD_SKIP; } else { goto err; } break; case 'p': case 'P': /* ping, process */ if (COMPARE_CMD(p, MSG_CMD_PING, pathlen)) { msg_debug_protocol("got ping command"); task->cmd = CMD_PING; task->flags |= RSPAMD_TASK_FLAG_SKIP; task->processed_stages |= RSPAMD_TASK_STAGE_DONE; /* Skip all */ } else if (COMPARE_CMD(p, MSG_CMD_PROCESS, pathlen)) { msg_debug_protocol("got process -> old check command"); task->cmd = CMD_CHECK; } else { goto err; } break; case 'r': case 'R': /* report, report_ifspam */ if (COMPARE_CMD(p, MSG_CMD_REPORT, pathlen)) { msg_debug_protocol("got report -> old check command"); task->cmd = CMD_CHECK; } else if (COMPARE_CMD(p, MSG_CMD_REPORT_IFSPAM, pathlen)) { msg_debug_protocol("got reportifspam -> old check command"); task->cmd = CMD_CHECK; } else { goto err; } break; default: goto err; } if (u.field_set & (1u << UF_QUERY)) { /* In case if we have a query, we need to store it somewhere */ query_args = rspamd_http_message_parse_query(msg); /* Insert the rest of query params as HTTP headers */ g_hash_table_iter_init(&it, query_args); while (g_hash_table_iter_next(&it, &k, &v)) { gchar *key_cpy; key = k; value = v; key_cpy = rspamd_mempool_ftokdup(task->task_pool, key); rspamd_http_message_add_header_len(msg, key_cpy, value->begin, value->len); msg_debug_protocol("added header \"%T\" -> \"%T\" from HTTP query", key, value); } g_hash_table_unref(query_args); } return TRUE; err: g_set_error(&task->err, rspamd_protocol_quark(), 400, "invalid command"); return FALSE; } static void rspamd_protocol_process_recipients(struct rspamd_task *task, const rspamd_ftok_t *hdr) { enum { skip_spaces, quoted_string, normal_string, } state = skip_spaces; const gchar *p, *end, *start_addr; struct rspamd_email_address *addr; p = hdr->begin; end = hdr->begin + hdr->len; start_addr = NULL; while (p < end) { switch (state) { case skip_spaces: if (g_ascii_isspace(*p)) { p++; } else if (*p == '"') { start_addr = p; p++; state = quoted_string; } else { state = normal_string; start_addr = p; } break; case quoted_string: if (*p == '"') { state = normal_string; p++; } else if (*p == '\\') { /* Quoted pair */ p += 2; } else { p++; } break; case normal_string: if (*p == '"') { state = quoted_string; p++; } else if (*p == ',' && start_addr != NULL && p > start_addr) { /* We have finished address, check what we have */ addr = rspamd_email_address_from_smtp(start_addr, p - start_addr); if (addr) { if (task->rcpt_envelope == NULL) { task->rcpt_envelope = g_ptr_array_sized_new( 2); } g_ptr_array_add(task->rcpt_envelope, addr); } else { msg_err_protocol("bad rcpt address: '%*s'", (int) (p - start_addr), start_addr); task->flags |= RSPAMD_TASK_FLAG_BROKEN_HEADERS; } start_addr = NULL; p++; state = skip_spaces; } else { p++; } break; } } /* Check remainder */ if (start_addr && p > start_addr) { switch (state) { case normal_string: addr = rspamd_email_address_from_smtp(start_addr, end - start_addr); if (addr) { if (task->rcpt_envelope == NULL) { task->rcpt_envelope = g_ptr_array_sized_new( 2); } g_ptr_array_add(task->rcpt_envelope, addr); } else { msg_err_protocol("bad rcpt address: '%*s'", (int) (end - start_addr), start_addr); task->flags |= RSPAMD_TASK_FLAG_BROKEN_HEADERS; } break; case skip_spaces: /* Do nothing */ break; case quoted_string: default: msg_err_protocol("bad state when parsing rcpt address: '%*s'", (int) (end - start_addr), start_addr); task->flags |= RSPAMD_TASK_FLAG_BROKEN_HEADERS; } } } #define COMPARE_FLAG_LIT(lit) (len == sizeof(lit) - 1 && memcmp((lit), str, len) == 0) #define CHECK_PROTOCOL_FLAG(lit, fl) \ do { \ if (!known && COMPARE_FLAG_LIT(lit)) { \ task->protocol_flags |= (fl); \ known = TRUE; \ msg_debug_protocol("add protocol flag %s", lit); \ } \ } while (0) #define CHECK_TASK_FLAG(lit, fl) \ do { \ if (!known && COMPARE_FLAG_LIT(lit)) { \ task->flags |= (fl); \ known = TRUE; \ msg_debug_protocol("add task flag %s", lit); \ } \ } while (0) static void rspamd_protocol_handle_flag(struct rspamd_task *task, const gchar *str, gsize len) { gboolean known = FALSE; CHECK_TASK_FLAG("pass_all", RSPAMD_TASK_FLAG_PASS_ALL); CHECK_TASK_FLAG("no_log", RSPAMD_TASK_FLAG_NO_LOG); CHECK_TASK_FLAG("skip", RSPAMD_TASK_FLAG_SKIP); CHECK_TASK_FLAG("skip_process", RSPAMD_TASK_FLAG_SKIP_PROCESS); CHECK_TASK_FLAG("no_stat", RSPAMD_TASK_FLAG_NO_STAT); CHECK_TASK_FLAG("ssl", RSPAMD_TASK_FLAG_SSL); CHECK_TASK_FLAG("profile", RSPAMD_TASK_FLAG_PROFILE); CHECK_PROTOCOL_FLAG("milter", RSPAMD_TASK_PROTOCOL_FLAG_MILTER); CHECK_PROTOCOL_FLAG("zstd", RSPAMD_TASK_PROTOCOL_FLAG_COMPRESSED); CHECK_PROTOCOL_FLAG("ext_urls", RSPAMD_TASK_PROTOCOL_FLAG_EXT_URLS); CHECK_PROTOCOL_FLAG("body_block", RSPAMD_TASK_PROTOCOL_FLAG_BODY_BLOCK); CHECK_PROTOCOL_FLAG("groups", RSPAMD_TASK_PROTOCOL_FLAG_GROUPS); if (!known) { msg_warn_protocol("unknown flag: %*s", (gint) len, str); } } #undef COMPARE_FLAG #undef CHECK_PROTOCOL_FLAG static void rspamd_protocol_process_flags(struct rspamd_task *task, const rspamd_ftok_t *hdr) { enum { skip_spaces, read_flag, } state = skip_spaces; const gchar *p, *end, *start; p = hdr->begin; end = hdr->begin + hdr->len; start = NULL; while (p < end) { switch (state) { case skip_spaces: if (g_ascii_isspace(*p)) { p++; } else { state = read_flag; start = p; } break; case read_flag: if (*p == ',') { if (p > start) { rspamd_protocol_handle_flag(task, start, p - start); } start = NULL; state = skip_spaces; p++; } else { p++; } break; } } /* Check remainder */ if (start && end > start && state == read_flag) { rspamd_protocol_handle_flag(task, start, end - start); } } #define IF_HEADER(name) \ srch.begin = (name); \ srch.len = sizeof(name) - 1; \ if (rspamd_ftok_casecmp(hn_tok, &srch) == 0) gboolean rspamd_protocol_handle_headers(struct rspamd_task *task, struct rspamd_http_message *msg) { rspamd_ftok_t *hn_tok, *hv_tok, srch; gboolean has_ip = FALSE, seen_settings_header = FALSE; struct rspamd_http_header *header, *h; gchar *ntok; kh_foreach_value (msg->headers, header, { DL_FOREACH (header, h) { ntok = rspamd_mempool_ftokdup (task->task_pool, &h->name); hn_tok = rspamd_mempool_alloc (task->task_pool, sizeof (*hn_tok)); hn_tok->begin = ntok; hn_tok->len = h->name.len; ntok = rspamd_mempool_ftokdup (task->task_pool, &h->value); hv_tok = rspamd_mempool_alloc (task->task_pool, sizeof (*hv_tok)); hv_tok->begin = ntok; hv_tok->len = h->value.len; switch (*hn_tok->begin) { case 'd': case 'D': IF_HEADER(DELIVER_TO_HEADER) { task->deliver_to = rspamd_protocol_escape_braces(task, hv_tok); msg_debug_protocol("read deliver-to header, value: %s", task->deliver_to); } else { msg_debug_protocol("wrong header: %T", hn_tok); } break; case 'h': case 'H': IF_HEADER(HELO_HEADER) { task->helo = rspamd_mempool_ftokdup(task->task_pool, hv_tok); msg_debug_protocol("read helo header, value: %s", task->helo); } IF_HEADER(HOSTNAME_HEADER) { task->hostname = rspamd_mempool_ftokdup(task->task_pool, hv_tok); msg_debug_protocol("read hostname header, value: %s", task->hostname); } break; case 'f': case 'F': IF_HEADER(FROM_HEADER) { if (hv_tok->len == 0) { /* Replace '' with '<>' to fix parsing issue */ RSPAMD_FTOK_ASSIGN(hv_tok, "<>"); } task->from_envelope = rspamd_email_address_from_smtp( hv_tok->begin, hv_tok->len); msg_debug_protocol("read from header, value: %T", hv_tok); if (!task->from_envelope) { msg_err_protocol("bad from header: '%T'", hv_tok); task->flags |= RSPAMD_TASK_FLAG_BROKEN_HEADERS; } } IF_HEADER(FILENAME_HEADER) { task->msg.fpath = rspamd_mempool_ftokdup(task->task_pool, hv_tok); msg_debug_protocol("read filename header, value: %s", task->msg.fpath); } IF_HEADER(FLAGS_HEADER) { msg_debug_protocol("read flags header, value: %T", hv_tok); rspamd_protocol_process_flags(task, hv_tok); } break; case 'q': case 'Q': IF_HEADER(QUEUE_ID_HEADER) { task->queue_id = rspamd_mempool_ftokdup(task->task_pool, hv_tok); msg_debug_protocol("read queue_id header, value: %s", task->queue_id); } else { msg_debug_protocol("wrong header: %T", hn_tok); } break; case 'r': case 'R': IF_HEADER(RCPT_HEADER) { rspamd_protocol_process_recipients(task, hv_tok); msg_debug_protocol("read rcpt header, value: %T", hv_tok); } IF_HEADER(RAW_DATA_HEADER) { srch.begin = "yes"; srch.len = 3; msg_debug_protocol("read raw data header, value: %T", hv_tok); if (rspamd_ftok_casecmp(hv_tok, &srch) == 0) { task->flags &= ~RSPAMD_TASK_FLAG_MIME; msg_debug_protocol("disable mime parsing"); } } break; case 'i': case 'I': IF_HEADER(IP_ADDR_HEADER) { if (!rspamd_parse_inet_address(&task->from_addr, hv_tok->begin, hv_tok->len, RSPAMD_INET_ADDRESS_PARSE_DEFAULT)) { msg_err_protocol("bad ip header: '%T'", hv_tok); } else { msg_debug_protocol("read IP header, value: %T", hv_tok); has_ip = TRUE; } } else { msg_debug_protocol("wrong header: %T", hn_tok); } break; case 'p': case 'P': IF_HEADER(PASS_HEADER) { srch.begin = "all"; srch.len = 3; msg_debug_protocol("read pass header, value: %T", hv_tok); if (rspamd_ftok_casecmp(hv_tok, &srch) == 0) { task->flags |= RSPAMD_TASK_FLAG_PASS_ALL; msg_debug_protocol("pass all filters"); } } IF_HEADER(PROFILE_HEADER) { msg_debug_protocol("read profile header, value: %T", hv_tok); task->flags |= RSPAMD_TASK_FLAG_PROFILE; } break; case 's': case 'S': IF_HEADER(SETTINGS_ID_HEADER) { msg_debug_protocol("read settings-id header, value: %T", hv_tok); task->settings_elt = rspamd_config_find_settings_name_ref( task->cfg, hv_tok->begin, hv_tok->len); if (task->settings_elt == NULL) { GString *known_ids = g_string_new(NULL); struct rspamd_config_settings_elt *cur; DL_FOREACH(task->cfg->setting_ids, cur) { rspamd_printf_gstring(known_ids, "%s(%ud);", cur->name, cur->id); } msg_warn_protocol("unknown settings id: %T(%d); known_ids: %v", hv_tok, rspamd_config_name_to_id(hv_tok->begin, hv_tok->len), known_ids); g_string_free(known_ids, TRUE); } else { msg_debug_protocol("applied settings id %T -> %ud", hv_tok, task->settings_elt->id); } } IF_HEADER(SETTINGS_HEADER) { msg_debug_protocol("read settings header, value: %T", hv_tok); seen_settings_header = TRUE; } break; case 'u': case 'U': IF_HEADER(USER_HEADER) { /* * We must ignore User header in case of spamc, as SA has * different meaning of this header */ msg_debug_protocol("read user header, value: %T", hv_tok); if (!RSPAMD_TASK_IS_SPAMC(task)) { task->auth_user = rspamd_mempool_ftokdup(task->task_pool, hv_tok); } else { msg_info_protocol("ignore user header: legacy SA protocol"); } } IF_HEADER(URLS_HEADER) { msg_debug_protocol("read urls header, value: %T", hv_tok); srch.begin = "extended"; srch.len = 8; if (rspamd_ftok_casecmp(hv_tok, &srch) == 0) { task->protocol_flags |= RSPAMD_TASK_PROTOCOL_FLAG_EXT_URLS; msg_debug_protocol("extended urls information"); } /* TODO: add more formats there */ } IF_HEADER(USER_AGENT_HEADER) { msg_debug_protocol("read user-agent header, value: %T", hv_tok); if (hv_tok->len == 6 && rspamd_lc_cmp(hv_tok->begin, "rspamc", 6) == 0) { task->protocol_flags |= RSPAMD_TASK_PROTOCOL_FLAG_LOCAL_CLIENT; } } break; case 'l': case 'L': IF_HEADER(NO_LOG_HEADER) { msg_debug_protocol("read log header, value: %T", hv_tok); srch.begin = "no"; srch.len = 2; if (rspamd_ftok_casecmp(hv_tok, &srch) == 0) { task->flags |= RSPAMD_TASK_FLAG_NO_LOG; } } break; case 'm': case 'M': IF_HEADER(MLEN_HEADER) { msg_debug_protocol("read message length header, value: %T", hv_tok); task->protocol_flags |= RSPAMD_TASK_PROTOCOL_FLAG_HAS_CONTROL; } IF_HEADER(MTA_TAG_HEADER) { gchar *mta_tag; mta_tag = rspamd_mempool_ftokdup(task->task_pool, hv_tok); rspamd_mempool_set_variable(task->task_pool, RSPAMD_MEMPOOL_MTA_TAG, mta_tag, NULL); msg_debug_protocol("read MTA-Tag header, value: %s", mta_tag); } IF_HEADER(MTA_NAME_HEADER) { gchar *mta_name; mta_name = rspamd_mempool_ftokdup(task->task_pool, hv_tok); rspamd_mempool_set_variable(task->task_pool, RSPAMD_MEMPOOL_MTA_NAME, mta_name, NULL); msg_debug_protocol("read MTA-Name header, value: %s", mta_name); } IF_HEADER(MILTER_HEADER) { task->protocol_flags |= RSPAMD_TASK_PROTOCOL_FLAG_MILTER; msg_debug_protocol("read Milter header, value: %T", hv_tok); } break; case 't': case 'T': IF_HEADER(TLS_CIPHER_HEADER) { task->flags |= RSPAMD_TASK_FLAG_SSL; msg_debug_protocol("read TLS cipher header, value: %T", hv_tok); } break; default: msg_debug_protocol("generic header: %T", hn_tok); break; } rspamd_task_add_request_header (task, hn_tok, hv_tok); } }); /* End of kh_foreach_value */ if (seen_settings_header && task->settings_elt) { msg_warn_task("ignore settings id %s as settings header is also presented", task->settings_elt->name); REF_RELEASE(task->settings_elt); task->settings_elt = NULL; } if (!has_ip) { task->flags |= RSPAMD_TASK_FLAG_NO_IP; } return TRUE; } #define BOOL_TO_FLAG(val, flags, flag) \ do { \ if ((val)) (flags) |= (flag); \ else \ (flags) &= ~(flag); \ } while (0) gboolean rspamd_protocol_parse_task_flags(rspamd_mempool_t *pool, const ucl_object_t *obj, gpointer ud, struct rspamd_rcl_section *section, GError **err) { struct rspamd_rcl_struct_parser *pd = ud; gint *target; const gchar *key; gboolean value; target = (gint *) (((gchar *) pd->user_struct) + pd->offset); key = ucl_object_key(obj); value = ucl_object_toboolean(obj); if (key != NULL) { if (g_ascii_strcasecmp(key, "pass_all") == 0) { BOOL_TO_FLAG(value, *target, RSPAMD_TASK_FLAG_PASS_ALL); } else if (g_ascii_strcasecmp(key, "no_log") == 0) { BOOL_TO_FLAG(value, *target, RSPAMD_TASK_FLAG_NO_LOG); } } return TRUE; } static struct rspamd_rcl_sections_map *control_parser = NULL; RSPAMD_CONSTRUCTOR(rspamd_protocol_control_parser_ctor) { struct rspamd_rcl_section *sub = rspamd_rcl_add_section(&control_parser, NULL, "*", NULL, NULL, UCL_OBJECT, FALSE, TRUE); /* Default handlers */ rspamd_rcl_add_default_handler(sub, "ip", rspamd_rcl_parse_struct_addr, G_STRUCT_OFFSET(struct rspamd_task, from_addr), 0, NULL); rspamd_rcl_add_default_handler(sub, "from", rspamd_rcl_parse_struct_mime_addr, G_STRUCT_OFFSET(struct rspamd_task, from_envelope), 0, NULL); rspamd_rcl_add_default_handler(sub, "rcpt", rspamd_rcl_parse_struct_mime_addr, G_STRUCT_OFFSET(struct rspamd_task, rcpt_envelope), 0, NULL); rspamd_rcl_add_default_handler(sub, "helo", rspamd_rcl_parse_struct_string, G_STRUCT_OFFSET(struct rspamd_task, helo), 0, NULL); rspamd_rcl_add_default_handler(sub, "user", rspamd_rcl_parse_struct_string, G_STRUCT_OFFSET(struct rspamd_task, auth_user), 0, NULL); rspamd_rcl_add_default_handler(sub, "pass_all", rspamd_protocol_parse_task_flags, G_STRUCT_OFFSET(struct rspamd_task, flags), 0, NULL); rspamd_rcl_add_default_handler(sub, "json", rspamd_protocol_parse_task_flags, G_STRUCT_OFFSET(struct rspamd_task, flags), 0, NULL); } RSPAMD_DESTRUCTOR(rspamd_protocol_control_parser_dtor) { rspamd_rcl_sections_free(control_parser); } gboolean rspamd_protocol_handle_control(struct rspamd_task *task, const ucl_object_t *control) { GError *err = NULL; if (!rspamd_rcl_parse(control_parser, task->cfg, task, task->task_pool, control, &err)) { msg_warn_protocol("cannot parse control block: %e", err); g_error_free(err); return FALSE; } return TRUE; } gboolean rspamd_protocol_handle_request(struct rspamd_task *task, struct rspamd_http_message *msg) { gboolean ret = TRUE; if (msg->method == HTTP_SYMBOLS) { msg_debug_protocol("got legacy SYMBOLS method, enable rspamc protocol workaround"); task->cmd = CMD_CHECK_RSPAMC; } else if (msg->method == HTTP_CHECK) { msg_debug_protocol("got legacy CHECK method, enable rspamc protocol workaround"); task->cmd = CMD_CHECK_RSPAMC; } else { ret = rspamd_protocol_handle_url(task, msg); } if (msg->flags & RSPAMD_HTTP_FLAG_SPAMC) { msg_debug_protocol("got legacy SA input, enable spamc protocol workaround"); task->cmd = CMD_CHECK_SPAMC; } return ret; } /* Structure for writing tree data */ struct tree_cb_data { ucl_object_t *top; khash_t(rspamd_url_host_hash) * seen; struct rspamd_task *task; }; static ucl_object_t * rspamd_protocol_extended_url(struct rspamd_task *task, struct rspamd_url *url, const gchar *encoded, gsize enclen) { ucl_object_t *obj, *elt; obj = ucl_object_typed_new(UCL_OBJECT); elt = ucl_object_fromstring_common(encoded, enclen, 0); ucl_object_insert_key(obj, elt, "url", 0, false); if (url->tldlen > 0) { elt = ucl_object_fromstring_common(rspamd_url_tld_unsafe(url), url->tldlen, 0); ucl_object_insert_key(obj, elt, "tld", 0, false); } if (url->hostlen > 0) { elt = ucl_object_fromstring_common(rspamd_url_host_unsafe(url), url->hostlen, 0); ucl_object_insert_key(obj, elt, "host", 0, false); } ucl_object_t *flags = ucl_object_typed_new(UCL_ARRAY); for (unsigned int i = 0; i < RSPAMD_URL_MAX_FLAG_SHIFT; i++) { if (url->flags & (1u << i)) { ucl_object_t *fl = ucl_object_fromstring(rspamd_url_flag_to_string(1u << i)); ucl_array_append(flags, fl); } } ucl_object_insert_key(obj, flags, "flags", 0, false); if (url->ext && url->ext->linked_url) { encoded = rspamd_url_encode(url->ext->linked_url, &enclen, task->task_pool); elt = rspamd_protocol_extended_url(task, url->ext->linked_url, encoded, enclen); ucl_object_insert_key(obj, elt, "linked_url", 0, false); } return obj; } /* * Callback for writing urls */ static void urls_protocol_cb(struct rspamd_url *url, struct tree_cb_data *cb) { ucl_object_t *obj; struct rspamd_task *task = cb->task; const gchar *user_field = "unknown", *encoded = NULL; gboolean has_user = FALSE; guint len = 0; gsize enclen = 0; if (!(task->protocol_flags & RSPAMD_TASK_PROTOCOL_FLAG_EXT_URLS)) { if (url->hostlen > 0) { if (rspamd_url_host_set_has(cb->seen, url)) { return; } goffset err_offset; if ((err_offset = rspamd_fast_utf8_validate(rspamd_url_host_unsafe(url), url->hostlen)) == 0) { obj = ucl_object_fromstring_common(rspamd_url_host_unsafe(url), url->hostlen, 0); } else { obj = ucl_object_fromstring_common(rspamd_url_host_unsafe(url), err_offset - 1, 0); } } else { return; } rspamd_url_host_set_add(cb->seen, url); } else { encoded = rspamd_url_encode(url, &enclen, task->task_pool); obj = rspamd_protocol_extended_url(task, url, encoded, enclen); } ucl_array_append(cb->top, obj); if (cb->task->cfg->log_urls) { if (task->auth_user) { user_field = task->auth_user; len = strlen(task->auth_user); has_user = TRUE; } else if (task->from_envelope) { user_field = task->from_envelope->addr; len = task->from_envelope->addr_len; } if (!encoded) { encoded = rspamd_url_encode(url, &enclen, task->task_pool); } msg_notice_task_encrypted("<%s> %s: %*s; ip: %s; URL: %*s", MESSAGE_FIELD_CHECK(task, message_id), has_user ? "user" : "from", len, user_field, rspamd_inet_address_to_string(task->from_addr), (gint) enclen, encoded); } } static ucl_object_t * rspamd_urls_tree_ucl(khash_t(rspamd_url_hash) * set, struct rspamd_task *task) { struct tree_cb_data cb; ucl_object_t *obj; struct rspamd_url *u; obj = ucl_object_typed_new(UCL_ARRAY); cb.top = obj; cb.task = task; cb.seen = kh_init(rspamd_url_host_hash); kh_foreach_key(set, u, { if (!(u->protocol & PROTOCOL_MAILTO)) { urls_protocol_cb(u, &cb); } }); kh_destroy(rspamd_url_host_hash, cb.seen); return obj; } static void emails_protocol_cb(struct rspamd_url *url, struct tree_cb_data *cb) { ucl_object_t *obj; if (url->userlen > 0 && url->hostlen > 0) { obj = ucl_object_fromlstring(rspamd_url_user_unsafe(url), url->userlen + url->hostlen + 1); ucl_array_append(cb->top, obj); } } static ucl_object_t * rspamd_emails_tree_ucl(khash_t(rspamd_url_hash) * set, struct rspamd_task *task) { struct tree_cb_data cb; ucl_object_t *obj; struct rspamd_url *u; obj = ucl_object_typed_new(UCL_ARRAY); cb.top = obj; cb.task = task; kh_foreach_key(set, u, { if ((u->protocol & PROTOCOL_MAILTO)) { emails_protocol_cb(u, &cb); } }); return obj; } /* Write new subject */ static const gchar * rspamd_protocol_rewrite_subject(struct rspamd_task *task) { GString *subj_buf; gchar *res; const gchar *s, *c, *p; gsize slen = 0; c = rspamd_mempool_get_variable(task->task_pool, "metric_subject"); if (c == NULL) { c = task->cfg->subject; } if (c == NULL) { c = SPAM_SUBJECT; } p = c; s = MESSAGE_FIELD_CHECK(task, subject); if (s) { slen = strlen(s); } subj_buf = g_string_sized_new(strlen(c) + slen); while (*p) { if (*p == '%') { switch (p[1]) { case 's': g_string_append_len(subj_buf, c, p - c); if (s) { g_string_append_len(subj_buf, s, slen); } c = p + 2; p += 2; break; case 'd': g_string_append_len(subj_buf, c, p - c); rspamd_printf_gstring(subj_buf, "%.2f", task->result->score); c = p + 2; p += 2; break; case '%': g_string_append_len(subj_buf, c, p - c); g_string_append_c(subj_buf, '%'); c = p + 2; p += 2; break; default: p++; /* Just % something unknown */ break; } } else { p++; } } if (p > c) { g_string_append_len(subj_buf, c, p - c); } res = rspamd_mime_header_encode(subj_buf->str, subj_buf->len); rspamd_mempool_add_destructor(task->task_pool, (rspamd_mempool_destruct_t) g_free, res); g_string_free(subj_buf, TRUE); return res; } static ucl_object_t * rspamd_metric_symbol_ucl(struct rspamd_task *task, struct rspamd_symbol_result *sym) { ucl_object_t *obj = NULL, *ar; const gchar *description = NULL; struct rspamd_symbol_option *opt; if (sym->sym != NULL) { description = sym->sym->description; } obj = ucl_object_typed_new(UCL_OBJECT); ucl_object_insert_key(obj, ucl_object_fromstring(sym->name), "name", 0, false); ucl_object_insert_key(obj, ucl_object_fromdouble(sym->score), "score", 0, false); if (task->cmd == CMD_CHECK_V2) { if (sym->sym) { ucl_object_insert_key(obj, ucl_object_fromdouble(sym->sym->score), "metric_score", 0, false); } else { ucl_object_insert_key(obj, ucl_object_fromdouble(0.0), "metric_score", 0, false); } } if (description) { ucl_object_insert_key(obj, ucl_object_fromstring(description), "description", 0, false); } if (sym->options != NULL) { ar = ucl_object_typed_new(UCL_ARRAY); DL_FOREACH(sym->opts_head, opt) { ucl_array_append(ar, ucl_object_fromstring_common(opt->option, opt->optlen, 0)); } ucl_object_insert_key(obj, ar, "options", 0, false); } return obj; } static ucl_object_t * rspamd_metric_group_ucl(struct rspamd_task *task, struct rspamd_symbols_group *gr, gdouble score) { ucl_object_t *obj = NULL; obj = ucl_object_typed_new(UCL_OBJECT); ucl_object_insert_key(obj, ucl_object_fromdouble(score), "score", 0, false); if (gr->description) { ucl_object_insert_key(obj, ucl_object_fromstring(gr->description), "description", 0, false); } return obj; } static ucl_object_t * rspamd_scan_result_ucl(struct rspamd_task *task, struct rspamd_scan_result *mres, ucl_object_t *top) { struct rspamd_symbol_result *sym; gboolean is_spam; struct rspamd_action *action; ucl_object_t *obj = NULL, *sobj; const gchar *subject; struct rspamd_passthrough_result *pr = NULL; action = rspamd_check_action_metric(task, &pr, NULL); is_spam = !(action->flags & RSPAMD_ACTION_HAM); if (task->cmd == CMD_CHECK) { obj = ucl_object_typed_new(UCL_OBJECT); ucl_object_insert_key(obj, ucl_object_frombool(is_spam), "is_spam", 0, false); } else { obj = top; } if (pr) { if (pr->message && !(pr->flags & RSPAMD_PASSTHROUGH_NO_SMTP_MESSAGE)) { /* Add smtp message if it does not exist: see #3269 for details */ if (ucl_object_lookup(task->messages, "smtp_message") == NULL) { ucl_object_insert_key(task->messages, ucl_object_fromstring_common(pr->message, 0, UCL_STRING_RAW), "smtp_message", 0, false); } } ucl_object_insert_key(obj, ucl_object_fromstring(pr->module), "passthrough_module", 0, false); } ucl_object_insert_key(obj, ucl_object_frombool(RSPAMD_TASK_IS_SKIPPED(task)), "is_skipped", 0, false); if (!isnan(mres->score)) { ucl_object_insert_key(obj, ucl_object_fromdouble(mres->score), "score", 0, false); } else { ucl_object_insert_key(obj, ucl_object_fromdouble(0.0), "score", 0, false); } ucl_object_insert_key(obj, ucl_object_fromdouble(rspamd_task_get_required_score(task, mres)), "required_score", 0, false); ucl_object_insert_key(obj, ucl_object_fromstring(action->name), "action", 0, false); if (action->action_type == METRIC_ACTION_REWRITE_SUBJECT) { subject = rspamd_protocol_rewrite_subject(task); if (subject) { ucl_object_insert_key(obj, ucl_object_fromstring(subject), "subject", 0, false); } } if (action->flags & RSPAMD_ACTION_MILTER) { /* Treat milter action specially */ if (action->action_type == METRIC_ACTION_DISCARD) { ucl_object_insert_key(obj, ucl_object_fromstring("discard"), "reject", 0, false); } else if (action->action_type == METRIC_ACTION_QUARANTINE) { ucl_object_insert_key(obj, ucl_object_fromstring("quarantine"), "reject", 0, false); } } /* Now handle symbols */ if (task->cmd != CMD_CHECK) { /* Insert actions thresholds */ ucl_object_t *actions_obj = ucl_object_typed_new(UCL_OBJECT); for (int i = task->result->nactions - 1; i >= 0; i--) { struct rspamd_action_config *action_lim = &task->result->actions_config[i]; if (!isnan(action_lim->cur_limit) && !(action_lim->action->flags & (RSPAMD_ACTION_NO_THRESHOLD | RSPAMD_ACTION_HAM))) { ucl_object_insert_key(actions_obj, ucl_object_fromdouble(action_lim->cur_limit), action_lim->action->name, 0, true); } } ucl_object_insert_key(obj, actions_obj, "thresholds", 0, false); /* For checkv2 we insert symbols as a separate object */ obj = ucl_object_typed_new(UCL_OBJECT); } kh_foreach_value(mres->symbols, sym, { if (!(sym->flags & RSPAMD_SYMBOL_RESULT_IGNORED)) { sobj = rspamd_metric_symbol_ucl(task, sym); ucl_object_insert_key(obj, sobj, sym->name, 0, false); } }) if (task->cmd != CMD_CHECK) { /* For checkv2 we insert symbols as a separate object */ ucl_object_insert_key(top, obj, "symbols", 0, false); } else { /* For legacy check we just insert it as "default" all together */ ucl_object_insert_key(top, obj, DEFAULT_METRIC, 0, false); } /* Handle groups if needed */ if (task->protocol_flags & RSPAMD_TASK_PROTOCOL_FLAG_GROUPS) { struct rspamd_symbols_group *gr; gdouble gr_score; obj = ucl_object_typed_new(UCL_OBJECT); ucl_object_reserve(obj, kh_size(mres->sym_groups)); kh_foreach(mres->sym_groups, gr, gr_score, { if (task->cfg->public_groups_only && !(gr->flags & RSPAMD_SYMBOL_GROUP_PUBLIC)) { continue; } sobj = rspamd_metric_group_ucl(task, gr, gr_score); ucl_object_insert_key(obj, sobj, gr->name, 0, false); }); ucl_object_insert_key(top, obj, "groups", 0, false); } return obj; } void rspamd_ucl_torspamc_output(const ucl_object_t *top, rspamd_fstring_t **out) { const ucl_object_t *symbols, *score, *required_score, *is_spam, *elt, *cur; ucl_object_iter_t iter = NULL; score = ucl_object_lookup(top, "score"); required_score = ucl_object_lookup(top, "required_score"); is_spam = ucl_object_lookup(top, "is_spam"); rspamd_printf_fstring(out, "Metric: default; %s; %.2f / %.2f / 0.0\r\n", ucl_object_toboolean(is_spam) ? "True" : "False", ucl_object_todouble(score), ucl_object_todouble(required_score)); elt = ucl_object_lookup(top, "action"); if (elt != NULL) { rspamd_printf_fstring(out, "Action: %s\r\n", ucl_object_tostring(elt)); } elt = ucl_object_lookup(top, "subject"); if (elt != NULL) { rspamd_printf_fstring(out, "Subject: %s\r\n", ucl_object_tostring(elt)); } symbols = ucl_object_lookup(top, "symbols"); if (symbols != NULL) { iter = NULL; while ((elt = ucl_object_iterate(symbols, &iter, true)) != NULL) { if (elt->type == UCL_OBJECT) { const ucl_object_t *sym_score; sym_score = ucl_object_lookup(elt, "score"); rspamd_printf_fstring(out, "Symbol: %s(%.2f)\r\n", ucl_object_key(elt), ucl_object_todouble(sym_score)); } } } elt = ucl_object_lookup(top, "messages"); if (elt != NULL) { iter = NULL; while ((cur = ucl_object_iterate(elt, &iter, true)) != NULL) { if (cur->type == UCL_STRING) { rspamd_printf_fstring(out, "Message: %s\r\n", ucl_object_tostring(cur)); } } } elt = ucl_object_lookup(top, "message-id"); if (elt != NULL) { rspamd_printf_fstring(out, "Message-ID: %s\r\n", ucl_object_tostring(elt)); } } void rspamd_ucl_tospamc_output(const ucl_object_t *top, rspamd_fstring_t **out) { const ucl_object_t *symbols, *score, *required_score, *is_spam, *elt; ucl_object_iter_t iter = NULL; rspamd_fstring_t *f; score = ucl_object_lookup(top, "score"); required_score = ucl_object_lookup(top, "required_score"); is_spam = ucl_object_lookup(top, "is_spam"); rspamd_printf_fstring(out, "Spam: %s ; %.2f / %.2f\r\n\r\n", ucl_object_toboolean(is_spam) ? "True" : "False", ucl_object_todouble(score), ucl_object_todouble(required_score)); symbols = ucl_object_lookup(top, "symbols"); if (symbols != NULL) { while ((elt = ucl_object_iterate(symbols, &iter, true)) != NULL) { if (elt->type == UCL_OBJECT) { rspamd_printf_fstring(out, "%s,", ucl_object_key(elt)); } } /* Ugly hack, but the whole spamc is ugly */ f = *out; if (f->str[f->len - 1] == ',') { f->len--; *out = rspamd_fstring_append(*out, CRLF, 2); } } } static void rspamd_protocol_output_profiling(struct rspamd_task *task, ucl_object_t *top) { GHashTable *tbl; GHashTableIter it; gpointer k, v; ucl_object_t *prof; gdouble val; prof = ucl_object_typed_new(UCL_OBJECT); tbl = rspamd_mempool_get_variable(task->task_pool, "profile"); if (tbl) { g_hash_table_iter_init(&it, tbl); while (g_hash_table_iter_next(&it, &k, &v)) { val = *(gdouble *) v; ucl_object_insert_key(prof, ucl_object_fromdouble(val), (const char *) k, 0, false); } } ucl_object_insert_key(top, prof, "profile", 0, false); } ucl_object_t * rspamd_protocol_write_ucl(struct rspamd_task *task, enum rspamd_protocol_flags flags) { ucl_object_t *top = NULL; GString *dkim_sig; GList *dkim_sigs; const ucl_object_t *milter_reply; rspamd_task_set_finish_time(task); top = ucl_object_typed_new(UCL_OBJECT); rspamd_mempool_add_destructor(task->task_pool, (rspamd_mempool_destruct_t) ucl_object_unref, top); if (flags & RSPAMD_PROTOCOL_METRICS) { rspamd_scan_result_ucl(task, task->result, top); } if (flags & RSPAMD_PROTOCOL_MESSAGES) { if (G_UNLIKELY(task->cfg->compat_messages)) { const ucl_object_t *cur; ucl_object_t *msg_object; ucl_object_iter_t iter = NULL; msg_object = ucl_object_typed_new(UCL_ARRAY); while ((cur = ucl_object_iterate(task->messages, &iter, true)) != NULL) { if (cur->type == UCL_STRING) { ucl_array_append(msg_object, ucl_object_ref(cur)); } } ucl_object_insert_key(top, msg_object, "messages", 0, false); } else { ucl_object_insert_key(top, ucl_object_ref(task->messages), "messages", 0, false); } } if (flags & RSPAMD_PROTOCOL_URLS && task->message) { if (kh_size(MESSAGE_FIELD(task, urls)) > 0) { ucl_object_insert_key(top, rspamd_urls_tree_ucl(MESSAGE_FIELD(task, urls), task), "urls", 0, false); ucl_object_insert_key(top, rspamd_emails_tree_ucl(MESSAGE_FIELD(task, urls), task), "emails", 0, false); } } if (flags & RSPAMD_PROTOCOL_EXTRA) { if (G_UNLIKELY(RSPAMD_TASK_IS_PROFILING(task))) { rspamd_protocol_output_profiling(task, top); } } if (flags & RSPAMD_PROTOCOL_BASIC) { ucl_object_insert_key(top, ucl_object_fromstring(MESSAGE_FIELD_CHECK(task, message_id)), "message-id", 0, false); ucl_object_insert_key(top, ucl_object_fromdouble(task->time_real_finish - task->task_timestamp), "time_real", 0, false); } if (flags & RSPAMD_PROTOCOL_DKIM) { dkim_sigs = rspamd_mempool_get_variable(task->task_pool, RSPAMD_MEMPOOL_DKIM_SIGNATURE); if (dkim_sigs) { if (dkim_sigs->next) { /* Multiple DKIM signatures */ ucl_object_t *ar = ucl_object_typed_new(UCL_ARRAY); for (; dkim_sigs != NULL; dkim_sigs = dkim_sigs->next) { GString *folded_header; dkim_sig = (GString *) dkim_sigs->data; if (task->protocol_flags & RSPAMD_TASK_PROTOCOL_FLAG_MILTER || !task->message) { folded_header = rspamd_header_value_fold( "DKIM-Signature", strlen("DKIM-Signature"), dkim_sig->str, dkim_sig->len, 80, RSPAMD_TASK_NEWLINES_LF, NULL); } else { folded_header = rspamd_header_value_fold( "DKIM-Signature", strlen("DKIM-Signature"), dkim_sig->str, dkim_sig->len, 80, MESSAGE_FIELD(task, nlines_type), NULL); } ucl_array_append(ar, ucl_object_fromstring_common(folded_header->str, folded_header->len, UCL_STRING_RAW)); g_string_free(folded_header, TRUE); } ucl_object_insert_key(top, ar, "dkim-signature", 0, false); } else { /* Single DKIM signature */ GString *folded_header; dkim_sig = (GString *) dkim_sigs->data; if (task->protocol_flags & RSPAMD_TASK_PROTOCOL_FLAG_MILTER) { folded_header = rspamd_header_value_fold( "DKIM-Signature", strlen("DKIM-Signature"), dkim_sig->str, dkim_sig->len, 80, RSPAMD_TASK_NEWLINES_LF, NULL); } else { folded_header = rspamd_header_value_fold( "DKIM-Signature", strlen("DKIM-Signature"), dkim_sig->str, dkim_sig->len, 80, MESSAGE_FIELD(task, nlines_type), NULL); } ucl_object_insert_key(top, ucl_object_fromstring_common(folded_header->str, folded_header->len, UCL_STRING_RAW), "dkim-signature", 0, false); g_string_free(folded_header, TRUE); } } } if (flags & RSPAMD_PROTOCOL_RMILTER) { milter_reply = rspamd_mempool_get_variable(task->task_pool, RSPAMD_MEMPOOL_MILTER_REPLY); if (milter_reply) { if (task->cmd != CMD_CHECK) { ucl_object_insert_key(top, ucl_object_ref(milter_reply), "milter", 0, false); } else { ucl_object_insert_key(top, ucl_object_ref(milter_reply), "rmilter", 0, false); } } } return top; } void rspamd_protocol_http_reply(struct rspamd_http_message *msg, struct rspamd_task *task, ucl_object_t **pobj) { struct rspamd_scan_result *metric_res; const struct rspamd_re_cache_stat *restat; ucl_object_t *top = NULL; rspamd_fstring_t *reply; gint flags = RSPAMD_PROTOCOL_DEFAULT; struct rspamd_action *action; /* Removed in 2.0 */ #if 0 GHashTableIter hiter; gpointer h, v; /* Write custom headers */ g_hash_table_iter_init (&hiter, task->reply_headers); while (g_hash_table_iter_next (&hiter, &h, &v)) { rspamd_ftok_t *hn = h, *hv = v; rspamd_http_message_add_header (msg, hn->begin, hv->begin); } #endif flags |= RSPAMD_PROTOCOL_URLS; top = rspamd_protocol_write_ucl(task, flags); if (pobj) { *pobj = top; } if (!(task->flags & RSPAMD_TASK_FLAG_NO_LOG)) { rspamd_roll_history_update(task->worker->srv->history, task); } else { msg_debug_protocol("skip history update due to no log flag"); } rspamd_task_write_log(task); if (task->cfg->log_flags & RSPAMD_LOG_FLAG_RE_CACHE) { restat = rspamd_re_cache_get_stat(task->re_rt); g_assert(restat != NULL); msg_notice_task( "regexp statistics: %ud pcre regexps scanned, %ud regexps matched," " %ud regexps total, %ud regexps cached," " %HL scanned using pcre, %HL scanned total", restat->regexp_checked, restat->regexp_matched, restat->regexp_total, restat->regexp_fast_cached, restat->bytes_scanned_pcre, restat->bytes_scanned); } reply = rspamd_fstring_sized_new(1000); if (msg->method < HTTP_SYMBOLS && !RSPAMD_TASK_IS_SPAMC(task)) { msg_debug_protocol("writing json reply"); rspamd_ucl_emit_fstring(top, UCL_EMIT_JSON_COMPACT, &reply); } else { if (RSPAMD_TASK_IS_SPAMC(task)) { msg_debug_protocol("writing spamc legacy reply to client"); rspamd_ucl_tospamc_output(top, &reply); } else { msg_debug_protocol("writing rspamc legacy reply to client"); rspamd_ucl_torspamc_output(top, &reply); } } if (task->protocol_flags & RSPAMD_TASK_PROTOCOL_FLAG_BODY_BLOCK) { /* Check if we need to insert a body block */ if (task->flags & RSPAMD_TASK_FLAG_MESSAGE_REWRITE) { GString *hdr_offset = g_string_sized_new(30); rspamd_printf_gstring(hdr_offset, "%z", RSPAMD_FSTRING_LEN(reply)); rspamd_http_message_add_header(msg, MESSAGE_OFFSET_HEADER, hdr_offset->str); msg_debug_protocol("write body block at position %s", hdr_offset->str); g_string_free(hdr_offset, TRUE); /* In case of milter, we append just body, otherwise - full message */ if (task->protocol_flags & RSPAMD_TASK_PROTOCOL_FLAG_MILTER) { const gchar *start; goffset len, hdr_off; start = task->msg.begin; len = task->msg.len; hdr_off = MESSAGE_FIELD(task, raw_headers_content).len; if (hdr_off < len) { start += hdr_off; len -= hdr_off; /* The problem here is that we need not end of headers, we need * start of body. * * Hence, we need to skip one \r\n till there is anything else in * a line. */ if (*start == '\r' && len > 0) { start++; len--; } if (*start == '\n' && len > 0) { start++; len--; } msg_debug_protocol("milter version of body block size %d", (int) len); reply = rspamd_fstring_append(reply, start, len); } } else { msg_debug_protocol("general version of body block size %d", (int) task->msg.len); reply = rspamd_fstring_append(reply, task->msg.begin, task->msg.len); } } } if ((task->protocol_flags & RSPAMD_TASK_PROTOCOL_FLAG_COMPRESSED) && rspamd_libs_reset_compression(task->cfg->libs_ctx)) { /* We can compress output */ ZSTD_inBuffer zin; ZSTD_outBuffer zout; ZSTD_CStream *zstream; rspamd_fstring_t *compressed_reply; gsize r; zstream = task->cfg->libs_ctx->out_zstream; compressed_reply = rspamd_fstring_sized_new(ZSTD_compressBound(reply->len)); zin.pos = 0; zin.src = reply->str; zin.size = reply->len; zout.pos = 0; zout.dst = compressed_reply->str; zout.size = compressed_reply->allocated; while (zin.pos < zin.size) { r = ZSTD_compressStream(zstream, &zout, &zin); if (ZSTD_isError(r)) { msg_err_protocol("cannot compress: %s", ZSTD_getErrorName(r)); rspamd_fstring_free(compressed_reply); rspamd_http_message_set_body_from_fstring_steal(msg, reply); goto end; } } ZSTD_flushStream(zstream, &zout); r = ZSTD_endStream(zstream, &zout); if (ZSTD_isError(r)) { msg_err_protocol("cannot finalize compress: %s", ZSTD_getErrorName(r)); rspamd_fstring_free(compressed_reply); rspamd_http_message_set_body_from_fstring_steal(msg, reply); goto end; } msg_info_protocol("writing compressed results: %z bytes before " "%z bytes after", zin.pos, zout.pos); compressed_reply->len = zout.pos; rspamd_fstring_free(reply); rspamd_http_message_set_body_from_fstring_steal(msg, compressed_reply); rspamd_http_message_add_header(msg, COMPRESSION_HEADER, "zstd"); if (task->cfg->libs_ctx->out_dict && task->cfg->libs_ctx->out_dict->id != 0) { gchar dict_str[32]; rspamd_snprintf(dict_str, sizeof(dict_str), "%ud", task->cfg->libs_ctx->out_dict->id); rspamd_http_message_add_header(msg, "Dictionary", dict_str); } } else { rspamd_http_message_set_body_from_fstring_steal(msg, reply); } end: if (!(task->flags & RSPAMD_TASK_FLAG_NO_STAT)) { /* Update stat for default metric */ msg_debug_protocol("skip stats update due to no_stat flag"); metric_res = task->result; if (metric_res != NULL) { action = rspamd_check_action_metric(task, NULL, NULL); /* TODO: handle custom actions in stats */ if (action->action_type == METRIC_ACTION_SOFT_REJECT && (task->flags & RSPAMD_TASK_FLAG_GREYLISTED)) { /* Set stat action to greylist to display greylisted messages */ #ifndef HAVE_ATOMIC_BUILTINS task->worker->srv->stat->actions_stat[METRIC_ACTION_GREYLIST]++; #else __atomic_add_fetch(&task->worker->srv->stat->actions_stat[METRIC_ACTION_GREYLIST], 1, __ATOMIC_RELEASE); #endif } else if (action->action_type < METRIC_ACTION_MAX) { #ifndef HAVE_ATOMIC_BUILTINS task->worker->srv->stat->actions_stat[action->action_type]++; #else __atomic_add_fetch(&task->worker->srv->stat->actions_stat[action->action_type], 1, __ATOMIC_RELEASE); #endif } } /* Increase counters */ #ifndef HAVE_ATOMIC_BUILTINS task->worker->srv->stat->messages_scanned++; #else __atomic_add_fetch(&task->worker->srv->stat->messages_scanned, 1, __ATOMIC_RELEASE); #endif /* Set average processing time */ guint32 slot; float processing_time = task->time_real_finish - task->task_timestamp; #ifndef HAVE_ATOMIC_BUILTINS slot = task->worker->srv->stat->avg_time.cur_slot++; #else slot = __atomic_fetch_add(&task->worker->srv->stat->avg_time.cur_slot, 1, __ATOMIC_RELEASE); #endif slot = slot % MAX_AVG_TIME_SLOTS; /* TODO: this should be atomic but it is not supported in C */ task->worker->srv->stat->avg_time.avg_time[slot] = processing_time; } } void rspamd_protocol_write_log_pipe(struct rspamd_task *task) { struct rspamd_worker_log_pipe *lp; struct rspamd_protocol_log_message_sum *ls; lua_State *L = task->cfg->lua_state; struct rspamd_scan_result *mres; struct rspamd_symbol_result *sym; gint id, i; guint32 n = 0, nextra = 0; gsize sz; GArray *extra; struct rspamd_protocol_log_symbol_result er; struct rspamd_task **ptask; /* Get extra results from lua plugins */ extra = g_array_new(FALSE, FALSE, sizeof(er)); lua_getglobal(L, "rspamd_plugins"); if (lua_istable(L, -1)) { lua_pushnil(L); while (lua_next(L, -2)) { if (lua_istable(L, -1)) { lua_pushvalue(L, -2); /* stack: * -1: copy of key * -2: value (module table) * -3: key (module name) * -4: global */ lua_pushstring(L, "log_callback"); lua_gettable(L, -3); /* stack: * -1: func * -2: copy of key * -3: value (module table) * -3: key (module name) * -4: global */ if (lua_isfunction(L, -1)) { ptask = lua_newuserdata(L, sizeof(*ptask)); *ptask = task; rspamd_lua_setclass(L, "rspamd{task}", -1); /* stack: * -1: task * -2: func * -3: key copy * -4: value (module table) * -5: key (module name) * -6: global */ msg_debug_protocol("calling for %s", lua_tostring(L, -3)); if (lua_pcall(L, 1, 1, 0) != 0) { msg_info_protocol("call to log callback %s failed: %s", lua_tostring(L, -2), lua_tostring(L, -1)); lua_pop(L, 1); /* stack: * -1: key copy * -2: value * -3: key */ } else { /* stack: * -1: result * -2: key copy * -3: value * -4: key */ if (lua_istable(L, -1)) { /* Another iteration */ lua_pushnil(L); while (lua_next(L, -2)) { /* stack: * -1: value * -2: key * -3: result table (pcall) * -4: key copy (parent) * -5: value (parent) * -6: key (parent) */ if (lua_istable(L, -1)) { er.id = 0; er.score = 0.0; lua_rawgeti(L, -1, 1); if (lua_isnumber(L, -1)) { er.id = lua_tonumber(L, -1); } lua_rawgeti(L, -2, 2); if (lua_isnumber(L, -1)) { er.score = lua_tonumber(L, -1); } /* stack: * -1: value[2] * -2: value[1] * -3: values * -4: key * -5: result table (pcall) * -6: key copy (parent) * -7: value (parent) * -8: key (parent) */ lua_pop(L, 2); /* Values */ g_array_append_val(extra, er); } lua_pop(L, 1); /* Value for lua_next */ } lua_pop(L, 1); /* Table result of pcall */ } else { msg_info_protocol("call to log callback %s returned " "wrong type: %s", lua_tostring(L, -2), lua_typename(L, lua_type(L, -1))); lua_pop(L, 1); /* Returned error */ } } } else { lua_pop(L, 1); /* stack: * -1: key copy * -2: value * -3: key */ } } lua_pop(L, 2); /* Top table + key copy */ } lua_pop(L, 1); /* rspamd_plugins global */ } else { lua_pop(L, 1); } nextra = extra->len; LL_FOREACH(task->cfg->log_pipes, lp) { if (lp->fd != -1) { switch (lp->type) { case RSPAMD_LOG_PIPE_SYMBOLS: mres = task->result; if (mres) { n = kh_size(mres->symbols); sz = sizeof(*ls) + sizeof(struct rspamd_protocol_log_symbol_result) * (n + nextra); ls = g_malloc0(sz); /* Handle settings id */ if (task->settings_elt) { ls->settings_id = task->settings_elt->id; } else { ls->settings_id = 0; } ls->score = mres->score; ls->required_score = rspamd_task_get_required_score(task, mres); ls->nresults = n; ls->nextra = nextra; i = 0; kh_foreach_value(mres->symbols, sym, { id = rspamd_symcache_find_symbol(task->cfg->cache, sym->name); if (id >= 0) { ls->results[i].id = id; ls->results[i].score = sym->score; } else { ls->results[i].id = -1; ls->results[i].score = 0.0; } i++; }); memcpy(&ls->results[n], extra->data, nextra * sizeof(er)); } else { sz = sizeof(*ls); ls = g_malloc0(sz); ls->nresults = 0; } /* We don't really care about return value here */ if (write(lp->fd, ls, sz) == -1) { msg_info_protocol("cannot write to log pipe: %s", strerror(errno)); } g_free(ls); break; default: msg_err_protocol("unknown log format %d", lp->type); break; } } } g_array_free(extra, TRUE); } void rspamd_protocol_write_reply(struct rspamd_task *task, ev_tstamp timeout) { struct rspamd_http_message *msg; const gchar *ctype = "application/json"; rspamd_fstring_t *reply; msg = rspamd_http_new_message(HTTP_RESPONSE); if (rspamd_http_connection_is_encrypted(task->http_conn)) { msg_info_protocol("<%s> writing encrypted reply", MESSAGE_FIELD_CHECK(task, message_id)); } /* Compatibility */ if (task->cmd == CMD_CHECK_RSPAMC) { msg->method = HTTP_SYMBOLS; } else if (task->cmd == CMD_CHECK_SPAMC) { msg->method = HTTP_SYMBOLS; msg->flags |= RSPAMD_HTTP_FLAG_SPAMC; } if (task->err != NULL) { msg_debug_protocol("writing error reply to client"); ucl_object_t *top = NULL; top = ucl_object_typed_new(UCL_OBJECT); msg->code = 500 + task->err->code % 100; msg->status = rspamd_fstring_new_init(task->err->message, strlen(task->err->message)); ucl_object_insert_key(top, ucl_object_fromstring(task->err->message), "error", 0, false); ucl_object_insert_key(top, ucl_object_fromstring(g_quark_to_string(task->err->domain)), "error_domain", 0, false); reply = rspamd_fstring_sized_new(256); rspamd_ucl_emit_fstring(top, UCL_EMIT_JSON_COMPACT, &reply); ucl_object_unref(top); /* We also need to validate utf8 */ if (rspamd_fast_utf8_validate(reply->str, reply->len) != 0) { gsize valid_len; gchar *validated; /* We copy reply several times here but it should be a rare case */ validated = rspamd_str_make_utf_valid(reply->str, reply->len, &valid_len, task->task_pool); rspamd_http_message_set_body(msg, validated, valid_len); rspamd_fstring_free(reply); } else { rspamd_http_message_set_body_from_fstring_steal(msg, reply); } } else { msg->status = rspamd_fstring_new_init("OK", 2); switch (task->cmd) { case CMD_CHECK: case CMD_CHECK_RSPAMC: case CMD_CHECK_SPAMC: case CMD_SKIP: case CMD_CHECK_V2: rspamd_protocol_http_reply(msg, task, NULL); rspamd_protocol_write_log_pipe(task); break; case CMD_PING: msg_debug_protocol("writing pong to client"); rspamd_http_message_set_body(msg, "pong" CRLF, 6); ctype = "text/plain"; break; default: msg_err_protocol("BROKEN"); break; } } ev_now_update(task->event_loop); msg->date = ev_time(); rspamd_http_connection_reset(task->http_conn); rspamd_http_connection_write_message(task->http_conn, msg, NULL, ctype, task, timeout); task->processed_stages |= RSPAMD_TASK_STAGE_REPLIED; }