- /*
- * Copyright 2024 Vsevolod Stakhov
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
- #include "config.h"
- #include "rspamd.h"
- #include "message.h"
- #include "utlist.h"
- #include "libserver/http/http_private.h"
- #include "worker_private.h"
- #include "libserver/cfg_file_private.h"
- #include "libmime/scan_result_private.h"
- #include "lua/lua_common.h"
- #include "unix-std.h"
- #include "protocol_internal.h"
- #include "libserver/mempool_vars_internal.h"
- #include "contrib/fastutf8/fastutf8.h"
- #include "task.h"
- #include "lua/lua_classnames.h"
- #include <math.h>
-
- #ifdef SYS_ZSTD
- #include "zstd.h"
- #else
- #include "contrib/zstd/zstd.h"
- #endif
-
- INIT_LOG_MODULE(protocol)
-
- #define msg_err_protocol(...) rspamd_default_log_function(G_LOG_LEVEL_CRITICAL, \
- "protocol", task->task_pool->tag.uid, \
- G_STRFUNC, \
- __VA_ARGS__)
- #define msg_warn_protocol(...) rspamd_default_log_function(G_LOG_LEVEL_WARNING, \
- "protocol", task->task_pool->tag.uid, \
- G_STRFUNC, \
- __VA_ARGS__)
- #define msg_info_protocol(...) rspamd_default_log_function(G_LOG_LEVEL_INFO, \
- "protocol", task->task_pool->tag.uid, \
- G_STRFUNC, \
- __VA_ARGS__)
- #define msg_debug_protocol(...) rspamd_conditional_debug_fast(NULL, NULL, \
- rspamd_protocol_log_id, "protocol", task->task_pool->tag.uid, \
- G_STRFUNC, \
- __VA_ARGS__)
-
- static GQuark
- rspamd_protocol_quark(void)
- {
- return g_quark_from_static_string("protocol-error");
- }
-
- /*
- * Remove <> from the fixed string and copy it to the pool
- */
- static gchar *
- rspamd_protocol_escape_braces(struct rspamd_task *task, rspamd_ftok_t *in)
- {
- guint nchars = 0;
- const gchar *p;
- rspamd_ftok_t tok;
- gboolean has_obrace = FALSE;
-
- g_assert(in != NULL);
- g_assert(in->len > 0);
-
- p = in->begin;
-
- while ((g_ascii_isspace(*p) || *p == '<') && nchars < in->len) {
- if (*p == '<') {
- has_obrace = TRUE;
- }
-
- p++;
- nchars++;
- }
-
- tok.begin = p;
-
- p = in->begin + in->len - 1;
- tok.len = in->len - nchars;
-
- while (g_ascii_isspace(*p) && tok.len > 0) {
- p--;
- tok.len--;
- }
-
- if (has_obrace && *p == '>') {
- tok.len--;
- }
-
- return rspamd_mempool_ftokdup(task->task_pool, &tok);
- }
-
- #define COMPARE_CMD(str, cmd, len) (sizeof(cmd) - 1 == (len) && rspamd_lc_cmp((str), (cmd), (len)) == 0)
-
- static gboolean
- rspamd_protocol_handle_url(struct rspamd_task *task,
- struct rspamd_http_message *msg)
- {
- GHashTable *query_args;
- GHashTableIter it;
- struct http_parser_url u;
- const gchar *p;
- gsize pathlen;
- rspamd_ftok_t *key, *value;
- gpointer k, v;
-
- if (msg->url == NULL || msg->url->len == 0) {
- g_set_error(&task->err, rspamd_protocol_quark(), 400, "missing command");
- return FALSE;
- }
-
- if (http_parser_parse_url(msg->url->str, msg->url->len, 0, &u) != 0) {
- g_set_error(&task->err, rspamd_protocol_quark(), 400, "bad request URL");
-
- return FALSE;
- }
-
- if (!(u.field_set & (1 << UF_PATH))) {
- g_set_error(&task->err, rspamd_protocol_quark(), 400,
- "bad request URL: missing path");
-
- return FALSE;
- }
-
- p = msg->url->str + u.field_data[UF_PATH].off;
- pathlen = u.field_data[UF_PATH].len;
-
- if (*p == '/') {
- p++;
- pathlen--;
- }
-
- switch (*p) {
- case 'c':
- case 'C':
- /* check */
- if (COMPARE_CMD(p, MSG_CMD_CHECK_V2, pathlen)) {
- task->cmd = CMD_CHECK_V2;
- msg_debug_protocol("got checkv2 command");
- }
- else if (COMPARE_CMD(p, MSG_CMD_CHECK, pathlen)) {
- task->cmd = CMD_CHECK;
- msg_debug_protocol("got check command");
- }
- else {
- goto err;
- }
- break;
- case 's':
- case 'S':
- /* symbols, skip */
- if (COMPARE_CMD(p, MSG_CMD_SYMBOLS, pathlen)) {
- task->cmd = CMD_CHECK;
- msg_debug_protocol("got symbols -> old check command");
- }
- else if (COMPARE_CMD(p, MSG_CMD_SCAN, pathlen)) {
- task->cmd = CMD_CHECK;
- msg_debug_protocol("got scan -> old check command");
- }
- else if (COMPARE_CMD(p, MSG_CMD_SKIP, pathlen)) {
- msg_debug_protocol("got skip command");
- task->cmd = CMD_SKIP;
- }
- else {
- goto err;
- }
- break;
- case 'p':
- case 'P':
- /* ping, process */
- if (COMPARE_CMD(p, MSG_CMD_PING, pathlen)) {
- msg_debug_protocol("got ping command");
- task->cmd = CMD_PING;
- task->flags |= RSPAMD_TASK_FLAG_SKIP;
- task->processed_stages |= RSPAMD_TASK_STAGE_DONE; /* Skip all */
- }
- else if (COMPARE_CMD(p, MSG_CMD_PROCESS, pathlen)) {
- msg_debug_protocol("got process -> old check command");
- task->cmd = CMD_CHECK;
- }
- else {
- goto err;
- }
- break;
- case 'r':
- case 'R':
- /* report, report_ifspam */
- if (COMPARE_CMD(p, MSG_CMD_REPORT, pathlen)) {
- msg_debug_protocol("got report -> old check command");
- task->cmd = CMD_CHECK;
- }
- else if (COMPARE_CMD(p, MSG_CMD_REPORT_IFSPAM, pathlen)) {
- msg_debug_protocol("got reportifspam -> old check command");
- task->cmd = CMD_CHECK;
- }
- else {
- goto err;
- }
- break;
- default:
- goto err;
- }
-
- if (u.field_set & (1u << UF_QUERY)) {
- /* In case if we have a query, we need to store it somewhere */
- query_args = rspamd_http_message_parse_query(msg);
-
- /* Insert the rest of query params as HTTP headers */
- g_hash_table_iter_init(&it, query_args);
-
- while (g_hash_table_iter_next(&it, &k, &v)) {
- gchar *key_cpy;
- key = k;
- value = v;
-
- key_cpy = rspamd_mempool_ftokdup(task->task_pool, key);
-
- rspamd_http_message_add_header_len(msg, key_cpy,
- value->begin, value->len);
- msg_debug_protocol("added header \"%T\" -> \"%T\" from HTTP query",
- key, value);
- }
-
- g_hash_table_unref(query_args);
- }
-
- return TRUE;
-
- err:
- g_set_error(&task->err, rspamd_protocol_quark(), 400, "invalid command");
-
- return FALSE;
- }
-
- static void
- rspamd_protocol_process_recipients(struct rspamd_task *task,
- const rspamd_ftok_t *hdr)
- {
- enum {
- skip_spaces,
- quoted_string,
- normal_string,
- } state = skip_spaces;
- const gchar *p, *end, *start_addr;
- struct rspamd_email_address *addr;
-
- p = hdr->begin;
- end = hdr->begin + hdr->len;
- start_addr = NULL;
-
- while (p < end) {
- switch (state) {
- case skip_spaces:
- if (g_ascii_isspace(*p)) {
- p++;
- }
- else if (*p == '"') {
- start_addr = p;
- p++;
- state = quoted_string;
- }
- else {
- state = normal_string;
- start_addr = p;
- }
- break;
- case quoted_string:
- if (*p == '"') {
- state = normal_string;
- p++;
- }
- else if (*p == '\\') {
- /* Quoted pair */
- p += 2;
- }
- else {
- p++;
- }
- break;
- case normal_string:
- if (*p == '"') {
- state = quoted_string;
- p++;
- }
- else if (*p == ',' && start_addr != NULL && p > start_addr) {
- /* We have finished address, check what we have */
- addr = rspamd_email_address_from_smtp(start_addr,
- p - start_addr);
-
- if (addr) {
- if (task->rcpt_envelope == NULL) {
- task->rcpt_envelope = g_ptr_array_sized_new(
- 2);
- }
-
- g_ptr_array_add(task->rcpt_envelope, addr);
- }
- else {
- msg_err_protocol("bad rcpt address: '%*s'",
- (int) (p - start_addr), start_addr);
- task->flags |= RSPAMD_TASK_FLAG_BROKEN_HEADERS;
- }
- start_addr = NULL;
- p++;
- state = skip_spaces;
- }
- else {
- p++;
- }
- break;
- }
- }
-
- /* Check remainder */
- if (start_addr && p > start_addr) {
- switch (state) {
- case normal_string:
- addr = rspamd_email_address_from_smtp(start_addr, end - start_addr);
-
- if (addr) {
- if (task->rcpt_envelope == NULL) {
- task->rcpt_envelope = g_ptr_array_sized_new(
- 2);
- }
-
- g_ptr_array_add(task->rcpt_envelope, addr);
- }
- else {
- msg_err_protocol("bad rcpt address: '%*s'",
- (int) (end - start_addr), start_addr);
- task->flags |= RSPAMD_TASK_FLAG_BROKEN_HEADERS;
- }
- break;
- case skip_spaces:
- /* Do nothing */
- break;
- case quoted_string:
- default:
- msg_err_protocol("bad state when parsing rcpt address: '%*s'",
- (int) (end - start_addr), start_addr);
- task->flags |= RSPAMD_TASK_FLAG_BROKEN_HEADERS;
- }
- }
- }
-
- #define COMPARE_FLAG_LIT(lit) (len == sizeof(lit) - 1 && memcmp((lit), str, len) == 0)
- #define CHECK_PROTOCOL_FLAG(lit, fl) \
- do { \
- if (!known && COMPARE_FLAG_LIT(lit)) { \
- task->protocol_flags |= (fl); \
- known = TRUE; \
- msg_debug_protocol("add protocol flag %s", lit); \
- } \
- } while (0)
- #define CHECK_TASK_FLAG(lit, fl) \
- do { \
- if (!known && COMPARE_FLAG_LIT(lit)) { \
- task->flags |= (fl); \
- known = TRUE; \
- msg_debug_protocol("add task flag %s", lit); \
- } \
- } while (0)
-
- static void
- rspamd_protocol_handle_flag(struct rspamd_task *task, const gchar *str,
- gsize len)
- {
- gboolean known = FALSE;
-
- CHECK_TASK_FLAG("pass_all", RSPAMD_TASK_FLAG_PASS_ALL);
- CHECK_TASK_FLAG("no_log", RSPAMD_TASK_FLAG_NO_LOG);
- CHECK_TASK_FLAG("skip", RSPAMD_TASK_FLAG_SKIP);
- CHECK_TASK_FLAG("skip_process", RSPAMD_TASK_FLAG_SKIP_PROCESS);
- CHECK_TASK_FLAG("no_stat", RSPAMD_TASK_FLAG_NO_STAT);
- CHECK_TASK_FLAG("ssl", RSPAMD_TASK_FLAG_SSL);
- CHECK_TASK_FLAG("profile", RSPAMD_TASK_FLAG_PROFILE);
-
- CHECK_PROTOCOL_FLAG("milter", RSPAMD_TASK_PROTOCOL_FLAG_MILTER);
- CHECK_PROTOCOL_FLAG("zstd", RSPAMD_TASK_PROTOCOL_FLAG_COMPRESSED);
- CHECK_PROTOCOL_FLAG("ext_urls", RSPAMD_TASK_PROTOCOL_FLAG_EXT_URLS);
- CHECK_PROTOCOL_FLAG("body_block", RSPAMD_TASK_PROTOCOL_FLAG_BODY_BLOCK);
- CHECK_PROTOCOL_FLAG("groups", RSPAMD_TASK_PROTOCOL_FLAG_GROUPS);
-
- if (!known) {
- msg_warn_protocol("unknown flag: %*s", (gint) len, str);
- }
- }
-
- #undef COMPARE_FLAG
- #undef CHECK_PROTOCOL_FLAG
-
- static void
- rspamd_protocol_process_flags(struct rspamd_task *task, const rspamd_ftok_t *hdr)
- {
- enum {
- skip_spaces,
- read_flag,
- } state = skip_spaces;
- const gchar *p, *end, *start;
-
- p = hdr->begin;
- end = hdr->begin + hdr->len;
- start = NULL;
-
- while (p < end) {
- switch (state) {
- case skip_spaces:
- if (g_ascii_isspace(*p)) {
- p++;
- }
- else {
- state = read_flag;
- start = p;
- }
- break;
- case read_flag:
- if (*p == ',') {
- if (p > start) {
- rspamd_protocol_handle_flag(task, start, p - start);
- }
- start = NULL;
- state = skip_spaces;
- p++;
- }
- else {
- p++;
- }
- break;
- }
- }
-
- /* Check remainder */
- if (start && end > start && state == read_flag) {
- rspamd_protocol_handle_flag(task, start, end - start);
- }
- }
-
- #define IF_HEADER(name) \
- srch.begin = (name); \
- srch.len = sizeof(name) - 1; \
- if (rspamd_ftok_casecmp(hn_tok, &srch) == 0)
-
- gboolean
- rspamd_protocol_handle_headers(struct rspamd_task *task,
- struct rspamd_http_message *msg)
- {
- rspamd_ftok_t *hn_tok, *hv_tok, srch;
- gboolean has_ip = FALSE, seen_settings_header = FALSE;
- struct rspamd_http_header *header, *h;
- gchar *ntok;
-
- kh_foreach_value (msg->headers, header, {
- DL_FOREACH (header, h) {
- ntok = rspamd_mempool_ftokdup (task->task_pool, &h->name);
- hn_tok = rspamd_mempool_alloc (task->task_pool, sizeof (*hn_tok));
- hn_tok->begin = ntok;
- hn_tok->len = h->name.len;
-
-
- ntok = rspamd_mempool_ftokdup (task->task_pool, &h->value);
- hv_tok = rspamd_mempool_alloc (task->task_pool, sizeof (*hv_tok));
- hv_tok->begin = ntok;
- hv_tok->len = h->value.len;
-
- switch (*hn_tok->begin) {
- case 'd':
- case 'D':
- IF_HEADER(DELIVER_TO_HEADER)
- {
- task->deliver_to = rspamd_protocol_escape_braces(task, hv_tok);
- msg_debug_protocol("read deliver-to header, value: %s",
- task->deliver_to);
- }
- else
- {
- msg_debug_protocol("wrong header: %T", hn_tok);
- }
- break;
- case 'h':
- case 'H':
- IF_HEADER(HELO_HEADER)
- {
- task->helo = rspamd_mempool_ftokdup(task->task_pool, hv_tok);
- msg_debug_protocol("read helo header, value: %s", task->helo);
- }
- IF_HEADER(HOSTNAME_HEADER)
- {
- task->hostname = rspamd_mempool_ftokdup(task->task_pool,
- hv_tok);
- msg_debug_protocol("read hostname header, value: %s", task->hostname);
- }
- break;
- case 'f':
- case 'F':
- IF_HEADER(FROM_HEADER)
- {
- if (hv_tok->len == 0) {
- /* Replace '' with '<>' to fix parsing issue */
- RSPAMD_FTOK_ASSIGN(hv_tok, "<>");
- }
- task->from_envelope = rspamd_email_address_from_smtp(
- hv_tok->begin,
- hv_tok->len);
- msg_debug_protocol("read from header, value: %T", hv_tok);
-
- if (!task->from_envelope) {
- msg_err_protocol("bad from header: '%T'", hv_tok);
- task->flags |= RSPAMD_TASK_FLAG_BROKEN_HEADERS;
- }
- }
- IF_HEADER(FILENAME_HEADER)
- {
- task->msg.fpath = rspamd_mempool_ftokdup(task->task_pool,
- hv_tok);
- msg_debug_protocol("read filename header, value: %s", task->msg.fpath);
- }
- IF_HEADER(FLAGS_HEADER)
- {
- msg_debug_protocol("read flags header, value: %T", hv_tok);
- rspamd_protocol_process_flags(task, hv_tok);
- }
- break;
- case 'q':
- case 'Q':
- IF_HEADER(QUEUE_ID_HEADER)
- {
- task->queue_id = rspamd_mempool_ftokdup(task->task_pool,
- hv_tok);
- msg_debug_protocol("read queue_id header, value: %s", task->queue_id);
- }
- else
- {
- msg_debug_protocol("wrong header: %T", hn_tok);
- }
- break;
- case 'r':
- case 'R':
- IF_HEADER(RCPT_HEADER)
- {
- rspamd_protocol_process_recipients(task, hv_tok);
- msg_debug_protocol("read rcpt header, value: %T", hv_tok);
- }
- IF_HEADER(RAW_DATA_HEADER)
- {
- srch.begin = "yes";
- srch.len = 3;
-
- msg_debug_protocol("read raw data header, value: %T", hv_tok);
-
- if (rspamd_ftok_casecmp(hv_tok, &srch) == 0) {
- task->flags &= ~RSPAMD_TASK_FLAG_MIME;
- msg_debug_protocol("disable mime parsing");
- }
- }
- break;
- case 'i':
- case 'I':
- IF_HEADER(IP_ADDR_HEADER)
- {
- if (!rspamd_parse_inet_address(&task->from_addr,
- hv_tok->begin, hv_tok->len,
- RSPAMD_INET_ADDRESS_PARSE_DEFAULT)) {
- msg_err_protocol("bad ip header: '%T'", hv_tok);
- }
- else {
- msg_debug_protocol("read IP header, value: %T", hv_tok);
- has_ip = TRUE;
- }
- }
- else
- {
- msg_debug_protocol("wrong header: %T", hn_tok);
- }
- break;
- case 'p':
- case 'P':
- IF_HEADER(PASS_HEADER)
- {
- srch.begin = "all";
- srch.len = 3;
-
- msg_debug_protocol("read pass header, value: %T", hv_tok);
-
- if (rspamd_ftok_casecmp(hv_tok, &srch) == 0) {
- task->flags |= RSPAMD_TASK_FLAG_PASS_ALL;
- msg_debug_protocol("pass all filters");
- }
- }
- IF_HEADER(PROFILE_HEADER)
- {
- msg_debug_protocol("read profile header, value: %T", hv_tok);
- task->flags |= RSPAMD_TASK_FLAG_PROFILE;
- }
- break;
- case 's':
- case 'S':
- IF_HEADER(SETTINGS_ID_HEADER)
- {
- msg_debug_protocol("read settings-id header, value: %T", hv_tok);
- task->settings_elt = rspamd_config_find_settings_name_ref(
- task->cfg, hv_tok->begin, hv_tok->len);
-
- if (task->settings_elt == NULL) {
- GString *known_ids = g_string_new(NULL);
- struct rspamd_config_settings_elt *cur;
-
- DL_FOREACH(task->cfg->setting_ids, cur)
- {
- rspamd_printf_gstring(known_ids, "%s(%ud);",
- cur->name, cur->id);
- }
-
- msg_warn_protocol("unknown settings id: %T(%d); known_ids: %v",
- hv_tok,
- rspamd_config_name_to_id(hv_tok->begin, hv_tok->len),
- known_ids);
-
- g_string_free(known_ids, TRUE);
- }
- else {
- msg_debug_protocol("applied settings id %T -> %ud", hv_tok,
- task->settings_elt->id);
- }
- }
- IF_HEADER(SETTINGS_HEADER)
- {
- msg_debug_protocol("read settings header, value: %T", hv_tok);
- seen_settings_header = TRUE;
- }
- break;
- case 'u':
- case 'U':
- IF_HEADER(USER_HEADER)
- {
- /*
- * We must ignore User header in case of spamc, as SA has
- * different meaning of this header
- */
- msg_debug_protocol("read user header, value: %T", hv_tok);
- if (!RSPAMD_TASK_IS_SPAMC(task)) {
- task->auth_user = rspamd_mempool_ftokdup(task->task_pool,
- hv_tok);
- }
- else {
- msg_info_protocol("ignore user header: legacy SA protocol");
- }
- }
- IF_HEADER(URLS_HEADER)
- {
- msg_debug_protocol("read urls header, value: %T", hv_tok);
-
- srch.begin = "extended";
- srch.len = 8;
-
- if (rspamd_ftok_casecmp(hv_tok, &srch) == 0) {
- task->protocol_flags |= RSPAMD_TASK_PROTOCOL_FLAG_EXT_URLS;
- msg_debug_protocol("extended urls information");
- }
-
- /* TODO: add more formats there */
- }
- IF_HEADER(USER_AGENT_HEADER)
- {
- msg_debug_protocol("read user-agent header, value: %T", hv_tok);
-
- if (hv_tok->len == 6 &&
- rspamd_lc_cmp(hv_tok->begin, "rspamc", 6) == 0) {
- task->protocol_flags |= RSPAMD_TASK_PROTOCOL_FLAG_LOCAL_CLIENT;
- }
- }
- break;
- case 'l':
- case 'L':
- IF_HEADER(NO_LOG_HEADER)
- {
- msg_debug_protocol("read log header, value: %T", hv_tok);
- srch.begin = "no";
- srch.len = 2;
-
- if (rspamd_ftok_casecmp(hv_tok, &srch) == 0) {
- task->flags |= RSPAMD_TASK_FLAG_NO_LOG;
- }
- }
- break;
- case 'm':
- case 'M':
- IF_HEADER(MLEN_HEADER)
- {
- msg_debug_protocol("read message length header, value: %T",
- hv_tok);
- task->protocol_flags |= RSPAMD_TASK_PROTOCOL_FLAG_HAS_CONTROL;
- }
- IF_HEADER(MTA_TAG_HEADER)
- {
- gchar *mta_tag;
- mta_tag = rspamd_mempool_ftokdup(task->task_pool, hv_tok);
- rspamd_mempool_set_variable(task->task_pool,
- RSPAMD_MEMPOOL_MTA_TAG,
- mta_tag, NULL);
- msg_debug_protocol("read MTA-Tag header, value: %s", mta_tag);
- }
- IF_HEADER(MTA_NAME_HEADER)
- {
- gchar *mta_name;
- mta_name = rspamd_mempool_ftokdup(task->task_pool, hv_tok);
- rspamd_mempool_set_variable(task->task_pool,
- RSPAMD_MEMPOOL_MTA_NAME,
- mta_name, NULL);
- msg_debug_protocol("read MTA-Name header, value: %s", mta_name);
- }
- IF_HEADER(MILTER_HEADER)
- {
- task->protocol_flags |= RSPAMD_TASK_PROTOCOL_FLAG_MILTER;
- msg_debug_protocol("read Milter header, value: %T", hv_tok);
- }
- break;
- case 't':
- case 'T':
- IF_HEADER(TLS_CIPHER_HEADER)
- {
- task->flags |= RSPAMD_TASK_FLAG_SSL;
- msg_debug_protocol("read TLS cipher header, value: %T", hv_tok);
- }
- break;
- default:
- msg_debug_protocol("generic header: %T", hn_tok);
- break;
- }
-
- rspamd_task_add_request_header (task, hn_tok, hv_tok);
- }
- }); /* End of kh_foreach_value */
-
- if (seen_settings_header && task->settings_elt) {
- msg_warn_task("ignore settings id %s as settings header is also presented",
- task->settings_elt->name);
- REF_RELEASE(task->settings_elt);
-
- task->settings_elt = NULL;
- }
-
- if (!has_ip) {
- task->flags |= RSPAMD_TASK_FLAG_NO_IP;
- }
-
- return TRUE;
- }
-
- #define BOOL_TO_FLAG(val, flags, flag) \
- do { \
- if ((val)) (flags) |= (flag); \
- else \
- (flags) &= ~(flag); \
- } while (0)
-
- gboolean
- rspamd_protocol_parse_task_flags(rspamd_mempool_t *pool,
- const ucl_object_t *obj,
- gpointer ud,
- struct rspamd_rcl_section *section,
- GError **err)
- {
- struct rspamd_rcl_struct_parser *pd = ud;
- gint *target;
- const gchar *key;
- gboolean value;
-
- target = (gint *) (((gchar *) pd->user_struct) + pd->offset);
- key = ucl_object_key(obj);
- value = ucl_object_toboolean(obj);
-
- if (key != NULL) {
- if (g_ascii_strcasecmp(key, "pass_all") == 0) {
- BOOL_TO_FLAG(value, *target, RSPAMD_TASK_FLAG_PASS_ALL);
- }
- else if (g_ascii_strcasecmp(key, "no_log") == 0) {
- BOOL_TO_FLAG(value, *target, RSPAMD_TASK_FLAG_NO_LOG);
- }
- }
-
- return TRUE;
- }
-
- static struct rspamd_rcl_sections_map *control_parser = NULL;
-
- RSPAMD_CONSTRUCTOR(rspamd_protocol_control_parser_ctor)
- {
-
- struct rspamd_rcl_section *sub = rspamd_rcl_add_section(&control_parser, NULL,
- "*",
- NULL,
- NULL,
- UCL_OBJECT,
- FALSE,
- TRUE);
- /* Default handlers */
- rspamd_rcl_add_default_handler(sub,
- "ip",
- rspamd_rcl_parse_struct_addr,
- G_STRUCT_OFFSET(struct rspamd_task, from_addr),
- 0,
- NULL);
- rspamd_rcl_add_default_handler(sub,
- "from",
- rspamd_rcl_parse_struct_mime_addr,
- G_STRUCT_OFFSET(struct rspamd_task, from_envelope),
- 0,
- NULL);
- rspamd_rcl_add_default_handler(sub,
- "rcpt",
- rspamd_rcl_parse_struct_mime_addr,
- G_STRUCT_OFFSET(struct rspamd_task, rcpt_envelope),
- 0,
- NULL);
- rspamd_rcl_add_default_handler(sub,
- "helo",
- rspamd_rcl_parse_struct_string,
- G_STRUCT_OFFSET(struct rspamd_task, helo),
- 0,
- NULL);
- rspamd_rcl_add_default_handler(sub,
- "user",
- rspamd_rcl_parse_struct_string,
- G_STRUCT_OFFSET(struct rspamd_task, auth_user),
- 0,
- NULL);
- rspamd_rcl_add_default_handler(sub,
- "pass_all",
- rspamd_protocol_parse_task_flags,
- G_STRUCT_OFFSET(struct rspamd_task, flags),
- 0,
- NULL);
- rspamd_rcl_add_default_handler(sub,
- "json",
- rspamd_protocol_parse_task_flags,
- G_STRUCT_OFFSET(struct rspamd_task, flags),
- 0,
- NULL);
- }
-
- RSPAMD_DESTRUCTOR(rspamd_protocol_control_parser_dtor)
- {
- rspamd_rcl_sections_free(control_parser);
- }
-
- gboolean
- rspamd_protocol_handle_control(struct rspamd_task *task,
- const ucl_object_t *control)
- {
- GError *err = NULL;
-
- if (!rspamd_rcl_parse(control_parser, task->cfg, task, task->task_pool,
- control, &err)) {
- msg_warn_protocol("cannot parse control block: %e", err);
- g_error_free(err);
-
- return FALSE;
- }
-
- return TRUE;
- }
-
- gboolean
- rspamd_protocol_handle_request(struct rspamd_task *task,
- struct rspamd_http_message *msg)
- {
- gboolean ret = TRUE;
-
- if (msg->method == HTTP_SYMBOLS) {
- msg_debug_protocol("got legacy SYMBOLS method, enable rspamc protocol workaround");
- task->cmd = CMD_CHECK_RSPAMC;
- }
- else if (msg->method == HTTP_CHECK) {
- msg_debug_protocol("got legacy CHECK method, enable rspamc protocol workaround");
- task->cmd = CMD_CHECK_RSPAMC;
- }
- else {
- ret = rspamd_protocol_handle_url(task, msg);
- }
-
- if (msg->flags & RSPAMD_HTTP_FLAG_SPAMC) {
- msg_debug_protocol("got legacy SA input, enable spamc protocol workaround");
- task->cmd = CMD_CHECK_SPAMC;
- }
-
- return ret;
- }
-
- /* Structure for writing tree data */
- struct tree_cb_data {
- ucl_object_t *top;
- khash_t(rspamd_url_host_hash) * seen;
- struct rspamd_task *task;
- };
-
- static ucl_object_t *
- rspamd_protocol_extended_url(struct rspamd_task *task,
- struct rspamd_url *url,
- const gchar *encoded, gsize enclen)
- {
- ucl_object_t *obj, *elt;
-
- obj = ucl_object_typed_new(UCL_OBJECT);
-
- elt = ucl_object_fromstring_common(encoded, enclen, 0);
- ucl_object_insert_key(obj, elt, "url", 0, false);
-
- if (url->tldlen > 0) {
- elt = ucl_object_fromstring_common(rspamd_url_tld_unsafe(url),
- url->tldlen, 0);
- ucl_object_insert_key(obj, elt, "tld", 0, false);
- }
- if (url->hostlen > 0) {
- elt = ucl_object_fromstring_common(rspamd_url_host_unsafe(url),
- url->hostlen, 0);
- ucl_object_insert_key(obj, elt, "host", 0, false);
- }
-
- ucl_object_t *flags = ucl_object_typed_new(UCL_ARRAY);
-
- for (unsigned int i = 0; i < RSPAMD_URL_MAX_FLAG_SHIFT; i++) {
- if (url->flags & (1u << i)) {
- ucl_object_t *fl = ucl_object_fromstring(rspamd_url_flag_to_string(1u << i));
- ucl_array_append(flags, fl);
- }
- }
-
- ucl_object_insert_key(obj, flags, "flags", 0, false);
-
- if (url->ext && url->ext->linked_url) {
- encoded = rspamd_url_encode(url->ext->linked_url, &enclen, task->task_pool);
- elt = rspamd_protocol_extended_url(task, url->ext->linked_url, encoded,
- enclen);
- ucl_object_insert_key(obj, elt, "linked_url", 0, false);
- }
-
- return obj;
- }
-
- /*
- * Callback for writing urls
- */
- static void
- urls_protocol_cb(struct rspamd_url *url, struct tree_cb_data *cb)
- {
- ucl_object_t *obj;
- struct rspamd_task *task = cb->task;
- const gchar *user_field = "unknown", *encoded = NULL;
- gboolean has_user = FALSE;
- guint len = 0;
- gsize enclen = 0;
-
- if (!(task->protocol_flags & RSPAMD_TASK_PROTOCOL_FLAG_EXT_URLS)) {
- if (url->hostlen > 0) {
- if (rspamd_url_host_set_has(cb->seen, url)) {
- return;
- }
-
- goffset err_offset;
-
- if ((err_offset = rspamd_fast_utf8_validate(rspamd_url_host_unsafe(url),
- url->hostlen)) == 0) {
- obj = ucl_object_fromstring_common(rspamd_url_host_unsafe(url),
- url->hostlen, 0);
- }
- else {
- obj = ucl_object_fromstring_common(rspamd_url_host_unsafe(url),
- err_offset - 1, 0);
- }
- }
- else {
- return;
- }
-
- rspamd_url_host_set_add(cb->seen, url);
- }
- else {
- encoded = rspamd_url_encode(url, &enclen, task->task_pool);
- obj = rspamd_protocol_extended_url(task, url, encoded, enclen);
- }
-
- ucl_array_append(cb->top, obj);
-
- if (cb->task->cfg->log_urls) {
- if (task->auth_user) {
- user_field = task->auth_user;
- len = strlen(task->auth_user);
- has_user = TRUE;
- }
- else if (task->from_envelope) {
- user_field = task->from_envelope->addr;
- len = task->from_envelope->addr_len;
- }
-
- if (!encoded) {
- encoded = rspamd_url_encode(url, &enclen, task->task_pool);
- }
-
- msg_notice_task_encrypted("<%s> %s: %*s; ip: %s; URL: %*s",
- MESSAGE_FIELD_CHECK(task, message_id),
- has_user ? "user" : "from",
- len, user_field,
- rspamd_inet_address_to_string(task->from_addr),
- (gint) enclen, encoded);
- }
- }
-
- static ucl_object_t *
- rspamd_urls_tree_ucl(khash_t(rspamd_url_hash) * set,
- struct rspamd_task *task)
- {
- struct tree_cb_data cb;
- ucl_object_t *obj;
- struct rspamd_url *u;
-
- obj = ucl_object_typed_new(UCL_ARRAY);
- cb.top = obj;
- cb.task = task;
- cb.seen = kh_init(rspamd_url_host_hash);
-
- kh_foreach_key(set, u, {
- if (!(u->protocol & PROTOCOL_MAILTO)) {
- urls_protocol_cb(u, &cb);
- }
- });
-
- kh_destroy(rspamd_url_host_hash, cb.seen);
-
- return obj;
- }
-
- static void
- emails_protocol_cb(struct rspamd_url *url, struct tree_cb_data *cb)
- {
- ucl_object_t *obj;
-
- if (url->userlen > 0 && url->hostlen > 0) {
- obj = ucl_object_fromlstring(rspamd_url_user_unsafe(url),
- url->userlen + url->hostlen + 1);
- ucl_array_append(cb->top, obj);
- }
- }
-
- static ucl_object_t *
- rspamd_emails_tree_ucl(khash_t(rspamd_url_hash) * set,
- struct rspamd_task *task)
- {
- struct tree_cb_data cb;
- ucl_object_t *obj;
- struct rspamd_url *u;
-
- obj = ucl_object_typed_new(UCL_ARRAY);
- cb.top = obj;
- cb.task = task;
-
- kh_foreach_key(set, u, {
- if ((u->protocol & PROTOCOL_MAILTO)) {
- emails_protocol_cb(u, &cb);
- }
- });
-
-
- return obj;
- }
-
-
- /* Write new subject */
- static const gchar *
- rspamd_protocol_rewrite_subject(struct rspamd_task *task)
- {
- GString *subj_buf;
- gchar *res;
- const gchar *s, *c, *p;
- gsize slen = 0;
-
- c = rspamd_mempool_get_variable(task->task_pool, "metric_subject");
-
- if (c == NULL) {
- c = task->cfg->subject;
- }
-
- if (c == NULL) {
- c = SPAM_SUBJECT;
- }
-
- p = c;
- s = MESSAGE_FIELD_CHECK(task, subject);
-
- if (s) {
- slen = strlen(s);
- }
-
- subj_buf = g_string_sized_new(strlen(c) + slen);
-
- while (*p) {
- if (*p == '%') {
- switch (p[1]) {
- case 's':
- g_string_append_len(subj_buf, c, p - c);
-
- if (s) {
- g_string_append_len(subj_buf, s, slen);
- }
- c = p + 2;
- p += 2;
- break;
- case 'd':
- g_string_append_len(subj_buf, c, p - c);
- rspamd_printf_gstring(subj_buf, "%.2f", task->result->score);
- c = p + 2;
- p += 2;
- break;
- case '%':
- g_string_append_len(subj_buf, c, p - c);
- g_string_append_c(subj_buf, '%');
- c = p + 2;
- p += 2;
- break;
- default:
- p++; /* Just % something unknown */
- break;
- }
- }
- else {
- p++;
- }
- }
-
- if (p > c) {
- g_string_append_len(subj_buf, c, p - c);
- }
-
- res = rspamd_mime_header_encode(subj_buf->str, subj_buf->len);
-
- rspamd_mempool_add_destructor(task->task_pool,
- (rspamd_mempool_destruct_t) g_free,
- res);
- g_string_free(subj_buf, TRUE);
-
- return res;
- }
-
- static ucl_object_t *
- rspamd_metric_symbol_ucl(struct rspamd_task *task, struct rspamd_symbol_result *sym)
- {
- ucl_object_t *obj = NULL, *ar;
- const gchar *description = NULL;
- struct rspamd_symbol_option *opt;
-
- if (sym->sym != NULL) {
- description = sym->sym->description;
- }
-
- obj = ucl_object_typed_new(UCL_OBJECT);
- ucl_object_insert_key(obj, ucl_object_fromstring(sym->name), "name", 0, false);
- ucl_object_insert_key(obj, ucl_object_fromdouble(sym->score), "score", 0, false);
-
- if (task->cmd == CMD_CHECK_V2) {
- if (sym->sym) {
- ucl_object_insert_key(obj, ucl_object_fromdouble(sym->sym->score), "metric_score", 0, false);
- }
- else {
- ucl_object_insert_key(obj, ucl_object_fromdouble(0.0),
- "metric_score", 0, false);
- }
- }
-
- if (description) {
- ucl_object_insert_key(obj, ucl_object_fromstring(description),
- "description", 0, false);
- }
-
- if (sym->options != NULL) {
- ar = ucl_object_typed_new(UCL_ARRAY);
-
- DL_FOREACH(sym->opts_head, opt)
- {
- ucl_array_append(ar, ucl_object_fromstring_common(opt->option,
- opt->optlen, 0));
- }
-
- ucl_object_insert_key(obj, ar, "options", 0, false);
- }
-
- return obj;
- }
-
- static ucl_object_t *
- rspamd_metric_group_ucl(struct rspamd_task *task,
- struct rspamd_symbols_group *gr, gdouble score)
- {
- ucl_object_t *obj = NULL;
-
- obj = ucl_object_typed_new(UCL_OBJECT);
- ucl_object_insert_key(obj, ucl_object_fromdouble(score),
- "score", 0, false);
-
- if (gr->description) {
- ucl_object_insert_key(obj, ucl_object_fromstring(gr->description),
- "description", 0, false);
- }
-
- return obj;
- }
-
- static ucl_object_t *
- rspamd_scan_result_ucl(struct rspamd_task *task,
- struct rspamd_scan_result *mres, ucl_object_t *top)
- {
- struct rspamd_symbol_result *sym;
- gboolean is_spam;
- struct rspamd_action *action;
- ucl_object_t *obj = NULL, *sobj;
- const gchar *subject;
- struct rspamd_passthrough_result *pr = NULL;
-
- action = rspamd_check_action_metric(task, &pr, NULL);
- is_spam = !(action->flags & RSPAMD_ACTION_HAM);
-
- if (task->cmd == CMD_CHECK) {
- obj = ucl_object_typed_new(UCL_OBJECT);
- ucl_object_insert_key(obj,
- ucl_object_frombool(is_spam),
- "is_spam", 0, false);
- }
- else {
- obj = top;
- }
-
- if (pr) {
- if (pr->message && !(pr->flags & RSPAMD_PASSTHROUGH_NO_SMTP_MESSAGE)) {
- /* Add smtp message if it does not exist: see #3269 for details */
- if (ucl_object_lookup(task->messages, "smtp_message") == NULL) {
- ucl_object_insert_key(task->messages,
- ucl_object_fromstring_common(pr->message, 0, UCL_STRING_RAW),
- "smtp_message", 0,
- false);
- }
- }
-
- ucl_object_insert_key(obj,
- ucl_object_fromstring(pr->module),
- "passthrough_module", 0, false);
- }
-
- ucl_object_insert_key(obj,
- ucl_object_frombool(RSPAMD_TASK_IS_SKIPPED(task)),
- "is_skipped", 0, false);
-
- if (!isnan(mres->score)) {
- ucl_object_insert_key(obj, ucl_object_fromdouble(mres->score),
- "score", 0, false);
- }
- else {
- ucl_object_insert_key(obj,
- ucl_object_fromdouble(0.0), "score", 0, false);
- }
-
- ucl_object_insert_key(obj,
- ucl_object_fromdouble(rspamd_task_get_required_score(task, mres)),
- "required_score", 0, false);
- ucl_object_insert_key(obj,
- ucl_object_fromstring(action->name),
- "action", 0, false);
-
- if (action->action_type == METRIC_ACTION_REWRITE_SUBJECT) {
- subject = rspamd_protocol_rewrite_subject(task);
-
- if (subject) {
- ucl_object_insert_key(obj, ucl_object_fromstring(subject),
- "subject", 0, false);
- }
- }
- if (action->flags & RSPAMD_ACTION_MILTER) {
- /* Treat milter action specially */
- if (action->action_type == METRIC_ACTION_DISCARD) {
- ucl_object_insert_key(obj, ucl_object_fromstring("discard"),
- "reject", 0, false);
- }
- else if (action->action_type == METRIC_ACTION_QUARANTINE) {
- ucl_object_insert_key(obj, ucl_object_fromstring("quarantine"),
- "reject", 0, false);
- }
- }
-
- /* Now handle symbols */
- if (task->cmd != CMD_CHECK) {
- /* Insert actions thresholds */
- ucl_object_t *actions_obj = ucl_object_typed_new(UCL_OBJECT);
-
- for (int i = task->result->nactions - 1; i >= 0; i--) {
- struct rspamd_action_config *action_lim = &task->result->actions_config[i];
-
- if (!isnan(action_lim->cur_limit) &&
- !(action_lim->action->flags & (RSPAMD_ACTION_NO_THRESHOLD | RSPAMD_ACTION_HAM))) {
- ucl_object_insert_key(actions_obj, ucl_object_fromdouble(action_lim->cur_limit),
- action_lim->action->name, 0, true);
- }
- }
-
- ucl_object_insert_key(obj, actions_obj, "thresholds", 0, false);
-
- /* For checkv2 we insert symbols as a separate object */
- obj = ucl_object_typed_new(UCL_OBJECT);
- }
-
- kh_foreach_value(mres->symbols, sym, {
- if (!(sym->flags & RSPAMD_SYMBOL_RESULT_IGNORED)) {
- sobj = rspamd_metric_symbol_ucl(task, sym);
- ucl_object_insert_key(obj, sobj, sym->name, 0, false);
- }
- })
-
- if (task->cmd != CMD_CHECK)
- {
- /* For checkv2 we insert symbols as a separate object */
- ucl_object_insert_key(top, obj, "symbols", 0, false);
- }
- else
- {
- /* For legacy check we just insert it as "default" all together */
- ucl_object_insert_key(top, obj, DEFAULT_METRIC, 0, false);
- }
-
- /* Handle groups if needed */
- if (task->protocol_flags & RSPAMD_TASK_PROTOCOL_FLAG_GROUPS) {
- struct rspamd_symbols_group *gr;
- gdouble gr_score;
-
- obj = ucl_object_typed_new(UCL_OBJECT);
- ucl_object_reserve(obj, kh_size(mres->sym_groups));
-
- kh_foreach(mres->sym_groups, gr, gr_score, {
- if (task->cfg->public_groups_only &&
- !(gr->flags & RSPAMD_SYMBOL_GROUP_PUBLIC)) {
- continue;
- }
- sobj = rspamd_metric_group_ucl(task, gr, gr_score);
- ucl_object_insert_key(obj, sobj, gr->name, 0, false);
- });
-
- ucl_object_insert_key(top, obj, "groups", 0, false);
- }
-
- return obj;
- }
-
- void rspamd_ucl_torspamc_output(const ucl_object_t *top,
- rspamd_fstring_t **out)
- {
- const ucl_object_t *symbols, *score,
- *required_score, *is_spam, *elt, *cur;
- ucl_object_iter_t iter = NULL;
-
- score = ucl_object_lookup(top, "score");
- required_score = ucl_object_lookup(top, "required_score");
- is_spam = ucl_object_lookup(top, "is_spam");
- rspamd_printf_fstring(out,
- "Metric: default; %s; %.2f / %.2f / 0.0\r\n",
- ucl_object_toboolean(is_spam) ? "True" : "False",
- ucl_object_todouble(score),
- ucl_object_todouble(required_score));
- elt = ucl_object_lookup(top, "action");
- if (elt != NULL) {
- rspamd_printf_fstring(out, "Action: %s\r\n",
- ucl_object_tostring(elt));
- }
-
- elt = ucl_object_lookup(top, "subject");
- if (elt != NULL) {
- rspamd_printf_fstring(out, "Subject: %s\r\n",
- ucl_object_tostring(elt));
- }
-
- symbols = ucl_object_lookup(top, "symbols");
-
- if (symbols != NULL) {
- iter = NULL;
- while ((elt = ucl_object_iterate(symbols, &iter, true)) != NULL) {
- if (elt->type == UCL_OBJECT) {
- const ucl_object_t *sym_score;
- sym_score = ucl_object_lookup(elt, "score");
- rspamd_printf_fstring(out, "Symbol: %s(%.2f)\r\n",
- ucl_object_key(elt),
- ucl_object_todouble(sym_score));
- }
- }
- }
-
- elt = ucl_object_lookup(top, "messages");
- if (elt != NULL) {
- iter = NULL;
- while ((cur = ucl_object_iterate(elt, &iter, true)) != NULL) {
- if (cur->type == UCL_STRING) {
- rspamd_printf_fstring(out, "Message: %s\r\n",
- ucl_object_tostring(cur));
- }
- }
- }
-
- elt = ucl_object_lookup(top, "message-id");
- if (elt != NULL) {
- rspamd_printf_fstring(out, "Message-ID: %s\r\n",
- ucl_object_tostring(elt));
- }
- }
-
- void rspamd_ucl_tospamc_output(const ucl_object_t *top,
- rspamd_fstring_t **out)
- {
- const ucl_object_t *symbols, *score,
- *required_score, *is_spam, *elt;
- ucl_object_iter_t iter = NULL;
- rspamd_fstring_t *f;
-
- score = ucl_object_lookup(top, "score");
- required_score = ucl_object_lookup(top, "required_score");
- is_spam = ucl_object_lookup(top, "is_spam");
- rspamd_printf_fstring(out,
- "Spam: %s ; %.2f / %.2f\r\n\r\n",
- ucl_object_toboolean(is_spam) ? "True" : "False",
- ucl_object_todouble(score),
- ucl_object_todouble(required_score));
-
- symbols = ucl_object_lookup(top, "symbols");
-
- if (symbols != NULL) {
- while ((elt = ucl_object_iterate(symbols, &iter, true)) != NULL) {
- if (elt->type == UCL_OBJECT) {
- rspamd_printf_fstring(out, "%s,",
- ucl_object_key(elt));
- }
- }
- /* Ugly hack, but the whole spamc is ugly */
- f = *out;
- if (f->str[f->len - 1] == ',') {
- f->len--;
-
- *out = rspamd_fstring_append(*out, CRLF, 2);
- }
- }
- }
-
- static void
- rspamd_protocol_output_profiling(struct rspamd_task *task,
- ucl_object_t *top)
- {
- GHashTable *tbl;
- GHashTableIter it;
- gpointer k, v;
- ucl_object_t *prof;
- gdouble val;
-
- prof = ucl_object_typed_new(UCL_OBJECT);
- tbl = rspamd_mempool_get_variable(task->task_pool, "profile");
-
- if (tbl) {
- g_hash_table_iter_init(&it, tbl);
-
- while (g_hash_table_iter_next(&it, &k, &v)) {
- val = *(gdouble *) v;
- ucl_object_insert_key(prof, ucl_object_fromdouble(val),
- (const char *) k, 0, false);
- }
- }
-
- ucl_object_insert_key(top, prof, "profile", 0, false);
- }
-
- ucl_object_t *
- rspamd_protocol_write_ucl(struct rspamd_task *task,
- enum rspamd_protocol_flags flags)
- {
- ucl_object_t *top = NULL;
- GString *dkim_sig;
- GList *dkim_sigs;
- const ucl_object_t *milter_reply;
-
- rspamd_task_set_finish_time(task);
- top = ucl_object_typed_new(UCL_OBJECT);
-
- rspamd_mempool_add_destructor(task->task_pool,
- (rspamd_mempool_destruct_t) ucl_object_unref, top);
-
- if (flags & RSPAMD_PROTOCOL_METRICS) {
- rspamd_scan_result_ucl(task, task->result, top);
- }
-
- if (flags & RSPAMD_PROTOCOL_MESSAGES) {
- if (G_UNLIKELY(task->cfg->compat_messages)) {
- const ucl_object_t *cur;
- ucl_object_t *msg_object;
- ucl_object_iter_t iter = NULL;
-
- msg_object = ucl_object_typed_new(UCL_ARRAY);
-
- while ((cur = ucl_object_iterate(task->messages, &iter, true)) != NULL) {
- if (cur->type == UCL_STRING) {
- ucl_array_append(msg_object, ucl_object_ref(cur));
- }
- }
-
- ucl_object_insert_key(top, msg_object, "messages", 0, false);
- }
- else {
- ucl_object_insert_key(top, ucl_object_ref(task->messages),
- "messages", 0, false);
- }
- }
-
- if (flags & RSPAMD_PROTOCOL_URLS && task->message) {
- if (kh_size(MESSAGE_FIELD(task, urls)) > 0) {
- ucl_object_insert_key(top,
- rspamd_urls_tree_ucl(MESSAGE_FIELD(task, urls), task),
- "urls", 0, false);
- ucl_object_insert_key(top,
- rspamd_emails_tree_ucl(MESSAGE_FIELD(task, urls), task),
- "emails", 0, false);
- }
- }
-
- if (flags & RSPAMD_PROTOCOL_EXTRA) {
- if (G_UNLIKELY(RSPAMD_TASK_IS_PROFILING(task))) {
- rspamd_protocol_output_profiling(task, top);
- }
- }
-
- if (flags & RSPAMD_PROTOCOL_BASIC) {
- ucl_object_insert_key(top,
- ucl_object_fromstring(MESSAGE_FIELD_CHECK(task, message_id)),
- "message-id", 0, false);
- ucl_object_insert_key(top,
- ucl_object_fromdouble(task->time_real_finish - task->task_timestamp),
- "time_real", 0, false);
- }
-
- if (flags & RSPAMD_PROTOCOL_DKIM) {
- dkim_sigs = rspamd_mempool_get_variable(task->task_pool,
- RSPAMD_MEMPOOL_DKIM_SIGNATURE);
-
- if (dkim_sigs) {
- if (dkim_sigs->next) {
- /* Multiple DKIM signatures */
- ucl_object_t *ar = ucl_object_typed_new(UCL_ARRAY);
-
- for (; dkim_sigs != NULL; dkim_sigs = dkim_sigs->next) {
- GString *folded_header;
- dkim_sig = (GString *) dkim_sigs->data;
-
- if (task->protocol_flags & RSPAMD_TASK_PROTOCOL_FLAG_MILTER ||
- !task->message) {
-
- folded_header = rspamd_header_value_fold(
- "DKIM-Signature", strlen("DKIM-Signature"),
- dkim_sig->str, dkim_sig->len,
- 80, RSPAMD_TASK_NEWLINES_LF, NULL);
- }
- else {
- folded_header = rspamd_header_value_fold(
- "DKIM-Signature", strlen("DKIM-Signature"),
- dkim_sig->str, dkim_sig->len,
- 80,
- MESSAGE_FIELD(task, nlines_type),
- NULL);
- }
-
- ucl_array_append(ar,
- ucl_object_fromstring_common(folded_header->str,
- folded_header->len, UCL_STRING_RAW));
- g_string_free(folded_header, TRUE);
- }
-
- ucl_object_insert_key(top,
- ar,
- "dkim-signature", 0,
- false);
- }
- else {
- /* Single DKIM signature */
- GString *folded_header;
- dkim_sig = (GString *) dkim_sigs->data;
-
- if (task->protocol_flags & RSPAMD_TASK_PROTOCOL_FLAG_MILTER) {
- folded_header = rspamd_header_value_fold(
- "DKIM-Signature", strlen("DKIM-Signature"),
- dkim_sig->str, dkim_sig->len,
- 80, RSPAMD_TASK_NEWLINES_LF, NULL);
- }
- else {
- folded_header = rspamd_header_value_fold(
- "DKIM-Signature", strlen("DKIM-Signature"),
- dkim_sig->str, dkim_sig->len,
- 80, MESSAGE_FIELD(task, nlines_type),
- NULL);
- }
-
- ucl_object_insert_key(top,
- ucl_object_fromstring_common(folded_header->str,
- folded_header->len, UCL_STRING_RAW),
- "dkim-signature", 0, false);
- g_string_free(folded_header, TRUE);
- }
- }
- }
-
- if (flags & RSPAMD_PROTOCOL_RMILTER) {
- milter_reply = rspamd_mempool_get_variable(task->task_pool,
- RSPAMD_MEMPOOL_MILTER_REPLY);
-
- if (milter_reply) {
- if (task->cmd != CMD_CHECK) {
- ucl_object_insert_key(top, ucl_object_ref(milter_reply),
- "milter", 0, false);
- }
- else {
- ucl_object_insert_key(top, ucl_object_ref(milter_reply),
- "rmilter", 0, false);
- }
- }
- }
-
- return top;
- }
-
- void rspamd_protocol_http_reply(struct rspamd_http_message *msg,
- struct rspamd_task *task, ucl_object_t **pobj)
- {
- struct rspamd_scan_result *metric_res;
- const struct rspamd_re_cache_stat *restat;
-
- ucl_object_t *top = NULL;
- rspamd_fstring_t *reply;
- gint flags = RSPAMD_PROTOCOL_DEFAULT;
- struct rspamd_action *action;
-
- /* Removed in 2.0 */
- #if 0
- GHashTableIter hiter;
- gpointer h, v;
- /* Write custom headers */
- g_hash_table_iter_init (&hiter, task->reply_headers);
- while (g_hash_table_iter_next (&hiter, &h, &v)) {
- rspamd_ftok_t *hn = h, *hv = v;
-
- rspamd_http_message_add_header (msg, hn->begin, hv->begin);
- }
- #endif
-
- flags |= RSPAMD_PROTOCOL_URLS;
-
- top = rspamd_protocol_write_ucl(task, flags);
-
- if (pobj) {
- *pobj = top;
- }
-
- if (!(task->flags & RSPAMD_TASK_FLAG_NO_LOG)) {
- rspamd_roll_history_update(task->worker->srv->history, task);
- }
- else {
- msg_debug_protocol("skip history update due to no log flag");
- }
-
- rspamd_task_write_log(task);
-
- if (task->cfg->log_flags & RSPAMD_LOG_FLAG_RE_CACHE) {
- restat = rspamd_re_cache_get_stat(task->re_rt);
- g_assert(restat != NULL);
- msg_notice_task(
- "regexp statistics: %ud pcre regexps scanned, %ud regexps matched,"
- " %ud regexps total, %ud regexps cached,"
- " %HL scanned using pcre, %HL scanned total",
- restat->regexp_checked,
- restat->regexp_matched,
- restat->regexp_total,
- restat->regexp_fast_cached,
- restat->bytes_scanned_pcre,
- restat->bytes_scanned);
- }
-
- reply = rspamd_fstring_sized_new(1000);
-
- if (msg->method < HTTP_SYMBOLS && !RSPAMD_TASK_IS_SPAMC(task)) {
- msg_debug_protocol("writing json reply");
- rspamd_ucl_emit_fstring(top, UCL_EMIT_JSON_COMPACT, &reply);
- }
- else {
- if (RSPAMD_TASK_IS_SPAMC(task)) {
- msg_debug_protocol("writing spamc legacy reply to client");
- rspamd_ucl_tospamc_output(top, &reply);
- }
- else {
- msg_debug_protocol("writing rspamc legacy reply to client");
- rspamd_ucl_torspamc_output(top, &reply);
- }
- }
-
- if (task->protocol_flags & RSPAMD_TASK_PROTOCOL_FLAG_BODY_BLOCK) {
- /* Check if we need to insert a body block */
- if (task->flags & RSPAMD_TASK_FLAG_MESSAGE_REWRITE) {
- GString *hdr_offset = g_string_sized_new(30);
-
- rspamd_printf_gstring(hdr_offset, "%z", RSPAMD_FSTRING_LEN(reply));
- rspamd_http_message_add_header(msg, MESSAGE_OFFSET_HEADER,
- hdr_offset->str);
- msg_debug_protocol("write body block at position %s",
- hdr_offset->str);
- g_string_free(hdr_offset, TRUE);
-
- /* In case of milter, we append just body, otherwise - full message */
- if (task->protocol_flags & RSPAMD_TASK_PROTOCOL_FLAG_MILTER) {
- const gchar *start;
- goffset len, hdr_off;
-
- start = task->msg.begin;
- len = task->msg.len;
-
- hdr_off = MESSAGE_FIELD(task, raw_headers_content).len;
-
- if (hdr_off < len) {
- start += hdr_off;
- len -= hdr_off;
-
- /* The problem here is that we need not end of headers, we need
- * start of body.
- *
- * Hence, we need to skip one \r\n till there is anything else in
- * a line.
- */
-
- if (*start == '\r' && len > 0) {
- start++;
- len--;
- }
-
- if (*start == '\n' && len > 0) {
- start++;
- len--;
- }
-
- msg_debug_protocol("milter version of body block size %d",
- (int) len);
- reply = rspamd_fstring_append(reply, start, len);
- }
- }
- else {
- msg_debug_protocol("general version of body block size %d",
- (int) task->msg.len);
- reply = rspamd_fstring_append(reply,
- task->msg.begin, task->msg.len);
- }
- }
- }
-
- if ((task->protocol_flags & RSPAMD_TASK_PROTOCOL_FLAG_COMPRESSED) &&
- rspamd_libs_reset_compression(task->cfg->libs_ctx)) {
- /* We can compress output */
- ZSTD_inBuffer zin;
- ZSTD_outBuffer zout;
- ZSTD_CStream *zstream;
- rspamd_fstring_t *compressed_reply;
- gsize r;
-
- zstream = task->cfg->libs_ctx->out_zstream;
- compressed_reply = rspamd_fstring_sized_new(ZSTD_compressBound(reply->len));
- zin.pos = 0;
- zin.src = reply->str;
- zin.size = reply->len;
- zout.pos = 0;
- zout.dst = compressed_reply->str;
- zout.size = compressed_reply->allocated;
-
- while (zin.pos < zin.size) {
- r = ZSTD_compressStream(zstream, &zout, &zin);
-
- if (ZSTD_isError(r)) {
- msg_err_protocol("cannot compress: %s", ZSTD_getErrorName(r));
- rspamd_fstring_free(compressed_reply);
- rspamd_http_message_set_body_from_fstring_steal(msg, reply);
-
- goto end;
- }
- }
-
- ZSTD_flushStream(zstream, &zout);
- r = ZSTD_endStream(zstream, &zout);
-
- if (ZSTD_isError(r)) {
- msg_err_protocol("cannot finalize compress: %s", ZSTD_getErrorName(r));
- rspamd_fstring_free(compressed_reply);
- rspamd_http_message_set_body_from_fstring_steal(msg, reply);
-
- goto end;
- }
-
- msg_info_protocol("writing compressed results: %z bytes before "
- "%z bytes after",
- zin.pos, zout.pos);
- compressed_reply->len = zout.pos;
- rspamd_fstring_free(reply);
- rspamd_http_message_set_body_from_fstring_steal(msg, compressed_reply);
- rspamd_http_message_add_header(msg, COMPRESSION_HEADER, "zstd");
-
- if (task->cfg->libs_ctx->out_dict &&
- task->cfg->libs_ctx->out_dict->id != 0) {
- gchar dict_str[32];
-
- rspamd_snprintf(dict_str, sizeof(dict_str), "%ud",
- task->cfg->libs_ctx->out_dict->id);
- rspamd_http_message_add_header(msg, "Dictionary", dict_str);
- }
- }
- else {
- rspamd_http_message_set_body_from_fstring_steal(msg, reply);
- }
-
- end:
- if (!(task->flags & RSPAMD_TASK_FLAG_NO_STAT)) {
- /* Update stat for default metric */
-
- msg_debug_protocol("skip stats update due to no_stat flag");
- metric_res = task->result;
-
- if (metric_res != NULL) {
-
- action = rspamd_check_action_metric(task, NULL, NULL);
- /* TODO: handle custom actions in stats */
- if (action->action_type == METRIC_ACTION_SOFT_REJECT &&
- (task->flags & RSPAMD_TASK_FLAG_GREYLISTED)) {
- /* Set stat action to greylist to display greylisted messages */
- #ifndef HAVE_ATOMIC_BUILTINS
- task->worker->srv->stat->actions_stat[METRIC_ACTION_GREYLIST]++;
- #else
- __atomic_add_fetch(&task->worker->srv->stat->actions_stat[METRIC_ACTION_GREYLIST],
- 1, __ATOMIC_RELEASE);
- #endif
- }
- else if (action->action_type < METRIC_ACTION_MAX) {
- #ifndef HAVE_ATOMIC_BUILTINS
- task->worker->srv->stat->actions_stat[action->action_type]++;
- #else
- __atomic_add_fetch(&task->worker->srv->stat->actions_stat[action->action_type],
- 1, __ATOMIC_RELEASE);
- #endif
- }
- }
-
- /* Increase counters */
- #ifndef HAVE_ATOMIC_BUILTINS
- task->worker->srv->stat->messages_scanned++;
- #else
- __atomic_add_fetch(&task->worker->srv->stat->messages_scanned,
- 1, __ATOMIC_RELEASE);
- #endif
-
- /* Set average processing time */
- guint32 slot;
- float processing_time = task->time_real_finish - task->task_timestamp;
-
- #ifndef HAVE_ATOMIC_BUILTINS
- slot = task->worker->srv->stat->avg_time.cur_slot++;
- #else
- slot = __atomic_fetch_add(&task->worker->srv->stat->avg_time.cur_slot,
- 1, __ATOMIC_RELEASE);
- #endif
- slot = slot % MAX_AVG_TIME_SLOTS;
- /* TODO: this should be atomic but it is not supported in C */
- task->worker->srv->stat->avg_time.avg_time[slot] = processing_time;
- }
- }
-
- void rspamd_protocol_write_log_pipe(struct rspamd_task *task)
- {
- struct rspamd_worker_log_pipe *lp;
- struct rspamd_protocol_log_message_sum *ls;
- lua_State *L = task->cfg->lua_state;
- struct rspamd_scan_result *mres;
- struct rspamd_symbol_result *sym;
- gint id, i;
- guint32 n = 0, nextra = 0;
- gsize sz;
- GArray *extra;
- struct rspamd_protocol_log_symbol_result er;
- struct rspamd_task **ptask;
-
- /* Get extra results from lua plugins */
- extra = g_array_new(FALSE, FALSE, sizeof(er));
-
- lua_getglobal(L, "rspamd_plugins");
- if (lua_istable(L, -1)) {
- lua_pushnil(L);
-
- while (lua_next(L, -2)) {
- if (lua_istable(L, -1)) {
- lua_pushvalue(L, -2);
- /* stack:
- * -1: copy of key
- * -2: value (module table)
- * -3: key (module name)
- * -4: global
- */
- lua_pushstring(L, "log_callback");
- lua_gettable(L, -3);
- /* stack:
- * -1: func
- * -2: copy of key
- * -3: value (module table)
- * -3: key (module name)
- * -4: global
- */
- if (lua_isfunction(L, -1)) {
- ptask = lua_newuserdata(L, sizeof(*ptask));
- *ptask = task;
- rspamd_lua_setclass(L, rspamd_task_classname, -1);
- /* stack:
- * -1: task
- * -2: func
- * -3: key copy
- * -4: value (module table)
- * -5: key (module name)
- * -6: global
- */
- msg_debug_protocol("calling for %s", lua_tostring(L, -3));
- if (lua_pcall(L, 1, 1, 0) != 0) {
- msg_info_protocol("call to log callback %s failed: %s",
- lua_tostring(L, -2), lua_tostring(L, -1));
- lua_pop(L, 1);
- /* stack:
- * -1: key copy
- * -2: value
- * -3: key
- */
- }
- else {
- /* stack:
- * -1: result
- * -2: key copy
- * -3: value
- * -4: key
- */
- if (lua_istable(L, -1)) {
- /* Another iteration */
- lua_pushnil(L);
-
- while (lua_next(L, -2)) {
- /* stack:
- * -1: value
- * -2: key
- * -3: result table (pcall)
- * -4: key copy (parent)
- * -5: value (parent)
- * -6: key (parent)
- */
- if (lua_istable(L, -1)) {
- er.id = 0;
- er.score = 0.0;
-
- lua_rawgeti(L, -1, 1);
- if (lua_isnumber(L, -1)) {
- er.id = lua_tonumber(L, -1);
- }
- lua_rawgeti(L, -2, 2);
- if (lua_isnumber(L, -1)) {
- er.score = lua_tonumber(L, -1);
- }
- /* stack:
- * -1: value[2]
- * -2: value[1]
- * -3: values
- * -4: key
- * -5: result table (pcall)
- * -6: key copy (parent)
- * -7: value (parent)
- * -8: key (parent)
- */
- lua_pop(L, 2); /* Values */
- g_array_append_val(extra, er);
- }
-
- lua_pop(L, 1); /* Value for lua_next */
- }
-
- lua_pop(L, 1); /* Table result of pcall */
- }
- else {
- msg_info_protocol("call to log callback %s returned "
- "wrong type: %s",
- lua_tostring(L, -2),
- lua_typename(L, lua_type(L, -1)));
- lua_pop(L, 1); /* Returned error */
- }
- }
- }
- else {
- lua_pop(L, 1);
- /* stack:
- * -1: key copy
- * -2: value
- * -3: key
- */
- }
- }
-
- lua_pop(L, 2); /* Top table + key copy */
- }
-
- lua_pop(L, 1); /* rspamd_plugins global */
- }
- else {
- lua_pop(L, 1);
- }
-
- nextra = extra->len;
-
- LL_FOREACH(task->cfg->log_pipes, lp)
- {
- if (lp->fd != -1) {
- switch (lp->type) {
- case RSPAMD_LOG_PIPE_SYMBOLS:
- mres = task->result;
-
- if (mres) {
- n = kh_size(mres->symbols);
- sz = sizeof(*ls) +
- sizeof(struct rspamd_protocol_log_symbol_result) *
- (n + nextra);
- ls = g_malloc0(sz);
-
- /* Handle settings id */
-
- if (task->settings_elt) {
- ls->settings_id = task->settings_elt->id;
- }
- else {
- ls->settings_id = 0;
- }
-
- ls->score = mres->score;
- ls->required_score = rspamd_task_get_required_score(task,
- mres);
- ls->nresults = n;
- ls->nextra = nextra;
-
- i = 0;
-
- kh_foreach_value(mres->symbols, sym, {
- id = rspamd_symcache_find_symbol(task->cfg->cache,
- sym->name);
-
- if (id >= 0) {
- ls->results[i].id = id;
- ls->results[i].score = sym->score;
- }
- else {
- ls->results[i].id = -1;
- ls->results[i].score = 0.0;
- }
-
- i++;
- });
-
- memcpy(&ls->results[n], extra->data, nextra * sizeof(er));
- }
- else {
- sz = sizeof(*ls);
- ls = g_malloc0(sz);
- ls->nresults = 0;
- }
-
- /* We don't really care about return value here */
- if (write(lp->fd, ls, sz) == -1) {
- msg_info_protocol("cannot write to log pipe: %s",
- strerror(errno));
- }
-
- g_free(ls);
- break;
- default:
- msg_err_protocol("unknown log format %d", lp->type);
- break;
- }
- }
- }
-
- g_array_free(extra, TRUE);
- }
-
- void rspamd_protocol_write_reply(struct rspamd_task *task, ev_tstamp timeout)
- {
- struct rspamd_http_message *msg;
- const gchar *ctype = "application/json";
- rspamd_fstring_t *reply;
-
- msg = rspamd_http_new_message(HTTP_RESPONSE);
-
- if (rspamd_http_connection_is_encrypted(task->http_conn)) {
- msg_info_protocol("<%s> writing encrypted reply",
- MESSAGE_FIELD_CHECK(task, message_id));
- }
-
- /* Compatibility */
- if (task->cmd == CMD_CHECK_RSPAMC) {
- msg->method = HTTP_SYMBOLS;
- }
- else if (task->cmd == CMD_CHECK_SPAMC) {
- msg->method = HTTP_SYMBOLS;
- msg->flags |= RSPAMD_HTTP_FLAG_SPAMC;
- }
-
- if (task->err != NULL) {
- msg_debug_protocol("writing error reply to client");
- ucl_object_t *top = NULL;
-
- top = ucl_object_typed_new(UCL_OBJECT);
- msg->code = 500 + task->err->code % 100;
- msg->status = rspamd_fstring_new_init(task->err->message,
- strlen(task->err->message));
- ucl_object_insert_key(top, ucl_object_fromstring(task->err->message),
- "error", 0, false);
- ucl_object_insert_key(top,
- ucl_object_fromstring(g_quark_to_string(task->err->domain)),
- "error_domain", 0, false);
- reply = rspamd_fstring_sized_new(256);
- rspamd_ucl_emit_fstring(top, UCL_EMIT_JSON_COMPACT, &reply);
- ucl_object_unref(top);
-
- /* We also need to validate utf8 */
- if (rspamd_fast_utf8_validate(reply->str, reply->len) != 0) {
- gsize valid_len;
- gchar *validated;
-
- /* We copy reply several times here but it should be a rare case */
- validated = rspamd_str_make_utf_valid(reply->str, reply->len,
- &valid_len, task->task_pool);
- rspamd_http_message_set_body(msg, validated, valid_len);
- rspamd_fstring_free(reply);
- }
- else {
- rspamd_http_message_set_body_from_fstring_steal(msg, reply);
- }
- }
- else {
- msg->status = rspamd_fstring_new_init("OK", 2);
-
- switch (task->cmd) {
- case CMD_CHECK:
- case CMD_CHECK_RSPAMC:
- case CMD_CHECK_SPAMC:
- case CMD_SKIP:
- case CMD_CHECK_V2:
- rspamd_protocol_http_reply(msg, task, NULL);
- rspamd_protocol_write_log_pipe(task);
- break;
- case CMD_PING:
- msg_debug_protocol("writing pong to client");
- rspamd_http_message_set_body(msg, "pong" CRLF, 6);
- ctype = "text/plain";
- break;
- default:
- msg_err_protocol("BROKEN");
- break;
- }
- }
-
- ev_now_update(task->event_loop);
- msg->date = ev_time();
-
- rspamd_http_connection_reset(task->http_conn);
- rspamd_http_connection_write_message(task->http_conn, msg, NULL,
- ctype, task, timeout);
-
- task->processed_stages |= RSPAMD_TASK_STAGE_REPLIED;
- }
|