diff options
Diffstat (limited to 'src/plugins')
29 files changed, 2010 insertions, 2192 deletions
diff --git a/src/plugins/chartable.c b/src/plugins/chartable.c index 414647153..b6e42457a 100644 --- a/src/plugins/chartable.c +++ b/src/plugins/chartable.c @@ -85,8 +85,12 @@ chartable_get_context (struct rspamd_config *cfg) chartable_module.ctx_offset); } -static void chartable_symbol_callback (struct rspamd_task *task, void *unused); -static void chartable_url_symbol_callback (struct rspamd_task *task, void *unused); +static void chartable_symbol_callback (struct rspamd_task *task, + struct rspamd_symcache_item *item, + void *unused); +static void chartable_url_symbol_callback (struct rspamd_task *task, + struct rspamd_symcache_item *item, + void *unused); gint chartable_module_init (struct rspamd_config *cfg, struct module_ctx **ctx) @@ -146,14 +150,14 @@ chartable_module_config (struct rspamd_config *cfg) chartable_module_ctx->threshold = DEFAULT_THRESHOLD; } - rspamd_symbols_cache_add_symbol (cfg->cache, + rspamd_symcache_add_symbol (cfg->cache, chartable_module_ctx->symbol, 0, chartable_symbol_callback, NULL, SYMBOL_TYPE_NORMAL, -1); - rspamd_symbols_cache_add_symbol (cfg->cache, + rspamd_symcache_add_symbol (cfg->cache, chartable_module_ctx->url_symbol, 0, chartable_url_symbol_callback, @@ -354,12 +358,12 @@ rspamd_chartable_process_word_utf (struct rspamd_task *task, guint *ncap, struct chartable_ctx *chartable_module_ctx) { - const gchar *p, *end; + const UChar32 *p, *end; gdouble badness = 0.0; UChar32 uc; UBlockCode sc; gint last_is_latin = -1; - guint same_script_count = 0, nsym = 0, i = 0; + guint same_script_count = 0, nsym = 0; enum { start_process = 0, got_alpha, @@ -367,13 +371,13 @@ rspamd_chartable_process_word_utf (struct rspamd_task *task, got_unknown, } state = start_process, prev_state = start_process; - p = w->begin; - end = p + w->len; + p = w->unicode.begin; + end = p + w->unicode.len; /* We assume that w is normalized */ - while (p + i < end) { - U8_NEXT (p, i, w->len, uc); + while (p < end) { + uc = *p++; if (((gint32)uc) < 0) { break; @@ -460,7 +464,8 @@ rspamd_chartable_process_word_utf (struct rspamd_task *task, } } - msg_debug_chartable ("word %*s, badness: %.2f", (gint)w->len, w->begin, + msg_debug_chartable ("word %*s, badness: %.2f", + (gint)w->normalized.len, w->normalized.begin, badness); return badness; @@ -486,11 +491,11 @@ rspamd_chartable_process_word_ascii (struct rspamd_task *task, got_unknown, } state = start_process; - p = w->begin; - end = p + w->len; + p = w->normalized.begin; + end = p + w->normalized.len; last_sc = 0; - if (w->len > chartable_module_ctx->max_word_len) { + if (w->normalized.len > chartable_module_ctx->max_word_len) { return 0.0; } @@ -545,7 +550,8 @@ rspamd_chartable_process_word_ascii (struct rspamd_task *task, badness = 4.0; } - msg_debug_chartable ("word %*s, badness: %.2f", (gint)w->len, w->begin, + msg_debug_chartable ("word %*s, badness: %.2f", + (gint)w->normalized.len, w->normalized.begin, badness); return badness; @@ -568,9 +574,9 @@ rspamd_chartable_process_part (struct rspamd_task *task, for (i = 0; i < part->utf_words->len; i++) { w = &g_array_index (part->utf_words, rspamd_stat_token_t, i); - if (w->len > 0 && (w->flags & RSPAMD_STAT_TOKEN_FLAG_TEXT)) { + if ((w->flags & RSPAMD_STAT_TOKEN_FLAG_TEXT)) { - if (IS_PART_UTF (part)) { + if (w->flags & RSPAMD_STAT_TOKEN_FLAG_UTF) { cur_score += rspamd_chartable_process_word_utf (task, w, FALSE, &ncap, chartable_module_ctx); } @@ -588,7 +594,7 @@ rspamd_chartable_process_part (struct rspamd_task *task, */ part->capital_letters += ncap; - cur_score /= (gdouble)part->utf_words->len; + cur_score /= (gdouble)part->nwords; if (cur_score > 2.0) { cur_score = 2.0; @@ -602,7 +608,9 @@ rspamd_chartable_process_part (struct rspamd_task *task, } static void -chartable_symbol_callback (struct rspamd_task *task, void *unused) +chartable_symbol_callback (struct rspamd_task *task, + struct rspamd_symcache_item *item, + void *unused) { guint i; struct rspamd_mime_text_part *part; @@ -613,59 +621,40 @@ chartable_symbol_callback (struct rspamd_task *task, void *unused) rspamd_chartable_process_part (task, part, chartable_module_ctx); } - if (task->subject != NULL) { - GArray *words; + if (task->meta_words != NULL) { rspamd_stat_token_t *w; - guint i; - gdouble cur_score = 0.0; - - UText utxt = UTEXT_INITIALIZER; - UErrorCode uc_err = U_ZERO_ERROR; - gsize slen = strlen (task->subject); - - utext_openUTF8 (&utxt, - task->subject, - slen, - &uc_err); - - words = rspamd_tokenize_text (task->subject, slen, - &utxt, - RSPAMD_TOKENIZE_UTF, - NULL, - NULL, - NULL); - - if (words && words->len > 0) { - for (i = 0; i < words->len; i++) { - w = &g_array_index (words, rspamd_stat_token_t, i); - cur_score += rspamd_chartable_process_word_utf (task, w, FALSE, - NULL, chartable_module_ctx); - } + gdouble cur_score = 0; + gsize arlen = task->meta_words->len; - cur_score /= (gdouble)words->len; - - if (cur_score > 2.0) { - cur_score = 2.0; - } + for (i = 0; i < arlen; i++) { + w = &g_array_index (task->meta_words, rspamd_stat_token_t, i); + cur_score += rspamd_chartable_process_word_utf (task, w, FALSE, + NULL, chartable_module_ctx); + } - if (cur_score > chartable_module_ctx->threshold) { - rspamd_task_insert_result (task, chartable_module_ctx->symbol, - cur_score, "subject"); + cur_score /= (gdouble)arlen; - } + if (cur_score > 2.0) { + cur_score = 2.0; } - if (words) { - g_array_free (words, TRUE); - } + if (cur_score > chartable_module_ctx->threshold) { + rspamd_task_insert_result (task, chartable_module_ctx->symbol, + cur_score, "subject"); - utext_close (&utxt); + } } + + rspamd_symcache_finalize_item (task, item); } static void -chartable_url_symbol_callback (struct rspamd_task *task, void *unused) +chartable_url_symbol_callback (struct rspamd_task *task, + struct rspamd_symcache_item *item, + void *unused) { + /* XXX: TODO: unbreak module once URLs unicode project is over */ +#if 0 struct rspamd_url *u; GHashTableIter it; gpointer k, v; @@ -684,10 +673,10 @@ chartable_url_symbol_callback (struct rspamd_task *task, void *unused) } if (u->hostlen > 0) { - w.begin = u->host; - w.len = u->hostlen; + w.stemmed.begin = u->host; + w.stemmed.len = u->hostlen; - if (g_utf8_validate (w.begin, w.len, NULL)) { + if (g_utf8_validate (w.stemmed.begin, w.stemmed.len, NULL)) { cur_score += rspamd_chartable_process_word_utf (task, &w, TRUE, NULL, chartable_module_ctx); } @@ -709,10 +698,10 @@ chartable_url_symbol_callback (struct rspamd_task *task, void *unused) } if (u->hostlen > 0) { - w.begin = u->host; - w.len = u->hostlen; + w.stemmed.begin = u->host; + w.stemmed.len = u->hostlen; - if (g_utf8_validate (w.begin, w.len, NULL)) { + if (g_utf8_validate (w.stemmed.begin, w.stemmed.len, NULL)) { cur_score += rspamd_chartable_process_word_utf (task, &w, TRUE, NULL, chartable_module_ctx); } @@ -728,4 +717,6 @@ chartable_url_symbol_callback (struct rspamd_task *task, void *unused) cur_score, NULL); } +#endif + rspamd_symcache_finalize_item (task, item); } diff --git a/src/plugins/dkim_check.c b/src/plugins/dkim_check.c index 1784612f0..6ea567178 100644 --- a/src/plugins/dkim_check.c +++ b/src/plugins/dkim_check.c @@ -50,6 +50,8 @@ #define DEFAULT_TIME_JITTER 60 #define DEFAULT_MAX_SIGS 5 +static const gchar *M = "rspamd dkim plugin"; + static const gchar default_sign_headers[] = "" "(o)from:(o)sender:(o)reply-to:(o)subject:(o)date:(o)message-id:" "(o)to:(o)cc:(o)mime-version:(o)content-type:(o)content-transfer-encoding:" @@ -83,15 +85,19 @@ struct dkim_check_result { rspamd_dkim_context_t *ctx; rspamd_dkim_key_t *key; struct rspamd_task *task; - gint res; + struct rspamd_dkim_check_result *res; gdouble mult_allow; gdouble mult_deny; - struct rspamd_async_watcher *w; + struct rspamd_symcache_item *item; struct dkim_check_result *next, *prev, *first; }; -static void dkim_symbol_callback (struct rspamd_task *task, void *unused); -static void dkim_sign_callback (struct rspamd_task *task, void *unused); +static void dkim_symbol_callback (struct rspamd_task *task, + struct rspamd_symcache_item *item, + void *unused); +static void dkim_sign_callback (struct rspamd_task *task, + struct rspamd_symcache_item *item, + void *unused); static gint lua_dkim_sign_handler (lua_State *L); static gint lua_dkim_verify_handler (lua_State *L); @@ -498,50 +504,50 @@ dkim_module_config (struct rspamd_config *cfg) return TRUE; } - cb_id = rspamd_symbols_cache_add_symbol (cfg->cache, + cb_id = rspamd_symcache_add_symbol (cfg->cache, "DKIM_CHECK", 0, dkim_symbol_callback, NULL, SYMBOL_TYPE_CALLBACK, -1); - rspamd_symbols_cache_add_symbol (cfg->cache, + rspamd_symcache_add_symbol (cfg->cache, dkim_module_ctx->symbol_reject, 0, NULL, NULL, - SYMBOL_TYPE_VIRTUAL|SYMBOL_TYPE_FINE, + SYMBOL_TYPE_VIRTUAL | SYMBOL_TYPE_FINE, cb_id); - rspamd_symbols_cache_add_symbol (cfg->cache, + rspamd_symcache_add_symbol (cfg->cache, dkim_module_ctx->symbol_na, 0, NULL, NULL, - SYMBOL_TYPE_VIRTUAL|SYMBOL_TYPE_FINE, + SYMBOL_TYPE_VIRTUAL | SYMBOL_TYPE_FINE, cb_id); - rspamd_symbols_cache_add_symbol (cfg->cache, + rspamd_symcache_add_symbol (cfg->cache, dkim_module_ctx->symbol_permfail, 0, NULL, NULL, - SYMBOL_TYPE_VIRTUAL|SYMBOL_TYPE_FINE, + SYMBOL_TYPE_VIRTUAL | SYMBOL_TYPE_FINE, cb_id); - rspamd_symbols_cache_add_symbol (cfg->cache, + rspamd_symcache_add_symbol (cfg->cache, dkim_module_ctx->symbol_tempfail, 0, NULL, NULL, - SYMBOL_TYPE_VIRTUAL|SYMBOL_TYPE_FINE, + SYMBOL_TYPE_VIRTUAL | SYMBOL_TYPE_FINE, cb_id); - rspamd_symbols_cache_add_symbol (cfg->cache, + rspamd_symcache_add_symbol (cfg->cache, dkim_module_ctx->symbol_allow, 0, NULL, NULL, - SYMBOL_TYPE_VIRTUAL|SYMBOL_TYPE_FINE, + SYMBOL_TYPE_VIRTUAL | SYMBOL_TYPE_FINE, cb_id); - rspamd_symbols_cache_add_symbol (cfg->cache, + rspamd_symcache_add_symbol (cfg->cache, "DKIM_TRACE", 0, NULL, NULL, - SYMBOL_TYPE_VIRTUAL|SYMBOL_TYPE_NOSTAT, + SYMBOL_TYPE_VIRTUAL | SYMBOL_TYPE_NOSTAT, cb_id); rspamd_config_add_symbol (cfg, "DKIM_TRACE", @@ -579,12 +585,12 @@ dkim_module_config (struct rspamd_config *cfg) cfg->cfg_pool, dkim_module_ctx->sign_condition_ref); - rspamd_symbols_cache_add_symbol (cfg->cache, + rspamd_symcache_add_symbol (cfg->cache, "DKIM_SIGN", 0, dkim_sign_callback, NULL, - SYMBOL_TYPE_CALLBACK|SYMBOL_TYPE_FINE, + SYMBOL_TYPE_CALLBACK | SYMBOL_TYPE_FINE, -1); msg_info_config ("init condition script for DKIM signing"); @@ -592,7 +598,7 @@ dkim_module_config (struct rspamd_config *cfg) * Allow dkim signing to be executed only after dkim check */ if (cb_id > 0) { - rspamd_symbols_cache_add_delayed_dependency (cfg->cache, + rspamd_symcache_add_delayed_dependency (cfg->cache, "DKIM_SIGN", dkim_module_ctx->symbol_reject); } @@ -816,7 +822,7 @@ lua_dkim_sign_handler (lua_State *L) if (pubkey != NULL) { /* Also check if private and public keys match */ rspamd_dkim_key_t *pk; - gsize keylen = strlen (pubkey); + keylen = strlen (pubkey); pk = rspamd_dkim_parse_key (pubkey, &keylen, NULL); @@ -942,6 +948,7 @@ dkim_module_check (struct dkim_check_result *res) const gchar *strict_value; struct dkim_check_result *first, *cur = NULL; struct dkim_ctx *dkim_module_ctx = dkim_get_context (res->task->cfg); + struct rspamd_task *task = res->task; first = res->first; @@ -950,8 +957,8 @@ dkim_module_check (struct dkim_check_result *res) continue; } - if (cur->key != NULL && cur->res == -1) { - cur->res = rspamd_dkim_check (cur->ctx, cur->key, cur->task); + if (cur->key != NULL && cur->res == NULL) { + cur->res = rspamd_dkim_check (cur->ctx, cur->key, task); if (dkim_module_ctx->dkim_domains != NULL) { /* Perform strict check */ @@ -972,60 +979,85 @@ dkim_module_check (struct dkim_check_result *res) if (cur->ctx == NULL) { continue; } - if (cur->res == -1) { + if (cur->res == NULL) { /* Still need a key */ all_done = FALSE; } } if (all_done) { + /* Create zero terminated array of results */ + struct rspamd_dkim_check_result **pres; + guint nres = 0, i = 0; + + DL_FOREACH (first, cur) { + if (cur->ctx == NULL || cur->res == NULL) { + continue; + } + + nres ++; + } + + pres = rspamd_mempool_alloc (task->task_pool, sizeof (*pres) * (nres + 1)); + pres[nres] = NULL; + DL_FOREACH (first, cur) { const gchar *symbol = NULL, *trace = NULL; gdouble symbol_weight = 1.0; - if (cur->ctx == NULL) { + if (cur->ctx == NULL || cur->res == NULL) { continue; } - if (cur->res == DKIM_REJECT) { + + pres[i++] = cur->res; + + if (cur->res->rcode == DKIM_REJECT) { symbol = dkim_module_ctx->symbol_reject; trace = "-"; symbol_weight = cur->mult_deny * 1.0; } - else if (cur->res == DKIM_CONTINUE) { + else if (cur->res->rcode == DKIM_CONTINUE) { symbol = dkim_module_ctx->symbol_allow; trace = "+"; symbol_weight = cur->mult_allow * 1.0; } - else if (cur->res == DKIM_PERM_ERROR) { + else if (cur->res->rcode == DKIM_PERM_ERROR) { trace = "~"; symbol = dkim_module_ctx->symbol_permfail; } - else if (cur->res == DKIM_TRYAGAIN) { + else if (cur->res->rcode == DKIM_TRYAGAIN) { trace = "?"; symbol = dkim_module_ctx->symbol_tempfail; } if (symbol != NULL) { const gchar *domain = rspamd_dkim_get_domain (cur->ctx); + const gchar *selector = rspamd_dkim_get_selector (cur->ctx); gsize tracelen; gchar *tracebuf; - tracelen = strlen (domain) + 3; /* :<trace>\0 */ - tracebuf = rspamd_mempool_alloc (cur->task->task_pool, + tracelen = strlen (domain) + strlen (selector) + 4; + tracebuf = rspamd_mempool_alloc (task->task_pool, tracelen); rspamd_snprintf (tracebuf, tracelen, "%s:%s", domain, trace); rspamd_task_insert_result (cur->task, - symbol, - symbol_weight, - domain); - rspamd_task_insert_result (cur->task, "DKIM_TRACE", 0.0, tracebuf); + + rspamd_snprintf (tracebuf, tracelen, "%s:s=%s", domain, selector); + rspamd_task_insert_result (task, + symbol, + symbol_weight, + tracebuf); } + } - rspamd_session_watcher_pop (res->task->s, res->w); + + rspamd_mempool_set_variable (task->task_pool, + RSPAMD_MEMPOOL_DKIM_CHECK_RESULTS, + pres, NULL); } } @@ -1064,10 +1096,12 @@ dkim_module_key_handler (rspamd_dkim_key_t *key, if (err != NULL) { if (err->code == DKIM_SIGERROR_NOKEY) { - res->res = DKIM_TRYAGAIN; + res->res = rspamd_dkim_create_result (ctx, DKIM_TRYAGAIN, task); + res->res->fail_reason = "DNS error when getting key"; } else { - res->res = DKIM_PERM_ERROR; + res->res = rspamd_dkim_create_result (ctx, DKIM_PERM_ERROR, task); + res->res->fail_reason = "invalid DKIM record"; } } } @@ -1080,7 +1114,9 @@ dkim_module_key_handler (rspamd_dkim_key_t *key, } static void -dkim_symbol_callback (struct rspamd_task *task, void *unused) +dkim_symbol_callback (struct rspamd_task *task, + struct rspamd_symcache_item *item, + void *unused) { GPtrArray *hlist; rspamd_dkim_context_t *ctx; @@ -1112,15 +1148,21 @@ dkim_symbol_callback (struct rspamd_task *task, void *unused) || (!dkim_module_ctx->check_local && rspamd_inet_address_is_local (task->from_addr, TRUE))) { msg_info_task ("skip DKIM checks for local networks and authorized users"); + rspamd_symcache_finalize_item (task, item); + return; } /* Check whitelist */ if (rspamd_match_radix_map_addr (dkim_module_ctx->whitelist_ip, task->from_addr) != NULL) { msg_info_task ("skip DKIM checks for whitelisted address"); + rspamd_symcache_finalize_item (task, item); + return; } + rspamd_symcache_item_async_inc (task, item, M); + /* Now check if a message has its signature */ hlist = rspamd_message_get_header_array (task, RSPAMD_DKIM_SIGNHEADER, @@ -1137,10 +1179,11 @@ dkim_symbol_callback (struct rspamd_task *task, void *unused) cur = rspamd_mempool_alloc0 (task->task_pool, sizeof (*cur)); cur->first = res; - cur->res = -1; + cur->res = NULL; cur->task = task; cur->mult_allow = 1.0; cur->mult_deny = 1.0; + cur->item = item; ctx = rspamd_create_dkim_context (rh->decoded, task->task_pool, @@ -1148,6 +1191,15 @@ dkim_symbol_callback (struct rspamd_task *task, void *unused) RSPAMD_DKIM_NORMAL, &err); + if (res == NULL) { + res = cur; + res->first = res; + res->prev = res; + } + else { + DL_APPEND (res, cur); + } + if (ctx == NULL) { if (err != NULL) { msg_info_task ("<%s> cannot parse DKIM context: %e", @@ -1197,17 +1249,6 @@ dkim_symbol_callback (struct rspamd_task *task, void *unused) } } - if (res == NULL) { - res = cur; - res->first = res; - res->prev = res; - res->w = rspamd_session_get_watcher (task->s); - } - else { - cur->w = res->w; - DL_APPEND (res, cur); - } - checked ++; if (checked > dkim_module_ctx->max_sigs) { @@ -1226,13 +1267,16 @@ dkim_symbol_callback (struct rspamd_task *task, void *unused) } if (res != NULL) { - rspamd_session_watcher_push (task->s); dkim_module_check (res); } + + rspamd_symcache_item_async_dec_check (task, item, M); } static void -dkim_sign_callback (struct rspamd_task *task, void *unused) +dkim_sign_callback (struct rspamd_task *task, + struct rspamd_symcache_item *item, + void *unused) { lua_State *L; struct rspamd_task **ptask; @@ -1285,6 +1329,7 @@ dkim_sign_callback (struct rspamd_task *task, void *unused) msg_err_task ("invalid return value from sign condition: %e", err); g_error_free (err); + rspamd_symcache_finalize_item (task, item); return; } @@ -1307,6 +1352,7 @@ dkim_sign_callback (struct rspamd_task *task, void *unused) lua_settop (L, 0); luaL_error (L, "unknown key type: %s", key_type); + rspamd_symcache_finalize_item (task, item); return; } @@ -1321,6 +1367,7 @@ dkim_sign_callback (struct rspamd_task *task, void *unused) if (arc_idx == 0) { lua_settop (L, 0); luaL_error (L, "no arc idx specified"); + rspamd_symcache_finalize_item (task, item); return; } @@ -1330,12 +1377,14 @@ dkim_sign_callback (struct rspamd_task *task, void *unused) if (arc_cv == NULL) { lua_settop (L, 0); luaL_error (L, "no arc cv specified"); + rspamd_symcache_finalize_item (task, item); return; } if (arc_idx == 0) { lua_settop (L, 0); luaL_error (L, "no arc idx specified"); + rspamd_symcache_finalize_item (task, item); return; } @@ -1344,6 +1393,7 @@ dkim_sign_callback (struct rspamd_task *task, void *unused) lua_settop (L, 0); luaL_error (L, "unknown sign type: %s", sign_type_str); + rspamd_symcache_finalize_item (task, item); return; } @@ -1376,6 +1426,7 @@ dkim_sign_callback (struct rspamd_task *task, void *unused) msg_err_task ("cannot load dkim key %s: %e", lru_key, err); g_error_free (err); + rspamd_symcache_finalize_item (task, item); return; } @@ -1400,6 +1451,7 @@ dkim_sign_callback (struct rspamd_task *task, void *unused) msg_err_task ("cannot load dkim key %s: %e", lru_key, err); g_error_free (err); + rspamd_symcache_finalize_item (task, item); return; } @@ -1419,6 +1471,7 @@ dkim_sign_callback (struct rspamd_task *task, void *unused) msg_err_task ("cannot create sign context: %e", err); g_error_free (err); + rspamd_symcache_finalize_item (task, item); return; } @@ -1446,9 +1499,13 @@ dkim_sign_callback (struct rspamd_task *task, void *unused) if (!sign) { msg_debug_task ("skip signing as dkim condition callback returned" " false"); + rspamd_symcache_finalize_item (task, item); + return; } } + + rspamd_symcache_finalize_item (task, item); } struct rspamd_dkim_lua_verify_cbdata { @@ -1461,7 +1518,7 @@ struct rspamd_dkim_lua_verify_cbdata { static void dkim_module_lua_push_verify_result (struct rspamd_dkim_lua_verify_cbdata *cbd, - gint code, GError *err) + struct rspamd_dkim_check_result *res, GError *err) { struct rspamd_task **ptask, *task; const gchar *error_str = "unknown error"; @@ -1469,7 +1526,7 @@ dkim_module_lua_push_verify_result (struct rspamd_dkim_lua_verify_cbdata *cbd, task = cbd->task; - switch (code) { + switch (res->rcode) { case DKIM_CONTINUE: error_str = NULL; success = TRUE; @@ -1525,13 +1582,19 @@ dkim_module_lua_push_verify_result (struct rspamd_dkim_lua_verify_cbdata *cbd, lua_pushstring (cbd->L, error_str); if (cbd->ctx) { - lua_pushstring (cbd->L, rspamd_dkim_get_domain (cbd->ctx)); + lua_pushstring (cbd->L, res->domain); + lua_pushstring (cbd->L, res->selector); + lua_pushstring (cbd->L, res->short_b); + lua_pushstring (cbd->L, res->fail_reason); } else { lua_pushnil (cbd->L); + lua_pushnil (cbd->L); + lua_pushnil (cbd->L); + lua_pushnil (cbd->L); } - if (lua_pcall (cbd->L, 4, 0, 0) != 0) { + if (lua_pcall (cbd->L, 7, 0, 0) != 0) { msg_err_task ("call to verify callback failed: %s", lua_tostring (cbd->L, -1)); lua_pop (cbd->L, 1); @@ -1542,14 +1605,14 @@ dkim_module_lua_push_verify_result (struct rspamd_dkim_lua_verify_cbdata *cbd, static void dkim_module_lua_on_key (rspamd_dkim_key_t *key, - gsize keylen, - rspamd_dkim_context_t *ctx, - gpointer ud, - GError *err) + gsize keylen, + rspamd_dkim_context_t *ctx, + gpointer ud, + GError *err) { struct rspamd_dkim_lua_verify_cbdata *cbd = ud; struct rspamd_task *task; - gint ret; + struct rspamd_dkim_check_result *res; struct dkim_ctx *dkim_module_ctx; task = cbd->task; @@ -1576,16 +1639,22 @@ dkim_module_lua_on_key (rspamd_dkim_key_t *key, if (err != NULL) { if (err->code == DKIM_SIGERROR_NOKEY) { - dkim_module_lua_push_verify_result (cbd, DKIM_TRYAGAIN, err); + res = rspamd_dkim_create_result (ctx, DKIM_TRYAGAIN, task); + res->fail_reason = "DNS error when getting key"; + } else { - dkim_module_lua_push_verify_result (cbd, DKIM_PERM_ERROR, err); + res = rspamd_dkim_create_result (ctx, DKIM_PERM_ERROR, task); + res->fail_reason = "invalid DKIM record"; } } else { - dkim_module_lua_push_verify_result (cbd, DKIM_TRYAGAIN, NULL); + res = rspamd_dkim_create_result (ctx, DKIM_TRYAGAIN, task); + res->fail_reason = "DNS error when getting key"; } + dkim_module_lua_push_verify_result (cbd, res, err); + if (err) { g_error_free (err); } @@ -1593,8 +1662,8 @@ dkim_module_lua_on_key (rspamd_dkim_key_t *key, return; } - ret = rspamd_dkim_check (cbd->ctx, cbd->key, cbd->task); - dkim_module_lua_push_verify_result (cbd, ret, NULL); + res = rspamd_dkim_check (cbd->ctx, cbd->key, cbd->task); + dkim_module_lua_push_verify_result (cbd, res, NULL); } static gint @@ -1605,7 +1674,7 @@ lua_dkim_verify_handler (lua_State *L) rspamd_dkim_context_t *ctx; struct rspamd_dkim_lua_verify_cbdata *cbd; rspamd_dkim_key_t *key; - gint ret; + struct rspamd_dkim_check_result *ret; GError *err = NULL; const gchar *type_str = NULL; enum rspamd_dkim_type type = RSPAMD_DKIM_NORMAL; diff --git a/src/plugins/fuzzy_check.c b/src/plugins/fuzzy_check.c index 23aeacb66..bfb6b4d72 100644 --- a/src/plugins/fuzzy_check.c +++ b/src/plugins/fuzzy_check.c @@ -45,11 +45,9 @@ #include "libutil/http_private.h" #include "libstat/stat_api.h" #include <math.h> +#include <src/libmime/message.h> #define DEFAULT_SYMBOL "R_FUZZY_HASH" -#define DEFAULT_UPSTREAM_ERROR_TIME 10 -#define DEFAULT_UPSTREAM_DEAD_TIME 300 -#define DEFAULT_UPSTREAM_MAXERRORS 10 #define DEFAULT_IO_TIMEOUT 500 #define DEFAULT_RETRANSMITS 3 @@ -58,6 +56,7 @@ #define RSPAMD_FUZZY_PLUGIN_VERSION RSPAMD_FUZZY_VERSION static const gint rspamd_fuzzy_hash_len = 5; +static const gchar *M = "fuzzy check"; struct fuzzy_ctx; struct fuzzy_mapping { @@ -66,33 +65,26 @@ struct fuzzy_mapping { double weight; }; -struct fuzzy_mime_type { - rspamd_regexp_t *type_re; - rspamd_regexp_t *subtype_re; -}; - struct fuzzy_rule { struct upstream_list *servers; const gchar *symbol; const gchar *algorithm_str; const gchar *name; + const ucl_object_t *ucl_obj; enum rspamd_shingle_alg alg; GHashTable *mappings; - GPtrArray *mime_types; GPtrArray *fuzzy_headers; GString *hash_key; GString *shingles_key; struct rspamd_cryptobox_keypair *local_key; struct rspamd_cryptobox_pubkey *peer_key; double max_score; - guint32 min_bytes; gboolean read_only; gboolean skip_unknown; - gboolean fuzzy_images; - gboolean short_text_direct_hash; gint learn_condition_cb; struct rspamd_hash_map_helper *skip_map; struct fuzzy_ctx *ctx; + gint lua_id; }; struct fuzzy_ctx { @@ -101,15 +93,13 @@ struct fuzzy_ctx { GPtrArray *fuzzy_rules; struct rspamd_config *cfg; const gchar *default_symbol; - guint32 min_hash_len; struct rspamd_radix_map_helper *whitelist; struct rspamd_keypair_cache *keypairs_cache; - gdouble text_multiplier; - guint32 min_bytes; - guint32 min_height; - guint32 min_width; guint32 io_timeout; guint32 retransmits; + gint check_mime_part_ref; /* Lua callback */ + gint process_rule_ref; /* Lua callback */ + gint cleanup_rules_ref; gboolean enabled; }; @@ -131,8 +121,8 @@ struct fuzzy_client_session { GPtrArray *commands; GPtrArray *results; struct rspamd_task *task; + struct rspamd_symcache_item *item; struct upstream *server; - rspamd_inet_addr_t *addr; struct fuzzy_rule *rule; struct event ev; struct event timev; @@ -149,7 +139,6 @@ struct fuzzy_learn_session { struct rspamd_http_connection_entry *http_entry; struct rspamd_async_session *session; struct upstream *server; - rspamd_inet_addr_t *addr; struct fuzzy_rule *rule; struct rspamd_task *task; struct event ev; @@ -170,14 +159,17 @@ struct fuzzy_learn_session { struct fuzzy_cmd_io { guint32 tag; guint32 flags; - struct rspamd_fuzzy_cmd cmd; struct iovec io; + struct rspamd_mime_part *part; + struct rspamd_fuzzy_cmd cmd; }; static const char *default_headers = "Subject,Content-Type,Reply-To,X-Mailer"; -static void fuzzy_symbol_callback (struct rspamd_task *task, void *unused); +static void fuzzy_symbol_callback (struct rspamd_task *task, + struct rspamd_symcache_item *item, + void *unused); /* Initialization */ gint fuzzy_check_module_init (struct rspamd_config *cfg, @@ -246,10 +238,10 @@ parse_flags (struct fuzzy_rule *rule, /* Add flag to hash table */ g_hash_table_insert (rule->mappings, GINT_TO_POINTER (map->fuzzy_flag), map); - rspamd_symbols_cache_add_symbol (cfg->cache, + rspamd_symcache_add_symbol (cfg->cache, map->symbol, 0, NULL, NULL, - SYMBOL_TYPE_VIRTUAL|SYMBOL_TYPE_FINE, + SYMBOL_TYPE_VIRTUAL | SYMBOL_TYPE_FINE, cb_id); } else { @@ -265,53 +257,6 @@ parse_flags (struct fuzzy_rule *rule, } } - -static GPtrArray * -parse_mime_types (struct rspamd_config *cfg, const gchar *str) -{ - gchar **strvec, *p; - gint num, i; - struct fuzzy_mime_type *type; - GPtrArray *res; - - strvec = g_strsplit_set (str, ",", 0); - num = g_strv_length (strvec); - res = g_ptr_array_sized_new (num); - - for (i = 0; i < num; i++) { - g_strstrip (strvec[i]); - - if ((p = strchr (strvec[i], '/')) != NULL) { - type = rspamd_mempool_alloc (cfg->cfg_pool, - sizeof (struct fuzzy_mime_type)); - type->type_re = rspamd_regexp_from_glob (strvec[i], p - strvec[i], - NULL); - type->subtype_re = rspamd_regexp_from_glob (p + 1, 0, NULL); - rspamd_mempool_add_destructor (cfg->cfg_pool, - (rspamd_mempool_destruct_t)rspamd_regexp_unref, - type->type_re); - rspamd_mempool_add_destructor (cfg->cfg_pool, - (rspamd_mempool_destruct_t)rspamd_regexp_unref, - type->subtype_re); - g_ptr_array_add (res, type); - } - else { - type = rspamd_mempool_alloc (cfg->cfg_pool, - sizeof (struct fuzzy_mime_type)); - type->type_re = rspamd_regexp_from_glob (strvec[i], 0, NULL); - rspamd_mempool_add_destructor (cfg->cfg_pool, - (rspamd_mempool_destruct_t)rspamd_regexp_unref, - type->type_re); - type->subtype_re = NULL; - g_ptr_array_add (res, type); - } - } - - g_strfreev (strvec); - - return res; -} - static GPtrArray * parse_fuzzy_headers (struct rspamd_config *cfg, const gchar *str) { @@ -334,39 +279,6 @@ parse_fuzzy_headers (struct rspamd_config *cfg, const gchar *str) return res; } -static gboolean -fuzzy_check_content_type (struct fuzzy_rule *rule, struct rspamd_content_type *ct) -{ - struct fuzzy_mime_type *ft; - guint i; - - PTR_ARRAY_FOREACH (rule->mime_types, i, ft) { - if (ft->type_re) { - - if (ct->type.len > 0 && - rspamd_regexp_match (ft->type_re, - ct->type.begin, - ct->type.len, - TRUE)) { - if (ft->subtype_re) { - if (ct->subtype.len > 0 && - rspamd_regexp_match (ft->subtype_re, - ct->subtype.begin, - ct->subtype.len, - TRUE)) { - return TRUE; - } - } - else { - return TRUE; - } - } - } - } - - return FALSE; -} - static double fuzzy_normalize (gint32 in, double weight) { @@ -431,6 +343,7 @@ fuzzy_parse_rule (struct rspamd_config *cfg, const ucl_object_t *obj, rule = fuzzy_rule_new (fuzzy_module_ctx->default_symbol, cfg->cfg_pool); + rule->ucl_obj = obj; rule->ctx = fuzzy_module_ctx; rule->learn_condition_cb = -1; rule->alg = RSPAMD_SHINGLES_OLD; @@ -445,36 +358,6 @@ fuzzy_parse_rule (struct rspamd_config *cfg, const ucl_object_t *obj, (void **)&rule->skip_map); } - if ((value = ucl_object_lookup (obj, "mime_types")) != NULL) { - it = NULL; - while ((cur = ucl_object_iterate (value, &it, value->type == UCL_ARRAY)) - != NULL) { - GPtrArray *tmp; - guint i; - gpointer ptr; - - tmp = parse_mime_types (cfg, ucl_obj_tostring (cur)); - - if (tmp) { - if (rule->mime_types) { - PTR_ARRAY_FOREACH (tmp, i, ptr) { - g_ptr_array_add (rule->mime_types, ptr); - } - - g_ptr_array_free (tmp, TRUE); - } - else { - rule->mime_types = tmp; - } - } - } - - if (rule->mime_types) { - rspamd_mempool_add_destructor (cfg->cfg_pool, - rspamd_ptr_array_free_hard, rule->mime_types); - } - } - if ((value = ucl_object_lookup (obj, "headers")) != NULL) { it = NULL; while ((cur = ucl_object_iterate (value, &it, value->type == UCL_ARRAY)) @@ -514,10 +397,6 @@ fuzzy_parse_rule (struct rspamd_config *cfg, const ucl_object_t *obj, rule->max_score = ucl_obj_todouble (value); } - if ((value = ucl_object_lookup (obj, "min_bytes")) != NULL) { - rule->min_bytes = ucl_obj_toint (value); - } - if ((value = ucl_object_lookup (obj, "symbol")) != NULL) { rule->symbol = ucl_obj_tostring (value); } @@ -538,14 +417,6 @@ fuzzy_parse_rule (struct rspamd_config *cfg, const ucl_object_t *obj, rule->skip_unknown = ucl_obj_toboolean (value); } - if ((value = ucl_object_lookup (obj, "short_text_direct_hash")) != NULL) { - rule->short_text_direct_hash = ucl_obj_toboolean (value); - } - - if ((value = ucl_object_lookup (obj, "fuzzy_images")) != NULL) { - rule->fuzzy_images = ucl_obj_toboolean (value); - } - if ((value = ucl_object_lookup (obj, "algorithm")) != NULL) { rule->algorithm_str = ucl_object_tostring (value); @@ -684,10 +555,10 @@ fuzzy_parse_rule (struct rspamd_config *cfg, const ucl_object_t *obj, g_ptr_array_add (fuzzy_module_ctx->fuzzy_rules, rule); if (rule->symbol != fuzzy_module_ctx->default_symbol) { - rspamd_symbols_cache_add_symbol (cfg->cache, rule->symbol, + rspamd_symcache_add_symbol (cfg->cache, rule->symbol, 0, NULL, NULL, - SYMBOL_TYPE_VIRTUAL|SYMBOL_TYPE_FINE, + SYMBOL_TYPE_VIRTUAL | SYMBOL_TYPE_FINE, cb_id); } @@ -699,6 +570,34 @@ fuzzy_parse_rule (struct rspamd_config *cfg, const ucl_object_t *obj, rule->algorithm_str); } + /* + * Process rule in Lua + */ + gint err_idx, ret; + GString *tb; + lua_State *L = (lua_State *)cfg->lua_state; + + lua_pushcfunction (L, &rspamd_lua_traceback); + err_idx = lua_gettop (L); + lua_rawgeti (L, LUA_REGISTRYINDEX, fuzzy_module_ctx->process_rule_ref); + ucl_object_push_lua (L, obj, true); + + if ((ret = lua_pcall (L, 1, 1, err_idx)) != 0) { + tb = lua_touserdata (L, -1); + msg_err_config ("call to process_rule lua " + "script failed (%d): %v", ret, tb); + + if (tb) { + g_string_free (tb, TRUE); + } + rule->lua_id = -1; + } + else { + rule->lua_id = lua_tonumber (L, -1); + } + + lua_settop (L, 0); + rspamd_mempool_add_destructor (cfg->cfg_pool, fuzzy_free_rule, rule); @@ -718,6 +617,9 @@ fuzzy_check_module_init (struct rspamd_config *cfg, struct module_ctx **ctx) fuzzy_module_ctx->keypairs_cache = rspamd_keypair_cache_new (32); fuzzy_module_ctx->fuzzy_rules = g_ptr_array_new (); fuzzy_module_ctx->cfg = cfg; + fuzzy_module_ctx->process_rule_ref = -1; + fuzzy_module_ctx->check_mime_part_ref = -1; + fuzzy_module_ctx->cleanup_rules_ref = -1; rspamd_mempool_add_destructor (cfg->cfg_pool, (rspamd_mempool_destruct_t)rspamd_mempool_delete, @@ -1005,58 +907,78 @@ fuzzy_check_module_config (struct rspamd_config *cfg) } fuzzy_module_ctx->enabled = TRUE; + fuzzy_module_ctx->check_mime_part_ref = -1; + fuzzy_module_ctx->process_rule_ref = -1; + fuzzy_module_ctx->cleanup_rules_ref = -1; - if ((value = - rspamd_config_get_module_opt (cfg, "fuzzy_check", "symbol")) != NULL) { - fuzzy_module_ctx->default_symbol = ucl_obj_tostring (value); + /* Interact with lua_fuzzy */ + if (luaL_dostring (L, "return require \"lua_fuzzy\"") != 0) { + msg_err_config ("cannot require lua_fuzzy: %s", + lua_tostring (L, -1)); + fuzzy_module_ctx->enabled = FALSE; } else { - fuzzy_module_ctx->default_symbol = DEFAULT_SYMBOL; - } + if (lua_type (L, -1) != LUA_TTABLE) { + msg_err_config ("lua fuzzy must return " + "table and not %s", + lua_typename (L, lua_type (L, -1))); + fuzzy_module_ctx->enabled = FALSE; + } else { + lua_pushstring (L, "process_rule"); + lua_gettable (L, -2); + + if (lua_type (L, -1) != LUA_TFUNCTION) { + msg_err_config ("process_rule must return " + "function and not %s", + lua_typename (L, lua_type (L, -1))); + fuzzy_module_ctx->enabled = FALSE; + } + else { + fuzzy_module_ctx->process_rule_ref = luaL_ref (L, LUA_REGISTRYINDEX); + } - if ((value = - rspamd_config_get_module_opt (cfg, "fuzzy_check", - "min_length")) != NULL) { - fuzzy_module_ctx->min_hash_len = ucl_obj_toint (value); - } - else { - fuzzy_module_ctx->min_hash_len = 0; - } + lua_pushstring (L, "check_mime_part"); + lua_gettable (L, -2); - if ((value = - rspamd_config_get_module_opt (cfg, "fuzzy_check", - "min_bytes")) != NULL) { - fuzzy_module_ctx->min_bytes = ucl_obj_toint (value); - } - else { - fuzzy_module_ctx->min_bytes = 0; - } + if (lua_type (L, -1) != LUA_TFUNCTION) { + msg_err_config ("check_mime_part must return " + "function and not %s", + lua_typename (L, lua_type (L, -1))); + fuzzy_module_ctx->enabled = FALSE; + } + else { + fuzzy_module_ctx->check_mime_part_ref = luaL_ref (L, LUA_REGISTRYINDEX); + } - if ((value = - rspamd_config_get_module_opt (cfg, "fuzzy_check", - "text_multiplier")) != NULL) { - fuzzy_module_ctx->text_multiplier = ucl_object_todouble (value); - } - else { - fuzzy_module_ctx->text_multiplier = 2.0; - } + lua_pushstring (L, "cleanup_rules"); + lua_gettable (L, -2); - if ((value = - rspamd_config_get_module_opt (cfg, "fuzzy_check", - "min_height")) != NULL) { - fuzzy_module_ctx->min_height = ucl_obj_toint (value); + if (lua_type (L, -1) != LUA_TFUNCTION) { + msg_err_config ("cleanup_rules must return " + "function and not %s", + lua_typename (L, lua_type (L, -1))); + fuzzy_module_ctx->enabled = FALSE; + } + else { + fuzzy_module_ctx->cleanup_rules_ref = luaL_ref (L, LUA_REGISTRYINDEX); + } + } } - else { - fuzzy_module_ctx->min_height = 0; + + lua_settop (L, 0); + + if (!fuzzy_module_ctx->enabled) { + return TRUE; } + if ((value = - rspamd_config_get_module_opt (cfg, "fuzzy_check", - "min_width")) != NULL) { - fuzzy_module_ctx->min_width = ucl_obj_toint (value); + rspamd_config_get_module_opt (cfg, "fuzzy_check", "symbol")) != NULL) { + fuzzy_module_ctx->default_symbol = ucl_obj_tostring (value); } else { - fuzzy_module_ctx->min_width = 0; + fuzzy_module_ctx->default_symbol = DEFAULT_SYMBOL; } + if ((value = rspamd_config_get_module_opt (cfg, "fuzzy_check", "timeout")) != NULL) { fuzzy_module_ctx->io_timeout = ucl_obj_todouble (value) * 1000; @@ -1088,10 +1010,10 @@ fuzzy_check_module_config (struct rspamd_config *cfg) if ((value = rspamd_config_get_module_opt (cfg, "fuzzy_check", "rule")) != NULL) { - cb_id = rspamd_symbols_cache_add_symbol (cfg->cache, - "FUZZY_CALLBACK", 0, fuzzy_symbol_callback, NULL, - SYMBOL_TYPE_CALLBACK|SYMBOL_TYPE_FINE, - -1); + cb_id = rspamd_symcache_add_symbol (cfg->cache, + "FUZZY_CALLBACK", 0, fuzzy_symbol_callback, NULL, + SYMBOL_TYPE_CALLBACK | SYMBOL_TYPE_FINE, + -1); /* * Here we can have 2 possibilities: @@ -1137,6 +1059,10 @@ fuzzy_check_module_config (struct rspamd_config *cfg) } } } + + /* We want that to check bad mime attachments */ + rspamd_symcache_add_delayed_dependency (cfg->cache, + "FUZZY_CALLBACK", "MIME_TYPES_CALLBACK"); } if (fuzzy_module_ctx->fuzzy_rules == NULL) { @@ -1163,7 +1089,7 @@ fuzzy_check_module_config (struct rspamd_config *cfg) lua_settable (L, -3); } - lua_pop (L, 1); /* Remove global function */ + lua_settop (L, 0); return res; } @@ -1171,6 +1097,43 @@ fuzzy_check_module_config (struct rspamd_config *cfg) gint fuzzy_check_module_reconfig (struct rspamd_config *cfg) { + struct fuzzy_ctx *fuzzy_module_ctx = fuzzy_get_context (cfg); + + if (fuzzy_module_ctx->cleanup_rules_ref != -1) { + /* Sync lua_fuzzy rules */ + gint err_idx, ret; + GString *tb; + lua_State *L = (lua_State *)cfg->lua_state; + + lua_pushcfunction (L, &rspamd_lua_traceback); + err_idx = lua_gettop (L); + lua_rawgeti (L, LUA_REGISTRYINDEX, fuzzy_module_ctx->cleanup_rules_ref); + + if ((ret = lua_pcall (L, 0, 0, err_idx)) != 0) { + tb = lua_touserdata (L, -1); + msg_err_config ("call to cleanup_rules lua " + "script failed (%d): %v", ret, tb); + + if (tb) { + g_string_free (tb, TRUE); + } + } + + luaL_unref (cfg->lua_state, LUA_REGISTRYINDEX, + fuzzy_module_ctx->cleanup_rules_ref); + lua_settop (L, 0); + } + + if (fuzzy_module_ctx->check_mime_part_ref != -1) { + luaL_unref (cfg->lua_state, LUA_REGISTRYINDEX, + fuzzy_module_ctx->check_mime_part_ref); + } + + if (fuzzy_module_ctx->process_rule_ref != -1) { + luaL_unref (cfg->lua_state, LUA_REGISTRYINDEX, + fuzzy_module_ctx->process_rule_ref); + } + return fuzzy_check_module_config (cfg); } @@ -1364,18 +1327,71 @@ fuzzy_cmd_set_cached (struct fuzzy_rule *rule, rspamd_mempool_set_variable (pool, key, data, NULL); } +static gboolean +fuzzy_rule_check_mimepart (struct rspamd_task *task, + struct fuzzy_rule *rule, + struct rspamd_mime_part *part, + gboolean *need_check, + gboolean *fuzzy_check) +{ + if (rule->lua_id != -1 && rule->ctx->check_mime_part_ref != -1) { + gint err_idx, ret; + GString *tb; + lua_State *L = (lua_State *)task->cfg->lua_state; + struct rspamd_task **ptask; + struct rspamd_mime_part **ppart; + + lua_pushcfunction (L, &rspamd_lua_traceback); + err_idx = lua_gettop (L); + lua_rawgeti (L, LUA_REGISTRYINDEX, rule->ctx->check_mime_part_ref); + + ptask = lua_newuserdata (L, sizeof (*ptask)); + *ptask = task; + rspamd_lua_setclass (L, "rspamd{task}", -1); + + ppart = lua_newuserdata (L, sizeof (*ppart)); + *ppart = part; + rspamd_lua_setclass (L, "rspamd{mimepart}", -1); + + lua_pushnumber (L, rule->lua_id); + + if ((ret = lua_pcall (L, 3, 2, err_idx)) != 0) { + tb = lua_touserdata (L, -1); + msg_err_task ("call to check_mime_part lua " + "script failed (%d): %v", ret, tb); + + if (tb) { + g_string_free (tb, TRUE); + } + ret = FALSE; + } + else { + ret = TRUE; + *need_check = lua_toboolean (L, -2); + *fuzzy_check = lua_toboolean (L, -1); + } + + lua_settop (L, 0); + + return ret; + } + + return FALSE; +} + /* * Create fuzzy command from a text part */ static struct fuzzy_cmd_io * fuzzy_cmd_from_text_part (struct rspamd_task *task, - struct fuzzy_rule *rule, - int c, - gint flag, - guint32 weight, - gboolean short_text, - rspamd_mempool_t *pool, - struct rspamd_mime_text_part *part) + struct fuzzy_rule *rule, + int c, + gint flag, + guint32 weight, + gboolean short_text, + rspamd_mempool_t *pool, + struct rspamd_mime_text_part *part, + struct rspamd_mime_part *mp) { struct rspamd_fuzzy_shingle_cmd *shcmd = NULL; struct rspamd_fuzzy_cmd *cmd = NULL; @@ -1389,7 +1405,7 @@ fuzzy_cmd_from_text_part (struct rspamd_task *task, GArray *words; struct fuzzy_cmd_io *io; - cached = fuzzy_cmd_get_cached (rule, pool, part); + cached = fuzzy_cmd_get_cached (rule, pool, mp); if (cached) { /* Copy cached */ @@ -1443,7 +1459,12 @@ fuzzy_cmd_from_text_part (struct rspamd_task *task, for (i = 0; i < words->len; i ++) { word = &g_array_index (words, rspamd_stat_token_t, i); - rspamd_cryptobox_hash_update (&st, word->begin, word->len); + + if (!((word->flags & RSPAMD_STAT_TOKEN_FLAG_SKIPPED) + || word->stemmed.len == 0)) { + rspamd_cryptobox_hash_update (&st, word->stemmed.begin, + word->stemmed.len); + } } rspamd_cryptobox_hash_final (&st, shcmd->basic.digest); @@ -1472,10 +1493,11 @@ fuzzy_cmd_from_text_part (struct rspamd_task *task, * Since it is copied when obtained from the cache, it is safe to use * it this way. */ - fuzzy_cmd_set_cached (rule, pool, part, cached); + fuzzy_cmd_set_cached (rule, pool, mp, cached); } io = rspamd_mempool_alloc (pool, sizeof (*io)); + io->part = mp; if (!short_text) { shcmd->basic.tag = ottery_rand_uint32 (); @@ -1536,11 +1558,12 @@ fuzzy_cmd_from_text_part (struct rspamd_task *task, static struct fuzzy_cmd_io * fuzzy_cmd_from_image_part (struct fuzzy_rule *rule, - int c, - gint flag, - guint32 weight, - rspamd_mempool_t *pool, - struct rspamd_image *img) + int c, + gint flag, + guint32 weight, + rspamd_mempool_t *pool, + struct rspamd_image *img, + struct rspamd_mime_part *mp) { struct rspamd_fuzzy_shingle_cmd *shcmd; struct rspamd_fuzzy_encrypted_shingle_cmd *encshcmd; @@ -1548,7 +1571,7 @@ fuzzy_cmd_from_image_part (struct fuzzy_rule *rule, struct rspamd_shingle *sh; struct rspamd_cached_shingles *cached; - cached = fuzzy_cmd_get_cached (rule, pool, img); + cached = fuzzy_cmd_get_cached (rule, pool, mp); if (cached) { /* Copy cached */ @@ -1598,7 +1621,7 @@ fuzzy_cmd_from_image_part (struct fuzzy_rule *rule, cached = rspamd_mempool_alloc (pool, sizeof (*cached)); cached->sh = sh; memcpy (cached->digest, shcmd->basic.digest, sizeof (cached->digest)); - fuzzy_cmd_set_cached (rule, pool, img, cached); + fuzzy_cmd_set_cached (rule, pool, mp, cached); } shcmd->basic.tag = ottery_rand_uint32 (); @@ -1611,6 +1634,7 @@ fuzzy_cmd_from_image_part (struct fuzzy_rule *rule, } io = rspamd_mempool_alloc (pool, sizeof (*io)); + io->part = mp; io->tag = shcmd->basic.tag; io->flags = FUZZY_CMD_FLAG_IMAGE; memcpy (&io->cmd, &shcmd->basic, sizeof (io->cmd)); @@ -1631,11 +1655,12 @@ fuzzy_cmd_from_image_part (struct fuzzy_rule *rule, static struct fuzzy_cmd_io * fuzzy_cmd_from_data_part (struct fuzzy_rule *rule, - int c, - gint flag, - guint32 weight, - rspamd_mempool_t *pool, - guchar digest[rspamd_cryptobox_HASHBYTES]) + int c, + gint flag, + guint32 weight, + rspamd_mempool_t *pool, + guchar digest[rspamd_cryptobox_HASHBYTES], + struct rspamd_mime_part *mp) { struct rspamd_fuzzy_cmd *cmd; struct rspamd_fuzzy_encrypted_cmd *enccmd = NULL; @@ -1662,6 +1687,7 @@ fuzzy_cmd_from_data_part (struct fuzzy_rule *rule, io = rspamd_mempool_alloc (pool, sizeof (*io)); io->flags = 0; io->tag = cmd->tag; + io->part = mp; memcpy (&io->cmd, cmd, sizeof (io->cmd)); if (rule->peer_key) { @@ -2106,6 +2132,9 @@ fuzzy_check_session_is_completed (struct fuzzy_client_session *session) if (nreplied == session->commands->len) { fuzzy_insert_metric_results (session->task, session->results); + if (session->item) { + rspamd_symcache_item_async_dec_check (session->task, session->item, M); + } rspamd_session_remove_event (session->task->s, fuzzy_io_fin, session); return TRUE; @@ -2174,11 +2203,16 @@ fuzzy_check_io_callback (gint fd, short what, void *arg) /* Error state */ msg_err_task ("got error on IO with server %s(%s), on %s, %d, %s", rspamd_upstream_name (session->server), - rspamd_inet_address_to_string_pretty (session->addr), + rspamd_inet_address_to_string_pretty ( + rspamd_upstream_addr (session->server)), session->state == 1 ? "read" : "write", errno, strerror (errno)); rspamd_upstream_fail (session->server, FALSE); + + if (session->item) { + rspamd_symcache_item_async_dec_check (session->task, session->item, M); + } rspamd_session_remove_event (session->task->s, fuzzy_io_fin, session); } else { @@ -2215,9 +2249,13 @@ fuzzy_check_timer_callback (gint fd, short what, void *arg) if (session->retransmits >= session->rule->ctx->retransmits) { msg_err_task ("got IO timeout with server %s(%s), after %d retransmits", rspamd_upstream_name (session->server), - rspamd_inet_address_to_string_pretty (session->addr), + rspamd_inet_address_to_string_pretty ( + rspamd_upstream_addr (session->server)), session->retransmits); rspamd_upstream_fail (session->server, FALSE); + if (session->item) { + rspamd_symcache_item_async_dec_check (session->task, session->item, M); + } rspamd_session_remove_event (session->task->s, fuzzy_io_fin, session); } else { @@ -2285,7 +2323,7 @@ fuzzy_controller_io_callback (gint fd, short what, void *arg) session->task->message_id, strerror (errno)); if (*(session->err) == NULL) { g_set_error (session->err, - g_quark_from_static_string ("fuzzy check"), + g_quark_from_static_string (M), errno, "read socket error: %s", strerror (errno)); } ret = return_error; @@ -2349,7 +2387,7 @@ fuzzy_controller_io_callback (gint fd, short what, void *arg) if (*(session->err) == NULL) { g_set_error (session->err, - g_quark_from_static_string ("fuzzy check"), + g_quark_from_static_string (M), rep->v1.value, "fuzzy hash is skipped"); } } @@ -2368,7 +2406,7 @@ fuzzy_controller_io_callback (gint fd, short what, void *arg) if (*(session->err) == NULL) { g_set_error (session->err, - g_quark_from_static_string ("fuzzy check"), + g_quark_from_static_string (M), rep->v1.value, "process fuzzy error"); } } @@ -2397,7 +2435,7 @@ fuzzy_controller_io_callback (gint fd, short what, void *arg) if (!fuzzy_cmd_vector_to_wire (fd, session->commands)) { if (*(session->err) == NULL) { g_set_error (session->err, - g_quark_from_static_string ("fuzzy check"), + g_quark_from_static_string (M), errno, "write socket error: %s", strerror (errno)); } ret = return_error; @@ -2420,7 +2458,8 @@ fuzzy_controller_io_callback (gint fd, short what, void *arg) else if (ret == return_error) { msg_err_task ("got error in IO with server %s(%s), %d, %s", rspamd_upstream_name (session->server), - rspamd_inet_address_to_string_pretty (session->addr), + rspamd_inet_address_to_string_pretty ( + rspamd_upstream_addr (session->server)), errno, strerror (errno)); rspamd_upstream_fail (session->server, FALSE); } @@ -2523,7 +2562,8 @@ fuzzy_controller_timer_callback (gint fd, short what, void *arg) msg_err_task_check ("got IO timeout with server %s(%s), " "after %d retransmits", rspamd_upstream_name (session->server), - rspamd_inet_address_to_string_pretty (session->addr), + rspamd_inet_address_to_string_pretty ( + rspamd_upstream_addr (session->server)), session->retransmits); if (session->session) { @@ -2582,18 +2622,12 @@ fuzzy_generate_commands (struct rspamd_task *task, struct fuzzy_rule *rule, struct rspamd_mime_part *mime_part; struct rspamd_image *image; struct fuzzy_cmd_io *io, *cur; - guint i, j, min_bytes = 0; + guint i, j; GPtrArray *res; + gboolean check_part, fuzzy_check; res = g_ptr_array_sized_new (task->parts->len + 1); - if (rule->min_bytes) { - min_bytes = rule->min_bytes; - } - else { - min_bytes = rule->ctx->min_bytes; - } - if (c == FUZZY_STAT) { io = fuzzy_cmd_stat (rule, c, flag, value, task->task_pool); if (io) { @@ -2603,212 +2637,63 @@ fuzzy_generate_commands (struct rspamd_task *task, struct fuzzy_rule *rule, goto end; } - if (G_LIKELY (!(flags & FUZZY_CHECK_FLAG_NOTEXT))) { - for (i = 0; i < task->text_parts->len; i ++) { - gdouble fac; - gboolean short_text = FALSE; + PTR_ARRAY_FOREACH (task->parts, i, mime_part) { + check_part = FALSE; + fuzzy_check = FALSE; - part = g_ptr_array_index (task->text_parts, i); + if (fuzzy_rule_check_mimepart (task, rule, mime_part, &check_part, + &fuzzy_check)) { + io = NULL; - if (IS_PART_EMPTY (part)) { - continue; - } + if (check_part) { + if (mime_part->flags & RSPAMD_MIME_PART_TEXT && + !(flags & FUZZY_CHECK_FLAG_NOTEXT)) { + part = mime_part->specific.txt; - /* Check length of part */ - fac = rule->ctx->text_multiplier * part->utf_content->len; - if ((double)min_bytes > fac) { - if (!rule->short_text_direct_hash) { - msg_info_task ( - "<%s>, part is shorter than %d bytes: %.0f " - "(%d * %.2f bytes), " - "skip fuzzy check", - task->message_id, min_bytes, - fac, - part->utf_content->len, - rule->ctx->text_multiplier); - continue; - } - else { - msg_info_task ( - "<%s>, part is shorter than %d bytes: %.0f " - "(%d * %.2f bytes), " - "use direct hash", - task->message_id, min_bytes, - fac, - part->utf_content->len, - rule->ctx->text_multiplier); - short_text = TRUE; + io = fuzzy_cmd_from_text_part (task, rule, + c, + flag, + value, + !fuzzy_check, + task->task_pool, + part, + mime_part); } - } + else if (mime_part->flags & RSPAMD_MIME_PART_IMAGE && + !(flags & FUZZY_CHECK_FLAG_NOIMAGES)) { + image = mime_part->specific.img; - if (part->utf_words == NULL || - part->utf_words->len == 0) { - msg_info_task ("<%s>, part hash empty, skip fuzzy check", - task->message_id); - continue; - } - - if (rule->ctx->min_hash_len != 0 && - part->utf_words->len < - rule->ctx->min_hash_len) { - if (!rule->short_text_direct_hash) { - msg_info_task ( - "<%s>, part hash is shorter than %d symbols, " - "skip fuzzy check", - task->message_id, - rule->ctx->min_hash_len); - continue; + io = fuzzy_cmd_from_data_part (rule, c, flag, value, + task->task_pool, + image->parent->digest, + mime_part); + io->flags |= FUZZY_CMD_FLAG_IMAGE; } else { - msg_info_task ( - "<%s>, part hash is shorter than %d symbols, " - "use direct hash", - task->message_id, - rule->ctx->min_hash_len); - short_text = TRUE; - } - } - - io = fuzzy_cmd_from_text_part (task, rule, - c, - flag, - value, - short_text, - task->task_pool, - part); - - if (io) { - gboolean skip_existing = FALSE; - - PTR_ARRAY_FOREACH (res, j, cur) { - if (memcmp (cur->cmd.digest, io->cmd.digest, - sizeof (io->cmd.digest)) == 0) { - skip_existing = TRUE; - break; - } - } - - if (!skip_existing) { - g_ptr_array_add (res, io); + io = fuzzy_cmd_from_data_part (rule, c, flag, value, + task->task_pool, + mime_part->digest, mime_part); } - } - } - } - - /* Process other parts and images */ - for (i = 0; i < task->parts->len; i ++) { - mime_part = g_ptr_array_index (task->parts, i); + if (io) { + gboolean skip_existing = FALSE; - if (mime_part->flags & RSPAMD_MIME_PART_IMAGE) { - - if (G_LIKELY (!(flags & FUZZY_CHECK_FLAG_NOIMAGES))) { - image = mime_part->specific.img; - - if (image->data->len > 0) { - /* Check: - * - min height - * - min width - * - min bytes - */ - - if ((rule->ctx->min_height == 0 || - image->height >= rule->ctx->min_height) && - (rule->ctx->min_width == 0 || - image->width >= rule->ctx->min_width) && - (min_bytes == 0 || - mime_part->parsed_data.len >= min_bytes)) { - io = fuzzy_cmd_from_data_part (rule, c, flag, value, - task->task_pool, - image->parent->digest); - if (io) { - gboolean skip_existing = FALSE; - - PTR_ARRAY_FOREACH (res, j, cur) { - if (memcmp (cur->cmd.digest, io->cmd.digest, - sizeof (io->cmd.digest)) == 0) { - skip_existing = TRUE; - break; - } - } - - if (!skip_existing) { - g_ptr_array_add (res, io); - } - } - - if (rule->fuzzy_images) { - /* Try to normalize image */ - if (!image->is_normalized) { - rspamd_image_normalize (task, image); - } - } - - if (image->is_normalized) { - io = fuzzy_cmd_from_image_part (rule, c, flag, - value, - task->task_pool, - image); - if (io) { - gboolean skip_existing = FALSE; - - PTR_ARRAY_FOREACH (res, j, cur) { - if (memcmp (cur->cmd.digest, io->cmd.digest, - sizeof (io->cmd.digest)) == 0) { - skip_existing = TRUE; - break; - } - } - - if (!skip_existing) { - g_ptr_array_add (res, io); - } - } + PTR_ARRAY_FOREACH (res, j, cur) { + if (memcmp (cur->cmd.digest, io->cmd.digest, + sizeof (io->cmd.digest)) == 0) { + skip_existing = TRUE; + break; } } - } - } - - continue; - } - - if (G_LIKELY (!(flags & FUZZY_CHECK_FLAG_NOIMAGES))) { - if (mime_part->ct && - !(mime_part->flags & (RSPAMD_MIME_PART_TEXT|RSPAMD_MIME_PART_IMAGE)) && - mime_part->parsed_data.len > 0 && - fuzzy_check_content_type (rule, mime_part->ct)) { - if (min_bytes == 0 || mime_part->parsed_data.len >= min_bytes) { - io = fuzzy_cmd_from_data_part (rule, c, flag, value, - task->task_pool, - mime_part->digest); - if (io) { - gboolean skip_existing = FALSE; - - PTR_ARRAY_FOREACH (res, j, cur) { - if (memcmp (cur->cmd.digest, io->cmd.digest, - sizeof (io->cmd.digest)) == 0) { - skip_existing = TRUE; - break; - } - } - if (!skip_existing) { - g_ptr_array_add (res, io); - } + if (!skip_existing) { + g_ptr_array_add (res, io); } } } } } - /* Process metadata */ -#if 0 - io = fuzzy_cmd_from_task_meta (rule, c, flag, value, - task->task_pool, task); - if (io) { - g_ptr_array_add (res, io); - } -#endif end: if (res->len == 0) { g_ptr_array_free (res, TRUE); @@ -2856,7 +2741,6 @@ register_fuzzy_client_call (struct rspamd_task *task, session->fd = sock; session->server = selected; session->rule = rule; - session->addr = addr; session->results = g_ptr_array_sized_new (32); event_set (&session->ev, sock, EV_WRITE, fuzzy_check_io_callback, @@ -2869,8 +2753,12 @@ register_fuzzy_client_call (struct rspamd_task *task, event_base_set (session->task->ev_base, &session->timev); event_add (&session->timev, &session->tv); - rspamd_session_add_event (task->s, NULL, fuzzy_io_fin, session, - g_quark_from_static_string ("fuzzy check")); + rspamd_session_add_event (task->s, fuzzy_io_fin, session, M); + session->item = rspamd_symcache_get_cur_item (task); + + if (session->item) { + rspamd_symcache_item_async_inc (task, session->item, M); + } } } } @@ -2878,7 +2766,9 @@ register_fuzzy_client_call (struct rspamd_task *task, /* This callback is called when we check message in fuzzy hashes storage */ static void -fuzzy_symbol_callback (struct rspamd_task *task, void *unused) +fuzzy_symbol_callback (struct rspamd_task *task, + struct rspamd_symcache_item *item, + void *unused) { struct fuzzy_rule *rule; guint i; @@ -2886,6 +2776,8 @@ fuzzy_symbol_callback (struct rspamd_task *task, void *unused) struct fuzzy_ctx *fuzzy_module_ctx = fuzzy_get_context (task->cfg); if (!fuzzy_module_ctx->enabled) { + rspamd_symcache_finalize_item (task, item); + return; } @@ -2896,10 +2788,14 @@ fuzzy_symbol_callback (struct rspamd_task *task, void *unused) msg_info_task ("<%s>, address %s is whitelisted, skip fuzzy check", task->message_id, rspamd_inet_address_to_string (task->from_addr)); + rspamd_symcache_finalize_item (task, item); + return; } } + rspamd_symcache_item_async_inc (task, item, M); + PTR_ARRAY_FOREACH (fuzzy_module_ctx->fuzzy_rules, i, rule) { commands = fuzzy_generate_commands (task, rule, FUZZY_CHECK, 0, 0, 0); @@ -2907,6 +2803,8 @@ fuzzy_symbol_callback (struct rspamd_task *task, void *unused) register_fuzzy_client_call (task, rule, commands); } } + + rspamd_symcache_item_async_dec_check (task, item, M); } void @@ -2963,7 +2861,6 @@ register_fuzzy_controller_call (struct rspamd_http_connection_entry *entry, msec_to_tv (fuzzy_module_ctx->io_timeout, &s->tv); s->task = task; - s->addr = addr; s->commands = commands; s->http_entry = entry; s->server = selected; @@ -3009,9 +2906,9 @@ fuzzy_process_handler (struct rspamd_http_connection_entry *conn_ent, struct fuzzy_ctx *fuzzy_module_ctx; /* Prepare task */ - task = rspamd_task_new (session->wrk, session->cfg, NULL, session->lang_det); + task = rspamd_task_new (session->wrk, session->cfg, NULL, + session->lang_det, conn_ent->rt->ev_base); task->cfg = ctx->cfg; - task->ev_base = conn_ent->rt->ev_base; saved = rspamd_mempool_alloc0 (session->pool, sizeof (gint)); err = rspamd_mempool_alloc0 (session->pool, sizeof (GError *)); fuzzy_module_ctx = fuzzy_get_context (ctx->cfg); @@ -3030,6 +2927,7 @@ fuzzy_process_handler (struct rspamd_http_connection_entry *conn_ent, rspamd_task_free (task); rspamd_controller_send_error (conn_ent, 400, "Message processing error"); + return; } @@ -3325,7 +3223,6 @@ fuzzy_check_send_lua_learn (struct fuzzy_rule *rule, msec_to_tv (rule->ctx->io_timeout, &s->tv); s->task = task; - s->addr = addr; s->commands = commands; s->http_entry = NULL; s->server = selected; @@ -3343,7 +3240,10 @@ fuzzy_check_send_lua_learn (struct fuzzy_rule *rule, event_base_set (s->task->ev_base, &s->timev); event_add (&s->timev, &s->tv); - rspamd_session_add_event (task->s, NULL, fuzzy_lua_fin, s, g_quark_from_static_string ("fuzzy check")); + rspamd_session_add_event (task->s, + fuzzy_lua_fin, + s, + M); (*saved)++; ret = 1; diff --git a/src/plugins/lua/antivirus.lua b/src/plugins/lua/antivirus.lua index 025e36043..ed3d93e79 100644 --- a/src/plugins/lua/antivirus.lua +++ b/src/plugins/lua/antivirus.lua @@ -15,11 +15,10 @@ limitations under the License. ]] -- local rspamd_logger = require "rspamd_logger" -local rspamd_util = require "rspamd_util" local rspamd_regexp = require "rspamd_regexp" -local tcp = require "rspamd_tcp" -local upstream_list = require "rspamd_upstream_list" local lua_util = require "lua_util" +local fun = require "fun" +local lua_antivirus = require "lua_antivirus" local redis_params local N = "antivirus" @@ -69,760 +68,6 @@ antivirus { return end -local default_message = '${SCANNER}: virus found: "${VIRUS}"' - -local function match_patterns(default_sym, found, patterns) - if type(patterns) ~= 'table' then return default_sym end - if not patterns[1] then - for sym, pat in pairs(patterns) do - if pat:match(found) then - return sym - end - end - return default_sym - else - for _, p in ipairs(patterns) do - for sym, pat in pairs(p) do - if pat:match(found) then - return sym - end - end - end - return default_sym - end -end - -local function yield_result(task, rule, vname) - local all_whitelisted = true - if type(vname) == 'string' then - local symname = match_patterns(rule['symbol'], vname, rule['patterns']) - if rule['whitelist'] and rule['whitelist']:get_key(vname) then - rspamd_logger.infox(task, '%s: "%s" is in whitelist', rule['type'], vname) - return - end - task:insert_result(symname, 1.0, vname) - rspamd_logger.infox(task, '%s: virus found: "%s"', rule['type'], vname) - elseif type(vname) == 'table' then - for _, vn in ipairs(vname) do - local symname = match_patterns(rule['symbol'], vn, rule['patterns']) - if rule['whitelist'] and rule['whitelist']:get_key(vn) then - rspamd_logger.infox(task, '%s: "%s" is in whitelist', rule['type'], vn) - else - all_whitelisted = false - task:insert_result(symname, 1.0, vn) - rspamd_logger.infox(task, '%s: virus found: "%s"', rule['type'], vn) - end - end - end - if rule['action'] then - if type(vname) == 'table' then - if all_whitelisted then return end - vname = table.concat(vname, '; ') - end - task:set_pre_result(rule['action'], - lua_util.template(rule.message or 'Rejected', { - SCANNER = rule['type'], - VIRUS = vname, - }), N) - end -end - -local function clamav_config(opts) - local clamav_conf = { - scan_mime_parts = true; - scan_text_mime = false; - scan_image_mime = false; - default_port = 3310, - log_clean = false, - timeout = 15.0, - retransmits = 2, - cache_expire = 3600, -- expire redis in one hour - message = default_message, - } - - for k,v in pairs(opts) do - clamav_conf[k] = v - end - - if not clamav_conf.prefix then - clamav_conf.prefix = 'rs_cl' - end - - if not clamav_conf['servers'] then - rspamd_logger.errx(rspamd_config, 'no servers defined') - - return nil - end - - clamav_conf['upstreams'] = upstream_list.create(rspamd_config, - clamav_conf['servers'], - clamav_conf.default_port) - - if clamav_conf['upstreams'] then - return clamav_conf - end - - rspamd_logger.errx(rspamd_config, 'cannot parse servers %s', - clamav_conf['servers']) - return nil -end - -local function fprot_config(opts) - local fprot_conf = { - scan_mime_parts = true; - scan_text_mime = false; - scan_image_mime = false; - default_port = 10200, - timeout = 15.0, - log_clean = false, - retransmits = 2, - cache_expire = 3600, -- expire redis in one hour - message = default_message, - } - - for k,v in pairs(opts) do - fprot_conf[k] = v - end - - if not fprot_conf.prefix then - fprot_conf.prefix = 'rs_fp' - end - - if not fprot_conf['servers'] then - rspamd_logger.errx(rspamd_config, 'no servers defined') - - return nil - end - - fprot_conf['upstreams'] = upstream_list.create(rspamd_config, - fprot_conf['servers'], - fprot_conf.default_port) - - if fprot_conf['upstreams'] then - return fprot_conf - end - - rspamd_logger.errx(rspamd_config, 'cannot parse servers %s', - fprot_conf['servers']) - return nil -end - -local function sophos_config(opts) - local sophos_conf = { - scan_mime_parts = true; - scan_text_mime = false; - scan_image_mime = false; - default_port = 4010, - timeout = 15.0, - log_clean = false, - retransmits = 2, - cache_expire = 3600, -- expire redis in one hour - message = default_message, - savdi_report_encrypted = false, - savdi_report_oversize = false, - } - - for k,v in pairs(opts) do - sophos_conf[k] = v - end - - if not sophos_conf.prefix then - sophos_conf.prefix = 'rs_sp' - end - - if not sophos_conf['servers'] then - rspamd_logger.errx(rspamd_config, 'no servers defined') - - return nil - end - - sophos_conf['upstreams'] = upstream_list.create(rspamd_config, - sophos_conf['servers'], - sophos_conf.default_port) - - if sophos_conf['upstreams'] then - return sophos_conf - end - - rspamd_logger.errx(rspamd_config, 'cannot parse servers %s', - sophos_conf['servers']) - return nil -end - -local function savapi_config(opts) - local savapi_conf = { - scan_mime_parts = true; - scan_text_mime = false; - scan_image_mime = false; - default_port = 4444, -- note: You must set ListenAddress in savapi.conf - product_id = 0, - log_clean = false, - timeout = 15.0, - retransmits = 2, - cache_expire = 3600, -- expire redis in one hour - message = default_message, - } - - for k,v in pairs(opts) do - savapi_conf[k] = v - end - - if not savapi_conf.prefix then - savapi_conf.prefix = 'rs_ap' - end - - if not savapi_conf['servers'] then - rspamd_logger.errx(rspamd_config, 'no servers defined') - - return nil - end - - savapi_conf['upstreams'] = upstream_list.create(rspamd_config, - savapi_conf['servers'], - savapi_conf.default_port) - - if savapi_conf['upstreams'] then - return savapi_conf - end - - rspamd_logger.errx(rspamd_config, 'cannot parse servers %s', - savapi_conf['servers']) - return nil -end - -local function message_not_too_large(task, content, rule) - local max_size = tonumber(rule['max_size']) - if not max_size then return true end - if #content > max_size then - rspamd_logger.infox("skip %s AV check as it is too large: %s (%s is allowed)", - rule.type, #content, max_size) - return false - end - return true -end - -local function need_av_check(task, content, rule) - return message_not_too_large(task, content, rule) -end - -local function check_av_cache(task, digest, rule, fn) - local key = digest - - local function redis_av_cb(err, data) - if data and type(data) == 'string' then - -- Cached - if data ~= 'OK' then - lua_util.debugm(N, task, 'got cached result for %s: %s', key, data) - data = rspamd_str_split(data, '\x30') - yield_result(task, rule, data) - else - lua_util.debugm(N, task, 'got cached result for %s: %s', key, data) - end - else - if err then - rspamd_logger.errx(task, 'Got error checking cache: %1', err) - end - fn() - end - end - - if redis_params then - - key = rule['prefix'] .. key - - if rspamd_redis_make_request(task, - redis_params, -- connect params - key, -- hash key - false, -- is write - redis_av_cb, --callback - 'GET', -- command - {key} -- arguments) - ) then - return true - end - end - - return false -end - -local function save_av_cache(task, digest, rule, to_save) - local key = digest - - local function redis_set_cb(err) - -- Do nothing - if err then - rspamd_logger.errx(task, 'failed to save virus cache for %s -> "%s": %s', - to_save, key, err) - else - lua_util.debugm(N, task, 'saved cached result for %s: %s', key, to_save) - end - end - - if type(to_save) == 'table' then - to_save = table.concat(to_save, '\x30') - end - - if redis_params then - key = rule['prefix'] .. key - - rspamd_redis_make_request(task, - redis_params, -- connect params - key, -- hash key - true, -- is write - redis_set_cb, --callback - 'SETEX', -- command - { key, rule['cache_expire'], to_save } - ) - end - - return false -end - -local function fprot_check(task, content, digest, rule) - local function fprot_check_uncached () - local upstream = rule.upstreams:get_upstream_round_robin() - local addr = upstream:get_addr() - local retransmits = rule.retransmits - local scan_id = task:get_queue_id() - if not scan_id then scan_id = task:get_uid() end - local header = string.format('SCAN STREAM %s SIZE %d\n', scan_id, - #content) - local footer = '\n' - - local function fprot_callback(err, data) - if err then - -- set current upstream to fail because an error occurred - upstream:fail() - - -- retry with another upstream until retransmits exceeds - if retransmits > 0 then - - retransmits = retransmits - 1 - - -- Select a different upstream! - upstream = rule.upstreams:get_upstream_round_robin() - addr = upstream:get_addr() - - lua_util.debugm(N, task, '%s [%s]: retry IP: %s', rule['symbol'], rule['type'], addr) - - tcp.request({ - task = task, - host = addr:to_string(), - port = addr:get_port(), - timeout = rule['timeout'], - callback = fprot_callback, - data = { header, content, footer }, - stop_pattern = '\n' - }) - else - rspamd_logger.errx(task, '%s [%s]: failed to scan, maximum retransmits exceed', rule['symbol'], rule['type']) - task:insert_result(rule['symbol_fail'], 0.0, 'failed to scan and retransmits exceed') - end - else - upstream:ok() - data = tostring(data) - local cached - local clean = string.match(data, '^0 <clean>') - if clean then - cached = 'OK' - if rule['log_clean'] then - rspamd_logger.infox(task, '%s [%s]: message or mime_part is clean', rule['symbol'], rule['type']) - end - else - -- returncodes: 1: infected, 2: suspicious, 3: both, 4-255: some error occured - -- see http://www.f-prot.com/support/helpfiles/unix/appendix_c.html for more detail - local vname = string.match(data, '^[1-3] <[%w%s]-: (.-)>') - if not vname then - rspamd_logger.errx(task, 'Unhandled response: %s', data) - else - yield_result(task, rule, vname) - cached = vname - end - end - if cached then - save_av_cache(task, digest, rule, cached) - end - end - end - - tcp.request({ - task = task, - host = addr:to_string(), - port = addr:get_port(), - timeout = rule['timeout'], - callback = fprot_callback, - data = { header, content, footer }, - stop_pattern = '\n' - }) - end - - if need_av_check(task, content, rule) then - if check_av_cache(task, digest, rule, fprot_check_uncached) then - return - else - fprot_check_uncached() - end - end -end - -local function clamav_check(task, content, digest, rule) - local function clamav_check_uncached () - local upstream = rule.upstreams:get_upstream_round_robin() - local addr = upstream:get_addr() - local retransmits = rule.retransmits - local header = rspamd_util.pack("c9 c1 >I4", "zINSTREAM", "\0", - #content) - local footer = rspamd_util.pack(">I4", 0) - - local function clamav_callback(err, data) - if err then - - -- set current upstream to fail because an error occurred - upstream:fail() - - -- retry with another upstream until retransmits exceeds - if retransmits > 0 then - - retransmits = retransmits - 1 - - -- Select a different upstream! - upstream = rule.upstreams:get_upstream_round_robin() - addr = upstream:get_addr() - - lua_util.debugm(N, task, '%s [%s]: retry IP: %s', rule['symbol'], rule['type'], addr) - - tcp.request({ - task = task, - host = addr:to_string(), - port = addr:get_port(), - timeout = rule['timeout'], - callback = clamav_callback, - data = { header, content, footer }, - stop_pattern = '\0' - }) - else - rspamd_logger.errx(task, '%s [%s]: failed to scan, maximum retransmits exceed', rule['symbol'], rule['type']) - task:insert_result(rule['symbol_fail'], 0.0, 'failed to scan and retransmits exceed') - end - - else - upstream:ok() - data = tostring(data) - local cached - lua_util.debugm(N, task, '%s [%s]: got reply: %s', rule['symbol'], rule['type'], data) - if data == 'stream: OK' then - cached = 'OK' - if rule['log_clean'] then - rspamd_logger.infox(task, '%s [%s]: message or mime_part is clean', rule['symbol'], rule['type']) - else - lua_util.debugm(N, task, '%s [%s]: message or mime_part is clean', rule['symbol'], rule['type']) - end - else - local vname = string.match(data, 'stream: (.+) FOUND') - if vname then - yield_result(task, rule, vname) - cached = vname - else - rspamd_logger.errx(task, 'unhandled response: %s', data) - task:insert_result(rule['symbol_fail'], 0.0, 'unhandled response') - end - end - if cached then - save_av_cache(task, digest, rule, cached) - end - end - end - - tcp.request({ - task = task, - host = addr:to_string(), - port = addr:get_port(), - timeout = rule['timeout'], - callback = clamav_callback, - data = { header, content, footer }, - stop_pattern = '\0' - }) - end - - if need_av_check(task, content, rule) then - if check_av_cache(task, digest, rule, clamav_check_uncached) then - return - else - clamav_check_uncached() - end - end -end - -local function sophos_check(task, content, digest, rule) - local function sophos_check_uncached () - local upstream = rule.upstreams:get_upstream_round_robin() - local addr = upstream:get_addr() - local retransmits = rule.retransmits - local protocol = 'SSSP/1.0\n' - local streamsize = string.format('SCANDATA %d\n', #content) - local bye = 'BYE\n' - - local function sophos_callback(err, data, conn) - - if err then - - -- set current upstream to fail because an error occurred - upstream:fail() - - -- retry with another upstream until retransmits exceeds - if retransmits > 0 then - - retransmits = retransmits - 1 - - -- Select a different upstream! - upstream = rule.upstreams:get_upstream_round_robin() - addr = upstream:get_addr() - - lua_util.debugm(N, task, '%s [%s]: retry IP: %s', rule['symbol'], rule['type'], addr) - - tcp.request({ - task = task, - host = addr:to_string(), - port = addr:get_port(), - timeout = rule['timeout'], - callback = sophos_callback, - data = { protocol, streamsize, content, bye } - }) - else - rspamd_logger.errx(task, '%s [%s]: failed to scan, maximum retransmits exceed', rule['symbol'], rule['type']) - task:insert_result(rule['symbol_fail'], 0.0, 'failed to scan and retransmits exceed') - end - else - upstream:ok() - data = tostring(data) - lua_util.debugm(N, task, '%s [%s]: got reply: %s', rule['symbol'], rule['type'], data) - local vname = string.match(data, 'VIRUS (%S+) ') - if vname then - yield_result(task, rule, vname) - save_av_cache(task, digest, rule, vname) - else - if string.find(data, 'DONE OK') then - if rule['log_clean'] then - rspamd_logger.infox(task, '%s [%s]: message or mime_part is clean', rule['symbol'], rule['type']) - else - lua_util.debugm(N, task, '%s [%s]: message or mime_part is clean', rule['symbol'], rule['type']) - end - save_av_cache(task, digest, rule, 'OK') - -- not finished - continue - elseif string.find(data, 'ACC') or string.find(data, 'OK SSSP') then - conn:add_read(sophos_callback) - -- set pseudo virus if configured, else do nothing since it's no fatal - elseif string.find(data, 'FAIL 0212') then - rspamd_logger.infox(task, 'Message is ENCRYPTED (0212 SOPHOS_SAVI_ERROR_FILE_ENCRYPTED): %s', data) - if rule['savdi_report_encrypted'] then - yield_result(task, rule, "SAVDI_FILE_ENCRYPTED") - save_av_cache(task, digest, rule, "SAVDI_FILE_ENCRYPTED") - end - -- set pseudo virus if configured, else set fail since part was not scanned - elseif string.find(data, 'REJ 4') then - if rule['savdi_report_oversize'] then - rspamd_logger.infox(task, 'SAVDI: Message is OVERSIZED (SSSP reject code 4): %s', data) - yield_result(task, rule, "SAVDI_FILE_OVERSIZED") - save_av_cache(task, digest, rule, "SAVDI_FILE_OVERSIZED") - else - rspamd_logger.errx(task, 'SAVDI: Message is OVERSIZED (SSSP reject code 4): %s', data) - task:insert_result(rule['symbol_fail'], 0.0, 'Message is OVERSIZED (SSSP reject code 4):' .. data) - end - -- excplicitly set REJ1 message when SAVDIreports a protocol error - elseif string.find(data, 'REJ 1') then - rspamd_logger.errx(task, 'SAVDI (Protocol error (REJ 1)): %s', data) - task:insert_result(rule['symbol_fail'], 0.0, 'SAVDI (Protocol error (REJ 1)):' .. data) - else - rspamd_logger.errx(task, 'unhandled response: %s', data) - task:insert_result(rule['symbol_fail'], 0.0, 'unhandled response') - end - - end - end - end - - tcp.request({ - task = task, - host = addr:to_string(), - port = addr:get_port(), - timeout = rule['timeout'], - callback = sophos_callback, - data = { protocol, streamsize, content, bye } - }) - end - - if need_av_check(task, content, rule) then - if check_av_cache(task, digest, rule, sophos_check_uncached) then - return - else - sophos_check_uncached() - end - end -end - -local function savapi_check(task, content, digest, rule) - local function savapi_check_uncached () - local upstream = rule.upstreams:get_upstream_round_robin() - local addr = upstream:get_addr() - local retransmits = rule.retransmits - local message_file = task:store_in_file(tonumber("0644", 8)) - local vnames = {} - - -- Forward declaration for recursive calls - local savapi_scan1_cb - - local function savapi_fin_cb(err, conn) - local vnames_reordered = {} - -- Swap table - for virus,_ in pairs(vnames) do - table.insert(vnames_reordered, virus) - end - lua_util.debugm(N, task, "%s: number of virus names found %s", rule['type'], #vnames_reordered) - if #vnames_reordered > 0 then - local vname = {} - for _,virus in ipairs(vnames_reordered) do - table.insert(vname, virus) - end - - yield_result(task, rule, vname) - save_av_cache(task, digest, rule, vname) - end - if conn then - conn:close() - end - end - - local function savapi_scan2_cb(err, data, conn) - local result = tostring(data) - lua_util.debugm(N, task, "%s: got reply: %s", rule['type'], result) - - -- Terminal response - clean - if string.find(result, '200') or string.find(result, '210') then - if rule['log_clean'] then - rspamd_logger.infox(task, '%s: message or mime_part is clean', rule['type']) - end - save_av_cache(task, digest, rule, 'OK') - conn:add_write(savapi_fin_cb, 'QUIT\n') - - -- Terminal response - infected - elseif string.find(result, '319') then - conn:add_write(savapi_fin_cb, 'QUIT\n') - - -- Non-terminal response - elseif string.find(result, '310') then - local virus - virus = result:match "310.*<<<%s(.*)%s+;.*;.*" - if not virus then - virus = result:match "310%s(.*)%s+;.*;.*" - if not virus then - rspamd_logger.errx(task, "%s: virus result unparseable: %s", rule['type'], result) - return - end - end - -- Store unique virus names - vnames[virus] = 1 - -- More content is expected - conn:add_write(savapi_scan1_cb, '\n') - end - end - - savapi_scan1_cb = function(err, conn) - conn:add_read(savapi_scan2_cb, '\n') - end - - -- 100 PRODUCT:xyz - local function savapi_greet2_cb(err, data, conn) - local result = tostring(data) - if string.find(result, '100 PRODUCT') then - lua_util.debugm(N, task, "%s: scanning file: %s", rule['type'], message_file) - conn:add_write(savapi_scan1_cb, {string.format('SCAN %s\n', message_file)}) - else - rspamd_logger.errx(task, '%s: invalid product id %s', rule['type'], rule['product_id']) - conn:add_write(savapi_fin_cb, 'QUIT\n') - end - end - - local function savapi_greet1_cb(err, conn) - conn:add_read(savapi_greet2_cb, '\n') - end - - local function savapi_callback_init(err, data, conn) - if err then - - -- set current upstream to fail because an error occurred - upstream:fail() - - -- retry with another upstream until retransmits exceeds - if retransmits > 0 then - - retransmits = retransmits - 1 - - -- Select a different upstream! - upstream = rule.upstreams:get_upstream_round_robin() - addr = upstream:get_addr() - - lua_util.debugm(N, task, '%s [%s]: retry IP: %s', rule['symbol'], rule['type'], addr) - - tcp.request({ - task = task, - host = addr:to_string(), - port = addr:get_port(), - timeout = rule['timeout'], - callback = savapi_callback_init, - stop_pattern = {'\n'}, - }) - else - rspamd_logger.errx(task, '%s [%s]: failed to scan, maximum retransmits exceed', rule['symbol'], rule['type']) - task:insert_result(rule['symbol_fail'], 0.0, 'failed to scan and retransmits exceed') - end - else - upstream:ok() - local result = tostring(data) - - -- 100 SAVAPI:4.0 greeting - if string.find(result, '100') then - conn:add_write(savapi_greet1_cb, {string.format('SET PRODUCT %s\n', rule['product_id'])}) - end - end - end - - tcp.request({ - task = task, - host = addr:to_string(), - port = addr:get_port(), - timeout = rule['timeout'], - callback = savapi_callback_init, - stop_pattern = {'\n'}, - }) - end - - if need_av_check(task, content, rule) then - if check_av_cache(task, digest, rule, savapi_check_uncached) then - return - else - savapi_check_uncached() - end - end -end - -local av_types = { - clamav = { - configure = clamav_config, - check = clamav_check - }, - fprot = { - configure = fprot_config, - check = fprot_check - }, - sophos = { - configure = sophos_config, - check = sophos_check - }, - savapi = { - configure = savapi_config, - check = savapi_check - }, -} local function add_antivirus_rule(sym, opts) if not opts['type'] then @@ -830,8 +75,14 @@ local function add_antivirus_rule(sym, opts) return nil end - if not opts['symbol'] then opts['symbol'] = sym end - local cfg = av_types[opts['type']] + if not opts['symbol'] then opts['symbol'] = sym:upper() end + local cfg = lua_antivirus.av_types[opts['type']] + + if not cfg then + rspamd_logger.errx(rspamd_config, 'unknown antivirus type: %s', + opts['type']) + return nil + end if not opts['symbol_fail'] then opts['symbol_fail'] = string.upper(opts['type']) .. '_FAIL' @@ -845,15 +96,10 @@ local function add_antivirus_rule(sym, opts) end -- WORKAROUND for deprecated attachments_only - if not cfg then - rspamd_logger.errx(rspamd_config, 'unknown antivirus type: %s', - opts['type']) - end - local rule = cfg.configure(opts) rule.type = opts.type rule.symbol_fail = opts.symbol_fail - + rule.redis_params = redis_params if not rule then rspamd_logger.errx(rspamd_config, 'cannot configure %s for %s', @@ -890,21 +136,20 @@ local function add_antivirus_rule(sym, opts) if rule.scan_mime_parts then local parts = task:get_parts() or {} - for _,p in ipairs(parts) do - if ( - (p:is_image() and rule.scan_image_mime) - or (p:is_text() and rule.scan_text_mime) - or (p:is_multipart() and rule.scan_text_mime) - or (not p:is_image() and not p:is_text() and not p:is_multipart()) - ) then + local filter_func = function(p) + return (rule.scan_image_mime and p:is_image()) + or (rule.scan_text_mime and p:is_text()) + or (p:is_attachment()) + end - local content = p:get_content() + fun.each(function(p) + local content = p:get_content() - if content and #content > 0 then - cfg.check(task, content, p:get_digest(), rule) - end + if content and #content > 0 then + cfg.check(task, content, p:get_digest(), rule) end - end + end, fun.filter(filter_func, parts)) + else cfg.check(task, task:get_content(), task:get_digest(), rule) end @@ -917,8 +162,10 @@ if opts and type(opts) == 'table' then redis_params = rspamd_parse_redis_server('antivirus') local has_valid = false for k, m in pairs(opts) do - if type(m) == 'table' and m['type'] and m['servers'] then + if type(m) == 'table' and m.servers then + if not m.type then m.type = k end local cb = add_antivirus_rule(k, m) + if not cb then rspamd_logger.errx(rspamd_config, 'cannot add rule: "' .. k .. '"') else diff --git a/src/plugins/lua/arc.lua b/src/plugins/lua/arc.lua index eeae65289..e940db619 100644 --- a/src/plugins/lua/arc.lua +++ b/src/plugins/lua/arc.lua @@ -483,9 +483,16 @@ local function arc_sign_seal(task, params, header) sha_ctx:update(s) lua_util.debugm(N, task, 'initial update signature with header: %s', s) + local nl_type + if task:has_flag("milter") then + nl_type = "lf" + else + nl_type = task:get_newlines_type() + end + local sig = rspamd_rsa.sign_memory(privkey, sha_ctx:bin()) cur_arc_seal = string.format('%s%s', cur_arc_seal, - sig:base64()) + sig:base64(70, nl_type)) task:set_milter_reply({ add_headers = { @@ -591,7 +598,7 @@ local function arc_signing_cb(task) if err and err == 'No such file or directory' then lua_util.debugm(N, task, 'cannot read key from %s: %s', p.key, err) else - rspamd_logger.warnx(N, task, 'cannot read key from %s: %s', p.key, err) + rspamd_logger.warnx(task, 'cannot read key from %s: %s', p.key, err) end return false end diff --git a/src/plugins/lua/asn.lua b/src/plugins/lua/asn.lua index 86f1c42d2..2e5b8466e 100644 --- a/src/plugins/lua/asn.lua +++ b/src/plugins/lua/asn.lua @@ -74,8 +74,9 @@ local function asn_check(task) asn_set(parts[1], parts[2], parts[3]) end - task:get_resolver():resolve_txt(task:get_session(), task:get_mempool(), - req_name, rspamd_dns_cb) + task:get_resolver():resolve_txt({task = task, + name = req_name, + callback = rspamd_dns_cb}) end local ip = task:get_from_ip() diff --git a/src/plugins/lua/bayes_expiry.lua b/src/plugins/lua/bayes_expiry.lua index 9495cf0cd..d15ed43ac 100644 --- a/src/plugins/lua/bayes_expiry.lua +++ b/src/plugins/lua/bayes_expiry.lua @@ -44,15 +44,19 @@ local function check_redis_classifier(cls, cfg) if cls.new_schema then local symbol_spam, symbol_ham local expiry = (cls.expiry or cls.expire) + if type(expiry) == 'table' then + expiry = expiry[1] + end + if cls.lazy then settings.lazy = cls.lazy end -- Load symbols from statfiles - local statfiles = cls.statfile - for _,stf in ipairs(statfiles) do - local symbol = stf.symbol or 'undefined' + + local function check_statfile_table(tbl, def_sym) + local symbol = tbl.symbol or def_sym local spam - if stf.spam then - spam = stf.spam + if tbl.spam then + spam = tbl.spam else if string.match(symbol:upper(), 'SPAM') then spam = true @@ -68,7 +72,27 @@ local function check_redis_classifier(cls, cfg) end end - if not symbol_spam or not symbol_ham or not expiry then + local statfiles = cls.statfile + if statfiles[1] then + for _,stf in ipairs(statfiles) do + if not stf.symbol then + for k,v in pairs(stf) do + check_statfile_table(v, k) + end + else + check_statfile_table(stf, 'undefined') + end + end + else + for stn,stf in pairs(statfiles) do + check_statfile_table(stf, stn) + end + end + + if not symbol_spam or not symbol_ham or type(expiry) ~= 'number' then + logger.debugm(N, rspamd_config, + 'disable expiry for classifier %s: no expiry %s', + symbol_spam, cls) return end -- Now try to load redis_params if needed @@ -88,6 +112,9 @@ local function check_redis_classifier(cls, cfg) return end + logger.debugm(N, rspamd_config, "enabled expiry for %s/%s -> %s expiry", + symbol_spam, symbol_ham, expiry) + table.insert(settings.classifiers, { symbol_spam = symbol_spam, symbol_ham = symbol_ham, @@ -391,24 +418,17 @@ rspamd_config:add_on_load(function (_, ev_base, worker) local unique_redis_params = {} -- Push redis script to all unique redis servers for _,cls in ipairs(settings.classifiers) do - local seen = false - for _,rp in ipairs(unique_redis_params) do - if lutil.table_cmp(rp, cls.redis_params) then - seen = true - end - end - - if not seen then - table.insert(unique_redis_params, cls.redis_params) + if not unique_redis_params[cls.redis_params.hash] then + unique_redis_params[cls.redis_params.hash] = cls.redis_params end end - for _,rp in ipairs(unique_redis_params) do + for h,rp in pairs(unique_redis_params) do local script_id = lredis.add_redis_script(lutil.template(expiry_script, template), rp) for _,cls in ipairs(settings.classifiers) do - if lutil.table_cmp(rp, cls.redis_params) then + if cls.redis_params.hash == h then cls.script = script_id end end diff --git a/src/plugins/lua/clickhouse.lua b/src/plugins/lua/clickhouse.lua index d95325b24..5fc642760 100644 --- a/src/plugins/lua/clickhouse.lua +++ b/src/plugins/lua/clickhouse.lua @@ -50,6 +50,7 @@ local settings = { full_urls = false, from_tables = nil, enable_symbols = false, + database = 'default', use_https = false, use_gzip = true, allow_local = false, @@ -285,6 +286,7 @@ local function clickhouse_send_data(task) end local function clickhouse_collect(task) + if task:has_flag('skip') then return end if not settings.allow_local and rspamd_lua_utils.is_rspamc_or_controller(task) then return end for _,sym in ipairs(settings.stop_symbols) do @@ -488,27 +490,26 @@ local function clickhouse_collect(task) table.insert(row, {}) end + local flatten_urls = function(f, ...) + return fun.totable(fun.map(function(k,v) return f(k,v) end, ...)) + end + -- Urls step - local urls_tlds = {} local urls_urls = {} if task:has_urls(false) then + for _,u in ipairs(task:get_urls(false)) do - urls_tlds[u:get_tld()] = true if settings['full_urls'] then - urls_urls[u:get_text()] = true + urls_urls[u:get_text()] = u else - urls_urls[u:get_host()] = true + urls_urls[u:get_host()] = u end end - end - - local flatten_urls = function(...) - return fun.totable(fun.map(function(k,_) return k end, ...)) - end - if #urls_tlds > 0 then - table.insert(row, flatten_urls(urls_tlds)) - table.insert(row, flatten_urls(urls_urls)) + -- Get tlds + table.insert(row, flatten_urls(function(_,u) return u:get_tld() end, urls_urls)) + -- Get hosts/full urls + table.insert(row, flatten_urls(function(k, _) return k end, urls_urls)) else table.insert(row, {}) table.insert(row, {}) @@ -516,9 +517,10 @@ local function clickhouse_collect(task) -- Emails step if task:has_urls(true) then - table.insert(row, flatten_urls(fun.map(function(u) - return string.format('%s@%s', u:get_user(), u:get_host()),true - end, task:get_emails()))) + table.insert(row, flatten_urls(function(k, _) return k end, + fun.map(function(u) + return string.format('%s@%s', u:get_user(), u:get_host()),true + end, task:get_emails()))) else table.insert(row, {}) end diff --git a/src/plugins/lua/clustering.lua b/src/plugins/lua/clustering.lua index d6c78ef79..a5d72fc64 100644 --- a/src/plugins/lua/clustering.lua +++ b/src/plugins/lua/clustering.lua @@ -186,18 +186,21 @@ local function clusterting_filter_cb(task, rule) end local function clusterting_idempotent_cb(task, rule) - local action = task:get_action() + if task:has_flag('skip') then return end + if not rule.allow_local and lua_util.is_rspamc_or_controller(task) then return end + + local verdict = lua_util.get_task_verdict(task) local score - if action == 'no action' then + if verdict == 'ham' then score = rule.ham_mult - elseif action == 'reject' then + elseif verdict == 'spam' then score = rule.spam_mult - elseif action == 'add header' or action == 'rewrite subject' then + elseif verdict == 'junk' then score = rule.junk_mult else - rspamd_logger.debugm(N, task, 'skip rule %s, action=%s', - rule.name, action) + rspamd_logger.debugm(N, task, 'skip rule %s, verdict=%s', + rule.name, verdict) return end diff --git a/src/plugins/lua/dcc.lua b/src/plugins/lua/dcc.lua index 311dc608e..8c5dddeeb 100644 --- a/src/plugins/lua/dcc.lua +++ b/src/plugins/lua/dcc.lua @@ -128,10 +128,11 @@ local function check_dcc (task) else rspamd_logger.errx(task, 'failed to scan, maximum retransmits exceed') - upstream:fail() + if upstream then upstream:fail() end end else -- Parse the response + if upstream then upstream:ok() end local _,_,result,disposition,header = tostring(data):find("(.-)\n(.-)\n(.-)\n") lua_util.debugm(N, task, 'DCC result=%1 disposition=%2 header="%3"', result, disposition, header) diff --git a/src/plugins/lua/dkim_signing.lua b/src/plugins/lua/dkim_signing.lua index 8d621bbb2..77acc2f61 100644 --- a/src/plugins/lua/dkim_signing.lua +++ b/src/plugins/lua/dkim_signing.lua @@ -163,7 +163,7 @@ local function dkim_signing_cb(task) if err and err == 'No such file or directory' then lua_util.debugm(N, task, 'cannot read key from "%s": %s', p.key, err) else - rspamd_logger.warnx(N, task, 'cannot read key from "%s": %s', p.key, err) + rspamd_logger.warnx(task, 'cannot read key from "%s": %s', p.key, err) end return false end diff --git a/src/plugins/lua/dmarc.lua b/src/plugins/lua/dmarc.lua index b55c5d41f..1bac6fb29 100644 --- a/src/plugins/lua/dmarc.lua +++ b/src/plugins/lua/dmarc.lua @@ -415,12 +415,23 @@ local function dmarc_validate_policy(task, policy, hdrfromdom, dmarc_esld) policy.domain .. ' : ' .. reason_str, policy.dmarc_policy) disposition = what else - if (math.random(100) > policy.pct) then + local coin = math.random(100) + if (coin > policy.pct) then if (not no_sampling_domains or not no_sampling_domains:get_key(policy.domain)) then - task:insert_result(dmarc_symbols['softfail'], 1.0, + + if what == 'reject' then + disposition = 'quarantine' + else + disposition = 'softfail' + end + + task:insert_result(dmarc_symbols[disposition], 1.0, policy.domain .. ' : ' .. reason_str, policy.dmarc_policy, "sampled_out") sampled_out = true + lua_util.debugm(N, task, + 'changed dmarc policy from %s to %s, sampled out: %s < %s', + what, disposition, coin, policy.pct) else task:insert_result(dmarc_symbols[what], 1.0, policy.domain .. ' : ' .. reason_str, policy.dmarc_policy, "local_policy") diff --git a/src/plugins/lua/elastic.lua b/src/plugins/lua/elastic.lua index 87e01063a..aa3702112 100644 --- a/src/plugins/lua/elastic.lua +++ b/src/plugins/lua/elastic.lua @@ -20,7 +20,6 @@ local rspamd_http = require "rspamd_http" local lua_util = require "lua_util" local util = require "rspamd_util" local ucl = require "ucl" -local hash = require "rspamd_cryptobox_hash" local rspamd_redis = require "lua_redis" local upstream_list = require "rspamd_upstream_list" @@ -30,6 +29,7 @@ end local rows = {} local nrows = 0 +local failed_sends = 0 local elastic_template local redis_params local N = "elastic" @@ -37,7 +37,7 @@ local E = {} local connect_prefix = 'http://' local enabled = true local settings = { - limit = 10, + limit = 500, index_pattern = 'rspamd-%Y.%m.%d', template_file = rspamd_paths['PLUGINSDIR'] .. '/elastic/rspamd_template.json', kibana_file = rspamd_paths['PLUGINSDIR'] ..'/elastic/kibana.json', @@ -52,6 +52,7 @@ local settings = { user = nil, password = nil, no_ssl_verify = false, + max_fail = 3, } local function read_file(path) @@ -78,34 +79,7 @@ local function elastic_send_data(task) local push_url = connect_prefix .. ip_addr .. '/'..es_index..'/_bulk' local bulk_json = table.concat(tbl, "\n") - local function http_index_data_callback(err, code, body, _) - -- todo error handling we may store the rows it into redis and send it again late - lua_util.debugm(N, task, "After create data %1", body) - if code ~= 200 then - rspamd_logger.infox(task, "cannot push data to elastic backend (%s): %s (%s)", - push_url, err, code) - if settings['failover'] then - local h = hash.create() - h:update(bulk_json) - local key = settings['key_prefix'] ..es_index..":".. h:base32():sub(1, 20) - local data = util.zstd_compress(bulk_json) - local function redis_set_cb(rerr) - if rerr ~=nil then - rspamd_logger.errx(task, 'redis_set_cb received error: %1', rerr) - end - end - rspamd_redis.make_request(task, - redis_params, -- connect params - key, -- hash key - true, -- is write - redis_set_cb, --callback - 'SETEX', -- command - {key, tostring(settings['expire']), data} -- arguments - ) - end - end - end - rspamd_http.request({ + local err, response = rspamd_http.request({ url = push_url, headers = { ['Content-Type'] = 'application/x-ndjson', @@ -117,11 +91,27 @@ local function elastic_send_data(task) no_ssl_verify = settings.no_ssl_verify, user = settings.user, password = settings.password, - callback = http_index_data_callback, timeout = settings.timeout, }) + if err then + rspamd_logger.infox(task, "cannot push data to elastic backend (%s): %s; failed attempts: %s/%s", + push_url, err, failed_sends, settings.max_fail) + else + if response.code ~= 200 then + rspamd_logger.infox(task, "cannot push data to elastic backend (%s): wrong http code %s (%s); failed attempts: %s/%s", + push_url, err, response.code, failed_sends, settings.max_fail) + else + lua_util.debugm(N, task, "successfully sent %s (%s bytes) rows to ES", + nrows, #bulk_json) + + return true + end + end + + return false end + local function get_general_metadata(task) local r = {} local ip_addr = task:get_ip() @@ -175,6 +165,7 @@ local function get_general_metadata(task) else r.from = 'unknown' end + local syminf = task:get_symbols_all() r.symbols = syminf r.asn = {} @@ -182,6 +173,7 @@ local function get_general_metadata(task) r.asn.country = pool:get_variable("country") or 'unknown' r.asn.asn = pool:get_variable("asn") or 0 r.asn.ipnet = pool:get_variable("ipnet") or 'unknown' + local function process_header(name) local hdr = task:get_header_full(name) if hdr then @@ -208,15 +200,30 @@ end local function elastic_collect(task) if not enabled then return end + if task:has_flag('skip') then return end if not settings.allow_local and lua_util.is_rspamc_or_controller(task) then return end + local row = {['rspamd_meta'] = get_general_metadata(task), ['@timestamp'] = tostring(util.get_time() * 1000)} table.insert(rows, row) nrows = nrows + 1 if nrows > settings['limit'] then - elastic_send_data(task) - nrows = 0 - rows = {} + lua_util.debugm(N, task, 'send elastic search rows: %s', nrows) + if elastic_send_data(task) then + nrows = 0 + rows = {} + failed_sends = 0; + else + failed_sends = failed_sends + 1 + + if failed_sends > settings.max_fail then + rspamd_logger.errx(task, 'cannot send %s rows to ES %s times, stop trying', + nrows, failed_sends) + nrows = 0 + rows = {} + failed_sends = 0; + end + end end end @@ -244,6 +251,7 @@ local function check_elastic_server(cfg, ev_base, _) for _,plugin in pairs(value['plugins']) do if plugin['name'] == 'ingest-geoip' then plugin_found = true + lua_util.debugm(N, "ingest-geoip plugin has been found") end end if not plugin_found then @@ -274,7 +282,7 @@ end -- import ingest pipeline and kibana dashboard/visualization local function initial_setup(cfg, ev_base, worker) - if not (worker:get_name() == 'controller' and worker:get_index() == 0) then return end + if not worker:is_primary_controller() then return end local upstream = settings.upstream:get_upstream_round_robin() local ip_addr = upstream:get_addr():to_string(true) diff --git a/src/plugins/lua/emails.lua b/src/plugins/lua/emails.lua index 282f53bb4..1727e49e1 100644 --- a/src/plugins/lua/emails.lua +++ b/src/plugins/lua/emails.lua @@ -185,14 +185,15 @@ local function gen_check_emails(rule) end local replyto = get_raw_header('Reply-To') - if not replyto then return false end - local rt = util.parse_mail_address(replyto, task:get_mempool()) - - if rt and rt[1] then - rspamd_lua_utils.remove_email_aliases(rt[1]) - if not checked[rt[1].addr] then - check_email_rule(task, rule, rt[1]) - checked[rt[1].addr] = true + if replyto then + local rt = util.parse_mail_address(replyto, task:get_mempool()) + + if rt and rt[1] then + rspamd_lua_utils.remove_email_aliases(rt[1]) + if not checked[rt[1].addr] then + check_email_rule(task, rule, rt[1]) + checked[rt[1].addr] = true + end end end end diff --git a/src/plugins/lua/forged_recipients.lua b/src/plugins/lua/forged_recipients.lua index 887b1bf82..8abc55a50 100644 --- a/src/plugins/lua/forged_recipients.lua +++ b/src/plugins/lua/forged_recipients.lua @@ -40,36 +40,38 @@ local function check_forged_headers(task) if not mime_rcpt then return elseif #mime_rcpt == 0 then - local sra = smtp_rcpt[1].addr .. (#smtp_rcpt > 1 and ' ...' or '') - task:insert_result(symbol_rcpt, score, '', sra) return end -- Find pair for each smtp recipient in To or Cc headers for _,sr in ipairs(smtp_rcpt) do res = false for _,mr in ipairs(mime_rcpt) do - if mr['addr'] and sr['addr'] and - string.lower(mr['addr']) == string.lower(sr['addr']) then + if mr.addr and mr.addr ~= '' then + if sr['addr'] and + string.lower(mr['addr']) == string.lower(sr['addr']) then + res = true + break + elseif delivered_to and delivered_to == mr['addr'] then + -- allow alias expansion and forwarding (Postfix) + res = true + break + elseif auser and auser == sr['addr'] then + -- allow user to BCC themselves + res = true + break + elseif ((smtp_from or E)[1] or E).addr and + smtp_from[1]['addr'] == sr['addr'] then + -- allow sender to BCC themselves + res = true + break + elseif mr['user'] and sr['user'] and + string.lower(mr['user']) == string.lower(sr['user']) then + -- If we have the same username but for another domain, then + -- lower the overall score + score = score / 2 + end + else res = true - break - elseif delivered_to and delivered_to == mr['addr'] then - -- allow alias expansion and forwarding (Postfix) - res = true - break - elseif auser and auser == sr['addr'] then - -- allow user to BCC themselves - res = true - break - elseif ((smtp_from or E)[1] or E).addr and - smtp_from[1]['addr'] == sr['addr'] then - -- allow sender to BCC themselves - res = true - break - elseif mr['user'] and sr['user'] and - string.lower(mr['user']) == string.lower(sr['user']) then - -- If we have the same username but for another domain, then - -- lower the overall score - score = score / 2 end end if not res then diff --git a/src/plugins/lua/greylist.lua b/src/plugins/lua/greylist.lua index 7427f999e..5a1f6c0f9 100644 --- a/src/plugins/lua/greylist.lua +++ b/src/plugins/lua/greylist.lua @@ -43,7 +43,7 @@ end local redis_params local whitelisted_ip -local whitelist_domains_map = nil +local whitelist_domains_map local toint =math.ifloor or math.floor local settings = { expire = 86400, -- 1 day by default @@ -56,6 +56,8 @@ local settings = { ipv4_mask = 19, -- Mask bits for ipv4 ipv6_mask = 64, -- Mask bits for ipv6 report_time = false, -- Tell when greylisting is epired (appended to `message`) + check_local = false, + check_authed = false, } local rspamd_logger = require "rspamd_logger" @@ -132,7 +134,7 @@ local function envelope_key(task) end -- Returns pair of booleans: found,greylisted -local function check_time(task, tm, type) +local function check_time(task, tm, type, now) local t = tonumber(tm) if not t then @@ -140,7 +142,6 @@ local function check_time(task, tm, type) return false,false end - local now = rspamd_util.get_time() if now - t < settings['timeout'] then return true,true else @@ -154,7 +155,10 @@ end local function greylist_message(task, end_time, why) task:insert_result(settings['symbol'], 0.0, 'greylisted', end_time, why) - if rspamd_lua_utils.is_rspamc_or_controller(task) then return end + if not settings.check_local and rspamd_lua_utils.is_rspamc_or_controller(task) then + return + end + if settings.message_func then task:set_pre_result(settings['action'], settings.message_func(task, end_time), N) @@ -172,7 +176,9 @@ end local function greylist_check(task) local ip = task:get_ip() - if task:get_user() or (ip and ip:is_local()) then + if ((not settings.check_authed and task:get_user()) or + (not settings.check_local and ip and ip:is_local())) then + rspamd_logger.infox(task, "skip greylisting for local networks and/or authorized users"); return end @@ -195,36 +201,54 @@ local function greylist_check(task) local greylisted_meta = false if data then + local end_time_body,end_time_meta + local now = rspamd_util.get_time() + if data[1] and type(data[1]) ~= 'userdata' then - ret_body,greylisted_body = check_time(task, data[1], 'body') + local tm = tonumber(data[1]) or now + ret_body,greylisted_body = check_time(task, data[1], 'body', now) if greylisted_body then - local end_time = rspamd_util.time_to_string(rspamd_util.get_time() - + settings['timeout']) - task:get_mempool():set_variable("grey_greylisted_body", end_time) + end_time_body = tm + settings['timeout'] + task:get_mempool():set_variable("grey_greylisted_body", + rspamd_util.time_to_string(end_time_body)) end end + if data[2] and type(data[2]) ~= 'userdata' then if not ret_body or greylisted_body then - ret_meta,greylisted_meta = check_time(task, data[2], 'meta') + local tm = tonumber(data[2]) or now + ret_meta,greylisted_meta = check_time(task, data[2], 'meta', now) if greylisted_meta then - local end_time = rspamd_util.time_to_string(rspamd_util.get_time() - + settings['timeout']) - task:get_mempool():set_variable("grey_greylisted_meta", end_time) + end_time_meta = tm + settings['timeout'] + task:get_mempool():set_variable("grey_greylisted_meta", + rspamd_util.time_to_string(end_time_meta)) end end end + local how + local end_time_str + if not ret_body and not ret_meta then - local end_time = rspamd_util.time_to_string(rspamd_util.get_time() - + settings['timeout']) - task:get_mempool():set_variable("grey_greylisted", end_time) + -- no record found + task:get_mempool():set_variable("grey_greylisted", 'true') elseif greylisted_body and greylisted_meta then - local end_time = rspamd_util.time_to_string(rspamd_util.get_time() + - settings['timeout']) - rspamd_logger.infox(task, 'greylisted until "%s"', - end_time) - greylist_message(task, end_time, 'too early') + end_time_str = rspamd_util.time_to_string( + math.min(end_time_body, end_time_meta)) + how = 'meta and body' + elseif greylisted_body then + end_time_str = rspamd_util.time_to_string(end_time_body) + how = 'body only' + elseif greylisted_meta then + end_time_str = rspamd_util.time_to_string(end_time_meta) + how = 'meta only' + end + + if how and end_time_str then + rspamd_logger.infox(task, 'greylisted until "%s" (%s)', + end_time_str, how) + greylist_message(task, end_time_str, 'too early') end elseif err then rspamd_logger.errx(task, 'got error while getting greylisting keys: %1', err) @@ -265,7 +289,8 @@ local function greylist_set(task) end end - if task:get_user() or (ip and ip:is_local()) then + if ((not settings.check_authed and task:get_user()) or + (not settings.check_local and ip and ip:is_local())) then if action == 'greylist' then -- We are going to accept message rspamd_logger.infox(task, 'Downgrading metric action from "greylist" to "no action"') @@ -331,7 +356,7 @@ local function greylist_set(task) is_whitelisted, rspamd_util.time_to_string(rspamd_util.get_time() + settings['expire'])) - if is_rspamc then return end + if not settings.check_local and is_rspamc then return end ret,conn,upstream = rspamd_redis_make_request(task, redis_params, -- connect params @@ -350,7 +375,7 @@ local function greylist_set(task) rspamd_logger.errx(task, 'got error while connecting to redis') end elseif do_greylisting or do_greylisting_required then - if is_rspamc then return end + if not settings.check_local and is_rspamc then return end local t = tostring(toint(rspamd_util.get_time())) local end_time = rspamd_util.time_to_string(t + settings['timeout']) rspamd_logger.infox(task, 'greylisted until "%s", new record', end_time) diff --git a/src/plugins/lua/metadata_exporter.lua b/src/plugins/lua/metadata_exporter.lua index 57fe0f105..951142368 100644 --- a/src/plugins/lua/metadata_exporter.lua +++ b/src/plugins/lua/metadata_exporter.lua @@ -688,6 +688,7 @@ end local function gen_exporter(rule) return function (task) + if task:has_flag('skip') then return end local selector = rule.selector or 'default' local selected = selectors[selector](task) if selected then diff --git a/src/plugins/lua/milter_headers.lua b/src/plugins/lua/milter_headers.lua index 209985bbd..7659a3fe6 100644 --- a/src/plugins/lua/milter_headers.lua +++ b/src/plugins/lua/milter_headers.lua @@ -25,6 +25,7 @@ local logger = require "rspamd_logger" local util = require "rspamd_util" local N = 'milter_headers' local lua_util = require "lua_util" +local ts = require("tableshape").types local E = {} local HOSTNAME = util.get_hostname() @@ -518,9 +519,35 @@ local function milter_headers(task) end end -local opts = rspamd_config:get_all_opt(N) or rspamd_config:get_all_opt('rmilter_headers') +local config_schema = ts.shape{ + use = ts.array_of(ts.string) + ts.string / function(s) return {s} end, + remove_upstream_spam_flag = ts.boolean:is_optional(), + extended_spam_headers = ts.boolean:is_optional(), + skip_local = ts.boolean:is_optional(), + skip_authenticated = ts.boolean:is_optional(), + local_headers = ts.array_of(ts.string):is_optional(), + authenticated_headers = ts.array_of(ts.string):is_optional(), + extended_headers_rcpt = + (ts.array_of(ts.string) + ts.string / function(s) return {s} end):is_optional(), + custom = ts.map_of(ts.string, ts.string):is_optional(), +} + +local opts = rspamd_config:get_all_opt(N) or + rspamd_config:get_all_opt('rmilter_headers') + if not opts then return end +-- Process config +do + local res,err = config_schema:transform(opts) + if not res then + logger.errx(rspamd_config, 'invalid config for %s: %s', N, err) + return + else + opts = res + end +end + local have_routine = {} local function activate_routine(s) if settings.routines[s] or custom_routines[s] then @@ -537,33 +564,28 @@ local function activate_routine(s) logger.errx(rspamd_config, 'routine "%s" does not exist', s) end end -if opts['remove_upstream_spam_flag'] then activate_routine('remove-spam-flag') end -if opts['extended_spam_headers'] then + +if opts.remove_upstream_spam_flag ~= nil then + settings.remove_upstream_spam_flag = opts.remove_upstream_spam_flag +end + +if opts.extended_spam_headers then activate_routine('x-spamd-result') activate_routine('x-rspamd-server') activate_routine('x-rspamd-queue-id') end -if type(opts['use']) == 'string' then - opts['use'] = {opts['use']} -elseif (type(opts['use']) == 'table' and not opts['use'][1] and not next(active_routines)) then - logger.debugm(N, rspamd_config, 'no functions are enabled') - return -end -if type(opts['use']) ~= 'table' then - logger.errx(rspamd_config, 'unexpected type for "use" option: %s', type(opts['use'])) - return -end -if type(opts['local_headers']) == 'table' and opts['local_headers'][1] then - for _, h in ipairs(opts['local_headers']) do + +if opts.local_headers then + for _, h in ipairs(opts.local_headers) do settings.local_headers[h] = true end end -if type(opts['authenticated_headers']) == 'table' and opts['authenticated_headers'][1] then - for _, h in ipairs(opts['authenticated_headers']) do +if opts.authenticated_headers then + for _, h in ipairs(opts.authenticated_headers) do settings.authenticated_headers[h] = true end end -if type(opts['custom']) == 'table' then +if opts.custom then for k, v in pairs(opts['custom']) do local f, err = load(v) if not f then @@ -573,27 +595,35 @@ if type(opts['custom']) == 'table' then end end end + if type(opts['skip_local']) == 'boolean' then settings.skip_local = opts['skip_local'] end + if type(opts['skip_authenticated']) == 'boolean' then settings.skip_authenticated = opts['skip_authenticated'] end + for _, s in ipairs(opts['use']) do if not have_routine[s] then activate_routine(s) end end + +if settings.remove_upstream_spam_flag then + activate_routine('remove-spam-flag') +end + if (#active_routines < 1) then logger.errx(rspamd_config, 'no active routines') return end -logger.infox(rspamd_config, 'active routines [%s]', table.concat(active_routines, ',')) -if type(opts['extended_headers_rcpt']) == 'string' then - opts['extended_headers_rcpt'] = {opts['extended_headers_rcpt']} -end -if type(opts['extended_headers_rcpt']) == 'table' and opts['extended_headers_rcpt'][1] then - for _, e in ipairs(opts['extended_headers_rcpt']) do + +logger.infox(rspamd_config, 'active routines [%s]', + table.concat(active_routines, ',')) + +if opts.extended_headers_rcpt then + for _, e in ipairs(opts.extended_headers_rcpt) do if string.find(e, '^[^@]+@[^@]+$') then if not settings.extended_headers_rcpt.addr then settings.extended_headers_rcpt.addr = {} @@ -617,6 +647,7 @@ if type(opts['extended_headers_rcpt']) == 'table' and opts['extended_headers_rcp end end end + rspamd_config:register_symbol({ name = 'MILTER_HEADERS', type = 'idempotent', diff --git a/src/plugins/lua/mime_types.lua b/src/plugins/lua/mime_types.lua index e8ce709da..36407dac0 100644 --- a/src/plugins/lua/mime_types.lua +++ b/src/plugins/lua/mime_types.lua @@ -599,7 +599,7 @@ local full_extensions_map = { {"roff", "application/x-troff"}, {"rpm", "audio/x-pn-realaudio-plugin"}, {"rqy", "text/x-ms-rqy"}, - {"rtf", {"application/rtf","application/msword"}}, + {"rtf", {"application/rtf","application/msword", "text/richtext"}}, {"rtx", "text/richtext"}, {"rvt", "application/octet-stream" }, {"ruleset", "application/xml"}, @@ -813,7 +813,7 @@ local function check_mime_type(task) return ext[1],ext[2],parts end - local function check_filename(fname, ct, is_archive) + local function check_filename(fname, ct, is_archive, part) local ext,ext2,parts = gen_extension(fname) -- ext is the last extension, LOWERCASED -- ext2 is the one before last extension LOWERCASED @@ -834,12 +834,16 @@ local function check_mime_type(task) -- Double extension + bad extension == VERY bad task:insert_result(settings['symbol_double_extension'], badness_mult, string.format(".%s.%s", ext2, ext)) + task:insert_result('MIME_TRACE', 0.0, + string.format("%s:%s", part:get_id(), '-')) return end end if badness_mult then -- Just bad extension task:insert_result(settings['symbol_bad_extension'], badness_mult, ext) + task:insert_result('MIME_TRACE', 0.0, + string.format("%s:%s", part:get_id(), '-')) end end @@ -861,6 +865,8 @@ local function check_mime_type(task) if settings['archive_extensions'][ext] then -- Archive in archive task:insert_result(settings['symbol_archive_in_archive'], 1.0, ext) + task:insert_result('MIME_TRACE', 0.0, + string.format("%s:%s", part:get_id(), '-')) end else if ext2 then @@ -871,7 +877,10 @@ local function check_mime_type(task) -- Exclude multipart archive extensions, e.g. .zip.001 and not string.match(ext, '^%d+$') then - task:insert_result(settings['symbol_archive_in_archive'], 1.0, string.format(".%s.%s", ext2, ext)) + task:insert_result(settings['symbol_archive_in_archive'], + 1.0, string.format(".%s.%s", ext2, ext)) + task:insert_result('MIME_TRACE', 0.0, + string.format("%s:%s", part:get_id(), '-')) end else check_extension(settings['bad_extensions'][ext], nil) @@ -902,17 +911,24 @@ local function check_mime_type(task) if parts then for _,p in ipairs(parts) do local mtype,subtype = p:get_type() + local dtype,dsubtype = p:get_detected_type() if not mtype then task:insert_result(settings['symbol_unknown'], 1.0, 'missing content type') + task:insert_result('MIME_TRACE', 0.0, + string.format("%s:%s", p:get_id(), '~')) else -- Check for attachment local filename = p:get_filename() local ct = string.format('%s/%s', mtype, subtype):lower() + local detected_ct + if dtype and dsubtype then + detected_ct = string.format('%s/%s', dtype, dsubtype) + end if filename then filename = filename:gsub('[^%s%g]', '?') - check_filename(filename, ct, false) + check_filename(filename, ct, false, p) end if p:is_archive() then @@ -932,6 +948,8 @@ local function check_mime_type(task) if arch:is_encrypted() then task:insert_result(settings['symbol_encrypted_archive'], 1.0, filename) + task:insert_result('MIME_TRACE', 0.0, + string.format("%s:%s", p:get_id(), '-')) end if check then @@ -944,28 +962,60 @@ local function check_mime_type(task) end if f['encrypted'] then - task:insert_result(settings['symbol_encrypted_archive'], 1.0, f['name']) + task:insert_result(settings['symbol_encrypted_archive'], + 1.0, f['name']) + task:insert_result('MIME_TRACE', 0.0, + string.format("%s:%s", p:get_id(), '-')) end if f['name'] then - check_filename(f['name'], nil, true) + check_filename(f['name'], nil, true, p) end end end end if map then - local v = map:get_key(ct) + local v + local detected_different = false + if detected_ct and detected_ct ~= ct then + v = map:get_key(detected_ct) + detected_different = true + else + v = map:get_key(ct) + end if v then local n = tonumber(v) - if n > 0 then - task:insert_result(settings['symbol_bad'], n, ct) - elseif n < 0 then - task:insert_result(settings['symbol_good'], -n, ct) + if n then + if n > 0 then + if detected_different then + -- Penalize case + n = n * 1.5 + task:insert_result(settings['symbol_bad'], n, + string.format('%s:%s', ct, detected_ct)) + else + task:insert_result(settings['symbol_bad'], n, ct) + end + task:insert_result('MIME_TRACE', 0.0, + string.format("%s:%s", p:get_id(), '-')) + elseif n < 0 then + task:insert_result(settings['symbol_good'], -n, ct) + task:insert_result('MIME_TRACE', 0.0, + string.format("%s:%s", p:get_id(), '+')) + else + -- Neutral content type + task:insert_result('MIME_TRACE', 0.0, + string.format("%s:%s", p:get_id(), '~')) + end + else + logger.warnx(task, 'unknown value: "%s" for content type %s in the map', + v, ct) end else task:insert_result(settings['symbol_unknown'], 1.0, ct) + task:insert_result('MIME_TRACE', 0.0, + string.format("%s:%s", p:get_id(), '~')) end end end @@ -1071,6 +1121,13 @@ if opts then parent = id, group = 'mime_types', }) + rspamd_config:register_symbol({ + type = 'virtual,nostat', + name = 'MIME_TRACE', + parent = id, + group = 'mime_types', + score = 0, + }) else lua_util.disable_module(N, "config") end diff --git a/src/plugins/lua/neural.lua b/src/plugins/lua/neural.lua index 71ae3afb6..43d7bb127 100644 --- a/src/plugins/lua/neural.lua +++ b/src/plugins/lua/neural.lua @@ -904,6 +904,8 @@ local function check_anns(rule, _, ev_base) end local function ann_push_vector(task) + if task:has_flag('skip') then return end + if not settings.allow_local and lua_util.is_rspamc_or_controller(task) then return end local scores = task:get_metric_score() for _,rule in pairs(settings.rules) do local sid = "0" diff --git a/src/plugins/lua/ratelimit.lua b/src/plugins/lua/ratelimit.lua index 61d19966b..f2358a483 100644 --- a/src/plugins/lua/ratelimit.lua +++ b/src/plugins/lua/ratelimit.lua @@ -28,6 +28,7 @@ local lua_maps = require "lua_maps" local lua_util = require "lua_util" local rspamd_hash = require "rspamd_cryptobox_hash" local lua_selectors = require "lua_selectors" +local ts = require("tableshape").types -- A plugin that implements ratelimits using redis @@ -84,23 +85,26 @@ local bucket_check_script = [[ if last < tonumber(KEYS[2]) then local rate = tonumber(KEYS[3]) dynr = tonumber(redis.call('HGET', KEYS[1], 'dr')) / 10000.0 + if dynr == 0 then dynr = 0.0001 end rate = rate * dynr leaked = ((now - last) * rate) + if leaked > burst then leaked = burst end burst = burst - leaked redis.call('HINCRBYFLOAT', KEYS[1], 'b', -(leaked)) redis.call('HSET', KEYS[1], 'l', KEYS[2]) end + + dynb = tonumber(redis.call('HGET', KEYS[1], 'db')) / 10000.0 + if dynb == 0 then dynb = 0.0001 end + + if burst > 0 and (burst + 1) > tonumber(KEYS[4]) * dynb then + return {1, tostring(burst), tostring(dynr), tostring(dynb), tostring(leaked)} + end else burst = 0 redis.call('HSET', KEYS[1], 'b', '0') end - dynb = tonumber(redis.call('HGET', KEYS[1], 'db')) / 10000.0 - - if (burst + 1) > tonumber(KEYS[4]) * dynb then - return {1, tostring(burst), tostring(dynr), tostring(dynb), tostring(leaked)} - end - return {0, tostring(burst), tostring(dynr), tostring(dynb), tostring(leaked)} ]] local bucket_check_id @@ -132,20 +136,55 @@ local bucket_update_script = [[ return {1, 1, 1} end - local burst = tonumber(redis.call('HGET', KEYS[1], 'b')) - local db = tonumber(redis.call('HGET', KEYS[1], 'db')) / 10000 - local dr = tonumber(redis.call('HGET', KEYS[1], 'dr')) / 10000 + local dr, db = 1.0, 1.0 - if dr < tonumber(KEYS[5]) and dr > 1.0 / tonumber(KEYS[5]) then - dr = dr * tonumber(KEYS[3]) - redis.call('HSET', KEYS[1], 'dr', tostring(math.floor(dr * 10000))) + if tonumber(KEYS[5]) > 1 then + local rate_mult = tonumber(KEYS[3]) + local rate_limit = tonumber(KEYS[5]) + dr = tonumber(redis.call('HGET', KEYS[1], 'dr')) / 10000 + + if rate_mult > 1.0 and dr < rate_limit then + dr = dr * rate_mult + if dr > 0.0001 then + redis.call('HSET', KEYS[1], 'dr', tostring(math.floor(dr * 10000))) + else + redis.call('HSET', KEYS[1], 'dr', '1') + end + elseif rate_mult < 1.0 and dr > (1.0 / rate_limit) then + dr = dr * rate_mult + if dr > 0.0001 then + redis.call('HSET', KEYS[1], 'dr', tostring(math.floor(dr * 10000))) + else + redis.call('HSET', KEYS[1], 'dr', '1') + end + end end - if db < tonumber(KEYS[6]) and db > 1.0 / tonumber(KEYS[6]) then - db = db * tonumber(KEYS[4]) - redis.call('HSET', KEYS[1], 'db', tostring(math.floor(db * 10000))) + if tonumber(KEYS[6]) > 1 then + local rate_mult = tonumber(KEYS[4]) + local rate_limit = tonumber(KEYS[6]) + db = tonumber(redis.call('HGET', KEYS[1], 'db')) / 10000 + + if rate_mult > 1.0 and db < rate_limit then + db = db * rate_mult + if db > 0.0001 then + redis.call('HSET', KEYS[1], 'db', tostring(math.floor(db * 10000))) + else + redis.call('HSET', KEYS[1], 'db', '1') + end + elseif rate_mult < 1.0 and db > (1.0 / rate_limit) then + db = db * rate_mult + if db > 0.0001 then + redis.call('HSET', KEYS[1], 'db', tostring(math.floor(db * 10000))) + else + redis.call('HSET', KEYS[1], 'db', '1') + end + end end + local burst = tonumber(redis.call('HGET', KEYS[1], 'b')) + if burst < 0 then burst = 0 end + redis.call('HINCRBYFLOAT', KEYS[1], 'b', 1) redis.call('HSET', KEYS[1], 'l', KEYS[2]) redis.call('EXPIRE', KEYS[1], KEYS[7]) @@ -154,8 +193,8 @@ local bucket_update_script = [[ ]] local bucket_update_id --- message_func(task, limit_type, prefix, bucket) -local message_func = function(_, limit_type, _, _) +-- message_func(task, limit_type, prefix, bucket, limit_key) +local message_func = function(_, limit_type, _, _, _) return string.format('Ratelimit "%s" exceeded', limit_type) end @@ -229,48 +268,67 @@ local function parse_string_limit(lim, no_error) return nil end +local function str_to_rate(str) + local divider,divisor = parse_string_limit(str, false) + + if not divisor then + rspamd_logger.errx(rspamd_config, 'bad rate string: %s', str) + + return nil + end + + return divisor / divider +end + +local bucket_schema = ts.shape{ + burst = ts.number + ts.string / lua_util.dehumanize_number, + rate = ts.number + ts.string / str_to_rate +} + local function parse_limit(name, data) - local buckets = {} if type(data) == 'table' then - -- 3 cases here: + -- 2 cases here: -- * old limit in format [burst, rate] - -- * vector of strings in Andrew's string format + -- * vector of strings in Andrew's string format (removed from 1.8.2) -- * proper bucket table if #data == 2 and tonumber(data[1]) and tonumber(data[2]) then -- Old style ratelimit rspamd_logger.warnx(rspamd_config, 'old style ratelimit for %s', name) if tonumber(data[1]) > 0 and tonumber(data[2]) > 0 then - table.insert(buckets, { + return { burst = data[1], rate = data[2] - }) + } elseif data[1] ~= 0 then rspamd_logger.warnx(rspamd_config, 'invalid numbers for %s', name) else rspamd_logger.infox(rspamd_config, 'disable limit %s, burst is zero', name) end + + return nil else - -- Recursively map parse_limit and flatten the list - fun.each(function(l) - -- Flatten list - for _,b in ipairs(l) do table.insert(buckets, b) end - end, fun.map(function(d) return parse_limit(d, name) end, data)) + local parsed_bucket,err = bucket_schema:transform(data) + + if not parsed_bucket or err then + rspamd_logger.errx(rspamd_config, 'cannot parse bucket for %s: %s; original value: %s', + name, err, data) + else + return parsed_bucket + end end elseif type(data) == 'string' then local rep_rate, burst = parse_string_limit(data) - + rspamd_logger.warnx(rspamd_config, 'old style rate bucket config detected for %s: %s', + name, data) if rep_rate and burst then - table.insert(buckets, { + return { burst = burst, - rate = 1.0 / rep_rate -- reciprocal - }) + rate = burst / rep_rate -- reciprocal + } end end - -- Filter valid - return fun.totable(fun.filter(function(val) - return type(val.burst) == 'number' and type(val.rate) == 'number' - end, buckets)) + return nil end --- Check whether this addr is bounce @@ -461,8 +519,6 @@ local function limit_to_prefixes(task, k, v, prefixes) end end end - - end return n @@ -515,8 +571,8 @@ local function ratelimit_cb(task) if ret then local bucket = parse_limit(k, bd) - if bucket[1] then - prefixes[redis_key] = make_prefix(redis_key, k, bucket[1]) + if bucket then + prefixes[redis_key] = make_prefix(redis_key, k, bucket) end nprefixes = nprefixes + 1 else @@ -525,7 +581,7 @@ local function ratelimit_cb(task) end end - local function gen_check_cb(prefix, bucket, lim_name) + local function gen_check_cb(prefix, bucket, lim_name, lim_key) return function(err, data) if err then rspamd_logger.errx('cannot check limit %s: %s %s', prefix, err, data) @@ -538,25 +594,26 @@ local function ratelimit_cb(task) if data[1] == 1 then -- set symbol only and do NOT soft reject if settings.symbol then - task:insert_result(settings.symbol, 0.0, lim_name .. "(" .. prefix .. ")") + task:insert_result(settings.symbol, 0.0, + string.format('%s(%s)', lim_name, lim_key)) rspamd_logger.infox(task, - 'set_symbol_only: ratelimit "%s(%s)" exceeded, (%s / %s): %s (%s:%s dyn)', + 'set_symbol_only: ratelimit "%s(%s)" exceeded, (%s / %s): %s (%s:%s dyn); redis key: %s', lim_name, prefix, bucket.burst, bucket.rate, - data[2], data[3], data[4]) + data[2], data[3], data[4], lim_key) return -- set INFO symbol and soft reject elseif settings.info_symbol then task:insert_result(settings.info_symbol, 1.0, - lim_name .. "(" .. prefix .. ")") + string.format('%s(%s)', lim_name, lim_key)) end rspamd_logger.infox(task, - 'ratelimit "%s(%s)" exceeded, (%s / %s): %s (%s:%s dyn)', + 'ratelimit "%s(%s)" exceeded, (%s / %s): %s (%s:%s dyn); redis key: %s', lim_name, prefix, bucket.burst, bucket.rate, - data[2], data[3], data[4]) + data[2], data[3], data[4], lim_key) task:set_pre_result('soft reject', - message_func(task, lim_name, prefix, bucket), N) + message_func(task, lim_name, prefix, bucket, lim_key), N) end end end @@ -579,7 +636,7 @@ local function ratelimit_cb(task) value.name, pr, value.hash, bucket.burst, bucket.rate) lua_redis.exec_redis_script(bucket_check_id, {key = value.hash, task = task, is_write = true}, - gen_check_cb(pr, bucket, value.name), + gen_check_cb(pr, bucket, value.name, value.hash), {value.hash, tostring(now), tostring(rate), tostring(bucket.burst), tostring(settings.expire)}) end @@ -587,6 +644,8 @@ local function ratelimit_cb(task) end local function ratelimit_update_cb(task) + if task:has_flag('skip') then return end + if not settings.allow_local and lua_util.is_rspamc_or_controller(task) then return end local prefixes = task:cache_get('ratelimit_prefixes') if prefixes then @@ -596,7 +655,7 @@ local function ratelimit_update_cb(task) return end - local is_spam = not (task:get_metric_action() == 'no action') + local verdict = lua_util.get_task_verdict(task) -- Update each bucket for k, v in pairs(prefixes) do @@ -615,12 +674,15 @@ local function ratelimit_update_cb(task) end local now = rspamd_util.get_time() now = lua_util.round(now * 1000.0) -- Get milliseconds - local mult_burst = bucket.ham_factor_burst or 1.0 - local mult_rate = bucket.ham_factor_burst or 1.0 + local mult_burst = 1.0 + local mult_rate = 1.0 - if is_spam then + if verdict == 'spam' or verdict == 'junk' then mult_burst = bucket.spam_factor_burst or 1.0 mult_rate = bucket.spam_factor_rate or 1.0 + elseif verdict == 'ham' then + mult_burst = bucket.ham_factor_burst or 1.0 + mult_rate = bucket.ham_factor_rate or 1.0 end lua_redis.exec_redis_script(bucket_update_id, @@ -645,42 +707,72 @@ if opts then if opts['rates'] and type(opts['rates']) == 'table' then -- new way of setting limits fun.each(function(t, lim) - local buckets - if type(lim) == 'table' and lim.selector and lim.bucket then - local selector = lua_selectors.parse_selector(rspamd_config, lim.selector) - if not selector then - rspamd_logger.errx(rspamd_config, 'bad ratelimit selector for %s: "%s"', - t, lim.selector) - return - end - local bucket = parse_limit(t, lim.bucket) + local buckets = {} + + if type(lim) == 'table' and lim.bucket then - if not bucket then - rspamd_logger.errx(rspamd_config, 'bad ratelimit bucket for %s: "%s"', - t, lim.bucket) - return + if lim.bucket[1] then + for _,bucket in ipairs(lim.bucket) do + local b = parse_limit(t, bucket) + + if not b then + rspamd_logger.errx(rspamd_config, 'bad ratelimit bucket for %s: "%s"', + t, b) + return + end + + table.insert(buckets, b) + end + else + local bucket = parse_limit(t, lim.bucket) + + if not bucket then + rspamd_logger.errx(rspamd_config, 'bad ratelimit bucket for %s: "%s"', + t, lim.bucket) + return + end + + buckets = {bucket} end + settings.limits[t] = { - selector = selector, - buckets = bucket + buckets = buckets } + if lim.selector then + local selector = lua_selectors.parse_selector(rspamd_config, lim.selector) + if not selector then + rspamd_logger.errx(rspamd_config, 'bad ratelimit selector for %s: "%s"', + t, lim.selector) + settings.limits[t] = nil + return + end + + settings.limits[t].selector = selector + end else + rspamd_logger.warnx(rspamd_config, 'old syntax for ratelimits: %s', lim) buckets = parse_limit(t, lim) - if buckets and #buckets > 0 then + if buckets then settings.limits[t] = { - buckets = buckets + buckets = {buckets} } end end end, opts['rates']) end - local enabled_limits = fun.totable(fun.map(function(t) - return t + -- Display what's enabled + fun.each(function(s) + rspamd_logger.infox(rspamd_config, 'enabled ratelimit: %s', s) + end, fun.map(function(n,d) + return string.format('%s [%s]', n, + table.concat(fun.totable(fun.map(function(v) + return string.format('%s msgs burst, %s msgs/sec rate', + v.burst, v.rate) + end, d.buckets)), '; ') + ) end, settings.limits)) - rspamd_logger.infox(rspamd_config, - 'enabled rate buckets: [%1]', table.concat(enabled_limits, ',')) -- Ret, ret, ret: stupid legacy stuff: -- If we have a string with commas then load it as as static map diff --git a/src/plugins/lua/rbl.lua b/src/plugins/lua/rbl.lua index 53f537ac0..3bbf46ff3 100644 --- a/src/plugins/lua/rbl.lua +++ b/src/plugins/lua/rbl.lua @@ -24,6 +24,7 @@ local rspamd_logger = require 'rspamd_logger' local rspamd_util = require 'rspamd_util' local fun = require 'fun' local lua_util = require 'lua_util' +local ts = require("tableshape").types -- This plugin implements various types of RBL checks -- Documentation can be found here: @@ -32,49 +33,49 @@ local lua_util = require 'lua_util' local E = {} local N = 'rbl' -local rbls = {} -local local_exclusions = nil +local local_exclusions local default_monitored = '1.0.0.127' -local symbols = { - dkim_allow_symbol = 'R_DKIM_ALLOW', -} - -local dkim_config = rspamd_config:get_all_opt("dkim") -if (dkim_config or E).symbol_allow then - symbols['dkim_allow_symbol'] = dkim_config['symbol_allow'] -end - local function validate_dns(lstr) if lstr:match('%.%.') then + -- two dots in a row return false end for v in lstr:gmatch('[^%.]+') do if not v:match('^[%w-]+$') or v:len() > 63 or v:match('^-') or v:match('-$') then + -- too long label or weird labels return false end end return true end -local hash_alg = { - sha1 = true, - md5 = true, - sha256 = true, - sha384 = true, - sha512 = true, -} +local function maybe_make_hash(data, rule) + if rule.hash then + local h = hash.create_specific(rule.hash, data) + local s + if rule.hash_format then + if rule.hash_format == 'base32' then + s = h:base32() + elseif rule.hash_format == 'base64' then + s = h:base64() + else + s = h:hex() + end + else + s = h:hex() + end + + if rule.hash_len then + s = s:sub(1, rule.hash_len) + end -local function make_hash(data, specific) - local h - if not hash_alg[specific] then - h = hash.create(data) + return s else - h = hash.create_specific(specific, data) + return data end - return h:hex() end local function is_excluded_ip(rip) @@ -84,8 +85,8 @@ local function is_excluded_ip(rip) return false end -local function ip_to_rbl(ip, rbl) - return table.concat(ip:inversed_str_octets(), '.') .. '.' .. rbl +local function ip_to_rbl(ip) + return table.concat(ip:inversed_str_octets(), '.') end local function gen_check_rcvd_conditions(rbl, received_total) @@ -155,328 +156,305 @@ local function gen_check_rcvd_conditions(rbl, received_total) end end -local function rbl_cb (task) - local function gen_rbl_callback(rule) - return function (_, to_resolve, results, err) - if err and (err ~= 'requested record is not found' and err ~= 'no records with this name') then - rspamd_logger.errx(task, 'error looking up %s: %s', to_resolve, err) +local function rbl_dns_process(task, rbl, to_resolve, results, err) + if err and (err ~= 'requested record is not found' and + err ~= 'no records with this name') then + rspamd_logger.errx(task, 'error looking up %s: %s', to_resolve, err) + end + if not results then + lua_util.debugm(N, task, + 'DNS RESPONSE: label=%1 results=%2 error=%3 rbl=%4', + to_resolve, false, err, rbl.symbol) + return + else + lua_util.debugm(N, task, + 'DNS RESPONSE: label=%1 results=%2 error=%3 rbl=%4', + to_resolve, true, err, rbl.symbol) + end + + if rbl.returncodes == nil and rbl.symbol ~= nil then + task:insert_result(rbl.symbol, 1, to_resolve) + return + end + + for _,result in ipairs(results) do + local ipstr = result:to_string() + lua_util.debugm(N, task, '%s DNS result %s', to_resolve, ipstr) + local foundrc = false + -- Check return codes + for s,i in pairs(rbl.returncodes) do + for _,v in ipairs(i) do + if string.find(ipstr, '^' .. v .. '$') then + foundrc = true + task:insert_result(s, 1, to_resolve .. ' : ' .. ipstr) + break + end end - if not results then - lua_util.debugm(N, task, 'DNS RESPONSE: label=%1 results=%2 error=%3 rbl=%4', to_resolve, false, err, rule['rbls'][1]['symbol']) - return + end + if not foundrc then + if rbl.unknown and rbl.symbol then + task:insert_result(rbl.symbol, 1, to_resolve) else - lua_util.debugm(N, task, 'DNS RESPONSE: label=%1 results=%2 error=%3 rbl=%4', to_resolve, true, err, rule['rbls'][1]['symbol']) + rspamd_logger.errx(task, 'RBL %1 returned unknown result: %2', + rbl.rbl, ipstr) end + end + end +end - for _,rbl in ipairs(rule.rbls) do - if rbl['returncodes'] == nil and rbl['symbol'] ~= nil then - task:insert_result(rbl['symbol'], 1, to_resolve) - return - end - for _,result in pairs(results) do - local ipstr = result:to_string() - local foundrc - lua_util.debugm(N, task, '%s DNS result %s', to_resolve, ipstr) - for s,i in pairs(rbl['returncodes']) do - if type(i) == 'string' then - if string.find(ipstr, '^' .. i .. '$') then - foundrc = i - task:insert_result(s, 1, to_resolve .. ' : ' .. ipstr) - break - end - elseif type(i) == 'table' then - for _,v in pairs(i) do - if string.find(ipstr, '^' .. v .. '$') then - foundrc = v - task:insert_result(s, 1, to_resolve .. ' : ' .. ipstr) - break - end - end - end - end - if not foundrc then - if rbl['unknown'] and rbl['symbol'] then - task:insert_result(rbl['symbol'], 1, to_resolve) - else - rspamd_logger.errx(task, 'RBL %1 returned unknown result: %2', - rbl['rbl'], ipstr) - end - end - end +local function gen_rbl_callback(rule) + -- Here, we have functional approach: we form a pipeline of functions + -- f1, f2, ... fn. Each function accepts task and return boolean value + -- that allows to process pipeline further + -- Each function in the pipeline can add something to `dns_req` vector as a side effect + + local function add_dns_request(req, forced, requests_table) + if requests_table[req] then + -- Duplicate request + if forced and not requests_table[req].forced then + requests_table[req].forced = true end + else + local nreq = { + forced = forced, + n = string.format('%s.%s', + maybe_make_hash(req, rule), + rule.rbl) + } + requests_table[req] = nreq end end - local params = {} -- indexed by rbl name + local function is_alive(_, _) + if rule.monitored then + if not rule.monitored:alive() then + return false + end + end - local function gen_rbl_rule(to_resolve, rbl) - lua_util.debugm(N, task, 'DNS REQUEST: label=%1 rbl=%2', to_resolve, rbl['symbol']) - if not params[to_resolve] then - local nrule = { - to_resolve = to_resolve, - rbls = {rbl}, - forced = true, - } - nrule.callback = gen_rbl_callback(nrule) - params[to_resolve] = nrule - else - table.insert(params[to_resolve].rbls, rbl) + return true + end + + local function check_user(task, _) + if task:get_user() then + return false end - return params[to_resolve] + return true end - local havegot = { - emails = {}, - received = {} - } - local notgot = {} + local function check_local(task, _) + local ip = task:get_from_ip() - local alive_rbls = fun.filter(function(_, rbl) - if rbl.monitored then - if not rbl.monitored:alive() then - return false - end + if not ip:is_valid() then + ip = nil + end + + if ip and ip:is_local() or is_excluded_ip(ip) then + return false end return true - end, rbls) - - -- Now exclude rbls, that are disabled by configuration - local enabled_rbls = fun.filter(function(_, rbl) - if rbl['exclude_users'] then - if not havegot['user'] and not notgot['user'] then - havegot['user'] = task:get_user() - if havegot['user'] == nil then - notgot['user'] = true - end - end - if havegot['user'] ~= nil then - return false - end + end + + local function check_helo(task, requests_table) + local helo = task:get_helo() + + if not helo then + return false end - if (rbl['exclude_local'] or rbl['exclude_private_ips']) and not notgot['from'] then - if not havegot['from'] then - havegot['from'] = task:get_from_ip() - if not havegot['from']:is_valid() then - notgot['from'] = true + add_dns_request(helo, true, requests_table) + end + + local function check_dkim(task, requests_table) + local das = task:get_symbol('DKIM_TRACE') + local mime_from_domain + local ret = false + + if das and das[1] and das[1].options then + + if rule.dkim_match_from then + -- We check merely mime from + mime_from_domain = ((task:get_from('mime') or E)[1] or E).domain + if mime_from_domain then + mime_from_domain = rspamd_util.get_tld(mime_from_domain) end end - if havegot['from'] and not notgot['from'] and ((rbl['exclude_local'] and - is_excluded_ip(havegot['from'])) or (rbl['exclude_private_ips'] and - havegot['from']:is_local())) then - return false - end - end - -- Helo checks - if rbl['helo'] then - if notgot['helo'] then - return false - end - if not havegot['helo'] then - if rbl['hash'] then - havegot['helo'] = task:get_helo() - if havegot['helo'] then - havegot['helo'] = make_hash(havegot['helo'], rbl['hash']) + for _, d in ipairs(das[1].options) do + + local domain,result = d:match('^([^%:]*):([%+%-%~])$') + + -- We must ignore bad signatures, omg + if domain and result and result == '+' then + if rule.dkim_match_from then + -- We check merely mime from + local domain_tld = domain + if not rule.dkim_domainonly then + -- Adjust + domain_tld = rspamd_util.get_tld(domain) + end + + if mime_from_domain and mime_from_domain == domain_tld then + add_dns_request(domain_tld, true, requests_table) + ret = true + end else - notgot['helo'] = true - return false - end - else - havegot['helo'] = task:get_helo() - if havegot['helo'] == nil or not validate_dns(havegot['helo']) then - havegot['helo'] = nil - notgot['helo'] = true - return false + if rule.dkim_domainonly then + add_dns_request(rspamd_util.get_tld(domain), false, requests_table) + ret = true + else + add_dns_request(domain, false, requests_table) + ret = true + end end end end - elseif rbl['dkim'] then - -- DKIM checks - if notgot['dkim'] then - return false - end - if not havegot['dkim'] then - local das = task:get_symbol(symbols['dkim_allow_symbol']) - if ((das or E)[1] or E).options then - havegot['dkim'] = das[1]['options'] + end + + return ret + end + + local function check_emails(task, requests_table) + local emails = task:get_emails() + + if not emails then + return false + end + + for _,email in ipairs(emails) do + if rule.emails_domainonly then + add_dns_request(email:get_tld(), false, requests_table) + else + if rule.hash then + -- Leave @ as is + add_dns_request(string.format('%s@%s', + email:get_user(), email:get_domain()), false, requests_table) else - notgot['dkim'] = true - return false - end - end - elseif rbl['emails'] then - -- Emails checks - if notgot['emails'] then - return false - end - if #havegot['emails'] == 0 then - havegot['emails'] = task:get_emails() - if havegot['emails'] == nil then - notgot['emails'] = true - havegot['emails'] = {} - return false - end - end - elseif rbl['from'] then - if notgot['from'] then - return false - end - if not havegot['from'] then - havegot['from'] = task:get_from_ip() - if not havegot['from']:is_valid() then - notgot['from'] = true - return false - end - end - elseif rbl['received'] then - if notgot['received'] then - return false - end - if #havegot['received'] == 0 then - havegot['received'] = task:get_received_headers() - if next(havegot['received']) == nil then - notgot['received'] = true - havegot['received'] = {} - return false - end - end - elseif rbl['rdns'] then - if notgot['rdns'] then - return false - end - if not havegot['rdns'] then - havegot['rdns'] = task:get_hostname() - if havegot['rdns'] == nil or havegot['rdns'] == 'unknown' then - notgot['rdns'] = true - return false + -- Replace @ with . + add_dns_request(string.format('%s.%s', + email:get_user(), email:get_domain()), false, requests_table) end end end return true - end, alive_rbls) - - -- Now we iterate over enabled rbls and fill params - -- Helo RBLs - fun.each(function(_, rbl) - local to_resolve = havegot['helo'] .. '.' .. rbl['rbl'] - gen_rbl_rule(to_resolve, rbl) - end, - fun.filter(function(_, rbl) - if rbl['helo'] then return true end - return false - end, enabled_rbls)) + end - -- DKIM RBLs - fun.each(function(_, rbl) - for _, d in ipairs(havegot['dkim']) do - if rbl['dkim_domainonly'] then - d = rspamd_util.get_tld(d) - end - local to_resolve = d .. '.' .. rbl['rbl'] - gen_rbl_rule(to_resolve, rbl) + local function check_from(task, requests_table) + local ip = task:get_from_ip() + + if not ip or not ip:is_valid() then + return true end - end, - fun.filter(function(_, rbl) - if rbl['dkim'] then return true end - return false - end, enabled_rbls)) - - -- Emails RBLs - fun.each(function(_, rbl) - if rbl['emails'] == 'domain_only' then - local cleanList = {} - for _, email in ipairs(havegot['emails']) do - cleanList[email:get_host()] = true - end - for k in pairs(cleanList) do - local to_resolve - if rbl['hash'] then - to_resolve = make_hash(tostring(k), rbl['hash']) .. '.' .. rbl['rbl'] - else - to_resolve = k .. '.' .. rbl['rbl'] - end - gen_rbl_rule(to_resolve, rbl) - end - else - for _, email in ipairs(havegot['emails']) do - local to_resolve - if rbl['hash'] then - to_resolve = make_hash(email:get_user() .. '@' .. email:get_host(), rbl['hash']) .. '.' .. rbl['rbl'] - else - local upart = email:get_user() - if validate_dns(upart) then - to_resolve = upart .. '.' .. email:get_host() .. '.' .. rbl['rbl'] - end - end - if to_resolve then - gen_rbl_rule(to_resolve, rbl) - end - end + if (ip:get_version() == 6 and rule.ipv6) or + (ip:get_version() == 4 and rule.ipv4) then + add_dns_request(ip_to_rbl(ip), true, requests_table) end - end, - fun.filter(function(_, rbl) - if rbl['emails'] then return true end - return false - end, enabled_rbls)) - - -- RDNS lists - fun.each(function(_, rbl) - local to_resolve = havegot['rdns'] .. '.' .. rbl['rbl'] - gen_rbl_rule(to_resolve, rbl) - end, - fun.filter(function(_, rbl) - if rbl['rdns'] then return true end - return false - end, enabled_rbls)) - - -- From lists - fun.each(function(_, rbl) - if (havegot['from']:get_version() == 6 and rbl['ipv6']) or - (havegot['from']:get_version() == 4 and rbl['ipv4']) then - local to_resolve = ip_to_rbl(havegot['from'], rbl['rbl']) - gen_rbl_rule(to_resolve, rbl) - end - end, - fun.filter(function(_, rbl) - if rbl['from'] then return true end - return false - end, enabled_rbls)) - havegot['received'] = fun.filter(function(h) - return not h['flags']['artificial'] - end, havegot['received']):totable() + return true + end + + local function check_received(task, requests_table) + local received = fun.filter(function(h) + return not h['flags']['artificial'] + end, task:get_received_headers()):totable() - local received_total = #havegot['received'] - -- Received lists - fun.each(function(_, rbl) - local check_conditions = gen_check_rcvd_conditions(rbl, received_total) - for pos,rh in ipairs(havegot['received']) do + local received_total = #received + local check_conditions = gen_check_rcvd_conditions(rule, received_total) + + for pos,rh in ipairs(received) do if check_conditions(rh, pos) then - local to_resolve = ip_to_rbl(rh['real_ip'], rbl['rbl']) - local rule = gen_rbl_rule(to_resolve, rbl) - -- Disable forced for received resolving, as we have no control on - -- those headers count - rule.forced = false + add_dns_request(ip_to_rbl(rh.real_ip), false, requests_table) end end - end, - fun.filter(function(_, rbl) - if rbl['received'] then return true end - return false - end, enabled_rbls)) - local r = task:get_resolver() - for _,p in pairs(params) do - r:resolve_a({ - task = task, - name = p.to_resolve, - callback = p.callback, - forced = p.forced - }) + return true + end + + local function check_rdns(task, requests_table) + local hostname = task:get_hostname() + if hostname == nil or hostname == 'unknown' then + return false + end + + add_dns_request(hostname, true, requests_table) + + return true + end + + -- Create function pipeline depending on rbl settings + local pipeline = { + is_alive, -- generic for all + } + + if rule.exclude_users then + pipeline[#pipeline + 1] = check_user + end + + if rule.exclude_local or rule.exclude_private_ips then + pipeline[#pipeline + 1] = check_local + end + + if rule.helo then + pipeline[#pipeline + 1] = check_helo + end + + if rule.dkim then + pipeline[#pipeline + 1] = check_dkim + end + + if rule.emails then + pipeline[#pipeline + 1] = check_emails + end + + if rule.from then + pipeline[#pipeline + 1] = check_from + end + + if rule.received then + pipeline[#pipeline + 1] = check_received + end + + if rule.rdns then + pipeline[#pipeline + 1] = check_rdns + end + + return function(task) + -- DNS requests to issue (might be hashed afterwards) + local dns_req = {} + + local function rbl_dns_callback(_, to_resolve, results, err) + rbl_dns_process(task, rule, to_resolve, results, err) + end + + -- Execute functions pipeline + for _,f in ipairs(pipeline) do + if not f(task, dns_req) then + lua_util.debugm(N, task, "skip rbl check: %s; pipeline condition returned false", + rule.symbol) + return + end + end + + -- Now check all DNS requests pending and emit them + local r = task:get_resolver() + for name,p in pairs(dns_req) do + if validate_dns(p.n) then + lua_util.debugm(N, task, "rbl %s; resolve %s -> %s", + rule.symbol, name, p.n) + r:resolve_a({ + task = task, + name = p.n, + callback = rbl_dns_callback, + forced = p.forced + }) + else + rspamd_logger.warnx(task, 'cannot send invalid DNS request %s for %s', + p.n, rule.symbol) + end + end end end @@ -491,26 +469,28 @@ end -- Plugin defaults should not be changed - override these in config -- New defaults should not alter behaviour local default_defaults = { - ['default_enabled'] = {[1] = true, [2] = 'enabled'}, - ['default_ipv4'] = {[1] = true, [2] = 'ipv4'}, - ['default_ipv6'] = {[1] = false, [2] = 'ipv6'}, - ['default_received'] = {[1] = true, [2] = 'received'}, - ['default_from'] = {[1] = false, [2] = 'from'}, - ['default_unknown'] = {[1] = false, [2] = 'unknown'}, - ['default_rdns'] = {[1] = false, [2] = 'rdns'}, - ['default_helo'] = {[1] = false, [2] = 'helo'}, - ['default_dkim'] = {[1] = false, [2] = 'dkim'}, - ['default_dkim_domainonly'] = {[1] = true, [2] = 'dkim_domainonly'}, - ['default_emails'] = {[1] = false, [2] = 'emails'}, - ['default_exclude_private_ips'] = {[1] = true, [2] = 'exclude_private_ips'}, - ['default_exclude_users'] = {[1] = false, [2] = 'exclude_users'}, - ['default_exclude_local'] = {[1] = true, [2] = 'exclude_local'}, - ['default_is_whitelist'] = {[1] = false, [2] = 'is_whitelist'}, - ['default_ignore_whitelist'] = {[1] = false, [2] = 'ignore_whitelists'}, + ['default_enabled'] = true, + ['default_ipv4'] = true, + ['default_ipv6'] = true, + ['default_received'] = false, + ['default_from'] = true, + ['default_unknown'] = false, + ['default_rdns'] = false, + ['default_helo'] = false, + ['default_dkim'] = false, + ['default_dkim_domainonly'] = true, + ['default_emails'] = false, + ['default_emails_domainonly'] = false, + ['default_exclude_private_ips'] = true, + ['default_exclude_users'] = false, + ['default_exclude_local'] = true, + ['default_is_whitelist'] = false, + ['default_ignore_whitelist'] = false, } +-- Enrich with defaults for default, default_v in pairs(default_defaults) do if opts[default] == nil then - opts[default] = default_v[1] + opts[default] = default_v end end @@ -521,136 +501,175 @@ end local white_symbols = {} local black_symbols = {} -local need_dkim = false -local id = rspamd_config:register_symbol({ - type = 'callback', - callback = rbl_cb, - name = 'RBL_CALLBACK', - flags = 'empty,nice' +local rule_schema = ts.shape({ + enabled = ts.boolean:is_optional(), + disabled = ts.boolean:is_optional(), + rbl = ts.string, + symbol = ts.string:is_optional(), + returncodes = ts.map_of( + ts.string / string.upper, + ( + ts.array_of(ts.string) + (ts.string / function(s) + return { s } + end) + ) + ):is_optional(), + whitelist_exception = ( + ts.array_of(ts.string) + (ts.string / function(s) return {s} end) + ):is_optional(), + local_exclude_ip_map = ts.string:is_optional(), + hash = ts.one_of{"sha1", "sha256", "sha384", "sha512", "md5", "blake2"}:is_optional(), + hash_format = ts.one_of{"hex", "base32", "base64"}:is_optional(), + hash_len = (ts.integer + ts.string / tonumber):is_optional(), +}, { + extra_fields = ts.map_of(ts.string, ts.boolean) }) -local is_monitored = {} -local rbls_count = 0 -for key,rbl in pairs(opts['rbls']) do - (function() - if type(rbl) ~= 'table' or rbl['disabled'] then - rspamd_logger.infox(rspamd_config, 'disable rbl "%s"', key) - return - end - for default, default_v in pairs(default_defaults) do - if(rbl[default_v[2]] == nil) then - rbl[default_v[2]] = opts[default] - end - end - if not rbl['enabled'] then return end - if type(rbl['returncodes']) == 'table' then - for s,_ in pairs(rbl['returncodes']) do - if type(rspamd_config.get_api_version) ~= 'nil' then - rspamd_config:register_symbol({ - name = s, - parent = id, - type = 'virtual' - }) +local monitored_addresses = {} - if rbl['dkim'] then - need_dkim = true - end - if(rbl['is_whitelist']) then - if type(rbl['whitelist_exception']) == 'string' then - if (rbl['whitelist_exception'] ~= s) then - table.insert(white_symbols, s) - end - elseif type(rbl['whitelist_exception']) == 'table' then - local foundException = false - for _, e in pairs(rbl['whitelist_exception']) do - if e == s then - foundException = true - break - end - end - if not foundException then - table.insert(white_symbols, s) - end - else - table.insert(white_symbols, s) - end - else - if rbl['ignore_whitelists'] == false then - table.insert(black_symbols, s) - end - end - end - end - end - if not rbl['symbol'] and - ((rbl['returncodes'] and rbl['unknown']) or - (not rbl['returncodes'])) then - rbl['symbol'] = key - end - if rbl['symbol'] then +local function add_rbl(key, rbl) + if not rbl.symbol then + rbl.symbol = key:upper() + end + + local flags_tbl = {'no_squeeze'} + if rbl.is_whitelist then + flags_tbl[#flags_tbl + 1] = 'nice' + end + + if not (rbl.dkim or rbl.emails) then + flags_tbl[#flags_tbl + 1] = 'empty' + end + + local id = rspamd_config:register_symbol{ + type = 'callback', + callback = gen_rbl_callback(rbl), + name = rbl.symbol, + flags = table.concat(flags_tbl, ',') + } + + if rbl.dkim then + rspamd_config:register_dependency(rbl.symbol, 'DKIM_CHECK') + end + + if rbl.returncodes then + for s,_ in pairs(rbl['returncodes']) do rspamd_config:register_symbol({ - name = rbl['symbol'], + name = s, parent = id, type = 'virtual' }) - rbls_count = rbls_count + 1 - if rbl['dkim'] then - need_dkim = true - end - if (rbl['is_whitelist']) then - if type(rbl['whitelist_exception']) == 'string' then - if (rbl['whitelist_exception'] ~= rbl['symbol']) then - table.insert(white_symbols, rbl['symbol']) - end - elseif type(rbl['whitelist_exception']) == 'table' then - local foundException = false - for _, e in pairs(rbl['whitelist_exception']) do - if e == rbl['symbol'] then - foundException = true - break - end - end - if not foundException then - table.insert(white_symbols, rbl['symbol']) - end - else - table.insert(white_symbols, rbl['symbol']) + if rbl.is_whitelist then + if rbl.whitelist_exception then + local foundException = false + for _, e in ipairs(rbl.whitelist_exception) do + if e == s then + foundException = true + break end + end + if not foundException then + table.insert(white_symbols, s) + end + else + table.insert(white_symbols, s) + end else - if rbl['ignore_whitelists'] == false then - table.insert(black_symbols, rbl['symbol']) + if rbl.ignore_whitelist == false then + table.insert(black_symbols, s) end end end - if rbl['rbl'] then - if not rbl['disable_monitoring'] and not rbl['is_whitelist'] and not is_monitored[rbl['rbl']] then - is_monitored[rbl['rbl']] = true - rbl.monitored = rspamd_config:register_monitored(rbl['rbl'], 'dns', + end + + if not rbl.is_whitelist and rbl.ignore_whitelist == false then + table.insert(black_symbols, rbl.symbol) + end + -- Process monitored + if not rbl.disable_monitoring and not rbl.is_whitelist then + if not monitored_addresses[rbl.rbl] then + monitored_addresses[rbl.rbl] = true + rbl.monitored = rspamd_config:register_monitored(rbl['rbl'], 'dns', { rcode = 'nxdomain', - prefix = rbl['monitored_address'] or default_monitored + prefix = rbl.monitored_address or default_monitored }) + end + end +end + +for key,rbl in pairs(opts.rbls or opts.rules) do + if type(rbl) ~= 'table' or rbl.disabled == true or rbl.enabled == false then + rspamd_logger.infox(rspamd_config, 'disable rbl "%s"', key) + else + for default,_ in pairs(default_defaults) do + local rbl_opt = default:sub(#('default_') + 1) + if rbl[rbl_opt] == nil then + rbl[rbl_opt] = opts[default] end + end - rbls[key] = rbl + local res,err = rule_schema:transform(rbl) + if not res then + rspamd_logger.errx(rspamd_config, 'invalid config for %s: %s, RBL is DISABLED', + key, err) + else + add_rbl(key, res) end - end)() + end -- rbl.enabled end -if rbls_count == 0 then - lua_util.disable_module(N, "config") -end +-- We now create two symbols: +-- * RBL_CALLBACK_WHITE that depends on all symbols white +-- * RBL_CALLBACK that depends on all symbols black to participate in depends chains + +local function rbl_callback_white(task) + local found_whitelist = false + for _, w in ipairs(white_symbols) do + if task:has_symbol(w) then + lua_util.debugm(N, task,'found whitelist %s', w) + found_whitelist = true + break + end + end -for _, w in pairs(white_symbols) do - for _, b in pairs(black_symbols) do - local csymbol = 'RBL_COMPOSITE_' .. w .. '_' .. b - rspamd_config:set_metric_symbol(csymbol, 0, 'Autogenerated composite') - rspamd_config:add_composite(csymbol, w .. ' & ' .. b) + if found_whitelist then + -- Disable all symbols black + for _, b in ipairs(black_symbols) do + lua_util.debugm(N, task,'disable %s, whitelist found', b) + task:disable_symbol(b) + end end + lua_util.debugm(N, task, "finished rbl whitelists processing") +end + +local function rbl_callback_fin(task) + -- Do nothing + lua_util.debugm(N, task, "finished rbl processing") +end + +rspamd_config:register_symbol{ + type = 'callback', + callback = rbl_callback_white, + name = 'RBL_CALLBACK_WHITE', + flags = 'nice,empty,no_squeeze' +} + +rspamd_config:register_symbol{ + type = 'callback', + callback = rbl_callback_fin, + name = 'RBL_CALLBACK', + flags = 'empty,no_squeeze' +} + +for _, w in ipairs(white_symbols) do + rspamd_config:register_dependency('RBL_CALLBACK_WHITE', w) end -if need_dkim then - rspamd_config:register_dependency('RBL_CALLBACK', symbols['dkim_allow_symbol']) + +for _, b in ipairs(black_symbols) do + rspamd_config:register_dependency(b, 'RBL_CALLBACK_WHITE') + rspamd_config:register_dependency('RBL_CALLBACK', b) end diff --git a/src/plugins/lua/replies.lua b/src/plugins/lua/replies.lua index fe15211ef..234a41ca3 100644 --- a/src/plugins/lua/replies.lua +++ b/src/plugins/lua/replies.lua @@ -37,6 +37,10 @@ local settings = { score = -4, -- Default score use_auth = true, use_local = true, + cookie = nil, + cookie_key = nil, + cookie_is_pattern = false, + cookie_valid_time = '2w', -- 2 weeks by default } local N = "replies" @@ -128,16 +132,123 @@ local function replies_set(task) end end +local function replies_check_cookie(task) + local function cookie_matched(extra, ts) + local dt = task:get_date{format = 'connect', gmt = true} + + if dt < ts then + rspamd_logger.infox(task, 'ignore cookie as its date is in future') + + return + end + + if settings.cookie_valid_time then + if dt - ts > settings.cookie_valid_time then + rspamd_logger.infox(task, + 'ignore cookie as its timestamp is too old: %s (%s current time)', + ts, dt) + + return + end + end + + if extra then + task:insert_result(settings['symbol'], 1.0, + string.format('cookie:%s:%s', extra, ts)) + else + task:insert_result(settings['symbol'], 1.0, + string.format('cookie:%s', ts)) + end + if settings['action'] ~= nil then + local ip_addr = task:get_ip() + if (settings.use_auth and + task:get_user()) or + (settings.use_local and ip_addr and ip_addr:is_local()) then + rspamd_logger.infox(task, "not forcing action for local network or authorized user"); + else + task:set_pre_result(settings['action'], settings['message'], N) + end + end + end + + -- If in-reply-to header not present return + local irt = task:get_header('in-reply-to') + if irt == nil then + return + end + + local cr = require "rspamd_cryptobox" + -- Extract user part if needed + local extracted_cookie = irt:match('^%<?([^@]+)@.*$') + if not extracted_cookie then + -- Assume full message id as a cookie + extracted_cookie = irt + end + + local dec_cookie,ts = cr.decrypt_cookie(settings.cookie_key, extracted_cookie) + + if dec_cookie then + -- We have something that looks like a cookie + if settings.cookie_is_pattern then + local m = dec_cookie:match(settings.cookie) + + if m then + cookie_matched(m, ts) + end + else + -- Direct match + if dec_cookie == settings.cookie then + cookie_matched(nil, ts) + end + end + end +end + local opts = rspamd_config:get_all_opt('replies') if not (opts and type(opts) == 'table') then rspamd_logger.infox(rspamd_config, 'module is unconfigured') return end if opts then + settings = lua_util.override_defaults(settings, opts) redis_params = lua_redis.parse_redis_server('replies') if not redis_params then - rspamd_logger.infox(rspamd_config, 'no servers are specified, disabling module') - lua_util.disable_module(N, "redis") + if not (settings.cookie and settings.cookie_key) then + rspamd_logger.infox(rspamd_config, 'no servers are specified, disabling module') + lua_util.disable_module(N, "redis") + else + -- Cookies mode + -- Check key sanity: + local pattern = {'^'} + for i=1,32 do pattern[i + 1] = '[a-zA-Z0-9]' end + pattern[34] = '$' + if not settings.cookie_key:match(table.concat(pattern, '')) then + rspamd_logger.errx(rspamd_config, + 'invalid cookies key: %s, must be 32 hex digits', settings.cookie_key) + lua_util.disable_module(N, "config") + + return + end + + if settings.cookie_valid_time then + settings.cookie_valid_time = lua_util.parse_time_interval(settings.cookie_valid_time) + end + + local id = rspamd_config:register_symbol({ + name = 'REPLIES_CHECK', + type = 'prefilter,nostat', + callback = replies_check_cookie, + priority = 10, + group = "replies" + }) + rspamd_config:register_symbol({ + name = settings['symbol'], + parent = id, + type = 'virtual', + score = settings.score, + group = "replies", + }) + end else rspamd_config:register_symbol({ name = 'REPLIES_SET', @@ -161,8 +272,4 @@ if opts then group = "replies", }) end - - for k,v in pairs(opts) do - settings[k] = v - end end diff --git a/src/plugins/lua/reputation.lua b/src/plugins/lua/reputation.lua index 7831f2770..374771c9b 100644 --- a/src/plugins/lua/reputation.lua +++ b/src/plugins/lua/reputation.lua @@ -38,10 +38,9 @@ local redis_params = nil local default_expiry = 864000 -- 10 day by default local keymap_schema = ts.shape{ - ['reject'] = ts.string, - ['add header'] = ts.string, - ['rewrite subject'] = ts.string, - ['no action'] = ts.string + ['spam'] = ts.string, + ['junk'] = ts.string, + ['ham'] = ts.string, } -- Get reputation from ham/spam/probable hits @@ -109,7 +108,8 @@ local function gen_dkim_queries(task, rule) local dom,res = lpeg.match(gr, opt) if dom and res then - ret[dom] = res + local tld = rspamd_util.get_tld(dom) + ret[tld] = res end end end @@ -165,14 +165,14 @@ local function dkim_reputation_filter(task, rule) end local function dkim_reputation_idempotent(task, rule) - local action = task:get_metric_action() + local verdict = lua_util.get_task_verdict(task) local token = { } local cfg = rule.selector.config local need_set = false -- TODO: take metric score into consideration - local k = cfg.keys_map[action] + local k = cfg.keys_map[verdict] if k then token[k] = 1.0 @@ -218,10 +218,9 @@ local dkim_selector = { -- s is for spam, -- p is for probable spam keys_map = { - ['reject'] = 's', - ['add header'] = 'p', - ['rewrite subject'] = 'p', - ['no action'] = 'h' + ['spam'] = 's', + ['junk'] = 'p', + ['ham'] = 'h' }, symbol = 'DKIM_SCORE', -- symbol to be inserted lower_bound = 10, -- minimum number of messages to be scored @@ -312,14 +311,14 @@ local function url_reputation_filter(task, rule) end local function url_reputation_idempotent(task, rule) - local action = task:get_metric_action() + local verdict = lua_util.get_task_verdict(task) local token = { } local cfg = rule.selector.config local need_set = false -- TODO: take metric score into consideration - local k = cfg.keys_map[action] + local k = cfg.keys_map[verdict] if k then token[k] = 1.0 @@ -343,10 +342,9 @@ local url_selector = { -- s is for spam, -- p is for probable spam keys_map = { - ['reject'] = 's', - ['add header'] = 'p', - ['rewrite subject'] = 'p', - ['no action'] = 'h' + ['spam'] = 's', + ['junk'] = 'p', + ['ham'] = 'h' }, symbol = 'URL_SCORE', -- symbol to be inserted lower_bound = 10, -- minimum number of messages to be scored @@ -357,7 +355,7 @@ local url_selector = { outbound = true, inbound = true, }, - dependencies = {"SURBL_CALLBACK"}, + dependencies = {"SURBL_REDIRECTOR_CALLBACK"}, filter = url_reputation_filter, -- used to get scores idempotent = url_reputation_idempotent -- used to set scores } @@ -509,13 +507,13 @@ local function ip_reputation_idempotent(task, rule) end end - local action = task:get_metric_action() + local verdict = lua_util.get_task_verdict(task) local token = { } local need_set = false -- TODO: take metric score into consideration - local k = cfg.keys_map[action] + local k = cfg.keys_map[verdict] if k then token[k] = 1.0 @@ -545,10 +543,9 @@ local ip_selector = { -- s is for spam, -- p is for probable spam keys_map = { - ['reject'] = 's', - ['add header'] = 'p', - ['rewrite subject'] = 'p', - ['no action'] = 'h' + ['spam'] = 's', + ['junk'] = 'p', + ['ham'] = 'h' }, scores = { -- how each component is evaluated ['asn'] = 0.4, @@ -603,7 +600,7 @@ local function spf_reputation_filter(task, rule) end local function spf_reputation_idempotent(task, rule) - local action = task:get_metric_action() + local verdict = lua_util.get_task_verdict(task) local spf_record = task:get_mempool():get_variable('spf_record') local spf_allow = task:has_symbol('R_SPF_ALLOW') local token = { @@ -614,7 +611,7 @@ local function spf_reputation_idempotent(task, rule) if not spf_record or not spf_allow then return end -- TODO: take metric score into consideration - local k = cfg.keys_map[action] + local k = cfg.keys_map[verdict] if k then token[k] = 1.0 @@ -639,10 +636,9 @@ local spf_selector = { -- s is for spam, -- p is for probable spam keys_map = { - ['reject'] = 's', - ['add header'] = 'p', - ['rewrite subject'] = 'p', - ['no action'] = 'h' + ['spam'] = 's', + ['junk'] = 'p', + ['ham'] = 'h' }, symbol = 'SPF_SCORE', -- symbol to be inserted lower_bound = 10, -- minimum number of messages to be scored @@ -707,18 +703,20 @@ local function generic_reputation_filter(task, rule) if selector_res then if type(selector_res) == 'table' then fun.each(function(e) - lua_util.debugm(N, task, 'check generic reputation %s', e) + lua_util.debugm(N, task, 'check generic reputation (%s) %s', + rule['symbol'], e) rule.backend.get_token(task, rule, e, tokens_cb) end, selector_res) else - lua_util.debugm(N, task, 'check generic reputation %s', selector_res) + lua_util.debugm(N, task, 'check generic reputation (%s) %s', + rule['symbol'], selector_res) rule.backend.get_token(task, rule, selector_res, tokens_cb) end end end local function generic_reputation_idempotent(task, rule) - local action = task:get_metric_action() + local verdict = lua_util.get_task_verdict(task) local cfg = rule.selector.config local need_set = false local token = {} @@ -726,7 +724,7 @@ local function generic_reputation_idempotent(task, rule) local selector_res = cfg.selector(task) if not selector_res then return end - local k = cfg.keys_map[action] + local k = cfg.keys_map[verdict] if k then token[k] = 1.0 @@ -736,13 +734,13 @@ local function generic_reputation_idempotent(task, rule) if need_set then if type(selector_res) == 'table' then fun.each(function(e) - lua_util.debugm(N, task, 'set generic selector %s = %s', - e, token) + lua_util.debugm(N, task, 'set generic selector (%s) %s = %s', + rule['symbol'], e, token) rule.backend.set_token(task, rule, e, token) end, selector_res) else - lua_util.debugm(N, task, 'set generic selector %s = %s', - selector_res, token) + lua_util.debugm(N, task, 'set generic selector (%s) %s = %s', + rule['symbol'], selector_res, token) rule.backend.set_token(task, rule, selector_res, token) end end @@ -767,10 +765,9 @@ local generic_selector = { -- s is for spam, -- p is for probable spam keys_map = { - ['reject'] = 's', - ['add header'] = 'p', - ['rewrite subject'] = 'p', - ['no action'] = 'h' + ['spam'] = 's', + ['junk'] = 'p', + ['ham'] = 'h' }, lower_bound = 10, -- minimum number of messages to be scored min_score = nil, @@ -990,22 +987,22 @@ local function reputation_redis_get_token(task, rule, token, continuation_cb) values[data[i]] = ndata end end - lua_util.debugm(N, task, 'got values for key %s -> %s', - key, values) + lua_util.debugm(N, task, 'rule %s - got values for key %s -> %s', + rule['symbol'], key, values) continuation_cb(nil, key, values) else - rspamd_logger.errx(task, 'invalid type while getting reputation keys %s: %s', - key, type(data)) + rspamd_logger.errx(task, 'rule %s - invalid type while getting reputation keys %s: %s', + rule['symbol'], key, type(data)) continuation_cb("invalid type", key, nil) end elseif err then - rspamd_logger.errx(task, 'got error while getting reputation keys %s: %s', - key, err) + rspamd_logger.errx(task, 'rule %s - got error while getting reputation keys %s: %s', + rule['symbol'], key, err) continuation_cb(err, key, nil) else - rspamd_logger.errx(task, 'got error while getting reputation keys %s: %s', - key, "unknown error") + rspamd_logger.errx(task, 'rule %s - got error while getting reputation keys %s: %s', + rule['symbol'], key, "unknown error") continuation_cb("unknown error", key, nil) end end @@ -1024,8 +1021,8 @@ local function reputation_redis_set_token(task, rule, token, values, continuatio local function redis_set_cb(err, data) if err then - rspamd_logger.errx(task, 'got error while setting reputation keys %s: %s', - key, err) + rspamd_logger.errx(task, 'rule %s - got error while setting reputation keys %s: %s', + rule['symbol'], key, err) if continuation_cb then continuation_cb(err, key) end @@ -1042,8 +1039,8 @@ local function reputation_redis_set_token(task, rule, token, values, continuatio table.insert(args, k) table.insert(args, v) end - lua_util.debugm(N, task, 'set values for key %s -> %s', - key, values) + lua_util.debugm(N, task, 'rule %s - set values for key %s -> %s', + rule['symbol'], key, values) local ret = lua_redis.exec_redis_script(rule.backend.script_set, {task = task, is_write = true}, redis_set_cb, @@ -1217,7 +1214,7 @@ local function parse_rule(name, tbl) local symbol = name if tbl.symbol then - symbol = name + symbol = tbl.symbol end rule.symbol = symbol diff --git a/src/plugins/lua/settings.lua b/src/plugins/lua/settings.lua index 970062d3b..a6ce955c1 100644 --- a/src/plugins/lua/settings.lua +++ b/src/plugins/lua/settings.lua @@ -193,25 +193,40 @@ local function check_settings(task) end local function check_specific_setting(rule_name, rule, ip, client_ip, from, rcpt, - user, auth_user) + user, auth_user, hostname, matched) local res = false - if rule['authenticated'] then + if rule.authenticated then if auth_user then res = true + matched[#matched + 1] = 'authenticated' end if not res then return nil end end - if rule['ip'] then + if rule['local'] then if not ip or not ip:is_valid() then return nil end - for _, i in ipairs(rule['ip']) do + + if ip:is_local() then + matched[#matched + 1] = 'local' + res = true + else + return nil + end + end + + if rule.ip then + if not ip or not ip:is_valid() then + return nil + end + for _, i in ipairs(rule.ip) do res = check_ip_setting(i, ip) if res then + matched[#matched + 1] = 'ip' break end end @@ -220,13 +235,14 @@ local function check_settings(task) end end - if rule['client_ip'] then + if rule.client_ip then if not client_ip or not client_ip:is_valid() then return nil end - for _, i in ipairs(rule['client_ip']) do + for _, i in ipairs(rule.client_ip) do res = check_ip_setting(i, client_ip) if res then + matched[#matched + 1] = 'client_ip' break end end @@ -235,13 +251,14 @@ local function check_settings(task) end end - if rule['from'] then + if rule.from then if not from then return nil end - for _, i in ipairs(rule['from']) do + for _, i in ipairs(rule.from) do res = check_addr_setting(i, from) if res then + matched[#matched + 1] = 'from' break end end @@ -250,13 +267,15 @@ local function check_settings(task) end end - if rule['rcpt'] then + if rule.rcpt then if not rcpt then return nil end - for _, i in ipairs(rule['rcpt']) do + for _, i in ipairs(rule.rcpt) do res = check_addr_setting(i, rcpt) + if res then + matched[#matched + 1] = 'rcpt' break end end @@ -265,13 +284,14 @@ local function check_settings(task) end end - if rule['user'] then + if rule.user then if not user then return nil end - for _, i in ipairs(rule['user']) do + for _, i in ipairs(rule.user) do res = check_addr_setting(i, user) if res then + matched[#matched + 1] = 'user' break end end @@ -280,11 +300,28 @@ local function check_settings(task) end end - if rule['request_header'] then - for k, v in pairs(rule['request_header']) do + if rule.hostname then + if #hostname == 0 then + return nil + end + for _, i in ipairs(rule.hostname) do + res = check_addr_setting(i, hostname) + if res then + matched[#matched + 1] = 'hostname' + break + end + end + if not res then + return nil + end + end + + if rule.request_header then + for k, v in pairs(rule.request_header) do local h = task:get_request_header(k) res = (h and v:match(h)) if res then + matched[#matched + 1] = 'req_header: ' .. k break end end @@ -293,13 +330,14 @@ local function check_settings(task) end end - if rule['header'] then - for _, e in ipairs(rule['header']) do + if rule.header then + for _, e in ipairs(rule.header) do for k, v in pairs(e) do for _, p in ipairs(v) do local h = task:get_header(k) res = (h and p:match(h)) if res then + matched[#matched + 1] = 'header: ' .. k break end end @@ -316,21 +354,11 @@ local function check_settings(task) end end - if rule['selector'] then - local sel = selectors_cache[rule_name] - if not sel then - sel = lua_selectors.create_selector_closure(rspamd_config, rule.selector, - rule.delimiter or "") + if rule.selector then + res = fun.all(function(s) return s(task) end, rule.selector) - if sel then - selectors_cache[rule_name] = sel - end - end - - if sel then - if sel(task) then - res = true - end + if res then + matched[#matched + 1] = 'selector' end end @@ -361,6 +389,7 @@ local function check_settings(task) local from = task:get_from() local rcpt = task:get_recipients() local uname = task:get_user() + local hostname = task:get_hostname() or '' local user = {} if uname then user[1] = {} @@ -380,19 +409,23 @@ local function check_settings(task) for pri = max_pri,1,-1 do if not applied and settings[pri] then for _,s in ipairs(settings[pri]) do - local rule = check_specific_setting(s.name, s.rule, ip, client_ip, from, rcpt, user, uname) - if rule then - rspamd_logger.infox(task, "<%1> apply settings according to rule %2", - task:get_message_id(), s.name) - if rule['apply'] then - apply_settings(task, rule['apply']) + local matched = {} + local rule = check_specific_setting(s.name, s.rule, + ip, client_ip, from, rcpt, user, uname, hostname, matched) + + -- Can use xor here but more complicated for reading + if (rule and not s.rule.inverse) or (not rule and s.rule.inverse) then + rspamd_logger.infox(task, "<%s> apply settings according to rule %s (%s matched)", + task:get_message_id(), s.name, table.concat(matched, ',')) + if s.rule['apply'] then + apply_settings(task, s.rule['apply']) applied = true end - if rule['symbols'] then + if s.rule['symbols'] then -- Add symbols, specified in the settings fun.each(function(val) task:insert_result(val, 1.0) - end, rule['symbols']) + end, s.rule['symbols']) end end end @@ -556,9 +589,21 @@ local function process_settings_table(tbl) out['user'] = check_table(elt['user'], user) end end + if elt['hostname'] then + local hostname = process_addr(elt['hostname']) + if hostname then + out['hostname'] = check_table(elt['hostname'], hostname) + end + end if elt['authenticated'] then out['authenticated'] = true end + if elt['local'] then + out['local'] = true + end + if elt['inverse'] then + out['inverse'] = true + end if elt['request_header'] then local rho = {} for k, v in pairs(elt['request_header']) do @@ -602,6 +647,26 @@ local function process_settings_table(tbl) end end + if elt['selector'] then + local sel = selectors_cache[name] + if not sel then + sel = lua_selectors.create_selector_closure(rspamd_config, elt.selector, + elt.delimiter or "") + + if sel then + selectors_cache[name] = sel + end + end + + if sel then + if out.selector then + table.insert(out['selector'], sel) + else + out['selector'] = {sel} + end + end + end + -- Now we must process actions if elt['symbols'] then out['symbols'] = elt['symbols'] end if elt['id'] then diff --git a/src/plugins/lua/whitelist.lua b/src/plugins/lua/whitelist.lua index 1c8612386..b9dce612b 100644 --- a/src/plugins/lua/whitelist.lua +++ b/src/plugins/lua/whitelist.lua @@ -162,9 +162,11 @@ local function whitelist_cb(symbol, rule, task) if dkim_opts then fun.each(function(val) if val[2] == '+' then - find_domain(val[1], 'dkim_success') + local tld = rspamd_util.get_tld(val[1]) + find_domain(tld, 'dkim_success') elseif val[2] == '-' then - find_domain(val[1], 'dkim_fail') + local tld = rspamd_util.get_tld(val[1]) + find_domain(tld, 'dkim_fail') end end, fun.map(function(s) diff --git a/src/plugins/regexp.c b/src/plugins/regexp.c index 92cccc338..897bbd277 100644 --- a/src/plugins/regexp.c +++ b/src/plugins/regexp.c @@ -39,7 +39,9 @@ struct regexp_ctx { gsize max_size; }; -static void process_regexp_item (struct rspamd_task *task, void *user_data); +static void process_regexp_item (struct rspamd_task *task, + struct rspamd_symcache_item *item, + void *user_data); /* Initialization */ @@ -170,7 +172,7 @@ regexp_module_config (struct rspamd_config *cfg) res = FALSE; } else { - rspamd_symbols_cache_add_symbol (cfg->cache, + rspamd_symcache_add_symbol (cfg->cache, cur_item->symbol, 0, process_regexp_item, @@ -187,12 +189,12 @@ regexp_module_config (struct rspamd_config *cfg) cur_item->symbol = ucl_object_key (value); cur_item->lua_function = ucl_object_toclosure (value); - rspamd_symbols_cache_add_symbol (cfg->cache, - cur_item->symbol, - 0, - process_regexp_item, - cur_item, - SYMBOL_TYPE_NORMAL, -1); + rspamd_symcache_add_symbol (cfg->cache, + cur_item->symbol, + 0, + process_regexp_item, + cur_item, + SYMBOL_TYPE_NORMAL, -1); nlua ++; } else if (value->type == UCL_OBJECT) { @@ -243,7 +245,7 @@ regexp_module_config (struct rspamd_config *cfg) } if (cur_item && (is_lua || valid_expression)) { - id = rspamd_symbols_cache_add_symbol (cfg->cache, + id = rspamd_symcache_add_symbol (cfg->cache, cur_item->symbol, 0, process_regexp_item, @@ -255,8 +257,10 @@ regexp_module_config (struct rspamd_config *cfg) if (elt != NULL && ucl_object_type (elt) == UCL_USERDATA) { struct ucl_lua_funcdata *conddata; + g_assert (cur_item->symbol != NULL); conddata = ucl_object_toclosure (elt); - rspamd_symbols_cache_add_condition (cfg->cache, id, + rspamd_symcache_add_condition_delayed (cfg->cache, + cur_item->symbol, conddata->L, conddata->idx); } @@ -275,39 +279,96 @@ regexp_module_config (struct rspamd_config *cfg) elt = ucl_object_lookup (value, "score"); if (elt) { - score = ucl_object_todouble (elt); + if (ucl_object_type (elt) != UCL_FLOAT && ucl_object_type (elt) != UCL_INT) { + msg_err_config ( + "score attribute is not numeric for symbol: '%s'", + cur_item->symbol); + + res = FALSE; + } + else { + score = ucl_object_todouble (elt); + } } elt = ucl_object_lookup (value, "one_shot"); if (elt) { - if (ucl_object_toboolean (elt)) { - nshots = 1; + if (ucl_object_type (elt) != UCL_BOOLEAN) { + msg_err_config ( + "one_shot attribute is not numeric for symbol: '%s'", + cur_item->symbol); + + res = FALSE; + } + else { + if (ucl_object_toboolean (elt)) { + nshots = 1; + } } } if ((elt = ucl_object_lookup (value, "any_shot")) != NULL) { - if (ucl_object_toboolean (elt)) { - nshots = -1; + if (ucl_object_type (elt) != UCL_BOOLEAN) { + msg_err_config ( + "any_shot attribute is not numeric for symbol: '%s'", + cur_item->symbol); + + res = FALSE; + } + else { + if (ucl_object_toboolean (elt)) { + nshots = -1; + } } } if ((elt = ucl_object_lookup (value, "nshots")) != NULL) { - nshots = ucl_object_toint (elt); + if (ucl_object_type (elt) != UCL_FLOAT && ucl_object_type (elt) != UCL_INT) { + msg_err_config ( + "nshots attribute is not numeric for symbol: '%s'", + cur_item->symbol); + + res = FALSE; + } + else { + nshots = ucl_object_toint (elt); + } } elt = ucl_object_lookup (value, "one_param"); if (elt) { - if (ucl_object_toboolean (elt)) { - flags |= RSPAMD_SYMBOL_FLAG_ONEPARAM; + if (ucl_object_type (elt) != UCL_BOOLEAN) { + msg_err_config ( + "one_param attribute is not numeric for symbol: '%s'", + cur_item->symbol); + + res = FALSE; + } + else { + if (ucl_object_toboolean (elt)) { + flags |= RSPAMD_SYMBOL_FLAG_ONEPARAM; + } } } elt = ucl_object_lookup (value, "priority"); if (elt) { - priority = ucl_object_toint (elt); + if (ucl_object_type (elt) != UCL_FLOAT && ucl_object_type (elt) != UCL_INT) { + msg_err_config ( + "priority attribute is not numeric for symbol: '%s'", + cur_item->symbol); + + res = FALSE; + } + else { + priority = ucl_object_toint (elt); + } + } + else { + priority = ucl_object_get_priority (value) + 1; } rspamd_config_add_symbol (cfg, cur_item->symbol, @@ -415,7 +476,9 @@ rspamd_lua_call_expression_func (struct ucl_lua_funcdata *lua_data, static void -process_regexp_item (struct rspamd_task *task, void *user_data) +process_regexp_item (struct rspamd_task *task, + struct rspamd_symcache_item *symcache_item, + void *user_data) { struct regexp_module_item *item = user_data; gint res = FALSE; @@ -449,4 +512,6 @@ process_regexp_item (struct rspamd_task *task, void *user_data) if (res) { rspamd_task_insert_result (task, item->symbol, res, NULL); } + + rspamd_symcache_finalize_item (task, symcache_item); } diff --git a/src/plugins/spf.c b/src/plugins/spf.c index 46160878f..eedaf6c2e 100644 --- a/src/plugins/spf.c +++ b/src/plugins/spf.c @@ -45,6 +45,8 @@ #define DEFAULT_SYMBOL_NA "R_SPF_NA" #define DEFAULT_CACHE_SIZE 2048 +static const gchar *M = "rspamd spf plugin"; + struct spf_ctx { struct module_ctx ctx; const gchar *symbol_fail; @@ -62,7 +64,9 @@ struct spf_ctx { gboolean check_authed; }; -static void spf_symbol_callback (struct rspamd_task *task, void *unused); +static void spf_symbol_callback (struct rspamd_task *task, + struct rspamd_symcache_item *item, + void *unused); /* Initialization */ gint spf_module_init (struct rspamd_config *cfg, struct module_ctx **ctx); @@ -295,38 +299,38 @@ spf_module_config (struct rspamd_config *cfg) &spf_module_ctx->whitelist_ip, NULL); } - cb_id = rspamd_symbols_cache_add_symbol (cfg->cache, - spf_module_ctx->symbol_fail, - 0, - spf_symbol_callback, - NULL, - SYMBOL_TYPE_NORMAL|SYMBOL_TYPE_FINE|SYMBOL_TYPE_EMPTY, -1); - rspamd_symbols_cache_add_symbol (cfg->cache, + cb_id = rspamd_symcache_add_symbol (cfg->cache, + spf_module_ctx->symbol_fail, + 0, + spf_symbol_callback, + NULL, + SYMBOL_TYPE_NORMAL | SYMBOL_TYPE_FINE | SYMBOL_TYPE_EMPTY, -1); + rspamd_symcache_add_symbol (cfg->cache, spf_module_ctx->symbol_softfail, 0, NULL, NULL, SYMBOL_TYPE_VIRTUAL, cb_id); - rspamd_symbols_cache_add_symbol (cfg->cache, + rspamd_symcache_add_symbol (cfg->cache, spf_module_ctx->symbol_permfail, 0, NULL, NULL, SYMBOL_TYPE_VIRTUAL, cb_id); - rspamd_symbols_cache_add_symbol (cfg->cache, + rspamd_symcache_add_symbol (cfg->cache, spf_module_ctx->symbol_na, 0, NULL, NULL, SYMBOL_TYPE_VIRTUAL, cb_id); - rspamd_symbols_cache_add_symbol (cfg->cache, + rspamd_symcache_add_symbol (cfg->cache, spf_module_ctx->symbol_neutral, 0, NULL, NULL, SYMBOL_TYPE_VIRTUAL, cb_id); - rspamd_symbols_cache_add_symbol (cfg->cache, + rspamd_symcache_add_symbol (cfg->cache, spf_module_ctx->symbol_allow, 0, NULL, NULL, SYMBOL_TYPE_VIRTUAL, cb_id); - rspamd_symbols_cache_add_symbol (cfg->cache, + rspamd_symcache_add_symbol (cfg->cache, spf_module_ctx->symbol_dnsfail, 0, NULL, NULL, SYMBOL_TYPE_VIRTUAL, @@ -510,7 +514,7 @@ spf_plugin_callback (struct spf_resolved *record, struct rspamd_task *task, gpointer ud) { struct spf_resolved *l; - struct rspamd_async_watcher *w = ud; + struct rspamd_symcache_item *item = (struct rspamd_symcache_item *)ud; struct spf_ctx *spf_module_ctx = spf_get_context (task->cfg); if (record && record->na) { @@ -560,16 +564,17 @@ spf_plugin_callback (struct spf_resolved *record, struct rspamd_task *task, spf_record_unref (l); } - rspamd_session_watcher_pop (task->s, w); + rspamd_symcache_item_async_dec_check (task, item, M); } static void -spf_symbol_callback (struct rspamd_task *task, void *unused) +spf_symbol_callback (struct rspamd_task *task, + struct rspamd_symcache_item *item, + void *unused) { const gchar *domain; struct spf_resolved *l; - struct rspamd_async_watcher *w; gint *dmarc_checks; struct spf_ctx *spf_module_ctx = spf_get_context (task->cfg); @@ -591,6 +596,7 @@ spf_symbol_callback (struct rspamd_task *task, void *unused) if (rspamd_match_radix_map_addr (spf_module_ctx->whitelist_ip, task->from_addr) != NULL) { + rspamd_symcache_finalize_item (task, item); return; } @@ -598,10 +604,13 @@ spf_symbol_callback (struct rspamd_task *task, void *unused) || (!spf_module_ctx->check_local && rspamd_inet_address_is_local (task->from_addr, TRUE))) { msg_info_task ("skip SPF checks for local networks and authorized users"); + rspamd_symcache_finalize_item (task, item); + return; } domain = rspamd_spf_get_domain (task); + rspamd_symcache_item_async_inc (task, item, M); if (domain) { if ((l = @@ -612,9 +621,8 @@ spf_symbol_callback (struct rspamd_task *task, void *unused) spf_record_unref (l); } else { - w = rspamd_session_get_watcher (task->s); - if (!rspamd_spf_resolve (task, spf_plugin_callback, w)) { + if (!rspamd_spf_resolve (task, spf_plugin_callback, item)) { msg_info_task ("cannot make spf request for [%s]", task->message_id); rspamd_task_insert_result (task, @@ -623,8 +631,10 @@ spf_symbol_callback (struct rspamd_task *task, void *unused) "(SPF): spf DNS fail"); } else { - rspamd_session_watcher_push (task->s); + rspamd_symcache_item_async_inc (task, item, M); } } } + + rspamd_symcache_item_async_dec_check (task, item, M); } diff --git a/src/plugins/surbl.c b/src/plugins/surbl.c index c27e5c858..4bc17db20 100644 --- a/src/plugins/surbl.c +++ b/src/plugins/surbl.c @@ -64,6 +64,8 @@ INIT_LOG_MODULE(surbl) +static const gchar *M = "surbl"; + #define DEFAULT_SURBL_WEIGHT 10 #define DEFAULT_REDIRECTOR_READ_TIMEOUT 5.0 #define DEFAULT_SURBL_SYMBOL "SURBL_DNS" @@ -108,7 +110,7 @@ struct dns_param { struct rspamd_task *task; gchar *host_resolve; struct suffix_item *suffix; - struct rspamd_async_watcher *w; + struct rspamd_symcache_item *item; struct surbl_module_ctx *ctx; }; @@ -120,7 +122,7 @@ struct redirector_param { struct rspamd_http_connection *conn; GHashTable *tree; struct suffix_item *suffix; - struct rspamd_async_watcher *w; + struct rspamd_symcache_item *item; gint sock; guint redirector_requests; }; @@ -136,8 +138,12 @@ static const guint64 rspamd_surbl_cb_magic = 0xe09b8536f80de0d1ULL; static const gchar *rspamd_surbl_default_monitored = "facebook.com"; static const guint default_max_redirected_urls = 10; -static void surbl_test_url (struct rspamd_task *task, void *user_data); -static void surbl_test_redirector (struct rspamd_task *task, void *user_data); +static void surbl_test_url (struct rspamd_task *task, + struct rspamd_symcache_item *item, + void *user_data); +static void surbl_test_redirector (struct rspamd_task *task, + struct rspamd_symcache_item *item, + void *user_data); static void surbl_dns_callback (struct rdns_reply *reply, gpointer arg); static void surbl_dns_ip_callback (struct rdns_reply *reply, gpointer arg); static void process_dns_results (struct rspamd_task *task, @@ -605,7 +611,7 @@ register_bit_symbols (struct rspamd_config *cfg, struct suffix_item *suffix, while (g_hash_table_iter_next (&it, &k, &v)) { bit = v; - rspamd_symbols_cache_add_symbol (cfg->cache, bit->symbol, + rspamd_symcache_add_symbol (cfg->cache, bit->symbol, 0, NULL, NULL, SYMBOL_TYPE_VIRTUAL, parent_id); msg_debug_config ("bit: %d", bit->bit); @@ -614,13 +620,13 @@ register_bit_symbols (struct rspamd_config *cfg, struct suffix_item *suffix, else if (suffix->bits != NULL) { for (i = 0; i < suffix->bits->len; i++) { bit = &g_array_index (suffix->bits, struct surbl_bit_item, i); - rspamd_symbols_cache_add_symbol (cfg->cache, bit->symbol, + rspamd_symcache_add_symbol (cfg->cache, bit->symbol, 0, NULL, NULL, SYMBOL_TYPE_VIRTUAL, parent_id); } } else { - rspamd_symbols_cache_add_symbol (cfg->cache, suffix->symbol, + rspamd_symcache_add_symbol (cfg->cache, suffix->symbol, 0, NULL, NULL, SYMBOL_TYPE_VIRTUAL, parent_id); } @@ -756,10 +762,30 @@ surbl_module_parse_rule (const ucl_object_t* value, struct rspamd_config* cfg) continue; } - cb_id = rspamd_symbols_cache_add_symbol (cfg->cache, "SURBL_CALLBACK", + GString *sym = g_string_sized_new (127); + gchar *p; + + rspamd_printf_gstring (sym, "SURBL_%s", + new_suffix->suffix); + + p = sym->str; + + while (*p) { + if (*p == '.') { + *p = '_'; + } + else { + *p = g_ascii_toupper (*p); + } + + p ++; + } + + cb_id = rspamd_symcache_add_symbol (cfg->cache, sym->str, 0, surbl_test_url, new_suffix, SYMBOL_TYPE_CALLBACK, -1); - rspamd_symbols_cache_add_dependency (cfg->cache, cb_id, + rspamd_symcache_add_dependency (cfg->cache, cb_id, SURBL_REDIRECTOR_CALLBACK); + g_string_free (sym, TRUE); nrules++; new_suffix->callback_id = cb_id; cur = ucl_object_lookup (cur_rule, "bits"); @@ -889,7 +915,7 @@ surbl_module_parse_rule (const ucl_object_t* value, struct rspamd_config* cfg) if (new_suffix->symbol) { /* Register just a symbol itself */ - rspamd_symbols_cache_add_symbol (cfg->cache, + rspamd_symcache_add_symbol (cfg->cache, new_suffix->symbol, 0, NULL, NULL, SYMBOL_TYPE_VIRTUAL, cb_id); nrules++; @@ -943,7 +969,7 @@ surbl_module_config (struct rspamd_config *cfg) lua_pop (L, 1); /* Remove global function */ - (void)rspamd_symbols_cache_add_symbol (cfg->cache, SURBL_REDIRECTOR_CALLBACK, + (void) rspamd_symcache_add_symbol (cfg->cache, SURBL_REDIRECTOR_CALLBACK, 0, surbl_test_redirector, NULL, SYMBOL_TYPE_CALLBACK, -1); @@ -967,9 +993,9 @@ surbl_module_config (struct rspamd_config *cfg) rspamd_config_get_module_opt (cfg, "surbl", "redirector_symbol")) != NULL) { surbl_module_ctx->redirector_symbol = ucl_obj_tostring (value); - rspamd_symbols_cache_add_symbol (cfg->cache, - surbl_module_ctx->redirector_symbol, - 0, NULL, NULL, SYMBOL_TYPE_COMPOSITE, -1); + rspamd_symcache_add_symbol (cfg->cache, + surbl_module_ctx->redirector_symbol, + 0, NULL, NULL, SYMBOL_TYPE_COMPOSITE, -1); } else { surbl_module_ctx->redirector_symbol = NULL; @@ -988,7 +1014,7 @@ surbl_module_config (struct rspamd_config *cfg) surbl_module_ctx->use_tags = ucl_obj_toboolean (value); } else { - surbl_module_ctx->use_tags = TRUE; + surbl_module_ctx->use_tags = FALSE; } if ((value = @@ -1084,7 +1110,7 @@ surbl_module_config (struct rspamd_config *cfg) } if (cur_suffix->options & SURBL_OPTION_CHECKDKIM) { - rspamd_symbols_cache_add_dependency (cfg->cache, + rspamd_symcache_add_dependency (cfg->cache, cur_suffix->callback_id, "DKIM_TRACE"); } @@ -1319,6 +1345,7 @@ format_surbl_request (rspamd_mempool_t * pool, static void make_surbl_requests (struct rspamd_url *url, struct rspamd_task *task, + struct rspamd_symcache_item *item, struct suffix_item *suffix, gboolean forced, GHashTable *tree, struct surbl_ctx *surbl_module_ctx) @@ -1375,8 +1402,8 @@ make_surbl_requests (struct rspamd_url *url, struct rspamd_task *task, if (make_dns_request_task (task, surbl_dns_ip_callback, (void *) param, RDNS_REQUEST_A, surbl_req)) { - param->w = rspamd_session_get_watcher (task->s); - rspamd_session_watcher_push (task->s); + param->item = item; + rspamd_symcache_item_async_inc (task, item, M); } } } @@ -1402,8 +1429,8 @@ make_surbl_requests (struct rspamd_url *url, struct rspamd_task *task, if (make_dns_request_task (task, surbl_dns_callback, (void *) param, RDNS_REQUEST_A, surbl_req)) { - param->w = rspamd_session_get_watcher (task->s); - rspamd_session_watcher_push (task->s); + param->item = item; + rspamd_symcache_item_async_inc (task, item, M); } } else if (err != NULL) { @@ -1428,6 +1455,7 @@ process_dns_results (struct rspamd_task *task, gboolean got_result = FALSE; struct surbl_bit_item *bit; struct in_addr ina; + struct surbl_ctx *surbl_module_ctx = surbl_get_context (task->cfg); if (suffix->ips && g_hash_table_size (suffix->ips) > 0) { @@ -1438,7 +1466,10 @@ process_dns_results (struct rspamd_task *task, resolved_name, suffix->suffix, bit->bit); rspamd_task_insert_result (task, bit->symbol, 1, resolved_name); - rspamd_url_add_tag (uri, "surbl", bit->symbol, task->task_pool); + + if (surbl_module_ctx->use_tags) { + rspamd_url_add_tag (uri, "surbl", bit->symbol, task->task_pool); + } got_result = TRUE; } } @@ -1458,7 +1489,10 @@ process_dns_results (struct rspamd_task *task, resolved_name, suffix->suffix, bit->bit); rspamd_task_insert_result (task, bit->symbol, 1, resolved_name); - rspamd_url_add_tag (uri, "surbl", bit->symbol, task->task_pool); + + if (surbl_module_ctx->use_tags) { + rspamd_url_add_tag (uri, "surbl", bit->symbol, task->task_pool); + } } } } @@ -1469,7 +1503,10 @@ process_dns_results (struct rspamd_task *task, task->message_id, resolved_name, suffix->suffix); rspamd_task_insert_result (task, suffix->symbol, 1, resolved_name); - rspamd_url_add_tag (uri, "surbl", suffix->symbol, task->task_pool); + + if (surbl_module_ctx->use_tags) { + rspamd_url_add_tag (uri, "surbl", suffix->symbol, task->task_pool); + } } else { ina.s_addr = addr; @@ -1508,7 +1545,7 @@ surbl_dns_callback (struct rdns_reply *reply, gpointer arg) param->suffix->suffix); } - rspamd_session_watcher_pop (param->task->s, param->w); + rspamd_symcache_item_async_dec_check (param->task, param->item, M); } static void @@ -1547,7 +1584,7 @@ surbl_dns_ip_callback (struct rdns_reply *reply, gpointer arg) if (make_dns_request_task (task, surbl_dns_callback, param, RDNS_REQUEST_A, to_resolve->str)) { - rspamd_session_watcher_push_specific (task->s, param->w); + rspamd_symcache_item_async_inc (param->task, param->item, M); } g_string_free (to_resolve, TRUE); @@ -1561,7 +1598,7 @@ surbl_dns_ip_callback (struct rdns_reply *reply, gpointer arg) } - rspamd_session_watcher_pop (param->task->s, param->w); + rspamd_symcache_item_async_dec_check (param->task, param->item, M); } static void @@ -1569,6 +1606,10 @@ free_redirector_session (void *ud) { struct redirector_param *param = (struct redirector_param *)ud; + if (param->item) { + rspamd_symcache_item_async_dec_check (param->task, param->item, M); + } + rspamd_http_connection_unref (param->conn); close (param->sock); } @@ -1595,12 +1636,14 @@ surbl_redirector_finish (struct rspamd_http_connection *conn, { struct redirector_param *param = (struct redirector_param *)conn->ud; struct rspamd_task *task; + struct surbl_ctx *surbl_module_ctx; gint r, urllen; struct rspamd_url *redirected_url, *existing; const rspamd_ftok_t *hdr; gchar *urlstr; task = param->task; + surbl_module_ctx = surbl_get_context (task->cfg); if (msg->code == 200) { hdr = rspamd_http_message_find_header (msg, "Uri"); @@ -1630,8 +1673,10 @@ surbl_redirector_finish (struct rspamd_http_connection *conn, existing->count ++; } - rspamd_url_add_tag (param->url, "redirector", urlstr, - task->task_pool); + if (surbl_module_ctx->use_tags) { + rspamd_url_add_tag (param->url, "redirector", urlstr, + task->task_pool); + } } else { msg_info_surbl ("cannot parse redirector reply: %s", urlstr); @@ -1677,6 +1722,7 @@ register_redirector_call (struct rspamd_url *url, struct rspamd_task *task, msg_info_surbl ("<%s> cannot create tcp socket failed: %s", task->message_id, strerror (errno)); + return; } @@ -1700,7 +1746,14 @@ register_redirector_call (struct rspamd_url *url, struct rspamd_task *task, timeout = rspamd_mempool_alloc (task->task_pool, sizeof (struct timeval)); double_to_tv (surbl_module_ctx->read_timeout, timeout); - rspamd_session_add_event (task->s, NULL, free_redirector_session, param, g_quark_from_static_string ("surbl")); + rspamd_session_add_event (task->s, + free_redirector_session, param, + M); + param->item = rspamd_symcache_get_cur_item (task); + + if (param->item) { + rspamd_symcache_item_async_inc (param->task, param->item, M); + } rspamd_http_connection_write_message (param->conn, msg, NULL, NULL, param, s, timeout, task->ev_base); @@ -1738,6 +1791,9 @@ surbl_test_tags (struct rspamd_task *task, struct redirector_param *param, /* We know results for this URL */ DL_FOREACH (tag, cur) { + msg_info_surbl ("<%s> domain [%s] is in surbl %s (tags)", + task->message_id, + ftld, cur->data); rspamd_task_insert_result (task, cur->data, 1, ftld); } @@ -1819,6 +1875,7 @@ surbl_tree_redirector_callback (gpointer key, gpointer value, void *data) *purl = url; rspamd_lua_setclass (L, "rspamd{url}", -1); lua_pushlightuserdata (L, nparam); + rspamd_symcache_set_cur_item (task, param->item); if (lua_pcall (L, 3, 0, 0) != 0) { msg_err_task ("cannot call for redirector script: %s", @@ -1826,8 +1883,7 @@ surbl_tree_redirector_callback (gpointer key, gpointer value, void *data) lua_pop (L, 1); } else { - nparam->w = rspamd_session_get_watcher (task->s); - rspamd_session_watcher_push (task->s); + nparam->item = param->item; } } else { @@ -1851,10 +1907,16 @@ surbl_tree_url_callback (gpointer key, gpointer value, void *data) return; } + if (url->flags & RSPAMD_URL_FLAG_HTML_DISPLAYED) { + /* Skip urls that are displayed only */ + return; + } + task = param->task; surbl_module_ctx = param->ctx; - msg_debug_surbl ("check url %*s", url->urllen, url->string); + msg_debug_surbl ("check url %*s in %s", url->urllen, url->string, + param->suffix->suffix); if (surbl_module_ctx->use_tags && surbl_test_tags (param->task, param, url)) { return; @@ -1865,12 +1927,14 @@ surbl_tree_url_callback (gpointer key, gpointer value, void *data) return; } - make_surbl_requests (url, param->task, param->suffix, FALSE, + make_surbl_requests (url, param->task, param->item, param->suffix, FALSE, param->tree, surbl_module_ctx); } static void -surbl_test_url (struct rspamd_task *task, void *user_data) +surbl_test_url (struct rspamd_task *task, + struct rspamd_symcache_item *item, + void *user_data) { struct redirector_param *param; struct suffix_item *suffix = user_data; @@ -1883,6 +1947,8 @@ surbl_test_url (struct rspamd_task *task, void *user_data) if (!rspamd_monitored_alive (suffix->m)) { msg_info_surbl ("disable surbl %s as it is reported to be offline", suffix->suffix); + rspamd_symcache_finalize_item (task, item); + return; } @@ -1891,11 +1957,15 @@ surbl_test_url (struct rspamd_task *task, void *user_data) param->suffix = suffix; param->tree = g_hash_table_new (rspamd_strcase_hash, rspamd_strcase_equal); param->ctx = surbl_module_ctx; + param->item = item; + rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t)g_hash_table_unref, param->tree); g_hash_table_foreach (task->urls, surbl_tree_url_callback, param); + rspamd_symcache_item_async_inc (task, item, M); + /* We also need to check and process img URLs */ if (suffix->options & SURBL_OPTION_CHECKIMAGES) { for (i = 0; i < task->text_parts->len; i ++) { @@ -1940,10 +2010,14 @@ surbl_test_url (struct rspamd_task *task, void *user_data) } } } + + rspamd_symcache_item_async_dec_check (task, item, M); } static void -surbl_test_redirector (struct rspamd_task *task, void *user_data) +surbl_test_redirector (struct rspamd_task *task, + struct rspamd_symcache_item *item, + void *user_data) { struct redirector_param *param; guint i, j; @@ -1953,14 +2027,19 @@ surbl_test_redirector (struct rspamd_task *task, void *user_data) struct surbl_ctx *surbl_module_ctx = surbl_get_context (task->cfg); if (!surbl_module_ctx->use_redirector || !surbl_module_ctx->redirector_tlds) { + rspamd_symcache_finalize_item (task, item); + return; } + rspamd_symcache_item_async_inc (task, item, M); + param = rspamd_mempool_alloc0 (task->task_pool, sizeof (*param)); param->task = task; param->suffix = NULL; param->redirector_requests = 0; param->ctx = surbl_module_ctx; + param->item = item; g_hash_table_foreach (task->urls, surbl_tree_redirector_callback, param); /* We also need to check and process img URLs */ @@ -1984,6 +2063,8 @@ surbl_test_redirector (struct rspamd_task *task, void *user_data) } } } + + rspamd_symcache_item_async_dec_check (task, item, M); } @@ -2097,12 +2178,14 @@ surbl_continue_process_handler (lua_State *L) gsize urllen; struct rspamd_url *redirected_url; gchar *urlstr; + struct surbl_ctx *surbl_module_ctx; nurl = lua_tolstring (L, 1, &urllen); param = (struct redirector_param *)lua_topointer (L, 2); if (param != NULL) { task = param->task; + surbl_module_ctx = surbl_get_context (task->cfg); if (nurl != NULL) { msg_info_surbl ("<%s> got reply from redirector: '%*s' -> '%*s'", @@ -2125,8 +2208,10 @@ surbl_continue_process_handler (lua_State *L) redirected_url->flags |= RSPAMD_URL_FLAG_REDIRECTED; } - rspamd_url_add_tag (param->url, "redirector", urlstr, - task->task_pool); + if (surbl_module_ctx->use_tags) { + rspamd_url_add_tag (param->url, "redirector", urlstr, + task->task_pool); + } } else { msg_info_surbl ("<%s> could not resolve '%*s' on redirector", @@ -2139,9 +2224,6 @@ surbl_continue_process_handler (lua_State *L) param->task->message_id, param->url->urllen, param->url->string); } - - rspamd_session_watcher_pop (task->s, param->w); - param->w = NULL; } else { return luaL_error (L, "invalid arguments"); |