From e0593fbd3b6d9c39ad7891387fc6a0faa4eea29c Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Sat, 19 Feb 2011 16:24:17 +0300 Subject: [PATCH] * Add ability to save symbols that are inside composites with '-' flag. Remove warnings from rspamc. Add new rules [1] Submitted by: Victor Ustugov [1] --- conf/lua/regexp/headers.lua | 63 ++++++++++++++++++++++++++++++++++++- rspamd.xml.sample | 22 ++++++++++++- src/client/rspamc.c | 18 +++++------ src/dns.c | 8 ++--- src/filter.c | 22 ++++++++++--- 5 files changed, 114 insertions(+), 19 deletions(-) diff --git a/conf/lua/regexp/headers.lua b/conf/lua/regexp/headers.lua index 838bca2f3..66dd912de 100644 --- a/conf/lua/regexp/headers.lua +++ b/conf/lua/regexp/headers.lua @@ -73,14 +73,63 @@ reconf['SORTED_RECIPS'] = 'is_recipients_sorted()' -- Spam string at the end of message to make statistics faults reconf['TRACKER_ID'] = '/^[a-z0-9]{6,24}[-_a-z0-9]{2,36}[a-z0-9]{6,24}\\s*\\z/isPr' + -- From that contains encoded characters while base 64 is not needed as all symbols are 7bit --- Regexp that checks that from header is encoded with base64 (search in raw headers) +-- Regexp that checks that From header is encoded with base64 (search in raw headers) local from_encoded_b64 = 'From=/\\=\\?\\S+\\?B\\?/iX' -- From contains only 7bit characters (parsed headers are used) local from_needs_mime = 'From=/[\\x00-\\x08\\x0b\\x0c\\x0e-\\x1f\\x7f-\\xff]/Hr' -- Final rule reconf['FROM_EXCESS_BASE64'] = string.format('%s & !%s', from_encoded_b64, from_needs_mime) +-- From that contains encoded characters while quoted-printable is not needed as all symbols are 7bit +-- Regexp that checks that From header is encoded with quoted-printable (search in raw headers) +local from_encoded_qp = 'From=/\\=\\?\\S+\\?Q\\?/iX' +-- Final rule +reconf['FROM_EXCESS_QP'] = string.format('%s & !%s', from_encoded_qp, from_needs_mime) + +-- To that contains encoded characters while base 64 is not needed as all symbols are 7bit +-- Regexp that checks that 
To header is encoded with base64 (search in raw headers) +local to_encoded_b64 = 'To=/\\=\\?\\S+\\?B\\?/iX' +-- To contains only 7bit characters (parsed headers are used) +local to_needs_mime = 'To=/[\\x00-\\x08\\x0b\\x0c\\x0e-\\x1f\\x7f-\\xff]/Hr' +-- Final rule +reconf['TO_EXCESS_BASE64'] = string.format('%s & !%s', to_encoded_b64, to_needs_mime) + +-- To that contains encoded characters while quoted-printable is not needed as all symbols are 7bit +-- Regexp that checks that To header is encoded with quoted-printable (search in raw headers) +local to_encoded_qp = 'To=/\\=\\?\\S+\\?Q\\?/iX' +-- Final rule +reconf['TO_EXCESS_QP'] = string.format('%s & !%s', to_encoded_qp, to_needs_mime) + +-- Reply-To that contains encoded characters while base 64 is not needed as all symbols are 7bit +-- Regexp that checks that Reply-To header is encoded with base64 (search in raw headers) +local replyto_encoded_b64 = 'Reply-To=/\\=\\?\\S+\\?B\\?/iX' +-- Reply-To contains only 7bit characters (parsed headers are used) +local replyto_needs_mime = 'Reply-To=/[\\x00-\\x08\\x0b\\x0c\\x0e-\\x1f\\x7f-\\xff]/Hr' +-- Final rule +reconf['REPLYTO_EXCESS_BASE64'] = string.format('%s & !%s', replyto_encoded_b64, replyto_needs_mime) + +-- Reply-To that contains encoded characters while quoted-printable is not needed as all symbols are 7bit +-- Regexp that checks that Reply-To header is encoded with quoted-printable (search in raw headers) +local replyto_encoded_qp = 'Reply-To=/\\=\\?\\S+\\?Q\\?/iX' +-- Final rule +reconf['REPLYTO_EXCESS_QP'] = string.format('%s & !%s', replyto_encoded_qp, replyto_needs_mime) + +-- Cc that contains encoded characters while base 64 is not needed as all symbols are 7bit +-- Regexp that checks that Cc header is encoded with base64 (search in raw headers) +local cc_encoded_b64 = 'Cc=/\\=\\?\\S+\\?B\\?/iX' +-- Cc contains only 7bit characters (parsed headers are used) +local cc_needs_mime = 'Cc=/[\\x00-\\x08\\x0b\\x0c\\x0e-\\x1f\\x7f-\\xff]/Hr' +-- Final rule 
+reconf['CC_EXCESS_BASE64'] = string.format('%s & !%s', cc_encoded_b64, cc_needs_mime) + +-- Cc that contains encoded characters while quoted-printable is not needed as all symbols are 7bit +-- Regexp that checks that Cc header is encoded with quoted-printable (search in raw headers) +local cc_encoded_qp = 'Cc=/\\=\\?\\S+\\?Q\\?/iX' +-- Final rule +reconf['CC_EXCESS_QP'] = string.format('%s & !%s', cc_encoded_qp, cc_needs_mime) + -- Detect forged outlook headers -- OE X-Mailer header @@ -130,6 +179,18 @@ reconf['FORGED_OUTLOOK_TAGS'] = string.format('!%s & %s & %s & !(%s & %s & %s & yahoo_bulk, any_outlook_mua, mime_html, tag_exists_html, tag_exists_head, tag_exists_meta, tag_exists_body) +-- Detect forged The Bat! headers +-- The Bat! X-Mailer header +local thebat_mua_any = 'X-Mailer=/^\\s*The Bat!/H' +-- The Bat! common Message-ID template +local thebat_msgid_common = 'Message-ID=/^\\d+\\.\\d+\\@\\S+$/mH' +-- Correct The Bat! Message-ID template +local thebat_msgid = 'Message-ID=/^\\d+\\.(19[789]\\d|20\\d\\d)(0\\d|1[012])([012]\\d|3[01])([0-5]\\d)([0-5]\\d)([0-5]\\d)\\@\\S+/mH' +-- Summary rule for forged The Bat! Message-ID header +reconf['FORGED_MUA_THEBAT_MSGID'] = string.format('(%s) & !(%s) & (%s) & !(%s)', thebat_mua_any, thebat_msgid, thebat_msgid_common, unusable_msgid) +-- Summary rule for forged The Bat! 
Message-ID header with unknown template +reconf['FORGED_MUA_THEBAT_MSGID_UNKNOWN'] = string.format('(%s) & !(%s) & !(%s) & !(%s)', thebat_mua_any, thebat_msgid, thebat_msgid_common, unusable_msgid) + -- Message id validity local sane_msgid = 'Message-Id=/^[^<>\\\\ \\t\\n\\r\\x0b\\x80-\\xff]+\\@[^<>\\\\ \\t\\n\\r\\x0b\\x80-\\xff]+\\s*$/mH' local msgid_comment = 'Message-Id=/\\(.*\\)/mH' diff --git a/rspamd.xml.sample b/rspamd.xml.sample index e099e5868..f8e30277e 100644 --- a/rspamd.xml.sample +++ b/rspamd.xml.sample @@ -69,6 +69,10 @@ FORGED_MUA_OUTLOOK FORGED_MUA_OUTLOOK_MAILLIST + + FORGED_MUA_THEBAT_MSGID + + FORGED_MUA_THEBAT_MSGID_UNKNOWN FM_FAKE_HELO_VERIZON @@ -77,8 +81,24 @@ MISSING_MIMEOLE MISSING_TO + - FROM_EXCESS_BASE64 + FROM_EXCESS_BASE64 + + FROM_EXCESS_QP + + TO_EXCESS_BASE64 + + TO_EXCESS_QP + + REPLYTO_EXCESS_BASE64 + + REPLYTO_EXCESS_QP + + CC_EXCESS_BASE64 + + CC_EXCESS_QP + R_MIXED_CHARSET diff --git a/src/client/rspamc.c b/src/client/rspamc.c index ab6ba5ad7..ceb76169a 100644 --- a/src/client/rspamc.c +++ b/src/client/rspamc.c @@ -161,16 +161,16 @@ show_metric_result (gpointer key, gpointer value, gpointer ud) gboolean first; if (metric->is_skipped) { - PRINT_FUNC ("\n%s: Skipped\n", key); + PRINT_FUNC ("\n%s: Skipped\n", (const gchar *)key); } else { if (tty) { - PRINT_FUNC ("\n\033[1m%s:\033[0m %s [ %.2f / %.2f ]\n", key, + PRINT_FUNC ("\n\033[1m%s:\033[0m %s [ %.2f / %.2f ]\n", (const gchar *)key, metric->score > metric->required_score ? "True" : "False", metric->score, metric->required_score); } else { - PRINT_FUNC ("\n%s: %s [ %.2f / %.2f ]\n", key, + PRINT_FUNC ("\n%s: %s [ %.2f / %.2f ]\n", (const gchar *)key, metric->score > metric->required_score ? 
"True" : "False", metric->score, metric->required_score); } @@ -205,10 +205,10 @@ show_metric_result (gpointer key, gpointer value, gpointer ud) cur = g_list_first (s->options); while (cur) { if (cur->next) { - PRINT_FUNC ("%s,", cur->data); + PRINT_FUNC ("%s,", (const gchar *)cur->data); } else { - PRINT_FUNC ("%s", cur->data); + PRINT_FUNC ("%s", (const gchar *)cur->data); } cur = g_list_next (cur); } @@ -231,10 +231,10 @@ show_metric_result (gpointer key, gpointer value, gpointer ud) cur = g_list_first (s->options); while (cur) { if (cur->next) { - PRINT_FUNC ("%s,", cur->data); + PRINT_FUNC ("%s,", (const gchar *)cur->data); } else { - PRINT_FUNC ("%s)", cur->data); + PRINT_FUNC ("%s)", (const gchar *)cur->data); } cur = g_list_next (cur); } @@ -268,10 +268,10 @@ print_rspamd_result (struct rspamd_result *res) PRINT_FUNC ("\n"); while (g_hash_table_iter_next (&it, &k, &v)) { if (tty) { - PRINT_FUNC ("\033[1m%s:\033[0m %s\n", k, v); + PRINT_FUNC ("\033[1m%s:\033[0m %s\n", (const gchar *)k, (const gchar *)v); } else { - PRINT_FUNC ("%s: %s\n", k, v); + PRINT_FUNC ("%s: %s\n", (const gchar *)k, (const gchar *)v); } } PRINT_FUNC ("\n"); diff --git a/src/dns.c b/src/dns.c index 82c796c1e..a87001260 100644 --- a/src/dns.c +++ b/src/dns.c @@ -874,6 +874,10 @@ dns_parse_rr (guint8 *in, union rspamd_reply_element *elt, guint8 **pos, struct p += datalen; } else { + if (p - *pos > *remain - sizeof (guint16) * 3) { + msg_info ("stripped dns reply while reading SRV record"); + return -1; + } GET16 (elt->srv.priority); GET16 (elt->srv.weight); GET16 (elt->srv.port); @@ -894,10 +898,6 @@ dns_parse_rr (guint8 *in, union rspamd_reply_element *elt, guint8 **pos, struct return 1; } return 0; - -err: - msg_info ("incomplete RR, only %d bytes remain, packet length %d", (gint)*remain, (gint)(*pos - in)); - return -1; } static struct rspamd_dns_reply * diff --git a/src/filter.c b/src/filter.c index 93f51d763..4b8585a7c 100644 --- a/src/filter.c +++ b/src/filter.c @@ -335,6 +335,7 @@ 
struct composites_data { struct symbol_remove_data { struct symbol *ms; gboolean remove_weight; + gboolean remove_symbol; }; static gint @@ -364,7 +365,7 @@ composites_foreach_callback (gpointer key, gpointer value, void *data) if (expr->type == EXPR_STR) { /* Find corresponding symbol */ sym = expr->content.operand; - if (*sym == '~') { + if (*sym == '~' || *sym == '-') { sym ++; } if (g_hash_table_lookup (cd->metric_res->symbols, sym) == NULL) { @@ -412,7 +413,7 @@ composites_foreach_callback (gpointer key, gpointer value, void *data) r = rspamd_snprintf (logbuf, sizeof (logbuf), "<%s>, insert symbol %s instead of symbols: ", cd->task->message_id, key); while (s) { sym = s->data; - if (*sym == '~') { + if (*sym == '~' || *sym == '-') { ms = g_hash_table_lookup (cd->metric_res->symbols, sym + 1); } else { @@ -422,7 +423,18 @@ composites_foreach_callback (gpointer key, gpointer value, void *data) if (ms != NULL) { rd = memory_pool_alloc (cd->task->task_pool, sizeof (struct symbol_remove_data)); rd->ms = ms; - rd->remove_weight = *sym != '~'; + if (G_UNLIKELY (*sym == '~')) { + rd->remove_weight = FALSE; + rd->remove_symbol = TRUE; + } + else if (G_UNLIKELY (*sym == '-')) { + rd->remove_symbol = FALSE; + rd->remove_weight = FALSE; + } + else { + rd->remove_symbol = TRUE; + rd->remove_weight = TRUE; + } if (!g_tree_lookup (cd->symbols_to_remove, rd)) { g_tree_insert (cd->symbols_to_remove, (gpointer)ms->name, rd); } @@ -523,7 +535,9 @@ composites_remove_symbols (gpointer key, gpointer value, gpointer data) struct composites_data *cd = data; struct symbol_remove_data *rd = value; - g_hash_table_remove (cd->metric_res->symbols, key); + if (rd->remove_symbol) { + g_hash_table_remove (cd->metric_res->symbols, key); + } if (rd->remove_weight) { cd->metric_res->score -= rd->ms->score; } -- 2.39.5