From 4135a3b422a6c14d6fe7e63d1bd5b32a32595616 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Fri, 7 Aug 2015 14:09:14 +0100 Subject: Move lua rules to rules dir. --- rules/regexp/drugs.lua | 83 ++++++++ rules/regexp/fraud.lua | 74 +++++++ rules/regexp/headers.lua | 491 +++++++++++++++++++++++++++++++++++++++++++++++ rules/regexp/lotto.lua | 33 ++++ 4 files changed, 681 insertions(+) create mode 100644 rules/regexp/drugs.lua create mode 100644 rules/regexp/fraud.lua create mode 100644 rules/regexp/headers.lua create mode 100644 rules/regexp/lotto.lua (limited to 'rules/regexp') diff --git a/rules/regexp/drugs.lua b/rules/regexp/drugs.lua new file mode 100644 index 000000000..7af31cd69 --- /dev/null +++ b/rules/regexp/drugs.lua @@ -0,0 +1,83 @@ +-- Actually these regular expressions were obtained from SpamAssassin project, so they are licensed by apache license: +-- +-- Licensed to the Apache Software Foundation (ASF) under one or more +-- contributor license agreements. See the NOTICE file distributed with +-- this work for additional information regarding copyright ownership. +-- The ASF licenses this file to you under the Apache License, Version 2.0 +-- (the "License"); you may not use this file except in compliance with +-- the License. You may obtain a copy of the License at: +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. 
+--
+-- Drugs spam (viagra, pills etc)
+-- XXX: remove this legacy to statfile
+
+
+local reconf = config['regexp']
+
+-- Layout of this file: every category below is a list of body regexps for
+-- obfuscated drug names; the published symbol is
+--   (R_UNDISC_RCPT | R_BAD_CTE_7BIT | R_NO_SPACE_IN_FROM) & (any body regexp)
+-- The three header symbols are read from `reconf` when this file is executed,
+-- so the file defining them has to be loaded first.
+
+-- Diet pills
+local drugs_diet1 = '/(?:\\b|\\s)[_\\W]{0,3}p[_\\W]{0,3}h[_\\W]{0,3}[e3\\xE8-\\xEB][_\\W]{0,3}n[_\\W]{0,3}t[_\\W]{0,3}[e3\\xE8-\\xEB][_\\W]{0,3}r[_\\W]{0,3}m[_\\W]{0,3}[i1!|l\\xEC-\\xEF][_\\W]{0,3}n[_\\W]{0,3}[e3\\xE8-\\xEB][_\\W]{0,3}(?:\\b|\\s)/irP'
+local drugs_diet2 = '/(?:\\b|\\s)_{0,3}[i1!|l\\xEC-\\xEF][_\\W]?o[_\\W]?n[_\\W]?[a4\\xE0-\\xE6@][_\\W]?m[_\\W]?[i1!|l\\xEC-\\xEF][_\\W]?n_{0,3}\\b/irP'
+local drugs_diet3 = '/\\bbontril\\b/irP'
+local drugs_diet4 = '/\\bphendimetrazine\\b/irP'
+local drugs_diet5 = '/\\bdiethylpropion\\b/irP'
+local drugs_diet6 = '/(?:\\b|\\s)[_\\W]{0,3}M[_\\W]{0,3}[e3\\xE8-\\xEB][_\\W]{0,3}r[_\\W]{0,3}[i1!|l\\xEC-\\xEF][_\\W]{0,3}d[_\\W]{0,3}[i1!|l\\xEC-\\xEF][_\\W]{0,3}[a4\\xE0-\\xE6@][_\\W]{0,3}(?:\\b|\\s)/irP'
+local drugs_diet7 = '/\\b_{0,3}t[_\\W]?[e3\\xE8-\\xEB][_\\W]?n[_\\W]?u[_\\W]?a[_\\W]?t[_\\W]?[e3\\xE8-\\xEB]_{0,3}(?:\\b|\\s)/irP'
+-- Fixed: the "e" class was never closed ("[e3\\xE8-\\xEB[_\\W]?") and the
+-- separator after "r" was mandatory, so a plain "didrex" could not match.
+local drugs_diet8 = '/\\b_{0,3}d[_\\W]?[i1!|l\\xEC-\\xEF][_\\W]?d[_\\W]?r[_\\W]?[e3\\xE8-\\xEB][_\\W]?xx?_{0,3}\\b/irP'
+local drugs_diet9 = '/\\b_{0,3}a[_\\W]?d[_\\W]?[i1!|l\\xEC-\\xEF][_\\W]?p[_\\W]?[e3\\xE8-\\xEB][_\\W]?x_{0,3}\\b/irP'
+local drugs_diet10 = '/\\b_{0,3}x?x[_\\W]?[e3\\xE8-\\xEB][_\\W]?n[_\\W]?[i1!|l\\xEC-\\xEF][_\\W]?c[_\\W]?[a4\\xE0-\\xE6@][_\\W]?l_{0,3}\\b/irP'
+reconf['DRUGS_DIET'] = string.format('((%s) | (%s) | (%s)) & ((%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s))', reconf['R_UNDISC_RCPT'], reconf['R_BAD_CTE_7BIT'], reconf['R_NO_SPACE_IN_FROM'], drugs_diet1, drugs_diet2, drugs_diet3, drugs_diet4, drugs_diet5, drugs_diet6, drugs_diet7, drugs_diet8, drugs_diet9, drugs_diet10)
+
+-- Erectile dysfunction drugs (numbering has gaps: there is no 7 or 9)
+local drugs_erectile1 = '/(?:\\b|\\s)[_\\W]{0,3}(?:\\\\\\/|V)[_\\W]{0,3}[ij1!|l\\xEC\\xED\\xEE\\xEF][_\\W]{0,3}[a40\\xE0-\\xE6@][_\\W]{0,3}[xyz]?[gj][_\\W]{0,3}r[_\\W]{0,3}[a40\\xE0-\\xE6@][_\\W]{0,3}x?[_\\W]{0,3}(?:\\b|\\s)/irP'
+local drugs_erectile2 = '/\\bV(?:agira|igara|iaggra|iaegra)\\b/irP'
+local drugs_erectile3 = '/(?:\\A|[\\s\\x00-\\x2f\\x3a-\\x40\\x5b-\\x60\\x7b-\\x7f])[_\\W]{0,3}C[_\\W]{0,3}[ij1!|l\\xEC\\xED\\xEE\\xEF][_\\W]{0,3}[a4\\xE0-\\xE6@][_\\W]{0,3}l?[l!|1][_\\W]{0,3}[i1!|l\\xEC-\\xEF][_\\W]{0,3}s[_\\W]{0,3}(?:\\b|\\s)/irP'
+local drugs_erectile4 = '/\\bC(?:alis|ilias|ilais)\\b/irP'
+local drugs_erectile5 = '/\\b_{0,3}s[_\\W]?[i1!|l\\xEC-\\xEF][_\\W]?l[_\\W]?d[_\\W]?[e3\\xE8-\\xEB][_\\W]?n[_\\W]?[a4\\xE0-\\xE6@][_\\W]?f[_\\W]?[i1!|l\\xEC-\\xEF][_\\W]?l c[_\\W]?[i1!|l\\xEC-\\xEF][_\\W]?t[_\\W]?r[_\\W]?[a4\\xE0-\\xE6@][_\\W]?t[_\\W]?[e3\\xE8-\\xEB]_{0,3}(?:\\b|\\s)/irP'
+local drugs_erectile6 = '/\\b_{0,3}L[_\\W]?[e3\\xE8-\\xEB][_\\W]?(?:\\\\\\/|V)[_\\W]?[i1!|l\\xEC-\\xEF][_\\W]?t[_\\W]?r[_\\W]?[a4\\xE0-\\xE6@][_\\W]?(?:\\b|\\s)/irP'
+local drugs_erectile8 = '/\\b_{0,3}T[_\\W]?[a4\\xE0-\\xE6@][_\\W]?d[_\\W]?[a4\\xE0-\\xE6@][_\\W]?l[_\\W]?[a4\\xE0-\\xE6@][_\\W]?f[_\\W]?[i1!|l\\xEC-\\xEF][_\\W]?l_{0,3}\\b/irP'
+-- Fixed: this pattern targets "Viagra" spelled with HTML entities. The
+-- entity names had been decoded into literal accented characters, leaving a
+-- stray "\\;" behind; they are spelled out again here.
+-- NOTE(review): reconstructed from the leftover "\\;" fragments - confirm
+-- against the original rule set.
+local drugs_erectile10 = '/\\b_{0,3}V[_\\W]?(?:i|\\&iuml\\;)[_\\W]?(?:a|\\&agrave|\\&aring)\\;?[_\\W]?g[_\\W]?r[_\\W]?(?:a|\\&agrave|\\&aring)\\b/irP'
+local drugs_erectile11 = '/(?:\\b|\\s)_{0,3}[a4\\xE0-\\xE6@][_\\W]{0,3}p[_\\W]{0,3}c[_\\W]{0,3}[a4\\xE0-\\xE6@][_\\W]{0,3}[l!|1][_\\W]{0,3}[i1!|l\\xEC-\\xEF][_\\W]{0,3}s_{0,3}\\b/irP'
+reconf['DRUGS_ERECTILE'] = string.format('((%s) | (%s) | (%s)) & ((%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s))', reconf['R_UNDISC_RCPT'], reconf['R_BAD_CTE_7BIT'], reconf['R_NO_SPACE_IN_FROM'], drugs_erectile1, drugs_erectile2, drugs_erectile3, drugs_erectile4, drugs_erectile5, drugs_erectile6, drugs_erectile8, drugs_erectile10, drugs_erectile11)
+
+-- Anxiety drugs
+local drugs_anxiety1 = '/(?:\\b|\\s)[_\\W]{0,3}x?x[_\\W]{0,3}[a4\\xE0-\\xE6@][_\\W]{0,3}n[_\\W]{0,3}[ea4\\xE1\\xE2\\xE3@][_\\W]{0,3}xx?_{0,3}\\b/irP'
+local drugs_anxiety2 = '/\\bAlprazolam\\b/irP'
+local drugs_anxiety3 = '/(?:\\b|\\s)[_\\W]{0,3}(?:\\\\\\/|V)[_\\W]{0,3}[a4\\xE0-\\xE6@][_\\W]{0,3}[l|][_\\W]{0,3}[i1!|l\\xEC-\\xEF][_\\W]{0,3}[u\\xB5\\xF9-\\xFC][_\\W]{0,3}m\\b/irP'
+local drugs_anxiety4 = '/\\b_{0,3}D[_\\W]?[i1!|l\\xEC-\\xEF][_\\W]?[a4\\xE0-\\xE6@][_\\W]?z[_\\W]?[ea3\\xE9\\xEA\\xEB][_\\W]?p[_\\W]?[a4\\xE0-\\xE6@][_\\W]?m_{0,3}\\b/irP'
+local drugs_anxiety5 = '/(?:\\b|\\s)[a4\\xE0-\\xE6@][_\\W]?t[_\\W]?[i1!|l\\xEC-\\xEF][_\\W]?v[_\\W]?[a4\\xE0-\\xE6@][_\\W]?n_{0,3}\\b/irP'
+local drugs_anxiety6 = '/\\b_{0,3}l[_\\W]?[o0\\xF2-\\xF6][_\\W]?r[_\\W]?[a4\\xE0-\\xE6@][_\\W]?z[_\\W]?[e3\\xE8-\\xEB][_\\W]?p[_\\W]?[a4\\xE0-\\xE6@][_\\W]?m_{0,3}\\b/irP'
+local drugs_anxiety7 = '/\\b_{0,3}c[_\\W]?l[_\\W]?[o0\\xF2-\\xF6][_\\W]?n[_\\W]?[a4\\xE0-\\xE6@][_\\W]?z[_\\W]?e[_\\W]?p[_\\W]?[a4\\xE0-\\xE6@][_\\W]?m\\b/irP'
+local drugs_anxiety8 = '/\\bklonopin\\b/irP'
+local drugs_anxiety9 = '/\\brivotril\\b/irP'
+reconf['DRUGS_ANXIETY'] = string.format('((%s) | (%s) | (%s)) & ((%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s))', reconf['R_UNDISC_RCPT'], reconf['R_BAD_CTE_7BIT'], reconf['R_NO_SPACE_IN_FROM'], drugs_anxiety1, drugs_anxiety2, drugs_anxiety3, drugs_anxiety4, drugs_anxiety5, drugs_anxiety6, drugs_anxiety7, drugs_anxiety8, drugs_anxiety9)
+-- Both an erectile and an anxiety drug are mentioned
+reconf['DRUGS_ANXIETY_EREC'] = string.format('(%s) & (%s)', reconf['DRUGS_ERECTILE'], reconf['DRUGS_ANXIETY'])
+
+-- Pain killers (kept local: only combined into other expressions)
+local drugs_pain1 = '/\\b_{0,3}h[_\\W]?y[_\\W]?d[_\\W]?r[_\\W]?[o0\\xF2-\\xF6][_\\W]?c[_\\W]?[o0\\xF2-\\xF6][_\\W]?d[_\\W]?[o0\\xF2-\\xF6][_\\W]?n[_\\W]?e_{0,3}\\b/irP'
+local drugs_pain2 = '/\\b_{0,3}c[o0\\xF2-\\xF6]deine_{0,3}\\b/irP'
+local drugs_pain3 = '/(?:\\b|\\s)[_\\W]{0,3}[u\\xB5\\xF9-\\xFC][_\\W]{0,3}l[_\\W]{0,3}t[_\\W]{0,3}r[_\\W]{0,3}[a4\\xE0-\\xE6@][_\\W]{0,3}m_{0,3}\\b/irP'
+local drugs_pain4 = '/(?:\\b|\\s)[_\\W]{0,3}(?:\\\\\\/|V)[_\\W]{0,3}[i1!|l\\xEC-\\xEF][_\\W]{0,3}c[_\\W]{0,3}[o0\\xF2-\\xF6][_\\W]{0,3}d[_\\W]{0,3}[i1!|l\\xEC-\\xEF][_\\W]{0,3}ns?[_\\W]{0,3}(?:\\b|\\s)/irP'
+local drugs_pain5 = '/\\b_{0,3}t[_\\W]?r[_\\W]?[a4\\xE0-\\xE6@][_\\W]?m[_\\W]?[a4\\xE0-\\xE6@][_\\W]?d[_\\W]?[o0\\xF2-\\xF6][_\\W]?[l!|1]_{0,3}\\b/irP'
+local drugs_pain6 = '/\\b_{0,3}u[_\\W]?l[_\\W]?t[_\\W]?r[_\\W]?a[_\\W]?c[_\\W]?e[_\\W]?t_{0,3}\\b/irP'
+local drugs_pain7 = '/\\b_{0,3}f[_\\W]?[i1!|l\\xEC-\\xEF][_\\W]?[o0\\xF2-\\xF6][_\\W]?r[_\\W]?[i1!|l\\xEC-\\xEF][_\\W]?c[_\\W]?[e3\\xE8-\\xEB][_\\W]?[t7]_{0,3}\\b/irP'
+local drugs_pain8 = '/\\b_{0,3}c[_\\W]?[e3\\xE8-\\xEB][_\\W]?l[_\\W]?[e3\\xE8-\\xEB][_\\W]?b[_\\W]?r[_\\W]?[e3\\xE8-\\xEB][_\\W]?x_{0,3}\\b/irP'
+local drugs_pain9 = '/(?:\\b|\\s)_{0,3}[i1!|l\\xEC-\\xEF]m[i1!|l\\xEC-\\xEF]tr[e3\\xE8-\\xEB]x_{0,3}\\b/irP'
+local drugs_pain10 = '/(?:\\b|\\s)[_\\W]{0,3}(?:\\\\\\/|V)[_\\W]{0,3}[i1!|l\\xEC-\\xEF][_\\W]{0,3}[o0\\xF2-\\xF6][_\\W]{0,3}x[_\\W]{0,3}xx?_{0,3}\\b/irP'
+local drugs_pain11 = '/\\bzebutal\\b/irP'
+local drugs_pain12 = '/\\besgic plus\\b/irP'
+local drugs_pain13 = '/\\bD[_\\W]?[a4\\xE0-\\xE6@][_\\W]?r[_\\W]?v[_\\W]?[o0\\xF2-\\xF6][_\\W]?n\\b/irP'
+local drugs_pain14 = '/N[o0\\xF2-\\xF6]rc[o0\\xF2-\\xF6]/irP'
+-- Fixed: a stray "||" sat between the 12th and 13th alternatives; every other
+-- expression in this file uses the single "|" operator.
+local drugs_pain = string.format('((%s) | (%s) | (%s)) & ((%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s))', reconf['R_UNDISC_RCPT'], reconf['R_BAD_CTE_7BIT'], reconf['R_NO_SPACE_IN_FROM'], drugs_pain1, drugs_pain2, drugs_pain3, drugs_pain4, drugs_pain5, drugs_pain6, drugs_pain7, drugs_pain8, drugs_pain9, drugs_pain10, drugs_pain11, drugs_pain12, drugs_pain13, drugs_pain14)
+
+-- Sleeping pills
+local drugs_sleep1 = '/(?:\\b|\\s)[_\\W]{0,3}[a4\\xE0-\\xE6@][_\\W]{0,3}m[_\\W]{0,3}b[_\\W]{0,3}[i1!|l\\xEC-\\xEF][_\\W]{0,3}[e3\\xE8-\\xEB][_\\W]{0,3}n[_\\W]{0,3}(?:\\b|\\s)/irP'
+local drugs_sleep2 = '/(?:\\b|\\s)[_\\W]{0,3}S[_\\W]{0,3}[o0\\xF2-\\xF6][_\\W]{0,3}n[_\\W]{0,3}[a4\\xE0-\\xE6@][_\\W]{0,3}t[_\\W]{0,3}[a4\\xE0-\\xE6@][_\\W]{0,3}(?:\\b|\\s)/irP'
+local drugs_sleep3 = '/\\b_{0,3}R[_\\W]?[e3\\xE8-\\xEB][_\\W]?s[_\\W]?t[_\\W]?[o0\\xF2-\\xF6][_\\W]?r[_\\W]?i[_\\W]?l_{0,3}\\b/irP'
+local drugs_sleep4 = '/\\b_{0,3}H[_\\W]?[a4\\xE0-\\xE6@][_\\W]?l[_\\W]?c[_\\W]?i[_\\W]?[o0\\xF2-\\xF6][_\\W]?n_{0,3}\\b/irP'
+-- Any sleeping-pill pattern; unlike the other categories this one carries no
+-- header precondition of its own.
+local drugs_sleep = ('(%s) | (%s) | (%s) | (%s)'):format(drugs_sleep1, drugs_sleep2, drugs_sleep3, drugs_sleep4)
+
+-- Muscle relaxants
+local drugs_muscle1 = '/(?:\\b|\\s)[_\\W]{0,3}s[_\\W]{0,3}[o0\\xF2-\\xF6][_\\W]{0,3}m[_\\W]{0,3}[a4\\xE0-\\xE6@][_\\W]{0,3}(?:\\b|\\s)/irP'
+local drugs_muscle2 = '/\\b_{0,3}cycl[o0\\xF2-\\xF6]b[e3\\xE8-\\xEB]nz[a4\\xE0-\\xE6@]pr[i1!|l\\xEC-\\xEF]n[e3\\xE8-\\xEB]_{0,3}(?:\\b|\\s)/irP'
+local drugs_muscle3 = '/\\b_{0,3}f[_\\W]?l[_\\W]?[e3\\xE8-\\xEB][_\\W]?x[_\\W]?[e3\\xE8-\\xEB][_\\W]?r[_\\W]?[i1!|l\\xEC-\\xEF]_{0,3}[_\\W]?l_{0,3}\\b/irP'
+local drugs_muscle4 = '/\\b_{0,3}z[_\\W]?a[_\\W]?n[_\\W]?a[_\\W]?f[_\\W]?l[_\\W]?e[_\\W]?x_{0,3}\\b/irP'
+local drugs_muscle5 = '/\\bskelaxin\\b/irP'
+
+-- Header precondition shared by the two symbols below
+local drugs_hdr_cond = string.format('(%s) | (%s) | (%s)', reconf['R_UNDISC_RCPT'], reconf['R_BAD_CTE_7BIT'], reconf['R_NO_SPACE_IN_FROM'])
+local drugs_muscle_any = string.format('(%s) | (%s) | (%s) | (%s) | (%s)', drugs_muscle1, drugs_muscle2, drugs_muscle3, drugs_muscle4, drugs_muscle5)
+-- Header precondition plus any muscle relaxant pattern
+reconf['DRUGS_MUSCLE'] = string.format('(%s) & (%s)', drugs_hdr_cond, drugs_muscle_any)
+-- Header precondition plus at least three of the six drug categories
+reconf['DRUGS_MANYKINDS'] = string.format('(%s) & ((%s) + (%s) + (%s) + (%s) + (%s) + (%s) >= 3)', drugs_hdr_cond, reconf['DRUGS_ERECTILE'], reconf['DRUGS_DIET'], drugs_pain, drugs_sleep, reconf['DRUGS_MUSCLE'], reconf['DRUGS_ANXIETY'])
+
diff --git a/rules/regexp/fraud.lua b/rules/regexp/fraud.lua
new file mode 100644
index 000000000..2571a8712
--- /dev/null
+++ b/rules/regexp/fraud.lua
@@ -0,0 +1,74 @@
+-- Actually these regular expressions were obtained from SpamAssassin project, so they are licensed by apache license:
+--
+-- Licensed to the Apache Software Foundation (ASF) under one or more
+-- contributor license agreements. See the NOTICE file distributed with
+-- this work for additional information regarding copyright ownership.
+-- The ASF licenses this file to you under the Apache License, Version 2.0
+-- (the "License"); you may not use this file except in compliance with
+-- the License. You may obtain a copy of the License at:
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+-- Fraud messages (Nigeria spam, viagra, etc)
+local reconf = config['regexp']
+
+-- Individual advance-fee fraud phrases. Each is one term of the sums built at
+-- the end of this file.
+local fraud_dbi = '/(?:\\bdollars?\\b|\\busd(?:ollars)?(?:[0-9]|\\b)|\\bus\\$|\\$[0-9,.]{6,}|\\$[0-9].{0,8}[mb]illion|\\$[0-9.,]{2,10} ?m|\\beuros?\\b|u[.]?s[.]? [0-9.]+ m)/irP'
+local fraud_kjv = '/(?:claim|concerning) (?:the|this) money/irP'
+local fraud_irj = '/(?:finance|holding|securit(?:ies|y)) (?:company|firm|storage house)/irP'
+local fraud_neb = '/(?:government|bank) of nigeria/irP'
+local fraud_xjr = '/(?:who was a|as a|an? honest|you being a|to any) foreigner/irP'
+local fraud_dpr = '/\\b(?:(?:respond|reply) (?:urgently|immediately)|(?:urgent|immediate|earliest) (?:reply|response))\\b/irP'
+local fraud_pts = '/\\b(?:ass?ass?inat(?:ed|ion)|murder(?:e?d)?|kill(?:ed|ing)\\b[^.]{0,99}\\b(?:war veterans|rebels?))\\b/irP'
+local fraud_bep = '/\\b(?:bank of nigeria|central bank of|trust bank|apex bank|amalgamated bank)\\b/irP'
+local fraud_tdp = '/\\b(?:business partner(?:s|ship)?|silent partner(?:s|ship)?)\\b/irP'
+-- Fixed: the class after "gu" held two Cyrillic letters - bytes 0xE9/0xE8
+-- decoded with the wrong code page. They are written as hex escapes, the
+-- same way accented letters are spelled in the sibling drugs rules.
+local fraud_gan = '/\\b(?:charles taylor|serena|abacha|gu[e\\xE9\\xE8]i|sese[- ]?seko|kabila)\\b/irP'
+local fraud_irt = '/\\b(?:compliments? of the|dear friend|dear sir|yours faithfully|season\'?s greetings)\\b/irP'
+local fraud_aon = '/\\b(?:confidential|private|alternate|alternative) (?:(?:e-? *)?mail)\\b/irP'
+local fraud_wny = '/\\b(?:disburse?(?:ment)?|incurr?(?:ed)?|remunerr?at(?:ed?|ion)|remm?itt?(?:ed|ance|ing)?)\\b/irP'
+local fraud_ipk = '/\\b(?:in|to|visit) your country\\b/irP'
+local fraud_qxx = '/\\b(?:my name is|i am) (?:mrs?|engr|barrister|dr|prince(?:ss)?)[. ]/irP'
+local fraud_iou = '/\\b(?:no risks?|risk-? *free|free of risks?|100% safe)\\b/irP'
+local fraud_ezy = '/\\b(?:of|the) late president\\b/irP'
+local fraud_mly = '/\\b(?:reply|respond)\\b[^.]{0,50}\\b(?:to|through)\\b[^.]{0,50}\\@\\b/irP'
+local fraud_zfj = '/\\b(?:wife|son|brother|daughter) of the late\\b/irP'
+local fraud_kdt = '/\\bU\\.?S\\.?(?:D\\.?)?\\s*(?:\\$\\s*)?(?:\\d+,\\d+,\\d+|\\d+\\.\\d+\\.\\d+|\\d+(?:\\.\\d+)?\\s*milli?on)/irP'
+local fraud_ulk = '/\\baffidavits?\\b/irP'
+local fraud_bgp = '/\\battached to ticket number\\b/irP'
+local fraud_fbi = '/\\bdisburs/irP'
+local fraud_jbu = '/\\bforeign account\\b/irP'
+local fraud_yww = '/\\bfurnish you with\\b/irP'
+local fraud_jyg = '/\\bgive\\s+you .{0,15}(?:fund|money|total|sum|contact|percent)\\b/irP'
+local fraud_xvw = '/\\bhonest cooperation\\b/irP'
+local fraud_uuy = '/\\blegitimate business(?:es)?\\b/irP'
+local fraud_snt = '/\\blocate(?: .{1,20})? extended relative/irP'
+local fraud_ltx = '/\\bmilli?on (?:.{1,25} thousand\\s*)?(?:(?:united states|u\\.?s\\.?) dollars|(?i:U\\.?S\\.?D?))\\b/irP'
+local fraud_jnb = '/\\boperat(?:e|ing)\\b[^.]{0,99}\\b(?:for(?:ei|ie)gn|off-? ?shore|over-? ?seas?) (?:bank )?accounts?\\b/irP'
+local fraud_qfy = '/\\bover-? *(?:invoiced?|cost(?:s|ing)?)\\b/irP'
+local fraud_wdr = '/\\bprivate lawyer\\b/irP'
+local fraud_wfc = '/\\bsecur(?:e|ing) (?:the )?(?:funds?|monies)\\b/irP'
+local fraud_aum = '/\\bthe desk of\\b/irP'
+local fraud_mcq = '/\\btransaction\\b.{1,30}\\b(?:magnitude|diplomatic|strict|absolute|secret|confiden(?:tial|ce)|guarantee)/irP'
+local fraud_etx = '/\\byour\\b[^.]{0,99}\\b(?:contact (?:details|information)|private (?:e?[- ]?mail|telephone|tel|phone|fax))\\b/irP'
+local fraud_pvn = '/as the beneficiary/irP'
+local fraud_fvu = '/award notification/irP'
+local fraud_ckf = '/computer ballot system/irP'
+local fraud_fcw = '/fiduciary agent/irP'
+local fraud_mqo = '/foreign (?:business partner|customer)/irP'
+local fraud_tcc = '/foreign (?:offshore )?(?:bank|account)/irP'
+local fraud_gbw = '/god gives .{1,10}second chance/irP'
+local fraud_nrg = '/i am contacting you/irP'
+local fraud_rlx = '/lott(?:o|ery) (?:co,?ordinator|international)/irP'
+local fraud_axf = '/magnanimity/irP'
+local fraud_thj = '/modalit(?:y|ies)/irP'
+local fraud_yqv = '/nigerian? (?:national|government)/irP'
+local fraud_yja = '/over-invoice/irP'
+local fraud_ypo = '/the total sum/irP'
+local fraud_uoq = '/vital documents/irP'
+
+-- Header precondition shared by both ADVANCE_FEE symbols
+local fraud_hdr_cond = string.format('(%s) | (%s) | (%s)', reconf['R_UNDISC_RCPT'], reconf['R_BAD_CTE_7BIT'], reconf['R_NO_SPACE_IN_FROM'])
+
+-- All phrases, in the order they were previously passed to string.format.
+-- Keeping one list means the two symbols below cannot drift apart.
+local fraud_phrases = {
+  fraud_kjv, fraud_irj, fraud_neb, fraud_xjr, fraud_ezy, fraud_zfj, fraud_kdt,
+  fraud_bgp, fraud_fbi, fraud_jbu, fraud_jyg, fraud_xvw, fraud_snt, fraud_ltx,
+  fraud_mcq, fraud_pvn, fraud_fvu, fraud_ckf, fraud_fcw, fraud_mqo, fraud_tcc,
+  fraud_gbw, fraud_nrg, fraud_rlx, fraud_axf, fraud_thj, fraud_yqv, fraud_yja,
+  fraud_ypo, fraud_uoq, fraud_dbi, fraud_bep, fraud_dpr, fraud_qxx, fraud_qfy,
+  fraud_pts, fraud_tdp, fraud_gan, fraud_ipk, fraud_aon, fraud_wny, fraud_aum,
+  fraud_wfc, fraud_yww, fraud_ulk, fraud_iou, fraud_jnb, fraud_irt, fraud_etx,
+  fraud_wdr, fraud_uuy, fraud_mly,
+}
+-- "(p1) + (p2) + ... + (pN)"
+local fraud_phrase_sum = '(' .. table.concat(fraud_phrases, ') + (') .. ')'
+
+-- Builds "(<header precondition>) & (<phrase sum> >= min_hits)".
+-- min_hits: how many distinct phrases have to match.
+local function advance_fee_expr(min_hits)
+  return string.format('(%s) & (%s >= %d)', fraud_hdr_cond, fraud_phrase_sum, min_hits)
+end
+
+reconf['ADVANCE_FEE_2'] = advance_fee_expr(2)
+reconf['ADVANCE_FEE_3'] = advance_fee_expr(3)
diff --git a/rules/regexp/headers.lua b/rules/regexp/headers.lua
new file mode 100644
index 000000000..f026d548b
--- /dev/null
+++ b/rules/regexp/headers.lua
@@ -0,0 +1,491 @@
+-- Actually these regular expressions were obtained from SpamAssassin project, so they are licensed by apache license:
+--
+-- Licensed to the Apache Software Foundation (ASF) under one or more
+-- contributor license agreements. See the NOTICE file distributed with
+-- this work for additional information regarding copyright ownership.
+-- The ASF licenses this file to you under the Apache License, Version 2.0
+-- (the "License"); you may not use this file except in compliance with
+-- the License. You may obtain a copy of the License at:
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+-- Definitions of header regexps
+
+local reconf = config['regexp']
+local rspamd_regexp = require "rspamd_regexp"
+
+-- Subject needs encoding
+-- Define encodings types
+local subject_encoded_b64 = 'Subject=/=\\?\\S+\\?B\\?/iX'
+local subject_encoded_qp = 'Subject=/=\\?\\S+\\?Q\\?/iX'
+-- Define whether subject must be encoded (contains non-7bit characters)
+local subject_needs_mime = 'Subject=/[\\x00-\\x08\\x0b\\x0c\\x0e-\\x1f\\x7f-\\xff]/X'
+-- Final rule
+reconf['SUBJECT_NEEDS_ENCODING'] = string.format('!(%s) & !(%s) & (%s)', subject_encoded_b64, subject_encoded_qp, subject_needs_mime)
+
+-- Detects that there is no space in From header (e.g. Some Name<user@example.com>)
+reconf['R_NO_SPACE_IN_FROM'] = 'From=/\\S<[-\\w\\.]+\\@[-\\w\\.]+>/X'
+
+-- Detects missing subject
+local has_subject = 'header_exists(Subject)'
+local empty_subject = 'Subject=/^$/'
+-- Final rule
+reconf['MISSING_SUBJECT'] = string.format('!(%s) | (%s)', has_subject, empty_subject)
+
+-- Detects bad content-transfer-encoding for text parts
+-- For text parts (text/plain and text/html mainly)
+local r_ctype_text = 'content_type_is_type(text)'
+-- Content transfer encoding is 7bit
+local r_cte_7bit = 'compare_transfer_encoding(7bit)'
+-- And body contains 8bit characters
+local r_body_8bit = '/[^\\x01-\\x7f]/Pr'
+reconf['R_BAD_CTE_7BIT'] = string.format('(%s) & (%s) & (%s)', r_ctype_text, r_cte_7bit, r_body_8bit)
+
+-- Detects missing To header
+reconf['MISSING_TO']= '!raw_header_exists(To)';
+
+-- Detects undisclosed recipients
+-- Fixed: this literal had been damaged (it read
+-- 'To=/^[-.\\w]{1,64})\\"?\\s<\\k\\@/H', with an unbalanced ")") and the
+-- R_UNDISC_RCPT symbol that other rule files read was never assigned.
+-- NOTE(review): both lines are reconstructed - confirm against the original
+-- rule set. The damaged tail looked like the remains of a separate rule
+-- comparing the To display name with the mailbox through a named
+-- back-reference; that rule could not be recovered and is still missing.
+local undisc_rcpt = 'To=/^<?"?undisclosed[- ]recipient/Hi'
+reconf['R_UNDISC_RCPT'] = string.format('(%s) | (%s)', reconf['MISSING_TO'], undisc_rcpt)
+
+-- Charset is missing in message
+reconf['R_MISSING_CHARSET']= string.format('content_type_is_type(text) & !content_type_has_param(charset) & !%s', r_cte_7bit);
+
+-- Subject seems to be spam
+reconf['R_SAJDING'] = 'Subject=/\\bsajding(?:om|a)?\\b/iH'
+
+-- Find forged Outlook MUA
+-- Yahoo groups messages
+local yahoo_bulk = 'Received=/from \\[\\S+\\] by \\S+\\.(?:groups|scd|dcn)\\.yahoo\\.com with NNFMP/H'
+-- Outlook MUA
+local outlook_mua = 'X-Mailer=/^Microsoft Outlook\\b/H'
+local any_outlook_mua = 'X-Mailer=/^Microsoft Outlook\\b/H'
+reconf['FORGED_OUTLOOK_HTML'] = string.format('!%s & %s & %s', yahoo_bulk, outlook_mua, 'has_only_html_part()')
+
+-- Recipients seems to be likely with each other (only works when recipients count is more than 5 recipients)
+reconf['SUSPICIOUS_RECIPS'] = 'compare_recipients_distance(0.65)'
+
+-- Recipients list seems to be sorted
+reconf['SORTED_RECIPS'] = 'is_recipients_sorted()'
+
+-- Spam string at the end of message to make statistics faults
+reconf['TRACKER_ID'] = '/^[a-z0-9]{6,24}[-_a-z0-9]{2,36}[a-z0-9]{6,24}\\s*\\z/isPr'
+
+
+-- From that contains encoded characters while base 64 is not needed as all symbols are 7bit
+-- Regexp that checks that From header is encoded with base64 (search in raw headers)
+local from_encoded_b64 = 'From=/\\=\\?\\S+\\?B\\?/iX'
+-- From contains control or 8bit characters, i.e. really needs MIME encoding (parsed headers are used)
+local from_needs_mime = 'From=/[\\x00-\\x08\\x0b\\x0c\\x0e-\\x1f\\x7f-\\xff]/Hr'
+-- Final rule
+reconf['FROM_EXCESS_BASE64'] = string.format('%s & !%s', from_encoded_b64, from_needs_mime)
+
+-- From that contains encoded characters while quoted-printable is not needed as all symbols are 7bit
+-- Regexp that checks that From header is encoded with quoted-printable (search in raw headers)
+local from_encoded_qp = 'From=/\\=\\?\\S+\\?Q\\?/iX'
+-- Final rule
+reconf['FROM_EXCESS_QP'] = string.format('%s & !%s', from_encoded_qp, from_needs_mime)
+
+-- To that contains encoded characters while base 64 is not needed as all symbols are 7bit
+-- Regexp that checks that To header is encoded with base64 (search in raw headers)
+local to_encoded_b64 = 'To=/\\=\\?\\S+\\?B\\?/iX'
+-- To contains control or 8bit characters, i.e. really needs MIME encoding (parsed headers are used)
+local to_needs_mime = 'To=/[\\x00-\\x08\\x0b\\x0c\\x0e-\\x1f\\x7f-\\xff]/Hr'
+-- Final rule
+reconf['TO_EXCESS_BASE64'] = string.format('%s & !%s', to_encoded_b64, to_needs_mime)
+
+-- To that contains encoded characters while quoted-printable is not needed as all symbols are 7bit
+-- Regexp that checks that To header is encoded with quoted-printable (search in raw headers)
+local to_encoded_qp = 'To=/\\=\\?\\S+\\?Q\\?/iX'
+-- Final rule
+reconf['TO_EXCESS_QP'] = string.format('%s & !%s', to_encoded_qp, to_needs_mime)
+
+-- Reply-To that contains encoded characters while base 64 is not needed as all symbols are 7bit
+-- Regexp that checks that Reply-To header is encoded with base64 (search in raw headers)
+local replyto_encoded_b64 = 'Reply-To=/\\=\\?\\S+\\?B\\?/iX'
+-- Reply-To contains control or 8bit characters, i.e. really needs MIME encoding (parsed headers are used)
+local replyto_needs_mime = 'Reply-To=/[\\x00-\\x08\\x0b\\x0c\\x0e-\\x1f\\x7f-\\xff]/Hr'
+-- Final rule
+reconf['REPLYTO_EXCESS_BASE64'] = string.format('%s & !%s', replyto_encoded_b64, replyto_needs_mime)
+
+-- Reply-To that contains encoded characters while quoted-printable is not needed as all symbols are 7bit
+-- Regexp that checks that Reply-To header is encoded with quoted-printable (search in raw headers)
+local replyto_encoded_qp = 'Reply-To=/\\=\\?\\S+\\?Q\\?/iX'
+-- Final rule
+reconf['REPLYTO_EXCESS_QP'] = string.format('%s & !%s', replyto_encoded_qp, replyto_needs_mime)
+
+-- Cc that contains encoded characters while base 64 is not needed as all symbols are 7bit
+-- Regexp that checks that Cc header is encoded with base64 (search in raw headers)
+local cc_encoded_b64 = 'Cc=/\\=\\?\\S+\\?B\\?/iX'
+-- Cc contains control or 8bit characters, i.e. really needs MIME encoding (parsed headers are used)
+local cc_needs_mime = 'Cc=/[\\x00-\\x08\\x0b\\x0c\\x0e-\\x1f\\x7f-\\xff]/Hr'
+-- Final rule
+reconf['CC_EXCESS_BASE64'] = string.format('%s & !%s', cc_encoded_b64, cc_needs_mime)
+
+-- Cc that contains encoded characters while quoted-printable is not needed as all symbols are 7bit
+-- Regexp that checks that Cc header is encoded with quoted-printable (search in raw headers)
+local cc_encoded_qp = 'Cc=/\\=\\?\\S+\\?Q\\?/iX'
+-- Final rule
+reconf['CC_EXCESS_QP'] = string.format('%s & !%s', cc_encoded_qp, cc_needs_mime)
+
+
+-- NOTE(review): several Message-Id / List-Unsubscribe literals in this part of
+-- the file are visibly truncated ('^?$', '^$', '$', '^?'): whatever stood
+-- between the angle brackets is gone. They are kept exactly as found, because
+-- the missing text cannot be derived from this file; restore them from the
+-- original rule set before relying on any FORGED_MUA_* symbol.
+
+-- Detect forged outlook headers
+-- OE X-Mailer header
+local oe_mua = 'X-Mailer=/\\bOutlook Express [456]\\./H'
+-- OE Message ID format
+local oe_msgid_1 = 'Message-Id=/^?$/mH' -- NOTE(review): truncated literal
+local oe_msgid_2 = 'Message-Id=/^?$/H' -- NOTE(review): truncated literal
+-- EZLM remail of message
+local lyris_ezml_remailer = 'List-Unsubscribe=/$/H' -- NOTE(review): truncated literal
+-- Header of wacky sendmail
+local wacky_sendmail_version = 'Received=/\\/CWT\\/DCE\\)/H'
+-- Iplanet received header
+local iplanet_messaging_server = 'Received=/iPlanet Messaging Server/H'
+-- Hotmail message id
+local hotmail_baydav_msgid = 'Message-Id=/^$/H' -- NOTE(review): truncated literal
+-- Sympatico message id
+local sympatico_msgid = 'Message-Id=/^?$/H' -- NOTE(review): truncated literal
+-- Mailman message id
+local mailman_msgid = 'Message-ID=/^$/H' -- NOTE(review): truncated literal
+-- Message id comes from software whose format tells nothing about the MUA,
+-- so the forged-MUA rules below are skipped when any of these match
+local unusable_msgid = string.format('(%s | %s | %s | %s | %s | %s)',
+  lyris_ezml_remailer, wacky_sendmail_version, iplanet_messaging_server, hotmail_baydav_msgid, sympatico_msgid, mailman_msgid)
+-- Outlook express data seems to be forged
+local forged_oe = string.format('(%s & !%s & !%s & !%s)', oe_mua, oe_msgid_1, oe_msgid_2, unusable_msgid)
+-- Outlook specific headers
+local outlook_dollars_mua = 'X-Mailer=/^Microsoft Outlook(?: 8| CWS, Build 9|, Build 10)\\./H'
+local outlook_dollars_other = 'Message-Id=/^?/H' -- NOTE(review): truncated literal
+local vista_msgid = 'Message-Id=/^?$/H' -- NOTE(review): truncated literal
+local ims_msgid = 'Message-Id=/^?$/H' -- NOTE(review): truncated literal
+-- Forged outlook headers
+local forged_outlook_dollars = string.format('(%s & !%s & !%s & !%s & !%s & !%s)',
+  outlook_dollars_mua, oe_msgid_2, outlook_dollars_other, vista_msgid, ims_msgid, unusable_msgid)
+-- Outlook versions that should be excluded from summary rule
+-- Fixed: the dots of the version numbers were unescaped and matched any
+-- character; they are literal dots now.
+local fmo_excl_o3416 = 'X-Mailer=/^Microsoft Outlook, Build 10\\.0\\.3416$/H'
+local fmo_excl_oe3790 = 'X-Mailer=/^Microsoft Outlook Express 6\\.00\\.3790\\.3959$/H'
+-- Summary rule for forged outlook
+reconf['FORGED_MUA_OUTLOOK'] = string.format('(%s | %s) & !%s & !%s & !%s',
+  forged_oe, forged_outlook_dollars, fmo_excl_o3416, fmo_excl_oe3790, vista_msgid)
+
+-- HTML outlook signs
+local mime_html = 'content_type_is_type(text) & content_type_is_subtype(/.?html/)'
+local tag_exists_html = 'has_html_tag(html)'
+local tag_exists_head = 'has_html_tag(head)'
+local tag_exists_meta = 'has_html_tag(meta)'
+local tag_exists_body = 'has_html_tag(body)'
+-- Outlook X-Mailer with an HTML part that lacks at least one of html/head/meta/body
+reconf['FORGED_OUTLOOK_TAGS'] = string.format('!%s & %s & %s & !(%s & %s & %s & %s)',
+  yahoo_bulk, any_outlook_mua, mime_html, tag_exists_html, tag_exists_head,
+  tag_exists_meta, tag_exists_body)
+
+-- Forged OE/MSO boundary
+reconf['SUSPICIOUS_BOUNDARY'] = 'Content-Type=/^\\s*multipart.+boundary="----=_NextPart_000_[A-Z\\d]{4}_(00EBFFA4|0102FFA4|32C6FFA4|3302FFA4)\\.[A-Z\\d]{8}"[\\r\\n]*$/siX'
+-- Forged OE/MSO boundary
+reconf['SUSPICIOUS_BOUNDARY2'] = 'Content-Type=/^\\s*multipart.+boundary="----=_NextPart_000_[A-Z\\d]{4}_(01C6527E)\\.[A-Z\\d]{8}"[\\r\\n]*$/siX'
+-- Forged OE/MSO boundary
+reconf['SUSPICIOUS_BOUNDARY3'] = 'Content-Type=/^\\s*multipart.+boundary="-----000-00\\d\\d-01C[\\dA-F]{5}-[\\dA-F]{8}"[\\r\\n]*$/siX'
+-- Forged OE/MSO boundary: a 01C4 boundary combined with a Date in 2005-2019
+local suspicious_boundary_01C4 = 'Content-Type=/^\\s*multipart.+boundary="----=_NextPart_000_[A-Z\\d]{4}_01C4[\\dA-F]{4}\\.[A-Z\\d]{8}"[\\r\\n]*$/siX'
+local suspicious_boundary_01C4_date = 'Date=/^\\s*\\w\\w\\w,\\s+\\d+\\s+\\w\\w\\w 20(0[56789]|1\\d)/'
+reconf['SUSPICIOUS_BOUNDARY4'] = string.format('(%s) & (%s)', suspicious_boundary_01C4, suspicious_boundary_01C4_date)
+
+-- Detect forged The Bat! headers
+-- The Bat! X-Mailer header
+local thebat_mua_any = 'X-Mailer=/^\\s*The Bat!/H'
+-- The Bat! common Message-ID template
+local thebat_msgid_common = 'Message-ID=/^?$/mH' -- NOTE(review): truncated literal
+-- Correct The Bat! Message-ID template
+local thebat_msgid = 'Message-ID=/^?/mH' -- NOTE(review): truncated literal
+-- Summary rule for forged The Bat! Message-ID header
+reconf['FORGED_MUA_THEBAT_MSGID'] = string.format('(%s) & !(%s) & (%s) & !(%s)', thebat_mua_any, thebat_msgid, thebat_msgid_common, unusable_msgid)
+-- Summary rule for forged The Bat! Message-ID header with unknown template
+reconf['FORGED_MUA_THEBAT_MSGID_UNKNOWN'] = string.format('(%s) & !(%s) & !(%s) & !(%s)', thebat_mua_any, thebat_msgid, thebat_msgid_common, unusable_msgid)
+
+
+-- Detect forged KMail headers
+-- KMail User-Agent header
+local kmail_mua = 'User-Agent=/^\\s*KMail\\/1\\.\\d+\\.\\d+/H'
+-- KMail common Message-ID template
+local kmail_msgid_common = 'Message-Id=/^?$/mH' -- NOTE(review): truncated literal
+
+-- Checks whether the Message-Id of a task is consistent with its From header.
+-- The two headers are joined as "<From>|<Message-Id>" and matched against a
+-- pattern requiring the text captured between "<" and ">" on the From side to
+-- reappear at the very end of the Message-Id side, after a 12-digit
+-- date-like group and ".<digits>.".
+-- Deliberately global: the rule expression below refers to it by the name
+-- 'kmail_msgid'.
+-- @param task object providing get_header(name)
+-- @return true when both headers exist and the pattern matches, false otherwise
+function kmail_msgid (task)
+  local regexp_text = '<(\\S+)>\\|(19[789]\\d|20\\d\\d)(0\\d|1[012])([012]\\d|3[01])([0-5]\\d)([0-5]\\d)\\.\\d+\\.\\1$'
+  local re = rspamd_regexp.create_cached(regexp_text)
+  -- Defensive: without a regexp object there is nothing to match with
+  if not re then return false end
+  local header_msgid = task:get_header('Message-Id')
+  if not header_msgid then return false end
+  local header_from = task:get_header('From')
+  if not header_from then return false end
+  if re:match(header_from.."|"..header_msgid) then return true end
+  return false
+end
+-- Summary rule for forged KMail Message-ID header
+reconf['FORGED_MUA_KMAIL_MSGID'] = string.format('(%s) & (%s) & !(%s) & !(%s)', kmail_mua, kmail_msgid_common, 'kmail_msgid', unusable_msgid)
+-- Summary rule for forged KMail Message-ID header with unknown template
+reconf['FORGED_MUA_KMAIL_MSGID_UNKNOWN'] = string.format('(%s) & !(%s) & !(%s)', kmail_mua, kmail_msgid_common, unusable_msgid)
+
+-- Detect forged Opera Mail headers
+-- Opera Mail User-Agent header
+local opera1x_mua = 'User-Agent=/^\\s*Opera Mail\\/1[01]\\.\\d+ /H'
+-- Opera Mail Message-ID template
+local opera1x_msgid = 'Message-ID=/^?$/H' -- NOTE(review): truncated literal
+-- Suspicious Opera Mail User-Agent header
+local suspicious_opera10w_mua = 'User-Agent=/^\\s*Opera Mail\\/10\\.\\d+ \\(Windows\\)$/H'
+-- Suspicious Opera Mail Message-ID, apparently from KMail
+local suspicious_opera10w_msgid = 'Message-Id=/^$/H' -- NOTE(review): truncated literal
+-- Summary rule for forged Opera Mail User-Agent header and Message-ID header from KMail
+reconf['SUSPICIOUS_OPERA_10W_MSGID'] = string.format('(%s) & (%s)', suspicious_opera10w_mua, suspicious_opera10w_msgid)
+-- Summary rule for forged Opera Mail Message-ID header
+reconf['FORGED_MUA_OPERA_MSGID'] = string.format('(%s) & !(%s) & !(%s) & !(%s)', opera1x_mua, opera1x_msgid, reconf['SUSPICIOUS_OPERA_10W_MSGID'], unusable_msgid)
+
+
+-- Detect forged Mozilla Mail/Thunderbird/Seamonkey headers
+-- Mozilla based X-Mailer
+local user_agent_mozilla5 = 'User-Agent=/^\\s*Mozilla\\/5\\.0/H'
+local user_agent_thunderbird = 'User-Agent=/^\\s*(Thunderbird|Mozilla Thunderbird|Mozilla\\/.*Gecko\\/.*Thunderbird\\/)/H'
+local user_agent_seamonkey = 'User-Agent=/^\\s*Mozilla\\/5\\.0\\s.+\\sSeaMonkey\\/\\d+\\.\\d+/H'
+-- Mozilla/5.0 that is neither Thunderbird nor SeaMonkey
+local user_agent_mozilla = string.format('(%s) & !(%s) & !(%s)', user_agent_mozilla5, user_agent_thunderbird, user_agent_seamonkey)
+-- Mozilla based common Message-ID template
+local mozilla_msgid_common = 'Message-ID=/^\\s*<[\\dA-F]{8}\\.\\d{1,7}\\@([^>\\.]+\\.)+[^>\\.]+>$/H'
+local mozilla_msgid = 'Message-ID=/^\\s*<(3[3-9A-F]|4[\\dA-F]|5[\\dA-F])[\\dA-F]{6}\\.(\\d0){1,4}\\d\\@([^>\\.]+\\.)+[^>\\.]+>$/H'
+-- Summary rule for forged Mozilla Mail Message-ID header
+reconf['FORGED_MUA_MOZILLA_MAIL_MSGID'] = string.format('(%s) & (%s) & !(%s) & !(%s)', user_agent_mozilla, mozilla_msgid_common, mozilla_msgid, unusable_msgid)
+reconf['FORGED_MUA_MOZILLA_MAIL_MSGID_UNKNOWN'] = string.format('(%s) & !(%s) & !(%s) & !(%s)', user_agent_mozilla, mozilla_msgid_common, mozilla_msgid, unusable_msgid)
+-- Summary rule for forged Thunderbird Message-ID header
+reconf['FORGED_MUA_THUNDERBIRD_MSGID'] = string.format('(%s) & (%s) & !(%s) & !(%s)', user_agent_thunderbird, mozilla_msgid_common, mozilla_msgid, unusable_msgid)
+reconf['FORGED_MUA_THUNDERBIRD_MSGID_UNKNOWN'] = string.format('(%s) & !(%s) & !(%s) & !(%s)', user_agent_thunderbird, mozilla_msgid_common, mozilla_msgid, unusable_msgid)
+-- Summary rule for forged Seamonkey Message-ID header
+reconf['FORGED_MUA_SEAMONKEY_MSGID'] = string.format('(%s) & (%s) & !(%s) & !(%s)', user_agent_seamonkey, mozilla_msgid_common, mozilla_msgid, unusable_msgid)
+reconf['FORGED_MUA_SEAMONKEY_MSGID_UNKNOWN'] = string.format('(%s) & !(%s) & !(%s) & !(%s)', user_agent_seamonkey, mozilla_msgid_common, mozilla_msgid, unusable_msgid)
+
+
+-- Message id validity
+-- INVALID_MSGID: a Message-Id is present (has_mid, defined outside this excerpt) but it neither
+-- matches the sane template nor contains a parenthesised comment.
+-- NOTE(review): sane_msgid starts with an unbalanced "]" -- the opening of the pattern was
+-- presumably lost when this patch text was extracted; verify against the real rules file.
+local sane_msgid = 'Message-Id=/^\\\\ \\t\\n\\r\\x0b\\x80-\\xff]+\\@[^<>\\\\ \\t\\n\\r\\x0b\\x80-\\xff]+>?\\s*$/H'
+local msgid_comment = 'Message-Id=/\\(.*\\)/H'
+reconf['INVALID_MSGID'] = string.format('(%s) & !((%s) | (%s))', has_mid, sane_msgid, msgid_comment)
+
+
+-- Only Content-Type header without other MIME headers
+-- (Content-Type exists, but Content-Disposition, Content-Transfer-Encoding and MIME-Version
+-- do not, and the type is not text/plain)
+local cd = 'header_exists(Content-Disposition)'
+local cte = 'header_exists(Content-Transfer-Encoding)'
+local ct = 'header_exists(Content-Type)'
+local mime_version = 'raw_header_exists(MIME-Version)'
+local ct_text_plain = 'content_type_is_type(text) & content_type_is_subtype(plain)'
+reconf['MIME_HEADER_CTYPE_ONLY'] = string.format('!(%s) & !(%s) & (%s) & !(%s) & !(%s)', cd, cte, ct, mime_version, ct_text_plain)
+
+
+-- Forged Exchange messages
+-- ("$"-separated hex Message-Id without the MimeOLE header and without an Exchange Received line)
+local msgid_dollars_ok = 'Message-Id=/[0-9a-f]{4,}\\$[0-9a-f]{4,}\\$[0-9a-f]{4,}\\@\\S+/H'
+local mimeole_ms = 'X-MimeOLE=/^Produced By Microsoft MimeOLE/H'
+local rcvd_with_exchange = 'Received=/with Microsoft Exchange Server/H'
+reconf['RATWARE_MS_HASH'] = string.format('(%s) & !(%s) & !(%s)', msgid_dollars_ok, mimeole_ms, rcvd_with_exchange)
+
+-- Reply-type in content-type
+reconf['STOX_REPLY_TYPE'] = 'Content-Type=/text\\/plain; .* reply-type=original/H'
+
+-- Fake Verizon headers
+-- (helo= claims verizon.net while rdns= does not, both read from X-Spam-Relays-Untrusted)
+local fhelo_verizon = 'X-Spam-Relays-Untrusted=/^[^\\]]+ helo=[^ ]+verizon\\.net /iH'
+local fhost_verizon = 'X-Spam-Relays-Untrusted=/^[^\\]]+ rdns=[^ ]+verizon\\.net /iH'
+reconf['FM_FAKE_HELO_VERIZON'] = string.format('(%s) & !(%s)', fhelo_verizon, fhost_verizon)
+
+-- Forged yahoo msgid
+-- (Message-Id at yahoo.com while From is not at yahoo.com)
+local at_yahoo_msgid = 'Message-Id=/\\@yahoo\\.com\\b/iH'
+local at_yahoogroups_msgid = 'Message-Id=/\\@yahoogroups\\.com\\b/iH'
+local from_yahoo_com = 'From=/\\@yahoo\\.com\\b/iH'
+reconf['FORGED_MSGID_YAHOO'] = string.format('(%s) & !(%s)', at_yahoo_msgid, from_yahoo_com)
+local r_from_yahoo_groups = 'From=/rambler.ru\\@returns\\.groups\\.yahoo\\.com\\b/iH'
+local r_from_yahoo_groups_ro = 'From=/ro.ru\\@returns\\.groups\\.yahoo\\.com\\b/iH'
+
+-- Forged The Bat! MUA headers
+-- (The Bat! v1 mailer whose Content-Type has a boundary that does not start with ten dashes,
+-- and no X-Mailman-Version header)
+local thebat_mua_v1 = 'X-Mailer=/^The Bat! \\(v1\\./H'
+local ctype_has_boundary = 'Content-Type=/boundary/iH'
+local bat_boundary = 'Content-Type=/boundary=\\"?-{10}/H'
+local mailman_21 = 'X-Mailman-Version=/\\d/H'
+reconf['FORGED_MUA_THEBAT_BOUN'] = string.format('(%s) & (%s) & !(%s) & !(%s)', thebat_mua_v1, ctype_has_boundary, bat_boundary, mailman_21)
+
+-- Two received headers with ip addresses
+local double_ip_spam_1 = 'Received=/from \\[\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\] by \\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3} with/H'
+local double_ip_spam_2 = 'Received=/from\\s+\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\s+by\\s+\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3};/H'
+reconf['RCVD_DOUBLE_IP_SPAM'] = string.format('(%s) | (%s)', double_ip_spam_1, double_ip_spam_2)
+
+-- Quoted reply-to from yahoo (seems to be forged)
+-- NOTE(review): text is missing from this excerpt at this point. The repto_quote pattern ends
+-- abruptly, and the statements that follow are the tail of a function whose header (and the
+-- definitions of checked, headers_to, headers_recv and regexp_text) is not present here.
+-- Judging by its shape it mirrors INVALID_EXIM_RECEIVED2 below -- restore from the real rules file.
+local repto_quote = 'Reply-To=/\\".*\\"\\s*\\\\/\\\\,\\-:=]'
+    local re = rspamd_regexp.create_cached(regexp_text, 's')
+    if headers_recv then
+      for _,header_to in ipairs(headers_to) do
+        for _,header_r in ipairs(headers_recv) do
+          if re:match(header_to['value'].."|"..header_r['value']) then
+            return true
+          end
+        end
+        checked = checked + 1
+        if checked > 5 then
+          -- Stop on 5 rcpt
+          return false
+        end
+      end
+    end
+  end
+  return false
+end
+
+-- Match every To header against every Received header looking for a forged Exim-style line.
+-- For each pair the string "<To value>|<Received value>" is tested against a cached regexp
+-- (created with the 's' flag) that uses a back-reference (\1) between the two parts.
+-- @param task object providing get_header_full(name), which returns a list of tables with a 'value' field
+-- @return true on the first matching pair; false when To or Received is absent, when nothing
+--         matches, or when the per-message limit on To headers is exceeded
+-- NOTE(review): the leading part of regexp_text ("^[^\n]*??\|") looks damaged by extraction,
+-- and no capture group is visible for \1 to refer to -- verify against the real rules file.
+rspamd_config.INVALID_EXIM_RECEIVED2 = function (task)
+  local checked = 0
+  local headers_to = task:get_header_full('To')
+  if headers_to then
+    local headers_recv = task:get_header_full('Received')
+    local regexp_text = '^[^\\n]*??\\|.*from \\d+\\.\\d+\\.\\d+\\.\\d+ \\(HELO \\S+\\)[\\s\\r\\n]*by \\1 with esmtp \\([A-Z]{9,12} [A-Z]{5,6}\\)[\\s\\r\\n]+id [a-zA-Z\\d]{6}-[a-zA-Z\\d]{6}-[a-zA-Z\\d]{2}[\\s\\r\\n]+'
+    local re = rspamd_regexp.create_cached(regexp_text, 's')
+    if headers_recv then
+      for _,header_to in ipairs(headers_to) do
+        for _,header_r in ipairs(headers_recv) do
+          if re:match(header_to['value'].."|"..header_r['value']) then
+            return true
+          end
+        end
+        checked = checked + 1
+        if checked > 5 then
+          -- Stop on 5 rcpt
+          -- NOTE(review): with "> 5" the loop actually gives up after the sixth To header.
+          return false
+        end
+      end
+    end
+  end
+  return false
+end
diff --git a/rules/regexp/lotto.lua b/rules/regexp/lotto.lua
new file mode 100644
index 000000000..2c83c1c43
--- /dev/null
+++ b/rules/regexp/lotto.lua
@@ -0,0 +1,33 @@
+-- Actually these regular expressions were obtained from SpamAssassin project, so they are licensed by apache license:
+--
+-- Licensed to the Apache Software Foundation (ASF) under one or more
+-- contributor license agreements. See the NOTICE file distributed with
+-- this work for additional information regarding copyright ownership.
+-- The ASF licenses this file to you under the Apache License, Version 2.0
+-- (the "License"); you may not use this file except in compliance with
+-- the License. You may obtain a copy of the License at:
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+-- Rules that are specific for lotto spam messages: R_LOTTO requires one of three generic header rules plus at least 3 of the 9 lotto patterns passed to string.format below
+
+local reconf = config['regexp'] -- shared rule table; R_UNDISC_RCPT, R_BAD_CTE_7BIT and R_NO_SPACE_IN_FROM are read from it below
+
+local r_lotto_from = 'From=/(?:lottery|News center|congratulation to you|NED INFO|BRITISH NATIONAL HEADQUATERS|MICROSOFT ON LINE SUPPORT TEAM|prize|online notification)/iH'
+local r_lotto_subject = 'Subject=/(?:\\xA3\\d|pounds?|FINAL NOTIFICATION|FOR YOUR ATTENTION|File in Your Claims?|ATTN|prize|Claims requirement|amount|confirm|your e-mail address won|congratulations)/iH'
+local r_lotto_body = '/(?:won|winning|\\xA3\\d|pounds?|GBP|LOTTERY|awards|prize)/isrP'
+local kam_lotto1 = '/(e-?mail address (have emerged a winner|has won|attached to (ticket|reference)|was one of the ten winners)|random selection in our computerized email selection system)/isrP'
+local kam_lotto2 = '/((ticket|serial|lucky) number|secret pin ?code|batch number|reference number|promotion date)/isrP'
+local kam_lotto3 = '/(won|claim|cash prize|pounds? sterling)/isrP'
+local kam_lotto4 = '/(claims (officer|agent)|lottery coordinator|fiduciary (officer|agent)|fiduaciary claims)/isrP'
+local kam_lotto5 = '/(freelotto group|Royal Heritage Lottery|UK National (Online)? Lottery|U\\.?K\\.? Grand Promotions|Lottery Department UK|Euromillion Loteria|Luckyday International Lottery|International Lottery)/isrP'
+local kam_lotto6 = '/(Dear Lucky Winner|Winning Notification|Attention:Winner|Dear Winner)/isrP'
+local kam_lotto7 = 'Subject=/(Your Lucky Day|(Attention:|ONLINE) WINNER)/iH' -- NOTE(review): defined but never passed to the string.format call below; confirm whether it was meant to be a tenth term
+reconf['R_LOTTO'] = string.format('((%s) | (%s) | (%s)) & (((%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s) + (%s)) >= 3)', reconf['R_UNDISC_RCPT'], reconf['R_BAD_CTE_7BIT'], reconf['R_NO_SPACE_IN_FROM'], r_lotto_from, r_lotto_subject, r_lotto_body, kam_lotto1, kam_lotto2, kam_lotto3, kam_lotto4, kam_lotto5, kam_lotto6) -- 12 placeholders, 12 arguments: 3 generic rules OR-ed, then 9 lotto patterns summed and compared with 3
+
-- cgit v1.2.3