From: Vsevolod Stakhov Date: Tue, 13 Apr 2010 15:37:29 +0000 (+0400) Subject: * Add lua config for most common rules X-Git-Tag: 0.3.0~28 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=d4b35de4315753629ac5b107968e6194eac85d24;p=rspamd.git * Add lua config for most common rules * Reset lua state while reloading --- diff --git a/conf/lua/regexp/drugs.lua b/conf/lua/regexp/drugs.lua new file mode 100644 index 000000000..30583063a --- /dev/null +++ b/conf/lua/regexp/drugs.lua @@ -0,0 +1,66 @@ +-- Drugs spam (viagra, pills etc) +-- XXX: remove this legacy to statfile + + +local reconf = config['regexp'] + +local drugs_diet1 = '/(?:\b|\s)[_\W]{0,3}p[_\W]{0,3}h[_\W]{0,3}[e3\xE8-\xEB][_\W]{0,3}n[_\W]{0,3}t[_\W]{0,3}[e3\xE8-\xEB][_\W]{0,3}r[_\W]{0,3}m[_\W]{0,3}[i1!|l\xEC-\xEF][_\W]{0,3}n[_\W]{0,3}[e3\xE8-\xEB][_\W]{0,3}(?:\b|\s)/irP' +local drugs_diet2 = '/(?:\b|\s)_{0,3}[i1!|l\xEC-\xEF][_\W]?o[_\W]?n[_\W]?[a4\xE0-\xE6@][_\W]?m[_\W]?[i1!|l\xEC-\xEF][_\W]?n_{0,3}\b/irP' +local drugs_diet3 = '/\bbontril\b/irP' +local drugs_diet4 = '/\bphendimetrazine\b/irP' +local drugs_diet5 = '/\bdiethylpropion\b/irP' +local drugs_diet6 = '/(?:\b|\s)[_\W]{0,3}M[_\W]{0,3}[e3\xE8-\xEB][_\W]{0,3}r[_\W]{0,3}[i1!|l\xEC-\xEF][_\W]{0,3}d[_\W]{0,3}[i1!|l\xEC-\xEF][_\W]{0,3}[a4\xE0-\xE6@][_\W]{0,3}(?:\b|\s)/irP' +local drugs_diet7 = '/\b_{0,3}t[_\W]?[e3\xE8-\xEB][_\W]?n[_\W]?u[_\W]?a[_\W]?t[_\W]?[e3\xE8-\xEB]_{0,3}(?:\b|\s)/irP' +local drugs_diet8 = '/\b_{0,3}d[_\W]?[i1!|l\xEC-\xEF][_\W]?d[_\W]?r[_\W]?[e3\xE8-\xEB][_\W]?xx?_{0,3}\b/irP' +local drugs_diet9 = '/\b_{0,3}a[_\W]?d[_\W]?[i1!|l\xEC-\xEF][_\W]?p[_\W]?[e3\xE8-\xEB][_\W]?x_{0,3}\b/irP' +local drugs_diet10 = '/\b_{0,3}x?x[_\W]?[e3\xE8-\xEB][_\W]?n[_\W]?[i1!|l\xEC-\xEF][_\W]?c[_\W]?[a4\xE0-\xE6@][_\W]?l_{0,3}\b/irP' +reconf['DRUGS_DIET'] = string.format('((%s) | (%s) | (%s)) & ((%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s))', reconf['R_UNDISC_RCPT'], reconf['R_BAD_CTE_7BIT'], reconf['R_NO_SPACE_IN_FROM'], 
drugs_diet1, drugs_diet2, drugs_diet3, drugs_diet4, drugs_diet5, drugs_diet6, drugs_diet7, drugs_diet8, drugs_diet9, drugs_diet10) +local drugs_erectile1 = '/(?:\b|\s)[_\W]{0,3}(?:\\\/|V)[_\W]{0,3}[ij1!|l\xEC\xED\xEE\xEF][_\W]{0,3}[a40\xE0-\xE6@][_\W]{0,3}[xyz]?[gj][_\W]{0,3}r[_\W]{0,3}[a40\xE0-\xE6@][_\W]{0,3}x?[_\W]{0,3}(?:\b|\s)/irP' +local drugs_erectile2 = '/\bV(?:agira|igara|iaggra|iaegra)\b/irP' +local drugs_erectile3 = '/(?:\A|[\s\x00-\x2f\x3a-\x40\x5b-\x60\x7b-\x7f])[_\W]{0,3}C[_\W]{0,3}[ij1!|l\xEC\xED\xEE\xEF][_\W]{0,3}[a4\xE0-\xE6@][_\W]{0,3}l?[l!|1][_\W]{0,3}[i1!|l\xEC-\xEF][_\W]{0,3}s[_\W]{0,3}(?:\b|\s)/irP' +local drugs_erectile4 = '/\bC(?:alis|ilias|ilais)\b/irP' +local drugs_erectile5 = '/\b_{0,3}s[_\W]?[i1!|l\xEC-\xEF][_\W]?l[_\W]?d[_\W]?[e3\xE8-\xEB][_\W]?n[_\W]?[a4\xE0-\xE6@][_\W]?f[_\W]?[i1!|l\xEC-\xEF][_\W]?l c[_\W]?[i1!|l\xEC-\xEF][_\W]?t[_\W]?r[_\W]?[a4\xE0-\xE6@][_\W]?t[_\W]?[e3\xE8-\xEB]_{0,3}(?:\b|\s)/irP' +local drugs_erectile6 = '/\b_{0,3}L[_\W]?[e3\xE8-\xEB][_\W]?(?:\\\/|V)[_\W]?[i1!|l\xEC-\xEF][_\W]?t[_\W]?r[_\W]?[a4\xE0-\xE6@][_\W]?(?:\b|\s)/irP' +local drugs_erectile8 = '/\b_{0,3}T[_\W]?[a4\xE0-\xE6@][_\W]?d[_\W]?[a4\xE0-\xE6@][_\W]?l[_\W]?[a4\xE0-\xE6@][_\W]?f[_\W]?[i1!|l\xEC-\xEF][_\W]?l_{0,3}\b/irP' +local drugs_erectile10 = '/\b_{0,3}V[_\W]?(?:i|\ï\;)[_\W]?(?:a|\à|\å)\;?[_\W]?g[_\W]?r[_\W]?(?:a|\à|\å)\b/irP' +local drugs_erectile11 = '/(?:\b|\s)_{0,3}[a4\xE0-\xE6@][_\W]{0,3}p[_\W]{0,3}c[_\W]{0,3}[a4\xE0-\xE6@][_\W]{0,3}[l!|1][_\W]{0,3}[i1!|l\xEC-\xEF][_\W]{0,3}s_{0,3}\b/irP' +reconf['DRUGS_ERECTILE'] = string.format('((%s) | (%s) | (%s)) & ((%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s))', reconf['R_UNDISC_RCPT'], reconf['R_BAD_CTE_7BIT'], reconf['R_NO_SPACE_IN_FROM'], drugs_erectile1, drugs_erectile2, drugs_erectile3, drugs_erectile4, drugs_erectile5, drugs_erectile6, drugs_erectile8, drugs_erectile10, drugs_erectile11) +local drugs_anxiety1 = 
'/(?:\b|\s)[_\W]{0,3}x?x[_\W]{0,3}[a4\xE0-\xE6@][_\W]{0,3}n[_\W]{0,3}[ea4\xE1\xE2\xE3@][_\W]{0,3}xx?_{0,3}\b/irP' +local drugs_anxiety2 = '/\bAlprazolam\b/irP' +local drugs_anxiety3 = '/(?:\b|\s)[_\W]{0,3}(?:\\\/|V)[_\W]{0,3}[a4\xE0-\xE6@][_\W]{0,3}[l|][_\W]{0,3}[i1!|l\xEC-\xEF][_\W]{0,3}[u\xB5\xF9-\xFC][_\W]{0,3}m\b/irP' +local drugs_anxiety4 = '/\b_{0,3}D[_\W]?[i1!|l\xEC-\xEF][_\W]?[a4\xE0-\xE6@][_\W]?z[_\W]?[ea3\xE9\xEA\xEB][_\W]?p[_\W]?[a4\xE0-\xE6@][_\W]?m_{0,3}\b/irP' +local drugs_anxiety5 = '/(?:\b|\s)[a4\xE0-\xE6@][_\W]?t[_\W]?[i1!|l\xEC-\xEF][_\W]?v[_\W]?[a4\xE0-\xE6@][_\W]?n_{0,3}\b/irP' +local drugs_anxiety6 = '/\b_{0,3}l[_\W]?[o0\xF2-\xF6][_\W]?r[_\W]?[a4\xE0-\xE6@][_\W]?z[_\W]?[e3\xE8-\xEB][_\W]?p[_\W]?[a4\xE0-\xE6@][_\W]?m_{0,3}\b/irP' +local drugs_anxiety7 = '/\b_{0,3}c[_\W]?l[_\W]?[o0\xF2-\xF6][_\W]?n[_\W]?[a4\xE0-\xE6@][_\W]?z[_\W]?e[_\W]?p[_\W]?[a4\xE0-\xE6@][_\W]?m\b/irP' +local drugs_anxiety8 = '/\bklonopin\b/irP' +local drugs_anxiety9 = '/\brivotril\b/irP' +reconf['DRUGS_ANXIETY'] = string.format('((%s) | (%s) | (%s)) & ((%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s))', reconf['R_UNDISC_RCPT'], reconf['R_BAD_CTE_7BIT'], reconf['R_NO_SPACE_IN_FROM'], drugs_anxiety1, drugs_anxiety2, drugs_anxiety3, drugs_anxiety4, drugs_anxiety5, drugs_anxiety6, drugs_anxiety7, drugs_anxiety8, drugs_anxiety9) +reconf['DRUGS_ANXIETY_EREC'] = string.format('(%s) & (%s)', reconf['DRUGS_ERECTILE'], reconf['DRUGS_ANXIETY']) +local drugs_pain1 = '/\b_{0,3}h[_\W]?y[_\W]?d[_\W]?r[_\W]?[o0\xF2-\xF6][_\W]?c[_\W]?[o0\xF2-\xF6][_\W]?d[_\W]?[o0\xF2-\xF6][_\W]?n[_\W]?e_{0,3}\b/irP' +local drugs_pain2 = '/\b_{0,3}c[o0\xF2-\xF6]deine_{0,3}\b/irP' +local drugs_pain3 = '/(?:\b|\s)[_\W]{0,3}[u\xB5\xF9-\xFC][_\W]{0,3}l[_\W]{0,3}t[_\W]{0,3}r[_\W]{0,3}[a4\xE0-\xE6@][_\W]{0,3}m_{0,3}\b/irP' +local drugs_pain4 = 
'/(?:\b|\s)[_\W]{0,3}(?:\\\/|V)[_\W]{0,3}[i1!|l\xEC-\xEF][_\W]{0,3}c[_\W]{0,3}[o0\xF2-\xF6][_\W]{0,3}d[_\W]{0,3}[i1!|l\xEC-\xEF][_\W]{0,3}ns?[_\W]{0,3}(?:\b|\s)/irP' +local drugs_pain5 = '/\b_{0,3}t[_\W]?r[_\W]?[a4\xE0-\xE6@][_\W]?m[_\W]?[a4\xE0-\xE6@][_\W]?d[_\W]?[o0\xF2-\xF6][_\W]?[l!|1]_{0,3}\b/irP' +local drugs_pain6 = '/\b_{0,3}u[_\W]?l[_\W]?t[_\W]?r[_\W]?a[_\W]?c[_\W]?e[_\W]?t_{0,3}\b/irP' +local drugs_pain7 = '/\b_{0,3}f[_\W]?[i1!|l\xEC-\xEF][_\W]?[o0\xF2-\xF6][_\W]?r[_\W]?[i1!|l\xEC-\xEF][_\W]?c[_\W]?[e3\xE8-\xEB][_\W]?[t7]_{0,3}\b/irP' +local drugs_pain8 = '/\b_{0,3}c[_\W]?[e3\xE8-\xEB][_\W]?l[_\W]?[e3\xE8-\xEB][_\W]?b[_\W]?r[_\W]?[e3\xE8-\xEB][_\W]?x_{0,3}\b/irP' +local drugs_pain9 = '/(?:\b|\s)_{0,3}[i1!|l\xEC-\xEF]m[i1!|l\xEC-\xEF]tr[e3\xE8-\xEB]x_{0,3}\b/irP' +local drugs_pain10 = '/(?:\b|\s)[_\W]{0,3}(?:\\\/|V)[_\W]{0,3}[i1!|l\xEC-\xEF][_\W]{0,3}[o0\xF2-\xF6][_\W]{0,3}x[_\W]{0,3}xx?_{0,3}\b/irP' +local drugs_pain11 = '/\bzebutal\b/irP' +local drugs_pain12 = '/\besgic plus\b/irP' +local drugs_pain13 = '/\bD[_\W]?[a4\xE0-\xE6@][_\W]?r[_\W]?v[_\W]?[o0\xF2-\xF6][_\W]?n\b/irP' +local drugs_pain14 = '/N[o0\xF2-\xF6]rc[o0\xF2-\xF6]/irP' +local drugs_pain = string.format('((%s) | (%s) | (%s)) & ((%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s))', reconf['R_UNDISC_RCPT'], reconf['R_BAD_CTE_7BIT'], reconf['R_NO_SPACE_IN_FROM'], drugs_pain1, drugs_pain2, drugs_pain3, drugs_pain4, drugs_pain5, drugs_pain6, drugs_pain7, drugs_pain8, drugs_pain9, drugs_pain10, drugs_pain11, drugs_pain12, drugs_pain13, drugs_pain14) +local drugs_sleep1 = '/(?:\b|\s)[_\W]{0,3}[a4\xE0-\xE6@][_\W]{0,3}m[_\W]{0,3}b[_\W]{0,3}[i1!|l\xEC-\xEF][_\W]{0,3}[e3\xE8-\xEB][_\W]{0,3}n[_\W]{0,3}(?:\b|\s)/irP' +local drugs_sleep2 = '/(?:\b|\s)[_\W]{0,3}S[_\W]{0,3}[o0\xF2-\xF6][_\W]{0,3}n[_\W]{0,3}[a4\xE0-\xE6@][_\W]{0,3}t[_\W]{0,3}[a4\xE0-\xE6@][_\W]{0,3}(?:\b|\s)/irP' +local drugs_sleep3 = 
'/\b_{0,3}R[_\W]?[e3\xE8-\xEB][_\W]?s[_\W]?t[_\W]?[o0\xF2-\xF6][_\W]?r[_\W]?i[_\W]?l_{0,3}\b/irP' +local drugs_sleep4 = '/\b_{0,3}H[_\W]?[a4\xE0-\xE6@][_\W]?l[_\W]?c[_\W]?i[_\W]?[o0\xF2-\xF6][_\W]?n_{0,3}\b/irP' +local drugs_sleep = string.format('(%s) | (%s) | (%s) | (%s)', drugs_sleep1, drugs_sleep2, drugs_sleep3, drugs_sleep4) +local drugs_muscle1 = '/(?:\b|\s)[_\W]{0,3}s[_\W]{0,3}[o0\xF2-\xF6][_\W]{0,3}m[_\W]{0,3}[a4\xE0-\xE6@][_\W]{0,3}(?:\b|\s)/irP' +local drugs_muscle2 = '/\b_{0,3}cycl[o0\xF2-\xF6]b[e3\xE8-\xEB]nz[a4\xE0-\xE6@]pr[i1!|l\xEC-\xEF]n[e3\xE8-\xEB]_{0,3}(?:\b|\s)/irP' +local drugs_muscle3 = '/\b_{0,3}f[_\W]?l[_\W]?[e3\xE8-\xEB][_\W]?x[_\W]?[e3\xE8-\xEB][_\W]?r[_\W]?[i1!|l\xEC-\xEF]_{0,3}[_\W]?l_{0,3}\b/irP' +local drugs_muscle4 = '/\b_{0,3}z[_\W]?a[_\W]?n[_\W]?a[_\W]?f[_\W]?l[_\W]?e[_\W]?x_{0,3}\b/irP' +local drugs_muscle5 = '/\bskelaxin\b/irP' +reconf['DRUGS_MUSCLE'] = string.format('((%s) | (%s) | (%s)) & ((%s) | (%s) | (%s) | (%s) | (%s))', reconf['R_UNDISC_RCPT'], reconf['R_BAD_CTE_7BIT'], reconf['R_NO_SPACE_IN_FROM'], drugs_muscle1, drugs_muscle2, drugs_muscle3, drugs_muscle4, drugs_muscle5) +reconf['DRUGS_MANYKINDS'] = string.format('((%s) | (%s) | (%s)) & regexp_match_number(3, (%s), (%s), (%s), (%s), (%s), (%s))', reconf['R_UNDISC_RCPT'], reconf['R_BAD_CTE_7BIT'], reconf['R_NO_SPACE_IN_FROM'], reconf['DRUGS_ERECTILE'], reconf['DRUGS_DIET'], drugs_pain, drugs_sleep, reconf['DRUGS_MUSCLE'], reconf['DRUGS_ANXIETY']) + diff --git a/conf/lua/regexp/fraud.lua b/conf/lua/regexp/fraud.lua new file mode 100644 index 000000000..5800fa271 --- /dev/null +++ b/conf/lua/regexp/fraud.lua @@ -0,0 +1,57 @@ +-- Fraud messages (Nigeria spam, viagra, etc) +local reconf = config['regexp'] + +local fraud_dbi = '/(?:\bdollars?\b|\busd(?:ollars)?(?:[0-9]|\b)|\bus\$|\$[0-9,.]{6,}|\$[0-9].{0,8}[mb]illion|\$[0-9.,]{2,10} ?m|\beuros?\b|u[.]?s[.]? 
[0-9.]+ m)/irP' +local fraud_kjv = '/(?:claim|concerning) (?:the|this) money/irP' +local fraud_irj = '/(?:finance|holding|securit(?:ies|y)) (?:company|firm|storage house)/irP' +local fraud_neb = '/(?:government|bank) of nigeria/irP' +local fraud_xjr = '/(?:who was a|as a|an? honest|you being a|to any) foreigner/irP' +local fraud_dpr = '/\b(?:(?:respond|reply) (?:urgently|immediately)|(?:urgent|immediate|earliest) (?:reply|response))\b/irP' +local fraud_pts = '/\b(?:ass?ass?inat(?:ed|ion)|murder(?:e?d)?|kill(?:ed|ing)\b[^.]{0,99}\b(?:war veterans|rebels?))\b/irP' +local fraud_bep = '/\b(?:bank of nigeria|central bank of|trust bank|apex bank|amalgamated bank)\b/irP' +local fraud_tdp = '/\b(?:business partner(?:s|ship)?|silent partner(?:s|ship)?)\b/irP' +local fraud_gan = '/\b(?:charles taylor|serena|abacha|gu[eйи]i|sese[- ]?seko|kabila)\b/irP' +local fraud_irt = '/\b(?:compliments? of the|dear friend|dear sir|yours faithfully|season\'?s greetings)\b/irP' +local fraud_aon = '/\b(?:confidential|private|alternate|alternative) (?:(?:e-? *)?mail)\b/irP' +local fraud_wny = '/\b(?:disburse?(?:ment)?|incurr?(?:ed)?|remunerr?at(?:ed?|ion)|remm?itt?(?:ed|ance|ing)?)\b/irP' +local fraud_ipk = '/\b(?:in|to|visit) your country\b/irP' +local fraud_qxx = '/\b(?:my name is|i am) (?:mrs?|engr|barrister|dr|prince(?:ss)?)[. ]/irP' +local fraud_iou = '/\b(?:no risks?|risk-? 
*free|free of risks?|100% safe)\b/irP' +local fraud_ezy = '/\b(?:of|the) late president\b/irP' +local fraud_mly = '/\b(?:reply|respond)\b[^.]{0,50}\b(?:to|through)\b[^.]{0,50}\@\b/irP' +local fraud_zfj = '/\b(?:wife|son|brother|daughter) of the late\b/irP' +local fraud_kdt = '/\bU\.?S\.?(?:D\.?)?\s*(?:\$\s*)?(?:\d+,\d+,\d+|\d+\.\d+\.\d+|\d+(?:\.\d+)?\s*milli?on)/irP' +local fraud_ulk = '/\baffidavits?\b/irP' +local fraud_bgp = '/\battached to ticket number\b/irP' +local fraud_fbi = '/\bdisburs/irP' +local fraud_jbu = '/\bforeign account\b/irP' +local fraud_yww = '/\bfurnish you with\b/irP' +local fraud_jyg = '/\bgive\s+you .{0,15}(?:fund|money|total|sum|contact|percent)\b/irP' +local fraud_xvw = '/\bhonest cooperation\b/irP' +local fraud_uuy = '/\blegitimate business(?:es)?\b/irP' +local fraud_snt = '/\blocate(?: .{1,20})? extended relative/irP' +local fraud_ltx = '/\bmilli?on (?:.{1,25} thousand\s*)?(?:(?:united states|u\.?s\.?) dollars|(?i:U\.?S\.?D?))\b/irP' +local fraud_jnb = '/\boperat(?:e|ing)\b[^.]{0,99}\b(?:for(?:ei|ie)gn|off-? ?shore|over-? ?seas?) (?:bank )?accounts?\b/irP' +local fraud_qfy = '/\bover-? 
*(?:invoiced?|cost(?:s|ing)?)\b/irP' +local fraud_wdr = '/\bprivate lawyer\b/irP' +local fraud_wfc = '/\bsecur(?:e|ing) (?:the )?(?:funds?|monies)\b/irP' +local fraud_aum = '/\bthe desk of\b/irP' +local fraud_mcq = '/\btransaction\b.{1,30}\b(?:magnitude|diplomatic|strict|absolute|secret|confiden(?:tial|ce)|guarantee)/irP' +local fraud_etx = '/\byour\b[^.]{0,99}\b(?:contact (?:details|information)|private (?:e?[- ]?mail|telephone|tel|phone|fax))\b/irP' +local fraud_pvn = '/as the beneficiary/irP' +local fraud_fvu = '/award notification/irP' +local fraud_ckf = '/computer ballot system/irP' +local fraud_fcw = '/fiduciary agent/irP' +local fraud_mqo = '/foreign (?:business partner|customer)/irP' +local fraud_tcc = '/foreign (?:offshore )?(?:bank|account)/irP' +local fraud_gbw = '/god gives .{1,10}second chance/irP' +local fraud_nrg = '/i am contacting you/irP' +local fraud_rlx = '/lott(?:o|ery) (?:co,?ordinator|international)/irP' +local fraud_axf = '/magnanimity/irP' +local fraud_thj = '/modalit(?:y|ies)/irP' +local fraud_yqv = '/nigerian? 
(?:national|government)/irP' +local fraud_yja = '/over-invoice/irP' +local fraud_ypo = '/the total sum/irP' +local fraud_uoq = '/vital documents/irP' +reconf['ADVANCE_FEE_2'] = string.format('((%s) | (%s) | (%s)) & regexp_match_number(2, (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s))', reconf['R_UNDISC_RCPT'], reconf['R_BAD_CTE_7BIT'], reconf['R_NO_SPACE_IN_FROM'], fraud_kjv, fraud_irj, fraud_neb, fraud_xjr, fraud_ezy, fraud_zfj, fraud_kdt, fraud_bgp, fraud_fbi, fraud_jbu, fraud_jyg, fraud_xvw, fraud_snt, fraud_ltx, fraud_mcq, fraud_pvn, fraud_fvu, fraud_ckf, fraud_fcw, fraud_mqo, fraud_tcc, fraud_gbw, fraud_nrg, fraud_rlx, fraud_axf, fraud_thj, fraud_yqv, fraud_yja, fraud_ypo, fraud_uoq, fraud_dbi, fraud_bep, fraud_dpr, fraud_qxx, fraud_qfy, fraud_pts, fraud_tdp, fraud_gan, fraud_ipk, fraud_aon, fraud_wny, fraud_aum, fraud_wfc, fraud_yww, fraud_ulk, fraud_iou, fraud_jnb, fraud_irt, fraud_etx, fraud_wdr, fraud_uuy, fraud_mly) +reconf['ADVANCE_FEE_3'] = string.format('((%s) | (%s) | (%s)) & regexp_match_number(3, (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s), (%s))', reconf['R_UNDISC_RCPT'], reconf['R_BAD_CTE_7BIT'], reconf['R_NO_SPACE_IN_FROM'], fraud_kjv, fraud_irj, fraud_neb, fraud_xjr, fraud_ezy, fraud_zfj, fraud_kdt, fraud_bgp, fraud_fbi, fraud_jbu, fraud_jyg, fraud_xvw, fraud_snt, fraud_ltx, fraud_mcq, fraud_pvn, fraud_fvu, fraud_ckf, fraud_fcw, fraud_mqo, fraud_tcc, fraud_gbw, fraud_nrg, fraud_rlx, fraud_axf, fraud_thj, fraud_yqv, fraud_yja, 
fraud_ypo, fraud_uoq, fraud_dbi, fraud_bep, fraud_dpr, fraud_qxx, fraud_qfy, fraud_pts, fraud_tdp, fraud_gan, fraud_ipk, fraud_aon, fraud_wny, fraud_aum, fraud_wfc, fraud_yww, fraud_ulk, fraud_iou, fraud_jnb, fraud_irt, fraud_etx, fraud_wdr, fraud_uuy, fraud_mly) diff --git a/conf/lua/regexp/headers.lua b/conf/lua/regexp/headers.lua new file mode 100644 index 000000000..4ece9aed2 --- /dev/null +++ b/conf/lua/regexp/headers.lua @@ -0,0 +1,216 @@ +-- Definitions of header regexps + +local reconf = config['regexp'] + +-- Subject needs encoding +-- Define encodings types +local subject_encoded_b64 = 'Subject=/=\?\S+\?B\?/iX' +local subject_encoded_qp = 'Subject=/=\?\S+\?Q\?/iX' +-- Define whether subject must be encoded (contains non-7bit characters) +local subject_needs_mime = 'Subject=/[\x00-\x08\x0b\x0c\x0e-\x1f\x7f-\xff]/X' +-- Final rule +reconf['SUBJECT_NEEDS_ENCODING'] = string.format('!(%s) & !(%s) & (%s)', subject_encoded_b64, subject_encoded_qp, subject_needs_mime) + + +-- Detects missing subject +local has_subject = 'header_exists(Subject)' +local empty_subject = 'Subject=/^$/' +-- Final rule +reconf['MISSING_SUBJECT'] = string.format('!(%s) | (%s)', has_subject, empty_subject) + +-- Detects bad content-transfer-encoding for text parts +-- For text parts (text/plain and text/html mainly) +local r_ctype_text = 'content_type_is_type(text)' +-- Content transfer encoding is 7bit +local r_cte_7bit = 'compare_transfer_encoding(7bit)' +-- And body contains 8bit characters +local r_body_8bit = '/[^\x01-\x7f]/Pr' +reconf['R_BAD_CTE_7BIT'] = string.format('(%s) & (%s) & (%s)', r_ctype_text, r_cte_7bit, r_body_8bit) + +-- Detects missing To header +reconf['MISSING_TO']= '!header_exists(To)'; + +-- Detects undisclosed recipients +local undisc_rcpt = 'To=/^[-.\w]{1,64})\"?\s<\k\@/H' + +-- Charset is missing in message +reconf['R_MISSING_CHARSET']= string.format('content_type_is_type(text) & !content_type_has_param(charset) & !%s', r_cte_7bit); + +-- Subject seems to be 
spam +reconf['R_SAJDING'] = 'Subject=/\bsajding(?:om|a)?\b/iH' + +-- Messages that have only HTML part +reconf['MIME_HTML_ONLY'] = 'has_only_html_part()' + + +-- Find forged Outlook MUA +-- Yahoo groups messages +local yahoo_bulk = 'Received=/from \[\S+\] by \S+\.(?:groups|scd|dcn)\.yahoo\.com with NNFMP/H' +-- Outlook MUA +local outlook_mua = 'X-Mailer=/^Microsoft Outlook\b/H' +local any_outlook_mua = 'X-Mailer=/^Microsoft Outlook\b/H' +reconf['FORGED_OUTLOOK_HTML'] = string.format('!%s & %s & %s', yahoo_bulk, outlook_mua, reconf['MIME_HTML_ONLY']) + +-- Recipients seems to be likely with each other (only works when recipients count is more than 5 recipients) +reconf['SUSPICIOUS_RECIPS'] = 'compare_recipients_distance(0.65)' + +-- Recipients list seems to be sorted +reconf['SORTED_RECIPS'] = 'is_recipients_sorted()' + +-- Spam string at the end of message to make statistics faults +reconf['TRACKER_ID'] = '/^[a-z0-9]{6,24}[-_a-z0-9]{2,36}[a-z0-9]{6,24}\s*\z/isPr' + +-- From that contains encoded characters while base 64 is not needed as all symbols are 7bit +-- Regexp that checks that from header is encoded with base64 (search in raw headers) +local from_encoded_b64 = 'From=/\=\?\S+\?B\?/iX' +-- From contains only 7bit characters (parsed headers are used) +local from_needs_mime = 'From=/[\x00-\x08\x0b\x0c\x0e-\x1f\x7f-\xff]/H' +-- Final rule +reconf['FROM_EXCESS_BASE64'] = string.format('%s & !%s', from_encoded_b64, from_needs_mime) + + +-- Detect forged outlook headers +-- OE X-Mailer header +local oe_mua = 'X-Mailer=/\bOutlook Express [456]\./H' +-- OE Message ID format +local oe_msgid_1 = 'Message-Id=/^[A-Za-z0-9-]{7}[A-Za-z0-9]{20}\@hotmail\.com$/mH' +local oe_msgid_2 = 'Message-Id=/^(?:[0-9a-f]{8}|[0-9a-f]{12})\$[0-9a-f]{8}\$[0-9a-f]{8}\@\S+$/mH' +-- EZLM remail of message +local lyris_ezml_remailer = 'List-Unsubscribe=/$/H' +-- Header of wacky sendmail +local wacky_sendmail_version = 'Received=/\/CWT\/DCE\)/H' +-- Iplanet received header +local 
iplanet_messaging_server = 'Received=/iPlanet Messaging Server/H' +-- Hotmail message id +local hotmail_baydav_msgid = 'Message-Id=/^BAY\d+-DAV\d+[A-Z0-9]{25}\@phx\.gbl$/mH' +-- Sympatico message id +local sympatico_msgid = 'Message-Id=/^BAYC\d+-PASMTP\d+[A-Z0-9]{25}\@CEZ\.ICE$/mH' +-- Message id seems to be forged +local unusable_msgid = string.format('(%s | %s | %s | %s | %s)', + lyris_ezml_remailer, wacky_sendmail_version, iplanet_messaging_server, hotmail_baydav_msgid, sympatico_msgid) +-- Outlook express data seems to be forged +local forged_oe = string.format('(%s & !%s & !%s & !%s)', oe_mua, oe_msgid_1, oe_msgid_2, unusable_msgid) +-- Outlook specific headers +local outlook_dollars_mua = 'X-Mailer=/^Microsoft Outlook(?: 8| CWS, Build 9|, Build 10)\./H' +local outlook_dollars_other = 'Message-Id=/^\!\~\!/mH' +local vista_msgid = 'Message-Id=/^[A-F\d]{32}\@\S+$/mH' +local ims_msgid = 'Message-Id=/^[A-F\d]{36,40}\@\S+$/mH' +-- Forged outlook headers +local forged_outlook_dollars = string.format('(%s & !%s & !%s & !%s & !%s & !%s)', + outlook_dollars_mua, oe_msgid_2, outlook_dollars_other, vista_msgid, ims_msgid, unusable_msgid) +-- Outlook versions that should be excluded from summary rule +local fmo_excl_o3416 = 'X-Mailer=/^Microsoft Outlook, Build 10.0.3416$/H' +local fmo_excl_oe3790 = 'X-Mailer=/^Microsoft Outlook Express 6.00.3790.3959$/H' +-- Summary rule for forged outlook +reconf['FORGED_MUA_OUTLOOK'] = string.format('(%s | %s) & !%s & !%s & !%s', + forged_oe, forged_outlook_dollars, fmo_excl_o3416, fmo_excl_oe3790, vista_msgid) + +-- HTML outlook signs +local mime_html = 'content_type_is_type(text) & content_type_is_subtype(/.?html/)' +local tag_exists_html = 'has_html_tag(html)' +local tag_exists_head = 'has_html_tag(head)' +local tag_exists_meta = 'has_html_tag(meta)' +local tag_exists_body = 'has_html_tag(body)' +reconf['FORGED_OUTLOOK_TAGS'] = string.format('!%s & %s & %s & !(%s & %s & %s & %s)', + yahoo_bulk, any_outlook_mua, mime_html, 
tag_exists_html, tag_exists_head, + tag_exists_meta, tag_exists_body) + +-- Message id validity +local sane_msgid = 'Message-Id=/^[^<>\\ \t\n\r\x0b\x80-\xff]+\@[^<>\\ \t\n\r\x0b\x80-\xff]+\s*$/mH' +local msgid_comment = 'Message-Id=/\(.*\)/mH' +reconf['INVALID_MSGID'] = string.format('(%s) & !((%s) | (%s))', has_mid, sane_msgid, msgid_comment) + + +-- Only Content-Type header without other MIME headers +local cd = 'header_exists(Content-Disposition)' +local cte = 'header_exists(Content-Transfer-Encoding)' +local ct = 'header_exists(Content-Type)' +local mime_version = 'raw_header_exists(MIME-Version)' +local ct_text_plain = 'content_type_is_type(text) & content_type_is_subtype(plain)' +reconf['MIME_HEADER_CTYPE_ONLY'] = string.format('!(%s) & !(%s) & (%s) & !(%s) & !(%s)', cd, cte, ct, mime_version, ct_text_plain) + + +-- Forged Exchange messages +local msgid_dollars_ok = 'Message-Id=/[0-9a-f]{4,}\$[0-9a-f]{4,}\$[0-9a-f]{4,}\@\S+/Hr' +local mimeole_ms = 'X-MimeOLE=/^Produced By Microsoft MimeOLE/H' +local rcvd_with_exchange = 'Received=/with Microsoft Exchange Server/H' +reconf['R_MUA_EXCHANGE'] = 'X-MimeOLE=/Microsoft Exchange/H' +reconf['RATWARE_MS_HASH'] = string.format('(%s) & !(%s) & !(%s)', msgid_dollars_ok, mimeole_ms, rcvd_with_exchange) + +-- Reply-type in content-type +reconf['STOX_REPLY_TYPE'] = 'Content-Type=/text\/plain; .* reply-type=original/H' + +-- Fake Verizon headers +local fhelo_verizon = 'X-Spam-Relays-Untrusted=/^[^\]]+ helo=[^ ]+verizon\.net /iH' +local fhost_verizon = 'X-Spam-Relays-Untrusted=/^[^\]]+ rdns=[^ ]+verizon\.net /iH' +reconf['FM_FAKE_HELO_VERIZON'] = string.format('(%s) & !(%s)', fhelo_verizon, fhost_verizon) + +-- Forged yahoo msgid +local at_yahoo_msgid = 'Message-Id=/\@yahoo\.com\b/iH' +local at_yahoogroups_msgid = 'Message-Id=/\@yahoogroups\.com\b/iH' +local from_yahoo_com = 'From=/\@yahoo\.com\b/iH' +reconf['FORGED_MSGID_YAHOO'] = string.format('(%s) & !(%s)', at_yahoo_msgid, from_yahoo_com) +local r_from_yahoo_groups = 
'From=/rambler.ru\@returns\.groups\.yahoo\.com\b/iH' +local r_from_yahoo_groups_ro = 'From=/ro.ru\@returns\.groups\.yahoo\.com\b/iH' +reconf['FROM_CBR'] = 'From=/\@cbr\.ru\b/iH' +reconf['FROM_CSHOP'] = 'From=/\@cshop\.ru\b/iH' +reconf['FROM_MIRHOSTING'] = 'From=/\@mirhosting\.com\b/iH' +reconf['FROM_PASSIFLORA'] = 'From=/\@passiflora\.ru\b/iH' +reconf['FROM_WORLDBANK'] = 'From=/\@worldbank\.org\b/iH' + +-- Forged The Bat! MUA headers +local thebat_mua_v1 = 'X-Mailer=/^The Bat! \(v1\./H' +local ctype_has_boundary = 'Content-Type=/boundary/iH' +local bat_boundary = 'Content-Type=/boundary=\"?-{10}/H' +local mailman_21 = 'X-Mailman-Version=/\d/H' +reconf['FORGED_MUA_THEBAT_BOUN'] = string.format('(%s) & (%s) & !(%s) & !(%s)', thebat_mua_v1, ctype_has_boundary, bat_boundary, mailman_21) + +-- Two received headers with ip addresses +local double_ip_spam_1 = 'Received=/from \[\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\] by \d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3} with/H' +local double_ip_spam_2 = 'Received=/from\s+\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\s+by\s+\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3};/H' +reconf['RCVD_DOUBLE_IP_SPAM'] = string.format('(%s) | (%s)', double_ip_spam_1, double_ip_spam_2) + +-- Quoted reply-to from yahoo (seems to be forged) +local repto_quote = 'Reply-To=/\".*\"\s*\lua_state; /* First check for global variable 'config' */ lua_getglobal (L, "config"); - if (lua_isnil (L, 1)) { + if (lua_isnil (L, -1)) { /* Assign global table to set up attributes */ lua_newtable (L); lua_setglobal (L, "config"); @@ -744,9 +744,40 @@ handle_lua (struct config_file *cfg, struct rspamd_xml_userdata *ctx, GHashTable } /* First check "src" attribute */ if ((val = g_hash_table_lookup (attrs, "src")) != NULL) { - if (luaL_dofile (L, val) != 0) { - msg_err ("cannot load lua file %s: %s", val, lua_tostring (L, -1)); - return FALSE; + /* Chdir */ + tmp1 = g_strdup (val); + tmp2 = g_strdup (val); + lua_dir = dirname (tmp1); + lua_file = basename (tmp2); + if (lua_dir && lua_file) { + cur_dir = 
g_malloc (PATH_MAX); + getcwd (cur_dir, PATH_MAX); + if (chdir (lua_dir) != -1) { + if (luaL_dofile (L, lua_file) != 0) { + msg_err ("cannot load lua file %s: %s", val, lua_tostring (L, -1)); + chdir (cur_dir); + g_free (cur_dir); + g_free (tmp1); + g_free (tmp2); + return FALSE; + } + } + else { + msg_err ("cannot chdir to %s: %s", lua_dir, strerror (errno));; + chdir (cur_dir); + g_free (cur_dir); + g_free (tmp1); + g_free (tmp2); + return FALSE; + + } + chdir (cur_dir); + g_free (cur_dir); + g_free (tmp1); + g_free (tmp2); + } + else { + msg_err ("directory for file %s does not exists", val); } } else if (data != NULL && *data != '\0') { diff --git a/src/lua/lua_common.c b/src/lua/lua_common.c index ac3d3074b..dd79d9eab 100644 --- a/src/lua/lua_common.c +++ b/src/lua/lua_common.c @@ -200,6 +200,7 @@ init_lua (struct config_file *cfg) (void)luaopen_classifier (L); (void)luaopen_statfile (L); cfg->lua_state = L; + memory_pool_add_destructor (cfg->cfg_pool, (pool_destruct_func)lua_close, L); } } diff --git a/src/main.c b/src/main.c index e3bf1738d..2c02e82b1 100644 --- a/src/main.c +++ b/src/main.c @@ -264,7 +264,7 @@ reread_config (struct rspamd_main *rspamd) cfg_file = memory_pool_strdup (tmp_cfg->cfg_pool, rspamd->cfg->cfg_name); /* Save some variables */ tmp_cfg->cfg_name = cfg_file; - tmp_cfg->lua_state = rspamd->cfg->lua_state; + init_lua (tmp_cfg); if (! load_rspamd_config (tmp_cfg)) { msg_err ("cannot parse new config file, revert to old one");