From 2d8eebcf7a0951d3d1189ddface7678fea76dd4c Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Tue, 20 Apr 2010 16:32:23 +0400 Subject: [PATCH] * Bugfixes: - handle '\' characters in lua strings correctly - fix lua initialization - avoid of using global lua state (global L) - fix listen sockets hash to allow multiply workers of same type but on different listen sockets - fix modules options inserting to allow multiply options of the same name - fix parsing of lua options - fix lua rules --- conf/lua/regexp/drugs.lua | 102 +++++++++++++++++------------------ conf/lua/regexp/fraud.lua | 66 +++++++++++------------ conf/lua/regexp/headers.lua | 104 ++++++++++++++++++------------------ conf/lua/regexp/lotto.lua | 6 +-- conf/lua/rspamd.lua | 15 +++--- src/cfg_file.h | 2 +- src/cfg_utils.c | 2 +- src/cfg_xml.c | 13 +---- src/classifiers/winnow.c | 2 +- src/controller.c | 2 +- src/lua/lua_cfg_file.c | 2 +- src/lua/lua_common.c | 54 ++++++++----------- src/lua/lua_common.h | 2 +- src/main.c | 38 ++++++++++++- 14 files changed, 212 insertions(+), 198 deletions(-) diff --git a/conf/lua/regexp/drugs.lua b/conf/lua/regexp/drugs.lua index 30583063a..2c8b608dd 100644 --- a/conf/lua/regexp/drugs.lua +++ b/conf/lua/regexp/drugs.lua @@ -4,63 +4,63 @@ local reconf = config['regexp'] -local drugs_diet1 = '/(?:\b|\s)[_\W]{0,3}p[_\W]{0,3}h[_\W]{0,3}[e3\xE8-\xEB][_\W]{0,3}n[_\W]{0,3}t[_\W]{0,3}[e3\xE8-\xEB][_\W]{0,3}r[_\W]{0,3}m[_\W]{0,3}[i1!|l\xEC-\xEF][_\W]{0,3}n[_\W]{0,3}[e3\xE8-\xEB][_\W]{0,3}(?:\b|\s)/irP' -local drugs_diet2 = '/(?:\b|\s)_{0,3}[i1!|l\xEC-\xEF][_\W]?o[_\W]?n[_\W]?[a4\xE0-\xE6@][_\W]?m[_\W]?[i1!|l\xEC-\xEF][_\W]?n_{0,3}\b/irP' -local drugs_diet3 = '/\bbontril\b/irP' -local drugs_diet4 = '/\bphendimetrazine\b/irP' -local drugs_diet5 = '/\bdiethylpropion\b/irP' -local drugs_diet6 = '/(?:\b|\s)[_\W]{0,3}M[_\W]{0,3}[e3\xE8-\xEB][_\W]{0,3}r[_\W]{0,3}[i1!|l\xEC-\xEF][_\W]{0,3}d[_\W]{0,3}[i1!|l\xEC-\xEF][_\W]{0,3}[a4\xE0-\xE6@][_\W]{0,3}(?:\b|\s)/irP' -local drugs_diet7 = '/\b_{0,3}t[_\W]?[e3\xE8-\xEB][_\W]?n[_\W]?u[_\W]?a[_\W]?t[_\W]?[e3\xE8-\xEB]_{0,3}(?:\b|\s)/irP' -local drugs_diet8 = '/\b_{0,3}d[_\W]?[i1!|l\xEC-\xEF][_\W]?d[_\W]?r[_\W][e3\xE8-\xEB[_\W]?xx?_{0,3}\b/irP' -local drugs_diet9 = '/\b_{0,3}a[_\W]?d[_\W]?[i1!|l\xEC-\xEF][_\W]?p[_\W]?[e3\xE8-\xEB][_\W]?x_{0,3}\b/irP' -local drugs_diet10 = '/\b_{0,3}x?x[_\W]?[e3\xE8-\xEB][_\W]?n[_\W]?[i1!|l\xEC-\xEF][_\W]?c[_\W]?[a4\xE0-\xE6@][_\W]?l_{0,3}\b/irP' +local drugs_diet1 = '/(?:\\b|\\s)[_\\W]{0,3}p[_\\W]{0,3}h[_\\W]{0,3}[e3\\xE8-\\xEB][_\\W]{0,3}n[_\\W]{0,3}t[_\\W]{0,3}[e3\\xE8-\\xEB][_\\W]{0,3}r[_\\W]{0,3}m[_\\W]{0,3}[i1!|l\\xEC-\\xEF][_\\W]{0,3}n[_\\W]{0,3}[e3\\xE8-\\xEB][_\\W]{0,3}(?:\\b|\\s)/irP' +local drugs_diet2 = '/(?:\\b|\\s)_{0,3}[i1!|l\\xEC-\\xEF][_\\W]?o[_\\W]?n[_\\W]?[a4\\xE0-\\xE6@][_\\W]?m[_\\W]?[i1!|l\\xEC-\\xEF][_\\W]?n_{0,3}\\b/irP' +local drugs_diet3 = '/\\bbontril\\b/irP' +local drugs_diet4 = '/\\bphendimetrazine\\b/irP' +local drugs_diet5 = '/\\bdiethylpropion\\b/irP' +local drugs_diet6 = '/(?:\\b|\\s)[_\\W]{0,3}M[_\\W]{0,3}[e3\\xE8-\\xEB][_\\W]{0,3}r[_\\W]{0,3}[i1!|l\\xEC-\\xEF][_\\W]{0,3}d[_\\W]{0,3}[i1!|l\\xEC-\\xEF][_\\W]{0,3}[a4\\xE0-\\xE6@][_\\W]{0,3}(?:\\b|\\s)/irP' +local drugs_diet7 = '/\\b_{0,3}t[_\\W]?[e3\\xE8-\\xEB][_\\W]?n[_\\W]?u[_\\W]?a[_\\W]?t[_\\W]?[e3\\xE8-\\xEB]_{0,3}(?:\\b|\\s)/irP' +local drugs_diet8 = '/\\b_{0,3}d[_\\W]?[i1!|l\\xEC-\\xEF][_\\W]?d[_\\W]?r[_\\W][e3\\xE8-\\xEB[_\\W]?xx?_{0,3}\\b/irP' +local drugs_diet9 = '/\\b_{0,3}a[_\\W]?d[_\\W]?[i1!|l\\xEC-\\xEF][_\\W]?p[_\\W]?[e3\\xE8-\\xEB][_\\W]?x_{0,3}\\b/irP' +local drugs_diet10 = '/\\b_{0,3}x?x[_\\W]?[e3\\xE8-\\xEB][_\\W]?n[_\\W]?[i1!|l\\xEC-\\xEF][_\\W]?c[_\\W]?[a4\\xE0-\\xE6@][_\\W]?l_{0,3}\\b/irP' reconf['DRUGS_DIET'] = string.format('((%s) | (%s) | (%s)) & ((%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s))', reconf['R_UNDISC_RCPT'], reconf['R_BAD_CTE_7BIT'], reconf['R_NO_SPACE_IN_FROM'], drugs_diet1, drugs_diet2, drugs_diet3, drugs_diet4, drugs_diet5, drugs_diet6, drugs_diet7, drugs_diet8, drugs_diet9, drugs_diet10) -local drugs_erectile1 = '/(?:\b|\s)[_\W]{0,3}(?:\\\/|V)[_\W]{0,3}[ij1!|l\xEC\xED\xEE\xEF][_\W]{0,3}[a40\xE0-\xE6@][_\W]{0,3}[xyz]?[gj][_\W]{0,3}r[_\W]{0,3}[a40\xE0-\xE6@][_\W]{0,3}x?[_\W]{0,3}(?:\b|\s)/irP' -local drugs_erectile2 = '/\bV(?:agira|igara|iaggra|iaegra)\b/irP' -local drugs_erectile3 = '/(?:\A|[\s\x00-\x2f\x3a-\x40\x5b-\x60\x7b-\x7f])[_\W]{0,3}C[_\W]{0,3}[ij1!|l\xEC\xED\xEE\xEF][_\W]{0,3}[a4\xE0-\xE6@][_\W]{0,3}l?[l!|1][_\W]{0,3}[i1!|l\xEC-\xEF][_\W]{0,3}s[_\W]{0,3}(?:\b|\s)/irP' -local drugs_erectile4 = '/\bC(?:alis|ilias|ilais)\b/irP' -local drugs_erectile5 = '/\b_{0,3}s[_\W]?[i1!|l\xEC-\xEF][_\W]?l[_\W]?d[_\W]?[e3\xE8-\xEB][_\W]?n[_\W]?[a4\xE0-\xE6@][_\W]?f[_\W]?[i1!|l\xEC-\xEF][_\W]?l c[_\W]?[i1!|l\xEC-\xEF][_\W]?t[_\W]?r[_\W]?[a4\xE0-\xE6@][_\W]?t[_\W]?[e3\xE8-\xEB]_{0,3}(?:\b|\s)/irP' -local drugs_erectile6 = '/\b_{0,3}L[_\W]?[e3\xE8-\xEB][_\W]?(?:\\\/|V)[_\W]?[i1!|l\xEC-\xEF][_\W]?t[_\W]?r[_\W]?[a4\xE0-\xE6@][_\W]?(?:\b|\s)/irP' -local drugs_erectile8 = '/\b_{0,3}T[_\W]?[a4\xE0-\xE6@][_\W]?d[_\W]?[a4\xE0-\xE6@][_\W]?l[_\W]?[a4\xE0-\xE6@][_\W]?f[_\W]?[i1!|l\xEC-\xEF][_\W]?l_{0,3}\b/irP' -local drugs_erectile10 = '/\b_{0,3}V[_\W]?(?:i|\ï\;)[_\W]?(?:a|\à|\å)\;?[_\W]?g[_\W]?r[_\W]?(?:a|\à|\å)\b/irP' -local drugs_erectile11 = '/(?:\b|\s)_{0,3}[a4\xE0-\xE6@][_\W]{0,3}p[_\W]{0,3}c[_\W]{0,3}[a4\xE0-\xE6@][_\W]{0,3}[l!|1][_\W]{0,3}[i1!|l\xEC-\xEF][_\W]{0,3}s_{0,3}\b/irP' +local drugs_erectile1 = '/(?:\\b|\\s)[_\\W]{0,3}(?:\\\\\\/|V)[_\\W]{0,3}[ij1!|l\\xEC\\xED\\xEE\\xEF][_\\W]{0,3}[a40\\xE0-\\xE6@][_\\W]{0,3}[xyz]?[gj][_\\W]{0,3}r[_\\W]{0,3}[a40\\xE0-\\xE6@][_\\W]{0,3}x?[_\\W]{0,3}(?:\\b|\\s)/irP' +local drugs_erectile2 = '/\\bV(?:agira|igara|iaggra|iaegra)\\b/irP' +local drugs_erectile3 = '/(?:\\A|[\\s\\x00-\\x2f\\x3a-\\x40\\x5b-\\x60\\x7b-\\x7f])[_\\W]{0,3}C[_\\W]{0,3}[ij1!|l\\xEC\\xED\\xEE\\xEF][_\\W]{0,3}[a4\\xE0-\\xE6@][_\\W]{0,3}l?[l!|1][_\\W]{0,3}[i1!|l\\xEC-\\xEF][_\\W]{0,3}s[_\\W]{0,3}(?:\\b|\\s)/irP' +local drugs_erectile4 = '/\\bC(?:alis|ilias|ilais)\\b/irP' +local drugs_erectile5 = '/\\b_{0,3}s[_\\W]?[i1!|l\\xEC-\\xEF][_\\W]?l[_\\W]?d[_\\W]?[e3\\xE8-\\xEB][_\\W]?n[_\\W]?[a4\\xE0-\\xE6@][_\\W]?f[_\\W]?[i1!|l\\xEC-\\xEF][_\\W]?l c[_\\W]?[i1!|l\\xEC-\\xEF][_\\W]?t[_\\W]?r[_\\W]?[a4\\xE0-\\xE6@][_\\W]?t[_\\W]?[e3\\xE8-\\xEB]_{0,3}(?:\\b|\\s)/irP' +local drugs_erectile6 = '/\\b_{0,3}L[_\\W]?[e3\\xE8-\\xEB][_\\W]?(?:\\\\\\/|V)[_\\W]?[i1!|l\\xEC-\\xEF][_\\W]?t[_\\W]?r[_\\W]?[a4\\xE0-\\xE6@][_\\W]?(?:\\b|\\s)/irP' +local drugs_erectile8 = '/\\b_{0,3}T[_\\W]?[a4\\xE0-\\xE6@][_\\W]?d[_\\W]?[a4\\xE0-\\xE6@][_\\W]?l[_\\W]?[a4\\xE0-\\xE6@][_\\W]?f[_\\W]?[i1!|l\\xEC-\\xEF][_\\W]?l_{0,3}\\b/irP' +local drugs_erectile10 = '/\\b_{0,3}V[_\\W]?(?:i|\\ï\\;)[_\\W]?(?:a|\\à|\\å)\\;?[_\\W]?g[_\\W]?r[_\\W]?(?:a|\\à|\\å)\\b/irP' +local drugs_erectile11 = '/(?:\\b|\\s)_{0,3}[a4\\xE0-\\xE6@][_\\W]{0,3}p[_\\W]{0,3}c[_\\W]{0,3}[a4\\xE0-\\xE6@][_\\W]{0,3}[l!|1][_\\W]{0,3}[i1!|l\\xEC-\\xEF][_\\W]{0,3}s_{0,3}\\b/irP' reconf['DRUGS_ERECTILE'] = string.format('((%s) | (%s) | (%s)) & ((%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s))', reconf['R_UNDISC_RCPT'], reconf['R_BAD_CTE_7BIT'], reconf['R_NO_SPACE_IN_FROM'], drugs_erectile1, drugs_erectile2, drugs_erectile3, drugs_erectile4, drugs_erectile5, drugs_erectile6, drugs_erectile8, drugs_erectile10, drugs_erectile11) -local drugs_anxiety1 = '/(?:\b|\s)[_\W]{0,3}x?x[_\W]{0,3}[a4\xE0-\xE6@][_\W]{0,3}n[_\W]{0,3}[ea4\xE1\xE2\xE3@][_\W]{0,3}xx?_{0,3}\b/irP' -local drugs_anxiety2 = '/\bAlprazolam\b/irP' -local drugs_anxiety3 = '/(?:\b|\s)[_\W]{0,3}(?:\\\/|V)[_\W]{0,3}[a4\xE0-\xE6@][_\W]{0,3}[l|][_\W]{0,3}[i1!|l\xEC-\xEF][_\W]{0,3}[u\xB5\xF9-\xFC][_\W]{0,3}m\b/irP' -local drugs_anxiety4 = '/\b_{0,3}D[_\W]?[i1!|l\xEC-\xEF][_\W]?[a4\xE0-\xE6@][_\W]?z[_\W]?[ea3\xE9\xEA\xEB][_\W]?p[_\W]?[a4\xE0-\xE6@][_\W]?m_{0,3}\b/irP' -local drugs_anxiety5 = '/(?:\b|\s)[a4\xE0-\xE6@][_\W]?t[_\W]?[i1!|l\xEC-\xEF][_\W]?v[_\W]?[a4\xE0-\xE6@][_\W]?n_{0,3}\b/irP' -local drugs_anxiety6 = '/\b_{0,3}l[_\W]?[o0\xF2-\xF6][_\W]?r[_\W]?[a4\xE0-\xE6@][_\W]?z[_\W]?[e3\xE8-\xEB][_\W]?p[_\W]?[a4\xE0-\xE6@][_\W]?m_{0,3}\b/irP' -local drugs_anxiety7 = '/\b_{0,3}c[_\W]?l[_\W]?[o0\xF2-\xF6][_\W]?n[_\W]?[a4\xE0-\xE6@][_\W]?z[_\W]?e[_\W]?p[_\W]?[a4\xE0-\xE6@][_\W]?m\b/irP' -local drugs_anxiety8 = '/\bklonopin\b/irP' -local drugs_anxiety9 = '/\brivotril\b/irP' +local drugs_anxiety1 = '/(?:\\b|\\s)[_\\W]{0,3}x?x[_\\W]{0,3}[a4\\xE0-\\xE6@][_\\W]{0,3}n[_\\W]{0,3}[ea4\\xE1\\xE2\\xE3@][_\\W]{0,3}xx?_{0,3}\\b/irP' +local drugs_anxiety2 = '/\\bAlprazolam\\b/irP' +local drugs_anxiety3 = '/(?:\\b|\\s)[_\\W]{0,3}(?:\\\\\\/|V)[_\\W]{0,3}[a4\\xE0-\\xE6@][_\\W]{0,3}[l|][_\\W]{0,3}[i1!|l\\xEC-\\xEF][_\\W]{0,3}[u\\xB5\\xF9-\\xFC][_\\W]{0,3}m\\b/irP' +local drugs_anxiety4 = '/\\b_{0,3}D[_\\W]?[i1!|l\\xEC-\\xEF][_\\W]?[a4\\xE0-\\xE6@][_\\W]?z[_\\W]?[ea3\\xE9\\xEA\\xEB][_\\W]?p[_\\W]?[a4\\xE0-\\xE6@][_\\W]?m_{0,3}\\b/irP' +local drugs_anxiety5 = '/(?:\\b|\\s)[a4\\xE0-\\xE6@][_\\W]?t[_\\W]?[i1!|l\\xEC-\\xEF][_\\W]?v[_\\W]?[a4\\xE0-\\xE6@][_\\W]?n_{0,3}\\b/irP' +local drugs_anxiety6 = '/\\b_{0,3}l[_\\W]?[o0\\xF2-\\xF6][_\\W]?r[_\\W]?[a4\\xE0-\\xE6@][_\\W]?z[_\\W]?[e3\\xE8-\\xEB][_\\W]?p[_\\W]?[a4\\xE0-\\xE6@][_\\W]?m_{0,3}\\b/irP' +local drugs_anxiety7 = '/\\b_{0,3}c[_\\W]?l[_\\W]?[o0\\xF2-\\xF6][_\\W]?n[_\\W]?[a4\\xE0-\\xE6@][_\\W]?z[_\\W]?e[_\\W]?p[_\\W]?[a4\\xE0-\\xE6@][_\\W]?m\\b/irP' +local drugs_anxiety8 = '/\\bklonopin\\b/irP' +local drugs_anxiety9 = '/\\brivotril\\b/irP' reconf['DRUGS_ANXIETY'] = string.format('((%s) | (%s) | (%s)) & ((%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s))', reconf['R_UNDISC_RCPT'], reconf['R_BAD_CTE_7BIT'], reconf['R_NO_SPACE_IN_FROM'], drugs_anxiety1, drugs_anxiety2, drugs_anxiety3, drugs_anxiety4, drugs_anxiety5, drugs_anxiety6, drugs_anxiety7, drugs_anxiety8, drugs_anxiety9) reconf['DRUGS_ANXIETY_EREC'] = string.format('(%s) & (%s)', reconf['DRUGS_ERECTILE'], reconf['DRUGS_ANXIETY']) -local drugs_pain1 = '/\b_{0,3}h[_\W]?y[_\W]?d[_\W]?r[_\W]?[o0\xF2-\xF6][_\W]?c[_\W]?[o0\xF2-\xF6][_\W]?d[_\W]?[o0\xF2-\xF6][_\W]?n[_\W]?e_{0,3}\b/irP' -local drugs_pain2 = '/\b_{0,3}c[o0\xF2-\xF6]deine_{0,3}\b/irP' -local drugs_pain3 = '/(?:\b|\s)[_\W]{0,3}[u\xB5\xF9-\xFC][_\W]{0,3}l[_\W]{0,3}t[_\W]{0,3}r[_\W]{0,3}[a4\xE0-\xE6@][_\W]{0,3}m_{0,3}\b/irP' -local drugs_pain4 = '/(?:\b|\s)[_\W]{0,3}(?:\\\/|V)[_\W]{0,3}[i1!|l\xEC-\xEF][_\W]{0,3}c[_\W]{0,3}[o0\xF2-\xF6][_\W]{0,3}d[_\W]{0,3}[i1!|l\xEC-\xEF][_\W]{0,3}ns?[_\W]{0,3}(?:\b|\s)/irP' -local drugs_pain5 = '/\b_{0,3}t[_\W]?r[_\W]?[a4\xE0-\xE6@][_\W]?m[_\W]?[a4\xE0-\xE6@][_\W]?d[_\W]?[o0\xF2-\xF6][_\W]?[l!|1]_{0,3}\b/irP' -local drugs_pain6 = '/\b_{0,3}u[_\W]?l[_\W]?t[_\W]?r[_\W]?a[_\W]?c[_\W]?e[_\W]?t_{0,3}\b/irP' -local drugs_pain7 = '/\b_{0,3}f[_\W]?[i1!|l\xEC-\xEF][_\W]?[o0\xF2-\xF6][_\W]?r[_\W]?[i1!|l\xEC-\xEF][_\W]?c[_\W]?[e3\xE8-\xEB][_\W]?[t7]_{0,3}\b/irP' -local drugs_pain8 = '/\b_{0,3}c[_\W]?[e3\xE8-\xEB][_\W]?l[_\W]?[e3\xE8-\xEB][_\W]?b[_\W]?r[_\W]?[e3\xE8-\xEB][_\W]?x_{0,3}\b/irP' -local drugs_pain9 = '/(?:\b|\s)_{0,3}[i1!|l\xEC-\xEF]m[i1!|l\xEC-\xEF]tr[e3\xE8-\xEB]x_{0,3}\b/irP' -local drugs_pain10 = '/(?:\b|\s)[_\W]{0,3}(?:\\\/|V)[_\W]{0,3}[i1!|l\xEC-\xEF][_\W]{0,3}[o0\xF2-\xF6][_\W]{0,3}x[_\W]{0,3}xx?_{0,3}\b/irP' -local drugs_pain11 = '/\bzebutal\b/irP' -local drugs_pain12 = '/\besgic plus\b/irP' -local drugs_pain13 = '/\bD[_\W]?[a4\xE0-\xE6@][_\W]?r[_\W]?v[_\W]?[o0\xF2-\xF6][_\W]?n\b/irP' -local drugs_pain14 = '/N[o0\xF2-\xF6]rc[o0\xF2-\xF6]/irP' +local drugs_pain1 = '/\\b_{0,3}h[_\\W]?y[_\\W]?d[_\\W]?r[_\\W]?[o0\\xF2-\\xF6][_\\W]?c[_\\W]?[o0\\xF2-\\xF6][_\\W]?d[_\\W]?[o0\\xF2-\\xF6][_\\W]?n[_\\W]?e_{0,3}\\b/irP' +local drugs_pain2 = '/\\b_{0,3}c[o0\\xF2-\\xF6]deine_{0,3}\\b/irP' +local drugs_pain3 = '/(?:\\b|\\s)[_\\W]{0,3}[u\\xB5\\xF9-\\xFC][_\\W]{0,3}l[_\\W]{0,3}t[_\\W]{0,3}r[_\\W]{0,3}[a4\\xE0-\\xE6@][_\\W]{0,3}m_{0,3}\\b/irP' +local drugs_pain4 = '/(?:\\b|\\s)[_\\W]{0,3}(?:\\\\\\/|V)[_\\W]{0,3}[i1!|l\\xEC-\\xEF][_\\W]{0,3}c[_\\W]{0,3}[o0\\xF2-\\xF6][_\\W]{0,3}d[_\\W]{0,3}[i1!|l\\xEC-\\xEF][_\\W]{0,3}ns?[_\\W]{0,3}(?:\\b|\\s)/irP' +local drugs_pain5 = '/\\b_{0,3}t[_\\W]?r[_\\W]?[a4\\xE0-\\xE6@][_\\W]?m[_\\W]?[a4\\xE0-\\xE6@][_\\W]?d[_\\W]?[o0\\xF2-\\xF6][_\\W]?[l!|1]_{0,3}\\b/irP' +local drugs_pain6 = '/\\b_{0,3}u[_\\W]?l[_\\W]?t[_\\W]?r[_\\W]?a[_\\W]?c[_\\W]?e[_\\W]?t_{0,3}\\b/irP' +local drugs_pain7 = '/\\b_{0,3}f[_\\W]?[i1!|l\\xEC-\\xEF][_\\W]?[o0\\xF2-\\xF6][_\\W]?r[_\\W]?[i1!|l\\xEC-\\xEF][_\\W]?c[_\\W]?[e3\\xE8-\\xEB][_\\W]?[t7]_{0,3}\\b/irP' +local drugs_pain8 = '/\\b_{0,3}c[_\\W]?[e3\\xE8-\\xEB][_\\W]?l[_\\W]?[e3\\xE8-\\xEB][_\\W]?b[_\\W]?r[_\\W]?[e3\\xE8-\\xEB][_\\W]?x_{0,3}\\b/irP' +local drugs_pain9 = '/(?:\\b|\\s)_{0,3}[i1!|l\\xEC-\\xEF]m[i1!|l\\xEC-\\xEF]tr[e3\\xE8-\\xEB]x_{0,3}\\b/irP' +local drugs_pain10 = '/(?:\\b|\\s)[_\\W]{0,3}(?:\\\\\\/|V)[_\\W]{0,3}[i1!|l\\xEC-\\xEF][_\\W]{0,3}[o0\\xF2-\\xF6][_\\W]{0,3}x[_\\W]{0,3}xx?_{0,3}\\b/irP' +local drugs_pain11 = '/\\bzebutal\\b/irP' +local drugs_pain12 = '/\\besgic plus\\b/irP' +local drugs_pain13 = '/\\bD[_\\W]?[a4\\xE0-\\xE6@][_\\W]?r[_\\W]?v[_\\W]?[o0\\xF2-\\xF6][_\\W]?n\\b/irP' +local drugs_pain14 = '/N[o0\\xF2-\\xF6]rc[o0\\xF2-\\xF6]/irP' local drugs_pain = string.format('((%s) | (%s) | (%s)) & ((%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s) || (%s) | (%s))', reconf['R_UNDISC_RCPT'], reconf['R_BAD_CTE_7BIT'], reconf['R_NO_SPACE_IN_FROM'], drugs_pain1, drugs_pain2, drugs_pain3, drugs_pain4, drugs_pain5, drugs_pain6, drugs_pain7, drugs_pain8, drugs_pain9, drugs_pain10, drugs_pain11, drugs_pain12, drugs_pain13, drugs_pain14) -local drugs_sleep1 = '/(?:\b|\s)[_\W]{0,3}[a4\xE0-\xE6@][_\W]{0,3}m[_\W]{0,3}b[_\W]{0,3}[i1!|l\xEC-\xEF][_\W]{0,3}[e3\xE8-\xEB][_\W]{0,3}n[_\W]{0,3}(?:\b|\s)/irP' -local drugs_sleep2 = '/(?:\b|\s)[_\W]{0,3}S[_\W]{0,3}[o0\xF2-\xF6][_\W]{0,3}n[_\W]{0,3}[a4\xE0-\xE6@][_\W]{0,3}t[_\W]{0,3}[a4\xE0-\xE6@][_\W]{0,3}(?:\b|\s)/irP' -local drugs_sleep3 = '/\b_{0,3}R[_\W]?[e3\xE8-\xEB][_\W]?s[_\W]?t[_\W]?[o0\xF2-\xF6][_\W]?r[_\W]?i[_\W]?l_{0,3}\b/irP' -local drugs_sleep4 = '/\b_{0,3}H[_\W]?[a4\xE0-\xE6@][_\W]?l[_\W]?c[_\W]?i[_\W]?[o0\xF2-\xF6][_\W]?n_{0,3}\b/irP' +local drugs_sleep1 = '/(?:\\b|\\s)[_\\W]{0,3}[a4\\xE0-\\xE6@][_\\W]{0,3}m[_\\W]{0,3}b[_\\W]{0,3}[i1!|l\\xEC-\\xEF][_\\W]{0,3}[e3\\xE8-\\xEB][_\\W]{0,3}n[_\\W]{0,3}(?:\\b|\\s)/irP' +local drugs_sleep2 = '/(?:\\b|\\s)[_\\W]{0,3}S[_\\W]{0,3}[o0\\xF2-\\xF6][_\\W]{0,3}n[_\\W]{0,3}[a4\\xE0-\\xE6@][_\\W]{0,3}t[_\\W]{0,3}[a4\\xE0-\\xE6@][_\\W]{0,3}(?:\\b|\\s)/irP' +local drugs_sleep3 = '/\\b_{0,3}R[_\\W]?[e3\\xE8-\\xEB][_\\W]?s[_\\W]?t[_\\W]?[o0\\xF2-\\xF6][_\\W]?r[_\\W]?i[_\\W]?l_{0,3}\\b/irP' +local drugs_sleep4 = '/\\b_{0,3}H[_\\W]?[a4\\xE0-\\xE6@][_\\W]?l[_\\W]?c[_\\W]?i[_\\W]?[o0\\xF2-\\xF6][_\\W]?n_{0,3}\\b/irP' local drugs_sleep = string.format('(%s) | (%s) | (%s) | (%s)', drugs_sleep1, drugs_sleep2, drugs_sleep3, drugs_sleep4) -local drugs_muscle1 = '/(?:\b|\s)[_\W]{0,3}s[_\W]{0,3}[o0\xF2-\xF6][_\W]{0,3}m[_\W]{0,3}[a4\xE0-\xE6@][_\W]{0,3}(?:\b|\s)/irP' -local drugs_muscle2 = '/\b_{0,3}cycl[o0\xF2-\xF6]b[e3\xE8-\xEB]nz[a4\xE0-\xE6@]pr[i1!|l\xEC-\xEF]n[e3\xE8-\xEB]_{0,3}(?:\b|\s)/irP' -local drugs_muscle3 = '/\b_{0,3}f[_\W]?l[_\W]?[e3\xE8-\xEB][_\W]?x[_\W]?[e3\xE8-\xEB][_\W]?r[_\W]?[i1!|l\xEC-\xEF]_{0,3}[_\W]?l_{0,3}\b/irP' -local drugs_muscle4 = '/\b_{0,3}z[_\W]?a[_\W]?n[_\W]?a[_\W]?f[_\W]?l[_\W]?e[_\W]?x_{0,3}\b/irP' -local drugs_muscle5 = '/\bskelaxin\b/irP' +local drugs_muscle1 = '/(?:\\b|\\s)[_\\W]{0,3}s[_\\W]{0,3}[o0\\xF2-\\xF6][_\\W]{0,3}m[_\\W]{0,3}[a4\\xE0-\\xE6@][_\\W]{0,3}(?:\\b|\\s)/irP' +local drugs_muscle2 = '/\\b_{0,3}cycl[o0\\xF2-\\xF6]b[e3\\xE8-\\xEB]nz[a4\\xE0-\\xE6@]pr[i1!|l\\xEC-\\xEF]n[e3\\xE8-\\xEB]_{0,3}(?:\\b|\\s)/irP' +local drugs_muscle3 = '/\\b_{0,3}f[_\\W]?l[_\\W]?[e3\\xE8-\\xEB][_\\W]?x[_\\W]?[e3\\xE8-\\xEB][_\\W]?r[_\\W]?[i1!|l\\xEC-\\xEF]_{0,3}[_\\W]?l_{0,3}\\b/irP' +local drugs_muscle4 = '/\\b_{0,3}z[_\\W]?a[_\\W]?n[_\\W]?a[_\\W]?f[_\\W]?l[_\\W]?e[_\\W]?x_{0,3}\\b/irP' +local drugs_muscle5 = '/\\bskelaxin\\b/irP' reconf['DRUGS_MUSCLE'] = string.format('((%s) | (%s) | (%s)) & ((%s) | (%s) | (%s) | (%s) | (%s))', reconf['R_UNDISC_RCPT'], reconf['R_BAD_CTE_7BIT'], reconf['R_NO_SPACE_IN_FROM'], drugs_muscle1, drugs_muscle2, drugs_muscle3, drugs_muscle4, drugs_muscle5) reconf['DRUGS_MANYKINDS'] = string.format('((%s) | (%s) | (%s)) & regexp_match_number(3, (%s), (%s), (%s), (%s), (%s), (%s))', reconf['R_UNDISC_RCPT'], reconf['R_BAD_CTE_7BIT'], reconf['R_NO_SPACE_IN_FROM'], reconf['DRUGS_ERECTILE'], reconf['DRUGS_DIET'], drugs_pain, drugs_sleep, reconf['DRUGS_MUSCLE'], reconf['DRUGS_ANXIETY']) diff --git a/conf/lua/regexp/fraud.lua b/conf/lua/regexp/fraud.lua index 5800fa271..40465744f 100644 --- a/conf/lua/regexp/fraud.lua +++ b/conf/lua/regexp/fraud.lua @@ -1,43 +1,43 @@ -- Fraud messages (Nigeria spam, viagra, etc) local reconf = config['regexp'] -local fraud_dbi = '/(?:\bdollars?\b|\busd(?:ollars)?(?:[0-9]|\b)|\bus\$|\$[0-9,.]{6,}|\$[0-9].{0,8}[mb]illion|\$[0-9.,]{2,10} ?m|\beuros?\b|u[.]?s[.]? [0-9.]+ m)/irP' +local fraud_dbi = '/(?:\\bdollars?\\b|\\busd(?:ollars)?(?:[0-9]|\\b)|\\bus\\$|\\$[0-9,.]{6,}|\\$[0-9].{0,8}[mb]illion|\\$[0-9.,]{2,10} ?m|\\beuros?\\b|u[.]?s[.]? [0-9.]+ m)/irP' local fraud_kjv = '/(?:claim|concerning) (?:the|this) money/irP' local fraud_irj = '/(?:finance|holding|securit(?:ies|y)) (?:company|firm|storage house)/irP' local fraud_neb = '/(?:government|bank) of nigeria/irP' local fraud_xjr = '/(?:who was a|as a|an? honest|you being a|to any) foreigner/irP' -local fraud_dpr = '/\b(?:(?:respond|reply) (?:urgently|immediately)|(?:urgent|immediate|earliest) (?:reply|response))\b/irP' -local fraud_pts = '/\b(?:ass?ass?inat(?:ed|ion)|murder(?:e?d)?|kill(?:ed|ing)\b[^.]{0,99}\b(?:war veterans|rebels?))\b/irP' -local fraud_bep = '/\b(?:bank of nigeria|central bank of|trust bank|apex bank|amalgamated bank)\b/irP' -local fraud_tdp = '/\b(?:business partner(?:s|ship)?|silent partner(?:s|ship)?)\b/irP' -local fraud_gan = '/\b(?:charles taylor|serena|abacha|gu[eйи]i|sese[- ]?seko|kabila)\b/irP' -local fraud_irt = '/\b(?:compliments? of the|dear friend|dear sir|yours faithfully|season\'?s greetings)\b/irP' -local fraud_aon = '/\b(?:confidential|private|alternate|alternative) (?:(?:e-? *)?mail)\b/irP' -local fraud_wny = '/\b(?:disburse?(?:ment)?|incurr?(?:ed)?|remunerr?at(?:ed?|ion)|remm?itt?(?:ed|ance|ing)?)\b/irP' -local fraud_ipk = '/\b(?:in|to|visit) your country\b/irP' -local fraud_qxx = '/\b(?:my name is|i am) (?:mrs?|engr|barrister|dr|prince(?:ss)?)[. ]/irP' -local fraud_iou = '/\b(?:no risks?|risk-? *free|free of risks?|100% safe)\b/irP' -local fraud_ezy = '/\b(?:of|the) late president\b/irP' -local fraud_mly = '/\b(?:reply|respond)\b[^.]{0,50}\b(?:to|through)\b[^.]{0,50}\@\b/irP' -local fraud_zfj = '/\b(?:wife|son|brother|daughter) of the late\b/irP' -local fraud_kdt = '/\bU\.?S\.?(?:D\.?)?\s*(?:\$\s*)?(?:\d+,\d+,\d+|\d+\.\d+\.\d+|\d+(?:\.\d+)?\s*milli?on)/irP' -local fraud_ulk = '/\baffidavits?\b/irP' -local fraud_bgp = '/\battached to ticket number\b/irP' -local fraud_fbi = '/\bdisburs/irP' -local fraud_jbu = '/\bforeign account\b/irP' -local fraud_yww = '/\bfurnish you with\b/irP' -local fraud_jyg = '/\bgive\s+you .{0,15}(?:fund|money|total|sum|contact|percent)\b/irP' -local fraud_xvw = '/\bhonest cooperation\b/irP' -local fraud_uuy = '/\blegitimate business(?:es)?\b/irP' -local fraud_snt = '/\blocate(?: .{1,20})? extended relative/irP' -local fraud_ltx = '/\bmilli?on (?:.{1,25} thousand\s*)?(?:(?:united states|u\.?s\.?) dollars|(?i:U\.?S\.?D?))\b/irP' -local fraud_jnb = '/\boperat(?:e|ing)\b[^.]{0,99}\b(?:for(?:ei|ie)gn|off-? ?shore|over-? ?seas?) (?:bank )?accounts?\b/irP' -local fraud_qfy = '/\bover-? *(?:invoiced?|cost(?:s|ing)?)\b/irP' -local fraud_wdr = '/\bprivate lawyer\b/irP' -local fraud_wfc = '/\bsecur(?:e|ing) (?:the )?(?:funds?|monies)\b/irP' -local fraud_aum = '/\bthe desk of\b/irP' -local fraud_mcq = '/\btransaction\b.{1,30}\b(?:magnitude|diplomatic|strict|absolute|secret|confiden(?:tial|ce)|guarantee)/irP' -local fraud_etx = '/\byour\b[^.]{0,99}\b(?:contact (?:details|information)|private (?:e?[- ]?mail|telephone|tel|phone|fax))\b/irP' +local fraud_dpr = '/\\b(?:(?:respond|reply) (?:urgently|immediately)|(?:urgent|immediate|earliest) (?:reply|response))\\b/irP' +local fraud_pts = '/\\b(?:ass?ass?inat(?:ed|ion)|murder(?:e?d)?|kill(?:ed|ing)\\b[^.]{0,99}\\b(?:war veterans|rebels?))\\b/irP' +local fraud_bep = '/\\b(?:bank of nigeria|central bank of|trust bank|apex bank|amalgamated bank)\\b/irP' +local fraud_tdp = '/\\b(?:business partner(?:s|ship)?|silent partner(?:s|ship)?)\\b/irP' +local fraud_gan = '/\\b(?:charles taylor|serena|abacha|gu[eйи]i|sese[- ]?seko|kabila)\\b/irP' +local fraud_irt = '/\\b(?:compliments? of the|dear friend|dear sir|yours faithfully|season\'?s greetings)\\b/irP' +local fraud_aon = '/\\b(?:confidential|private|alternate|alternative) (?:(?:e-? *)?mail)\\b/irP' +local fraud_wny = '/\\b(?:disburse?(?:ment)?|incurr?(?:ed)?|remunerr?at(?:ed?|ion)|remm?itt?(?:ed|ance|ing)?)\\b/irP' +local fraud_ipk = '/\\b(?:in|to|visit) your country\\b/irP' +local fraud_qxx = '/\\b(?:my name is|i am) (?:mrs?|engr|barrister|dr|prince(?:ss)?)[. ]/irP' +local fraud_iou = '/\\b(?:no risks?|risk-? *free|free of risks?|100% safe)\\b/irP' +local fraud_ezy = '/\\b(?:of|the) late president\\b/irP' +local fraud_mly = '/\\b(?:reply|respond)\\b[^.]{0,50}\\b(?:to|through)\\b[^.]{0,50}\\@\\b/irP' +local fraud_zfj = '/\\b(?:wife|son|brother|daughter) of the late\\b/irP' +local fraud_kdt = '/\\bU\\.?S\\.?(?:D\\.?)?\\s*(?:\\$\\s*)?(?:\\d+,\\d+,\\d+|\\d+\\.\\d+\\.\\d+|\\d+(?:\\.\\d+)?\\s*milli?on)/irP' +local fraud_ulk = '/\\baffidavits?\\b/irP' +local fraud_bgp = '/\\battached to ticket number\\b/irP' +local fraud_fbi = '/\\bdisburs/irP' +local fraud_jbu = '/\\bforeign account\\b/irP' +local fraud_yww = '/\\bfurnish you with\\b/irP' +local fraud_jyg = '/\\bgive\\s+you .{0,15}(?:fund|money|total|sum|contact|percent)\\b/irP' +local fraud_xvw = '/\\bhonest cooperation\\b/irP' +local fraud_uuy = '/\\blegitimate business(?:es)?\\b/irP' +local fraud_snt = '/\\blocate(?: .{1,20})? extended relative/irP' +local fraud_ltx = '/\\bmilli?on (?:.{1,25} thousand\\s*)?(?:(?:united states|u\\.?s\\.?) dollars|(?i:U\\.?S\\.?D?))\\b/irP' +local fraud_jnb = '/\\boperat(?:e|ing)\\b[^.]{0,99}\\b(?:for(?:ei|ie)gn|off-? ?shore|over-? ?seas?) (?:bank )?accounts?\\b/irP' +local fraud_qfy = '/\\bover-? *(?:invoiced?|cost(?:s|ing)?)\\b/irP' +local fraud_wdr = '/\\bprivate lawyer\\b/irP' +local fraud_wfc = '/\\bsecur(?:e|ing) (?:the )?(?:funds?|monies)\\b/irP' +local fraud_aum = '/\\bthe desk of\\b/irP' +local fraud_mcq = '/\\btransaction\\b.{1,30}\\b(?:magnitude|diplomatic|strict|absolute|secret|confiden(?:tial|ce)|guarantee)/irP' +local fraud_etx = '/\\byour\\b[^.]{0,99}\\b(?:contact (?:details|information)|private (?:e?[- ]?mail|telephone|tel|phone|fax))\\b/irP' local fraud_pvn = '/as the beneficiary/irP' local fraud_fvu = '/award notification/irP' local fraud_ckf = '/computer ballot system/irP' diff --git a/conf/lua/regexp/headers.lua b/conf/lua/regexp/headers.lua index 4ece9aed2..b775f72a9 100644 --- a/conf/lua/regexp/headers.lua +++ b/conf/lua/regexp/headers.lua @@ -4,13 +4,15 @@ local reconf = config['regexp'] -- Subject needs encoding -- Define encodings types -local subject_encoded_b64 = 'Subject=/=\?\S+\?B\?/iX' -local subject_encoded_qp = 'Subject=/=\?\S+\?Q\?/iX' +local subject_encoded_b64 = 'Subject=/=\\?\\S+\\?B\\?/iX' +local subject_encoded_qp = 'Subject=/=\\?\\S+\\?Q\\?/iX' -- Define whether subject must be encoded (contains non-7bit characters) -local subject_needs_mime = 'Subject=/[\x00-\x08\x0b\x0c\x0e-\x1f\x7f-\xff]/X' +local subject_needs_mime = 'Subject=/[\\x00-\\x08\\x0b\\x0c\\x0e-\\x1f\\x7f-\\xff]/X' -- Final rule reconf['SUBJECT_NEEDS_ENCODING'] = string.format('!(%s) & !(%s) & (%s)', subject_encoded_b64, subject_encoded_qp, subject_needs_mime) +-- Detects that there is no space in From header (e.g. Some Name) +reconf['R_NO_SPACE_IN_FROM'] = 'From=/\\S<[-\\w\\.]+\\@[-\\w\\.]+>/X' -- Detects missing subject local has_subject = 'header_exists(Subject)' @@ -24,7 +26,7 @@ local r_ctype_text = 'content_type_is_type(text)' -- Content transfer encoding is 7bit local r_cte_7bit = 'compare_transfer_encoding(7bit)' -- And body contains 8bit characters -local r_body_8bit = '/[^\x01-\x7f]/Pr' +local r_body_8bit = '/[^\\x01-\\x7f]/Pr' reconf['R_BAD_CTE_7BIT'] = string.format('(%s) & (%s) & (%s)', r_ctype_text, r_cte_7bit, r_body_8bit) -- Detects missing To header @@ -39,16 +41,16 @@ local has_mid = 'header_exists(Message-Id)' reconf['MISSING_MID'] = '!header_exists(Message-Id)'; -- Received seems to be fake -reconf['R_RCVD_SPAMBOTS'] = 'Received=/^from \[\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\] by [-.\w+]{5,255}; [SMTWF][a-z][a-z], [\s\d]?\d [JFMAJSOND][a-z][a-z] \d{4} \d{2}:\d{2}:\d{2} [-+]\d{4}$/mH' +reconf['R_RCVD_SPAMBOTS'] = 'Received=/^from \\[\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\] by [-.\\w+]{5,255}; [SMTWF][a-z][a-z], [\\s\\d]?\\d [JFMAJSOND][a-z][a-z] \\d{4} \\d{2}:\\d{2}:\\d{2} [-+]\\d{4}$/mH' -- To header seems to be autogenerated -reconf['R_TO_SEEMS_AUTO'] = 'To=/\"?(?[-.\w]{1,64})\"?\s<\k\@/H' +reconf['R_TO_SEEMS_AUTO'] = 'To=/\\"?(?[-.\\w]{1,64})\\"?\\s<\\k\\@/H' -- Charset is missing in message reconf['R_MISSING_CHARSET']= string.format('content_type_is_type(text) & !content_type_has_param(charset) & !%s', r_cte_7bit); -- Subject seems to be spam -reconf['R_SAJDING'] = 'Subject=/\bsajding(?:om|a)?\b/iH' +reconf['R_SAJDING'] = 'Subject=/\\bsajding(?:om|a)?\\b/iH' -- Messages that have only HTML part reconf['MIME_HTML_ONLY'] = 'has_only_html_part()' @@ -56,10 +58,10 @@ reconf['MIME_HTML_ONLY'] = 'has_only_html_part()' -- Find forged Outlook MUA -- Yahoo groups messages -local yahoo_bulk = 'Received=/from \[\S+\] by \S+\.(?:groups|scd|dcn)\.yahoo\.com with NNFMP/H' +local yahoo_bulk = 'Received=/from \\[\\S+\\] by \\S+\\.(?:groups|scd|dcn)\\.yahoo\\.com with NNFMP/H' -- Outlook MUA -local outlook_mua = 'X-Mailer=/^Microsoft Outlook\b/H' -local any_outlook_mua = 'X-Mailer=/^Microsoft Outlook\b/H' +local outlook_mua = 'X-Mailer=/^Microsoft Outlook\\b/H' +local any_outlook_mua = 'X-Mailer=/^Microsoft Outlook\\b/H' reconf['FORGED_OUTLOOK_HTML'] = string.format('!%s & %s & %s', yahoo_bulk, outlook_mua, reconf['MIME_HTML_ONLY']) -- Recipients seems to be likely with each other (only works when recipients count is more than 5 recipients) @@ -69,43 +71,43 @@ reconf['SUSPICIOUS_RECIPS'] = 'compare_recipients_distance(0.65)' reconf['SORTED_RECIPS'] = 'is_recipients_sorted()' -- Spam string at the end of message to make statistics faults -reconf['TRACKER_ID'] = '/^[a-z0-9]{6,24}[-_a-z0-9]{2,36}[a-z0-9]{6,24}\s*\z/isPr' +reconf['TRACKER_ID'] = '/^[a-z0-9]{6,24}[-_a-z0-9]{2,36}[a-z0-9]{6,24}\\s*\\z/isPr' -- From that contains encoded characters while base 64 is not needed as all symbols are 7bit -- Regexp that checks that from header is encoded with base64 (search in raw headers) -local from_encoded_b64 = 'From=/\=\?\S+\?B\?/iX' +local from_encoded_b64 = 'From=/\\=\\?\\S+\\?B\\?/iX' -- From contains only 7bit characters (parsed headers are used) -local from_needs_mime = 'From=/[\x00-\x08\x0b\x0c\x0e-\x1f\x7f-\xff]/H' +local from_needs_mime = 'From=/[\\x00-\\x08\\x0b\\x0c\\x0e-\\x1f\\x7f-\\xff]/H' -- Final rule reconf['FROM_EXCESS_BASE64'] = string.format('%s & !%s', from_encoded_b64, from_needs_mime) -- Detect forged outlook headers -- OE X-Mailer header -local oe_mua = 'X-Mailer=/\bOutlook Express [456]\./H' +local oe_mua = 'X-Mailer=/\\bOutlook Express [456]\\./H' -- OE Message ID format -local oe_msgid_1 = 'Message-Id=/^[A-Za-z0-9-]{7}[A-Za-z0-9]{20}\@hotmail\.com$/mH' -local oe_msgid_2 = 'Message-Id=/^(?:[0-9a-f]{8}|[0-9a-f]{12})\$[0-9a-f]{8}\$[0-9a-f]{8}\@\S+$/mH' +local oe_msgid_1 = 'Message-Id=/^[A-Za-z0-9-]{7}[A-Za-z0-9]{20}\\@hotmail\\.com$/mH' +local oe_msgid_2 = 'Message-Id=/^(?:[0-9a-f]{8}|[0-9a-f]{12})\\$[0-9a-f]{8}\\$[0-9a-f]{8}\\@\\S+$/mH' -- EZLM remail of message -local lyris_ezml_remailer = 'List-Unsubscribe=/$/H' +local lyris_ezml_remailer = 'List-Unsubscribe=/$/H' -- Header of wacky sendmail -local wacky_sendmail_version = 'Received=/\/CWT\/DCE\)/H' +local wacky_sendmail_version = 'Received=/\\/CWT\\/DCE\\)/H' -- Iplanet received header local iplanet_messaging_server = 'Received=/iPlanet Messaging Server/H' -- Hotmail message id -local hotmail_baydav_msgid = 'Message-Id=/^BAY\d+-DAV\d+[A-Z0-9]{25}\@phx\.gbl$/mH' +local hotmail_baydav_msgid = 'Message-Id=/^BAY\\d+-DAV\\d+[A-Z0-9]{25}\\@phx\\.gbl$/mH' -- Sympatico message id -local sympatico_msgid = 'Message-Id=/^BAYC\d+-PASMTP\d+[A-Z0-9]{25}\@CEZ\.ICE$/mH' +local sympatico_msgid = 'Message-Id=/^BAYC\\d+-PASMTP\\d+[A-Z0-9]{25}\\@CEZ\\.ICE$/mH' -- Message id seems to be forged local unusable_msgid = string.format('(%s | %s | %s | %s | %s)', lyris_ezml_remailer, wacky_sendmail_version, iplanet_messaging_server, hotmail_baydav_msgid, sympatico_msgid) -- Outlook express data seems to be forged local forged_oe = string.format('(%s & !%s & !%s & !%s)', oe_mua, oe_msgid_1, oe_msgid_2, unusable_msgid) -- Outlook specific headers -local outlook_dollars_mua = 'X-Mailer=/^Microsoft Outlook(?: 8| CWS, Build 9|, Build 10)\./H' -local outlook_dollars_other = 'Message-Id=/^\!\~\!/mH' -local vista_msgid = 'Message-Id=/^[A-F\d]{32}\@\S+$/mH' -local ims_msgid = 'Message-Id=/^[A-F\d]{36,40}\@\S+$/mH' +local outlook_dollars_mua = 'X-Mailer=/^Microsoft Outlook(?: 8| CWS, Build 9|, Build 10)\\./H' +local outlook_dollars_other = 'Message-Id=/^\\!\\~\\!/mH' +local vista_msgid = 'Message-Id=/^[A-F\\d]{32}\\@\\S+$/mH' +local ims_msgid = 'Message-Id=/^[A-F\\d]{36,40}\\@\\S+$/mH' -- Forged outlook headers local forged_outlook_dollars = string.format('(%s & !%s & !%s & !%s & !%s & !%s', outlook_dollars_mua, oe_msgid_2, outlook_dollars_other, vista_msgid, ims_msgid, unusable_msgid) @@ -127,8 +129,8 @@ reconf['FORGED_OUTLOOK_TAGS'] = string.format('!%s & %s & %s & !(%s & %s & %s & tag_exists_meta, tag_exists_body) -- Message id validity -local sane_msgid = 'Message-Id=/^[^<>\\ \t\n\r\x0b\x80-\xff]+\@[^<>\\ \t\n\r\x0b\x80-\xff]+\s*$/mH' -local msgid_comment = 'Message-Id=/\(.*\)/mH' +local sane_msgid = 'Message-Id=/^[^<>\\\\ \\t\\n\\r\\x0b\\x80-\\xff]+\\@[^<>\\\\ \\t\\n\\r\\x0b\\x80-\\xff]+\\s*$/mH' +local msgid_comment = 'Message-Id=/\\(.*\\)/mH' reconf['INVALID_MSGID'] = string.format('(%s) & !((%s) | (%s))', has_mid, sane_msgid, msgid_comment) @@ -142,60 +144,60 @@ reconf['MIME_HEADER_CTYPE_ONLY'] = string.format('!(%s) & !(%s) & (%s) & !(%s) & -- Forged Exchange messages -local msgid_dollars_ok = 'Message-Id=/[0-9a-f]{4,}\$[0-9a-f]{4,}\$[0-9a-f]{4,}\@\S+/Hr' +local msgid_dollars_ok = 'Message-Id=/[0-9a-f]{4,}\\$[0-9a-f]{4,}\\$[0-9a-f]{4,}\\@\\S+/Hr' local mimeole_ms = 'X-MimeOLE=/^Produced By Microsoft MimeOLE/H' local rcvd_with_exchange = 'Received=/with Microsoft Exchange Server/H' reconf['R_MUA_EXCHANGE'] = 'X-MimeOLE=/Microsoft Exchange/H' reconf['RATWARE_MS_HASH'] = string.format('(%s) & !(%s) & !(%s)', msgid_dollars_ok, mimeole_ms, rcvd_with_exchange) -- Reply-type in content-type -reconf['STOX_REPLY_TYPE'] = 'Content-Type=/text\/plain; .* reply-type=original/H' +reconf['STOX_REPLY_TYPE'] = 'Content-Type=/text\\/plain; .* reply-type=original/H' -- Fake Verizon headers -local fhelo_verizon = 'X-Spam-Relays-Untrusted=/^[^\]]+ helo=[^ ]+verizon\.net /iH' -local fhost_verizon = 'X-Spam-Relays-Untrusted=/^[^\]]+ rdns=[^ ]+verizon\.net /iH' +local fhelo_verizon = 'X-Spam-Relays-Untrusted=/^[^\\]]+ helo=[^ ]+verizon\\.net /iH' +local fhost_verizon = 'X-Spam-Relays-Untrusted=/^[^\\]]+ rdns=[^ ]+verizon\\.net /iH' reconf['FM_FAKE_HELO_VERIZON'] = string.format('(%s) & !(%s)', fhelo_verizon, fhost_verizon) -- Forged yahoo msgid -local at_yahoo_msgid = 'Message-Id=/\@yahoo\.com\b/iH' -local at_yahoogroups_msgid = 'Message-Id=/\@yahoogroups\.com\b/iH' -local from_yahoo_com = 'From=/\@yahoo\.com\b/iH' +local at_yahoo_msgid = 'Message-Id=/\\@yahoo\\.com\\b/iH' +local at_yahoogroups_msgid = 'Message-Id=/\\@yahoogroups\\.com\\b/iH' +local from_yahoo_com = 'From=/\\@yahoo\\.com\\b/iH' reconf['FORGED_MSGID_YAHOO'] = string.format('(%s) & !(%s)', at_yahoo_msgid, from_yahoo_com) -local r_from_yahoo_groups = 'From=/rambler.ru\@returns\.groups\.yahoo\.com\b/iH' -local r_from_yahoo_groups_ro = 'From=/ro.ru\@returns\.groups\.yahoo\.com\b/iH' -reconf['FROM_CBR'] = 'From=/\@cbr\.ru\b/iH' -reconf['FROM_CSHOP'] = 'From=/\@cshop\.ru\b/iH' -reconf['FROM_MIRHOSTING'] = 'From=/\@mirhosting\.com\b/iH' -reconf['FROM_PASSIFLORA'] = 'From=/\@passiflora\.ru\b/iH' -reconf['FROM_WORLDBANK'] = 'From=/\@worldbank\.org\b/iH' +local r_from_yahoo_groups = 'From=/rambler.ru\\@returns\\.groups\\.yahoo\\.com\\b/iH' +local r_from_yahoo_groups_ro = 'From=/ro.ru\\@returns\\.groups\\.yahoo\\.com\\b/iH' +reconf['FROM_CBR'] = 'From=/\\@cbr\\.ru\\b/iH' +reconf['FROM_CSHOP'] = 'From=/\\@cshop\\.ru\\b/iH' +reconf['FROM_MIRHOSTING'] = 'From=/\\@mirhosting\\.com\\b/iH' +reconf['FROM_PASSIFLORA'] = 'From=/\\@passiflora\\.ru\\b/iH' +reconf['FROM_WORLDBANK'] = 'From=/\\@worldbank\\.org\\b/iH' -- Forged The Bat! MUA headers -local thebat_mua_v1 = 'X-Mailer=/^The Bat! \(v1\./H' +local thebat_mua_v1 = 'X-Mailer=/^The Bat! \\(v1\\./H' local ctype_has_boundary = 'Content-Type=/boundary/iH' -local bat_boundary = 'Content-Type=/boundary=\"?-{10}/H' -local mailman_21 = 'X-Mailman-Version=/\d/H' +local bat_boundary = 'Content-Type=/boundary=\\"?-{10}/H' +local mailman_21 = 'X-Mailman-Version=/\\d/H' reconf['FORGED_MUA_THEBAT_BOUN'] = string.format('(%s) & (%s) & !(%s) & !(%s)', thebat_mua_v1, ctype_has_boundary, bat_boundary, mailman_21) -- Two received headers with ip addresses -local double_ip_spam_1 = 'Received=/from \[\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\] by \d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3} with/H' -local double_ip_spam_2 = 'Received=/from\s+\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\s+by\s+\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3};/H' +local double_ip_spam_1 = 'Received=/from \\[\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\] by \\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3} with/H' +local double_ip_spam_2 = 'Received=/from\\s+\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\s+by\\s+\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3};/H' reconf['RCVD_DOUBLE_IP_SPAM'] = string.format('(%s) | (%s)', double_ip_spam_1, double_ip_spam_2) -- Quoted reply-to from yahoo (seems to be forged) -local repto_quote = 'Reply-To=/\".*\"\s*\section_pointer; - /* First try to find option with this name */ - while (cur_opt) { - cur = cur_opt->data; - if (strcmp (cur->param, name) == 0) { - /* cur->value is in pool */ - cur->value = data; - cur->is_lua = is_lua; - return TRUE; - } - cur_opt = g_list_next (cur_opt); - } - /* Not found, insert */ + /* Insert option */ cur = memory_pool_alloc0 (cfg->cfg_pool, sizeof (struct module_opt)); cur->param = name; cur->value = data; diff --git a/src/classifiers/winnow.c b/src/classifiers/winnow.c index eaa9343fe..f1551fb6e 100644 --- a/src/classifiers/winnow.c +++ b/src/classifiers/winnow.c @@ -184,7 +184,7 @@ winnow_classify (struct classifier_ctx *ctx, statfile_pool_t * pool, GTree * inp if (data.count != 0) { res = data.sum / data.count; if (st->normalizer != NULL) { - res = st->normalizer (res, st->normalizer_data); + res = st->normalizer (task->cfg, res, st->normalizer_data); } } else { diff --git a/src/controller.c b/src/controller.c index 64df87d6c..875bedbcc 100644 --- a/src/controller.c +++ b/src/controller.c @@ -776,7 +776,7 @@ controller_read_socket (f_str_t * in, void *arg) maybe_write_binlog (session->learn_classifier, st, statfile, tokens); if (st->normalizer != NULL) { - sum = st->normalizer (sum, st->normalizer_data); + sum = st->normalizer (session->cfg, sum, st->normalizer_data); } free_task (task, FALSE); diff --git a/src/lua/lua_cfg_file.c b/src/lua/lua_cfg_file.c index e087600fd..799a603c0 100644 --- a/src/lua/lua_cfg_file.c +++ b/src/lua/lua_cfg_file.c @@ -57,6 +57,7 @@ lua_check_element (memory_pool_t *pool, const gchar *name, GList **options, stru /* New option */ *opt = memory_pool_alloc0 (pool, sizeof (struct module_opt)); (*opt)->is_lua = TRUE; + (*opt)->param = memory_pool_strdup (pool, name); *options = g_list_prepend (*options, *opt); } } @@ -76,7 +77,6 @@ lua_process_module (lua_State *L, const gchar *param, struct config_file *cfg) } /* Now iterate throught module table */ - lua_gettable (L, -1); for (lua_pushnil(L); lua_next(L, -2); lua_pop(L, 1)) { /* key - -2, value - -1 */ name = luaL_checkstring (L, -2); diff --git a/src/lua/lua_common.c b/src/lua/lua_common.c index dd79d9eab..fc5fe0772 100644 --- a/src/lua/lua_common.c +++ b/src/lua/lua_common.c @@ -28,7 +28,6 @@ /* Lua module init function */ #define MODULE_INIT_FUNC "module_init" -lua_State *L = NULL; const luaL_reg null_reg[] = { {"__tostring", lua_class_tostring}, {NULL, NULL} @@ -184,24 +183,25 @@ luaopen_logger (lua_State * L) void init_lua (struct config_file *cfg) { - if (L == NULL) { - L = lua_open (); - luaL_openlibs (L); - - (void)luaopen_rspamd (L); - (void)luaopen_logger (L); - (void)luaopen_config (L); - (void)luaopen_metric (L); - (void)luaopen_radix (L); - (void)luaopen_hash_table (L); - (void)luaopen_task (L); - (void)luaopen_textpart (L); - (void)luaopen_message (L); - (void)luaopen_classifier (L); - (void)luaopen_statfile (L); - cfg->lua_state = L; - memory_pool_add_destructor (cfg->cfg_pool, (pool_destruct_func)lua_close, L); - } + lua_State *L; + + L = lua_open (); + luaL_openlibs (L); + + (void)luaopen_rspamd (L); + (void)luaopen_logger (L); + (void)luaopen_config (L); + (void)luaopen_metric (L); + (void)luaopen_radix (L); + (void)luaopen_hash_table (L); + (void)luaopen_task (L); + (void)luaopen_textpart (L); + (void)luaopen_message (L); + (void)luaopen_classifier (L); + (void)luaopen_statfile (L); + cfg->lua_state = L; + memory_pool_add_destructor (cfg->cfg_pool, (pool_destruct_func)lua_close, L); + } @@ -213,6 +213,7 @@ init_lua_filters (struct config_file *cfg) GList *cur, *tmp; struct script_module *module; struct statfile *st; + lua_State *L = cfg->lua_state; cur = g_list_first (cfg->script_modules); while (cur) { @@ -413,23 +414,12 @@ lua_consolidation_func (struct worker_task *task, const char *metric_name, const return data.score; } -void -add_luabuf (const char *line) -{ - int error; - - error = luaL_loadbuffer (L, line, strlen (line), "config") || lua_pcall (L, 0, 0, 0); - if (error) { - yyerror ("lua error: %s", lua_tostring (L, -1)); - lua_pop (L, 1); /* pop error message from the stack */ - } -} - double -lua_normalizer_func (double score, void *params) +lua_normalizer_func (struct config_file *cfg, double score, void *params) { GList *p = params; double res = score; + lua_State *L = cfg->lua_state; /* Call specified function and put input score on stack */ if (!p->data) { diff --git a/src/lua/lua_common.h b/src/lua/lua_common.h index dd4e75a8f..f89ccaa30 100644 --- a/src/lua/lua_common.h +++ b/src/lua/lua_common.h @@ -44,7 +44,7 @@ void add_luabuf (const char *line); GList *call_classifier_pre_callbacks (struct classifier_config *ccf, struct worker_task *task); double call_classifier_post_callbacks (struct classifier_config *ccf, struct worker_task *task, double in); -double lua_normalizer_func (double score, void *params); +double lua_normalizer_func (struct config_file *cfg, double score, void *params); /* Config file functions */ void lua_post_load_config (struct config_file *cfg); diff --git a/src/main.c b/src/main.c index 2c02e82b1..123a69940 100644 --- a/src/main.c +++ b/src/main.c @@ -465,6 +465,37 @@ fork_delayed (struct rspamd_main *rspamd) } } +static inline uintptr_t +make_listen_key (struct in_addr *addr, int port, int family, char *path) +{ + uintptr_t res = 0; + char *key; + + if (family == AF_INET) { + /* Make fnv hash from bytes of addr and port */ + key = (char *)&addr->s_addr; + while (key - (char *)&addr->s_addr < sizeof (addr->s_addr)) { + res ^= (char)*key++; + res += (res << 1) + (res << 4) + (res << 7) + (res << 8) + (res << 24); + } + key = (char *)&port; + while (key - (char *)&port < sizeof (addr->s_addr)) { + res ^= (char)*key++; + res += (res << 1) + (res << 4) + (res << 7) + (res << 8) + (res << 24); + } + } + else { + /* Make fnv hash from bytes of path */ + key = path; + while (*key) { + res ^= (char)*key++; + res += (res << 1) + (res << 4) + (res << 7) + (res << 8) + (res << 24); + } + } + + return res; +} + static void spawn_workers (struct rspamd_main *rspamd) { @@ -479,13 +510,16 @@ spawn_workers (struct rspamd_main *rspamd) cf = cur->data; if (cf->has_socket) { - if ((p = g_hash_table_lookup (listen_sockets, GINT_TO_POINTER (cf->type))) == NULL) { + if ((p = g_hash_table_lookup (listen_sockets, GINT_TO_POINTER ( + make_listen_key (&cf->bind_addr, cf->bind_port, cf->bind_family, cf->bind_host)))) == NULL) { /* Create listen socket */ listen_sock = create_listen_socket (&cf->bind_addr, cf->bind_port, cf->bind_family, cf->bind_host); if (listen_sock == -1) { exit (-errno); } - g_hash_table_insert (listen_sockets, GINT_TO_POINTER (cf->type), GINT_TO_POINTER (listen_sock)); + g_hash_table_insert (listen_sockets, GINT_TO_POINTER ( + make_listen_key (&cf->bind_addr, cf->bind_port, cf->bind_family, cf->bind_host)), + GINT_TO_POINTER (listen_sock)); } else { /* We had socket for this type of worker */ -- 2.39.5