aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorcebka@mailsupport.rambler.ru <cebka@mailsupport.rambler.ru>2008-09-23 18:12:36 +0400
committercebka@mailsupport.rambler.ru <cebka@mailsupport.rambler.ru>2008-09-23 18:12:36 +0400
commitffe2b54bd024bb45fe7ee182d452ad61283df8ca (patch)
treed63a7589dfea70ac173dc44bd2b1c88303afa024
parent193acf73e51b24ccca8048ebb6aaec2971594268 (diff)
downloadrspamd-ffe2b54bd024bb45fe7ee182d452ad61283df8ca.tar.gz
rspamd-ffe2b54bd024bb45fe7ee182d452ad61283df8ca.zip
* Rework logic of filters
* Add some documentation to rspamd (about new filters logic and about protocol)
-rw-r--r--README.koi8.txt82
-rw-r--r--cfg_file.h30
-rw-r--r--cfg_file.l17
-rw-r--r--cfg_file.y209
-rw-r--r--cfg_utils.c144
-rwxr-xr-xconfigure4
-rw-r--r--filter.c43
-rw-r--r--filter.h46
-rw-r--r--main.c9
-rw-r--r--main.h28
-rw-r--r--plugins/surbl.c26
-rw-r--r--worker.c197
12 files changed, 403 insertions, 432 deletions
diff --git a/README.koi8.txt b/README.koi8.txt
new file mode 100644
index 000000000..fdecd4132
--- /dev/null
+++ b/README.koi8.txt
@@ -0,0 +1,82 @@
+API Rspamd.
+===========
+
+TODO.
+
+Логика работы фильтров rspamd.
+==============================
+
+1) Все фильтры регистрируются в конфиг файле в описании цепочек фильтров:
+header_filters=regexp,my_func
+где имя фильтра - это либо название c модуля, либо название перл функции
+Типы фильтров:
+* header_filters - фильтр заголовков
+* mime_filters - фильтр для каждой mime части
+* message_filters - фильтр всего сообщения целиком
+* url_filters - фильтры URL ссылок
+Свои результаты фильтры регистрируют при помощи добавления результата в метрику.
+
+2) Метрика - это символьное значение, в котором регистрируют свои результаты фильтры
+Существует метрика по умолчанию - "default", в которой регистрируют результаты фильтры,
+для которых явно не определена метрика
+Для каждой метрики существует специальная функция консолидации, которая рассчитывает коэффициенты
+результатов согласно внутренней логике соответствия символов и коэффициентов. По умолчанию такой
+функцией является простая сумма, которая настраивается особым образом в конфигурационном файле:
+
+# Блок factors
+factors {
+ # Например, "SURBL_DNS"=5.0
+ "SYMBOL_NAME" = coefficient;
+};
+Также для метрики можно зарегистрировать особую функцию, прописав в описании метрики
+metric {
+ name = "test_metric";
+ function = "some_function";
+ required_score = 20.0;
+};
+Пока поддерживаются только перловые функции.
+
+3) Результат - это пара значений: SYMBOL:FLAG, при этом, SYMBOL - это
+строчка, характеризующая результат, а FLAG - сработал данный фильтр или нет
+(1 или 0). Результат добавляется в метрику, после чего передается функции консолидации.
+
+4) Итог - на выходе мы имеем обработанное сообщение, список метрик и их символов и результаты
+проверки.
+
+Протокол.
+=========
+
+Формат ответа:
+SPAMD/1.1 0 EX_OK
+\ / \/
+ Версия Код
+ ошибки
+Spam: False ; 2 / 5
+Это формат совместимости с sa-spamd (без метрик)
+
+Новый формат ответа:
+RSPAMD/1.0 0 EX_OK
+Metric: Name ; Spam_Result ; Spam_Mark / Spam_Mark_Required
+Metric: Name2 ; Spam_Result2 ; Spam_Mark2 / Spam_Mark_Required2
+
+Заголовков типа metric может быть несколько.
+Формат вывода символов:
+SYMBOL1, SYMBOL2, SYMBOL3 -- формат совместимости с sa-spamd
+Metric: SYMBOL1, SYMBOL2, SYMBOL3 -- формат rspamd
+
+Формат ответа зависит от формата запроса:
+PROCESS SPAMC/1.2
+\ / \ /
+Команда Версия
+
+SPAMC - протокол совместимости с sa-spamd
+RSPAMC - новый протокол rspamd
+В любом из режимов работы поддерживаются следующие заголовки:
+Content-Length - длина сообщения
+Helo - HELO, полученный от клиента
+From - MAIL FROM
+IP - IP клиента
+Recipient-Number - число реципиентов
+Rcpt - реципиент
+
+Эти значения могут использоваться в фильтрах rspamd.
diff --git a/cfg_file.h b/cfg_file.h
index 1a211e680..592a0eb27 100644
--- a/cfg_file.h
+++ b/cfg_file.h
@@ -19,6 +19,7 @@
#include <glib.h>
#include "upstream.h"
#include "memcached.h"
+#include "filter.h"
#define DEFAULT_BIND_PORT 768
#define MAX_MEMCACHED_SERVERS 48
@@ -53,7 +54,6 @@ enum script_type {
SCRIPT_MIME,
SCRIPT_URL,
SCRIPT_MESSAGE,
- SCRIPT_CHAIN,
};
struct memcached_server {
@@ -69,20 +69,6 @@ struct perl_module {
LIST_ENTRY (perl_module) next;
};
-struct script_param {
- char *symbol;
- char *function;
- enum script_type type;
- LIST_ENTRY (script_param) next;
-};
-
-struct filter_chain {
- unsigned int metric;
- unsigned int scripts_number;
- LIST_HEAD (scriptq, script_param) *scripts;
- LIST_ENTRY (filter_chain) next;
-};
-
struct module_opt {
char *param;
char *value;
@@ -110,18 +96,26 @@ struct config_file {
unsigned int memcached_maxerrors;
unsigned int memcached_connect_timeout;
- LIST_HEAD (perlq, filter_chain) filters;
LIST_HEAD (modulesq, perl_module) perl_modules;
- LIST_HEAD (cmodulesq, c_module) c_modules;
+ LIST_HEAD (headersq, filter) header_filters;
+ LIST_HEAD (mimesq, filter) mime_filters;
+ LIST_HEAD (messagesq, filter) message_filters;
+ LIST_HEAD (urlsq, filter) url_filters;
+ char *header_filters_str;
+ char *mime_filters_str;
+ char *message_filters_str;
+ char *url_filters_str;
GHashTable* modules_opts;
GHashTable* variables;
+ GHashTable* metrics;
+ GHashTable* factors;
+ GHashTable* c_modules;
};
int add_memcached_server (struct config_file *cf, char *str);
int parse_bind_line (struct config_file *cf, char *str);
void init_defaults (struct config_file *cfg);
void free_config (struct config_file *cfg);
-int parse_script (char *str, struct script_param *param, enum script_type type);
char* get_module_opt (struct config_file *cfg, char *module_name, char *opt_name);
size_t parse_limit (const char *limit);
unsigned int parse_seconds (const char *t);
diff --git a/cfg_file.l b/cfg_file.l
index fd5f3bc7f..3cb8441bd 100644
--- a/cfg_file.l
+++ b/cfg_file.l
@@ -37,14 +37,15 @@ memcached return MEMCACHED;
bind_socket return BINDSOCK;
servers return SERVERS;
require return REQUIRE;
-module return MODULE;
-filter return FILTER;
+header_filters return HEADER_FILTERS;
+mime_filters return MIME_FILTERS;
+message_filters return MESSAGE_FILTERS;
+url_filters return URL_FILTERS;
+factors return FACTORS;
metric return METRIC;
-script_header return SCRIPT_HEADER;
-script_mime return SCRIPT_MIME;
-script_message return SCRIPT_MESSAGE;
-script_url return SCRIPT_URL;
-script_chain return SCRIPT_CHAIN;
+name return NAME;
+required_score return REQUIRED_SCORE;
+function return FUNCTION;
\{ return OBRACE;
\} return EBRACE;
@@ -58,12 +59,12 @@ yes|YES|no|NO|[yY]|[nN] yylval.flag=parse_flag(yytext); return FLAG;
\" return QUOTE;
\$[a-zA-Z_][a-zA-Z0-9_]+ yylval.string=strdup(yytext + 1); return VARIABLE;
[0-9]+ yylval.number=strtol(yytext, NULL, 10); return NUMBER;
+-?[0-9]+\.?[0-9]* yylval.fract=strtod(yytext, NULL); return FRACT;
[0-9]+[kKmMgG]? yylval.limit=parse_limit(yytext); return SIZELIMIT;
[0-9]+[sS]|[0-9]+[mM][sS] yylval.seconds=parse_seconds(yytext); return SECONDS;
[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3} yylval.string=strdup(yytext); return IPADDR;
[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\/[0-9]{1,2} yylval.string=strdup(yytext); return IPNETWORK;
[a-zA-Z0-9.-]+:[0-9]{1,5} yylval.string=strdup(yytext); return HOSTPORT;
-[a-zA-Z0-9]+:[a-zA-Z0-9_:]+ yylval.string=strdup(yytext); return SCRIPT_PARAM;
[a-zA-Z<][a-zA-Z@+>_-]* yylval.string=strdup(yytext); return STRING;
\/[^/\n]+\/ yylval.string=strdup(yytext); return REGEXP;
[a-zA-Z0-9].[a-zA-Z0-9\/.-]+ yylval.string=strdup(yytext); return DOMAIN;
diff --git a/cfg_file.y b/cfg_file.y
index fb31c4186..9ba219614 100644
--- a/cfg_file.y
+++ b/cfg_file.y
@@ -24,9 +24,8 @@ extern struct config_file *cfg;
extern int yylineno;
extern char *yytext;
-struct scriptq *cur_scripts;
-unsigned int cur_scripts_num = 0;
LIST_HEAD (moduleoptq, module_opt) *cur_module_opt = NULL;
+struct metric *cur_metric = NULL;
%}
@@ -37,7 +36,7 @@ LIST_HEAD (moduleoptq, module_opt) *cur_module_opt = NULL;
char flag;
unsigned int seconds;
unsigned int number;
- struct script_param *param;
+ double fract;
}
%token ERROR STRING QUOTEDSTRING FLAG
@@ -47,8 +46,9 @@ LIST_HEAD (moduleoptq, module_opt) *cur_module_opt = NULL;
%token TEMPDIR PIDFILE SERVERS ERROR_TIME DEAD_TIME MAXERRORS CONNECT_TIMEOUT PROTOCOL RECONNECT_TIMEOUT
%token READ_SERVERS WRITE_SERVER DIRECTORY_SERVERS MAILBOX_QUERY USERS_QUERY LASTLOGIN_QUERY
%token MEMCACHED WORKERS REQUIRE MODULE
-%token FILTER METRIC SCRIPT_HEADER SCRIPT_MIME SCRIPT_MESSAGE SCRIPT_URL SCRIPT_CHAIN SCRIPT_PARAM
%token MODULE_OPT PARAM VARIABLE
+%token HEADER_FILTERS MIME_FILTERS MESSAGE_FILTERS URL_FILTERS FACTORS METRIC NAME
+%token REQUIRED_SCORE FUNCTION FRACT
%type <string> STRING
%type <string> VARIABLE
@@ -58,14 +58,12 @@ LIST_HEAD (moduleoptq, module_opt) *cur_module_opt = NULL;
%type <string> IPADDR IPNETWORK
%type <string> HOSTPORT
%type <string> DOMAIN
-%type <string> SCRIPT_PARAM
%type <limit> SIZELIMIT
%type <flag> FLAG
%type <seconds> SECONDS
%type <number> NUMBER
%type <string> memcached_hosts bind_cred
-%type <number> metric
-%type <param> filter_param
+%type <fract> FRACT
%%
file : /* empty */
@@ -79,9 +77,14 @@ command :
| memcached
| workers
| require
- | filter
+ | header_filters
+ | mime_filters
+ | message_filters
+ | url_filters
| module_opt
| variable
+ | factors
+ | metric
;
tempdir :
@@ -134,6 +137,30 @@ bind_cred:
}
;
+header_filters:
+ HEADER_FILTERS EQSIGN QUOTEDSTRING {
+ cfg->header_filters_str = g_strdup ($3);
+ }
+ ;
+
+mime_filters:
+ MIME_FILTERS EQSIGN QUOTEDSTRING {
+ cfg->mime_filters_str = g_strdup ($3);
+ }
+ ;
+
+message_filters:
+ MESSAGE_FILTERS EQSIGN QUOTEDSTRING {
+ cfg->message_filters_str = g_strdup ($3);
+ }
+ ;
+
+url_filters:
+ URL_FILTERS EQSIGN QUOTEDSTRING {
+ cfg->url_filters_str = g_strdup ($3);
+ }
+ ;
+
memcached:
MEMCACHED OBRACE memcachedbody EBRACE
;
@@ -217,140 +244,80 @@ workers:
}
;
-filter:
- FILTER OBRACE filterbody EBRACE
- ;
-
-filterbody:
- metric SEMICOLON filter_chain {
- struct filter_chain *cur_chain;
- cur_chain = (struct filter_chain *) g_malloc (sizeof (struct filter_chain));
- if (cur_chain == NULL) {
- yyerror ("yyparse: g_malloc: %s", strerror (errno));
+metric:
+ METRIC OBRACE metricbody EBRACE {
+ if (cur_metric == NULL || cur_metric->name == NULL) {
+ yyerror ("yyparse: not enough arguments in metric definition");
YYERROR;
}
-
- cur_chain->metric = $1;
- cur_chain->scripts = cur_scripts;
- cur_chain->scripts_number = cur_scripts_num;
- LIST_INSERT_HEAD (&cfg->filters, cur_chain, next);
-
+ g_hash_table_insert (cfg->metrics, cur_metric->name, cur_metric);
+ cur_metric = NULL;
}
;
-metric:
- METRIC EQSIGN NUMBER {
- $$ = $3;
- }
+metricbody:
+ | metriccmd SEMICOLON
+ | metricbody metriccmd SEMICOLON
;
-
-filter_chain:
- filter_param SEMICOLON {
- cur_scripts = (struct scriptq *)g_malloc (sizeof (struct scriptq));
- if (cur_scripts == NULL) {
- yyerror ("yyparse: g_malloc: %s", strerror (errno));
- YYERROR;
- }
- LIST_INIT (cur_scripts);
- if ($1 == NULL) {
- yyerror ("yyparse: g_malloc: %s", strerror(errno));
- YYERROR;
- }
- LIST_INSERT_HEAD (cur_scripts, $1, next);
- cur_scripts_num = 1;
- }
- | filter_chain filter_param SEMICOLON {
- if ($2 == NULL) {
- yyerror ("yyparse: g_malloc: %s", strerror(errno));
- YYERROR;
+metriccmd:
+ | metricname
+ | metricfunction
+ | metricscore
+ ;
+
+metricname:
+ NAME EQSIGN QUOTEDSTRING {
+ if (cur_metric == NULL) {
+ cur_metric = g_malloc (sizeof (struct metric));
}
- LIST_INSERT_HEAD (cur_scripts, $2, next);
- cur_scripts_num ++;
+ cur_metric->name = g_strdup ($3);
}
;
-filter_param:
- SCRIPT_HEADER EQSIGN SCRIPT_PARAM {
- struct script_param *cur;
-
- cur = g_malloc (sizeof (struct script_param));
- if (cur == NULL) {
- yyerror ("yyparse: g_malloc: %s", strerror(errno));
- YYERROR;
+metricfunction:
+ FUNCTION EQSIGN QUOTEDSTRING {
+ if (cur_metric == NULL) {
+ cur_metric = g_malloc (sizeof (struct metric));
}
- if (parse_script ($3, cur, SCRIPT_HEADER) == -1) {
- yyerror ("yyparse: cannot parse filter param %s", $3);
- YYERROR;
- }
-
- $$ = cur;
- free ($3);
+ cur_metric->func_name = g_strdup ($3);
}
- | SCRIPT_MIME EQSIGN SCRIPT_PARAM {
- struct script_param *cur;
+ ;
- cur = g_malloc (sizeof (struct script_param));
- if (cur == NULL) {
- yyerror ("yyparse: g_malloc: %s", strerror(errno));
- YYERROR;
+metricscore:
+ REQUIRED_SCORE EQSIGN NUMBER {
+ if (cur_metric == NULL) {
+ cur_metric = g_malloc (sizeof (struct metric));
}
- if (parse_script ($3, cur, SCRIPT_MIME) == -1) {
- yyerror ("yyparse: cannot parse filter param %s", $3);
- YYERROR;
- }
-
- $$ = cur;
- free ($3);
+ cur_metric->required_score = $3;
}
- | SCRIPT_MESSAGE EQSIGN SCRIPT_PARAM {
- struct script_param *cur;
-
- cur = g_malloc (sizeof (struct script_param));
- if (cur == NULL) {
- yyerror ("yyparse: g_malloc: %s", strerror(errno));
- YYERROR;
- }
- if (parse_script ($3, cur, SCRIPT_MESSAGE) == -1) {
- yyerror ("yyparse: cannot parse filter param %s", $3);
- YYERROR;
+ | REQUIRED_SCORE EQSIGN FRACT {
+ if (cur_metric == NULL) {
+ cur_metric = g_malloc (sizeof (struct metric));
}
-
- $$ = cur;
- free ($3);
+ cur_metric->required_score = $3;
}
- | SCRIPT_URL EQSIGN SCRIPT_PARAM {
- struct script_param *cur;
-
- cur = g_malloc (sizeof (struct script_param));
- if (cur == NULL) {
- yyerror ("yyparse: g_malloc: %s", strerror(errno));
- YYERROR;
- }
- if (parse_script ($3, cur, SCRIPT_URL) == -1) {
- yyerror ("yyparse: cannot parse filter param %s", $3);
- YYERROR;
- }
+ ;
- $$ = cur;
- free ($3);
- }
- | SCRIPT_CHAIN EQSIGN SCRIPT_PARAM {
- struct script_param *cur;
+factors:
+ FACTORS OBRACE factorsbody EBRACE
+ ;
- cur = g_malloc (sizeof (struct script_param));
- if (cur == NULL) {
- yyerror ("yyparse: g_malloc: %s", strerror(errno));
- YYERROR;
- }
- if (parse_script ($3, cur, SCRIPT_CHAIN) == -1) {
- yyerror ("yyparse: cannot parse filter param %s", $3);
- YYERROR;
- }
+factorsbody:
+ factorparam SEMICOLON
+ | factorsbody factorparam SEMICOLON
+ ;
- $$ = cur;
- free ($3);
+factorparam:
+ QUOTEDSTRING EQSIGN FRACT {
+ double *tmp = g_malloc (sizeof (double));
+ *tmp = $3;
+ g_hash_table_insert (cfg->factors, $1, tmp);
}
- ;
+ | QUOTEDSTRING EQSIGN NUMBER {
+ double *tmp = g_malloc (sizeof (double));
+ *tmp = $3;
+ g_hash_table_insert (cfg->factors, $1, tmp);
+ };
require:
REQUIRE OBRACE requirebody EBRACE
diff --git a/cfg_utils.c b/cfg_utils.c
index a00a075b3..a2a783cc2 100644
--- a/cfg_utils.c
+++ b/cfg_utils.c
@@ -150,20 +150,16 @@ init_defaults (struct config_file *cfg)
cfg->workers_number = DEFAULT_WORKERS_NUM;
cfg->modules_opts = g_hash_table_new (g_str_hash, g_str_equal);
cfg->variables = g_hash_table_new (g_str_hash, g_str_equal);
+ cfg->metrics = g_hash_table_new (g_str_hash, g_str_equal);
+ cfg->factors = g_hash_table_new (g_str_hash, g_str_equal);
+ cfg->c_modules = g_hash_table_new (g_str_hash, g_str_equal);
- LIST_INIT (&cfg->filters);
LIST_INIT (&cfg->perl_modules);
- LIST_INIT (&cfg->c_modules);
}
void
free_config (struct config_file *cfg)
{
- struct filter_chain *chain, *tmp_chain;
- struct script_param *param, *tmp_param;
- struct perl_module *module, *tmp_module;
- struct c_module *cmodule, *tmp_cmodule;
-
if (cfg->pid_file) {
g_free (cfg->pid_file);
}
@@ -173,34 +169,17 @@ free_config (struct config_file *cfg)
if (cfg->bind_host) {
g_free (cfg->bind_host);
}
-
- LIST_FOREACH_SAFE (chain, &cfg->filters, next, tmp_chain) {
- LIST_FOREACH_SAFE (param, chain->scripts, next, tmp_param) {
- if (param->symbol) {
- free (param->symbol);
- }
- if (param->function) {
- free (param->function);
- }
- LIST_REMOVE (param, next);
- free (param);
- }
- LIST_REMOVE (chain, next);
- free (chain);
+ if (cfg->header_filters_str) {
+ g_free (cfg->header_filters_str);
}
- LIST_FOREACH_SAFE (module, &cfg->perl_modules, next, tmp_module) {
- if (module->path) {
- free (module->path);
- }
- LIST_REMOVE (module, next);
- free (module);
+ if (cfg->mime_filters_str) {
+ g_free (cfg->mime_filters_str);
}
-
- LIST_FOREACH_SAFE (cmodule, &cfg->c_modules, next, tmp_cmodule) {
- if (cmodule->ctx) {
- free (cmodule->ctx);
- }
- free (cmodule);
+ if (cfg->message_filters_str) {
+ g_free (cfg->message_filters_str);
+ }
+ if (cfg->url_filters_str) {
+ g_free (cfg->url_filters_str);
}
g_hash_table_foreach (cfg->modules_opts, clean_hash_bucket, NULL);
@@ -208,25 +187,12 @@ free_config (struct config_file *cfg)
g_hash_table_unref (cfg->modules_opts);
g_hash_table_remove_all (cfg->variables);
g_hash_table_unref (cfg->variables);
-}
-
-int
-parse_script (char *str, struct script_param *param, enum script_type type)
-{
- char *cur_tok;
-
- bzero (param, sizeof (struct script_param));
- param->type = type;
-
- /* symbol:path:function -> cur_tok - symbol, str -> function */
- cur_tok = strsep (&str, ":");
-
- if (str == NULL || cur_tok == NULL || *cur_tok == '\0') return -1;
-
- param->symbol = strdup (cur_tok);
- param->function = strdup (str);
-
- return 0;
+ g_hash_table_remove_all (cfg->metrics);
+ g_hash_table_unref (cfg->metrics);
+ g_hash_table_remove_all (cfg->factors);
+ g_hash_table_unref (cfg->factors);
+ g_hash_table_remove_all (cfg->c_modules);
+ g_hash_table_unref (cfg->c_modules);
}
char*
@@ -391,6 +357,76 @@ substitute_all_variables (gpointer key, gpointer value, gpointer data)
var = substitute_variable (cfg, var, 1);
}
+static void
+parse_filters_str (struct config_file *cfg, const char *str, enum script_type type)
+{
+ gchar **strvec, **p;
+ struct filter *cur;
+ int i;
+
+ strvec = g_strsplit (str, ",", 0);
+ if (strvec == NULL) {
+ return;
+ }
+
+ p = strvec;
+ while (*p++) {
+ cur = NULL;
+ /* Search modules from known C modules */
+ for (i = 0; i < MODULES_NUM; i++) {
+ if (strcasecmp (modules[i].name, *p) == 0) {
+ cur = g_malloc (sizeof (struct filter));
+ cur->type = C_FILTER;
+ switch (type) {
+ case SCRIPT_HEADER:
+ cur->func_name = g_strdup (*p);
+ LIST_INSERT_HEAD (&cfg->header_filters, cur, next);
+ break;
+ case SCRIPT_MIME:
+ cur->func_name = g_strdup (*p);
+ LIST_INSERT_HEAD (&cfg->mime_filters, cur, next);
+ break;
+ case SCRIPT_MESSAGE:
+ cur->func_name = g_strdup (*p);
+ LIST_INSERT_HEAD (&cfg->message_filters, cur, next);
+ break;
+ case SCRIPT_URL:
+ cur->func_name = g_strdup (*p);
+ LIST_INSERT_HEAD (&cfg->url_filters, cur, next);
+ break;
+ }
+ break;
+ }
+ }
+ if (cur != NULL) {
+ /* Go to next iteration */
+ continue;
+ }
+ cur = g_malloc (sizeof (struct filter));
+ cur->type = PERL_FILTER;
+ switch (type) {
+ case SCRIPT_HEADER:
+ cur->func_name = g_strdup (*p);
+ LIST_INSERT_HEAD (&cfg->header_filters, cur, next);
+ break;
+ case SCRIPT_MIME:
+ cur->func_name = g_strdup (*p);
+ LIST_INSERT_HEAD (&cfg->mime_filters, cur, next);
+ break;
+ case SCRIPT_MESSAGE:
+ cur->func_name = g_strdup (*p);
+ LIST_INSERT_HEAD (&cfg->message_filters, cur, next);
+ break;
+ case SCRIPT_URL:
+ cur->func_name = g_strdup (*p);
+ LIST_INSERT_HEAD (&cfg->url_filters, cur, next);
+ break;
+ }
+ }
+
+ g_strfreev (strvec);
+}
+
/*
* Substitute all variables in strings
*/
@@ -399,6 +435,10 @@ post_load_config (struct config_file *cfg)
{
g_hash_table_foreach (cfg->variables, substitute_all_variables, cfg);
g_hash_table_foreach (cfg->modules_opts, substitute_module_variables, cfg);
+ parse_filters_str (cfg, cfg->header_filters_str, SCRIPT_HEADER);
+ parse_filters_str (cfg, cfg->mime_filters_str, SCRIPT_MIME);
+ parse_filters_str (cfg, cfg->message_filters_str, SCRIPT_MESSAGE);
+ parse_filters_str (cfg, cfg->url_filters_str, SCRIPT_URL);
}
/*
diff --git a/configure b/configure
index 8150e03d4..20a413419 100755
--- a/configure
+++ b/configure
@@ -21,7 +21,7 @@ YACC_OUTPUT="cfg_yacc.c"
LEX_OUTPUT="cfg_lex.c"
CONFIG="config.h"
-SOURCES="upstream.c cfg_utils.c memcached.c main.c util.c worker.c fstring.c url.c perl.c protocol.c mem_pool.c plugins/surbl.c ${LEX_OUTPUT} ${YACC_OUTPUT}"
+SOURCES="upstream.c cfg_utils.c memcached.c main.c util.c worker.c fstring.c url.c perl.c protocol.c mem_pool.c filter.c plugins/surbl.c ${LEX_OUTPUT} ${YACC_OUTPUT}"
MODULES="surbl"
CFLAGS="$CFLAGS -W -Wpointer-arith -Wno-unused-parameter"
@@ -30,7 +30,7 @@ CFLAGS="$CFLAGS -Wunused-value -ggdb -I${LOCALBASE}/include"
CFLAGS="$CFLAGS "
LDFLAGS="$LDFLAGS -L/usr/lib -L${LOCALBASE}/lib"
OPT_FLAGS="-O -pipe -fno-omit-frame-pointer"
-DEPS="config.h cfg_file.h memcached.h util.h main.h upstream.h fstring.h url.h perl.h mem_pool.h protocol.h ${LEX_OUTPUT} ${YACC_OUTPUT}"
+DEPS="config.h cfg_file.h memcached.h util.h main.h upstream.h fstring.h url.h perl.h mem_pool.h protocol.h filter.h ${LEX_OUTPUT} ${YACC_OUTPUT}"
EXEC=rspamd
USER=postfix
GROUP=postfix
diff --git a/filter.c b/filter.c
new file mode 100644
index 000000000..7fcd4a4f7
--- /dev/null
+++ b/filter.c
@@ -0,0 +1,43 @@
+#include <sys/types.h>
+#include <glib.h>
+#include <string.h>
+#include <stdlib.h>
+
+#include "mem_pool.h"
+#include "filter.h"
+#include "main.h"
+#include "cfg_file.h"
+
+void
+insert_result (struct worker_task *task, const char *metric_name, const char *symbol, u_char flag)
+{
+ struct filter_result *result;
+ struct metric *metric;
+ struct metric_result *metric_res;
+
+ metric = g_hash_table_lookup (task->worker->srv->cfg->metrics, metric_name);
+ if (metric == NULL) {
+ return;
+ }
+
+ result = memory_pool_alloc (task->task_pool, sizeof (struct filter_result));
+ result->symbol = symbol;
+ result->flag = flag;
+ metric_res = g_hash_table_lookup (task->results, metric_name);
+
+ if (metric_res == NULL) {
+ /* Create new metric chain */
+ metric_res = memory_pool_alloc (task->task_pool, sizeof (struct metric_result));
+ LIST_INIT (&metric_res->results);
+ metric_res->metric = metric;
+ g_hash_table_insert (task->results, (gpointer)metric_name, metric_res);
+ }
+
+ LIST_INSERT_HEAD (&metric_res->results, result, next);
+}
+
+int
+process_filters (struct worker_task *task)
+{
+ /* TODO: Implement new logic of filters chains */
+}
diff --git a/filter.h b/filter.h
new file mode 100644
index 000000000..3eb97ac93
--- /dev/null
+++ b/filter.h
@@ -0,0 +1,46 @@
+#ifndef RSPAMD_FILTER_H
+#define RSPAMD_FILTER_H
+
+#include <sys/types.h>
+#ifndef HAVE_OWN_QUEUE_H
+#include <sys/queue.h>
+#else
+#include "queue.h"
+#endif
+#include <glib.h>
+
+struct worker_task;
+
+typedef void (*metric_cons_func)(struct worker_task *task, const char *metric_name);
+typedef void (*filter_func)(struct worker_task *task);
+
+enum filter_type { C_FILTER, PERL_FILTER };
+
+struct filter {
+ char *func_name;
+ enum filter_type type;
+ LIST_ENTRY (filter) next;
+};
+
+struct metric {
+ char *name;
+ char *func_name;
+ metric_cons_func func;
+ double required_score;
+};
+
+struct filter_result {
+ const char *symbol;
+ u_char flag;
+ LIST_ENTRY (filter_result) next;
+};
+
+struct metric_result {
+ struct metric *metric;
+ LIST_HEAD (resultq, filter_result) results;
+};
+
+int process_filters (struct worker_task *task);
+void insert_result (struct worker_task *task, const char *metric_name, const char *symbol, u_char flag);
+
+#endif
diff --git a/main.c b/main.c
index 0478599ce..c67ce419f 100644
--- a/main.c
+++ b/main.c
@@ -152,7 +152,7 @@ int
main (int argc, char **argv)
{
struct rspamd_main *rspamd;
- struct c_module *cur_module = NULL;
+ struct module_ctx *cur_module = NULL;
int res = 0, i, listen_sock;
struct sigaction signals;
struct rspamd_worker *cur, *cur_tmp, *active_worker;
@@ -227,10 +227,9 @@ main (int argc, char **argv)
/* Init C modules */
for (i = 0; i < MODULES_NUM; i ++) {
- cur_module = g_malloc (sizeof (struct c_module));
- cur_module->name = modules[i].name;
- if (modules[i].module_init_func(cfg, &cur_module->ctx) == 0) {
- LIST_INSERT_HEAD (&cfg->c_modules, cur_module, next);
+ cur_module = g_malloc (sizeof (struct module_ctx));
+ if (modules[i].module_init_func(cfg, &cur_module) == 0) {
+ g_hash_table_insert (cfg->c_modules, (gpointer)modules[i].name, cur_module);
}
}
diff --git a/main.h b/main.h
index e4f7fc600..94eceebcf 100644
--- a/main.h
+++ b/main.h
@@ -24,6 +24,7 @@
#include "url.h"
#include "memcached.h"
#include "protocol.h"
+#include "filter.h"
#include <glib.h>
#include <gmime/gmime.h>
@@ -32,6 +33,8 @@
#define FIXED_CONFIG_FILE "./rspamd.conf"
/* Time in seconds to exit for old worker */
#define SOFT_SHUTDOWN_TIME 60
+/* Default metric name */
+#define DEFAULT_METRIC "default"
/* Logging in postfix style */
#define msg_err g_error
@@ -59,8 +62,6 @@ struct rspamd_worker {
struct pidfh;
struct config_file;
-struct filter_chain;
-
/* Struct that determine main server object (for logging purposes) */
struct rspamd_main {
@@ -74,21 +75,6 @@ struct rspamd_main {
TAILQ_HEAD (workq, rspamd_worker) workers;
};
-struct filter_result {
- const char *symbol;
- struct filter_chain *chain;
- int mark;
- TAILQ_ENTRY (filter_result) next;
-};
-
-struct chain_result {
- struct filter_chain *chain;
- int *marks;
- unsigned int marks_num;
- int result_mark;
- TAILQ_ENTRY (chain_result) next;
-};
-
struct mime_part {
GMimeContentType *type;
GByteArray *content;
@@ -96,7 +82,6 @@ struct mime_part {
};
struct save_point {
- enum { C_FILTER, PERL_FILTER } save_type;
void *entry;
void *chain;
unsigned int saved;
@@ -133,10 +118,8 @@ struct worker_task {
TAILQ_HEAD (mime_partq, mime_part) parts;
/* URLs extracted from message */
TAILQ_HEAD (uriq, uri) urls;
- /* List of filter results */
- TAILQ_HEAD (resultsq, filter_result) results;
- /* Results of all chains */
- TAILQ_HEAD (chainsq, chain_result) chain_results;
+ /* Hash of metric result structures */
+ GHashTable *results;
struct config_file *cfg;
struct save_point save;
/* Memory pool that is associated with this task */
@@ -157,7 +140,6 @@ struct c_module {
};
void start_worker (struct rspamd_worker *worker, int listen_sock);
-int process_filters (struct worker_task *task);
#endif
diff --git a/plugins/surbl.c b/plugins/surbl.c
index c8a545ed1..d35ab6069 100644
--- a/plugins/surbl.c
+++ b/plugins/surbl.c
@@ -29,6 +29,7 @@
#define DEFAULT_REDIRECTOR_READ_TIMEOUT 5000
#define DEFAULT_SURBL_MAX_URLS 1000
#define DEFAULT_SURBL_URL_EXPIRE 86400
+#define DEFAULT_SURBL_SYMBOL "SURBL_DNS"
#define DEFAULT_SURBL_SUFFIX "multi.surbl.org"
struct surbl_ctx {
@@ -44,6 +45,8 @@ struct surbl_ctx {
unsigned int max_urls;
unsigned int url_expire;
char *suffix;
+ char *symbol;
+ char *metric;
GHashTable *hosters;
GHashTable *whitelist;
unsigned use_redirector:1;
@@ -144,6 +147,18 @@ surbl_module_init (struct config_file *cfg, struct module_ctx **ctx)
else {
surbl_module_ctx->suffix = DEFAULT_SURBL_SUFFIX;
}
+ if ((value = get_module_opt (cfg, "surbl", "symbol")) != NULL) {
+ surbl_module_ctx->symbol = value;
+ }
+ else {
+ surbl_module_ctx->symbol = DEFAULT_SURBL_SYMBOL;
+ }
+ if ((value = get_module_opt (cfg, "surbl", "metric")) != NULL) {
+ surbl_module_ctx->metric = value;
+ }
+ else {
+ surbl_module_ctx->metric = DEFAULT_METRIC;
+ }
surbl_module_ctx->hosters = g_hash_table_new (g_str_hash, g_str_equal);
surbl_module_ctx->whitelist = g_hash_table_new (g_str_hash, g_str_equal);
@@ -265,13 +280,14 @@ static void
dns_callback (int result, char type, int count, int ttl, void *addresses, void *data)
{
struct memcached_param *param = (struct memcached_param *)data;
- struct filter_result *res;
/* If we have result from DNS server, this url exists in SURBL, so increase score */
if (result != DNS_ERR_NONE || type != DNS_IPv4_A) {
msg_info ("surbl_check: url %s is in surbl %s", param->url->host, surbl_module_ctx->suffix);
- res = TAILQ_LAST (&param->task->results, resultsq);
- res->mark += surbl_module_ctx->weight;
+ insert_result (param->task, surbl_module_ctx->metric, surbl_module_ctx->symbol, 1);
+ }
+ else {
+ insert_result (param->task, surbl_module_ctx->metric, surbl_module_ctx->symbol, 0);
}
param->task->save.saved --;
@@ -292,7 +308,6 @@ memcached_callback (memcached_ctx_t *ctx, memc_error_t error, void *data)
{
struct memcached_param *param = (struct memcached_param *)data;
int *url_count;
- struct filter_result *res;
char *surbl_req;
switch (ctx->op) {
@@ -335,8 +350,7 @@ memcached_callback (memcached_ctx_t *ctx, memc_error_t error, void *data)
/* Do not check DNS for urls that have count more than max_urls */
if (*url_count > surbl_module_ctx->max_urls) {
msg_info ("memcached_callback: url '%s' has count %d, max: %d", struri (param->url), *url_count, surbl_module_ctx->max_urls);
- res = TAILQ_LAST (&param->task->results, resultsq);
- res->mark += surbl_module_ctx->weight;
+ insert_result (param->task, surbl_module_ctx->metric, surbl_module_ctx->symbol, 1);
}
(*url_count) ++;
memc_set (param->ctx, param->ctx->param, surbl_module_ctx->url_expire);
diff --git a/worker.c b/worker.c
index 9846a5736..0c67fe7ce 100644
--- a/worker.c
+++ b/worker.c
@@ -74,9 +74,6 @@ sigusr_handler (int fd, short what, void *arg)
static void
free_task (struct worker_task *task)
{
- struct uri *cur;
- struct filter_result *res;
- struct chain_result *chain_res;
struct mime_part *part;
if (task) {
@@ -86,18 +83,6 @@ free_task (struct worker_task *task)
if (task->memc_ctx) {
memc_close_ctx (task->memc_ctx);
}
- while (!TAILQ_EMPTY (&task->urls)) {
- cur = TAILQ_FIRST (&task->urls);
- TAILQ_REMOVE (&task->urls, cur, next);
- }
- while (!TAILQ_EMPTY (&task->results)) {
- res = TAILQ_FIRST (&task->results);
- TAILQ_REMOVE (&task->results, res, next);
- }
- while (!TAILQ_EMPTY (&task->chain_results)) {
- chain_res = TAILQ_FIRST (&task->chain_results);
- TAILQ_REMOVE (&task->chain_results, chain_res, next);
- }
while (!TAILQ_EMPTY (&task->parts)) {
part = TAILQ_FIRST (&task->parts);
@@ -175,187 +160,6 @@ mime_foreach_callback (GMimeObject *part, gpointer user_data)
}
}
-int
-process_filters (struct worker_task *task)
-{
- struct filter_result *res = NULL;
- struct chain_result *chain_res = NULL;
- struct c_module *c_filter = NULL;
- struct filter_chain *chain = NULL;
- struct script_param *perl_script = NULL;
- int i = 0;
-
- /* First process C modules */
- if (task->save.saved > 0) {
- if (task->save.save_type == C_FILTER) {
- task->save.saved = 0;
- c_filter = (struct c_module *)task->save.entry;
- }
- else if (task->save.save_type == PERL_FILTER) {
- chain = (struct filter_chain *)task->save.chain;
- perl_script = (struct script_param *)task->save.entry;
- task->save.saved = 0;
- }
- }
- else {
- c_filter = LIST_FIRST (&task->cfg->c_modules);
- chain = LIST_FIRST (&task->cfg->filters);
- if (chain) {
- perl_script = LIST_FIRST (chain->scripts);
- }
- }
- while (c_filter != NULL) {
- res = memory_pool_alloc (task->task_pool, sizeof (struct filter_result));
- if (res == NULL) {
- msg_err ("process_filters: malloc failed, %m");
- return -1;
- }
- res->chain = NULL;
- res->symbol = c_filter->name;
- res->mark = 0;
- if (c_filter->ctx->header_filter != NULL) {
- res->mark += c_filter->ctx->header_filter (task);
- if (task->save.saved > 0) {
- TAILQ_INSERT_TAIL (&task->results, res, next);
- task->save.save_type = C_FILTER;
- goto save_point;
- }
- }
- if (c_filter->ctx->message_filter != NULL) {
- res->mark += c_filter->ctx->message_filter (task);
- if (task->save.saved > 0) {
- TAILQ_INSERT_TAIL (&task->results, res, next);
- task->save.save_type = C_FILTER;
- goto save_point;
- }
- }
- if (c_filter->ctx->mime_filter != NULL) {
- res->mark += c_filter->ctx->mime_filter (task);
- if (task->save.saved > 0) {
- TAILQ_INSERT_TAIL (&task->results, res, next);
- task->save.save_type = C_FILTER;
- goto save_point;
- }
- }
- if (c_filter->ctx->url_filter != NULL) {
- res->mark += c_filter->ctx->url_filter (task);
- if (task->save.saved > 0) {
- TAILQ_INSERT_TAIL (&task->results, res, next);
- task->save.save_type = C_FILTER;
- goto save_point;
- }
- }
- TAILQ_INSERT_TAIL (&task->results, res, next);
- c_filter = LIST_NEXT (c_filter, next);
- }
-
- /* Process perl chains */
- while (chain != NULL) {
- chain_res = memory_pool_alloc (task->task_pool, sizeof (struct chain_result));
- if (chain_res == NULL) {
- msg_err ("process_filters: malloc failed, %m");
- return -1;
- }
- i = 0;
- chain_res->chain = chain;
- chain_res->marks = memory_pool_alloc (task->task_pool, sizeof (int) * chain->scripts_number);
- chain_res->result_mark = 0;
- if (chain_res->marks == NULL) {
- free (chain_res);
- msg_err ("process_filters: malloc failed, %m");
- return -1;
- }
- while (perl_script != NULL) {
- if (perl_script->type == SCRIPT_CHAIN) {
- /* Skip chain filters first */
- continue;
- }
- res = memory_pool_alloc (task->task_pool, sizeof (struct filter_result));
- if (res == NULL) {
- msg_err ("process_filters: malloc failed, %m");
- return -1;
- }
- res->chain = chain;
- res->symbol = perl_script->symbol;
- res->mark = 0;
- switch (perl_script->type) {
- case SCRIPT_HEADER:
- res->mark += perl_call_header_filter (perl_script->function, task);
- if (task->save.saved > 0) {
- TAILQ_INSERT_TAIL (&task->results, res, next);
- task->save.save_type = PERL_FILTER;
- goto save_point;
- }
- break;
- case SCRIPT_MESSAGE:
- res->mark += perl_call_message_filter (perl_script->function, task);
- if (task->save.saved > 0) {
- TAILQ_INSERT_TAIL (&task->results, res, next);
- task->save.save_type = PERL_FILTER;
- goto save_point;
- }
- break;
- case SCRIPT_MIME:
- res->mark += perl_call_mime_filter (perl_script->function, task);
- if (task->save.saved > 0) {
- TAILQ_INSERT_TAIL (&task->results, res, next);
- task->save.save_type = PERL_FILTER;
- goto save_point;
- }
- break;
- case SCRIPT_URL:
- res->mark += perl_call_url_filter (perl_script->function, task);
- if (task->save.saved > 0) {
- TAILQ_INSERT_TAIL (&task->results, res, next);
- task->save.save_type = PERL_FILTER;
- goto save_point;
- }
- break;
- }
- TAILQ_INSERT_TAIL (&task->results, res, next);
- chain_res->marks[i++] = res->mark;
- perl_script = LIST_NEXT (perl_script, next);
- }
- chain_res->marks_num = i;
- TAILQ_INSERT_TAIL (&task->chain_results, chain_res, next);
- chain = LIST_NEXT (chain, next);
- }
-
- /* Now process chain results */
- TAILQ_FOREACH (chain_res, &task->chain_results, next) {
- i = 0;
- LIST_FOREACH (perl_script, chain_res->chain->scripts, next) {
- if (perl_script->type != SCRIPT_CHAIN) {
- /* Skip not chain filters */
- continue;
- }
- /* Increment i; if i would be equal to zero that would mean that this chain has no chain filter script */
- i ++;
- chain_res->result_mark += perl_call_url_filter (perl_script->function, task, chain_res->marks, chain_res->marks_num);
- }
- /* If chain has no chain filter, just do addition of all marks */
- if (i == 0) {
- for (i = 0; i < chain_res->marks_num; i++) {
- chain_res->result_mark += chain_res->marks[i];
- }
- }
- }
-
- task->state = WRITE_REPLY;
- bufferevent_enable (task->bev, EV_WRITE);
- return 0;
-
-save_point:
- if (task->save.save_type == C_FILTER) {
- task->save.entry = LIST_NEXT (c_filter, next);
- }
- else if (task->save.save_type == PERL_FILTER) {
- task->save.chain = LIST_NEXT (chain, next);
- task->save.entry = LIST_NEXT (perl_script, next);
- }
- return 1;
-}
-
static int
process_message (struct worker_task *task)
{
@@ -493,7 +297,6 @@ accept_socket (int fd, short what, void *arg)
new_task->parts_count = 0;
new_task->cfg = worker->srv->cfg;
TAILQ_INIT (&new_task->urls);
- TAILQ_INIT (&new_task->results);
TAILQ_INIT (&new_task->parts);
new_task->task_pool = memory_pool_new (TASK_POOL_SIZE);
new_task->memc_ctx = memory_pool_alloc (new_task->task_pool, sizeof (memcached_ctx_t));