From: cebka@mailsupport.rambler.ru Date: Tue, 23 Sep 2008 14:12:36 +0000 (+0400) Subject: * Rework logic of filters X-Git-Tag: 0.2.7~370 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=ffe2b54bd024bb45fe7ee182d452ad61283df8ca;p=rspamd.git * Rework logic of filters * Add some documentation to rspamd (about new filters logic and about protocol) --- diff --git a/README.koi8.txt b/README.koi8.txt new file mode 100644 index 000000000..fdecd4132 --- /dev/null +++ b/README.koi8.txt @@ -0,0 +1,82 @@ +API Rspamd. +=========== + +TODO. + +ìÏÇÉËÁ ÒÁÂÏÔÙ ÆÉÌØÔÒÏ× rspamd. +============================== + +1) ÷ÓÅ ÆÉÌØÔÒÙ ÒÅÇÉÓÔÒÉÒÕÀÔÓÑ × ËÏÎÆÉÇ ÆÁÊÌÅ × ÏÐÉÓÁÎÉÉ ÃÅÐÏÞÅË ÆÉÌØÔÒÏ×: +header_filters=regexp,my_func +ÇÄÅ ÉÍÑ ÆÉÌØÔÒÁ - ÜÔÏ ÌÉÂÏ ÎÁÚ×ÁÎÉÅ c ÍÏÄÕÌÑ, ÌÉÂÏ ÎÁÚ×ÁÎÉÅ ÐÅÒÌ ÆÕÎËÃÉÉ +ôÉÐÙ ÆÉÌØÔÒÏ×: +* header_filters - ÆÉÌØÔÒ ÚÁÇÏÌÏ×ËÏ× +* mime_filters - ÆÉÌØÔÒ ÄÌÑ ËÁÖÄÏÊ mime ÞÁÓÔÉ +* message_filters - ÆÉÌØÔÒ ×ÓÅÇÏ ÓÏÏÂÝÅÎÉÑ ÃÅÌÉËÏÍ +* url_filters - ÆÉÌØÔÒÙ URL ÓÓÙÌÏË +ó×ÏÉ ÒÅÚÕÌØÔÁÔÙ ÆÉÌØÔÒÙ ÒÅÇÉÓÔÒÉÒÕÀÔ ÐÒÉ ÐÏÍÏÝÉ ÄÏÂÁ×ÌÅÎÉÑ ÒÅÚÕÌØÔÁÔÁ × ÍÅÔÒÉËÕ. + +2) íÅÔÒÉËÁ - ÜÔÏ ÓÉÍ×ÏÌØÎÏÅ ÚÎÁÞÅÎÉÅ, × ËÏÔÏÒÏÍ ÒÅÇÉÓÔÒÉÒÕÀÔ Ó×ÏÉ ÒÅÚÕÌØÔÁÔÙ ÆÉÌØÔÒÙ +óÕÝÅÓÔ×ÕÅÔ ÍÅÔÒÉËÁ ÐÏ ÕÍÏÌÞÁÎÉÀ - "default", × ËÏÔÏÒÏÊ ÒÅÇÉÓÔÒÉÒÕÀÔ ÒÅÚÕÌØÔÁÔÙ ÆÉÌØÔÒÙ, +ÄÌÑ ËÏÔÏÒÙÈ Ñ×ÎÏ ÎÅ ÏÐÒÅÄÅÌÅÎÁ ÍÅÔÒÉËÁ +äÌÑ ËÁÖÄÏÊ ÍÅÔÒÉËÉ ÓÕÝÅÓÔ×ÕÅÔ ÓÐÅÃÉÁÌØÎÁÑ ÆÕÎËÃÉÑ ËÏÎÓÏÌÉÄÁÃÉÉ, ËÏÔÏÒÁÑ ÒÁÓÓÞÉÔÙ×ÁÅÔ ËÏÜÆÆÉÃÉÅÎÔÙ +ÒÅÚÕÌØÔÁÔÏ× ÓÏÇÌÁÓÎÏ ×ÎÕÔÒÅÎÎÅÊ ÌÏÇÉËÅ ÓÏÏÔ×ÅÔÓÔ×ÉÑ ÓÉÍ×ÏÌÏ× É ËÏÜÆÆÉÃÉÅÎÔÏ×. ðÏ ÕÍÏÌÞÁÎÉÀ ÔÁËÏÊ +ÆÕÎËÃÉÅÊ Ñ×ÌÑÅÔÓÑ ÐÒÏÓÔÁÑ ÓÕÍÍÁ, ËÏÔÏÒÁÑ ÎÁÓÔÒÁÉ×ÁÅÔÓÑ ÏÓÏÂÙÍ ÏÂÒÁÚÏÍ × ËÏÎÆÉÇÕÒÁÃÉÏÎÎÏÍ ÆÁÊÌÅ: + +# âÌÏË factors +factors { + # îÁÐÒÉÍÅÒ, "SURBL_DNS"=5.0 + "SYMBOL_NAME" = coefficient; +}; +ôÁËÖÅ ÄÌÑ ÍÅÔÒÉËÉ ÍÏÖÎÏ ÚÁÒÅÇÉÓÔÒÉÒÏ×ÁÔØ ÏÓÏÂÕÀ ÆÕÎËÃÉÀ, ÐÒÏÐÉÓÁ× × ÏÐÉÓÁÎÉÉ ÍÅÔÒÉËÉ +metric { + name = "test_metric"; + function = "some_function"; + required_score = 20.0; +}; +ðÏËÁ ÐÏÄÄÅÒÖÉ×ÁÀÔÓÑ ÔÏÌØËÏ ÐÅÒÌÏ×ÙÅ ÆÕÎËÃÉÉ. + +3) òÅÚÕÌØÔÁÔ - ÜÔÏ ÐÁÒÁ ÚÎÁÞÅÎÉÊ: SYMBOL:FLAG, ÐÒÉ ÜÔÏÍ, SYMBOL - ÜÔÏ +ÓÔÒÏÞËÁ, ÈÁÒÁËÔÅÒÉÚÕÀÝÁÑ ÒÅÚÕÌØÔÁÔ, Á FLAG - ÓÒÁÂÏÔÁÌ ÄÁÎÎÙÊ ÆÉÌØÔÒ ÉÌÉ ÎÅÔ +(1 ÉÌÉ 0). òÅÚÕÌØÔÁÔ ÄÏÂÁ×ÌÑÅÔÓÑ × ÍÅÔÒÉËÕ, ÐÏÓÌÅ ÞÅÇÏ ÐÅÒÅÄÁÅÔÓÑ ÆÕÎËÃÉÉ ËÏÎÓÏÌÉÄÁÃÉÉ. + +4) éÔÏÇ - ÎÁ ×ÙÈÏÄÅ ÍÙ ÉÍÅÅÍ ÏÂÒÁÂÏÔÁÎÎÏÅ ÓÏÏÂÝÅÎÉÅ, ÓÐÉÓÏË ÍÅÔÒÉË É ÉÈ ÓÉÍ×ÏÌÏ× É ÒÅÚÕÌØÔÁÔÙ +ÐÒÏ×ÅÒËÉ. + +ðÒÏÔÏËÏÌ. +========= + +æÏÒÍÁÔ ÏÔ×ÅÔÁ: +SPAMD/1.1 0 EX_OK +\ / \/ + ÷ÅÒÓÉÑ ëÏÄ + ÏÛÉÂËÉ +Spam: False ; 2 / 5 +üÔÏ ÆÏÒÍÁÔ ÓÏ×ÍÅÓÔÉÍÏÓÔÉ Ó sa-spamd (ÂÅÚ ÍÅÔÒÉË) + +îÏ×ÙÊ ÆÏÒÍÁÔ ÏÔ×ÅÔÁ: +RSPAMD/1.0 0 EX_OK +Metric: Name ; Spam_Result ; Spam_Mark / Spam_Mark_Required +Metric: Name2 ; Spam_Result2 ; Spam_Mark2 / Spam_Mark_Required2 + +úÁÇÏÌÏ×ËÏ× ÔÉÐÁ metric ÍÏÖÅÔ ÂÙÔØ ÎÅÓËÏÌØËÏ. +æÏÒÍÁÔ ×Ù×ÏÄÁ ÓÉÍ×ÏÌÏ×: +SYMBOL1, SYMBOL2, SYMBOL3 -- ÆÏÒÍÁÔ ÓÏ×ÍÅÓÔÉÍÏÓÔÉ Ó sa-spamd +Metric: SYMBOL1, SYMBOL2, SYMBOL3 -- ÆÏÒÍÁÔ rspamd + +æÏÒÍÁÔ ÏÔ×ÅÔÁ ÚÁ×ÉÓÉÔ ÏÔ ÆÏÒÍÁÔÁ ÚÁÐÒÏÓÁ: +PROCESS SPAMC/1.2 +\ / \ / +ëÏÍÁÎÄÁ ÷ÅÒÓÉÑ + +SPAMC - ÐÒÏÔÏËÏÌ ÓÏ×ÍÅÓÔÉÍÏÓÔÉ Ó sa-spamd +RSPAMC - ÎÏ×ÙÊ ÐÒÏÔÏËÏÌ rspamd +÷ ÌÀÂÏÍ ÉÚ ÒÅÖÉÍÏ× ÒÁÂÏÔÙ ÐÏÄÄÅÒÖÉ×ÁÀÔÓÑ ÓÌÅÄÕÀÝÉÅ ÚÁÇÏÌÏ×ËÉ: +Content-Length - ÄÌÉÎÁ ÓÏÏÂÝÅÎÉÑ +Helo - HELO, ÐÏÌÕÞÅÎÎÙÊ ÏÔ ËÌÉÅÎÔÁ +From - MAIL FROM +IP - IP ËÌÉÅÎÔÁ +Recipient-Number - ÞÉÓÌÏ ÒÅÃÉÐÉÅÎÔÏ× +Rcpt - ÒÅÃÉÐÉÅÎÔ + +üÔÉ ÚÎÁÞÅÎÉÑ ÍÏÇÕÔ ÉÓÐÏÌØÚÏ×ÁÔØÓÑ × ÆÉÌØÔÒÁÈ rspamd. diff --git a/cfg_file.h b/cfg_file.h index 1a211e680..592a0eb27 100644 --- a/cfg_file.h +++ b/cfg_file.h @@ -19,6 +19,7 @@ #include #include "upstream.h" #include "memcached.h" +#include "filter.h" #define DEFAULT_BIND_PORT 768 #define MAX_MEMCACHED_SERVERS 48 @@ -53,7 +54,6 @@ enum script_type { SCRIPT_MIME, SCRIPT_URL, SCRIPT_MESSAGE, - SCRIPT_CHAIN, }; struct memcached_server { @@ -69,20 +69,6 @@ struct perl_module { LIST_ENTRY (perl_module) next; }; -struct script_param { - char *symbol; - char *function; - enum script_type type; - LIST_ENTRY (script_param) next; -}; - -struct filter_chain { - unsigned int metric; - unsigned int scripts_number; - LIST_HEAD (scriptq, script_param) *scripts; - LIST_ENTRY (filter_chain) next; -}; - struct module_opt { char *param; char *value; @@ -110,18 +96,26 @@ struct config_file { unsigned int memcached_maxerrors; unsigned int memcached_connect_timeout; - LIST_HEAD (perlq, filter_chain) filters; LIST_HEAD (modulesq, perl_module) perl_modules; - LIST_HEAD (cmodulesq, c_module) c_modules; + LIST_HEAD (headersq, filter) header_filters; + LIST_HEAD (mimesq, filter) mime_filters; + LIST_HEAD (messagesq, filter) message_filters; + LIST_HEAD (urlsq, filter) url_filters; + char *header_filters_str; + char *mime_filters_str; + char *message_filters_str; + char *url_filters_str; GHashTable* modules_opts; GHashTable* variables; + GHashTable* metrics; + GHashTable* factors; + GHashTable* c_modules; }; int add_memcached_server (struct config_file *cf, char *str); int parse_bind_line (struct config_file *cf, char *str); void init_defaults (struct config_file *cfg); void free_config (struct config_file *cfg); -int parse_script (char *str, struct script_param *param, enum script_type type); char* get_module_opt (struct config_file *cfg, char *module_name, char *opt_name); size_t parse_limit (const char *limit); unsigned int parse_seconds (const char *t); diff --git a/cfg_file.l b/cfg_file.l index fd5f3bc7f..3cb8441bd 100644 --- a/cfg_file.l +++ b/cfg_file.l @@ -37,14 +37,15 @@ memcached return MEMCACHED; bind_socket return BINDSOCK; servers return SERVERS; require return REQUIRE; -module return MODULE; -filter return FILTER; +header_filters return HEADER_FILTERS; +mime_filters return MIME_FILTERS; +message_filters return MESSAGE_FILTERS; +url_filters return URL_FILTERS; +factors return FACTORS; metric return METRIC; -script_header return SCRIPT_HEADER; -script_mime return SCRIPT_MIME; -script_message return SCRIPT_MESSAGE; -script_url return SCRIPT_URL; -script_chain return SCRIPT_CHAIN; +name return NAME; +required_score return REQUIRED_SCORE; +function return FUNCTION; \{ return OBRACE; \} return EBRACE; @@ -58,12 +59,12 @@ yes|YES|no|NO|[yY]|[nN] yylval.flag=parse_flag(yytext); return FLAG; \" return QUOTE; \$[a-zA-Z_][a-zA-Z0-9_]+ yylval.string=strdup(yytext + 1); return VARIABLE; [0-9]+ yylval.number=strtol(yytext, NULL, 10); return NUMBER; +-?[0-9]+\.?[0-9]* yylval.fract=strtod(yytext, NULL); return FRACT; [0-9]+[kKmMgG]? yylval.limit=parse_limit(yytext); return SIZELIMIT; [0-9]+[sS]|[0-9]+[mM][sS] yylval.seconds=parse_seconds(yytext); return SECONDS; [0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3} yylval.string=strdup(yytext); return IPADDR; [0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\/[0-9]{1,2} yylval.string=strdup(yytext); return IPNETWORK; [a-zA-Z0-9.-]+:[0-9]{1,5} yylval.string=strdup(yytext); return HOSTPORT; -[a-zA-Z0-9]+:[a-zA-Z0-9_:]+ yylval.string=strdup(yytext); return SCRIPT_PARAM; [a-zA-Z<][a-zA-Z@+>_-]* yylval.string=strdup(yytext); return STRING; \/[^/\n]+\/ yylval.string=strdup(yytext); return REGEXP; [a-zA-Z0-9].[a-zA-Z0-9\/.-]+ yylval.string=strdup(yytext); return DOMAIN; diff --git a/cfg_file.y b/cfg_file.y index fb31c4186..9ba219614 100644 --- a/cfg_file.y +++ b/cfg_file.y @@ -24,9 +24,8 @@ extern struct config_file *cfg; extern int yylineno; extern char *yytext; -struct scriptq *cur_scripts; -unsigned int cur_scripts_num = 0; LIST_HEAD (moduleoptq, module_opt) *cur_module_opt = NULL; +struct metric *cur_metric = NULL; %} @@ -37,7 +36,7 @@ LIST_HEAD (moduleoptq, module_opt) *cur_module_opt = NULL; char flag; unsigned int seconds; unsigned int number; - struct script_param *param; + double fract; } %token ERROR STRING QUOTEDSTRING FLAG @@ -47,8 +46,9 @@ LIST_HEAD (moduleoptq, module_opt) *cur_module_opt = NULL; %token TEMPDIR PIDFILE SERVERS ERROR_TIME DEAD_TIME MAXERRORS CONNECT_TIMEOUT PROTOCOL RECONNECT_TIMEOUT %token READ_SERVERS WRITE_SERVER DIRECTORY_SERVERS MAILBOX_QUERY USERS_QUERY LASTLOGIN_QUERY %token MEMCACHED WORKERS REQUIRE MODULE -%token FILTER METRIC SCRIPT_HEADER SCRIPT_MIME SCRIPT_MESSAGE SCRIPT_URL SCRIPT_CHAIN SCRIPT_PARAM %token MODULE_OPT PARAM VARIABLE +%token HEADER_FILTERS MIME_FILTERS MESSAGE_FILTERS URL_FILTERS FACTORS METRIC NAME +%token REQUIRED_SCORE FUNCTION FRACT %type STRING %type VARIABLE @@ -58,14 +58,12 @@ LIST_HEAD (moduleoptq, module_opt) *cur_module_opt = NULL; %type IPADDR IPNETWORK %type HOSTPORT %type DOMAIN -%type SCRIPT_PARAM %type SIZELIMIT %type FLAG %type SECONDS %type NUMBER %type memcached_hosts bind_cred -%type metric -%type filter_param +%type FRACT %% file : /* empty */ @@ -79,9 +77,14 @@ command : | memcached | workers | require - | filter + | header_filters + | mime_filters + | message_filters + | url_filters | module_opt | variable + | factors + | metric ; tempdir : @@ -134,6 +137,30 @@ bind_cred: } ; +header_filters: + HEADER_FILTERS EQSIGN QUOTEDSTRING { + cfg->header_filters_str = g_strdup ($3); + } + ; + +mime_filters: + MIME_FILTERS EQSIGN QUOTEDSTRING { + cfg->mime_filters_str = g_strdup ($3); + } + ; + +message_filters: + MESSAGE_FILTERS EQSIGN QUOTEDSTRING { + cfg->message_filters_str = g_strdup ($3); + } + ; + +url_filters: + URL_FILTERS EQSIGN QUOTEDSTRING { + cfg->url_filters_str = g_strdup ($3); + } + ; + memcached: MEMCACHED OBRACE memcachedbody EBRACE ; @@ -217,140 +244,80 @@ workers: } ; -filter: - FILTER OBRACE filterbody EBRACE - ; - -filterbody: - metric SEMICOLON filter_chain { - struct filter_chain *cur_chain; - cur_chain = (struct filter_chain *) g_malloc (sizeof (struct filter_chain)); - if (cur_chain == NULL) { - yyerror ("yyparse: g_malloc: %s", strerror (errno)); +metric: + METRIC OBRACE metricbody EBRACE { + if (cur_metric == NULL || cur_metric->name == NULL) { + yyerror ("yyparse: not enough arguments in metric definition"); YYERROR; } - - cur_chain->metric = $1; - cur_chain->scripts = cur_scripts; - cur_chain->scripts_number = cur_scripts_num; - LIST_INSERT_HEAD (&cfg->filters, cur_chain, next); - + g_hash_table_insert (cfg->metrics, cur_metric->name, cur_metric); + cur_metric = NULL; } ; -metric: - METRIC EQSIGN NUMBER { - $$ = $3; - } +metricbody: + | metriccmd SEMICOLON + | metricbody metriccmd SEMICOLON ; - -filter_chain: - filter_param SEMICOLON { - cur_scripts = (struct scriptq *)g_malloc (sizeof (struct scriptq)); - if (cur_scripts == NULL) { - yyerror ("yyparse: g_malloc: %s", strerror (errno)); - YYERROR; - } - LIST_INIT (cur_scripts); - if ($1 == NULL) { - yyerror ("yyparse: g_malloc: %s", strerror(errno)); - YYERROR; - } - LIST_INSERT_HEAD (cur_scripts, $1, next); - cur_scripts_num = 1; - } - | filter_chain filter_param SEMICOLON { - if ($2 == NULL) { - yyerror ("yyparse: g_malloc: %s", strerror(errno)); - YYERROR; +metriccmd: + | metricname + | metricfunction + | metricscore + ; + +metricname: + NAME EQSIGN QUOTEDSTRING { + if (cur_metric == NULL) { + cur_metric = g_malloc (sizeof (struct metric)); } - LIST_INSERT_HEAD (cur_scripts, $2, next); - cur_scripts_num ++; + cur_metric->name = g_strdup ($3); } ; -filter_param: - SCRIPT_HEADER EQSIGN SCRIPT_PARAM { - struct script_param *cur; - - cur = g_malloc (sizeof (struct script_param)); - if (cur == NULL) { - yyerror ("yyparse: g_malloc: %s", strerror(errno)); - YYERROR; +metricfunction: + FUNCTION EQSIGN QUOTEDSTRING { + if (cur_metric == NULL) { + cur_metric = g_malloc (sizeof (struct metric)); } - if (parse_script ($3, cur, SCRIPT_HEADER) == -1) { - yyerror ("yyparse: cannot parse filter param %s", $3); - YYERROR; - } - - $$ = cur; - free ($3); + cur_metric->func_name = g_strdup ($3); } - | SCRIPT_MIME EQSIGN SCRIPT_PARAM { - struct script_param *cur; + ; - cur = g_malloc (sizeof (struct script_param)); - if (cur == NULL) { - yyerror ("yyparse: g_malloc: %s", strerror(errno)); - YYERROR; +metricscore: + REQUIRED_SCORE EQSIGN NUMBER { + if (cur_metric == NULL) { + cur_metric = g_malloc (sizeof (struct metric)); } - if (parse_script ($3, cur, SCRIPT_MIME) == -1) { - yyerror ("yyparse: cannot parse filter param %s", $3); - YYERROR; - } - - $$ = cur; - free ($3); + cur_metric->required_score = $3; } - | SCRIPT_MESSAGE EQSIGN SCRIPT_PARAM { - struct script_param *cur; - - cur = g_malloc (sizeof (struct script_param)); - if (cur == NULL) { - yyerror ("yyparse: g_malloc: %s", strerror(errno)); - YYERROR; - } - if (parse_script ($3, cur, SCRIPT_MESSAGE) == -1) { - yyerror ("yyparse: cannot parse filter param %s", $3); - YYERROR; + | REQUIRED_SCORE EQSIGN FRACT { + if (cur_metric == NULL) { + cur_metric = g_malloc (sizeof (struct metric)); } - - $$ = cur; - free ($3); + cur_metric->required_score = $3; } - | SCRIPT_URL EQSIGN SCRIPT_PARAM { - struct script_param *cur; - - cur = g_malloc (sizeof (struct script_param)); - if (cur == NULL) { - yyerror ("yyparse: g_malloc: %s", strerror(errno)); - YYERROR; - } - if (parse_script ($3, cur, SCRIPT_URL) == -1) { - yyerror ("yyparse: cannot parse filter param %s", $3); - YYERROR; - } + ; - $$ = cur; - free ($3); - } - | SCRIPT_CHAIN EQSIGN SCRIPT_PARAM { - struct script_param *cur; +factors: + FACTORS OBRACE factorsbody EBRACE + ; - cur = g_malloc (sizeof (struct script_param)); - if (cur == NULL) { - yyerror ("yyparse: g_malloc: %s", strerror(errno)); - YYERROR; - } - if (parse_script ($3, cur, SCRIPT_CHAIN) == -1) { - yyerror ("yyparse: cannot parse filter param %s", $3); - YYERROR; - } +factorsbody: + factorparam SEMICOLON + | factorsbody factorparam SEMICOLON + ; - $$ = cur; - free ($3); +factorparam: + QUOTEDSTRING EQSIGN FRACT { + double *tmp = g_malloc (sizeof (double)); + *tmp = $3; + g_hash_table_insert (cfg->factors, $1, tmp); } - ; + | QUOTEDSTRING EQSIGN NUMBER { + double *tmp = g_malloc (sizeof (double)); + *tmp = $3; + g_hash_table_insert (cfg->factors, $1, tmp); + }; require: REQUIRE OBRACE requirebody EBRACE diff --git a/cfg_utils.c b/cfg_utils.c index a00a075b3..a2a783cc2 100644 --- a/cfg_utils.c +++ b/cfg_utils.c @@ -150,20 +150,16 @@ init_defaults (struct config_file *cfg) cfg->workers_number = DEFAULT_WORKERS_NUM; cfg->modules_opts = g_hash_table_new (g_str_hash, g_str_equal); cfg->variables = g_hash_table_new (g_str_hash, g_str_equal); + cfg->metrics = g_hash_table_new (g_str_hash, g_str_equal); + cfg->factors = g_hash_table_new (g_str_hash, g_str_equal); + cfg->c_modules = g_hash_table_new (g_str_hash, g_str_equal); - LIST_INIT (&cfg->filters); LIST_INIT (&cfg->perl_modules); - LIST_INIT (&cfg->c_modules); } void free_config (struct config_file *cfg) { - struct filter_chain *chain, *tmp_chain; - struct script_param *param, *tmp_param; - struct perl_module *module, *tmp_module; - struct c_module *cmodule, *tmp_cmodule; - if (cfg->pid_file) { g_free (cfg->pid_file); } @@ -173,34 +169,17 @@ free_config (struct config_file *cfg) if (cfg->bind_host) { g_free (cfg->bind_host); } - - LIST_FOREACH_SAFE (chain, &cfg->filters, next, tmp_chain) { - LIST_FOREACH_SAFE (param, chain->scripts, next, tmp_param) { - if (param->symbol) { - free (param->symbol); - } - if (param->function) { - free (param->function); - } - LIST_REMOVE (param, next); - free (param); - } - LIST_REMOVE (chain, next); - free (chain); + if (cfg->header_filters_str) { + g_free (cfg->header_filters_str); } - LIST_FOREACH_SAFE (module, &cfg->perl_modules, next, tmp_module) { - if (module->path) { - free (module->path); - } - LIST_REMOVE (module, next); - free (module); + if (cfg->mime_filters_str) { + g_free (cfg->mime_filters_str); } - - LIST_FOREACH_SAFE (cmodule, &cfg->c_modules, next, tmp_cmodule) { - if (cmodule->ctx) { - free (cmodule->ctx); - } - free (cmodule); + if (cfg->message_filters_str) { + g_free (cfg->message_filters_str); + } + if (cfg->url_filters_str) { + g_free (cfg->url_filters_str); } g_hash_table_foreach (cfg->modules_opts, clean_hash_bucket, NULL); @@ -208,25 +187,12 @@ free_config (struct config_file *cfg) g_hash_table_unref (cfg->modules_opts); g_hash_table_remove_all (cfg->variables); g_hash_table_unref (cfg->variables); -} - -int -parse_script (char *str, struct script_param *param, enum script_type type) -{ - char *cur_tok; - - bzero (param, sizeof (struct script_param)); - param->type = type; - - /* symbol:path:function -> cur_tok - symbol, str -> function */ - cur_tok = strsep (&str, ":"); - - if (str == NULL || cur_tok == NULL || *cur_tok == '\0') return -1; - - param->symbol = strdup (cur_tok); - param->function = strdup (str); - - return 0; + g_hash_table_remove_all (cfg->metrics); + g_hash_table_unref (cfg->metrics); + g_hash_table_remove_all (cfg->factors); + g_hash_table_unref (cfg->factors); + g_hash_table_remove_all (cfg->c_modules); + g_hash_table_unref (cfg->c_modules); } char* @@ -391,6 +357,76 @@ substitute_all_variables (gpointer key, gpointer value, gpointer data) var = substitute_variable (cfg, var, 1); } +static void +parse_filters_str (struct config_file *cfg, const char *str, enum script_type type) +{ + gchar **strvec, **p; + struct filter *cur; + int i; + + strvec = g_strsplit (str, ",", 0); + if (strvec == NULL) { + return; + } + + p = strvec; + while (*p++) { + cur = NULL; + /* Search modules from known C modules */ + for (i = 0; i < MODULES_NUM; i++) { + if (strcasecmp (modules[i].name, *p) == 0) { + cur = g_malloc (sizeof (struct filter)); + cur->type = C_FILTER; + switch (type) { + case SCRIPT_HEADER: + cur->func_name = g_strdup (*p); + LIST_INSERT_HEAD (&cfg->header_filters, cur, next); + break; + case SCRIPT_MIME: + cur->func_name = g_strdup (*p); + LIST_INSERT_HEAD (&cfg->mime_filters, cur, next); + break; + case SCRIPT_MESSAGE: + cur->func_name = g_strdup (*p); + LIST_INSERT_HEAD (&cfg->message_filters, cur, next); + break; + case SCRIPT_URL: + cur->func_name = g_strdup (*p); + LIST_INSERT_HEAD (&cfg->url_filters, cur, next); + break; + } + break; + } + } + if (cur != NULL) { + /* Go to next iteration */ + continue; + } + cur = g_malloc (sizeof (struct filter)); + cur->type = PERL_FILTER; + switch (type) { + case SCRIPT_HEADER: + cur->func_name = g_strdup (*p); + LIST_INSERT_HEAD (&cfg->header_filters, cur, next); + break; + case SCRIPT_MIME: + cur->func_name = g_strdup (*p); + LIST_INSERT_HEAD (&cfg->mime_filters, cur, next); + break; + case SCRIPT_MESSAGE: + cur->func_name = g_strdup (*p); + LIST_INSERT_HEAD (&cfg->message_filters, cur, next); + break; + case SCRIPT_URL: + cur->func_name = g_strdup (*p); + LIST_INSERT_HEAD (&cfg->url_filters, cur, next); + break; + } + } + + g_strfreev (strvec); +} + /* * Substitute all variables in strings */ @@ -399,6 +435,10 @@ post_load_config (struct config_file *cfg) { g_hash_table_foreach (cfg->variables, substitute_all_variables, cfg); g_hash_table_foreach (cfg->modules_opts, substitute_module_variables, cfg); + parse_filters_str (cfg, cfg->header_filters_str, SCRIPT_HEADER); + parse_filters_str (cfg, cfg->mime_filters_str, SCRIPT_MIME); + parse_filters_str (cfg, cfg->message_filters_str, SCRIPT_MESSAGE); + parse_filters_str (cfg, cfg->url_filters_str, SCRIPT_URL); } /* diff --git a/configure b/configure index 8150e03d4..20a413419 100755 --- a/configure +++ b/configure @@ -21,7 +21,7 @@ YACC_OUTPUT="cfg_yacc.c" LEX_OUTPUT="cfg_lex.c" CONFIG="config.h" -SOURCES="upstream.c cfg_utils.c memcached.c main.c util.c worker.c fstring.c url.c perl.c protocol.c mem_pool.c plugins/surbl.c ${LEX_OUTPUT} ${YACC_OUTPUT}" +SOURCES="upstream.c cfg_utils.c memcached.c main.c util.c worker.c fstring.c url.c perl.c protocol.c mem_pool.c filter.c plugins/surbl.c ${LEX_OUTPUT} ${YACC_OUTPUT}" MODULES="surbl" CFLAGS="$CFLAGS -W -Wpointer-arith -Wno-unused-parameter" @@ -30,7 +30,7 @@ CFLAGS="$CFLAGS -Wunused-value -ggdb -I${LOCALBASE}/include" CFLAGS="$CFLAGS " LDFLAGS="$LDFLAGS -L/usr/lib -L${LOCALBASE}/lib" OPT_FLAGS="-O -pipe -fno-omit-frame-pointer" -DEPS="config.h cfg_file.h memcached.h util.h main.h upstream.h fstring.h url.h perl.h mem_pool.h protocol.h ${LEX_OUTPUT} ${YACC_OUTPUT}" +DEPS="config.h cfg_file.h memcached.h util.h main.h upstream.h fstring.h url.h perl.h mem_pool.h protocol.h filter.h ${LEX_OUTPUT} ${YACC_OUTPUT}" EXEC=rspamd USER=postfix GROUP=postfix diff --git a/filter.c b/filter.c new file mode 100644 index 000000000..7fcd4a4f7 --- /dev/null +++ b/filter.c @@ -0,0 +1,43 @@ +#include +#include +#include +#include + +#include "mem_pool.h" +#include "filter.h" +#include "main.h" +#include "cfg_file.h" + +void +insert_result (struct worker_task *task, const char *metric_name, const char *symbol, u_char flag) +{ + struct filter_result *result; + struct metric *metric; + struct metric_result *metric_res; + + metric = g_hash_table_lookup (task->worker->srv->cfg->metrics, metric_name); + if (metric == NULL) { + return; + } + + result = memory_pool_alloc (task->task_pool, sizeof (struct filter_result)); + result->symbol = symbol; + result->flag = flag; + metric_res = g_hash_table_lookup (task->results, metric_name); + + if (metric_res == NULL) { + /* Create new metric chain */ + metric_res = memory_pool_alloc (task->task_pool, sizeof (struct metric_result)); + LIST_INIT (&metric_res->results); + metric_res->metric = metric; + g_hash_table_insert (task->results, (gpointer)metric_name, metric_res); + } + + LIST_INSERT_HEAD (&metric_res->results, result, next); +} + +int +process_filters (struct worker_task *task) +{ + /* TODO: Implement new logic of filters chains */ +} diff --git a/filter.h b/filter.h new file mode 100644 index 000000000..3eb97ac93 --- /dev/null +++ b/filter.h @@ -0,0 +1,46 @@ +#ifndef RSPAMD_FILTER_H +#define RSPAMD_FILTER_H + +#include +#ifndef HAVE_OWN_QUEUE_H +#include +#else +#include "queue.h" +#endif +#include + +struct worker_task; + +typedef void (*metric_cons_func)(struct worker_task *task, const char *metric_name); +typedef void (*filter_func)(struct worker_task *task); + +enum filter_type { C_FILTER, PERL_FILTER }; + +struct filter { + char *func_name; + enum filter_type type; + LIST_ENTRY (filter) next; +}; + +struct metric { + char *name; + char *func_name; + metric_cons_func func; + double required_score; +}; + +struct filter_result { + const char *symbol; + u_char flag; + LIST_ENTRY (filter_result) next; +}; + +struct metric_result { + struct metric *metric; + LIST_HEAD (resultq, filter_result) results; +}; + +int process_filters (struct worker_task *task); +void insert_result (struct worker_task *task, const char *metric_name, const char *symbol, u_char flag); + +#endif diff --git a/main.c b/main.c index 0478599ce..c67ce419f 100644 --- a/main.c +++ b/main.c @@ -152,7 +152,7 @@ int main (int argc, char **argv) { struct rspamd_main *rspamd; - struct c_module *cur_module = NULL; + struct module_ctx *cur_module = NULL; int res = 0, i, listen_sock; struct sigaction signals; struct rspamd_worker *cur, *cur_tmp, *active_worker; @@ -227,10 +227,9 @@ main (int argc, char **argv) /* Init C modules */ for (i = 0; i < MODULES_NUM; i ++) { - cur_module = g_malloc (sizeof (struct c_module)); - cur_module->name = modules[i].name; - if (modules[i].module_init_func(cfg, &cur_module->ctx) == 0) { - LIST_INSERT_HEAD (&cfg->c_modules, cur_module, next); + cur_module = g_malloc (sizeof (struct module_ctx)); + if (modules[i].module_init_func(cfg, &cur_module) == 0) { + g_hash_table_insert (cfg->c_modules, (gpointer)modules[i].name, cur_module); } } diff --git a/main.h b/main.h index e4f7fc600..94eceebcf 100644 --- a/main.h +++ b/main.h @@ -24,6 +24,7 @@ #include "url.h" #include "memcached.h" #include "protocol.h" +#include "filter.h" #include #include @@ -32,6 +33,8 @@ #define FIXED_CONFIG_FILE "./rspamd.conf" /* Time in seconds to exit for old worker */ #define SOFT_SHUTDOWN_TIME 60 +/* Default metric name */ +#define DEFAULT_METRIC "default" /* Logging in postfix style */ #define msg_err g_error @@ -59,8 +62,6 @@ struct rspamd_worker { struct pidfh; struct config_file; -struct filter_chain; - /* Struct that determine main server object (for logging purposes) */ struct rspamd_main { @@ -74,21 +75,6 @@ struct rspamd_main { TAILQ_HEAD (workq, rspamd_worker) workers; }; -struct filter_result { - const char *symbol; - struct filter_chain *chain; - int mark; - TAILQ_ENTRY (filter_result) next; -}; - -struct chain_result { - struct filter_chain *chain; - int *marks; - unsigned int marks_num; - int result_mark; - TAILQ_ENTRY (chain_result) next; -}; - struct mime_part { GMimeContentType *type; GByteArray *content; @@ -96,7 +82,6 @@ struct mime_part { }; struct save_point { - enum { C_FILTER, PERL_FILTER } save_type; void *entry; void *chain; unsigned int saved; @@ -133,10 +118,8 @@ struct worker_task { TAILQ_HEAD (mime_partq, mime_part) parts; /* URLs extracted from message */ TAILQ_HEAD (uriq, uri) urls; - /* List of filter results */ - TAILQ_HEAD (resultsq, filter_result) results; - /* Results of all chains */ - TAILQ_HEAD (chainsq, chain_result) chain_results; + /* Hash of metric result structures */ + GHashTable *results; struct config_file *cfg; struct save_point save; /* Memory pool that is associated with this task */ @@ -157,7 +140,6 @@ struct c_module { }; void start_worker (struct rspamd_worker *worker, int listen_sock); -int process_filters (struct worker_task *task); #endif diff --git a/plugins/surbl.c b/plugins/surbl.c index c8a545ed1..d35ab6069 100644 --- a/plugins/surbl.c +++ b/plugins/surbl.c @@ -29,6 +29,7 @@ #define DEFAULT_REDIRECTOR_READ_TIMEOUT 5000 #define DEFAULT_SURBL_MAX_URLS 1000 #define DEFAULT_SURBL_URL_EXPIRE 86400 +#define DEFAULT_SURBL_SYMBOL "SURBL_DNS" #define DEFAULT_SURBL_SUFFIX "multi.surbl.org" struct surbl_ctx { @@ -44,6 +45,8 @@ struct surbl_ctx { unsigned int max_urls; unsigned int url_expire; char *suffix; + char *symbol; + char *metric; GHashTable *hosters; GHashTable *whitelist; unsigned use_redirector:1; @@ -144,6 +147,18 @@ surbl_module_init (struct config_file *cfg, struct module_ctx **ctx) else { surbl_module_ctx->suffix = DEFAULT_SURBL_SUFFIX; } + if ((value = get_module_opt (cfg, "surbl", "symbol")) != NULL) { + surbl_module_ctx->symbol = value; + } + else { + surbl_module_ctx->symbol = DEFAULT_SURBL_SYMBOL; + } + if ((value = get_module_opt (cfg, "surbl", "metric")) != NULL) { + surbl_module_ctx->metric = value; + } + else { + surbl_module_ctx->metric = DEFAULT_METRIC; + } surbl_module_ctx->hosters = g_hash_table_new (g_str_hash, g_str_equal); surbl_module_ctx->whitelist = g_hash_table_new (g_str_hash, g_str_equal); @@ -265,13 +280,14 @@ static void dns_callback (int result, char type, int count, int ttl, void *addresses, void *data) { struct memcached_param *param = (struct memcached_param *)data; - struct filter_result *res; /* If we have result from DNS server, this url exists in SURBL, so increase score */ if (result != DNS_ERR_NONE || type != DNS_IPv4_A) { msg_info ("surbl_check: url %s is in surbl %s", param->url->host, surbl_module_ctx->suffix); - res = TAILQ_LAST (¶m->task->results, resultsq); - res->mark += surbl_module_ctx->weight; + insert_result (param->task, surbl_module_ctx->metric, surbl_module_ctx->symbol, 1); + } + else { + insert_result (param->task, surbl_module_ctx->metric, surbl_module_ctx->symbol, 0); } param->task->save.saved --; @@ -292,7 +308,6 @@ memcached_callback (memcached_ctx_t *ctx, memc_error_t error, void *data) { struct memcached_param *param = (struct memcached_param *)data; int *url_count; - struct filter_result *res; char *surbl_req; switch (ctx->op) { @@ -335,8 +350,7 @@ memcached_callback (memcached_ctx_t *ctx, memc_error_t error, void *data) /* Do not check DNS for urls that have count more than max_urls */ if (*url_count > surbl_module_ctx->max_urls) { msg_info ("memcached_callback: url '%s' has count %d, max: %d", struri (param->url), *url_count, surbl_module_ctx->max_urls); - res = TAILQ_LAST (¶m->task->results, resultsq); - res->mark += surbl_module_ctx->weight; + insert_result (param->task, surbl_module_ctx->metric, surbl_module_ctx->symbol, 1); } (*url_count) ++; memc_set (param->ctx, param->ctx->param, surbl_module_ctx->url_expire); diff --git a/worker.c b/worker.c index 9846a5736..0c67fe7ce 100644 --- a/worker.c +++ b/worker.c @@ -74,9 +74,6 @@ sigusr_handler (int fd, short what, void *arg) static void free_task (struct worker_task *task) { - struct uri *cur; - struct filter_result *res; - struct chain_result *chain_res; struct mime_part *part; if (task) { @@ -86,18 +83,6 @@ free_task (struct worker_task *task) if (task->memc_ctx) { memc_close_ctx (task->memc_ctx); } - while (!TAILQ_EMPTY (&task->urls)) { - cur = TAILQ_FIRST (&task->urls); - TAILQ_REMOVE (&task->urls, cur, next); - } - while (!TAILQ_EMPTY (&task->results)) { - res = TAILQ_FIRST (&task->results); - TAILQ_REMOVE (&task->results, res, next); - } - while (!TAILQ_EMPTY (&task->chain_results)) { - chain_res = TAILQ_FIRST (&task->chain_results); - TAILQ_REMOVE (&task->chain_results, chain_res, next); - } while (!TAILQ_EMPTY (&task->parts)) { part = TAILQ_FIRST (&task->parts); @@ -175,187 +160,6 @@ mime_foreach_callback (GMimeObject *part, gpointer user_data) } } -int -process_filters (struct worker_task *task) -{ - struct filter_result *res = NULL; - struct chain_result *chain_res = NULL; - struct c_module *c_filter = NULL; - struct filter_chain *chain = NULL; - struct script_param *perl_script = NULL; - int i = 0; - - /* First process C modules */ - if (task->save.saved > 0) { - if (task->save.save_type == C_FILTER) { - task->save.saved = 0; - c_filter = (struct c_module *)task->save.entry; - } - else if (task->save.save_type == PERL_FILTER) { - chain = (struct filter_chain *)task->save.chain; - perl_script = (struct script_param *)task->save.entry; - task->save.saved = 0; - } - } - else { - c_filter = LIST_FIRST (&task->cfg->c_modules); - chain = LIST_FIRST (&task->cfg->filters); - if (chain) { - perl_script = LIST_FIRST (chain->scripts); - } - } - while (c_filter != NULL) { - res = memory_pool_alloc (task->task_pool, sizeof (struct filter_result)); - if (res == NULL) { - msg_err ("process_filters: malloc failed, %m"); - return -1; - } - res->chain = NULL; - res->symbol = c_filter->name; - res->mark = 0; - if (c_filter->ctx->header_filter != NULL) { - res->mark += c_filter->ctx->header_filter (task); - if (task->save.saved > 0) { - TAILQ_INSERT_TAIL (&task->results, res, next); - task->save.save_type = C_FILTER; - goto save_point; - } - } - if (c_filter->ctx->message_filter != NULL) { - res->mark += c_filter->ctx->message_filter (task); - if (task->save.saved > 0) { - TAILQ_INSERT_TAIL (&task->results, res, next); - task->save.save_type = C_FILTER; - goto save_point; - } - } - if (c_filter->ctx->mime_filter != NULL) { - res->mark += c_filter->ctx->mime_filter (task); - if (task->save.saved > 0) { - TAILQ_INSERT_TAIL (&task->results, res, next); - task->save.save_type = C_FILTER; - goto save_point; - } - } - if (c_filter->ctx->url_filter != NULL) { - res->mark += c_filter->ctx->url_filter (task); - if (task->save.saved > 0) { - TAILQ_INSERT_TAIL (&task->results, res, next); - task->save.save_type = C_FILTER; - goto save_point; - } - } - TAILQ_INSERT_TAIL (&task->results, res, next); - c_filter = LIST_NEXT (c_filter, next); - } - - /* Process perl chains */ - while (chain != NULL) { - chain_res = memory_pool_alloc (task->task_pool, sizeof (struct chain_result)); - if (chain_res == NULL) { - msg_err ("process_filters: malloc failed, %m"); - return -1; - } - i = 0; - chain_res->chain = chain; - chain_res->marks = memory_pool_alloc (task->task_pool, sizeof (int) * chain->scripts_number); - chain_res->result_mark = 0; - if (chain_res->marks == NULL) { - free (chain_res); - msg_err ("process_filters: malloc failed, %m"); - return -1; - } - while (perl_script != NULL) { - if (perl_script->type == SCRIPT_CHAIN) { - /* Skip chain filters first */ - continue; - } - res = memory_pool_alloc (task->task_pool, sizeof (struct filter_result)); - if (res == NULL) { - msg_err ("process_filters: malloc failed, %m"); - return -1; - } - res->chain = chain; - res->symbol = perl_script->symbol; - res->mark = 0; - switch (perl_script->type) { - case SCRIPT_HEADER: - res->mark += perl_call_header_filter (perl_script->function, task); - if (task->save.saved > 0) { - TAILQ_INSERT_TAIL (&task->results, res, next); - task->save.save_type = PERL_FILTER; - goto save_point; - } - break; - case SCRIPT_MESSAGE: - res->mark += perl_call_message_filter (perl_script->function, task); - if (task->save.saved > 0) { - TAILQ_INSERT_TAIL (&task->results, res, next); - task->save.save_type = PERL_FILTER; - goto save_point; - } - break; - case SCRIPT_MIME: - res->mark += perl_call_mime_filter (perl_script->function, task); - if (task->save.saved > 0) { - TAILQ_INSERT_TAIL (&task->results, res, next); - task->save.save_type = PERL_FILTER; - goto save_point; - } - break; - case SCRIPT_URL: - res->mark += perl_call_url_filter (perl_script->function, task); - if (task->save.saved > 0) { - TAILQ_INSERT_TAIL (&task->results, res, next); - task->save.save_type = PERL_FILTER; - goto save_point; - } - break; - } - TAILQ_INSERT_TAIL (&task->results, res, next); - chain_res->marks[i++] = res->mark; - perl_script = LIST_NEXT (perl_script, next); - } - chain_res->marks_num = i; - TAILQ_INSERT_TAIL (&task->chain_results, chain_res, next); - chain = LIST_NEXT (chain, next); - } - - /* Now process chain results */ - TAILQ_FOREACH (chain_res, &task->chain_results, next) { - i = 0; - LIST_FOREACH (perl_script, chain_res->chain->scripts, next) { - if (perl_script->type != SCRIPT_CHAIN) { - /* Skip not chain filters */ - continue; - } - /* Increment i; if i would be equal to zero that would mean that this chain has no chain filter script */ - i ++; - chain_res->result_mark += perl_call_url_filter (perl_script->function, task, chain_res->marks, chain_res->marks_num); - } - /* If chain has no chain filter, just do addition of all marks */ - if (i == 0) { - for (i = 0; i < chain_res->marks_num; i++) { - chain_res->result_mark += chain_res->marks[i]; - } - } - } - - task->state = WRITE_REPLY; - bufferevent_enable (task->bev, EV_WRITE); - return 0; - -save_point: - if (task->save.save_type == C_FILTER) { - task->save.entry = LIST_NEXT (c_filter, next); - } - else if (task->save.save_type == PERL_FILTER) { - task->save.chain = LIST_NEXT (chain, next); - task->save.entry = LIST_NEXT (perl_script, next); - } - return 1; -} - static int process_message (struct worker_task *task) { @@ -493,7 +297,6 @@ accept_socket (int fd, short what, void *arg) new_task->parts_count = 0; new_task->cfg = worker->srv->cfg; TAILQ_INIT (&new_task->urls); - TAILQ_INIT (&new_task->results); TAILQ_INIT (&new_task->parts); new_task->task_pool = memory_pool_new (TASK_POOL_SIZE); new_task->memc_ctx = memory_pool_alloc (new_task->task_pool, sizeof (memcached_ctx_t));